Loading lesson path
Concept visual
Observability in Node.js applications involves collecting and analyzing metrics and logs to understand system behavior.
Metrics, Logs, and Traces (often called the "three pillars of observability") provide different but complementary views of your system's health and performance.
// Basic Metrics Collection
const express = require('express');
const client = require('prom-client');

// Create a Registry to hold this app's metrics
const register = new client.Registry();

// Add a default label which is attached to every metric
register.setDefaultLabels({
  app: 'nodejs-monitoring-demo',
});

// Enable collection of default Node.js metrics (heap, RSS, event loop, etc.)
client.collectDefaultMetrics({ register });

// Custom metric: HTTP request duration histogram
const httpRequestDurationMicroseconds = new client.Histogram({
  name: 'http_request_duration_seconds',
  help: 'Duration of HTTP requests in seconds',
  labelNames: ['method', 'route', 'code'],
  buckets: [0.1, 0.3, 0.5, 0.7, 1, 3, 5, 7, 10], // buckets for response time
  registers: [register], // register on our custom Registry, not the global one
});

const app = express();

// Custom middleware to track request duration
app.use((req, res, next) => {
  const end = httpRequestDurationMicroseconds.startTimer();
  res.on('finish', () => {
    end({ method: req.method, route: req.path, code: res.statusCode });
  });
  next();
});

// Expose metrics endpoint for Prometheus to scrape
app.get('/metrics', async (req, res) => {
  res.set('Content-Type', register.contentType);
  res.end(await register.metrics());
});

// Example route
app.get('/', (req, res) => {
  res.send('Hello, Observability!');
});

const PORT = process.env.PORT || 3000;
app.listen(PORT, () => {
  console.log(`Server running on port ${PORT}`);
});

// Dashboard panels to build from these metrics:
// - Memory Usage (Heap & RSS)
// - Request Rate & Duration
Distributed tracing helps track requests as they flow through multiple services in a microservices architecture.
// Install required packages:
// npm install @opentelemetry/sdk-node @opentelemetry/auto-instrumentations-node
// npm install @opentelemetry/exporter-trace-otlp-http
const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-http');
const { Resource } = require('@opentelemetry/resources');
const { SemanticResourceAttributes } = require('@opentelemetry/semantic-conventions');

// Configure the SDK: identify this service and export spans to an
// OTLP collector. Auto-instrumentations patch common libraries
// (http, express, etc.) so spans are created without manual code.
const sdk = new NodeSDK({
  resource: new Resource({
    [SemanticResourceAttributes.SERVICE_NAME]: 'my-service',
    [SemanticResourceAttributes.SERVICE_VERSION]: '1.0.0',
  }),
  traceExporter: new OTLPTraceExporter({
    url: 'http://collector:4318/v1/traces', // 4318 is the OTLP/HTTP default port
  }),
  instrumentations: [getNodeAutoInstrumentations()],
});

// NOTE(review): sdk.start() returns a Promise in older SDK releases but is
// synchronous in recent @opentelemetry/sdk-node versions — confirm the
// installed version matches this usage.
sdk.start().then(() => console.log('Tracing initialized')).catch((error) => console.log('Error initializing tracing', error));

// Structured Logging with Pino
const pino = require('pino');
const express = require('express');
const pinoHttp = require('pino-http');

// Base logger: level from env (default 'info'), levels rendered uppercase
const logger = pino({
  level: process.env.LOG_LEVEL || 'info',
  formatters: {
    level: (label) => ({ level: label.toUpperCase() }),
  },
});

const app = express();

// HTTP request logging middleware with status-code-based log levels
app.use(pinoHttp({
  logger,
  // NOTE(review): (res, err) is the pino-http v6 signature; v7+ expects
  // (req, res, err) — confirm the installed pino-http version.
  customLogLevel: function (res, err) {
    if (res.statusCode >= 400 && res.statusCode < 500) {
      return 'warn';
    } else if (res.statusCode >= 500 || err) {
      return 'error';
    }
    return 'info';
  },
}));

// Add per-request context to logs. Registered BEFORE the routes so the
// child logger is actually used by route handlers (middleware added after
// the routes would never run for them).
app.use((req, res, next) => {
  const childLogger = logger.child({
    requestId: req.id,
    userId: req.user?.id || 'anonymous',
    path: req.path,
    method: req.method,
  });
  req.log = childLogger;
  next();
});

app.get('/', (req, res) => {
  req.log.info('Processing request');
  res.json({ status: 'ok' });
});

app.listen(3000, () => {
  logger.info('Server started on port 3000');
});

// Visualize your metrics with Grafana dashboards. Example queries for
// common metrics follow below.
# Node.js Memory Usage (RSS in MB)
process_resident_memory_bytes{job="nodejs"} / 1024 / 1024

# Request Duration (p99 in ms)
histogram_quantile(0.99, sum(rate(http_request_duration_seconds_bucket[5m])) by (le)) * 1000

# Error Rate
sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))
Alerting Rules (Prometheus)

groups:
  - name: nodejs
    rules:
      - alert: HighErrorRate
        expr: rate(http_requests_total{status=~"5.."}[5m]) / rate(http_requests_total[5m]) > 0.05
        for: 10m
        labels:
          severity: critical
        annotations:
          summary: "High error rate on {{ $labels.instance }}"
Prometheus + Grafana
Elasticsearch + Fluentd + Kibana (EFK)