# Clustering
PMDaemon's advanced clustering capabilities enable you to scale applications horizontally across multiple instances with intelligent load distribution, automatic failover, and sophisticated process management strategies.
## Overview
PMDaemon clustering provides:
- 🔄 **Automatic load balancing** - Distribute traffic across instances
- 📈 **Horizontal scaling** - Add/remove instances dynamically
- 🛡️ **Fault tolerance** - Automatic failover and recovery
- 🎯 **Smart port allocation** - Automatic port distribution
- 📊 **Instance monitoring** - Per-instance health and metrics
## Basic Clustering

### Simple Cluster Setup

```bash
# Start a basic cluster with 4 instances
pmdaemon start "node server.js" --name web-cluster \
  --instances 4 \
  --port 3000-3003
```
**What happens:**

- 4 Node.js processes start
- Ports 3000, 3001, 3002, and 3003 are assigned
- Each instance receives a `PM2_INSTANCE_ID` environment variable
- Load balancing can be configured externally
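Each instance can read its assigned values from the environment. A minimal `server.js` sketch (assuming, as in the examples later on this page, that the assigned port is exposed as `PORT`):

```javascript
// Minimal sketch: bind to the port PMDaemon assigned to this instance.
const http = require('http');

const port = parseInt(process.env.PORT, 10) || 3000;
const instanceId = process.env.PM2_INSTANCE_ID || '0';

http.createServer((req, res) => {
  res.end(`Handled by instance ${instanceId} on port ${port}\n`);
}).listen(port, () => {
  console.log(`Instance ${instanceId} listening on port ${port}`);
});
```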
### Configuration File Clustering

```json
{
  "name": "web-cluster",
  "script": "node",
  "args": ["server.js"],
  "instances": 4,
  "exec_mode": "cluster",
  "port": "3000-3003",
  "env": {
    "NODE_ENV": "production"
  },
  "instance_var": "INSTANCE_ID",
  "merge_logs": true
}
```
## Advanced Clustering Patterns

### CPU-Based Auto Scaling

```json
{
  "name": "auto-scaled-cluster",
  "script": "node",
  "args": ["server.js"],
  "instances": "max",
  "port": "auto:3000-3100",
  "env": {
    "NODE_ENV": "production",
    "CLUSTER_MODE": "auto"
  }
}
```
"max" instances:
- Uses all available CPU cores
- Automatically adjusts to system capacity
- Optimal for CPU-bound applications
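For reference, a sketch of what `"max"` resolves to in Node terms (roughly one instance per core):

```javascript
// Rough equivalent of "instances": "max" - one instance per CPU core.
const os = require('os');
console.log(`This machine would run ${os.cpus().length} instances`);
```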
### Heterogeneous Clustering
Different instance types with specialized roles:
```json
{
  "apps": [
    {
      "name": "master-instance",
      "script": "node",
      "args": ["server.js", "--role=master"],
      "instances": 1,
      "port": "3000",
      "env": {
        "ROLE": "master",
        "ENABLE_CRON": "true",
        "ENABLE_ADMIN": "true"
      }
    },
    {
      "name": "worker-instances",
      "script": "node",
      "args": ["server.js", "--role=worker"],
      "instances": 6,
      "port": "3001-3006",
      "env": {
        "ROLE": "worker",
        "ENABLE_CRON": "false",
        "ENABLE_ADMIN": "false"
      }
    }
  ]
}
```
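One codebase can serve both profiles by branching on the injected environment. A sketch of a `server.js` honoring the `env` blocks above:

```javascript
// server.js - role-specific startup driven by the env blocks above.
const express = require('express');
const app = express();
const role = process.env.ROLE || 'worker';

app.get('/', (req, res) => res.send(`Served by a ${role} instance\n`));

if (process.env.ENABLE_CRON === 'true') {
  // Only the single master instance schedules background work,
  // so jobs never run more than once across the cluster.
  setInterval(() => console.log('master: running scheduled task'), 60 * 60 * 1000);
}

app.listen(process.env.PORT || 3000);
```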
### Multi-Tier Clustering

```json
{
  "apps": [
    {
      "name": "frontend-tier",
      "script": "node",
      "args": ["frontend.js"],
      "instances": 3,
      "port": "8080-8082",
      "env": {
        "TIER": "frontend",
        "BACKEND_URLS": "http://localhost:3000,http://localhost:3001,http://localhost:3002"
      }
    },
    {
      "name": "api-tier",
      "script": "node",
      "args": ["api.js"],
      "instances": 4,
      "port": "3000-3003",
      "env": {
        "TIER": "api",
        "DATABASE_POOL_SIZE": "10"
      }
    },
    {
      "name": "worker-tier",
      "script": "python",
      "args": ["worker.py"],
      "instances": 2,
      "env": {
        "TIER": "worker",
        "QUEUE_NAME": "default"
      }
    }
  ]
}
```
## Instance Management

### Dynamic Scaling

```bash
# Scale up to 8 instances
pmdaemon scale web-cluster 8

# Scale down to 2 instances
pmdaemon scale web-cluster 2

# Auto-scale based on CPU usage
pmdaemon autoscale web-cluster --min 2 --max 10 --cpu-threshold 70
```
### Instance-Specific Operations

```bash
# Restart a specific instance
pmdaemon restart web-cluster --instance 2

# Stop a specific instance
pmdaemon stop web-cluster --instance 1

# Get instance-specific logs
pmdaemon logs web-cluster --instance 0 --lines 50
```
### Rolling Updates

```bash
# Rolling restart (one instance at a time)
pmdaemon restart web-cluster --rolling

# Rolling update with new configuration
pmdaemon reload web-cluster --rolling --instances 6
```
## Load Balancing Integration

### Nginx Load Balancer

```nginx
upstream web_cluster {
    least_conn;
    server localhost:3000 weight=1 max_fails=3 fail_timeout=30s;
    server localhost:3001 weight=1 max_fails=3 fail_timeout=30s;
    server localhost:3002 weight=1 max_fails=3 fail_timeout=30s;
    server localhost:3003 weight=1 max_fails=3 fail_timeout=30s;

    # Keep idle connections to the upstream instances open
    keepalive 32;
}

server {
    listen 80;
    server_name myapp.com;

    location / {
        proxy_pass http://web_cluster;
        proxy_set_header Host $host;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;

        # Connection settings
        proxy_connect_timeout 5s;
        proxy_send_timeout 60s;
        proxy_read_timeout 60s;

        # Fail over to the next instance on errors
        proxy_next_upstream error timeout invalid_header http_500 http_502 http_503;
    }

    # Health check endpoint
    location /health {
        access_log off;
        return 200 "healthy\n";
        add_header Content-Type text/plain;
    }
}
```
### HAProxy Configuration

```haproxy
global
    daemon
    maxconn 4096

defaults
    mode http
    timeout connect 5000ms
    timeout client 50000ms
    timeout server 50000ms
    option httpchk GET /health

frontend web_frontend
    bind *:80
    default_backend web_cluster

backend web_cluster
    balance roundrobin
    option httpchk GET /health HTTP/1.1\r\nHost:\ localhost
    server web1 localhost:3000 check inter 5s fall 3 rise 2
    server web2 localhost:3001 check inter 5s fall 3 rise 2
    server web3 localhost:3002 check inter 5s fall 3 rise 2
    server web4 localhost:3003 check inter 5s fall 3 rise 2

# Statistics interface
listen stats
    bind *:8404
    stats enable
    stats uri /stats
    stats refresh 30s
```
## Application-Level Clustering

### Node.js Cluster Integration

```javascript
// server.js - Cluster-aware application
const cluster = require('cluster');
const express = require('express');

const app = express();
const port = process.env.PORT || 3000;
const instanceId = parseInt(process.env.PM2_INSTANCE_ID || '0', 10);

// Instance-specific configuration
app.locals.instanceId = instanceId;
app.locals.workerId = cluster.worker?.id || 'master';

// Health check endpoint
app.get('/health', (req, res) => {
  res.json({
    status: 'healthy',
    instance: instanceId,
    worker: app.locals.workerId,
    uptime: process.uptime(),
    memory: process.memoryUsage(),
    timestamp: new Date().toISOString()
  });
});

// Instance-specific behavior
if (instanceId === 0) {
  // Master instance handles cron jobs
  const cron = require('node-cron');
  cron.schedule('0 * * * *', () => {
    console.log('Running hourly task on master instance');
    // Perform scheduled tasks
  });
}

const server = app.listen(port, () => {
  console.log(`Instance ${instanceId} listening on port ${port}`);
});

// Graceful shutdown
process.on('SIGTERM', () => {
  console.log(`Instance ${instanceId} received SIGTERM, shutting down gracefully`);
  server.close(() => {
    console.log(`Instance ${instanceId} shutdown complete`);
    process.exit(0);
  });
});
```
### Session Affinity

```javascript
// Shared session store for the cluster (pre-v7 connect-redis API shown)
const session = require('express-session');
const RedisStore = require('connect-redis')(session);
const redis = require('redis');

const redisClient = redis.createClient({
  host: 'localhost',
  port: 6379
});

app.use(session({
  store: new RedisStore({ client: redisClient }),
  secret: process.env.SESSION_SECRET,
  resave: false,
  saveUninitialized: false,
  cookie: {
    secure: process.env.NODE_ENV === 'production',
    maxAge: 24 * 60 * 60 * 1000 // 24 hours
  }
}));
```
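Because session state lives in Redis rather than in any single instance's memory, every instance can serve every request and the load balancer needs no sticky sessions. If you cannot share state, pin clients to instances instead with `ip_hash` (Nginx) or `balance source` (HAProxy).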
## Monitoring Clusters

### Cluster Metrics

```bash
# Monitor all instances
pmdaemon monit web-cluster

# Get cluster statistics
pmdaemon stats web-cluster

# View cluster topology
pmdaemon topology web-cluster
```
### Per-Instance Monitoring

```bash
# Monitor a specific instance
pmdaemon monit web-cluster --instance 2

# Get instance metrics
pmdaemon info web-cluster --instance 1

# Compare instance performance
pmdaemon compare web-cluster
```
### Cluster Health Dashboard

```javascript
// cluster-dashboard.js
const PMDaemonClient = require('./pmdaemon-client');

class ClusterDashboard {
  constructor() {
    this.client = new PMDaemonClient();
  }

  async getClusterStatus(clusterName) {
    const processes = await this.client.listProcesses();
    const clusterProcesses = processes.processes.filter(p =>
      p.name.startsWith(clusterName)
    );

    return {
      name: clusterName,
      totalInstances: clusterProcesses.length,
      healthyInstances: clusterProcesses.filter(p => p.health === 'healthy').length,
      onlineInstances: clusterProcesses.filter(p => p.status === 'online').length,
      totalCpu: clusterProcesses.reduce((sum, p) => sum + p.cpu, 0),
      totalMemory: clusterProcesses.reduce((sum, p) => sum + p.memory, 0),
      instances: clusterProcesses.map(p => ({
        id: p.id,
        port: p.port,
        status: p.status,
        health: p.health,
        cpu: p.cpu,
        memory: p.memory,
        uptime: p.uptime,
        restarts: p.restarts
      }))
    };
  }

  async autoScale(clusterName, options = {}) {
    const {
      minInstances = 2,
      maxInstances = 10,
      cpuThreshold = 70
    } = options;

    const status = await this.getClusterStatus(clusterName);
    if (status.totalInstances === 0) return 0; // nothing to scale

    const avgCpu = status.totalCpu / status.totalInstances;
    let targetInstances = status.totalInstances;

    // Scale up when average CPU exceeds the threshold
    if (avgCpu > cpuThreshold && status.totalInstances < maxInstances) {
      targetInstances = Math.min(maxInstances, status.totalInstances + 1);
      console.log(`🔼 Scaling up ${clusterName}: CPU ${avgCpu.toFixed(1)}% > ${cpuThreshold}%`);
    }

    // Scale down when average CPU falls below half the threshold
    if (avgCpu < cpuThreshold * 0.5 && status.totalInstances > minInstances) {
      targetInstances = Math.max(minInstances, status.totalInstances - 1);
      console.log(`🔽 Scaling down ${clusterName}: CPU ${avgCpu.toFixed(1)}% < ${cpuThreshold * 0.5}%`);
    }

    if (targetInstances !== status.totalInstances) {
      await this.client.scaleCluster(clusterName, targetInstances);
      console.log(`📊 Scaled ${clusterName} to ${targetInstances} instances`);
    }

    return targetInstances;
  }
}
```
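A sketch of driving the dashboard from a polling loop in the same file:

```javascript
// Poll the cluster every 30 seconds and auto-scale within bounds.
const dashboard = new ClusterDashboard();

setInterval(async () => {
  const status = await dashboard.getClusterStatus('web-cluster');
  console.log(`${status.onlineInstances}/${status.totalInstances} instances online`);
  await dashboard.autoScale('web-cluster', { minInstances: 2, maxInstances: 10 });
}, 30000);
```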
## Fault Tolerance

### Automatic Failover

```json
{
  "name": "fault-tolerant-cluster",
  "script": "node",
  "args": ["server.js"],
  "instances": 4,
  "port": "3000-3003",
  "autorestart": true,
  "max_restarts": 5,
  "min_uptime": "10s",
  "restart_delay": "2s",
  "health_check": {
    "check_type": "http",
    "url": "http://localhost:${PORT}/health",
    "timeout": 5,
    "interval": 15,
    "retries": 3
  }
}
```
### Circuit Breaker Pattern

```javascript
// circuit-breaker.js (assumes an existing Express `app` and database client `db`)
const CircuitBreaker = require('opossum');

const options = {
  timeout: 3000,                // fail calls that take longer than 3s
  errorThresholdPercentage: 50, // open after 50% of requests fail
  resetTimeout: 30000,          // attempt recovery after 30s
  rollingCountTimeout: 10000,
  rollingCountBuckets: 10
};

async function callDatabase(query) {
  // Database call implementation
  return await db.query(query);
}

// Create circuit breaker for database calls
const dbBreaker = new CircuitBreaker(callDatabase, options);

dbBreaker.on('open', () => {
  console.log('Circuit breaker opened - database calls failing');
});

dbBreaker.on('halfOpen', () => {
  console.log('Circuit breaker half-open - testing database');
});

dbBreaker.on('close', () => {
  console.log('Circuit breaker closed - database calls healthy');
});

// Use the circuit breaker in the application
app.get('/api/data', async (req, res) => {
  try {
    const data = await dbBreaker.fire(req.query.sql);
    res.json(data);
  } catch (error) {
    if (dbBreaker.opened) {
      res.status(503).json({ error: 'Service temporarily unavailable' });
    } else {
      res.status(500).json({ error: 'Database error' });
    }
  }
});
```
## Performance Optimization

### Cluster-Specific Tuning

```json
{
  "name": "optimized-cluster",
  "script": "node",
  "args": [
    "--max-old-space-size=512",
    "--optimize-for-size",
    "server.js"
  ],
  "instances": 8,
  "port": "auto:3000-3100",
  "env": {
    "NODE_ENV": "production",
    "UV_THREADPOOL_SIZE": "4",
    "CLUSTER_WORKER_SIZE": "1"
  },
  "max_memory_restart": "600M",
  "instance_var": "WORKER_ID"
}
```
### Resource Allocation

```bash
# CPU affinity for instances
pmdaemon start "node server.js" --name cpu-cluster \
  --instances 4 \
  --cpu-affinity "0,1,2,3"

# Memory limits per instance
pmdaemon start "node server.js" --name memory-cluster \
  --instances 4 \
  --max-memory 512M \
  --port 3000-3003
```
## Best Practices

### 1. Right-Size Your Clusters

```bash
# Start with a conservative instance count
pmdaemon start "node server.js" --name web-cluster --instances 2

# Monitor and scale based on metrics
pmdaemon monit web-cluster
pmdaemon scale web-cluster 4  # Scale up if needed
```
### 2. Use Health Checks

```json
{
  "health_check": {
    "check_type": "http",
    "url": "http://localhost:${PORT}/health",
    "timeout": 5,
    "interval": 30,
    "retries": 2
  }
}
```
### 3. Implement Graceful Shutdown

```javascript
process.on('SIGTERM', async () => {
  console.log('Received SIGTERM, shutting down gracefully');

  // Stop accepting new requests
  server.close();

  // Finish existing requests (see the sketch below)
  await finishPendingRequests();

  // Close database connections (`db` is your application's DB client)
  await db.close();

  process.exit(0);
});
```
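`finishPendingRequests` above is left abstract; one way to implement it is to count in-flight requests with middleware and wait for the counter to drain (a sketch, assuming the Express `app` from the earlier examples):

```javascript
// Track in-flight requests so shutdown can wait for them to drain.
let inFlight = 0;
app.use((req, res, next) => {
  inFlight++;
  res.on('finish', () => inFlight--);
  next();
});

function finishPendingRequests(pollMs = 100) {
  return new Promise((resolve) => {
    const timer = setInterval(() => {
      if (inFlight === 0) {
        clearInterval(timer);
        resolve();
      }
    }, pollMs);
  });
}
```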
### 4. Monitor Cluster Health

```bash
# Set up monitoring
pmdaemon start "node monitor.js" --name cluster-monitor \
  --env CLUSTER_NAME=web-cluster \
  --env CHECK_INTERVAL=30s
```
### 5. Use Load Balancer Health Checks

```nginx
# Route load-balancer health checks to the cluster with tight timeouts
location /health {
    proxy_pass http://web_cluster;
    proxy_connect_timeout 1s;
    proxy_send_timeout 1s;
    proxy_read_timeout 1s;
}
```
## Next Steps
- **Performance Tuning** - Optimize cluster performance
- **Security** - Secure cluster deployments
- **Monitoring** - Advanced cluster monitoring
- **Deployment Examples** - Production cluster patterns