數據庫連接池與 Tomcat 線程監控警示系統設計
1. 監控架構設計
┌────────────────┐       ┌────────────────┐       ┌────────────────┐
│   應用服務器   │──────▶│  監控數據收集  │──────▶│  監控數據存儲  │
└────────────────┘       └────────────────┘       └────────────────┘
                                 │                        │
                                 ▼                        ▼
                         ┌────────────────┐       ┌────────────────┐
                         │    閾值檢測    │◀──────│    監控面板    │
                         └────────────────┘       └────────────────┘
                                 │
                                 ▼
                         ┌────────────────┐
                         │    告警通知    │
                         └────────────────┘
2. 監控指標設計
數據庫連接池監控
/**
 * Spring configuration that publishes the connection-pool metrics collector as a bean.
 */
@Configuration
public class DatabasePoolConfig {

    /**
     * Builds the collector that samples the supplied data source.
     *
     * @param dataSource the data source passed to the collector's constructor
     * @return a new {@link DataSourcePoolMetricsCollector} wrapping {@code dataSource}
     */
    @Bean
    public DataSourcePoolMetricsCollector dataSourceMetricsCollector(DataSource dataSource) {
        // NOTE(review): DataSourcePoolMetricsCollector is itself annotated @Component in this
        // document; presumably only one of the two registrations should remain - confirm.
        final DataSourcePoolMetricsCollector collector =
                new DataSourcePoolMetricsCollector(dataSource);
        return collector;
    }
}
/**
 * Periodically samples a Hikari connection pool, records the readings as metrics and raises
 * alerts when the configured thresholds are exceeded.
 *
 * <p>NOTE(review): {@code registerMetrics(...)} and {@code alertService} are referenced but not
 * declared in this snippet; they are presumably defined in the elided part of the class.
 */
@Component
@Slf4j
public class DataSourcePoolMetricsCollector {
    private final DataSource dataSource;
    private final MeterRegistry meterRegistry;

    /** Active/total connection ratio above which a WARNING alert is sent. */
    @Value("${monitor.db.active-ratio-threshold:0.8}")
    private double activeRatioThreshold;

    /** Number of waiting threads above which a CRITICAL alert is sent. */
    @Value("${monitor.db.waiting-threshold:0}")
    private int waitingThreadThreshold;

    /**
     * Reads the pool counters every 5 seconds, records them and evaluates the alert rules.
     * Does nothing when the data source is not a {@code HikariDataSource} or when its pool
     * MXBean is not available yet.
     */
    @Scheduled(fixedRate = 5000)
    public void collectMetrics() {
        if (!(dataSource instanceof HikariDataSource)) {
            return;
        }
        HikariDataSource hikari = (HikariDataSource) dataSource;
        HikariPoolMXBean poolBean = hikari.getHikariPoolMXBean();
        if (poolBean == null) {
            // The MXBean is null until the pool has been initialised; skip this sample
            // instead of failing the scheduled task with a NullPointerException.
            log.debug("Hikari pool MXBean not available yet; skipping metrics collection");
            return;
        }

        int active = poolBean.getActiveConnections();
        int total = poolBean.getTotalConnections();
        int waiting = poolBean.getThreadsAwaitingConnection();
        // Guard against an empty pool: dividing by zero would produce NaN or Infinity,
        // which would pollute the recorded metric and the alert text.
        double ratio = total > 0 ? (double) active / total : 0;

        // Record the metrics.
        registerMetrics(active, total, waiting, ratio);
        // Evaluate the alert conditions.
        checkAlertConditions(active, total, waiting, ratio);
    }

    /**
     * Sends a WARNING alert when utilisation exceeds {@code activeRatioThreshold} and a
     * CRITICAL alert when more than {@code waitingThreadThreshold} threads are waiting.
     *
     * @param active  number of active connections
     * @param total   total number of connections in the pool
     * @param waiting number of threads waiting for a connection
     * @param ratio   {@code active / total}, or 0 when the pool is empty
     */
    private void checkAlertConditions(int active, int total, int waiting, double ratio) {
        if (ratio > activeRatioThreshold) {
            AlertEvent event = new AlertEvent(
                    AlertLevel.WARNING,
                    "DatabasePool",
                    String.format("Connection pool utilization high: %.2f%% (%d/%d)",
                            ratio * 100, active, total)
            );
            alertService.sendAlert(event);
        }
        if (waiting > waitingThreadThreshold) {
            AlertEvent event = new AlertEvent(
                    AlertLevel.CRITICAL,
                    "DatabasePool",
                    String.format("Threads waiting for connection: %d", waiting)
            );
            alertService.sendAlert(event);
        }
    }
}
Tomcat 線程監控
/**
 * Spring configuration that publishes the Tomcat thread-pool metrics collector as a bean.
 */
@Configuration
public class TomcatMetricsConfig {

    /**
     * Builds the collector bound to the given web-server application context.
     *
     * @param context the servlet web-server application context passed to the collector
     * @return a new {@link TomcatMetricsCollector} wrapping {@code context}
     */
    @Bean
    public TomcatMetricsCollector tomcatMetricsCollector(
            ServletWebServerApplicationContext context) {
        final TomcatMetricsCollector collector = new TomcatMetricsCollector(context);
        return collector;
    }
}
/**
 * Periodically samples the thread pool of every Tomcat connector, records the readings as
 * metrics and raises alerts when the configured thresholds are exceeded.
 *
 * <p>NOTE(review): {@code registerMetrics(...)} and {@code alertService} are referenced but not
 * declared in this snippet; they are presumably defined in the elided part of the class.
 */
@Component
@Slf4j
public class TomcatMetricsCollector {
    private final ServletWebServerApplicationContext context;
    private final MeterRegistry meterRegistry;

    /** Active/max thread ratio above which a WARNING alert is sent. */
    @Value("${monitor.tomcat.thread-utilization-threshold:0.8}")
    private double threadUtilizationThreshold;

    /** Bound from configuration; not read anywhere in the visible code. */
    @Value("${monitor.tomcat.busy-thread-threshold:0.6}")
    private double busyThreadThreshold;

    /** Queue length above which a WARNING alert is sent (previously hard-coded to 10). */
    @Value("${monitor.tomcat.queue-size-threshold:10}")
    private int queueSizeThreshold;

    /**
     * Reads the executor counters of every connector every 5 seconds, records them and
     * evaluates the alert rules. Connectors whose executor is not a
     * {@code ThreadPoolExecutor} are skipped, as is the whole run when the embedded server
     * is not a {@code TomcatWebServer}.
     */
    @Scheduled(fixedRate = 5000)
    public void collectMetrics() {
        if (!(context.getWebServer() instanceof TomcatWebServer)) {
            // Avoid a ClassCastException/NullPointerException when the web server is not
            // Tomcat or has not been created yet.
            log.debug("Embedded web server is not a TomcatWebServer; skipping metrics collection");
            return;
        }
        TomcatWebServer tomcatWebServer = (TomcatWebServer) context.getWebServer();
        Tomcat tomcat = tomcatWebServer.getTomcat();

        for (Connector connector : tomcat.getService().findConnectors()) {
            Object rawExecutor = connector.getProtocolHandler().getExecutor();
            if (!(rawExecutor instanceof ThreadPoolExecutor)) {
                // The executor may be absent or a custom implementation; skip it rather
                // than aborting the whole scheduled run.
                log.debug("Connector on port {} has no ThreadPoolExecutor; skipping",
                        connector.getPort());
                continue;
            }
            ThreadPoolExecutor executor = (ThreadPoolExecutor) rawExecutor;

            int maxThreads = executor.getMaximumPoolSize();
            int activeThreads = executor.getActiveCount();
            int queueSize = executor.getQueue().size();
            double utilization = (double) activeThreads / maxThreads;

            // Record the metrics.
            registerMetrics(connector.getPort(), maxThreads, activeThreads,
                    queueSize, utilization);
            // Evaluate the alert conditions.
            checkAlertConditions(connector.getPort(), maxThreads, activeThreads,
                    queueSize, utilization);
        }
    }

    /**
     * Sends a WARNING alert when utilisation exceeds {@code threadUtilizationThreshold} and
     * another when the request queue is longer than {@code queueSizeThreshold}.
     *
     * @param port          connector port the readings belong to
     * @param maxThreads    maximum pool size of the connector's executor
     * @param activeThreads number of currently active threads
     * @param queueSize     number of tasks waiting in the executor queue
     * @param utilization   {@code activeThreads / maxThreads}
     */
    private void checkAlertConditions(int port, int maxThreads, int activeThreads,
                                      int queueSize, double utilization) {
        if (utilization > threadUtilizationThreshold) {
            AlertEvent event = new AlertEvent(
                    AlertLevel.WARNING,
                    "TomcatThreads",
                    String.format("Thread pool utilization high on port %d: %.2f%% (%d/%d)",
                            port, utilization * 100, activeThreads, maxThreads)
            );
            alertService.sendAlert(event);
        }
        if (queueSize > queueSizeThreshold) {
            AlertEvent event = new AlertEvent(
                    AlertLevel.WARNING,
                    "TomcatThreads",
                    String.format("Request queue building up on port %d: %d requests waiting",
                            port, queueSize)
            );
            alertService.sendAlert(event);
        }
    }
}
3. 告警服務設計
@Service
@Slf4j
public class AlertService {
private final List<AlertNotifier> notifiers;
@Value("${monitor.alert.rate-limit-seconds:60}")
private int rateLimitSeconds;
private final Map<String, Instant> lastAlertTimes = new ConcurrentHashMap<>();
public void sendAlert(AlertEvent event) {
String alertKey = event.getSource() + ":" + event.getMessage();
// 檢查告警頻率限制
Instant lastAlert = lastAlertTimes.get(alertKey);
Instant now = Instant.now();
if (lastAlert != null &&
Duration.between(lastAlert, now).getSeconds() < rateLimitSeconds) {
log.debug("Skipping alert due to rate limiting: {}", event);
return;
}
// 更新最後告警時間
lastAlertTimes.put(alertKey, now);
// 發送到所有配置的通知器
for (AlertNotifier notifier : notifiers) {
if (notifier.shouldNotify(event.getLevel())) {
notifier.sendNotification(event);
}
}
}
}
4. 通知實現
/**
 * {@link AlertNotifier} that delivers alerts as plain-text e-mail.
 */
@Component
@Slf4j
public class EmailAlertNotifier implements AlertNotifier {
    private final JavaMailSender mailSender;

    /** Recipient addresses, bound from {@code monitor.alert.email.recipients}. */
    @Value("${monitor.alert.email.recipients}")
    private String[] recipients;

    /**
     * Accepts every level whose ordinal is at least that of {@code AlertLevel.WARNING}.
     *
     * @param level level of the alert being dispatched
     * @return {@code true} when this notifier should send the alert
     */
    @Override
    public boolean shouldNotify(AlertLevel level) {
        final int candidate = level.ordinal();
        final int minimum = AlertLevel.WARNING.ordinal();
        return candidate >= minimum;
    }

    /**
     * Sends one mail to all recipients; the subject carries level and source, the body
     * carries the alert message.
     *
     * @param event the alert to send
     */
    @Override
    public void sendNotification(AlertEvent event) {
        final String subject = String.format("[%s] %s Alert",
                event.getLevel(), event.getSource());

        final SimpleMailMessage mail = new SimpleMailMessage();
        mail.setTo(recipients);
        mail.setSubject(subject);
        mail.setText(event.getMessage());

        mailSender.send(mail);
    }
}
/**
 * {@link AlertNotifier} intended to deliver alerts through a Slack webhook.
 * The delivery itself is left unimplemented in this snippet.
 */
@Component
@Slf4j
public class SlackAlertNotifier implements AlertNotifier {

    /** Webhook endpoint, bound from {@code monitor.alert.slack.webhook-url}. */
    @Value("${monitor.alert.slack.webhook-url}")
    private String webhookUrl;

    /**
     * Accepts every level whose ordinal is at least that of {@code AlertLevel.CRITICAL}.
     *
     * @param level level of the alert being dispatched
     * @return {@code true} when this notifier should send the alert
     */
    @Override
    public boolean shouldNotify(AlertLevel level) {
        final int candidate = level.ordinal();
        final int minimum = AlertLevel.CRITICAL.ordinal();
        return candidate >= minimum;
    }

    /**
     * Placeholder for the Slack delivery; currently does nothing.
     *
     * @param event the alert to send
     */
    @Override
    public void sendNotification(AlertEvent event) {
        // Implement the Slack webhook notification here.
        // ...
    }
}
5. 健康檢查端點
/**
 * Health indicator that reports the state of the Hikari connection pool.
 *
 * <p>Reports a custom {@code WARNING} status when pool utilisation or the number of waiting
 * threads exceeds the configured thresholds, {@code UP} otherwise, and {@code UNKNOWN} when
 * the pool cannot be inspected.
 */
@Component
public class ConnectionPoolHealthIndicator implements HealthIndicator {

    /**
     * Custom status: Spring Boot's {@code Status} only predefines UNKNOWN, UP, DOWN and
     * OUT_OF_SERVICE, so a WARNING status has to be created explicitly.
     * NOTE(review): custom statuses presumably also need an entry in
     * {@code management.endpoint.health.status.order} to aggregate as intended - confirm.
     */
    private static final Status WARNING = new Status("WARNING");

    private final DataSource dataSource;

    /** Utilisation ratio above which WARNING is reported; same property as the collector. */
    @Value("${monitor.db.active-ratio-threshold:0.8}")
    private double activeRatioThreshold;

    /** Waiting-thread count above which WARNING is reported; same property as the collector. */
    @Value("${monitor.db.waiting-threshold:0}")
    private int waitingThreadThreshold;

    /**
     * Samples the pool counters and derives the health status from them.
     *
     * @return the pool health with the sampled counters attached as details, or
     *         {@code UNKNOWN} when the data source is not a {@code HikariDataSource} or its
     *         pool MXBean is not available yet
     */
    @Override
    public Health health() {
        if (!(dataSource instanceof HikariDataSource)) {
            return Health.unknown().build();
        }
        HikariDataSource hikari = (HikariDataSource) dataSource;
        HikariPoolMXBean poolBean = hikari.getHikariPoolMXBean();
        if (poolBean == null) {
            // The MXBean is null until the pool has been initialised.
            return Health.unknown().build();
        }

        int active = poolBean.getActiveConnections();
        int total = poolBean.getTotalConnections();
        int waiting = poolBean.getThreadsAwaitingConnection();
        // An empty pool counts as 0% utilisation rather than dividing by zero.
        double ratio = total > 0 ? (double) active / total : 0;

        Health.Builder builder = Health.up();
        builder.withDetail("activeConnections", active);
        builder.withDetail("totalConnections", total);
        builder.withDetail("waitingThreads", waiting);
        builder.withDetail("utilizationRatio", ratio);

        if (ratio > activeRatioThreshold || waiting > waitingThreadThreshold) {
            return builder.status(WARNING).build();
        }
        return builder.build();
    }
}
6. 配置文件設計
# Monitoring settings
monitor:
  # Database connection pool monitoring
  db:
    active-ratio-threshold: 0.8          # active/total connection ratio threshold
    waiting-threshold: 0                 # waiting-thread threshold
    acquisition-timeout-threshold: 1000  # connection acquisition timeout threshold (ms)
  # Tomcat thread monitoring
  tomcat:
    thread-utilization-threshold: 0.8    # thread pool utilization threshold
    busy-thread-threshold: 0.6           # busy-thread ratio threshold
    queue-size-threshold: 10             # request queue size threshold
  # Alerting
  alert:
    rate-limit-seconds: 60               # minimum interval between identical alerts (s)
    email:
      recipients: ops@example.com,dba@example.com
    slack:
      webhook-url: https://hooks.slack.com/services/xxx/yyy/zzz
      # Must be quoted: an unquoted "#" preceded by a space starts a YAML comment,
      # which would leave this key with a null value.
      channel: "#system-alerts"
# Data source settings
spring:
  datasource:
    hikari:
      pool-name: App-DB-Pool
      maximum-pool-size: 20
      minimum-idle: 5
      connection-timeout: 30000
      register-mbeans: true              # enable JMX monitoring
  # Spring Boot Admin client
  boot:
    admin:
      client:
        url: http://admin-server:8080
# Actuator settings: expose the endpoints used by the health check and by the
# Prometheus scrape job (/actuator/prometheus).
management:
  endpoints:
    web:
      exposure:
        include: health,prometheus
# Server settings
server:
  tomcat:
    max-threads: 200
    min-spare-threads: 20
    max-connections: 10000
    connection-timeout: 20000
    mbeanregistry:
      enabled: true                      # enable Tomcat MBean registration
7. 集成到監控面板
使用 Prometheus + Grafana 進行可視化監控:
# prometheus.yml
# Scrape the Spring application's Prometheus metrics endpoint.
scrape_configs:
  - job_name: 'spring-app'
    metrics_path: '/actuator/prometheus'
    static_configs:
      - targets:
          - 'app-server:8080'
Grafana 儀表板設計:
- 數據庫連接池儀表板
- Tomcat 線程池儀表板
- 系統警示一覽表
總結
這套監控告警系統的優點:
- 全面監控:覆蓋數據庫連接池和Tomcat線程池的關鍵指標
- 可配置性:通過配置文件設置告警閾值和通知方式
- 多級告警:支援不同嚴重級別的告警
- 告警限流:防止告警風暴
- 多渠道通知:支援電子郵件、Slack等多種通知方式
- 可視化:與Prometheus和Grafana集成,提供直觀的監控面板
通過這套系統,運維團隊可以及時發現並解決數據庫連接池和Tomcat線程池問題,提高系統穩定性。