跳转到内容

告警与通知系统

告警与通知系统

本节介绍如何基于能耗监测数据构建告警通知系统。学习完成后,您将能够:

  • 实现多级告警规则(功率/温度/异常)
  • 配置 Telegram Bot 通知
  • 配置邮件通知
  • 构建告警升级和抑制逻辑
┌──────────────────────────────────────────────────────┐
│ 告警系统架构 │
├──────────────────────────────────────────────────────┤
│ │
│ [能耗数据] → [阈值检测] → [告警判定] → [通知渠道] │
│ │ │
│ ├──→ Telegram │
│ ├──→ Email │
│ ├──→ Webhook │
│ └──→ Dashboard │
│ │
│ [告警抑制]: 同一设备同一类型在 N 分钟内不重复发送 │
│ [告警升级]: 持续异常未恢复时提高告警级别 │
│ │
└──────────────────────────────────────────────────────┘
Terminal window
# 1. 在 Telegram 中搜索 @BotFather
# 2. 发送 /newbot 创建新 Bot
# 3. 获取 Bot Token: 1234567890:ABCdefGHIjklmNOPqrstUVwxyz
# 4. 搜索 Bot 用户名并发送 /start
# 5. 获取 Chat ID: 访问 https://api.telegram.org/bot{TOKEN}/getUpdates
Terminal window
# 安装节点
# Manage Palette → Install → "node-red-contrib-telegrambot"
// Telegram Receiver 节点配置
{
"name": "Telegram Bot",
"bot": "energy-alert-bot",
"chatId": "123456789",
"usernames": ""
}
// Function: 格式化告警消息并发送到 Telegram
var alertData = msg.payload;
// 构建告警消息
var emoji = "";
var severity = alertData.severity || "info";
if (severity === "critical") {
emoji = "🚨";
} else if (severity === "warning") {
emoji = "⚠️";
} else if (severity === "info") {
emoji = "ℹ️";
}
var message = emoji + " *能耗告警*\n" +
"━━━━━━━━━━━━━━━\n" +
"设备: `" + (alertData.device || "未知") + "`\n" +
"告警: " + (alertData.alert || alertData.message || "未知") + "\n" +
"严重级别: " + severity + "\n" +
"时间: " + new Date(alertData.timestamp || Date.now()).toLocaleString() + "\n";
// 添加附加数据
if (alertData.power !== undefined) {
message += "功率: `" + alertData.power + "W`\n";
}
if (alertData.temperature !== undefined) {
message += "温度: `" + alertData.temperature + "°C`\n";
}
if (alertData.current_kwh !== undefined) {
message += "今日用电: `" + alertData.current_kwh + " kWh`\n";
}
// 格式化输出给 Telegram 节点
msg.payload = {
type: "message",
content: message,
parse_mode: "Markdown"
};
return msg;
Terminal window
# 安装节点
# Manage Palette → Install → "node-red-node-email"
// Email 节点配置
{
"name": "Send Alert Email",
"server": "smtp.gmail.com",
"port": "465",
"secure": true,
"username": "alerts@example.com",
"password": "********"
}
// Function: 构建告警邮件
var alertData = msg.payload;
msg.to = "customer@example.com";
msg.from = "iot-alerts@example.com";
msg.subject = "[IoT 告警] " + (alertData.device || "Unknown") +
" - " + (alertData.severity || "INFO");
msg.payload = "<h2>能耗监测告警通知</h2>" +
"<table border='1' cellpadding='8' cellspacing='0' style='border-collapse:collapse;'>" +
"<tr><td><b>设备</b></td><td>" + (alertData.device || "未知") + "</td></tr>" +
"<tr><td><b>告警类型</b></td><td>" + (alertData.type || alertData.alert || "未知") + "</td></tr>" +
"<tr><td><b>严重级别</b></td><td>" + (alertData.severity || "info") + "</td></tr>" +
"<tr><td><b>当前功率</b></td><td>" + (alertData.power || "N/A") + " W</td></tr>" +
"<tr><td><b>时间</b></td><td>" + new Date().toLocaleString() + "</td></tr>" +
"</table>" +
"<p>" + (alertData.message || "") + "</p>";
return msg;

防止同一告警频繁发送:

// Function: 告警抑制逻辑
// 同一设备同一类型在静默期内不重复发送
var alertData = msg.payload;
var deviceId = alertData.device || "unknown";
var alertType = alertData.type || alertData.alert || "generic";
var suppressKey = "suppress_" + deviceId + "_" + alertType;
// 静默期配置 (毫秒)
var SUPPRESS_WINDOW = {
critical: 5 * 60 * 1000, // 严重告警: 5 分钟静默
warning: 15 * 60 * 1000, // 警告告警: 15 分钟静默
info: 60 * 60 * 1000 // 信息通知: 1 小时静默
};
var severity = alertData.severity || "warning";
var windowMs = SUPPRESS_WINDOW[severity] || SUPPRESS_WINDOW.warning;
// 检查是否在静默期内
var lastSent = context.get(suppressKey);
var now = Date.now();
if (lastSent && (now - lastSent) < windowMs) {
// 还在静默期内,抑制告警
node.warn("Suppressed duplicate alert: " + suppressKey);
return null;
}
// 更新最后发送时间
context.set(suppressKey, now);
// 添加抑制信息到告警消息
msg.suppressed = false;
msg.suppress_window = windowMs / 1000 + "s";
return msg;

持续异常未恢复时升级告警级别:

// Function: 告警升级逻辑
var alertData = msg.payload;
var deviceId = alertData.device || "unknown";
// 升级配置
var ESCALATION = {
step1: { count: 3, severity: "warning", action: "notify_telegram" },
step2: { count: 6, severity: "critical", action: "notify_all" },
step3: { count: 10, severity: "critical", action: "shutdown" }
};
// 连续告警计数
var consecutiveKey = "consecutive_alerts_" + deviceId;
var consecutiveCount = (context.get(consecutiveKey) || 0) + 1;
// 检查是否需要升级
var escalationLevel = null;
if (consecutiveCount >= ESCALATION.step3.count) {
escalationLevel = ESCALATION.step3;
} else if (consecutiveCount >= ESCALATION.step2.count) {
escalationLevel = ESCALATION.step2;
} else if (consecutiveCount >= ESCALATION.step1.count) {
escalationLevel = ESCALATION.step1;
}
// 如果是恢复消息,重置计数
if (alertData.state === "normal" || alertData.state === "off") {
context.set(consecutiveKey, 0);
// 发送恢复通知
msg.payload.severity = "info";
msg.payload.message = "设备 " + deviceId + " 已恢复正常";
return msg;
}
context.set(consecutiveKey, consecutiveCount);
if (escalationLevel) {
alertData.escalated = true;
alertData.escalation_level = escalationLevel;
alertData.consecutive_count = consecutiveCount;
}
msg.payload = alertData;
return msg;
┌───────────────────────────────────────────────────────┐
│ 完整告警流程 │
├───────────────────────────────────────────────────────┤
│ │
│ [阈值检测 Flow] │
│ │ │
│ ▼ │
│ [告警抑制] ── 静默期内? → 丢弃 (return null) │
│ │ │
│ ▼ │
│ [告警升级] ── 连续次数检测 │
│ │ │
│ ├──→ [Telegram Bot] ──→ Telegram │
│ │ │
│ ├──→ [Email] ──→ 邮箱通知 │
│ │ │
│ ├──→ [InfluxDB Out] ──→ 告警历史存储 │
│ │ │
│ └──→ [Function: 自动响应] ──→ 断电/限电控制 │
│ │
└───────────────────────────────────────────────────────┘
Terminal window
# 1. 检查 Bot Token 是否正确
# 2. 确认 Chat ID 是否正确
# 3. 检查防火墙是否允许出站到 api.telegram.org
curl https://api.telegram.org/bot{TOKEN}/getMe
# 4. 测试发送消息
curl -X POST \
https://api.telegram.org/bot{TOKEN}/sendMessage \
-H "Content-Type: application/json" \
-d '{"chat_id":"{CHAT_ID}","text":"Test message"}'

建议至少配置两个渠道(如 Telegram + Email),Telegram 用于即时通知,Email 用于详细报告和记录。

Q2: 如何避免非工作时间被告警打扰?

在告警 Flow 中增加时间段过滤,仅在工作时间发送非紧急告警,紧急告警(如过载)始终发送。

告警数据可以同时写入 InfluxDB,在 Grafana 中创建告警历史面板查看趋势。

推荐做法:

  • 实现告警抑制避免消息轰炸
  • 配置告警升级处理持续异常
  • 至少使用两个通知渠道
  • 记录告警历史用于事后分析

避免做法:

  • 无抑制机制导致告警风暴
  • 所有告警使用相同优先级
  • 告警信息过于冗长或无价值
  • 通知渠道单一(渠道故障则丢失告警)
  1. Telegram Bot 提供实时即时消息通知
  2. Email 提供详细告警报告
  3. 告警抑制防止同一告警频繁发送
  4. 告警升级处理持续未恢复的异常
  5. 告警系统应与阈值检测和自动响应联动