Featured image of post Django开发告警接口(webhook)对接Altermanager告警

Django开发告警接口(webhook)对接Altermanager告警

告警日志统计告警去重统计告警人分组邮件原理图此项目主要使用开发告警接口,对接告警,实现告警人分组,邮件等告警信息日志,告警信息统计等等开发过程告警产生时间告警恢复时间实例告警名称状态告警级别告警信息知。。。。。。。

告警日志统计

告警去重统计

告警人分组邮件

原理图

此项目主要使用Django开发告警接口,对接Altermanager告警,实现告警人分组,邮件等

告警信息日志,告警信息统计等等

开发过程

model

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
from django.db import models


# Create your models here.
class alerts(models.Model):
    startsAt = models.DateTimeField(verbose_name='告警产生时间')
    endsAt = models.DateTimeField(verbose_name='告警恢复时间')
    instance = models.CharField(max_length=50, verbose_name='实例', blank=True)
    alertname = models.CharField(max_length=100, verbose_name='告警名称')
    status = models.CharField(max_length=20, verbose_name='状态', blank=True)
    severity = models.CharField(max_length=20, verbose_name='告警级别', blank=True)
    message = models.CharField(max_length=1000, verbose_name='告警信息', blank=True)
    known = models.BooleanField(default=False, verbose_name='知悉')
    memo = models.CharField(max_length=50, verbose_name='知悉备注', blank=True)

    def __str__(self):
        return self.message

    class Meta:
        db_table = 'alerts'
        verbose_name = '告警日志'
        verbose_name_plural = verbose_name
        # ordering = ['-startsAt']  # 按故障时间倒排


class production(models.Model):
    startsAt = models.DateTimeField(verbose_name='告警产生时间')
    endsAt = models.DateTimeField(verbose_name='告警恢复时间')
    instance = models.CharField(max_length=50, verbose_name='实例', blank=True)
    alertname = models.CharField(max_length=100, verbose_name='告警名称')
    status = models.CharField(max_length=20, verbose_name='状态', blank=True)
    severity = models.CharField(max_length=20, verbose_name='告警级别', blank=True)
    message = models.CharField(max_length=1000, verbose_name='告警信息', blank=True)
    known = models.BooleanField(default=False, verbose_name='知悉')

    # memo = models.CharField(max_length=50, verbose_name='知悉备注', blank=True)

    def __str__(self):
        return self.message

    class Meta:
        db_table = 'production'
        verbose_name = '告警统计'
        verbose_name_plural = verbose_name


class alarmuser(models.Model):
    username = models.CharField(max_length=20, verbose_name='告警人', blank=True)
    useremail = models.EmailField(max_length=20, verbose_name='告警邮件', blank=True)
    group = models.CharField(max_length=20, verbose_name='分组', blank=True)

    def __str__(self):
        return self.username

    class Meta:
        db_table = 'alarmuser'
        verbose_name = '告警人配置'
        verbose_name_plural = verbose_name

注册xadmin后台

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import xadmin

from django.contrib import admin

from xadmin import views
from webhook.models import alerts, production, alarmuser


class GlobalSetting:
    site_title = "长风破浪"
    site_footer = "长风破浪"
    menu_style = "accordion"  # 这个是设置菜单主题
    enable_themes = True
    use_bootswatch = True
    refresh_times = [5, 10, 30, 60]


xadmin.site.register(views.CommAdminView, GlobalSetting)


class AlertsAdmin(object):
    """xadmin的全局配置"""
    site_title = "长风破浪"  # 设置站点标题
    site_footer = "长风破浪"  # 设置站点的页脚
    menu_style = "accordion"  # 设置菜单折叠
    '''设置列表可显示的字段'''
    list_display = ('startsAt', 'endsAt', 'instance', 'alertname', 'status', 'severity', 'message', 'known', 'memo',)

    list_filter = ['status', 'severity', 'startsAt', 'endsAt']
    search_fields = ['instance', 'alertname']
    # list_per_page设置每页显示多少条记录,默认是100条
    list_per_page = 50


class ProductionAdmin(object):
    menu_style = "accordion"  # 设置菜单折叠
    list_display = ('startsAt', 'endsAt', 'instance', 'alertname', 'status', 'severity', 'message', 'known',)

    list_filter = ['status', 'severity', 'startsAt', 'endsAt']
    search_fields = ['instance', 'alertname']
    # list_per_page设置每页显示多少条记录,默认是100条
    list_per_page = 50


class AlarmuserAdmin(object):
    list_display = ('username', 'useremail', 'group')


xadmin.site.register(alerts, AlertsAdmin)
xadmin.site.register(production, ProductionAdmin)
xadmin.site.register(alarmuser, AlarmuserAdmin)

view视图

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import json
import smtplib
from email.mime.text import MIMEText

import yaml
from dateutil import parser
from django.http import HttpResponse
from webhook.models import alerts as alerts_t, alarmuser
from webhook.models import production
import datetime
from jinja2 import Environment, FileSystemLoader


# 获取html目录
class ParseingTemplate:
    def __init__(self, templatefile):
        self.templatefile = templatefile

    def template(self, **kwargs):
        try:
            env = Environment(loader=FileSystemLoader('templates'))
            template = env.get_template(self.templatefile)
            template_content = template.render(kwargs)
            return template_content
        except Exception as error:
            raise error


# 时区转换(增加八个小时)
def time_zone_conversion(utctime):
    format_time = parser.parse(utctime).strftime('%Y-%m-%dT%H:%M:%SZ')
    time_format = datetime.datetime.strptime(format_time, "%Y-%m-%dT%H:%M:%SZ")
    return str(time_format + datetime.timedelta(hours=8))


# 获取告警人的分组和邮件
def get_email(email_name=None, action=0):
    """
    :param email_name: 发送的邮件列表名
    :param action: 操作类型,0: 查询收件人的邮件地址列表, 1: 查询收件人的列表名称, 2: 获取邮件账号信息
    :return: 根据action的值,返回不通的数据结构
    """
    if action == 0:
        email = alarmuser.objects.filter(group=email_name)
        email_lsit = []
        for i in email:
            email_lsit.append(i.useremail)
        print('显示邮件', email_lsit)
        return email_lsit
    elif action == 1:
        group_list = []
        group = alarmuser.objects.values("group").distinct()
        for i in group:
            group_list.append(i['group'])
        print('显示组:', group_list)


# 获取邮件的地址的配置
def get_email_conf(file, email_name=None, action=0):
    """
    :param file: yaml格式的文件类型
    :param email_name: 发送的邮件列表名
    :param action: 操作类型,0: 查询收件人的邮件地址列表, 1: 查询收件人的列表名称, 2: 获取邮件账号信息
    :return: 根据action的值,返回不通的数据结构
    """
    try:
        with open(file, 'r', encoding='utf-8') as fr:
            read_conf = yaml.safe_load(fr)
            if action == 0:
                for email in read_conf['email']:
                    if email['name'] == email_name:
                        return email['receive_addr']
                    else:
                        print("%s does not match for %s" % (email_name, file))
                else:
                    print("No recipient address configured")
            elif action == 1:
                return [items['name'] for items in read_conf['email']]
            elif action == 2:
                return read_conf['send']
    except KeyError:
        print("%s not exist" % email_name)
        exit(-1)
    except FileNotFoundError:
        print("%s file not found" % file)
        exit(-2)
    except Exception as e:
        raise e


# 发送邮件地址
def sendEmail(title, content, receivers=None):
    if receivers is None:
        receivers = ['chenf-o@glodon.com']
    send_dict = get_email_conf('email.yaml', action=2)
    mail_host = send_dict['smtp_host']
    mail_user = send_dict['send_user']
    mail_pass = send_dict['send_pass']
    sender = send_dict['send_addr']
    print(mail_host, mail_user, mail_pass, sender)
    msg = MIMEText(content, 'html', 'utf-8')
    msg['From'] = "{}".format(sender)
    msg['To'] = ",".join(receivers)
    print(receivers)
    print(msg['To'])
    msg['Subject'] = title
    try:
        smtpObj = smtplib.SMTP_SSL(mail_host, 465)
        smtpObj.login(mail_user, mail_pass)
        smtpObj.sendmail(sender, receivers, msg.as_string())
        print('mail send successful.')
    except smtplib.SMTPException as e:
        print(e)


# 先保存,后发邮件
def webhook(request):
    if request.method == "GET":
        # email = get_email(action=1)
        return HttpResponse('禁止get')
    if request.method == 'POST':
        try:
            request_data = request.body
            print(request_data.decode())
            request_dict = json.loads(request_data.decode('utf-8'))
            alerts = request_dict['alerts']

            prometheus_data = json.loads(request.body)

            for i in alerts:
                msg = i['annotations']['message'] if 'message' in i['annotations'] else 'null'
                if msg == 'null' and 'summary' in i['annotations']:
                    msg = i['annotations']['summary']
                print(i['startsAt'][0:19] + i['status'] + " :" + msg)
                if msg == 'null': print(i)
                ints = i['labels']['instance'] if 'instance' in i['labels'] else 'unknown'

                print(ints + " --- " + i['labels']['alertname'] + " ---- " + i['labels']['severity'])
                print(i['endsAt'])
                a = alerts_t()
                a.startsAt = time_zone_conversion(i['startsAt'])
                str = '0001-01-01'
                print(str in i['endsAt'])
                if (str in i['endsAt']):
                    a.endsAt = '0001-01-01 00:00:00'
                else:
                    a.endsAt = time_zone_conversion(i['endsAt'])
                print(a.endsAt)
                a.instance = ints
                a.alertname = i['labels']['alertname']
                if i['status'] == 'firing':
                    a.status = '告警中'
                if i['status'] == 'resolved':
                    a.status = '已恢复'
                    a.known = True
                a.severity = i['labels']['severity']
                a.message = msg
                a.save()

                startime = time_zone_conversion(i['startsAt'])
                endtime = a.endsAt
                instances = ints
                AlarmObject = production.objects.filter(startsAt=startime, endsAt=endtime, instance=instances)
                print(AlarmObject)
                if AlarmObject.exists():
                    pass
                else:
                    b = production()
                    b.startsAt = time_zone_conversion(i['startsAt'])
                    # print(str in i['endsAt'])
                    if (str in i['endsAt']):
                        b.endsAt = '0001-01-01 00:00:00'
                    else:
                        b.endsAt = time_zone_conversion(i['endsAt'])
                    # print(b.endsAt)
                    b.instance = ints
                    b.alertname = i['labels']['alertname']
                    if i['status'] == 'firing':
                        b.status = '告警中'
                    if i['status'] == 'resolved':
                        b.status = '已恢复'
                        b.known = True
                    b.severity = i['labels']['severity']
                    b.message = msg
                    b.save()
                print('显示a:', a)
                # 时间转换,转换成东八区时间
            for k, v in prometheus_data.items():
                if k == 'alerts':
                    for items in v:
                        if items['status'] == 'firing':
                            items['startsAt'] = time_zone_conversion(items['startsAt'])
                        else:
                            items['startsAt'] = time_zone_conversion(items['startsAt'])
                            items['endsAt'] = time_zone_conversion(items['endsAt'])
            print(prometheus_data)
            team_name = prometheus_data["commonLabels"]["team"]
            print(team_name)
            generate_html_template_subj = ParseingTemplate('email_template_firing.html')
            html_template_content = generate_html_template_subj.template(
                prometheus_monitor_info=prometheus_data
            )
            # 获取收件人邮件列表
            email_list = get_email(email_name=team_name, action=0)
            print(email_list)
            print(prometheus_data['commonLabels']['alertname'])
            sendEmail(
                prometheus_data['commonLabels']['alertname'],
                html_template_content,
                receivers=email_list
            )
            # return "prometheus monitor"
            return HttpResponse(1)
        except Exception as e:
            print(e)
            raise e
        # finally:
        #     return HttpResponse(1)

需要完整代码,请留言

稍后整理完毕传到github上

GitHub - nangongchengfeng/Alerts

屎一样的代码,请大佬忽略。后期会使用falsk 更新