Пример #1
0
def update_check_plan(app_config_dict, queue):
    """Sync the cached app configs with the apps flagged as changed in Redis.

    Takes a batch of app names from the ``_app_change_list`` Redis key via
    ``pop_multi`` (called with a count of 30 -- presumably the maximum batch
    size; confirm against ``pop_multi``) and, for every distinct name, reloads
    the ``<appname>_config`` hash:

    * hash missing/empty: the app is removed from ``app_config_dict``;
    * app not cached yet: a ``CheckTask`` is queued for every entry of the
      comma-separated ``host_list`` field;
    * app already cached: a ``CheckTask`` is queued only for hosts present in
      the new ``host_list`` but absent from the cached one.

    In the last two cases the cached config is replaced with the new one.

    Args:
        app_config_dict: Mutable mapping of app name -> config mapping;
            updated in place.
        queue: Object providing ``put_task(task)``.

    Raises:
        KeyError: If a non-empty config lacks ``host_list``, or lacks
            ``check_interval`` while at least one task has to be created.
    """
    logger = logging.getLogger("task.generator")

    pipe = REDIS_DB.pipeline()
    change_list = pop_multi(pipe, '_app_change_list', 30)
    logger.info(u'发现应用配置发生变动,应用列表:%s', change_list)

    for appname in set(change_list):
        # Fetch the latest configuration of this app.
        new_config = REDIS_DB.hgetall(appname + '_config')

        # 1. The app has been deleted.  HGETALL on a missing key yields an
        #    empty mapping rather than None, so test for emptiness; an
        #    ``is None`` test would never match and the lookup of
        #    'host_list' below would raise KeyError for deleted apps.
        if not new_config:
            app_config_dict.pop(appname, None)
            continue

        new_host_list = new_config['host_list'].split(',')
        cached_config = app_config_dict.get(appname)
        if cached_config is None:
            # 2. Newly added app: schedule every configured host.
            hosts_to_schedule = new_host_list
        else:
            # 3. Known app: schedule only the hosts that were added.
            old_host_list = cached_config['host_list'].split(',')
            hosts_to_schedule = set(new_host_list) - set(old_host_list)

        for host in hosts_to_schedule:
            task = CheckTask(appname, host,
                             int(new_config['check_interval']))
            queue.put_task(task)

        # Remember the new configuration.
        app_config_dict[appname] = new_config
Пример #2
0
def record_app_status(task, status, info, check_latency, duration):
    """Store the result of one check of ``task`` in Redis and notify.

    Reads ``max_check_attempts``, ``notify_interval``, ``check_interval`` and
    ``parent_app_list`` from the ``<appname>_config`` hash, seeds the
    ``ah_<appname>_<host>`` status hash when it does not exist, and then
    updates that hash from the new result.  Once the attempt counter reaches
    ``max_check_attempts`` the stored status is replaced and a RECOVERY or
    PROBLEM notification is queued through ``notify_app.apply_async``.

    Args:
        task: Check task; its ``appname``, ``host`` and ``plan_time``
            attributes are read.
        status: Status of this check; ``'OK'`` is treated as the healthy
            state.
        info: Text stored as ``status_info`` and passed to notifications.
        check_latency: Stored as-is in the ``check_latency`` field.
        duration: Stored as-is in the ``duration`` field.
    """
    time_format = '%Y-%m-%d %H:%M:%S'

    def now_text():
        # Current local time rendered in the format used by the status hash.
        return datetime.now().strftime(time_format)

    pipe = REDIS_DB.pipeline()

    # 1. Fetch the part of the app configuration that is needed here.
    config_key = task.appname + '_config'
    for field in ('max_check_attempts', 'notify_interval',
                  'check_interval', 'parent_app_list'):
        pipe.hget(config_key, field)
    res_list = pipe.execute()

    max_check_attempts = int(res_list[0])
    notify_interval = int(res_list[1])
    check_interval = int(res_list[2])
    parent_app_list = res_list[3]

    # 2. Load the previous status, seeding defaults if the hash is missing.
    key = 'ah_' + task.appname + '_' + task.host

    if not REDIS_DB.exists(key):
        pipe.hset(key, 'current_status', 'OK')
        pipe.hset(key, 'status_info', 'OK - ')
        pipe.hset(key, 'current_attempt', 0)
        pipe.hset(key, 'last_state_change', now_text())
        pipe.hset(key, 'last_notification', now_text())

    # Executed even when nothing was queued above.
    pipe.execute()

    previous = REDIS_DB.hgetall(key)
    current_status = previous.pop('current_status')
    current_attempt = int(previous.pop('current_attempt'))
    # The parsed value is not used below; parsing still raises on a
    # malformed timestamp.
    last_state_change = datetime.strptime(previous.pop('last_state_change'),
                                          time_format)
    last_notification = datetime.strptime(previous.pop('last_notification'),
                                          time_format)

    if current_status == status and status == 'OK':
        # Still healthy: reset the attempt counter.
        pipe.hset(key, 'current_attempt', 0)
    else:
        current_attempt += 1
        pipe.hset(key, 'current_attempt', current_attempt)

        if current_attempt >= max_check_attempts:
            current_status = status
            pipe.hset(key, 'current_status', current_status)
            pipe.hset(key, 'status_info', info)
            pipe.hset(key, 'current_attempt', 0)
            pipe.hset(key, 'last_state_change', now_text())

            recovered = status == 'OK'
            # Recovery events are not throttled by notify_interval.
            if recovered:
                notify_app.apply_async(
                    (task.appname, task.host, 'RECOVERY', status, info))
            margin = total_seconds(datetime.now() - last_notification)
            if not recovered and margin > notify_interval * 60:
                pipe.hset(key, 'last_notification', now_text())
                notify_app.apply_async(
                    (task.appname, task.host, 'PROBLEM', status, info))

    pipe.hset(key, 'duration', duration)
    pipe.hset(key, 'check_latency', check_latency)
    # check_interval is multiplied by 60 to obtain seconds.
    planned_next = task.plan_time + timedelta(seconds=check_interval * 60)
    pipe.hset(key, 'next_scheduled_check', planned_next.strftime(time_format))

    # -------- Other special cases (to make it easy to tell whether the
    # host is alive) ------------
    if parent_app_list is not None:
        for parent_name in ('HOST_STATUS', 'SYS_PING'):
            if parent_app_list.find(parent_name) != -1:
                key = 'ah_' + parent_name + '_' + task.host
                pipe.hset(key, 'current_status', current_status)

    pipe.execute()
Пример #3
0
def record_app_status(task, status, info, check_latency, duration):
    """Record a check result for ``task`` in Redis and queue notifications.

    The function works on two Redis hashes: ``<appname>_config`` (read only:
    ``max_check_attempts``, ``notify_interval``, ``check_interval``,
    ``parent_app_list``) and ``ah_<appname>_<host>`` (created with default
    fields when absent, then updated).  When the attempt counter reaches
    ``max_check_attempts`` the stored status is overwritten with ``status``
    and ``notify_app.apply_async`` is called with a ``'RECOVERY'`` or
    ``'PROBLEM'`` event.

    Args:
        task: Object whose ``appname``, ``host`` and ``plan_time`` are read.
        status: Status produced by the check; compared against ``'OK'``.
        info: Text written to ``status_info`` and sent with notifications.
        check_latency: Value written to the ``check_latency`` field.
        duration: Value written to the ``duration`` field.
    """
    stamp_format = '%Y-%m-%d %H:%M:%S'
    pipe = REDIS_DB.pipeline()

    # 1. Read the needed part of the app configuration in one round trip.
    config_key = task.appname + '_config'
    pipe.hget(config_key, 'max_check_attempts')
    pipe.hget(config_key, 'notify_interval')
    pipe.hget(config_key, 'check_interval')
    pipe.hget(config_key, 'parent_app_list')
    settings = pipe.execute()

    max_check_attempts, notify_interval, check_interval = [
        int(raw) for raw in settings[:3]
    ]
    parent_app_list = settings[3]

    # 2. Fetch the state recorded by the previous check.
    key = 'ah_' + task.appname + '_' + task.host

    if not REDIS_DB.exists(key):
        # No status hash yet: queue its default fields.
        pipe.hset(key, 'current_status', 'OK')
        pipe.hset(key, 'status_info', 'OK - ')
        pipe.hset(key, 'current_attempt', 0)
        pipe.hset(key, 'last_state_change',
                  datetime.now().strftime(stamp_format))
        pipe.hset(key, 'last_notification',
                  datetime.now().strftime(stamp_format))

    pipe.execute()

    status_dict = REDIS_DB.hgetall(key)
    current_status = status_dict.pop('current_status')
    attempt_text = status_dict.pop('current_attempt')
    current_attempt = int(attempt_text)

    state_change_text = status_dict.pop('last_state_change')
    # Parsed but not used afterwards.
    last_state_change = datetime.strptime(state_change_text, stamp_format)
    notification_text = status_dict.pop('last_notification')
    last_notification = datetime.strptime(notification_text, stamp_format)

    is_ok = status == 'OK'
    counts_as_attempt = current_status != status or not is_ok

    if counts_as_attempt:
        current_attempt = current_attempt + 1
        pipe.hset(key, 'current_attempt', current_attempt)
        threshold_reached = current_attempt >= max_check_attempts
    else:
        pipe.hset(key, 'current_attempt', 0)
        threshold_reached = False

    if threshold_reached:
        current_status = status
        pipe.hset(key, 'current_status', current_status)
        pipe.hset(key, 'status_info', info)
        pipe.hset(key, 'current_attempt', 0)
        pipe.hset(key, 'last_state_change',
                  datetime.now().strftime(stamp_format))

        # Recovery events ignore notify_interval.
        if is_ok:
            notify_app.apply_async(
                (task.appname, task.host, 'RECOVERY', status, info))

        since_last_notice = total_seconds(datetime.now() - last_notification)
        notify_threshold = notify_interval * 60
        if (not is_ok) and since_last_notice > notify_threshold:
            pipe.hset(key, 'last_notification',
                      datetime.now().strftime(stamp_format))
            notify_app.apply_async(
                (task.appname, task.host, 'PROBLEM', status, info))

    pipe.hset(key, 'duration', duration)
    pipe.hset(key, 'check_latency', check_latency)

    interval_seconds = check_interval * 60
    next_check_at = task.plan_time + timedelta(seconds=interval_seconds)
    next_scheduled_check = next_check_at.strftime(stamp_format)
    pipe.hset(key, 'next_scheduled_check', next_scheduled_check)

    # -------- Other special cases (to make it easy to tell whether the
    # host is alive) ------------
    if parent_app_list is not None:
        mirrors_host_status = parent_app_list.find('HOST_STATUS') != -1
        if mirrors_host_status:
            pipe.hset('ah_HOST_STATUS_' + task.host,
                      'current_status', current_status)

        mirrors_sys_ping = parent_app_list.find('SYS_PING') != -1
        if mirrors_sys_ping:
            pipe.hset('ah_SYS_PING_' + task.host,
                      'current_status', current_status)

    pipe.execute()