Exemplo n.º 1
0
 def modules(cc_biz_id, set_id_list=None):
     """Return the modules of a business sorted by host count.

     :param cc_biz_id: business id, sent to the CC API as ``app_id``.
     :param set_id_list: optional set id or iterable of set ids used to
         filter the module query; ids are joined with ";" before sending.
     :return: ``(module_list, host_total)``. Every module dict gets a
         ``host_count`` key and " [<host_count>]" appended to its
         ``ModuleName``; the list is ordered by ``host_count`` descending.
         ``host_total`` counts the distinct host keys over all listed
         modules. Falls back to ``([], 0)`` when no module is found or when
         the ``ignored(Exception)`` block is left without returning.
     """
     with ignored(Exception):
         # Wrap a scalar id so it can be joined like a list (a plain string
         # only counts as scalar where ``str`` has no ``__iter__``, i.e. Py2).
         if set_id_list and not hasattr(set_id_list, "__iter__"):
             set_id_list = [set_id_list]
         if set_id_list is not None:
             set_id_list = ";".join(map(str, set_id_list))
         module_list = client.cc.get_modules_by_property(
             app_id=cc_biz_id, set_id=set_id_list).get("data") or []
         # A set keeps the per-host membership test O(1).
         module_ids = set(
             module_item["ModuleID"] for module_item in module_list)
         host_list = CCBiz._hosts(cc_biz_id).get("data") or []
         if module_ids:
             hosts_by_module = collections.defaultdict(set)
             # The overall total is tracked in its own set so it can never
             # collide with a real module id used as a dictionary key.
             all_hosts = set()
             for host in host_list:
                 module_id = host["ModuleID"]
                 if module_id in module_ids:
                     key = host_key(host)
                     hosts_by_module[module_id].add(key)
                     all_hosts.add(key)
             for module_item in module_list:
                 # Modules without hosts get 0 through the defaultdict.
                 host_count = len(hosts_by_module[module_item["ModuleID"]])
                 module_item["host_count"] = host_count
                 module_item["ModuleName"] += ' [%s]' % host_count
             module_list = sorted(module_list,
                                  key=lambda x: x['host_count'],
                                  reverse=True)
             return module_list, len(all_hosts)
     return [], 0
Exemplo n.º 2
0
 def agent_status(cc_biz_id, host_id_list):
     """Query the agent status of the given hosts.

     The original note lists four detailed agent states (normal, offline,
     not installed, installed without data); this function itself only
     assigns ``AGENT_STATUS.ON`` or ``AGENT_STATUS.NOT_EXIST``.

     :param cc_biz_id: business id, sent to the job API as ``app_id``.
     :param host_id_list: a host id or an iterable of host ids, each
         accepted by ``parse_host_id``.
     :return: mapping of host key -> agent status. An empty plain dict is
         returned when there is nothing to query; otherwise a
         ``defaultdict(int)`` holding whatever was collected before the
         ``ignored(Exception)`` block was left.
     """
     status_by_host = collections.defaultdict(int)
     with ignored(Exception):
         if not hasattr(host_id_list, "__iter__"):
             host_id_list = [host_id_list]
         query_targets = []
         for host_id in host_id_list:
             ip, plat_id = parse_host_id(host_id)
             # Platform "1" is sent to the job API as "0".
             if plat_id == "1":
                 plat_id = "0"
             query_targets.append({"ip": ip, "plat_id": plat_id})
         if not query_targets:
             return {}
         response = client.job.get_agent_status(
             app_id=cc_biz_id, ip_infos=query_targets, is_real_time=1)
         for entry in response.get("data") or []:
             key = host_key(ip=entry["ip"], plat_id=entry["plat_id"])
             if entry["status"]:
                 status_by_host[key] = AGENT_STATUS.ON
             else:
                 status_by_host[key] = AGENT_STATUS.NOT_EXIST
     return status_by_host
Exemplo n.º 3
0
 def sets(cc_biz_id):
     """Return the SETs of a business sorted by host count.

     :param cc_biz_id: business id, sent to the CC API as ``app_id``.
     :return: ``(set_list, host_total)``. Every set dict gets a
         ``host_count`` key and " [<host_count>]" appended to its
         ``SetName``; the list is ordered by ``host_count`` descending.
         ``host_total`` counts the distinct host keys over all listed sets.
         Falls back to ``([], 0)`` when no set is found or when the
         ``ignored(Exception)`` block is left without returning.
     """
     with ignored(Exception):
         set_list = client.cc.get_sets_by_property(
             app_id=cc_biz_id).get("data") or []
         # A set keeps the per-host membership test O(1).
         set_ids = set(set_item["SetID"] for set_item in set_list)
         host_list = CCBiz._hosts(cc_biz_id).get("data") or []
         if set_ids:
             hosts_by_set = collections.defaultdict(set)
             # The overall total is tracked in its own set so it can never
             # collide with a real set id used as a dictionary key.
             all_hosts = set()
             for host in host_list:
                 set_id = host["SetID"]
                 if set_id in set_ids:
                     key = host_key(host)
                     hosts_by_set[set_id].add(key)
                     all_hosts.add(key)
             for set_item in set_list:
                 # Sets without hosts get 0 through the defaultdict.
                 host_count = len(hosts_by_set[set_item["SetID"]])
                 set_item["host_count"] = host_count
                 set_item["SetName"] += ' [%s]' % host_count
             set_list = sorted(set_list,
                               key=lambda x: x['host_count'],
                               reverse=True)
             return set_list, len(all_hosts)
     return [], 0
Exemplo n.º 4
0
 def set_module_ips(cc_biz_id, set_id="", module_id=""):
     """Return the sorted, de-duplicated host keys of a module or a set.

     :param cc_biz_id: business id, sent to the CC API as ``app_id``.
     :param set_id: set to query; only used when ``module_id`` is falsy.
     :param module_id: module to query; takes precedence over ``set_id``.
     :return: sorted list of distinct ``host_key(host)`` values. The
         original note says hosts are de-duplicated by platform id and ip,
         which presumably is what ``host_key`` encodes. Whatever was
         collected before the ``ignored(Exception)`` block was left is
         still returned, so a failed query yields ``[]``.
     """
     ips = set()
     with ignored(Exception):
         if module_id:
             result = client.cc.get_module_host_list(app_id=cc_biz_id,
                                                     module_id=module_id)
         else:
             result = client.cc.get_hosts_by_property(app_id=cc_biz_id,
                                                      set_id=set_id)
         if result["result"]:
             for host in (result.get("data") or []):
                 ips.add(host_key(host))
     return sorted(ips)
Exemplo n.º 5
0
def get_agent_status(request, cc_biz_id):
    """Report the agent installation status of every host in a business.

    :param request: the incoming request object (not read by this view).
    :param cc_biz_id: business id whose hosts are inspected.
    :return: ``render_json`` response. On success ``data`` is the number of
        hosts whose agent is ``AGENT_STATUS.ON`` and ``details`` carries the
        ok/fail counters and ip lists. When hosts exist but no status came
        back, a ``result: False`` payload is returned instead.
    """
    hosts_list = CCBiz.hosts(cc_biz_id).get('data') or []
    agent_status_info = CCBiz.agent_status(
        cc_biz_id, [host_key(h) for h in hosts_list])

    # Hosts without any status entry mean the status query itself failed.
    if hosts_list and not agent_status_info:
        return render_json({
            'result': False,
            'message': u"查询agent状态接口失败",
            'data': []
        })

    ok_ip_list = []
    fail_ip_list = []
    for hostid, status in agent_status_info.iteritems():
        ip, plat_id = parse_host_id(hostid)
        entry = {'ip': ip, 'cc_plat_id': plat_id}
        if status == AGENT_STATUS.ON:
            ok_ip_list.append(entry)
        else:
            fail_ip_list.append(entry)

    # Each host lands in exactly one list, so the counters are the lengths.
    agent_ok_cnt = len(ok_ip_list)
    agent_fail_cnt = len(fail_ip_list)

    return render_json({
        'result': True,
        'message': "",
        'data': agent_ok_cnt,
        'details': {
            'agent_ok_cnt': agent_ok_cnt,
            'agent_fail_cnt': agent_fail_cnt,
            'ok_ip_list': ok_ip_list,
            'fail_ip_list': fail_ip_list,
        }
    })
Exemplo n.º 6
0
def index(request, cc_biz_id):
    """Serve the basic-performance page and its host performance data.

    request.method:
    GET:    render the basic-performance home page template.
    other:  (POST per the original note) return host performance info as
            JSON, including host attributes and the per-host metrics.

    :param request: incoming request; ``method`` and ``GET`` are read.
    :param cc_biz_id: business id whose hosts are shown.
    :return: the rendered template for GET, otherwise a ``render_json``
        response built from ``ok_data`` / ``failed_data``.
    """
    def get_access_status(cc_biz_id):
        """
        Get the access progress of basic performance on the data platform.
        :return: {
            "accessed": True,
            "div_message": <message shown in the page>,
            "btn_message": <button caption>
        }
        """
        btn_message = u"确定"
        div_message = u"您的性能指标采集任务已下发正在进行中,请稍候再试!"
        # step 1: check whether a basic-performance data_id exists for this
        # business
        data_id_exist = any(get_bp_data_id(cc_biz_id))
        # step 2: without a data_id, report the business as not accessed
        if not data_id_exist:
            div_message = (u"检测到您的业务尚未开启监控,"
                           u"点击下面按钮开启主机指标采集,\n"
                           u"10-20分钟后刷新此页面即可查看到主机数据,请耐心等待!\n"
                           u"(未安装agent的主机,"
                           u"在主机详情页中按照指引完成部署并开启数据采集。)")
            btn_message = u"开始采集"
        return {
            "accessed": data_id_exist,
            "div_message": div_message,
            "btn_message": btn_message
        }

    data = {
        "hosts": [],
        "update_time": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
    if request.method == "GET":
        count_per_page = 10
        # Fetch cloud-area (platform) information
        plat_info = CCBiz.plat_info(cc_biz_id)
        checkhost_id = request.GET.get("alarm_strategy_id")
        # NOTE(review): locals() hands every local name defined so far to
        # render_mako_context (presumably as template context), so renaming
        # or removing the locals above may affect the template - confirm
        # before refactoring.
        return render_mako_context(request, '/monitor/performanceV2/home.html',
                                   locals())
    ts = TimeStats(u"get_host_info_list: %s" % cc_biz_id)
    # Fetch all host information (pagination is done on the front end)
    ts.split(u"获取主机列表")
    hosts = CCBiz.hosts(cc_biz_id).get("data") or []
    host_ids = set([host_key(h) for h in hosts])
    ts.split(u"获取主机agent状态")
    hosts_agent_status = CCBiz.agent_status(cc_biz_id, host_ids)
    # Default attributes copied onto every Host below. The same dict/list
    # objects are assigned to each host (no copy is made).
    default_status_info = {
        'alarm': {
            'level': [0, 0, 0],
        },
        'component': [],
    }
    try:
        # Dictionary of host performance result tables: when a later query
        # needs a result table that was already queried, the existing client
        # is reused to save query time.
        # Total CPU usage
        ts.split(u"获取主机cpu 总使用率")
        cpu_usage_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="cpuusage",
            category="cpu",
            result_table_id="ja_gse_cpu_cpuusage")
        # CPU 5-minute load
        ts.split(u"获取主机cpu 5分钟负载")
        cpu_load_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="locdavg",
            category="cpu",
            result_table_id="ja_gse_cpu")
        # Single-core CPU usage
        ts.split(u"获取主机cpu 单核使用率")
        cpu_single_usage_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="cpuusage",
            category="cpu",
            result_table_id="ja_gse_cpu_core_cpuusage")
        # Disk usage (temporarily hidden)
        # disk_used_info = get_host_performance(cc_biz_id, item="used_percent",
        # category="disk", result_table_id="ja_gse_disk_used")
        # IO wait
        ts.split(u"获取主机IO等待")
        io_util_info = HostIndex.get_host_performance(
            cc_biz_id,
            item="util",
            category="disk",
            result_table_id="ja_gse_disk_iostats")
        host_list = list()
        ts.split(u"处理整合所有数据")
        for h in hosts:
            host = Host(h)
            for k, v in default_status_info.iteritems():
                setattr(host, k, v)
            host.is_stickied = MonitorHostSticky.objects.host_is_stickied(
                host.id)
            # Basic performance data (None when a host has no entry)
            host.cpu_usage = cpu_usage_info.get(host.id)
            host.cpu_single_usage = cpu_single_usage_info.get(host.id)
            host.cpu_load = cpu_load_info.get(host.id)
            host.io_util = io_util_info.get(host.id)
            # When "val" is a list of dicts, merge them and keep only the
            # largest value.
            # NOTE(review): an empty list makes max() raise ValueError,
            # which the generic handler below turns into a failed response
            # for the whole page - confirm whether that can happen.
            if host.io_util and isinstance(host.io_util.get("val"), list):
                io_util = dict()
                for _io_util in host.io_util["val"]:
                    for k, v in _io_util.iteritems():
                        io_util[k] = v
                host.io_util["val"] = max(io_util.values())
            host.status = hosts_agent_status.get(host.id, AGENT_STATUS.UNKNOWN)
            index_list = [
                host.cpu_usage, host.cpu_single_usage, host.cpu_load,
                host.io_util
            ]
            # An agent that is on but delivered no metric is shown as
            # NO_DATA.
            if host.status == AGENT_STATUS.ON and not any(index_list):
                host.status = AGENT_STATUS.NO_DATA
            # Parameters that control how the front-end page displays the host
            # Whether the host is selected
            host.checked = False
            # Whether the host is shown (when grouping by attribute)
            host._is_show = True
            host_list.append(host)
        host_list.sort(key=lambda x: x.status, reverse=True)
        data["hosts"] = host_list
        return render_json(ok_data(data))
    except (TableNotExistException, SqlQueryException) as e:
        # Let the front end guide the user through data access
        if not hosts:
            data["need_access"] = False
            data["access_div_message"] = (
                u"检测到当前业务没有主机,请前往 %s 快速部署主机!" %
                href_link(u"Agent安装", AGENT_SETUP_URL))
            data["access_btn_message"] = u"确定"
        else:
            # Same host assembly as in the try block, but with every metric
            # left empty because the performance queries failed.
            host_list = list()
            for h in hosts:
                host = Host(h)
                for k, v in default_status_info.iteritems():
                    setattr(host, k, v)
                host.is_stickied = MonitorHostSticky.objects.host_is_stickied(
                    host.id)
                # Basic performance data
                host.cpu_usage = None
                host.cpu_single_usage = None
                host.cpu_load = None
                host.io_util = None
                host.status = hosts_agent_status.get(host.id,
                                                     AGENT_STATUS.UNKNOWN)
                if host.status == AGENT_STATUS.ON:
                    host.status = AGENT_STATUS.NO_DATA
                # Parameters that control how the front-end page displays the
                # host
                # Whether the host is selected
                host.checked = False
                # Whether the host is shown (when grouping by attribute)
                host._is_show = True
                host_list.append(host)
            host_list.sort(key=lambda x: x.status, reverse=True)
            data["hosts"] = host_list
            access_status = get_access_status(cc_biz_id)
            data["need_access"] = not access_status.get("accessed")
            # A query error gets a fixed message; a missing table reuses the
            # access-status message.
            if isinstance(e, SqlQueryException):
                logger.exception(u"数据平台查询失败:%s" % e)
                data["access_div_message"] = u"数据查询异常,请联系管理员"
            else:
                data["access_div_message"] = access_status.get("div_message")
            data["access_btn_message"] = access_status.get("btn_message")
        return render_json(failed_data(u"", data))
    # except SqlQueryException as e:
    #     logger.exception(u"数据平台查询失败:%s" % e)
    #     return render_json(failed_data(u"数据查询异常", data))
    except Exception as e:
        logger.exception(u"拉取主机性能信息失败: %s" % e)
        return render_json(failed_data(u"系统错误", data))
    finally:
        # Runs on every non-GET path: stop the timer and log its report at
        # warning level.
        ts.stop()
        time_stats_info = ts.display()
        logger.warning(time_stats_info)