示例#1
0
 def feed(self, port_num, result):
     """Turn a new counter readout for one port into per-second rates.

     On the very first call there is no previous timestamp, so the speed
     cache and the list of active keys are simply reset. On every later
     call each value in ``result`` is multiplied by its per-key factor,
     divided by the time elapsed since the previous call and stored in the
     matching ``MachineVectorEntry`` (created on first sight of the key).

     :param port_num: port identifier, used for the entry key and info text
     :param result: mapping of counter name to value
         (NOTE(review): presumably deltas since the last readout - confirm)
     """
     now = time.time()
     if self.__feed_time is None:
         # first readout: nothing to compare against yet
         self._speed = {}
         self._active_keys = []
     else:
         elapsed = abs(now - self.__feed_time)
         self._active_keys = sorted(result.keys())
         for name in self._active_keys:
             if name not in self._speed:
                 alt_name = rewrite_key(name)
                 if "data" in alt_name:
                     # magic multiplicator for IB data counter
                     factor, unit = 4, "Byte/s"
                 else:
                     factor, unit = 1, "1/s"
                 self._mult_dict[name] = factor
                 self._speed[name] = hm_classes.MachineVectorEntry(
                     "{}.{}".format(self._get_root_key(port_num), alt_name),
                     default=0.,
                     info="IB Readout for {} on port {}".format(
                         name, port_num),
                     unit=unit,
                     base=1000,
                 )
             # store the new rate; it stays valid for 2 minutes
             rate = result[name] * self._mult_dict[name] / elapsed
             self._speed[name].update(rate, valid_until=now + 2 * 60)
     self.__feed_time = now
示例#2
0
 def _call(self, cur_time, builder):
     """Run ``ibqueryerrors`` and return the machine vectors built from it.

     The freshness check of the internal device lookup object is always
     performed. When ``self._ibq_bin`` is not set an empty list is
     returned; otherwise the command output is fed to ``self.ibd`` and the
     vectors it builds are returned.

     :param cur_time: current timestamp (not used by this implementation)
     :param builder: XML builder (not used by this implementation)
     :returns: list of machine vectors, possibly empty
     """
     self.__dl.check_freshness()
     if not self._ibq_bin:
         return []
     _cmd = "{} --counters --errors --details -k -K 2>/dev/null".format(
         process_tools.find_file("ibqueryerrors"))
     # the exit status is not inspected, only the output is parsed
     _, _out = subprocess.getstatusoutput(_cmd)
     self.ibd.feed(_out)
     return self.ibd.build_vectors(self.__dl)
示例#3
0
 def _create_machvector(self, builder, cur_time, quota_cache):
     """Build a ``values`` element holding soft/hard/used quota entries.

     For every ``(dev_name, obj_type, num_id, stuff)`` tuple in
     ``quota_cache`` three entries are appended, in the order soft, hard,
     used, keyed ``quota.<obj_type>.<name>.<dev_name>.<kind>``.

     :param builder: XML builder used for the container and every entry
     :param cur_time: timestamp the validity of the entries is based on
     :param quota_cache: iterable of 4-tuples as described above; ``stuff``
         must offer ``get_block_dict()`` with keys soft, hard and used
     :returns: the populated ``values`` element
     """
     vector = builder("values")
     # entries stay valid for two run intervals
     expires = cur_time + self.Meta.min_time_between_runs * 2
     # (key suffix == block_dict key, info template), in emit order
     kinds = (
         ("soft", "Soft Limit for $2 $3 on $4"),
         ("hard", "Hard Limit for $2 $3 on $4"),
         ("used", "Used quota for $2 $3 on $4"),
     )
     for dev_name, obj_type, num_id, stuff in quota_cache:
         if obj_type == "group":
             lookup, field = self._get_gid_info, "groupname"
         else:
             lookup, field = self._get_uid_info, "login"
         owner = lookup(num_id, {}).get(field, "unknown")
         blocks = stuff.get_block_dict()
         prefix = "quota.{}.{}.{}".format(obj_type, owner, dev_name)
         for kind, info in kinds:
             entry = hm_classes.MachineVectorEntry(
                 "{}.{}".format(prefix, kind),
                 info=info,
                 default=0,
                 value=blocks[kind],
                 factor=1000,
                 base=1000,
                 valid_until=expires,
                 unit="B",
             )
             vector.append(entry.build_xml(builder))
     return vector
示例#4
0
 def _vector_entry(v_type, csr):
     """Return the XML machine-vector entry for one consumer record.

     ``cur_time`` and ``_bldr`` are not parameters; they are taken from the
     surrounding scope.

     :param v_type: label placed in the key and the info text
     :param csr: consumer record providing ``config_service_enum``,
         ``content_type``, ``action`` and ``consumed``
     """
     service = icswServiceEnum[csr.config_service_enum.enum_name]
     entry_key = "icsw.ova.{}.{}.{}.{}".format(
         v_type,
         csr.content_type.model,
         csr.action,
         service.name,
     )
     entry_info = "Ova consumed by {}@{} on {} ({})".format(
         csr.action,
         service.name,
         csr.content_type.model,
         v_type,
     )
     entry = hm_classes.MachineVectorEntry(
         entry_key,
         info=entry_info,
         default=0,
         value=csr.consumed,
         factor=1,
         base=1,
         # one hour valid
         valid_until=cur_time + 3600,
     )
     return entry.build_xml(_bldr)
示例#5
0
    def _update_fairshare(self):
        """Complete the fairshare tree and publish per-user share statistics.

        Steps:

        1. Read the user list via ``qconf -suserl``; abort silently when the
           call reports a non-zero status.
        2. For every user without a node in the ``fstree`` element, add one
           via ``qconf -astnode`` using the configured template and default
           share count.
        3. Call ``sge_share_mon`` once, parse its ``name=value`` tokens and
           emit one machine-vector entry per reported field for every user
           of the project ``defaultproject``.
        4. Send the resulting vector over ``self.vector_socket``.

        Blank lines in either command output are ignored, as are tokens
        that contain no ``=``.
        """
        self._update()
        # get user list
        cur_stat, cur_out = call_command(
            "{} -suserl".format(self._get_sge_bin("qconf")),
            log_com=self.log,
        )
        if cur_stat:
            # problem calling, return immediately
            return
        # skip blank lines, an empty name must never be added to the tree
        _users = [
            _line.strip() for _line in cur_out.split("\n") if _line.strip()
        ]
        _fs_tree = self.__sge_info.get_tree().find("fstree")
        if _fs_tree is not None:
            # fairshare tree found, check if all users are present
            for _user in _users:
                _user_el = _fs_tree.find(".//node[@name='{}']".format(_user))
                if _user_el is not None:
                    continue
                _path = global_config["FAIRSHARE_TREE_NODE_TEMPLATE"].format(
                    project="defaultproject",
                    user=_user,
                )
                _shares = global_config["FAIRSHARE_TREE_DEFAULT_SHARES"]
                self.log(
                    "No user element for user '{}' found, adding node at {} with {:d} shares"
                    .format(
                        _user,
                        _path,
                        _shares,
                    ), logging_tools.LOG_LEVEL_WARN)
                # the result of the add call is not inspected here
                call_command(
                    "{} -astnode {}={:d}".format(
                        self._get_sge_bin("qconf"),
                        _path,
                        _shares,
                    ),
                    log_com=self.log)
        else:
            self.log("no fairshare tree element found",
                     logging_tools.LOG_LEVEL_WARN)
        # todo: match user list with sharetree config
        cur_stat, cur_out = call_command(
            "{} -n -c 1 ".format(self._get_sge_bin("sge_share_mon")),
            log_com=self.log,
        )
        # raw string: "\d" and "\." are not valid string escapes
        _float_re = re.compile(r"^\d+\.\d+")
        drop_com = server_command.srv_command(command="set_vector")
        _bldr = drop_com.builder()
        _rms_vector = _bldr("values")
        # 10 minutes valid
        valid_until = int(time.time()) + 10 * 60
        # (vector key suffix, field name in the command output, info text)
        _report_fields = [
            ("cpu", "cpu", "CPU usage"),
            ("io", "io", "IO usage"),
            ("mem", "mem", "Memory usage"),
            ("ltcpu", "ltcpu", "long target CPU usage"),
            ("ltio", "ltio", "long target IO usage"),
            ("ltmem", "ltmem", "long target Memory usage"),
            ("job_count", "job_count", "Job count"),
            ("share.short_target", "short_target_share",
             "short target share"),
            ("share.long_target", "long_target_share",
             "long target share"),
            ("share.actual", "actual_share", "actual share"),
            ("shares", "shares", "configured shares"),
            ("level", "level", "level"),
            ("total", "total", "total"),
        ]
        if not cur_stat:
            for _line in cur_out.split("\n"):
                _dict = {}
                for _part in _line.strip().split():
                    if "=" not in _part:
                        # not a name=value token, nothing to parse
                        continue
                    _header, _value = _part.split("=", 1)
                    _header = _header.replace("%", "")
                    if _float_re.match(_value):
                        _dict[_header] = float(_value)
                    elif _value.isdigit():
                        _dict[_header] = int(_value)
                    else:
                        _dict[_header] = _value
                # filter; .get() keeps blank lines (empty dict) from raising
                if _dict.get("project_name") != "defaultproject":
                    continue
                _user = _dict.get("user_name", None)
                if not _user:
                    continue
                for _t_key, _key, _info in _report_fields:
                    _rms_vector.append(
                        hm_classes.MachineVectorEntry(
                            "rms.fairshare.{}.{}".format(_user, _t_key),
                            info="{} for user {}".format(_info, _user),
                            default=0.,
                            value=_dict[_key],
                            factor=1,
                            valid_until=valid_until,
                            base=1000,
                        ).build_xml(_bldr))

        drop_com["vector_rms"] = _rms_vector
        drop_com["vector_rms"].attrib["type"] = "vector"
        self.vector_socket.send_unicode(str(drop_com))
示例#6
0
    def generate_slotinfo(self, node_res, run_res):
        """Publish slot statistics and record per-user slot accounting.

        The method

        * aggregates the slot information of every ``node`` element in
          ``node_res`` per queue, per host and overall,
        * creates an accounting run plus one accounting record per known
          user from the ``job`` elements in ``run_res`` (unknown owners are
          created on the fly),
        * sends all values as one ``set_vector`` command over
          ``self.vector_socket``,
        * sends a per-host machine vector to ``self.collectd_socket`` for
          every host that resolves to a device; send errors are logged and
          do not abort the loop.

        :param node_res: XML tree with ``node`` elements carrying ``host``,
            ``queue``, ``slot_info`` and ``state``
        :param run_res: XML tree with ``job`` elements carrying
            ``granted_pe`` and ``owner``
        """
        act_time = int(time.time())
        # vector socket
        drop_com = server_command.srv_command(command="set_vector")
        _bldr = drop_com.builder()
        _rms_vector = _bldr("values")
        # 10 minutes valid
        valid_until = act_time + 10 * 60

        def _entry(key, info, value, base, xml_builder):
            # every entry of this run shares default, factor and lifetime
            return hm_classes.MachineVectorEntry(
                key,
                info=info,
                default=0,
                value=value,
                factor=1,
                valid_until=valid_until,
                base=base,
            ).build_xml(xml_builder)

        _queue_names = set()
        _host_names = set()
        _s_time = time.time()
        _host_stats = {}

        # queue dict, "total" aggregates over all queue instances
        _queues = {"total": QueueInfo()}
        for _node in node_res.findall(".//node"):
            _host = _node.findtext("host")
            _queue = _node.findtext("queue")
            _queue_names.add(_queue)
            _host_names.add(_host)
            _si = _node.findtext("slot_info")
            # three "/"-separated integers
            # (NOTE(review): presumably used/reserved/total - confirm)
            _su, _sr, _st = (int(_val) for _val in _si.split("/"))
            _state = _node.findtext("state")
            _queues["total"].feed(_st, _sr, _su, _state)
            if _queue not in _queues:
                _queues[_queue] = QueueInfo()
            _queues[_queue].feed(_st, _sr, _su, _state)
            if _host not in _host_stats:
                _host_stats[_host] = QueueInfo()
            _host_stats[_host].feed(_st, _sr, _su, _state)
        _rms_vector.append(
            _entry(
                "rms.clusterqueues.total",
                "ClusterQueues defined",
                len(_queue_names),
                1000,
                _bldr,
            ))
        _rms_vector.append(
            _entry(
                "rms.hosts.total",
                "Hosts defined",
                len(_host_names),
                1000,
                _bldr,
            ))
        # (QueueInfo attribute, info text)
        report_list = [
            ("total", "slots defined"),
            ("reserved", "slots reserved"),
            ("used", "slots used"),
            ("free", "slots free"),
            ("error", "instances in error state"),
            ("disabled", "instances in disabled state"),
            ("alarm", "instances in alarm state"),
            # text used to be a copy of the "error" line
            ("unknown", "instances in unknown state"),
            ("count", "instances"),
        ]
        for q_name, q_value in _queues.items():
            # sanitize queue name, dots would add levels to the key
            q_name = q_name.replace(".", "_")
            for _key, _info in report_list:
                _rms_vector.append(
                    _entry(
                        "rms.queues.{}.{}".format(q_name, _key),
                        "{} in queue {}".format(_info, q_name),
                        getattr(q_value, _key),
                        1000,
                        _bldr,
                    ))

        # accounting records
        total_slots = _queues["total"].total
        _owner_dict = {
            _rms_user.name: {
                "obj": _rms_user,
                "slots": []
            }
            for _rms_user in rms_user.objects.all()
        }
        account_run = rms_accounting_run.objects.create(
            slots_defined=total_slots)

        # running slots info
        for _node in run_res.findall(".//job"):
            _pe_text = _node.findtext("granted_pe")
            _owner_text = _node.findtext("owner")
            if _pe_text == "-":
                # no parallel environment granted, counts as a single slot
                _slots = 1
            else:
                # slot count is the number between the parentheses
                _slots = int(_pe_text.split("(")[1].split(")")[0])
            if _owner_text not in _owner_dict:
                new_user = rms_user(name=_owner_text, )
                new_user.save()
                _owner_dict[new_user.name] = {
                    "obj": new_user,
                    "slots": [],
                }
            _owner_dict[_owner_text]["slots"].append(_slots)
        _total = 0
        _records = []
        for _name, _struct in _owner_dict.items():
            _slots = sum(_struct["slots"])
            _records.append(
                rms_accounting_record(
                    rms_accounting_run=account_run,
                    rms_user=_struct["obj"],
                    slots_used=_slots,
                ))
            _total += _slots
            _rms_vector.append(
                _entry(
                    "rms.user.{}.slots".format(_name),
                    "Slots used by user '{}'".format(_name),
                    _slots,
                    1,
                    _bldr,
                ))
        # total vector
        _rms_vector.append(
            _entry(
                "rms.user.slots",
                "Slots used by all users",
                _total,
                1,
                _bldr,
            ))
        # create accounting records
        rms_accounting_record.objects.bulk_create(_records)
        drop_com["vector_rms"] = _rms_vector
        drop_com["vector_rms"].attrib["type"] = "vector"
        self.vector_socket.send_unicode(str(drop_com))
        # collectd commands, only for hosts that resolve to a device
        valid_hosts = {
            _host: _dev
            for _host, _dev in [(_host, self._get_device(_host))
                                for _host in _host_names]
            if _dev is not None and _host in _host_stats
        }
        for _host_name, _dev in valid_hosts.items():
            mach_vect = E.machine_vector(
                time="{:d}".format(act_time),
                simple="0",
                name=_dev.full_name,
                uuid=_dev.uuid,
            )
            q_value = _host_stats[_host_name]
            mach_vect.extend([
                _entry(
                    "rms.slots.{}".format(_key),
                    "{}".format(_info),
                    getattr(q_value, _key),
                    1000,
                    E,
                ) for _key, _info in report_list
            ])
            try:
                self.collectd_socket.send_unicode(
                    etree.tostring(mach_vect, encoding="unicode"),
                    zmq.DONTWAIT)
            except Exception:
                # best effort: log and continue with the next host, but let
                # KeyboardInterrupt / SystemExit pass through
                self.log(
                    "error sending rms-slot info regarding {} to collectd: {}".
                    format(
                        _dev.full_name,
                        process_tools.get_except_info(),
                    ), logging_tools.LOG_LEVEL_ERROR)
        _e_time = time.time()
        self.log("info handling took {}".format(
            logging_tools.get_diff_time_str(_e_time - _s_time)))
示例#7
0
 def _call(self, cur_time, builder):
     """Convert the APC status readout into a ``values`` machine vector.

     Only the keys ``linev``, ``loadpct``, ``bcharge``, ``timeleft`` and
     ``itemp`` are reported; for each of them the first whitespace
     separated token of the value is converted to ``float``. Keys with an
     empty value are skipped.

     :param cur_time: timestamp the validity of the entries is based on
     :param builder: XML builder used for the container and every entry
     :returns: the ``values`` element, or ``None`` when the readout is
         empty or ``self.Meta.creates_machvector`` is not set
     :raises ValueError: when the first token of a reported key is not a
         number
     """
     apc_dict = self.do_apc_call()
     if not (apc_dict and self.Meta.creates_machvector):
         return None
     # apc key -> (vector key, info text, base, unit)
     reported = {
         "linev": ("usv.volt.line", "Line voltage", 1000, "Volt"),
         "loadpct": ("usv.percent.load", "Percent Load Capacity", 1, "%"),
         "bcharge": ("usv.percent.charge", "Battery charge", 1, "%"),
         "timeleft": ("usv.time.left", "Time left in minutes", 1, "m"),
         "itemp": ("usv.temp.int", "Internal temperature", 1, "C"),
     }
     my_vector = builder("values")
     # entries stay valid for two run intervals
     valid_until = cur_time + self.Meta.min_time_between_runs * 2
     for key, value in apc_dict.items():
         tokens = value.split()
         if not tokens or key not in reported:
             continue
         v_key, v_info, v_base, v_unit = reported[key]
         my_vector.append(
             hm_classes.MachineVectorEntry(
                 v_key,
                 info=v_info,
                 default=0.,
                 # the line voltage used to be passed as "first_v=" and
                 # therefore never ended up as the value of the entry
                 value=float(tokens[0]),
                 base=v_base,
                 valid_until=valid_until,
                 unit=v_unit,
             ).build_xml(builder))
     return my_vector
示例#8
0
文件: server.py 项目: walong365/icsw
 def _show_meminfo(self, res_list):
     """Log the memory usage of all valid entries and publish it.

     An entry counts as valid when its XML element contains a
     ``memory_info`` node with ``valid='1'``. The name-to-position mapping
     is logged whenever the list of valid names changes or after more than
     100 calls. When ``hm_classes`` and ``self.vector_socket`` are
     available, one vector entry per process detail and one total entry per
     name are sent; cached entries in ``self.mis_dict`` that were not
     refreshed in this call are removed afterwards.

     :param res_list: iterable of objects offering ``name`` and ``entry``
         (an XML element)
     """
     act_time = time.time()
     self.__act_meminfo_line += 1
     valid_entries = [
         entry for entry in res_list
         if entry.entry.find(".//memory_info[@valid='1']") is not None
     ]
     act_meminfo_keys = [entry.name for entry in valid_entries]
     if act_meminfo_keys != self.__last_meminfo_keys or self.__act_meminfo_line > 100:
         self.__act_meminfo_line = 0
         self.__last_meminfo_keys = act_meminfo_keys
         # enumerate() numbers every position, list.index() rescanned the
         # list for each name and repeated the number for duplicates
         self.log("Memory info mapping: {}".format(", ".join([
             "{:d}: {}".format(_pos, _name)
             for _pos, _name in enumerate(act_meminfo_keys, 1)
         ])))
     if hm_classes and self.vector_socket:
         drop_com = server_command.srv_command(command="set_vector")
         # entries stay valid for two check intervals
         mv_valid = act_time + 2 * global_config["MIN_MEMCHECK_TIME"]
         my_vector = drop_com.builder("values")
         # handle removal of old keys, track pids, TODO, FIXME
         old_keys = set(self.mis_dict)
         new_keys = set()
         for entry in valid_entries:
             key = entry.name
             mem_el = entry.entry.find(".//memory_info")
             tot_mem = int(mem_el.text.strip())
             # findall() yields nothing when there is no details element
             for _detail in mem_el.findall("details/mem"):
                 proc_name = _detail.get("name")
                 # per-process entries are cached under (name, process)
                 f_key = (key, proc_name)
                 info_str = "memory usage of {} ({})".format(
                     key, proc_name)
                 if f_key not in self.mis_dict:
                     self.mis_dict[f_key] = hm_classes.MachineVectorEntry(
                         "mem.icsw.{}.{}".format(key, proc_name),
                         info=info_str,
                         default=0,
                         unit="Byte",
                         base=1024)
                 _d_entry = self.mis_dict[f_key]
                 _d_entry.update(int(_detail.text))
                 _d_entry.info = info_str
                 _d_entry.valid_until = mv_valid
                 new_keys.add(f_key)
                 my_vector.append(_d_entry.build_xml(drop_com.builder))
             # the total is cached under the plain name
             if key not in self.mis_dict:
                 self.mis_dict[key] = hm_classes.MachineVectorEntry(
                     "mem.icsw.{}.total".format(key),
                     info="memory usage of {}".format(key),
                     default=0,
                     unit="Byte",
                     base=1024)
             _t_entry = self.mis_dict[key]
             _t_entry.update(tot_mem)
             _t_entry.valid_until = mv_valid
             new_keys.add(key)
             my_vector.append(_t_entry.build_xml(drop_com.builder))
         drop_com["vector"] = my_vector
         drop_com["vector"].attrib["type"] = "vector"
         self.vector_socket.send_unicode(str(drop_com))
         # forget everything that was not refreshed in this call
         del_keys = old_keys - new_keys
         if del_keys:
             self.log("removing {} from mis_dict".format(
                 logging_tools.get_plural("key", len(del_keys))))
             for del_key in del_keys:
                 del self.mis_dict[del_key]
     self.log("Memory info: {}".format(" / ".join([
         process_tools.beautify_mem_info(
             int(_el.entry.find(".//memory_info").text),
             short=True)
         for _el in valid_entries
     ])))
示例#9
0
    def add_ova_statistics(self, cur_time, drop_com):
        """Attach the ova consumption statistics to ``drop_com``.

        The vector stored as ``drop_com["vector_ova"]`` contains

        * one ``ghost`` entry for every egg consumer and additionally one
          ``consume`` entry for every consumer that is not a ghost,
        * the two overall sums (``total`` excludes ghost consumers,
          ``ghost`` includes every consumer),
        * one entry per value reported for each license; a false-ish
          license name is reported as ``global``.

        All entries are valid for one hour after ``cur_time``.

        :param cur_time: timestamp the validity of the entries is based on
        :param drop_com: server command receiving the vector
        """
        xml_builder = drop_com.builder
        expires = cur_time + 3600

        def _ova_entry(key, info, value):
            # every entry shares default, factor, base and lifetime
            return hm_classes.MachineVectorEntry(
                key,
                info=info,
                default=0,
                value=value,
                factor=1,
                base=1,
                valid_until=expires,
            ).build_xml(xml_builder)

        def _consumer_entry(v_type, csr):
            # entry for a single consumer record
            service = icswServiceEnum[csr.config_service_enum.enum_name]
            return _ova_entry(
                "icsw.ova.{}.{}.{}.{}".format(
                    v_type,
                    csr.content_type.model,
                    csr.action,
                    service.name,
                ),
                "Ova consumed by {}@{} on {} ({})".format(
                    csr.action,
                    service.name,
                    csr.content_type.model,
                    v_type,
                ),
                csr.consumed,
            )

        vector = xml_builder("values")
        consumed_sum = 0
        ghost_sum = 0
        consumers = icswEggConsumer.objects.all().select_related(
            "config_service_enum")
        for consumer in consumers:
            vector.append(_consumer_entry("ghost", consumer))
            ghost_sum += consumer.consumed
            if consumer.ghost:
                continue
            vector.append(_consumer_entry("consume", consumer))
            consumed_sum += consumer.consumed
        vector.append(
            _ova_entry(
                "icsw.ova.overall.total",
                "Ova consumed by all actions on all models",
                consumed_sum,
            ))
        vector.append(
            _ova_entry(
                "icsw.ova.overall.ghost",
                "Ova consumed by all actions on all models (ghost)",
                ghost_sum,
            ))
        # add ova per license
        per_license = icswEggBasket.objects.get_values_per_license_name()
        for lic_name, lic_values in per_license.items():
            lic_name = lic_name or "global"
            for val_name, val in lic_values.items():
                vector.append(
                    _ova_entry(
                        "icsw.ova.license.{}.{}".format(lic_name, val_name),
                        "Ova {} for license {}".format(val_name, lic_name),
                        val,
                    ))
        drop_com["vector_ova"] = vector
        drop_com["vector_ova"].attrib["type"] = "vector"