def feed(self, port_num, result):
    """Feed a fresh counter readout for ``port_num`` into the speed cache.

    ``result`` maps raw counter names to cumulative values; per-second
    rates are derived from the time elapsed since the previous feed().
    The very first call only primes the timestamp and resets state.
    """
    cur_time = time.time()
    if self.__feed_time is not None:
        # seconds since the previous sample; divisor for delta -> rate
        diff_time = abs(cur_time - self.__feed_time)
        self._active_keys = sorted(result.keys())
        for _key in self._active_keys:
            if _key not in self._speed:
                # lazily create a MachineVectorEntry for unseen counters
                _alt_key = rewrite_key(_key)
                if _alt_key.count("data"):
                    # magic multiplicator for IB data counter
                    # (IB data counters count 32-bit words -> Byte/s)
                    _mult = 4
                    unit = "Byte/s"
                else:
                    _mult = 1
                    unit = "1/s"
                self._mult_dict[_key] = _mult
                self._speed[_key] = hm_classes.MachineVectorEntry(
                    "{}.{}".format(self._get_root_key(port_num), _alt_key),
                    default=0.,
                    info="IB Readout for {} on port {}".format(
                        _key, port_num),
                    unit=unit,
                    base=1000,
                )
            # update value, update for 2 minutes
            # NOTE(review): assumes result[_key] is already a delta since the
            # last feed, not a cumulative counter — confirm against caller
            self._speed[_key].update(
                result[_key] * self._mult_dict[_key] / diff_time,
                valid_until=cur_time + 2 * 60)
    else:
        # first sample: nothing to diff against, reset caches
        self._speed = {}
        self._active_keys = []
    self.__feed_time = cur_time
def _call(self, cur_time, builder):
    """Run ibqueryerrors (when present) and build machine vectors from it.

    Returns a list of machine-vector XML elements; empty when the
    ibqueryerrors binary was not found.
    """
    m_vectors = []
    self.__dl.check_freshness()
    if self._ibq_bin:
        # -k / -K keep standard counters/thresholds; stderr is discarded
        _cmd = "{} --counters --errors --details -k -K 2>/dev/null".format(
            process_tools.find_file("ibqueryerrors"))
        # getoutput: the exit status was never used (was getstatusoutput)
        _out = subprocess.getoutput(_cmd)
        self.ibd.feed(_out)
        m_vectors = self.ibd.build_vectors(self.__dl)
    # removed: dead `if False:` debug block that built a dummy test vector
    return m_vectors
def _create_machvector(self, builder, cur_time, quota_cache):
    """Build a machine vector with soft/hard/used block-quota entries.

    ``quota_cache`` yields (device name, "group"/"user", numeric id,
    quota object) tuples; one vector entry is appended per limit kind.
    """
    my_vector = builder("values")
    # entries stay valid for two run intervals (10 minutes)
    valid_until = cur_time + self.Meta.min_time_between_runs * 2
    for dev_name, obj_type, num_id, stuff in quota_cache:
        # resolve numeric id to a readable group/user name
        if obj_type == "group":
            name = self._get_gid_info(num_id, {}).get("groupname", "unknown")
        else:
            name = self._get_uid_info(num_id, {}).get("login", "unknown")
        block_dict = stuff.get_block_dict()
        pfix = "quota.{}.{}.{}".format(obj_type, name, dev_name)
        # one entry per limit kind instead of three copy-pasted blocks;
        # $2/$3/$4 are expanded by the consumer from the key parts
        for _key, _info in [
            ("soft", "Soft Limit for $2 $3 on $4"),
            ("hard", "Hard Limit for $2 $3 on $4"),
            ("used", "Used quota for $2 $3 on $4"),
        ]:
            my_vector.append(
                hm_classes.MachineVectorEntry(
                    "{}.{}".format(pfix, _key),
                    info=_info,
                    default=0,
                    value=block_dict[_key],
                    factor=1000,
                    base=1000,
                    valid_until=valid_until,
                    unit="B",
                ).build_xml(builder))
    return my_vector
def _vector_entry(v_type, csr):
    # build local vector entry
    # NOTE(review): this relies on ``cur_time`` and ``_bldr`` being bound in
    # an enclosing scope — as written it is only usable as a nested helper
    # (cf. the identical closure inside add_ova_statistics); confirm it is
    # not called as a free function.
    service = icswServiceEnum[csr.config_service_enum.enum_name]
    return hm_classes.MachineVectorEntry(
        "icsw.ova.{}.{}.{}.{}".format(
            v_type,
            csr.content_type.model,
            csr.action,
            service.name,
        ),
        info="Ova consumed by {}@{} on {} ({})".format(
            csr.action,
            service.name,
            csr.content_type.model,
            v_type,
        ),
        default=0,
        value=csr.consumed,
        factor=1,
        base=1,
        # entry expires one hour after the supplied timestamp
        valid_until=cur_time + 3600,
    ).build_xml(_bldr)
def _update_fairshare(self):
    """Sync SGE fairshare tree with the user list and export share usage.

    Adds missing user nodes to the fairshare tree (via ``qconf -astnode``)
    and pushes per-user fairshare metrics parsed from ``sge_share_mon``
    to the vector socket.
    """
    self._update()
    # get user list
    cur_stat, cur_out = call_command(
        "{} -suserl".format(
            self._get_sge_bin("qconf"),
        ),
        log_com=self.log)
    if cur_stat:
        # problem calling, return immediately
        return
    _users = [line.strip() for line in cur_out.split("\n")]
    _fs_tree = self.__sge_info.get_tree().find("fstree")
    if _fs_tree is not None:
        # fairshare tree found; check if all users are present
        for _user in _users:
            _user_el = _fs_tree.find(".//node[@name='{}']".format(_user))
            if _user_el is None:
                _path = global_config[
                    "FAIRSHARE_TREE_NODE_TEMPLATE"].format(
                        project="defaultproject",
                        user=_user,
                    )
                _shares = global_config["FAIRSHARE_TREE_DEFAULT_SHARES"]
                self.log(
                    "No user element for user '{}' found, adding node at {} with {:d} shares"
                    .format(
                        _user,
                        _path,
                        _shares,
                    ), logging_tools.LOG_LEVEL_WARN)
                _cur_stat, _cur_out = call_command(
                    "{} -astnode {}={:d}".format(
                        self._get_sge_bin("qconf"),
                        _path,
                        _shares,
                    ),
                    log_com=self.log)
    else:
        self.log("no fairshare tree element found",
                 logging_tools.LOG_LEVEL_WARN)
    # todo: match user list with sharetree config
    cur_stat, cur_out = call_command(
        "{} -n -c 1 ".format(
            self._get_sge_bin("sge_share_mon"),
        ),
        log_com=self.log)
    # raw string: "^\d+\.\d+" previously triggered invalid-escape warnings
    _float_re = re.compile(r"^\d+\.\d+")
    # headers
    drop_com = server_command.srv_command(command="set_vector")
    _bldr = drop_com.builder()
    _rms_vector = _bldr("values")
    # 10 minutes valid
    act_time = int(time.time())
    valid_until = act_time + 10 * 60
    if not cur_stat:
        for _line in cur_out.split("\n"):
            _dict = {}
            for _part in _line.strip().split():
                _header, _value = _part.split("=", 1)
                _header = _header.replace("%", "")
                if _float_re.match(_value):
                    _dict[_header] = float(_value)
                elif _value.isdigit():
                    _dict[_header] = int(_value)
                else:
                    _dict[_header] = _value
            # filter; .get() avoids a KeyError on blank / malformed lines
            if _dict.get("project_name") == "defaultproject" and _dict.get(
                    "user_name", None):
                _user = _dict["user_name"]
                for _t_key, _key, _info in [
                    ("cpu", "cpu", "CPU usage"),
                    ("io", "io", "IO usage"),
                    ("mem", "mem", "Memory usage"),
                    ("ltcpu", "ltcpu", "long target CPU usage"),
                    ("ltio", "ltio", "long target IO usage"),
                    ("ltmem", "ltmem", "long target Memory usage"),
                    ("job_count", "job_count", "Job count"),
                    ("share.short_target", "short_target_share",
                     "short target share"),
                    ("share.long_target", "long_target_share",
                     "long target share"),
                    ("share.actual", "actual_share", "actual share"),
                    ("shares", "shares", "configured shares"),
                    ("level", "level", "level"),
                    ("total", "total", "total"),
                ]:
                    _rms_vector.append(
                        hm_classes.MachineVectorEntry(
                            "rms.fairshare.{}.{}".format(_user, _t_key),
                            info="{} for user {}".format(_info, _user),
                            default=0.,
                            value=_dict[_key],
                            factor=1,
                            valid_until=valid_until,
                            base=1000,
                        ).build_xml(_bldr))
    drop_com["vector_rms"] = _rms_vector
    drop_com["vector_rms"].attrib["type"] = "vector"
    self.vector_socket.send_unicode(str(drop_com))
def generate_slotinfo(self, node_res, run_res):
    """Aggregate queue/host/user slot usage and publish it.

    Parses the qhost/qstat XML results, pushes rms.* machine vectors via
    the vector socket, stores accounting records in the database and
    forwards per-host slot stats to collectd.
    """
    act_time = int(time.time())
    # vector socket
    drop_com = server_command.srv_command(command="set_vector")
    _bldr = drop_com.builder()
    _rms_vector = _bldr("values")
    # 10 minutes valid
    valid_until = act_time + 10 * 60
    _queue_names = set()
    _host_names = set()
    _s_time = time.time()
    _host_stats = {}
    # queue dict; "total" aggregates over all queues
    _queues = {"total": QueueInfo()}
    for _node in node_res.findall(".//node"):
        _host = _node.findtext("host")
        _queue = _node.findtext("queue")
        _queue_names.add(_queue)
        _host_names.add(_host)
        # slot_info is "used/reserved/total"
        _si = _node.findtext("slot_info")
        _su, _sr, _st = (int(_val) for _val in _si.split("/"))
        _state = _node.findtext("state")
        _queues["total"].feed(_st, _sr, _su, _state)
        if _queue not in _queues:
            _queues[_queue] = QueueInfo()
        _queues[_queue].feed(_st, _sr, _su, _state)
        if _host not in _host_stats:
            _host_stats[_host] = QueueInfo()
        _host_stats[_host].feed(_st, _sr, _su, _state)
    _rms_vector.append(
        hm_classes.MachineVectorEntry(
            "rms.clusterqueues.total",
            info="ClusterQueues defined",
            default=0,
            value=len(_queue_names),
            factor=1,
            valid_until=valid_until,
            base=1000,
        ).build_xml(_bldr))
    _rms_vector.append(
        hm_classes.MachineVectorEntry(
            "rms.hosts.total",
            info="Hosts defined",
            default=0,
            value=len(_host_names),
            factor=1,
            valid_until=valid_until,
            base=1000,
        ).build_xml(_bldr))
    report_list = [
        ("total", "slots defined"),
        ("reserved", "slots reserved"),
        ("used", "slots used"),
        ("free", "slots free"),
        ("error", "instances in error state"),
        ("disabled", "instances in disabled state"),
        ("alarm", "instances in alarm state"),
        # fixed: info text previously duplicated the "error" description
        ("unknown", "instances in unknown state"),
        ("count", "instances"),
    ]
    for q_name, q_value in _queues.items():
        # sanitize queue name
        q_name = q_name.replace(".", "_")
        for _key, _info in report_list:
            _rms_vector.append(
                hm_classes.MachineVectorEntry(
                    "rms.queues.{}.{}".format(q_name, _key),
                    info="{} in queue {}".format(_info, q_name),
                    default=0,
                    value=getattr(q_value, _key),
                    factor=1,
                    valid_until=valid_until,
                    base=1000,
                ).build_xml(_bldr))
    # accounting records
    total_slots = _queues["total"].total
    _owner_dict = {
        _rms_user.name: {
            "obj": _rms_user,
            "slots": []
        }
        for _rms_user in rms_user.objects.all()
    }
    account_run = rms_accounting_run.objects.create(
        slots_defined=total_slots)
    # running slots info
    for _node in run_res.findall(".//job"):
        _pe_text = _node.findtext("granted_pe")
        _owner_text = _node.findtext("owner")
        if _pe_text == "-":
            # serial job
            _slots = 1
        else:
            # granted_pe looks like "pe_name(N)"
            _slots = int(_pe_text.split("(")[1].split(")")[0])
        if _owner_text not in _owner_dict:
            # unseen owner: create the user record on the fly
            new_user = rms_user(name=_owner_text, )
            new_user.save()
            _owner_dict[new_user.name] = {
                "obj": new_user,
                "slots": [],
            }
        _owner_dict[_owner_text]["slots"].append(_slots)
    _total = 0
    _records = []
    for _name, _struct in _owner_dict.items():
        _slots = sum(_struct["slots"])
        _records.append(
            rms_accounting_record(
                rms_accounting_run=account_run,
                rms_user=_struct["obj"],
                slots_used=_slots,
            ))
        _total += _slots
        _rms_vector.append(
            hm_classes.MachineVectorEntry(
                "rms.user.{}.slots".format(_name),
                info="Slots used by user '{}'".format(_name),
                default=0,
                value=_slots,
                factor=1,
                valid_until=valid_until,
                base=1,
            ).build_xml(_bldr))
    # total vector
    _rms_vector.append(
        hm_classes.MachineVectorEntry(
            "rms.user.slots",
            info="Slots used by all users",
            default=0,
            value=_total,
            factor=1,
            valid_until=valid_until,
            base=1,
        ).build_xml(_bldr))
    # create accounting records
    rms_accounting_record.objects.bulk_create(_records)
    drop_com["vector_rms"] = _rms_vector
    drop_com["vector_rms"].attrib["type"] = "vector"
    self.vector_socket.send_unicode(str(drop_com))
    # collectd commands; only hosts resolvable to a device and with stats
    valid_hosts = {
        _host: _dev
        for _host, _dev in [(_host, self._get_device(_host))
                            for _host in _host_names]
        if _dev is not None and _host in _host_stats
    }
    for _host_name, _dev in valid_hosts.items():
        mach_vect = E.machine_vector(
            time="{:d}".format(act_time),
            simple="0",
            name=_dev.full_name,
            uuid=_dev.uuid,
        )
        q_value = _host_stats[_host_name]
        mach_vect.extend([
            hm_classes.MachineVectorEntry(
                "rms.slots.{}".format(_key),
                info="{}".format(_info),
                default=0,
                value=getattr(q_value, _key),
                factor=1,
                valid_until=valid_until,
                base=1000,
            ).build_xml(E) for _key, _info in report_list
        ])
        try:
            self.collectd_socket.send_unicode(
                etree.tostring(mach_vect, encoding="unicode"), zmq.DONTWAIT)
        except Exception:
            # was a bare except; best-effort: log and continue with other hosts
            self.log(
                "error sending rms-slot info regarding {} to collectd: {}".
                format(
                    _dev.full_name,
                    process_tools.get_except_info(),
                ), logging_tools.LOG_LEVEL_ERROR)
    _e_time = time.time()
    self.log("info handling took {}".format(
        logging_tools.get_diff_time_str(_e_time - _s_time)))
def _call(self, cur_time, builder):
    """Query the APC UPS and translate known status keys into a machine vector.

    Returns the vector element, or None when the query returned nothing
    or machine-vector creation is disabled.
    """
    apc_dict = self.do_apc_call()
    if apc_dict and self.Meta.creates_machvector:
        my_vector = builder("values")
        valid_until = cur_time + self.Meta.min_time_between_runs * 2
        # status key -> (vector name, info, base, unit); replaces five
        # copy-pasted elif branches
        _key_map = {
            "linev": ("usv.volt.line", "Line voltage", 1000, "Volt"),
            "loadpct": ("usv.percent.load", "Percent Load Capacity", 1, "%"),
            "bcharge": ("usv.percent.charge", "Battery charge", 1, "%"),
            "timeleft": ("usv.time.left", "Time left in minutes", 1, "m"),
            "itemp": ("usv.temp.int", "Internal temperature", 1, "C"),
        }
        for key, value in apc_dict.items():
            _parts = value.split()
            if _parts and key in _key_map:
                _name, _info, _base, _unit = _key_map[key]
                my_vector.append(
                    hm_classes.MachineVectorEntry(
                        _name,
                        info=_info,
                        default=0.,
                        # fixed: for "linev" the reading was passed as the
                        # bogus keyword `first_v=` and the value never set
                        value=float(_parts[0]),
                        base=_base,
                        valid_until=valid_until,
                        unit=_unit,
                    ).build_xml(builder))
    else:
        my_vector = None
    return my_vector
def _show_meminfo(self, res_list):
    """Log memory usage of the monitored entries and push it as a machine vector.

    Maintains ``self.mis_dict`` as a cache of MachineVectorEntry objects
    keyed by entry name (totals) or (entry name, process name) (details),
    dropping cached keys that vanished from the current readout.
    """
    act_time = time.time()
    self.__act_meminfo_line += 1
    # only entries carrying a valid memory_info element are usable
    valid_entries = [
        entry for entry in res_list
        if entry.entry.find(".//memory_info[@valid='1']") is not None
    ]
    act_meminfo_keys = [entry.name for entry in valid_entries]
    # re-log the index -> name mapping when the key set changed or
    # every 100 output lines
    if act_meminfo_keys != self.__last_meminfo_keys or self.__act_meminfo_line > 100:
        self.__act_meminfo_line = 0
        self.__last_meminfo_keys = act_meminfo_keys
        self.log("Memory info mapping: {}".format(", ".join([
            "{:d}: {}".format(act_meminfo_keys.index(key) + 1, key)
            for key in act_meminfo_keys
        ])))
    if hm_classes and self.vector_socket:
        drop_com = server_command.srv_command(command="set_vector")
        mv_valid = act_time + 2 * global_config["MIN_MEMCHECK_TIME"]
        my_vector = drop_com.builder("values")
        # handle removal of old keys, track pids, TODO, FIXME
        old_keys = set(self.mis_dict.keys())
        new_keys = set()
        for entry in valid_entries:
            key = entry.name
            mem_el = entry.entry.find(".//memory_info")
            # element text is the total memory in bytes
            tot_mem = int(mem_el.text.strip())
            if mem_el.find("details") is not None:
                # per-process detail entries keyed by (entry, process name)
                for _detail in mem_el.findall("details/mem"):
                    proc_name = _detail.get("name")
                    f_key = (key, proc_name)
                    info_str = "memory usage of {} ({})".format(
                        key, proc_name)
                    if f_key not in self.mis_dict:
                        self.mis_dict[
                            f_key] = hm_classes.MachineVectorEntry(
                                "mem.icsw.{}.{}".format(key, proc_name),
                                info=info_str,
                                default=0,
                                unit="Byte",
                                base=1024)
                    self.mis_dict[f_key].update(int(_detail.text))
                    # refresh info / validity on every readout
                    self.mis_dict[f_key].info = info_str
                    self.mis_dict[f_key].valid_until = mv_valid
                    new_keys.add(f_key)
                    my_vector.append(self.mis_dict[f_key].build_xml(
                        drop_com.builder))
            if key not in self.mis_dict:
                self.mis_dict[key] = hm_classes.MachineVectorEntry(
                    "mem.icsw.{}.total".format(key),
                    info="memory usage of {}".format(key),
                    default=0,
                    unit="Byte",
                    base=1024)
            self.mis_dict[key].update(tot_mem)
            self.mis_dict[key].valid_until = mv_valid
            new_keys.add(key)
            my_vector.append(self.mis_dict[key].build_xml(
                drop_com.builder))
        drop_com["vector"] = my_vector
        drop_com["vector"].attrib["type"] = "vector"
        self.vector_socket.send_unicode(str(drop_com))
        # drop cached entries that disappeared from the current readout
        del_keys = old_keys - new_keys
        if del_keys:
            self.log("removing {} from mis_dict".format(
                logging_tools.get_plural("key", len(del_keys))))
            for del_key in del_keys:
                del self.mis_dict[del_key]
    self.log("Memory info: {}".format(" / ".join([
        process_tools.beautify_mem_info(int(
            _el.entry.find(".//memory_info").text), short=True)
        for _el in valid_entries
    ])))
def add_ova_statistics(self, cur_time, drop_com):
    """Append ova (egg) consumption statistics to ``drop_com`` as vector_ova.

    Emits per-consumer entries (ghost and real), overall totals and
    per-license values; all entries expire one hour after ``cur_time``.
    """
    _bldr = drop_com.builder
    _valid_until = cur_time + 3600

    def _vector_entry(v_type, csr):
        # build local vector entry for a single egg consumer
        _service = icswServiceEnum[csr.config_service_enum.enum_name]
        return hm_classes.MachineVectorEntry(
            "icsw.ova.{}.{}.{}.{}".format(
                v_type,
                csr.content_type.model,
                csr.action,
                _service.name,
            ),
            info="Ova consumed by {}@{} on {} ({})".format(
                csr.action,
                _service.name,
                csr.content_type.model,
                v_type,
            ),
            default=0,
            value=csr.consumed,
            factor=1,
            base=1,
            valid_until=_valid_until,
        ).build_xml(_bldr)

    def _overall_entry(sub_key, info_text, total_value):
        # summary entry across all consumers
        return hm_classes.MachineVectorEntry(
            "icsw.ova.overall.{}".format(sub_key),
            info=info_text,
            default=0,
            value=total_value,
            factor=1,
            base=1,
            valid_until=_valid_until,
        ).build_xml(_bldr)

    out_vector = _bldr("values")
    consumed_total = 0
    ghost_total = 0
    for _csr in icswEggConsumer.objects.all().select_related(
            "config_service_enum"):
        # ghost entry is emitted for every consumer ...
        out_vector.append(_vector_entry("ghost", _csr))
        ghost_total += _csr.consumed
        # ... the real consume entry only for non-ghost consumers
        if not _csr.ghost:
            out_vector.append(_vector_entry("consume", _csr))
            consumed_total += _csr.consumed
    out_vector.append(
        _overall_entry(
            "total",
            "Ova consumed by all actions on all models",
            consumed_total,
        ))
    out_vector.append(
        _overall_entry(
            "ghost",
            "Ova consumed by all actions on all models (ghost)",
            ghost_total,
        ))
    # add ova per license
    ova_per_lic = icswEggBasket.objects.get_values_per_license_name()
    for lic_id_name, values in ova_per_lic.items():
        lic_id_name = lic_id_name or "global"
        for v_name, v_value in values.items():
            out_vector.append(
                hm_classes.MachineVectorEntry(
                    "icsw.ova.license.{}.{}".format(lic_id_name, v_name),
                    info="Ova {} for license {}".format(
                        v_name, lic_id_name),
                    default=0,
                    value=v_value,
                    factor=1,
                    base=1,
                    valid_until=_valid_until,
                ).build_xml(_bldr))
    drop_com["vector_ova"] = out_vector
    drop_com["vector_ova"].attrib["type"] = "vector"