def main():
    """Register this service with UNIS, then drive the scheduler forever.

    Every pass runs the scheduler's settings, probe and process checks and
    then sleeps for whatever remains of ``settings.CHECK_INTERVAL``.
    """
    registrar = UNISInstance(unis_url=settings.UNIS_URL)
    registrar.register_service_to_unis()

    scheduler = Scheduler()
    while True:
        began = time.time()

        scheduler.check_settings()
        scheduler.check_probes()
        scheduler.check_procs()

        # Sleep only for the unused part of the interval; waits of a
        # millisecond or less are skipped.
        remaining = (began + settings.CHECK_INTERVAL) - time.time()
        if remaining > 0.001:
            time.sleep(remaining)
def get_interface_subjects(self):
    """Post every interface listed in ``net/dev`` to UNIS as a port.

    Interface names are read from ``self._proc.open('net', 'dev')``; for
    each one a port description (name, capacity, addresses, nodeRef) is
    built and posted through a ``UNISInstance``.

    Returns:
        dict mapping interface name to the ``selfRef`` of the posted port.
        Interfaces whose post did not return a dict are left out.
    """
    type_map = {'ipv4': netifaces.AF_INET,
                'ipv6': netifaces.AF_INET6,
                'mac': netifaces.AF_LINK}
    # Address lookups are best-effort: an interface may simply not have an
    # address of a given family.  Only the lookup failures are ignored so
    # that KeyboardInterrupt/SystemExit still propagate.
    lookup_errors = (KeyError, IndexError, ValueError)

    netdev = self._proc.open('net', 'dev')
    faces = []
    for line in netdev:
        fields = line.split()
        # Interface rows start with "<name>:"; header rows (and blank
        # lines) do not.
        if fields and ":" in fields[0]:
            faces.append(fields[0][:fields[0].index(":")])

    unis = UNISInstance(unis_url=self.unis_url)
    subjects = {}
    for face in faces:
        post_dict = {}
        try:
            capacity = ethtool.get_speed(face)
        except OSError:
            capacity = 0

        # assume each port is a layer2 port for the main 'address'
        try:
            l2_addr = netifaces.ifaddresses(face)[type_map['mac']]
            if len(l2_addr):
                post_dict['address'] = {"type": "mac",
                                        "address": l2_addr[0]['addr']}
        except lookup_errors:
            pass

        # add all the other address info we can find
        post_dict['properties'] = {}
        for t in type_map:
            try:
                addrs = netifaces.ifaddresses(face)[type_map[t]]
                for a in addrs:
                    # NOTE: with several addresses of one family only the
                    # last one is kept.
                    post_dict['properties'][t] = {"type": t,
                                                  "address": a['addr']}
            except lookup_errors:
                pass

        # TODO: some sort of verification here that capacity is right
        post_dict['name'] = face
        post_dict['capacity'] = capacity
        # hack in a 'nodeRef' so we can find port from rspec
        post_dict['nodeRef'] = settings.URN_STRING[:-1]

        resp = unis.post_port(post_dict)
        if isinstance(resp, dict):
            subjects[face] = resp['selfRef']
    return subjects
def __init__(self, service, measurement):
    """Load the probe configuration and look up the watched service in UNIS.

    ``service_type`` and ``service_accesspoint`` are read from
    ``measurement["configuration"]``; a missing key is only logged.
    ``self.id`` is set to the id of the first service UNIS returns for that
    access point / service type, and stays ``None`` otherwise.
    """
    self.config = measurement["configuration"]
    self.service = service
    self.unis = UNISInstance(service)
    self.pname = None
    self.pidfile = None
    self.id = None

    cfg = self.config

    try:
        self.serviceType = cfg["service_type"]
    except Exception:
        logger.error("__init__", msg="Must specify service_type!")

    try:
        self.accessPoint = cfg["service_accesspoint"]
    except Exception:
        logger.error("__init__", msg="Must specify access point!")

    try:
        self.pidfile = cfg.get("pidfile", None)
    except Exception:
        logger.warn("__init__", msg="Config does not specify pidfile")

    self.pname = cfg.get("process_name", None)

    # check for existing service given accessPoint and serviceType
    try:
        query = "/services?accessPoint=%s&serviceType=%s" % (
            self.accessPoint, self.serviceType)
        matches = self.unis.get(query)
        if matches and len(matches):
            self.id = matches[0]["id"]
    except Exception as e:
        logger.error("%s" % e)
def __init__(self, initial_config=None, node_id=None, urn=None):
    """Store the initial configuration and create the UNIS client.

    Args:
        initial_config: configuration dict for the service.  When omitted,
            a fresh empty dict is used for this instance (a shared ``{}``
            default would leak state between instances, because the
            configuration is mutated later on).
        node_id: UNIS node id; falls back to ``settings.UNIS_ID`` when falsy.
        urn: optional URN stored on the instance.
    """
    if not node_id:
        node_id = settings.UNIS_ID
    self.node_id = node_id
    self.urn = urn
    self.config = {} if initial_config is None else initial_config
    self.unis = UNISInstance(self.config)
    self.service_setup = False
def __init__(self, initial_config=None, node_id=None, urn=None):
    """Store the initial configuration and create the UNIS client.

    Args:
        initial_config: configuration dict for the service.  When omitted,
            a fresh empty dict is used instead of a shared mutable default.
            The dict must contain
            ``["properties"]["configurations"]["unis_poll_interval"]``,
            otherwise a ``KeyError`` is raised (unchanged behaviour).
        node_id: UNIS node id; falls back to ``settings.UNIS_ID`` when falsy.
        urn: optional URN stored on the instance.
    """
    if not node_id:
        node_id = settings.UNIS_ID
    self.node_id = node_id
    self.urn = urn
    self.config = {} if initial_config is None else initial_config
    self.unis = UNISInstance(self.config)
    self.node_setup = False
    self.service_setup = False
    poll_interval = self.config["properties"]["configurations"]["unis_poll_interval"]
    self.exponential_backoff = int(poll_interval)
def __init__(self, service, measurement):
    """Set up the back-end clients and the empty bookkeeping tables.

    ``service`` and ``measurement`` are kept on the instance and passed on
    to the ``MSInstance``, ``DataLogger`` and ``UNISInstance`` constructors.
    """
    # What we were asked to collect for.
    self.service = service
    self.measurement = measurement
    self.config = measurement["configuration"]

    # Counters / flags.
    self.collections_created = False
    self.num_collected = 0

    # Lookup tables:
    #   mids:        {subject1: {metric1: mid, metric2: mid}, subj2: {...}}
    #   mid_to_data: {mid: [{"ts": ts, "value": val}, {"ts": ts, "value": val}]}
    #   mid_to_et:   filled in elsewhere, keyed by mid
    self.mids = {}
    self.mid_to_data = {}
    self.mid_to_et = {}

    # Clients, constructed in the same order as before.
    self.ms = MSInstance(service, measurement)
    self.dl = DataLogger(service, measurement)
    self.unis = UNISInstance(service)
def __init__(self, service, measurement):
    """Configure the probe and resolve a subject for every interface.

    Reads ``proc_dir``, ``subject`` / ``runningOn.href`` and
    ``port_match_method`` from ``measurement["configuration"]``, then fills
    ``self.subjects`` via ``get_interface_subjects()``.
    """
    self.service = service
    self.measurement = measurement
    cfg = measurement["configuration"]
    self.config = cfg
    logger.debug('Probe.__init__', config=self.config)

    self._proc = Proc(cfg.get("proc_dir", "/proc/"))

    # The node's own subject: an explicit "subject" wins, otherwise the
    # href of "runningOn", otherwise the literal 'not found'.
    running_on_href = cfg.get("runningOn", {}).get('href', 'not found')
    self.node_subject = cfg.get("subject", running_on_href)

    # The configured dotted name is replaced by whatever
    # blipp_import_method returns for it.
    self.port_match_method = cfg.get("port_match_method",
                                     "geni_utils.mac_match")
    self.port_match_method = blipp_import_method(self.port_match_method)

    self.unis = UNISInstance(service)
    logger.debug('Probe.__init__ ', subject=self.node_subject)
    self.subjects = self.get_interface_subjects()
def __init__(self, service, measurement):
    """Build the curl command line used to query the collectd database.

    The configured ``command`` is first run through
    ``_substitute_command``; its result is then replaced by a fixed curl
    invocation built from the ``table`` and ``schedule_params.every``
    configuration values.
    """
    self.service = service
    self.measurement = measurement
    cfg = measurement["configuration"]
    self.config = cfg
    self.unis = UNISInstance(service)

    # NOTE(review): this value is overwritten below; the call is kept
    # because _substitute_command still runs (and logs) as before.
    self.command = self._substitute_command(str(cfg.get("command")), cfg)

    # TODO: get the latest ts and store it and use it as query bound next run.
    query_url = 'http://*****:*****@r*Buve5'
    query_db = "db=collectd"
    table = cfg.get("table")
    every = cfg.get("schedule_params").get("every")
    query = "q=select * from {0} where time > now() - {1}s;".format(table, every)

    curl_args = ['curl', '-GET', query_url]
    curl_args.extend(['--data-urlencode', query_db])
    curl_args.extend(['--data-urlencode', query])
    self.command = curl_args
class ServiceConfigure(object):
    '''
    ServiceConfigure is meant to be a generic class for any service
    which registers itself to, and gets configuration from UNIS. It
    was originally developed for BLiPP, but BLiPP specific features
    should be in the BlippConfigure class which extends
    ServiceConfigure.
    '''

    def __init__(self, initial_config=None, node_id=None, urn=None):
        '''
        initial_config: configuration dict; a fresh empty dict is used
            when omitted (the dict is mutated by the setup methods, so a
            shared ``{}`` default would leak state between instances).
        node_id: UNIS node id; falls back to settings.UNIS_ID when falsy.
        urn: optional URN that overrides settings.HOST_URN in _setup_node.
        '''
        if not node_id:
            node_id = settings.UNIS_ID
        self.node_id = node_id
        self.urn = urn
        self.config = {} if initial_config is None else initial_config
        self.unis = UNISInstance(self.config)
        # Both flags are flipped to True by the corresponding _setup_*
        # method once UNIS has acknowledged the object.
        self.node_setup = False
        self.service_setup = False

    def initialize(self):
        ''' Set up the node first, then the service that runs on it. '''
        self._setup_node(self.node_id)
        self._setup_service()

    def refresh(self):
        ''' Re-read this service's config from UNIS; keep the old one on failure. '''
        r = self.unis.get("/services/" + self.config["id"])
        if not r:
            logger.warn('refresh', msg="refresh failed")
        else:
            self.config = r

    def _setup_node(self, node_id):
        '''
        Find (by id, then by URN) or create the node in UNIS and record
        it in config["runningOn"]. On failure "runningOn" gets an empty
        href and a warning is logged.
        '''
        config = self.config
        logger.debug('_setup_node', config=pprint.pformat(config))
        hostname = settings.HOSTNAME
        urn = settings.HOST_URN if not self.urn else self.urn

        if node_id:
            r = self.unis.get("/nodes/" + str(node_id))
            if not r:
                logger.warn('_setup_node',
                            msg="node id %s not found" % node_id)
                node_id = None

        if not node_id:
            r = self.unis.get("/nodes?urn=" + urn)
            if r and len(r):
                r = r[0]
                logger.info('_setup_node',
                            msg="found node with our URN and id %s" % r["id"])
            else:
                r = self.unis.post("/nodes",
                                   data={"$schema": settings.SCHEMAS["nodes"],
                                         "name": hostname,
                                         "urn": urn})
            if r:
                self.node_id = r["id"]

        if r:
            config["runningOn"] = {"href": r["selfRef"], "rel": "full"}
            self.node_setup = True
        else:
            config["runningOn"] = {"href": ""}
            logger.warn('_setup_node', msg="Unable to set up node in UNIS")

    def _setup_service(self):
        '''
        Find this service in UNIS (by id, then by name + runningOn),
        merge what UNIS knows into the local config, and write the merged
        config back (PUT when an id is known, POST otherwise).
        '''
        config = self.config
        logger.debug('_setup_service', config=pprint.pformat(config))
        r = None
        if config.get("id", None):
            r = self.unis.get("/services/" + config["id"])

        if not r:
            logger.warn('_setup_service',
                        msg="service id not specified or not found "
                            "unis instance ...querying for service")
            # NOTE(review): a config without "name" makes this
            # concatenation raise TypeError, as it always has.
            rlist = self.unis.get(
                "/services?name=" + config.get("name", None) +
                "&runningOn.href=" + config["runningOn"]["href"] + "&limit=2")
            # loop over the returned services and find one that
            # doesn't return 410 see
            # https://uisapp2.iu.edu/jira-prd/browse/GEMINI-98
            if rlist:
                for candidate in rlist:
                    r = self.unis.get('/services/' + candidate["id"])
                    if r:
                        if isinstance(r, list):
                            logger.warn('_setup_service',
                                        msg="id not unique... taking first result")
                            r = r[0]
                        logger.info('_setup_service',
                                    msg="%s service found with id %s"
                                        % (config["name"], r["id"]))
                        break
            else:
                logger.warn('_setup_service',
                            msg="no service found by id or querying "
                                "...creating new service")

        if r:
            merge_dicts(config, r)

        # always update UNIS with the merged config
        if config.get("id", None):
            r = self.unis.put("/services/" + config["id"], data=config)
        else:
            r = self.unis.post("/services", data=config)

        if r:
            merge_dicts(config, r)
            self.service_setup = True
        else:
            logger.warn('_setup_service',
                        msg="unable to set up service in UNIS")

    def get(self, key, default=None):
        ''' Return config[key], or default when the key is missing. '''
        try:
            return self.config[key]
        except KeyError:
            return default

    def __getitem__(self, key):
        '''
        This allows an object which is an instance of this class to behave
        like a dictionary when queried with [] syntax
        '''
        return self.config[key]
class Collector:
    """Collects reported measurements and aggregates them for
    sending to MS at appropriate intervals. Also does a bunch of other
    stuff which should probably be handled by separate classes.
    Creates all the metadata objects, and the measurement object in
    UNIS for all data inserted. Depends directly on the MS and UNIS...
    output could be far more modular.
    """

    # Factor applied to every timestamp before it is stored (the same value
    # as the former literal 10e5; presumably seconds -> microseconds).
    _TS_SCALE = 1e6

    def __init__(self, service, measurement):
        self.config = measurement["configuration"]
        self.service = service
        self.measurement = measurement
        self.collections_created = False
        self.ms = MSInstance(service, measurement)
        self.dl = DataLogger(service, measurement)
        self.mids = {}  # {subject1: {metric1:mid, metric2:mid}, subj2: {...}}
        # {mid: [{"ts": ts, "value": val}, {"ts": ts, "value": val}]}
        self.mid_to_data = {}
        self.mid_to_et = {}
        self.unis = UNISInstance(service)
        self.num_collected = 0

    def insert(self, data, ts):
        '''
        Called (by probe_runner) to insert new data into this
        collector object.

        data maps subject -> {metric: value}. A subject's dict may carry
        its own "ts" entry; it is removed from the dict and used instead
        of ``ts`` for that subject only. Each call counts as one
        collection; report() is triggered once
        config["reporting_params"] collections have accumulated.
        '''
        mids = self.mids
        for subject, met_val in data.items():
            # Per-subject timestamp: an override must not leak into the
            # subjects that follow it in the same call.
            subject_ts = met_val.pop("ts", ts)
            for metric, value in met_val.items():
                if metric not in self.measurement["eventTypes"]:
                    self._add_et(metric)
                if metric not in mids.get(subject, {}):
                    r = self.unis.find_or_create_metadata(subject,
                                                          metric,
                                                          self.measurement)
                    mids.setdefault(subject, {})[metric] = r["id"]
                    self.mid_to_data[r["id"]] = []
                mid = mids[subject][metric]
                self.mid_to_et[mid] = metric
                self._insert_datum(mid, subject_ts, value)

        self.num_collected += 1
        if self.num_collected >= self.config["reporting_params"]:
            if self.report():
                self.num_collected = 0

    def _insert_datum(self, mid, ts, val):
        ''' Queue one scaled-timestamp/value pair under metadata id mid. '''
        self.mid_to_data[mid].append({"ts": ts * self._TS_SCALE,
                                      "value": val})

    def _add_et(self, metric):
        ''' Add metric to the measurement's eventTypes and PUT it to UNIS. '''
        self.measurement["eventTypes"].append(metric)
        r = self.unis.put("/measurements/" + self.measurement["id"],
                          data=self.measurement)
        if r:
            self.measurement = r

    def report(self):
        '''
        Send all data collected so far, then clear stored data.

        Returns None (keeping the data) when both the MS post and the
        data-logger write failed and fewer than
        reporting_tolerance * reporting_params collections are pending;
        otherwise clears the stored data and returns True.
        '''
        post_data = [{"mid": mid, "data": data}
                     for mid, data in self.mid_to_data.items()
                     if len(data)]
        ms_ret = self.ms.post_data(post_data)
        dl_ret = self.dl.write_data(post_data, self.mid_to_et)

        limit = (self.config["reporting_tolerance"] *
                 self.config["reporting_params"])
        if not ms_ret and not dl_ret and self.num_collected < limit:
            return None
        self._clear_data()
        return True

    def _clear_data(self):
        ''' Replace every per-mid list with a new empty list. '''
        for mid in self.mid_to_data:
            self.mid_to_data[mid] = []
class Probe:
    """Report whether a configured service process is running.

    The status ("ON", "OFF" or "UNKNOWN") is derived from an optional
    pidfile and an optional process name and handed to
    ``self.send_service``.
    """

    TTL_DEFAULT = 600

    def __init__(self, service, measurement):
        """Read the probe configuration and look up the service in UNIS.

        ``service_type`` and ``service_accesspoint`` are required
        configuration keys; a missing one is logged, not raised.
        ``self.id`` becomes the id of the first matching UNIS service, or
        stays ``None``.
        """
        self.config = measurement["configuration"]
        self.service = service
        self.unis = UNISInstance(service)
        self.pname = None
        self.pidfile = None
        self.id = None

        try:
            self.serviceType = self.config["service_type"]
        except Exception:
            logger.error("__init__", msg="Must specify service_type!")

        try:
            self.accessPoint = self.config["service_accesspoint"]
        except Exception:
            logger.error("__init__", msg="Must specify access point!")

        try:
            self.pidfile = self.config.get("pidfile", None)
        except Exception:
            logger.warn("__init__", msg="Config does not specify pidfile")

        self.pname = self.config.get("process_name", None)

        # check for existing service given accessPoint and serviceType
        try:
            found = self.unis.get("/services?accessPoint=%s&serviceType=%s" %
                                  (self.accessPoint, self.serviceType))
            if found and len(found):
                self.id = found[0]["id"]
        except Exception as e:
            logger.error("%s" % e)

    def get_data(self):
        """Determine the service status and send it via ``send_service``.

        Returns:
            An empty list; the status itself is passed to
            ``self.send_service(status=...)``.
        """
        stat = "UNKNOWN"

        # check pidfile
        if self.pidfile:
            pid = self._read_pid()
            if pid:
                stat = self._pid_status(pid, stat)

        # check process name, this takes priority
        if self.pname:
            for p in psutil.process_iter():
                try:
                    if p.name() == self.pname:
                        stat = "ON"
                        break
                except psutil.Error:
                    # The process vanished (or is inaccessible) while we
                    # were iterating; it cannot be the one we look for.
                    continue

        self.send_service(status=stat)
        return []

    def _read_pid(self):
        """Return the stripped pidfile content, or None if it can't be read."""
        # self.pidfile stays the configured path; the file object is local
        # and always closed.
        try:
            with open(self.pidfile) as handle:
                return handle.read().rstrip()
        except IOError:
            logger.warn("get_data",
                        msg="Could not open pidfile: %s" % self.pidfile)
            return None

    def _pid_status(self, pid, default):
        """Probe ``pid`` with signal 0; return "ON", "OFF" or ``default``."""
        try:
            os.kill(int(pid), 0)
            return "ON"
        except ValueError:
            logger.warn("get_data", msg="Invalid pid in pidfile: %s" % pid)
        except OSError as err:
            if err.errno == errno.ESRCH:
                return "OFF"
            if err.errno == errno.EPERM:
                logger.warn("get_data",
                            msg="No permission to signal this process: %s" % pid)
            else:
                logger.warn("get_data", msg="Unknown error: %s" % err.errno)
        return default
class Probe:
    """Get network statistics
    """

    UNUSED_METRICS = ["errs_in", "errs_out", "drop_in", "drop_out",
                      "fifo_in", "fifo_out", "frame_in", "compressed_in",
                      "compressed_out", "multicast_in", "colls_out",
                      "colls_in", "carrier_out", "carrier_in", "frame_out",
                      "multicast_out"]

    def __init__(self, service, measurement):
        """Configure the probe and resolve a subject for every interface."""
        self.service = service
        self.measurement = measurement
        self.config = measurement["configuration"]
        logger.debug('Probe.__init__', config=self.config)
        self._proc = Proc(self.config.get("proc_dir", "/proc/"))
        self.node_subject = self.config.get(
            "subject", self.config.get("runningOn", {}).get('href', 'not found'))
        self.port_match_method = self.config.get(
            "port_match_method", "geni_utils.mac_match")
        self.port_match_method = blipp_import_method(self.port_match_method)
        self.unis = UNISInstance(service)
        logger.debug('Probe.__init__ ', subject=self.node_subject)
        self.subjects = self.get_interface_subjects()

    def get_data(self):
        """Read ``net/dev`` and ``net/snmp`` and return the combined data.

        Per-interface counters are keyed by interface subject, the TCP/UDP
        counters by ``self.node_subject``; the result is passed through
        ``full_event_types`` before being returned.
        """
        netdev = self._proc.open('net', 'dev')
        netsnmp = self._proc.open('net', 'snmp')
        # Drop the first header line; _get_dev_data parses the second one.
        netdev.readline()
        data = self._get_dev_data(netdev.read())
        sdata = self._get_snmp_data(netsnmp.read())
        data[self.node_subject] = sdata
        data = full_event_types(data, EVENT_TYPES)
        return data

    def _get_dev_data(self, dev_string):
        """Parse the column-header line plus interface rows of ``net/dev``.

        Returns:
            dict mapping interface subject to a dict with ``errors``,
            ``drops`` and every counter not listed in ``UNUSED_METRICS``.
            Interfaces without an entry in ``self.subjects`` are skipped.
        """
        headers_regex = re.compile(
            r'[^|]*\|(?P<rxheaders>[^|]*)\|(?P<txheaders>.*)')
        dev_lines = dev_string.splitlines()
        matches = headers_regex.search(dev_lines.pop(0)).groupdict()
        # Receive columns come first in every row, so they get the "_in"
        # suffix and lead the header list.
        rxheaders = [head + "_in" for head in matches['rxheaders'].split()]
        txheaders = [head + "_out" for head in matches['txheaders'].split()]
        headers = rxheaders + txheaders

        data = {}
        for line in dev_lines:
            if not line:
                continue
            fields = line.replace(':', ' ').split()
            if not fields:
                continue
            iface = fields.pop(0)
            subject = self.subjects.get(iface)
            if subject is None:
                # Interface appeared after the subjects were resolved.
                logger.warn('_get_dev_data',
                            msg="no subject known for interface %s" % iface)
                continue
            face_data = dict(zip(headers, fields))
            self._vals_to_int(face_data)
            face_data['errors'] = (face_data.pop('errs_in') +
                                   face_data.pop('errs_out'))
            face_data['drops'] = (face_data.pop('drop_in') +
                                  face_data.pop('drop_out'))
            for metric in self.UNUSED_METRICS:
                face_data.pop(metric, None)
            data[subject] = face_data
        return data

    def _vals_to_int(self, adict):
        """Convert every value of ``adict`` to int in place and return it."""
        for k, v in adict.items():
            adict[k] = int(v)
        return adict

    def _get_snmp_data(self, snmp_string):
        """Extract TCP segment and UDP datagram counters from ``net/snmp``.

        A "Tcp:"/"Udp:" header line is followed by its value line; the
        wanted columns are located by name in the header.
        """
        data = {}
        lines = snmp_string.splitlines()
        i = 0
        while i < len(lines):
            line = lines[i].split()
            if not line:
                # tolerate blank lines
                i += 1
                continue
            proto = line[0].lower()
            if proto == "tcp:":
                i += 1
                in_index = line.index("InSegs")
                out_index = line.index("OutSegs")
                retrans_index = line.index("RetransSegs")
                dataline = lines[i].split()
                data.update({"tcp_segments_in": dataline[in_index],
                             "tcp_segments_out": dataline[out_index],
                             "tcp_retrans": dataline[retrans_index]})
            elif proto == "udp:":
                i += 1
                in_index = line.index("InDatagrams")
                out_index = line.index("OutDatagrams")
                dataline = lines[i].split()
                data.update({"datagrams_in": dataline[in_index],
                             "datagrams_out": dataline[out_index]})
            i += 1
        return self._vals_to_int(data)

    def get_interface_subjects(self):
        """Map every local interface name to a UNIS port reference.

        A port is matched against the node's ports in UNIS or posted; when
        the result is not a string the subject is set to the literal
        "unexpected type".
        """
        subjects = {}
        unis_ports = self.get_interfaces_in_unis()
        for face in netifaces.interfaces():
            local_port_dict = self._build_port_dict(face)
            portRef = self._find_or_post_port(unis_ports, local_port_dict,
                                              self.port_match_method)
            if isinstance(portRef, str) or isinstance(portRef, unicode):
                subjects[face] = portRef
            else:
                logger.warn('get_interface_subjects',
                            msg="subject for face %s is of an unexpected type %s, portRef=%s"
                                % (face, type(portRef), portRef))
                subjects[face] = "unexpected type"
        return subjects

    def _build_port_dict(self, port_name):
        """Build the UNIS port description for local interface ``port_name``."""
        type_map = {'ipv4': netifaces.AF_INET,
                    'ipv6': netifaces.AF_INET6,
                    'mac': netifaces.AF_LINK}
        post_dict = {}
        try:
            capacity = ethtool.get_speed(port_name)
        except OSError:
            capacity = 0

        # assume each port is a layer2 port for the main 'address'
        try:
            l2_addr = netifaces.ifaddresses(port_name)[type_map['mac']]
            if len(l2_addr):
                # Normalise the MAC string itself (no colons, lower case).
                # The previous code called .strip() on the dict, which
                # always failed silently and left 'address' unset.
                mac = l2_addr[0]['addr'].strip().replace(':', '').lower()
                post_dict['address'] = {"type": "mac", "address": mac}
        except (KeyError, IndexError, ValueError):
            # Best effort: the interface has no usable link-layer address.
            pass

        # add all the other address info we can find
        post_dict['properties'] = {}
        for t in type_map:
            try:
                addrs = netifaces.ifaddresses(port_name)[type_map[t]]
                for a in addrs:
                    post_dict['properties'][t] = {"type": t,
                                                  "address": a['addr']}
            except Exception as e:
                logger.exc('get_interface_subjects', e)

        # TODO some sort of verification here that capacity is right
        post_dict['name'] = port_name
        post_dict['capacity'] = capacity
        # hack in a 'nodeRef' so we can find port from rspec
        post_dict['nodeRef'] = settings.HOST_URN[:-1]
        return post_dict

    def get_interfaces_in_unis(self):
        """Fetch the port objects of the node this service runs on.

        Returns an empty list when the node cannot be fetched; ports that
        cannot be fetched are left out.
        """
        node = self.unis.get(self.service["runningOn"]["href"])
        if not node:
            logger.warn('get_interfaces_in_unis',
                        msg="could not fetch node from UNIS")
            return []
        ports = []
        for port_ref in node.get('ports', []):
            port = self.unis.get(port_ref['href'])
            if port:
                ports.append(port)
        return ports

    def _find_or_post_port(self, ports, local_port, matching_method):
        """Return the selfRef of a matching UNIS port, posting one if needed.

        Returns the literal "failed" when the post does not succeed.
        """
        for port in ports:
            if matching_method(port, local_port):
                return port["selfRef"]
        post = self.unis.post_port(local_port)
        if post:
            return post["selfRef"]
        logger.warn('_find_or_post_port',
                    msg="post seems to have failed... subject for %s will be wrong"
                        % local_port['name'])
        return "failed"
class Probe:
    """Query the collectd database through curl and map rows to UNIS subjects."""

    def __init__(self, service, measurement):
        """Build the curl command line used by ``get_data``.

        The configured ``command`` is first run through
        ``_substitute_command``; its result is then replaced by a fixed
        curl invocation built from the ``table`` and
        ``schedule_params.every`` configuration values.
        """
        self.service = service
        self.measurement = measurement
        self.config = measurement["configuration"]
        self.unis = UNISInstance(service)
        # NOTE(review): overwritten below; kept because the call still
        # logs the substituted command as before.
        self.command = self._substitute_command(str(self.config.get("command")),
                                                self.config)
        # TODO: get the latest ts and store it and use it as query bound next run.
        query_url = 'http://*****:*****@r*Buve5'
        query_db = "db=collectd"
        query_table = self.config.get("table")
        query_every = self.config.get("schedule_params").get("every")
        query = "q=select * from {0} where time > now() - {1}s;".format(
            query_table, query_every)
        self.command = ['curl', '-GET', query_url,
                        '--data-urlencode', query_db,
                        '--data-urlencode', query]

    def get_data(self):
        """Run the query command and return the extracted data.

        Raises:
            CmdError: when the command produced nothing on stdout (the
                error carries stderr).

        Returns:
            The value of ``_extract_data`` for stdout, or ``{}`` when that
            raised ``ValueError`` (which is logged).
        """
        proc = subprocess.Popen(self.command,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
        output = proc.communicate()

        if not output[0]:
            raise CmdError(output[1])
        try:
            data = self._extract_data(output[0])
        except ValueError as e:
            logger.exc("get_data", e)
            return {}

        # sample data [{'wsu.stor1-href': {ts: <ts>, et: [instance, type, type_instance, value]}},
        #              {'um.stor2-href': {ts: <ts>, et: [instance, type, type_instance, value]}}]
        return data

    def _extract_data(self, stdout):
        """Turn the JSON query output into a list of ``{subject: {...}}`` dicts.

        Only the first series of the first result is read. For tables other
        than ``ETHTABLE`` the rows are summed per (subject, event type).
        Returns ``[]`` when the first result is empty or has no series.
        """
        def normalize(values, time_index, host_index, subject_index,
                      event_index, table_name):
            '''
            carve the host and turn it into the subject of the data
            change name 'time' to 'ts' and remove it from the data
            '''
            event_key = '.'.join([table_name, values[event_index]])
            if event_key not in EVENT_MAPS:
                # not event type that we are interested
                return {}

            host_name = values[host_index]
            try:
                host_obj = self.unis.get("/nodes?name=" + host_name)
            except ConnectionError:
                host_obj = None
            if not host_obj:
                # host not registered in this domain OR ConnectionError
                return {}

            if table_name == ETHTABLE:
                if 'ports' not in host_obj[0]:
                    # this host has no eth uploaded
                    return {}
                subject = None
                for port in host_obj[0]['ports']:
                    port_id = port['href'].split('/')[-1]
                    try:
                        port_obj = self.unis.get("/ports/" + port_id)
                    except ConnectionError:
                        port_obj = None
                    if port_obj and port_obj['name'] == values[subject_index]:
                        subject = port_obj['selfRef']
                        break
                if not subject:
                    # none of the eth matches
                    return {}
            else:
                # other tables (disk stat) will aggregate all instances
                subject = host_obj[0]['selfRef']

            event_type = EVENT_MAPS[event_key]
            dt = datetime.datetime.strptime(values[time_index],
                                            "%Y-%m-%dT%H:%M:%S.%fZ")
            timestamp = calendar.timegm(dt.timetuple()) + (dt.microsecond / 1000000.0)
            # the last column is used as the value
            return {subject: {'ts': timestamp, event_type: values[-1]}}

        json_output = json.loads(stdout)
        result = json_output['results'][0]
        # An empty result, or one without 'series', has nothing to extract.
        if not result or not result.get('series'):
            return []

        series = result['series'][0]
        table_name = series['name']
        columns = series['columns']
        event_column = EVENT_COLUMNS[table_name]

        time_idx = columns.index('time')
        host_idx = columns.index('host')
        instance_idx = columns.index('instance')
        event_idx = columns.index(event_column)

        normalized = [normalize(row, time_idx, host_idx, instance_idx,
                                event_idx, table_name)
                      for row in series['values']]
        ret = [item for item in normalized if item]

        if table_name != ETHTABLE:
            # aggregate all drive on one host
            # ts is the one of the first row seen for a (subject, et) pair
            merged = {}
            for datum in ret:
                subject, payload = next(iter(datum.items()))
                et = next(name for name in payload if name != 'ts')
                key = (subject, et)
                if key in merged:
                    merged[key][et] += payload[et]
                else:
                    merged[key] = payload
            ret = [{key[0]: payload} for key, payload in merged.items()]

        return ret

    def _substitute_command(self, command, config):
        ''' command in form "ping $ADDRESS"
        config should have substitutions like "address": "example.com"
        Note; now more complex

        Tokens of the form "$name" are looked up in config:
          * missing name  -> token dropped
          * bool value    -> "name" is emitted only when the value is true
          * "$-flag"      -> "-flag" followed by str(value)
          * anything else -> str(value)
        Empty tokens are dropped; other tokens are copied unchanged.
        '''
        ret = []
        for item in shlex.split(command):
            # startswith() is safe for empty tokens (e.g. a quoted '').
            if item.startswith('$'):
                key = item[1:]
                if key in config:
                    val = config[key]
                    if isinstance(val, bool):
                        if val:
                            ret.append(key)
                    elif key.startswith("-"):
                        ret.append(key)
                        ret.append(str(val))
                    else:
                        ret.append(str(val))
            elif item:
                ret.append(item)
        logger.info('substitute_command', cmd=ret, name=self.config['name'])
        return ret