def config(conf):
    collectd.info('------ config ------')
    host = None
    port = None
    db = None
    redis_key = None
    for node in conf.children:
        key = node.key.lower()
        val = node.values[0]
        if key == 'host':
            host = val
        elif key == 'port':
            port = int(val)
        elif key == 'db':
            db = int(val)
        elif key == 'key':
            # store under a separate name so the loop variable is not clobbered
            redis_key = val
        else:
            collectd.warning('redis_info plugin: Unknown config key: %s' % key)
            continue
    CONFIGS.append({
        'host': host,
        'port': port,
        'db': db,
        'key': redis_key,
    })

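# A minimal registration sketch for the config callback above. The
# register_config/register_read calls are collectd's Python plugin API; the
# read-callback name `read` is an illustrative assumption (the redis_info
# read callback is not shown here).
#
#   collectd.register_config(config)
#   collectd.register_read(read)
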
def logger(t, msg):
    if t == 'err':
        collectd.error('%s: %s' % (NAME, msg))
    elif t == 'warn':
        collectd.warning('%s: %s' % (NAME, msg))
    elif t == 'verb' and VERBOSE_LOGGING:
        collectd.info('%s: %s' % (NAME, msg))

def read():
    for name in CONFIG:
        while True:
            try:
                switch = ENV.get_switch(name)
                v1 = collectd.Values(plugin='wemo')
                v1.type = 'power'
                v1.type_instance = 'power'
                v1.plugin_instance = name
                power = switch.current_power / 1000.0
                collectd.info("Got power from %s = %fW" % (name, power))
                v1.values = [power]
                v1.dispatch()
            except UnknownDevice:
                collectd.error("Unknown device: " + name)
            except ConnectionError:
                # rediscover devices and retry this switch
                ENV.start()
                ENV.discover()
                continue
            break

env = None

def debug(param):
    """Log debug messages, detecting whether we run inside collectd or standalone."""
    if DEBUG:
        if __name__ != '__main__':
            collectd.info("%s: DEBUG %s" % (PLUGIN_NAME, param))
        else:
            sys.stderr.write("%s\n" % param)

def wait_for_collection():
    while True:
        if os.path.isfile(HBASE_METRICS_TMP_FILE) and 'load' in open(HBASE_METRICS_TMP_FILE).read():
            break
        else:
            collectd.info("Sleeping...")
            time.sleep(1)

def config(conf):
    global instances
    # walk the nodes under <Module "...">
    for node in conf.children:
        if node.key == "Instance":
            # if the instance is named, use the first given name
            if len(node.values):
                if len(node.values) > 1:
                    collectd.info("%s: Ignoring extra instance names (%s)"
                                  % (__name__, ", ".join(node.values[1:])))
                instance = node.values[0]
            # else register an instance with an empty name
            else:
                instance = ''
            _collects = dict(collects)
            # get the stats to collect
            for child in node.children:
                # get the stat collection name
                if child.key.find("Collect") == 0:
                    collection = child.key[7:].lower()
                else:
                    collection = child.key.lower()
                # check whether this collection is known
                if collection in collects:
                    _collects[collection] = True
                else:
                    collectd.warning("%s: Ignoring unknown configuration option (%s)"
                                     % (__name__, child.key))
            # add this instance to the dict of instances
            instances[instance] = _collects
            continue
        # unknown configuration node
        collectd.warning("%s: Ignoring unknown node type (%s)" % (__name__, node.key))

def read(data=None):
    vl = collectd.Values(plugin='mountedDrives')
    vl.host = os.uname()[1]
    # obtain the output of df
    output = subprocess.check_output("df")
    # split out the lines
    lines = output.splitlines()
    # /dev/sda1 487727216 11126468 452186748   3% /
    # /dev/sde1 240362656 180626476  47526380 80% /mnt/hd
    # /dev/sda6 478169232 236302260 217931280 53% /home
    # need to send 3 values (size, used, pcntUsed) for each line;
    # skip the first line (the header) and parse the rest
    for line in lines[1:]:
        filesystem, size, used, avail, usePcnt, mountPoint = line.split()
        # TODO: allow volumes from config
        if filesystem not in ["udev", "none", "tmpfs"]:
            # strip out /dev/, replace / with _
            fs = re.sub("dev/", "", filesystem[1:])
            if mountPoint == "/":
                mountPoint = "/ROOT"
            mount = re.sub("/", "_", mountPoint)
            usePcnt = re.sub("%", "", usePcnt)
            vl.type = 'df'
            vl.type_instance = "{}{}".format(fs, mount)
            vl.values = [size, used, usePcnt]
            if Verbose:
                collectd.info("collectDf: Dispatch %s%s [ %s, %s, %s ]"
                              % (fs, mount, size, used, usePcnt))
            vl.dispatch()

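# Registration sketch for the read callback above, with a caveat: dispatching
# three values under type 'df' needs a custom types.db entry with three data
# sources, since collectd's stock 'df' type defines only two (used and free).
# A hypothetical entry matching [size, used, usePcnt] could look like:
#
#   df  size:GAUGE:0:U, used:GAUGE:0:U, used_pcnt:GAUGE:0:100
collectd.register_read(read)
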
def log_verbose(msg):
    if not MYSQL_CONFIG['Verbose']:
        return
    if COLLECTD_ENABLED:
        collectd.info('mysql plugin: %s' % msg)
    else:
        print('mysql plugin: %s' % msg)

def write_stats(values, types, base_path=None, client=None):
    """ Actually write the stats to statsd! """
    for idx, value in enumerate(values.values):
        value = int(value)
        if base_path is None:
            base_path = stats_path(values)
        # Append the data source name, if any
        if len(values.values) > 1:
            path = '.'.join((base_path, types[values.type][idx]['name']))
        else:
            path = base_path
        collectd.info('%s: %s = %s' % (values.plugin, path, value))
        if client is not None:
            # Intentionally *not* wrapped in a try/except so that an
            # exception here causes collectd to slow down trying to write
            # stats.
            client.gauge(path, value)
        else:
            # No statsd client, be noisy
            message = 'Statsd client is None, not sending metrics!'
            collectd.warning(message)
            # Raise an exception so we aren't *too* noisy.
            raise RuntimeError(message)

def kairosdb_connect(data):
    if not data['conn'] and protocol == 'http':
        data['conn'] = httplib.HTTPConnection(data['host'], data['port'])
        return True
    elif not data['conn'] and protocol == 'https':
        data['conn'] = httplib.HTTPSConnection(data['host'], data['port'])
        return True
    elif not data['conn'] and protocol == 'telnet':
        # only attempt to reconnect every 10 seconds for the telnet protocol
        now = time()
        if now - data['last_connect_time'] < 10:
            return False
        data['last_connect_time'] = now
        collectd.info('connecting to %s:%s' % (data['host'], data['port']))
        try:
            data['conn'] = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            data['conn'].connect((data['host'], data['port']))
            return True
        except Exception:
            collectd.error('error connecting socket: %s' % format_exc())
            return False
    else:
        return True

def configure_callback(conf):
    """Receive configuration information"""
    global SOLR_HOST, SOLR_PORT, SOLR_INSTANCES, VERBOSE_LOGGING
    for node in conf.children:
        if node.key == "Instance":
            # if the instance is named, use the first given name
            if len(node.values):
                if len(node.values) > 1:
                    collectd.info("%s: Ignoring extra instance names (%s)"
                                  % (__name__, ", ".join(node.values[1:])))
                SOLR_INSTANCE = node.values[0]
            # else register an instance with the default name
            else:
                SOLR_INSTANCE = 'default'
            for child in node.children:
                if child.key == 'Host':
                    SOLR_HOST = child.values[0]
                elif child.key == 'Port':
                    SOLR_PORT = int(child.values[0])
                elif child.key == 'Verbose':
                    VERBOSE_LOGGING = bool(child.values[0])
                else:
                    collectd.warning('solr_info plugin: Unknown config key: %s.' % child.key)
            # add this instance to the dict of instances
            SOLR_INSTANCES[SOLR_INSTANCE] = ("http://" + SOLR_HOST + ":" +
                                             str(SOLR_PORT) + "/solr/" + SOLR_INSTANCE)
            continue
    log_verbose('Configured with host=%s, port=%s, instance=%s'
                % (SOLR_HOST, SOLR_PORT, SOLR_INSTANCE))

def config(self, obj):
    """Receive configuration information"""
    for node in obj.children:
        if node.key == 'Host':
            self.host = node.values[0]
        elif node.key == 'Address':
            self.address = node.values[0]
        elif node.key == 'Hash':
            self.myhash = node.values[0]
        elif node.key == 'User':
            self.user = node.values[0]
        elif node.key == 'Password':
            self.password = node.values[0]
        elif node.key == 'NoSSL':
            self.ssl = int(node.values[0])
        elif node.key == 'Timeout':
            self.timeout = node.values[0]
        elif node.key == 'Verbose':
            self.verbose = bool(node.values[0])
        elif node.key == 'DiskInfo':
            self.diskInfo = bool(node.values[0])
        elif node.key == 'VolInfo':
            self.volInfo = bool(node.values[0])
        elif node.key == 'VdiskInfo':
            self.vdiskInfo = bool(node.values[0])
        elif node.key == 'EnclosureInfo':
            self.enclosureInfo = bool(node.values[0])
        elif node.key == 'ControllerInfo':
            self.controllerInfo = bool(node.values[0])
        else:
            collectd.warning('P2000 plugin: Unknown config key: %s.' % node.key)
    if self.myhash:
        collectd.info('Configured with address=%s, hash=%s, ssl=%s, timeout=%s'
                      % (self.address, self.myhash, self.ssl, self.timeout))
    else:
        collectd.info('Configured with address=%s, user=%s, password=%s, ssl=%s, timeout=%s'
                      % (self.address, self.user, self.password, self.ssl, self.timeout))

def get_ps_ovs_cpu_usage():
    """
    Return the CPU usage of the ovs-vswitchd (userspace) service in percent.
    This represents the load generated by datapath 'missed' packets plus
    internal OVS work.
    """
    cpu_usage = 0.0
    try:
        with open("/var/run/openvswitch/ovs-vswitchd.pid", "r") as pid_file:
            pid = pid_file.read().strip()
        cmd = ("ps", "-p", pid, "-o", "%cpu")
        ps_out = Popen(cmd, stdout=PIPE, stderr=PIPE, close_fds=True)
        std_err = ps_out.stderr.readlines()
        if std_err:
            collectd.info("ps wrote to stderr: %s" % std_err)
        cpu_usage = float(ps_out.stdout.readlines()[-1].strip())
    except (OSError, IOError, ValueError) as exc:
        collectd.error("An error occurred while getting cpu usage: %s" % exc)
    return cpu_usage

def config(self, cfg):
    if "Module.interval" in cfg:
        try:
            self.interval = int(cfg["Module.interval"][0])
            collectd.info("MetricWriteTracker.interval == {}".format(self.interval))
        except ValueError:
            collectd.error("module {0}, interval parameter must be an integer"
                           .format(self.__module__))

def logger(t, msg):
    if t == "err":
        collectd.error("%s: %s" % (NAME, msg))
    elif t == "warn":
        collectd.warning("%s: %s" % (NAME, msg))
    elif t == "verb" and VERBOSE_LOGGING:
        collectd.info("%s: %s" % (NAME, msg))

def logger(self, t, msg):
    if t == 'err':
        collectd.error('%s: %s' % (self.name, msg))
    elif t == 'warn':
        collectd.warning('%s: %s' % (self.name, msg))
    elif t == 'verb' and self.verbose:
        collectd.info('%s: %s' % (self.name, msg))

def _open_fifo(self, path, flags):
    """Opens the vsys FIFO using given flags or raises VsysOpenException.

    If self._open_nonblock is True, then os.O_NONBLOCK is added to flags.

    Args:
      path: str, path to a vsys FIFO.
      flags: int, flags to use when opening path with os.open.
    Returns:
      int, file descriptor for FIFO.
    Raises:
      VsysOpenException, if opening path fails.
    """
    collectd.info('Opening: %s' % path)
    if self._open_nonblock:
        # NOTE: Open non-blocking to detect when there is no reader, and so
        # reads can time out using select or poll.
        flags |= os.O_NONBLOCK
    try:
        return os.open(path, flags)
    except OSError as err:
        # If opening for write, the error is likely errno.ENXIO. ENXIO occurs
        # when no reader has the other end open, e.g. when vsys is not
        # running in the root context.
        raise VsysOpenException('Opening vsys fifo (%s) failed: %s' % (path, err))

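# Hedged usage sketch for _open_fifo: open the backend's "in" FIFO for
# writing and its "out" FIFO for reading. The concrete paths below are
# illustrative assumptions, not part of the method's contract.
#
#   write_fd = self._open_fifo('/vsys/vs_resource_backend.in', os.O_WRONLY)
#   read_fd = self._open_fifo('/vsys/vs_resource_backend.out', os.O_RDONLY)
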
def memsql_write(collectd_sample, data):
    """ Write handler for collectd.
    This function is called for every sample taken from every plugin.
    It is parallelized among multiple threads by collectd.
    """
    if data.exiting:
        return
    if data.node is not None:
        throttled_update_alias(data, collectd_sample)
    # get the value types for this sample
    types = data.typesdb
    if collectd_sample.type not in types:
        collectd.info('memsql_writer: do not know how to handle type %s. '
                      'do you have all your types.db files configured?'
                      % collectd_sample.type)
        return
    value_types = types[collectd_sample.type]
    if len(value_types) != len(collectd_sample.values):
        collectd.info('memsql_writer: differing number of values for type %s'
                      % collectd_sample.type)
        return
    # for each value in this sample, insert it into the cache
    for (value_type, value) in zip(value_types, collectd_sample.values):
        cache_value(value, value_type[0], value_type[1], collectd_sample, data)

def read():
    """
    Makes API calls to Couchbase and records metrics to collectd.
    """
    for module_config in CONFIGS:
        for request_type in module_config['api_urls']:
            collectd.info("Request type " + request_type + " for response: "
                          + module_config['api_urls'].get(request_type))
            resp_obj = _api_call(module_config['api_urls'].get(request_type),
                                 module_config['opener'])
            if resp_obj is None:
                continue
            # 1. Prepare dimensions list
            collect_target = module_config['plugin_config'].get('CollectTarget')
            dimensions = _build_dimensions(collect_target, module_config)
            collectd.debug("Using dimensions:")
            collectd.debug(pprint.pformat(dimensions))
            # 2. Parse metrics
            metrics = _parse_metrics(resp_obj, dimensions, request_type, module_config)
            # 3. Post metrics
            _post_metrics(metrics, module_config)

def run(self):
    if self.data.config.skipdiskusage:
        return
    while not self._stop.isSet():
        time.sleep(5)
        if self.data.node is None:
            continue
        with self._disk_usage_lock:
            disk_usage = copy.deepcopy(self._disk_usage)
        for label, info in disk_usage.iteritems():
            if self.data.config.skipcolumnardiskusage and label == "segments":
                continue
            try:
                with open(os.devnull, 'w') as devnull:
                    p = subprocess.Popen(['du', '-k', '-s', info['path']],
                                         stderr=devnull, stdout=subprocess.PIPE)
                    out = p.communicate()[0]
                size, _ = out.strip().split(None, 1)
                size = int(size) * 1024
                with self._disk_usage_lock:
                    if label in self._disk_usage:
                        self._disk_usage[label]['bytes'] = size
            except (OSError, IndexError, ValueError) as e:
                collectd.info('Error in DiskUsageWorker: ' + str(e))
                collectd.info(traceback.format_exc())
                with self._disk_usage_lock:
                    if label in self._disk_usage:
                        self._disk_usage[label]['bytes'] = None

def configure(config_values):
    '''Load information from the configuration file'''
    global PLUGIN_CONFIG
    collectd.info('Configuring RabbitMQ Plugin')
    for config_value in config_values.children:
        collectd.info("%s = %s" % (config_value.key, len(config_value.values) > 0))
        if len(config_value.values) > 0:
            if config_value.key == 'Username':
                PLUGIN_CONFIG['username'] = config_value.values[0]
            elif config_value.key == 'Password':
                PLUGIN_CONFIG['password'] = config_value.values[0]
            elif config_value.key == 'Host':
                PLUGIN_CONFIG['host'] = config_value.values[0]
            elif config_value.key == 'Port':
                PLUGIN_CONFIG['port'] = config_value.values[0]
            elif config_value.key == 'Realm':
                PLUGIN_CONFIG['realm'] = config_value.values[0]
            elif config_value.key == 'Ignore':
                type_rmq = config_value.values[0]
                PLUGIN_CONFIG['ignore'] = {type_rmq: []}
                for regex in config_value.children:
                    PLUGIN_CONFIG['ignore'][type_rmq].append(
                        re.compile(regex.values[0]))

def memsql_parse_types_file(path, data):
    """ This function tries to parse a collectd-compliant types.db file.
    Basically stolen from collectd-carbon.
    """
    types = data.typesdb
    f = open(path, 'r')
    for line in f:
        fields = line.split()
        if len(fields) < 2:
            continue
        type_name = fields[0]
        if type_name[0] == '#':
            continue
        v = []
        for ds in fields[1:]:
            ds = ds.rstrip(',')
            ds_fields = ds.split(':')
            if len(ds_fields) != 4:
                collectd.info('memsql_writer: cannot parse data source %s on type %s'
                              % (ds, type_name))
                continue
            v.append(ds_fields)
        types[type_name] = v
    f.close()

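# For reference, the types.db lines this parser expects map a type name to
# comma-separated data sources of the form name:type:min:max. Two entries
# from collectd's stock types.db:
#
#   if_octets  rx:DERIVE:0:U, tx:DERIVE:0:U
#   load       shortterm:GAUGE:0:5000, midterm:GAUGE:0:5000, longterm:GAUGE:0:5000
#
# For 'if_octets' the parser above stores:
#   types['if_octets'] == [['rx', 'DERIVE', '0', 'U'], ['tx', 'DERIVE', '0', 'U']]
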
def run(self):
    """Read the internal queue and send the msg to the AMQP client
    (self.__amqp_send)
    """
    collectd.info("2AMQP: Starting %s..." % self.getName())
    self.__amqp_connect()
    while True:
        msg = self.job_queue.get()
        # stop worker (ThreadManager.stop())
        if msg[0] == 1:
            collectd.info("2AMQP: %s - Stopping..." % self.getName())
            self.__amqp_disconnect()
            break
        self.__amqp_send(msg={
            "host": msg[1].host,
            "interval": msg[1].interval,
            "plugin": msg[1].plugin,
            "plugin_instance": msg[1].plugin_instance,
            "time": msg[1].time,
            "type": msg[1].type,
            "type_instance": msg[1].type_instance,
            "values": msg[1].values,
        })

def configurator(collectd_conf):
    """
    Configure the cadvisor metrics collector.

    Options:
        host: IP of the target mesos host
        port: port of the target mesos host
        config_file: path to cadvisor.yaml
    """
    global client
    collectd.info('Loading CAdvisorMetrics plugin')
    config = {}
    for item in collectd_conf.children:
        key = item.key.lower()
        val = item.values[0]
        if key == 'host':
            config['host'] = val
        elif key == 'port':
            config['port'] = int(val)
        elif key == 'configfile':
            config['config_file'] = val
        else:
            collectd.warning('cadvisor plugin: unknown config key {} = {}'
                             .format(item.key, val))
    client = CAdvisorMetrics(config)

def _read_data(self, service_actions, connection):
    """ Read data from the FRITZ!Box

    The data is read from all services & actions defined in SERVICE_ACTIONS.
    This function returns a dict in the following format:
    {value_instance: (value_type, value)}
    """
    # Don't try to gather data if the connection is not available
    if connection is None:
        return {}

    # Construct a dict:
    # {(plugin_instance, value_instance): (value_type, value)} from the
    # queried results and apply a value conversion (if defined)
    values = {}
    for service_action in service_actions:
        index = 0
        while True:
            parameters = {service_action.index_field: index} \
                if service_action.index_field else {}
            if self._verbose:
                collectd.info("fritzcollectd: Calling action: "
                              "{} {} {}".format(service_action.service,
                                                service_action.action,
                                                parameters))
            readings = connection.call_action(
                service_action.service, service_action.action, **parameters)
            if not readings:
                if self._verbose:
                    collectd.info("fritzcollectd: No readings received")
                break

            plugin_instance = [self._plugin_instance]
            if service_action.instance_field:
                readings.update(parameters)
                plugin_instance.append('{}{}'.format(
                    service_action.instance_prefix,
                    readings[service_action.instance_field]))
            plugin_instance = '-'.join(filter(None, plugin_instance))

            values.update({  # pragma: no branch
                (plugin_instance, value.value_instance): (
                    value.value_type,
                    self.CONVERSION.get(action_argument, lambda x: x)(
                        readings[action_argument])
                )
                for (action_argument, value)
                in service_actions[service_action].items()
            })

            if not service_action.index_field:
                break
            index += 1
    return values

def close_plugin():
    '''Clean up all opened connections'''
    global ser
    if ser is not None:
        ser.close()
        collectd.info("ravencollectd: Serial port closed.")
    else:
        collectd.debug("ravencollectd: Asked to close the serial port, but it was never open.")

def stop(self):
    """Stop the threads (workers) via messaging (safe)"""
    collectd.info("2AMQP: Stopping threads...")
    for _ in self.thrpool:
        self.job_queue.put((1, None))

def configure(configobj):
    global INTERVAL
    config = {c.key: c.values for c in configobj.children}
    INTERVAL = 10
    if 'interval' in config:
        INTERVAL = config['interval'][0]
    collectd.info('gnocchi_status: Interval: {}'.format(INTERVAL))
    collectd.register_read(read, INTERVAL)

def logger(t, msg):
    if t == 'err':
        collectd.error('%s: %s' % (NAME, msg))
    elif t == 'warn':
        collectd.warning('%s: %s' % (NAME, msg))
    elif t == 'info':
        collectd.info('%s: %s' % (NAME, msg))
    else:
        collectd.notice('%s: %s' % (NAME, msg))

def shutdown(self):
    """Shutdown callback"""
    # pylint: disable=broad-except
    collectd.info("SHUTDOWN")
    try:
        self._writer.flush()
    except Exception as exc:
        if collectd is not None:
            collectd.error('Exception during shutdown: %s' % exc)

def connect_redis(self):
    try:
        retry_flag = True
        retry_count = 0
        password_flag = False
        while retry_flag and retry_count < 3:
            try:
                if password_flag:
                    self.redis_client = red.StrictRedis(host=self.host, port=self.port,
                                                        password=self.password, db=0)
                else:
                    self.redis_client = red.StrictRedis(host=self.host, port=self.port, db=0)
                # verify the connection actually works
                server_details = self.redis_client.info(section="server")
                retry_flag = False
                collectd.info("Connection to Redis successful on attempt %s" % retry_count)
            except Exception as e:
                collectd.error("Retry after 5 sec as connection to Redis failed on attempt %s" % retry_count)
                retry_count += 1
                # after three failures without a password, retry with one
                if (not password_flag) and retry_count == 3:
                    retry_count = 0
                    password_flag = True
                time.sleep(5)
    except Exception as e:
        collectd.error("Exception in connect_redis: %s" % e)
        return

def config(config):
    global server_address
    server_address = ''  # '/tmp/fastd-status'
    for node in config.children:
        key = node.key.lower()
        val = node.values[0]
        if key == 'server_address':
            server_address = val
            if server_address == '':
                server_address = glob.glob('/tmp/fastd*.socket')
        else:
            collectd.warning("ffbs_fastd plugin: Unknown configuration key: %s." % key)
            continue
    if server_address == '':
        collectd.error('ffbs_fastd plugin: No server address configured.')
    else:
        collectd.info("ffbs_fastd plugin: Successfully configured with server_address %s."
                      % server_address)

def connect_mongo(self):
    try:
        retry_flag = True
        retry_count = 0
        while retry_flag:
            try:
                self.conn = MongoClient(self.host + ':' + self.port,
                                        username=self.user,
                                        password=self.password,
                                        authMechanism='SCRAM-SHA-1')
                self.db = self.conn.admin
                retry_flag = False
                collectd.info("Connection to Mongo successful on attempt %s" % retry_count)
            except Exception as e:
                collectd.error("Retry after 5 sec as connection to Mongo failed on attempt %s" % retry_count)
                retry_count += 1
                time.sleep(5)
    except Exception as e:
        collectd.error("Exception in connect_mongo: %s" % e)
        return

def _remove_ip_from_unreachable_list(ip):
    """Remove an IP address from the unreachable list and clear its NTP alarms"""
    # remove from the unreachable list if it's there
    if ip and ip in obj.unreachable_servers:
        eid = obj.base_eid + '=' + ip
        collectd.debug("%s trying to clear alarm %s" % (PLUGIN, eid))
        try:
            # clear the alarm if it's asserted
            if api.clear_fault(PLUGIN_ALARMID, eid) is True:
                collectd.info("%s %s:%s alarm cleared"
                              % (PLUGIN, PLUGIN_ALARMID, eid))
            else:
                # alarm does not exist
                collectd.info("%s %s:%s alarm already clear"
                              % (PLUGIN, PLUGIN_ALARMID, eid))
            obj.unreachable_servers.remove(ip)
        except Exception as ex:
            collectd.error("%s 'clear_fault' exception ; %s:%s ; %s"
                           % (PLUGIN, PLUGIN_ALARMID, eid, ex))

def writer(input_data=None):
    collectd.info('my py write called ' + str(input_data))
    stats_path = '.'.join(
        filter(None, [
            # plugin name
            input_data.plugin,
            # plugin instance, if any
            getattr(input_data, 'plugin_instance', None),
            # type, if any
            getattr(input_data, 'type', None),
            # the name of the type instance
            input_data.type_instance,
        ]))
    for idx, value in enumerate(input_data.values):
        value = int(value)
        if len(input_data.values) > 1:
            path = '.'.join((stats_path, types[input_data.type][idx]['name']))
        else:
            path = stats_path
        collectd.info('%s: %s = %s' % (input_data.plugin, path, value))
        client.gauge(path, value)

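# Registration sketch for the write callback above; the module-level statsd
# `client` and the `types` mapping are assumed to be initialized elsewhere
# during plugin configuration.
collectd.register_write(writer)
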
def carbon_connect(data):
    result = False
    if not data['sock'] and protocol.lower() == 'tcp':
        # only attempt to reconnect every 10 seconds for the TCP protocol
        now = time()
        if now - data['last_connect_time'] < 10:
            return False
        data['last_connect_time'] = now
        collectd.info('connecting to %s:%s' % (data['host'], data['port']))
        try:
            data['sock'] = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            data['sock'].connect((data['host'], data['port']))
            result = True
        except Exception:
            result = False
            collectd.warning('error connecting socket: %s' % format_exc())
    else:
        # we're either connected already, or the protocol is not tcp; data
        # will be sent via a udp/SOCK_DGRAM call.
        result = True
    return result

def add_disk_data(self):
    disk_static_data = self.get_disk_static_data()
    if not disk_static_data:
        collectd.error("Plugin disk_stat: Unable to fetch static data for disk and partition")
        return None

    # get dynamic data
    disk_dynamic_data = self.get_dick_dynamic_data()
    if not disk_dynamic_data:
        collectd.info("Plugin disk_stat: Unable to fetch dynamic data for disk and partition")
        return None

    # join data
    dict_final = self.disk_join_dicts(disk_static_data, disk_dynamic_data)

    # get aggregated disk data
    timestamp = int(round(time.time() * 1000))
    dict_disk_stats = self.add_disk_aggregate(dict_final)
    dict_disk_stats[TIMESTAMP] = timestamp

    # calculate disk rate
    collectd.info("plugin rate started")
    self.add_disk_rate(dict_disk_stats)
    collectd.info("plugin rate ended")

    # calculate disk latency
    self.add_disk_latency(dict_disk_stats)
    collectd.info("Plugin disk_stat: Calculated and added latency parameters successfully.")
    collectd.info("Disk data: %s" % str(dict_disk_stats))
    return dict_disk_stats

def raise_alarm():
    """Raise the Remote Logging Server alarm."""
    repair = 'Ensure Remote Log Server IP is reachable from '
    repair += 'Controller through OAM interface; otherwise '
    repair += 'contact next level of support.'

    reason = 'Controller cannot establish connection with '
    reason += 'remote logging server.'

    try:
        fault = fm_api.Fault(
            alarm_id=PLUGIN_ALARMID,
            alarm_state=fm_constants.FM_ALARM_STATE_SET,
            entity_type_id=fm_constants.FM_ENTITY_TYPE_HOST,
            entity_instance_id=obj.base_eid,
            severity=fm_constants.FM_ALARM_SEVERITY_MINOR,
            reason_text=reason,
            alarm_type=fm_constants.FM_ALARM_TYPE_1,
            probable_cause=fm_constants.ALARM_PROBABLE_CAUSE_6,
            proposed_repair_action=repair,
            service_affecting=False,
            suppression=False)

        alarm_uuid = api.set_fault(fault)
        if pc.is_uuid_like(alarm_uuid) is False:
            collectd.error("%s %s:%s set_fault failed:%s"
                           % (PLUGIN, PLUGIN_ALARMID, obj.base_eid, alarm_uuid))
        else:
            collectd.info("%s %s:%s alarm raised"
                          % (PLUGIN, PLUGIN_ALARMID, obj.base_eid))
            obj.alarmed = True
    except Exception:
        collectd.error("%s %s:%s set_fault exception"
                       % (PLUGIN, PLUGIN_ALARMID, obj.base_eid))

def connect_mysql(self):
    try:
        retry_flag = True
        retry_count = 0
        while retry_flag and retry_count < 3:
            try:
                db = MySQLdb.connect(host=self.host, user=self.user,
                                     passwd=self.password, db='information_schema')
                self.cur = db.cursor()
                retry_flag = False
                collectd.info("Connection to MySQL successful on attempt %s" % retry_count)
            except Exception as e:
                collectd.error("Retry after 5 sec as connection to MySQL failed on attempt %s" % retry_count)
                retry_count += 1
                time.sleep(5)
    except Exception as e:
        collectd.error("Exception in connect_mysql: %s" % e)
        return

def get_tcp_buffersize(self):
    """Returns a dictionary with the tcpWin (low, medium and high),
    tcpRetrans and tcpResets values."""
    dict_tcp = {}
    try:
        with open("/proc/sys/net/ipv4/tcp_wmem") as tcp_wmem_file:
            wmem_lines = tcp_wmem_file.readline()
    except IOError:
        collectd.error("Plugin tcp_stats: Could not open file: /proc/sys/net/ipv4/tcp_wmem")
        return None
    try:
        with open("/proc/sys/net/ipv4/tcp_rmem") as tcp_rmem_file:
            rmem_lines = tcp_rmem_file.readline()
    except IOError:
        collectd.error("Plugin tcp_stats: Could not open file: /proc/sys/net/ipv4/tcp_rmem")
        return None

    read_low, read_medium, read_high = rmem_lines.split("\t")
    write_low, write_medium, write_high = wmem_lines.split("\t")

    dict_tcp[READ_TCPWIN_LOW] = round(float(read_low) / FACTOR, FLOATING_FACTOR)
    dict_tcp[READ_TCPWIN_MEDIUM] = round(float(read_medium) / FACTOR, FLOATING_FACTOR)
    dict_tcp[READ_TCPWIN_HIGH] = round(float(read_high) / FACTOR, FLOATING_FACTOR)
    collectd.info("Plugin tcp_stats: Fetched TCP read buffer sizes successfully")

    dict_tcp[WRITE_TCPWIN_LOW] = round(float(write_low) / FACTOR, FLOATING_FACTOR)
    dict_tcp[WRITE_TCPWIN_MEDIUM] = round(float(write_medium) / FACTOR, FLOATING_FACTOR)
    dict_tcp[WRITE_TCPWIN_HIGH] = round(float(write_high) / FACTOR, FLOATING_FACTOR)
    collectd.info("Plugin tcp_stats: Fetched TCP write buffer sizes successfully")

    (status, val_list) = self.get_retransmit_and_reset()
    if status == SUCCESS:
        collectd.info("Plugin tcp_stats: Fetched TCP reset and retransmit values successfully")
        tcp_resets = float(val_list[1]) + float(val_list[2])
        dict_tcp[TCPRETRANS] = int(val_list[0])
        dict_tcp[TCPRESET] = tcp_resets
    return dict_tcp

def read_callback():
    (agents, qcalls, calls_total) = parse_queues()
    if agents is not None:
        for q in agents:
            n = agents[q]
            log_debug("Queue agents in %s: %d" % (q, n))
            try:
                dispatch_value('queue_agents_%s' % q, 'agents', n, 'gauge', 'Agents')
            except Exception as err:
                collectd.info('ERROR dispatching Asterisk plugin data (queue_agents_%s): %s'
                              % (q, str(err)))
    if qcalls is not None:
        for q in qcalls:
            n = qcalls[q]
            log_debug("Queue calls in %s: %d" % (q, n))
            try:
                dispatch_value('queue_calls_%s' % q, 'calls', n, 'gauge', 'Calls')
            except Exception as err:
                collectd.info('ERROR dispatching Asterisk plugin data (queue_calls_%s): %s'
                              % (q, str(err)))
    if calls_total is not None:
        log_debug("Queue calls total: %d" % calls_total)
        try:
            dispatch_value('queue_calls_total', 'calls', calls_total, 'gauge', 'Calls')
        except Exception as err:
            collectd.info('ERROR dispatching Asterisk plugin data (queue_calls_total): %s'
                          % str(err))
    calls = parse_calls()
    if calls is not None:
        log_debug("Calls total: %d" % calls)
        try:
            dispatch_value('calls_total', 'calls', calls, 'gauge', 'Calls')
        except Exception as err:
            collectd.info('ERROR dispatching Asterisk plugin data (calls_total): %s'
                          % str(err))

def get_metric_key():
    metric_key = "{{instanceid}}"
    collectd.info(metric_key)
    if metric_key == "no params file found":
        collectd.info('no params file found')
        return None
    if metric_key == '':
        collectd.info('no metric_key found')
        return None
    return metric_key

def get_resource_guid():
    resource_guid = os.popen("getProperty resource_id").read().strip()
    collectd.info(resource_guid)
    if resource_guid == "no params file found":
        collectd.info('no params file found')
        return None
    if resource_guid == '':
        collectd.info('no resource_guid found')
        return None
    return resource_guid

def get_property(prop):
    value = os.popen("getProperty " + prop).read().strip()
    collectd.info(prop + ': ' + str(value))
    if value == "no params file found":
        collectd.info('no params file found')
        return None
    if value == '':
        collectd.info(prop + ' not found')
        return None
    return value

def get_metric_key():
    metric_key = "{{system_properties.metricKey}}"
    collectd.info(metric_key)
    if metric_key == "no params file found":
        collectd.info('no params file found')
        return None
    if metric_key == '':
        collectd.info('no metric_key found')
        return None
    return metric_key

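# Note: strings such as "{{instanceid}}" and "{{system_properties.metricKey}}"
# above are template placeholders, presumably substituted when this file is
# rendered at deploy time; "no params file found" appears to be the marker
# left behind when that substitution fails, which is why these getters check
# for it.
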
def send_integrity_metric():
    collectd.info('send_integrity_metric')
    resource_guid = get_resource_guid()
    if resource_guid is None:
        return
    state = 'OK'
    pid = os.popen("pgrep asterisk").read()
    if pid == '':
        collectd.info('asterisk not running')
        state = 'BROKEN'
    url = ('http://mgmt:8285/api/send/integrity/' + resource_guid
           + '?metricName=h_integrity&integrity=' + state)
    collectd.info(url)
    try:
        urllib2.urlopen(url)
    except Exception as e:
        collectd.info('Send metric request failed')

def shutdown(self):
    collectd.info("write_warp10 plugin: Shutdown: Start")
    self.flush_timer.cancel()
    collectd.info("write_warp10 plugin: Shutdown: Timer cancelled")
    self.flush_timer.join()
    collectd.info("write_warp10 plugin: Shutdown: Timer thread joined")
    try:
        self._flush()
    except Exception as e:
        stack_str = repr(traceback.format_exception(*sys.exc_info()))
        collectd.error('write_warp10 plugin: Failed to post data before '
                       'shutdown: %s' % stack_str)

def read_callback():
    """
    This function is regularly executed by collectd. It is important to
    minimize its execution time, which is why a lot of caching is performed
    using the environment objects.
    """
    # Walk through the existing environments
    for name in ENVIRONMENT.keys():
        env = ENVIRONMENT[name]
        collectd.info("read_callback: entering environment: " + name)

        context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
        context.verify_mode = ssl.CERT_NONE

        # Connect to vCenter Server
        try:
            serviceInstance = SmartConnect(host=env["host"], user=env["username"],
                                           pwd=env["password"], sslContext=context)
        except TypeError:
            ssl._create_default_https_context = ssl._create_unverified_context
            serviceInstance = SmartConnect(host=env["host"], user=env["username"],
                                           pwd=env["password"])
        performanceManager = serviceInstance.RetrieveServiceContent().perfManager

        # Walk through all clusters of the datacenter
        for datacenter in serviceInstance.RetrieveServiceContent().rootFolder.childEntity:
            if datacenter._wsdlName == "Datacenter":
                for cluster in datacenter.hostFolder.childEntity:
                    if cluster._wsdlName == "ClusterComputeResource":
                        # Walk through all hosts in the cluster, collect their
                        # metrics and dispatch them
                        collectd.info("read_callback: found %d hosts in cluster %s"
                                      % (len(cluster.host), cluster.name))
                        colletMetricsForEntities(performanceManager,
                                                 env['host_counter_ids'],
                                                 cluster.host, cluster._moId)
                        # Walk through all vms on each host, collect their
                        # metrics and dispatch them
                        for host in cluster.host:
                            if host._wsdlName == "HostSystem":
                                collectd.info("read_callback: found %d vms in host %s"
                                              % (len(host.vm), host.name))
                                colletMetricsForEntities(performanceManager,
                                                         env['vm_counter_ids'],
                                                         host.vm, cluster._moId)

def clear_alarms(alarm_id_list):
    """Clear the alarm state of all plugin alarms"""
    found = False
    for alarm_id in alarm_id_list:
        try:
            alarms = api.get_faults_by_id(alarm_id)
        except Exception as ex:
            collectd.error("%s 'get_faults_by_id' exception ; %s ; %s"
                           % (PLUGIN, alarm_id, ex))
            return False

        if alarms:
            for alarm in alarms:
                eid = alarm.entity_instance_id
                if this_hosts_alarm(obj.hostname, eid) is False:
                    # ignore other hosts' alarms
                    continue
                if alarm_id == PLUGIN_OAM_PORT_ALARMID or \
                        alarm_id == PLUGIN_OAM_IFACE_ALARMID or \
                        alarm_id == PLUGIN_MGMT_PORT_ALARMID or \
                        alarm_id == PLUGIN_MGMT_IFACE_ALARMID or \
                        alarm_id == PLUGIN_CLSTR_PORT_ALARMID or \
                        alarm_id == PLUGIN_CLSTR_IFACE_ALARMID or \
                        alarm_id == PLUGIN_DATA_PORT_ALARMID or \
                        alarm_id == PLUGIN_DATA_IFACE_ALARMID:
                    try:
                        if api.clear_fault(alarm_id, eid) is False:
                            collectd.info("%s %s:%s:%s alarm already cleared"
                                          % (PLUGIN, alarm.severity, alarm_id, eid))
                        else:
                            found = True
                            collectd.info("%s %s:%s:%s alarm cleared"
                                          % (PLUGIN, alarm.severity, alarm_id, eid))
                    except Exception as ex:
                        collectd.error("%s 'clear_fault' exception ; %s:%s ; %s"
                                       % (PLUGIN, alarm_id, eid, ex))
                        return False
    if found is False:
        collectd.info("%s found no startup alarms" % PLUGIN)
    return True

def collect_data(self):
    """Collects all data."""
    # get static data of disks and partitions
    disk_static_data = self.get_static_data()
    if not disk_static_data:
        collectd.error("Plugin disk_stat: Unable to fetch static data for disk and partition")
        return None

    # get dynamic data
    disk_dynamic_data = self.get_dynamic_data()
    if not disk_dynamic_data:
        collectd.info("Plugin disk_stat: Unable to fetch dynamic data for disk and partition")
        return None

    # join data
    dict_disks = self.join_dicts(disk_static_data, disk_dynamic_data)

    # add aggregate key
    self.add_aggregate(dict_disks)
    collectd.info("Plugin disk_stat: Added aggregate information successfully.")

    # add common parameters
    self.add_common_params(dict_disks)
    collectd.info("Plugin disk_stat: Added common parameters successfully.")

    # calculate rate
    self.add_rate(dict_disks)

    # calculate differential values READDATA, WRITEDATA, READIOCOUNT, WRITEIOCOUNT
    self.add_differential_value(dict_disks)

    # calculate latency
    self.add_latency(dict_disks)
    collectd.info("Plugin disk_stat: Calculated and added rate, differential "
                  "and latency parameters successfully.")

    # set previous data to current data
    self.prev_data = dict_disks
    return dict_disks

def config_func(config):
    """ accept configuration from collectd """
    path_set = False
    for node in config.children:
        key = node.key.lower()
        val = node.values[0]
        if key == 'path':
            global PATH
            PATH = val
            path_set = True
        elif key == 'maxretention':
            global MAX_RETENTION
            MAX_RETENTION = val
            collectd.info('puppet plugin: Using overridden MaxRetention %s' % MAX_RETENTION)
        else:
            collectd.info('puppet plugin: Unknown config key "%s"' % key)
    if path_set:
        collectd.info('puppet plugin: Using overridden path %s' % PATH)
    else:
        collectd.info('puppet plugin: Using default path %s' % PATH)

def reader():
    collectd.info('reader called')
    # previously the load averages were captured directly via:
    #   uptime | awk '{print $9 $10 $11}' > /opt/custom_plugins/sysdata.txt
    # now they are read from the pre-generated file
    with open('/opt/custom_plugins/sysdata.txt', 'r') as f:
        out = f.readline()
    a, b, c = out.split('\n')[0].split(',')
    a = float(a)
    b = float(b)
    c = float(c)
    collectd.info('decoded')
    # Dispatch value to collectd
    val = collectd.Values(type='load')
    val.plugin = 'load_avg'
    val.dispatch(values=[a, b, c])

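# Hedged example of the expected /opt/custom_plugins/sysdata.txt content: the
# three load averages as produced by the uptime|awk pipeline referenced above,
# e.g.:
#
#   0.12,0.08,0.01
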
def kairosdb_connect(self, data):
    if not data['conn'] and self.protocol == 'http':
        try:
            collectd.info("connecting pid=%d host=%s port=%s proto=%s http_timeout=%d"
                          % (os.getpid(), data['host'], data['port'],
                             self.protocol, self.http_timeout))
            data['conn'] = httplib.HTTPConnection(data['host'], data['port'],
                                                  timeout=self.http_timeout)
            return True
        except Exception:
            collectd.error('error connecting to http connection: %s' % format_exc())
            return False
    elif not data['conn'] and self.protocol == 'https':
        try:
            collectd.info("connecting pid=%d host=%s port=%s proto=%s http_timeout=%d"
                          % (os.getpid(), data['host'], data['port'],
                             self.protocol, self.http_timeout))
            data['conn'] = httplib.HTTPSConnection(data['host'], data['port'],
                                                   timeout=self.http_timeout)
            return True
        except Exception:
            collectd.error('error connecting to https connection: %s' % format_exc())
            return False
    elif not data['conn'] and self.protocol == 'telnet':
        # only attempt to reconnect every 10 seconds for the telnet protocol
        now = time()
        if now - data['last_connect_time'] < 10:
            return False
        data['last_connect_time'] = now
        collectd.info('connecting to %s:%s' % (data['host'], data['port']))
        # noinspection PyBroadException
        try:
            data['conn'] = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            data['conn'].connect((data['host'], data['port']))
            return True
        except Exception:
            collectd.error('error connecting socket: %s' % format_exc())
            return False
    else:
        return True

def init_func():
    if obj.init_ready() is False:
        return False

    obj.hostname = obj.gethostname()
    obj.base_eid = 'host=' + obj.hostname

    # Create the interface-independent alarm objects.
    create_interface_alarm_objects()

    # load monitored interfaces and supported modes
    if os.path.exists(PLUGIN_CONF_FILE):
        with open(PLUGIN_CONF_FILE, 'r') as infile:
            for line in infile:
                # The PTP interfaces used are specified in the ptp4l.conf
                # file as [interface]. There may be more than one.
                # Presently there is no need to track the function of the
                # interface, namely mgmnt or oam.
                if line[0] == '[':
                    interface = line.split(']')[0].split('[')[1]
                    if interface and interface != 'global':
                        interfaces[interface] = _get_supported_modes(interface)
                        create_interface_alarm_objects(interface)
                if PLUGIN_CONF_TIMESTAMPING in line:
                    obj.mode = line.split()[1].strip('\n')
        if obj.mode:
            collectd.info("%s Timestamping Mode: %s" % (PLUGIN, obj.mode))
        else:
            collectd.error("%s failed to get Timestamping Mode" % PLUGIN)
    else:
        collectd.error("%s failed to load ptp4l configuration" % PLUGIN)
        obj.mode = None

    for key, value in interfaces.items():
        collectd.info("%s interface %s supports timestamping modes: %s"
                      % (PLUGIN, key, value))

    # remove '#' to dump alarm object data
    # print_alarm_objects()

    if tsc.nodetype == 'controller':
        obj.controller = True

    obj.virtual = obj.is_virtual()
    obj.init_done = True
    obj.log_throttle_count = 0
    collectd.info("%s initialization complete" % PLUGIN)

def dispatch_data(self, doc_name, result):
    """Dispatch data to collectd."""
    if doc_name == "topicStats":
        for item in ['messagesIn', 'bytesOut', 'bytesIn', 'totalFetchRequests',
                     'totalProduceRequests', 'produceMessageConversions',
                     'failedProduceRequests', 'fetchMessageConversions',
                     'failedFetchRequests', 'bytesRejected']:
            try:
                del result[item]
            except KeyError:
                pass
                # collectd.error("Key %s deletion error in topicStats doctype for topic %s: %s" % (item, result['_topicName'], str(err)))
        collectd.info("Plugin kafkatopic: Successfully sent topicStats: %s"
                      % result['_topicName'])
    elif doc_name == "kafkaStats":
        for item in ["messagesInPerSec", "bytesInPerSec", "bytesOutPerSec",
                     "isrExpandsPerSec", "isrShrinksPerSec", "leaderElectionPerSec",
                     "uncleanLeaderElectionPerSec", "producerRequestsPerSec",
                     "fetchConsumerRequestsPerSec", "fetchFollowerRequestsPerSec"]:
            try:
                del result[item]
            except KeyError:
                pass
                # collectd.error("Key %s deletion error in kafkaStats doctype: %s" % (item, str(err)))
        collectd.info("Plugin kafkatopic: Successfully sent %s doctype to collectd."
                      % doc_name)
        collectd.debug("Plugin kafkatopic: Values dispatched = %s" % json.dumps(result))
    elif doc_name == "consumerStats":
        collectd.info("Plugin kafkatopic: Successfully sent consumerStats of consumer group %s of topic %s"
                      % (result['_groupName'], result['_topicName']))
    else:
        collectd.info("Plugin kafkatopic: Successfully sent topic %s of partitionStats: %s."
                      % (result['_topicName'], result['_partitionNum']))
    utils.dispatch(result)

def send_load_metric(load):
    collectd.info('send_load_metric: ' + str(load))
    resource_guid = get_resource_guid()
    if resource_guid is None:
        return
    query_args = {'metricName': 'h_load', 'load': load}
    encoded_args = urllib.urlencode(query_args)
    url = 'http://{{ almip }}:31285/api/send/load/' + resource_guid + '/?' + encoded_args
    collectd.info(url)
    collectd.info(urllib2.urlopen(url).read())
    dispatch_service_metrics(load, "Load")

def configure(self, config_map):
    """
    Configuration callback for collectd

    Args:
        config_map (dict): mapping of config name to value
    """
    collectd.info("Configuring Spark Process Plugin ...")
    if "MasterPort" not in config_map and "WorkerPorts" not in config_map:
        raise ValueError("No port provided for metrics url - "
                         "please provide key 'MasterPort' and/or 'WorkerPorts'")
    for key, value in config_map.items():
        if key == METRIC_ADDRESS:
            if not _validate_url(value):
                raise ValueError("URL is not prefixed with http://")
            self.metric_address = value
        elif key == "MasterPort":
            collectd.info("MasterPort detected")
            self.master_port = str(int(value))
        elif key == "WorkerPorts":
            collectd.info("WorkerPort(s) detected")
            for port in value:
                self.worker_ports.append(str(int(port)))
        elif key == "Dimensions" or key == "Dimension":
            self.global_dimensions.update(_dimensions_str_to_dict(value))
        elif key == "EnhancedMetrics" and value == "True":
            self.enhanced_flag = True
        elif key == "IncludeMetrics":
            _add_metrics_to_set(self.include, value)
        elif key == "ExcludeMetrics":
            _add_metrics_to_set(self.exclude, value)
    collectd.info("Successfully configured Spark Process Plugin ...")

def send_integrity_metric():
    collectd.info('send_integrity_metric')
    metric_key = get_metric_key()
    if metric_key is None:
        return
    state = 'OK'
    state_int = 1
    pid = os.popen("pgrep asterisk").read()
    if pid == '':
        collectd.info('asterisk not running')
        state = 'BROKEN'
        state_int = 0
    url = ('http://{{almip}}:31285/api/send/integrity/' + metric_key
           + '?metricName=h_integrity&integrity=' + state)
    collectd.info(url)
    try:
        urllib2.urlopen(url)
    except Exception as e:
        collectd.info('Send metric request failed')
    dispatch_metric('asterisk', 'integrity', str(state_int))

def config_func(config):
    '''Fetch the module configuration or fall back to default values'''
    for node in config.children:
        key = node.key.lower()
        val = node.values[0]
        if key == 'url':
            global URL
            URL = val
        elif key == 'private_token':
            global PRIVATE_TOKEN
            PRIVATE_TOKEN = val
        elif key == 'groups':
            # val format: "group_1, group_2"; strip whitespace around names
            global GROUPS
            GROUPS = [g.strip() for g in val.split(',')]
        else:
            collectd.info('gitlab_kpi plugin: Unknown config key "%s"' % key)
    collectd.info('gitlab_kpi plugin: Using url "%s"' % URL)
    collectd.info('gitlab_kpi plugin: Using Token "%s"' % PRIVATE_TOKEN)
    collectd.info('gitlab_kpi plugin: Using Groups %s' % GROUPS)

def configure(configobj):
    global INTERVAL
    global cl
    global queues_to_count
    config = {c.key: c.values for c in configobj.children}
    INTERVAL = config['interval'][0]
    host = config['host'][0]
    port = int(config['port'][0])
    username = config['username'][0]
    password = config['password'][0]
    queues_to_count = config['message_count']
    collectd.info('rabbitmq_monitoring: Interval: {}'.format(INTERVAL))
    cl = Client('{}:{}'.format(host, port), username, password)
    collectd.info('rabbitmq_monitoring: Connecting to: {}:{} as user: {} password: {}'
                  .format(host, port, username, password))
    collectd.info('rabbitmq_monitoring: Counting messages on: {}'.format(queues_to_count))
    collectd.register_read(read, INTERVAL)

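# Hedged sketch of a matching collectd.conf block for configure() above; the
# module name, credentials and queue names are illustrative assumptions.
#
#   <Plugin python>
#     Import "rabbitmq_monitoring"
#     <Module rabbitmq_monitoring>
#       interval 10
#       host "localhost"
#       port "15672"
#       username "guest"
#       password "guest"
#       message_count "queue_a" "queue_b"
#     </Module>
#   </Plugin>
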