def configure(self, args):
    """Populate ``self.config`` from a YAML file or from the environment.

    If ``args.config_file`` is set, the file is loaded with ``YamlConfig``
    and must contain a ``default`` section.  Otherwise a single ``default``
    section is built from the ``VSPHERE_*`` environment variables:

    * ``VSPHERE_HOST``, ``VSPHERE_USER``, ``VSPHERE_PASSWORD`` (``None`` when
      unset),
    * ``VSPHERE_IGNORE_SSL`` (boolean, default ``False``),
    * ``VSPHERE_COLLECT_VMS`` / ``_DATASTORES`` / ``_HOSTS`` (boolean,
      default ``True``).

    Args:
        args: object with a ``config_file`` attribute (path or falsy value).

    Raises:
        SystemExit: with status 1 when the file has no ``default`` section,
            or with an explanatory message when loading the file fails.
    """
    if args.config_file:
        try:
            self.config = YamlConfig(args.config_file)
            if 'default' not in self.config.keys():
                log("Error, you must have a default section in config file (for now)")
                # SystemExit does not derive from Exception, so the handler
                # below does not re-wrap it.
                raise SystemExit(1)
        except Exception as exception:
            # format() stringifies the exception; ``exception.message`` does
            # not exist on Python 3 and would raise inside this handler.
            raise SystemExit(
                "Error while reading configuration file: {0}".format(exception))
        return

    def env_flag(name, default):
        # Environment values are always strings, so "False" or "0" would be
        # truthy if stored as-is; parse them into real booleans instead.
        raw = os.environ.get(name)
        if raw is None:
            return default
        return raw.strip().lower() in ('1', 'true', 'yes', 'on')

    # Build the mapping directly rather than formatting values into a YAML
    # document: no quoting/escaping problems with passwords, no dependency on
    # yaml.load(), and the user/password variables are actually used.
    self.config = {
        'default': {
            'vsphere_host': os.environ.get('VSPHERE_HOST'),
            'vsphere_user': os.environ.get('VSPHERE_USER'),
            'vsphere_password': os.environ.get('VSPHERE_PASSWORD'),
            'ignore_ssl': env_flag('VSPHERE_IGNORE_SSL', False),
            'collect_only': {
                'vms': env_flag('VSPHERE_COLLECT_VMS', True),
                'datastores': env_flag('VSPHERE_COLLECT_DATASTORES', True),
                'hosts': env_flag('VSPHERE_COLLECT_HOSTS', True),
            },
        }
    }
def __init__(self, args):
    """Load the YAML configuration named by ``args.config_file``.

    The loaded ``YamlConfig`` is stored on ``self.config`` and must contain a
    ``default`` section.

    Args:
        args: object with a ``config_file`` attribute.

    Raises:
        SystemExit: with status 1 when the ``default`` section is missing, or
            with an error message when the file cannot be read.
    """
    try:
        self.config = YamlConfig(args.config_file)
        if 'default' not in self.config.keys():
            print("Error, you must have a default section in config file")
            raise SystemExit(1)
    except Exception:
        # Deliberately not a bare ``except:``: that would also catch the
        # SystemExit raised just above (and KeyboardInterrupt) and replace the
        # specific diagnostic with this generic one.
        raise SystemExit("Error, cannot read configuration file")
def load():
    """Load the first existing file from ``search_path`` into ``config``.

    Each entry of the module-level ``search_path`` is expanded (``~``) and
    made absolute; the first one that exists on disk is parsed with
    ``YamlConfig`` and bound to the module-level ``config``.

    Raises:
        RuntimeError: if no entry of ``search_path`` exists.
    """
    global config
    # Generators keep the evaluation lazy: entries after the first hit are
    # never expanded or probed.
    candidates = (
        os.path.abspath(os.path.expanduser(entry)) for entry in search_path
    )
    first_hit = next(
        (candidate for candidate in candidates if os.path.exists(candidate)),
        None,
    )
    if first_hit is None:
        raise RuntimeError("No configs found in search path: " + str(search_path))
    config = YamlConfig(first_hit)
def get_config(self, configurationfile):
    """Load ``configurationfile`` with ``YamlConfig`` and return the result.

    Args:
        configurationfile: path of the YAML configuration file.

    Returns:
        The ``YamlConfig`` built from the file (with an empty defaults dict).

    Raises:
        SystemExit: with status 1 when the file does not exist or cannot be
            opened; the reason is logged first.
    """
    defaults = {}
    if not os.path.exists(configurationfile):
        logging.error("Config file doesn't exist: " + configurationfile)
        # A missing configuration is a failure, so exit non-zero.
        raise SystemExit(1)
    try:
        return YamlConfig(configurationfile, defaults)
    except IOError as e:
        logging.error("Couldn't open configuration file: " + str(e))
        # Nothing was loaded, so there is no config to return; stop here
        # instead of falling through to an unbound local variable.
        raise SystemExit(1)
def configure(self, args):
    """Populate ``self.config`` from a YAML file or from the environment.

    If ``args.config_file`` is set, the file is loaded with ``YamlConfig``
    and must contain a ``default`` section.

    Otherwise the configuration is assembled from environment variables:

    * the ``default`` section from ``VSPHERE_HOST``, ``VSPHERE_USER``,
      ``VSPHERE_PASSWORD``, ``VSPHERE_IGNORE_SSL``, ``VSPHERE_SPECS_SIZE`` and
      ``VSPHERE_COLLECT_*``;
    * one extra section per ``VSPHERE_<SECTION>_USER`` variable, read from the
      matching ``VSPHERE_<SECTION>_*`` variables and stored under the
      lower-cased section name.

    Args:
        args: object with a ``config_file`` attribute (path or falsy value).

    Raises:
        SystemExit: with status 1 when the file has no ``default`` section,
            or with an explanatory message when loading the file fails.
        ValueError: when a ``*_SPECS_SIZE`` variable is not an integer.
    """
    if args.config_file:
        try:
            self.config = YamlConfig(args.config_file)
            if 'default' not in self.config.keys():
                logging.error("Error, you must have a default section in config file (for now)")
                # SystemExit does not derive from Exception, so the handler
                # below does not re-wrap it.
                raise SystemExit(1)
            return
        except Exception as exception:
            # format() stringifies the exception; ``exception.message`` does
            # not exist on Python 3 and would raise inside this handler.
            raise SystemExit(
                "Error while reading configuration file: {0}".format(exception))

    def section_from_env(prefix):
        # One section, read from the variables named "<prefix>_<SETTING>".
        return {
            'vsphere_host': os.environ.get(prefix + '_HOST'),
            'vsphere_user': os.environ.get(prefix + '_USER'),
            'vsphere_password': os.environ.get(prefix + '_PASSWORD'),
            'ignore_ssl': get_bool_env(prefix + '_IGNORE_SSL', False),
            # Environment values are strings; normalise so the setting has
            # the same type whether or not it was overridden.
            'specs_size': int(os.environ.get(prefix + '_SPECS_SIZE', 5000)),
            'collect_only': {
                'vms': get_bool_env(prefix + '_COLLECT_VMS', True),
                'vmguests': get_bool_env(prefix + '_COLLECT_VMGUESTS', True),
                'datastores': get_bool_env(prefix + '_COLLECT_DATASTORES', True),
                'hosts': get_bool_env(prefix + '_COLLECT_HOSTS', True),
                'snapshots': get_bool_env(prefix + '_COLLECT_SNAPSHOTS', True),
            },
        }

    self.config = {'default': section_from_env('VSPHERE')}

    for key in os.environ.keys():
        # VSPHERE_USER belongs to the default section handled above.
        if key == 'VSPHERE_USER':
            continue
        if not key.startswith('VSPHERE_') or not key.endswith('_USER'):
            continue

        # "VSPHERE_<SECTION>_USER" -> "<SECTION>" (may itself contain "_").
        section = key.split('_', 1)[1].rsplit('_', 1)[0]
        self.config[section.lower()] = section_from_env('VSPHERE_{}'.format(section))
def get_config(configfile):
    """Load the YAML config and overlay settings taken from the environment.

    Environment variables consulted:

    * ``REFRESH_INTERVAL`` - optional, overrides ``refresh_interval``;
    * ``OS_PROM_CONFIGMAP_NAME`` - required, stored as ``configmap_name``;
    * ``NAMESPACE`` - optional, stored as ``namespace``;
    * ``region`` (lower-case variable name) - required, stored lower-cased as
      ``region``.

    Args:
        configfile: path of the YAML configuration file.

    Returns:
        The ``YamlConfig`` with the environment values applied.

    Raises:
        SystemExit: with status 1 when a required variable is missing or empty.
        KeyError: when ``refresh_interval`` is neither in the file nor in the
            environment.
    """
    config = YamlConfig(configfile)

    # Only fall back to the file value when the variable is absent, so a file
    # without ``refresh_interval`` is acceptable as long as the environment
    # provides it.
    refresh_interval = os.getenv('REFRESH_INTERVAL')
    if refresh_interval is None:
        refresh_interval = config['refresh_interval']
    config['refresh_interval'] = refresh_interval

    # Empty values are treated like unset ones for the settings below.
    configmap_name = os.getenv('OS_PROM_CONFIGMAP_NAME')
    if not configmap_name:
        logging.error("No configmap name in environment!")
        raise SystemExit(1)
    config['configmap_name'] = configmap_name

    namespace = os.getenv('NAMESPACE')
    if namespace:
        config['namespace'] = namespace

    region = os.getenv('region')
    if not region:
        logging.error("No region in environment!")
        raise SystemExit(1)
    config['region'] = region.lower()

    return config
# metrics coverage (i.e. we get the metrics quicker than the averaging time) loop_sleep_time = 0.9 * \ self.configs['main']['interval'] - \ (loop_end_time - loop_start_time) if loop_sleep_time < 0: logging.warn('getting the metrics takes around ' + str( self.configs['main']['interval']) + ' seconds or longer - please increase the interval setting') loop_sleep_time = 0 logging.debug('====> loop end before sleep: %s' % datetime.now()) time.sleep(int(loop_sleep_time)) logging.debug('====> total loop end: %s' % datetime.now()) if __name__ == "__main__": # config file parsing parser = argparse.ArgumentParser() parser.add_argument( "-c", "--config", help="Specify config file", metavar="FILE", required=True) parser.add_argument( "-t", "--type", help="The type of exporter [VM, versions, datastores]", required=True) args, remaining_argv = parser.parse_known_args() config = YamlConfig(args.config, VcenterExporter.defaults) if args.type.upper() not in VcenterExporter.supported_types: sys.exit("Current supported exporter types [--t] are " + ', '.join(VcenterExporter.supported_types)) vcenter_exporter = VcenterExporter(config, args.type) vcenter_exporter.collect_metrics()
def main():
    """Export vCenter VM or datastore metrics as Prometheus gauges, forever.

    Steps:

    1. Parse ``-c/--config`` and ``-d/--datastore`` and load the YAML config;
       all settings are read from its ``main`` section.
    2. Connect to the vCenter and start the Prometheus HTTP server on
       ``listen_port``.
    3. Loop without end.  In VM mode (default) performance counters are
       queried for each powered-on VM whose annotation starts with ``name:``,
       whose host matches ``host_match_regex`` and whose name does not match
       ``ignore_match_regex``.  In datastore mode (``-d``) the summary
       properties of each datastore are exported.  Each pass is padded with a
       sleep so that it lasts about 90% of ``interval`` seconds.

    Raises:
        SystemExit: when the connection to the vCenter cannot be established.
    """
    # config file parsing
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Specify config file", metavar="FILE")
    parser.add_argument("-d", "--datastore", help="Get metrics for datastores instead of vms", action='store_true')
    # unknown extra arguments are tolerated and ignored
    args, _ = parser.parse_known_args()
    config = YamlConfig(args.config, defaults)
    main_cfg = config.get('main')
    collect_vms = not args.datastore

    # list of properties we are using and which we get via property collector later
    # see: http://goo.gl/fjTEpW for all properties.
    # once for vms and once for datastores ... and some other stuff, which differs for the two cases
    if collect_vms:
        my_properties = [
            "runtime.powerState", "runtime.host", "config.annotation",
            "config.name", "config.instanceUuid", "config.guestId",
            "summary.config.vmPathName"
        ]
        my_name = "vm"
        my_obj_type = vim.VirtualMachine
    else:
        my_properties = [
            "summary.accessible", "summary.capacity", "summary.freeSpace",
            "summary.maintenanceMode", "summary.name", "summary.type",
            "summary.url", "overallStatus"
        ]
        my_name = "datastore"
        my_obj_type = vim.Datastore

    # set default log level if not defined in config file
    if main_cfg.get('log'):
        logger.setLevel(logging.getLevelName(main_cfg.get('log').upper()))
    else:
        logger.setLevel('INFO')
    FORMAT = '[%(asctime)s] [%(levelname)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, format=FORMAT)

    # check for insecure ssl option
    si = None
    context = None
    if main_cfg.get('ignore_ssl') and \
            hasattr(ssl, "_create_unverified_context"):
        context = ssl._create_unverified_context()

    # connect to vcenter
    try:
        si = SmartConnect(host=main_cfg.get('host'),
                          user=main_cfg.get('user'),
                          pwd=main_cfg.get('password'),
                          port=int(main_cfg.get('port')),
                          sslContext=context)
        atexit.register(Disconnect, si)
    except IOError as e:
        # str() is required: concatenating the exception object itself raises
        # TypeError and would hide the real connection error.
        logging.error("Could not connect to vcenter. " + str(e))

    if not si:
        raise SystemExit("Unable to connect to host with supplied info.")

    content = si.RetrieveContent()
    perfManager = content.perfManager

    # get the datacenter info
    datacenter = si.content.rootFolder.childEntity[0]
    datacentername = datacenter.name
    logging.debug('datacenter name: ' + datacentername)

    # root of the container view used to list the vcenter hosts in each loop
    container = content.rootFolder
    recursive = True

    # counterInfo: [performance stat => counterId], counter_name_by_id is the reverse
    counterInfo = {}
    counter_name_by_id = {}
    gauge = {}

    # time interval to average vcenter data across in seconds
    interval = int(main_cfg.get('interval'))

    # compile a regex for trying to filter out openstack generated vms - they all have the "name:" field set
    openstack_match_regex = re.compile("^name:")

    # compile a regex for stripping out not required parts of hostnames etc. to have shorter label names (for better grafana display)
    if main_cfg.get('shorter_names_regex'):
        shorter_names_regex = re.compile(main_cfg.get('shorter_names_regex'))
    else:
        shorter_names_regex = re.compile('')
    logging.debug("name shortening regex: " + str(main_cfg.get('shorter_names_regex')))

    # compile a regex for matching the vcenter_node name, so that we can deal only with the matching node or bb with this vcenter-exporter
    if main_cfg.get('host_match_regex'):
        host_match_regex = re.compile(main_cfg.get('host_match_regex'))
    else:
        host_match_regex = re.compile('')
    logging.debug("vcenter_node name (host) regex: " + str(main_cfg.get('host_match_regex')))

    # compile a regex for matching the vmware_name against machines we do not want to collect metrics for (canary, blackbox vms etc.)
    if main_cfg.get('ignore_match_regex'):
        ignore_match_regex = re.compile(main_cfg.get('ignore_match_regex'))
    else:
        ignore_match_regex = re.compile(
            'this_string_will_definitely_not_match_any_vmware_name')
    logging.debug("vmware name ignore regex: " + str(main_cfg.get('ignore_match_regex')))

    # create a mapping from performance stats to their counterIDs
    # performance stat example: cpu.usagemhz.LATEST
    # counterId example: 6
    # level defines the amounts of metrics available and its default setting in the vcenter here is 1
    counterids = perfManager.QueryPerfCounterByLevel(level=4)

    # start up the http server to expose the prometheus metrics
    start_http_server(int(main_cfg.get('listen_port')))

    if collect_vms:
        logging.debug('list of all available metrics and their counterids')
        # loop over all counterids and build their full name and a dict relating it to the ids
        for c in counterids:
            fullName = c.groupInfo.key + "." + c.nameInfo.key + "." + c.rollupType
            logging.debug(fullName + ': ' + str(c.key))
            counterInfo[fullName] = c.key
            # reverse lookup used when the query results come back; a dict
            # avoids scanning all counters for every returned sample
            counter_name_by_id[c.key] = fullName

            # define a dict of vm gauges for the counter ids
            gauge_name = 'vcenter_' + fullName.replace('.', '_')
            gauge[gauge_name] = Gauge(
                gauge_name, gauge_name, [
                    'vmware_name', 'project_id', 'vcenter_name',
                    'vcenter_node', 'instance_uuid', 'guest_id', 'datastore',
                    'metric_detail'
                ])

        # in case we have a configured set of metrics to handle, use those - otherwise use all we can get
        selected_metrics = main_cfg.get('vm_metrics')
        if selected_metrics:
            counterIDs = [
                counterInfo[i] for i in selected_metrics if i in counterInfo
            ]
        else:
            counterIDs = [i.key for i in counterids]
    else:
        # define the gauges - they have to be defined by hand for the datastores, as there is no clear pattern behind
        for gauge_name in ('vcenter_datastore_accessible',
                           'vcenter_datastore_capacity',
                           'vcenter_datastore_freespace',
                           'vcenter_datastore_maintenancemode',
                           'vcenter_datastore_overallstatus'):
            gauge[gauge_name] = Gauge(
                gauge_name, gauge_name,
                ['datastore_name', 'datastore_type', 'datastore_url'])

    # infinite loop for getting the metrics
    while True:
        logging.debug('====> total loop start: %s' % datetime.now())
        # get the start time of the loop to be able to fill it to interval exactly at the end
        loop_start_time = int(time.time())

        # first the vm metric case
        if collect_vms:
            # get all the data regarding vcenter hosts
            # NOTE(review): a new container view is created on every pass and
            # never destroyed here - confirm whether it should be released.
            hostView = content.viewManager.CreateContainerView(
                container, [vim.HostSystem], recursive)

            hostssystems = hostView.view

            # build a dict to lookup the hostname by its id later
            hostsystemsdict = {}
            for host in hostssystems:
                hostsystemsdict[host] = host.name
            logging.debug(
                'list of all available vcenter nodes and their internal id')
            logging.debug(hostsystemsdict)

        # collect the properties we are interested in
        view = get_container_view(si, obj_type=[my_obj_type])
        my_data = collect_properties(si,
                                     view_ref=view,
                                     obj_type=my_obj_type,
                                     path_set=my_properties,
                                     include_mors=True)

        my_count = 0

        # define the time range in seconds the metric data from the vcenter should be averaged across
        # all based on vcenter time
        vchtime = si.CurrentTime()
        startTime = vchtime - timedelta(seconds=(interval + 60))
        endTime = vchtime - timedelta(seconds=60)

        # loop over all vmware machines / datastores
        for item in my_data:
            try:
                if collect_vms:
                    # only consider machines which have an annotation, are powered on, match our regex for the host system and are not in the ignore list
                    if (item["runtime.powerState"] == "poweredOn" and
                            openstack_match_regex.match(
                                item["config.annotation"]) and
                            host_match_regex.match(
                                hostsystemsdict[item["runtime.host"]])
                        ) and not ignore_match_regex.match(
                            item["config.name"]):
                        logging.debug('current vm processed - ' +
                                      item["config.name"])

                        logging.debug('==> running on vcenter node: ' +
                                      hostsystemsdict[item["runtime.host"]])

                        # split the multi-line annotation into a dict per property (name, project-id, ...)
                        annotation_lines = item["config.annotation"].split(
                            '\n')

                        # rename flavor: to flavor_, so that it does not break the split on : below
                        annotation_lines = [
                            w.replace('flavor:', 'flavor_')
                            for w in annotation_lines
                        ]

                        # the filter is for filtering out empty lines
                        annotations = dict(
                            s.split(':', 1)
                            for s in filter(None, annotation_lines))

                        # datastore name: the text between [ and ] of the vm path
                        datastore = item["summary.config.vmPathName"].split(
                            '[', 1)[1].split(']')[0]

                        # get a list of metricids for this vm in preparation for the stats query
                        metricIDs = [
                            vim.PerformanceManager.MetricId(counterId=i,
                                                            instance="*")
                            for i in counterIDs
                        ]

                        # query spec for the metric stats query, the intervalId is the default one
                        logging.debug(
                            '==> vim.PerformanceManager.QuerySpec start: %s' %
                            datetime.now())
                        spec = vim.PerformanceManager.QuerySpec(
                            maxSample=1,
                            entity=item["obj"],
                            metricId=metricIDs,
                            intervalId=20,
                            startTime=startTime,
                            endTime=endTime)
                        logging.debug(
                            '==> vim.PerformanceManager.QuerySpec end: %s' %
                            datetime.now())

                        # get metric stats from vcenter
                        logging.debug('==> perfManager.QueryStats start: %s' %
                                      datetime.now())
                        result = perfManager.QueryStats(querySpec=[spec])
                        logging.debug('==> perfManager.QueryStats end: %s' %
                                      datetime.now())

                        # loop over the metrics
                        logging.debug('==> gauge loop start: %s' %
                                      datetime.now())
                        # an empty result raises IndexError here, which is
                        # reported below as a disappeared vm
                        for val in result[0].value:
                            # send gauges to prometheus exporter: metricname and value with
                            # labels: vm name, project id, vcenter name, vcenter
                            # node, instance uuid and metric detail (for instance a partition
                            # for io or an interface for net metrics) - we update the gauge
                            # only if the value is not -1 which means the vcenter has no value
                            if val.value[0] != -1:
                                if val.id.instance == '':
                                    metric_detail = 'total'
                                else:
                                    metric_detail = val.id.instance
                                counter_name = counter_name_by_id[
                                    val.id.counterId]
                                gauge['vcenter_' +
                                      counter_name.replace('.', '_')].labels(
                                          annotations['name'],
                                          annotations['projectid'],
                                          datacentername,
                                          shorter_names_regex.sub(
                                              '', hostsystemsdict[
                                                  item["runtime.host"]]),
                                          item["config.instanceUuid"],
                                          item["config.guestId"], datastore,
                                          metric_detail).set(val.value[0])
                        logging.debug('==> gauge loop end: %s' %
                                      datetime.now())
                # alternatively the datastore metric case
                else:
                    logging.debug('current datastore processed - ' +
                                  item["summary.name"])

                    logging.debug('==> accessible: ' +
                                  str(item["summary.accessible"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["summary.accessible"] == True:
                        number_accessible = 1
                    else:
                        number_accessible = 0
                    logging.debug('==> capacity: ' +
                                  str(item["summary.capacity"]))
                    logging.debug('==> freeSpace: ' +
                                  str(item["summary.freeSpace"]))
                    logging.debug('==> maintenanceMode: ' +
                                  str(item["summary.maintenanceMode"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["summary.maintenanceMode"] == "normal":
                        number_maintenanceMode = 0
                    else:
                        # fallback to note if we do not yet catch a value
                        number_maintenanceMode = -1
                        logging.info(
                            'unexpected maintenanceMode for datastore ' +
                            item["summary.name"])
                    logging.debug('==> type: ' + str(item["summary.type"]))
                    logging.debug('==> url: ' + str(item["summary.url"]))
                    logging.debug('==> overallStatus: ' +
                                  str(item["overallStatus"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["overallStatus"] == "green":
                        number_overallStatus = 0
                    elif item["overallStatus"] == "yellow":
                        number_overallStatus = 1
                    elif item["overallStatus"] == "red":
                        number_overallStatus = 2
                    else:
                        # fallback to note if we do not yet catch a value
                        number_overallStatus = -1
                        logging.info(
                            'unexpected overallStatus for datastore ' +
                            item["summary.name"])

                    # set the gauges for the datastore properties
                    logging.debug('==> gauge start: %s' % datetime.now())
                    datastore_labels = (item["summary.name"],
                                        item["summary.type"],
                                        item["summary.url"])
                    gauge['vcenter_datastore_accessible'].labels(
                        *datastore_labels).set(number_accessible)
                    gauge['vcenter_datastore_capacity'].labels(
                        *datastore_labels).set(item["summary.capacity"])
                    gauge['vcenter_datastore_freespace'].labels(
                        *datastore_labels).set(item["summary.freeSpace"])
                    gauge['vcenter_datastore_maintenancemode'].labels(
                        *datastore_labels).set(number_maintenanceMode)
                    gauge['vcenter_datastore_overallstatus'].labels(
                        *datastore_labels).set(number_overallStatus)
                    logging.debug('==> gauge end: %s' % datetime.now())

                # NOTE(review): the original indentation of this counter was
                # lost; it is placed at item level (every item processed
                # without IndexError is counted) - confirm against the
                # upstream file.
                my_count += 1

            except IndexError:
                logging.info('a ' + my_name + ' disappeared during processing')

        loop_end_time = int(time.time())

        logging.info('number of ' + my_name + 's we got metrics for: ' +
                     str(my_count) + ' - actual runtime: ' +
                     str(loop_end_time - loop_start_time) + 's')

        # this is the time we sleep to fill the loop runtime until it reaches "interval"
        # the 0.9 makes sure we have some overlap to the last interval to avoid gaps in
        # metrics coverage (i.e. we get the metrics quicker than the averaging time)
        loop_sleep_time = 0.9 * interval - (loop_end_time - loop_start_time)
        if loop_sleep_time < 0:
            logging.warning(
                'getting the metrics takes around ' + str(interval) +
                ' seconds or longer - please increase the interval setting')
            loop_sleep_time = 0

        logging.debug('====> loop end before sleep: %s' % datetime.now())
        time.sleep(int(loop_sleep_time))
        logging.debug('====> total loop end: %s' % datetime.now())
def set_config():
    """Merge the YAML file named by ``args.config_file`` into ``_CONFIG``.

    Both ``args`` and ``_CONFIG`` are module-level names; ``_CONFIG`` is
    updated in place, so no ``global`` statement is needed.

    Raises:
        Exception: when ``args.config_file`` is not an existing regular file.
            The message names the offending path.
    """
    if not os.path.isfile(args.config_file):
        # Same exception type as before, but with a message so the failure
        # can be diagnosed from the traceback alone.
        raise Exception(
            "Config file not found: {0}".format(args.config_file))
    _CONFIG.update(YamlConfig(args.config_file, None))
def __init__(self, args):
    """Read the YAML file at ``args.config_file`` into ``self.config``.

    Raises:
        SystemExit: with the message ``ERROR - <cause>`` when loading the
            configuration fails for any reason.
    """
    config_path = args.config_file
    try:
        self.config = YamlConfig(config_path)
    except Exception as err:
        failure = f'ERROR - {err}'
        raise SystemExit(failure)
# NOTE(review): the lines down to the ``except`` clause are the tail of a
# function whose header and ``try:`` are outside this view; the indentation
# shown is relative only and must be restored from the enclosing definition.
# NOTE(review): ``httpd.serve_forever()`` is *called* here, before the Thread
# is constructed, and its return value is passed as ``target``.  If that call
# blocks (presumably it does - confirm the type of ``httpd``), the three
# thread statements are never reached; ``target=httpd.serve_forever`` may
# have been intended.
    t = threading.Thread(target=httpd.serve_forever())
    t.daemon = True
    t.start()
except KeyboardInterrupt:
    logging.info("Stopping Arista eAPI Prometheus Server")


def enable_logging():
    """Configure the root logger from the ``APP_ENV`` environment variable.

    The level is INFO when ``APP_ENV`` (case-insensitive, default
    ``production``) equals ``production`` and DEBUG otherwise.  Records are
    written to stdout.
    """
    # enable logging
    logger = logging.getLogger()
    app_environment = os.getenv('APP_ENV', default="production").lower()
    if app_environment == "production":
        logger.setLevel('INFO')
    else:
        logger.setLevel('DEBUG')
    # NOTE: this local name shadows the builtin ``format`` inside the function.
    format = '%(asctime)-15s %(process)d %(levelname)s %(filename)s:%(lineno)d %(message)s'
    logging.basicConfig(stream=sys.stdout, format=format)


# Script entry point: parse the command line, load the config, set up logging
# and start the application.
if __name__ == '__main__':
    # command line options
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c", "--config", help="Specify config yaml file", metavar="FILE", required=False, default="config.yml")
    args = parser.parse_args()

    # get the config
    config = YamlConfig(args.config)

    enable_logging()

    falcon_app()