예제 #1
0
 def configure(self, args):
     """Populate ``self.config`` from a YAML file or from VSPHERE_* env vars.

     If ``args.config_file`` is set, the file must contain a ``default``
     section; otherwise a minimal configuration is assembled from the
     environment variables VSPHERE_HOST/USER/PASSWORD/IGNORE_SSL.
     """
     if args.config_file:
         try:
             self.config = YamlConfig(args.config_file)
             if 'default' not in self.config.keys():
                 log("Error, you must have a default section in config file (for now)"
                     )
                 exit(1)
         except Exception as exception:
             # str(exception) instead of exception.message:
             # Exception.message was removed in Python 3 and would raise
             # AttributeError right here in the error path.
             raise SystemExit(
                 "Error while reading configuration file: {0}".format(
                     exception))
     else:
         # Placeholders {1}/{2} were previously the literal string
         # "******", so the exporter tried to log in with masked
         # credentials; all four formatted values are now actually used.
         config_data = """
         default:
             vsphere_host: "{0}"
             vsphere_user: "{1}"
             vsphere_password: "{2}"
             ignore_ssl: {3}
             collect_only:
                 vms: True
                 datastores: True
                 hosts: True
         """.format(os.environ.get('VSPHERE_HOST'),
                    os.environ.get('VSPHERE_USER'),
                    os.environ.get('VSPHERE_PASSWORD'),
                    os.environ.get('VSPHERE_IGNORE_SSL', False))
         # safe_load: plain scalars/mappings only here, and yaml.load()
         # without an explicit Loader is unsafe and deprecated.
         self.config = yaml.safe_load(config_data)
         # Environment toggles override the defaults baked into the
         # template above.
         # NOTE(review): os.environ.get returns strings, so any set value
         # (even "False") is truthy downstream — confirm callers expect that.
         self.config['default']['collect_only']['hosts'] = os.environ.get(
             'VSPHERE_COLLECT_HOSTS', True)
         self.config['default']['collect_only'][
             'datastores'] = os.environ.get('VSPHERE_COLLECT_DATASTORES',
                                            True)
         self.config['default']['collect_only']['vms'] = os.environ.get(
             'VSPHERE_COLLECT_VMS', True)
예제 #2
0
 def __init__(self, args):
     """Load the YAML configuration and require a ``default`` section.

     Exits with status 1 if the ``default`` section is missing, and
     raises SystemExit with a generic message if the file cannot be read.
     """
     try:
         self.config = YamlConfig(args.config_file)
         if 'default' not in self.config.keys():
             print("Error, you must have a default section in config file")
             exit(1)
     except Exception:
         # Narrowed from a bare `except:`: the bare form also caught the
         # SystemExit raised by exit(1) above and replaced the
         # missing-section error with the wrong "cannot read" message.
         raise SystemExit("Error, cannot read configuration file")
예제 #3
0
파일: config.py 프로젝트: adamwight/boxes
def load():
    """Load the first config file found on ``search_path`` into ``config``.

    Raises RuntimeError when no candidate path exists.
    """
    global config
    for candidate in search_path:
        candidate = os.path.abspath(os.path.expanduser(candidate))
        if not os.path.exists(candidate):
            continue
        config = YamlConfig(candidate)
        return
    raise RuntimeError("No configs found in search path: " + str(search_path))
예제 #4
0
 def get_config(self, configurationfile):
     """Return the parsed YAML configuration for *configurationfile*.

     Exits the process (status 1) when the file is missing or unreadable.
     """
     defaults = {}
     if os.path.exists(configurationfile):
         try:
             config = YamlConfig(configurationfile, defaults)
         except IOError as e:
             logging.error("Couldn't open configuration file: " + str(e))
             # Previously execution fell through to `return config` with
             # `config` never assigned, raising UnboundLocalError; bail
             # out explicitly instead.
             exit(1)
         return config
     else:
         logging.error("Config file doesn't exist: " + configurationfile)
         # exit(1): this is an error path; exit(0) signalled success.
         exit(1)
예제 #5
0
    def configure(self, args):
        """Build ``self.config`` from a YAML file or from the environment.

        With ``args.config_file`` the file must contain a ``default``
        section.  Without it, a ``default`` section is assembled from
        VSPHERE_* variables, and every ``VSPHERE_<SECTION>_USER`` variable
        additionally creates a lower-cased ``<section>`` entry built from
        the matching ``VSPHERE_<SECTION>_*`` variables.
        """
        if args.config_file:
            try:
                self.config = YamlConfig(args.config_file)
                if 'default' not in self.config.keys():
                    logging.error("Error, you must have a default section in config file (for now)")
                    exit(1)
                return
            except Exception as exception:
                # str(exception) instead of exception.message:
                # Exception.message was removed in Python 3 and would raise
                # AttributeError right here in the error path.
                raise SystemExit("Error while reading configuration file: {0}".format(exception))

        self.config = {
            'default': {
                'vsphere_host': os.environ.get('VSPHERE_HOST'),
                'vsphere_user': os.environ.get('VSPHERE_USER'),
                'vsphere_password': os.environ.get('VSPHERE_PASSWORD'),
                'ignore_ssl': get_bool_env('VSPHERE_IGNORE_SSL', False),
                'specs_size': os.environ.get('VSPHERE_SPECS_SIZE', 5000),
                'collect_only': {
                    'vms': get_bool_env('VSPHERE_COLLECT_VMS', True),
                    'vmguests': get_bool_env('VSPHERE_COLLECT_VMGUESTS', True),
                    'datastores': get_bool_env('VSPHERE_COLLECT_DATASTORES', True),
                    'hosts': get_bool_env('VSPHERE_COLLECT_HOSTS', True),
                    'snapshots': get_bool_env('VSPHERE_COLLECT_SNAPSHOTS', True),
                }
            }
        }

        # Extra sections: every VSPHERE_<SECTION>_USER variable (except the
        # plain VSPHERE_USER belonging to `default`) declares a section.
        for key in os.environ.keys():
            if key == 'VSPHERE_USER':
                continue
            if not key.startswith('VSPHERE_') or not key.endswith('_USER'):
                continue

            # VSPHERE_FOO_BAR_USER -> section name FOO_BAR
            section = key.split('_', 1)[1].rsplit('_', 1)[0]

            self.config[section.lower()] = {
                'vsphere_host': os.environ.get('VSPHERE_{}_HOST'.format(section)),
                'vsphere_user': os.environ.get('VSPHERE_{}_USER'.format(section)),
                'vsphere_password': os.environ.get('VSPHERE_{}_PASSWORD'.format(section)),
                'ignore_ssl': get_bool_env('VSPHERE_{}_IGNORE_SSL'.format(section), False),
                'specs_size': os.environ.get('VSPHERE_{}_SPECS_SIZE'.format(section), 5000),
                'collect_only': {
                    'vms': get_bool_env('VSPHERE_{}_COLLECT_VMS'.format(section), True),
                    'vmguests': get_bool_env('VSPHERE_{}_COLLECT_VMGUESTS'.format(section), True),
                    'datastores': get_bool_env('VSPHERE_{}_COLLECT_DATASTORES'.format(section), True),
                    'hosts': get_bool_env('VSPHERE_{}_COLLECT_HOSTS'.format(section), True),
                    'snapshots': get_bool_env('VSPHERE_{}_COLLECT_SNAPSHOTS'.format(section), True),
                }
            }
예제 #6
0
def get_config(configfile):
    """Build the runtime configuration from a YAML file plus environment.

    Exits with status 1 when the mandatory configmap name or region is
    missing from the environment.
    """
    config = YamlConfig(configfile)

    # The environment overrides the file-supplied refresh interval.
    config['refresh_interval'] = os.getenv('REFRESH_INTERVAL',
                                           config['refresh_interval'])

    configmap_name = os.getenv('OS_PROM_CONFIGMAP_NAME')
    if not configmap_name:
        logging.error("No configmap name in environment!")
        exit(1)
    config['configmap_name'] = configmap_name

    namespace = os.getenv('NAMESPACE')
    if namespace:
        config['namespace'] = namespace

    region = os.getenv('region')
    if not region:
        logging.error("No region in environment!")
        exit(1)
    config['region'] = region.lower()

    return config
예제 #7
0
            # metrics coverage (i.e. we get the metrics quicker than the averaging time)
            loop_sleep_time = 0.9 * \
                self.configs['main']['interval'] - \
                (loop_end_time - loop_start_time)
            if loop_sleep_time < 0:
                logging.warn('getting the metrics takes around ' + str(
                    self.configs['main']['interval']) + ' seconds or longer - please increase the interval setting')
                loop_sleep_time = 0

            logging.debug('====> loop end before sleep: %s' % datetime.now())
            time.sleep(int(loop_sleep_time))
            logging.debug('====> total loop end: %s' % datetime.now())


if __name__ == "__main__":
    # Command-line parsing: config file and exporter type are mandatory.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Specify config file",
                        metavar="FILE", required=True)
    parser.add_argument("-t", "--type",
                        help="The type of exporter [VM, versions, datastores]",
                        required=True)
    args, remaining_argv = parser.parse_known_args()

    config = YamlConfig(args.config, VcenterExporter.defaults)

    # Reject unknown exporter types before doing any work.
    if args.type.upper() not in VcenterExporter.supported_types:
        sys.exit("Current supported exporter types [--t] are " +
                 ', '.join(VcenterExporter.supported_types))

    vcenter_exporter = VcenterExporter(config, args.type)
    vcenter_exporter.collect_metrics()
예제 #8
0
def main():
    """Collect vcenter VM or datastore metrics and expose them to Prometheus.

    Parses -c/--config and -d/--datastore, connects to the vcenter given in
    the config's ``main`` section, then loops forever: every ``interval``
    seconds it queries vm performance stats (or datastore properties) and
    publishes them as prometheus gauges via the HTTP server started below.
    """

    # config file parsing
    parser = argparse.ArgumentParser()
    parser.add_argument("-c",
                        "--config",
                        help="Specify config file",
                        metavar="FILE")
    parser.add_argument("-d",
                        "--datastore",
                        help="Get metrics for datastores instead of vms",
                        action='store_true')
    args, remaining_argv = parser.parse_known_args()
    config = YamlConfig(args.config, defaults)

    # list of vm properties we are using and which we get via property collector later
    # see: http://goo.gl/fjTEpW for all properties.
    # once for vms and once for datastores ... and some other stuff, which differs for the two cases
    if args.datastore == False:
        my_properties = [
            "runtime.powerState", "runtime.host", "config.annotation",
            "config.name", "config.instanceUuid", "config.guestId",
            "summary.config.vmPathName"
        ]
        my_name = "vm"
        my_obj_type = vim.VirtualMachine
    else:
        my_properties = [
            "summary.accessible", "summary.capacity", "summary.freeSpace",
            "summary.maintenanceMode", "summary.name", "summary.type",
            "summary.url", "overallStatus"
        ]
        my_name = "datastore"
        my_obj_type = vim.Datastore

    # set default log level if not defined in config file
    if config.get('main').get('log'):
        logger.setLevel(
            logging.getLevelName(config.get('main').get('log').upper()))
    else:
        logger.setLevel('INFO')
    FORMAT = '[%(asctime)s] [%(levelname)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, format=FORMAT)

    # check for insecure ssl option
    # NOTE(review): ssl._create_unverified_context is a private API and
    # disables certificate verification entirely.
    si = None
    context = None
    if config.get('main').get('ignore_ssl') and \
       hasattr(ssl, "_create_unverified_context"):
        context = ssl._create_unverified_context()

    # connect to vcenter
    try:
        si = SmartConnect(host=config.get('main').get('host'),
                          user=config.get('main').get('user'),
                          pwd=config.get('main').get('password'),
                          port=int(config.get('main').get('port')),
                          sslContext=context)
        atexit.register(Disconnect, si)

    except IOError as e:
        # NOTE(review): "..." + e concatenates str and an exception object,
        # which raises TypeError in Python 3 — should be str(e).
        logging.error("Could not connect to vcenter." + e)

    if not si:
        raise SystemExit("Unable to connect to host with supplied info.")

    content = si.RetrieveContent()
    perfManager = content.perfManager

    # get the datacenter info
    datacenter = si.content.rootFolder.childEntity[0]
    datacentername = datacenter.name
    logging.debug('datacenter name: ' + datacentername)

    # create a list of vim.VirtualMachine / vim.Datastore objects so that we can query them for statistics
    container = content.rootFolder
    viewType = [my_obj_type]
    recursive = True

    # initialize some variables
    counterInfo = {}
    gauge = {}

    # time intervall to average vcenter data across in seconds
    interval = int(config.get('main').get('interval'))

    # compile a regex for trying to filter out openstack generated vms - they all have the "name:" field set
    openstack_match_regex = re.compile("^name:")

    # compile a regex for stripping out not required parts of hostnames etc. to have shorter label names (for better grafana display)
    if config.get('main').get('shorter_names_regex'):
        shorter_names_regex = re.compile(
            config.get('main').get('shorter_names_regex'))
    else:
        # an empty pattern matches (zero-width) at the start of any string,
        # so sub('', ...) below is effectively a no-op in this case
        shorter_names_regex = re.compile('')
    logging.debug("name shortening regex: " +
                  str(config.get('main').get('shorter_names_regex')))

    # compile a regex for matching the vcenter_node name, so that we can deal only with the matching node or bb with this vcenter-exporter
    if config.get('main').get('host_match_regex'):
        host_match_regex = re.compile(
            config.get('main').get('host_match_regex'))
    else:
        # empty pattern: matches every host name (match-all fallback)
        host_match_regex = re.compile('')
    logging.debug("vcenter_node name (host) regex: " +
                  str(config.get('main').get('host_match_regex')))

    # compile a regex for matching the vmware_name against machines we do not want to collect metrics for (canary, blackbox vms etc.)
    if config.get('main').get('ignore_match_regex'):
        ignore_match_regex = re.compile(
            config.get('main').get('ignore_match_regex'))
    else:
        ignore_match_regex = re.compile(
            'this_string_will_definitely_not_match_any_vmware_name')
    logging.debug("vmware name ignore regex: " +
                  str(config.get('main').get('ignore_match_regex')))

    # create a mapping from performance stats to their counterIDs
    # counterInfo: [performance stat => counterId]
    # performance stat example: cpu.usagemhz.LATEST
    # counterId example: 6
    # level defines the amounts of metrics available and its default setting in the vcenter here is 1
    counterids = perfManager.QueryPerfCounterByLevel(level=4)

    # start up the http server to expose the prometheus metrics
    start_http_server(int(config.get('main').get('listen_port')))

    if args.datastore == False:
        logging.debug('list of all available metrics and their counterids')
        # loop over all counterids and build their full name and a dict relating it to the ids
        for c in counterids:
            fullName = c.groupInfo.key + "." + c.nameInfo.key + "." + c.rollupType
            logging.debug(fullName + ': ' + str(c.key))
            counterInfo[fullName] = c.key

            # define a dict of vm gauges for the counter ids
            gauge['vcenter_' + fullName.replace('.', '_')] = Gauge(
                'vcenter_' + fullName.replace('.', '_'),
                'vcenter_' + fullName.replace('.', '_'), [
                    'vmware_name', 'project_id', 'vcenter_name',
                    'vcenter_node', 'instance_uuid', 'guest_id', 'datastore',
                    'metric_detail'
                ])

        # in case we have a configured set of metrics to handle, use those - otherwise use all we can get
        selected_metrics = config.get('main').get('vm_metrics')
        if selected_metrics:
            counterIDs = [
                counterInfo[i] for i in selected_metrics if i in counterInfo
            ]
        else:
            counterIDs = [i.key for i in counterids]
    else:
        # define the gauges - they have to be defined by hand for the datastores, as there is no clear pattern behind
        gauge['vcenter_datastore_accessible'] = Gauge(
            'vcenter_datastore_accessible', 'vcenter_datastore_accessible',
            ['datastore_name', 'datastore_type', 'datastore_url'])
        gauge['vcenter_datastore_capacity'] = Gauge(
            'vcenter_datastore_capacity', 'vcenter_datastore_capacity',
            ['datastore_name', 'datastore_type', 'datastore_url'])
        gauge['vcenter_datastore_freespace'] = Gauge(
            'vcenter_datastore_freespace', 'vcenter_datastore_freespace',
            ['datastore_name', 'datastore_type', 'datastore_url'])
        gauge['vcenter_datastore_maintenancemode'] = Gauge(
            'vcenter_datastore_maintenancemode',
            'vcenter_datastore_maintenancemode',
            ['datastore_name', 'datastore_type', 'datastore_url'])
        gauge['vcenter_datastore_overallstatus'] = Gauge(
            'vcenter_datastore_overallstatus',
            'vcenter_datastore_overallstatus',
            ['datastore_name', 'datastore_type', 'datastore_url'])

    # infinite loop for getting the metrics
    while True:
        logging.debug('====> total loop start: %s' % datetime.now())
        # get the start time of the loop to be able to fill it to intervall exactly at the end
        loop_start_time = int(time.time())

        # first the vm metric case
        if args.datastore == False:
            # get all the data regarding vcenter hosts
            hostView = content.viewManager.CreateContainerView(
                container, [vim.HostSystem], recursive)

            hostssystems = hostView.view

            # build a dict to lookup the hostname by its id later
            hostsystemsdict = {}
            for host in hostssystems:
                hostsystemsdict[host] = host.name
            logging.debug(
                'list of all available vcenter nodes and their internal id')
            logging.debug(hostsystemsdict)

        # collect the properties we are interested in
        view = get_container_view(si, obj_type=[my_obj_type])
        my_data = collect_properties(si,
                                     view_ref=view,
                                     obj_type=my_obj_type,
                                     path_set=my_properties,
                                     include_mors=True)

        my_count = 0

        # define the time range in seconds the metric data from the vcenter should be averaged across
        # all based on vcenter time
        vchtime = si.CurrentTime()
        startTime = vchtime - timedelta(seconds=(interval + 60))
        endTime = vchtime - timedelta(seconds=60)

        # loop over all vmware machines
        for item in my_data:
            try:
                if args.datastore == False:
                    # only consider machines which have an annotation, are powered on, match our regex for the host system and are not in the ignore list
                    if (item["runtime.powerState"] == "poweredOn"
                            and openstack_match_regex.match(
                                item["config.annotation"])
                            and host_match_regex.match(
                                hostsystemsdict[item["runtime.host"]])
                        ) and not ignore_match_regex.match(
                            item["config.name"]):
                        logging.debug('current vm processed - ' +
                                      item["config.name"])

                        logging.debug('==> running on vcenter node: ' +
                                      hostsystemsdict[item["runtime.host"]])

                        # split the multi-line annotation into a dict per property (name, project-id, ...)
                        annotation_lines = item["config.annotation"].split(
                            '\n')

                        # rename flavor: to flavor_, so that it does not break the split on : below
                        annotation_lines = [
                            w.replace('flavor:', 'flavor_')
                            for w in annotation_lines
                        ]

                        # the filter is for filtering out empty lines
                        annotations = dict(
                            s.split(':', 1)
                            for s in filter(None, annotation_lines))

                        # datastore name
                        datastore = item["summary.config.vmPathName"].split(
                            '[', 1)[1].split(']')[0]

                        # get a list of metricids for this vm in preparation for the stats query
                        metricIDs = [
                            vim.PerformanceManager.MetricId(counterId=i,
                                                            instance="*")
                            for i in counterIDs
                        ]

                        # query spec for the metric stats query, the intervalId is the default one
                        logging.debug(
                            '==> vim.PerformanceManager.QuerySpec start: %s' %
                            datetime.now())
                        spec = vim.PerformanceManager.QuerySpec(
                            maxSample=1,
                            entity=item["obj"],
                            metricId=metricIDs,
                            intervalId=20,
                            startTime=startTime,
                            endTime=endTime)
                        logging.debug(
                            '==> vim.PerformanceManager.QuerySpec end: %s' %
                            datetime.now())

                        # get metric stats from vcenter
                        logging.debug('==> perfManager.QueryStats start: %s' %
                                      datetime.now())
                        result = perfManager.QueryStats(querySpec=[spec])
                        logging.debug('==> perfManager.QueryStats end: %s' %
                                      datetime.now())

                        # loop over the metrics
                        logging.debug('==> gauge loop start: %s' %
                                      datetime.now())
                        for val in result[0].value:
                            # send gauges to prometheus exporter: metricname and value with
                            # labels: vm name, project id, vcenter name, vcneter
                            # node, instance uuid and metric detail (for instance a partition
                            # for io or an interface for net metrics) - we update the gauge
                            # only if the value is not -1 which means the vcenter has no value
                            if val.value[0] != -1:
                                if val.id.instance == '':
                                    metric_detail = 'total'
                                else:
                                    metric_detail = val.id.instance
                                # NOTE(review): indexing dict.keys()/.values()
                                # works only on Python 2; in Python 3 these
                                # views are not subscriptable and this line
                                # raises TypeError — confirm the target
                                # interpreter, or build a reverse id->name
                                # mapping instead.
                                gauge['vcenter_' +
                                      counterInfo.keys()[counterInfo.values(
                                      ).index(val.id.counterId)].replace(
                                          '.', '_')].labels(
                                              annotations['name'],
                                              annotations['projectid'],
                                              datacentername,
                                              shorter_names_regex.sub(
                                                  '', hostsystemsdict[
                                                      item["runtime.host"]]),
                                              item["config.instanceUuid"],
                                              item["config.guestId"],
                                              datastore,
                                              metric_detail).set(val.value[0])
                        logging.debug('==> gauge loop end: %s' %
                                      datetime.now())
                # alternatively the datastore metric case
                else:
                    logging.debug('current datastore processed - ' +
                                  item["summary.name"])

                    logging.debug('==> accessible: ' +
                                  str(item["summary.accessible"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["summary.accessible"] == True:
                        number_accessible = 1
                    else:
                        number_accessible = 0
                    logging.debug('==> capacity: ' +
                                  str(item["summary.capacity"]))
                    logging.debug('==> freeSpace: ' +
                                  str(item["summary.freeSpace"]))
                    logging.debug('==> maintenanceMode: ' +
                                  str(item["summary.maintenanceMode"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["summary.maintenanceMode"] == "normal":
                        number_maintenanceMode = 0
                    else:
                        # fallback to note if we do not yet catch a value
                        number_maintenanceMode = -1
                        logging.info(
                            'unexpected maintenanceMode for datastore ' +
                            item["summary.name"])
                    logging.debug('==> type: ' + str(item["summary.type"]))
                    logging.debug('==> url: ' + str(item["summary.url"]))
                    logging.debug('==> overallStatus: ' +
                                  str(item["overallStatus"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["overallStatus"] == "green":
                        number_overallStatus = 0
                    elif item["overallStatus"] == "yellow":
                        number_overallStatus = 1
                    elif item["overallStatus"] == "red":
                        number_overallStatus = 2
                    else:
                        # fallback to note if we do not yet catch a value
                        number_overallStatus = -1
                        logging.info(
                            'unexpected overallStatus for datastore ' +
                            item["summary.name"])

                    # set the gauges for the datastore properties
                    logging.debug('==> gauge start: %s' % datetime.now())
                    gauge['vcenter_datastore_accessible'].labels(
                        item["summary.name"], item["summary.type"],
                        item["summary.url"]).set(number_accessible)
                    gauge['vcenter_datastore_capacity'].labels(
                        item["summary.name"], item["summary.type"],
                        item["summary.url"]).set(item["summary.capacity"])
                    gauge['vcenter_datastore_freespace'].labels(
                        item["summary.name"], item["summary.type"],
                        item["summary.url"]).set(item["summary.freeSpace"])
                    gauge['vcenter_datastore_maintenancemode'].labels(
                        item["summary.name"], item["summary.type"],
                        item["summary.url"]).set(number_maintenanceMode)
                    gauge['vcenter_datastore_overallstatus'].labels(
                        item["summary.name"], item["summary.type"],
                        item["summary.url"]).set(number_overallStatus)
                    logging.debug('==> gauge end: %s' % datetime.now())

                my_count += 1

            except IndexError:
                logging.info('a ' + my_name + ' disappeared during processing')

        loop_end_time = int(time.time())

        logging.info('number of ' + my_name + 's we got metrics for: ' +
                     str(my_count) + ' - actual runtime: ' +
                     str(loop_end_time - loop_start_time) + 's')

        # this is the time we sleep to fill the loop runtime until it reaches "interval"
        # the 0.9 makes sure we have some overlap to the last interval to avoid gaps in
        # metrics coverage (i.e. we get the metrics quicker than the averaging time)
        loop_sleep_time = 0.9 * interval - (loop_end_time - loop_start_time)
        if loop_sleep_time < 0:
            # NOTE: logging.warn is a deprecated alias of logging.warning
            logging.warn(
                'getting the metrics takes around ' + str(interval) +
                ' seconds or longer - please increase the interval setting')
            loop_sleep_time = 0

        logging.debug('====> loop end before sleep: %s' % datetime.now())
        time.sleep(int(loop_sleep_time))
        logging.debug('====> total loop end: %s' % datetime.now())
예제 #9
0
def set_config():
    """Merge settings from ``args.config_file`` into the global _CONFIG dict.

    Raises:
        FileNotFoundError: if the configured path is not a regular file.
    """
    global _CONFIG
    if os.path.isfile(args.config_file):
        _CONFIG.update(YamlConfig(args.config_file, None))
    else:
        # A bare `raise Exception` carried no type or context at all.
        # FileNotFoundError is still a subclass of Exception, so existing
        # `except Exception` handlers keep working.
        raise FileNotFoundError(
            "Config file not found: {0}".format(args.config_file))
예제 #10
0
 def __init__(self, args):
     """Read the YAML config file; exit the process if it cannot be loaded."""
     try:
         loaded = YamlConfig(args.config_file)
     except Exception as e:
         raise SystemExit(f'ERROR - {e}')
     self.config = loaded
예제 #11
0
        t = threading.Thread(target=httpd.serve_forever())
        t.daemon = True
        t.start()
    except KeyboardInterrupt:
        logging.info("Stopping Arista eAPI Prometheus Server")

def enable_logging():
    """Configure root logging to stdout.

    The level is INFO when APP_ENV is "production" (the default) and
    DEBUG for any other environment.
    """
    logger = logging.getLogger()
    app_environment = os.getenv('APP_ENV', default="production").lower()
    if app_environment == "production":
        logger.setLevel('INFO')
    else:
        logger.setLevel('DEBUG')
    # Renamed from `format`, which shadowed the builtin of the same name.
    log_format = '%(asctime)-15s %(process)d %(levelname)s %(filename)s:%(lineno)d %(message)s'
    logging.basicConfig(stream=sys.stdout, format=log_format)

if __name__ == '__main__':
    # command line options
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--config", help="Specify config yaml file",
                        metavar="FILE", required=False, default="config.yml")
    args = parser.parse_args()

    # Load configuration, set up logging, then start the web application.
    config = YamlConfig(args.config)
    enable_logging()
    falcon_app()