예제 #1
0
    def write_dataset(self, gerrit):
        '''
        Write the dataset (YAML description plus CSV file) to disk.

        Nothing is written when ``self.observations`` equals ``{}``.
        Otherwise ``self.create_dataset()`` is run, a ``YamlConfig`` built
        from ``gerrit`` and this object writes its file, and the contents of
        ``self.file_contents`` are written to ``self.full_csv_path``.
        '''
        if self.observations == {}:
            # an empty dataset is not worth writing
            return

        self.create_dataset()
        yaml = YamlConfig(gerrit, self)
        yaml.write_file()

        # the context manager closes the file even if write() raises
        with open(self.full_csv_path, 'w') as fh:
            fh.write(self.file_contents.getvalue())
예제 #2
0
def main():
    """Collect vCenter metrics in an endless loop and expose them as gauges.

    Command line options:
        -c/--config FILE  YAML config file, loaded through ``YamlConfig``.
        -d/--datastore    export datastore properties instead of the
                          performance counters of virtual machines.

    The ``main`` section of the config supplies the connection settings
    (``host``, ``user``, ``password``, ``port``, ``ignore_ssl``), the
    ``listen_port`` of the metrics http server, the polling ``interval`` in
    seconds, the ``log`` level and the optional ``shorter_names_regex``,
    ``host_match_regex``, ``ignore_match_regex`` and ``vm_metrics`` settings.

    Raises:
        SystemExit: if no connection to the vCenter could be established.
    """

    # config file parsing
    parser = argparse.ArgumentParser()
    parser.add_argument("-c",
                        "--config",
                        help="Specify config file",
                        metavar="FILE")
    parser.add_argument("-d",
                        "--datastore",
                        help="Get metrics for datastores instead of vms",
                        action='store_true')
    args, remaining_argv = parser.parse_known_args()
    config = YamlConfig(args.config, defaults)
    main_cfg = config.get('main')
    datastore_mode = args.datastore

    # list of vm properties we are using and which we get via property collector later
    # see: http://goo.gl/fjTEpW for all properties.
    # once for vms and once for datastores ... and some other stuff, which differs for the two cases
    if not datastore_mode:
        my_properties = [
            "runtime.powerState", "runtime.host", "config.annotation",
            "config.name", "config.instanceUuid", "config.guestId",
            "summary.config.vmPathName"
        ]
        my_name = "vm"
        my_obj_type = vim.VirtualMachine
    else:
        my_properties = [
            "summary.accessible", "summary.capacity", "summary.freeSpace",
            "summary.maintenanceMode", "summary.name", "summary.type",
            "summary.url", "overallStatus"
        ]
        my_name = "datastore"
        my_obj_type = vim.Datastore

    # set default log level if not defined in config file
    log_level = main_cfg.get('log')
    if log_level:
        logger.setLevel(logging.getLevelName(log_level.upper()))
    else:
        logger.setLevel('INFO')
    FORMAT = '[%(asctime)s] [%(levelname)s] %(message)s'
    logging.basicConfig(stream=sys.stdout, format=FORMAT)

    # check for insecure ssl option
    si = None
    context = None
    if main_cfg.get('ignore_ssl') and \
       hasattr(ssl, "_create_unverified_context"):
        context = ssl._create_unverified_context()

    # connect to vcenter
    try:
        si = SmartConnect(host=main_cfg.get('host'),
                          user=main_cfg.get('user'),
                          pwd=main_cfg.get('password'),
                          port=int(main_cfg.get('port')),
                          sslContext=context)
        atexit.register(Disconnect, si)

    except IOError as e:
        # pass the exception as a logging argument: concatenating it to a
        # str raises TypeError and would hide the real connection error
        logging.error("Could not connect to vcenter. %s", e)

    if not si:
        raise SystemExit("Unable to connect to host with supplied info.")

    content = si.RetrieveContent()
    perfManager = content.perfManager

    # get the datacenter info
    datacenter = si.content.rootFolder.childEntity[0]
    datacentername = datacenter.name
    logging.debug('datacenter name: ' + datacentername)

    # root of the inventory used for the container views created below
    container = content.rootFolder
    recursive = True

    # initialize some variables
    counterInfo = {}
    gauge = {}
    # reverse lookup: counterId => gauge name, filled together with counterInfo
    gaugeNameById = {}

    # time interval to average vcenter data across in seconds
    interval = int(main_cfg.get('interval'))

    # compile a regex for trying to filter out openstack generated vms - they all have the "name:" field set
    openstack_match_regex = re.compile("^name:")

    # compile a regex for stripping out not required parts of hostnames etc. to have shorter label names (for better grafana display)
    shorter_names_pattern = main_cfg.get('shorter_names_regex')
    shorter_names_regex = re.compile(shorter_names_pattern or '')
    logging.debug("name shortening regex: " + str(shorter_names_pattern))

    # compile a regex for matching the vcenter_node name, so that we can deal only with the matching node or bb with this vcenter-exporter
    host_match_pattern = main_cfg.get('host_match_regex')
    host_match_regex = re.compile(host_match_pattern or '')
    logging.debug("vcenter_node name (host) regex: " +
                  str(host_match_pattern))

    # compile a regex for matching the vmware_name against machines we do not want to collect metrics for (canary, blackbox vms etc.)
    ignore_match_pattern = main_cfg.get('ignore_match_regex')
    ignore_match_regex = re.compile(
        ignore_match_pattern or
        'this_string_will_definitely_not_match_any_vmware_name')
    logging.debug("vmware name ignore regex: " + str(ignore_match_pattern))

    # create a mapping from performance stats to their counterIDs
    # counterInfo: [performance stat => counterId]
    # performance stat example: cpu.usagemhz.LATEST
    # counterId example: 6
    # level defines the amounts of metrics available and its default setting in the vcenter here is 1
    counterids = perfManager.QueryPerfCounterByLevel(level=4)

    # start up the http server to expose the prometheus metrics
    start_http_server(int(main_cfg.get('listen_port')))

    if not datastore_mode:
        logging.debug('list of all available metrics and their counterids')
        # loop over all counterids and build their full name and a dict relating it to the ids
        for c in counterids:
            fullName = c.groupInfo.key + "." + c.nameInfo.key + "." + c.rollupType
            logging.debug(fullName + ': ' + str(c.key))
            counterInfo[fullName] = c.key

            # define a dict of vm gauges for the counter ids
            gauge_name = 'vcenter_' + fullName.replace('.', '_')
            gaugeNameById[c.key] = gauge_name
            gauge[gauge_name] = Gauge(gauge_name, gauge_name, [
                'vmware_name', 'project_id', 'vcenter_name', 'vcenter_node',
                'instance_uuid', 'guest_id', 'datastore', 'metric_detail'
            ])

        # in case we have a configured set of metrics to handle, use those - otherwise use all we can get
        selected_metrics = main_cfg.get('vm_metrics')
        if selected_metrics:
            counterIDs = [
                counterInfo[i] for i in selected_metrics if i in counterInfo
            ]
        else:
            counterIDs = [i.key for i in counterids]

        # the list of metricids is the same for every vm and every loop
        # run, so it is built once here in preparation for the stats queries
        metricIDs = [
            vim.PerformanceManager.MetricId(counterId=i, instance="*")
            for i in counterIDs
        ]
    else:
        # define the gauges - they have to be defined by hand for the datastores, as there is no clear pattern behind
        for suffix in ('accessible', 'capacity', 'freespace',
                       'maintenancemode', 'overallstatus'):
            gauge_name = 'vcenter_datastore_' + suffix
            gauge[gauge_name] = Gauge(
                gauge_name, gauge_name,
                ['datastore_name', 'datastore_type', 'datastore_url'])

    # infinite loop for getting the metrics
    while True:
        logging.debug('====> total loop start: %s' % datetime.now())
        # get the start time of the loop to be able to fill it to interval exactly at the end
        loop_start_time = int(time.time())

        # first the vm metric case
        if not datastore_mode:
            # get all the data regarding vcenter hosts
            # NOTE(review): a new container view is created on every loop
            # run and never destroyed here - confirm whether it should be
            hostView = content.viewManager.CreateContainerView(
                container, [vim.HostSystem], recursive)

            hostssystems = hostView.view

            # build a dict to lookup the hostname by its id later
            hostsystemsdict = {}
            for host in hostssystems:
                hostsystemsdict[host] = host.name
            logging.debug(
                'list of all available vcenter nodes and their internal id')
            logging.debug(hostsystemsdict)

        # collect the properties we are interested in
        view = get_container_view(si, obj_type=[my_obj_type])
        my_data = collect_properties(si,
                                     view_ref=view,
                                     obj_type=my_obj_type,
                                     path_set=my_properties,
                                     include_mors=True)

        my_count = 0

        # define the time range in seconds the metric data from the vcenter should be averaged across
        # all based on vcenter time
        vchtime = si.CurrentTime()
        startTime = vchtime - timedelta(seconds=(interval + 60))
        endTime = vchtime - timedelta(seconds=60)

        # loop over all vmware machines
        for item in my_data:
            try:
                if not datastore_mode:
                    # only consider machines which have an annotation, are powered on, match our regex for the host system and are not in the ignore list
                    if (item["runtime.powerState"] == "poweredOn"
                            and openstack_match_regex.match(
                                item["config.annotation"])
                            and host_match_regex.match(
                                hostsystemsdict[item["runtime.host"]])
                        ) and not ignore_match_regex.match(
                            item["config.name"]):
                        logging.debug('current vm processed - ' +
                                      item["config.name"])

                        node_name = hostsystemsdict[item["runtime.host"]]
                        logging.debug('==> running on vcenter node: ' +
                                      node_name)

                        # split the multi-line annotation into a dict per property (name, project-id, ...)
                        annotation_lines = item["config.annotation"].split(
                            '\n')

                        # rename flavor: to flavor_, so that it does not break the split on : below
                        annotation_lines = [
                            w.replace('flavor:', 'flavor_')
                            for w in annotation_lines
                        ]

                        # the filter is for filtering out empty lines
                        annotations = dict(
                            s.split(':', 1)
                            for s in filter(None, annotation_lines))

                        # datastore name
                        datastore = item["summary.config.vmPathName"].split(
                            '[', 1)[1].split(']')[0]

                        # query spec for the metric stats query, the intervalId is the default one
                        logging.debug(
                            '==> vim.PerformanceManager.QuerySpec start: %s' %
                            datetime.now())
                        spec = vim.PerformanceManager.QuerySpec(
                            maxSample=1,
                            entity=item["obj"],
                            metricId=metricIDs,
                            intervalId=20,
                            startTime=startTime,
                            endTime=endTime)
                        logging.debug(
                            '==> vim.PerformanceManager.QuerySpec end: %s' %
                            datetime.now())

                        # get metric stats from vcenter
                        logging.debug('==> perfManager.QueryStats start: %s' %
                                      datetime.now())
                        result = perfManager.QueryStats(querySpec=[spec])
                        logging.debug('==> perfManager.QueryStats end: %s' %
                                      datetime.now())

                        # loop over the metrics
                        logging.debug('==> gauge loop start: %s' %
                                      datetime.now())
                        for val in result[0].value:
                            # send gauges to prometheus exporter: metricname and value with
                            # labels: vm name, project id, vcenter name, vcenter
                            # node, instance uuid and metric detail (for instance a partition
                            # for io or an interface for net metrics) - we update the gauge
                            # only if the value is not -1 which means the vcenter has no value
                            if val.value[0] != -1:
                                if val.id.instance == '':
                                    metric_detail = 'total'
                                else:
                                    metric_detail = val.id.instance
                                gauge[gaugeNameById[val.id.counterId]].labels(
                                    annotations['name'],
                                    annotations['projectid'],
                                    datacentername,
                                    shorter_names_regex.sub('', node_name),
                                    item["config.instanceUuid"],
                                    item["config.guestId"],
                                    datastore,
                                    metric_detail).set(val.value[0])
                        logging.debug('==> gauge loop end: %s' %
                                      datetime.now())
                # alternatively the datastore metric case
                else:
                    logging.debug('current datastore processed - ' +
                                  item["summary.name"])

                    logging.debug('==> accessible: ' +
                                  str(item["summary.accessible"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["summary.accessible"] == True:
                        number_accessible = 1
                    else:
                        number_accessible = 0
                    logging.debug('==> capacity: ' +
                                  str(item["summary.capacity"]))
                    logging.debug('==> freeSpace: ' +
                                  str(item["summary.freeSpace"]))
                    logging.debug('==> maintenanceMode: ' +
                                  str(item["summary.maintenanceMode"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["summary.maintenanceMode"] == "normal":
                        number_maintenanceMode = 0
                    else:
                        # fallback to note if we do not yet catch a value
                        number_maintenanceMode = -1
                        logging.info(
                            'unexpected maintenanceMode for datastore ' +
                            item["summary.name"])
                    logging.debug('==> type: ' + str(item["summary.type"]))
                    logging.debug('==> url: ' + str(item["summary.url"]))
                    logging.debug('==> overallStatus: ' +
                                  str(item["overallStatus"]))
                    # convert strings to numbers, so that we can generate a prometheus metric from them
                    if item["overallStatus"] == "green":
                        number_overallStatus = 0
                    elif item["overallStatus"] == "yellow":
                        number_overallStatus = 1
                    elif item["overallStatus"] == "red":
                        number_overallStatus = 2
                    else:
                        # fallback to note if we do not yet catch a value
                        number_overallStatus = -1
                        logging.info(
                            'unexpected overallStatus for datastore ' +
                            item["summary.name"])

                    # set the gauges for the datastore properties
                    logging.debug('==> gauge start: %s' % datetime.now())
                    ds_labels = (item["summary.name"], item["summary.type"],
                                 item["summary.url"])
                    gauge['vcenter_datastore_accessible'].labels(
                        *ds_labels).set(number_accessible)
                    gauge['vcenter_datastore_capacity'].labels(
                        *ds_labels).set(item["summary.capacity"])
                    gauge['vcenter_datastore_freespace'].labels(
                        *ds_labels).set(item["summary.freeSpace"])
                    gauge['vcenter_datastore_maintenancemode'].labels(
                        *ds_labels).set(number_maintenanceMode)
                    gauge['vcenter_datastore_overallstatus'].labels(
                        *ds_labels).set(number_overallStatus)
                    logging.debug('==> gauge end: %s' % datetime.now())

                # NOTE(review): this also counts vms which were skipped by
                # the filter above, although the log line below reads
                # "we got metrics for" - confirm the intended meaning
                my_count += 1

            except IndexError:
                logging.info('a ' + my_name + ' disappeared during processing')

        loop_end_time = int(time.time())

        logging.info('number of ' + my_name + 's we got metrics for: ' +
                     str(my_count) + ' - actual runtime: ' +
                     str(loop_end_time - loop_start_time) + 's')

        # this is the time we sleep to fill the loop runtime until it reaches "interval"
        # the 0.9 makes sure we have some overlap to the last interval to avoid gaps in
        # metrics coverage (i.e. we get the metrics quicker than the averaging time)
        loop_sleep_time = 0.9 * interval - (loop_end_time - loop_start_time)
        if loop_sleep_time < 0:
            logging.warning(
                'getting the metrics takes around ' + str(interval) +
                ' seconds or longer - please increase the interval setting')
            loop_sleep_time = 0

        logging.debug('====> loop end before sleep: %s' % datetime.now())
        time.sleep(int(loop_sleep_time))
        logging.debug('====> total loop end: %s' % datetime.now())
예제 #3
0
def set_config():
    """Merge the YAML file named by ``args.config_file`` into ``_CONFIG``.

    Raises:
        Exception: if ``args.config_file`` is not an existing regular file.
    """
    global _CONFIG
    if not os.path.isfile(args.config_file):
        # same exception type as before, but now it names the offending path
        raise Exception(
            "config file not found: {0}".format(args.config_file))
    _CONFIG.update(YamlConfig(args.config_file, None))
예제 #4
0
            # metrics coverage (i.e. we get the metrics quicker than the averaging time)
            loop_sleep_time = 0.9 * \
                self.configs['main']['vc_polling_interval'] - \
                (loop_end_time - loop_start_time)
            if loop_sleep_time < 0:
                logging.warn('getting the metrics takes around ' + str(
                    self.configs['main']['vc_polling_interval']) + ' seconds or longer - please increase the interval setting')
                loop_sleep_time = 0

            logging.debug('====> loop end before sleep: %s' % datetime.now())
            time.sleep(int(loop_sleep_time))
            logging.debug('====> total loop end: %s' % datetime.now())


if __name__ == "__main__":
    # command line handling: location of the config file and exporter type
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c",
        "--config",
        default="config.yaml",
        metavar="FILE",
        help="Specify config file",
    )
    parser.add_argument(
        "-t",
        "--type",
        default="versionsandapi",
        help="The type of exporter [VM, versions, datastores]",
    )
    args, remaining_argv = parser.parse_known_args()
    config = YamlConfig(args.config, VcenterExporter.defaults)

    # refuse to start for an exporter type the exporter class does not list
    if not args.type.upper() in VcenterExporter.supported_types:
        sys.exit("Current supported exporter types [--t] are {0}".format(
            ', '.join(VcenterExporter.supported_types)))

    # build the exporter and hand control over to its collection routine
    vcenter_exporter = VcenterExporter(config, args.type)
    vcenter_exporter.collect_metrics()
예제 #5
0
class VMWareMetricsResource(Resource):
    """
    VMWare twisted ``Resource`` handling multi endpoints
    Only handle /metrics and /healthz path
    """
    isLeaf = True

    def __init__(self):
        """
        Init Metric Resource

        Runs the ``Resource`` base initialiser and stores a new ``Threader``
        instance on ``self.threader``.
        """
        Resource.__init__(self)
        self.threader = Threader()

    def configure(self, args):
        """
        Load the exporter configuration into ``self.config``.

        When ``args.config_file`` is set, the file is read through
        ``YamlConfig`` and must contain a ``default`` section. Otherwise a
        single ``default`` section is built from the environment variables
        ``VSPHERE_HOST``, ``VSPHERE_USER``, ``VSPHERE_PASSWORD`` and
        ``VSPHERE_IGNORE_SSL``; the ``collect_only`` switches are taken from
        the ``VSPHERE_COLLECT_*`` variables and default to enabled.

        Raises:
            SystemExit: if the config file cannot be read or has no
                ``default`` section.
        """
        if args.config_file:
            try:
                self.config = YamlConfig(args.config_file)
                if 'default' not in self.config.keys():
                    log("Error, you must have a default section in config file (for now)"
                        )
                    # SystemExit is not an Exception subclass, so it passes
                    # the handler below untouched
                    raise SystemExit(1)
            except Exception as exception:
                # format the exception itself: the ``message`` attribute
                # does not exist on Python 3 exceptions
                raise SystemExit(
                    "Error while reading configuration file: {0}".format(
                        exception))
            return

        def env_flag(name):
            """ True unless the variable is set to an explicit "off" value """
            raw = os.environ.get(name)
            if raw is None:
                return True
            return raw.strip().lower() not in ('', '0', 'false', 'no', 'off')

        # NOTE(review): user and password were the literal "******" although
        # both values are passed to format() - restored as {1} / {2}, confirm
        config_data = """
        default:
            vsphere_host: "{0}"
            vsphere_user: "{1}"
            vsphere_password: "{2}"
            ignore_ssl: {3}
            collect_only:
                vms: True
                vmguests: True
                datastores: True
                hosts: True
                snapshots: True
        """.format(os.environ.get('VSPHERE_HOST'),
                   os.environ.get('VSPHERE_USER'),
                   os.environ.get('VSPHERE_PASSWORD'),
                   os.environ.get('VSPHERE_IGNORE_SSL', False))
        # safe_load: the document embeds environment values, so it must not
        # be able to construct arbitrary python objects
        self.config = yaml.safe_load(config_data)

        # environment values arrive as strings; convert them to real
        # booleans so that e.g. "false" actually switches a collector off
        collect_only = self.config['default']['collect_only']
        for key, env_name in (
                ('hosts', 'VSPHERE_COLLECT_HOSTS'),
                ('datastores', 'VSPHERE_COLLECT_DATASTORES'),
                ('vms', 'VSPHERE_COLLECT_VMS'),
                ('vmguests', 'VSPHERE_COLLECT_VMGUESTS'),
                ('snapshots', 'VSPHERE_COLLECT_SNAPSHOTS'),
        ):
            collect_only[key] = env_flag(env_name)

    def render_GET(self, request):
        """
        Handle GET requests for metrics, health, and everything else.

        ``/metrics`` schedules ``generate_latest_metrics`` on the reactor and
        returns ``NOT_DONE_YET``; ``/healthz`` answers 200 directly; every
        other path is answered with 404.
        """
        path = request.path.decode()
        request.setHeader("Content-Type", "text/plain; charset=UTF-8")
        if path == '/metrics':
            # the response is produced asynchronously by the callback chain
            deferred_request = deferLater(reactor, 0, lambda: request)
            deferred_request.addCallback(self.generate_latest_metrics)
            deferred_request.addErrback(self.errback, request)
            return NOT_DONE_YET

        if path == '/healthz':
            request.setResponseCode(200)
            log("Service is UP")
            return 'Server is UP'.encode()

        # request.uri may be bytes; decode it before building the log
        # message, otherwise the concatenation raises TypeError
        uri = request.uri
        if not isinstance(uri, str):
            uri = uri.decode('utf-8', 'replace')
        log("Uri not found: " + uri)
        request.setResponseCode(404)
        return '404 Not Found'.encode()

    def errback(self, failure, request):
        """ report a failed request and close it; yields None """
        failure.printTraceback()
        log(failure)
        # processingFailed sends a trace to the browser and closes the request
        request.processingFailed(failure)

    def generate_latest_metrics(self, request):
        """
        Write the latest metrics in text exposition format to ``request``.

        The config section is chosen by the ``section`` query argument
        (``default`` if absent or unknown). The vsphere host is taken from
        that section, else from the ``target`` query argument, else from the
        ``vsphere_host`` query argument. Without any host, or when
        ``collect`` yields no metrics, the request is finished with a 500.
        """
        import numbers

        def query_arg(name, default=None):
            """ first value of a query argument, decoded to text """
            # twisted keys request.args by bytes and stores lists of bytes
            values = request.args.get(name.encode('utf-8'))
            if not values:
                return default
            value = values[0]
            if not isinstance(value, str):
                value = value.decode('utf-8')
            return value

        section = query_arg('section', 'default')
        # collect() falls back to 'default' for unknown sections; do the same
        # for the host lookup instead of failing with a KeyError
        if section in self.config.keys():
            section_config = self.config[section]
        else:
            section_config = self.config['default']

        configured_host = section_config.get('vsphere_host')
        if configured_host and configured_host != "None":
            vsphere_host = configured_host
        elif query_arg('target'):
            vsphere_host = query_arg('target')
        elif query_arg('vsphere_host'):
            vsphere_host = query_arg('vsphere_host')
        else:
            request.setResponseCode(500)
            log("No vsphere_host or target defined")
            request.write(b'No vsphere_host or target defined!\n')
            request.finish()
            # without a host there is nothing to collect
            return

        output = []
        for metric in self.collect(vsphere_host, section):
            output.append('# HELP {0} {1}'.format(
                metric.name,
                metric.documentation.replace('\\', r'\\').replace('\n',
                                                                  r'\n')))
            output.append('\n# TYPE {0} {1}\n'.format(metric.name,
                                                      metric.type))
            for name, labels, value in metric.samples:
                if labels:
                    # escape backslash, newline and double quote in values
                    labelstr = '{{{0}}}'.format(','.join([
                        '{0}="{1}"'.format(
                            k,
                            v.replace('\\', r'\\').replace('\n',
                                                           r'\n').replace(
                                                               '"', r'\"'))
                        for k, v in sorted(labels.items())
                    ]))
                else:
                    labelstr = ''
                # numbers.Integral covers int and, on Python 2, long
                if isinstance(value, numbers.Integral):
                    value = float(value)
                if isinstance(value, float):
                    output.append('{0}{1} {2}\n'.format(
                        name, labelstr, _floatToGoString(value)))

        if output:
            request.write(''.join(output).encode('utf-8'))
        else:
            # the status message has to be bytes
            request.setResponseCode(500, message=b'cannot connect to vmware')
        request.finish()

    def collect(self, vsphere_host, section='default'):
        """ collects metrics """
        if section not in self.config.keys():
            log("{} is not a valid section, using default".format(section))
            section = 'default'
        host_inventory = {}
        ds_inventory = {}
        metric_list = {}
        metric_list['vms'] = {
            'vmware_vm_power_state':
            GaugeMetricFamily(
                'vmware_vm_power_state',
                'VMWare VM Power state (On / Off)',
                labels=['vm_name', 'host_name', 'dc_name', 'cluster_name']),
            'vmware_vm_boot_timestamp_seconds':
            GaugeMetricFamily(
                'vmware_vm_boot_timestamp_seconds',
                'VMWare VM boot time in seconds',
                labels=['vm_name', 'host_name', 'dc_name', 'cluster_name']),
            'vmware_vm_num_cpu':
            GaugeMetricFamily(
                'vmware_vm_num_cpu',
                'VMWare Number of processors in the virtual machine',
                labels=['vm_name', 'host_name', 'dc_name', 'cluster_name']),
        }
        metric_list['vmguests'] = {
            'vmware_vm_guest_disk_free':
            GaugeMetricFamily('vmware_vm_guest_disk_free',
                              'Disk metric per partition',
                              labels=[
                                  'vm_name',
                                  'host_name',
                                  'dc_name',
                                  'cluster_name',
                                  'partition',
                              ]),
            'vmware_vm_guest_disk_capacity':
            GaugeMetricFamily('vmware_vm_guest_disk_capacity',
                              'Disk capacity metric per partition',
                              labels=[
                                  'vm_name',
                                  'host_name',
                                  'dc_name',
                                  'cluster_name',
                                  'partition',
                              ]),
        }
        metric_list['snapshots'] = {
            'vmware_vm_snapshots':
            GaugeMetricFamily(
                'vmware_vm_snapshots',
                'VMWare current number of existing snapshots',
                labels=['vm_name', 'host_name', 'dc_name', 'cluster_name']),
            'vmware_vm_snapshot_timestamp_seconds':
            GaugeMetricFamily('vmware_vm_snapshot_timestamp_seconds',
                              'VMWare Snapshot creation time in seconds',
                              labels=[
                                  'vm_name', 'host_name', 'dc_name',
                                  'cluster_name', 'vm_snapshot_name'
                              ]),
        }
        metric_list['datastores'] = {
            'vmware_datastore_capacity_size':
            GaugeMetricFamily('vmware_datastore_capacity_size',
                              'VMWare Datasore capacity in bytes',
                              labels=['ds_name', 'dc_name', 'ds_cluster']),
            'vmware_datastore_freespace_size':
            GaugeMetricFamily('vmware_datastore_freespace_size',
                              'VMWare Datastore freespace in bytes',
                              labels=['ds_name', 'dc_name', 'ds_cluster']),
            'vmware_datastore_uncommited_size':
            GaugeMetricFamily('vmware_datastore_uncommited_size',
                              'VMWare Datastore uncommitted in bytes',
                              labels=['ds_name', 'dc_name', 'ds_cluster']),
            'vmware_datastore_provisoned_size':
            GaugeMetricFamily('vmware_datastore_provisoned_size',
                              'VMWare Datastore provisoned in bytes',
                              labels=['ds_name', 'dc_name', 'ds_cluster']),
            'vmware_datastore_hosts':
            GaugeMetricFamily('vmware_datastore_hosts',
                              'VMWare Hosts number using this datastore',
                              labels=['ds_name', 'dc_name', 'ds_cluster']),
            'vmware_datastore_vms':
            GaugeMetricFamily('vmware_datastore_vms',
                              'VMWare Virtual Machines count per datastore',
                              labels=['ds_name', 'dc_name', 'ds_cluster']),
            'vmware_datastore_maintenance_mode':
            GaugeMetricFamily(
                'vmware_datastore_maintenance_mode',
                'VMWare datastore maintenance mode (normal / inMaintenance / enteringMaintenance)',
                labels=['ds_name', 'dc_name', 'ds_cluster', 'mode']),
            'vmware_datastore_type':
            GaugeMetricFamily(
                'vmware_datastore_type',
                'VMWare datastore type (VMFS, NetworkFileSystem, NetworkFileSystem41, CIFS, VFAT, VSAN, VFFS)',
                labels=['ds_name', 'dc_name', 'ds_cluster', 'ds_type']),
            'vmware_datastore_accessible':
            GaugeMetricFamily('vmware_datastore_accessible',
                              'VMWare datastore accessible (true / false)',
                              labels=['ds_name', 'dc_name', 'ds_cluster'])
        }
        metric_list['hosts'] = {
            'vmware_host_power_state':
            GaugeMetricFamily('vmware_host_power_state',
                              'VMWare Host Power state (On / Off)',
                              labels=['host_name', 'dc_name', 'cluster_name']),
            'vmware_host_connection_state':
            GaugeMetricFamily(
                'vmware_host_connection_state',
                'VMWare Host connection state (connected / disconnected / notResponding)',
                labels=['host_name', 'dc_name', 'cluster_name', 'state']),
            'vmware_host_maintenance_mode':
            GaugeMetricFamily('vmware_host_maintenance_mode',
                              'VMWare Host maintenance mode (true / false)',
                              labels=['host_name', 'dc_name', 'cluster_name']),
            'vmware_host_boot_timestamp_seconds':
            GaugeMetricFamily('vmware_host_boot_timestamp_seconds',
                              'VMWare Host boot time in seconds',
                              labels=['host_name', 'dc_name', 'cluster_name']),
            'vmware_host_cpu_usage':
            GaugeMetricFamily('vmware_host_cpu_usage',
                              'VMWare Host CPU usage in Mhz',
                              labels=['host_name', 'dc_name', 'cluster_name']),
            'vmware_host_cpu_max':
            GaugeMetricFamily('vmware_host_cpu_max',
                              'VMWare Host CPU max availability in Mhz',
                              labels=['host_name', 'dc_name', 'cluster_name']),
            'vmware_host_memory_usage':
            GaugeMetricFamily('vmware_host_memory_usage',
                              'VMWare Host Memory usage in Mbytes',
                              labels=['host_name', 'dc_name', 'cluster_name']),
            'vmware_host_memory_max':
            GaugeMetricFamily('vmware_host_memory_max',
                              'VMWare Host Memory Max availability in Mbytes',
                              labels=['host_name', 'dc_name', 'cluster_name']),
        }

        metrics = {}
        for key, value in self.config[section]['collect_only'].items():
            if value is True:
                metrics.update(metric_list[key])

        log("Start collecting metrics from {0}".format(vsphere_host))

        self.vmware_connection = self._vmware_connect(vsphere_host, section)
        if not self.vmware_connection:
            log("Cannot connect to vmware")
            return

        content = self.vmware_connection.RetrieveContent()

        # Generate inventory dict
        log("Starting inventory collection")
        host_inventory, ds_inventory = self._vmware_get_inventory(content)
        log("Finished inventory collection")

        # Collect VMs metrics
        if self.config[section]['collect_only']['vms'] is True:
            log("Starting VM performance metrics collection")
            counter_info = self._vmware_perf_metrics(content)
            self._vmware_get_vms(content, metrics, counter_info,
                                 host_inventory)
            log("Finished VM performance metrics collection")

        # Collect VM Guests metrics
        if self.config[section]['collect_only']['vmguests'] is True:
            log("Starting VM Guests metrics collection")
            self._vmware_get_vmguests(content, metrics, host_inventory)
            log("Finished VM Guests metrics collection")

        # Collect Snapshots (count and age)
        if self.config[section]['collect_only']['snapshots'] is True:
            log("Starting VM snapshot metric collection")
            vm_snap_counts, vm_snap_ages = self._vmware_get_snapshots(
                content, host_inventory)
            for v in vm_snap_counts:
                metrics['vmware_vm_snapshots'].add_metric([
                    v['vm_name'], v['vm_host_name'], v['vm_dc_name'],
                    v['vm_cluster_name']
                ], v['vm_snapshot_count'])
            for vm_snap_age in vm_snap_ages:
                for v in vm_snap_age:
                    metrics['vmware_vm_snapshot_timestamp_seconds'].add_metric(
                        [
                            v['vm_name'], v['vm_host_name'], v['vm_dc_name'],
                            v['vm_cluster_name'], v['vm_snapshot_name']
                        ], v['vm_snapshot_timestamp_seconds'])
            log("Finished VM snapshot metric collection")

        # Collect Datastore metrics
        if self.config[section]['collect_only']['datastores'] is True:
            log("Starting datastore metrics collection")
            self._vmware_get_datastores(content, metrics, ds_inventory)
            log("Finished datastore metrics collection")

        # Collect Hosts metrics
        if self.config[section]['collect_only']['hosts'] is True:
            log("Starting host metrics collection")
            self._vmware_get_hosts(content, metrics, host_inventory)
            log("Finished host metrics collection")

        log("Finished collecting metrics from {0}".format(vsphere_host))
        self.threader.join()
        self._vmware_disconnect()

        for _key, metric in metrics.items():
            yield metric

    def _to_epoch(self, my_date):
        """
        Return ``my_date`` as seconds elapsed since 1970-01-01 00:00 UTC.

        The value is subtracted from a timezone-aware epoch, so ``my_date``
        has to be timezone-aware as well.
        """
        unix_epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
        elapsed = my_date - unix_epoch
        return elapsed.total_seconds()

    def _vmware_get_obj(self, content, vimtype, name=None):
        """
         Get the vsphere object associated with a given text name
        """
        obj = None
        container = content.viewManager.CreateContainerView(
            content.rootFolder, vimtype, True)
        if name:
            for view in container.view:
                if view.name == name:
                    obj = view
                    return [obj]
        else:
            return container.view

    def _vmware_connect(self, vsphere_host, section):
        """
        Open a session against ``vsphere_host`` with the ``section`` settings.

        Returns whatever ``connect.SmartConnect`` returns, or ``None`` after
        logging when a ``vmodl.MethodFault`` is raised.
        """
        settings = self.config[section]
        vsphere_user = settings.get('vsphere_user')
        vsphere_password = settings.get('vsphere_password')

        # Certificate verification is only skipped when the section asks for
        # it and the running ssl module offers an unverified context.
        skip_verification = settings.get('ignore_ssl') and hasattr(
            ssl, "_create_unverified_context")
        context = ssl._create_unverified_context() if skip_verification \
            else None

        try:
            return connect.SmartConnect(host=vsphere_host,
                                        user=vsphere_user,
                                        pwd=vsphere_password,
                                        sslContext=context)
        except vmodl.MethodFault as error:
            log("Caught vmodl fault: " + error.msg)
            return None

    def _vmware_disconnect(self):
        """
        Close the session stored in ``self.vmware_connection``.
        """
        active_session = self.vmware_connection
        connect.Disconnect(active_session)

    def _vmware_perf_metrics(self, content):
        """
        create a mapping from performance stats to their counterIDs
        counter_info: [performance stat => counterId]
        performance stat example: cpu.usagemhz.LATEST
        """
        counter_info = {}
        for counter in content.perfManager.perfCounter:
            prefix = counter.groupInfo.key
            counter_full = "{}.{}.{}".format(prefix, counter.nameInfo.key,
                                             counter.rollupType)
            counter_info[counter_full] = counter.key
        return counter_info

    def _vmware_full_snapshots_list(self, snapshots):
        """
        Get snapshots from a VM list, recursively
        """
        snapshot_data = []
        for snapshot in snapshots:
            snap_timestamp = self._to_epoch(snapshot.createTime)
            snap_info = {
                'vm_snapshot_name': snapshot.name,
                'vm_snapshot_timestamp_seconds': snap_timestamp
            }
            snapshot_data.append(snap_info)
            snapshot_data = snapshot_data + self._vmware_full_snapshots_list(
                snapshot.childSnapshotList)
        return snapshot_data

    def _vmware_get_snapshot_details(self, snapshots_count_table,
                                     snapshots_age_table, virtual_machine,
                                     inventory):
        """
        Gathers snapshot details
        """
        snapshot_paths = self._vmware_full_snapshots_list(
            virtual_machine.snapshot.rootSnapshotList)

        _, host_name, dc_name, cluster_name = self._vmware_vm_metadata(
            inventory, virtual_machine)

        for snapshot_path in snapshot_paths:
            snapshot_path['vm_name'] = virtual_machine.name
            snapshot_path['vm_host_name'] = host_name
            snapshot_path['vm_dc_name'] = dc_name
            snapshot_path['vm_cluster_name'] = cluster_name

        # Add Snapshot count per VM
        snapshot_count = len(snapshot_paths)
        snapshot_count_info = {
            'vm_name': virtual_machine.name,
            'vm_host_name': host_name,
            'vm_dc_name': dc_name,
            'vm_cluster_name': cluster_name,
            'vm_snapshot_count': snapshot_count
        }
        snapshots_count_table.append(snapshot_count_info)
        snapshots_age_table.append(snapshot_paths)

    def _vmware_get_snapshots(self, content, inventory):
        """
        Gather snapshot information for every VM that has snapshots.

        Returns ``(snapshots_count_table, snapshots_age_table)``. Both lists
        are filled by ``_vmware_get_snapshot_details`` jobs handed to
        ``self.threader``.
        """
        count_rows = []
        age_rows = []
        for vm in self._vmware_get_obj(content, [vim.VirtualMachine]):
            if vm and vm.snapshot is not None:
                self.threader.thread_it(
                    self._vmware_get_snapshot_details,
                    [count_rows, age_rows, vm, inventory])
        # NOTE(review): the tables are returned right after the jobs are
        # handed over; if thread_it runs them asynchronously the lists may
        # still be filling when the caller reads them - confirm.
        return count_rows, age_rows

    def _vmware_get_datastores(self, content, ds_metrics, inventory):
        """
        Hand one metric collection job per datastore to ``self.threader``.

        ``inventory`` is indexed by datastore name and must provide the ``dc``
        and ``ds_cluster`` entries for each datastore found.
        """
        for ds in self._vmware_get_obj(content, [vim.Datastore]):
            # A storage-info refresh (ds.RefreshDatastoreStorageInfo()) is
            # deliberately not triggered here.
            ds_summary = ds.summary
            placement = inventory[ds_summary.name]
            job_args = [
                ds, placement['dc'], placement['ds_cluster'], ds_metrics,
                ds_summary
            ]
            self.threader.thread_it(self._vmware_get_datastore_metrics,
                                    job_args)

    def _vmware_get_datastore_metrics(self, datastore, dc_name, ds_cluster,
                                      ds_metrics, summary):
        """
        Get datastore metrics
        """
        ds_capacity = float(summary.capacity)
        ds_freespace = float(summary.freeSpace)
        ds_uncommitted = float(
            summary.uncommitted) if summary.uncommitted else 0
        ds_provisioned = ds_capacity - ds_freespace + ds_uncommitted

        ds_metrics['vmware_datastore_capacity_size'].add_metric(
            [summary.name, dc_name, ds_cluster], ds_capacity)
        ds_metrics['vmware_datastore_freespace_size'].add_metric(
            [summary.name, dc_name, ds_cluster], ds_freespace)
        ds_metrics['vmware_datastore_uncommited_size'].add_metric(
            [summary.name, dc_name, ds_cluster], ds_uncommitted)
        ds_metrics['vmware_datastore_provisoned_size'].add_metric(
            [summary.name, dc_name, ds_cluster], ds_provisioned)
        ds_metrics['vmware_datastore_hosts'].add_metric(
            [summary.name, dc_name, ds_cluster], len(datastore.host))
        ds_metrics['vmware_datastore_vms'].add_metric(
            [summary.name, dc_name, ds_cluster], len(datastore.vm))
        ds_metrics['vmware_datastore_maintenance_mode'].add_metric(
            [summary.name, dc_name, ds_cluster, summary.maintenanceMode], 1)
        ds_metrics['vmware_datastore_type'].add_metric(
            [summary.name, dc_name, ds_cluster, summary.type], 1)
        ds_metrics['vmware_datastore_accessible'].add_metric(
            [summary.name, dc_name, ds_cluster], summary.accessible * 1)

    def _vmware_get_vms(self, content, vm_metrics, counter_info, inventory):
        """
        Register the VM performance gauges and schedule one job per VM.

        A gauge named ``vmware_vm_<counter with dots as underscores>`` is
        added to ``vm_metrics`` for each wanted counter, then every virtual
        machine is handed to ``self.threader`` for collection.
        """
        # Performance counters collected for every VM.
        perf_list = [
            'cpu.ready.summation',
            'cpu.usage.average',
            'cpu.usagemhz.average',
            'disk.usage.average',
            'disk.read.average',
            'disk.write.average',
            'mem.usage.average',
            'net.received.average',
            'net.transmitted.average',
        ]

        for counter_name in perf_list:
            gauge_name = 'vmware_vm_' + counter_name.replace('.', '_')
            # The gauge name doubles as its help text.
            vm_metrics[gauge_name] = GaugeMetricFamily(
                gauge_name,
                gauge_name,
                labels=['vm_name', 'host_name', 'dc_name', 'cluster_name'])

        virtual_machines = self._vmware_get_obj(content, [vim.VirtualMachine])
        log("Total Virtual Machines: {0}".format(len(virtual_machines)))
        for vm in virtual_machines:
            job_args = [
                content, counter_info, perf_list, vm, vm_metrics, inventory
            ]
            self.threader.thread_it(self._vmware_get_vm_perf_metrics,
                                    job_args)

    def _vmware_get_vm_perf_metrics(self, content, counter_info, perf_list,
                                    virtual_machine, vm_metrics, inventory):
        """
        Loops over metrics in perf_list on vm
        """
        # DEBUG ME: log("Starting VM: " + vm.name)

        summary = virtual_machine.summary

        vm_power_state = 1 if summary.runtime.powerState == 'poweredOn' else 0
        vm_num_cpu = summary.config.numCpu

        vm_name, vm_host_name, vm_dc_name, vm_cluster_name = self._vmware_vm_metadata(
            inventory, virtual_machine, summary)
        vm_metadata = [vm_name, vm_host_name, vm_dc_name, vm_cluster_name]

        vm_metrics['vmware_vm_power_state'].add_metric(vm_metadata,
                                                       vm_power_state)
        vm_metrics['vmware_vm_num_cpu'].add_metric(vm_metadata, vm_num_cpu)

        # Get metrics for poweredOn vms only
        if vm_power_state:
            if summary.runtime.bootTime:
                vm_metrics['vmware_vm_boot_timestamp_seconds'].add_metric(
                    vm_metadata, self._to_epoch(summary.runtime.bootTime))

            for p in perf_list:
                self.threader.thread_it(self._vmware_get_vm_perf_metric, [
                    content, counter_info, p, virtual_machine, vm_metrics,
                    vm_metadata
                ])

        # Debug Me. log("Finished VM: " + vm.name)

    def _vmware_get_vm_perf_metric(self, content, counter_info, perf_metric,
                                   virtual_machine, vm_metrics, vm_metadata):
        """
        Query one performance counter of a VM and record it as a sample.

        The counter id is looked up in ``counter_info`` and queried with
        ``maxSample=1`` and ``intervalId=20``. The returned values are summed
        and added to the gauge ``vmware_vm_<perf_metric with dots replaced by
        underscores>`` using ``vm_metadata`` as label values.

        A counter missing from ``counter_info`` or an unusable query result
        is logged and skipped, so one bad counter does not abort the job.
        Errors raised by the query itself still propagate.
        """
        perf_metric_name = 'vmware_vm_' + perf_metric.replace('.', '_')

        def report_failure():
            log("Error, cannot get vm metric {0} for {1}".format(
                perf_metric_name, vm_metadata))

        # The counter may simply not exist on this vCenter; treat that like
        # any other unavailable metric instead of raising KeyError.
        if perf_metric not in counter_info:
            report_failure()
            return

        counter_key = counter_info[perf_metric]
        metric_id = vim.PerformanceManager.MetricId(counterId=counter_key,
                                                    instance='')
        spec = vim.PerformanceManager.QuerySpec(maxSample=1,
                                                entity=virtual_machine,
                                                metricId=[metric_id],
                                                intervalId=20)
        result = content.perfManager.QueryStats(querySpec=[spec])
        try:
            vm_metrics[perf_metric_name].add_metric(
                vm_metadata, float(sum(result[0].value[0].value)))
        except Exception:
            # Empty or malformed results are expected for some VMs. Catching
            # Exception (not a bare except) keeps this best-effort while
            # letting SystemExit / KeyboardInterrupt through.
            report_failure()

    def _vmware_get_vmguests(self, content, vmguest_metrics, inventory):
        """
        Hand one guest metric collection job per VM to ``self.threader``.
        """
        all_vms = self._vmware_get_obj(content, [vim.VirtualMachine])
        log("Total Virtual Machines: {0}".format(len(all_vms)))
        for vm in all_vms:
            job_args = [content, vm, vmguest_metrics, inventory]
            self.threader.thread_it(self._vmware_get_vmguests_metrics,
                                    job_args)

    def _vmware_get_vmguests_metrics(self, content, virtual_machine,
                                     vmguest_metrics, inventory):
        """
        Get VM Guest Metrics
        """

        summary = virtual_machine.summary

        vm_name, vm_host_name, vm_dc_name, vm_cluster_name = self._vmware_vm_metadata(
            inventory, virtual_machine, summary)

        # gather disk metrics
        if len(virtual_machine.guest.disk) > 0:
            for disk in virtual_machine.guest.disk:
                vmguest_metrics['vmware_vm_guest_disk_free'].add_metric([
                    vm_name, vm_host_name, vm_dc_name, vm_cluster_name,
                    disk.diskPath
                ], disk.freeSpace)
                vmguest_metrics['vmware_vm_guest_disk_capacity'].add_metric([
                    vm_name, vm_host_name, vm_dc_name, vm_cluster_name,
                    disk.diskPath
                ], disk.capacity)

    def _vmware_get_hosts(self, content, host_metrics, inventory):
        """
        Record the power state of every ESXi host and schedule the rest.

        The power state sample is added for each host directly; the remaining
        host metrics are collected by a ``self.threader`` job, and only for
        hosts that are powered on.
        """
        for esx_host in self._vmware_get_obj(content, [vim.HostSystem]):
            host_summary = esx_host.summary
            name, dc_name, cluster_name = self._vmware_host_metadata(
                inventory, esx_host)

            is_on = 1 if host_summary.runtime.powerState == 'poweredOn' else 0
            host_metrics['vmware_host_power_state'].add_metric(
                [name, dc_name, cluster_name], is_on)

            if not is_on:
                continue

            self.threader.thread_it(
                self._vmware_get_host_metrics,
                [name, dc_name, cluster_name, host_metrics, host_summary])

    def _vmware_get_host_metrics(self, host_name, host_dc_name,
                                 host_cluster_name, host_metrics, summary):
        """
        Get Host Metrics
        """

        labels = [host_name, host_dc_name, host_cluster_name]

        if summary.runtime.bootTime:
            # Host uptime
            host_metrics['vmware_host_boot_timestamp_seconds'].add_metric(
                labels, self._to_epoch(summary.runtime.bootTime))

            # Host connection state (connected, disconnected, notResponding)
            metric_labels = labels
            metric_labels.append(summary.runtime.connectionState)
            host_metrics['vmware_host_connection_state'].add_metric(
                metric_labels, 1)

            # Host in maintenance mode?
            host_metrics['vmware_host_maintenance_mode'].add_metric(
                labels, summary.runtime.inMaintenanceMode * 1)

        # CPU Usage (in Mhz)
        host_metrics['vmware_host_cpu_usage'].add_metric(
            labels, summary.quickStats.overallCpuUsage)
        cpu_core_num = summary.hardware.numCpuCores
        cpu_total = summary.hardware.cpuMhz * cpu_core_num
        host_metrics['vmware_host_cpu_max'].add_metric(labels, cpu_total)

        # Memory Usage (in MB)
        host_metrics['vmware_host_memory_usage'].add_metric(
            labels, summary.quickStats.overallMemoryUsage)
        host_metrics['vmware_host_memory_max'].add_metric(
            labels,
            float(summary.hardware.memorySize) / 1024 / 1024)

    def _vmware_get_inventory(self, content):
        """
        Map hosts and datastores to their datacenter and cluster names.

        Returns ``(host_inventory, ds_inventory)``. ``host_inventory`` maps a
        host name to ``{'dc': ..., 'cluster': ...}`` and ``ds_inventory`` maps
        a datastore name to ``{'dc': ..., 'ds_cluster': ...}``. The cluster
        entry is an empty string for entities outside a cluster.
        """
        host_inventory = {}
        ds_inventory = {}

        # Every child of the root folder is treated as a datacenter.
        for dc in content.rootFolder.childEntity:
            for entity in dc.hostFolder.childEntity:
                if isinstance(entity, vim.ClusterComputeResource):
                    # Cluster: register each member host under the cluster.
                    for member in entity.host:
                        host_inventory[member.summary.config.name] = {
                            'dc': dc.name,
                            'cluster': entity.name,
                        }
                else:
                    # Anything else is taken to be an unclustered host.
                    host_inventory[entity.name] = {
                        'dc': dc.name,
                        'cluster': '',
                    }

            for entity in dc.datastoreFolder.childEntity:
                if isinstance(entity, vim.Datastore):
                    # Datastore directly in the folder: no datastore cluster.
                    ds_inventory[entity.name] = {
                        'dc': dc.name,
                        'ds_cluster': '',
                    }
                else:
                    # Anything else is taken to be a datastore cluster.
                    for member in entity.childEntity:
                        ds_inventory[member.name] = {
                            'dc': dc.name,
                            'ds_cluster': entity.name,
                        }

        return host_inventory, ds_inventory

    def _vmware_vm_metadata(self, inventory, vm, summary=None):
        """
        Get VM metadata from inventory
        """
        if summary is None:
            summary = vm.summary
        vm_name = vm.name
        vm_host = summary.runtime.host
        vm_host_name = vm_host.name
        vm_dc_name = inventory[vm_host_name]['dc']
        vm_cluster_name = inventory[vm_host_name]['cluster']

        return vm_name, vm_host_name, vm_dc_name, vm_cluster_name

    def _vmware_host_metadata(self, inventory, host):
        """
        Get Host metadata from inventory
        """
        host_name = host.name
        host_dc_name = inventory[host_name]['dc']
        host_cluster_name = inventory[host_name]['cluster']

        return host_name, host_dc_name, host_cluster_name
예제 #6
0
class VMWareMetricsResource(Resource):
    """
    VMWare twisted ``Resource`` handling multi endpoints
    Only handle /metrics path
    """
    isLeaf = True

    def __init__(self, args):
        """
        Load the exporter configuration from ``args.config_file``.

        Exits the process when the file cannot be read, or when it has no
        ``default`` section.
        """
        try:
            self.config = YamlConfig(args.config_file)
            has_default = 'default' in self.config.keys()
        except Exception:
            raise SystemExit("Error, cannot read configuration file")

        # Checked outside the try block: the previous bare ``except`` caught
        # the SystemExit raised for a missing section and replaced it with the
        # unrelated "cannot read configuration file" error.
        if not has_default:
            print("Error, you must have a default section in config file")
            raise SystemExit(1)

    def render_GET(self, request):
        """
        Handle a GET request; only ``/metrics?target=<host>`` is served.

        Returns a 404 body for any other path or when no ``target`` argument
        is given. Otherwise metric generation is deferred to the reactor and
        ``NOT_DONE_YET`` is returned.
        """
        path = request.path.decode()
        request.setHeader("Content-Type", "text/plain; charset=UTF-8")
        if path != '/metrics':
            request.setResponseCode(404)
            return '404 Not Found'.encode()

        # Twisted keys ``request.args`` by bytes on Python 3, so a str-only
        # lookup never finds the argument; try bytes first, then str.
        targets = (request.args.get(b'target') or request.args.get('target')
                   or [None])
        if not targets[0]:
            request.setResponseCode(404)
            return 'No target defined\r\n'.encode()

        d = deferLater(reactor, 0, lambda: request)
        d.addCallback(self.generate_latest_target)
        d.addErrback(self.errback, request)
        return NOT_DONE_YET

    def errback(self, failure, request):
        """
        Report a failed metric generation and terminate the request.

        The traceback is printed first; ``request.processingFailed`` is then
        given the failure (per the original note, this sends a trace to the
        browser and closes the request).
        """
        failure.printTraceback()
        request.processingFailed(failure)
        return None

    def generate_latest_target(self, request):
        """
        Write the metrics of the requested target in Prometheus text format.

        Reads the ``target`` and optional ``section`` (default ``'default'``)
        query arguments, runs ``self.collect`` and writes the rendered output
        to ``request``. When ``collect`` yields nothing, a 500 response is
        sent instead. The request is finished in both cases.
        """

        def first_arg(name, default=None):
            # Twisted keys request.args by bytes on Python 3 (values are bytes
            # too); look up bytes first, fall back to str, and hand text on.
            values = request.args.get(name.encode('utf-8'))
            if values is None:
                values = request.args.get(name)
            if not values:
                return default
            value = values[0]
            if isinstance(value, bytes) and not isinstance(value, str):
                value = value.decode('utf-8')
            return value

        target = first_arg('target')
        section = first_arg('section', 'default')

        output = []
        for metric in self.collect(target, section):
            output.append('# HELP {0} {1}'.format(
                metric.name,
                metric.documentation.replace('\\', r'\\').replace('\n',
                                                                  r'\n')))
            output.append('\n# TYPE {0} {1}\n'.format(metric.name,
                                                      metric.type))
            for sample in metric.samples:
                # Only the first three fields are needed; slicing also accepts
                # sample tuples that carry extra trailing fields.
                name, labels, value = sample[:3]
                if labels:
                    labelstr = '{{{0}}}'.format(','.join([
                        '{0}="{1}"'.format(
                            k,
                            v.replace('\\', r'\\').replace('\n',
                                                           r'\n').replace(
                                                               '"', r'\"'))
                        for k, v in sorted(labels.items())
                    ]))
                else:
                    labelstr = ''
                output.append('{0}{1} {2}\n'.format(name, labelstr,
                                                    _floatToGoString(value)))

        if output:
            request.write(''.join(output).encode('utf-8'))
            request.finish()
        else:
            # The status message has to be bytes for Twisted on Python 3.
            request.setResponseCode(500, message=b'cannot connect to vmware')
            request.finish()
            return

    def collect(self, target=None, section='default'):
        """
        Connect to ``target``, gather all metrics and yield them one by one.

        ``section`` selects the configuration section; an unknown section
        falls back to ``'default'``. Nothing is yielded when the connection
        cannot be established. The session is released again even when
        collection raises.
        """
        if section not in self.config.keys():
            print("{} is not a valid section, using default".format(section))
            section = 'default'

        # (metric name, help text, label names) of every exported gauge.
        metric_specs = [
            ('vmware_vm_power_state',
             'VMWare VM Power state (On / Off)', ['vm_name']),
            ('vmware_vm_boot_timestamp_seconds',
             'VMWare VM boot time in seconds', ['vm_name']),
            ('vmware_vm_snapshots',
             'VMWare current number of existing snapshots', ['vm_name']),
            ('vmware_vm_snapshot_timestamp_seconds',
             'VMWare Snapshot creation time in seconds',
             ['vm_name', 'vm_snapshot_name']),
            ('vmware_datastore_capacity_size',
             'VMWare Datasore capacity in bytes', ['ds_name']),
            ('vmware_datastore_freespace_size',
             'VMWare Datastore freespace in bytes', ['ds_name']),
            ('vmware_datastore_uncommited_size',
             'VMWare Datastore uncommitted in bytes', ['ds_name']),
            ('vmware_datastore_provisoned_size',
             'VMWare Datastore provisoned in bytes', ['ds_name']),
            ('vmware_datastore_hosts',
             'VMWare Hosts number using this datastore', ['ds_name']),
            ('vmware_datastore_vms',
             'VMWare Virtual Machines number using this datastore',
             ['ds_name']),
            ('vmware_host_power_state',
             'VMWare Host Power state (On / Off)', ['host_name']),
            ('vmware_host_boot_timestamp_seconds',
             'VMWare Host boot time in seconds', ['host_name']),
            ('vmware_host_cpu_usage',
             'VMWare Host CPU usage in Mhz', ['host_name']),
            ('vmware_host_cpu_max',
             'VMWare Host CPU max availability in Mhz', ['host_name']),
            ('vmware_host_memory_usage',
             'VMWare Host Memory usage in Mbytes', ['host_name']),
            ('vmware_host_memory_max',
             'VMWare Host Memory Max availability in Mbytes', ['host_name']),
        ]
        metrics = {}
        for name, documentation, label_names in metric_specs:
            metrics[name] = GaugeMetricFamily(name,
                                              documentation,
                                              labels=label_names)

        print("[{0}] Start collecting vcenter metrics for {1}".format(
            datetime.utcnow().replace(tzinfo=pytz.utc), target))

        self.si = self._vmware_connect(target, section)
        if not self.si:
            print("Error, cannot connect to vmware")
            return

        try:
            content = self.si.RetrieveContent()

            # Get performance metrics counter information
            counter_info = self._vmware_perf_metrics(content)

            # Fill Snapshots (count and age)
            vm_counts, vm_ages = self._vmware_get_snapshots(content)
            for v in vm_counts:
                metrics['vmware_vm_snapshots'].add_metric([v['vm_name']],
                                                          v['snapshot_count'])
            for vm_age in vm_ages:
                for v in vm_age:
                    metrics['vmware_vm_snapshot_timestamp_seconds'].add_metric(
                        [v['vm_name'], v['vm_snapshot_name']],
                        v['vm_snapshot_timestamp_seconds'])

            # Fill Datastore
            self._vmware_get_datastores(content, metrics)

            # Fill VM Informations
            self._vmware_get_vms(content, metrics, counter_info)

            # Fill Hosts Informations
            self._vmware_get_hosts(content, metrics)

            print("[{0}] Stop collecting vcenter metrics for {1}".format(
                datetime.utcnow().replace(tzinfo=pytz.utc), target))
        finally:
            # Release the vCenter session even when collection fails, so a
            # failing scrape does not leave the session open.
            self._vmware_disconnect()

        for metric in metrics.values():
            yield metric

    def _to_unix_timestamp(self, my_date):
        """
        Return ``my_date`` as seconds elapsed since 1970-01-01 00:00 UTC.

        The value is subtracted from a timezone-aware epoch, so ``my_date``
        has to be timezone-aware as well.
        """
        unix_epoch = datetime(1970, 1, 1, tzinfo=pytz.utc)
        elapsed = my_date - unix_epoch
        return elapsed.total_seconds()

    def _vmware_get_obj(self, content, vimtype, name=None):
        """
         Get the vsphere object associated with a given text name
        """
        obj = None
        container = content.viewManager.CreateContainerView(
            content.rootFolder, vimtype, True)
        if name:
            for c in container.view:
                if c.name == name:
                    obj = c
                    return [obj]
        else:
            return container.view

    def _vmware_connect(self, target, section):
        """
        Open a connection to the vCenter ``target`` on port 443, using the
        'vmware_user' and 'vmware_password' entries found under ``section``
        of the configuration.

        Certificate verification is disabled when the section's
        'ignore_ssl' entry is truthy and the ssl module offers
        ``_create_unverified_context``.

        Returns the connection, or None when a vmodl.MethodFault is raised
        """
        ssl_context = None
        skip_verification = self.config[section]['ignore_ssl']
        if skip_verification and hasattr(ssl, "_create_unverified_context"):
            ssl_context = ssl._create_unverified_context()

        try:
            return connect.Connect(target,
                                   443,
                                   self.config[section]['vmware_user'],
                                   self.config[section]['vmware_password'],
                                   sslContext=ssl_context)
        except vmodl.MethodFault as fault:
            print("Caught vmodl fault: " + fault.msg)
            return None

    def _vmware_disconnect(self):
        """
        Close the vCenter connection stored in ``self.si``
        """
        session = self.si
        connect.Disconnect(session)

    def _vmware_perf_metrics(self, content):
        # create a mapping from performance stats to their counterIDs
        # counter_info: [performance stat => counterId]
        # performance stat example: cpu.usagemhz.LATEST
        counter_info = {}
        for c in content.perfManager.perfCounter:
            prefix = c.groupInfo.key
            counter_full = "{}.{}.{}".format(c.groupInfo.key, c.nameInfo.key,
                                             c.rollupType)
            counter_info[counter_full] = c.key
        return counter_info

    def _vmware_list_snapshots_recursively(self, snapshots):
        """
        Get snapshots from a VM list, recursively
        """
        snapshot_data = []
        for snapshot in snapshots:
            snap_timestamp = self._to_unix_timestamp(snapshot.createTime)
            snap_info = {
                'vm_snapshot_name': snapshot.name,
                'vm_snapshot_timestamp_seconds': snap_timestamp
            }
            snapshot_data.append(snap_info)
            snapshot_data = snapshot_data + self._vmware_list_snapshots_recursively(
                snapshot.childSnapshotList)
        return snapshot_data

    def _vmware_get_snapshots(self, content):
        """
        Collect snapshot data for every virtual machine.

        Returns two lists with one entry per VM that has snapshots:
        dicts holding 'vm_name' and 'snapshot_count', and the flattened
        snapshot lists (each snapshot dict tagged with 'vm_name').
        VMs without snapshot information are left out of both lists
        """
        count_rows = []
        age_rows = []
        machines = self._vmware_get_obj(content, [vim.VirtualMachine])
        for machine in machines:
            # Nothing to report for falsy entries or VMs without snapshots
            if not machine or machine.snapshot is None:
                continue

            vm_snapshots = self._vmware_list_snapshots_recursively(
                machine.snapshot.rootSnapshotList)
            for entry in vm_snapshots:
                entry['vm_name'] = machine.name

            # Snapshot count for this VM
            total = len(vm_snapshots)
            count_rows.append({
                'vm_name': machine.name,
                'snapshot_count': total,
            })
            age_rows.append(vm_snapshots)
        return count_rows, age_rows

    def _vmware_get_datastores(self, content, ds_metrics):
        """
        Add one sample per datastore to each datastore metric.

        ds_metrics: mapping of metric name to metric family; samples are
        added in place and labelled with the datastore's summary name.
        The provisioned size is computed as capacity - free space +
        uncommitted (uncommitted counts as 0 when it is not set)
        """
        datastores = self._vmware_get_obj(content, [vim.Datastore])
        for datastore in datastores:
            # NOTE: datastore.RefreshDatastoreStorageInfo() is not invoked
            # here; the summary is used as it is
            info = datastore.summary
            capacity = info.capacity
            free = info.freeSpace
            uncommitted = info.uncommitted or 0
            provisioned = capacity - free + uncommitted

            size_samples = (
                ('vmware_datastore_capacity_size', capacity),
                ('vmware_datastore_freespace_size', free),
                ('vmware_datastore_uncommited_size', uncommitted),
                ('vmware_datastore_provisoned_size', provisioned),
            )
            for metric_name, value in size_samples:
                ds_metrics[metric_name].add_metric([info.name], value)

            ds_metrics['vmware_datastore_hosts'].add_metric(
                [info.name], len(datastore.host))
            ds_metrics['vmware_datastore_vms'].add_metric(
                [info.name], len(datastore.vm))

    def _vmware_get_vms(self, content, vm_metrics, counter_info):
        """
        Get VM information

        Adds a power-state sample for every VM and, for powered-on VMs
        only, a boot timestamp (when one is set) plus one sample per
        performance counter listed below.

        content: used to list the VMs and to query ``content.perfManager``
        vm_metrics: dict of metric name to metric family; one gauge per
        performance counter is created and stored in it by this method
        counter_info: dict mapping "<group>.<name>.<rollup>" to a counter
        id; a counter from the list that is missing raises KeyError
        """

        # List of performance counter we want
        perf_list = [
            'cpu.ready.summation',
            'cpu.usage.average',
            'cpu.usagemhz.average',
            'disk.usage.average',
            'disk.read.average',
            'disk.write.average',
            'mem.usage.average',
            'net.received.average',
            'net.transmitted.average',
        ]

        # Gauge name of every counter, computed once and reused for all VMs
        perf_gauges = [(p, 'vmware_vm_' + p.replace('.', '_'))
                       for p in perf_list]

        # Prepare gauges
        for _, p_metric in perf_gauges:
            vm_metrics[p_metric] = GaugeMetricFamily(p_metric,
                                                     p_metric,
                                                     labels=['vm_name'])

        for vm in self._vmware_get_obj(content, [vim.VirtualMachine]):
            summary = vm.summary
            # Read the name once per VM instead of once per sample
            vm_name = vm.name

            power_state = 1 if summary.runtime.powerState == 'poweredOn' else 0
            vm_metrics['vmware_vm_power_state'].add_metric([vm_name],
                                                           power_state)

            # Get metrics for poweredOn vms only
            if not power_state:
                continue

            if summary.runtime.bootTime:
                vm_metrics['vmware_vm_boot_timestamp_seconds'].add_metric(
                    [vm_name],
                    self._to_unix_timestamp(summary.runtime.bootTime))

            for p, p_metric in perf_gauges:
                counter_key = counter_info[p]
                metric_id = vim.PerformanceManager.MetricId(
                    counterId=counter_key, instance='')
                # NOTE(review): intervalId=20 is presumably the real-time
                # sampling interval in seconds - confirm against the
                # vSphere API reference
                spec = vim.PerformanceManager.QuerySpec(
                    maxSample=1,
                    entity=vm,
                    metricId=[metric_id],
                    intervalId=20)
                result = content.perfManager.QueryStats(querySpec=[spec])
                try:
                    vm_metrics[p_metric].add_metric(
                        [vm_name], float(sum(result[0].value[0].value)))
                except Exception:
                    # Best effort: a VM without usable samples must not
                    # abort the whole collection. Unlike a bare "except:",
                    # this lets KeyboardInterrupt and SystemExit through.
                    print(
                        "Error, cannot get vm metrics {0} for {1}".format(
                            p_metric, vm_name))

    def _vmware_get_hosts(self, content, host_metrics):
        """
        Get Host (ESXi) information

        Adds a power-state sample for every host and, for powered-on hosts
        only, boot timestamp (when one is set), CPU usage and capacity,
        and memory usage and capacity.

        host_metrics: mapping of metric name to metric family; samples are
        added in place and labelled with the host name
        """
        for host in self._vmware_get_obj(content, [vim.HostSystem]):
            summary = host.summary
            # Read the name once per host instead of once per sample
            host_name = host.name

            # Power state
            power_state = 1 if summary.runtime.powerState == 'poweredOn' else 0
            host_metrics['vmware_host_power_state'].add_metric([host_name],
                                                               power_state)

            # The remaining metrics are reported for powered-on hosts only
            if not power_state:
                continue

            # Uptime
            if summary.runtime.bootTime:
                host_metrics[
                    'vmware_host_boot_timestamp_seconds'].add_metric(
                        [host_name],
                        self._to_unix_timestamp(summary.runtime.bootTime))

            # CPU usage (in Mhz); capacity is per-core Mhz * number of cores
            host_metrics['vmware_host_cpu_usage'].add_metric(
                [host_name], summary.quickStats.overallCpuUsage)
            cpu_core_num = summary.hardware.numCpuCores
            cpu_total = summary.hardware.cpuMhz * cpu_core_num
            host_metrics['vmware_host_cpu_max'].add_metric([host_name],
                                                           cpu_total)

            # Memory usage and capacity. memorySize is divided by 1024
            # twice, i.e. bytes to Mbytes assuming the API reports bytes;
            # overallMemoryUsage is presumably already in Mbytes - confirm
            # against the vSphere API reference
            host_metrics['vmware_host_memory_usage'].add_metric(
                [host_name], summary.quickStats.overallMemoryUsage)
            host_metrics['vmware_host_memory_max'].add_metric(
                [host_name],
                float(summary.hardware.memorySize) / 1024 / 1024)
예제 #7
0
 def __init__(self, args):
     """
     Load the configuration file named by ``args.config_file`` into
     ``self.config``.

     Any exception raised while doing so ends the program through
     SystemExit with an 'ERROR - ...' message
     """
     try:
         self.config = YamlConfig(args.config_file)
     except Exception as problem:
         message = f'ERROR - {problem}'
         raise SystemExit(message)
예제 #8
0
    def configure(self, args):
        """
        Fill ``self.config`` from a YAML file or from the environment.

        When ``args.config_file`` is set, the file is loaded and must
        contain a 'default' section; any error while reading it ends the
        program through SystemExit.

        Otherwise the 'default' section is read from the VSPHERE_*
        environment variables, and every VSPHERE_<NAME>_USER variable adds
        a further section named <name> (lower-cased) that is read from the
        VSPHERE_<NAME>_* variables.

        Boolean switches (IGNORE_SSL and the COLLECT_* family) keep their
        default when unset; when set they are false for '', '0', 'false',
        'no' and 'off' (case-insensitive) and true for anything else
        """
        if args.config_file:
            try:
                self.config = YamlConfig(args.config_file)
                if 'default' not in self.config.keys():
                    log("Error, you must have a default section in config file (for now)"
                        )
                    exit(1)
                return
            except Exception as exception:
                # Format the exception itself: Python 3 exceptions have no
                # ``message`` attribute, so reading it here would raise
                # AttributeError and hide the real error
                raise SystemExit(
                    "Error while reading configuration file: {0}".format(
                        exception))

        def env_flag(name, default):
            # Environment values are always strings, so 'False' or '0'
            # would be truthy if stored as they are
            raw = os.environ.get(name)
            if raw is None:
                return default
            return raw.strip().lower() not in ('', '0', 'false', 'no', 'off')

        def env_section(prefix):
            # One configuration section read from the <prefix>* variables
            return {
                'vsphere_host': os.environ.get(prefix + 'HOST'),
                'vsphere_user': os.environ.get(prefix + 'USER'),
                'vsphere_password': os.environ.get(prefix + 'PASSWORD'),
                'ignore_ssl': env_flag(prefix + 'IGNORE_SSL', False),
                'collect_only': {
                    'vms': env_flag(prefix + 'COLLECT_VMS', True),
                    'vmguests': env_flag(prefix + 'COLLECT_VMGUESTS', True),
                    'datastores': env_flag(prefix + 'COLLECT_DATASTORES',
                                           True),
                    'hosts': env_flag(prefix + 'COLLECT_HOSTS', True),
                    'snapshots': env_flag(prefix + 'COLLECT_SNAPSHOTS',
                                          True),
                }
            }

        self.config = {'default': env_section('VSPHERE_')}

        for key in os.environ.keys():
            # VSPHERE_USER belongs to the default section handled above
            if key == 'VSPHERE_USER':
                continue
            if not key.startswith('VSPHERE_') or not key.endswith('_USER'):
                continue

            # VSPHERE_<SECTION>_USER -> <SECTION>
            section = key.split('_', 1)[1].rsplit('_', 1)[0]
            self.config[section.lower()] = env_section(
                'VSPHERE_{}_'.format(section))
예제 #9
0
class VMWareMetricsResource(Resource):
    """
    Twisted ``Resource`` that answers GET requests with the metrics
    collected from a vSphere host.

    ``self.config`` maps section names to connection settings. It is read
    from a YAML file when ``args.config_file`` is set and from VSPHERE_*
    environment variables otherwise
    """

    isLeaf = True

    def __init__(self, args):
        """
        Init Metric Resource
        """
        Resource.__init__(self)
        self.configure(args)

    @staticmethod
    def _env_flag(name, default):
        """
        Read a boolean switch from the environment.

        Returns ``default`` when the variable is unset. A set variable is
        false for '', '0', 'false', 'no' and 'off' (case-insensitive) and
        true for anything else
        """
        # Environment values are always strings, so 'False' or '0' would
        # be truthy if stored as they are
        raw = os.environ.get(name)
        if raw is None:
            return default
        return raw.strip().lower() not in ('', '0', 'false', 'no', 'off')

    @classmethod
    def _env_section(cls, prefix):
        """
        Build one configuration section from the <prefix>* variables
        """
        return {
            'vsphere_host': os.environ.get(prefix + 'HOST'),
            'vsphere_user': os.environ.get(prefix + 'USER'),
            'vsphere_password': os.environ.get(prefix + 'PASSWORD'),
            'ignore_ssl': cls._env_flag(prefix + 'IGNORE_SSL', False),
            'collect_only': {
                'vms': cls._env_flag(prefix + 'COLLECT_VMS', True),
                'vmguests': cls._env_flag(prefix + 'COLLECT_VMGUESTS', True),
                'datastores': cls._env_flag(prefix + 'COLLECT_DATASTORES',
                                            True),
                'hosts': cls._env_flag(prefix + 'COLLECT_HOSTS', True),
                'snapshots': cls._env_flag(prefix + 'COLLECT_SNAPSHOTS',
                                           True),
            }
        }

    def configure(self, args):
        """
        Fill ``self.config`` from a YAML file or from the environment.

        With ``args.config_file`` set, the file is loaded and must contain
        a 'default' section; any error while reading it ends the program
        through SystemExit. Otherwise the 'default' section comes from the
        VSPHERE_* variables and every VSPHERE_<NAME>_USER variable adds a
        section named <name> (lower-cased)
        """
        if args.config_file:
            try:
                self.config = YamlConfig(args.config_file)
                if 'default' not in self.config.keys():
                    log("Error, you must have a default section in config file (for now)"
                        )
                    exit(1)
                return
            except Exception as exception:
                # Format the exception itself: Python 3 exceptions have no
                # ``message`` attribute, so reading it here would raise
                # AttributeError and hide the real error
                raise SystemExit(
                    "Error while reading configuration file: {0}".format(
                        exception))

        self.config = {'default': self._env_section('VSPHERE_')}

        for key in os.environ.keys():
            # VSPHERE_USER belongs to the default section handled above
            if key == 'VSPHERE_USER':
                continue
            if not key.startswith('VSPHERE_') or not key.endswith('_USER'):
                continue

            # VSPHERE_<SECTION>_USER -> <SECTION>
            section = key.split('_', 1)[1].rsplit('_', 1)[0]
            self.config[section.lower()] = self._env_section(
                'VSPHERE_{}_'.format(section))

    def render_GET(self, request):
        """ handles get requests for metrics, health, and everything else """
        # The response is written later by the asynchronous handler
        self._async_render_GET(request)
        return NOT_DONE_YET

    @defer.inlineCallbacks
    def _async_render_GET(self, request):
        """
        Generate the metrics response; on any exception log the traceback
        and answer with a 500 and a short comment line
        """
        try:
            yield self.generate_latest_metrics(request)
        except Exception:
            log(traceback.format_exc())
            request.setResponseCode(500)
            request.write(b'# Collection failed')
            request.finish()

        # We used to call request.processingFailed to send a traceback to browser
        # This can make sense in debug mode for a HTML site - but we don't want
        # prometheus trying to parse a python traceback

    @defer.inlineCallbacks
    def generate_latest_metrics(self, request):
        """
        gets the latest metrics

        The configuration section is chosen with the 'section' query
        argument (falling back to 'default'). The vSphere host comes from
        that section, or else from the 'target' or 'vsphere_host' query
        argument; without any of them a 500 response is sent
        """
        # Query arguments are looked up with bytes keys, like the 'target'
        # and 'vsphere_host' lookups below; a str key never matches there,
        # which silently forced the default section
        section = request.args.get(b'section',
                                   [b'default'])[0].decode('utf-8')
        if section not in self.config.keys():
            log("{} is not a valid section, using default".format(section))
            section = 'default'

        if self.config[section].get('vsphere_host') and self.config[
                section].get('vsphere_host') != "None":
            vsphere_host = self.config[section].get('vsphere_host')
        elif request.args.get(b'target', [None])[0]:
            vsphere_host = request.args.get(b'target',
                                            [None])[0].decode('utf-8')
        elif request.args.get(b'vsphere_host', [None])[0]:
            vsphere_host = request.args.get(b'vsphere_host')[0].decode('utf-8')
        else:
            request.setResponseCode(500)
            log("No vsphere_host or target defined")
            request.write(b'No vsphere_host or target defined!\n')
            request.finish()
            return

        collector = VmwareCollector(
            vsphere_host,
            self.config[section]['vsphere_user'],
            self.config[section]['vsphere_password'],
            self.config[section]['collect_only'],
            self.config[section]['ignore_ssl'],
        )
        metrics = yield collector.collect()

        registry = CollectorRegistry()
        registry.register(ListCollector(metrics))
        output = generate_latest(registry)

        request.setHeader("Content-Type", "text/plain; charset=UTF-8")
        request.setResponseCode(200)
        request.write(output)
        request.finish()
예제 #10
0
class VMWareMetricsResource(Resource):
    """
    VMWare twisted ``Resource`` handling multi endpoints
    Only handle /metrics and /healthz path
    """
    isLeaf = True

    def __init__(self):
        """
        Init Metric Resource
        """
        Resource.__init__(self)

    @staticmethod
    def _env_flag(name, default):
        """
        Read a boolean switch from the environment.

        Returns ``default`` when the variable is unset. A set variable is
        false for '', '0', 'false', 'no' and 'off' (case-insensitive) and
        true for anything else
        """
        # Environment values are always strings, so 'False' or '0' would
        # be truthy if stored as they are
        raw = os.environ.get(name)
        if raw is None:
            return default
        return raw.strip().lower() not in ('', '0', 'false', 'no', 'off')

    def configure(self, args):
        """
        Fill ``self.config`` from a YAML file or from the environment.

        With ``args.config_file`` set, the file is loaded and must contain
        a 'default' section; any error while reading it ends the program
        through SystemExit. Otherwise a single 'default' section is built
        from the VSPHERE_* environment variables
        """
        if args.config_file:
            try:
                self.config = YamlConfig(args.config_file)
                if 'default' not in self.config.keys():
                    log("Error, you must have a default section in config file (for now)"
                        )
                    exit(1)
            except Exception as exception:
                # Format the exception itself: Python 3 exceptions have no
                # ``message`` attribute, so reading it here would raise
                # AttributeError and hide the real error
                raise SystemExit(
                    "Error while reading configuration file: {0}".format(
                        exception))
        else:
            # The section is built directly as a dict. It used to be
            # rendered into YAML text and parsed with yaml.load(), where
            # the user and password entries were the literal "******"
            # (the environment credentials were passed to format() but
            # never referenced), a quote in any value could break the
            # document, and yaml.load() without a Loader is unsafe on
            # text assembled from external values.
            self.config = {
                'default': {
                    'vsphere_host': os.environ.get('VSPHERE_HOST'),
                    'vsphere_user': os.environ.get('VSPHERE_USER'),
                    'vsphere_password': os.environ.get('VSPHERE_PASSWORD'),
                    'ignore_ssl': self._env_flag('VSPHERE_IGNORE_SSL',
                                                 False),
                    'collect_only': {
                        'vms':
                        self._env_flag('VSPHERE_COLLECT_VMS', True),
                        'vmguests':
                        self._env_flag('VSPHERE_COLLECT_VMGUESTS', True),
                        'datastores':
                        self._env_flag('VSPHERE_COLLECT_DATASTORES', True),
                        'hosts':
                        self._env_flag('VSPHERE_COLLECT_HOSTS', True),
                        'snapshots':
                        self._env_flag('VSPHERE_COLLECT_SNAPSHOTS', True),
                    }
                }
            }

    def render_GET(self, request):
        """ handles get requests for metrics, health, and everything else """
        path = request.path.decode()
        request.setHeader("Content-Type", "text/plain; charset=UTF-8")
        if path == '/metrics':
            # Metrics are generated from a deferred scheduled on the
            # reactor; the response is written by the callback
            deferred_request = deferLater(reactor, 0, lambda: request)
            deferred_request.addCallback(self.generate_latest_metrics)
            deferred_request.addErrback(self.errback, request)
            return NOT_DONE_YET
        elif path == '/healthz':
            request.setResponseCode(200)
            log("Service is UP")
            return 'Server is UP'.encode()
        else:
            log(b"Uri not found: " + request.uri)
            request.setResponseCode(404)
            return '404 Not Found'.encode()

    def errback(self, failure, request):
        """ handles failures from requests """
        failure.printTraceback()
        log(failure)
        request.processingFailed(
            failure
        )  # This will send a trace to the browser and close the request.
        return None

    def generate_latest_metrics(self, request):
        """
        gets the latest metrics

        The configuration section is chosen with the 'section' query
        argument (falling back to 'default'). The vSphere host comes from
        that section, or else from the 'target' or 'vsphere_host' query
        argument; without any of them a 500 response is sent
        """
        # Query arguments are looked up with bytes keys, like the 'target'
        # and 'vsphere_host' lookups below; a str key never matches there,
        # which silently forced the default section
        section = request.args.get(b'section',
                                   [b'default'])[0].decode('utf-8')
        if section not in self.config.keys():
            log("{} is not a valid section, using default".format(section))
            section = 'default'

        if self.config[section].get('vsphere_host') and self.config[
                section].get('vsphere_host') != "None":
            vsphere_host = self.config[section].get('vsphere_host')
        elif request.args.get(b'target', [None])[0]:
            vsphere_host = request.args.get(b'target',
                                            [None])[0].decode('utf-8')
        elif request.args.get(b'vsphere_host', [None])[0]:
            vsphere_host = request.args.get(b'vsphere_host')[0].decode('utf-8')
        else:
            request.setResponseCode(500)
            log("No vsphere_host or target defined")
            request.write(b'No vsphere_host or target defined!\n')
            request.finish()
            return

        registry = CollectorRegistry()
        registry.register(
            VmwareCollector(
                vsphere_host,
                self.config[section]['vsphere_user'],
                self.config[section]['vsphere_password'],
                self.config[section]['collect_only'],
                self.config[section]['ignore_ssl'],
            ))
        output = generate_latest(registry)

        request.write(output)
        request.finish()
예제 #11
0
        t = threading.Thread(target=httpd.serve_forever())
        t.daemon = True
        t.start()
    except KeyboardInterrupt:
        logging.info("Stopping Arista eAPI Prometheus Server")

def enable_logging():
    """
    Configure the root logger.

    The level is INFO when the APP_ENV environment variable equals
    "production" (case-insensitive, and the default when it is unset) and
    DEBUG for any other value. Records are sent to stdout through
    ``logging.basicConfig``
    """
    logger = logging.getLogger()
    app_environment = os.getenv('APP_ENV', default="production").lower()
    level = 'INFO' if app_environment == "production" else 'DEBUG'
    logger.setLevel(level)

    # Named log_format so the builtin format() is not shadowed
    log_format = '%(asctime)-15s %(process)d %(levelname)s %(filename)s:%(lineno)d %(message)s'
    logging.basicConfig(stream=sys.stdout, format=log_format)

# Script entry point: parse the command line, load the configuration, set up
# logging and hand over to falcon_app().
if __name__ == '__main__':
    # command line options: -c/--config names the YAML config file and
    # defaults to "config.yml"
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-c", "--config", help="Specify config yaml file", metavar="FILE", required=False, default="config.yml")
    args = parser.parse_args()

    # get the config
    # NOTE(review): ``config`` and ``args`` are module-level names here;
    # presumably code defined elsewhere (e.g. falcon_app) reads them - confirm
    # before renaming or moving them
    config = YamlConfig(args.config)

    # Logging is configured after the config is loaded and before the app
    # is started
    enable_logging()

    # falcon_app() is defined outside this section
    falcon_app()