Пример #1
0
def _get_stats():
    libvirt = util_libvirt.get_libvirt()
    conn = libvirt.open('qemu:///system')

    # What's special about this node?
    retval = {
        'is_etcd_master': config.NODE_IS_ETCD_MASTER,
        'is_hypervisor': config.NODE_IS_HYPERVISOR,
        'is_network_node': config.NODE_IS_NETWORK_NODE,
    }

    # CPU info
    present_cpus, _, available_cpus = conn.getCPUMap()
    retval.update({
        'cpu_max': present_cpus,
        'cpu_available': available_cpus,
    })

    retval['cpu_max_per_instance'] = conn.getMaxVcpus(None)

    # This is disabled as data we don't currently use
    # for i in range(present_cpus):
    #    per_cpu_stats = conn.getCPUStats(i)
    #    for key in per_cpu_stats:
    #        retval['cpu_core%d_%s' % (i, key)] = per_cpu_stats[key]

    try:
        load_1, load_5, load_15 = psutil.getloadavg()
        retval.update({
            'cpu_load_1': load_1,
            'cpu_load_5': load_5,
            'cpu_load_15': load_15,
        })
    except Exception as e:
        util_general.ignore_exception('load average', e)

    # System memory info, converting bytes to mb
    stats = psutil.virtual_memory()
    retval.update({
        'memory_max': stats.total // 1024 // 1024,
        'memory_available': stats.available // 1024 // 1024
    })

    # libvirt memory info, converting kb to mb
    memory_status = conn.getMemoryStats(
        libvirt.VIR_NODE_MEMORY_STATS_ALL_CELLS)
    retval.update({
        'memory_max_libvirt': memory_status['total'] // 1024,
        'memory_available_libvirt': memory_status['free'] // 1024,
    })

    # Kernel Shared Memory (KSM) information
    ksm_details = {}
    for ent in os.listdir('/sys/kernel/mm/ksm'):
        with open('/sys/kernel/mm/ksm/%s' % ent) as f:
            ksm_details['memory_ksm_%s' % ent] = int(f.read().rstrip())
    retval.update(ksm_details)

    # Disk info. There could be more than one filesystem here, so we track
    # all of the paths we're fond of.
    fsids = []
    minimum = -1
    total = 0
    used = 0

    for path in ['', 'blobs', 'image_cache', 'instances', 'uploads']:
        # We need to make the paths we check if they don't exist, otherwise
        # they wont be included in the metrics and things get confused.
        fullpath = os.path.join(config.STORAGE_PATH, path)
        os.makedirs(fullpath, exist_ok=True)
        s = os.statvfs(fullpath)
        free = s.f_frsize * s.f_bavail

        if s.f_fsid not in fsids:
            total += s.f_frsize * s.f_blocks
            used += s.f_frsize * (s.f_blocks - s.f_bfree)
            if minimum == -1 or free < minimum:
                minimum = free

        if path == '':
            path = 'sfroot'
        retval['disk_free_%s' % path] = free

    retval.update({
        'disk_total': total,
        'disk_free': minimum,
        'disk_used': used
    })

    disk_counters = psutil.disk_io_counters()
    retval.update({
        'disk_read_bytes': disk_counters.read_bytes,
        'disk_write_bytes': disk_counters.write_bytes,
    })

    # Network info
    net_counters = psutil.net_io_counters()
    retval.update({
        'network_read_bytes': net_counters.bytes_recv,
        'network_write_bytes': net_counters.bytes_sent,
    })

    # Virtual machine consumption info
    total_instances = 0
    total_active_instances = 0
    total_instance_max_memory = 0
    total_instance_actual_memory = 0
    total_instance_vcpus = 0
    total_instance_cpu_time = 0

    for guest in conn.listAllDomains():
        try:
            active = guest.isActive() == 1
            if active:
                _, maxmem, mem, cpus, cpu_time = guest.info()

        except libvirt.libvirtError as e:
            LOG.debug('During resource calc ignored libvirt error: %s' % e)
            active = False

        if active:
            total_instances += 1
            total_active_instances += 1
            total_instance_max_memory += maxmem
            total_instance_actual_memory += mem
            total_instance_vcpus += cpus
            total_instance_cpu_time += cpu_time

    # Queue health statistics
    node_queue_processing, node_queue_waiting = etcd.get_queue_length(
        config.NODE_NAME)

    retval.update({
        'cpu_total_instance_vcpus':
        total_instance_vcpus,
        'cpu_total_instance_cpu_time':
        total_instance_cpu_time,
        'memory_total_instance_max':
        total_instance_max_memory // 1024,
        'memory_total_instance_actual':
        total_instance_actual_memory // 1024,
        'instances_total':
        total_instances,
        'instances_active':
        total_active_instances,
        'node_queue_processing':
        node_queue_processing,
        'node_queue_waiting':
        node_queue_waiting,
    })

    if config.NODE_IS_NETWORK_NODE:
        network_queue_processing, network_queue_waiting = etcd.get_queue_length(
            'networknode')

        retval.update({
            'network_queue_processing': network_queue_processing,
            'network_queue_waiting': network_queue_waiting,
        })

    return retval
Пример #2
0
def get_queue_length(queuename):
    return etcd.get_queue_length(queuename)
Пример #3
0
    def _reap_leaked_floating_ips(self):
        # Block until the network node queue is idle to avoid races
        processing, waiting = etcd.get_queue_length('networknode')
        while processing + waiting > 0:
            self.exit.wait(60)
            processing, waiting = etcd.get_queue_length('networknode')

        # Ensure we haven't leaked any floating IPs (because we used to)
        with db.get_lock('ipmanager', None, 'floating', ttl=120,
                         op='Cleanup leaks'):
            floating_ipm = IPManager.from_db('floating')

            # Collect floating gateways and floating IPs, while ensuring that
            # they are correctly reserved on the floating network as well
            floating_gateways = []
            for n in net.Networks([baseobject.active_states_filter]):
                fg = n.floating_gateway
                if fg:
                    floating_gateways.append(fg)
                    if floating_ipm.is_free(fg):
                        floating_ipm.reserve(fg, n.unique_label())
                        floating_ipm.persist()
                        LOG.with_fields({
                            'network': n.uuid,
                            'address': fg
                        }).error('Floating gateway not reserved correctly')

            LOG.info('Found floating gateways: %s' % floating_gateways)

            floating_addresses = []
            for ni in networkinterface.NetworkInterfaces([baseobject.active_states_filter]):
                fa = ni.floating.get('floating_address')
                if fa:
                    floating_addresses.append(fa)
                    if floating_ipm.is_free(fa):
                        floating_ipm.reserve(fa, ni.unique_label())
                        floating_ipm.persist()
                        LOG.with_fields({
                            'networkinterface': ni.uuid,
                            'address': fa
                        }).error('Floating address not reserved correctly')
            LOG.info('Found floating addresses: %s' % floating_addresses)

            floating_reserved = [
                floating_ipm.get_address_at_index(0),
                floating_ipm.get_address_at_index(1),
                floating_ipm.broadcast_address,
                floating_ipm.network_address
            ]
            LOG.info('Found floating reservations: %s' % floating_reserved)

            # Now the reverse check. Test if there are any reserved IPs which
            # are not actually in use. Free any we find.
            leaks = []
            for ip in floating_ipm.in_use:
                if ip not in itertools.chain(floating_gateways,
                                             floating_addresses,
                                             floating_reserved):
                    LOG.error('Floating IP %s has leaked.' % ip)

                    # This IP needs to have been allocated more than 300 seconds
                    # ago to ensure that the network setup isn't still queueud.
                    if time.time() - floating_ipm.in_use[ip]['when'] > 300:
                        leaks.append(ip)

            for ip in leaks:
                LOG.error('Leaked floating IP %s has been released.' % ip)
                floating_ipm.release(ip)
            floating_ipm.persist()