Пример #1
0
    def get_instances(self, filters):
        """Collect the dedicated-CPU instances that nova reports for this host.

        Servers are listed from nova restricted to ``self._hostname``; only
        those whose flavor extra specs set ``hw:cpu_policy`` to ``dedicated``
        are kept.  Each kept instance is then updated with the domain info
        returned by ``guest.get_guest_domain_by_uuid``.

        :param filters: dict of nova search options; it is copied, so the
                        caller's dict is left untouched
        :return: set of instance objects; possibly empty or partial when a
                 nova/libvirt call fails (the failure is logged, not raised)
        """
        instances = set()
        try:
            nova = self.get_nova()
            # Work on a copy so the 'host' key does not leak into the
            # caller's dict.
            search_opts = dict(filters, host=self._hostname)
            servers = nova.servers.list(detailed=True,
                                        search_opts=search_opts)
            # Index flavors once instead of scanning the list per server.
            flavors_by_id = {flavor.id: flavor
                             for flavor in nova.flavors.list()}

            for server in servers:
                flavor = flavors_by_id.get(server.flavor["id"])
                if flavor is None:
                    continue
                extra_spec = flavor.get_keys()
                if extra_spec.get('hw:cpu_policy') == 'dedicated':
                    instances.add(instance.instance(server.id, server.name,
                                                    extra_spec))

            # get numa topology and pci info from libvirt
            for inst in instances:
                domain = guest.get_guest_domain_by_uuid(
                    self._conn, inst.uuid)
                # The lookup may come back empty (get_instance guards for
                # this as well); skip that instance instead of letting the
                # failure abort the update of the remaining ones.
                if domain:
                    inst.update(domain)
        except Exception as e:
            LOG.warning("Failed to get instances info! error=%s" % e)

        return instances
Пример #2
0
def get_inst(instance_uuid, callback):
    """Fetch an instance through the nova client and pass it to callback.

    :param instance_uuid: uuid of the instance to look up
    :param callback: callable invoked with the instance; it is not called
                     when the lookup returns None
    """
    # get instance info from nova
    found = nova_provider.get_nova_client().get_instance(instance_uuid)
    if found is None:
        return
    LOG.debug("inst:%s" % found)
    callback(found)
Пример #3
0
def start_rabbitmq_client():
    """Start Rabbitmq client to listen instance notifications from Nova.

    Builds the transport URL from the ``CONF.amqp`` options, registers the
    instance online/offline notification endpoints and runs the listener
    through ``rpc_work`` in a separate thread.

    :return: the notification listener that was handed to the worker thread
    """
    cfg = CONF.amqp
    rabbit_url = "rabbit://%s:%s@%s:%s/%s" % (cfg['user_id'], cfg['password'],
                                              cfg['host'], cfg['port'],
                                              cfg['virt_host'])
    topic = cfg['topic']
    # Never write the broker password to the log: log the same URL with the
    # password masked.
    LOG.info("rabbit://%s:%s@%s:%s/%s" % (cfg['user_id'], '***',
                                          cfg['host'], cfg['port'],
                                          cfg['virt_host']))

    target = oslo_messaging.Target(exchange="nova",
                                   topic=topic,
                                   server="info",
                                   version="2.1",
                                   fanout=True)
    transport = oslo_messaging.get_notification_transport(CONF, url=rabbit_url)

    # The payload layout depends on which notification topic is consumed.
    if topic == 'versioned_notifications':
        payload_decoder = VersionedPayloadDecoder()
    else:
        payload_decoder = UnversionedPayloadDecoder()

    endpoints = [
        InstanceOnlineNotificationEndpoint(payload_decoder),
        InstanceOfflineNotificationEndpoint(payload_decoder),
    ]

    server = oslo_messaging.get_notification_listener(transport, [target],
                                                      endpoints,
                                                      "threading",
                                                      allow_requeue=True)
    thread = threading.Thread(target=rpc_work, args=(server, ))
    thread.start()
    LOG.info("Rabbitmq Client Started!")

    return server
Пример #4
0
def set_irq_affinity(set_bitmap, irqs, cpulist):
    """Set irq affinity to the specified cpulist for list of irqs.

    Failures are logged per irq and do not stop the remaining irqs from
    being processed.

    :param set_bitmap: True: set bitmap file, False: set list file
    :param irqs: irq list
    :param cpulist: cpu list; written to the affinity file as is
    :return: set of the irqs whose affinity file was written successfully
    """
    _irqs = set()

    if set_bitmap:
        filename = 'smp_affinity'
    else:
        filename = 'smp_affinity_list'

    for irq in irqs:
        irq_aff_path = "%s/%s/%s" % (COMPUTE_IRQ, irq, filename)
        try:
            with open(irq_aff_path, 'w') as f:
                f.write(cpulist)
        except Exception as e:
            LOG.warning("Failed to write pci affine file:%(F)s, irq:%(I)s, "
                        "error=%(E)s" % {
                            "F": filename,
                            "I": irq,
                            "E": e
                        })
        else:
            # The buffered write is only flushed when the file is closed, so
            # a rejected value surfaces on leaving the ``with`` block; report
            # success only once that has happened.
            LOG.info("PCI IRQ %s pinned to CPUS: %s" % (irq, cpulist))
            _irqs.add(irq)
    return _irqs
Пример #5
0
def process_signal_handler(signum, frame):
    """Process Signal Handler

    Clears the module-level ``stay_on`` flag on SIGTERM, SIGINT or SIGTSTP;
    any other signal is only logged.

    :param signum: number of the received signal
    :param frame: current stack frame (unused)
    """
    global stay_on

    if signum in [signal.SIGTERM, signal.SIGINT, signal.SIGTSTP]:
        stay_on = False
    else:
        # The format string needs a placeholder, otherwise the % operator
        # raises TypeError inside the handler.
        LOG.info("Ignoring signal %s" % signum)
Пример #6
0
 def get_nova(self):
     """Create a nova client (API version '2.1') on a new session.

     The session is built from ``self._auth`` and verified against
     ``self._cacert``.

     :return: the nova client
     :raises Exception: if the session or the client cannot be created
     """
     try:
         sess = session.Session(auth=self._auth, verify=self._cacert)
         return client.Client('2.1', session=sess)
     except Exception as e:
         # Keep the root cause visible in both the log and the raised
         # error; it used to be dropped.
         LOG.warning("Failed to connect to nova! error=%s" % e)
         raise Exception("could not connect nova! error=%s" % e)
Пример #7
0
def get_guest_domain_by_uuid(conn, uuid):
    """Look up a guest by uuid and return its domain info.

    :param conn: connection object providing ``lookupByUUIDString``
    :param uuid: uuid string of the guest
    :return: result of ``get_guest_domain_info`` for the guest, or None when
             the lookup fails (the failure is logged as a warning)
    """
    try:
        handle = conn.lookupByUUIDString(uuid)
    except Exception as e:
        LOG.warning("Failed to get domain for uuid=%s! error=%s" % (uuid, e))
        return None
    else:
        return get_guest_domain_info(handle)
Пример #8
0
        def do_affine_pci_dev_instance(refresh_need):
            """Set pci device irq affinity for this instance.

            :param refresh_need: when true, re-read the instance from nova
                                 before affining; otherwise the enclosing
                                 ``inst`` is used as is
            :return: tuple (irqs, msi_irqs, cpulist) where the irq sets are
                     accumulated over all pci devices and cpulist is the one
                     of the last device processed ('' when there is none);
                     None when the refreshed instance cannot be retrieved
            """

            _irqs = set()
            _msi_irqs = set()
            # Bound up front so the final return is valid even when the
            # instance has no pci devices.
            pci_cpulist = ''
            # Without a refresh, work on the instance we were given;
            # previously _inst was left unbound in that case.
            _inst = inst
            # refresh instance info.
            if refresh_need:
                nova_client = nova_provider.get_nova_client()
                _inst = nova_client.get_instance(inst.uuid)
            if _inst is None:
                # NOTE(review): callers that unpack a 3-tuple will fail on
                # this None - confirm how they handle it.
                return

            numa_topology = _inst.get_numa_topology()
            extra_spec = _inst.get_extra_spec()
            for pci_dev in _inst.pci_devices:
                try:
                    irqs, msi_irqs, pci_numa_node, pci_cpulist = \
                        pci_utils.set_irqs_affinity_by_pci_address(
                            pci_dev.address, extra_spec, numa_topology)
                except Exception as e:
                    # A failing device contributes nothing but does not stop
                    # the remaining devices from being affined.
                    irqs = set()
                    msi_irqs = set()
                    pci_numa_node = None
                    pci_cpulist = ''
                    LOG.error("Could not affine irqs for pci_addr:%(A)s, "
                              "error: %(E)s" % {
                                  "A": pci_dev.address,
                                  "E": e
                              })

                # Log irqs affined when there is a change in the counts.
                msi_irq_count = len(msi_irqs)
                if ((msi_irq_count != self._msi_irq_count[pci_dev.address])
                        or wait_for_irqs):
                    self._msi_irq_count[pci_dev.address] = msi_irq_count
                    LOG.info(
                        ("Instance=%(U)s: IRQs affined for pci_addr=%(A)s, "
                         "dev_id=%(D)s, dev_type=%(T)s, "
                         "vendor_id=%(V)s, product_id=%(P)s, "
                         "irqs=%(I)s, msi_irqs=%(M)s, "
                         "numa_node=%(N)s, cpulist=%(C)s") % {
                             'U': inst.uuid,
                             'A': pci_dev.address,
                             'D': pci_dev.dev_id,
                             'T': pci_dev.dev_type,
                             'V': pci_dev.vendor_id,
                             'P': pci_dev.product_id,
                             'I': ', '.join(map(str, irqs)),
                             'M': ', '.join(map(str, msi_irqs)),
                             'N': pci_numa_node,
                             'C': pci_cpulist
                         })
                _irqs.update(irqs)
                _msi_irqs.update(msi_irqs)
            return (_irqs, _msi_irqs, pci_cpulist)
Пример #9
0
 def affine_pci_dev_instance(self, instance, wait_for_irqs=True):
     """Affine the pci device irqs of an instance and record the result.

     Nothing is done unless the instance exists, reports the 'dedicated'
     cpu policy and has pci devices.

     :param instance: instance object, or None
     :param wait_for_irqs: passed through to the irq affinity driver
     """
     if instance is None:
         return
     if instance.get_cpu_policy() != 'dedicated':
         return
     if not instance.get_pci_devices():
         return

     LOG.debug("Instance=%s use dedicated cpu policy!!!" % instance.uuid)
     affined = self.affinePciIrqDriver.affine_pci_dev_irqs(
         instance, wait_for_irqs)
     irqs, msi_irqs, cpulist = affined
     # remember the instance on which pci affinity has been applied
     self.instance_irq_pcpulist_update(instance.uuid, irqs, msi_irqs,
                                       cpulist)
Пример #10
0
    def instance_irq_pcpulist_update(self, uuid, irqs, msi_irqs, cpulist):
        """Store the irqs/cpulist affined for an instance.

        When the instance already has an entry, the irqs and msi irqs that
        are in the old entry but not in the new one get their affinity reset
        before the entry is overwritten.

        :param uuid: instance uuid, key into ``self.inst_dict``
        :param irqs: set of irqs currently affined
        :param msi_irqs: set of msi irqs currently affined
        :param cpulist: cpu list the irqs were affined to
        """
        if uuid in self.inst_dict:
            old_entry = self.inst_dict[uuid]
            # irqs that are no longer reported for this instance
            stale_irqs = old_entry[0].difference(irqs)
            stale_msi_irqs = old_entry[1].difference(msi_irqs)

            # hand back to the default affinity whatever is no longer in use
            if stale_irqs or stale_msi_irqs:
                self.reset_irq_affinity(uuid, stale_irqs, stale_msi_irqs)

        self.inst_dict[uuid] = [irqs, msi_irqs, cpulist]
        LOG.debug(self.inst_dict)
Пример #11
0
    def get_instance(self, uuid):
        """Build the instance object for a server hosted on this node.

        :param uuid: uuid of the server to query from nova
        :return: instance object when the server is on ``self._hostname``;
                 None when nova cannot be queried or the server lives on
                 another host.  A libvirt failure is only logged; the
                 instance is still returned.
        """
        try:
            nova = self.get_nova()
            srv = nova.servers.get(uuid)
            flavor = nova.flavors.get(srv.flavor["id"])
            srv_host = srv.__dict__['OS-EXT-SRV-ATTR:host']
        except Exception as e:
            LOG.warning("Could not get instance=%s from Nova! error=%s" %
                        (uuid, e))
            return None

        LOG.debug('GET VM:%s in node:%s' % (srv.name, srv_host))

        if srv_host != self._hostname:
            LOG.debug('The VM is not in current host!')
            return None

        inst = instance.instance(uuid, srv.name, flavor.get_keys())
        # numa topology and pci info come from libvirt
        try:
            guest_domain = guest.get_guest_domain_by_uuid(self._conn, uuid)
            if guest_domain:
                inst.update(guest_domain)
        except Exception as e:
            LOG.warning("Failed to access libvirt! error=%s" % e)
        return inst
Пример #12
0
    def reset_irq_affinity(self, uuid, irqs=None, msi_irqs=None):
        """Reset irq affinity for instance

        The instance has already been deleted or
        related PCI not used by it anymore.

        The irqs are set back to the bitmap read from
        ``default_smp_affinity``.

        :param uuid: instance uuid
        :param irqs: irqs to reset; when neither irqs nor msi_irqs is given,
                     the irqs recorded for uuid in ``self.inst_dict`` are used
        :param msi_irqs: msi irqs to reset; same fallback as irqs
        """

        if irqs or msi_irqs:
            # reset irq affinity for specified irqs
            _irqs = irqs
            _msi_irqs = msi_irqs

        elif uuid in self.inst_dict:
            # reset all irq affinity for deleted instance
            _irqs = self.inst_dict[uuid][0]
            _msi_irqs = self.inst_dict[uuid][1]
        else:
            LOG.debug("No pci affinity need to be reset for instance=%s!" %
                      uuid)
            return

        try:
            with open('%s/default_smp_affinity' % COMPUTE_IRQ) as f:
                cpulist = f.readline().strip()
            LOG.debug("default smp affinity bitmap:%s" % cpulist)

            for x in (_irqs, _msi_irqs):
                # Either collection may be None (the parameter default) or
                # empty; both mean there is nothing to reset for it.
                if x:
                    pci_utils.set_irq_affinity(True, x, cpulist)

        except Exception as e:
            LOG.error("Failed to reset smp affinity! error=%s" % e)
            # Do not report completion after a failure.
            return

        LOG.info("Reset smp affinity done for instance=%s!" % uuid)
0
def process_main():
    """Entry function for PCI Interrupt Affinity Agent

    Installs the SIGTSTP handler, starts the nova client, the audits and the
    rabbitmq client when openstack is enabled, then idles until ``stay_on``
    is cleared.  Any unexpected exception is logged and the process exits
    with status 200.  Whatever was started is stopped on the way out.
    """

    LOG.info("Enter PCIInterruptAffinity Agent")

    # Bound before the try so the cleanup in ``finally`` never trips over a
    # name that start-up did not get to assign (which would mask the
    # original error).
    openstack_enabled = False
    nova_client = None
    audit_srv = None
    rabbit_client = None

    try:
        signal.signal(signal.SIGTSTP, process_signal_handler)
        openstack_enabled = CONF.openstack.openstack_enabled
        if openstack_enabled:
            nova_client = nova_provider.get_nova_client()
            audit_srv = audits_initialize()
            rabbit_client = start_rabbitmq_client()

        while stay_on:
            time.sleep(1)

    except KeyboardInterrupt:
        LOG.info("keyboard Interrupt received.")

    except Exception as e:
        # This path terminates the agent with a failure status, so log it
        # as an error.
        LOG.error("%s" % e)
        sys.exit(200)

    finally:
        LOG.error("process_main finalized!!!")
        # Only tear down what was actually started.
        del nova_client
        if audit_srv is not None:
            audit_srv.tg.stop()
        if rabbit_client is not None:
            rabbit_client.stop()
Пример #14
0
    def info(self, ctxt, publisher_id, event_type, payload, metadata):
        """Reset irq affinity for the instance named in a notification.

        The notification is requeued when its payload names a host other
        than this one (COMPUTE_HOSTNAME from the environment, else the
        socket hostname).

        :param ctxt: notification context (unused)
        :param publisher_id: publisher of the notification (unused)
        :param event_type: event type, only used for logging
        :param payload: notification payload, decoded by
                        ``self.payload_decoder``
        :param metadata: notification metadata (unused)
        :return: ``NotificationResult.REQUEUE`` for another host's
                 notification, otherwise None
        """
        decoder = self.payload_decoder
        instance_host = decoder.decode_instance_host(payload)
        current_host = os.environ.get("COMPUTE_HOSTNAME",
                                      socket.gethostname())

        on_other_host = (instance_host is not None
                         and instance_host != current_host)
        if on_other_host:
            LOG.debug(
                "Requeue notification: instance_host=%s != current_host=%s" %
                (instance_host, current_host))
            return oslo_messaging.NotificationResult.REQUEUE

        instance_uuid = decoder.decode_instance_uuid(payload)
        if not instance_uuid:
            return

        LOG.info(
            "Instance offline: uuid=%s, instance_host=%s, event_type=%s" %
            (instance_uuid, instance_host, event_type))
        affinity.pci_irq_affinity.reset_irq_affinity(instance_uuid)
Пример #15
0
def _get_pci_irq_affinity_mask(extra_spec):
    """Parse pci irq affinity mask based on flavor extra-spec.

    Returns set of vcpu ids with corresponding pci irq affinity mask.

    :param extra_spec: flavor extra specs dict; may be None or empty
    :return: result of ``parse_cpu_spec`` for 'hw:pci_irq_affinity_mask',
             or None when the extra spec does not set it
    :raises Exception: if the mask is set but parses to no cpus
    """

    # get_pci_irqs_pinned_cpuset defaults extra_spec to None; treat that
    # like "mask not set" instead of failing on the membership test.
    if not extra_spec or 'hw:pci_irq_affinity_mask' not in extra_spec:
        LOG.info('Not set pci_irq_affinity_mask!')
        return None

    pci_irq_affinity_mask = extra_spec['hw:pci_irq_affinity_mask']
    LOG.info("pci_irq_affinity_mask: %s" % pci_irq_affinity_mask)

    cpuset_ids = parse_cpu_spec(pci_irq_affinity_mask)
    if not cpuset_ids:
        raise Exception("No CPUs available after parsing %r" %
                        pci_irq_affinity_mask)
    return cpuset_ids
Пример #16
0
    def find_and_fill_pci_addrs(dom_xml, device_type):
        """Add the pci addresses of vfio-driven devices to ``pci_addrs``.

        Scans ``./devices/<device_type>`` nodes of the domain xml; for each
        node with a driver whose name starts with 'vfio', the node's
        ``source/address`` is recorded when either the address tag or the
        node itself is of type 'pci'.

        :param dom_xml: parsed domain xml element
        :param device_type: device tag name to look for under ``devices``
        """
        LOG.debug("Finding pci_addrs for %s devices" % device_type)

        def parse_pci_addr(tag):
            """Format the tag's hex attributes as dddd:bb:ss.f."""
            return "%04x:%02x:%02x.%01x" % (
                int(tag.get('domain'), base=16),
                int(tag.get('bus'), base=16),
                int(tag.get('slot'), base=16),
                int(tag.get('function'), base=16))

        for node in dom_xml.findall('./devices/' + device_type):
            for driver in node.findall('driver'):
                # get() returns None for a <driver> without a name
                # attribute; such a driver is simply not a vfio one.
                if not (driver.get('name') or '').startswith('vfio'):
                    continue
                addr_tag = node.find('source/address')
                if addr_tag is None:
                    # No source address on this device: nothing to record.
                    continue
                if (addr_tag.get('type') == 'pci'
                        or node.get('type') == 'pci'):
                    pci_addr = parse_pci_addr(addr_tag)
                    LOG.debug("Add pci device: %s" % pci_addr)
                    pci_addrs.update([pci_addr])
Пример #17
0
    def update(self, domain):
        """Refresh numa topology, cpu policy and pci devices from domain info.

        :param domain: dict with the keys 'nodelist', 'nr_vcpus',
                       'cpu_pinning', 'IsCpuPinned' and 'pci_addrs'
        """
        numa_cells = set()
        for node_id in domain['nodelist']:
            # every cell is given the full vcpu range and the full pinning map
            numa_cells.add(
                numa_cell(node_id, list(range(domain['nr_vcpus'])),
                          domain['cpu_pinning']))
            LOG.debug("cell_id=%s, vcpuset=%s, cpu_pinning=%s" %
                      (node_id, list(range(domain['nr_vcpus'])),
                       domain['cpu_pinning']))

        self.numa_topology = numa_topology(self.uuid, numa_cells)
        self.cpu_policy = 'dedicated' if domain['IsCpuPinned'] else 'shared'

        for pci_addr in domain['pci_addrs']:
            self.pci_devices.update([pci_device(pci_addr)])
Пример #18
0
        def _wait_for_msi_irqs(self, inst):
            """Check if each pci device has the expected number of msi irqs.

            One polling step: the msi irq count of every pci device of the
            instance is re-read and the per-address elapsed/stable timers are
            advanced by ``msi_irq_check_interval``.

            :param inst: instance whose pci devices are checked
            :raises loopingcall.LoopingCallDone: when all counts are positive
                and stable for ``msi_irq_since``, or when every address has
                reached ``msi_irq_timeout`` (a warning is logged for that)
            """
            counts_before = self._msi_irq_count.copy()
            seen_addrs = set()

            for pci_dev in inst.pci_devices:
                addr = pci_dev.address
                seen_addrs.add(addr)
                try:
                    _, msi_irqs = pci_utils.get_irqs_by_pci_address(addr)
                except Exception as e:
                    # a failed read counts as "no msi irqs yet"
                    msi_irqs = set()
                    LOG.error(
                        '_wait_for_msi_irqs: pci_addr=%(A)s, error=%(E)s' % {
                            'A': addr,
                            'E': e
                        })
                self._msi_irq_count[addr] = len(msi_irqs)
                self._msi_irq_elapsed[
                    addr] += CONF.parameters.msi_irq_check_interval
                if counts_before[addr] != self._msi_irq_count[addr]:
                    # the count moved, so restart the stability timer
                    self._msi_irq_since[addr] = 0
                else:
                    self._msi_irq_since[
                        addr] += CONF.parameters.msi_irq_check_interval

            # Finished once every count is positive and has been stable
            # for long enough
            stable = all(
                (self._msi_irq_count[a] > 0)
                and (self._msi_irq_since[a] >= CONF.parameters.msi_irq_since)
                for a in seen_addrs)
            if stable:
                raise loopingcall.LoopingCallDone()

            # Give up once every address has reached the timeout
            timed_out = all(
                self._msi_irq_elapsed[a] >= CONF.parameters.msi_irq_timeout
                for a in seen_addrs)
            if timed_out:
                msg = ("reached %(timeout)s seconds timeout, waiting for "
                       "msi irqs of pci_addrs: %(addrs)s") % {
                           'timeout': CONF.parameters.msi_irq_timeout,
                           'addrs': list(seen_addrs)
                       }
                LOG.warning(msg)
                raise loopingcall.LoopingCallDone()
Пример #19
0
def get_pci_irqs_pinned_cpuset(extra_spec=None,
                               numa_topology=None,
                               pci_numa_node=None):
    """Work out the pinned cpus that pci irqs should be affined to.

    :param extra_spec: flavor extra specs, consulted for the pci irq
                       affinity mask
    :param numa_topology: instance numa topology
    :param pci_numa_node: numa node of a specific PCI device
    :return: tuple (cpuset, cpulist); (set(), '') when the topology or the
             numa node is missing or the node is negative
    """
    LOG.debug("extra_spec:%s, topo:%s, numa_node:%s" %
              (extra_spec, numa_topology, pci_numa_node))
    if numa_topology is None or pci_numa_node is None or pci_numa_node < 0:
        return (set(), '')

    # Start from the pcpus pinned in the cell(s) on the pci's numa node
    pinned = set()
    for cell in numa_topology.cells:
        if cell.id == pci_numa_node and cell.cpu_pinning is not None:
            pinned.update(cell.cpu_pinning.values())
    LOG.info("pinning pcpu list:%s" % pinned)

    # The hw:pci_irq_affinity_mask extra-spec only applies to a pinned
    # instance; when set it replaces the cpuset computed above.
    mask = _get_pci_irq_affinity_mask(extra_spec) if pinned else None
    if mask:
        pinned = set()
        for cell in numa_topology.cells:
            if cell.cpu_pinning is None:
                continue
            for vcpu in cell.cpuset:
                if vcpu in mask:
                    _cell, pcpu = numa_topology.vcpu_to_pcpu(vcpu)
                    pinned.add(pcpu)

    return (pinned, list_to_range(input_list=list(pinned)))
Пример #20
0
    def _get_keystone_creds(self):
        """Build the keystone credentials dict from ``CONF.openstack``.

        Only options with a truthy value are copied.  When no password is
        configured it is fetched from the keyring, using the configured
        keyring service and username.

        :return: dict of credentials without the 'keyring_service' entry, or
                 None when the configuration cannot be read (the error is
                 logged)
        """
        creds = {}
        openstack_options = CONF.openstack
        creds_options = [
            'username', 'password', 'user_domain_name', 'project_name',
            'project_domain_name', 'keyring_service', 'auth_url'
        ]

        try:
            for option in creds_options:
                value = openstack_options[option]
                if value:
                    creds[option] = value

            if 'password' not in creds:
                creds['password'] = keyring.get_password(
                    creds['keyring_service'], creds['username'])
            # keyring_service is optional when a password is configured, so
            # its absence must not discard otherwise valid credentials.
            creds.pop('keyring_service', None)

        except Exception as e:
            LOG.error("Could not get keystone creds configuration! Err=%s" % e)
            creds = None

        return creds
Пример #21
0
def query_instance_callback(inst):
    """Log the queried instance and affine its pci device irqs.

    :param inst: instance handed over by the query
    """
    LOG.debug("query inst:%s" % inst)
    pci_affinity = affinity.pci_irq_affinity
    pci_affinity.affine_pci_dev_instance(inst)
Пример #22
0
def get_irqs_by_pci_address(pci_addr):
    """Collect the irqs and msi irqs of the pci device at pci_addr.

    Only irq numbers greater than zero that also have a directory under
    COMPUTE_IRQ are returned.

    :param pci_addr: PCI address
    :return: tuple (irqs, msi_irqs) of sets; both empty when the device's
             msi_irqs directory does not exist
    :raises Exception: with a "PciDeviceNotFoundById" message when the
        device directory is missing or its irq information cannot be read
    """
    irqs = set()
    msi_irqs = set()

    dev_path = "%s/%s" % (COMPUTE_PCI_DEVICES, pci_addr)
    if not os.path.isdir(dev_path):
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    irq_path = "%s/irq" % (dev_path)
    try:
        with open(irq_path) as f:
            found_irqs = set(
                int(x) for x in f.readline().split() if int(x) > 0)
    except Exception as e:
        LOG.error(
            'get_irqs_by_pci_address: '
            'pci_addr=%(A)s: irq_path=%(P)s; error=%(E)s', {
                'A': pci_addr,
                'P': irq_path,
                'E': e
            })
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    msi_path = "%s/msi_irqs" % (dev_path)
    try:
        found_msi_irqs = set(
            int(x) for x in os.listdir(msi_path) if int(x) > 0)
    except Exception as e:
        # msi_path can disappear while the device is being configured; a
        # missing directory is not fatal and yields two empty sets
        if isinstance(e, OSError) and e.errno == errno.ENOENT:
            return (irqs, msi_irqs)
        LOG.error(
            'get_irqs_by_pci_address: '
            'pci_addr=%(A)s: msi_path=%(P)s; error=%(E)s', {
                'A': pci_addr,
                'P': msi_path,
                'E': e
            })
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # Keep only the irqs that are actually configured; drop the rest.
    irqs.update(irq for irq in found_irqs
                if os.path.isdir("%s/%s" % (COMPUTE_IRQ, irq)))
    msi_irqs.update(irq for irq in found_msi_irqs
                    if os.path.isdir("%s/%s" % (COMPUTE_IRQ, irq)))
    return (irqs, msi_irqs)
Пример #23
0
def set_irqs_affinity_by_pci_address(pci_addr,
                                     extra_spec=None,
                                     numa_topology=None):
    """Affine the irqs of the pci device at pci_addr to the pinned cpus.

    The cpuset is restricted to the numa node of the PCI device.  Nothing is
    configured when there is no numa topology, the device has no numa node
    (negative value) or no pinned cpu is found.

    :param pci_addr: PCI address
    :param extra_spec: extra_spec
    :param numa_topology: instance numa topology
    :return: tuple (irqs, msi_irqs, numa_node, cpulist); the irq sets hold
             only the irqs whose affinity was actually written
    :raises Exception: with a "PciDeviceNotFoundById" message when the
        device's irqs or numa node cannot be read
    """
    if numa_topology is None:
        return (set(), set(), None, '')

    # irqs that belong to this pci address
    found_irqs, found_msi_irqs = get_irqs_by_pci_address(pci_addr)
    LOG.debug("pci: %s, irqs: %s, msi_irqs: %s" %
              (pci_addr, found_irqs, found_msi_irqs))

    # physical numa node of this pci address
    numa_node = None
    numa_path = "%s/%s/numa_node" % (COMPUTE_PCI_DEVICES, pci_addr)
    try:
        with open(numa_path) as f:
            fields = f.readline().split()
        numa_node = [int(field) for field in fields][0]
    except Exception as e:
        LOG.error(
            'set_irqs_affinity_by_pci_address: '
            'pci_addr=%(A)s: numa_path=%(P)s; error=%(E)s', {
                'A': pci_addr,
                'P': numa_path,
                'E': e
            })
        raise Exception("PciDeviceNotFoundById id = %r" % pci_addr)

    # no associated numa node: leave the irqs alone
    if numa_node is None or numa_node < 0:
        return (set(), set(), numa_node, '')

    # pinned cpuset the irqs are to be affined to
    cpuset, cpulist = get_pci_irqs_pinned_cpuset(extra_spec, numa_topology,
                                                 numa_node)

    LOG.debug("cpuset where irqs are to be affined:%s or %s" %
              (cpuset, cpulist))

    # no pinned cpus: leave the irqs alone
    if not cpuset:
        return (set(), set(), numa_node, cpulist)

    # Errors while writing the affinity are not fatal; the sets returned
    # below simply omit the irqs that failed.
    LOG.debug("Setting affinity %s for irqs: %s and msi_irqs: %s" %
              (cpulist, found_irqs, found_msi_irqs))
    affined_irqs = set_irq_affinity(False, found_irqs, cpulist)
    affined_msi_irqs = set_irq_affinity(False, found_msi_irqs, cpulist)
    return (affined_irqs, affined_msi_irqs, numa_node, cpulist)