Exemplo n.º 1
0
 def _remove_mesh_element(self, node):
     LOG.withObj(self).info('Removing excess mesh element %s' % node)
     subst = self.subst_dict()
     subst['node'] = node
     util.execute(
         None, 'bridge fdb del to 00:00:00:00:00:00 dst %(node)s '
         'dev %(vx_interface)s' % subst)
Exemplo n.º 2
0
def create_flat(locks, cache_file, disk_file):
    """Make a flat copy of the disk from the image cache."""

    if os.path.exists(disk_file):
        return

    util.execute(locks, 'cp %s %s' % (cache_file, disk_file))
Exemplo n.º 3
0
    def _fetch(self, resp, locks=None):
        """Download the image if the latest version is not in the cache."""
        fetched = 0
        self.file_version += 1

        last_refresh = 0
        with open(self.version_image_path(), 'wb') as f:
            for chunk in resp.iter_content(chunk_size=8192):
                fetched += len(chunk)
                f.write(chunk)

                if time.time() - last_refresh > 5:
                    db.refresh_locks(locks)
                    last_refresh = time.time()

        LOG.withImage(self).withField('bytes_fetched',
                                      fetched).info('Fetch complete')

        # Check if decompression not required
        fn = self.version_image_path()
        if not self.url.endswith('.gz'):
            return fn

        # Check if already decompressed
        if not os.path.exists(f + '.orig'):
            util.execute(locks, 'gunzip -k -q -c %s > %s.orig' % (fn, fn))
        return fn + '.orig'
Exemplo n.º 4
0
 def _add_mesh_element(self, node):
     LOG.withObj(self).info('Adding new mesh element %s' % node)
     subst = self.subst_dict()
     subst['node'] = node
     util.execute(
         None, 'bridge fdb append to 00:00:00:00:00:00 '
         'dst %(node)s dev %(vx_interface)s' % subst)
Exemplo n.º 5
0
def create_cow(locks, cache_file, disk_file):
    """Create a COW layer on top of the image cache."""

    if os.path.exists(disk_file):
        return

    util.execute(locks,
                 'qemu-img create -b %s -f qcow2 %s' % (cache_file, disk_file))
Exemplo n.º 6
0
 def restart_dhcpd(self):
     self._make_config()
     self._make_hosts()
     if not self._send_signal(signal.SIGHUP):
         util.execute(
             None,
             '%(in_netns)s dnsmasq --conf-file=%(config_dir)s/config' %
             self.subst)
Exemplo n.º 7
0
 def run(self):
     LOG.info('Starting')
     util.execute(None, (config.get('API_COMMAND_LINE') % {
                         'port': config.get('API_PORT'),
                         'timeout': config.get('API_TIMEOUT'),
                         'name': daemon.process_name('api')
                         }),
                  env_variables=os.environ)
Exemplo n.º 8
0
def create_raw(locks, cache_file, disk_file):
    """Make a raw copy of the disk from the image cache."""

    if os.path.exists(disk_file):
        return

    util.execute(
        locks,
        'qemu-img convert -t none -O raw %s %s' % (cache_file, disk_file))
Exemplo n.º 9
0
def create_cow(locks, cache_file, disk_file, disk_size):
    """Create a COW layer on top of the image cache.

    disk_size is specified in Gigabytes.
    """

    if os.path.exists(disk_file):
        return

    util.execute(
        locks, 'qemu-img create -b %s -f qcow2 %s %dG' %
        (cache_file, disk_file, int(disk_size)))
Exemplo n.º 10
0
def _transcode(locks, actual_image, related_object):
    with util.RecordedOperation('transcode image', related_object):
        if os.path.exists(actual_image + '.qcow2'):
            return

        current_format = identify(actual_image).get('file format')
        if current_format == 'qcow2':
            os.link(actual_image, actual_image + '.qcow2')
            return

        util.execute(
            locks, 'qemu-img convert -t none -O qcow2 %s %s.qcow2' %
            (actual_image, actual_image))
Exemplo n.º 11
0
    def remove_floating_ip(self, floating_address, inner_address):
        LOG.withObj(self).info('Removing floating ip %s -> %s' %
                               (floating_address, inner_address))
        subst = self.subst_dict()
        subst['floating_address'] = floating_address
        subst['inner_address'] = inner_address

        util.execute(
            None, 'ip addr del %(floating_address)s/%(netmask)s '
            'dev %(physical_veth_outer)s' % subst)
        util.execute(
            None, '%(in_netns)s iptables -t nat -D PREROUTING '
            '-d %(floating_address)s '
            '-j DNAT --to-destination %(inner_address)s' % subst)
Exemplo n.º 12
0
    def add_floating_ip(self, floating_address, inner_address):
        logutil.info([self], 'Adding floating ip %s -> %s' %
                     (floating_address, inner_address))
        subst = self.subst_dict()
        subst['floating_address'] = floating_address
        subst['inner_address'] = inner_address

        util.execute(
            None, 'ip addr add %(floating_address)s/%(netmask)s '
            'dev %(physical_veth_outer)s' % subst)
        util.execute(
            None, '%(in_netns)s iptables -t nat -A PREROUTING '
            '-d %(floating_address)s -j DNAT --to-destination %(inner_address)s'
            % subst)
Exemplo n.º 13
0
def identify(path):
    """Work out what an image is."""

    if not os.path.exists(path):
        return {}

    out, _ = util.execute(None, 'qemu-img info %s' % path)

    data = {}
    for line in out.split('\n'):
        line = line.lstrip().rstrip()
        elems = line.split(': ')
        if len(elems) > 1:
            key = elems[0]
            value = ': '.join(elems[1:])

            m = VALUE_WITH_BRACKETS_RE.match(value)
            if m:
                value = float(m.group(1))

            elif value.endswith('K'):
                value = float(value[:-1]) * 1024
            elif value.endswith('M'):
                value = float(value[:-1]) * 1024 * 1024
            elif value.endswith('G'):
                value = float(value[:-1]) * 1024 * 1024 * 1024
            elif value.endswith('T'):
                value = float(value[:-1]) * 1024 * 1024 * 1024 * 1024

            try:
                data[key] = float(value)
            except Exception:
                data[key] = value

    return data
Exemplo n.º 14
0
def clean_events_mesh_operations(etcd_client):
    # TODO(andy): This can be removed when older versions do not exist

    # We probably need to cleanup excess network mesh events. We also need to
    # try and fetch small batches because of limits in the amount of data etcd3
    # can return at one time.

    # Save time and use the already available etcdctl client.
    net_keys, stderr = util.execute(
        None,
        'etcdctl get --prefix /sf/event/network/ | grep sf/event',
        check_exit_code=[0, 1])
    if stderr:
        print('ERROR: Unable to retrieve network keys:%s' % stderr)
        return

    # Split network events into networks
    network_events = {}
    for key in net_keys.split('\n'):
        if not key:
            continue
        _blank, _sf, _event, _network, uuid, _time = key.split('/')
        network_events.setdefault(uuid, []).append(key)

    # Delete all but last 50 events
    count = 0
    for keys in network_events.values():
        for k in keys[:-50]:
            print('--> Removing verbose network event %s' % k)
            etcd_client.delete(k)
            count += 1
    print(' - Cleaned up %d old network mesh events' % count)
Exemplo n.º 15
0
    def resize(self, locks, size):
        """Resize the image to the specified size."""
        image_path = self.version_image_path()
        backing_file = image_path + '.qcow2' + '.' + str(size) + 'G'

        if os.path.exists(backing_file):
            return backing_file

        current_size = identify(image_path).get('virtual size')

        if current_size == size * 1024 * 1024 * 1024:
            os.link(image_path, backing_file)
            return backing_file

        util.execute(
            locks, 'qemu-img create -b %s.qcow2 -f qcow2 %s %dG' %
            (image_path, backing_file, size))

        return backing_file
Exemplo n.º 16
0
def resize(locks, hashed_image_path, size):
    """Resize the image to the specified size."""

    backing_file = hashed_image_path + '.qcow2' + '.' + str(size) + 'G'

    if os.path.exists(backing_file):
        return backing_file

    current_size = identify(hashed_image_path).get('virtual size')

    if current_size == size * 1024 * 1024 * 1024:
        os.link(hashed_image_path, backing_file)
        return backing_file

    util.execute(locks,
                 'cp %s %s' % (hashed_image_path + '.qcow2', backing_file))
    util.execute(locks, 'qemu-img resize %s %sG' % (backing_file, size))

    return backing_file
Exemplo n.º 17
0
    def discover_mesh(self):
        mesh_re = re.compile(r'00:00:00:00:00:00 dst (.*) self permanent')

        stdout, _ = util.execute(
            None,
            'bridge fdb show brport %(vx_interface)s' % self.subst_dict())

        for line in stdout.split('\n'):
            m = mesh_re.match(line)
            if m:
                yield m.group(1)
Exemplo n.º 18
0
    def delete(self):
        subst = self.subst_dict()
        LOG.withFields(subst).debug('net.delete()')

        # Cleanup local node
        with db.get_object_lock(self, ttl=120, op='Network delete'):
            if util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('delete vxlan bridge', self):
                    util.execute(None, 'ip link delete %(vx_bridge)s' % subst)

            if util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('delete vxlan interface', self):
                    util.execute(None,
                                 'ip link delete %(vx_interface)s' % subst)

            # If this is the network node do additional cleanup
            if util.is_network_node():
                if util.check_for_interface(subst['vx_veth_outer']):
                    with util.RecordedOperation('delete router veth', self):
                        util.execute(
                            None, 'ip link delete %(vx_veth_outer)s' % subst)

                if util.check_for_interface(subst['physical_veth_outer']):
                    with util.RecordedOperation('delete physical veth', self):
                        util.execute(
                            None,
                            'ip link delete %(physical_veth_outer)s' % subst)

                if os.path.exists('/var/run/netns/%(netns)s' % subst):
                    with util.RecordedOperation('delete netns', self):
                        util.execute(None, 'ip netns del %(netns)s' % subst)

                if self.db_entry['floating_gateway']:
                    with db.get_lock('ipmanager',
                                     None,
                                     'floating',
                                     ttl=120,
                                     op='Network delete'):
                        ipm = db.get_ipmanager('floating')
                        ipm.release(self.db_entry['floating_gateway'])
                        db.persist_ipmanager('floating', ipm.save())
Exemplo n.º 19
0
    def _fetch(self, resp, locks=None):
        """Download the image if we don't already have the latest version in cache."""

        # Do the fetch
        fetched = 0
        self.info['version'] += 1
        self.info['fetched_at'] = email.utils.formatdate()
        for field in VALIDATED_IMAGE_FIELDS:
            self.info[field] = resp.headers.get(field)

        last_refresh = 0
        with open(self.hashed_image_path + '.v%03d' % self.info['version'],
                  'wb') as f:
            for chunk in resp.iter_content(chunk_size=8192):
                fetched += len(chunk)
                f.write(chunk)

                if time.time() - last_refresh > 10:
                    db.refresh_locks(locks)

        if fetched > 0:
            self._persist_info()
            logutil.info([self], 'Fetch complete (%d bytes)' % fetched)

        # Decompress if required
        if self.info['url'].endswith('.gz'):
            if not os.path.exists(self.hashed_image_path +
                                  '.v%03d.orig' % self.info['version']):
                util.execute(
                    locks, 'gunzip -k -q -c %(img)s > %(img)s.orig' % {
                        'img':
                        self.hashed_image_path +
                        '.v%03d' % self.info['version']
                    })
            return '%s.v%03d.orig' % (self.hashed_image_path,
                                      self.info['version'])

        return '%s.v%03d' % (self.hashed_image_path, self.info['version'])
Exemplo n.º 20
0
    def _fetch(self, resp, locks=None):
        """Download the image if we don't already have the latest version in
        cache.
        """

        # Do the fetch
        fetched = 0
        self.file_version += 1
        self.fetched = email.utils.formatdate()
        self.modified = resp.headers.get('Last-Modified')
        self.size = resp.headers.get('Content-Length')

        last_refresh = 0
        with open(self.version_image_path(), 'wb') as f:
            for chunk in resp.iter_content(chunk_size=8192):
                fetched += len(chunk)
                f.write(chunk)

                if time.time() - last_refresh > 5:
                    db.refresh_locks(locks)
                    last_refresh = time.time()

        if fetched > 0:
            self.persist()
            LOG.withImage(self).withField('bytes_fetched',
                                          fetched).info('Fetch complete')

        # Check if decompression not required
        fn = self.version_image_path()
        if not self.url.endswith('.gz'):
            return fn

        # Check if already decompressed
        if not os.path.exists(f + '.orig'):
            util.execute(locks, 'gunzip -k -q -c %s > %s.orig' % (fn, fn))
        return fn + '.orig'
Exemplo n.º 21
0
    def _update_power_states(self):
        libvirt = util.get_libvirt()
        conn = libvirt.open(None)
        try:
            seen = []

            # Active VMs have an ID. Active means running in libvirt
            # land.
            for domain_id in conn.listDomainsID():
                domain = conn.lookupByID(domain_id)
                if not domain.name().startswith('sf:'):
                    continue

                instance_uuid = domain.name().split(':')[1]
                instance = db.get_instance(instance_uuid)
                if not instance:
                    # Instance is SF but not in database. Kill to reduce load.
                    logutil.warning([virt.ThinInstance(instance_uuid)],
                                    'Destroying unknown instance')
                    util.execute(None, 'virsh destroy "sf:%s"' % instance_uuid)
                    continue

                db.place_instance(instance_uuid,
                                  config.parsed.get('NODE_NAME'))
                seen.append(domain.name())

                if instance.get('state') == 'deleted':
                    # NOTE(mikal): a delete might be in-flight in the queue.
                    # We only worry about instances which should have gone
                    # away five minutes ago.
                    if time.time() - instance['state_updated'] < 300:
                        continue

                    db.instance_enforced_deletes_increment(instance_uuid)
                    attempts = instance.get('enforced_deletes', 0)

                    if attempts > 5:
                        # Sometimes we just can't delete the VM. Try the big hammer instead.
                        logutil.warning(
                            [virt.ThinInstance(instance_uuid)],
                            'Attempting alternate delete method for instance')
                        util.execute(None,
                                     'virsh destroy "sf:%s"' % instance_uuid)

                        db.add_event('instance', instance_uuid,
                                     'enforced delete', 'complete', None, None)
                    else:
                        i = virt.from_db(instance_uuid)
                        i.delete()
                        i.update_instance_state('deleted')

                    logutil.warning([virt.ThinInstance(instance_uuid)],
                                    'Deleting stray instance (attempt %d)' %
                                    attempts)

                    continue

                state = util.extract_power_state(libvirt, domain)
                db.update_instance_power_state(instance_uuid, state)
                if state == 'crashed':
                    db.update_instance_state(instance_uuid, 'error')

            # Inactive VMs just have a name, and are powered off
            # in our state system.
            for domain_name in conn.listDefinedDomains():
                if not domain_name.startswith('sf:'):
                    continue

                if domain_name not in seen:
                    instance_uuid = domain_name.split(':')[1]
                    instance = db.get_instance(instance_uuid)

                    if instance.get('state') == 'deleted':
                        # NOTE(mikal): a delete might be in-flight in the queue.
                        # We only worry about instances which should have gone
                        # away five minutes ago.
                        if time.time() - instance['state_updated'] < 300:
                            continue

                        domain = conn.lookupByName(domain_name)
                        domain.undefine()
                        logutil.info([virt.ThinInstance(instance_uuid)],
                                     'Detected stray instance')
                        db.add_event('instance', instance_uuid,
                                     'deleted stray', 'complete', None, None)
                        continue

                    db.place_instance(instance_uuid,
                                      config.parsed.get('NODE_NAME'))
                    instance_path = os.path.join(
                        config.parsed.get('STORAGE_PATH'), 'instances',
                        instance_uuid)

                    if not os.path.exists(instance_path):
                        # If we're inactive and our files aren't on disk,
                        # we have a problem.
                        logutil.info([virt.ThinInstance(instance_uuid)],
                                     'Detected error state for instance')
                        db.update_instance_state(instance_uuid, 'error')
                    elif instance.get('power_state') != 'off':
                        logutil.info([virt.ThinInstance(instance_uuid)],
                                     'Detected power off for instance')
                        db.update_instance_power_state(instance_uuid, 'off')
                        db.add_event('instance', instance_uuid,
                                     'detected poweroff', 'complete', None,
                                     None)

        except libvirt.libvirtError as e:
            logutil.error(None, 'Failed to lookup all domains: %s' % e)
Exemplo n.º 22
0
def snapshot(locks, source, destination):
    """Convert a possibly COW layered disk file into a snapshot."""

    util.execute(
        locks, 'qemu-img convert --force-share -O qcow2 %s %s' %
        (source, destination))
Exemplo n.º 23
0
    def create(self):
        subst = self.subst_dict()

        with db.get_lock('network', None, self.uuid, ttl=120):
            if not util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('create vxlan interface', self):
                    util.execute(
                        None,
                        'ip link add %(vx_interface)s type vxlan id %(vx_id)s '
                        'dev %(physical_interface)s dstport 0' % subst)
                    util.execute(
                        None,
                        'sysctl -w net.ipv4.conf.%(vx_interface)s.arp_notify=1'
                        % subst)

            if not util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('create vxlan bridge', self):
                    util.execute(
                        None, 'ip link add %(vx_bridge)s type bridge' % subst)
                    util.execute(
                        None,
                        'ip link set %(vx_interface)s master %(vx_bridge)s' %
                        subst)
                    util.execute(None,
                                 'ip link set %(vx_interface)s up' % subst)
                    util.execute(None, 'ip link set %(vx_bridge)s up' % subst)
                    util.execute(
                        None,
                        'sysctl -w net.ipv4.conf.%(vx_bridge)s.arp_notify=1' %
                        subst)
                    util.execute(None, 'brctl setfd %(vx_bridge)s 0' % subst)
                    util.execute(None, 'brctl stp %(vx_bridge)s off' % subst)
                    util.execute(None,
                                 'brctl setageing %(vx_bridge)s 0' % subst)

        if util.is_network_node():
            if not os.path.exists('/var/run/netns/%(netns)s' % subst):
                with util.RecordedOperation('create netns', self):
                    util.execute(None, 'ip netns add %(netns)s' % subst)

            if not util.check_for_interface(subst['vx_veth_outer']):
                with util.RecordedOperation('create router veth', self):
                    util.execute(
                        None,
                        'ip link add %(vx_veth_outer)s type veth peer name %(vx_veth_inner)s'
                        % subst)
                    util.execute(
                        None, 'ip link set %(vx_veth_inner)s netns %(netns)s' %
                        subst)
                    util.execute(
                        None,
                        'brctl addif %(vx_bridge)s %(vx_veth_outer)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_veth_outer)s up' % subst)
                    util.execute(
                        None, '%(in_netns)s ip link set %(vx_veth_inner)s up' %
                        subst)
                    util.execute(
                        None,
                        '%(in_netns)s ip addr add %(router)s/%(netmask)s dev %(vx_veth_inner)s'
                        % subst)

            if not util.check_for_interface(subst['physical_veth_outer']):
                with util.RecordedOperation('create physical veth', self):
                    util.execute(
                        None,
                        'ip link add %(physical_veth_outer)s type veth peer name '
                        '%(physical_veth_inner)s' % subst)
                    util.execute(
                        None,
                        'brctl addif %(physical_bridge)s %(physical_veth_outer)s'
                        % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_outer)s up' % subst)
                    util.execute(
                        None,
                        'ip link set %(physical_veth_inner)s netns %(netns)s' %
                        subst)

            self.deploy_nat()
            self.update_dhcp()
        else:
            db.enqueue('networknode', {
                'type': 'deploy',
                'network_uuid': self.uuid
            })
            db.add_event('network', self.uuid, 'deploy', 'enqueued', None,
                         None)
Exemplo n.º 24
0
    def deploy_nat(self):
        if not self.provide_nat:
            return

        subst = self.subst_dict()
        if not self.floating_gateway:
            with db.get_lock('ipmanager', None, 'floating', ttl=120):
                ipm = db.get_ipmanager('floating')
                self.floating_gateway = ipm.get_random_free_address()
                db.persist_ipmanager('floating', ipm.save())
                self.persist_floating_gateway()

        # No lock because no data changing
        ipm = db.get_ipmanager('floating')
        subst['floating_router'] = ipm.get_address_at_index(1)
        subst['floating_gateway'] = self.floating_gateway
        subst['floating_netmask'] = ipm.netmask

        with db.get_lock('network', None, self.uuid, ttl=120):
            if not subst['floating_gateway'] in list(
                    util.get_interface_addresses(
                        subst['netns'], subst['physical_veth_inner'])):
                with util.RecordedOperation('enable virtual routing', self):
                    util.execute(
                        None,
                        '%(in_netns)s ip addr add %(floating_gateway)s/%(floating_netmask)s '
                        'dev %(physical_veth_inner)s' % subst)
                    util.execute(
                        None,
                        '%(in_netns)s ip link set %(physical_veth_inner)s up' %
                        subst)
                    util.execute(
                        None,
                        '%(in_netns)s route add default gw %(floating_router)s'
                        % subst)

            if not util.nat_rules_for_ipblock(self.network_address):
                with util.RecordedOperation('enable nat', self):
                    util.execute(None,
                                 'echo 1 > /proc/sys/net/ipv4/ip_forward')
                    util.execute(
                        None,
                        '%(in_netns)s iptables -A FORWARD -o %(physical_veth_inner)s '
                        '-i %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None,
                        '%(in_netns)s iptables -A FORWARD -i %(physical_veth_inner)s '
                        '-o %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None,
                        '%(in_netns)s iptables -t nat -A POSTROUTING -s %(ipblock)s/%(netmask)s '
                        '-o %(physical_veth_inner)s -j MASQUERADE' % subst)
Exemplo n.º 25
0
    def _update_power_states(self):
        libvirt = util.get_libvirt()
        conn = libvirt.open(None)
        try:
            seen = []

            # Active VMs have an ID. Active means running in libvirt
            # land.
            for domain_id in conn.listDomainsID():
                domain = conn.lookupByID(domain_id)
                if not domain.name().startswith('sf:'):
                    continue

                instance_uuid = domain.name().split(':')[1]
                log_ctx = LOG.withInstance(instance_uuid)

                instance = virt.Instance.from_db(instance_uuid)
                if not instance:
                    # Instance is SF but not in database. Kill to reduce load.
                    log_ctx.warning('Destroying unknown instance')
                    util.execute(None,
                                 'virsh destroy "sf:%s"' % instance_uuid)
                    continue

                instance.place_instance(config.NODE_NAME)
                seen.append(domain.name())

                if instance.state == 'deleted':
                    # NOTE(mikal): a delete might be in-flight in the queue.
                    # We only worry about instances which should have gone
                    # away five minutes ago.
                    if time.time() - instance.state_updated < 300:
                        continue

                    instance.enforced_deletes_increment()
                    attempts = instance.enforced_deletes

                    if attempts > 5:
                        # Sometimes we just can't delete the VM. Try the big
                        # hammer instead.
                        log_ctx.warning(
                            'Attempting alternate delete method for instance')
                        util.execute(None,
                                     'virsh destroy "sf:%s"' % instance_uuid)

                        instance.add_event('enforced delete', 'complete')
                    else:
                        instance.delete()

                    log_ctx.withField('attempt', attempts).warning(
                        'Deleting stray instance')

                    continue

                state = util.extract_power_state(libvirt, domain)
                instance.update_power_state(state)
                if state == 'crashed':
                    instance.update_instance_state('error')

            # Inactive VMs just have a name, and are powered off
            # in our state system.
            for domain_name in conn.listDefinedDomains():
                if not domain_name.startswith('sf:'):
                    continue

                if domain_name not in seen:
                    instance_uuid = domain_name.split(':')[1]
                    log_ctx = LOG.withInstance(instance_uuid)
                    instance = virt.Instance.from_db(instance_uuid)

                    if not instance:
                        # Instance is SF but not in database. Kill because
                        # unknown.
                        log_ctx.warning('Removing unknown inactive instance')
                        domain = conn.lookupByName(domain_name)
                        domain.undefine()
                        continue

                    if instance.state == 'deleted':
                        # NOTE(mikal): a delete might be in-flight in the queue.
                        # We only worry about instances which should have gone
                        # away five minutes ago.
                        if time.time() - instance['state_updated'] < 300:
                            continue

                        domain = conn.lookupByName(domain_name)
                        domain.undefine()
                        log_ctx.info('Detected stray instance')
                        instance.add_event('deleted stray', 'complete')
                        continue

                    instance.place_instance(config.NODE_NAME)

                    if not os.path.exists(instance.instance_path()):
                        # If we're inactive and our files aren't on disk,
                        # we have a problem.
                        log_ctx.info('Detected error state for instance')
                        instance.update_instance_state('error')

                    elif instance.power_state != 'off':
                        log_ctx.info('Detected power off for instance')
                        instance.update_power_state('off')
                        instance.add_event('detected poweroff', 'complete')

        except libvirt.libvirtError as e:
            LOG.error('Failed to lookup all domains: %s' % e)
Exemplo n.º 26
0
def main():
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    setproctitle.setproctitle(daemon.process_name('main'))

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items
    db.clear_stale_locks()
    db.see_this_node()
    db.restart_queues()

    def _start_daemon(d):
        pid = os.fork()
        if pid == 0:
            DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
        DAEMON_PIDS[pid] = d
        LOG.withField('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if util.is_network_node():
        # Bootstrap the floating network in the Networks table
        floating_network = db.get_network('floating')
        if not floating_network:
            db.create_floating_network(config.get('FLOATING_NETWORK'))
            floating_network = net.from_db('floating')

        subst = {
            'physical_bridge':
            util.get_safe_interface_name('phy-br-%s' %
                                         config.get('NODE_EGRESS_NIC')),
            'physical_nic':
            config.get('NODE_EGRESS_NIC')
        }

        if not util.check_for_interface(subst['physical_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = db.get_ipmanager('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                util.create_interface(subst['physical_bridge'], 'bridge', '')
                util.execute(None,
                             'ip link set %(physical_bridge)s up' % subst)
                util.execute(
                    None, 'ip addr add %(master_float)s/%(netmask)s '
                    'dev %(physical_bridge)s' % subst)

                util.execute(
                    None, 'iptables -A FORWARD -o %(physical_nic)s '
                    '-i %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -A FORWARD -i %(physical_nic)s '
                    '-o %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -t nat -A POSTROUTING '
                    '-o %(physical_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        running_daemons = []
        for pid in DAEMON_PIDS:
            running_daemons.append(DAEMON_PIDS[pid])

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        for d in DAEMON_PIDS:
            if not psutil.pid_exists(d):
                LOG.warning('%s pid is missing, restarting' % DAEMON_PIDS[d])
                _start_daemon(DAEMON_PIDS[d])

    _audit_daemons()
    restore_instances()

    while True:
        time.sleep(10)

        wpid, _ = os.waitpid(-1, os.WNOHANG)
        while wpid != 0:
            LOG.warning('%s died (pid %d)' %
                        (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
            del DAEMON_PIDS[wpid]
            wpid, _ = os.waitpid(-1, os.WNOHANG)

        _audit_daemons()
        db.see_this_node()
Exemplo n.º 27
0
    def create(self):
        subst = self.subst_dict()

        with db.get_object_lock(self, ttl=120, op='Network create'):
            # Ensure network was not deleted whilst waiting for the lock.
            if self.is_dead():
                raise DeadNetwork('network=%s' % self)

            if not util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('create vxlan interface', self):
                    util.create_interface(
                        subst['vx_interface'], 'vxlan',
                        'id %(vx_id)s dev %(physical_interface)s dstport 0' %
                        subst)
                    util.execute(
                        None, 'sysctl -w net.ipv4.conf.'
                        '%(vx_interface)s.arp_notify=1' % subst)

            if not util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('create vxlan bridge', self):
                    util.create_interface(subst['vx_bridge'], 'bridge', '')
                    util.execute(
                        None, 'ip link set %(vx_interface)s '
                        'master %(vx_bridge)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_interface)s up' % subst)
                    util.execute(None, 'ip link set %(vx_bridge)s up' % subst)
                    util.execute(
                        None, 'sysctl -w net.ipv4.conf.'
                        '%(vx_bridge)s.arp_notify=1' % subst)
                    util.execute(None, 'brctl setfd %(vx_bridge)s 0' % subst)
                    util.execute(None, 'brctl stp %(vx_bridge)s off' % subst)
                    util.execute(None,
                                 'brctl setageing %(vx_bridge)s 0' % subst)

        if util.is_network_node():
            if not os.path.exists('/var/run/netns/%(netns)s' % subst):
                with util.RecordedOperation('create netns', self):
                    util.execute(None, 'ip netns add %(netns)s' % subst)

            if not util.check_for_interface(subst['vx_veth_outer']):
                with util.RecordedOperation('create router veth', self):
                    util.create_interface(
                        subst['vx_veth_outer'], 'veth',
                        'peer name %(vx_veth_inner)s' % subst)
                    util.execute(
                        None, 'ip link set %(vx_veth_inner)s netns %(netns)s' %
                        subst)
                    util.execute(
                        None,
                        'brctl addif %(vx_bridge)s %(vx_veth_outer)s' % subst)
                    util.execute(None,
                                 'ip link set %(vx_veth_outer)s up' % subst)
                    util.execute(
                        None, '%(in_netns)s ip link set %(vx_veth_inner)s up' %
                        subst)
                    util.execute(
                        None,
                        '%(in_netns)s ip addr add %(router)s/%(netmask)s '
                        'dev %(vx_veth_inner)s' % subst)

            if not util.check_for_interface(subst['physical_veth_outer']):
                with util.RecordedOperation('create physical veth', self):
                    util.create_interface(
                        subst['physical_veth_outer'], 'veth',
                        'peer name %(physical_veth_inner)s' % subst)
                    util.execute(
                        None, 'brctl addif %(physical_bridge)s '
                        '%(physical_veth_outer)s' % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_outer)s up' % subst)
                    util.execute(
                        None, 'ip link set %(physical_veth_inner)s '
                        'netns %(netns)s' % subst)

            self.deploy_nat()
            self.update_dhcp()
        else:
            db.enqueue('networknode', DeployNetworkTask(self.db_entry['uuid']))
            db.add_event('network', self.db_entry['uuid'], 'deploy',
                         'enqueued', None, None)
Exemplo n.º 28
0
    def create(self, lock=None):
        db.update_instance_state(self.db_entry['uuid'], 'creating')

        # Ensure we have state on disk
        if not os.path.exists(self.instance_path):
            LOG.withObj(self).debug('Creating instance storage at %s' %
                                    self.instance_path)
            os.makedirs(self.instance_path)

        # Generate a config drive
        with util.RecordedOperation('make config drive', self):
            self._make_config_drive(
                os.path.join(
                    self.instance_path,
                    self.db_entry['block_devices']['devices'][1]['path']))

        # Prepare disks
        if not self.db_entry['block_devices']['finalized']:
            modified_disks = []
            for disk in self.db_entry['block_devices']['devices']:
                if disk.get('base'):
                    img = images.Image.from_url(disk['base'])
                    hashed_image_path = img.version_image_path()

                    with util.RecordedOperation('detect cdrom images', self):
                        try:
                            cd = pycdlib.PyCdlib()
                            cd.open(hashed_image_path)
                            disk['present_as'] = 'cdrom'
                        except Exception:
                            pass

                    if disk.get('present_as', 'cdrom') == 'cdrom':
                        # There is no point in resizing or COW'ing a cdrom
                        disk['path'] = disk['path'].replace('.qcow2', '.raw')
                        disk['type'] = 'raw'
                        disk['snapshot_ignores'] = True

                        try:
                            os.link(hashed_image_path, disk['path'])
                        except OSError:
                            # Different filesystems
                            util.execute([lock], 'cp %s %s' %
                                         (hashed_image_path, disk['path']))

                        # Due to limitations in some installers, cdroms are always on IDE
                        disk['device'] = 'hd%s' % disk['device'][-1]
                        disk['bus'] = 'ide'
                    else:
                        if config.parsed.get('DISK_FORMAT') == 'qcow':
                            with util.RecordedOperation(
                                    'create copy on write layer', self):
                                images.create_cow([lock], hashed_image_path,
                                                  disk['path'], disk['size'])

                            # Record the backing store for modern libvirts
                            disk['backing'] = (
                                '<backingStore type=\'file\'>\n'
                                '        <format type=\'qcow2\'/>\n'
                                '        <source file=\'%s\'/>\n'
                                '      </backingStore>\n' %
                                (hashed_image_path))

                        elif config.parsed.get('DISK_FORMAT') == 'qcow_flat':
                            with util.RecordedOperation('resize image', self):
                                resized_image_path = img.resize([lock],
                                                                disk['size'])

                            with util.RecordedOperation(
                                    'create flat layer', self):
                                images.create_flat([lock], resized_image_path,
                                                   disk['path'])

                        elif config.parsed.get('DISK_FORMAT') == 'flat':
                            with util.RecordedOperation('resize image', self):
                                resized_image_path = img.resize([lock],
                                                                disk['size'])

                            with util.RecordedOperation(
                                    'create raw disk', self):
                                images.create_raw([lock], resized_image_path,
                                                  disk['path'])

                        else:
                            raise Exception('Unknown disk format')

                elif not os.path.exists(disk['path']):
                    util.execute(
                        None, 'qemu-img create -f qcow2 %s %sG' %
                        (disk['path'], disk['size']))

                modified_disks.append(disk)

            self.db_entry['block_devices']['devices'] = modified_disks
            self.db_entry['block_devices']['finalized'] = True

        db.persist_block_devices(self.db_entry['uuid'],
                                 self.db_entry['block_devices'])

        # Create the actual instance
        with util.RecordedOperation('create domain XML', self):
            self._create_domain_xml()

        # Sometimes on Ubuntu 20.04 we need to wait for port binding to work.
        # Revisiting this is tracked by issue 320 on github.
        with util.RecordedOperation('create domain', self):
            if not self.power_on():
                attempts = 0
                while not self.power_on() and attempts < 100:
                    LOG.withObj(self).warning(
                        'Instance required an additional attempt to power on')
                    time.sleep(5)
                    attempts += 1

        if self.is_powered_on():
            LOG.withObj(self).info('Instance now powered on')
        else:
            LOG.withObj(self).info('Instance failed to power on')
        db.update_instance_state(self.db_entry['uuid'], 'created')
Exemplo n.º 29
0
    def deploy_nat(self):
        if not self.db_entry['provide_nat']:
            return

        subst = self.subst_dict()
        if not self.db_entry['floating_gateway']:
            with db.get_lock('ipmanager',
                             None,
                             'floating',
                             ttl=120,
                             op='Network deploy NAT'):
                ipm = db.get_ipmanager('floating')
                self.db_entry[
                    'floating_gateway'] = ipm.get_random_free_address()
                db.persist_ipmanager('floating', ipm.save())
                self.persist_floating_gateway()

        # No lock because no data changing
        ipm = db.get_ipmanager('floating')
        subst['floating_router'] = ipm.get_address_at_index(1)
        subst['floating_gateway'] = self.db_entry['floating_gateway']
        subst['floating_netmask'] = ipm.netmask

        with db.get_object_lock(self, ttl=120, op='Network deploy NAT'):
            # Ensure network was not deleted whilst waiting for the lock.
            if self.is_dead():
                raise DeadNetwork('network=%s' % self)

            with util.RecordedOperation('enable virtual routing', self):
                addresses = util.get_interface_addresses(
                    subst['netns'], subst['physical_veth_inner'])
                if not subst['floating_gateway'] in list(addresses):
                    util.execute(
                        None, '%(in_netns)s ip addr add '
                        '%(floating_gateway)s/%(floating_netmask)s '
                        'dev %(physical_veth_inner)s' % subst)
                    util.execute(
                        None, '%(in_netns)s ip link set '
                        '%(physical_veth_inner)s up' % subst)

                default_routes = util.get_default_routes(subst['netns'])
                if default_routes != [subst['floating_router']]:
                    if default_routes:
                        for default_route in default_routes:
                            util.execute(
                                None, '%s route del default gw %s' %
                                (subst['in_netns'], default_route))

                    util.execute(
                        None, '%(in_netns)s route add default '
                        'gw %(floating_router)s' % subst)

            if not util.nat_rules_for_ipblock(self.network_address):
                with util.RecordedOperation('enable nat', self):
                    util.execute(None,
                                 'echo 1 > /proc/sys/net/ipv4/ip_forward')
                    util.execute(
                        None, '%(in_netns)s iptables -A FORWARD '
                        '-o %(physical_veth_inner)s '
                        '-i %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None, '%(in_netns)s iptables -A FORWARD '
                        '-i %(physical_veth_inner)s '
                        '-o %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None, '%(in_netns)s iptables -t nat -A POSTROUTING '
                        '-s %(ipblock)s/%(netmask)s '
                        '-o %(physical_veth_inner)s '
                        '-j MASQUERADE' % subst)