示例#1
0
 def _remove_mesh_element(self, n):
     """Delete the all-zeros-MAC bridge fdb entry whose dst is node ``n``.

     The entry is removed from the device named by the ``vx_interface``
     value of ``self.subst_dict()``.
     """
     self.log.info('Removing excess mesh element %s', n)

     params = self.subst_dict()
     params['node'] = n
     command = ('bridge fdb del to 00:00:00:00:00:00 dst %(node)s '
                'dev %(vx_interface)s' % params)
     util_process.execute(None, command)
示例#2
0
 def _add_mesh_element(self, n):
     """Append an all-zeros-MAC bridge fdb entry whose dst is node ``n``.

     The entry is added to the device named by the ``vx_interface`` value
     of ``self.subst_dict()``.
     """
     self.log.info('Adding new mesh element %s', n)

     params = self.subst_dict()
     params['node'] = n
     command = ('bridge fdb append to 00:00:00:00:00:00 '
                'dst %(node)s dev %(vx_interface)s' % params)
     util_process.execute(None, command)
示例#3
0
def create_blank(locks, disk_file, disk_size):
    """Create an empty qcow2 image at disk_file unless the path exists.

    disk_size is handed to qemu-img with a 'G' suffix. The command is run
    at low I/O priority.
    """
    if not os.path.exists(disk_file):
        command = (
            'qemu-img create -o cluster_size=%s -f qcow2 %s %sG'
            % (constants.QCOW2_CLUSTER_SIZE, disk_file, disk_size))
        util_process.execute(
            locks, command, iopriority=util_process.PRIORITY_LOW)
示例#4
0
def snapshot(locks, source, destination):
    """Convert source into a qcow2 file at destination using qemu-img.

    The '-c' flag is added when config.COMPRESS_SNAPSHOTS is truthy. The
    command is run at low I/O priority.
    """
    convert = 'qemu-img convert --force-share -o cluster_size=%s -O qcow2' % (
        constants.QCOW2_CLUSTER_SIZE)
    compress_flag = ' -c' if config.COMPRESS_SNAPSHOTS else ''
    command = ' '.join([convert + compress_flag, source, destination])
    util_process.execute(
        locks, command, iopriority=util_process.PRIORITY_LOW)
示例#5
0
    def restart_dhcpd(self):
        """Regenerate the dnsmasq config and hosts, then reload or start it.

        Does nothing when the network's namespace path does not exist.
        Otherwise SIGHUP is sent via self._send_signal(); if that reports
        failure, dnsmasq is started inside the network namespace.
        """
        netns_path = '/var/run/netns/%s' % self.network.uuid
        if not os.path.exists(netns_path):
            return

        self._make_config()
        self._make_hosts()

        if self._send_signal(signal.SIGHUP):
            return

        dnsmasq_command = (
            'dnsmasq --conf-file=%(config_dir)s/config' % self.subst)
        util_process.execute(
            None, dnsmasq_command, namespace=self.network.uuid)
示例#6
0
    def remove_floating_ip(self, floating_address, inner_address):
        """Delete the outer veth for a floating address, if it exists.

        The interface is named 'flt-<hex>-o', where <hex> is the floating
        IPv4 address rendered as eight hex digits.
        """
        self.log.info('Removing floating ip %s -> %s',
                      floating_address, inner_address)

        params = self.subst_dict()
        params['floating_address'] = floating_address
        address_as_int = int(ipaddress.IPv4Address(floating_address))
        params['floating_address_as_hex'] = '%08x' % address_as_int
        params['inner_address'] = inner_address

        outer_name = 'flt-%(floating_address_as_hex)s-o' % params
        if not util_network.check_for_interface(outer_name):
            return

        util_process.execute(
            None, 'ip link del flt-%(floating_address_as_hex)s-o' % params)
示例#7
0
def create_interface(interface, interface_type, extra, mtu=None):
    """Add a link of the given type via 'ip link add'.

    When mtu is falsy it defaults to config.MAX_HYPERVISOR_MTU - 50. The
    interface name is passed through get_safe_interface_name() first, and
    extra is appended verbatim after the link type.
    """
    mtu = mtu or (config.MAX_HYPERVISOR_MTU - 50)

    fields = {
        'interface': get_safe_interface_name(interface),
        'interface_type': interface_type,
        'mtu': mtu,
        'extra': extra
    }
    command = ('ip link add %(interface)s mtu %(mtu)s '
               'type %(interface_type)s %(extra)s' % fields)
    process.execute(None, command)
示例#8
0
    def delete_on_hypervisor(self):
        """Delete this network's vxlan bridge and vxlan interface if present.

        Both deletions happen while holding the lock from self.get_lock().
        """
        with self.get_lock(op='Network delete'):
            params = self.subst_dict()

            if util_network.check_for_interface(params['vx_bridge']):
                delete_bridge = 'ip link delete %(vx_bridge)s' % params
                with util_general.RecordedOperation('delete vxlan bridge', self):
                    util_process.execute(None, delete_bridge)

            if util_network.check_for_interface(params['vx_interface']):
                delete_vxlan = 'ip link delete %(vx_interface)s' % params
                with util_general.RecordedOperation('delete vxlan interface', self):
                    util_process.execute(None, delete_vxlan)
示例#9
0
def create_qcow2(locks, cache_file, disk_file, disk_size=None):
    """Convert cache_file into a qcow2 disk_file, optionally resizing it.

    Nothing happens when disk_file already exists. When disk_size is
    truthy the new disk is resized to int(disk_size) with a 'G' suffix.
    Both qemu-img commands are run at low I/O priority.
    """
    if os.path.exists(disk_file):
        return

    convert_command = (
        'qemu-img convert -t none -o cluster_size=%s -O qcow2 %s %s'
        % (constants.QCOW2_CLUSTER_SIZE, cache_file, disk_file))
    util_process.execute(
        locks, convert_command, iopriority=util_process.PRIORITY_LOW)

    if not disk_size:
        return

    resize_command = 'qemu-img resize %s %dG' % (disk_file, int(disk_size))
    util_process.execute(
        locks, resize_command, iopriority=util_process.PRIORITY_LOW)
示例#10
0
    def run(self):
        """Run the API command line with four workers per reported CPU.

        The CPU count is the first element returned by getCPUMap() on a
        libvirt connection to qemu:///system. /var/run/sf is created if
        missing before the command is executed with the current process
        environment.
        """
        LOG.info('Starting')

        libvirt = util_libvirt.get_libvirt()
        hypervisor = libvirt.open('qemu:///system')
        present_cpus, _cpu_map, _online = hypervisor.getCPUMap()

        os.makedirs('/var/run/sf', exist_ok=True)

        command_line = config.API_COMMAND_LINE % {
            'port': config.API_PORT,
            'timeout': config.API_TIMEOUT,
            'name': daemon.process_name('api'),
            'workers': present_cpus * 4
        }
        util_process.execute(
            None, command_line,
            env_variables=os.environ,
            check_exit_code=[0, 1, -15])
示例#11
0
def clean_events_mesh_operations(etcd_client):
    """Delete all but the newest 50 listed network event keys per network.

    Keys are listed with the etcdctl command line tool and grouped by the
    network uuid found in the fifth '/'-separated field of each key. Keys
    are deleted through etcd_client.delete(). Progress is printed.
    """
    # TODO(andy): This can be removed when older releases do not exist

    # We probably need to cleanup excess network mesh events. We also need to
    # try and fetch small batches because of limits in the amount of data etcd3
    # can return at one time.

    # Save time and use the already available etcdctl client.
    listing, stderr = util_process.execute(
        None,
        'etcdctl get --prefix /sf/event/network/ | grep sf/event',
        check_exit_code=[0, 1])
    if stderr:
        print('ERROR: Unable to retrieve network keys:%s' % stderr)
        return

    # Group the event keys by the network they belong to
    events_by_network = defaultdict(list)
    for key in listing.split('\n'):
        if key:
            _blank, _sf, _event, _network, network_uuid, _time = key.split('/')
            events_by_network[network_uuid].append(key)

    # Everything except the final 50 keys of each network is removed
    removed = 0
    for network_keys in events_by_network.values():
        for stale_key in network_keys[:-50]:
            print('--> Removing verbose network event %s' % stale_key)
            etcd_client.delete(stale_key)
            removed += 1
    print(' - Cleaned up %d old network mesh events' % removed)
示例#12
0
def discover_interfaces():
    """Parse 'ip addr list' into MAC / interface / vxlan id lookup tables.

    Returns a tuple of three dicts: MAC address to interface name (seeded
    with the all-zeros MAC mapped to 'broadcast'), interface name to MAC
    address, and vxlan id to MAC address. The vxlan id is the hex number
    following the first '-' in interface names starting with 'vxlan-'.
    """
    iface_name_re = re.compile('^[0-9]+: ([^:]+): <')
    link_ether_re = re.compile('^    link/ether (.*) brd .*')

    mac_to_iface = {'00:00:00:00:00:00': 'broadcast'}
    iface_to_mac = {}
    vxid_to_mac = {}

    # Most recent interface header line seen; link/ether lines belong to it
    current_iface = None

    stdout, _ = process.execute(None, 'ip addr list')
    for raw_line in stdout.split('\n'):
        line = raw_line.rstrip()

        name_match = iface_name_re.match(line)
        if name_match:
            current_iface = name_match.group(1)
            continue

        ether_match = link_ether_re.match(line)
        if not ether_match:
            continue

        mac = ether_match.group(1)
        mac_to_iface[mac] = current_iface
        iface_to_mac[current_iface] = mac

        if current_iface.startswith('vxlan-'):
            vxid_to_mac[int(current_iface.split('-')[1], 16)] = mac

    return mac_to_iface, iface_to_mac, vxid_to_mac
示例#13
0
def get_interface_mtus(namespace=None):
    """Yield (ifname, mtu) for every link 'ip link show' reports.

    The command is run in the given namespace when one is supplied.
    """
    output, _stderr = process.execute(
        None,
        'ip -pretty -json link show',
        check_exit_code=[0, 1],
        namespace=namespace)

    yield from (
        (link['ifname'], link['mtu']) for link in _clean_ip_json(output))
示例#14
0
def get_interface_mtu(interface, namespace=None):
    """Return the 'mtu' of the first link entry reported for interface.

    Returns None when the parsed output contains no entries.
    """
    output, _stderr = process.execute(
        None,
        'ip -pretty -json link show %s' % interface,
        check_exit_code=[0, 1],
        namespace=namespace)

    links = _clean_ip_json(output)
    for link in links:
        # Only the first entry is ever consulted
        return link['mtu']
    return None
示例#15
0
def nat_rules_for_ipblock(ipblock):
    """Return True if str(ipblock) appears in the nat POSTROUTING listing.

    This is a plain substring search over each line of the iptables output.
    """
    out, _ = process.execute(None, 'iptables -t nat -L POSTROUTING -n -v')
    # Output looks like this:
    # Chain POSTROUTING (policy ACCEPT 199 packets, 18189 bytes)
    # pkts bytes target     prot opt in     out     source               destination
    #   23  1736 MASQUERADE  all  --  *      ens4    192.168.242.0/24     0.0.0.0/0

    return any(str(ipblock) in line for line in out.split('\n'))
示例#16
0
def get_interface_addresses(name, namespace=None):
    """Yield the first 'local' address of each entry 'ip addr show' reports.

    Entries without an 'addr_info' key, or with an empty 'addr_info' list,
    are skipped.
    """
    output, _stderr = process.execute(
        None,
        'ip -pretty -json addr show %s' % name,
        check_exit_code=[0, 1],
        namespace=namespace)

    for entry in _clean_ip_json(output):
        if 'addr_info' not in entry:
            continue
        try:
            yield entry['addr_info'][0]['local']
        except IndexError:
            continue
示例#17
0
def get_interface_statistics(name, namespace=None):
    """Return the 'stats64' counters 'ip -s link show' reports for name.

    Args:
        name: the interface to query.
        namespace: optional network namespace to run the command in.

    Returns:
        The 'stats64' value of the first link entry, or None if that entry
        has no such key.

    Raises:
        exceptions.NoInterfaceStatistics: if the command produced no output
            or the parsed output contains no link entries.
    """
    stdout, _ = process.execute(None,
                                'ip -s -pretty -json link show %s' % name,
                                check_exit_code=[0, 1],
                                namespace=namespace)

    if not stdout:
        raise exceptions.NoInterfaceStatistics(
            'No statistics for interface %s in namespace %s' %
            (name, namespace))

    stats = _clean_ip_json(stdout)

    # The other callers of _clean_ip_json() in this module treat its result
    # as a list of per-link dicts, so calling .get() on it directly would
    # fail. Use the first entry; a bare dict is still accepted as before.
    if isinstance(stats, list):
        if not stats:
            raise exceptions.NoInterfaceStatistics(
                'No statistics for interface %s in namespace %s' %
                (name, namespace))
        stats = stats[0]

    return stats.get('stats64')
示例#18
0
def get_default_routes(namespace):
    """List the distinct gateways from 'ip route list default'.

    The gateway is taken as the third space-separated field of each output
    line with more than three fields. Order of first appearance is kept.
    """
    output, _stderr = process.execute(
        None, 'ip route list default', namespace=namespace)

    if not output:
        return []

    gateways = []
    for line in output.split('\n'):
        fields = line.split(' ')
        if len(fields) <= 3:
            continue
        gateway = fields[2]
        if gateway not in gateways:
            gateways.append(gateway)
    return gateways
示例#19
0
    def discover_mesh(self):
        """Yield the dst of each permanent all-zeros-MAC fdb entry.

        Entries are read from 'bridge fdb show' for this network's
        vx_interface. Nothing is yielded for the 'floating' network.
        """
        # The floating network does not have a vxlan mesh
        if self.uuid == 'floating':
            return

        mesh_re = re.compile(r'00:00:00:00:00:00 dst (.*) self permanent')

        command = 'bridge fdb show brport %(vx_interface)s' % self.subst_dict()
        output, _stderr = util_process.execute(None, command)

        for line in output.split('\n'):
            found = mesh_re.match(line)
            if found:
                yield found.group(1)
示例#20
0
def create_cow(locks, cache_file, disk_file, disk_size):
    """Create a qcow2 layer at disk_file backed by cache_file.

    disk_size is specified in GiBs. Nothing happens when disk_file already
    exists. ImagesCannotShrinkException is raised when the requested size
    is smaller than the backing image's reported virtual size.
    """
    if os.path.exists(disk_file):
        return

    info = identify(cache_file)
    try:
        virtual_size = int(info['virtual size'])
    except TypeError:
        virtual_size = None

    if virtual_size and disk_size:
        if virtual_size > disk_size * 1024 * 1024 * 1024:
            raise exceptions.ImagesCannotShrinkException(
                'The specified size of %dgb (%d bytes) is smaller than the existing size '
                'of the image of %s bytes.'
                % (disk_size, disk_size * 1024 * 1024 * 1024, info['virtual size']))

    # Only pass an explicit size to qemu-img when one was requested
    if disk_size:
        command = (
            'qemu-img create -b %s -o cluster_size=%s -f qcow2 %s %dG'
            % (cache_file, constants.QCOW2_CLUSTER_SIZE, disk_file,
               int(disk_size)))
    else:
        command = (
            'qemu-img create -b %s -o cluster_size=%s -f qcow2 %s'
            % (cache_file, constants.QCOW2_CLUSTER_SIZE, disk_file))

    util_process.execute(
        locks, command, iopriority=util_process.PRIORITY_LOW)
示例#21
0
def check_for_interface(name, namespace=None, up=False):
    """Report whether the named interface exists, and optionally is UP.

    Returns False when a namespace is given but has no entry under
    /var/run/netns, or when ip reports that the device does not exist.
    With up=True the result is whether 'UP' is among the flags of the
    first reported link.
    """
    if namespace and not os.path.exists('/var/run/netns/%s' % namespace):
        return False

    output, errors = process.execute(
        None,
        'ip -pretty -json link show %s' % name,
        check_exit_code=[0, 1],
        namespace=namespace)

    if errors.rstrip('\n').endswith(' does not exist.'):
        return False

    if not up:
        return True

    links = _clean_ip_json(output)
    return 'UP' in links[0]['flags']
示例#22
0
    def enable_nat(self):
        """Add forwarding and MASQUERADE rules for this network.

        Only acts when config.NODE_IS_NETWORK_NODE is set and
        nat_rules_for_ipblock() finds no rule for self.network_address.
        The iptables commands are run in the namespace named by self.uuid.
        """
        if not config.NODE_IS_NETWORK_NODE:
            return

        params = self.subst_dict()
        if util_network.nat_rules_for_ipblock(self.network_address):
            return

        with util_general.RecordedOperation('enable nat', self):
            util_process.execute(
                None, 'echo 1 > /proc/sys/net/ipv4/ip_forward')

            forward_out = (
                'iptables -A FORWARD -o %(egress_veth_inner)s '
                '-i %(vx_veth_inner)s -j ACCEPT' % params)
            util_process.execute(None, forward_out, namespace=self.uuid)

            forward_in = (
                'iptables -A FORWARD -i %(egress_veth_inner)s '
                '-o %(vx_veth_inner)s -j ACCEPT' % params)
            util_process.execute(None, forward_in, namespace=self.uuid)

            masquerade = (
                'iptables -t nat -A POSTROUTING -s %(ipblock)s/%(netmask)s '
                '-o %(egress_veth_inner)s -j MASQUERADE' % params)
            util_process.execute(None, masquerade, namespace=self.uuid)
示例#23
0
    def delete_on_network_node(self):
        """Tear down this network's router plumbing and mark it deleted.

        Under the network lock: delete the router and egress veths and the
        namespace if they exist, release any floating gateway, and set the
        state to STATE_DELETED. Afterwards a HypervisorDestroyNetworkTask is
        enqueued for each node from Nodes([active_nodes]), DHCP and NAT are
        removed, and this network's IPManager record is deleted.
        """
        with self.get_lock(op='Network delete'):
            params = self.subst_dict()

            if util_network.check_for_interface(params['vx_veth_outer']):
                delete_router_veth = 'ip link delete %(vx_veth_outer)s' % params
                with util_general.RecordedOperation('delete router veth', self):
                    util_process.execute(None, delete_router_veth)

            if util_network.check_for_interface(params['egress_veth_outer']):
                delete_egress_veth = (
                    'ip link delete %(egress_veth_outer)s' % params)
                with util_general.RecordedOperation('delete egress veth', self):
                    util_process.execute(None, delete_egress_veth)

            netns_path = '/var/run/netns/%s' % self.uuid
            if os.path.exists(netns_path):
                with util_general.RecordedOperation('delete netns', self):
                    util_process.execute(
                        None, 'ip netns del %s' % self.uuid)

            if self.floating_gateway:
                with db.get_lock('ipmanager', None, 'floating', ttl=120,
                                 op='Network delete'):
                    floating_ipm = IPManager.from_db('floating')
                    floating_ipm.release(self.floating_gateway)
                    floating_ipm.persist()
                    self.update_floating_gateway(None)

            self.state = self.STATE_DELETED

        # Ensure that all hypervisors remove this network. This is really
        # just catching strays, apart from on the network node where we
        # absolutely need to do this thing.
        for node in Nodes([active_nodes]):
            destroy_task = HypervisorDestroyNetworkTask(self.uuid)
            etcd.enqueue(node.uuid, {'tasks': [destroy_task]})

        self.remove_dhcp()
        self.remove_nat()

        IPManager.from_db(self.uuid).delete()
示例#24
0
def identify(path):
    """Work out what an image is.

    Runs 'qemu-img info' on path and parses each 'key: value' output line.

    Args:
        path: filesystem path of the image to inspect.

    Returns:
        A dict mapping each field name to its value. A value becomes a float
        when VALUE_WITH_BRACKETS_RE matches it (group 1 is used), when it is
        a number followed by K, M, G or T (scaled by the matching power of
        1024), or when it is a plain number. Anything else stays a string.
        An empty dict is returned when path does not exist.
    """

    if not os.path.exists(path):
        return {}

    out, _ = util_process.execute(
        None, 'qemu-img info --force-share %s' % path)

    suffix_multipliers = {
        'K': 1024,
        'M': 1024 ** 2,
        'G': 1024 ** 3,
        'T': 1024 ** 4,
    }

    data = {}
    for line in out.split('\n'):
        key, separator, value = line.strip().partition(': ')
        if not separator:
            continue

        # Values that merely end in K/M/G/T (for example a file path) are
        # not sizes. A failed numeric conversion must leave them as strings
        # rather than abort parsing of the whole output.
        try:
            m = VALUE_WITH_BRACKETS_RE.match(value)
            if m:
                parsed = float(m.group(1))
            elif value[-1:] in suffix_multipliers:
                parsed = float(value[:-1]) * suffix_multipliers[value[-1]]
            else:
                parsed = float(value)
        except (TypeError, ValueError):
            parsed = value

        data[key] = parsed

    return data
示例#25
0
    def add_floating_ip(self, floating_address, inner_address):
        """Plumb a floating address into the namespace and DNAT it inward.

        Creates a veth pair named 'flt-<hex>-o' / 'flt-<hex>-i', where <hex>
        is the floating IPv4 address as eight hex digits. The inner end is
        moved into the network namespace and given the floating address as
        a /32, then a PREROUTING DNAT rule to inner_address is added.
        """
        self.log.info('Adding floating ip %s -> %s',
                      floating_address, inner_address)

        params = self.subst_dict()
        params['floating_address'] = floating_address
        address_as_int = int(ipaddress.IPv4Address(floating_address))
        params['floating_address_as_hex'] = '%08x' % address_as_int
        params['inner_address'] = inner_address

        outer_name = 'flt-%(floating_address_as_hex)s-o' % params
        peer_spec = 'peer name flt-%(floating_address_as_hex)s-i' % params
        util_network.create_interface(outer_name, 'veth', peer_spec)

        move_inner = (
            'ip link set flt-%(floating_address_as_hex)s-i netns %(netns)s'
            % params)
        util_process.execute(None, move_inner)

        assign_address = (
            'ip addr add %(floating_address)s/32 '
            'dev flt-%(floating_address_as_hex)s-i' % params)
        util_process.execute(None, assign_address, namespace=self.uuid)

        dnat_rule = (
            'iptables -t nat -A PREROUTING -d %(floating_address)s -j DNAT '
            '--to-destination %(inner_address)s' % params)
        util_process.execute(None, dnat_rule, namespace=self.uuid)
示例#26
0
    def _update_power_states(self):
        """Reconcile libvirt domains named 'sf:<uuid>' with instance records.

        Two passes are made over the qemu:///system connection:

        1. Active domains (those with an ID). Domains with no database
           record are destroyed and undefined. Domains whose record is in
           the deleted state for 300 seconds or more are deleted again, and
           after more than five attempts are undefined via virsh instead.
           All others have their power state recorded, with crashed domains
           moved to deleted or to an '-error' state.
        2. Defined but inactive domains. Unknown or stale deleted domains
           are undefined. The rest are marked as powered off, or moved to
           an error state when their instance path is missing.

        Domains whose name does not start with 'sf:' are ignored. A
        libvirtError raised inside either pass ends the run and is logged
        at debug level.
        """
        libvirt = util_libvirt.get_libvirt()
        # NOTE(review): open() is called before the try block, so a failure
        # to connect is not caught by the libvirtError handler below.
        conn = libvirt.open('qemu:///system')
        try:
            # Names of active domains which had a database record. The
            # second pass skips any defined domain listed here.
            seen = []

            # Active VMs have an ID. Active means running in libvirt
            # land.
            for domain_id in conn.listDomainsID():
                domain = conn.lookupByID(domain_id)
                if not domain.name().startswith('sf:'):
                    continue

                # Domain names are 'sf:<instance uuid>'
                instance_uuid = domain.name().split(':')[1]
                log_ctx = LOG.with_instance(instance_uuid)

                inst = instance.Instance.from_db(instance_uuid)
                if not inst:
                    # Instance is SF but not in database. Kill to reduce load.
                    log_ctx.warning('Destroying unknown instance')
                    self._delete_instance_files(instance_uuid)
                    util_process.execute(
                        None, 'virsh destroy "sf:%s"' % instance_uuid)
                    util_process.execute(
                        None, 'virsh undefine --nvram "sf:%s"' % instance_uuid)
                    continue

                inst.place_instance(config.NODE_NAME)
                seen.append(domain.name())

                db_state = inst.state
                if db_state.value == dbo.STATE_DELETED:
                    # NOTE(mikal): a delete might be in-flight in the queue.
                    # We only worry about instances which should have gone
                    # away five minutes ago.
                    if time.time() - db_state.update_time < 300:
                        continue

                    # Count this enforcement attempt, then read back the
                    # running total to choose the delete method.
                    inst.enforced_deletes_increment()
                    attempts = inst._db_get_attribute(
                        'enforced_deletes')['count']

                    if attempts > 5:
                        # Sometimes we just can't delete the VM. Try the big
                        # hammer instead.
                        log_ctx.warning(
                            'Attempting alternate delete method for instance')
                        self._delete_instance_files(instance_uuid)
                        util_process.execute(
                            None,
                            'virsh undefine --nvram "sf:%s"' % instance_uuid)
                        inst.add_event('enforced delete', 'complete')
                    else:
                        inst.delete()

                    # This warning is emitted for both delete methods above.
                    log_ctx.with_field(
                        'attempt', attempts).warning('Deleting stray instance')
                    continue

                state = util_libvirt.extract_power_state(libvirt, domain)
                inst.update_power_state(state)
                if state == 'crashed':
                    if inst.state.value in [
                            dbo.STATE_DELETE_WAIT, dbo.STATE_DELETED
                    ]:
                        util_process.execute(
                            None,
                            'virsh undefine --nvram "sf:%s"' % instance_uuid)
                        # NOTE(review): this assigns to the .value attribute
                        # of the object returned by inst.state, whereas the
                        # else branch assigns to inst.state itself. Confirm
                        # that this form actually records the new state.
                        inst.state.value = dbo.STATE_DELETED
                    else:
                        inst.state = inst.state.value + '-error'

            # Inactive VMs just have a name, and are powered off
            # in our state system.
            for domain_name in conn.listDefinedDomains():
                if not domain_name.startswith('sf:'):
                    continue

                if domain_name not in seen:
                    instance_uuid = domain_name.split(':')[1]
                    log_ctx = LOG.with_instance(instance_uuid)
                    inst = instance.Instance.from_db(instance_uuid)

                    if not inst:
                        # Instance is SF but not in database. Kill because
                        # unknown.
                        log_ctx.warning('Removing unknown inactive instance')
                        self._delete_instance_files(instance_uuid)
                        try:
                            domain = conn.lookupByName(domain_name)
                            # TODO(mikal): work out if we can pass
                            # VIR_DOMAIN_UNDEFINE_NVRAM with virDomainUndefineFlags()
                            domain.undefine()
                        except libvirt.libvirtError:
                            # Fall back to the virsh command line, which is
                            # given the --nvram flag.
                            util_process.execute(
                                None, 'virsh undefine --nvram "sf:%s"' %
                                instance_uuid)
                        continue

                    db_state = inst.state
                    if db_state.value in [
                            dbo.STATE_DELETE_WAIT, dbo.STATE_DELETED
                    ]:
                        # NOTE(mikal): a delete might be in-flight in the queue.
                        # We only worry about instances which should have gone
                        # away five minutes ago.
                        if time.time() - db_state.update_time < 300:
                            continue

                        log_ctx.info('Detected stray instance')
                        self._delete_instance_files(instance_uuid)
                        try:
                            domain = conn.lookupByName(domain_name)
                            # TODO(mikal): work out if we can pass
                            # VIR_DOMAIN_UNDEFINE_NVRAM with virDomainUndefineFlags()
                            domain.undefine()
                        except libvirt.libvirtError:
                            # Fall back to the virsh command line, which is
                            # given the --nvram flag.
                            util_process.execute(
                                None, 'virsh undefine --nvram "sf:%s"' %
                                instance_uuid)

                        inst.add_event('deleted stray', 'complete')
                        if db_state.value != dbo.STATE_DELETED:
                            # NOTE(review): assigns to inst.state.value rather
                            # than inst.state. Confirm this records the state.
                            inst.state.value = dbo.STATE_DELETED
                        continue

                    inst.place_instance(config.NODE_NAME)

                    db_power = inst.power_state
                    if not os.path.exists(inst.instance_path):
                        # If we're inactive and our files aren't on disk,
                        # we have a problem.
                        log_ctx.info('Detected error state for instance')
                        if inst.state.value in [
                                dbo.STATE_DELETE_WAIT, dbo.STATE_DELETED
                        ]:
                            inst.state.value = dbo.STATE_DELETED
                        else:
                            inst.state = inst.state.value + '-error'

                    elif not db_power or db_power['power_state'] != 'off':
                        # Only record the power off when it is not already
                        # the recorded power state.
                        log_ctx.info('Detected power off for instance')
                        inst.update_power_state('off')
                        inst.add_event('detected poweroff', 'complete')

        except libvirt.libvirtError as e:
            # Any libvirt error in either pass abandons the rest of this run.
            LOG.debug('Failed to lookup all domains: %s' % e)
示例#27
0
    def create_on_network_node(self):
        """Build this network's router namespace and plumbing.

        Does nothing for the 'floating' network. Otherwise, while holding
        the network lock, this runs self._create_common() and then creates
        whichever of the following are missing: the network namespace, the
        router veth pair (outer end on the vxlan bridge, inner end in the
        namespace with the router address), and the egress veth pair (outer
        end on the egress bridge, inner end in the namespace).

        When self.provide_nat is set, a floating gateway address is
        allocated if there is none, that address is added to the inner
        egress veth, the namespace default route is pointed at the floating
        router, and NAT is enabled.

        After the lock is released DHCP is updated and the state is set to
        STATE_CREATED.

        Raises:
            DeadNetwork: if self.is_dead() is true on entry to the locked
                section, or again after DHCP has been updated.
        """
        # The floating network does not have a vxlan mesh
        if self.uuid == 'floating':
            return

        with self.get_lock(op='create_on_network_node'):
            if self.is_dead():
                raise DeadNetwork('network=%s' % self)

            self._create_common()

            subst = self.subst_dict()
            if not os.path.exists('/var/run/netns/%s' % self.uuid):
                with util_general.RecordedOperation('create netns', self):
                    util_process.execute(None, 'ip netns add %s' % self.uuid)

            if not util_network.check_for_interface(subst['vx_veth_outer']):
                with util_general.RecordedOperation('create router veth', self):
                    util_network.create_interface(
                        subst['vx_veth_outer'], 'veth',
                        'peer name %(vx_veth_inner)s' % subst)
                    # Move the inner end of the pair into the namespace
                    util_process.execute(
                        None, 'ip link set %(vx_veth_inner)s netns %(netns)s' % subst)

                    # Refer to bug 952 for more details here, but it turns out
                    # that adding an interface to a bridge overwrites the MTU of
                    # the bridge in an undesirable way. So we lookup the existing
                    # MTU and then re-specify it here.
                    subst['vx_bridge_mtu'] = util_network.get_interface_mtu(
                        subst['vx_bridge'])
                    util_process.execute(
                        None,
                        'ip link set %(vx_veth_outer)s master %(vx_bridge)s '
                        'mtu %(vx_bridge_mtu)s' % subst)

                    util_process.execute(
                        None, 'ip link set %(vx_veth_outer)s up' % subst)
                    util_process.execute(
                        None, 'ip link set %(vx_veth_inner)s up' % subst,
                        namespace=self.uuid)
                    util_process.execute(
                        None,
                        'ip addr add %(router)s/%(netmask)s '
                        'dev %(vx_veth_inner)s' % subst,
                        namespace=self.uuid)

            if not util_network.check_for_interface(subst['egress_veth_outer']):
                with util_general.RecordedOperation('create egress veth', self):
                    util_network.create_interface(
                        subst['egress_veth_outer'], 'veth',
                        'peer name %(egress_veth_inner)s' % subst)

                    # Refer to bug 952 for more details here, but it turns out
                    # that adding an interface to a bridge overwrites the MTU of
                    # the bridge in an undesirable way. So we lookup the existing
                    # MTU and then re-specify it here.
                    subst['egress_bridge_mtu'] = util_network.get_interface_mtu(
                        subst['egress_bridge'])
                    util_process.execute(
                        None,
                        'ip link set %(egress_veth_outer)s master %(egress_bridge)s '
                        'mtu %(egress_bridge_mtu)s' % subst)

                    util_process.execute(
                        None, 'ip link set %(egress_veth_outer)s up' % subst)
                    # The inner end is only brought up later, in the NAT
                    # section below, when the floating gateway is added.
                    util_process.execute(
                        None, 'ip link set %(egress_veth_inner)s netns %(netns)s' % subst)

            if self.provide_nat:
                # We don't always need this lock, but acquiring it here means
                # we don't need to construct two identical ipmanagers one after
                # the other.
                with db.get_lock('ipmanager', None, 'floating', ttl=120,
                                 op='Network deploy NAT'):
                    ipm = IPManager.from_db('floating')
                    if not self.floating_gateway:
                        self.update_floating_gateway(
                            ipm.get_random_free_address(self.unique_label()))
                        ipm.persist()

                    subst['floating_router'] = ipm.get_address_at_index(1)
                    subst['floating_gateway'] = self.floating_gateway
                    subst['floating_netmask'] = ipm.netmask

                with util_general.RecordedOperation('enable virtual routing', self):
                    # Only add the floating gateway address if the inner
                    # egress veth does not already carry it.
                    addresses = util_network.get_interface_addresses(
                        subst['egress_veth_inner'], namespace=subst['netns'])
                    if not subst['floating_gateway'] in list(addresses):
                        util_process.execute(
                            None,
                            'ip addr add %(floating_gateway)s/%(floating_netmask)s '
                            'dev %(egress_veth_inner)s' % subst,
                            namespace=self.uuid)
                        util_process.execute(
                            None, 'ip link set  %(egress_veth_inner)s up' % subst,
                            namespace=self.uuid)

                    # Replace the default routes unless the floating router
                    # is already the one and only default route.
                    default_routes = util_network.get_default_routes(
                        subst['netns'])
                    if default_routes != [subst['floating_router']]:
                        if default_routes:
                            for default_route in default_routes:
                                util_process.execute(
                                    None, 'route del default gw %s' % default_route,
                                    namespace=self.uuid)

                        util_process.execute(
                            None, 'route add default gw %(floating_router)s' % subst,
                            namespace=self.uuid)

                self.enable_nat()

        # DHCP is updated after the network lock has been released
        self.update_dhcp()

        # A final check to ensure we haven't raced with a delete
        if self.is_dead():
            raise DeadNetwork('network=%s' % self)
        self.state = self.STATE_CREATED
示例#28
0
def main():
    """Run the main daemon: prepare this node, then supervise child daemons.

    On startup this marks the node as created, logs every configuration
    item, clears stale etcd locks and restarts the processing queues. One
    child process is forked per entry in DAEMON_IMPLEMENTATIONS, with the
    'resources' daemon started first. If this node is the network node, the
    floating network record and the egress bridge are created when missing.

    The function then loops forever: every five seconds it reaps exited
    children and, while the node is not stopping or stopped, restarts any
    missing daemons. Once the node is stopping, each remaining child is sent
    SIGTERM a single time and the function returns when no children remain,
    after marking the node as stopped.
    """
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    LOG.info('Starting...')
    setproctitle.setproctitle(
        daemon.process_name('main') + '-v%s' % util_general.get_version())

    # If you ran this, it means we're not shutting down any more
    n = Node.new(config.NODE_NAME, config.NODE_MESH_IP)
    n.state = Node.STATE_CREATED

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items.
    etcd.clear_stale_locks()
    Node.observe_this_node()
    etcd.restart_queues()

    def _start_daemon(d):
        # Fork a child which runs the named daemon's monitor and then exits;
        # the parent records the child's pid against the daemon name.
        pid = os.fork()
        if pid == 0:
            try:
                DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
                sys.exit(0)
            except Exception as e:
                util_general.ignore_exception('daemon creation', e)
                sys.exit(1)

        DAEMON_PIDS[pid] = d
        LOG.with_field('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if config.NODE_IS_NETWORK_NODE:
        # Bootstrap the floating network in the Networks table
        floating_network = net.Network.from_db('floating')
        if not floating_network:
            floating_network = net.Network.create_floating_network(
                config.FLOATING_NETWORK)

        subst = {
            'egress_bridge': util_network.get_safe_interface_name(
                'egr-br-%s' % config.NODE_EGRESS_NIC),
            'egress_nic': config.NODE_EGRESS_NIC
        }

        if not util_network.check_for_interface(subst['egress_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util_general.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = IPManager.from_db('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                # We need to copy the MTU of the interface we are bridging to
                # or weird networking things happen.
                mtu = util_network.get_interface_mtu(config.NODE_EGRESS_NIC)

                util_network.create_interface(
                    subst['egress_bridge'], 'bridge', '', mtu=mtu)

                util_process.execute(None,
                                     'ip link set %(egress_bridge)s up' % subst)
                util_process.execute(None,
                                     'ip addr add %(master_float)s/%(netmask)s '
                                     'dev %(egress_bridge)s' % subst)

                util_process.execute(None,
                                     'iptables -A FORWARD -o %(egress_nic)s '
                                     '-i %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -A FORWARD -i %(egress_nic)s '
                                     '-o %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -t nat -A POSTROUTING '
                                     '-o %(egress_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        # Start any daemon which has no recorded pid at all.
        running_daemons = set(DAEMON_PIDS.values())
        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        # Restart any daemon whose recorded pid no longer exists. The stale
        # pid must be forgotten first, otherwise it is found missing again on
        # every later audit and a duplicate daemon is forked each time.
        for pid in list(DAEMON_PIDS):
            if not psutil.pid_exists(pid):
                name = DAEMON_PIDS.pop(pid)
                LOG.warning('%s pid is missing, restarting' % name)
                _start_daemon(name)

    _audit_daemons()
    restore_instances()

    # True until SIGTERM has been sent to the children during shutdown, so
    # that the signal is only sent once.
    running = True
    while True:
        time.sleep(5)

        try:
            # Reap every child which has exited since the last pass.
            wpid, _ = os.waitpid(-1, os.WNOHANG)
            while wpid != 0:
                LOG.warning('%s exited (pid %d)'
                            % (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
                if wpid in DAEMON_PIDS:
                    del DAEMON_PIDS[wpid]
                wpid, _ = os.waitpid(-1, os.WNOHANG)

        except ChildProcessError:
            # We get this if there are no child processes
            pass

        n = Node.from_db(config.NODE_NAME)
        if n.state.value not in [Node.STATE_STOPPING, Node.STATE_STOPPED]:
            _audit_daemons()
            Node.observe_this_node()

        elif len(DAEMON_PIDS) == 0:
            n.state = Node.STATE_STOPPED
            return

        else:
            if running:
                for pid in DAEMON_PIDS:
                    try:
                        os.kill(pid, signal.SIGTERM)
                        LOG.info('Sent SIGTERM to %s (pid %s)'
                                 % (DAEMON_PIDS.get(pid, 'unknown'), pid))
                    except OSError as e:
                        LOG.warning(
                            'Failed to send SIGTERM to %s: %s' % (pid, e))

            running = False
示例#29
0
    def transcode_image(self, lock, b):
        """Make sure blob b has an entry in the image cache.

        NOTE(mikal): it is assumed the caller holds a lock on the artifact,
        and passes it in lock.

        If a cached '.iso' or '.qcow2' permutation already exists it is only
        touched. Otherwise ISO blobs are linked into the cache, gzip blobs
        are decompressed first, and the result is either linked (when it is
        already qcow2 with the configured cluster size) or transcoded to
        qcow2. The artifact is marked as created in every case.
        """
        # Make this blob the most recent index for the artifact if it is not
        # already.
        if self.__artifact.most_recent_index.get('blob_uuid') != b.uuid:
            self.__artifact.add_index(b.uuid)

        # Transcoded files live in a well known location.
        cache_dir = os.path.join(config.STORAGE_PATH, 'image_cache')
        os.makedirs(cache_dir, exist_ok=True)

        cached = util_general.file_permutation_exists(
            os.path.join(cache_dir, b.uuid), ['iso', 'qcow2'])

        if not cached:
            source_path = os.path.join(config.STORAGE_PATH, 'blobs', b.uuid)
            mimetype = b.info.get('mime-type', '')
            is_iso = mimetype in (
                'application/x-cd-image', 'application/x-iso9660-image')

            if is_iso:
                cache_path = os.path.join(cache_dir, b.uuid + '.iso')
                util_general.link(source_path, cache_path)

            else:
                if mimetype == 'application/gzip':
                    # Decompress into the cache directory and continue with
                    # the decompressed file as the source.
                    unpacked_path = os.path.join(cache_dir, b.uuid)
                    with util_general.RecordedOperation(
                            'decompress image', self.instance):
                        decompress_cmd = 'gunzip -k -q -c %s > %s' % (
                            source_path, unpacked_path)
                        util_process.execute([lock], decompress_cmd)
                    source_path = unpacked_path

                cache_path = os.path.join(cache_dir, b.uuid + '.qcow2')
                image_info = util_image.identify(source_path)

                # Convert the cluster size from qemu format to an int
                raw_cluster_size = QCOW2_CLUSTER_SIZE
                if raw_cluster_size.endswith('M'):
                    wanted_cluster_size = int(raw_cluster_size[:-1]) * MiB
                elif raw_cluster_size.endswith('K'):
                    wanted_cluster_size = int(raw_cluster_size[:-1]) * KiB
                else:
                    wanted_cluster_size = int(raw_cluster_size)

                already_suitable = (
                    image_info.get('file format', '') == 'qcow2'
                    and image_info.get('cluster_size', 0) == wanted_cluster_size)

                if already_suitable:
                    util_general.link(source_path, cache_path)
                else:
                    with util_general.RecordedOperation(
                            'transcode image', self.instance):
                        self.log.with_object(b).info(
                            'Transcoding %s -> %s' % (source_path, cache_path))
                        util_image.create_qcow2(
                            [lock], source_path, cache_path)

            shutil.chown(cache_path, config.LIBVIRT_USER, config.LIBVIRT_GROUP)
            stat_fields = util_general.stat_log_fields(cache_path)
            self.log.with_fields(stat_fields).info(
                'Cache file %s created' % cache_path)

        else:
            # We touch the file here, because we want to know when it was
            # last used.
            pathlib.Path(cached).touch(exist_ok=True)

        self.__artifact.state = Artifact.STATE_CREATED
示例#30
0
    def _create_common(self):
        """Set up the vxlan interface and bridge for this network if missing.

        Does nothing for the network whose uuid is 'floating'. Interface
        names and the vxlan id come from self.subst_dict().
        """
        # The floating network does not have a vxlan mesh
        if self.uuid == 'floating':
            return

        subst = self.subst_dict()

        # Only create the vxlan interface when it does not already exist.
        if not util_network.check_for_interface(subst['vx_interface']):
            with util_general.RecordedOperation('create vxlan interface', self):
                util_network.create_interface(
                    subst['vx_interface'], 'vxlan',
                    'id %(vx_id)s dev %(mesh_interface)s dstport 0'
                    % subst)
                util_process.execute(None, 'sysctl -w net.ipv4.conf.'
                                     '%(vx_interface)s.arp_notify=1' % subst)

        # Only create the bridge when it does not already exist. The vxlan
        # interface is attached to the bridge before both are brought up.
        if not util_network.check_for_interface(subst['vx_bridge']):
            with util_general.RecordedOperation('create vxlan bridge', self):
                util_network.create_interface(subst['vx_bridge'], 'bridge', '')
                util_process.execute(None, 'ip link set %(vx_interface)s '
                                     'master %(vx_bridge)s' % subst)
                util_process.execute(
                    None, 'ip link set %(vx_interface)s up' % subst)
                util_process.execute(
                    None, 'ip link set %(vx_bridge)s up' % subst)
                util_process.execute(None, 'sysctl -w net.ipv4.conf.'
                                     '%(vx_bridge)s.arp_notify=1' % subst)
                # Bridge tuning: zero forward delay, spanning tree off and
                # zero ageing time.
                util_process.execute(
                    None, 'brctl setfd %(vx_bridge)s 0' % subst)
                util_process.execute(
                    None, 'brctl stp %(vx_bridge)s off' % subst)
                util_process.execute(
                    None, 'brctl setageing %(vx_bridge)s 0' % subst)