Exemplo n.º 1
0
    def deploy_nat(self):
        if not self.provide_nat:
            return

        subst = self.subst_dict()
        if not self.floating_gateway:
            with db.get_lock('ipmanager', None, 'floating', ttl=120):
                ipm = db.get_ipmanager('floating')
                self.floating_gateway = ipm.get_random_free_address()
                db.persist_ipmanager('floating', ipm.save())
                self.persist_floating_gateway()

        # No lock because no data changing
        ipm = db.get_ipmanager('floating')
        subst['floating_router'] = ipm.get_address_at_index(1)
        subst['floating_gateway'] = self.floating_gateway
        subst['floating_netmask'] = ipm.netmask

        with db.get_lock('network', None, self.uuid, ttl=120):
            if not subst['floating_gateway'] in list(
                    util.get_interface_addresses(
                        subst['netns'], subst['physical_veth_inner'])):
                with util.RecordedOperation('enable virtual routing', self):
                    util.execute(
                        None,
                        '%(in_netns)s ip addr add %(floating_gateway)s/%(floating_netmask)s '
                        'dev %(physical_veth_inner)s' % subst)
                    util.execute(
                        None,
                        '%(in_netns)s ip link set %(physical_veth_inner)s up' %
                        subst)
                    util.execute(
                        None,
                        '%(in_netns)s route add default gw %(floating_router)s'
                        % subst)

            if not util.nat_rules_for_ipblock(self.network_address):
                with util.RecordedOperation('enable nat', self):
                    util.execute(None,
                                 'echo 1 > /proc/sys/net/ipv4/ip_forward')
                    util.execute(
                        None,
                        '%(in_netns)s iptables -A FORWARD -o %(physical_veth_inner)s '
                        '-i %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None,
                        '%(in_netns)s iptables -A FORWARD -i %(physical_veth_inner)s '
                        '-o %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None,
                        '%(in_netns)s iptables -t nat -A POSTROUTING -s %(ipblock)s/%(netmask)s '
                        '-o %(physical_veth_inner)s -j MASQUERADE' % subst)
Exemplo n.º 2
0
    def delete(self):
        with util.RecordedOperation('delete domain', self):
            try:
                self.power_off()

                instance = self._get_domain()
                if instance:
                    instance.undefine()
            except Exception as e:
                util.ignore_exception('instance delete', e)

        with util.RecordedOperation('delete disks', self):
            try:
                if os.path.exists(self.instance_path):
                    shutil.rmtree(self.instance_path)
            except Exception as e:
                util.ignore_exception('instance delete', e)

        with util.RecordedOperation('release network addresses', self):
            for ni in db.get_instance_interfaces(self.db_entry['uuid']):
                db.update_network_interface_state(ni['uuid'], 'deleted')
                with db.get_lock('ipmanager',
                                 None,
                                 ni['network_uuid'],
                                 ttl=120,
                                 op='Instance delete'):
                    ipm = db.get_ipmanager(ni['network_uuid'])
                    ipm.release(ni['ipv4'])
                    db.persist_ipmanager(ni['network_uuid'], ipm.save())

        db.free_console_port(self.db_entry['console_port'])
        db.free_console_port(self.db_entry['vdi_port'])
Exemplo n.º 3
0
    def post(self, interface_uuid=None):
        ni = db.get_interface(interface_uuid)
        if not ni:
            return error(404, 'network interface not found')

        if not ni['floating']:
            return error(409, 'this interface does not have a floating ip')

        n = net.from_db(ni['network_uuid'])
        if not n:
            LOG.info('network(%s): network not found, genuinely missing' %
                     ni['network_uuid'])
            return error(404, 'network not found')

        if get_jwt_identity() not in [n.namespace, 'system']:
            LOG.info('%s: network not found, ownership test' % n)
            return error(404, 'network not found')

        i = virt.from_db(ni['instance_uuid'])
        if get_jwt_identity() not in [i.db_entry['namespace'], 'system']:
            LOG.info('%s: instance not found, ownership test' % i)
            return error(404, 'instance not found')

        float_net = net.from_db('floating')
        if not float_net:
            return error(404, 'floating network not found')

        db.add_event('interface', interface_uuid, 'api', 'defloat', None, None)
        with db.get_lock('sf/ipmanager/floating', ttl=120) as _:
            ipm = db.get_ipmanager('floating')
            ipm.release(ni['floating'])
            db.persist_ipmanager('floating', ipm.save())

        db.remove_floating_from_interface(ni['uuid'])
        n.remove_floating_ip(ni['floating'], ni['ipv4'])
Exemplo n.º 4
0
    def delete(self, network_uuid=None, network_from_db=None):
        db.add_event('network', network_uuid, 'api', 'delete', None, None)
        if network_uuid == 'floating':
            return error(403, 'you cannot delete the floating network')

        # We only delete unused networks
        if len(list(db.get_network_interfaces(network_uuid))) > 0:
            return error(403, 'you cannot delete an in use network')

        # Check if network has already been deleted
        if network_from_db['state'] == 'deleted':
            return error(404, 'network not found')

        with db.get_lock('sf/network/%s' % network_uuid, ttl=900) as _:
            n = net.from_db(network_uuid)
            n.remove_dhcp()
            n.delete()

            if n.floating_gateway:
                with db.get_lock('sf/ipmanager/floating', ttl=120) as _:
                    ipm = db.get_ipmanager('floating')
                    ipm.release(n.floating_gateway)
                    db.persist_ipmanager('floating', ipm.save())

            db.update_network_state(network_uuid, 'deleted')
Exemplo n.º 5
0
    def __init__(self,
                 uuid=None,
                 vxlan_id=1,
                 provide_dhcp=False,
                 provide_nat=False,
                 physical_nic='eth0',
                 ipblock=None,
                 floating_gateway=None,
                 namespace=None):
        self.uuid = uuid
        self.vxlan_id = vxlan_id
        self.provide_dhcp = provide_dhcp
        self.provide_nat = provide_nat
        self.physical_nic = physical_nic
        self.floating_gateway = floating_gateway
        self.namespace = namespace

        with db.get_lock('ipmanager', None, self.uuid, ttl=120):
            ipm = db.get_ipmanager(self.uuid)

            self.ipblock = ipm.network_address
            self.router = ipm.get_address_at_index(1)
            self.dhcp_start = ipm.get_address_at_index(2)
            self.netmask = ipm.netmask
            self.broadcast = ipm.broadcast_address
            self.network_address = ipm.network_address

            ipm.reserve(self.router)
            db.persist_ipmanager(self.uuid, ipm.save())
Exemplo n.º 6
0
    def delete(self):
        with util.RecordedOperation('delete domain', self) as _:
            try:
                self.power_off()

                instance = self._get_domain()
                instance.undefine()
            except Exception:
                pass

        with util.RecordedOperation('delete disks', self) as _:
            try:
                shutil.rmtree(self.instance_path)
            except Exception:
                pass

        with util.RecordedOperation('release network addreses', self) as _:
            for ni in db.get_instance_interfaces(self.db_entry['uuid']):
                with db.get_lock('sf/ipmanager/%s' % ni['network_uuid'],
                                 ttl=120) as _:
                    ipm = db.get_ipmanager(ni['network_uuid'])
                    ipm.release(ni['ipv4'])
                    db.persist_ipmanager(ni['network_uuid'], ipm.save())

        db.update_instance_state(self.db_entry['uuid'], 'deleted')
        db.free_console_port(self.db_entry['console_port'])
        db.free_console_port(self.db_entry['vdi_port'])
Exemplo n.º 7
0
 def error_with_cleanup(status_code, message):
     for network_uuid in allocations:
         n = net.from_db(network_uuid)
         for addr, _ in allocations[network_uuid]:
             with db.get_lock('sf/ipmanager/%s' % n.uuid, ttl=120) as _:
                 ipm = db.get_ipmanager(n.uuid)
                 ipm.release(addr)
                 db.persist_ipmanager(n.uuid, ipm.save())
     return error(status_code, message)
Exemplo n.º 8
0
def _delete_network(network_from_db):
    network_uuid = network_from_db['uuid']
    db.add_event('network', network_uuid, 'api', 'delete', None, None)

    n = net.from_db(network_uuid)
    n.remove_dhcp()
    n.delete()

    if n.floating_gateway:
        with db.get_lock('ipmanager', None, 'floating', ttl=120):
            ipm = db.get_ipmanager('floating')
            ipm.release(n.floating_gateway)
            db.persist_ipmanager('floating', ipm.save())

    db.update_network_state(network_uuid, 'deleted')
Exemplo n.º 9
0
    def post(self, interface_uuid=None):
        ni, n, err = _safe_get_network_interface(interface_uuid)
        if err:
            return err

        float_net = net.from_db('floating')
        if not float_net:
            return error(404, 'floating network not found')

        db.add_event('interface', interface_uuid, 'api', 'defloat', None, None)
        with db.get_lock('ipmanager', None, 'floating', ttl=120):
            ipm = db.get_ipmanager('floating')
            ipm.release(ni['floating'])
            db.persist_ipmanager('floating', ipm.save())

        db.remove_floating_from_interface(ni['uuid'])
        n.remove_floating_ip(ni['floating'], ni['ipv4'])
Exemplo n.º 10
0
    def post(self, interface_uuid=None):
        ni, n, err = _safe_get_network_interface(interface_uuid)
        if err:
            return err

        float_net = net.from_db('floating')
        if not float_net:
            return error(404, 'floating network not found')

        db.add_event('interface', interface_uuid, 'api', 'float', None, None)
        with db.get_lock('ipmanager', None, 'floating', ttl=120):
            ipm = db.get_ipmanager('floating')
            addr = ipm.get_random_free_address()
            db.persist_ipmanager('floating', ipm.save())

        db.add_floating_to_interface(ni['uuid'], addr)
        n.add_floating_ip(addr, ni['ipv4'])
Exemplo n.º 11
0
    def from_db(uuid):
        db_data = db.get_ipmanager(uuid)

        if 'ipmanager.v3' in db_data:
            ipm = IPManager(**db_data['ipmanager.v3'])
        else:
            db_data['ipmanager.v3'] = {}
            db_data['ipmanager.v3']['ipblock'] = db_data['ipmanager.v2'][
                'ipblock']
            db_data['ipmanager.v3']['in_use'] = {}

            for addr in db_data['ipmanager.v2']['in_use']:
                db_data['ipmanager.v3']['in_use'][addr] = {
                    'user': db_data['ipmanager.v2']['in_use'][addr],
                    'when': time.time()
                }
            ipm = IPManager(**db_data['ipmanager.v3'])

        return ipm
Exemplo n.º 12
0
    def __init__(self, db_entry):
        self.db_entry = db_entry
        self.physical_nic = config.get('NODE_EGRESS_NIC')

        with db.get_lock('ipmanager',
                         None,
                         self.db_entry['uuid'],
                         ttl=120,
                         op='Network object initialization'):
            ipm = db.get_ipmanager(self.db_entry['uuid'])

            self.ipblock = ipm.network_address
            self.router = ipm.get_address_at_index(1)
            self.dhcp_start = ipm.get_address_at_index(2)
            self.netmask = ipm.netmask
            self.broadcast = ipm.broadcast_address
            self.network_address = ipm.network_address

            ipm.reserve(self.router)
            db.persist_ipmanager(self.db_entry['uuid'], ipm.save())
Exemplo n.º 13
0
    def delete(self):
        subst = self.subst_dict()
        LOG.withFields(subst).debug('net.delete()')

        # Cleanup local node
        with db.get_object_lock(self, ttl=120, op='Network delete'):
            if util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('delete vxlan bridge', self):
                    util.execute(None, 'ip link delete %(vx_bridge)s' % subst)

            if util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('delete vxlan interface', self):
                    util.execute(None,
                                 'ip link delete %(vx_interface)s' % subst)

            # If this is the network node do additional cleanup
            if util.is_network_node():
                if util.check_for_interface(subst['vx_veth_outer']):
                    with util.RecordedOperation('delete router veth', self):
                        util.execute(
                            None, 'ip link delete %(vx_veth_outer)s' % subst)

                if util.check_for_interface(subst['physical_veth_outer']):
                    with util.RecordedOperation('delete physical veth', self):
                        util.execute(
                            None,
                            'ip link delete %(physical_veth_outer)s' % subst)

                if os.path.exists('/var/run/netns/%(netns)s' % subst):
                    with util.RecordedOperation('delete netns', self):
                        util.execute(None, 'ip netns del %(netns)s' % subst)

                if self.db_entry['floating_gateway']:
                    with db.get_lock('ipmanager',
                                     None,
                                     'floating',
                                     ttl=120,
                                     op='Network delete'):
                        ipm = db.get_ipmanager('floating')
                        ipm.release(self.db_entry['floating_gateway'])
                        db.persist_ipmanager('floating', ipm.save())
Exemplo n.º 14
0
    def delete(self):
        subst = self.subst_dict()

        # Cleanup local node
        with db.get_lock('sf/net/%s' % self.uuid, ttl=120) as _:
            if util.check_for_interface(subst['vx_bridge']):
                with util.RecordedOperation('delete vxlan bridge', self) as _:
                    processutils.execute('ip link delete %(vx_bridge)s' % subst,
                                         shell=True)

            if util.check_for_interface(subst['vx_interface']):
                with util.RecordedOperation('delete vxlan interface', self) as _:
                    processutils.execute('ip link delete %(vx_interface)s' % subst,
                                         shell=True)

            # If this is the network node do additional cleanup
            if config.parsed.get('NODE_IP') == config.parsed.get('NETWORK_NODE_IP'):
                if util.check_for_interface(subst['vx_veth_outer']):
                    with util.RecordedOperation('delete router veth', self) as _:
                        processutils.execute('ip link delete %(vx_veth_outer)s' % subst,
                                             shell=True)

                if util.check_for_interface(subst['physical_veth_outer']):
                    with util.RecordedOperation('delete physical veth', self) as _:
                        processutils.execute('ip link delete %(physical_veth_outer)s' % subst,
                                             shell=True)

                if os.path.exists('/var/run/netns/%(netns)s' % subst):
                    with util.RecordedOperation('delete netns', self) as _:
                        processutils.execute('ip netns del %(netns)s' % subst,
                                             shell=True)

                if self.floating_gateway:
                    with db.get_lock('sf/ipmanager/floating', ttl=120) as _:
                        ipm = db.get_ipmanager('floating')
                        ipm.release(self.floating_gateway)
                        db.persist_ipmanager('floating', ipm.save())
Exemplo n.º 15
0
    def post(self,
             name=None,
             cpus=None,
             memory=None,
             network=None,
             disk=None,
             ssh_key=None,
             user_data=None,
             placed_on=None,
             namespace=None,
             instance_uuid=None):
        global SCHEDULER

        # We need to sanitise the name so its safe for DNS
        name = re.sub(r'([^a-zA-Z0-9_\-])', '', name)

        if not namespace:
            namespace = get_jwt_identity()

        # If accessing a foreign namespace, we need to be an admin
        if get_jwt_identity() not in [namespace, 'system']:
            return error(
                401,
                'only admins can create resources in a different namespace')

        # The instance needs to exist in the DB before network interfaces are created
        if not instance_uuid:
            instance_uuid = str(uuid.uuid4())
            db.add_event('instance', instance_uuid, 'uuid allocated', None,
                         None, None)

        # Create instance object
        instance = virt.from_db(instance_uuid)
        if instance:
            if get_jwt_identity() not in [
                    instance.db_entry['namespace'], 'system'
            ]:
                LOG.info('instance(%s): instance not found, ownership test' %
                         instance_uuid)
                return error(404, 'instance not found')

        if not instance:
            instance = virt.from_definition(uuid=instance_uuid,
                                            name=name,
                                            disks=disk,
                                            memory_mb=memory,
                                            vcpus=cpus,
                                            ssh_key=ssh_key,
                                            user_data=user_data,
                                            owner=namespace)

        if not SCHEDULER:
            SCHEDULER = scheduler.Scheduler()

        # Have we been placed?
        if not placed_on:
            candidates = SCHEDULER.place_instance(instance, network)
            if len(candidates) == 0:
                db.add_event('instance', instance_uuid, 'schedule', 'failed',
                             None, 'insufficient resources')
                db.update_instance_state(instance_uuid, 'error')
                return error(507, 'insufficient capacity')

            placed_on = candidates[0]
            db.place_instance(instance_uuid, placed_on)
            db.add_event('instance', instance_uuid, 'placement', None, None,
                         placed_on)

        else:
            try:
                candidates = SCHEDULER.place_instance(instance,
                                                      network,
                                                      candidates=[placed_on])
                if len(candidates) == 0:
                    db.add_event('instance', instance_uuid, 'schedule',
                                 'failed', None, 'insufficient resources')
                    db.update_instance_state(instance_uuid, 'error')
                    return error(507, 'insufficient capacity')
            except scheduler.CandidateNodeNotFoundException as e:
                return error(404, 'node not found: %s' % e)

        # Have we been placed on a different node?
        if not placed_on == config.parsed.get('NODE_NAME'):
            body = flask_get_post_body()
            body['placed_on'] = placed_on
            body['instance_uuid'] = instance_uuid
            body['namespace'] = namespace

            token = util.get_api_token(
                'http://%s:%d' % (placed_on, config.parsed.get('API_PORT')),
                namespace=namespace)
            r = requests.request('POST',
                                 'http://%s:%d/instances' %
                                 (placed_on, config.parsed.get('API_PORT')),
                                 data=json.dumps(body),
                                 headers={
                                     'Authorization': token,
                                     'User-Agent': util.get_user_agent()
                                 })

            LOG.info('Returning proxied request: %d, %s' %
                     (r.status_code, r.text))
            resp = flask.Response(r.text, mimetype='application/json')
            resp.status_code = r.status_code
            return resp

        # Check we can get the required IPs
        nets = {}
        allocations = {}

        def error_with_cleanup(status_code, message):
            for network_uuid in allocations:
                n = net.from_db(network_uuid)
                for addr, _ in allocations[network_uuid]:
                    with db.get_lock('sf/ipmanager/%s' % n.uuid, ttl=120) as _:
                        ipm = db.get_ipmanager(n.uuid)
                        ipm.release(addr)
                        db.persist_ipmanager(n.uuid, ipm.save())
            return error(status_code, message)

        order = 0
        if network:
            for netdesc in network:
                if 'network_uuid' not in netdesc or not netdesc['network_uuid']:
                    return error_with_cleanup(404, 'network not specified')

                if netdesc['network_uuid'] not in nets:
                    n = net.from_db(netdesc['network_uuid'])
                    if not n:
                        return error_with_cleanup(
                            404,
                            'network %s not found' % netdesc['network_uuid'])
                    nets[netdesc['network_uuid']] = n
                    n.create()

                with db.get_lock('sf/ipmanager/%s' % netdesc['network_uuid'],
                                 ttl=120) as _:
                    db.add_event('network', netdesc['network_uuid'],
                                 'allocate address', None, None, instance_uuid)
                    allocations.setdefault(netdesc['network_uuid'], [])
                    ipm = db.get_ipmanager(netdesc['network_uuid'])
                    if 'address' not in netdesc or not netdesc['address']:
                        netdesc['address'] = ipm.get_random_free_address()
                    else:
                        if not ipm.reserve(netdesc['address']):
                            return error_with_cleanup(
                                409, 'address %s in use' % netdesc['address'])
                    db.persist_ipmanager(netdesc['network_uuid'], ipm.save())
                    allocations[netdesc['network_uuid']].append(
                        (netdesc['address'], order))

                if 'model' not in netdesc or not netdesc['model']:
                    netdesc['model'] = 'virtio'

                db.create_network_interface(str(uuid.uuid4()), netdesc,
                                            instance_uuid, order)

                order += 1

        # Initialise metadata
        db.persist_metadata('instance', instance_uuid, {})

        # Now we can start the instance
        with db.get_lock('sf/instance/%s' % instance.db_entry['uuid'],
                         ttl=900) as lock:
            with util.RecordedOperation('ensure networks exist',
                                        instance) as _:
                for network_uuid in nets:
                    n = nets[network_uuid]
                    n.ensure_mesh()
                    n.update_dhcp()

            with util.RecordedOperation('instance creation', instance) as _:
                instance.create(lock=lock)

            for iface in db.get_instance_interfaces(instance.db_entry['uuid']):
                db.update_network_interface_state(iface['uuid'], 'created')

            return db.get_instance(instance_uuid)
Exemplo n.º 16
0
    def deploy_nat(self):
        if not self.db_entry['provide_nat']:
            return

        subst = self.subst_dict()
        if not self.db_entry['floating_gateway']:
            with db.get_lock('ipmanager',
                             None,
                             'floating',
                             ttl=120,
                             op='Network deploy NAT'):
                ipm = db.get_ipmanager('floating')
                self.db_entry[
                    'floating_gateway'] = ipm.get_random_free_address()
                db.persist_ipmanager('floating', ipm.save())
                self.persist_floating_gateway()

        # No lock because no data changing
        ipm = db.get_ipmanager('floating')
        subst['floating_router'] = ipm.get_address_at_index(1)
        subst['floating_gateway'] = self.db_entry['floating_gateway']
        subst['floating_netmask'] = ipm.netmask

        with db.get_object_lock(self, ttl=120, op='Network deploy NAT'):
            # Ensure network was not deleted whilst waiting for the lock.
            if self.is_dead():
                raise DeadNetwork('network=%s' % self)

            with util.RecordedOperation('enable virtual routing', self):
                addresses = util.get_interface_addresses(
                    subst['netns'], subst['physical_veth_inner'])
                if not subst['floating_gateway'] in list(addresses):
                    util.execute(
                        None, '%(in_netns)s ip addr add '
                        '%(floating_gateway)s/%(floating_netmask)s '
                        'dev %(physical_veth_inner)s' % subst)
                    util.execute(
                        None, '%(in_netns)s ip link set '
                        '%(physical_veth_inner)s up' % subst)

                default_routes = util.get_default_routes(subst['netns'])
                if default_routes != [subst['floating_router']]:
                    if default_routes:
                        for default_route in default_routes:
                            util.execute(
                                None, '%s route del default gw %s' %
                                (subst['in_netns'], default_route))

                    util.execute(
                        None, '%(in_netns)s route add default '
                        'gw %(floating_router)s' % subst)

            if not util.nat_rules_for_ipblock(self.network_address):
                with util.RecordedOperation('enable nat', self):
                    util.execute(None,
                                 'echo 1 > /proc/sys/net/ipv4/ip_forward')
                    util.execute(
                        None, '%(in_netns)s iptables -A FORWARD '
                        '-o %(physical_veth_inner)s '
                        '-i %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None, '%(in_netns)s iptables -A FORWARD '
                        '-i %(physical_veth_inner)s '
                        '-o %(vx_veth_inner)s -j ACCEPT' % subst)
                    util.execute(
                        None, '%(in_netns)s iptables -t nat -A POSTROUTING '
                        '-s %(ipblock)s/%(netmask)s '
                        '-o %(physical_veth_inner)s '
                        '-j MASQUERADE' % subst)
Exemplo n.º 17
0
def main():
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    setproctitle.setproctitle(daemon.process_name('main'))

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items
    db.clear_stale_locks()
    db.see_this_node()
    db.restart_queues()

    def _start_daemon(d):
        pid = os.fork()
        if pid == 0:
            DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
        DAEMON_PIDS[pid] = d
        LOG.withField('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if util.is_network_node():
        # Bootstrap the floating network in the Networks table
        floating_network = db.get_network('floating')
        if not floating_network:
            db.create_floating_network(config.get('FLOATING_NETWORK'))
            floating_network = net.from_db('floating')

        subst = {
            'physical_bridge':
            util.get_safe_interface_name('phy-br-%s' %
                                         config.get('NODE_EGRESS_NIC')),
            'physical_nic':
            config.get('NODE_EGRESS_NIC')
        }

        if not util.check_for_interface(subst['physical_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = db.get_ipmanager('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                util.create_interface(subst['physical_bridge'], 'bridge', '')
                util.execute(None,
                             'ip link set %(physical_bridge)s up' % subst)
                util.execute(
                    None, 'ip addr add %(master_float)s/%(netmask)s '
                    'dev %(physical_bridge)s' % subst)

                util.execute(
                    None, 'iptables -A FORWARD -o %(physical_nic)s '
                    '-i %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -A FORWARD -i %(physical_nic)s '
                    '-o %(physical_bridge)s -j ACCEPT' % subst)
                util.execute(
                    None, 'iptables -t nat -A POSTROUTING '
                    '-o %(physical_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        running_daemons = []
        for pid in DAEMON_PIDS:
            running_daemons.append(DAEMON_PIDS[pid])

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        for d in DAEMON_PIDS:
            if not psutil.pid_exists(d):
                LOG.warning('%s pid is missing, restarting' % DAEMON_PIDS[d])
                _start_daemon(DAEMON_PIDS[d])

    _audit_daemons()
    restore_instances()

    while True:
        time.sleep(10)

        wpid, _ = os.waitpid(-1, os.WNOHANG)
        while wpid != 0:
            LOG.warning('%s died (pid %d)' %
                        (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
            del DAEMON_PIDS[wpid]
            wpid, _ = os.waitpid(-1, os.WNOHANG)

        _audit_daemons()
        db.see_this_node()
Exemplo n.º 18
0
    def post(self,
             name=None,
             cpus=None,
             memory=None,
             network=None,
             disk=None,
             ssh_key=None,
             user_data=None,
             placed_on=None,
             namespace=None,
             instance_uuid=None,
             video=None):
        global SCHEDULER

        # Check that the instance name is safe for use as a DNS host name
        if name != re.sub(r'([^a-zA-Z0-9_\-])', '', name) or len(name) > 63:
            return error(400,
                         'instance name must be useable as a DNS host name')

        # Sanity check
        if not disk:
            return error(400, 'instance must specify at least one disk')
        for d in disk:
            if not isinstance(d, dict):
                return error(400,
                             'disk specification should contain JSON objects')

        if network:
            for n in network:
                if not isinstance(n, dict):
                    return error(
                        400,
                        'network specification should contain JSON objects')

                if 'network_uuid' not in n:
                    return error(
                        400, 'network specification is missing network_uuid')

        if not video:
            video = {'model': 'cirrus', 'memory': 16384}

        if not namespace:
            namespace = get_jwt_identity()

        # Only system can specify a uuid
        if instance_uuid and get_jwt_identity() != 'system':
            return error(401, 'only system can specify an instance uuid')

        # If accessing a foreign namespace, we need to be an admin
        if get_jwt_identity() not in [namespace, 'system']:
            return error(
                401,
                'only admins can create resources in a different namespace')

        # The instance needs to exist in the DB before network interfaces are created
        if not instance_uuid:
            instance_uuid = str(uuid.uuid4())
            db.add_event('instance', instance_uuid, 'uuid allocated', None,
                         None, None)

        # Create instance object
        instance = virt.from_db(instance_uuid)
        if instance:
            if get_jwt_identity() not in [
                    instance.db_entry['namespace'], 'system'
            ]:
                logutil.info([virt.ThinInstance(instance_uuid)],
                             'Instance not found, ownership test')
                return error(404, 'instance not found')

        if not instance:
            instance = virt.from_definition(uuid=instance_uuid,
                                            name=name,
                                            disks=disk,
                                            memory_mb=memory,
                                            vcpus=cpus,
                                            ssh_key=ssh_key,
                                            user_data=user_data,
                                            owner=namespace,
                                            video=video,
                                            requested_placement=placed_on)

        # Initialise metadata
        db.persist_metadata('instance', instance_uuid, {})

        # Allocate IP addresses
        order = 0
        if network:
            for netdesc in network:
                n = net.from_db(netdesc['network_uuid'])
                if not n:
                    db.enqueue_instance_delete(
                        config.parsed.get('NODE_NAME'), instance_uuid, 'error',
                        'missing network %s during IP allocation phase' %
                        netdesc['network_uuid'])
                    return error(
                        404, 'network %s not found' % netdesc['network_uuid'])

                with db.get_lock('ipmanager',
                                 None,
                                 netdesc['network_uuid'],
                                 ttl=120):
                    db.add_event('network', netdesc['network_uuid'],
                                 'allocate address', None, None, instance_uuid)
                    ipm = db.get_ipmanager(netdesc['network_uuid'])
                    if 'address' not in netdesc or not netdesc['address']:
                        netdesc['address'] = ipm.get_random_free_address()
                    else:
                        if not ipm.reserve(netdesc['address']):
                            db.enqueue_instance_delete(
                                config.parsed.get('NODE_NAME'), instance_uuid,
                                'error',
                                'failed to reserve an IP on network %s' %
                                netdesc['network_uuid'])
                            return error(
                                409, 'address %s in use' % netdesc['address'])

                    db.persist_ipmanager(netdesc['network_uuid'], ipm.save())

                if 'model' not in netdesc or not netdesc['model']:
                    netdesc['model'] = 'virtio'

                db.create_network_interface(str(uuid.uuid4()), netdesc,
                                            instance_uuid, order)

        if not SCHEDULER:
            SCHEDULER = scheduler.Scheduler()

        try:
            # Have we been placed?
            if not placed_on:
                candidates = SCHEDULER.place_instance(instance, network)
                placement = candidates[0]

            else:
                SCHEDULER.place_instance(instance,
                                         network,
                                         candidates=[placed_on])
                placement = placed_on

        except exceptions.LowResourceException as e:
            db.add_event('instance', instance_uuid, 'schedule', 'failed', None,
                         'insufficient resources: ' + str(e))
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'scheduling failed')
            return error(507, str(e))

        except exceptions.CandidateNodeNotFoundException as e:
            db.add_event('instance', instance_uuid, 'schedule', 'failed', None,
                         'candidate node not found: ' + str(e))
            db.enqueue_instance_delete(config.get.parsed('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'scheduling failed')
            return error(404, 'node not found: %s' % e)

        # Record placement
        db.place_instance(instance_uuid, placement)
        db.add_event('instance', instance_uuid, 'placement', None, None,
                     placement)

        # Create a queue entry for the instance start
        tasks = [{
            'type': 'instance_preflight',
            'instance_uuid': instance_uuid,
            'network': network
        }]
        for disk in instance.db_entry['block_devices']['devices']:
            if 'base' in disk and disk['base']:
                tasks.append({
                    'type': 'image_fetch',
                    'instance_uuid': instance_uuid,
                    'url': disk['base']
                })
        tasks.append({
            'type': 'instance_start',
            'instance_uuid': instance_uuid,
            'network': network
        })

        # Enqueue creation tasks on desired node task queue
        db.enqueue(placement, {'tasks': tasks})
        db.add_event('instance', instance_uuid, 'create', 'enqueued', None,
                     None)

        # Watch for a while and return results if things are fast, give up
        # after a while and just return the current state
        start_time = time.time()
        while time.time() - start_time < config.parsed.get('API_ASYNC_WAIT'):
            i = db.get_instance(instance_uuid)
            if i['state'] in ['created', 'deleted', 'error']:
                return i
            time.sleep(0.5)
        return i
Exemplo n.º 19
0
def main():
    # Log configuration on startup
    for key in config.parsed.config:
        LOG.info('Configuration item %s = %s' % (key, config.parsed.get(key)))

    # Check in early and often
    db.see_this_node()

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    resource_pid = os.fork()
    if resource_pid == 0:
        resource_daemon.monitor().run()

    # If I am the network node, I need some setup
    if config.parsed.get('NODE_IP') == config.parsed.get('NETWORK_NODE_IP'):
        # Bootstrap the floating network in the Networks table
        floating_network = db.get_network('floating')
        if not floating_network:
            db.create_floating_network(config.parsed.get('FLOATING_NETWORK'))
            floating_network = net.from_db('floating')

        subst = {
            'physical_bridge':
            'phy-br-%s' % config.parsed.get('NODE_EGRESS_NIC'),
            'physical_nic': config.parsed.get('NODE_EGRESS_NIC')
        }

        if not util.check_for_interface(subst['physical_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as it
            # will cause the node to lose network connectivity. So instead all we
            # do is create a bridge if it doesn't exist and the wire everything up
            # to it. We can do egress NAT in that state, even if floating IPs
            # don't work.
            with util.RecordedOperation('create physical bridge',
                                        'startup') as _:
                # No locking as read only
                ipm = db.get_ipmanager('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                processutils.execute(
                    'ip link add %(physical_bridge)s type bridge' % subst,
                    shell=True)
                processutils.execute('ip link set %(physical_bridge)s up' %
                                     subst,
                                     shell=True)
                processutils.execute(
                    'ip addr add %(master_float)s/%(netmask)s dev %(physical_bridge)s'
                    % subst,
                    shell=True)

                processutils.execute(
                    'iptables -A FORWARD -o %(physical_nic)s -i %(physical_bridge)s -j ACCEPT'
                    % subst,
                    shell=True)
                processutils.execute(
                    'iptables -A FORWARD -i %(physical_nic)s -o %(physical_bridge)s -j ACCEPT'
                    % subst,
                    shell=True)
                processutils.execute(
                    'iptables -t nat -A POSTROUTING -o %(physical_nic)s -j MASQUERADE'
                    % subst,
                    shell=True)

    # Network mesh maintenance
    net_pid = os.fork()
    if net_pid == 0:
        net_daemon.monitor().run()

    # Old object deleter
    cleaner_pid = os.fork()
    if cleaner_pid == 0:
        cleaner_daemon.monitor().run()

    # REST API
    external_api_pid = os.fork()
    if external_api_pid == 0:
        external_api_daemon.monitor().run()

    # Triggers
    trigger_pid = os.fork()
    if trigger_pid == 0:
        trigger_daemon.monitor().run()

    setproctitle.setproctitle('sf main')
    LOG.info('network monitor pid is %d' % net_pid)
    LOG.info('external api pid is %d' % external_api_pid)
    LOG.info('resources monitor pid is %d' % resource_pid)
    LOG.info('cleaner pid is %d' % cleaner_pid)
    LOG.info('trigger pid is %d' % trigger_pid)

    restore_instances()

    procnames = {
        external_api_pid: 'external api',
        net_pid: 'network monitor',
        resource_pid: 'resource monitor',
        cleaner_pid: 'cleaner'
    }

    while True:
        time.sleep(10)
        wpid, _ = os.waitpid(-1, os.WNOHANG)
        if wpid != 0:
            LOG.warning('Subprocess %d (%s) died' %
                        (wpid, procnames.get(wpid, 'unknown')))