def _create_common(self):
    # The floating network does not have a vxlan mesh
    if self.uuid == 'floating':
        return

    subst = self.subst_dict()

    if not util_network.check_for_interface(subst['vx_interface']):
        with util_general.RecordedOperation('create vxlan interface', self):
            util_network.create_interface(
                subst['vx_interface'], 'vxlan',
                'id %(vx_id)s dev %(mesh_interface)s dstport 0' % subst)
            util_process.execute(None, 'sysctl -w net.ipv4.conf.'
                                 '%(vx_interface)s.arp_notify=1' % subst)

    if not util_network.check_for_interface(subst['vx_bridge']):
        with util_general.RecordedOperation('create vxlan bridge', self):
            util_network.create_interface(subst['vx_bridge'], 'bridge', '')
            util_process.execute(None, 'ip link set %(vx_interface)s '
                                 'master %(vx_bridge)s' % subst)
            util_process.execute(
                None, 'ip link set %(vx_interface)s up' % subst)
            util_process.execute(
                None, 'ip link set %(vx_bridge)s up' % subst)
            util_process.execute(None, 'sysctl -w net.ipv4.conf.'
                                 '%(vx_bridge)s.arp_notify=1' % subst)
            util_process.execute(
                None, 'brctl setfd %(vx_bridge)s 0' % subst)
            util_process.execute(
                None, 'brctl stp %(vx_bridge)s off' % subst)
            util_process.execute(
                None, 'brctl setageing %(vx_bridge)s 0' % subst)

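# NOTE: util_network.check_for_interface() is defined elsewhere in the
# tree. As an illustration of the contract the code above relies on, a
# minimal sketch might look like this -- an assumption for clarity, not
# the project's actual helper:
def _check_for_interface_sketch(name):
    import subprocess
    # 'ip link show <name>' exits non-zero when the interface is absent.
    return subprocess.call(
        ['ip', 'link', 'show', name],
        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0
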
def delete_on_hypervisor(self):
    with self.get_lock(op='Network delete'):
        subst = self.subst_dict()

        if util_network.check_for_interface(subst['vx_bridge']):
            with util_general.RecordedOperation('delete vxlan bridge', self):
                util_process.execute(
                    None, 'ip link delete %(vx_bridge)s' % subst)

        if util_network.check_for_interface(subst['vx_interface']):
            with util_general.RecordedOperation('delete vxlan interface',
                                                self):
                util_process.execute(
                    None, 'ip link delete %(vx_interface)s' % subst)

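# NOTE: subst_dict() is defined elsewhere in the class. Judging from the
# format strings used throughout this section, it must provide at least
# these keys: vx_id, mesh_interface, vx_interface, vx_bridge,
# vx_veth_outer, vx_veth_inner, egress_veth_outer, egress_veth_inner,
# egress_bridge, netns, router, netmask and ipblock.
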
def create(self, iface_uuids, lock=None):
    self.state = self.STATE_CREATING
    self.interfaces = iface_uuids

    # Ensure we have state on disk
    os.makedirs(self.instance_path, exist_ok=True)

    # Configure block devices, including config drive creation
    self._configure_block_devices(lock)

    # Create the actual instance. Sometimes on Ubuntu 20.04 we need to
    # wait for port binding to work. Revisiting this is tracked by issue
    # 320 on github.
    with util_general.RecordedOperation('create domain', self):
        if not self.power_on():
            attempts = 0
            while not self.power_on() and attempts < 5:
                self.log.warning(
                    'Instance required an additional attempt to power on')
                time.sleep(5)
                attempts += 1

    if self.is_powered_on():
        self.log.info('Instance now powered on')
        self.state = self.STATE_CREATED
    else:
        self.log.info('Instance failed to power on')
        self.enqueue_delete_due_error('Instance failed to power on')

def remove_dhcp(self):
    if config.NODE_IS_NETWORK_NODE:
        subst = self.subst_dict()
        with util_general.RecordedOperation('remove dhcp', self):
            with self.get_lock(op='Network remove DHCP'):
                d = dhcp.DHCP(self, subst['vx_veth_inner'])
                d.remove_dhcpd()
    else:
        etcd.enqueue('networknode', RemoveDHCPNetworkTask(self.uuid))

def delete_on_network_node(self):
    with self.get_lock(op='Network delete'):
        subst = self.subst_dict()

        if util_network.check_for_interface(subst['vx_veth_outer']):
            with util_general.RecordedOperation('delete router veth', self):
                util_process.execute(
                    None, 'ip link delete %(vx_veth_outer)s' % subst)

        if util_network.check_for_interface(subst['egress_veth_outer']):
            with util_general.RecordedOperation('delete egress veth', self):
                util_process.execute(
                    None, 'ip link delete %(egress_veth_outer)s' % subst)

        if os.path.exists('/var/run/netns/%s' % self.uuid):
            with util_general.RecordedOperation('delete netns', self):
                util_process.execute(
                    None, 'ip netns del %s' % self.uuid)

        if self.floating_gateway:
            with db.get_lock('ipmanager', None, 'floating', ttl=120,
                             op='Network delete'):
                ipm = IPManager.from_db('floating')
                ipm.release(self.floating_gateway)
                ipm.persist()
                self.update_floating_gateway(None)

        self.state = self.STATE_DELETED

        # Ensure that all hypervisors remove this network. This is really
        # just catching strays, apart from on the network node where we
        # absolutely need to do this thing.
        for hyp in Nodes([active_nodes]):
            etcd.enqueue(hyp.uuid, {'tasks': [
                HypervisorDestroyNetworkTask(self.uuid)
            ]})

        self.remove_dhcp()
        self.remove_nat()

        ipm = IPManager.from_db(self.uuid)
        ipm.delete()

def delete(self):
    # Mark files we used in the image cache as recently used so that they
    # linger a little for possible future users.
    for disk in self.block_devices.get('devices', []):
        if 'blob_uuid' in disk and disk['blob_uuid']:
            cached_image_path = util_general.file_permutation_exists(
                os.path.join(config.STORAGE_PATH, 'image_cache',
                             disk['blob_uuid']),
                ['iso', 'qcow2'])
            if cached_image_path:
                pathlib.Path(cached_image_path).touch(exist_ok=True)

    with util_general.RecordedOperation('delete domain', self):
        try:
            self.power_off()

            nvram_path = os.path.join(self.instance_path, 'nvram')
            if os.path.exists(nvram_path):
                os.unlink(nvram_path)
            if self.nvram_template:
                b = blob.Blob.from_db(self.nvram_template)
                b.ref_count_dec()

            inst = self._get_domain()
            if inst:
                inst.undefine()
        except Exception as e:
            util_general.ignore_exception(
                'instance delete domain %s' % self, e)

    with util_general.RecordedOperation('delete disks', self):
        try:
            if os.path.exists(self.instance_path):
                shutil.rmtree(self.instance_path)
        except Exception as e:
            util_general.ignore_exception(
                'instance delete disks %s' % self, e)

    self.deallocate_instance_ports()

    if self.state.value.endswith('-%s' % self.STATE_ERROR):
        self.state = self.STATE_ERROR
    else:
        self.state = self.STATE_DELETED

def update_dhcp(self):
    if not self.provide_dhcp:
        return

    if config.NODE_IS_NETWORK_NODE:
        subst = self.subst_dict()
        with util_general.RecordedOperation('update dhcp', self):
            with self.get_lock(op='Network update DHCP'):
                d = dhcp.DHCP(self, subst['vx_veth_inner'])
                d.restart_dhcpd()
    else:
        etcd.enqueue('networknode', UpdateDHCPNetworkTask(self.uuid))

def instance_start(inst, network):
    if inst.state.value.endswith('-error'):
        LOG.with_instance(inst).warning(
            'You cannot start an instance in an error state.')
        return
    if inst.state.value in (dbo.STATE_DELETE_WAIT, dbo.STATE_DELETED):
        LOG.with_instance(inst).warning(
            'You cannot start an instance which has been deleted.')
        return

    with inst.get_lock(ttl=900, op='Instance start') as lock:
        try:
            # Ensure networks are connected to this node
            iface_uuids = []
            for netdesc in network:
                iface_uuids.append(netdesc['iface_uuid'])
                n = net.Network.from_db(netdesc['network_uuid'])
                if not n:
                    inst.enqueue_delete_due_error(
                        'missing network: %s' % netdesc['network_uuid'])
                    return

                if n.state.value != dbo.STATE_CREATED:
                    inst.enqueue_delete_due_error(
                        'network is not active: %s' % n.uuid)
                    return

                # We must record interfaces very early for the vxlan leak
                # detection code in the net daemon to work correctly.
                ni = networkinterface.NetworkInterface.from_db(
                    netdesc['iface_uuid'])
                ni.state = dbo.STATE_CREATED

                n.create_on_hypervisor()
                n.ensure_mesh()
                n.update_dhcp()

            # Allocate console and VDI ports
            inst.allocate_instance_ports()

            # Now we can start the instance
            with util_general.RecordedOperation('instance creation', inst):
                inst.create(iface_uuids, lock=lock)
        except exceptions.InvalidStateException as e:
            # This instance is in an error or deleted state. Given the
            # check at the top of this function, that indicates a race.
            inst.enqueue_delete_due_error(
                'invalid state transition: %s' % e)
            return

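# NOTE: a hypothetical call to instance_start(), showing the shape of the
# network descriptor list it expects; only the two keys actually read
# above are included, and the values are placeholders:
#
#   instance_start(inst, [{'network_uuid': '<network uuid>',
#                          'iface_uuid': '<interface uuid>'}])
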
def snapshot_disk(disk, blob_uuid, related_object=None):
    if not os.path.exists(disk['path']):
        return
    ensure_blob_path()
    dest_path = Blob.filepath(blob_uuid)

    # Actually make the snapshot
    with util_general.RecordedOperation('snapshot %s' % disk['device'],
                                        related_object):
        util_image.snapshot(None, disk['path'], dest_path)
        st = os.stat(dest_path)

    # And make the associated blob
    b = Blob.new(blob_uuid, st.st_size, time.time(), time.time())
    b.state = Blob.STATE_CREATED
    b.observe()
    b.request_replication()
    return b

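# NOTE: a hypothetical caller of snapshot_disk(), showing the disk dict
# keys it reads ('path' and 'device'); the path shown is illustrative
# only, not a real layout guarantee:
#
#   disk = {'device': 'vda',
#           'path': '/srv/shakenfist/instances/<uuid>/vda.qcow2'}
#   b = snapshot_disk(disk, str(uuid.uuid4()))
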
def _http_get_inner(self, lock, url, checksum, checksum_type):
    """Fetch the image if not already downloaded and return the blob."""
    with util_general.RecordedOperation('fetch image', self.instance):
        resp = self._open_connection(url)

        blob_uuid = str(uuid.uuid4())
        self.log.with_object(self.__artifact).with_fields({
            'blob': blob_uuid,
            'url': url
        }).info('Commencing HTTP fetch to blob')
        b = blob.http_fetch(resp, blob_uuid, [lock], self.log)

        # Ensure checksum is correct
        if not verify_checksum(
                os.path.join(config.STORAGE_PATH, 'blobs', b.uuid),
                checksum, checksum_type):
            self.instance.add_event('fetch image', 'bad checksum')
            raise exceptions.BadCheckSum('url=%s' % url)

        # Only persist values after the file has been verified.
        b.observe()
        b.request_replication()
        return b

def enable_nat(self):
    if not config.NODE_IS_NETWORK_NODE:
        return

    subst = self.subst_dict()
    if not util_network.nat_rules_for_ipblock(self.network_address):
        with util_general.RecordedOperation('enable nat', self):
            util_process.execute(
                None, 'echo 1 > /proc/sys/net/ipv4/ip_forward')
            util_process.execute(
                None,
                'iptables -A FORWARD -o %(egress_veth_inner)s '
                '-i %(vx_veth_inner)s -j ACCEPT' % subst,
                namespace=self.uuid)
            util_process.execute(
                None,
                'iptables -A FORWARD -i %(egress_veth_inner)s '
                '-o %(vx_veth_inner)s -j ACCEPT' % subst,
                namespace=self.uuid)
            util_process.execute(
                None,
                'iptables -t nat -A POSTROUTING '
                '-s %(ipblock)s/%(netmask)s '
                '-o %(egress_veth_inner)s -j MASQUERADE' % subst,
                namespace=self.uuid)

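# NOTE: util_network.nat_rules_for_ipblock() is assumed to report whether
# NAT rules for this network's address block already exist. A minimal
# sketch under that assumption, grepping iptables-save output -- not the
# project's actual implementation. Since the rules above are created
# inside the network's namespace, the real helper presumably checks
# there too:
def _nat_rules_for_ipblock_sketch(ipblock, namespace=None):
    import subprocess
    cmd = ['iptables-save', '-t', 'nat']
    if namespace:
        cmd = ['ip', 'netns', 'exec', namespace] + cmd
    out = subprocess.run(cmd, capture_output=True, text=True).stdout
    return str(ipblock) in out
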
def restore_instances():
    # Ensure all instances for this node are defined and have up to date
    # data.
    networks = []
    instances = []
    for inst in instance.Instances([instance.this_node_filter,
                                    instance.healthy_states_filter]):
        instance_problems = []
        inst_interfaces = inst.interfaces
        if not inst_interfaces:
            inst_interfaces = []

        updated_interfaces = False
        for ni in interfaces_for_instance(inst):
            if ni.network_uuid not in networks:
                networks.append(ni.network_uuid)
            if ni.uuid not in inst_interfaces:
                inst_interfaces.append(ni.uuid)
                updated_interfaces = True

        # We do not need a lock here because this loop only runs on the
        # node with the instance, and interfaces don't change post
        # instance creation.
        if updated_interfaces:
            inst.interfaces = inst_interfaces

        # TODO(mikal): do better here.
        # for disk in inst.disk_spec:
        #     if disk.get('base'):
        #         img = images.Image.new(disk['base'])
        #         # NOTE(mikal): this check isn't great -- it checks for
        #         # the original downloaded image, not the post transcode
        #         # version
        #         if (img.state in [dbo.STATE_DELETED, dbo.STATE_ERROR] or
        #                 not os.path.exists(img.version_image_path())):
        #             instance_problems.append(
        #                 '%s missing from image cache' % disk['base'])
        #             img.delete()

        if instance_problems:
            inst.enqueue_delete_due_error(
                'instance bad on startup: %s' % '; '.join(instance_problems))
        else:
            instances.append(inst)

    with util_general.RecordedOperation('restore networks', None):
        for network in networks:
            try:
                n = net.Network.from_db(network)
                if not n.is_dead():
                    LOG.with_object(n).info('Restoring network')
                    n.create_on_hypervisor()
                    n.ensure_mesh()
            except Exception as e:
                util_general.ignore_exception(
                    'restore network %s' % network, e)

    with util_general.RecordedOperation('restore instances', None):
        for inst in instances:
            try:
                with inst.get_lock(ttl=120, timeout=120,
                                   op='Instance restore'):
                    started = ['on', 'transition-to-on',
                               instance.Instance.STATE_INITIAL, 'unknown']
                    if inst.power_state not in started:
                        continue

                    LOG.with_object(inst).info('Restoring instance')
                    inst.create_on_hypervisor()
            except Exception as e:
                util_general.ignore_exception(
                    'restore instance %s' % inst, e)
                inst.enqueue_delete_due_error(
                    'exception while restoring instance on daemon restart')

def transcode_image(self, lock, b):
    # NOTE(mikal): it is assumed the caller holds a lock on the artifact,
    # and passes it in lock.

    # If this blob uuid is not the most recent index for the artifact,
    # set that.
    if self.__artifact.most_recent_index.get('blob_uuid') != b.uuid:
        self.__artifact.add_index(b.uuid)

    # Transcode if required, placing the transcoded file in a well known
    # location.
    os.makedirs(os.path.join(config.STORAGE_PATH, 'image_cache'),
                exist_ok=True)
    cached = util_general.file_permutation_exists(
        os.path.join(config.STORAGE_PATH, 'image_cache', b.uuid),
        ['iso', 'qcow2'])
    if cached:
        # We touch the file here, because we want to know when it was
        # last used.
        pathlib.Path(cached).touch(exist_ok=True)
    else:
        blob_path = os.path.join(config.STORAGE_PATH, 'blobs', b.uuid)
        mimetype = b.info.get('mime-type', '')

        if mimetype in ['application/x-cd-image',
                        'application/x-iso9660-image']:
            cache_path = os.path.join(config.STORAGE_PATH, 'image_cache',
                                      b.uuid + '.iso')
            util_general.link(blob_path, cache_path)
        else:
            if mimetype == 'application/gzip':
                cache_path = os.path.join(config.STORAGE_PATH,
                                          'image_cache', b.uuid)
                with util_general.RecordedOperation(
                        'decompress image', self.instance):
                    util_process.execute(
                        [lock], 'gunzip -k -q -c %s > %s'
                        % (blob_path, cache_path))
                blob_path = cache_path

            cache_path = os.path.join(config.STORAGE_PATH, 'image_cache',
                                      b.uuid + '.qcow2')
            cache_info = util_image.identify(blob_path)

            # Convert the cluster size from qemu format to an int
            cluster_size_as_int = QCOW2_CLUSTER_SIZE
            if cluster_size_as_int.endswith('M'):
                cluster_size_as_int = int(cluster_size_as_int[:-1]) * MiB
            elif cluster_size_as_int.endswith('K'):
                cluster_size_as_int = int(cluster_size_as_int[:-1]) * KiB
            else:
                cluster_size_as_int = int(cluster_size_as_int)

            if (cache_info.get('file format', '') == 'qcow2' and
                    cache_info.get('cluster_size', 0) == cluster_size_as_int):
                util_general.link(blob_path, cache_path)
            else:
                with util_general.RecordedOperation(
                        'transcode image', self.instance):
                    self.log.with_object(b).info(
                        'Transcoding %s -> %s' % (blob_path, cache_path))
                    util_image.create_qcow2([lock], blob_path, cache_path)

        shutil.chown(cache_path, config.LIBVIRT_USER, config.LIBVIRT_GROUP)
        self.log.with_fields(
            util_general.stat_log_fields(cache_path)).info(
            'Cache file %s created' % cache_path)

    self.__artifact.state = Artifact.STATE_CREATED

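# NOTE: a worked example of the cluster size conversion above, assuming
# the constants defined elsewhere in the tree are QCOW2_CLUSTER_SIZE =
# '2M', MiB = 1024 ** 2 and KiB = 1024 (the '2M' value is an assumption):
#
#   '2M'  -> 2 * 1048576 = 2097152
#   '64K' -> 64 * 1024   = 65536
#
# qemu-img reports 'cluster_size' in bytes, which is why the comparison
# against cache_info works on the converted integer.
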
def _configure_block_devices(self, lock):
    with self.get_lock_attr('block_devices', 'Initialize block devices'):
        # Create block devices if required
        block_devices = self.block_devices
        if not block_devices:
            block_devices = self._initialize_block_devices()

        # Generate a config drive
        if self.configdrive == 'openstack-disk':
            with util_general.RecordedOperation('make config drive', self):
                self._make_config_drive_openstack_disk(
                    os.path.join(self.instance_path,
                                 block_devices['devices'][1]['path']))

        # Prepare disks. At this point we have a file for each blob in
        # the image cache at a well known location (the blob uuid with
        # .qcow2 appended).
        if not block_devices['finalized']:
            modified_disks = []
            for disk in block_devices['devices']:
                disk['source'] = "<source file='%s'/>" % disk['path']
                disk['source_type'] = 'file'

                # All disk bases must have an associated blob, force that
                # if an image had to be fetched from outside the cluster.
                disk_base = None
                if disk.get('blob_uuid'):
                    disk_base = '%s%s' % (artifact.BLOB_URL,
                                          disk['blob_uuid'])
                elif disk.get('base') and not util_general.noneish(
                        disk.get('base')):
                    a = artifact.Artifact.from_url(
                        artifact.Artifact.TYPE_IMAGE, disk['base'])

                    mri = a.most_recent_index
                    if 'blob_uuid' not in mri:
                        raise exceptions.ArtifactHasNoBlobs(
                            'Artifact %s of type %s has no versions'
                            % (a.uuid, a.artifact_type))

                    disk['blob_uuid'] = mri['blob_uuid']
                    disk_base = '%s%s' % (artifact.BLOB_URL,
                                          disk['blob_uuid'])

                if disk_base:
                    cached_image_path = util_general.file_permutation_exists(
                        os.path.join(config.STORAGE_PATH, 'image_cache',
                                     disk['blob_uuid']),
                        ['iso', 'qcow2'])
                    if not cached_image_path:
                        raise exceptions.ImageMissingFromCache(
                            'Image %s is missing' % disk['blob_uuid'])

                    with util_general.RecordedOperation(
                            'detect cdrom images', self):
                        try:
                            cd = pycdlib.PyCdlib()
                            cd.open(cached_image_path)
                            disk['present_as'] = 'cdrom'
                        except Exception:
                            pass

                    if disk.get('present_as') == 'cdrom':
                        # There is no point in resizing or COW'ing a cdrom
                        disk['path'] = disk['path'].replace(
                            '.qcow2', '.raw')
                        disk['type'] = 'raw'
                        disk['snapshot_ignores'] = True

                        util_general.link(cached_image_path, disk['path'])

                        # qemu does not support removable media on virtio
                        # buses. It also only supports one IDE bus. This
                        # is quite limiting. Instead, we use USB for
                        # cdrom drives, unless you've specified a bus
                        # other than virtio in the creation request.
                        if disk['bus'] == 'virtio':
                            disk['bus'] = 'usb'
                            disk['device'] = _get_disk_device(
                                disk['bus'],
                                LETTERS.find(disk['device'][-1]))
                    elif disk['bus'] == 'nvme':
                        # NVMe disks do not currently support a COW layer
                        # for the instance disk. This is because we don't
                        # have a libvirt <disk/> element for them and
                        # therefore can't specify their backing store.
                        # Instead we produce a flat layer here.
                        util_image.create_qcow2(
                            [lock], cached_image_path, disk['path'],
                            disk_size=disk['size'])
                    else:
                        with util_general.RecordedOperation(
                                'create copy on write layer', self):
                            util_image.create_cow(
                                [lock], cached_image_path, disk['path'],
                                disk['size'])
                        self.log.with_fields(
                            util_general.stat_log_fields(
                                disk['path'])).info(
                            'COW layer %s created' % disk['path'])

                        # Record the backing store for modern libvirts
                        disk['backing'] = (
                            '<backingStore type=\'file\'>\n'
                            '  <format type=\'qcow2\'/>\n'
                            '  <source file=\'%s\'/>\n'
                            '</backingStore>\n' % (cached_image_path))
                elif not os.path.exists(disk['path']):
                    util_image.create_blank([lock], disk['path'],
                                            disk['size'])

                shutil.chown(disk['path'], 'libvirt-qemu', 'libvirt-qemu')
                modified_disks.append(disk)

            block_devices['devices'] = modified_disks
            block_devices['finalized'] = True
            self._db_set_attribute('block_devices', block_devices)

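# NOTE: _get_disk_device() and LETTERS are defined elsewhere. The call
# above implies the function maps a bus and an index to a device name; a
# plausible sketch follows, with the prefix table being an assumption
# rather than the project's actual mapping:
def _get_disk_device_sketch(bus, index):
    prefix = {'ide': 'hd', 'scsi': 'sd', 'usb': 'sd',
              'virtio': 'vd'}.get(bus, 'sd')
    return '%s%s' % (prefix, 'abcdefghijklmnopqrstuvwxyz'[index])
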
def main():
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    LOG.info('Starting...')
    setproctitle.setproctitle(
        daemon.process_name('main') + '-v%s' % util_general.get_version())

    # If you ran this, it means we're not shutting down any more
    n = Node.new(config.NODE_NAME, config.NODE_MESH_IP)
    n.state = Node.STATE_CREATED

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items.
    etcd.clear_stale_locks()
    Node.observe_this_node()
    etcd.restart_queues()

    def _start_daemon(d):
        pid = os.fork()
        if pid == 0:
            try:
                DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
                sys.exit(0)
            except Exception as e:
                util_general.ignore_exception('daemon creation', e)
                sys.exit(1)

        DAEMON_PIDS[pid] = d
        LOG.with_field('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling
    # decisions might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if config.NODE_IS_NETWORK_NODE:
        # Bootstrap the floating network in the Networks table
        floating_network = net.Network.from_db('floating')
        if not floating_network:
            floating_network = net.Network.create_floating_network(
                config.FLOATING_NETWORK)

        subst = {
            'egress_bridge': util_network.get_safe_interface_name(
                'egr-br-%s' % config.NODE_EGRESS_NIC),
            'egress_nic': config.NODE_EGRESS_NIC
        }

        if not util_network.check_for_interface(subst['egress_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical
            # bridge is considered outside the scope of the orchestration
            # software as it will cause the node to lose network
            # connectivity. So instead all we do is create a bridge if it
            # doesn't exist and then wire everything up to it. We can do
            # egress NAT in that state, even if floating IPs don't work.
            with util_general.RecordedOperation('create physical bridge',
                                                None):
                # No locking as read only
                ipm = IPManager.from_db('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                # We need to copy the MTU of the interface we are
                # bridging to or weird networking things happen.
                mtu = util_network.get_interface_mtu(config.NODE_EGRESS_NIC)

                util_network.create_interface(
                    subst['egress_bridge'], 'bridge', '', mtu=mtu)
                util_process.execute(
                    None, 'ip link set %(egress_bridge)s up' % subst)
                util_process.execute(
                    None, 'ip addr add %(master_float)s/%(netmask)s '
                    'dev %(egress_bridge)s' % subst)

                util_process.execute(
                    None, 'iptables -A FORWARD -o %(egress_nic)s '
                    '-i %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(
                    None, 'iptables -A FORWARD -i %(egress_nic)s '
                    '-o %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(
                    None, 'iptables -t nat -A POSTROUTING '
                    '-o %(egress_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        running_daemons = []
        for pid in DAEMON_PIDS:
            running_daemons.append(DAEMON_PIDS[pid])

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        for d in list(DAEMON_PIDS):
            if not psutil.pid_exists(d):
                LOG.warning('%s pid is missing, restarting'
                            % DAEMON_PIDS[d])
                _start_daemon(DAEMON_PIDS[d])

    _audit_daemons()
    restore_instances()

    running = True
    while True:
        time.sleep(5)

        try:
            wpid, _ = os.waitpid(-1, os.WNOHANG)
            while wpid != 0:
                LOG.warning('%s exited (pid %d)'
                            % (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
                if wpid in DAEMON_PIDS:
                    del DAEMON_PIDS[wpid]
                wpid, _ = os.waitpid(-1, os.WNOHANG)
        except ChildProcessError:
            # We get this if there are no child processes
            pass

        n = Node.from_db(config.NODE_NAME)
        if n.state.value not in [Node.STATE_STOPPING, Node.STATE_STOPPED]:
            _audit_daemons()
            Node.observe_this_node()
        elif len(DAEMON_PIDS) == 0:
            n.state = Node.STATE_STOPPED
            return
        else:
            if running:
                for pid in DAEMON_PIDS:
                    try:
                        os.kill(pid, signal.SIGTERM)
                        LOG.info('Sent SIGTERM to %s (pid %s)'
                                 % (DAEMON_PIDS.get(pid, 'unknown'), pid))
                    except OSError as e:
                        LOG.warning('Failed to send SIGTERM to %s: %s'
                                    % (pid, e))
            running = False

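# NOTE: each value in DAEMON_IMPLEMENTATIONS is expected to expose a
# Monitor class whose run() method blocks for the life of that daemon;
# the fork in _start_daemon() above relies on this. A trivial stand-in
# that satisfies the same contract, for illustration only:
class _NullMonitorSketch:
    def __init__(self, name):
        self.name = name

    def run(self):
        # A real monitor would do its work here; this one just idles.
        while True:
            time.sleep(60)
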
def create_on_network_node(self):
    # The floating network does not have a vxlan mesh
    if self.uuid == 'floating':
        return

    with self.get_lock(op='create_on_network_node'):
        if self.is_dead():
            raise DeadNetwork('network=%s' % self)

        self._create_common()

        subst = self.subst_dict()

        if not os.path.exists('/var/run/netns/%s' % self.uuid):
            with util_general.RecordedOperation('create netns', self):
                util_process.execute(None, 'ip netns add %s' % self.uuid)

        if not util_network.check_for_interface(subst['vx_veth_outer']):
            with util_general.RecordedOperation('create router veth', self):
                util_network.create_interface(
                    subst['vx_veth_outer'], 'veth',
                    'peer name %(vx_veth_inner)s' % subst)
                util_process.execute(
                    None,
                    'ip link set %(vx_veth_inner)s netns %(netns)s' % subst)

                # Refer to bug 952 for more details here, but it turns
                # out that adding an interface to a bridge overwrites the
                # MTU of the bridge in an undesirable way. So we lookup
                # the existing MTU and then re-specify it here.
                subst['vx_bridge_mtu'] = util_network.get_interface_mtu(
                    subst['vx_bridge'])
                util_process.execute(
                    None,
                    'ip link set %(vx_veth_outer)s master %(vx_bridge)s '
                    'mtu %(vx_bridge_mtu)s' % subst)
                util_process.execute(
                    None, 'ip link set %(vx_veth_outer)s up' % subst)
                util_process.execute(
                    None, 'ip link set %(vx_veth_inner)s up' % subst,
                    namespace=self.uuid)
                util_process.execute(
                    None,
                    'ip addr add %(router)s/%(netmask)s '
                    'dev %(vx_veth_inner)s' % subst,
                    namespace=self.uuid)

        if not util_network.check_for_interface(subst['egress_veth_outer']):
            with util_general.RecordedOperation('create egress veth', self):
                util_network.create_interface(
                    subst['egress_veth_outer'], 'veth',
                    'peer name %(egress_veth_inner)s' % subst)

                # Refer to bug 952 for more details here, but it turns
                # out that adding an interface to a bridge overwrites the
                # MTU of the bridge in an undesirable way. So we lookup
                # the existing MTU and then re-specify it here.
                subst['egress_bridge_mtu'] = util_network.get_interface_mtu(
                    subst['egress_bridge'])
                util_process.execute(
                    None,
                    'ip link set %(egress_veth_outer)s '
                    'master %(egress_bridge)s '
                    'mtu %(egress_bridge_mtu)s' % subst)
                util_process.execute(
                    None, 'ip link set %(egress_veth_outer)s up' % subst)
                util_process.execute(
                    None,
                    'ip link set %(egress_veth_inner)s netns %(netns)s'
                    % subst)

        if self.provide_nat:
            # We don't always need this lock, but acquiring it here means
            # we don't need to construct two identical ipmanagers one
            # after the other.
            with db.get_lock('ipmanager', None, 'floating', ttl=120,
                             op='Network deploy NAT'):
                ipm = IPManager.from_db('floating')
                if not self.floating_gateway:
                    self.update_floating_gateway(
                        ipm.get_random_free_address(self.unique_label()))
                    ipm.persist()

                subst['floating_router'] = ipm.get_address_at_index(1)
                subst['floating_gateway'] = self.floating_gateway
                subst['floating_netmask'] = ipm.netmask

            with util_general.RecordedOperation('enable virtual routing',
                                                self):
                addresses = util_network.get_interface_addresses(
                    subst['egress_veth_inner'],
                    namespace=subst['netns'])
                if subst['floating_gateway'] not in list(addresses):
                    util_process.execute(
                        None,
                        'ip addr add '
                        '%(floating_gateway)s/%(floating_netmask)s '
                        'dev %(egress_veth_inner)s' % subst,
                        namespace=self.uuid)
                    util_process.execute(
                        None,
                        'ip link set %(egress_veth_inner)s up' % subst,
                        namespace=self.uuid)

                default_routes = util_network.get_default_routes(
                    subst['netns'])
                if default_routes != [subst['floating_router']]:
                    if default_routes:
                        for default_route in default_routes:
                            util_process.execute(
                                None,
                                'route del default gw %s' % default_route,
                                namespace=self.uuid)

                    util_process.execute(
                        None,
                        'route add default gw %(floating_router)s' % subst,
                        namespace=self.uuid)

            self.enable_nat()

        self.update_dhcp()

        # A final check to ensure we haven't raced with a delete
        if self.is_dead():
            raise DeadNetwork('network=%s' % self)

        self.state = self.STATE_CREATED

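# NOTE: util_network.get_default_routes() is assumed to return the
# gateway addresses of any default routes inside the given namespace. A
# sketch under that assumption, parsing 'ip route list default' -- not
# necessarily how the real helper works:
def _get_default_routes_sketch(namespace):
    import subprocess
    out = subprocess.run(
        ['ip', 'netns', 'exec', namespace, 'ip', 'route', 'list',
         'default'],
        capture_output=True, text=True).stdout
    routes = []
    for line in out.splitlines():
        elems = line.split()
        if len(elems) >= 3 and elems[0] == 'default' and elems[1] == 'via':
            routes.append(elems[2])
    return routes
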
def instance_delete(inst):
    with inst.get_lock(op='Instance delete'):
        # There are two delete state flows:
        #   - error transition states (preflight-error etc) to error
        #   - created to deleted
        #
        # We don't need delete_wait for the error states as they're
        # already in a transition state.
        if not inst.state.value.endswith('-error'):
            inst.state = dbo.STATE_DELETE_WAIT
        db.add_event('instance', inst.uuid, 'queued', 'delete', None, None)

        # Create list of networks used by instance. We cannot use the
        # interfaces cached in the instance here, because the instance
        # may have failed to get to the point where it populates that
        # field (an image fetch failure for example).
        instance_networks = []
        interfaces = []
        for ni in networkinterface.interfaces_for_instance(inst):
            if ni:
                interfaces.append(ni)
                if ni.network_uuid not in instance_networks:
                    instance_networks.append(ni.network_uuid)

        # Stop the instance
        inst.power_off()

        # Delete the instance's interfaces
        with util_general.RecordedOperation('release network addresses',
                                            inst):
            for ni in interfaces:
                ni.delete()

        # Create list of networks used by all other instances
        host_networks = []
        for i in instance.Instances(
                [instance.this_node_filter, instance.active_states_filter]):
            if i.uuid != inst.uuid:
                for iface_uuid in i.interfaces:
                    ni = networkinterface.NetworkInterface.from_db(
                        iface_uuid)
                    if ni and ni.network_uuid not in host_networks:
                        host_networks.append(ni.network_uuid)

        inst.delete()

        # Check each network used by the deleted instance
        for network in instance_networks:
            n = net.Network.from_db(network)
            if n:
                # If the network is used by another instance, only update
                if network in host_networks:
                    if n.state.value == dbo.STATE_DELETE_WAIT:
                        # Do not update a network about to be deleted
                        continue

                    with util_general.RecordedOperation(
                            'deallocate ip address', inst):
                        n.update_dhcp()
                else:
                    # Network not used by any other instance, therefore
                    # delete it
                    with util_general.RecordedOperation(
                            'remove network from node', n):
                        n.delete_on_hypervisor()