def _remove_mesh_element(self, n):
    """Delete the all-zeros-MAC fdb entry for node ``n`` on the vxlan interface.

    Args:
        n: destination address of the mesh node to remove, substituted into
            the ``dst`` argument of ``bridge fdb del``.
    """
    self.log.info('Removing excess mesh element %s', n)

    params = self.subst_dict()
    params['node'] = n
    command = ('bridge fdb del to 00:00:00:00:00:00 dst %(node)s '
               'dev %(vx_interface)s' % params)
    util_process.execute(None, command)
def _add_mesh_element(self, n):
    """Append an all-zeros-MAC fdb entry for node ``n`` on the vxlan interface.

    Args:
        n: destination address of the mesh node to add, substituted into
            the ``dst`` argument of ``bridge fdb append``.
    """
    self.log.info('Adding new mesh element %s', n)

    params = self.subst_dict()
    params['node'] = n
    command = ('bridge fdb append to 00:00:00:00:00:00 '
               'dst %(node)s dev %(vx_interface)s' % params)
    util_process.execute(None, command)
def create_blank(locks, disk_file, disk_size):
    """Create an empty qcow2 image at ``disk_file`` unless it already exists.

    Args:
        locks: passed through unchanged to ``util_process.execute``.
        disk_file: path of the image to create.
        disk_size: size handed to ``qemu-img create`` with a ``G`` suffix.
    """
    if not os.path.exists(disk_file):
        command = 'qemu-img create -o cluster_size=%s -f qcow2 %s %sG' % (
            constants.QCOW2_CLUSTER_SIZE, disk_file, disk_size)
        util_process.execute(
            locks, command, iopriority=util_process.PRIORITY_LOW)
def snapshot(locks, source, destination):
    """Flatten ``source`` into a standalone qcow2 file at ``destination``.

    Runs ``qemu-img convert`` at low IO priority, adding ``-c`` when
    ``config.COMPRESS_SNAPSHOTS`` is set.
    """
    base = 'qemu-img convert --force-share -o cluster_size=%s -O qcow2' % (
        constants.QCOW2_CLUSTER_SIZE)
    compress_flag = ' -c' if config.COMPRESS_SNAPSHOTS else ''
    command = ' '.join([base + compress_flag, source, destination])

    util_process.execute(
        locks, command, iopriority=util_process.PRIORITY_LOW)
def restart_dhcpd(self):
    """Regenerate the dnsmasq config and reload (or start) dnsmasq.

    Does nothing when the network's namespace is absent from
    /var/run/netns. Otherwise the config and hosts files are rewritten and
    SIGHUP is sent; if the signal could not be delivered, dnsmasq is
    started inside the network namespace.
    """
    netns_path = '/var/run/netns/%s' % self.network.uuid
    if os.path.exists(netns_path):
        self._make_config()
        self._make_hosts()

        signalled = self._send_signal(signal.SIGHUP)
        if not signalled:
            util_process.execute(
                None,
                'dnsmasq --conf-file=%(config_dir)s/config' % self.subst,
                namespace=self.network.uuid)
def remove_floating_ip(self, floating_address, inner_address):
    """Delete the outer veth of a floating IP if that interface exists.

    The interface name is derived from the floating address rendered as
    eight hex digits (``flt-<hex>-o``).

    Args:
        floating_address: dotted-quad IPv4 floating address.
        inner_address: address the floating IP mapped to (logged only).
    """
    self.log.info('Removing floating ip %s -> %s',
                  floating_address, inner_address)

    params = self.subst_dict()
    params['floating_address'] = floating_address
    params['floating_address_as_hex'] = '%08x' % int(
        ipaddress.IPv4Address(floating_address))
    params['inner_address'] = inner_address

    outer_veth = 'flt-%(floating_address_as_hex)s-o' % params
    if not util_network.check_for_interface(outer_veth):
        return

    util_process.execute(
        None, 'ip link del flt-%(floating_address_as_hex)s-o' % params)
def create_interface(interface, interface_type, extra, mtu=None):
    """Create a network interface with ``ip link add``.

    Args:
        interface: requested name; passed through
            ``get_safe_interface_name`` before use.
        interface_type: value for the ``type`` argument (e.g. 'veth').
        extra: additional arguments appended verbatim to the command.
        mtu: MTU to set; a falsy value selects
            ``config.MAX_HYPERVISOR_MTU - 50``.
    """
    effective_mtu = mtu if mtu else config.MAX_HYPERVISOR_MTU - 50
    safe_name = get_safe_interface_name(interface)

    arguments = {
        'interface': safe_name,
        'interface_type': interface_type,
        'mtu': effective_mtu,
        'extra': extra
    }
    command = ('ip link add %(interface)s mtu %(mtu)s '
               'type %(interface_type)s %(extra)s' % arguments)
    process.execute(None, command)
def delete_on_hypervisor(self):
    """Remove this network's vxlan bridge and vxlan interface, if present.

    Both deletions happen while holding the network lock; each is skipped
    when the corresponding interface does not exist.
    """
    with self.get_lock(op='Network delete'):
        params = self.subst_dict()

        bridge_present = util_network.check_for_interface(
            params['vx_bridge'])
        if bridge_present:
            with util_general.RecordedOperation('delete vxlan bridge', self):
                bridge_cmd = 'ip link delete %(vx_bridge)s' % params
                util_process.execute(None, bridge_cmd)

        vxlan_present = util_network.check_for_interface(
            params['vx_interface'])
        if vxlan_present:
            with util_general.RecordedOperation('delete vxlan interface',
                                                self):
                vxlan_cmd = 'ip link delete %(vx_interface)s' % params
                util_process.execute(None, vxlan_cmd)
def create_qcow2(locks, cache_file, disk_file, disk_size=None):
    """Convert ``cache_file`` into a qcow2 image at ``disk_file``.

    Nothing happens when ``disk_file`` already exists. When ``disk_size``
    is truthy the new image is then resized to that many ``G``.
    """
    if os.path.exists(disk_file):
        return

    convert_cmd = (
        'qemu-img convert -t none -o cluster_size=%s -O qcow2 %s %s'
        % (constants.QCOW2_CLUSTER_SIZE, cache_file, disk_file))
    util_process.execute(
        locks, convert_cmd, iopriority=util_process.PRIORITY_LOW)

    if not disk_size:
        return

    resize_cmd = 'qemu-img resize %s %dG' % (disk_file, int(disk_size))
    util_process.execute(
        locks, resize_cmd, iopriority=util_process.PRIORITY_LOW)
def run(self):
    """Launch the API process described by ``config.API_COMMAND_LINE``.

    The worker count is four times the first element returned by
    libvirt's ``getCPUMap()``. Exit codes 0, 1 and -15 are accepted from
    the launched command.
    """
    LOG.info('Starting')

    libvirt = util_libvirt.get_libvirt()
    conn = libvirt.open('qemu:///system')
    present_cpus, _, _ = conn.getCPUMap()

    os.makedirs('/var/run/sf', exist_ok=True)

    command = config.API_COMMAND_LINE % {
        'port': config.API_PORT,
        'timeout': config.API_TIMEOUT,
        'name': daemon.process_name('api'),
        'workers': present_cpus * 4
    }
    util_process.execute(
        None, command,
        env_variables=os.environ,
        check_exit_code=[0, 1, -15])
def clean_events_mesh_operations(etcd_client):
    """Trim each network's event history in etcd to its last 50 keys.

    Keys are listed with the ``etcdctl`` command line client rather than
    through ``etcd_client``; deletions go through ``etcd_client.delete``.
    """
    # TODO(andy): This can be removed when older releases do not exist

    # We probably need to cleanup excess network mesh events. We also need to
    # try and fetch small batches because of limits in the amount of data etcd3
    # can return at one time.

    # Save time and use the already available etcdctl client.
    net_keys, stderr = util_process.execute(
        None,
        'etcdctl get --prefix /sf/event/network/ | grep sf/event',
        check_exit_code=[0, 1])
    if stderr:
        print('ERROR: Unable to retrieve network keys:%s' % stderr)
        return

    # Group the event keys by the network uuid embedded in the key path
    network_events = defaultdict(list)
    for key in filter(None, net_keys.split('\n')):
        _blank, _sf, _event, _network, uuid, _time = key.split('/')
        network_events[uuid].append(key)

    # Delete all but last 50 events
    count = 0
    for keys in network_events.values():
        stale_keys = keys[:-50]
        for stale in stale_keys:
            print('--> Removing verbose network event %s' % stale)
            etcd_client.delete(stale)
        count += len(stale_keys)

    print(' - Cleaned up %d old network mesh events' % count)
def discover_interfaces():
    """Map MAC addresses, interface names and vxlan ids from ``ip addr list``.

    Returns:
        A 3-tuple ``(mac_to_iface, iface_to_mac, vxid_to_mac)``.
        ``mac_to_iface`` always contains the all-zeros MAC mapped to
        'broadcast'. ``vxid_to_mac`` is keyed by the integer parsed (base
        16) from the second dash-separated field of interfaces named
        ``vxlan-...``.
    """
    mac_to_iface = {'00:00:00:00:00:00': 'broadcast'}
    iface_to_mac = {}
    vxid_to_mac = {}

    iface_name = None
    iface_name_re = re.compile(r'^[0-9]+: ([^:]+): <')
    # `ip addr` indents the link/ether line by several spaces, so accept any
    # amount of leading whitespace instead of requiring exactly one space.
    link_ether_re = re.compile(r'^\s+link/ether (.*) brd .*')

    stdout, _ = process.execute(None, 'ip addr list')
    for line in stdout.split('\n'):
        line = line.rstrip()

        m = iface_name_re.match(line)
        if m:
            iface_name = m.group(1)
            continue

        m = link_ether_re.match(line)
        if not m or iface_name is None:
            # Not a MAC line, or a MAC line seen before any interface header.
            continue

        link_ether = m.group(1)
        mac_to_iface[link_ether] = iface_name
        iface_to_mac[iface_name] = link_ether

        if iface_name.startswith('vxlan-'):
            try:
                vxid = int(iface_name.split('-')[1], 16)
            except ValueError:
                # A vxlan-* interface whose suffix is not hex should not
                # abort discovery of every other interface.
                continue
            vxid_to_mac[vxid] = link_ether

    return mac_to_iface, iface_to_mac, vxid_to_mac
def get_interface_mtus(namespace=None):
    """Yield an ``(ifname, mtu)`` pair for every link ``ip`` reports.

    Args:
        namespace: passed through to ``process.execute``.
    """
    link_json, _ = process.execute(
        None, 'ip -pretty -json link show',
        check_exit_code=[0, 1], namespace=namespace)
    yield from ((link['ifname'], link['mtu'])
                for link in _clean_ip_json(link_json))
def get_interface_mtu(interface, namespace=None):
    """Return the MTU of the first link ``ip`` reports for ``interface``.

    Returns None when the parsed output contains no links.
    """
    command = 'ip -pretty -json link show %s' % interface
    link_json, _ = process.execute(
        None, command, check_exit_code=[0, 1], namespace=namespace)

    for link in _clean_ip_json(link_json):
        # Only the first entry is of interest.
        return link['mtu']
    return None
def nat_rules_for_ipblock(ipblock):
    """Report whether any nat POSTROUTING rule line mentions ``ipblock``.

    The check is a plain substring match of ``str(ipblock)`` against each
    line of ``iptables -t nat -L POSTROUTING -n -v``.
    """
    out, _ = process.execute(None, 'iptables -t nat -L POSTROUTING -n -v')

    # Output looks like this:
    # Chain POSTROUTING (policy ACCEPT 199 packets, 18189 bytes)
    # pkts bytes target prot opt in out source destination
    # 23 1736 MASQUERADE all -- * ens4 192.168.242.0/24 0.0.0.0/0
    needle = str(ipblock)
    return any(needle in rule_line for rule_line in out.split('\n'))
def get_interface_addresses(name, namespace=None):
    """Yield the first ``local`` address of each entry for interface ``name``.

    Entries without an ``addr_info`` key, or with an empty ``addr_info``
    list, are skipped.
    """
    command = 'ip -pretty -json addr show %s' % name
    addr_json, _ = process.execute(
        None, command, check_exit_code=[0, 1], namespace=namespace)

    for entry in _clean_ip_json(addr_json):
        if 'addr_info' not in entry:
            continue
        try:
            yield entry['addr_info'][0]['local']
        except IndexError:
            # No addresses configured on this entry.
            pass
def get_interface_statistics(name, namespace=None):
    """Return the ``stats64`` dictionary ``ip -s`` reports for ``name``.

    Args:
        name: interface to query.
        namespace: passed through to ``process.execute``.

    Returns:
        The ``stats64`` value of the first link entry, or None if that
        entry has no such key.

    Raises:
        exceptions.NoInterfaceStatistics: when the command produced no
            output or the parsed output contains no link entries.
    """
    stdout, _ = process.execute(
        None, 'ip -s -pretty -json link show %s' % name,
        check_exit_code=[0, 1], namespace=namespace)

    # The other users of _clean_ip_json() in this file treat its result as a
    # sequence of per-link dictionaries, so the statistics live on the first
    # element rather than on the sequence itself.
    links = _clean_ip_json(stdout) if stdout else None
    if not links:
        raise exceptions.NoInterfaceStatistics(
            'No statistics for interface %s in namespace %s'
            % (name, namespace))

    return links[0].get('stats64')
def get_default_routes(namespace):
    """List the distinct gateways of the default routes in ``namespace``.

    The gateway is taken as the third space-separated field of each line
    of ``ip route list default`` that has more than three fields. Order of
    first appearance is preserved.
    """
    stdout, _ = process.execute(
        None, 'ip route list default', namespace=namespace)
    if not stdout:
        return []

    gateways = []
    for route_line in stdout.split('\n'):
        fields = route_line.split(' ')
        if len(fields) <= 3:
            continue
        gateway = fields[2]
        if gateway not in gateways:
            gateways.append(gateway)
    return gateways
def discover_mesh(self):
    """Yield the ``dst`` address of each permanent all-zeros-MAC fdb entry.

    Entries are read from ``bridge fdb show brport`` for this network's
    vxlan interface. Nothing is yielded for the floating network.
    """
    # The floating network does not have a vxlan mesh
    if self.uuid == 'floating':
        return

    mesh_re = re.compile(r'00:00:00:00:00:00 dst (.*) self permanent')
    fdb_output, _ = util_process.execute(
        None, 'bridge fdb show brport %(vx_interface)s' % self.subst_dict())

    matches = (mesh_re.match(fdb_line) for fdb_line in fdb_output.split('\n'))
    yield from (found.group(1) for found in matches if found)
def create_cow(locks, cache_file, disk_file, disk_size):
    """Create a COW layer on top of the image cache.

    disk_size is specified in GiBs.

    Args:
        locks: passed through unchanged to ``util_process.execute``.
        cache_file: backing image handed to ``qemu-img create -b``.
        disk_file: path of the new layer; nothing happens if it exists.
        disk_size: requested size in GiB, or a falsy value to inherit the
            size of the backing image.

    Raises:
        exceptions.ImagesCannotShrinkException: when the backing image's
            virtual size exceeds the requested size.
    """
    if os.path.exists(disk_file):
        return

    info = identify(cache_file)

    # identify() returns {} for a missing file and may yield a non-numeric
    # value, so a missing key or unparsable size means "size unknown" rather
    # than an unrelated KeyError / ValueError.
    try:
        virtual_size = int(info.get('virtual size'))
    except (TypeError, ValueError):
        virtual_size = None

    if virtual_size and disk_size:
        requested_bytes = disk_size * 1024 * 1024 * 1024
        if virtual_size > requested_bytes:
            raise exceptions.ImagesCannotShrinkException(
                'The specified size of %dgb (%d bytes) is smaller than the existing size '
                'of the image of %s bytes.'
                % (disk_size, requested_bytes, info['virtual size']))

    command = ('qemu-img create -b %s -o cluster_size=%s -f qcow2 %s'
               % (cache_file, constants.QCOW2_CLUSTER_SIZE, disk_file))
    if disk_size:
        # Only pass an explicit size when one was requested.
        command += ' %dG' % int(disk_size)

    util_process.execute(
        locks, command, iopriority=util_process.PRIORITY_LOW)
def check_for_interface(name, namespace=None, up=False):
    """Report whether interface ``name`` exists (and optionally is up).

    Args:
        name: interface to look for.
        namespace: optional network namespace; if given and missing from
            /var/run/netns the answer is False without running ``ip``.
        up: when true, additionally require 'UP' among the link's flags.
    """
    if namespace and not os.path.exists('/var/run/netns/%s' % namespace):
        return False

    stdout, stderr = process.execute(
        None, 'ip -pretty -json link show %s' % name,
        check_exit_code=[0, 1], namespace=namespace)

    reported_missing = stderr.rstrip('\n').endswith(' does not exist.')
    if reported_missing:
        return False

    if not up:
        return True

    links = _clean_ip_json(stdout)
    return 'UP' in links[0]['flags']
def enable_nat(self):
    """Install forwarding and masquerade rules for this network.

    Only acts on the network node, and only when no nat POSTROUTING rule
    already mentions this network's address. IP forwarding is switched on,
    then two FORWARD rules and one MASQUERADE rule are added inside the
    network's namespace.
    """
    if not config.NODE_IS_NETWORK_NODE:
        return

    params = self.subst_dict()
    if util_network.nat_rules_for_ipblock(self.network_address):
        return

    namespaced_rules = (
        'iptables -A FORWARD -o %(egress_veth_inner)s '
        '-i %(vx_veth_inner)s -j ACCEPT',
        'iptables -A FORWARD -i %(egress_veth_inner)s '
        '-o %(vx_veth_inner)s -j ACCEPT',
        'iptables -t nat -A POSTROUTING -s %(ipblock)s/%(netmask)s '
        '-o %(egress_veth_inner)s -j MASQUERADE',
    )

    with util_general.RecordedOperation('enable nat', self):
        util_process.execute(
            None, 'echo 1 > /proc/sys/net/ipv4/ip_forward')
        for rule in namespaced_rules:
            util_process.execute(None, rule % params, namespace=self.uuid)
def delete_on_network_node(self):
    """Tear down this network's network-node resources and mark it deleted.

    Under the network lock: removes the router and egress veths and the
    network namespace if they exist, releases the floating gateway address
    (if one is set), and sets the state to STATE_DELETED. Afterwards a
    HypervisorDestroyNetworkTask is enqueued for every active node, DHCP
    and NAT are removed, and this network's IPManager is deleted.
    """
    with self.get_lock(op='Network delete'):
        subst = self.subst_dict()

        if util_network.check_for_interface(subst['vx_veth_outer']):
            with util_general.RecordedOperation('delete router veth', self):
                util_process.execute(
                    None, 'ip link delete %(vx_veth_outer)s' % subst)

        if util_network.check_for_interface(subst['egress_veth_outer']):
            with util_general.RecordedOperation('delete egress veth', self):
                util_process.execute(
                    None, 'ip link delete %(egress_veth_outer)s' % subst)

        if os.path.exists('/var/run/netns/%s' % self.uuid):
            with util_general.RecordedOperation('delete netns', self):
                util_process.execute(
                    None, 'ip netns del %s' % self.uuid)

        if self.floating_gateway:
            # The floating IPManager is shared, so it is modified under its
            # own lock.
            with db.get_lock('ipmanager', None, 'floating', ttl=120,
                             op='Network delete'):
                ipm = IPManager.from_db('floating')
                ipm.release(self.floating_gateway)
                ipm.persist()
                self.update_floating_gateway(None)

        self.state = self.STATE_DELETED

    # Ensure that all hypervisors remove this network. This is really
    # just catching strays, apart from on the network node where we
    # absolutely need to do this thing.
    for hyp in Nodes([active_nodes]):
        etcd.enqueue(hyp.uuid, {'tasks': [
            HypervisorDestroyNetworkTask(self.uuid)
        ]})

    self.remove_dhcp()
    self.remove_nat()

    # Finally discard this network's own address allocations.
    ipm = IPManager.from_db(self.uuid)
    ipm.delete()
def identify(path):
    """Work out what an image is.

    Parses the ``key: value`` lines of ``qemu-img info --force-share``.

    Args:
        path: image file to inspect.

    Returns:
        A dict of the reported fields, or {} if ``path`` does not exist.
        Values matching VALUE_WITH_BRACKETS_RE are stored as the float of
        the captured group; numeric values with a K/M/G/T suffix are scaled
        by the matching power of 1024; other numeric values become floats;
        everything else is kept as the raw string.
    """
    if not os.path.exists(path):
        return {}

    out, _ = util_process.execute(
        None, 'qemu-img info --force-share %s' % path)

    multipliers = {
        'K': 1024,
        'M': 1024 * 1024,
        'G': 1024 * 1024 * 1024,
        'T': 1024 * 1024 * 1024 * 1024,
    }

    data = {}
    for line in out.split('\n'):
        elems = line.strip().split(': ')
        if len(elems) < 2:
            continue

        key = elems[0]
        value = ': '.join(elems[1:])

        # A value such as a file path can end in K/M/G/T without being a
        # size, so a failed numeric conversion must fall back to the raw
        # string instead of raising.
        try:
            m = VALUE_WITH_BRACKETS_RE.match(value)
            if m:
                value = float(m.group(1))
            elif value[-1:] in multipliers:
                value = float(value[:-1]) * multipliers[value[-1]]
        except ValueError:
            pass

        try:
            data[key] = float(value)
        except (TypeError, ValueError):
            data[key] = value

    return data
def add_floating_ip(self, floating_address, inner_address):
    """Wire a floating IP through to ``inner_address`` with a veth and DNAT.

    Creates a veth pair named after the floating address in hex
    (``flt-<hex>-o`` / ``flt-<hex>-i``), moves the inner end into the
    network namespace, assigns it the floating address as a /32, and adds
    a PREROUTING DNAT rule to ``inner_address`` inside the namespace.
    """
    self.log.info('Adding floating ip %s -> %s',
                  floating_address, inner_address)

    params = self.subst_dict()
    params['floating_address'] = floating_address
    params['floating_address_as_hex'] = '%08x' % int(
        ipaddress.IPv4Address(floating_address))
    params['inner_address'] = inner_address

    outer_name = 'flt-%(floating_address_as_hex)s-o' % params
    peer_args = 'peer name flt-%(floating_address_as_hex)s-i' % params
    util_network.create_interface(outer_name, 'veth', peer_args)

    # Move the inner end of the pair into the network namespace.
    util_process.execute(
        None,
        'ip link set flt-%(floating_address_as_hex)s-i netns %(netns)s'
        % params)

    util_process.execute(
        None,
        'ip addr add %(floating_address)s/32 '
        'dev flt-%(floating_address_as_hex)s-i' % params,
        namespace=self.uuid)

    util_process.execute(
        None,
        'iptables -t nat -A PREROUTING -d %(floating_address)s -j DNAT '
        '--to-destination %(inner_address)s' % params,
        namespace=self.uuid)
def _update_power_states(self):
    """Reconcile libvirt's view of 'sf:' domains with the instance database.

    First walks the running domains, then the defined-but-inactive ones.
    Domains with no database record are removed; instances recorded as
    deleted for more than five minutes are cleaned up; otherwise the
    recorded power state is refreshed. libvirt errors are logged at debug
    level and end the pass.
    """
    libvirt = util_libvirt.get_libvirt()
    conn = libvirt.open('qemu:///system')
    try:
        # Names of running domains that have a database record, so the
        # inactive pass below can skip them.
        seen = []

        # Active VMs have an ID. Active means running in libvirt
        # land.
        for domain_id in conn.listDomainsID():
            domain = conn.lookupByID(domain_id)
            if not domain.name().startswith('sf:'):
                continue

            instance_uuid = domain.name().split(':')[1]
            log_ctx = LOG.with_instance(instance_uuid)

            inst = instance.Instance.from_db(instance_uuid)
            if not inst:
                # Instance is SF but not in database. Kill to reduce load.
                log_ctx.warning('Destroying unknown instance')
                self._delete_instance_files(instance_uuid)
                util_process.execute(
                    None, 'virsh destroy "sf:%s"' % instance_uuid)
                util_process.execute(
                    None, 'virsh undefine --nvram "sf:%s"' % instance_uuid)
                continue

            inst.place_instance(config.NODE_NAME)
            seen.append(domain.name())

            db_state = inst.state
            if db_state.value == dbo.STATE_DELETED:
                # NOTE(mikal): a delete might be in-flight in the queue.
                # We only worry about instances which should have gone
                # away five minutes ago.
                if time.time() - db_state.update_time < 300:
                    continue

                inst.enforced_deletes_increment()
                attempts = inst._db_get_attribute(
                    'enforced_deletes')['count']

                if attempts > 5:
                    # Sometimes we just can't delete the VM. Try the big
                    # hammer instead.
                    log_ctx.warning(
                        'Attempting alternate delete method for instance')
                    self._delete_instance_files(instance_uuid)
                    util_process.execute(
                        None, 'virsh undefine --nvram "sf:%s"' % instance_uuid)
                    inst.add_event('enforced delete', 'complete')
                else:
                    inst.delete()

                log_ctx.with_field(
                    'attempt', attempts).warning('Deleting stray instance')

                continue

            state = util_libvirt.extract_power_state(libvirt, domain)
            inst.update_power_state(state)
            if state == 'crashed':
                if inst.state.value in [
                    dbo.STATE_DELETE_WAIT, dbo.STATE_DELETED
                ]:
                    util_process.execute(
                        None, 'virsh undefine --nvram "sf:%s"' % instance_uuid)
                    # NOTE(review): this assigns to .value on the object
                    # returned by inst.state, while the else branch assigns
                    # inst.state itself -- confirm this write is persisted.
                    inst.state.value = dbo.STATE_DELETED
                else:
                    inst.state = inst.state.value + '-error'

        # Inactive VMs just have a name, and are powered off
        # in our state system.
        for domain_name in conn.listDefinedDomains():
            if not domain_name.startswith('sf:'):
                continue

            if domain_name not in seen:
                instance_uuid = domain_name.split(':')[1]
                log_ctx = LOG.with_instance(instance_uuid)
                inst = instance.Instance.from_db(instance_uuid)

                if not inst:
                    # Instance is SF but not in database. Kill because
                    # unknown.
                    log_ctx.warning('Removing unknown inactive instance')
                    self._delete_instance_files(instance_uuid)
                    try:
                        domain = conn.lookupByName(domain_name)
                        # TODO(mikal): work out if we can pass
                        # VIR_DOMAIN_UNDEFINE_NVRAM with virDomainUndefineFlags()
                        domain.undefine()
                    except libvirt.libvirtError:
                        # Fall back to the virsh command line.
                        util_process.execute(
                            None, 'virsh undefine --nvram "sf:%s"' % instance_uuid)
                    continue

                db_state = inst.state
                if db_state.value in [
                    dbo.STATE_DELETE_WAIT, dbo.STATE_DELETED
                ]:
                    # NOTE(mikal): a delete might be in-flight in the queue.
                    # We only worry about instances which should have gone
                    # away five minutes ago.
                    if time.time() - db_state.update_time < 300:
                        continue

                    log_ctx.info('Detected stray instance')
                    self._delete_instance_files(instance_uuid)
                    try:
                        domain = conn.lookupByName(domain_name)
                        # TODO(mikal): work out if we can pass
                        # VIR_DOMAIN_UNDEFINE_NVRAM with virDomainUndefineFlags()
                        domain.undefine()
                    except libvirt.libvirtError:
                        # Fall back to the virsh command line.
                        util_process.execute(
                            None, 'virsh undefine --nvram "sf:%s"' % instance_uuid)

                    inst.add_event('deleted stray', 'complete')
                    if db_state.value != dbo.STATE_DELETED:
                        # NOTE(review): same .value assignment pattern as
                        # above -- confirm it is persisted.
                        inst.state.value = dbo.STATE_DELETED
                    continue

                inst.place_instance(config.NODE_NAME)

                db_power = inst.power_state
                if not os.path.exists(inst.instance_path):
                    # If we're inactive and our files aren't on disk,
                    # we have a problem.
                    log_ctx.info('Detected error state for instance')
                    if inst.state.value in [
                        dbo.STATE_DELETE_WAIT, dbo.STATE_DELETED
                    ]:
                        inst.state.value = dbo.STATE_DELETED
                    else:
                        inst.state = inst.state.value + '-error'

                elif not db_power or db_power['power_state'] != 'off':
                    # Defined but not running, so record it as powered off.
                    log_ctx.info('Detected power off for instance')
                    inst.update_power_state('off')
                    inst.add_event('detected poweroff', 'complete')

    except libvirt.libvirtError as e:
        LOG.debug('Failed to lookup all domains: %s' % e)
def create_on_network_node(self):
    """Build this network's network-node plumbing and mark it created.

    Under the network lock: creates the common vxlan pieces, the network
    namespace, the router veth pair and the egress veth pair (each only if
    missing) and, when NAT is provided, allocates a floating gateway,
    configures addressing and the default route inside the namespace, and
    enables NAT. Afterwards DHCP is updated and the state is set to
    STATE_CREATED.

    Raises:
        DeadNetwork: if ``is_dead()`` is true at entry or at the final
            check.
    """
    # The floating network does not have a vxlan mesh
    if self.uuid == 'floating':
        return

    with self.get_lock(op='create_on_network_node'):
        if self.is_dead():
            raise DeadNetwork('network=%s' % self)

        self._create_common()

        subst = self.subst_dict()
        if not os.path.exists('/var/run/netns/%s' % self.uuid):
            with util_general.RecordedOperation('create netns', self):
                util_process.execute(None, 'ip netns add %s' % self.uuid)

        if not util_network.check_for_interface(subst['vx_veth_outer']):
            with util_general.RecordedOperation('create router veth', self):
                util_network.create_interface(
                    subst['vx_veth_outer'], 'veth',
                    'peer name %(vx_veth_inner)s' % subst)
                # The inner end of the pair lives inside the namespace.
                util_process.execute(
                    None, 'ip link set %(vx_veth_inner)s netns %(netns)s' % subst)

                # Refer to bug 952 for more details here, but it turns out
                # that adding an interface to a bridge overwrites the MTU of
                # the bridge in an undesirable way. So we lookup the existing
                # MTU and then re-specify it here.
                subst['vx_bridge_mtu'] = util_network.get_interface_mtu(
                    subst['vx_bridge'])
                util_process.execute(
                    None,
                    'ip link set %(vx_veth_outer)s master %(vx_bridge)s '
                    'mtu %(vx_bridge_mtu)s' % subst)

                util_process.execute(
                    None, 'ip link set %(vx_veth_outer)s up' % subst)
                util_process.execute(
                    None, 'ip link set %(vx_veth_inner)s up' % subst,
                    namespace=self.uuid)
                util_process.execute(
                    None,
                    'ip addr add %(router)s/%(netmask)s '
                    'dev %(vx_veth_inner)s' % subst,
                    namespace=self.uuid)

        if not util_network.check_for_interface(subst['egress_veth_outer']):
            with util_general.RecordedOperation('create egress veth', self):
                util_network.create_interface(
                    subst['egress_veth_outer'], 'veth',
                    'peer name %(egress_veth_inner)s' % subst)

                # Refer to bug 952 for more details here, but it turns out
                # that adding an interface to a bridge overwrites the MTU of
                # the bridge in an undesirable way. So we lookup the existing
                # MTU and then re-specify it here.
                subst['egress_bridge_mtu'] = util_network.get_interface_mtu(
                    subst['egress_bridge'])
                util_process.execute(
                    None,
                    'ip link set %(egress_veth_outer)s master %(egress_bridge)s '
                    'mtu %(egress_bridge_mtu)s' % subst)

                util_process.execute(
                    None, 'ip link set %(egress_veth_outer)s up' % subst)
                util_process.execute(
                    None, 'ip link set %(egress_veth_inner)s netns %(netns)s' % subst)

        if self.provide_nat:
            # We don't always need this lock, but acquiring it here means
            # we don't need to construct two identical ipmanagers one after
            # the other.
            with db.get_lock('ipmanager', None, 'floating', ttl=120,
                             op='Network deploy NAT'):
                ipm = IPManager.from_db('floating')
                if not self.floating_gateway:
                    self.update_floating_gateway(
                        ipm.get_random_free_address(self.unique_label()))
                    ipm.persist()

                subst['floating_router'] = ipm.get_address_at_index(1)
                subst['floating_gateway'] = self.floating_gateway
                subst['floating_netmask'] = ipm.netmask

            with util_general.RecordedOperation('enable virtual routing', self):
                addresses = util_network.get_interface_addresses(
                    subst['egress_veth_inner'], namespace=subst['netns'])
                # Only configure the gateway address if it is not already on
                # the inner egress interface.
                if not subst['floating_gateway'] in list(addresses):
                    util_process.execute(
                        None,
                        'ip addr add %(floating_gateway)s/%(floating_netmask)s '
                        'dev %(egress_veth_inner)s' % subst,
                        namespace=self.uuid)
                    util_process.execute(
                        None, 'ip link set %(egress_veth_inner)s up' % subst,
                        namespace=self.uuid)

                default_routes = util_network.get_default_routes(
                    subst['netns'])
                # Replace whatever default routes exist unless the floating
                # router is already the only one.
                if default_routes != [subst['floating_router']]:
                    if default_routes:
                        for default_route in default_routes:
                            util_process.execute(
                                None, 'route del default gw %s' % default_route,
                                namespace=self.uuid)

                    util_process.execute(
                        None,
                        'route add default gw %(floating_router)s' % subst,
                        namespace=self.uuid)

            self.enable_nat()

    self.update_dhcp()

    # A final check to ensure we haven't raced with a delete
    if self.is_dead():
        raise DeadNetwork('network=%s' % self)
    self.state = self.STATE_CREATED
def main():
    """Start and supervise the node's daemons until the node is stopped.

    Registers the node, forks one child per entry in
    DAEMON_IMPLEMENTATIONS, performs egress bridge setup when this is the
    network node, restores instances, then loops every five seconds
    reaping exited children and restarting missing daemons. Once the node
    state is stopping or stopped, children are sent SIGTERM once and the
    function returns when none remain.
    """
    global DAEMON_IMPLEMENTATIONS
    global DAEMON_PIDS

    LOG.info('Starting...')
    setproctitle.setproctitle(
        daemon.process_name('main') + '-v%s' % util_general.get_version())

    # If you ran this, it means we're not shutting down any more
    n = Node.new(config.NODE_NAME, config.NODE_MESH_IP)
    n.state = Node.STATE_CREATED

    # Log configuration on startup
    for key, value in config.dict().items():
        LOG.info('Configuration item %s = %s' % (key, value))

    daemon.set_log_level(LOG, 'main')

    # Check in early and often, also reset processing queue items.
    etcd.clear_stale_locks()
    Node.observe_this_node()
    etcd.restart_queues()

    def _start_daemon(d):
        # Fork a child that runs daemon d's Monitor; the parent records the
        # child's pid against the daemon name.
        pid = os.fork()
        if pid == 0:
            try:
                DAEMON_IMPLEMENTATIONS[d].Monitor(d).run()
                sys.exit(0)
            except Exception as e:
                util_general.ignore_exception('daemon creation', e)
                sys.exit(1)

        DAEMON_PIDS[pid] = d
        LOG.with_field('pid', pid).info('Started %s' % d)

    # Resource usage publisher, we need this early because scheduling decisions
    # might happen quite early on.
    _start_daemon('resources')

    # If I am the network node, I need some setup
    if config.NODE_IS_NETWORK_NODE:
        # Bootstrap the floating network in the Networks table
        floating_network = net.Network.from_db('floating')
        if not floating_network:
            floating_network = net.Network.create_floating_network(
                config.FLOATING_NETWORK)

        subst = {
            'egress_bridge': util_network.get_safe_interface_name(
                'egr-br-%s' % config.NODE_EGRESS_NIC),
            'egress_nic': config.NODE_EGRESS_NIC
        }

        if not util_network.check_for_interface(subst['egress_bridge']):
            # NOTE(mikal): Adding the physical interface to the physical bridge
            # is considered outside the scope of the orchestration software as
            # it will cause the node to lose network connectivity. So instead
            # all we do is create a bridge if it doesn't exist and the wire
            # everything up to it. We can do egress NAT in that state, even if
            # floating IPs don't work.
            with util_general.RecordedOperation('create physical bridge', None):
                # No locking as read only
                ipm = IPManager.from_db('floating')
                subst['master_float'] = ipm.get_address_at_index(1)
                subst['netmask'] = ipm.netmask

                # We need to copy the MTU of the interface we are bridging to
                # or weird networking things happen.
                mtu = util_network.get_interface_mtu(config.NODE_EGRESS_NIC)
                util_network.create_interface(
                    subst['egress_bridge'], 'bridge', '', mtu=mtu)

                util_process.execute(None,
                                     'ip link set %(egress_bridge)s up' % subst)
                util_process.execute(None,
                                     'ip addr add %(master_float)s/%(netmask)s '
                                     'dev %(egress_bridge)s' % subst)

                # Allow forwarding between the bridge and the egress NIC in
                # both directions, and masquerade traffic leaving the NIC.
                util_process.execute(None,
                                     'iptables -A FORWARD -o %(egress_nic)s '
                                     '-i %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -A FORWARD -i %(egress_nic)s '
                                     '-o %(egress_bridge)s -j ACCEPT' % subst)
                util_process.execute(None,
                                     'iptables -t nat -A POSTROUTING '
                                     '-o %(egress_nic)s -j MASQUERADE' % subst)

    def _audit_daemons():
        # Start any daemon with no recorded pid, and restart any daemon
        # whose recorded pid no longer exists.
        running_daemons = []
        for pid in DAEMON_PIDS:
            running_daemons.append(DAEMON_PIDS[pid])

        for d in DAEMON_IMPLEMENTATIONS:
            if d not in running_daemons:
                _start_daemon(d)

        # Iterate over a copy because _start_daemon() adds to DAEMON_PIDS.
        for d in list(DAEMON_PIDS):
            if not psutil.pid_exists(d):
                LOG.warning('%s pid is missing, restarting' % DAEMON_PIDS[d])
                _start_daemon(DAEMON_PIDS[d])

    _audit_daemons()
    restore_instances()

    # True until SIGTERM has been sent to the children during shutdown.
    running = True
    while True:
        time.sleep(5)

        try:
            # Reap every child that has exited, without blocking.
            wpid, _ = os.waitpid(-1, os.WNOHANG)
            while wpid != 0:
                LOG.warning('%s exited (pid %d)'
                            % (DAEMON_PIDS.get(wpid, 'unknown'), wpid))
                if wpid in DAEMON_PIDS:
                    del DAEMON_PIDS[wpid]
                wpid, _ = os.waitpid(-1, os.WNOHANG)

        except ChildProcessError:
            # We get this if there are no child processes
            pass

        n = Node.from_db(config.NODE_NAME)
        if n.state.value not in [Node.STATE_STOPPING, Node.STATE_STOPPED]:
            _audit_daemons()
            Node.observe_this_node()

        elif len(DAEMON_PIDS) == 0:
            # Shutting down and every child has been reaped.
            n.state = Node.STATE_STOPPED
            return

        else:
            if running:
                for pid in DAEMON_PIDS:
                    try:
                        os.kill(pid, signal.SIGTERM)
                        LOG.info('Sent SIGTERM to %s (pid %s)'
                                 % (DAEMON_PIDS.get(pid, 'unknown'), pid))
                    except OSError as e:
                        LOG.warn('Failed to send SIGTERM to %s: %s' % (pid, e))

                running = False
def transcode_image(self, lock, b):
    """Ensure blob ``b`` has an entry in the image cache, then mark created.

    If no cached ``.iso`` or ``.qcow2`` exists for the blob, one is made:
    CD images are linked as ``.iso``; gzip blobs are decompressed first;
    anything else is linked as ``.qcow2`` when it is already qcow2 with the
    expected cluster size, or converted with ``util_image.create_qcow2``
    otherwise.

    Args:
        lock: lock on the artifact held by the caller; passed to the
            commands that are run.
        b: the blob to cache; its ``uuid`` and ``info`` are read.
    """
    # NOTE(mikal): it is assumed the caller holds a lock on the artifact, and passes
    # it in lock.

    # If this blob uuid is not the most recent index for the artifact, set that
    if self.__artifact.most_recent_index.get('blob_uuid') != b.uuid:
        self.__artifact.add_index(b.uuid)

    # Transcode if required, placing the transcoded file in a well known location.
    os.makedirs(os.path.join(config.STORAGE_PATH, 'image_cache'), exist_ok=True)
    cached = util_general.file_permutation_exists(
        os.path.join(config.STORAGE_PATH, 'image_cache', b.uuid),
        ['iso', 'qcow2'])
    if cached:
        # We touch the file here, because we want to know when it was last used.
        pathlib.Path(cached).touch(exist_ok=True)

    else:
        blob_path = os.path.join(config.STORAGE_PATH, 'blobs', b.uuid)
        mimetype = b.info.get('mime-type', '')

        if mimetype in [
            'application/x-cd-image', 'application/x-iso9660-image'
        ]:
            cache_path = os.path.join(config.STORAGE_PATH, 'image_cache',
                                      b.uuid + '.iso')
            util_general.link(blob_path, cache_path)

        else:
            if mimetype == 'application/gzip':
                cache_path = os.path.join(config.STORAGE_PATH, 'image_cache',
                                          b.uuid)
                with util_general.RecordedOperation(
                        'decompress image', self.instance):
                    util_process.execute([lock], 'gunzip -k -q -c %s > %s'
                                         % (blob_path, cache_path))
                # From here on the decompressed file is the source.
                blob_path = cache_path

            cache_path = os.path.join(config.STORAGE_PATH, 'image_cache',
                                      b.uuid + '.qcow2')
            cache_info = util_image.identify(blob_path)

            # Convert the cluster size from qemu format to an int
            cluster_size_as_int = QCOW2_CLUSTER_SIZE
            if cluster_size_as_int.endswith('M'):
                cluster_size_as_int = int(cluster_size_as_int[:-1]) * MiB
            elif cluster_size_as_int.endswith('K'):
                cluster_size_as_int = int(cluster_size_as_int[:-1]) * KiB
            else:
                cluster_size_as_int = int(cluster_size_as_int)

            # An image that is already qcow2 with the expected cluster size
            # is linked rather than converted.
            if (cache_info.get('file format', '') == 'qcow2' and
                    cache_info.get('cluster_size', 0) == cluster_size_as_int):
                util_general.link(blob_path, cache_path)
            else:
                with util_general.RecordedOperation(
                        'transcode image', self.instance):
                    self.log.with_object(b).info('Transcoding %s -> %s'
                                                 % (blob_path, cache_path))
                    util_image.create_qcow2([lock], blob_path, cache_path)

        shutil.chown(cache_path, config.LIBVIRT_USER, config.LIBVIRT_GROUP)
        self.log.with_fields(
            util_general.stat_log_fields(cache_path)).info(
            'Cache file %s created' % cache_path)

    self.__artifact.state = Artifact.STATE_CREATED
def _create_common(self):
    """Create this network's vxlan interface and bridge where missing.

    The vxlan interface gets ``arp_notify`` enabled. A newly created bridge
    is joined to the vxlan interface, both are brought up, ``arp_notify``
    is enabled on the bridge, and forward delay, STP and ageing are set via
    ``brctl``. The floating network is skipped entirely.
    """
    # The floating network does not have a vxlan mesh
    if self.uuid == 'floating':
        return

    params = self.subst_dict()

    vxlan_exists = util_network.check_for_interface(params['vx_interface'])
    if not vxlan_exists:
        with util_general.RecordedOperation('create vxlan interface', self):
            util_network.create_interface(
                params['vx_interface'], 'vxlan',
                'id %(vx_id)s dev %(mesh_interface)s dstport 0' % params)
            util_process.execute(
                None,
                'sysctl -w net.ipv4.conf.'
                '%(vx_interface)s.arp_notify=1' % params)

    bridge_exists = util_network.check_for_interface(params['vx_bridge'])
    if not bridge_exists:
        bridge_setup = (
            'ip link set %(vx_interface)s '
            'master %(vx_bridge)s',
            'ip link set %(vx_interface)s up',
            'ip link set %(vx_bridge)s up',
            'sysctl -w net.ipv4.conf.'
            '%(vx_bridge)s.arp_notify=1',
            'brctl setfd %(vx_bridge)s 0',
            'brctl stp %(vx_bridge)s off',
            'brctl setageing %(vx_bridge)s 0',
        )
        with util_general.RecordedOperation('create vxlan bridge', self):
            util_network.create_interface(params['vx_bridge'], 'bridge', '')
            for template in bridge_setup:
                util_process.execute(None, template % params)