def _find_most_matching_images(self, requested_images, candidates):
    """Return the subset of candidates hosting the most requested images.

    Each candidate node is scored by how many of the requested base
    images already back a disk of an instance on that node; only nodes
    tied for the top score are returned. An empty candidate list is
    returned unchanged.
    """
    match_counts = {}
    for candidate in candidates:
        # Distinct base images backing instance disks on this node.
        cached = set()
        for instance in db.get_instances(only_node=candidate):
            block_devices = instance['block_devices']
            if block_devices:
                for device in block_devices['devices']:
                    base = device.get('base')
                    if base:
                        cached.add(base)
        match_counts[candidate] = sum(
            1 for image in cached if image in requested_images)

    if not match_counts:
        # No candidates were supplied, so there is nothing to rank.
        return candidates

    # Group candidates by score and keep the top-scoring group.
    grouped = {}
    for candidate, count in match_counts.items():
        grouped.setdefault(count, []).append(candidate)
    return grouped[max(grouped)]
def delete(self, instance_uuid=None, instance_from_db_virt=None):
    """Delete an instance, then clean up networks it alone was using."""
    db.add_event('instance', instance_uuid, 'API DELETE', None, None, None)

    # Networks this instance is attached to.
    instance_networks = []
    for iface in db.get_instance_interfaces(instance_uuid):
        network_uuid = iface['network_uuid']
        if network_uuid not in instance_networks:
            instance_networks.append(network_uuid)

    # Networks still required by other instances on this host.
    host_networks = []
    for other in db.get_instances(local_only=True):
        if other['uuid'] == instance_uuid:
            continue
        for iface in db.get_instance_interfaces(other['uuid']):
            network_uuid = iface['network_uuid']
            if network_uuid not in host_networks:
                host_networks.append(network_uuid)

    instance_from_db_virt.delete()

    for network_uuid in instance_networks:
        n = net.from_db(network_uuid)
        if not n:
            continue
        if network_uuid in host_networks:
            # Network still in use here: just release the instance's address.
            with util.RecordedOperation('deallocate ip address',
                                        instance_from_db_virt):
                n.update_dhcp()
        else:
            # Nothing else on this host uses it: remove the network.
            with util.RecordedOperation('remove network', n):
                n.delete()
def _find_most_matching_networks(self, requested_networks, candidates):
    """Return the subset of candidates attached to the most requested networks.

    Each candidate node is scored by how many of the requested networks
    already have an instance interface on that node; only the nodes tied
    for the top score are returned. An empty candidate list is returned
    unchanged.
    """
    scores = {}
    for candidate in candidates:
        # Distinct networks with an instance interface on this node.
        attached = set()
        for inst in db.get_instances(only_node=candidate):
            for iface in db.get_instance_interfaces(inst['uuid']):
                attached.add(iface['network_uuid'])
        scores[candidate] = sum(
            1 for network in attached if network in requested_networks)

    if not scores:
        # No candidates supplied, nothing to rank.
        return candidates

    # Group candidates by score and return the top-scoring group.
    ranked = {}
    for candidate, score in scores.items():
        ranked.setdefault(score, []).append(candidate)
    return ranked[max(ranked)]
def restore_instances():
    """Restore the networks and instances defined for this node.

    Runs at daemon start: first rebuilds each network the local
    instances use, then recreates the instances themselves. Failures
    are logged per object so one bad network or instance does not abort
    the rest; a failed instance is additionally marked as errored.
    """
    # Ensure all instances for this node are defined
    networks = []
    instances = []
    for inst in list(
            db.get_instances(only_node=config.parsed.get('NODE_NAME'))):
        for iface in db.get_instance_interfaces(inst['uuid']):
            if iface['network_uuid'] not in networks:
                networks.append(iface['network_uuid'])
        instances.append(inst['uuid'])

    with util.RecordedOperation('restore networks', None):
        for network in networks:
            # Bug fix: n must be bound before the try body runs, otherwise
            # a failure inside net.from_db() would make the except clause
            # itself raise NameError and abort the whole restore loop.
            n = None
            try:
                n = net.from_db(network)
                LOG.info('%s Restoring network' % n)
                n.create()
                n.ensure_mesh()
                n.update_dhcp()
            except Exception as e:
                LOG.error('%s Failed to restore network: %s' % (n, e))

    with util.RecordedOperation('restore instances', None):
        for instance in instances:
            # Same fix as above: i is defined even if virt.from_db() raises.
            i = None
            try:
                i = virt.from_db(instance)
                LOG.info('%s Restoring instance' % i)
                i.create()
            except Exception as e:
                LOG.error('%s Failed to restore instance: %s' % (i, e))
                db.update_instance_state(instance, 'error')
def delete(self, namespace):
    """Delete a namespace, refusing if it is the system one or non-empty."""
    if not namespace:
        return error(400, 'no namespace specified')
    if namespace == 'system':
        return error(403, 'you cannot delete the system namespace')

    # The namespace must contain no live objects; instances and networks
    # already in a terminal state do not block deletion.
    live_instances = [
        i['uuid'] for i in db.get_instances(all=True, namespace=namespace)
        if i['state'] not in ['deleted', 'error']]
    if live_instances:
        return error(400, 'you cannot delete a namespace with instances')

    live_networks = [
        n['uuid'] for n in db.get_networks(all=True, namespace=namespace)
        if n['state'] not in ['deleted', 'error']]
    if live_networks:
        return error(400, 'you cannot delete a namespace with networks')

    db.delete_namespace(namespace)
    db.delete_metadata('namespace', namespace)
def run(self):
    """Supervise one console-log trigger observer process per local instance.

    Loops forever: reaps observers whose process died, starts observers
    for created instances on this node that lack one, and kills observers
    for instances that are no longer on this node (or no longer created).
    """
    logutil.info(None, 'Starting')
    # Maps instance uuid -> multiprocessing.Process observing its console.
    observers = {}

    while True:
        # Cleanup terminated observers
        all_observers = list(observers.keys())
        for instance_uuid in all_observers:
            if not observers[instance_uuid].is_alive():
                # Reap process (short join so a stuck reap cannot block us)
                observers[instance_uuid].join(1)
                logutil.info([virt.ThinInstance(instance_uuid)],
                             'Trigger observer has terminated')
                db.add_event('instance', instance_uuid,
                             'trigger monitor', 'crashed', None, None)
                del observers[instance_uuid]

        # Start missing observers. Any uuid left in extra_instances after
        # this loop has an observer but is no longer a local instance.
        extra_instances = list(observers.keys())

        for inst in db.get_instances(
                only_node=config.parsed.get('NODE_NAME')):
            if inst['uuid'] in extra_instances:
                extra_instances.remove(inst['uuid'])

            # Only fully created instances get an observer.
            if inst['state'] != 'created':
                continue

            if inst['uuid'] not in observers:
                console_path = os.path.join(
                    config.parsed.get('STORAGE_PATH'), 'instances',
                    inst['uuid'], 'console.log')
                p = multiprocessing.Process(
                    target=observe, args=(console_path, inst['uuid']),
                    name='%s-%s' % (daemon.process_name('triggers'),
                                    inst['uuid']))
                p.start()

                observers[inst['uuid']] = p
                logutil.info([virt.ThinInstance(inst['uuid'])],
                             'Started trigger observer')
                db.add_event('instance', inst['uuid'],
                             'trigger monitor', 'started', None, None)

        # Cleanup extra observers (instance left this node or was deleted);
        # kill failures are ignored as the process may already be gone.
        for instance_uuid in extra_instances:
            p = observers[instance_uuid]
            try:
                os.kill(p.pid, signal.SIGKILL)
            except Exception:
                pass

            del observers[instance_uuid]
            logutil.info([virt.ThinInstance(instance_uuid)],
                         'Finished trigger observer')
            db.add_event('instance', instance_uuid,
                         'trigger monitor', 'finished', None, None)

        time.sleep(1)
def delete(self, confirm=False, namespace=None):
    """Delete all instances in the namespace.

    Args:
        confirm: must be literally True, as a safety interlock.
        namespace: the namespace to empty. Mandatory for system callers
            (namespace='system' empties the whole cluster deliberately);
            forced to the caller's own namespace for everyone else.

    Returns:
        The list of instance uuids queued for deletion, or an error()
        response when validation fails.
    """
    if confirm is not True:
        return error(400, 'parameter confirm is not set true')

    if get_jwt_identity() == 'system':
        if not isinstance(namespace, str):
            # A client using a system key must specify the namespace. This
            # ensures that deleting all instances in the cluster (by
            # specifying namespace='system') is a deliberate act.
            return error(400, 'system user must specify parameter namespace')
    else:
        if namespace and namespace != get_jwt_identity():
            return error(401, 'you cannot delete other namespaces')
        namespace = get_jwt_identity()

    instances_del = []
    tasks_by_node = {}
    # Bug fix: this previously passed the builtin function ``all`` (always
    # truthy) as the ``all`` argument instead of the boolean True. The
    # intent is to fetch instances in every state and filter terminal
    # states ourselves below.
    for instance in list(db.get_instances(all=True, namespace=namespace)):
        if instance['state'] in ['deleted', 'error']:
            continue

        # If this instance is not on a node, just do the DB cleanup locally
        if not instance['node']:
            node = config.parsed.get('NODE_NAME')
        else:
            node = instance['node']

        tasks_by_node.setdefault(node, [])
        tasks_by_node[node].append({
            'type': 'instance_delete',
            'instance_uuid': instance['uuid'],
            'next_state': 'deleted',
            'next_state_message': None
        })
        instances_del.append(instance['uuid'])

    # One queue entry per node, carrying all of that node's delete tasks.
    for node in tasks_by_node:
        db.enqueue(node, {'tasks': tasks_by_node[node]})

    # Wait (bounded by API_ASYNC_WAIT) for the deletions to complete.
    waiting_for = copy.copy(instances_del)
    start_time = time.time()
    while (
            waiting_for
            and (time.time() - start_time
                 < config.parsed.get('API_ASYNC_WAIT'))):
        for instance_uuid in copy.copy(waiting_for):
            i = db.get_instance(instance_uuid)
            if i['state'] in ['deleted', 'error']:
                waiting_for.remove(instance_uuid)

    return instances_del
def run(self):
    """Supervise a console trigger observer process per local instance."""
    observers = {}

    while True:
        # Drop observers whose processes have died.
        for instance_uuid in list(observers):
            if observers[instance_uuid].is_alive():
                continue
            LOG.info(
                'Trigger observer for instance %s has terminated'
                % instance_uuid)
            del observers[instance_uuid]

        # Walk local instances, spawning observers where missing. Anything
        # left in stale afterwards observes an instance no longer here.
        stale = list(observers)
        for inst in list(db.get_instances(
                only_node=config.parsed.get('NODE_NAME'))):
            uuid = inst['uuid']
            if uuid in stale:
                stale.remove(uuid)
            if inst['state'] != 'created' or uuid in observers:
                continue

            console_path = os.path.join(
                config.parsed.get('STORAGE_PATH'), 'instances', uuid,
                'console.log')
            worker = multiprocessing.Process(
                target=observe, args=(console_path, uuid),
                name='sf trigger %s' % uuid)
            worker.start()
            observers[uuid] = worker
            LOG.info('Started trigger observer for instance %s' % uuid)

        # Kill observers for instances which have left this node; the
        # process may already be gone, so kill failures are ignored.
        for instance_uuid in stale:
            worker = observers.pop(instance_uuid)
            try:
                os.kill(worker.pid, signal.SIGKILL)
            except Exception:
                pass
            LOG.info('Finished trigger observer for instance %s'
                     % instance_uuid)

        time.sleep(1)
def run(self):
    """Periodically re-verify the vxlan mesh for networks on this node."""
    while True:
        time.sleep(30)

        # We do not reap unused networks from the network node, as they might be
        # in use for instances on other hypervisor nodes.
        if config.parsed.get('NODE_IP') != config.parsed.get('NETWORK_NODE_IP'):
            # Collect the networks local instances are attached to.
            host_networks = []
            for inst in list(db.get_instances(
                    only_node=config.parsed.get('NODE_NAME'))):
                for iface in db.get_instance_interfaces(inst['uuid']):
                    if iface['network_uuid'] not in host_networks:
                        host_networks.append(iface['network_uuid'])

            for network in host_networks:
                n = net.from_db(network)
                # Bug fix: from_db() can return None (other call sites in
                # this codebase guard with "if n:"); skip a vanished
                # network rather than crashing the maintenance daemon.
                if n:
                    n.ensure_mesh()
def restore_instances():
    """Restore networks and instances that should run on this node.

    Called at daemon startup. First recreates every network used by a
    local instance, then recreates the instances themselves (only those
    whose recorded power state suggests they were running). Failures are
    swallowed via util.ignore_exception so one broken object does not
    block the rest; a failed instance is also queued into the error state.
    """
    # Ensure all instances for this node are defined
    networks = []
    instances = []
    for inst in list(
            db.get_instances(only_node=config.parsed.get('NODE_NAME'))):
        for iface in db.get_instance_interfaces(inst['uuid']):
            if not iface['network_uuid'] in networks:
                networks.append(iface['network_uuid'])
        instances.append(inst['uuid'])

    with util.RecordedOperation('restore networks', None):
        for network in networks:
            try:
                n = net.from_db(network)
                LOG.withObj(n).info('Restoring network')
                n.create()
                n.ensure_mesh()
                n.update_dhcp()
            except Exception as e:
                # Best effort: log and continue with the next network.
                util.ignore_exception('restore network %s' % network, e)

    with util.RecordedOperation('restore instances', None):
        for instance in instances:
            try:
                # Serialise restore against other operations on this instance.
                with db.get_lock('instance', None, instance, ttl=120,
                                 timeout=120, op='Instance restore'):
                    i = virt.from_db(instance)
                    if not i:
                        continue

                    # Only restart instances that were (or may have been)
                    # running before the daemon restarted.
                    started = ['on', 'transition-to-on', 'initial', 'unknown']
                    if i.db_entry.get('power_state', 'unknown') not in started:
                        continue

                    LOG.withObj(i).info('Restoring instance')
                    i.create()
            except Exception as e:
                util.ignore_exception('restore instance %s' % instance, e)
                db.enqueue_instance_error(
                    instance,
                    'exception while restoring instance on daemon restart')
def _find_most_matching_networks(self, requested_networks, candidates):
    """Return the candidates hosting the most of the requested networks.

    Scores each candidate by how many requested networks already have an
    instance interface there, then returns the top-scoring group. A lone
    top score of one on the network node expresses no real preference
    (the network node participates in every network), so in that case all
    candidates are returned. An empty candidate list yields [].
    """
    if not candidates:
        return []

    # Score each candidate by requested networks present on it.
    tally = {}
    for candidate in candidates:
        seen = set()
        for inst in db.get_instances(only_node=candidate):
            for iface in db.get_instance_interfaces(inst['uuid']):
                seen.add(iface['network_uuid'])
        tally[candidate] = sum(
            1 for network in seen if network in requested_networks)

    # Group candidates by score.
    by_matches = {}
    for candidate in candidates:
        by_matches.setdefault(tally[candidate], []).append(candidate)

    best = max(by_matches)

    # Check that the maximum is not just the network node.
    # (Network node always has every network.)
    net_node = db.get_network_node()['fqdn']
    if best == 1 and by_matches[best][0] == net_node:
        # No preference, all candidates are a reasonable choice
        return candidates

    return by_matches[best]
def delete(self, instance_uuid=None, instance_from_db=None,
           instance_from_db_virt=None):
    """Delete an instance via the API, cleaning up its networks."""
    # An already deleted instance cannot be deleted again.
    if instance_from_db['state'] == 'deleted':
        return error(404, 'instance not found')

    with db.get_lock('/sf/instance/%s' % instance_uuid):
        db.add_event('instance', instance_uuid, 'api', 'delete', None, None)

        # Record networks used by this instance while marking each of its
        # interfaces as deleted.
        instance_networks = []
        for iface in db.get_instance_interfaces(instance_uuid):
            network_uuid = iface['network_uuid']
            if network_uuid not in instance_networks:
                instance_networks.append(network_uuid)
            db.update_network_interface_state(iface['uuid'], 'deleted')

        # Record networks still used by other instances on this node.
        host_networks = []
        for other in db.get_instances(
                only_node=config.parsed.get('NODE_NAME')):
            if other['uuid'] == instance_uuid:
                continue
            for iface in db.get_instance_interfaces(other['uuid']):
                network_uuid = iface['network_uuid']
                if network_uuid not in host_networks:
                    host_networks.append(network_uuid)

        instance_from_db_virt.delete()

        for network_uuid in instance_networks:
            n = net.from_db(network_uuid)
            if not n:
                continue
            if network_uuid in host_networks:
                # Network still in use: only release the instance's address.
                with util.RecordedOperation('deallocate ip address',
                                            instance_from_db_virt):
                    n.update_dhcp()
            else:
                # No other local users: remove the network entirely.
                with util.RecordedOperation('remove network', n):
                    n.delete()
def instance_delete(instance_uuid):
    """Delete an instance and reap networks only it was using."""
    with db.get_lock('instance', None, instance_uuid, timeout=120,
                     op='Instance delete'):
        db.add_event('instance', instance_uuid, 'queued', 'delete',
                     None, None)

        # Networks attached to the instance being deleted.
        instance_networks = []
        for iface in db.get_instance_interfaces(instance_uuid):
            if iface['network_uuid'] not in instance_networks:
                instance_networks.append(iface['network_uuid'])

        # Networks required by every other instance on this node.
        host_networks = []
        for other in db.get_instances(
                only_node=config.parsed.get('NODE_NAME')):
            if other['uuid'] == instance_uuid:
                continue
            for iface in db.get_instance_interfaces(other['uuid']):
                if iface['network_uuid'] not in host_networks:
                    host_networks.append(iface['network_uuid'])

        instance_from_db_virt = virt.from_db(instance_uuid)
        if instance_from_db_virt:
            instance_from_db_virt.delete()

        # Check each network used by the deleted instance.
        for network in instance_networks:
            n = net.from_db(network)
            if not n:
                continue
            if network in host_networks:
                # Other instances still use the network, so just return
                # the deleted instance's address to the pool.
                with util.RecordedOperation('deallocate ip address',
                                            instance_from_db_virt):
                    n.update_dhcp()
            else:
                # Nothing else here uses this network, so remove it.
                with util.RecordedOperation('remove network', n):
                    n.delete()
def restore_instances():
    """Recreate the networks and instances defined for this node."""
    # Work out which networks and instances this node should be running.
    networks = []
    instances = []
    for inst in db.get_instances(local_only=True):
        for iface in db.get_instance_interfaces(inst['uuid']):
            if iface['network_uuid'] not in networks:
                networks.append(iface['network_uuid'])
        instances.append(inst['uuid'])

    # Bring the networks back first so instances have somewhere to attach.
    with util.RecordedOperation('restore networks', None):
        for network_uuid in networks:
            LOG.info('Restoring network %s' % network_uuid)
            network = net.from_db(network_uuid)
            network.create()
            network.ensure_mesh()
            network.update_dhcp()

    # Then recreate the instances themselves.
    with util.RecordedOperation('restore instances', None):
        for instance_uuid in instances:
            LOG.info('Restoring instance %s' % instance_uuid)
            instance = virt.from_db(instance_uuid)
            instance.create()
def get(self):
    """Return every known instance as a list."""
    return [*db.get_instances()]
def get(self, all=False):
    """Return instances in the caller's namespace as a list."""
    caller_namespace = get_jwt_identity()
    return [*db.get_instances(all=all, namespace=caller_namespace)]
def _maintain_networks(self):
    """Reconcile this node's vxlan interfaces with the networks it needs.

    Works out which networks should exist on this node (those used by
    local instances, or every network on the network node), recreates any
    that are not okay, warns about stray vxlans, and records the node's
    vxid mapping in the database.
    """
    LOG.info('Maintaining networks')

    # Discover what networks are present
    _, _, vxid_to_mac = util.discover_interfaces()

    # Determine what networks we should be on
    host_networks = []
    seen_vxids = []

    if not util.is_network_node():
        # For normal nodes, just the ones we have instances for
        for inst in list(db.get_instances(
                only_node=config.parsed.get('NODE_NAME'))):
            for iface in db.get_instance_interfaces(inst['uuid']):
                if not iface['network_uuid'] in host_networks:
                    host_networks.append(iface['network_uuid'])
    else:
        # For network nodes, its all networks
        for n in db.get_networks():
            host_networks.append(n['uuid'])

            # Network nodes also look for interfaces for absent instances
            # and delete them
            for ni in db.get_network_interfaces(n['uuid']):
                inst = db.get_instance(ni['instance_uuid'])
                if (not inst
                        or inst.get('state', 'unknown') in
                        ['deleted', 'error', 'unknown']):
                    db.hard_delete_network_interface(ni['uuid'])
                    LOG.withInstance(
                        ni['instance_uuid']).withNetworkInterface(
                        ni['uuid']).info(
                        'Hard deleted stray network interface')

    # Ensure we are on every network we have a host for
    for network in host_networks:
        try:
            n = net.from_db(network)
            if not n:
                continue

            # Bug fix: the comparison was reversed. state_updated is a
            # past timestamp, so (state_updated - now) is always negative
            # and therefore always < 60, meaning every network was punted
            # and maintenance never ran. We want to skip only networks
            # whose state changed within the last minute.
            if time.time() - n.db_entry['state_updated'] < 60:
                # Network state changed in the last minute, punt for now
                continue

            if not n.is_okay():
                LOG.withObj(n).info('Recreating not okay network')
                n.create()
            n.ensure_mesh()
            seen_vxids.append(n.vxlan_id)

        except exceptions.LockException as e:
            LOG.warning(
                'Failed to acquire lock while maintaining networks: %s' % e)

    # Determine if there are any extra vxids
    extra_vxids = set(vxid_to_mac.keys()) - set(seen_vxids)

    # Delete "deleted" SF networks and log unknown vxlans
    if extra_vxids:
        LOG.withField('vxids', extra_vxids).warning(
            'Extra vxlans present!')

        # Determine the network uuids for those vxids
        # vxid_to_uuid = {}
        # for n in db.get_networks():
        #     vxid_to_uuid[n['vxid']] = n['uuid']

        # for extra in extra_vxids:
        #     if extra in vxid_to_uuid:
        #         with db.get_lock('network', None, vxid_to_uuid[extra],
        #                          ttl=120, op='Network reap VXLAN'):
        #             n = net.from_db(vxid_to_uuid[extra])
        #             n.delete()
        #         LOG.info('Extra vxlan %s (network %s) removed.'
        #                  % (extra, vxid_to_uuid[extra]))
        #     else:
        #         LOG.error('Extra vxlan %s does not map to any network.'
        #                   % extra)

    # And record vxids in the database
    db.persist_node_vxid_mapping(
        config.parsed.get('NODE_NAME'), vxid_to_mac)