def image_fetch(url, instance_uuid):
    """Fetch the image at ``url`` while holding this node's lock for it.

    A 'fetch' event is recorded against the image on success and on
    failure. On failure the owning instance (if any) is also queued into
    an error state.

    Args:
        url: location of the image to fetch.
        instance_uuid: uuid of the instance which needs the image, or a
            falsy value when the fetch is not tied to an instance.

    Raises:
        exceptions.ImageFetchTaskFailedException: when the fetch fails with
            an HTTP or requests error. The underlying error is attached as
            the cause.
    """
    instance = virt.from_db(instance_uuid) if instance_uuid else None

    try:
        # TODO(andy): Wait up to 15 mins for another queue process to download
        # the required image. This will be changed to queue on a
        # "waiting_image_fetch" queue but this works now.
        with db.get_lock('image', config.NODE_NAME,
                         Image.calc_unique_ref(url), timeout=15 * 60,
                         op='Image fetch') as lock:
            img = Image.from_url(url)
            img.get([lock], instance)
            db.add_event('image', url, 'fetch', None, None, 'success')

    except (exceptions.HTTPError,
            requests.exceptions.RequestException) as e:
        LOG.withField('image', url).info('Failed to fetch image')
        if instance_uuid:
            db.enqueue_instance_error(instance_uuid,
                                      'Image fetch failed: %s' % e)

        # Clean common problems to store in events
        msg = str(e)
        m = re.match(r'.*NewConnectionError\(\'\<.*\>: (.*)\'', msg)
        if m:
            msg = m.group(1)
        db.add_event('image', url, 'fetch', None, None, 'Error: ' + msg)

        # Chain explicitly so the root cause survives in tracebacks.
        raise exceptions.ImageFetchTaskFailedException(
            'Failed to fetch image %s' % url) from e
def post(self, interface_uuid=None):
    """Remove the floating IP from a network interface.

    Args:
        interface_uuid: uuid of the interface to defloat.

    Returns:
        An error response (404 or 409) when the interface, its network, its
        instance or the floating network cannot be used by the caller;
        otherwise None once the address has been released.
    """
    ni = db.get_interface(interface_uuid)
    if not ni:
        return error(404, 'network interface not found')

    if not ni['floating']:
        return error(409, 'this interface does not have a floating ip')

    n = net.from_db(ni['network_uuid'])
    if not n:
        LOG.info('network(%s): network not found, genuinely missing'
                 % ni['network_uuid'])
        return error(404, 'network not found')

    if get_jwt_identity() not in [n.namespace, 'system']:
        LOG.info('%s: network not found, ownership test' % n)
        return error(404, 'network not found')

    i = virt.from_db(ni['instance_uuid'])
    if not i:
        # Without this guard the ownership test below raises
        # AttributeError for an interface whose instance has gone away.
        LOG.info('instance(%s): instance not found, genuinely missing'
                 % ni['instance_uuid'])
        return error(404, 'instance not found')

    if get_jwt_identity() not in [i.db_entry['namespace'], 'system']:
        LOG.info('%s: instance not found, ownership test' % i)
        return error(404, 'instance not found')

    float_net = net.from_db('floating')
    if not float_net:
        return error(404, 'floating network not found')

    db.add_event('interface', interface_uuid,
                 'api', 'defloat', None, None)

    # Return the address to the floating pool under the pool's lock.
    with db.get_lock('sf/ipmanager/floating', ttl=120):
        ipm = db.get_ipmanager('floating')
        ipm.release(ni['floating'])
        db.persist_ipmanager('floating', ipm.save())

    # ``ni`` was read before the update, so it still carries the address
    # needed by remove_floating_ip().
    db.remove_floating_from_interface(ni['uuid'])
    n.remove_floating_ip(ni['floating'], ni['ipv4'])
def restore_instances():
    """Recreate the networks and instances recorded for this node.

    Networks used by any local instance are restored first, then each
    instance is recreated. Failures are logged and do not stop the
    remaining items; an instance that fails is moved to the 'error' state.
    """
    # Ensure all instances for this node are defined
    networks = []
    instances = []
    for inst in list(
            db.get_instances(only_node=config.parsed.get('NODE_NAME'))):
        for iface in db.get_instance_interfaces(inst['uuid']):
            if iface['network_uuid'] not in networks:
                networks.append(iface['network_uuid'])
        instances.append(inst['uuid'])

    with util.RecordedOperation('restore networks', None):
        for network in networks:
            # Reset per iteration: if from_db() raises, the handler must not
            # hit an unbound name or report the previous network.
            n = None
            try:
                n = net.from_db(network)
                LOG.info('%s Restoring network' % n)
                n.create()
                n.ensure_mesh()
                n.update_dhcp()
            except Exception as e:
                LOG.error('%s Failed to restore network: %s'
                          % (n if n is not None else network, e))

    with util.RecordedOperation('restore instances', None):
        for instance in instances:
            # Same reasoning as for ``n`` above.
            i = None
            try:
                i = virt.from_db(instance)
                LOG.info('%s Restoring instance' % i)
                i.create()
            except Exception as e:
                LOG.error('%s Failed to restore instance: %s'
                          % (i if i is not None else instance, e))
                db.update_instance_state(instance, 'error')
def instance_preflight(instance_uuid, network):
    """Check the instance fits on this node, otherwise pick another node.

    Args:
        instance_uuid: uuid of the instance about to be started.
        network: network description passed through to the scheduler.

    Returns:
        None when this node can host the instance, otherwise the name of
        the node the start should be redirected to.

    Raises:
        exceptions.AbortInstanceStartException: when more than three
            placement attempts are recorded, or no node has capacity.
    """
    db.update_instance_state(instance_uuid, 'preflight')

    s = scheduler.Scheduler()
    instance = virt.from_db(instance_uuid)

    try:
        s.place_instance(instance, network, candidates=[config.NODE_NAME])
        return None

    except exceptions.LowResourceException as e:
        db.add_event('instance', instance_uuid,
                     'schedule', 'retry', None,
                     'insufficient resources: ' + str(e))

    # A missing counter is treated as zero attempts; comparing None with an
    # int would raise TypeError.
    if (instance.db_entry.get('placement_attempts') or 0) > 3:
        raise exceptions.AbortInstanceStartException('Too many start attempts')

    requested = instance.db_entry.get('requested_placement')
    try:
        if requested:
            candidates = [requested]
        else:
            # Any node other than this one is a candidate.
            candidates = [node for node in s.metrics.keys()
                          if node != config.NODE_NAME]

        candidates = s.place_instance(instance, network,
                                      candidates=candidates)
        return candidates[0]

    except exceptions.LowResourceException as e:
        db.add_event('instance', instance_uuid,
                     'schedule', 'failed', None,
                     'insufficient resources: ' + str(e))
        # This raise implies delete above
        raise exceptions.AbortInstanceStartException(
            'Unable to find suitable node') from e
def _safe_get_network_interface(interface_uuid):
    """Look up an interface and its network, enforcing caller ownership.

    Args:
        interface_uuid: uuid of the network interface to look up.

    Returns:
        A tuple ``(interface, network, error_response)``. On success the
        error is None. On any failure the first two items are None and the
        third is a 404 error response.
    """
    ni = db.get_interface(interface_uuid)
    if not ni:
        return None, None, error(404, 'interface not found')

    n = net.from_db(ni['network_uuid'])
    if not n:
        logutil.info([
            net.ThinNetwork(ni['network_uuid']),
            net.ThinNetworkInterface(ni['uuid'])
        ], 'Network not found or deleted')
        return None, None, error(404, 'interface network not found')

    if get_jwt_identity() not in [n.namespace, 'system']:
        logutil.info([n, net.ThinNetworkInterface(ni['uuid'])],
                     'Interface not found, ownership test')
        return None, None, error(404, 'interface not found')

    i = virt.from_db(ni['instance_uuid'])
    if not i:
        # Without this guard the ownership test below raises
        # AttributeError for an interface whose instance has gone away.
        logutil.info([n, net.ThinNetworkInterface(ni['uuid'])],
                     'Instance not found, genuinely missing')
        return None, None, error(404, 'interface not found')

    if get_jwt_identity() not in [i.db_entry['namespace'], 'system']:
        logutil.info([n, i, net.ThinNetworkInterface(ni['uuid'])],
                     'Instance not found, ownership test')
        return None, None, error(404, 'interface not found')

    return ni, n, None
def instance_start(instance_uuid, network):
    """Bring up an instance's networks and then create the instance.

    All work happens while holding the instance lock. If a network is
    missing or dead, or libvirt rejects the configuration, an instance
    error is queued and the function returns early. Otherwise every
    interface of the instance is marked 'created'.

    Args:
        instance_uuid: uuid of the instance to start.
        network: iterable of network descriptions, each with a
            'network_uuid' key.
    """
    log = LOG.withField('instance', instance_uuid)

    with db.get_lock('instance', None, instance_uuid, ttl=900, timeout=120,
                     op='Instance start') as lock:
        instance = virt.from_db(instance_uuid)

        # Collect the networks, one object per distinct uuid
        nets = {}
        for netdesc in network:
            wanted = netdesc['network_uuid']
            if wanted in nets:
                continue

            found = net.from_db(wanted)
            if not found:
                db.enqueue_instance_error(instance_uuid, 'missing network')
                return
            nets[wanted] = found

        # Create the networks
        with util.RecordedOperation('ensure networks exist', instance):
            for n in nets.values():
                try:
                    n.create()
                    n.ensure_mesh()
                    n.update_dhcp()
                except exceptions.DeadNetwork as e:
                    log.withField('network', n).warning(
                        'Instance tried to use dead network')
                    db.enqueue_instance_error(
                        instance_uuid, 'tried to use dead network: %s' % e)
                    return

        # Allocate console and VDI ports
        instance.allocate_instance_ports()

        # Now we can start the instance
        libvirt = util.get_libvirt()
        try:
            with util.RecordedOperation('instance creation', instance):
                instance.create(lock=lock)
        except libvirt.libvirtError as e:
            # NOTE(review): libvirt errors with any other code fall through
            # and the interfaces are still marked 'created' -- confirm this
            # is intended.
            unsupported = (libvirt.VIR_ERR_CONFIG_UNSUPPORTED,
                           libvirt.VIR_ERR_XML_ERROR)
            if e.get_error_code() in unsupported:
                db.enqueue_instance_error(
                    instance_uuid, 'instance failed to start: %s' % e)
                return

        for iface in db.get_instance_interfaces(instance_uuid):
            db.update_network_interface_state(iface['uuid'], 'created')
def wrapper(*args, **kwargs):
    """Resolve ``instance_uuid`` into an instance, then call ``func``.

    When an 'instance_uuid' keyword is supplied, the loaded instance is
    passed on as 'instance_from_db_virt'. A 404 error is returned when no
    instance ends up in that keyword.
    """
    if 'instance_uuid' in kwargs:
        loaded = virt.from_db(kwargs['instance_uuid'])
        kwargs['instance_from_db_virt'] = loaded

    if kwargs.get('instance_from_db_virt'):
        return func(*args, **kwargs)

    return error(404, 'instance not found')
def wrapper(*args, **kwargs):
    """Resolve ``instance_uuid`` into an instance, then call ``func``.

    When an 'instance_uuid' keyword is supplied, the loaded instance is
    passed on as 'instance_from_db_virt'. If no instance ends up in that
    keyword, the miss is logged and a 404 error is returned.
    """
    if 'instance_uuid' in kwargs:
        loaded = virt.from_db(kwargs['instance_uuid'])
        kwargs['instance_from_db_virt'] = loaded

    if kwargs.get('instance_from_db_virt'):
        return func(*args, **kwargs)

    LOG.info('instance(%s): instance not found, genuinely missing'
             % kwargs.get('instance_uuid'))
    return error(404, 'instance not found')
def image_fetch(url, instance_uuid):
    """Fetch the image at ``url``, optionally on behalf of an instance.

    Args:
        url: location of the image to fetch.
        instance_uuid: uuid of the instance needing the image, or a falsy
            value when there is none.
    """
    try:
        target = virt.from_db(instance_uuid) if instance_uuid else None
        images.Image(url).get([], target)
    except exceptions.LockException:
        # NOTE(review): lock failures are dropped without logging --
        # presumably a deliberate best-effort; confirm.
        pass
def wrapper(*args, **kwargs):
    """Resolve ``instance_uuid`` into an instance, then call ``func``.

    When an 'instance_uuid' keyword is supplied, the loaded instance is
    passed on as 'instance_from_db_virt'. If no instance ends up in that
    keyword, the miss is logged and a 404 error is returned.
    """
    # Use .get() so the not-found path below cannot raise KeyError when the
    # wrapped call was made without an instance_uuid keyword.
    instance_uuid = kwargs.get('instance_uuid')
    if 'instance_uuid' in kwargs:
        kwargs['instance_from_db_virt'] = virt.from_db(instance_uuid)

    if not kwargs.get('instance_from_db_virt'):
        logutil.info([virt.ThinInstance(instance_uuid)],
                     'Instance not found, genuinely missing')
        return error(404, 'instance not found')

    return func(*args, **kwargs)
def restore_instances():
    """Recreate the networks and running instances recorded for this node.

    Networks used by local instances are restored first. Each instance is
    then recreated under its lock, but only if its recorded power state is
    one of the "started" states. Failures are passed to
    util.ignore_exception(); a failing instance also has an error queued.
    """
    # Ensure all instances for this node are defined
    networks = []
    instances = []
    node_name = config.parsed.get('NODE_NAME')
    for inst in list(db.get_instances(only_node=node_name)):
        inst_uuid = inst['uuid']
        for iface in db.get_instance_interfaces(inst_uuid):
            net_uuid = iface['network_uuid']
            if net_uuid not in networks:
                networks.append(net_uuid)
        instances.append(inst_uuid)

    with util.RecordedOperation('restore networks', None):
        for network in networks:
            try:
                n = net.from_db(network)
                LOG.withObj(n).info('Restoring network')
                n.create()
                n.ensure_mesh()
                n.update_dhcp()
            except Exception as e:
                util.ignore_exception('restore network %s' % network, e)

    # Power states for which the instance should be running again.
    started = ('on', 'transition-to-on', 'initial', 'unknown')

    with util.RecordedOperation('restore instances', None):
        for instance in instances:
            try:
                with db.get_lock('instance', None, instance,
                                 ttl=120, timeout=120,
                                 op='Instance restore'):
                    i = virt.from_db(instance)
                    if i and i.db_entry.get('power_state',
                                            'unknown') in started:
                        LOG.withObj(i).info('Restoring instance')
                        i.create()
            except Exception as e:
                util.ignore_exception('restore instance %s' % instance, e)
                db.enqueue_instance_error(
                    instance,
                    'exception while restoring instance on daemon restart')
def instance_start(instance_uuid, network):
    """Bring up an instance's networks and then create the instance.

    All work happens while holding the instance lock. If a network is
    missing, or libvirt rejects the configuration, an instance delete is
    queued with an 'error' state and the function returns early. Otherwise
    every interface of the instance is marked 'created'.

    Args:
        instance_uuid: uuid of the instance to start.
        network: iterable of network descriptions, each with a
            'network_uuid' key.
    """
    with db.get_lock('instance', None, instance_uuid, ttl=900) as lock:
        instance = virt.from_db(instance_uuid)

        # Collect the networks, one object per distinct uuid
        nets = {}
        for netdesc in network:
            wanted = netdesc['network_uuid']
            if wanted in nets:
                continue

            found = net.from_db(wanted)
            if not found:
                db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                           instance_uuid, 'error',
                                           'missing network')
                return
            nets[wanted] = found

        # Create the networks
        with util.RecordedOperation('ensure networks exist', instance):
            for n in nets.values():
                n.create()
                n.ensure_mesh()
                n.update_dhcp()

        # Now we can start the instance
        libvirt = util.get_libvirt()
        try:
            with util.RecordedOperation('instance creation', instance):
                instance.create(lock=lock)
        except libvirt.libvirtError as e:
            # NOTE(review): libvirt errors with any other code fall through
            # and the interfaces are still marked 'created' -- confirm this
            # is intended.
            unsupported = (libvirt.VIR_ERR_CONFIG_UNSUPPORTED,
                           libvirt.VIR_ERR_XML_ERROR)
            if e.get_error_code() in unsupported:
                db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                           instance_uuid, 'error',
                                           'instance failed to start')
                return

        for iface in db.get_instance_interfaces(instance_uuid):
            db.update_network_interface_state(iface['uuid'], 'created')
def get(self, interface_uuid=None):
    """Return a network interface the caller is allowed to see.

    Args:
        interface_uuid: uuid of the network interface to fetch.

    Returns:
        The interface record, or a 404 error response when the interface,
        its network or its instance is missing or fails the ownership test.
    """
    ni = db.get_interface(interface_uuid)
    if not ni:
        return error(404, 'interface not found')

    n = net.from_db(ni['network_uuid'])
    if not n:
        LOG.info('network(%s): network not found, genuinely missing'
                 % ni['network_uuid'])
        return error(404, 'interface network not found')

    if get_jwt_identity() not in [n.namespace, 'system']:
        LOG.info('%s: interface not found, ownership test' % n)
        return error(404, 'interface not found')

    i = virt.from_db(ni['instance_uuid'])
    if not i:
        # Without this guard the ownership test below raises
        # AttributeError for an interface whose instance has gone away.
        LOG.info('instance(%s): instance not found, genuinely missing'
                 % ni['instance_uuid'])
        return error(404, 'interface not found')

    if get_jwt_identity() not in [i.db_entry['namespace'], 'system']:
        LOG.info('%s: instance not found, ownership test' % i)
        return error(404, 'interface not found')

    return ni
def instance_delete(instance_uuid):
    """Delete an instance and tidy up the networks it was attached to.

    Runs while holding the instance lock. After the instance is deleted,
    each of its networks is either refreshed via update_dhcp() (another
    instance on this node still uses it) or deleted (no other local user).

    Args:
        instance_uuid: uuid of the instance to delete.
    """
    with db.get_lock('instance', None, instance_uuid, timeout=120,
                     op='Instance delete'):
        db.add_event('instance', instance_uuid,
                     'queued', 'delete', None, None)

        # Networks used by this instance, in first-seen order. Collected
        # before the instance itself is deleted.
        instance_networks = []
        for iface in list(db.get_instance_interfaces(instance_uuid)):
            net_uuid = iface['network_uuid']
            if net_uuid not in instance_networks:
                instance_networks.append(net_uuid)

        # Networks used by every other instance on this node. Only used
        # for membership tests, so a set is sufficient.
        host_networks = set()
        node_name = config.parsed.get('NODE_NAME')
        for inst in list(db.get_instances(only_node=node_name)):
            if inst['uuid'] == instance_uuid:
                continue
            for iface in db.get_instance_interfaces(inst['uuid']):
                host_networks.add(iface['network_uuid'])

        instance_from_db_virt = virt.from_db(instance_uuid)
        if instance_from_db_virt:
            instance_from_db_virt.delete()

        # Check each network used by the deleted instance
        for network in instance_networks:
            n = net.from_db(network)
            if not n:
                continue

            if network in host_networks:
                # Still used by another instance, so only update
                with util.RecordedOperation('deallocate ip address',
                                            instance_from_db_virt):
                    n.update_dhcp()
            else:
                # Not used by any other instance, therefore delete
                with util.RecordedOperation('remove network', n):
                    n.delete()
def restore_instances():
    """Recreate the networks and instances recorded for this node.

    Networks used by any local instance are restored first, then every
    local instance is recreated. Exceptions are not caught here.
    """
    # Ensure all instances for this node are defined
    networks = []
    instances = []
    for inst in list(db.get_instances(local_only=True)):
        inst_uuid = inst['uuid']
        for iface in db.get_instance_interfaces(inst_uuid):
            net_uuid = iface['network_uuid']
            if net_uuid not in networks:
                networks.append(net_uuid)
        instances.append(inst_uuid)

    with util.RecordedOperation('restore networks', None):
        for net_uuid in networks:
            LOG.info('Restoring network %s' % net_uuid)
            restored = net.from_db(net_uuid)
            restored.create()
            restored.ensure_mesh()
            restored.update_dhcp()

    with util.RecordedOperation('restore instances', None):
        for inst_uuid in instances:
            LOG.info('Restoring instance %s' % inst_uuid)
            virt.from_db(inst_uuid).create()
def handle(jobname, workitem):
    """Run each task of a queued work item in order, then resolve the job.

    Supported task types are 'image_fetch', 'instance_preflight',
    'instance_start' and 'instance_delete'. If preflight redirects the
    instance to another node, the whole work item is re-queued there and
    processing stops. Any exception is logged; when an instance uuid is
    known, an instance delete with state 'error' is also queued. The job is
    always resolved on the way out.

    Args:
        jobname: name of the queue job being processed.
        workitem: mapping with an optional 'tasks' list of task dicts.
    """
    j = JobName(jobname)
    logutil.info([j], 'Processing workitem')
    setproctitle.setproctitle(
        '%s-%s' % (daemon.process_name('queues'), jobname))

    instance_uuid = None
    try:
        for task in workitem.get('tasks', []):
            ro = [j]
            instance_uuid = task.get('instance_uuid')
            if instance_uuid:
                ro.append(virt.from_db(instance_uuid))

            task_type = task.get('type')
            if task_type.startswith('instance_') and not instance_uuid:
                logutil.error(ro, 'Instance task lacks instance uuid')
                return

            if instance_uuid:
                db.add_event('instance', instance_uuid,
                             task_type.replace('_', ' '), 'dequeued', None,
                             'Work item %s' % jobname)

            logutil.info(ro, 'Executing task %s: %s' % (task_type, task))

            if task_type == 'image_fetch':
                image_fetch(task.get('url'), instance_uuid)

            elif task_type == 'instance_preflight':
                redirect_to = instance_preflight(instance_uuid,
                                                 task.get('network'))
                if redirect_to:
                    logutil.info(
                        ro, 'Redirecting instance start to %s' % redirect_to)
                    db.place_instance(instance_uuid, redirect_to)
                    db.enqueue(redirect_to, workitem)
                    return

            elif task_type == 'instance_start':
                instance_start(instance_uuid, task.get('network'))
                db.update_instance_state(instance_uuid, 'created')
                db.enqueue('%s-metrics' % config.parsed.get('NODE_NAME'), {})

            elif task_type == 'instance_delete':
                # A failed delete must not trigger the outer handler, which
                # would queue yet another delete.
                try:
                    instance_delete(instance_uuid)
                    db.update_instance_state(
                        instance_uuid, task.get('next_state', 'unknown'))
                    if task.get('next_state_message'):
                        db.update_instance_error_message(
                            instance_uuid, task.get('next_state_message'))
                    db.enqueue(
                        '%s-metrics' % config.parsed.get('NODE_NAME'), {})
                except Exception as e:
                    util.ignore_exception(daemon.process_name('queues'), e)

    except Exception as e:
        # Always record the failure, even when no instance uuid is known.
        util.ignore_exception(daemon.process_name('queues'), e)
        if instance_uuid:
            db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                       instance_uuid, 'error',
                                       'failed queue task: %s' % e)

    finally:
        db.resolve(config.parsed.get('NODE_NAME'), jobname)
        if instance_uuid:
            db.add_event('instance', instance_uuid,
                         'tasks complete', 'dequeued', None,
                         'Work item %s' % jobname)
        logutil.info([j], 'Completed workitem')
def post(self, name=None, cpus=None, memory=None, network=None, disk=None,
         ssh_key=None, user_data=None, placed_on=None, namespace=None,
         instance_uuid=None):
    """Create and start an instance, proxying to another node if needed.

    The instance is placed by the scheduler (or validated against
    ``placed_on``). If it lands on a different node, the request is
    forwarded to that node's API and its response is returned. Otherwise
    addresses are allocated for each requested network, interfaces are
    recorded, and the instance is created under its lock.

    Returns:
        The instance record on success, the proxied response when another
        node handled the request, or an error response (401, 404, 409 or
        507).
    """
    global SCHEDULER

    # We need to sanitise the name so its safe for DNS
    name = re.sub(r'([^a-zA-Z0-9_\-])', '', name)

    if not namespace:
        namespace = get_jwt_identity()

    # If accessing a foreign namespace, we need to be an admin
    if get_jwt_identity() not in [namespace, 'system']:
        return error(
            401, 'only admins can create resources in a different namespace')

    # The instance needs to exist in the DB before network interfaces are
    # created
    if not instance_uuid:
        instance_uuid = str(uuid.uuid4())
        db.add_event('instance', instance_uuid,
                     'uuid allocated', None, None, None)

    # Create instance object
    instance = virt.from_db(instance_uuid)
    if instance:
        if get_jwt_identity() not in [instance.db_entry['namespace'],
                                      'system']:
            LOG.info('instance(%s): instance not found, ownership test'
                     % instance_uuid)
            return error(404, 'instance not found')

    if not instance:
        instance = virt.from_definition(uuid=instance_uuid,
                                        name=name,
                                        disks=disk,
                                        memory_mb=memory,
                                        vcpus=cpus,
                                        ssh_key=ssh_key,
                                        user_data=user_data,
                                        owner=namespace)

    if not SCHEDULER:
        SCHEDULER = scheduler.Scheduler()

    # Have we been placed?
    if not placed_on:
        candidates = SCHEDULER.place_instance(instance, network)
        if not candidates:
            db.add_event('instance', instance_uuid,
                         'schedule', 'failed', None,
                         'insufficient resources')
            db.update_instance_state(instance_uuid, 'error')
            return error(507, 'insufficient capacity')

        placed_on = candidates[0]
        db.place_instance(instance_uuid, placed_on)
        db.add_event('instance', instance_uuid,
                     'placement', None, None, placed_on)

    else:
        try:
            candidates = SCHEDULER.place_instance(instance, network,
                                                  candidates=[placed_on])
            if not candidates:
                db.add_event('instance', instance_uuid,
                             'schedule', 'failed', None,
                             'insufficient resources')
                db.update_instance_state(instance_uuid, 'error')
                return error(507, 'insufficient capacity')
        except scheduler.CandidateNodeNotFoundException as e:
            return error(404, 'node not found: %s' % e)

    # Have we been placed on a different node?
    if not placed_on == config.parsed.get('NODE_NAME'):
        body = flask_get_post_body()
        body['placed_on'] = placed_on
        body['instance_uuid'] = instance_uuid
        body['namespace'] = namespace

        token = util.get_api_token(
            'http://%s:%d' % (placed_on, config.parsed.get('API_PORT')),
            namespace=namespace)
        r = requests.request('POST',
                             'http://%s:%d/instances'
                             % (placed_on, config.parsed.get('API_PORT')),
                             data=json.dumps(body),
                             headers={
                                 'Authorization': token,
                                 'User-Agent': util.get_user_agent()
                             })

        LOG.info('Returning proxied request: %d, %s'
                 % (r.status_code, r.text))
        resp = flask.Response(r.text, mimetype='application/json')
        resp.status_code = r.status_code
        return resp

    # Check we can get the required IPs
    nets = {}
    allocations = {}

    def error_with_cleanup(status_code, message):
        """Release every address allocated so far and return an error."""
        # The ipmanager and its lock are keyed by the network uuid on the
        # allocation path, so the same key is used here rather than
        # re-loading the network (which may no longer exist).
        for network_uuid, allocated in allocations.items():
            for addr, _ in allocated:
                with db.get_lock('sf/ipmanager/%s' % network_uuid, ttl=120):
                    ipm = db.get_ipmanager(network_uuid)
                    ipm.release(addr)
                    db.persist_ipmanager(network_uuid, ipm.save())
        return error(status_code, message)

    order = 0
    if network:
        for netdesc in network:
            if not netdesc.get('network_uuid'):
                return error_with_cleanup(404, 'network not specified')
            network_uuid = netdesc['network_uuid']

            if network_uuid not in nets:
                n = net.from_db(network_uuid)
                if not n:
                    return error_with_cleanup(
                        404, 'network %s not found' % network_uuid)
                nets[network_uuid] = n
                n.create()

            address_in_use = False
            with db.get_lock('sf/ipmanager/%s' % network_uuid, ttl=120):
                db.add_event('network', network_uuid, 'allocate address',
                             None, None, instance_uuid)
                allocations.setdefault(network_uuid, [])
                ipm = db.get_ipmanager(network_uuid)
                if not netdesc.get('address'):
                    netdesc['address'] = ipm.get_random_free_address()
                elif not ipm.reserve(netdesc['address']):
                    address_in_use = True

                if not address_in_use:
                    db.persist_ipmanager(network_uuid, ipm.save())
                    allocations[network_uuid].append(
                        (netdesc['address'], order))

            if address_in_use:
                # Cleanup takes the ipmanager locks itself, so it must only
                # run once the lock above has been released.
                return error_with_cleanup(
                    409, 'address %s in use' % netdesc['address'])

            if not netdesc.get('model'):
                netdesc['model'] = 'virtio'

            db.create_network_interface(
                str(uuid.uuid4()), netdesc, instance_uuid, order)

            order += 1

    # Initialise metadata
    db.persist_metadata('instance', instance_uuid, {})

    # Now we can start the instance
    with db.get_lock('sf/instance/%s' % instance.db_entry['uuid'],
                     ttl=900) as lock:
        with util.RecordedOperation('ensure networks exist', instance):
            for n in nets.values():
                n.ensure_mesh()
                n.update_dhcp()

        with util.RecordedOperation('instance creation', instance):
            instance.create(lock=lock)

        for iface in db.get_instance_interfaces(instance.db_entry['uuid']):
            db.update_network_interface_state(iface['uuid'], 'created')

        return db.get_instance(instance_uuid)
def _update_power_states(self):
    """Reconcile libvirt domains named 'sf:<uuid>' with the database.

    Active domains: unknown instances are destroyed, instances marked
    deleted for more than five minutes are deleted again (falling back to
    ``virsh destroy`` after repeated attempts), and all others have their
    power state recorded. Inactive domains: unknown or long-deleted
    instances are undefined, and the rest are marked 'error' if their
    storage directory is missing, or powered off otherwise.

    libvirt errors raised during the scan are logged, not propagated.
    """
    libvirt = util.get_libvirt()
    conn = libvirt.open(None)
    try:
        seen = set()

        # Active VMs have an ID. Active means running in libvirt
        # land.
        for domain_id in conn.listDomainsID():
            domain = conn.lookupByID(domain_id)
            if not domain.name().startswith('sf:'):
                continue

            instance_uuid = domain.name().split(':')[1]
            log_ctx = LOG.withInstance(instance_uuid)

            instance = db.get_instance(instance_uuid)
            if not instance:
                # Instance is SF but not in database. Kill to reduce load.
                log_ctx.warning('Destroying unknown instance')
                util.execute(None,
                             'virsh destroy "sf:%s"' % instance_uuid)
                continue

            db.place_instance(instance_uuid, config.NODE_NAME)
            seen.add(domain.name())

            if instance.get('state') == 'deleted':
                # NOTE(mikal): a delete might be in-flight in the queue.
                # We only worry about instances which should have gone
                # away five minutes ago.
                if time.time() - instance['state_updated'] < 300:
                    continue

                db.instance_enforced_deletes_increment(instance_uuid)
                # NOTE(review): this reads the record fetched before the
                # increment above, so it appears to lag by one -- confirm.
                attempts = instance.get('enforced_deletes', 0)

                if attempts > 5:
                    # Sometimes we just can't delete the VM. Try the big
                    # hammer instead.
                    log_ctx.warning(
                        'Attempting alternate delete method for instance')
                    util.execute(None,
                                 'virsh destroy "sf:%s"' % instance_uuid)

                    db.add_event('instance', instance_uuid,
                                 'enforced delete', 'complete', None, None)
                else:
                    i = virt.from_db(instance_uuid)
                    i.delete()
                    i.update_instance_state('deleted')

                log_ctx.withField(
                    'attempt', attempts).warning('Deleting stray instance')
                continue

            state = util.extract_power_state(libvirt, domain)
            db.update_instance_power_state(instance_uuid, state)
            if state == 'crashed':
                db.update_instance_state(instance_uuid, 'error')

        # Inactive VMs just have a name, and are powered off
        # in our state system.
        for domain_name in conn.listDefinedDomains():
            if not domain_name.startswith('sf:'):
                continue

            if domain_name in seen:
                continue

            instance_uuid = domain_name.split(':')[1]
            log_ctx = LOG.withInstance(instance_uuid)
            instance = db.get_instance(instance_uuid)

            if not instance:
                # Instance is SF but not in database. Kill because unknown.
                log_ctx.warning('Removing unknown inactive instance')
                domain = conn.lookupByName(domain_name)
                domain.undefine()
                continue

            if instance.get('state') == 'deleted':
                # NOTE(mikal): a delete might be in-flight in the queue.
                # We only worry about instances which should have gone
                # away five minutes ago.
                if time.time() - instance['state_updated'] < 300:
                    continue

                domain = conn.lookupByName(domain_name)
                domain.undefine()
                log_ctx.info('Detected stray instance')
                db.add_event('instance', instance_uuid,
                             'deleted stray', 'complete', None, None)
                continue

            db.place_instance(instance_uuid, config.NODE_NAME)
            instance_path = os.path.join(
                config.get('STORAGE_PATH'), 'instances', instance_uuid)

            if not os.path.exists(instance_path):
                # If we're inactive and our files aren't on disk,
                # we have a problem.
                log_ctx.info('Detected error state for instance')
                db.update_instance_state(instance_uuid, 'error')

            elif instance.get('power_state') != 'off':
                log_ctx.info('Detected power off for instance')
                db.update_instance_power_state(instance_uuid, 'off')
                db.add_event('instance', instance_uuid,
                             'detected poweroff', 'complete', None, None)

    except libvirt.libvirtError as e:
        LOG.error('Failed to lookup all domains: %s' % e)

    finally:
        # Release the connection explicitly instead of relying on garbage
        # collection.
        try:
            conn.close()
        except libvirt.libvirtError as e:
            LOG.error('Failed to close libvirt connection: %s' % e)
def post(self, name=None, cpus=None, memory=None, network=None, disk=None,
         ssh_key=None, user_data=None, placed_on=None, namespace=None,
         instance_uuid=None, video=None):
    """Validate an instance request, place it, and queue its creation.

    After validation the instance is recorded, addresses are allocated
    for each requested network, the scheduler selects (or validates) a
    node, and preflight / image fetch / start tasks are queued on that
    node. The call then polls the instance state for up to
    API_ASYNC_WAIT seconds.

    Returns:
        The instance record (in whatever state it has reached when
        polling stops), or an error response (400, 401, 404, 409 or 507).
    """
    global SCHEDULER

    # Check that the instance name is safe for use as a DNS host name. A
    # missing or non-string name is rejected here too, rather than
    # crashing inside re.sub().
    if (not isinstance(name, str)
            or name != re.sub(r'([^a-zA-Z0-9_\-])', '', name)
            or len(name) > 63):
        return error(400, 'instance name must be useable as a DNS host name')

    # Sanity check
    if not disk:
        return error(400, 'instance must specify at least one disk')
    for d in disk:
        if not isinstance(d, dict):
            return error(400,
                         'disk specification should contain JSON objects')

    if network:
        for netdesc in network:
            if not isinstance(netdesc, dict):
                return error(
                    400, 'network specification should contain JSON objects')

            if 'network_uuid' not in netdesc:
                return error(
                    400, 'network specification is missing network_uuid')

    if not video:
        video = {'model': 'cirrus', 'memory': 16384}

    if not namespace:
        namespace = get_jwt_identity()

    # Only system can specify a uuid
    if instance_uuid and get_jwt_identity() != 'system':
        return error(401, 'only system can specify an instance uuid')

    # If accessing a foreign namespace, we need to be an admin
    if get_jwt_identity() not in [namespace, 'system']:
        return error(
            401, 'only admins can create resources in a different namespace')

    # The instance needs to exist in the DB before network interfaces are
    # created
    if not instance_uuid:
        instance_uuid = str(uuid.uuid4())
        db.add_event('instance', instance_uuid,
                     'uuid allocated', None, None, None)

    # Create instance object
    instance = virt.from_db(instance_uuid)
    if instance:
        if get_jwt_identity() not in [instance.db_entry['namespace'],
                                      'system']:
            logutil.info([virt.ThinInstance(instance_uuid)],
                         'Instance not found, ownership test')
            return error(404, 'instance not found')

    if not instance:
        instance = virt.from_definition(uuid=instance_uuid,
                                        name=name,
                                        disks=disk,
                                        memory_mb=memory,
                                        vcpus=cpus,
                                        ssh_key=ssh_key,
                                        user_data=user_data,
                                        owner=namespace,
                                        video=video,
                                        requested_placement=placed_on)

    # Initialise metadata
    db.persist_metadata('instance', instance_uuid, {})

    # Allocate IP addresses. ``order`` is the position of the interface in
    # the request, so it has to advance for every interface created.
    if network:
        for order, netdesc in enumerate(network):
            n = net.from_db(netdesc['network_uuid'])
            if not n:
                db.enqueue_instance_delete(
                    config.parsed.get('NODE_NAME'), instance_uuid, 'error',
                    'missing network %s during IP allocation phase'
                    % netdesc['network_uuid'])
                return error(
                    404, 'network %s not found' % netdesc['network_uuid'])

            with db.get_lock('ipmanager', None, netdesc['network_uuid'],
                             ttl=120):
                db.add_event('network', netdesc['network_uuid'],
                             'allocate address', None, None, instance_uuid)
                ipm = db.get_ipmanager(netdesc['network_uuid'])
                if 'address' not in netdesc or not netdesc['address']:
                    netdesc['address'] = ipm.get_random_free_address()
                else:
                    if not ipm.reserve(netdesc['address']):
                        db.enqueue_instance_delete(
                            config.parsed.get('NODE_NAME'), instance_uuid,
                            'error',
                            'failed to reserve an IP on network %s'
                            % netdesc['network_uuid'])
                        return error(
                            409, 'address %s in use' % netdesc['address'])

                db.persist_ipmanager(netdesc['network_uuid'], ipm.save())

            if 'model' not in netdesc or not netdesc['model']:
                netdesc['model'] = 'virtio'

            db.create_network_interface(
                str(uuid.uuid4()), netdesc, instance_uuid, order)

    if not SCHEDULER:
        SCHEDULER = scheduler.Scheduler()

    try:
        # Have we been placed?
        if not placed_on:
            candidates = SCHEDULER.place_instance(instance, network)
            placement = candidates[0]
        else:
            SCHEDULER.place_instance(instance, network,
                                     candidates=[placed_on])
            placement = placed_on

    except exceptions.LowResourceException as e:
        db.add_event('instance', instance_uuid,
                     'schedule', 'failed', None,
                     'insufficient resources: ' + str(e))
        db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                   instance_uuid, 'error',
                                   'scheduling failed')
        return error(507, str(e))

    except exceptions.CandidateNodeNotFoundException as e:
        db.add_event('instance', instance_uuid,
                     'schedule', 'failed', None,
                     'candidate node not found: ' + str(e))
        # Was ``config.get.parsed(...)``, which raises AttributeError and
        # hides the real scheduling failure.
        db.enqueue_instance_delete(config.parsed.get('NODE_NAME'),
                                   instance_uuid, 'error',
                                   'scheduling failed')
        return error(404, 'node not found: %s' % e)

    # Record placement
    db.place_instance(instance_uuid, placement)
    db.add_event('instance', instance_uuid,
                 'placement', None, None, placement)

    # Create a queue entry for the instance start
    tasks = [{
        'type': 'instance_preflight',
        'instance_uuid': instance_uuid,
        'network': network
    }]
    for device in instance.db_entry['block_devices']['devices']:
        if 'base' in device and device['base']:
            tasks.append({
                'type': 'image_fetch',
                'instance_uuid': instance_uuid,
                'url': device['base']
            })
    tasks.append({
        'type': 'instance_start',
        'instance_uuid': instance_uuid,
        'network': network
    })

    # Enqueue creation tasks on desired node task queue
    db.enqueue(placement, {'tasks': tasks})
    db.add_event('instance', instance_uuid,
                 'create', 'enqueued', None, None)

    # Watch for a while and return results if things are fast, give up
    # after a while and just return the current state. The state is read
    # at least once, so a zero or negative wait still returns a record.
    start_time = time.time()
    while True:
        i = db.get_instance(instance_uuid)
        if i['state'] in ['created', 'deleted', 'error']:
            return i
        if time.time() - start_time >= config.parsed.get('API_ASYNC_WAIT'):
            return i
        time.sleep(0.5)