def schedule_run_instance(self, context, instance_id, *_args, **_kwargs):
    """Picks a host that is up and has the fewest running instances."""
    instance_ref = db.instance_get(context, instance_id)
    # Admin users may pin the instance to a specific host by passing an
    # availability zone of the form "zone:host".
    if (instance_ref['availability_zone']
            and ':' in instance_ref['availability_zone']
            and context.is_admin):
        zone, _x, host = instance_ref['availability_zone'].partition(':')
        service = db.service_get_by_args(context.elevated(), host,
                                         'nova-compute')
        if not self.service_is_up(service):
            raise driver.WillNotSchedule(_("Host %s is not alive") % host)
        # TODO(vish): this probably belongs in the manager, if we
        # can generalize this somehow
        now = datetime.datetime.utcnow()
        db.instance_update(context, instance_id,
                           {'host': host, 'scheduled_at': now})
        return host
    # Otherwise walk compute services sorted by current core usage and
    # take the first one that is up.
    results = db.service_get_all_compute_sorted(context)
    for result in results:
        (service, instance_cores) = result
        # Results are sorted by load, so if the least-loaded host already
        # exceeds max_cores with this instance, no host can fit it.
        if instance_cores + instance_ref['vcpus'] > FLAGS.max_cores:
            raise driver.NoValidHost(_("All hosts have too many cores"))
        if self.service_is_up(service):
            # NOTE(vish): this probably belongs in the manager, if we
            # can generalize this somehow
            now = datetime.datetime.utcnow()
            db.instance_update(context,
                               instance_id,
                               {'host': service['host'],
                                'scheduled_at': now})
            return service['host']
    raise driver.NoValidHost(_("No hosts found"))
def _set_vm_state_and_notify(self, method, updates, context, ex,
                             *args, **kwargs):
    """Apply *updates* to the instance (if known) and emit an error
    notification for a failed scheduler driver call.

    :param method: name of the scheduler driver method that failed
    :param updates: instance fields to set; must contain 'vm_state'
    :param context: request context
    :param ex: exception that caused the failure (notification reason)
    """
    # FIXME(comstud): Re-factor this somehow. Not sure this belongs in the
    # scheduler manager like this. We should make this easier.
    # run_instance only sends a request_spec, and an instance may or may
    # not have been created in the API (or scheduler) already. If it was
    # created, there's a 'uuid' set in the instance_properties of the
    # request_spec.
    # (littleidea): I refactored this a bit, and I agree
    # it should be easier :)
    # The refactoring could go further but trying to minimize changes
    # for essex timeframe
    LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
    vm_state = updates["vm_state"]
    request_spec = kwargs.get("request_spec", {})
    properties = request_spec.get("instance_properties", {})
    # The uuid is a string when present; default to None rather than
    # {} so a dict never leaks into the notification payload below.
    instance_uuid = properties.get("uuid")
    if instance_uuid:
        state = vm_state.upper()
        LOG.warning(_("Setting instance to %(state)s state."), locals(),
                    instance_uuid=instance_uuid)
        db.instance_update(context, instance_uuid, updates)
    payload = dict(request_spec=request_spec,
                   instance_properties=properties,
                   instance_id=instance_uuid,
                   state=vm_state,
                   method=method,
                   reason=ex)
    notifier.notify(context, notifier.publisher_id("scheduler"),
                    "scheduler." + method, notifier.ERROR, payload)
def test_prep_resize_exception_host_in_error_state_and_raise(self):
    """Test that an exception from prep_resize puts the instance in
    ERROR state and is re-raised.
    """
    # NOTE(review): the original docstring claimed ACTIVE state, but the
    # expectation below is vm_state=ERROR plus assertRaises.
    fake_instance_uuid = 'fake-instance-id'
    self._mox_schedule_method_helper('schedule_prep_resize')
    self.mox.StubOutWithMock(db, 'instance_update')
    request_spec = {'instance_properties': {'uuid': fake_instance_uuid}}
    self.fake_kwargs['request_spec'] = request_spec
    # Expect the driver call to fail with an arbitrary exception...
    self.manager.driver.schedule_prep_resize(self.context,
                                             *self.fake_args,
                                             **self.fake_kwargs).AndRaise(
        self.AnException('something happened'))
    # ...and the manager to flag the instance as errored.
    db.instance_update(self.context, fake_instance_uuid,
                       {'vm_state': vm_states.ERROR})
    self.mox.ReplayAll()
    # The manager must re-raise the driver's exception.
    self.assertRaises(self.AnException, self.manager.prep_resize,
                      self.context, self.topic,
                      *self.fake_args, **self.fake_kwargs)
def _set_instance_error(self, method, context, ex, *args, **kwargs):
    """Set the instance (if known) to ERROR state and emit an error
    notification after a failed run_instance scheduling attempt.

    :param method: scheduler driver method name that failed
    :param ex: exception that caused the failure (notification reason)
    """
    LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
    # FIXME(comstud): Re-factor this somehow. Not sure this belongs
    # in the scheduler manager like this. Needs to support more than
    # run_instance
    if method != "run_instance":
        return
    # FIXME(comstud): We should make this easier. run_instance
    # only sends a request_spec, and an instance may or may not
    # have been created in the API (or scheduler) already. If it
    # was created, there's a 'uuid' set in the instance_properties
    # of the request_spec.
    request_spec = kwargs.get('request_spec', {})
    properties = request_spec.get('instance_properties', {})
    # The uuid is a string when present; default to None rather than
    # {} so a dict never leaks into the payload's instance_id below.
    instance_uuid = properties.get('uuid')
    if instance_uuid:
        LOG.warning(
            _("Setting instance %(instance_uuid)s to "
              "ERROR state.") % locals())
        db.instance_update(context, instance_uuid,
                           {'vm_state': vm_states.ERROR})
    payload = dict(request_spec=request_spec,
                   instance_properties=properties,
                   instance_id=instance_uuid,
                   state=vm_states.ERROR,
                   method=method,
                   reason=ex)
    notifier.notify(notifier.publisher_id("scheduler"),
                    'scheduler.run_instance', notifier.ERROR, payload)
def _set_instance_error(self, method, context, ex, *args, **kwargs):
    """Mark the instance as errored and notify about a failed
    run_instance scheduling attempt.

    :param method: scheduler driver method name that failed
    :param ex: exception that caused the failure (notification reason)
    """
    LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
    # FIXME(comstud): Re-factor this somehow. Not sure this belongs
    # in the scheduler manager like this. Needs to support more than
    # run_instance
    if method != "run_instance":
        return
    # FIXME(comstud): We should make this easier. run_instance
    # only sends a request_spec, and an instance may or may not
    # have been created in the API (or scheduler) already. If it
    # was created, there's a 'uuid' set in the instance_properties
    # of the request_spec.
    request_spec = kwargs.get('request_spec', {})
    properties = request_spec.get('instance_properties', {})
    # uuid is a string when present; default to None (not {}) so the
    # notification payload never carries a spurious empty dict.
    instance_uuid = properties.get('uuid')
    if instance_uuid:
        LOG.warning(_("Setting instance %(instance_uuid)s to "
                      "ERROR state.") % locals())
        db.instance_update(context, instance_uuid,
                           {'vm_state': vm_states.ERROR})
    payload = dict(request_spec=request_spec,
                   instance_properties=properties,
                   instance_id=instance_uuid,
                   state=vm_states.ERROR,
                   method=method,
                   reason=ex)
    notifier.notify(notifier.publisher_id("scheduler"),
                    'scheduler.run_instance', notifier.ERROR, payload)
def test_run_instance_exception_puts_instance_in_error_state(self):
    """Test that a NoValidHost exception for run_instance puts the
    instance in ERROR state and eats the exception.
    """
    fake_instance_uuid = 'fake-instance-id'
    # Make sure the method exists that we're going to test call
    def stub_method(*args, **kwargs):
        pass
    setattr(self.manager.driver, 'schedule_run_instance', stub_method)
    self.mox.StubOutWithMock(self.manager.driver, 'schedule_run_instance')
    self.mox.StubOutWithMock(db, 'instance_update')
    request_spec = {'instance_properties': {'uuid': fake_instance_uuid}}
    self.fake_kwargs['request_spec'] = request_spec
    # Expect the driver to fail to find a host...
    self.manager.driver.schedule_run_instance(self.context,
                                              *self.fake_args,
                                              **self.fake_kwargs).AndRaise(
        exception.NoValidHost(reason=""))
    # ...and the manager to mark the instance as errored.
    db.instance_update(self.context, fake_instance_uuid,
                       {'vm_state': vm_states.ERROR})
    self.mox.ReplayAll()
    # No assertRaises here: the manager must swallow NoValidHost.
    self.manager.run_instance(self.context, self.topic,
                              *self.fake_args, **self.fake_kwargs)
def test_resize_instance_notification(self):
    """Ensure notifications on instance migrate/resize"""
    instance_id = self._create_instance()
    context = self.context.elevated()
    inst_ref = db.instance_get(context, instance_id)
    self.compute.run_instance(self.context, instance_id)
    # Discard notifications emitted by run_instance; only the resize
    # notification is under test.
    test_notifier.NOTIFICATIONS = []
    # Place the instance on a different host so prep_resize performs a
    # real migration prep rather than a same-host no-op.
    db.instance_update(self.context, instance_id, {'host': 'foo'})
    self.compute.prep_resize(context, inst_ref['uuid'], 1)
    migration_ref = db.migration_get_by_instance_and_status(context,
            inst_ref['uuid'], 'pre-migrating')
    # Exactly one notification is expected: compute.instance.resize.prep.
    self.assertEquals(len(test_notifier.NOTIFICATIONS), 1)
    msg = test_notifier.NOTIFICATIONS[0]
    self.assertEquals(msg['priority'], 'INFO')
    self.assertEquals(msg['event_type'], 'compute.instance.resize.prep')
    payload = msg['payload']
    self.assertEquals(payload['tenant_id'], self.project_id)
    self.assertEquals(payload['user_id'], self.user_id)
    self.assertEquals(payload['instance_id'], instance_id)
    self.assertEquals(payload['instance_type'], 'm1.tiny')
    type_id = instance_types.get_instance_type_by_name('m1.tiny')['id']
    self.assertEquals(str(payload['instance_type_id']), str(type_id))
    self.assertTrue('display_name' in payload)
    self.assertTrue('created_at' in payload)
    self.assertTrue('launched_at' in payload)
    self.assertEquals(payload['image_ref'], '1')
    self.compute.terminate_instance(context, instance_id)
def schedule_run_instance(self, context, instance_id, *_args, **_kwargs):
    """Picks a host that is up and has the fewest running instances."""
    instance_ref = db.instance_get(context, instance_id)
    # Admin users may pin the instance to a host via a "zone:host"
    # availability zone.
    if (instance_ref['availability_zone']
            and ':' in instance_ref['availability_zone']
            and context.is_admin):
        zone, _x, host = instance_ref['availability_zone'].partition(':')
        service = db.service_get_by_args(context.elevated(), host,
                                         'nova-compute')
        if not self.service_is_up(service):
            raise driver.WillNotSchedule(_("Host %s is not alive") % host)
        # TODO(vish): this probably belongs in the manager, if we
        # can generalize this somehow
        now = datetime.datetime.utcnow()
        db.instance_update(context, instance_id, {
            'host': host,
            'scheduled_at': now
        })
        return host
    results = db.service_get_all_compute_sorted(context)
    for result in results:
        (service, instance_cores) = result
        # Capacity check per node: used + requested cores must fit
        # within the node's vcpus scaled by the max_cores factor.
        compute_ref = db.service_get_all_compute_by_host(
            context, service['host'])[0]
        compute_node_ref = compute_ref['compute_node'][0]
        if (instance_ref['vcpus'] + instance_cores >
                compute_node_ref['vcpus'] * FLAGS.max_cores):
            raise driver.NoValidHost(_("All hosts have too many cores"))
        LOG.debug(
            _("requested instance cores = %s + used compute node cores = %s < total compute node cores = %s * max cores = %s"
              ) % (instance_ref['vcpus'], instance_cores,
                   compute_node_ref['vcpus'], FLAGS.max_cores))
        if self.service_is_up(service):
            # NOTE(vish): this probably belongs in the manager, if we
            # can generalize this somehow
            now = datetime.datetime.utcnow()
            db.instance_update(context, instance_id, {
                'host': service['host'],
                'scheduled_at': now
            })
            LOG.debug(
                _("instance = %s scheduled to host = %s") %
                (instance_id, service['host']))
            return service['host']
    raise driver.NoValidHost(
        _("Scheduler was unable to locate a host"
          " for this request. Is the appropriate"
          " service running?"))
def test_instance_update_with_instance_id(self):
    """instance_update() accepts a numeric instance id."""
    ctxt = context.get_admin_context()
    # Seed an instance carrying both user and system metadata.
    seed = {'metadata': {'host': 'foo'},
            'system_metadata': {'original_image_ref': 'blah'}}
    instance = db.instance_create(ctxt, seed)
    # Overwrite both metadata sets through instance_update(),
    # addressing the instance by its integer id.
    changes = {'metadata': {'host': 'bar'},
               'system_metadata': {'original_image_ref': 'baz'}}
    db.instance_update(ctxt, instance.id, changes)
    # The user-provided metadata must reflect the update.
    instance_meta = db.instance_metadata_get(ctxt, instance.id)
    self.assertEqual('bar', instance_meta['host'])
    # The system metadata must reflect the update as well.
    system_meta = db.instance_system_metadata_get(ctxt, instance.uuid)
    self.assertEqual('baz', system_meta['original_image_ref'])
def test_live_migration_basic(self):
    """Test basic schedule_live_migration functionality"""
    # Stub out every collaborator so only the scheduler flow runs.
    self.mox.StubOutWithMock(db, 'instance_get')
    self.mox.StubOutWithMock(self.driver, '_live_migration_src_check')
    self.mox.StubOutWithMock(self.driver, '_live_migration_dest_check')
    self.mox.StubOutWithMock(self.driver, '_live_migration_common_check')
    self.mox.StubOutWithMock(db, 'instance_update')
    self.mox.StubOutWithMock(driver, 'cast_to_compute_host')
    dest = 'fake_host2'
    block_migration = False
    disk_over_commit = False
    instance = self._live_migration_instance()
    # Expected sequence: DB lookup, the three checks, vm_state update,
    # then the cast to the *source* compute host.
    db.instance_get(self.context, instance['id']).AndReturn(instance)
    self.driver._live_migration_src_check(self.context, instance)
    self.driver._live_migration_dest_check(self.context, instance,
                                           dest, block_migration,
                                           disk_over_commit)
    self.driver._live_migration_common_check(self.context, instance,
                                             dest, block_migration,
                                             disk_over_commit)
    db.instance_update(self.context, instance['id'],
                       {'vm_state': vm_states.MIGRATING})
    driver.cast_to_compute_host(self.context, instance['host'],
                                'live_migration', update_db=False,
                                instance_id=instance['id'], dest=dest,
                                block_migration=block_migration)
    self.mox.ReplayAll()
    self.driver.schedule_live_migration(self.context,
                                        instance_id=instance['id'],
                                        dest=dest,
                                        block_migration=block_migration,
                                        disk_over_commit=disk_over_commit)
def list_instances_detail(self, context):
    """Return a list of InstanceInfo for all registered VMs"""
    LOG.debug("list_instances_detail")
    infos = []
    for bmm in db.bmm_get_all_by_instance_id_not_null(context):
        inst = db.instance_get(context, bmm["instance_id"])
        powered_off = PowerManager(bmm["ipmi_ip"]).status() == "off"
        if powered_off:
            reported_state = power_state.SHUTOFF
            # Reconcile the DB when the machine was powered off
            # outside of nova's control.
            if inst["vm_state"] == vm_states.ACTIVE:
                db.instance_update(context, inst["id"],
                                   {"vm_state": vm_states.STOPPED})
        else:
            reported_state = power_state.RUNNING
            # Likewise when it was powered back on externally.
            if inst["vm_state"] == vm_states.STOPPED:
                db.instance_update(context, inst["id"],
                                   {"vm_state": vm_states.ACTIVE})
        infos.append(driver.InstanceInfo(
            self._instance_id_to_name(bmm["instance_id"]),
            reported_state))
    return infos
def _update_instance(self, context, instance_uuid, body):
    """Apply *body* to the given instance, propagating to cells.

    Any failure is reported to the caller as a 404.
    """
    try:
        db.instance_update(context, instance_uuid, body, update_cells=True)
    except Exception:
        explanation = _("Could not update the instance")
        LOG.debug(explanation)
        raise webob.exc.HTTPNotFound(explanation=explanation)
    else:
        return webob.exc.HTTPAccepted()
def _set_instance_error(self, method, context, ex, *args, **kwargs):
    """Set the instance referenced in *kwargs* to ERROR state after a
    failed start_instance/run_instance scheduling attempt.

    :param method: scheduler driver method name that failed
    :param ex: exception that caused the failure (logged only)
    """
    LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
    if method == "start_instance" or method == "run_instance":
        # Use .get(): a KeyError raised here would mask the original
        # scheduling failure when no instance_id was supplied.
        instance_id = kwargs.get('instance_id')
        if instance_id:
            LOG.warning(_("Setting instance %(instance_id)s to "
                          "ERROR state.") % locals())
            db.instance_update(context, instance_id,
                               {'vm_state': vm_states.ERROR})
def cast_to_compute_host(context, host, method, update_db=True, **kwargs):
    """Cast *method* onto the RPC queue of the chosen compute host.

    When update_db is true and an instance_id is present in kwargs, the
    instance row is stamped with the host and scheduling time first.
    """
    instance_id = kwargs.get("instance_id", None)
    if update_db and instance_id is not None:
        db.instance_update(context, instance_id,
                           {"host": host, "scheduled_at": utils.utcnow()})
    rpc.cast(context,
             db.queue_get_for(context, "compute", host),
             {"method": method, "args": kwargs})
    LOG.debug(_("Casted '%(method)s' to compute '%(host)s'") % locals())
def stub_cast_to_cells(context, instance, method, *args, **kwargs):
    """Invoke the real compute API method instead of casting to cells."""
    api_method = getattr(ORIG_COMPUTE_API, method)
    restored = kwargs.pop('original_instance', None)
    if restored:
        instance = restored
        # Restore this in 'child cell DB'
        db.instance_update(context, instance['uuid'],
                           {'vm_state': instance['vm_state'],
                            'task_state': instance['task_state']})
    api_method(context, instance, *args, **kwargs)
def _set_instance_error(self, method, context, ex, *args, **kwargs):
    """Flag the instance referenced in *kwargs* as errored after a
    failed start_instance/run_instance scheduling attempt.

    :param method: scheduler driver method name that failed
    :param ex: exception that caused the failure (logged only)
    """
    LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
    if method == "start_instance" or method == "run_instance":
        # .get() instead of indexing: raising KeyError inside this
        # failure handler would hide the original scheduling error.
        instance_id = kwargs.get('instance_id')
        if instance_id:
            LOG.warning(
                _("Setting instance %(instance_id)s to "
                  "ERROR state.") % locals())
            db.instance_update(context, instance_id,
                               {'vm_state': vm_states.ERROR})
def test_resize_instance(self):
    """Ensure instance can be migrated/resized"""
    inst_id = self._create_instance()
    ctxt = self.context.elevated()
    self.compute.run_instance(self.context, inst_id)
    # Place the instance on a different host so prep_resize prepares a
    # real migration rather than a same-host no-op.
    db.instance_update(self.context, inst_id, {'host': 'foo'})
    self.compute.prep_resize(ctxt, inst_id, 1)
    migration = db.migration_get_by_instance_and_status(ctxt, inst_id,
                                                        'pre-migrating')
    self.compute.resize_instance(ctxt, inst_id, migration['id'])
    self.compute.terminate_instance(ctxt, inst_id)
def test_resize_instance(self):
    """Ensure instance can be migrated/resized"""
    inst_id = self._create_instance()
    ctxt = self.context.elevated()
    self.compute.run_instance(self.context, inst_id)
    # Move the instance to another host so a real migration record is
    # created by prep_resize.
    db.instance_update(self.context, inst_id, {'host': 'foo'})
    self.compute.prep_resize(ctxt, inst_id)
    migration = db.migration_get_by_instance_and_status(
        ctxt, inst_id, 'pre-migrating')
    self.compute.resize_instance(ctxt, inst_id, migration['id'])
    self.compute.terminate_instance(ctxt, inst_id)
def spawn(self, context, instance, network_info=None, block_device_info=None):
    """
    Create a new instance/VM/domain on the virtualization platform.
    Once this successfully completes, the instance should be
    running (power_state.RUNNING).

    If this fails, any partial instance should be
    completely cleaned up, and the virtualization platform should
    be in the state that it was before this call began.

    :param context: security context
    :param instance: Instance object as returned by DB layer.
                     This function should use the data there to guide
                     the creation of the new instance.
    :param network_info:
       :py:meth:`~nova.network.manager.NetworkManager.get_instance_nw_info`
    :param block_device_info:
    """
    LOG.debug("spawn")
    # The availability zone string encodes the target cluster, vlan and
    # whether a new cluster should be created.
    instance_zone, cluster_name, vlan_id, create_cluster = \
        self._parse_zone(instance["availability_zone"])

    # update instances table
    # _select_machine picks a bare-metal machine; 'reuse' means it is
    # already installed and can be taken over without reinstalling.
    bmm, reuse = self._select_machine(context, instance)
    instance["display_name"] = bmm["name"]
    instance["availability_zone"] = instance_zone
    db.instance_update(context,
                       instance["id"],
                       {"display_name": bmm["name"],
                        "availability_zone": instance_zone})
    if vlan_id:
        db.bmm_update(context, bmm["id"],
                      {"availability_zone": cluster_name,
                       "vlan_id": vlan_id,
                       "service_ip": None})

    if instance_zone == "resource_pool":
        # Machines going back to the resource pool are always
        # (re)installed.
        self._install_machine(context, instance, bmm, cluster_name,
                              vlan_id)
    else:
        self._update_ofc(bmm, cluster_name)
        # Drop the DB record of any previous instance that owned this
        # machine.
        if bmm["instance_id"]:
            db.instance_destroy(context, bmm["instance_id"])
        if reuse:
            db.bmm_update(context, bmm["id"],
                          {"status": "used",
                           "instance_id": instance["id"]})
        else:
            self._install_machine(context, instance, bmm, cluster_name,
                                  vlan_id)
        # NOTE(review): key injection appears to apply only to the
        # non-resource_pool branch — nesting inferred; confirm against
        # the original file.
        if instance["key_data"]:
            self._inject_key(bmm["pxe_ip"], str(instance["key_data"]))
def test_resize_down_fails(self):
    """Ensure resizing down raises and fails"""
    ctxt = self.context.elevated()
    inst_id = self._create_instance()
    self.compute.run_instance(self.context, inst_id)
    # Pretend the instance already runs the largest flavor...
    db.instance_update(self.context, inst_id,
                       {'instance_type': 'm1.xlarge'})
    # ...so resizing down to flavor 1 must be rejected.
    self.assertRaises(exception.ApiError, self.compute_api.resize,
                      ctxt, inst_id, 1)
    self.compute.terminate_instance(ctxt, inst_id)
def schedule_run_instance(self, context, instance_id, *_args, **_kwargs):
    """Picks a host that is up and has the fewest running instances."""
    instance_ref = db.instance_get(context, instance_id)
    # Admin users may pin the instance to a host via a "zone:host"
    # availability zone.
    if (instance_ref['availability_zone']
            and ':' in instance_ref['availability_zone']
            and context.is_admin):
        zone, _x, host = instance_ref['availability_zone'].partition(':')
        service = db.service_get_by_args(context.elevated(), host,
                                         'nova-compute')
        if not self.service_is_up(service):
            raise driver.WillNotSchedule(_("Host %s is not alive") % host)
        # TODO(vish): this probably belongs in the manager, if we
        # can generalize this somehow
        now = datetime.datetime.utcnow()
        db.instance_update(context, instance_id, {'host': host,
                                                  'scheduled_at': now})
        return host
    results = db.service_get_all_compute_sorted(context)
    for result in results:
        (service, instance_cores) = result
        # Per-node capacity check: used + requested cores must fit in
        # the node's vcpus scaled by the max_cores factor.
        compute_ref = db.service_get_all_compute_by_host(context,
                                                         service['host'])[0]
        compute_node_ref = compute_ref['compute_node'][0]
        if (instance_ref['vcpus'] + instance_cores >
                compute_node_ref['vcpus'] * FLAGS.max_cores):
            raise driver.NoValidHost(_("All hosts have too many cores"))
        LOG.debug(_("requested instance cores = %s + used compute node cores = %s < total compute node cores = %s * max cores = %s") % (instance_ref['vcpus'], instance_cores, compute_node_ref['vcpus'], FLAGS.max_cores))
        if self.service_is_up(service):
            # NOTE(vish): this probably belongs in the manager, if we
            # can generalize this somehow
            now = datetime.datetime.utcnow()
            db.instance_update(context,
                               instance_id,
                               {'host': service['host'],
                                'scheduled_at': now})
            LOG.debug(_("instance = %s scheduled to host = %s") %
                      (instance_id, service['host']))
            return service['host']
    raise driver.NoValidHost(_("Scheduler was unable to locate a host"
                               " for this request. Is the appropriate"
                               " service running?"))
def test_finish_revert_resize(self):
    """Ensure that the flavor is reverted to the original on revert"""
    context = self.context.elevated()
    instance_id = self._create_instance()

    def fake(*args, **kwargs):
        pass

    # Stub the driver and network calls; only DB flavor state matters.
    self.stubs.Set(self.compute.driver, 'finish_migration', fake)
    self.stubs.Set(self.compute.driver, 'revert_migration', fake)
    self.stubs.Set(self.compute.network_api, 'get_instance_nw_info', fake)
    self.compute.run_instance(self.context, instance_id)

    # Confirm the instance size before the resize starts
    inst_ref = db.instance_get(context, instance_id)
    instance_type_ref = db.instance_type_get(context,
                                             inst_ref['instance_type_id'])
    self.assertEqual(instance_type_ref['flavorid'], 1)

    # Force a cross-host migration so prep_resize creates a migration.
    db.instance_update(self.context, instance_id, {'host': 'foo'})
    self.compute.prep_resize(context, inst_ref['uuid'], 3)
    migration_ref = db.migration_get_by_instance_and_status(
        context, inst_ref['uuid'], 'pre-migrating')
    self.compute.resize_instance(context, inst_ref['uuid'],
                                 migration_ref['id'])
    self.compute.finish_resize(context, inst_ref['uuid'],
                               int(migration_ref['id']), {})

    # Prove that the instance size is now the new size
    inst_ref = db.instance_get(context, instance_id)
    instance_type_ref = db.instance_type_get(context,
                                             inst_ref['instance_type_id'])
    self.assertEqual(instance_type_ref['flavorid'], 3)

    # Finally, revert and confirm the old flavor has been applied
    self.compute.revert_resize(context, inst_ref['uuid'],
                               migration_ref['id'])
    self.compute.finish_revert_resize(context, inst_ref['uuid'],
                                      migration_ref['id'])
    inst_ref = db.instance_get(context, instance_id)
    instance_type_ref = db.instance_type_get(context,
                                             inst_ref['instance_type_id'])
    self.assertEqual(instance_type_ref['flavorid'], 1)
    self.compute.terminate_instance(context, instance_id)
def test_finish_revert_resize(self):
    """Ensure that the flavor is reverted to the original on revert"""
    context = self.context.elevated()
    instance_id = self._create_instance()

    def fake(*args, **kwargs):
        pass

    # Driver and network calls are irrelevant here; stub them out.
    self.stubs.Set(self.compute.driver, 'finish_migration', fake)
    self.stubs.Set(self.compute.driver, 'revert_migration', fake)
    self.stubs.Set(self.compute.network_api, 'get_instance_nw_info', fake)
    self.compute.run_instance(self.context, instance_id)

    # Confirm the instance size before the resize starts
    inst_ref = db.instance_get(context, instance_id)
    instance_type_ref = db.instance_type_get(context,
                                             inst_ref['instance_type_id'])
    self.assertEqual(instance_type_ref['flavorid'], 1)

    # Relocate the instance so a real migration record is produced.
    db.instance_update(self.context, instance_id, {'host': 'foo'})
    self.compute.prep_resize(context, inst_ref['uuid'], 3)
    migration_ref = db.migration_get_by_instance_and_status(context,
            inst_ref['uuid'], 'pre-migrating')
    self.compute.resize_instance(context, inst_ref['uuid'],
                                 migration_ref['id'])
    self.compute.finish_resize(context, inst_ref['uuid'],
                               int(migration_ref['id']), {})

    # Prove that the instance size is now the new size
    inst_ref = db.instance_get(context, instance_id)
    instance_type_ref = db.instance_type_get(context,
                                             inst_ref['instance_type_id'])
    self.assertEqual(instance_type_ref['flavorid'], 3)

    # Finally, revert and confirm the old flavor has been applied
    self.compute.revert_resize(context, inst_ref['uuid'],
                               migration_ref['id'])
    self.compute.finish_revert_resize(context, inst_ref['uuid'],
                                      migration_ref['id'])
    inst_ref = db.instance_get(context, instance_id)
    instance_type_ref = db.instance_type_get(context,
                                             inst_ref['instance_type_id'])
    self.assertEqual(instance_type_ref['flavorid'], 1)
    self.compute.terminate_instance(context, instance_id)
def test_post_live_migration_working_correctly(self):
    """Confirm post_live_migration() works as expected correctly."""
    dest = 'desthost'
    flo_addr = '1.2.1.2'

    # Preparing datas: an instance that looks mid-migration, with one
    # volume and a fixed IP carrying a floating IP.
    c = context.get_admin_context()
    instance_id = self._create_instance()
    i_ref = db.instance_get(c, instance_id)
    db.instance_update(c, i_ref['id'], {
        'state_description': 'migrating',
        'state': power_state.PAUSED
    })
    v_ref = db.volume_create(c, {'size': 1, 'instance_id': instance_id})
    fix_addr = db.fixed_ip_create(c, {
        'address': '1.1.1.1',
        'instance_id': instance_id
    })
    fix_ref = db.fixed_ip_get_by_address(c, fix_addr)
    flo_ref = db.floating_ip_create(c, {
        'address': flo_addr,
        'fixed_ip_id': fix_ref['id']
    })
    # reload is necessary before setting mocks
    i_ref = db.instance_get(c, instance_id)

    # Preparing mocks: expect each volume detached on the source and
    # the firewall filters removed.
    self.mox.StubOutWithMock(self.compute.volume_manager,
                             'remove_compute_volume')
    for v in i_ref['volumes']:
        self.compute.volume_manager.remove_compute_volume(c, v['id'])
    self.mox.StubOutWithMock(self.compute.driver, 'unfilter_instance')
    self.compute.driver.unfilter_instance(i_ref, [])

    # executing
    self.mox.ReplayAll()
    ret = self.compute.post_live_migration(c, i_ref, dest)

    # make sure every data is rewritten to dest
    i_ref = db.instance_get(c, i_ref['id'])
    c1 = (i_ref['host'] == dest)
    flo_refs = db.floating_ip_get_all_by_host(c, dest)
    c2 = (len(flo_refs) != 0 and flo_refs[0]['address'] == flo_addr)

    # post operation: assert and clean up the fixtures.
    self.assertTrue(c1 and c2)
    db.instance_destroy(c, instance_id)
    db.volume_destroy(c, v_ref['id'])
    db.floating_ip_destroy(c, flo_addr)
def schedule_live_migration(self, context, instance_id, dest, block_migration=False, disk_over_commit=False): """Live migration scheduling method. :param context: :param instance_id: :param dest: destination host :param block_migration: if true, block_migration. :param disk_over_commit: if True, consider real(not virtual) disk size. :return: The host where instance is running currently. Then scheduler send request that host. """ # Whether instance exists and is running. instance_ref = db.instance_get(context, instance_id) # Checking instance. self._live_migration_src_check(context, instance_ref) # Checking destination host. self._live_migration_dest_check(context, instance_ref, dest, block_migration, disk_over_commit) # Common checking. self._live_migration_common_check(context, instance_ref, dest, block_migration, disk_over_commit) # Changing instance_state. values = {"vm_state": vm_states.MIGRATING} db.instance_update(context, instance_id, values) # Changing volume state for volume_ref in instance_ref['volumes']: db.volume_update(context, volume_ref['id'], {'status': 'migrating'}) src = instance_ref['host'] cast_to_compute_host(context, src, 'live_migration', update_db=False, instance_id=instance_id, dest=dest, block_migration=block_migration)
def init_host(self, host):
    """Initialize anything that is necessary for the driver to function,
    including catching up with currently running VM's on the given host."""
    ctxt = nova_context.get_admin_context()
    db_instances = db.instance_get_all_by_host(ctxt, host)
    known_names = self.list_instances()
    # Purge DB records for instances that disappeared from the host
    # while they were mid-delete or mid-spawn.
    for inst in db_instances:
        if inst["name"] in known_names:
            continue
        if inst["task_state"] not in [task_states.DELETING,
                                      task_states.SPAWNING]:
            continue
        db.instance_update(ctxt, inst["uuid"],
                           {"vm_state": vm_states.DELETED,
                            "task_state": None})
        db.instance_destroy(ctxt, inst["uuid"])
def cast_to_compute_host(context, host, method, update_db=True, **kwargs):
    """Cast an RPC request onto the queue of a specific compute host.

    When update_db is set and the kwargs identify an instance (by uuid,
    falling back on the integer id), the instance row is stamped with
    the chosen host and scheduling time first.
    """
    if update_db:
        # fall back on the id if the uuid is not present
        target = kwargs.get('instance_uuid',
                            kwargs.get('instance_id', None))
        if target is not None:
            db.instance_update(context, target,
                               {'host': host,
                                'scheduled_at': utils.utcnow()})
    rpc.cast(context,
             db.queue_get_for(context, 'compute', host),
             {"method": method, "args": kwargs})
    LOG.debug(_("Casted '%(method)s' to compute '%(host)s'") % locals())
def cast_to_compute_host(context, host, method, update_db=True, **kwargs):
    """Queue *method* on the target compute host's RPC topic.

    If update_db is true and an instance is identified in kwargs (uuid
    preferred, integer id as fallback), stamp host/scheduled_at first.
    """
    if update_db:
        # fall back on the id if the uuid is not present
        fallback_id = kwargs.get('instance_id', None)
        target_uuid = kwargs.get('instance_uuid', fallback_id)
        if target_uuid is not None:
            values = {'host': host, 'scheduled_at': timeutils.utcnow()}
            db.instance_update(context, target_uuid, values)
    topic = rpc.queue_get_for(context, 'compute', host)
    rpc.cast(context, topic, {"method": method, "args": kwargs})
    LOG.debug(_("Casted '%(method)s' to compute '%(host)s'") % locals())
def db_instance_node_set(context, instance_uuid, node):
    '''Set the node field of an Instance.

    :returns: An Instance with the updated fields set properly.
    '''
    return db.instance_update(context, instance_uuid, {'node': node})
def test_instance_update_with_instance_uuid(self):
    """instance_update() accepts an instance UUID."""
    ctxt = context.get_admin_context()
    # Create an instance carrying one metadata entry.
    instance = db.instance_create(ctxt, {'metadata': {'host': 'foo'}})
    # Rewrite that entry, addressing the instance by UUID.
    db.instance_update(ctxt, instance.uuid, {'metadata': {'host': 'bar'}})
    # Confirm the stored metadata reflects the new value.
    instance_meta = db.instance_metadata_get(ctxt, instance.id)
    self.assertEqual('bar', instance_meta['host'])
def instance_update_db(context, instance_uuid, host):
    '''Set the host and scheduled_at fields of an Instance.

    :returns: An Instance with the updated fields set properly.
    '''
    values = dict(host=host, scheduled_at=timeutils.utcnow())
    return db.instance_update(context, instance_uuid, values)
def instance_update_db(context, instance_uuid, host):
    """Set the host and scheduled_at fields of an Instance.

    :returns: An Instance with the updated fields set properly.
    """
    return db.instance_update(context, instance_uuid,
                              {"host": host,
                               "scheduled_at": timeutils.utcnow()})
def instance_update_db(context, instance_uuid):
    '''Clear the host and node - set the scheduled_at field of an Instance.

    :returns: An Instance with the updated fields set properly.
    '''
    return db.instance_update(context, instance_uuid,
                              {'host': None,
                               'node': None,
                               'scheduled_at': timeutils.utcnow()})
def test_instance_update_with_instance_id(self):
    """instance_update() accepts a numeric instance id."""
    ctxt = context.get_admin_context()
    # Seed an instance with a single metadata entry.
    instance = db.instance_create(ctxt, {"metadata": {"host": "foo"}})
    # Rewrite the entry, addressing the instance by its integer id.
    db.instance_update(ctxt, instance.id, {"metadata": {"host": "bar"}})
    # Confirm the update landed.
    instance_meta = db.instance_metadata_get(ctxt, instance.id)
    self.assertEqual("bar", instance_meta["host"])
def instance_update_db(context, instance_uuid):
    """Clear the host and node - set the scheduled_at field of an Instance.

    :returns: An Instance with the updated fields set properly.
    """
    values = {"host": None,
              "node": None,
              "scheduled_at": timeutils.utcnow()}
    return db.instance_update(context, instance_uuid, values)
def _schedule_instance(self, context, instance_id, *_args, **_kwargs):
    """Picks a host that is up and has the fewest running instances.

    Also verifies the candidate compute node has enough free memory
    before committing the instance to it.
    """
    elevated = context.elevated()
    instance_ref = db.instance_get(context, instance_id)
    # Admin users may pin the instance to a host via a "zone:host"
    # availability zone.
    if (instance_ref['availability_zone']
            and ':' in instance_ref['availability_zone']
            and context.is_admin):
        zone, _x, host = instance_ref['availability_zone'].partition(':')
        service = db.service_get_by_args(elevated, host, 'nova-compute')
        if not self.service_is_up(service):
            raise driver.WillNotSchedule(_("Host %s is not alive") % host)
        # TODO(vish): this probably belongs in the manager, if we
        # can generalize this somehow
        self.assert_compute_node_has_enough_memory(context,
                                                   instance_ref,
                                                   service['host'])
        now = utils.utcnow()
        db.instance_update(context, instance_id,
                           {'host': host, 'scheduled_at': now})
        return host
    results = db.service_get_all_compute_sorted(elevated)
    for result in results:
        (service, instance_cores) = result
        if instance_cores + instance_ref['vcpus'] > FLAGS.max_cores:
            # BUG FIX: the adjacent string literals previously rendered
            # "CPU coresremaining" (missing separating space).
            raise driver.NoValidHost(_("Not enough allocatable CPU cores "
                                       "remaining"))
        try:
            self.assert_compute_node_has_enough_memory(context,
                                                       instance_ref,
                                                       service['host'])
        except exception.InsufficientFreeMemory:
            # Hosts are sorted by load, so a memory failure here means
            # no remaining host will fit either.
            break
        if self.service_is_up(service):
            # NOTE(vish): this probably belongs in the manager, if we
            # can generalize this somehow
            now = utils.utcnow()
            db.instance_update(context,
                               instance_id,
                               {'host': service['host'],
                                'scheduled_at': now})
            return service['host']
    raise driver.NoValidHost(_("Scheduler was unable to locate a host"
                               " for this request. Is the appropriate"
                               " service running?"))
def host_maintenance_mode(self, host, mode):
    """Start/Stop host maintenance window.

    On start (mode truthy), evacuates guest VMs by live-migrating each
    one to another host in the pool; VMs with no matching database
    record are assumed to be worker VMs and are skipped.

    :returns: 'on_maintenance' when every guest was migrated, or
              'off_maintenance' when mode is falsy.
    :raises: exception.NoValidHost if any guest could not be migrated.
    """
    if mode:
        # Every pool host except ourselves is a migration candidate.
        host_list = [host_ref for host_ref
                     in self._session.call_xenapi('host.get_all')
                     if host_ref != self._session.get_xenapi_host()]
        migrations_counter = vm_counter = 0
        ctxt = context.get_admin_context()
        for vm_ref, vm_rec in vm_utils.VMHelper.list_vms(self._session):
            for host_ref in host_list:
                try:
                    # Ensure only guest instances are migrated
                    uuid = vm_rec['other_config'].get('nova_uuid')
                    if not uuid:
                        name = vm_rec['name_label']
                        uuid = _uuid_find(ctxt, host, name)
                        if not uuid:
                            msg = _('Instance %(name)s running on %(host)s'
                                    ' could not be found in the database:'
                                    ' assuming it is a worker VM and skip'
                                    'ping migration to a new host')
                            LOG.info(msg % locals())
                            continue
                    instance = db.instance_get_by_uuid(ctxt, uuid)
                    vm_counter = vm_counter + 1
                    self._session.call_xenapi('VM.pool_migrate',
                                              vm_ref, host_ref, {})
                    new_host = _host_find(ctxt, self._session,
                                          host, host_ref)
                    db.instance_update(ctxt, instance.id,
                                       {'host': new_host})
                    migrations_counter = migrations_counter + 1
                    break
                except self.XenAPI.Failure:
                    # BUG FIX: original message rendered as
                    # "...%(vm_ref)sfrom %(host)s" (missing space).
                    LOG.exception('Unable to migrate VM %(vm_ref)s '
                                  'from %(host)s' % locals())
        if vm_counter == migrations_counter:
            return 'on_maintenance'
        else:
            raise exception.NoValidHost(reason='Unable to find suitable '
                                               'host for VMs evacuation')
    else:
        return 'off_maintenance'
def init_host(self, host):
    """Initialize anything that is necessary for the driver to function,
    including catching up with currently running VM's on the given host.

    Reconciles the database against the hypervisor: DB instances that
    no longer exist on the host and were mid-delete or mid-spawn are
    marked DELETED and destroyed.
    """
    ctxt = nova_context.get_admin_context()
    db_instances = db.instance_get_all_by_host(ctxt, host)
    present_names = self.list_instances()

    for db_instance in db_instances:
        # Instances still present on the host need no cleanup.
        if db_instance['name'] in present_names:
            continue
        # Only instances caught mid-transition are stale leftovers.
        if db_instance['task_state'] in (task_states.DELETING,
                                         task_states.SPAWNING):
            db.instance_update(ctxt, db_instance['uuid'],
                               {'vm_state': vm_states.DELETED,
                                'task_state': None})
            db.instance_destroy(ctxt, db_instance['uuid'])
def test_cast_to_compute_host_update_db_with_instance_uuid(self):
    """cast_to_compute_host() with an instance_uuid must stamp the DB.

    Records (via mox, so statement order below matters) that the driver
    clears the instance's host, sets scheduled_at to "now", resolves
    the compute queue for the host, and casts the method with the
    original kwargs.
    """
    host = "fake_host1"
    method = "fake_method"
    fake_kwargs = {"instance_uuid": "fake_uuid", "extra_arg": "meow"}
    queue = "fake_queue"

    # Record phase: stub collaborators in the exact call order expected.
    self.mox.StubOutWithMock(timeutils, "utcnow")
    self.mox.StubOutWithMock(db, "instance_update")
    self.mox.StubOutWithMock(rpc, "queue_get_for")
    self.mox.StubOutWithMock(rpc, "cast")

    timeutils.utcnow().AndReturn("fake-now")
    db.instance_update(self.context, "fake_uuid",
                       {"host": None, "scheduled_at": "fake-now"})
    rpc.queue_get_for(self.context, "compute", host).AndReturn(queue)
    rpc.cast(self.context, queue,
             {"method": method, "args": fake_kwargs})

    # Replay phase: the call under test must satisfy all expectations.
    self.mox.ReplayAll()
    driver.cast_to_compute_host(self.context, host, method, **fake_kwargs)
def set_admin_password(self, instance, new_pass):
    """Set the root/admin password on the VM instance.

    The password is delivered to an agent inside the VM over xenstore
    records (XenAPI RPC), so it must be encrypted in transit.  A simple
    Diffie-Hellman implementation (SimpleDH) is used instead of the
    more advanced one in M2Crypto for compatibility with the agent.
    """
    # Step 1: key exchange.  Each agent request carries a unique id.
    dh = SimpleDH()
    key_init_args = {'id': str(uuid.uuid4()),
                     'pub': str(dh.get_public())}
    resp = self._make_agent_call('key_init', instance, '', key_init_args)
    if resp is None:
        # No response from the agent
        return
    resp_dict = json.loads(resp)
    # Successful return code from key_init is 'D0'
    if resp_dict['returncode'] != 'D0':
        # The message contains a description of the agent-side error.
        raise RuntimeError(resp_dict['message'])
    dh.compute_shared(int(resp_dict['message']))

    # Step 2: send the encrypted password under a fresh request id.
    password_args = {'id': str(uuid.uuid4()),
                     'enc_pass': dh.encrypt(new_pass)}
    resp = self._make_agent_call('password', instance, '', password_args)
    if resp is None:
        # No response from the agent
        return
    resp_dict = json.loads(resp)
    # Successful return code from password is '0'
    if resp_dict['returncode'] != '0':
        raise RuntimeError(resp_dict['message'])

    # Record the new password in the instance record.
    db.instance_update(context.get_admin_context(), instance['id'],
                       dict(admin_pass=new_pass))
    return resp_dict['message']
def instance_update_db(context, instance_uuid, host, system_metadata=None):
    '''Set the host and scheduled_at fields of an Instance.

    :returns: An Instance with the updated fields set properly.
    '''
    updates = dict(host=host, scheduled_at=timeutils.utcnow())
    # system_metadata is only written when explicitly supplied.
    if system_metadata is not None:
        updates['system_metadata'] = system_metadata
    return db.instance_update(context, instance_uuid, updates)
def stub_cast_to_cells(context, instance, method, *args, **kwargs):
    """Test stub: run a compute API method locally as a 'child cell' would.

    Looks up *method* on the real compute API, optionally restores the
    instance row from the 'original_instance' kwarg (simulating child
    cell DB state), and executes the call with quota enforcement
    disabled via NoopQuotaDriver.  The global quota engines are always
    restored afterwards, even if the call raises.
    """
    fn = getattr(ORIG_COMPUTE_API, method)
    original_instance = kwargs.pop('original_instance', None)
    if original_instance:
        instance = original_instance
        # Restore this in 'child cell DB'
        db.instance_update(context, instance['uuid'],
                           dict(vm_state=instance['vm_state'],
                                task_state=instance['task_state']))

    # Use NoopQuotaDriver in child cells.
    saved_quotas = quota.QUOTAS
    quota.QUOTAS = quota.QuotaEngine(
            quota_driver_class=quota.NoopQuotaDriver())
    compute_api.QUOTAS = quota.QUOTAS
    try:
        fn(context, instance, *args, **kwargs)
    finally:
        # Both module globals were swapped above; restore both.
        quota.QUOTAS = saved_quotas
        compute_api.QUOTAS = saved_quotas
def instance_update_db(context, instance_uuid, extra_values=None):
    """Clear the host and node - set the scheduled_at field of an Instance.

    :returns: An Instance with the updated fields set properly.
    """
    values = dict(host=None, node=None, scheduled_at=timeutils.utcnow())
    # Callers may piggy-back additional column updates.
    if extra_values:
        values.update(extra_values)
    return db.instance_update(context, instance_uuid, values)
def test_cast_to_compute_host_update_db_with_instance_uuid(self):
    """cast_to_compute_host() with an instance_uuid must stamp the DB.

    Records (via mox, so statement order below matters) that the driver
    clears the instance's host, sets scheduled_at to "now", resolves
    the compute queue for the host, and casts the method with the
    original kwargs.
    """
    host = 'fake_host1'
    method = 'fake_method'
    fake_kwargs = {'instance_uuid': 'fake_uuid', 'extra_arg': 'meow'}
    queue = 'fake_queue'

    # Record phase: stub collaborators in the exact call order expected.
    self.mox.StubOutWithMock(timeutils, 'utcnow')
    self.mox.StubOutWithMock(db, 'instance_update')
    self.mox.StubOutWithMock(rpc, 'queue_get_for')
    self.mox.StubOutWithMock(rpc, 'cast')

    timeutils.utcnow().AndReturn('fake-now')
    db.instance_update(self.context, 'fake_uuid',
                       {'host': None, 'scheduled_at': 'fake-now'})
    rpc.queue_get_for(self.context, 'compute', host).AndReturn(queue)
    rpc.cast(self.context, queue,
             {'method': method, 'args': fake_kwargs})

    # Replay phase: the call under test must satisfy all expectations.
    self.mox.ReplayAll()
    driver.cast_to_compute_host(self.context, host, method, **fake_kwargs)
def schedule_live_migration(self, context, instance_id, dest, block_migration=False): """Live migration scheduling method. :param context: :param instance_id: :param dest: destination host :return: The host where instance is running currently. Then scheduler send request that host. """ # Whether instance exists and is running. instance_ref = db.instance_get(context, instance_id) # Checking instance. self._live_migration_src_check(context, instance_ref) # Checking destination host. self._live_migration_dest_check(context, instance_ref, dest, block_migration) # Common checking. self._live_migration_common_check(context, instance_ref, dest, block_migration) # Changing instance_state. values = {"vm_state": vm_states.MIGRATING} db.instance_update(context, instance_id, values) # Changing volume state for volume_ref in instance_ref['volumes']: db.volume_update(context, volume_ref['id'], {'status': 'migrating'}) # Return value is necessary to send request to src # Check _schedule() in detail. src = instance_ref['host'] return src
def _wait_for_boot():
    """Poll callback: boot the domain once key injection is done.

    Marks the instance BUILDING, creates the domain, then records
    ACTIVE on success.  On any failure the instance is marked
    OVERCOMMIT with power_state SUSPENDED.  The polling timer is
    stopped in every case.
    """
    try:
        LOG.debug(_("Key is injected but instance is not running yet"),
                  instance=instance)
        db.instance_update(context, instance['id'],
                           {'vm_state': vm_states.BUILDING})
        state = self._conn.create_domain(xml_dict, bpath)
        if state == power_state.RUNNING:
            LOG.debug(_('instance %s: booted'), instance['name'],
                      instance=instance)
            db.instance_update(context, instance['id'],
                               {'vm_state': vm_states.ACTIVE})
            LOG.debug(_('~~~~~~ current state = %s ~~~~~~'), state,
                      instance=instance)
            LOG.debug(_("instance %s spawned successfully"),
                      instance['name'], instance=instance)
        else:
            LOG.debug(_('instance %s:not booted'), instance['name'],
                      instance=instance)
    except Exception:
        # BUG FIX: message typo "Bremetal" -> "Baremetal"; the bound
        # exception ('as Exn') was never used, so the binding is dropped.
        LOG.debug(_("Baremetal assignment is overcommitted."),
                  instance=instance)
        db.instance_update(context, instance['id'],
                           {'vm_state': vm_states.OVERCOMMIT,
                            'power_state': power_state.SUSPENDED})
    timer.stop()
def _set_instance_error(self, method, context, ex, *args, **kwargs):
    """Sets VM to Error state"""
    LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())
    # Only instance-creating methods leave a VM to mark as errored.
    if method not in ('start_instance', 'run_instance'):
        return
    # FIXME(comstud): Clean this up after fully on UUIDs.
    instance_id = kwargs.get('instance_uuid', kwargs.get('instance_id'))
    if not instance_id:
        # FIXME(comstud): We should make this easier.  run_instance
        # only sends a request_spec, and an instance may or may not
        # have been created in the API (or scheduler) already.  If it
        # was created, there's a 'uuid' set in the instance_properties
        # of the request_spec.
        request_spec = kwargs.get('request_spec', {})
        properties = request_spec.get('instance_properties', {})
        instance_id = properties.get('uuid', {})
    if not instance_id:
        # Nothing was created anywhere; there is no row to update.
        return
    LOG.warning(_("Setting instance %(instance_id)s to "
                  "ERROR state.") % locals())
    db.instance_update(context, instance_id,
                       {'vm_state': vm_states.ERROR})
def test_live_migration_raises_exception(self): """Confirms recover method is called when exceptions are raised.""" # Skip if non-libvirt environment if not self.lazy_load_library_exists(): return # Preparing data self.compute = utils.import_object(FLAGS.compute_manager) instance_dict = { 'host': 'fake', 'state': power_state.RUNNING, 'state_description': 'running' } instance_ref = db.instance_create(self.context, self.test_instance) instance_ref = db.instance_update(self.context, instance_ref['id'], instance_dict) vol_dict = {'status': 'migrating', 'size': 1} volume_ref = db.volume_create(self.context, vol_dict) db.volume_attached(self.context, volume_ref['id'], instance_ref['id'], '/dev/fake') # Preparing mocks vdmock = self.mox.CreateMock(libvirt.virDomain) self.mox.StubOutWithMock(vdmock, "migrateToURI") vdmock.migrateToURI(FLAGS.live_migration_uri % 'dest', mox.IgnoreArg(), None, FLAGS.live_migration_bandwidth).\ AndRaise(libvirt.libvirtError('ERR')) def fake_lookup(instance_name): if instance_name == instance_ref.name: return vdmock self.create_fake_libvirt_mock(lookupByName=fake_lookup) # Start test self.mox.ReplayAll() conn = libvirt_conn.LibvirtConnection(False) self.assertRaises(libvirt.libvirtError, conn._live_migration, self.context, instance_ref, 'dest', '', self.compute.recover_live_migration) instance_ref = db.instance_get(self.context, instance_ref['id']) self.assertTrue(instance_ref['state_description'] == 'running') self.assertTrue(instance_ref['state'] == power_state.RUNNING) volume_ref = db.volume_get(self.context, volume_ref['id']) self.assertTrue(volume_ref['status'] == 'in-use') db.volume_destroy(self.context, volume_ref['id']) db.instance_destroy(self.context, instance_ref['id'])
def test_instance_get_all_hung_in_rebooting(self):
    """instance_get_all_hung_in_rebooting() returns only stale reboots.

    An instance with task_state 'rebooting' is "hung" only if its
    updated_at is older than the supplied window (10 seconds here).
    """
    ctxt = context.get_admin_context()

    # Ensure no instances are returned.
    results = db.instance_get_all_hung_in_rebooting(ctxt, 10)
    self.assertEqual(0, len(results))

    # Ensure one rebooting instance with updated_at older than 10 seconds
    # is returned.
    # BUG FIX: the original literals '01'/'00' use leading zeros, a
    # SyntaxError under Python 3 (PEP 3127) and deprecated style.
    updated_at = datetime.datetime(2000, 1, 1, 12, 0, 0)
    values = {"task_state": "rebooting", "updated_at": updated_at}
    instance = db.instance_create(ctxt, values)
    results = db.instance_get_all_hung_in_rebooting(ctxt, 10)
    self.assertEqual(1, len(results))
    db.instance_update(ctxt, instance.id, {"task_state": None})

    # Ensure the newly rebooted instance is not returned.
    updated_at = datetime.datetime.utcnow()
    values = {"task_state": "rebooting", "updated_at": updated_at}
    instance = db.instance_create(ctxt, values)
    results = db.instance_get_all_hung_in_rebooting(ctxt, 10)
    self.assertEqual(0, len(results))
    db.instance_update(ctxt, instance.id, {"task_state": None})
def _set_vm_state_and_notify(self, method, updates, context, ex,
                             *args, **kwargs):
    """changes VM state and notifies"""
    # FIXME(comstud): Re-factor this somehow. Not sure this belongs in the
    # scheduler manager like this. We should make this easier.
    # run_instance only sends a request_spec, and an instance may or may
    # not have been created in the API (or scheduler) already. If it was
    # created, there's a 'uuid' set in the instance_properties of the
    # request_spec.
    # (littleidea): I refactored this a bit, and I agree
    # it should be easier :)
    # The refactoring could go further but trying to minimize changes
    # for essex timeframe
    LOG.warning(_("Failed to schedule_%(method)s: %(ex)s") % locals())

    vm_state = updates['vm_state']
    request_spec = kwargs.get('request_spec', {})
    properties = request_spec.get('instance_properties', {})
    instance_uuid = properties.get('uuid', {})

    # Only update the DB row if the instance was actually created.
    if instance_uuid:
        state = vm_state.upper()
        LOG.warning(_('Setting instance to %(state)s state.'), locals(),
                    instance_uuid=instance_uuid)
        db.instance_update(context, instance_uuid, updates)

    payload = {'request_spec': request_spec,
               'instance_properties': properties,
               'instance_id': instance_uuid,
               'state': vm_state,
               'method': method,
               'reason': ex}
    notifier.notify(context, notifier.publisher_id("scheduler"),
                    'scheduler.' + method, notifier.ERROR, payload)
def _wait_for_boot():
    """Poll callback: boot the domain once key injection is done.

    Marks the instance BUILDING, creates the domain, then records
    ACTIVE on success; any exception marks the instance ERROR with
    power_state FAILED.  The polling timer is stopped in every case.

    NOTE(review): the commented-out instance_update_and_get_original /
    send_update lines mark a pending notifications refactor; kept as-is.
    """
    try:
        LOG.debug(_("Key is injected but instance is not running yet"),
                  instance=instance)
        #(old_ref, new_ref) = db.instance_update_and_get_original(
        db.instance_update(
            context, instance['id'], {'vm_state': vm_states.BUILDING})
        #notifications.send_update(context, old_ref, new_ref)
        state = self._conn.create_domain(xml_dict, bpath)
        if state == power_state.RUNNING:
            LOG.debug(_('instance %s: booted'), instance['name'],
                      instance=instance)
            #(old_ref, new_ref) = db.instance_update_and_get_original(
            db.instance_update(
                context, instance['id'], {'vm_state': vm_states.ACTIVE})
            #notifications.send_update(context, old_ref, new_ref)
            LOG.debug(_('~~~~~~ current state = %s ~~~~~~'), state,
                      instance=instance)
            LOG.debug(_("instance %s spawned successfully"),
                      instance['name'], instance=instance)
        else:
            LOG.debug(_('instance %s:not booted'), instance['name'],
                      instance=instance)
    except Exception:
        LOG.exception(_("Baremetal assignment is overcommitted."),
                      instance=instance)
        #(old_ref, new_ref) = db.instance_update_and_get_original(
        db.instance_update(
            context, instance['id'],
            {'vm_state': vm_states.ERROR,
             'power_state': power_state.FAILED})
        #notifications.send_update(context, old_ref, new_ref)
    timer.stop()
def _install_machine(self, context, instance, bmm, cluster_name,
                     vlan_id, update_instance=False):
    """Provision the bare-metal machine `bmm` with the instance's image.

    Stages the image for Cobbler, renders the create/pxeboot templates,
    power-cycles the machine over IPMI, and polls the install state
    machine ('install' -> 'install_reboot' -> 'installed').  Afterwards
    the bmm is marked 'active' (resource_pool cluster) or 'used', and
    the instance is optionally flipped to ACTIVE.

    NOTE(review): vlan_id is unused in this body — presumably consumed
    by the caller or a template elsewhere; confirm before removing.
    """
    db.bmm_update(context, bmm["id"], {"instance_id": instance["id"]})
    mac = self._get_pxe_mac(bmm)

    # fetch image
    image_base_path = self._get_cobbler_image_path()
    if not os.path.exists(image_base_path):
        utils.execute('mkdir', '-p', image_base_path)
    image_path = self._get_cobbler_image_path(instance)
    if not os.path.exists(image_path):
        image_meta = images.fetch(context,
                                  instance["image_ref"],
                                  image_path,
                                  instance["user_id"],
                                  instance["project_id"])
    else:
        image_meta = images.show(context, instance["image_ref"])

    # Anything that is not the dodai-deploy server image is a "node".
    image_type = "server"
    image_name = image_meta["name"] or image_meta["properties"][
        "image_location"]
    if image_name.find("dodai-deploy") == -1:
        image_type = "node"

    # begin to install os
    # NOTE(review): the create.sh template reads bmm["service_mac1"] /
    # bmm["service_mac2"] directly, so the "None" fallbacks bound to
    # service_mac1/service_mac2 below are currently unused.
    pxe_ip = bmm["pxe_ip"] or "None"
    pxe_mac = bmm["pxe_mac"] or "None"
    storage_ip = bmm["storage_ip"] or "None"
    storage_mac = bmm["storage_mac"] or "None"
    service_mac1 = bmm["service_mac1"] or "None"
    service_mac2 = bmm["service_mac2"] or "None"
    instance_path = self._get_cobbler_instance_path(instance)
    if not os.path.exists(instance_path):
        utils.execute('mkdir', '-p', instance_path)
    self._cp_template(
        "create.sh",
        self._get_cobbler_instance_path(instance, "create.sh"),
        {"INSTANCE_ID": instance["id"],
         "IMAGE_ID": instance["image_ref"],
         "COBBLER": FLAGS.cobbler,
         "HOST_NAME": bmm["name"],
         "STORAGE_IP": storage_ip,
         "STORAGE_MAC": storage_mac,
         "PXE_IP": pxe_ip,
         "PXE_MAC": pxe_mac,
         "SERVICE_MAC1": bmm["service_mac1"],
         "SERVICE_MAC2": bmm["service_mac2"],
         "IMAGE_TYPE": image_type,
         "MONITOR_PORT": FLAGS.dodai_monitor_port,
         "ROOT_SIZE": FLAGS.dodai_partition_root_gb,
         "SWAP_SIZE": FLAGS.dodai_partition_swap_gb,
         "EPHEMERAL_SIZE": FLAGS.dodai_partition_ephemeral_gb,
         "KDUMP_SIZE": FLAGS.dodai_partition_kdump_gb})
    self._cp_template("pxeboot_action",
                      self._get_pxe_boot_file(mac),
                      {"INSTANCE_ID": instance["id"],
                       "COBBLER": FLAGS.cobbler,
                       "PXE_MAC": pxe_mac,
                       "ACTION": "create"})

    LOG.debug("Reboot or power on.")
    self._reboot_or_power_on(bmm["ipmi_ip"])

    # wait until starting to install os
    while self._get_state(context, instance) != "install":
        greenthread.sleep(20)
        LOG.debug("Wait until begin to install instance %s." %
                  instance["id"])
    # Switching the PXE config to 'start' lets the installer proceed.
    self._cp_template("pxeboot_start", self._get_pxe_boot_file(mac), {})

    # wait until starting to reboot
    while self._get_state(context, instance) != "install_reboot":
        greenthread.sleep(20)
        LOG.debug("Wait until begin to reboot instance %s after os has been installed." %
                  instance["id"])
    # Full power cycle via IPMI once the installer asks for a reboot.
    power_manager = PowerManager(bmm["ipmi_ip"])
    power_manager.soft_off()
    while power_manager.status() == "on":
        greenthread.sleep(20)
        # NOTE(review): "unit" is a typo for "until" in the message below
        # (runtime string, left unchanged here).
        LOG.debug("Wait unit the instance %s shuts down." % instance["id"])
    power_manager.on()

    # wait until installation of os finished
    while self._get_state(context, instance) != "installed":
        greenthread.sleep(20)
        LOG.debug("Wait until instance %s installation finished." %
                  instance["id"])

    # Machines provisioned into the resource pool stay 'active';
    # machines handed to a tenant cluster become 'used'.
    if cluster_name == "resource_pool":
        status = "active"
    else:
        status = "used"

    db.bmm_update(context, bmm["id"], {"status": status})

    if update_instance:
        db.instance_update(context, instance["id"],
                           {"vm_state": vm_states.ACTIVE})