def _live_migrate(self, context, instance, scheduler_hint,
                  block_migration, disk_over_commit):
    """Live-migrate ``instance`` to the host named in ``scheduler_hint``.

    The migration itself is delegated to ``live_migrate.execute``. If it
    raises one of the exceptions listed in the first handler, the failure
    is reported through ``scheduler_utils.set_vm_state_and_notify``
    (passing the instance's current vm_state and a task_state of None)
    and the original exception is re-raised. Any other exception is
    logged with its traceback and replaced by ``MigrationError``.

    :param context: security context
    :param instance: instance to migrate; read via ``['uuid']`` and
        ``['vm_state']``
    :param scheduler_hint: mapping whose optional "host" entry names the
        destination
    :param block_migration: forwarded to ``live_migrate.execute``
    :param disk_over_commit: forwarded to ``live_migrate.execute``
    :raises: the original exception for the listed failure types,
        exception.MigrationError for everything else
    """
    destination = scheduler_hint.get("host")
    try:
        live_migrate.execute(context, instance, destination,
                             block_migration, disk_over_commit)
    except (
        exception.NoValidHost,
        exception.ComputeServiceUnavailable,
        exception.InvalidHypervisorType,
        exception.InvalidCPUInfo,
        exception.UnableToMigrateToSelf,
        exception.DestinationHypervisorTooOld,
        exception.InvalidLocalStorage,
        exception.InvalidSharedStorage,
        exception.HypervisorUnavailable,
        exception.InstanceNotRunning,
        exception.MigrationPreCheckError,
    ) as ex:
        # The context manager re-raises ``ex`` once the notification
        # below has been sent.
        with excutils.save_and_reraise_exception():
            # TODO(johngarbutt) - eventually need instance actions here
            spec = {'instance_properties': {'uuid': instance['uuid']}}
            state_updates = {
                'vm_state': instance['vm_state'],
                'task_state': None,
                'expected_task_state': task_states.MIGRATING,
            }
            scheduler_utils.set_vm_state_and_notify(
                context, 'compute_task', 'migrate_server',
                state_updates, ex, spec, self.db)
    except Exception as ex:
        log_args = {'instance_id': instance['uuid'], 'dest': destination}
        LOG.error(_('Migration of instance %(instance_id)s to host'
                    ' %(dest)s unexpectedly failed.'),
                  log_args, exc_info=True)
        raise exception.MigrationError(reason=ex)
def assert_compute_node_has_enough_disk(self, context, instance_ref, dest):
    """Checks if destination host has enough disk for block migration.

    The disk still free on ``dest`` is its ``local_gb`` value minus the
    ``local_gb`` of every instance already placed on it. The migration
    is refused unless that remainder is strictly greater than the
    ``local_gb`` of the instance being moved.

    :param context: security context
    :param instance_ref: nova.db.sqlalchemy.models.Instance object
    :param dest: destination host
    :raises: exception.MigrationError if the destination lacks disk
    """
    # Total disk of the destination host.
    avail = self._get_compute_info(context, dest, 'local_gb')

    # Disk already assigned on the host. Each instance counts with its
    # full assigned size because overcommitting is risky. sum() yields 0
    # for a host without instances, so no empty-list special case is
    # needed (and no dependency on the Python 2-only builtin reduce()).
    instance_refs = db.instance_get_all_by_host(context, dest)
    used = sum(i['local_gb'] for i in instance_refs)

    disk_inst = instance_ref['local_gb']
    avail = avail - used
    if avail <= disk_inst:
        instance_id = ec2utils.id_to_ec2_id(instance_ref['id'])
        reason = _("Unable to migrate %(instance_id)s to %(dest)s: "
                   "Lack of disk(host:%(avail)s "
                   "<= instance:%(disk_inst)s)")
        # Explicit mapping instead of locals(): the message no longer
        # depends on how the local variables happen to be named.
        raise exception.MigrationError(reason=reason % {
            'instance_id': instance_id,
            'dest': dest,
            'avail': avail,
            'disk_inst': disk_inst,
        })
def _assert_compute_node_has_enough_memory(self, context,
                                           instance_ref, dest):
    """Refuse a live migration when ``dest`` is short on memory.

    Free memory is the host's ``memory_mb`` minus the ``memory_mb`` of
    every instance already on the host; it must be strictly greater than
    the ``memory_mb`` of the instance being migrated.

    :param context: security context
    :param instance_ref: nova.db.sqlalchemy.models.Instance object
    :param dest: destination host
    :raises: exception.MigrationError if the destination lacks memory
    """
    # Total memory of the destination host.
    total_mb = self._get_compute_info(context, dest)['memory_mb']

    # Memory already assigned on the host; every instance counts with
    # its full assigned size because overcommitting is risky.
    hosted = db.instance_get_all_by_host(context, dest)
    committed_mb = sum(inst['memory_mb'] for inst in hosted)

    required_mb = instance_ref['memory_mb']
    remaining_mb = total_mb - committed_mb
    if remaining_mb <= required_mb:
        uuid = instance_ref['uuid']
        template = _("Unable to migrate %(instance_uuid)s to %(dest)s: "
                     "Lack of memory(host:%(avail)s <= "
                     "instance:%(mem_inst)s)")
        details = {
            'instance_uuid': uuid,
            'dest': dest,
            'avail': remaining_mb,
            'mem_inst': required_mb,
        }
        raise exception.MigrationError(reason=template % details)
def _migrate_vm(self, ctxt, vm_name, host):
    """Ask the compute API to move the VM called ``vm_name`` elsewhere.

    VMs for which ``self._vmutils`` reports no instance uuid are skipped
    with an info log. An instance in the ACTIVE vm_state is
    live-migrated; any other instance is resized with no flavor id.
    Afterwards the method waits for the instance's pending task.

    :param ctxt: request context handed to the API calls
    :param vm_name: name of the VM to move
    :param host: accepted but not used inside this function
    :raises: exception.MigrationError if any step raises
    """
    try:
        instance_uuid = self._vmutils.get_instance_uuid(vm_name)
        if not instance_uuid:
            LOG.info(
                _LI('VM "%s" running on this host was not created by '
                    'nova. Skip migrating this vm to a new host.'),
                vm_name)
            return

        instance = objects.Instance.get_by_uuid(ctxt, instance_uuid)
        if instance.vm_state != vm_states.ACTIVE:
            self._api.resize(ctxt, instance,
                             flavor_id=None,
                             clean_shutdown=True)
        else:
            self._api.live_migrate(ctxt, instance,
                                   block_migration=False,
                                   disk_over_commit=False,
                                   host_name=None)
        self._wait_for_instance_pending_task(ctxt, instance_uuid)
    except Exception as exc:
        LOG.error(_LE('Migrating vm failed with error: %s '), exc)
        failure_reason = 'Unable to migrate %s.' % vm_name
        raise exception.MigrationError(reason=failure_reason)
def _assert_compute_node_has_enough_memory(self, context,
                                           instance_ref, dest):
    """Refuse a live migration when ``dest`` does not pass 'RamFilter'.

    A host state for ``dest`` is built from its compute info and run
    through ``self.host_manager.get_filtered_hosts`` with the instance's
    instance type as filter property. An empty result is treated as a
    lack of memory.

    :param context: security context
    :param instance_ref: nova.db.sqlalchemy.models.Instance object
    :param dest: destination host
    :raises: exception.MigrationError if no host survives the filter
    """
    compute_info = self._get_compute_info(context, dest)
    dest_state = self.host_manager.host_state_cls(
        dest, compute_info.get('hypervisor_hostname'))
    dest_state.update_from_compute_node(compute_info)

    properties = {
        'instance_type': instance_types.extract_instance_type(instance_ref),
    }
    passing = self.host_manager.get_filtered_hosts(
        [dest_state], properties, 'RamFilter')
    if passing:
        return

    template = _("Unable to migrate %(instance_uuid)s to %(dest)s: "
                 "Lack of memory")
    details = {'instance_uuid': instance_ref['uuid'], 'dest': dest}
    raise exception.MigrationError(reason=template % details)
def host_maintenance_mode(self, host, mode):
    """Starts/Stops host maintenance.

    On start, it triggers guest VMs evacuation: the nova-compute service
    is disabled, pending tasks of the listed instances are awaited, and
    every VM reported by ``self._vmutils`` is handed to
    ``self._migrate_vm``.

    :param host: host whose nova-compute service state is changed
    :param mode: falsy to leave maintenance, truthy to enter it
    :returns: 'off_maintenance' when maintenance is stopped,
        'on_maintenance' when no instance is left after migration
    :raises: exception.MigrationError if instances remain on the host
    """
    ctxt = context.get_admin_context()

    if not mode:
        self._set_service_state(host=host, binary='nova-compute',
                                is_disabled=False)
        LOG.info(_LI('Host is no longer under maintenance.'))
        return 'off_maintenance'

    self._set_service_state(host=host, binary='nova-compute',
                            is_disabled=True)

    # Let tasks that are already in flight finish before migrating.
    vms_uuids = self._vmops.list_instance_uuids()
    for vm_uuid in vms_uuids:
        self._wait_for_instance_pending_task(ctxt, vm_uuid)

    vm_names = self._vmutils.list_instances()
    for vm_name in vm_names:
        self._migrate_vm(ctxt, vm_name, host)

    vms_uuid_after_migration = self._vmops.list_instance_uuids()
    remaining_vms = len(vms_uuid_after_migration)
    if remaining_vms == 0:
        # The two literals are concatenated; the trailing space keeps
        # the sentences from running together in the log output.
        LOG.info(
            _LI('All vms have been migrated successfully. '
                'Host is down for maintenance'))
        return 'on_maintenance'

    raise exception.MigrationError(reason=_(
        'Not all vms have been migrated: %s remaining instances.')
        % remaining_vms)
def assert_compute_node_has_enough_resources(self, context,
                                             instance_ref, dest):
    """Refuse a live migration when ``dest`` lacks resources.

    Only memory is checked here: the compute node's ``memory_mb`` minus
    its ``memory_mb_used`` must be strictly greater than the instance's
    ``memory_mb``. Local storage is not examined.

    :param context: security context
    :param instance_ref: nova.db.sqlalchemy.models.Instance object
    :param dest: destination host
    :raises: exception.MigrationError if the destination lacks memory
    """
    # The instance's hostname is what appears in the error message.
    instance_label = instance_ref['hostname']

    # First compute node of the first compute service found for dest.
    services = db.service_get_all_compute_by_host(context, dest)
    node = services[0]['compute_node'][0]

    total_mb = int(node['memory_mb'])
    used_mb = int(node['memory_mb_used'])
    free_mb = total_mb - used_mb
    needed_mb = instance_ref['memory_mb']

    if free_mb <= needed_mb:
        template = _("Unable to migrate %(ec2_id)s to destination: %(dest)s "
                     "(host:%(mem_avail)s <= instance:%(mem_inst)s)")
        details = {
            'ec2_id': instance_label,
            'dest': dest,
            'mem_avail': free_mb,
            'mem_inst': needed_mb,
        }
        raise exception.MigrationError(reason=template % details)
def assert_compute_node_has_enough_disk(self, context, instance_ref, dest,
                                        disk_over_commit):
    """Checks if destination host has enough disk for block migration.

    :param context: security context
    :param instance_ref: nova.db.sqlalchemy.models.Instance object
    :param dest: destination host
    :param disk_over_commit: if True, consider real(not virtual)
                             disk size.
    :raises: rpc_common.RemoteError if querying the source host fails,
        exception.MigrationError if the destination lacks disk
    """
    # Libvirt supports qcow2 disk format, which is usually compressed
    # on compute nodes.
    # Real disk image (compressed) may be enlarged to "virtual disk
    # size", that is specified as the maximum disk size.
    # (See qemu-img -f path-to-disk)
    # Scheduler recognizes destination host still has enough disk space
    # if real disk size < available disk size
    # if disk_over_commit is True,
    # otherwise virtual disk size < available disk size.

    # Getting total available disk of host (reported in GB, compared in
    # bytes below).
    available_gb = self._get_compute_info(context, dest,
                                          'disk_available_least')
    available = available_gb * (1024 ** 3)

    # The source host is named explicitly so the error handler below can
    # mention it; formatting %(src)s from locals() raised KeyError
    # because no such local existed, hiding the original RemoteError.
    src = instance_ref['host']

    # Getting necessary disk size
    try:
        topic = db.queue_get_for(context, FLAGS.compute_topic, src)
        ret = rpc.call(context, topic,
                       {"method": 'get_instance_disk_info',
                        "args": {'instance_name': instance_ref['name']}})
        disk_infos = utils.loads(ret)
    except rpc_common.RemoteError:
        LOG.exception(_("host %(dest)s is not compatible with "
                        "original host %(src)s.")
                      % {'dest': dest, 'src': src})
        raise

    # Real size when over-committing is allowed, virtual size otherwise.
    size_key = 'disk_size' if disk_over_commit else 'virt_disk_size'
    necessary = sum(int(info[size_key]) for info in disk_infos)

    # Check that available disk > necessary disk
    if (available - necessary) < 0:
        instance_uuid = instance_ref['uuid']
        reason = _("Unable to migrate %(instance_uuid)s to %(dest)s: "
                   "Lack of disk(host:%(available)s "
                   "<= instance:%(necessary)s)")
        raise exception.MigrationError(reason=reason % {
            'instance_uuid': instance_uuid,
            'dest': dest,
            'available': available,
            'necessary': necessary,
        })
def _assert_compute_node_has_enough_memory(self, context,
                                           instance_ref, dest):
    """Refuse a live migration when ``dest`` is short on free RAM.

    The host's ``free_ram_mb`` must be strictly greater than the
    instance's ``memory_mb``; an instance whose ``memory_mb`` is falsy
    is rejected as well.

    :param context: security context
    :param instance_ref: nova.db.sqlalchemy.models.Instance object
    :param dest: destination host
    :raises: exception.MigrationError if the check fails
    """
    # Free memory reported for the destination host.
    free_mb = self._get_compute_info(context, dest)['free_ram_mb']
    required_mb = instance_ref['memory_mb']

    if not required_mb or free_mb <= required_mb:
        template = _("Unable to migrate %(instance_uuid)s to %(dest)s: "
                     "Lack of memory(host:%(avail)s <= "
                     "instance:%(mem_inst)s)")
        details = {
            'instance_uuid': instance_ref['uuid'],
            'dest': dest,
            'avail': free_mb,
            'mem_inst': required_mb,
        }
        raise exception.MigrationError(reason=template % details)
def test_migrate_live_migration_with_unexpected_error(self):
    """Run the shared failure helper with an empty-reason MigrationError.

    The helper receives ``webob.exc.HTTPInternalServerError`` as the
    expected exception and is told not to check the response.
    """
    unexpected = exception.MigrationError(reason='')
    helper_kwargs = {
        'expected_exc': webob.exc.HTTPInternalServerError,
        'check_response': False,
    }
    self._test_migrate_live_failed_with_exception(unexpected,
                                                  **helper_kwargs)
def _live_migrate(self, context, instance, scheduler_hint,
                  block_migration, disk_over_commit, request_spec):
    """Live-migrate ``instance`` to the host named in ``scheduler_hint``.

    A Migration record of type 'live-migration' is created with status
    'accepted', then the task returned by
    ``self._build_live_migrate_task`` is executed.

    If the task raises one of the exceptions listed in the first
    handler, the instance keeps its current vm_state, the migration is
    marked 'error' and the original exception is re-raised. On any other
    exception the failure is logged, the instance is put into the ERROR
    vm_state (keeping its task_state), the migration is marked 'error'
    and ``MigrationError`` is raised instead.

    :param context: security context
    :param instance: instance object being migrated
    :param scheduler_hint: mapping whose optional "host" entry names the
        destination
    :param block_migration: forwarded to the live-migrate task
    :param disk_over_commit: forwarded to the live-migrate task
    :param request_spec: forwarded to the live-migrate task
    :raises: the original exception for the listed failure types,
        exception.MigrationError for everything else
    """
    destination = scheduler_hint.get("host")

    def _set_vm_state(context, instance, ex, vm_state=None,
                      task_state=None):
        # Notification payload only carries the instance uuid; this
        # local request_spec is unrelated to the outer parameter.
        request_spec = {'instance_properties': {
            'uuid': instance.uuid, },
        }
        scheduler_utils.set_vm_state_and_notify(
            context, instance.uuid, 'compute_task', 'migrate_server',
            dict(vm_state=vm_state,
                 task_state=task_state,
                 expected_task_state=task_states.MIGRATING,),
            ex, request_spec)

    migration = objects.Migration(context=context.elevated())
    migration.dest_compute = destination
    migration.status = 'accepted'
    migration.instance_uuid = instance.uuid
    migration.source_compute = instance.host
    migration.migration_type = 'live-migration'
    # A live migration keeps the flavor, so old and new ids are equal.
    if instance.obj_attr_is_set('flavor'):
        migration.old_instance_type_id = instance.flavor.id
        migration.new_instance_type_id = instance.flavor.id
    else:
        migration.old_instance_type_id = instance.instance_type_id
        migration.new_instance_type_id = instance.instance_type_id
    migration.create()

    task = self._build_live_migrate_task(context, instance, destination,
                                         block_migration, disk_over_commit,
                                         migration, request_spec)
    try:
        task.execute()
    except (exception.NoValidHost,
            exception.ComputeServiceUnavailable,
            exception.InvalidHypervisorType,
            exception.InvalidCPUInfo,
            exception.UnableToMigrateToSelf,
            exception.DestinationHypervisorTooOld,
            exception.InvalidLocalStorage,
            exception.InvalidSharedStorage,
            exception.HypervisorUnavailable,
            exception.InstanceInvalidState,
            exception.MigrationPreCheckError,
            exception.MigrationPreCheckClientException,
            exception.LiveMigrationWithOldNovaNotSupported,
            exception.MigrationSchedulerRPCError) as ex:
        # The context manager re-raises ``ex`` after the cleanup below.
        with excutils.save_and_reraise_exception():
            # TODO(johngarbutt) - eventually need instance actions here
            _set_vm_state(context, instance, ex, instance.vm_state)
            migration.status = 'error'
            migration.save()
    except Exception as ex:
        # The message is assembled from adjacent literals; each literal
        # must be closed on its own line.
        LOG.error(_LE('Migration of instance %(instance_id)s to host'
                      ' %(dest)s unexpectedly failed.'),
                  {'instance_id': instance.uuid, 'dest': destination},
                  exc_info=True)
        _set_vm_state(context, instance, ex, vm_states.ERROR,
                      instance.task_state)
        migration.status = 'error'
        migration.save()
        raise exception.MigrationError(reason=six.text_type(ex))
def test_migrate_live_migration_with_unexpected_error(self):
    """Run the shared failure helper with an empty-reason MigrationError.

    The helper receives 500 as the expected status code and is told not
    to check the response.
    """
    unexpected = exception.MigrationError(reason='')
    helper_kwargs = {
        'expected_status_code': 500,
        'check_response': False,
    }
    self._test_migrate_live_failed_with_exception(unexpected,
                                                  **helper_kwargs)
def fake_raise(*args, **kwargs):
    """Ignore every argument and raise MigrationError('test failure')."""
    failure = exception.MigrationError(reason='test failure')
    raise failure