def host_has_instances(host, skip_stopped=False):
    """
    Ask the instance director whether instances are located on a host

    The skip_stopped flag is forwarded unchanged to the instance director
    and the director's answer is returned as-is.
    """
    from nfv_vim import directors

    return directors.get_instance_director().host_has_instances(
        host, skip_stopped=skip_stopped)
def host_enabled(host):
    """
    React to a host becoming enabled

    Logs the notification and asks the instance director to recover
    instances.
    """
    from nfv_vim import directors

    message = ("Notify other directors that the host %s is enabled."
               % host.name)
    DLOG.info(message)

    directors.get_instance_director().recover_instances()
def host_abort(host):
    """
    React to a host abort being in progress

    Logs the notification and asks the instance director to cancel the
    host operation registered under the host's name.
    """
    from nfv_vim import directors

    host_name = host.name
    DLOG.info("Notify other directors that a host %s abort is inprogress."
              % host_name)

    directors.get_instance_director().host_operation_cancel(host_name)
def host_services_disabled(host):
    """
    React to the services of a host being disabled

    Logs the notification, forwards the host to the instance director and
    returns whatever host operation the director hands back.
    """
    from nfv_vim import directors

    DLOG.info(
        "Notify other directors that the host %s services are disabled."
        % host.name)

    return directors.get_instance_director().host_services_disabled(host)
def _audit_nfvi_hypervisors_callback(timer_id):
    """
    Audit Hypervisors

    Generator-based callback: the audit response is received through
    ``yield``.  On a completed response the hypervisor table is reconciled
    with the reported hypervisors (new ones are added, known ones are
    updated, unreported ones are removed).  Whatever the outcome, the main
    audit is marked as no longer in progress and the timer identified by
    timer_id is rescheduled.  Instance recovery is requested when a
    hypervisor was added or has just changed to the enabled state.
    """
    global _main_audit_inprogress
    global _nfvi_hypervisors_to_audit

    response = (yield)
    DLOG.verbose("Audit-Hypervisors callback, response=%s." % response)

    trigger_recovery = False
    if response['completed']:
        hypervisor_table = tables.tables_get_hypervisor_table()

        # Start by assuming every known hypervisor is stale; entries are
        # struck off this list as they show up in the response.
        deletable_hypervisors = list(hypervisor_table)

        for nfvi_hypervisor in response['result-data']:
            hypervisor = hypervisor_table.get(nfvi_hypervisor.uuid, None)
            if hypervisor is None:
                hypervisor = objects.Hypervisor(nfvi_hypervisor)
                hypervisor_table[nfvi_hypervisor.uuid] = hypervisor
                trigger_recovery = True
            elif nfvi_hypervisor.uuid in deletable_hypervisors:
                # Guarded: a uuid reported more than once (or one that was
                # added to the table earlier in this same pass) is not in
                # the snapshot and list.remove would raise ValueError.
                deletable_hypervisors.remove(nfvi_hypervisor.uuid)

            if nfvi_hypervisor.uuid not in _nfvi_hypervisors_to_audit:
                _nfvi_hypervisors_to_audit[nfvi_hypervisor.uuid] \
                    = nfvi_hypervisor.uuid

            prev_state = hypervisor.oper_state
            hypervisor.nfvi_hypervisor_update(nfvi_hypervisor)

            if (hypervisor.oper_state != prev_state and
                    nfvi.objects.v1.HYPERVISOR_OPER_STATE.ENABLED ==
                    hypervisor.oper_state):
                trigger_recovery = True

        # Anything still listed was not reported by the audit.
        for hypervisor_id in deletable_hypervisors:
            del hypervisor_table[hypervisor_id]
            if hypervisor_id in _nfvi_hypervisors_to_audit:
                del _nfvi_hypervisors_to_audit[hypervisor_id]
    else:
        DLOG.error("Audit-Hypervisors callback, not completed, responses=%s."
                   % response)

    _main_audit_inprogress = False
    timers.timers_reschedule_timer(timer_id, 2)  # 2 seconds later

    if trigger_recovery:
        # Hypervisor is now available, there is potential to recover
        # instances.
        DLOG.info("Recover-Instances-Audit triggered by hypervisor audit.")
        instance_director = directors.get_instance_director()
        instance_director.recover_instances()
def host_offline(host):
    """
    React to a host going offline

    Logs the notification, informs the instance director that the host is
    offline and then asks it to recover instances.
    """
    from nfv_vim import directors

    message = ("Notify other directors that the host %s is offline."
               % host.name)
    DLOG.info(message)

    director = directors.get_instance_director()
    director.host_offline(host)

    # With the host offline, instances located on it may now be
    # recoverable (i.e. they can be evacuated).
    director.recover_instances()
def host_audit(host):
    """
    React to a host audit being in progress

    Logs the notification and forwards the host audit to the instance
    director first and to the software management director second.
    """
    from nfv_vim import directors

    DLOG.verbose("Notify other directors that a host %s audit is inprogress."
                 % host.name)

    # Order matters: instance director first, then sw-mgmt director.
    director_getters = (directors.get_instance_director,
                        directors.get_sw_mgmt_director)
    for get_director in director_getters:
        get_director().host_audit(host)
def vim_instance_api_delete_instance(connection, msg):
    """
    Handle Delete-Instance API request

    Looks up the instance named by msg.uuid.  When it is known, the
    instance director is asked to delete it and the uuid is echoed in the
    response; otherwise the response result is set to NOT_FOUND.  The
    serialized response is sent and the connection is closed.
    """
    DLOG.verbose("Delete instance %s." % msg.uuid)

    response = rpc.APIResponseDeleteInstance()
    instance = tables.tables_get_instance_table().get(msg.uuid, None)

    if instance is None:
        response.result = rpc.RPC_MSG_RESULT.NOT_FOUND
    else:
        directors.get_instance_director().delete_instance(instance)
        response.uuid = msg.uuid

    payload = response.serialize()
    connection.send(payload)
    DLOG.verbose("Sent response=%s" % response)
    connection.close()
def vim_instance_api_create_instance(connection, msg):
    """
    Handle Create-Instance API request

    Records the requesting connection under the instance name and asks the
    instance director to create the instance described by msg, passing
    _create_instance_callback as the completion callback.
    """
    global _instance_create_operations

    DLOG.verbose("Create instance, name=%s." % msg.name)

    # Keep track of which connection asked for this instance name.
    _instance_create_operations[msg.name] = connection

    # The request carries a single network uuid; the director is handed a
    # one-element list of network dictionaries.
    networks = [{"uuid": msg.network_uuid}]

    directors.get_instance_director().create_instance(
        msg.name,
        msg.instance_type_uuid,
        msg.vcpus,
        msg.memory_mb,
        msg.disk_gb,
        msg.ephemeral_gb,
        msg.swap_gb,
        msg.image_uuid,
        None,
        networks,
        msg.auto_recovery,
        msg.live_migration_timeout,
        msg.live_migration_max_downtime,
        _create_instance_callback)
def handle_event(self, instance, event, event_data=None):
    """
    Handle event while in the delete state

    Returns INSTANCE_STATE.INITIAL once the delete task has completed,
    otherwise the name of this state.
    """
    from nfv_vim import directors

    instance_director = directors.get_instance_director()

    if event in (INSTANCE_EVENT.TASK_START, INSTANCE_EVENT.AUDIT):
        # Task-start and audit are handled alike: create and start a
        # delete task when none is running, or restart one that failed or
        # timed out.
        if not instance.task.inprogress():
            instance.task = DeleteTask(instance)
            instance.task.start()
        elif instance.task.is_failed() or instance.task.timed_out():
            instance.task.start()

    elif INSTANCE_EVENT.TASK_COMPLETED == event:
        DLOG.debug("Delete completed for %s." % instance.name)
        instance.deleted()
        instance_director.cleanup_instance(instance.uuid)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_FAILED == event:
        DLOG.info("Delete failed for %s." % instance.name)

    elif INSTANCE_EVENT.TASK_TIMEOUT == event:
        DLOG.info("Delete timed out for %s." % instance.name)

    else:
        DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

    return self.name
def _audit_nfvi_instances_callback(timer_id):
    """
    Audit Instances

    Generator-based callback: the audit response is received through
    ``yield``.  Each completed response whose page-request-id matches the
    current paging state contributes one page of (uuid, name) pairs.  Once
    paging reports done, instances that were never reported are told they
    were deleted.  A mismatched or incomplete response restarts paging
    from the first page.  In every case a new page-request-id is set, the
    main audit is marked as no longer in progress and the timer identified
    by timer_id is rescheduled.
    """
    global _main_audit_inprogress
    global _deletable_instances, _nfvi_instances_paging
    global _nfvi_instances_to_audit, _nfvi_instance_outstanding

    response = (yield)
    DLOG.verbose("Audit-Instances callback, response=%s." % response)

    trigger_recovery = False
    if response['completed']:
        if response[
                'page-request-id'] == _nfvi_instances_paging.page_request_id:
            instance_table = tables.tables_get_instance_table()

            # First page of a pass: every known instance is a deletion
            # candidate until a page reports it.
            if _deletable_instances is None:
                _deletable_instances = list(instance_table)

            for instance_uuid, instance_name in response['result-data']:
                instance = instance_table.get(instance_uuid, None)
                if instance is not None:
                    if instance.uuid in _deletable_instances:
                        _deletable_instances.remove(instance.uuid)

                # Queue every reported instance for audit, whether or not
                # it is already in the instance table.
                if instance_uuid not in _nfvi_instances_to_audit:
                    _nfvi_instances_to_audit[instance_uuid] = instance_name

            if _nfvi_instances_paging.done:
                # All pages seen; what is left was not reported.
                for instance_uuid in _deletable_instances:
                    instance = instance_table.get(instance_uuid, None)
                    if instance is not None:
                        DLOG.info("Deleting instance %s, audit mismatch"
                                  % instance_uuid)
                        instance.nfvi_instance_deleted()
                        if instance.is_deleted():
                            trigger_recovery = True
                            del instance_table[instance_uuid]
                            if instance_uuid in _nfvi_instances_to_audit:
                                del _nfvi_instances_to_audit[instance_uuid]
                            if instance_uuid in _nfvi_instance_outstanding:
                                del _nfvi_instance_outstanding[instance_uuid]

                # Reset the candidate list and paging for the next pass.
                _deletable_instances = list(instance_table)
                _nfvi_instances_paging.first_page()
            else:
                DLOG.verbose("Paging is not done for instances.")
        else:
            # Response does not belong to the page that is currently
            # expected; start the pass over.
            DLOG.error("Audit-Instances callback, page-request-id mismatch, "
                       "responses=%s, page-request-id=%s."
                       % (response, _nfvi_instances_paging.page_request_id))
            instance_table = tables.tables_get_instance_table()
            _deletable_instances = list(instance_table)
            _nfvi_instances_paging.first_page()
    else:
        # Request did not complete; start the pass over.
        DLOG.error("Audit-Instances callback, not completed, responses=%s."
                   % response)
        instance_table = tables.tables_get_instance_table()
        _deletable_instances = list(instance_table)
        _nfvi_instances_paging.first_page()

    _nfvi_instances_paging.set_page_request_id()
    _main_audit_inprogress = False
    timers.timers_reschedule_timer(timer_id, 2)  # 2 seconds later

    if trigger_recovery:
        # Resources have been freed, there is potential to recover
        # instances.
        DLOG.info("Recover-Instances-Audit triggered by instance deletion.")
        instance_director = directors.get_instance_director()
        instance_director.recover_instances()
def handle_event(self, instance, event, event_data=None):
    """
    Handle event while in the start state

    Returns INSTANCE_STATE.INITIAL when the start action ends (completed,
    failed, timed out or cancelled), otherwise the name of this state.
    """
    from nfv_vim import directors

    instance_director = directors.get_instance_director()

    reason = '' if event_data is None else event_data.get('reason', '')

    # A running task sees the event first; when it reports the event as
    # handled nothing more is done here.
    if instance.task.inprogress() and \
            instance.task.handle_event(event, event_data):
        return self.name

    if INSTANCE_EVENT.TASK_STOP == event:
        instance_director.instance_start_complete(
            instance, instance.host_name,
            failed=False, timed_out=False, cancelled=True)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_COMPLETED == event:
        DLOG.debug("Start inprogress for %s." % instance.name)
        # Remember when the task finished; the audit branch measures its
        # wait from this timestamp.
        instance.action_fsm.wait_time = \
            timers.get_monotonic_timestamp_in_ms()

    elif INSTANCE_EVENT.TASK_FAILED == event:
        DLOG.info("Start failed for %s." % instance.name)
        instance.fail_action(instance.action_fsm_action_type, reason)
        instance_director.instance_start_complete(
            instance, instance.host_name, failed=True)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_TIMEOUT == event:
        DLOG.info("Start timed out for %s." % instance.name)
        instance.fail_action(instance.action_fsm_action_type, 'timeout')
        instance_director.instance_start_complete(
            instance, instance.host_name, failed=False, timed_out=True)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.NFVI_ENABLED == event:
        instance_director.instance_start_complete(
            instance, instance.host_name)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.AUDIT == event:
        if not instance.task.inprogress():
            if instance.is_enabled():
                instance_director.instance_start_complete(
                    instance, instance.host_name)
                return INSTANCE_STATE.INITIAL

            current_ms = timers.get_monotonic_timestamp_in_ms()
            waited_secs = \
                (current_ms - instance.action_fsm.wait_time) / 1000
            # Only wait 60 seconds for the instance to start.
            if 60 <= waited_secs:
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_start_complete(
                    instance, instance.host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

    else:
        DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

    return self.name
def handle_event(self, instance, event, event_data=None):
    """
    Handle event while in the live migrate state

    :param instance: instance the event applies to
    :param event: INSTANCE_EVENT value being processed
    :param event_data: optional mapping; only its 'reason' entry is read
    :return: INSTANCE_STATE.LIVE_MIGRATE_FINISH or INSTANCE_STATE.INITIAL
        when this state is left, otherwise the name of this state
    """
    from nfv_vim import directors

    instance_director = directors.get_instance_director()

    if event_data is not None:
        reason = event_data.get('reason', '')
    else:
        reason = ''

    # A running task sees the event first; when it reports the event as
    # handled nothing more is done here.
    if instance.task.inprogress():
        if instance.task.handle_event(event, event_data):
            return self.name

    if INSTANCE_EVENT.TASK_STOP == event:
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.NFVI_HOST_CHANGED == event:
        # The instance is now on a different host than the one the action
        # started from: report the migrate as complete.
        if instance.action_fsm.from_host_name != instance.host_name:
            DLOG.info("Live-Migrate for %s from host %s to host %s."
                      % (instance.name, instance.action_fsm.from_host_name,
                         instance.host_name))
            instance_director.instance_migrate_complete(
                instance, instance.action_fsm.from_host_name)
            guest_services = instance.guest_services
            if guest_services.are_provisioned():
                return INSTANCE_STATE.LIVE_MIGRATE_FINISH
            else:
                return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.LIVE_MIGRATE_ROLLBACK == event:
        DLOG.info("Live-Migrate rollback for %s." % instance.name)
        guest_services = instance.guest_services
        # Tell the instance director that the live migrate failed so it
        # can update any host operation that may be in progress.
        instance_director.instance_migrate_complete(
            instance, instance.action_fsm.from_host_name, failed=True)
        if guest_services.are_provisioned():
            return INSTANCE_STATE.LIVE_MIGRATE_FINISH
        else:
            return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_COMPLETED == event:
        DLOG.debug("Live-Migrate inprogress for %s." % instance.name)

    elif INSTANCE_EVENT.TASK_FAILED == event:
        DLOG.info("Live-Migrate failed for %s." % instance.name)
        instance.fail_action(instance.action_fsm_action_type, reason)
        instance_director.instance_migrate_complete(
            instance, instance.action_fsm.from_host_name, failed=True)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_TIMEOUT == event:
        # Only logged here; timeouts are acted upon in the audit branch.
        DLOG.info("Live-Migrate timed out for %s." % instance.name)

    elif INSTANCE_EVENT.NFVI_ENABLED == event:
        if instance.is_migrating():
            if not instance._live_migration_started:
                DLOG.info("Live-Migrate starting for %s." % instance.name)
                # Live migration has started
                instance._live_migration_started = True

        elif instance._live_migration_started and \
                instance.action_fsm.from_host_name == instance.host_name:
            DLOG.info("Live-Migrate no longer in progress for %s."
                      % instance.name)
            # Live migration was in progress once, but is no longer and
            # the host has not changed. Nova does this (for example) if it
            # fails to schedule a destination host for the live migration.
            # Tell the instance director that the live migrate failed so it
            # can update any host operation that may be in progress.
            guest_services = instance.guest_services
            instance_director.instance_migrate_complete(
                instance, instance.action_fsm.from_host_name, failed=True)
            if guest_services.are_provisioned():
                return INSTANCE_STATE.LIVE_MIGRATE_FINISH
            else:
                return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.AUDIT == event:
        if instance.action_fsm.from_host_name != instance.host_name:
            # Host has changed: treat the migrate as complete.
            instance_director.instance_migrate_complete(
                instance, instance.action_fsm.from_host_name)
            guest_services = instance.guest_services
            if guest_services.are_provisioned():
                return INSTANCE_STATE.LIVE_MIGRATE_FINISH
            else:
                return INSTANCE_STATE.INITIAL

        elif not (instance.task.inprogress() or instance.is_migrating()):
            # Neither the task nor a migration is active: start (or
            # continue) a 60 second wait measured from wait_time.
            if 0 == instance.action_fsm.wait_time:
                instance.action_fsm.wait_time \
                    = timers.get_monotonic_timestamp_in_ms()

            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - instance.action_fsm.wait_time) / 1000
            if 60 <= secs_expired:
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

        else:
            # Task or migration still active: bound the overall duration,
            # measured from the start of the action.
            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - instance.action_fsm.start_time) / 1000
            max_live_migrate_wait_in_secs = \
                instance.max_live_migrate_wait_in_secs
            if 0 != max_live_migrate_wait_in_secs:
                # Add 60 seconds buffer on top of nova timeout value
                max_wait = max_live_migrate_wait_in_secs + 60
                if max_wait <= secs_expired:
                    instance.fail_action(instance.action_fsm_action_type,
                                         'timeout')
                    instance_director.instance_migrate_complete(
                        instance, instance.action_fsm.from_host_name,
                        failed=False, timed_out=True)
                    return INSTANCE_STATE.INITIAL

            elif instance.task.timed_out():
                # No maximum wait configured (value is 0): rely on the
                # task's own timeout instead.
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

    else:
        DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

    return self.name
def handle_event(self, instance, event, event_data=None):
    """
    Handle event while in the evacuate state

    :param instance: instance the event applies to
    :param event: INSTANCE_EVENT value being processed
    :param event_data: optional mapping; only its 'reason' entry is read
    :return: INSTANCE_STATE.INITIAL when this state is left, otherwise
        the name of this state
    """
    from nfv_vim import directors

    instance_director = directors.get_instance_director()

    if event_data is not None:
        reason = event_data.get('reason', '')
    else:
        reason = ''

    # A running task sees the event first; when it reports the event as
    # handled nothing more is done here.
    if instance.task.inprogress():
        if instance.task.handle_event(event, event_data):
            return self.name

    if INSTANCE_EVENT.TASK_STOP == event:
        return INSTANCE_STATE.INITIAL

    elif event in [INSTANCE_EVENT.NFVI_ENABLED,
                   INSTANCE_EVENT.NFVI_DISABLED,
                   INSTANCE_EVENT.NFVI_HOST_CHANGED]:
        # Host has changed and the instance is no longer rebuilding:
        # report the evacuate as complete.
        if instance.action_fsm.from_host_name != instance.host_name and \
                not instance.is_rebuilding():
            instance_director.instance_evacuate_complete(
                instance, instance.action_fsm.from_host_name)
            return INSTANCE_STATE.INITIAL

        elif INSTANCE_EVENT.NFVI_DISABLED == event:
            if instance.is_rebuilding():
                if not instance._evacuate_started:
                    DLOG.info("Evacuate starting for %s." % instance.name)
                    # Evacuate has started
                    instance._evacuate_started = True

            elif instance._evacuate_started and \
                    instance.action_fsm.from_host_name == instance.host_name:
                DLOG.info("Evacuate no longer in progress for %s."
                          % instance.name)
                # Evacuate was in progress once, but is no longer and
                # the host has not changed. Nova does this (for example) if
                # it fails to schedule a destination host for the evacuate.
                # Tell the instance director that the evacuate failed so it
                # can update any host operation that may be in progress.
                instance_director.instance_evacuate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=True)
                return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_COMPLETED == event:
        DLOG.debug("Evacuate inprogress for %s." % instance.name)

    elif INSTANCE_EVENT.TASK_FAILED == event:
        DLOG.info("Evacuate failed for %s." % instance.name)
        instance.fail_action(instance.action_fsm_action_type, reason)
        instance_director.instance_evacuate_complete(
            instance, instance.action_fsm.from_host_name, failed=True)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_TIMEOUT == event:
        # Only logged here; timeouts are acted upon in the audit branch.
        DLOG.info("Evacuate timed out for %s." % instance.name)

    elif INSTANCE_EVENT.AUDIT == event:
        if instance.action_fsm.from_host_name != instance.host_name and \
                not instance.is_rebuilding():
            # Host has changed and the rebuild is over: evacuate complete.
            instance_director.instance_evacuate_complete(
                instance, instance.action_fsm.from_host_name)
            return INSTANCE_STATE.INITIAL

        elif not (instance.task.inprogress() or instance.is_rebuilding()):
            # Neither the task nor a rebuild is active: start (or
            # continue) a 120 second wait measured from wait_time.
            if 0 == instance.action_fsm.wait_time:
                instance.action_fsm.wait_time \
                    = timers.get_monotonic_timestamp_in_ms()

            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - instance.action_fsm.wait_time) / 1000
            if 120 <= secs_expired:
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_evacuate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

        else:
            # Task or rebuild still active: bound the overall duration,
            # measured from the start of the action.
            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - instance.action_fsm.start_time) / 1000
            if instance.max_evacuate_wait_in_secs <= secs_expired:
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_evacuate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

            elif instance.task.timed_out():
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_evacuate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

    else:
        DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

    return self.name
def handle_event(self, instance, event, event_data=None):
    """
    Handle event while in the cold migrate state

    :param instance: instance the event applies to
    :param event: INSTANCE_EVENT value being processed
    :param event_data: optional mapping; only its 'reason' entry is read
    :return: INSTANCE_STATE.COLD_MIGRATE_CONFIRM after a resize
        notification, INSTANCE_STATE.INITIAL when the action ends,
        otherwise the name of this state
    """
    from nfv_vim import directors

    instance_director = directors.get_instance_director()

    if event_data is not None:
        reason = event_data.get('reason', '')
    else:
        reason = ''

    # A running task sees the event first; when it reports the event as
    # handled nothing more is done here.
    if instance.task.inprogress():
        if instance.task.handle_event(event, event_data):
            return self.name

    if INSTANCE_EVENT.TASK_STOP == event:
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.NFVI_RESIZED == event:
        from_host_name = instance.action_fsm.from_host_name
        instance_director.instance_migrate_complete(
            instance, from_host_name)
        return INSTANCE_STATE.COLD_MIGRATE_CONFIRM

    elif INSTANCE_EVENT.TASK_COMPLETED == event:
        if instance.action_fsm is not None:
            action_data = instance.action_fsm_data
            if action_data is not None:
                if action_data.initiated_from_cli():
                    DLOG.debug("Cold-Migrate complete for %s, initiated "
                               "from cli." % instance.name)
                    return INSTANCE_STATE.INITIAL

        DLOG.debug("Cold-Migrate inprogress for %s." % instance.name)

    elif INSTANCE_EVENT.TASK_FAILED == event:
        DLOG.info("Cold-Migrate failed for %s." % instance.name)
        instance.fail_action(instance.action_fsm_action_type, reason)
        from_host_name = instance.action_fsm.from_host_name
        instance_director.instance_migrate_complete(
            instance, from_host_name, failed=True)
        return INSTANCE_STATE.INITIAL

    elif INSTANCE_EVENT.TASK_TIMEOUT == event:
        # Only logged here; timeouts are acted upon in the audit branch.
        DLOG.info("Cold-Migrate timed out for %s." % instance.name)

    elif INSTANCE_EVENT.AUDIT == event:
        if not (instance.task.inprogress() or instance.is_resizing()):
            # Neither the task nor a resize is active: start (or
            # continue) a 60 second wait measured from wait_time.
            if 0 == instance.action_fsm.wait_time:
                instance.action_fsm.wait_time \
                    = timers.get_monotonic_timestamp_in_ms()

            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - instance.action_fsm.wait_time) / 1000
            if 60 <= secs_expired:
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                # This is a migrate, so the migrate completion must be
                # reported (the evacuate completion was called here by
                # mistake), matching every other exit of this state.
                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

        else:
            # Task or resize still active: bound the overall duration,
            # measured from the start of the action.
            now_ms = timers.get_monotonic_timestamp_in_ms()
            secs_expired = (now_ms - instance.action_fsm.start_time) / 1000
            if instance.max_cold_migrate_wait_in_secs <= secs_expired:
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

            elif instance.task.timed_out():
                instance.fail_action(instance.action_fsm_action_type,
                                     'timeout')
                instance_director.instance_migrate_complete(
                    instance, instance.action_fsm.from_host_name,
                    failed=False, timed_out=True)
                return INSTANCE_STATE.INITIAL

    else:
        DLOG.verbose("Ignoring %s event for %s." % (event, instance.name))

    return self.name