def try_connect_hyp(self):
    """Check the libvirt connection in ``self.hyp_obj`` and reconnect if needed.

    If ``self.hyp_obj`` is a ``hyp`` instance, its connection is probed with
    ``conn.getLibVersion()``.  When the probe fails, or when ``self.hyp_obj``
    is not a ``hyp`` at all, a new ``hyp`` is built from ``self.hostname``,
    ``self.user`` and ``self.port`` and probed the same way.

    Returns:
        bool: True when the hypervisor answered the probe (on the existing
        connection or after reconnecting), False when reconnection failed.
    """
    if isinstance(self.hyp_obj, hyp):
        try:
            self.hyp_obj.conn.getLibVersion()
        except Exception:
            log.info('getLibVersion failed in connection testing, reconnecting to hypervisor {} from status thread'
                     .format(self.hostname))
        else:
            # The existing connection is alive: report success explicitly
            # instead of falling off the end of the function with None.
            self.hyp_obj.connected = True
            return True
    else:
        log.info('unknown type, not hyp, reconnecting to hypervisor {} from status thread'
                 .format(self.hostname))

    # Single reconnection path shared by both failure modes above.
    try:
        self.hyp_obj = hyp(self.hostname, user=self.user, port=self.port)
        self.hyp_obj.conn.getLibVersion()
        self.hyp_obj.connected = True
        return True
    except Exception:
        log.info('reconnection to hypervisor {} in status thread fail'.format(self.hostname))
        try:
            self.hyp_obj.connected = False
        except AttributeError:
            # hyp() itself raised, so hyp_obj may still be a placeholder
            # (e.g. None) that cannot carry a ``connected`` flag.
            pass
        return False
def try_hyp_connection(hyp_id, hostname, port, user):
    """Try to connect to a hypervisor and record the outcome.

    The hypervisor status is set to 'TryConnection', a ``hyp`` object is
    created with ``try_ssh_autologin=True`` and its failure reason is stored
    through ``update_hypervisor_failed_connection``.  On success the status
    becomes 'ReadyToStart' and the connection is closed again; on failure the
    status becomes 'Error' with the failure reason as detail.

    Args:
        hyp_id: identifier used for the status updates and log messages.
        hostname: host passed to ``hyp``.
        port: port passed to ``hyp``.
        user: user passed to ``hyp``.

    Returns:
        tuple: ``(hyp_obj, ok)`` where ``ok`` is True only when
        ``hyp_obj.connected`` is True.
    """
    update_hyp_status(hyp_id, 'TryConnection')
    log.debug('Starting trying to connect to hypervisor {} '.format(hostname))

    # NOTE to developers (translated): "re-enable when authentication with
    # modern SSH algorithms stops failing".
    # NOTE(review): the call that note refers to is the one below, which is
    # already active -- the note looks stale; confirm and remove.
    hyp_obj = hyp(hostname, user=user, port=port, try_ssh_autologin=True)

    # Read the failure reason defensively *before* logging it, so a missing
    # attribute is reported instead of crashing on the debug line.
    try:
        reason = hyp_obj.fail_connected_reason
    except Exception as e:
        log.error('try hyp {}, error: {}'.format(hyp_id, e))
        reason = 'no reason available'
    log.debug('hostname: {} , reason: {}'.format(hostname, reason))

    # Stored unconditionally, also when the connection succeeded.
    update_hypervisor_failed_connection(hyp_id, reason)

    if hyp_obj.connected is True:
        ok = True
        log.debug('hypervisor {} ready'.format(hyp_id))
        update_hyp_status(hyp_id, 'ReadyToStart')
        hyp_obj.disconnect()
    else:
        ok = False
        log.error('hypervisor {} failed when trying to connect'.format(hyp_id))
        log.error('fail_connected_reason: {}'.format(reason))
        update_hyp_status(hyp_id, 'Error', detail=reason)

    return hyp_obj, ok
def hyp_from_hyp_id(hyp_id):
    """Build a ``hyp`` object for the hypervisor identified by ``hyp_id``.

    The host, port and user are resolved with ``get_hyp_hostname_from_id``.

    Args:
        hyp_id: identifier of the hypervisor to look up.

    Returns:
        The new ``hyp`` instance, or False when the lookup or the
        construction raised an exception.
    """
    try:
        host, port, user = get_hyp_hostname_from_id(hyp_id)
        return hyp(host, user=user, port=port)
    except Exception:
        # Deliberate best-effort: callers receive False on any failure.
        # ``Exception`` (not a bare except) so that KeyboardInterrupt and
        # SystemExit still propagate.
        return False
def try_connect_hyp(self):
    """Check the libvirt connection in ``self.hyp_obj`` and reconnect if needed.

    If ``self.hyp_obj`` is a ``hyp`` instance, its connection is probed with
    ``conn.getLibVersion()``.  When the probe fails, or when ``self.hyp_obj``
    is not a ``hyp`` at all, a new ``hyp`` is built from ``self.hostname``,
    ``self.user`` and ``self.port`` and probed the same way.

    Returns:
        bool: True when the hypervisor answered the probe (on the existing
        connection or after reconnecting), False when reconnection failed.
    """
    if isinstance(self.hyp_obj, hyp):
        try:
            self.hyp_obj.conn.getLibVersion()
        except Exception:
            log.info('getLibVersion failed in connection testing, reconnecting to hypervisor {} from status thread'
                     .format(self.hostname))
        else:
            # The existing connection is alive: report success explicitly
            # instead of falling off the end of the function with None.
            self.hyp_obj.connected = True
            return True
    else:
        log.info('unknown type, not hyp, reconnecting to hypervisor {} from status thread'
                 .format(self.hostname))

    # Single reconnection path shared by both failure modes above.
    try:
        self.hyp_obj = hyp(self.hostname, user=self.user, port=self.port)
        self.hyp_obj.conn.getLibVersion()
        self.hyp_obj.connected = True
        return True
    except Exception:
        log.info('reconnection to hypervisor {} in status thread fail'.format(self.hostname))
        try:
            self.hyp_obj.connected = False
        except AttributeError:
            # hyp() itself raised, so hyp_obj may still be a placeholder
            # (e.g. None) that cannot carry a ``connected`` flag.
            pass
        return False
def __init__(self, id_hyp, hostname, polling_interval, rate_allowed_diff_between_samples=2.5, port=22,
             user='******'):
    """Store the polling configuration and open the hypervisor object.

    Args:
        id_hyp: hypervisor identifier, stored as ``self.hyp_id``.
        hostname: host passed to ``hyp``.
        polling_interval: stored as-is in ``self.polling_interval``.
        rate_allowed_diff_between_samples: stored as-is (default 2.5).
        port: port passed to ``hyp`` (default 22).
        user: user passed to ``hyp``.
    """
    self.hyp_id = id_hyp
    self.polling_interval = polling_interval
    self.rate_allowed_diff_between_samples = rate_allowed_diff_between_samples
    self.hostname = hostname
    self.user = user
    self.port = port

    # The hyp object is created right here in the constructor.
    self.hyp_obj = hyp(hostname, user=self.user, port=self.port)

    # Bounded history: the deque drops its oldest entry once it holds
    # max_len_queue_previous_hyp_stats items.
    self.fifo_recent_hyp_stats = deque([], maxlen=max_len_queue_previous_hyp_stats)
    self.recent_domains_stats = {}
def try_hyp_connection(hyp_id, hostname, port, user):
    """Try to connect to a hypervisor, check KVM support and record the outcome.

    The hypervisor status is set to 'TryConnection', a ``hyp`` object is
    created with ``try_ssh_autologin=True`` and its failure reason is stored
    through ``update_hypervisor_failed_connection``.  When connected, the KVM
    module and hypervisor info are collected and saved with
    ``update_db_hyp_info``; the status becomes 'ReadyToStart' only if
    ``info['kvm_module']`` is 'intel' or 'amd', otherwise 'Error'.  The
    connection is closed before returning.

    Args:
        hyp_id: identifier used for the status updates and log messages.
        hostname: host passed to ``hyp``.
        port: port passed to ``hyp``.
        user: user passed to ``hyp``.

    Returns:
        tuple: ``(hyp_obj, ok)`` where ``ok`` is True only when the
        connection succeeded and a supported KVM module was found.
    """
    update_hyp_status(hyp_id, 'TryConnection')
    log.debug('Starting trying to connect to hypervisor {} '.format(hostname))

    # NOTE to developers (translated): "re-enable when authentication with
    # modern SSH algorithms stops failing".
    # NOTE(review): the call that note refers to is the one below, which is
    # already active -- the note looks stale; confirm and remove.
    hyp_obj = hyp(hostname, user=user, port=port, try_ssh_autologin=True)

    # Read the failure reason defensively *before* logging it, so a missing
    # attribute is reported instead of crashing on the debug line.
    try:
        reason = hyp_obj.fail_connected_reason
    except Exception as e:
        log.error('try hyp {}, error: {}'.format(hyp_id, e))
        reason = 'no reason available'
    log.debug('hostname: {} , reason: {}'.format(hostname, reason))

    # Stored unconditionally, also when the connection succeeded.
    update_hypervisor_failed_connection(hyp_id, reason)

    if hyp_obj.connected is not True:
        log.error('hypervisor {} failed when trying to connect'.format(hyp_id))
        log.error('fail_connected_reason: {}'.format(reason))
        update_hyp_status(hyp_id, 'Error', detail=reason)
        return hyp_obj, False

    log.debug('hypervisor {} libvirt connection ready'.format(hyp_id))
    try:
        hyp_obj.get_kvm_mod()
        hyp_obj.get_hyp_info()
        update_db_hyp_info(hyp_id, hyp_obj.info)

        ok = hyp_obj.info['kvm_module'] in ('intel', 'amd')
        if ok:
            update_hyp_status(hyp_id, 'ReadyToStart')
        else:
            log.error(
                'hypervisor {} has not virtualization support (VT-x for Intel processors and AMD-V for AMD processors). '
                .format(hyp_id))
            update_hyp_status(
                hyp_id, 'Error',
                detail="KVM requires that the virtual machine host's processor has virtualization "
                       "support (named VT-x for Intel processors and AMD-V for AMD processors). "
                       "Check CPU capabilities and enable virtualization support in your BIOS.")
    finally:
        # Close the probe connection even if gathering the info raised,
        # otherwise the connection would leak.
        hyp_obj.disconnect()

    return hyp_obj, ok
def __init__(self, id_hyp, hostname, polling_interval, rate_allowed_diff_between_samples=2.5, port=22,
             user='******'):
    """Store the polling configuration and open the hypervisor object.

    Args:
        id_hyp: hypervisor identifier, stored as ``self.hyp_id``.
        hostname: host passed to ``hyp``.
        polling_interval: stored as-is in ``self.polling_interval``.
        rate_allowed_diff_between_samples: stored as-is (default 2.5).
        port: port passed to ``hyp`` (default 22).
        user: user passed to ``hyp``.
    """
    self.hyp_id = id_hyp
    self.polling_interval = polling_interval
    self.rate_allowed_diff_between_samples = rate_allowed_diff_between_samples
    self.hostname = hostname
    self.user = user
    self.port = port

    # The hyp object is created right here in the constructor.
    self.hyp_obj = hyp(hostname, user=self.user, port=self.port)

    # Bounded history: the deque drops its oldest entry once it holds
    # max_len_queue_previous_hyp_stats items.
    self.fifo_recent_hyp_stats = deque([], maxlen=max_len_queue_previous_hyp_stats)
    self.recent_domains_stats = {}
def try_hyp_connection(hyp_id, hostname, port, user):
    """Try to connect to a hypervisor, check KVM support and record the outcome.

    The hypervisor status is set to 'TryConnection', a ``hyp`` object is
    created with ``try_ssh_autologin=True`` and its failure reason is stored
    through ``update_hypervisor_failed_connection``.  When connected, the KVM
    module and hypervisor info are collected and saved with
    ``update_db_hyp_info``; the status becomes 'ReadyToStart' only if
    ``info['kvm_module']`` is 'intel' or 'amd', otherwise 'Error'.  The
    connection is closed before returning.

    Args:
        hyp_id: identifier used for the status updates and log messages.
        hostname: host passed to ``hyp``.
        port: port passed to ``hyp``.
        user: user passed to ``hyp``.

    Returns:
        tuple: ``(hyp_obj, ok)`` where ``ok`` is True only when the
        connection succeeded and a supported KVM module was found.
    """
    update_hyp_status(hyp_id, 'TryConnection')
    log.debug('Starting trying to connect to hypervisor {} '.format(hostname))

    # NOTE to developers (translated): "re-enable when authentication with
    # modern SSH algorithms stops failing".
    # NOTE(review): the call that note refers to is the one below, which is
    # already active -- the note looks stale; confirm and remove.
    hyp_obj = hyp(hostname, user=user, port=port, try_ssh_autologin=True)

    # Read the failure reason defensively *before* logging it, so a missing
    # attribute is reported instead of crashing on the debug line.
    try:
        reason = hyp_obj.fail_connected_reason
    except Exception as e:
        log.error('try hyp {}, error: {}'.format(hyp_id, e))
        reason = 'no reason available'
    log.debug('hostname: {} , reason: {}'.format(hostname, reason))

    # Stored unconditionally, also when the connection succeeded.
    update_hypervisor_failed_connection(hyp_id, reason)

    if hyp_obj.connected is not True:
        log.error('hypervisor {} failed when trying to connect'.format(hyp_id))
        log.error('fail_connected_reason: {}'.format(reason))
        update_hyp_status(hyp_id, 'Error', detail=reason)
        return hyp_obj, False

    log.debug('hypervisor {} libvirt connection ready'.format(hyp_id))
    try:
        hyp_obj.get_kvm_mod()
        hyp_obj.get_hyp_info()
        update_db_hyp_info(hyp_id, hyp_obj.info)

        ok = hyp_obj.info['kvm_module'] in ('intel', 'amd')
        if ok:
            update_hyp_status(hyp_id, 'ReadyToStart')
        else:
            log.error(
                'hypervisor {} has not virtualization support (VT-x for Intel processors and AMD-V for AMD processors). '
                .format(hyp_id))
            update_hyp_status(
                hyp_id, 'Error',
                detail="KVM requires that the virtual machine host's processor has virtualization "
                       "support (named VT-x for Intel processors and AMD-V for AMD processors). "
                       "Check CPU capabilities and enable virtualization support in your BIOS.")
    finally:
        # Close the probe connection even if gathering the info raised,
        # otherwise the connection would leak.
        hyp_obj.disconnect()

    return hyp_obj, ok
def run(self):
    """Worker loop: execute queued actions against one hypervisor.

    Connects to the hypervisor identified by ``self.hyp_id`` and then, until
    ``self.stop`` is True, takes actions (dicts with at least a ``'type'``
    key) from ``self.queue_actions`` and dispatches on the type:

    * ``start_paused_domain``: create the domain paused, destroy it again and
      record whether that creation test succeeded.
    * ``start_domain`` / ``stop_domain``: create or destroy a domain.
    * ``create_disk`` / ``delete_disk`` / ``killall_curl`` / ``delete_media``:
      delegated to the matching ``launch_*`` helper.
    * ``add_media_hot``: accepted but does nothing.
    * ``hyp_info``: refresh the hypervisor info.
    * ``stop_thread``: leave the loop.

    When the queue stays empty for ``TIMEOUT_QUEUES`` the connection is
    probed with ``getLibVersion()``.  If the probe and the retries fail and a
    reconnection also fails, the hypervisor is set to 'Error', its started
    domains are marked unknown, the pending actions are handed to
    ``self.queue_master`` (when set) and the loop ends with
    ``self.active = False``.
    """
    from pprint import pformat

    self.tid = get_tid()
    logs.workers.info('starting thread: {} (TID {})'.format(self.name, self.tid))
    host, port, user = get_hyp_hostname_from_id(self.hyp_id)
    port = int(port)
    self.hostname = host
    self.h = hyp(self.hostname, user=user, port=port)
    update_db_hyp_info(self.hyp_id, self.h.info)

    while self.stop is not True:
        try:
            # Example action: {'type': 'start_domain', 'xml': ..., 'id_domain': ...}
            action = self.queue_actions.get(timeout=TIMEOUT_QUEUES)
            action_type = action['type']
            logs.workers.debug('received action in working thread {}'.format(action_type))

            ## START PAUSED DOMAIN (creation test)
            if action_type == 'start_paused_domain':
                logs.workers.debug('xml to start paused some lines...: {}'.format(action['xml'][30:100]))
                try:
                    self.h.conn.createXML(action['xml'], flags=VIR_DOMAIN_START_PAUSED)
                    domain_id = action['id_domain']

                    # 32 is the constant for domains paused
                    # reference: https://libvirt.org/html/libvirt-libvirt-domain.html#VIR_CONNECT_LIST_DOMAINS_PAUSED
                    FLAG_LIST_DOMAINS_PAUSED = 32
                    paused_domains = self.h.conn.listAllDomains(FLAG_LIST_DOMAINS_PAUSED)
                    paused_by_name = dict(zip([d.name() for d in paused_domains], paused_domains))

                    if domain_id in paused_by_name:
                        # domain started in pause mode
                        domain = paused_by_name[domain_id]
                        try:
                            # First call is only an existence probe; it raises
                            # libvirtError if the domain is already gone.
                            domain.isActive()
                            domain.destroy()
                        except libvirtError as e:
                            error_msg = pformat(e.get_error_message())
                            update_domain_status(
                                'FailedCreatingDomain', domain_id, hyp_id=self.hyp_id,
                                detail='domain {} failed when try to destroy from paused domain in hypervisor {}. '
                                       'creating domain operation is aborted'.format(domain_id, self.hyp_id))
                            logs.workers.error(
                                'Exception in libvirt starting paused xml for domain {} in hypervisor {}. '
                                'Exception message: {} '.format(domain_id, self.hyp_id, error_msg))
                            continue

                        # Verify the destroy: the domain is gone either when
                        # isActive() raises (lookup fails) or when it reports
                        # the domain as inactive.
                        try:
                            domain_active = bool(domain.isActive())
                        except Exception:
                            domain_active = False

                        if domain_active is False:
                            # domain is destroyed, all ok
                            logs.workers.debug('verified domain {} is destroyed'.format(domain_id))
                            update_domain_status(
                                'CreatingDomain', domain_id, hyp_id='',
                                detail='Domain created and test OK: Started, paused and now stopped in hyp {}'.format(
                                    self.hyp_id))
                            logs.workers.debug(
                                'domain {} creating operation finalished. Started paused and destroyed in hypervisor {}. Now status is Stopped. READY TO USE'.format(
                                    domain_id, self.hyp_id))
                        else:
                            update_domain_status(
                                'Crashed', domain_id, hyp_id=self.hyp_id,
                                detail='Domain is created, started in pause mode but not destroyed,creating domain operation is aborted')
                            logs.workers.error(
                                'domain {} started paused but not destroyed in hypervisor {}, must be destroyed'.format(
                                    domain_id, self.hyp_id))
                    else:
                        update_domain_status(
                            'Crashed', domain_id, hyp_id=self.hyp_id,
                            detail='XML for domain {} can not start in pause mode in hypervisor {}, creating domain operation is aborted by unknown cause'.format(
                                domain_id, self.hyp_id))
                        logs.workers.error(
                            'XML for domain {} can not start in pause mode in hypervisor {}, creating domain operation is aborted, not exception, rare case, unknown cause'.format(
                                domain_id, self.hyp_id))

                except libvirtError as e:
                    error_msg = pformat(e.get_error_message())
                    update_domain_status(
                        'FailedCreatingDomain', action['id_domain'], hyp_id=self.hyp_id,
                        detail='domain {} failed when try to start in pause mode in hypervisor {}. '
                               'creating domain operation is aborted'.format(action['id_domain'], self.hyp_id))
                    logs.workers.error(
                        'Exception in libvirt starting paused xml for domain {} in hypervisor {}. '
                        'Exception message: {} '.format(action['id_domain'], self.hyp_id, error_msg))
                except Exception as e:
                    update_domain_status(
                        'Crashed', action['id_domain'], hyp_id=self.hyp_id,
                        detail='domain {} failed when try to start in pause mode in hypervisor {}. '
                               'creating domain operation is aborted'.format(action['id_domain'], self.hyp_id))
                    logs.workers.error(
                        'Exception starting paused xml for domain {} in hypervisor {}. NOT LIBVIRT EXCEPTION, RARE CASE. Exception message: {}'.format(
                            action['id_domain'], self.hyp_id, str(e)))

            ## START DOMAIN
            elif action_type == 'start_domain':
                logs.workers.debug('xml to start some lines...: {}'.format(action['xml'][30:100]))
                try:
                    self.h.conn.createXML(action['xml'])
                    # The 'Started' status is not written here: it is saved
                    # when the "started" event arrives.
                    logs.workers.debug('STARTED domain {}: createdXML action in hypervisor {} has been sent'.format(
                        action['id_domain'], host))
                except libvirtError as e:
                    update_domain_status('Failed', action['id_domain'], hyp_id=self.hyp_id,
                                         detail=("Hypervisor can not create domain with libvirt exception: " + str(e)))
                    logs.workers.debug('exception in starting domain {}: {}'.format(action['id_domain'], e))
                except Exception as e:
                    update_domain_status('Failed', action['id_domain'], hyp_id=self.hyp_id,
                                         detail=("Exception when starting domain: " + str(e)))
                    logs.workers.debug('exception in starting domain {}: {}'.format(action['id_domain'], e))

            ## STOP DOMAIN
            elif action_type == 'stop_domain':
                logs.workers.debug('action stop domain: {}'.format(action['id_domain']))
                try:
                    self.h.conn.lookupByName(action['id_domain']).destroy()
                    logs.workers.debug('STOPPED domain {}'.format(action['id_domain']))

                    update_domain_status('Stopped', action['id_domain'], hyp_id='')
                    if action.get('delete_after_stopped', False) is True:
                        update_domain_status('Deleting', action['id_domain'], hyp_id='')
                except Exception as e:
                    update_domain_status('Failed', action['id_domain'], hyp_id=self.hyp_id, detail=str(e))
                    logs.workers.debug('exception in stopping domain {}: {}'.format(action['id_domain'], e))

            elif action_type in ['create_disk', 'delete_disk']:
                launch_action_disk(action, self.hostname, user, port)

            elif action_type in ['add_media_hot']:
                # Accepted on purpose so it is not reported as unsupported.
                pass

            elif action_type in ['killall_curl']:
                launch_killall_curl(self.hostname, user, port)

            elif action_type in ['delete_media']:
                final_status = action.get('final_status', 'Deleted')
                launch_delete_media(action, self.hostname, user, port, final_status=final_status)

            elif action_type == 'hyp_info':
                self.h.get_hyp_info()
                logs.workers.debug('hypervisor motherboard: {}'.format(self.h.info['motherboard_manufacturer']))

            ## DESTROY THREAD
            elif action_type == 'stop_thread':
                self.stop = True

            else:
                logs.workers.error('type action {} not supported in queue actions'.format(action_type))

        ## No action within the timeout: check the hypervisor is still alive
        except queue.Empty:
            try:
                self.h.conn.getLibVersion()
            except Exception:
                logs.workers.info('trying to reconnect hypervisor {}, alive test in working thread failed'.format(host))
                alive = False
                for _ in range(RETRIES_HYP_IS_ALIVE):
                    try:
                        time.sleep(TIMEOUT_BETWEEN_RETRIES_HYP_IS_ALIVE)
                        self.h.conn.getLibVersion()
                        alive = True
                        logs.workers.info('hypervisor {} is alive'.format(host))
                        break
                    except Exception:
                        logs.workers.info('hypervisor {} is NOT alive'.format(host))

                if alive is False:
                    try:
                        self.h.connect_to_hyp()
                        self.h.conn.getLibVersion()
                        update_hyp_status(self.hyp_id, 'Online')
                    except Exception:
                        logs.workers.debug('hypervisor {} failed'.format(host))
                        logs.workers.error('fail reconnecting to hypervisor {} in working thread'.format(host))
                        reason = self.h.fail_connected_reason
                        update_hyp_status(self.hyp_id, 'Error', reason)
                        update_domains_started_in_hyp_to_unknown(self.hyp_id)

                        # Hand the still-pending actions over before exiting.
                        list_works_in_queue = list(self.queue_actions.queue)
                        if self.queue_master is not None:
                            self.queue_master.put(['error_working_thread', self.hyp_id, list_works_in_queue])
                        logs.workers.error(
                            'thread worker from hypervisor {} exit from error status'.format(self.hyp_id))
                        self.active = False
                        break
def polling(self):
    """Periodically reconcile domains stuck in a transitional status.

    Every ``self.polling_interval`` (waited in 0.1 steps so a stop request is
    noticed quickly), and only while
    ``self.manager.check_actions_domains_enabled()`` is not False:

    * Domains from ``get_domains_with_transitional_status()`` that have no
      ``'hyp_started'`` key are set to 'Unknown', and additionally to
      'Stopped' when their status was 'Stopping'.
    * For the others, each referenced hypervisor is connected once and its
      active domains are listed.  A 'Starting' domain found active gets
      ``update_domain_hyp_started``; a 'Stopping' domain no longer active is
      set to 'Stopped'.

    The loop ends when ``self.stop`` becomes True.
    """
    while self.stop is not True:
        interval = 0.0
        while interval < self.polling_interval:
            sleep(0.1)
            interval += 0.1
            if self.stop is True:
                break
        if self.stop is True:
            # Stop was requested during the wait: do not start another pass.
            break

        if self.manager.check_actions_domains_enabled() is False:
            continue

        transitional = get_domains_with_transitional_status()
        list_domains_without_hyp = [d for d in transitional if 'hyp_started' not in d]
        list_domains = [d for d in transitional if 'hyp_started' in d]

        for d in list_domains_without_hyp:
            logs.broom.error('DOMAIN {} WITH STATUS {} without HYPERVISOR'.format(d['id'], d['status']))
            update_domain_status('Unknown', d['id'], detail='starting or stoping status witouth hypervisor')
            if d['status'] == 'Stopping':
                logs.broom.debug(
                    'DOMAIN: {} STATUS STOPPING WITHOUTH HYPERVISOR, UNKNOWN REASON'.format(d['id']))
                update_domain_status('Stopped', d['id'],
                                     detail='Stopped by broom thread because has not hypervisor')

        # Only non-empty string ids name a hypervisor.  The filter must look
        # at the 'hyp_started' value: testing the domain dict itself against
        # ``str`` never matches, which leaves no hypervisor to query.
        hyps_to_try = {d['hyp_started'] for d in list_domains
                       if isinstance(d['hyp_started'], str) and d['hyp_started']}

        hyps_domain_started = {}
        for hyp_id in hyps_to_try:
            try:
                hostname, port, user = get_hyp_hostname_from_id(hyp_id)
                if hostname is False:
                    logs.broom.error('hyp {} with id has not hostname or is nos in database'.format(hyp_id))
                    continue

                h = hyp(hostname, user=user, port=port)
                if h.connected:
                    list_domains_from_hyp = h.get_domains()
                    if list_domains_from_hyp is None:
                        list_domains_from_hyp = []
                    hyps_domain_started[hyp_id] = {'hyp': h, 'active_domains': list_domains_from_hyp}
                else:
                    logs.broom.error('HYPERVISOR {} libvirt connection failed'.format(hyp_id))
                    # False marks "known hypervisor, but unreachable".
                    hyps_domain_started[hyp_id] = False
            except Exception as e:
                logs.broom.error('Exception when try to hypervisor {}: {}'.format(hyp_id, e))
                logs.broom.error('Traceback: {}'.format(traceback.format_exc()))

        try:
            for d in list_domains:
                domain_id = d['id']
                status = d['status']
                hyp_started = d['hyp_started']

                # Skip booleans, None, empty strings: no hypervisor recorded.
                if not isinstance(hyp_started, str) or not hyp_started:
                    continue

                if hyp_started not in hyps_domain_started:
                    if len(hyps_domain_started) > 0:
                        logs.broom.error('hyp_started: {} NOT IN hyps_domain_started keys:'.format(hyp_started))
                    continue

                hyp_entry = hyps_domain_started[hyp_started]
                if hyp_entry is False:
                    continue
                active_domains = hyp_entry['active_domains']

                if status == 'Starting':
                    logs.broom.debug(
                        'DOMAIN: {} STATUS STARTING TO RUN IN HYPERVISOR: {}'.format(domain_id, hyp_started))
                    if domain_id in active_domains:
                        logs.broom.debug('DOMAIN: {} ACTIVE IN HYPERVISOR: {}'.format(domain_id, hyp_started))
                        state_libvirt = hyp_entry['hyp'].domains[domain_id].state()
                        state_str, cause = state_and_cause_to_str(state_libvirt[0], state_libvirt[1])
                        status = dict_domain_libvirt_state_to_isard_state(state_str)
                        logs.broom.debug(
                            'DOMAIN: {} ACTIVE IN HYPERVISOR: {} WITH STATUS: {}'.format(
                                domain_id, hyp_started, status))
                        update_domain_hyp_started(domain_id, hyp_started)
                    else:
                        logs.broom.debug(
                            'DOMAIN: {} NOT ACTIVE YET IN HYPERVISOR: {} '.format(domain_id, hyp_started))

                elif status == 'Stopping':
                    logs.broom.debug(
                        'DOMAIN: {} STATUS STOPPING IN HYPERVISOR: {}'.format(domain_id, hyp_started))
                    if domain_id not in active_domains:
                        update_domain_status('Stopped', domain_id, detail='Stopped by broom thread')
                    else:
                        logs.broom.debug(
                            'DOMAIN: {} STILL ACTIVE IN HYPERVISOR: {} '.format(domain_id, hyp_started))
        finally:
            # Close the connections opened for this pass so they do not pile
            # up across polling cycles.
            for hyp_id, hyp_entry in hyps_domain_started.items():
                if hyp_entry is False:
                    continue
                try:
                    hyp_entry['hyp'].disconnect()
                except Exception as e:
                    logs.broom.error('Exception disconnecting from hypervisor {}: {}'.format(hyp_id, e))
def polling(self):
    """Periodically reconcile domains stuck in a transitional status.

    Every ``self.polling_interval`` (waited in 0.1 steps so a stop request is
    noticed quickly), and only while
    ``self.manager.check_actions_domains_enabled()`` is not False:

    * Domains from ``get_domains_with_transitional_status()`` that have no
      ``'hyp_started'`` key are set to 'Unknown', and additionally to
      'Stopped' when their status was 'Stopping'.
    * For the others, each referenced hypervisor is connected once and its
      active domains are listed.  A 'Starting' domain found active gets
      ``update_domain_hyp_started``; a 'Stopping' domain no longer active is
      set to 'Stopped'.

    The loop ends when ``self.stop`` becomes True.
    """
    while self.stop is not True:
        interval = 0.0
        while interval < self.polling_interval:
            sleep(0.1)
            interval += 0.1
            if self.stop is True:
                break
        if self.stop is True:
            # Stop was requested during the wait: do not start another pass.
            break

        if self.manager.check_actions_domains_enabled() is False:
            continue

        transitional = get_domains_with_transitional_status()
        list_domains_without_hyp = [d for d in transitional if 'hyp_started' not in d]
        list_domains = [d for d in transitional if 'hyp_started' in d]

        for d in list_domains_without_hyp:
            logs.broom.error('DOMAIN {} WITH STATUS {} without HYPERVISOR'.format(d['id'], d['status']))
            update_domain_status('Unknown', d['id'], detail='starting or stoping status witouth hypervisor')
            if d['status'] == 'Stopping':
                logs.broom.debug(
                    'DOMAIN: {} STATUS STOPPING WITHOUTH HYPERVISOR, UNKNOWN REASON'.format(d['id']))
                update_domain_status('Stopped', d['id'],
                                     detail='Stopped by broom thread because has not hypervisor')

        # Only non-empty string ids name a hypervisor.  The filter must look
        # at the 'hyp_started' value: testing the domain dict itself against
        # ``str`` never matches, which leaves no hypervisor to query.
        hyps_to_try = {d['hyp_started'] for d in list_domains
                       if isinstance(d['hyp_started'], str) and d['hyp_started']}

        hyps_domain_started = {}
        for hyp_id in hyps_to_try:
            try:
                hostname, port, user = get_hyp_hostname_from_id(hyp_id)
                if hostname is False:
                    logs.broom.error('hyp {} with id has not hostname or is nos in database'.format(hyp_id))
                    continue

                h = hyp(hostname, user=user, port=port)
                if h.connected:
                    list_domains_from_hyp = h.get_domains()
                    if list_domains_from_hyp is None:
                        list_domains_from_hyp = []
                    hyps_domain_started[hyp_id] = {'hyp': h, 'active_domains': list_domains_from_hyp}
                else:
                    logs.broom.error('HYPERVISOR {} libvirt connection failed'.format(hyp_id))
                    # False marks "known hypervisor, but unreachable".
                    hyps_domain_started[hyp_id] = False
            except Exception as e:
                logs.broom.error('Exception when try to hypervisor {}: {}'.format(hyp_id, e))
                logs.broom.error('Traceback: {}'.format(traceback.format_exc()))

        try:
            for d in list_domains:
                domain_id = d['id']
                status = d['status']
                hyp_started = d['hyp_started']

                # Skip booleans, None, empty strings: no hypervisor recorded.
                if not isinstance(hyp_started, str) or not hyp_started:
                    continue

                if hyp_started not in hyps_domain_started:
                    if len(hyps_domain_started) > 0:
                        logs.broom.error('hyp_started: {} NOT IN hyps_domain_started keys:'.format(hyp_started))
                    continue

                hyp_entry = hyps_domain_started[hyp_started]
                if hyp_entry is False:
                    continue
                active_domains = hyp_entry['active_domains']

                if status == 'Starting':
                    logs.broom.debug(
                        'DOMAIN: {} STATUS STARTING TO RUN IN HYPERVISOR: {}'.format(domain_id, hyp_started))
                    if domain_id in active_domains:
                        logs.broom.debug('DOMAIN: {} ACTIVE IN HYPERVISOR: {}'.format(domain_id, hyp_started))
                        state_libvirt = hyp_entry['hyp'].domains[domain_id].state()
                        state_str, cause = state_and_cause_to_str(state_libvirt[0], state_libvirt[1])
                        status = dict_domain_libvirt_state_to_isard_state(state_str)
                        logs.broom.debug(
                            'DOMAIN: {} ACTIVE IN HYPERVISOR: {} WITH STATUS: {}'.format(
                                domain_id, hyp_started, status))
                        update_domain_hyp_started(domain_id, hyp_started)
                    else:
                        logs.broom.debug(
                            'DOMAIN: {} NOT ACTIVE YET IN HYPERVISOR: {} '.format(domain_id, hyp_started))

                elif status == 'Stopping':
                    logs.broom.debug(
                        'DOMAIN: {} STATUS STOPPING IN HYPERVISOR: {}'.format(domain_id, hyp_started))
                    if domain_id not in active_domains:
                        update_domain_status('Stopped', domain_id, detail='Stopped by broom thread')
                    else:
                        logs.broom.debug(
                            'DOMAIN: {} STILL ACTIVE IN HYPERVISOR: {} '.format(domain_id, hyp_started))
        finally:
            # Close the connections opened for this pass so they do not pile
            # up across polling cycles.
            for hyp_id, hyp_entry in hyps_domain_started.items():
                if hyp_entry is False:
                    continue
                try:
                    hyp_entry['hyp'].disconnect()
                except Exception as e:
                    logs.broom.error('Exception disconnecting from hypervisor {}: {}'.format(hyp_id, e))