def run(self):
    """Launch the synchronizer worker threads and block until interrupted.

    Starts the openstack observer thread and, unless this is a service
    observer (Config has a non-empty "observer_name"), a model-policies
    thread. The main thread then idles; Ctrl-C exits the process.

    Fix: worker threads are marked as daemons so the interpreter can exit
    cleanly, instead of calling the private Thread._Thread__stop() API
    (which never actually stopped a running thread and was removed in
    Python 3). This resolves the old "set the threads as daemons" TODO.
    """
    update_diag(sync_start=time.time(), backend_status="0 - Synchronizer Start")

    # start the openstack observer
    observer = XOSObserver()
    observer_thread = threading.Thread(target=observer.run, name='synchronizer')
    observer_thread.daemon = True  # die with the main thread on exit
    observer_thread.start()

    # start model policies thread; a service observer (non-empty
    # observer_name) handles model policies elsewhere, so skip it there
    observer_name = getattr(Config(), "observer_name", "")
    if not observer_name:
        model_policy_thread = threading.Thread(target=run_policy)
        model_policy_thread.daemon = True
        model_policy_thread.start()
    else:
        model_policy_thread = None
        print("Skipping model policies thread for service observer.")

    # start event listener (currently disabled)
    #event_manager = EventListener(wake_up=observer.wake_up)
    #event_manager_thread = threading.Thread(target=event_manager.run)
    #event_manager_thread.start()

    # Idle forever; the long sleep keeps the main thread cheap while still
    # letting KeyboardInterrupt through.
    while True:
        try:
            time.sleep(1000)
        except KeyboardInterrupt:
            print("exiting due to keyboard interrupt")
            # Workers are daemon threads, so exiting the main thread is
            # sufficient; no private thread-stop hack required.
            sys.exit(1)
def run(self):
    """Launch the synchronizer worker threads and block until interrupted.

    Starts the observer thread and, when Config defines
    "observer_model_policies_dir", a model-policies thread. The main thread
    then idles; Ctrl-C exits the process.

    Fix: worker threads are marked as daemons so the interpreter can exit
    cleanly, instead of calling the private Thread._Thread__stop() API
    (which never actually stopped a running thread and was removed in
    Python 3). This resolves the old "set the threads as daemons" TODO.
    """
    update_diag(sync_start=time.time(), backend_status="0 - Synchronizer Start")

    # start the observer
    observer = XOSObserver()
    observer_thread = threading.Thread(target=observer.run, name='synchronizer')
    observer_thread.daemon = True  # die with the main thread on exit
    observer_thread.start()

    # start model policies thread, only when a model_policies dir is configured
    policies_dir = getattr(Config(), "observer_model_policies_dir", None)
    if policies_dir:
        # imported lazily so synchronizers without model policies do not
        # pay for (or depend on) the model_policy module
        from synchronizers.model_policy import run_policy
        model_policy_thread = threading.Thread(target=run_policy)
        model_policy_thread.daemon = True
        model_policy_thread.start()
    else:
        model_policy_thread = None
        logger.info(
            "Skipping model policies thread due to no model_policies dir.")

    # Idle forever; the long sleep keeps the main thread cheap while still
    # letting KeyboardInterrupt through.
    while True:
        try:
            time.sleep(1000)
        except KeyboardInterrupt:
            print("exiting due to keyboard interrupt")
            # Workers are daemon threads, so exiting the main thread is
            # sufficient; no private thread-stop hack required.
            sys.exit(1)
def run(self):
    """Launch the synchronizer worker threads and block until interrupted.

    Starts the openstack observer thread and, unless this is a service
    observer (Config has a non-empty "observer_name"), a model-policies
    thread. The main thread then idles; Ctrl-C exits the process.

    Fix: worker threads are marked as daemons so the interpreter can exit
    cleanly, instead of calling the private Thread._Thread__stop() API
    (which never actually stopped a running thread and was removed in
    Python 3). This resolves the old "set the threads as daemons" TODO.
    """
    update_diag(sync_start=time.time(), backend_status="0 - Synchronizer Start")

    # start the openstack observer
    observer = XOSObserver()
    observer_thread = threading.Thread(target=observer.run, name='synchronizer')
    observer_thread.daemon = True  # die with the main thread on exit
    observer_thread.start()

    # start model policies thread; a service observer (non-empty
    # observer_name) handles model policies elsewhere, so skip it there
    observer_name = getattr(Config(), "observer_name", "")
    if not observer_name:
        model_policy_thread = threading.Thread(target=run_policy)
        model_policy_thread.daemon = True
        model_policy_thread.start()
    else:
        model_policy_thread = None
        print("Skipping model policies thread for service observer.")

    # start event listener (currently disabled)
    #event_manager = EventListener(wake_up=observer.wake_up)
    #event_manager_thread = threading.Thread(target=event_manager.run)
    #event_manager_thread.start()

    # Idle forever; the long sleep keeps the main thread cheap while still
    # letting KeyboardInterrupt through.
    while True:
        try:
            time.sleep(1000)
        except KeyboardInterrupt:
            print("exiting due to keyboard interrupt")
            # Workers are daemon threads, so exiting the main thread is
            # sufficient; no private thread-stop hack required.
            sys.exit(1)
def update_diag(self, loop_end=None, loop_start=None, syncrecord_start=None,
                sync_start=None, backend_status=None):
    """Forward loop/sync timing and status to the module-level update_diag().

    Acts only when a "Diag" model class is available on this synchronizer;
    otherwise the call is a silent no-op (returns None).
    """
    if not self.has_model_class("Diag"):
        return
    diag_model = self.get_model_class("Diag")
    return update_diag(diag_model, loop_end, loop_start,
                       syncrecord_start, sync_start, backend_status)
def call(self, failed=None, deletion=False):
    """Synchronize (or delete) every pending object for this step.

    Arguments:
        failed -- objects that failed in earlier steps; a pending object
            whose dependencies include one of them is failed without being
            synced. Defaults to a fresh list (fix: the previous mutable
            default ``failed=[]`` was shared across calls, leaking failures
            from one invocation into the next).
        deletion -- when True, process pending deletions instead of syncs.

    Returns the list of failed objects (the ``failed`` argument, extended).
    Backoff bookkeeping and status are persisted on each object's
    backend_register / backend_status fields.
    """
    if failed is None:
        failed = []
    #if ('Instance' in self.__class__.__name__):
    #    pdb.set_trace()
    pending = self.fetch_pending(deletion)
    for o in pending:
        # another spot to clean up debug state
        try:
            reset_queries()
        except:
            # this shouldn't happen, but in case it does, catch it...
            logger.log_exc("exception in reset_queries", extra=o.tologdict())

        sync_failed = False
        try:
            backoff_disabled = Config().observer_backoff_disabled
        except:
            backoff_disabled = 0

        # Honor exponential backoff: a previous failure may have scheduled
        # this object's next attempt in the future.
        try:
            scratchpad = json.loads(o.backend_register)
            if (scratchpad):
                next_run = scratchpad['next_run']
                if (not backoff_disabled and next_run > time.time()):
                    sync_failed = True
        except:
            logger.log_exc("Exception while loading scratchpad",
                           extra=o.tologdict())
            pass

        if (not sync_failed):
            try:
                for f in failed:
                    self.check_dependencies(o, f)  # Raises exception if failed
                if (deletion):
                    self.delete_record(o)
                    o.delete(purge=True)
                else:
                    new_enacted = timezone.now()
                    try:
                        run_always = self.run_always
                    except AttributeError:
                        run_always = False
                    self.sync_record(o)
                    # if (not run_always):
                    #     o.enacted = new_enacted
                    update_diag(syncrecord_start=time.time(),
                                backend_status="1 - Synced Record")
                    o.enacted = new_enacted
                    # Success: reset the backoff scratchpad.
                    scratchpad = {'next_run': 0, 'exponent': 0,
                                  'last_success': time.time()}
                    o.backend_register = json.dumps(scratchpad)
                    o.backend_status = "1 - OK"
                    o.save(update_fields=['enacted', 'backend_status',
                                          'backend_register'])
            except (InnocuousException, Exception, DeferredException) as e:
                logger.log_exc("sync step failed!", extra=o.tologdict())
                # Build a human-readable error, folding in any previous
                # failure message already stored on the object.
                try:
                    if (o.backend_status.startswith('2 - ')):
                        str_e = '%s // %r' % (o.backend_status[4:], e)
                        str_e = elim_dups(str_e)
                    else:
                        str_e = '%r' % e
                except:
                    str_e = '%r' % e

                try:
                    error = self.error_map.map(str_e)
                except:
                    error = '%s' % str_e

                # InnocuousException keeps the object in "good" (1 -) state.
                if isinstance(e, InnocuousException):
                    o.backend_status = '1 - %s' % error
                else:
                    o.backend_status = '2 - %s' % error

                try:
                    scratchpad = json.loads(o.backend_register)
                    scratchpad['exponent']
                except:
                    logger.log_exc("Exception while updating scratchpad",
                                   extra=o.tologdict())
                    scratchpad = {'next_run': 0, 'exponent': 0,
                                  'last_success': time.time(), 'failures': 0}

                # Second failure: schedule the next attempt with exponential
                # backoff (deferred failures back off more gently).
                if (scratchpad['exponent']):
                    if isinstance(e, DeferredException):
                        delay = scratchpad['exponent'] * 60  # 1 minute
                    else:
                        delay = scratchpad['exponent'] * 600  # 10 minutes
                    # cap delays at 8 hours
                    if (delay > 8 * 60 * 60):
                        delay = 8 * 60 * 60
                    scratchpad['next_run'] = time.time() + delay

                try:
                    scratchpad['exponent'] += 1
                except:
                    scratchpad['exponent'] = 1

                try:
                    scratchpad['failures'] += 1
                except KeyError:
                    scratchpad['failures'] = 1

                scratchpad['last_failure'] = time.time()
                o.backend_register = json.dumps(scratchpad)

                # TOFIX:
                # DatabaseError: value too long for type character varying(140)
                if (o.pk):
                    try:
                        o.backend_status = o.backend_status[:1024]
                        o.save(update_fields=['backend_status',
                                              'backend_register', 'updated'])
                    except:
                        print("Could not update backend status field!")
                        pass
                sync_failed = True

        if (sync_failed):
            failed.append(o)

    return failed
def update_diag(self, loop_end=None, loop_start=None, syncrecord_start=None,
                sync_start=None, backend_status=None, backend_code=0):
    """Forward loop/sync timing and status to the module-level update_diag().

    Acts only when a "Diag" model class is available on this synchronizer;
    otherwise the call is a silent no-op (returns None).

    Bug fix: the caller-supplied ``backend_code`` is now forwarded; it was
    previously hard-coded to ``backend_code=0`` in the delegated call,
    silently discarding the argument.
    """
    if self.has_model_class("Diag"):
        return update_diag(self.get_model_class("Diag"), loop_end, loop_start,
                           syncrecord_start, sync_start, backend_status,
                           backend_code=backend_code)
def call(self, failed=None, deletion=False):
    """Synchronize (or delete) every pending object for this step.

    This variant journals each state transition via journal_object() and
    uses soft deletion: deleted records are flagged backend_need_reap for a
    later reaper pass instead of being purged immediately.

    Arguments:
        failed -- objects that failed in earlier steps; a pending object
            whose dependencies include one of them is failed without being
            synced. Defaults to a fresh list (fix: the previous mutable
            default ``failed=[]`` was shared across calls, leaking failures
            from one invocation into the next).
        deletion -- when True, process pending deletions instead of syncs.

    Returns the list of failed objects (the ``failed`` argument, extended).
    """
    if failed is None:
        failed = []
    #if ('Instance' in self.__class__.__name__):
    #    pdb.set_trace()
    pending = self.fetch_pending(deletion)
    for o in pending:
        # another spot to clean up debug state
        try:
            reset_queries()
        except:
            # this shouldn't happen, but in case it does, catch it...
            logger.log_exc("exception in reset_queries", extra=o.tologdict())

        sync_failed = False
        try:
            backoff_disabled = Config().observer_backoff_disabled
        except:
            backoff_disabled = 0

        # Honor exponential backoff: a previous failure may have scheduled
        # this object's next attempt in the future.
        try:
            scratchpad = json.loads(o.backend_register)
            if (scratchpad):
                next_run = scratchpad['next_run']
                if (not backoff_disabled and next_run > time.time()):
                    sync_failed = True
        except:
            logger.log_exc("Exception while loading scratchpad",
                           extra=o.tologdict())
            pass

        if (not sync_failed):
            try:
                for f in failed:
                    self.check_dependencies(o, f)  # Raises exception if failed
                if (deletion):
                    if getattr(o, "backend_need_reap", False):
                        # the object has already been deleted and marked
                        # for reaping
                        journal_object(o, "syncstep.call.already_marked_reap")
                    else:
                        journal_object(o, "syncstep.call.delete_record")
                        self.delete_record(o)
                        journal_object(o, "syncstep.call.delete_set_reap")
                        o.backend_need_reap = True
                        o.save(update_fields=['backend_need_reap'])
                        #o.delete(purge=True)
                else:
                    new_enacted = timezone.now()
                    try:
                        run_always = self.run_always
                    except AttributeError:
                        run_always = False
                    # Mark this as an object that will require delete. Do
                    # this now rather than after the syncstep.
                    if not (o.backend_need_delete):
                        o.backend_need_delete = True
                        o.save(update_fields=['backend_need_delete'])
                    journal_object(o, "syncstep.call.sync_record")
                    self.sync_record(o)
                    # if (not run_always):
                    #     o.enacted = new_enacted
                    update_diag(syncrecord_start=time.time(),
                                backend_status="1 - Synced Record")
                    o.enacted = new_enacted
                    # Success: reset the backoff scratchpad.
                    scratchpad = {'next_run': 0, 'exponent': 0,
                                  'last_success': time.time()}
                    o.backend_register = json.dumps(scratchpad)
                    o.backend_status = "1 - OK"
                    journal_object(o, "syncstep.call.save_update")
                    o.save(update_fields=['enacted', 'backend_status',
                                          'backend_register'])
            except (InnocuousException, Exception, DeferredException) as e:
                logger.log_exc("sync step failed!", extra=o.tologdict())
                # Build a human-readable error, folding in any previous
                # failure message already stored on the object.
                try:
                    if (o.backend_status.startswith('2 - ')):
                        str_e = '%s // %r' % (o.backend_status[4:], e)
                        str_e = elim_dups(str_e)
                    else:
                        str_e = '%r' % e
                except:
                    str_e = '%r' % e

                try:
                    error = self.error_map.map(str_e)
                except:
                    error = '%s' % str_e

                # InnocuousException keeps the object in "good" (1 -) state.
                if isinstance(e, InnocuousException):
                    o.backend_status = '1 - %s' % error
                else:
                    o.backend_status = '2 - %s' % error

                try:
                    scratchpad = json.loads(o.backend_register)
                    scratchpad['exponent']
                except:
                    logger.log_exc("Exception while updating scratchpad",
                                   extra=o.tologdict())
                    scratchpad = {'next_run': 0, 'exponent': 0,
                                  'last_success': time.time(), 'failures': 0}

                # Second failure: schedule the next attempt with exponential
                # backoff (deferred failures back off more gently).
                if (scratchpad['exponent']):
                    if isinstance(e, DeferredException):
                        delay = scratchpad['exponent'] * 60  # 1 minute
                    else:
                        delay = scratchpad['exponent'] * 600  # 10 minutes
                    # cap delays at 8 hours
                    if (delay > 8 * 60 * 60):
                        delay = 8 * 60 * 60
                    scratchpad['next_run'] = time.time() + delay

                try:
                    scratchpad['exponent'] += 1
                except:
                    scratchpad['exponent'] = 1

                try:
                    scratchpad['failures'] += 1
                except KeyError:
                    scratchpad['failures'] = 1

                scratchpad['last_failure'] = time.time()
                o.backend_register = json.dumps(scratchpad)

                # TOFIX:
                # DatabaseError: value too long for type character varying(140)
                if (o.pk):
                    try:
                        o.backend_status = o.backend_status[:1024]
                        o.save(update_fields=['backend_status',
                                              'backend_register', 'updated'])
                    except:
                        print("Could not update backend status field!")
                        pass
                sync_failed = True

        if (sync_failed):
            failed.append(o)

    return failed
def run_once(self):
    """Execute one pass of the synchronizer event loop.

    Runs every ordered step in its own thread — a sync pass first, then a
    deletion pass over the steps in reverse order — records step run times,
    and reports loop timing through update_diag(). Any exception is logged
    and reflected in the diag status rather than propagated to the caller.
    """
    try:
        self.check_db_connection_okay()

        loop_start = time.time()
        # Error-message mapping table; falls back to the bundled default
        # when Config has no "error_map_path".
        error_map_file = getattr(
            Config(), "error_map_path", XOS_DIR + "/error_map.txt")
        self.error_mapper = ErrorMapper(error_map_file)

        # Two passes. One for sync, the other for deletion.
        for deletion in [False, True]:
            # Set of individual objects within steps that failed
            self.failed_step_objects = set()

            # Set up conditions and step status
            # This is needed for steps to run in parallel
            # while obeying dependencies.
            providers = set()
            dependency_graph = self.dependency_graph if not deletion else self.deletion_dependency_graph
            for v in dependency_graph.values():
                if (v):
                    providers.update(v)

            # One condition variable and status slot per provider step, so
            # dependent steps can wait on their providers.
            self.step_conditions = {}
            self.step_status = {}
            for p in list(providers):
                self.step_conditions[p] = threading.Condition()
                self.step_status[p] = STEP_STATUS_WORKING

            self.failed_steps = []
            threads = []

            logger.debug('Deletion=%r...' % deletion)
            # Deletions run over the steps in reverse dependency order.
            schedule = self.ordered_steps if not deletion else reversed(
                self.ordered_steps)
            for S in schedule:
                thread = threading.Thread(
                    target=self.sync, name='synchronizer', args=(
                        S, deletion))
                # NOTE(review): logs the same deletion flag for every step;
                # presumably this was meant to log the step S — confirm.
                logger.debug('Deletion=%r...' % deletion)
                threads.append(thread)

            # Start threads
            for t in threads:
                t.start()

            # another spot to clean up debug state
            try:
                reset_queries()
            except:
                # this shouldn't happen, but in case it does, catch it...
                logger.log_exc("exception in reset_queries")

            # Wait for all threads to finish before continuing with the run
            # loop
            for t in threads:
                t.join()

        self.save_run_times()

        loop_end = time.time()
        update_diag(
            loop_end=loop_end,
            loop_start=loop_start,
            backend_status="1 - Bottom Of Loop")
    except Exception as e:
        # Swallow everything: the event loop must keep running; surface the
        # failure via logs and the diag record instead.
        logger.error(
            'Core error. This seems like a misconfiguration or bug: %r. This error will not be relayed to the user!' % e)
        logger.log_exc("Exception in observer run loop")
        traceback.print_exc()
        update_diag(backend_status="2 - Exception in Event Loop")