def check_bridge(self):
    """
    Restart the bridge greenlet if it has finished with an exception.

    Does nothing while the bridge is still running or if it finished
    without an error. On restart a new Bridge is created, stored in
    ``self.bridge_object`` and its ``start`` method is spawned as the new
    ``self.bridge``.
    """
    # NOTE: ``ready`` is a method on gevent greenlets and has to be called;
    # a bare ``self.bridge.ready`` is a bound method and therefore always
    # truthy, which made this half of the check meaningless.
    if self.bridge.ready() and self.bridge.exception:
        context.log.debug('bridge exception: %s' % self.bridge.exception)

        # Keep self.bridge_object pointing at the Bridge that is actually
        # running, the same way run() sets both attributes together.
        self.bridge_object = Bridge()
        self.bridge = gevent.spawn(self.bridge_object.start)
def run(self):
    """
    Run the supervisor main loop until the agent is stopped.

    Startup sequence: set the pid on the context, name the current thread
    'supervisor', fetch the initial config from the cloud and initialise
    the object managers. If no object managers end up configured, an error
    is logged and the method returns without entering the loop.

    Otherwise a Bridge is started and, every 5 seconds while
    ``self.is_running`` is true, the loop:

    * runs every object manager listed in ``self.object_manager_order``;
    * sends the root object definition to the cloud, if a root object
      exists;
    * checks the bridge via ``self.check_bridge()``.

    :raises OSError: any OSError other than ENOMEM raised during a cycle.
        ENOMEM is logged and the loop carries on with the next cycle.
    """
    # local import keeps this method self-contained; only used to name the
    # out-of-memory errno below
    import errno

    # get correct pid
    context.set_pid()

    # set thread name
    current_thread().name = 'supervisor'

    # get initial config from cloud
    self.talk_to_cloud(initial=True)

    # init object managers
    self.init_object_managers()

    if not self.object_managers:
        context.log.error('no object managers configured, stopping')
        return

    # run bridge manager
    self.bridge_object = Bridge()
    self.bridge = spawn(self.bridge_object.start)

    # main cycle
    while True:
        time.sleep(5.0)

        if not self.is_running:
            break

        try:
            context.inc_action_id()

            for object_manager_name in self.object_manager_order:
                self.object_managers[object_manager_name].run()

            try:
                if context.objects.root_object:
                    context.inc_action_id()
                    self.talk_to_cloud(
                        root_object=context.objects.root_object.definition
                    )
            except AmplifyCriticalException:
                # deliberate best-effort: a failed cloud exchange must not
                # end the main cycle
                # NOTE(review): presumably already logged where it is
                # raised - confirm
                pass

            self.check_bridge()
        except OSError as e:
            # Only out-of-memory (ENOMEM, errno 12: unable to allocate) is
            # survivable; a bare raise keeps the original traceback.
            if e.errno != errno.ENOMEM:
                raise

            # e.strerror carries the OS message; e.message is empty for an
            # OSError built from (errno, strerror) and is gone in Python 3.
            context.log.error(
                'OSError: [Errno %s] %s' % (e.errno, e.strerror),
                exc_info=True
            )
def run(self):
    """
    Run the supervisor main loop until the agent is stopped.

    Startup sequence: set the pid on the context, name the current thread
    'supervisor', fetch the initial config from the cloud, initialise the
    object managers and load the external managers. If no object managers
    end up configured, an error is logged and the method returns without
    entering the loop.

    Otherwise a Bridge is started, exit handlers are registered and, every
    5 seconds while ``self.is_running`` is true, the loop:

    * in debug mode, stops the agent once ``self.debug_mode_time`` seconds
      have elapsed since ``self.start_time`` (printing the remaining time
      until then);
    * runs the object managers listed in ``self.object_manager_order``,
      then every other manager present in ``self.object_managers``;
    * calls ``self.manage_external_managers()``;
    * sends the root object definition to the cloud, or stops the agent if
      the root object has no definition or the definition is not healthy;
    * checks the bridge via ``self.check_bridge()``.

    :raises OSError: any OSError other than ENOMEM raised during a cycle.
        ENOMEM is logged and the loop carries on with the next cycle.
    """
    # local import keeps this method self-contained; only used to name the
    # out-of-memory errno below
    import errno

    # get correct pid
    context.set_pid()

    # set thread name
    current_thread().name = 'supervisor'

    # get initial config from cloud
    self.talk_to_cloud(initial=True)

    # init object managers
    self.init_object_managers()

    # load ext managers
    self.load_ext_managers()

    if not self.object_managers:
        context.log.error('no object managers configured, stopping')
        return

    # run bridge manager
    self.bridge_object = Bridge()
    self.bridge = spawn(self.bridge_object.start)

    # register exit handlers (atexit runs them in reverse registration
    # order, so metrics are flushed before stop_everything is called)
    atexit.register(self.stop_everything)
    atexit.register(self.bridge_object.flush_metrics)

    # main cycle
    while True:
        time.sleep(5.0)

        # stop if was running in debug mode for longer than debug_mode_time
        if self.debug_mode:
            elapsed_time = int(time.time()) - self.start_time
            if elapsed_time > self.debug_mode_time:
                self.stop()
            else:
                # single-argument call form: prints the same text whether
                # or not print is a statement in this interpreter
                print(
                    "Agent is running in debug mode, %s seconds to go..." %
                    (self.debug_mode_time - elapsed_time)
                )

        if not self.is_running:
            break

        try:
            context.inc_action_id()

            # run internal object managers
            for object_manager_name in self.object_manager_order:
                self.object_managers[object_manager_name].run()

            # run external object managers; the names are collected into a
            # list up front, before any of those managers is run
            external_object_managers = [
                name for name in self.object_managers
                if name not in self.object_manager_order
            ]
            for object_manager_name in external_object_managers:
                self.object_managers[object_manager_name].run()

            # manage external regular managers
            self.manage_external_managers()

            # talk to cloud
            try:
                root_object = context.objects.root_object
                if root_object:
                    if root_object.definition and root_object.definition_healthy:
                        context.inc_action_id()
                        self.talk_to_cloud(root_object=root_object.definition)
                    else:
                        context.log.error(
                            'Problem with root object definition, agent stopping'
                        )
                        self.stop()
            except AmplifyCriticalException:
                # deliberate best-effort: a failed cloud exchange must not
                # end the main cycle
                # NOTE(review): presumably already logged where it is
                # raised - confirm
                pass

            self.check_bridge()
        except OSError as e:
            # Only out-of-memory (ENOMEM, errno 12: unable to allocate) is
            # survivable; a bare raise keeps the original traceback.
            if e.errno != errno.ENOMEM:
                raise

            # e.strerror carries the OS message; e.message is empty for an
            # OSError built from (errno, strerror) and is gone in Python 3.
            context.log.error(
                'OSError: [Errno %s] %s' % (e.errno, e.strerror),
                exc_info=True
            )