def run(self):
    """Main worker loop: drain the service work queue and periodically probe resources.

    Installs SIGTERM/SIGINT handlers (graceful_exit), then loops forever:
    dequeue one task, advance it, and every `_refresh_counter` iterations
    look for queued tasks that could be scheduled on free resources.
    Sleeps `_work_cycle` seconds between iterations.
    """
    signal.signal(signal.SIGTERM, graceful_exit)
    signal.signal(signal.SIGINT, graceful_exit)
    self._logger.info('Starting...')
    counter = 0
    while True:
        # process one element from work queue
        task_id = task.work_unqueue(self._redis, self._service)
        if task_id is not None:
            try:
                self._advance_task(task_id)
            except RuntimeWarning:
                # transient lock contention: requeue the task and retry later
                self._logger.warning(
                    '%s: failed to acquire a lock, retrying', task_id)
                task.work_queue(self._redis, task_id, self._service)
            except Exception as e:
                # unrecoverable launch failure: record the error in the task
                # log and terminate the task under its lock
                self._logger.error('%s: %s', task_id, str(e))
                with self._redis.acquire_lock(task_id):
                    task.set_log(self._redis, self._taskfile_dir, task_id,
                                 str(e))
                    task.terminate(self._redis, task_id,
                                   phase="launch_error")
                self._logger.info(traceback.format_exc())
        # every 0.01s * refresh_counter - check if we can find some free resource
        if counter > self._refresh_counter:
            # if there are some queued tasks, look for free resources
            if self._redis.exists('queued:%s' % self._service):
                self._logger.debug('checking processes on : %s',
                                   self._service)
                self._select_best_task_to_process(
                    self._services[self._service])
            counter = 0
        counter += 1
        time.sleep(self._work_cycle)
def run(self):
    """Worker loop driven by redis keyspace notifications.

    Subscribes to `beat:*` and `queue:*` key events: an expired beat key
    terminates its task; an expired queue key moves the task back to the
    work queue. When no pubsub message is pending, one task per service is
    dequeued and advanced. Every `_refresh_counter` iterations, resource
    reservations are inspected and queued tasks are dispatched if a
    resource is free.
    """
    self._logger.info('Starting worker')

    # Subscribe to beat expiration.
    pubsub = self._redis.pubsub()
    pubsub.psubscribe('__keyspace@0__:beat:*')
    pubsub.psubscribe('__keyspace@0__:queue:*')
    counter = 0
    while True:
        message = pubsub.get_message()
        if message:
            channel = message['channel']
            data = message['data']
            if data == 'expired':
                self._logger.warning('received expired event on channel %s',
                                     channel)
                if channel.startswith('__keyspace@0__:beat:'):
                    # beat key expired -> task stopped beating: terminate it
                    # (channel[20:] strips the '__keyspace@0__:beat:' prefix)
                    task_id = channel[20:]
                    service = self._redis.hget('task:'+task_id, 'service')
                    if service in self._services:
                        self._logger.info('%s: task expired', task_id)
                        with self._redis.acquire_lock(task_id):
                            task.terminate(self._redis, task_id,
                                           phase='expired')
                elif channel.startswith('__keyspace@0__:queue:'):
                    # queue key expired -> task comes back to the work queue
                    task_id = channel[21:]
                    service = self._redis.hget('task:'+task_id, 'service')
                    if service in self._services:
                        task.work_queue(self._redis, task_id, service)
        else:
            for service in self._services:
                # process one element from this service's work queue
                task_id = task.work_unqueue(self._redis, service)
                if task_id is not None:
                    try:
                        self._advance_task(task_id)
                    except RuntimeWarning:
                        # lock contention: requeue and retry later
                        self._logger.warning(
                            '%s: failed to acquire a lock, retrying', task_id)
                        task.work_queue(self._redis, task_id, service)
                    except Exception as e:
                        # launch failure: log the error on the task and
                        # terminate it under its lock
                        self._logger.error('%s: %s', task_id, str(e))
                        with self._redis.acquire_lock(task_id):
                            task.set_log(self._redis, task_id, str(e))
                            task.terminate(self._redis, task_id,
                                           phase="launch_error")
                else:
                    if counter > self._refresh_counter:
                        # shorten the TTL of stale reservations: if a resource
                        # is not busy and has capacity but is still reserved
                        # with a long-lived queue key, expire that key soon
                        resources = self._services[service].list_resources()
                        for resource in resources:
                            keyr = 'resource:%s:%s' % (service, resource)
                            key_busy = 'busy:%s:%s' % (service, resource)
                            key_reserved = 'reserved:%s:%s' % (service,
                                                               resource)
                            if not self._redis.exists(key_busy) and \
                                    self._redis.hlen(keyr) < resources[resource]:
                                if self._redis.exists(key_reserved) and \
                                        self._redis.ttl(
                                            'queue:'+self._redis.get(key_reserved)) > 10:
                                    self._redis.expire(
                                        'queue:'+self._redis.get(key_reserved), 5)
                                    # NOTE(review): break placement inferred
                                    # from original layout — confirm intent
                                    break
                    if self._redis.exists('queued:%s' % service):
                        # tasks are queued: dispatch if a resource is both
                        # non-busy, below capacity and not reserved
                        resources = self._services[service].list_resources()
                        self._logger.debug('checking processes on : %s',
                                           service)
                        availableResource = False
                        for resource in resources:
                            keyr = 'resource:%s:%s' % (service, resource)
                            key_busy = 'busy:%s:%s' % (service, resource)
                            key_reserved = 'reserved:%s:%s' % (service,
                                                               resource)
                            if not self._redis.exists(key_busy) and \
                                    self._redis.hlen(keyr) < resources[resource]:
                                if not self._redis.exists(key_reserved):
                                    availableResource = True
                                    break
                        if availableResource:
                            self._logger.debug(
                                'resources available on %s - trying dequeuing',
                                service)
                            self._service_unqueue(self._services[service])
        if counter > self._refresh_counter:
            counter = 0
        counter += 1
        time.sleep(0.01)
def post_log(task_id):
    """Store the request body as the log of `task_id` and run the post hook.

    The raw request payload is persisted via task.set_log (bounded by
    max_log_size); the stored content is then passed through the
    'POST/task/log' post_function hook. Always answers with JSON `200`.
    """
    payload = flask.request.get_data()
    stored = task.set_log(redis, taskfile_dir, task_id, payload, max_log_size)
    task_id, stored = post_function('POST/task/log', task_id, stored)
    return flask.jsonify(200)
def run(self):
    """Worker loop: maintain the worker beat, honor admin commands, react to
    keyspace expiration events and advance tasks from the work queue.

    Loop cadence is one iteration per 0.01s:
    - every 1000 iterations (~10s): refresh this worker's beat key or exit
      if the key was removed externally;
    - every 100 iterations (~1s): process worker admin commands and exit if
      the default configuration timestamp changed;
    - each iteration: handle one pubsub message (beat/queue expiration),
      dequeue and advance one task, and every `_refresh_counter` iterations
      try to unqueue tasks if resources may be free.
    """
    self._logger.info('Starting worker')

    # Subscribe to beat expiration.
    pubsub = self._redis.pubsub()
    pubsub.psubscribe('__keyspace@0__:beat:*')
    pubsub.psubscribe('__keyspace@0__:queue:*')
    counter = 0
    counter_beat = 1000
    while True:
        counter_beat += 1
        # every 1000 * 0.01s (10s) - check & reset beat of the worker
        if counter_beat > 1000:
            counter_beat = 0
            if self._redis.exists(self._worker_id):
                # refresh the beat and give the key a 20-minute TTL
                self._redis.hset(self._worker_id, "beat_time", time.time())
                self._redis.expire(self._worker_id, 1200)
            else:
                # worker key deleted/expired externally: stop this worker
                self._logger.info('stopped by key expiration/removal')
                sys.exit(0)
        # every 100 * 0.01s (1s) - check worker administration command
        if counter_beat % 100 == 0:
            workeradmin.process(self._logger, self._redis, self._service)
            if (self._default_config_timestamp and
                    self._redis.hget('default', 'timestamp') !=
                    self._default_config_timestamp):
                self._logger.info(
                    'stopped by default configuration change')
                sys.exit(0)
        # process one message from the queue
        message = pubsub.get_message()
        if message:
            channel = message['channel']
            data = message['data']
            if data == 'expired':
                # task expired, no beat was received
                if channel.startswith('__keyspace@0__:beat:'):
                    # channel[20:] strips the '__keyspace@0__:beat:' prefix
                    task_id = channel[20:]
                    service = self._redis.hget('task:' + task_id, 'service')
                    if service in self._services:
                        self._logger.info('%s: task expired', task_id)
                        with self._redis.acquire_lock(task_id):
                            task.terminate(self._redis, task_id,
                                           phase='expired')
                # expired in the queue - comes back in the work queue
                elif channel.startswith('__keyspace@0__:queue:'):
                    task_id = channel[21:]
                    service = self._redis.hget('task:' + task_id, 'service')
                    if service in self._services:
                        self._logger.info('%s: move to work queue', task_id)
                        task.work_queue(self._redis, task_id, service)
        # process one element from work queue
        task_id = task.work_unqueue(self._redis, self._service)
        if task_id is not None:
            try:
                self._advance_task(task_id)
            except RuntimeWarning:
                # transient lock contention: requeue the task and retry later
                self._logger.warning(
                    '%s: failed to acquire a lock, retrying', task_id)
                task.work_queue(self._redis, task_id, self._service)
            except Exception as e:
                # launch failure: record the error and terminate under lock
                self._logger.error('%s: %s', task_id, str(e))
                with self._redis.acquire_lock(task_id):
                    task.set_log(self._redis, self._taskfile_dir, task_id,
                                 str(e))
                    task.terminate(self._redis, task_id,
                                   phase="launch_error")
        # every 0.01s * refresh_counter - check if we can find some free resource
        if counter > self._refresh_counter:
            # if there are some queued tasks, look for free resources
            if self._redis.exists('queued:%s' % self._service):
                self._logger.debug('checking processes on : %s',
                                   self._service)
                self._service_unqueue(self._services[self._service])
            counter = 0
        counter += 1
        time.sleep(0.01)
def post_log(task_id):
    """Persist the raw request body as the log of `task_id`.

    Returns a JSON `200` body on success.
    """
    body = flask.request.get_data()
    task.set_log(redis, task_id, body)
    return flask.jsonify(200)