def save(self, worker_uuid, task_uuid, key, value):
    '''
    Persist a key/value pair for a task into durus storage, retrying the
    transaction up to three times on conflict.

    Side effects on success: updates `/worker_uuid_by_task_uuid`,
    `/task_by_uuid`, first-seen timestamp indexes, and (for the special
    key `'__label__'`) the label -> task-uuid set, then commits.
    Raises the last caught exception if all three attempts fail.
    '''
    logging.getLogger(log_label(self)).info(self.storage.root.keys())
    # Optimistic-concurrency retry loop: abort any stale view, attempt
    # the writes, commit; on any failure retry up to 3 times.
    for i in range(3):
        self.storage.abort()
        try:
            # NOTE(review): `value` (not `worker_uuid`) is stored under
            # `/worker_uuid_by_task_uuid/<task_uuid>` — given the mapping's
            # name this looks like it should be `worker_uuid`; confirm.
            db_path = '/worker_uuid_by_task_uuid/%s' % task_uuid
            self.storage[db_path] = value
            db_path = '/task_by_uuid/%s/%s' % (task_uuid, key)
            self.storage[db_path] = value
            # First time this task is seen: record its creation timestamp
            # and index the task uuid by that timestamp for ordered lookup.
            if task_uuid not in self.storage.root.setdefault(
                    'datetime_by_task_uuid', PersistentOrderedDict()):
                db_path = '/datetime_by_task_uuid/%s' % task_uuid
                now = datetime.now()
                self.storage[db_path] = now
                task_uuid_by_datetime = self.storage.root.setdefault(
                        'task_uuid_by_datetime', durus.btree.BTree())
                task_uuid_by_datetime[now] = task_uuid
            task_uuids_by_label = self.storage.root.setdefault(
                    'task_uuids_by_label', PersistentOrderedDict())
            # The magic key '__label__' assigns a human-readable label:
            # maintain the reverse index label -> set of task uuids.
            if key == '__label__':
                logging.getLogger(log_label(self)).warning('key=%s value=%s',
                                                           key, value)
                label = value
                uuids = task_uuids_by_label.setdefault(label, PersistentSet())
                if task_uuid not in uuids:
                    logging.getLogger(log_label(self)).warning(
                            'adding %s to task_uuids_by_label["%s"]',
                            task_uuid, label)
                    uuids.add(task_uuid)
            # Mark the root dirty so durus persists the container changes.
            self.storage.root._p_note_change()
            self.storage.commit()
            return
        except:
            # Swallow and retry; the bare `raise` below re-raises the last
            # failure once all attempts are exhausted.
            pass
    raise
def rpc__store(self, env, uuid, key, value, task_uuid=None,
               serialization=SERIALIZE__NONE):
    '''
    Publish a `store` message for the worker's task and return the log
    message, or `None` when no task can be resolved for the worker.
    '''
    # Fall back to the worker's currently assigned task when the caller
    # did not supply an explicit task uuid.
    if task_uuid is None and uuid in self.task_by_worker:
        task_uuid = self.task_by_worker[uuid]
    if task_uuid is None:
        # Nothing to associate the value with; publish nothing.
        return
    message = '[%s] uuid=%s, %s=%s' % (log_label(self), uuid, key, value)
    logging.getLogger(log_label(self)).info(message)
    self.publish(env, uuid, task_uuid, 'store', serialization, key, value)
    return message
def _rpc__std_base(self, message, data, stream_name):
    '''
    Append `data` to the file named `stream_name` (e.g. 'stdout'/'stderr')
    inside the message's data directory.
    '''
    std_path = self._message_data_dir(message).joinpath(stream_name)
    # Context manager guarantees the handle is closed even if the write
    # raises (the previous open/write/close sequence leaked it on error).
    with std_path.open('a') as std:
        std.write(data)
    logging.getLogger(log_label(self)).info('append %s to : %s',
                                            stream_name, std_path)
def sub__store(self, message, serialization, key, value):
    '''
    Handle a subscribed `store` message: decode `value` according to its
    serialization tag (pickle/JSON, raw otherwise) and persist it for the
    message's task.
    '''
    decoders = {'SERIALIZE__PICKLE': pickle.loads,
                'SERIALIZE__JSON': jsonapi.loads}
    decode = decoders.get(serialization)
    if decode is not None:
        value = decode(value)
    self.save(message.worker_uuid, message.task_uuid, key, value)
    logging.getLogger(log_label(self)).debug('key=%s value=%s', key, value)
def create_worker(self, env, worker_uuid, *args, **kwargs):
    '''
    Create worker subprocess.

    Launches a shell pipeline that activates the `zmq_job_manager`
    virtualenv, creates a per-worker working directory, and starts the
    worker module pointed at this supervisor's RPC URI.  Returns the
    worker uuid on launch, or `None` if a live subprocess with the same
    uuid already exists.
    '''
    import os
    from subprocess import Popen, PIPE
    import platform
    # Lazily create the uuid -> Popen registry on first use.
    if not hasattr(self, '_worker_subprocesses'):
        self._worker_subprocesses = OrderedDict()
    if worker_uuid in self._worker_subprocesses:
        p = self._worker_subprocesses[worker_uuid]
        if p.poll() is not None:
            # Worker with same name existed, but has finished: reap it and
            # fall through to start a replacement.
            try:
                p.terminate()
            except OSError:
                pass
            finally:
                del self._worker_subprocesses[worker_uuid]
        else:
            # Worker already exists, so return `None`
            logging.getLogger(log_label(self)).info(('worker already exists:',
                                                     worker_uuid))
            return None
    logging.getLogger(log_label(self)).info((worker_uuid, args, kwargs))
    # NOTE(review): this rebinds the `env` parameter to `os.environ` and
    # updates it in place — the supervisor's own environment is mutated by
    # any 'env' kwarg; confirm this is intentional.
    env = os.environ
    env.update(kwargs.pop('env', {}))
    # Replace the wildcard bind address with this host's name so the
    # worker can connect back.
    supervisor_connect_uri = self.uris['rpc'].replace(r'tcp://*',
                                                      r'tcp://%s'
                                                      % (platform.node()))
    # NOTE(review): `worker_uuid` is interpolated into a shell=True command
    # string — safe only if uuids are always generated internally (see
    # rpc__create_worker); verify no caller-supplied uuids reach here.
    command = ('bash -c "'
               '. /usr/local/bin/virtualenvwrapper.sh &&'
               'workon zmq_job_manager && '
               'cdvirtualenv && '
               'mkdir -p worker_envs/%(uuid)s && '
               'cd worker_envs/%(uuid)s && '
               'python -m zmq_job_manager.worker %(uri)s %(uuid)s"'
               % {'uuid': worker_uuid, 'uri': supervisor_connect_uri}
    )
    self._worker_subprocesses[worker_uuid] = Popen(command, shell=True,
                                                   env=env, stdout=PIPE,
                                                   stderr=PIPE)
    logging.getLogger(log_label(self)).info('started worker subprocess for:'
                                            ' %s\n%s', worker_uuid, command)
    return worker_uuid
def reset_heartbeat(self, value=None):
    '''
    Reset the remaining-heartbeat countdown.

    When `value` is `None`, reuse the count from the previous reset;
    raises `ValueError` if no count was ever provided.  Records `value`
    as both the current and the starting count.
    '''
    old_value = getattr(self, '_starting_heartbeat_count', None)
    if value is None:
        if old_value is None:
            # Call-form raise (the original used the Python-2-only
            # `raise ValueError, msg` statement syntax).
            raise ValueError('No initial value provided for heartbeat count.')
        value = old_value
    logging.getLogger(log_label(self)).debug('reset_heartbeat %s', value)
    self._heartbeat_count = value
    self._starting_heartbeat_count = value
def rpc__create_worker(self, env, multipart_message, uuid, *args, **kwargs):
    '''
    Create a new worker and return the worker's uuid (or a falsy result
    when creation did not happen, e.g. the worker already exists).
    '''
    # An explicit uuid may be requested via the `worker_uuid` keyword;
    # otherwise a fresh one is generated.
    worker_uuid_ = kwargs.pop('worker_uuid', None)
    if worker_uuid_ is None:
        worker_uuid_ = str(uuid1())
    log_message = '[%s] create worker (%s)' % (datetime.now(), worker_uuid_)
    created = self.create_worker(env, worker_uuid_, *args, **kwargs)
    if created:
        logging.getLogger(log_label(self)).info(log_message)
        self._data['workers']['pending_create'].add(created)
    return created
def call_handler(self, handler, env, multipart_message, request):
    '''
    Isolate handler call in this method to allow subclasses to perform
    special-handling, if necessary.

    Note that the multipart-message is ignored by default.
    '''
    sender = request['sender_uuid']
    command = request['command']
    worker_states = self._data['worker_states']
    # Any message from a known worker counts as a sign of life.
    if sender in worker_states:
        worker_states[sender].reset_heartbeat()
    # Heartbeats are too chatty to log.
    if command not in ('heartbeat', ):
        logging.getLogger(log_label(self)).info(command)
    return handler(env, multipart_message, sender,
                   *request['args'], **request['kwargs'])
def timer__monitor_heartbeats(self, *args, **kwargs):
    '''
    Periodic timer callback: decrement each running worker's heartbeat
    countdown, moving workers that reach zero into the `flatlined` set,
    and move any flatlined worker whose count has since been reset back
    into `running` (notifying the manager it revived).
    '''
    # Pass 1: age the countdown of every running worker.
    # Iterate over a copy since the set is mutated inside the loop.
    for uuid in self._data['workers']['running'].copy():
        worker = self._data['worker_states'].get(uuid)
        if worker:
            heartbeat_count = worker._heartbeat_count
            if uuid in self._data['workers']['running']\
                    and heartbeat_count is not None:
                # If `heartbeat_count` is `None`, the heart-beat has not
                # been started. We only process the heart-beat after it
                # has been started.
                heartbeat_count -= 1
                if heartbeat_count <= 0:
                    # This process has missed the maximum number of
                    # expected heartbeats, so add to list of flatlined
                    # workers.
                    self._data['workers']['flatlined'].add(uuid)
                    # Set a flag to mark worker as newly flat-lined.
                    self._data['workers']['flatlined_latch'].add(uuid)
                    self._data['workers']['running'].remove(uuid)
                    # Log only on the transition tick (== 0), not on
                    # every subsequent negative tick.
                    if heartbeat_count == 0:
                        logging.getLogger(log_label(self)).info('worker '
                                'has flatlined (i.e., heartbeat_count=%s):'
                                ' %s' % (heartbeat_count, uuid))
                worker._heartbeat_count = heartbeat_count
    # Pass 2: detect revived workers (their count was reset by a fresh
    # heartbeat via `reset_heartbeat`).
    for uuid in self._data['workers']['flatlined'].copy():
        worker = self._data['worker_states'][uuid]
        if worker._heartbeat_count is not None\
                and worker._heartbeat_count > 0:
            # A worker has come back to life! Update the worker mappings
            # accordingly.
            self._data['workers']['running'].add(uuid)
            self._data['workers']['flatlined'].remove(uuid)
            logging.getLogger(log_label(self)).info(
                    'worker %s has revived - heartbeat_count=%s'
                    % (uuid, worker._heartbeat_count)
            )
            # Tell the manager the worker is alive again.
            z = ZmqRpcProxy(self._uris['manager_rpc'], uuid=uuid)
            z.revived_worker()
def sub__begin_task(self, message, seconds_since_epoch_str, worker_info,
                    serialization):
    '''
    Handle a subscribed `begin_task` message: record the UTC start time
    and the (decoded) worker information for the message's task.
    '''
    # Decode the worker-info payload based on its serialization tag.
    if serialization == 'SERIALIZE__PICKLE':
        decoded_info = pickle.loads(worker_info)
    elif serialization == 'SERIALIZE__JSON':
        decoded_info = jsonapi.loads(worker_info)
    elif serialization == 'SERIALIZE__NONE':
        decoded_info = worker_info
    else:
        # Unknown tag: keep the raw payload alongside its tag.
        decoded_info = {'data': worker_info, 'serialization': serialization}
    started_at = datetime.utcfromtimestamp(float(seconds_since_epoch_str))
    worker_uuid, task_uuid = message.worker_uuid, message.task_uuid
    self.save(worker_uuid, task_uuid, '__begin_task__', started_at)
    self.save(worker_uuid, task_uuid, '__worker_info__', decoded_info)
    logging.getLogger(log_label(self)).info(decoded_info)
def send_response(self, socks, multipart_message, request, response):
    '''
    Serialize the request/response pair into multipart frames and send
    them on the RPC socket, preserving the original routing envelope.

    Frame layout: [routing frames (first 2 of `multipart_message`),
    timestamp, request fields (minus sender uuid), response fields
    (minus timestamp/error), error].

    NOTE(review): the `.values()[1:]` slicing assumes `request` and
    `response` preserve a fixed insertion order (e.g. `OrderedDict`) —
    confirm against the caller that builds them.
    '''
    # Ignore first element (sender uuid)
    data = map(self.serialize_frame, request.values()[1:])
    # Ignore first element (timestamp), and last element (error).
    data += map(self.serialize_frame, response.values()[1:-1])
    try:
        error = self.serialize_frame(response.values()[-1])
    except:
        # Unserializable error value: fall back to a serialized `None`.
        error = self.serialize_frame(None)
    # Timestamp leads the payload; the error frame trails it.
    data.insert(0, self.serialize_frame(response['timestamp']))
    data.append(error)
    # Prepend the zmq routing envelope from the incoming message.
    data = multipart_message[:2] + data
    if request['command'] not in ('heartbeat', ):
        logging.getLogger(log_label(self)).info('request: '
                'uuid=%(sender_uuid)s command=%(command)s' % request)
    socks[self.rpc_sock_name].send_multipart(data)
def terminate_worker(self, env, worker_uuid):
    '''
    If a worker subprocess has been launched by the supervisor for the
    specified worker uuid, terminate the subprocess.
    '''
    super(Supervisor, self).terminate_worker(env, worker_uuid)
    subprocesses = getattr(self, '_worker_subprocesses', None)
    if subprocesses is not None and worker_uuid in subprocesses:
        logging.getLogger(log_label(self)).info('terminate_worker: %s',
                                                worker_uuid)
        proc = subprocesses[worker_uuid]
        try:
            proc.terminate()
        except OSError:
            # Process already gone; nothing to do.
            pass
        finally:
            del subprocesses[worker_uuid]
    # Move the worker out of any transitional set into `terminated`.
    workers = self._data['workers']
    for set_name in ('pending_terminate', 'flatlined'):
        if worker_uuid in workers[set_name]:
            workers[set_name].remove(worker_uuid)
            workers['terminated'].add(worker_uuid)
def process_manager_rpc_response(self, env, multipart_message):
    # We received a response to the dealer socket, so we need to forward
    # the message back to the job that requested through the router socket.
    sender_uuid = self.deserialize_frame(multipart_message[2])
    # NOTE(review): deserializing the already-deserialized `sender_uuid`
    # string looks wrong — this likely should look up the pending-request
    # queue for `sender_uuid` instead; confirm against the request path.
    pending_requests = self.deserialize_frame(sender_uuid)
    # Pop the oldest pending request for this sender (FIFO).
    request_info = pending_requests[0]
    del pending_requests[0]
    request = request_info['request']
    response = request_info['response']
    # Frame layout mirrors `send_response`: routing envelope, timestamp,
    # request fields (minus sender uuid), response fields (minus
    # timestamp/error), error.
    # Ignore first element (sender uuid)
    data = map(self.serialize_frame, request.values()[1:])
    # Ignore first element (sender uuid), and last element (error).
    data += map(self.serialize_frame, response.values()[1:-1])
    try:
        error = self.serialize_frame(response.values()[-1])
    except:
        # Unserializable error value: fall back to a serialized `None`.
        error = self.serialize_frame(None)
    data.insert(0, self.serialize_frame(response['timestamp']))
    data.append(error)
    data = multipart_message[:2] + data
    logging.getLogger(log_label(self)).info(
            'request: uuid=%(sender_uuid)s command=%(command)s' % request)
    env['socks'][self.rpc_sock_name].send_multipart(data)
def process_response(self, env, stream, multipart_message):
    '''
    Count each received response; stop the IO loop once the target
    number of responses has arrived.
    '''
    logging.getLogger(log_label(self)).debug(
            '%s %s' % (stream, multipart_message,))
    count = self.received_count + 1
    self.received_count = count
    if count >= self.target_count:
        env['io_loop'].stop()
def rpc__unregister_worker(self, env, uuid, worker_uuid):
    '''
    Drop the SUB-socket subscription for `worker_uuid` and forget it,
    if it is currently registered.
    '''
    if worker_uuid not in self._registered_workers:
        return
    env['socks']['sub'].setsockopt(zmq.UNSUBSCRIBE, worker_uuid)
    self._registered_workers.remove(worker_uuid)
    logging.getLogger(log_label(self)).info(worker_uuid)
def rpc__register_worker(self, env, uuid, worker_uuid):
    '''
    Subscribe the SUB socket to messages for `worker_uuid` and record
    the registration.
    '''
    sub_sock = env['socks']['sub']
    # The first real registration replaces the initial catch-all
    # subscription, if one was configured.
    if not self._registered_workers and self._init_subscribe is not None:
        sub_sock.setsockopt(zmq.UNSUBSCRIBE, self._init_subscribe)
    sub_sock.setsockopt(zmq.SUBSCRIBE, worker_uuid)
    self._registered_workers.add(worker_uuid)
    logging.getLogger(log_label(self)).info(worker_uuid)
def rpc__stderr(self, env, uuid, value):
    '''
    Forward `value` as stderr output for the worker's current task and
    return the resulting log message.
    '''
    result = self._rpc__std_base(env, uuid, 'stderr', value)
    logging.getLogger(log_label(self)).info(result)
    return result
def _rpc__std_base(self, env, uuid, stream_name, value):
    '''
    Publish `value` on the named output stream for the worker's current
    task and return a log message — or implicitly return `None` when the
    worker has no assigned task (callers such as `rpc__stderr` then log
    `None`).
    '''
    if uuid in self.task_by_worker:
        task_uuid = self.task_by_worker[uuid]
        message = '[%s] uuid=%s\n%s' % (log_label(self), uuid, value)
        self.publish(env, uuid, task_uuid, stream_name, value)
        return message
def rpc__supervisor_hello_world(self, env, multipart_message, uuid):
    '''
    Trivial RPC used to verify supervisor connectivity; returns a
    timestamped greeting containing the caller's uuid.
    '''
    greeting = '[%s] hello world (%s)' % (datetime.now(), uuid)
    logging.getLogger(log_label(self)).info(greeting)
    return greeting
def run(self):
    '''
    Run the client, then log the request socket's URIs and completion,
    returning the superclass result unchanged.
    '''
    result = super(JsonClient, self).run()
    log = logging.getLogger(log_label(self))
    log.info('uris = %s' % get_uris(self.socks['req']))
    log.info('finished')
    return result
def do_request(self):
    '''
    Send a JSON `test_command` request and log the JSON response.
    '''
    req_sock = self.socks['req']
    req_sock.send_json({'command': 'test_command'})
    reply = req_sock.recv_json()
    logging.getLogger(log_label(self)).info('%s' % reply)
def do_request(self):
    '''
    Send a single-frame multipart request and log the raw response.
    '''
    req_sock = self.socks['req']
    req_sock.send_multipart(['hello world'])
    reply = req_sock.recv_multipart()
    logging.getLogger(log_label(self)).debug(str(reply))
def sub__complete_task(self, message, data):
    '''
    Handle a subscribed `complete_task` message: persist the completion
    time (a seconds-since-epoch payload, interpreted as UTC) for the
    message's task.
    '''
    completed_at = datetime.utcfromtimestamp(float(data))
    self.save(message.worker_uuid, message.task_uuid, '__complete_task__',
              completed_at)
    logging.getLogger(log_label(self)).info(data)