示例#1
0
 def save(self, worker_uuid, task_uuid, key, value):
     '''
     Persist a task key/value pair into durus storage, retrying on failure.

     Attempts the transaction up to three times, aborting any half-applied
     state before each try; re-raises the last failure if all attempts fail.
     '''
     logging.getLogger(log_label(self)).info(self.storage.root.keys())
     last_error = None
     for attempt in range(3):
         # Discard any partially-applied state from a previous failed try.
         self.storage.abort()
         try:
             # NOTE(review): this stores `value` (not `worker_uuid`) under
             # /worker_uuid_by_task_uuid/ -- confirm against readers of key.
             db_path = '/worker_uuid_by_task_uuid/%s' % task_uuid
             self.storage[db_path] = value
             db_path = '/task_by_uuid/%s/%s' % (task_uuid, key)
             self.storage[db_path] = value
             if task_uuid not in self.storage.root.setdefault(
                     'datetime_by_task_uuid', PersistentOrderedDict()):
                 # First write for this task: record creation time in both
                 # directions (uuid -> datetime and datetime -> uuid).
                 db_path = '/datetime_by_task_uuid/%s' % task_uuid
                 now = datetime.now()
                 self.storage[db_path] = now
                 task_uuid_by_datetime = self.storage.root.setdefault(
                         'task_uuid_by_datetime', durus.btree.BTree())
                 task_uuid_by_datetime[now] = task_uuid
             task_uuids_by_label = self.storage.root.setdefault(
                     'task_uuids_by_label', PersistentOrderedDict())
             if key == '__label__':
                 logging.getLogger(log_label(self)).warning('key=%s value=%s', key, value)
                 label = value
                 uuids = task_uuids_by_label.setdefault(label, PersistentSet())
                 if task_uuid not in uuids:
                     logging.getLogger(log_label(self)).warning(
                             'adding %s to task_uuids_by_label["%s"]',
                             task_uuid, label)
                     uuids.add(task_uuid)
                     # Mutating the PersistentSet in place does not mark the
                     # root dirty, so flag the change explicitly.
                     self.storage.root._p_note_change()
             self.storage.commit()
             return
         except Exception as error:
             # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
             # are not swallowed; remember the failure and retry.
             last_error = error
             logging.getLogger(log_label(self)).warning(
                     'save attempt %s failed: %s', attempt + 1, error)
     # All retries exhausted: surface the final failure to the caller (the
     # original bare `raise` had no active exception at this point).
     raise last_error
示例#2
0
 def rpc__store(self, env, uuid, key, value, task_uuid=None,
               serialization=SERIALIZE__NONE):
     '''
     Publish a key/value pair for the task associated with worker `uuid`.

     Returns the logged message, or `None` if no task can be resolved.
     '''
     if task_uuid is None and uuid in self.task_by_worker:
         task_uuid = self.task_by_worker[uuid]
     if task_uuid is None:
         # No task to attach the value to; silently ignore.
         return
     message = '[%s] uuid=%s, %s=%s' % (log_label(self), uuid, key, value)
     logging.getLogger(log_label(self)).info(message)
     self.publish(env, uuid, task_uuid, 'store', serialization, key, value)
     return message
示例#3
0
 def _rpc__std_base(self, message, data, stream_name):
     '''
     Append `data` to the named stream file (e.g. stdout/stderr) inside the
     message's data directory.
     '''
     std_path = self._message_data_dir(message).joinpath(stream_name)
     # Context manager guarantees the handle is closed even if write() raises
     # (the original leaked the handle on error).
     with std_path.open('a') as std:
         std.write(data)
     logging.getLogger(log_label(self)).info('append %s to : %s',
                                             stream_name, std_path)
示例#4
0
 def sub__store(self, message, serialization, key, value):
     '''
     Decode a published value according to its serialization tag, then
     persist it for the message's worker/task.
     '''
     decoders = {'SERIALIZE__PICKLE': pickle.loads,
                 'SERIALIZE__JSON': jsonapi.loads}
     decode = decoders.get(serialization)
     if decode is not None:
         value = decode(value)
     self.save(message.worker_uuid, message.task_uuid, key, value)
     logging.getLogger(log_label(self)).debug('key=%s value=%s', key, value)
示例#5
0
    def create_worker(self, env, worker_uuid, *args, **kwargs):
        '''
        Create worker subprocess.

        Returns the worker uuid on success, or `None` when a live subprocess
        is already registered under `worker_uuid`.
        '''
        import os
        from subprocess import Popen, PIPE
        import platform

        if not hasattr(self, '_worker_subprocesses'):
            self._worker_subprocesses = OrderedDict()
        if worker_uuid in self._worker_subprocesses:
            p = self._worker_subprocesses[worker_uuid]
            if p.poll() is not None:
                # Worker with same name existed, but has finished.
                try:
                    p.terminate()
                except OSError:
                    # Already gone; nothing to terminate.
                    pass
                finally:
                    del self._worker_subprocesses[worker_uuid]
            else:
                # Worker already exists, so return `None`
                logging.getLogger(log_label(self)).info(('worker already exists:', worker_uuid))
                return None
        logging.getLogger(log_label(self)).info((worker_uuid, args, kwargs))

        # Copy the environment instead of mutating `os.environ` in place (the
        # original permanently polluted the supervisor's own environment, and
        # shadowed the `env` rpc parameter with the local binding).
        proc_env = dict(os.environ)
        proc_env.update(kwargs.pop('env', {}))
        supervisor_connect_uri = self.uris['rpc'].replace(r'tcp://*', r'tcp://%s' %
                                                      (platform.node()))
        # NOTE(review): command is built by string interpolation and run with
        # shell=True; worker_uuid appears supervisor-generated, but confirm it
        # can never carry shell metacharacters.
        command = ('bash -c "'
            '. /usr/local/bin/virtualenvwrapper.sh &&'
            'workon zmq_job_manager && '
            'cdvirtualenv && '
            'mkdir -p worker_envs/%(uuid)s && '
            'cd worker_envs/%(uuid)s && '
            'python -m zmq_job_manager.worker %(uri)s %(uuid)s"'
            % {'uuid': worker_uuid, 'uri': supervisor_connect_uri}
        )
        self._worker_subprocesses[worker_uuid] = Popen(command, shell=True,
                                                       env=proc_env,
                                                       stdout=PIPE,
                                                       stderr=PIPE)
        logging.getLogger(log_label(self)).info('started worker subprocess for:'
                                                ' %s\n%s', worker_uuid, command)
        return worker_uuid
示例#6
0
 def reset_heartbeat(self, value=None):
     '''
     Reset the countdown used to detect flat-lined workers.

     :param value: new heartbeat count; when `None`, reuse the count from
         the previous reset.
     :raises ValueError: if no value is given and none was ever set.
     '''
     old_value = getattr(self, '_starting_heartbeat_count', None)
     if value is None:
         if old_value is None:
             # Call form instead of the Python-2-only `raise E, msg` comma
             # syntax; works on Python 2.6+ and Python 3.
             raise ValueError('No initial value provided for heartbeat count.')
         value = old_value
     logging.getLogger(log_label(self)).debug('reset_heartbeat %s', value)
     self._heartbeat_count = value
     self._starting_heartbeat_count = value
示例#7
0
 def rpc__create_worker(self, env, multipart_message, uuid, *args, **kwargs):
     '''
     Create a new worker and return the worker's uuid.
     '''
     worker_id = kwargs.pop('worker_uuid', None)
     worker_id = str(uuid1()) if worker_id is None else worker_id
     message = '[%s] create worker (%s)' % (datetime.now(), worker_id)
     result = self.create_worker(env, worker_id, *args, **kwargs)
     if result:
         # Only record/log workers that were actually started.
         logging.getLogger(log_label(self)).info(message)
         self._data['workers']['pending_create'].add(result)
     return result
示例#8
0
    def call_handler(self, handler, env, multipart_message, request):
        '''
        Isolate handler call in this method to allow subclasses to perform
        special-handling, if necessary.

        Note that the multipart-message is ignored by default.
        '''
        sender = request['sender_uuid']
        worker_states = self._data['worker_states']
        if sender in worker_states:
            # Any inbound message counts as proof of life for the worker.
            worker_states[sender].reset_heartbeat()
        command = request['command']
        if command not in ('heartbeat', ):
            logging.getLogger(log_label(self)).info(command)
        return handler(env, multipart_message, sender,
                       *request['args'], **request['kwargs'])
示例#9
0
    def timer__monitor_heartbeats(self, *args, **kwargs):
        '''
        Periodic timer callback: decrement each running worker's heart-beat
        countdown, moving silent workers to the flat-lined sets, and move
        flat-lined workers whose count was reset back to running.
        '''
        # Iterate over a copy: flat-lined workers are removed from `running`
        # inside the loop body.
        for uuid in self._data['workers']['running'].copy():
            worker = self._data['worker_states'].get(uuid)
            if worker:
                heartbeat_count = worker._heartbeat_count
                if uuid in self._data['workers']['running']\
                        and heartbeat_count is not None:
                    # If `heartbeat_count` is `None`, the heart-beat has not
                    # been started.  We only process the heart-beat after it
                    # has been started.
                    heartbeat_count -= 1
                    if heartbeat_count <= 0:
                        # This process has missed the maximum number of
                        # expected heartbeats, so add to list of flatlined
                        # workers.
                        self._data['workers']['flatlined'].add(uuid)
                        # Set a flag to mark worker as newly flat-lined.
                        self._data['workers']['flatlined_latch'].add(uuid)
                        self._data['workers']['running'].remove(uuid)
                        if heartbeat_count == 0:
                            # Log only on the exact transition to zero, so an
                            # already flat-lined worker is not re-logged on
                            # every timer tick.
                            logging.getLogger(log_label(self)).info('worker '
                                    'has flatlined (i.e., heartbeat_count=%s):'
                                    ' %s' % (heartbeat_count, uuid))
                    worker._heartbeat_count = heartbeat_count

        # Iterate over a copy: revived workers are removed from `flatlined`
        # inside the loop body.
        for uuid in self._data['workers']['flatlined'].copy():
            worker = self._data['worker_states'][uuid]
            if worker._heartbeat_count is not None\
                    and worker._heartbeat_count > 0:
                # A worker has come back to life!  Update the worker mappings
                # accordingly.
                self._data['workers']['running'].add(uuid)
                self._data['workers']['flatlined'].remove(uuid)
                logging.getLogger(log_label(self)).info(
                    'worker %s has revived - heartbeat_count=%s'
                    % (uuid, worker._heartbeat_count)
                )
                # Notify the manager rpc endpoint that the worker revived.
                z = ZmqRpcProxy(self._uris['manager_rpc'], uuid=uuid)
                z.revived_worker()
示例#10
0
 def sub__begin_task(self, message, seconds_since_epoch_str, worker_info,
                     serialization):
     '''
     Record the start time and decoded worker metadata for a task.
     '''
     if serialization == 'SERIALIZE__PICKLE':
         info = pickle.loads(worker_info)
     elif serialization == 'SERIALIZE__JSON':
         info = jsonapi.loads(worker_info)
     elif serialization == 'SERIALIZE__NONE':
         info = worker_info
     else:
         # Unknown scheme: keep the raw payload tagged with its scheme.
         info = {'data': worker_info, 'serialization': serialization}
     started_at = datetime.utcfromtimestamp(float(seconds_since_epoch_str))
     self.save(message.worker_uuid, message.task_uuid, '__begin_task__',
               started_at)
     self.save(message.worker_uuid, message.task_uuid, '__worker_info__',
               info)
     logging.getLogger(log_label(self)).info(info)
示例#11
0
 def send_response(self, socks, multipart_message, request, response):
     '''
     Serialize a request/response pair and send it on the rpc socket.

     Frame layout: multipart prefix (2 frames), response timestamp, request
     fields (minus sender uuid), response fields (minus timestamp and
     error), then the serialized error (or `None` if unserializable).
     '''
     # Ignore first element (sender uuid)
     data = map(self.serialize_frame, request.values()[1:])
     # Ignore first element (timestamp), and last element (error).
     data += map(self.serialize_frame, response.values()[1:-1])
     try:
         error = self.serialize_frame(response.values()[-1])
     except Exception:
         # Narrowed from a bare `except:` (which also caught
         # KeyboardInterrupt); an unserializable error is downgraded to
         # `None` rather than killing the response.
         error = self.serialize_frame(None)
     data.insert(0, self.serialize_frame(response['timestamp']))
     data.append(error)
     data = multipart_message[:2] + data
     if request['command'] not in ('heartbeat', ):
         logging.getLogger(log_label(self)).info('request: '
                 'uuid=%(sender_uuid)s command=%(command)s' % request)
     socks[self.rpc_sock_name].send_multipart(data)
示例#12
0
 def terminate_worker(self, env, worker_uuid):
     '''
     If a worker subprocess has been launched by the supervisor for the
     specified worker uuid, terminate the subprocess.
     '''
     super(Supervisor, self).terminate_worker(env, worker_uuid)
     if hasattr(self, '_worker_subprocesses'):
         if worker_uuid in self._worker_subprocesses:
             logging.getLogger(log_label(self)).info('terminate_worker: %s',
                                                     worker_uuid)
             p = self._worker_subprocesses[worker_uuid]
             try:
                 p.terminate()
             except OSError:
                 # Process may already have exited; treat as terminated.
                 pass
             finally:
                 # Forget the subprocess whether or not terminate succeeded.
                 del self._worker_subprocesses[worker_uuid]
         # Drop the worker from the transient bookkeeping sets.
         for workers_set in ('pending_terminate', 'flatlined'):
             if worker_uuid in self._data['workers'][workers_set]:
                 self._data['workers'][workers_set].remove(worker_uuid)
             # NOTE(review): runs once per loop iteration (i.e., twice) and
             # regardless of the membership test above -- harmless for a
             # set, but was this meant to sit outside the loop?
             self._data['workers']['terminated'].add(worker_uuid)
示例#13
0
    def process_manager_rpc_response(self, env, multipart_message):
        '''
        Forward a response received on the dealer socket back to the job
        that issued the request, via the router socket.

        Frame layout mirrors `send_response`: multipart prefix (2 frames),
        response timestamp, request fields (minus sender uuid), response
        fields (minus timestamp and error), then the serialized error.
        '''
        # We received a response to the dealer socket, so we need to forward
        # the message back to the job that requested through the router socket.
        sender_uuid = self.deserialize_frame(multipart_message[2])
        # NOTE(review): deserializing the already-deserialized `sender_uuid`
        # to obtain the pending-request queue looks suspicious -- confirm
        # this shouldn't be a mapping lookup keyed by `sender_uuid`.
        pending_requests = self.deserialize_frame(sender_uuid)
        # Pop the oldest pending request for this sender.
        request_info = pending_requests[0]
        del pending_requests[0]
        request = request_info['request']
        response = request_info['response']

        # Ignore first element (sender uuid)
        data = map(self.serialize_frame, request.values()[1:])
        # Ignore first element (timestamp), and last element (error).
        data += map(self.serialize_frame, response.values()[1:-1])
        try:
            error = self.serialize_frame(response.values()[-1])
        except Exception:
            # Narrowed from a bare `except:`; an unserializable error is
            # downgraded to `None` rather than dropping the response.
            error = self.serialize_frame(None)
        data.insert(0, self.serialize_frame(response['timestamp']))
        data.append(error)
        data = multipart_message[:2] + data
        logging.getLogger(log_label(self)).info(
                'request: uuid=%(sender_uuid)s command=%(command)s' % request)
        env['socks'][self.rpc_sock_name].send_multipart(data)
示例#14
0
 def process_response(self, env, stream, multipart_message):
     '''
     Tally one incoming message and halt the io-loop at the target count.
     '''
     logger = logging.getLogger(log_label(self))
     logger.debug('%s %s' % (stream, multipart_message,))
     count = self.received_count + 1
     self.received_count = count
     if count >= self.target_count:
         env['io_loop'].stop()
示例#15
0
 def rpc__unregister_worker(self, env, uuid, worker_uuid):
     '''
     Stop listening for publications from the given worker.
     '''
     registered = self._registered_workers
     if worker_uuid in registered:
         # Drop the zmq subscription along with the registration entry.
         env['socks']['sub'].setsockopt(zmq.UNSUBSCRIBE, worker_uuid)
         registered.remove(worker_uuid)
     logging.getLogger(log_label(self)).info(worker_uuid)
示例#16
0
 def rpc__register_worker(self, env, uuid, worker_uuid):
     '''
     Subscribe to publications from the given worker.
     '''
     sub_sock = env['socks']['sub']
     if not self._registered_workers and self._init_subscribe is not None:
         # First real registration: drop the initial startup subscription.
         sub_sock.setsockopt(zmq.UNSUBSCRIBE, self._init_subscribe)
     sub_sock.setsockopt(zmq.SUBSCRIBE, worker_uuid)
     self._registered_workers.add(worker_uuid)
     logging.getLogger(log_label(self)).info(worker_uuid)
示例#17
0
 def rpc__stderr(self, env, uuid, value):
     '''
     Forward stderr output from worker `uuid`; return the logged message.
     '''
     result = self._rpc__std_base(env, uuid, 'stderr', value)
     logging.getLogger(log_label(self)).info(result)
     return result
示例#18
0
 def _rpc__std_base(self, env, uuid, stream_name, value):
     '''
     Publish a std-stream payload for the task owned by worker `uuid`.

     Returns the log message, or `None` when the worker has no task.
     '''
     if uuid not in self.task_by_worker:
         return None
     task_uuid = self.task_by_worker[uuid]
     message = '[%s] uuid=%s\n%s' % (log_label(self), uuid, value)
     self.publish(env, uuid, task_uuid, stream_name, value)
     return message
示例#19
0
 def rpc__supervisor_hello_world(self, env, multipart_message, uuid):
     '''
     Trivial liveness rpc: log and return a timestamped greeting.
     '''
     greeting = '[%s] hello world (%s)' % (datetime.now(), uuid)
     logging.getLogger(log_label(self)).info(greeting)
     return greeting
示例#20
0
 def run(self):
     '''
     Run the parent client loop, then log the request-socket uris.
     '''
     result = super(JsonClient, self).run()
     logger = logging.getLogger(log_label(self))
     logger.info('uris = %s' % get_uris(self.socks['req']))
     logger.info('finished')
     return result
示例#21
0
 def do_request(self):
     '''
     Send a JSON test command on the req socket and log the reply.
     '''
     req_sock = self.socks['req']
     req_sock.send_json({'command': 'test_command'})
     reply = req_sock.recv_json()
     logging.getLogger(log_label(self)).info('%s' % reply)
示例#22
0
 def do_request(self):
     '''
     Send a single multipart hello-world request and log the raw reply.
     '''
     req_sock = self.socks['req']
     req_sock.send_multipart(['hello world'])
     reply = req_sock.recv_multipart()
     logging.getLogger(log_label(self)).debug(str(reply))
示例#23
0
 def sub__complete_task(self, message, data):
     '''
     Record the completion time (seconds since epoch) for a task.
     '''
     completed_at = datetime.utcfromtimestamp(float(data))
     self.save(message.worker_uuid, message.task_uuid, '__complete_task__',
               completed_at)
     logging.getLogger(log_label(self)).info(data)