Example #1
    def _run(self, sink, *receivers):
        # Windows does not support sharing a logger across processes, so create
        # a new logger inside the child process for better compatibility
        logger = set_logger(colored('WORKER-%d' % self.worker_id, 'yellow'))

        logger.info('use device %s' % ('cpu' if self.device_id < 0 else
                                       ('gpu: %d' % self.device_id)))

        for sock, addr in zip(receivers, self.worker_address):
            sock.connect(addr)

        sink.connect(self.sink_address)
        poller = zmq.Poller()
        for sock in receivers:
            poller.register(sock, zmq.POLLIN)
        logger.info('ready and listening!')
        while not self.exit_flag.is_set():
            events = dict(poller.poll())
            for sock_idx, sock in enumerate(receivers):
                if sock in events:
                    # decode the JSON list payload into an ndarray and
                    # forward it straight to the sink
                    client_id, raw_msg = sock.recv_multipart()
                    msg = jsonapi.loads(raw_msg)
                    msg = np.array(msg)
                    send_ndarray(sink, client_id, msg)
                    logger.info('job done\tsize: %s\tclient: %s' %
                                (len(msg), client_id))
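The `send_ndarray` helper used above is not shown on this page. Based on how Example #5 decodes its frames (a JSON header carrying dtype/shape, followed by the raw buffer), a minimal sketch might look like the following; the project's real helper may differ in details.

import numpy as np
from zmq.utils import jsonapi

def send_ndarray(sock, dest, X, req_id=b'', flags=0):
    # sketch only: pack the array as [dest, json header, raw bytes, req_id];
    # the receiver rebuilds it with np.frombuffer(...).reshape(...)
    md = dict(dtype=str(X.dtype), shape=X.shape)
    return sock.send_multipart(
        [dest, jsonapi.dumps(md), np.ascontiguousarray(X), req_id], flags)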
Example #2
    def __init__(self, id, args, worker_address_list, sink_address, device_id):
        super().__init__()
        self.worker_id = id
        self.device_id = device_id
        self.logger = set_logger(
            colored('WORKER-%d' % self.worker_id, 'yellow'))
        self.daemon = True
        self.exit_flag = multiprocessing.Event()  # set by the parent to request shutdown
        self.worker_address = worker_address_list
        self.num_concurrent_socket = len(self.worker_address)
        self.sink_address = sink_address
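This constructor follows the daemon-Process-plus-Event pattern: the parent starts the worker and later sets exit_flag so the loop in Example #1 can exit cooperatively. A self-contained sketch of just that pattern (the class name and sleep interval here are illustrative, not the project's):

import multiprocessing
import time

class StoppableWorker(multiprocessing.Process):
    # illustrative stand-in for the worker class above
    def __init__(self):
        super().__init__()
        self.daemon = True                        # die with the parent process
        self.exit_flag = multiprocessing.Event()  # parent sets this to stop us

    def run(self):
        while not self.exit_flag.is_set():
            time.sleep(0.1)  # stand-in for the poller.poll() work loop

if __name__ == '__main__':
    w = StoppableWorker()
    w.start()
    w.exit_flag.set()  # request cooperative shutdown
    w.join()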
Example #3
    def _run(self, sink, *receivers):
        # Windows does not support sharing a logger across processes, so create
        # a new logger inside the child process for better compatibility
        logger = set_logger(colored('WORKER-%d' % self.worker_id, 'yellow'))

        logger.info('use device %s' % ('cpu' if self.device_id < 0 else
                                       ('gpu: %d' % self.device_id)))

        for sock, addr in zip(receivers, self.worker_address):
            sock.connect(addr)

        sink.connect(self.sink_address)
        poller = zmq.Poller()
        for sock in receivers:
            poller.register(sock, zmq.POLLIN)
        logger.info('ready and listening!')
        while not self.exit_flag.is_set():
            events = dict(poller.poll())
            for sock_idx, sock in enumerate(receivers):
                if sock in events:
                    client_id, raw_msg = sock.recv_multipart()
                    msg = jsonapi.loads(raw_msg)
                    msg_len = len(msg)
                    # peel trailing 'key____value' entries off the end of the
                    # message into a config dict; the rest are sentences
                    dic = {}
                    i = len(msg) - 1
                    while i >= 0 and '____' in msg[i]:
                        key, value = msg[i].split('____', 1)
                        dic[key] = value
                        i -= 1
                    lines = msg[:i + 1]
                    sa = semantic_annotation_jieba(dic)
                    res = []
                    for line in lines:
                        # flatten each annotation dict into 'key==>value'
                        # pairs joined by '++'
                        r = sa.semantic_annotation_jieba(line)
                        r['origin_sentence'] = line
                        res.append('++'.join(k + '==>' + v for k, v in r.items()))
                    # pad to the original length so results stay aligned
                    lines = res + [''] * (msg_len - len(lines))
                    msg = np.array(lines)
                    send_ndarray(sink, client_id, msg)
                    logger.info('job done\tsize: %s\tclient: %s' %
                                (len(msg), client_id))
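The parsing loop above assumes messages arrive as a list of sentences followed by trailing 'key____value' config entries. A standalone sketch of that convention (the function name split_payload is illustrative):

def split_payload(msg):
    # sketch: walk backwards, peeling 'key____value' entries into a config
    # dict; everything before the first non-config entry is a sentence
    config = {}
    i = len(msg) - 1
    while i >= 0 and '____' in msg[i]:
        key, value = msg[i].split('____', 1)
        config[key] = value
        i -= 1
    return msg[:i + 1], config

lines, config = split_payload(['sentence one', 'sentence two', 'mode____fast'])
assert lines == ['sentence one', 'sentence two']
assert config == {'mode': 'fast'}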
Example #4
    def __init__(self, args):
        super().__init__()
        self.logger = set_logger(colored('VENTILATOR', 'magenta'))

        self.num_worker = args.num_worker
        self.max_batch_size = args.max_batch_size
        self.num_concurrent_socket = max(
            8, args.num_worker * 2)  # scale sockets with workers to serve multiple clients concurrently
        self.port = args.port
        self.args = args
        self.status_args = {
            # pooling_strategy is an enum; store its .value so the dict stays JSON-serializable
            k: (v if k != 'pooling_strategy' else v.value)
            for k, v in sorted(vars(args).items())
        }
        self.status_static = {
            'python_version': sys.version,
            'server_version': __version__,
            'pyzmq_version': zmq.pyzmq_version(),
            'zmq_version': zmq.zmq_version(),
            'server_start_time': str(datetime.now()),
        }
        self.processes = []
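The .value substitution in status_args exists because a plain Enum member cannot be serialized by the json module. A minimal illustration (the PoolingStrategy enum here is a stand-in, not the project's actual definition):

import json
from enum import Enum

class PoolingStrategy(Enum):
    # illustrative stand-in for the server's pooling_strategy enum
    REDUCE_MEAN = 1

args = {'num_worker': 2, 'pooling_strategy': PoolingStrategy.REDUCE_MEAN}
# json.dumps(args) would raise TypeError on the enum member,
# so the status dict stores the enum's .value instead
status_args = {k: (v.value if isinstance(v, Enum) else v)
               for k, v in sorted(args.items())}
print(json.dumps(status_args))  # {"num_worker": 2, "pooling_strategy": 1}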
Example #5
    def _run(self, receiver, frontend, sender):
        receiver_addr = auto_bind(receiver)
        frontend.connect(self.front_sink_addr)
        sender.bind('tcp://*:%d' % self.port)

        pending_checksum = defaultdict(int)
        pending_result = defaultdict(list)
        job_checksum = defaultdict(int)

        poller = zmq.Poller()
        poller.register(frontend, zmq.POLLIN)
        poller.register(receiver, zmq.POLLIN)

        # send worker receiver address back to frontend
        frontend.send(receiver_addr.encode('ascii'))

        # Windows does not support sharing a logger across processes, so create
        # a new logger inside the child process for better compatibility
        logger = set_logger(colored('SINK', 'green'))
        logger.info('ready')

        while not self.exit_flag.is_set():
            socks = dict(poller.poll())
            if socks.get(receiver) == zmq.POLLIN:
                msg = receiver.recv_multipart()
                job_id = msg[0]
                # rebuild the ndarray from its JSON header (dtype/shape) and raw buffer
                arr_info, arr_val = jsonapi.loads(msg[1]), msg[2]
                X = np.frombuffer(memoryview(arr_val), dtype=arr_info['dtype'])
                X = X.reshape(arr_info['shape'])
                # workers tag partial results as b'job_id@partial_id';
                # an untagged result is treated as partial 0
                job_info = job_id.split(b'@')
                job_id = job_info[0]
                partial_id = job_info[1] if len(job_info) == 2 else 0
                pending_result[job_id].append((X, partial_id))
                pending_checksum[job_id] += X.shape[0]
                logger.info(
                    'collect job %s (%d/%d)' %
                    (job_id, pending_checksum[job_id], job_checksum[job_id]))

                # check for finished jobs and send the assembled result back to the client
                finished = [(k, v) for k, v in pending_result.items()
                            if pending_checksum[k] == job_checksum[k]]
                for job_info, tmp in finished:
                    logger.info('send back\tsize: %d\tjob id:%s\t' %
                                (job_checksum[job_info], job_info))
                    # re-sort to the original order
                    tmp = [x[0] for x in sorted(tmp, key=lambda x: int(x[1]))]
                    client_addr, req_id = job_info.split(b'#')
                    send_ndarray(sender, client_addr,
                                 np.concatenate(tmp, axis=0), req_id)
                    pending_result.pop(job_info)
                    pending_checksum.pop(job_info)
                    job_checksum.pop(job_info)

            if socks.get(frontend) == zmq.POLLIN:
                client_addr, msg_type, msg_info, req_id = frontend.recv_multipart()
                if msg_type == ServerCommand.new_job:
                    job_info = client_addr + b'#' + req_id
                    job_checksum[job_info] = int(msg_info)
                    logger.info('job register\tsize: %d\tjob id: %s' %
                                (int(msg_info), job_info))
                elif msg_type == ServerCommand.show_config:
                    # dirty fix for the slow-joiner problem: sleep briefly so the
                    # client's receiver has time to connect before the reply is sent
                    time.sleep(0.1)
                    logger.info('send config\tclient %s' % client_addr)
                    sender.send_multipart([client_addr, msg_info, req_id])
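The sink keys each job as b'client_addr#req_id' and collects worker results tagged b'job_id@partial_id', re-sorting by partial id before concatenating. A toy illustration of that reassembly step (the data is made up):

import numpy as np

# two partial results arriving out of order, tagged with their partial ids
partials = [(np.array([2, 3]), b'1'), (np.array([0, 1]), b'0')]

# re-sort to the original order, exactly as the sink does above
ordered = [x for x, pid in sorted(partials, key=lambda p: int(p[1]))]
print(np.concatenate(ordered, axis=0))  # [0 1 2 3]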
Example #6
    def __init__(self, args, front_sink_addr):
        super().__init__()
        self.port = args.port_out
        self.exit_flag = multiprocessing.Event()
        self.logger = set_logger(colored('SINK', 'green'))
        self.front_sink_addr = front_sink_addr