# Module-level sentinel (shown next to the method for brevity): a
# `broker=FifoBroker()` default would be evaluated once at definition time
# and shared across every call to recv(), corrupting concurrent transfers.
_DEFAULT_BROKER = object()

def recv(self, endpoint: ErEndpoint, tag, broker=_DEFAULT_BROKER):
    try:
        if broker is _DEFAULT_BROKER:
            broker = FifoBroker()

        @_exception_logger
        def fill_broker(iterable: Iterable, broker):
            for e in iterable:
                broker.put(e)
            broker.signal_write_finish()

        channel = self.__grpc_channel_factory.create_channel(endpoint)
        stub = transfer_pb2_grpc.TransferServiceStub(channel)
        request = transfer_pb2.TransferBatch(
                header=transfer_pb2.TransferHeader(id=1, tag=tag))
        response_iter = stub.recv(
                request, metadata=[(TRANSFER_BROKER_NAME, tag)])

        if broker is None:
            # caller explicitly asked for the raw response iterator
            return response_iter
        t = Thread(target=fill_broker, args=(response_iter, broker))
        t.start()
        return broker
    except Exception:
        L.exception(f'Error calling to {endpoint} in TransferClient.recv')
        raise
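# Usage sketch for recv()'s two modes (the endpoint/tag values and the
# handle() callback are illustrative, not part of the module): passing
# broker=None returns the raw gRPC response iterator to pull directly,
# while the default hands back a broker filled by a background thread.
def consume_recv_raw(client: TransferClient, endpoint: ErEndpoint, tag):
    raw_iter = client.recv(endpoint, tag, broker=None)
    for batch in raw_iter:
        handle(batch)  # hypothetical per-batch callback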
def transfer_batch_generator_from_broker(broker: FifoBroker, tag):
    i = 0
    while not broker.is_closable():
        try:
            data = broker.get(block=True, timeout=0.1)
            if data:
                header = transfer_pb2.TransferHeader(id=i, tag=tag)
                batch = transfer_pb2.TransferBatch(header=header, data=data)
                i += 1
                yield batch
        except queue.Empty:
            # no data ready yet; poll the broker again
            pass
        except BrokerClosed:
            break
        except Exception:
            L.exception(f'error generating transfer batch for tag={tag}')
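# Sketch of the sending side this generator pairs with (assumptions: the
# service's `send` RPC is client-streaming, so the gRPC stub accepts a
# request iterator, and GrpcChannelFactory mirrors the channel factory
# used by recv() above):
def send_from_broker(broker: FifoBroker, endpoint: ErEndpoint, tag):
    channel = GrpcChannelFactory().create_channel(endpoint)
    stub = transfer_pb2_grpc.TransferServiceStub(channel)
    batches = transfer_batch_generator_from_broker(broker, tag)
    return stub.send(batches, metadata=[(TRANSFER_BROKER_NAME, tag)])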
def get_or_create_broker(key: str,
                         maxsize: int = _DEFAULT_QUEUE_SIZE,
                         write_signals=1):
    if not TransferService.has_broker(key):
        with TransferService.mutex:
            if not TransferService.has_broker(key):
                L.info(f'creating broker: {key}, '
                       f'write signals: {write_signals}')
                final_size = maxsize if maxsize > 0 \
                    else TransferService._DEFAULT_QUEUE_SIZE
                TransferService.data_buffer[key] = FifoBroker(
                        maxsize=final_size, writers=write_signals, name=key)
    return TransferService.data_buffer[key]
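# The variant above double-checks: an unlocked fast path, then a re-test
# under the mutex so two racing callers cannot both create a broker for one
# key (the later variant below simply takes the lock first). A self-contained
# sketch of the same pattern using only stdlib names (identifiers illustrative):
import threading

_registry = {}
_registry_lock = threading.Lock()

def get_or_create(key):
    if key not in _registry:            # unlocked fast path
        with _registry_lock:
            if key not in _registry:    # re-test while holding the lock
                _registry[key] = object()
    return _registry[key]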
def put_all(self, items, output=None, options: dict = None):
    if options is None:
        options = {}
    include_key = options.get("include_key", True)
    job_id = generate_job_id(self.__session_id, RollPair.PUT_ALL)

    # TODO:1: consider multiprocessing scenario. parallel size should be
    #  sent to egg_pair to set write signal count
    def send_command():
        job = ErJob(id=job_id,
                    name=RollPair.PUT_ALL,
                    inputs=[self.__store],
                    outputs=[self.__store],
                    functors=[])
        result = self.__command_client.simple_sync_send(
                input=job,
                output_type=ErJob,
                endpoint=self.ctx.get_roll()._command_endpoint,
                command_uri=CommandURI(
                        f'{RollPair.ROLL_PAIR_URI_PREFIX}/{RollPair.RUN_JOB}'),
                serdes_type=SerdesTypes.PROTOBUF)
        return result

    th = Thread(target=send_command, name=f'roll_pair-send_command-{job_id}')
    th.start()
    populated_store = self.ctx.populate_processor(self.__store)
    shuffler = TransferPair(job_id)
    broker = FifoBroker()
    bb = BatchBroker(broker)
    scatter_future = shuffler.scatter(broker, self.partitioner, populated_store)

    key_serdes = self.key_serdes
    value_serdes = self.value_serdes
    try:
        if include_key:
            for k, v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
        else:
            k = 0
            for v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
                k += 1
    finally:
        bb.signal_write_finish()

    scatter_results = scatter_future.result()
    L.debug(f"scatter_results: {scatter_results}")
    th.join()
    return RollPair(populated_store, self.ctx)
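# send_command's return value is discarded above: a plain Thread target
# cannot hand results back, and th.join() only waits. A sketch of the same
# fire-then-join flow with concurrent.futures, which would surface both the
# result and any exception raised in the worker (illustrative alternative,
# not what the module does):
from concurrent.futures import ThreadPoolExecutor

def run_in_background(fn):
    pool = ThreadPoolExecutor(max_workers=1)
    future = pool.submit(fn)
    pool.shutdown(wait=False)   # the submitted task still runs to completion
    return future               # future.result() re-raises worker exceptions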
def get_or_create_broker(key: str,
                         maxsize: int = _DEFAULT_QUEUE_SIZE,
                         write_signals=1):
    with TransferService.mutex:
        if not TransferService.has_broker(key):
            L.trace(f'creating broker={key}, write signals={write_signals}')
            final_size = maxsize if maxsize > 0 \
                else TransferService._DEFAULT_QUEUE_SIZE
            TransferService.data_buffer[key] = FifoBroker(
                    maxsize=final_size, writers=write_signals, name=key)
            if key not in TransferService.event_buffer:
                TransferService.event_buffer[key] = Event()
            TransferService.event_buffer[key].set()
    return TransferService.data_buffer[key]
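# Usage sketch (repo context; the key below is illustrative): calls are
# idempotent per key, so a local writer and a remote-facing reader can each
# "get or create" in either order and meet on the same broker instance.
b1 = TransferService.get_or_create_broker('job_1-part_0', write_signals=4)
b2 = TransferService.get_or_create_broker('job_1-part_0')
assert b1 is b2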
def put_all(self, items, output=None, options: dict = None):
    if options is None:
        options = {}
    include_key = options.get("include_key", True)
    job_id = generate_job_id(self.__session_id, RollPair.PUT_ALL)

    # TODO:1: consider multiprocessing scenario. parallel size should be
    #  sent to egg_pair to set write signal count
    def send_command():
        job = ErJob(id=job_id,
                    name=RollPair.PUT_ALL,
                    inputs=[self.__store],
                    outputs=[self.__store],
                    functors=[])
        task_results = self._run_job(job)
        return self.__get_output_from_result(task_results)

    th = Thread(target=send_command, name=f'roll_pair-send_command-{job_id}')
    th.start()
    populated_store = self.ctx.populate_processor(self.__store)
    shuffler = TransferPair(job_id)
    fifo_broker = FifoBroker()
    bb = BatchBroker(fifo_broker)
    scatter_future = shuffler.scatter(fifo_broker,
                                      self.partitioner,
                                      populated_store)

    key_serdes = self.key_serdes
    value_serdes = self.value_serdes
    try:
        if include_key:
            for k, v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
        else:
            k = 0
            for v in items:
                bb.put(item=(key_serdes.serialize(k),
                             value_serdes.serialize(v)))
                k += 1
    finally:
        bb.signal_write_finish()

    scatter_results = scatter_future.result()
    th.join()
    return RollPair(populated_store, self.ctx)
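# Caller-side sketch of put_all (repo context; the context/store names are
# illustrative): with include_key=True, items are (key, value) pairs; with
# include_key=False, values are auto-keyed 0, 1, 2, ... as the loop above shows.
rp = rp_context.load('test_ns', 'test_name')
rp.put_all([('k1', 'v1'), ('k2', 'v2')])
rp.put_all(['v1', 'v2'], options={'include_key': False})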
def scatter(self, input_broker, partition_function, output_store):
    output_partitions = output_store._partitions
    total_partitions = len(output_partitions)
    L.debug(f'scatter starts for {self.__transfer_id}, '
            f'total partitions: {total_partitions}, '
            f'output_store: {output_store}')
    partitioned_brokers = [FifoBroker() for _ in range(total_partitions)]
    partitioned_bb = [BatchBroker(b) for b in partitioned_brokers]
    futures = []

    @_exception_logger
    def do_partition():
        L.debug(f'do_partition start for {self.__transfer_id}')
        done_count = 0
        # route each pair to its destination partition's broker
        for k, v in BatchBroker(input_broker):
            partitioned_bb[partition_function(k)].put((k, v))
            done_count += 1
        L.debug(f'do_partition end for transfer id: {self.__transfer_id}, '
                f'total partitions: {total_partitions}, '
                f'pairs partitioned: {done_count}')
        for bb in partitioned_bb:
            bb.signal_write_finish()
        return done_count

    futures.append(self._executor_pool.submit(do_partition))
    client = TransferClient()

    def do_send_all():
        send_all_futs = []
        for i, part in enumerate(output_partitions):
            tag = self.__generate_tag(i)
            L.debug(f'do_send_all for tag: {tag}, '
                    f'active thread count: {threading.active_count()}')
            fut = client.send(
                    TransferPair.pair_to_bin_batch(
                            BatchBroker(partitioned_brokers[i])),
                    part._processor._transfer_endpoint,
                    tag)
            send_all_futs.append(fut)
        return CompositeFuture(send_all_futs).result()

    futures.append(self._executor_pool.submit(do_send_all))
    return CompositeFuture(futures)
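# scatter() only needs partition_function(key) -> partition index. A minimal
# partitioner consistent with how _run_unary below builds one (assumptions:
# keys arrive serialized and hash_func maps them to a non-negative int; the
# real implementation may differ):
def partitioner(hash_func, total_partitions):
    def partition_function(key):
        return hash_func(key) % total_partitions
    return partition_function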
def _run_unary(self, func, task, shuffle=False):
    key_serdes = create_serdes(task._inputs[0]._store_locator._serdes)
    value_serdes = create_serdes(task._inputs[0]._store_locator._serdes)
    with create_adapter(task._inputs[0]) as input_db:
        L.debug(f"create_store_adapter: {task._inputs[0]}")
        with input_db.iteritems() as rb:
            L.debug(f"create_store_adapter_iter: {task._inputs[0]}")
            from eggroll.roll_pair.transfer_pair import TransferPair, BatchBroker
            if shuffle:
                total_partitions = \
                    task._inputs[0]._store_locator._total_partitions
                output_store = task._job._outputs[0]
                shuffle_broker = FifoBroker()
                write_bb = BatchBroker(shuffle_broker)
                store_future = None
                scatter_future = None
                try:
                    shuffler = TransferPair(transfer_id=task._job._id)
                    store_future = shuffler.store_broker(
                            task._outputs[0], True, total_partitions)
                    scatter_future = shuffler.scatter(
                            shuffle_broker,
                            partitioner(hash_func=hash_code,
                                        total_partitions=total_partitions),
                            output_store)
                    func(rb, key_serdes, value_serdes, write_bb)
                finally:
                    write_bb.signal_write_finish()
                    # guard: if TransferPair setup failed, the futures were
                    # never created and must not be awaited here
                    if scatter_future:
                        scatter_results = scatter_future.result()
                        L.debug(f"scatter_result: {scatter_results}")
                    if store_future:
                        store_result = store_future.result()
                        L.debug(f"gather_result: {store_result}")
            else:
                # TODO: modification may be needed when store options finished
                with create_adapter(task._outputs[0],
                                    options=task._job._options) as db, \
                        db.new_batch() as wb:
                    func(rb, key_serdes, value_serdes, wb)
    L.debug(f"close_store_adapter: {task._inputs[0]}")
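# Shape of the `func` contract used above: read serialized (key, value)
# pairs from the input iterator and write results through the supplied sink
# (a BatchBroker when shuffling, a write batch otherwise). A map_values-style
# functor sketch for the non-shuffle branch (assumption: the write batch
# exposes put(key_bytes, value_bytes); the adapters' exact signatures may
# differ):
def map_values_functor(f):
    def func(rb, key_serdes, value_serdes, wb):
        for k_bytes, v_bytes in rb:
            new_v = f(value_serdes.deserialize(v_bytes))
            wb.put(k_bytes, value_serdes.serialize(new_v))
    return func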
def _run_unary(self, func, task, shuffle=False, reduce_op=None):
    input_store_head = task._job._inputs[0]
    output_store_head = task._job._outputs[0]
    input_key_serdes = create_serdes(input_store_head._store_locator._serdes)
    input_value_serdes = create_serdes(input_store_head._store_locator._serdes)
    output_key_serdes = create_serdes(output_store_head._store_locator._serdes)
    output_value_serdes = create_serdes(output_store_head._store_locator._serdes)

    if input_key_serdes != output_key_serdes or \
            input_value_serdes != output_value_serdes:
        raise ValueError(
                f"input key-value serdes: "
                f"{(input_key_serdes, input_value_serdes)} "
                f"differ from output key-value serdes: "
                f"{(output_key_serdes, output_value_serdes)}")

    if shuffle:
        from eggroll.roll_pair.transfer_pair import TransferPair, BatchBroker
        input_total_partitions = \
            input_store_head._store_locator._total_partitions
        output_total_partitions = \
            output_store_head._store_locator._total_partitions
        output_store = output_store_head
        my_server_node_id = get_static_er_conf().get('server_node_id', None)
        shuffler = TransferPair(transfer_id=task._job._id)

        # store only the output partitions owned by this node
        if not task._outputs or \
                (my_server_node_id is not None
                 and my_server_node_id !=
                 task._outputs[0]._processor._server_node_id):
            store_future = None
        else:
            store_future = shuffler.store_broker(
                    store_partition=task._outputs[0],
                    is_shuffle=True,
                    total_writers=input_total_partitions,
                    reduce_op=reduce_op)

        # scatter only the input partitions owned by this node
        if not task._inputs or \
                (my_server_node_id is not None
                 and my_server_node_id !=
                 task._inputs[0]._processor._server_node_id):
            scatter_future = None
        else:
            shuffle_broker = FifoBroker()
            write_bb = BatchBroker(shuffle_broker)
            try:
                scatter_future = shuffler.scatter(
                        input_broker=shuffle_broker,
                        partition_function=partitioner(
                                hash_func=hash_code,
                                total_partitions=output_total_partitions),
                        output_store=output_store)
                with create_adapter(task._inputs[0]) as input_db, \
                        input_db.iteritems() as rb:
                    func(rb, input_key_serdes, input_value_serdes, write_bb)
            finally:
                write_bb.signal_write_finish()

        if scatter_future:
            scatter_results = scatter_future.result()
        else:
            scatter_results = 'no scatter for this partition'
        if store_future:
            store_results = store_future.result()
        else:
            store_results = 'no store for this partition'
    else:  # no shuffle
        with create_adapter(task._inputs[0]) as input_db, \
                input_db.iteritems() as rb, \
                create_adapter(task._outputs[0],
                               options=task._job._options) as db, \
                db.new_batch() as wb:
            func(rb, input_key_serdes, input_value_serdes, wb)
    L.trace(f"close_store_adapter: {task._inputs[0]}")
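# The locality gate above, restated as a predicate (pure restatement of the
# checks in the code; the helper name is illustrative): a partition is handled
# here only when the task carries it and either no static server_node_id is
# configured or it matches the partition's processor.
def _is_mine(task_partitions, my_server_node_id):
    return bool(task_partitions) and (
            my_server_node_id is None
            or my_server_node_id ==
            task_partitions[0]._processor._server_node_id)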
def test_send(self):
    transfer_client = TransferClient()
    broker = FifoBroker()
    broker.put(b'hello')
    broker.put(b'world')
    broker.put(b'this')
    broker.put(b'is')
    broker.put(b'a')
    broker.put(b'test')
    broker.signal_write_finish()

    future = transfer_client.send(
            broker=broker,
            endpoint=ErEndpoint(host='localhost', port=transfer_port),
            tag='test')
    future.result()
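# A matching receive-side check (sketch; assumes a TransferService is serving
# on transfer_port and replays, in order, the batches a sender queued under
# the same tag):
def test_recv(self):
    transfer_client = TransferClient()
    broker = transfer_client.recv(
            endpoint=ErEndpoint(host='localhost', port=transfer_port),
            tag='test')
    received = []
    while not broker.is_closable():
        try:
            batch = broker.get(block=True, timeout=0.1)
            if batch:
                received.append(batch.data)
        except queue.Empty:
            continue
    self.assertEqual(received[:2], [b'hello', b'world'])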