Example #1
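An excerpt from RollSiteWriteBatch.__init__: the send-buffer size is resolved from the options dict or the static eggroll configuration, a bytearray of that size is wrapped in an ArrayByteBuffer backing a PairBinWriter, and the gRPC stub plus source/destination topics are prepared for pushing batches.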
    def __init__(self, adapter: RollSiteAdapter, options: dict = None):
        if options is None:
            options = {}
        self.adapter = adapter

        self.roll_site_header: ErRollSiteHeader = adapter.roll_site_header
        self.namespace = adapter.namespace
        self.name = create_store_name(self.roll_site_header)

        self.tagged_key = ''
        self.obj_type = adapter.obj_type

        self.proxy_endpoint = adapter.proxy_endpoint
        channel = self.grpc_channel_factory.create_channel(self.proxy_endpoint)
        self.stub = proxy_pb2_grpc.DataTransferServiceStub(channel)

        static_er_conf = get_static_er_conf()
        self.__bin_packet_len = int(options.get(
                RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.key,
                static_er_conf.get(RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.key,
                                   RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.default_value)))
        self.total_written = 0

        self.ba = bytearray(self.__bin_packet_len)
        self.buffer = ArrayByteBuffer(self.ba)
        self.writer = PairBinWriter(pair_buffer=self.buffer)

        self.push_batch_cnt = 0
        self.push_pair_cnt = 0

        self.topic_src = proxy_pb2.Topic(name=self.name, partyId=self.roll_site_header._src_party_id,
                                         role=self.roll_site_header._src_role, callback=None)
        self.topic_dst = proxy_pb2.Topic(name=self.name, partyId=self.roll_site_header._dst_party_id,
                                         role=self.roll_site_header._dst_role, callback=None)
Example #2
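A minimal ArrayByteBuffer round trip: write an int32 and two raw bytes, rewind the offset to zero, and read the same values back.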
 def test_byte_buffer(self):
     bs = bytearray(1024)
     buf = ArrayByteBuffer(bs)
     buf.write_int32(12)
     buf.write_bytes(b"34")
     buf.set_offset(0)
     assert buf.read_int32() == 12
     assert buf.read_bytes(2) == b"34"
Example #3
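RollSiteWriteBatch.put: a pair is appended through PairBinWriter; when the buffer is full (signalled by IndexError), the filled prefix is pushed as a binary batch and a fresh, possibly larger, buffer is allocated before the write is retried.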
 def put(self, k, v):
     if self.obj_type == 'object':
         L.debug(f"set tagged_key: {k}")
         self.tagged_key = _serdes.deserialize(k)
     try:
         self.writer.write(k, v)
     except IndexError as e:
         bin_batch = bytes(self.ba[0:self.buffer.get_offset()])
         self.push(bin_batch)
         # TODO:0: replace 1024 with constant
         self.ba = bytearray(
             max(self.__bin_packet_len,
                 len(k) + len(v) + 1024))
         self.buffer = ArrayByteBuffer(self.ba)
         self.writer = PairBinWriter(pair_buffer=self.buffer)
         self.writer.write(k, v)
     except Exception as e:
         L.error(f"Unexpected error: {sys.exc_info()[0]}")
         raise e
Example #4
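A commit closure that snapshots the filled portion of the current buffer as an immutable binary batch and reinitializes the bytearray, ArrayByteBuffer, and PairBinWriter for the next batch.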
 def commit(bs=sendbuf_size):
     nonlocal ba
     nonlocal buffer
     nonlocal writer
     bin_batch = None
     if ba:
         bin_batch = bytes(ba[0:buffer.get_offset()])
     ba = bytearray(bs)
     buffer = ArrayByteBuffer(ba)
     writer = PairBinWriter(pair_buffer=buffer)
     return bin_batch
Example #5
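The same commit closure as Example #4, with an additional trace-level log of the completed-batch counter.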
 def commit(bs=sendbuf_size):
     L.debug(f'generate_bin_batch commit: {done_cnt}')
     nonlocal ba
     nonlocal buffer
     nonlocal writer
     bin_batch = None
     if ba:
         bin_batch = bytes(ba[0:buffer.get_offset()])
     ba = bytearray(bs)
     buffer = ArrayByteBuffer(ba)
     writer = PairBinWriter(pair_buffer=buffer)
     return bin_batch
Example #6
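PutBatchTask.run: incoming batches are pulled from a broker, each batch's payload is wrapped in an ArrayByteBuffer and decoded with PairBinReader.read_all(), and the resulting pairs are written into the partition's storage adapter while batch and pair counts are tracked per ErRollSiteHeader.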
    def run(self):
        # batch streams must be executed serially and reinitialized.
        # TODO:0:  remove lock to bss
        rs_header = None
        with PutBatchTask._partition_lock[self.tag]:   # tag includes partition info in tag generation
            L.trace(f"do_store start for tag={self.tag}, partition_id={self.partition._id}")
            bss = _BatchStreamStatus.get_or_create(self.tag)
            try:
                broker = TransferService.get_or_create_broker(self.tag, write_signals=1)

                iter_wait = 0
                iter_timeout = int(CoreConfKeys.EGGROLL_CORE_FIFOBROKER_ITER_TIMEOUT_SEC.get())

                batch = None
                batch_get_interval = 0.1
                with create_adapter(self.partition) as db, db.new_batch() as wb:
                    #for batch in broker:
                    while not broker.is_closable():
                        try:
                            batch = broker.get(block=True, timeout=batch_get_interval)
                        except queue.Empty as e:
                            iter_wait += batch_get_interval
                            if iter_wait > iter_timeout:
                                raise TimeoutError(f'timeout in PutBatchTask.run. tag={self.tag}, iter_timeout={iter_timeout}, iter_wait={iter_wait}')
                            else:
                                continue
                        except BrokerClosed as e:
                            continue

                        iter_wait = 0
                        rs_header = ErRollSiteHeader.from_proto_string(batch.header.ext)
                        batch_pairs = 0
                        if batch.data:
                            bin_data = ArrayByteBuffer(batch.data)
                            reader = PairBinReader(pair_buffer=bin_data, data=batch.data)
                            for k_bytes, v_bytes in reader.read_all():
                                wb.put(k_bytes, v_bytes)
                                batch_pairs += 1
                        bss.count_batch(rs_header, batch_pairs)
                        # TODO:0
                        bss._data_type = rs_header._data_type
                        if rs_header._stage == FINISH_STATUS:
                            bss.set_done(rs_header)  # starting from 0

                bss.check_finish()
                # TransferService.remove_broker(tag) will be called when the get_status phase finishes or an exception is raised
            except Exception as e:
                L.exception(f'_run_put_batch error, tag={self.tag}, '
                        f'rs_key={rs_header.get_rs_key() if rs_header is not None else None}, rs_header={rs_header}')
                raise e
            finally:
                TransferService.remove_broker(self.tag)
Example #7
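A test that deliberately overflows a 32-byte buffer: on IndexError the filled bytes are printed, the offset is reset, and writing resumes with a new PairBinWriter over the same buffer.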
 def test_pair_bin(self):
     bs = bytearray(32)
     buf = ArrayByteBuffer(bs)
     writer = PairBinWriter(buf)
     for i in range(10):
         try:
             writer.write(str(i).encode(), str(i).encode())
         except IndexError as e:
             print(buf.read_bytes(buf.get_offset(), 0))
             buf.set_offset(0)
             writer = PairBinWriter(buf)
             writer.write(str(i).encode(), str(i).encode())
     buf.set_offset(0)
     reader = PairBinReader(buf)
     print("last")
     print(list(reader.read_all()))
Example #8
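bin_batch_to_pair: a generator that turns an iterator of binary batches into (key, value) byte pairs via PairBinReader.read_all().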
 def bin_batch_to_pair(input_iter):
     L.debug(f"bin_batch_to_pair start")
     total_written = 0
     for batch in input_iter:
         L.debug(f"bin_batch_to_pair batch start size:{len(batch)}")
         try:
             bin_data = ArrayByteBuffer(batch)
             reader = PairBinReader(pair_buffer=bin_data)
             for k_bytes, v_bytes in reader.read_all():
                 yield k_bytes, v_bytes
                 total_written += 1
         except IndexError as e:
             L.exception(f"error bin bath format:{e}")
         L.debug(f"bin_batch_to_pair batch end count:{total_written}")
     L.debug(f"bin_batch_to_pair total_written count:{total_written}")
Example #9
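A later revision of bin_batch_to_pair that also passes the raw batch as the data argument to PairBinReader and logs at trace level.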
 def bin_batch_to_pair(input_iter):
     L.trace(f"bin_batch_to_pair start")
     write_count = 0
     for batch in input_iter:
         L.trace(f"bin_batch_to_pair: cur batch size={len(batch)}")
         try:
             bin_data = ArrayByteBuffer(batch)
             reader = PairBinReader(pair_buffer=bin_data, data=batch)
             for k_bytes, v_bytes in reader.read_all():
                 yield k_bytes, v_bytes
                 write_count += 1
         except IndexError as e:
             L.exception(f"error bin bath format: {e}")
         L.trace(f"bin_batch_to_pair batch ends. total write count={write_count}")
     L.trace(f"bin_batch_to_pair total_written count={write_count}")
Example #10
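A variant of the overflow test that manages offsets through the PairBinWriter/PairBinReader API (using the data keyword) instead of touching the buffer directly.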
 def test_pair_bin_no_abb(self):
     bs = bytearray(32)
     buf = ArrayByteBuffer(bs)
     writer = PairBinWriter(pair_buffer=buf, data=bs)
     for i in range(10):
         try:
             writer.write(str(i).encode(), str(i).encode())
         except IndexError as e:
             writer.set_offset(0)
             writer = PairBinWriter(pair_buffer=buf, data=bs)
             writer.write(str(i).encode(), str(i).encode())
             pbr = PairBinReader(pair_buffer=buf, data=writer.get_data())
             print(pbr.read_bytes(writer.get_offset(), 0))
     writer.set_offset(0)
     reader = PairBinReader(pair_buffer=buf, data=bs)
     print("last")
     print(list(reader.read_all()))
Example #11
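The complete RollSiteWriteBatch class: pairs are accumulated in a PairBinWriter-backed buffer, flushed to the proxy over gRPC with retries in push(), and the stream is terminated by a markEnd unary call in send_end().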
class RollSiteWriteBatch(PairWriteBatch):
    grpc_channel_factory = GrpcChannelFactory()

    # TODO:0: check if secure channel needed
    def __init__(self, adapter: RollSiteAdapter, options: dict = None):
        if options is None:
            options = {}
        self.adapter = adapter

        self.roll_site_header: ErRollSiteHeader = adapter.roll_site_header
        self.namespace = adapter.namespace
        self.name = create_store_name(self.roll_site_header)

        self.tagged_key = ''
        self.obj_type = adapter.obj_type

        self.proxy_endpoint = adapter.proxy_endpoint
        channel = self.grpc_channel_factory.create_channel(self.proxy_endpoint)
        self.stub = proxy_pb2_grpc.DataTransferServiceStub(channel)

        static_er_conf = get_static_er_conf()
        self.__bin_packet_len = int(
            options.get(
                RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.key,
                static_er_conf.get(
                    RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.key,
                    RollSiteConfKeys.EGGROLL_ROLLSITE_ADAPTER_SENDBUF_SIZE.
                    default_value)))
        self.total_written = 0

        self.ba = bytearray(self.__bin_packet_len)
        self.buffer = ArrayByteBuffer(self.ba)
        self.writer = PairBinWriter(pair_buffer=self.buffer)

        self.push_cnt = 0

        self.topic_src = proxy_pb2.Topic(
            name=self.name,
            partyId=self.roll_site_header._src_party_id,
            role=self.roll_site_header._src_role,
            callback=None)
        self.topic_dst = proxy_pb2.Topic(
            name=self.name,
            partyId=self.roll_site_header._dst_party_id,
            role=self.roll_site_header._dst_role,
            callback=None)

    def __repr__(self):
        return f'<ErRollSiteWriteBatch(' \
               f'adapter={self.adapter}, ' \
               f'roll_site_header={self.roll_site_header}, ' \
               f'namespace={self.namespace}, ' \
               f'name={self.name}, ' \
               f'obj_type={self.obj_type}, ' \
               f'proxy_endpoint={self.proxy_endpoint}) ' \
               f'at {hex(id(self))}>'

    def generate_message(self, obj, metadata):
        data = proxy_pb2.Data(value=obj)
        metadata.seq += 1
        packet = proxy_pb2.Packet(header=metadata, body=data)
        yield packet

    # TODO:0: configurable
    def push(self, obj):
        L.debug(
            f'pushing for task: {self.name}, partition id: {self.adapter.partition_id}, push cnt: {self.get_push_count()}'
        )
        task_info = proxy_pb2.Task(
            taskId=self.name,
            model=proxy_pb2.Model(name=self.adapter.roll_site_header_string,
                                  dataKey=self.namespace))

        command_test = proxy_pb2.Command()

        # TODO: conf test as config and use it
        conf_test = proxy_pb2.Conf(overallTimeout=200000,
                                   completionWaitTimeout=200000,
                                   packetIntervalTimeout=200000,
                                   maxRetries=10)

        metadata = proxy_pb2.Metadata(task=task_info,
                                      src=self.topic_src,
                                      dst=self.topic_dst,
                                      command=command_test,
                                      seq=0,
                                      ack=0)

        max_retry_cnt = 100
        exception = None
        for i in range(1, max_retry_cnt + 1):
            try:
                self.stub.push(self.generate_message(obj, metadata))
                exception = None
                self.increase_push_count()
                break
            except Exception as e:
                exception = e
                L.info(
                    f'caught exception in pushing {self.name}, partition_id: {self.adapter.partition_id}: {e}. retrying. current retry count: {i}, max_retry_cnt: {max_retry_cnt}'
                )
                time.sleep(min(0.1 * i, 30))

        if exception:
            raise GrpcCallError("error in push", self.proxy_endpoint,
                                exception)

    def write(self):
        bin_data = bytes(self.ba[0:self.buffer.get_offset()])
        self.push(bin_data)
        self.buffer = ArrayByteBuffer(self.ba)

    def send_end(self):
        L.info(f"send_end tagged_key:{self.tagged_key}")
        task_info = proxy_pb2.Task(
            taskId=self.name,
            model=proxy_pb2.Model(name=self.adapter.roll_site_header_string,
                                  dataKey=self.namespace))

        command_test = proxy_pb2.Command(name="set_status")
        conf_test = proxy_pb2.Conf(overallTimeout=20000,
                                   completionWaitTimeout=20000,
                                   packetIntervalTimeout=20000,
                                   maxRetries=10)

        metadata = proxy_pb2.Metadata(task=task_info,
                                      src=self.topic_src,
                                      dst=self.topic_dst,
                                      command=command_test,
                                      operator="markEnd",
                                      seq=self.get_push_count(),
                                      ack=0)

        packet = proxy_pb2.Packet(header=metadata)

        try:
            # TODO:0: retry and sleep for all grpc call in RollSite
            self.stub.unaryCall(packet)
        except Exception as e:
            raise GrpcCallError('send_end', self.proxy_endpoint, e)

    def close(self):
        bin_batch = bytes(self.ba[0:self.buffer.get_offset()])
        self.push(bin_batch)
        self.send_end()
        L.info(f'closing RollSiteWriteBatch for name: {self.name}, '
               f'total push count: {self.push_cnt}')

    def put(self, k, v):
        if self.obj_type == 'object':
            L.debug(f"set tagged_key: {k}")
            self.tagged_key = _serdes.deserialize(k)
        try:
            self.writer.write(k, v)
        except IndexError as e:
            bin_batch = bytes(self.ba[0:self.buffer.get_offset()])
            self.push(bin_batch)
            # TODO:0: replace 1024 with constant
            self.ba = bytearray(
                max(self.__bin_packet_len,
                    len(k) + len(v) + 1024))
            self.buffer = ArrayByteBuffer(self.ba)
            self.writer = PairBinWriter(pair_buffer=self.buffer)
            self.writer.write(k, v)
        except Exception as e:
            L.error(f"Unexpected error: {sys.exc_info()[0]}")
            raise e

    def increase_push_count(self):
        self.push_cnt += 1

    def get_push_count(self):
        return self.push_cnt
Example #12
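RollSiteWriteBatch.write shown on its own: flush the filled portion of the buffer via push() and rewind by wrapping the same bytearray in a new ArrayByteBuffer.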
 def write(self):
     bin_data = bytes(self.ba[0:self.buffer.get_offset()])
     self.push(bin_data)
     self.buffer = ArrayByteBuffer(self.ba)
Example #13
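Wrapping a memory-mapped file in an ArrayByteBuffer so it can be consumed by a PairBinReader.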
 def __init__(self, file):
     self.mm = mmap.mmap(file.fileno(), 0)
     self.mm.seek(0)
     self.reader = PairBinReader(ArrayByteBuffer(self.mm))
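
Putting the pieces above together, here is a minimal end-to-end sketch of the pack/unpack pattern these examples demonstrate. It assumes ArrayByteBuffer, PairBinWriter, and PairBinReader behave as shown above (IndexError on overflow, read_all() yielding byte pairs); the import path and the helper names pack_pairs/unpack_pairs are illustrative, not part of the library.

 # Import path is an assumption; adjust to where these classes live in your tree.
 from eggroll.core.pair_store.format import ArrayByteBuffer, PairBinReader, PairBinWriter

 def pack_pairs(pairs, buf_size=64):
     # Yield binary batches; a new batch starts whenever the current buffer fills up.
     ba = bytearray(buf_size)
     buf = ArrayByteBuffer(ba)
     writer = PairBinWriter(pair_buffer=buf)
     for k, v in pairs:
         try:
             writer.write(k, v)
         except IndexError:
             # Buffer full: emit the filled prefix, then retry with a fresh
             # (and, if needed, larger) buffer, mirroring RollSiteWriteBatch.put.
             if buf.get_offset() > 0:
                 yield bytes(ba[0:buf.get_offset()])
             ba = bytearray(max(buf_size, len(k) + len(v) + 1024))
             buf = ArrayByteBuffer(ba)
             writer = PairBinWriter(pair_buffer=buf)
             writer.write(k, v)
     if buf.get_offset() > 0:
         yield bytes(ba[0:buf.get_offset()])

 def unpack_pairs(batches):
     # Inverse of pack_pairs: decode every (key, value) pair from each batch.
     for batch in batches:
         reader = PairBinReader(pair_buffer=ArrayByteBuffer(batch))
         yield from reader.read_all()

 pairs = [(str(i).encode(), str(i * i).encode()) for i in range(100)]
 decoded = list(unpack_pairs(pack_pairs(pairs)))
 assert len(decoded) == len(pairs)
 for (k, v), (ek, ev) in zip(decoded, pairs):
     assert k == ek and v == ev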