Example #1
def test_serialize_read_concatenated_records():
    # ARROW-1996 -- see stream alignment work in ARROW-2840, ARROW-3212
    f = pa.BufferOutputStream()
    pa.serialize_to(12, f)
    pa.serialize_to(23, f)
    buf = f.getvalue()

    f = pa.BufferReader(buf)
    pa.read_serialized(f).deserialize()
    pa.read_serialized(f).deserialize()
Example #2
    def get_results(self, timeout=None):
        """Returns results from worker pool

        :param timeout: If None, blocks forever; otherwise raises a :class:`.TimeoutWaitingForResultError`
            if no data is received within the timeout (in seconds)
        :return: arguments passed to ``publish_func(...)`` by a worker. If no more results are anticipated,
            :class:`.EmptyResultError` is raised.
        """

        while True:
            # If there is no more work to do, raise an EmptyResultError
            if self._ventilated_items == self._ventilated_items_processed:
                # We also need to check if we are using a ventilator and if it is completed
                if not self._ventilator or self._ventilator.completed():
                    raise EmptyResultError()

            socks = self._results_receiver_poller.poll(
                timeout * 1e3 if timeout else None)
            if not socks:
                raise TimeoutWaitingForResultError()
            result = self._results_receiver.recv_pyobj(0)
            if isinstance(result, VentilatedItemProcessedMessage):
                self._ventilated_items_processed += 1
                if self._ventilator:
                    self._ventilator.processed_item()
                continue
            if isinstance(result, Exception):
                self.stop()
                self.join()
                raise result
            else:
                deserialized_result = pyarrow.read_serialized(
                    result).deserialize()
                return deserialized_result
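The result payload consumed by read_serialized above must have been produced with pyarrow's serialization API on the worker side. A minimal sketch of such a producer, assuming the worker ships a serialized buffer through a publishing callback (publish_result and send are illustrative names, not part of the original code):

import pyarrow

def publish_result(send, rows):
    # Serialize the rows once on the worker; the pool restores the payload
    # with pyarrow.read_serialized(buf).deserialize() as in get_results above.
    buf = pyarrow.serialize(rows).to_buffer()
    send(buf)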
Example #3
    def _load_file(self, file_path):
        if self.ext == ".json":
            return _read_json_file(file_path)
        elif self.ext == ".csv":
            return pd.read_csv(file_path, index_col=0, parse_dates=True)
        elif self.ext == ".pkl":
            with open(file_path, "rb") as f:
                return pickle.load(f)
        elif self.fname.endswith(".pa"):
            return pa.read_serialized(pa.OSFile(file_path, "rb")).deserialize()
        else:
            raise Exception("Unknown file type")
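The .pa branch expects a file written with pyarrow's serialization API. A minimal write-side sketch under that assumption, reusing the pattern from the tests above (the _save_file name is illustrative, not part of the original class):

def _save_file(obj, file_path):
    # Write the pyarrow-serialized object so that _load_file can restore it
    # with pa.read_serialized(...).deserialize().
    with pa.OSFile(file_path, "wb") as f:
        f.write(pa.serialize(obj).to_buffer())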
Example #4
def test_serialization_deprecated():
    with pytest.warns(FutureWarning):
        ser = pa.serialize(1)

    with pytest.warns(FutureWarning):
        pa.deserialize(ser.to_buffer())

    f = pa.BufferOutputStream()
    with pytest.warns(FutureWarning):
        pa.serialize_to(12, f)

    buf = f.getvalue()
    f = pa.BufferReader(buf)
    with pytest.warns(FutureWarning):
        pa.read_serialized(f).deserialize()

    with pytest.warns(FutureWarning):
        pa.default_serialization_context()

    context = pa.lib.SerializationContext()
    with pytest.warns(FutureWarning):
        pa.register_default_serialization_handlers(context)
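The warnings above reflect the deprecation of pyarrow's custom serialization in favor of standard pickle (or Arrow IPC). A minimal round-trip sketch of the pickle-based replacement, assuming Python 3.8+ for protocol 5; the data value is illustrative:

import pickle
import numpy as np

data = np.arange(12, dtype=np.int32)

# pickle protocol 5 round-trip, the commonly suggested replacement for
# pa.serialize() / pa.deserialize()
payload = pickle.dumps(data, protocol=5)
restored = pickle.loads(payload)
assert (restored == data).all()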
Example #5
def test_numpy_base_object(tmpdir):
    # ARROW-2040: deserialized Numpy array should keep a reference to the
    # owner of its memory
    path = os.path.join(str(tmpdir), 'zzz.bin')
    data = np.arange(12, dtype=np.int32)

    with open(path, 'wb') as f:
        f.write(pa.serialize(data).to_buffer())

    serialized = pa.read_serialized(pa.OSFile(path))
    result = serialized.deserialize()
    assert_equal(result, data)
    serialized = None
    assert_equal(result, data)
    assert result.base is not None
Example #6
def test_numpy_matrix_serialization(tmpdir):
    class CustomType(object):
        def __init__(self, val):
            self.val = val

    path = os.path.join(str(tmpdir), 'pyarrow_npmatrix_serialization_test.bin')
    array = np.random.randint(low=-1, high=1, size=(2, 2))

    for data_type in [str, int, float, CustomType]:
        matrix = np.matrix(array.astype(data_type))

        with open(path, 'wb') as f:
            f.write(pa.serialize(matrix).to_buffer())

        serialized = pa.read_serialized(pa.OSFile(path))
        result = serialized.deserialize()
        assert_equal(result, matrix)
        assert_equal(result.dtype, matrix.dtype)
        serialized = None
        assert_equal(result, matrix)
        assert result.base is not None
Example #7
def fast_read_state_dict(path):
    # Read the pyarrow-serialized object back from disk and rebuild it.
    serialized = pyarrow.read_serialized(path)
    sd = serialized.deserialize()
    # The deserialized state dict maps parameter names to arrays; wrap
    # each value in a torch.Tensor.
    return {k: torch.Tensor(v) for k, v in sd.items()}
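A minimal sketch of a matching writer, assuming the state dict values are torch tensors that must be converted to numpy arrays before pyarrow can serialize them (fast_write_state_dict is an illustrative name, not from the original code):

import pyarrow

def fast_write_state_dict(state_dict, path):
    # Detach each tensor to a plain numpy array; the default serialization
    # context handles dicts of numpy arrays.
    arrays = {k: v.detach().cpu().numpy() for k, v in state_dict.items()}
    with pyarrow.OSFile(path, "wb") as f:
        pyarrow.serialize_to(arrays, f)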