Пример #1
0
def test_fingerprint_when_transform_version_changes():
    """Check that the ``version`` passed to ``fingerprint_transform`` changes the fingerprint.

    The same transform is declared three times -- with no version, with
    ``version="1.0.0"`` and with ``version="2.0.0"`` -- and each variant is
    applied to a dataset built from identical data.  The three resulting
    fingerprints must be pairwise different.
    """
    data = {"a": [0, 1, 2]}

    class DummyDatasetChild(datasets.Dataset):
        @fingerprint_transform(inplace=False)
        def func(self, new_fingerprint):
            return DummyDatasetChild(self.data, fingerprint=new_fingerprint)

    # Read ``_fingerprint`` from the returned dataset rather than keeping the
    # dataset itself.
    # NOTE(review): dataset objects are presumably hashed by identity, in which
    # case a set of three distinct datasets always has size 3 and the final
    # assertion would pass even if all fingerprints were equal.
    fingerprint_no_version = DummyDatasetChild(
        InMemoryTable.from_pydict(data)).func()._fingerprint

    # The class name is deliberately reused: only the ``version`` argument of
    # the decorator differs between the three definitions.
    class DummyDatasetChild(datasets.Dataset):
        @fingerprint_transform(inplace=False, version="1.0.0")
        def func(self, new_fingerprint):
            return DummyDatasetChild(self.data, fingerprint=new_fingerprint)

    fingerprint_1 = DummyDatasetChild(
        InMemoryTable.from_pydict(data)).func()._fingerprint

    class DummyDatasetChild(datasets.Dataset):
        @fingerprint_transform(inplace=False, version="2.0.0")
        def func(self, new_fingerprint):
            return DummyDatasetChild(self.data, fingerprint=new_fingerprint)

    fingerprint_2 = DummyDatasetChild(
        InMemoryTable.from_pydict(data)).func()._fingerprint

    # Three distinct values in the set means no two variants share a fingerprint.
    assert len({fingerprint_no_version, fingerprint_1, fingerprint_2}) == 3
Пример #2
0
def test_in_memory_table_pickle_big_table():
    """Round-trip a very large ``InMemoryTable`` through pickle and compare lengths.

    The single column holds ``(4 * 8 << 30) // 64`` zeros, i.e. the number of
    64-bit values in 4 GiB (presumably the column is stored as 64-bit
    integers -- TODO confirm).  The test passes when the unpickled table has
    the same number of rows as the original.
    """
    # ``*`` binds tighter than ``<<``: this is (32 << 30) bits // 64 bits.
    num_rows = (4 * 8 << 30) // 64
    table = InMemoryTable.from_pydict({"col": [0] * num_rows})
    expected_length = len(table)

    serialized = pickle.dumps(table)
    # Drop each multi-GB object as soon as it is no longer needed so that no
    # extra copy is kept alive during the round trip.
    del table
    restored = pickle.loads(serialized)
    del serialized

    assert len(restored) == expected_length
Пример #3
0
def test_fingerprint_in_multiprocessing():
    """Check that transform fingerprints are the same when computed through a ``Pool``.

    ``func1`` must yield the same fingerprint every time it is applied to the
    dataset, and a fingerprint different from ``func2``'s.  Both properties are
    verified first with direct calls and then with calls dispatched through
    ``Pool(2).apply_async``.
    """
    dataset = DatasetChild(InMemoryTable.from_pydict({"a": [0, 1, 2]}))

    # Reference values computed with direct calls.
    expected_fingerprint = dataset.func1()._fingerprint
    repeated = dataset.func1()
    assert expected_fingerprint == repeated._fingerprint
    different = dataset.func2()
    assert expected_fingerprint != different._fingerprint

    # Same checks, with the transforms submitted to the pool.
    with Pool(2) as pool:
        pooled_same = pool.apply_async(dataset.func1).get()
        assert expected_fingerprint == pooled_same._fingerprint
        pooled_different = pool.apply_async(dataset.func2).get()
        assert expected_fingerprint != pooled_different._fingerprint
Пример #4
0
def test_in_memory_table_from_pydict(in_memory_pa_table):
    """Check ``InMemoryTable.from_pydict`` against ``pa.Table.from_pydict``.

    The fixture table is converted to a plain dict, from which an
    ``InMemoryTable`` is built inside ``assert_arrow_memory_increases()``
    (presumably verifying that Arrow memory grows -- see that helper).  The
    result must be an ``InMemoryTable`` whose wrapped ``table`` equals the
    ``pa.Table`` built from the same dict.
    """
    columns = in_memory_pa_table.to_pydict()
    with assert_arrow_memory_increases():
        in_memory_table = InMemoryTable.from_pydict(columns)
        assert isinstance(in_memory_table, InMemoryTable)
        # The expected table is built inline so it is only a temporary.
        assert in_memory_table.table == pa.Table.from_pydict(columns)