Пример #1
0
    def save(self, dataset_id: str, dataset: Dataset) -> DatasetSource:
        """Persist ``dataset`` as an artifact and return a source pointing at it.

        The dataset's own writer turns it into a reader plus artifacts; the
        artifacts' blobs are pushed to ``self.repo`` under
        ``self.ARTIFACT_TYPE`` and ``dataset_id``.

        :param dataset_id: identifier the artifact is pushed under
        :param dataset: dataset to persist
        :return: an ``ArtifactDatasetSource`` built from the writer's reader,
            the pushed artifact and the dataset's type
        :raises ValueError: if ``dataset.get_writer()`` returns ``None``
        :raises DatasetExistsError: if pushing raises ``ArtifactExistsError``
        """
        writer = dataset.get_writer()
        if writer is None:
            raise ValueError(
                f'{dataset.dataset_type} does not support artifact persistance'
            )

        reader, artifacts = writer.write(dataset)
        with artifacts.blob_dict() as blobs:
            try:
                pushed = self.repo.push_artifact(self.ARTIFACT_TYPE,
                                                 dataset_id, blobs)
            except ArtifactExistsError as e:
                # Chain explicitly so the traceback reports the repository
                # error as the direct cause of the dataset-level error.
                raise DatasetExistsError(dataset_id, self, e) from e
        return ArtifactDatasetSource(reader, pushed, dataset.dataset_type)
Пример #2
0
def dataset_write_read_check(dataset: Dataset,
                             writer: DatasetWriter = None,
                             reader_type: Type[DatasetReader] = None,
                             custom_eq: Callable[[Any, Any], bool] = None,
                             custom_assert: Callable[[Any, Any], Any] = None):
    """Write ``dataset``, read it back and assert the round trip preserved it.

    :param dataset: dataset to round-trip
    :param writer: writer to use; when ``None`` the dataset's own writer
        (``dataset.get_writer()``) is used
    :param reader_type: if given, the reader returned by the writer must be
        an instance of this type
    :param custom_eq: optional predicate ``(new_data, original_data) -> bool``
        used instead of ``==``; ignored when ``custom_assert`` is given
    :param custom_assert: optional callable ``(new_data, original_data)``
        that performs its own assertions; takes precedence over ``custom_eq``
    :raises AssertionError: if no writer is available or any check fails
    """
    # Compare against None explicitly: a writer object that happens to be
    # falsy (defines __bool__ or __len__) must not be silently replaced.
    if writer is None:
        writer = dataset.get_writer()
    assert writer is not None, \
        f'{dataset.dataset_type} does not provide a writer'

    reader, artifacts = writer.write(dataset)
    if reader_type is not None:
        assert isinstance(reader, reader_type)

    new = reader.read(artifacts)

    assert dataset.dataset_type == new.dataset_type
    if custom_assert is not None:
        custom_assert(new.data, dataset.data)
    elif custom_eq is not None:
        assert custom_eq(new.data, dataset.data)
    else:
        assert new.data == dataset.data
Пример #3
0
def data() -> Dataset:
    """Build a dataset holding the string ``'abcdefg'`` with the test type."""
    payload = 'abcdefg'
    dataset_type = TestDatasetType()
    return Dataset(payload, dataset_type)
Пример #4
0
 def read(self, artifacts: ArtifactCollection) -> Dataset:
     """Decode the ``'data'`` artifact bytes as text and wrap them in a dataset."""
     raw = artifacts.bytes_dict()['data']
     text = raw.decode('utf8')
     return Dataset(text, TestDatasetType())
Пример #5
0
def test_with_index_complex(data, format):
    """Round-trip ``data`` through a pandas writer of the given ``format``."""
    pandas_writer = PandasWriter(format)
    source = Dataset.from_object(data)
    dataset_write_read_check(
        source,
        writer=pandas_writer,
        reader_type=PandasReader,
        custom_assert=pandas_assert,
    )
Пример #6
0
def test_with_multiindex(data, format):
    """Round-trip ``data`` indexed by columns ``'a'`` and ``'b'``."""
    pandas_writer = PandasWriter(format)
    indexed = data.set_index(['a', 'b'])
    source = Dataset.from_object(indexed)
    dataset_write_read_check(
        source,
        writer=pandas_writer,
        reader_type=PandasReader,
        custom_assert=pandas_assert,
    )
Пример #7
0
def test_simple_df(data, format):
    """Round-trip ``data`` and compare the result with ``DataFrame.equals``."""
    pandas_writer = PandasWriter(format)
    source = Dataset.from_object(data)
    dataset_write_read_check(
        source,
        writer=pandas_writer,
        reader_type=PandasReader,
        custom_eq=pd.DataFrame.equals,
    )
Пример #8
0
 def read(self, artifacts: ArtifactCollection) -> Dataset:
     """Convert the single-file artifact payload into a dataset.

     The bytes stored under ``OneFileDatasetWriter.FILENAME`` are passed to
     ``self.convert`` and the result is paired with ``self.dataset_type``.
     """
     blobs = artifacts.bytes_dict()
     converted = self.convert(blobs[OneFileDatasetWriter.FILENAME])
     return Dataset(converted, self.dataset_type)
Пример #9
0
def test_ndarray_source():
    """Round-trip a small integer array, comparing with ``np.array_equal``."""
    source = Dataset.from_object(np.array([1, 2, 3]))
    dataset_write_read_check(source, custom_eq=np.array_equal)
Пример #10
0
 def read(self, artifacts: ArtifactCollection) -> Dataset:
     """Load the array stored under ``DATA_KEY`` in the ``DATA_FILE`` blob."""
     with artifacts.blob_dict() as blobs, \
             blobs[DATA_FILE].bytestream() as stream:
         # NOTE(review): if DATA_FILE is an .npz archive, np.load returns an
         # NpzFile that is never closed here -- confirm and consider closing.
         loaded = np.load(stream)
         array = loaded[DATA_KEY]
     return Dataset.from_object(array)
Пример #11
0
 def read(self, artifacts: ArtifactCollection) -> Dataset:
     """Read the ``PANDAS_DATA_FILE`` blob and return it as a dataset.

     The stream is parsed by ``self.format.read`` and the result is passed
     through ``self.data_type.align`` before being wrapped.
     """
     with artifacts.blob_dict() as blobs:
         with blobs[PANDAS_DATA_FILE].bytestream() as stream:
             raw_frame = self.format.read(stream)
             return Dataset.from_object(self.data_type.align(raw_frame))
Пример #12
0
 def create_dataset(self, data, target=None):
     """Wrap ``data`` in a dataset; ``target`` is accepted but not used here."""
     # TODO persisting to art repo?
     wrapped = Dataset.from_object(data)
     return wrapped
Пример #13
0
def dataset(pandas_data):
    """Wrap ``pandas_data`` in a dataset via ``Dataset.from_object``."""
    wrapped = Dataset.from_object(pandas_data)
    return wrapped