def test_index_dataset_with_location(index, default_metadata_type):
    """
    Index a dataset at one location, then re-index it at a second location.

    Checks that adding the same dataset twice does not duplicate its
    location, and that adding it with a new ``local_uri`` records the new
    location ahead of the old one.

    :type index: datacube.index._api.Index
    :param default_metadata_type: fixture whose ``id`` must match the
        ``metadata_type.id`` of the indexed dataset
    """
    first_path = Path('/tmp/first/something.yaml').absolute()
    second_path = Path('/tmp/second/something.yaml').absolute()
    first_uri = first_path.as_uri()
    second_uri = second_path.as_uri()

    product = index.datasets.types.add_document(_pseudo_telemetry_dataset_type)
    # From here on we work with whatever ``add`` hands back, not the
    # Dataset instance we constructed.
    indexed = index.datasets.add(Dataset(product, _telemetry_dataset, first_uri))

    assert indexed.id == _telemetry_uuid
    # TODO: Dataset types?
    assert indexed.type.id == product.id
    assert indexed.metadata_type.id == default_metadata_type.id

    assert indexed.local_path.absolute() == first_path

    # A repeated add of the unchanged dataset must not create a second location.
    index.datasets.add(indexed)
    known_locations = index.datasets.get_locations(indexed)
    assert len(known_locations) == 1

    # Pointing the dataset at a new path and adding again records both.
    indexed.local_uri = second_uri
    index.datasets.add(indexed)
    known_locations = index.datasets.get_locations(indexed)
    assert len(known_locations) == 2
    # Locations come back newest to oldest.
    assert known_locations == [second_uri, first_uri]
    # The newer location is the one exposed as the default local path.
    assert indexed.local_path.absolute() == second_path
# Example #2
0
def test_index_dataset_with_location(index, default_metadata_type):
    """
    Exercise adding, removing and querying dataset locations via the index.

    Steps, in order: index a dataset with one location, re-index it
    unchanged, remove and re-add that location, re-index with a second
    location, re-index with no location, and finally look datasets up by
    location while a second dataset exists at a different location.

    :type index: datacube.index._api.Index
    :type default_metadata_type: datacube.model.MetadataType
    """
    first_path = Path('/tmp/first/something.yaml').absolute()
    second_path = Path('/tmp/second/something.yaml').absolute()
    first_uri = first_path.as_uri()
    second_uri = second_path.as_uri()

    product = index.products.add_document(_pseudo_telemetry_dataset_type)
    dataset = Dataset(product, _telemetry_dataset, first_uri, sources={})

    def reindex():
        """Add ``dataset`` to the index and return the copy read back by id."""
        index.datasets.add(dataset)
        return index.datasets.get(dataset.id)

    def current_locations():
        """Return the locations the index currently reports for ``dataset``."""
        return index.datasets.get_locations(dataset)

    stored = reindex()

    assert stored.id == _telemetry_uuid
    # TODO: Dataset types?
    assert stored.type.id == product.id
    assert stored.metadata_type.id == default_metadata_type.id
    assert stored.local_path == first_path

    # A repeated add of the unchanged dataset must not create a second location.
    # (The read-back result is not inspected here; the call is kept so the
    # sequence of index operations is unchanged.)
    reindex()
    found = current_locations()
    assert len(found) == 1

    # Removing the location succeeds once, then reports nothing left to remove.
    removed = index.datasets.remove_location(dataset, first_uri)
    assert removed
    removed = index.datasets.remove_location(dataset, first_uri)
    assert not removed
    found = current_locations()
    assert len(found) == 0

    # Re-adding the location succeeds once, then reports it already present.
    added = index.datasets.add_location(dataset, first_uri)
    assert added
    added = index.datasets.add_location(dataset, first_uri)
    assert not added
    found = current_locations()
    assert len(found) == 1

    # Pointing the dataset at a new path and adding again records both.
    dataset.local_uri = second_uri
    stored = reindex()
    found = current_locations()
    assert len(found) == 2
    # Locations come back newest to oldest.
    assert found == [second_uri, first_uri]
    # The newer location is the one exposed as the default local path.
    assert stored.local_path == second_path

    # Adding once more with no location at all must leave everything as it was.
    dataset.local_uri = None
    stored = reindex()
    found = current_locations()
    assert len(found) == 2
    # Locations come back newest to oldest.
    assert found == [second_uri, first_uri]
    # The newer location is still the default local path.
    assert stored.local_path == second_path

    # Look datasets up by location. A second dataset at a different location
    # is added first, to catch missing joins or filtering in the lookup.
    other_doc = copy.deepcopy(_telemetry_dataset)
    other_doc['id'] = '366f32d8-e1f8-11e6-94b4-185e0f80a5c0'
    other_dataset = Dataset(product, other_doc, second_uri, sources={})
    index.datasets.add(other_dataset)

    matches = index.datasets.get_datasets_for_location(first_uri)
    dataset_ids = [match.id for match in matches]
    assert dataset_ids == [dataset.id]