Exemplo n.º 1
0
def test_add_file_end_to_end(file_name):
    """Add a file-level metadata record to a fresh store and read it back.

    A record built from ``metadata_template`` with ``type`` set to ``"file"``
    is written as JSON to ``file_name``, added via ``meta_add`` to a
    ``GitRepo`` created in a temporary directory, and then looked up again
    through the file tree of the dataset's metadata root record.

    Parameters
    ----------
    file_name
        Path of the file the JSON record is written to (opened for writing
        in text mode).
    """
    test_path = "d_0/d_0.0/f_0.0.0"

    # Write inside a context manager so the file is flushed and closed
    # before meta_add() opens it for reading.
    with open(file_name, "tw") as json_file:
        json.dump({
            **metadata_template,
            "type": "file",
            "path": test_path
        }, json_file)

    with tempfile.TemporaryDirectory() as temp_dir:
        git_repo = GitRepo(temp_dir)

        res = meta_add(metadata=file_name, metadata_store=git_repo.path)
        # Exactly one result, and it is a file-level (not dataset-level) one
        assert_result_count(res, 1)
        assert_result_count(res, 1, type='file')
        assert_result_count(res, 0, type='dataset')

        # Verify file level metadata was added; only the metadata root
        # record is needed here, the other top nodes are checked for
        # presence inside _get_top_nodes().
        _, _, mrr = _get_top_nodes(
            git_repo,
            UUID(metadata_template["dataset_id"]),
            metadata_template["dataset_version"])

        file_tree = mrr.get_file_tree()
        assert_is_not_none(file_tree)
        assert_true(test_path in file_tree)

        metadata = file_tree.get_metadata(MetadataPath(test_path))
        metadata_content = _get_metadata_content(metadata)
        eq_(metadata_content, metadata_template["extracted_metadata"])
Exemplo n.º 2
0
def _get_metadata_content(metadata):
    """Return the content of the single metadata instance in ``metadata``.

    Asserts that ``metadata`` is not None, that it holds exactly one
    extractor-run entry whose extractor name equals
    ``metadata_template["extractor_name"]``, and that this entry holds
    exactly one instance.

    Parameters
    ----------
    metadata
        Object providing ``extractor_runs()``, which yields
        ``(extractor_name, extractor_runs)`` pairs.

    Returns
    -------
    The ``metadata_content`` attribute of the only instance.
    """
    assert_is_not_none(metadata)
    metadata_instances = tuple(metadata.extractor_runs())
    eq_(len(metadata_instances), 1)

    extractor_name, extractor_runs = metadata_instances[0]
    eq_(extractor_name, metadata_template["extractor_name"])

    instances = tuple(extractor_runs.get_instances())
    # Compare the length explicitly: assert_true(len(instances), 1) would
    # treat ``1`` as the failure message and accept any non-empty tuple.
    eq_(len(instances), 1)

    return instances[0].metadata_content
Exemplo n.º 3
0
def test_symlinked_dataset_properties(repo1, repo2, repo3, non_repo, symlink):
    """Check Dataset properties while its path is a symlink that gets retargeted.

    A Dataset is created in ``repo1`` and AnnexRepo instances in ``repo2``
    and ``repo3``. A second Dataset is bound to ``symlink``, which is pointed
    in turn at ``repo1``, ``repo2``, ``repo3`` and finally ``non_repo``; after
    each retargeting the ``repo``, ``config``, ``_cfg_bound`` and ``id``
    properties of the symlinked Dataset are checked.
    """
    created = Dataset(repo1).create()

    # Targets the symlink will be switched to later on
    annex2 = AnnexRepo(repo2)
    annex3 = AnnexRepo(repo3)
    assert_true(os.path.isabs(non_repo))

    def _retarget(target):
        # Replace the existing symlink with one pointing at `target`
        os.unlink(symlink)
        os.symlink(target, symlink)

    # Phase 1: symlink -> repo1, the created dataset
    os.symlink(repo1, symlink)
    linked = Dataset(symlink)
    # the Repo instance is shared, the Dataset instance is not
    assert_is(linked.repo, created.repo)
    assert_is_not(linked, created)
    assert_is(linked.config, created.repo.config)
    assert_true(linked._cfg_bound)
    assert_is_not_none(linked.id)
    # both Dataset instances report the same id
    assert_equal(linked.id, created.id)

    # Phases 2 and 3: symlink -> plain annex repositories
    for target, annex in ((repo2, annex2), (repo3, annex3)):
        _retarget(target)

        assert_is(linked.repo, annex)  # same Repo instance
        assert_is(linked.config, annex.config)
        assert_true(linked._cfg_bound)
        # no id here: the repository is an annex, but Dataset.create()
        # was never called on it
        assert_is_none(linked.id)

    # Phase 4: symlink -> a location that is not a repository
    _retarget(non_repo)

    assert_is_none(linked.repo)
    assert_is_not(linked.config, annex3.config)
    assert_false(linked._cfg_bound)
    assert_is_none(linked.id)
Exemplo n.º 4
0
def test_subdataset_add_file_end_to_end(file_name):
    """Add a file-level record for a subdataset and read it back.

    A record built from ``metadata_template`` and
    ``additional_keys_template`` with ``type`` set to ``"file"`` is written
    as JSON to ``file_name`` and added via ``meta_add`` to a ``GitRepo``
    created in a temporary directory. The test then resolves the metadata
    root record at ``additional_keys_template["dataset_path"]`` in the root
    dataset's dataset tree and checks the file metadata stored below it.

    Parameters
    ----------
    file_name
        Path of the file the JSON record is written to (opened for writing
        in text mode).
    """
    test_path = "d_1/d_1.0/f_1.0.0"

    # Write inside a context manager so the file is flushed and closed
    # before meta_add() opens it for reading.
    with open(file_name, "tw") as json_file:
        json.dump({
            **metadata_template,
            **additional_keys_template,
            "type": "file",
            "path": test_path
        }, json_file)

    with tempfile.TemporaryDirectory() as temp_dir:
        git_repo = GitRepo(temp_dir)

        res = meta_add(metadata=file_name, metadata_store=git_repo.path)
        # Exactly one result, and it is a file-level (not dataset-level) one
        assert_result_count(res, 1)
        assert_result_count(res, 1, type='file')
        assert_result_count(res, 0, type='dataset')

        # Verify file level metadata was added below the subdataset
        root_dataset_id = UUID(additional_keys_template["root_dataset_id"])
        root_dataset_version = additional_keys_template["root_dataset_version"]
        dataset_tree_path = MetadataPath(
            additional_keys_template["dataset_path"])

        # Only the tree version list is needed from the top nodes; the
        # subdataset's metadata root record is looked up separately below.
        tree_version_list, _, _ = _get_top_nodes(
            git_repo,
            root_dataset_id,
            root_dataset_version)

        _, dataset_tree = tree_version_list.get_dataset_tree(
            root_dataset_version)

        mrr = dataset_tree.get_metadata_root_record(dataset_tree_path)
        eq_(mrr.dataset_identifier, UUID(metadata_template["dataset_id"]))

        file_tree = mrr.get_file_tree()
        assert_is_not_none(file_tree)
        assert_true(test_path in file_tree)

        metadata = file_tree.get_metadata(MetadataPath(test_path))
        metadata_content = _get_metadata_content(metadata)
        eq_(metadata_content, metadata_template["extracted_metadata"])
Exemplo n.º 5
0
def _get_top_nodes(git_repo, dataset_id, dataset_version):
    """Fetch the top-level metadata nodes from ``git_repo`` and check them.

    Calls ``get_top_nodes_and_metadata_root_record`` with the ``"git"``
    mapper name, the repository path, the given dataset id and version and
    an empty ``MetadataPath``, and asserts that none of the three returned
    objects is None.

    Returns
    -------
    tuple
        ``(tree_version_list, uuid_set, mrr)`` as returned by the lookup.
    """
    top_nodes = get_top_nodes_and_metadata_root_record(
        "git",
        git_repo.path,
        dataset_id,
        dataset_version,
        MetadataPath(""))
    tree_version_list, uuid_set, mrr = top_nodes

    # Metadata must have been created, i.e. every node has to exist
    for node in (tree_version_list, uuid_set, mrr):
        assert_is_not_none(node)

    return tree_version_list, uuid_set, mrr
Exemplo n.º 6
0
def test_property_reevaluation(repo1):
    """Check that Dataset properties follow create/remove/create cycles.

    Starting from a Dataset on ``repo1`` that has no repository yet, the
    test creates, removes and re-creates the dataset, checking ``repo``,
    ``config``, ``_cfg_bound`` and ``id`` after every step.
    """
    ds = Dataset(repo1)

    # --- before creation: no repo, unbound config, no id
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    cfg_before_create = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    # --- first creation: `repo` exists now and `config` is the repo's config
    ds.create()
    assert_repo_status(repo1)
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    cfg_after_create = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(cfg_before_create, cfg_after_create)
    assert_is_not_none(ds.id)
    original_id = ds.id

    # --- removal: the repo is gone and `config` is reevaluated again,
    # providing only user/system level config
    ds.remove()
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    cfg_after_remove = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(cfg_after_create, cfg_after_remove)
    assert_is_none(ds.id)

    # --- re-creation: everything is in place again, with a new id
    ds.create()
    assert_repo_status(repo1)
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    cfg_after_recreate = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(cfg_after_remove, cfg_after_recreate)
    assert_is_not_none(ds.id)
    assert_not_equal(ds.id, original_id)