Example #1
def test_initiate_dataset(path, path2):
    dataset_path = opj(path, 'test')
    datas = list(
        initiate_dataset('template', 'testdataset', path=dataset_path)())
    assert_equal(len(datas), 1)
    data = datas[0]
    eq_(data['dataset_path'], dataset_path)
    crawl_cfg = opj(dataset_path, CRAWLER_META_CONFIG_PATH)
    ok_(exists(crawl_cfg))
    pipeline = load_pipeline_from_config(crawl_cfg)  # verify the generated config loads

    # by default we should initiate with the MD5E backend
    fname = 'test.dat'
    f = opj(dataset_path, fname)
    annex = put_file_under_git(f, content="test", annexed=True)
    eq_(annex.get_file_backend(f), 'MD5E')

    # even if we clone it, the backend persists, since it is set by Annexificator;
    # so we don't need to explicitly commit it just in master, since that might
    # not be the branch we end up working in
    annex2 = AnnexRepo.clone(path=path2, url=dataset_path)
    annex3 = put_file_under_git(path2,
                                'test2.dat',
                                content="test2",
                                annexed=True)
    eq_(annex3.get_file_backend('test2.dat'), 'MD5E')

    raise SkipTest("TODO much more")
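
For context, the MD5E assertion above boils down to git-annex's per-file backend lookup. A minimal standalone sketch of the same check via plain AnnexRepo calls (the repository path and file content here are hypothetical):

from os.path import join as opj
from datalad.support.annexrepo import AnnexRepo

# hypothetical scratch repository, with MD5E as the default annex backend
repo = AnnexRepo('/tmp/demo-annex', backend='MD5E', create=True)
with open(opj(repo.path, 'demo.dat'), 'w') as f:
    f.write('demo content')
repo.add('demo.dat')  # annexes (and stages) the new file
assert repo.get_file_backend('demo.dat') == 'MD5E'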
Example #2
def test_install_consistent_state(src, dest, dest2, dest3):
    # If we install a dataset whose sub-dataset "went ahead" on its branch
    # while the super-dataset was not yet updated (e.g. we installed the super
    # earlier), then the default installed branch should end up at the
    # position the previous location was pointing to.
    # This is a mere heuristic whose assumption might not hold in some cases,
    # but it works for the simplest and thus most commonly used ones.
    ds1 = create(src)
    sub1 = ds1.create('sub1')

    def check_consistent_installation(ds):
        datasets = [ds] + list(
            map(
                Dataset,
                ds.subdatasets(
                    recursive=True, fulfilled=True, result_xfm='paths')))
        assert len(datasets) == 2  # in this test
        for ds in datasets:
            # all of them should be in master branch
            eq_(ds.repo.get_active_branch(), "master")
            # all of them should be clean, so sub should be installed in a "version"
            # as pointed by the super
            ok_(not ds.repo.dirty)

    dest_ds = install(dest, source=src)
    # now we progress sub1 by adding sub2
    subsub2 = sub1.create('sub2')

    # and progress subsub2 forward to stay really thorough
    put_file_under_git(subsub2.path, 'file.dat', content="data")
    subsub2.save("added a file")  # above function does not commit

    # just installing a submodule -- apparently different code/logic
    # but also the same story should hold - we should install the version pointed
    # by the super, and stay all clean
    dest_sub1 = dest_ds.install('sub1')
    check_consistent_installation(dest_ds)

    # So now we have source super-dataset "dirty" with sub1 progressed forward
    # Our install should try to "retain" consistency of the installation
    # whenever possible.

    # install entire hierarchy without specifying dataset
    # no filter, we want full report
    dest2_ds = install(dest2, source=src, recursive=True, result_filter=None)
    check_consistent_installation(dest2_ds[0])  # [1] is the subdataset

    # install entire hierarchy by first installing top level ds
    # and then specifying sub-dataset
    dest3_ds = install(dest3, source=src, recursive=False)
    # and then install both submodules recursively, pointing at them
    # via dest3_ds
    dest3_ds.install('sub1', recursive=True)
    check_consistent_installation(dest3_ds)
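
The check_consistent_installation helper above hinges on Dataset.subdatasets() reporting the paths of installed subdatasets. A stripped-down sketch of the same traversal over a hypothetical installed hierarchy:

from datalad.api import Dataset

ds = Dataset('/tmp/demo-super')  # hypothetical installed super-dataset
subs = ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths')
for path in [ds.path] + subs:
    repo = Dataset(path).repo
    # each level should sit clean on the state recorded by its super-dataset
    print(path, repo.get_active_branch(), 'dirty' if repo.dirty else 'clean')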
Example #3
def test_initiate_dataset_new_create_warns(path):
    try:
        from datalad.distribution import create
    except ImportError:
        # We are on a version of DataLad with the new create.
        expected_backend = "MD5E"
        expect_warning = True
    else:
        # Older DataLad still provides datalad.distribution.create, which
        # honors the crawler-specified backend.
        expected_backend = "SHA256E"
        expect_warning = False

    path = opj(path, 'test')
    with swallow_logs(new_level=logging.WARNING) as cml:
        list(
            initiate_dataset('template',
                             'testdataset',
                             backend="SHA256E",
                             path=path)())
        if expect_warning:
            assert_in("datalad.repo.backend", cml.out)
    # DataLad's new create honors datalad.repo.backend rather than the
    # crawler-specific one.
    fname = 'test.dat'
    f = opj(path, fname)
    annex = put_file_under_git(f, content="test", annexed=True)
    eq_(annex.get_file_backend(f), expected_backend)
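
The warning assertion relies on swallow_logs, which captures output from loggers under the 'datalad' hierarchy. A minimal usage sketch (the logger name and message below are made up for illustration):

import logging
from datalad.utils import swallow_logs

with swallow_logs(new_level=logging.WARNING) as cml:
    logging.getLogger('datalad.demo').warning('datalad.repo.backend wins')
    # cml.out holds everything the captured loggers emitted
    assert 'datalad.repo.backend' in cml.out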