def test_symlinked_relpath(path):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(opj(path, "origin"))
    dspath = opj(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(dspath, {
        "mike1": 'mike1',  # will be added from topdir
        "later": "later",  # later from within subdir
        "d": {
            "mike2": 'mike2',  # to be added within subdir
        }
    })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds._save("committing", path="./mike1")

    # Let's also do in subdirectory
    with chpwd(opj(dspath, 'd')):
        ds.repo.add("mike2", git=True)
        ds._save("committing", path="./mike2")

        later = opj(pardir, "later")
        ds.repo.add(later, git=True)
        ds._save("committing", path=later)

    ok_clean_git(dspath)
def test_status_basics(path, linkpath, otherdir):
    if not on_windows:
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetArgumentFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status()
    eq_(stat, ds.status())
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # we don't log decorations such as the log level while swallowing,
            # so check both that no error was reported and that no exit code
            # made it into the output
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path,
                       result_xfm='datasets', return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))

    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(
        _path_('d1/subds'), source=path,
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    # by default, it will only report a superdataset that actually
    # has the queried dataset as a registered true subdataset
    eq_(subsubds.get_superdataset(topmost=True), subds)
    # but we can also ask for a dataset that is merely above
    eq_(subsubds.get_superdataset(topmost=True, registered_only=False), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, subds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path,
            cfg.obtain('datalad.locations.default-dataset'))

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.get_subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)
    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path)
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.get_subdatasets()
    eq_(len(subdss), 1)
    eq_(os.path.join(path, subdss[0]), subds.path)
    eq_(subds.path, ds.get_subdatasets(absolute=True)[0])
    eq_(subdss, ds.get_subdatasets(recursive=True))
    eq_(subdss, ds.get_subdatasets(fulfilled=True))
    # don't have that right now
    assert_raises(NotImplementedError, ds.get_subdatasets, pattern='sub*')

    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'), source=path)
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    eq_(subsubds.get_superdataset(topmost=True), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path, LOCAL_CENTRAL_PATH)

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
def test_basics(path, nodspath):
    ds = Dataset(path).create()
    direct_mode = ds.repo.is_direct_mode()
    last_state = ds.repo.get_hexsha()
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        # provoke command failure
        with assert_raises(CommandError) as cme:
            ds.run('7i3amhmuch9invalid')
            # let's not speculate that the exit code is always 127
            ok_(cme.code > 0)
        eq_(last_state, ds.repo.get_hexsha())
        # now one that must work
        res = ds.run('touch empty', message='TEST')
        ok_clean_git(ds.path)
        assert_result_count(res, 2)
        # TODO 'state' is still untracked!!!
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty'), type='file')
        assert_result_count(res, 1, action='save', path=ds.path)
        commit_msg = ds.repo.format_commit("%B")
        ok_(commit_msg.startswith('[DATALAD RUNCMD] TEST'))
        # crude test that we have a record for the PWD
        assert_in('"pwd": "."', commit_msg)
        last_state = ds.repo.get_hexsha()
        # now run a command that will not alter the dataset
        res = ds.run('touch empty', message='NOOP_TEST')
        # When in direct mode, check at the level of save rather than add
        # because the annex files show up as typechanges and adding them won't
        # necessarily have a "notneeded" status.
        assert_result_count(res, 1,
                            action='save' if direct_mode else 'add',
                            status='notneeded')
        eq_(last_state, ds.repo.get_hexsha())
        # We can also run the command via a single-item list because this is
        # what the CLI interface passes in for quoted commands.
        res = ds.run(['touch empty'], message='NOOP_TEST')
        assert_result_count(res, 1,
                            action='save' if direct_mode else 'add',
                            status='notneeded')

    # run outside the dataset, should still work but with limitations
    with chpwd(nodspath), \
            swallow_outputs():
        res = ds.run(['touch', 'empty2'], message='TEST')
        assert_result_count(res, 1, action='add',
                            path=opj(ds.path, 'empty2'),
                            type='file', status='ok')
        assert_result_count(res, 1, action='save', status='ok')

    # running without a command is a noop
    with chpwd(path):
        with swallow_logs(new_level=logging.WARN) as cml:
            ds.run()
            assert_in("No command given", cml.out)
def test_create_curdir(path, path2):
    with chpwd(path, mkdir=True):
        create()
    ds = Dataset(path)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=True)

    with chpwd(path2, mkdir=True):
        create(no_annex=True)
    ds = Dataset(path2)
    ok_(ds.is_installed())
    assert_repo_status(ds.path, annex=False)
    ok_(op.exists(op.join(ds.path, '.noannex')))
def test_save_directory(path):
    # Sequence of save invocations on subdirectories.
    ds = Dataset(path).create(force=True)
    ds._save(path='sdir1')
    ok_clean_git(ds.path, untracked=['sdir2/foo', 'sdir3/sdir/subsub/foo'])

    # There is also difference from
    with chpwd(path):
        save(path='sdir2')
    ok_clean_git(ds.path, untracked=['sdir3/sdir/subsub/foo'])

    with chpwd(opj(path, 'sdir3')):
        save(path='sdir')
    ok_clean_git(ds.path)
def test_remove_file_handle_only(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ok_clean_git(ds.path)
    # make sure there is a key
    ok_(len(ds.repo.get_file_key('one')))
    # both files link to the same key
    eq_(ds.repo.get_file_key('one'),
        ds.repo.get_file_key('two'))
    rpath_one = realpath(opj(ds.path, 'one'))
    eq_(rpath_one, realpath(opj(ds.path, 'two')))
    path_two = opj(ds.path, 'two')
    ok_(exists(path_two))
    # remove one handle, should not affect the other
    ds.remove('two', check=False, message="custom msg")
    eq_(ds.repo.repo.head.commit.message.rstrip(), "custom msg")
    eq_(rpath_one, realpath(opj(ds.path, 'one')))
    ok_(exists(rpath_one))
    ok_(not exists(path_two))
    # remove file without specifying the dataset -- shouldn't fail
    with chpwd(path):
        remove('one', check=False)
        ok_(not exists("one"))
    # and we should be able to remove without saving
    ds.remove('three', check=False, save=False)
    ok_(ds.repo.dirty)
def test_drop_nocrash_absent_subds(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    parent.uninstall('sub')
    ok_clean_git(parent.path)
    with chpwd(path):
        assert_status('notneeded', drop('.', recursive=True))
def test_invalid_call(origin, tdir):
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible',
                  publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
def test_invalid_call(path):
    with chpwd(path):
        # ^ Change directory so that we don't fail with an
        # InvalidGitRepositoryError if the test is executed from a git
        # worktree.

        # needs an SSH URL
        assert_raises(InsufficientArgumentsError, create_sibling, '')
        assert_raises(ValueError, create_sibling, 'http://ignore.me')
        # needs an actual dataset
        assert_raises(
            ValueError,
            create_sibling, 'localhost:/tmp/somewhere', dataset='/nothere')
    # pre-configure a bogus remote
    ds = Dataset(path).create()
    ds.repo.add_remote('bogus', 'http://bogus.url.com')
    # fails to reconfigure by default, both with the generated name
    # and when given an existing name
    for res in (ds.create_sibling('bogus:/tmp/somewhere', on_failure='ignore'),
                ds.create_sibling('localhost:/tmp/somewhere',
                                  name='bogus', on_failure='ignore')):
        assert_result_count(
            res, 1,
            status='error',
            message=(
                "sibling '%s' already configured (specify alternative name, "
                "or force reconfiguration via --existing",
                'bogus'))
def test_search_outside1_install_default_ds(tdir, default_dspath):
    with chpwd(tdir):
        # let's mock out even actual install/search calls
        with \
            patch_config({'datalad.locations.default-dataset': default_dspath}), \
            patch('datalad.api.install',
                  return_value=Dataset(default_dspath)) as mock_install, \
            patch('datalad.distribution.dataset.Dataset.search',
                  new_callable=_mock_search):
            _check_mocked_install(default_dspath, mock_install)

            # now on subsequent run, we want to mock as if dataset already exists
            # at central location and then do search again
            from datalad.ui import ui
            ui.add_responses('yes')
            mock_install.reset_mock()
            with patch(
                    'datalad.distribution.dataset.Dataset.is_installed',
                    True):
                _check_mocked_install(default_dspath, mock_install)

            # and what if we say "no" to install?
            ui.add_responses('no')
            mock_install.reset_mock()
            with assert_raises(NoDatasetArgumentFound):
                list(search("."))

            # and if path exists and is a valid dataset and we say "no"
            Dataset(default_dspath).create()
            ui.add_responses('no')
            mock_install.reset_mock()
            with assert_raises(NoDatasetArgumentFound):
                list(search("."))
def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # Verify that it is the correct submodule installed and not
    # a new repository initiated
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
def _expand_globs(self):
    def normalize_hit(h):
        normalized = op.relpath(h) + ("" if op.basename(h) else op.sep)
        if h == op.curdir + op.sep + normalized:
            # Don't let relpath prune "./fname" (gh-3034).
            return h
        return normalized

    expanded = []
    with chpwd(self.pwd):
        for pattern in self._paths["patterns"]:
            hits = glob.glob(pattern)
            if hits:
                expanded.extend(sorted(map(normalize_hit, hits)))
            else:
                lgr.debug("No matching files found for '%s'", pattern)
                # We didn't find a hit for the complete pattern. If we find
                # a sub-pattern hit, that may mean we have an uninstalled
                # subdataset.
                for sub_pattern in self._get_sub_patterns(pattern):
                    sub_hits = glob.glob(sub_pattern)
                    if sub_hits:
                        expanded.extend(
                            sorted(map(normalize_hit, sub_hits)))
                        break
                # ... but we still want to retain the original pattern
                # because we don't know for sure at this point, and it
                # won't bother the "install, reglob" routine.
                expanded.extend([pattern])
    return expanded
def test_implicit_install(src, dst):
    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install,
                  source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"), get_data=False,
                     result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
def test_new_relpath(topdir):
    from datalad.api import create_test_dataset
    with swallow_logs(), chpwd(topdir), swallow_outputs():
        dss = create_test_dataset('testds', spec='1')
    eq_(dss[0], opj(topdir, 'testds'))
    eq_(len(dss), 2)  # 1 top + 1 sub-dataset as demanded
    for ds in dss:
        ok_clean_git(ds, annex=False)
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)
def test_bf2541(path):
    ds = create(path)
    subds = ds.create('sub')
    ok_clean_git(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = add('.', recursive=True)
    ok_clean_git(ds.path)
def test_invalid_call(path):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))
def test_clone_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = clone(url, result_xfm='datasets', return_type='item-or-list')
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
def test_relpath_add(path):
    ds = Dataset(path).create(force=True)
    with chpwd(opj(path, 'dir')):
        eq_(add('testindir')[0]['path'],
            opj(ds.path, 'dir', 'testindir'))
        # and now add all
        add('..')
    # auto-save enabled
    assert_false(ds.repo.dirty)
def test_bf2043p2(path):
    ds = Dataset(path).create(force=True)
    ds.add('staged', save=False)
    ok_clean_git(ds.path, head_modified=['staged'], untracked=['untracked'])
    # plain save does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save()
    ok_clean_git(ds.path, untracked=['untracked'])
def test_autoresolve_multiple_datasets(src, path):
    with chpwd(path):
        ds1 = install('ds1', source=src)
        ds2 = install('ds2', source=src)
        results = get([opj('ds1', 'test-annex.dat')] +
                      glob(opj('ds2', '*.dat')))
        # each ds has one file
        eq_(len(results), 2)
        ok_(ds1.repo.file_has_content('test-annex.dat') is True)
        ok_(ds2.repo.file_has_content('test-annex.dat') is True)
def test_install_dataset_from_just_source(url, path):
    with chpwd(path, mkdir=True):
        ds = install(source=url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_(GitRepo.is_valid_repo(ds.path))
    ok_clean_git(ds.path, annex=None)
    assert_in('INFO.txt', ds.repo.get_indexed_files())
def test_safetynet(path):
    ds = Dataset(path).create()
    os.makedirs(opj(ds.path, 'deep', 'down'))
    for p in (ds.path, opj(ds.path, 'deep'), opj(ds.path, 'deep', 'down')):
        with chpwd(p):
            # will never remove PWD, or anything outside the dataset
            for target in (ds.path, os.curdir, os.pardir,
                           opj(os.pardir, os.pardir)):
                assert_status(
                    ('error', 'impossible'),
                    uninstall(path=target, on_failure='ignore'))
def get_baseline(p):
    ds = Dataset(p).create()
    with chpwd(ds.path):
        subds = create('sub')
    ds.add('sub', save=False)
    create_tree(subds.path, {"staged": ""})
    subds.add("staged", save=False)
    # subdataset has staged changes.
    ok_clean_git(ds.path, index_modified=['sub'])
    return ds
def test_placeholders(path): ds = Dataset(path).create(force=True) ds.add(".") ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"]) ok_file_has_content(opj(path, "c.out"), "a.in b.in\n") hexsha_before = ds.repo.get_hexsha() ds.rerun() eq_(hexsha_before, ds.repo.get_hexsha()) ds.run("echo {inputs[0]} >getitem", inputs=["*.in"]) ok_file_has_content(opj(path, "getitem"), "a.in\n") ds.run("echo {pwd} >expanded-pwd") ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True) ds.run("echo {dspath} >expanded-dspath") ok_file_has_content(opj(path, "expanded-dspath"), ds.path, strip=True) subdir_path = opj(path, "subdir") with chpwd(subdir_path): run("echo {pwd} >expanded-pwd") ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path, strip=True) eq_(get_run_info(ds, ds.repo.format_commit("%B"))[1]["pwd"], "subdir") # Double brackets can be used to escape placeholders. ds.run("touch {{inputs}}", inputs=["*.in"]) ok_exists(opj(path, "{inputs}")) # rerun --script expands the placeholders. with patch("sys.stdout", new_callable=StringIO) as cmout: ds.rerun(script="-", since="") script_out = cmout.getvalue() assert_in("echo a.in b.in >c.out", script_out) assert_in("echo {} >expanded-pwd".format(subdir_path), script_out) assert_in("echo {} >expanded-dspath".format(ds.path), script_out) assert_result_count( ds.run("{unknown_placeholder}", on_failure="ignore"), 1, status="impossible", action="run") # Configured placeholders. ds.config.add("datalad.run.substitutions.license", "gpl3", where="local") ds.run("echo {license} >configured-license") ok_file_has_content(opj(path, "configured-license"), "gpl3", strip=True) # --script handles configured placeholders. with patch("sys.stdout", new_callable=StringIO) as cmout: ds.rerun(script="-") assert_in("gpl3", cmout.getvalue())
def __call__(path=None, dry_run=False, is_pipeline=False, is_template=False,
             chdir=None):
    from datalad.crawler.pipeline import (
        load_pipeline_from_config, load_pipeline_from_module,
        get_repo_pipeline_config_path, get_repo_pipeline_script_path
    )
    from datalad.crawler.pipeline import run_pipeline
    from datalad.utils import chpwd  # import late so we could mock during tests

    with chpwd(chdir):
        assert not (is_pipeline and is_template), \
            "it is either a pipeline or a template name, can't be both"
        if is_template:
            # generate a config and overload path with its filename
            path = initiate_pipeline_config(template=path,  # kwargs=TODO,
                                            commit=True)

        # TODO: centralize via _params_ handling
        if dry_run:
            if 'crawl' not in cfg.sections():
                cfg.add_section('crawl')
            cfg.set('crawl', 'dryrun', "True")

        if path is None:
            # get config from the current repository/handle
            if is_pipeline:
                raise ValueError("You must specify the file if --pipeline")
            # Let's see if there is a config or pipeline in this repo
            path = get_repo_pipeline_config_path()
            if not path or not exists(path):
                # Check if there may be the pipeline provided
                path = get_repo_pipeline_script_path()
                if path and exists(path):
                    is_pipeline = True

        if not path:
            raise RuntimeError("Cannot locate crawler config or pipeline file")

        if is_pipeline:
            lgr.info("Loading pipeline definition from %s" % path)
            pipeline = load_pipeline_from_module(path)
        else:
            lgr.info("Loading pipeline specification from %s" % path)
            pipeline = load_pipeline_from_config(path)

        lgr.info("Running pipeline %s" % str(pipeline))
        # TODO: capture the state of all branches so in case of crash
        # we could gracefully reset back
        try:
            run_pipeline(pipeline)
        except Exception as exc:
            # TODO: config.crawl.failure = full-reset | last-good-master
            # probably ask via ui which action should be performed unless
            # explicitly specified
            raise
def test_install_dataset_from_just_source_via_path(url, path):
    # for remote urls only, the source could be given to `path`
    # to allow for simplistic cmdline calls
    with chpwd(path, mkdir=True):
        ds = install(path=url)
    ok_startswith(ds.path, path)
    ok_(ds.is_installed())
    ok_clean_git(ds.path, annex=False)
    assert_true(os.path.lexists(opj(ds.path, 'test-annex.dat')))
def test_dlabspath(path):
    if not has_symlink_capability():
        raise SkipTest
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    opath = opj(path, "origin")
    os.makedirs(opath)
    lpath = opj(path, "linked")
    os.symlink('origin', lpath)
    for d in opath, lpath:
        # regardless under which directory, all results should not resolve
        # anything
        eq_(d, dlabspath(d))
        # in the root of ds
        with chpwd(d):
            eq_(dlabspath("bu"), opj(d, "bu"))
            eq_(dlabspath("./bu"), opj(d, "./bu"))  # we do not normpath by default
            eq_(dlabspath("./bu", norm=True), opj(d, "bu"))
def test_add_insufficient_args(path):
    # no argument:
    assert_raises(InsufficientArgumentsError, add)
    # no `path`, no `source`:
    assert_raises(InsufficientArgumentsError, add, dataset=path)
    with chpwd(path):
        res = add(path="some", on_failure='ignore')
        assert_status('impossible', res)
    ds = Dataset(opj(path, 'ds'))
    ds.create()
    # non-existing path outside
    assert_status('impossible',
                  ds.add(opj(path, 'outside'), on_failure='ignore'))
    # existing path outside
    with open(opj(path, 'outside'), 'w') as f:
        f.write('doesnt matter')
    assert_status('impossible',
                  ds.add(opj(path, 'outside'), on_failure='ignore'))
def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, 'datasets.datalad.org'))

        # do it a second time:
        result = install("///", result_xfm=None, return_type='list')
        assert_status('notneeded', result)
        eq_(YieldDatasets()(result[0]), ds)

        # and a third time into an existing something, that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with assert_raises(IncompleteResultsError) as cme:
            install("sub", source='///')
        assert_in("already exists and not empty", str(cme))
def test_clone_isnt_a_smartass(origin_path, path):
    origin = create(origin_path)
    cloned = clone(origin, path,
                   result_xfm='datasets', return_type='item-or-list')
    with chpwd(path):
        # now we are inside a dataset clone, and we make another one
        # we do not want automatic subdatasetification without being given
        # a dataset explicitly
        clonedsub = clone(origin, 'testsub',
                          result_xfm='datasets', return_type='item-or-list')
    # correct destination
    assert clonedsub.path.startswith(path)
    # no subdataset relation
    eq_(cloned.subdatasets(), [])
def test_update_known_submodule(path):
    def get_baseline(p):
        ds = Dataset(p).create()
        sub = ds.create('sub', save=False)
        # subdataset saw another commit after becoming a submodule
        ok_clean_git(ds.path, index_modified=['sub'])
        return ds

    # attempt one
    ds = get_baseline(opj(path, 'wo_ref'))
    with chpwd(ds.path):
        add('.', recursive=True)
    ok_clean_git(ds.path)

    # attempt two, same as above but call add via reference dataset
    ds = get_baseline(opj(path, 'w_ref'))
    ds.add('.', recursive=True)
    ok_clean_git(ds.path)
def test_since_empty_and_unsupported(p1, p2):
    source = Dataset(p1).create()
    source.create_sibling(p2, name='target1')
    # see https://github.com/datalad/datalad/pull/4448#issuecomment-620847327
    # Test that it doesn't fail without a prior push
    source.publish(to='target1', since='')
    with chpwd(p1):
        # since we have only two commits (set backend, init dataset)
        # -- there is no HEAD^^
        assert_result_count(
            publish(to='target1', since='HEAD^^', on_failure='ignore'),
            1,
            status='impossible',
            message="fatal: bad revision 'HEAD^^..HEAD'")
        # but now let's add one more commit, we should be able to publish
        source.repo.commit("msg", options=['--allow-empty'])
        publish(to='target1', since='HEAD^')  # must not fail now
def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list',
                    on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir "
                    "'%s/datasets.datalad.org': Permission denied" % pdir)
def test_GitRepo_flyweight(path1, path2):
    repo1 = GitRepo(path1, create=True)
    assert_is_instance(repo1, GitRepo)
    # instantiate again:
    repo2 = GitRepo(path1, create=False)
    assert_is_instance(repo2, GitRepo)
    # the very same object:
    ok_(repo1 is repo2)

    # reference the same in a different way:
    with chpwd(path1):
        repo3 = GitRepo(op.relpath(path1, start=path2), create=False)
    # it's the same object:
    ok_(repo1 is repo3)
    # and realpath attribute is the same, so they are still equal:
    ok_(repo1 == repo3)
def test_smth_about_not_supported(p1, p2):
    source = Dataset(p1).create()
    from datalad.support.network import PathRI
    source.create_sibling('ssh://localhost' + PathRI(p2).posixpath,
                          name='target1')
    # source.publish(to='target1')
    with chpwd(p1):
        # since we have only two commits (set backend, init dataset)
        # -- there is no HEAD^^
        assert_result_count(
            publish(to='target1', since='HEAD^^', on_failure='ignore'),
            1,
            status='impossible',
            message="fatal: bad revision 'HEAD^^'")
        # but now let's add one more commit, we should be able to publish
        source.repo.commit("msg", options=['--allow-empty'])
        publish(to='target1', since='HEAD^')  # must not fail now
def _prep_worktree(ds_path, pwd, globbed,
                   assume_ready=None, remove_outputs=False,
                   rerun_outputs=None, jobs=None):
    """
    Yields
    ------
    dict
      Result records
    """
    # ATTN: For correct path handling, all dataset command calls should be
    # unbound. They should (1) receive a string dataset argument, (2) receive
    # relative paths, and (3) happen within a chpwd(pwd) context.
    with chpwd(pwd):
        for res in prepare_inputs(
                ds_path,
                [] if assume_ready in ["inputs", "both"]
                else globbed['inputs'],
                # Ignore --assume-ready for extra_inputs. It's an unexposed
                # implementation detail that lets wrappers sneak in inputs.
                extra_inputs=globbed['extra_inputs'],
                jobs=jobs):
            yield res

        if assume_ready not in ["outputs", "both"]:
            if globbed['outputs']:
                for res in _install_and_reglob(ds_path, globbed['outputs']):
                    yield res
                for res in _unlock_or_remove(
                        ds_path,
                        globbed['outputs'].expand_strict()
                        if not remove_outputs
                        # when force-removing, exclude declared inputs
                        else set(
                            globbed['outputs'].expand_strict()).difference(
                                globbed['inputs'].expand_strict()),
                        remove=remove_outputs):
                    yield res

            if rerun_outputs is not None:
                for res in _unlock_or_remove(ds_path, rerun_outputs):
                    yield res
def test_add_delete_after_and_drop_subdir(self):
    os.mkdir(opj(self.annex.path, 'subdir'))
    mv_out = self.annex.call_git(
        ['mv', '1.tar', 'subdir']
    )
    self.annex.commit("moved into subdir")

    with chpwd(self.annex.path):
        # was failing since deleting without considering if tarball
        # was extracted in that tarball directory
        commits_prior_master = list(self.annex.get_branch_commits_())
        commits_prior = list(self.annex.get_branch_commits_('git-annex'))
        add_out = add_archive_content(
            opj('subdir', '1.tar'),
            delete_after=True,
            drop_after=True)
        assert_repo_status(self.annex.path)
        commits_after_master = list(self.annex.get_branch_commits_())
        commits_after = list(self.annex.get_branch_commits_('git-annex'))
        # There should be a single commit for all additions +1 to
        # initiate datalad-archives gh-1258.  If faking dates,
        # there should be another +1 because annex.alwayscommit
        # isn't set to false.
        assert_equal(len(commits_after),
                     len(commits_prior) + 2 + self.annex.fake_dates_enabled)
        assert_equal(len(commits_after_master), len(commits_prior_master))
        assert(add_out is self.annex)
        # there should be no .datalad temporary files hanging around
        self.assert_no_trash_left_behind()

        # and if we add some untracked file, redo, there should be no changes
        # to master and file should remain not committed
        create_tree(self.annex.path, {'dummy.txt': '123'})
        assert_true(self.annex.dirty)  # untracked file
        add_out = add_archive_content(
            opj('subdir', '1.tar'),
            delete_after=True,
            drop_after=True,
            allow_dirty=True)
        assert_repo_status(self.annex.path, untracked=['dummy.txt'])
        assert_equal(len(list(self.annex.get_branch_commits_())),
                     len(commits_prior_master))

        # there should be no .datalad temporary files hanging around
        self.assert_no_trash_left_behind()
def test_check_dates(path=None):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo], reference_date=refdate, return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only the commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
def test_rev_resolve_path(path):
    if op.realpath(path) != path:
        raise SkipTest("Test assumptions require non-symlinked parent paths")
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    opath = op.join(path, "origin")
    os.makedirs(opath)
    if not on_windows:
        lpath = op.join(path, "linked")
        os.symlink('origin', lpath)

    ds_global = Dataset(path)
    # path resolution of absolute paths is not influenced by symlinks
    # ignore the linked path on windows, it is not a symlink in the POSIX sense
    for d in (opath,) if on_windows else (opath, lpath):
        ds_local = Dataset(d)
        # no symlink resolution
        eq_(str(rev_resolve_path(d)), d)
        with chpwd(d):
            # be aware: knows about cwd, but this CWD has symlinks resolved
            eq_(str(rev_resolve_path(d).cwd()), opath)
            # using pathlib's `resolve()` will resolve any symlinks
            # also resolve `opath`, as on old windows systems the path might
            # come in crippled (e.g. C:\Users\MIKE~1/...)
            # and comparison would fail unjustified
            eq_(rev_resolve_path('.').resolve(), ut.Path(opath).resolve())
            # no norming, but absolute paths, without resolving links
            eq_(rev_resolve_path('.'), ut.Path(d))
            eq_(str(rev_resolve_path('.')), d)

            eq_(str(rev_resolve_path(op.join(os.curdir, 'bu'), ds=ds_global)),
                op.join(d, 'bu'))
            eq_(str(rev_resolve_path(op.join(os.pardir, 'bu'), ds=ds_global)),
                op.join(ds_global.path, 'bu'))

        # resolve against a dataset
        eq_(str(rev_resolve_path('bu', ds=ds_local)), op.join(d, 'bu'))
        eq_(str(rev_resolve_path('bu', ds=ds_global)), op.join(path, 'bu'))
        # but paths outside the dataset are left untouched
        eq_(str(rev_resolve_path(op.join(os.curdir, 'bu'), ds=ds_global)),
            op.join(getpwd(), 'bu'))
        eq_(str(rev_resolve_path(op.join(os.pardir, 'bu'), ds=ds_global)),
            op.normpath(op.join(getpwd(), os.pardir, 'bu')))
def test_run_cmdline_disambiguation(path):
    Dataset(path).create()
    with chpwd(path):
        # Without a positional argument starting a command, any option is
        # treated as an option to 'datalad run'.
        with swallow_outputs() as cmo:
            with patch("datalad.interface.run._execute_command") as exec_cmd:
                with assert_raises(SystemExit):
                    main(["datalad", "run", "--message"])
                exec_cmd.assert_not_called()
            assert_in("message: expected one", cmo.err)

        # If we want to pass an option as the first value of a command (e.g.,
        # because we are using a runscript with containers-run), we can do this
        # with "--".
        with patch("datalad.interface.run._execute_command") as exec_cmd:
            with assert_raises(SystemExit):
                main(["datalad", "run", "--", "--message"])
            exec_cmd.assert_called_once_with("--message", path,
                                             expected_exit=None)

        # And a twist on above: Our parser mishandles --version (gh-3067),
        # treating 'datalad run CMD --version' as 'datalad --version'.
        version_stream = "err" if PY2 else "out"
        with swallow_outputs() as cmo:
            with assert_raises(SystemExit) as cm:
                main(["datalad", "run", "echo", "--version"])
            eq_(cm.exception.code, 0)
            out = getattr(cmo, version_stream)

        with swallow_outputs() as cmo:
            with assert_raises(SystemExit):
                main(["datalad", "--version"])
            version_out = getattr(cmo, version_stream)

        ok_(version_out)
        eq_(version_out, out)

        # We can work around that (i.e., make "--version" get passed as
        # command) with "--".
        with patch("datalad.interface.run._execute_command") as exec_cmd:
            with assert_raises(SystemExit):
                main(["datalad", "run", "--", "echo", "--version"])
            exec_cmd.assert_called_once_with("echo --version", path,
                                             expected_exit=None)
def test_bf1886(path):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = create(opj(parent.path, 'sub2'))
    os.symlink(opj(pardir, pardir, 'sub2'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above, but from within a subdir of the
    # symlink, with no reference dataset
    sub3 = create(opj(parent.path, 'sub3'))
    os.symlink(opj(pardir, pardir, 'sub3'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        add([
            opj(parent.path, 'sub3'),
            opj(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    # here is where we need to disagree with the repo in #1886
    # we would not expect that `add` registers sub3 as a subdataset
    # of parent, because no reference dataset was given and the
    # command cannot decide (with the current semantics) whether
    # it should "add anything in sub3 to sub3" or "add sub3 to whatever
    # sub3 is in"
    ok_clean_git(parent.path, untracked=['sub3/'])
def test_search_outside1(tdir, newhome):
    with chpwd(tdir):
        # should fail since directory exists, but not a dataset
        # should not even waste our response ;)
        always_render = cfg.obtain('datalad.api.alwaysrender')
        with patch.object(search_mod, 'LOCAL_CENTRAL_PATH', newhome):
            if always_render:
                # we do try to render results, which actually causes the
                # exception to come right away
                assert_raises(NoDatasetArgumentFound, search, "bu")
            else:
                gen = search("bu")
                assert_is_generator(gen)
                assert_raises(NoDatasetArgumentFound, next, gen)
        # and if we point to some non-existing dataset -- the same in both
        # cases, but might come before even `next` if always_render
        with assert_raises(ValueError):
            next(search("bu", dataset=newhome))
def test_placeholders(path): ds = Dataset(path).create(force=True) ds.add(".") ds.run("echo {inputs} >{outputs}", inputs=[".", "*.in"], outputs=["c.out"]) ok_file_has_content(opj(path, "c.out"), "a.in b.in\n") hexsha_before = ds.repo.get_hexsha() ds.rerun() eq_(hexsha_before, ds.repo.get_hexsha()) ds.run("echo {inputs[0]} >getitem", inputs=["*.in"]) ok_file_has_content(opj(path, "getitem"), "a.in\n") ds.run("echo {pwd} >expanded-pwd") ok_file_has_content(opj(path, "expanded-pwd"), path, strip=True) ds.run("echo {dspath} >expanded-dspath") ok_file_has_content(opj(path, "expanded-dspath"), ds.path, strip=True) subdir_path = opj(path, "subdir") with chpwd(subdir_path): run("echo {pwd} >expanded-pwd") ok_file_has_content(opj(path, "subdir", "expanded-pwd"), subdir_path, strip=True) eq_(get_run_info(ds, ds.repo.repo.head.commit.message)[1]["pwd"], "subdir") # Double brackets can be used to escape placeholders. ds.run("touch {{inputs}}", inputs=["*.in"]) ok_exists(opj(path, "{inputs}")) # rerun --script expands the placeholders. with patch("sys.stdout", new_callable=StringIO) as cmout: ds.rerun(script="-", since="") script_out = cmout.getvalue() assert_in("echo a.in b.in >c.out", script_out) assert_in("echo {} >expanded-pwd".format(subdir_path), script_out) assert_in("echo {} >expanded-dspath".format(ds.path), script_out)
def test_bf1886(path):
    parent = Dataset(path).rev_create()
    sub = parent.rev_create('sub')
    ok_clean_git(parent.path)
    # create a symlink pointing down to the subdataset, and add it
    os.symlink('sub', opj(parent.path, 'down'))
    parent.add('down')
    ok_clean_git(parent.path)
    # now symlink pointing up
    os.makedirs(opj(parent.path, 'subdir', 'subsubdir'))
    os.symlink(opj(pardir, 'sub'), opj(parent.path, 'subdir', 'up'))
    parent.add(opj('subdir', 'up'))
    ok_clean_git(parent.path)
    # now symlink pointing 2xup, as in #1886
    os.symlink(opj(pardir, pardir, 'sub'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup'))
    parent.add(opj('subdir', 'subsubdir', 'upup'))
    ok_clean_git(parent.path)
    # simultaneously add a subds and a symlink pointing to it
    # create subds, but don't register it
    sub2 = rev_create(opj(parent.path, 'sub2'))
    os.symlink(opj(pardir, pardir, 'sub2'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup2'))
    parent.add(['sub2', opj('subdir', 'subsubdir', 'upup2')])
    ok_clean_git(parent.path)
    # full replication of #1886: the above, but from within a subdir of the
    # symlink, with no reference dataset
    sub3 = rev_create(opj(parent.path, 'sub3'))
    os.symlink(opj(pardir, pardir, 'sub3'),
               opj(parent.path, 'subdir', 'subsubdir', 'upup3'))
    # need to use absolute paths
    with chpwd(opj(parent.path, 'subdir', 'subsubdir')):
        rev_save([
            opj(parent.path, 'sub3'),
            opj(parent.path, 'subdir', 'subsubdir', 'upup3')
        ])
    # in contrast to `add`, this only operates on a single top-level dataset
    # although it is not specified, it gets discovered based on the PWD
    # the logic behind that feels a bit shaky
    # consult discussion in https://github.com/datalad/datalad/issues/3230
    # if this comes up as an issue at some point
    ok_clean_git(parent.path)
def test_tarball(path):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save(all_changes=True)
    committed_date = ds.repo.get_committed_date()
    with chpwd(path):
        _mod, tarball1 = ds.export('tarball')
        assert(not isabs(tarball1))
        tarball1 = opj(path, tarball1)
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    assert_equal(tarball1, default_outname)
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export('tarball', output=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original tarball filename -> different checksum, despite
    # same content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # should really sleep, so if they stop using time.time -- we know
    time.sleep(1.1)
    ds.export('tarball', output=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly three files, and expect no content for any directory
            assert_equal(nfiles, 3)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')
def test_uninstall_subdataset(src, dst):
    ds = install(dst, source=src, recursive=True)
    ok_(ds.is_installed())
    known_subdss = ds.subdatasets(result_xfm='datasets')
    for subds in ds.subdatasets(result_xfm='datasets'):
        ok_(subds.is_installed())

        repo = subds.repo
        annexed_files = repo.get_content_annexinfo(init=None)
        repo.get([str(f) for f in annexed_files])

        # drop data of subds:
        res = ds.drop(path=subds.path, result_xfm='paths')
        ok_(all(str(f) in res for f in annexed_files))
        ainfo = repo.get_content_annexinfo(paths=annexed_files,
                                           eval_availability=True)
        ok_(all(not st["has_content"] for st in ainfo.values()))
        # subdataset is still known
        assert_in(subds.path, ds.subdatasets(result_xfm='paths'))

    eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)

    for subds in ds.subdatasets(result_xfm='datasets'):
        # uninstall subds itself:
        # simulate a cmdline invocation pointing to the subdataset
        # with a relative path from outside the superdataset to catch
        # https://github.com/datalad/datalad/issues/4001
        pwd = Path(dst).parent
        with chpwd(str(pwd)):
            res = uninstall(
                dataset=ds.path,
                path=str(subds.pathobj.relative_to(pwd)),
                result_xfm='datasets',
            )
        eq_(res[0], subds)
        ok_(not subds.is_installed())
        # just a deinit must not remove the subdataset registration
        eq_(ds.subdatasets(result_xfm='datasets'), known_subdss)
        # mountpoint of subdataset should still be there
        ok_(exists(subds.path))
def test_require_dataset(path):
    with chpwd(path):
        assert_raises(
            InsufficientArgumentsError,
            require_dataset,
            None)
        create('.')
        # in this folder by default
        assert_equal(
            require_dataset(None).path,
            path)
        assert_equal(
            require_dataset('some', check_installed=False).path,
            abspath('some'))
        assert_raises(
            ValueError,
            require_dataset,
            'some',
            check_installed=True)
def test_get_dataset_root(path):
    eq_(get_dataset_root('/nonexistent'), None)
    with chpwd(path):
        repo = AnnexRepo(os.curdir, create=True)
        subdir = opj('some', 'deep')
        fname = opj(subdir, 'dummy')
        os.makedirs(subdir)
        with open(fname, 'w') as f:
            f.write('some')
        repo.add(fname)
        # we can find this repo
        eq_(get_dataset_root(os.curdir), os.curdir)
        # and we get the type of path that we fed in
        eq_(get_dataset_root(abspath(os.curdir)), abspath(os.curdir))
        # subdirs are no issue
        eq_(get_dataset_root(subdir), os.curdir)
        # even more subdirs are no issue
        eq_(get_dataset_root(opj(subdir, subdir)), os.curdir)
        # non-dir paths are no issue
        eq_(get_dataset_root(fname), os.curdir)
def test_resolve_path_symlink_edition(path):
    deepest = ut.Path(path) / 'one' / 'two' / 'three'
    deepest_str = str(deepest)
    os.makedirs(deepest_str)
    with chpwd(deepest_str):
        # direct absolute
        eq_(deepest, resolve_path(deepest))
        eq_(deepest, resolve_path(deepest_str))
        # explicit direct relative
        eq_(deepest, resolve_path('.'))
        eq_(deepest, resolve_path(op.join('.', '.')))
        eq_(deepest, resolve_path(op.join('..', 'three')))
        eq_(deepest, resolve_path(op.join('..', '..', 'two', 'three')))
        eq_(deepest, resolve_path(op.join('..', '..', '..',
                                          'one', 'two', 'three')))
        # weird ones
        eq_(deepest, resolve_path(op.join('..', '.', 'three')))
        eq_(deepest, resolve_path(op.join('..', 'three', '.')))
        eq_(deepest, resolve_path(op.join('..', 'three', '.')))
        eq_(deepest, resolve_path(op.join('.', '..', 'three')))
def check_create_path_semantics(
        cwd, create_ds, path_arg, base_path, other_path):
    ds = Dataset(base_path).create()
    os.makedirs(op.join(ds.path, 'some'))
    target_path = ds.pathobj / "some" / "what" / "deeper"
    with chpwd(
            other_path if cwd == 'elsewhere' else
            base_path if cwd == 'parentds' else
            str(ds.pathobj / 'some') if cwd == 'subdir' else
            str(Path.cwd())):
        subds = create(
            dataset=ds.path if create_ds == 'abspath'
            else str(ds.pathobj.relative_to(cwd)) if create_ds == 'relpath'
            else ds if create_ds == 'instance'
            else create_ds,
            path=str(target_path) if path_arg == 'abspath'
            else str(target_path.relative_to(ds.pathobj)) if path_arg == 'relpath'
            else op.join('what', 'deeper') if path_arg == 'subdir_relpath'
            else path_arg)
        eq_(subds.pathobj, target_path)
def test_clone_datasets_root(tdir):
    with chpwd(tdir):
        ds = clone("///", result_xfm='datasets', return_type='item-or-list')
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        res = clone("///", on_failure='ignore')
        assert_message("dataset %s was already cloned from '%s'", res)
        assert_status('notneeded', res)

        # and a third time into an existing something, that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        res = clone('///', path="sub", on_failure='ignore')
        assert_message(
            'target path already exists and not empty, refuse to clone into target path',
            res)
        assert_status('error', res)
def test_cfg_originorigin(path):
    path = Path(path)
    origin = Dataset(path / 'origin').create()
    (origin.pathobj / 'file1.txt').write_text('content')
    origin.save()
    clone_lev1 = clone(origin, path / 'clone_lev1')
    clone_lev2 = clone(clone_lev1, path / 'clone_lev2')
    # the goal is to be able to get file content from origin without
    # the need to configure it manually
    assert_result_count(
        clone_lev2.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev2.pathobj / 'file1.txt'),
    )
    eq_((clone_lev2.pathobj / 'file1.txt').read_text(), 'content')
    eq_(
        Path(clone_lev2.siblings(
            'query',
            name='origin-2',
            return_type='item-or-list')['url']),
        origin.pathobj
    )

    # Clone another level, this time with a relative path. Drop content from
    # lev2 so that origin is the only place that the file is available from.
    clone_lev2.drop("file1.txt")
    with chpwd(path), swallow_logs(new_level=logging.DEBUG) as cml:
        clone_lev3 = clone('clone_lev2', 'clone_lev3')
        # we called git-annex-init; see gh-4367:
        cml.assert_logged(
            msg=r"[^[]*Async run \[('git', 'annex'|'git-annex'), 'init'",
            match=False,
            level='DEBUG')
    assert_result_count(
        clone_lev3.get('file1.txt', on_failure='ignore'),
        1,
        action='get',
        status='ok',
        path=str(clone_lev3.pathobj / 'file1.txt'))
def test_openfmri_addperms(ind, topurl, outd, clonedir):
    index_html = opj(ind, 'ds666', 'index.html')

    list(initiate_dataset(
        template="openfmri",
        dataset_name='dataladtest-ds666',
        path=outd,
        data_fields=['dataset'])({'dataset': 'ds666'}))

    ok_clean_git(outd)
    with chpwd(outd):
        pipeline = ofpipeline(
            'ds666', versioned_urls=False, topurl=topurl,
            s3_prefix=False  # so we do not invoke s3 subpipeline
        )
        ok_clean_git(outd)
        out = run_pipeline(pipeline)
    eq_(len(out), 1)

    ok_clean_git(outd)
    ok_file_under_git(outd, 'dataset_description.json', annexed=False)
def test_gh1597(path):
    ds = Dataset(path).create()
    with chpwd(ds.path):
        sub = create('sub')
    ds.add('sub', save=False)
    # only staged at this point, but known, and not annexed
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    assert_repo_status(ds.path, added=[sub.path])
    # now modify low-level
    with open(opj(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path,
                       modified=[ds.pathobj / ".gitmodules"],
                       added=[sub.path])
    ds.add('.gitmodules')
    # must not come under annex management
    ok_file_under_git(ds.path, '.gitmodules', annexed=False)
def test_install_datasets_root(tdir):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, 'datasets.datalad.org'))

        # do it a second time:
        with swallow_logs(new_level=logging.INFO) as cml:
            result = install("///")
            assert_in("was already installed from", cml.out)
            eq_(result, ds)

        # and a third time into an existing something, that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")
        with swallow_logs(new_level=logging.WARNING) as cml:
            result = install("sub", source='///')
            assert_in("already exists and is not an installed dataset", cml.out)
            ok_(result is None)
def test_get_in_unavailable_subdataset(src, path):
    _make_dataset_hierarchy(src)
    root = install(
        path, source=src, result_xfm='datasets', return_type='item-or-list')
    targetpath = opj('sub1', 'sub2')
    targetabspath = opj(root.path, targetpath)
    with chpwd(path):
        res = get(targetabspath)
    assert_result_count(res, 2, status='ok', action='install', type='dataset')
    # dry-fit result filter that only returns the result that matched the
    # requested path
    filtered = [r for r in res if only_matching_paths(r, path=targetabspath)]
    assert_result_count(
        filtered, 1, status='ok', action='install', type='dataset',
        path=targetabspath)
    # we got the dataset, and its immediate content, but nothing below
    sub2 = Dataset(targetabspath)
    ok_(sub2.is_installed())
    ok_(sub2.repo.file_has_content('file_in_annex.txt') is True)
    ok_(not Dataset(opj(targetabspath, 'sub3')).is_installed())