def test_symlinked_relpath(path=None):
    # initially ran into on OSX https://github.com/datalad/datalad/issues/2406
    os.makedirs(op.join(path, "origin"))
    dspath = op.join(path, "linked")
    os.symlink('origin', dspath)
    ds = Dataset(dspath).create()
    create_tree(
        dspath,
        {
            "mike1": 'mike1',  # will be added from topdir
            "later": "later",  # later from within subdir
            "d": {
                "mike2": 'mike2',  # to be added within subdir
            }
        })

    # in the root of ds
    with chpwd(dspath):
        ds.repo.add("mike1", git=True)
        ds.save(message="committing", path="./mike1")

    # Let's also do it with a subdirectory as CWD, and check that relative
    # paths given to a plain command (not a dataset method) are treated as
    # relative to CWD
    with chpwd(op.join(dspath, 'd')):
        save(dataset=ds.path,
             message="committing",
             path="mike2")

        later = op.join(op.pardir, "later")
        ds.repo.add(later, git=True)
        save(dataset=ds.path, message="committing", path=later)

    assert_repo_status(dspath)


def test_subsuperdataset_save(path=None):
    # Verify that when invoked without recursion save does not
    # cause querying of subdatasets of the subdataset
    # see https://github.com/datalad/datalad/issues/4523
    parent = Dataset(path).create()
    # Create 3 levels of subdatasets so we can later check operation
    # with or without --dataset being specified
    sub1 = parent.create('sub1')
    sub2 = parent.create(sub1.pathobj / 'sub2')
    sub3 = parent.create(sub2.pathobj / 'sub3')
    assert_repo_status(path)
    # now we will lobotomize that sub3 so git would fail if any query is performed
    (sub3.pathobj / '.git' / 'config').chmod(0o000)
    try:
        sub3.repo.call_git(['ls-files'], read_only=True)
        raise SkipTest
    except CommandError:
        # desired outcome
        pass
    # the call should proceed fine since neither should care about sub3
    # default is no recursion
    parent.save('sub1')
    sub1.save('sub2')
    assert_raises(CommandError, parent.save, 'sub1', recursive=True)
    # and should not fail in the top level superdataset
    with chpwd(parent.path):
        save('sub1')
    # or in a subdataset above the problematic one
    with chpwd(sub1.path):
        save('sub2')


def test_install_dataset_from_just_source(src_repo=None, path=None):
    src_ds = Dataset(src_repo).create(result_renderer='disabled', force=True)
    src_ds.save(['INFO.txt', 'test.dat'], to_git=True)
    src_ds.save('test-annex.dat', to_git=False)
    # equivalent repo on github:
    src_url = "https://github.com/datalad/testrepo--basic--r1.git"
    sources = [
        src_ds.path,
        get_local_file_url(src_ds.path, compatibility='git')
    ]
    if not dl_cfg.get('datalad.tests.nonetwork'):
        sources.append(src_url)

    for url in sources:
        with chpwd(path, mkdir=True):
            ds = install(source=url)

        ok_startswith(ds.path, path)
        ok_(ds.is_installed())
        ok_(GitRepo.is_valid_repo(ds.path))
        assert_repo_status(ds.path, annex=None)
        assert_in('INFO.txt', ds.repo.get_indexed_files())

        # cleanup before next iteration
        rmtree(path)


def test_creatsubdatasets(topds_path=None, n=2):
    from datalad.api import create
    from datalad.distribution.dataset import Dataset

    ds = Dataset(topds_path).create()
    paths = [op.join(topds_path, "subds%d" % i) for i in range(n)]
    paths.extend(
        op.join(topds_path, "subds%d" % i, "subsub%d" % k)
        for i in range(n)
        for k in range(2))
    # To allow for parallel execution without hitting the problem of
    # a lock in the super dataset, we create all subdatasets, and then
    # save them all within their superdataset
    create_ = partial(
        create,  # cfg_proc="yoda",
        result_xfm=None,
        return_type='generator')
    # if we flip the paths to go from the end, create without --force should fail
    # and we should get the exception (the first one encountered!)
    # Note: reraise_immediately is of "concern" only for the producer, since we
    # typically rely on outside code to do the killing!
    assert_raises(IncompleteResultsError,
                  list,
                  ProducerConsumer(paths[::-1], create_, jobs=5))

    # we are in a dirty state, let's just remove all those for a clean run
    rmtree(topds_path)

    # and this one followed by save should be good IFF we provide our dependency checker
    ds = Dataset(topds_path).create()
    list(ProducerConsumer(paths, create_,
                          safe_to_consume=no_parentds_in_futures,
                          jobs=5))
    ds.save(paths)
    assert_repo_status(ds.repo)


def test_add_mimetypes(path=None):
    ds = Dataset(path).create(force=True)
    ds.repo.add('.gitattributes')
    ds.repo.commit('added attributes to git explicitly')
    # now test that those files will go into git/annex correspondingly
    # WINDOWS FAILURE NEXT
    __not_tested__ = ds.save(['file.txt', 'empty'])
    assert_repo_status(path, untracked=['file2.txt'])
    # But we should be able to force adding a file to the annex when desired
    ds.save('file2.txt', to_git=False)
    # check annex file status
    annexinfo = ds.repo.get_content_annexinfo()
    for path, in_annex in (
            # an empty file is considered to be application/octet-stream,
            # i.e. non-text
            ('empty', True),
            ('file.txt', False),
            ('file2.txt', True)):
        # low-level API report -> repo path reference, no ds path
        p = ds.repo.pathobj / path
        assert_in(p, annexinfo)
        if in_annex:
            assert_in('key', annexinfo[p], p)
        else:
            assert_not_in('key', annexinfo[p], p)


def test_push_subds_no_recursion(src_path=None, dst_top=None, dst_sub=None,
                                 dst_subsub=None):
    # dataset with one submodule and one subsubmodule
    top = Dataset(src_path).create()
    sub = top.create('sub m')
    test_file = sub.pathobj / 'subdir' / 'test_file'
    test_file.parent.mkdir()
    test_file.write_text('some')
    subsub = sub.create(sub.pathobj / 'subdir' / 'subsub m')
    top.save(recursive=True)
    assert_repo_status(top.path)

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # now publish, but NO recursion, instead give the parent dir of
    # both a subdataset and a file in the middle subdataset
    res = top.push(
        to='target',
        # give relative to top dataset to elevate the difficulty a little
        path=str(test_file.relative_to(top.pathobj).parent))
    assert_status('ok', res)
    assert_in_results(res, action='publish', type='dataset', path=top.path)
    assert_in_results(res, action='publish', type='dataset', path=sub.path)
    assert_in_results(res, action='copy', type='file', path=str(test_file))
    # the lowest-level subdataset isn't touched
    assert_not_in_results(
        res, action='publish', type='dataset', path=subsub.path)


def test_merge_follow_parentds_subdataset_adjusted_warning(path=None):
    path = Path(path)

    ds_src = Dataset(path / "source").create()
    ds_src_subds = ds_src.create("subds")

    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    ds_clone_subds = Dataset(ds_clone.pathobj / "subds")
    maybe_adjust_repo(ds_clone_subds.repo)
    # Note: Were we to save ds_clone here, we would get a merge conflict in the
    # top repo for the submodule (even if using 'git annex sync' rather than
    # 'git merge').

    ds_src_subds.repo.call_git(["checkout", DEFAULT_BRANCH + "^0"])
    (ds_src_subds.pathobj / "foo").write_text("foo content")
    ds_src.save(recursive=True)
    assert_repo_status(ds_src.path)

    assert_in_results(
        ds_clone.update(merge=True, recursive=True, follow="parentds",
                        on_failure="ignore"),
        status="impossible",
        path=ds_clone_subds.path,
        action="update")
    eq_(ds_clone.repo.get_hexsha(), ds_src.repo.get_hexsha())


def test_sidecar(path=None):
    ds = Dataset(path).create()
    # Simple sidecar message checks.
    ds.run("cd .> dummy0", message="sidecar arg", sidecar=True)
    assert_not_in('"cmd":', ds.repo.format_commit("%B"))

    ds.config.set("datalad.run.record-sidecar", "false", scope="local")
    ds.run("cd .> dummy1", message="sidecar config")
    assert_in('"cmd":', last_commit_msg(ds.repo))

    ds.config.set("datalad.run.record-sidecar", "true", scope="local")
    ds.run("cd .> dummy2", message="sidecar config")
    assert_not_in('"cmd":', last_commit_msg(ds.repo))

    # Don't break when config.get() returns multiple values. Here it's two
    # values in .gitconfig, but a more realistic scenario is a value in
    # $repo/.git/config that overrides a setting in ~/.config/git/config.
    ds.config.add("datalad.run.record-sidecar", "false", scope="local")
    ds.run("cd .> dummy3", message="sidecar config")
    assert_in('"cmd":', last_commit_msg(ds.repo))

    # make sure sidecar file is committed when explicitly specifying outputs
    ds.run("cd .> dummy4", outputs=["dummy4"], sidecar=True, explicit=True,
           message="sidecar + specified outputs")
    assert_not_in('"cmd":', last_commit_msg(ds.repo))
    assert_repo_status(ds.path)


def test_run_remove_keeps_leading_directory(path=None):
    ds = Dataset(op.join(path, "ds")).create()
    repo = ds.repo

    (ds.pathobj / "d").mkdir()
    output = (ds.pathobj / "d" / "foo")
    output.write_text("foo")
    ds.save()

    output_rel = str(output.relative_to(ds.pathobj))
    repo.drop(output_rel, options=["--force"])

    assert_in_results(
        ds.run("cd .> {}".format(output_rel), outputs=[output_rel],
               result_renderer='disabled'),
        action="run.remove", status="ok")
    assert_repo_status(ds.path)

    # Remove still gets saved() if command doesn't generate the output (just as
    # it would if git-rm were used instead of unlink).
    repo.drop(output_rel, options=["--force"])
    assert_in_results(
        ds.run("cd .> something-else", outputs=[output_rel],
               result_renderer='disabled'),
        action="run.remove", status="ok")
    assert_repo_status(ds.path)


def test_aggregate_removal(path=None):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\nmetadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.save(recursive=True)
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    assert_result_count(res, 1, path=subsub.path)
    # check that we only have object files that are listed in agginfo
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # now delete the deepest subdataset to test cleanup of aggregated objects
    # in the top-level ds
    base.remove(opj('sub', 'subsub'), reckless='kill')
    # now aggregation has to detect that subsub is not simply missing, but gone
    # for good
    base.aggregate_metadata(recursive=True, update_mode='all')
    assert_repo_status(base.path)
    # internally consistent state
    eq_(_get_contained_objs(base), _get_referenced_objs(base))
    # info on subsub was removed at all levels
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 2)
    res = sub.metadata(get_aggregates=True)
    assert_result_count(res, 0, path=subsub.path)
    assert_result_count(res, 1)


def test_ds_extraction(path=None):
    skip_if_no_module('libxmp')

    ds = Dataset(path).create()
    copy(testpath, path)
    ds.save()
    assert_repo_status(ds.path)

    res = extract_metadata(
        types=['xmp'],
        dataset=ds,
        # artificially disable extraction from any file in the dataset
        files=[])
    assert_result_count(
        res, 1,
        type='dataset', status='ok', action='metadata', path=path,
        refds=ds.path)
    assert_in('xmp', res[0]['metadata'])

    # now the more useful case: getting everything for xmp from a dataset
    res = extract_metadata(
        types=['xmp'],
        dataset=ds)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1,
        type='dataset', status='ok', action='metadata', path=path,
        refds=ds.path)
    assert_result_count(
        res, 1,
        type='file', status='ok', action='metadata', path=opj(path, 'xmp.pdf'),
        parentds=ds.path)
    for r in res:
        assert_in('xmp', r['metadata'])


def test_create_1test_dataset():
    # and just a single dataset
    from datalad.api import create_test_dataset
    with swallow_outputs():
        dss = create_test_dataset()
    eq_(len(dss), 1)
    assert_repo_status(dss[0], annex=False)


def test_audio(path=None):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', scope='branch')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.save()
    assert_repo_status(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets', return_type='item-or-list'
    )['metadata']['datalad_unique_content_properties']
    # the test file has it, but uniques have it blanked out, because the
    # extractor considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # the 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert (not meta['date'])
    assert_not_in('date', uniques['audio'])


def test_remove_more_than_one(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    assert_repo_status(path)
    # ensure #1912 stays resolved
    ds.remove(['one', 'two'], reckless='availability')
    assert_repo_status(path)


def test_dirty(path=None):
    for mode in _dirty_modes:
        # does nothing without a dataset
        handle_dirty_dataset(None, mode)
    # placeholder, but not yet created
    ds = Dataset(path)
    # unknown mode
    assert_raises(ValueError, handle_dirty_dataset, ds, 'MADEUP')
    # not yet created is very dirty
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'fail')
    handle_dirty_dataset(ds, 'ignore')
    assert_raises(RuntimeError, handle_dirty_dataset, ds, 'save-before')
    # should yield a clean repo
    ds.create()
    orig_state = ds.repo.get_hexsha()
    _check_all_clean(ds, orig_state)
    # tainted: untracked
    with open(opj(ds.path, 'something'), 'w') as f:
        f.write('some')
    # we don't want to auto-add untracked files by saving (anymore)
    assert_raises(AssertionError, _check_auto_save, ds, orig_state)
    # tainted: staged
    ds.repo.add('something', git=True)
    orig_state = _check_auto_save(ds, orig_state)
    # tainted: submodule
    # not added to super on purpose!
    subds = ds.create('subds')
    _check_all_clean(subds, subds.repo.get_hexsha())
    assert_repo_status(ds.path)
    # subdataset must be added as a submodule!
    assert_equal(ds.subdatasets(result_xfm='relpaths'), ['subds'])


def check_save_dotfiles(to_git, save_path, path):
    # Note: Take relpath to work with Travis "TMPDIR=/var/tmp/sym\ link" run.
    paths = [
        Path(op.relpath(op.join(root, fname), path))
        for root, _, fnames in os.walk(op.join(path, save_path or ""))
        for fname in fnames
    ]
    ok_(paths)
    ds = Dataset(path).create(force=True)
    ds.save(save_path, to_git=to_git)
    if save_path is None:
        assert_repo_status(ds.path)
    repo = ds.repo
    annexinfo = repo.get_content_annexinfo()

    def _check(fn, p):
        fn("key", annexinfo[repo.pathobj / p], p)

    if to_git:
        def check(p):
            _check(assert_not_in, p)
    else:
        def check(p):
            _check(assert_in, p)

    for path in paths:
        check(path)


def test_create_subdataset_hierarchy_from_top(path=None):
    # what it would look like to overlay a subdataset hierarchy onto
    # an existing directory tree
    ds = Dataset(op.join(path, 'origin')).create(force=True)
    # we got a dataset ....
    ok_(ds.is_installed())
    # ... but it has untracked content
    ok_(ds.repo.dirty)
    subds = ds.create(u"ds-" + OBSCURE_FILENAME, force=True)
    ok_(subds.is_installed())
    ok_(subds.repo.dirty)
    subsubds = subds.create('subsub', force=True)
    ok_(subsubds.is_installed())
    ok_(subsubds.repo.dirty)
    ok_(ds.id != subds.id != subsubds.id)
    ds.save(updated=True, recursive=True)
    # 'file*' in each repo was untracked before and should remain as such
    # (we don't want a #1419 resurrection)
    ok_(ds.repo.dirty)
    ok_(subds.repo.dirty)
    ok_(subsubds.repo.dirty)
    # if we add these three, we should get clean
    ds.save([
        'file1',
        op.join(subds.path, 'file2'),
        op.join(subsubds.path, 'file3')])
    assert_repo_status(ds.path)
    ok_(ds.id != subds.id != subsubds.id)


def test_encoding(path=None):
    staged = OBSCURE_FILENAME + u'_staged'
    untracked = OBSCURE_FILENAME + u'_untracked'
    ds = Dataset(path).create(force=True)
    ds.repo.add(staged)
    assert_repo_status(ds.path, added=[staged], untracked=[untracked])
    ds.save(updated=True)
    assert_repo_status(ds.path, untracked=[untracked])


def test_save_gitrepo_annex_subds_adjusted(path=None):
    ds = Dataset(path).create(annex=False)
    subds = ds.create("sub")
    maybe_adjust_repo(subds.repo)
    (subds.pathobj / "foo").write_text("foo")
    subds.save()
    ds.save()
    assert_repo_status(ds.path)


def test_bf2541(path=None):
    ds = create(path)
    subds = ds.create('sub')
    assert_repo_status(ds.path)
    os.symlink('sub', op.join(ds.path, 'symlink'))
    with chpwd(ds.path):
        res = save(recursive=True)
    assert_repo_status(ds.path)


def test_relpath_add(path=None):
    ds = Dataset(path).create(force=True)
    with chpwd(op.join(path, 'dir')):
        eq_(save('testindir')[0]['path'],
            op.join(ds.path, 'dir', 'testindir'))
        # and now add all
        save('..')
    # auto-save enabled
    assert_repo_status(ds.path)


def test_new_relpath(topdir=None):
    from datalad.api import create_test_dataset
    with swallow_logs(), chpwd(topdir), swallow_outputs():
        dss = create_test_dataset('testds', spec='1')
    eq_(dss[0], opj(topdir, 'testds'))
    eq_(len(dss), 2)  # 1 top + 1 sub-dataset as demanded
    for ds in dss:
        assert_repo_status(ds, annex=False)


def test_bf2043p2(path=None):
    ds = Dataset(path).create(force=True)
    ds.repo.add('staged')
    assert_repo_status(ds.path, added=['staged'], untracked=['untracked'])
    # save -u does not commit untracked content
    # this tests the second issue in #2043
    with chpwd(path):
        save(updated=True)
    assert_repo_status(ds.path, untracked=['untracked'])


def test_remove_recreation(path=None):
    # test that recreation is possible and doesn't conflict with in-memory
    # remnants of the old instances
    # see issue #1311
    ds = Dataset(path).create()
    ds.remove(reckless='availability')
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    ok_(ds.is_installed())


def test_push_url(storepath=None, dspath=None, blockfile=None):
    dspath = Path(dspath)
    store = Path(storepath)
    blockfile = Path(blockfile)
    blockfile.touch()

    ds = Dataset(dspath).create()
    populate_dataset(ds)
    assert_repo_status(ds.path)

    # set up store:
    io = LocalIO()
    store_url = "ria+{}".format(store.as_uri())
    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # initremote fails with invalid url (not a ria+ URL):
    invalid_url = (store.parent / "non-existent").as_uri()
    init_opts = common_init_opts + ['url={}'.format(store_url),
                                    'push-url={}'.format(invalid_url)]
    assert_raises(CommandError,
                  ds.repo.init_remote, 'store', options=init_opts)

    # initremote succeeds with valid but inaccessible URL (pointing to a file
    # instead of a store):
    block_url = "ria+" + blockfile.as_uri()
    init_opts = common_init_opts + ['url={}'.format(store_url),
                                    'push-url={}'.format(block_url)]
    ds.repo.init_remote('store', options=init_opts)

    # but a push will fail:
    assert_raises(CommandError, ds.repo.call_annex,
                  ['copy', 'one.txt', '--to', 'store'])

    # reconfigure with correct push-url:
    init_opts = common_init_opts + ['url={}'.format(store_url),
                                    'push-url={}'.format(store_url)]
    ds.repo.enable_remote('store', options=init_opts)

    # push works now:
    ds.repo.call_annex(['copy', 'one.txt', '--to', 'store'])

    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']
    known_sources = ds.repo.whereis('one.txt')
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)


def test_create_withcfg(path=None):
    ds = create(dataset=path, cfg_proc=['yoda'])
    assert_repo_status(path)
    assert (ds.pathobj / 'README.md').exists()

    # If we are creating a dataset within a reference dataset, we save _after_
    # the procedure runs.
    ds.create('subds', cfg_proc=['yoda'])
    assert_repo_status(path)
    assert (ds.pathobj / 'subds' / 'README.md').exists()


def test_bf3285(path=None):
    ds = Dataset(path).create(force=True)
    # Note: Using repo.pathobj matters in the "TMPDIR=/var/tmp/sym\ link" case
    # because assert_repo_status is based off of {Annex,Git}Repo.path, which is
    # the realpath'd path (from the processing in _flyweight_id_from_args).
    subds = create(ds.repo.pathobj.joinpath("subds"))
    # Explicitly saving a path does not save an untracked, unspecified
    # subdataset.
    ds.save("foo")
    assert_repo_status(ds.path, untracked=[subds.path])


def test_save_adjusted_partial(path=None):
    ds = Dataset(path).create()
    subds = ds.create("sub")
    maybe_adjust_repo(subds.repo)
    (subds.pathobj / "foo").write_text("foo")
    subds.save()
    (ds.pathobj / "other").write_text("staged, not for committing")
    ds.repo.call_git(["add", "other"])
    ds.save(path=["sub"])
    assert_repo_status(ds.path, added=["other"])


def _test_BasicAnnexTestRepo(repodir):
    trepo = BasicAnnexTestRepo(repodir)
    trepo.create()
    assert_repo_status(trepo.path)
    ok_file_under_git(trepo.path, 'test.dat')
    ok_file_under_git(trepo.path, 'INFO.txt')
    ok_file_under_git(trepo.path, 'test-annex.dat', annexed=True)
    ok_(trepo.repo.file_has_content('test-annex.dat') is False)
    with swallow_outputs():
        trepo.repo.get('test-annex.dat')
    ok_(trepo.repo.file_has_content('test-annex.dat'))


def test_merge_no_merge_target(path=None):
    path = Path(path)
    ds_src = Dataset(path / "source").create()
    ds_clone = install(source=ds_src.path, path=path / "clone",
                       recursive=True, result_xfm="datasets")
    assert_repo_status(ds_src.path)
    ds_clone.repo.checkout(DEFAULT_BRANCH, options=["-bnew"])
    res = ds_clone.update(merge=True, on_failure="ignore")
    assert_in_results(res, status="impossible", action="update")