def test_copy_file_errors(dspath1=None, dspath2=None, nondspath=None):
    ds1 = Dataset(dspath1)
    # nothing given
    assert_raises(ValueError, copy_file)
    # no target directory given
    assert_raises(ValueError, ds1.copy_file, 'somefile')
    # using multiple sources and --specs-from
    assert_raises(ValueError, ds1.copy_file, ['1', '2', '3'], specs_from='-')
    # trying to copy to a dir that is not in a dataset
    ds1.create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=nondspath, on_failure='ignore'))
    # copy into a dataset that is not in the reference dataset
    ds2 = Dataset(dspath2).create()
    assert_status(
        'error',
        ds1.copy_file('somepath', target_dir=dspath2, on_failure='ignore'))

    # attempt to copy from a directory, but no recursion is enabled.
    # use no reference ds to exercise a different code path
    assert_status(
        'impossible',
        copy_file([nondspath, dspath1], on_failure='ignore'))
    # attempt to copy a file that doesn't exist
    assert_status(
        'impossible',
        copy_file(['funky', dspath1], on_failure='ignore'))
    # attempt to copy a file without a destination given
    assert_raises(ValueError, copy_file, 'somepath')
    assert_status(
        'impossible',
        copy_file(specs_from=['somepath'], on_failure='ignore'))


def test_status_basics(path=None, linkpath=None, otherdir=None):
    if has_symlink_capability():
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status(result_renderer='disabled')
    eq_(stat, ds.status(result_renderer='disabled'))
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes)
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)


def test_get_mixed_hierarchy(src=None, path=None):
    origin = Dataset(src).create(annex=False)
    origin_sub = origin.create('subds')
    with open(opj(origin.path, 'file_in_git.txt'), "w") as f:
        f.write('no idea')
    with open(opj(origin_sub.path, 'file_in_annex.txt'), "w") as f:
        f.write('content')
    origin.save('file_in_git.txt', to_git=True)
    origin_sub.save('file_in_annex.txt')
    origin.save()

    # now, install that thing:
    ds, subds = install(
        path, source=src, recursive=True,
        result_xfm='datasets', return_type='item-or-list',
        result_filter=None)
    ok_(subds.repo.file_has_content("file_in_annex.txt") is False)

    # and get:
    result = ds.get(curdir, recursive=True)
    # git repo and subds
    assert_status(['ok', 'notneeded'], result)
    assert_result_count(
        result, 1, path=opj(subds.path, "file_in_annex.txt"), status='ok')
    ok_(subds.repo.file_has_content("file_in_annex.txt") is True)


def test_ria_push(srcpath=None, dstpath=None):
    # complex test involving a git remote, a special remote, and a
    # publication dependency
    src = Dataset(srcpath).create()
    testfile = src.pathobj / 'test_mod_annex_file'
    testfile.write_text("Heavy stuff.")
    src.save()
    assert_status(
        'ok',
        src.create_sibling_ria(
            "ria+{}".format(get_local_file_url(dstpath, compatibility='git')),
            "datastore",
            new_store_ok=True))
    res = src.push(to='datastore')
    assert_in_results(
        res, action='publish', target='datastore', status='ok',
        refspec=DEFAULT_REFSPEC)
    assert_in_results(
        res, action='publish', target='datastore', status='ok',
        refspec='refs/heads/git-annex:refs/heads/git-annex')
    assert_in_results(
        res, action='copy', target='datastore-storage', status='ok',
        path=str(testfile))


def test_push_subds_no_recursion(src_path=None, dst_top=None, dst_sub=None,
                                 dst_subsub=None):
    # dataset with one submodule and one subsubmodule
    top = Dataset(src_path).create()
    sub = top.create('sub m')
    test_file = sub.pathobj / 'subdir' / 'test_file'
    test_file.parent.mkdir()
    test_file.write_text('some')
    subsub = sub.create(sub.pathobj / 'subdir' / 'subsub m')
    top.save(recursive=True)
    assert_repo_status(top.path)
    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)
    # now publish, but NO recursion, instead give the parent dir of
    # both a subdataset and a file in the middle subdataset
    res = top.push(
        to='target',
        # give relative to top dataset to elevate the difficulty a little
        path=str(test_file.relative_to(top.pathobj).parent))
    assert_status('ok', res)
    assert_in_results(res, action='publish', type='dataset', path=top.path)
    assert_in_results(res, action='publish', type='dataset', path=sub.path)
    assert_in_results(res, action='copy', type='file', path=str(test_file))
    # the lowest-level subdataset isn't touched
    assert_not_in_results(
        res, action='publish', type='dataset', path=subsub.path)


def test_invalid_call(origin=None, tdir=None):
    ds = Dataset(origin).create()
    # no target
    assert_status('impossible', ds.push(on_failure='ignore'))
    # no dataset
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, Push.__call__)
    # dataset, but outside path
    assert_raises(IncompleteResultsError, ds.push, path=tdir)
    # given a path constraint that doesn't match anything, will cause
    # nothing to be done
    assert_status('notneeded', ds.push(path=ds.pathobj / 'nothere'))
    # unavailable subdataset
    dummy_sub = ds.create('sub')
    dummy_sub.drop(what='all', reckless='kill', recursive=True)
    assert_in('sub', ds.subdatasets(state='absent', result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_raises(ValueError, ds.push, 'sub')

    target = mk_push_target(ds, 'target', tdir, annex=True)
    # revision that doesn't exist
    assert_raises(
        ValueError, ds.push, to='target', since='09320957509720437523')

    # If a publish() user accidentally passes since='', which push() spells as
    # since='^', the call is aborted.
    assert_raises(ValueError, ds.push, to='target', since='')


def test_subdataset_save(path=None):
    parent = Dataset(path).create()
    sub = parent.create('sub')
    assert_repo_status(parent.path)
    create_tree(parent.path, {
        "untracked": 'ignore',
        'sub': {"new": "wanted"}})
    sub.save('new')
    # defined state: one untracked, modified (but clean in itself) subdataset
    assert_repo_status(sub.path)
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])

    # `save sub` does not save the parent!!
    with chpwd(parent.path):
        assert_status('notneeded', save(dataset=sub.path))
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])
    # `save -u .` saves the state change in the subdataset,
    # but leaves any untracked content alone
    with chpwd(parent.path):
        assert_status('ok', parent.save(updated=True))
    assert_repo_status(parent.path, untracked=['untracked'])

    # get back to the original modified state and check that -S behaves in
    # exactly the same way
    create_tree(parent.path, {'sub': {"new2": "wanted2"}})
    sub.save('new2')
    assert_repo_status(parent.path, untracked=['untracked'], modified=['sub'])


def test_sibling_enable_sameas(repo=None, clone_path=None):
    ds = Dataset(repo.path)
    create_tree(ds.path, {"f0": "0"})
    ds.save(path="f0")
    ds.push(["f0"], to="r_dir")
    ds.repo.drop(["f0"])

    ds_cloned = clone(ds.path, clone_path)
    assert_false(ds_cloned.repo.file_has_content("f0"))
    # does not work without a name
    res = ds_cloned.siblings(
        action="enable",
        result_renderer='disabled', on_failure='ignore',
    )
    assert_in_results(
        res, status='error',
        message='require `name` of sibling to enable')
    # does not work with the wrong name
    res = ds_cloned.siblings(
        action="enable", name='wrong',
        result_renderer='disabled', on_failure='ignore',
    )
    assert_in_results(
        res, status='impossible',
        message=("cannot enable sibling '%s', not known", 'wrong'))
    # works with the right name
    res = ds_cloned.siblings(action="enable", name="r_rsync")
    assert_status("ok", res)
    ds_cloned.get(path=["f0"])
    ok_(ds_cloned.repo.file_has_content("f0"))


def test_install_recursive_with_data(src=None, path=None):
    _make_dataset_hierarchy(src)

    # now again; with data:
    res = install(path, source=src, recursive=True, get_data=True,
                  result_filter=None, result_xfm=None)
    assert_status('ok', res)
    # installed a dataset and two subdatasets, and one file with content in
    # each
    assert_result_count(res, 5, type='dataset', action='install')
    assert_result_count(res, 2, type='file', action='get')
    # we recurse top down during installation, so toplevel should appear at
    # first position in returned list
    eq_(res[0]['path'], path)
    top_ds = YieldDatasets()(res[0])
    ok_(top_ds.is_installed())

    def all_have_content(repo):
        ainfo = repo.get_content_annexinfo(init=None, eval_availability=True)
        return all(st["has_content"] for st in ainfo.values())

    if isinstance(top_ds.repo, AnnexRepo):
        ok_(all_have_content(top_ds.repo))

    for subds in top_ds.subdatasets(recursive=True, result_xfm='datasets'):
        ok_(subds.is_installed(), "Not installed: %s" % (subds,))
        if isinstance(subds.repo, AnnexRepo):
            ok_(all_have_content(subds.repo))


def test_audio(path=None):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', scope='branch')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.save()
    assert_repo_status(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets', return_type='item-or-list'
    )['metadata']['datalad_unique_content_properties']
    # test file has it, but uniques have it blanked out, because the
    # extractor considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # the 'date' field carries no value, hence gets excluded from the
    # unique report
    assert_in('date', meta)
    assert (not meta['date'])
    assert_not_in('date', uniques['audio'])


def test_install_list(path=None, top_path=None):
    _mk_submodule_annex(path, fname="test-annex.dat", fcontent="whatever")

    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)


def test_invalid_call(path=None):
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible', run('doesntmatter', on_failure='ignore'))


def test_drop_file_need_nocheck(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    with assert_raises(IncompleteResultsError) as cme:
        ds.drop("foo")
    # The --force suggestion from git-annex-drop is translated to --reckless.
    assert_in("--reckless", str(cme.value))
    assert_status("ok", ds.drop("foo", reckless='kill', on_failure="ignore"))


def test_run_empty_repo(path=None):
    ds = Dataset(path).create()
    cmd = [sys.executable, "-c", "open('foo', 'w').write('')"]
    # Using "*" in a completely empty repo will fail.
    with assert_raises(IncompleteResultsError):
        ds.run(cmd, inputs=["*"], on_failure="stop")
    assert_repo_status(ds.path)
    # "." will work okay, though.
    assert_status("ok", ds.run(cmd, inputs=["."]))
    assert_repo_status(ds.path)
    ok_exists(str(ds.pathobj / "foo"))


def test_gh2927(path=None, linkpath=None):
    if has_symlink_capability():
        # make it more complicated by default
        Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath
    ds = Dataset(path).create()
    ds.create('subds_clean')
    assert_status(
        'ok',
        ds.create(op.join('subds_clean', 'subds_lvl1_clean'),
                  result_xfm=None, return_type='list'))


def test_update_strategy(path=None):
    base = Dataset(opj(path, 'origin')).create(force=True)
    # force all metadata objects into the annex
    with open(opj(base.path, '.datalad', '.gitattributes'), 'w') as f:
        f.write(
            '** annex.largefiles=nothing\n'
            'metadata/objects/** annex.largefiles=anything\n')
    sub = base.create('sub', force=True)
    subsub = sub.create(opj('subsub'), force=True)
    base.save(recursive=True)
    assert_repo_status(base.path)
    # we start clean
    for ds in base, sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the base dataset only, nothing below changes
    base.aggregate_metadata()
    eq_(len(_get_contained_objs(base)), 2)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    # aggregate the entire tree, but by default only updates
    # the top-level dataset with all objects, none of the leaf
    # or intermediate datasets gets touched
    base.aggregate_metadata(recursive=True)
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_referenced_objs(base)), 6)
    for ds in sub, subsub:
        eq_(len(_get_contained_objs(ds)), 0)
    res = base.metadata(get_aggregates=True)
    assert_result_count(res, 3)
    # it is impossible to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'impossible',
            ds.metadata(get_aggregates=True, on_failure='ignore'))
    # get the full metadata report
    target_meta = base.metadata(return_type='list')

    # now redo full aggregation, this time updating all
    # (intermediate) datasets
    base.aggregate_metadata(recursive=True, update_mode='all')
    eq_(len(_get_contained_objs(base)), 6)
    eq_(len(_get_contained_objs(sub)), 4)
    eq_(len(_get_contained_objs(subsub)), 2)
    # it is now OK to query an intermediate or leaf dataset
    # for metadata
    for ds in sub, subsub:
        assert_status(
            'ok',
            ds.metadata(get_aggregates=True, on_failure='ignore'))

    # all of that has no impact on the reported metadata
    eq_(target_meta, base.metadata(return_type='list'))


def test_rerun_branch(path=None):
    ds = Dataset(path).create()
    if ds.repo.is_managed_branch():
        assert_status(
            'impossible',
            ds.rerun(branch="triggers-abort", on_failure="ignore"))
        raise SkipTest("Test incompatible with adjusted branch")

    ds.repo.tag("prerun")

    outfile = op.join(path, "run-file")
    with swallow_outputs():
        ds.run(f'echo x$({cat_command} run-file) > run-file')
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")

    # Rerun the commands on a new branch that starts at the parent
    # commit of the first run.
    with swallow_outputs():
        ds.rerun(since="prerun", onto="prerun", branch="rerun")
    eq_(ds.repo.get_active_branch(), "rerun")
    eq_('xx\n', open(outfile).read())

    # NOTE: This test depends on the non-run commit above following a run
    # commit. Otherwise, all the metadata (e.g., author date) aside from the
    # parent commit that is used to generate the commit ID may be set when
    # running the tests, which would result in two commits rather than three.
    for revrange in ["rerun.." + DEFAULT_BRANCH, DEFAULT_BRANCH + "..rerun"]:
        eq_(len(ds.repo.get_revisions(revrange)), 3)
    eq_(ds.repo.get_merge_base([DEFAULT_BRANCH, "rerun"]),
        ds.repo.get_hexsha("prerun"))

    # Start rerun branch at tip of current branch.
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.rerun(since="prerun", branch="rerun2")
    eq_(ds.repo.get_active_branch(), "rerun2")
    eq_('xxxx\n', open(outfile).read())

    eq_(len(ds.repo.get_revisions(DEFAULT_BRANCH + "..rerun2")), 2)
    eq_(len(ds.repo.get_revisions("rerun2.." + DEFAULT_BRANCH)), 0)

    # Using an existing branch name fails.
    ds.repo.checkout(DEFAULT_BRANCH)
    assert_raises(IncompleteResultsError,
                  ds.rerun, since="prerun", branch="rerun2")


def test_archive(path=None):
    ds = Dataset(opj(path, 'ds')).create(force=True)
    ds.save()
    committed_date = ds.repo.get_commit_date()
    default_outname = opj(path, 'datalad_{}.tar.gz'.format(ds.id))
    with chpwd(path):
        res = list(ds.export_archive())
        assert_status('ok', res)
        assert_result_count(res, 1)
        assert(isabs(res[0]['path']))
    assert_true(os.path.exists(default_outname))
    custom_outname = opj(path, 'myexport.tar.gz')
    # feed in without extension
    ds.export_archive(filename=custom_outname[:-7])
    assert_true(os.path.exists(custom_outname))
    custom1_md5 = md5sum(custom_outname)
    # encodes the original archive filename -> different checksum, despite
    # identical content
    assert_not_equal(md5sum(default_outname), custom1_md5)
    # really sleep, so that if the implementation stops using time.time
    # we would notice
    time.sleep(1.1)
    ds.export_archive(filename=custom_outname)
    # should not encode mtime, so should be identical
    assert_equal(md5sum(custom_outname), custom1_md5)

    def check_contents(outname, prefix):
        with tarfile.open(outname) as tf:
            nfiles = 0
            for ti in tf:
                # any annex links resolved
                assert_false(ti.issym())
                ok_startswith(ti.name, prefix + '/')
                assert_equal(ti.mtime, committed_date)
                if '.datalad' not in ti.name:
                    # ignore any files in .datalad for this test to not be
                    # susceptible to changes in how much meta info we generate
                    nfiles += 1
            # we have exactly four files (includes .gitattributes for default
            # MD5E backend), and expect no content for any directory
            assert_equal(nfiles, 4)

    check_contents(default_outname, 'datalad_%s' % ds.id)
    check_contents(custom_outname, 'myexport')

    # now lose some content
    ds.drop('file_up', reckless='kill')
    assert_raises(IOError, ds.export_archive, filename=opj(path, 'my'))
    ds.export_archive(filename=opj(path, 'partial'),
                      missing_content='ignore')
    assert_true(os.path.exists(opj(path, 'partial.tar.gz')))


def test_remove_subds(path=None):
    ds = create(path)
    ds.create('sub')
    ds.create(op.join('sub', 'subsub'))
    assert_repo_status(ds.path)
    assert_result_count(
        ds.subdatasets(), 1, path=op.join(ds.path, 'sub'))
    # all good at this point, subdataset known, dataset clean
    # now have some external force wipe out the subdatasets
    rmtree(op.join(ds.path, 'sub'))
    assert_result_count(
        ds.status(), 1, path=op.join(ds.path, 'sub'), state='deleted')
    # a single call to save() must fix up the mess
    assert_status('ok', ds.save())
    assert_repo_status(ds.path)


def test_arg_missing(path=None, path2=None):
    # test fix for gh-3553
    ds = create(path)
    assert_raises(
        InsufficientArgumentsError,
        ds.siblings, 'add', url=path2,
    )
    assert_status('ok', ds.siblings('add', url=path2, name='somename'))

    # trigger some name guessing functionality that will still not
    # be able to end up using a hostname-spec despite being
    # given a URL
    if not on_windows:
        # the trick with the file:// URL creation only works on POSIX
        # the underlying tested code here is not about paths, though,
        # so it is good enough to run this on POSIX systems to be
        # reasonably sure that things work
        assert_raises(
            InsufficientArgumentsError,
            ds.siblings, 'add', url=f'file://{path2}',
        )

    # there is no name guessing with 'configure'
    assert_in_results(
        ds.siblings('configure', url='http://somename', on_failure='ignore'),
        status='error',
        message='need sibling `name` for configuration')

    # needs a URL
    assert_raises(InsufficientArgumentsError,
                  ds.siblings, 'add', name='somename')
    # just pushurl is OK
    assert_status('ok', ds.siblings('add', pushurl=path2, name='somename2'))

    # needs group with groupwanted
    assert_raises(InsufficientArgumentsError,
                  ds.siblings, 'add', url=path2, name='somename',
                  annex_groupwanted='whatever')


def test_copy_file_prevent_dotgit_placement(srcpath=None, destpath=None):
    src = Dataset(srcpath).create()
    sub = src.create('sub')
    dest = Dataset(destpath).create()

    # recursion doesn't capture .git/
    dest.copy_file(sub.path, recursive=True)
    nok_((dest.pathobj / 'sub' / '.git').exists())

    # explicit instruction results in failure
    assert_status(
        'impossible',
        dest.copy_file(sub.pathobj / '.git', recursive=True,
                       on_failure='ignore'))

    # same when the source has an OK name, but the dest does not
    assert_in_results(
        dest.copy_file(
            [sub.pathobj / '.git' / 'config',
             dest.pathobj / 'some' / '.git'],
            on_failure='ignore'),
        status='impossible', action='copy_file')

    # The last path above wasn't treated as a target directory because it
    # wasn't an existing directory. We also guard against a '.git' in the
    # target directory code path, though the handling is different.
    with assert_raises(ValueError):
        dest.copy_file(
            [sub.pathobj / '.git' / 'config', dest.pathobj / '.git'])

    # A source path can have a leading .git/ if the destination is outside
    # of .git/.
    nok_((dest.pathobj / "config").exists())
    dest.copy_file(sub.pathobj / '.git' / 'config')
    ok_((dest.pathobj / "config").exists())

    target = dest.pathobj / 'some'
    nok_(target.exists())
    dest.copy_file([sub.pathobj / '.git' / 'config', target])
    ok_(target.exists())

    # But we only waste so many cycles trying to prevent foot shooting. This
    # next one sneaks by because only .name, not all upstream parts, is
    # checked for each destination that comes out of _yield_specs().
    badobj = dest.pathobj / '.git' / 'objects' / 'i-do-not-exist'
    dest.copy_file([sub.pathobj / '.git' / 'config', badobj])
    ok_(badobj.exists())


def test_no_annex(path=None):
    # a few smoke tests regarding the 'here' sibling
    ds = create(path, annex=False)
    res = ds.siblings('configure',
                      name='here',
                      description='very special',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_status('impossible', res)

    res = ds.siblings('enable',
                      name='doesnotmatter',
                      on_failure='ignore',
                      result_renderer='disabled')
    assert_in_results(
        res, status='impossible',
        message='cannot enable sibling of non-annex dataset')


def test_datasets_datalad_org(tdir=None, *, suffix):
    # Test that git annex / datalad install and get work correctly on our
    # datasets.datalad.org. Apparently things can break, especially with
    # the introduction of the smart HTTP backend for apache2 etc.
    ds = install(
        tdir,
        source='///dicoms/dartmouth-phantoms/bids_test6-PD+T2w' + suffix)
    eq_(ds.config.get(f'remote.{DEFAULT_REMOTE}.annex-ignore', None), None)
    # assert_result_count and not just assert_status since for some reason on
    # Windows we get two records due to a duplicate attempt (as res[1]) to
    # get it again, which is reported as "notneeded". For the purpose of this
    # test it doesn't make a difference.
    assert_result_count(
        ds.get(op.join('001-anat-scout_ses-{date}', '000001.dcm')),
        1, status='ok')
    assert_status('ok', ds.drop(what='all', reckless='kill', recursive=True))


def test_push_wanted(srcpath=None, dstpath=None):
    src = Dataset(srcpath).create()
    (src.pathobj / 'data.0').write_text('0')
    (src.pathobj / 'secure.1').write_text('1')
    (src.pathobj / 'secure.2').write_text('2')
    src.save()

    # Dropping a file to mimic a case of simply not having it locally (thus
    # not to be "pushed")
    src.drop('secure.2', reckless='kill')

    # Annotate sensitive content, the actual value "verysecure" does not
    # matter in this example
    src.repo.set_metadata(
        add={'distribution-restrictions': 'verysecure'},
        files=['secure.1', 'secure.2'])

    src.create_sibling(
        dstpath,
        annex_wanted="not metadata=distribution-restrictions=*",
        name='target',
    )
    # check that wanted is obeyed, since set in sibling configuration
    res = src.push(to='target')
    assert_in_results(
        res, action='copy', path=str(src.pathobj / 'data.0'), status='ok')
    for p in ('secure.1', 'secure.2'):
        assert_not_in_results(res, path=str(src.pathobj / p))
    assert_status('notneeded', src.push(to='target'))

    # check the target to really make sure
    dst = Dataset(dstpath)
    # normal file, yes
    eq_((dst.pathobj / 'data.0').read_text(), '0')
    # secure file, no
    if dst.repo.is_managed_branch():
        neq_((dst.pathobj / 'secure.1').read_text(), '1')
    else:
        assert_raises(FileNotFoundError,
                      (dst.pathobj / 'secure.1').read_text)

    # reset wanted config, which must enable push of secure file
    src.repo.set_preferred_content('wanted', '', remote='target')
    res = src.push(to='target')
    assert_in_results(res, path=str(src.pathobj / 'secure.1'))
    eq_((dst.pathobj / 'secure.1').read_text(), '1')


def test_as_common_datasource(testbed=None, viapath=None, viaurl=None,
                              remotepath=None, url=None):
    ds = Dataset(remotepath).create()
    (ds.pathobj / 'testfile').write_text('likemagic')
    (ds.pathobj / 'testfile2').write_text('likemagic2')
    ds.save()

    # make clonable via HTTP
    ds.repo.call_git(['update-server-info'])

    # this does not work for remotes that have path URLs
    ds_frompath = clone(source=remotepath, path=viapath)
    res = ds_frompath.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike',
        on_failure='ignore',
        result_renderer='disabled',
    )
    assert_in_results(
        res,
        status='impossible',
        message='cannot configure as a common data source, URL protocol '
                'is not http or https',
    )

    # but it works for HTTP
    ds_fromurl = clone(source=url, path=viaurl)
    res = ds_fromurl.siblings(
        'configure',
        name=DEFAULT_REMOTE,
        as_common_datasrc='mike2',
        result_renderer='disabled',
    )
    assert_status('ok', res)
    # same thing should be possible by adding a fresh remote
    res = ds_fromurl.siblings(
        'add',
        name='fresh',
        url=url,
        as_common_datasrc='fresh-sr',
        result_renderer='disabled',
    )
    assert_status('ok', res)

    # now try if it works. we will clone the clone, and get a repo that does
    # not know its ultimate origin. still, we should be able to pull data
    # from it via the special remote
    testbed = clone(source=ds_fromurl, path=testbed)
    assert_status('ok', testbed.get('testfile'))
    eq_('likemagic', (testbed.pathobj / 'testfile').read_text())
    # and the other one
    assert_status('ok', testbed.get('testfile2'))


def test_exif(path=None):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'exif', scope='branch')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'exif.jpg'),
        path)
    ds.save()
    assert_repo_status(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('exif.jpg')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['exif']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)


def test_install_datasets_root(tdir=None):
    with chpwd(tdir):
        ds = install("///")
        ok_(ds.is_installed())
        eq_(ds.path, opj(tdir, get_datasets_topdir()))

        # do it a second time:
        result = install("///", result_xfm=None, return_type='list')
        assert_status('notneeded', result)
        eq_(YieldDatasets()(result[0]), ds)

        # and a third time into an existing something that is not a dataset:
        with open(opj(tdir, 'sub', 'a_file.txt'), 'w') as f:
            f.write("something")

        with assert_raises(IncompleteResultsError) as cme:
            install("sub", source='///')
        assert_in("already exists and not empty", str(cme.value))


def test_get_single_file(src=None, path=None):
    ca = dict(result_renderer='disabled')
    test_fname = 'test-annex.dat'
    orig = Dataset(src).create(**ca)
    (orig.pathobj / test_fname).write_text('some')
    orig.save(**ca)

    ds = clone(src, path, **ca)
    ok_(ds.is_installed())
    ok_(ds.repo.file_has_content('test-annex.dat') is False)
    result = ds.get("test-annex.dat", **ca)
    assert_result_count(result, 1)
    assert_status('ok', result)
    eq_(result[0]['path'], opj(ds.path, 'test-annex.dat'))
    annexprops = ds.repo.get_file_annexinfo(
        'test-annex.dat', eval_availability=True)
    eq_(result[0]['annexkey'], annexprops['key'])
    ok_(annexprops['has_content'])


def test_copy_file_specs_from(srcdir=None, destdir=None):
    srcdir = Path(srcdir)
    destdir = Path(destdir)
    files = [p for p in srcdir.glob('**/*') if not p.is_dir()]
    # plain list of absolute path objects
    r_srcabs, res = _check_copy_file_specs_from(
        srcdir, destdir / 'srcabs', files)
    # same, but with relative paths
    with chpwd(srcdir):
        r_srcrel, res = _check_copy_file_specs_from(
            srcdir, destdir / 'srcrel',
            [p.relative_to(srcdir) for p in files])
    # same, but as strings
    r_srcabs_str, res = _check_copy_file_specs_from(
        srcdir, destdir / 'srcabs_str',
        [str(p) for p in files])
    with chpwd(srcdir):
        r_srcrel_str, res = _check_copy_file_specs_from(
            srcdir, destdir / 'srcrel_str',
            [str(p.relative_to(srcdir)) for p in files])
    # same, but with src/dest pairs
    r_srcdestabs_str, res = _check_copy_file_specs_from(
        srcdir, destdir / 'srcdestabs_str',
        ['{}\0{}'.format(
            str(p),
            str(destdir / 'srcdestabs_str' / p.name))
         for p in files])

    # all methods lead to the same dataset structure
    for a, b in ((r_srcabs, r_srcrel),
                 (r_srcabs, r_srcabs_str),
                 (r_srcabs, r_srcrel_str),
                 (r_srcabs, r_srcdestabs_str)):
        eq_(*[
            sorted(r for r in d.status(result_xfm='relpaths',
                                       result_renderer='disabled'))
            for d in (a, b)
        ])

    # fail on destination outside of the dest repo
    res = copy_file(
        specs_from=[
            '{}\0{}'.format(
                str(p),
                str(destdir / 'srcdest_wrong' / p.relative_to(srcdir)))
            for p in files
        ],
        on_failure='ignore')
    assert_status('error', res)


def test_copy_file_into_nonannex(workdir=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', reckless='kill')

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))