def test_GitRepo_ssh_pull(remote_path, repo_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify remote:
    remote_repo.checkout("ssh-test", ['-b'])
    with open(op.join(remote_repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    remote_repo.add("ssh_testfile.dat")
    remote_repo.commit("ssh_testfile.dat added.")

    # file is not locally known yet:
    assert_not_in("ssh_testfile.dat", repo.get_indexed_files())

    # pull changes:
    repo.pull(remote="ssh-remote", refspec=remote_repo.get_active_branch())
    ok_clean_git(repo.path, annex=False)

    # the connection is known to the SSH manager, since fetch() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # we actually pulled the changes
    assert_in("ssh_testfile.dat", repo.get_indexed_files())

def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # verify that it is the correct submodule installed and not
    # a new repository initialized in its place
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())

def test_audio(path):
    ds = Dataset(path).create()
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    copy(
        opj(dirname(dirname(dirname(__file__))), 'tests', 'data', 'audio.mp3'),
        path)
    ds.add('.')
    ok_clean_git(ds.path)
    res = ds.aggregate_metadata()
    assert_status('ok', res)
    res = ds.metadata('audio.mp3')
    assert_result_count(res, 1)

    # from this extractor
    meta = res[0]['metadata']['audio']
    for k, v in target.items():
        eq_(meta[k], v)

    assert_in('@context', meta)

    uniques = ds.metadata(
        reporton='datasets',
        return_type='item-or-list')['metadata']['datalad_unique_content_properties']
    # the test file has it, but uniques have it blanked out, because the
    # extractor considers it worthless for discovering whole datasets
    assert_in('bitrate', meta)
    eq_(uniques['audio']['bitrate'], None)

    # the 'date' field carries no value, hence gets excluded from the unique report
    assert_in('date', meta)
    assert(not meta['date'])
    assert_not_in('date', uniques['audio'])

def test_add_mimetypes(path):
    ds = Dataset(path).create(force=True)
    ds.repo.add('.gitattributes')
    ds.repo.commit('added attributes to git explicitly')
    # now test that those files will go into git/annex correspondingly
    # WINDOWS FAILURE NEXT
    __not_tested__ = ds.save(['file.txt', 'empty'])
    assert_repo_status(path, untracked=['file2.txt'])
    # But we should be able to force adding file to annex when desired
    ds.save('file2.txt', to_git=False)
    # check annex file status
    annexinfo = ds.repo.get_content_annexinfo()
    for path, in_annex in (
            # Empty one considered to be application/octet-stream
            # i.e. non-text
            ('empty', True),
            ('file.txt', False),
            ('file2.txt', True)):
        # low-level API report -> repo path reference, no ds path
        p = ds.repo.pathobj / path
        assert_in(p, annexinfo)
        if in_annex:
            assert_in('key', annexinfo[p], p)
        else:
            assert_not_in('key', annexinfo[p], p)

def test_publish_file_handle(origin, src_path, dst_path):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    source.repo.get('test-annex.dat')

    # create plain git at target:
    target = AnnexRepo(dst_path, create=True)
    # actually not needed for this test, but provide same setup as
    # everywhere else:
    target.git_checkout("TMP", "-b")
    source.repo.git_remote_add("target", dst_path)

    # directly publish a file handle, not the dataset itself:
    res = publish(dataset=source, dest="target", path="test-annex.dat")
    eq_(res, opj(source.path, 'test-annex.dat'))

    # only the file was published, not the dataset itself:
    assert_not_in("master", target.git_get_branches())
    eq_(Dataset(dst_path).get_dataset_handles(), [])
    assert_not_in("test.dat", target.git_get_files())

    # content is now available from 'target':
    assert_in("target",
              source.repo.annex_whereis('test-annex.dat',
                                        output="descriptions"))
    source.repo.annex_drop('test-annex.dat')
    eq_(source.repo.file_has_content(['test-annex.dat']), [False])
    source.repo._run_annex_command(
        'get', annex_options=['test-annex.dat', '--from=target'])
    eq_(source.repo.file_has_content(['test-annex.dat']), [True])

def test_rerun_script(path):
    ds = Dataset(path).create()
    ds.run("echo a >foo")
    ds.run(["touch", "bar"], message='BAR', sidecar=True)
    # a run record sidecar file was added with the last commit
    assert(any(d['path'].startswith(opj(ds.path, '.datalad', 'runinfo'))
               for d in ds.rerun(report=True,
                                 return_type='item-or-list')['diff']))
    bar_hexsha = ds.repo.get_hexsha()

    script_file = opj(path, "commands.sh")

    ds.rerun(script=script_file)
    ok_exists(script_file)
    with open(script_file) as sf:
        lines = sf.readlines()
        assert_in("touch bar\n", lines)
        # The commit message is there too.
        assert_in("# BAR\n", lines)
        assert_in("# (record: {})\n".format(bar_hexsha), lines)
        assert_not_in("echo a >foo\n", lines)

    ds.rerun(since="", script=script_file)
    with open(script_file) as sf:
        lines = sf.readlines()
        assert_in("touch bar\n", lines)
        # Automatic commit messages aren't included.
        assert_not_in("# echo a >foo\n", lines)
        assert_in("echo a >foo\n", lines)

    # --script=- writes to stdout.
    with patch("sys.stdout", new_callable=StringIO) as cmout:
        ds.rerun(script="-")
        assert_in("touch bar", cmout.getvalue().splitlines())

def test_surprise_subds(path):
    # https://github.com/datalad/datalad/issues/3139
    ds = create(path, force=True)
    # a lonely repo without any commit
    somerepo = AnnexRepo(path=op.join(path, 'd1', 'subrepo'), create=True)
    # a proper subdataset
    subds = create(op.join(path, 'd2', 'subds'), force=True)
    # save non-recursive
    ds.save(recursive=False)
    # the content of both subds and subrepo is not added to their
    # respective parent as no --recursive was given
    assert_repo_status(subds.path, untracked=['subfile'])
    assert_repo_status(somerepo.path, untracked=['subfile'])
    # however, while the subdataset is added (and reported as modified
    # because its content is still untracked), the subrepo
    # cannot be added (it has no commit);
    # worse: its untracked file has been added to the superdataset
    # XXX the next conditional really says: if the subrepo is not in an
    # adjusted branch: #datalad/3178 (that would have a commit)
    if not on_windows:
        assert_repo_status(ds.path, modified=['d2/subds'])
    assert_in(ds.repo.pathobj / 'd1' / 'subrepo' / 'subfile',
              ds.repo.get_content_info())
    # with proper subdatasets, all evil is gone
    assert_not_in(ds.repo.pathobj / 'd2' / 'subds' / 'subfile',
                  ds.repo.get_content_info())

def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # we don't log decorations such as the log level while
            # swallowing, so check for both the error marker and the
            # exit code message
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))

def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over
    # something before
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # inject remote config prior to the run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True, github_login='******', existing='skip'),
        [])

def test_cached_load_document(tdir):
    target_schema = {'buga': 'duga'}
    cache_filename = opj(tdir, "crap")

    with open(cache_filename, 'wb') as f:
        f.write("CRAPNOTPICKLED".encode())

    with patch('datalad.support.network.get_url_cache_filename',
               return_value=cache_filename):
        with patch('pyld.jsonld.load_document', return_value=target_schema), \
                swallow_logs(new_level=logging.WARNING) as cml:
            schema = _cached_load_document("http://schema.org/")
            assert_equal(schema, target_schema)
            cml.assert_logged("cannot load cache from", level="WARNING")

        # but now pickled one should have been saved
        assert_equal(pickle.load(open(cache_filename, 'rb')), target_schema)

        # and if we reload it -- it should be all fine without warnings
        # should come from cache so no need to overload load_document
        with swallow_logs(new_level=logging.WARNING) as cml:
            schema = _cached_load_document("http://schema.org/")
            assert_equal(schema, target_schema)
            assert_not_in("cannot load cache from", cml.out)

def test_basic_metadata(path):
    ds = Dataset(opj(path, 'origin'))
    meta = get_metadata(ds)
    assert_equal(sorted(meta[0].keys()),
                 ['@context', 'dcterms:conformsTo'])
    ds.create(force=True, save=False)
    # with subdataset
    sub = ds.create('sub', force=True)
    ds.save()
    meta = get_metadata(ds)
    assert_equal(
        sorted(meta[0].keys()),
        ['@context', '@id', 'availableFrom', 'dcterms:conformsTo',
         'dcterms:modified', 'type', 'version'])
    assert_equal(meta[0]['type'], 'Dataset')
    # clone and get relationship info in metadata
    sibling = install(opj(path, 'sibling'), source=opj(path, 'origin'))
    sibling_meta = get_metadata(sibling)
    assert_equal(sibling_meta[0]['@id'], ds.id)
    # origin should learn about the clone
    sibling.repo.push(remote='origin', refspec='git-annex')
    meta = get_metadata(ds)
    assert_equal([m['@id'] for m in meta[0]['availableFrom']],
                 [m['@id'] for m in sibling_meta[0]['availableFrom']])
    meta = get_metadata(ds, guess_type=True)
    # without aggregation there is no trace of subdatasets in the metadata
    assert_not_in('dcterms:hasPart', meta[0])

def test_script_shims():
    runner = Runner()
    for script in [
            'datalad',
            'git-annex-remote-datalad-archives',
            'git-annex-remote-datalad']:
        if not on_windows:
            # those must be available for execution, and should not contain
            # an easy-install shim
            which, _ = runner(['which', script])
            # test if there is no easy install shim in there
            with open(which.rstrip()) as f:
                content = f.read()
        else:
            from distutils.spawn import find_executable
            content = find_executable(script)
        assert_not_in('EASY', content)  # NOTHING easy should be there
        assert_not_in('pkg_resources', content)

        # and let's check that it is our script
        out, err = runner([script, '--version'])
        version = (out + err).splitlines()[0].split(' ', 1)[1]
        # we can get git and non-git .dev versions... so for now relax
        get_numeric_portion = \
            lambda v: [x for x in v.split('.') if x.isdigit()]
        # extract numeric portion
        assert get_numeric_portion(version)  # that my lambda is correctish
        assert_equal(get_numeric_portion(__version__),
                     get_numeric_portion(version))

def test_install_list(path, top_path):
    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)

def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # verify that it is the correct submodule installed and not
    # a new repository initialized in its place
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

def test_install_recursive_repeat(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)
    ok_clean_git(top_src.path)

    # install top level:
    top_ds = install(path, source=src)
    ok_(top_ds.is_installed() is True)
    sub1 = Dataset(opj(path, 'sub 1'))
    ok_(sub1.is_installed() is False)
    sub2 = Dataset(opj(path, 'sub 2'))
    ok_(sub2.is_installed() is False)
    subsub = Dataset(opj(path, 'sub 1', 'subsub'))
    ok_(subsub.is_installed() is False)

    # install again, now with data and recursive, but recursion_limit 1:
    result = get(os.curdir, dataset=path, recursive=True, recursion_limit=1,
                 result_xfm='datasets')
    # top-level dataset was not reobtained
    assert_not_in(top_ds, result)
    assert_in(sub1, result)
    assert_in(sub2, result)
    assert_not_in(subsub, result)
    ok_(top_ds.repo.file_has_content('top_file.txt') is True)
    ok_(sub1.repo.file_has_content('sub1file.txt') is True)
    ok_(sub2.repo.file_has_content('sub2file.txt') is True)

    # install sub1 again, recursively and with data
    top_ds.install('sub 1', recursive=True, get_data=True)
    ok_(subsub.is_installed())
    ok_(subsub.repo.file_has_content('subsubfile.txt'))

def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('Command line', alt)

def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)[0]
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.add(path="update.txt")
    source.save("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

    # smoke-test if recursive update doesn't fail if submodule is removed
    dest.remove('subm 1')
    dest.update(recursive=True)
    dest.update(merge=True, recursive=True)

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, 'subm 2'), {'load.dat': 'heavy'})
    source.save(message="saving changes within subm2",
                recursive=True, all_changes=True)
    dest.update(merge=True, recursive=True)
    # and now we can get new file
    dest.get('subm 2/load.dat')
    ok_file_has_content(opj(dest.path, 'subm 2', 'load.dat'), 'heavy')

def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('Python', alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)

def test_from_env():
    cfg = ConfigManager()
    assert_not_in('datalad.crazy.cfg', cfg)
    os.environ['DATALAD_CRAZY_CFG'] = 'impossibletoguess'
    cfg.reload()
    assert_in('datalad.crazy.cfg', cfg)
    assert_equal(cfg['datalad.crazy.cfg'], 'impossibletoguess')
    # not in dataset-only mode
    cfg = ConfigManager(Dataset('nowhere'), dataset_only=True)
    assert_not_in('datalad.crazy.cfg', cfg)

def test_version():
    stdout, stderr = run_main(['--version'], expect_stderr=True)

    # and output should contain our version, copyright, license
    # https://hg.python.org/cpython/file/default/Doc/whatsnew/3.4.rst#l1952
    out = stdout if sys.version_info >= (3, 4) else stderr
    ok_startswith(out, 'datalad %s\n' % datalad.__version__)
    # since https://github.com/datalad/datalad/pull/2733 no license in --version
    assert_not_in("Copyright", out)
    assert_not_in("Permission is hereby granted", out)

def test_update_simple(origin, src_path, dst_path):
    # prepare src
    source = install(path=src_path, source=origin, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in source.get_dataset_handles(recursive=True):
        AnnexRepo(opj(src_path, subds), init=True,
                  create=True).git_checkout("master")
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.git_remote_remove("origin")

    # get a clone to update later on:
    dest = install(path=dst_path, source=src_path, recursive=True)
    # TODO: For now, circumnavigate the detached head issue.
    # Figure out what to do.
    for subds in dest.get_dataset_handles(recursive=True):
        AnnexRepo(opj(dst_path, subds), init=True,
                  create=True).git_checkout("master")
    # test setup done;
    # assert all fine
    ok_clean_git(dst_path)
    ok_clean_git(src_path)

    # update yields nothing => up-to-date
    # TODO: how to test besides not failing?
    dest.update()
    ok_clean_git(dst_path)

    # modify origin:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.install(path="update.txt")
    source.remember_state("Added update.txt")
    ok_clean_git(src_path)

    # update without `merge` only fetches:
    dest.update()
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # modification is known to branch origin/master
    assert_in("update.txt", dest.repo.git_get_files("origin/master"))

    # merge:
    dest.update(merge=True)
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.git_get_files(dest.repo.git_get_active_branch()))
    # it's known to annex, but has no content yet:
    dest.repo.get_file_key("update.txt")  # raises if unknown
    eq_([False], dest.repo.file_has_content(["update.txt"]))

def test_install_into_dataset(source, top_path):
    ds = install(top_path)
    subds = ds.install(path="sub", source=source)
    ok_(subds.is_installed())
    # sub is clean:
    ok_clean_git(subds.path, annex=False)
    # top is not:
    assert_raises(AssertionError, ok_clean_git, ds.path, annex=False)
    # unless committed the subds should not show up in the parent;
    # this is the same behavior that 'git submodule status' implements
    assert_not_in('sub', ds.get_dataset_handles())
    ds.remember_state('addsub')
    assert_in('sub', ds.get_dataset_handles())

def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('CMD', alt)
    assert_not_in('REFLOW', alt)
    assert_in('a b', alt)
    assert_in('not\n reflowed', alt)
    assert_in("Something for the cmdline only Multiline!", alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)
    # RST role markup
    eq_(alter_interface_docs_for_cmdline(':murks:`me and my buddies`'),
        'me and my buddies')
    # spread across lines
    eq_(alter_interface_docs_for_cmdline(':term:`Barbara\nStreisand`'),
        'Barbara\nStreisand')
    # multiple on one line
    eq_(alter_interface_docs_for_cmdline(
        ':term:`one` bla bla :term:`two` bla'),
        'one bla bla two bla')

    altpd = alter_interface_docs_for_cmdline(demo_paramdoc)
    assert_not_in('python', altpd)
    assert_in('inbetween', altpd)
    assert_in('appended', altpd)
    assert_in('cmdline', altpd)

def decorated_test2(ds):
    # we get a Dataset instance
    assert_is_instance(ds, Dataset)
    # it's a clone in a temp. location, not within the cache
    assert_not_in(cache_dir, ds.pathobj.parents)
    assert_result_count(ds.siblings(), 1, type="sibling",
                        name=DEFAULT_REMOTE,
                        url=str(cache_dir / name_in_cache))
    here = ds.config.get("annex.uuid")
    origin = ds.config.get(f"remote.{DEFAULT_REMOTE}.annex-uuid")
    where = ds.repo.whereis(str(annexed_file))
    assert_in(here, where)
    assert_in(origin, where)

    return ds.pathobj, ds.repo.pathobj

def test_crazy_cfg(path):
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True),
                        source='dataset')
    assert_in('crazy.padry', cfg)
    # make sure crazy config is not read when in local mode
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='local')
    assert_not_in('crazy.padry', cfg)
    # it will make it in in 'any' mode though
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='any')
    assert_in('crazy.padry', cfg)
    # typos in the source mode arg will not have silent side-effects
    assert_raises(ValueError, ConfigManager,
                  Dataset(opj(path, 'ds')), source='locale')

def test_dataset_systemglobal_mode(path):
    ds = create(path)
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', ds.config)
    # from .datalad/config
    assert_in('datalad.dataset.id', ds.config)
    # from .git/config
    assert_in('annex.version', ds.config)
    with chpwd(path):
        # now check that no config from a random dataset at PWD is picked up
        # if no dataset instance was provided
        cfg = ConfigManager(dataset=None, source='any')
        assert_in('user.name', cfg)
        assert_not_in('datalad.dataset.id', cfg)
        assert_not_in('annex.version', cfg)

def test_bare(path):
    # can we handle a bare repo?
    gr = GitRepo(path, create=True, bare=True)
    # do we read the correct local config?
    assert_in(gr.pathobj / 'config', gr.config._stores['git']['files'])
    # any sensible (and also our CI) test environment(s) should have this
    assert_in('user.name', gr.config)
    # now set something that wasn't there
    obscure_key = 'sec.reallyobscurename!@@.key'
    assert_not_in(obscure_key, gr.config)
    # to the local config, which is easily accessible
    gr.config.set(obscure_key, 'myvalue', where='local')
    assert_equal(gr.config.get(obscure_key), 'myvalue')
    # now make sure the config is where we think it is
    assert_in(obscure_key.split('.')[1],
              (gr.pathobj / 'config').read_text())

def test_cmdline_example_to_rst():
    # don't puke on nothing
    out = fmt.cmdline_example_to_rst(SIO(''))
    out.seek(0)
    ok_startswith(out.read(), '.. AUTO-GENERATED')
    out = fmt.cmdline_example_to_rst(SIO(''), ref='dummy')
    out.seek(0)
    assert_in('.. dummy:', out.read())
    # full scale test
    out = fmt.cmdline_example_to_rst(SIO(demo_example), ref='mydemo')
    out.seek(0)
    out_text = out.read()
    assert_in('.. code-block:: sh', out_text)
    assert_not_in('shame', out_text)  # no SKIP'ed
    assert_not_in('#', out_text)      # no comments

def test_add_subdataset(path):
    subds = create(opj(path, 'dir'), force=True)
    ds = create(path, force=True)
    ok_(subds.repo.dirty)
    ok_(ds.repo.dirty)
    assert_not_in('dir', ds.get_subdatasets())
    # without a base dataset the next is interpreted as "add everything
    # in subds to subds"
    add(subds.path)
    ok_clean_git(subds.path)
    assert_not_in('dir', ds.get_subdatasets())
    # but with a base directory we add the dataset subds as a subdataset
    # to ds
    ds.add(subds.path)
    assert_in('dir', ds.get_subdatasets())

def test_eval_results_plus_build_doc():

    # test docs

    # docstring was built already:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        TestUtils().__call__(1)
        assert_not_in("Building doc for", cml.out)
    # docstring accessible both ways:
    doc1 = Dataset.fake_command.__doc__
    doc2 = TestUtils().__call__.__doc__

    # docstring was built from Test_Util's definition:
    assert_equal(doc1, doc2)
    assert_in("TestUtil's fake command", doc1)
    assert_in("Parameters", doc1)
    assert_in("It's a number", doc1)

    # docstring also contains eval_result's parameters:
    assert_in("result_filter", doc1)
    assert_in("return_type", doc1)
    assert_in("list", doc1)
    assert_in("None", doc1)
    assert_in("return value behavior", doc1)
    assert_in("dictionary is passed", doc1)

    # test eval_results is able to determine the call, a method of which it is
    # decorating:
    with swallow_logs(new_level=logging.DEBUG) as cml:
        Dataset('/does/not/matter').fake_command(3)
        assert_in("Determined class of decorated function: {}"
                  "".format(TestUtils().__class__), cml.out)

    # test results:
    result = TestUtils().__call__(2)
    assert_equal(len(list(result)), 2)
    result = Dataset('/does/not/matter').fake_command(3)
    assert_equal(len(list(result)), 3)

    # test absent side-effect of popping eval_defaults
    kwargs = dict(return_type='list')
    TestUtils().__call__(2, **kwargs)
    assert_equal(list(kwargs), ['return_type'])

    # test signature:
    from inspect import getargspec
    assert_equal(getargspec(Dataset.fake_command)[0], ['number', 'dataset'])
    assert_equal(getargspec(TestUtils.__call__)[0], ['number', 'dataset'])

def _test_binary_data(host, store, dspath):
    # make sure the special remote deals with binary data and doesn't
    # accidentally involve any decode/encode etc.
    dspath = Path(dspath)
    store = Path(store)

    url = "https://github.com/datalad/example-dicom-functional/blob/master/dicoms/MR.1.3.46.670589.11.38317.5.0.4476.2014042516042547586"
    file = "dicomfile"
    ds = Dataset(dspath).create()
    ds.download_url(url, path=file, message="Add DICOM file from github")
    assert_repo_status(ds.path)

    # set up store:
    io = SSHRemoteIO(host) if host else LocalIO()
    if host:
        store_url = "ria+ssh://{host}{path}".format(host=host, path=store)
    else:
        store_url = "ria+{}".format(store.as_uri())

    create_store(io, store, '1')
    create_ds_in_store(io, store, ds.id, '2', '1')

    # add special remote
    init_opts = common_init_opts + ['url={}'.format(store_url)]
    ds.repo.init_remote('store', options=init_opts)

    # actual data transfer (both directions)
    # Note, that we intentionally call annex commands instead of
    # datalad-publish/-get here. We are testing an annex-special-remote.
    store_uuid = ds.siblings(name='store',
                             return_type='item-or-list')['annex-uuid']
    here_uuid = ds.siblings(name='here',
                            return_type='item-or-list')['annex-uuid']

    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_not_in(store_uuid, known_sources)
    ds.repo.call_annex(['move', str(file), '--to', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_not_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)
    ds.repo.call_annex(['get', str(file), '--from', 'store'])
    known_sources = ds.repo.whereis(str(file))
    assert_in(here_uuid, known_sources)
    assert_in(store_uuid, known_sources)

def test_call_from_parser_result_filter():
    class DummyOne(Interface):
        @staticmethod
        def __call__(**kwargs):
            yield kwargs

    # call_from_parser doesn't add result_filter to the keyword arguments
    assert_not_in("result_filter",
                  DummyOne.call_from_parser(_new_args())[0])
    # with dissolution of _OLD_STYLE_COMMANDS and just relying on having
    # @eval_results, no result_filter is added, since those commands are
    # not guaranteed to return/yield any record suitable for filtering.
    # The effect is the same -- those "common" options are not really
    # applicable to Interfaces which do not return/yield expected records
    assert_not_in(
        "result_filter",
        DummyOne.call_from_parser(_new_args(common_report_type="dataset"))[0])

def test_gh1597(path):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    # now modify low-level
    with open(op.join(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path, modified=['.gitmodules'])
    ds.save('.gitmodules')
    # must not come under annex management
    assert_not_in(
        'key',
        ds.repo.annexstatus(paths=['.gitmodules']).popitem()[1])

def test_target_ssh_recursive(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)[0]

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "subm 2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/%NAME"
            sep = '-'
        else:
            sep = os.path.sep

        if flat:
            # now that create_sibling also does fetch -- the related problem
            # so skipping this early
            raise SkipTest('TODO: Make publish work for flat datasets, '
                           'it currently breaks')

        remote_name = 'remote-' + str(flat)
        # TODO: there is f.ckup with paths so assert_create fails ATM
        # And let's test without explicit dataset being provided
        with chpwd(source.path):
            #assert_create_sshwebserver(
            create_sibling(
                target=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + 'subm 2', '']:
            target_dir = opj(
                target_path_,
                basename(src_path) if flat else ""
            ).rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

def _test_guess_dot_git(annex, path, url, tdir):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', commit=True, git=not annex)

    # we need to prepare to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(GitCommandError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path)(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(realpath(installed.path), realpath(tdir))
    ok_(exists(tdir))
    ok_clean_git(tdir, annex=annex)

def test_gh1597_simpler(path):
    ds = Dataset(path).create()
    # same goes for .gitattributes
    with open(op.join(ds.path, '.gitignore'), 'a') as f:
        f.write('*.swp\n')
    ds.save('.gitignore')
    assert_repo_status(ds.path)
    # put .gitattributes in some subdir and add all, should also go into Git
    attrfile = op.join('subdir', '.gitattributes')
    ds.repo.set_gitattributes(
        [('*', dict(mycustomthing='this'))],
        attrfile)
    assert_repo_status(ds.path, untracked=[attrfile], untracked_mode='all')
    ds.save()
    assert_repo_status(ds.path)
    # no annex key, not in annex
    assert_not_in(
        'key',
        ds.repo.get_content_annexinfo([ut.Path(attrfile)]).popitem()[1])

def _test_guess_dot_git(annex, path, url, tdir):
    repo = (AnnexRepo if annex else GitRepo)(path, create=True)
    repo.add('file.txt', commit=True, git=not annex)

    # we need to prepare to be served via http, otherwise it must fail
    with swallow_logs() as cml:
        assert_raises(IncompleteResultsError, install, path=tdir, source=url)
    ok_(not exists(tdir))

    Runner(cwd=path)(['git', 'update-server-info'])

    with swallow_logs() as cml:
        installed = install(tdir, source=url)
        assert_not_in("Failed to get annex.uuid", cml.out)
    eq_(realpath(installed.path), realpath(tdir))
    ok_(exists(tdir))
    ok_clean_git(tdir, annex=annex)

def check_integration1(login, keyring,
                       path,
                       organization=None,
                       kwargs={},
                       oauthtokens=None):
    kwargs = kwargs.copy()
    if organization:
        kwargs['github_organization'] = organization

    ds = Dataset(path).create()
    if oauthtokens:
        for oauthtoken in assure_list(oauthtokens):
            ds.config.add('hub.oauthtoken', oauthtoken, where='local')

    # so we do not pick up local repo configuration/token
    repo_name = 'test_integration1'
    with chpwd(path):
        # ATM all the github goodness does not care about "this dataset"
        # so force "process wide" cfg to pick up our defined above oauthtoken
        cfg.reload(force=True)

        # everything works just nice, no conflicts etc
        res = ds.create_sibling_github(repo_name, **kwargs)

        if organization:
            url_fmt = 'https://{login}@github.com/{organization}/{repo_name}.git'
        else:
            url_fmt = 'https://github.com/{login}/{repo_name}.git'
        eq_(res, [(ds, url_fmt.format(**locals()), False)])

        # but if we rerun - should kaboom since already has this sibling:
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, **kwargs)
        assert_in("already has a configured sibling", str(cme.exception))

        # but we can give it a new name, but it should kaboom since the remote one
        # exists already
        with assert_raises(ValueError) as cme:
            ds.create_sibling_github(repo_name, name="github2", **kwargs)
        assert_in("already exists on", str(cme.exception))
        # we should not leave the broken sibling behind
        assert_not_in('github2', ds.repo.get_remotes())

        # If we ask to reconfigure - should proceed normally
        ds.create_sibling_github(repo_name, existing='reconfigure', **kwargs)
        cfg.reload(force=True)

def test_GitRepo_ssh_push(repo_path, remote_path):
    from datalad import ssh_manager

    remote_repo = GitRepo(remote_path, create=True)
    url = "ssh://localhost" + op.abspath(remote_path)
    socket_path = op.join(ssh_manager.socket_dir,
                          get_connection_hash('localhost'))
    repo = GitRepo(repo_path, create=True)
    repo.add_remote("ssh-remote", url)

    # modify local repo:
    repo.checkout("ssh-test", ['-b'])
    with open(op.join(repo.path, "ssh_testfile.dat"), "w") as f:
        f.write("whatever")
    repo.add("ssh_testfile.dat")
    repo.commit("ssh_testfile.dat added.")

    # file is not known to the remote yet:
    assert_not_in("ssh_testfile.dat", remote_repo.get_indexed_files())

    # push changes:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    # test PushInfo object
    assert_in("ssh-remote/ssh-test",
              [commit.remote_ref.name for commit in pushed])

    # the connection is known to the SSH manager, since push() requested it:
    assert_in(socket_path, ssh_manager._connections)
    # and socket was created:
    ok_(op.exists(socket_path))

    # remote now knows the changes:
    assert_in("ssh-test", remote_repo.get_branches())
    assert_in("ssh_testfile.dat", remote_repo.get_files("ssh-test"))

    # amend to make it require "--force":
    repo.commit("amended", options=['--amend'])
    # push without --force should yield an error:
    pushed = repo.push(remote="ssh-remote", refspec="ssh-test")
    assert_in("[rejected] (non-fast-forward)", pushed[0].summary)
    # now push using force:
    repo.push(remote="ssh-remote", refspec="ssh-test", force=True)
    # correct commit message in remote:
    assert_in("amended",
              list(remote_repo.get_branch_commits('ssh-test'))[-1].summary)

def test_create_osf_simple(path):
    ds = Dataset(path).create(force=True)
    ds.save()

    file1 = Path('ds') / "file1.txt"

    create_results = ds.create_sibling_osf(title="CI dl-create",
                                           name="osf-storage")

    assert_result_count(create_results, 2, status='ok', type='dataset')

    # if we got here, we created something at OSF;
    # make sure we clean up afterwards
    try:
        # special remote is configured:
        remote_log = ds.repo.call_git(
            ['cat-file', 'blob', 'git-annex:remote.log'])
        assert_in("project={}".format(create_results[0]['id']), remote_log)

        # copy files over
        ds.repo.copy_to('.', "osf-storage")
        whereis = ds.repo.whereis(str(file1))
        here = ds.config.get("annex.uuid")
        # files should be 'here' and on remote end:
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)

        # drop content here
        ds.drop('.')
        whereis = ds.repo.whereis(str(file1))
        # now on remote end only
        assert_equal(len(whereis), 1)
        assert_not_in(here, whereis)

        # and get content again from remote:
        ds.get('.')
        whereis = ds.repo.whereis(str(file1))
        assert_equal(len(whereis), 2)
        assert_in(here, whereis)
    finally:
        # clean remote end:
        cred = get_credentials(allow_interactive=False)
        osf = OSF(**cred)
        delete_project(osf.session, create_results[0]['id'])

def test_wtf(path):
    # smoke test for now
    with swallow_outputs() as cmo:
        plugin(['wtf'], dataset=path)
        assert_not_in('Dataset information', cmo.out)
        assert_in('Configuration', cmo.out)
    with chpwd(path):
        with swallow_outputs() as cmo:
            plugin(['wtf'])
            assert_not_in('Dataset information', cmo.out)
            assert_in('Configuration', cmo.out)
    # now with a dataset
    ds = create(path)
    with swallow_outputs() as cmo:
        plugin(['wtf'], dataset=ds.path)
        assert_in('Configuration', cmo.out)
        assert_in('Dataset information', cmo.out)
        assert_in('path: {}'.format(ds.path), cmo.out)

def test_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        }
    )
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in ds.repo.annexstatus().items():
        eq_(p['state'], 'clean')
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)

def test_state(path):
    ds = Dataset.create(path)
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # by default we are not reporting any state info
    assert_not_in('state', res[0])
    # uninstall the subdataset
    ds.uninstall('sub')
    # normal 'gone' is "absent"
    assert_false(sub.is_installed())
    assert_result_count(ds.subdatasets(), 1, path=sub.path, state='absent')
    # with directory totally gone also
    os.rmdir(sub.path)
    assert_result_count(ds.subdatasets(), 1, path=sub.path, state='absent')
    # putting dir back, no change
    os.makedirs(sub.path)
    assert_result_count(ds.subdatasets(), 1, path=sub.path, state='absent')

def decorated_test3(ds):
    # we get a Dataset instance
    assert_is_instance(ds, Dataset)
    # it's a clone in a temp. location, not within the cache
    assert_not_in(cache_dir, ds.pathobj.parents)
    assert_result_count(ds.siblings(), 1, type="sibling",
                        name="origin",
                        url=str(cache_dir / name_in_cache))
    # origin is the same cached dataset that got this content in
    # decorated_test2 before. Should still be there. But "here" we
    # didn't request it.
    here = ds.config.get("annex.uuid")
    origin = ds.config.get("remote.origin.annex-uuid")
    where = ds.repo.whereis(str(annexed_file))
    assert_not_in(here, where)
    assert_in(origin, where)

    return ds.pathobj, ds.repo.pathobj

def test_save_to_git(path):
    ds = Dataset(path).create()
    create_tree(
        ds.path,
        {
            'file_ingit': 'file_ingit',
            'file_inannex': 'file_inannex',
        }
    )
    ds.repo.save(paths=['file_ingit'], git=True)
    ds.repo.save(paths=['file_inannex'])
    assert_repo_status(ds.repo)
    for f, p in iteritems(ds.repo.annexstatus()):
        eq_(p['state'], 'clean')
        if f.match('*ingit'):
            assert_not_in('key', p, f)
        elif f.match('*inannex'):
            assert_in('key', p, f)

def _test_assert_Xwd_unchanged_ok_chdir(func):
    # Test that we are not masking out other "more important" exceptions
    orig_cwd = os.getcwd()
    orig_pwd = getpwd()

    @assert_cwd_unchanged(ok_to_chdir=True)
    def do_chdir_value_error():
        func(os.pardir)
        return "a value"

    with swallow_logs() as cml:
        eq_(do_chdir_value_error(), "a value")
        eq_(orig_cwd, os.getcwd(),
            "assert_cwd_unchanged didn't return us back to cwd %s" % orig_cwd)
        eq_(orig_pwd, getpwd(),
            "assert_cwd_unchanged didn't return us back to pwd %s" % orig_pwd)
        assert_not_in("Mitigating and changing back", cml.out)

def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over
    # something before
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # inject remote config prior to the run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_in_results(
        ds1.create_sibling_github(
            'bogus', recursive=True, github_login='******',
            on_failure='ignore'),
        status='error',
        message=('already has a configured sibling "%s"', 'github'),
    )
    assert_in_results(
        ds1.create_sibling_github(
            'bogus', recursive=True, github_login='******',
            existing='reconfigure'),
        status='notneeded',
        message=('already has a configured sibling "%s"', 'github'),
    )
    assert_in_results(
        ds1.create_sibling_github(
            'bogus', recursive=True, github_login='******',
            existing='skip'),
        status='notneeded',
        message=('already has a configured sibling "%s"', 'github'),
    )

def test_version():
    # we just get a version if not asking for a version of some command
    stdout, stderr = run_main(['--version'], expect_stderr=True)
    eq_(stdout.rstrip(), "datalad %s" % datalad.__version__)

    stdout, stderr = run_main(['clone', '--version'], expect_stderr=True)
    ok_startswith(stdout, 'datalad %s\n' % datalad.__version__)
    # since https://github.com/datalad/datalad/pull/2733 no license in --version
    assert_not_in("Copyright", stdout)
    assert_not_in("Permission is hereby granted", stdout)

    try:
        import datalad_container
    except ImportError:
        pass  # not installed, cannot test with extension
    else:
        stdout, stderr = run_main(['containers-list', '--version'],
                                  expect_stderr=True)
        eq_(stdout, 'datalad_container %s\n' % datalad_container.__version__)

def test_gh1597(path):
    if 'APPVEYOR' in os.environ:
        # the issue only happens on AppVeyor, where Python itself implodes;
        # it cannot be reproduced on a real Windows box
        raise SkipTest('this test causes appveyor to crash, reason unknown')
    ds = Dataset(path).create()
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # now modify .gitmodules with another command
    ds.subdatasets(contains=sub.path, set_property=[('this', 'that')])
    # now modify low-level
    with open(op.join(ds.path, '.gitmodules'), 'a') as f:
        f.write('\n')
    assert_repo_status(ds.path, modified=['.gitmodules'])
    ds.save('.gitmodules')
    # must not come under annex management
    assert_not_in(
        'key',
        ds.repo.annexstatus(paths=['.gitmodules']).popitem()[1])

def test_add_local_path(path, local_file):
    ds = Dataset(path).create()
    res = ds.containers_add(name="foobert",
                            url=op.join(local_file, "foo.img"))
    foo_target = op.join(path, ".datalad", "environments", "foobert", "image")
    assert_result_count(res, 1, status="ok", type="file", path=foo_target,
                        action="containers_add")
    # We've just copied and added the file.
    assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target))

    # We can force the URL to be added. (Note: This works because datalad
    # overrides 'annex.security.allowed-url-schemes' in its tests.)
    ds.containers_add(name="barry",
                      url=get_local_file_url(op.join(local_file, "bar.img")))
    bar_target = op.join(path, ".datalad", "environments", "barry", "image")
    assert_in(ds.repo.WEB_UUID, ds.repo.whereis(bar_target))

def test_GitRepo_gitpy_injection(path, path2):
    gr = GitRepo(path, create=True)
    gr._GIT_COMMON_OPTIONS.extend(['test-option'])

    with assert_raises(GitCommandError) as cme:
        gr.repo.git.unknown_git_command()
    assert_in('test-option', exc_str(cme.exception))

    # once set, these options should be persistent across git calls:
    with assert_raises(GitCommandError) as cme:
        gr.repo.git.another_unknown_git_command()
    assert_in('test-option', exc_str(cme.exception))

    # but other repos should not be affected:
    gr2 = GitRepo(path2, create=True)
    with assert_raises(GitCommandError) as cme:
        gr2.repo.git.unknown_git_command()
    assert_not_in('test-option', exc_str(cme.exception))

def test_recurseinto(dspath, dest):
    # make fresh dataset hierarchy
    ds = make_demo_hierarchy_datasets(dspath, demo_hierarchy)
    ds.add('.', recursive=True)
    # label intermediate dataset as 'norecurseinto'
    res = Dataset(opj(ds.path, 'b')).subdatasets(
        contains='bb',
        set_property=[('datalad-recursiveinstall', 'skip')])
    assert_result_count(res, 1, path=opj(ds.path, 'b', 'bb'))
    ds.add('b/', recursive=True)
    ok_clean_git(ds.path)

    # recursive install, should skip the entire bb branch
    res = install(source=ds.path, path=dest, recursive=True,
                  result_xfm=None, result_filter=None)
    assert_result_count(res, 5)
    assert_result_count(res, 5, type='dataset')
    # we got the neighbor subdataset
    assert_result_count(res, 1, type='dataset',
                        path=opj(dest, 'b', 'ba'))
    # we did not get the one we wanted to skip
    assert_result_count(res, 0, type='dataset',
                        path=opj(dest, 'b', 'bb'))
    assert_not_in(
        opj(dest, 'b', 'bb'),
        Dataset(dest).subdatasets(fulfilled=True, result_xfm='paths'))
    assert(not Dataset(opj(dest, 'b', 'bb')).is_installed())

    # cleanup
    Dataset(dest).remove(recursive=True)
    assert(not lexists(dest))

    # again, but just clone the base, then get content and grab 'bb'
    # explicitly -- it must get installed
    dest = install(source=ds.path, path=dest)
    res = dest.get(['.', opj('b', 'bb')], get_data=False, recursive=True)
    assert_result_count(res, 8)
    assert_result_count(res, 8, type='dataset')
    assert_result_count(res, 1, type='dataset',
                        path=opj(dest.path, 'b', 'bb'))
    assert(Dataset(opj(dest.path, 'b', 'bb')).is_installed())

def test_nested_metadata(path):
    ds = Dataset(path).create(force=True)
    ds.save()
    ds.aggregate_metadata()
    # BIDS returns participant info as a nested dict for each file in the
    # content metadata. On the dataset-level this should automatically
    # yield a sequence of participant info dicts, without any further action
    # or BIDS-specific configuration
    meta = ds.metadata('.', reporton='datasets',
                       return_type='item-or-list')['metadata']
    for i in zip(
            sorted(
                meta['datalad_unique_content_properties']['bids']['subject'],
                key=lambda x: x['id']),
            sorted([{
                "age(years)": "20-25",
                "id": "03",
                "gender": "female",
                "handedness": "r",
                "hearing_problems_current": "n",
                "language": "english"
            }, {
                "age(years)": "30-35",
                "id": "01",
                "gender": 'n/a',
                "handedness": "r",
                "hearing_problems_current": "n",
                "language": u"русский"
            }], key=lambda x: x['id'])):
        assert_dict_equal(i[0], i[1])

    # we can turn off this kind of auto-summary
    ds.config.add('datalad.metadata.generate-unique-bids', 'false',
                  where='dataset')
    ds.aggregate_metadata()
    meta = ds.metadata('.', reporton='datasets',
                       return_type='item-or-list')['metadata']
    # protect next test a little, in case we enhance our core extractor in
    # the future to provide more info
    if 'datalad_unique_content_properties' in meta:
        assert_not_in('bids', meta['datalad_unique_content_properties'])

def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    dst_relpath = os.path.relpath(dst_path, os.path.expanduser('~'))
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url)
    published = ds.publish('.', to='localhost')
    assert_in('sub.dat', published[0])
    # verify that the hook runs and there is nothing in stderr,
    # since it exits with 0 even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out an IncompleteResultsError exception atm...
    # so just checking that it fails somehow
    assert_raises(Exception, ds.create_sibling, url)
    ds.create_sibling(url, existing='replace')
    published2 = ds.publish('.', to='localhost')
    assert_in('sub.dat', published2[0])

    # and one more test, since in the above test it would not puke ATM but
    # just not even try to copy since it assumes that the file is already
    # there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost')  # we publish just git
    assert_not_in('sub2.dat', published3[0])

    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish('.', to='localhost')
    assert_in('sub2.dat', published4[0])
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

def test_create_sub(path):
    ds = Dataset(path)
    ds.create()

    # 1. create sub and add to super:
    subds = ds.create(op.join("some", "what", "deeper"))
    ok_(isinstance(subds, Dataset))
    ok_(subds.is_installed())
    assert_repo_status(subds.path, annex=True)
    assert_in(
        'submodule.some/what/deeper.datalad-id={}'.format(subds.id),
        list(ds.repo.call_git_items_(
            ['config', '--file', '.gitmodules', '--list'],
            read_only=True))
    )

    # subdataset is known to superdataset:
    assert_in(op.join("some", "what", "deeper"),
              ds.subdatasets(result_xfm='relpaths'))
    # and was committed:
    assert_repo_status(ds.path)

    # subds finds superdataset
    ok_(subds.get_superdataset() == ds)

    # 2. create sub without adding to super:
    subds2 = Dataset(op.join(path, "someother")).create()
    ok_(isinstance(subds2, Dataset))
    ok_(subds2.is_installed())
    assert_repo_status(subds2.path, annex=True)

    # unknown to superdataset:
    assert_not_in("someother", ds.subdatasets(result_xfm='relpaths'))

    # 3. create sub via super:
    subds3 = ds.create("third", annex=False)
    ok_(isinstance(subds3, Dataset))
    ok_(subds3.is_installed())
    assert_repo_status(subds3.path, annex=False)
    assert_in("third", ds.subdatasets(result_xfm='relpaths'))

def test_crawl_s3_commit_versions(path):
    annex = _annex(path)

    # Fancier setup so we could do any of the desired actions within a
    # single sweep
    pipeline = [
        crawl_s3('datalad-test0-versioned', strategy='commit-versions',
                 repo=annex.repo),
        switch('datalad_action',
               {
                   'commit': annex.finalize(tag=True),
                   'remove': annex.remove,
                   'annex': annex,
               })
    ]

    with externals_use_cassette('test_crawl_s3-pipeline1'):
        with swallow_logs(new_level=logging.WARN) as cml:
            out = run_pipeline(pipeline)
            assert_in("There is already a tag %s" % target_version, cml.out)
    # things are committed and thus stats are empty
    eq_(out, [{'datalad_stats': ActivityStats()}])
    total_stats = out[0]['datalad_stats'].get_total()
    eq_(set(total_stats.versions), {target_version})
    # we have a bunch of them since not uniq'ing them and they are all
    # the same; override for easier checking
    total_stats.versions = []
    eq_(total_stats,
        # deletions come as 'files' as well atm
        ActivityStats(files=17, overwritten=3, downloaded=14, urls=14,
                      add_annex=14, removed=3, downloaded_size=112))
    tags = annex.repo.get_tags(output='name')
    assert_in(target_version, tags)
    # and we actually got 7 more commits
    for t in range(1, 8):
        assert_in(target_version + "+%d" % t, tags)

    # if we rerun -- nothing new should have been done, i.e. the run is
    # idempotent, and ATM we can reuse the same cassette
    with externals_use_cassette('test_crawl_s3-pipeline1'):
        with swallow_logs() as cml:
            out = run_pipeline(pipeline)
            assert_not_in("There is already a tag %s" % target_version,
                          cml.out)
    eq_(out, [{'datalad_stats': ActivityStats(skipped=17)}])
    eq_(out[0]['datalad_stats'].get_total(),
        ActivityStats(skipped=17))  # really nothing was done

def test_sidecar(path):
    ds = Dataset(path).create()
    # Simple sidecar message checks.
    ds.run(["touch", "dummy0"], message="sidecar arg", sidecar=True)
    assert_not_in('"cmd":', ds.repo.format_commit("%B"))
    ds.config.set("datalad.run.record-sidecar", "false", where="local")
    ds.run(["touch", "dummy1"], message="sidecar config")
    assert_in('"cmd":', ds.repo.format_commit("%B"))
    ds.config.set("datalad.run.record-sidecar", "true", where="local")
    ds.run(["touch", "dummy1"], message="sidecar config")
    assert_not_in('"cmd":', ds.repo.format_commit("%B"))

    # Don't break when config.get() returns multiple values. Here it's two
    # values in .gitconfig, but a more realistic scenario is a value in
    # $repo/.git/config that overrides a setting in ~/.config/git/config.
    ds.config.add("datalad.run.record-sidecar", "false", where="local")
    ds.run(["touch", "dummy2"], message="sidecar config")
    assert_in('"cmd":', ds.repo.format_commit("%B"))