def test_dont_trip_over_missing_subds(path):
    ds1 = Dataset(opj(path, 'ds1')).create()
    ds2 = Dataset(opj(path, 'ds2')).create()
    subds2 = ds1.install(
        source=ds2.path, path='subds2',
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subds2.is_installed())
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    subds2.uninstall()
    assert_in('subds2', ds1.subdatasets(result_xfm='relpaths'))
    assert_false(subds2.is_installed())
    # see if it wants to talk to github (and fail), or if it trips over
    # something before
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # inject remote config prior to the run
    assert_not_in('github', ds1.repo.get_remotes())
    # fail on existing
    ds1.repo.add_remote('github', 'http://nothere')
    assert_raises(ValueError,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******')
    # talk to github when existing is OK
    assert_raises(gh.BadCredentialsException,
                  ds1.create_sibling_github, 'bogus', recursive=True,
                  github_login='******', existing='reconfigure')
    # return happy emptiness when all is skipped
    assert_equal(
        ds1.create_sibling_github(
            'bogus', recursive=True,
            github_login='******', existing='skip'),
        [])
def test_publish_gh1691(origin, src_path, dst_path):

    # prepare src; no subdatasets installed, but mount points present
    source = install(src_path, source=origin, recursive=False)
    ok_(exists(opj(src_path, "subm 1")))
    assert_false(Dataset(opj(src_path, "subm 1")).is_installed())

    # some content modification of the superdataset
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)

    # create the target(s):
    source.create_sibling(
        'ssh://localhost:' + dst_path,
        name='target', recursive=True)

    # publish recursively, which silently ignores non-installed datasets
    results = source.publish(to='target', recursive=True)
    assert_result_count(results, 1)
    assert_result_count(results, 1, status='ok', type='dataset',
                        path=source.path)

    # if, however, a non-installed subdataset is requested explicitly,
    # it'll fail
    results = source.publish(path='subm 1', to='target',
                             on_failure='ignore')
    assert_result_count(results, 1, status='impossible', type='dataset',
                        action='publish')
def test_install_known_subdataset(src, path):
    # get the superdataset:
    ds = install(path, source=src)
    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # install it:
    ds.install('subm 1')
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # verify that it is the correct submodule installed and not
    # a new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))

    # now, get the data by reinstalling with -g:
    ok_(subds.repo.file_has_content('test-annex.dat') is False)
    with chpwd(ds.path):
        result = get(path='subm 1', dataset=os.curdir)
        assert_in_results(result, path=opj(subds.path, 'test-annex.dat'))
        ok_(subds.repo.file_has_content('test-annex.dat') is True)
        ok_(subds.is_installed())
def test_GitRepo_fetch(test_path, orig_path, clone_path):

    origin = GitRepo.clone(test_path, orig_path)
    clone = GitRepo.clone(orig_path, clone_path)
    filename = get_most_obscure_supported_name()

    origin.checkout("new_branch", ['-b'])
    with open(op.join(orig_path, filename), 'w') as f:
        f.write("New file.")
    origin.add(filename)
    origin.commit("new file added.")

    fetched = clone.fetch(remote='origin')
    # test FetchInfo list returned by fetch
    eq_([u'origin/' + clone.get_active_branch(), u'origin/new_branch'],
        [commit.name for commit in fetched])

    ok_clean_git(clone.path, annex=False)
    assert_in("origin/new_branch", clone.get_remote_branches())
    assert_in(filename, clone.get_files("origin/new_branch"))
    assert_false(op.exists(op.join(clone_path, filename)))  # not checked out

    # create a remote without an URL:
    origin.add_remote('not-available', 'git://example.com/not/existing')
    origin.config.unset('remote.not-available.url', where='local')

    # fetch without provided URL
    fetched = origin.fetch('not-available')
    # nothing was done, nothing returned:
    eq_([], fetched)
def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('CMD', alt)
    assert_not_in('REFLOW', alt)
    assert_in('a b', alt)
    assert_in('not\n reflowed', alt)
    assert_in("Something for the cmdline only Multiline!", alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)
    # RST role markup
    eq_(alter_interface_docs_for_cmdline(':murks:`me and my buddies`'),
        'me and my buddies')
    # spread across lines
    eq_(alter_interface_docs_for_cmdline(':term:`Barbara\nStreisand`'),
        'Barbara\nStreisand')
    # multiple on one line
    eq_(alter_interface_docs_for_cmdline(
        ':term:`one` bla bla :term:`two` bla'),
        'one bla bla two bla')

    altpd = alter_interface_docs_for_cmdline(demo_paramdoc)
    assert_not_in('python', altpd)
    assert_in('inbetween', altpd)
    assert_in('appended', altpd)
    assert_in('cmdline', altpd)
def test_notclone_known_subdataset(src, path):
    # get the superdataset:
    ds = clone(src, path,
               result_xfm='datasets', return_type='item-or-list')

    # subdataset not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    assert_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_not_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
    # clone is not meaningful
    res = ds.clone('subm 1', on_failure='ignore')
    assert_status('error', res)
    assert_message('Failed to clone from any candidate source URL. '
                   'Encountered errors per each url were: %s',
                   res)
    # get does the job
    res = ds.get(path='subm 1', get_data=False)
    assert_status('ok', res)
    ok_(subds.is_installed())
    ok_(AnnexRepo.is_valid_repo(subds.path, allow_noninitialized=False))
    # verify that it is the correct submodule installed and not
    # a new repository initialized
    eq_(set(subds.repo.get_indexed_files()),
        {'test.dat', 'INFO.txt', 'test-annex.dat'})
    assert_not_in('subm 1', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    assert_in('subm 1', ds.subdatasets(fulfilled=True, result_xfm='relpaths'))
def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('Command line', alt)
def test_relpath_add(path):
    ds = Dataset(path).create(force=True)
    with chpwd(opj(path, 'dir')):
        eq_(add('testindir')[0]['path'],
            opj(ds.path, 'dir', 'testindir'))
        # and now add all
        add('..')
    # auto-save enabled
    assert_false(ds.repo.dirty)
def test_bundle_invariance(path):
    remote_url = 'ssh://localhost'
    manager = SSHManager()
    testfile = Path(path) / 'dummy'
    for flag in (True, False):
        assert_false(testfile.exists())
        ssh = manager.get_connection(remote_url, use_remote_annex_bundle=flag)
        ssh('cd .>{}'.format(text_type(testfile)))
        ok_(testfile.exists())
        testfile.unlink()
def test_alter_interface_docs_for_cmdline():
    alt = alter_interface_docs_for_cmdline(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('PY', alt)
    assert_not_in('Python', alt)
    # args
    altarg = alter_interface_docs_for_cmdline(demo_argdoc)
def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')

    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)
def test_install_noautoget_data(src, path):
    subsub_src = Dataset(opj(src, 'sub 1', 'subsub')).create(force=True)
    sub1_src = Dataset(opj(src, 'sub 1')).create(force=True)
    sub2_src = Dataset(opj(src, 'sub 2')).create(force=True)
    top_src = Dataset(src).create(force=True)
    top_src.add('.', recursive=True)

    # install top level:
    cdss = install(path, source=src, recursive=True)
    # there should only be datasets in the list of installed items,
    # and none of those should have any data for their annexed files yet
    for ds in cdss:
        assert_false(any(ds.repo.file_has_content(ds.repo.get_annexed_files())))
def check_contents(outname, prefix):
    with tarfile.open(outname) as tf:
        nfiles = 0
        for ti in tf:
            # any annex links resolved
            assert_false(ti.issym())
            ok_startswith(ti.name, prefix + '/')
            assert_equal(ti.mtime, committed_date)
            if '.datalad' not in ti.name:
                # ignore any files in .datalad so this test is not
                # susceptible to changes in how much meta info we generate
                nfiles += 1
        # we have exactly three files, and expect no content for any directory
        assert_equal(nfiles, 3)
def test_run_explicit(path):
    ds = Dataset(path)

    assert_false(ds.repo.file_has_content("test-annex.dat"))

    create_tree(ds.path, {"dirt_untracked": "untracked",
                          "dirt_modified": "modified"})
    ds.save("dirt_modified", to_git=True)
    with open(op.join(path, "dirt_modified"), "a") as ofh:
        ofh.write(", more")

    # We need explicit=True to run with a dirty repo.
    assert_status("impossible",
                  ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
                         inputs=["test-annex.dat"],
                         on_failure="ignore"))

    hexsha_initial = ds.repo.get_hexsha()
    # If we specify test-annex.dat as an input, it will be retrieved before
    # the run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], explicit=True)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    # We didn't commit anything because outputs weren't specified.
    assert_false(ds.repo.file_has_content("doubled.dat"))
    eq_(hexsha_initial, ds.repo.get_hexsha())

    # If an input doesn't exist, we just show the standard warning.
    with swallow_logs(new_level=logging.WARN) as cml:
        with swallow_outputs():
            ds.run("ls", inputs=["not-there"], explicit=True)
        assert_in("Input does not exist: ", cml.out)

    remove(op.join(path, "doubled.dat"))

    hexsha_initial = ds.repo.get_hexsha()
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"], outputs=["doubled.dat"],
           explicit=True)
    ok_(ds.repo.file_has_content("doubled.dat"))
    assert_repo_status(ds.path, modified=["dirt_modified"],
                       untracked=['dirt_untracked'])
    neq_(hexsha_initial, ds.repo.get_hexsha())

    # Saving explicit outputs works from subdirectories.
    subdir = op.join(path, "subdir")
    mkdir(subdir)
    with chpwd(subdir):
        run("echo insubdir >foo", explicit=True, outputs=["foo"])
    ok_(ds.repo.file_has_content(op.join("subdir", "foo")))
def test_alter_interface_docs_for_api():
    alt = alter_interface_docs_for_api(demo_doc)
    alt_l = alt.split('\n')
    # dedented
    assert_false(alt_l[0].startswith(' '))
    assert_false(alt_l[-1].startswith(' '))
    assert_not_in('CMD', alt)
    assert_not_in('PY', alt)
    assert_not_in('REFLOW', alt)
    assert_in("Some Python-only bits Multiline!", alt)

    altpd = alter_interface_docs_for_api(demo_paramdoc)
    assert_in('python', altpd)
    assert_in('inbetween', altpd)
    assert_in('appended', altpd)
    assert_not_in('cmdline', altpd)
def test_check_dates(path):
    skip_if_no_module("dateutil")

    ref_ts = 1218182889  # Fri, 08 Aug 2008 04:08:09 -0400
    refdate = "@{}".format(ref_ts)

    repo = os.path.join(path, "repo")
    with set_date(ref_ts + 5000):
        ar = AnnexRepo(repo)
        ar.add(".")
        ar.commit()

    # The standard renderer outputs json.
    with swallow_outputs() as cmo:
        # Set level to WARNING to avoid the progress bar when
        # DATALAD_TESTS_UI_BACKEND=console.
        with swallow_logs(new_level=logging.WARNING):
            check_dates([repo],
                        reference_date=refdate,
                        return_type="list")
        assert_in("report", json.loads(cmo.out).keys())

    # We find the newer objects.
    newer = call([path], reference_date=refdate)
    eq_(len(newer), 1)
    ok_(newer[0]["report"]["objects"])

    # There are no older objects to find.
    older = call([repo], reference_date=refdate, older=True)
    assert_false(older[0]["report"]["objects"])

    # We can pass the date in RFC 2822 format.
    assert_dict_equal(
        newer[0],
        call([path], reference_date="08 Aug 2008 04:08:09 -0400")[0])

    # paths=None defaults to the current directory.
    with chpwd(path):
        assert_dict_equal(
            newer[0]["report"],
            call(paths=None, reference_date=refdate)[0]["report"])

    # Only commit type is present when annex='none'.
    newer_noannex = call([path], reference_date=refdate, annex="none")
    for entry in newer_noannex[0]["report"]["objects"].values():
        ok_(entry["type"] == "commit")
def test_save(path):
    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)
    ds.save("add a new file", all_changes=False)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst", all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)
    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_false(ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(ds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save(all_changes=True)
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
def test_new_or_modified(path):
    def get_new_or_modified(*args, **kwargs):
        return [relpath(ap["path"], path)
                for ap in new_or_modified(diff_revision(*args, **kwargs))]

    ds = Dataset(path).create(force=True, no_annex=True)

    # Check out an orphan branch so that we can test the "one commit
    # in a repo" case.
    ds.repo.checkout("orph", options=["--orphan"])
    ds.add(".")
    assert_false(ds.repo.dirty)
    assert_result_count(ds.repo.repo.git.rev_list("HEAD").split(), 1)
    # Diffing doesn't fail when the branch contains a single commit.
    assert_in("to_modify", get_new_or_modified(ds, "HEAD"))

    # New files are detected, deletions are not.
    ds.repo.remove(["to_remove"])
    ok_(ds.repo.dirty)

    with open(opj(path, "to_add"), "w") as f:
        f.write("content5")
    ds.repo.add(["to_add"])
    ds.repo.commit("add one, remove another")

    eq_(get_new_or_modified(ds, "HEAD"), ["to_add"])

    # Modifications are detected.
    with open(opj(path, "to_modify"), "w") as f:
        f.write("updated 1")
    with open(opj(path, "d/to_modify"), "w") as f:
        f.write("updated 2")
    ds.add(["to_modify", "d/to_modify"])

    eq_(set(get_new_or_modified(ds, "HEAD")),
        {"to_modify", "d/to_modify"})

    # Non-HEAD revisions work.
    ds.repo.commit("empty", options=["--allow-empty"])
    assert_false(get_new_or_modified(ds, "HEAD"))
    eq_(set(get_new_or_modified(ds, "HEAD~")),
        {"to_modify", "d/to_modify"})
def test_state(path):
    ds = Dataset.create(path)
    sub = ds.create('sub')
    res = ds.subdatasets()
    assert_result_count(res, 1, path=sub.path)
    # by default we are not reporting any state info
    assert_not_in('state', res[0])
    # uninstall the subdataset
    ds.uninstall('sub')
    # normal 'gone' is "absent"
    assert_false(sub.is_installed())
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
    # with the directory totally gone also
    os.rmdir(sub.path)
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
    # putting the dir back, no change
    os.makedirs(sub.path)
    assert_result_count(
        ds.subdatasets(), 1, path=sub.path, state='absent')
def test_GitRepo_instance_from_not_existing(path, path2):
    # 1. create=False and path doesn't exist:
    assert_raises(NoSuchPathError, GitRepo, path, create=False)
    assert_false(op.exists(path))

    # 2. create=False, path exists, but no git repo:
    os.mkdir(path)
    ok_(op.exists(path))
    assert_raises(InvalidGitRepositoryError, GitRepo, path, create=False)
    assert_false(op.exists(op.join(path, '.git')))

    # 3. create=True, path doesn't exist:
    gr = GitRepo(path2, create=True)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path2, '.git')))
    ok_clean_git(path2, annex=False)

    # 4. create=True, path exists, but no git repo:
    gr = GitRepo(path, create=True)
    assert_is_instance(gr, GitRepo, "GitRepo was not created.")
    ok_(op.exists(op.join(path, '.git')))
    ok_clean_git(path, annex=False)
def test_run_failure(path):
    ds = Dataset(path).create()
    subds = ds.create("sub")

    hexsha_initial = ds.repo.get_hexsha()

    with assert_raises(CommandError):
        ds.run("echo x$(cat sub/grows) > sub/grows && false")
    eq_(hexsha_initial, ds.repo.get_hexsha())
    ok_(ds.repo.dirty)

    msgfile = opj(path, ds.repo.get_git_dir(ds.repo), "COMMIT_EDITMSG")
    ok_exists(msgfile)

    ds.add(".", recursive=True, message_file=msgfile)
    ok_clean_git(ds.path)
    neq_(hexsha_initial, ds.repo.get_hexsha())

    outfile = opj(subds.path, "grows")
    eq_('x\n', open(outfile).read())

    # There is no CommandError on rerun if the non-zero error matches the
    # original code.
    ds.rerun()
    eq_('xx\n', open(outfile).read())

    # On the other hand, we fail if we rerun a command and there is a
    # non-zero error that doesn't match.
    ds.run("[ ! -e bar ] && echo c >bar")
    ok_clean_git(ds.path)
    with assert_raises(CommandError):
        ds.rerun()

    # We don't show instructions if the caller specified us not to save.
    remove(msgfile)
    with assert_raises(CommandError):
        ds.run("false", explicit=True, outputs=None)
    assert_false(op.exists(msgfile))
def test_basics(path, super_path):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('setup_yoda_dataset')
    ok_clean_git(ds.path)
    assert_false(ds.repo.is_under_annex("README.md"))

    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        where='dataset')
    # commit this procedure config for later use in a clone:
    ds.add(op.join('.datalad', 'config'))
    # configure dataset to run the demo procedure prior to the clean command
    ds.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    # run command that should trigger the demo procedure
    ds.clean()
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(ds.path, index_modified=[op.join('.datalad', 'config')])

    # make a fresh dataset:
    super = Dataset(super_path).create()
    # configure dataset to run the demo procedure prior to the clean command
    super.config.add(
        'datalad.clean.proc-pre',
        'datalad_test_proc',
        where='dataset')
    # 'super' doesn't know any procedures but should get to know one by
    # installing the above as a subdataset
    super.install('sub', source=ds.path)
    # run command that should trigger the demo procedure
    super.clean()
    # look for traces
    ok_file_has_content(op.join(super.path, 'fromproc.txt'), 'hello\n')
    ok_clean_git(super.path, index_modified=[op.join('.datalad', 'config')])
def test_cred1_enter_new():
    keyring = MemoryKeyring()
    cred = UserPassword("name", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.enter_new(), None)
    assert_true(cred.is_known)
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_equal(keyring.get('name', 'password'), 'password1')

    keyring.delete('name')
    assert_raises(KeyError, keyring.delete, 'name', 'user')
    assert_raises(KeyError, keyring.delete, 'name')
    assert_equal(keyring.get('name', 'user'), None)

    # Test it blowing up if we provide an unknown field
    with assert_raises(ValueError) as cme:
        cred.enter_new(username='******')
    assert_in('field(s): username. Known but not specified: password, user',
              str(cme.exception))

    # Test that if user is provided, it is not asked
    cred.enter_new(user='******')
    assert_equal(keyring.get('name', 'user'), 'user2')
    assert_equal(keyring.get('name', 'password'), 'newpassword')
def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            assert_equal(cred.get('key_id'), '1')
            assert_equal(cred.get('secret_id'), '2')
            assert_true(cred.is_known)
    assert_false(cred.is_known)  # no memory of the past
def test_add_archive_dirs(path_orig, url, repo_path):
    # change to repo_path
    with chpwd(repo_path):
        # create annex repo
        repo = AnnexRepo(repo_path, create=True)

        # add archive to the repo so we could test
        with swallow_outputs():
            repo.add_urls([opj(url, '1.tar.gz')],
                          options=["--pathdepth", "-1"])
        repo.commit("added 1.tar.gz")

        # test with excludes and annex options
        add_archive_content(
            '1.tar.gz',
            existing='archive-suffix',
            # since it is inconsistent and in many cases there seem to be no
            # leading dirs to strip, keep them as provided
            strip_leading_dirs=True,
            delete=True,
            leading_dirs_consider=['crcns.*', '1'],
            leading_dirs_depth=2,
            use_current_dir=False,
            exclude='.*__MACOSX.*')  # some junk penetrates

        if external_versions['cmd:annex'] >= '6.20170208':
            # should have fixed remotes
            eq_(
                repo.get_description(
                    uuid=DATALAD_SPECIAL_REMOTES_UUIDS[ARCHIVES_SPECIAL_REMOTE]),
                '[%s]' % ARCHIVES_SPECIAL_REMOTE)

        all_files = sorted(find_files('.'))
        target_files = {
            './CR24A/behaving1/1 f.txt',
            './CR24C/behaving3/3 f.txt',
            './CR24D/behaving2/2 f.txt',
        }
        eq_(set(all_files), target_files)

        # regression test: the subdir in MACOSX wasn't excluded and its name
        # was getting stripped by leading_dir_len
        # if stripping and exclude didn't work this fails
        assert_false(exists('__MACOSX'))
        # if exclude doesn't work then the name of the subdir gets stripped
        # by leading_dir_len
        assert_false(exists('c-1_data'))
        # if exclude doesn't work but everything else works, this fails
        assert_false(exists('CR24B'))
def test_ArchivesCache():
    # we don't actually need to test archives handling itself
    path1 = "/zuba/duba"
    path2 = "/zuba/duba2"
    # should not be able to create a persistent cache without topdir
    assert_raises(ValueError, ArchivesCache, persistent=True)
    cache = ArchivesCache()  # by default -- non persistent

    archive1_path = op.join(path1, fn_archive_obscure_ext)
    archive2_path = op.join(path2, fn_archive_obscure_ext)
    cached_archive1_path = cache[archive1_path].path
    assert_false(cache[archive1_path].path == cache[archive2_path].path)
    assert_true(cache[archive1_path] is cache[archive1_path])
    cache.clean()
    assert_false(op.exists(cached_archive1_path))
    assert_false(op.exists(cache.path))

    # test del
    cache = ArchivesCache()  # by default -- non persistent
    assert_true(op.exists(cache.path))
    cache_path = cache.path
    del cache
    assert_false(op.exists(cache_path))
def test_func_to_node():
    int_node = func_to_node(int)
    # a node which requires nothing and none of whose output is used
    assert int_node.__doc__
    in_dict = {'in': 1}
    ok_generator(int_node(in_dict))

    # xrange is not considered to be a generator
    def xrange_(n, offset=0):
        for x in range(offset, offset + n):
            yield x

    xrange_node = func_to_node(xrange_, data_args='in', outputs='out')
    assert_in('assigned to out', xrange_node.__doc__)
    assert_false('Additional keyword arguments' in xrange_node.__doc__)
    range_node_gen = xrange_node(in_dict)
    ok_generator(range_node_gen)
    assert_equal(list(range_node_gen), [{'in': 1, 'out': 0}])

    # with additional kwargs
    xrange_node = func_to_node(xrange_, data_args='in', outputs='out',
                               kwargs={'offset': 10})
    assert_in('assigned to out', xrange_node.__doc__)
    assert_in('Additional keyword arguments', xrange_node.__doc__)
    range_node_gen = xrange_node(in_dict)
    ok_generator(range_node_gen)
    assert_equal(list(range_node_gen), [{'in': 1, 'out': 10}])

    # testing func_node
    data = {'offset': 5, 'in': 1}
    xrange_node = func_to_node(xrange_, data_args='in',
                               data_kwargs=['offset'], outputs='out')
    assert_in('assigned to out', xrange_node.__doc__)
    assert_false('Additional keyword arguments' in xrange_node.__doc__)
    gen = xrange_node(data)
    ok_generator(gen)
    assert_equal(list(gen), [{'offset': 5, 'out': 5, 'in': 1}])

    # with multiple outputs
    def split_(s, num):
        yield s.split('/', num)

    data = {'num': 3, 'in': 'datalad/crawler/nodes'}
    split_node = func_to_node(split_, data_args='in',
                              data_kwargs=['num'], outputs=['a', 'b', 'c'])
    assert_in('assigned to a, b, c', split_node.__doc__)
    assert_false('Additional keyword arguments' in split_node.__doc__)
    split_node_gen = split_node(data)
    assert_equal(list(split_node_gen),
                 [{'a': 'datalad', 'c': 'nodes', 'b': 'crawler',
                   'num': 3, 'in': 'datalad/crawler/nodes'}])
def test_reobtain_data(originpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(
        ds.update(merge=True, reobtain_data=True),
        1, action="update", status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(
        ds.update(merge=True, reobtain_data=True),
        1, action="update", status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
def test_property_reevaluation(repo1):

    ds = Dataset(repo1)
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    first_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after creation, we have `repo`, and `config` was reevaluated to point
    # to the repo's config:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    second_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is(ds.config, ds.repo.config)
    assert_is_not(first_config, second_config)
    assert_is_not_none(ds.id)
    first_id = ds.id

    ds.remove()
    # repo is gone, and config is again reevaluated to only provide
    # user/system level config:
    assert_false(lexists(ds.path))
    assert_is_none(ds.repo)
    assert_is_not_none(ds.config)
    third_config = ds.config
    assert_false(ds._cfg_bound)
    assert_is_not(second_config, third_config)
    assert_is_none(ds.id)

    ds.create()
    assert_repo_status(repo1)
    # after recreation everything is sane again:
    assert_is_not_none(ds.repo)
    assert_is_not_none(ds.config)
    assert_is(ds.config, ds.repo.config)
    fourth_config = ds.config
    assert_true(ds._cfg_bound)
    assert_is_not(third_config, fourth_config)
    assert_is_not_none(ds.id)
    assert_not_equal(ds.id, first_id)
def test_is_installed(src, path):
    ds = Dataset(path)
    assert_false(ds.is_installed())
    # get a clone:
    AnnexRepo.clone(src, path)
    ok_(ds.is_installed())
    # submodule still not installed:
    subds = Dataset(opj(path, 'subm 1'))
    assert_false(subds.is_installed())
    # We must not be able to create a new repository under a known
    # subdataset path.
    # Note: Unfortunately we would still be able to generate it under a
    # subdirectory within the submodule, e.g. `subm 1/subdir`, but that is
    # not checked here. `create` provides that protection though.
    res = subds.create(on_failure='ignore',
                       return_type='list',
                       result_filter=None,
                       result_xfm=None)
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, status='error', path=subds.path,
        message=('collision with %s (dataset) in dataset %s',
                 subds.path, ds.path))
    # get the submodule
    # This would init it, leaving a .git file with symlink info, which,
    # as we agreed, is more pain than gain, so let's use our install, which
    # would do it right, after all we are checking 'is_installed' ;)
    # from datalad.cmd import Runner
    # Runner().run(['git', 'submodule', 'update', '--init', 'subm 1'], cwd=path)
    with chpwd(path):
        get('subm 1')
    ok_(subds.is_installed())
    # wipe it out
    rmtree(ds.path)
    assert_false(ds.is_installed())
def test_credentials_from_env():
    keyring = Keyring()
    cred = AWS_S3("test-s3", keyring=keyring)
    assert_false(cred.is_known)
    assert_equal(cred.get('key_id'), None)
    assert_equal(cred.get('secret_id'), None)

    def _check1():
        assert_equal(cred.get('key_id'), '1')
        assert_false(cred.is_known)

    def _check2():
        assert_equal(cred.get('key_id'), '1')
        assert_equal(cred.get('secret_id'), '2')
        assert_true(cred.is_known)

    # this is the old way, should still work
    with patch.dict('os.environ', {'DATALAD_test_s3_key_id': '1'}):
        _check1()
        with patch.dict('os.environ', {'DATALAD_test_s3_secret_id': '2'}):
            _check2()
    assert_false(cred.is_known)  # no memory of the past

    # here is the new way
    import datalad
    try:
        with patch.dict('os.environ',
                        {'DATALAD_CREDENTIAL_test__s3_key__id': '1'}):
            datalad.cfg.reload()
            _check1()
            with patch.dict('os.environ',
                            {'DATALAD_CREDENTIAL_test__s3_secret__id': '2'}):
                datalad.cfg.reload()
                _check2()
            datalad.cfg.reload()
            assert_false(cred.is_known)  # no memory of the past
    finally:
        datalad.cfg.reload()
def test_dedent():
    assert_false(dedent_docstring("one liner").endswith("\n"))
def test_check_dates_empty_repo(path):
    assert_false(check_dates(GitRepo(path, create=True))["objects"])
def check(a, kw=False):
    assert_false('http_proxy' in os.environ)
    assert_false('https_proxy' in os.environ)
    assert_in(kw, [False, 'custom'])
def test_is_legal_metafield():
    for legal in ["legal", "0", "legal_"]:
        assert_true(au.is_legal_metafield(legal))
    for notlegal in ["_not", "with space"]:
        assert_false(au.is_legal_metafield(notlegal))
def test_publish_depends(
        origin, src_path, target1_path, target2_path, target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    # TODO next line would require `add_sibling` to be called with force
    # see gh-1235
    #source.config.add(depvar, 'stupid', where='local')
    #eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling(
        'ssh://localhost' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error',
        message=('unknown sibling(s) specified as publication dependency: %s',
                 set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling(
        'ssh://datalad-test' + target3_path,
        name='target3')
    ok_clean_git(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))
    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')
    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
def test_get_content_info_dotgit(path):
    ds = Dataset(path).create()
    # Files in .git/ won't be reported, though this takes a kludge on our
    # side before Git 2.25.
    assert_false(ds.repo.get_content_info(paths=[op.join(".git", "config")]))
def check_replace_and_relative_sshpath(use_ssh, src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME, the HOME on the remote end would be
    # different even though it is a datalad-test. So we need to query it
    if use_ssh:
        from datalad import ssh_manager
        ssh = ssh_manager.get_connection('datalad-test')
        remote_home, err = ssh('pwd')
        remote_home = remote_home.rstrip('\n')
        dst_relpath = os.path.relpath(dst_path, remote_home)
        url = 'datalad-test:%s' % dst_relpath
        sibname = 'datalad-test'
    else:
        url = dst_path
        sibname = 'local'

    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')

    try:
        res = ds.create_sibling(url, ui=have_webui())
    except UnicodeDecodeError:
        if sys.version_info < (3, 7):
            # observed test failing on ubuntu 18.04 with python 3.6
            # (reproduced in a conda env locally with python 3.6.10 when
            # LANG=C). We will just skip this tricky one
            raise SkipTest("Known failure")
        raise
    assert_in_results(res, action="create_sibling", sibling_name=sibname)
    published = ds.publish(to=sibname, transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    if have_webui():
        # verify that the hook runs and there is nothing in stderr,
        # since it exits with 0 even if there was a problem
        out = Runner(cwd=opj(dst_path, '.git')).run(
            [_path_('hooks/post-update')],
            protocol=StdOutErrCapture)
        assert_false(out['stdout'])
        assert_false(out['stderr'])

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out an IncompleteResultsError exception atm...
    # so just checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    with assert_raises(RuntimeError):
        # but we cannot replace in non-interactive mode
        ds.create_sibling(url, existing='replace', ui=have_webui())

    # We don't have a context manager like @with_testsui, so
    @with_testsui(responses=["yes"])
    def interactive_create_sibling():
        ds.create_sibling(url, existing='replace', ui=have_webui())

    interactive_create_sibling()

    published2 = ds.publish(to=sibname, transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test, since in the above test it would not puke ATM but
    # just not even try to copy, since it assumes that the file is already
    # there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    published3 = ds.publish(to=sibname, transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))

    if not have_webui():
        return

    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to=sibname, transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
def test_external_version_contains():
    ev = ExternalVersions()
    assert_true("datalad" in ev)
    assert_false("does not exist" in ev)
def test_container_from_subdataset(ds_path, src_subds_path, local_file):

    # prepare a to-be subdataset with a registered container
    src_subds = Dataset(src_subds_path).create()
    src_subds.containers_add(
        name="first",
        url=get_local_file_url(op.join(local_file, 'some_container.img')))
    # add it as subdataset to a super ds:
    ds = Dataset(ds_path).create()
    subds = ds.install("sub", source=src_subds_path)
    # add it again one level down to see actual recursion:
    subds.install("subsub", source=src_subds_path)

    # We come up empty without recursive:
    res = ds.containers_list(recursive=False, **RAW_KWDS)
    assert_result_count(res, 0)

    # query available containers from within super:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_in_results(res, action="containers", refds=ds.path)

    # default location within the subdataset:
    target_path = op.join(
        subds.path, '.datalad', 'environments', 'first', 'image')
    assert_result_count(
        res, 1,
        name='sub/first', type='file', action='containers', status='ok',
        path=target_path, parentds=subds.path)

    # not installed subdataset doesn't pose an issue:
    sub2 = ds.create("sub2")
    assert_result_count(ds.subdatasets(), 2, type="dataset")
    ds.uninstall("sub2")
    from datalad.tests.utils import assert_false
    assert_false(sub2.is_installed())

    # same results as before, not crashing or somehow confused by a not
    # present subds:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1,
        name='sub/first', type='file', action='containers', status='ok',
        path=target_path, parentds=subds.path)

    # The default renderer includes the image names.
    with swallow_outputs() as out:
        ds.containers_list(recursive=True)
        lines = out.out.splitlines()
    assert_re_in("sub/first", lines)
    assert_re_in("sub/subsub/first", lines)
    # But we are careful not to render partial names from subdataset
    # traversals (i.e. we recurse with containers_list(...,
    # result_renderer=None)).
    with assert_raises(AssertionError):
        assert_re_in("subsub/first", lines)
def test_something(path, new_home):
    # will refuse to work on dataset without a dataset
    assert_raises(ValueError, ConfigManager, source='dataset')
    # now read the example config
    cfg = ConfigManager(Dataset(opj(path, 'ds')), source='dataset')
    assert_equal(len(cfg), 5)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'],
                 ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')),
                 ['empty', 'myint', 'novalue', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'),
                 ['findme'])

    assert_equal(
        sorted(cfg.items()),
        [(u'onemore.complicated の beast with.dot.findme', '5.0'),
         ('something.empty', ''),
         ('something.myint', '3'),
         ('something.novalue', None),
         ('something.user', ('name=Jane Doe', '[email protected]'))])
    assert_equal(
        sorted(cfg.items('something')),
        [('something.empty', ''),
         ('something.myint', '3'),
         ('something.novalue', None),
         ('something.user', ('name=Jane Doe', '[email protected]'))])

    # always get all values
    assert_equal(cfg.get('something.user'),
                 ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(
        cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'),
        5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    # git demands a key without a value at all to be used as a flag,
    # thus True
    assert_equal(cfg.getbool('something', 'novalue'), True)
    assert_equal(cfg.get('something.novalue'), None)
    # empty value is False
    assert_equal(cfg.getbool('something', 'empty'), False)
    assert_equal(cfg.get('something.empty'), '')
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'),
                 cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'),
                 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of the section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError,
                      cfg.rename_section, 'nothere', 'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set, 'mike.should.have', 'a beer', force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # fails on an unknown location
    assert_raises(ValueError,
                  cfg.add, 'somesuch', 'shit', where='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    with patch.dict('os.environ',
                    {'HOME': new_home,
                     'DATALAD_SNEAKY_ADDITION': 'ignore'}):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert not exists(global_gitconfig)
        globalcfg = ConfigManager()
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', where='global')
        assert exists(global_gitconfig)
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(
                CommandError,
                globalcfg.unset, 'datalad.unittest.youcan', where='local')
        assert globalcfg.has_section('datalad.unittest')
        globalcfg.unset('datalad.unittest.youcan', where='global')
        # but after we unset the only value -- that section is no longer
        # listed
        assert not globalcfg.has_section('datalad.unittest')
        assert_not_in('datalad.unittest.youcan', globalcfg)
        if external_versions['cmd:git'] < '2.18':
            # older versions leave an empty section behind in the file
            ok_file_has_content(global_gitconfig, '[datalad "unittest"]',
                                strip=True)
            # remove_section to clean it up entirely
            globalcfg.remove_section('datalad.unittest', where='global')
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(
        Dataset(opj(path, 'ds')),
        source='dataset',
        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
def test_rerun(path, nodspath):
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.format_commit("%B"))
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # The "diff" section of the report doesn't include the unchanged files
    # that would come in "-f json diff" output.
    for entry in report:
        if entry["rerun_action"] == "run":
            # None of the run commits touch .datalad/config or any other
            # config file.
            assert_false(
                any(r["path"].endswith("config")
                    for r in entry["diff"]))
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip-or-pick")
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()
    eq_('x\n', open(probe_path).read())
def func3(x):
    assert_false(ui.is_interactive)
    return x * 3
def test_publish_depends(
        origin, src_path, target1_path, target2_path, target3_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    source.repo.get('test-annex.dat')
    # pollute config
    depvar = 'remote.target2.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')
    eq_(source.config.get(depvar, None), 'stupid')

    # two remote siblings on two "different" hosts
    source.create_sibling(
        'ssh://localhost' + target1_path,
        annex_wanted='standard',
        annex_group='backup',
        name='target1')
    # fails with unknown remote
    res = source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        publish_depends='bogus',
        on_failure='ignore')
    assert_result_count(
        res, 1,
        status='error',
        message=('unknown sibling(s) specified as publication dependency: %s',
                 set(['bogus'])))
    # for real
    source.create_sibling(
        'ssh://datalad-test' + target2_path,
        name='target2',
        existing='reconfigure',  # because 'target2' is known in polluted cfg
        annex_wanted='standard',
        annex_group='backup',
        publish_depends='target1')
    # wiped out previous dependencies
    eq_(source.config.get(depvar, None), 'target1')
    # and one more remote, on the same host but associated with a dependency
    source.create_sibling(
        'ssh://datalad-test' + target3_path,
        name='target3')
    ok_clean_git(src_path)
    # introduce change in source
    create_tree(src_path, {'probe1': 'probe1'})
    source.add('probe1')
    ok_clean_git(src_path)
    # only the source has the probe
    ok_file_has_content(opj(src_path, 'probe1'), 'probe1')
    for p in (target1_path, target2_path, target3_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to a standalone remote
    source.publish(to='target3')
    ok_(lexists(opj(target3_path, 'probe1')))
    # but it has no data copied
    target3 = Dataset(target3_path)
    nok_(target3.repo.file_has_content('probe1'))
    # but if we publish specifying its path, it gets copied
    source.publish('probe1', to='target3')
    ok_file_has_content(opj(target3_path, 'probe1'), 'probe1')
    # no others are affected in either case
    for p in (target1_path, target2_path):
        assert_false(lexists(opj(p, 'probe1')))
    # publish to all remaining, but via a dependency
    source.publish(to='target2')
    for p in (target1_path, target2_path, target3_path):
        ok_file_has_content(opj(p, 'probe1'), 'probe1')
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    assert_raises(ValueError, ds.subdatasets)
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])
    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.save(path='test', message="Hello!", version_tag=1)
    assert_true(ds.is_installed())
    # assuming that the tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path,
                       result_xfm='datasets', return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save(message="with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(
        _path_('d1/subds'), source=path,
        result_xfm='datasets', return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    # by default, it will only report a superdataset that actually
    # has the queried dataset as a registered true subdataset
    eq_(subsubds.get_superdataset(topmost=True), subds)
    # but we can also ask for a dataset that is merely above
    eq_(subsubds.get_superdataset(topmost=True, registered_only=False), ds)

    # verify that the '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, subds)
        # and while in the dataset we still can resolve into the central one
        dscentral = Dataset('///')
        eq_(dscentral.path,
            dl_cfg.obtain('datalad.locations.default-dataset'))

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)

    # TODO actual submodule checkout is still there

    # Test the ^. (the dataset for curdir) shortcut
    # At the top it should point to the top
    with chpwd(ds.path):
        dstop = Dataset('^.')
        eq_(dstop, ds)
    # and still does within a subdir
    os.mkdir(opj(ds.path, 'subdir'))
    with chpwd(opj(ds.path, 'subdir')):
        dstop = Dataset('^.')
        eq_(dstop, ds)
    # within a submodule it will point to the submodule
    with chpwd(subsubds.path):
        dstop = Dataset('^.')
        eq_(dstop, subsubds)
def test_get_content_info_paths_empty_list(path):
    ds = Dataset(path).create()

    # Unlike None, passing any empty list as paths to get_content_info()
    # does not report on all content.
    assert_false(ds.repo.get_content_info(paths=[]))
    assert_false(ds.repo.get_content_info(paths=[], ref="HEAD"))

    # Add annex content to make sure it's not reported.
    (ds.pathobj / "foo").write_text("foo")
    ds.save()

    # Same for get_content_annexinfo()...
    assert_false(ds.repo.get_content_annexinfo(paths=[]))
    assert_false(ds.repo.get_content_annexinfo(paths=[], init=None))
    assert_false(ds.repo.get_content_annexinfo(paths=[], ref="HEAD"))
    assert_false(ds.repo.get_content_annexinfo(paths=[], ref="HEAD",
                                               init=None))
    # ... where whatever was passed for init will be returned as is.
    assert_equal(
        ds.repo.get_content_annexinfo(
            paths=[], ref="HEAD",
            init={"random": {"entry": "a"}}),
        {"random": {"entry": "a"}})
def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)  # create git repo
    git.add(opj(topdir, 'dir', 'subgit', 'fgit.txt'))  # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir', 'subgit'))  # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))  # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'),
               options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')  # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.add(opj('dir', 'subgit'))  # add the non-dataset git repo to annex
    ds.add('dir')  # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file(
        'fromweb', topurl + '/noteventhere', options=['--relaxed'])

    for all_ in [True, False]:
        # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # subdataset should have its json created and deleted when
                # all=True, else not
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(
                    topdir,
                    json=state,
                    all_=all_,
                    recursive=recursive)
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all
                # cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata json's created and
                # deleted only when recursive=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath),
                             (state == 'file' and all_))

                # ignored directories should not have json files created in
                # any case
                for subdir in [('.hidden',), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check if it is updated in its nodes sublist too.
                # used by web-ui json. regression test
                assert_equal(dsj['nodes'][0]['size']['total'],
                             dsj['size']['total'])

                # check size of subdataset
                subds = [item for item in dsj['nodes']
                         if item['name'] == ('subdsfile.txt' or 'subds')][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top
                # dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit of guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_
                # # and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run non-recursive dataset traversal after subdataset
                # metadata has already been created, to verify sub-dataset
                # metadata being picked up from its metadata file in such
                # cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [
                        item for item in dsj['nodes']
                        if item['name'] == ('subdsfile.txt' or 'subds')][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(
                    topds_nodes['fromweb']['size']['total'], UNKNOWN_SIZE)
def test_fmt_to_name():
    eq_(au.fmt_to_name("{name}", {}), "name")
    eq_(au.fmt_to_name("{0}", {0: "name"}), "name")
    eq_(au.fmt_to_name("{1}", {0: "name"}), "1")
    assert_false(au.fmt_to_name("frontmatter{name}", {}))
    assert_false(au.fmt_to_name("{name}backmatter", {}))
    assert_false(au.fmt_to_name("{two}{names}", {}))
    assert_false(au.fmt_to_name("", {}))
    assert_false(au.fmt_to_name("nonames", {}))
    assert_false(au.fmt_to_name("{}", {}))

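# The behavior pinned down above can be reproduced with string.Formatter.
# A minimal re-implementation sketch (an assumption of mine -- the real
# datalad helper may differ in details): a format string maps to a name only
# if it consists of exactly one bare replacement field.
from string import Formatter

def fmt_to_name_sketch(format_string, num2name):
    """Return the single field's name, or None if there isn't exactly one."""
    parsed = list(Formatter().parse(format_string))
    if len(parsed) != 1:
        return None  # zero fields, or multiple fields
    literal, name, _, _ = parsed[0]
    if literal or not name:
        return None  # surrounding literal text, or an anonymous "{}" field
    if name.isdigit():
        # positional field: translate through the mapping if known
        return num2name.get(int(name), name)
    return name
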
def test_run_inputs_outputs(src, path):
    for subds in [("s0", "s1_0", "s2"),
                  ("s0", "s1_1", "s2"),
                  ("s0", "s1_0"),
                  ("s0", "s1_1"),
                  ("s0", "ss"),
                  ("s0",)]:
        Dataset(op.join(*((src,) + subds))).create(force=True)
    src_ds = Dataset(src).create(force=True)
    src_ds.add(".", recursive=True)

    ds = install(path, source=src,
                 result_xfm='datasets', return_type='item-or-list')
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))

    # The specified inputs and extra inputs will be retrieved before the run.
    # (Use run_command() to access the extra_inputs argument.)
    list(run_command("cat {inputs} {inputs} >doubled.dat",
                     dataset=ds,
                     inputs=["input.dat"],
                     extra_inputs=["extra-input.dat"]))

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))
    with open(opj(path, "doubled.dat")) as fh:
        content = fh.read()
        assert_in("input", content)
        assert_not_in("extra-input", content)

    # Rerunning the commit will also get the input file.
    ds.repo.drop(["input.dat", "extra-input.dat"], options=["--force"])
    assert_false(ds.repo.file_has_content("input.dat"))
    assert_false(ds.repo.file_has_content("extra-input.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("input.dat"))
    ok_(ds.repo.file_has_content("extra-input.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["not-there"])
        assert_in("Input does not exist: ", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.format_commit("%B"))
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ok_(ds.repo.file_has_content("after-dot-run"))

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop(["a.dat", "d.txt"], options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.DEBUG) as cml:
        with swallow_outputs():
            ds.run("echo blah", outputs=["not-there"])
        assert_in("Filtered out non-existing path: ", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.format_commit("%B"))
    assert_in("b.dat", ds.repo.format_commit("%B"))
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

    # We install subdatasets to fully resolve globs.
    ds.uninstall("s0")
    assert_false(Dataset(op.join(path, "s0")).is_installed())
    ds.run("echo {inputs} >globbed-subds", inputs=["s0/s1_*/s2/*.dat"])
    ok_file_has_content(op.join(ds.path, "globbed-subds"),
                        "s0/s1_0/s2/a.dat s0/s1_1/s2/c.dat", strip=True)

    ds_ss = Dataset(op.join(path, "s0", "ss"))
    assert_false(ds_ss.is_installed())
    ds.run("echo blah >{outputs}", outputs=["s0/ss/out"])
    ok_(ds_ss.is_installed())
    ok_file_has_content(op.join(ds.path, "s0", "ss", "out"),
                        "blah", strip=True)

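# The glob semantics exercised above can be approximated with plain ``glob``.
# A rough sketch under assumptions of mine -- datalad's actual machinery
# (GlobbedPaths) is more involved; unmatched patterns are kept verbatim here
# so a caller can warn about them, mirroring the "Input does not exist"
# check in the test:
import glob
import os.path as op

def expand_input_patterns(dspath, patterns):
    """Expand glob patterns relative to a dataset root (illustrative only)."""
    expanded = []
    for pat in patterns:
        hits = sorted(glob.glob(op.join(dspath, pat)))
        # keep the pattern itself if nothing matched, so it can be reported
        expanded.extend(hits if hits else [pat])
    return expanded
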
def test_update_volatile_subds(originpath, otherpath, destpath):
    origin = Dataset(originpath).create()
    ds = install(source=originpath, path=destpath,
                 result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1,
                        action='update', status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        action='update', status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname)
    assert_result_count(ds.update(merge=True), 1,
                        action='update', status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1,
                        action='update', status='ok', type='dataset')
    # grab new content of the uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True), 2,
                        action='update', status='ok', type='dataset')
    assert_repo_status(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, check=False)
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        action='update', type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the disconnected subdataset for further tests; using
    # neither a bound method nor a parentds argument, as neither should be
    # needed to get a clean dataset
    remove(op.join(ds.path, sname), check=False)
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)

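# For orientation: conceptually, ds.update(merge=True) above amounts to
# fetching the sibling and merging its branch -- roughly the following git
# operations (a simplification of mine; the real command also handles
# subdataset registration changes and yields result records):
#
#   git fetch <sibling>
#   git merge <sibling>/<branch>
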
def test_get_cached_dataset(cache_dir):
    # patch DATALAD_TESTS_CACHE to not use the actual cache with
    # the test testing that very cache.
    cache_dir = Path(cache_dir)

    # store file-based values for testrepo-minimalds for readability:
    annexed_file = opj('inannex', 'animated.gif')
    annexed_file_key = "MD5E-s144625--4c458c62b7ac8ec8e19c8ff14b2e34ad.gif"

    with patch(CACHE_PATCH_STR, new=cache_dir):
        # tuples to test (url, version, keys, class):
        test_cases = [
            # a simple testrepo
            ("https://github.com/datalad/testrepo--minimalds",
             "541cf855d13c2a338ff2803d4488daf0035e568f",
             None,
             AnnexRepo),
            # Same repo, but request paths to be present. This should work
            # with a subsequent call, although the first one did not already
            # request any:
            ("https://github.com/datalad/testrepo--minimalds",
             "9dd8b56cc706ab56185f2ceb75fbe9de9b606724",
             annexed_file_key,
             AnnexRepo),
            # Same repo again, but invalid version
            ("https://github.com/datalad/testrepo--minimalds",
             "nonexistent",
             "irrelevantkey",  # invalid version; don't even try to get the key
             AnnexRepo),
            # same thing with different name should be treated as a new thing:
            ("https://github.com/datalad/testrepo--minimalds",
             "git-annex",
             None,
             AnnexRepo),
            # try a plain git repo to make sure we can deal with that:
            # Note, that we first need a test case w/o a `key` parameter to not
            # blow up the test when Clone is patched, resulting in a MagicMock
            # instead of a Dataset instance within get_cached_dataset. In the
            # second case it's already cached then, so the patched Clone is
            # never executed.
            ("https://github.com/datalad/datalad.org",
             None,
             None,
             GitRepo),
            ("https://github.com/datalad/datalad.org",
             "gh-pages",
             "ignored-key",  # it's a git repo; don't even try to get a key
             GitRepo),
        ]
        for url, version, keys, cls in test_cases:
            target = cache_dir / url2filename(url)

            # assuming it doesn't exist yet - patched cache dir!
            in_cache_before = target.exists()
            with patch(CLONE_PATCH_STR) as exec_clone:
                try:
                    ds = get_cached_dataset(url, version, keys)
                    invalid_version = False
                except AssertionError:
                    # should happen only if `version` wasn't found. Implies
                    # that the dataset exists in cache (although not returned
                    # due to exception)
                    assert_true(version)
                    assert_false(Dataset(target).repo.commit_exists(version))
                    # mark for later assertions (most of them should still
                    # hold true)
                    invalid_version = True
            assert_equal(exec_clone.call_count, 0 if in_cache_before else 1)

            # Patch prevents actual execution. Now do it for real. Note that
            # this might be necessary for content retrieval even if the
            # dataset was in cache before.
            try:
                ds = get_cached_dataset(url, version, keys)
            except AssertionError:
                # see previous call
                assert_true(invalid_version)

            assert_is_instance(ds, Dataset)
            assert_true(ds.is_installed())
            assert_equal(target, ds.pathobj)
            assert_is_instance(ds.repo, cls)

            if keys and not invalid_version and \
                    AnnexRepo.is_valid_repo(ds.path):
                # Note: it's not supposed to get that content if the passed
                # `version` wasn't available. get_cached_dataset would then
                # raise before and not download anything only to raise
                # afterwards.
                here = ds.config.get("annex.uuid")
                where = ds.repo.whereis(ensure_list(keys), key=True)
                assert_true(all(here in remotes for remotes in where))

            # version check. Note that all `get_cached_dataset` is supposed
            # to do is verify that the specified version exists -- NOT check
            # it out
            if version and not invalid_version:
                assert_true(ds.repo.commit_exists(version))

            # re-execution
            with patch(CLONE_PATCH_STR) as exec_clone:
                try:
                    ds2 = get_cached_dataset(url, version, keys)
                except AssertionError:
                    assert_true(invalid_version)
            exec_clone.assert_not_called()
            # returns the same Dataset as before:
            assert_is(ds, ds2)

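# ``url2filename`` above maps a clone URL onto a single path component used
# as the cache key; its real definition lives elsewhere in datalad's test
# utilities.  A plausible minimal stand-in (an assumption, not the canonical
# implementation):
from urllib.parse import quote

def url2filename_sketch(url):
    """Turn a URL into a flat, filesystem-safe file name."""
    return quote(url, safe="")
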
def test_run_subdataset_install(path):
    path = Path(path)
    ds_src = Dataset(path / "src").create()
    # Repository setup
    #
    # .
    # |-- a/
    # |   |-- a2/
    # |   |   `-- img
    # |   `-- img
    # |-- b/            (module name: b-name)
    # |   `-- b2/
    # |       `-- img
    # |-- c/
    # |   `-- c2/
    # |       `-- img
    # `-- d/            (module name: d-name)
    #     `-- d2/
    #         `-- img
    ds_src_a = ds_src.create("a")
    ds_src_a2 = ds_src_a.create("a2")
    ds_src_b = Dataset(ds_src.pathobj / "b").create()
    ds_src_b2 = ds_src_b.create("b2")
    ds_src_c = ds_src.create("c")
    ds_src_c2 = ds_src_c.create("c2")
    ds_src_d = Dataset(ds_src.pathobj / "d").create()
    ds_src_d2 = ds_src_d.create("d2")

    ds_src.repo.add_submodule("b", name="b-name")
    ds_src.repo.add_submodule("d", name="d-name")
    ds_src.save()

    add_pyscript_image(ds_src_a, "in-a", "img")
    add_pyscript_image(ds_src_a2, "in-a2", "img")
    add_pyscript_image(ds_src_b2, "in-b2", "img")
    add_pyscript_image(ds_src_c2, "in-c2", "img")
    add_pyscript_image(ds_src_d2, "in-d2", "img")

    ds_src.save(recursive=True)

    ds_dest = clone(ds_src.path, str(path / "dest"))
    ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2")
    ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2")
    ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2")
    ds_dest_d2 = Dataset(ds_dest.pathobj / "d" / "d2")
    assert_false(ds_dest_a2.is_installed())
    assert_false(ds_dest_b2.is_installed())
    assert_false(ds_dest_c2.is_installed())
    assert_false(ds_dest_d2.is_installed())

    # Needed subdatasets are installed if the container name is given...
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_a2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_a2.pathobj / "img"))
    ok_(ds_dest_a2.is_installed())

    # ... even if the name and path do not match.
    res = ds_dest.containers_run(["arg"], container_name="b-name/b2/in-b2")
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_b2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_b2.pathobj / "img"))
    ok_(ds_dest_b2.is_installed())

    # Subdatasets will also be installed if given an image path...
    res = ds_dest.containers_run(["arg"],
                                 container_name=str(Path("c/c2/img")))
    assert_result_count(res, 1, action="install", status="ok",
                        path=ds_dest_c2.path)
    assert_result_count(res, 1, action="get", status="ok",
                        path=str(ds_dest_c2.pathobj / "img"))
    ok_(ds_dest_c2.is_installed())

    # ... unless the module name chain doesn't match the subdataset path. In
    # that case, the caller needs to install the subdatasets beforehand.
    with assert_raises(ValueError):
        ds_dest.containers_run(["arg"],
                               container_name=str(Path("d/d2/img")))
    ds_dest.get(ds_dest_d2.path, recursive=True, get_data=False)
    ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")))

    # There's no install record if the subdataset is already present.
    res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2")
    assert_not_in_results(res, action="install")

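# ``add_pyscript_image`` is a helper not defined in this file.  A minimal
# sketch of what it plausibly does -- save a tiny Python script and register
# it as a "container image" via datalad-container's containers_add; the exact
# script body and call_fmt here are assumptions, not the canonical helper:
def add_pyscript_image(ds, container_name, file_name):
    """Save a trivial Python script into ``ds`` and register it as an image."""
    img_file = ds.pathobj / file_name
    img_file.write_text("import sys\nprint(sys.argv)\n")
    ds.save(img_file, message="Add dummy container image")
    # have the script "executed" as the container
    ds.containers_add(container_name, image=file_name,
                      call_fmt="python {img} {cmd}")
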
def test_rerun_multifork(path):
    ds = Dataset(path).create()
    ds.repo.checkout(DEFAULT_BRANCH, options=["-b", "side"])
    ds.run("echo foo >foo")
    ds.repo.checkout("side", options=["-b", "side-nonrun"])
    with open(op.join(path, "nonrun-file0"), "w") as f:
        f.write("blah 0")
    ds.save()
    ds.repo.checkout("side")
    with open(op.join(path, "nonrun-file1"), "w") as f:
        f.write("blah 1")
    ds.save()
    ds.run("echo bar >bar")
    ds.repo.checkout("side~1", options=["-b", "side-side"])
    with open(op.join(path, "nonrun-file2"), "w") as f:
        f.write("blah 2")
    ds.save()
    ds.run("echo onside0 >onside0")
    ds.repo.checkout("side")
    ds.repo.merge("side-side")
    ds.run("echo after-side-side >after-side-side")
    ds.repo.checkout(DEFAULT_BRANCH)
    ds.repo.merge("side", options=["--no-ff"])
    ds.repo.merge("side-nonrun")
    # Commit graph before the rerun (suffixes: _n = non-run commit,
    # _r = run commit):
    #
    # o            k_n
    # |\
    # | o          j_n
    # o |          i_n
    # |\ \
    # | o |        h_r
    # | o |        g_n
    # | |\ \
    # | | o |      f_r
    # | | o |      e_n
    # | o | |      d_r
    # | |/ /
    # | o |        c_n
    # | |/
    # | o          b_r
    # |/
    # o            a_n
    ds.rerun(since="", onto="")
    # ... and after the rerun; capitalized suffixes mark commits newly
    # created by it (_R = re-executed run commit, _M = recreated merge):
    #
    # o            k_M
    # |\
    # | o          j_n
    # o |          i_M
    # |\ \
    # | o |        h_R
    # | o |        g_M
    # | |\ \
    # | | o |      f_R
    # | | o |      e_n
    # | o | |      d_R
    # | |/ /
    # | o |        c_n
    # | |/
    # | o          b_R
    # |/
    # o            a_n
    eq_(ds.repo.get_hexsha("HEAD~2"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "~2"))
    neq_(ds.repo.get_hexsha("HEAD^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^2"))
    neq_(ds.repo.get_hexsha("HEAD^^2"),
         ds.repo.get_hexsha(DEFAULT_BRANCH + "^^2"))
    assert_false(ds.repo.commit_exists("HEAD^^2^2"))
    eq_(ds.repo.get_hexsha("HEAD^2^^"),
        ds.repo.get_hexsha(DEFAULT_BRANCH + "^2^^"))

    ds.repo.checkout(DEFAULT_BRANCH)
    hexsha_before = ds.repo.get_hexsha()
    ds.rerun(since="")
    eq_(hexsha_before, ds.repo.get_hexsha())

def test_run_inputs_outputs(path):
    ds = Dataset(path)
    assert_false(ds.repo.file_has_content("test-annex.dat"))

    # If we specify test-annex.dat as an input, it will be retrieved before
    # the run.
    ds.run("cat test-annex.dat test-annex.dat >doubled.dat",
           inputs=["test-annex.dat"])

    ok_clean_git(ds.path)
    ok_(ds.repo.file_has_content("test-annex.dat"))
    ok_(ds.repo.file_has_content("doubled.dat"))

    # Rerunning the commit will also get the input file.
    ds.repo.drop("test-annex.dat", options=["--force"])
    assert_false(ds.repo.file_has_content("test-annex.dat"))
    ds.rerun()
    ok_(ds.repo.file_has_content("test-annex.dat"))

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("touch dummy", inputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'", cml.out)

    # Test different combinations of globs and explicit files.
    inputs = ["a.dat", "b.dat", "c.txt", "d.txt"]
    create_tree(ds.path, {i: i for i in inputs})

    ds.add(".")
    ds.repo.copy_to(inputs, remote="origin")
    ds.repo.drop(inputs, options=["--force"])

    test_cases = [(["*.dat"], ["a.dat", "b.dat"]),
                  (["*.dat", "c.txt"], ["a.dat", "b.dat", "c.txt"]),
                  (["*"], inputs)]

    for idx, (inputs_arg, expected_present) in enumerate(test_cases):
        assert_false(any(ds.repo.file_has_content(i) for i in inputs))

        ds.run("touch dummy{}".format(idx), inputs=inputs_arg)
        ok_(all(ds.repo.file_has_content(f) for f in expected_present))
        # Globs are stored unexpanded by default.
        assert_in(inputs_arg[0], ds.repo.repo.head.commit.message)
        ds.repo.drop(inputs, options=["--force"])

    # --input can be passed a subdirectory.
    create_tree(ds.path, {"subdir": {"a": "subdir a", "b": "subdir b"}})
    ds.add("subdir")
    ds.repo.copy_to(["subdir/a", "subdir/b"], remote="origin")
    ds.repo.drop("subdir", options=["--force"])
    ds.run("touch subdir-dummy", inputs=[opj(ds.path, "subdir")])
    ok_(all(ds.repo.file_has_content(opj("subdir", f)) for f in ["a", "b"]))

    # Inputs are specified relative to a dataset's subdirectory.
    ds.repo.drop(opj("subdir", "a"), options=["--force"])
    with chpwd(opj(path, "subdir")):
        run("touch subdir-dummy1", inputs=["a"])
    ok_(ds.repo.file_has_content(opj("subdir", "a")))

    # --input=. runs "datalad get ."
    ds.run("touch dot-dummy", inputs=["."])
    eq_(ds.repo.get_annexed_files(),
        ds.repo.get_annexed_files(with_content_only=True))
    # On rerun, we get all files, even those that weren't in the tree at the
    # time of the run.
    create_tree(ds.path, {"after-dot-run": "after-dot-run content"})
    ds.add(".")
    ds.repo.copy_to(["after-dot-run"], remote="origin")
    ds.repo.drop(["after-dot-run"], options=["--force"])
    ds.rerun("HEAD^")
    ok_(ds.repo.file_has_content("after-dot-run"))

    # --output will unlock files that are present.
    ds.repo.get("a.dat")
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    # --output will remove files that are not present.
    ds.repo.drop("a.dat", options=["--force"])
    ds.run("echo ' appended' >>a.dat", outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), " appended\n")

    # --input can be combined with --output.
    ds.repo.repo.git.reset("--hard", "HEAD~2")
    ds.run("echo ' appended' >>a.dat", inputs=["a.dat"], outputs=["a.dat"])
    with open(opj(path, "a.dat")) as fh:
        eq_(fh.read(), "a.dat appended\n")

    with swallow_logs(new_level=logging.WARN) as cml:
        ds.run("echo blah", outputs=["*.not-an-extension"])
        assert_in("No matching files found for '*.not-an-extension'", cml.out)

    ds.create('sub')
    ds.run("echo sub_orig >sub/subfile")
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])
    ds.drop("sub/subfile", check=False)
    ds.run("echo sub_overwrite >sub/subfile", outputs=["sub/subfile"])

    # --input/--output globs can be stored in expanded form.
    ds.run("touch expand-dummy", inputs=["a.*"], outputs=["b.*"],
           expand="both")
    assert_in("a.dat", ds.repo.repo.head.commit.message)
    assert_in("b.dat", ds.repo.repo.head.commit.message)
    res = ds.rerun(report=True, return_type='item-or-list')
    eq_(res["run_info"]['inputs'], ["a.dat"])
    eq_(res["run_info"]['outputs'], ["b.dat"])

def test_target_ssh_simple(origin, src_path, target_rootpath):
    port = get_ssh_port("datalad-test")
    # prepare src
    source = install(src_path, source=origin,
                     result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(dataset=source, name="local_target",
                       sshurl="ssh://*****:*****")  # sshurl masked in source

    @with_testsui(responses=["yes"])
    def interactive_assert_create_sshwebserver():
        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://datalad-test" + target_path,
            publish_by_default=DEFAULT_BRANCH,
            existing='replace',
            ui=have_webui(),
        )

    interactive_assert_create_sshwebserver()

    eq_("ssh://datalad-test" + urlquote(target_path),
        source.repo.get_remote_url("local_target"))
    ok_(source.repo.get_remote_url("local_target", push=True) is None)

    # ensure target tree actually replaced by source
    assert_false(exists(opj(target_path, 'random')))

    # (``src_is_annex`` presumably came from the masked span above:)
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    if src_is_annex:
        lclcfg = AnnexRepo(src_path).config
        eq_(lclcfg.get('remote.local_target.annex-ignore'), 'false')
        # valid uuid
        eq_(lclcfg.get('remote.local_target.annex-uuid').count('-'), 4)
        # should be added too, even if URL matches prior state
        eq_(lclcfg.get('remote.local_target.push'), DEFAULT_BRANCH)

    # again, by explicitly passing urls. Since we are on datalad-test, the
    # local path should work:
    cpkwargs = dict(
        dataset=source,
        name="local_target",
        sshurl="ssh://datalad-test",
        target_dir=target_path,
        target_url=target_path,
        target_pushurl="ssh://datalad-test" + target_path,
        ui=have_webui(),
    )

    @with_testsui(responses=['yes'])
    def interactive_assert_create_sshwebserver():
        assert_create_sshwebserver(existing='replace', **cpkwargs)

    interactive_assert_create_sshwebserver()

    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        eq_(target_description, target_path)

    eq_(target_path, source.repo.get_remote_url("local_target"))
    eq_("ssh://datalad-test" + target_path,
        source.repo.get_remote_url("local_target", push=True))

    if have_webui():
        from datalad_deprecated.tests.test_create_sibling_webui \
            import assert_publish_with_ui
        assert_publish_with_ui(target_path)

    # now, push should work:
    publish(dataset=source, to="local_target")

    # and we should be able to 'reconfigure'
    def process_digests_mtimes(digests, mtimes):
        # it should have triggered a hook, which would have created log and
        # metadata files
        check_metadata = False
        for part in 'logs', 'metadata':
            metafiles = [
                k for k in digests
                if k.startswith(_path_('.git/datalad/%s/' % part))
            ]
            # This is in effect ONLY if we have "compatible" datalad installed
            # on the remote end. ATM we don't have an easy way to guarantee
            # that AFAIK (yoh), so let's not check/enforce (TODO)
            # assert(len(metafiles) >= 1)

            # we might have 2 logs if timestamps do not collide ;)
            # Let's actually do it to some degree
            if part == 'logs':
                # always should have those:
                assert (len(metafiles) >= 1)
                with open(opj(target_path, metafiles[0])) as f:
                    if 'no datalad found' not in f.read():
                        check_metadata = True
            if part == 'metadata':
                eq_(len(metafiles), bool(check_metadata))
            for f in metafiles:
                digests.pop(f)
                mtimes.pop(f)
        # and just pop some leftovers from annex
        # and ignore .git/logs content (gh-5298)
        for f in list(digests):
            if f.startswith('.git/annex/mergedrefs') \
                    or f.startswith('.git/logs/'):
                digests.pop(f)
                mtimes.pop(f)

    if not have_webui():
        # the rest of the test assumed that we have uploaded a UI
        return

    orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
    process_digests_mtimes(orig_digests, orig_mtimes)

    import time
    time.sleep(0.1)  # just so that mtimes change
    assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
    digests, mtimes = get_mtimes_and_digests(target_path)
    process_digests_mtimes(digests, mtimes)

    # nothing should change in terms of content
    assert_dict_equal(orig_digests, digests)

    # but some files should have been modified
    modified_files = {
        k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
    }
    # collect which files were expected to be modified without incurring
    # any changes
    ok_modified_files = {
        _path_('.git/hooks/post-update'),
        'index.html',
    }
    ok_modified_files.add(_path_('.git/config'))
    ok_modified_files.update(
        {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
    # it seems that with some recent git, behavior has changed a bit and the
    # index might get touched
    if _path_('.git/index') in modified_files:
        ok_modified_files.add(_path_('.git/index'))
    ok_(modified_files.issuperset(ok_modified_files))

def test_add_archive_content(path_orig, url, repo_path):
    with chpwd(repo_path):
        # TODO we need to be able to pass path into add_archive_content
        # We could mock, but I mean for the API
        assert_raises(RuntimeError, add_archive_content,
                      "nonexisting.tar.gz")  # no repo yet

        repo = AnnexRepo(repo_path, create=True)
        assert_raises(ValueError, add_archive_content, "nonexisting.tar.gz")
        # we can't add a file from outside the repo ATM
        assert_raises(FileNotInRepositoryError, add_archive_content,
                      opj(path_orig, '1.tar.gz'))

        # Let's add the first archive to the repo so we could test
        with swallow_outputs():
            repo.add_urls([opj(url, '1.tar.gz')],
                          options=["--pathdepth", "-1"])
            for s in range(1, 5):
                repo.add_urls([opj(url, '%du/1.tar.gz' % s)],
                              options=["--pathdepth", "-2"])
        repo.commit("added 1.tar.gz")

        key_1tar = repo.get_file_key('1.tar.gz')  # will be used in the test later

        def d1_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '1 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
            ok_archives_caches(repo_path, 0)

        # and by default it just does it, everything goes to annex
        repo_ = add_archive_content('1.tar.gz')
        eq_(repo.path, repo_.path)
        d1_basic_checks()

        # If run again, it should proceed just fine since the content is the
        # same, so no changes would really be made
        add_archive_content('1.tar.gz')

        # But that other one carries an updated file, so it should fail due
        # to an overwrite
        with assert_raises(RuntimeError) as cme:
            add_archive_content(opj('1u', '1.tar.gz'), use_current_dir=True)

        # TODO: somewhat imprecise, since we have two possible
        # "already exists" -- in caching and in the overwrite check
        assert_in("already exists", str(cme.exception))
        # but should do fine if overrides are allowed
        add_archive_content(opj('1u', '1.tar.gz'), existing='overwrite',
                            use_current_dir=True)
        add_archive_content(opj('2u', '1.tar.gz'),
                            existing='archive-suffix', use_current_dir=True)
        add_archive_content(opj('3u', '1.tar.gz'),
                            existing='archive-suffix', use_current_dir=True)
        add_archive_content(opj('4u', '1.tar.gz'),
                            existing='archive-suffix', use_current_dir=True)

        # rudimentary test
        assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                     ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
        whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
        # they all must be the same
        assert (all([x == whereis[0] for x in whereis[1:]]))

    # and we should be able to reference it while under a subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'), use_current_dir=True)
        d1_basic_checks()
        # or we could keep the relative path and also demand to keep the
        # archive prefix while extracting under the original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'),
                            add_archive_leading_dir=True)

    with chpwd(opj(repo_path, '1')):
        d1_basic_checks()

    with chpwd(repo_path):
        # test with excludes and renames and annex options
        add_archive_content('1.tar.gz', exclude=['d'],
                            rename=['/ /_', '/^1/2'],
                            annex_options="-c annex.largefiles=exclude=*.txt",
                            delete=True)
        # no conflicts since new name
        ok_file_under_git('2', '1_f.txt', annexed=False)
        assert_false(exists(opj('2', 'd')))
        assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test the ability to extract within a subdir
    with chpwd(opj(repo_path, 'd1'), mkdir=True):
        # Let's add the first archive to the repo so we could test --
        # named the same way, but with different content
        with swallow_outputs():
            repo.add_urls([opj(url, 'd1', '1.tar.gz')],
                          options=["--pathdepth", "-1"],
                          cwd=getpwd())  # invoke under current subdir
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz')
        d2_basic_checks()

    # in manual tests we ran into a situation where a file from an archive
    # that was coming from a dropped key could not be obtained in a single
    # run. I thought it was tested in custom remote tests, but I guess not
    # sufficiently well
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # that's what managed to not work
    # TODO: check if the persistent archive is there for the 1.tar.gz

    # We should be able to drop everything, since it's available online
    with swallow_outputs():
        clean(dataset=repo.path)
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # bug was that dropping didn't work, since the archive was dropped first
    repo.call_annex(["drop", "--all"])

    # verify that we can't drop a file if the archive key was dropped and the
    # online archive was removed or changed in size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    with assert_raises(CommandError) as e:
        repo.drop(key_1tar, key=True)
    assert_equal(e.kwargs['stdout_json'][0]['success'], False)
    assert_result_values_cond(
        e.kwargs['stdout_json'], 'note',
        lambda x: '(Use --force to override this check, or adjust numcopies.)' in x)
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))