def test_uninstall_git_file(path):
    """Drop/uninstall/remove semantics for a file tracked in Git (not annex)."""
    ds = Dataset(path)
    ok_(ds.is_installed())
    ok_(exists(opj(path, 'INFO.txt')))
    ok_file_under_git(ds.repo.path, 'INFO.txt')

    # drop file in Git in an annex repo
    # regardless of the type of repo this is 'notneeded'...
    # it is less about education that about "can we
    # we get the content back?", and for a file in Git we can
    assert_result_count(
        ds.drop(path='INFO.txt'), 1,
        status='notneeded',
        message="no annex'ed content")

    res = ds.uninstall(path="INFO.txt", on_failure='ignore')
    assert_result_count(
        res, 1,
        status='impossible',
        message='can only uninstall datasets (consider the `drop` command)')

    # remove the file:
    res = ds.remove(path='INFO.txt', result_xfm='paths',
                    result_filter=lambda x: x['action'] == 'remove')
    assert_raises(AssertionError, ok_file_under_git, ds.repo.path, 'INFO.txt')
    ok_(not exists(opj(path, 'INFO.txt')))
    eq_(res, ['INFO.txt'])
def test_url_base():
    """Basic construction, field access, equality and repr of URL."""
    # Basic checks
    assert_raises(ValueError, URL, "http://example.com", hostname='example.com')
    url = URL("http://example.com")
    eq_(url.hostname, 'example.com')
    eq_(url.scheme, 'http')
    eq_(url.port, '')  # not specified -- empty strings
    eq_(url.username, '')  # not specified -- empty strings
    eq_(repr(url), "URL(hostname='example.com', scheme='http')")
    eq_(url, "http://example.com")  # automagic coercion in __eq__
    neq_(URL(), URL(hostname='x'))

    smth = URL('smth')
    eq_(smth.hostname, '')
    ok_(bool(smth))
    nok_(bool(URL()))

    assert_raises(ValueError, url._set_from_fields, unknown='1')

    with swallow_logs(new_level=logging.WARNING) as cml:
        # we don't "care" about params ATM so there is a warning if there are any
        purl = URL("http://example.com/;param")
        # but we do maintain original string
        eq_(str(purl), 'http://example.com/;param')
        assert_in('ParseResults contains params', cml.out)
        eq_(purl.as_str(), 'http://example.com/')
def test_install_subdataset(src, path):
    """Install known subdatasets (and a file within one) of a superdataset."""
    # get the superdataset:
    ds = install(path=path, source=src)

    # subdataset not installed:
    subds = Dataset(opj(path, 'sub1'))
    assert_false(subds.is_installed())

    # install it:
    ds.install('sub1')
    ok_(subds.is_installed())
    # Verify that it is the correct submodule installed and not
    # new repository initiated
    assert_equal(set(subds.repo.get_indexed_files()),
                 {'test.dat', 'INFO.txt', 'test-annex.dat'})

    # Now the obnoxious install an annex file within not yet
    # initialized repository!
    with swallow_outputs():  # progress bar
        ds.install(opj('sub2', 'test-annex.dat'))
    subds2 = Dataset(opj(path, 'sub2'))
    assert subds2.is_installed()
    assert subds2.repo.file_has_content('test-annex.dat')

    # we shouldn't be able silently ignore attempt to provide source while
    # "installing" file under git
    assert_raises(FileInGitError, ds.install,
                  opj('sub2', 'INFO.txt'), source="http://bogusbogus")
def test_unlock_raises(path, path2, path3):
    """Failure modes of `unlock` without a dataset / with invalid paths."""
    # make sure, we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)
    # no dataset and no path:
    assert_raises(InsufficientArgumentsError, unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    with swallow_logs(new_level=logging.WARNING) as cml:
        unlock(dataset=None, path=path2)
        assert_in(
            "ignored paths that do not belong to any dataset: ['{0}'".format(path2),
            cml.out)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    with swallow_logs(new_level=logging.WARNING) as cml:
        ds.unlock(path="notexistent.txt")
        assert_in("ignored non-existing paths", cml.out)

    chpwd(_cwd)
def test_submodule_deinit(path):
    """deinit of a locally modified submodule must fail unless forced."""
    from datalad.support.annexrepo import AnnexRepo

    top_repo = AnnexRepo(path, create=False)
    eq_({'subm 1', '2'}, {s.name for s in top_repo.get_submodules()})
    # note: here init=True is ok, since we are using it just for testing
    with swallow_logs(new_level=logging.WARN) as cml:
        top_repo.update_submodule('subm 1', init=True)
        assert_in('Do not use update_submodule with init=True', cml.out)
    top_repo.update_submodule('2', init=True)

    # ok_(all([s.module_exists() for s in top_repo.get_submodules()]))
    # TODO: old assertion above if non-bare? (can't use "direct mode" in
    # test_gitrepo)
    # Alternatively: New testrepo (plain git submodules) and have a dedicated
    # test for annexes in addition
    ok_(all([GitRepo.is_valid_repo(op.join(top_repo.path, s.path))
             for s in top_repo.get_submodules()]))

    # modify submodule:
    with open(op.join(top_repo.path, 'subm 1', 'file_ut.dat'), "w") as f:
        f.write("some content")

    # deinit must refuse the *modified* submodule ('subm 1'); the previous
    # code passed 'sub1' (no such submodule), which raised for the wrong
    # reason and never exercised the dirty-worktree refusal
    assert_raises(CommandError, top_repo.deinit_submodule, 'subm 1')

    # using force should work:
    top_repo.deinit_submodule('subm 1', force=True)
    ok_(not top_repo.repo.submodule('subm 1').module_exists())
def test_invalid_call(origin, tdir):
    """Error/impossible outcomes of bogus `publish` invocations."""
    ds = Dataset(origin)
    ds.uninstall('subm 1', check=False)
    # nothing
    assert_status('error', publish('/notthere', on_failure='ignore'))
    # known, but not present
    assert_status('impossible',
                  publish(opj(ds.path, 'subm 1'), on_failure='ignore'))
    # --since without dataset is now supported as long as it
    # could be identified
    # assert_raises(InsufficientArgumentsError, publish, since='HEAD')
    # but if it couldn't be, then should indeed crash
    with chpwd(tdir):
        assert_raises(InsufficientArgumentsError, publish, since='HEAD')

    # new dataset, with unavailable subdataset
    dummy = Dataset(tdir).create()
    dummy_sub = dummy.create('sub')
    dummy_sub.uninstall()
    assert_in('sub', dummy.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # now an explicit call to publish the unavailable subdataset
    assert_result_count(
        dummy.publish('sub', on_failure='ignore'),
        1,
        path=dummy_sub.path,
        status='impossible',
        type='dataset')
def test_unlock_raises(path, path2, path3):
    """Failure modes of `unlock` reported via result records."""
    # make sure, we are not within a dataset:
    _cwd = getpwd()
    chpwd(path)
    # no dataset and no path:
    assert_raises(InsufficientArgumentsError, unlock, dataset=None, path=None)
    # no dataset and path not within a dataset:
    res = unlock(dataset=None, path=path2, result_xfm=None,
                 on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path not associated with any dataset")
    eq_(res['path'], path2)

    create(path=path, no_annex=True)
    ds = Dataset(path)
    # no complaints
    ds.unlock()

    # make it annex, but call unlock with invalid path:
    AnnexRepo(path, create=True)
    res = ds.unlock(path="notexistent.txt", result_xfm=None,
                    on_failure='ignore', return_type='item-or-list')
    eq_(res['message'], "path does not exist")

    chpwd(_cwd)
def test_fail_with_short_help():
    """Exit codes and rendered output of fail_with_short_help."""
    out = StringIO()
    with assert_raises(SystemExit) as cme:
        fail_with_short_help(exit_code=3, out=out)
    assert_equal(cme.exception.code, 3)
    assert_equal(out.getvalue(), "")

    out = StringIO()
    with assert_raises(SystemExit) as cme:
        fail_with_short_help(msg="Failed badly", out=out)
    assert_equal(cme.exception.code, 1)
    assert_equal(out.getvalue(), "error: Failed badly\n")

    # Suggestions, hint, etc
    out = StringIO()
    with assert_raises(SystemExit) as cme:
        fail_with_short_help(
            msg="Failed badly",
            known=["mother", "mutter", "father", "son"],
            provided="muther",
            hint="You can become one",
            exit_code=0,  # noone forbids
            what="parent",
            out=out)
    assert_equal(cme.exception.code, 0)
    assert_equal(
        out.getvalue(),
        "error: Failed badly\n"
        "datalad: Unknown parent 'muther'.  See 'datalad --help'.\n\n"
        "Did you mean any of these?\n"
        " mutter\n"
        " mother\n"
        " father\n"
        "Hint: You can become one\n")
def test_failed_install_multiple(top_path):
    """Install of a path list with some targets failing."""
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed),
        {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')
def test_install_into_dataset(source, top_path):
    """Installing subdatasets into an existing dataset, with/without saving."""
    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.get_subdatasets())
    # sub is clean:
    ok_clean_git(subds.path, annex=False)
    # top is not:
    assert_raises(AssertionError, ok_clean_git, ds.path, annex=False)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=False)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source, if_dirty='ignore')
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
def test_GitRepo_get_merge_base(src):
    """get_merge_base across unrelated branches, merges and bogus refs."""
    repo = GitRepo(src, create=True)
    with open(op.join(src, 'file.txt'), 'w') as f:
        f.write('load')
    repo.add('*')
    repo.commit('committing')

    assert_raises(ValueError, repo.get_merge_base, [])
    branch1 = repo.get_active_branch()
    branch1_hexsha = repo.get_hexsha()
    eq_(len(branch1_hexsha), 40)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # Let's create a detached branch
    branch2 = "_detach_"
    repo.checkout(branch2, options=["--orphan"])
    # it will have all the files
    # Must not do: https://github.com/gitpython-developers/GitPython/issues/375
    # repo.git_add('.')
    repo.add('*')
    # NOTE: fun part is that we should have at least a different commit message
    # so it results in a different checksum ;)
    repo.commit("committing again")
    assert repo.get_indexed_files()  # we did commit
    assert repo.get_merge_base(branch1) is None
    assert repo.get_merge_base([branch2, branch1]) is None

    # Let's merge them up -- then merge base should match the master
    repo.merge(branch1, allow_unrelated=True)
    eq_(repo.get_merge_base(branch1), branch1_hexsha)

    # if points to some empty/non-existing branch - should also be None
    assert repo.get_merge_base(['nonexistent', branch2]) is None
def test_install_skip_list_arguments(src, path, path_outside):
    """Install a path list containing invalid entries; they must be skipped."""
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    with swallow_logs(new_level=logging.WARNING) as cml:
        with assert_raises(IncompleteResultsError) as cme:
            ds.install(
                path=['subm 1', 'not_existing', path_outside, 'subm 2'],
                get_data=False)
        result = cme.exception.results
        # the original looped over both skipped paths but ignored the loop
        # variable and re-ran the identical assertion (plus a dead `pass`);
        # the single warning lists both paths, so one check is sufficient
        cml.assert_logged(
            msg="ignored non-existing paths: {}\n".format(
                [opj(ds.path, 'not_existing'), path_outside]),
            regex=False, level='WARNING')

    ok_(isinstance(result, list))
    eq_(len(result), 2)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, 'subm 2'))]:
        assert_in(sub, result)
        ok_(sub.is_installed())

    # return of get is always a list, even if just one thing was gotten
    # in this case 'subm1' was already obtained above, so this will get this
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
    result = cme.exception.results
    eq_(len(result), 1)
    eq_(result[0]['file'], 'subm 1/test-annex.dat')
def test_install_list(path, top_path):
    """Install multiple known subdatasets in a single call."""
    # we want to be able to install several things, if these are known
    # (no 'source' allowed). Therefore first toplevel:
    ds = install(top_path, source=path, recursive=False)
    assert_not_in('annex.hardlink', ds.config)
    ok_(ds.is_installed())
    sub1 = Dataset(opj(top_path, 'subm 1'))
    sub2 = Dataset(opj(top_path, '2'))
    ok_(not sub1.is_installed())
    ok_(not sub2.is_installed())

    # fails, when `source` is passed:
    assert_raises(ValueError, ds.install,
                  path=['subm 1', '2'],
                  source='something')

    # now should work:
    result = ds.install(path=['subm 1', '2'], result_xfm='paths')
    ok_(sub1.is_installed())
    ok_(sub2.is_installed())
    eq_(set(result), {sub1.path, sub2.path})
    # and if we request it again via get, result should be empty
    get_result = ds.get(path=['subm 1', '2'], get_data=False)
    assert_status('notneeded', get_result)
def test_subdatasets(path):
    """get_dataset_handles listing before/after adding a subdataset."""
    from datalad.api import install
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.get_dataset_handles(), None)
    ds = ds.install()
    assert_true(ds.is_installed())
    eq_(ds.get_dataset_handles(), [])
    # create some file and commit it
    # (use a context manager so the handle is closed deterministically;
    # the previous bare `open(...).write(...)` leaked the file object)
    with open(os.path.join(ds.path, 'test'), 'w') as f:
        f.write('some')
    ds.install(path='test')
    assert_true(ds.is_installed())
    # TODO change to remember_state()
    ds.remember_state("Hello!", version=1)

    # add a subdataset
    subds = ds.install('subds', source=path)
    assert_true(subds.is_installed())
    subdss = ds.get_dataset_handles()
    eq_(len(subdss), 1)
    eq_(os.path.join(path, subdss[0]), subds.path)
    eq_(subds.path, ds.get_dataset_handles(absolute=True)[0])
    eq_(subdss, ds.get_dataset_handles(recursive=True))
    eq_(subdss, ds.get_dataset_handles(fulfilled=True))
    # don't have that right now
    assert_raises(NotImplementedError, ds.get_dataset_handles, pattern='sub*')
    ds.remember_state("with subds", version=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.get_dataset_handles(), [])
def test_composite_credential1():
    """Chained (composite) credential storage, derivation and refresh.

    NOTE(review): several expected literals below read ``'******'`` -- this
    looks like secrets were masked when this source was exported; confirm
    the exact values against the upstream test before relying on them.
    """
    # basic test of composite credential
    keyring = MemoryKeyring()
    cred = _CCred1("name", keyring=keyring)
    # When queried, does the chain
    assert_equal(cred(), {'user': '******', 'password': '******'})
    # But the "Front" credential is exposed to the user
    assert_equal(cred.get('user'), 'user1')
    assert_equal(keyring.get('name', 'user'), 'user1')
    assert_raises(ValueError, cred.get, 'unknown_field')
    assert_equal(cred.get('password'), 'password1')
    assert_equal(keyring.get('name', 'password'), 'password1')
    # ATM composite credential stores "derived" ones unconditionally in the
    # keyring as well
    assert_equal(keyring.get('name:1', 'user'), 'user1_1')
    assert_equal(keyring.get('name:1', 'password'), 'password1_2')

    # and now enter new should remove "derived" entries
    cred.enter_new()
    assert_equal(keyring.get('name', 'user'), 'user2')
    assert_equal(keyring.get('name', 'password'), 'password2')
    assert_equal(keyring.get('name:1', 'user'), None)
    assert_equal(keyring.get('name:1', 'password'), None)
    # which would get reevaluated if requested
    assert_equal(keyring.entries,
                 {'name:1': {},
                  'name': {'user': '******', 'password': '******'}})
    assert_equal(cred(), {'user': '******', 'password': '******'})
def test_status_basics(path, linkpath, otherdir):
    """Basic `status` behavior and the shape of its result records."""
    if not on_windows:
        # make it more complicated by default
        ut.Path(linkpath).symlink_to(path, target_is_directory=True)
        path = linkpath

    with chpwd(path):
        assert_raises(NoDatasetArgumentFound, status)
    ds = Dataset(path).create()
    # outcome identical between ds= and auto-discovery
    with chpwd(path):
        assert_raises(IncompleteResultsError, status, path=otherdir)
        stat = status()
    eq_(stat, ds.status())
    assert_status('ok', stat)
    # we have a bunch of reports (be vague to be robust to future changes
    assert len(stat) > 2
    # check the composition
    for s in stat:
        eq_(s['status'], 'ok')
        eq_(s['action'], 'status')
        eq_(s['state'], 'clean')
        eq_(s['type'], 'file')
        assert_in('gitshasum', s)
        assert_in('bytesize', s)
        eq_(s['refds'], ds.path)
def _test_match_basic(matcher, query):
    """Shared driver verifying matcher hit extraction and count bounds."""
    extracts = dict(
        xpaths={'text': 'text()'},
        csss={'favorite': '.class1::text'}
    )
    m = matcher(query, **extracts)

    mg = m(dict(response="<div></div>"))
    ok_(inspect.isgenerator(mg))
    eq_(list(mg), [])  # there should be no hits

    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    hits = list(mg)
    eq_(len(hits), 3)
    for hit, a_html, a_text, class1_text in zip(
            hits, sample1.a_htmls, sample1.a_texts, sample1.class1_texts):
        ok_(hit['response'])
        eq_(hit['match'], a_html)
        eq_(hit['text'], a_text)
        eq_(hit.get('favorite', None), class1_text)

    m = matcher(query, min_count=4, **extracts)
    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    assert_raises(ValueError, list, mg)

    m = matcher(query, max_count=2, **extracts)
    mg = m(dict(response=sample1.response))
    ok_(inspect.isgenerator(mg))
    assert_raises(ValueError, list, mg)
def test_optimized_cloning(path):
    """Local clones must share object-store inodes (hardlink optimization)."""
    # make test repo with one file and one commit
    originpath = op.join(path, 'origin')
    repo = GitRepo(originpath, create=True)
    with open(op.join(originpath, 'test'), 'w') as f:
        f.write('some')
    repo.add('test')
    repo.commit('init')
    ok_clean_git(originpath, annex=False)

    from glob import glob

    def _get_inodes(repo):
        # map relative object path -> inode number for all loose objects
        return dict(
            [(os.path.join(*o.split(os.sep)[-2:]),
              os.stat(o).st_ino)
             for o in glob(os.path.join(repo.path,
                                        repo.get_git_dir(repo),
                                        'objects', '*', '*'))])

    origin_inodes = _get_inodes(repo)
    # now clone it in different ways and see what happens to the object storage
    from datalad.support.network import get_local_file_url
    clonepath = op.join(path, 'clone')
    for src in (originpath, get_local_file_url(originpath)):
        # deprecated
        assert_raises(DeprecatedError, GitRepo, url=src, path=clonepath)
        clone = GitRepo.clone(url=src, path=clonepath, create=True)
        clone_inodes = _get_inodes(clone)
        eq_(origin_inodes, clone_inodes, msg='with src={}'.format(src))
        rmtree(clonepath)
def test_GitRepo_gitignore(path):
    """add/add_submodule must surface ignored paths via GitIgnoreError."""
    gr = GitRepo(path, create=True)
    sub = GitRepo(op.join(path, 'ignore-sub.me'))
    # we need to commit something, otherwise add_submodule
    # will already refuse the submodule for having no commit
    sub.add('a_file.txt')
    sub.commit()

    from ..exceptions import GitIgnoreError

    with open(op.join(path, '.gitignore'), "w") as f:
        f.write("*.me")

    with assert_raises(GitIgnoreError) as cme:
        gr.add('ignore.me')
    eq_(cme.exception.paths, ['ignore.me'])

    with assert_raises(GitIgnoreError) as cme:
        gr.add_submodule(path='ignore-sub.me')
    eq_(cme.exception.paths, ['ignore-sub.me'])

    with assert_raises(GitIgnoreError) as cme:
        gr.add(['ignore.me', 'dontigno.re',
                op.join('ignore-sub.me', 'a_file.txt')])
    eq_(set(cme.exception.paths), {'ignore.me', 'ignore-sub.me'})

    # nothing is recorded within .gitattributes
    eq_(gr.get_gitattributes('.')['.'], {})
def test_ssh_get_connection():
    """SSHManager connection caching and URL validation."""
    manager = SSHManager()
    assert manager._socket_dir is None, \
        "Should be unset upon initialization. Got %s" % str(manager._socket_dir)
    c1 = manager.get_connection('ssh://localhost')
    assert manager._socket_dir, "Should be set after interactions with the manager"
    assert_is_instance(c1, SSHConnection)

    # subsequent call returns the very same instance:
    ok_(manager.get_connection('ssh://localhost') is c1)

    # fail on malformed URls (meaning: our fancy URL parser can't correctly
    # deal with them):
    #assert_raises(ValueError, manager.get_connection, 'localhost')
    # we now allow those simple specifications of host to get_connection
    c2 = manager.get_connection('localhost')
    assert_is_instance(c2, SSHConnection)

    # but should fail if it looks like something else
    assert_raises(ValueError, manager.get_connection, 'localhost/')
    assert_raises(ValueError, manager.get_connection, ':localhost')

    # we can do what urlparse cannot
    # assert_raises(ValueError, manager.get_connection, 'someone@localhost')
    # next one is considered a proper url by urlparse (netloc:'',
    # path='/localhost), but eventually gets turned into SSHRI(hostname='ssh',
    # path='/localhost') -- which is fair IMHO -> invalid test
    # assert_raises(ValueError, manager.get_connection, 'ssh:/localhost')

    manager.close()
def test_install_skip_list_arguments(src, path, path_outside):
    """Install a path list with invalid entries; inspect result records."""
    ds = install(path, source=src)
    ok_(ds.is_installed())

    # install a list with valid and invalid items:
    result = ds.install(
        path=['subm 1', 'not_existing', path_outside, '2'],
        get_data=False,
        on_failure='ignore', result_xfm=None, return_type='list')
    # good and bad results together
    ok_(isinstance(result, list))
    eq_(len(result), 4)
    # check that we have an 'impossible' status for both invalid args
    # but all the other tasks have been accomplished
    for skipped, msg in [(opj(ds.path, 'not_existing'), "path does not exist"),
                         (path_outside, "path not associated with any dataset")]:
        assert_result_count(
            result, 1, status='impossible', message=msg, path=skipped)
    for sub in [Dataset(opj(path, 'subm 1')), Dataset(opj(path, '2'))]:
        assert_result_count(
            result, 1, status='ok',
            message=('Installed subdataset in order to get %s', sub.path))
        ok_(sub.is_installed())

    # return of get is always a list, by default, even if just one thing was gotten
    # in this case 'subm1' was already obtained above, so this will get this
    # content of the subdataset
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(path=['subm 1', 'not_existing'])
    with assert_raises(IncompleteResultsError) as cme:
        ds.get(path=['subm 1', 'not_existing'])
def test_implicit_install(src, dst):
    """Installing a deep subdataset implicitly installs intermediates."""
    origin_top = create(src)
    origin_sub = origin_top.create("sub")
    origin_subsub = origin_sub.create("subsub")
    with open(opj(origin_top.path, "file1.txt"), "w") as f:
        f.write("content1")
    origin_top.add("file1.txt")
    with open(opj(origin_sub.path, "file2.txt"), "w") as f:
        f.write("content2")
    origin_sub.add("file2.txt")
    with open(opj(origin_subsub.path, "file3.txt"), "w") as f:
        f.write("content3")
    origin_subsub.add("file3.txt")
    origin_top.save(recursive=True)

    # first, install toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())

    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # fail on obscure non-existing one
    assert_raises(IncompleteResultsError, ds.install, source='obscure')

    # install 3rd level and therefore implicitly the 2nd:
    result = ds.install(path=opj("sub", "subsub"))
    ok_(sub.is_installed())
    ok_(subsub.is_installed())
    # but by default implicit results are not reported
    eq_(result, subsub)

    # fail on obscure non-existing one in subds
    assert_raises(IncompleteResultsError, ds.install,
                  source=opj('sub', 'obscure'))

    # clean up, the nasty way
    rmtree(dst, chmod_files=True)
    ok_(not exists(dst))

    # again first toplevel:
    ds = install(dst, source=src)
    ok_(ds.is_installed())
    sub = Dataset(opj(ds.path, "sub"))
    ok_(not sub.is_installed())
    subsub = Dataset(opj(sub.path, "subsub"))
    ok_(not subsub.is_installed())

    # now implicit but without an explicit dataset to install into
    # (deriving from CWD):
    with chpwd(dst):
        # don't ask for the file content to make return value comparison
        # simpler
        result = get(path=opj("sub", "subsub"),
                     get_data=False, result_xfm='datasets')
        ok_(sub.is_installed())
        ok_(subsub.is_installed())
        eq_(result, [sub, subsub])
def test_invalid_call(path):
    """`run` needs a dataset and refuses a dirty one."""
    with chpwd(path):
        # no dataset, no luck
        assert_raises(NoDatasetArgumentFound, run, 'doesntmatter')
        # dirty dataset
        ds = Dataset(path).create()
        create_tree(ds.path, {'this': 'dirty'})
        assert_status('impossible',
                      run('doesntmatter', on_failure='ignore'))
def test_addurls_url_filename_fail(self, path):
    """addurls must fail when the URL cannot supply a usable filename."""
    ds = Dataset(path).create(force=True)
    with chpwd(path):
        assert_raises(IncompleteResultsError,
                      ds.addurls,
                      self.json_file,
                      "{url}/nofilename/",
                      "{_url0}/{_url_filename}")
def test_uninstall_invalid(path):
    """Shared failure modes of uninstall/remove/drop."""
    ds = Dataset(path).create(force=True)
    for method in (uninstall, remove, drop):
        assert_raises(InsufficientArgumentsError, method)
        # refuse to touch stuff outside the dataset
        assert_status('error',
                      method(dataset=ds, path='..', on_failure='ignore'))
        # same if it doesn't exist, for consistency
        assert_status('error',
                      method(dataset=ds, path='../madeupnonexist',
                             on_failure='ignore'))
def test_split_remote_branch():
    """split_remote_branch splits on the first slash only."""
    r, b = split_remote_branch("MyRemote/SimpleBranch")
    eq_(r, "MyRemote")
    eq_(b, "SimpleBranch")

    r, b = split_remote_branch("MyRemote/Branch/with/slashes")
    eq_(r, "MyRemote")
    eq_(b, "Branch/with/slashes")

    assert_raises(AssertionError, split_remote_branch, "NoSlashesAtAll")
    assert_raises(AssertionError, split_remote_branch, "TrailingSlash/")
def test_invalid_call(path):
    """create_sibling_github without dataset / without valid credentials.

    NOTE(review): the ``github_login`` literal reads ``'******'`` -- likely a
    sanitized placeholder; any non-empty bogus value should behave the same.
    """
    # no dataset
    assert_raises(ValueError, create_sibling_github, 'bogus', dataset=path)
    ds = Dataset(path).create()
    # no user
    assert_raises(gh.BadCredentialsException,
                  ds.create_sibling_github, 'bogus',
                  github_login='******')
def test_invalid_call(path):
    """annotate_paths argument validation."""
    # inter-option dependencies
    assert_raises(
        ValueError,
        annotate_paths, '',
        force_subds_discovery=True, force_parentds_discovery=False)
    # modified_since needs a actual dataset
    assert_raises(
        ValueError,
        annotate_paths, dataset=path, modified="something")
def test_addurls(self, path):
    """End-to-end addurls: create links, metadata, ifexists modes."""
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        # number of commits on the git-annex branch
        return int(
            ds.repo.repo.git.rev_list("--count", "git-annex").strip())

    n_annex_commits = get_annex_commit_counts()

    with chpwd(path):
        ds.addurls(self.json_file, "{url}", "{name}")

        filenames = ["a", "b", "c"]
        for fname in filenames:
            ok_exists(fname)

        for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                         ["foo", "bar", "foo"]):
            assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

        # Ignore this check if we're faking dates because that disables
        # batch mode.
        if not os.environ.get('DATALAD_FAKE__DATES'):
            # We should have two new commits on the git-annex: one for the
            # added urls and one for the added metadata.
            eq_(n_annex_commits + 2, get_annex_commit_counts())

        # Add to already existing links, overwriting.
        with swallow_logs(new_level=logging.DEBUG) as cml:
            ds.addurls(self.json_file, "{url}", "{name}",
                       ifexists="overwrite")
            for fname in filenames:
                assert_in("Removing {}".format(os.path.join(path, fname)),
                          cml.out)

        # Add to already existing links, skipping.
        assert_in_results(
            ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
            action="addurls",
            status="notneeded")

        # Add to already existing links works, as long content is the same.
        ds.addurls(self.json_file, "{url}", "{name}")

        # But it fails if something has changed.
        ds.unlock("a")
        with open("a", "w") as ofh:
            ofh.write("changed")
        ds.save("a")
        assert_raises(IncompleteResultsError,
                      ds.addurls, self.json_file, "{url}", "{name}")
def test_publish_plain_git(origin, src_path, dst_path):
    """Publishing to a plain git remote, incl. repeat, update and force push."""
    # TODO: Since it's mostly the same, melt with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it (provide no 'origin' anymore), which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote("origin")

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    ok_clean_git(source.repo, annex=None)
    ok_clean_git(target, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.add(opj(src_path, 'test_mod_file'), to_git=True,
               message="Modified.")
    ok_clean_git(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    ok_clean_git(dst_path, annex=None)
    eq_(list(target.get_branch_commits("master")),
        list(source.repo.get_branch_commits("master")))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish, dataset=source,
                  to='target', result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source, to='target', result_xfm='datasets',
                  force=True)
    eq_(res, [source])
def test_invalid_url(path):
    """Cloning from a bogus OSF URL must fail."""
    assert_raises(IncompleteResultsError, clone, 'osf://q8xnk/somepath', path)
def test_obtain(path):
    """ConfigManager.obtain: known values, type coercion, UI interaction."""
    ds = create(path)
    cfg = ConfigManager(ds)
    dummy = 'datalad.test.dummy'
    # we know nothing and we don't know how to ask
    assert_raises(RuntimeError, cfg.obtain, dummy)
    # can report known ones
    cfg.add(dummy, '5.3')
    assert_equal(cfg.obtain(dummy), '5.3')
    # better type
    assert_equal(cfg.obtain(dummy, valtype=float), 5.3)
    # don't hide type issues, float doesn't become an int magically
    assert_raises(ValueError, cfg.obtain, dummy, valtype=int)
    # inject some prior knowledge
    from datalad.interface.common_cfg import definitions as cfg_defs
    cfg_defs[dummy] = dict(type=float)
    # no we don't need to specify a type anymore
    assert_equal(cfg.obtain(dummy), 5.3)
    # but if we remove the value from the config, all magic is gone
    cfg.unset(dummy)
    # we know nothing and we don't know how to ask
    assert_raises(RuntimeError, cfg.obtain, dummy)

    #
    # test actual interaction
    #
    @with_testsui()
    def ask():
        # fail on unkown dialog type
        assert_raises(ValueError, cfg.obtain, dummy,
                      dialog_type='Rorschach_test')
    ask()

    # ask nicely, and get a value of proper type using the preconfiguration
    @with_testsui(responses='5.3')
    def ask():
        assert_equal(
            cfg.obtain(dummy, dialog_type='question', text='Tell me'), 5.3)
    ask()

    # preconfigure even more, to get the most compact call
    cfg_defs[dummy]['ui'] = ('question',
                             dict(text='tell me', title='Gretchen Frage'))

    @with_testsui(responses='5.3')
    def ask():
        assert_equal(cfg.obtain(dummy), 5.3)
    ask()

    @with_testsui(responses='murks')
    def ask():
        assert_raises(ValueError, cfg.obtain, dummy)
    ask()

    # fail to store when destination is not specified, will not even ask
    @with_testsui()
    def ask():
        assert_raises(ValueError, cfg.obtain, dummy, store=True)
    ask()

    # but we can preconfigure it
    cfg_defs[dummy]['destination'] = 'broken'

    @with_testsui(responses='5.3')
    def ask():
        assert_raises(ValueError, cfg.obtain, dummy, store=True)
    ask()

    # fixup destination
    cfg_defs[dummy]['destination'] = 'dataset'

    @with_testsui(responses='5.3')
    def ask():
        assert_equal(cfg.obtain(dummy, store=True), 5.3)
    ask()

    # now it won't have to ask again
    @with_testsui()
    def ask():
        assert_equal(cfg.obtain(dummy), 5.3)
    ask()

    # wipe it out again
    cfg.unset(dummy)
    assert_not_in(dummy, cfg)
def test_something(path, new_home):
    """Exercise core ConfigManager behavior against an example config file.

    Covers: empty manager, reading a dataset config (including a section name
    with non-ASCII characters and dots), multi-value options, section/option
    queries, typed getters, modification (add/set/unset/rename), and -- inside
    a sandboxed ``$HOME`` -- global-scope writes and section removal.
    """
    # read nothing, has nothing
    cfg = ConfigManager(dataset_only=True)
    assert_false(len(cfg))
    # now read the example config
    cfg = ConfigManager(Dataset(opj(path, 'ds')), dataset_only=True)
    assert_equal(len(cfg), 3)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'],
                 ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 ['onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')), ['myint', 'user'])
    assert_equal(cfg.options('onemore.complicated の beast with.dot'),
                 ['findme'])

    assert_equal(
        sorted(cfg.items()),
        [('onemore.complicated の beast with.dot.findme', '5.0'),
         ('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])
    assert_equal(
        sorted(cfg.items('something')),
        [('something.myint', '3'),
         ('something.user', ('name=Jane Doe', '[email protected]'))])

    # always get all values
    assert_equal(
        cfg.get('something.user'),
        ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(
        cfg.getfloat('onemore.complicated の beast with.dot', 'findme'), 5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'),
                 cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'), 'oohaaa')
    # weired, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), 'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparision would fail, because of section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError,
                      cfg.rename_section, 'nothere', 'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch a changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set, 'mike.should.have', 'a beer', force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', where='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    with patch.dict('os.environ',
                    {'HOME': new_home, 'DATALAD_SNEAKY_ADDITION': 'ignore'}):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert(not exists(global_gitconfig))
        globalcfg = ConfigManager(dataset_only=False)
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', where='global')
        assert(exists(global_gitconfig))
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(
                CommandError,
                globalcfg.unset,
                'datalad.unittest.youcan',
                where='local')
        assert(globalcfg.has_section('datalad.unittest'))
        globalcfg.unset('datalad.unittest.youcan', where='global')
        # but after we unset the only value -- that section is no longer listed
        assert (not globalcfg.has_section('datalad.unittest'))
        assert_not_in('datalad.unittest.youcan', globalcfg)
        # although it does leaves empty section behind in the file
        ok_file_has_content(global_gitconfig,
                            '[datalad "unittest"]', strip=True)
        # remove_section to clean it up entirely
        globalcfg.remove_section('datalad.unittest', where='global')
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(
        Dataset(opj(path, 'ds')),
        dataset_only=True,
        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
def test_siblings(origin, repo_path, local_clone_path):
    """Exercise the ``siblings()`` command.

    Covers: insufficient arguments outside a dataset, configure/add/remove of
    a sibling, publication-dependency validation, automagic name derivation
    from a URL, force-reconfiguration, recursive configuration with and
    without %NAME templating, a local-clone target without pushurl, and
    rejection of identical remote/common-datasrc names.
    """
    sshurl = "ssh://push-remote.example.com"
    httpurl1 = "http://remote1.example.com/location"
    httpurl2 = "http://remote2.example.com/location"

    # insufficient arguments
    # we need a dataset to work at
    with chpwd(repo_path):  # not yet there
        assert_raises(InsufficientArgumentsError,
                      siblings, 'add', url=httpurl1)

    # prepare src
    source = install(repo_path, source=origin, recursive=True)
    # pollute config
    depvar = 'remote.test-remote.datalad-publish-depends'
    source.config.add(depvar, 'stupid', where='local')

    # cannot configure unknown remotes as dependencies
    res = siblings(
        'configure',
        dataset=source,
        name="test-remote",
        url=httpurl1,
        publish_depends=['r1', 'r2'],
        on_failure='ignore',
        result_renderer=None)
    assert_status('error', res)
    eq_(res[0]['message'],
        ('unknown sibling(s) specified as publication dependency: %s',
         set(('r1', 'r2'))))
    # prior config was not changed by failed call above
    eq_(source.config.get(depvar, None), 'stupid')

    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1,
                   result_xfm='paths',
                   result_renderer=None)

    eq_(res, [source.path])
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1, source.repo.get_remote_url("test-remote"))

    # reconfiguring doesn't change anything
    siblings('configure', dataset=source, name="test-remote",
             url=httpurl1,
             result_renderer=None)
    assert_in("test-remote", source.repo.get_remotes())
    eq_(httpurl1, source.repo.get_remote_url("test-remote"))
    # re-adding doesn't work
    res = siblings('add', dataset=source, name="test-remote",
                   url=httpurl1, on_failure='ignore',
                   result_renderer=None)
    assert_status('error', res)
    # only after removal
    res = siblings('remove', dataset=source, name="test-remote",
                   result_renderer=None)
    assert_status('ok', res)
    assert_not_in("test-remote", source.repo.get_remotes())
    res = siblings('add', dataset=source, name="test-remote",
                   url=httpurl1, on_failure='ignore',
                   result_renderer=None)
    assert_status('ok', res)

    # add to another remote automagically taking it from the url
    # and being in the dataset directory
    with chpwd(source.path):
        res = siblings('add', url=httpurl2,
                       result_renderer=None)
        assert_result_count(
            res, 1,
            name="remote2.example.com", type='sibling')
        assert_in("remote2.example.com", source.repo.get_remotes())

    # don't fail with conflicting url, when using force:
    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere", source.repo.get_remote_url("test-remote"))

    # no longer a use case, I would need additional convincing that
    # this is anyhow useful other then triple checking other peoples
    # errors. for an actual check use 'query'
    # maybe it could be turned into a set of warnings when `configure`
    # alters an existing setting, but then why call configure, if you
    # want to keep the old values
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere")
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))
    ## add a push url without force fails, since in a way the fetch url is the
    ## configured push url, too, in that case:
    #with assert_raises(RuntimeError) as cm:
    #    add_sibling(dataset=source, name="test-remote",
    #                url=httpurl1 + "/elsewhere",
    #                pushurl=sshurl, force=False)
    #assert_in("""'test-remote' already exists with conflicting settings""",
    #          str(cm.exception))

    # add push url (force):
    res = siblings('configure',
                   dataset=source, name="test-remote",
                   url=httpurl1 + "/elsewhere",
                   pushurl=sshurl,
                   result_renderer=None)
    assert_status('ok', res)
    eq_(httpurl1 + "/elsewhere", source.repo.get_remote_url("test-remote"))
    eq_(sshurl, source.repo.get_remote_url("test-remote", push=True))

    # recursively:
    for r in siblings(
            'configure',
            dataset=source, name="test-remote",
            url=httpurl1 + "/%NAME",
            pushurl=sshurl + "/%NAME",
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote", repo.get_remotes())
        url = repo.get_remote_url("test-remote")
        pushurl = repo.get_remote_url("test-remote", push=True)
        ok_(url.startswith(httpurl1 + '/' + basename(source.path)))
        ok_(url.endswith(basename(repo.path)))
        ok_(pushurl.startswith(sshurl + '/' + basename(source.path)))
        ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template:
    for r in siblings(
            'configure',
            dataset=source, name="test-remote-2",
            url=httpurl1,
            pushurl=sshurl,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-2", repo.get_remotes())
        url = repo.get_remote_url("test-remote-2")
        pushurl = repo.get_remote_url("test-remote-2", push=True)
        ok_(url.startswith(httpurl1))
        ok_(pushurl.startswith(sshurl))
        # FIXME: next condition used to compare the *Repo objects instead of
        # there paths. Due to missing annex-init in
        # datalad/tests/utils.py:clone_url this might not be the same, since
        # `source` actually is an annex, but after flavor 'clone' in
        # `with_testrepos` and then `install` any trace of an annex might be
        # gone in v5 (branch 'master' only), while in direct mode it still is
        # considered an annex. `repo` is forced to be a `GitRepo`, so we might
        # compare two objects of different classes while they actually are
        # pointing to the same repository.
        # See github issue #1854
        if repo.path != source.repo.path:
            ok_(url.endswith('/' + basename(repo.path)))
            ok_(pushurl.endswith(basename(repo.path)))
        eq_(url, r['url'])
        eq_(pushurl, r['pushurl'])

    # recursively without template and pushurl but full "hierarchy"
    # to a local clone
    for r in siblings(
            'configure',
            dataset=source,
            name="test-remote-3",
            url=local_clone_path,
            recursive=True,
            # we need to disable annex queries, as it will try to access
            # the fake URL configured above
            get_annex_info=False,
            result_renderer=None):
        repo = GitRepo(r['path'], create=False)
        assert_in("test-remote-3", repo.get_remotes())
        url = repo.get_remote_url("test-remote-3")
        pushurl = repo.get_remote_url("test-remote-3", push=True)
        eq_(
            normpath(url),
            normpath(
                opj(local_clone_path, relpath(str(r['path']), source.path))))
        # https://github.com/datalad/datalad/issues/3951
        ok_(not pushurl)  # no pushurl should be defined

    # 5621: Users shouldn't pass identical names for remote & common data source
    assert_raises(
        ValueError,
        siblings, 'add',
        dataset=source,
        name='howdy',
        url=httpurl1,
        as_common_datasrc='howdy')
def test_create_raises(path, outside_path):
    """Verify that Dataset.create() rejects invalid argument combinations
    and invalid target locations, and honors the `force` flag."""
    ds = Dataset(path)
    # annex-specific options must be refused when no annex is requested
    for incompatible in (
            dict(description='some'),
            dict(annex_opts=['some']),
            dict(annex_init_opts=['some'])):
        assert_raises(ValueError, ds.create, no_annex=True, **incompatible)
    # make the target directory non-empty
    with open(opj(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # a non-empty directory is refused without `force`...
    assert_raises(ValueError, ds.create, force=False)
    # ...but accepted with it
    ds.create(force=True)
    # a subdataset outside of the superdataset is refused
    assert_raises(ValueError, ds.create, outside_path)
    # create a sub:
    ds.create('sub')
    # recreating the same sub without `force` fails
    assert_raises(ValueError, ds.create, 'sub')
def test_within_ds_file_search(path):
    """Exercise metadata search within a single dataset.

    Uses an annexed MP3 file with aggregated 'audio' metadata and checks key
    listing, key-subset reporting, invalid-regex errors, and hit discovery in
    'egrep', 'textblob', and 'autofield' modes, plus the did-you-mean
    suggestion logging for near-miss keys.
    """
    try:
        import mutagen
    except ImportError:
        raise SkipTest
    ds = Dataset(path).create(force=True)
    # override default and search for datasets and files for this test
    for m in ('egrep', 'textblob', 'autofield'):
        ds.config.add(
            'datalad.search.index-{}-documenttype'.format(m), 'all',
            where='dataset')
    ds.config.add('datalad.metadata.nativetype', 'audio', where='dataset')
    makedirs(opj(path, 'stim'))
    for src, dst in (('audio.mp3', opj('stim', 'stim1.mp3')),):
        copy(
            opj(dirname(dirname(__file__)), 'tests', 'data', src),
            opj(path, dst))
    ds.save()
    ok_file_under_git(path, opj('stim', 'stim1.mp3'), annexed=True)
    # If it is not under annex, below addition of metadata silently does
    # not do anything
    ds.repo.set_metadata(
        opj('stim', 'stim1.mp3'), init={'importance': 'very'})
    ds.aggregate_metadata()
    assert_repo_status(ds.path)
    # basic sanity check on the metadata structure of the dataset
    dsmeta = ds.metadata('.', reporton='datasets')[0]['metadata']
    for src in ('audio',):
        # something for each one
        assert_in(src, dsmeta)
        # each src declares its own context
        assert_in('@context', dsmeta[src])
        # we have a unique content metadata summary for each src
        assert_in(src, dsmeta['datalad_unique_content_properties'])

    # test default behavior
    with swallow_outputs() as cmo:
        ds.search(show_keys='name', mode='textblob')

        assert_in("""\
id
meta
parentds
path
type
""", cmo.out)

    target_out = """\
annex.importance
annex.key
audio.bitrate
audio.duration(s)
audio.format
audio.music-Genre
audio.music-album
audio.music-artist
audio.music-channels
audio.music-sample_rate
audio.name
audio.tracknumber
datalad_core.id
datalad_core.refcommit
id
parentds
path
type
"""

    # test default behavior while limiting set of keys reported
    with swallow_outputs() as cmo:
        ds.search(['\.id', 'artist$'], show_keys='short')
        out_lines = [l for l in cmo.out.split(os.linesep) if l]
        # test that only the ones matching were returned
        assert_equal(
            [l for l in out_lines if not l.startswith(' ')],
            ['audio.music-artist', 'datalad_core.id'])
        # more specific test which would also test formatting
        assert_equal(
            out_lines,
            ['audio.music-artist',
             ' in  1 datasets',
             " has 1 unique values: 'dlartist'",
             'datalad_core.id',
             ' in  1 datasets',
             # we have them sorted
             " has 1 unique values: '%s'" % ds.id
             ])

    with assert_raises(ValueError) as cme:
        ds.search('*wrong')
    assert_re_in(
        r"regular expression '\(\?i\)\*wrong' \(original: '\*wrong'\) is incorrect: ",
        str(cme.exception))

    # check generated autofield index keys
    with swallow_outputs() as cmo:
        ds.search(mode='autofield', show_keys='name')
        # it is impossible to assess what is different from that dump
        assert_in(target_out, cmo.out)

    assert_result_count(ds.search('blablob#'), 0)
    # now check that we can discover things from the aggregated metadata
    for mode, query, hitpath, matched in (
            ('egrep',
             ':mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # same as above, leading : is stripped, in indicates "ALL FIELDS"
            ('egrep',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # same as above, but with AND condition
            # get both matches
            ('egrep',
             ['mp3', 'type:file'],
             opj('stim', 'stim1.mp3'),
             {'type': 'file', 'audio.format': 'mp3'}),
            # case insensitive search
            ('egrep',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # field selection by expression
            ('egrep',
             'audio\.+:mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # random keyword query
            ('textblob',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'meta': 'mp3'}),
            # report which field matched with auto-field
            ('autofield',
             'mp3',
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # XXX next one is not supported by current text field analyser
            # decomposes the mime type in [mime, audio, mp3]
            # ('autofield',
            # "'mime:audio/mp3'",
            # opj('stim', 'stim1.mp3'),
            # 'audio.format', 'mime:audio/mp3'),
            # but this one works
            ('autofield',
             "'mime audio mp3'",
             opj('stim', 'stim1.mp3'),
             {'audio.format': 'mp3'}),
            # TODO extend with more complex queries to test whoosh
            # query language configuration
    ):
        res = ds.search(query, mode=mode, full_record=True)
        assert_result_count(
            res, 1, type='file', path=opj(ds.path, hitpath),
            # each file must report the ID of the dataset it is from, critical for
            # discovering related content
            dsid=ds.id)
        # in egrep we currently do not search unique values
        # and the queries above aim at files
        assert_result_count(res, 1 if mode == 'egrep' else 2)
        if mode != 'egrep':
            assert_result_count(
                res, 1, type='dataset', path=ds.path, dsid=ds.id)
        # test the key and specific value of the match
        for matched_key, matched_val in matched.items():
            assert_in(matched_key, res[-1]['query_matched'])
            assert_equal(res[-1]['query_matched'][matched_key], matched_val)

    # test a suggestion msg being logged if no hits and key is a bit off
    with swallow_logs(new_level=logging.INFO) as cml:
        res = ds.search('audio.formats:mp3 audio.bitsrate:1', mode='egrep')
        assert not res
        assert_in('Did you mean any of', cml.out)
        assert_in('audio.format', cml.out)
        assert_in('audio.bitrate', cml.out)
def test_rerun(path, nodspath):
    """Exercise ``rerun``: repeated execution of a recorded command, behavior
    from subdatasets and dirty repos, buried commands, commit ranges, report
    mode, dropped content, and abort on merge commits.

    Fix vs. previous revision: the report ``commit`` check was a bare
    comparison (``report[-1]["commit"] == ...``) whose result was discarded,
    i.e. a no-op; it is now a real assertion via ``eq_``.
    """
    ds = Dataset(path).create()
    sub = ds.create('sub')
    probe_path = op.join(sub.path, 'sequence')
    # run inside the dataset
    with chpwd(path), \
            swallow_outputs():
        ds.run('echo x$(cat sub/sequence) > sub/sequence')
    # command ran once, all clean
    assert_repo_status(ds.path)
    eq_('x\n', open(probe_path).read())
    # now, for a rerun we can be anywhere, PWD and all are recorded
    # moreover, rerun must figure out which bits to unlock, even in
    # subdatasets
    with chpwd(nodspath), \
            swallow_outputs():
        ds.rerun()
    assert_repo_status(ds.path)
    # ran twice now
    eq_('xx\n', open(probe_path).read())

    # Rerunning from a subdataset skips the command.
    _, sub_info = get_run_info(ds, sub.repo.repo.head.commit.message)
    eq_(ds.id, sub_info["dsid"])
    assert_result_count(
        sub.rerun(return_type="list", on_failure="ignore"),
        1, status="impossible", action="run", rerun_action="skip")
    eq_('xx\n', open(probe_path).read())

    # Rerun fails with a dirty repo.
    dirt = op.join(path, "dirt")
    with open(dirt, "w") as fh:
        fh.write("")
    assert_status('impossible', ds.rerun(on_failure="ignore"))
    remove(dirt)
    assert_repo_status(ds.path)

    # Make a non-run commit.
    with open(op.join(path, "nonrun-file"), "w") as f:
        f.write("foo")
    ds.save("nonrun-file")
    # Now rerun the buried command.
    ds.rerun(revision="HEAD~", message="rerun buried")
    eq_('xxx\n', open(probe_path).read())
    # Also check that the message override worked.
    eq_(ds.repo.format_commit("%B").splitlines()[0],
        "[DATALAD RUNCMD] rerun buried")
    # Or a range of commits, skipping non-run commits.
    ds.rerun(since="HEAD~3")
    eq_('xxxxx\n', open(probe_path).read())
    # Or --since= to run all reachable commits.
    ds.rerun(since="")
    eq_('xxxxxxxxxx\n', open(probe_path).read())

    # We can get back a report of what would happen rather than actually
    # rerunning anything.
    report = ds.rerun(since="", report=True, return_type="list")
    # Nothing changed.
    eq_('xxxxxxxxxx\n', open(probe_path).read())
    assert_result_count(report, 1, rerun_action="skip")
    # FIX: was a bare `==` comparison with a discarded result (no-op);
    # assert it for real now
    eq_(report[-1]["commit"], ds.repo.get_hexsha())

    # If a file is dropped, we remove it instead of unlocking it.
    ds.drop(probe_path, check=False)
    with swallow_outputs():
        ds.rerun()
    eq_('x\n', open(probe_path).read())
    # If the history to rerun has a merge commit, we abort.
    ds.repo.checkout("HEAD~3", options=["-b", "topic"])
    with open(op.join(path, "topic-file"), "w") as f:
        f.write("topic")
    ds.save("topic-file")
    ds.repo.checkout("master")
    ds.repo.merge("topic")
    assert_repo_status(ds.path)
    assert_raises(IncompleteResultsError, ds.rerun)
def test_assert_git_annex_branch_published(path):
    """The helper must raise for two independent, never-synced annex repos."""
    source_repo = AnnexRepo(opj(path, "a"), create=True)
    target_repo = AnnexRepo(opj(path, "b"), create=True)
    # nothing was ever published from one to the other, so this must fail
    with assert_raises(AssertionError):
        assert_git_annex_branch_published(source_repo, target_repo)
def test_search_outside1_noninteractive_ui(tdir):
    """Searching outside of any dataset must fail with an informative hint."""
    # we should raise an informative exception
    with chpwd(tdir), assert_raises(NoDatasetFound) as cme:
        list(search("bu"))
    assert_in('run interactively', str(cme.exception))
def test_publish_plain_git(origin, src_path, dst_path):
    """Exercise publishing to a plain (non-annex) git remote.

    Covers: initial publish, repeated publish being 'notneeded', publishing
    new commits, rejection of a non-fast-forward push, and force push.
    """
    # TODO: Since it's mostly the same, melt with test_publish_simple

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it by removing remote, which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote(DEFAULT_REMOTE)

    # create plain git at target:
    target = GitRepo(dst_path, create=True)
    target.checkout("TMP", ["-b"])
    source.repo.add_remote("target", dst_path)

    res = publish(dataset=source, to="target", result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # don't fail when doing it again
    res = publish(dataset=source, to="target")
    # and nothing is pushed
    assert_result_count(res, 1, status='notneeded')

    assert_repo_status(source.repo, annex=None)
    assert_repo_status(target, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    with open(opj(src_path, 'test_mod_file'), "w") as f:
        f.write("Some additional stuff.")
    source.save(opj(src_path, 'test_mod_file'), to_git=True,
                message="Modified.")
    assert_repo_status(source.repo, annex=None)

    res = publish(dataset=source, to='target', result_xfm='datasets')
    eq_(res, [source])

    assert_repo_status(dst_path, annex=None)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(source.repo.get_branch_commits_(DEFAULT_BRANCH)))

    # amend and change commit msg in order to test for force push:
    source.repo.commit("amended", options=['--amend'])
    # push should be rejected (non-fast-forward):
    assert_raises(IncompleteResultsError,
                  publish, dataset=source, to='target', result_xfm='datasets')
    # push with force=True works:
    res = publish(dataset=source, to='target', result_xfm='datasets',
                  force=True)
    eq_(res, [source])
def test_runner_failure(dir_):
    """A failing subprocess must surface as CommandError with its exit code."""
    failing = py2cmd('import sys; sys.exit(53)')
    with assert_raises(CommandError) as cme:
        Runner().run(failing)
    # the child's exit status is preserved on the exception
    eq_(53, cme.exception.code)
def test_repo_diff(path, norepo):
    """Exercise the low-level ``repo.diff()`` API.

    Covers: invalid revisions, empty diffs, added/modified file reporting
    with exact git shasums, path constraints, and untracked reporting in
    'all' vs 'normal' (directory-collapsed) mode.
    """
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    assert_raises(ValueError, ds.repo.diff, fr='WTF', to='MIKE')

    # pick comparison base depending on repo flavor; on managed branches
    # HEAD is not usable directly
    if ds.repo.is_managed_branch():
        fr_base = DEFAULT_BRANCH
        to = DEFAULT_BRANCH
    else:
        fr_base = "HEAD"
        to = None

    # no diff
    eq_(ds.repo.diff(fr_base, to), {})
    # bogus path makes no difference
    eq_(ds.repo.diff(fr_base, to, paths=['THIS']), {})
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    eq_(ds.repo.diff(fr=fr_base + '~1', to=fr_base),
        {ut.Path(ds.repo.pathobj / 'new'): {
            'state': 'added',
            'type': 'file',
            'bytesize': 5,
            'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'}})
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    eq_(ds.repo.diff(fr='HEAD', to=None),
        {ut.Path(ds.repo.pathobj / 'new'): {
            'state': 'modified',
            'type': 'file',
            # the beast is modified, but no change in shasum -> not staged
            'gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6',
            'prev_gitshasum': '7b4d68d70fcae134d5348f5e118f5e9c9d3f05f6'}})
    # per path query gives the same result
    eq_(ds.repo.diff(fr=fr_base, to=to),
        ds.repo.diff(fr=fr_base, to=to, paths=['new']))
    # also given a directory as a constraint does the same
    eq_(ds.repo.diff(fr=fr_base, to=to),
        ds.repo.diff(fr=fr_base, to=to, paths=['.']))
    # but if we give another path, it doesn't show up
    eq_(ds.repo.diff(fr=fr_base, to=to, paths=['other']), {})

    # make clean
    ds.save()
    assert_repo_status(ds.path)

    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # default is to report all files
    eq_(ds.repo.diff(fr='HEAD', to=None),
        {
            ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
                'state': 'untracked',
                'type': 'file'},
            ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
                'state': 'untracked',
                'type': 'file'}})
    # but can be made more compact
    eq_(ds.repo.diff(fr='HEAD', to=None, untracked='normal'),
        {
            ut.Path(ds.repo.pathobj / 'deep'): {
                'state': 'untracked',
                'type': 'directory'}})

    # again a unmatching path constrainted will give an empty report
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['other']), {})
    # perfect match and anything underneath will do
    eq_(ds.repo.diff(fr='HEAD', to=None, paths=['deep']),
        {
            ut.Path(ds.repo.pathobj / 'deep' / 'down'): {
                'state': 'untracked',
                'type': 'file'},
            ut.Path(ds.repo.pathobj / 'deep' / 'down2'): {
                'state': 'untracked',
                'type': 'file'}})
def test_failed_install(dspath):
    """Installing a subdataset from an unreachable URL must raise
    IncompleteResultsError."""
    superds = create(dspath)
    bogus_source = "http://nonexistingreallyanything.somewhere/bla"
    with assert_raises(IncompleteResultsError):
        superds.install("sub", source=bogus_source)
def _test_proxying_open(generate_load, verify_load, repo):
    """Parametrized helper for AutomagicIO tests.

    ``generate_load(path)`` creates content at ``path`` (may return an
    adjusted path), ``verify_load(path)`` opens and validates it. The helper
    checks that inside an ``AutomagicIO`` context, opening a not-yet-fetched
    annexed file transparently triggers `annex get`, that absent files do not
    trigger a fetch, the ``check_once`` mode, and robustness against stdout
    replacements lacking a working ``fileno()``.
    """
    annex = AnnexRepo(repo, create=True)
    fpath1 = opj(repo, "test")
    fpath2 = opj(repo, 'd1', 'd2', 'test2')
    # generate load
    fpath1 = generate_load(fpath1) or fpath1
    os.makedirs(dirname(fpath2))
    fpath2 = generate_load(fpath2) or fpath2

    annex.add([fpath1, fpath2])
    verify_load(fpath1)
    verify_load(fpath2)
    annex.commit("Added some files")

    # clone to another repo
    repo2 = repo + "_2"
    annex2 = AnnexRepo.clone(repo, repo2)

    # verify that can't access
    fpath1_2 = fpath1.replace(repo, repo2)
    fpath2_2 = fpath2.replace(repo, repo2)

    EXPECTED_EXCEPTIONS = (IOError, OSError)
    assert_raises(EXPECTED_EXCEPTIONS, verify_load, fpath1_2)

    with AutomagicIO():
        # verify that it doesn't even try to get files which do not exist
        with patch('datalad.support.annexrepo.AnnexRepo.get') as gricm:
            # if we request absent file
            assert_raises(EXPECTED_EXCEPTIONS, open, fpath1_2 + "_", 'r')
            # no get should be called
            assert_false(gricm.called)
        verify_load(fpath1_2)
        verify_load(fpath2_2)
        # and even if we drop it -- we still can get it no problem
        annex2.drop(fpath2_2)
        assert_false(annex2.file_has_content(fpath2_2))
        verify_load(fpath2_2)
        assert_true(annex2.file_has_content(fpath2_2))
        annex2.drop(fpath2_2)
        assert_false(annex2.file_has_content(fpath2_2))
        assert_true(os.path.isfile(fpath2_2))

    # In check_once mode, if we drop it, it wouldn't be considered again
    annex2.drop(fpath2_2)
    assert_false(annex2.file_has_content(fpath2_2))
    with AutomagicIO(check_once=True):
        verify_load(fpath2_2)
        assert_true(annex2.file_has_content(fpath2_2))
        annex2.drop(fpath2_2)
        assert_false(annex2.file_has_content(fpath2_2))
        assert_false(os.path.isfile(fpath2_2))

    # if we override stdout with something not supporting fileno, like tornado
    # does which ruins using get under IPython
    # TODO: we might need to refuse any online logging in other places like that
    annex2.drop(fpath2_2)

    class StringIOfileno(StringIO):
        # deliberately broken fileno() to mimic e.g. tornado's stdout wrapper
        def fileno(self):
            raise Exception("I have no clue how to do fileno")

    with patch('sys.stdout', new_callable=StringIOfileno), \
            patch('sys.stderr', new_callable=StringIOfileno):
        with AutomagicIO():
            assert_false(annex2.file_has_content(fpath2_2))
            verify_load(fpath2_2)
            assert_true(annex2.file_has_content(fpath2_2))
def test_save(path):
    """Exercise Dataset.save()/save() in various invocation styles.

    Covers: deprecated ``all_changes`` rejection, saving staged and modified
    files, PWD-based invocation without a dataset, 'notneeded' on a
    superfluous save, and saving modified subdatasets (directly and via the
    subdataset path).
    """
    ds = Dataset(path)

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("something")

    ds.repo.add("new_file.tst", git=True)
    ok_(ds.repo.dirty)

    # no all_changes any longer
    with assert_raises(DeprecatedError):
        ds.save("add a new file", all_changes=True)
    ds.save("add a new file")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("modify")

    ok_(ds.repo.dirty)
    ds.save("modified new_file.tst")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # save works without ds and files given in the PWD
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("rapunzel")
    with chpwd(path):
        save("love rapunzel")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # and also without `-a` when things are staged
    with open(opj(path, "new_file.tst"), "w") as f:
        f.write("exotic")
    ds.repo.add("new_file.tst", git=True)
    with chpwd(path):
        save("love marsians")
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    files = ['one.txt', 'two.txt']
    for fn in files:
        with open(opj(path, fn), "w") as f:
            f.write(fn)

    ds.add([opj(path, f) for f in files])
    # superfluous call to save (add saved it already), should not fail
    # but report that nothing was saved
    assert_status('notneeded', ds.save("set of new files"))
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # create subdataset
    subds = ds.create('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
    # modify subds
    with open(opj(subds.path, "some_file.tst"), "w") as f:
        f.write("something")
    subds.add('.')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    # Note/TODO: ok_clean_git is failing in direct mode, due to staged but
    # uncommited .datalad (probably caused within create)
    ok_(ds.repo.dirty)
    # ensure modified subds is committed
    ds.save()
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))

    # now introduce a change downstairs
    subds.create('someotherds')
    ok_clean_git(subds.path, annex=isinstance(subds.repo, AnnexRepo))
    ok_(ds.repo.dirty)
    # and save via subdataset path
    ds.save('subds')
    ok_clean_git(path, annex=isinstance(ds.repo, AnnexRepo))
def test_target_ssh_simple(origin, src_path, target_rootpath):
    """Exercise ``create_sibling`` against a local ssh ('localhost') target.

    Covers: initial sibling creation with UI, annex remote configuration,
    refusal to overwrite an existing target without ``existing='replace'``,
    replacement and reconfiguration (including which files may legitimately
    change mtime on reconfigure), and publishing to the created sibling.
    """
    # prepare src
    source = install(
        src_path, source=origin,
        result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost:22",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(
        text_type(cm.exception).startswith(
            "Target path %s already exists. And it fails to rmdir"
            % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path,
                                       create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME. To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace',
            ui=True,
        )
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes[
                "local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        assert_publish_with_ui(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # helper: strip hook-generated log/metadata files and annex
            # leftovers from the digest/mtime maps so that the remaining
            # entries can be compared across reconfigure calls
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [
                    k for k in digests
                    if k.startswith(_path_('.git/datalad/%s/' % part))
                ]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {
            k for k in mtimes if orig_mtimes.get(k, 0) != mtimes.get(k, 0)
        }
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'),
            'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
def test_unlock(path):
    """Exercise ``Dataset.unlock`` through a lock/edit/re-lock cycle.

    Covers both annex repo flavors: repos with unlocked-pointer support
    (V6+) where unlock works even without content, and older (V5-style)
    repos where unlock without content fails.  After each edit the file
    is committed and expected to be locked (read-only) again.
    """
    ds = Dataset(path)

    # file is currently locked:
    # TODO: use get_annexed_files instead of hardcoded filename
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")

    # in V6+ we can unlock even if the file's content isn't present:
    if ds.repo.supports_unlocked_pointers:
        res = ds.unlock()
        assert_result_count(res, 1)
        assert_status('ok', res)
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    else:
        # cannot unlock without content (annex get wasn't called)
        assert_raises(CommandError, ds.unlock)  # FIXME

    # fetch content so unlock can succeed on either repo flavor:
    ds.repo.get('test-annex.dat')
    result = ds.unlock()
    assert_result_count(result, 1)
    assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content")
    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # after commit, file is locked again:
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")
    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content", f.read())

    # unlock again, this time more specific:
    result = ds.unlock(path='test-annex.dat')
    assert_result_count(result, 1)
    assert_in_results(result, path=opj(ds.path, 'test-annex.dat'), status='ok')

    with open(opj(path, 'test-annex.dat'), "w") as f:
        f.write("change content again")
    ds.repo.add('test-annex.dat')
    # in V6+ we need to explicitly re-lock it:
    if ds.repo.supports_unlocked_pointers:
        # TODO: RF: make 'lock' a command as well
        # re-lock to further on have a consistent situation with V5:
        ds.repo._git_custom_command('test-annex.dat', ['git', 'annex', 'lock'])
    ds.repo.commit("edit 'test-annex.dat' via unlock and lock it again")

    # TODO:
    # BOOOM: test-annex.dat writeable in V6!
    # Why the hell is this different than the first time we wrote to the file
    # and locked it again?
    # Also: After opening the file is empty.

    # after commit, file is locked again:
    assert_raises(IOError, open, opj(path, 'test-annex.dat'), "w")
    # content was changed:
    with open(opj(path, 'test-annex.dat'), "r") as f:
        eq_("change content again", f.read())
def test_formatter_no_mapping_arg(): fmt = au.Formatter({}) assert_raises(ValueError, fmt.format, "{0}", "not a mapping")
def test_run_assume_ready(path): ds = Dataset(path).create() repo = ds.repo adjusted = repo.is_managed_branch() # --assume-ready=inputs (repo.pathobj / "f1").write_text("f1") ds.save() def cat_cmd(fname): return [ sys.executable, "-c", "import sys; print(open(sys.argv[-1]).read())", fname ] assert_in_results(ds.run(cat_cmd("f1"), inputs=["f1"]), action="get", type="file") # Same thing, but without the get() call. assert_not_in_results(ds.run(cat_cmd("f1"), inputs=["f1"], assume_ready="inputs"), action="get", type="file") ds.drop("f1", check=False) if not adjusted: # If the input is not actually ready, the command will fail. with assert_raises(CommandError): ds.run(cat_cmd("f1"), inputs=["f1"], assume_ready="inputs") # --assume-ready=outputs def unlink_and_write_cmd(fname): # This command doesn't care whether the output file is unlocked because # it removes it ahead of time anyway. return [ sys.executable, "-c", "import sys; import os; import os.path as op; " "f = sys.argv[-1]; op.lexists(f) and os.unlink(f); " "open(f, mode='w').write(str(sys.argv))", fname ] (repo.pathobj / "f2").write_text("f2") ds.save() res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"]) if not adjusted: assert_in_results(res, action="unlock", type="file") # Same thing, but without the unlock() call. res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], assume_ready="outputs") assert_not_in_results(res, action="unlock", type="file") # --assume-ready=both res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"]) assert_in_results(res, action="get", type="file") if not adjusted: assert_in_results(res, action="unlock", type="file") res = ds.run(unlink_and_write_cmd("f2"), outputs=["f2"], inputs=["f2"], assume_ready="both") assert_not_in_results(res, action="get", type="file") assert_not_in_results(res, action="unlock", type="file")
def test_formatter_no_idx_map(): fmt = au.Formatter({}) assert_raises(KeyError, fmt.format, "{0}", {"col0": "value0"})
def ask(): assert_raises(ValueError, cfg.obtain, dummy, store=True)
def test_addurls_url_filename_fail(self, path): ds = Dataset(path).create(force=True) assert_raises(IncompleteResultsError, ds.addurls, self.json_file, "{url}/nofilename/", "{_url0}/{_url_filename}")
def ask(): assert_raises(ValueError, cfg.obtain, dummy)
def test_addurls(self, path):
    """End-to-end check of ``Dataset.addurls``: file creation, metadata,
    commit accounting, and the ifexists modes (overwrite/skip/default).
    """
    ds = Dataset(path).create(force=True)

    def get_annex_commit_counts():
        # number of commits on the git-annex branch
        return len(ds.repo.get_revisions("git-annex"))

    n_annex_commits = get_annex_commit_counts()

    # Meanwhile also test that we can specify path relative
    # to the top of the dataset, as we generally treat paths in
    # Python API, and it will be the one saved in commit
    # message record
    json_file = op.relpath(self.json_file, ds.path)

    ds.addurls(json_file, "{url}", "{name}",
               exclude_autometa="(md5sum|size)")
    # the relative path must be recorded in the commit message body
    ok_startswith(ds.repo.format_commit('%b', DEFAULT_BRANCH),
                  f"url_file='{json_file}'")

    filenames = ["a", "b", "c"]
    for fname in filenames:
        ok_exists(op.join(ds.path, fname))

    # each file carries its 'subdir' and 'name' metadata from the url file
    for (fname, meta), subdir in zip(ds.repo.get_metadata(filenames),
                                     ["foo", "bar", "foo"]):
        assert_dict_equal(meta, {"subdir": [subdir], "name": [fname]})

    # Ignore this check if we're faking dates because that disables
    # batch mode.
    # Also ignore if on Windows as it seems as if a git-annex bug
    # leads to separate meta data commits:
    # https://github.com/datalad/datalad/pull/5202#discussion_r535429704
    if not (dl_cfg.get('datalad.fake-dates') or on_windows):
        # We should have two new commits on the git-annex: one for the
        # added urls and one for the added metadata.
        eq_(n_annex_commits + 2, get_annex_commit_counts())

    # Add to already existing links, overwriting.
    with swallow_logs(new_level=logging.DEBUG) as cml:
        ds.addurls(self.json_file, "{url}", "{name}",
                   ifexists="overwrite")
        for fname in filenames:
            assert_in("Removing {}".format(os.path.join(path, fname)),
                      cml.out)

    # Add to already existing links, skipping.
    assert_in_results(
        ds.addurls(self.json_file, "{url}", "{name}", ifexists="skip"),
        action="addurls",
        status="notneeded")

    # Add to already existing links works, as long content is the same.
    ds.addurls(self.json_file, "{url}", "{name}")

    # But it fails if something has changed.
    ds.unlock("a")
    with open(op.join(ds.path, "a"), "w") as ofh:
        ofh.write("changed")
    ds.save("a")

    assert_raises(IncompleteResultsError,
                  ds.addurls,
                  self.json_file, "{url}", "{name}")
def ask(): # fail on unkown dialog type assert_raises(ValueError, cfg.obtain, dummy, dialog_type='Rorschach_test')
def test_addurls_nonannex_repo(path): ds = Dataset(path).create(force=True, annex=False) with assert_raises(IncompleteResultsError) as raised: ds.addurls("dummy_arg0", "dummy_arg1", "dummy_arg2") assert_in("not an annex repo", str(raised.exception))
def test_container_from_subdataset(ds_path, src_subds_path, local_file):
    """Containers registered in subdatasets are discovered only with
    ``recursive=True``, report prefixed names ('sub/first'), and tolerate
    uninstalled subdatasets.
    """
    # prepare a to-be subdataset with a registered container
    src_subds = Dataset(src_subds_path).create()
    src_subds.containers_add(
        name="first",
        url=get_local_file_url(op.join(local_file, 'some_container.img')))
    # add it as subdataset to a super ds:
    ds = Dataset(ds_path).create()
    subds = ds.install("sub", source=src_subds_path)
    # add it again one level down to see actual recursion:
    subds.install("subsub", source=src_subds_path)

    # We come up empty without recursive:
    res = ds.containers_list(recursive=False, **RAW_KWDS)
    assert_result_count(res, 0)

    # query available containers from within super:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_in_results(res, action="containers", refds=ds.path)

    # default location within the subdataset:
    target_path = op.join(
        subds.path, '.datalad', 'environments', 'first', 'image')
    assert_result_count(
        res, 1,
        name='sub/first', type='file', action='containers', status='ok',
        path=target_path, parentds=subds.path)

    # not installed subdataset doesn't pose an issue:
    sub2 = ds.create("sub2")
    assert_result_count(ds.subdatasets(), 2, type="dataset")
    ds.uninstall("sub2")
    # NOTE(review): mid-function import — consider moving to module level
    from datalad.tests.utils import assert_false
    assert_false(sub2.is_installed())

    # same results as before, not crashing or somehow confused by a not present
    # subds:
    res = ds.containers_list(recursive=True, **RAW_KWDS)
    assert_result_count(res, 2)
    assert_result_count(
        res, 1,
        name='sub/first', type='file', action='containers', status='ok',
        path=target_path, parentds=subds.path)

    # The default renderer includes the image names.
    with swallow_outputs() as out:
        ds.containers_list(recursive=True)
        lines = out.out.splitlines()
    assert_re_in("sub/first", lines)
    assert_re_in("sub/subsub/first", lines)
    # But we are careful not to render partial names from subdataset traversals
    # (i.e. we recurse with containers_list(..., result_renderer=None)).
    with assert_raises(AssertionError):
        assert_re_in("subsub/first", lines)
def test_extract_wrong_input_type(): assert_raises(ValueError, au._read, None, "invalid_input_type")
def test_diff(path, norepo):
    """Exercise ``Dataset.diff``/``diff()`` across the main scenarios:
    invalid revisions, clean state, added/modified files, path filtering,
    staged-but-uncommitted changes, and untracked content reporting.
    """
    with chpwd(norepo):
        # outside of any dataset the command cannot operate
        assert_raises(NoDatasetFound, diff)
    ds = Dataset(path).create()
    assert_repo_status(ds.path)
    # reports stupid revision input
    assert_result_count(
        ds.diff(fr='WTF', on_failure='ignore', result_renderer=None),
        1,
        status='impossible',
        message="Git reference 'WTF' invalid")
    # no diff
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 0)
    assert_result_count(
        _dirty_results(ds.diff(fr='HEAD', result_renderer=None)), 0)
    # bogus path makes no difference
    assert_result_count(
        _dirty_results(ds.diff(path='THIS', fr='HEAD', result_renderer=None)),
        0)
    # let's introduce a known change
    create_tree(ds.path, {'new': 'empty'})
    ds.save(to_git=True)
    assert_repo_status(ds.path)
    if ds.repo.is_managed_branch():
        # on adjusted branches compare against the default branch explicitly
        fr_base = DEFAULT_BRANCH
        to = DEFAULT_BRANCH
    else:
        fr_base = "HEAD"
        to = None
    res = _dirty_results(
        ds.diff(fr=fr_base + '~1', to=to, result_renderer=None))
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=op.join(ds.path, 'new'), state='added')
    # we can also find the diff without going through the dataset explicitly
    with chpwd(ds.path):
        assert_result_count(
            _dirty_results(diff(fr=fr_base + '~1', to=to,
                                result_renderer=None)),
            1, action='diff', path=op.join(ds.path, 'new'), state='added')
    # no diff against HEAD
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 0)
    # modify known file
    create_tree(ds.path, {'new': 'notempty'})
    res = _dirty_results(ds.diff(result_renderer=None))
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, action='diff', path=op.join(ds.path, 'new'),
        state='modified')
    # but if we give another path, it doesn't show up
    assert_result_count(ds.diff(path='otherpath', result_renderer=None), 0)
    # giving the right path must work though
    assert_result_count(
        ds.diff(path='new', result_renderer=None), 1,
        action='diff', path=op.join(ds.path, 'new'), state='modified')
    # stage changes
    ds.repo.add('.', git=True)
    # no change in diff, staged is not committed
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 1)
    ds.save()
    assert_repo_status(ds.path)
    assert_result_count(_dirty_results(ds.diff(result_renderer=None)), 0)
    # untracked stuff
    create_tree(ds.path, {'deep': {'down': 'untracked', 'down2': 'tobeadded'}})
    # a plain diff should report the untracked file
    # but not directly, because the parent dir is already unknown
    res = _dirty_results(ds.diff(result_renderer=None))
    assert_result_count(res, 1)
    assert_result_count(
        res, 1, state='untracked', type='directory',
        path=op.join(ds.path, 'deep'))
    # report of individual files is also possible
    assert_result_count(
        ds.diff(untracked='all', result_renderer=None), 2,
        state='untracked', type='file')
    # an unmatching path will hide this result
    assert_result_count(ds.diff(path='somewhere', result_renderer=None), 0)
    # perfect match and anything underneath will do
    assert_result_count(
        ds.diff(path='deep', result_renderer=None), 1,
        state='untracked', path=op.join(ds.path, 'deep'), type='directory')
    assert_result_count(
        ds.diff(path='deep', result_renderer=None), 1,
        state='untracked', path=op.join(ds.path, 'deep'))
    ds.repo.add(op.join('deep', 'down2'), git=True)
    # now the remaining file is the only untracked one
    assert_result_count(
        ds.diff(result_renderer=None), 1,
        state='untracked', path=op.join(ds.path, 'deep', 'down'),
        type='file')