def test_addurls_url_on_collision_choose(self=None, path=None):
    ds = Dataset(path).create(force=True)
    data = deepcopy(self.data)
    for row in data:
        row["name"] = "a"

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}", on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        assert_in_results(
            ds.addurls("-", "{url}", "{name}",
                       on_collision="error-if-different",
                       on_failure="ignore"),
            action="addurls",
            status="error")

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-first", on_collision="take-first")
    ok_file_has_content(op.join(ds.path, "a-first"), "a content", strip=True)

    with patch("sys.stdin", new=StringIO(json.dumps(data))):
        ds.addurls("-", "{url}", "{name}-last", on_collision="take-last")
    ok_file_has_content(op.join(ds.path, "a-last"), "c content", strip=True)
def test_delete_not_crashing(path=None):
    # although in above test we just use/interact with Keyring without
    # specifying any custom one, there we do not change it so I guess it is
    # ok. Here we want a real keyring backend which we will alter
    from keyrings.alt.file import PlaintextKeyring
    kb = PlaintextKeyring()
    kb.filename = path

    keyring = Keyring(keyring_backend=kb)
    cred = UserPassword("test1", keyring=keyring)

    cred.set(user="******", password="******")
    ok_file_has_content(path, ".*test1.*", re_=True)  # keyring backend saves where we expect

    # manually delete one component of the credential
    cred._keyring.delete(cred.name, next(iter(cred._FIELDS)))

    # now delete entire credential -- we must not crash
    cred.delete()
    # and the key must be gone from the keyring file. Note: the original
    # try/except here caught its own manually raised AssertionError, so the
    # "keyring still has our key" check could never fail; assert_raises
    # expresses the intent correctly.
    with assert_raises(AssertionError):
        ok_file_has_content(path, ".*test1.*", re_=True)
def test_create_tree(path=None):
    content = u"мама мыла раму"
    create_tree(
        path,
        OrderedDict([
            ('1', content),
            ('sd', OrderedDict([
                # right away an obscure case where we have both 1 and 1.gz
                ('1', content * 2),
                ('1.gz', content * 3),
                ('1.xz', content * 4),
                ('1.lzma', content * 5),
            ])),
        ]))
    ok_file_has_content(op.join(path, '1'), content)
    ok_file_has_content(op.join(path, 'sd', '1'), content * 2)
    ok_file_has_content(op.join(path, 'sd', '1.gz'), content * 3,
                        decompress=True)
    ok_file_has_content(op.join(path, 'sd', '1.xz'), content * 4,
                        decompress=True)
    ok_file_has_content(op.join(path, 'sd', '1.lzma'), content * 5,
                        decompress=True)
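# A minimal sketch (NOT datalad's actual create_tree implementation) of the
# tree-spec convention the test above relies on: nested dicts become
# directories, string values become file contents, and names ending in a
# known compression suffix are written compressed, so that
# ok_file_has_content(..., decompress=True) can verify them.
# The helper name _create_tree_sketch is hypothetical.
import gzip
import lzma
import os


def _create_tree_sketch(root, spec):
    for name, value in spec.items():
        target = os.path.join(root, name)
        if isinstance(value, dict):
            os.makedirs(target, exist_ok=True)
            _create_tree_sketch(target, value)
        elif name.endswith('.gz'):
            with gzip.open(target, 'wt', encoding='utf-8') as f:
                f.write(value)
        elif name.endswith(('.xz', '.lzma')):
            # .lzma is the legacy container format, .xz the modern one
            fmt = lzma.FORMAT_ALONE if name.endswith('.lzma') \
                else lzma.FORMAT_XZ
            with lzma.open(target, 'wt', encoding='utf-8', format=fmt) as f:
                f.write(value)
        else:
            with open(target, 'w', encoding='utf-8') as f:
                f.write(value)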
def check_compress_file(ext, annex, path=None, name=None):
    # we base the archive name on the filename, in order to also
    # be able to properly test compressors where the corresponding
    # archive format has no capability of storing a filename
    # (i.e. where the archive name itself determines the filename
    # of the decompressed file, like .xz)
    archive = op.join(name, _filename + ext)
    compress_files([_filename], archive, path=path)
    assert_true(op.exists(archive))
    if annex:
        # It should work even when file is annexed and is a symlink to the
        # key
        from datalad.support.annexrepo import AnnexRepo
        repo = AnnexRepo(path, init=True)
        repo.add(_filename)
        repo.commit(files=[_filename], msg="commit")

    dir_extracted = name + "_extracted"
    try:
        decompress_file(archive, dir_extracted)
    except MissingExternalDependency as exc:
        raise SkipTest() from exc
    _filepath = op.join(dir_extracted, _filename)

    ok_file_has_content(_filepath, 'content')
def test_copy_file_recursion(srcdir=None, destdir=None):
    src_ds = Dataset(srcdir).create(force=True)
    src_ds.save()
    dest_ds = Dataset(destdir).create()
    copy_file([src_ds.pathobj / 'subdir', dest_ds.pathobj], recursive=True)
    # structure is mirrored
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file1', '123')
    ok_file_has_content(dest_ds.pathobj / 'subdir' / 'file2', 'abc')
def test_newthings_coming_down(originpath=None, destpath=None):
    origin = GitRepo(originpath, create=True)
    create_tree(originpath, {'load.dat': 'heavy'})
    Dataset(originpath).save('load.dat')
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    assert_is_instance(ds.repo, GitRepo)
    assert_in(DEFAULT_REMOTE, ds.repo.get_remotes())
    # turn origin into an annex
    origin = AnnexRepo(originpath, create=True)
    # clone doesn't know yet
    assert_false(knows_annex(ds.path))
    # but after an update it should
    # no merge, only one sibling, no parameters should be specific enough
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    assert knows_annex(ds.path)
    # no branches appeared
    eq_(ds.repo.get_branches(), [DEFAULT_BRANCH])
    # now merge, and get an annex
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    assert_in('git-annex', ds.repo.get_branches())
    assert_is_instance(ds.repo, AnnexRepo)
    # should be fully functional
    testfname = opj(ds.path, 'load.dat')
    assert_false(ds.repo.file_has_content(testfname))
    ds.get('.')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # check that a new tag comes down
    origin.tag('first!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[0], 'first!')

    # and now we destroy the remote annex
    origin.call_git(['config', '--remove-section', 'annex'])
    rmtree(opj(origin.path, '.git', 'annex'), chmod_files=True)
    origin.call_git(['branch', '-D', 'git-annex'])
    origin = GitRepo(originpath)
    assert_false(knows_annex(originpath))

    # and update the local clone
    # for now this should simply not fail (see gh-793), later might be
    # enhanced to a graceful downgrade
    before_branches = ds.repo.get_branches()
    ok_(any("git-annex" in b for b in ds.repo.get_remote_branches()))
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(before_branches, ds.repo.get_branches())
    # annex branch got pruned
    assert_false(any("git-annex" in b for b in ds.repo.get_remote_branches()))

    # check that a new tag comes down even if repo types mismatch
    origin.tag('second!')
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    eq_(ds.repo.get_tags(output='name')[-1], 'second!')
def test_runner(tempfile=None):
    runner = Runner()
    content = 'Testing real run' if on_windows else 'Testing äöü東 real run'
    cmd = 'echo %s > %s' % (content, tempfile)
    res = runner.run(cmd)
    # no capture of any kind, by default
    ok_(not res['stdout'])
    ok_(not res['stderr'])
    ok_file_has_content(tempfile, content, strip=True)
    os.unlink(tempfile)
def test_publish_target_url(src=None, desttop=None, desturl=None):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    ds.save('1')
    ds.create_sibling('ssh://datalad-test:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.push(to='target')
    assert results
    ok_file_has_content(Path(desttop, 'subdir', '1'), '123')
def test_add_archive_single_file(repo_path=None):
    ds = Dataset(repo_path).create(force=True)
    with chpwd(repo_path):
        archives = glob('archives/*')
        ds.save(archives, message='Added archives')

        for archive in archives:
            archive_name = os.path.splitext(archive)[0]
            archive_content = os.path.basename(archive_name)
            ds.add_archive_content(archive)
            ok_file_has_content(archive_name, archive_content)
def test_install_dataladri(src=None, topurl=None, path=None):
    # make plain git repo
    ds_path = opj(src, 'ds')
    gr = GitRepo(ds_path, create=True)
    gr.add('test.txt')
    gr.commit('demo')
    Runner(cwd=gr.path).run(['git', 'update-server-info'])
    # now install it somewhere else
    with patch('datalad.consts.DATASETS_TOPURL', topurl), \
            swallow_logs():
        ds = install(path, source='///ds')
    eq_(ds.path, path)
    assert_repo_status(path, annex=False)
    ok_file_has_content(opj(path, 'test.txt'), 'some')
def test_spaces(path=None):
    """
    Test whether args with spaces are correctly parsed.
    """
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        scope='branch')
    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'with spaces', 'unrelated'])
    # check whether file has name with spaces
    ok_file_has_content(op.join(ds.path, 'with spaces'), 'hello\n')
def test_update_git_smoke(src_path=None, dst_path=None):
    # Apparently was just failing on git repos for basic lack of coverage,
    # hence this quick test
    ds = Dataset(src_path).create(annex=False)
    target = install(
        dst_path, source=src_path,
        result_xfm='datasets', return_type='item-or-list')
    create_tree(ds.path, {'file.dat': '123'})
    ds.save('file.dat')
    assert_result_count(
        target.update(recursive=True, merge=True), 1,
        action='update', status='ok', type='dataset')
    ok_file_has_content(opj(target.path, 'file.dat'), '123')
def test_swallow_logs(logfile=None):
    lgr = logging.getLogger('datalad')
    with swallow_logs(new_level=9) as cm:
        eq_(cm.out, '')
        lgr.log(8, "very heavy debug")
        eq_(cm.out, '')  # not even visible at level 9
        lgr.log(9, "debug1")
        eq_(cm.out, '[Level 9] debug1\n')  # but this one is visible
        lgr.info("info")
        eq_(cm.out, '[Level 9] debug1\n[INFO] info\n')  # and so is INFO
    with swallow_logs(new_level=9, file_=logfile) as cm:
        eq_(cm.out, '')
        lgr.info("next info")
    from datalad.tests.utils_pytest import ok_file_has_content
    ok_file_has_content(logfile, "[INFO] next info", strip=True)
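# A minimal sketch (an assumption, not datalad's actual implementation) of
# the swallow_logs() behavior the test above exercises: temporarily attach a
# capturing handler at `new_level` to the 'datalad' logger and expose the
# captured text as `.out`. The name swallow_logs_sketch is hypothetical.
import logging
from contextlib import contextmanager
from io import StringIO


@contextmanager
def swallow_logs_sketch(new_level=9, logger_name='datalad'):
    logger = logging.getLogger(logger_name)
    buf = StringIO()
    handler = logging.StreamHandler(buf)
    # yields records like '[Level 9] debug1' and '[INFO] info'
    handler.setFormatter(logging.Formatter('[%(levelname)s] %(message)s'))
    old_level = logger.level
    logger.addHandler(handler)
    logger.setLevel(new_level)

    class _Capture:
        @property
        def out(self):
            return buf.getvalue()

    try:
        yield _Capture()
    finally:
        # restore the logger to its previous configuration
        logger.removeHandler(handler)
        logger.setLevel(old_level)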
def test_copy_file_datalad_specialremote(workdir=None,
                                         webdir=None,
                                         weburl=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    # enable datalad special remote
    src_ds.repo.init_remote(
        DATALAD_SPECIAL_REMOTE,
        ['encryption=none', 'type=external',
         'externaltype={}'.format(DATALAD_SPECIAL_REMOTE),
         'autoenable=true'])
    # put files into the dataset by URL
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path='myfile2.txt')
    # approx test that the file is known to a remote
    # that is not the web remote
    assert_in_results(
        src_ds.repo.whereis('myfile1.txt', output='full').values(),
        here=False,
        description='[{}]'.format(DATALAD_SPECIAL_REMOTE),
    )
    # now a new dataset
    dest_ds = Dataset(workdir / 'dest').create()
    # no special remotes
    eq_(dest_ds.repo.get_special_remotes(), {})
    # must call with a dataset to get change saved, in order for drop
    # below to work properly without getting in reckless mode
    dest_ds.copy_file([src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj])
    # we have a special remote in the destination dataset now
    assert_in_results(
        dest_ds.repo.get_special_remotes().values(),
        externaltype=DATALAD_SPECIAL_REMOTE,
    )
    # and it works
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # now replace file in dest with a different content at the same path
    # must call with a dataset to get change saved, in order for drop
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile2.txt', dest_ds.pathobj / 'myfile1.txt'])
    dest_ds.drop('myfile1.txt')
    dest_ds.repo.get('myfile1.txt', remote='datalad')
    # now gets the "same path" but yields different content
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', 'abc')
def check_dss():
    # we added the remote and set all the preferred content settings
    for subds in subdss:
        eq_(subds.repo.get_preferred_content('wanted', remote),
            'standard' if standardgroup else '')
        eq_(subds.repo.get_preferred_content('group', remote),
            standardgroup or '')
    for target_sub in target_subdss:
        ok_(target_sub.is_installed())  # it is there now
        eq_(target_sub.repo.config.get('core.sharedrepository'), '1')
        # and we have transferred the content
        if standardgroup and standardgroup == 'backup':
            # only then content should be copied
            ok_file_has_content(opj(target_sub.path, 'sub.dat'),
                                'lots of data')
        else:
            # otherwise nothing is copied by default
            assert_false(target_sub.repo.file_has_content('sub.dat'))
def test_copy_file_into_nonannex(workdir=None):
    workdir = Path(workdir)
    src_ds = Dataset(workdir / 'src').create()
    (src_ds.pathobj / 'present.txt').write_text('123')
    (src_ds.pathobj / 'gone.txt').write_text('abc')
    src_ds.save()
    src_ds.drop('gone.txt', reckless='kill')

    # destination has no annex
    dest_ds = Dataset(workdir / 'dest').create(annex=False)
    # no issue copying a file that has content
    copy_file([src_ds.pathobj / 'present.txt', dest_ds.pathobj])
    ok_file_has_content(dest_ds.pathobj / 'present.txt', '123')
    # but cannot handle a dropped file, no chance to register
    # availability info in an annex
    assert_status(
        'impossible',
        copy_file([src_ds.pathobj / 'gone.txt', dest_ds.pathobj],
                  on_failure='ignore'))
def test_inputs_quotes_needed(path=None):
    ds = Dataset(path).create(force=True)
    ds.save()
    cmd = "import sys; open(sys.argv[-1], 'w').write('!'.join(sys.argv[1:]))"
    # The string form of a command works fine when the inputs/outputs have
    # spaces ...
    cmd_str = "{} -c \"{}\" {{inputs}} {{outputs[0]}}".format(
        sys.executable, cmd)
    ds.run(cmd_str, inputs=["*.t*"], outputs=["out0"], expand="inputs")
    expected = u"!".join(
        list(sorted([OBSCURE_FILENAME + u".t", "bar.txt", "foo blah.txt"])) +
        ["out0"])
    with open(op.join(path, "out0")) as ifh:
        eq_(ensure_unicode(ifh.read()), expected)
    # ... but the list form of a command does not. (Don't test this failure
    # with the obscure file name because we'd need to know its composition to
    # predict the failure.)
    cmd_list = [sys.executable, "-c", cmd, "{inputs}", "{outputs[0]}"]
    ds.run(cmd_list, inputs=["*.txt"], outputs=["out0"])
    ok_file_has_content(op.join(path, "out0"), "bar.txt foo!blah.txt!out0")
def test_downloader_download(urlpath=None, url=None, path=None):
    path = Path(path)
    downloader = SHubDownloader()
    downloader.api_url = url

    create_tree(urlpath,
                tree={
                    "data": "foo",
                    "org": {
                        "repo": '{{"name":"org/repo","image":"{}"}}'.format(
                            url + "data")
                    }
                })

    target = str(path / "target")
    downloader.download("shub://org/repo", target)
    ok_file_has_content(target, "foo")

    other_target = str(path / "other-target")
    downloader.download("shub://org/repo", other_target)
def test_paths_with_forward_slashes(path=None):
    # access file with native absolute path spec
    print(path)
    ok_file_has_content(op.join(path, 'subdir', 'testfile'), 'testcontent')
    with chpwd(path):
        # native relative path spec
        ok_file_has_content(op.join('subdir', 'testfile'), 'testcontent')
        # posix relative path spec
        ok_file_has_content('subdir/testfile', 'testcontent')
    # abspath with forward slash path sep char
    ok_file_has_content(
        op.join(path, 'subdir', 'testfile').replace(op.sep, '/'),
        'testcontent')
def test_configs(path=None):
    # set up dataset with registered procedure (c&p from test_basics):
    ds = Dataset(path).create(force=True)
    ds.run_procedure('cfg_yoda')
    # configure dataset to look for procedures in its code folder
    ds.config.add(
        'datalad.locations.dataset-procedures',
        'code',
        scope='branch')

    # 1. run procedure based on execution guessing by run_procedure:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'some_arg\n')

    # 2. now configure specific call format including usage of substitution
    # config for run:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} {{mysub}} {args}' % quote_cmdlinearg(
            sys.executable),
        scope='branch')
    ds.config.add(
        'datalad.run.substitutions.mysub',
        'dataset-call-config',
        scope='branch')
    # TODO: Should we allow for --inputs/--outputs arguments for run_procedure
    #       (to be passed into run)?
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'),
                        'dataset-call-config\n')

    # 3. have a conflicting config at user-level, which should override the
    # config on dataset level:
    ds.config.add(
        'datalad.procedures.datalad_test_proc.call-format',
        u'%s {script} {ds} local {args}' % quote_cmdlinearg(sys.executable),
        scope='local')
    ds.unlock("fromproc.txt")
    # run again:
    ds.run_procedure(spec=['datalad_test_proc', 'some_arg'])
    # look for traces
    ok_file_has_content(op.join(ds.path, 'fromproc.txt'), 'local\n')

    # 4. get configured help message:
    r = ds.run_procedure('datalad_test_proc', help_proc=True,
                         on_failure='ignore')
    assert_true(len(r) == 1)
    assert_in_results(r, status="impossible")

    ds.config.add(
        'datalad.procedures.datalad_test_proc.help',
        "This is a help message",
        scope='branch')

    r = ds.run_procedure('datalad_test_proc', help_proc=True)
    assert_true(len(r) == 1)
    assert_in_results(r, message="This is a help message", status='ok')
def test_copy_file(workdir=None, webdir=None, weburl=None):
    workdir = Path(workdir)
    webdir = Path(webdir)
    src_ds = Dataset(workdir / 'src').create()
    # put a file into the dataset by URL and drop it again
    src_ds.download_url('/'.join((weburl, 'webfile1')),
                        path='myfile1.txt')
    src_ds.download_url('/'.join((weburl, 'webfile2')),
                        path=opj('subdir', 'myfile2.txt'))
    ok_file_has_content(src_ds.pathobj / 'myfile1.txt', '123')
    # now create a fresh dataset
    dest_ds = Dataset(workdir / 'dest').create()
    if dest_ds.repo._check_version_kludges("fromkey-supports-unlocked") or \
       not dest_ds.repo.is_managed_branch():
        # unless we have a target ds on a crippled FS (where `annex fromkey`
        # doesn't work until after 8.20210428), we can even drop the file
        # content in the source repo
        src_ds.drop('myfile1.txt', reckless='kill')
        nok_(src_ds.repo.file_has_content('myfile1.txt'))
    # copy the file from the source dataset into it.
    # it must copy enough info to actually put datalad into the position
    # to obtain the file content from the original URL
    dest_ds.copy_file(src_ds.pathobj / 'myfile1.txt')
    dest_ds.get('myfile1.txt')
    ok_file_has_content(dest_ds.pathobj / 'myfile1.txt', '123')
    # purposefully pollute the employed tmp folder to check that we do not
    # trip over such a condition
    tmploc = dest_ds.pathobj / '.git' / 'tmp' / 'datalad-copy' / 'some'
    tmploc.parent.mkdir(parents=True)
    tmploc.touch()
    # copy again, but to different target file name
    # (source+dest pair now)
    dest_ds.copy_file(
        [src_ds.pathobj / 'myfile1.txt', dest_ds.pathobj / 'renamed.txt'])
    ok_file_has_content(dest_ds.pathobj / 'renamed.txt', '123')
    # copying more than one at once
    dest_ds.copy_file([
        src_ds.pathobj / 'myfile1.txt',
        src_ds.pathobj / 'subdir' / 'myfile2.txt',
        dest_ds.pathobj
    ])
    # copy directly from a non-dataset location
    dest_ds.copy_file(webdir / 'webfile1')

    # copy from annex dataset into gitrepo
    git_ds = Dataset(workdir / 'git').create(annex=False)
    git_ds.copy_file(src_ds.pathobj / 'subdir' / 'myfile2.txt')
def test_reobtain_data(originpath=None, destpath=None):
    origin = Dataset(originpath).create()
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # no harm
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    # content
    create_tree(origin.path, {'load.dat': 'heavy'})
    origin.save(opj(origin.path, 'load.dat'))
    # update does not bring data automatically
    assert_result_count(ds.update(merge=True, reobtain_data=True),
                        1, action="update", status="ok")
    assert_in('load.dat', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('load.dat'))
    # now get data
    ds.get('load.dat')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    # new content at origin
    create_tree(origin.path, {'novel': 'but boring'})
    origin.save()
    # update must not bring in data for new file
    result = ds.update(merge=True, reobtain_data=True)
    assert_in_results(result, action='get', status='notneeded')

    ok_file_has_content(opj(ds.path, 'load.dat'), 'heavy')
    assert_in('novel', ds.repo.get_annexed_files())
    assert_false(ds.repo.file_has_content('novel'))
    # modify content at origin
    os.remove(opj(origin.path, 'load.dat'))
    create_tree(origin.path, {'load.dat': 'light'})
    origin.save()
    # update must update file with existing data, but leave empty one alone
    res = ds.update(merge=True, reobtain_data=True)
    assert_result_count(res, 1, status='ok', type='dataset', action='update')
    assert_result_count(res, 1, status='ok', type='file', action='get')
    ok_file_has_content(opj(ds.path, 'load.dat'), 'light')
    assert_false(ds.repo.file_has_content('novel'))
def test_run_unexpanded_placeholders(path=None):
    ds = Dataset(path).create()
    cmd = [
        sys.executable, "-c",
        "import sys; open(sys.argv[1], 'w').write(' '.join(sys.argv[2:]))"
    ]
    # It's weird, but for lack of better options, inputs and outputs that
    # don't have matches are available unexpanded.
    with assert_raises(IncompleteResultsError):
        ds.run(cmd + ["arg1", "{inputs}"], inputs=["foo*"],
               on_failure="continue")
    assert_repo_status(ds.path)
    ok_file_has_content(op.join(path, "arg1"), "foo*")

    ds.run(cmd + ["arg2", "{outputs}"], outputs=["bar*"])
    assert_repo_status(ds.path)
    ok_file_has_content(op.join(path, "arg2"), "bar*")

    ds.run(cmd + ["arg3", "{outputs[1]}"], outputs=["foo*", "bar"])
    ok_file_has_content(op.join(path, "arg3"), "bar")
def test_update_volatile_subds(originpath=None, otherpath=None,
                               destpath=None):
    origin = Dataset(originpath).create()
    repo = origin.repo
    if repo.is_managed_branch() and repo.git_annex_version <= "8.20201129":
        # Fails before git-annex's fd161da2c (adjustTree: Consider submodule
        # deletions, 2021-01-06).
        raise SkipTest(
            "On adjusted branch, test requires fix in more recent git-annex")
    ds = install(
        source=originpath, path=destpath,
        result_xfm='datasets', return_type='item-or-list')
    # as a submodule
    sname = 'subm 1'
    osm1 = origin.create(sname)
    assert_result_count(ds.update(), 1, status='ok', type='dataset')
    # nothing without a merge, no inappropriate magic
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    # and we should be able to do update with recursive invocation
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        action='update', status='ok', type='dataset')
    # known, and placeholder exists
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_(exists(opj(ds.path, sname)))

    # remove from origin
    origin.remove(sname, reckless='availability')
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    # gone locally, wasn't checked out
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    assert_false(exists(opj(ds.path, sname)))

    # re-introduce at origin
    osm1 = origin.create(sname)
    create_tree(osm1.path, {'load.dat': 'heavy'})
    origin.save(opj(osm1.path, 'load.dat'))
    assert_result_count(ds.update(merge=True), 1, action='update',
                        status='ok', type='dataset')
    # grab new content of uninstalled subdataset, right away
    ds.get(opj(ds.path, sname, 'load.dat'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')

    # modify ds and subds at origin
    create_tree(origin.path, {'mike': 'this', sname: {'probe': 'little'}})
    origin.save(recursive=True)
    assert_repo_status(origin.path)

    # updates for both datasets should come down the pipe
    assert_result_count(ds.update(merge=True, recursive=True), 2,
                        action='update', status='ok', type='dataset')
    assert_repo_status(ds.path)

    # now remove just-installed subdataset from origin again
    origin.remove(sname, reckless='kill')
    assert_not_in(sname, origin.subdatasets(result_xfm='relpaths'))
    assert_in(sname, ds.subdatasets(result_xfm='relpaths'))
    # merge should disconnect the installed subdataset, but leave the actual
    # ex-subdataset alone
    assert_result_count(ds.update(merge=True, recursive=True), 1,
                        action='update', type='dataset')
    assert_not_in(sname, ds.subdatasets(result_xfm='relpaths'))
    ok_file_has_content(opj(ds.path, sname, 'load.dat'), 'heavy')
    ok_(Dataset(opj(ds.path, sname)).is_installed())

    # now remove the now disconnected subdataset for further tests
    remove(dataset=op.join(ds.path, sname), reckless='kill')
    assert_repo_status(ds.path)

    # new separate subdataset, not within the origin dataset
    otherds = Dataset(otherpath).create()
    # install separate dataset as a submodule
    ds.install(source=otherds.path, path='other')
    create_tree(otherds.path, {'brand': 'new'})
    otherds.save()
    assert_repo_status(otherds.path)
    # pull in changes
    res = ds.update(merge=True, recursive=True)
    assert_result_count(res, 2, status='ok', action='update', type='dataset')
    # the next is to check for #2858
    assert_repo_status(ds.path)
def test_update_simple(origin=None, src_path=None, dst_path=None):
    ca = dict(result_renderer='disabled')
    # a remote dataset with a subdataset underneath
    origds = Dataset(origin).create(**ca)
    # naming is weird, but a legacy artifact
    _ = origds.create('subm 1', **ca)
    _ = origds.create('2', **ca)

    # prepare src
    source = install(src_path, source=origin, recursive=True)
    # forget we cloned it by removing remote, which should lead to
    # setting tracking branch to target:
    source.repo.remove_remote(DEFAULT_REMOTE)
    # also forget the declared absolute location of the submodules, and turn
    # them relative to this/a clone
    for sub in source.subdatasets(result_xfm=lambda x: x['gitmodule_name']):
        source.subdatasets(path=sub,
                           set_property=[('url', './{}'.format(sub))])

    # dataset without sibling will not need updates
    assert_status('notneeded', source.update())
    # deprecation message doesn't ruin things
    assert_status('notneeded', source.update(fetch_all=True))
    # but error if unknown sibling is given
    assert_status('impossible',
                  source.update(sibling='funky', on_failure='ignore'))

    # get a clone to update later on:
    dest = install(dst_path, source=src_path, recursive=True)
    # test setup done;
    # assert all fine
    assert_repo_status(dst_path)
    assert_repo_status(src_path)

    # update yields nothing => up-to-date
    assert_status('ok', dest.update())
    assert_repo_status(dst_path)

    # modify remote:
    with open(opj(src_path, "update.txt"), "w") as f:
        f.write("Additional content")
    source.save(path="update.txt", message="Added update.txt")
    assert_repo_status(src_path)

    # update without `merge` only fetches:
    assert_status('ok', dest.update())
    # modification is not known to active branch:
    assert_not_in("update.txt",
                  dest.repo.get_files(dest.repo.get_active_branch()))
    # modification is known to branch <default remote>/<default branch>
    assert_in("update.txt",
              dest.repo.get_files(DEFAULT_REMOTE + "/" + DEFAULT_BRANCH))

    # merge:
    assert_status('ok', dest.update(merge=True))
    # modification is now known to active branch:
    assert_in("update.txt",
              dest.repo.get_files(dest.repo.get_active_branch()))
    # it's known to annex, but has no content yet:
    annexprops = dest.repo.get_file_annexinfo(
        "update.txt", eval_availability=True)
    annexprops['key']  # blows if unknown
    eq_(False, annexprops['has_content'])

    # check subdataset path constraints, baseline (parent + 2 subds)
    assert_result_count(dest.update(recursive=True),
                        3, status='ok', type='dataset')
    # no recursion and invalid path still updates the parent
    res = dest.update(path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # invalid path with recursion also does
    res = dest.update(recursive=True, path='whatever')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and no recursion only updates the parent
    res = dest.update(path='subm 1')
    assert_result_count(res, 1, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    # valid path and recursion updates matching
    res = dest.update(recursive=True, path='subm 1')
    assert_result_count(res, 2, status='ok', type='dataset')
    assert_result_count(res, 1, status='ok', path=dest.path)
    assert_result_count(res, 1, status='ok',
                        path=str(dest.pathobj / 'subm 1'))
    # additional invalid path doesn't hurt
    res = dest.update(recursive=True, path=['subm 1', 'mike'])
    assert_result_count(res, 2, status='ok', type='dataset')
    # full match
    res = dest.update(recursive=True, path=['subm 1', '2'])
    assert_result_count(res, 3, status='ok', type='dataset')

    # test that update doesn't crash if we specify only a single path (submod)
    # to operate on
    with chpwd(dest.path):
        # in 0.11.x it would be a single result since "pwd" dataset is not
        # considered, and would be relative path (as specified).
        # In 0.12.0 - it would include implicit pwd dataset, and paths would
        # be absolute
        res_update = update(path=['subm 1'], recursive=True)
        assert_result_count(res_update, 2)
        for p in dest.path, str(dest.pathobj / 'subm 1'):
            assert_in_results(res_update, path=p, action='update',
                              status='ok', type='dataset')

        # and with merge we would also try to save (but there would be no
        # changes)
        res_merge = update(path=['subm 1'], recursive=True, merge=True)
        assert_result_count(res_merge, 2, action='update')
        # 2 of "updates" really.
        assert_in_results(res_merge, action='update', status='ok',
                          type='dataset')
        assert_in_results(res_merge, action='save', status='notneeded',
                          type='dataset')

    # smoke-test if recursive update doesn't fail if submodule is removed
    # and that we can run it from within a dataset without providing it
    # explicitly
    assert_result_count(
        dest.remove('subm 1'), 1,
        status='ok', action='remove', path=opj(dest.path, 'subm 1'))
    with chpwd(dest.path):
        assert_result_count(update(recursive=True), 2, status='ok',
                            type='dataset')
    assert_result_count(dest.update(merge=True, recursive=True), 2,
                        action='update', status='ok', type='dataset')

    # and now test recursive update with merging in differences
    create_tree(opj(source.path, '2'), {'load.dat': 'heavy'})
    source.save(opj('2', 'load.dat'), message="saving changes within subm2",
                recursive=True)
    assert_result_count(dest.update(merge=True, recursive=True), 2,
                        action='update', status='ok', type='dataset')
    # and now we can get new file
    dest.get(opj('2', 'load.dat'))
    ok_file_has_content(opj(dest.path, '2', 'load.dat'), 'heavy')
def check_push(annex, src_path, dst_path):
    # prepare src
    src = Dataset(src_path).create(annex=annex)
    src_repo = src.repo
    # push should not add branches to the local dataset
    orig_branches = src_repo.get_branches()
    assert_not_in('synced/' + DEFAULT_BRANCH, orig_branches)

    res = src.push(on_failure='ignore')
    assert_result_count(res, 1)
    assert_in_results(
        res, status='impossible',
        message='No push target given, and none could be auto-detected, '
                'please specify via --to')
    eq_(orig_branches, src_repo.get_branches())
    # target sibling
    target = mk_push_target(src, 'target', dst_path, annex=annex)
    eq_(orig_branches, src_repo.get_branches())

    res = src.push(to="target")
    eq_(orig_branches, src_repo.get_branches())
    assert_result_count(res, 2 if annex else 1)
    assert_in_results(
        res,
        action='publish', status='ok', target='target',
        refspec=DEFAULT_REFSPEC,
        operations=['new-branch'])

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # configure a default merge/upstream target
    src.config.set('branch.{}.remote'.format(DEFAULT_BRANCH),
                   'target', scope='local')
    src.config.set('branch.{}.merge'.format(DEFAULT_BRANCH),
                   DEFAULT_BRANCH, scope='local')

    # don't fail when doing it again, no explicit target specification
    # needed anymore
    res = src.push()
    eq_(orig_branches, src_repo.get_branches())
    # and nothing is pushed
    assert_status('notneeded', res)

    assert_repo_status(src_repo, annex=annex)
    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # some modification:
    (src.pathobj / 'test_mod_file').write_text("Some additional stuff.")
    src.save(to_git=True, message="Modified.")
    (src.pathobj / 'test_mod_annex_file').write_text("Heavy stuff.")
    src.save(to_git=not annex, message="Modified again.")
    assert_repo_status(src_repo, annex=annex)

    # we could say since='HEAD~2' to make things fast, or we are lazy
    # and say since='^' to indicate the state of the tracking remote
    # which is the same, because we made two commits since the last push.
    res = src.push(to='target', since="^", jobs=2)
    assert_in_results(
        res,
        action='publish', status='ok', target='target',
        refspec=DEFAULT_REFSPEC,
        # we get to see what happened
        operations=['fast-forward'])
    if annex:
        # we got to see the copy result for the annexed files
        assert_in_results(
            res,
            action='copy', status='ok',
            path=str(src.pathobj / 'test_mod_annex_file'))
        # we published, so we can drop and reobtain
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        src_repo.drop('test_mod_annex_file')
        ok_(not src_repo.file_has_content('test_mod_annex_file'))
        src_repo.get('test_mod_annex_file')
        ok_(src_repo.file_has_content('test_mod_annex_file'))
        ok_file_has_content(src_repo.pathobj / 'test_mod_annex_file',
                            'Heavy stuff.')

    eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
        list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))
    if not (annex and src_repo.is_managed_branch()):
        # the following doesn't make sense in managed branches, because
        # a commit that could be amended is no longer the last commit
        # of a branch after a sync has happened (which did happen
        # during the last push above)

        # amend and change commit msg in order to test for force push:
        src_repo.commit("amended", options=['--amend'])
        # push should be rejected (non-fast-forward):
        res = src.push(to='target', since='HEAD~2', on_failure='ignore')
        # fails before even touching the annex branch
        assert_in_results(
            res,
            action='publish', status='error', target='target',
            refspec=DEFAULT_REFSPEC,
            operations=['rejected', 'error'])
        # push with force=True works:
        res = src.push(to='target', since='HEAD~2', force='gitpush')
        assert_in_results(
            res,
            action='publish', status='ok', target='target',
            refspec=DEFAULT_REFSPEC,
            operations=['forced-update'])
        eq_(list(target.get_branch_commits_(DEFAULT_BRANCH)),
            list(src_repo.get_branch_commits_(DEFAULT_BRANCH)))

    # we do not have more branches than we had in the beginning
    # in particular no 'synced/<default branch>'
    eq_(orig_branches, src_repo.get_branches())
def test_with_tempfile_content(f=None):
    ok_file_has_content(f, "testtest")
    ok_file_has_content(f, "test*", re_=True)
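# A minimal sketch (an assumption, not the real datalad helper) of the
# ok_file_has_content() semantics relied upon throughout this module, for
# the parameters actually used here: plain equality by default, re.match()
# semantics with re_=True, optional whitespace stripping with strip=True.
# The name ok_file_has_content_sketch is hypothetical; the real helper also
# supports decompress= and other options not sketched here.
import re


def ok_file_has_content_sketch(path, content, re_=False, strip=False):
    with open(path, encoding='utf-8') as f:
        text = f.read()
    if strip:
        text = text.strip()
    if re_:
        assert re.match(content, text), \
            "%r does not match content of %s: %r" % (content, path, text)
    else:
        assert text == content, \
            "%r != content of %s: %r" % (content, path, text)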
def test_push_recursive(
        origin_path=None, src_path=None, dst_top=None, dst_sub=None,
        dst_subnoannex=None, dst_subsub=None):
    # dataset with two submodules and one subsubmodule
    origin = Dataset(origin_path).create()
    origin_subm1 = origin.create('sub m')
    origin_subm1.create('subsub m')
    origin.create('subm noannex', annex=False)
    origin.save()
    assert_repo_status(origin.path)
    # prepare src as a fresh clone with all subdatasets checked out
    # recursively. running on a clone should make the test scenario more
    # different than test_push(), even for the pieces that should be identical
    top = Clone.__call__(source=origin.path, path=src_path)
    subs = top.get('.', recursive=True, get_data=False, result_xfm='datasets')
    # order for '.' should not be relied upon, so sort by path
    sub, subsub, subnoannex = sorted(subs, key=lambda ds: ds.path)

    target_top = mk_push_target(top, 'target', dst_top, annex=True)
    # subdatasets have no remote yet, so recursive publishing should fail:
    res = top.push(to="target", recursive=True, on_failure='ignore')
    check_datasets_order(res)
    assert_in_results(
        res, path=top.path, type='dataset',
        refspec=DEFAULT_REFSPEC,
        operations=['new-branch'],
        action='publish', status='ok',
        target='target')
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='error', type='dataset', path=d.path,
            message=("Unknown target sibling '%s'.", 'target'))
    # now fix that and set up targets for the submodules
    target_sub = mk_push_target(sub, 'target', dst_sub, annex=True)
    target_subnoannex = mk_push_target(
        subnoannex, 'target', dst_subnoannex, annex=False)
    target_subsub = mk_push_target(subsub, 'target', dst_subsub, annex=True)

    # and same push call as above
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    # topds skipped
    assert_in_results(
        res, path=top.path, type='dataset',
        action='publish', status='notneeded', target='target')
    # the rest pushed
    for d in (sub, subsub, subnoannex):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec=DEFAULT_REFSPEC)
    # all corresponding branches match across all datasets
    for s, d in zip((top, sub, subnoannex, subsub),
                    (target_top, target_sub, target_subnoannex,
                     target_subsub)):
        eq_(list(s.repo.get_branch_commits_(DEFAULT_BRANCH)),
            list(d.get_branch_commits_(DEFAULT_BRANCH)))
        if s != subnoannex:
            eq_(list(s.repo.get_branch_commits_("git-annex")),
                list(d.get_branch_commits_("git-annex")))

    # rerun should not result in further pushes of the default branch
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    assert_not_in_results(res, status='ok', refspec=DEFAULT_REFSPEC)
    assert_in_results(res, status='notneeded', refspec=DEFAULT_REFSPEC)

    # now annex a file in subsub
    test_copy_file = subsub.pathobj / 'test_mod_annex_file'
    test_copy_file.write_text("Heavy stuff.")
    # save all the way up
    assert_status(
        ('ok', 'notneeded'),
        top.save(message='subsub got something', recursive=True))
    assert_repo_status(top.path)
    # publish straight up, should be smart by default
    res = top.push(to="target", recursive=True)
    check_datasets_order(res)
    # we see 3 out of 4 datasets pushed (sub noannex was left unchanged)
    for d in (top, sub, subsub):
        assert_in_results(
            res, status='ok', type='dataset', path=d.path,
            refspec=DEFAULT_REFSPEC)
    # file content copied too
    assert_in_results(
        res, action='copy', status='ok', path=str(test_copy_file))
    # verify it is accessible, drop and bring back
    assert_status('ok', top.drop(str(test_copy_file)))
    ok_(not subsub.repo.file_has_content('test_mod_annex_file'))
    top.get(test_copy_file)
    ok_file_has_content(test_copy_file, 'Heavy stuff.')

    # make two modifications
    (sub.pathobj / 'test_mod_annex_file').write_text('annex')
    (subnoannex.pathobj / 'test_mod_file').write_text('git')
    # save separately
    top.save(sub.pathobj, message='annexadd', recursive=True)
    top.save(subnoannex.pathobj, message='gitadd', recursive=True)

    # now only publish the latter one
    res = top.push(to="target", since=DEFAULT_BRANCH + '~1', recursive=True)
    # nothing copied, no reports on the other modification
    assert_not_in_results(res, action='copy')
    assert_not_in_results(res, path=sub.path)
    for d in (top, subnoannex):
        assert_in_results(res, status='ok', type='dataset', path=d.path,
                          refspec=DEFAULT_REFSPEC)

    # an unconditional push should now pick up the remaining changes
    res = top.push(to="target", recursive=True)
    assert_in_results(
        res, action='copy', status='ok',
        path=str(sub.pathobj / 'test_mod_annex_file'))
    assert_in_results(res, status='ok', type='dataset', path=sub.path,
                      refspec=DEFAULT_REFSPEC)
    for d in (top, subnoannex, subsub):
        assert_in_results(res, status='notneeded', type='dataset',
                          path=d.path, refspec=DEFAULT_REFSPEC)

    # if noannex target gets some annex, we still should not fail to push
    target_subnoannex.call_git(['annex', 'init'])
    # just to ensure that we do need something to push
    (subnoannex.pathobj / "newfile").write_text("content")
    subnoannex.save()
    res = subnoannex.push(to="target")
    assert_in_results(res, status='ok', type='dataset')
def test_something(path=None, new_home=None):
    # will refuse to work with source='branch' without a dataset
    assert_raises(ValueError, ConfigManager, source='branch')

    # now read the example config
    cfg = ConfigManager(GitRepo(opj(path, 'ds'), create=True),
                        source='branch')
    assert_equal(len(cfg), 5)
    assert_in('something.user', cfg)
    # multi-value
    assert_equal(len(cfg['something.user']), 2)
    assert_equal(cfg['something.user'],
                 ('name=Jane Doe', '[email protected]'))

    assert_true(cfg.has_section('something'))
    assert_false(cfg.has_section('somethingelse'))
    assert_equal(sorted(cfg.sections()),
                 [u'onemore.complicated の beast with.dot', 'something'])
    assert_true(cfg.has_option('something', 'user'))
    assert_false(cfg.has_option('something', 'us?er'))
    assert_false(cfg.has_option('some?thing', 'user'))
    assert_equal(sorted(cfg.options('something')),
                 ['empty', 'myint', 'novalue', 'user'])
    assert_equal(cfg.options(u'onemore.complicated の beast with.dot'),
                 ['findme'])
    assert_equal(
        sorted(cfg.items()),
        [(u'onemore.complicated の beast with.dot.findme', '5.0'),
         ('something.empty', ''),
         ('something.myint', '3'),
         ('something.novalue', None),
         ('something.user', ('name=Jane Doe', '[email protected]'))])
    assert_equal(
        sorted(cfg.items('something')),
        [('something.empty', ''),
         ('something.myint', '3'),
         ('something.novalue', None),
         ('something.user', ('name=Jane Doe', '[email protected]'))])

    # by default get last value only
    assert_equal(cfg.get('something.user'), '[email protected]')
    # but can get all values
    assert_equal(cfg.get('something.user', get_all=True),
                 ('name=Jane Doe', '[email protected]'))
    assert_raises(KeyError, cfg.__getitem__, 'somedthing.user')
    assert_equal(
        cfg.getfloat(u'onemore.complicated の beast with.dot', 'findme'),
        5.0)
    assert_equal(cfg.getint('something', 'myint'), 3)
    assert_equal(cfg.getbool('something', 'myint'), True)
    # git demands a key without value at all to be used as a flag, thus True
    assert_equal(cfg.getbool('something', 'novalue'), True)
    assert_equal(cfg.get('something.novalue'), None)
    # empty value is False
    assert_equal(cfg.getbool('something', 'empty'), False)
    assert_equal(cfg.get('something.empty'), '')
    assert_equal(cfg.getbool('doesnot', 'exist', default=True), True)
    assert_raises(TypeError, cfg.getbool, 'something', 'user')

    # gitpython-style access
    assert_equal(cfg.get('something.myint'),
                 cfg.get_value('something', 'myint'))
    assert_equal(cfg.get_value('doesnot', 'exist', default='oohaaa'),
                 'oohaaa')
    # weird, but that is how it is
    assert_raises(KeyError, cfg.get_value, 'doesnot', 'exist', default=None)

    # modification follows
    cfg.add('something.new', 'の')
    assert_equal(cfg.get('something.new'), u'の')
    # sections are added on demand
    cfg.add('unheard.of', 'fame')
    assert_true(cfg.has_section('unheard.of'))
    comp = cfg.items('something')
    cfg.rename_section('something', 'this')
    assert_true(cfg.has_section('this'))
    assert_false(cfg.has_section('something'))
    # direct comparison would fail, because of section prefix
    assert_equal(len(cfg.items('this')), len(comp))
    # fail if no such section
    with swallow_logs():
        assert_raises(CommandError,
                      cfg.rename_section, 'nothere', 'irrelevant')
    assert_true(cfg.has_option('this', 'myint'))
    cfg.unset('this.myint')
    assert_false(cfg.has_option('this', 'myint'))

    # batch changes
    cfg.add('mike.wants.to', 'know', reload=False)
    assert_false('mike.wants.to' in cfg)
    cfg.add('mike.wants.to', 'eat')
    assert_true('mike.wants.to' in cfg)
    assert_equal(len(cfg['mike.wants.to']), 2)

    # set a new one:
    cfg.set('mike.should.have', 'known')
    assert_in('mike.should.have', cfg)
    assert_equal(cfg['mike.should.have'], 'known')
    # set an existing one:
    cfg.set('mike.should.have', 'known better')
    assert_equal(cfg['mike.should.have'], 'known better')
    # set, while there are several matching ones already:
    cfg.add('mike.should.have', 'a meal')
    assert_equal(len(cfg['mike.should.have']), 2)
    # raises with force=False
    assert_raises(CommandError,
                  cfg.set, 'mike.should.have', 'a beer', force=False)
    assert_equal(len(cfg['mike.should.have']), 2)
    # replaces all matching ones with force=True
    cfg.set('mike.should.have', 'a beer', force=True)
    assert_equal(cfg['mike.should.have'], 'a beer')

    # test deprecated 'where' interface and old 'dataset' (not 'branch') value
    # TODO: remove along with the removal of deprecated 'where'
    cfg.set('mike.should.have', 'wasknown', where='dataset')
    assert_equal(cfg['mike.should.have'], 'wasknown')
    assert_equal(cfg.get_from_source('dataset', 'mike.should.have'),
                 'wasknown')

    # fails unknown location
    assert_raises(ValueError, cfg.add, 'somesuch', 'shit', scope='umpalumpa')

    # very carefully test non-local config
    # so carefully that even in case of bad weather Yarik doesn't find some
    # lame datalad unittest sections in his precious ~/.gitconfig
    # Note: An easier way to test this, would be to just set GIT_CONFIG_GLOBAL
    # to point somewhere else. However, this is not supported by git before
    # 2.32. Hence, stick with changed HOME in this test, but be sure to unset
    # a possible GIT_CONFIG_GLOBAL in addition.
    patched_env = os.environ.copy()
    patched_env.pop('GIT_CONFIG_GLOBAL', None)
    patched_env.update(get_home_envvars(new_home))
    with patch.dict('os.environ',
                    dict(patched_env, DATALAD_SNEAKY_ADDITION='ignore'),
                    clear=True):
        global_gitconfig = opj(new_home, '.gitconfig')
        assert not exists(global_gitconfig)
        globalcfg = ConfigManager()
        assert_not_in('datalad.unittest.youcan', globalcfg)
        assert_in('datalad.sneaky.addition', globalcfg)
        cfg.add('datalad.unittest.youcan', 'removeme', scope='global')
        assert exists(global_gitconfig)
        # it did not go into the dataset's config!
        assert_not_in('datalad.unittest.youcan', cfg)
        # does not monitor additions!
        globalcfg.reload(force=True)
        assert_in('datalad.unittest.youcan', globalcfg)
        with swallow_logs():
            assert_raises(
                CommandError,
                globalcfg.unset, 'datalad.unittest.youcan', scope='local')
        assert globalcfg.has_section('datalad.unittest')
        globalcfg.unset('datalad.unittest.youcan', scope='global')
        # but after we unset the only value -- that section is no longer
        # listed
        assert not globalcfg.has_section('datalad.unittest')
        assert_not_in('datalad.unittest.youcan', globalcfg)
        ok_file_has_content(global_gitconfig, "")

    cfg = ConfigManager(
        Dataset(opj(path, 'ds')), source='branch',
        overrides={'datalad.godgiven': True})
    assert_equal(cfg.get('datalad.godgiven'), True)
    # setter has no effect
    cfg.set('datalad.godgiven', 'false')
    assert_equal(cfg.get('datalad.godgiven'), True)
def _test_expiring_token(outdir):
    url = "s3://datalad-test0-versioned/1version-removed-recreated.txt"
    outpath = op.join(outdir, "output")
    providers = get_test_providers(url, reload=True)
    downloader = providers.get_provider(url).get_downloader(url)

    from time import (
        sleep,
        time,
    )

    from datalad.downloaders.credentials import (
        AWS_S3,
        CompositeCredential,
        UserPassword,
    )
    from datalad.support.keyring_ import MemoryKeyring
    from datalad.tests.utils_pytest import ok_file_has_content

    credential = downloader.credential  # AWS_S3('datalad-test-s3')

    # We will replace credential with a CompositeCredential which will
    # mint new token after expiration
    # crap -- duration must be no shorter than 900, i.e. 15 minutes --
    # too long to wait for a test!
    duration = 900
    generated = []

    def _gen_session_token(_, key_id=None, secret_id=None):
        from boto.sts.connection import STSConnection
        sts = STSConnection(aws_access_key_id=key_id,
                            aws_secret_access_key=secret_id)
        # Note: without force_new=True it will not re-request a token and
        # would just return old one if not expired yet. Testing below might
        # fail if not entirely new
        token = sts.get_session_token(duration=duration, force_new=True)
        generated.append(token)
        return dict(key_id=token.access_key,
                    secret_id=token.secret_key,
                    session=token.session_token,
                    expiration=token.expiration)

    class CustomS3(CompositeCredential):
        _CREDENTIAL_CLASSES = (UserPassword, AWS_S3)
        _CREDENTIAL_ADAPTERS = (_gen_session_token,)

    keyring = MemoryKeyring()
    downloader.credential = new_credential = CustomS3("testexpire",
                                                      keyring=keyring)
    # but reuse our existing credential for the first part:
    downloader.credential._credentials[0] = credential

    # now downloader must use the token generator
    assert not generated  # since we have not called it yet

    # do it twice so we reuse session and test that we do not
    # re-mint a new token
    t0 = time()  # not exactly when we generated, might be a bit racy?
    for i in range(2):
        downloader.download(url, outpath)
        ok_file_has_content(outpath, "version1")
        os.unlink(outpath)
    # but we should have asked for a new token only once
    assert len(generated) == 1
    assert downloader.credential is new_credential  # we did not reset it

    # sleep for a while and now do a number of downloads during which
    # token should get refreshed etc
    # -3 since we have offset -2 hardcoded to refresh a bit ahead of time
    to_sleep = duration - (time() - t0) - 3
    print("Sleeping for %d seconds. Token should expire at %s"
          % (to_sleep, generated[0].expiration))
    sleep(to_sleep)

    for i in range(5):
        # should have not been regenerated yet
        # -2 is our hardcoded buffer
        if time() - t0 < duration - 2:
            assert len(generated) == 1
        downloader.download(url, outpath)
        ok_file_has_content(outpath, "version1")
        os.unlink(outpath)
        sleep(1)
    assert len(generated) == 2
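# A generic, minimal sketch (names and structure are hypothetical, not
# datalad's CompositeCredential machinery) of the refresh-ahead-of-expiry
# pattern the test above exercises: a cached token is reused until shortly
# before it expires, with a small buffer (the comments above mention a
# hardcoded 2-second offset) so a token is never used at the moment it
# becomes invalid.
import time


class RefreshingToken:
    BUFFER = 2  # seconds to refresh ahead of the actual expiration

    def __init__(self, mint, duration):
        self._mint = mint          # callable returning a fresh token string
        self._duration = duration  # token lifetime in seconds
        self._token = None
        self._expires_at = 0.0

    def get(self):
        # re-mint only when inside the buffer window (or never minted yet);
        # otherwise keep reusing the cached token
        if time.time() >= self._expires_at - self.BUFFER:
            self._token = self._mint()
            self._expires_at = time.time() + self._duration
        return self._token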