def _test_correct_publish(target_path, rootds=False, flat=True):

    paths = [_path_(".git/hooks/post-update")]     # hooks enabled in all datasets
    not_paths = []  # _path_(".git/datalad/metadata")]  # metadata only on publish
                    # ATM we run post-update hook also upon create since it might
                    # be a reconfiguration (TODO: I guess could be conditioned)

    # web-interface html pushed to dataset root
    web_paths = ['index.html', _path_(".git/datalad/web")]
    if rootds:
        paths += web_paths
    # and not to subdatasets
    elif not flat:
        not_paths += web_paths

    for path in paths:
        ok_exists(opj(target_path, path))

    for path in not_paths:
        assert_false(exists(opj(target_path, path)))

    # correct ls_json command in hook content (path wrapped in quotes)
    ok_file_has_content(_path_(target_path, '.git/hooks/post-update'),
                        '.*datalad ls -a --json file \'%s\'.*' % target_path,
                        re_=True,
                        flags=re.DOTALL)
def test_install_crcns(tdir, ds_path):
    with chpwd(tdir):
        with swallow_logs(new_level=logging.INFO) as cml:
            install("all-nonrecursive", source='///')
            # since we didn't log decorations such as log level ATM while
            # swallowing, let's check whether an error or exit code was
            # reported -- testing both
            assert_not_in('ERROR', cml.out)
            # below one must not fail alone! ;)
            assert_not_in('with exit code', cml.out)

        # should not hang in infinite recursion
        with chpwd('all-nonrecursive'):
            get("crcns")
        ok_(exists(_path_("all-nonrecursive/crcns/.git/config")))
        # and we could repeat installation and get the same result
        ds1 = install(_path_("all-nonrecursive/crcns"))
        ds2 = Dataset('all-nonrecursive').install('crcns')
        ok_(ds1.is_installed())
        eq_(ds1, ds2)
        eq_(ds1.path, ds2.path)  # to make sure they are a single dataset

    # again, but into existing dataset:
    ds = create(ds_path)
    crcns = ds.install("///crcns")
    ok_(crcns.is_installed())
    eq_(crcns.path, opj(ds_path, "crcns"))
    assert_in(crcns.path, ds.get_subdatasets(absolute=True))
def test_target_ssh_since(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    ok_clean_git(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset
    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'])

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=False, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=False)
    # those which succeeded should be saved now
    eq_(ds.get_subdatasets(), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(cme.exception.failed),
        {'///nonexisting', _path_(top_path, 'ds2')})

    # but if there was only a single installation requested -- it will be
    # InstallFailedError to stay consistent with single install behavior
    # TODO: unify at some point
    with assert_raises(InstallFailedError) as cme:
        ds.install('ds2')
    with assert_raises(InstallFailedError) as cme:
        ds.install('///nonexisting')
def test_add_recursive(path):
    ds = Dataset(path)
    ds.create(force=True, save=False)
    ds.create('dir', force=True, if_dirty='ignore')
    ds.save("Submodule added.")

    # TODO: CommandError to something meaningful
    # fail without recursive:
    assert_raises(CommandError, ds.add, opj('dir', 'testindir'),
                  recursive=False)
    # fail with recursion limit too low:
    assert_raises(CommandError, ds.add, opj('dir', 'testindir'),
                  recursive=True, recursion_limit=0)

    # add while also instructing annex to add in parallel 2 jobs (smoke testing
    # for that effect ATM)
    added1 = ds.add(opj('dir', 'testindir'), recursive=True, jobs=2)
    # added to annex, so annex output record
    eq_(added1,
        [{'file': _path_('dir/testindir'), 'command': 'add',
          'key': 'MD5E-s9--3f0f870d18d6ba60a79d9463ff3827ea',
          'success': True}])
    assert_in('testindir', Dataset(opj(path, 'dir')).repo.get_annexed_files())

    added2 = ds.add(opj('dir', 'testindir2'), recursive=True, to_git=True)
    # added to git, so parsed git output record
    eq_(added2, [{'success': True, 'file': _path_('dir/testindir2')}])
    assert_in('testindir2', Dataset(opj(path, 'dir')).repo.get_indexed_files())

    # We used to fail to add to pure git repository, but now it should all be
    # just fine
    subds = ds.create('git-sub', no_annex=True)
    with open(opj(subds.path, 'somefile.txt'), "w") as f:
        f.write("bla bla")
    result = ds.add(opj('git-sub', 'somefile.txt'), recursive=True, to_git=False)
    eq_(result, [{'file': _path_('git-sub/somefile.txt'), 'success': True}])
def test_path_():
    eq_(_path_('a'), 'a')
    if on_windows:
        eq_(_path_('a/b'), r'a\b')
    else:
        p = 'a/b/c'
        assert(_path_(p) is p)  # nothing is done to it whatsoever
        eq_(_path_(p, 'd'), 'a/b/c/d')
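# The test above pins down the observable contract of the `_path_` helper used
# throughout these snippets: '/'-separated fragments are joined and converted
# to the native separator, and on POSIX a single already-native argument is
# returned unchanged.  Below is a minimal, hypothetical sketch written only
# from those assertions -- the actual DataLad utility may be implemented
# differently.
import os
from os.path import join as opj


def _path_sketch(*parts):
    """Hypothetical stand-in for `_path_`, based solely on test_path_()."""
    if len(parts) == 1 and os.sep == '/':
        # on POSIX a single fragment needs no conversion -- return it as-is
        return parts[0]
    # otherwise join all fragments and switch '/' to the native separator
    return opj(*(p.replace('/', os.sep) for p in parts))


# e.g. _path_sketch('a/b', 'c') -> 'a/b/c' on POSIX, r'a\b\c' on Windows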
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    # but because we override HOME the HOME on the remote end would be
    # different even though a localhost. So we need to query it
    from datalad import ssh_manager
    ssh = ssh_manager.get_connection('localhost')
    remote_home, err = ssh('pwd')
    assert not err
    remote_home = remote_home.rstrip('\n')
    dst_relpath = os.path.relpath(dst_path, remote_home)
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.save('sub.dat')
    ds.create_sibling(url, ui=True)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    # "Settings" such as UI do not persist, so we specify it again
    # for the test below depending on it
    ds.create_sibling(url, existing='replace', ui=True)
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.save('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)

    assert_postupdate_hooks(dst_path)
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)

    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path,
                       result_xfm='datasets', return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'), source=path,
                             result_xfm='datasets', return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    # by default, it will only report a superdataset that actually
    # has the queried dataset as a registered true subdataset
    eq_(subsubds.get_superdataset(topmost=True), subds)
    # but we can also ask for a dataset that is merely above
    eq_(subsubds.get_superdataset(topmost=True, registered_only=False), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, subds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path,
            cfg.obtain('datalad.locations.default-dataset'))

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
def process_digests_mtimes(digests, mtimes):
    # it should have triggered a hook, which would have created log and metadata files
    check_metadata = False
    for part in 'logs', 'metadata':
        metafiles = [
            k for k in digests
            if k.startswith(_path_('.git/datalad/%s/' % part))
        ]
        # This is in effect ONLY if we have "compatible" datalad installed on remote
        # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
        # so let's not check/enforce (TODO)
        # assert(len(metafiles) >= 1)

        # we might have 2 logs if timestamps do not collide ;)
        # Let's actually do it to some degree
        if part == 'logs':
            # always should have those:
            assert (len(metafiles) >= 1)
            with open(opj(target_path, metafiles[0])) as f:
                if 'no datalad found' not in f.read():
                    check_metadata = True
        if part == 'metadata':
            eq_(len(metafiles), bool(check_metadata))

        for f in metafiles:
            digests.pop(f)
            mtimes.pop(f)
    # and just pop some leftovers from annex
    for f in list(digests):
        if f.startswith('.git/annex/mergedrefs'):
            digests.pop(f)
            mtimes.pop(f)
def test_install_into_dataset(source, top_path):
    ds = create(top_path)
    ok_clean_git(ds.path)

    subds = ds.install("sub", source=source, save=False)
    if isinstance(subds.repo, AnnexRepo) and subds.repo.is_direct_mode():
        ok_(exists(opj(subds.path, '.git')))
    else:
        ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    ok_clean_git(subds.path, annex=None)
    # top is too:
    ok_clean_git(ds.path, annex=None)
    ds.save('addsub')
    # now it is:
    ok_clean_git(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    ok_clean_git(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    ok_clean_git(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    ok_clean_git(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.add('sub3')
    ok_clean_git(ds.path, untracked=['dummy.txt'])
def test_crawl_api_recursive(get_subdatasets_, run_pipeline_,
                             load_pipeline_from_config_,
                             get_repo_pipeline_script_path_,
                             get_lofilename_, chpwd_, tdir):
    pwd = getpwd()
    with chpwd(tdir):
        output, stats = crawl(recursive=True)
    assert_equal(pwd, getpwd())
    if external_versions['mock'] < '1.0.1':
        raise SkipTest(
            "needs a more recent mock which throws exceptions in side_effects")
    assert_equal(output, [[]] * 4 + [None])  # for now output is just a list of outputs
    assert_equal(
        stats,
        ActivityStats(datasets_crawled=5, datasets_crawl_failed=1))
    # nothing was done but we got it crawled
    chpwd_.assert_has_calls(
        [
            call(None),
            call('path1'),
            call('path1/path1_1'),
            call('path2'),
        ],
        any_order=True
    )
    assert_equal(
        list(find_files('.*', tdir, exclude_vcs=False)),
        [_path_(tdir, 'some.log')])  # no files were generated besides the log
def test_install_into_dataset(source=None, top_path=None):
    src_ds = Dataset(source).create(result_renderer='disabled', force=True)
    src_ds.save(['INFO.txt', 'test.dat'], to_git=True)
    src_ds.save('test-annex.dat', to_git=False)

    ds = create(top_path)
    assert_repo_status(ds.path)

    subds = ds.install("sub", source=source)
    ok_(isdir(opj(subds.path, '.git')))
    ok_(subds.is_installed())
    assert_in('sub', ds.subdatasets(result_xfm='relpaths'))
    # sub is clean:
    assert_repo_status(subds.path, annex=None)
    # top is too:
    assert_repo_status(ds.path, annex=None)
    ds.save(message='addsub')
    # now it is:
    assert_repo_status(ds.path, annex=None)

    # but we could also save while installing and there should be no side-effect
    # of saving any other changes if we state to not auto-save changes
    # Create a dummy change
    create_tree(ds.path, {'dummy.txt': 'buga'})
    assert_repo_status(ds.path, untracked=['dummy.txt'])
    subds_ = ds.install("sub2", source=source)
    eq_(subds_.path, opj(ds.path, "sub2"))  # for paranoid yoh ;)
    assert_repo_status(ds.path, untracked=['dummy.txt'])

    # and we should achieve the same behavior if we create a dataset
    # and then decide to add it
    create(_path_(top_path, 'sub3'))
    assert_repo_status(ds.path, untracked=['dummy.txt', 'sub3/'])
    ds.save('sub3')
    assert_repo_status(ds.path, untracked=['dummy.txt'])
def test_remove_nowhining(path):
    # when removing a dataset under a dataset (but not a subdataset)
    # should not provide a meaningless message that something was not right
    ds = create(path)
    # just install/clone inside of it
    subds_path = _path_(path, 'subds')
    install(subds_path, source=path)
    remove(subds_path)  # should remove just fine
def test_create_raises(path, outside_path):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, no_annex=True, description='some')

    with open(op.join(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.rev_create(force=False, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # non-empty with `force`:
    ds.rev_create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.rev_create(outside_path, **raw),
        status='error',
        message=(
            'dataset containing given paths is not underneath the reference '
            'dataset %s: %s', ds, outside_path))

    # create a sub:
    ds.rev_create('sub')
    # fail when doing it again
    assert_in_results(
        ds.rev_create('sub', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [str(ds.pathobj / 'sub')]),
    )

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.uninstall('sub', check=False)
    assert_in('sub', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # and now should fail to also create inplace or under
    assert_in_results(
        ds.rev_create('sub', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [str(ds.pathobj / 'sub')]),
    )
    assert_in_results(
        ds.rev_create(_path_('sub/subsub'), **raw),
        status='error',
        message=('collision with %s (dataset) in dataset %s',
                 str(ds.pathobj / 'sub'), ds.path))

    os.makedirs(op.join(ds.path, 'down'))
    with open(op.join(ds.path, 'down', "someotherfile.tst"), 'w') as f:
        f.write("someother")
    ds.rev_save()
    assert_in_results(
        ds.rev_create('down', **raw),
        status='error',
        message=('collision with content in parent dataset at %s: %s',
                 ds.path, [str(ds.pathobj / 'down' / 'someotherfile.tst')]),
    )
def test_replace_and_relative_sshpath(src_path, dst_path):
    # We need to come up with the path relative to our current home directory
    # https://github.com/datalad/datalad/issues/1653
    dst_relpath = os.path.relpath(dst_path, os.path.expanduser('~'))
    url = 'localhost:%s' % dst_relpath
    ds = Dataset(src_path).create()
    create_tree(ds.path, {'sub.dat': 'lots of data'})
    ds.add('sub.dat')
    ds.create_sibling(url)
    published = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published, 1, path=opj(ds.path, 'sub.dat'))
    # verify that hook runs and there is nothing in stderr
    # since it exits with 0 exit even if there was a problem
    out, err = Runner(cwd=opj(dst_path, '.git'))(_path_('hooks/post-update'))
    assert_false(out)
    assert_false(err)

    # Verify that we could replace and publish no problem
    # https://github.com/datalad/datalad/issues/1656
    # Strangely it spits out IncompleteResultsError exception atm... so just
    # checking that it fails somehow
    res = ds.create_sibling(url, on_failure='ignore')
    assert_status('error', res)
    assert_in('already configured', res[0]['message'][0])
    ds.create_sibling(url, existing='replace')
    published2 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published2, 1, path=opj(ds.path, 'sub.dat'))

    # and one more test since in above test it would not puke ATM but just
    # not even try to copy since it assumes that file is already there
    create_tree(ds.path, {'sub2.dat': 'more data'})
    ds.add('sub2.dat')
    published3 = ds.publish(to='localhost', transfer_data='none')  # we publish just git
    assert_result_count(published3, 0, path=opj(ds.path, 'sub2.dat'))
    # now publish "with" data, which should also trigger the hook!
    # https://github.com/datalad/datalad/issues/1658
    from glob import glob
    from datalad.consts import WEB_META_LOG
    logs_prior = glob(_path_(dst_path, WEB_META_LOG, '*'))
    published4 = ds.publish(to='localhost', transfer_data='all')
    assert_result_count(published4, 1, path=opj(ds.path, 'sub2.dat'))
    logs_post = glob(_path_(dst_path, WEB_META_LOG, '*'))
    eq_(len(logs_post), len(logs_prior) + 1)
def check_target_ssh_since(use_ssh, origin, src_path, target_path):
    if use_ssh:
        sshurl = "ssh://datalad-test" + target_path
    else:
        sshurl = target_path
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    assert_repo_status(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl=sshurl,
        recursive=True,
        since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset
    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl=sshurl,
        recursive=True,
        existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())
    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)

    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path,
                       result_xfm='datasets', return_type='item-or-list')
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.subdatasets()
    eq_(len(subdss), 1)
    eq_(subds.path, ds.subdatasets(result_xfm='paths')[0])
    eq_(subdss, ds.subdatasets(recursive=True))
    eq_(subdss, ds.subdatasets(fulfilled=True))
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'), source=path,
                             result_xfm='datasets', return_type='item-or-list')
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    eq_(subsubds.get_superdataset(topmost=True), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path, LOCAL_CENTRAL_PATH)

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
def test_publish_target_url(src, desttop, desturl):
    # https://github.com/datalad/datalad/issues/1762
    ds = Dataset(src).create(force=True)
    ds.add('1')
    ds.create_sibling('ssh://localhost:%s/subdir' % desttop,
                      name='target',
                      target_url=desturl + 'subdir/.git')
    results = ds.publish(to='target', transfer_data='all')
    assert results
    ok_file_has_content(_path_(desttop, 'subdir/1'), '123')
def test_subdatasets(path):
    # from scratch
    ds = Dataset(path)
    assert_false(ds.is_installed())
    eq_(ds.get_subdatasets(), [])
    ds = ds.create()
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])

    # create some file and commit it
    open(os.path.join(ds.path, 'test'), 'w').write('some')
    ds.add(path='test')
    assert_true(ds.is_installed())
    ds.save("Hello!", version_tag=1)

    # Assuming that tmp location was not under a super-dataset
    eq_(ds.get_superdataset(), None)
    eq_(ds.get_superdataset(topmost=True), ds)

    # add itself as a subdataset (crazy, isn't it?)
    subds = ds.install('subds', source=path)
    assert_true(subds.is_installed())
    eq_(subds.get_superdataset(), ds)
    eq_(subds.get_superdataset(topmost=True), ds)

    subdss = ds.get_subdatasets()
    eq_(len(subdss), 1)
    eq_(os.path.join(path, subdss[0]), subds.path)
    eq_(subds.path, ds.get_subdatasets(absolute=True)[0])
    eq_(subdss, ds.get_subdatasets(recursive=True))
    eq_(subdss, ds.get_subdatasets(fulfilled=True))
    # don't have that right now
    assert_raises(NotImplementedError, ds.get_subdatasets, pattern='sub*')
    ds.save("with subds", version_tag=2)
    ds.recall_state(1)
    assert_true(ds.is_installed())
    eq_(ds.get_subdatasets(), [])

    # very nested subdataset to test topmost
    subsubds = subds.install(_path_('d1/subds'), source=path)
    assert_true(subsubds.is_installed())
    eq_(subsubds.get_superdataset(), subds)
    eq_(subsubds.get_superdataset(topmost=True), ds)

    # verify that '^' alias would work
    with chpwd(subsubds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
        # and while in the dataset we still can resolve into central one
        dscentral = Dataset('///')
        eq_(dscentral.path, LOCAL_CENTRAL_PATH)

    with chpwd(ds.path):
        dstop = Dataset('^')
        eq_(dstop, ds)
def test_failed_install_multiple(top_path):
    ds = create(top_path)

    create(_path_(top_path, 'ds1'))
    create(_path_(top_path, 'ds3'))
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])

    # specify install with multiple paths and one non-existing
    with assert_raises(IncompleteResultsError) as cme:
        ds.install(['ds1', 'ds2', '///crcns', '///nonexisting', 'ds3'],
                   on_failure='continue')

    # install doesn't add existing submodules -- add does that
    ok_clean_git(ds.path, annex=None, untracked=['ds1/', 'ds3/'])
    ds.add(['ds1', 'ds3'])
    ok_clean_git(ds.path, annex=None)
    # those which succeeded should be saved now
    eq_(ds.subdatasets(result_xfm='relpaths'), ['crcns', 'ds1', 'ds3'])
    # and those which didn't -- listed
    eq_(set(r.get('source_url', r['path']) for r in cme.exception.failed),
        {'///nonexisting', _path_(top_path, 'ds2')})
def test_target_ssh_since(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)
    eq_(len(source.subdatasets()), 2)
    # get a new subdataset and make sure it is committed in the super
    source.create('brandnew')
    eq_(len(source.subdatasets()), 3)
    ok_clean_git(source.path)

    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        since='HEAD~1')
    # there is one thing in the target directory only, and that is the
    # remote repo of the newly added subdataset
    target = Dataset(target_path)
    ok_(not target.is_installed())  # since we didn't create it due to since
    eq_(['brandnew'], os.listdir(target_path))

    # now test functionality if we add a subdataset with a subdataset
    brandnew2 = source.create('brandnew2')
    brandnewsub = brandnew2.create('sub')
    brandnewsubsub = brandnewsub.create('sub')
    # and now we create a sibling for the new subdataset only
    assert_create_sshwebserver(
        name='dominique_carrera',
        dataset=source,
        sshurl="ssh://localhost" + target_path,
        recursive=True,
        existing='skip')
    # verify that it created the sub and sub/sub
    ok_(Dataset(_path_(target_path, 'brandnew2/sub')).is_installed())
    ok_(Dataset(_path_(target_path, 'brandnew2/sub/sub')).is_installed())
    # we installed without web ui - no hooks should be created/enabled
    assert_postupdate_hooks(_path_(target_path, 'brandnew'), installed=False)
def test_clone_report_permission_issue(tdir):
    pdir = _path_(tdir, 'protected')
    mkdir(pdir)
    # make it read-only
    chmod(pdir, 0o555)
    with chpwd(pdir):
        res = clone('///', result_xfm=None, return_type='list', on_failure='ignore')
        assert_status('error', res)
        assert_result_count(
            res, 1, status='error',
            message="could not create work tree dir '%s/%s': Permission denied"
                    % (pdir, get_datasets_topdir())
        )
def __call__(self, data):
    # we do not take anything from data
    meta = get_metadata(self.dataset)
    if meta:
        meta_encoded = meta.encode('utf-8')
        if not os.path.exists('.datalad'):
            os.makedirs('.datalad')
        path_ = _path_('.datalad', 'meta.datacite.xml')
        with open(path_, 'w') as f:
            f.write(meta_encoded)
        yield updated(data, {'filename': path_})
    else:
        yield data
def _test_correct_publish(target_path, rootds=False, flat=True): paths = [_path_(".git/hooks/post-update")] # hooks enabled in all datasets not_paths = [ ] # _path_(".git/datalad/metadata")] # metadata only on publish # ATM we run post-update hook also upon create since it might # be a reconfiguration (TODO: I guess could be conditioned) # web-interface html pushed to dataset root web_paths = ['index.html', _path_(".git/datalad/web")] if rootds: paths += web_paths # and not to subdatasets elif not flat: not_paths += web_paths for path in paths: ok_exists(opj(target_path, path)) for path in not_paths: assert_false(exists(opj(target_path, path))) hook_path = _path_(target_path, '.git/hooks/post-update') # No longer the case -- we are no longer using absolute path in the # script # ok_file_has_content(hook_path, # '.*\ndsdir="%s"\n.*' % target_path, # re_=True, # flags=re.DOTALL) # No absolute path (so dataset could be moved) in the hook with open(hook_path) as f: assert_not_in(target_path, f.read()) # correct ls_json command in hook content (path wrapped in "quotes) ok_file_has_content(hook_path, '.*datalad ls -a --json file \..*', re_=True, flags=re.DOTALL)
def _test_correct_publish(target_path, rootds=False, flat=True): paths = [_path_(".git/hooks/post-update")] # hooks enabled in all datasets not_paths = [] # _path_(".git/datalad/metadata")] # metadata only on publish # ATM we run post-update hook also upon create since it might # be a reconfiguration (TODO: I guess could be conditioned) # web-interface html pushed to dataset root web_paths = ['index.html', _path_(".git/datalad/web")] if rootds: paths += web_paths # and not to subdatasets elif not flat: not_paths += web_paths for path in paths: ok_exists(opj(target_path, path)) for path in not_paths: assert_false(exists(opj(target_path, path))) hook_path = _path_(target_path, '.git/hooks/post-update') # No longer the case -- we are no longer using absolute path in the # script # ok_file_has_content(hook_path, # '.*\ndsdir="%s"\n.*' % target_path, # re_=True, # flags=re.DOTALL) # No absolute path (so dataset could be moved) in the hook with open(hook_path) as f: assert_not_in(target_path, f.read()) # correct ls_json command in hook content (path wrapped in "quotes) ok_file_has_content(hook_path, '.*datalad ls -a --json file \..*', re_=True, flags=re.DOTALL)
def test_create_raises(path, outside_path):
    ds = Dataset(path)
    # incompatible arguments (annex only):
    assert_raises(ValueError, ds.create, no_annex=True, description='some')
    assert_raises(ValueError, ds.create, no_annex=True, annex_opts=['some'])
    assert_raises(ValueError, ds.create, no_annex=True, annex_init_opts=['some'])

    with open(opj(path, "somefile.tst"), 'w') as f:
        f.write("some")
    # non-empty without `force`:
    assert_in_results(
        ds.create(force=False, **raw),
        status='error',
        message='will not create a dataset in a non-empty directory, use `force` option to ignore')
    # non-empty with `force`:
    ds.create(force=True)
    # create sub outside of super:
    assert_in_results(
        ds.create(outside_path, **raw),
        status='error',
        message='path not associated with any dataset')

    # create a sub:
    ds.create('sub')
    # fail when doing it again
    assert_in_results(
        ds.create('sub', **raw),
        status='error',
        message=('collision with known subdataset %s/ in dataset %s',
                 'sub', ds.path))

    # now deinstall the sub and fail trying to create a new one at the
    # same location
    ds.uninstall('sub', check=False)
    assert_in('sub', ds.subdatasets(fulfilled=False, result_xfm='relpaths'))
    # and now should fail to also create inplace or under
    for s in 'sub', _path_('sub/subsub'):
        assert_in_results(
            ds.create(s, **raw),
            status='error',
            message=('collision with known subdataset %s/ in dataset %s',
                     'sub', ds.path))
def extract_meta(data):
    content = [
        x['match']
        for x in xpath_match('//*[@class="attributeLabel"]/..')(data)
    ]
    content = [(re.sub('</li>', ', ', x)) for x in content]
    content = [(re.sub('<[^<]+?>|[\t|\n|\r]', '',
                       (str(x.encode('ascii', 'ignore'))))).strip()
               for x in content]
    # drop the SCENES/OWNERS entries (the original `x.find('SCENES:' or 'OWNERS:')`
    # only ever checked 'SCENES:')
    [content.remove(x) for x in content
     if x.find('SCENES:') >= 0 or x.find('OWNERS:') >= 0]
    json_dict = OrderedDict(map(str, x.split(':', 1)) for x in content)

    if not exists(".datalad/meta"):
        makedirs(".datalad/meta")
    with open(_path_(".datalad/meta/balsa.json"), "w") as fi:
        json.dump(json_dict, fi, indent=1)
    lgr.info("Generated descriptor file")
    yield {'filename': ".datalad/meta/balsa.json"}
def _test_drop(path, drop_immediately):
    s3url = 's3://datalad-test0-nonversioned'
    providers = get_test_providers(s3url)  # to verify having s3 credentials
    # vcr tape is getting bound to the session object, so we need to
    # force re-establishing the session for the bucket.
    # TODO (in datalad): make a dedicated API for that, now too obscure
    _ = providers.get_status(s3url, allow_old_session=False)
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True,  # so test goes faster
                drop_immediately=drop_immediately,
            ),
            save=True
        )
    if drop_immediately:
        # cannot figure out but taping that interaction results in
        # git annex addurl error.  No time to figure it out
        # so we just crawl without vcr for now. TODO: figure out WTF
        with chpwd(path):
            crawl()
    else:
        with externals_use_cassette(
                'test_simple_s3_test0_nonversioned_crawl_ext'
                + ('_immediately' if drop_immediately else '')), \
                chpwd(path):
            crawl()

    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))
def test_create_text_no_annex(path):
    ds = create(path, text_no_annex=True)
    ok_clean_git(path)
    import re
    ok_file_has_content(
        _path_(path, '.gitattributes'),
        content='\* annex\.largefiles=\(not\(mimetype=text/\*\)\)',
        re_=True,
        match=False,
        flags=re.MULTILINE
    )
    # and check that it is really committing text files to git and binaries
    # to annex
    create_tree(path, {
        't': 'some text',
        'b': ''  # empty file is not considered to be a text file
                 # should we adjust the rule to consider only non empty files?
    })
    ds.add(['t', 'b'])
    ok_file_under_git(path, 't', annexed=False)
    ok_file_under_git(path, 'b', annexed=True)
def test_simple1(ind, topurl, outd):
    list(initiate_dataset(
        template="simple_with_archives",
        dataset_name='test1',
        path=outd,
        add_fields={
            'url': topurl + 'study/show/WG33',
            'a_href_match_': '.*download.*'
        }
    )({}))
    with chpwd(outd):
        out, stats = crawl()
    eq_(stats.add_annex, 3)
    ok_file_under_git(outd, 'file1.nii', annexed=True)
    ok_file_has_content(opj(outd, 'file1.nii'), 'content of file1.nii')
    ok_file_under_git(outd, _path_('dir1/file2.nii'), annexed=True)
    ok_file_has_content(opj(outd, 'dir1', 'file2.nii'), 'content of file2.nii')
    eq_(len(out), 1)
def _parse_git_submodules(dspath):
    """All known ones with some properties"""
    if not exists(opj(dspath, ".gitmodules")):
        # easy way out. if there is no .gitmodules file
        # we cannot have (functional) subdatasets
        return

    # this will not work in direct mode, need better way #1422
    cmd = ['git', 'ls-files', '--stage', '-z']

    # need to go rogue and cannot use proper helper in GitRepo
    # as they also pull in all of GitPython's magic
    try:
        stdout, stderr = GitRunner(cwd=dspath).run(
            cmd,
            log_stderr=True,
            log_stdout=True,
            # not sure why exactly, but log_online has to be false!
            log_online=False,
            expect_stderr=False,
            shell=False,
            # we don't want it to scream on stdout
            expect_fail=True)
    except CommandError as e:
        raise InvalidGitRepositoryError(exc_str(e))

    for line in stdout.split('\0'):
        if not line or not line.startswith('160000'):
            continue
        sm = {}
        props = submodule_full_props.match(line)
        sm['revision'] = props.group(2)
        subpath = _path_(dspath, props.group(4))
        sm['path'] = subpath
        if not exists(subpath) or not GitRepo.is_valid_repo(subpath):
            sm['state'] = 'absent'
        yield sm
def test_drop(path):
    get_test_providers('s3://datalad-test0-nonversioned')  # to verify having s3 credentials
    create(path)
    # unfortunately this doesn't work without force dropping since I guess vcr
    # stops and then gets queried again for the same tape while testing for
    # drop :-/
    with externals_use_cassette('test_simple_s3_test0_nonversioned_crawl_ext'), \
            chpwd(path):
        crawl_init(
            template="simple_s3",
            args=dict(
                bucket="datalad-test0-nonversioned",
                drop=True,
                drop_force=True  # so test goes faster
            ),
            save=True
        )
        crawl()
    # test that all was dropped
    repo = AnnexRepo(path, create=False)
    files = glob(_path_(path, '*'))
    eq_(len(files), 8)
    for f in files:
        assert_false(repo.file_has_content(f))
def test_target_ssh_recursive(origin, src_path, target_path):
    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_,
                             'prefix' if flat else "").rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we published
        # already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                existing='skip',
                ui=True,
                since='')
        # so it was created on remote correctly and wasn't just skipped
        assert(Dataset(_path_(
            target_path_,
            ('prefix-' if flat else '') + sub3_name)).is_installed())
        publish(dataset=source, to=remote_name, recursive=True, since='')  # just a smoke test
def test_ls_json(topdir, topurl):
    annex = AnnexRepo(topdir, create=True)
    ds = Dataset(topdir)
    # create some file and commit it
    with open(opj(ds.path, 'subdsfile.txt'), 'w') as f:
        f.write('123')
    ds.add(path='subdsfile.txt')
    ds.save("Hello!", version_tag=1)

    # add a subdataset
    ds.install('subds', source=topdir)

    subdirds = ds.create(_path_('dir/subds2'), force=True)
    subdirds.add('file')

    git = GitRepo(opj(topdir, 'dir', 'subgit'), create=True)                    # create git repo
    git.add(opj(topdir, 'dir', 'subgit', 'fgit.txt'))                           # commit to git to init git repo
    git.commit()
    annex.add(opj(topdir, 'dir', 'subgit'))                                     # add the non-dataset git repo to annex
    annex.add(opj(topdir, 'dir'))                                               # add to annex (links)
    annex.drop(opj(topdir, 'dir', 'subdir', 'file2.txt'), options=['--force'])  # broken-link
    annex.commit()

    git.add('fgit.txt')           # commit to git to init git repo
    git.commit()
    # annex.add doesn't add submodule, so using ds.add
    ds.add(opj('dir', 'subgit'))  # add the non-dataset git repo to annex
    ds.add('dir')                 # add to annex (links)
    ds.drop(opj('dir', 'subdir', 'file2.txt'), check=False)  # broken-link

    # register "external" submodule by installing and uninstalling it
    ext_url = topurl + '/dir/subgit/.git'
    # need to make it installable via http
    Runner()('git update-server-info', cwd=opj(topdir, 'dir', 'subgit'))
    ds.install(opj('dir', 'subgit_ext'), source=ext_url)
    ds.uninstall(opj('dir', 'subgit_ext'))
    meta_dir = opj('.git', 'datalad', 'metadata')

    def get_metahash(*path):
        if not path:
            path = ['/']
        return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()

    def get_metapath(dspath, *path):
        return _path_(dspath, meta_dir, get_metahash(*path))

    def get_meta(dspath, *path):
        with open(get_metapath(dspath, *path)) as f:
            return js.load(f)

    # Let's see that there is no crash if one of the files is available only
    # in relaxed URL mode, so no size could be picked up
    ds.repo.add_url_to_file(
        'fromweb', topurl + '/noteventhere', options=['--relaxed'])

    for all_ in [True, False]:  # recurse directories
        for recursive in [True, False]:
            for state in ['file', 'delete']:
                # subdataset should have its json created and deleted when
                # all=True else not
                subds_metapath = get_metapath(opj(topdir, 'subds'))
                exists_prior = exists(subds_metapath)

                #with swallow_logs(), swallow_outputs():
                dsj = _ls_json(
                    topdir,
                    json=state,
                    all_=all_,
                    recursive=recursive
                )
                ok_startswith(dsj['tags'], '1-')

                exists_post = exists(subds_metapath)
                # print("%s %s -> %s" % (state, exists_prior, exists_post))
                assert_equal(exists_post, (state == 'file' and recursive))

                # root should have its json file created and deleted in all cases
                ds_metapath = get_metapath(topdir)
                assert_equal(exists(ds_metapath), state == 'file')

                # children should have their metadata json's created and deleted only when recursive=True
                child_metapath = get_metapath(topdir, 'dir', 'subdir')
                assert_equal(exists(child_metapath), (state == 'file' and all_))

                # ignored directories should not have json files created in any case
                for subdir in [('.hidden',), ('dir', 'subgit')]:
                    assert_false(exists(get_metapath(topdir, *subdir)))

                # check if its updated in its nodes sublist too. used by web-ui json. regression test
                assert_equal(dsj['nodes'][0]['size']['total'], dsj['size']['total'])

                # check size of subdataset
                subds = [item for item in dsj['nodes']
                         if item['name'] == ('subdsfile.txt' or 'subds')][0]
                assert_equal(subds['size']['total'], '3 Bytes')

                # dir/subds2 must not be listed among nodes of the top dataset:
                topds_nodes = {x['name']: x for x in dsj['nodes']}

                assert_in('subds', topds_nodes)
                # XXX
                # # condition here is a bit a guesswork by yoh later on
                # # TODO: here and below clear destiny/interaction of all_ and recursive
                # assert_equal(dsj['size']['total'],
                #              '15 Bytes' if (recursive and all_) else
                #              ('9 Bytes' if (recursive or all_) else '3 Bytes')
                # )

                # https://github.com/datalad/datalad/issues/1674
                if state == 'file' and all_:
                    dirj = get_meta(topdir, 'dir')
                    dir_nodes = {x['name']: x for x in dirj['nodes']}
                    # it should be present in the subdir meta
                    assert_in('subds2', dir_nodes)
                    assert_not_in('url_external', dir_nodes['subds2'])
                    assert_in('subgit_ext', dir_nodes)
                    assert_equal(dir_nodes['subgit_ext']['url'], ext_url)
                # and not in topds
                assert_not_in('subds2', topds_nodes)

                # run non-recursive dataset traversal after subdataset metadata already created
                # to verify sub-dataset metadata being picked up from its metadata file in such cases
                if state == 'file' and recursive and not all_:
                    dsj = _ls_json(topdir, json='file', all_=False)
                    subds = [item for item in dsj['nodes']
                             if item['name'] == ('subdsfile.txt' or 'subds')][0]
                    assert_equal(subds['size']['total'], '3 Bytes')

                assert_equal(
                    topds_nodes['fromweb']['size']['total'], UNKNOWN_SIZE
                )
def get_metapath(dspath, *path):
    return _path_(dspath, meta_dir, get_metahash(*path))
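# The helper above derives a node's metadata file name from an md5 digest of
# the node's relative path, placed under the dataset's metadata directory.
# A hedged, self-contained illustration of that naming scheme follows; the
# `meta_dir` value and `get_metahash` body mirror the local definitions inside
# test_ls_json above and are assumptions outside that context.
import hashlib
from os.path import join as opj

meta_dir = opj('.git', 'datalad', 'metadata')


def get_metahash(*path):
    # an empty path refers to the dataset root, conventionally hashed as '/'
    if not path:
        path = ['/']
    return hashlib.md5(opj(*path).encode('utf-8')).hexdigest()


# e.g. the metadata file for node 'dir/subdir' of dataset '/tmp/ds' would be
# /tmp/ds/.git/datalad/metadata/<md5 of 'dir/subdir'>
print(opj('/tmp/ds', meta_dir, get_metahash('dir', 'subdir')))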
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(
        src_path, source=origin,
        result_xfm='datasets', return_type='item-or-list')

    target_path = opj(target_rootpath, "basic")
    with swallow_logs(new_level=logging.ERROR) as cml:
        create_sibling(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
        assert_not_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # basic config in place
        eq_(local_target_cfg('annex-ignore'), 'false')
        ok_(local_target_cfg('annex-uuid'))

    # do it again without force, but use a different name to avoid initial checks
    # for existing remotes:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            name="local_target_alt",
            sshurl="ssh://localhost",
            target_dir=target_path)
    ok_(text_type(cm.exception).startswith(
        "Target path %s already exists. And it fails to rmdir" % target_path))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        # on yoh's laptop TMPDIR is under HOME, so things start to become
        # tricky since then target_path is shortened and we would need to know
        # remote $HOME.  To not over-complicate and still test, test only for
        # the basename of the target_path
        ok_endswith(target_description, basename(target_path))
    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost" + target_path,
            publish_by_default='master',
            existing='replace')
        eq_("ssh://localhost" + urlquote(target_path),
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid
            # should be added too, even if URL matches prior state
            eq_(local_target_cfg('push'), 'master')

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            name="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests
                             if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))

                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time
        time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes
                          if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        # on elderly git we don't change receive setting
        ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update(
            {f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        # it seems that with some recent git behavior has changed a bit
        # and index might get touched
        if _path_('.git/index') in modified_files:
            ok_modified_files.add(_path_('.git/index'))
        assert_set_equal(modified_files, ok_modified_files)
def __call__(sshurl, target=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None, recursive=False,
             existing='error', shared=False, ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None):

    if sshurl is None:
        raise ValueError("""insufficient information for target creation
        (needs at least a dataset and a SSH URL).""")

    if target is None and (target_url is not None
                           or target_pushurl is not None):
        raise ValueError("""insufficient information for adding the target
        as a sibling (needs at least a name)""")

    # shortcut
    ds = require_dataset(dataset, check_installed=True,
                         purpose='creating a sibling')

    assert(ds is not None and sshurl is not None and ds.repo is not None)

    # determine target parameters:
    sshri = RI(sshurl)

    if not isinstance(sshri, SSHRI) \
            and not (isinstance(sshri, URL) and sshri.scheme == 'ssh'):
        raise ValueError("Unsupported SSH URL: '{0}', use ssh://host/path or host:path syntax".format(sshurl))

    if target_dir is None:
        if sshri.path:
            target_dir = sshri.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = False
    if "%NAME" not in target_dir:
        replicate_local_structure = True

    # collect datasets to use:
    datasets = dict()
    datasets[basename(ds.path)] = ds
    if recursive:
        for subds in ds.get_subdatasets(recursive=True):
            sub_path = opj(ds.path, subds)
            # TODO: when enhancing Dataset/*Repo classes and therefore
            # adapt to moved code, make proper distinction between name and
            # path of a submodule, which are technically different. This
            # probably will become important on windows as well as whenever
            # we want to allow for moved worktrees.
            datasets[basename(ds.path) + '/' + subds] = \
                Dataset(sub_path)

    # request ssh connection:
    not_supported_on_windows("TODO")
    lgr.info("Connecting ...")
    ssh = ssh_manager.get_connection(sshurl)
    ssh.open()

    # flag to check if at dataset_root
    at_root = True

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    remote_repos_to_run_hook_for = []
    for current_dspath in \
            sorted(datasets.keys(), key=lambda x: x.count('/')):
        current_ds = datasets[current_dspath]
        if not current_ds.is_installed():
            lgr.info("Skipping %s since not installed locally",
                     current_dspath)
            continue
        if not replicate_local_structure:
            path = target_dir.replace("%NAME",
                                      current_dspath.replace("/", "-"))
        else:
            # TODO: opj depends on local platform, not the remote one.
            # check how to deal with it. Does windows ssh server accept
            # posix paths? vice versa? Should planned SSH class provide
            # tools for this issue?
            path = normpath(opj(target_dir,
                                relpath(datasets[current_dspath].path,
                                        start=ds.path)))

        lgr.info("Creating target dataset {0} at {1}".format(current_dspath, path))
        # Must be set to True only if exists and existing='reconfigure'
        # otherwise we might skip actions if we say existing='reconfigure'
        # but it did not even exist before
        only_reconfigure = False
        if path != '.':
            # check if target exists
            # TODO: Is this condition valid for != '.' only?
            path_exists = True
            try:
                out, err = ssh(["ls", path])
            except CommandError as e:
                if "No such file or directory" in e.stderr and \
                        path in e.stderr:
                    path_exists = False
                else:
                    raise  # It's an unexpected failure here

            if path_exists:
                if existing == 'error':
                    raise RuntimeError("Target directory %s already exists." % path)
                elif existing == 'skip':
                    continue
                elif existing == 'replace':
                    ssh(["chmod", "+r+w", "-R", path])  # enable write permissions to allow removing dir
                    ssh(["rm", "-rf", path])            # remove target at path
                    path_exists = False                 # if we succeeded in removing it
                elif existing == 'reconfigure':
                    only_reconfigure = True
                else:
                    raise ValueError("Do not know how to handle existing=%s" % repr(existing))

            if not path_exists:
                try:
                    ssh(["mkdir", "-p", path])
                except CommandError as e:
                    lgr.error("Remotely creating target directory failed at "
                              "%s.\nError: %s" % (path, exc_str(e)))
                    continue

        # don't (re-)initialize dataset if existing == reconfigure
        if not only_reconfigure:
            # init git and possibly annex repo
            if not CreateSibling.init_remote_repo(
                    path, ssh, shared, datasets[current_dspath],
                    description=target_url):
                continue

        # check git version on remote end
        lgr.info("Adjusting remote git configuration")
        remote_git_version = CreateSibling.get_remote_git_version(ssh)
        if remote_git_version and remote_git_version >= "2.4":
            # allow for pushing to checked out branch
            try:
                ssh(["git", "-C", path] +
                    ["config", "receive.denyCurrentBranch", "updateInstead"])
            except CommandError as e:
                lgr.error("git config failed at remote location %s.\n"
                          "You will not be able to push to checked out "
                          "branch. Error: %s", path, exc_str(e))
        else:
            lgr.error("Git version >= 2.4 needed to configure remote."
                      " Version detected on server: %s\nSkipping configuration"
                      " of receive.denyCurrentBranch - you will not be able to"
                      " publish updates to this repository. Upgrade your git"
                      " and run with --existing=reconfigure"
                      % remote_git_version)

        # enable metadata refresh on dataset updates to publication server
        lgr.info("Enabling git post-update hook ...")
        try:
            CreateSibling.create_postupdate_hook(
                path, ssh, datasets[current_dspath])
        except CommandError as e:
            lgr.error("Failed to add json creation command to post update "
                      "hook.\nError: %s" % exc_str(e))

        # publish web-interface to root dataset on publication server
        if at_root and ui:
            lgr.info("Uploading web interface to %s" % path)
            at_root = False
            try:
                CreateSibling.upload_web_interface(path, ssh, shared, ui)
            except CommandError as e:
                lgr.error("Failed to push web interface to the remote "
                          "datalad repository.\nError: %s" % exc_str(e))

        remote_repos_to_run_hook_for.append(path)

    # in reverse order would be depth first
    lgr.debug("Running post-update hooks in all created siblings")
    for path in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        try:
            ssh(
                ["cd '" + _path_(path, ".git") + "' && hooks/post-update"],
                wrap_args=False  # we wrapped here manually
            )
        except CommandError as e:
            lgr.error("Failed to run post-update hook under path %s. "
                      "Error: %s" % (path, exc_str(e)))

    if target:
        # add the sibling(s):
        lgr.debug("Adding the siblings")
        if target_url is None:
            target_url = sshurl
        if target_pushurl is None and sshurl != target_url:
            target_pushurl = sshurl
        AddSibling()(dataset=ds, name=target, url=target_url,
                     pushurl=target_pushurl,
                     recursive=recursive,
                     fetch=True,
                     force=existing in {'replace'},
                     as_common_datasrc=as_common_datasrc,
                     publish_by_default=publish_by_default,
                     publish_depends=publish_depends)
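# For orientation, a minimal usage sketch of this older interface, written
# against the signature above. The dataset path, host, and sibling name are
# hypothetical, and the datalad.api import is assumed to expose
# create_sibling/publish the same way the tests below obtain them; this is a
# sketch, not a verbatim excerpt from the package.
from datalad.api import create_sibling, publish

local_ds = '/tmp/some_dataset'  # hypothetical, already-installed dataset

create_sibling(
    dataset=local_ds,
    target='server',                                  # name of the sibling to register
    sshurl='ssh://example.com/home/me/some_dataset',  # hypothetical remote location
    existing='error',                                 # fail if the remote dir already exists
    ui=True)                                          # also deploy the web interface
publish(dataset=local_ds, to='server')                # push content to the new sibling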
def __call__(sshurl, name=None, target_dir=None,
             target_url=None, target_pushurl=None,
             dataset=None,
             recursive=False,
             recursion_limit=None,
             existing='error',
             shared=None,
             group=None,
             ui=False,
             as_common_datasrc=None,
             publish_by_default=None,
             publish_depends=None,
             annex_wanted=None, annex_group=None, annex_groupwanted=None,
             inherit=False,
             since=None):
    #
    # nothing without a base dataset
    #
    ds = require_dataset(dataset, check_installed=True,
                         purpose='creating a sibling')
    refds_path = ds.path

    #
    # all checks that are possible before we start parsing the dataset
    #

    # possibly use sshurl to get the name in case if not specified
    if not sshurl:
        if not inherit:
            raise InsufficientArgumentsError(
                "needs at least an SSH URL, if no inherit option"
            )
        if name is None:
            raise ValueError(
                "Neither SSH URL, nor the name of sibling to inherit from "
                "was specified"
            )
        # It might well be that we already have this remote setup
        try:
            sshurl = CreateSibling._get_remote_url(ds, name)
        except Exception as exc:
            lgr.debug('%s does not know about url for %s: %s',
                      ds, name, exc_str(exc))
    elif inherit:
        raise ValueError(
            "For now, for clarity not allowing specifying a custom sshurl "
            "while inheriting settings"
        )
        # may be could be safely dropped -- still WiP

    if not sshurl:
        # TODO: may be more back up before _prep?
        super_ds = ds.get_superdataset()
        if not super_ds:
            raise ValueError(
                "Could not determine super dataset for %s to inherit URL"
                % ds
            )
        super_url = CreateSibling._get_remote_url(super_ds, name)
        # for now assuming hierarchical setup
        # (TODO: to be able to distinguish between the two, probably
        # needs storing datalad.*.target_dir to have %RELNAME in there)
        sshurl = slash_join(super_url, relpath(ds.path, super_ds.path))

    # check the login URL
    sshri = RI(sshurl)
    if not is_ssh(sshri):
        raise ValueError(
            "Unsupported SSH URL: '{0}', "
            "use ssh://host/path or host:path syntax".format(sshurl))

    if not name:
        # use the hostname as default remote name
        name = sshri.hostname
        lgr.debug(
            "No sibling name given, use URL hostname '%s' as sibling name",
            name)

    if since == '':
        # consider creating siblings only since the point of
        # the last update
        # XXX here we assume one to one mapping of names from local branches
        # to the remote
        active_branch = ds.repo.get_active_branch()
        since = '%s/%s' % (name, active_branch)

    #
    # parse the base dataset to find all subdatasets that need processing
    #
    to_process = []
    for ap in AnnotatePaths.__call__(
            dataset=refds_path,
            # only a single path!
            path=refds_path,
            recursive=recursive,
            recursion_limit=recursion_limit,
            action='create_sibling',
            # both next should not happen anyways
            unavailable_path_status='impossible',
            nondataset_path_status='error',
            modified=since,
            return_type='generator',
            on_failure='ignore'):
        if ap.get('status', None):
            # this is done
            yield ap
            continue
        if ap.get('type', None) != 'dataset' or ap.get('state', None) == 'absent':
            # this can happen when there is `since`, but we have no
            # use for anything but datasets here
            continue
        checkds_remotes = Dataset(ap['path']).repo.get_remotes() \
            if ap.get('state', None) != 'absent' \
            else []
        if publish_depends:
            # make sure dependencies are valid
            # TODO: inherit -- we might want to automagically create
            # those dependents as well???
            unknown_deps = set(assure_list(publish_depends)).difference(checkds_remotes)
            if unknown_deps:
                ap['status'] = 'error'
                ap['message'] = (
                    'unknown sibling(s) specified as publication dependency: %s',
                    unknown_deps)
                yield ap
                continue
        if name in checkds_remotes and existing in ('error', 'skip'):
            ap['status'] = 'error' if existing == 'error' else 'notneeded'
            ap['message'] = (
                "sibling '%s' already configured (specify alternative name, or force "
                "reconfiguration via --existing",
                name)
            yield ap
            continue
        to_process.append(ap)

    if not to_process:
        # we ruled out all possibilities
        # TODO wait for gh-1218 and make better return values
        lgr.info("No datasets qualify for sibling creation. "
                 "Consider different settings for --existing "
                 "or --since if this is unexpected")
        return

    if target_dir is None:
        if sshri.path:
            target_dir = sshri.path
        else:
            target_dir = '.'

    # TODO: centralize and generalize template symbol handling
    replicate_local_structure = "%RELNAME" not in target_dir

    # request ssh connection:
    lgr.info("Connecting ...")
    assert(sshurl is not None)  # delayed anal verification
    ssh = ssh_manager.get_connection(sshurl)
    if not ssh.get_annex_version():
        raise MissingExternalDependency(
            'git-annex',
            msg='on the remote system')

    #
    # all checks done and we have a connection, now do something
    #

    # loop over all datasets, ordered from top to bottom to make test
    # below valid (existing directories would cause the machinery to halt)
    # But we need to run post-update hook in depth-first fashion, so
    # would only collect first and then run (see gh #790)
    yielded = set()
    remote_repos_to_run_hook_for = []
    for currentds_ap in \
            sorted(to_process, key=lambda x: x['path'].count('/')):
        current_ds = Dataset(currentds_ap['path'])

        path = _create_dataset_sibling(
            name,
            current_ds,
            ds.path,
            ssh,
            replicate_local_structure,
            sshri,
            target_dir,
            target_url,
            target_pushurl,
            existing,
            shared,
            group,
            publish_depends,
            publish_by_default,
            ui,
            as_common_datasrc,
            annex_wanted,
            annex_group,
            annex_groupwanted,
            inherit
        )
        if not path:
            # nothing new was created
            # TODO is 'notneeded' appropriate in this case?
            currentds_ap['status'] = 'notneeded'
            # TODO explain status in 'message'
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        remote_repos_to_run_hook_for.append((path, currentds_ap))

        # publish web-interface to root dataset on publication server
        if current_ds.path == ds.path and ui:
            lgr.info("Uploading web interface to %s" % path)
            try:
                CreateSibling.upload_web_interface(path, ssh, shared, ui)
            except CommandError as e:
                currentds_ap['status'] = 'error'
                currentds_ap['message'] = (
                    "failed to push web interface to the remote datalad repository (%s)",
                    exc_str(e))
                yield currentds_ap
                yielded.add(currentds_ap['path'])
                continue

    # in reverse order would be depth first
    lgr.info("Running post-update hooks in all created siblings")
    # TODO: add progressbar
    for path, currentds_ap in remote_repos_to_run_hook_for[::-1]:
        # Trigger the hook
        lgr.debug("Running hook for %s (if exists and executable)", path)
        try:
            ssh("cd {} "
                "&& ( [ -x hooks/post-update ] && hooks/post-update || : )"
                "".format(sh_quote(_path_(path, ".git"))))
        except CommandError as e:
            currentds_ap['status'] = 'error'
            currentds_ap['message'] = (
                "failed to run post-update hook under remote path %s (%s)",
                path, exc_str(e))
            yield currentds_ap
            yielded.add(currentds_ap['path'])
            continue
        if not currentds_ap['path'] in yielded:
            # if we were silent until now everything is just splendid
            currentds_ap['status'] = 'ok'
            yield currentds_ap
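# A corresponding sketch for the newer interface above, which names the
# sibling via `name`, supports `since` for change detection, and yields
# per-dataset result records. Host and paths are made up and the
# datalad.api import is an assumption mirroring the tests in this file;
# treat it as illustration, not as the canonical invocation.
from datalad.api import create_sibling

create_sibling(
    'ssh://example.com/~/super',   # hypothetical SSH URL of the sibling
    name='server',                 # sibling name (defaults to the hostname if omitted)
    dataset='/tmp/super',          # hypothetical local superdataset
    recursive=True,                # also create siblings for subdatasets
    since='',                      # empty value: only what changed since the last published state
    existing='skip',               # leave already-configured siblings untouched
    ui=True)                       # deploy the web UI to the root dataset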
def test_target_ssh_recursive(origin, src_path, target_path):

    # prepare src
    source = install(src_path, source=origin, recursive=True)

    sub1 = Dataset(opj(src_path, "subm 1"))
    sub2 = Dataset(opj(src_path, "2"))

    for flat in False, True:
        target_path_ = target_dir_tpl = target_path + "-" + str(flat)

        if flat:
            target_dir_tpl += "/prefix%RELNAME"
            sep = '-'
        else:
            sep = os.path.sep

        remote_name = 'remote-' + str(flat)
        with chpwd(source.path):
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                ui=True)

        # raise if git repos were not created
        for suffix in [sep + 'subm 1', sep + '2', '']:
            target_dir = opj(target_path_,
                             'prefix' if flat else "").rstrip(os.path.sep) + suffix
            # raise if git repos were not created
            GitRepo(target_dir, create=False)

            _test_correct_publish(target_dir, rootds=not suffix, flat=flat)

        for repo in [source.repo, sub1.repo, sub2.repo]:
            assert_not_in("local_target", repo.get_remotes())

        # now, push should work:
        publish(dataset=source, to=remote_name)

        # verify that we can create-sibling which was created later and possibly
        # first published in super-dataset as an empty directory
        sub3_name = 'subm 3-%s' % flat
        sub3 = source.create(sub3_name)
        # since is an empty value to force it to consider all changes since we published
        # already
        with chpwd(source.path):
            # as we discussed in gh-1495 we use the last-published state of the base
            # dataset as the indicator for modification detection with since=''
            # hence we must not publish the base dataset on its own without recursion,
            # if we want to have this mechanism do its job
            #publish(to=remote_name)  # no recursion
            assert_create_sshwebserver(
                name=remote_name,
                sshurl="ssh://localhost" + target_path_,
                target_dir=target_dir_tpl,
                recursive=True,
                existing='skip',
                ui=True,
                since=''
            )
        # so it was created on remote correctly and wasn't just skipped
        assert(Dataset(_path_(target_path_, ('prefix-' if flat else '') + sub3_name)).is_installed())
        publish(dataset=source, to=remote_name, recursive=True, since='')  # just a smoke test
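# The flat layout exercised above relies on expanding a '%RELNAME'
# placeholder in target_dir into a per-(sub)dataset suffix. The real
# expansion happens in _create_dataset_sibling, which is not shown in this
# listing; the toy below only mirrors the directory names the test expects
# ('prefix' for the root, 'prefix-subm 1' etc. for subdatasets) and is not
# the actual helper.
def _expand_relname_template(target_dir_tpl, relname):
    # relname: subdataset path relative to the superdataset with '/'
    # flattened to '-' ('' for the root dataset) -- an assumption made
    # purely for illustration
    return target_dir_tpl.replace("%RELNAME", ("-" + relname) if relname else "")

assert _expand_relname_template("/tmp/t-True/prefix%RELNAME", "subm 1") \
    == "/tmp/t-True/prefix-subm 1"
assert _expand_relname_template("/tmp/t-True/prefix%RELNAME", "") \
    == "/tmp/t-True/prefix"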
def test_target_ssh_simple(origin, src_path, target_rootpath):

    # prepare src
    source = install(src_path, source=origin)

    target_path = opj(target_rootpath, "basic")
    # it will try to fetch it so would fail as well since sshurl is wrong
    with swallow_logs(new_level=logging.ERROR) as cml, \
            assert_raises(GitCommandError):
        create_sibling(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            ui=True)
    # is not actually happening on one of the two basic cases -- TODO figure it out
    # assert_in('enableremote local_target failed', cml.out)

    GitRepo(target_path, create=False)  # raises if not a git repo
    assert_in("local_target", source.repo.get_remotes())
    eq_("ssh://localhost", source.repo.get_remote_url("local_target"))
    # should NOT be able to push now, since url isn't correct:
    # TODO: assumption is wrong if ~ does have .git! fix up!
    assert_raises(GitCommandError, publish, dataset=source, to="local_target")

    # Both must be annex or git repositories
    src_is_annex = AnnexRepo.is_valid_repo(src_path)
    eq_(src_is_annex, AnnexRepo.is_valid_repo(target_path))
    # And target one should be known to have a known UUID within the source if annex
    if src_is_annex:
        annex = AnnexRepo(src_path)
        local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
        # for some reason this was "correct"
        # eq_(local_target_cfg('annex-ignore'), 'false')
        # but after fixing creating siblings in
        # 21f6dd012b2c7b9c0b8b348dcfb3b0ace7e8b2ec it started to fail
        # I think it is legit since we are trying to fetch now before calling
        # annex.enable_remote so it doesn't set it up, and fails before
        assert_raises(Exception, local_target_cfg, 'annex-ignore')
        # hm, but ATM wouldn't get a uuid since url is wrong
        assert_raises(Exception, local_target_cfg, 'annex-uuid')

    # do it again without force:
    with assert_raises(RuntimeError) as cm:
        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path)
    eq_("Target directory %s already exists." % target_path,
        str(cm.exception))
    if src_is_annex:
        target_description = AnnexRepo(target_path, create=False).get_description()
        assert_not_equal(target_description, None)
        assert_not_equal(target_description, target_path)
        ok_endswith(target_description, target_path)

    # now, with force and correct url, which is also used to determine
    # target_dir
    # Note: on windows absolute path is not url conform. But this way it's easy
    # to test, that ssh path is correctly used.
    if not on_windows:
        # add random file under target_path, to explicitly test existing=replace
        open(opj(target_path, 'random'), 'w').write('123')

        assert_create_sshwebserver(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost" + target_path,
            existing='replace')
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target"))
        ok_(source.repo.get_remote_url("local_target", push=True) is None)

        # ensure target tree actually replaced by source
        assert_false(exists(opj(target_path, 'random')))

        if src_is_annex:
            annex = AnnexRepo(src_path)
            local_target_cfg = annex.repo.remotes["local_target"].config_reader.get
            eq_(local_target_cfg('annex-ignore'), 'false')
            eq_(local_target_cfg('annex-uuid').count('-'), 4)  # valid uuid

        # again, by explicitly passing urls. Since we are on localhost, the
        # local path should work:
        cpkwargs = dict(
            dataset=source,
            target="local_target",
            sshurl="ssh://localhost",
            target_dir=target_path,
            target_url=target_path,
            target_pushurl="ssh://localhost" + target_path,
            ui=True,
        )
        assert_create_sshwebserver(existing='replace', **cpkwargs)
        if src_is_annex:
            target_description = AnnexRepo(target_path,
                                           create=False).get_description()
            eq_(target_description, target_path)

        eq_(target_path,
            source.repo.get_remote_url("local_target"))
        eq_("ssh://localhost" + target_path,
            source.repo.get_remote_url("local_target", push=True))

        _test_correct_publish(target_path)

        # now, push should work:
        publish(dataset=source, to="local_target")

        # and we should be able to 'reconfigure'
        def process_digests_mtimes(digests, mtimes):
            # it should have triggered a hook, which would have created log and metadata files
            check_metadata = False
            for part in 'logs', 'metadata':
                metafiles = [k for k in digests if k.startswith(_path_('.git/datalad/%s/' % part))]
                # This is in effect ONLY if we have "compatible" datalad installed on remote
                # end. ATM we don't have easy way to guarantee that AFAIK (yoh),
                # so let's not check/enforce (TODO)
                # assert(len(metafiles) >= 1)

                # we might have 2 logs if timestamps do not collide ;)
                # Let's actually do it to some degree
                if part == 'logs':
                    # always should have those:
                    assert (len(metafiles) >= 1)
                    with open(opj(target_path, metafiles[0])) as f:
                        if 'no datalad found' not in f.read():
                            check_metadata = True
                if part == 'metadata':
                    eq_(len(metafiles), bool(check_metadata))
                for f in metafiles:
                    digests.pop(f)
                    mtimes.pop(f)
            # and just pop some leftovers from annex
            for f in list(digests):
                if f.startswith('.git/annex/mergedrefs'):
                    digests.pop(f)
                    mtimes.pop(f)

        orig_digests, orig_mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(orig_digests, orig_mtimes)

        import time; time.sleep(0.1)  # just so that mtimes change
        assert_create_sshwebserver(existing='reconfigure', **cpkwargs)
        digests, mtimes = get_mtimes_and_digests(target_path)
        process_digests_mtimes(digests, mtimes)

        assert_dict_equal(orig_digests, digests)  # nothing should change in terms of content

        # but some files should have been modified
        modified_files = {k for k in mtimes
                          if orig_mtimes.get(k, 0) != mtimes.get(k, 0)}
        # collect which files were expected to be modified without incurring any changes
        ok_modified_files = {
            _path_('.git/hooks/post-update'), 'index.html',
            # files which hook would manage to generate
            _path_('.git/info/refs'), '.git/objects/info/packs'
        }
        if external_versions['cmd:system-git'] >= '2.4':
            # on elderly git we don't change receive setting
            ok_modified_files.add(_path_('.git/config'))
        ok_modified_files.update({f for f in digests if f.startswith(_path_('.git/datalad/web'))})
        assert_set_equal(modified_files, ok_modified_files)
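# get_mtimes_and_digests is defined in the test utilities and is not part of
# this listing; the reconfigure check above only needs it to map relative
# file paths under the target to content digests and modification times.
# A rough, self-contained approximation (not the actual helper) could look
# like this:
import hashlib
import os


def get_mtimes_and_digests_sketch(top):
    # approximate stand-in: walk the target tree and record
    # {relative path: md5 digest} and {relative path: mtime}
    digests, mtimes = {}, {}
    for root, _, files in os.walk(top):
        for fname in files:
            full = os.path.join(root, fname)
            if not os.path.exists(full):
                # skip dangling symlinks (e.g. annexed files without content)
                continue
            rel = os.path.relpath(full, top)
            with open(full, 'rb') as f:
                digests[rel] = hashlib.md5(f.read()).hexdigest()
            mtimes[rel] = os.stat(full).st_mtime
    return digests, mtimes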