def check_dropall_get(repo):
    # drop all annex content for all revisions, clean the cache, get the
    # content for all files in master in all of its revisions
    t1w_fpath = opj(repo.path, 'sub-1', 'anat', 'sub-1_T1w.dat')
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    # --force since it would fail to verify presence in case we remove
    # archives keys... TODO
    repo._annex_custom_command([], ["git", "annex", "drop", "--all", "--force"])
    clean(dataset=repo.path)  # remove possible extracted archives
    with assert_raises(AssertionError):
        ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    repo.get('.')
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
def check_dropall_get(repo):
    # drop all annex content for all revisions, clean the cache, get the
    # content for all files in master in all of its revisions
    t1w_fpath = opj(repo.path, 'sub-1', 'anat', 'sub-1_T1w.dat')
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    # --force since it would fail to verify presence in case we remove
    # archives keys... TODO
    repo.drop([], options=['--all', '--force'])
    clean(dataset=repo.path)  # remove possible extracted archives
    with assert_raises(AssertionError):
        ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
    try:
        repo.get('.')
    except CommandError as exc:
        # get() raises an exception starting with DataLad version 0.14 if
        # getting any of the items fails, and the `drop --force` call above
        # drops metadata files that aren't available elsewhere.
        pass
    ok_file_has_content(t1w_fpath, "mighty load 2.0.0")
def test_add_archive_content(path_orig, url, repo_path):
    with chpwd(repo_path):
        # TODO we need to be able to pass path into add_archive_content
        # We could mock but I mean for the API
        assert_raises(RuntimeError, add_archive_content,
                      "nonexisting.tar.gz")  # no repo yet

        repo = AnnexRepo(repo_path, create=True)
        assert_raises(ValueError, add_archive_content, "nonexisting.tar.gz")
        # we can't add a file from outside the repo ATM
        assert_raises(FileNotInRepositoryError, add_archive_content,
                      opj(path_orig, '1.tar.gz'))

        # Let's add first archive to the repo so we could test
        with swallow_outputs():
            repo.add_urls([opj(url, '1.tar.gz')],
                          options=["--pathdepth", "-1"])
            for s in range(1, 5):
                repo.add_urls([opj(url, '%du/1.tar.gz' % s)],
                              options=["--pathdepth", "-2"])
        repo.commit("added 1.tar.gz")

        key_1tar = repo.get_file_key('1.tar.gz')  # will be used in the test later

        def d1_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '1 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
            ok_archives_caches(repo_path, 0)

        # and by default it just does it, everything goes to annex
        repo_ = add_archive_content('1.tar.gz')
        eq_(repo.path, repo_.path)
        d1_basic_checks()

        # If run again, should proceed just fine since the content is the same,
        # so no changes would be made really
        add_archive_content('1.tar.gz')

        # But that other one carries an updated file, so should fail due to
        # overwrite
        with assert_raises(RuntimeError) as cme:
            add_archive_content(opj('1u', '1.tar.gz'), use_current_dir=True)
        # TODO: somewhat not precise since we have two possible "already exists"
        # -- in caching and overwrite check
        assert_in("already exists", str(cme.exception))

        # but should do fine if overrides are allowed
        add_archive_content(opj('1u', '1.tar.gz'), existing='overwrite',
                            use_current_dir=True)
        add_archive_content(opj('2u', '1.tar.gz'), existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(opj('3u', '1.tar.gz'), existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(opj('4u', '1.tar.gz'), existing='archive-suffix',
                            use_current_dir=True)

        # rudimentary test
        assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                     ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
        whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
        # they all must be the same
        assert (all([x == whereis[0] for x in whereis[1:]]))

    # and we should be able to reference it while under subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'), use_current_dir=True)
        d1_basic_checks()
        # or we could keep relative path and also demand to keep the archive
        # prefix while extracting under original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'), add_archive_leading_dir=True)

    with chpwd(opj(repo_path, '1')):
        d1_basic_checks()

    with chpwd(repo_path):
        # test with excludes and renames and annex options
        add_archive_content('1.tar.gz', exclude=['d'], rename=['/ /_', '/^1/2'],
                            annex_options="-c annex.largefiles=exclude=*.txt",
                            delete=True)
        # no conflicts since new name
        ok_file_under_git('2', '1_f.txt', annexed=False)
        assert_false(exists(opj('2', 'd')))
        assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test ability to extract within subdir
    with chpwd(opj(repo_path, 'd1'), mkdir=True):
        # Let's add an archive to the repo so we could test one
        # named the same way but with different content
        with swallow_outputs():
            repo.add_urls([opj(url, 'd1', '1.tar.gz')],
                          options=["--pathdepth", "-1"],
                          cwd=getpwd())  # invoke under current subdir
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz')
        d2_basic_checks()

    # in manual tests ran into the situation of inability to obtain on a single
    # run a file from an archive which was coming from a dropped key.  I thought
    # it was tested in custom remote tests, but I guess not sufficiently well
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # that is what managed to not work

    # TODO: check if persistent archive is there for the 1.tar.gz

    # We should be able to drop everything since available online
    with swallow_outputs():
        clean(dataset=repo.path)
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # bug was that dropping didn't work since archive was dropped first
    repo.call_annex(["drop", "--all"])

    # verify that we can't drop a file if archive key was dropped and online
    # archive was removed or changed size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    with assert_raises(CommandError) as e:
        repo.drop(key_1tar, key=True)
        assert_equal(e.kwargs['stdout_json'][0]['success'], False)
        assert_result_values_cond(
            e.kwargs['stdout_json'],
            'note',
            lambda x: '(Use --force to override this check, or adjust numcopies.)' in x)
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))
def test_clean(d=None):
    AnnexRepo(d, create=True)
    ds = Dataset(d)
    assert_status('notneeded', clean(dataset=ds))
    archives_path = ds.pathobj / Path(ARCHIVES_TEMP_DIR)
    annex_tmp_path = ds.pathobj / Path(ANNEX_TEMP_DIR)
    annex_trans_path = ds.pathobj / Path(ANNEX_TRANSFER_DIR)
    index_path = ds.repo.dot_git / Path(SEARCH_INDEX_DOTGITDIR)

    # if we create some temp archives directory
    (archives_path / 'somebogus').mkdir(parents=True)
    res = clean(dataset=ds, return_type='item-or-list',
                result_filter=lambda x: x['status'] == 'ok')
    assert_equal(res['path'], str(archives_path))
    assert_equal(res['message'][0] % tuple(res['message'][1:]),
                 "Removed 1 temporary archive directory: somebogus")
    assert_false(archives_path.exists())

    # relative path
    (archives_path / 'somebogus').mkdir(parents=True)
    (archives_path / 'somebogus2').mkdir(parents=True)
    with chpwd(d), swallow_outputs() as cmo:
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(
            res['message'][0] % tuple(res['message'][1:]),
            "Removed 2 temporary archive directories: somebogus, "
            "somebogus2")
        assert_false(archives_path.exists())

    # and what about git annex temporary files?
    annex_tmp_path.mkdir(parents=True)
    (annex_tmp_path / "somebogus").write_text("load")
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_tmp_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed 1 temporary annex file: somebogus")
        assert_false(annex_tmp_path.exists())

    (annex_trans_path / 'somebogus').mkdir(parents=True, exist_ok=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_trans_path))
        assert_equal(
            res['message'][0] % tuple(res['message'][1:]),
            "Removed 1 annex temporary transfer directory: somebogus")
        assert_false(annex_trans_path.exists())

    # search index
    index_path.mkdir(parents=True)
    (index_path / "MAIN_r55n3hiyvxkdf1fi.seg, _MAIN_1.toc").write_text("noop")
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(index_path))
        assert_equal(
            res['message'][0] % tuple(res['message'][1:]),
            "Removed 1 metadata search index file: "
            "MAIN_r55n3hiyvxkdf1fi.seg, _MAIN_1.toc")
        assert_false(index_path.exists())

    # remove empty directories, too
    archives_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(archives_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty temporary archive directory")
        assert_false(archives_path.exists())

    annex_tmp_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_tmp_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty temporary annex directory")
        assert_false(annex_tmp_path.exists())

    annex_trans_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(annex_trans_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty annex temporary transfer directory")
        assert_false(annex_trans_path.exists())

    index_path.mkdir(parents=True)
    with chpwd(d):
        res = clean(return_type='item-or-list',
                    result_filter=lambda x: x['status'] == 'ok')
        assert_equal(res['path'], str(index_path))
        assert_equal(res['message'][0] % tuple(res['message'][1:]),
                     "Removed empty metadata search index directory")
        assert_false(index_path.exists())
def test_add_archive_content(path_orig=None, url=None, repo_path=None):
    with chpwd(repo_path):
        # TODO we need to be able to pass path into add_archive_content
        # We could mock but I mean for the API

        # no repo yet
        assert_raises(NoDatasetFound, add_archive_content, "nonexisting.tar.gz")

        ds = Dataset(repo_path).create()
        res = ds.add_archive_content("nonexisting.tar.gz", on_failure='ignore')
        assert_in_results(res, action='add-archive-content', status='impossible')

        repo = ds.repo
        # we can't add a file from outside the repo ATM
        res = ds.add_archive_content(Path(path_orig) / '1.tar.gz',
                                     on_failure='ignore')
        assert_in_results(res, action='add-archive-content',
                          status='impossible', type="dataset",
                          message="Can not add archive outside of the dataset")

        # Let's add first archive to the repo so we could test
        with swallow_outputs():
            repo.add_url_to_file('1.tar.gz', opj(url, '1.tar.gz'))
            for s in range(1, 5):
                repo.add_url_to_file('%du/1.tar.gz' % s,
                                     opj(url, '%du/1.tar.gz' % s))
        repo.commit("added 1.tar.gz")

        key_1tar = repo.get_file_annexinfo('1.tar.gz')['key']  # will be used in the test later

        def d1_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '1 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd', '1d'), annexed=True)
            ok_archives_caches(repo_path, 0)

        # and by default it just does it, everything goes to annex
        res = add_archive_content('1.tar.gz')
        assert_in_results(res, action='add-archive-content', status='ok')
        d1_basic_checks()

        # If run again, should proceed just fine since the content is the same,
        # so no changes would be made really
        res = add_archive_content('1.tar.gz')
        assert_in_results(res, action='add-archive-content', status='ok')

        # But that other one carries an updated file, so should fail due to
        # overwrite
        res = add_archive_content(Path('1u') / '1.tar.gz',
                                  use_current_dir=True, on_failure='ignore')
        assert_in_results(res, action='add-archive-content', status='error')
        assert_in('exists, but would be overwritten by new file',
                  res[0]['message'])

        # but should do fine if overrides are allowed
        add_archive_content(Path('1u') / '1.tar.gz', existing='overwrite',
                            use_current_dir=True)
        add_archive_content(Path('2u') / '1.tar.gz', existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(Path('3u') / '1.tar.gz', existing='archive-suffix',
                            use_current_dir=True)
        add_archive_content(Path('4u') / '1.tar.gz', existing='archive-suffix',
                            use_current_dir=True)

        # rudimentary test
        assert_equal(sorted(map(basename, glob(opj(repo_path, '1', '1*')))),
                     ['1 f-1.1.txt', '1 f-1.2.txt', '1 f-1.txt', '1 f.txt'])
        whereis = repo.whereis(glob(opj(repo_path, '1', '1*')))
        # they all must be the same
        assert (all([x == whereis[0] for x in whereis[1:]]))

    # and we should be able to reference it while under subdirectory
    subdir = opj(repo_path, 'subdir')
    with chpwd(subdir, mkdir=True):
        add_archive_content(opj(pardir, '1.tar.gz'), dataset=ds.path,
                            use_current_dir=True)
        d1_basic_checks()
        # or we could keep relative path and also demand to keep the archive
        # prefix while extracting under original (annex root) dir
        add_archive_content(opj(pardir, '1.tar.gz'), dataset=ds.path,
                            add_archive_leading_dir=True)

    with chpwd(opj(repo_path, '1')):
        d1_basic_checks()

    with chpwd(repo_path):
        # test with excludes and renames and annex options
        ds.add_archive_content(
            '1.tar.gz', exclude=['d'], rename=['/ /_', '/^1/2'],
            annex_options="-c annex.largefiles=exclude=*.txt",
            delete=True)
        # no conflicts since new name
        ok_file_under_git('2', '1_f.txt', annexed=False)
        assert_false(exists(opj('2', 'd')))
        assert_false(exists('1.tar.gz'))  # delete was in effect

    # now test ability to extract within subdir
    with chpwd(opj(repo_path, 'd1'), mkdir=True):
        # Let's add an archive to the repo so we could test one
        # named the same way but with different content
        with swallow_outputs():
            repo.add_url_to_file('d1/1.tar.gz', opj(url, 'd1', '1.tar.gz'))
        repo.commit("added 1.tar.gz in d1")

        def d2_basic_checks():
            ok_(exists('1'))
            ok_file_under_git('1', '2 f.txt', annexed=True)
            ok_file_under_git(opj('1', 'd2', '2d'), annexed=True)
            ok_archives_caches(repo.path, 0)

        add_archive_content('1.tar.gz', dataset=ds.path)
        d2_basic_checks()

    # in manual tests ran into the situation of inability to obtain on a single
    # run a file from an archive which was coming from a dropped key.  I thought
    # it was tested in custom remote tests, but I guess not sufficiently well
    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.get(opj('1', '1 f.txt'))
    ok_archives_caches(repo.path, 1, persistent=True)
    ok_archives_caches(repo.path, 0, persistent=False)

    repo.drop(opj('1', '1 f.txt'))  # should be all kosher
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher
    repo.get(opj('1', '1 f.txt'))  # that is what managed to not work

    # TODO: check if persistent archive is there for the 1.tar.gz

    # We should be able to drop everything since available online
    with swallow_outputs():
        clean(dataset=ds)
    repo.drop(key_1tar, key=True)  # is available from the URL -- should be kosher

    ds.drop(opj('1', '1 f.txt'))  # should be all kosher
    ds.get(opj('1', '1 f.txt'))  # and should be able to get it again

    # bug was that dropping didn't work since archive was dropped first
    repo.call_annex(["drop", "--all"])

    # verify that we can't drop a file if archive key was dropped and online
    # archive was removed or changed size! ;)
    repo.get(key_1tar, key=True)
    unlink(opj(path_orig, '1.tar.gz'))
    with assert_raises(CommandError) as e:
        repo.drop(key_1tar, key=True)
        assert_equal(e.kwargs['stdout_json'][0]['success'], False)
        assert_result_values_cond(
            e.kwargs['stdout_json'],
            'note',
            lambda x: '(Use --force to override this check, or adjust numcopies.)' in x)
    assert exists(opj(repo.path, repo.get_contentlocation(key_1tar)))