def test_clone_simple_local(src, path): origin = Dataset(path) # now install it somewhere else ds = clone(src, path, description='mydummy', result_xfm='datasets', return_type='item-or-list') eq_(ds.path, path) ok_(ds.is_installed()) if not isinstance(origin.repo, AnnexRepo): # this means it is a GitRepo ok_(isinstance(origin.repo, GitRepo)) # stays plain Git repo ok_(isinstance(ds.repo, GitRepo)) ok_(not isinstance(ds.repo, AnnexRepo)) ok_(GitRepo.is_valid_repo(ds.path)) eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt'}) ok_clean_git(path, annex=False) else: # must be an annex ok_(isinstance(ds.repo, AnnexRepo)) ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False)) eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt', 'test-annex.dat'}) ok_clean_git(path, annex=True) # no content was installed: ok_(not ds.repo.file_has_content('test-annex.dat')) uuid_before = ds.repo.uuid eq_(ds.repo.get_description(), 'mydummy') # installing it again, shouldn't matter: res = clone(src, path) assert_result_values_equal(res, 'source_url', [src]) assert_status('notneeded', res) assert_message("dataset %s was already cloned from '%s'", res) ok_(ds.is_installed()) if isinstance(origin.repo, AnnexRepo): eq_(uuid_before, ds.repo.uuid)
def test_clone_simple_local(src, path): origin = Dataset(path) # now install it somewhere else ds = clone(src, path, description='mydummy', result_xfm='datasets', return_type='item-or-list') eq_(ds.path, path) ok_(ds.is_installed()) if not isinstance(origin.repo, AnnexRepo): # this means it is a GitRepo ok_(isinstance(origin.repo, GitRepo)) # stays plain Git repo ok_(isinstance(ds.repo, GitRepo)) ok_(not isinstance(ds.repo, AnnexRepo)) ok_(GitRepo.is_valid_repo(ds.path)) eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt'}) ok_clean_git(path, annex=False) else: # must be an annex ok_(isinstance(ds.repo, AnnexRepo)) ok_(AnnexRepo.is_valid_repo(ds.path, allow_noninitialized=False)) eq_(set(ds.repo.get_indexed_files()), {'test.dat', 'INFO.txt', 'test-annex.dat'}) ok_clean_git(path, annex=True) # no content was installed: ok_(not ds.repo.file_has_content('test-annex.dat')) uuid_before = ds.repo.uuid eq_(ds.repo.get_description(), 'mydummy') # installing it again, shouldn't matter: res = clone(src, path) assert_result_values_equal(res, 'source_url', [src]) assert_status('notneeded', res) assert_message("dataset %s was already cloned from '%s'", res) ok_(ds.is_installed()) if isinstance(origin.repo, AnnexRepo): eq_(uuid_before, ds.repo.uuid)
def test_recursive_save(path):
    """Exercise recursive ``save`` across a three-level dataset hierarchy.

    Builds ds -> sub -> subsub, then checks: no-op saves report 'notneeded',
    recursive saves pick up changes at any depth, untracked files are never
    silently committed, and ``super_datasets=True`` propagates a save (and
    its commit message) upward through the hierarchy.

    NOTE(review): source formatting was lost (function collapsed onto a few
    lines); indentation below is a reconstruction — confirm against VCS
    history.
    """
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds.save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds.save(result_filter=is_ok_dataset),
        'path', [ds.path])
    # make the new file known to its dataset
    ds.add(newfile_name, save=False)
    # but remains dirty because of the uncommitted file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds.save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path', [subsubds.path, subds.path, ds.path])
    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds.save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all datasets, but there is
    # nothing actually saved
    res = ds.save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    # the untracked file must not have been added/committed anywhere
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)
    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds.save())
    # no recursive
    assert_status('notneeded', ds.save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path', [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds.save(recursive=True, result_filter=is_ok_dataset),
        'path', [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds.save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds.save(recursive=True))
    # no commit anywhere in the hierarchy may have been created
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)
    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds.save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states,
                        [d.repo.get_hexsha() for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds.save(message="savingtestmessage", path=['testnew2'],
                  super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])
    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})
    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds.save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided, but a path is provided
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')
def test_recursive_save(path):
    """Exercise recursive ``_save`` across a three-level dataset hierarchy.

    Same scenario as the other ``test_recursive_save`` in this file, but
    driving the private ``Dataset._save`` API instead of the public
    ``Dataset.save``.

    NOTE(review): this definition shares its name with the earlier
    ``test_recursive_save``; at module level the later definition shadows the
    earlier one — one of the two should likely be removed or renamed.

    NOTE(review): source formatting was lost (function collapsed onto a few
    lines); indentation below is a reconstruction — confirm against VCS
    history.
    """
    ds = Dataset(path).create()
    # nothing to save
    assert_status('notneeded', ds._save())
    subds = ds.create('sub')
    # subdataset presence already saved
    ok_clean_git(ds.path)
    subsubds = subds.create('subsub')
    assert_equal(
        ds.subdatasets(recursive=True, fulfilled=True, result_xfm='paths'),
        [subds.path, subsubds.path])
    newfile_name = opj(subsubds.path, 'test')
    with open(newfile_name, 'w') as f:
        f.write('some')
    # saves the status change of the subdataset due to the subsubdataset addition
    assert_result_values_equal(
        ds._save(result_filter=is_ok_dataset),
        'path', [ds.path])
    # make the new file known to its dataset
    ds.add(newfile_name, save=False)
    # but remains dirty because of the uncommitted file down below
    assert ds.repo.dirty
    # auto-add will save nothing deep down without recursive
    assert_status('notneeded', ds._save())
    assert ds.repo.dirty
    # with recursive pick up the change in subsubds
    assert_result_values_equal(
        ds._save(recursive=True, result_filter=is_ok_dataset),
        'path', [subsubds.path, subds.path, ds.path])
    # at this point the entire tree is clean
    ok_clean_git(ds.path)
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    # now we save recursively, nothing should happen
    res = ds._save(recursive=True)
    # we do not get any report from a subdataset, because we detect at the
    # very top that the entire tree is clean
    assert_result_count(res, 1)
    assert_result_count(res, 1, status='notneeded', action='save', path=ds.path)
    # now we introduce new files all the way down
    create_tree(subsubds.path, {"mike1": 'mike1'})
    # because we cannot say from the top if there is anything to do down below,
    # we have to traverse and we will get reports for all datasets, but there is
    # nothing actually saved
    res = ds._save(recursive=True)
    assert_result_count(res, 3)
    assert_status('notneeded', res)
    # the untracked file must not have been added/committed anywhere
    subsubds_indexed = subsubds.repo.get_indexed_files()
    assert_not_in('mike1', subsubds_indexed)
    assert_equal(states, [d.repo.get_hexsha() for d in (ds, subds, subsubds)])
    unlink(opj(subsubds.path, 'mike1'))
    ok_clean_git(ds.path)
    # modify content in subsub and try saving
    testfname = newfile_name
    subsubds.unlock(testfname)
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    # the following should all do nothing
    # no auto_add
    assert_status('notneeded', ds._save())
    # no recursive
    assert_status('notneeded', ds._save())
    # an explicit target saves only the corresponding dataset
    assert_result_values_equal(
        save(path=[testfname]),
        'path', [subsubds.path])
    # plain recursive without any files given will save the beast
    assert_result_values_equal(
        ds._save(recursive=True, result_filter=is_ok_dataset),
        'path', [subds.path, ds.path])
    # there is nothing else to save
    assert_status('notneeded', ds._save(recursive=True))
    ok_clean_git(ds.path)
    # one more time and check that all datasets in the hierarchy are not
    # contaminated with untracked files
    states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    testfname = opj('sub', 'subsub', 'saveme2')
    with open(opj(ds.path, testfname), 'w') as f:
        f.write('I am in here!')
    assert_status('notneeded', ds._save(recursive=True))
    # no commit anywhere in the hierarchy may have been created
    newstates = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    for old, new in zip(states, newstates):
        assert_equal(old, new)
    assert ds.repo.dirty
    unlink(opj(ds.path, testfname))
    ok_clean_git(ds.path)
    # now let's check saving "upwards"
    create_tree(subds.path, {"testnew": 'smth', "testadded": "added"})
    subds.repo.add("testadded")
    indexed_files = subds.repo.get_indexed_files()
    assert subds.repo.dirty
    assert ds.repo.dirty
    assert not subsubds.repo.dirty
    create_tree(subsubds.path, {"testnew2": 'smth'})
    assert subsubds.repo.dirty
    # and indexed files didn't change
    assert_equal(indexed_files, subds.repo.get_indexed_files())
    ok_clean_git(subds.repo, untracked=['testnew'],
                 index_modified=['subsub'], head_modified=['testadded'])
    old_states = [d.repo.get_hexsha() for d in (ds, subds, subsubds)]
    subsubds._save(message="savingtestmessage", super_datasets=True)
    # this save actually didn't save anything in subsub (or anywhere),
    # because there were only untracked bits pending
    for old, new in zip(old_states,
                        [d.repo.get_hexsha() for d in (ds, subds, subsubds)]):
        assert_equal(old, new)
    # but now we are saving this untracked bit specifically
    subsubds._save(message="savingtestmessage", path=['testnew2'],
                   super_datasets=True)
    ok_clean_git(subsubds.repo)
    # but its super should have got only the subsub saved
    # not the file we created
    ok_clean_git(subds.repo, untracked=['testnew'], head_modified=['testadded'])
    # check commits to have correct messages
    # there are no more dedicated superdataset-save commits anymore, because
    # superdatasets get saved as part of the processed hierarchy and can contain
    # other parts in the commit (if so instructed)
    assert_equal(next(subsubds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(subds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'savingtestmessage')
    # and if we try to save while being within that subsubds path
    subsubds.unlock('testnew2')
    create_tree(subsubds.path, {"testnew2": 'smth2'})
    # trying to replicate https://github.com/datalad/datalad/issues/1540
    subsubds._save(message="saving new changes", all_updated=True)  # no super
    with chpwd(subds.path):
        # no explicit dataset is provided, but a path is provided
        save(path=['subsub'], message='saving sub', super_datasets=True)
    # super should get it saved too
    assert_equal(next(ds.repo.get_branch_commits('master')).message.rstrip(),
                 'saving sub')