def validate_datalad_config(store, dataset):
    """Add a .datalad/config file if one does not exist."""
    dataset_path = store.get_dataset_path(dataset)
    try:
        git_show(dataset_path, 'HEAD', '.datalad/config')
    except KeyError:
        create_datalad_config(dataset_path)
        commit_files(store, dataset, ['.datalad/config'])
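# This caller (and several snippets below) treats a KeyError from `git_show`
# as "path not present at that revision", which is consistent with a
# pygit2-backed helper. A minimal sketch of such a helper follows; it is an
# assumption for illustration, not the project's actual implementation.
import pygit2


def git_show(dataset_path, committish, obj_path=None):
    """Return the text of a repository file at a given revision."""
    repo = pygit2.Repository(dataset_path)
    if obj_path is None:
        # committish may be a combined '<rev>:<path>' spec such as
        # 'HEAD:CHANGES'; revparse_single raises KeyError if unresolved
        blob = repo.revparse_single(committish)
    else:
        commit = repo.revparse_single(committish)
        blob = commit.tree[obj_path]  # raises KeyError if the path is absent
    return blob.data.decode()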
def on_get(self, req, resp, dataset, filename=None, snapshot='HEAD'):
    ds_path = self.store.get_dataset_path(dataset)
    if filename:
        try:
            ds = self.store.get_dataset(dataset)
            if ds.repo.is_under_annex([filename])[0]:
                path = git_show(ds_path, snapshot + ':' + filename)
                # remove leading relative folder paths
                fd = path[path.find('.git/annex'):]
                # if opening fd fails, the file is not present in the annex
                # and would have to come from S3, so the IOError handler
                # below sends the client a 404 to indicate the file was not
                # found locally
                fd = open(os.path.join(ds_path, fd), 'rb')
                resp.stream = fd
                resp.stream_len = os.fstat(fd.fileno()).st_size
                resp.status = falcon.HTTP_OK
            else:
                resp.body = git_show(ds_path, snapshot + ':' + filename)
                resp.status = falcon.HTTP_OK
        except CalledProcessError:
            # File is not present in tree
            resp.media = {'error': 'file not found in git tree'}
            resp.status = falcon.HTTP_NOT_FOUND
        except IOError:
            # File is not kept locally
            resp.media = {'error': 'file not found'}
            resp.status = falcon.HTTP_NOT_FOUND
        except Exception:
            # Some unknown error
            resp.media = {
                'error': 'an unknown error occurred accessing this file'
            }
            resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
            self.logger.exception(
                'An unknown error occurred processing file "{}"'.format(
                    filename))
    else:
        # Request for an index of files; return a list of file objects
        # of the form {name, path, size}
        try:
            if 'untracked' in req.params:
                files = get_untracked_files(self.store, dataset)
            else:
                files = get_files(self.store, dataset, snapshot)
            resp.media = {'files': files}
        except Exception:
            resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
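# Hypothetical wiring for the resource above. The class name and route
# templates are assumptions; they show how `dataset`, `snapshot`, and
# `filename` would be bound from the URI. This uses the falcon 1.x/2.x API
# implied by `resp.body` and `resp.stream_len`.
import falcon

app = falcon.API()
files = FilesResource(store)  # hypothetical resource class wrapping on_get
app.add_route('/datasets/{dataset}/files/{filename}', files)
app.add_route(
    '/datasets/{dataset}/snapshots/{snapshot}/files/{filename}', files)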
def update_description(store, dataset, description_fields,
                       name=None, email=None):
    ds = store.get_dataset(dataset)
    description = git_show(ds.path, 'HEAD:dataset_description.json')
    description_json = json.loads(description)
    if description_json.get('License') != 'CC0':
        description_fields = edit_description(description_fields,
                                              {'License': 'CC0'})
    if description_fields is not None and any(description_fields):
        updated = edit_description(description_json, description_fields)
        path = os.path.join(store.get_dataset_path(dataset),
                            'dataset_description.json')
        with open(path, 'r+', encoding='utf-8') as description_file:
            description_file_contents = description_file.read()
            if description != description_file_contents:
                raise Exception('unexpected dataset_description.json contents')
            description_file.seek(0)
            description_file.truncate(0)
            description_file.write(json.dumps(updated, indent=4))
        # Commit new content, run validator
        commit_files(store, dataset, ['dataset_description.json'])
        return updated
    else:
        return description_json
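# `edit_description` is referenced but not defined in this section. Given how
# it is called above, a plausible reading is a non-mutating merge of new
# fields over an existing description dict; this sketch is an assumption.
def edit_description(description, new_fields):
    updated = dict(description or {})
    updated.update(new_fields or {})
    return updated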
def check_remote_has_version(dataset_path, remote, tag):
    try:
        info = get_tag_info(dataset_path, tag)
        remotes = info.get('repositories containing these files', [])
        remote_repo = [
            r for r in remotes if r.get('description') == f'[{remote}]'
        ]
        remote_id_A = remote_repo and remote_repo[0].get('uuid')
        # extract the remote uuid and associated git tree id from
        # `git show git-annex:export.log`; this command only logs the latest
        # export, so previously exported tags will not show
        export_log = git_show(dataset_path, 'git-annex', 'export.log')
        log_remote_id_pattern = re.compile(':(.+) .+$')
        match = log_remote_id_pattern.search(export_log)
        remote_id_B = match.group(1)
        log_tree_id_pattern = re.compile('.* (.+)$')
        match = log_tree_id_pattern.search(export_log)
        tree_id_A = match.group(1)
        # extract the git tree id of <tag> from the git reference
        repo = pygit2.Repository(dataset_path)
        tree_id_B = git_tag_tree(repo, tag)
    except AttributeError:
        return False
    # if the remote uuids and tree ids exist and match, then
    # <tag> is the latest export to <remote>
    return remote_id_A == remote_id_B and tree_id_A == tree_id_B
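# Illustration of the export.log parsing above. The line format shown is
# assumed from the regexes ('<timestamp>s <from-uuid>:<to-uuid> <tree-id>');
# the uuids and tree id below are made up.
import re

line = ('1597850918.39s 87e9c3f0-aaaa:c2915c53-bbbb '
        '4b825dc642cb6eb9a060e54bf8d69288fbee4904')
assert re.search(':(.+) .+$', line).group(1) == 'c2915c53-bbbb'     # remote uuid
assert re.search('.* (.+)$', line).group(1).startswith('4b825dc6')  # tree id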
def test_write_new_changes(datalad_store, new_dataset):
    ds_id = os.path.basename(new_dataset.path)
    write_new_changes(new_dataset, '1.0.1', ['Some changes'], '2019-01-01')
    # Manually make the commit without validation
    new_dataset.save('CHANGES')
    # Get a fresh dataset object and verify correct CHANGES
    dataset = Dataset(os.path.join(datalad_store.annex_path, ds_id))
    assert not dataset.repo.dirty
    assert git_show(dataset.path, 'HEAD:CHANGES') == '''1.0.1 2019-01-01
def on_get(self, req, resp, dataset, filename=None, snapshot='HEAD'):
    ds_path = self.store.get_dataset_path(dataset)
    if filename:
        try:
            file_content = git_show(ds_path, snapshot, filename)
            # If the file begins with an annex path, return that path
            if file_content[0:4096].find('.git/annex') != -1:
                # Resolve an absolute, normalized path for the annex target
                # (normpath collapses '..' segments so the containment check
                # below is meaningful)
                target_path = os.path.normpath(
                    os.path.join(ds_path, os.path.dirname(filename),
                                 file_content))
                # Verify the annex path is within the dataset dir
                if ds_path == os.path.commonpath((ds_path, target_path)):
                    fd = open(target_path, 'rb')
                    resp.stream = fd
                    resp.stream_len = os.fstat(fd.fileno()).st_size
                    resp.status = falcon.HTTP_OK
                else:
                    resp.media = {'error': 'file not found in git tree'}
                    resp.status = falcon.HTTP_NOT_FOUND
            else:
                resp.body = file_content
                resp.status = falcon.HTTP_OK
        except KeyError:
            # File is not present in tree
            resp.media = {'error': 'file not found in git tree'}
            resp.status = falcon.HTTP_NOT_FOUND
        except IOError:
            # File is not kept locally
            resp.media = {'error': 'file not found'}
            resp.status = falcon.HTTP_NOT_FOUND
        except Exception:
            # Some unknown error
            resp.media = {
                'error': 'an unknown error occurred accessing this file'
            }
            resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
            self.logger.exception(
                'An unknown error occurred processing file "{}"'.format(
                    filename))
    else:
        # Request for an index of files; return a list of file objects
        # of the form {name, path, size}
        try:
            files = get_files(self.store, dataset, snapshot)
            resp.media = {'files': files}
        except Exception:
            resp.status = falcon.HTTP_INTERNAL_SERVER_ERROR
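# Why the normalization above matters: os.path.commonpath compares raw path
# components, so unresolved '..' segments in an annex symlink target would
# let the containment guard pass without actually proving containment. The
# paths below are hypothetical.
import os

ds_path = '/data/ds000001'
link_target = '../../.git/annex/objects/aa/bb/KEY/KEY'  # typical annex link
target = os.path.normpath(os.path.join(ds_path, 'sub-01/anat', link_target))
assert target == '/data/ds000001/.git/annex/objects/aa/bb/KEY/KEY'
assert os.path.commonpath((ds_path, target)) == ds_path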
def test_git_show(new_dataset):
    expected = ('{"BIDSVersion": "1.0.2", '
                '"License": "This is not a real dataset", '
                '"Name": "Test fixture new dataset"}')
    assert git.git_show(new_dataset.path, 'HEAD',
                        'dataset_description.json') == expected
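# A plausible `new_dataset` fixture backing this test. The body is an
# assumption, but writing the description with json.dumps would reproduce
# the exact single-line JSON the assertion expects.
import json
import os

import pytest
from datalad.api import Dataset


@pytest.fixture
def new_dataset(tmp_path):
    ds = Dataset(str(tmp_path / 'new_dataset')).create()
    description = {
        'BIDSVersion': '1.0.2',
        'License': 'This is not a real dataset',
        'Name': 'Test fixture new dataset',
    }
    with open(os.path.join(ds.path, 'dataset_description.json'), 'w') as f:
        f.write(json.dumps(description))
    ds.save('dataset_description.json')
    return ds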
def get_head_changes(dataset_path):
    try:
        return git_show(dataset_path, 'HEAD', 'CHANGES')
    except KeyError:
        return None
def get_head_changes(ds):
    try:
        return git_show(ds.path, 'HEAD:CHANGES')
    except CalledProcessError:
        return None
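# Unlike the KeyError variant above, this version catches CalledProcessError,
# which suggests a subprocess-backed `git_show` in this iteration of the
# code. A minimal sketch under that assumption:
import subprocess


def git_show(repo_path, spec):
    """Return `git show <spec>` output; raises CalledProcessError on failure."""
    result = subprocess.run(['git', 'show', spec], cwd=repo_path,
                            capture_output=True, text=True, check=True)
    return result.stdout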