def __attrs_post_init__(self):
    """Fill in the attributes that are derived from other fields.

    When no creator is set and a client is available, the creator is
    built from the commit returned by ``client.find_previous_commit`` for
    the renku metadata file (called with ``return_first=True``) if that
    file exists, and from the client's git repository otherwise.
    Afterwards ``_id`` is set to the value of ``project_id``.
    """
    creator_missing = not self.creator and self.client
    if creator_missing:
        if not self.client.renku_metadata_path.exists():
            # No metadata file: this assumes the project is being newly
            # created, so the creator comes from the repository itself.
            self.creator = Person.from_git(self.client.repo)
        else:
            metadata_commit = self.client.find_previous_commit(
                self.client.renku_metadata_path,
                return_first=True,
            )
            self.creator = Person.from_commit(metadata_commit)

    self._id = self.project_id
def __attrs_post_init__(self):
    """Initialize computed attributes.

    If no creator is set and a client is available, the creator is built
    from the commit returned by ``client.find_previous_commit`` for the
    renku metadata file (when that file exists) or from the client's git
    repository (when it does not).

    ``_id`` is then taken from ``project_id``. If computing ``project_id``
    raises ``ValueError``, an already set ``_id`` is kept; otherwise the id
    of the client's project is used when the client reports that a project
    is set.

    :raises ValueError: re-raised from ``project_id`` when neither an
        existing ``_id`` nor a client project is available.
    """
    if not self.creator and self.client:
        if self.client.renku_metadata_path.exists():
            self.creator = Person.from_commit(
                self.client.find_previous_commit(
                    self.client.renku_metadata_path,
                    return_first=True,
                ),
            )
        else:
            # this assumes the project is being newly created
            self.creator = Person.from_git(self.client.repo)

    try:
        self._id = self.project_id
    except ValueError:
        # Fallback to old behaviour: keep an id that is already present,
        # else reuse the id of the client's project, else propagate.
        if not self._id:
            if self.client and self.client.is_project_set():
                self._id = self.client.project._id
            else:
                raise
def update_dataset_files(self, files, ref, delete=False):
    """Update files and dataset metadata according to their remotes.

    Files without ``based_on`` are skipped. For every other file the remote
    repository is prepared (once per URL), the remote version of the file
    is looked up and, when its commit differs from the recorded one, the
    local copy is refreshed. Changed paths are committed and the metadata
    of the affected datasets is rewritten.

    :param files: List of files to be updated
    :param ref: Git reference passed to ``prepare_git_repo`` for each remote
    :param delete: Indicates whether to delete files or not
    :raises errors.ParameterError: If a file cannot be found in its remote
        repository
    :return: List of files that should be deleted
    """
    from renku import LocalClient

    # url -> (repo, repo_path, remote_client); each remote is prepared once
    visited_repos = {}
    updated_files = []
    deleted_files = []

    for file_ in files:
        if not file_.based_on:
            continue

        file_.based_on = DatasetFile.from_jsonld(file_.based_on)
        based_on = file_.based_on
        url = based_on.url
        if url in visited_repos:
            repo, repo_path, remote_client = visited_repos[url]
        else:
            repo, repo_path = self.prepare_git_repo(url, ref)
            remote_client = LocalClient(repo_path)
            visited_repos[url] = repo, repo_path, remote_client

        remote_file = self._fetch_file_metadata(remote_client, based_on.path)

        if not remote_file:
            try:
                remote_file = DatasetFile.from_revision(
                    remote_client,
                    path=based_on.path,
                    url=url,
                    added=based_on.added,
                )
            except KeyError as error:
                # Report the file path; ``based_on.url`` equals ``url`` and
                # would only repeat the repository in the message.
                raise errors.ParameterError(
                    'Cannot find file {} in the repo {}'.format(
                        based_on.path, url
                    )
                ) from error

        commit_sha = self._get_commit_sha_from_label(based_on)
        remote_commit_sha = self._get_commit_sha_from_label(remote_file)
        if commit_sha != remote_commit_sha:
            src = Path(repo.working_dir) / based_on.path
            dst = self.renku_path.parent / file_.path

            if src.exists():
                # Fetch file if it is tracked by Git LFS
                self._fetch_lfs_files(repo_path, {based_on.path})
                if remote_client._is_external_file(src):
                    self.remove_file(dst)
                    self._create_external_file(src.resolve(), dst)
                else:
                    shutil.copy(src, dst)
                file_.based_on.commit = remote_file.commit
                file_.based_on._label = remote_file._label
                updated_files.append(file_)
            else:
                # File was removed or renamed
                if delete:
                    self.remove_file(dst)
                deleted_files.append(file_)

    if not updated_files and (not delete or not deleted_files):
        # Nothing to commit or update
        return deleted_files

    # Commit changes in files

    file_paths = {str(f.path) for f in updated_files + deleted_files}
    # Force-add to include possible ignored files that are in datasets
    self.repo.git.add(*file_paths, force=True)
    self.repo.index.commit(
        'renku dataset: updated {} files and deleted {} files'.format(
            len(updated_files), len(deleted_files)
        )
    )

    # Update datasets' metadata

    modified_datasets = {}

    for file_ in updated_files:
        # Use the repository this file was fetched from instead of whatever
        # repository the loop above happened to visit last.
        # NOTE(review): ``file_.path`` is the path used for the local copy;
        # confirm whether ``file_.based_on.path`` was intended here.
        file_repo = visited_repos[file_.based_on.url][0]

        # Re-create list of creators from the commit history
        creators = []
        for commit in file_repo.iter_commits(paths=file_.path):
            creator = Person.from_commit(commit)
            if creator not in creators:
                creators.append(creator)

        new_file = DatasetFile.from_revision(
            self,
            path=file_.path,
            based_on=file_.based_on,
            creator=creators,
        )
        file_.dataset.update_files([new_file])
        modified_datasets[file_.dataset.name] = file_.dataset

    if delete:
        for file_ in deleted_files:
            file_.dataset.unlink_file(file_.path)
            modified_datasets[file_.dataset.name] = file_.dataset

    for dataset in modified_datasets.values():
        dataset.to_yaml()

    return deleted_files
def _add_from_git(self, dataset, url, sources, destination, ref):
    """Collect the files of another git repository that are to be added.

    The repository at ``url`` is prepared at ``ref``, the tree of its head
    commit is matched against ``sources`` and, for every matching
    non-directory entry, a dictionary describing the file is produced. The
    dictionary carries the planned operation as a ``(source, destination,
    kind)`` tuple where kind is ``'symlink'`` or ``'copy'``.

    :raises errors.ParameterError: If a source matched nothing, or if
        several entries would be written to an existing non-directory
        destination.
    :return: List of dictionaries, one per file.
    """
    from renku import LocalClient

    parsed_url = parse.urlparse(url)
    sources = self._resolve_paths(parsed_url.path, sources)

    # Walk the head commit and keep every entry that matches a source
    repo, repo_path = self.prepare_git_repo(url, ref)
    used_sources = set()
    files = set()
    for entry in repo.head.commit.tree.traverse():
        match = self._get_src_and_dst(
            entry.path, repo_path, sources, destination, used_sources
        )
        if match:
            files.add(match)

    missing = {
        str(source)
        for source in sources.keys()
        if source not in used_sources
    }
    if missing:
        raise errors.ParameterError(
            'No such file or directory', param_hint=missing
        )

    target_is_file = destination.exists() and not destination.is_dir()
    if target_is_file and len(files) > 1:
        raise errors.ParameterError(
            'Cannot copy multiple files or directories to a file'
        )

    # Create metadata and move files to dataset
    results = []
    remote_client = LocalClient(repo_path)

    # Pull files from LFS; symlinks are fetched through their target
    lfs_paths = set()
    for path, src, _ in files:
        if src.is_dir():
            continue
        lfs_path = path
        if src.is_symlink():
            try:
                lfs_path = str(src.resolve().relative_to(repo_path))
            except ValueError:
                # External file: keep the path of the link itself
                pass
        lfs_paths.add(lfs_path)
    self._fetch_lfs_files(repo_path, lfs_paths)

    # Fetch metadata from Renku if any
    metadata = self._fetch_files_metadata(
        remote_client, {item[0] for item in files}
    )

    for path, src, dst in files:
        if src.is_dir():
            continue

        based_on = metadata.get(path)
        if not based_on:
            # No original metadata: derive creators from the commit history
            creators = []
            for commit in repo.iter_commits(paths=path):
                author = Person.from_commit(commit)
                if author not in creators:
                    creators.append(author)

            based_on = DatasetFile.from_revision(
                remote_client, path=path, url=url, creator=creators
            )
        else:
            # Use original metadata
            based_on.url = url
            based_on.based_on = None
            creators = based_on.creator

        path_in_dst_repo = dst.relative_to(self.path)

        operation = (
            (src.resolve(), dst, 'symlink')
            if remote_client._is_external_file(src)
            else (src, dst, 'copy')
        )

        results.append({
            'path': path_in_dst_repo,
            'url': remove_credentials(url),
            'creator': creators,
            'parent': self,
            'based_on': based_on,
            'operation': operation
        })

    return results
def _add_from_git(self, dataset, url, sources, destination, ref):
    """Process adding resources from another git repository.

    The repository at ``url`` is prepared at ``ref``, the tree of its head
    commit is matched against ``sources`` and every matching non-directory
    entry is copied to its destination.

    :param dataset: Not used by this method.
    :param url: URL of the repository to add files from.
    :param sources: Paths to add; resolved with ``_resolve_paths`` first.
    :param destination: Passed to ``_get_src_and_dst`` to compute targets.
    :param ref: Git reference passed to ``_prepare_git_repo``.
    :raises errors.ParameterError: If a source matched nothing in the
        repository.
    :return: List of dictionaries describing the copied files.
    """
    from renku import LocalClient

    u = parse.urlparse(url)

    sources = self._resolve_paths(u.path, sources)

    # Get all files from repo that match sources
    repo, repo_path = self._prepare_git_repo(url, ref)
    copied_sources = set()
    files = set()
    for file in repo.head.commit.tree.traverse():
        path = file.path
        result = self._get_src_and_dst(path, repo_path, sources, destination)

        if result:
            files.add(result)
            # The fourth element of the result is the source that matched
            source = result[3]
            copied_sources.add(source)

    uncopied_sources = sources - copied_sources
    if uncopied_sources:
        uncopied_sources = {str(s) for s in uncopied_sources}
        raise errors.ParameterError(
            'No such file or directory', param_hint=uncopied_sources
        )

    # Create metadata and move files to dataset
    results = []
    remote_client = LocalClient(repo_path)

    # Pull files from LFS
    paths = set()
    for path, src, _, __ in files:
        if src.is_dir():
            continue
        if src.is_symlink():
            try:
                path = str(src.resolve().relative_to(repo_path))
            except ValueError:
                # The link resolves outside the repository; keep the path
                # of the link itself instead of failing.
                pass
        paths.add(path)
    self._fetch_lfs_files(repo_path, paths)

    # Fetch metadata from Renku if any
    paths = {f[0] for f in files}
    metadata = self._fetch_files_metadata(remote_client, paths)

    for path, src, dst, _ in files:
        if not src.is_dir():
            # Use original metadata if it exists
            based_on = metadata.get(path)
            if based_on:
                based_on.url = url
                based_on.based_on = None
                creators = based_on.creator
            else:
                creators = []
                # grab all the creators from the commit history
                for commit in repo.iter_commits(paths=path):
                    creator = Person.from_commit(commit)
                    if creator not in creators:
                        creators.append(creator)

                based_on = DatasetFile.from_revision(
                    remote_client, path=path, url=url
                )

            path_in_dst_repo = dst.relative_to(self.path)

            results.append({
                'path': path_in_dst_repo,
                'url': remove_credentials(url),
                'creator': creators,
                'parent': self,
                'based_on': based_on
            })

            dst.parent.mkdir(parents=True, exist_ok=True)
            shutil.copy(str(src), str(dst))

    return results