def test_show_inputs(tmpdir_factory, project, runner, run):
    """Test show inputs with submodules."""
    # Initialize a second, independent renku project.
    external_project = Path(str(tmpdir_factory.mktemp('second_project')))
    assert 0 == run(args=('init', str(external_project)))

    # Commit a data file inside the external project.
    data_file = external_project / 'woop'
    with data_file.open('w') as handle:
        handle.write('woop')

    external_repo = git.Repo(str(external_project))
    external_repo.git.add('--all')
    external_repo.index.commit('Added woop file')

    # Import the external file into a dataset of the current project.
    assert 0 == run(args=('dataset', 'create', 'foo'))
    assert 0 == run(args=('dataset', 'add', 'foo', str(data_file)))

    linked_file = Path(project) / 'data' / 'foo' / data_file.name
    assert linked_file.exists()

    # Run a tool that consumes the imported file.
    wc_output = Path(project) / 'woop.wc'
    assert 0 == run(args=('run', 'wc'), stdin=linked_file, stdout=wc_output)

    # ``show inputs`` must report the imported file relative to the project.
    result = runner.invoke(cli.cli, ['show', 'inputs'], catch_exceptions=False)
    expected = {
        str(linked_file.resolve().relative_to(Path(project).resolve()))
    }
    assert expected == set(result.output.strip().split('\n'))
def add_file(self, path, revision='HEAD'):
    """Add a file node to the graph.

    :param path: Repository path of the file to add.
    :param revision: Git revision range searched for commits touching the
        file (defaults to ``'HEAD'``).
    :raises KeyError: If no commit in *revision* touches *path*.
    """
    # Newest commit touching the file comes first in ``iter_commits``.
    file_commits = list(self.client.git.iter_commits(revision, paths=path))
    if not file_commits:
        raise KeyError('Could not find a file {0} in range {1}'.format(
            path, revision))

    commit = file_commits[0]

    cwl = self.find_cwl(commit)
    if cwl is not None:
        # The file was produced by a tool: attach the tool node as well.
        file_key = self.add_node(commit, path)
        self.add_tool(commit, cwl, file_key=file_key)
        return file_key
    else:
        #: Does not have a parent CWL.
        root_node = self.add_node(commit, path)
        # NOTE(review): node keys appear to be (commit, path) tuples —
        # confirmed by this unpacking.
        parent_commit, parent_path = root_node

        #: Capture information about the submodule in a submodule.
        root_submodule = self.G.nodes[root_node].get('submodule', [])

        #: Resolve Renku based submodules.
        original_path = Path(parent_path)
        if original_path.is_symlink() or str(original_path).startswith(
                '.renku/vendors'):
            original_path = original_path.resolve()

            # Find the submodule that actually contains the resolved file.
            for submodule in Submodule.iter_items(
                    self.client.git, parent_commit=parent_commit):
                try:
                    subpath = original_path.relative_to(
                        Path(submodule.path).resolve())
                    # Build the graph of the submodule and add the file
                    # there, pinned at the submodule's recorded commit.
                    subgraph = Graph(client=LocalClient(
                        path=submodule.path))
                    subnode = subgraph.add_file(
                        str(subpath), revision=submodule.hexsha)

                    #: Extend node metadata.
                    for _, data in subgraph.G.nodes(data=True):
                        data['submodule'] = root_submodule + [
                            submodule.name
                        ]

                    #: Merge file node with it's symlinked version.
                    self.G = nx.contracted_nodes(
                        nx.compose(self.G, subgraph.G),
                        root_node,
                        subnode,
                    )  # TODO optionally it can be changed to an edge.
                    break
                except ValueError:
                    # The file is not inside this submodule; try the next.
                    continue

        return root_node
def file_candidate(self, candidate, ignore=None):
    """Return a resolved path if *candidate* exists in the current directory.

    Candidates listed in *ignore*, as well as non-existent paths,
    yield ``None``.
    """
    if ignore and candidate in ignore:
        return None

    located = Path(candidate)
    # Relative candidates are anchored at the tracked directory.
    if not located.is_absolute():
        located = self.directory / located

    return located.resolve() if located.exists() else None
def move(ctx, client, sources, destination):
    """Move files and check repository for potential problems.

    :param ctx: Click context (unused directly in the body).
    :param client: Project client giving access to repository paths,
        datasets and storage helpers.
    :param sources: Files or directories to move.
    :param destination: Target path for the move.
    """
    from renku.api._git import _expand_directories

    dst = Path(destination)

    def fmt_path(path):
        """Format path as relative to the client path."""
        return str(Path(path).absolute().relative_to(client.path))

    # Map every expanded file back to the top-level source it came from.
    files = {
        fmt_path(source): fmt_path(file_or_dir)
        for file_or_dir in sources
        for source in _expand_directories((file_or_dir, ))
    }

    def fmt_dst(path):
        """Build a destination path for a source path."""
        return str(dst / os.path.relpath(path, start=files[path]))

    destinations = {source: fmt_dst(source) for source in files}

    # 1. Check .gitignore.
    ignored = client.find_ignored_paths(*destinations.values())
    if ignored:
        click.echo(WARNING + 'Renamed files match .gitignore.\n')
        if click.confirm('Do you want to edit ".gitignore" now?',
                         default=False):
            click.edit(filename=str(client.path / '.gitignore'))

    # 2. Update dataset metadata files.
    with progressbar(
            client.datasets.items(),
            item_show_func=lambda item: str(item[1].short_id)
            if item else '',
            label='Updating dataset metadata',
            width=0,
    ) as bar:
        for (path, dataset) in bar:
            # Collect renames affecting files tracked by this dataset.
            renames = {}

            for file_ in dataset.files:
                filepath = fmt_path(file_.path)

                if filepath in files:
                    renames[file_.path] = destinations[filepath]

            if renames:
                dataset = dataset.rename_files(
                    lambda key: renames.get(key, key))
                dataset.to_yaml()

    # 3. Manage .gitattributes for external storage.
    tracked = tuple()
    if client.has_external_storage:
        # Only paths currently under the LFS filter need re-tracking.
        tracked = tuple(path
                        for path, attr in client.find_attr(*files).items()
                        if attr.get('filter') == 'lfs')
        client.untrack_paths_from_storage(*tracked)

    if client.find_attr(*tracked):
        click.echo(WARNING + 'There are custom .gitattributes.\n')
        if click.confirm('Do you want to edit ".gitattributes" now?',
                         default=False):
            click.edit(filename=str(client.path / '.gitattributes'))

    if tracked and client.has_external_storage:
        client.track_paths_in_storage(*(destinations[path]
                                        for path in tracked))

    # 4. Handle symlinks.
    dst.parent.mkdir(parents=True, exist_ok=True)

    for source, target in destinations.items():
        src = Path(source)
        if src.is_symlink():
            # Recreate the symlink at the destination, pointing at the
            # resolved original, then drop the source from the git move.
            Path(target).parent.mkdir(parents=True, exist_ok=True)
            Path(target).symlink_to(
                os.path.relpath(str(src.resolve()),
                                start=os.path.dirname(target)))
            src.unlink()
            del files[source]

    # Finally move the files.
    final_sources = list(set(files.values()))
    if final_sources:
        run(['git', 'mv'] + final_sources + [destination], check=True)
def _add_from_git(self, dataset, path, url, target, **kwargs):
    """Process adding resources from another git repository.

    The submodules are placed in ``.renku/vendors`` and linked to the
    *path* specified by the user.

    :param dataset: Dataset the files are added to.
    :param path: Dataset destination path inside the project.
    :param url: URL or local path of the source git repository.
    :param target: Optional path inside the source repository to add.
    :returns: Mapping of dataset-relative paths to ``DatasetFile``
        instances (directories recurse and merge their children).
    """
    # create the submodule
    u = parse.urlparse(url)
    submodule_path = self.renku_path / 'vendors' / (u.netloc or 'local')

    # Respect the directory structure inside the source path.
    relative_to = kwargs.get('relative_to', None)

    if u.scheme in ('', 'file'):
        warnings.warn('Importing local git repository, use HTTPS')
        # determine where is the base repo path
        r = git.Repo(url, search_parent_directories=True)
        src_repo_path = Path(r.git_dir).parent
        submodule_name = os.path.basename(src_repo_path)
        submodule_path = submodule_path / str(src_repo_path).lstrip('/')

        # if repo path is a parent, rebase the paths and update url
        if src_repo_path != Path(u.path):
            top_target = Path(
                u.path
            ).resolve().absolute().relative_to(src_repo_path)
            if target:
                target = top_target / target
            else:
                target = top_target
            url = src_repo_path.as_posix()
    elif u.scheme in ('http', 'https'):
        submodule_name = os.path.splitext(os.path.basename(u.path))[0]
        submodule_path = submodule_path.joinpath(
            os.path.dirname(u.path).lstrip('/'), submodule_name
        )
    else:
        raise NotImplementedError(
            'Scheme {} not supported'.format(u.scheme)
        )

    # FIXME: do a proper check that the repos are not the same
    if submodule_name not in (s.name for s in self.git.submodules):
        # new submodule to add
        self.git.create_submodule(
            name=submodule_name, path=submodule_path.as_posix(), url=url
        )

    src = submodule_path / (target or '')

    if target and relative_to:
        relative_to = Path(relative_to)
        if relative_to.is_absolute():
            assert u.scheme in {
                '', 'file'
            }, ('Only relative paths can be used with URLs.')
            target = (Path(url).resolve().absolute() / target).relative_to(
                relative_to.resolve()
            )
        else:
            # src already includes target so we do not have to append it
            target = src.relative_to(submodule_path / relative_to)

    # link the target into the data directory
    dst = self.path / path / submodule_name / (target or '')

    # if we have a directory, recurse
    if src.is_dir():
        files = {}
        dst.mkdir(parents=True, exist_ok=True)
        # FIXME get all files from submodule index
        for f in src.iterdir():
            try:
                files.update(
                    self._add_from_git(
                        dataset,
                        path,
                        url,
                        target=f.relative_to(submodule_path),
                        **kwargs
                    )
                )
            except ValueError:
                pass  # skip files outside the relative path
        return files

    if not dst.parent.exists():
        dst.parent.mkdir(parents=True)

    # Link the vendored file into the dataset directory.
    os.symlink(os.path.relpath(src, dst.parent), dst)

    # grab all the authors from the commit history
    git_repo = git.Repo(submodule_path.absolute().as_posix())
    authors = []
    for commit in git_repo.iter_commits(paths=target):
        author = Author.from_commit(commit)
        if author not in authors:
            authors.append(author)

    dataset_path = self.path / self.datadir / dataset.name
    result = dst.relative_to(dataset_path).as_posix()

    # Local sources keep no URL; remote ones record the file's sub-URL.
    if u.scheme in ('', 'file'):
        url = None
    else:
        url = '{}/{}'.format(url, target)

    return {
        result: DatasetFile(
            path=result,
            url=url,
            authors=authors,
            dataset=dataset.name,  # TODO detect original dataset
        )
    }
def _add_from_git(self, dataset, path, url, target, **kwargs):
    """Process adding resources from another git repository.

    The submodules are placed in ``.renku/vendors`` and linked to the
    *path* specified by the user.

    :param dataset: Dataset the files are added to.
    :param path: Dataset destination path inside the project.
    :param url: URL or local path of the source git repository.
    :param target: Optional path inside the source repository to add.
    :returns: List of dicts describing the added files (directories
        recurse and concatenate their children).
    """
    from git import Repo

    # create the submodule
    if url.startswith('git@'):
        # Normalize SCP-like SSH addresses so urlparse can handle them.
        url = 'git+ssh://' + url
    u = parse.urlparse(url)
    submodule_path = self.renku_path / 'vendors' / (u.netloc or 'local')

    # Respect the directory structure inside the source path.
    relative_to = kwargs.get('relative_to', None)

    if u.scheme in ('', 'file'):
        try:
            relative_url = Path(url).resolve().relative_to(self.path)
        except Exception:
            relative_url = None

        if relative_url:
            # The source already lives inside this project; no submodule.
            return [{
                'path': url,
                'url': url,
                'creator': dataset.creator,
                'dataset': dataset.name,
                'parent': self
            }]

        warnings.warn('Importing local git repository, use HTTPS')
        # determine where is the base repo path
        r = Repo(url, search_parent_directories=True)
        src_repo_path = Path(r.git_dir).parent.resolve()
        submodule_name = src_repo_path.name
        submodule_path = submodule_path / str(src_repo_path).lstrip('/')

        # if repo path is a parent, rebase the paths and update url
        if src_repo_path != Path(u.path):
            top_target = Path(
                u.path
            ).resolve().absolute().relative_to(src_repo_path)
            if target:
                target = top_target / target
            else:
                target = top_target
            url = src_repo_path.as_posix()
    elif u.scheme in {'http', 'https', 'git+https', 'git+ssh'}:
        submodule_name = os.path.splitext(os.path.basename(u.path))[0]
        submodule_path = submodule_path.joinpath(
            os.path.dirname(u.path).lstrip('/'), submodule_name
        )
    else:
        raise NotImplementedError(
            'Scheme {} not supported'.format(u.scheme)
        )

    # FIXME: do a proper check that the repos are not the same
    if submodule_name not in (s.name for s in self.repo.submodules):
        if u.scheme in {'http', 'https', 'git+https', 'git+ssh'}:
            url = self.get_relative_url(url)

        # Submodule in python git does some custom magic that does not
        # allow for relative URLs, so we call the git function directly
        self.repo.git.submodule([
            'add', '--force', '--name', submodule_name, url,
            submodule_path.relative_to(self.path).as_posix()
        ])

    src = submodule_path / (target or '')

    if target and relative_to:
        relative_to = Path(relative_to)
        if relative_to.is_absolute():
            assert u.scheme in {
                '', 'file'
            }, 'Only relative paths can be used with URLs.'
            target = (Path(url).resolve().absolute() / target).relative_to(
                relative_to.resolve()
            )
        else:
            # src already includes target so we do not have to append it
            target = src.relative_to(submodule_path / relative_to)

    # link the target into the data directory
    dst = self.path / path / (target or '')

    # if we have a directory, recurse
    if src.is_dir():
        files = []
        dst.mkdir(parents=True, exist_ok=True)
        # FIXME get all files from submodule index
        for f in src.iterdir():
            try:
                files.extend(
                    self._add_from_git(
                        dataset,
                        path,
                        url,
                        target=f.relative_to(submodule_path),
                        **kwargs
                    )
                )
            except ValueError:
                pass  # skip files outside the relative path
        return files

    if not dst.parent.exists():
        dst.parent.mkdir(parents=True)

    # Link the vendored file into the dataset directory.
    os.symlink(os.path.relpath(str(src), str(dst.parent)), str(dst))

    # grab all the creators from the commit history
    git_repo = Repo(str(submodule_path.absolute()))
    creators = []
    for commit in git_repo.iter_commits(paths=target):
        creator = Creator.from_commit(commit)
        if creator not in creators:
            creators.append(creator)

    # Local sources keep no URL; remote ones record the file's sub-URL.
    if u.scheme in ('', 'file'):
        url = None
    else:
        url = '{}/{}'.format(url, target)

    return [{
        'path': dst.relative_to(self.path),
        'url': url,
        'creator': creators,
        'dataset': dataset.name,
        'parent': self
    }]
def full_path(self):
    """Return full path in the current reference frame."""
    relative = Path(self.path)
    # Anchor at the client's project path when a client is attached.
    base = self.client.path if self.client else None
    if base is not None:
        relative = base / relative
    return relative.resolve()