def find_explicit_outputs(self, starting_output_id):
    """Yield an output parameter for each explicitly declared output.

    Every yielded item is a ``(output, changed_input, relative_path)``
    tuple. ``changed_input`` is ``None`` unless the output path is also the
    default of a path-typed input; in that case it is a copy of that input
    evolved to ``type='string'`` and the output glob refers to it by id.

    Output ids are numbered consecutively, starting at
    ``starting_output_id``.

    Raises ``errors.InvalidOutputPath`` when ``self.file_candidate``
    returns ``None`` for one of the explicit outputs.
    """
    # Path-typed inputs keyed by their default path, relative to the
    # working directory.
    path_inputs = {}
    for known_input in self.inputs:
        if known_input.type in PATH_OBJECTS:
            key = str(known_input.default.path.relative_to(self.working_dir))
            path_inputs[key] = known_input

    next_id = starting_output_id

    for path in self.explicit_outputs:
        if self.file_candidate(path) is None:
            raise errors.InvalidOutputPath(
                'The output file or directory does not exist.'
                '\n\n\t' + click.style(str(path), fg='yellow') + '\n\n'
            )

        relative_path = str(path.relative_to(self.working_dir))
        output_kind = 'Directory' if path.is_dir() else 'File'

        if relative_path in path_inputs:
            # The input is also an output: turn it into a string parameter
            # and let the output glob reference it.
            changed_input = attr.evolve(
                path_inputs[relative_path],
                type='string',
                default=relative_path,
            )
            glob_pattern = '$(inputs.{0})'.format(changed_input.id)
        else:
            changed_input = None
            glob_pattern = relative_path

        yield (
            CommandOutputParameter(
                id='output_{0}'.format(next_id),
                type=output_kind,
                outputBinding=dict(glob=glob_pattern),
            ),
            changed_input,
            relative_path,
        )

        next_id += 1
def siblings(self, node):
    """Return the set of siblings of ``node``.

    The node itself is part of the result, so it has siblings only when
    the length of the returned set is greater than 1.

    * ``Entity``: the members of its parent joined with the parent's own
      siblings (the parent itself excluded).
    * ``Generation``: everything generated by ``node.activity``.
    * ``Usage``: everything generated by ``self.activities[node.commit]``.
    * ``Process``: just the node.

    Raises ``errors.InvalidOutputPath`` when the activity looked up for the
    node is not a ``ProcessRun``.
    """
    if isinstance(node, Entity):
        container = node.parent
        inherited = self.siblings(container) - {container}
        return set(container.members) | inherited

    if isinstance(node, Generation):
        producer = node.activity
    elif isinstance(node, Usage):
        producer = self.activities[node.commit]
    elif isinstance(node, Process):
        return {node}
    else:
        producer = None

    # ``None`` is never a ProcessRun, so one check covers both failures.
    if isinstance(producer, ProcessRun):
        return set(producer.generated)

    raise errors.InvalidOutputPath(
        'The file "{0}" was not created by a renku command. \n\n'
        'Check the file history using: git log --follow "{0}"'.format(
            node.path
        )
    )
def guess_outputs(self, paths):
    """Yield detected output and changed command input parameter.

    ``paths`` is an iterable of candidate output paths (strings). Every
    yielded item is a ``(output, changed_input, glob)`` tuple, where
    ``changed_input`` is ``None`` unless the output matches an input, in
    which case it is that input evolved to ``type='string'`` with the glob
    as its default.

    Paths that resolve to one of ``self.explicit_outputs`` and paths equal
    to ``self.stdout`` / ``self.stderr`` are not yielded.

    Raises ``errors.InvalidOutputPath`` when an input directory that
    received outputs also holds other files (``.gitkeep`` aside) and is not
    an explicit output. Raises ``ValueError`` when ``self.file_candidate``
    returns ``None`` for a remaining path.
    """
    # TODO what to do with duplicate paths & inputs with same defaults
    paths = list(paths)
    tree = DirectoryTree.from_list(paths)

    input_candidates = {}
    conflicting_paths = {}

    for index, param in enumerate(self.inputs):
        # Convert input defaults to paths relative to working directory.
        if param.type not in PATH_OBJECTS:
            try:
                path = self.directory / str(param.default)
                input_path = path.resolve().relative_to(self.working_dir)
            except FileNotFoundError:
                continue
        else:
            input_path = param.default.path.relative_to(self.working_dir)

        if input_path.is_dir() and tree.get(input_path):
            # The directory might exist before running the script
            subpaths = {
                str(input_path / path)
                for path in tree.get(input_path, default=[])
            }
            if input_path.resolve() not in self.explicit_outputs:
                content = {
                    str(path)
                    for path in input_path.rglob('*')
                    if not path.is_dir() and path.name != '.gitkeep'
                }
                extra_paths = content - subpaths
                if extra_paths:
                    raise errors.InvalidOutputPath(
                        'The output directory "{0}" is not empty. \n\n'
                        'Delete existing files before running the '
                        'command:'
                        '\n (use "git rm <file>..." to remove them '
                        'first)'
                        '\n\n'.format(input_path) + '\n'.join(
                            '\t' + click.style(path, fg='yellow')
                            for path in extra_paths
                        ) + '\n\n'
                        'Once you have removed files that should be used '
                        'as outputs,\n'
                        'you can safely rerun the previous command.'
                    )

            # Remove files from the input directory
            paths = [path for path in paths if path not in subpaths]
            # Include input path in the paths to check
            paths.append(str(input_path))

            input_candidates[str(input_path)] = param
        elif param.type not in PATH_OBJECTS:
            # Input need to be changed if an output is detected
            input_candidates[str(input_path)] = param
        else:
            # Names that can not be outputs because they are already inputs
            conflicting_paths[str(input_path)] = (index, param)

    streams = {
        path
        for path in (getattr(self, name) for name in ('stdout', 'stderr'))
        if path is not None
    }

    # TODO group by a common prefix

    # Build a new list instead of deleting while enumerating: deleting
    # during iteration shifts the remaining items, so the entry following
    # each removed one was never checked and could slip through.
    paths = [
        path for path in paths
        if Path(path).resolve() not in self.explicit_outputs
    ]

    for position, path in enumerate(paths):
        candidate = self.file_candidate(self.working_dir / path)

        if candidate is None:
            raise ValueError('Path "{0}" does not exist.'.format(path))

        glob = str(candidate.relative_to(self.working_dir))

        if glob in streams:
            continue

        new_input = None

        if glob in conflicting_paths:
            # it means that it is rewriting a file
            _, param = conflicting_paths[glob]
            new_input = attr.evolve(param, type='string', default=glob)
            input_candidates[glob] = new_input

            del conflicting_paths[glob]
            # TODO add warning ('Output already exists in inputs.')

        candidate_type = 'Directory' if candidate.is_dir() else 'File'

        if glob in input_candidates:
            param = input_candidates[glob]

            if new_input is None:
                new_input = input_candidates[glob] = attr.evolve(
                    param, type='string', default=glob
                )

            yield (
                CommandOutputParameter(
                    id='output_{0}'.format(position),
                    type=candidate_type,
                    outputBinding=dict(
                        glob='$(inputs.{0})'.format(param.id),
                    ),
                ),
                new_input,
                glob,
            )
        else:
            yield (
                CommandOutputParameter(
                    id='output_{0}'.format(position),
                    type=candidate_type,
                    outputBinding=dict(glob=glob, ),
                ),
                None,
                glob,
            )