async def compute_token(self, job: Job, command_output: JupyterCommandOutput) -> Token:
    """Build the output Token for this port from a finished Jupyter cell execution.

    If ``self.value`` is set it is treated as a (possibly relative) path
    pattern and resolved on every resource assigned to the job; otherwise the
    token value is fetched from the remote notebook namespace under the key
    ``self.value_from``.  In both branches, relative paths are anchored to
    the job's output directory.

    :param job: the job whose output is being collected
    :param command_output: the command result carrying the remote user namespace
    :return: a Token named after the port and tagged from the job's inputs
    """
    path_processor = utils.get_path_processor(self.port.step)
    if self.value is not None:
        # NOTE(review): `job` is guarded against None here, but dereferenced
        # unconditionally below (job.get_resources, job.output_directory) —
        # confirm whether a None job can actually reach this branch.
        connector = job.step.get_connector() if job is not None else None
        resources = job.get_resources() or [None]
        # Anchor relative patterns to the job's output directory
        if job.output_directory and not path_processor.isabs(self.value):
            pattern = path_processor.join(job.output_directory, self.value)
        else:
            pattern = self.value
        # Resolve the pattern concurrently on every resource and merge results
        token_value = utils.flatten_list(await asyncio.gather(*[
            asyncio.create_task(
                remotepath.resolve(
                    connector=connector,
                    target=resource,
                    pattern=pattern))
            for resource in resources
        ]))
        # Unwrap single-element results so scalar outputs stay scalar
        if len(token_value) == 1:
            token_value = token_value[0]
    else:
        # Fetch the value from the notebook user namespace
        token_value = command_output.user_ns.get(self.value_from)
        if job.output_directory:
            # Anchor any relative path(s) to the job's output directory
            if isinstance(token_value, MutableSequence):
                token_value = [
                    path_processor.join(job.output_directory, t)
                    if not path_processor.isabs(t) else t
                    for t in token_value
                ]
            else:
                if not path_processor.isabs(token_value):
                    token_value = path_processor.join(job.output_directory, token_value)
    return Token(name=self.port.name,
                 value=token_value,
                 job=job.name,
                 tag=utils.get_tag(job.inputs))
async def _deserialize_namespace(
        self,
        job: Job,
        output_serializers: MutableMapping[Text, Any],
        remote_path: Text) -> MutableMapping[Text, Any]:
    """Fetch and deserialize the remote notebook namespace produced by a job.

    The dill-serialized namespace file is copied locally, loaded, and every
    entry with a ``file``-typed serializer is itself transferred to a local
    temporary location before the values are run through ``executor.postload``.

    :param job: the job that produced the namespace on its resource(s)
    :param output_serializers: per-name serializer configurations
    :param remote_path: remote location of the dill file; falsy means no namespace
    :return: the post-loaded namespace, or an empty dict when remote_path is falsy
    """
    if remote_path:
        with TemporaryDirectory() as d:
            path_processor = get_path_processor(self.step)
            dest_path = os.path.join(d, path_processor.basename(remote_path))
            # Copy the serialized namespace from the job's resource to a local temp dir
            await self.step.context.data_manager.transfer_data(
                src=remote_path,
                src_job=job,
                dst=dest_path,
                dst_job=None)
            # SECURITY NOTE: dill.load deserializes pickle-compatible data and can
            # execute arbitrary code; the input here comes from the job's own
            # execution context, but it must never be pointed at untrusted data.
            with open(dest_path, 'rb') as f:
                namespace = dill.load(f)
            for name, value in namespace.items():
                if name in output_serializers:
                    intermediate_type = output_serializers[name].get(
                        'type', 'name')
                    # File-typed outputs are placed in their own temp dirs
                    # (mkdtemp) so they outlive the namespace temp dir `d`
                    if intermediate_type == 'file':
                        dest_path = os.path.join(
                            mkdtemp(),
                            path_processor.basename(namespace[name]))
                        await self.step.context.data_manager.transfer_data(
                            src=namespace[name],
                            src_job=job,
                            dst=dest_path,
                            dst_job=None)
                        namespace[name] = dest_path
            # Apply per-name postload hooks before handing the namespace back
            return {
                k: executor.postload(compiler=self.compiler,
                                     name=k,
                                     value=v,
                                     serializer=output_serializers.get(k))
                for k, v in namespace.items()
            }
    else:
        return {}
async def collect_output(self, token: Token, output_dir: Text) -> Token:
    """Copy the file referenced by `token` (or, recursively, by each element
    of a composite token) into `output_dir` and return an updated token that
    points at the local copies."""
    # Composite token produced by multiple jobs: recurse on each element
    if isinstance(token.job, MutableSequence):
        gathered = await asyncio.gather(*[
            asyncio.create_task(self.collect_output(inner, output_dir))
            for inner in token.value
        ])
        return token.update(gathered)
    # Single job, list-valued token: recurse element-wise and re-wrap values
    if isinstance(token.value, MutableSequence):
        collected = await asyncio.gather(*[
            asyncio.create_task(
                self.collect_output(token.update(inner), output_dir))
            for inner in token.value
        ])
        return token.update([c.value for c in collected])
    # Base case: a single remote path to bring back locally
    step_context = self.port.step.context
    processor = utils.get_path_processor(self.port.step)
    remote_path = token.value
    origin_job = step_context.scheduler.get_job(token.job)
    local_path = os.path.join(output_dir, processor.basename(remote_path))
    # Transfer file to local destination
    await self.port.step.context.data_manager.transfer_data(
        src=remote_path,
        src_job=origin_job,
        dst=local_path,
        dst_job=None,
        writable=True)
    # Update token
    return token.update(local_path)
def _process_sf_path(self, pattern: Text, primary_path: Text) -> Text: if pattern.startswith('^'): path_processor = get_path_processor(self.port.step) return self._process_sf_path( pattern[1:], path_processor.splitext(primary_path)[0]) else: return primary_path + pattern
async def _transfer_file(self, job: Job, path: Text) -> Text:
    """Copy the local file at `path` into `job`'s input directory on the
    job's resource(s) and return the resulting remote path."""
    remote_name = os.path.basename(path)
    dest_path = get_path_processor(self.step).join(job.input_directory, remote_name)
    await self.step.context.data_manager.transfer_data(
        src=path,
        src_job=None,
        dst=dest_path,
        dst_job=job,
        writable=True)
    return dest_path
async def _process_secondary_file(
        self,
        job: Job,
        secondary_file: Any,
        token_value: MutableMapping[Text, Any],
        from_expression: bool,
        existing_sf: MutableMapping[Text, Any],
        load_contents: bool,
        load_listing: Optional[LoadListing]
) -> Optional[MutableMapping[Text, Any]]:
    """Resolve a single CWL secondaryFiles entry for a primary file token.

    :param job: the job owning the primary file (may be None)
    :param secondary_file: a dict token, a path/pattern string, or None
    :param token_value: the primary file token the secondary file belongs to
    :param from_expression: True when `secondary_file` comes from an evaluated
        expression and is therefore already a full path, not a suffix pattern
    :param existing_sf: secondary files already discovered, keyed by path
    :param load_contents: whether file contents should be loaded into the token
    :param load_listing: directory listing policy
    :return: the secondary-file token, or None when it cannot be found
    """
    step = job.step if job is not None else self.port.step
    # If value is None, simply return None
    if secondary_file is None:
        return None
    # If value is a dictionary, simply append it to the list
    elif isinstance(secondary_file, MutableMapping):
        # Fix: derive the connector from `step` and guard `job` before calling
        # get_resources(), mirroring _get_file_token; the previous code
        # dereferenced `job.step` unconditionally and raised AttributeError
        # whenever `job` was None despite the guard computing `step` above.
        connector = step.get_connector()
        resources = job.get_resources() or [None] if job is not None else [None]
        filepath = utils.get_path_from_token(secondary_file)
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                return await _get_file_token(
                    step=step,
                    job=job,
                    token_class=secondary_file['class'],
                    filepath=filepath,
                    basename=secondary_file.get('basename'),
                    load_contents=load_contents,
                    load_listing=load_listing)
    # If value is a string
    else:
        # If value doesn't come from an expression, apply it to the primary path
        filepath = (
            secondary_file if from_expression else self._process_sf_path(
                secondary_file, utils.get_path_from_token(token_value)))
        path_processor = get_path_processor(step)
        # Anchor relative secondary-file paths next to the primary file
        if not path_processor.isabs(filepath):
            filepath = path_processor.join(
                path_processor.dirname(get_path_from_token(token_value)),
                filepath)
        if filepath not in existing_sf:
            # Search file in job resources and build token value
            # (same None-job guard as in the dictionary branch above)
            connector = step.get_connector()
            resources = job.get_resources() or [None] if job is not None else [None]
            for resource in resources:
                if await remotepath.exists(connector, resource, filepath):
                    token_class = 'File' if await remotepath.isfile(
                        connector, resource, filepath) else 'Directory'
                    return await _get_file_token(
                        step=step,
                        job=job,
                        token_class=token_class,
                        filepath=filepath,
                        load_contents=load_contents,
                        load_listing=load_listing)
        else:
            return existing_sf[filepath]
    # Not found on any resource: fall through and return None implicitly
async def _transfer_file(self,
                         src_job: Optional[Job],
                         dest_job: Optional[Job],
                         src_path: Text,
                         dest_path: Optional[Text] = None,
                         writable: Optional[bool] = None) -> Text:
    """Transfer `src_path` from `src_job` to `dest_job`, computing a
    destination inside the destination job's input directory when
    `dest_path` is not given, and return the destination path."""
    if dest_path is None:
        if isinstance(self.port, InputPort) and src_job is not None:
            src_processor = get_path_processor(self.port.dependee.step)
            normalized = src_processor.normpath(src_path)
            if src_path.startswith(src_job.output_directory):
                # Keep the layout relative to the source job's output directory
                tail = src_processor.relpath(normalized, src_job.output_directory)
            else:
                tail = src_processor.basename(normalized)
        else:
            tail = os.path.basename(os.path.normpath(src_path))
        dest_path = get_path_processor(self.port.step).join(
            dest_job.input_directory, tail)
    await self.get_context().data_manager.transfer_data(
        src=src_path,
        src_job=src_job,
        dst=dest_path,
        dst_job=dest_job,
        writable=self.writable if writable is None else writable)
    return dest_path
async def _get_file_token(
        step: Step,
        job: Job,
        token_class: Text,
        filepath: Text,
        basename: Optional[Text] = None,
        load_contents: bool = False,
        load_listing: Optional[LoadListing] = None
) -> MutableMapping[Text, Any]:
    """Build a CWL File/Directory token dictionary describing `filepath`.

    The path is probed on each resource assigned to `job` (or locally when
    there is no job); the first resource on which it exists provides size,
    optional contents, checksum, or directory listing.

    :param step: the step whose connector/path semantics apply
    :param job: the owning job, or None for local-only probing
    :param token_class: 'File' or 'Directory'
    :param filepath: absolute path of the entity on the resource
    :param basename: optional explicit basename overriding the path's own
    :param load_contents: read up to CONTENT_LIMIT bytes into the token
    :param load_listing: directory listing policy (shallow vs deep)
    :return: the populated token dictionary
    :raises WorkflowExecutionException: when contents are requested for a
        file larger than CONTENT_LIMIT
    """
    connector = step.get_connector()
    resources = job.get_resources() or [None] if job is not None else [None]
    path_processor = get_path_processor(step)
    basename = basename or path_processor.basename(filepath)
    location = ''.join(['file://', filepath])
    token = {
        'class': token_class,
        'location': location,
        'basename': basename,
        'path': filepath,
        'dirname': path_processor.dirname(filepath)
    }
    if token_class == 'File':
        token['nameroot'], token['nameext'] = path_processor.splitext(basename)
        # Use the first resource on which the file exists
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['size'] = await remotepath.size(connector, resource, filepath)
                if load_contents:
                    if token['size'] > CONTENT_LIMIT:
                        raise WorkflowExecutionException(
                            "Cannot read contents from files larger than {limit}kB"
                            .format(limit=CONTENT_LIMIT / 1024))
                    token['contents'] = await remotepath.head(
                        connector, resource, filepath, CONTENT_LIMIT)
                # Checksum the symlink target, not the link itself
                filepath = await remotepath.follow_symlink(
                    connector, resource, filepath)
                token['checksum'] = 'sha1${checksum}'.format(
                    checksum=await remotepath.checksum(connector, resource, filepath))
                break
    elif token_class == 'Directory' and load_listing != LoadListing.no_listing:
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['listing'] = await _get_listing(
                    step, job, filepath, load_contents,
                    load_listing == LoadListing.deep_listing)
                break
    return token
def _get_stream(self,
                job: Job,
                context: MutableMapping[Text, Any],
                stream: Optional[Text],
                default_stream: IO,
                is_input: bool = False) -> IO:
    """Open the stream described by the CWL `stream` expression, or return
    `default_stream` when no stream specification is given.

    Relative paths are anchored to the job's input directory for input
    streams and to its output directory otherwise.  The caller owns (and
    must close) the returned file object.
    """
    if not isinstance(stream, str):
        return default_stream
    # The specification may be a CWL expression: evaluate it first
    resolved = eval_expression(expression=stream,
                               context=context,
                               full_js=self.full_js,
                               expression_lib=self.expression_lib)
    path_processor = get_path_processor(self.step)
    if not path_processor.isabs(resolved):
        base_dir = job.input_directory if is_input else job.output_directory
        resolved = path_processor.join(base_dir, resolved)
    mode = "rb" if is_input else "wb"
    return open(resolved, mode)
async def update_token(self, job: Job, token: Token) -> Token:
    """Move the file referenced by `token` into `job`'s input directory and
    return a token whose value is the (dill-serialized) destination path.

    Composite tokens (multi-job or list-valued) are processed recursively
    and element-wise.

    :param job: the destination job that will consume the file
    :param token: the token carrying a path (possibly dill-serialized bytes)
    :return: an updated token pointing at the transferred file
    """
    # Composite token produced by multiple jobs: recurse on each element
    if isinstance(token.job, MutableSequence):
        return token.update(await asyncio.gather(*[
            asyncio.create_task(self.update_token(job, t))
            for t in token.value
        ]))
    # List-valued token: recurse element-wise and re-wrap the values
    elif isinstance(token.value, MutableSequence):
        token_list = await asyncio.gather(*[
            asyncio.create_task(self.update_token(job, token.update(t)))
            for t in token.value
        ])
        return token.update([t.value for t in token_list])
    src_job = self.get_context().scheduler.get_job(token.job)
    path_processor = utils.get_path_processor(self.port.step)
    # Token values cross step boundaries dill-serialized; decode if needed.
    # NOTE(review): dill.loads can execute arbitrary code — values must only
    # ever originate from this workflow's own steps.
    token_value = dill.loads(token.value) if isinstance(
        token.value, bytes) else token.value
    dest_path = path_processor.join(job.input_directory,
                                    os.path.basename(token_value))
    await self.port.step.context.data_manager.transfer_data(
        src=token_value,
        src_job=src_job,
        dst=dest_path,
        dst_job=job)
    # Re-serialize the new path to keep the on-the-wire format consistent
    return token.update(dill.dumps(dest_path))
async def execute(self, job: Job) -> CommandOutput:
    """Run a serialized Jupyter cell for `job`, remotely or locally.

    The method ships the executor script, the serialized AST nodes and the
    input namespace to the job's resource, builds the interpreter command
    line, runs it (through the connector when one exists, or as a local
    subprocess otherwise), then fetches and parses the JSON output file to
    build a JupyterCommandOutput.

    :param job: the job to execute
    :return: the command output with status, stdout value and user namespace
    """
    connector = self.step.get_connector()
    # Transfer executor file to remote resource
    # NOTE(review): os.path.join with a single argument is a no-op — this is
    # presumably meant to be just executor.__file__; confirm and simplify.
    executor_path = await self._transfer_file(
        job, os.path.join(executor.__file__))
    # Modify code, environment and namespaces according to inputs
    input_names = {}
    environment = {}
    for token in job.inputs:
        if token.value is not None:
            command_token = self.input_tokens[token.name]
            # Scatter inputs are re-wrapped in a list for the remote executor
            token_value = ([token.value] if isinstance(
                self.step.input_ports[token.name], ScatterInputPort)
                else token.value)
            # NOTE(review): 'file' and 'name' token types are handled
            # identically here — confirm whether that is intentional.
            if command_token.token_type == 'file':
                input_names[token.name] = token_value
            elif command_token.token_type == 'name':
                input_names[token.name] = token_value
            elif command_token.token_type == 'env':
                environment[token.name] = token_value
    # List output names to be retrieved from remote context
    output_names = [
        name for name, p in self.step.output_ports.items()
        if name != executor.CELL_OUTPUT
    ]
    # Serialize AST nodes to remote resource
    code_path = await self._serialize_to_remote_file(job, self.ast_nodes)
    # Configure output file path
    path_processor = get_path_processor(self.step)
    output_path = path_processor.join(job.output_directory, random_name())
    # Extract serializers from command tokens
    input_serializers = {
        k: v.serializer
        for k, v in self.input_tokens.items() if v.serializer is not None
    }
    output_serializers = {
        k: v.serializer
        for k, v in self.output_tokens.items() if v.serializer is not None
    }
    # Serialize namespaces to remote resource
    user_ns_path = await self._serialize_namespace(
        input_serializers=input_serializers,
        job=job,
        namespace=input_names)
    # Create dictionaries of postload input serializers and predump output serializers
    postload_input_serializers = {
        k: {'postload': v['postload']}
        for k, v in input_serializers.items() if 'postload' in v
    }
    predump_output_serializers = {
        k: {'predump': v['predump']}
        for k, v in output_serializers.items() if 'predump' in v
    }
    # Parse command
    cmd = [self.interpreter, executor_path]
    # IPython requires '--' to separate its own flags from script arguments
    if os.path.basename(self.interpreter) == 'ipython':
        cmd.append('--')
    if self.step.workdir:
        cmd.extend(["--workdir", self.step.workdir])
    if self.autoawait:
        cmd.append("--autoawait")
    cmd.extend(["--local-ns-file", user_ns_path])
    if postload_input_serializers:
        postload_serializers_path = await self._serialize_to_remote_file(
            job, postload_input_serializers)
        cmd.extend(
            ["--postload-input-serializers", postload_serializers_path])
    if predump_output_serializers:
        predump_serializers_path = await self._serialize_to_remote_file(
            job, predump_output_serializers)
        cmd.extend(
            ["--predump-output-serializers", predump_serializers_path])
    for name in output_names:
        cmd.extend(["--output-name", name])
    cmd.extend([code_path, output_path])
    # Execute command
    if connector is not None:
        # Remote execution through the step's connector
        resources = job.get_resources()
        logger.info(
            'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'
            .format(
                job=job.name,
                resource=resources[0] if resources else None,
                outdir=job.output_directory,
                command=' \\\n\t'.join(cmd),
            ))
        # If step is assigned to multiple resources, add the STREAMFLOW_HOSTS environment variable
        if len(resources) > 1:
            available_resources = await connector.get_available_resources(
                self.step.target.service)
            hosts = {
                k: v.hostname
                for k, v in available_resources.items() if k in resources
            }
            environment['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
        # Configure standard streams
        stdin = self.stdin
        stdout = self.stdout if self.stdout is not None else STDOUT
        stderr = self.stderr if self.stderr is not None else stdout
        # Execute command
        result, exit_code = await connector.run(
            resources[0] if resources else None,
            cmd,
            environment=environment,
            workdir=job.output_directory,
            stdin=stdin,
            stdout=stdout,
            stderr=stderr,
            capture_output=True,
            job_name=job.name)
    else:
        # Local execution as a subprocess
        logger.info(
            'Executing job {job} into directory {outdir}: \n{command}'.
            format(job=job.name,
                   outdir=job.output_directory,
                   command=' \\\n\t'.join(cmd)))
        # Configure standard streams
        stdin = open(self.stdin, "rb") if self.stdin is not None else None
        stdout = open(self.stdout, "wb") if self.stdout is not None else None
        stderr = open(self.stderr, "wb") if self.stderr is not None else None
        # Execute command
        proc = await asyncio.create_subprocess_exec(
            *cmd,
            cwd=job.output_directory,
            env={**os.environ, **environment},
            stdin=stdin,
            stdout=stdout,
            stderr=stderr)
        result, error = await proc.communicate()
        exit_code = proc.returncode
        # Close streams
        if stdin is not None:
            stdin.close()
        if stdout is not None:
            stdout.close()
        if stderr is not None:
            stderr.close()
    # Retrieve outputs: copy the remote JSON output file locally and parse it
    with TemporaryDirectory() as d:
        dest_path = os.path.join(d, path_processor.basename(output_path))
        await self.step.context.data_manager.transfer_data(src=output_path,
                                                           src_job=job,
                                                           dst=dest_path,
                                                           dst_job=None)
        with open(dest_path, mode='r') as f:
            json_output = json.load(f)
    # Infer status
    status = Status[json_output[executor.CELL_STATUS]]
    if status == Status.COMPLETED:
        command_stdout = json_output[executor.CELL_OUTPUT]
        if isinstance(command_stdout, MutableSequence
                      ):  # TODO: understand why we obtain a list here
            command_stdout = command_stdout[0]
        user_ns = await self._deserialize_namespace(
            job=job,
            output_serializers=output_serializers,
            remote_path=json_output[executor.CELL_LOCAL_NS])
    else:
        # Failed cells keep their raw output and get an empty namespace
        command_stdout = json_output[executor.CELL_OUTPUT]
        user_ns = {}
    # Return the command output object
    return JupyterCommandOutput(value=command_stdout,
                                status=status,
                                user_ns=user_ns)
async def _prepare_work_dir(self,
                            job: Job,
                            context: MutableMapping[Text, Any],
                            element: Any,
                            dest_path: Optional[Text] = None,
                            writable: bool = False) -> None:
    """Materialize one InitialWorkDirRequirement listing entry for `job`.

    `element` may be a CWL expression string, a list (processed element-wise
    in parallel), a File/Directory token, or a Dirent object; files are
    transferred or created in the job's output directory accordingly.

    :param job: the job whose working directory is being prepared
    :param context: the CWL expression-evaluation context
    :param element: the listing entry (expression, list, File/Directory, Dirent)
    :param dest_path: explicit destination; computed from the entry when None
    :param writable: whether created/copied entities must be writable
    """
    path_processor = get_path_processor(job.step)
    connector = job.step.get_connector()
    resources = job.get_resources() or [None]
    # If current element is a string, it must be an expression
    if isinstance(element, Text):
        listing = eval_expression(expression=element,
                                  context=context,
                                  full_js=self.full_js,
                                  expression_lib=self.expression_lib)
    else:
        listing = element
    # If listing is a list, each of its elements must be processed independently
    if isinstance(listing, MutableSequence):
        await asyncio.gather(*[
            asyncio.create_task(
                self._prepare_work_dir(job, context, el, dest_path, writable))
            for el in listing
        ])
    # If listing is a dictionary, it could be a File, a Directory, a Dirent or some other object
    elif isinstance(listing, MutableMapping):
        # If it is a File or Directory element, put the corresponding file in the output directory
        if 'class' in listing and listing['class'] in ['File', 'Directory']:
            src_path = utils.get_path_from_token(listing)
            src_found = False
            if src_path is not None:
                if dest_path is None:
                    # Mirror the input-directory layout when possible
                    if src_path.startswith(job.input_directory):
                        relpath = path_processor.relpath(
                            src_path, job.input_directory)
                        dest_path = path_processor.join(
                            job.output_directory, relpath)
                    else:
                        basename = path_processor.basename(src_path)
                        dest_path = path_processor.join(
                            job.output_directory, basename)
                # Transfer from the first resource where the source exists
                for resource in resources:
                    if await remotepath.exists(connector, resource, src_path):
                        await self.step.context.data_manager.transfer_data(
                            src=src_path,
                            src_job=job,
                            dst=dest_path,
                            dst_job=job,
                            writable=writable)
                        src_found = True
                        break
            # If the source path does not exist, create a File or a Directory in the remote path
            if not src_found:
                if dest_path is None:
                    dest_path = job.output_directory
                # NOTE(review): when dest_path was passed in, the basename is
                # appended here but not in the transfer branch above — confirm
                # this asymmetry is intended.
                if src_path is not None:
                    dest_path = path_processor.join(
                        dest_path, path_processor.basename(src_path))
                if listing['class'] == 'Directory':
                    await remotepath.mkdir(connector, resources, dest_path)
                else:
                    await self._write_remote_file(
                        job=job,
                        content=listing['contents'] if 'contents' in listing else '',
                        dest_path=dest_path,
                        writable=writable)
            # If `listing` is present, recursively process folder contents
            if 'listing' in listing:
                if 'basename' in listing:
                    dest_path = path_processor.join(
                        dest_path, listing['basename'])
                await remotepath.mkdir(connector, resources, dest_path)
                await asyncio.gather(*[
                    asyncio.create_task(
                        self._prepare_work_dir(job, context, element,
                                               dest_path, writable))
                    for element in listing['listing']
                ])
        # If it is a Dirent element, put or create the corresponding file according to the entryname field
        elif 'entry' in listing:
            # Whitespace is preserved because the entry may be file contents
            entry = eval_expression(expression=listing['entry'],
                                    context=context,
                                    full_js=self.full_js,
                                    expression_lib=self.expression_lib,
                                    strip_whitespace=False)
            if 'entryname' in listing:
                dest_path = eval_expression(
                    expression=listing['entryname'],
                    context=context,
                    full_js=self.full_js,
                    expression_lib=self.expression_lib)
                if not path_processor.isabs(dest_path):
                    dest_path = path_processor.join(
                        job.output_directory, dest_path)
            writable = listing['writable'] if 'writable' in listing else False
            # If entry is a string, a new text file must be created with the string as the file contents
            if isinstance(entry, Text):
                await self._write_remote_file(job, entry, dest_path, writable)
            # If entry is a list
            elif isinstance(entry, MutableSequence):
                # If all elements are Files or Directories, each of them must be processed independently
                if all('class' in t and t['class'] in ['File', 'Directory']
                       for t in entry):
                    await self._prepare_work_dir(job, context, entry,
                                                 dest_path, writable)
                # Otherwise, the content should be serialised to JSON
                else:
                    await self._write_remote_file(job, json.dumps(entry),
                                                  dest_path, writable)
            # If entry is a dict
            elif isinstance(entry, MutableMapping):
                # If it is a File or Directory, it must be put in the destination path
                if 'class' in entry and entry['class'] in ['File', 'Directory']:
                    await self._prepare_work_dir(job, context, entry,
                                                 dest_path, writable)
                # Otherwise, the content should be serialised to JSON
                else:
                    await self._write_remote_file(job, json.dumps(entry),
                                                  dest_path, writable)
            # Every object different from a string should be serialised to JSON
            else:
                await self._write_remote_file(job, json.dumps(entry),
                                              dest_path, writable)
async def _update_file_token(
        self,
        job: Job,
        src_job: Job,
        token_value: Any,
        load_listing: Optional[LoadListing] = None,
        writable: Optional[bool] = None) -> MutableMapping[Text, Any]:
    """Transfer the file(s) referenced by a CWL File/Directory token from
    `src_job` into `job`'s input directory and rebuild the token.

    Handles http(s) downloads, ``file://`` URIs, explicit basenames,
    secondary files, bare 'contents' tokens and 'listing' tokens.

    :param job: the destination job
    :param src_job: the job that produced the token (None for workflow inputs)
    :param token_value: the File/Directory token dictionary
    :param load_listing: directory listing policy for the rebuilt token
    :param writable: whether the transferred copies must be writable
    :return: the rebuilt token dictionary
    """
    # Fall back to os.path semantics when there is no source job
    path_processor = get_path_processor(
        src_job.step) if src_job is not None else os.path
    if 'location' not in token_value and 'path' in token_value:
        token_value['location'] = token_value['path']
    if 'location' in token_value and token_value['location'] is not None:
        location = token_value['location']
        # Manage remote files
        scheme = urllib.parse.urlsplit(location).scheme
        if scheme in ['http', 'https']:
            location = await _download_file(job, location)
        elif scheme == 'file':
            # Strip the 'file://' prefix (7 characters)
            location = location[7:]
        # If basename is explicitly stated in the token, use it as destination path
        dest_path = None
        if 'basename' in token_value:
            path_processor = get_path_processor(self.port.step)
            dest_path = path_processor.join(job.input_directory,
                                            token_value['basename'])
        # Check if source file exists
        src_connector = src_job.step.get_connector(
        ) if src_job is not None else None
        src_resources = src_job.get_resources() or [
            None
        ] if src_job is not None else [None]
        src_found = False
        for src_resource in src_resources:
            if await remotepath.exists(src_connector, src_resource, location):
                src_found = True
                break
        # If source_path exists, transfer file in task's input folder
        if src_found:
            filepath = await self._transfer_file(src_job=src_job,
                                                 dest_job=job,
                                                 src_path=location,
                                                 dest_path=dest_path,
                                                 writable=writable)
        # Otherwise, keep the current path
        else:
            filepath = location
        new_token_value = {'class': token_value['class'], 'path': filepath}
        # If token contains secondary files, transfer them, too
        if 'secondaryFiles' in token_value:
            sf_tasks = []
            for sf in token_value['secondaryFiles']:
                path = get_path_from_token(sf)
                # If basename is explicitly stated in the token, use it as destination path
                dest_path = None
                if 'basename' in sf:
                    path_processor = get_path_processor(self.port.step)
                    dest_path = path_processor.join(job.input_directory,
                                                    sf['basename'])
                sf_tasks.append(
                    asyncio.create_task(
                        self._transfer_file(src_job=src_job,
                                            dest_job=job,
                                            src_path=path,
                                            dest_path=dest_path)))
            sf_paths = await asyncio.gather(*sf_tasks)
            new_token_value['secondaryFiles'] = [{
                'class': sf['class'],
                'path': sf_path
            } for sf, sf_path in zip(token_value['secondaryFiles'], sf_paths)]
        # Build token
        token_value = await self._build_token_value(
            job=job,
            token_value=new_token_value,
            load_contents=self.load_contents or 'contents' in token_value,
            load_listing=load_listing)
        return token_value
    # If there is only a 'contents' field, simply build the token value
    elif 'contents' in token_value:
        # NOTE(review): load_listing is passed as the 3rd positional argument
        # of _build_token_value, which binds it to load_contents — this looks
        # like it should be load_listing=load_listing; confirm.
        return await self._build_token_value(job, token_value, load_listing)
    # If there is only a 'listing' field, transfer all the listed files to the remote resource
    elif 'listing' in token_value:
        # Compute destination path
        dest_path = get_path_from_token(token_value)
        if dest_path is None and 'basename' in token_value:
            dest_path = path_processor.join(job.input_directory,
                                            token_value['basename'])
        # Copy each element of the listing into the destination folder
        tasks = []
        classes = []
        for element in cast(List, token_value['listing']):
            # Compute destination path
            if dest_path is not None:
                basename = path_processor.basename(element['path'])
                current_dest_path = path_processor.join(dest_path, basename)
            else:
                current_dest_path = None
            # Transfer element to the remote resource
            tasks.append(
                asyncio.create_task(
                    self._transfer_file(src_job=src_job,
                                        dest_job=job,
                                        src_path=element['path'],
                                        dest_path=current_dest_path,
                                        writable=writable)))
            classes.append(element['class'])
        dest_paths = await asyncio.gather(*tasks)
        # Compute listing on remote resource
        listing_tasks = []
        for token_class, path in zip(classes, dest_paths):
            listing_tasks.append(
                asyncio.create_task(
                    _get_file_token(step=self.port.step,
                                    job=job,
                                    token_class=token_class,
                                    filepath=path)))
        token_value['listing'] = await asyncio.gather(*listing_tasks)
    return token_value
async def _get_value_from_command(self, job: Job,
                                  command_output: CWLCommandOutput):
    """Derive this port's token value from a completed CWL command.

    Resolution order: a `cwl.output.json` file in the job's output directory
    overrides the raw command value; then `glob` patterns are expanded on the
    job's resources; then `outputEval` is applied when present; finally the
    raw command output (or the default value) is processed.

    :param job: the finished job
    :param command_output: the command result (value and exit code)
    :return: the processed token value
    :raises WorkflowDefinitionException: when a glob resolves outside the
        job's output folder
    """
    context = utils.build_context(job)
    path_processor = get_path_processor(self.port.step)
    connector = job.step.get_connector()
    resources = job.get_resources() or [None]
    token_value = command_output.value if command_output.value is not None else self.default_value
    # Check if file `cwl.output.json` exists either locally on at least one resource
    cwl_output_path = path_processor.join(job.output_directory,
                                          'cwl.output.json')
    for resource in resources:
        if await remotepath.exists(connector, resource, cwl_output_path):
            # If file exists, use its contents as token value
            token_value = json.loads(await remotepath.read(
                connector, resource, cwl_output_path))
            break
    # If `token_value` is a dictionary, directly extract the token value from it
    if isinstance(token_value,
                  MutableMapping) and self.port.name in token_value:
        token = token_value[self.port.name]
        return await self._build_token_value(job, token)
    # Otherwise, generate the output object as described in `outputs` field
    if self.glob is not None:
        # Adjust glob path: evaluate it when it contains CWL expression markers
        if '$(' in self.glob or '${' in self.glob:
            globpath = utils.eval_expression(
                expression=self.glob,
                context=context,
                full_js=self.full_js,
                expression_lib=self.expression_lib)
        else:
            globpath = self.glob
        # Resolve glob on every resource, anchoring relative patterns
        # to the job's output directory
        resolve_tasks = []
        for resource in resources:
            if isinstance(globpath, MutableSequence):
                for path in globpath:
                    if not path_processor.isabs(path):
                        path = path_processor.join(job.output_directory, path)
                    resolve_tasks.append(
                        _expand_glob(connector, resource, path))
            else:
                if not path_processor.isabs(globpath):
                    globpath = path_processor.join(job.output_directory,
                                                   globpath)
                resolve_tasks.append(
                    _expand_glob(connector, resource, globpath))
        paths = flatten_list(await asyncio.gather(*resolve_tasks))
        # Cannot glob outside the job output folder
        for path in paths:
            if not path.startswith(job.output_directory):
                raise WorkflowDefinitionException(
                    "Globs outside the job's output folder are not allowed")
        # Get token class from paths
        class_tasks = [
            asyncio.create_task(_get_class_from_path(p, job)) for p in paths
        ]
        paths = [{
            'path': p,
            'class': c
        } for p, c in zip(paths, await asyncio.gather(*class_tasks))]
        # If evaluation is not needed, simply return paths as token value
        if self.output_eval is None:
            token_list = await self._build_token_value(job, paths)
            # Unwrap single results; empty globs yield None
            return token_list if len(
                token_list) > 1 else token_list[0] if len(
                    token_list) == 1 else None
        # Otherwise, fill context['self'] with glob data and proceed
        else:
            context['self'] = await self._build_token_value(job, paths)
    if self.output_eval is not None:
        # Fill context with exit code
        context['runtime']['exitCode'] = command_output.exit_code
        # Evaluate output
        token = utils.eval_expression(expression=self.output_eval,
                                      context=context,
                                      full_js=self.full_js,
                                      expression_lib=self.expression_lib)
        # Build token
        if isinstance(token, MutableSequence):
            paths = [{
                'path': el['path'],
                'class': el['class']
            } for el in token]
            return await self._build_token_value(job, paths)
        else:
            return await self._build_token_value(job, token)
    # As the default value (no return path is met in previous code), simply process the command output
    return await self._build_token_value(job, token_value)
async def _build_token_value(
        self,
        job: Job,
        token_value: Any,
        load_contents: Optional[bool] = None,
        load_listing: Optional[LoadListing] = None) -> Any:
    """Normalize a raw value into a fully-populated CWL token value.

    Lists are processed element-wise in parallel; File/Directory mappings
    are expanded into full file tokens (size, checksum, listing, secondary
    files); a bare 'contents' mapping is materialized as a file on the
    job's resources first.  Any other value is returned unchanged.

    :param job: the job owning the value (may be None for step-level values)
    :param token_value: the raw value to normalize
    :param load_contents: whether to embed file contents (port default if None)
    :param load_listing: directory listing policy (port default if None)
    :return: the normalized token value
    :raises WorkflowExecutionException: when a required secondary file is missing
    """
    if load_contents is None:
        load_contents = self.load_contents
    if token_value is None:
        return self.default_value
    elif isinstance(token_value, MutableSequence):
        # Fix: forward both optional flags explicitly. The previous code
        # passed `load_listing` as the third positional argument, binding it
        # to `load_contents`, so nested lists lost the listing policy and
        # misinterpreted it as a contents flag.
        value_tasks = [
            asyncio.create_task(
                self._build_token_value(job, t, load_contents, load_listing))
            for t in token_value
        ]
        return await asyncio.gather(*value_tasks)
    elif (isinstance(token_value, MutableMapping) and token_value.get(
            'class', token_value.get('type')) in ['File', 'Directory']):
        step = job.step if job is not None else self.port.step
        # Get filepath
        filepath = get_path_from_token(token_value)
        if filepath is not None:
            # Process secondary files in token value
            sf_map = {}
            if 'secondaryFiles' in token_value:
                sf_tasks = []
                path_processor = get_path_processor(step)
                for sf in token_value.get('secondaryFiles', []):
                    sf_path = get_path_from_token(sf)
                    # Fix: assign the joined path back to sf_path. The join
                    # result was previously discarded, so relative
                    # secondary-file paths were used verbatim instead of
                    # being anchored next to the primary file.
                    if not path_processor.isabs(sf_path):
                        sf_path = path_processor.join(
                            path_processor.dirname(filepath), sf_path)
                    sf_tasks.append(
                        asyncio.create_task(
                            _get_file_token(
                                step=step,
                                job=job,
                                token_class=sf['class'],
                                filepath=sf_path,
                                basename=sf.get('basename'),
                                load_contents=load_contents,
                                load_listing=load_listing or self.load_listing)))
                sf_map = {
                    get_path_from_token(sf): sf
                    for sf in await asyncio.gather(*sf_tasks)
                }
            # Compute the new token value
            token_value = await _get_file_token(
                step=step,
                job=job,
                token_class=token_value.get('class',
                                            token_value.get('type')),
                filepath=filepath,
                basename=token_value.get('basename'),
                load_contents=load_contents,
                load_listing=load_listing or self.load_listing)
            # Compute new secondary files from port specification
            if self.secondary_files:
                context = utils.build_context(job)
                context['self'] = token_value
                sf_tasks, sf_specs = [], []
                for secondary_file in self.secondary_files:
                    # If pattern is an expression, evaluate it and process result
                    if ('$(' in secondary_file.pattern
                            or '${' in secondary_file.pattern):
                        sf_value = utils.eval_expression(
                            expression=secondary_file.pattern,
                            context=context,
                            full_js=self.full_js,
                            expression_lib=self.expression_lib)
                        if isinstance(sf_value, MutableSequence):
                            for sf in sf_value:
                                sf_tasks.append(
                                    asyncio.create_task(
                                        self._process_secondary_file(
                                            job=job,
                                            secondary_file=sf,
                                            token_value=token_value,
                                            from_expression=True,
                                            existing_sf=sf_map,
                                            load_contents=load_contents,
                                            load_listing=load_listing
                                            or self.load_listing)))
                                sf_specs.append(secondary_file)
                        else:
                            sf_tasks.append(
                                asyncio.create_task(
                                    self._process_secondary_file(
                                        job=job,
                                        secondary_file=sf_value,
                                        token_value=token_value,
                                        from_expression=True,
                                        existing_sf=sf_map,
                                        load_contents=load_contents,
                                        load_listing=load_listing
                                        or self.load_listing)))
                            sf_specs.append(secondary_file)
                    # Otherwise, simply process the pattern string
                    else:
                        sf_tasks.append(
                            asyncio.create_task(
                                self._process_secondary_file(
                                    job=job,
                                    secondary_file=secondary_file.pattern,
                                    token_value=token_value,
                                    from_expression=False,
                                    existing_sf=sf_map,
                                    load_contents=load_contents,
                                    load_listing=load_listing
                                    or self.load_listing)))
                        sf_specs.append(secondary_file)
                for sf_value, sf_spec in zip(
                        await asyncio.gather(*sf_tasks), sf_specs):
                    if sf_value is not None:
                        sf_map[get_path_from_token(sf_value)] = sf_value
                    elif sf_spec.required:
                        raise WorkflowExecutionException(
                            "Required secondary file {sf} not found".
                            format(sf=sf_spec.pattern))
            # Add all secondary files to the token
            if sf_map:
                token_value['secondaryFiles'] = list(sf_map.values())
        # If there is only a 'contents' field, create a file on the step's resource and build the token
        elif 'contents' in token_value:
            path_processor = get_path_processor(self.port.step)
            filepath = path_processor.join(
                job.output_directory,
                token_value.get('basename', random_name()))
            connector = job.step.get_connector()
            resources = job.get_resources() or [None] if job is not None else [None]
            # Write the contents on every resource assigned to the job
            await asyncio.gather(*[
                asyncio.create_task(
                    remotepath.write(connector, res, filepath,
                                     token_value['contents']))
                for res in resources
            ])
            token_value = await _get_file_token(
                step=step,
                job=job,
                token_class=token_value.get('class',
                                            token_value.get('type')),
                filepath=filepath,
                basename=token_value.get('basename'),
                load_contents=load_contents,
                load_listing=load_listing or self.load_listing)
    return token_value