Example #1
 async def weight_token(self, job: Job, token_value: Any) -> int:
     if token_value is None or self.port_type not in ['File', 'Directory']:
         return 0
     elif isinstance(token_value, MutableSequence):
         return sum(await asyncio.gather(*[
             asyncio.create_task(self.weight_token(job, t))
             for t in token_value
         ]))
     elif 'size' in token_value:
         weight = token_value['size']
         if 'secondaryFiles' in token_value:
             sf_tasks = []
             for sf in token_value['secondaryFiles']:
                 sf_tasks.append(
                     asyncio.create_task(self.weight_token(job, sf)))
             weight += sum(await asyncio.gather(*sf_tasks))
         return weight
     else:
         if job is not None and job.get_resources():
         connector = job.step.get_connector()
         # Query only the first resource: the loop returns on its first pass
         for resource in job.get_resources():
                 return await remotepath.size(connector, resource,
                                              _get_paths(token_value))
             return 0
         else:
             return await remotepath.size(
                 None, None,
                 _get_paths(token_value)) if token_value is not None else 0
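
The fan-out pattern above, where each list element becomes its own asyncio task and the gathered results are summed, can be exercised on its own. A minimal sketch, using a plain nested dict as a hypothetical stand-in for StreamFlow's File tokens and ignoring the remote-path branch:

import asyncio
from collections.abc import MutableSequence

async def weight(token):
    # Lists fan out into one task per element; gather preserves order
    if isinstance(token, MutableSequence):
        return sum(await asyncio.gather(
            *(asyncio.create_task(weight(t)) for t in token)))
    # Mappings carry an explicit size plus the weight of secondary files
    if isinstance(token, dict):
        return token.get('size', 0) + await weight(
            token.get('secondaryFiles', []))
    return 0

# A 10-byte file with two secondary files of 3 and 5 bytes
token = {'size': 10, 'secondaryFiles': [{'size': 3}, {'size': 5}]}
assert asyncio.run(weight(token)) == 18
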
Example #2
 async def _process_secondary_file(
     self, job: Job, secondary_file: Any, token_value: MutableMapping[Text,
                                                                      Any],
     from_expression: bool, existing_sf: MutableMapping[Text, Any],
     load_contents: bool, load_listing: Optional[LoadListing]
 ) -> Optional[MutableMapping[Text, Any]]:
     step = job.step if job is not None else self.port.step
     # If value is None, simply return None
     if secondary_file is None:
         return None
     # If value is a dictionary, simply append it to the list
     elif isinstance(secondary_file, MutableMapping):
         # Use the step computed above, so this also works when job is None
         connector = step.get_connector()
         filepath = utils.get_path_from_token(secondary_file)
         resources = (job.get_resources()
                      or [None]) if job is not None else [None]
         for resource in resources:
             if await remotepath.exists(connector, resource, filepath):
                 return await _get_file_token(
                     step=step,
                     job=job,
                     token_class=secondary_file['class'],
                     filepath=filepath,
                     basename=secondary_file.get('basename'),
                     load_contents=load_contents,
                     load_listing=load_listing)
     # If value is a string
     else:
         # If value doesn't come from an expression, apply it to the primary path
         filepath = (
             secondary_file if from_expression else self._process_sf_path(
                 secondary_file, utils.get_path_from_token(token_value)))
         path_processor = get_path_processor(step)
         if not path_processor.isabs(filepath):
             filepath = path_processor.join(
                 path_processor.dirname(get_path_from_token(token_value)),
                 filepath)
         if filepath not in existing_sf:
             # Search file in job resources and build token value
             connector = step.get_connector()
             resources = (job.get_resources()
                          or [None]) if job is not None else [None]
             for resource in resources:
                 if await remotepath.exists(connector, resource, filepath):
                     token_class = 'File' if await remotepath.isfile(
                         connector, resource, filepath) else 'Directory'
                     return await _get_file_token(
                         step=step,
                         job=job,
                         token_class=token_class,
                         filepath=filepath,
                         load_contents=load_contents,
                         load_listing=load_listing)
         else:
             return existing_sf[filepath]
Example #3
 async def weight_token(self, job: Job, token_value: Any) -> int:
     if isinstance(token_value, MutableSequence):
         return sum(await asyncio.gather(*[
             asyncio.create_task(self.weight_token(job, v))
             for v in token_value
         ]))
     if job is not None and job.get_resources():
         connector = job.step.get_connector()
         for resource in job.get_resources():
             return await remotepath.size(connector, resource, token_value)
         return 0
     else:
         return await remotepath.size(
             None, None, token_value) if token_value is not None else 0
Example #4
async def _get_class_from_path(path: Text, job: Job) -> Text:
    connector = job.step.get_connector() if job is not None else None
    for resource in (job.get_resources()
                     or [None]) if job is not None else [None]:
        t_path = await remotepath.follow_symlink(connector, resource, path)
        return 'File' if await remotepath.isfile(connector, resource,
                                                 t_path) else 'Directory'
Example #5
async def _get_listing(
        step: Step, job: Job, dirpath: Text, load_contents: bool,
        recursive: bool) -> MutableSequence[MutableMapping[Text, Any]]:
    listing_tokens = {}
    connector = step.get_connector()
    resources = job.get_resources() or [None]
    for resource in resources:
        directories = await remotepath.listdir(connector, resource, dirpath,
                                               FileType.DIRECTORY)
        for directory in directories:
            if directory not in listing_tokens:
                load_listing = LoadListing.deep_listing if recursive else LoadListing.no_listing
                listing_tokens[directory] = asyncio.create_task(
                    _get_file_token(step=step,
                                    job=job,
                                    token_class='Directory',
                                    filepath=directory,
                                    load_contents=load_contents,
                                    load_listing=load_listing))
        files = await remotepath.listdir(connector, resource, dirpath,
                                         FileType.FILE)
        for file in files:
            if file not in listing_tokens:
                listing_tokens[file] = asyncio.create_task(
                    _get_file_token(step=step,
                                    job=job,
                                    token_class='File',
                                    filepath=file,
                                    load_contents=load_contents))
    return cast(MutableSequence[MutableMapping[Text, Any]],
                await asyncio.gather(*listing_tokens.values()))
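
Before awaiting, _get_listing keys every task on its path, so a file visible on several resources is tokenized only once. The dict-of-tasks idiom in isolation, with a hypothetical fetch coroutine standing in for _get_file_token:

import asyncio

async def fetch(path):
    # Stand-in for _get_file_token
    await asyncio.sleep(0)
    return {'path': path}

async def listing(paths_per_resource):
    tasks = {}
    for paths in paths_per_resource:
        for path in paths:
            if path not in tasks:  # schedule each distinct path once
                tasks[path] = asyncio.create_task(fetch(path))
    return await asyncio.gather(*tasks.values())

# '/a' is present on both resources but fetched only once
print(asyncio.run(listing([['/a', '/b'], ['/a', '/c']])))
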
Example #6
 async def compute_token(self, job: Job,
                         command_output: JupyterCommandOutput) -> Token:
     path_processor = utils.get_path_processor(self.port.step)
     if self.value is not None:
         connector = job.step.get_connector() if job is not None else None
         resources = job.get_resources() or [None]
         if job.output_directory and not path_processor.isabs(self.value):
             pattern = path_processor.join(job.output_directory, self.value)
         else:
             pattern = self.value
         token_value = utils.flatten_list(await asyncio.gather(*[
             asyncio.create_task(
                 remotepath.resolve(
                     connector=connector, target=resource, pattern=pattern))
             for resource in resources
         ]))
         if len(token_value) == 1:
             token_value = token_value[0]
     else:
         token_value = command_output.user_ns.get(self.value_from)
     if job.output_directory:
         if isinstance(token_value, MutableSequence):
             token_value = [
                 path_processor.join(job.output_directory, t)
                 if not path_processor.isabs(t) else t for t in token_value
             ]
         else:
             if not path_processor.isabs(token_value):
                 token_value = path_processor.join(job.output_directory,
                                                   token_value)
     return Token(name=self.port.name,
                  value=token_value,
                  job=job.name,
                  tag=utils.get_tag(job.inputs))
Example #7
async def _download_file(job: Job, url: Text) -> Text:
    connector = job.step.get_connector()
    resources = job.get_resources()
    try:
        return await remotepath.download(connector, resources, url,
                                         job.input_directory)
    except Exception as e:
        raise WorkflowExecutionException(
            "Error downloading file from {url}".format(url=url)) from e
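
With the chained raise, the transport error stays attached to the wrapped exception as __cause__ instead of being swallowed. The idiom in miniature (the exception class here is a local stand-in):

class WorkflowExecutionException(Exception):
    pass  # local stand-in for StreamFlow's exception type

def download(url):
    try:
        raise OSError('connection reset')  # simulated transport failure
    except Exception as e:
        raise WorkflowExecutionException(
            'Error downloading file from {url}'.format(url=url)) from e

try:
    download('http://example.com/data')
except WorkflowExecutionException as e:
    print(e, '| caused by:', repr(e.__cause__))
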
Example #8
 async def _register_data(self, job: Job, path: Text):
     connector = job.step.get_connector()
     resources = job.get_resources() or [None]
     register_path_tasks = []
     for resource in resources:
         register_path_tasks.append(
             asyncio.create_task(
                 self.port.step.context.data_manager.register_path(
                     connector, resource, path)))
     await asyncio.gather(*register_path_tasks)
Example #9
async def _get_file_token(
        step: Step,
        job: Job,
        token_class: Text,
        filepath: Text,
        basename: Optional[Text] = None,
        load_contents: bool = False,
        load_listing: Optional[LoadListing] = None
) -> MutableMapping[Text, Any]:
    connector = step.get_connector()
    resources = (job.get_resources()
                 or [None]) if job is not None else [None]
    path_processor = get_path_processor(step)
    basename = basename or path_processor.basename(filepath)
    location = ''.join(['file://', filepath])
    token = {
        'class': token_class,
        'location': location,
        'basename': basename,
        'path': filepath,
        'dirname': path_processor.dirname(filepath)
    }
    if token_class == 'File':
        token['nameroot'], token['nameext'] = path_processor.splitext(basename)
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['size'] = await remotepath.size(connector, resource,
                                                      filepath)
                if load_contents:
                    if token['size'] > CONTENT_LIMIT:
                        raise WorkflowExecutionException(
                            "Cannot read contents from files larger than {limit}kB"
                            .format(limit=CONTENT_LIMIT / 1024))
                    token['contents'] = await remotepath.head(
                        connector, resource, filepath, CONTENT_LIMIT)
                filepath = await remotepath.follow_symlink(
                    connector, resource, filepath)
                token['checksum'] = 'sha1${checksum}'.format(
                    checksum=await remotepath.checksum(connector, resource,
                                                       filepath))
                break
    elif token_class == 'Directory' and load_listing != LoadListing.no_listing:
        for resource in resources:
            if await remotepath.exists(connector, resource, filepath):
                token['listing'] = await _get_listing(
                    step, job, filepath, load_contents,
                    load_listing == LoadListing.deep_listing)
                break
    return token
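
The token produced above follows the CWL File object layout. A local-only sketch of the same shape, substituting os.path for the step's path processor and hashlib for the remote checksum call (the helper name is illustrative, not StreamFlow API):

import hashlib
import os

def local_file_token(filepath):
    basename = os.path.basename(filepath)
    token = {
        'class': 'File',
        'location': 'file://' + filepath,
        'basename': basename,
        'path': filepath,
        'dirname': os.path.dirname(filepath),
    }
    token['nameroot'], token['nameext'] = os.path.splitext(basename)
    if os.path.exists(filepath):
        token['size'] = os.path.getsize(filepath)
        with open(filepath, 'rb') as f:
            token['checksum'] = 'sha1$' + hashlib.sha1(f.read()).hexdigest()
    return token

print(local_file_token(__file__))
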
Example #10
 async def _recover_path(self, job: Job, resources: MutableSequence[Text],
                         token: Token, path: Text) -> Optional[Text]:
     context = self.get_context()
     connector = self.port.step.get_connector()
     job_resources = job.get_resources() or [None]
     # Check if path is already present in actual job's resources
     for resource in job_resources:
         if await remotepath.exists(connector, resource, path):
             return path
     # Otherwise, get the list of other file locations from DataManager
     data_locations = set()
     for resource in resources:
         data_locations.update(
             context.data_manager.get_data_locations(
                 resource, path, DataLocationType.PRIMARY))
     # Check if path is still present in original resources
     for location in data_locations:
         if location.resource in job_resources:
             if await remotepath.exists(connector, location.resource, path):
                 return path
             else:
                 context.data_manager.invalidate_location(
                     location.resource, path)
     # Check if files are saved locally
     for location in data_locations:
         if location.resource == LOCAL_RESOURCE:
             return await self._transfer_file(None, job, location.path)
     # If not, check if files are stored elsewhere
     for location in data_locations:
         if location.resource not in job_resources and location.resource != LOCAL_RESOURCE:
             location_job = context.scheduler.get_job(location.job)
             location_connector = location_job.step.get_connector()
             available_resources = await location_connector.get_available_resources(
                 location_job.step.target.service)
             if (location.resource in available_resources and await
                     remotepath.exists(location_connector,
                                       location.resource, location.path)):
                 return await self._transfer_file(location_job, job,
                                                  location.path)
             else:
                 context.data_manager.invalidate_location(
                     location.resource, location.path)
     # If file has been lost, raise an exception
     message = "Failed to recover path {path} for token {token} from job {job}".format(
         path=path, token=token.name, job=token.job)
     logger.info(message)
     raise UnrecoverableTokenException(message, token)
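
_recover_path is a cascade of progressively more expensive lookups: the job's own resources, the original resources, local copies, and finally any other live location. Stripped of the data-manager details, the control flow reduces to this sketch (the checker coroutines are hypothetical):

import asyncio

async def recover(path, strategies):
    # Try each recovery strategy in order; the first hit wins
    for strategy in strategies:
        result = await strategy(path)
        if result is not None:
            return result
    raise FileNotFoundError(path)

async def current_resources(path):
    return None  # simulated miss

async def local_copy(path):
    return '/local' + path  # simulated hit

print(asyncio.run(recover('/data/x', [current_resources, local_copy])))
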
Example #11
 def _register_data(self, job: Job,
                    token_value: Union[MutableSequence[MutableMapping[Text,
                                                                      Any]],
                                       MutableMapping[Text, Any]]):
     context = self.get_context()
     # If `token_value` is a list, process every item independently
     if isinstance(token_value, MutableSequence):
         for t in token_value:
             self._register_data(job, t)
     # Otherwise, if token value is a dictionary and it refers to a File or a Directory, register the path
     elif (isinstance(token_value, MutableMapping)
           and 'class' in token_value
           and token_value['class'] in ['File', 'Directory']):
         # Extract paths from token
         paths = []
         if 'path' in token_value and token_value['path'] is not None:
             paths.append(token_value['path'])
         elif 'location' in token_value and token_value[
                 'location'] is not None:
             paths.append(token_value['location'])
         elif 'listing' in token_value:
             paths.extend([
                 t['path'] if 'path' in t else t['location']
                 for t in token_value['listing']
             ])
         if 'secondaryFiles' in token_value:
             for sf in token_value['secondaryFiles']:
                 paths.append(get_path_from_token(sf))
         # Remove `file` protocol if present
         paths = [p[7:] if p.startswith('file://') else p for p in paths]
         # Register paths to the `DataManager`
         resources = job.get_resources() or [None]
         # Register each path on every resource assigned to the job
         for path in paths:
             for resource in resources:
                 context.data_manager.register_path(job, resource, path)
Example #12
 async def execute(self, job: Job) -> CommandOutput:
     connector = self.step.get_connector()
     # Transfer executor file to remote resource
     executor_path = await self._transfer_file(job, executor.__file__)
     # Modify code, environment and namespaces according to inputs
     input_names = {}
     environment = {}
     for token in job.inputs:
         if token.value is not None:
             command_token = self.input_tokens[token.name]
             token_value = ([token.value] if isinstance(
                 self.step.input_ports[token.name], ScatterInputPort) else
                            token.value)
             if command_token.token_type in ('file', 'name'):
                 input_names[token.name] = token_value
             elif command_token.token_type == 'env':
                 environment[token.name] = token_value
     # List output names to be retrieved from remote context
     output_names = [
         name for name, p in self.step.output_ports.items()
         if name != executor.CELL_OUTPUT
     ]
     # Serialize AST nodes to remote resource
     code_path = await self._serialize_to_remote_file(job, self.ast_nodes)
     # Configure output file path
     path_processor = get_path_processor(self.step)
     output_path = path_processor.join(job.output_directory, random_name())
     # Extract serializers from command tokens
     input_serializers = {
         k: v.serializer
         for k, v in self.input_tokens.items() if v.serializer is not None
     }
     output_serializers = {
         k: v.serializer
         for k, v in self.output_tokens.items() if v.serializer is not None
     }
     # Serialize namespaces to remote resource
     user_ns_path = await self._serialize_namespace(
         input_serializers=input_serializers,
         job=job,
         namespace=input_names)
     # Create dictionaries of postload input serializers and predump output serializers
     postload_input_serializers = {
         k: {
             'postload': v['postload']
         }
         for k, v in input_serializers.items() if 'postload' in v
     }
     predump_output_serializers = {
         k: {
             'predump': v['predump']
         }
         for k, v in output_serializers.items() if 'predump' in v
     }
     # Parse command
     cmd = [self.interpreter, executor_path]
     if os.path.basename(self.interpreter) == 'ipython':
         cmd.append('--')
     if self.step.workdir:
         cmd.extend(["--workdir", self.step.workdir])
     if self.autoawait:
         cmd.append("--autoawait")
     cmd.extend(["--local-ns-file", user_ns_path])
     if postload_input_serializers:
         postload_serializers_path = await self._serialize_to_remote_file(
             job, postload_input_serializers)
         cmd.extend(
             ["--postload-input-serializers", postload_serializers_path])
     if predump_output_serializers:
         predump_serializers_path = await self._serialize_to_remote_file(
             job, predump_output_serializers)
         cmd.extend(
             ["--predump-output-serializers", predump_serializers_path])
     for name in output_names:
         cmd.extend(["--output-name", name])
     cmd.extend([code_path, output_path])
     # Execute command
     if connector is not None:
         resources = job.get_resources()
         logger.info(
             'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'
             .format(
                 job=job.name,
                 resource=resources[0] if resources else None,
                 outdir=job.output_directory,
                 command=' \\\n\t'.join(cmd),
             ))
         # If step is assigned to multiple resources, add the STREAMFLOW_HOSTS environment variable
         if len(resources) > 1:
             available_resources = await connector.get_available_resources(
                 self.step.target.service)
             hosts = {
                 k: v.hostname
                 for k, v in available_resources.items() if k in resources
             }
             environment['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
         # Configure standard streams
         stdin = self.stdin
         stdout = self.stdout if self.stdout is not None else STDOUT
         stderr = self.stderr if self.stderr is not None else stdout
         # Execute command
         result, exit_code = await connector.run(
             resources[0] if resources else None,
             cmd,
             environment=environment,
             workdir=job.output_directory,
             stdin=stdin,
             stdout=stdout,
             stderr=stderr,
             capture_output=True,
             job_name=job.name)
     else:
         logger.info(
             'Executing job {job} into directory {outdir}: \n{command}'.
             format(job=job.name,
                    outdir=job.output_directory,
                    command=' \\\n\t'.join(cmd)))
         # Configure standard streams
         stdin = open(self.stdin, "rb") if self.stdin is not None else None
         stdout = open(self.stdout,
                       "wb") if self.stdout is not None else None
         stderr = open(self.stderr,
                       "wb") if self.stderr is not None else None
         # Execute command
         proc = await asyncio.create_subprocess_exec(
             *cmd,
             cwd=job.output_directory,
             env={
                 **os.environ,
                 **environment
             },
             stdin=stdin,
             stdout=stdout,
             stderr=stderr)
         result, error = await proc.communicate()
         exit_code = proc.returncode
         # Close streams
         if stdin is not None:
             stdin.close()
         if stdout is not None:
             stdout.close()
         if stderr is not None:
             stderr.close()
     # Retrieve outputs
     with TemporaryDirectory() as d:
         dest_path = os.path.join(d, path_processor.basename(output_path))
         await self.step.context.data_manager.transfer_data(src=output_path,
                                                            src_job=job,
                                                            dst=dest_path,
                                                            dst_job=None)
         with open(dest_path, mode='r') as f:
             json_output = json.load(f)
     # Infer status
     status = Status[json_output[executor.CELL_STATUS]]
     if status == Status.COMPLETED:
         command_stdout = json_output[executor.CELL_OUTPUT]
         # TODO: understand why we obtain a list here
         if isinstance(command_stdout, MutableSequence):
             command_stdout = command_stdout[0]
         user_ns = await self._deserialize_namespace(
             job=job,
             output_serializers=output_serializers,
             remote_path=json_output[executor.CELL_LOCAL_NS])
     else:
         command_stdout = json_output[executor.CELL_OUTPUT]
         user_ns = {}
     # Return the command output object
     return JupyterCommandOutput(value=command_stdout,
                                 status=status,
                                 user_ns=user_ns)
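
The local branch boils down to asyncio.create_subprocess_exec with the job's environment merged over os.environ. A self-contained version of that core (assuming a python3 interpreter on PATH):

import asyncio
import os

async def run_local(cmd, workdir, environment):
    # Merge the job environment over the inherited one
    proc = await asyncio.create_subprocess_exec(
        *cmd,
        cwd=workdir,
        env={**os.environ, **environment},
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.STDOUT)
    output, _ = await proc.communicate()
    return output.decode(), proc.returncode

result, exit_code = asyncio.run(run_local(
    ['python3', '-c', 'import os; print(os.environ["GREETING"])'],
    os.getcwd(), {'GREETING': 'hello'}))
print(result, exit_code)
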
Example #13
 async def execute(self, job: Job) -> CWLCommandOutput:
     context = utils.build_context(job)
     if logger.isEnabledFor(logging.DEBUG):
         logger.debug("Job {job} inputs: {inputs}".format(
             job=job.name,
             inputs=json.dumps(context['inputs'], indent=4,
                               sort_keys=True)))
     if self.initial_work_dir is not None:
         await self._prepare_work_dir(job, context, self.initial_work_dir)
     cmd = self._get_executable_command(context)
     parsed_env = {
         k: str(
             eval_expression(expression=v,
                             context=context,
                             full_js=self.full_js,
                             expression_lib=self.expression_lib))
         for (k, v) in self.environment.items()
     }
     if 'HOME' not in parsed_env:
         parsed_env['HOME'] = job.output_directory
     if 'TMPDIR' not in parsed_env:
         parsed_env['TMPDIR'] = job.tmp_directory
     if self.step.target is None:
         if self.is_shell_command:
             cmd = ["/bin/sh", "-c", " ".join(cmd)]
         # Open streams
         stderr = self._get_stream(job, context, self.stderr, sys.stderr)
         stdin = self._get_stream(job,
                                  context,
                                  self.stdin,
                                  sys.stdin,
                                  is_input=True)
         stdout = self._get_stream(job, context, self.stdout, sys.stderr)
         # Execute command
         logger.info(
             'Executing job {job} into directory {outdir}: \n{command}'.
             format(job=job.name,
                    outdir=job.output_directory,
                    command=' \\\n\t'.join(cmd)))
         proc = await asyncio.create_subprocess_exec(
             *cmd,
             cwd=job.output_directory,
             env=parsed_env,
             stdin=stdin,
             stdout=stdout,
             stderr=stderr)
         result, error = await asyncio.wait_for(proc.communicate(),
                                                self._get_timeout(job))
         exit_code = proc.returncode
         # Close streams
         if stdin is not sys.stdin:
             stdin.close()
         if stdout is not sys.stderr:
             stdout.close()
         if stderr is not sys.stderr:
             stderr.close()
     else:
         connector = self.step.get_connector()
         resources = job.get_resources()
         logger.info(
             'Executing job {job} on resource {resource} into directory {outdir}:\n{command}'
             .format(
                 job=job.name,
                 resource=resources[0] if resources else None,
                 outdir=job.output_directory,
                 command=' \\\n\t'.join([
                     "/bin/sh", "-c", "\"{cmd}\"".format(cmd=" ".join(cmd))
                 ] if self.is_shell_command else cmd)))
         if self.is_shell_command:
             cmd = [
                 "/bin/sh", "-c",
                 "\"$(echo {command} | base64 -d)\"".format(
                     command=base64.b64encode(" ".join(cmd).encode(
                         'utf-8')).decode('utf-8'))
             ]
         # If step is assigned to multiple resources, add the STREAMFLOW_HOSTS environment variable
         if len(resources) > 1:
             available_resources = await connector.get_available_resources(
                 self.step.target.service)
             hosts = {
                 k: v.hostname
                 for k, v in available_resources.items() if k in resources
             }
             parsed_env['STREAMFLOW_HOSTS'] = ','.join(hosts.values())
         # Process streams
         stdin = eval_expression(expression=self.stdin,
                                 context=context,
                                 full_js=self.full_js,
                                 expression_lib=self.expression_lib)
         stdout = eval_expression(expression=self.stdout,
                                  context=context,
                                  full_js=self.full_js,
                                  expression_lib=self.expression_lib
                                  ) if self.stdout is not None else STDOUT
         stderr = eval_expression(expression=self.stderr,
                                  context=context,
                                  full_js=self.full_js,
                                  expression_lib=self.expression_lib
                                  ) if self.stderr is not None else stdout
         # Execute remote command
         result, exit_code = await asyncio.wait_for(
             connector.run(resources[0] if resources else None,
                           cmd,
                           environment=parsed_env,
                           workdir=job.output_directory,
                           stdin=stdin,
                           stdout=stdout,
                           stderr=stderr,
                           capture_output=True,
                           job_name=job.name), self._get_timeout(job))
     # Handle exit codes
     if self.failure_codes is not None and exit_code in self.failure_codes:
         status = Status.FAILED
     elif (self.success_codes is not None
           and exit_code in self.success_codes) or exit_code == 0:
         status = Status.COMPLETED
         if result:
             logger.info(result)
     else:
         status = Status.FAILED
     return CWLCommandOutput(value=result,
                             status=status,
                             exit_code=exit_code)
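
The base64 round-trip on the remote branch shields the shell command from an extra layer of quoting: it is encoded locally and decoded by the remote shell right before execution. The same trick in isolation, with eval standing in for the extra shell layer that the connector normally adds (assumes a POSIX shell with base64 available):

import base64
import subprocess

command = 'echo "hello world" | wc -w'
encoded = base64.b64encode(command.encode('utf-8')).decode('utf-8')
wrapped = ['/bin/sh', '-c',
           'eval "$(echo {cmd} | base64 -d)"'.format(cmd=encoded)]
print(subprocess.run(wrapped, capture_output=True, text=True).stdout)
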
Example #14
 async def _prepare_work_dir(self,
                             job: Job,
                             context: MutableMapping[Text, Any],
                             element: Any,
                             dest_path: Optional[Text] = None,
                             writable: bool = False) -> None:
     path_processor = get_path_processor(job.step)
     connector = job.step.get_connector()
     resources = job.get_resources() or [None]
     # If current element is a string, it must be an expression
     if isinstance(element, Text):
         listing = eval_expression(expression=element,
                                   context=context,
                                   full_js=self.full_js,
                                   expression_lib=self.expression_lib)
     else:
         listing = element
     # If listing is a list, each of its elements must be processed independently
     if isinstance(listing, MutableSequence):
         await asyncio.gather(*[
             asyncio.create_task(
                 self._prepare_work_dir(job, context, el, dest_path,
                                        writable)) for el in listing
         ])
     # If listing is a dictionary, it could be a File, a Directory, a Dirent or some other object
     elif isinstance(listing, MutableMapping):
         # If it is a File or Directory element, put the corresponding file in the output directory
         if 'class' in listing and listing['class'] in ['File', 'Directory']:
             src_path = utils.get_path_from_token(listing)
             src_found = False
             if src_path is not None:
                 if dest_path is None:
                     if src_path.startswith(job.input_directory):
                         relpath = path_processor.relpath(
                             src_path, job.input_directory)
                         dest_path = path_processor.join(
                             job.output_directory, relpath)
                     else:
                         basename = path_processor.basename(src_path)
                         dest_path = path_processor.join(
                             job.output_directory, basename)
                 for resource in resources:
                     if await remotepath.exists(connector, resource,
                                                src_path):
                         await self.step.context.data_manager.transfer_data(
                             src=src_path,
                             src_job=job,
                             dst=dest_path,
                             dst_job=job,
                             writable=writable)
                         src_found = True
                         break
             # If the source path does not exist, create a File or a Directory in the remote path
             if not src_found:
                 if dest_path is None:
                     dest_path = job.output_directory
                 if src_path is not None:
                     dest_path = path_processor.join(
                         dest_path, path_processor.basename(src_path))
                 if listing['class'] == 'Directory':
                     await remotepath.mkdir(connector, resources, dest_path)
                 else:
                     await self._write_remote_file(
                         job=job,
                         content=listing['contents']
                         if 'contents' in listing else '',
                         dest_path=dest_path,
                         writable=writable)
             # If `listing` is present, recursively process folder contents
             if 'listing' in listing:
                 if 'basename' in listing:
                     dest_path = path_processor.join(
                         dest_path, listing['basename'])
                     await remotepath.mkdir(connector, resources, dest_path)
                 await asyncio.gather(*[
                     asyncio.create_task(
                         self._prepare_work_dir(job, context, element,
                                                dest_path, writable))
                     for element in listing['listing']
                 ])
         # If it is a Dirent element, put or create the corresponding file according to the entryname field
         elif 'entry' in listing:
             entry = eval_expression(expression=listing['entry'],
                                     context=context,
                                     full_js=self.full_js,
                                     expression_lib=self.expression_lib,
                                     strip_whitespace=False)
             if 'entryname' in listing:
                 dest_path = eval_expression(
                     expression=listing['entryname'],
                     context=context,
                     full_js=self.full_js,
                     expression_lib=self.expression_lib)
                 if not path_processor.isabs(dest_path):
                     dest_path = path_processor.join(
                         job.output_directory, dest_path)
             writable = listing.get('writable', False)
             # If entry is a string, a new text file must be created with the string as the file contents
             if isinstance(entry, Text):
                 await self._write_remote_file(job, entry, dest_path,
                                               writable)
             # If entry is a list
             elif isinstance(entry, MutableSequence):
                 # If all elements are Files or Directories, each of them must be processed independently
                 if all('class' in t
                        and t['class'] in ['File', 'Directory']
                        for t in entry):
                     await self._prepare_work_dir(job, context, entry,
                                                  dest_path, writable)
                 # Otherwise, the content should be serialised to JSON
                 else:
                     await self._write_remote_file(job, json.dumps(entry),
                                                   dest_path, writable)
             # If entry is a dict
             elif isinstance(entry, MutableMapping):
                 # If it is a File or Directory, it must be put in the destination path
                 if 'class' in entry and entry['class'] in [
                         'File', 'Directory'
                 ]:
                     await self._prepare_work_dir(job, context, entry,
                                                  dest_path, writable)
                 # Otherwise, the content should be serialised to JSON
                 else:
                     await self._write_remote_file(job, json.dumps(entry),
                                                   dest_path, writable)
             # Every object different from a string should be serialised to JSON
             else:
                 await self._write_remote_file(job, json.dumps(entry),
                                               dest_path, writable)
Example #15
 async def _build_token_value(
         self,
         job: Job,
         token_value: Any,
         load_contents: Optional[bool] = None,
         load_listing: Optional[LoadListing] = None) -> Any:
     if load_contents is None:
         load_contents = self.load_contents
     if token_value is None:
         return self.default_value
     elif isinstance(token_value, MutableSequence):
         value_tasks = []
         for t in token_value:
             value_tasks.append(
                 asyncio.create_task(
                     self._build_token_value(job, t, load_contents,
                                             load_listing)))
         return await asyncio.gather(*value_tasks)
     elif (isinstance(token_value, MutableMapping) and token_value.get(
             'class', token_value.get('type')) in ['File', 'Directory']):
         step = job.step if job is not None else self.port.step
         # Get filepath
         filepath = get_path_from_token(token_value)
         if filepath is not None:
             # Process secondary files in token value
             sf_map = {}
             if 'secondaryFiles' in token_value:
                 sf_tasks = []
                 for sf in token_value.get('secondaryFiles', []):
                     sf_path = get_path_from_token(sf)
                     path_processor = get_path_processor(step)
                     if not path_processor.isabs(sf_path):
                         sf_path = path_processor.join(
                             path_processor.dirname(filepath), sf_path)
                     sf_tasks.append(
                         asyncio.create_task(
                             _get_file_token(step=step,
                                             job=job,
                                             token_class=sf['class'],
                                             filepath=sf_path,
                                             basename=sf.get('basename'),
                                             load_contents=load_contents,
                                             load_listing=load_listing
                                             or self.load_listing)))
                 sf_map = {
                     get_path_from_token(sf): sf
                     for sf in await asyncio.gather(*sf_tasks)
                 }
             # Compute the new token value
             token_value = await _get_file_token(
                 step=step,
                 job=job,
                 token_class=token_value.get('class',
                                             token_value.get('type')),
                 filepath=filepath,
                 basename=token_value.get('basename'),
                 load_contents=load_contents,
                 load_listing=load_listing or self.load_listing)
             # Compute new secondary files from port specification
             if self.secondary_files:
                 context = utils.build_context(job)
                 context['self'] = token_value
                 sf_tasks, sf_specs = [], []
                 for secondary_file in self.secondary_files:
                     # If pattern is an expression, evaluate it and process result
                     if '$(' in secondary_file.pattern or '${' in secondary_file.pattern:
                         sf_value = utils.eval_expression(
                             expression=secondary_file.pattern,
                             context=context,
                             full_js=self.full_js,
                             expression_lib=self.expression_lib)
                         if isinstance(sf_value, MutableSequence):
                             for sf in sf_value:
                                 sf_tasks.append(
                                     asyncio.create_task(
                                         self._process_secondary_file(
                                             job=job,
                                             secondary_file=sf,
                                             token_value=token_value,
                                             from_expression=True,
                                             existing_sf=sf_map,
                                             load_contents=load_contents,
                                             load_listing=load_listing
                                             or self.load_listing)))
                                 sf_specs.append(secondary_file)
                         else:
                             sf_tasks.append(
                                 asyncio.create_task(
                                     self._process_secondary_file(
                                         job=job,
                                         secondary_file=sf_value,
                                         token_value=token_value,
                                         from_expression=True,
                                         existing_sf=sf_map,
                                         load_contents=load_contents,
                                         load_listing=load_listing
                                         or self.load_listing)))
                             sf_specs.append(secondary_file)
                     # Otherwise, simply process the pattern string
                     else:
                         sf_tasks.append(
                             asyncio.create_task(
                                 self._process_secondary_file(
                                     job=job,
                                     secondary_file=secondary_file.pattern,
                                     token_value=token_value,
                                     from_expression=False,
                                     existing_sf=sf_map,
                                     load_contents=load_contents,
                                     load_listing=load_listing
                                     or self.load_listing)))
                         sf_specs.append(secondary_file)
                 for sf_value, sf_spec in zip(
                         await asyncio.gather(*sf_tasks), sf_specs):
                     if sf_value is not None:
                         sf_map[get_path_from_token(sf_value)] = sf_value
                     elif sf_spec.required:
                         raise WorkflowExecutionException(
                             "Required secondary file {sf} not found".
                             format(sf=sf_spec.pattern))
             # Add all secondary files to the token
             if sf_map:
                 token_value['secondaryFiles'] = list(sf_map.values())
         # If there is only a 'contents' field, create a file on the step's resource and build the token
         elif 'contents' in token_value:
             path_processor = get_path_processor(self.port.step)
             filepath = path_processor.join(
                 job.output_directory,
                 token_value.get('basename', random_name()))
             connector = job.step.get_connector()
             resources = ((job.get_resources() or [None])
                          if job is not None else [None])
             await asyncio.gather(*[
                 asyncio.create_task(
                     remotepath.write(connector, res, filepath,
                                      token_value['contents']))
                 for res in resources
             ])
             token_value = await _get_file_token(
                 step=step,
                 job=job,
                 token_class=token_value.get('class',
                                             token_value.get('type')),
                 filepath=filepath,
                 basename=token_value.get('basename'),
                 load_contents=load_contents,
                 load_listing=load_listing or self.load_listing)
     return token_value
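
When a secondaryFiles pattern is not an expression, CWL's convention applies it to the primary path: a plain suffix is appended, while each leading '^' first strips one extension. A sketch of that rule (the actual resolution happens in _process_sf_path, which is not shown here):

import os

def apply_sf_pattern(pattern, primary_path):
    # Each leading '^' removes one extension from the primary path
    while pattern.startswith('^'):
        pattern = pattern[1:]
        primary_path = os.path.splitext(primary_path)[0]
    return primary_path + pattern

assert apply_sf_pattern('.bai', '/data/x.bam') == '/data/x.bam.bai'
assert apply_sf_pattern('^.fai', '/data/x.bam') == '/data/x.fai'
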
Example #16
 async def _get_value_from_command(self, job: Job,
                                   command_output: CWLCommandOutput):
     context = utils.build_context(job)
     path_processor = get_path_processor(self.port.step)
     connector = job.step.get_connector()
     resources = job.get_resources() or [None]
     token_value = command_output.value if command_output.value is not None else self.default_value
     # Check if file `cwl.output.json` exists either locally or on at least one resource
     cwl_output_path = path_processor.join(job.output_directory,
                                           'cwl.output.json')
     for resource in resources:
         if await remotepath.exists(connector, resource, cwl_output_path):
             # If file exists, use its contents as token value
             token_value = json.loads(await remotepath.read(
                 connector, resource, cwl_output_path))
             break
     # If `token_value` is a dictionary, directly extract the token value from it
     if isinstance(token_value,
                   MutableMapping) and self.port.name in token_value:
         token = token_value[self.port.name]
         return await self._build_token_value(job, token)
     # Otherwise, generate the output object as described in `outputs` field
     if self.glob is not None:
         # Adjust glob path
         if '$(' in self.glob or '${' in self.glob:
             globpath = utils.eval_expression(
                 expression=self.glob,
                 context=context,
                 full_js=self.full_js,
                 expression_lib=self.expression_lib)
         else:
             globpath = self.glob
         # Resolve glob
         resolve_tasks = []
         for resource in resources:
             if isinstance(globpath, MutableSequence):
                 for path in globpath:
                     if not path_processor.isabs(path):
                         path = path_processor.join(job.output_directory,
                                                    path)
                     resolve_tasks.append(
                         _expand_glob(connector, resource, path))
             else:
                 if not path_processor.isabs(globpath):
                     globpath = path_processor.join(job.output_directory,
                                                    globpath)
                 resolve_tasks.append(
                     _expand_glob(connector, resource, globpath))
         paths = flatten_list(await asyncio.gather(*resolve_tasks))
         # Cannot glob outside the job output folder
         for path in paths:
             if not path.startswith(job.output_directory):
                 raise WorkflowDefinitionException(
                     "Globs outside the job's output folder are not allowed"
                 )
         # Get token class from paths
         class_tasks = [
             asyncio.create_task(_get_class_from_path(p, job))
             for p in paths
         ]
         paths = [{
             'path': p,
             'class': c
         } for p, c in zip(paths, await asyncio.gather(*class_tasks))]
         # If evaluation is not needed, simply return paths as token value
         if self.output_eval is None:
             token_list = await self._build_token_value(job, paths)
             if len(token_list) == 1:
                 return token_list[0]
             return token_list or None
         # Otherwise, fill context['self'] with glob data and proceed
         else:
             context['self'] = await self._build_token_value(job, paths)
     if self.output_eval is not None:
         # Fill context with exit code
         context['runtime']['exitCode'] = command_output.exit_code
         # Evaluate output
         token = utils.eval_expression(expression=self.output_eval,
                                       context=context,
                                       full_js=self.full_js,
                                       expression_lib=self.expression_lib)
         # Build token
         if isinstance(token, MutableSequence):
             paths = [{
                 'path': el['path'],
                 'class': el['class']
             } for el in token]
             return await self._build_token_value(job, paths)
         else:
             return await self._build_token_value(job, token)
     # Otherwise (if no earlier return path was taken), simply process the command output
     return await self._build_token_value(job, token_value)
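
The glob guard above is a pure containment check: every resolved path must live under the job's output directory. A local sketch with Python's glob module standing in for _expand_glob:

import glob
import os

def safe_glob(pattern, output_dir):
    output_dir = os.path.abspath(output_dir)
    paths = glob.glob(os.path.join(output_dir, pattern))
    for path in paths:
        if not os.path.abspath(path).startswith(output_dir):
            raise ValueError(
                "Globs outside the job's output folder are not allowed")
    return paths

print(safe_glob('*.py', '.'))
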
Example #17
 async def _update_file_token(
         self,
         job: Job,
         src_job: Job,
         token_value: Any,
         load_listing: Optional[LoadListing] = None,
         writable: Optional[bool] = None) -> MutableMapping[Text, Any]:
     path_processor = (get_path_processor(src_job.step)
                       if src_job is not None else os.path)
     if 'location' not in token_value and 'path' in token_value:
         token_value['location'] = token_value['path']
     if 'location' in token_value and token_value['location'] is not None:
         location = token_value['location']
         # Manage remote files
         scheme = urllib.parse.urlsplit(location).scheme
         if scheme in ['http', 'https']:
             location = await _download_file(job, location)
         elif scheme == 'file':
             location = location[7:]
         # If basename is explicitly stated in the token, use it as destination path
         dest_path = None
         if 'basename' in token_value:
             path_processor = get_path_processor(self.port.step)
             dest_path = path_processor.join(job.input_directory,
                                             token_value['basename'])
         # Check if source file exists
         src_connector = (src_job.step.get_connector()
                          if src_job is not None else None)
         src_resources = ((src_job.get_resources() or [None])
                          if src_job is not None else [None])
         src_found = False
         for src_resource in src_resources:
             if await remotepath.exists(src_connector, src_resource,
                                        location):
                 src_found = True
                 break
         # If the source path exists, transfer the file into the task's input folder
         if src_found:
             filepath = await self._transfer_file(src_job=src_job,
                                                  dest_job=job,
                                                  src_path=location,
                                                  dest_path=dest_path,
                                                  writable=writable)
         # Otherwise, keep the current path
         else:
             filepath = location
         new_token_value = {'class': token_value['class'], 'path': filepath}
         # If token contains secondary files, transfer them, too
         if 'secondaryFiles' in token_value:
             sf_tasks = []
             for sf in token_value['secondaryFiles']:
                 path = get_path_from_token(sf)
                 # If basename is explicitly stated in the token, use it as destination path
                 dest_path = None
                 if 'basename' in sf:
                     path_processor = get_path_processor(self.port.step)
                     dest_path = path_processor.join(
                         job.input_directory, sf['basename'])
                 sf_tasks.append(
                     asyncio.create_task(
                         self._transfer_file(src_job=src_job,
                                             dest_job=job,
                                             src_path=path,
                                             dest_path=dest_path)))
             sf_paths = await asyncio.gather(*sf_tasks)
             new_token_value['secondaryFiles'] = [{
                 'class': sf['class'],
                 'path': sf_path
             } for sf, sf_path in zip(token_value['secondaryFiles'],
                                      sf_paths)]
         # Build token
         token_value = await self._build_token_value(
             job=job,
             token_value=new_token_value,
             load_contents=self.load_contents or 'contents' in token_value,
             load_listing=load_listing)
         return token_value
     # If there is only a 'contents' field, simply build the token value
     elif 'contents' in token_value:
         return await self._build_token_value(job, token_value,
                                              load_listing=load_listing)
     # If there is only a 'listing' field, transfer all the listed files to the remote resource
     elif 'listing' in token_value:
         # Compute destination path
         dest_path = get_path_from_token(token_value)
         if dest_path is None and 'basename' in token_value:
             dest_path = path_processor.join(job.input_directory,
                                             token_value['basename'])
         # Copy each element of the listing into the destination folder
         tasks = []
         classes = []
         for element in cast(List, token_value['listing']):
             # Compute destination path
             if dest_path is not None:
                 basename = path_processor.basename(element['path'])
                 current_dest_path = path_processor.join(
                     dest_path, basename)
             else:
                 current_dest_path = None
             # Transfer element to the remote resource
             tasks.append(
                 asyncio.create_task(
                     self._transfer_file(src_job=src_job,
                                         dest_job=job,
                                         src_path=element['path'],
                                         dest_path=current_dest_path,
                                         writable=writable)))
             classes.append(element['class'])
         dest_paths = await asyncio.gather(*tasks)
         # Compute listing on remote resource
         listing_tasks = []
         for token_class, path in zip(classes, dest_paths):
             listing_tasks.append(
                 asyncio.create_task(
                     _get_file_token(step=self.port.step,
                                     job=job,
                                     token_class=token_class,
                                     filepath=path)))
         token_value['listing'] = await asyncio.gather(*listing_tasks)
     return token_value
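
The listing branch scatters all transfers at once, then zips the element classes back onto the gathered destination paths to rebuild the tokens. The scatter/zip shape in isolation, with a dummy transfer coroutine:

import asyncio
import posixpath

async def transfer(src, dst):
    # Stand-in for _transfer_file
    await asyncio.sleep(0)
    return dst

async def relocate_listing(listing, dest_dir):
    tasks = [
        asyncio.create_task(
            transfer(el['path'],
                     posixpath.join(dest_dir,
                                    posixpath.basename(el['path']))))
        for el in listing
    ]
    dest_paths = await asyncio.gather(*tasks)
    return [{'class': el['class'], 'path': path}
            for el, path in zip(listing, dest_paths)]

listing = [{'class': 'File', 'path': '/src/a.txt'},
           {'class': 'Directory', 'path': '/src/d'}]
print(asyncio.run(relocate_listing(listing, '/dst')))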