Пример #1
0
    def from_dict(self, doc, identifier=None, base_dir=None, validate=True):
        """Build a benchmark template from its dictionary serialization.

        The document is first handed to the template loader of the super class
        to obtain the generic template handle. In addition, the document has
        to contain the result schema under the LABEL_RESULTS element.

        Parameters
        ----------
        doc: dict
            Dictionary serialization of a benchmark workflow template
        identifier: string, optional
            Unique template identifier. Passed on unchanged to the loader of
            the super class.
        base_dir: string, optional
            Optional path to directory on disk that contains static files that
            are required to run the represented workflow. Passed on unchanged
            to the loader of the super class.
        validate: bool, optional
            Flag indicating if given template parameter declarations are to be
            validated against the parameter schema or not. Passed on unchanged
            to the loader of the super class.

        Returns
        -------
        benchtmpl.workflow.benchmark.base.BenchmarkTemplate

        Raises
        ------
        benchtmpl.error.InvalidTemplateError
        benchtmpl.error.UnknownParameterError
        """
        # The result schema is mandatory for every benchmark template
        if LABEL_RESULTS not in doc:
            message = 'missing element \'{}\''.format(LABEL_RESULTS)
            raise err.InvalidTemplateError(message)
        # Let the super class parse the generic parts of the template
        handle = super(BenchmarkTemplateLoader, self).from_dict(
            doc=doc,
            identifier=identifier,
            base_dir=base_dir,
            validate=validate
        )
        # De-serialize the result schema. Value errors that are raised by the
        # schema are reported as invalid template errors.
        try:
            result_schema = BenchmarkResultSchema.from_dict(doc[LABEL_RESULTS])
        except ValueError as ex:
            raise err.InvalidTemplateError(str(ex))
        return BenchmarkTemplate(
            identifier=handle.identifier,
            base_dir=handle.base_dir,
            workflow_spec=handle.workflow_spec,
            parameters=handle.parameters.values(),
            schema=result_schema
        )
Пример #2
0
    def from_dict(self, doc, identifier=None, base_dir=None, validate=True):
        """Create an instance of the template handle for a dictionary
        serialization. The document is expected to contain the workflow
        specification (LABEL_WORKFLOW). The template identifier (LABEL_ID) and
        the parameter declarations (LABEL_PARAMETERS) are optional.

        Parameters
        ----------
        doc: dict
            Dictionary serialization of a workflow template
        identifier: string, optional
            Unique template identifier. If given, this value overrides the
            identifier in the document. The document identifier is only used
            if no identifier is given.
        base_dir: string, optional
            Optional path to directory on disk that contains static files that
            are required to run the represented workflow. The value is passed
            on to the template handle as given.
        validate: bool, optional
            Flag indicating if given template parameter declarations are to be
            validated against the parameter schema or not. If True, the
            workflow specification is also checked for references to
            undeclared parameters.

        Returns
        -------
        benchtmpl.workflow.template.base.TemplateHandle

        Raises
        ------
        benchtmpl.error.InvalidTemplateError
        benchtmpl.error.UnknownParameterError
        """
        # Ensure that the mandatory elements are present
        if LABEL_WORKFLOW not in doc:
            raise err.InvalidTemplateError(
                'missing element \'{}\''.format(LABEL_WORKFLOW))
        workflow_spec = doc[LABEL_WORKFLOW]
        # Add given parameter declarations to the parameter list. Ensure that
        # all default values are set
        parameters = dict()
        if LABEL_PARAMETERS in doc:
            parameters = putil.create_parameter_index(doc[LABEL_PARAMETERS],
                                                      validate=validate)
        # Ensure that the workflow specification does not reference undefined
        # parameters if validate flag is True.
        if validate:
            for key in tmpl.get_parameter_references(workflow_spec):
                if key not in parameters:
                    raise err.UnknownParameterError(key)
        # An explicitly given identifier takes precedence over the identifier
        # in the document. Callers that generate the identifier themselves
        # (e.g., to name the template folder) rely on the handle carrying
        # exactly that value.
        if identifier is None and LABEL_ID in doc:
            identifier = doc[LABEL_ID]
        return tmpl.TemplateHandle(identifier=identifier,
                                   base_dir=base_dir,
                                   workflow_spec=workflow_spec,
                                   parameters=list(parameters.values()))
Пример #3
0
def create_parameter_index(parameters, validate=True):
    """Create instances of template parameters from a list of dictionaries
    containing parameter declarations. The result is a dictionary containing
    one entry per declared parameter, indexed by the unique parameter
    identifier. Parameters that declare a parent are additionally added as a
    child to the parameter object of their parent.

    Parameters
    ----------
    parameters: list(dict)
        List of dictionaries containing template parameter declarations. The
        list is iterated twice and therefore should not be a one-shot
        iterator.
    validate: bool, optional
        Flag indicating if given template parameter declarations are to be
        validated against the parameter schema or not.

    Returns
    -------
    dict(benchtmpl.workflow.parameter.base.TemplateParameter)

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If two declarations share the same identifier.
    benchtmpl.error.UnknownParameterError
        If a declaration references a parent that is not declared.
    """
    result = dict()
    for para in parameters:
        # Validate the template parameters if the validate flag is True
        if validate:
            pd.validate_parameter(para)
        p_id = para[pd.LABEL_ID]
        # Ensure that the identifier of all parameters are unique
        if p_id in result:
            raise err.InvalidTemplateError(
                'parameter \'{}\' not unique'.format(p_id))
        # Only parameters of type DT_LIST or DT_RECORD maintain a list of
        # children. The children are added below, after all parameters have
        # been instantiated.
        children = None
        if para[pd.LABEL_DATATYPE] in (pd.DT_LIST, pd.DT_RECORD):
            children = list()
        result[p_id] = TemplateParameter(
            pd.set_defaults(para),
            children=children
        )
    # Add parameter templates to the list of children for their respective
    # parent (if given). We currently only support one level of nesting.
    for para in parameters:
        if pd.LABEL_PARENT in para:
            parent = para[pd.LABEL_PARENT]
            if parent is not None:
                # Report references to undeclared parents with the documented
                # error type instead of a bare KeyError.
                if parent not in result:
                    raise err.UnknownParameterError(parent)
                result[parent].add_child(result[para[pd.LABEL_ID]])
    return result
Пример #4
0
def get_parameter_references(spec, parameters=None):
    """Collect the identifiers of all template parameters that are referenced
    in a workflow specification.

    Only the values of the dictionary are inspected. Strings are tested for
    being a parameter reference, dictionaries are searched recursively, and
    lists are searched element by element. Values of any other type are
    ignored. If a result set is given, the identifiers are added to that set
    and the same set object is returned.

    Parameters
    ----------
    spec: dict
        Parameterized workflow specification
    parameters: set, optional
        Result set of parameter identifier

    Returns
    -------
    set

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a list contains another list as one of its elements.
    """
    found = set() if parameters is None else parameters
    for value in spec.values():
        # Scalars and dictionaries are handled like a list with a single
        # element. Such an element can never be a list itself, so the nested
        # list error is only raised for elements of actual lists.
        candidates = value if isinstance(value, list) else [value]
        for item in candidates:
            if isinstance(item, str):
                # Strings may be references to template parameters
                if is_parameter(item):
                    found.add(get_parameter_name(item))
            elif isinstance(item, dict):
                # Descend into nested dictionaries, sharing the result set
                get_parameter_references(item, parameters=found)
            elif isinstance(item, list):
                # We currently do not support lists of lists
                raise err.InvalidTemplateError(
                    'nested lists not supported')
    return found
Пример #5
0
def replace_args(spec, arguments, parameters):
    """Replace template parameter references in the workflow specification
    with their respective values in the argument dictionary or their
    defined default value.

    The type of the result depends on the type of the spec object:
    dictionaries and lists are copied with all their values replaced
    recursively, strings are passed to replace_value, and values of any other
    type (e.g., numbers, booleans, or None) are returned unchanged.

    Parameters
    ----------
    spec: any
        Parameterized workflow specification
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary that associates template parameter identifiers with
        argument values
    parameters: dict(benchtmpl.workflow.parameter.base.TemplateParameter)
        Dictionary of parameter declarations

    Returns
    -------
    type(spec)

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a list contains another list as one of its elements.
    """
    if isinstance(spec, dict):
        # The new object will contain the modified workflow specification
        return {
            key: replace_args(val, arguments, parameters)
            for key, val in spec.items()
        }
    if isinstance(spec, list):
        obj = list()
        for val in spec:
            if isinstance(val, list):
                # We currently do not support lists of lists
                raise err.InvalidTemplateError('nested lists not supported')
            obj.append(replace_args(val, arguments, parameters))
        return obj
    # The name 'basestring' only exists in Python 2. Referencing it directly
    # raises a NameError in Python 3 for every value that is not a str, so
    # the set of string types is resolved defensively here.
    try:
        string_types = (str, basestring)  # noqa: F821
    except NameError:
        string_types = (str,)
    if isinstance(spec, string_types):
        return replace_value(spec, arguments, parameters)
    # Values of all other types are not parameterized
    return spec
Пример #6
0
def get_commands(template, arguments):
    """Get expanded commands from template workflow specification. In this
    simple implementation the commands within each step of the workflow
    specification are expanded for the given set of arguments and appended to
    the result list of commands.

    The substitution values are taken from the 'inputs' -> 'parameters'
    section of the workflow specification after all references to template
    parameters have been replaced. The commands are read from the list at
    'workflow' -> 'specification' -> 'steps', using the 'commands' element of
    each step. Missing sections are treated as empty.

    Parameters
    ----------
    template: benchtmpl.workflow.template.base.TemplateHandle
        Workflow template containing the parameterized specification and the
        parameter declarations
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary of argument values for parameters in the template

    Returns
    -------
    list(string)

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
        If a command references a value that is not defined in the workflow
        parameters or contains a malformed placeholder.
    """
    spec = template.workflow_spec
    # Get the input/parameters dictionary from the workflow specification and
    # replace all references to template parameters with the given arguments
    # or default values
    workflow_parameters = tmpl.replace_args(
        spec=spec.get('inputs', {}).get('parameters', {}),
        arguments=arguments,
        parameters=template.parameters
    )
    # Add all command strings in workflow steps to result after replacing
    # references to parameters
    result = list()
    steps = spec.get('workflow', {}).get('specification', {}).get('steps', [])
    for step in steps:
        for command in step.get('commands', []):
            # NOTE(review): assumes Template is string.Template, where
            # substitute() raises KeyError for unknown placeholders and
            # ValueError for malformed ones (e.g., a '$' that is followed by
            # '('). Both are reported as invalid templates.
            try:
                expanded = Template(command).substitute(workflow_parameters)
            except (KeyError, ValueError) as ex:
                raise err.InvalidTemplateError(str(ex))
            result.append(expanded)
    return result
Пример #7
0
    def __init__(self,
                 workflow_spec,
                 identifier=None,
                 base_dir=None,
                 parameters=None):
        """Initialize the components of the workflow template. The given
        parameter declarations are stored in a dictionary that is indexed by
        the parameter identifier. An error is raised if the identifier of the
        template parameters are not unique.

        Parameters
        ----------
        workflow_spec: dict
            Workflow specification object
        identifier: string, optional
            Unique template identifier. If no value is given the identifier
            is generated using util.get_unique_identifier().
        base_dir: string, optional
            Optional path to directory on disk that contains static files that
            are required to run the represented workflow
        parameters: list(benchtmpl.workflow.parameter.base.TemplateParameter), optional
            List of workflow template parameter declarations

        Raises
        ------
        benchtmpl.error.InvalidTemplateError
        """
        self.workflow_spec = workflow_spec
        # Fall back to a generated identifier if the caller did not give one
        if identifier is None:
            identifier = util.get_unique_identifier()
        self.identifier = identifier
        self.base_dir = base_dir
        # Index the parameter declarations by their identifier
        index = dict()
        self.parameters = index
        declarations = () if parameters is None else parameters
        for declaration in declarations:
            key = declaration.identifier
            # Ensure that the identifier of all parameters are unique
            if key in index:
                raise err.InvalidTemplateError(
                    'parameter \'{}\' not unique'.format(key))
            index[key] = declaration
Пример #8
0
    def add_template(self,
                     src_dir=None,
                     src_repo_url=None,
                     template_spec_file=None):
        """Create file and folder structure for a new workflow template. Assumes
        that either a workflow source directory or the Url of a remote Git
        repository is given.

        Creates a new folder with unique name in the base directory of the
        template store. The contents of the source directory (or the cloned
        Git repository) are copied into a subfolder of the new template
        folder that is named by STATIC_FILES_DIR. The template directory may
        also contain other subfolders, e.g., subfolders for individual runs
        containing downloaded result files.

        The source folder is expected to contain the template specification
        file. If template_spec_file is given it is tried first, both as given
        and relative to the copied workflow folder. After that the method
        looks for a file using all combinations of the entries in the file
        name and file suffix lists. The first candidate that exists is loaded
        and the serialized template handle is written to the template folder.

        If no template file is found, or if the template cannot be created,
        the new template folder is removed again before an error is raised.

        Parameters
        ----------
        src_dir: string, optional
            Directory containing the workflow components, i.e., the fixed
            files and the template specification (optional).
        src_repo_url: string, optional
            Git repository that contains the workflow components
        template_spec_file: string, optional
            Path to the workflow template specification file (absolute or
            relative to the workflow directory)

        Returns
        -------
        benchtmpl.workflow.template.TemplateHandle

        Raises
        ------
        benchtmpl.error.InvalidParameterError
        benchtmpl.error.InvalidTemplateError
            If no template file is found or if copying the sources or loading
            the template fails.
        RuntimeError
            If no unique folder name could be generated within the maximum
            number of attempts.
        ValueError
            If not exactly one of src_dir and src_repo_url is given.
        """
        # Exactly one of src_dir and src_repo_url has to be not None. If both
        # are None (or not None) a ValueError is raised.
        if src_dir is None and src_repo_url is None:
            raise ValueError(
                'both \'src_dir\' and \'src_repo_url\' are missing')
        elif src_dir is not None and src_repo_url is not None:
            raise ValueError(
                'cannot have both \'src_dir\' and \'src_repo_url\'')
        # Create a new unique folder for the template resources
        identifier = None
        template_dir = None
        attempt = 0
        while identifier is None or template_dir is None:
            identifier = self.id_func()
            template_dir = os.path.join(self.base_dir, identifier)
            if os.path.isdir(template_dir):
                identifier = None
                template_dir = None
                attempt += 1
                if attempt > self.max_attempts:
                    raise RuntimeError('could not create unique directory')
        # Create the new folder (this should be unique now)
        os.makedirs(template_dir)
        try:
            # Copy either the given workflow directory into the created template
            # folder or clone the Git repository.
            static_dir = os.path.join(template_dir, STATIC_FILES_DIR)
            if src_dir is not None:
                shutil.copytree(src=src_dir, dst=static_dir)
            else:
                git.Repo.clone_from(src_repo_url, static_dir)
            # Collect the candidate locations of the template specification
            # file. An explicitly given file is tried first: as given (keeps
            # absolute paths and previously working calls intact) and relative
            # to the copied workflow folder (as documented). For absolute
            # paths os.path.join yields the path itself.
            candidates = list()
            if template_spec_file is not None:
                candidates.append(template_spec_file)
                candidates.append(os.path.join(static_dir, template_spec_file))
            for name in self.filenames:
                for suffix in self.suffixes:
                    candidates.append(os.path.join(static_dir, name + suffix))
            for filename in candidates:
                if os.path.isfile(filename):
                    # Read template from file. If no error occurs the folder
                    # contains a valid template.
                    template = self.loader.load(filename=filename,
                                                identifier=identifier,
                                                base_dir=static_dir,
                                                validate=True)
                    # Store serialized template handle on disk
                    self.loader.write(template=template,
                                      filename=os.path.join(
                                          template_dir, TEMPLATE_FILE))
                    return template
        except (IOError, OSError, ValueError, err.TemplateError) as ex:
            # Make sure to cleanup by removing the created template folder
            shutil.rmtree(template_dir)
            raise err.InvalidTemplateError(str(ex))
        except Exception:
            # Any other error (e.g., raised while cloning the repository) is
            # passed on unchanged. Still remove the created folder so that no
            # incomplete template is left behind. Errors during cleanup are
            # ignored to not mask the original exception.
            shutil.rmtree(template_dir, ignore_errors=True)
            raise
        # No template file found. Cleanup and raise error
        shutil.rmtree(template_dir)
        raise err.InvalidTemplateError('no template file found')
Пример #9
0
def upload_files(template, files, arguments, loader):
    """Upload all references to local files in a given list of file names or
    parameter references. The list of files, for example, corresponds to the
    entries in the 'inputs.files' section of a REANA workflow specification.

    Uses a loader function to allow use of this method in cases where the
    workflow is executed locally or remote using a REANA cluster instance.

    List entries that are not parameter references are uploaded from the
    template base directory to a target path that equals the entry itself.
    For parameter references the source and target path are taken from the
    argument value. If no argument is given for the parameter, the default
    value of the parameter is used as the path of the source file relative to
    the template base directory.

    Raises errors if (i) an unknown parameter is referenced, (ii) the type of
    a referenced parameter in the input files section is not of type file, or
    (iii) neither an argument nor a default value exists for a referenced
    parameter.

    Parameters
    ----------
    template: benchtmpl.workflow.template.base.TemplateHandle
        Workflow template containing the parameterized specification and the
        parameter declarations
    files: list(string)
        List of file references
    arguments: dict(benchtmpl.workflow.parameter.value.TemplateArgument)
        Dictionary of argument values for parameters in the template
    loader: func
        File (up)load function that takes a filepath as the first argument and
        a (remote) target path as the second argument

    Raises
    ------
    benchtmpl.error.InvalidTemplateError
    benchtmpl.error.MissingArgumentError
    benchtmpl.error.UnknownParameterError
    """
    for val in files:
        # Set source and target values depending on whether the list
        # entry references a template parameter or not
        if tmpl.is_parameter(val):
            var = tmpl.get_parameter_name(val)
            para = template.get_parameter(var)
            # NOTE(review): presumably get_parameter returns None for unknown
            # identifiers - confirm. The check ensures that the documented
            # error is raised instead of an AttributeError in that case.
            if para is None:
                raise err.UnknownParameterError(var)
            # Raise error if the type of the referenced parameter is
            # not file
            if not para.is_file():
                raise err.InvalidTemplateError(
                    'expected file parameter for \'{}\''.format(var))
            arg = arguments.get(var)
            if arg is None:
                if para.default_value is None:
                    raise err.MissingArgumentError(var)
                # Set argument to file handle using the default value
                # (assuming that the default points to a file in the
                # template base directory). The target path is the parameter
                # constant if one is defined and the parameter is not
                # flagged as input. Otherwise, it is the default value.
                if para.has_constant() and not para.as_input():
                    target_path = para.get_constant()
                else:
                    target_path = para.default_value
                source_path = os.path.join(
                    template.base_dir,
                    para.default_value
                )
                arg = TemplateArgument(
                    parameter=para,
                    value=InputFile(
                        f_handle=FileHandle(filepath=source_path),
                        target_path=target_path
                    )
                )
            # Get path to source file and the target path from the input
            # file handle
            source = arg.value.source()
            target = arg.value.target()
        else:
            source = os.path.join(template.base_dir, val)
            target = val
        # Upload source file
        loader(source, target)