Example #1
 def shadowed_path(self, f):
     """ Get the shadowed path of IOFile f. """
     if not self.shadow_dir:
         return f
     f_ = IOFile(os.path.join(self.shadow_dir, f), self.rule)
     f_.clone_flags(f)
     return f_
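
A minimal, self-contained sketch of the same re-rooting idea outside Snakemake may help: the path is joined onto the shadow directory and the annotations of the original object are copied across. ShadowedFile and its clone_flags method below are hypothetical stand-ins for IOFile, used only to illustrate the pattern; they are not part of the Snakemake API.

    import os

    class ShadowedFile(str):
        """Hypothetical stand-in for IOFile: a string path plus a flags dict."""
        def __new__(cls, path):
            obj = super().__new__(cls, path)
            obj.flags = {}
            return obj

        def clone_flags(self, other):
            # copy the annotations of the original file object
            self.flags = dict(getattr(other, "flags", {}))

    def shadowed_path(shadow_dir, f):
        # f is expected to be a path relative to the working directory
        if not shadow_dir:
            return f
        f_ = ShadowedFile(os.path.join(shadow_dir, f))
        f_.clone_flags(f)
        return f_

    original = ShadowedFile("results/data.txt")
    original.flags["temp"] = True
    print(shadowed_path("shadow/tmp123", original))  # shadow/tmp123/results/data.txt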
Example #2
 def expanded_output(self):
     """ Iterate over output files while dynamic output is expanded. """
     for f, f_ in zip(self.output, self.rule.output):
         if f in self.dynamic_output:
             expansion = self.expand_dynamic(f_)
             if not expansion:
                 yield f_
             for f, _ in expansion:
                 file_to_yield = IOFile(f, self.rule)
                 file_to_yield.clone_flags(f_)
                 yield file_to_yield
         else:
             yield f
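
Stripped of the IOFile specifics, the generator follows a simple fallback pattern: for a dynamic pattern, yield the expanded files if there are any, otherwise yield the unexpanded pattern itself; non-dynamic files pass through untouched. A sketch with plain strings (the names files, dynamic and expand are illustrative only):

    def expanded(files, dynamic, expand):
        for f in files:
            if f in dynamic:
                expansion = expand(f)
                if not expansion:
                    # nothing produced yet: fall back to the unexpanded pattern
                    yield f
                for e in expansion:
                    yield e
            else:
                yield f

    expand = lambda pattern: ["x_1.txt", "x_2.txt"]
    print(list(expanded(["a.txt", "x_{n}.txt"], {"x_{n}.txt"}, expand)))
    # ['a.txt', 'x_1.txt', 'x_2.txt']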
Example #3
 def conda_env(self, conda_env):
     self._conda_env = IOFile(conda_env, rule=self)
Example #4
    def dynamic_branch(self, wildcards, input=True):
        def get_io(rule):
            return (rule.input, rule.dynamic_input) if input else (
                rule.output, rule.dynamic_output
            )

        def partially_expand(f, wildcards):
            """Expand the wildcards in f from the ones present in wildcards

            This is done by replacing all wildcard delimiters by `{{` or `}}`
            that are not in `wildcards.keys()`.
            """
            # perform the partial expansion from f's string representation
            s = str(f).replace('{', '{{').replace('}', '}}')
            for key in wildcards.keys():
                s = s.replace('{{{{{}}}}}'.format(key),
                              '{{{}}}'.format(key))
            # build result
            anno_s = AnnotatedString(s)
            anno_s.flags = f.flags
            return IOFile(anno_s, f.rule)

        io, dynamic_io = get_io(self)

        branch = Rule(self)
        io_, dynamic_io_ = get_io(branch)

        expansion = defaultdict(list)
        for i, f in enumerate(io):
            if f in dynamic_io:
                f = partially_expand(f, wildcards)
                try:
                    for e in reversed(expand(f, zip, **wildcards)):
                        # need to clone the flags so intermediate
                        # dynamic remote file paths are expanded and
                        # removed appropriately
                        ioFile = IOFile(e, rule=branch)
                        ioFile.clone_flags(f)
                        expansion[i].append(ioFile)
                except KeyError:
                    return None

        # replace the dynamic files with the expanded files
        replacements = [(i, io[i], e)
                        for i, e in reversed(list(expansion.items()))]
        for i, old, exp in replacements:
            dynamic_io_.remove(old)
            io_.insert_items(i, exp)

        if not input:
            for i, old, exp in replacements:
                if old in branch.temp_output:
                    branch.temp_output.discard(old)
                    branch.temp_output.update(exp)
                if old in branch.protected_output:
                    branch.protected_output.discard(old)
                    branch.protected_output.update(exp)
                if old in branch.touch_output:
                    branch.touch_output.discard(old)
                    branch.touch_output.update(exp)

            branch.wildcard_names.clear()
            non_dynamic_wildcards = dict((name, values[0])
                                         for name, values in wildcards.items()
                                         if len(set(values)) == 1)
            # TODO have a look into how to concretize dependencies here
            (branch._input, branch._output, branch._params, branch._log,
             branch._benchmark, _, branch.dependencies
             ) = branch.expand_wildcards(wildcards=non_dynamic_wildcards)
            return branch, non_dynamic_wildcards
        return branch
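
The brace handling in partially_expand can be reproduced with plain str.format, independently of Snakemake: every brace is doubled so that formatting leaves it alone, and only the wildcards whose values are already known are turned back into single-brace placeholders. The function and argument names below are illustrative only.

    def partially_expand(pattern, known):
        # escape all wildcard delimiters ...
        s = pattern.replace("{", "{{").replace("}", "}}")
        # ... then re-enable only the wildcards whose values are known
        for key in known:
            s = s.replace("{{{{{}}}}}".format(key), "{{{}}}".format(key))
        return s.format(**known)

    print(partially_expand("{sample}/{chunk}.txt", {"sample": "A"}))
    # 'A/{chunk}.txt' -- the unknown {chunk} wildcard survives untouched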
Example #5
 def benchmark(self):
     return IOFile(strip_wildcard_constraints(self.rule.benchmark), rule=self.rule)
Example #6
    def dynamic_branch(self, wildcards, input=True):
        def get_io(rule):
            return (
                (rule.input, rule.dynamic_input)
                if input
                else (rule.output, rule.dynamic_output)
            )

        def partially_expand(f, wildcards):
            """Expand the wildcards in f from the ones present in wildcards

            This is done by replacing all wildcard delimiters by `{{` or `}}`
            that are not in `wildcards.keys()`.
            """
            # perform the partial expansion from f's string representation
            s = str(f).replace("{", "{{").replace("}", "}}")
            for key in wildcards.keys():
                s = s.replace("{{{{{}}}}}".format(key), "{{{}}}".format(key))
            # build result
            anno_s = AnnotatedString(s)
            anno_s.flags = f.flags
            return IOFile(anno_s, f.rule)

        io, dynamic_io = get_io(self)

        branch = Rule(self)
        io_, dynamic_io_ = get_io(branch)

        expansion = collections.defaultdict(list)
        for i, f in enumerate(io):
            if f in dynamic_io:
                f = partially_expand(f, wildcards)
                try:
                    for e in reversed(expand(str(f), zip, **wildcards)):
                        # need to clone the flags so intermediate
                        # dynamic remote file paths are expanded and
                        # removed appropriately
                        ioFile = IOFile(e, rule=branch)
                        ioFile.clone_flags(f)
                        expansion[i].append(ioFile)
                except KeyError:
                    return None

        # replace the dynamic files with the expanded files
        replacements = [(i, io[i], e) for i, e in reversed(list(expansion.items()))]
        for i, old, exp in replacements:
            dynamic_io_.remove(old)
            io_._insert_items(i, exp)

        if not input:
            for i, old, exp in replacements:
                if old in branch.temp_output:
                    branch.temp_output.discard(old)
                    branch.temp_output.update(exp)
                if old in branch.protected_output:
                    branch.protected_output.discard(old)
                    branch.protected_output.update(exp)
                if old in branch.touch_output:
                    branch.touch_output.discard(old)
                    branch.touch_output.update(exp)

            branch.wildcard_names.clear()
            non_dynamic_wildcards = dict(
                (name, values[0])
                for name, values in wildcards.items()
                if len(set(values)) == 1
            )
            # TODO have a look into how to concretize dependencies here
            branch._input, _, branch.dependencies = branch.expand_input(
                non_dynamic_wildcards
            )
            branch._output, _ = branch.expand_output(non_dynamic_wildcards)

            resources = branch.expand_resources(non_dynamic_wildcards, branch._input, 1)
            branch._params = branch.expand_params(
                non_dynamic_wildcards,
                branch._input,
                branch._output,
                resources,
                omit_callable=True,
            )
            branch.resources = dict(resources.items())

            branch._log = branch.expand_log(non_dynamic_wildcards)
            branch._benchmark = branch.expand_benchmark(non_dynamic_wildcards)
            branch._conda_env = branch.expand_conda_env(non_dynamic_wildcards)
            return branch, non_dynamic_wildcards
        return branch
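
The replacement step afterwards can be pictured with a plain list and collections.defaultdict: expanded entries are grouped by the index of the original dynamic pattern and then spliced into the list in its place. This is a simplified standalone illustration, not Snakemake's Namedlist machinery.

    from collections import defaultdict

    io = ["static.txt", "dynamic_{n}.txt", "other.txt"]
    dynamic = {"dynamic_{n}.txt"}

    expansion = defaultdict(list)
    for i, f in enumerate(io):
        if f in dynamic:
            expansion[i] = [f.format(n=n) for n in (1, 2, 3)]

    # splice the expansions in from the back so earlier indices stay valid
    for i, files in sorted(expansion.items(), reverse=True):
        io[i:i + 1] = files

    print(io)
    # ['static.txt', 'dynamic_1.txt', 'dynamic_2.txt', 'dynamic_3.txt', 'other.txt']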
Example #7
    def _set_inoutput_item(self, item, output=False, name=None):
        """
        Set an item to be input or output.

        Arguments
        item     -- the item
        inoutput -- a Namedlist of either input or output items
        name     -- an optional name for the item
        """
        inoutput = self.output if output else self.input
        # Check to see if the item is a path, if so, just make it a string
        if isinstance(item, Path):
            item = str(item)
        if isinstance(item, str):
            item = self.apply_default_remote(item)

            # Check to see that all flags are valid
            # Note that "remote", "dynamic", and "expand" are valid for both inputs and outputs.
            if isinstance(item, AnnotatedString):
                for flag in item.flags:
                    if not output and flag in [
                            "protected",
                            "temp",
                            "temporary",
                            "directory",
                            "touch",
                            "pipe",
                    ]:
                        logger.warning(
                            "The flag '{}' used in rule {} is only valid for outputs, not inputs."
                            .format(flag, self))
                    if output and flag in ["ancient"]:
                        logger.warning(
                            "The flag '{}' used in rule {} is only valid for inputs, not outputs."
                            .format(flag, self))

            # add the rule to the dependencies
            if isinstance(item,
                          _IOFile) and item.rule and item in item.rule.output:
                self.dependencies[item] = item.rule
            if output:
                item = self._update_item_wildcard_constraints(item)
            else:
                if (contains_wildcard_constraints(item)
                        and self.workflow.mode != Mode.subprocess):
                    logger.warning(
                        "Wildcard constraints in inputs are ignored.")
            # record rule if this is an output file
            _item = IOFile(item, rule=self)
            if is_flagged(item, "temp"):
                if output:
                    self.temp_output.add(_item)
            if is_flagged(item, "protected"):
                if output:
                    self.protected_output.add(_item)
            if is_flagged(item, "touch"):
                if output:
                    self.touch_output.add(_item)
            if is_flagged(item, "dynamic"):
                if output:
                    self.dynamic_output.add(_item)
                else:
                    self.dynamic_input.add(_item)
            if is_flagged(item, "report"):
                report_obj = item.flags["report"]
                if report_obj.caption is not None:
                    r = ReportObject(
                        os.path.join(self.workflow.current_basedir,
                                     report_obj.caption),
                        report_obj.category,
                    )
                    item.flags["report"] = r
            if is_flagged(item, "subworkflow"):
                if output:
                    raise SyntaxError(
                        "Only input files may refer to a subworkflow")
                else:
                    # record the workflow this item comes from
                    sub = item.flags["subworkflow"]
                    if _item in self.subworkflow_input:
                        other = self.subworkflow_input[_item]
                        if sub != other:
                            raise WorkflowError(
                                "The input file {} is ambiguously "
                                "associated with two subworkflows "
                                "{} and {}.".format(item, sub, other),
                                rule=self,
                            )
                    self.subworkflow_input[_item] = sub
            inoutput.append(_item)
            if name:
                inoutput._add_name(name)
        elif callable(item):
            if output:
                raise SyntaxError(
                    "Only input files can be specified as functions")
            inoutput.append(item)
            if name:
                inoutput._add_name(name)
        else:
            try:
                start = len(inoutput)
                for i in item:
                    self._set_inoutput_item(i, output=output)
                if name:
                    # if the list was named, make it accessible
                    inoutput._set_name(name, start, end=len(inoutput))
            except TypeError:
                raise SyntaxError(
                    "Input and output files have to be specified as strings or lists of strings."
                )
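
The fallback branch at the end of _set_inoutput_item is essentially a recursive flattener: anything that is neither a string nor a callable is iterated and fed back into the same method, and a TypeError from the iteration is translated into the familiar SyntaxError. A reduced standalone sketch of that control flow (the helper name flatten is not from Snakemake):

    def flatten(item, out):
        """Recursively collect strings from arbitrarily nested iterables."""
        if isinstance(item, str):
            out.append(item)
        else:
            try:
                for i in item:
                    flatten(i, out)
            except TypeError:
                raise SyntaxError(
                    "Input and output files have to be specified as strings "
                    "or lists of strings."
                )

    out = []
    flatten(["a.txt", ["b.txt", ("c.txt",)]], out)
    print(out)  # ['a.txt', 'b.txt', 'c.txt']

    try:
        flatten(42, out)
    except SyntaxError as err:
        print(err)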
Example #8
class Rule:
    def __init__(self, *args, lineno=None, snakefile=None, restart_times=0):
        """
        Create a rule

        Arguments
        name -- the name of the rule
        """
        if len(args) == 2:
            name, workflow = args
            self.name = name
            self.workflow = workflow
            self.docstring = None
            self.message = None
            self._input = InputFiles()
            self._output = OutputFiles()
            self._params = Params()
            self._wildcard_constraints = dict()
            self.dependencies = dict()
            self.dynamic_output = set()
            self.dynamic_input = set()
            self.temp_output = set()
            self.protected_output = set()
            self.touch_output = set()
            self.subworkflow_input = dict()
            self.shadow_depth = None
            self.resources = None
            self.priority = 0
            self._version = None
            self._log = Log()
            self._benchmark = None
            self._conda_env = None
            self._singularity_img = None
            self.env_modules = None
            self.group = None
            self._wildcard_names = None
            self.lineno = lineno
            self.snakefile = snakefile
            self.run_func = None
            self.shellcmd = None
            self.script = None
            self.notebook = None
            self.wrapper = None
            self.cwl = None
            self.norun = False
            self.is_branched = False
            self.is_checkpoint = False
            self.restart_times = 0
            self.basedir = None
        elif len(args) == 1:
            other = args[0]
            self.name = other.name
            self.workflow = other.workflow
            self.docstring = other.docstring
            self.message = other.message
            self._input = InputFiles(other._input)
            self._output = OutputFiles(other._output)
            self._params = Params(other._params)
            self._wildcard_constraints = dict(other._wildcard_constraints)
            self.dependencies = dict(other.dependencies)
            self.dynamic_output = set(other.dynamic_output)
            self.dynamic_input = set(other.dynamic_input)
            self.temp_output = set(other.temp_output)
            self.protected_output = set(other.protected_output)
            self.touch_output = set(other.touch_output)
            self.subworkflow_input = dict(other.subworkflow_input)
            self.shadow_depth = other.shadow_depth
            self.resources = other.resources
            self.priority = other.priority
            self.version = other.version
            self._log = other._log
            self._benchmark = other._benchmark
            self._conda_env = other._conda_env
            self._singularity_img = other._singularity_img
            self.env_modules = other.env_modules
            self.group = other.group
            self._wildcard_names = (set(other._wildcard_names)
                                    if other._wildcard_names is not None else
                                    None)
            self.lineno = other.lineno
            self.snakefile = other.snakefile
            self.run_func = other.run_func
            self.shellcmd = other.shellcmd
            self.script = other.script
            self.notebook = other.notebook
            self.wrapper = other.wrapper
            self.cwl = other.cwl
            self.norun = other.norun
            self.is_branched = True
            self.is_checkpoint = other.is_checkpoint
            self.restart_times = other.restart_times
            self.basedir = other.basedir

    def dynamic_branch(self, wildcards, input=True):
        def get_io(rule):
            return ((rule.input, rule.dynamic_input) if input else
                    (rule.output, rule.dynamic_output))

        def partially_expand(f, wildcards):
            """Expand the wildcards in f from the ones present in wildcards

            This is done by replacing all wildcard delimiters by `{{` or `}}`
            that are not in `wildcards.keys()`.
            """
            # perform the partial expansion from f's string representation
            s = str(f).replace("{", "{{").replace("}", "}}")
            for key in wildcards.keys():
                s = s.replace("{{{{{}}}}}".format(key), "{{{}}}".format(key))
            # build result
            anno_s = AnnotatedString(s)
            anno_s.flags = f.flags
            return IOFile(anno_s, f.rule)

        io, dynamic_io = get_io(self)

        branch = Rule(self)
        io_, dynamic_io_ = get_io(branch)

        expansion = collections.defaultdict(list)
        for i, f in enumerate(io):
            if f in dynamic_io:
                f = partially_expand(f, wildcards)
                try:
                    for e in reversed(expand(str(f), zip, **wildcards)):
                        # need to clone the flags so intermediate
                        # dynamic remote file paths are expanded and
                        # removed appropriately
                        ioFile = IOFile(e, rule=branch)
                        ioFile.clone_flags(f)
                        expansion[i].append(ioFile)
                except KeyError:
                    return None

        # replace the dynamic files with the expanded files
        replacements = [(i, io[i], e)
                        for i, e in reversed(list(expansion.items()))]
        for i, old, exp in replacements:
            dynamic_io_.remove(old)
            io_._insert_items(i, exp)

        if not input:
            for i, old, exp in replacements:
                if old in branch.temp_output:
                    branch.temp_output.discard(old)
                    branch.temp_output.update(exp)
                if old in branch.protected_output:
                    branch.protected_output.discard(old)
                    branch.protected_output.update(exp)
                if old in branch.touch_output:
                    branch.touch_output.discard(old)
                    branch.touch_output.update(exp)

            branch.wildcard_names.clear()
            non_dynamic_wildcards = dict((name, values[0])
                                         for name, values in wildcards.items()
                                         if len(set(values)) == 1)
            # TODO have a look into how to concretize dependencies here
            branch._input, _, branch.dependencies = branch.expand_input(
                non_dynamic_wildcards)
            branch._output, _ = branch.expand_output(non_dynamic_wildcards)

            resources = branch.expand_resources(non_dynamic_wildcards,
                                                branch._input, 1)
            branch._params = branch.expand_params(
                non_dynamic_wildcards,
                branch._input,
                branch._output,
                resources,
                omit_callable=True,
            )
            branch.resources = dict(resources.items())

            branch._log = branch.expand_log(non_dynamic_wildcards)
            branch._benchmark = branch.expand_benchmark(non_dynamic_wildcards)
            branch._conda_env = branch.expand_conda_env(non_dynamic_wildcards)
            return branch, non_dynamic_wildcards
        return branch

    def check_caching(self):
        if self.name in self.workflow.cache_rules:
            if len(self.output) == 0:
                raise RuleException(
                    "Rules without output files cannot be cached.", rule=self)
            if len(self.output) > 1:
                prefixes = set(out.multiext_prefix for out in self.output)
                if None in prefixes or len(prefixes) > 1:
                    raise RuleException(
                        "Rules with multiple output files must define them as a single multiext() "
                        '(e.g. multiext("path/to/index", ".bwt", ".ann")). '
                        "The rationale is that multiple output files can only be unambiously resolved "
                        "if they can be distinguished by a fixed set of extensions (i.e. mime types).",
                        rule=self,
                    )
            if self.dynamic_output:
                raise RuleException(
                    "Rules with dynamic output files may not be cached.",
                    rule=self)

    def has_wildcards(self):
        """
        Return True if rule contains wildcards.
        """
        return bool(self.wildcard_names)

    @property
    def version(self):
        return self._version

    @version.setter
    def version(self, version):
        if isinstance(version, str) and "\n" in version:
            raise WorkflowError("Version string may not contain line breaks.",
                                rule=self)
        self._version = version

    @property
    def benchmark(self):
        return self._benchmark

    @benchmark.setter
    def benchmark(self, benchmark):
        if isinstance(benchmark, Path):
            benchmark = str(benchmark)
        if not callable(benchmark):
            benchmark = self.apply_default_remote(benchmark)
            benchmark = self._update_item_wildcard_constraints(benchmark)

        self._benchmark = IOFile(benchmark, rule=self)
        self.register_wildcards(self._benchmark.get_wildcard_names())

    @property
    def conda_env(self):
        return self._conda_env

    @conda_env.setter
    def conda_env(self, conda_env):
        self._conda_env = IOFile(conda_env, rule=self)

    @property
    def singularity_img(self):
        return self._singularity_img

    @singularity_img.setter
    def singularity_img(self, singularity_img):
        self._singularity_img = singularity_img

    @property
    def input(self):
        return self._input

    def set_input(self, *input, **kwinput):
        """
        Add a list of input files. Recursive lists are flattened.

        Arguments
        input -- the list of input files
        """
        for item in input:
            self._set_inoutput_item(item)
        for name, item in kwinput.items():
            self._set_inoutput_item(item, name=name)

    @property
    def output(self):
        return self._output

    @property
    def products(self):
        if self.benchmark:
            return chain(self.output, self.log, [self.benchmark])
        else:
            return chain(self.output, self.log)

    def register_wildcards(self, wildcard_names):
        if self._wildcard_names is None:
            self._wildcard_names = wildcard_names
        else:
            if self.wildcard_names != wildcard_names:
                raise SyntaxError("Not all output, log and benchmark files of "
                                  "rule {} contain the same wildcards. "
                                  "This is crucial though, in order to "
                                  "avoid that two or more jobs write to the "
                                  "same file.".format(self.name))

    @property
    def wildcard_names(self):
        if self._wildcard_names is None:
            return set()
        return self._wildcard_names

    def set_output(self, *output, **kwoutput):
        """
        Add a list of output files. Recursive lists are flattened.

        After creating the output files, they are checked for duplicates.

        Arguments
        output -- the list of output files
        """
        for item in output:
            self._set_inoutput_item(item, output=True)
        for name, item in kwoutput.items():
            self._set_inoutput_item(item, output=True, name=name)

        for item in self.output:
            if self.dynamic_output and item not in self.dynamic_output:
                raise SyntaxError(
                    "A rule with dynamic output may not define any "
                    "non-dynamic output files.")
            self.register_wildcards(item.get_wildcard_names())
        # Check output file name list for duplicates
        self.check_output_duplicates()
        self.check_caching()

    def check_output_duplicates(self):
        """Check ``Namedlist`` for duplicate entries and raise a ``WorkflowError``
        on problems.
        """
        seen = dict()
        idx = None
        for name, value in self.output._allitems():
            if name is None:
                if idx is None:
                    idx = 0
                else:
                    idx += 1
            if value in seen:
                raise WorkflowError(
                    "Duplicate output file pattern in rule {}. First two "
                    "duplicate for entries {} and {}".format(
                        self.name, seen[value], name or idx))
            seen[value] = name or idx

    def apply_default_remote(self, item):
        def is_annotated_callable(value):
            if isinstance(value, AnnotatedString):
                return bool(value.callable)

        def apply(value):
            if (not is_flagged(value, "remote_object")
                    and not is_flagged(value, "local")
                    and not is_annotated_callable(value)
                    and self.workflow.default_remote_provider is not None):
                return self.workflow.apply_default_remote(value)
            else:
                return value

        assert not callable(item)
        if isinstance(item, dict):
            return {k: apply(v) for k, v in item.items()}
        elif isinstance(
                item, collections.abc.Iterable) and not isinstance(item, str):
            return [apply(e) for e in item]
        else:
            return apply(item)

    def update_wildcard_constraints(self):
        for i in range(len(self.output)):
            item = self.output[i]
            newitem = IOFile(self._update_item_wildcard_constraints(
                self.output[i]),
                             rule=self)
            # the updated item has to have the same flags
            newitem.clone_flags(item)
            self.output[i] = newitem

    def _update_item_wildcard_constraints(self, item):
        if not (self.wildcard_constraints
                or self.workflow._wildcard_constraints):
            return item
        try:
            return update_wildcard_constraints(
                item, self.wildcard_constraints,
                self.workflow._wildcard_constraints)
        except ValueError as e:
            raise IOFileException(str(e),
                                  snakefile=self.snakefile,
                                  lineno=self.lineno)

    def _set_inoutput_item(self, item, output=False, name=None):
        """
        Set an item to be input or output.

        Arguments
        item     -- the item
        inoutput -- a Namedlist of either input or output items
        name     -- an optional name for the item
        """
        inoutput = self.output if output else self.input
        # Check to see if the item is a path, if so, just make it a string
        if isinstance(item, Path):
            item = str(item)
        if isinstance(item, str):
            item = self.apply_default_remote(item)

            # Check to see that all flags are valid
            # Note that "remote", "dynamic", and "expand" are valid for both inputs and outputs.
            if isinstance(item, AnnotatedString):
                for flag in item.flags:
                    if not output and flag in [
                            "protected",
                            "temp",
                            "temporary",
                            "directory",
                            "touch",
                            "pipe",
                    ]:
                        logger.warning(
                            "The flag '{}' used in rule {} is only valid for outputs, not inputs."
                            .format(flag, self))
                    if output and flag in ["ancient"]:
                        logger.warning(
                            "The flag '{}' used in rule {} is only valid for inputs, not outputs."
                            .format(flag, self))

            # add the rule to the dependencies
            if isinstance(item,
                          _IOFile) and item.rule and item in item.rule.output:
                self.dependencies[item] = item.rule
            if output:
                item = self._update_item_wildcard_constraints(item)
            else:
                if (contains_wildcard_constraints(item)
                        and self.workflow.mode != Mode.subprocess):
                    logger.warning(
                        "Wildcard constraints in inputs are ignored.")
            # record rule if this is an output file
            _item = IOFile(item, rule=self)
            if is_flagged(item, "temp"):
                if output:
                    self.temp_output.add(_item)
            if is_flagged(item, "protected"):
                if output:
                    self.protected_output.add(_item)
            if is_flagged(item, "touch"):
                if output:
                    self.touch_output.add(_item)
            if is_flagged(item, "dynamic"):
                if output:
                    self.dynamic_output.add(_item)
                else:
                    self.dynamic_input.add(_item)
            if is_flagged(item, "report"):
                report_obj = item.flags["report"]
                if report_obj.caption is not None:
                    r = ReportObject(
                        os.path.join(self.workflow.current_basedir,
                                     report_obj.caption),
                        report_obj.category,
                    )
                    item.flags["report"] = r
            if is_flagged(item, "subworkflow"):
                if output:
                    raise SyntaxError(
                        "Only input files may refer to a subworkflow")
                else:
                    # record the workflow this item comes from
                    sub = item.flags["subworkflow"]
                    if _item in self.subworkflow_input:
                        other = self.subworkflow_input[_item]
                        if sub != other:
                            raise WorkflowError(
                                "The input file {} is ambiguously "
                                "associated with two subworkflows "
                                "{} and {}.".format(item, sub, other),
                                rule=self,
                            )
                    self.subworkflow_input[_item] = sub
            inoutput.append(_item)
            if name:
                inoutput._add_name(name)
        elif callable(item):
            if output:
                raise SyntaxError(
                    "Only input files can be specified as functions")
            inoutput.append(item)
            if name:
                inoutput._add_name(name)
        else:
            try:
                start = len(inoutput)
                for i in item:
                    self._set_inoutput_item(i, output=output)
                if name:
                    # if the list was named, make it accessible
                    inoutput._set_name(name, start, end=len(inoutput))
            except TypeError:
                raise SyntaxError(
                    "Input and output files have to be specified as strings or lists of strings."
                )

    @property
    def params(self):
        return self._params

    def set_params(self, *params, **kwparams):
        for item in params:
            self._set_params_item(item)
        for name, item in kwparams.items():
            self._set_params_item(item, name=name)

    def _set_params_item(self, item, name=None):
        self.params.append(item)
        if name:
            self.params._add_name(name)

    @property
    def wildcard_constraints(self):
        return self._wildcard_constraints

    def set_wildcard_constraints(self, **kwwildcard_constraints):
        self._wildcard_constraints.update(kwwildcard_constraints)

    @property
    def log(self):
        return self._log

    def set_log(self, *logs, **kwlogs):
        for item in logs:
            self._set_log_item(item)
        for name, item in kwlogs.items():
            self._set_log_item(item, name=name)

        for item in self.log:
            self.register_wildcards(item.get_wildcard_names())

    def _set_log_item(self, item, name=None):
        # Pathlib compatibility
        if isinstance(item, Path):
            item = str(item)
        if isinstance(item, str) or callable(item):
            if not callable(item):
                item = self.apply_default_remote(item)
                item = self._update_item_wildcard_constraints(item)

            self.log.append(
                IOFile(item, rule=self) if isinstance(item, str) else item)
            if name:
                self.log._add_name(name)
        else:
            try:
                start = len(self.log)
                for i in item:
                    self._set_log_item(i)
                if name:
                    self.log._set_name(name, start, end=len(self.log))
            except TypeError:
                raise SyntaxError("Log files have to be specified as strings.")

    def check_wildcards(self, wildcards):
        missing_wildcards = self.wildcard_names - set(wildcards.keys())

        if missing_wildcards:
            raise RuleException(
                "Could not resolve wildcards in rule {}:\n{}".format(
                    self.name, "\n".join(self.wildcard_names)),
                lineno=self.lineno,
                snakefile=self.snakefile,
            )

    def apply_input_function(self,
                             func,
                             wildcards,
                             incomplete_checkpoint_func=lambda e: None,
                             raw_exceptions=False,
                             **aux_params):
        incomplete = False
        if isinstance(func, _IOFile):
            func = func._file.callable
        elif isinstance(func, AnnotatedString):
            func = func.callable
        sig = inspect.signature(func)
        _aux_params = {
            k: v
            for k, v in aux_params.items() if k in sig.parameters
        }
        try:
            value = func(Wildcards(fromdict=wildcards), **_aux_params)
        except IncompleteCheckpointException as e:
            value = incomplete_checkpoint_func(e)
            incomplete = True
        except (Exception, BaseException) as e:
            if raw_exceptions:
                raise e
            else:
                raise InputFunctionException(e, rule=self, wildcards=wildcards)
        return value, incomplete

    def _apply_wildcards(
        self,
        newitems,
        olditems,
        wildcards,
        concretize=None,
        check_return_type=True,
        omit_callable=False,
        mapping=None,
        no_flattening=False,
        aux_params=None,
        apply_default_remote=True,
        incomplete_checkpoint_func=lambda e: None,
        allow_unpack=True,
    ):
        if aux_params is None:
            aux_params = dict()
        for name, item in olditems._allitems():
            start = len(newitems)
            is_unpack = is_flagged(item, "unpack")
            _is_callable = is_callable(item)
            # ensure 'incomplete' is defined even when the item is not callable
            incomplete = False

            if _is_callable:
                if omit_callable:
                    continue
                item, incomplete = self.apply_input_function(
                    item,
                    wildcards,
                    incomplete_checkpoint_func=incomplete_checkpoint_func,
                    is_unpack=is_unpack,
                    **aux_params)
                if apply_default_remote:
                    item = self.apply_default_remote(item)

            if is_unpack and not incomplete:
                if not allow_unpack:
                    raise WorkflowError(
                        "unpack() is not allowed with params. "
                        "Simply return a dictionary which can be directly ."
                        "used, e.g. via {params[mykey]}.")
                # Sanity checks before interpreting unpack()
                if not isinstance(item, (list, dict)):
                    raise WorkflowError(
                        "Can only use unpack() on list and dict", rule=self)
                if name:
                    raise WorkflowError(
                        "Cannot combine named input file with unpack()",
                        rule=self)
                # Allow streamlined code with/without unpack
                if isinstance(item, list):
                    pairs = zip([None] * len(item), item)
                else:
                    assert isinstance(item, dict)
                    pairs = item.items()
            else:
                pairs = [(name, item)]

            for name, item in pairs:
                is_iterable = True
                if not_iterable(item) or no_flattening:
                    item = [item]
                    is_iterable = False
                for item_ in item:
                    if check_return_type and not isinstance(item_, str):
                        raise WorkflowError(
                            "Function did not return str or list "
                            "of str.",
                            rule=self)
                    concrete = concretize(item_, wildcards, _is_callable)
                    newitems.append(concrete)
                    if mapping is not None:
                        mapping[concrete] = item_

                if name:
                    newitems._set_name(
                        name,
                        start,
                        end=len(newitems) if is_iterable else None)
                    start = len(newitems)

    def expand_input(self, wildcards):
        def concretize_iofile(f, wildcards, is_from_callable):
            if is_from_callable:
                if isinstance(f, Path):
                    f = str(f)
                return IOFile(f, rule=self).apply_wildcards(
                    wildcards,
                    fill_missing=f in self.dynamic_input,
                    fail_dynamic=self.dynamic_output,
                )
            else:
                return f.apply_wildcards(
                    wildcards,
                    fill_missing=f in self.dynamic_input,
                    fail_dynamic=self.dynamic_output,
                )

        def handle_incomplete_checkpoint(exception):
            """If checkpoint is incomplete, target it such that it is completed
            before this rule gets executed."""
            return exception.targetfile

        input = InputFiles()
        mapping = dict()
        try:
            self._apply_wildcards(
                input,
                self.input,
                wildcards,
                concretize=concretize_iofile,
                mapping=mapping,
                incomplete_checkpoint_func=handle_incomplete_checkpoint,
            )
        except WildcardError as e:
            raise WildcardError(
                "Wildcards in input files cannot be "
                "determined from output files:",
                str(e),
                rule=self,
            )

        if self.dependencies:
            dependencies = {
                f: self.dependencies[f_]
                for f, f_ in mapping.items() if f_ in self.dependencies
            }
            if None in self.dependencies:
                dependencies[None] = self.dependencies[None]
        else:
            dependencies = self.dependencies

        for f in input:
            f.check()

        return input, mapping, dependencies

    def expand_params(self,
                      wildcards,
                      input,
                      output,
                      resources,
                      omit_callable=False):
        def concretize_param(p, wildcards, is_from_callable):
            if not is_from_callable:
                if isinstance(p, str):
                    return apply_wildcards(p, wildcards)
                if isinstance(p, list):
                    return [(apply_wildcards(v, wildcards) if isinstance(
                        v, str) else v) for v in p]
            return p

        params = Params()
        try:
            # When applying wildcards to params, the return type need not be
            # a string, so the check is disabled.
            self._apply_wildcards(
                params,
                self.params,
                wildcards,
                concretize=concretize_param,
                check_return_type=False,
                omit_callable=omit_callable,
                allow_unpack=False,
                no_flattening=True,
                apply_default_remote=False,
                aux_params={
                    "input": input._plainstrings(),
                    "resources": resources,
                    "output": output._plainstrings(),
                    "threads": resources._cores,
                },
                incomplete_checkpoint_func=lambda e: "<incomplete checkpoint>",
            )
        except WildcardError as e:
            raise WildcardError(
                "Wildcards in params cannot be "
                "determined from output files. Note that you have "
                "to use a function to deactivate automatic wildcard expansion "
                "in params strings, e.g., `lambda wildcards: '{test}'`. Also "
                "see https://snakemake.readthedocs.io/en/stable/snakefiles/"
                "rules.html#non-file-parameters-for-rules:",
                str(e),
                rule=self,
            )
        return params

    def expand_output(self, wildcards):
        output = OutputFiles(o.apply_wildcards(wildcards) for o in self.output)
        output._take_names(self.output._get_names())
        mapping = {f: f_ for f, f_ in zip(output, self.output)}

        for f in output:
            f.check()

        # Note that we do not need to check for duplicate file names after
        # expansion as all output patterns have to contain all wildcards anyway.

        return output, mapping

    def expand_log(self, wildcards):
        def concretize_logfile(f, wildcards, is_from_callable):
            if is_from_callable:
                return IOFile(f, rule=self)
            else:
                return f.apply_wildcards(wildcards,
                                         fill_missing=False,
                                         fail_dynamic=self.dynamic_output)

        log = Log()

        try:
            self._apply_wildcards(log,
                                  self.log,
                                  wildcards,
                                  concretize=concretize_logfile)
        except WildcardError as e:
            raise WildcardError(
                "Wildcards in log files cannot be "
                "determined from output files:",
                str(e),
                rule=self,
            )

        for f in log:
            f.check()

        return log

    def expand_benchmark(self, wildcards):
        try:
            benchmark = (self.benchmark.apply_wildcards(wildcards)
                         if self.benchmark else None)
        except WildcardError as e:
            raise WildcardError(
                "Wildcards in benchmark file cannot be "
                "determined from output files:",
                str(e),
                rule=self,
            )

        if benchmark is not None:
            benchmark.check()

        return benchmark

    def expand_resources(self, wildcards, input, attempt):
        resources = dict()

        def apply(name, res, threads=None):
            if callable(res):
                aux = dict(rulename=self.name)
                if threads:
                    aux["threads"] = threads
                try:
                    try:
                        res, _ = self.apply_input_function(
                            res,
                            wildcards,
                            input=input,
                            attempt=attempt,
                            incomplete_checkpoint_func=lambda e: 0,
                            raw_exceptions=True,
                            **aux)
                    except FileNotFoundError as e:
                        # Resources can depend on input files. Since expansion can happen during dryrun,
                        # where input files are not yet present, we need to skip such resources and
                        # mark them as [TBD].
                        if e.filename in input:
                            # use zero for resource if it cannot yet be determined
                            res = TBDInt(0)
                        else:
                            raise e
                except (Exception, BaseException) as e:
                    raise InputFunctionException(e,
                                                 rule=self,
                                                 wildcards=wildcards)

                if not isinstance(res, int):
                    raise WorkflowError(
                        "Resources function did not return int.", rule=self)
            res = min(self.workflow.global_resources.get(name, res), res)
            return res

        threads = apply("_cores", self.resources["_cores"])
        resources["_cores"] = threads

        for name, res in self.resources.items():
            if name != "_cores":
                resources[name] = apply(name, res, threads=threads)
        resources = Resources(fromdict=resources)
        return resources

    def expand_group(self, wildcards):
        """Expand the group given wildcards."""
        if callable(self.group):
            item, _ = self.apply_input_function(self.group, wildcards)
            return item
        elif isinstance(self.group, str):
            return apply_wildcards(self.group,
                                   wildcards,
                                   dynamic_fill=DYNAMIC_FILL)
        else:
            return self.group

    def expand_conda_env(self, wildcards):
        try:
            conda_env = (self.conda_env.apply_wildcards(wildcards)
                         if self.conda_env else None)
        except WildcardError as e:
            raise WildcardError(
                "Wildcards in conda environment file cannot be "
                "determined from output files:",
                str(e),
                rule=self,
            )

        if conda_env is not None:
            conda_env.check()

        return conda_env

    def is_producer(self, requested_output):
        """
        Returns True if this rule is a producer of the requested output.
        """
        try:
            for o in self.products:
                if o.match(requested_output):
                    return True
            return False
        except sre_constants.error as ex:
            raise IOFileException(
                "{} in wildcard statement".format(ex),
                snakefile=self.snakefile,
                lineno=self.lineno,
            )
        except ValueError as ex:
            raise IOFileException("{}".format(ex),
                                  snakefile=self.snakefile,
                                  lineno=self.lineno)

    def get_wildcards(self, requested_output):
        """
        Return wildcard dictionary by matching regular expression
        output files to the requested concrete ones.

        Arguments
        requested_output -- a concrete filepath
        """
        if requested_output is None:
            return dict()
        bestmatchlen = 0
        bestmatch = None

        for o in self.products:
            match = o.match(requested_output)
            if match:
                l = self.get_wildcard_len(match.groupdict())
                if not bestmatch or bestmatchlen > l:
                    bestmatch = match.groupdict()
                    bestmatchlen = l
        self.check_wildcards(bestmatch)
        return bestmatch

    @staticmethod
    def get_wildcard_len(wildcards):
        """
        Return the length of the given wildcard values.

        Arguments
        wildcards -- a dict of wildcards
        """
        return sum(map(len, wildcards.values()))

    def __lt__(self, rule):
        comp = self.workflow._ruleorder.compare(self, rule)
        return comp < 0

    def __gt__(self, rule):
        comp = self.workflow._ruleorder.compare(self, rule)
        return comp > 0

    def __str__(self):
        return self.name

    def __hash__(self):
        return self.name.__hash__()

    def __eq__(self, other):
        return self.name == other.name and self.output == other.output
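
One detail of apply_input_function worth isolating is how auxiliary keyword arguments are passed to the callback only when its signature actually declares them, so older input functions keep working when new parameters are introduced. A minimal standalone version of that filtering, assuming nothing beyond the standard library (the function and parameter names here are arbitrary):

    import inspect

    def call_with_supported_kwargs(func, **aux_params):
        sig = inspect.signature(func)
        supported = {k: v for k, v in aux_params.items() if k in sig.parameters}
        return func(**supported)

    def input_func(wildcards=None, threads=None):
        return "threads={}".format(threads)

    # 'attempt' is dropped silently because input_func does not accept it
    print(call_with_supported_kwargs(input_func, threads=4, attempt=1))  # threads=4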
Example #9
    def _set_inoutput_item(self, item, output=False, name=None):
        """
        Set an item to be input or output.

        Arguments
        item     -- the item
        inoutput -- either a Namedlist of input or output items
        name     -- an optional name for the item
        """
        inoutput = self.output if output else self.input
        if isinstance(item, str):
            # add the rule to the dependencies
            if isinstance(item, _IOFile):
                self.dependencies[item] = item.rule
            _item = IOFile(item, rule=self)
            if is_flagged(item, "temp"):
                if not output:
                    raise SyntaxError("Only output files may be temporary")
                self.temp_output.add(_item)
            if is_flagged(item, "protected"):
                if not output:
                    raise SyntaxError("Only output files may be protected")
                self.protected_output.add(_item)
            if is_flagged(item, "touch"):
                if not output:
                    raise SyntaxError(
                        "Only output files may be marked for touching.")
                self.touch_output.add(_item)
            if is_flagged(item, "dynamic"):
                if output:
                    self.dynamic_output.add(_item)
                else:
                    self.dynamic_input.add(_item)
            if is_flagged(item, "subworkflow"):
                if output:
                    raise SyntaxError(
                        "Only input files may refer to a subworkflow")
                else:
                    # record the workflow this item comes from
                    self.subworkflow_input[_item] = item.flags["subworkflow"]
            inoutput.append(_item)
            if name:
                inoutput.add_name(name)
        elif callable(item):
            if output:
                raise SyntaxError(
                    "Only input files can be specified as functions")
            inoutput.append(item)
            if name:
                inoutput.add_name(name)
        else:
            try:
                start = len(inoutput)
                for i in item:
                    self._set_inoutput_item(i, output=output)
                if name:
                    # if the list was named, make it accessible
                    inoutput.set_name(name, start, end=len(inoutput))
            except TypeError:
                raise SyntaxError(
                    "Input and output files have to be specified as strings or lists of strings."
                )
Example #10
 def benchmark(self, benchmark):
     self._benchmark = IOFile(benchmark, rule=self)
Example #11
    def _set_inoutput_item(self, item, output=False, name=None):
        """
        Set an item to be input or output.

        Arguments
        item     -- the item
        inoutput -- either a Namedlist of input or output items
        name     -- an optional name for the item
        """
        inoutput = self.output if output else self.input
        if isinstance(item, str):
            # add the rule to the dependencies
            if isinstance(item, _IOFile):
                self.dependencies[item] = item.rule
            if output:
                if self.wildcard_constraints or self.workflow._wildcard_constraints:
                    try:
                        item = update_wildcard_constraints(
                            item, self.wildcard_constraints,
                            self.workflow._wildcard_constraints)
                    except ValueError as e:
                        raise IOFileException(str(e),
                                              snakefile=self.snakefile,
                                              lineno=self.lineno)
            else:
                if contains_wildcard_constraints(item):
                    logger.warning(
                        "wildcard constraints in inputs are ignored")
            _item = IOFile(item, rule=self)
            if is_flagged(item, "temp"):
                if output:
                    self.temp_output.add(_item)
            if is_flagged(item, "protected"):
                if output:
                    self.protected_output.add(_item)
            if is_flagged(item, "touch"):
                if output:
                    self.touch_output.add(_item)
            if is_flagged(item, "dynamic"):
                if output:
                    self.dynamic_output.add(_item)
                else:
                    self.dynamic_input.add(_item)
            if is_flagged(item, "subworkflow"):
                if output:
                    raise SyntaxError(
                        "Only input files may refer to a subworkflow")
                else:
                    # record the workflow this item comes from
                    self.subworkflow_input[_item] = item.flags["subworkflow"]
            inoutput.append(_item)
            if name:
                inoutput.add_name(name)
        elif callable(item):
            if output:
                raise SyntaxError(
                    "Only input files can be specified as functions")
            inoutput.append(item)
            if name:
                inoutput.add_name(name)
        else:
            try:
                start = len(inoutput)
                for i in item:
                    self._set_inoutput_item(i, output=output)
                if name:
                    # if the list was named, make it accessible
                    inoutput.set_name(name, start, end=len(inoutput))
            except TypeError:
                raise SyntaxError(
                    "Input and output files have to be specified as strings or lists of strings."
                )
Example #12
 def _to_iofile(self, files):
     for i in range(len(files)):
         files[i] = IOFile(files[i], rule=self.rule)
     return files
Example #13
 def log(self, log):
     self._log = IOFile(log, rule=self)