Пример #1
0
class TestRules(unittest.TestCase):
    """Tests for ``create_rule_from_existing`` on a workflow with one rule."""

    def setUp(self):
        # Fresh workflow holding a single template rule called "bar".
        self.workflow = Workflow("foo")
        self.workflow.add_rule(name="bar")
        template = self.workflow._rules["bar"]
        template.set_output(foo="bar")
        template.set_params(cmd="foo", options=["foo", "bar"])

    def _rule_names(self):
        """Return the names of the rules currently held by the workflow."""
        return [rule.name for rule in self.workflow.rules]

    def test_create_rule(self):
        # Only the template exists at first; the new rule is listed after it.
        self.assertListEqual(["bar"], self._rule_names())
        create_rule_from_existing(
            name="foo", template="bar", workflow=self.workflow)
        self.assertListEqual(["bar", "foo"], self._rule_names())

    @raises(NoRulesException)
    def test_create_rule_empty_workflow(self):
        # A workflow without any rule cannot supply a template.
        empty = Workflow("foo")
        create_rule_from_existing(name="foo", template="bar", workflow=empty)

    @raises(AssertionError)
    def test_create_rule_wrong_workflow(self):
        create_rule_from_existing(name="foo", template="bar", workflow=None)

    @raises(UnknownRuleException)
    def test_create_rule_wrong_template(self):
        # "foo" is not a rule of the workflow built in setUp.
        create_rule_from_existing(
            name="bar", template="foo", workflow=self.workflow)

    def test_create_rule_add_output(self):
        create_rule_from_existing(
            name="foo", template="bar", workflow=self.workflow,
            output=((), {'bar': 'foo'}))
        created = self.workflow.get_rule("foo")
        self.assertDictEqual({'bar': 'foo'}, dict(created.output))

    def test_create_rule_add_params(self):
        expected = {'cmd': 'bar', 'options': ['foo']}
        create_rule_from_existing(
            name="foo", template="bar", workflow=self.workflow,
            params=((), {'cmd': 'bar', 'options': ['foo']}))
        created = self.workflow.get_rule("foo")
        self.assertDictEqual(expected, dict(created.params))
Пример #2
0
def bash_completion(snakefile="Snakefile"):
    """Print bash completion candidates for the prefix in ``sys.argv[2]``.

    If the prefix starts with "-", the first option string of every action of
    the argument parser that starts with the prefix is printed. Otherwise the
    files matching ``prefix*`` are printed; if there are none and *snakefile*
    exists, the workflow is loaded and its concrete files and rule names
    starting with the prefix are printed instead.

    Args:
        snakefile: path of the snakefile consulted for file and rule names.

    Raises:
        SystemExit: always. The exit code is 1 when fewer than three
            command line entries are present, 0 otherwise.
    """
    # The prefix is read from sys.argv[2], so three entries are required.
    if len(sys.argv) < 3:
        print("Calculate bash completion for snakemake. This tool shall not be invoked by hand.")
        sys.exit(1)

    prefix = sys.argv[2]

    if prefix.startswith("-"):
        opts = [
            action.option_strings[0] for action in get_argument_parser()._actions
            if action.option_strings and action.option_strings[0].startswith(prefix)]
        print(*opts, sep="\n")

    else:
        files = glob.glob("{}*".format(prefix))
        if files:
            print(*files, sep="\n")
        elif os.path.exists(snakefile):
            workflow = Workflow(snakefile=snakefile, snakemakepath=get_snakemake_path())
            workflow.include(snakefile)

            # de-duplicate and sort the matching files for stable output
            workflow_files = sorted({
                file for file in workflow.concrete_files
                if file.startswith(prefix)})
            if workflow_files:
                print(*workflow_files, sep="\n")

            rules = [
                rule.name for rule in workflow.rules
                if rule.name.startswith(prefix)]
            if rules:
                print(*rules, sep="\n")
    sys.exit(0)
Пример #3
0
def load_rule_args(snakefile, rule_name, default_wildcards=None, change_dir=False):
    """
    Returns the expanded arguments of a rule for some default wildcards.

    The snakefile is loaded into a workflow from within its own directory,
    the rule `rule_name` is looked up and its input, output and params are
    expanded with `default_wildcards`. The result is wrapped in a
    `SnakemakeRuleArgs` object.

    Args:
        snakefile: path to the snakefile; relative paths are resolved against
            the working directory at call time.
        rule_name: name of the rule to load.
        default_wildcards: dict of wildcard values; defaults to an empty dict.
        change_dir: if False, the previous working directory is restored
            before returning (also when an exception is raised); if True the
            process stays in the snakefile directory.

    Example usage:
        ```
        try:
            snakemake
        except NameError:
            snakefile_path = os.getcwd() + "/Snakefile"

            snakemake = load_rule_args(
                snakefile = snakefile_path,
                rule_name = 'create_prediction_target',
                default_wildcards={
                    'ds_dir': 'full_data_samplefilter'
                }
            )
        ```
    """
    # save current working dir for later
    cwd = os.getcwd()
    try:
        if default_wildcards is None:
            default_wildcards = dict()

        # Resolve the path before changing directory: a bare file name has an
        # empty dirname (os.chdir("") fails) and a relative path would no
        # longer point at the snakefile once the working directory changed.
        snakefile = os.path.abspath(snakefile)

        # change to snakefile directory
        os.chdir(os.path.dirname(snakefile))

        # load workflow
        workflow = Workflow(snakefile=snakefile)
        workflow.include(snakefile)
        # get rule
        rule = workflow.get_rule(rule_name)

        smk_input = dict(rule.expand_input(default_wildcards)[0])
        smk_output = dict(rule.expand_output(default_wildcards)[0])
        smk_params = dict(rule.expand_params(
            default_wildcards, rule.input, rule.output, AttrDict(rule.resources)))

        # setup rule arguments
        retval = SnakemakeRuleArgs(
            input=smk_input,
            params=smk_params,
            output=smk_output,
            wildcards=default_wildcards
        )
        return retval
    finally:
        if not change_dir:
            # change back to previous working directory
            os.chdir(cwd)
Пример #4
0
def snake(args):
    """Print some debugging output and prepare a workflow from *args*.

    Reads ``args.working_dir`` and ``args.cluster_cfg``; resets the
    module-level ``snakemake.workflow.rules`` and ``snakemake.workflow.config``.
    """
    # Debug output: the two tool names followed by the raw arguments.
    for shown in ('mtsv-chunk', 'mtsv-build', args):
        print(shown)

    print(partition(args))

    workflow = Workflow("__file__", overwrite_workdir=args.working_dir)
    # Only override the cluster configuration when one was supplied.
    if not (args.cluster_cfg is None):
        workflow.cluster_cfg = args.cluster_cfg

    snakemake.workflow.rules = Rules()
    snakemake.workflow.config = dict()
Пример #5
0
    def get_workflow(self):
        """ return the workflow object, creating it on first use

        TODO:
            * allow multiple workflows?
            * what kind of options to allow?
            * allow options every time or just first?
        """
        if self.workflow is not None:
            return self.workflow

        # No workflow yet: build one with some basic defaults. It is backed
        # by a blank temporary file so that snakemake has a snakefile path
        # (this file cannot be read from on some Windows systems...)
        root_file = tempfile.NamedTemporaryFile('w')
        self.tempfiles['root'] = root_file
        self.workflow = Workflow(snakefile=root_file.name)
        return self.workflow
Пример #6
0
def create_workflow(snakefile):
    """Load and check *snakefile* in a new workflow.

    Args:
        snakefile: path to the snakefile to include.

    Returns:
        A tuple ``(workflow, success)``. ``success`` is False when including
        or checking the snakefile raised; the exception is reported through
        ``print_exception`` instead of being propagated.
    """
    workflow = Workflow(snakefile=snakefile, use_conda=True)

    # Must be set up front: previously it was only assigned in the except
    # branch, so a successful load ended in a NameError at the return.
    success = True
    try:
        workflow.include(snakefile,
                         overwrite_first_rule=True,
                         print_compilation=False)
        workflow.check()
    except BaseException as ex:
        # BaseException is caught on purpose, as in the original
        # ``(Exception, BaseException)`` tuple.
        print_exception(ex, workflow.linemaps)
        success = False

    return workflow, success
Пример #7
0
def load_rule_args(snakefile, rule_name, default_wildcards=None, change_dir=False, create_dir=True, root=None):
    """
    Returns the expanded arguments of a rule for some default wildcards.

    The snakefile is loaded into a workflow from within `root`, the rule
    `rule_name` is looked up and its input, output and params are expanded
    with `default_wildcards`. Inputs and outputs are then passed through
    `map_custom_wd` together with `root`, and everything is wrapped in a
    `SnakemakeRuleArgs` object.

    Args:
        snakefile: path to the snakefile; relative paths are resolved against
            the working directory at call time.
        rule_name: name of the rule to load.
        default_wildcards: dict of wildcard values; defaults to an empty dict.
        change_dir: if False, the previous working directory is restored
            before returning (also when an exception is raised).
        create_dir: if True, `mk_dirs` is called on the mapped outputs
            (presumably creating the output directories -- see `mk_dirs`).
        root: directory to change into while loading. Defaults to the
            snakefile directory; a relative value is joined onto the
            snakefile directory.

    Example usage:
        ```
        snakefile_path = os.getcwd() + "/Snakefile"

        try:
            snakemake
        except NameError:
            snakemake = load_rule_args(
                snakefile = snakefile_path,
                rule_name = 'create_prediction_target',
                default_wildcards={
                    'ds_dir': 'full_data_samplefilter'
                },
                # root = "./" # path relative to snakefile
            )
        ```
    """
    # save current working dir for later
    cwd = os.getcwd()

    # Resolve the path before any chdir: a bare file name has an empty
    # dirname (os.chdir("") fails) and a relative path would no longer point
    # at the snakefile once the working directory changed.
    snakefile = os.path.abspath(snakefile)
    snakefile_dir = os.path.dirname(snakefile)

    if root is None:
        root = snakefile_dir
    elif not os.path.isabs(root):
        root = os.path.join(snakefile_dir, root)

    log.info("root dir: %s", root)

    try:
        if default_wildcards is None:
            default_wildcards = dict()

        # change to root directory
        os.chdir(root)

        # load workflow
        workflow = Workflow(snakefile=snakefile)
        workflow.include(snakefile)
        # get rule
        rule = workflow.get_rule(rule_name)

        smk_resources = AttrDict(rule.resources)
        smk_input = dict(rule.expand_input(default_wildcards)[0])
        smk_output = dict(rule.expand_output(default_wildcards)[0])
        smk_params = dict(rule.expand_params(
            default_wildcards,
            rule.input,
            rule.output,
            smk_resources
        ))

        # Make paths in snakemake inputs and outputs absolute
        smk_input = map_custom_wd(workflow, smk_input, root)
        smk_output = map_custom_wd(workflow, smk_output, root)

        if create_dir:
            mk_dirs(smk_output)

        # setup rule arguments
        retval = SnakemakeRuleArgs(
            resources=smk_resources,
            input=smk_input,
            params=smk_params,
            output=smk_output,
            wildcards=default_wildcards
        )
        return retval
    finally:
        if not change_dir:
            # change back to previous working directory
            os.chdir(cwd)
Пример #8
0
                logger.info("rule '{}' up to date".format(dest))


# Command line interface. The text is passed as ``description``: the first
# positional parameter of ArgumentParser is ``prog``, so passing it
# positionally replaced the program name in the usage output.
parser = argparse.ArgumentParser(
    description="Copy/sync rules to a given directory")
parser.add_argument('Snakefile', help="Snakefile to import")
parser.add_argument('-n', '--dry-run', action="store_true", help="Dry run")
parser.add_argument('-d',
                    '--outdir',
                    action="store",
                    default=os.curdir,
                    help="Directory to copy/sync the rules to")
args = parser.parse_args()

snakefile = os.path.abspath(args.Snakefile)

workflow = Workflow(snakefile=snakefile)

# Defined up front so the flag also exists when loading succeeds.
success = True
try:
    workflow.include(snakefile,
                     overwrite_first_rule=True,
                     print_compilation=False)
    workflow.check()
except (Exception, BaseException) as ex:
    # Report the failure instead of aborting the script.
    print_exception(ex, workflow.linemaps)
    success = False

# Map the rules included from snakemake_rules
DEST = args.outdir
rules = {
    x: os.path.join(DEST, os.path.relpath(x, SNAKEMAKE_RULES_PATH))
    for x in workflow.included if x.startswith(SNAKEMAKE_RULES_PATH)
Пример #9
0
def snakemake(snakefile,
    listrules=False,
    cores=1,
    resources=None,
    workdir=None,
    targets=None,
    dryrun=False,
    touch=False,
    forcetargets=False,
    forceall=False,
    forcerun=None,
    prioritytargets=None,
    stats=None,
    printreason=False,
    printshellcmds=False,
    printdag=False,
    printrulegraph=False,
    nocolor=False,
    quiet=False,
    keepgoing=False,
    cluster=None,
    immediate_submit=False,
    standalone=False,
    ignore_ambiguity=False,
    snakemakepath=None,
    lock=True,
    unlock=False,
    cleanup_metadata=None,
    force_incomplete=False,
    ignore_incomplete=False,
    list_version_changes=False,
    list_code_changes=False,
    list_input_changes=False,
    list_params_changes=False,
    summary=False,
    output_wait=3,
    print_compilation=False,
    debug=False,
    notemp=False,
    nodeps=False,
    jobscript=None,
    timestamp=False):
    """
    Run snakemake on a given snakefile.
    Note: at the moment, this function is not thread-safe!

    The snakefile is included into a new Workflow and checked. Unless
    print_compilation is set, either the rules are listed (listrules) or the
    workflow is executed. Before executing, and unless printdag or
    printrulegraph is set, every subworkflow is run through a recursive call
    of this function; if one of them fails, the main workflow is not executed.

    Arguments
    snakefile         -- the snakefile; if the path does not exist an error
        is logged and False is returned.
    listrules         -- list rules instead of executing the workflow.
    workdir           -- working directory, passed to workflow.include and
        workflow.execute; if given, the working directory found at call time
        is restored before returning.
    dryrun            -- print the rules that would be executed,
        but do not execute them. Also passed to init_logger as stdout.
    standalone        -- try to set the process group (os.setpgrp);
        a failure to do so is ignored.
    lock              -- lock the working directory (passed to
        workflow.execute inverted, as nolock).
    print_compilation -- passed to workflow.include; if set, nothing is
        listed or executed after the include and check.
    nocolor, debug, timestamp -- passed to init_logger.
    snakemakepath, jobscript  -- passed to the Workflow constructor.

    All remaining arguments are passed unchanged to workflow.execute.

    Returns
    False if the snakefile is missing, if a subworkflow failed or if any
    exception was raised while loading or executing (the exception is
    reported through print_exception). Otherwise the return value of
    workflow.execute, or True when nothing was executed.
    """

    init_logger(nocolor=nocolor, stdout=dryrun, debug=debug, timestamp=timestamp)

    if not os.path.exists(snakefile):
        logger.error("Error: Snakefile \"{}\" not present.".format(snakefile))
        return False

    if workdir:
        # remember where we started so it can be restored at the end
        olddir = os.getcwd()
    workflow = Workflow(
        snakefile=snakefile, snakemakepath=snakemakepath,
        jobscript=jobscript)

    if standalone:
        try:
            # set the process group
            os.setpgrp()
        except:
            # ignore: if it does not work we can still work without it
            pass

    success = True
    try:
        workflow.include(snakefile, workdir=workdir,
            overwrite_first_rule=True, print_compilation=print_compilation)
        workflow.check()

        if not print_compilation:
            if listrules:
                workflow.list_rules()
            else:
                if not printdag and not printrulegraph:
                    # handle subworkflows
                    # (snakefile, workdir and targets are supplied per
                    # subworkflow in the loop below)
                    subsnakemake = partial(
                        snakemake,
                        cores=cores,
                        resources=resources,
                        dryrun=dryrun,
                        touch=touch,
                        printreason=printreason,
                        printshellcmds=printshellcmds,
                        nocolor=nocolor,
                        quiet=quiet,
                        keepgoing=keepgoing,
                        cluster=cluster,
                        immediate_submit=immediate_submit,
                        standalone=standalone,
                        ignore_ambiguity=ignore_ambiguity,
                        snakemakepath=snakemakepath,
                        lock=lock,
                        unlock=unlock,
                        cleanup_metadata=cleanup_metadata,
                        force_incomplete=force_incomplete,
                        ignore_incomplete=ignore_incomplete,
                        output_wait=output_wait,
                        debug=debug,
                        notemp=notemp,
                        nodeps=nodeps,
                        jobscript=jobscript,
                        timestamp=timestamp)
                    for subworkflow in workflow.subworkflows:
                        logger.warning("Executing subworkflow {}.".format(subworkflow.name))
                        if not subsnakemake(subworkflow.snakefile, workdir=subworkflow.workdir, targets=subworkflow.targets):
                            success = False
                    if workflow.subworkflows:
                        logger.warning("Executing main workflow.")
                # the main workflow only runs if no subworkflow failed
                if success:
                    success = workflow.execute(
                        targets=targets, dryrun=dryrun, touch=touch,
                        cores=cores, forcetargets=forcetargets,
                        forceall=forceall, forcerun=forcerun,
                        prioritytargets=prioritytargets, quiet=quiet,
                        keepgoing=keepgoing, printshellcmds=printshellcmds,
                        printreason=printreason, printrulegraph=printrulegraph,
                        printdag=printdag, cluster=cluster,
                        immediate_submit=immediate_submit,
                        ignore_ambiguity=ignore_ambiguity,
                        workdir=workdir, stats=stats,
                        force_incomplete=force_incomplete,
                        ignore_incomplete=ignore_incomplete,
                        list_version_changes=list_version_changes,
                        list_code_changes=list_code_changes,
                        list_input_changes=list_input_changes,
                        list_params_changes=list_params_changes,
                        summary=summary,
                        output_wait=output_wait,
                        nolock=not lock,
                        unlock=unlock,
                        resources=resources,
                        notemp=notemp,
                        nodeps=nodeps,
                        cleanup_metadata=cleanup_metadata
                        )

    except (Exception, BaseException) as ex:
        # every failure is reported and turned into a False return value
        print_exception(ex, workflow.linemaps)
        success = False
    if workdir:
        os.chdir(olddir)
    if workflow.persistence:
        workflow.persistence.unlock()
    return success
Пример #10
0
 def setUp(self):
     """Create a workflow "foo" holding one rule "bar" with output and params."""
     self.workflow = Workflow("foo")
     self.workflow.add_rule(name="bar")
     template = self.workflow._rules["bar"]
     template.set_output(foo="bar")
     template.set_params(cmd="foo", options=["foo", "bar"])
Пример #11
0
 def _load_snakefile(self, file_path):
     """Include the Snakefile at *file_path* in a new workflow and return its rules."""
     message = "loading snakefile {}".format(file_path)
     logger.error(message)
     loaded = Workflow(snakefile=file_path)
     loaded.include(file_path)
     return loaded.rules
Пример #12
0
def snakemake(snakefile,
              listrules=False,
              list_target_rules=False,
              cores=1,
              nodes=1,
              local_cores=1,
              resources=dict(),
              config=dict(),
              configfile=None,
              config_args=None,
              workdir=None,
              targets=None,
              dryrun=False,
              touch=False,
              forcetargets=False,
              forceall=False,
              forcerun=[],
              prioritytargets=[],
              stats=None,
              printreason=False,
              printshellcmds=False,
              printdag=False,
              printrulegraph=False,
              printd3dag=False,
              nocolor=False,
              quiet=False,
              keepgoing=False,
              cluster=None,
              cluster_config=None,
              cluster_sync=None,
              drmaa=None,
              jobname="snakejob.{rulename}.{jobid}.sh",
              immediate_submit=False,
              standalone=False,
              ignore_ambiguity=False,
              snakemakepath=None,
              lock=True,
              unlock=False,
              cleanup_metadata=None,
              force_incomplete=False,
              ignore_incomplete=False,
              list_version_changes=False,
              list_code_changes=False,
              list_input_changes=False,
              list_params_changes=False,
              list_resources=False,
              summary=False,
              detailed_summary=False,
              latency_wait=3,
              benchmark_repeats=1,
              wait_for_files=None,
              print_compilation=False,
              debug=False,
              notemp=False,
              nodeps=False,
              keep_target_files=False,
              allowed_rules=None,
              jobscript=None,
              timestamp=False,
              greediness=None,
              no_hooks=False,
              overwrite_shellcmd=None,
              updated_files=None,
              log_handler=None,
              keep_logger=False,
              verbose=False):
    """Run snakemake on a given snakefile.

    This function provides access to the whole snakemake functionality. It is not thread-safe.

    Args:
        snakefile (str):            the path to the snakefile
        listrules (bool):           list rules (default False)
        list_target_rules (bool):   list target rules (default False)
        cores (int):                the number of provided cores (ignored when using cluster support) (default 1)
        nodes (int):                the number of provided cluster nodes (ignored without cluster support) (default 1)
        local_cores (int):                the number of provided local cores if in cluster mode (ignored without cluster support) (default 1)
        resources (dict):           provided resources, a dictionary assigning integers to resource names, e.g. {gpu=1, io=5} (default {})
        config (dict):              override values for workflow config
        workdir (str):              path to working directory (default None)
        targets (list):             list of targets, e.g. rule or file names (default None)
        dryrun (bool):              only dry-run the workflow (default False)
        touch (bool):               only touch all output files if present (default False)
        forcetargets (bool):        force given targets to be re-created (default False)
        forceall (bool):            force all output files to be re-created (default False)
        forcerun (list):             list of files and rules that shall be re-created/re-executed (default [])
        prioritytargets (list):     list of targets that shall be run with maximum priority (default [])
        stats (str):                path to file that shall contain stats about the workflow execution (default None)
        printreason (bool):         print the reason for the execution of each job (default false)
        printshellcmds (bool):      print the shell command of each job (default False)
        printdag (bool):            print the dag in the graphviz dot language (default False)
        printrulegraph (bool):      print the graph of rules in the graphviz dot language (default False)
        printd3dag (bool):          print a D3.js compatible JSON representation of the DAG (default False)
        nocolor (bool):             do not print colored output (default False)
        quiet (bool):               do not print any default job information (default False)
        keepgoing (bool):           keep goind upon errors (default False)
        cluster (str):              submission command of a cluster or batch system to use, e.g. qsub (default None)
        cluster_config (str):       configuration file for cluster options (default None)
        cluster_sync (str):         blocking cluster submission command (like SGE 'qsub -sync y')  (default None)
        drmaa (str):                if not None use DRMAA for cluster support, str specifies native args passed to the cluster when submitting a job
        jobname (str):              naming scheme for cluster job scripts (default "snakejob.{rulename}.{jobid}.sh")
        immediate_submit (bool):    immediately submit all cluster jobs, regardless of dependencies (default False)
        standalone (bool):          kill all processes very rudely in case of failure (do not use this if you use this API) (default False)
        ignore_ambiguity (bool):    ignore ambiguous rules and always take the first possible one (default False)
        snakemakepath (str):        path to the snakemake executable (default None)
        lock (bool):                lock the working directory when executing the workflow (default True)
        unlock (bool):              just unlock the working directory (default False)
        cleanup_metadata (bool):    just cleanup metadata of output files (default False)
        force_incomplete (bool):    force the re-creation of incomplete files (default False)
        ignore_incomplete (bool):   ignore incomplete files (default False)
        list_version_changes (bool): list output files with changed rule version (default False)
        list_code_changes (bool):   list output files with changed rule code (default False)
        list_input_changes (bool):  list output files with changed input files (default False)
        list_params_changes (bool): list output files with changed params (default False)
        summary (bool):             list summary of all output files and their status (default False)
        latency_wait (int):         how many seconds to wait for an output file to appear after the execution of a job, e.g. to handle filesystem latency (default 3)
        benchmark_repeats (int):    number of repeated runs of a job if declared for benchmarking (default 1)
        wait_for_files (list):      wait for given files to be present before executing the workflow
        list_resources (bool):      list resources used in the workflow (default False)
        summary (bool):             list summary of all output files and their status (default False). If no option  is specified a basic summary will be ouput. If 'detailed' is added as an option e.g --summary detailed, extra info about the input and shell commands will be included
        detailed_summary (bool):    list summary of all input and output files and their status (default False)
        print_compilation (bool):   print the compilation of the snakefile (default False)
        debug (bool):               allow to use the debugger within rules
        notemp (bool):              ignore temp file flags, e.g. do not delete output files marked as temp after use (default False)
        nodeps (bool):              ignore dependencies (default False)
        keep_target_files (bool):   Do not adjust the paths of given target files relative to the working directory.
        allowed_rules (set):        Restrict allowed rules to the given set. If None or empty, all rules are used.
        jobscript (str):            path to a custom shell script template for cluster jobs (default None)
        timestamp (bool):           print time stamps in front of any output (default False)
        greediness (float):         set the greediness of scheduling. This value between 0 and 1 determines how careful jobs are selected for execution. The default value (0.5 if prioritytargets are used, 1.0 else) provides the best speed and still acceptable scheduling quality.
        overwrite_shellcmd (str):   a shell command that shall be executed instead of those given in the workflow. This is for debugging purposes only.
        updated_files(list):        a list that will be filled with the files that are updated or created during the workflow execution
        verbose(bool):              show additional debug output (default False)
        log_handler (function):     redirect snakemake output to this custom log handler, a function that takes a log message dictionary (see below) as its only argument (default None). The log message dictionary for the log handler has to following entries:

            :level:
                the log level ("info", "error", "debug", "progress", "job_info")

            :level="info", "error" or "debug":
                :msg:
                    the log message
            :level="progress":
                :done:
                    number of already executed jobs

                :total:
                    number of total jobs

            :level="job_info":
                :input:
                    list of input files of a job

                :output:
                    list of output files of a job

                :log:
                    path to log file of a job

                :local:
                    whether a job is executed locally (i.e. ignoring cluster)

                :msg:
                    the job message

                :reason:
                    the job reason

                :priority:
                    the job priority

                :threads:
                    the threads of the job


    Returns:
        bool:   True if workflow execution was successful.

    """

    if updated_files is None:
        updated_files = list()

    if cluster or cluster_sync or drmaa:
        cores = sys.maxsize
    else:
        nodes = sys.maxsize

    if cluster_config:
        cluster_config = load_configfile(cluster_config)
    else:
        cluster_config = dict()

    if not keep_logger:
        setup_logger(handler=log_handler,
                     quiet=quiet,
                     printreason=printreason,
                     printshellcmds=printshellcmds,
                     nocolor=nocolor,
                     stdout=dryrun,
                     debug=verbose,
                     timestamp=timestamp)

    if greediness is None:
        greediness = 0.5 if prioritytargets else 1.0
    else:
        if not (0 <= greediness <= 1.0):
            logger.error("Error: greediness must be a float between 0 and 1.")
            return False

    if not os.path.exists(snakefile):
        logger.error("Error: Snakefile \"{}\" not present.".format(snakefile))
        return False
    snakefile = os.path.abspath(snakefile)

    cluster_mode = (cluster is not None) + (cluster_sync is not
                                            None) + (drmaa is not None)
    if cluster_mode > 1:
        logger.error("Error: cluster and drmaa args are mutually exclusive")
        return False
    if debug and (cores > 1 or cluster_mode):
        logger.error(
            "Error: debug mode cannot be used with more than one core or cluster execution.")
        return False

    overwrite_config = dict()
    if configfile:
        overwrite_config.update(load_configfile(configfile))
    if config:
        overwrite_config.update(config)

    if workdir:
        olddir = os.getcwd()
        if not os.path.exists(workdir):
            logger.info(
                "Creating specified working directory {}.".format(workdir))
            os.makedirs(workdir)
        workdir = os.path.abspath(workdir)
        os.chdir(workdir)
    workflow = Workflow(snakefile=snakefile,
                        snakemakepath=snakemakepath,
                        jobscript=jobscript,
                        overwrite_shellcmd=overwrite_shellcmd,
                        overwrite_config=overwrite_config,
                        overwrite_workdir=workdir,
                        overwrite_configfile=configfile,
                        config_args=config_args,
                        debug=debug)

    if standalone:
        try:
            # set the process group
            os.setpgrp()
        except:
            # ignore: if it does not work we can still work without it
            pass

    success = True
    try:
        workflow.include(snakefile,
                         overwrite_first_rule=True,
                         print_compilation=print_compilation)
        workflow.check()

        if not print_compilation:
            if listrules:
                workflow.list_rules()
            elif list_target_rules:
                workflow.list_rules(only_targets=True)
            elif list_resources:
                workflow.list_resources()
            else:
                # if not printdag and not printrulegraph:
                # handle subworkflows
                subsnakemake = partial(snakemake,
                                       cores=cores,
                                       nodes=nodes,
                                       local_cores=local_cores,
                                       resources=resources,
                                       dryrun=dryrun,
                                       touch=touch,
                                       printreason=printreason,
                                       printshellcmds=printshellcmds,
                                       nocolor=nocolor,
                                       quiet=quiet,
                                       keepgoing=keepgoing,
                                       cluster=cluster,
                                       cluster_config=cluster_config,
                                       cluster_sync=cluster_sync,
                                       drmaa=drmaa,
                                       jobname=jobname,
                                       immediate_submit=immediate_submit,
                                       standalone=standalone,
                                       ignore_ambiguity=ignore_ambiguity,
                                       snakemakepath=snakemakepath,
                                       lock=lock,
                                       unlock=unlock,
                                       cleanup_metadata=cleanup_metadata,
                                       force_incomplete=force_incomplete,
                                       ignore_incomplete=ignore_incomplete,
                                       latency_wait=latency_wait,
                                       benchmark_repeats=benchmark_repeats,
                                       verbose=verbose,
                                       notemp=notemp,
                                       nodeps=nodeps,
                                       jobscript=jobscript,
                                       timestamp=timestamp,
                                       greediness=greediness,
                                       no_hooks=no_hooks,
                                       overwrite_shellcmd=overwrite_shellcmd,
                                       config=config,
                                       config_args=config_args,
                                       keep_logger=True)
                success = workflow.execute(
                    targets=targets,
                    dryrun=dryrun,
                    touch=touch,
                    cores=cores,
                    nodes=nodes,
                    local_cores=local_cores,
                    forcetargets=forcetargets,
                    forceall=forceall,
                    forcerun=forcerun,
                    prioritytargets=prioritytargets,
                    quiet=quiet,
                    keepgoing=keepgoing,
                    printshellcmds=printshellcmds,
                    printreason=printreason,
                    printrulegraph=printrulegraph,
                    printdag=printdag,
                    cluster=cluster,
                    cluster_config=cluster_config,
                    cluster_sync=cluster_sync,
                    jobname=jobname,
                    drmaa=drmaa,
                    printd3dag=printd3dag,
                    immediate_submit=immediate_submit,
                    ignore_ambiguity=ignore_ambiguity,
                    stats=stats,
                    force_incomplete=force_incomplete,
                    ignore_incomplete=ignore_incomplete,
                    list_version_changes=list_version_changes,
                    list_code_changes=list_code_changes,
                    list_input_changes=list_input_changes,
                    list_params_changes=list_params_changes,
                    summary=summary,
                    latency_wait=latency_wait,
                    benchmark_repeats=benchmark_repeats,
                    wait_for_files=wait_for_files,
                    detailed_summary=detailed_summary,
                    nolock=not lock,
                    unlock=unlock,
                    resources=resources,
                    notemp=notemp,
                    nodeps=nodeps,
                    keep_target_files=keep_target_files,
                    cleanup_metadata=cleanup_metadata,
                    subsnakemake=subsnakemake,
                    updated_files=updated_files,
                    allowed_rules=allowed_rules,
                    greediness=greediness,
                    no_hooks=no_hooks)

    except BrokenPipeError:
        # ignore this exception and stop. It occurs if snakemake output is piped into less and less quits before reading the whole output.
        # in such a case, snakemake shall stop scheduling and quit with error 1
        success = False
    except (Exception, BaseException) as ex:
        print_exception(ex, workflow.linemaps)
        success = False
    if workdir:
        os.chdir(olddir)
    if workflow.persistence:
        workflow.persistence.unlock()
    if not keep_logger:
        logger.cleanup()
    return success
Пример #13
0
    def _create_snakemake_dag(snakefile: str,
                              configfiles: Optional[List[str]] = None,
                              **kwargs: Any) -> DAG:
        """Create ``snakemake.dag.DAG`` instance.

        The code of this function comes from the Snakemake codebase and is adapted
        to fulfil REANA purposes of getting the needed metadata.

        :param snakefile: Path to Snakefile.
        :type snakefile: string
        :param configfiles: List of config files paths. Later files override
            values loaded from earlier ones.
        :type configfiles: List
        :param kwargs: Snakemake args. The keys read here are ``targets``,
            ``keep_target_files``, ``prioritytargets``, ``forcerun``,
            ``until`` and ``omit_from``; a missing or ``None`` value is
            treated as empty.
        :type kwargs: Any
        :returns: The DAG after ``init()``, ``update_checkpoint_dependencies()``
            and ``check_dynamic()`` have been called on it.
        """
        if configfiles is None:
            configfiles = []

        overwrite_config = dict()
        for f in configfiles:
            # get values to override. Later configfiles override earlier ones.
            overwrite_config.update(load_configfile(f))
        # convert provided paths to absolute paths
        configfiles = list(map(os.path.abspath, configfiles))
        workflow = Workflow(
            snakefile=snakefile,
            overwrite_configfiles=configfiles,
            overwrite_config=overwrite_config,
        )

        workflow.include(snakefile=snakefile, overwrite_first_rule=True)
        workflow.check()

        # code copied and adapted from `snakemake.workflow.Workflow.execute()`
        # in order to build the DAG and calculate the job dependencies.
        # https://github.com/snakemake/snakemake/blob/75a544ba528b30b43b861abc0ad464db4d6ae16f/snakemake/workflow.py#L525
        def rules(items):
            """Map the items that name a rule to their rule objects."""
            return map(
                workflow._rules.__getitem__,
                filter(workflow.is_rule, items),
            )

        keep_target_files = kwargs.get("keep_target_files")

        def _relpath(f):
            """Leave absolute and ``root://`` paths alone, relativise the rest."""
            if os.path.isabs(f) or f.startswith("root://"):
                return f
            return os.path.relpath(f)

        def files(items):
            """Return the items that are not rule names, i.e. file targets."""
            non_rules = filterfalse(workflow.is_rule, items)
            if keep_target_files:
                return non_rules
            return map(_relpath, non_rules)

        # Always bind ``targets``: previously it was only assigned when the
        # caller passed none, so user-supplied targets raised
        # UnboundLocalError further down.
        targets = kwargs.get("targets")
        if not targets:
            targets = ([workflow.first_rule]
                       if workflow.first_rule is not None else [])

        # ``or []`` also covers callers that pass an explicit ``None``.
        prioritytargets = kwargs.get("prioritytargets") or []
        forcerun = kwargs.get("forcerun") or []
        until = kwargs.get("until") or []
        omit_from = kwargs.get("omit_from") or []

        priorityrules = set(rules(prioritytargets))
        priorityfiles = set(files(prioritytargets))
        forcerules = set(rules(forcerun))
        forcefiles = set(files(forcerun))
        untilrules = set(rules(until))
        untilfiles = set(files(until))
        omitrules = set(rules(omit_from))
        omitfiles = set(files(omit_from))

        # Rules with wildcards are filtered out of the priority/force/until
        # sets before they are added as targets.
        targetrules = set(
            chain(
                rules(targets),
                filterfalse(Rule.has_wildcards, priorityrules),
                filterfalse(Rule.has_wildcards, forcerules),
                filterfalse(Rule.has_wildcards, untilrules),
            ))
        targetfiles = set(
            chain(files(targets), priorityfiles, forcefiles, untilfiles))
        dag = DAG(
            workflow,
            workflow.rules,
            targetrules=targetrules,
            targetfiles=targetfiles,
            omitfiles=omitfiles,
            omitrules=omitrules,
        )

        # The persistence object is attached to the workflow before the DAG
        # is initialised (same order as the original code).
        workflow.persistence = Persistence(dag=dag)
        dag.init()
        dag.update_checkpoint_dependencies()
        dag.check_dynamic()
        return dag
#!/usr/bin/env python3

import sys, os
import unittest

from snakemake.workflow import Workflow
""" Can I unit test a Snakefile?
    Of course I can!
    Or, at least, I can test any functions defined at the top level.
    Importing the functions from the Snakefile requires parsing it with the
    Snakemake internals, and setting a couple of environment things.
"""
# Set TOOLBOX before the Snakefile is loaded; the value is only a placeholder.
# NOTE(review): presumably the Snakefile reads this variable while being
# parsed -- confirm against Snakefile.qc.
os.environ['TOOLBOX'] = 'dummy'
# Path of the Snakefile under test: 'Snakefile.qc' one directory above this file.
sf = os.path.join(os.path.dirname(__file__), '..', 'Snakefile.qc')
# Build a Workflow for that Snakefile with a fixed 'runid' config override,
# then include the Snakefile; afterwards its top-level names are looked up
# through wf.globals (see below).
wf = Workflow(sf, overwrite_config=dict(runid='170221_K00166_0183_AHHT3HBBXX'))
wf.include(sf)

# I can now import top-level functions like so:
split_fq_name = wf.globals['split_fq_name']


class T(unittest.TestCase):
    def test_split_fq_name(self):
        """Test the function which claims to work as follows...
            Break out components from the name of a FASTQ file.
                eg. 10749/10749DMpool03/170221_K00166_0183_AHHT3HBBXX_8_10749DM0001L01_1.fastq.gz
                eg. 170221_K00166_0183_AHHT3HBBXX_1_unassigned_1.fastq.gz
        """
        # eg. 1
        self.assertEqual(
            split_fq_name(
Пример #15
0
        n = 0
        with open(input.fastq) as f:
            for _ in f:
                n += 1
        with open(output.counts, 'w') as f:
            print(n / 4, file=f)

"""
from snakemake.workflow import Workflow, Rules
import snakemake.workflow
from snakemake import shell
from snakemake.logging import setup_logger

# Set up snakemake's logger, called with no arguments.
setup_logger()

# Workflow object created for this very file; the decorators further down
# (``@workflow.rule`` etc.) are methods of this object.
workflow = Workflow(__file__)
# Install a fresh ``Rules`` object and an empty config dict as module-level
# attributes of ``snakemake.workflow``.
# NOTE(review): presumably this mirrors what a regular snakemake run sets up
# before executing compiled Snakefile code -- confirm.
snakemake.workflow.rules = Rules()
snakemake.workflow.config = dict()

### Output from snakemake --print-compilation follows (reformatted)

# Include "pipeline.conf" into the workflow (path is given relative, as-is).
workflow.include("pipeline.conf")

# Register "set -euo pipefail;" as the prefix for snakemake's ``shell``.
shell.prefix("set -euo pipefail;")


@workflow.rule(name='all', lineno=6, snakefile='.../Snakefile')
@workflow.input("reads.counts")
@workflow.norun()
@workflow.run
def __all(input, output, params, wildcards, threads, resources, log, version):