Exemplo n.º 1
0
    def __init__(self, wk=None):
        """Create a ``variant_calling`` project and set its reference.

        The ``sequana`` command line is invoked to initialise the project in
        ``self.wk``; the generated ``config.yaml`` is then edited so that the
        ``bwa_mem_ref`` reference points to the ``measles.fa`` data file.

        :param wk: working directory, forwarded to the parent constructor
            (``self.wk`` is read afterwards, so the parent presumably sets it).
        :raises subprocess.CalledProcessError: if the ``sequana`` command
            returns a non-zero exit status.
        """
        super(VariantCallingPipeline, self).__init__(wk=wk)
        # Define the data
        data = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
        input_directory = os.path.dirname(data)
        self.input_pattern = input_directory + "/Hm*gz"
        self.pipeline = "variant_calling"

        # Define the project and config file. The command used to be executed
        # twice in a row (once without the Travis job limit); since --force
        # overwrites the project anyway, a single invocation is sufficient.
        cmd = [
            "sequana", "--pipeline", self.pipeline,
            "--input-pattern", self.input_pattern,
            "--working-directory", self.wk,
            "--force",
        ]

        # Limit snakemake to one job when running on Travis
        if "TRAVIS_PYTHON_VERSION" in os.environ:
            cmd += ["--snakemake-jobs", "1"]

        subprocess.check_call(cmd)

        # Add reference in the config
        config_filename = self.wk + "/config.yaml"
        cfg = SequanaConfig(config_filename)
        # We added a TTTT in position 5881
        cfg._yaml_code['bwa_mem_ref']['reference'] = sequana_data("measles.fa")
        cfg.save(config_filename)
Exemplo n.º 2
0
    def __init__(self, wk=None):
        """Create a ``variant_calling`` project and set its reference.

        The ``sequana`` command line is invoked to initialise the project in
        ``self.wk``; the generated ``config.yaml`` is then edited so that the
        ``bwa_mem_ref`` reference points to the ``measles.fa`` data file.

        :param wk: working directory, forwarded to the parent constructor
            (``self.wk`` is read afterwards, so the parent presumably sets it).
        :raises subprocess.CalledProcessError: if the ``sequana`` command
            returns a non-zero exit status.
        """
        super(VariantCallingPipeline, self).__init__(wk=wk)
        # Define the data
        data = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
        input_directory = os.path.dirname(data)
        self.input_pattern = input_directory + "/Hm*gz"
        self.pipeline = "variant_calling"

        # Define the project and config file. A first, identical call (without
        # the Travis job limit) used to precede this one; it was redundant
        # because --force makes the second call overwrite the project.
        cmd = [
            "sequana", "--pipeline", self.pipeline,
            "--input-pattern", self.input_pattern,
            "--working-directory", self.wk,
            "--force",
        ]

        # Limit snakemake to one job when running on Travis
        if "TRAVIS_PYTHON_VERSION" in os.environ:
            cmd += ["--snakemake-jobs", "1"]

        subprocess.check_call(cmd)

        # Add reference in the config
        config_filename = self.wk + "/config.yaml"
        cfg = SequanaConfig(config_filename)
        # We added a TTTT in position 5881
        cfg._yaml_code['bwa_mem_ref']['reference'] = sequana_data("measles.fa")
        cfg.save(config_filename)
Exemplo n.º 3
0
def test_sequana_config():
    """Exercise the SequanaConfig constructors and the save/reload round trip.

    The "must raise" checks use ``try/except/else`` so that a constructor that
    unexpectedly succeeds makes the test fail. Previously ``assert False`` was
    placed inside the ``try`` body and its AssertionError was swallowed by a
    bare ``except``, so those checks could never fail.
    """
    s = snaketools.Module("compressor")
    config = snaketools.SequanaConfig(s.config)

    assert config.config.get("compressor")["source"] == "fastq.gz"
    assert config.config.get("kraken:dummy") is None

    # --------------------------------- tests different constructors
    config = snaketools.SequanaConfig()
    config = snaketools.SequanaConfig({"test": 1})
    assert config.config.test == 1
    # with a dictionary
    config = snaketools.SequanaConfig(config.config)
    # with a sequanaConfig instance
    config = snaketools.SequanaConfig(config)

    # with a non-yaml file
    # NOTE(review): this check was vacuous before; confirm that SequanaConfig
    # is really expected to reject a JSON file.
    json_filename = sequana_data('test_summary_fastq_stats.json')
    try:
        snaketools.SequanaConfig(json_filename)
    except Exception:
        pass
    else:
        raise AssertionError("SequanaConfig should reject a non-yaml file")

    # with a file that does not exist
    try:
        snaketools.SequanaConfig("dummy_dummy")
    except Exception:
        pass
    else:
        raise AssertionError("SequanaConfig should reject a missing file")

    # Test an exception
    s = snaketools.Module("compressor")
    config = snaketools.SequanaConfig(s.config)
    config._recursive_update(config._yaml_code,
                             {"input_directory_dummy": "test"})

    #config.check_config_with_schema(s.schema_config)
    # loop over all pipelines, read the config, save it and check the content is
    # identical. This requires to remove the templates. We want to make sure the
    # empty strings are kept and that "no value" are kept as well
    #
    #    field1: ""
    #    field2:
    #
    # is unchanged
    from easydev import TempFile
    output = TempFile(suffix=".yaml")
    try:
        for pipeline in snaketools.pipeline_names:
            config_filename = Module(pipeline)._get_config()
            cfg1 = SequanaConfig(config_filename)
            cfg1.cleanup()  # remove templates and strip strings

            cfg1.save(output.name)
            cfg2 = SequanaConfig(output.name)
            assert cfg2._yaml_code == cfg1._yaml_code
            cfg2._update_config()
            assert cfg1.config == cfg2.config
    finally:
        # remove the temporary file even when an assertion fails
        output.delete()
Exemplo n.º 4
0
    def __init__(self, wk=None):
        """Create a ``pacbio_qc`` project and customise its config file.

        Runs the ``sequana`` command line on the ``test_pacbio_subreads.bam``
        data file, rewrites the input-related entries of the generated
        ``config.yaml`` and records the expected summary file in
        ``self.output``.

        :param wk: working directory, forwarded to the parent constructor.
        """
        super(PacbioQCPipeline, self).__init__(wk=wk)

        # Locate the test data
        bam_file = sequana_data("test_pacbio_subreads.bam")
        self.input_pattern = (os.path.dirname(bam_file) +
                              "/test_pacbio_subreads.bam")
        self.pipeline = "pacbio_qc"

        # Build the command that creates the project and its config file
        command = ["sequana"]
        command += ["--pipeline", self.pipeline]
        command += ["--input-pattern", '%s' % self.input_pattern]
        command += ["--extension", "bam"]
        command += ["--working-directory", self.wk]
        command += ["--force"]
        if "TRAVIS_PYTHON_VERSION" in os.environ:
            command += ["--snakemake-jobs", "1"]
        subprocess.check_call(command)

        # Overwrite the input section of the generated config, in this order
        config_path = self.wk + "/config.yaml"
        cfg = SequanaConfig(config_path)
        overrides = [
            ("input_directory", ''),
            ("input_readtag", "_R[12]_"),
            ("input_extension", "bam"),
            ("input_pattern", self.input_pattern),
            ("input_samples",
             "CommentedMap([('file1', None), ('file2', None)])"),
        ]
        for key, value in overrides:
            cfg._yaml_code[key] = value
        cfg.save(config_path)

        self.output = (self.wk +
                       "/test_pacbio_subreads//summary_test_pacbio_subreads.json")
Exemplo n.º 5
0
def test_pipeline_manager_generic():
    """Smoke-test PipelineManagerGeneric getters, setup/teardown and multiqc.

    ``setup`` is called twice with the module globals: once with a temporary
    ``__snakefile__`` entry and once with a fake ``workflow`` object.
    """
    cfg = SequanaConfig({})
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1)
    cfg.config.input_pattern = "Hm*gz"
    pm = snaketools.PipelineManagerGeneric("quality_control", cfg)
    pm.getlogdir("fastqc")
    pm.getwkdir("fastqc")
    pm.getrawdata()
    pm.getreportdir("test")
    pm.getname("fastqc")

    gg = globals()
    gg['__snakefile__'] = "dummy"
    try:
        pm.setup(gg)
    finally:
        # never leave the fake entry in the module globals
        del gg['__snakefile__']

    class WF():
        included_stack = ["dummy", 'dummy']

    wf = WF()
    gg['workflow'] = wf
    pm.setup(gg)
    pm.teardown()

    with tempfile.TemporaryDirectory() as dd:
        multiqc_filename = dd + "/multiqc.html"
        # context manager closes the handle even if the write fails
        with open(multiqc_filename, "w") as multiqc:
            multiqc.write("test")
        pm.clean_multiqc(multiqc_filename)
Exemplo n.º 6
0
def test_sequana_config():
    """Exercise the SequanaConfig constructors and the save/reload round trip.

    The "must raise" checks use ``try/except/else`` so that a constructor that
    unexpectedly succeeds makes the test fail. Previously ``assert False`` was
    placed inside the ``try`` body and its AssertionError was swallowed by a
    bare ``except``, so those checks could never fail.
    """
    s = snaketools.Module("quality_control")
    config = snaketools.SequanaConfig(s.config)

    assert config.config.get("kraken:dummy", "test") == "test"
    assert config.config.get("kraken:dummy") is None

    # --------------------------------- tests different constructors
    config = snaketools.SequanaConfig()
    config = snaketools.SequanaConfig({"test": 1})
    assert config.config.test == 1
    # with a dictionary
    config = snaketools.SequanaConfig(config.config)
    # with a sequanaConfig instance
    config = snaketools.SequanaConfig(config)

    # with a non-yaml file
    # NOTE(review): this check was vacuous before; confirm that SequanaConfig
    # is really expected to reject a JSON file.
    json_filename = sequana_data('test_summary_fastq_stats.json')
    try:
        snaketools.SequanaConfig(json_filename)
    except Exception:
        pass
    else:
        raise AssertionError("SequanaConfig should reject a non-yaml file")

    # with a file that does not exist
    try:
        snaketools.SequanaConfig("dummy_dummy")
    except Exception:
        pass
    else:
        raise AssertionError("SequanaConfig should reject a missing file")

    # Test an exception
    s = snaketools.Module("quality_control")
    config = snaketools.SequanaConfig(s.config)
    config._recursive_update(config._yaml_code, {"input_directory_dummy": "test"})

    # loop over all pipelines, read the config, save it and check the content is
    # identical. This requires to remove the templates. We want to make sure the
    # empty strings are kept and that "no value" are kept as well
    #
    #    field1: ""
    #    field2:
    #
    # is unchanged
    from easydev import TempFile
    output = TempFile(suffix=".yaml")
    try:
        for pipeline in snaketools.pipeline_names:
            config_filename = Module(pipeline)._get_config()
            cfg1 = SequanaConfig(config_filename)
            cfg1.cleanup()  # remove templates and strip strings

            cfg1.save(output.name)
            cfg2 = SequanaConfig(output.name)
            assert cfg2._yaml_code == cfg1._yaml_code
            cfg2._update_config()
            assert cfg1.config == cfg2.config
    finally:
        # remove the temporary file even when an assertion fails
        output.delete()
Exemplo n.º 7
0
    def __init__(self, wk=None):
        """Create a ``denovo_assembly`` project and tune its config file.

        Runs the ``sequana`` command line on the ``Hm*gz`` test files, then
        sets ``digital_normalisation.max_memory_usage`` to ``1e9`` in the
        generated ``config.yaml``.

        :param wk: working directory, forwarded to the parent constructor.
        """
        super(DenovoPipeline, self).__init__(wk)

        fastq = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
        self.input_pattern = os.path.dirname(fastq) + "/Hm*gz"
        self.pipeline = "denovo_assembly"

        # Create the project and its config file
        command = ["sequana"]
        command += ["--pipeline", self.pipeline]
        command += ["--input-pattern", '%s' % self.input_pattern]
        command += ["--working-directory", self.wk]
        command += ["--force"]
        subprocess.check_call(command)

        # Set the memory usage of the digital normalisation step
        config_path = self.wk + "/config.yaml"
        cfg = SequanaConfig(config_path)
        normalisation = cfg._yaml_code['digital_normalisation']
        normalisation['max_memory_usage'] = 1e9
        cfg.save(config_path)
Exemplo n.º 8
0
def test_pipeline_manager_generic():
    """Call each getter of a PipelineManagerGeneric built from an empty config.

    The config input directory is the directory of a test FASTQ file and the
    input pattern is ``Hm*gz``.
    """
    cfg = SequanaConfig({})
    fastq = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")

    directory, basename = os.path.split(fastq)
    cfg.config.input_directory = directory
    cfg.config.input_pattern = basename
    # the pattern is then replaced by a wildcard
    cfg.config.input_pattern = "Hm*gz"

    manager = snaketools.PipelineManagerGeneric("custom", cfg)
    rule_name = "fastqc"
    manager.getlogdir(rule_name)
    manager.getwkdir(rule_name)
    manager.getrawdata()
    manager.getreportdir("test")
    manager.getname(rule_name)
Exemplo n.º 9
0
def test_pipeline_manager():
    """Check PipelineManager failure cases, paired detection and getters.

    The failure cases go through ``assert_manager_fails`` which fails the test
    when no exception is raised. Previously ``assert False`` sat inside the
    ``try`` body and was swallowed by a bare ``except``, so these checks could
    never fail.
    """

    def assert_manager_fails(name, cfg):
        # Pass only if building the manager raises.
        try:
            snaketools.PipelineManager(name, cfg)
        except Exception:
            return
        raise AssertionError(
            "PipelineManager(%r, ...) was expected to raise" % name)

    # test missing input_directory
    assert_manager_fails("custom", SequanaConfig({}))

    # normal behaviour but no input provided:
    config = Module("quality_control")._get_config()
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    assert_manager_fails("custome", cfg)

    # two input files: paired data
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    file2 = sequana_data("Hm2_GTGAAA_L005_R2_001.fastq.gz")
    cfg.config.input_samples['file1'] = file1
    cfg.config.input_samples['file2'] = file2
    pm = snaketools.PipelineManager("custome", cfg)
    assert pm.paired == True

    # one input file: single-end data
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    cfg.config.input_samples['file1'] = file1
    pm = snaketools.PipelineManager("custome", cfg)
    assert pm.paired == False

    pm.getlogdir("fastqc")
    pm.getwkdir("fastqc")
    pm.getrawdata()
    pm.getreportdir("test")
    pm.getname("fastqc")
Exemplo n.º 10
0
    def __init__(self, wk=None):
        """Create an ``rnaseq`` project and fill the genome section.

        Runs the ``sequana`` command line on the ``KO_ATCACG_R1_test.fastq.gz``
        data file with explicit forward/reverse adapters, then edits the
        ``genome`` section of the generated ``config.yaml``.

        :param wk: working directory, forwarded to the parent constructor.
        :raises subprocess.CalledProcessError: if the ``sequana`` command
            returns a non-zero exit status.
        """
        super(RNASeqPipeline, self).__init__(wk)
        data = sequana_data("KO_ATCACG_R1_test.fastq.gz")
        input_directory = os.path.dirname(data)
        self.input_pattern = input_directory + "/KO_ATCACG_R1_test.fastq.gz"
        self.pipeline = "rnaseq"

        subprocess.check_call([
            "sequana", "--pipeline", self.pipeline,
            "--input-pattern", self.input_pattern,
            "--working-directory", self.wk,
            "--adapter-fwd", "GATCGGAAGAGCACACGTCTGAACTCCAGTCA",
            "--adapter-rev", "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATC",
            "--force",
        ])

        # Need to edit the config file
        config_filename = self.wk + "/config.yaml"
        cfg = SequanaConfig(config_filename)
        genome = cfg._yaml_code['genome']
        genome['genome_directory'] = "Saccer3"
        genome['name'] = "Saccer3"
        genome['fasta_file'] = "Saccer3/Saccer3.fa"
        # The GFF path used to be written to 'fasta_file' as well, which
        # silently replaced the FASTA path set on the previous line.
        # NOTE(review): confirm 'gff_file' is the key the rnaseq config expects.
        genome['gff_file'] = "Saccer3/Saccer3.gff"
        cfg.save(config_filename)
Exemplo n.º 11
0
def test_standalone_generic_with_config(qtbot, tmpdir):
    """Start the GUI in generic mode from a working dir, snakefile and config.

    After saving the project, the config file written to the working directory
    is read back and one of its entries is checked.
    """
    workdir = TemporaryDirectory()
    user_options = Namespace(
        wkdir=workdir.name,
        snakefile=sequana_data("test_generic.rules"),
        configfile=sequana_data("test_generic.yaml"),
    )

    gui = sequana_gui.SequanaGUI(ipython=False, user_options=user_options)
    qtbot.addWidget(gui)

    assert gui.mode == "generic"
    assert gui.generic_factory.is_runnable() == True
    gui.save_project()

    # read back.
    saved_config_path = workdir.name + "/test_generic.yaml"
    saved = SequanaConfig(saved_config_path).config
    assert saved['test']["mylist"] == [1, 2, 3]
Exemplo n.º 12
0
    def __init__(self, wk=None):
        """Create an ``rnaseq`` project and fill the genome section.

        Runs the ``sequana`` command line on the ``KO_ATCACG_R1_test.fastq.gz``
        data file with explicit forward/reverse adapters, then edits the
        ``genome`` section of the generated ``config.yaml``.

        :param wk: working directory, forwarded to the parent constructor.
        :raises subprocess.CalledProcessError: if the ``sequana`` command
            returns a non-zero exit status.
        """
        super(RNASeqPipeline, self).__init__(wk)
        data = sequana_data("KO_ATCACG_R1_test.fastq.gz")
        input_directory = os.path.dirname(data)
        self.input_pattern = input_directory + "/KO_ATCACG_R1_test.fastq.gz"
        self.pipeline = "rnaseq"

        subprocess.check_call([
            "sequana", "--pipeline", self.pipeline,
            "--input-pattern", self.input_pattern,
            "--working-directory", self.wk,
            "--adapter-fwd", "GATCGGAAGAGCACACGTCTGAACTCCAGTCA",
            "--adapter-rev", "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATC",
            "--force",
        ])

        # Need to edit the config file
        config_filename = self.wk + "/config.yaml"
        cfg = SequanaConfig(config_filename)
        genome = cfg._yaml_code['genome']
        genome['genome_directory'] = "Saccer3"
        genome['name'] = "Saccer3"
        genome['fasta_file'] = "Saccer3/Saccer3.fa"
        # The GFF path used to be written to 'fasta_file' as well, which
        # silently replaced the FASTA path set on the previous line.
        # NOTE(review): confirm 'gff_file' is the key the rnaseq config expects.
        genome['gff_file'] = "Saccer3/Saccer3.gff"
        cfg.save(config_filename)
Exemplo n.º 13
0
def sequana_init(options):
    """Initialise a pipeline project in ``options.target_dir``.

    Steps performed, in order:

    1. copy the pipeline Snakefile to ``<target_dir>/<pipeline>.rules``;
    2. write a ``README`` file;
    3. copy the config, multiqc config and cluster config files;
    4. fill the config file from the command-line options and save it;
    5. write an executable ``runme.sh`` script.

    :param options: parsed command-line options. Attributes read here
        include ``pipeline``, ``target_dir``, ``force``, ``config``,
        ``input_directory``, ``pattern``, ``extension``, ``file1``, ``file2``,
        ``input_readtag``, ``design``, ``kraken``, ``adapter_fwd``,
        ``adapter_rev``, ``adapters``, ``reference``, ``config_params``,
        ``forceall``, ``cluster``, ``ignore_cluster_config``, ``redirection``,
        ``jobs`` and ``verbose``.
    :raises IOError: if ``options.config`` is given but does not exist.
    :raises ValueError: if an item of ``options.config_params`` does not
        contain between one and three ``:`` separators.

    The process exits (status 0) if the target directory exists and the user
    answers ``n`` at the confirmation prompt.
    """
    import sequana
    from sequana import SequanaConfig
    sa = Tools(verbose=options.verbose)

    # Check that the pipeline is well defined
    module = Module(options.pipeline)

    if os.path.exists(options.target_dir):
        txt = "Will override the following files if present: %s.rules " +\
              "config.yaml, runme.sh, ..."
        sa.blue(txt % options.pipeline)

        if options.force is True:
            choice = "y"
        else:
            choice = input(
                red("Do you want to proceed (to avoid this " +
                    " message, use --force)? [y]/n:"))
        if choice == "n":
            sys.exit(0)

    # Copying snakefile
    logger.info("Copying snakefile")
    sa.mkdir(options.target_dir)
    shutil.copy(module.snakefile,
                options.target_dir + os.sep + options.pipeline + ".rules")

    # Creating README to print on the screen and in a file
    txt = "User command::\n\n"
    txt += "    %s \n\n" % " ".join(sys.argv)
    txt += "You can now run snakemake yourself or type::"
    txt += purple("""

    snakemake -s %s.rules --stats stats.txt -p -j 4

    """ % options.pipeline)
    txt += """
    # -j 4 means you will use 4 cores
    # -p prints the commands used
    # --stats stats.txt must be used since stats.txt is expected to be found.

    or just run the bash script::

        sh runme.sh

    EDIT THE config.yaml if needed

    Once finished with success, the report/ directory contains a summary.html
    and relevant files (depends on the pipeline).
    """
    logger.info("Creating README")
    with open(options.target_dir + os.sep + "README", "w") as fh:
        # strip the colour escape codes added by purple()
        fh.write(txt.replace("\x1b[35m", "").replace("\x1b[39;49;00m", ""))

    # Creating Config file
    logger.info("Creating the config file")

    # Create (if needed) and update the config file
    config_filename = options.target_dir + os.sep + "config.yaml"

    if options.config:
        # full existing path
        if os.path.exists(options.config):
            shutil.copy(options.config, config_filename)
        else:  # or a sequana config file in the module path ?
            raise IOError("Config file %s not found locally" %
                          options.config)
    else:
        copy_config_from_sequana(module, "config.yaml", config_filename)

    # Copy multiqc if it is available
    multiqc_filename = options.target_dir + os.sep + "multiqc_config.yaml"
    copy_config_from_sequana(module, "multiqc_config.yaml", multiqc_filename)
    cluster_cfg_filename = options.target_dir + os.sep + "cluster_config.json"
    copy_config_from_sequana(module, "cluster_config.json",
                             cluster_cfg_filename)

    # The input
    cfg = SequanaConfig(config_filename)
    cfg.config.input_directory = options.input_directory
    cfg.config.input_pattern = options.pattern
    cfg.config.input_extension = options.extension
    cfg.config.input_samples.file1 = options.file1
    cfg.config.input_samples.file2 = options.file2
    cfg.config.input_readtag = options.input_readtag

    # Dedicated section for quality control section
    if options.pipeline == "quality_control":
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)

        if options.kraken:
            cfg.config.kraken.database_directory = os.path.abspath(
                options.kraken)
            cfg.config.kraken.do = True
        else:
            cfg.config.kraken.do = False

        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_type = options.adapters
        # For all pipelines using BWA
        if options.reference:
            cfg.config.bwa_mem.reference = os.path.abspath(options.reference)
    if options.pipeline == "variant_calling":
        if options.reference:
            cfg.config.bwa_mem_ref.reference = os.path.abspath(
                options.reference)

    if options.pipeline in ["rnaseq", "smallrnaseq"]:
        if options.design:
            shutil.copy(options.design, options.target_dir + os.sep)
            cfg.config['cutadapt'].design_file = os.path.basename(
                options.design)
        cfg.config['cutadapt'].fwd = options.adapter_fwd
        cfg.config['cutadapt'].rev = options.adapter_rev
        cfg.config['cutadapt'].adapter_choice = options.adapters

    cfg.copy_requirements(target=options.target_dir)

    # FIXME If invalid, no error raised
    if options.config_params:
        params = [this.strip() for this in options.config_params.split(",")]
        for param in params:
            # key:value, section:key:value or section:sub:key:value
            fields = param.split(":")
            if len(fields) not in (2, 3, 4):
                raise ValueError(
                    "incorrect format following --config-params. "
                    "Expected at least one : sign and at most 3 of them. "
                    "Config file section such as :\n"
                    "project: tutorial\n"
                    "should be encoded project:tutorial")
            keys, value = fields[:-1], fields[-1]
            # walk down to the innermost section, then set the last key
            section = cfg.config
            for key in keys[:-1]:
                section = section[key]
            section[keys[-1]] = value

    # important to update yaml with content of config
    cfg._update_yaml()
    cfg.save(config_filename)

    # Creating a unique runme.sh file
    runme_filename = options.target_dir + os.sep + "runme.sh"
    # Each piece is formatted on its own: user-provided text (sys.argv,
    # --cluster) may contain '%' and must not go through a later '%' pass.
    cmd = "#!/bin/sh\n"
    cmd += "# generated with sequana version %s with this command:\n" % sequana.version
    cmd += "# %s\n" % " ".join(sys.argv)
    cmd += "snakemake -s %s.rules --stats stats.txt -p -j %s --nolock" % (
        options.pipeline, options.jobs)
    if options.forceall:
        cmd += " --forceall "

    if options.cluster:
        # Do we want to include the cluster config option ?
        cluster_config = Module(options.pipeline).cluster_config
        if options.ignore_cluster_config is True:
            cluster_config = None

        if cluster_config is None:
            cmd += ' --cluster "%s"' % options.cluster
        else:
            cmd += ' --cluster "%s"  --cluster-config %s' %\
                (options.cluster, os.path.basename(cluster_config))

    if options.redirection:
        cmd += " 1>run.out 2>run.err"

    with open(runme_filename, "w") as fout:
        fout.write(cmd)

    # change permission of runme.sh to 755 (a second, guarded chmod at the end
    # of the function used to end up with the same mode)
    os.chmod(runme_filename, 0o755)

    sa.green("Initialisation of %s succeeded" % options.target_dir)
    sa.green("Please, go to the project directory ")
    sa.purple("\n   cd %s\n" % options.target_dir)
    sa.green("Check out the README and config.yaml files")
    sa.green("A basic script to run the analysis is named runme.sh ")
    sa.purple("\n    sh runme.sh\n")
    sa.purple("On a slurm cluster, you may type:")
    sa.purple("\n  srun --qos normal runme.sh\n")
    sa.green(
        "In case of trouble, please post an issue on https://github.com/sequana/sequana/issues "
    )
    sa.green(
        "or type sequana --issue and fill a post with the error and the config file (NO DATA PLEASE)"
    )
Exemplo n.º 14
0
def test_pipeline_manager():
    """Check PipelineManager failure cases, paired detection and getters.

    The failure cases go through ``assert_manager_fails`` which fails the test
    when no exception is raised. Previously ``assert False`` sat inside the
    ``try`` body and was swallowed by a bare ``except``, so these checks could
    never fail.
    """

    def assert_manager_fails(name, cfg):
        # Pass only if building the manager raises.
        try:
            snaketools.PipelineManager(name, cfg)
        except Exception:
            return
        raise AssertionError(
            "PipelineManager(%r, ...) was expected to raise" % name)

    # test missing input_directory
    assert_manager_fails("custom", SequanaConfig({}))

    # normal behaviour but no input provided:
    config = Module("quality_control")._get_config()
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    assert_manager_fails("custome", cfg)

    # two input files: paired data
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    file2 = sequana_data("Hm2_GTGAAA_L005_R2_001.fastq.gz")
    cfg.config.input_samples['file1'] = file1
    cfg.config.input_samples['file2'] = file2
    pm = snaketools.PipelineManager("custome", cfg)
    assert pm.paired == True

    # one input file: single-end data
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    cfg.config.input_samples['file1'] = file1
    pm = snaketools.PipelineManager("custome", cfg)
    assert pm.paired == False

    pm.getlogdir("fastqc")
    pm.getwkdir("fastqc")
    pm.getrawdata()
    pm.getreportdir("test")
    pm.getname("fastqc")
Exemplo n.º 15
0
def main(args=None):
    """Entry point of the compressor standalone.

    Parses the command line, validates the job limit, the cluster option and
    the source/target extensions, writes a temporary config file for the
    ``compressor`` module and executes the resulting snakemake command.

    :param args: list of command-line arguments including the program name;
        ``sys.argv`` is used when it is None.
    :raises ValueError: on too many jobs without the bypass option, a missing
        ``--cluster`` option on the "tars-" cluster, an invalid source/target
        combination, or ``-s``/``-j`` found in the extra snakemake options.
    """
    user_options = Options(prog="sequana")

    if args is None:
        args = sys.argv

    # If --help or no options provided, show the help
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    if options.version:
        import sequana
        print(sequana.version)
        sys.exit()

    if options.jobs > 20:
        if options.bypass is False:
            raise ValueError(
                'The number of jobs is limited to 20. You can '
                'force this limit by using --bypass-job-limit')

    if (misc.on_cluster("tars-") and options.unlock is False
            and options.cluster is None):
        raise ValueError(
            "You are on TARS (Institut Pasteur). You "
            " must use --cluster option to provide the scheduler "
            " options (typically ' --cluster 'sbatch --qos normal' )")

    # valid codecs: fastq/fq, optionally followed by a compression suffix
    compressions = ['', 'bz2', 'gz', 'dsrc']
    valid_extensions = [
        (prefix + "." + compression).rstrip(".")
        for prefix in ("fastq", "fq")
        for compression in compressions
    ]

    # source and target must both be known extensions and must differ
    combo_is_valid = (options.source in valid_extensions
                      and options.target in valid_extensions
                      and options.source != options.target)
    if not combo_is_valid:
        raise ValueError("""--target and --source combo not valid.
Must be one of fastq, fastq.gz, fastq.bz2 or fastq.dsrc""")

    # Create the config file locally
    module = Module("compressor")

    with TempFile(suffix=".yaml", dir=".") as temp:
        cfg = SequanaConfig(module.config)
        for field in ("source", "target", "recursive", "verbose", "threads"):
            setattr(cfg.config.compressor, field, getattr(options, field))
        cfg._update_yaml()
        cfg.save(filename=temp.name)

        # The Snakefile can stay in its original place:
        rule = os.sep.join([module.path, "compressor.rules"])

        # Assemble the snakemake command piece by piece
        pieces = ['snakemake -s %s  --configfile %s -j %s ' %
                  (rule, temp.name, options.jobs)]

        if options.dryrun:
            pieces.append(" --dryrun ")

        pieces.append(" --quiet " if options.verbose is False else " -p ")

        # for slurm only: --cores-per-socket
        if options.cluster:
            pieces.append(' --cluster "%s" ' % options.cluster)

        if options.snakemake:
            for forbidden in (" -s ", " -j "):
                if forbidden in options.snakemake:
                    raise ValueError(
                        "-s or -j cannot be used in "
                        " --snakemake-options    (already used internally")
            pieces.append(options.snakemake)

        if options.unlock:
            pieces.append(" --unlock ")

        cmd = "".join(pieces)

        if options.verbose:
            print(cmd)

        # On travis, snakemake.shell command from snakemake fails.
        # Most probably because travis itself uses a subprocess.
        # execute from easydev uses pexpect.spawn, which seems to work well
        from easydev import execute
        execute(cmd, showcmd=False)
Exemplo n.º 16
0
def test_check_config_with_schema():
    """Check the compressor config against the compressor schema."""
    schema = Module("compressor").schema_config
    compressor_config = SequanaConfig(Module("compressor").config)
    compressor_config.check_config_with_schema(schema)
Exemplo n.º 17
0
def test_pipeline_manager():
    """Check PipelineManager on the compressor config and on temp projects.

    Covers: failure on an empty or incomplete config, paired / single-end
    detection from ``input_directory`` + ``input_pattern``, the getters, and
    several combinations of ``input_pattern`` and ``input_readtag`` applied to
    empty FASTQ files created in temporary directories.

    The failure cases go through ``assert_manager_fails`` which fails the test
    when no exception is raised. Previously ``assert False`` sat inside the
    ``try`` body and was swallowed by a bare ``except``, so these checks could
    never fail.
    """

    def assert_manager_fails(name, cfg):
        # Pass only if building the manager raises.
        try:
            snaketools.PipelineManager(name, cfg)
        except Exception:
            return
        raise AssertionError(
            "PipelineManager(%r, ...) was expected to raise" % name)

    def make_project(tmpdir, pattern, readtag, filenames):
        # Save a config in tmpdir, create empty input files, return the
        # config filename.
        cfg = SequanaConfig()
        cfgname = tmpdir + "/config.yaml"
        cfg.config.input_pattern = pattern
        cfg.config.input_directory = tmpdir
        cfg.config.input_readtag = readtag
        cfg._update_yaml()
        cfg.save(cfgname)
        for filename in filenames:
            # same effect as "touch", but portable and safe with spaces
            open(os.path.join(tmpdir, filename), "a").close()
        return cfgname

    # test missing input_directory
    assert_manager_fails("custom", SequanaConfig({}))

    # normal behaviour but no input provided:
    config = Module("compressor")._get_config()
    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    assert_manager_fails("custome", cfg)

    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    file1 = sequana_data("Hm2_GTGAAA_L005_R1_001.fastq.gz")
    cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1)
    pm = snaketools.PipelineManager("custom", cfg)
    assert pm.paired == False

    cfg = SequanaConfig(config)
    cfg.cleanup()  # remove templates
    cfg.config.input_directory, cfg.config.input_pattern = os.path.split(file1)
    cfg.config.input_pattern = "Hm*gz"
    pm = snaketools.PipelineManager("custom", cfg)
    pm.plot_stats()
    assert pm.paired == True

    pm.getlogdir("fastqc")
    pm.getwkdir("fastqc")
    pm.getrawdata()
    pm.getreportdir("test")
    pm.getname("fastqc")

    # Test different configuration of input_directory, input_readtag,
    # input_pattern
    # Test the _R[12]_ paired
    with tempfile.TemporaryDirectory() as tmpdir:
        cfgname = make_project(
            tmpdir, "*fastq.gz", "_R[12]_",
            ["test_R1_.fastq.gz", "test_R2_.fastq.gz"])
        pm = snaketools.PipelineManager("test", cfgname)
        assert pm.paired == True

    # Test the _[12]. paired
    with tempfile.TemporaryDirectory() as tmpdir:
        cfgname = make_project(
            tmpdir, "*fastq.gz", "_[12].",
            ["test_1.fastq.gz", "test_2.fastq.gz"])
        pm = snaketools.PipelineManager("test", cfgname)
        assert pm.paired is True

    # Test the _R[12]_ single end
    with tempfile.TemporaryDirectory() as tmpdir:
        cfgname = make_project(
            tmpdir, "*fastq.gz", "_R[12]_", ["test_R1_.fastq.gz"])
        pm = snaketools.PipelineManager("test", cfgname)
        assert pm.paired is False

    # Input pattern that matches no file (wrong on purpose): must fail
    with tempfile.TemporaryDirectory() as tmpdir:
        cfgname = make_project(
            tmpdir, "*fq.gz", "_R[12]_", ["test_R1_.fastq.gz"])
        assert_manager_fails("test", cfgname)

    # Read tag without the leading underscore: expected to fail
    with tempfile.TemporaryDirectory() as tmpdir:
        cfgname = make_project(
            tmpdir, "*fastq.gz", "R[12]_",
            ["testR1_.fastq.gz", "testR2_.fastq.gz"])
        assert_manager_fails("test", cfgname)