Exemplo n.º 1
0
    def __init__(self, prog=NAME, epilog=None):
        """Initialise the parser and register all of its options.

        The usage banner is ``sequana_prolog`` formatted with ``NAME`` and
        passed through ``col.purple``.

        :param prog: program name forwarded to the parent initialiser.
        :param epilog: epilog text forwarded to the parent initialiser.
        """
        banner = col.purple(sequana_prolog.format(name=NAME))
        super(Options, self).__init__(
            usage=banner,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

        # Shared option sets; each add_options call receives this parser.
        SlurmOptions().add_options(self)
        SnakemakeOptions(working_directory=NAME).add_options(self)
        InputOptions().add_options(self)
        GeneralOptions().add_options(self)

        # Options declared by this parser itself live in the "pipeline" group.
        group = self.add_argument_group("pipeline")
        group.add_argument("--TODO", dest="TODO", default=4, type=int)
Exemplo n.º 2
0
    def __init__(self, prog=NAME, epilog=None):
        """Initialise the parser and register all of its options.

        Registers the Slurm, Snakemake and general option sets, then the
        ``--flowcell-paths`` and ``--input-pattern`` options in the
        "pipeline" group.

        :param prog: program name forwarded to the parent initialiser.
        :param epilog: epilog text forwarded to the parent initialiser.
        """
        usage = col.purple(sequana_prolog.format(**{"name": NAME}))
        super(Options, self).__init__(
            usage=usage,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

        # add a new group of options to the parser
        so = SlurmOptions()
        so.add_options(self)

        # add a snakemake group of options to the parser
        so = SnakemakeOptions(working_directory=NAME)
        so.add_options(self)

        # InputOptions is not registered by this parser.
        # NOTE(review): presumably because --input-pattern is declared
        # explicitly below -- confirm before re-enabling it.

        so = GeneralOptions()
        so.add_options(self)

        pipeline_group = self.add_argument_group("pipeline")

        # A required option never falls back to its default, so none is given
        # (the previous ``default=[]`` was unreachable).
        pipeline_group.add_argument("--flowcell-paths",
                                    dest="flowcell_paths",
                                    required=True,
                                    nargs="+")
        # ArgumentDefaultsHelpFormatter appends "(default: ...)" on its own;
        # repeating the default in the help text would print it twice.
        pipeline_group.add_argument(
            "--input-pattern",
            dest="input_pattern",
            default="*fastq.gz",
            help="""pattern for the input FastQ files""")
Exemplo n.º 3
0
    def __init__(self, prog=NAME, epilog=None):
        """Initialise the parser and register all of its options.

        Registers the Slurm, Snakemake, input (with ``*.bed`` as input
        pattern) and general option sets, then the options of the "pipeline"
        group.

        :param prog: program name forwarded to the parent initialiser.
        :param epilog: epilog text forwarded to the parent initialiser.
        """
        usage = col.purple(sequana_prolog.format(**{"name": NAME}))
        super(Options, self).__init__(
            usage=usage,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        # add a new group of options to the parser
        so = SlurmOptions()
        so.add_options(self)

        # add a snakemake group of options to the parser
        so = SnakemakeOptions(working_directory=NAME)
        so.add_options(self)

        so = InputOptions(input_pattern="*.bed")
        so.add_options(self)

        so = GeneralOptions()
        so.add_options(self)

        pipeline_group = self.add_argument_group("pipeline")

        pipeline_group.add_argument("-o", "--circular", action="store_true")
        # Numeric options carry an explicit type so that a value given on the
        # command line has the same kind as the default instead of being a str.
        pipeline_group.add_argument("--double-threshold",
                                    default=0.5,
                                    type=float)
        pipeline_group.add_argument(
            "--genbank",
            default=None,
            help="the genbank to annotate the events found")
        pipeline_group.add_argument(
            "--reference",
            default=None,
            help="the genome reference used to plot GC content")
        pipeline_group.add_argument("--high-threshold", default=4, type=float)
        pipeline_group.add_argument("--low-threshold", default=-4, type=float)
        # The formatter already appends "(default: 2)", so the help text does
        # not repeat it.
        pipeline_group.add_argument(
            "--mixture-models",
            default=2,
            type=int,
            help="""Number of models to use in the mixture model.
                 No need to change this value. Possibly, you may want to set
                 to 1 or 3 in some rare occasions.""")
        pipeline_group.add_argument(
            "--window",
            default=20000,
            type=int,
            help="""Length of the running median window. Keep to 20000 as much as
            possible. This allows the detection of CNV up to 10kb. If longer
            event are present, increase this window size.""")
        pipeline_group.add_argument("--chunksize", default=5000000, type=int)
        pipeline_group.add_argument("--binning", default=-1, type=int)
        # NOTE(review): typed as int to match --binning and its -1 default;
        # confirm that no fractional value is expected here.
        pipeline_group.add_argument("--cnv-clustering", default=-1, type=int)
Exemplo n.º 4
0
    def __init__(self, prog=NAME, epilog=None):
        """Initialise the parser and register all of its options.

        The usage banner is ``sequana_prolog`` formatted with ``NAME`` and
        passed through ``col.purple``.

        :param prog: program name forwarded to the parent initialiser.
        :param epilog: epilog text forwarded to the parent initialiser.
        """
        banner = col.purple(sequana_prolog.format(name=NAME))
        super(Options, self).__init__(
            usage=banner,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

        # Shared option sets; each add_options call receives this parser.
        SlurmOptions().add_options(self)
        SnakemakeOptions(working_directory=NAME).add_options(self)
        InputOptions().add_options(self)
        GeneralOptions().add_options(self)

        # Options of the "pipeline" group as (flag, keyword settings) pairs;
        # they are registered in exactly this order.
        specs = [
            ("--mapper", {
                "default": 'bwa',
                "choices": ['bwa', 'minimap2', 'bowtie2'],
                "help": "Choose one of the valid mapper",
            }),
            ("--reference-file", {
                "required": True,
                "help": "You input reference file in fasta format",
            }),
            ("--annotation-file", {
                "help": "Used by the sequana_coverage tool if provided",
            }),
            ("--do-coverage", {
                "action": "store_true",
                "help": "Use sequana_coverage (prokaryotes)",
            }),
            ("--pacbio", {
                "action": "store_true",
                "help": "If set, automatically set the input-readtag to None and set minimap2 options to -x map-pb",
            }),
            ("--create-bigwig", {
                "action": "store_true",
                "help": "create the bigwig files from the BAM files",
            }),
        ]
        group = self.add_argument_group("pipeline")
        for flag, settings in specs:
            group.add_argument(flag, **settings)
Exemplo n.º 5
0
    def __init__(self, prog=NAME, epilog=None):
        usage = col.purple(sequana_prolog.format(**{"name": NAME}))
        super(Options, self).__init__(
            usage=usage,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        # add a new group of options to the parser
        so = SlurmOptions()
        so.add_options(self)

        # add a snakemake group of options to the parser
        so = SnakemakeOptions(working_directory=NAME)
        so.add_options(self)

        so = InputOptions(add_input_readtag=False)
        so.add_options(self)

        so = GeneralOptions()
        so.add_options(self)

        pipeline_group = self.add_argument_group("sequana_fastqc")
        pipeline_group.add_argument(
            "--method",
            dest="method",
            default="fastqc",
            choices=['fastqc', 'falco'],
            help="""Software to be used to perform QC of input data set,
                Standard tool is fastqc (default), but one can use falco, which is 3-4 faster
                and produces same plots""")
        pipeline_group.add_argument(
            "--skip-multiqc",
            default=False,
            action="store_true",
            help="""It may happen that multiqc requires lots of memory. For local
run, you may want to swithc multiqc off with this option""")
        #pipeline_group.add_argument("--data-type", dest="data_type",
        #    default="illumina", choices=['illumina', 'nanopore', 'pacbio', 'mgi', 'others'],
        #    help="""nanopore, others and pacbio are not paired. The --input-readtag then be ignored""")

        self.add_argument("--run",
                          default=False,
                          action="store_true",
                          help="execute the pipeline directly")
Exemplo n.º 6
0
    def __init__(self, prog=NAME, epilog=None):
        usage = col.purple(sequana_prolog.format(**{"name": NAME}))
        super(Options, self).__init__(
            usage=usage,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

        # add a new group of options to the parser
        # demultiplex requires lots of memory sometimes hence the 64G options
        #
        so = SlurmOptions(queue="biomicspole", memory="64000", cores=16)
        so.add_options(self)

        # add a snakemake group of options to the parser
        so = SnakemakeOptions(working_directory="fastq")
        so.add_options(self)

        so = GeneralOptions()
        so.add_options(self)

        pipeline_group = self.add_argument_group("pipeline")

        pipeline_group.add_argument(
            "--threads",
            dest="threads",
            default=4,
            type=int,
            help="Number of threads to use during the demultiplexing. ")
        pipeline_group.add_argument("--barcode-mismatch",
                                    dest="mismatch",
                                    default=0,
                                    type=int)
        pipeline_group.add_argument(
            "--merging-strategy",
            required=True,
            dest="merging_strategy",
            choices=["merge", "none", "none_and_force"],
            help=
            """Merge Lanes or not. options are : merge, none, none_and_force.
            The 'merge' choice merges all lanes. The 'none' choice do NOT merge the lanes.
            For NextSeq runs, we should merge the lanes; if users demultiplex NextSeq
            and set this option to none, an error is raised. If you still want to
            skip the merging step, then set this option to 'none_and_force'""")
        pipeline_group.add_argument(
            "--bcl-directory",
            dest="bcl_directory",
            required=True,
            help="""Directory towards the raw BCL files. This directory should
            contains files such as RunParameters.xml, RunInfo.xml """)
        pipeline_group.add_argument("--sample-sheet",
                                    dest="samplesheet",
                                    required=True,
                                    default="SampleSheet.csv",
                                    help="Sample sheet filename to be used")
        pipeline_group.add_argument(
            "--no-ignore-missing-bcls",
            dest="no_ignore_missing_bcls",
            action="store_true",
            default=False,
            help="""In bcl2fastq, the option --ignore-missing-bcls implies that
we assume 'N'/'#' for missing calls. In Sequana_demultiplex, we use that option
by default. If you do not want that behviour, but the one from bcl2fastq, use
this flag(--no-ignore-missing-bcls)""")
        pipeline_group.add_argument(
            "--bgzf-compression",
            dest="bgzf_compression",
            action="store_true",
            default=False,
            help="""turn on BGZF compression for FASTQ files. By default,
bcl2fastq uses this option; By default we don't. Set --bgzl--compression flag to
set it back""")
        self.add_argument(
            "--mars-seq",
            default=False,
            action="store_true",
            help=
            """Set options to  --minimum-trimmed-read-length 15 --mask-short-adapter-reads 15 
and do not merge lanes""")
        self.add_argument("--run",
                          default=False,
                          action="store_true",
                          help="execute the pipeline directly")
Exemplo n.º 7
0
    def __init__(self, prog=NAME, epilog=None):
        usage = col.purple(sequana_prolog.format(**{"name": NAME}))
        super(Options, self).__init__(
            usage=usage,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)
        # add a new group of options to the parser
        so = SlurmOptions()
        so.add_options(self)

        # add a snakemake group of options to the parser
        so = SnakemakeOptions(working_directory=NAME)
        so.add_options(self)

        so = InputOptions()
        so.add_options(self)

        so = GeneralOptions()
        so.add_options(self)

        pipeline_group = self.add_argument_group("pipeline_general")
        pipeline_group.add_argument("--genome-directory",
                                    dest="genome_directory",
                                    default=".",
                                    required=True)
        pipeline_group.add_argument(
            "--aligner",
            dest="aligner",
            required=True,
            choices=['bowtie2', 'bowtie1', 'star', "salmon"],
            help="a mapper in bowtie, bowtie2, star")
        pipeline_group.add_argument(
            "--force-indexing",
            action="store_true",
            default=False,
            help="""If indexing files exists already, but you wish to
                create them again, use this option. Note that you will need
                permissions for that""")
        pipeline_group.add_argument(
            "--rRNA-feature",
            default="rRNA",
            help="""Feature name corresponding to the rRNA to be identified in
the input GFF/GTF files""")
        pipeline_group.add_argument(
            "--contaminant-file",
            default=None,
            help="""A fasta file. If used, the rRNA-feature is not used 
This option is useful if you have a dedicated list of rRNA feature or a dedicated 
fasta file to search for contaminants""")

        # cutadapt related
        so = CutadaptOptions()
        so.add_options(self)

        pipeline_group.add_argument(
            "--skip-gff-check",
            action="store_true",
            default=False,
            help="""By default we check the coherence between the input
GFF file and related options (e.g. --feature_counts_feature_type and 
--feature_counts_attribute options). This may take time e.g. for mouse or human.
Using this option skips the sanity checks""")

        # feature counts related
        so = FeatureCountsOptions()
        so.add_options(self)

        # others
        self.add_argument("--run",
                          default=False,
                          action="store_true",
                          help="execute the pipeline directly")

        pipeline_group = self.add_argument_group("pipeline_others")
        pipeline_group.add_argument(
            '--do-igvtools',
            action="store_true",
            help="""if set, this will compute TDF files that can be imported in
IGV browser. TDF file allows to quickly visualise the coverage of the mapped
reads.""")
        pipeline_group.add_argument(
            '--do-bam-coverage',
            action="store_true",
            help="Similar to --do-igvtools using bigwig")
        pipeline_group.add_argument(
            '--do-mark-duplicates',
            action="store_true",
            help="""Mark duplicates. To be used e.g. with QCs""")

        pipeline_group = self.add_argument_group("pipeline_RNAseQC")
        pipeline_group.add_argument('--do-rnaseqc',
                                    action="store_true",
                                    help="do RNA-seq QC using RNAseQC v2")
        pipeline_group.add_argument(
            '--rnaseqc-gtf-file',
            help="""The GTF file to be used for RNAseQC. Without a valid GTF,
            RNAseqQC will not work. Again, yu may try sequana.gff3 module to build the gtf"""
        )

        # RNADIFF
        pipeline_group = self.add_argument_group("section_rnadiff")
        pipeline_group.add_argument(
            '--rnadiff-mode',
            type=str,
            required=False,
            choices=["one_factor", "GLM"],
            default="one_factor",
            help=
            """Fix the type of analyis (one_factor or GLM). By default uses one_factor"""
        )
Exemplo n.º 8
0
    def __init__(self, prog=NAME, epilog=None):
        usage = col.purple(sequana_prolog.format(**{"name": NAME}))
        super(Options, self).__init__(
            usage=usage,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        )
        # add a new group of options to the parser
        so = SlurmOptions()
        so.add_options(self)

        # add a snakemake group of options to the parser
        so = SnakemakeOptions(working_directory=NAME)
        so.add_options(self)

        so = InputOptions()
        so.add_options(self)

        so = GeneralOptions()
        so.add_options(self)

        pipeline_group = self.add_argument_group("pipeline_general")
        pipeline_group.add_argument("--genome-directory",
                                    dest="genome_directory",
                                    default=".",
                                    required=True)
        pipeline_group.add_argument(
            "--aligner",
            dest="aligner",
            required=True,
            choices=["bowtie2", "bowtie1", "star", "salmon"],
            help="a mapper in bowtie, bowtie2, star",
        )
        pipeline_group.add_argument(
            "--rRNA-feature",
            default="rRNA",
            help="""Feature name corresponding to the rRNA to be identified in
the input GFF/GTF files""",
        )
        pipeline_group.add_argument(
            "--contaminant-file",
            default=None,
            help="""A fasta file. If used, the rRNA-feature is not used 
This option is useful if you have a dedicated list of rRNA feature or a dedicated 
fasta file to search for contaminants""",
        )

        # cutadapt related
        so = TrimmingOptions()
        so.add_options(self)

        pipeline_group.add_argument(
            "--skip-gff-check",
            action="store_true",
            default=False,
            help="""By default we check the coherence between the input
GFF file and related options (e.g. --feature_counts_feature_type and 
--feature_counts_attribute options). This may take time e.g. for mouse or human.
Using this option skips the sanity checks""",
        )

        # feature counts related
        so = FeatureCountsOptions()
        so.add_options(self)

        # others
        self.add_argument(
            "--run",
            default=False,
            action="store_true",
            help="execute the pipeline directly",
        )

        pipeline_group = self.add_argument_group("pipeline_others")
        pipeline_group.add_argument(
            "--do-igvtools",
            action="store_true",
            help="""if set, this will compute TDF files that can be imported in
IGV browser. TDF file allows to quickly visualise the coverage of the mapped
reads.""",
        )
        pipeline_group.add_argument(
            "--do-bam-coverage",
            action="store_true",
            help="Similar to --do-igvtools using bigwig",
        )
        pipeline_group.add_argument(
            "--do-mark-duplicates",
            action="store_true",
            help="""Mark duplicates. To be used e.g. with QCs""",
        )

        pipeline_group.add_argument("--do-rnaseqc",
                                    action="store_true",
                                    help="do RNA-seq QC using RNAseQC v2")
        pipeline_group.add_argument(
            "--rnaseqc-gtf-file",
            help="""The GTF file to be used for RNAseQC. Without a valid GTF,
            RNAseqQC will not work. You may try sequana.gff3 module to build the gtf from the GFF file""",
        )
        pipeline_group.add_argument(
            "--do-rseqc",
            action="store_true",
            help="""do RNA-seq QC using RseQC. This will need a BED file
corresponding to your GFF file. For prokaryotes, the BED file is created on the
fly.""",
        )
        pipeline_group.add_argument("--rseqc-bed-file",
                                    help="""The rseQC input bed file.""")
Exemplo n.º 9
0
    def __init__(self, prog=NAME, epilog=None):
        """Initialise the parser and register all of its options.

        The usage banner is ``sequana_prolog`` formatted with ``NAME`` and
        passed through ``col.purple``. Every option declared here, including
        ``--run``, goes into the "pipeline" group.

        :param prog: program name forwarded to the parent initialiser.
        :param epilog: epilog text forwarded to the parent initialiser.
        """
        banner = col.purple(sequana_prolog.format(name=NAME))
        super(Options, self).__init__(
            usage=banner,
            prog=prog,
            description="",
            epilog=epilog,
            formatter_class=argparse.ArgumentDefaultsHelpFormatter)

        # Shared option sets; each add_options call receives this parser.
        SlurmOptions().add_options(self)
        SnakemakeOptions(working_directory=NAME).add_options(self)
        InputOptions().add_options(self)
        GeneralOptions().add_options(self)

        method_help = """set the downsampling method to be random based on read
                counts (random) on read percentage (random_pct))"""

        # Options of the "pipeline" group as (flag, keyword settings) pairs;
        # they are registered in exactly this order.
        specs = [
            ("--downsampling-input-format", {
                "default": "fastq",
                "type": str,
                "choices": ["fasta", "fastq", "sam"],
                "help": "set input format (only 'fastq', 'fasta', 'sam' supported for now)",
            }),
            ("--downsampling-method", {
                "default": "random",
                "type": str,
                "choices": ["random", "random_pct"],
                "help": method_help,
            }),
            ("--downsampling-percent", {
                "default": 10,
                "type": float,
                "help": """Percentage of reads to select. Use with method *random_pct* only""",
            }),
            ("--downsampling-max-entries", {
                "default": 1000,
                "type": int,
                "help": """max entries (reads, alignement) to select. Use with method *random* only""",
            }),
            ("--downsampling-threads", {
                "default": 4,
                "type": int,
                "help": """max threads to use with pigz""",
            }),
            ("--run", {
                "default": False,
                "action": "store_true",
                "help": "Execute the pipeline",
            }),
        ]
        group = self.add_argument_group("pipeline")
        for flag, settings in specs:
            group.add_argument(flag, **settings)