示例#1
0
    class Input:
        """Input schema made of two groups of optional fields."""

        class MyGroup:
            """Two optional fields; only ``foo`` declares a default."""

            foo = IntegerField(label="Foo", default=42, required=False)
            bar = StringField(label="Bar", required=False)

        class MyGroup2:
            """A single optional field that declares no default."""

            foo = IntegerField(label="Foo", required=False)

        my_group = GroupField(MyGroup, label="My group")
        my_group2 = GroupField(
            MyGroup2, label="My group2 that has all elements without defaults."
        )
示例#2
0
    class Input:
        """Inputs for ImportSra: accession list plus a toggled advanced group."""

        sra_accession = ListField(StringField(), label="SRA accession(s)")
        # Referenced by the ``hidden`` expression of the ``advanced`` group.
        show_advanced = BooleanField(
            label="Show advanced options",
            default=False,
        )

        class Advanced:
            """Fields shown under "Advanced options"."""

            prefetch = BooleanField(label="Prefetch SRA file", default=True)
            max_size_prefetch = StringField(
                label="Maximum file size to download in KB",
                description=(
                    "A unit prefix can be used instead of a value in KB "
                    "(e.g. 1024M or 1G)."
                ),
                default="20G",
            )

            # Optional integer limits; none of them declares a default.
            min_spot_id = IntegerField(
                label="Minimum spot ID",
                required=False,
            )
            max_spot_id = IntegerField(
                label="Maximum spot ID",
                required=False,
            )
            min_read_len = IntegerField(
                label="Minimum read length",
                required=False,
            )

            # Boolean switches, all off by default.
            clip = BooleanField(
                label="Clip adapter sequences",
                default=False,
            )
            aligned = BooleanField(
                label="Dump only aligned sequences",
                default=False,
            )
            unaligned = BooleanField(
                label="Dump only unaligned sequences",
                default=False,
            )

        advanced = GroupField(
            Advanced,
            label="Advanced options",
            hidden="!show_advanced",
        )
示例#3
0
    class Input:
        """Input fields: a GEO accession plus a toggled advanced group."""

        gse_accession = StringField(
            label="GEO accession",
            description="Enter a GEO series accession number.",
        )
        # Referenced by the ``hidden`` expression of the ``advanced`` group.
        show_advanced = BooleanField(label="Show advanced options", default=False)

        class Advanced:
            """Fields shown under "Advanced options"."""

            prefetch = BooleanField(label="Prefetch SRA file", default=True)
            max_size_prefetch = StringField(
                label="Maximum file size to download in KB",
                default="20G",
                description=(
                    "A unit prefix can be used instead of a value in KB "
                    "(e.g. 1024M or 1G)."
                ),
            )
            min_spot_id = IntegerField(label="Minimum spot ID", required=False)
            max_spot_id = IntegerField(label="Maximum spot ID", required=False)
            min_read_len = IntegerField(label="Minimum read length", required=False)
            clip = BooleanField(label="Clip adapter sequences", default=False)
            aligned = BooleanField(label="Dump only aligned sequences", default=False)
            unaligned = BooleanField(
                label="Dump only unaligned sequences", default=False
            )

            # Optional custom probe mapping: the file and its two descriptors.
            mapping_file = FileField(
                label="File with probe ID mappings",
                description=(
                    "The file should be tab-separated and contain two columns with "
                    "their column names. The first column should contain Gene IDs "
                    "and the second one should contain probe names. Supported file "
                    "extensions are .tab.*, .tsv.*, .txt.*"
                ),
                required=False,
            )
            source = StringField(
                label="Gene ID source",
                description=(
                    "Gene ID source used for probe mapping is required when using "
                    "a custom file."
                ),
                allow_custom_choice=True,
                required=False,
                choices=[
                    ("AFFY", "AFFY"),
                    ("DICTYBASE", "DICTYBASE"),
                    ("ENSEMBL", "ENSEMBL"),
                    ("NCBI", "NCBI"),
                    ("UCSC", "UCSC"),
                ],
            )
            build = StringField(
                label="Genome build",
                description=(
                    "Genome build of mapping file is required when using a custom "
                    "file."
                ),
                required=False,
            )

        advanced = GroupField(
            Advanced, label="Advanced options", hidden="!show_advanced"
        )
示例#4
0
    class Input:
        """Inputs for GatkHaplotypeCallerGvcf: BAM, reference and options."""

        bam = DataField("alignment:bam", label="Analysis ready BAM file")
        ref_seq = DataField("seq:nucleotide", label="Reference sequence")

        # Referenced by the ``hidden`` expression of the ``options`` group.
        advanced = BooleanField(
            label="Show advanced options",
            description="Inspect and modify parameters.",
            default=False,
        )

        class Options:
            """Fields shown under "Options"."""

            intervals = DataField(
                "bed",
                label=(
                    "Use intervals BED file to limit the analysis to the specified "
                    "parts of the genome."
                ),
                required=False,
            )
            contamination = FloatField(
                label="Contamination fraction",
                default=0,
                description=(
                    "Fraction of contamination in sequencing data (for all samples) "
                    "to aggressively remove."
                ),
            )

        options = GroupField(Options, label="Options", hidden="!advanced")
示例#5
0
    class Input:
        """Inputs for WgsPreprocess: reads, references and advanced options."""

        reads = DataField("reads:fastq:paired", label="Input sample")
        ref_seq = DataField("seq:nucleotide", label="Reference sequence")
        bwa_index = DataField("index:bwa", label="BWA genome index")
        known_sites = ListField(
            DataField("variants:vcf"), label="Known sites of variation (VCF)"
        )

        # Referenced by the ``hidden`` expression of ``advanced_options``.
        advanced = BooleanField(
            label="Show advanced options",
            description="Inspect and modify parameters.",
            default=False,
        )

        class AdvancedOptions:
            """Fields shown under "Advanced options"."""

            pixel_distance = IntegerField(
                label="--OPTICAL_DUPLICATE_PIXEL_DISTANCE",
                default=2500,
                description=(
                    "Set the optical pixel distance, e.g. distance between clusters. "
                    "Modify this parameter to ensure compatibility with older "
                    "Illumina platforms."
                ),
            )

        advanced_options = GroupField(
            AdvancedOptions, label="Advanced options", hidden="!advanced"
        )
示例#6
0
    class Input:
        """Input schema with one field of each basic kind plus one group."""

        # Fields declared with only a label (and a type argument where needed).
        my_field = StringField(label="My field")
        my_list = ListField(StringField(), label="My list")
        input_data = DataField("test:save", label="My input data")
        input_entity_data = DataField("entity", label="My entity data")
        bar = DataField(data_type="test:save", label="My bar")
        url = UrlField(UrlField.DOWNLOAD, label="My URL")
        integer = IntegerField(label="My integer")
        my_float = FloatField(label="My float")
        my_json = JsonField(label="Blah blah")

        # Optional fields, with and without a default value.
        my_optional = StringField(
            label="Optional", required=False, default="default value"
        )
        my_optional_no_default = StringField(
            label="Optional no default", required=False
        )

        class MyGroup:
            """Fields of ``my_group``; the last one is optional, no default."""

            foo = IntegerField(label="Foo")
            bar = StringField(label="Bar")
            group_optional_no_default = StringField(
                label="Group optional no default",
                required=False,
            )

        my_group = GroupField(MyGroup, label="My group")
示例#7
0
    class Input:
        """Input fields to process ChipQC.

        Declares the aligned reads and called peaks, an optional blacklist,
        an enrichment switch and a group of advanced parameters.
        """

        alignment = DataField(
            data_type="alignment:bam",
            label="Aligned reads",
        )
        peaks = DataField(
            data_type="chipseq:callpeak",
            label="Called peaks",
        )
        # Optional: no blacklist needs to be supplied.
        blacklist = DataField(
            data_type="bed",
            label="Blacklist regions",
            description="BED file containing genomic regions that should be "
            "excluded from the analysis.",
            required=False,
        )
        calculate_enrichment = BooleanField(
            label="Calculate enrichment",
            # Typo fix in the user-facing text: "bulid" -> "build".
            description="Calculate enrichment of signal in known genomic "
            "annotation. By default annotation is provided from "
            "the TranscriptDB package specified by genome build "
            "which should match one of the supported annotations "
            "(hg19, hg38, hg18, mm10, mm9, rn4, ce6, dm3). If "
            "annotation is not supported the analysis is skipped.",
            default=False,
        )

        class Advanced:
            """Add advanced list of options."""

            quality_threshold = IntegerField(
                label="Mapping quality threshold",
                description="Only reads with mapping quality scores above "
                "this threshold will be used for some statistics.",
                default=15,
            )
            profile_window = IntegerField(
                label="Window size",
                description="An integer indicating the width of the window "
                "used for peak profiles. Peaks will be centered "
                "on their summits and include half of the window "
                "size upstream and half downstream of this point.",
                default=400,
            )
            # Kept as a string because the value is a "start:end" range.
            shift_size = StringField(
                label="Shift size",
                # Typo fix in the user-facing text: "specifeird" -> "specified".
                description="Vector of values to try when computing optimal "
                "shift sizes. It should be specified as "
                "consecutive numbers vector with start:end",
                default="1:300",
            )

        advanced = GroupField(
            Advanced,
            label="Advanced parameters",
        )
示例#8
0
    class Input:
        """Inputs for GatkGenotypeGVCFs: GVCFs, references and batching options."""

        gvcfs = ListField(DataField("variants:gvcf"), label="Input data (GVCF)")
        ref_seq = DataField("seq:nucleotide", label="Reference sequence")
        intervals = DataField("bed", label="Intervals file (.bed)")
        dbsnp = DataField("variants:vcf", label="dbSNP file")

        # Referenced by the ``hidden`` expression of ``advanced_options``.
        advanced = BooleanField(
            label="Show advanced options",
            description="Inspect and modify parameters.",
            default=False,
        )

        class AdvancedOptions:
            """Fields shown under "Advanced options"."""

            batch_size = IntegerField(
                label="Batch size",
                default=0,
                description=(
                    "Batch size controls the number of samples "
                    "for which readers are open at once and therefore provides "
                    "a way to minimize memory consumption. However, it can "
                    "take longer to complete. Use the consolidate flag if more "
                    "than a hundred batches were used. This will improve feature "
                    "read time. batchSize=0 means no batching "
                    "(i.e. readers for all samples will be opened at once)."
                ),
            )
            consolidate = BooleanField(
                label="Consolidate",
                default=False,
                description=(
                    "Boolean flag to enable consolidation. If "
                    "importing data in batches, a new fragment is created for "
                    "each batch. In case thousands of fragments are created, "
                    "GenomicsDB feature readers will try to open ~20x as many "
                    "files. Also, internally GenomicsDB would consume more "
                    "memory to maintain bookkeeping data from all fragments. "
                    "Use this flag to merge all fragments into one. Merging "
                    "can potentially improve read performance, however overall "
                    "benefit might not be noticeable as the top Java layers "
                    "have significantly higher overheads. This flag has no "
                    "effect if only one batch is used."
                ),
            )

        advanced_options = GroupField(
            AdvancedOptions, label="Advanced options", hidden="!advanced"
        )
示例#9
0
        class MyGroup:
            """Group of three fields that also nests a one-field subgroup."""

            foo = IntegerField(label="Foo")
            bar = StringField(label="Bar")
            group_optional_no_default = StringField(
                label="Group optional no default",
                required=False,
            )

            class SubGroup:
                """Nested group whose only field defaults to 2."""

                foo = IntegerField(default=2, label="Foo")

            subgroup = GroupField(SubGroup, label="Subgroup")
示例#10
0
    class Input:
        """Inputs for MultiQC: the data objects to report on and CLI options."""

        data = ListField(
            DataField(
                data_type="",
                description=(
                    "Select multiple data objects for which the MultiQC report "
                    "is to be generated."
                ),
            ),
            label="Input data",
        )

        class Advanced:
            """Fields shown under "Advanced options"."""

            dirs = BooleanField(
                label="--dirs",
                default=True,
                description="Prepend directory to sample names.",
            )
            dirs_depth = IntegerField(
                label="--dirs-depth",
                default=-1,
                description=(
                    "Prepend a specified number of directories to sample names. "
                    "Enter a negative number (default) to take from start of path."
                ),
            )
            fullnames = BooleanField(
                label="--fullnames",
                default=False,
                description=(
                    "Disable the sample name cleaning (leave as full file name)."
                ),
            )
            config = BooleanField(
                label="Use configuration file",
                default=True,
                description="Use Genialis configuration file for MultiQC report.",
            )
            cl_config = StringField(
                label="--cl-config",
                required=False,
                description=(
                    "Enter text with command-line configuration options to "
                    "override the defaults (e.g. custom_logo_url: "
                    "https://www.genialis.com)."
                ),
            )

        advanced = GroupField(Advanced, label="Advanced options")
示例#11
0
    class Input:
        """Input fields.

        Declares the single-end reads to process and a group of trimming
        options.
        """

        reads = DataField("reads:fastq:single", label="Select sample(s)")

        class Options:
            """Options."""

            nextseq_trim = IntegerField(
                label="NextSeq/NovaSeq trim",
                description="NextSeq/NovaSeq-specific quality trimming. Trims also dark "
                "cycles appearing as high-quality G bases. This option is mutually "
                "exclusive with the use of standard quality-cutoff trimming and is "
                "suitable for the use with data generated by the recent Illumina "
                "machines that utilize two-color chemistry to encode the four bases.",
                default=10,
            )

            # Optional and without a default.
            quality_cutoff = IntegerField(
                label="Quality cutoff",
                description="Trim low-quality bases from 3' end of each read before adapter "
                "removal. The use of this option will override the use of "
                "NextSeq/NovaSeq trim option.",
                required=False,
            )

            min_len = IntegerField(
                label="Discard reads shorter than specified minimum length.",
                default=20,
            )

            min_overlap = IntegerField(
                # Typo fix in the user-facing label: "Mimimum" -> "Minimum".
                label="Minimum overlap",
                description="Minimum overlap between adapter and read for an adapter to be found.",
                default=20,
            )

            times = IntegerField(
                label="Remove up to a specified number of adapters from each read.",
                default=2,
            )

        options = GroupField(Options, label="Options")
示例#12
0
    class Input:
        """Input fields: an alignment, its GTF annotation and an options group."""

        alignment = DataField("alignment:bam", label="Alignment")
        annotation = DataField("annotation:gtf", label="GTF annotation")

        class Options:
            """Fields shown under "Options"."""

            stranded = StringField(
                label="Assay type",
                default="non_specific",
                choices=[
                    ("non_specific", "Strand non-specific"),
                    ("forward", "Strand-specific forward"),
                    ("reverse", "Strand-specific reverse"),
                    ("auto", "Detect automatically"),
                ],
            )

            # The next two fields share one ``hidden`` expression on ``stranded``.
            cdna_index = DataField(
                "index:salmon",
                label="cDNA index file",
                required=False,
                hidden="options.stranded != 'auto'",
            )
            n_reads = IntegerField(
                label="Number of reads in subsampled alignment file",
                default=5000000,
                hidden="options.stranded != 'auto'",
            )

            # Optional Phred score settings without defaults.
            maxPhredScore = IntegerField(label="Max Phred Score", required=False)
            adjustPhredScore = IntegerField(label="Adjust Phred Score", required=False)

        options = GroupField(Options, label="Options")
示例#13
0
    class Input:
        """Input fields: an alignment, its GTF annotation and an options group."""

        alignment = DataField("alignment:bam", label="Alignment")
        annotation = DataField("annotation:gtf", label="GTF annotation")

        class Options:
            """Fields shown under "Options"."""

            stranded = StringField(
                label="Assay type",
                choices=[
                    ("non_specific", "Strand non-specific"),
                    ("forward", "Strand-specific forward"),
                    ("reverse", "Strand-specific reverse"),
                    ("auto", "Detect automatically"),
                ],
                default="non_specific",
            )

            # The next two fields share one ``hidden`` expression on ``stranded``.
            cdna_index = DataField(
                "index:salmon",
                label="cDNA index file",
                hidden="options.stranded != 'auto'",
                required=False,
            )
            n_reads = IntegerField(
                label="Number of reads in subsampled alignment file",
                hidden="options.stranded != 'auto'",
                default=5000000,
            )

            # Optional Phred score settings without defaults.
            maxPhredScore = IntegerField(label="Max Phred Score", required=False)
            adjustPhredScore = IntegerField(label="Adjust Phred Score", required=False)

        options = GroupField(Options, label="Options")
示例#14
0
    class Input:
        """Input fields: an alignment, its GTF annotation and an options group."""

        alignment = DataField("alignment:bam", label="Alignment")
        annotation = DataField("annotation:gtf", label="GTF annotation")

        class Options:
            """Fields shown under "Options"."""

            # Three strandedness choices; defaults to the non-specific one.
            stranded = StringField(
                label="Assay type",
                choices=[
                    ("non_specific", "Strand non-specific"),
                    ("forward", "Strand-specific forward"),
                    ("reverse", "Strand-specific reverse"),
                ],
                default="non_specific",
            )

        options = GroupField(Options, label="Options")
示例#15
0
    class Input:
        """Inputs for AlignmentSieve: BAM, fragment limits and BigWig options."""

        alignment = DataField(data_type="alignment:bam", label="Alignment BAM file")
        min_fragment_length = IntegerField(
            label="--minFragmentLength",
            description=(
                "The minimum fragment length needed for "
                "read/pair inclusion. This option is primarily useful in "
                "ATACseq experiments, for filtering mono- or di-nucleosome "
                "fragments. (Default: 0)"
            ),
            default=0,
        )
        max_fragment_length = IntegerField(
            label="--maxFragmentLength",
            description=(
                "The maximum fragment length needed for "
                "read/pair inclusion. A value of 0 indicates "
                "no limit. (Default: 0)"
            ),
            default=0,
        )

        class BigWigOptions:
            """Fields shown under "BigWig options"."""

            bigwig_binsize = IntegerField(
                label="BigWig bin size",
                description=(
                    "Size of the bins, in bases, for the output of the "
                    "bigwig/bedgraph file. Default is 50."
                ),
                default=50,
            )
            bigwig_timeout = IntegerField(
                label="BigWig timeout",
                description=(
                    "Number of seconds before creation of BigWig timeouts. "
                    "Default is after 480 seconds (8 minutes)."
                ),
                default=480,
            )

        bigwig_opts = GroupField(BigWigOptions, label="BigWig options")
示例#16
0
        class MySubGroup:
            """Group whose only member is a nested one-field subgroup."""

            class SubGroup:
                """Nested group whose only field defaults to 2."""

                foo = IntegerField(default=2, label="Foo")

            subgroup = GroupField(SubGroup, label="Subgroup foo")
示例#17
0
    class Input:
        """Input fields to process MergeData.

        Template-style schema: one declaration per field type, with labels and
        descriptions that spell out the wording conventions they follow.
        """

        # String field restricted to listed choices; each choice is a
        # (stored value, displayed text) pair and custom values are allowed.
        string_field = StringField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
            choices=[
                ("computer_readable1", "Human readable 1"),
                ("computer_readable2", "Human readable 2"),
            ],
            default="computer_readable1",
            required=False,
            hidden=False,
            allow_custom_choice=True,
        )
        # ``hidden`` given as a plain boolean here.
        text_field = TextField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
            default="default text",
            required=False,
            hidden=True,
        )
        # Referenced by the ``hidden`` expression of ``group_field`` below.
        boolean_field1 = BooleanField(
            label="Labels are short and do not end in a period",
            description="Note that description fields always end in a period.",
            default=False,
            required=True,
            hidden=False,
        )
        integer_field = IntegerField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
            default=1,
        )
        float_field = FloatField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
            default=3.14,
        )
        # Date and date-time defaults are written as strings.
        date_field = DateField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
            default="2020-04-20",
        )
        datetime_field = DateTimeField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
            default="2020-04-20 12:16:00",
        )
        # The remaining simple fields declare only a label and a description.
        url_field = UrlField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )
        secret_field = SecretField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )
        file_field = FileField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )
        filehtml_field = FileHtmlField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )
        dir_field = DirField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )
        json_field = JsonField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )
        # A list field wraps an inner field instance as its first argument.
        list_field = ListField(
            DataField(data_type="your:data:type"),
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )
        data_field = DataField(
            # data_type should not start with data:
            data_type="your:data:type",
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
        )

        class Advanced:
            """Add advanced list of options."""

            boolean_field2 = BooleanField(
                label="Labels are short and do not end in a period",
                description="Description ends in a period.",
                default=False,
            )

        # Wraps the ``Advanced`` class above; here ``hidden`` is an expression
        # string rather than a boolean.
        group_field = GroupField(
            Advanced,
            label="Labels are short and do not end in a period",
            disabled=False,
            # Will show when boolean_field1 is flipped.
            hidden="!boolean_field1",
            collapsed=True,
        )
示例#18
0
    class Input:
        """Inputs for CollectWgsMetrics: BAM, genome and metric options."""

        bam = DataField("alignment:bam", label="Alignment BAM file")
        genome = DataField("seq:nucleotide", label="Genome")
        read_length = IntegerField(label="Average read length", default=150)
        create_histogram = BooleanField(
            label="Include data for base quality histogram in the metrics file",
            default=False,
        )

        # Referenced by the ``hidden`` expression of the ``options`` group.
        advanced = BooleanField(
            label="Show advanced options",
            description="Inspect and modify parameters.",
            default=False,
        )

        class Options:
            """Fields shown under "Options"."""

            min_map_quality = IntegerField(
                label="Minimum mapping quality for a read to contribute coverage",
                default=20,
            )
            min_quality = IntegerField(
                label="Minimum base quality for a base to contribute coverage",
                description=(
                    "N bases will be treated as having a base quality of "
                    "negative infinity and will therefore be excluded from coverage "
                    "regardless of the value of this parameter."
                ),
                default=20,
            )
            coverage_cap = IntegerField(
                label="Maximum coverage cap",
                description=(
                    "Treat positions with coverage exceeding this value as "
                    "if they had coverage at this set value."
                ),
                default=250,
            )
            accumulation_cap = IntegerField(
                label="Ignore positions with coverage above this value",
                description=(
                    "At positions with coverage exceeding this value, "
                    "completely ignore reads that accumulate beyond this value"
                ),
                default=100000,
            )
            count_unpaired = BooleanField(
                label="Count unpaired reads and paired reads with one end unmapped",
                default=False,
            )
            sample_size = IntegerField(
                label="Sample Size used for Theoretical Het Sensitivity sampling",
                default=10000,
            )
            validation_stringency = StringField(
                label="Validation stringency",
                description=(
                    "Validation stringency for all SAM files read by this "
                    "program. Setting stringency to SILENT can improve "
                    "performance when processing a BAM file in which "
                    "variable-length data (read, qualities, tags) do not "
                    "otherwise need to be decoded. Default is STRICT."
                ),
                choices=[
                    ("STRICT", "STRICT"),
                    ("LENIENT", "LENIENT"),
                    ("SILENT", "SILENT"),
                ],
                default="STRICT",
            )

        options = GroupField(Options, label="Options", hidden="!advanced")
示例#19
0
    class Input:
        """Input fields of trimGalorePaired."""

        reads = DataField("reads:fastq:paired",
                          label="Select paired-end reads")

        class QualityTrimming:
            """Quality trimming options."""

            quality = IntegerField(
                label="Quality cutoff",
                description=
                "Trim low-quality ends from reads based on phred score.",
                default=20,
            )
            nextseq = IntegerField(
                label="NextSeq/NovaSeq trim cutoff",
                description="NextSeq/NovaSeq-specific quality "
                "trimming. Trims also dark cycles appearing as "
                "high-quality G bases. This will set a specific "
                "quality cutoff, but qualities of G bases are ignored. "
                "This can not be used with Quality cutoff and will "
                "override it.",
                required=False,
            )
            phred = StringField(
                label="Phred score encoding",
                description="Use either ASCII+33 quality scores as "
                "Phred scores (Sanger/Illumina 1.9+ encoding) or "
                "ASCII+64 quality scores (Illumina 1.5 encoding) for "
                "quality trimming",
                choices=[
                    ("--phred33", "ASCII+33"),
                    ("--phred64", "ASCII+64"),
                ],
                default="--phred33",
            )
            min_length = IntegerField(
                label="Minimum length after trimming",
                description="Discard reads that became shorter than "
                "selected length because of either quality or adapter "
                "trimming. Both reads of a read-pair need to be longer "
                "than specified length to be printed out to validated "
                "paired-end files. If only one read became too short "
                "there is the possibility of keeping such unpaired "
                "single-end reads with Retain unpaired. A value of 0 "
                "disables filtering based on length.",
                default=20,
            )
            max_n = IntegerField(
                label="Maximum number of Ns",
                description="Read exceeding this limit will result in "
                "the entire pair being removed from the trimmed output "
                "files.",
                required=False,
            )
            retain_unpaired = BooleanField(
                label="Retain unpaired reads after trimming",
                description="If only one of the two paired-end reads "
                "became too short, the longer read will be written.",
                default=False,
            )
            unpaired_len_1 = IntegerField(
                label="Unpaired read length cutoff for mate 1",
                default=35,
                hidden="!quality_trim.retain_unpaired",
            )
            unpaired_len_2 = IntegerField(
                label="Unpaired read length cutoff for mate 2",
                default=35,
                hidden="!quality_trim.retain_unpaired",
            )
            clip_r1 = IntegerField(
                label="Trim bases from 5' end of read 1",
                description="This may be useful if the qualities were "
                "very poor, or if there is some sort of unwanted bias "
                "at the 5' end.",
                required=False,
            )
            clip_r2 = IntegerField(
                label="Trim bases from 5' end of read 2",
                description="This may be useful if the qualities were "
                "very poor, or if there is some sort of unwanted bias "
                "at the 5' end. For paired-end bisulfite sequencing, "
                "it is recommended to remove the first few bp because "
                "the end-repair reaction may introduce a bias towards "
                "low methylation.",
                required=False,
            )
            three_prime_r1 = IntegerField(
                label="Trim bases from 3' end of read 1",
                description="Remove bases from the 3' end of read 1 "
                "after adapter/quality trimming has been performed. "
                "This may remove some unwanted bias from the 3' end "
                "that is not directly related to adapter sequence or "
                "basecall quality.",
                required=False,
            )
            three_prime_r2 = IntegerField(
                label="Trim bases from 3' end of read 2",
                description="Remove bases from the 3' end of read 2 "
                "after adapter/quality trimming has been performed. "
                "This may remove some unwanted bias from the 3' end "
                "that is not directly related to adapter sequence or "
                "basecall quality.",
                required=False,
            )

        class AdapterTrimming:
            """Adapter trimming options.

            Adapters may come from manually entered sequences, from adapter
            files or from a predefined universal adapter set; the field
            descriptions declare these sources mutually exclusive.
            """

            # Manually entered adapter sequences, one list per mate.
            adapter = ListField(
                StringField(),
                label="Read 1 adapter sequence",
                required=False,
                default=[],
                description="Adapter sequences to be trimmed. "
                "Also see universal adapters field for predefined "
                "adapters. This is mutually exclusive with read 1 "
                "adapters file and universal adapters.",
            )
            adapter_2 = ListField(
                StringField(),
                label="Read 2 adapter sequence",
                required=False,
                default=[],
                description="Optional adapter sequence to be trimmed "
                "off read 2 of paired-end files. This is mutually "
                "exclusive with read 2 adapters file and universal "
                "adapters.",
            )

            # Adapter sequences supplied as "seq:nucleotide" data objects.
            adapter_file_1 = DataField(
                "seq:nucleotide",
                label="Read 1 adapters file",
                required=False,
                description="This is mutually exclusive with read 1 "
                "adapters and universal adapters.",
            )
            adapter_file_2 = DataField(
                "seq:nucleotide",
                label="Read 2 adapters file",
                required=False,
                description="This is mutually exclusive with read 2 "
                "adapters and universal adapters.",
            )

            # Predefined adapter sets; the stored choice values are
            # flag-style strings ("--illumina", "--nextera", "--small_rna").
            universal_adapter = StringField(
                label="Universal adapters",
                required=False,
                choices=[
                    ("--illumina", "Illumina"),
                    ("--nextera", "Nextera"),
                    ("--small_rna", "Illumina small RNA"),
                ],
                description="Instead of default detection use specific "
                "adapters. Use 13bp of the Illumina universal adapter, "
                "12bp of the Nextera adapter or 12bp of the Illumina "
                "Small RNA 3' Adapter. Selecting to trim smallRNA "
                "adapters will also lower the length value to 18bp. "
                "If the smallRNA libraries are paired-end then read 2 "
                "adapter will be set to the Illumina small RNA 5' "
                "adapter automatically (GATCGTCGGACT) unless defined "
                "explicitly. This is mutually exclusive with manually "
                "defined adapters and adapter files.",
            )

            # Matching thresholds used when looking for adapter overlap.
            stringency = IntegerField(
                label="Overlap with adapter sequence required to trim",
                default=1,
                description="Defaults to a very stringent setting of "
                "1, i.e. even a single base pair of overlapping "
                "sequence will be trimmed of the 3' end of any read.",
            )
            error_rate = FloatField(
                label="Maximum allowed error rate",
                default=0.1,
                description="Number of errors divided by the length of "
                "the matching region",
            )

        class HardTrimming:
            """Hard trim options.

            Both fields are optional; their descriptions state that hard
            trimming replaces adapter-/quality trimming and that the two
            options are incompatible with each other.
            """

            # NOTE(review): ``trim_5`` is labelled as trimming from the 3' end
            # and ``trim_3`` as trimming from the 5' end. Presumably the field
            # names refer to the end that is kept rather than the end that is
            # cut — confirm against the consuming code before renaming or
            # relabelling anything.
            trim_5 = IntegerField(
                label="Hard trim sequences from 3' end",
                required=False,
                description="Instead of performing adapter-/quality "
                "trimming, this option will simply hard-trim sequences "
                "to bp from the 3' end. This is incompatible with "
                "other hard trimming options.",
            )
            trim_3 = IntegerField(
                label="Hard trim sequences from 5' end",
                required=False,
                description="Instead of performing adapter-/quality "
                "trimming, this option will simply hard-trim sequences "
                "to bp from the 5' end. This is incompatible with "
                "other hard trimming options.",
            )

        # Register the option classes as labelled input groups. The group
        # name ``quality_trim`` is referenced by the ``hidden`` expressions of
        # the unpaired read length cutoff fields, so it must stay in sync
        # with them.
        adapter_trim = GroupField(AdapterTrimming, label="Adapter trimming")
        quality_trim = GroupField(QualityTrimming, label="Quality trimming")
        hard_trim = GroupField(HardTrimming, label="Hard trimming")
示例#20
0
    class Input:
        """Input fields.

        Declares the reads, Salmon index and GTF annotation inputs plus an
        ``options`` group that stays hidden until ``advanced`` is enabled.
        Most option labels mirror the corresponding command-line flag name.
        """

        reads = DataField("reads:fastq", label="Input sample(s)")
        salmon_index = DataField("index:salmon", label="Salmon index")
        annotation = DataField("annotation:gtf", label="GTF annotation")

        # Toggle referenced by the ``hidden`` expression of ``options``.
        advanced = BooleanField(
            label="Show advanced options",
            description="Inspect and modify parameters.",
            default=False,
        )

        class Options:
            """Options."""

            # Library type; "A" leaves detection to the tool.
            stranded = StringField(
                label="Assay type",
                choices=[
                    ("A", "Detect automatically"),
                    ("U", "Strand non-specific (U)"),
                    ("SF", "Strand-specific forward (SF)"),
                    ("SR", "Strand-specific reverse (SR)"),
                    ("IU", "Strand non-specific (paired-end IU)"),
                    ("ISF", "Strand-specific forward (paired-end ISF)"),
                    # Fixed: the label used to contain an unbalanced "(".
                    ("ISR", "Strand-specific reverse (paired-end ISR)"),
                ],
                default="A",
            )

            seq_bias = BooleanField(
                label="--seqBias",
                description="Perform sequence-specific bias correction.",
                default=False,
            )

            gc_bias = BooleanField(
                label="--gcBias",
                description="[beta for single-end reads] Perform fragment GC bias correction.",
                default=False,
            )

            discard_orphans_quasi = BooleanField(
                label="--discardOrphansQuasi",
                description="Discard orphan mappings in quasi-mapping mode. "
                "If this flag is passed then only paired "
                "mappings will be considered toward "
                "quantification estimates. The default "
                "behavior is to consider orphan mappings "
                "if no valid paired mappings exist.",
                default=False,
            )

            no_length_correction = BooleanField(
                label="--noLengthCorrection",
                description="[Experimental] Entirely disables "
                "length correction when estimating the "
                "abundance of transcripts. The abundance "
                "estimates are reported in CPM (counts per "
                "million) unit. This option can be used "
                "with protocols where one expects that "
                "fragments derive from their underlying "
                "targets without regard to that target's  "
                "length (e.g. QuantSeq).",
                default=False,
            )

            # No default here: the description says the tool's own default
            # depends on the mapping mode.
            consensus_slack = FloatField(
                label="--consensusSlack",
                description="The amount of slack allowed in the quasi-mapping "
                "consensus mechanism.  Normally, a transcript must "
                "cover all hits to be considered for mapping.  "
                "If this is set to a fraction, X, greater than 0 "
                "(and in [0,1)), then a transcript can fail "
                "to cover up to (100 * X)% of the hits before it "
                "is discounted as a mapping candidate. The default "
                "value of this option is 0.2 in selective alignment mode "
                "and 0 otherwise.",
                required=False,
            )

            min_score_fraction = FloatField(
                label="--minScoreFraction",
                description="The fraction of the optimal possible alignment "
                "score that a mapping must achieve in order to be "
                "considered valid - should be in (0,1]",
                default=0.65,
            )

            # Fixed: the label used to read "---incompatPrior" (three dashes),
            # unlike every other flag label in this group.
            incompat_prior = FloatField(
                label="--incompatPrior",
                description="This option sets the prior probability "
                "that an alignment that disagrees with "
                "the specified library type (--libType) "
                "results from the true fragment origin. "
                "Setting this to 0 specifies that "
                "alignments that disagree with the "
                "library type should be impossible, "
                "while setting it to 1 says that "
                "alignments that disagree with the "
                "library type are no less likely than "
                "those that do.",
                default=0,
            )

            range_factorization_bins = IntegerField(
                label="--rangeFactorizationBins",
                description="Factorizes the likelihood used in "
                "quantification by adopting a new notion "
                "of equivalence classes based on the "
                "conditional probabilities with which "
                "fragments are generated from different "
                "transcripts.  This is a more "
                "fine-grained factorization than the "
                "normal rich equivalence classes. The "
                "default value (4) corresponds to the "
                "default used in Zakeri et al. 2017 "
                "and larger values imply a more "
                "fine-grained factorization. If range "
                "factorization is enabled, a common "
                "value to select for this parameter is "
                "4. A value of 0 signifies the use of "
                "basic rich equivalence classes.",
                default=4,
            )

            min_assigned_frag = IntegerField(
                label="--minAssignedFrags",
                description="The minimum number of fragments that "
                "must be assigned to the transcriptome "
                "for quantification to proceed.",
                default=10,
            )

        options = GroupField(Options, label="Options", hidden="!advanced")
示例#21
0
    class Input:
        """Input fields for VariantFiltrationVqsr.

        Consists of the input VCF, a group of resource VCF files (only dbSNP
        is mandatory) and a group of advanced options that is hidden until
        ``advanced`` is enabled.
        """

        vcf = DataField("variants:vcf", label="Input data (VCF)")

        class ResourceFiles:
            """Resource files options."""

            # The only resource without ``required=False``.
            dbsnp = DataField("variants:vcf", label="dbSNP file")

            mills = DataField(
                "variants:vcf",
                required=False,
                label="Mills and 1000G gold standard indels",
            )

            axiom_poly = DataField(
                "variants:vcf",
                required=False,
                label="1000G Axiom genotype data",
            )

            hapmap = DataField(
                "variants:vcf",
                required=False,
                label="HapMap variants",
            )

            omni = DataField(
                "variants:vcf",
                required=False,
                label="1000G Omni variants",
            )

            thousand_genomes = DataField(
                "variants:vcf",
                required=False,
                label="1000G high confidence SNPs",
            )

        # Toggle referenced by the ``hidden`` expression of ``advanced_options``.
        advanced = BooleanField(
            label="Show advanced options",
            default=False,
            description="Inspect and modify parameters.",
        )

        class AdvancedOptions:
            """Advanced options."""

            use_as_anno = BooleanField(
                label="--use-allele-specific-annotations",
                default=False,
            )

            # Annotation lists are kept separately for INDELs and SNPs.
            indel_anno_fields = ListField(
                StringField(),
                label="Annotation fields (INDEL filtering)",
                default=["FS", "ReadPosRankSum", "MQRankSum", "QD", "SOR", "DP"],
            )

            snp_anno_fields = ListField(
                StringField(),
                label="Annotation fields (SNP filtering)",
                default=["QD", "MQRankSum", "ReadPosRankSum", "FS", "MQ", "SOR", "DP"],
            )

            indel_filter_level = FloatField(
                label="--truth-sensitivity-filter-level (INDELs)",
                default=99.0,
            )

            snp_filter_level = FloatField(
                label="--truth-sensitivity-filter-level (SNPs)",
                default=99.7,
            )

            # Same description for both variant types; only the default differs.
            max_gaussians_indels = IntegerField(
                label="--max-gaussians (INDELs)",
                description="This parameter determines the maximum number "
                "of Gaussians that should be used when building a positive "
                "model using the variational Bayes algorithm. This parameter "
                "sets the expected number of clusters in modeling. If a "
                "dataset gives fewer distinct clusters, e.g. as can happen "
                "for smaller data, then the tool will tell you there is "
                "insufficient data with a No data found error message. "
                "In this case, try decrementing the --max-gaussians value.",
                default=4,
            )

            max_gaussians_snps = IntegerField(
                label="--max-gaussians (SNPs)",
                description="This parameter determines the maximum number "
                "of Gaussians that should be used when building a positive "
                "model using the variational Bayes algorithm. This parameter "
                "sets the expected number of clusters in modeling. If a "
                "dataset gives fewer distinct clusters, e.g. as can happen "
                "for smaller data, then the tool will tell you there is "
                "insufficient data with a No data found error message. "
                "In this case, try decrementing the --max-gaussians value.",
                default=6,
            )

        resource_files = GroupField(ResourceFiles, label="Resource files")

        advanced_options = GroupField(
            AdvancedOptions,
            label="Advanced options",
            hidden="!advanced",
        )
示例#22
0
    class Input:
        """Input fields to process MarkDuplicates.

        Takes an alignment BAM file, switches controlling the duplicate
        marking step itself, and a group of BigWig options.
        """

        bam = DataField("alignment:bam", label="Alignment BAM file")
        skip = BooleanField(
            label="Skip MarkDuplicates step",
            default=False,
            description="MarkDuplicates step can be skipped.",
        )
        remove_duplicates = BooleanField(
            label="Remove duplicates",
            default=False,
            description="If true do not write duplicates to the output file "
            "instead of writing them with appropriate flags set.",
        )
        validation_stringency = StringField(
            label="Validation stringency",
            default="STRICT",
            choices=[
                ("STRICT", "STRICT"),
                ("LENIENT", "LENIENT"),
                ("SILENT", "SILENT"),
            ],
            description="Validation stringency for all SAM files read by this "
            "program. Setting stringency to SILENT can improve "
            "performance when processing a BAM file in which "
            "variable-length data (read, qualities, tags) do not "
            "otherwise need to be decoded. Default is STRICT.",
        )
        # The empty-string choice is the default and is labelled as keeping
        # the order stated in the BAM header.
        assume_sort_order = StringField(
            label="Assume sort order",
            default="",
            choices=[
                ("", "as in BAM header (default)"),
                ("unsorted", "unsorted"),
                ("queryname", "queryname"),
                ("coordinate", "coordinate"),
                ("duplicate", "duplicate"),
                ("unknown", "unknown"),
            ],
            # Fixed: a space was missing between the two sentences
            # ("otherwise.Possible") and the listed values omitted the
            # "duplicate" choice offered above.
            description="If not null (default), assume that the input file "
            "has this order even if the header says otherwise. "
            "Possible values are unsorted, queryname, coordinate, "
            "duplicate and unknown.",
        )

        class BigWigOptions:
            """Options for calculating BigWig."""

            bigwig_binsize = IntegerField(
                label="BigWig bin size",
                default=50,
                description="Size of the bins, in bases, for the output of the "
                "bigwig/bedgraph file. Default is 50.",
            )
            bigwig_timeout = IntegerField(
                label="BigWig timeout",
                default=480,
                description="Number of seconds before creation of BigWig timeouts. "
                "Default is after 480 seconds (8 minutes).",
            )

        bigwig_opts = GroupField(BigWigOptions, label="BigWig options")
示例#23
0
    class Input:
        """Input fields to process Deseq.

        Takes case and control expression samples, optional gene set
        creation settings (the thresholds are hidden unless ``create_sets``
        is enabled) and two option groups: ``options`` for the analysis
        itself and ``filter_options`` for gene filtering.
        """

        case = ListField(
            DataField("expression"),
            label="Case",
            description="Case samples (replicates)",
        )
        control = ListField(
            DataField("expression"),
            label="Control",
            description="Control samples (replicates)",
        )

        create_sets = BooleanField(
            label="Create gene sets",
            default=False,
            description="After calculating differential gene "
            "expressions create gene sets for up-regulated genes, "
            "down-regulated genes and all genes.",
        )
        logfc = FloatField(
            label="Log2 fold change threshold for gene sets",
            default=1.0,
            hidden="!create_sets",
            description="Genes above Log2FC are considered as "
            "up-regulated and genes below -Log2FC as down-regulated.",
        )
        fdr = FloatField(
            label="FDR threshold for gene sets",
            default=0.05,
            hidden="!create_sets",
        )

        class Options:
            """Options."""

            beta_prior = BooleanField(
                label="Beta prior",
                default=False,
                description="Whether or not to put a zero-mean normal prior "
                "on the non-intercept coefficients.",
            )

        class FilterOptions:
            """Filtering options."""

            # Each boolean switch below reveals the threshold that follows it.
            count = BooleanField(
                label="Filter genes based on expression count",
                default=True,
            )
            min_count_sum = IntegerField(
                label="Minimum gene expression count summed over all samples",
                default=10,
                hidden="!filter_options.count",
                description="Filter genes in the expression matrix input. "
                "Remove genes where the expression count sum over all samples "
                "is below the threshold.",
            )
            cook = BooleanField(
                label="Filter genes based on Cook's distance",
                default=False,
            )
            cooks_cutoff = FloatField(
                label="Threshold on Cook's distance",
                required=False,
                hidden="!filter_options.cook",
                description="If one or more samples have Cook's distance "
                "larger than the threshold set here, the p-value for the row "
                "is set to NA. If left empty, the default threshold of 0.99 "
                "quantile of the F(p, m-p) distribution is used, where p is "
                "the number of coefficients being fitted and m is the number "
                "of samples. This test excludes Cook's distance of samples "
                "belonging to experimental groups with only two samples.",
            )
            independent = BooleanField(
                label="Apply independent gene filtering",
                default=False,
            )
            alpha = FloatField(
                label="Significance cut-off used for optimizing independent "
                "gene filtering",
                default=0.1,
                hidden="!filter_options.independent",
                description="The value should be set to adjusted p-value "
                "cut-off (FDR).",
            )

        # Fixed: the two group labels were swapped. ``Options`` only holds the
        # beta prior setting of the analysis, whereas ``FilterOptions`` holds
        # every gene filtering switch and threshold.
        options = GroupField(
            Options, label="Differential expression analysis options"
        )
        filter_options = GroupField(FilterOptions, label="Gene filtering options")
示例#24
0
    class Input:
        """Input fields.

        Takes paired-end reads, a reference sequence, a BWA index and a list
        of known-sites VCF files, followed by five option groups. Every group
        except ``trimming_options`` is hidden until ``advanced`` is enabled.
        """

        reads = DataField("reads:fastq:paired", label="Input sample")
        ref_seq = DataField("seq:nucleotide", label="Reference sequence")
        bwa_index = DataField("index:bwa", label="BWA genome index")
        known_sites = ListField(
            DataField("variants:vcf"),
            label="Known sites of variation (VCF)",
        )

        # Toggle referenced by the ``hidden`` expressions of the groups below.
        advanced = BooleanField(
            label="Show advanced options",
            default=False,
            description="Inspect and modify parameters.",
        )

        class GatkOptions:
            """Options."""

            intervals = DataField(
                "bed",
                label="Intervals BED file",
                required=False,
                description="Use intervals BED file to limit the analysis to "
                "the specified parts of the genome.",
            )

            # NOTE(review): described as a fraction but declared as an
            # IntegerField, so presumably only whole numbers can be entered —
            # confirm whether a FloatField is intended here.
            contamination = IntegerField(
                label="Contamination fraction",
                description="Fraction of contamination in sequencing "
                "data (for all samples) to aggressively remove.",
                default=0,
            )

        class Trimming:
            """Trimming parameters."""

            adapters = DataField(
                "seq:nucleotide",
                label="Adapter sequences",
                description="Adapter sequences in FASTA format that will "
                "be removed from the reads.",
                required=False,
            )

            # The three clip settings below are disabled while no adapters
            # file is selected.
            seed_mismatches = IntegerField(
                label="Seed mismatches",
                description="Specifies the maximum mismatch count which "
                "will still allow a full match to be performed. This field "
                "is required to perform adapter trimming.",
                required=False,
                disabled="!trimming_options.adapters",
            )

            simple_clip_threshold = IntegerField(
                label="Simple clip threshold",
                description="Specifies how accurate the match between any "
                "adapter sequence must be against a read. This field is "
                "required to perform adapter trimming.",
                required=False,
                disabled="!trimming_options.adapters",
            )

            # Disabled only while all three clip settings are unset.
            min_adapter_length = IntegerField(
                label="Minimum adapter length",
                description="In addition to the alignment score, palindrome "
                "mode can verify that a minimum length of adapter has been "
                "detected. If unspecified, this defaults to 8 bases, for "
                "historical reasons. However, since palindrome mode has a "
                "very low false positive rate, this can be safely reduced, "
                "even down to 1, to allow shorter adapter fragments to be "
                "removed.",
                default=8,
                disabled="!trimming_options.seed_mismatches && "
                "!trimming_options.simple_clip_threshold && "
                "!trimming_options.palindrome_clip_threshold",
            )

            palindrome_clip_threshold = IntegerField(
                label="Palindrome clip threshold",
                description="Specifies how accurate the match between the "
                "two adapter ligated reads must be for PE palindrome read "
                "alignment. This field is required to perform adapter "
                "trimming.",
                required=False,
                disabled="!trimming_options.adapters",
            )

            # Optional quality and length filters.
            leading = IntegerField(
                label="Leading quality",
                description="Remove low quality bases from the beginning, "
                "if below a threshold quality.",
                required=False,
            )

            trailing = IntegerField(
                label="Trailing quality",
                description="Remove low quality bases from the end, if "
                "below a threshold quality.",
                required=False,
            )

            minlen = IntegerField(
                label="Minimum length",
                description="Drop the read if it is below a specified length.",
                required=False,
            )

        class AlignmentSummary:
            """AlignmentSummary parameters."""

            adapters = DataField(
                "seq:nucleotide",
                required=False,
                label="Adapter sequences",
            )

            max_insert_size = IntegerField(
                default=100000,
                label="Maximum insert size",
            )

            # The "Unspecified" choice is stored as the string "null".
            pair_orientation = StringField(
                label="Pair orientation",
                choices=[
                    ("null", "Unspecified"),
                    ("FR", "FR"),
                    ("RF", "RF"),
                    ("TANDEM", "TANDEM"),
                ],
                default="null",
            )

        class PicardWGSMetrics:
            """PicardWGSMetrics parameters."""

            read_length = IntegerField(
                default=150,
                label="Average read length",
            )

            min_map_quality = IntegerField(
                default=20,
                label="Minimum mapping quality for a read to contribute coverage",
            )

            min_quality = IntegerField(
                label="Minimum base quality for a base to contribute coverage",
                description="N bases will be treated as having a base quality of "
                "negative infinity and will therefore be excluded from "
                "coverage regardless of the value of this parameter.",
                default=20,
            )

            coverage_cap = IntegerField(
                label="Maximum coverage cap",
                description="Treat positions with coverage exceeding this "
                "value as if they had coverage at this set value.",
                default=250,
            )

            accumulation_cap = IntegerField(
                label="Ignore positions with coverage above this value",
                description="At positions with coverage exceeding this value, "
                "completely ignore reads that accumulate beyond this value.",
                default=100000,
            )

            sample_size = IntegerField(
                default=10000,
                label="Sample size used for Theoretical Het Sensitivity sampling",
            )

        class InsertSizeMetrics:
            """InsertSizeMetrics parameters."""

            minimum_fraction = FloatField(
                label="Minimum fraction of reads in a category to be considered",
                description="When generating the histogram, discard any data "
                "categories (out of FR, TANDEM, RF) that have fewer than "
                "this fraction of overall reads (Range: 0 and 0.5).",
                default=0.05,
            )

            include_duplicates = BooleanField(
                default=False,
                label="Include reads marked as duplicates in the insert size histogram",
            )

            deviations = FloatField(
                label="Deviations limit",
                description="Generate mean, standard deviation and plots "
                "by trimming the data down to MEDIAN + DEVIATIONS * "
                "MEDIAN_ABSOLUTE_DEVIATION. This is done because insert "
                "size data typically includes enough anomalous values "
                "from chimeras and other artifacts to make the mean and "
                "standard deviation grossly misleading regarding the real "
                "distribution.",
                default=10.0,
            )

        # Group registrations. The name ``trimming_options`` is referenced by
        # the ``disabled`` expressions inside ``Trimming``.
        trimming_options = GroupField(Trimming, label="Trimming options")

        gatk_options = GroupField(
            GatkOptions,
            label="GATK options",
            hidden="!advanced",
        )

        alignment_summary = GroupField(
            AlignmentSummary,
            label="Alignment summary options",
            hidden="!advanced",
        )

        wgs_metrics = GroupField(
            PicardWGSMetrics,
            label="Picard WGS metrics options",
            hidden="!advanced",
        )

        insert_size = GroupField(
            InsertSizeMetrics,
            label="Picard InsertSizeMetrics options",
            hidden="!advanced",
        )