class Input:
    """Input schema built from two groups that contain only optional fields."""

    # Group mixing an optional field with a default and one without.
    class MyGroup:
        foo = IntegerField(
            label="Foo",
            required=False,
            default=42,
        )
        bar = StringField(
            label="Bar",
            required=False,
        )

    # Group whose single optional field has no default.
    class MyGroup2:
        foo = IntegerField(
            label="Foo",
            required=False,
        )

    my_group = GroupField(
        MyGroup,
        label="My group",
    )
    my_group2 = GroupField(
        MyGroup2,
        label="My group2 that has all elements without defaults.",
    )
class Input:
    """Inputs accepted by the ImportSra process."""

    sra_accession = ListField(
        StringField(),
        label="SRA accession(s)",
    )
    # Toggle referenced by the ``hidden`` expression of the ``advanced`` group.
    show_advanced = BooleanField(
        label="Show advanced options",
        default=False,
    )

    class Advanced:
        """Fields collected under the advanced options group."""

        prefetch = BooleanField(
            label="Prefetch SRA file",
            default=True,
        )
        # Declared as a string so that values with a unit suffix are accepted.
        max_size_prefetch = StringField(
            label="Maximum file size to download in KB",
            default="20G",
            description="A unit prefix can be used instead of a value in KB (e.g. 1024M or 1G).",
        )
        min_spot_id = IntegerField(
            label="Minimum spot ID",
            required=False,
        )
        max_spot_id = IntegerField(
            label="Maximum spot ID",
            required=False,
        )
        min_read_len = IntegerField(
            label="Minimum read length",
            required=False,
        )
        clip = BooleanField(
            label="Clip adapter sequences",
            default=False,
        )
        aligned = BooleanField(
            label="Dump only aligned sequences",
            default=False,
        )
        unaligned = BooleanField(
            label="Dump only unaligned sequences",
            default=False,
        )

    advanced = GroupField(
        Advanced,
        label="Advanced options",
        hidden="!show_advanced",
    )
class Input:
    """Input schema keyed on a GEO series accession."""

    gse_accession = StringField(
        label="GEO accession",
        description="Enter a GEO series accession number.",
    )
    # Toggle referenced by the ``hidden`` expression of the ``advanced`` group.
    show_advanced = BooleanField(
        label="Show advanced options",
        default=False,
    )

    class Advanced:
        """Fields collected under the advanced options group."""

        prefetch = BooleanField(
            label="Prefetch SRA file",
            default=True,
        )
        # Declared as a string so that values with a unit suffix are accepted.
        max_size_prefetch = StringField(
            label="Maximum file size to download in KB",
            default="20G",
            description="A unit prefix can be used instead of a value in KB (e.g. 1024M or 1G).",
        )
        min_spot_id = IntegerField(
            label="Minimum spot ID",
            required=False,
        )
        max_spot_id = IntegerField(
            label="Maximum spot ID",
            required=False,
        )
        min_read_len = IntegerField(
            label="Minimum read length",
            required=False,
        )
        clip = BooleanField(
            label="Clip adapter sequences",
            default=False,
        )
        aligned = BooleanField(
            label="Dump only aligned sequences",
            default=False,
        )
        unaligned = BooleanField(
            label="Dump only unaligned sequences",
            default=False,
        )

        # Optional custom probe mapping; the descriptions of ``source`` and
        # ``build`` state that both are needed when such a file is supplied.
        mapping_file = FileField(
            label="File with probe ID mappings",
            description="The file should be tab-separated and contain two columns with their column names. The "
            "first column should contain Gene IDs and the second one should contain probe names. Supported file "
            "extensions are .tab.*, .tsv.*, .txt.*",
            required=False,
        )
        source = StringField(
            label="Gene ID source",
            description="Gene ID source used for probe mapping is required when using a custom file.",
            allow_custom_choice=True,
            required=False,
            choices=[
                ("AFFY", "AFFY"),
                ("DICTYBASE", "DICTYBASE"),
                ("ENSEMBL", "ENSEMBL"),
                ("NCBI", "NCBI"),
                ("UCSC", "UCSC"),
            ],
        )
        build = StringField(
            label="Genome build",
            description="Genome build of mapping file is required when using a custom file.",
            required=False,
        )

    advanced = GroupField(
        Advanced,
        label="Advanced options",
        hidden="!show_advanced",
    )
class Input:
    """Inputs accepted by GatkHaplotypeCallerGvcf."""

    bam = DataField(
        "alignment:bam",
        label="Analysis ready BAM file",
    )
    ref_seq = DataField(
        "seq:nucleotide",
        label="Reference sequence",
    )
    # Toggle referenced by the ``hidden`` expression of the ``options`` group.
    advanced = BooleanField(
        label="Show advanced options",
        description="Inspect and modify parameters.",
        default=False,
    )

    class Options:
        """Fields collected under the options group."""

        intervals = DataField(
            "bed",
            label="Use intervals BED file to limit the analysis to the specified parts of the genome.",
            required=False,
        )
        contamination = FloatField(
            label="Contamination fraction",
            default=0,
            description="Fraction of contamination in sequencing data (for all samples) to aggressively remove.",
        )

    options = GroupField(
        Options,
        label="Options",
        hidden="!advanced",
    )
class Input:
    """Inputs accepted by the WgsPreprocess process."""

    reads = DataField(
        "reads:fastq:paired",
        label="Input sample",
    )
    ref_seq = DataField(
        "seq:nucleotide",
        label="Reference sequence",
    )
    bwa_index = DataField(
        "index:bwa",
        label="BWA genome index",
    )
    known_sites = ListField(
        DataField("variants:vcf"),
        label="Known sites of variation (VCF)",
    )
    # Toggle referenced by the ``hidden`` expression of ``advanced_options``.
    advanced = BooleanField(
        label="Show advanced options",
        description="Inspect and modify parameters.",
        default=False,
    )

    class AdvancedOptions:
        """Fields collected under the advanced options group."""

        pixel_distance = IntegerField(
            label="--OPTICAL_DUPLICATE_PIXEL_DISTANCE",
            default=2500,
            description="Set the optical pixel distance, e.g. "
            "distance between clusters. Modify this parameter to "
            "ensure compatibility with older Illumina platforms.",
        )

    advanced_options = GroupField(
        AdvancedOptions,
        label="Advanced options",
        hidden="!advanced",
    )
class Input:
    """Input schema exercising a broad mix of field types."""

    # Required scalar, list and data inputs.
    my_field = StringField(label="My field")
    my_list = ListField(
        StringField(),
        label="My list",
    )
    input_data = DataField(
        "test:save",
        label="My input data",
    )
    input_entity_data = DataField(
        "entity",
        label="My entity data",
    )
    # Same data type as ``input_data`` but passed by keyword.
    bar = DataField(
        data_type="test:save",
        label="My bar",
    )
    url = UrlField(
        UrlField.DOWNLOAD,
        label="My URL",
    )
    integer = IntegerField(label="My integer")
    my_float = FloatField(label="My float")
    my_json = JsonField(label="Blah blah")

    # Optional inputs, with and without a default value.
    my_optional = StringField(
        label="Optional",
        required=False,
        default="default value",
    )
    my_optional_no_default = StringField(
        label="Optional no default",
        required=False,
    )

    # Group with two required members and one optional member.
    class MyGroup:
        foo = IntegerField(label="Foo")
        bar = StringField(label="Bar")
        group_optional_no_default = StringField(
            label="Group optional no default",
            required=False,
        )

    my_group = GroupField(
        MyGroup,
        label="My group",
    )
class Input:
    """Input fields to process ChipQC."""

    alignment = DataField(
        data_type="alignment:bam",
        label="Aligned reads",
    )
    peaks = DataField(
        data_type="chipseq:callpeak",
        label="Called peaks",
    )
    # Optional BED file of regions to leave out of the analysis.
    blacklist = DataField(
        data_type="bed",
        label="Blacklist regions",
        description="BED file containing genomic regions that should be "
        "excluded from the analysis.",
        required=False,
    )
    # Fixed the "bulid" typo in the user-facing description.
    calculate_enrichment = BooleanField(
        label="Calculate enrichment",
        description="Calculate enrichment of signal in known genomic "
        "annotation. By default annotation is provided from "
        "the TranscriptDB package specified by genome build "
        "which should match one of the supported annotations "
        "(hg19, hg38, hg18, mm10, mm9, rn4, ce6, dm3). If "
        "annotation is not supported the analysis is skipped.",
        default=False,
    )

    class Advanced:
        """Add advanced list of options."""

        quality_threshold = IntegerField(
            label="Mapping quality threshold",
            description="Only reads with mapping quality scores above "
            "this threshold will be used for some statistics.",
            default=15,
        )
        profile_window = IntegerField(
            label="Window size",
            description="An integer indicating the width of the window "
            "used for peak profiles. Peaks will be centered "
            "on their summits and include half of the window "
            "size upstream and half downstream of this point.",
            default=400,
        )
        # Kept as a string because the value is a ``start:end`` range.
        # Fixed the "specifeird" typo in the user-facing description.
        shift_size = StringField(
            label="Shift size",
            description="Vector of values to try when computing optimal "
            "shift sizes. It should be specified as "
            "consecutive numbers vector with start:end",
            default="1:300",
        )

    advanced = GroupField(
        Advanced,
        label="Advanced parameters",
    )
class Input:
    """Inputs accepted by GatkGenotypeGVCFs."""

    gvcfs = ListField(
        DataField("variants:gvcf"),
        label="Input data (GVCF)",
    )
    ref_seq = DataField(
        "seq:nucleotide",
        label="Reference sequence",
    )
    intervals = DataField(
        "bed",
        label="Intervals file (.bed)",
    )
    dbsnp = DataField(
        "variants:vcf",
        label="dbSNP file",
    )
    # Toggle referenced by the ``hidden`` expression of ``advanced_options``.
    advanced = BooleanField(
        label="Show advanced options",
        description="Inspect and modify parameters.",
        default=False,
    )

    class AdvancedOptions:
        """Fields collected under the advanced options group."""

        # Per its description, the default of 0 means no batching.
        batch_size = IntegerField(
            label="Batch size",
            default=0,
            description="Batch size controls the number of samples "
            "for which readers are open at once and therefore provides "
            "a way to minimize memory consumption. However, it can "
            "take longer to complete. Use the consolidate flag if more "
            "than a hundred batches were used. This will improve feature "
            "read time. batchSize=0 means no batching "
            "(i.e. readers for all samples will be opened at once).",
        )
        consolidate = BooleanField(
            label="Consolidate",
            default=False,
            description="Boolean flag to enable consolidation. If "
            "importing data in batches, a new fragment is created for "
            "each batch. In case thousands of fragments are created, "
            "GenomicsDB feature readers will try to open ~20x as many "
            "files. Also, internally GenomicsDB would consume more "
            "memory to maintain bookkeeping data from all fragments. "
            "Use this flag to merge all fragments into one. Merging "
            "can potentially improve read performance, however overall "
            "benefit might not be noticeable as the top Java layers "
            "have significantly higher overheads. This flag has no "
            "effect if only one batch is used.",
        )

    advanced_options = GroupField(
        AdvancedOptions,
        label="Advanced options",
        hidden="!advanced",
    )
# Group with two required fields, one optional field and a nested subgroup.
class MyGroup:
    foo = IntegerField(label="Foo")
    bar = StringField(label="Bar")
    group_optional_no_default = StringField(
        label="Group optional no default",
        required=False,
    )

    # Nested group whose only field carries a default value.
    class SubGroup:
        foo = IntegerField(
            label="Foo",
            default=2,
        )

    subgroup = GroupField(
        SubGroup,
        label="Subgroup",
    )
class Input:
    """Inputs accepted by the MultiQC process."""

    # The inner DataField is declared with an empty ``data_type`` string.
    data = ListField(
        DataField(
            data_type="",
            description="Select multiple data objects for which the MultiQC report is to be "
            "generated.",
        ),
        label="Input data",
    )

    class Advanced:
        """Fields collected under the advanced options group."""

        dirs = BooleanField(
            label="--dirs",
            default=True,
            description="Prepend directory to sample names.",
        )
        dirs_depth = IntegerField(
            label="--dirs-depth",
            default=-1,
            description="Prepend a specified number of directories to sample names. Enter a "
            "negative number (default) to take from start of path.",
        )
        fullnames = BooleanField(
            label="--fullnames",
            default=False,
            description="Disable the sample name cleaning (leave as full file name).",
        )
        config = BooleanField(
            label="Use configuration file",
            default=True,
            description="Use Genialis configuration file for MultiQC report.",
        )
        cl_config = StringField(
            label="--cl-config",
            required=False,
            description="Enter text with command-line configuration options to override the "
            "defaults (e.g. custom_logo_url: https://www.genialis.com).",
        )

    advanced = GroupField(
        Advanced,
        label="Advanced options",
    )
class Input:
    """Input fields."""

    reads = DataField(
        "reads:fastq:single",
        label="Select sample(s)",
    )

    class Options:
        """Options."""

        nextseq_trim = IntegerField(
            label="NextSeq/NovaSeq trim",
            description="NextSeq/NovaSeq-specific quality trimming. Trims also dark "
            "cycles appearing as high-quality G bases. This option is mutually "
            "exclusive with the use of standard quality-cutoff trimming and is "
            "suitable for the use with data generated by the recent Illumina "
            "machines that utilize two-color chemistry to encode the four bases.",
            default=10,
        )
        # Optional; its description states it overrides ``nextseq_trim``.
        quality_cutoff = IntegerField(
            label="Quality cutoff",
            description="Trim low-quality bases from 3' end of each read before adapter "
            "removal. The use of this option will override the use of "
            "NextSeq/NovaSeq trim option.",
            required=False,
        )
        min_len = IntegerField(
            label="Discard reads shorter than specified minimum length.",
            default=20,
        )
        # Label typo fixed: "Mimimum" -> "Minimum" (matches the description).
        min_overlap = IntegerField(
            label="Minimum overlap",
            description="Minimum overlap between adapter and read for an adapter to be found.",
            default=20,
        )
        times = IntegerField(
            label="Remove up to a specified number of adapters from each read.",
            default=2,
        )

    options = GroupField(
        Options,
        label="Options",
    )
class Input:
    """Input schema: an alignment, an annotation and strandedness options."""

    alignment = DataField(
        "alignment:bam",
        label="Alignment",
    )
    annotation = DataField(
        "annotation:gtf",
        label="GTF annotation",
    )

    class Options:
        """Fields collected under the options group."""

        stranded = StringField(
            label="Assay type",
            default="non_specific",
            choices=[
                ("non_specific", "Strand non-specific"),
                ("forward", "Strand-specific forward"),
                ("reverse", "Strand-specific reverse"),
                ("auto", "Detect automatically"),
            ],
        )
        # The next two fields carry a ``hidden`` expression on ``stranded``.
        cdna_index = DataField(
            "index:salmon",
            label="cDNA index file",
            required=False,
            hidden="options.stranded != 'auto'",
        )
        n_reads = IntegerField(
            label="Number of reads in subsampled alignment file",
            default=5000000,
            hidden="options.stranded != 'auto'",
        )
        maxPhredScore = IntegerField(
            label="Max Phred Score",
            required=False,
        )
        adjustPhredScore = IntegerField(
            label="Adjust Phred Score",
            required=False,
        )

    options = GroupField(
        Options,
        label="Options",
    )
class Input:
    """Input schema: an alignment, an annotation and strandedness options."""

    alignment = DataField(
        "alignment:bam",
        label="Alignment",
    )
    annotation = DataField(
        "annotation:gtf",
        label="GTF annotation",
    )

    class Options:
        """Fields collected under the options group."""

        stranded = StringField(
            label="Assay type",
            default="non_specific",
            choices=[
                ("non_specific", "Strand non-specific"),
                ("forward", "Strand-specific forward"),
                ("reverse", "Strand-specific reverse"),
                ("auto", "Detect automatically"),
            ],
        )
        # The next two fields carry a ``hidden`` expression on ``stranded``.
        cdna_index = DataField(
            "index:salmon",
            label="cDNA index file",
            required=False,
            hidden="options.stranded != 'auto'",
        )
        n_reads = IntegerField(
            label="Number of reads in subsampled alignment file",
            default=5000000,
            hidden="options.stranded != 'auto'",
        )
        maxPhredScore = IntegerField(
            label="Max Phred Score",
            required=False,
        )
        adjustPhredScore = IntegerField(
            label="Adjust Phred Score",
            required=False,
        )

    options = GroupField(
        Options,
        label="Options",
    )
class Input:
    """Input schema: an alignment, an annotation and an assay type choice."""

    alignment = DataField(
        "alignment:bam",
        label="Alignment",
    )
    annotation = DataField(
        "annotation:gtf",
        label="GTF annotation",
    )

    class Options:
        """Fields collected under the options group."""

        stranded = StringField(
            label="Assay type",
            default="non_specific",
            choices=[
                ("non_specific", "Strand non-specific"),
                ("forward", "Strand-specific forward"),
                ("reverse", "Strand-specific reverse"),
            ],
        )

    options = GroupField(
        Options,
        label="Options",
    )
class Input:
    """Inputs accepted by the AlignmentSieve process."""

    alignment = DataField(
        data_type="alignment:bam",
        label="Alignment BAM file",
    )
    min_fragment_length = IntegerField(
        label="--minFragmentLength",
        description="The minimum fragment length needed for "
        "read/pair inclusion. This option is primarily useful in "
        "ATACseq experiments, for filtering mono- or di-nucleosome "
        "fragments. (Default: 0)",
        default=0,
    )
    # Per its description, 0 stands for "no limit".
    max_fragment_length = IntegerField(
        label="--maxFragmentLength",
        description="The maximum fragment length needed for "
        "read/pair inclusion. A value of 0 indicates "
        "no limit. (Default: 0)",
        default=0,
    )

    class BigWigOptions:
        """Fields controlling the BigWig calculation."""

        bigwig_binsize = IntegerField(
            label="BigWig bin size",
            description="Size of the bins, in bases, for the output of the "
            "bigwig/bedgraph file. Default is 50.",
            default=50,
        )
        bigwig_timeout = IntegerField(
            label="BigWig timeout",
            description="Number of seconds before creation of BigWig timeouts. "
            "Default is after 480 seconds (8 minutes).",
            default=480,
        )

    bigwig_opts = GroupField(
        BigWigOptions,
        label="BigWig options",
    )
# Wrapper group that holds nothing but a nested subgroup.
class MySubGroup:
    # Nested group whose only field carries a default value.
    class SubGroup:
        foo = IntegerField(
            label="Foo",
            default=2,
        )

    subgroup = GroupField(
        SubGroup,
        label="Subgroup foo",
    )
class Input:
    """Inputs accepted by the MergeData process (one example per field type)."""

    string_field = StringField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
        choices=[
            ("computer_readable1", "Human readable 1"),
            ("computer_readable2", "Human readable 2"),
        ],
        default="computer_readable1",
        required=False,
        hidden=False,
        allow_custom_choice=True,
    )
    text_field = TextField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
        default="default text",
        required=False,
        hidden=True,
    )
    # Referenced by the ``hidden`` expression of ``group_field`` below.
    boolean_field1 = BooleanField(
        label="Labels are short and do not end in a period",
        description="Note that description fields always end in a period.",
        default=False,
        required=True,
        hidden=False,
    )
    integer_field = IntegerField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
        default=1,
    )
    float_field = FloatField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
        default=3.14,
    )
    date_field = DateField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
        default="2020-04-20",
    )
    datetime_field = DateTimeField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
        default="2020-04-20 12:16:00",
    )
    url_field = UrlField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
    secret_field = SecretField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
    file_field = FileField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
    filehtml_field = FileHtmlField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
    dir_field = DirField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
    json_field = JsonField(
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
    list_field = ListField(
        DataField(data_type="your:data:type"),
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )
    data_field = DataField(
        # data_type should not start with data:
        data_type="your:data:type",
        label="Labels are short and do not end in a period",
        description="Description ends in a period.",
    )

    class Advanced:
        """Fields collected under the advanced group."""

        boolean_field2 = BooleanField(
            label="Labels are short and do not end in a period",
            description="Description ends in a period.",
            default=False,
        )

    group_field = GroupField(
        Advanced,
        label="Labels are short and do not end in a period",
        disabled=False,
        # Will show when boolean_field1 is flipped.
        hidden="!boolean_field1",
        collapsed=True,
    )
class Input:
    """Inputs accepted by CollectWgsMetrics."""

    bam = DataField(
        "alignment:bam",
        label="Alignment BAM file",
    )
    genome = DataField(
        "seq:nucleotide",
        label="Genome",
    )
    read_length = IntegerField(
        label="Average read length",
        default=150,
    )
    create_histogram = BooleanField(
        label="Include data for base quality histogram in the metrics file",
        default=False,
    )
    # Toggle referenced by the ``hidden`` expression of the ``options`` group.
    advanced = BooleanField(
        label="Show advanced options",
        description="Inspect and modify parameters.",
        default=False,
    )

    class Options:
        """Fields collected under the options group."""

        min_map_quality = IntegerField(
            label="Minimum mapping quality for a read to contribute coverage",
            default=20,
        )
        min_quality = IntegerField(
            label="Minimum base quality for a base to contribute coverage",
            description="N bases will be treated as having a base quality of "
            "negative infinity and will therefore be excluded from coverage "
            "regardless of the value of this parameter.",
            default=20,
        )
        coverage_cap = IntegerField(
            label="Maximum coverage cap",
            description="Treat positions with coverage exceeding this value as "
            "if they had coverage at this set value.",
            default=250,
        )
        accumulation_cap = IntegerField(
            label="Ignore positions with coverage above this value",
            description="At positions with coverage exceeding this value, "
            "completely ignore reads that accumulate beyond this value",
            default=100000,
        )
        count_unpaired = BooleanField(
            label="Count unpaired reads and paired reads with one end unmapped",
            default=False,
        )
        sample_size = IntegerField(
            label="Sample Size used for Theoretical Het Sensitivity sampling",
            default=10000,
        )
        validation_stringency = StringField(
            label="Validation stringency",
            description="Validation stringency for all SAM files read by this "
            "program. Setting stringency to SILENT can improve "
            "performance when processing a BAM file in which "
            "variable-length data (read, qualities, tags) do not "
            "otherwise need to be decoded. Default is STRICT.",
            choices=[
                ("STRICT", "STRICT"),
                ("LENIENT", "LENIENT"),
                ("SILENT", "SILENT"),
            ],
            default="STRICT",
        )

    options = GroupField(
        Options,
        label="Options",
        hidden="!advanced",
    )
class Input:
    """Inputs accepted by trimGalorePaired."""

    reads = DataField(
        "reads:fastq:paired",
        label="Select paired-end reads",
    )

    class QualityTrimming:
        """Fields controlling quality trimming."""

        quality = IntegerField(
            label="Quality cutoff",
            description="Trim low-quality ends from reads based on phred score.",
            default=20,
        )
        # Optional; its description states it overrides ``quality``.
        nextseq = IntegerField(
            label="NextSeq/NovaSeq trim cutoff",
            description="NextSeq/NovaSeq-specific quality "
            "trimming. Trims also dark cycles appearing as "
            "high-quality G bases. This will set a specific "
            "quality cutoff, but qualities of G bases are ignored. "
            "This can not be used with Quality cutoff and will "
            "override it.",
            required=False,
        )
        # The choice values are spelled as command-line flags.
        phred = StringField(
            label="Phred score encoding",
            description="Use either ASCII+33 quality scores as "
            "Phred scores (Sanger/Illumina 1.9+ encoding) or "
            "ASCII+64 quality scores (Illumina 1.5 encoding) for "
            "quality trimming",
            choices=[
                ("--phred33", "ASCII+33"),
                ("--phred64", "ASCII+64"),
            ],
            default="--phred33",
        )
        min_length = IntegerField(
            label="Minimum length after trimming",
            description="Discard reads that became shorter than "
            "selected length because of either quality or adapter "
            "trimming. Both reads of a read-pair need to be longer "
            "than specified length to be printed out to validated "
            "paired-end files. If only one read became too short "
            "there is the possibility of keeping such unpaired "
            "single-end reads with Retain unpaired. A value of 0 "
            "disables filtering based on length.",
            default=20,
        )
        max_n = IntegerField(
            label="Maximum number of Ns",
            description="Read exceeding this limit will result in "
            "the entire pair being removed from the trimmed output "
            "files.",
            required=False,
        )
        # Referenced by the ``hidden`` expressions of the two cutoffs below.
        retain_unpaired = BooleanField(
            label="Retain unpaired reads after trimming",
            description="If only one of the two paired-end reads "
            "became too short, the longer read will be written.",
            default=False,
        )
        unpaired_len_1 = IntegerField(
            label="Unpaired read length cutoff for mate 1",
            default=35,
            hidden="!quality_trim.retain_unpaired",
        )
        unpaired_len_2 = IntegerField(
            label="Unpaired read length cutoff for mate 2",
            default=35,
            hidden="!quality_trim.retain_unpaired",
        )
        clip_r1 = IntegerField(
            label="Trim bases from 5' end of read 1",
            description="This may be useful if the qualities were "
            "very poor, or if there is some sort of unwanted bias "
            "at the 5' end.",
            required=False,
        )
        clip_r2 = IntegerField(
            label="Trim bases from 5' end of read 2",
            description="This may be useful if the qualities were "
            "very poor, or if there is some sort of unwanted bias "
            "at the 5' end. For paired-end bisulfite sequencing, "
            "it is recommended to remove the first few bp because "
            "the end-repair reaction may introduce a bias towards "
            "low methylation.",
            required=False,
        )
        three_prime_r1 = IntegerField(
            label="Trim bases from 3' end of read 1",
            description="Remove bases from the 3' end of read 1 "
            "after adapter/quality trimming has been performed. "
            "This may remove some unwanted bias from the 3' end "
            "that is not directly related to adapter sequence or "
            "basecall quality.",
            required=False,
        )
        three_prime_r2 = IntegerField(
            label="Trim bases from 3' end of read 2",
            description="Remove bases from the 3' end of read 2 "
            "after adapter/quality trimming has been performed. "
            "This may remove some unwanted bias from the 3' end "
            "that is not directly related to adapter sequence or "
            "basecall quality.",
            required=False,
        )

    class AdapterTrimming:
        """Fields controlling adapter trimming."""

        # The descriptions declare manual adapters, adapter files and the
        # universal adapter choice to be mutually exclusive.
        adapter = ListField(
            StringField(),
            label="Read 1 adapter sequence",
            description="Adapter sequences to be trimmed. "
            "Also see universal adapters field for predefined "
            "adapters. This is mutually exclusive with read 1 "
            "adapters file and universal adapters.",
            required=False,
            default=[],
        )
        adapter_2 = ListField(
            StringField(),
            label="Read 2 adapter sequence",
            description="Optional adapter sequence to be trimmed "
            "off read 2 of paired-end files. This is mutually "
            "exclusive with read 2 adapters file and universal "
            "adapters.",
            required=False,
            default=[],
        )
        adapter_file_1 = DataField(
            "seq:nucleotide",
            label="Read 1 adapters file",
            description="This is mutually exclusive with read 1 "
            "adapters and universal adapters.",
            required=False,
        )
        adapter_file_2 = DataField(
            "seq:nucleotide",
            label="Read 2 adapters file",
            description="This is mutually exclusive with read 2 "
            "adapters and universal adapters.",
            required=False,
        )
        # The choice values are spelled as command-line flags.
        universal_adapter = StringField(
            label="Universal adapters",
            description="Instead of default detection use specific "
            "adapters. Use 13bp of the Illumina universal adapter, "
            "12bp of the Nextera adapter or 12bp of the Illumina "
            "Small RNA 3' Adapter. Selecting to trim smallRNA "
            "adapters will also lower the length value to 18bp. "
            "If the smallRNA libraries are paired-end then read 2 "
            "adapter will be set to the Illumina small RNA 5' "
            "adapter automatically (GATCGTCGGACT) unless defined "
            "explicitly. This is mutually exclusive with manually "
            "defined adapters and adapter files.",
            choices=[
                ("--illumina", "Illumina"),
                ("--nextera", "Nextera"),
                ("--small_rna", "Illumina small RNA"),
            ],
            required=False,
        )
        stringency = IntegerField(
            label="Overlap with adapter sequence required to trim",
            description="Defaults to a very stringent setting of "
            "1, i.e. even a single base pair of overlapping "
            "sequence will be trimmed of the 3' end of any read.",
            default=1,
        )
        error_rate = FloatField(
            label="Maximum allowed error rate",
            description="Number of errors divided by the length of "
            "the matching region",
            default=0.1,
        )

    class HardTrimming:
        """Fields controlling hard trimming."""

        # NOTE(review): ``trim_5`` is labelled with the 3' end and ``trim_3``
        # with the 5' end -- confirm against the tool's hard-trim semantics.
        trim_5 = IntegerField(
            label="Hard trim sequences from 3' end",
            description="Instead of performing adapter-/quality "
            "trimming, this option will simply hard-trim sequences "
            "to bp from the 3' end. This is incompatible with "
            "other hard trimming options.",
            required=False,
        )
        trim_3 = IntegerField(
            label="Hard trim sequences from 5' end",
            description="Instead of performing adapter-/quality "
            "trimming, this option will simply hard-trim sequences "
            "to bp from the 5' end. This is incompatible with "
            "other hard trimming options.",
            required=False,
        )

    adapter_trim = GroupField(
        AdapterTrimming,
        label="Adapter trimming",
    )
    quality_trim = GroupField(
        QualityTrimming,
        label="Quality trimming",
    )
    hard_trim = GroupField(
        HardTrimming,
        label="Hard trimming",
    )
class Input:
    """Input fields."""

    reads = DataField(
        "reads:fastq",
        label="Input sample(s)",
    )
    salmon_index = DataField(
        "index:salmon",
        label="Salmon index",
    )
    annotation = DataField(
        "annotation:gtf",
        label="GTF annotation",
    )
    # Toggle referenced by the ``hidden`` expression of the ``options`` group.
    advanced = BooleanField(
        label="Show advanced options",
        description="Inspect and modify parameters.",
        default=False,
    )

    class Options:
        """Options."""

        # Only the display text of the ISR choice changed (stray opening
        # parenthesis removed); the stored choice values are untouched.
        stranded = StringField(
            label="Assay type",
            default="A",
            choices=[
                ("A", "Detect automatically"),
                ("U", "Strand non-specific (U)"),
                ("SF", "Strand-specific forward (SF)"),
                ("SR", "Strand-specific reverse (SR)"),
                ("IU", "Strand non-specific (paired-end IU)"),
                ("ISF", "Strand-specific forward (paired-end ISF)"),
                ("ISR", "Strand-specific reverse (paired-end ISR)"),
            ],
        )
        seq_bias = BooleanField(
            label="--seqBias",
            default=False,
            description="Perform sequence-specific bias correction.",
        )
        gc_bias = BooleanField(
            label="--gcBias",
            default=False,
            description="[beta for single-end reads] Perform fragment GC bias correction.",
        )
        discard_orphans_quasi = BooleanField(
            label="--discardOrphansQuasi",
            default=False,
            description="Discard orphan mappings in quasi-mapping mode. "
            "If this flag is passed then only paired "
            "mappings will be considered toward "
            "quantification estimates. The default "
            "behavior is to consider orphan mappings "
            "if no valid paired mappings exist.",
        )
        no_length_correction = BooleanField(
            label="--noLengthCorrection",
            default=False,
            description="[Experimental] Entirely disables "
            "length correction when estimating the "
            "abundance of transcripts. The abundance "
            "estimates are reported in CPM (counts per "
            "million) unit. This option can be used "
            "with protocols where one expects that "
            "fragments derive from their underlying "
            "targets without regard to that target's "
            "length (e.g. QuantSeq).",
        )
        # Optional and declared without a default.
        consensus_slack = FloatField(
            label="--consensusSlack",
            required=False,
            description="The amount of slack allowed in the quasi-mapping "
            "consensus mechanism. Normally, a transcript must "
            "cover all hits to be considered for mapping. "
            "If this is set to a fraction, X, greater than 0 "
            "(and in [0,1)), then a transcript can fail "
            "to cover up to (100 * X)% of the hits before it "
            "is discounted as a mapping candidate. The default "
            "value of this option is 0.2 in selective alignment mode "
            "and 0 otherwise.",
        )
        min_score_fraction = FloatField(
            label="--minScoreFraction",
            default=0.65,
            description="The fraction of the optimal possible alignment "
            "score that a mapping must achieve in order to be "
            "considered valid - should be in (0,1]",
        )
        # Label fixed from three dashes to two, matching the sibling labels.
        incompat_prior = FloatField(
            label="--incompatPrior",
            default=0,
            description="This option sets the prior probability "
            "that an alignment that disagrees with "
            "the specified library type (--libType) "
            "results from the true fragment origin. "
            "Setting this to 0 specifies that "
            "alignments that disagree with the "
            "library type should be impossible, "
            "while setting it to 1 says that "
            "alignments that disagree with the "
            "library type are no less likely than "
            "those that do.",
        )
        range_factorization_bins = IntegerField(
            label="--rangeFactorizationBins",
            default=4,
            description="Factorizes the likelihood used in "
            "quantification by adopting a new notion "
            "of equivalence classes based on the "
            "conditional probabilities with which "
            "fragments are generated from different "
            "transcripts. This is a more "
            "fine-grained factorization than the "
            "normal rich equivalence classes. The "
            "default value (4) corresponds to the "
            "default used in Zakeri et al. 2017 "
            "and larger values imply a more "
            "fine-grained factorization. If range "
            "factorization is enabled, a common "
            "value to select for this parameter is "
            "4. A value of 0 signifies the use of "
            "basic rich equivalence classes.",
        )
        min_assigned_frag = IntegerField(
            label="--minAssignedFrags",
            default=10,
            description="The minimum number of fragments that "
            "must be assigned to the transcriptome "
            "for quantification to proceed.",
        )

    options = GroupField(
        Options,
        label="Options",
        hidden="!advanced",
    )
class Input:
    """Input fields for VariantFiltrationVqsr.

    Declares the VCF to filter, a group of resource VCF files and a
    group of advanced options that is hidden behind the ``advanced``
    toggle.
    """

    vcf = DataField("variants:vcf", label="Input data (VCF)")

    class ResourceFiles:
        """Resource files options."""

        # dbSNP is the only resource declared without ``required=False``.
        dbsnp = DataField("variants:vcf", label="dbSNP file")
        mills = DataField(
            "variants:vcf",
            required=False,
            label="Mills and 1000G gold standard indels",
        )
        axiom_poly = DataField(
            "variants:vcf",
            required=False,
            label="1000G Axiom genotype data",
        )
        hapmap = DataField(
            "variants:vcf",
            required=False,
            label="HapMap variants",
        )
        omni = DataField(
            "variants:vcf",
            required=False,
            label="1000G Omni variants",
        )
        thousand_genomes = DataField(
            "variants:vcf",
            required=False,
            label="1000G high confidence SNPs",
        )

    # Toggle referenced by the ``hidden`` expression of ``advanced_options``.
    advanced = BooleanField(
        label="Show advanced options",
        default=False,
        description="Inspect and modify parameters.",
    )

    class AdvancedOptions:
        """Advanced options."""

        use_as_anno = BooleanField(
            label="--use-allele-specific-annotations",
            default=False,
        )

        # Annotation lists are declared separately for INDELs and SNPs.
        indel_anno_fields = ListField(
            StringField(),
            label="Annotation fields (INDEL filtering)",
            default=["FS", "ReadPosRankSum", "MQRankSum", "QD", "SOR", "DP"],
        )
        snp_anno_fields = ListField(
            StringField(),
            label="Annotation fields (SNP filtering)",
            default=["QD", "MQRankSum", "ReadPosRankSum", "FS", "MQ", "SOR", "DP"],
        )

        indel_filter_level = FloatField(
            label="--truth-sensitivity-filter-level (INDELs)",
            default=99.0,
        )
        snp_filter_level = FloatField(
            label="--truth-sensitivity-filter-level (SNPs)",
            default=99.7,
        )

        # Both --max-gaussians fields carry the same help text and differ
        # only in label and default.
        max_gaussians_indels = IntegerField(
            label="--max-gaussians (INDELs)",
            default=4,
            description=(
                "This parameter determines the maximum number of Gaussians "
                "that should be used when building a positive model using "
                "the variational Bayes algorithm. "
                "This parameter sets the expected number of clusters in "
                "modeling. "
                "If a dataset gives fewer distinct clusters, e.g. as can "
                "happen for smaller data, then the tool will tell you there "
                "is insufficient data with a No data found error message. "
                "In this case, try decrementing the --max-gaussians value."
            ),
        )
        max_gaussians_snps = IntegerField(
            label="--max-gaussians (SNPs)",
            default=6,
            description=(
                "This parameter determines the maximum number of Gaussians "
                "that should be used when building a positive model using "
                "the variational Bayes algorithm. "
                "This parameter sets the expected number of clusters in "
                "modeling. "
                "If a dataset gives fewer distinct clusters, e.g. as can "
                "happen for smaller data, then the tool will tell you there "
                "is insufficient data with a No data found error message. "
                "In this case, try decrementing the --max-gaussians value."
            ),
        )

    resource_files = GroupField(ResourceFiles, label="Resource files")
    advanced_options = GroupField(
        AdvancedOptions,
        label="Advanced options",
        hidden="!advanced",
    )
class Input:
    """Input fields to process MarkDuplicates.

    Declares the BAM file to process, switches controlling the
    MarkDuplicates step itself and a group of BigWig-related options.
    """

    bam = DataField("alignment:bam", label="Alignment BAM file")
    skip = BooleanField(
        label="Skip MarkDuplicates step",
        description="MarkDuplicates step can be skipped.",
        default=False,
    )
    remove_duplicates = BooleanField(
        label="Remove duplicates",
        description="If true do not write duplicates to the output file "
        "instead of writing them with appropriate flags set.",
        default=False,
    )
    validation_stringency = StringField(
        label="Validation stringency",
        description="Validation stringency for all SAM files read by this "
        "program. Setting stringency to SILENT can improve "
        "performance when processing a BAM file in which "
        "variable-length data (read, qualities, tags) do not "
        "otherwise need to be decoded. Default is STRICT.",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        default="STRICT",
    )
    assume_sort_order = StringField(
        label="Assume sort order",
        # The help text previously ran two sentences together
        # ("otherwise.Possible") and did not mention the "duplicate"
        # choice offered below; both are corrected here.
        description="If not null (default), assume that the input file "
        "has this order even if the header says otherwise. "
        "Possible values are unsorted, queryname, coordinate, "
        "duplicate and unknown.",
        choices=[
            # The empty string is the default and is labelled as deferring
            # to the BAM header.
            ("", "as in BAM header (default)"),
            ("unsorted", "unsorted"),
            ("queryname", "queryname"),
            ("coordinate", "coordinate"),
            ("duplicate", "duplicate"),
            ("unknown", "unknown"),
        ],
        default="",
    )

    class BigWigOptions:
        """Options for calculating BigWig."""

        bigwig_binsize = IntegerField(
            label="BigWig bin size",
            description="Size of the bins, in bases, for the output of the "
            "bigwig/bedgraph file. Default is 50.",
            default=50,
        )
        bigwig_timeout = IntegerField(
            label="BigWig timeout",
            description="Number of seconds before creation of BigWig timeouts. "
            "Default is after 480 seconds (8 minutes).",
            default=480,
        )

    bigwig_opts = GroupField(BigWigOptions, label="BigWig options")
class Input:
    """Input fields to process Deseq.

    Declares the case/control expression lists, optional gene-set
    creation thresholds, and two option groups: differential expression
    analysis options (``options``) and gene filtering options
    (``filter_options``).
    """

    case = ListField(
        DataField("expression"),
        label="Case",
        description="Case samples (replicates)",
    )
    control = ListField(
        DataField("expression"),
        label="Control",
        description="Control samples (replicates)",
    )

    create_sets = BooleanField(
        label="Create gene sets",
        description="After calculating differential gene "
        "expressions create gene sets for up-regulated genes, "
        "down-regulated genes and all genes.",
        default=False,
    )
    # The two thresholds below are hidden via the ``!create_sets`` expression.
    logfc = FloatField(
        label="Log2 fold change threshold for gene sets",
        description="Genes above Log2FC are considered as "
        "up-regulated and genes below -Log2FC as down-regulated.",
        default=1.0,
        hidden="!create_sets",
    )
    fdr = FloatField(
        label="FDR threshold for gene sets",
        default=0.05,
        hidden="!create_sets",
    )

    class Options:
        """Options."""

        beta_prior = BooleanField(
            label="Beta prior",
            default=False,
            description="Whether or not to put a zero-mean normal prior "
            "on the non-intercept coefficients.",
        )

    class FilterOptions:
        """Filtering options."""

        # Each boolean switch is followed by the parameter whose ``hidden``
        # expression refers back to it through ``filter_options.<name>``.
        count = BooleanField(
            label="Filter genes based on expression count",
            default=True,
        )
        min_count_sum = IntegerField(
            label="Minimum gene expression count summed over all samples",
            default=10,
            description="Filter genes in the expression matrix input. "
            "Remove genes where the expression count sum over all samples "
            "is below the threshold.",
            hidden="!filter_options.count",
        )
        cook = BooleanField(
            label="Filter genes based on Cook's distance",
            default=False,
        )
        cooks_cutoff = FloatField(
            label="Threshold on Cook's distance",
            required=False,
            description="If one or more samples have Cook's distance "
            "larger than the threshold set here, the p-value for the row "
            "is set to NA. If left empty, the default threshold of 0.99 "
            "quantile of the F(p, m-p) distribution is used, where p is "
            "the number of coefficients being fitted and m is the number "
            "of samples. This test excludes Cook's distance of samples "
            "belonging to experimental groups with only two samples.",
            hidden="!filter_options.cook",
        )
        independent = BooleanField(
            label="Apply independent gene filtering",
            default=False,
        )
        alpha = FloatField(
            label="Significance cut-off used for optimizing independent "
            "gene filtering",
            default=0.1,
            description="The value should be set to adjusted p-value "
            "cut-off (FDR).",
            hidden="!filter_options.independent",
        )

    # The labels of these two groups were previously swapped: the group
    # holding the beta prior was shown as "Gene filtering options" and the
    # filtering group as "Differential expression analysis options".
    options = GroupField(
        Options, label="Differential expression analysis options"
    )
    filter_options = GroupField(FilterOptions, label="Gene filtering options")
class Input:
    """Input fields.

    Declares the paired-end reads, reference sequence, BWA index and
    known variation sites, followed by option groups for trimming, GATK,
    alignment summary, Picard WGS metrics and insert size metrics.
    """

    reads = DataField("reads:fastq:paired", label="Input sample")
    ref_seq = DataField("seq:nucleotide", label="Reference sequence")
    bwa_index = DataField("index:bwa", label="BWA genome index")
    known_sites = ListField(
        DataField("variants:vcf"),
        label="Known sites of variation (VCF)",
    )

    # Toggle referenced by the ``hidden`` expressions of the groups below.
    advanced = BooleanField(
        label="Show advanced options",
        default=False,
        description="Inspect and modify parameters.",
    )

    class GatkOptions:
        """Options."""

        intervals = DataField(
            "bed",
            label="Intervals BED file",
            required=False,
            description=(
                "Use intervals BED file to limit the analysis to "
                "the specified parts of the genome."
            ),
        )
        # NOTE(review): declared as an integer although the label calls it
        # a fraction -- confirm against the consumer of this value.
        contamination = IntegerField(
            label="Contamination fraction",
            default=0,
            description=(
                "Fraction of contamination in sequencing "
                "data (for all samples) to aggressively remove."
            ),
        )

    class Trimming:
        """Trimming parameters."""

        adapters = DataField(
            "seq:nucleotide",
            label="Adapter sequences",
            required=False,
            description=(
                "Adapter sequences in FASTA format that will "
                "be removed from the reads."
            ),
        )

        # The three threshold fields share the ``disabled`` expression
        # ``!trimming_options.adapters``.
        seed_mismatches = IntegerField(
            label="Seed mismatches",
            required=False,
            disabled="!trimming_options.adapters",
            description=(
                "Specifies the maximum mismatch count which "
                "will still allow a full match to be performed. This field "
                "is required to perform adapter trimming."
            ),
        )
        simple_clip_threshold = IntegerField(
            label="Simple clip threshold",
            required=False,
            disabled="!trimming_options.adapters",
            description=(
                "Specifies how accurate the match between any "
                "adapter sequence must be against a read. This field is "
                "required to perform adapter trimming."
            ),
        )
        min_adapter_length = IntegerField(
            label="Minimum adapter length",
            default=8,
            disabled=(
                "!trimming_options.seed_mismatches && "
                "!trimming_options.simple_clip_threshold && "
                "!trimming_options.palindrome_clip_threshold"
            ),
            description=(
                "In addition to the alignment score, palindrome "
                "mode can verify that a minimum length of adapter has been "
                "detected. If unspecified, this defaults to 8 bases, for "
                "historical reasons. However, since palindrome mode has a "
                "very low false positive rate, this can be safely reduced, "
                "even down to 1, to allow shorter adapter fragments to be "
                "removed."
            ),
        )
        palindrome_clip_threshold = IntegerField(
            label="Palindrome clip threshold",
            required=False,
            disabled="!trimming_options.adapters",
            description=(
                "Specifies how accurate the match between the "
                "two adapter ligated reads must be for PE palindrome read "
                "alignment. This field is required to perform adapter "
                "trimming."
            ),
        )

        # Quality and length based trimming; all three are optional.
        leading = IntegerField(
            label="Leading quality",
            required=False,
            description=(
                "Remove low quality bases from the beginning, "
                "if below a threshold quality."
            ),
        )
        trailing = IntegerField(
            label="Trailing quality",
            required=False,
            description=(
                "Remove low quality bases from the end, if "
                "below a threshold quality."
            ),
        )
        minlen = IntegerField(
            label="Minimum length",
            required=False,
            description="Drop the read if it is below a specified length.",
        )

    class AlignmentSummary:
        """AlignmentSummary parameters."""

        adapters = DataField(
            "seq:nucleotide",
            label="Adapter sequences",
            required=False,
        )
        max_insert_size = IntegerField(
            label="Maximum insert size",
            default=100000,
        )
        pair_orientation = StringField(
            label="Pair orientation",
            default="null",
            choices=[
                # The literal string "null" is the value labelled
                # "Unspecified".
                ("null", "Unspecified"),
                ("FR", "FR"),
                ("RF", "RF"),
                ("TANDEM", "TANDEM"),
            ],
        )

    class PicardWGSMetrics:
        """PicardWGSMetrics parameters."""

        read_length = IntegerField(label="Average read length", default=150)
        min_map_quality = IntegerField(
            label="Minimum mapping quality for a read to contribute coverage",
            default=20,
        )
        min_quality = IntegerField(
            label="Minimum base quality for a base to contribute coverage",
            default=20,
            description=(
                "N bases will be treated as having a base quality of "
                "negative infinity and will therefore be excluded from "
                "coverage regardless of the value of this parameter."
            ),
        )
        coverage_cap = IntegerField(
            label="Maximum coverage cap",
            default=250,
            description=(
                "Treat positions with coverage exceeding this "
                "value as if they had coverage at this set value."
            ),
        )
        accumulation_cap = IntegerField(
            label="Ignore positions with coverage above this value",
            default=100000,
            description=(
                "At positions with coverage exceeding this value, "
                "completely ignore reads that accumulate beyond this value."
            ),
        )
        sample_size = IntegerField(
            label="Sample size used for Theoretical Het Sensitivity sampling",
            default=10000,
        )

    class InsertSizeMetrics:
        """InsertSizeMetrics parameters."""

        minimum_fraction = FloatField(
            label="Minimum fraction of reads in a category to be considered",
            default=0.05,
            description=(
                "When generating the histogram, discard any data "
                "categories (out of FR, TANDEM, RF) that have fewer than "
                "this fraction of overall reads (Range: 0 and 0.5)."
            ),
        )
        include_duplicates = BooleanField(
            label="Include reads marked as duplicates in the insert size histogram",
            default=False,
        )
        deviations = FloatField(
            label="Deviations limit",
            default=10.0,
            description=(
                "Generate mean, standard deviation and plots "
                "by trimming the data down to MEDIAN + DEVIATIONS * "
                "MEDIAN_ABSOLUTE_DEVIATION. This is done because insert "
                "size data typically includes enough anomalous values "
                "from chimeras and other artifacts to make the mean and "
                "standard deviation grossly misleading regarding the real "
                "distribution."
            ),
        )

    # Trimming options carry no ``hidden`` expression; the remaining groups
    # are hidden via ``!advanced``.
    trimming_options = GroupField(Trimming, label="Trimming options")
    gatk_options = GroupField(
        GatkOptions,
        label="GATK options",
        hidden="!advanced",
    )
    alignment_summary = GroupField(
        AlignmentSummary,
        label="Alignment summary options",
        hidden="!advanced",
    )
    wgs_metrics = GroupField(
        PicardWGSMetrics,
        label="Picard WGS metrics options",
        hidden="!advanced",
    )
    insert_size = GroupField(
        InsertSizeMetrics,
        label="Picard InsertSizeMetrics options",
        hidden="!advanced",
    )