class Input:
    """Input fields for SlamdunkAllPaired.

    Declares the paired-end reads, the reference FASTA, the BED regions of
    interest, and the multimapper and read-length settings.
    """

    reads = DataField("reads:fastq:paired", label="Reads")
    transcriptome = DataField(
        "seq:nucleotide",
        label="FASTA file containing sequences for aligning.",
    )
    regions = DataField(
        "bed",
        label="BED file with coordinates of regions of interest.",
    )
    filter_multimappers = BooleanField(
        label="Filter multimappers",
        description="If true filter and reassign multimappers based on "
        "provided BED file with regions of interest.",
        default=True,
    )
    max_alignments = IntegerField(
        label="Maximum number of multimapper alignments",
        # Adjacent literals are joined verbatim, so each piece must end with
        # a space; otherwise the text renders as "readswith".
        description="The maximum number of alignments that will be reported "
        "for a multi-mapping read (i.e. reads with multiple alignments of "
        "equal best scores).",
        default=1,
    )
    read_length = IntegerField(
        label="Maximum read length",
        description="Maximum length of reads in the input FASTQ file.",
        default=150,
    )
class Options:
    """Options.

    Assay strandedness selection plus two optional Phred score settings.
    """

    stranded = StringField(
        label="Assay type",
        choices=[
            ("non_specific", "Strand non-specific"),
            ("forward", "Strand-specific forward"),
            ("reverse", "Strand-specific reverse"),
            ("auto", "Detect automatically"),
        ],
        default="non_specific",
    )

    # The next two fields share one `hidden` expression keyed on
    # options.stranded.
    cdna_index = DataField(
        "index:salmon",
        label="cDNA index file",
        hidden="options.stranded != 'auto'",
        required=False,
    )
    n_reads = IntegerField(
        label="Number of reads in subsampled alignment file",
        hidden="options.stranded != 'auto'",
        default=5000000,
    )

    # Both Phred settings are optional and have no default.
    maxPhredScore = IntegerField(label="Max Phred Score", required=False)
    adjustPhredScore = IntegerField(label="Adjust Phred Score", required=False)
class Options:
    """Options.

    Quality thresholds, coverage caps, sampling size and SAM validation
    stringency.
    """

    # --- quality thresholds -------------------------------------------
    min_map_quality = IntegerField(
        default=20,
        label="Minimum mapping quality for a read to contribute coverage",
    )
    min_quality = IntegerField(
        default=20,
        label="Minimum base quality for a base to contribute coverage",
        description=(
            "N bases will be treated as having a base quality of "
            "negative infinity and will therefore be excluded from coverage "
            "regardless of the value of this parameter."
        ),
    )

    # --- coverage caps ------------------------------------------------
    coverage_cap = IntegerField(
        default=250,
        label="Maximum coverage cap",
        description=(
            "Treat positions with coverage exceeding this value as "
            "if they had coverage at this set value."
        ),
    )
    accumulation_cap = IntegerField(
        default=100000,
        label="Ignore positions with coverage above this value",
        description=(
            "At positions with coverage exceeding this value, "
            "completely ignore reads that accumulate beyond this value"
        ),
    )

    # --- counting and sampling ----------------------------------------
    count_unpaired = BooleanField(
        default=False,
        label="Count unpaired reads and paired reads with one end unmapped",
    )
    sample_size = IntegerField(
        default=10000,
        label="Sample Size used for Theoretical Het Sensitivity sampling",
    )

    # --- SAM validation -----------------------------------------------
    validation_stringency = StringField(
        default="STRICT",
        label="Validation stringency",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        description=(
            "Validation stringency for all SAM files read by this "
            "program. Setting stringency to SILENT can improve "
            "performance when processing a BAM file in which "
            "variable-length data (read, qualities, tags) do not "
            "otherwise need to be decoded. Default is STRICT."
        ),
    )
class Output:
    """Output fields.

    Three optional outputs followed by one required integer output.
    """

    # Optional outputs.
    out_foo = IntegerField(required=False, label="Foo.")
    out_bar = StringField(required=False, label="Bar.")
    out_foo2 = IntegerField(required=False, label="Foo2.")

    # The only output declared with required=True.
    out_subgroup = IntegerField(required=True, label="SubGroupFoo")
class Input:
    """Input fields for SlamdunkAllPaired.

    Declares the paired-end reads, the reference FASTA, the BED regions of
    interest, and the multimapper and read-length settings.
    """

    reads = DataField("reads:fastq:paired", label="Reads")
    ref_seq = DataField("seq:nucleotide", label="FASTA file")
    regions = DataField(
        "bed",
        label="BED file with coordinates of regions of interest",
    )
    filter_multimappers = BooleanField(
        label="Filter multimappers",
        description="If true filter and reassign multimappers based on "
        "provided BED file with regions of interest",
        default=True,
    )
    max_alignments = IntegerField(
        label="Maximum number of multimapper alignments",
        # Adjacent literals are joined verbatim, so each piece must end with
        # a space; otherwise the text renders as "readswith".
        description="The maximum number of alignments that will be reported "
        "for a multi-mapping read (i.e. reads with multiple alignments of "
        "equal best scores)",
        default=1,
    )
    read_length = IntegerField(
        label="Maximum read length",
        description="Maximum length of reads in the input FASTQ file",
        default=150,
    )
class Options:
    """Options.

    Quality trimming, minimum read length and adapter overlap settings.
    """

    nextseq_trim = IntegerField(
        label="NextSeq/NovaSeq trim",
        description="NextSeq/NovaSeq-specific quality trimming. Trims also dark "
        "cycles appearing as high-quality G bases. This option is mutually "
        "exclusive with the use of standard quality-cutoff trimming and is "
        "suitable for the use with data generated by the recent Illumina "
        "machines that utilize two-color chemistry to encode the four bases.",
        default=10,
    )
    # Optional, no default; per its description it overrides nextseq_trim
    # when set.
    quality_cutoff = IntegerField(
        label="Quality cutoff",
        description="Trim low-quality bases from 3' end of each read before adapter "
        "removal. The use of this option will override the use of "
        "NextSeq/NovaSeq trim option.",
        required=False,
    )
    min_len = IntegerField(
        label="Minimum read length",
        default=20,
    )
    min_overlap = IntegerField(
        # Label previously read "Mimimum overlap".
        label="Minimum overlap",
        description="Minimum overlap between adapter and read for an adapter to be found.",
        default=20,
    )
class Output:
    """Output field of the process UploadOrangeMetadata.

    One file output plus summary values describing the uploaded table.
    """

    # The uploaded table itself.
    table = FileField(label="Uploaded table")

    # Summary values; note that `features` and `target` are declared as
    # string fields while the two counts are integer fields.
    n_samples = IntegerField(label="Number of samples")
    features = StringField(label="Number of features")
    target = StringField(label="Target class description")
    n_metas = IntegerField(label="Number of meta attributes")
class Input:
    """Input fields to process ROSE2.

    Peak input (MACS result or uploaded BED), ranking/control BAM files and
    the TSS exclusion, stitching and masking parameters.
    """

    # --- peak input: two alternative sources --------------------------
    input_macs = DataField(
        "chipseq:callpeak",
        label="BED/narrowPeak file (MACS results)",
        hidden="input_upload",
        required=False,
    )
    input_upload = DataField(
        "bed",
        label="BED file (Upload)",
        hidden="input_macs || use_filtered_bam",
        required=False,
    )
    use_filtered_bam = BooleanField(
        label="Use Filtered BAM File",
        hidden="input_upload",
        description=(
            "Use filtered BAM file from a MACS2 object to rank "
            "enhancers by. Only applicable if input is MACS2."
        ),
        default=False,
    )

    # --- BAM files ----------------------------------------------------
    rankby = DataField(
        "alignment:bam",
        label="BAM file",
        hidden="use_filtered_bam",
        description="BAM file to rank enhancers by.",
        required=False,
    )
    # NOTE(review): this description is identical to the one on `rankby`;
    # confirm whether the control file should have its own wording.
    control = DataField(
        "alignment:bam",
        label="Control BAM File",
        hidden="use_filtered_bam",
        description="BAM file to rank enhancers by.",
        required=False,
    )

    # --- analysis parameters ------------------------------------------
    tss = IntegerField(
        label="TSS exclusion",
        description="Enter a distance from TSS to exclude. 0 = no TSS exclusion.",
        default=0,
    )
    stitch = IntegerField(
        label="Stitch",
        description=(
            "Enter a max linking distance for stitching. If not "
            "given, optimal stitching parameter will be determined"
            " automatically."
        ),
        required=False,
    )
    mask = DataField(
        "bed",
        label="Masking BED file",
        description=(
            "Mask a set of regions from analysis. Provide a BED of"
            " masking regions."
        ),
        required=False,
    )
class Input:
    """Input fields to process ImportScRNA10x.

    10x reads and genome index objects, the chemistry selector, optional
    hard-trim lengths and the cell-count settings.
    """

    # --- data objects -------------------------------------------------
    reads = DataField(data_type="screads:10x:", label="10x reads data object")
    genome_index = DataField(
        data_type="genomeindex:10x:",
        label="10x genome index data object",
    )

    # --- assay configuration ------------------------------------------
    # NOTE(review): apart from ("auto", "auto"), the first tuple element
    # reads like display text and the second like a short code; confirm the
    # intended tuple order. "C5P-PE" also differs from the "SC..." pattern of
    # its neighbours - confirm.
    chemistry = StringField(
        label="Chemistry",
        default="auto",
        required=False,
        choices=[
            ("auto", "auto"),
            ("Single Cell 3'", "threeprime"),
            ("Single Cell 5'", "fiveprime"),
            ("Single Cell 3' v1", "SC3Pv1"),
            ("Single Cell 3' v2", "SC3Pv2"),
            ("Single Cell 3' v3", "SC3Pv3"),
            ("Single Cell 5' paired-end", "C5P-PE"),
            ("Single Cell 5' R2-only", "SC5P-R2"),
        ],
        description=(
            "Assay configuration. By default the assay configuration is detected "
            "automatically, which is the recommended mode. You should only specify "
            "chemistry if there is an error in automatic detection."
        ),
    )

    # --- optional hard trimming ---------------------------------------
    trim_r1 = IntegerField(
        label="Trim R1",
        required=False,
        description=(
            "Hard-trim the input R1 sequence to this length. Note that the length "
            "includes the Barcode and UMI sequences so do not set this below 26 for "
            "Single Cell 3' v2 or Single Cell 5'. This and \"Trim R2\" are useful for "
            "determining the optimal read length for sequencing."
        ),
    )
    trim_r2 = IntegerField(
        label="Trim R2",
        required=False,
        description="Hard-trim the input R2 sequence to this length.",
    )

    # --- cell counts --------------------------------------------------
    expected_cells = IntegerField(
        label="Expected number of recovered cells",
        default=3000,
    )
    force_cells = IntegerField(
        label="Force cell number",
        required=False,
        description=(
            "Force pipeline to use this number of cells, bypassing the cell "
            "detection algorithm. Use this if the number of cells estimated by Cell "
            "Ranger is not consistent with the barcode rank plot."
        ),
    )
class Output:
    """Output fields.

    Attributes of a relation and of the partition within it.
    """

    # Relation-level attributes.
    relation_id = IntegerField(label="Relation id")
    relation_type = StringField(label="Relation type")
    relation_ordered = StringField(label="Relation ordering")
    relation_category = StringField(label="Relation category")
    relation_unit = StringField(label="Relation unit")

    # Partition-level attributes.
    relation_partition_label = StringField(label="Relation partition label")
    # This label previously duplicated "Relation partition label", which made
    # the two partition outputs indistinguishable by label.
    relation_partition_position = IntegerField(label="Relation partition position")
class BigWigOptions:
    """Options for calculating BigWig.

    Bin size of the output track and the timeout for its creation.
    """

    bigwig_binsize = IntegerField(
        default=50,
        label="BigWig bin size",
        description=(
            "Size of the bins, in bases, for the output of the "
            "bigwig/bedgraph file. Default is 50."
        ),
    )
    # The default is given in seconds, per the description text.
    bigwig_timeout = IntegerField(
        default=480,
        label="BigWig timeout",
        description=(
            "Number of seconds before creation of BigWig timeouts. "
            "Default is after 480 seconds (8 minutes)."
        ),
    )
class Input:
    """Input fields for BsConversionRate.

    Bisulfite alignment, optional unmethylated control sequence and the
    settings for the conversion-rate step.
    """

    # Adjacent string literals are joined verbatim, so every piece below
    # ends with a space; without it the words run together.
    mr = DataField(
        "alignment:bam:walt",
        label="Aligned reads from bisulfite sequencing",
        description="Bisulfite specific alignment such as WALT is required as "
        ".mr file type is used. Duplicates should be removed to reduce any "
        "bias introduced by incomplete conversion on PCR duplicate reads.",
    )
    skip = BooleanField(
        label="Skip Bisulfite conversion rate step",
        description="Bisulfite conversion rate step can be skipped.",
        default=False,
    )
    sequence = DataField(
        "seq:nucleotide",
        label="Unmethylated control sequence",
        description="Separate unmethylated control sequence FASTA file is "
        "required to estimate bisulfite conversion rate.",
        required=False,
    )
    count_all = BooleanField(
        label="Count all cytosines including CpGs",
        default=True,
    )
    read_length = IntegerField(label="Average read length", default=150)
    max_mismatch = FloatField(
        label="Maximum fraction of mismatches",
        required=False,
    )
    a_rich = BooleanField(label="Reads are A-rich", default=False)
class Advanced:
    """Options.

    Sample-name handling flags and report configuration settings.
    """

    # --- sample-name handling -----------------------------------------
    dirs = BooleanField(
        label="--dirs",
        description="Prepend directory to sample names.",
        default=True,
    )
    dirs_depth = IntegerField(
        label="--dirs-depth",
        description=(
            "Prepend a specified number of directories to sample names. Enter a "
            "negative number (default) to take from start of path."
        ),
        default=-1,
    )
    fullnames = BooleanField(
        label="--fullnames",
        description="Disable the sample name cleaning (leave as full file name).",
        default=False,
    )

    # --- report configuration -----------------------------------------
    config = BooleanField(
        label="Use configuration file",
        description="Use Genialis configuration file for MultiQC report.",
        default=True,
    )
    cl_config = StringField(
        label="--cl-config",
        description=(
            "Enter text with command-line configuration options to override the "
            "defaults (e.g. custom_logo_url: https://www.genialis.com)."
        ),
        required=False,
    )
class AdvancedOptions:
    """Advanced options.

    Batch size for sample import and the fragment consolidation flag.
    """

    # A value of 0 means no batching, per the description text.
    batch_size = IntegerField(
        default=0,
        label="Batch size",
        description=(
            "Batch size controls the number of samples "
            "for which readers are open at once and therefore provides "
            "a way to minimize memory consumption. However, it can "
            "take longer to complete. Use the consolidate flag if more "
            "than a hundred batches were used. This will improve feature "
            "read time. batchSize=0 means no batching "
            "(i.e. readers for all samples will be opened at once)."
        ),
    )
    consolidate = BooleanField(
        default=False,
        label="Consolidate",
        description=(
            "Boolean flag to enable consolidation. If "
            "importing data in batches, a new fragment is created for "
            "each batch. In case thousands of fragments are created, "
            "GenomicsDB feature readers will try to open ~20x as many "
            "files. Also, internally GenomicsDB would consume more "
            "memory to maintain bookkeeping data from all fragments. "
            "Use this flag to merge all fragments into one. Merging "
            "can potentially improve read performance, however overall "
            "benefit might not be noticeable as the top Java layers "
            "have significantly higher overheads. This flag has no "
            "effect if only one batch is used."
        ),
    )
class Input:
    """Input fields.

    One field of each kind used here: string, list, data, URL, integer,
    float, JSON, optional strings and a nested group.
    """

    # --- basic string and list fields ---------------------------------
    my_field = StringField(label="My field")
    my_list = ListField(StringField(), label="My list")

    # --- data object references ---------------------------------------
    input_data = DataField("test:save", label="My input data")
    input_entity_data = DataField("entity", label="My entity data")
    bar = DataField(data_type="test:save", label="My bar")

    # --- other scalar types -------------------------------------------
    url = UrlField(UrlField.DOWNLOAD, label="My URL")
    integer = IntegerField(label="My integer")
    my_float = FloatField(label="My float")
    my_json = JsonField(label="Blah blah")

    # --- optional fields, with and without a default ------------------
    my_optional = StringField(
        label="Optional",
        default="default value",
        required=False,
    )
    my_optional_no_default = StringField(
        label="Optional no default",
        required=False,
    )

    class MyGroup:
        # Nested field group exposed through `my_group` below.
        foo = IntegerField(label="Foo")
        bar = StringField(label="Bar")
        group_optional_no_default = StringField(
            label="Group optional no default",
            required=False,
        )

    my_group = GroupField(MyGroup, label="My group")
class Input:
    """Input fields for CollectRrbsMetrics.

    Alignment and genome inputs followed by quality, length and mismatch
    thresholds and SAM handling options.
    """

    # --- data objects -------------------------------------------------
    bam = DataField("alignment:bam", label="Alignment BAM file")
    genome = DataField("seq:nucleotide", label="Genome")

    # --- thresholds ---------------------------------------------------
    min_quality = IntegerField(
        default=20,
        label="Threshold for base quality of a C base before it is considered",
    )
    next_base_quality = IntegerField(
        default=10,
        label="Threshold for quality of a base next to a C before the C base is considered",
    )
    # The attribute name is spelled "min_lenght" in the original interface
    # and is kept as-is so existing references keep working.
    min_lenght = IntegerField(default=5, label="Minimum read length")
    mismatch_rate = FloatField(
        default=0.1,
        label="Maximum fraction of mismatches in a read to be considered (Range: 0 and 1)",
    )

    # --- SAM handling -------------------------------------------------
    validation_stringency = StringField(
        default="STRICT",
        label="Validation stringency",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        description=(
            "Validation stringency for all SAM files read by this "
            "program. Setting stringency to SILENT can improve "
            "performance when processing a BAM file in which "
            "variable-length data (read, qualities, tags) do not "
            "otherwise need to be decoded. Default is STRICT."
        ),
    )
    assume_sorted = BooleanField(
        default=False,
        label="Sorted BAM file",
        description="If true the sort order in the header file will be ignored.",
    )
class Advanced:
    """Advanced options.

    SRA download and dump settings followed by optional probe-mapping
    inputs.
    """

    # --- SRA prefetch -------------------------------------------------
    prefetch = BooleanField(default=True, label="Prefetch SRA file")
    # Declared as a string so that a unit suffix such as "20G" is accepted.
    max_size_prefetch = StringField(
        default="20G",
        label="Maximum file size to download in KB",
        description="A unit prefix can be used instead of a value in KB (e.g. 1024M or 1G).",
    )

    # --- spot and read filters (all optional) -------------------------
    min_spot_id = IntegerField(required=False, label="Minimum spot ID")
    max_spot_id = IntegerField(required=False, label="Maximum spot ID")
    min_read_len = IntegerField(required=False, label="Minimum read length")

    # --- dump switches ------------------------------------------------
    clip = BooleanField(default=False, label="Clip adapter sequences")
    aligned = BooleanField(default=False, label="Dump only aligned sequences")
    unaligned = BooleanField(default=False, label="Dump only unaligned sequences")

    # --- probe mapping (all optional) ---------------------------------
    mapping_file = FileField(
        required=False,
        label="File with probe ID mappings",
        description=(
            "The file should be tab-separated and contain two columns with their column names. The "
            "first column should contain Gene IDs and the second one should contain probe names. Supported file "
            "extensions are .tab.*, .tsv.*, .txt.*"
        ),
    )
    source = StringField(
        required=False,
        label="Gene ID source",
        allow_custom_choice=True,
        choices=[
            ("AFFY", "AFFY"),
            ("DICTYBASE", "DICTYBASE"),
            ("ENSEMBL", "ENSEMBL"),
            ("NCBI", "NCBI"),
            ("UCSC", "UCSC"),
        ],
        description="Gene ID source used for probe mapping is required when using a custom file.",
    )
    build = StringField(
        required=False,
        label="Genome build",
        description="Genome build of mapping file is required when using a custom file.",
    )
class Advanced:
    """Advanced options.

    SRA prefetch settings, spot and read-length filters and dump switches.
    """

    # --- SRA prefetch -------------------------------------------------
    prefetch = BooleanField(default=True, label="Prefetch SRA file")
    # Declared as a string so that a unit suffix such as "20G" is accepted.
    max_size_prefetch = StringField(
        default="20G",
        label="Maximum file size to download in KB",
        description="A unit prefix can be used instead of a value in KB (e.g. 1024M or 1G).",
    )

    # --- spot and read filters (all optional) -------------------------
    min_spot_id = IntegerField(required=False, label="Minimum spot ID")
    max_spot_id = IntegerField(required=False, label="Maximum spot ID")
    min_read_len = IntegerField(required=False, label="Minimum read length")

    # --- dump switches (all off by default) ---------------------------
    clip = BooleanField(default=False, label="Clip adapter sequences")
    aligned = BooleanField(default=False, label="Dump only aligned sequences")
    unaligned = BooleanField(default=False, label="Dump only unaligned sequences")
class Output:
    """Output field of the process ImportFastaNucleotide.

    FASTA files with their index and dictionary, plus descriptive values.
    """

    # --- files --------------------------------------------------------
    fastagz = FileField(label="FASTA file (compressed)")
    fasta = FileField(label="FASTA file")
    fai = FileField(label="FASTA file index")
    fasta_dict = FileField(label="FASTA dictionary")

    # --- descriptive values -------------------------------------------
    num_seqs = IntegerField(label="Number of sequences")
    species = StringField(label="Species")
    build = StringField(label="Build")
class HardTrimming:
    """Hard trim options.

    Two optional, mutually incompatible (per their descriptions) hard-trim
    lengths.
    """

    # NOTE(review): `trim_5` is labelled with the 3' end and `trim_3` with
    # the 5' end. This may be deliberate (keep N bp at one end by cutting
    # from the other) - confirm against the tool's option semantics.
    trim_5 = IntegerField(
        required=False,
        label="Hard trim sequences from 3' end",
        description=(
            "Instead of performing adapter-/quality "
            "trimming, this option will simply hard-trim sequences "
            "to bp from the 3' end. This is incompatible with "
            "other hard trimming options."
        ),
    )
    trim_3 = IntegerField(
        required=False,
        label="Hard trim sequences from 5' end",
        description=(
            "Instead of performing adapter-/quality "
            "trimming, this option will simply hard-trim sequences "
            "to bp from the 5' end. This is incompatible with "
            "other hard trimming options."
        ),
    )
class AdvancedOptions:
    """Advanced options.

    A single setting: the optical duplicate pixel distance.
    """

    pixel_distance = IntegerField(
        default=2500,
        label="--OPTICAL_DUPLICATE_PIXEL_DISTANCE",
        description=(
            "Set the optical pixel distance, e.g. "
            "distance between clusters. Modify this parameter to "
            "ensure compatibility with older Illumina platforms."
        ),
    )
class MyGroup:
    # Field group with two plain fields, one optional field and a nested
    # sub-group.

    foo = IntegerField(label="Foo")
    bar = StringField(label="Bar")
    group_optional_no_default = StringField(
        required=False,
        label="Group optional no default",
    )

    class SubGroup:
        # Nested group exposed through `subgroup` below; its only field
        # defaults to 2.
        foo = IntegerField(default=2, label="Foo")

    subgroup = GroupField(SubGroup, label="Subgroup")
class Input:
    """Input fields to process AlignmentSieve.

    The alignment to filter and the fragment-length bounds.
    """

    alignment = DataField("alignment:bam", label="Alignment BAM file")

    # Both bounds default to 0; for the maximum, the description states that
    # 0 means no limit.
    min_fragment_length = IntegerField(
        default=0,
        label="--minFragmentLength",
        description=(
            "The minimum fragment length needed for "
            "read/pair inclusion. This option is primarily useful in "
            "ATACseq experiments, for filtering mono- or di-nucleosome "
            "fragments. (Default: 0)"
        ),
    )
    max_fragment_length = IntegerField(
        default=0,
        label="--maxFragmentLength",
        description=(
            "The maximum fragment length needed for "
            "read/pair inclusion. A value of 0 indicates "
            "no limit. (Default: 0)"
        ),
    )
class Input:
    """Input fields to process AlignmentSieve.

    The alignment to filter, the fragment-length bounds and a nested group
    of BigWig options.
    """

    alignment = DataField(data_type="alignment:bam", label="Alignment BAM file")

    # Both bounds default to 0; for the maximum, the description states that
    # 0 means no limit.
    min_fragment_length = IntegerField(
        default=0,
        label="--minFragmentLength",
        description=(
            "The minimum fragment length needed for "
            "read/pair inclusion. This option is primarily useful in "
            "ATACseq experiments, for filtering mono- or di-nucleosome "
            "fragments. (Default: 0)"
        ),
    )
    max_fragment_length = IntegerField(
        default=0,
        label="--maxFragmentLength",
        description=(
            "The maximum fragment length needed for "
            "read/pair inclusion. A value of 0 indicates "
            "no limit. (Default: 0)"
        ),
    )

    class BigWigOptions:
        """Options for calculating BigWig.

        Bin size of the output track and the timeout for its creation.
        """

        bigwig_binsize = IntegerField(
            default=50,
            label="BigWig bin size",
            description=(
                "Size of the bins, in bases, for the output of the "
                "bigwig/bedgraph file. Default is 50."
            ),
        )
        bigwig_timeout = IntegerField(
            default=480,
            label="BigWig timeout",
            description=(
                "Number of seconds before creation of BigWig timeouts. "
                "Default is after 480 seconds (8 minutes)."
            ),
        )

    bigwig_opts = GroupField(BigWigOptions, label="BigWig options")
class Advanced:
    """Add advanced list of options.

    Mapping quality threshold, peak profile window and shift-size range.
    """

    quality_threshold = IntegerField(
        label="Mapping quality threshold",
        description="Only reads with mapping quality scores above "
        "this threshold will be used for some statistics.",
        default=15,
    )
    profile_window = IntegerField(
        label="Window size",
        description="An integer indicating the width of the window "
        "used for peak profiles. Peaks will be centered "
        "on their summits and include half of the window "
        "size upstream and half downstream of this point.",
        default=400,
    )
    # Stored as a "start:end" string rather than as numbers; see the default.
    shift_size = StringField(
        label="Shift size",
        # Description previously contained the typo "specifeird".
        description="Vector of values to try when computing optimal "
        "shift sizes. It should be specified as "
        "consecutive numbers vector with start:end",
        default="1:300",
    )
class Options:
    """Options.

    Assay strandedness selection plus two optional Phred score settings.
    """

    # --- strandedness -------------------------------------------------
    stranded = StringField(
        default="non_specific",
        label="Assay type",
        choices=[
            ("non_specific", "Strand non-specific"),
            ("forward", "Strand-specific forward"),
            ("reverse", "Strand-specific reverse"),
            ("auto", "Detect automatically"),
        ],
    )

    # The next two fields share one `hidden` expression keyed on
    # options.stranded.
    cdna_index = DataField(
        "index:salmon",
        required=False,
        label="cDNA index file",
        hidden="options.stranded != 'auto'",
    )
    n_reads = IntegerField(
        default=5000000,
        label="Number of reads in subsampled alignment file",
        hidden="options.stranded != 'auto'",
    )

    # --- Phred score settings (optional, no defaults) -----------------
    maxPhredScore = IntegerField(required=False, label="Max Phred Score")
    adjustPhredScore = IntegerField(required=False, label="Adjust Phred Score")
class Input:
    """Input fields for AlleyoopSnpEval.

    Reference FASTA, BED regions of interest, Slamdunk alignment results
    and the maximum read length.
    """

    ref_seq = DataField(
        "seq:nucleotide",
        # Label previously contained the typo "containig".
        label="FASTA file containing sequences for aligning",
    )
    regions = DataField(
        "bed",
        label="BED file with coordinates of regions of interest",
    )
    slamdunk = DataField("alignment:bam:slamdunk", label="Slamdunk results")
    read_length = IntegerField(
        label="Maximum read length",
        description="Maximum length of reads in the input FASTQ file",
        default=150,
    )
class Input:
    """Input fields for AlignmentSummary.

    Alignment, genome and optional adapter inputs followed by SAM handling
    and pairing options.
    """

    # --- data objects -------------------------------------------------
    bam = DataField("alignment:bam", label="Alignment BAM file")
    genome = DataField("seq:nucleotide", label="Genome")
    adapters = DataField(
        "seq:nucleotide",
        required=False,
        label="Adapter sequences",
    )

    # --- SAM handling -------------------------------------------------
    validation_stringency = StringField(
        default="STRICT",
        label="Validation stringency",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        description=(
            "Validation stringency for all SAM files read by this "
            "program. Setting stringency to SILENT can improve "
            "performance when processing a BAM file in which "
            "variable-length data (read, qualities, tags) do not "
            "otherwise need to be decoded. Default is STRICT."
        ),
    )

    # --- pairing ------------------------------------------------------
    insert_size = IntegerField(default=100000, label="Maximum insert size")
    # The default is the literal string "null", shown as "Unspecified".
    pair_orientation = StringField(
        default="null",
        label="Pair orientation",
        choices=[
            ("null", "Unspecified"),
            ("FR", "FR"),
            ("RF", "RF"),
            ("TANDEM", "TANDEM"),
        ],
    )

    # --- flags --------------------------------------------------------
    bisulfite = BooleanField(
        default=False,
        label="BAM file consists of bisulfite sequenced reads",
    )
    assume_sorted = BooleanField(
        default=False,
        label="Sorted BAM file",
        description="If true the sort order in the header file will be ignored.",
    )
class GatkOptions:
    """Options.

    Optional interval restriction and the contamination setting.
    """

    intervals = DataField(
        "bed",
        required=False,
        label="Intervals BED file",
        description=(
            "Use intervals BED file to limit the analysis to "
            "the specified parts of the genome."
        ),
    )
    # NOTE(review): labelled as a fraction but declared as an integer field,
    # so non-integer values presumably cannot be entered - confirm whether a
    # float field was intended.
    contamination = IntegerField(
        default=0,
        label="Contamination fraction",
        description=(
            "Fraction of contamination in sequencing "
            "data (for all samples) to aggressively remove."
        ),
    )
class FilterOptions:
    """Filtering options.

    Three switch/parameter pairs; each parameter carries a `hidden`
    expression that references its switch.
    """

    # --- expression count filter --------------------------------------
    count = BooleanField(
        default=True,
        label="Filter genes based on expression count",
    )
    min_count_sum = IntegerField(
        default=10,
        label="Minimum gene expression count summed over all samples",
        hidden="!filter_options.count",
        description=(
            "Filter genes in the expression matrix input. "
            "Remove genes where the expression count sum over all samples "
            "is below the threshold."
        ),
    )

    # --- Cook's distance filter ---------------------------------------
    cook = BooleanField(
        default=False,
        label="Filter genes based on Cook's distance",
    )
    cooks_cutoff = FloatField(
        required=False,
        label="Threshold on Cook's distance",
        hidden="!filter_options.cook",
        description=(
            "If one or more samples have Cook's distance "
            "larger than the threshold set here, the p-value for the row "
            "is set to NA. If left empty, the default threshold of 0.99 "
            "quantile of the F(p, m-p) distribution is used, where p is "
            "the number of coefficients being fitted and m is the number "
            "of samples. This test excludes Cook's distance of samples "
            "belonging to experimental groups with only two samples."
        ),
    )

    # --- independent filtering ----------------------------------------
    independent = BooleanField(
        default=False,
        label="Apply independent gene filtering",
    )
    alpha = FloatField(
        default=0.1,
        label="Significance cut-off used for optimizing independent "
        "gene filtering",
        hidden="!filter_options.independent",
        description=(
            "The value should be set to adjusted p-value "
            "cut-off (FDR)."
        ),
    )