class Input:
    """Declare the inputs accepted by GatkHaplotypeCallerGvcf."""

    bam = DataField(data_type="alignment:bam", label="Analysis ready BAM file")
    ref_seq = DataField(data_type="seq:nucleotide", label="Reference sequence")

    # Toggle referenced by the ``hidden`` expression of the ``options`` group.
    advanced = BooleanField(
        default=False,
        label="Show advanced options",
        description="Inspect and modify parameters.",
    )

    class Options:
        """Fields grouped under ``options``."""

        intervals = DataField(
            data_type="bed",
            required=False,
            label=(
                "Use intervals BED file to limit the analysis to the "
                "specified parts of the genome."
            ),
        )
        contamination = FloatField(
            default=0,
            label="Contamination fraction",
            description=(
                "Fraction of contamination in sequencing data "
                "(for all samples) to aggressively remove."
            ),
        )

    options = GroupField(Options, hidden="!advanced", label="Options")
class Input:
    """Input fields for SlamdunkAllPaired.

    User-facing texts are spell-checked and adjacent string literals are
    joined with an explicit separating space.
    """

    reads = DataField("reads:fastq:paired", label="Reads")
    transcriptome = DataField(
        "seq:nucleotide",
        label="FASTA file containing sequences for aligning.",
    )
    regions = DataField(
        "bed",
        label="BED file with coordinates of regions of interest.",
    )
    filter_multimappers = BooleanField(
        label="Filter multimappers",
        description=(
            "If true filter and reassign multimappers based on provided BED "
            "file with regions of interest."
        ),
        default=True,
    )
    max_alignments = IntegerField(
        label="Maximum number of multimapper alignments",
        # The first literal must end with a space; without it the rendered
        # text reads "readswith".
        description=(
            "The maximum number of alignments that will be reported for a "
            "multi-mapping read (i.e. reads "
            "with multiple alignments of equal best scores)."
        ),
        default=1,
    )
    read_length = IntegerField(
        label="Maximum read length",
        description="Maximum length of reads in the input FASTQ file.",
        default=150,
    )
class Input:
    """Declare two ``test`` data inputs: one required, one optional."""

    data = DataField(data_type="test", label="My input data")
    data2 = DataField(
        data_type="test",
        required=False,
        label="My second non-required input data",
    )
class Input:
    """Declare the inputs accepted by WgsPreprocess."""

    reads = DataField(data_type="reads:fastq:paired", label="Input sample")
    ref_seq = DataField(data_type="seq:nucleotide", label="Reference sequence")
    bwa_index = DataField(data_type="index:bwa", label="BWA genome index")
    known_sites = ListField(
        DataField(data_type="variants:vcf"),
        label="Known sites of variation (VCF)",
    )

    # Toggle referenced by the ``hidden`` expression of ``advanced_options``.
    advanced = BooleanField(
        default=False,
        label="Show advanced options",
        description="Inspect and modify parameters.",
    )

    class AdvancedOptions:
        """Fields grouped under ``advanced_options``."""

        pixel_distance = IntegerField(
            default=2500,
            label="--OPTICAL_DUPLICATE_PIXEL_DISTANCE",
            description=(
                "Set the optical pixel distance, e.g. distance between "
                "clusters. Modify this parameter to ensure compatibility "
                "with older Illumina platforms."
            ),
        )

    advanced_options = GroupField(
        AdvancedOptions,
        hidden="!advanced",
        label="Advanced options",
    )
class Input:
    """Input fields for SlamdunkAllPaired.

    User-facing texts are spell-checked and adjacent string literals are
    joined with an explicit separating space.
    """

    reads = DataField("reads:fastq:paired", label="Reads")
    ref_seq = DataField("seq:nucleotide", label="FASTA file")
    regions = DataField(
        "bed",
        label="BED file with coordinates of regions of interest",
    )
    filter_multimappers = BooleanField(
        label="Filter multimappers",
        description=(
            "If true filter and reassign multimappers based on provided BED "
            "file with regions of interest"
        ),
        default=True,
    )
    max_alignments = IntegerField(
        label="Maximum number of multimapper alignments",
        # The first literal must end with a space; without it the rendered
        # text reads "readswith".
        description=(
            "The maximum number of alignments that will be reported for a "
            "multi-mapping read (i.e. reads "
            "with multiple alignments of equal best scores)"
        ),
        default=1,
    )
    read_length = IntegerField(
        label="Maximum read length",
        description="Maximum length of reads in the input FASTQ file",
        default=150,
    )
class Input:
    """Declare one input of each field kind, plus a nested group."""

    # Scalar and list string fields.
    my_field = StringField(label="My field")
    my_list = ListField(StringField(), label="My list")

    # Data object references.
    input_data = DataField(data_type="test:save", label="My input data")
    input_entity_data = DataField(data_type="entity", label="My entity data")
    bar = DataField("test:save", label="My bar")

    # Remaining primitive field kinds.
    url = UrlField(UrlField.DOWNLOAD, label="My URL")
    integer = IntegerField(label="My integer")
    my_float = FloatField(label="My float")
    my_json = JsonField(label="Blah blah")

    # Optional fields, with and without a default value.
    my_optional = StringField(
        default="default value",
        required=False,
        label="Optional",
    )
    my_optional_no_default = StringField(
        required=False,
        label="Optional no default",
    )

    class MyGroup:
        # Fields grouped under ``my_group``.
        foo = IntegerField(label="Foo")
        bar = StringField(label="Bar")
        group_optional_no_default = StringField(
            required=False,
            label="Group optional no default",
        )

    my_group = GroupField(MyGroup, label="My group")
class Input:
    """Input fields for BsConversionRate.

    Multi-line descriptions rely on implicit string concatenation, so every
    literal except the last one ends with a space to keep words separated.
    """

    mr = DataField(
        "alignment:bam:walt",
        label="Aligned reads from bisulfite sequencing",
        description=(
            "Bisulfite specific alignment such as WALT is required as .mr "
            "file type is used. Duplicates "
            "should be removed to reduce any bias introduced by incomplete "
            "conversion on PCR duplicate "
            "reads."
        ),
    )
    skip = BooleanField(
        label="Skip Bisulfite conversion rate step",
        description="Bisulfite conversion rate step can be skipped.",
        default=False,
    )
    sequence = DataField(
        "seq:nucleotide",
        label="Unmethylated control sequence",
        description=(
            "Separate unmethylated control sequence FASTA file is required "
            "to estimate bisulfite "
            "conversion rate."
        ),
        required=False,
    )
    count_all = BooleanField(
        label="Count all cytosines including CpGs",
        default=True,
    )
    read_length = IntegerField(label="Average read length", default=150)
    max_mismatch = FloatField(
        label="Maximum fraction of mismatches",
        required=False,
    )
    a_rich = BooleanField(label="Reads are A-rich", default=False)
class Input:
    """Input fields for AlleyoopRates."""

    ref_seq = DataField(
        "seq:nucleotide",
        # Label spelling corrected ("containig" -> "containing").
        label="FASTA file containing sequences for aligning",
    )
    slamdunk = DataField("alignment:bam:slamdunk", label="Slamdunk results")
class Input:
    """Declare the inputs accepted by ROSE2."""

    # The first three fields reference one another in their ``hidden``
    # expressions.
    input_macs = DataField(
        data_type="chipseq:callpeak",
        hidden="input_upload",
        required=False,
        label="BED/narrowPeak file (MACS results)",
    )
    input_upload = DataField(
        data_type="bed",
        hidden="input_macs || use_filtered_bam",
        required=False,
        label="BED file (Upload)",
    )
    use_filtered_bam = BooleanField(
        default=False,
        hidden="input_upload",
        label="Use Filtered BAM File",
        description=(
            "Use filtered BAM file from a MACS2 object to rank enhancers by. "
            "Only applicable if input is MACS2."
        ),
    )

    # Both BAM inputs carry the ``hidden="use_filtered_bam"`` expression.
    rankby = DataField(
        data_type="alignment:bam",
        hidden="use_filtered_bam",
        required=False,
        label="BAM file",
        description="BAM file to rank enhancers by.",
    )
    # NOTE(review): this description is identical to the one on ``rankby``;
    # confirm whether the control BAM should be described differently.
    control = DataField(
        data_type="alignment:bam",
        hidden="use_filtered_bam",
        required=False,
        label="Control BAM File",
        description="BAM file to rank enhancers by.",
    )

    tss = IntegerField(
        default=0,
        label="TSS exclusion",
        description="Enter a distance from TSS to exclude. 0 = no TSS exclusion.",
    )
    stitch = IntegerField(
        required=False,
        label="Stitch",
        description=(
            "Enter a max linking distance for stitching. If not given, "
            "optimal stitching parameter will be determined automatically."
        ),
    )
    mask = DataField(
        data_type="bed",
        required=False,
        label="Masking BED file",
        description=(
            "Mask a set of regions from analysis. "
            "Provide a BED of masking regions."
        ),
    )
class Input:
    """Input fields to process ChipQC.

    User-facing descriptions are spell-checked ("build", "specified").
    """

    alignment = DataField(
        data_type="alignment:bam",
        label="Aligned reads",
    )
    peaks = DataField(
        data_type="chipseq:callpeak",
        label="Called peaks",
    )
    blacklist = DataField(
        data_type="bed",
        label="Blacklist regions",
        description="BED file containing genomic regions that should be "
        "excluded from the analysis.",
        required=False,
    )
    calculate_enrichment = BooleanField(
        label="Calculate enrichment",
        description="Calculate enrichment of signal in known genomic "
        "annotation. By default annotation is provided from "
        "the TranscriptDB package specified by genome build "
        "which should match one of the supported annotations "
        "(hg19, hg38, hg18, mm10, mm9, rn4, ce6, dm3). If "
        "annotation is not supported the analysis is skipped.",
        default=False,
    )

    class Advanced:
        """Add advanced list of options."""

        quality_threshold = IntegerField(
            label="Mapping quality threshold",
            description="Only reads with mapping quality scores above "
            "this threshold will be used for some statistics.",
            default=15,
        )
        profile_window = IntegerField(
            label="Window size",
            description="An integer indicating the width of the window "
            "used for peak profiles. Peaks will be centered "
            "on their summits and include half of the window "
            "size upstream and half downstream of this point.",
            default=400,
        )
        # Declared as a string field; the default uses the "start:end" form.
        shift_size = StringField(
            label="Shift size",
            description="Vector of values to try when computing optimal "
            "shift sizes. It should be specified as "
            "consecutive numbers vector with start:end",
            default="1:300",
        )

    advanced = GroupField(
        Advanced,
        label="Advanced parameters",
    )
class Input:
    """Declare the inputs accepted by Bamclipper."""

    alignment = DataField(data_type="alignment:bam", label="Alignment BAM file")
    bedpe = DataField(data_type="bedpe", required=False, label="BEDPE file")
    skip = BooleanField(
        default=False,
        label="Skip Bamclipper step",
        description="Use this option to skip Bamclipper step.",
    )
class Input:
    """Input fields to process ImportScRNA10x.

    The ``chemistry`` choices are given as (value, label) pairs, so the
    stored value is the short chemistry identifier and the human-readable
    text is only the label. This matches the ``default="auto"`` value.
    """

    reads = DataField(
        data_type="screads:10x:",
        label="10x reads data object",
    )
    genome_index = DataField(
        data_type="genomeindex:10x:",
        label="10x genome index data object",
    )
    chemistry = StringField(
        label="Chemistry",
        required=False,
        default="auto",
        description=(
            "Assay configuration. By default the assay configuration is detected "
            "automatically, which is the recommended mode. You should only specify "
            "chemistry if there is an error in automatic detection."
        ),
        # (value, label) order: identifier first, display text second.
        # NOTE(review): identifiers follow Cell Ranger's ``--chemistry``
        # option; "SC5P-PE" replaces the truncated "C5P-PE" - confirm
        # against the Cell Ranger version in use.
        choices=[
            ("auto", "auto"),
            ("threeprime", "Single Cell 3'"),
            ("fiveprime", "Single Cell 5'"),
            ("SC3Pv1", "Single Cell 3' v1"),
            ("SC3Pv2", "Single Cell 3' v2"),
            ("SC3Pv3", "Single Cell 3' v3"),
            ("SC5P-PE", "Single Cell 5' paired-end"),
            ("SC5P-R2", "Single Cell 5' R2-only"),
        ],
    )
    trim_r1 = IntegerField(
        label="Trim R1",
        required=False,
        description=(
            "Hard-trim the input R1 sequence to this length. Note that the length "
            "includes the Barcode and UMI sequences so do not set this below 26 for "
            "Single Cell 3' v2 or Single Cell 5'. This and \"Trim R2\" are useful for "
            "determining the optimal read length for sequencing."
        ),
    )
    trim_r2 = IntegerField(
        label="Trim R2",
        required=False,
        description="Hard-trim the input R2 sequence to this length.",
    )
    expected_cells = IntegerField(
        label="Expected number of recovered cells",
        default=3000,
    )
    force_cells = IntegerField(
        label="Force cell number",
        required=False,
        description=(
            "Force pipeline to use this number of cells, bypassing the cell "
            "detection algorithm. Use this if the number of cells estimated by Cell "
            "Ranger is not consistent with the barcode rank plot."
        ),
    )
class Input:
    """Declare the inputs accepted by GatkGenotypeGVCFs."""

    gvcfs = ListField(
        DataField(data_type="variants:gvcf"),
        label="Input data (GVCF)",
    )
    ref_seq = DataField(data_type="seq:nucleotide", label="Reference sequence")
    intervals = DataField(data_type="bed", label="Intervals file (.bed)")
    dbsnp = DataField(data_type="variants:vcf", label="dbSNP file")

    # Toggle referenced by the ``hidden`` expression of ``advanced_options``.
    advanced = BooleanField(
        default=False,
        label="Show advanced options",
        description="Inspect and modify parameters.",
    )

    class AdvancedOptions:
        """Fields grouped under ``advanced_options``."""

        batch_size = IntegerField(
            default=0,
            label="Batch size",
            description=(
                "Batch size controls the number of samples for which readers "
                "are open at once and therefore provides a way to minimize "
                "memory consumption. However, it can take longer to complete. "
                "Use the consolidate flag if more than a hundred batches were "
                "used. This will improve feature read time. batchSize=0 means "
                "no batching (i.e. readers for all samples will be opened at "
                "once)."
            ),
        )
        consolidate = BooleanField(
            default=False,
            label="Consolidate",
            description=(
                "Boolean flag to enable consolidation. If importing data in "
                "batches, a new fragment is created for each batch. In case "
                "thousands of fragments are created, GenomicsDB feature "
                "readers will try to open ~20x as many files. Also, "
                "internally GenomicsDB would consume more memory to maintain "
                "bookkeeping data from all fragments. Use this flag to merge "
                "all fragments into one. Merging can potentially improve read "
                "performance, however overall benefit might not be noticeable "
                "as the top Java layers have significantly higher overheads. "
                "This flag has no effect if only one batch is used."
            ),
        )

    advanced_options = GroupField(
        AdvancedOptions,
        hidden="!advanced",
        label="Advanced options",
    )
class Input:
    """Declare the inputs accepted by Bamclipper."""

    alignment = DataField(data_type="alignment:bam", label="Alignment BAM file")
    bedpe = DataField(data_type="bedpe", required=False, label="BEDPE file")
    skip = BooleanField(
        default=False,
        label="Skip Bamclipper step",
        description="Use this option to skip Bamclipper step.",
    )
class Input:
    """Declare the inputs accepted by CellRangerMkref."""

    # Data types keep their trailing colon exactly as declared.
    genome = DataField("genome:fasta:", label="Reference genome")
    annotation = DataField("annotation:gtf:", label="Annotation")
class Input:
    """Declare the inputs accepted by CellRangerMkref."""

    # Data types keep their trailing colon exactly as declared.
    genome = DataField("seq:nucleotide:", label="Reference genome")
    annotation = DataField("annotation:gtf:", label="Annotation")
class Input:
    """Declare the inputs accepted by InsertSizeMetrics."""

    bam = DataField(data_type="alignment:bam", label="Alignment BAM file")
    genome = DataField(data_type="seq:nucleotide", label="Genome")

    minimum_fraction = FloatField(
        default=0.05,
        # The label's trailing space is part of the declared text.
        label="Minimum fraction of reads in a category to be considered ",
        description=(
            "When generating the histogram, discard any data categories "
            "(out of FR, TANDEM, RF) that have fewer than this fraction of "
            "overall reads (Range: 0 and 0.5)."
        ),
    )
    include_duplicates = BooleanField(
        default=False,
        label="Include reads marked as duplicates in the insert size histogram",
    )
    deviations = FloatField(
        default=10.0,
        label="Deviations limit",
        description=(
            "Generate mean, standard deviation and plots by trimming the data "
            "down to MEDIAN + DEVIATIONS*MEDIAN_ABSOLUTE_DEVIATION. This is "
            "done because insert size data typically includes enough anomalous "
            "values from chimeras and other artifacts to make the mean and "
            "standard deviation grossly misleading regarding the real "
            "distribution."
        ),
    )
    validation_stringency = StringField(
        default="STRICT",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        label="Validation stringency",
        description=(
            "Validation stringency for all SAM files read by this program. "
            "Setting stringency to SILENT can improve performance when "
            "processing a BAM file in which variable-length data (read, "
            "qualities, tags) do not otherwise need to be decoded. "
            "Default is STRICT."
        ),
    )
    assume_sorted = BooleanField(
        default=False,
        label="Sorted BAM file",
        description="If True, the sort order in the header file will be ignored.",
    )
class Input:
    """Input fields for AlleyoopSnpEval."""

    ref_seq = DataField(
        "seq:nucleotide",
        # Label spelling corrected ("containig" -> "containing").
        label="FASTA file containing sequences for aligning",
    )
    regions = DataField(
        "bed",
        label="BED file with coordinates of regions of interest",
    )
    slamdunk = DataField("alignment:bam:slamdunk", label="Slamdunk results")
    read_length = IntegerField(
        label="Maximum read length",
        description="Maximum length of reads in the input FASTQ file",
        default=150,
    )
class Input:
    """Declare the inputs accepted by AlignmentSummary."""

    bam = DataField(data_type="alignment:bam", label="Alignment BAM file")
    genome = DataField(data_type="seq:nucleotide", label="Genome")
    adapters = DataField(
        data_type="seq:nucleotide",
        required=False,
        label="Adapter sequences",
    )
    validation_stringency = StringField(
        default="STRICT",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        label="Validation stringency",
        description=(
            "Validation stringency for all SAM files read by this program. "
            "Setting stringency to SILENT can improve performance when "
            "processing a BAM file in which variable-length data (read, "
            "qualities, tags) do not otherwise need to be decoded. "
            "Default is STRICT."
        ),
    )
    insert_size = IntegerField(default=100000, label="Maximum insert size")
    pair_orientation = StringField(
        # The default "null" is the first element of the "Unspecified" choice.
        default="null",
        choices=[
            ("null", "Unspecified"),
            ("FR", "FR"),
            ("RF", "RF"),
            ("TANDEM", "TANDEM"),
        ],
        label="Pair orientation",
    )
    bisulfite = BooleanField(
        default=False,
        label="BAM file consists of bisulfite sequenced reads",
    )
    assume_sorted = BooleanField(
        default=False,
        label="Sorted BAM file",
        description="If true the sort order in the header file will be ignored.",
    )
class Input:
    """Input fields to perform Base quality score recalibration.

    The ``read_group`` description is corrected: a space follows the first
    sentence and "require fields" reads "required fields".
    """

    bam = DataField("alignment:bam", label="BAM file containing reads")
    reference = DataField("seq:nucleotide", label="Reference genome file")
    known_sites = ListField(
        DataField(
            data_type="variants:vcf",
            description="One or more databases of known polymorphic sites used to exclude regions around known "
            "polymorphisms from analysis.",
        ),
        label="List of known sites of variation",
    )
    intervals = DataField(
        data_type="bed",
        required=False,
        label="One or more genomic intervals over which to operate.",
        description="This field is optional, but it can speed up the process by restricting calculations to "
        "specific genome regions.",
    )
    read_group = StringField(
        label="Replace read groups in BAM",
        description="Replace read groups in a BAM file. This argument enables the user to replace all read groups "
        "in the INPUT file with a single new read group and assign all reads to this read group in "
        "the OUTPUT BAM file. Addition or replacement is performed using Picard's "
        "AddOrReplaceReadGroups tool. Input should take the form of -name=value delimited by a "
        '";", e.g. "-ID=1;-LB=GENIALIS;-PL=ILLUMINA;-PU=BARCODE;-SM=SAMPLENAME1". See tool\'s '
        "documentation for more information on tag names. Note that PL, LB, PU and SM are required "
        "fields. See caveats of rewriting read groups in the documentation.",
        default="",
    )
    validation_stringency = StringField(
        label="Validation stringency",
        description="Validation stringency for all SAM files read by this program. Setting stringency to SILENT "
        "can improve performance when processing a BAM file in which variable-length data (read, "
        "qualities, tags) do not otherwise need to be decoded. Default is STRICT. This setting is "
        "used in BaseRecalibrator and ApplyBQSR processes.",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        default="STRICT",
    )
class Input:
    """Declare the inputs accepted by MergeFastqPaired."""

    # A list of data objects of type ``reads:fastq:paired:``.
    reads = ListField(
        DataField("reads:fastq:paired:"),
        label="Reads data objects",
    )
class Input:
    """Declare the inputs accepted by MergeFastqSingle."""

    # A list of data objects of type ``reads:fastq:single:``.
    reads = ListField(
        DataField("reads:fastq:single:"),
        label="Reads data objects",
    )
class Options:
    """Option fields: assay strandedness and Phred score settings."""

    stranded = StringField(
        default="non_specific",
        choices=[
            ("non_specific", "Strand non-specific"),
            ("forward", "Strand-specific forward"),
            ("reverse", "Strand-specific reverse"),
            ("auto", "Detect automatically"),
        ],
        label="Assay type",
    )

    # The next two fields share the same ``hidden`` expression on
    # ``options.stranded``.
    cdna_index = DataField(
        data_type="index:salmon",
        hidden="options.stranded != 'auto'",
        required=False,
        label="cDNA index file",
    )
    n_reads = IntegerField(
        default=5000000,
        hidden="options.stranded != 'auto'",
        label="Number of reads in subsampled alignment file",
    )

    maxPhredScore = IntegerField(required=False, label="Max Phred Score")
    adjustPhredScore = IntegerField(required=False, label="Adjust Phred Score")
class Input:
    """Declare the inputs accepted by MapMicroarrayProbes."""

    expressions = ListField(
        DataField(data_type="microarray:normalized"),
        label="Normalized expressions",
    )
    mapping_file = FileField(
        required=False,
        label="File with probe ID mappings",
        description=(
            "The file should be tab-separated and contain two columns with "
            "their column names. The first column should contain Gene IDs "
            "and the second one should contain probe names. Supported file "
            "extensions are .tab.*, .tsv.*, .txt.*"
        ),
    )
    source = StringField(
        required=False,
        allow_custom_choice=True,
        choices=[
            ("AFFY", "AFFY"),
            ("DICTYBASE", "DICTYBASE"),
            ("ENSEMBL", "ENSEMBL"),
            ("NCBI", "NCBI"),
            ("UCSC", "UCSC"),
        ],
        label="Gene ID source",
        description=(
            "Gene ID source used for probe mapping is required "
            "when using a custom file."
        ),
    )
    build = StringField(
        required=False,
        label="Genome build",
        description=(
            "Genome build of mapping file is required "
            "when using a custom file."
        ),
    )
class Input:
    """Input fields to process ClusterTimeCourse.

    The species choices use full binomial names; the human entry is
    "Homo sapiens" (it was previously stored in a mangled, asterisk-masked
    form that could not match a real species name).
    """

    expressions = ListField(
        DataField("expression"),
        relation_type="series",
        label="Time series relation",
        description="Select time course to which the expressions belong to.",
    )
    genes = ListField(
        StringField(),
        label="Gene subset",
        required=False,
        description="Select at least two genes or leave this field empty.",
    )
    # ``gene_species`` and ``gene_source`` both carry ``hidden="!genes"``.
    gene_species = StringField(
        label="Species",
        description="Species to which the selected genes belong to. "
        "This field is required if gene subset is set.",
        required=False,
        hidden="!genes",
        allow_custom_choice=True,
        choices=[
            ("Dictyostelium discoideum", "Dictyostelium discoideum"),
            ("Homo sapiens", "Homo sapiens"),
            ("Macaca mulatta", "Macaca mulatta"),
            ("Mus musculus", "Mus musculus"),
            ("Rattus norvegicus", "Rattus norvegicus"),
        ],
    )
    gene_source = StringField(
        label="Gene ID database of selected genes",
        description="This field is required if gene subset is set.",
        required=False,
        hidden="!genes",
    )
    distance = StringField(
        label="Distance metric",
        choices=[
            ("spearman", "Spearman"),
            ("pearson", "Pearson"),
        ],
        default="spearman",
    )
    linkage = StringField(
        label="Linkage method",
        choices=[
            ("single", "single"),
            ("average", "average"),
            ("complete", "complete"),
        ],
        default="average",
    )
    ordering = BooleanField(
        label="Use optimal ordering",
        description="Results in a more intuitive tree structure, "
        "but may slow down the clustering on large datasets",
        default=False,
    )
class Input:
    """Input fields to process MicroarrayExpression.

    The ``exp`` description is built from adjacent string literals; each
    sentence boundary now carries the separating space that was missing.
    """

    exp_unmapped = DataField(
        "microarray:normalized",
        label="Unmapped normalized expressions",
        description="Unmapped normalized expression with the original probe IDs.",
    )
    exp = FileField(
        label="Normalized and mapped expressions file",
        description=(
            "Files should have two columns one with GeneIDs and the other "
            "one with expression values. "
            "Expected column names are 'Gene' and 'Expression'. "
            "Supported file extensions are .tab.*, .tsv.*, .txt.*"
        ),
    )
    source = StringField(
        label="Gene ID source",
        allow_custom_choice=True,
        choices=[
            ("AFFY", "AFFY"),
            ("DICTYBASE", "DICTYBASE"),
            ("ENSEMBL", "ENSEMBL"),
            ("NCBI", "NCBI"),
            ("UCSC", "UCSC"),
        ],
    )
    build = StringField(label="Genome build")
    probe_mapping = StringField(label="Probe to transcript mapping used")
class Input:
    """Declare the inputs accepted by CollectRrbsMetrics."""

    bam = DataField(data_type="alignment:bam", label="Alignment BAM file")
    genome = DataField(data_type="seq:nucleotide", label="Genome")

    min_quality = IntegerField(
        default=20,
        label="Threshold for base quality of a C base before it is considered",
    )
    next_base_quality = IntegerField(
        default=10,
        label=(
            "Threshold for quality of a base next to a C "
            "before the C base is considered"
        ),
    )
    # NOTE(review): "lenght" is misspelled, but the attribute name is the
    # public input name, so it is kept as declared.
    min_lenght = IntegerField(default=5, label="Minimum read length")
    mismatch_rate = FloatField(
        default=0.1,
        label=(
            "Maximum fraction of mismatches in a read "
            "to be considered (Range: 0 and 1)"
        ),
    )
    validation_stringency = StringField(
        default="STRICT",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        label="Validation stringency",
        description=(
            "Validation stringency for all SAM files read by this program. "
            "Setting stringency to SILENT can improve performance when "
            "processing a BAM file in which variable-length data (read, "
            "qualities, tags) do not otherwise need to be decoded. "
            "Default is STRICT."
        ),
    )
    assume_sorted = BooleanField(
        default=False,
        label="Sorted BAM file",
        description="If true the sort order in the header file will be ignored.",
    )
class Input:
    """Declare the inputs accepted by MethylationArraySesame."""

    idat_file = DataField(
        "methylationarray:idat",
        description="Illumina methylation array BeadChip raw IDAT file.",
        label="Illumina methylation array IDAT file",
    )
class Input:
    """Declare an alignment, a GTF annotation and a group of options."""

    alignment = DataField(data_type="alignment:bam", label="Alignment")
    annotation = DataField(data_type="annotation:gtf", label="GTF annotation")

    class Options:
        """Fields grouped under ``options``."""

        stranded = StringField(
            default="non_specific",
            choices=[
                ("non_specific", "Strand non-specific"),
                ("forward", "Strand-specific forward"),
                ("reverse", "Strand-specific reverse"),
                ("auto", "Detect automatically"),
            ],
            label="Assay type",
        )

        # The next two fields share the same ``hidden`` expression on
        # ``options.stranded``.
        cdna_index = DataField(
            data_type="index:salmon",
            hidden="options.stranded != 'auto'",
            required=False,
            label="cDNA index file",
        )
        n_reads = IntegerField(
            default=5000000,
            hidden="options.stranded != 'auto'",
            label="Number of reads in subsampled alignment file",
        )

        maxPhredScore = IntegerField(required=False, label="Max Phred Score")
        adjustPhredScore = IntegerField(
            required=False,
            label="Adjust Phred Score",
        )

    options = GroupField(Options, label="Options")
class Input:
    """Input fields to perform Base quality score recalibration.

    ``intervals`` is declared with ``required=False`` so the schema agrees
    with its own description ("This field is optional"). The ``read_group``
    description is corrected: a space follows the first sentence and
    "require fields" reads "required fields".
    """

    bam = DataField("alignment:bam", label="BAM file containing reads")
    reference = DataField("genome:fasta", label="Reference genome file")
    known_sites = ListField(
        DataField(
            data_type="variants:vcf",
            description="One or more databases of known polymorphic sites used to exclude regions around known "
            "polymorphisms from analysis.",
        ),
        label="List of known sites of variation",
    )
    # NOTE(review): relaxing this to optional assumes the consuming code
    # copes with a missing value - confirm before merging.
    intervals = DataField(
        data_type="bed",
        required=False,
        label="One or more genomic intervals over which to operate.",
        description="This field is optional, but it can speed up the process by restricting calculations to "
        "specific genome regions.",
    )
    read_group = StringField(
        label="Replace read groups in BAM",
        description="Replace read groups in a BAM file. This argument enables the user to replace all read groups "
        "in the INPUT file with a single new read group and assign all reads to this read group in "
        "the OUTPUT BAM file. Addition or replacement is performed using Picard's "
        "AddOrReplaceReadGroups tool. Input should take the form of -name=value delimited by a "
        '";", e.g. "-ID=1;-LB=GENIALIS;-PL=ILLUMINA;-PU=BARCODE;-SM=SAMPLENAME1". See tool\'s '
        "documentation for more information on tag names. Note that PL, LB, PU and SM are required "
        "fields. See caveats of rewriting read groups in the documentation.",
        default="",
    )
    validation_stringency = StringField(
        label="Validation stringency",
        description="Validation stringency for all SAM files read by this program. Setting stringency to SILENT "
        "can improve performance when processing a BAM file in which variable-length data (read, "
        "qualities, tags) do not otherwise need to be decoded. Default is STRICT. This setting is "
        "used in BaseRecalibrator and ApplyBQSR processes.",
        choices=[
            ("STRICT", "STRICT"),
            ("LENIENT", "LENIENT"),
            ("SILENT", "SILENT"),
        ],
        default="STRICT",
    )