def SetupOptionsParser():
    """Create the option parser for the Barnacle / TopHat-Fusion comparison.

    Returns:
        An OptionParser whose extra ``num_args`` attribute records how many
        positional arguments the usage string lists.
    """
    positional = ["LIB", "BARNACLE_FILE", "TOPHAT_FUSION_FILES"]
    summary = (
        "Compares a list of fusions predicted by Barnacle and a list "
        "predicted by TopHat-Fusion and returns which predictions are "
        "common to both, or unique to one or the other."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    # (flags, keyword settings) in the order they appear in --help.
    option_table = [
        (
            ("--coord-buffer",),
            {
                "type": "int",
                "metavar": "N",
                "help": "Consider events identical if their coordinates "
                "are within Nbp of each other. [default: %default]",
            },
        ),
        (
            ("-f", "--force"),
            {
                "action": "store_true",
                "help": "Force filtering to take place, even if the output "
                "directory already exists.",
            },
        ),
        (
            ("-d", "--debug"),
            {
                "action": "store_true",
                "help": "Print debug information while the program runs.",
            },
        ),
    ]
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    cli.set_defaults(**{"coord_buffer": 1000, "force": False, "debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for this script.

    Only the miscellaneous flags (force, debug, extreme-debug) are defined,
    and the description is still an empty placeholder.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    summary = ""  # TODO
    positional = ["LIB", "BARNACLE_FILE"]  # TODO
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    housekeeping = OptionGroup(cli, "Miscellaneous Options")
    housekeeping.add_option(
        "-f",
        "--force",
        help="Force filtering to take place, even if the output "
        "directory already exists.",
        action="store_true",
    )
    housekeeping.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )
    housekeeping.add_option(
        "--extreme-debug",
        dest="extreme_debug",
        help="Print extremely in-depth debug information while "
        "the program runs. Not recommended for large jobs.",
        action="store_true",
    )
    cli.add_option_group(housekeeping)

    switched_off = dict.fromkeys(("force", "debug", "extreme_debug"), False)
    cli.set_defaults(**switched_off)
    return cli
def SetupOptionsParser():
    """Create the option parser for the primer-design script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = [
        "LIB",
        "BARNACLE_FILE",
        "ALIGNMENTS_FILES",
        "CONTIG_SEQS_FILE",
        "TRANSCRIPT_SEQS_FILE",
        "TRANSCRIPTOME_HOSTNAME",
        "TRANSCRIPTOME_PORT",
        "GENOME_HOSTNAME",
        "GENOME_PORT",
    ]
    summary = (
        "Use Primer3 and gfPcr to create qPCR and sequence "
        "validation primers for the Barnacle events in the given file."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    # Both buffer options are integer distances from the breakpoint.
    buffer_options = (
        (
            "--qpcr-buffer",
            "Ensure that qPCR primers are at least Nbp from the "
            "breakpoint. [default: %default]",
        ),
        (
            "--seq-buffer",
            "Ensure that sequencing primers are at least Nbp "
            "from the breakpoint. [default: %default]",
        ),
    )
    for flag, help_text in buffer_options:
        cli.add_option(flag, type="int", metavar="N", help=help_text)

    switch_options = (
        (
            ("-f", "--force"),
            "Force filtering to take place, even if the output "
            "directory already exists.",
        ),
        (("-d", "--debug"), "Print debug information while the program runs."),
    )
    for flags, help_text in switch_options:
        cli.add_option(*flags, action="store_true", help=help_text)

    cli.set_defaults(
        **{"qpcr_buffer": 20, "seq_buffer": 70, "force": False, "debug": False}
    )
    return cli
def SetupOptionsParser():
    """Create the option parser for the exon-boundary matching script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["LIB", "BARNACLE_FILE", "GENES_FILE"]
    summary = (
        "Go through the group members in the input file and "
        "for each member check how many of its breakpoint coordinates match up "
        "with exon boundaries."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    cli.add_option(
        "-b",
        "--buffer-size",
        metavar="N",
        type="int",
        help="Count the breakpoint coordinate as matching an exon "
        "coordinate if it is within Nbp to either side of "
        "it. [default:%default]",
    )
    # add option for additional annotations files?

    housekeeping = OptionGroup(cli, "Miscellaneous Options")
    housekeeping_specs = [
        (
            ("-f", "--force"),
            {
                "help": "Force filtering to take place, even if the output "
                "directory already exists."
            },
        ),
        (
            ("-d", "--debug"),
            {"help": "Print debug information while the program runs."},
        ),
        (
            ("--extreme-debug",),
            {
                "dest": "extreme_debug",
                "help": "Print extremely in-depth debug information while "
                "the program runs. Not recommended for large jobs.",
            },
        ),
    ]
    for flags, settings in housekeeping_specs:
        housekeeping.add_option(*flags, action="store_true", **settings)
    cli.add_option_group(housekeeping)

    cli.set_defaults(
        **{"buffer_size": 4, "force": False, "debug": False, "extreme_debug": False}
    )
    return cli
def SetupOptionsParser():
    """Create the option parser for the read-to-contig support script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["LIB", "BARNACLE_FILE", "READ_TO_CONTIG_FILE"]
    summary = "Calculates read-to-contig support for groups in the input file"
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    # (flags, keyword settings) in the order they appear in --help.
    option_table = [
        (
            ("--min-overlap",),
            {
                "type": "int",
                "metavar": "N",
                "help": "Require that reads overlap breakpoints by at "
                "least N bp. [default: %default]",
            },
        ),
        (
            ("--disable-profiling-timer",),
            {
                "action": "store_true",
                "dest": "dpt",
                "help": "Sometimes this script can hang when trying to spawn "
                "child processes, due to the kernel's profiling "
                "timer. Use this option to disable the profiling "
                "timer if the script seems to be hanging.",
            },
        ),
        (
            ("--log-file",),
            {
                "dest": "log_file_name",
                "metavar": "FILE",
                "help": "Log all messages in FILE",
            },
        ),
        (
            ("-d", "--debug"),
            {
                "action": "store_true",
                "help": "Print debug information while the program runs.",
            },
        ),
        (
            ("--extreme-debug",),
            {
                "action": "store_true",
                "dest": "extreme_debug",
                "help": "Print extremely in-depth debug information while "
                "the program runs. Not recommended for large jobs.",
            },
        ),
    ]
    for flags, settings in option_table:
        cli.add_option(*flags, **settings)

    cli.set_defaults(
        **{"min_overlap": 5, "dpt": False, "debug": False, "extreme_debug": False}
    )
    return cli
def SetupOptionsParser():
    """Create the option parser for the per-library prediction counter.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["LIBRARY_LIST", "LIBS_DIR"]
    summary = (
        "Counts number of predictions, number of recurrent "
        "predictions, number of genes with predictions, and number of genes with "
        "recurrent predictions for each library in the input list."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    # --write-events / --count-only toggle the same destination.
    event_toggles = (
        (
            "--write-events",
            "store_true",
            "Write events to recurrent or library-specific "
            "output files. [default]",
        ),
        (
            "--count-only",
            "store_false",
            "Do not write events to recurrent or "
            "library-specific output files.",
        ),
    )
    for flag, action, help_text in event_toggles:
        cli.add_option(flag, dest="write_events", action=action, help=help_text)

    cli.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )

    # "force" has no matching option here, but its default is still set.
    cli.set_defaults(**{"write_events": True, "force": False, "debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for the pair-to-genome support script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    description_string = ("Calculates pair-to-genome support for groups in "
        "the input file")
    args = ["BARNACLE_FILE", "PAIR_TO_GENOME_FILE"]
    usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
    parser = OptionParser(description=description_string,
        version="%prog " + VERSION, usage=usage_string)
    parser.num_args = len(args)
    parser.add_option("--read-length",
        type="int", metavar="N",
        help="Each read is N bp long. [ default: %default ]")
    # metavar="N" so the usage line matches the "N bp" wording in the help;
    # without it optparse displays the placeholder as FRAG_LEN.
    parser.add_option("--frag-len",
        type="int", metavar="N",
        help="The expected length of the mate-pair fragments "
             "is N bp. [ default: %default ]")
    parser.add_option("--frag-fract",
        type="float", metavar="F",
        help="When looking for read-pairs spanning the genomic "
             "event region, only count a pair as being "
             "significantly different from the expected length if "
             "the fractional difference between the observed and "
             "expected fragment lengths is more than F. "
             "[ default: %default ]")
    parser.add_option("--min-mapq",
        type="int", metavar="N",
        help="When looking for read-pairs spanning the genomic "
             "event region, filter pairs with mapping quality "
             "less than N. [ default: %default ]")
    parser.add_option("--max-sam-retries",
        type="int", metavar="N",
        help="If there is an error running a samtools view "
             "command, retry the command a maximum of N times. "
             "[ default: %default ]")
    parser.add_option("--disable-profiling-timer",
        action="store_true", dest="dpt",
        help="Sometimes this script can hang when trying to spawn "
             "child processes, due to the kernel's profiling "
             "timer. Use this option to disable the profiling "
             "timer if the script seems to be hanging.")
    parser.add_option("--log-file",
        dest="log_file_name", metavar="FILE",
        help="Log all messages in FILE")
    parser.add_option("-d", "--debug",
        action="store_true",
        help="Print debug information while the program runs.")
    parser.add_option("--extreme-debug",
        action="store_true", dest="extreme_debug",
        help="Print extremely in-depth debug information while "
             "the program runs. Not recommended for large jobs.")
    parser.set_defaults(read_length=50,
                        frag_len=200,
                        frag_fract=0.10,
                        min_mapq=10,
                        max_sam_retries=5,
                        dpt=False,
                        debug=False,
                        extreme_debug=False)
    return parser
def SetupOptionsParser():
    """Create the option parser for the exon-coverage script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    description_string = ("Calculates maximum total coverage of exons involved "
        "in events, then compares that to event coverage to estimate relative "
        "coverage of event transcripts.")
    args = ["LIB", "BARNACLE_FILE", "P2G_FILE"]
    usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
    parser = OptionParser(description=description_string,
        version="%prog " + VERSION, usage=usage_string)
    parser.num_args = len(args)
    parser.add_option("-r", "--read-length",
        type="int",
        help="The length of the reads in the paired-read to genome file. "
             "[default=%default].")
    parser.add_option("--min-overlap",
        type="int", metavar="N",
        help="Require that reads overlap positions by at least N bp. "
             "[default: %default]")
    # --allow-mismatches / --require-perfect toggle the same destination.
    parser.add_option("--allow-mismatches",
        action="store_false", dest="require_perfect",
        help="Allow gaps and mismatches in read-to-genome alignments. [default]")
    parser.add_option("--require-perfect",
        action="store_true", dest="require_perfect",
        help="Only count reads with perfect read-to-genome alignments.")
    parser.add_option("-e", "--max-edit-distance",
        type="int", metavar="N",
        help="Only count reads with an edit distance not greater than N. "
             "[default: %default]")
    misc_group = OptionGroup(parser, "Miscellaneous Options")
    misc_group.add_option("--disable-profiling-timer",
        action="store_true", dest="dpt",
        help="Sometimes this script can hang when trying to spawn "
             "child processes, due to the kernel's profiling "
             "timer. Use this option to disable the profiling "
             "timer if the script seems to be hanging.")
    misc_group.add_option("-f", "--force",
        action="store_true",
        help="Force filtering to take place, even if the output "
             "directory already exists.")
    misc_group.add_option("-d", "--debug",
        action="store_true",
        help="Print debug information while the program runs.")
    misc_group.add_option("--extreme-debug",
        action="store_true", dest="extreme_debug",
        help="Print extremely in-depth debug information while "
             "the program runs. Not recommended for large jobs.")
    parser.add_option_group(misc_group)
    # require_perfect is set explicitly so the documented default
    # (--allow-mismatches) yields False rather than an implicit None.
    parser.set_defaults(read_length=75,
                        min_overlap=5,
                        require_perfect=False,
                        max_edit_distance=1,
                        dpt=False,
                        force=False,
                        debug=False,
                        extreme_debug=False)
    return parser
def SetupOptionsParser():
    """Create the option parser for the exon-coordinate extraction script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["INPUT_FILE", "OUTPUT_DIR"]
    summary = (
        "Extract exon coordinates from a gene annotation "
        "file and create a file holding the exon coordinates formatted for the "
        "overlap code, with non-coding genes and UTRs marked."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    # The help text lists the annotation types named in ANNOT_TYPES.
    known_types = ", ".join(ANNOT_TYPES)
    annotations_help = (
        "The type of annotations file being used for the "
        "gene names, e.g.: %s. " % known_types
    ) + (
        "The code will try to automatically determine the "
        "annotations type if this option is not used."
    )
    cli.add_option("--annotations-type", help=annotations_help)

    # Each pair of switches below toggles one shared destination.
    cli.add_option(
        "--filter-chromosomes",
        help="Only write out exons for genes on standard "
        "chromosomes: chr<I>, chrX, chrY, chrM (where "
        "<I> is any integer). [default]",
        action="store_true",
    )
    cli.add_option(
        "--all-chromosomes",
        dest="filter_chromosomes",
        help="Write out exons for genes on all chromosomes.",
        action="store_false",
    )
    cli.add_option(
        "--no-introns",
        dest="include_introns",
        help="Write only exon coordinates, not introns.",
        action="store_false",
    )
    cli.add_option(
        "--introns",
        dest="include_introns",
        help="Write both exon and intron coordinates. [default]",
        action="store_true",
    )

    cli.add_option(
        "-f",
        "--force",
        help="Force filtering to take place, overwriting the exon "
        "coordinates file if it already exists.",
        action="store_true",
    )
    cli.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )
    cli.add_option(
        "--extreme-debug",
        dest="extreme_debug",
        help="Print extremely in-depth debug information while "
        "the program runs. Not recommended for large jobs.",
        action="store_true",
    )

    cli.set_defaults(
        **{
            "filter_chromosomes": True,
            "include_introns": True,
            "force": False,
            "debug": False,
            "extreme_debug": False,
        }
    )
    return cli
def SetupOptionsParser():
    """Create the option parser for the unparsable-event clean-up script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["LIB", "BARNACLE_FILE"]
    summary = (
        "Makes a backup of the input file, and creates a new "
        "file with any unparsable events removed. Unparsable events are saved in "
        "their own file."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    debug_settings = {
        "action": "store_true",
        "help": "Print debug information while the program runs.",
    }
    cli.add_option("-d", "--debug", **debug_settings)

    cli.set_defaults(**{"debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for this exon-overlap script.

    The description is still an empty placeholder.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    summary = ""  # TODO
    positional = ["LIB", "BARNACLE_FILE", "GENES_FILE"]
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    cli.add_option(
        "--event-buffer",
        metavar="N",
        type="int",
        help="For split events, check whether the breakpoint "
        "coordinates are within N bases of an exon. "
        "[default: %default]",
    )

    # Every remaining option is a plain on/off switch.
    switch_table = [
        (
            ("--use-existing-coords",),
            {
                "dest": "use_existing_group_coords",
                "help": "If a breakpoint coordinates file already exists, "
                "just use it rather than generating a new one.",
            },
        ),
        (
            ("--keep-coords-file",),
            {
                "help": "After exon-overlap processing, do not remove the "
                "alignment-coordinates file that was produced."
            },
        ),
        (
            ("--disable-profiling-timer",),
            {
                "dest": "dpt",
                "help": "Sometimes this script can hang when trying to spawn "
                "child processes, due to the kernel's profiling "
                "timer. Use this option to disable the profiling "
                "timer if the script seems to be hanging.",
            },
        ),
        (
            ("-f", "--force"),
            {
                "help": "Force filtering to take place, even if the output "
                "directory already exists."
            },
        ),
        (
            ("-d", "--debug"),
            {"help": "Print debug information while the program runs."},
        ),
        (
            ("--extreme-debug",),
            {
                "dest": "extreme_debug",
                "help": "Print extremely in-depth debug information while "
                "the program runs. Not recommended for large jobs.",
            },
        ),
    ]
    for flags, settings in switch_table:
        cli.add_option(*flags, action="store_true", **settings)

    cli.set_defaults(
        **{
            "event_buffer": 5,
            "use_existing_group_coords": False,
            "keep_coords_file": False,
            "dpt": False,
            "force": False,
            "debug": False,
            "extreme_debug": False,
        }
    )
    return cli
def SetupOptionsParser():
    """Create the option parser for the batch ACF-status checking script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["LIB_LIST_FILE", "DIR_HEAD"]
    summary = (
        "Runs the check ACF status script on all libraries "
        "in the input file."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    cli.add_option(
        "--log-file",
        help="Log all messages in FILE",
        metavar="FILE",
        dest="log_file_name",
    )
    cli.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )

    cli.set_defaults(**{"debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for the cluster job submission script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["JOB_NAME", "JOB_FILE", "CLUSTER_HEAD"]
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description="Submit jobs to cluster",
    )
    cli.num_args = len(positional)

    # -s / -m toggle the same "single" destination.
    cli.add_option(
        "-s",
        "--single",
        help="JOB_FILE is a single job [default].",
        action="store_true",
    )
    cli.add_option(
        "-m",
        "--multiple",
        dest="single",
        help="JOB_FILE is a list of jobs.",
        action="store_false",
    )

    # Job resource and notification settings.
    cli.add_option(
        "--mem",
        "--memory",
        help='The memory requirement of the jobs [default:"%default"].',
    )
    cli.add_option(
        "--hostname",
        help="The hostname(s) to submit to [default: read from barnacle.cfg].",
    )
    cli.add_option(
        "--queue",
        help="The queue(s) to submit to [default: read from barnacle.cfg].",
    )
    cli.add_option(
        "-w",
        "--wall-time",
        help="The maximum time to spend on the job.",
        metavar="H:MM:SS",
    )
    cli.add_option(
        "--email",
        help="E-mail status updates to the given email address",
    )

    cli.add_option(
        "--disable-profiling-timer",
        dest="dpt",
        help="Sometimes this script can hang when trying to spawn "
        "child processes, due to the kernel's profiling "
        "timer. Use this option to disable the profiling "
        "timer if the script seems to be hanging.",
        action="store_true",
    )
    cli.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )

    cli.set_defaults(**{"single": True, "mem": "1G", "dpt": False, "debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for the read-to-contig result merging script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["LIB", "BARNACLE_FILE"]
    summary = (
        "Merges split read-to-contig support results file "
        "and integrates it into the chimeric transcript results"
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    cli.add_option(
        "--r2c-dir",
        help="Use this directory for the read-to-contig support "
        "results, rather than the default.",
        metavar="DIR",
        dest="jobs_dir",
    )
    cli.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )

    cli.set_defaults(**{"debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for the primer-design script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = [
        "LIB",
        "BARNACLE_FILE",
        "ALIGNMENTS_FILES",
        "CONTIG_SEQS_FILE",
        "TRANSCRIPT_SEQS_FILE",
    ]
    summary = (
        "Use Primer3 and gfPcr to create qPCR and sequence "
        "validation primers for the Barnacle events in the given file."
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    switch_options = (
        (
            ("-f", "--force"),
            "Force filtering to take place, even if the output "
            "directory already exists.",
        ),
        (("-d", "--debug"), "Print debug information while the program runs."),
    )
    for flags, help_text in switch_options:
        cli.add_option(*flags, action="store_true", help=help_text)

    cli.set_defaults(**{"force": False, "debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for the pair-to-genome result merging script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["LIB", "BARNACLE_FILE"]
    summary = (
        "Merges split pair-to-genome support results "
        "and integrates them into the Barnacle results"
    )
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    cli.add_option(
        "--disable-profiling-timer",
        dest="dpt",
        help="Sometimes this script can hang when trying to spawn "
        "child processes, due to the kernel's profiling "
        "timer. Use this option to disable the profiling "
        "timer if the script seems to be hanging.",
        action="store_true",
    )
    cli.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )

    cli.set_defaults(**{"dpt": False, "debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for the Barnacle suite setup script.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    positional = ["CLUSTER_HEAD"]
    summary = "Setup the Barnacle tool-suite: compile Gap Realigner binaries"
    cli = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    cli.num_args = len(positional)

    # --no-annots clears setup_annots, which defaults to True below.
    cli.add_option(
        "--no-annots",
        dest="setup_annots",
        help="Skipping setting up annotation files.",
        action="store_false",
    )
    cli.add_option(
        "-f",
        "--force",
        help="Force filtering to take place, even if the output "
        "directory already exists.",
        action="store_true",
    )
    cli.add_option(
        "-d",
        "--debug",
        help="Print debug information while the program runs.",
        action="store_true",
    )

    cli.set_defaults(**{"setup_annots": True, "force": False, "debug": False})
    return cli
def SetupOptionsParser():
    """Create the option parser for the candidate-identification script.

    Options are organised into groups: alignment parsing, alignment
    grouping, alignment selection, split candidates, gap realignment and
    miscellaneous.

    Returns:
        An OptionParser carrying a ``num_args`` attribute equal to the number
        of positional arguments named in the usage string.
    """
    description_string = ("Examines contig alignments to find contigs "
        "resulting in interesting split or gapped alignments potentially "
        "representing chimeric transcripts.")
    args = ["ALIGNMENT_FILE", "OUTPUT_DIR"]
    usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
    parser = OptionParser(description=description_string,
        version="%prog " + VERSION, usage=usage_string)
    parser.num_args = len(args)

    # --- Which candidate types to look for ---
    parser.add_option("--split-candidates",
        action="store_true", dest="check_split",
        help="Check alignments for contigs matching split "
             "candidate alignment signatures. [default]")
    parser.add_option("--no-split-candidates",
        action="store_false", dest="check_split",
        help="Do not check alignments for contigs matching split "
             "candidate alignment signatures.")
    parser.add_option("--gap-candidates",
        action="store_true", dest="check_gap",
        help="Check alignments for contigs matching gapped "
             "candidate alignment signatures. [default]")
    parser.add_option("--no-gap-candidates",
        action="store_false", dest="check_gap",
        help="Do not check alignments for contigs matching "
             "gapped candidate alignment signatures.")

    # --- Alignment parsing ---
    alignment_parsing_grp = OptionGroup(parser, "Alignment Parsing Options")
    alignment_parsing_grp.add_option("-n", "--num-aligns",
        type='int', dest="num_aligns", metavar="N",
        help="For each contig extract from the alignments file "
             "the N highest scoring alignments that pass the PID "
             "filtering criteria used. [default: %default]")
    alignment_parsing_grp.add_option("-i", "--min-identity",
        type='float', dest="min_identity", metavar="PID",
        help="Extract from the alignments file only alignments "
             "with a percent identity of at least PID. "
             "[default: %default]")
    alignment_parsing_grp.add_option("--genes",
        action="store_true", dest="add_gene_annotation",
        help="Record the gene features overlapped by each "
             "alignment [default]")
    alignment_parsing_grp.add_option("--no-genes",
        action="store_false", dest="add_gene_annotation",
        help="Do not record the gene features overlapped by any "
             "alignments")
    alignment_parsing_grp.add_option("--gene-coords",
        metavar="FILE", dest="gene_coords_path",
        help="Determine whether the alignments overlap any gene "
             "features using the coordinates in FILE")
    alignment_parsing_grp.add_option("--transcript-selection-buffer",
        type="int", metavar="N",
        help="When selecting which transcript(s) a contig "
             "represents, select all transcripts that overlap "
             "the alignments by amounts at most Nbp less than "
             "the largest amount of overlap. [default: %default]")
    alignment_parsing_grp.add_option("--keep-coords-file",
        action="store_true",
        help="After gene feature-overlap processing, do not remove "
             "the alignment-coordinates file that was produced.")
    parser.add_option_group(alignment_parsing_grp)

    # --- Alignment grouping ---
    alignment_grouping_grp = OptionGroup(parser, "Alignment Grouping Options")
    alignment_grouping_grp.add_option("--single-align",
        type='float', dest="longest_single_align", metavar="FRACTION",
        help="If any single alignment for a contig uses at least "
             "FRACTION of the total contig length, do not look "
             "for any split or gapped alignments for that contig. "
             "[default: %default]")
    alignment_grouping_grp.add_option("--merge-overlap",
        type='float', dest="min_merge_overlap", metavar="FRACTION",
        help="If two alignments represent portions of the contig "
             "that overlap by at least FRACTION of the length of "
             "the shorter alignment, group the alignments "
             "together. [default: %default]")
    parser.add_option_group(alignment_grouping_grp)

    # --- Alignment selection ---
    alignment_selecting_grp = OptionGroup(parser,
        "Alignment Selection Options")
    alignment_selecting_grp.add_option("--smart-chooser",
        action="store_false", dest="use_quick_chooser",
        help="Use the smarter, but slower, choose best "
             "alignments method [default]")
    alignment_selecting_grp.add_option("--quick-chooser",
        action="store_true", dest="use_quick_chooser",
        help="Use the simple, but faster, choose best alignments "
             "method")
    alignment_selecting_grp.add_option("--maintain-pared-groups",
        action="store_true", dest="maintain_pared_groups",
        help="Use paring info to only add an alignment to a "
             "group if the alignment will not later be pared; "
             "only relevant when using smart chooser [default]")
    alignment_selecting_grp.add_option("--pare-after-grouping",
        action="store_false", dest="maintain_pared_groups",
        help="Do not pare alignments until all alignments for "
             "the current contig have been grouped; only "
             "relevant when using smart chooser")
    alignment_selecting_grp.add_option("--mm-min-score",
        type='float', metavar="F", dest="mm_min_score_fract",
        help="When deciding whether an alignment group "
             "multi-maps, only use alignments with scores at "
             "least F of the maximum score in the group "
             "[default: %default]")
    alignment_selecting_grp.add_option("--mm-max-pid-diff",
        type='float', metavar="F", dest="mm_max_pid_diff",
        help="When deciding whether an alignment group "
             "multi-maps, only use alignments with percent "
             "identities at most F less than the maximum percent "
             "identity in the group; only relevant when using "
             "smart chooser [default: %default]")
    alignment_selecting_grp.add_option("--mm-max-pid-diff-gap",
        type='float', metavar="F", dest="mm_max_pid_diff_gap",
        help="When deciding whether the alignment group used to "
             "look for gap candidates multi-maps, only use "
             "alignments with percent identities at most F less "
             "than the maximum percent identity in the group; "
             "only relevant when using smart chooser [default: "
             "%default]")
    alignment_selecting_grp.add_option("--min-score",
        type='float', metavar="F", dest="min_score_fract",
        help="When paring alignment groups, only keep alignments "
             "with scores at least F of the maximum score "
             "in the group [default: %default]")
    alignment_selecting_grp.add_option("--max-pid-diff",
        type='float', metavar="F", dest="max_pid_diff",
        help="When paring alignment groups, only keep alignments "
             "with percent identities at most F less than the "
             "maximum percent identity in the group; only "
             "relevant when using smart chooser [default: "
             "%default]")
    alignment_selecting_grp.add_option("--prefer-exons",
        action="store_true",
        help="When paring alignment groups, if any alignments in "
             "the group overlap exons, discard all alignments "
             "in the group that do not overlap any exon")
    alignment_selecting_grp.add_option("--prefer-spliced",
        action="store_true",
        help="When paring alignment groups, if any alignments in "
             "the group are spliced, discard all unspliced "
             "alignments in the group; only relevant when using "
             "smart chooser")
    parser.add_option_group(alignment_selecting_grp)

    # --- Split candidates ---
    split_candidate_grp = OptionGroup(parser, "Split Candidate Options")
    split_candidate_grp.add_option("--ctg-rep",
        type='float', dest="min_ctg_represented", metavar="FRACTION",
        help="Only report split alignments when the total amount "
             "of the contig involved in the two alignments are at "
             "least FRACTION of the total contig length. "
             "[default: %default]")
    split_candidate_grp.add_option("--min-end-dup-fract",
        type='float', dest="min_end_dup_fract", metavar="F",
        help="When labeling alignment topologies, use the "
             "end-duplication label if the target regions "
             "overlap by at least F of the smaller target "
             "region [default: %default]")
    parser.add_option_group(split_candidate_grp)

    # --- Gap realignment ---
    gap_realignment_grp = OptionGroup(parser, "Gap Realignment Options")
    # The help used to refer to a "--use-gap" option that this parser does
    # not define; gap checking is controlled by --gap-candidates.
    gap_realignment_grp.add_option("--ctg-file",
        dest="ctg_seq_path", metavar="FILE",
        help="Read in contig sequences from FILE. Otherwise, the "
             "contig sequences file path is inferred from the "
             "alignments path. This option is only used when "
             "checking for gap candidates (see --gap-candidates).")
    gap_realignment_grp.add_option("--gap-realigner",
        metavar="FILE",
        help="Use FILE as the program to realign gap sequence of "
             "gapped alignments")
    gap_realignment_grp.add_option("--gap-config",
        metavar="FILE",
        help="FILE is the configuration file for the gap "
             "realigner.")
    gap_realignment_grp.add_option("--gap-min-size",
        type='int', dest="min_gap_size", metavar="N",
        help="When processing alignments, only realign gaps of "
             "at least N bases [default: %default]")
    gap_realignment_grp.add_option("--gap-check-min-pid",
        type='float', dest="min_gap_check_pid", metavar="F",
        help="When processing alignments, only realign gaps "
             "when the initial alignment has a percent identity "
             "of at least F. [default: %default]")
    gap_realignment_grp.add_option("--gap-check-min-len",
        type='float', dest="min_gap_check_len_fract", metavar="F",
        help="When processing alignments, only realign gaps "
             "when the initial alignment involves at least F of "
             "the total contig length [default: %default]")
    gap_realignment_grp.add_option("--gap-check-min-score",
        type='float', dest="min_gap_check_score_fract", metavar="F",
        help="When processing alignments, only realign gaps "
             "when the initial alignment score is at least F "
             "of the total contig length [default: %default]")
    gap_realignment_grp.add_option("--gap-max-num-aligns",
        type="int", metavar="N",
        help="If a contig has more than N \"good\" alignments, "
             "do not attempt gap realignment for any of them. "
             "[default: %default]")
    gap_realignment_grp.add_option("--gap-min-identity",
        type='float', dest="min_gap_pid", metavar="PID",
        help="When realigning gaps, only report candidates when "
             "the gap sequence aligns back to the rest of the "
             "contig with a percent identity of at least PID "
             "[default: %default]")
    gap_realignment_grp.add_option("--gap-min-fraction",
        type='float', dest="min_gap_fract", metavar="F",
        help="When realigning gaps, only report candidates when "
             "at least F of the bases in the gap sequence align "
             "back to the rest of the contig [default: %default]")
    gap_realignment_grp.add_option("--gap-max-len",
        type="int", metavar="N",
        help="Only run the gap-realigner on contigs shorter "
             "than Nbp long. [default: %default]")
    gap_realignment_grp.add_option("--gap-debug",
        action="store_true",
        help="Use the debug option when calling the gap "
             "realignment tool.")
    parser.add_option_group(gap_realignment_grp)

    # --- Miscellaneous ---
    misc_grp = OptionGroup(parser, "Miscellaneous Options")
    misc_grp.add_option("--no-mito",
        action="store_true", dest="no_mito",
        help="Do not report results involving mitochondrial DNA. "
             "[default]")
    misc_grp.add_option("--allow-mito",
        action="store_false", dest="no_mito",
        help="Allow results involving mitochondrial DNA.")
    misc_grp.add_option("--output-psl",
        action="store_true",
        help="Output psl alignment lines for events found "
             "[default]")
    misc_grp.add_option("--no-output-psl",
        action="store_false", dest="output_psl",
        help="Do not output psl alignment lines for events found")
    misc_grp.add_option("--contig-set",
        help="Include the contig set name in the output file "
             "names (e.g. main, adj, etc.)")
    misc_grp.add_option("--disable-profiling-timer",
        action="store_true", dest="dpt",
        help="Sometimes this script can hang when trying to spawn "
             "child processes, due to the kernel's profiling "
             "timer. Use this option to disable the profiling "
             "timer if the script seems to be hanging.")
    misc_grp.add_option("-a", "--append",
        action="store_true", dest="append",
        help="Append to the output file, rather than "
             "overwriting it.")
    misc_grp.add_option("--log-file",
        dest="log_file_name", metavar="FILE",
        help="Log all messages in FILE")
    misc_grp.add_option("-d", "--debug",
        action="store_true", dest="debug",
        help="Print debug information while the program runs.")
    misc_grp.add_option("--extreme-debug",
        action="store_true", dest="extreme_debug",
        help="Print extremely in-depth debug information while "
             "the program runs. Not recommended for large jobs.")
    parser.add_option_group(misc_grp)

    parser.set_defaults(
        # candidate types
        check_split=True,
        check_gap=True,
        # alignment parsing
        num_aligns=500,
        min_identity=40.0,
        add_gene_annotation=True,
        transcript_selection_buffer=10,
        keep_coords_file=False,
        # alignment grouping
        longest_single_align=0.999,
        min_merge_overlap=0.8,
        # alignment selection
        use_quick_chooser=False,
        maintain_pared_groups=True,
        mm_min_score_fract=0.8,
        mm_max_pid_diff=1.0,
        mm_max_pid_diff_gap=5.0,
        min_score_fract=0.85,
        max_pid_diff=0.5,
        prefer_exons=False,
        prefer_spliced=False,
        # split candidates
        min_ctg_represented=0.85,
        min_end_dup_fract=0.80,
        # gap realignment
        min_gap_size=4,
        min_gap_check_pid=40.0,
        min_gap_check_len_fract=0.4,
        min_gap_check_score_fract=0.4,
        gap_max_num_aligns=3,
        min_gap_pid=0.95,
        min_gap_fract=0.3,
        gap_max_len=50000,
        gap_debug=False,
        # miscellaneous
        no_mito=True,
        output_psl=True,
        dpt=False,
        append=False,
        debug=False,
        extreme_debug=False,
    )
    return parser
def SetupOptionsParser():
    """Build the command-line parser for submitting hybrid read-support jobs.

    Returns:
        The configured OptionParser. Its ``num_args`` attribute holds the
        number of required positional arguments.
    """
    positional = [
        "LIB",
        "BARNACLE_FILE",
        "PAIR_TO_GENOME_FILE",
        "READ_TO_CONTIG_FILE",
    ]
    parser = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=(
            "Create and submit parallel jobs for "
            "calculating hybrid read-support"
        ),
    )
    parser.num_args = len(positional)

    def register(container, specs):
        # Each spec is a pair: (option flags, add_option keyword settings).
        for flags, settings in specs:
            container.add_option(*flags, **settings)

    register(parser, [
        (("--min-overlap",), dict(
            type="int", metavar="N",
            help="Require that reads overlap breakpoints "
                 "by at least N bp. [default: %default]")),
        (("--records-per-job",), dict(
            type="int", metavar="N",
            help="Process N records in each job. [default=%default]")),
    ])

    cluster_opts = OptionGroup(parser, "Cluster Options")
    register(cluster_opts, [
        (("-c", "--cluster-head"), dict(
            dest="cluster_head",
            help="Cluster head node to submit to.")),
        (("--mem", "--memory"), dict(
            help='The memory requirement of the jobs [default:"%default"].')),
        (("--hostname",), dict(
            help='The hostname(s) to submit to [default:"%default"].')),
        (("--queue",), dict(
            help='The queue(s) to submit to [default:"%default"].')),
        (("-w", "--wall-time"), dict(
            metavar="H:MM:SS",
            help="The maximum time to spend on the job.")),
        (("--email",), dict(
            help="E-mail status updates on submitted "
                 "jobs to the given email address")),
    ])
    parser.add_option_group(cluster_opts)

    misc_opts = OptionGroup(parser, "Miscellaneous Options")
    register(misc_opts, [
        (("--disable-profiling-timer",), dict(
            action="store_true", dest="dpt",
            help="Sometimes this script can hang when trying to spawn child "
                 "processes, due to the kernel's profiling timer. Use this "
                 "option to disable the profiling timer if the script seems "
                 "to be hanging.")),
        (("-f", "--force"), dict(
            action="store_true",
            help="Force filtering to take place, even "
                 "if the output directory already exists.")),
        (("-d", "--debug"), dict(
            action="store_true",
            help="Print debug information while the program runs.")),
        (("--extreme-debug",), dict(
            action="store_true", dest="extreme_debug",
            help="Print extremely in-depth debug information while the "
                 "program runs. Not recommended for large jobs.")),
    ])
    parser.add_option_group(misc_opts)

    defaults = dict(
        min_overlap=5,
        records_per_job=1000,
        mem="3G",
        hostname="q*",
        queue="all.q",
        dpt=False,
        force=False,
        debug=False,
        extreme_debug=False,
    )
    parser.set_defaults(**defaults)
    return parser
def SetupOptionsParser():
    """Build the command-line parser for the candidate-group filtering script.

    Fixes relative to the previous version:
      * ``--read-to-contig-weak`` now stores False into ``req_u_read_to_ctg``;
        it used to store True, making it identical to
        ``--read-to-contig-strong``.
      * ``--no-recalculate-ctg-rep`` now stores False into
        ``recalculate_ctg_rep``; it used to set an unrelated
        ``no_recalculate_ctg_rep`` attribute and never disabled anything.
      * ``data_check`` (the destination of ``--data-check`` and
        ``--no-data-check``) now defaults to True as the help text states.
        Previously only the unrelated name ``check_data`` was given a default.

    Returns:
        The configured OptionParser. Its ``num_args`` attribute holds the
        number of required positional arguments.
    """
    description_string = ("Applies the given filters to the candidate contig "
                          "groups that Barnacle identified.")
    args = ["LIB", "BARNACLE_FILE"]
    usage_string = "%prog " + " ".join(args) + " [ OPTIONS ]"
    parser = OptionParser(description=description_string,
                          version="%prog " + VERSION,
                          usage=usage_string)
    parser.num_args = len(args)

    # --- read-to-contig support filters ---
    parser.add_option("--read-to-contig-with-weak",
                      type="int", dest="read_to_ctg", metavar="N",
                      help="Fail groups with no contig with more than N "
                           "supporting read-to-contig alignments, unless they "
                           "pass the strong-only read-to-contig filter. [default: "
                           "%default]")
    parser.add_option("--read-to-contig",
                      type="int", dest="read_to_ctg_u", metavar="N",
                      help="Fail groups with no contig with more than N "
                           "strongly-supporting read-to-contig alignments, unless "
                           "they pass the with-weak read-to-contig filter. "
                           "[default: %default]")
    # The weak/strong pair toggles one flag, so "weak" must clear it.
    parser.add_option("--read-to-contig-weak",
                      action="store_false", dest="req_u_read_to_ctg",
                      help="Do not fail groups failing the unique "
                           "read-to-contig filter, unless they also fail the "
                           "total read-to-contig filter. [default]")
    parser.add_option("--read-to-contig-strong",
                      action="store_true", dest="req_u_read_to_ctg",
                      help="Fail groups failing the unique read-to-contig "
                           "filter, even if they would pass the total "
                           "read-to-contig filter.")

    # --- read-pair-to-genome support filters ---
    parser.add_option("--pair-to-genome",
                      type="int", dest="pair_to_gen", metavar="N",
                      help="Fail groups with fewer than N supporting "
                           "read-pair-to-genome alignments. [default: %default]")
    parser.add_option("--filtered-pair-to-genome",
                      type="int", dest="pair_to_gen_f", metavar="N",
                      help="Fail groups with fewer than N mapq-filtered "
                           "supporting read-pair-to-genome alignments. "
                           "[default: %default]")
    parser.add_option("--ignore-intronic",
                      action="store_true", dest="ignore_intron",
                      help="Do not count read-pairs mapping to inferred "
                           "intronic locations when calculating "
                           "read-pair-to-genome support. [default]")
    parser.add_option("--count-intronic",
                      action="store_false", dest="ignore_intron",
                      help="Count read-pairs mapping to inferred intronic "
                           "locations when calculating read-pair-to-genome "
                           "support.")

    # --- annotation-based filters ---
    parser.add_option("--no-struct-RNA",
                      action="store_true", dest="no_rna",
                      help="Fail groups overlapping small structural RNA "
                           "regions. [default]")
    parser.add_option("--allow-struct-RNA",
                      action="store_false", dest="no_rna",
                      help="Do not fail groups overlapping small structural "
                           "RNA regions.")
    parser.add_option("--no-mitochondria",
                      action="store_true", dest="no_mito",
                      help="Fail groups involving mitochondrial DNA. "
                           "[default]")
    parser.add_option("--allow-mitochondria",
                      action="store_false", dest="no_mito",
                      help="Do not fail groups involving mitochondrial DNA.")
    parser.add_option("--max-num-groups",
                      type="int", dest="max_num_groups", metavar="N",
                      help="Fail groups with no contig appearing in fewer than "
                           "N groups. Set this to zero to disable this filter. "
                           "[default: %default]")
    # NOTE(review): "reguire" looks like a misspelling of "require"; it is
    # left unchanged because code outside this function presumably reads
    # options.reguire_multiple_aligns -- confirm before renaming.
    parser.add_option("--allow-single-aligner",
                      action="store_false", dest="reguire_multiple_aligns",
                      help="Do not fail groups based on the number of aligners "
                           "they were found with. [default]")
    parser.add_option("--require-multiple-aligners",
                      action="store_true", dest="reguire_multiple_aligns",
                      help="Fail groups that were not found with multiple "
                           "aligners.")

    # --- alignment-quality filters ---
    parser.add_option("--min-identity",
                      type="float", dest="min_pid", metavar="F",
                      help="Fail groups with no contig that has greater than "
                           "F% percent identity for both alignments. [default: "
                           "%default]")
    parser.add_option("--length-sensitive-pid",
                      action="store_true", dest="adjust_pid",
                      help="Adjust the minimum percent identity to use "
                           "maximum number of mismatches for short alignment "
                           "blocks. [default]")
    parser.add_option("--absolute-pid",
                      action="store_false", dest="adjust_pid",
                      help="Use the minimum percent identity provided without "
                           "adjustment from maximum number of mismatches for "
                           "short alignment blocks.")
    parser.add_option("--max-mismatches",
                      type="int", metavar="N",
                      help="If --length-sensitive-pid is used, use the "
                           "minimum of the given minimum percent identity "
                           "value and the calculated percent identity of an "
                           "alignment with N mismatches. [default=%default]")
    parser.add_option("--no-homopolymers",
                      action="store_true", dest="no_runs",
                      help="Fail gapped events involving runs of a single "
                           "base. [default]")
    parser.add_option("--allow-homopolymers",
                      action="store_false", dest="no_runs",
                      help="Do not fail gapped events involving runs of a "
                           "single base.")
    parser.add_option("--soft-runs",
                      action="store_true", dest="soft_runs",
                      help="Allow a single divergent base when determining "
                           "whether a sequence will be considered a homopolymer "
                           "run. [default]")
    parser.add_option("--hard-runs",
                      action="store_false", dest="soft_runs",
                      help="Strictly require all bases to be identical for a "
                           "sequence to be considered a homopolymer run.")
    parser.add_option("--max-ctg-overlap",
                      type="int", dest="ctg_olap", metavar="N",
                      help="Fail paired-alignment candidates when the "
                           "contig coordinates of the alignments overlap by "
                           "more than Nbp. It makes sense for this value to be "
                           "approximately one read-length. [default: %default]")
    parser.add_option("--min-ctg-rep",
                      type="float", dest="ctg_rep", metavar="F",
                      help="Fail groups with no contig with the fraction of "
                           "its length represented by the alignments being at "
                           "least F. [default: %default]")
    parser.add_option("--no-multi-mapping",
                      action="store_true", dest="filter_multi_maps",
                      help="Fail groups with alignments that are flagged "
                           "as multi-mapping. [default]")
    parser.add_option("--allow-multi-mapping",
                      action="store_false", dest="filter_multi_maps",
                      help="Do not fail groups with alignments that are "
                           "flagged as multi-mapping.")
    parser.add_option("--no-repeats",
                      action="store_true", dest="filter_repeats",
                      help="Fail groups that are flagged as "
                           "overlapping repeat sequence.")
    parser.add_option("--allow-repeats",
                      action="store_false", dest="filter_repeats",
                      help="Do not fail groups that are flagged as "
                           "overlapping repeat sequence. [default]")
    parser.add_option("--no-polyA-events",
                      action="store_true", dest="filter_polyA",
                      help="Fail groups that are probably polyA "
                           "tails: single-base runs at the very "
                           "beginning or end of a contig. [default]")
    parser.add_option("--allow-polyA-events",
                      action="store_false", dest="filter_polyA",
                      help="Do not fail groups that are probably "
                           "polyA tails: single-base runs at the very "
                           "beginning or end of a contig.")
    parser.add_option("--ignore-topologies", metavar="TOPOLOGIES",
                      help="Ignore groups classified as any of the topologies "
                           "in the comma-separated list TOPOLOGIES.")

    # --- output and input-handling options ---
    parser.add_option("--sort-by-r2cu",
                      action="store_true",
                      help="Output the groups sorted by their unique "
                           "read-to-contig coverage in descending order.")
    parser.add_option("-p", "--pretty",
                      action="store_true",
                      help="Print output in a more readable, but less easily "
                           "searched/parsed format.")
    parser.add_option("--data-check",
                      action="store_true",
                      help="Perform some simple sanity checks on the groups to "
                           "ensure that the upstream code is not having any "
                           "obvious problems. [default]")
    parser.add_option("--no-data-check",
                      action="store_false", dest="data_check",
                      help="Presume that the input file is well-formatted.")
    parser.add_option("--no-split-out",
                      action="store_false", dest="split_out",
                      help="Do not output groups of different topologies to "
                           "separate output files (create only pass and fail "
                           "output files). [default]")
    parser.add_option("--split-out",
                      action="store_true", dest="split_out",
                      help="Output groups with different alignment topologies "
                           "to separate output files.")
    parser.add_option("--print-fails",
                      action="store_true", dest="print_fails",
                      help="Write failing groups to a failed groups file. "
                           "[default]")
    parser.add_option("--no-print-fails",
                      action="store_false", dest="print_fails",
                      help="Do not write failing groups to a failed groups "
                           "file.")
    parser.add_option("--filter-gene-names",
                      dest="gene_names_path", metavar="FILE",
                      help="Use only gene names found in FILE (file must have "
                           "one gene name per line).")
    parser.add_option("--recalculate-ctg-rep",
                      action="store_true",
                      help="Recalculate the contig representation fraction "
                           "for gap events. [default]")
    # The negative flag must clear the same destination the positive one sets.
    parser.add_option("--no-recalculate-ctg-rep",
                      action="store_false", dest="recalculate_ctg_rep",
                      help="Always use the reported contig representation "
                           "fraction.")
    parser.add_option("--robust-crc",
                      action="store_true", dest="robust_crc",
                      help="If a reasonable contig representation cannot be "
                           "calculated for a gap event, just fail that "
                           "candidate and display a warning rather than "
                           "raising an error. [default]")
    parser.add_option("--brittle-crc",
                      action="store_false", dest="robust_crc",
                      help="If a reasonable contig representation cannot be "
                           "calculated for a gap event, raise an error.")
    parser.add_option("--member-wise",
                      action="store_true", dest="member_wise",
                      help="At least one member of a group must pass all "
                           "filters for the whole group to pass. Only output "
                           "group members that pass all filters. [default]")
    parser.add_option("--group-wise",
                      action="store_false", dest="member_wise",
                      help="If any member of a group passes a filter, then the "
                           "whole group passes that filter.")

    misc_group = OptionGroup(parser, "Miscellaneous Options")
    misc_group.add_option("-f", "--force",
                          action="store_true",
                          help="Force filtering to take place, even if the output "
                               "directory already exists.")
    misc_group.add_option("-d", "--debug",
                          action="store_true",
                          help="Print debug information while the program runs.")
    misc_group.add_option("--extreme-debug",
                          action="store_true", dest="extreme_debug",
                          help="Print extremely in-depth debug information while "
                               "the program runs. Not recommended for large jobs.")
    parser.add_option_group(misc_group)

    parser.set_defaults(
        read_to_ctg=100,
        read_to_ctg_u=5,
        req_u_read_to_ctg=False,
        pair_to_gen=0,
        pair_to_gen_f=0,
        ignore_intron=True,
        no_rna=True,
        no_mito=True,
        max_num_groups=3,
        reguire_multiple_aligns=False,
        min_pid=99.0,
        adjust_pid=True,
        max_mismatches=1,
        no_runs=True,
        soft_runs=True,
        ctg_olap=75,
        ctg_rep=0.90,
        filter_multi_maps=True,
        filter_repeats=False,
        filter_polyA=True,
        ignore_topologies="gap-nontandem-inverted_duplication,gap-tandem-inverted_duplication,local-inversion",
        sort_by_r2cu=False,
        pretty=False,
        # data_check is the destination the --[no-]data-check flags write to.
        data_check=True,
        # check_data is not written by any option; it is retained in case
        # code outside this function still reads it -- TODO confirm and drop.
        check_data=True,
        split_out=False,
        print_fails=True,
        recalculate_ctg_rep=True,
        robust_crc=True,
        member_wise=True,
        force=False,
        debug=False,
        extreme_debug=False,
    )
    return parser
def SetupOptionsParser():
    """Build the command-line parser for the read-to-contig job status checker.

    Returns:
        The configured OptionParser. Its ``num_args`` attribute holds the
        number of required positional arguments.
    """
    positional = ["JOBS_DIR"]
    parser = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description="Checks status of read-to-contig support cluster jobs.",
    )
    parser.num_args = len(positional)

    def register(container, specs):
        # Each spec is a pair: (option flags, add_option keyword settings).
        for flags, settings in specs:
            container.add_option(*flags, **settings)

    resubmission = OptionGroup(parser, "Resubmission Options")
    register(resubmission, [
        (("-r", "--resubmit"), dict(
            action="store_true",
            help="Resubmit all incomplete jobs.")),
        (("-c", "--cluster-head"), dict(
            dest="cluster_head",
            help="Specify cluster head node if "
                 "resubmitting jobs to cluster")),
        (("--mem", "--memory"), dict(
            help='The memory requirement of the resubmitted '
                 'jobs [default:"%default"].')),
        (("--hostname",), dict(
            help="The hostname(s) to resubmit to "
                 "[default: read from barnacle.cfg].")),
        (("--queue",), dict(
            help="The queue(s) to resubmit to "
                 "[default: read from barnacle.cfg].")),
        (("-w", "--wall-time"), dict(
            metavar="H:MM:SS",
            help="When resubmitting, use this "
                 "value as the wall-time option.")),
        (("--email",), dict(
            help="E-mail status updates on resubmitted "
                 "jobs to the given email address")),
    ])
    parser.add_option_group(resubmission)

    miscellaneous = OptionGroup(parser, "Miscellaneous Options")
    register(miscellaneous, [
        (("--disable-profiling-timer",), dict(
            action="store_true", dest="dpt",
            help="Sometimes this script can hang when trying to spawn child "
                 "processes, due to the kernel's profiling timer. Use this "
                 "option to disable the profiling timer if the script seems "
                 "to be hanging.")),
        (("--log-file",), dict(
            dest="log_file_name", metavar="FILE",
            help="Log all messages in FILE")),
        (("-t", "--terse"), dict(
            action="store_true",
            help="Only write number complete and total number of jobs")),
        (("-q", "--quiet"), dict(
            action="store_true",
            help="Only write output to log-file, not to the screen.")),
        (("-d", "--debug"), dict(
            action="store_true",
            help="Print debug information while the program runs.")),
        (("--extreme-debug",), dict(
            action="store_true", dest="extreme_debug",
            help="Print extremely in-depth debug information while the "
                 "program runs. Not recommended for large jobs.")),
    ])
    parser.add_option_group(miscellaneous)

    defaults = dict(
        resubmit=False,
        mem="5G",
        dpt=False,
        terse=False,
        quiet=False,
        debug=False,
        extreme_debug=False,
    )
    parser.set_defaults(**defaults)
    return parser
def SetupOptionsParser():
    """Build the command-line parser for the chimeric transcript simulator.

    Returns:
        The configured OptionParser. Its ``num_args`` attribute holds the
        number of required positional arguments.
    """
    positional = ["GENE_ANNOTATIONS_FILE", "GENOME_SEQUENCE_FILE", "OUTPUT_DIR"]
    parser = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description="Produce sequences for simulated chimeric transcripts",
    )
    parser.num_args = len(positional)

    def register(container, specs):
        # Each spec is a pair: (option flags, add_option keyword settings).
        for flags, settings in specs:
            container.add_option(*flags, **settings)

    def count(help_text):
        # Keyword settings shared by every integer-valued option.
        return dict(type="int", metavar="N", help=help_text)

    def fraction(help_text):
        # Keyword settings shared by every fraction-valued option.
        return dict(type="float", metavar="F", help=help_text)

    register(parser, [
        (("--skip-single-exon-genes",), dict(
            action="store_true", dest="skip_single",
            help="Do not use genes with only a single "
                 "exon when simulating events. [default]")),
        (("--allow-single-exon-genes",), dict(
            action="store_false", dest="skip_single",
            help="Allow the use of genes with only a "
                 "single exon when simulating events.")),
        (("--chr-filter",), dict(
            help="Only simulate events involving chromosomes "
                 "in the provided comma-delimited list.")),
        (("--min-len",), count(
            "Ensure that each simulated transcript is "
            "at least Nbp long. [default: %default]")),
    ])

    fusions = OptionGroup(parser, "Fusion Options")
    register(fusions, [
        (("--num-fusions",), count(
            "Simulate N fusion transcripts. [default: %default]")),
        (("--fract-inter",), fraction(
            "The fraction of fusions to be interchromosomal (between "
            "genes on distinct chromosomes). [default: %default]")),
        (("--fract-cont",), fraction(
            "The fraction of fusions to maintain continuous transcription "
            "direction of both genes. [default: %default]")),
        (("--fract-exon",), fraction(
            "The fraction of fusions to have their breakpoints "
            "exactly at exon boundaries. [default: %default]")),
        (("--min-len-fus",), count(
            "When the breakpoint is not being constrained to exon "
            "edges, it must be at least Nbp away from the edge of "
            "the transcript. [default: %default]")),
    ])
    parser.add_option_group(fusions)

    ptds = OptionGroup(parser, "Partial Tandem Duplication Options")
    register(ptds, [
        (("--num-ptds",), count(
            "Simulate N partial tandem duplication "
            "transcripts. [default: %default]")),
        (("--fract-multi",), fraction(
            "The fraction of PTDs to involve a duplication of multiple "
            "exons, rather than just one. [default: %default]")),
        (("--fract-extra-ptd",), fraction(
            "The fraction of PTDs to involve extra sequence between the "
            "copies of the duplicated sequence. [default: %default]")),
        (("--min-extra-ptd",), count(
            "When extra sequence is inserted in PTDs, "
            "it is at least Nbp long. [default: %default]")),
        (("--max-extra-ptd",), count(
            "When extra sequence is inserted in PTDs, "
            "it is at most Nbp long. [default: %default]")),
    ])
    parser.add_option_group(ptds)

    itds = OptionGroup(parser, "Internal Tandem Duplication Options")
    register(itds, [
        (("--num-itds",), count(
            "Simulate N internal tandem duplication "
            "transcripts. [default: %default]")),
        (("--min-len-itd",), count(
            "For ITDs, the duplicated sequence must "
            "be at least Nbp long. [default: %default]")),
        (("--max-len-itd",), count(
            "For ITDs, the duplicated sequence must "
            "be at most Nbp long. [default: %default]")),
        (("--fract-extra-itd",), fraction(
            "The fraction of ITDs to involve extra sequence between the "
            "copies of the duplicated sequence. [default: %default]")),
        (("--min-extra-itd",), count(
            "When extra sequence is inserted in ITDs, "
            "it is at least Nbp long. [default: %default]")),
        (("--max-extra-itd",), count(
            "When extra sequence is inserted in ITDs, "
            "it is at most Nbp long. [default: %default]")),
    ])
    parser.add_option_group(itds)

    miscellaneous = OptionGroup(parser, "Miscellaneous Options")
    register(miscellaneous, [
        (("--seed",), dict(
            type="float",
            help="The seed to use to initialize the random number "
                 "generator. If no value is specified, current "
                 "system time is used.")),
        (("-f", "--force"), dict(
            action="store_true",
            help="Force filtering to take place, even "
                 "if the output directory already exists.")),
        (("-d", "--debug"), dict(
            action="store_true",
            help="Print debug information while the program runs.")),
    ])
    parser.add_option_group(miscellaneous)

    defaults = dict(
        skip_single=True,
        min_len=200,
        num_fusions=0,
        fract_inter=.5,
        fract_cont=.75,
        fract_exon=.9,
        min_len_fus=15,
        num_ptds=0,
        fract_multi=.75,
        fract_extra_ptd=.05,
        min_extra_ptd=1,
        max_extra_ptd=5,
        num_itds=0,
        min_len_itd=5,
        max_len_itd=100,
        fract_extra_itd=.5,
        min_extra_itd=1,
        max_extra_itd=20,
        force=False,
        debug=False,
    )
    parser.set_defaults(**defaults)
    return parser
def SetupOptionsParser():
    """Build the command-line parser for the event prediction script.

    Returns:
        The configured OptionParser. Its ``num_args`` attribute holds the
        number of required positional arguments.
    """
    positional = ["LIB", "BARNACLE_FILE"]
    parser = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description="Predict events of specific types",
    )
    parser.num_args = len(positional)

    def register(container, specs):
        # Each spec is a pair: (option flags, add_option keyword settings).
        for flags, settings in specs:
            container.add_option(*flags, **settings)

    def enable(help_text, dest=None):
        # Settings for a flag that stores True; dest is omitted when the
        # destination should be derived from the flag name.
        settings = dict(action="store_true", help=help_text)
        if dest is not None:
            settings["dest"] = dest
        return settings

    def disable(dest, help_text):
        # Settings for a flag that stores False into an explicit destination.
        return dict(action="store_false", dest=dest, help=help_text)

    def count(help_text):
        # Settings shared by the integer-valued options.
        return dict(type="int", metavar="N", help=help_text)

    fusions = OptionGroup(parser, "Fusion Prediction Options")
    register(fusions, [
        (("--predict-fusions",), enable(
            "Predict fusion events. [default]", dest="predict_fusions")),
        (("--no-fusions",), disable(
            "predict_fusions", "Do not predict fusion events")),
        (("--with-gene-directions",), enable(
            "Only predict a fusion event if the transcription direction of "
            "the genes involved is continuous across the contig. [default]",
            dest="use_gene_directions")),
        (("--without-gene-directions",), disable(
            "use_gene_directions",
            "Do not consider transcription direction of genes involved.")),
        (("--transcript-annotations",), dict(
            metavar="PATH",
            help="PATH is the path to a file containing gene transcript "
                 "annotations. Used for gene directions.")),
        (("--use-conflicts",), enable(
            "When a gene has conflicting directions in the annotations file, "
            "believe that the gene really does go both ways. [default]")),
        (("--no-use-conflicts",), disable(
            "use_conflicts",
            "Do not consider genes going in both directions. Raise an error "
            "if the annotations file contains conflicting directions.")),
        (("--ignore-conflicts",), enable(
            "When a gene has conflicting directions in the annotations file, "
            "just ignore that gene, rather than halting the script.")),
        (("--include-introns",), enable(
            "Predict fusions that overlap introns even "
            "if they do not overlap any coding regions.")),
        (("--include-nearby",), enable(
            "Predict fusions that do not overlap "
            "genes if they are close to genes.")),
        (("--fusion-dup-filter",), enable(
            "Do not predict a fusion when the contig could be "
            "explained by a duplication event instead. [default]")),
        (("--no-fusion-dup-filter",), disable(
            "fusion_dup_filter",
            "Do not check whether fusion contigs could be "
            "explained by duplication events instead.")),
        (("--min-unique-align",), count(
            "Require that at least Nbp of the contig aligns to "
            "only one of the fusion partners. [default: %default]")),
        (("--min-exon-bounds",), count(
            "Require that N (0, 1, or 2) of the breakpoints match up "
            "with annotated exon boundaries. [default: %default]")),
        (("--read-through",), count(
            "Consider colinear split alignments closer than Nbp "
            "to be read-through events. [default: %default]")),
    ])
    parser.add_option_group(fusions)
    # NOTE: a --filter-gene-names option was once defined at this point but
    # is currently disabled.

    ptds = OptionGroup(parser, "PTD Prediction Options")
    register(ptds, [
        (("--predict-PTDs",), enable(
            "Predict PTD events. [default]", dest="predict_ptds")),
        (("--no-PTDs",), disable(
            "predict_ptds", "Do not predict PTD events")),
    ])
    parser.add_option_group(ptds)

    itds = OptionGroup(parser, "ITD Prediction Options")
    register(itds, [
        (("--predict-ITDs",), enable(
            "Predict ITD events. [default]", dest="predict_itds")),
        (("--no-ITDs",), disable(
            "predict_itds", "Do not predict ITD events")),
        (("--allow-ITD-repeats",), disable(
            "filter_itd_repeats",
            "Allow ITD event prediction for candidate "
            "contigs that overlap annotated repeats.")),
        (("--remove-ITD-repeats",), enable(
            "Do not predict an ITD event if the candidate "
            "contig overlaps any annotated repeats. [default]",
            dest="filter_itd_repeats")),
        (("--require-internal-gaps",), enable(
            "Require that the duplicated sequence is internal "
            "to the contig for gapped ITD event predictions.",
            dest="require_internal_gaps")),
        (("--allow-edge-gaps",), disable(
            "require_internal_gaps",
            "Allow the duplicated sequence to be at the very edge of the "
            "contig for gapped partial exon duplication predictions. "
            "[default]")),
        (("--min-edge-gap-fraction",), dict(
            type="float", metavar="F",
            help="If the gap is at the very edge of a contig, require that "
                 "the fraction of the gap involved in the duplication is at "
                 "least F. This option is ignored if the "
                 "--require-internal-gaps option is used. "
                 "[default: %default]")),
        (("--allow-non-gap-ITDs",), enable(
            "Look for ITDs in groups with junction duplication, end "
            "duplication, and non-colinear topologies as well as gap "
            "topologies.",
            dest="allow_non_gap_itds")),
        (("--only-gap-ITDs",), disable(
            "allow_non_gap_itds",
            "Only look for ITDs in groups with gap topologies [default].")),
        (("--exclude-non-coding",), enable(
            "Do not predict ITD events in non-coding genes. [default]")),
        (("--allow-non-coding",), disable(
            "exclude_non_coding",
            "Report ITD events predicted in non-coding genes.")),
    ])
    parser.add_option_group(itds)

    register(parser, [
        (("--get-breakpoint-exons",), dict(
            metavar="FILE", dest="breakpoint_exons",
            help="Use the exon coordinates in FILE to get the exon overlapped "
                 "by the breakpoint in each event (used for ITDs).")),
        (("--event-buffer",), count(
            "For split events, check whether the breakpoint coordinates "
            "are within N bases of an exon. [default: %default]")),
        (("--use-existing-overlaps",), enable(
            "If a breakpoint/transcripts overlap file already exists, "
            "just use it rather than generating a new one.")),
        (("--transcript-sequences",), dict(
            metavar="PATH", dest="tran_seq_path",
            help="The path to a file containing transcript sequences to "
                 "realign candidate contigs against, for avoiding false "
                 "positives due to problems with contig to genome "
                 "alignments.")),
        (("--contig-sequences",), dict(
            metavar="PATH", dest="ctg_seq_path",
            help="The path to a file containing candidate contig sequences "
                 "to realign to transcript sequences, for avoiding false "
                 "positives due to problems with contig to genome "
                 "alignments.")),
        (("--use-existing-realigns",), enable(
            "If contig-to-transcript realignment files already exist, "
            "just use them rather than generating new ones.")),
    ])

    miscellaneous = OptionGroup(parser, "Miscellaneous Options")
    register(miscellaneous, [
        (("--read-length",), count(
            "The length of the sequenced reads (used for ITD full "
            "duplicate alignment filter). [default: %default]")),
        (("-p", "--pretty"), enable(
            "Print more readable output file as "
            "well as standard data file.")),
        (("--disable-profiling-timer",), enable(
            "Sometimes this script can hang when trying to spawn child "
            "processes, due to the kernel's profiling timer. Use this "
            "option to disable the profiling timer if the script seems "
            "to be hanging.",
            dest="dpt")),
        (("-f", "--force"), enable(
            "Force prediction to take place, even "
            "if the output directory already exists.")),
        (("-d", "--debug"), enable(
            "Print debug information while the program runs.")),
        (("--extreme-debug",), enable(
            "Print extremely in-depth debug information while the "
            "program runs. Not recommended for large jobs.",
            dest="extreme_debug")),
    ])
    parser.add_option_group(miscellaneous)

    defaults = dict(
        predict_fusions=True,
        use_gene_directions=True,
        use_conflicts=True,
        ignore_conflicts=False,
        include_introns=False,
        include_nearby=False,
        fusion_dup_filter=True,
        min_unique_align=5,
        min_exon_bounds=0,
        read_through=50000,
        predict_ptds=True,
        predict_itds=True,
        filter_itd_repeats=True,
        require_internal_gaps=False,
        min_edge_gap_fraction=0.80,
        allow_non_gap_itds=False,
        exclude_non_coding=True,
        event_buffer=5,
        use_existing_overlaps=False,
        use_existing_realigns=False,
        read_length=75,
        pretty=False,
        dpt=False,
        force=False,
        debug=False,
        extreme_debug=False,
    )
    parser.set_defaults(**defaults)
    return parser
def SetupOptionsParser():
    """Build the command-line parser for the Barnacle support-job status checker.

    The default for ``--template`` is assembled with ``os.path.join`` from
    individual path components. The previous version passed a single list
    to ``os.path.join``, which raises TypeError on Python 3 and leaves a
    list (not a path string) as the default on Python 2.

    Returns:
        The configured OptionParser. Its ``num_args`` attribute holds the
        number of required positional arguments.
    """
    description_string = ("Checks the status of Barnacle support jobs submitted "
                          "to the cluster.")
    args = ["LIB_LIST_FILE", "DIR_HEAD"]
    usage_string = ("%prog " + " ".join(args) + " [ OPTIONS ]\n"
                    "LIB_LIST_FILE is a text file containing a list of library info lines, "
                    "each line of the form \"lib_name,assembly_ver,barnacle_ver\" (e.g. "
                    "\"A00001,assembler-1.2.1,1.1.0\").\nDIR_HEAD is the directory containing "
                    "the subdirectories of the libraries in LIB_LIST_FILE.")
    parser = OptionParser(description=description_string,
                          version="%prog " + VERSION,
                          usage=usage_string)
    parser.num_args = len(args)
    parser.add_option("-c", "--check-cid",
                      action="store_true", dest="check_cid",
                      help="Check the status of the candidate identification "
                           "jobs as well as the support job.")
    parser.add_option("-p", "--check-p2g",
                      action="store_true", dest="check_p2g",
                      help="Check the status of the pair-to-genome support "
                           "jobs as well as the support job.")
    parser.add_option("-r", "--check-r2c",
                      action="store_true", dest="check_r2c",
                      help="Check the status of the read-to-contig support "
                           "jobs as well as the support job.")
    parser.add_option("-t", "--template",
                      metavar="TEMPLATE", dest="lib_path_template",
                      help="The template to use to construct the path to the "
                           "event data files for a library. \"DIR_HEAD\", "
                           "\"%{lib}\", \"%{assembly_ver}\", and "
                           "\"%{barnacle_ver}\" will be replaced with the "
                           "appropriate values from LIB_LIST_FILE. "
                           "[default: %default]")
    parser.add_option("--disable-profiling-timer",
                      action="store_true", dest="dpt",
                      help="Sometimes this script can hang when trying to spawn "
                           "child processes, due to the kernel's profiling "
                           "timer. Use this option to disable the profiling "
                           "timer if the script seems to be hanging.")
    parser.add_option("--log-file",
                      dest="log_file_name", metavar="FILE",
                      help="Log all messages in FILE")
    parser.add_option("--terse",
                      action="store_true", dest="terse",
                      help="When checking candidate identification, "
                           "pair-to-genome, or read-to-contig status, only "
                           "report number complete and total number of jobs.")
    parser.add_option("-q", "--quiet",
                      action="store_true",
                      help="Only write output to log-file, not to the screen.")
    parser.add_option("-d", "--debug",
                      action="store_true",
                      help="Print debug information while the program runs.")

    # os.path.join takes the components as separate arguments, not a list.
    default_template = os.path.join(
        "DIR_HEAD", "%{lib}", "Assembly", "%{assembly_ver}",
        "barnacle", "%{barnacle_ver}")
    parser.set_defaults(check_cid=False,
                        check_p2g=False,
                        check_r2c=False,
                        lib_path_template=default_template,
                        dpt=False,
                        terse=False,
                        quiet=False,
                        debug=False)
    return parser
def SetupOptionsParser(): # {
    """Build the command-line parser for the dwgsim read-simulation script.

    Five positional arguments are expected (WT_ANNOT, WT_SEQ, WT_COV,
    EVENT_SEQ, EVENT_COV); that count is stored on the returned parser as
    ``parser.num_args``.  Options are registered directly on the parser and
    in two groups ("dwgsim Options" and "Miscellaneous Options").

    Returns:
        An ``OptionParser`` with all options registered and defaults set.
    """
    positional = ["WT_ANNOT", "WT_SEQ", "WT_COV", "EVENT_SEQ", "EVENT_COV"]
    summary = (
        "Use dwgsim to generate simulated reads for wildtype "
        "and event sequences."
    )
    parser = OptionParser(
        usage=" ".join(["%prog"] + positional + ["[ OPTIONS ]"]),
        version="%prog " + VERSION,
        description=summary,
    )
    parser.num_args = len(positional)

    def register(target, specs):
        # Add each (flags, settings) pair to the parser or option group,
        # preserving the order in which they are listed.
        for flags, settings in specs:
            target.add_option(*flags, **settings)

    # NOTE: an "-A/--cov-adjust" float option (default 0.6) exists only as
    # commented-out code in the original and is not registered here either.
    general_specs = [
        (("-R", "--read-length"), dict(
            type="int", metavar="N",
            help="Simulate reads Nnt long. [default: %default]")),
        (("-F", "--frag-length"), dict(
            type="int", metavar="N",
            help="Simulate read-pairs from fragments with a mean length of Nnt. "
                 "[default: %default]")),
        (("--min-overlap",), dict(
            type="int", metavar="N",
            help="Ensure that for each event sequence, there is at least one read "
                 "generated that overlaps the event split position by Nnt. "
                 "[default: %default]")),
        (("--chr-filter",), dict(
            help="Only simulate reads from genes on chromosomes in the provided "
                 "comma-delimited list.")),
    ]
    simulator_specs = [
        (("--err-rate",), dict(
            type="float", metavar="F",
            help="The per-base rate of sequencing errors. [default: %default]")),
        (("--mut-rate",), dict(
            type="float", metavar="F",
            help="The rate of mutations. [default: %default]")),
        (("--std-dev",), dict(
            type="float", metavar="F",
            help="The standard deviation of the fragment length, defaults to dwgsim "
                 "default if unused.")),
        (("--extra-dwgsim",), dict(
            help="A string containing any extra options to be used in the call to "
                 "dwgsim.")),
    ]
    housekeeping_specs = [
        (("--seed",), dict(
            type="float",
            help="The seed to use to initialize the random number generator. If no "
                 "value is specified, current system time is used.")),
        (("--disable-profiling-timer",), dict(
            action="store_true", dest="dpt",
            help="Sometimes this script can hang when trying to spawn "
                 "child processes, due to the kernel's profiling "
                 "timer. Use this option to disable the profiling "
                 "timer if the script seems to be hanging.")),
        (("-f", "--force"), dict(
            action="store_true",
            help="Force filtering to take place, even if the output "
                 "directory already exists.")),
        (("-d", "--debug"), dict(
            action="store_true",
            help="Print debug information while the program runs.")),
        (("--extreme-debug",), dict(
            action="store_true", dest="extreme_debug",
            help="Print extremely in-depth debug information while "
                 "the program runs. Not recommended for large jobs.")),
    ]

    register(parser, general_specs)

    simulator_group = OptionGroup(parser, "dwgsim Options")
    register(simulator_group, simulator_specs)
    parser.add_option_group(simulator_group)

    housekeeping_group = OptionGroup(parser, "Miscellaneous Options")
    register(housekeeping_group, housekeeping_specs)
    parser.add_option_group(housekeeping_group)

    fallback_values = {
        "read_length": 75,
        "frag_length": 200,
        "min_overlap": 5,
        "err_rate": 0.0037,
        "mut_rate": 0,
        "dpt": False,
        "force": False,
        "debug": False,
        "extreme_debug": False,
    }
    parser.set_defaults(**fallback_values)
    return parser