def analyze_assembly_rna(self, query=None, name=None):
    """
    Run the RNA analysis scripts on ``query``.

    ``run_rna_analysis.py`` is run first, writing an rnammer and an rdp
    output file in the work directory; ``analyze_rna_hits.py`` is then run
    on those two files with an output prefix in the table directory.
    Only the start/end process messages are emitted when ``query`` is
    empty.  ``name`` falls back to the output header.
    """
    self.print_start_of_process_pid()

    if query:
        name = name or self.get_output_header()

        rnammer_path = os.path.join(
            self.get_work_dir(),
            self.__concatenate_file_name(name, 'rnammer.out'))
        rdp_path = os.path.join(
            self.get_work_dir(),
            self.__concatenate_file_name(name, 'rdp.out'))
        table_prefix = os.path.join(self.get_table_dir(), name)

        # Step 1: find the RNA hits and classify them.
        find_hits = RunCommand([
            'run_rna_analysis.py', '-c',
            '-f', rnammer_path,
            '-r', rdp_path,
            query,
        ])
        self.print_and_run_command(find_hits)

        # Step 2: turn the two outputs into a table.
        summarize_hits = RunCommand([
            'analyze_rna_hits.py', '-c',
            '-r', rdp_path,
            '-o', table_prefix,
            rnammer_path,
        ])
        self.print_and_run_command(summarize_hits)

    self.print_end_of_process_pid()
def main():
    """
    Optionally build a BLAST database, then run a single BLAST search.

    Reads the module-level ``options`` and ``args``: ``args[0]`` is used
    as the database (and as the makeblastdb input), ``args[1]`` as the
    query.  Each screen flag (vec, ncbi, mito, rRNA) selects a fixed
    blastn parameter set; otherwise ``options.task`` is looked up in
    ``TASKS``.  Exits with -1 on an unrecognized task, else returns 0.
    """
    # set up command
    blast_cmd = None
    task = options.task

    if options.make_blastdb:
        db_type = 'prot' if options.is_protein else 'nucl'
        makedb = RunCommand(
            [constant.MAKEBLASTDB, '-in', args[0], '-dbtype', db_type])
        print("Running command: " + makedb.get_command() + '\n')
        makedb.run_command()

    if options.vec_screen:
        blast_cmd = NcbiblastnCommandline(
            query=args[1],
            db=args[0],
            evalue=700,
            outfmt=options.outfmt,
            reward=1,
            penalty=-5,
            gapopen=3,
            gapextend=3,
            dust='yes',
            searchsp=1750000000000,
            out=options.output,
            task=task,
            num_threads=options.threads)
    elif options.ncbi_screen:
        # Note: new NCBI requirements say to use lcase_masking
        # (See GAAG-510 for documentation)
        # Blast documentation says soft_masking option if not specified
        # defaults to true so even though this biopython class does not
        # support it the feature should be enabled by default
        blast_cmd = NcbiblastnCommandline(
            query=args[1],
            db=args[0],
            outfmt=options.outfmt,
            dust='yes',
            perc_identity=90,
            lcase_masking='true',
            task='megablast',
            out=options.output,
            num_threads=options.threads)
    elif options.mito_screen:
        # Note: new NCBI requirements say to use lcase_masking
        # (See GAAG-510 for documentation)
        # Blast documentation says soft_masking option if not specified
        # defaults to true so even though this biopython class does not
        # support it the feature should be enabled by default
        blast_cmd = NcbiblastnCommandline(
            query=args[1],
            db=args[0],
            outfmt=options.outfmt,
            dust='yes',
            perc_identity=98.6,
            lcase_masking='true',
            task='blastn',
            out=options.output,
            num_threads=options.threads)
    elif options.rRNA_screen:
        # Options not supported: in_pssm='', soft_masking='true',
        # matrix=5000000, max_intron_length=18, db_gencode=3,
        blast_cmd = NcbiblastnCommandline(
            query=args[1],
            db=args[0],
            outfmt=options.outfmt,
            dust='yes',
            perc_identity=95,
            lcase_masking='',
            task='megablast',
            out=options.output,
            num_threads=options.threads,
            evalue=1e-9,
            window_size=120,
            gapextend=2,
            gapopen=4,
            no_greedy='',
            penalty=-4,
            reward=3,
            word_size=12,
            xdrop_gap=20)
    else:
        if options.task not in TASKS:
            print("Unrecognized blast task, " + options.task)
            sys.exit(-1)

        program = TASKS[options.task]
        search_kwargs = dict(
            query=args[1],
            db=args[0],
            evalue=options.evalue,
            outfmt=options.outfmt,
            out=options.output,
            num_threads=options.threads,
            max_target_seqs=options.max_targets)
        # Only the blastn wrapper is handed a task argument.
        if re.search("Ncbiblastn", str(program)):
            search_kwargs['task'] = task
        blast_cmd = program(**search_kwargs)

    print("Running BLAST command: " + str(blast_cmd) + '\n')
    _stdout, _stderr = blast_cmd()
    return 0
def main():
    """
    Build and run the insert-size command for the input in ``args[0]``.

    The output prefix is ``options.output`` when set; otherwise it is
    derived from ``args[0]`` by ``__make_prefix_from_files``.  Returns 0.
    """
    prefix = options.output or __make_prefix_from_files(args[0])
    insert_size_cmd = __build_insert_size_command(
        args[0], prefix, options.insert_size, options.std_dev)
    RunCommand(insert_size_cmd).run_command()
    return 0
def main():
    """
    Order and orient query sequences using the delta file in ``args[0]``.

    Gathers reference/query details and the show-tilings output, parses
    them into ordered sequences plus a details list, writes
    ``<output>.ono.details.txt`` and ``<output>.interim.fasta``, then
    runs ``make_standard_assembly_files.py`` on the interim fasta.
    Returns 0.
    """
    delta_file = args[0]
    out_prefix = options.output

    # get information about our inputs
    reference, query = _get_query_and_reference_from_delta_file(delta_file)
    query_seqs = _get_query_sequences(query)
    reference_list, reference_lengths = _get_reference_details(reference)

    # get alignment information
    tilings_cmd = _get_show_tilings_command(
        out_prefix, options.id, options.coverage, options.circular,
        delta_file)
    reference_tilings = _get_tilings_information(tilings_cmd)

    # see if we need circular query information
    circular_queries = {}
    if options.circular:
        circular_cmd = _get_circular_show_tilings_command(
            options.output, delta_file)
        circular_queries = _get_circular_alignments(circular_cmd)

    print(" ".join(("Ordering and orienting using", str(delta_file))))
    print(" ".join(("Reference", str(reference))))
    print(" ".join(("Query", str(query))))

    # get ono information from our gathered data
    ono_sequences, ono_details = _parse_tilings(
        reference_list, reference_lengths, reference_tilings,
        query_seqs, circular_queries)

    # print out our output
    _write_details_to_file(ono_details, options.output + ".ono.details.txt")
    interim_fasta = options.output + ".interim.fasta"
    SeqIO.write(ono_sequences, interim_fasta, "fasta")

    assembly_cmd = [
        "make_standard_assembly_files.py",
        "-S", interim_fasta,
        "-o", options.output + ".ono",
    ]
    if options.rename:
        assembly_cmd.append('-r')

    runner = RunCommand(assembly_cmd)
    print(" ".join(("Executing", str(runner.get_command()))))
    runner.run_command()
    return 0
def generate_bam_plots(self, bam_files=[], ref=None, name=None,
                       ref_header='reference', window_size=1000):
    """
    Run ``generate_bam_plots.py`` over ``bam_files`` against ``ref``.

    The details dump goes to the work directory and the plots to the
    chart directory, both named from ``name`` (default: the output
    header) joined with ``ref_header``.  Nothing is run unless both
    ``bam_files`` and ``ref`` are given.
    """
    self.print_start_of_process_pid()

    if bam_files and ref:
        name = name or self.get_output_header()
        analysis_header = self.__concatenate_file_name(name, ref_header)

        details_file = os.path.join(
            self.get_work_dir(),
            self.__concatenate_file_name(analysis_header,
                                         'gc_cvg.details.txt'))
        plot_header = os.path.join(self.get_chart_dir(), analysis_header)
        histogram = self.__concatenate_file_name(plot_header, 'gc_cvg')

        plot_cmd = [
            "generate_bam_plots.py",
            "-g", ref,
            "-d", details_file,
            "-w", str(window_size),
            "-o", plot_header,
            "-hi", histogram,
        ] + bam_files
        self.print_and_run_command(RunCommand(plot_cmd))

    self.print_end_of_process_pid()
def align_reads(self, unmapped_bam=None, ref=None, threads=None,
                aligner='BWA', ref_header='reference', make_index=True,
                align_type='-s'):
    """
    Align ``unmapped_bam`` against ``ref`` with ``align_reads.py``.

    The output header is ``unmapped_bam`` with the literal text
    ``unmapped.bam`` replaced by ``ref_header``.  ``threads`` defaults to
    ``self.get_num_threads()``; ``-x`` is passed when ``make_index`` is
    false.

    Returns the output header joined with ``bam``, or ``None`` when
    ``unmapped_bam`` or ``ref`` is missing (previously that case raised
    UnboundLocalError at the return statement).
    """
    self.print_start_of_process_pid()
    aligned_bam = None

    if unmapped_bam and ref:
        if not threads:
            threads = self.get_num_threads()

        # The dot is escaped so that only a literal "unmapped.bam" is
        # rewritten, not "unmapped" + any character + "bam".
        output_header = re.sub(r"unmapped\.bam", ref_header, unmapped_bam)

        cmd_list = [
            'align_reads.py',
            '-i', unmapped_bam,
            '-o', output_header,
            '-r', ref,
            '-a', aligner,
            align_type,
            '-t', str(threads),
            '-T', self.get_work_dir(),
        ]
        if not make_index:
            cmd_list += ['-x']

        rc = RunCommand(cmd_list)
        self.print_and_run_command(rc)
        aligned_bam = self.__concatenate_file_name(output_header, 'bam')

    self.print_end_of_process_pid()
    return aligned_bam
def revert_to_bam(self, files=[], output_base='reads', direction='fr'):
    """
    Produce ``<work_dir>/<output_base>.unmapped.bam`` style output from
    ``files``.

    If any entry of ``files`` is a bam that is not aligned (per the
    class's ``__is_bam`` / ``__is_aligned_bam`` checks), the first such
    file is symlinked to the output path and returned.  Otherwise
    ``read_format_converter.py`` is run over all of ``files`` (with
    ``-d direction`` when ``direction`` is set).

    Returns the output bam path, or ``None`` when ``files`` is empty or
    the output file does not exist after conversion.  The empty case used
    to raise UnboundLocalError because the output path was never set.
    """
    self.print_start_of_process_pid()

    if not files:
        # Nothing to convert, so there is no output path to report.
        self.print_end_of_process_pid()
        return None

    output_bam = os.path.join(
        self.get_work_dir(),
        self.__concatenate_file_name(output_base, 'unmapped.bam'))

    # An existing unaligned bam can be used directly; link it instead of
    # converting.
    for candidate in files:
        if self.__is_bam(candidate) and not self.__is_aligned_bam(candidate):
            os.symlink(candidate, output_bam)
            self.print_end_of_process_pid()
            return output_bam

    cmd_list = ['read_format_converter.py', '-o', output_bam]
    if direction:
        cmd_list += ['-d', direction]
    cmd_list += files

    rc = RunCommand(cmd_list)
    self.print_and_run_command(rc)
    self.print_end_of_process_pid()

    if not os.path.exists(output_bam):
        return None
    return output_bam
def run_nucmer(self, query=None, ref=None, name=None,
               extension='ref_vs_assembly'):
    """
    Run ``run_nucmer.py --mummerplot`` for ``ref`` versus ``query``.

    The output prefix lives in the work directory and is built from
    ``name`` (default: the output header) and ``extension``.  After the
    run, the prefix's ``png`` file is linked into the chart directory.

    Returns the prefix joined with ``coords``, or ``None`` when ``query``
    or ``ref`` is missing (previously that case raised UnboundLocalError
    at the return statement).
    """
    self.print_start_of_process_pid()
    coords_file = None

    if query and ref:
        if not name:
            name = self.get_output_header()
        prefix = os.path.join(
            self.get_work_dir(),
            self.__concatenate_file_name(name, extension))

        cmd_list = ['run_nucmer.py', '--mummerplot', '-p', prefix, ref, query]
        rc = RunCommand(cmd_list)
        self.print_and_run_command(rc)

        self.link_files(self.get_chart_dir(),
                        [self.__concatenate_file_name(prefix, 'png')])
        coords_file = self.__concatenate_file_name(prefix, 'coords')

    self.print_end_of_process_pid()
    return coords_file
def make_detailed_table(self, name=None, aligned_bam_dict=None, contigs=None,
                        agp=None, taxonomy_output=None):
    """
    Run ``make_detailed_assembly_table.py`` for ``contigs`` and ``agp``.

    ``aligned_bam_dict`` is read as ``{type: {group: {'file': path}}}``;
    for each type a ``--<type>_bam`` option is added whose value is the
    comma-joined ``'file'`` entries of that type's groups.
    ``taxonomy_output`` is passed with ``-t`` when given.  ``name``
    defaults to the output header and prefixes the table-directory output.

    Returns the output header joined with ``contig_detail.table.txt``, or
    ``None`` when ``contigs`` or ``agp`` is missing (previously that case
    raised UnboundLocalError at the return statement).
    """
    self.print_start_of_process_pid()
    detail_table = None

    if contigs and agp:
        if not name:
            name = self.get_output_header()
        out_header = os.path.join(self.get_table_dir(), name)

        cmd_list = ['make_detailed_assembly_table.py',
                    '-c', out_header,
                    '-s', out_header,
                    '-a', agp]
        if taxonomy_output:
            cmd_list += ['-t', taxonomy_output]

        if aligned_bam_dict:
            for bam_type, groups in aligned_bam_dict.items():
                bam_files = ','.join(
                    group['file'] for group in groups.values())
                cmd_list += ["--" + str(bam_type) + "_bam", bam_files]

        cmd_list += [contigs]
        rc = RunCommand(cmd_list)
        self.print_and_run_command(rc)
        detail_table = self.__concatenate_file_name(
            out_header, 'contig_detail.table.txt')

    self.print_end_of_process_pid()
    return detail_table
def run_blast(self, query=None, db=constant.BLAST_NT, threads=None,
              extension='blast.xml', name=None, blast_task='megablast'):
    """
    Run ``run_blast.py`` for ``query`` against ``db``.

    ``threads`` defaults to ``self.get_num_threads()`` and ``name`` to the
    output header.  The output path is built from the work directory,
    ``name`` and ``extension``.

    Returns the blast output path, or ``None`` when ``query`` is missing
    (previously that case raised UnboundLocalError at the return
    statement).
    """
    self.print_start_of_process_pid()
    blast_output = None

    if query:
        if not threads:
            threads = self.get_num_threads()
        if not name:
            name = self.get_output_header()

        blast_output = self.__concatenate_file_name(
            os.path.join(self.get_work_dir(), name), extension)
        rc = RunCommand([
            'run_blast.py',
            '-o', blast_output,
            '-b', blast_task,
            '-t', str(threads),
            db, query,
        ])
        self.print_and_run_command(rc)

    self.print_end_of_process_pid()
    return blast_output
def standardize_file_inputs(self, scaffolds=None, contigs=None, agp=None,
                            minScaffSize=1, minConSize=1, minGapSize=10,
                            rename=True):
    """
    Rebuild the assembly files with ``make_standard_assembly_files.py``.

    Exits with -1 when neither ``contigs`` nor ``scaffolds`` is given.
    Returns a 3-tuple of paths built from the work-directory output
    header: scaffolds fasta, contigs fasta and agp.
    """
    # We will make the assembly files from the inputs to ensure minimum
    # NCBI gap size is enforced, rather than copying the inputs when all
    # three (contigs, agp, scaffolds) are supplied.
    self.print_start_of_process_pid()

    if not (contigs or scaffolds):
        print("Must give a fasta file for make_standard_assembly_files.")
        sys.exit(-1)

    out_header = os.path.join(self.get_work_dir(), self.get_output_header())

    command = [
        'make_standard_assembly_files.py',
        '-o', out_header,
        '-s', str(minScaffSize),
        '-c', str(minConSize),
        '-g', str(minGapSize),
    ]
    if rename:
        command.append('--rename')
    # Each input file is optional and gets its own flag.
    for flag, path in (('-C', contigs), ('-A', agp), ('-S', scaffolds)):
        if path:
            command.extend([flag, path])

    self.print_and_run_command(RunCommand(command))
    self.print_end_of_process_pid()

    scaffolds_out = self.__concatenate_file_name(out_header, "scaffolds.fasta")
    contigs_out = self.__concatenate_file_name(out_header, "contigs.fasta")
    agp_out = self.__concatenate_file_name(out_header, "agp")
    return scaffolds_out, contigs_out, agp_out
def get_simple_bam_stats(self, bam_files=[], name=None,
                         ref_header='reference'):
    """
    Run ``get_simple_bam_stats.py`` over ``bam_files``.

    The output prefix is in the table directory, built from ``name``
    (default: the output header) and ``ref_header``.  Nothing is run when
    ``bam_files`` is empty.
    """
    self.print_start_of_process_pid()

    if bam_files:
        name = name or self.get_output_header()
        stats_prefix = os.path.join(
            self.get_table_dir(),
            self.__concatenate_file_name(name, ref_header))
        stats_cmd = ['get_simple_bam_stats.py', '-o', stats_prefix] + bam_files
        self.print_and_run_command(RunCommand(stats_cmd))

    self.print_end_of_process_pid()
def parse_blast_xml(self, blast_xml=None):
    """
    Run ``parse_blast_xml.py`` on ``blast_xml``.

    The output path is ``blast_xml`` with every occurrence of ``xml``
    replaced by ``parsed.txt``.

    Returns the output path, or ``None`` when ``blast_xml`` is missing
    (previously that case raised UnboundLocalError at the return
    statement).
    """
    self.print_start_of_process_pid()
    parse_output = None

    if blast_xml:
        # NOTE(review): this rewrites "xml" anywhere in the path, including
        # directory names; kept as-is because callers may depend on the
        # resulting file names.
        parse_output = re.sub("xml", "parsed.txt", blast_xml)
        rc = RunCommand(['parse_blast_xml.py', '-o', parse_output, blast_xml])
        self.print_and_run_command(rc)

    self.print_end_of_process_pid()
    return parse_output
def main():
    """
    Run rnammer on ``args[0]`` and, when requested, classify the hits.

    Exits with -1 when ``__check_inputs`` rejects the classify flag /
    rdp output combination.  Otherwise runs the rnammer command, then the
    rdp command if ``options.classify`` is set, and returns 0.
    """
    if not __check_inputs(options.classify, options.rdp_out):
        print("If classifying hits, must supply classify flag and rdp output file.")
        sys.exit(-1)

    rnammer = RunCommand(
        __build_rnammer_cmd(args[0], options.rnammer_out, options.gene,
                            options.superkingdom))
    print("RUNNING: " + rnammer.get_command())
    rnammer.run_command()

    if options.classify:
        rdp = RunCommand(
            __build_rdp_cmd(options.rnammer_out, options.rdp_out))
        print("RUNNING: " + rdp.get_command())
        rdp.run_command()

    return 0
def _get_circular_show_tilings_command(output_header, delta):
    """
    Return the RunCommand for the circular show-tilings invocation.

    The argument list is fixed apart from the unplaced-file name, which
    is ``output_header`` plus ``.unplaced``; ``delta`` is the nucmer
    delta file handed to ``_build_showtiling_command``.
    """
    unplaced_file = output_header + '.unplaced'
    circular_args = [
        '-R',
        '-a',
        '-v 5',
        '-g -1',
        '-V 0',
        '-u', unplaced_file,
        '-c',
    ]
    showtiling = _build_showtiling_command(delta, circular_args)
    return RunCommand(showtiling)
def get_kmer_copy_number(self, fasta=None, name=None, kmer_size=29):
    """
    Run ``kmer_copy_number.py`` on ``fasta`` with the given ``kmer_size``.

    Output is prefixed by ``name`` (default: the output header) in the
    table directory.  Nothing is run when ``fasta`` is empty.
    """
    self.print_start_of_process_pid()

    if fasta:
        name = name or self.get_output_header()
        table_prefix = os.path.join(self.get_table_dir(), name)
        copy_number_cmd = [
            'kmer_copy_number.py',
            '-k', str(kmer_size),
            '-o', table_prefix,
            fasta,
        ]
        self.print_and_run_command(RunCommand(copy_number_cmd))

    self.print_end_of_process_pid()
def run_kmer_coverage(self, ref=None, query=None, name=None, kmer_size=29):
    """
    Run ``kmer_coverage.py`` for ``ref`` and ``query``.

    Output is prefixed by ``name`` (default: the output header) in the
    table directory.  Nothing is run unless both ``ref`` and ``query``
    are given.
    """
    self.print_start_of_process_pid()

    if ref and query:
        name = name or self.get_output_header()
        table_prefix = os.path.join(self.get_table_dir(), name)
        coverage_cmd = [
            'kmer_coverage.py',
            '-k', str(kmer_size),
            '-o', table_prefix,
            ref,
            query,
        ]
        self.print_and_run_command(RunCommand(coverage_cmd))

    self.print_end_of_process_pid()
def plot_insert_size(self, insert_size_files=[], direction='fr',
                     ref_header='reference', output_base='reads'):
    """
    Run ``plot_insert_size.py`` over ``insert_size_files``.

    The ``-o`` prefix is in the chart directory and the ``-m`` prefix in
    the table directory; both are built from ``output_base`` and
    ``ref_header``.  Nothing is run when ``insert_size_files`` is empty.
    """
    self.print_start_of_process_pid()

    if insert_size_files:
        table_prefix = os.path.join(
            self.get_table_dir(),
            self.__concatenate_file_name(output_base, ref_header))
        chart_prefix = os.path.join(
            self.get_chart_dir(),
            self.__concatenate_file_name(output_base, ref_header))

        plot_cmd = [
            'plot_insert_size.py',
            '-o', chart_prefix,
            '-m', table_prefix,
            '-d', direction,
        ] + insert_size_files
        self.print_and_run_command(RunCommand(plot_cmd))

    self.print_end_of_process_pid()
def run_scaffold_accuracy(self, ref=None, query=None, name=None):
    """
    Run ``run_scaffold_accuracy.py`` for ``ref`` and ``query``.

    ``-o`` is given a prefix in the work directory and ``-t`` a prefix in
    the table directory, both using ``name`` (default: the output
    header).  Nothing is run unless both ``ref`` and ``query`` are given.
    """
    self.print_start_of_process_pid()

    if ref and query:
        name = name or self.get_output_header()
        work_prefix = os.path.join(self.get_work_dir(), name)
        table_prefix = os.path.join(self.get_table_dir(), name)
        accuracy_cmd = [
            'run_scaffold_accuracy.py',
            '-o', work_prefix,
            '-t', table_prefix,
            ref,
            query,
        ]
        self.print_and_run_command(RunCommand(accuracy_cmd))

    self.print_end_of_process_pid()
def get_bam_coverage_stats(self, bam_files=[], name=None,
                           ref_header='reference', want_phys_cvg=False):
    """
    Run ``get_bam_coverage_stats.py`` over ``bam_files``.

    The output prefix is in the table directory, built from ``name``
    (default: the output header) and ``ref_header``.  ``-p`` is added
    when ``want_phys_cvg`` is true.  Nothing is run when ``bam_files`` is
    empty.
    """
    self.print_start_of_process_pid()

    if bam_files:
        name = name or self.get_output_header()
        stats_prefix = os.path.join(
            self.get_table_dir(),
            self.__concatenate_file_name(name, ref_header))

        coverage_cmd = ['get_bam_coverage_stats.py', '-o', stats_prefix]
        if want_phys_cvg:
            coverage_cmd.append('-p')
        coverage_cmd.extend(bam_files)

        self.print_and_run_command(RunCommand(coverage_cmd))

    self.print_end_of_process_pid()
def compare_to_reference(self, coords_file=None, name=None):
    """
    Run ``compare_to_reference.py`` on ``coords_file``.

    Output is prefixed by ``name`` (default: the output header) in the
    table directory.  Nothing is run when ``coords_file`` is empty.
    """
    self.print_start_of_process_pid()

    if coords_file:
        name = name or self.get_output_header()
        table_prefix = os.path.join(self.get_table_dir(), name)
        compare_cmd = [
            'compare_to_reference.py',
            '-o', table_prefix,
            '-n',
            '-c', coords_file,
        ]
        self.print_and_run_command(RunCommand(compare_cmd))

    self.print_end_of_process_pid()
def identify_coverage_anomalies(self, bam_files=[], name=None,
                                ref_header='reference', window_size=1000):
    """
    Run ``identify_coverage_anomalies.py`` over ``bam_files``.

    The final argument is an output path in the chart directory built
    from ``name`` (default: the output header), ``ref_header`` and
    ``coverage_anomalies``.  Nothing is run unless both ``bam_files`` and
    ``ref_header`` are truthy.
    """
    self.print_start_of_process_pid()

    if bam_files and ref_header:
        name = name or self.get_output_header()
        analysis_header = self.__concatenate_file_name(name, ref_header)
        anomalies_out = os.path.join(
            self.get_chart_dir(),
            self.__concatenate_file_name(analysis_header,
                                         "coverage_anomalies"))

        # bam files come before the trailing output path
        anomalies_cmd = (
            ['identify_coverage_anomalies.py', '--window_size',
             str(window_size)]
            + bam_files
            + [anomalies_out])
        self.print_and_run_command(RunCommand(anomalies_cmd))

    self.print_end_of_process_pid()
def parse_blast_xml(self, blast_xml=None, no_blast_filter=False):
    """
    Run ``parse_blast_xml.py`` on ``blast_xml``.

    The output path is ``blast_xml`` with every occurrence of ``xml``
    replaced by ``parsed.txt``.  ``--no_filter`` is passed when
    ``no_blast_filter`` is true.

    Returns the output path, or ``None`` when ``blast_xml`` is missing
    (previously that case raised UnboundLocalError at the return
    statement).
    """
    self.print_start_of_process_pid()
    parse_output = None

    if blast_xml:
        # NOTE(review): this rewrites "xml" anywhere in the path, including
        # directory names; kept as-is because callers may depend on the
        # resulting file names.
        parse_output = re.sub("xml", "parsed.txt", blast_xml)
        cmd = ['parse_blast_xml.py', '-o', parse_output]
        if no_blast_filter:
            cmd += ['--no_filter']
        rc = RunCommand(cmd + [blast_xml])
        self.print_and_run_command(rc)

    self.print_end_of_process_pid()
    return parse_output
def _get_show_tilings_command(output_header, id, coverage, circular, delta):
    """
    Return the RunCommand for the basic show-tilings invocation.

    Arguments are the unplaced-file name (``output_header`` plus
    ``.unplaced``), the identity cutoff ``id``, the coverage cutoff
    ``coverage`` and, when ``circular`` is true, the ``-c`` flag.
    ``delta`` is the nucmer delta file handed to
    ``_build_showtiling_command``.
    """
    unplaced_file = output_header + '.unplaced'
    tiling_args = ['-R', '-u', unplaced_file, "-i", id, "-v", coverage]
    if circular:
        tiling_args.append("-c")
    showtiling = _build_showtiling_command(delta, tiling_args)
    return RunCommand(showtiling)
def analyze_gap_ends(self, name=None, contigs=None, agp=None,
                     extension='analyze_gap_ends'):
    """
    Run ``analyze_gap_ends.py`` on ``contigs`` and ``agp``.

    ``-t`` gets a prefix in the table directory and ``-c`` a chart path
    built from the chart directory, ``name`` (default: the output header)
    and ``extension``.  Nothing is run unless both ``contigs`` and
    ``agp`` are given.
    """
    self.print_start_of_process_pid()

    if contigs and agp:
        name = name or self.get_output_header()
        table_prefix = os.path.join(self.get_table_dir(), name)
        chart_path = self.__concatenate_file_name(
            os.path.join(self.get_chart_dir(), name), extension)
        gap_cmd = [
            'analyze_gap_ends.py',
            '-c', chart_path,
            '-t', table_prefix,
            contigs,
            agp,
        ]
        self.print_and_run_command(RunCommand(gap_cmd))

    self.print_end_of_process_pid()
def blast_bubbles(self, name=None, taxonomy_output=None, contig_detail=None):
    """
    Run ``blast_bubbles.py`` on ``contig_detail`` and ``taxonomy_output``.

    ``-o`` gets a prefix in the chart directory and ``-v`` a prefix in
    the work directory, both using ``name`` (default: the output header).
    Nothing is run unless both inputs are given.
    """
    self.print_start_of_process_pid()

    if taxonomy_output and contig_detail:
        name = name or self.get_output_header()
        chart_prefix = os.path.join(self.get_chart_dir(), name)
        detail_prefix = os.path.join(self.get_work_dir(), name)
        bubbles_cmd = [
            'blast_bubbles.py',
            '-v', detail_prefix,
            '-o', chart_prefix,
            contig_detail,
            taxonomy_output,
        ]
        self.print_and_run_command(RunCommand(bubbles_cmd))

    self.print_end_of_process_pid()
def get_blast_hit_taxonomy(self, parsed_blast=None, query=None,
                           nodes=constant.BLAST_NODES,
                           names=constant.BLAST_NAMES, name=None):
    """
    Run ``get_blast_hit_taxonomy.py`` on ``parsed_blast`` and ``query``.

    The heatmap path is ``parsed_blast`` with every occurrence of ``txt``
    replaced by ``heatmap``; the table output is prefixed by ``name``
    (default: the output header) in the table directory.  The command is
    only run when both ``nodes`` and ``names`` are truthy; neither value
    is passed on the command line.

    Returns the heatmap path, or ``None`` when ``parsed_blast`` or
    ``query`` is missing (previously that case raised UnboundLocalError
    at the return statement).
    """
    self.print_start_of_process_pid()
    taxonomy_heatmap = None

    if parsed_blast and query:
        if not name:
            name = self.get_output_header()
        tax_output = os.path.join(self.get_table_dir(), name)
        taxonomy_heatmap = re.sub("txt", "heatmap", parsed_blast)

        if nodes and names:
            rc = RunCommand([
                'get_blast_hit_taxonomy.py',
                '-o', tax_output,
                '-m', taxonomy_heatmap,
                parsed_blast, query,
            ])
            self.print_and_run_command(rc)

    self.print_end_of_process_pid()
    return taxonomy_heatmap
def blast_map(self, taxonomy_heatmap=None, agp=None, name=None):
    """
    Run ``blast_map.py`` on ``taxonomy_heatmap``.

    Output is prefixed by ``name`` (default: the output header) in the
    chart directory; ``-g agp`` is added when ``agp`` is given.  Nothing
    is run when ``taxonomy_heatmap`` is empty.
    """
    self.print_start_of_process_pid()

    if taxonomy_heatmap:
        name = name or self.get_output_header()
        chart_prefix = os.path.join(self.get_chart_dir(), name)

        map_cmd = ['blast_map.py', '-o', chart_prefix]
        if agp:
            map_cmd.extend(['-g', agp])
        map_cmd.append(taxonomy_heatmap)

        self.print_and_run_command(RunCommand(map_cmd))

    self.print_end_of_process_pid()
def run_insert_size(self, bam_file=None, insert_size=None, std_dev=None):
    """
    Run ``run_insert_size_from_bam.py`` on ``bam_file``.

    The output header is ``bam_file`` with a trailing ``.bam`` removed.
    ``-i`` and ``-s`` are added when ``insert_size`` and ``std_dev`` are
    given.

    Returns the output header joined with ``insert_size.metrics``, or
    ``None`` when ``bam_file`` is missing (previously that case raised
    UnboundLocalError at the return statement).
    """
    self.print_start_of_process_pid()
    metrics_file = None

    if bam_file:
        # Strip only the literal trailing extension.  The old pattern
        # ".bam" was unescaped and unanchored, so it also removed any
        # character followed by "bam" elsewhere in the path (for example
        # the "ybam" in a "/mybam/" directory name).
        output_header = re.sub(r"\.bam$", "", bam_file)

        cmd_list = ['run_insert_size_from_bam.py', '-o', output_header]
        if insert_size:
            cmd_list += ['-i', str(insert_size)]
        if std_dev:
            cmd_list += ['-s', str(std_dev)]
        cmd_list += [bam_file]

        rc = RunCommand(cmd_list)
        self.print_and_run_command(rc)
        metrics_file = self.__concatenate_file_name(
            output_header, 'insert_size.metrics')

    self.print_end_of_process_pid()
    return metrics_file
def get_basic_assembly_stats(self, name=None, contigs=None, agp=None,
                             assembler='assembler',
                             extension='cumulative_sizes', extra_args=[]):
    """
    Run ``basic_assembly_stats.py`` on ``contigs`` (and ``agp`` if given).

    ``name`` defaults to the output header; the table output is prefixed
    by it in the table directory and the chart path is built from the
    chart directory, ``name`` and ``extension``.  ``extra_args`` is
    appended to the fixed options before the input files.

    When ``contigs`` is missing, a message is printed and nothing is run.
    The old ``elif not agp`` test let the no-input case through with
    ``None`` appended to the command, and its early return skipped the
    end-of-process message; both are fixed here.
    """
    self.print_start_of_process_pid()

    # Validate first: every runnable form of the command needs contigs.
    if not contigs:
        print("No file given for assembly stats.")
        self.print_end_of_process_pid()
        return

    if not name:
        name = self.get_output_header()
    output = os.path.join(self.get_table_dir(), name)
    chart = self.__concatenate_file_name(
        os.path.join(self.get_chart_dir(), name), extension)

    cmd_list = ['basic_assembly_stats.py',
                '-n', name,
                '-a', assembler,
                '-o', output,
                '-C', '-S',
                '-t', chart] + extra_args
    if agp:
        cmd_list += ['-f', agp, contigs]
    else:
        cmd_list += [contigs]

    rc = RunCommand(cmd_list)
    self.print_and_run_command(rc)
    self.print_end_of_process_pid()