def main():
    """Write basic length statistics for a FASTA file to ``<file>.stats``.

    Command line: ``-f/--file FILE``. When no file is given the help text is
    printed and the program exits.

    The function calls ``infoseq(FILE)`` and then reads ``FILE.infoseq``,
    parsing every non-blank line as an integer sequence length (presumably
    that file is produced by ``infoseq`` -- confirm against its definition).
    It writes one tab-delimited header line and one data line to
    ``FILE.stats`` with the columns: number of sequences, total bases,
    smallest, largest, average (integer division), N50 size and the number
    of sequences needed to reach N50.  Finally the temporary ``FILE.infoseq``
    is removed via ``util.rmFile`` and the result location is logged.

    If no lengths are found, every column is reported as 0 instead of
    failing with a division by zero.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-f", "--file", metavar="FILE", help="Fasta FILE to analyse", action="store", type="string", dest="file")
    (options, args) = parser.parse_args()
    if not options.file:
        parser.print_help()
        sys.exit()

    infoseq(options.file)

    # Read one length per line; the handle is closed as soon as we are done.
    # Blank lines are skipped so a trailing empty line does not break int().
    infoseq_path = "%s.infoseq" % options.file
    with open(infoseq_path, 'r') as infoseq_file:
        stat_list = [int(line) for line in infoseq_file if line.strip()]

    number_of_sequences = len(stat_list)
    total_nb_residues = sum(stat_list)

    # Longest first: this is the order the N50 accumulation needs.
    stat_list.sort(reverse=True)

    if stat_list:
        largest = stat_list[0]
        smallest = stat_list[-1]
        # Floor division keeps the integer average on every interpreter.
        average_length = total_nb_residues // number_of_sequences
    else:
        # Empty input: report zeros rather than crash.
        largest = smallest = average_length = 0

    # N50: walk from the longest sequence down until the running sum covers
    # at least half of all residues.  The comparison is done on doubled
    # integers so that a sum landing exactly on half of an even total counts
    # as reached (a strict '>' against a floored half overshoots by one
    # sequence in that case).
    n50_sum = 0
    n50_size = 0
    n50_number_of_sequences = 0
    for length in stat_list:
        n50_sum += length
        n50_number_of_sequences += 1
        n50_size = length
        if 2 * n50_sum >= total_nb_residues:
            break

    # tab delimited output; the data row is always written in full.
    with open("%s.stats" % options.file, 'w') as stats_file:
        stats_file.write("#seq\t#bases\tsmallest\tlargest\tavg\tN50_size\tN50_#seq\n")
        stats_file.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (
            number_of_sequences,
            total_nb_residues,
            smallest,
            largest,
            average_length,
            n50_size,
            n50_number_of_sequences,
        ))

    # clean tmp file
    util.rmFile(infoseq_path)
    log.info("Results in %s.stats" % options.file)
def doRun():
    """Run prodigal on an organism sequence and convert its output.

    Command line options:
      -o NAME  organism common name (used to build the output file names)
      -i FILE  input organism sequence file in FASTA format

    Steps, as performed below:
      1. Resolve the input file through ``checkValidInput``.
      2. Check the ``prodigal`` executable with ``util.checkSoft`` and run it
         with the input on stdin and ``<NAME>.prodigal`` as stdout.
      3. If ``<NAME>.prodigal`` exists afterwards, convert it with
         ``convertToTab`` and remove ``<NAME>.fna`` and the raw output;
         otherwise only log that the output file is missing.

    Any exception is logged and re-raised to the caller.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option(
        "-i",
        metavar="FILE",
        help="input organism sequence file in FASTA format",
        action="store",
        type="string",
        dest="input",
    )
    (options, args) = parser.parse_args()

    try:
        common_name = options.name
        input_file = checkValidInput(options.input, common_name)
        output_file = "%s.prodigal" % common_name

        # Print info
        log.info("Running prodigal on %s\n" % common_name)
        log.info("Getting sequence from %s\n" % input_file)

        # Run prodigal
        softname = "prodigal"
        util.checkSoft(softname)
        cmd = "%s < %s > %s" % (softname, input_file, output_file)
        util.runProcess(cmd)

        # Run the conversion only if successful
        if os.path.exists(output_file):
            # Convert output results into a feature table EMBL file.
            tab_file = convertToTab(output_file, common_name)

            # Tidy up
            # NOTE(review): <NAME>.fna is presumably created by
            # checkValidInput -- confirm against its definition.
            util.rmFile(common_name + ".fna")
            util.rmFile(output_file)

            log.info("%s is the final feature table Prodigal predictions\n" % tab_file)
        else:
            log.info("%s file does not exists\n" % output_file)
    except Exception as e:
        log.error(e)
        # Bare raise keeps the original traceback; 'raise e' would reset it
        # to this line under Python 2.
        raise
def doRun():
    """Run the Glimmer3 iterated script on an organism sequence and convert its output.

    Command line options:
      -o NAME  organism common name (used as the output tag for Glimmer3)
      -i FILE  input organism sequence file in FASTA format

    Steps, as performed below:
      1. Resolve the input file through ``checkValidInput``.
      2. Check that the ``g3-iterated.csh`` script exists with
         ``util.checkFile`` and run it with the input file and NAME.
      3. If ``<NAME>.predict`` exists afterwards, convert it with
         ``convertToTab`` and remove the listed intermediate files together
         with the predict file; otherwise only log that it is missing.

    Any exception is logged and re-raised to the caller.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option("-i", metavar="FILE", help="input organism sequence file in FASTA format", action="store", type="string", dest="input")
    (options, args) = parser.parse_args()

    try:
        common_name = options.name
        input_file = checkValidInput(options.input, common_name)

        # Print info
        log.info("Running Glimmer3 on %s\n" % common_name)
        log.info("Getting sequence from %s\n" % input_file)

        # Run glimmer3 iterated
        script = "/software/pathogen/external/applications/glimmer/glimmer/scripts/g3-iterated.csh"
        util.checkFile(script)
        cmd = "%s %s %s" % (script, input_file, common_name)
        util.runProcess(cmd)

        # Run the conversion only if g3 successful
        g3_predict_file = "%s.predict" % common_name
        if os.path.exists(g3_predict_file):
            # Convert output results into a feature table EMBL file.
            g3_tab = convertToTab(g3_predict_file, common_name)

            # Tidy up: intermediate files first (same order as before),
            # then the predict file itself.
            intermediate_suffixes = (
                ".longorfs",
                ".train",
                ".icm",
                ".run1.detail",
                ".run1.predict",
                ".coords",
                ".upstream",
                ".motif",
                ".detail",
            )
            for suffix in intermediate_suffixes:
                util.rmFile(common_name + suffix)
            util.rmFile(g3_predict_file)

            log.info("%s is the final feature table Glimmer3 predictions\n" % g3_tab)
        else:
            log.info("%s file does not exists\n" % g3_predict_file)
    except Exception as e:
        log.error(e)
        # Bare raise keeps the original traceback; 'raise e' would reset it
        # to this line under Python 2.
        raise