def infoseq(file):
    """
    Report basic sequence information for a file with EMBOSS infoseq.

    Only the sequence length column is requested (``-only -length``) and the
    heading line is suppressed (``-noheading``). The report is written next
    to the input as ``<file>.infoseq``.

    :param file: path of the sequence file handed to infoseq
    """
    # Check the input file and the EMBOSS executable through the util helpers
    # before building the command line.
    util.checkFile(file)
    util.checkSoft("infoseq")

    # The file name is used twice: as the input and as the output prefix.
    command_template = "infoseq -only -length -noheading %s -outfile %s.infoseq"
    util.runProcess(command_template % (file, file))
def union(file, common_name, locus_tag, organism_name, strain):
    """
    Merge the scaffolds of a sequence file into one sequence.

    The lines containing '>' are counted with grep; when more than one is
    found, EMBOSS union concatenates the sequences and EMBOSS descseq renames
    the result to
    ``<locus_tag> [organism=<organism_name>] [strain=<strain>] [gcode=11]``.

    :param file: path of the input sequence file
    :param common_name: prefix of the merged output file (``<common_name>.fsa``)
    :param locus_tag: first word of the new sequence name
    :param organism_name: value written in the ``organism`` modifier
    :param strain: value written in the ``strain`` modifier
    :return: path of the merged file, or ``file`` unchanged when it holds at
        most one '>' line
    """
    util.checkFile(file)

    # The process result is parsed as the number of '>' lines in the file.
    header_count = util.runProcess("grep '>' %s | wc -l" % file)
    if int(header_count) <= 1:
        # Nothing to merge: hand the input back untouched.
        return file

    merged_file = "%s.fsa" % common_name
    for tool in ("union", "descseq"):
        util.checkSoft(tool)

    sequence_name = "%s [organism=%s] [strain=%s] [gcode=11]" % (locus_tag, organism_name, strain)
    # union streams the concatenated sequence to stdout, descseq renames it
    # on the fly and the shell redirects the result into the merged file.
    pipeline = "union -sequence %s -stdout Yes -auto Yes | descseq -filter Yes -name '%s' -auto Yes > %s" % (file, sequence_name, merged_file)
    util.runProcess(pipeline)
    return merged_file
def checkValidInput(input_file, common_name):
    """
    Make sure the input FASTA file holds a single sequence.

    RAST re-arranges the scaffolds when a split sequence is submitted, so a
    file with more than one '>' line is first merged with EMBOSS union and
    renamed to ``common_name`` with EMBOSS descseq.

    :param input_file: path of the FASTA file to check
    :param common_name: name given to the merged sequence and prefix of the
        merged file (``<common_name>.fna``)
    :return: path of the merged file, or ``input_file`` unchanged when it
        holds at most one '>' line
    """
    util.checkFile(input_file)

    # The process result is parsed as the number of '>' lines in the file.
    header_count = util.runProcess("grep '>' %s | wc -l" % input_file)
    if int(header_count) <= 1:
        # Already a single sequence: nothing to do.
        return input_file

    merged_file = "%s.fna" % common_name
    for tool in ("union", "descseq"):
        util.checkSoft(tool)

    # union streams the concatenated sequence to stdout, descseq renames it
    # on the fly and the shell redirects the result into the merged file.
    pipeline = "union -sequence %s -stdout Yes -auto Yes | descseq -filter Yes -name '%s' -auto Yes > %s" % (input_file, common_name, merged_file)
    util.runProcess(pipeline)
    return merged_file
def doRun():
    """
    Command-line entry point: run Prodigal on an organism sequence file.

    Options:
        -o NAME  organism common name, used as prefix for every file produced
        -i FILE  input organism sequence file in FASTA format

    The input is passed through checkValidInput (which may create a merged
    ``<NAME>.fna`` file), Prodigal is run with the sequence on stdin and its
    output captured in ``<NAME>.prodigal``. When that output exists it is
    converted with convertToTab into a feature table, and the intermediate
    files are removed.

    Exits through ``parser.error`` when -o or -i is missing. Any exception
    raised while processing is logged and re-raised unchanged.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option(
        "-i",
        metavar="FILE",
        help="input organism sequence file in FASTA format",
        action="store",
        type="string",
        dest="input",
    )
    options, _ = parser.parse_args()

    # Without this check a missing option silently turns into file names
    # such as "None.prodigal".
    if not options.name or not options.input:
        parser.error("both -o NAME and -i FILE are required")

    try:
        common_name = options.name
        input_file = checkValidInput(options.input, common_name)
        output_file = "%s.prodigal" % common_name

        # Print info
        log.info("Running prodigal on %s\n" % common_name)
        log.info("Getting sequence from %s\n" % input_file)

        # Run prodigal
        softname = "prodigal"
        util.checkSoft(softname)
        cmd = "%s < %s > %s" % (softname, input_file, output_file)
        util.runProcess(cmd)

        # Run the conversion only if successful
        if os.path.exists(output_file):
            # Convert output results into a feature table EMBL file.
            tab_file = convertToTab(output_file, common_name)

            # Tidy up. Only remove the sequence file when checkValidInput
            # created a merged copy; otherwise it is the user's own input
            # (which may itself be named <NAME>.fna) and must be kept.
            if input_file != options.input:
                util.rmFile(input_file)
            util.rmFile(output_file)

            log.info("%s is the final feature table Prodigal predictions\n" % tab_file)
        else:
            log.info("%s file does not exists\n" % output_file)
    except Exception as e:
        log.error(e)
        # Bare raise keeps the original traceback ("raise e" resets it on
        # Python 2).
        raise
def checkValidInput(input_file, common_name):
    """
    Make sure the input FASTA file holds a single sequence.

    Glimmer crashes with a segmentation fault on a FASTA file containing
    split sequences, so a file with more than one '>' line is first merged
    with EMBOSS union and renamed to ``common_name`` with EMBOSS descseq.

    Exceptions raised by the util helpers (e.g. ``util.UtilException``)
    propagate to the caller unchanged, with their original traceback.

    :param input_file: path of the FASTA file to check
    :param common_name: name given to the merged sequence and prefix of the
        merged file (``<common_name>.fna``)
    :return: path of the merged file, or ``input_file`` unchanged when it
        holds at most one '>' line
    """
    util.checkFile(input_file)

    # The process result is parsed as the number of '>' lines in the file.
    cmd = "grep '>' %s | wc -l" % input_file
    result = util.runProcess(cmd)
    if int(result) <= 1:
        # Already a single sequence: nothing to do.
        return input_file

    new_input_file = "%s.fna" % common_name
    util.checkSoft("union")
    util.checkSoft("descseq")
    # union streams the concatenated sequence to stdout, descseq renames it
    # on the fly and the shell redirects the result into the merged file.
    cmd_union = "union -sequence %s -stdout Yes -auto Yes | descseq -filter Yes -name '%s' -auto Yes > %s" % (input_file, common_name, new_input_file)
    util.runProcess(cmd_union)
    return new_input_file