示例#1
0
文件: stats.py 项目: pajanne/rococo
def main():
    """Write length statistics for the sequences of a FASTA file.

    Parses the ``-f/--file`` command-line option, calls ``infoseq`` on that
    file, reads ``<file>.infoseq`` as one integer length per line, and writes
    a header plus a single tab-delimited row (sequence count, total bases,
    smallest, largest, average, N50 size, N50 sequence count) to
    ``<file>.stats``.  The ``<file>.infoseq`` temporary file is removed once
    it has been read.

    Exits via ``sys.exit`` when no file option is given (after printing the
    help) or when the infoseq file contains no lengths.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-f", "--file", metavar="FILE", help="Fasta FILE to analyse", action="store", type="string", dest="file")

    (options, args) = parser.parse_args()

    if not options.file:
        parser.print_help()
        sys.exit()

    infoseq_path = "%s.infoseq" % options.file
    stats_path = "%s.stats" % options.file

    # NOTE(review): infoseq() is defined elsewhere; it is presumed to write
    # infoseq_path with one sequence length per line -- confirm.
    infoseq(options.file)

    # Read every length up front so the handle is closed promptly; blank
    # lines are skipped instead of crashing int().
    with open(infoseq_path, 'r') as infoseq_file:
        lengths = [int(line.strip()) for line in infoseq_file if line.strip()]

    # clean tmp file (its content is now held in memory)
    util.rmFile(infoseq_path)

    if not lengths:
        # Without this guard the average would raise ZeroDivisionError.
        sys.exit("No sequence lengths found in %s" % infoseq_path)

    # tab delimited output, written as one complete row so the file can never
    # be left with a truncated line
    with open(stats_path, 'w') as stats_file:
        stats_file.write("#seq\t#bases\tsmallest\tlargest\tavg\tN50_size\tN50_#seq\n")
        stats_file.write("%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % _summarise_lengths(lengths))

    log.info("Results in %s" % stats_path)


def _summarise_lengths(lengths):
    """Return the summary statistics for a non-empty list of integer lengths.

    The result is the tuple ``(count, total, smallest, largest, average,
    n50_size, n50_count)``.  ``n50_size`` is the length at which the running
    sum, taken from the longest sequence downwards, first exceeds half of the
    total; ``n50_count`` is how many sequences were needed to get there.  If
    the running sum never exceeds half of the total (all lengths are zero),
    the values of the last sequence visited are returned.
    """
    ordered = sorted(lengths, reverse=True)
    count = len(ordered)
    total = sum(ordered)

    running = 0
    n50_size = 0
    n50_count = 0
    for length in ordered:
        running += length
        n50_count += 1
        n50_size = length
        # Same test as "running > total / 2" with integer division, written
        # without a division so it does not depend on the meaning of "/".
        if running * 2 > total:
            break

    # Floor division keeps the average an integer, as "/" on two ints does
    # under Python 2.
    average = total // count
    return (count, total, ordered[-1], ordered[0], average, n50_size, n50_count)
示例#2
0
def doRun():
    """Run prodigal on an organism's FASTA sequence and convert the output.

    Command-line options:
        -o NAME  organism common name; also used to build the output file
                 names (``NAME.prodigal``, ``NAME.fna``).
        -i FILE  input organism sequence file in FASTA format.

    When ``NAME.prodigal`` exists after the run it is passed to
    ``convertToTab`` and the intermediate files are removed; otherwise an
    informational message is logged.  Any exception is logged with
    ``log.error`` and then re-raised.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option(
        "-i",
        metavar="FILE",
        help="input organism sequence file in FASTA format",
        action="store",
        type="string",
        dest="input",
    )
    (options, args) = parser.parse_args()

    try:
        common_name = options.name
        input_file = checkValidInput(options.input, common_name)
        output_file = "%s.prodigal" % common_name

        # Print info
        log.info("Running prodigal on %s\n" % common_name)
        log.info("Getting sequence from %s\n" % input_file)

        # Run prodigal
        softname = "prodigal"
        util.checkSoft(softname)
        # NOTE(review): the paths are interpolated unquoted; the "<" and ">"
        # redirections suggest util.runProcess goes through a shell --
        # confirm, and quote the paths if so.
        cmd = "%s < %s > %s" % (softname, input_file, output_file)
        util.runProcess(cmd)

        # Run the conversion only if successful
        if os.path.exists(output_file):
            # Convert output results into a feature table EMBL file.
            tab_file = convertToTab(output_file, common_name)

            # Tidy up
            util.rmFile(common_name + ".fna")
            util.rmFile(output_file)

            log.info("%s is the final feature table Prodigal predictions\n" % tab_file)
        else:
            log.info("%s file does not exists\n" % output_file)
    except Exception as e:
        log.error(e)
        # Bare raise re-raises the active exception with its original
        # traceback; "raise e" would restart the traceback here on Python 2.
        raise
示例#3
0
文件: glimmer.py 项目: pajanne/rococo
def doRun():
    """Run the Glimmer3 iterated script on a FASTA file and convert the output.

    Command-line options:
        -o NAME  organism common name; passed to the script and used to build
                 the names of the files it is expected to leave behind.
        -i FILE  input organism sequence file in FASTA format.

    When ``NAME.predict`` exists after the run it is passed to
    ``convertToTab`` and the intermediate files are removed; otherwise an
    informational message is logged.  Any exception is logged with
    ``log.error`` and then re-raised.
    """
    usage = "usage: %prog [Options]"
    parser = OptionParser(usage=usage)
    parser.add_option("-o", metavar="NAME", help="organism common name", action="store", type="string", dest="name")
    parser.add_option("-i", metavar="FILE", help="input organism sequence file in FASTA format", action="store", type="string", dest="input")
    (options, args) = parser.parse_args()

    try:
        common_name = options.name
        input_file = checkValidInput(options.input, common_name)

        # Print info
        log.info("Running Glimmer3 on %s\n" % common_name)
        log.info("Getting sequence from %s\n" % input_file)

        # Run glimmer3 iterated
        script = "/software/pathogen/external/applications/glimmer/glimmer/scripts/g3-iterated.csh"
        util.checkFile(script)
        cmd = "%s %s %s" % (script, input_file, common_name)
        util.runProcess(cmd)

        # Run the conversion only if g3 successful
        g3_predict_file = "%s.predict" % common_name
        if os.path.exists(g3_predict_file):
            # Convert output results into a feature table EMBL file.
            g3_tab = convertToTab(g3_predict_file, common_name)

            # Tidy up: intermediate files named after the organism, removed
            # in the same order as before, then the raw prediction file.
            intermediate_suffixes = (
                ".longorfs",
                ".train",
                ".icm",
                ".run1.detail",
                ".run1.predict",
                ".coords",
                ".upstream",
                ".motif",
                ".detail",
            )
            for suffix in intermediate_suffixes:
                util.rmFile(common_name + suffix)
            util.rmFile(g3_predict_file)

            log.info("%s is the final feature table Glimmer3 predictions\n" % g3_tab)
        else:
            log.info("%s file does not exists\n" % g3_predict_file)
    except Exception as e:
        log.error(e)
        # Bare raise re-raises the active exception with its original
        # traceback; "raise e" would restart the traceback here on Python 2.
        raise