Exemplo n.º 1
0
def annotate_hmm_matches(hits_file, hits_annot_file, args):
    hits_annot_header = map(
        str.strip, '''#query_name, hit, level, evalue,
                         sum_score, query_length, hmmfrom, hmmto, seqfrom, seqto, query_coverage,
                         members_in_og, og_description, og_COG_categories'''.
        split(','))

    annota.connect()
    print colorify("Functional annotation of hits starts now", 'green')
    start_time = time.time()
    if pexists(hits_file):
        OUT = open(hits_annot_file, "w")
        if not args.no_file_comments:
            print >> OUT, get_call_info()
            print >> OUT, '\t'.join(hits_annot_header)
        qn = 0
        t1 = time.time()
        for line in open(hits_file):
            if not line.strip() or line.startswith('#'):
                continue
            qn += 1
            if qn and (qn % 10000 == 0):
                total_time = time.time() - start_time
                print >>sys.stderr, qn, total_time, "%0.2f q/s (refinement)" %\
                    ((float(qn) / total_time))
                sys.stderr.flush()

            (query, hit, evalue, sum_score, query_length, hmmfrom, hmmto,
             seqfrom, seqto, q_coverage) = map(str.strip, line.split('\t'))
            if hit not in ['ERROR', '-']:
                hitname = cleanup_og_name(hit)
                level, nm, desc, cats = annota.get_og_annotations(hitname)
                print >> OUT, '\t'.join(
                    map(str, [
                        query, hitname, level, evalue, sum_score, query_length,
                        hmmfrom, hmmto, seqfrom, seqto, q_coverage, nm, desc,
                        cats
                    ]))
            else:
                print >> OUT, '\t'.join([query] + [hit] *
                                        (len(hits_annot_header) - 1))
        elapsed_time = time.time() - t1
        if not args.no_file_comments:
            print >> OUT, '# %d queries scanned' % (qn)
            print >> OUT, '# Total time (seconds):', elapsed_time
            print >> OUT, '# Rate:', "%0.2f q/s" % ((float(qn) / elapsed_time))
        OUT.close()
        print colorify(" Processed queries:%s total_time:%s rate:%s" %\
                       (qn, elapsed_time, "%0.2f q/s" % ((float(qn) / elapsed_time))), 'lblue')
Exemplo n.º 2
0
def annotate_hmm_matches(hits_file, hits_annot_file, args):
    hits_annot_header = map(str.strip, '''#query_name, hit, level, evalue,
                         sum_score, query_length, hmmfrom, hmmto, seqfrom, seqto, query_coverage,
                         members_in_og, og_description, og_COG_categories'''.split(','))

    annota.connect()
    print colorify("Functional annotation of hits starts now", 'green')
    start_time = time.time()
    if pexists(hits_file):
        OUT = open(hits_annot_file, "w")
        if not args.no_file_comments:
            print >>OUT, get_call_info()
            print >>OUT, '\t'.join(hits_annot_header)
        qn = 0
        t1 = time.time()
        for line in open(hits_file):
            if not line.strip() or line.startswith('#'):
                continue
            if qn and (qn % 10000 == 0):
                total_time = time.time() - start_time
                print >>sys.stderr, qn+1, total_time, "%0.2f q/s (refinement)" %\
                    ((float(qn + 1) / total_time))
                sys.stderr.flush()
            qn += 1
            (query, hit, evalue, sum_score, query_length, hmmfrom, hmmto,
             seqfrom, seqto, q_coverage) = map(str.strip, line.split('\t'))
            if hit not in ['ERROR', '-']:
                hitname = cleanup_og_name(hit)
                level, nm, desc, cats = annota.get_og_annotations(hitname)
                print >>OUT, '\t'.join(map( str, [query, hitname, level, evalue,
                                                  sum_score, query_length,
                                                  hmmfrom, hmmto, seqfrom,
                                                  seqto, q_coverage, nm, desc,
                                                  cats]))
            else:
                print >>OUT, '\t'.join(
                    [query] + [hit] * (len(hits_annot_header) - 1))
        elapsed_time = time.time() - t1
        if not args.no_file_comments:
            print >>OUT, '# %d queries scanned' % (qn + 1)
            print >>OUT, '# Total time (seconds):', elapsed_time
            print >>OUT, '# Rate:', "%0.2f q/s" % ((float(qn + 1) / elapsed_time))
        OUT.close()
        print colorify(" Processed queries:%s total_time:%s rate:%s" %\
                       (qn+1, elapsed_time, "%0.2f q/s" % ((float(qn+1) / elapsed_time))), 'lblue')