Пример #1
0
              88770 name: Panarthropoda    blast name: Panarthropoda   rank: no rank   parent: 1206794
              1206794 name: Ecdysozoa    blast name: Ecdysozoa   rank: no rank   parent: 33317
              33317 name: Protostomia    blast name: Protostomia   rank: no rank   parent: 33213
              33213 name: Bilateria    blast name: Bilateria   rank: no rank   parent: 6072
              6072 name: Eumetazoa    blast name: Eumetazoa   rank: no rank   parent: 33208
              33208 name: Metazoa    blast name: animals   rank: kingdom   parent: 33154
              33154 name: Opisthokonta    blast name: Opisthokonta   rank: no rank   parent: 2759
              2759 name: Eukaryota    blast name: eukaryotes   rank: superkingdom   parent: 131567
              """)
        sys.exit(0)
    else:
        ids=sys.argv[1:]

    # Load the taxonomy tables through the project-local `taxon` module.
    # `taxa`, `names` and `blastname` are all indexed by the taxonomy ID
    # strings given on the command line.
    taxa = taxon.read_nodes()
    names, blastname = taxon.read_names()
    # NOTE(review): `divs` is not used by the visible code; the call is kept
    # in case later code (or a side effect of loading) relies on it.
    divs = taxon.read_divisions()
    for i in ids:
        if i not in taxa:
            sys.stderr.write("Error: ID " + str(i) + " was not found in the taxonomy file\n")
            # An unknown ID is a failure, so exit with a non-zero status
            # instead of reporting success to the calling shell.
            sys.exit(1)
        # Walk up the lineage through the parent links, printing one line per
        # node. The walk stops *before* printing a node whose parent is '1',
        # and never prints node '1' itself.
        while taxa[i].parent != '1' and i != '1':
            # Use the blast name when one exists; otherwise fall back to the
            # node's regular name.
            bn = names[i].name
            if i in blastname:
                bn = blastname[i].name
            print("{} name: {}    blast name: {}   rank: {}   parent: {}".format(
                taxa[i].taxid, names[i].name, bn, taxa[i].rank, taxa[i].parent))
            i = taxa[i].parent

import argparse
import taxon

if __name__ == '__main__':
    # `sys` is used below for progress messages on stderr, but no import of
    # it is visible next to this script's other imports. Importing it here
    # is harmless if it is already imported elsewhere in the file.
    import sys

    parser = argparse.ArgumentParser(description="Append taxonomy to the patric metadata file. This adds it at column 67")
    parser.add_argument('-f', help='patric metadata file', required=True)
    parser.add_argument('-o', help='output file', required=True)
    parser.add_argument('-t', help='taxonomy directory (default=/home2/db/taxonomy/current/)',
                        default='/home2/db/taxonomy/current/')
    parser.add_argument('-v', help='verbose output', action="store_true")
    args = parser.parse_args()

    # Load the taxonomy tables from the directory given with -t.
    sys.stderr.write("Reading taxonomy\n")
    taxa = taxon.read_nodes(directory=args.t)
    names, blastname = taxon.read_names(directory=args.t)
    divs = taxon.read_divisions(directory=args.t)

    sys.stderr.write("Read taxonomy\n")
    # Ranks appended (in this order) as extra column headings.
    want = ['superkingdom', 'phylum', 'class', 'order', 'family', 'genus', 'species']

    # The output file is opened first, exactly as in the nested form, so it
    # is created/truncated even when the input file cannot be opened.
    with open(args.o, 'w', encoding='utf-8') as out, \
            open(args.f, 'r', encoding='utf-8') as f:
        for line in f:
            stripped = line.strip()
            p = stripped.split("\t")
            # Pad short rows with empty strings so every row has at least
            # 69 columns.
            # NOTE(review): the description above says "column 67" while the
            # padding targets 69 columns -- confirm which is intended.
            p.extend([""] * (69 - len(p)))

            if line.startswith("genome_id"):
                # Header row: copy it through and append the rank names.
                out.write("{}\t{}\n".format(stripped, "\t".join(want)))
                continue
            # NOTE(review): no handling of data rows is visible here; the
            # padded columns `p` and the taxonomy lookups loaded above are
            # presumably consumed by code beyond this point -- confirm.
Пример #3
0
              88770 name: Panarthropoda    blast name: Panarthropoda   rank: no rank   parent: 1206794
              1206794 name: Ecdysozoa    blast name: Ecdysozoa   rank: no rank   parent: 33317
              33317 name: Protostomia    blast name: Protostomia   rank: no rank   parent: 33213
              33213 name: Bilateria    blast name: Bilateria   rank: no rank   parent: 6072
              6072 name: Eumetazoa    blast name: Eumetazoa   rank: no rank   parent: 33208
              33208 name: Metazoa    blast name: animals   rank: kingdom   parent: 33154
              33154 name: Opisthokonta    blast name: Opisthokonta   rank: no rank   parent: 2759
              2759 name: Eukaryota    blast name: eukaryotes   rank: superkingdom   parent: 131567
              """)
        sys.exit(0)
    else:
        ids = sys.argv[1:]

    taxa = taxon.read_nodes()
    names, blastname = taxon.read_names()
    divs = taxon.read_divisions()
    for i in ids:
        c = 0
        if i not in taxa:
            sys.stderr.write("Error: ID " + str(i) +
                             " was not found in the taxonomy filei\n")
            sys.exit(0)
        while taxa[i].parent != '1' and i != '1':
            #print " " * c, taxa[i].taxid, "name:", names[i].name, "rank:", taxa[i].rank, "div:", taxa[i].division, "code:", divs[taxa[i].division].code
            bn = names[i].name
            if i in blastname:
                bn = blastname[i].name
            print(
                "{} name: {}    blast name: {}   rank: {}   parent: {}".format(
                    taxa[i].taxid, names[i].name, bn, taxa[i].rank,
                    taxa[i].parent))