Exemplo n.º 1
0
# Populate pVOGdb through addPvogs, handing it the pVOG library directory and the log handle.
# (Commented-out alternative kept for reference: pVOGdb.addPvogs_old(pVOGlines, LOG_H))
pVOGdb.addPvogs(PVOG_LIB_DIR, LOG_H)
if CHATTY:
    print("pVOGs have been recorded")

# Log the number of entries in pVOGdb.pVOGlist, then the accession total reported by pVOGdb.
LOG_H.write("There are " + str(len(pVOGdb.pVOGlist)) + " pVOGs" + "\n")
accessionCount = pVOGdb.getAccessionCount()
LOG_H.write("The total number of accessions is " + str(accessionCount) + "\n")

# Next step: walk every pVOG, locate the fasta sequence belonging to each member accession,
# and rewrite that fasta's header so it records membership in the pVOG cluster.

if CHATTY:
    print("Searching sequences for each pVOG-associated accession")

# A single fasta object is built here (to be replicated as needed).
nextFasta = dbPrep_fastaSequence.fasta()

# The loop that follows takes each pVOG's peptide accessions, looks the matching fasta up
# in the ncbi database subset, and tags its header with the pVOG information.
# accnCount is the running tally of accessions visited, starting from zero.
accnCount = 0
for pVOG in pVOGdb.pVOGlist:
    foundCount = 0
    missingCount = 0
    missingList = []
    for accession in pVOG.accessionList:  # For each accession (approx 200k of them, members of pVOG groups)
        accnCount += 1
        if CHATTY:
            print("Processing pVOG", pVOG.pVOGid, "and accession", accession)
        LOG_H.write(
            "%s%s%s%s\n" %
Exemplo n.º 2
0
import timeit
import datetime
import dbPrep_fastaSequence

# Debug switch for this module; flip to True by hand when needed.
DEBUG = False

# VERBOSE is True only when the dbPrep_VERBOSE environment variable is set
# to exactly the string 'True'; an unset variable or any other value gives False.
VERBOSE = os.environ.get("dbPrep_VERBOSE") == 'True'

# Gene / protein switches, both on.
# NOTE(review): the code that reads these flags is not visible in this excerpt.
DO_GENE = True
DO_PROTEIN = True

# Module-level fasta object, constructed once when this module is loaded.
# NOTE(review): the code that uses fastaObj is not visible in this excerpt.
fastaObj = dbPrep_fastaSequence.fasta()


class VOG(object):
    def __init__(self):
        self.VOGid = ''  # e.g., VOG0334
        self.VOGannotation = ''
        self.accnList = []  # list of accession numbers
        self.geneCount = 0
        self.peptideCount = 0  # ??? are these different?

    def printAll(self):
        print("VOG identifier:", self.VOGid)
        print("VOG annotation:", self.VOGannotation)
        print("accnList:", self.accnList)
        print("geneCount:", self.geneCount)