'''
Generate a list of all pairwise comparisons of the exact matches
'''

import sys
import re
from phage import Phage
# Helper object from the project's phage module; the script queries it
# below for the bacterial and phage ID lists.
phage = Phage()

# The input file is the single command-line argument. Only the IndexError
# raised by a missing argument is caught, so unrelated exceptions such as
# KeyboardInterrupt are not swallowed and reported as a usage error.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit("Exact match file, probably phage.kmers.bacteria.rc.txt")

# ID collections supplied by the Phage helper.
bg = phage.completeBacteriaIDs()
pg = phage.phageIDs()

# Two-level table indexed as matches[<entry of pg>][<entry of bg>], with
# every cell starting at zero.
matches = {phage_id: dict.fromkeys(bg, 0) for phage_id in pg}

# Compiled once, outside the read loop. The raw string keeps ``\d`` a regex
# digit class rather than an invalid string escape (which newer Python
# versions warn about at compile time).
_NC_ID = re.compile(r'NC_\d+')

with open(f, 'r') as fin:
    for l in fin:
        # Tab-separated columns of the current line.
        p = l.strip().split("\t")
        # Every NC_<digits> identifier found anywhere on the line.
        m = _NC_ID.findall(l)
        if len(m) != 2:
            # Lines that do not carry exactly two NC ids are skipped silently.
            #sys.stderr.write("Error parsing two NC ids from " + l)
            continue
'''


import sys
from phage import Phage
# Helper object from the project's phage module; the script queries it
# below for sequence lengths and the bacterial and phage ID lists.
phage = Phage()

# The input file is the single command-line argument. Only the IndexError
# raised by a missing argument is caught, so unrelated exceptions such as
# KeyboardInterrupt are not swallowed and reported as a usage error.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit(sys.argv[0] + " <blast output file converted to NC/NC format. Probably phage.genomes.blastx")

# Lookups pulled from the Phage helper. The two ID collections are turned
# into sets because the read loop below tests membership for every line.
lens = phage.phageSequenceLengths()
bctG = set(phage.completeBacteriaIDs())
phgG = set(phage.phageIDs())

# One empty inner dict per entry of phgG, filled in while reading the file.
count = {phage_id: {} for phage_id in phgG}

sys.stderr.write("Reading " + f + "\n")
with open(f, 'r') as bin:
    for l in bin:
        p=l.strip().split("\t")
        if p[0] not in phgG:
            continue
        if p[1] not in bctG:
            continue

        if p[1] not in count[p[0]]:
# Example #3
0
import sys
from phage import Phage
# Helper object from the project's phage module; the script queries it
# below for sequence lengths and the bacterial and phage ID lists.
phage = Phage()

# The input file is the single command-line argument. Only the IndexError
# raised by a missing argument is caught, so unrelated exceptions such as
# KeyboardInterrupt are not swallowed and reported as a usage error.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit(
        sys.argv[0] +
        " <blast output file converted to NC/NC format. Probably phage.genomes.blastx"
    )

# Values fetched from the Phage helper; the ID lists are stored as sets
# since the loop that follows checks each input line against them.
lens = phage.phageSequenceLengths()
bctG = set(phage.completeBacteriaIDs())
phgG = set(phage.phageIDs())

# Start every entry of phgG with its own empty dict.
count = dict((phage_id, {}) for phage_id in phgG)

sys.stderr.write("Reading " + f + "\n")
with open(f, 'r') as bin:
    for l in bin:
        p = l.strip().split("\t")
        if p[0] not in phgG:
            continue
        if p[1] not in bctG:
            continue

        if p[1] not in count[p[0]]:
Generate a list of all pairwise comparisons of the exact matches
'''


import sys
import re
from phage import Phage
# Helper object from the project's phage module; the script queries it
# below for the bacterial and phage ID lists.
phage = Phage()

# The input file is the single command-line argument. Only the IndexError
# raised by a missing argument is caught, so unrelated exceptions such as
# KeyboardInterrupt are not swallowed and reported as a usage error.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit("Exact match file, probably phage.kmers.bacteria.rc.txt")


# ID collections supplied by the Phage helper.
bg = phage.completeBacteriaIDs()
pg = phage.phageIDs()

# Nested table, matches[<entry of pg>][<entry of bg>], with every cell
# initialised to zero.
matches = {}
for phage_id in pg:
    matches[phage_id] = {bacteria_id: 0 for bacteria_id in bg}

with open(f, 'r') as fin:
    for l in fin:
        p=l.strip().split("\t")
        m=re.findall('NC_\d+', l)
        if len(m) != 2:
            #sys.stderr.write("Error parsing two NC ids from " + l)