'''
Generate a list of all pairwise comparisons of the exact matches
'''

import sys
import re
from phage import Phage
phage = Phage()

# The single positional argument is the exact-match file. Only a missing
# argument (IndexError) is treated as a usage error; anything else propagates.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit("Exact match file, probably phage.kmers.bacteria.rc.txt")

bg = phage.completeBacteriaIDs()
pg = phage.phageIDs()

# Start every (phage ID, bacteria ID) pair at zero.
matches = {}
for p in pg:
    matches[p] = {}
    for b in bg:
        matches[p][b] = 0

with open(f, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        # Raw string so that \d reaches the regex engine untouched instead of
        # being an invalid string escape.
        m = re.findall(r'NC_\d+', l)
        # Lines that do not carry exactly two NC_ identifiers are skipped
        # silently.
        if len(m) != 2:
            #sys.stderr.write("Error parsing two NC ids from " + l)
            continue
示例#2
0
from phage import Phage
phage = Phage()

# The single positional argument is the blast file. Only a missing argument
# (IndexError) is treated as a usage error.
try:
    blastf = sys.argv[1]
except IndexError:
    # Space before the placeholder so the usage line reads
    # "<script> <blast file>".
    sys.exit(sys.argv[0] + " <blast file>")

# read the fasta file of phages to get the lengths
lens = phage.phageSequenceLengths()
sys.stderr.write("Found " + str(len(lens)) + " sequences\n")

# get the phage and bacteria so we can limit our data appropriately

bacteriaG = set(phage.completeBacteriaIDs())
phageG = set(phage.phageIDs())

# One (initially empty) dict per phage ID.
hits = {}
for p in phageG:
    hits[p] = {}

with open(blastf, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        # Column 11 is compared against 0.001 -- presumably the BLAST
        # e-value; rows above the cutoff are ignored.
        e = float(p[10])
        if e > 0.001:
            continue
        # Raw string so that \d is passed to the regex engine unchanged.
        m = re.findall(r'(NC_\d+)', p[0])
        if not m:
            sys.stderr.write("WARNING: No phage found in " + p[0] + "\n")
            continue
示例#3
0
from phage import Phage
phage = Phage()

# The single positional argument is the input file. Only a missing argument
# (IndexError) is treated as a usage error.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit(
        sys.argv[0] +
        " <blast output file converted to NC/NC format. Probably phage.genomes.blastx"
    )

count = {}

lens = phage.phageSequenceLengths()
bctG = set(phage.completeBacteriaIDs())
phgG = set(phage.phageIDs())

# One (initially empty) dict per phage ID.
for p in phgG:
    count[p] = {}

sys.stderr.write("Reading " + f + "\n")
# The handle is called fin rather than bin so the builtin bin() is not
# shadowed.
with open(f, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        # Keep only rows whose first column is a known phage ID and whose
        # second column is a known bacteria ID.
        if p[0] not in phgG:
            continue
        if p[1] not in bctG:
            continue

        # First time this phage/bacteria pair is seen: start an empty list.
        if p[1] not in count[p[0]]:
            count[p[0]][p[1]] = []

import sys
from phage import Phage
phage=Phage()

# The input file is the only positional argument; a missing argument
# (IndexError) prints the usage text and exits.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit(sys.argv[0] + " <blast output file converted to NC/NC format. Probably phage.genomes.blastx")

count = {}

lens = phage.phageSequenceLengths()
bctG = set(phage.completeBacteriaIDs())
phgG = set(phage.phageIDs())

# Every phage ID gets its own, initially empty, dict.
for p in phgG:
    count[p] = {}

sys.stderr.write("Reading " + f + "\n")
# Named fin instead of bin to avoid hiding the builtin bin().
with open(f, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        # Rows are kept only when column 1 is a known phage ID and column 2
        # is a known bacteria ID.
        if p[0] not in phgG:
            continue
        if p[1] not in bctG:
            continue

        # Create the empty list for a phage/bacteria pair on first sight.
        if p[1] not in count[p[0]]:
            count[p[0]][p[1]] = []
'''


import sys
import re
from phage import Phage
phage = Phage()

# The exact-match file is the only positional argument; a missing argument
# (IndexError) prints the hint below and exits.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit("Exact match file, probably phage.kmers.bacteria.rc.txt")


bg = phage.completeBacteriaIDs()
pg = phage.phageIDs()


# Every (phage ID, bacteria ID) pair starts at zero.
matches = {}
for p in pg:
    matches[p] = {}
    for b in bg:
        matches[p][b] = 0

with open(f, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        # Raw string keeps \d as a regex class rather than an invalid string
        # escape.
        m = re.findall(r'NC_\d+', l)
        # Anything other than exactly two NC_ identifiers on the line is
        # skipped without a message.
        if len(m) != 2:
            #sys.stderr.write("Error parsing two NC ids from " + l)
            continue
示例#6
0
            sys.stderr.write("No length for " + pnc + "\n")
            continue

        m = re.findall('(NC_\d+)', p[1])
        if m == []:
            sys.stderr.write("WARNING: No bacteria found in " + p[1] + "\n")
            continue
        bnc = m[0]

        if bnc not in hits[pnc]:
            hits[pnc][bnc] = []
            for i in xrange(lens[pnc] + 1):
                hits[pnc][bnc].append(0)

        for i in range(int(p[6]), int(p[7]) + 1):
            hits[pnc][bnc][i] = 1

# now print the table of phage and bacteria
bacteria = phage.completeBacteriaIDs()
phages = phage.phageIDs()

# One header line, then one tab-separated row per bacterium. Each row is
# assembled first and printed once so that all of its cells share a line.
# print() with a single argument behaves the same on Python 2 and 3.
print("Bacteria\t" + "\t".join(phages))
for b in bacteria:
    row = [b]
    for p in phages:
        # hits[p] only holds bacteria that had at least one hit, so look the
        # pair up without raising KeyError for the rest.
        covered = hits[p].get(b)
        if covered:
            # The hit vector has one slot per position of phage p, so the
            # covered fraction is taken relative to the phage's length.
            row.append(str(1.0 * sum(covered) / lens[p]))
        else:
            row.append("0")
    print("\t".join(row))
示例#7
0

import sys
from phage import Phage
phage=Phage()

# The input file is the only positional argument; a missing argument
# (IndexError) prints the usage text and exits.
try:
    f = sys.argv[1]
except IndexError:
    sys.exit(sys.argv[0] + " <blast output file converted to NC/NC format. Probably phage.genomes.blastx")

count = {}


bctG = phage.completeBacteriaIDs()
phgG = phage.phageIDs()

# Every (phage ID, bacteria ID) pair starts with a count of zero.
for p in phgG:
    count[p] = {}
    for b in bctG:
        count[p][b] = 0

# Named fin instead of bin to avoid hiding the builtin bin().
with open(f, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        # A row needs at least two columns to name a pair; shorter rows are
        # ignored rather than raising IndexError.
        if len(p) < 2:
            continue
        # Only pairs that were pre-seeded above are counted.
        if p[0] in count and p[1] in count[p[0]]:
            count[p[0]][p[1]] += 1

# Emit one "phage<TAB>bacteria<TAB>count" line per pair, zeros included.
for p in count:
    for b in count[p]:
        print("\t".join([p, b, str(count[p][b])]))
phage=Phage()

# The blast file is the only positional argument; a missing argument
# (IndexError) prints the usage text and exits.
try:
    blastf = sys.argv[1]
except IndexError:
    # Space before the placeholder so the usage line reads
    # "<script> <blast file>".
    sys.exit(sys.argv[0] + " <blast file>")

# read the fasta file of phages to get the lengths
lens = phage.phageSequenceLengths()
sys.stderr.write("Found " + str(len(lens)) + " sequences\n")


# get the phage and bacteria so we can limit our data appropriately

bacteriaG = set(phage.completeBacteriaIDs())
phageG = set(phage.phageIDs())

# Every phage ID gets its own, initially empty, dict.
hits = {}
for p in phageG:
    hits[p] = {}

with open(blastf, 'r') as fin:
    for l in fin:
        p = l.strip().split("\t")
        # Column 11 is compared against 0.001 -- presumably the BLAST
        # e-value; rows above the cutoff are dropped.
        e = float(p[10])
        if e > 0.001:
            continue
        # Raw string keeps \d as a regex class rather than an invalid string
        # escape.
        m = re.findall(r'(NC_\d+)', p[0])
        if not m:
            sys.stderr.write("WARNING: No phage found in " + p[0] + "\n")
            continue