Exemplo n.º 1
0
def test_to_string():
    """Check that a BlastLine survives a round trip through to_blast_line().

    Every line of the fixture file is parsed, serialized again with
    ``to_blast_line()`` and re-parsed; the attributes listed in
    ``some_attrs`` must compare equal between the two objects.
    """
    f = "tests/data/tabd.blast"
    # A context manager closes the fixture file even when an assertion
    # fails part-way through the loop.
    with open(f) as fh:
        for line in fh:
            a = BlastLine(line)
            b = BlastLine(a.to_blast_line())

            # works better than string comparison because of floats.
            for attr in some_attrs:
                assert getattr(a, attr) == getattr(b, attr), (a, b, attr)
Exemplo n.º 2
0
def test_query_subject_props():
    """Check that ``query`` and ``subject`` can be reassigned on a BlastLine.

    The new values must be readable back from the attributes and must also
    appear in the text produced by ``to_blast_line()``.
    """
    f = "tests/data/tabd.blast"
    # Only the first line of the fixture is needed; close the file right away.
    with open(f) as fh:
        line = BlastLine(fh.readline())

    line.query = "asdf"
    line.subject = "dddd"
    assert line.query == "asdf"
    assert line.subject == "dddd"

    serialized = line.to_blast_line()
    assert "asdf" in serialized
    assert "dddd" in serialized
Exemplo n.º 3
0
def test_to_string():
    """Check that a BlastLine survives a round trip through to_blast_line().

    Every line of the fixture file is parsed, serialized again with
    ``to_blast_line()`` and re-parsed; the attributes listed in
    ``some_attrs`` must compare equal between the two objects.
    """
    f = "tests/data/tabd.blast"
    # A context manager closes the fixture file even when an assertion
    # fails part-way through the loop.
    with open(f) as fh:
        for line in fh:
            a = BlastLine(line)
            b = BlastLine(a.to_blast_line())

            # works better than string comparison because of floats.
            for attr in some_attrs:
                assert getattr(a, attr) == getattr(b, attr), (a, b, attr)
Exemplo n.º 4
0
def test_query_subject_props():
    """Check that ``query`` and ``subject`` can be reassigned on a BlastLine.

    The new values must be readable back from the attributes and must also
    appear in the text produced by ``to_blast_line()``.
    """
    f = "tests/data/tabd.blast"
    # Only the first line of the fixture is needed; close the file right away.
    with open(f) as fh:
        line = BlastLine(fh.readline())

    line.query = "asdf"
    line.subject = "dddd"
    assert line.query == "asdf"
    assert line.subject == "dddd"

    serialized = line.to_blast_line()
    assert "asdf" in serialized
    assert "dddd" in serialized
Exemplo n.º 5
0
def test_blastline():
    """Yield type checks for the attributes of every parsed fixture line.

    This is a generator-style test: each yielded tuple is
    ``(check_type, blasts, attribute_names, expected_type)``, to be invoked
    by the test runner.
    """
    f = "tests/data/tabd.blast"
    # Parse each line exactly once (the original built a throwaway BlastLine
    # per line) and close the fixture file once parsing is done.
    with open(f) as fh:
        blasts = [BlastLine(line) for line in fh]

    yield check_type, blasts, ('qstart', 'qstop', 'sstart', 'sstop',
                               'nmismatch', 'ngaps'), int

    yield check_type, blasts, ('evalue', 'score', 'pctid'), float
    yield check_type, blasts, ('query', 'subject'), str
Exemplo n.º 6
0
def grouper(blast_file):
    """Group all subjects to a single query.

    So for
        grape.features_vs_papaya.genomic.masked.blast
    group all the papaya hits to the grape query.

    Args:
        blast_file: path of the BLAST file; each line is parsed with
            ``BlastLine``.

    Returns:
        A ``collections.defaultdict`` mapping each query name to a dict that
        maps each subject name to the list of ``BlastLine`` hits for that
        (query, subject) pair, in file order.
    """
    g = collections.defaultdict(dict)
    # Close the input file deterministically instead of relying on GC.
    with open(blast_file) as fh:
        for sline in fh:
            b = BlastLine(sline)
            # this removes low-copy transposons (length > 200, percent_id > 98)
            if b.pctid > 98.0 and b.hitlen > 200:
                continue
            # setdefault creates the per-subject list on first sight.
            g[b.query].setdefault(b.subject, []).append(b)
    return g
Exemplo n.º 7
0
def test_pickle():
    """Check that a BlastLine can be pickled and unpickled as an independent copy.

    All attributes named in ``BlastLine.attrs`` must match after the round
    trip, and mutating the unpickled object must not affect the original.
    """
    # cPickle only exists on Python 2; fall back to the stdlib pickle module
    # (which uses the C accelerator automatically) on Python 3.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    f = "tests/data/tabd.blast"
    # Only the first fixture line is needed; close the file right away.
    with open(f) as fh:
        line = BlastLine(fh.readline())

    # Protocol -1 selects the highest pickle protocol available.
    d = pickle.dumps(line, -1)

    loaded = pickle.loads(d)

    for k in BlastLine.attrs:
        assert getattr(loaded, k) == getattr(line, k)
    loaded.query = "asdf"

    assert loaded.query != line.query
Exemplo n.º 8
0
def test_blastfile():
    """Check that iterating a BlastFile matches parsing the file line by line.

    The first pass compares each object yielded by ``BlastFile`` with a
    ``BlastLine`` built from the corresponding raw line. The second pass
    iterates the ``BlastFile`` again and checks that it yields one
    ``BlastLine`` per line in the file.
    """
    f = "tests/data/tabd.blast"
    bf = BlastFile(f)

    # iterate via python and c and check each line is the same.
    # The plain file handle is closed as soon as the comparison is done.
    with open(f, 'r') as fh:
        for line, b in zip(fh, bf):
            bl = BlastLine(line)
            assert isinstance(b, BlastLine)
            assert bl == b

    i = 0
    for c in bf:
        i += 1
        assert isinstance(c, BlastLine)

    # Count the raw lines without leaking a second file handle.
    with open(f) as fh:
        expected = len(fh.readlines())
    assert i == expected

    del bf
Exemplo n.º 9
0
# Script excerpt (Python 2 syntax): print the BLAST hits between one pair of
# sequences with their coordinates shifted by the start of the matching
# features, and collect the shifted start positions in xs / ys.
# NOTE(review): sys, gtpym, forthology, fgff and fblast are not defined in
# this excerpt -- presumably set up earlier in the script; confirm.

# Put scripts2/ first on the import path so make_genelist can be imported.
sys.path.insert(0, "scripts2/")
from make_genelist import is_ortho, parse_orthos
from biostuff import BlastLine

ortho = parse_orthos(forthology, is_same=True)
gff = gtpym.FeatureIndexMemory(fgff)

# Shifted query starts (xs) and subject starts (ys) of the hits that are kept.
xs = []
ys = []
# Only hits whose query feature has seqid QSEQ and whose subject feature has
# seqid SSEQ are kept.
QSEQ = '2'
SSEQ = '4'

# Python 2 "print to stream" form: writes the ortho entry for the pair to stderr.
print >>sys.stderr, ortho[(QSEQ, SSEQ)]

# NOTE(review): the handle returned by open(fblast) is never closed explicitly.
for line in open(fblast):
    b = BlastLine(line)
    # Look up the features named by the hit's query and subject.
    q = gff[b.query]
    s = gff[b.subject]
    if not (q.seqid == QSEQ and s.seqid == SSEQ): continue

    # Shift the hit coordinates by the start of the corresponding feature.
    # NOTE(review): presumably converts feature-relative positions to
    # sequence-wide ones; whether an off-by-one adjustment is needed depends
    # on the coordinate conventions -- confirm.
    b.qstart += q.start
    b.qstop += q.start
    b.sstart += s.start
    b.sstop += s.start
    print b.to_blast_line()

    xs.append(b.qstart)
    ys.append(b.sstart)

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
Exemplo n.º 10
0
# Script excerpt (Python 2 syntax): print the BLAST hits between one pair of
# sequences with their coordinates shifted by the start of the matching
# features, and collect the shifted start positions in xs / ys.
# NOTE(review): sys, gtpym, forthology, fgff and fblast are not defined in
# this excerpt -- presumably set up earlier in the script; confirm.

# Put scripts2/ first on the import path so make_genelist can be imported.
sys.path.insert(0, "scripts2/")
from make_genelist import is_ortho, parse_orthos
from biostuff import BlastLine

ortho = parse_orthos(forthology, is_same=True)
gff = gtpym.FeatureIndexMemory(fgff)

# Shifted query starts (xs) and subject starts (ys) of the hits that are kept.
xs = []
ys = []
# Only hits whose query feature has seqid QSEQ and whose subject feature has
# seqid SSEQ are kept.
QSEQ = '2'
SSEQ = '4'

# Python 2 "print to stream" form: writes the ortho entry for the pair to stderr.
print >> sys.stderr, ortho[(QSEQ, SSEQ)]

# NOTE(review): the handle returned by open(fblast) is never closed explicitly.
for line in open(fblast):
    b = BlastLine(line)
    # Look up the features named by the hit's query and subject.
    q = gff[b.query]
    s = gff[b.subject]
    if not (q.seqid == QSEQ and s.seqid == SSEQ): continue

    # Shift the hit coordinates by the start of the corresponding feature.
    # NOTE(review): presumably converts feature-relative positions to
    # sequence-wide ones; whether an off-by-one adjustment is needed depends
    # on the coordinate conventions -- confirm.
    b.qstart += q.start
    b.qstop += q.start
    b.sstart += s.start
    b.sstop += s.start
    print b.to_blast_line()

    xs.append(b.qstart)
    ys.append(b.sstart)

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle