def test_to_string():
    """Round-trip every line of the sample file through ``to_blast_line``.

    Each line is parsed into a ``BlastLine``, serialised again with
    ``to_blast_line()`` and re-parsed; the two objects must then agree on
    every attribute named in the module-level ``some_attrs``.
    """
    f = "tests/data/tabd.blast"
    # Use a context manager so the handle is closed even if an assertion fails.
    with open(f) as fh:
        for line in fh:
            a = BlastLine(line)
            b = BlastLine(a.to_blast_line())
            # works better than string comparison because of floats.
            for attr in some_attrs:
                assert getattr(a, attr) == getattr(b, attr), (a, b, attr)
def test_query_subject_props():
    """Check that ``query`` and ``subject`` can be reassigned.

    The new values must be readable back from the attributes and must also
    appear in the text produced by ``to_blast_line()``.
    """
    f = "tests/data/tabd.blast"
    # Only the first record is needed; close the file as soon as it is read.
    with open(f) as fh:
        line = BlastLine(fh.readline())

    line.query = "asdf"
    line.subject = "dddd"

    assert line.query == "asdf"
    assert line.subject == "dddd"

    assert "asdf" in line.to_blast_line()
    assert "dddd" in line.to_blast_line()
def test_blastline():
    """Yield type checks for the attributes of every parsed record.

    This is a generator-style test: each yielded tuple is
    ``(check_type, blasts, attribute_names, expected_type)``.  The test
    runner is expected to call ``check_type`` with the remaining items
    (``check_type`` is defined elsewhere in the test module).
    """
    f = "tests/data/tabd.blast"
    # Parse each line exactly once and close the file before yielding.
    with open(f) as fh:
        blasts = [BlastLine(line) for line in fh]

    yield check_type, blasts, ('qstart', 'qstop', 'sstart', 'sstop',
                               'nmismatch', 'ngaps'), int
    yield check_type, blasts, ('evalue', 'score', 'pctid'), float
    yield check_type, blasts, ('query', 'subject'), str
def grouper(blast_file):
    """Group the hits in *blast_file* by query, then by subject.

    So for grape.features_vs_papaya.genomic.masked.blast, all the papaya
    hits are grouped under the grape query.

    Hits with ``pctid > 98.0`` and ``hitlen > 200`` are skipped.

    :param blast_file: path to a file with one ``BlastLine`` record per line.
    :returns: a ``collections.defaultdict`` mapping each query to a plain
        ``dict`` that maps each subject to the list of ``BlastLine`` objects
        for that pair, in file order.  Because it is a ``defaultdict``,
        looking up an unknown query inserts an empty dict for it.
    """
    g = collections.defaultdict(dict)
    # Context manager so the file is closed even if a line fails to parse.
    with open(blast_file) as fh:
        for sline in fh:
            b = BlastLine(sline)
            # this removes low-copy transposons (length > 200, percent_id > 98)
            if b.pctid > 98.0 and b.hitlen > 200:
                continue
            # The query key is only created once a hit survives the filter.
            g[b.query].setdefault(b.subject, []).append(b)
    return g
def test_pickle():
    """Check that a ``BlastLine`` survives a pickle round trip.

    Every attribute named in ``BlastLine.attrs`` must compare equal after
    ``dumps``/``loads``, and the loaded object must be independent of the
    original: changing its ``query`` must not change the original's.
    """
    # Prefer the C implementation where it exists; fall back to the
    # standard module on interpreters that have no cPickle.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    f = "tests/data/tabd.blast"
    with open(f) as fh:
        line = BlastLine(fh.readline())

    # Protocol -1 selects the highest protocol available.
    d = pickle.dumps(line, -1)
    loaded = pickle.loads(d)

    for k in BlastLine.attrs:
        assert getattr(loaded, k) == getattr(line, k)

    loaded.query = "asdf"
    assert loaded.query != line.query
def test_blastfile():
    """Check that iterating a ``BlastFile`` matches parsing the file by hand.

    First pass: each object yielded by the ``BlastFile`` must be a
    ``BlastLine`` equal to the one built from the same raw line.
    Second pass: iterating the same ``BlastFile`` again must yield exactly
    one ``BlastLine`` per line of the file.
    """
    f = "tests/data/tabd.blast"
    bf = BlastFile(f)

    # iterate via python and c and check each line is the same.
    with open(f, 'r') as fh:
        for line, b in zip(fh, bf):
            bl = BlastLine(line)
            assert isinstance(b, BlastLine)
            assert bl == b

    i = 0
    for c in bf:
        i += 1
        assert isinstance(c, BlastLine)

    # Count the raw lines with a handle that is closed straight away.
    with open(f) as fh:
        assert i == len(fh.readlines())

    del bf
# Script fragment: for every BLAST hit whose query and subject features sit on
# one chosen pair of sequence ids, shift the hit coordinates by the start of
# the matching feature, print the adjusted record and collect the shifted
# start positions in xs / ys.
# NOTE(review): `sys`, `gtpym`, `forthology`, `fgff` and `fblast` are not
# defined in the visible part of the script -- presumably set up earlier;
# confirm.

# Put scripts2/ first on the module search path before the imports below.
sys.path.insert(0, "scripts2/")
from make_genelist import is_ortho, parse_orthos
from biostuff import BlastLine

ortho = parse_orthos(forthology, is_same=True)
gff = gtpym.FeatureIndexMemory(fgff)

xs = []  # shifted qstart of every hit that is kept
ys = []  # shifted sstart of every hit that is kept

# Sequence ids the query feature and the subject feature must have.
QSEQ = '2'
SSEQ = '4'

# Write the ortho entry for the chosen sequence pair to stderr.
print >>sys.stderr, ortho[(QSEQ, SSEQ)]

for line in open(fblast):
    b = BlastLine(line)
    # Look up the features named by the hit's query and subject.
    q = gff[b.query]
    s = gff[b.subject]
    # Skip hits that are not on the chosen pair of sequences.
    if not (q.seqid == QSEQ and s.seqid == SSEQ):
        continue

    # Offset the hit by the start of its feature; presumably this converts
    # feature-relative positions to sequence positions -- TODO confirm.
    b.qstart += q.start
    b.qstop += q.start

    b.sstart += s.start
    b.sstop += s.start

    # Emit the adjusted record on stdout.
    print b.to_blast_line()
    xs.append(b.qstart)
    ys.append(b.sstart)

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
# Script fragment: for every BLAST hit whose query and subject features sit on
# one chosen pair of sequence ids, shift the hit coordinates by the start of
# the matching feature, print the adjusted record and collect the shifted
# start positions in xs / ys.
# NOTE(review): `sys`, `gtpym`, `forthology`, `fgff` and `fblast` are not
# defined in the visible part of the script -- presumably set up earlier;
# confirm.

# Put scripts2/ first on the module search path before the imports below.
sys.path.insert(0, "scripts2/")
from make_genelist import is_ortho, parse_orthos
from biostuff import BlastLine

ortho = parse_orthos(forthology, is_same=True)
gff = gtpym.FeatureIndexMemory(fgff)

xs = []  # shifted qstart of every hit that is kept
ys = []  # shifted sstart of every hit that is kept

# Sequence ids the query feature and the subject feature must have.
QSEQ = '2'
SSEQ = '4'

# Write the ortho entry for the chosen sequence pair to stderr.
print >> sys.stderr, ortho[(QSEQ, SSEQ)]

for line in open(fblast):
    b = BlastLine(line)
    # Look up the features named by the hit's query and subject.
    q = gff[b.query]
    s = gff[b.subject]
    # Skip hits that are not on the chosen pair of sequences.
    if not (q.seqid == QSEQ and s.seqid == SSEQ):
        continue

    # Offset the hit by the start of its feature; presumably this converts
    # feature-relative positions to sequence positions -- TODO confirm.
    b.qstart += q.start
    b.qstop += q.start

    b.sstart += s.start
    b.sstop += s.start

    # Emit the adjusted record on stdout.
    print b.to_blast_line()
    xs.append(b.qstart)
    ys.append(b.sstart)

import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle