def get_stats(blastfile, strict=False):
    """Summarize the alignments listed in a BLAST tabular file.

    Each line of ``blastfile`` is parsed with ``BlastLine``. The query and
    subject intervals are collected (with start/stop ordered so that
    start <= stop), and gap counts, identical-base counts and alignment
    lengths are accumulated across all lines.

    Args:
        blastfile: Path to the BLAST file to read.
        strict: When true, subtract half of the total gap count from both
            the query and the reference covered totals.

    Returns:
        An ``AlignStats`` built from the file's basename, the query and
        reference ``range_union`` results, the query and reference
        ``range_span`` results, the identical count and the AL50 value
        (second element returned by ``calculate_A50``).
    """
    from jcvi.utils.range import range_union, range_span
    from .pyblast import BlastLine

    logging.debug("Report stats on `%s`", blastfile)
    ref_ivs = []
    qry_ivs = []
    identicals = 0
    ngaps = 0
    alignlens = []

    # The context manager guarantees the handle is closed even when a
    # malformed line makes BlastLine raise part-way through the file.
    with open(blastfile) as fp:
        for row in fp:
            c = BlastLine(row)

            # Hits may be reported with start > stop; store every interval
            # with its smaller coordinate first.
            qstart, qstop = c.qstart, c.qstop
            if qstart > qstop:
                qstart, qstop = qstop, qstart
            qry_ivs.append((c.query, qstart, qstop))

            sstart, sstop = c.sstart, c.sstop
            if sstart > sstop:
                sstart, sstop = sstop, sstart
            ref_ivs.append((c.subject, sstart, sstop))

            alen = c.hitlen
            ngaps += c.ngaps
            identicals += c.hitlen - c.nmismatch - c.ngaps
            alignlens.append(alen)

    qrycovered = range_union(qry_ivs)
    refcovered = range_union(ref_ivs)
    if strict:
        # We discount gaps in counting covered bases. Since gaps were not
        # tracked individually for qry and ref, we assume they are opened
        # evenly in the two sequences.
        # NOTE(review): `/` is true division on Python 3, so these totals
        # become floats here -- confirm whether `//` was intended.
        qrycovered -= ngaps / 2
        refcovered -= ngaps / 2
    qryspan = range_span(qry_ivs)
    refspan = range_span(ref_ivs)
    _, AL50, _ = calculate_A50(alignlens)
    filename = op.basename(blastfile)
    alignstats = AlignStats(
        filename, qrycovered, refcovered, qryspan, refspan, identicals, AL50
    )

    return alignstats
def get_stats(blastfile, strict=False):
    """Collect alignment statistics from a BLAST tabular file.

    The file is read line by line; every line is turned into a
    ``BlastLine`` record. For each record the query and subject intervals
    are stored with their coordinates ordered ascending, while the gap
    count, the identical-base count (``hitlen - nmismatch - ngaps``) and
    the alignment length are accumulated.

    Args:
        blastfile: Path of the BLAST file to summarize.
        strict: If true, half of the accumulated gap count is subtracted
            from each of the query and reference covered totals.

    Returns:
        An ``AlignStats`` instance holding, in order: the basename of
        ``blastfile``, query covered, reference covered, query span,
        reference span, identical count, and AL50 (the middle value
        returned by ``calculate_A50``).
    """
    from jcvi.utils.range import range_union, range_span
    from .pyblast import BlastLine

    logging.debug("Report stats on `%s`", blastfile)

    qry_ivs = []
    ref_ivs = []
    alignlens = []
    identicals = 0
    ngaps = 0

    # Read inside a `with` block so the file handle is released as soon as
    # parsing finishes or fails, instead of lingering until collection.
    with open(blastfile) as fp:
        for row in fp:
            hit = BlastLine(row)

            # Normalize each interval so the lower coordinate comes first,
            # whichever way the hit was reported.
            qstart, qstop = hit.qstart, hit.qstop
            if qstart > qstop:
                qstart, qstop = qstop, qstart
            qry_ivs.append((hit.query, qstart, qstop))

            sstart, sstop = hit.sstart, hit.sstop
            if sstart > sstop:
                sstart, sstop = sstop, sstart
            ref_ivs.append((hit.subject, sstart, sstop))

            ngaps += hit.ngaps
            identicals += hit.hitlen - hit.nmismatch - hit.ngaps
            alignlens.append(hit.hitlen)

    qrycovered = range_union(qry_ivs)
    refcovered = range_union(ref_ivs)
    if strict:
        # Gaps are discounted from the covered bases. They were not
        # tracked separately for query and reference, so they are assumed
        # to be split evenly between the two sequences.
        # NOTE(review): on Python 3 `ngaps / 2` is a float -- verify that
        # downstream consumers of AlignStats accept non-integer totals.
        half_gaps = ngaps / 2
        qrycovered -= half_gaps
        refcovered -= half_gaps

    qryspan = range_span(qry_ivs)
    refspan = range_span(ref_ivs)
    _, AL50, _ = calculate_A50(alignlens)
    filename = op.basename(blastfile)

    return AlignStats(
        filename, qrycovered, refcovered, qryspan, refspan, identicals, AL50
    )
def test_range_span(ranges, expected):
    """Check that ``range_span`` applied to ``ranges`` equals ``expected``."""
    from jcvi.utils.range import range_span

    observed = range_span(ranges)
    assert observed == expected