Пример #1
0
def overlaps(hits, fasta_index, backtrans=False):
    """Return the hits that do not overlap a previously kept hit.

    Hits are processed in the order given.  A fresh interval tree is started
    whenever the organism name (``fasta.getName(acc)``) or the strand differs
    from the previous hit's, so overlaps are only detected within a
    consecutive run of hits sharing organism and strand.
    NOTE(review): this presumably expects ``hits`` to be grouped by organism
    and strand already -- confirm against the callers.

    Args:
        hits: iterable of sequences whose first eight fields are
            ``(acc, clrname, full_evalue, hmm_st, hmm_end, env_st, env_end,
            description)``.
        fasta_index: passed as the second argument to ``fasta.Indexer``.
        backtrans: when true, ``env_st``/``env_end`` are mapped through
            ``faidx.sixframe_to_nucleotide``, which also supplies the strand;
            otherwise they are converted with ``int()`` and the strand stays
            ``None``.

    Returns:
        A list of the kept hits, in their original order.
    """
    tree = IntervalTree()
    # The index is loaded unconditionally, exactly as before, even though it
    # is only consulted when ``backtrans`` is set.
    faidx = fasta.Indexer("", fasta_index)
    faidx.load()

    prev_org = None
    prev_strand = None
    cur_strand = None
    kept = []

    for hit in hits:
        # Unpack all eight leading fields so that a malformed (too short) hit
        # still fails loudly here; only three of them are actually used.
        acc, _, _, _, _, env_st, env_end, _ = hit[:8]
        cur_org = fasta.getName(acc)

        if backtrans:
            start, cur_strand = faidx.sixframe_to_nucleotide(acc, env_st)
            # The strand reported for the end coordinate is the one kept.
            end, cur_strand = faidx.sixframe_to_nucleotide(acc, env_end)
            # NOTE(review): if reverse-strand hits can come back with
            # start > end, the tree may need normalised bounds -- confirm.
        else:
            start, end = int(env_st), int(env_end)

        first_hit = prev_org is None
        group_changed = not first_hit and (
            prev_org != cur_org or prev_strand != cur_strand
        )

        if group_changed:
            # New organism or strand: earlier intervals no longer apply.
            tree = IntervalTree()
        elif not first_hit and len(tree.find(start, end)) != 0:
            # Collides with a hit that was already kept for this group.
            continue

        tree.add(start, end, hit)
        kept.append(hit)
        prev_org = cur_org
        prev_strand = cur_strand

    return kept
Пример #2
0
def unique(hits):
    """Drop repeated hits, keeping the first occurrence of each.

    Two hits are treated as the same when their ``env_st`` and ``env_end``
    fields are equal and ``fasta.getName`` returns the same name for their
    accession.

    Args:
        hits: iterable of sequences whose first eight fields are
            ``(acc, clrname, full_evalue, hmm_st, hmm_end, env_st, env_end,
            description)``.

    Returns:
        A list of the retained hits, in their original order.
    """
    seen = set()
    kept = []
    for record in hits:
        # All eight leading fields are unpacked so a too-short record still
        # raises; only the accession and the env_st/env_end pair form the key.
        acc, _, _, _, _, env_st, env_end, _ = record[:8]
        key = (env_st, env_end, fasta.getName(acc))
        if key not in seen:
            seen.add(key)
            kept.append(record)
    return kept