def kmer_homology(self, k=10, span=100):
    """Count the distinct k-mers shared by the sequence windows around v1 and v2.

    A window reaching "span" positions to either side of each vertex
    position is fetched through hg.interval(...).sequence(). The v1 window
    is requested with v1.strand; the v2 window is requested with the
    negated v2.strand.

    Args:
        k: length of the k-mers to compare.
        span: distance taken on each side of a vertex position.

    Returns:
        Number of distinct k-mers present in both windows. This is 0 when
        either window is shorter than k.
    """
    def window_sequence(vertex, strand):
        # Clamp the window so it starts no lower than 1 and ends no higher
        # than hg.chrLen[...] for this chromosome (presumably the
        # chromosome length -- confirm against hg).
        start = max(1, vertex.pos - span)
        end = min(vertex.pos + span, hg.chrLen[hg.chrNum(vertex.chrom)])
        bases = hg.interval(vertex.chrom, start, end, strand).sequence()
        # Normalise case element by element so soft-masked (lower-case)
        # bases compare equal to upper-case ones.
        return ''.join([a.capitalize() for a in bases])

    def kmer_set(seq):
        # The slice width must follow k; a fixed width of 10 would ignore
        # the caller's k while the number of windows still depended on it.
        return set(seq[i:i + k] for i in range(len(seq) - k + 1))

    seq1 = window_sequence(self.v1, self.v1.strand)
    seq2 = window_sequence(self.v2, -1 * self.v2.strand)
    return len(kmer_set(seq1).intersection(kmer_set(seq2)))
def kmer_homology(self, k=10, span=100):
    """Number of shared k-mers within "span" distance on either side of vertex positions.

    The sequence around v1 is requested with v1.strand, and the sequence
    around v2 with the negated v2.strand. Both are obtained through
    hg.interval(...).sequence().

    Args:
        k: length of the k-mers to compare.
        span: distance taken on each side of a vertex position.

    Returns:
        Count of distinct k-mers that occur in both sequences. This is 0
        when either sequence is shorter than k.
    """
    def window_sequence(vertex, strand):
        # Keep the window within [1, hg.chrLen[...]] for this chromosome
        # (presumably the chromosome length -- confirm against hg).
        start = max(1, vertex.pos - span)
        end = min(vertex.pos + span, hg.chrLen[hg.chrNum(vertex.chrom)])
        bases = hg.interval(vertex.chrom, start, end, strand).sequence()
        # Per-element case normalisation so lower-case bases match
        # upper-case ones.
        return ''.join([a.capitalize() for a in bases])

    def kmer_set(seq):
        # Slice with k rather than a literal 10, so that the k argument
        # controls the k-mer length as well as the number of windows.
        return set(seq[i:i + k] for i in range(len(seq) - k + 1))

    seq1 = window_sequence(self.v1, self.v1.strand)
    seq2 = window_sequence(self.v2, -1 * self.v2.strand)
    return len(kmer_set(seq1).intersection(kmer_set(seq2)))
old_stdout = sys.stdout sys.stdout = mystdout = StringIO() amplist = bamFileb2b.interval_hops(rdList, explore=False) alist = hg.interval_list( [hg.interval(e[0].v1.chrom, e[0].v1.pos, e[0].v1.pos) for e in de] + [hg.interval(e[0].v2.chrom, e[0].v2.pos, e[0].v2.pos) for e in de] + rdList) alist.sort() rdList = hg.interval_list([ i[0] for i in alist.merge_clusters(extend=5000000) if len( hg.interval_list([i[0]]).intersection(amplist) + hg.interval_list([i[0]]).intersection(rdList)) > 0 ]) rdList = hg.interval_list([ hg.interval(i.chrom, max(0, i.start - 10000), min(i.end + 10000, hg.chrLen[hg.chrNum(i.chrom)])) for i in rdList ]) iout = open(outName + '.integration_search.out', 'w') iout.write(mystdout.getvalue()) iout.close() sys.stdout = old_stdout all_ilist = copy.copy(rdList) irdhops = [] irddict = {} irdSets = Set([Set([ird]) for ird in rdList]) irdgroupdict = {ird: Set([ird]) for ird in rdList} if args.extendmode == 'EXPLORE' or args.extendmode == 'VIRAL': for ird in rdList: logging.info("#TIME " + '%.3f\t' % (clock() - TSTART) +