Пример #1
0
 def kmer_homology(self, k=10, span=100):
     """Count the distinct k-mers shared by the flanks of the two vertices.

     A window of ``span`` bases on either side of each vertex position is
     fetched through ``hg.interval`` (clamped to ``[1, chromosome length]``),
     upper-cased, and broken into overlapping k-mers.

     Args:
         k: k-mer length used for both the window count and the slice width.
         span: number of bases taken on each side of ``v1.pos`` / ``v2.pos``.

     Returns:
         Number of distinct k-mers present in both flanking sequences; 0 when
         a flank is shorter than ``k``.
     """
     def flank_sequence(vertex, strand):
         # Keep the window inside the chromosome before fetching the sequence.
         chrom_end = hg.chrLen[hg.chrNum(vertex.chrom)]
         window = hg.interval(
             vertex.chrom, max(1, vertex.pos - span),
             min(vertex.pos + span, chrom_end), strand)
         return ''.join(base.capitalize() for base in window.sequence())

     def kmers(seq):
         # Slice width must follow k; the previous hard-coded 10 produced
         # wrong-length k-mers whenever k != 10.
         return set(seq[i:i + k] for i in range(len(seq) - k + 1))

     seq1 = flank_sequence(self.v1, self.v1.strand)
     # v2 is read on the opposite strand -- presumably so both flanks are
     # compared in the same orientation across the edge; confirm against
     # hg.interval's strand handling.
     seq2 = flank_sequence(self.v2, -1 * self.v2.strand)
     return len(kmers(seq1) & kmers(seq2))
Пример #2
0
 def kmer_homology(self, k=10, span=100):
     """Number of shared k-mers within "span" distance on either side of vertex positions.

     The sequence around each vertex is fetched through ``hg.interval`` with
     its start clamped to 1 and its end clamped to the chromosome length,
     upper-cased, and split into overlapping windows of length ``k``.

     Args:
         k: k-mer length used for both the window count and the slice width.
         span: number of bases taken on each side of ``v1.pos`` / ``v2.pos``.

     Returns:
         Count of distinct k-mers found in both flanking sequences; 0 when a
         flank is shorter than ``k``.
     """
     def flank_sequence(vertex, strand):
         # Keep the window inside the chromosome before fetching the sequence.
         chrom_end = hg.chrLen[hg.chrNum(vertex.chrom)]
         window = hg.interval(
             vertex.chrom, max(1, vertex.pos - span),
             min(vertex.pos + span, chrom_end), strand)
         return ''.join(base.capitalize() for base in window.sequence())

     def kmers(seq):
         # Slice width must follow k; the previous hard-coded 10 produced
         # wrong-length k-mers whenever k != 10.
         return set(seq[i:i + k] for i in range(len(seq) - k + 1))

     seq1 = flank_sequence(self.v1, self.v1.strand)
     # v2 is read on the opposite strand -- presumably so both flanks are
     # compared in the same orientation across the edge; confirm against
     # hg.interval's strand handling.
     seq2 = flank_sequence(self.v2, -1 * self.v2.strand)
     return len(kmers(seq1) & kmers(seq2))
Пример #3
0
    # Capture everything printed while the intervals are searched so it can be
    # saved to '<outName>.integration_search.out' instead of the console.
    old_stdout = sys.stdout
    sys.stdout = mystdout = StringIO()
    try:
        amplist = bamFileb2b.interval_hops(rdList, explore=False)
        # Point intervals at both endpoints (v1, v2) of the first element of
        # every entry in `de`, pooled with the current rdList intervals.
        alist = hg.interval_list(
            [hg.interval(e[0].v1.chrom, e[0].v1.pos, e[0].v1.pos) for e in de] +
            [hg.interval(e[0].v2.chrom, e[0].v2.pos, e[0].v2.pos)
             for e in de] + rdList)
        alist.sort()
        # Keep only merged clusters whose leading interval intersects either
        # the hop results or the intervals we started from.
        rdList = hg.interval_list([
            i[0] for i in alist.merge_clusters(extend=5000000) if len(
                hg.interval_list([i[0]]).intersection(amplist) +
                hg.interval_list([i[0]]).intersection(rdList)) > 0
        ])
        # Pad each kept interval by 10000 on both sides, clamped to
        # [0, chromosome length].
        rdList = hg.interval_list([
            hg.interval(i.chrom, max(0, i.start - 10000),
                        min(i.end + 10000, hg.chrLen[hg.chrNum(i.chrom)]))
            for i in rdList
        ])
    finally:
        # Always hand the real stdout back, even when the search raises;
        # otherwise all later output (including tracebacks printed via
        # stdout) would vanish into the in-memory buffer.
        sys.stdout = old_stdout
    # Context manager guarantees the log file is closed if the write fails.
    with open(outName + '.integration_search.out', 'w') as iout:
        iout.write(mystdout.getvalue())

# Bookkeeping containers populated by the code that follows.
irdhops = []
irddict = {}
# Shallow copy of the intervals selected so far.
all_ilist = copy.copy(rdList)
# Every interval starts out alone: one singleton Set per interval, both as a
# Set of Sets and as a lookup from interval to its own (separate) Set.
irdgroupdict = dict((ird, Set([ird])) for ird in rdList)
irdSets = Set([Set([member]) for member in rdList])
if args.extendmode == 'EXPLORE' or args.extendmode == 'VIRAL':
    for ird in rdList:
        logging.info("#TIME " + '%.3f\t' % (clock() - TSTART) +