def qualify_with_secondary_structure(self, read, ref_seq_id, i, match_size):
    """
    Score how well <read> agrees with the reference secondary structure.

    The read is taken to match <ref_seq_id> starting at (local V3) position
    <i> and to span <match_size> positions of that reference's mapping.
    For every pair (x, y) with x < y in self.bpseq whose two (full-length,
    gapped) positions both fall inside the range covered by the read, check
    whether the two corresponding read bases can pair.

    Returns HybridCons.NO_BP_PRESENT if no pair could be checked, otherwise
    the float fraction <checked-and-can-pair> / <checked>.

    Raises Exception if <i> or <i> + <match_size> - 1 is not a valid index
    into self.mapping[ref_seq_id].
    """
    bped, acc = 0, 0
    mapping = self.mapping[ref_seq_id]
    # the (full-length, gapped) position range covered by this read is:
    #   mapping[i] + V3_offset  --  mapping[i + match_size - 1] + V3_offset
    j = i + match_size - 1
    try:
        l, h = mapping[i] + V3_offset, mapping[j] + V3_offset
    except IndexError:
        # call form of raise works on both Python 2 and Python 3
        raise Exception("trying to index {0}[{1}] and {0}[{2}] failed".format(ref_seq_id, i, j))

    for x, y in self.bpseq.items():
        # x < y keeps a pair from being counted twice when bpseq lists it
        # in both directions
        if l <= x <= h and l <= y <= h and x < y:
            try:
                # translate full-length positions back to read-local indices
                ii = mapping.index(x - V3_offset) - i
                jj = mapping.index(y - V3_offset) - i
            except ValueError:
                # position is not present in this reference's mapping
                continue
            # it is possible that ii or jj is beyond the range of read;
            # in that case just ignore the pair
            if ii < 0 or jj < 0 or ii >= len(read) or jj >= len(read):
                sys.stderr.write("IGNORE some in qualifying 2nd structure cuz out of range\n")
                continue
            acc += 1
            if miscIUPAC.can_pair(read[ii], read[jj]):
                bped += 1

    if acc == 0:
        # nothing to check: report the dedicated sentinel instead of a ratio
        return HybridCons.NO_BP_PRESENT
    return bped * 1. / acc
def qualify_assembled(self, gapped_seq, offset):
    """
    Given an assembled, gapped seq that starts at (V3)'s offset-th position,
    see how many of the expected base-pairs are actually formed.

    Only pairs (x, y) with x < y from self.bpseq whose two positions both
    lie inside the stretch covered by <gapped_seq> are considered.  A pair
    counts as formed when neither base is a gap symbol and
    miscIUPAC.can_pair accepts the two bases.

    Returns 1.0 if none of the covered positions are supposed to be
    base-paired, otherwise the float fraction
    <should-bp-and-did> / <should-bp>.
    """
    bped, acc = 0, 0
    # full-length positions covered by gapped_seq: [start, end)
    start = V3_offset + offset
    end = start + len(gapped_seq)
    for x, y in self.bpseq.items():
        if start <= x < end and start <= y < end and x < y:
            # gapped_seq[0] sits at full-length position `start`, so index
            # relative to `start` (not V3_offset) to stay inside the string
            # when offset > 0
            u = gapped_seq[x - start]
            v = gapped_seq[y - start]
            acc += 1
            if u not in GAP_SYMBOLS and v not in GAP_SYMBOLS and miscIUPAC.can_pair(u, v):
                bped += 1
    if acc == 0:
        return 1.
    return bped * 1. / acc
def hybrid_structure(bp_map, seq):
    """
    Annotate every position of <seq> with its pairing partner.

    <bp_map> is the output of read_BPSEQ and is looked up by position;
    <seq> is a string that may contain gaps.  The sequence is upper-cased
    and every 'T' is replaced by 'U' before it is examined.

    Returns a list with one entry per position of <seq>:
      None       if the position holds a gap symbol
      (i, nt, 0) if the position is not in bp_map, or its partner is a gap
                 or cannot pair with it
      (i, nt, j) if the position pairs with position j
    """
    rna = seq.upper().replace('T', 'U')
    annotated = []
    for pos, nt in enumerate(rna):
        if nt in GAP_SYMBOLS:
            annotated.append(None)
            continue
        partner = 0  # stays 0 unless a valid pairing is confirmed below
        if pos in bp_map:
            candidate = bp_map[pos]
            other = rna[candidate]
            if other not in GAP_SYMBOLS and miscIUPAC.can_pair(nt, other):
                partner = candidate
        annotated.append((pos, nt, partner))
    return annotated