def print_table(self):
    '''Print a table of probabilities at each SNP.

    Output, in order:
      * one line with lambda, Delta and eps;
      * the Viterbi path as runs of equal states, first with frame-SNP index ranges and then
        with those indices mapped through self.snps;
      * a header row and one table row per entry of self.x.

    NumPy print options are switched to a wide, untruncated, 3-digit layout while printing and
    are handed back to util.set_printoptions() afterwards, also when printing raises.
    '''
    import sys  # local import: only needed for the untruncated-print threshold below

    def column(values):
        '''Add a leading axis and transpose, so an (n,) array becomes an (n, 1) column.'''
        return values[np.newaxis].transpose()

    headers = ('t', 'SNP#', 'Obs', 'G1', 'G2', 'lam*dx', 'p',
               'Gam1', 'Gam2', 'Gam3', 'Gam4', 'Gam5', 'Gam6', 'Gam7', 'Gam8', 'Gam9',
               'p(IBD)', 'Viterbi', 'IBD?')

    options = np.get_printoptions()
    # sys.maxsize is the documented "never summarise" threshold; newer NumPy rejects NaN here.
    np.set_printoptions(precision=3, suppress=True, threshold=sys.maxsize, linewidth=200)
    try:
        # repr(...)[6:-1] drops the first six and the last character of the repr -- presumably
        # the 'array(' / ')' wrapper of an ndarray (TODO confirm Delta's type).
        print('lambda = %s, Delta = %s, eps = %.1e' % (self.lam, repr(self.Delta)[6:-1], self.e))

        # States are displayed shifted by one (the Gamma columns are labelled Gam1..Gam9).
        # The runs are computed once and reused for both path lines.
        runs = list(itemutil.groupby_with_range(self.Q_star + 1))
        print('Viterbi path (frame SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, first, last) for state, (first, last) in runs))
        print('Viterbi path (SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, self.snps[first], self.snps[last])
            for state, (first, last) in runs))

        # One leading space, then every header left-justified in a 10-character field.
        print(' ' + ''.join('%-10s' % name for name in headers))

        table = np.concatenate(
            (column(np.arange(len(self.x))),
             column(self.snps),
             column(self.Obs),
             column(np.array([ProbIbdHmmCalculator._T_STATE_G[t][0] for t in self.Obs])),
             column(np.array([ProbIbdHmmCalculator._T_STATE_G[t][1] for t in self.Obs])),
             # A trailing 0 is appended to lam_x before it is used as a column
             # (presumably lam_x has one entry per SNP interval -- TODO confirm).
             column(np.concatenate((self.lam_x, [0]))),
             column(self.p),
             self.Gamma.transpose(),
             column(self.p_ibd_gamma),
             column(self.Q_star + 1),
             column(self.p_ibd_viterbi)),
            axis=1)
        print(table)
    finally:
        # Restore the caller's print options even if something above failed.
        util.set_printoptions(options)
def print_table(self):
    '''Print a table of probabilities at each SNP.

    Output, in order:
      * one line with lambda, f and eps;
      * the Viterbi path as runs of equal states, first with frame-SNP index ranges and then
        with those indices mapped through self.snps;
      * a header row and one table row per entry of self.x.

    NumPy print options are switched to a wide, untruncated, 3-digit layout while printing and
    are handed back to util.set_printoptions() afterwards, also when printing raises.
    '''
    import sys  # local import: only needed for the untruncated-print threshold below

    def column(values):
        '''Add a leading axis and transpose, so an (n,) array becomes an (n, 1) column.'''
        return values[np.newaxis].transpose()

    headers = ('t', 'SNP#', 'H1', 'H2', 'x', 'lam*dx', 'p', 'Gam0', 'Gam1',
               'p(IBD)', 'Viterbi', 'IBD?')

    options = np.get_printoptions()
    # sys.maxsize is the documented "never summarise" threshold; newer NumPy rejects NaN here.
    np.set_printoptions(precision=3, suppress=True, threshold=sys.maxsize, linewidth=200)
    try:
        print('lambda = %.2f, f = %.2f, eps = %.1e' % (self.lam, self.f, self.e))

        # The runs are computed once and reused for both path lines.
        runs = list(itemutil.groupby_with_range(self.Q_star))
        print('Viterbi path (frame SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, first, last) for state, (first, last) in runs))
        print('Viterbi path (SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, self.snps[first], self.snps[last])
            for state, (first, last) in runs))

        # One leading space, then every header left-justified in a 10-character field.
        print(' ' + ''.join('%-10s' % name for name in headers))

        observations = column(self.Obs)
        table = np.concatenate(
            (column(np.arange(len(self.x))),
             column(self.snps),
             # H1 = Obs // 2 and H2 = Obs % 2. Floor division is spelled out so H1 does not
             # depend on which division semantics '/' has in the enclosing module.
             observations // 2,
             observations % 2,
             column(self.x),
             # np.diff() yields one value fewer than self.x; pad with 0 for the last row.
             column(np.concatenate((self.lam * np.diff(self.x), [0]))),
             column(self.p),
             self.Gamma.transpose(),
             column(self.p_ibd_gamma),
             column(self.Q_star),
             column(self.p_ibd_viterbi)),
            axis=1)
        print(table)
    finally:
        # Restore the caller's print options even if something above failed.
        util.set_printoptions(options)
def test_groupby_with_range(self):
    '''Check groupby_with_range() on an empty input and on a string of repeated letters.'''
    # Nothing to group: the result is an empty list.
    assert_equal(iu.groupby_with_range(''), [])

    # Each run is reported as (value, (first index, last index)) with both ends inclusive;
    # a value that reappears later ('A', 'B') starts a new run.
    expected_runs = [
        ('A', (0, 3)),
        ('B', (4, 6)),
        ('C', (7, 8)),
        ('D', (9, 9)),
        ('A', (10, 11)),
        ('B', (12, 14)),
    ]
    assert_equal(iu.groupby_with_range('AAAABBBCCDAABBB'), expected_runs)
def print_table(self):
    '''Print a table of probabilities at each SNP.

    Output, in order:
      * one line with lambda, Delta and eps;
      * the Viterbi path as runs of equal states, first with frame-SNP index ranges and then
        with those indices mapped through self.snps;
      * a header row and one table row per entry of self.x.

    NumPy print options are switched to a wide, untruncated, 3-digit layout while printing and
    are handed back to util.set_printoptions() afterwards, also when printing raises.
    '''
    import sys  # local import: only needed for the untruncated-print threshold below

    def column(values):
        '''Add a leading axis and transpose, so an (n,) array becomes an (n, 1) column.'''
        return values[np.newaxis].transpose()

    headers = ('t', 'SNP#', 'Obs', 'G1', 'G2', 'lam*dx', 'p',
               'Gam1', 'Gam2', 'Gam3', 'Gam4', 'Gam5', 'Gam6', 'Gam7', 'Gam8', 'Gam9',
               'p(IBD)', 'Viterbi', 'IBD?')

    options = np.get_printoptions()
    # sys.maxsize is the documented "never summarise" threshold; newer NumPy rejects NaN here.
    np.set_printoptions(precision=3, suppress=True, threshold=sys.maxsize, linewidth=200)
    try:
        # repr(...)[6:-1] drops the first six and the last character of the repr -- presumably
        # the 'array(' / ')' wrapper of an ndarray (TODO confirm Delta's type).
        print('lambda = %s, Delta = %s, eps = %.1e' % (self.lam, repr(self.Delta)[6:-1], self.e))

        # States are displayed shifted by one (the Gamma columns are labelled Gam1..Gam9).
        # The runs are computed once and reused for both path lines.
        runs = list(itemutil.groupby_with_range(self.Q_star + 1))
        print('Viterbi path (frame SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, first, last) for state, (first, last) in runs))
        print('Viterbi path (SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, self.snps[first], self.snps[last])
            for state, (first, last) in runs))

        # One leading space, then every header left-justified in a 10-character field.
        print(' ' + ''.join('%-10s' % name for name in headers))

        table = np.concatenate(
            (column(np.arange(len(self.x))),
             column(self.snps),
             column(self.Obs),
             column(np.array([ProbIbdHmmCalculator._T_STATE_G[t][0] for t in self.Obs])),
             column(np.array([ProbIbdHmmCalculator._T_STATE_G[t][1] for t in self.Obs])),
             # A trailing 0 is appended to lam_x before it is used as a column
             # (presumably lam_x has one entry per SNP interval -- TODO confirm).
             column(np.concatenate((self.lam_x, [0]))),
             column(self.p),
             self.Gamma.transpose(),
             column(self.p_ibd_gamma),
             column(self.Q_star + 1),
             column(self.p_ibd_viterbi)),
            axis=1)
        print(table)
    finally:
        # Restore the caller's print options even if something above failed.
        util.set_printoptions(options)
def print_table(self):
    '''Print a table of probabilities at each SNP.

    Output, in order:
      * one line with lambda, f and eps;
      * the Viterbi path as runs of equal states, first with frame-SNP index ranges and then
        with those indices mapped through self.snps;
      * a header row and one table row per entry of self.x.

    NumPy print options are switched to a wide, untruncated, 3-digit layout while printing and
    are handed back to util.set_printoptions() afterwards, also when printing raises.
    '''
    import sys  # local import: only needed for the untruncated-print threshold below

    def column(values):
        '''Add a leading axis and transpose, so an (n,) array becomes an (n, 1) column.'''
        return values[np.newaxis].transpose()

    headers = ('t', 'SNP#', 'H1', 'H2', 'x', 'lam*dx', 'p', 'Gam0', 'Gam1',
               'p(IBD)', 'Viterbi', 'IBD?')

    options = np.get_printoptions()
    # sys.maxsize is the documented "never summarise" threshold; newer NumPy rejects NaN here.
    np.set_printoptions(precision=3, suppress=True, threshold=sys.maxsize, linewidth=200)
    try:
        print('lambda = %.2f, f = %.2f, eps = %.1e' % (self.lam, self.f, self.e))

        # The runs are computed once and reused for both path lines.
        runs = list(itemutil.groupby_with_range(self.Q_star))
        print('Viterbi path (frame SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, first, last) for state, (first, last) in runs))
        print('Viterbi path (SNPs): ' + ' -> '.join(
            '%d (%d-%d)' % (state, self.snps[first], self.snps[last])
            for state, (first, last) in runs))

        # One leading space, then every header left-justified in a 10-character field.
        print(' ' + ''.join('%-10s' % name for name in headers))

        observations = column(self.Obs)
        table = np.concatenate(
            (column(np.arange(len(self.x))),
             column(self.snps),
             # H1 = Obs // 2 and H2 = Obs % 2. Floor division is spelled out so H1 does not
             # depend on which division semantics '/' has in the enclosing module.
             observations // 2,
             observations % 2,
             column(self.x),
             # np.diff() yields one value fewer than self.x; pad with 0 for the last row.
             column(np.concatenate((self.lam * np.diff(self.x), [0]))),
             column(self.p),
             self.Gamma.transpose(),
             column(self.p_ibd_gamma),
             column(self.Q_star),
             column(self.p_ibd_viterbi)),
            axis=1)
        print(table)
    finally:
        # Restore the caller's print options even if something above failed.
        util.set_printoptions(options)
def hap_segments(h):
    '''Return IBD segments between two phased sample haplotypes (i,a),(j,b) encapsulated by the
    IbdProblem object h.

    Steps:
      1. prob_ibd_hmm_from_raw_input(h) gives an IBD probability per frame SNP; runs of frame
         SNPs with probability > 0 are candidate segments.
      2. Each candidate is mapped to SNP indices with __frame_segment_to_snp() and kept only if
         its length (cM when h.params.len_unit == 'cm', otherwise base pairs divided by
         im.constants.MEGA_BASE_PAIR) is at least h.params.min_len.
      3. Inside a kept candidate, h.d is median-filtered and every run of truthy values becomes
         a Segment; those at least h.params.min_len_sub_segment long are collected.

    Returns the im.segment.SegmentSet the sub-segments were accumulated into. When
    h.params.debug is true, intermediate results are printed to stdout.
    '''
    import sys  # local import: only used for the untruncated debug print below

    def span(positions, first, last):
        '''Return (position of index first, stop position of index last) for a coordinate array.'''
        return (positions[first], im.segment.stop_bp(positions, last, num_snps))

    # Load data
    (i, a), (j, b) = h.hap1, h.hap2
    debug, num_snps, frame = h.params.debug, h.num_snps, h.snps
    if debug:
        print('-' * 70)
        print('IBD Segments between (%d,%d), (%d,%d)' % (i, a, j, b))
        print('-' * 70)

    # Calculate IBD posterior using the largest frame
    prob_ibd = prob_ibd_hmm_from_raw_input(h)
    is_ibd = prob_ibd > 0.0
    if debug:
        frame_segments = [(frame[x[START]], frame[x[STOP]] if x[STOP] < len(frame) else num_snps)
                          for x in im.segment.segments_with_value(is_ibd, True, 3)]
        print('Frame IBD segments %s' % (frame_segments,))

    # Output sufficiently-long segments.
    # If a segment is identified by HMM and is above params.min_len, allow smaller
    # sub-segments in which the haps fit.
    min_len_sub_segment = h.params.min_len_sub_segment
    bp, cm, len_unit = h.bp, h.cm, h.params.len_unit
    coord = cm if (len_unit == 'cm') else bp / (1.0 * im.constants.MEGA_BASE_PAIR)
    pair = [h.hap1, h.hap2]
    long_segments = im.segment.SegmentSet([])

    for fr in im.segment.segments_with_value(is_ibd, True):
        full_segment = __frame_segment_to_snp(frame, fr, num_snps)
        # Original (SNP) indices of segment boundaries
        start, stop = full_segment[START], full_segment[STOP]
        unit_start, unit_stop = span(coord, start, stop)
        if debug:
            print('%d:%d coordinates %f:%f' % (start, stop, unit_start, unit_stop))

        # Threshold should be really small to catch everything, since we believe the IBD HMM
        # to give the correct answer
        if unit_stop - unit_start >= h.params.min_len:
            # Frame indices of segment boundaries
            fr_start, fr_stop = fr[START], fr[STOP]
            # Interpolate prob_ibd from frame SNPs to all SNPs in the segment
            confidence = np.interp(coord[start:stop],
                                   coord[frame[fr_start:fr_stop]],
                                   prob_ibd[fr_start:fr_stop])

            # Check if haplotypes fit in ALL SNPs, not just SNP frames. Restrict to
            # sub-segments of consecutive "1"s found in [start,stop] (allowing genotype error
            # mismatches)
            d = h.d[start:stop]
            d_filtered = im.ibd.filter_diff(d, 'median', 10)
            if debug:
                # sys.maxsize is the documented "never summarise" threshold; newer NumPy
                # rejects NaN here.
                # NOTE(review): as before, this print option is not restored afterwards.
                np.set_printoptions(threshold=sys.maxsize)
                print(' %-4s %-4s %-4s %-4s %-4s' % ('SNP', 'h1', 'h2', 'd', 'filt'))
                # astype(int): the np.int alias no longer exists in current NumPy.
                print(np.concatenate((np.arange(start, stop)[np.newaxis].transpose(),
                                      h.all_h1[start:stop][np.newaxis].transpose(),
                                      h.all_h2[start:stop][np.newaxis].transpose(),
                                      d.astype(int)[np.newaxis].transpose(),
                                      d_filtered.astype(int)[np.newaxis].transpose()),
                                     axis=1))

            # Use all sufficiently-long sub-segments.
            # ind: index ranges of truthy runs, relative to start.
            ind = [v for k, v in itemutil.groupby_with_range(d_filtered) if k]
            # s: the same ranges shifted to absolute SNP indices (segments of ones).
            # NOTE(review): 'range + start' only shifts element-wise if one operand is a NumPy
            # value -- confirm what groupby_with_range / __frame_segment_to_snp return.
            s = [x + start for x in ind]
            sub_segments = []
            for local, shifted in zip(ind, s):
                seg_start, seg_stop = shifted[START], shifted[STOP]
                if seg_stop > seg_start:
                    segment = Segment((int(seg_start), int(seg_stop)), pair,
                                      span(bp, seg_start, seg_stop),
                                      confidence=confidence[local[START]:local[STOP]],
                                      cm=span(cm, seg_start, seg_stop))
                    length = segment.length_cm if (len_unit == 'cm') else segment.length
                    if length >= min_len_sub_segment:
                        sub_segments.append(segment)
            if stop > start:
                long_segments += sub_segments
            if debug:
                print('segments of ones %s' % (s,))
                print('ind %s' % (ind,))
                print('sub-segments %s' % (sub_segments,))
        else:
            if debug:
                print('Segment shorter than %.2f threhold' % (h.params.min_len,))

    if debug:
        print('Long-enough segments (segment >= %.2f %s, sub-segment >= %.2f %s):' %
              (h.params.min_len, len_unit, h.params.min_len_sub_segment, len_unit))
        print(long_segments.pprint_segments(show_bp=True) if long_segments else '\t-')
        print('')
    return long_segments