def show_distances_distributions(bam_fpath, n=None, kind_of_interest=None):
    '''It prints how many read pairs of each kind a BAM file contains.

    Every group of alignments yielded by _group_alignments_by_reads is split
    into its mates, each mate is given a kind label and the pair is then
    tallied once, using the first label found in this order:
    '1', '6', 'other', '4', '2b', '3b', '2a', '3a', '5'.
    Pairs labelled '1', '6' or 'other' are only counted, for the rest of the
    kinds the distance returned by _find_distance is stored.

    bam_fpath -- path of the BAM file, it is opened with pysam.Samfile.
    n -- maximum number of read groups to process, None means no limit.
    kind_of_interest -- container (or string) with the kinds whose distance
        distribution should be drawn as an ASCII histogram. None means that
        no histogram is drawn, only the counts are printed.

    Nothing is returned, the report is written to the standard output.
    '''
    # The default used to fail with a TypeError in the "key in None" test
    if kind_of_interest is None:
        kind_of_interest = ()

    # The keys are inserted in the same order as before, so the order in which
    # the dicts are iterated, and thus printed, does not change.
    counts = {'1': 0, '6': 0, 'other': 0}
    distances = {'4': [], '2b': [], '3b': [], '2a': [], '3a': [], '5': []}
    counted_kinds = ('1', '6', 'other')
    measured_kinds = ('4', '2b', '3b', '2a', '3a', '5')

    bamfile = pysam.Samfile(bam_fpath)
    try:
        # It tries to find out the kind of each pair of sequences
        read_groups = _group_alignments_by_reads(bamfile)
        for n_processed, grouped_mates in enumerate(read_groups):
            if n is not None and n_processed == n:
                break
            mates_alignments = _split_mates(grouped_mates)

            pair = []
            mates = None
            for index, alignments_group in enumerate(mates_alignments, 1):
                mate = _get_mate(index, mates_alignments)
                primary_mate = _get_primary_alignment(mate)
                primary_alignment = _get_primary_alignment(alignments_group)
                mates = [primary_alignment, primary_mate]
                pair.append(_classify_read_kind(alignments_group, mates))

            if not pair:
                # Nothing was classified. Without this guard the distance
                # would be calculated with the mates left over from the
                # previous read group (or a NameError would be raised).
                continue

            for kind in counted_kinds:
                if kind in pair:
                    counts[kind] += 1
                    break
            else:
                # As before, the distance is measured between the mates built
                # in the last iteration of the loop above.
                distance = _find_distance(mates)
                for kind in measured_kinds:
                    if kind in pair:
                        distances[kind].append(distance)
                        break
    finally:
        # The file handle was never released before
        bamfile.close()

    # print with one parenthesized argument writes the same text with the
    # python 2 print statement and with the print function.
    for key in counts:
        print('%s %s' % (key.ljust(5), counts[key]))
    for key in distances:
        print('%s %s' % (key.ljust(5), len(distances[key])))
    for key in distances:
        if key in kind_of_interest:
            print('%s distance distribution' % key)
            counter = IntCounter(iter(distances[key]))
            distribution = counter.calculate_distribution(remove_outliers=True)
            bin_counts = distribution['counts']
            bin_limits = distribution['bin_limits']
            print(draw_histogram(bin_limits, bin_counts))


def _classify_read_kind(alignments_group, mates):
    '''It returns the kind label of one read of a pair.

    alignments_group -- the alignments of the read being classified.
    mates -- [primary alignment of the read, primary alignment of its mate].

    The numeric thresholds are passed untouched to _read_is_totally_mapped
    and _find_secondary_fragment, their meaning is defined by those helpers.
    '''
    primary_alignment, primary_mate = mates

    if _read_is_totally_mapped(alignments_group, 0.05):
        if primary_alignment.mate_is_unmapped:
            return '1'
        suffix = 'a'
    else:
        fragment = _find_secondary_fragment(alignments_group, 5, 100)
        if fragment is not None:
            # The read has a second fragment: it is a '4' when any of the two
            # fragments is in the same reference as the mate.
            if (_alignments_in_same_ref([primary_alignment, primary_mate]) or
                    _alignments_in_same_ref([fragment, primary_mate])):
                return '4'
            return 'other'
        if primary_alignment.is_unmapped:
            return '1'
        suffix = 'b'

    # Read and mate are reported in different references
    if primary_alignment.rname != primary_alignment.rnext:
        return '6'
    if _mates_are_outies(mates):
        return '3' + suffix
    if _mates_are_innies(mates):
        return '2' + suffix
    return '5'
def test_ascii_histogram(self):
    'The drawn ASCII histogram includes the labelled bar of the first bin'
    limits = [-2, -1, 0, 1, 2]
    bin_counts = [9, 20, 30, 40]
    rendered = draw_histogram(bin_limits=limits, counts=bin_counts)

    # Row expected for the first bin: its limits, its count and its bar
    first_bar = '[-2 , -1[ ( 9): ****************'
    assert first_bar in rendered