def __init__(self, fhand, compressed=None, filename=None,
             min_calls_for_pop_stats=DEF_MIN_CALLS_FOR_POP_STATS):
    """Keep the input handle and build a pyvcf reader on top of it.

    fhand -- VCF input handle; stored as ``self.fhand`` and handed to
        ``pyvcfReader`` as its ``fsock`` argument.
    compressed, filename -- forwarded untouched to ``pyvcfReader``.
    min_calls_for_pop_stats -- stored on the instance as given.
    """
    self.fhand = fhand
    reader = pyvcfReader(
        fsock=fhand,
        compressed=compressed,
        filename=filename,
    )
    self.pyvcf_reader = reader
    self.min_calls_for_pop_stats = min_calls_for_pop_stats
    # The SNP caller starts unset.
    self._snpcaller = None
def __init__(self, vcf_fhand, parents_a, parents_b,
             threshold=DEF_AB_CODER_THRESHOLD, offspring=None,
             window=DEF_AB_CODING_WIN):
    """Set up the coder state from a VCF handle and the two parent groups.

    vcf_fhand -- handle given to ``pyvcfReader`` to build the reader.
    parents_a, parents_b -- stored as given on the instance.
    threshold, window -- stored as given on the instance.
    offspring -- stored as ``self._offspring``; defaults to None.
    """
    self._reader = pyvcfReader(vcf_fhand)
    self.parents_a, self.parents_b = parents_a, parents_b
    self._offspring = offspring
    self.window, self.threshold = window, threshold
    # Both accumulators start empty: a Counter and an array of C floats.
    self.log = Counter()
    self.indexes = array('f')
# Directory the diagnostic plots have always been written to; kept as the
# default so existing callers see no change.
_DEF_RECOMB_PLOT_DIR = '/home/jose/tmp/rils/results/'


def _calls_for_samples(snv, samples):
    """Return the calls of snv, limited to samples unless it is None."""
    if samples is None:
        return snv.samples
    return [call for call in snv.samples if call.sample in samples]


def _iter_snvs_in_distance_range(random_reader, chrom, pos, min_phys_dist,
                                 max_phys_dist):
    """Yield the SNVs fetched from the upstream and downstream windows.

    The windows are [pos - max_phys_dist, pos - min_phys_dist], clipped at 0,
    and [pos + min_phys_dist, pos + max_phys_dist]; the limits are passed
    as-is to random_reader.fetch() as start and end.
    """
    # Each window is fetched only once the previous one has been consumed.
    # NOTE(review): PyVCF's Reader.fetch() is believed to return the reader
    # itself (confirm for the installed version); issuing both fetches up
    # front would then drop the upstream window.
    upstream_end = max(0, pos - min_phys_dist)
    if upstream_end > 0:
        upstream_start = max(0, pos - max_phys_dist)
        for snv in random_reader.fetch(chrom=chrom, start=upstream_start,
                                       end=upstream_end):
            yield snv
    for snv in random_reader.fetch(chrom=chrom, start=pos + min_phys_dist,
                                   end=pos + max_phys_dist):
        yield snv


def _calc_haplo_diffs(haplo_count):
    """Return (diff_from_r_expected, diff_distort) for a haplotype count."""
    # float() keeps the ratios fractional under Python 2 integer division.
    total = float(sum(haplo_count))
    diff_from_r_expected = (abs(haplo_count.AB - haplo_count.ab) +
                            abs(haplo_count.Ab - haplo_count.aB)) / total
    first_locus_diff = abs((haplo_count.Ab + haplo_count.AB) -
                           (haplo_count.ab + haplo_count.aB))
    second_locus_diff = abs((haplo_count.AB + haplo_count.aB) -
                            (haplo_count.ab + haplo_count.Ab))
    diff_distort = (first_locus_diff + second_locus_diff) / total
    return diff_from_r_expected, diff_distort


def _write_recomb_plots(plot_dir, diffs_from_r_expected, diffs_distort,
                        recomb_rates, r_diffs_from_r_expected,
                        r_diffs_distort):
    """Write the diagnostic histograms, density plots and scatters."""
    from os.path import join as pjoin
    from crumbs.plot import build_histogram, draw_density_plot

    # Debug output kept so that stdout is the same as it has always been.
    print('hola')

    # NOTE(review): the PNG files are opened in text mode, as before; 'wb'
    # would be safer on platforms that translate newlines.
    with open(pjoin(plot_dir, 'diffs_from_r.png'), 'w') as fhand:
        build_histogram(diffs_from_r_expected, fhand, bins=30)
    with open(pjoin(plot_dir, 'diffs_distort.png'), 'w') as fhand:
        build_histogram(diffs_distort, fhand, bins=30)
    with open(pjoin(plot_dir, 'recomb_rates.png'), 'w') as fhand:
        build_histogram(recomb_rates, fhand, bins=30, log=True)
    with open(pjoin(plot_dir, 'diffs_density.png'), 'w') as fhand:
        draw_density_plot(diffs_from_r_expected, diffs_distort, fhand)
    with open(pjoin(plot_dir, 'diffs_scatter.png'), 'w') as fhand:
        draw_scatter(diffs_from_r_expected, diffs_distort, fhand)

    print('%d %d' % (len(recomb_rates), len(r_diffs_from_r_expected)))

    with open(pjoin(plot_dir, 'r_vs_diff_r_scatter.png'), 'w') as fhand:
        draw_scatter(recomb_rates, r_diffs_from_r_expected, fhand,
                     color=r_diffs_distort)
    with open(pjoin(plot_dir, 'r_vs_diff_r_density.png'), 'w') as fhand:
        draw_density_plot(recomb_rates, r_diffs_from_r_expected, fhand)


def calc_recomb_rates_along_chroms(vcf_fpath, pop_type, samples=None,
                                   min_phys_dist=1000, max_phys_dist=50000,
                                   plot_dir=_DEF_RECOMB_PLOT_DIR):
    """Calculate recombination rates between each SNV and its neighbours.

    Every SNV in the VCF is paired with the SNVs fetched from the windows
    located between min_phys_dist and max_phys_dist on both sides of it, and
    _calc_recomb_rate is run on each pair.

    vcf_fpath -- path of the VCF file; it is opened twice, once to iterate
        and once to fetch the neighbouring SNVs.
    pop_type -- passed through to _calc_recomb_rate.
    samples -- if not None, only calls whose sample is in it are used.
    min_phys_dist, max_phys_dist -- limits of the neighbour windows.
    plot_dir -- directory where the diagnostic plots are written; None
        skips the plots and their debug output.

    Returns a list of [chrom, pos, recombs] items, one per SNV that had at
    least one usable pair, where recombs is a list of (distance, recomb)
    tuples.

    One line per usable pair is printed to stdout.
    """
    if samples is not None:
        # Built once so the per-call membership test is not a linear scan.
        samples = frozenset(samples)

    diffs_from_r_expected = []
    diffs_distort = []
    recomb_rates = []
    # Diff values of the pairs that also have a recombination rate.
    r_diffs_from_r_expected = []
    r_diffs_distort = []
    to_return = []

    with open(vcf_fpath) as reader_fhand, open(vcf_fpath) as random_fhand:
        reader = pyvcfReader(reader_fhand)
        random_reader = pyvcfReader(random_fhand)
        for snv in reader:
            pos = snv.POS
            chrom = snv.CHROM
            calls1 = _calls_for_samples(snv, samples)
            recombs = []
            neighbours = _iter_snvs_in_distance_range(random_reader, chrom,
                                                      pos, min_phys_dist,
                                                      max_phys_dist)
            for snv_2 in neighbours:
                calls2 = _calls_for_samples(snv_2, samples)
                result = _calc_recomb_rate(calls1, calls2, pop_type)
                if not result:
                    continue
                recomb, haplo_count = result
                if not haplo_count:
                    # Without counts there are no diff values for this pair;
                    # skip it rather than reuse those of a previous pair.
                    continue

                diffs = _calc_haplo_diffs(haplo_count)
                diff_from_r_expected, diff_distort = diffs
                diffs_from_r_expected.append(diff_from_r_expected)
                diffs_distort.append(diff_distort)
                if recomb is None:
                    continue

                recomb_rates.append(recomb)
                r_diffs_from_r_expected.append(diff_from_r_expected)
                r_diffs_distort.append(diff_distort)
                # Single-string form prints the same under Python 2 and 3.
                fields = (snv.CHROM, snv.POS, snv_2.CHROM, snv_2.POS, recomb,
                          diff_from_r_expected, diff_distort)
                print(' '.join(str(field) for field in fields))
                recombs.append((abs(pos - snv_2.POS), recomb))
            if recombs:
                to_return.append([chrom, pos, recombs])

    if plot_dir is not None:
        _write_recomb_plots(plot_dir, diffs_from_r_expected, diffs_distort,
                            recomb_rates, r_diffs_from_r_expected,
                            r_diffs_distort)
    return to_return