Python _count_biallelic_haplotypes示例，crumbs.vcf.ld._count_biallelic_haplotypes Python示例

示例#1

0

显示文件

文件： smooth.py 项目： fw1121/ngs_crumbs

    def _smooth(self, snp_idx_to_smooth, snp_gts, samples):
        """Smooth the genotypes of one SNP using the SNPs of its window.

        Every SNP in the window votes for the genotype of each individual.
        The vote is weighted by 2 * (0.5 - recomb_rate), where recomb_rate
        is the fraction of aB and Ab haplotypes between that SNP and the SNP
        being smoothed; SNPs without haplotype counts weigh 0.

        snp_idx_to_smooth -- index, within snp_gts, of the SNP to smooth.
        snp_gts -- sequence of (snp, genotypes) pairs; genotypes is queried
            with .get(sample, default).
        samples -- individuals to smooth; the result follows this order.

        Returns a list with one item per sample: the most voted genotype,
        ('A', 'B') when a recombination threshold is set and the individual
        exceeds it, or None when the support index is not above
        self.smooth_threhsold.

        As a side effect, one value per sample is appended to self._recombs
        and to self._smoothes.
        """
        snps, gt_for_snps_in_win = zip(*snp_gts)

        # we need the recomb rates
        # TODO, this is already calculated in ab coding, we have to cache
        snp1 = snps[snp_idx_to_smooth]
        snp1_calls = [snp1.genotype(sample) for sample in samples]
        weights = []
        for snp2 in snps:
            snp2_calls = [snp2.genotype(sample) for sample in samples]
            haplos = _count_biallelic_haplotypes(snp1_calls, snp2_calls,
                                                 return_alleles=True)
            if haplos is None:
                weight = 0
            else:
                # the alleles are not needed here, only the counts
                haplo_cnt, _ = haplos
                recomb_rate = (haplo_cnt.aB + haplo_cnt.Ab) / sum(haplo_cnt)
                weight = 2 * (0.5 - recomb_rate) if recomb_rate < 0.5 else 0
            weights.append(weight)

        # we have to transpose, we want the genotype for each indi not for
        # each snp
        indis_gts = {indi: [] for indi in samples}
        for indi in samples:
            for snp_gt in gt_for_snps_in_win:
                # NOTE(review): the fallback for a missing individual is
                # (indi, None), i.e. it includes the sample name; sorting a
                # str together with None fails on Python 3 -- confirm that
                # this default is the intended one.
                snp_indi_gt = snp_gt.get(indi, (indi, None))
                snp_indi_gt = tuple(sorted(snp_indi_gt))
                indis_gts[indi].append(snp_indi_gt)

        # Now we can do the smoothing
        recomb_thres = self.recomb_threshold
        smooth_threhsold = self.smooth_threhsold
        smoothed_genos = []
        for indi in samples:
            indi_gt = indis_gts[indi]
            n_recombs = self._count_recombinations(indi_gt)

            counts = Counter()
            for weight, geno in zip(weights, indi_gt):
                counts[geno] += weight
            smoothed_geno, vote = counts.most_common(1)[0]
            total_votes = sum(counts.values())
            # When no SNP in the window is informative every weight is 0,
            # so there is no support at all for the most voted genotype.
            # Report an index of 0 instead of dividing by zero.
            index = vote / total_votes if total_votes else 0
            self._recombs.append(n_recombs)
            self._smoothes.append(index)

            if recomb_thres is not None and n_recombs > recomb_thres:
                # We're assuming diploid here
                geno = ('A', 'B')
            elif index > smooth_threhsold:
                geno = smoothed_geno
            else:
                geno = None
            smoothed_genos.append(geno)
        return smoothed_genos

示例#2

0

显示文件

    def test_empy_snv(self):
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t./.\t./.\t./.\t./.\t./.\t./.
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts is None

示例#3

0

显示文件

文件： test_ld.py 项目： JoseBlanca/seq_crumbs

    def test_empy_snv(self):
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t./.\t./.\t./.\t./.\t./.\t./.
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts is None

示例#4

0

显示文件

    def _deduce_coding(self, snp_and_coding, snp1_calls, snp2_idxs):
        """Deduce the A/B coding of a SNP from SNPs that are already coded.

        Every SNP referenced by snp2_idxs that has a coding votes for one
        coding of snp1. The vote is weighted by 2 * (0.5 - recomb_rate),
        with recomb_rate being the fraction of aB and Ab haplotypes between
        both SNPs.

        snp_and_coding -- indexable collection of (snp, coding) pairs.
        snp1_calls -- genotype calls of the SNP to code.
        snp2_idxs -- indexes, within snp_and_coding, of the voting SNPs.

        Returns a dict that maps the two snp1 alleles to 'A' and 'B', or
        None when no coding gets enough support. The outcome is tallied in
        self.log and the support index, when computed, is appended to
        self.indexes.
        """
        votes = Counter()
        offspring = self.offspring
        for idx in snp2_idxs:
            snp2, coding2 = snp_and_coding[idx]
            if coding2 is None:
                continue
            snp2_calls = [snp2.genotype(sample) for sample in offspring]
            haplos = _count_biallelic_haplotypes(snp1_calls, snp2_calls,
                                                 return_alleles=True)
            if haplos is None:
                continue
            haplo_cnt, alleles = haplos
            # snp2 allele -> snp1 allele found with it in the major haplos
            snp1_allele_for = {alleles.b: alleles.a, alleles.B: alleles.A}
            if haplo_cnt is None:
                continue

            if not (coding2.A in snp1_allele_for
                    and coding2.B in snp1_allele_for):
                # The offspring alleles in snp2 do not match the alleles
                # in the parents
                continue
            # Alleles A and B, in that order, for snp 1
            voted_coding1 = AlleleCoding(snp1_allele_for[coding2.A],
                                         snp1_allele_for[coding2.B])

            n_recombinant = haplo_cnt.aB + haplo_cnt.Ab
            recomb_rate = n_recombinant / sum(haplo_cnt)
            if recomb_rate < 0.5:
                weight = 2 * (0.5 - recomb_rate)
            else:
                weight = 0
            votes[voted_coding1] += weight

        total_votes = sum(votes.values())
        if not votes or total_votes == 0:
            self.log[NO_INFO] += 1
            return None
        if len(votes) > 2:
            self.log[MORE_THAN_2_CODINGS] += 1
            return None

        deduced_coding1 = votes.most_common(1)[0][0]
        index = votes[deduced_coding1] / total_votes
        self.indexes.append(index)
        if index < self.parent_index_threshold:
            self.log[NOT_ENOUGH_SUPPORT] += 1
            return None
        self.log[ENOUGH_SUPPORT] += 1
        return {deduced_coding1.A: 'A', deduced_coding1.B: 'B'}

示例#5

0

显示文件

文件： ab_coding.py 项目： fw1121/ngs_crumbs

    def _deduce_coding(self, snp_and_coding, snp1_calls, snp2_idxs):
        """Vote the A/B coding of snp1 using the SNPs that have a coding.

        Each coded SNP pointed to by snp2_idxs proposes a coding for snp1
        and backs it with a weight of 2 * (0.5 - recomb_rate); recomb_rate
        is the proportion of aB plus Ab haplotypes between the two SNPs.

        snp_and_coding -- indexable collection of (snp, coding) pairs.
        snp1_calls -- genotype calls of the SNP whose coding is deduced.
        snp2_idxs -- indexes, within snp_and_coding, of the SNPs that vote.

        Returns {allele: 'A', other_allele: 'B'} for snp1, or None if there
        is no coding with enough support. self.log counts the outcome and
        self.indexes collects the support index whenever it is computed.
        """
        votes = Counter()
        offspring = self.offspring
        for snp2_idx in snp2_idxs:
            snp2, coding2 = snp_and_coding[snp2_idx]
            if coding2 is None:
                continue

            snp2_calls = [snp2.genotype(indi) for indi in offspring]
            haplos = _count_biallelic_haplotypes(snp1_calls, snp2_calls,
                                                 return_alleles=True)
            if haplos is None:
                continue

            haplo_cnt, alleles = haplos
            # keys are snp2 alleles, values the snp1 alleles paired to them
            alleles_in_major_haplo = {
                alleles.b: alleles.a,
                alleles.B: alleles.A,
            }
            if haplo_cnt is None:
                continue

            if coding2.A not in alleles_in_major_haplo:
                # The offspring alleles in snp2 do not match the alleles
                # in the parents
                continue
            if coding2.B not in alleles_in_major_haplo:
                # Same mismatch, but for the B allele
                continue

            # This is allele A in snp 1
            allele1A = alleles_in_major_haplo[coding2.A]
            # This is allele B in snp 1
            allele1B = alleles_in_major_haplo[coding2.B]
            voted_coding1 = AlleleCoding(allele1A, allele1B)

            recomb_rate = (haplo_cnt.aB + haplo_cnt.Ab) / sum(haplo_cnt)
            weight = 0
            if recomb_rate < 0.5:
                weight = 2 * (0.5 - recomb_rate)
            votes[voted_coding1] += weight

        vote_sum = sum(votes.values())
        if not votes or vote_sum == 0:
            self.log[NO_INFO] += 1
            return None
        if len(votes) > 2:
            self.log[MORE_THAN_2_CODINGS] += 1
            return None

        winner = votes.most_common(1)[0][0]
        index = votes[winner] / vote_sum
        self.indexes.append(index)
        if index < self.parent_index_threshold:
            self.log[NOT_ENOUGH_SUPPORT] += 1
            return None

        self.log[ENOUGH_SUPPORT] += 1
        return {winner.A: 'A', winner.B: 'B'}

示例#6

0

显示文件

    def test_count_homo_haplotypes(self):
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 3

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t2/2\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 3

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        self.assertAlmostEqual(r_sqr,  1)

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t1/1\t1/1\t0/0\t1/1\t0/0\t1/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr - 1.0 < 0.0001

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t1/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr - 1.0 < 0.0001

        # monomorphic
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        assert _count_biallelic_haplotypes(call1, call2) is None
        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr is None
        assert fisher_exact(snps[0], snps[1]) is None

        # Ab and aB
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t0/0\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t0/0\t1/1\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 1

        # different major allele names in snp1 (1, 2) and snp2 (2,3)
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t0/0\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t3/3\t3/3\t3/3\t2/2\t2/2\t3/3\t3/3\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 0

        # missing data
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t./.\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t3/3\t3/3\t3/3\t2/2\t2/2\t3/3\t3/3\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 0

示例#7

0

显示文件

文件： test_ld.py 项目： JoseBlanca/seq_crumbs

    def test_count_homo_haplotypes(self):
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 3

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t2/2\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 3

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        self.assertAlmostEqual(r_sqr,  1)

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t1/1\t1/1\t0/0\t1/1\t0/0\t1/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr - 1.0 < 0.0001

        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t0/0\t0/0\t0/1\t1/1\t1/1'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)

        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr - 1.0 < 0.0001

        # monomorphic
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7 8
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0\t0/0'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        assert _count_biallelic_haplotypes(call1, call2) is None
        r_sqr = calculate_r_sqr(snps[0], snps[1])
        assert r_sqr is None
        assert fisher_exact(snps[0], snps[1]) is None

        # Ab and aB
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t0/0\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t0/0\t1/1\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 1

        # different major allele names in snp1 (1, 2) and snp2 (2,3)
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t0/0\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t3/3\t3/3\t3/3\t2/2\t2/2\t3/3\t3/3\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 4
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 0

        # missing data
        vcf = '''#CHROM POS ID REF ALT QUAL FILTER INFO FORMAT 1 2 3 4 5 6 7
20\t2\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t0/0\t0/0\t1/1\t1/1\t1/1\t./.\t
20\t3\t.\tG\tA\t29\tPASS\tNS=3\tGT\t3/3\t3/3\t3/3\t2/2\t2/2\t3/3\t3/3\t'''

        vcf = StringIO(VCF_HEADER + vcf)
        snps = list(VCFReader(vcf).parse_snvs())
        call1 = snps[0].record.samples
        call2 = snps[1].record.samples
        counts = _count_biallelic_haplotypes(call1, call2)
        assert counts.AB == 3
        assert counts.ab == 2
        assert counts.aB == 1
        assert counts.Ab == 0

示例#8

0

显示文件

    def _smooth(self, snp_idx_to_smooth, snp_gts, samples):
        """Return the smoothed genotype of every sample for one SNP.

        The SNPs in the window vote for the genotype of each individual.
        A SNP weighs 2 * (0.5 - recomb_rate), recomb_rate being the fraction
        of aB and Ab haplotypes between it and the SNP to smooth; a SNP for
        which no haplotypes could be counted weighs 0.

        snp_idx_to_smooth -- index, within snp_gts, of the SNP to smooth.
        snp_gts -- sequence of (snp, genotypes) pairs; genotypes is queried
            with .get(sample, default).
        samples -- individuals to smooth; the result keeps their order.

        For each sample the result holds the most voted genotype,
        ('A', 'B') if self.recomb_threshold is set and exceeded, or None if
        the support index does not exceed self.smooth_threhsold.

        The number of recombinations and the support index of every sample
        are appended to self._recombs and self._smoothes.
        """
        snps, gt_for_snps_in_win = zip(*snp_gts)

        # we need the recomb rates
        # TODO, this is already calculated in ab coding, we have to cache
        snp1 = snps[snp_idx_to_smooth]
        snp1_calls = [snp1.genotype(sample) for sample in samples]
        weights = []
        for snp2 in snps:
            snp2_calls = [snp2.genotype(sample) for sample in samples]
            haplos = _count_biallelic_haplotypes(snp1_calls,
                                                 snp2_calls,
                                                 return_alleles=True)
            if haplos is None:
                weight = 0
            else:
                # only the counts are used, the alleles are ignored
                haplo_cnt, _ = haplos
                recomb_rate = (haplo_cnt.aB + haplo_cnt.Ab) / sum(haplo_cnt)
                weight = 2 * (0.5 - recomb_rate) if recomb_rate < 0.5 else 0
            weights.append(weight)

        # we have to transpose, we want the genotype for each indi not for
        # each snp
        indis_gts = {indi: [] for indi in samples}
        for indi in samples:
            for snp_gt in gt_for_snps_in_win:
                # NOTE(review): a missing individual falls back to
                # (indi, None), which mixes the sample name with None and
                # cannot be sorted on Python 3 -- confirm the intended
                # default.
                snp_indi_gt = snp_gt.get(indi, (indi, None))
                snp_indi_gt = tuple(sorted(snp_indi_gt))
                indis_gts[indi].append(snp_indi_gt)

        # Now we can do the smoothing
        recomb_thres = self.recomb_threshold
        smooth_threhsold = self.smooth_threhsold
        smoothed_genos = []
        for indi in samples:
            indi_gt = indis_gts[indi]
            n_recombs = self._count_recombinations(indi_gt)

            counts = Counter()
            for weight, geno in zip(weights, indi_gt):
                counts[geno] += weight
            smoothed_geno, vote = counts.most_common(1)[0]
            total_votes = sum(counts.values())
            # All the weights are 0 when no SNP in the window is
            # informative; there is no support then, so use an index of 0
            # rather than failing with a ZeroDivisionError.
            index = vote / total_votes if total_votes else 0
            self._recombs.append(n_recombs)
            self._smoothes.append(index)

            if recomb_thres is not None and n_recombs > recomb_thres:
                # We're assuming diploid here
                geno = ('A', 'B')
            elif index > smooth_threhsold:
                geno = smoothed_geno
            else:
                geno = None
            smoothed_genos.append(geno)
        return smoothed_genos