Exemplo n.º 1
0
def write_map_maker(vcf_fhand, parents_a, parents_b, genos_fhand,
                    pop_type=DEF_MAPMAKER_POP, phys_map_fhand=None,
                    coder_threshold=DEF_AB_CODER_THRESHOLD,
                    smooth_threshold=None, recomb_threshold=None):
    """Recode offspring genotypes with ABCoder and write them marker by marker.

    The genotype file receives a ``data type <pop_type>`` line, a line with
    the number of offspring samples and markers, one ``*<snp id>`` row per
    SNP holding a one-letter code per sample, and a trailing
    ``*sample_names`` row.  When ``phys_map_fhand`` is given, a
    marker/position/chromosome table is written to it as well.

    Args:
        vcf_fhand: VCF input handed straight to ABCoder.
        parents_a: samples ABCoder should treat as parent A.
        parents_b: samples ABCoder should treat as parent B.
        genos_fhand: writable file-like object for the genotype matrix.
        pop_type: one of 'f2 intercross', 'f2 backcross', 'f3 self',
            'ri self' or 'ri sib'.
        phys_map_fhand: optional writable file-like object for the
            physical map table.
        coder_threshold: forwarded to ABCoder as parent_index_threshold.
        smooth_threshold: forwarded to ABCoder (None is passed through).
        recomb_threshold: forwarded to ABCoder (None is passed through).

    Returns:
        The ABCoder instance that produced the recoded genotypes.

    Raises:
        ValueError: if pop_type is not one of the allowed values.  The check
            runs before the coder is created, so a bad value is rejected
            without touching any of the file handles.
    """
    # NOTE(review): array('c') is a Python 2 only typecode, and the encoded
    # values are written as plain strings, so this function targets Python 2.
    allowed_pop_types = ('f2 intercross', 'f2 backcross', 'f3 self',
                         'ri self', 'ri sib')
    if pop_type not in allowed_pop_types:
        msg = 'Defined pop type \'{0}\' does not match an allowed pop_type '
        msg += '(f2 intercross, f2 backcross, f3 self, ri self or ri sib)'
        raise ValueError(msg.format(pop_type))

    sep = '\t'
    coding = 'ascii'
    # 'smooth_threhsold' (sic) is the keyword name ABCoder is called with;
    # it must not be "corrected" on this side only.
    coder = ABCoder(vcf_fhand, parents_a, parents_b,
                    parent_index_threshold=coder_threshold,
                    smooth_threhsold=smooth_threshold,
                    recomb_threshold=recomb_threshold)

    samples = coder.offspring
    snp_ids = []
    snp_genos = []
    write_phys_map = phys_map_fhand is not None

    if write_phys_map:
        phys_map_fhand.write('marker\tposition\tChromosome\n')

    for snp, genos in coder.recode_genotypes(samples):
        snp_id = get_or_create_id(snp).encode(coding)
        snp_ids.append(snp_id)
        # One character per sample keeps the whole matrix compact in memory.
        # Assumes genos.values() follows the order of `samples` - TODO confirm.
        geno_array = array('c')
        for geno in genos.values():
            geno_array.append(_code_to_one_letter(geno))
        snp_genos.append(geno_array)
        if write_phys_map:
            phys_map_row = sep.join((snp_id, str(snp.POS),
                                     snp.CHROM.encode(coding)))
            phys_map_fhand.write(phys_map_row + '\n')

    # The marker count is only known once every SNP has been recoded, which
    # is why the rows are buffered above instead of being streamed out.
    genos_fhand.write('data type ' + pop_type + '\n')
    genos_fhand.write('{0} {1} 1\n'.format(len(samples), len(snp_genos)))

    for snp_id, snp_geno in zip(snp_ids, snp_genos):
        genos_fhand.write('*{0}'.format(snp_id) + sep + sep.join(snp_geno))
        genos_fhand.write('\n')

    genos_fhand.write('\n')
    encoded_samples = [sample.encode(coding) for sample in samples]
    genos_fhand.write('*sample_names' + sep + sep.join(encoded_samples))
    genos_fhand.write('\n')

    return coder
Exemplo n.º 2
0
def write_parent_checker(vcf_fhand,
                         parents_a,
                         parents_b,
                         genos_fhand,
                         phys_map_fhand=None,
                         coder_threshold=DEF_AB_CODER_THRESHOLD,
                         smooth_threshold=None,
                         recomb_threshold=None):
    """Write the ABCoder-recoded offspring genotypes as a sample x SNP table.

    The first row written to ``genos_fhand`` is ``ID`` followed by every SNP
    id; each following row is an offspring sample name followed by its
    one-letter genotype code for every SNP.  All fields are tab separated.
    If ``phys_map_fhand`` is given it receives a marker/position/chromosome
    table with one row per SNP.

    The three threshold arguments are forwarded to ABCoder unchanged.
    Returns the ABCoder instance that was used.
    """
    tab = '\t'
    encoding = 'ascii'
    coder = ABCoder(vcf_fhand,
                    parents_a,
                    parents_b,
                    parent_index_threshold=coder_threshold,
                    smooth_threhsold=smooth_threshold,
                    recomb_threshold=recomb_threshold)
    offspring = coder.offspring
    want_phys_map = phys_map_fhand is not None

    if want_phys_map:
        phys_map_fhand.write('marker\tposition\tChromosome\n')

    # SNPs arrive one at a time but the output is sample-major, so every
    # SNP's per-sample codes are kept until the table can be transposed.
    marker_ids = []
    marker_codes = []
    for snp, genos in coder.recode_genotypes(offspring):
        marker_id = get_or_create_id(snp).encode(encoding)
        marker_ids.append(marker_id)
        letters = [_code_to_one_letter(geno) for geno in genos.values()]
        marker_codes.append(array('c', letters))
        if want_phys_map:
            chrom = snp.CHROM.encode(encoding)
            phys_map_fhand.write(marker_id + tab + str(snp.POS) + tab +
                                 chrom + '\n')

    genos_fhand.write('ID' + tab + tab.join(marker_ids) + '\n')

    for column, sample in enumerate(offspring):
        # Position `column` of each per-SNP array belongs to this sample.
        calls = tab.join(codes[column] for codes in marker_codes)
        genos_fhand.write(sample.encode(encoding) + tab + calls + '\n')

    return coder
Exemplo n.º 3
0
    def test_smooth(self):
        # Checks the windows ABCoder builds for window=7 and was meant to
        # check the smoothed / recombination-aware recoding as well.
        # NOTE(review): the bare `return` below stops the test right after
        # the first recode_genotypes() call, so every later assertion in
        # this method is currently unreachable.

        # Eleven SNP rows on chromosome 20 for samples S1..S6; S1 and S2 are
        # passed as the two parents below.
        vcf = '''#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\tS3\tS4\tS5\tS6
20\t11\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t1/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t15\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t16\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t17\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t18\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t0/0\t0/0
20\t19\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/0
20\t20\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/1
20\t21\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t22\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/1
20\t23\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
'''
        fhand = StringIO(self.VCF_HEADER + vcf)

        coder = ABCoder(fhand, parents_a=['S1'], parents_b=['S2'],
                        parent_index_threshold=0.9, smooth_threhsold=0.5,
                        window=7)
        # This is the only assertion that actually runs.
        assert coder._create_windows(8) == ((5, 11), (2, 8), (8, 14))

        # `result` is never iterated before the return; if recode_genotypes
        # is lazy, no genotype is recoded here at all - TODO confirm.
        result = coder.recode_genotypes(samples=coder.offspring)
        # NOTE(review): early exit - everything below is dead code.  Confirm
        # whether it was disabled on purpose before removing this return.
        return

        # (unreachable) expected recoding with smooth_threhsold=0.5 and no
        # recomb_threshold.
        expected = '''11 AA,BB,BB,AA
14 AA,BB,BB,AA
15 AA,BB,BB,AA
16 AA,BB,BB,AA
17 AA,BB,BB,AA
18 AA,BB,BB,AA
19 AA,BB,BB,..
20 AA,BB,BB,..
21 AA,BB,BB,AA
22 AA,BB,BB,..
23 AA,BB,BB,AA
'''
        assert self._ab_result_to_str(result) == expected

        # (unreachable) same input with smooth_threhsold=0.6 and
        # recomb_threshold=2.
        fhand = StringIO(self.VCF_HEADER + vcf)
        coder = ABCoder(fhand, parents_a=['S1'], parents_b=['S2'],
                        parent_index_threshold=0.9, smooth_threhsold=0.6,
                        recomb_threshold=2, window=7)

        result = coder.recode_genotypes(samples=coder.offspring)
        expected = '''11 ..,BB,BB,AA
14 AA,BB,BB,AA
15 AA,BB,BB,AA
16 AA,BB,BB,AA
17 AA,BB,BB,AA
18 AA,BB,BB,AB
19 AA,BB,BB,AB
20 AA,BB,BB,AB
21 AA,BB,BB,AB
22 AA,BB,BB,AB
23 AA,BB,BB,AB
'''
        assert self._ab_result_to_str(result) == expected

        # (unreachable) the histogram is written to a temporary .png; nothing
        # is asserted about it.
        fhand = NamedTemporaryFile(suffix='.png')
        coder.plot_smooth_hist(fhand)
Exemplo n.º 4
0
def write_parent_checker(vcf_fhand, parents_a, parents_b, genos_fhand,
                         phys_map_fhand=None,
                         coder_threshold=DEF_AB_CODER_THRESHOLD,
                         smooth_threshold=None, recomb_threshold=None):
    """Dump ABCoder-recoded offspring genotypes, one row per sample.

    ``genos_fhand`` gets a tab separated table: a header row (``ID`` plus
    every SNP id) followed by one row per offspring sample holding the
    sample name and its one-letter genotype code at every SNP.  When
    ``phys_map_fhand`` is not None, a marker/position/chromosome row is
    written to it for every SNP, below a fixed header line.

    coder_threshold, smooth_threshold and recomb_threshold are passed on to
    ABCoder.  The ABCoder instance is returned.
    """
    field_sep = '\t'
    charset = 'ascii'

    ab_coder = ABCoder(vcf_fhand, parents_a, parents_b,
                       parent_index_threshold=coder_threshold,
                       smooth_threhsold=smooth_threshold,
                       recomb_threshold=recomb_threshold)
    sample_names = ab_coder.offspring

    if phys_map_fhand is not None:
        phys_map_fhand.write('marker\tposition\tChromosome\n')

    ids = []
    codes_by_snp = []
    for snp, genos in ab_coder.recode_genotypes(sample_names):
        this_id = get_or_create_id(snp).encode(charset)
        ids.append(this_id)

        # One character per sample, in the order genos.values() yields them.
        one_letter_codes = array('c')
        for geno in genos.values():
            one_letter_codes.append(_code_to_one_letter(geno))
        codes_by_snp.append(one_letter_codes)

        if phys_map_fhand is None:
            continue
        phys_fields = [this_id, str(snp.POS), snp.CHROM.encode(charset)]
        phys_map_fhand.write(field_sep.join(phys_fields) + '\n')

    def sample_codes(sample_pos):
        # Column `sample_pos` of the SNP-major matrix, i.e. one sample's row.
        return [snp_codes[sample_pos] for snp_codes in codes_by_snp]

    genos_fhand.write('ID' + field_sep + field_sep.join(ids) + '\n')

    for sample_pos, name in enumerate(sample_names):
        genos_fhand.write(name.encode(charset) + field_sep)
        genos_fhand.write(field_sep.join(sample_codes(sample_pos)) + '\n')

    return ab_coder
Exemplo n.º 5
0
    def test_ab_coding(self):
        fhand = StringIO(self.VCF_HEADER + self.vcf)

        coder = ABCoder(fhand, parents_a=['S1'], parents_b=['S2'],
                        parent_index_threshold=0.9)
        assert coder.offspring == ['S3', 'S4', 'S5', 'S6']
        try:
            list(coder.recode_genotypes())
            self.fail('RuntimeError expected')
        except RuntimeError:
            pass

        fhand = StringIO(self.VCF_HEADER + self.vcf)

        coder = ABCoder(fhand, parents_a=['S1'], parents_b=['S2'],
                        parent_index_threshold=0.9)
        result = coder.recode_genotypes(samples=coder.offspring)
        string = ''
        for snp, geno in result:
            string += str(snp.POS) + ' '
            string += ','.join(''.join(geno) for geno in geno.values())
            string += '\n'
        assert string == '''11 AA,AA,BB,BB
16 AA,AA,BB,BB
17 AA,AA,BB,BB
'''
        assert sum(coder.log.values()) == 6
        assert coder.log[NOT_ENOUGH_SUPPORT] == 2
        assert coder.log[ENOUGH_SUPPORT] == 3
        fhand = StringIO()
        coder.write_log(fhand)
        assert '6 SNPs ' in fhand.getvalue()
Exemplo n.º 6
0
    def test_ab_coding(self):
        # Checks the offspring list, the RuntimeError raised when
        # recode_genotypes() is called without samples, and the SNP counters
        # kept in coder.log.
        vcf_text = self.VCF_HEADER + self.vcf

        coder = ABCoder(
            StringIO(vcf_text), parents_a=["S1"], parents_b=["S2"], threshold=0.9
        )
        assert coder.offspring == ["S3", "S4", "S5", "S6"]
        try:
            list(coder.recode_genotypes())
        except RuntimeError:
            pass
        else:
            self.fail("RuntimeError expected")

        coder = ABCoder(
            StringIO(vcf_text), parents_a=["S1"], parents_b=["S2"], threshold=0.9
        )
        # NOTE(review): the rendered text is built but never compared with an
        # expected value; the loop still consumes the recoding before the
        # counters below are checked.
        rows = []
        for snp, genos in coder.recode_genotypes(samples=coder.offspring):
            calls = ",".join("".join(call) for call in genos.values())
            rows.append(str(snp.POS) + " " + calls + "\n")
        string = "".join(rows)
        assert sum(coder.log.values()) == 6
        assert coder.log[NOT_ENOUGH_SUPPORT] == 2
        assert coder.log[ENOUGH_SUPPORT] == 3
        log_fhand = StringIO()
        coder.write_log(log_fhand)
        assert "6 SNPs " in log_fhand.getvalue()
Exemplo n.º 7
0
    def test_ab_coding(self):
        fhand = StringIO(self.VCF_HEADER + self.vcf)

        coder = ABCoder(fhand,
                        parents_a=['S1'],
                        parents_b=['S2'],
                        parent_index_threshold=0.9)
        assert coder.offspring == ['S3', 'S4', 'S5', 'S6']
        try:
            list(coder.recode_genotypes())
            self.fail('RuntimeError expected')
        except RuntimeError:
            pass

        fhand = StringIO(self.VCF_HEADER + self.vcf)

        coder = ABCoder(fhand,
                        parents_a=['S1'],
                        parents_b=['S2'],
                        parent_index_threshold=0.9)
        result = coder.recode_genotypes(samples=coder.offspring)
        string = ''
        for snp, geno in result:
            string += str(snp.POS) + ' '
            string += ','.join(''.join(geno) for geno in geno.values())
            string += '\n'
        assert string == '''11 AA,AA,BB,BB
16 AA,AA,BB,BB
17 AA,AA,BB,BB
'''
        assert sum(coder.log.values()) == 6
        assert coder.log[NOT_ENOUGH_SUPPORT] == 2
        assert coder.log[ENOUGH_SUPPORT] == 3
        fhand = StringIO()
        coder.write_log(fhand)
        assert '6 SNPs ' in fhand.getvalue()
Exemplo n.º 8
0
    def test_smooth(self):
        # Checks the windows ABCoder builds for window=7 and was meant to
        # check the smoothed / recombination-aware recoding as well.
        # NOTE(review): the bare `return` below stops the test right after
        # the first recode_genotypes() call, so every later assertion in
        # this method is currently unreachable.

        # Eleven SNP rows on chromosome 20 for samples S1..S6; S1 and S2 are
        # passed as the two parents below.
        vcf = '''#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\tS3\tS4\tS5\tS6
20\t11\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t1/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t15\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t16\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t17\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t18\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t0/0\t0/0
20\t19\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/0
20\t20\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/1
20\t21\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t22\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/1
20\t23\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
'''
        fhand = StringIO(self.VCF_HEADER + vcf)

        coder = ABCoder(fhand,
                        parents_a=['S1'],
                        parents_b=['S2'],
                        parent_index_threshold=0.9,
                        smooth_threhsold=0.5,
                        window=7)
        # This is the only assertion that actually runs.
        assert coder._create_windows(8) == ((5, 11), (2, 8), (8, 14))

        # `result` is never iterated before the return; if recode_genotypes
        # is lazy, no genotype is recoded here at all - TODO confirm.
        result = coder.recode_genotypes(samples=coder.offspring)
        # NOTE(review): early exit - everything below is dead code.  Confirm
        # whether it was disabled on purpose before removing this return.
        return

        # (unreachable) expected recoding with smooth_threhsold=0.5 and no
        # recomb_threshold.
        expected = '''11 AA,BB,BB,AA
14 AA,BB,BB,AA
15 AA,BB,BB,AA
16 AA,BB,BB,AA
17 AA,BB,BB,AA
18 AA,BB,BB,AA
19 AA,BB,BB,..
20 AA,BB,BB,..
21 AA,BB,BB,AA
22 AA,BB,BB,..
23 AA,BB,BB,AA
'''
        assert self._ab_result_to_str(result) == expected

        # (unreachable) same input with smooth_threhsold=0.6 and
        # recomb_threshold=2.
        fhand = StringIO(self.VCF_HEADER + vcf)
        coder = ABCoder(fhand,
                        parents_a=['S1'],
                        parents_b=['S2'],
                        parent_index_threshold=0.9,
                        smooth_threhsold=0.6,
                        recomb_threshold=2,
                        window=7)

        result = coder.recode_genotypes(samples=coder.offspring)
        expected = '''11 ..,BB,BB,AA
14 AA,BB,BB,AA
15 AA,BB,BB,AA
16 AA,BB,BB,AA
17 AA,BB,BB,AA
18 AA,BB,BB,AB
19 AA,BB,BB,AB
20 AA,BB,BB,AB
21 AA,BB,BB,AB
22 AA,BB,BB,AB
23 AA,BB,BB,AB
'''
        assert self._ab_result_to_str(result) == expected

        # (unreachable) the histogram is written to a temporary .png; nothing
        # is asserted about it.
        fhand = NamedTemporaryFile(suffix='.png')
        coder.plot_smooth_hist(fhand)