예제 #1
0
    def test_calculate_hwe(self):
        """Exercise calc_hwe_chi2_test on empty, in-memory and HDF5 inputs."""
        # Case 1: empty matrices must produce a result with zero rows.
        empty_snps = VariationsArrays()
        no_data = numpy.array([])
        for path in ('/calls/GT', '/variations/alt'):
            empty_snps[path] = no_data
        empty_result = calc_hwe_chi2_test(empty_snps, chunk_size=None,
                                          min_num_genotypes=0)
        assert empty_result.shape[0] == 0

        # Case 2: two small hand-written SNPs with known expected output.
        two_snps = VariationsArrays()
        first_snp = [[0, 0], [0, 1], [0, 1], [0, 0], [0, 1],
                     [0, 0], [0, 0], [0, 1], [1, 1], [0, 0]]
        second_snp = [[0, 0], [1, 0], [0, 1], [0, 0], [0, 1],
                      [0, 0], [0, 0], [1, 0], [1, 1], [0, 0]]
        two_snps['/calls/GT'] = numpy.array([first_snp, second_snp])
        two_snps._create_matrix('/variations/alt', shape=(1, 1),
                                dtype=numpy.int16, fillvalue=0)
        # Both SNPs are expected to yield the same result row.
        expected_row = [1.25825397e+01, 1.85240619e-03]
        expected = numpy.array([expected_row, expected_row])
        observed = calc_hwe_chi2_test(two_snps, chunk_size=None,
                                      min_num_genotypes=0)
        assert numpy.allclose(observed, expected)

        # Case 3: on the same HDF5 file, chunk_size=None and the default
        # chunk size must give matching results (NaNs compare as equal).
        ril_path = join(TEST_DATA_DIR, 'ril.hdf5')
        first_handle = VariationsH5(ril_path, mode='r')
        unchunked = calc_hwe_chi2_test(first_handle, chunk_size=None)
        second_handle = VariationsH5(ril_path, mode='r')
        chunked = calc_hwe_chi2_test(second_handle)
        assert numpy.allclose(unchunked, chunked, equal_nan=True)
    def test_calculate_hwe(self):
        """Verify calc_hwe_chi2_test for empty, array-backed and HDF5 data."""
        hwe_kwargs = {'min_num_genotypes': 0, 'chunk_size': None}

        # An empty data set should give back an empty (zero-row) result.
        snps = VariationsArrays()
        empty = numpy.array([])
        snps['/calls/GT'] = empty
        snps['/variations/alt'] = empty
        assert calc_hwe_chi2_test(snps, **hwe_kwargs).shape[0] == 0

        # Two SNPs, ten samples each, checked against precomputed values.
        snps = VariationsArrays()
        genotypes = numpy.array([
            [[0, 0], [0, 1], [0, 1], [0, 0], [0, 1],
             [0, 0], [0, 0], [0, 1], [1, 1], [0, 0]],
            [[0, 0], [1, 0], [0, 1], [0, 0], [0, 1],
             [0, 0], [0, 0], [1, 0], [1, 1], [0, 0]],
        ])
        snps['/calls/GT'] = genotypes
        snps._create_matrix('/variations/alt', shape=(1, 1),
                            dtype=numpy.int16, fillvalue=0)
        expected = numpy.array([[1.25825397e+01, 1.85240619e-03],
                                [1.25825397e+01, 1.85240619e-03]])
        computed = calc_hwe_chi2_test(snps, **hwe_kwargs)
        assert numpy.allclose(computed, expected)

        # The result must not depend on whether the HDF5 file is processed
        # with chunk_size=None or with the default chunk size.
        h5_path = join(TEST_DATA_DIR, 'ril.hdf5')
        h5_snps = VariationsH5(h5_path, mode='r')
        without_chunks = calc_hwe_chi2_test(h5_snps, chunk_size=None)
        h5_snps = VariationsH5(h5_path, mode='r')
        with_default_chunks = calc_hwe_chi2_test(h5_snps)
        assert numpy.allclose(without_chunks, with_default_chunks,
                              equal_nan=True)
예제 #3
0
def plot_hwe(variations, max_num_alleles, data_dir, ploidy=2,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
             chunk_size=SNPS_PER_CHUNK):
    """Plot observed HWE chi2 statistics against the expected chi2 density.

    One subplot is drawn per allele count, from 2 up to ``max_num_alleles``
    (inclusive). Each subplot shows a 50-bin histogram of the first column
    returned by ``calc_hwe_chi2_test`` and, on a twin y axis, the chi2
    probability density for the matching degrees of freedom.

    The figure is written to ``hwe_chi2_distrib.png`` inside ``data_dir``.

    Parameters:
        variations: data set handed to ``calc_hwe_chi2_test``.
        max_num_alleles: largest allele count to plot.
        data_dir: directory that receives the image file.
        ploidy: size of the allele combinations used to count genotypes.
        min_num_genotypes: forwarded to ``calc_hwe_chi2_test``.
        chunk_size: forwarded to ``calc_hwe_chi2_test``.
    """
    fpath = join(data_dir, 'hwe_chi2_distrib.png')
    fig = Figure(figsize=(10, 20))
    canvas = FigureCanvas(fig)

    num_alleles = range(2, max_num_alleles + 1)
    gs = gridspec.GridSpec(len(num_alleles), 1)
    for i, num_allele in enumerate(num_alleles):
        # Degrees of freedom: possible genotypes minus number of alleles.
        df = len(list(combinations_with_replacement(range(num_allele),
                                                    ploidy))) - num_allele

        hwe_test = calc_hwe_chi2_test(variations, num_allele=num_allele,
                                      min_num_genotypes=min_num_genotypes,
                                      chunk_size=chunk_size)
        hwe_chi2 = hwe_test[:, 0]
        hwe_chi2_distrib, bins = histogram(hwe_chi2, n_bins=50)

        # Plot observed distribution
        axes = fig.add_subplot(gs[i, 0])
        title = 'Chi2 df={} statistic values distribution'.format(df)
        mpl_params = {'set_xlabel': {'args': ['Chi2 statistic'], 'kwargs': {}},
                      'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                      'set_title': {'args': [title], 'kwargs': {}}}
        plot_distrib(hwe_chi2_distrib, bins, axes=axes, mpl_params=mpl_params)

        # Plot expected chi2 distribution
        axes = axes.twinx()
        rv = chi2(df)
        x = numpy.linspace(0, max(hwe_chi2), 1000)
        axes.plot(x, rv.pdf(x), color='b', lw=2, label='Expected Chi2')
        axes.set_ylabel('Expected Chi2 density')

    # The image is binary data, so the file must be opened in 'wb' mode.
    # Opening it only now, inside a context manager, guarantees the handle
    # is closed and avoids leaving an empty file if the steps above fail.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
예제 #4
0
def plot_hwe(variations,
             max_num_alleles,
             data_dir,
             ploidy=2,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
             chunk_size=SNPS_PER_CHUNK):
    """Draw the HWE chi2 statistic distributions and save them as a PNG.

    For every allele count from 2 to ``max_num_alleles`` (inclusive) a
    subplot is added containing a 50-bin histogram of the first column of
    the ``calc_hwe_chi2_test`` result, overlaid (on a twin y axis) with the
    chi2 probability density for the corresponding degrees of freedom.

    The output file is ``hwe_chi2_distrib.png`` inside ``data_dir``.

    Parameters:
        variations: data set passed on to ``calc_hwe_chi2_test``.
        max_num_alleles: highest allele count that gets a subplot.
        data_dir: directory where the image is written.
        ploidy: size of the allele combinations used to count genotypes.
        min_num_genotypes: passed on to ``calc_hwe_chi2_test``.
        chunk_size: passed on to ``calc_hwe_chi2_test``.
    """
    fpath = join(data_dir, 'hwe_chi2_distrib.png')
    fig = Figure(figsize=(10, 20))
    canvas = FigureCanvas(fig)

    num_alleles = range(2, max_num_alleles + 1)
    gs = gridspec.GridSpec(len(num_alleles), 1)
    for i, num_allele in enumerate(num_alleles):
        # Degrees of freedom: possible genotypes minus number of alleles.
        df = len(list(combinations_with_replacement(range(num_allele),
                                                    ploidy))) - num_allele

        hwe_test = calc_hwe_chi2_test(variations,
                                      num_allele=num_allele,
                                      min_num_genotypes=min_num_genotypes,
                                      chunk_size=chunk_size)
        hwe_chi2 = hwe_test[:, 0]
        hwe_chi2_distrib, bins = histogram(hwe_chi2, n_bins=50)

        # Plot observed distribution
        axes = fig.add_subplot(gs[i, 0])
        title = 'Chi2 df={} statistic values distribution'.format(df)
        mpl_params = {
            'set_xlabel': {
                'args': ['Chi2 statistic'],
                'kwargs': {}
            },
            'set_ylabel': {
                'args': ['SNP number'],
                'kwargs': {}
            },
            'set_title': {
                'args': [title],
                'kwargs': {}
            }
        }
        plot_distrib(hwe_chi2_distrib, bins, axes=axes, mpl_params=mpl_params)

        # Plot expected chi2 distribution
        axes = axes.twinx()
        rv = chi2(df)
        x = numpy.linspace(0, max(hwe_chi2), 1000)
        axes.plot(x, rv.pdf(x), color='b', lw=2, label='Expected Chi2')
        axes.set_ylabel('Expected Chi2 density')

    # PNG output is binary, so text mode ('w') is wrong here. The file is
    # opened only once the figure is complete and is closed by the context
    # manager, so no handle leaks and no empty file is left on failure.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)