def test_calculate_hwe(self):
    """Check calc_hwe_chi2_test on empty, in-memory and HDF5 variations."""
    # Case 1: empty genotype data must give a result with no rows.
    empty_gts = numpy.array([])
    empty_variations = VariationsArrays()
    empty_variations['/calls/GT'] = empty_gts
    empty_variations['/variations/alt'] = empty_gts
    empty_result = calc_hwe_chi2_test(empty_variations,
                                      min_num_genotypes=0,
                                      chunk_size=None)
    assert empty_result.shape[0] == 0

    # Case 2: two small hand-written SNPs with known chi2 / p-value pairs.
    snp_gts = numpy.array([
        [[0, 0], [0, 1], [0, 1], [0, 0], [0, 1],
         [0, 0], [0, 0], [0, 1], [1, 1], [0, 0]],
        [[0, 0], [1, 0], [0, 1], [0, 0], [0, 1],
         [0, 0], [0, 0], [1, 0], [1, 1], [0, 0]],
    ])
    small_variations = VariationsArrays()
    small_variations['/calls/GT'] = snp_gts
    small_variations._create_matrix('/variations/alt', shape=(1, 1),
                                    dtype=numpy.int16, fillvalue=0)
    expected = numpy.array([[1.25825397e+01, 1.85240619e-03],
                            [1.25825397e+01, 1.85240619e-03]])
    small_result = calc_hwe_chi2_test(small_variations,
                                      min_num_genotypes=0,
                                      chunk_size=None)
    assert numpy.allclose(small_result, expected)

    # Case 3: on the HDF5 fixture, chunk_size=None and the default
    # chunk_size must agree (NaNs compare as equal).
    ril_path = join(TEST_DATA_DIR, 'ril.hdf5')
    hwe_no_chunks = calc_hwe_chi2_test(VariationsH5(ril_path, mode='r'),
                                       chunk_size=None)
    hwe_default = calc_hwe_chi2_test(VariationsH5(ril_path, mode='r'))
    assert numpy.allclose(hwe_no_chunks, hwe_default, equal_nan=True)
def plot_hwe(variations, max_num_alleles, data_dir, ploidy=2,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
             chunk_size=SNPS_PER_CHUNK):
    """Plot observed HWE chi2 statistics against the expected chi2 density.

    One subplot is drawn for every allele count from 2 up to
    ``max_num_alleles`` (inclusive).  Each subplot shows the histogram of
    the chi2 statistics returned by ``calc_hwe_chi2_test`` and, on a twin
    y axis, the pdf of a chi2 distribution with the matching degrees of
    freedom.  The figure is written to ``hwe_chi2_distrib.png`` inside
    ``data_dir``.

    Args:
        variations: variations object handed to ``calc_hwe_chi2_test``.
        max_num_alleles: highest number of alleles to plot.
        data_dir: directory in which the output image is created.
        ploidy: ploidy used to count the possible genotypes.
        min_num_genotypes: forwarded to ``calc_hwe_chi2_test``.
        chunk_size: forwarded to ``calc_hwe_chi2_test``.

    Returns:
        None.
    """
    # NOTE(review): this file appears to contain another definition of
    # plot_hwe with the same signature; if both live in the same scope the
    # later one wins -- confirm which should remain.
    fpath = join(data_dir, 'hwe_chi2_distrib.png')

    fig = Figure(figsize=(10, 20))
    canvas = FigureCanvas(fig)
    num_alleles = range(2, max_num_alleles + 1)
    gs = gridspec.GridSpec(len(num_alleles), 1)

    for i, num_allele in enumerate(num_alleles):
        # Degrees of freedom: possible genotypes minus number of alleles.
        genotypes = combinations_with_replacement(range(num_allele), ploidy)
        df = len(list(genotypes)) - num_allele

        hwe_test = calc_hwe_chi2_test(variations, num_allele=num_allele,
                                      min_num_genotypes=min_num_genotypes,
                                      chunk_size=chunk_size)
        hwe_chi2 = hwe_test[:, 0]  # first column holds the chi2 statistic
        hwe_chi2_distrib, bins = histogram(hwe_chi2, n_bins=50)

        # Plot observed distribution
        axes = fig.add_subplot(gs[i, 0])
        title = 'Chi2 df={} statistic values distribution'.format(df)
        mpl_params = {'set_xlabel': {'args': ['Chi2 statistic'],
                                     'kwargs': {}},
                      'set_ylabel': {'args': ['SNP number'], 'kwargs': {}},
                      'set_title': {'args': [title], 'kwargs': {}}}
        plot_distrib(hwe_chi2_distrib, bins, axes=axes,
                     mpl_params=mpl_params)

        # Plot expected chi2 distribution
        axes = axes.twinx()
        rv = chi2(df)
        # NOTE(review): builtin max() is order-dependent if hwe_chi2 holds
        # NaNs -- confirm whether calc_hwe_chi2_test can return them here.
        x = numpy.linspace(0, max(hwe_chi2), 1000)
        axes.plot(x, rv.pdf(x), color='b', lw=2, label='Expected Chi2')
        axes.set_ylabel('Expected Chi2 density')

    # The image is binary data, so the file must be opened in 'wb' mode.
    # Opening it only once the figure is ready (and inside a context
    # manager) avoids leaving an empty file or a dangling handle on error.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)
def plot_hwe(variations, max_num_alleles, data_dir, ploidy=2,
             min_num_genotypes=MIN_NUM_GENOTYPES_FOR_POP_STAT,
             chunk_size=SNPS_PER_CHUNK):
    """Draw the HWE chi2 statistic distributions and save them as an image.

    For each allele count in ``2..max_num_alleles`` a subplot is added
    containing the histogram of the chi2 statistics computed by
    ``calc_hwe_chi2_test`` together with the expected chi2 pdf (drawn on
    a secondary y axis).  The resulting figure is saved as
    ``hwe_chi2_distrib.png`` in ``data_dir``.

    Args:
        variations: variations object passed on to ``calc_hwe_chi2_test``.
        max_num_alleles: largest allele count for which a subplot is made.
        data_dir: output directory for the image file.
        ploidy: ploidy used when enumerating the possible genotypes.
        min_num_genotypes: passed on to ``calc_hwe_chi2_test``.
        chunk_size: passed on to ``calc_hwe_chi2_test``.

    Returns:
        None.
    """
    # NOTE(review): an earlier definition of plot_hwe with the same
    # signature appears in this file; if both share a scope this one
    # shadows it -- confirm that the duplicate is intended.
    fpath = join(data_dir, 'hwe_chi2_distrib.png')

    fig = Figure(figsize=(10, 20))
    canvas = FigureCanvas(fig)
    num_alleles = range(2, max_num_alleles + 1)
    gs = gridspec.GridSpec(len(num_alleles), 1)

    for i, num_allele in enumerate(num_alleles):
        # df = number of possible genotypes - number of alleles
        df = len(list(combinations_with_replacement(range(num_allele),
                                                    ploidy))) - num_allele
        hwe_test = calc_hwe_chi2_test(variations, num_allele=num_allele,
                                      min_num_genotypes=min_num_genotypes,
                                      chunk_size=chunk_size)
        # Column 0 of the test result is the chi2 statistic.
        hwe_chi2 = hwe_test[:, 0]
        hwe_chi2_distrib, bins = histogram(hwe_chi2, n_bins=50)

        # Plot observed distribution
        axes = fig.add_subplot(gs[i, 0])
        title = 'Chi2 df={} statistic values distribution'.format(df)
        mpl_params = {
            'set_xlabel': {
                'args': ['Chi2 statistic'],
                'kwargs': {}
            },
            'set_ylabel': {
                'args': ['SNP number'],
                'kwargs': {}
            },
            'set_title': {
                'args': [title],
                'kwargs': {}
            }
        }
        plot_distrib(hwe_chi2_distrib, bins, axes=axes,
                     mpl_params=mpl_params)

        # Plot expected chi2 distribution
        axes = axes.twinx()
        rv = chi2(df)
        # NOTE(review): max() over an array containing NaN gives an
        # order-dependent result -- confirm the statistics are NaN-free.
        x = numpy.linspace(0, max(hwe_chi2), 1000)
        axes.plot(x, rv.pdf(x), color='b', lw=2, label='Expected Chi2')
        axes.set_ylabel('Expected Chi2 density')

    # Image bytes require a binary-mode file; the context manager makes
    # sure the handle is flushed and closed even if rendering fails.
    with open(fpath, 'wb') as fhand:
        canvas.print_figure(fhand)