Exemplo n.º 1
0
def compositional_effect_size(max_alpha, reps, intervals, n_species, n_diff,
                              n_contaminants, lam, library_size, asymmetry,
                              fold_balance, template_biom,
                              template_sample_name, output_dir):
    """Run the compositional effect-size generator and save every result.

    When ``template_biom`` is not ``None`` it is loaded with ``load_table``
    and the data of ``template_sample_name`` (``axis='sample'``) is handed to
    the generator as ``template``; otherwise ``template`` is ``None``.

    ``output_dir`` is created with ``os.mkdir``, so the call raises when the
    directory already exists or its parent is missing.  For the i-th item
    yielded by the generator, ``deposit`` receives the three yielded objects
    plus the paths ``table.<i>.biom``, ``metadata.<i>.txt`` and
    ``truth.<i>.csv`` inside ``output_dir``.

    Every other argument is forwarded unchanged to
    ``compositional_effect_size_generator``.
    """
    template = None
    if template_biom is not None:
        template = load_table(template_biom).data(id=template_sample_name,
                                                  axis='sample')

    os.mkdir(output_dir)
    simulations = compositional_effect_size_generator(
        max_alpha, reps, intervals, n_species, n_diff, n_contaminants, lam,
        library_size=library_size,
        asymmetry=asymmetry,
        fold_balance=fold_balance,
        template=template)

    # One pattern per output file, in the order ``deposit`` expects them.
    patterns = ("%s/table.%d.biom", "%s/metadata.%d.txt", "%s/truth.%d.csv")
    for index, (table, groups, truth) in enumerate(simulations):
        table_path, groups_path, truth_path = (
            pattern % (output_dir, index) for pattern in patterns)
        deposit(table, groups, truth, table_path, groups_path, truth_path)
Exemplo n.º 2
0
    def test_composition_effect_size_balanced(self):
        """Check the second simulation yielded with ``balanced=False``.

        The expected table has identical rows for samples S0-S4 and a
        different set of identical rows for samples S5-S9; the expected
        metadata carries ``effect_size == 10.0`` for every sample.
        """
        samples = ['S0', 'S1', 'S2', 'S3', 'S4',
                   'S5', 'S6', 'S7', 'S8', 'S9']
        features = ['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1']

        gen = compositional_effect_size_generator(max_alpha=1, reps=5,
                                                  intervals=2, n_species=5,
                                                  n_diff=1,
                                                  n_contaminants=2, lam=0.1,
                                                  balanced=False)
        # Discard the first yielded simulation; the expectations below
        # describe the second one.
        next(gen)
        table, metadata, truth = next(gen)

        first_half_row = [0.100000, 0.100000, 0.100000, 0.100000, 0.100000,
                          0.499977, 0.0000226989]
        second_half_row = [0.003817, 0.038168, 0.038168, 0.038168, 0.381679,
                           0.499977, 0.0000226989]
        exp_table = pd.DataFrame(
            [first_half_row] * 5 + [second_half_row] * 5,
            index=samples,
            columns=features,
        )

        # NOTE(review): ``check_less_precise`` was deprecated in pandas 1.1
        # and removed in 2.0 (superseded by ``rtol``/``atol``).  It is kept
        # because the pandas version this suite targets is not visible here
        # and ``rtol``/``atol`` do not exist before 1.1 -- confirm and migrate.
        pdt.assert_frame_equal(table, exp_table, check_less_precise=True)

        exp_metadata = pd.DataFrame(
            {'group': [0] * 5 + [1] * 5,
             'n_diff': [2] * 10,
             'effect_size': [10.0] * 10,
             'library_size': [10000] * 10},
            index=samples,
        )
        # Compare with columns in sorted order so the check does not depend
        # on column ordering.  ``reindex(columns=...)`` replaces
        # ``reindex_axis``, which was removed in pandas 1.0.
        metadata = metadata.reindex(columns=sorted(metadata.columns))
        exp_metadata = exp_metadata.reindex(
            columns=sorted(exp_metadata.columns))
        pdt.assert_frame_equal(metadata, exp_metadata)

        exp_truth = ['F0', 'F4']
        self.assertListEqual(truth, exp_truth)
Exemplo n.º 3
0
def compositional_effect_size(max_alpha, reps, intervals, n_species, n_diff,
                              n_contaminants, lam, library_size, balanced,
                              output_dir):
    """Run the compositional effect-size generator and save every result.

    ``output_dir`` is created with ``os.mkdir``, so the call raises when the
    directory already exists or its parent is missing.  For the i-th item
    yielded by the generator, ``deposit`` receives the three yielded objects
    plus the paths ``table.<i>.biom``, ``metadata.<i>.txt`` and
    ``truth.<i>.csv`` inside ``output_dir``.

    Every argument except ``output_dir`` is forwarded unchanged to
    ``compositional_effect_size_generator``.
    """
    os.mkdir(output_dir)
    simulations = compositional_effect_size_generator(
        max_alpha, reps, intervals, n_species, n_diff, n_contaminants, lam,
        library_size=library_size,
        balanced=balanced)

    # One pattern per output file, in the order ``deposit`` expects them.
    patterns = ("%s/table.%d.biom", "%s/metadata.%d.txt", "%s/truth.%d.csv")
    for index, (table, groups, truth) in enumerate(simulations):
        table_path, groups_path, truth_path = (
            pattern % (output_dir, index) for pattern in patterns)
        deposit(table, groups, truth, table_path, groups_path, truth_path)
Exemplo n.º 4
0
 def test_composition_effect_template(self):
     """Check the second simulation yielded when a template is supplied.

     The random seed is fixed to 0 before the generator is created.  Only
     the table values are compared, with ``atol`` and ``rtol`` of 1e-2.
     """
     np.random.seed(0)
     template_values = np.array(
         [7.0, 3.0, 1.0, 1.0, 2.0, 4.0, 6.0, 1.0, 10.0])
     simulations = compositional_effect_size_generator(
         max_alpha=1, reps=5,
         intervals=2, n_species=5, n_diff=1,
         n_contaminants=2, lam=0.1,
         fold_balance=False,
         template=template_values)

     # The first yielded simulation is discarded; the second one is checked.
     _, _, _ = next(simulations)
     table, _, _ = next(simulations)

     # Samples S0-S4 share one expected row, samples S5-S9 share another.
     first_half_row = [0.227273, 0.045455, 0.022727, 0.079545,
                       0.125, 0.499977, 0.000023]
     second_half_row = [0.008000, 0.016000, 0.008000, 0.028000,
                        0.440, 0.499977, 0.000023]
     expected = pd.DataFrame(
         np.array([first_half_row] * 5 + [second_half_row] * 5),
         index=['S0', 'S1', 'S2', 'S3', 'S4',
                'S5', 'S6', 'S7', 'S8', 'S9'],
         columns=['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1'],
     )
     npt.assert_allclose(table.values, expected.values,
                         atol=1e-2, rtol=1e-2)
Exemplo n.º 5
0
    def test_composition_effect_size_simple(self):
        """Check the first two simulations yielded with default options.

        For the first yielded simulation every sample is expected to have
        the same row and ``effect_size == 1.0``.  For the second one samples
        S0-S4 and S5-S9 are expected to differ and ``effect_size == 10.0``.
        """
        samples = ['S0', 'S1', 'S2', 'S3', 'S4',
                   'S5', 'S6', 'S7', 'S8', 'S9']
        features = ['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1']
        # Expected values of the last two columns (X0, X1) in every row.
        tail = [0.499977, 0.00002269]

        gen = compositional_effect_size_generator(max_alpha=1, reps=5,
                                                  intervals=2, n_species=5,
                                                  n_diff=1,
                                                  n_contaminants=2, lam=0.1)
        table, metadata, truth = next(gen)

        exp_table = pd.DataFrame(
            np.tile(np.array([.1] * 5 + tail), (10, 1)),
            index=samples,
            columns=features,
        )
        # NOTE(review): ``check_less_precise`` was deprecated in pandas 1.1
        # and removed in 2.0 (superseded by ``rtol``/``atol``).  It is kept
        # because the pandas version this suite targets is not visible here
        # and ``rtol``/``atol`` do not exist before 1.1 -- confirm and migrate.
        pdt.assert_frame_equal(table, exp_table, check_less_precise=True)

        exp_metadata = pd.DataFrame(
            {'group': [0] * 5 + [1] * 5,
             'n_diff': [2] * 10,
             'effect_size': [1.0] * 10,
             'library_size': [10000] * 10},
            index=samples,
        )
        # Compare with columns in sorted order so the check does not depend
        # on column ordering.  ``reindex(columns=...)`` replaces
        # ``reindex_axis``, which was removed in pandas 1.0.
        metadata = metadata.reindex(columns=sorted(metadata.columns))
        exp_metadata = exp_metadata.reindex(
            columns=sorted(exp_metadata.columns))
        pdt.assert_frame_equal(metadata, exp_metadata)

        exp_truth = ['F0', 'F4']
        self.assertListEqual(truth, exp_truth)

        # test to see if the groups are different
        table, metadata, truth = next(gen)

        first_half_row = np.array([0.357143] + [0.035714] * 4 + tail)
        second_half_row = np.array([0.035714] * 4 + [0.357143] + tail)
        exp_table = pd.DataFrame(
            closure(np.vstack([first_half_row] * 5 + [second_half_row] * 5)),
            index=samples,
            columns=features,
        )
        pdt.assert_frame_equal(table, exp_table, check_less_precise=True)

        exp_metadata = pd.DataFrame(
            {'group': [0] * 5 + [1] * 5,
             'n_diff': [2] * 10,
             'effect_size': [10.0] * 10,
             'library_size': [10000] * 10},
            index=samples,
        )
        metadata = metadata.reindex(columns=sorted(metadata.columns))
        exp_metadata = exp_metadata.reindex(
            columns=sorted(exp_metadata.columns))
        pdt.assert_frame_equal(metadata, exp_metadata)

        exp_truth = ['F0', 'F4']
        self.assertListEqual(truth, exp_truth)