def compositional_effect_size(max_alpha, reps, intervals, n_species, n_diff,
                              n_contaminants, lam, library_size, asymmetry,
                              fold_balance, template_biom,
                              template_sample_name, output_dir):
    """Run the effect-size simulation and write every result to disk.

    When ``template_biom`` is given, the table is loaded with
    ``load_table`` and the data of sample ``template_sample_name`` is
    forwarded to the generator as ``template``; otherwise ``template`` is
    ``None``.

    ``output_dir`` is created with ``os.mkdir`` (so it must not exist yet).
    Each ``(table, groups, truth)`` triple produced by
    ``compositional_effect_size_generator`` is passed to ``deposit``
    together with three paths inside ``output_dir``:
    ``table.<i>.biom``, ``metadata.<i>.txt`` and ``truth.<i>.csv``, where
    ``<i>`` is the zero-based position of the triple.
    """
    template = None
    if template_biom is not None:
        template = load_table(template_biom).data(
            id=template_sample_name, axis='sample')

    os.mkdir(output_dir)

    simulations = compositional_effect_size_generator(
        max_alpha, reps, intervals, n_species, n_diff, n_contaminants, lam,
        library_size=library_size,
        asymmetry=asymmetry,
        fold_balance=fold_balance,
        template=template,
    )
    for idx, (table, groups, truth) in enumerate(simulations):
        deposit(
            table, groups, truth,
            "%s/table.%d.biom" % (output_dir, idx),
            "%s/metadata.%d.txt" % (output_dir, idx),
            "%s/truth.%d.csv" % (output_dir, idx),
        )
def test_composition_effect_size_balanced(self):
    """Check the second simulation produced with ``balanced=False``.

    Two items are pulled from the generator and only the second one is
    inspected: its table, its metadata frame and its truth list are each
    compared against hard-coded expectations.
    """
    def sort_columns(frame):
        # ``DataFrame.reindex_axis`` was removed in pandas 1.0;
        # ``reindex(columns=...)`` is the equivalent call and is available
        # on old and new pandas alike.
        return frame.reindex(columns=sorted(frame.columns))

    gen = compositional_effect_size_generator(
        max_alpha=1, reps=5, intervals=2, n_species=5, n_diff=1,
        n_contaminants=2, lam=0.1, balanced=False)
    # Advance twice; the values left bound are those of the second item.
    for _ in range(2):
        table, metadata, truth = next(gen)

    samples = ['S0', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9']
    features = ['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1']

    # Expected proportions for samples S0-S4 and S5-S9 respectively.
    first_group = [0.100000, 0.100000, 0.100000, 0.100000, 0.100000,
                   0.499977, 0.0000226989]
    second_group = [0.003817, 0.038168, 0.038168, 0.038168, 0.381679,
                    0.499977, 0.0000226989]
    exp_table = pd.DataFrame(
        [first_group] * 5 + [second_group] * 5,
        index=samples,
        columns=features,
    )
    # NOTE(review): ``check_less_precise`` is deprecated since pandas 1.1
    # in favour of ``rtol``/``atol``; it is kept because those keywords do
    # not exist on older pandas releases.
    pdt.assert_frame_equal(table, exp_table, check_less_precise=True)

    exp_metadata = pd.DataFrame(
        {
            'group': [0] * 5 + [1] * 5,
            'n_diff': [2] * 10,
            'effect_size': [10.0] * 10,
            'library_size': [10000] * 10,
        },
        index=samples,
    )
    # Column order is not part of the contract: sort both sides first.
    pdt.assert_frame_equal(sort_columns(metadata),
                           sort_columns(exp_metadata))

    exp_truth = ['F0', 'F4']
    self.assertListEqual(truth, exp_truth)
def compositional_effect_size(max_alpha, reps, intervals, n_species, n_diff,
                              n_contaminants, lam, library_size, balanced,
                              output_dir):
    """Run the effect-size simulation and write every result to disk.

    ``output_dir`` is created with ``os.mkdir`` (so it must not exist yet).
    Each ``(table, groups, truth)`` triple produced by
    ``compositional_effect_size_generator`` is passed to ``deposit``
    together with three paths inside ``output_dir``:
    ``table.<i>.biom``, ``metadata.<i>.txt`` and ``truth.<i>.csv``, where
    ``<i>`` is the zero-based position of the triple.
    """
    os.mkdir(output_dir)

    simulations = compositional_effect_size_generator(
        max_alpha, reps, intervals, n_species, n_diff, n_contaminants, lam,
        library_size=library_size,
        balanced=balanced,
    )
    for idx, (table, groups, truth) in enumerate(simulations):
        deposit(
            table, groups, truth,
            "%s/table.%d.biom" % (output_dir, idx),
            "%s/metadata.%d.txt" % (output_dir, idx),
            "%s/truth.%d.csv" % (output_dir, idx),
        )
def test_composition_effect_template(self):
    """Check the second simulation when an explicit template is supplied.

    The NumPy random state is seeded, two items are pulled from the
    generator, and the table of the second one is compared (to within
    ``1e-2`` absolute and relative tolerance) against hard-coded values.
    """
    # test template
    np.random.seed(0)
    template = np.array([7.0, 3.0, 1.0, 1.0, 2.0, 4.0, 6.0, 1.0, 10.0])
    simulations = compositional_effect_size_generator(
        max_alpha=1, reps=5, intervals=2, n_species=5, n_diff=1,
        n_contaminants=2, lam=0.1, fold_balance=False, template=template)
    # Advance twice; the values left bound are those of the second item.
    for _ in range(2):
        table, metadata, truth = next(simulations)

    # Expected proportions for samples S0-S4 and S5-S9 respectively.
    first_group = [0.227273, 0.045455, 0.022727, 0.079545, 0.125,
                   0.499977, 0.000023]
    second_group = [0.008000, 0.016000, 0.008000, 0.028000, 0.440,
                    0.499977, 0.000023]
    exp_table = pd.DataFrame(
        np.array([first_group] * 5 + [second_group] * 5),
        index=['S0', 'S1', 'S2', 'S3', 'S4',
               'S5', 'S6', 'S7', 'S8', 'S9'],
        columns=['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1'],
    )
    npt.assert_allclose(table.values, exp_table.values,
                        atol=1e-2, rtol=1e-2)
def test_composition_effect_size_simple(self):
    """Check the first two simulations produced with default options.

    The first item is expected to have identical proportions in every
    sample (effect size 1.0); the second is expected to differ between
    samples S0-S4 and S5-S9 (effect size 10.0).  For each item the table,
    the metadata frame and the truth list are compared against hard-coded
    expectations.
    """
    samples = ['S0', 'S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8', 'S9']
    features = ['F0', 'F1', 'F2', 'F3', 'F4', 'X0', 'X1']
    contaminants = [0.499977, 0.00002269]

    def sort_columns(frame):
        # ``DataFrame.reindex_axis`` was removed in pandas 1.0;
        # ``reindex(columns=...)`` is the equivalent call and is available
        # on old and new pandas alike.
        return frame.reindex(columns=sorted(frame.columns))

    def expected_metadata(effect_size):
        # Only the effect size changes between the two simulations.
        return pd.DataFrame(
            {
                'group': [0] * 5 + [1] * 5,
                'n_diff': [2] * 10,
                'effect_size': [effect_size] * 10,
                'library_size': [10000] * 10,
            },
            index=samples,
        )

    gen = compositional_effect_size_generator(
        max_alpha=1, reps=5, intervals=2, n_species=5, n_diff=1,
        n_contaminants=2, lam=0.1)

    # First simulation: every sample shares the same proportions.
    table, metadata, truth = next(gen)
    uniform_row = np.array([.1, .1, .1, .1, .1] + contaminants)
    exp_table = pd.DataFrame(
        np.vstack([uniform_row] * 10),
        index=samples,
        columns=features,
    )
    # NOTE(review): ``check_less_precise`` is deprecated since pandas 1.1
    # in favour of ``rtol``/``atol``; it is kept because those keywords do
    # not exist on older pandas releases.
    pdt.assert_frame_equal(table, exp_table, check_less_precise=True)
    # Column order is not part of the contract: sort both sides first.
    pdt.assert_frame_equal(sort_columns(metadata),
                           sort_columns(expected_metadata(1.0)))
    exp_truth = ['F0', 'F4']
    self.assertListEqual(truth, exp_truth)

    # test to see if the groups are different
    table, metadata, truth = next(gen)
    first_group = np.array([0.357143] + [0.035714] * 4 + contaminants)
    second_group = np.array([0.035714] * 4 + [0.357143] + contaminants)
    exp_table = pd.DataFrame(
        closure(np.vstack([first_group] * 5 + [second_group] * 5)),
        index=samples,
        columns=features,
    )
    pdt.assert_frame_equal(table, exp_table, check_less_precise=True)
    pdt.assert_frame_equal(sort_columns(metadata),
                           sort_columns(expected_metadata(10.0)))
    exp_truth = ['F0', 'F4']
    self.assertListEqual(truth, exp_truth)