def testFuncBar(self):
    """Check that a function-mode bar plot only tabulates the requested namespace.

    For each ontology code (bp, cc, mf) ``run_viz`` is asked to write its
    plot data to ``tabfile``; the table is read back and its ``namespace``
    column must contain exactly the one matching namespace.
    """
    infile = testfile('eggnog_out.tab')
    tabfile = testfile('viz_f_filt.tab')
    expected_namespaces = (
        ('bp', 'biological_process'),
        ('cc', 'cellular_component'),
        ('mf', 'molecular_function'),
    )
    for onto, expected in expected_namespaces:
        run_viz('bar', self.img, infile,
                mode='f', nterms='2', meancol='NS_mean',
                target_onto=onto, barcol='1', tabfile=tabfile)
        df = pd.read_csv(tabfile, sep='\t')
        # unique() returns an ndarray. Comparing an ndarray with a list inside
        # assertEqual raises ValueError (ambiguous truth value) as soon as more
        # than one namespace is present, so compare plain lists instead and get
        # a normal assertion failure with a readable diff.
        self.assertEqual(df.namespace.unique().tolist(), [expected])
def testMultCols(self):
    """Expand taxonomy with three intensity columns and check one phylum value.

    The Proteobacteria phylum row must hold log2(70) in column ``int3``.
    """
    tax_path = testfile('multiple_tax.tab')
    int_path = testfile('multiple_int.tab')
    peptide_columns = {
        'pep_colname_int': 'peptide',
        'pep_colname_func': 'peptide',
        'pep_colname_tax': 'peptide',
    }
    tax_df = expand.expand(
        't',
        sinfo='{"s1": ["int1", "int2", "int3"]}',
        int_file=int_path,
        data_dir=TEST_DIR,
        tax_file=tax_path,
        tax_colname='lca',
        **peptide_columns
    )
    proteobacteria = tax_df.query("rank == 'phylum' and taxon_name == 'Proteobacteria'")
    self.assertEqual(proteobacteria['int3'].values[0], np.log2(70))
def testFtDist(self):
    """Run the ``ft_dist`` plot both ways, then confirm a tabfile gets written."""
    infile = testfile('ft_out.tab')
    shared = dict(meancol="s1_mean", nterms="all")
    t_dist_args = dict(whichway='t_dist', id="GO:0008150", target_rank="genus")

    # t dist
    run_viz('ft_dist', self.img, infile, **shared, **t_dist_args)

    # f dist
    run_viz('ft_dist', self.img, infile,
            whichway='f_dist', target_onto="bp", id=209, **shared)

    # same t dist call again, this time asking for the plot data on disk
    tabfile = testfile("tmp")
    run_viz('ft_dist', self.img, infile, tabfile=tabfile, **shared, **t_dist_args)
    self.assertTrue(os.path.exists(tabfile))
    os.remove(tabfile)
def testSingleBasic(self):
    """Expand a single-sample taxonomy table and check one species value.

    The 'Helicobacter pylori' row must appear exactly once and hold
    log2(100) in column ``int``.
    """
    tax_path = testfile('simple_tax.tab')
    int_path = testfile('simple_int.tab')
    tax_df = expand.expand('t',
                           sinfo='{"s1": ["int"]}',
                           int_file=int_path,
                           pep_colname_int='peptide',
                           pep_colname_func='peptide',
                           pep_colname_tax='peptide',
                           data_dir=TEST_DIR,
                           tax_file=tax_path,
                           tax_colname='lca')
    hpylori_int = tax_df.query("taxon_name == 'Helicobacter pylori'")['int'].values
    # Passing the whole ndarray to assertEqual only works when it has exactly
    # one element; with zero or several rows it raises or misbehaves instead of
    # reporting a clean failure. Pin the row count, then compare the scalar.
    self.assertEqual(len(hpylori_int), 1)
    self.assertEqual(hpylori_int[0], np.log2(100))
def testDA(self):
    """Expand GO functions to a file, run ``stat.stat`` on it, check p-values.

    One term is expected to have p > 0.05 and three terms p <= 0.05.
    Both the expanded table and the tested table are written to disk.
    """
    func_path = testfile('multiple_func.tab')
    int_path = testfile('int_ttest.tab')
    expanded_path = testfile('go_expanded_ttest.tab')
    tested_path = testfile('go_tested.tab')

    # called for its side effect: writes expanded_path for stat.stat to read
    expand.expand(
        'f',
        sinfo=TTEST_SINFO,
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        outfile=expanded_path,
        func_file=func_path,
        func_colname='go',
        ontology='go',
    )
    tested = stat.stat(
        expanded_path,
        sinfo=TTEST_SINFO,
        paired=False,
        parametric=True,
        ontology='go',
        mode='f',
        outfile=tested_path,
    )

    p_values = tested['p']
    # make sure false is > 0.05 and trues are less than 0.05
    self.assertGreater(p_values['GO:0008152'], 0.05)
    significant = p_values[['GO:0022610', 'GO:0000003', 'GO:0032505']]
    self.assertTrue(significant.le(0.05).all())
def testReadAndDE(self):
    """Expand in function-taxonomy ('ft') mode and check one group mean.

    The expected ``s1_mean`` for the (Clostridioides, GO:0008150) row is
    computed by hand from the intensity table reproduced below.
    """
    func = testfile('multiple_func.tab')
    int_path = testfile('int_ttest.tab')
    # intensity:
    #   peptide  int1  int2  int3  int4  int5  int6
    #   A          12    20    15    12    21    10
    #   B          20    30    20  3500  2000  3000
    #   C        1000  1200   900    12    13    10
    # todo - add test for non-slim
    tax = testfile('multiple_tax.tab')
    ft_out = testfile('ft_out.tab')
    ft_df = expand.expand(mode='ft',
                          sinfo=tu.TTEST_SINFO,
                          int_file=int_path,
                          pep_colname_int='peptide',
                          pep_colname_func='peptide',
                          pep_colname_tax='peptide',
                          data_dir=TEST_DIR,
                          outfile=ft_out,
                          func_file=func,
                          func_colname='go',
                          ontology='go',
                          slim_down=True,
                          tax_file=tax,
                          tax_colname='lca')
    # make sure calculated mean is accurate
    # b and c both map to 8150
    exp_s1_mean = np.log2(((20 + 1000) + (1200 + 30) + (900 + 20)) / 3)
    is_target_row = (ft_df['taxon_name'] == 'Clostridioides') & (ft_df['go'] == 'GO:0008150')
    matching_means = ft_df.loc[is_target_row, 's1_mean']
    # Take the first match by position. Plain ``series[0]`` is a label lookup
    # that only falls back to positional access on a non-integer index, and
    # that fallback is deprecated in recent pandas releases.
    obtained_mean = matching_means.iloc[0]
    self.assertEqual(exp_s1_mean, obtained_mean)
def testTaxTTests(self):
    """Expand taxonomy to a file, run ``stat.stat`` with parametric=False, check results.

    Checks p-values for three taxon ids and one summed intensity value.
    """
    tax_path = testfile('multiple_tax.tab')
    int_path = testfile('int_ttest.tab')
    expanded_path = testfile('expand_taxttest.tab')

    # called for its side effect: writes expanded_path for stat.stat to read
    expand.expand(
        't',
        sinfo=TTEST_SINFO,
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        outfile=expanded_path,
        tax_file=tax_path,
        tax_colname='lca',
    )
    tested = stat.stat(
        expanded_path,
        sinfo=TTEST_SINFO,
        paired=False,
        parametric=False,
        ontology=None,
        mode=None,
        outfile=None,
    )

    p_values = tested['p']
    # make sure false is > 0.05 and trues are less than 0.05
    self.assertGreater(p_values[210], 0.05)
    self.assertTrue(p_values[[1496, 1870884]].le(0.05).all())

    # also, make sure firmicutes phylum is sum of c difficile and clostridiaceae
    firmicutes_int1 = tested['int1'][1239]
    self.assertEqual(firmicutes_int1, np.log2(1020))
def testDiffAbundEc(self):
    """Expand EC numbers to a file, run ``stat.stat`` on it, check p-values.

    One EC term is expected to have p > 0.05 and two terms p <= 0.05.
    """
    func_path = testfile('multiple_func.tab')
    int_path = testfile('int_ttest.tab')
    expanded_path = testfile('ec_ttest.tab')
    tested_path = testfile('ec_ttest_tested.tab')
    peptide_columns = dict.fromkeys(
        ('pep_colname_int', 'pep_colname_func', 'pep_colname_tax'), 'peptide')

    expand.expand('f',
                  sinfo=TTEST_SINFO,
                  int_file=int_path,
                  data_dir=TEST_DIR,
                  outfile=expanded_path,
                  func_file=func_path,
                  func_colname='ec',
                  ontology='ec',
                  **peptide_columns)
    tested = stat.stat(expanded_path,
                       sinfo=TTEST_SINFO,
                       paired=False,
                       parametric=True,
                       ontology='ec',
                       mode='f',
                       outfile=tested_path)

    p_values = tested['p']
    # make sure false is > 0.05 and trues are less than 0.05
    self.assertGreater(p_values['3.4.11.-'], 0.05)
    self.assertTrue(p_values[['3.4.21.70', '1.2.-.-']].le(0.05).all())
def testSimpleEc(self):
    """Expand EC numbers for one sample and check two rows of column ``int``."""
    func_path = testfile('simple_ec.tab')
    int_path = testfile('simple_int.tab')
    ec_df = expand.expand(
        'f',
        sinfo='{"s1": ["int"]}',
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        func_file=func_path,
        func_colname='ec',
        ontology='ec',
    )
    expected_by_ec = (
        ("3.4.11.-", np.log2(100)),
        ("3.4.-.-", np.log2(300)),
    )
    for ec_id, expected in expected_by_ec:
        self.assertEqual(ec_df.loc[ec_id]['int'], expected)
def testCog(self):
    """Expand COG categories and check one group mean and one raw column value."""
    func_path = testfile('multiple_func.tab')
    int_path = testfile('multiple_int.tab')
    cog_df = expand.expand(
        'f',
        sinfo='{"s1": ["int1", "int2", "int3"]}',
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        func_file=func_path,
        func_colname='cog',
        ontology='cog',
    )
    expected_c_mean = np.log2((10+20+70)/3)
    self.assertEqual(cog_df.loc["C"]['s1_mean'], expected_c_mean)
    expected_n_int2 = np.log2(30)
    self.assertEqual(cog_df.loc["N"]['int2'], expected_n_int2)
def testSingleInt(self):
    """Expand GO terms for one sample and check two rows of column ``int``."""
    func_path = testfile('simple_func.tab')
    int_path = testfile('simple_int.tab')
    go_df = expand.expand(
        'f',
        sinfo='{"s1": ["int"]}',
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        func_file=func_path,
        func_colname='go',
        ontology='go',
    )
    expected_by_term = (
        ("GO:0022610", np.log2(200)),
        ("GO:0008152", np.log2(100)),
    )
    for go_id, expected in expected_by_term:
        self.assertEqual(go_df.loc[go_id]['int'], expected)
def testDifferentNames(self):
    """Expand in 'ft' mode when the intensity file uses a different peptide column name.

    The intensity peptide column is 'Sequence' while the function and
    taxonomy files use 'peptide'; the result must still contain 'A_mean'.
    """
    tax_path = testfile('ft_tax.tab')
    func_path = testfile('ft_func.tab')
    int_path = testfile('ft_int.tab')
    ft = expand.expand(
        'ft',
        sinfo='{"A": ["int"]}',
        int_file=int_path,
        pep_colname_int='Sequence',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        tax_file=tax_path,
        tax_colname='lca',
        func_file=func_path,
        func_colname="go",
    )
    result_names = list(ft)
    self.assertIn("A_mean", result_names)
def testWrite(self):
    """Expand taxonomy with an ``outfile`` and verify the table written to disk.

    The written file is read back and the 'Clostridioides difficile' row
    must have a ``samp1_mean`` of approximately log2(200).
    """
    tax_path = testfile('simple_tax.tab')
    int_path = testfile('simple_int.tab')
    out_path = testfile('taxonomy_write_simple.tab')
    # called for its side effect: only the file on disk is inspected below
    expand.expand(
        mode='t',
        sinfo='{"samp1": ["int"]}',
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        outfile=out_path,
        tax_file=tax_path,
        tax_colname='lca',
    )
    written = pd.read_table(out_path)
    cdiff = written.query("taxon_name == 'Clostridioides difficile'")
    self.assertAlmostEqual(cdiff['samp1_mean'].values[0], np.log2(200))
def testPCABig(self):
    """Run the ``viz`` CLI with ``--plottype pca --calculate_sep``; expect exit status 0."""
    infile = testfile('tax_filt_out.tab')
    sampfile = testfile('rudney_samples.tab')
    imgfile = testfile('cli_pca_viz.png')
    # Pass argv as a list without a shell, so file paths containing spaces or
    # shell metacharacters reach the CLI as single, unmodified arguments.
    cmd = [
        'python3', 'metaquantome/cli.py', 'viz',
        '-m', 't',
        '--plottype', 'pca',
        '--infile', infile,
        '--img', imgfile,
        '--samps', sampfile,
        '--calculate_sep',
    ]
    test_status = subprocess.call(cmd)
    self.assertEqual(test_status, 0)
def testMultipleEc(self):
    """Expand EC numbers with three intensity columns; check two values and one NaN."""
    func_path = testfile('multiple_func.tab')
    int_path = testfile('multiple_int.tab')
    peptide_columns = dict.fromkeys(
        ('pep_colname_int', 'pep_colname_func', 'pep_colname_tax'), 'peptide')
    ec_df = expand.expand('f',
                          sinfo='{"s1": ["int1", "int2", "int3"]}',
                          int_file=int_path,
                          data_dir=TEST_DIR,
                          func_file=func_path,
                          func_colname='ec',
                          ontology='ec',
                          **peptide_columns)

    peptidase_row = ec_df.loc['3.4.-.-']
    self.assertEqual(peptidase_row['int1'], np.log2(50))

    oxidoreductase_row = ec_df.loc['1.2.-.-']
    self.assertEqual(oxidoreductase_row['int2'], np.log2(50))

    # missing values (zeros, nans, NA's, etc) are turned into NaN's
    self.assertTrue(np.isnan(ec_df.loc['1.2.-.-']['int3']))
def testViz(self):
    """Run the ``viz`` CLI for a taxonomy bar plot; expect exit status 0."""
    infile = testfile('taxonomy_write_simple.tab')
    imgfile = testfile('cli_bar_viz.png')
    # Pass argv as a list without a shell: the JSON sample description needs
    # no manual shell quoting, and paths with spaces stay single arguments.
    cmd = [
        'python3', 'metaquantome/cli.py', 'viz',
        '-m', 't',
        '--plottype', 'bar',
        '--infile', infile,
        '--img', imgfile,
        '--samps', '{"samp1": ["int"]}',
        '--nterms', '2',
        '--meancol', 'samp1_mean',
        '--target_rank', 'genus',
    ]
    test_status = subprocess.call(cmd)
    self.assertEqual(test_status, 0)
def testMultipleInt(self):
    """Expand GO terms with three intensity columns; check two values and one NaN.

    Returns the expanded data frame, as the original implementation does.
    """
    func_path = testfile('multiple_func.tab')
    int_path = testfile('multiple_int.tab')
    peptide_columns = dict.fromkeys(
        ('pep_colname_int', 'pep_colname_func', 'pep_colname_tax'), 'peptide')
    go_df = expand.expand('f',
                          sinfo='{"s1": ["int1", "int2", "int3"]}',
                          int_file=int_path,
                          data_dir=TEST_DIR,
                          func_file=func_path,
                          func_colname='go',
                          ontology='go',
                          **peptide_columns)

    metabolic_int1 = go_df.loc['GO:0008152']['int1']
    self.assertEqual(metabolic_int1, np.log2(10))

    adhesion_int2 = go_df.loc['GO:0022610']['int2']
    self.assertEqual(adhesion_int2, np.log2(30))

    # missing values (zeros, nans, NA's, etc) are turned into NaN's
    reproduction_int3 = go_df.loc['GO:0000003']['int3']
    self.assertTrue(np.isnan(reproduction_int3))

    # NOTE(review): newer unittest versions warn when a test method returns a
    # value; kept because callers outside this view may rely on it - confirm.
    return go_df
def testBasicTaxBar(self):
    """Draw a taxonomy bar plot with and without a tabfile.

    The first call must create ``tabfile`` (removed afterwards); the second
    call repeats the plot without requesting one.
    """
    infile = testfile('taxonomy_write_simple.tab')
    tabfile = testfile('taxonomy_plot_out.tab')
    bar_args = dict(mode='t', nterms='2', meancol='samp1_mean',
                    target_rank="genus", barcol="6")

    # with tabfile
    run_viz('bar', self.img, infile, tabfile=tabfile, **bar_args)
    self.assertTrue(os.path.exists(tabfile))
    os.remove(tabfile)

    # without tabfile
    run_viz('bar', self.img, infile, **bar_args)
def testFuncBar(self):
    """Run the ``viz`` CLI for a function bar plot with a tabfile; expect exit status 0."""
    infile = testfile('eggnog_out.tab')
    imgfile = testfile('test_eggnog_viz.png')
    samps = testfile('rudney_samples.tab')
    tabfile = testfile("eggnog_viz_file.tab")
    # Pass argv as a list without a shell, so paths with spaces stay intact.
    # This also removes the accidental implicit string concatenation the
    # shell-string version relied on between the plot type and '--infile'.
    cmd = [
        'python3', 'metaquantome/cli.py', 'viz',
        '-m', 'f',
        '--plottype', 'bar',
        '--infile', infile,
        '--img', imgfile,
        '--samps', samps,
        '--nterms', '20',
        '--meancol', 'NS_mean',
        '--target_onto', 'bp',
        '--tabfile', tabfile,
    ]
    test_status = subprocess.call(cmd)
    self.assertEqual(test_status, 0)
def testCogTTest(self):
    """Expand COG categories to a file, run ``stat.stat`` with control group 's2'.

    After indexing the result by ``id``, one category is expected to have
    p > 0.05 and two categories p <= 0.05 in column ``p_s1_over_s2``.
    """
    func_path = testfile('multiple_func.tab')
    int_path = testfile('int_ttest.tab')
    expanded_path = testfile('cog_ttest.tab')

    # called for its side effect: writes expanded_path for stat.stat to read
    expand.expand(
        'f',
        sinfo=TTEST_SINFO,
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        outfile=expanded_path,
        func_file=func_path,
        func_colname='cog',
        ontology='cog',
    )
    tested = stat.stat(
        expanded_path,
        sinfo=TTEST_SINFO,
        paired=False,
        parametric=True,
        ontology='cog',
        mode='f',
        control_group='s2',
        outfile=None,
    )

    # make sure false is > 0.05 and trues are less than 0.05
    tested = tested.set_index('id')
    p_values = tested['p_s1_over_s2']
    self.assertGreater(p_values['C'], 0.05)
    self.assertTrue(p_values[['N', 'D']].le(0.05).all())
def testSlimDown(self):
    """Expand GO terms with ``slim_down=True`` and check ids against the slim.

    Every returned id other than 'unknown' must be a key of ``self.db.goslim``.
    The expanded table is also written to 'eggnog_out.tab'.
    """
    func_path = testfile('func_eggnog.tab')
    int_path = testfile('int_eggnog.tab')
    out_path = testfile('eggnog_out.tab')
    sample_info = '{"NS": ["int737NS", "int852NS", "int867NS"], "WS": ["int737WS", "int852WS", "int867WS"]}'
    go_df = expand.expand(
        'f',
        sinfo=sample_info,
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        func_file=func_path,
        func_colname='go',
        ontology='go',
        slim_down=True,
        outfile=out_path,
    )
    # test that all go terms are in slim
    # potential of unknown, so just drop that
    returned_gos = set(go_df['id']) - {'unknown'}
    slim_terms = self.db.goslim.keys()
    self.assertTrue(returned_gos.issubset(slim_terms))
def testGOVolcano(self):
    """Draw a volcano plot from 'go_tested.tab' with ``gosplit=True``.

    The test passes if ``run_viz`` completes without raising.
    """
    volcano_args = {
        'textannot': "id",
        'fc_name': "log2fc_s1_over_s2",
        'fc_corr_p': "corrected_p_s1_over_s2",
        'gosplit': True,
    }
    tested_table = testfile('go_tested.tab')
    run_viz('volcano', self.img, tested_table, **volcano_args)
def testVolcano(self):
    """Draw a volcano plot, then repeat it with a tabfile and check the file exists."""
    infile = testfile('cli_mult_test_out.tab')
    volcano_args = dict(textannot="id",
                        fc_corr_p="corrected_p_s1_over_s2",
                        fc_name="log2fc_s1_over_s2")

    run_viz('volcano', self.img, infile, **volcano_args)

    # test tabfile
    tabfile = testfile('taxonomy_plot_out.tab')
    run_viz('volcano', self.img, infile, tabfile=tabfile, **volcano_args)
    self.assertTrue(os.path.exists(tabfile))
    os.remove(tabfile)
def testPCA(self):
    """Draw a PCA plot from 'go_tested.tab' with ``calculate_sep=False``.

    The test passes if ``run_viz`` completes without raising.
    """
    tested_table = testfile('go_tested.tab')
    pca_args = {'sinfo': TTEST_SINFO, 'calculate_sep': False}
    run_viz('pca', self.img, tested_table, **pca_args)
def testNopep(self):
    """Expand taxonomy with ``nopep=True`` from a single ``nopep_file``.

    No intensity file is passed; the Proteobacteria phylum row must hold
    log2(70) in column ``int3``.
    """
    nopep_path = testfile('nopep.tab')
    tax_df = expand.expand(
        't',
        sinfo='{"s1": ["int1", "int2", "int3"]}',
        int_file=None,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        tax_colname='lca',
        nopep=True,
        nopep_file=nopep_path,
    )
    proteobacteria = tax_df.query("rank == 'phylum' and taxon_name == 'Proteobacteria'")
    self.assertEqual(proteobacteria['int3'].values[0], np.log2(70))
def testHeatmapViz(self):
    """Run the ``viz`` CLI for an EC heatmap, with and without significance filtering.

    Both invocations must exit with status 0.
    """
    infile = testfile('ec_ttest_tested.tab')
    imgfile = testfile('cli_heatmap_viz.png')
    # Pass argv as a list without a shell: TTEST_SINFO is handed over verbatim
    # instead of being wrapped in hand-written single quotes (which padded it
    # with spaces and would break on any embedded quote), and paths containing
    # spaces stay single arguments.
    base_cmd = [
        'python3', 'metaquantome/cli.py', 'viz',
        '-m', 'f',
        '--ontology', 'ec',
        '--plottype', 'heatmap',
        '--infile', infile,
        '--img', imgfile,
        '--samps', TTEST_SINFO,
    ]

    # filtered to significant terms
    test_status = subprocess.call(base_cmd + ['--filter_to_sig', '--alpha', '0.5'])
    self.assertEqual(test_status, 0)

    # unfiltered
    test_status2 = subprocess.call(base_cmd)
    self.assertEqual(test_status2, 0)
def testParentIntensityHigher(self):
    """
    make sure that parents always have higher intensity than children
    """
    tax_path = testfile('test_root_sum_uni.tab')
    int_path = testfile('test_root_sum_int.tab')
    tax_df = expand.expand(
        't',
        sinfo='{"A": ["int"]}',
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        tax_file=tax_path,
        tax_colname='taxon_id',
    )

    # filter to phylum and below
    has_rank = tax_df["rank"] != 'no rank'
    below_superkingdom = tax_df["rank"] != 'superkingdom'
    ints = tax_df[has_rank & below_superkingdom]['int']

    # firmicutes phylum should be highest
    self.assertEqual(ints.max(), ints[1239])

    # strep genus intensity should be greater than or equal to that of strep species
    genus_int = ints[1301]
    for species_id in (1302, 1305):
        self.assertGreaterEqual(genus_int, ints[species_id])
def testFilter(self):
    """Expand taxonomy to a file and check ``run_filter`` at two thresholds.

    With every threshold at 0 the filtered ids must equal the expanded ids;
    with ``qthreshold=3`` ids 1496 and 1870884 must be absent.
    """
    int_path = testfile('filt_int.tab')
    tax_path = testfile('multiple_tax.tab')
    expanded_path = testfile('expand_out.tab')
    expanded = expand(
        't',
        TTEST_SINFO,
        int_file=int_path,
        pep_colname_int='peptide',
        pep_colname_func='peptide',
        pep_colname_tax='peptide',
        data_dir=TEST_DIR,
        outfile=expanded_path,
        tax_file=tax_path,
        tax_colname='lca',
    )
    exp_ids = set(expanded['id'])

    # thresholds left at zero in both runs; only qthreshold varies
    fixed_thresholds = dict(min_child_non_leaf=0, min_child_nsamp=0,
                            min_peptides=0, min_pep_nsamp=0)

    # no filtering
    nofilt = run_filter(expanded_path, TTEST_SINFO, ontology=None, mode="t",
                        qthreshold=0, **fixed_thresholds)
    # make sure that ids are the same when no filtering is done
    self.assertSetEqual(set(nofilt['id']), exp_ids)

    # now, require 3 intensities per group. we shouldn't see 1496 or 1870884
    filt3 = run_filter(expanded_path, TTEST_SINFO, ontology=None, mode="t",
                       qthreshold=3, **fixed_thresholds)
    filt3_ids = set(filt3['id'])
    for dropped_id in (1496, 1870884):
        self.assertNotIn(dropped_id, filt3_ids)
def testVizTabfile(self):
    """Run the ``viz`` CLI with ``--tabfile`` and check the written table has 3 lines.

    The CLI must exit with status 0; the tabfile is removed afterwards.
    """
    infile = testfile('taxonomy_write_simple.tab')
    imgfile = testfile('cli_bar_viz2.png')
    tabfile = testfile("tmp")
    # Pass argv as a list without a shell: the JSON sample description needs
    # no manual shell quoting, and paths with spaces stay single arguments.
    cmd = [
        'python3', 'metaquantome/cli.py', 'viz',
        '-m', 't',
        '--plottype', 'bar',
        '--infile', infile,
        '--img', imgfile,
        '--samps', '{"samp1": ["int"]}',
        '--nterms', '2',
        '--meancol', 'samp1_mean',
        '--target_rank', 'genus',
        '--tabfile', tabfile,
    ]
    test_status = subprocess.call(cmd)
    self.assertEqual(test_status, 0)

    # Count newline characters in-process (the same quantity `wc -l` reports)
    # rather than depending on an external `wc` binary being available.
    with open(tabfile, 'rb') as handle:
        n_newlines = handle.read().count(b'\n')
    self.assertEqual(n_newlines, 3)
    os.remove(tabfile)
def testMultipleInt(self):
    """Run the ``expand`` then ``stat`` CLI commands for COG and check corrected p-values.

    Both commands must exit with status 0. The stat output is read back
    indexed by ``id``; category 'C' is expected to have corrected_p > 0.05
    and categories 'N' and 'D' corrected_p <= 0.05.
    """
    exp_out = testfile('cli_mult_out.tab')
    # Both commands are passed as argv lists without a shell, so TTEST_SINFO
    # and the file paths arrive verbatim, with no hand-written quoting.
    exp_command = [
        'python3', 'metaquantome/cli.py', 'expand',
        '-m', 'f',
        '--pep_colname_int', 'peptide',
        '--pep_colname_func', 'peptide',
        '--outfile', exp_out,
        '-i', 'metaquantome/data/test/int_ttest.tab',
        '--func_file', 'metaquantome/data/test/multiple_func.tab',
        '--func_colname', 'cog',
        '--ontology', 'cog',
        '--samps', TTEST_SINFO,
        '--data_dir', TEST_DIR,
    ]
    exp_status = subprocess.call(exp_command)
    self.assertEqual(exp_status, 0)

    test_out = testfile('cli_mult_test_out.tab')
    test_command = [
        'python3', 'metaquantome/cli.py', 'stat',
        '-m', 'f',
        '--outfile', test_out,
        '--file', exp_out,
        '--ontology', 'cog',
        '--samps', TTEST_SINFO,
        '--parametric', 'True',
    ]
    test_status = subprocess.call(test_command)
    self.assertEqual(test_status, 0)

    test_df = pd.read_csv(test_out, sep="\t", index_col='id')
    corrected_p = test_df['corrected_p']
    # make sure false is > 0.05 and trues are less than 0.05
    self.assertTrue(corrected_p['C'] > 0.05)
    self.assertTrue(corrected_p[['N', 'D']].le(0.05).all())