def test_simple(self):
    """Check ``pcoa`` on ``PCoA_sample_data_3`` against stored expectations.

    Expected eigenvalues and proportions explained are hard-coded; the
    expected sample coordinates are loaded from
    ``exp_PCoAEigenResults_site``. The comparison is made with
    ``ignore_directionality=True``.
    """
    axes = ['PC%d' % n for n in range(1, 10)]
    site_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                'PC.593', 'PC.355', 'PC.607', 'PC.634']

    expected_eigvals = pd.Series(
        [0.51236726, 0.30071909, 0.26791207, 0.20898868, 0.19169895,
         0.16054235, 0.15017696, 0.12245775, 0.0],
        index=axes)
    expected_proportions = pd.Series(
        [0.2675738328, 0.157044696, 0.1399118638, 0.1091402725,
         0.1001110485, 0.0838401162, 0.0784269939, 0.0639511764, 0.0],
        index=axes)
    expected_coords = pd.DataFrame(
        np.loadtxt(get_data_path('exp_PCoAEigenResults_site')),
        index=site_ids, columns=axes)

    expected = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=expected_eigvals,
        samples=expected_coords,
        proportion_explained=expected_proportions)

    distances = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
    observed = pcoa(distances)

    assert_ordination_results_equal(observed, expected,
                                    ignore_directionality=True)
def test_scaling1(self):
    """Check ``ca`` with scaling 1 against hard-coded expected scores.

    Row/column labels come from the ``self.feature_ids``,
    ``self.sample_ids`` and ``self.pc_ids`` fixtures. Results are compared
    to 5 decimals with ``ignore_directionality=True``.
    """
    # p. 458
    feature_scores = np.array([[1.31871, -0.34374],    # V
                               [-0.37215, 1.48150],
                               [-0.99972, -0.92612]])
    sample_scores = np.array([[-0.26322, -0.17862],    # F
                              [-0.06835, 0.27211],
                              [0.51685, -0.09517]])

    expected = OrdinationResults(
        'CA', 'Correspondance Analysis',
        eigvals=pd.Series(np.array([0.09613302, 0.04094181]), self.pc_ids),
        features=pd.DataFrame(feature_scores, self.feature_ids,
                              self.pc_ids),
        samples=pd.DataFrame(sample_scores, self.sample_ids, self.pc_ids))

    observed = ca(self.contingency, 1)

    assert_ordination_results_equal(expected, observed, decimal=5,
                                    ignore_directionality=True)
def test_simple(self):
    """Compare ``pcoa`` output for ``PCoA_sample_data_3`` with a reference.

    The reference is assembled from literal eigenvalues / proportions
    explained and from the coordinates stored in
    ``exp_PCoAEigenResults_site``; directionality is ignored in the
    comparison.
    """
    # Run the method under test first, then assemble the reference.
    observed = pcoa(
        DistanceMatrix.read(get_data_path('PCoA_sample_data_3')))

    pc_labels = ['PC%d' % k for k in range(1, 10)]
    ids = [
        'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
        'PC.355', 'PC.607', 'PC.634'
    ]
    eigenvalues = [
        0.51236726, 0.30071909, 0.26791207, 0.20898868, 0.19169895,
        0.16054235, 0.15017696, 0.12245775, 0.0
    ]
    proportions = [
        0.2675738328, 0.157044696, 0.1399118638, 0.1091402725,
        0.1001110485, 0.0838401162, 0.0784269939, 0.0639511764, 0.0
    ]
    coordinates = np.loadtxt(get_data_path('exp_PCoAEigenResults_site'))

    reference = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=pd.Series(eigenvalues, index=pc_labels),
        samples=pd.DataFrame(coordinates, index=ids, columns=pc_labels),
        proportion_explained=pd.Series(proportions, index=pc_labels))

    assert_ordination_results_equal(observed, reference,
                                    ignore_directionality=True)
def test_scaling2(self):
    """Check ``ca`` with scaling 2 against hard-coded expected scores.

    Row/column labels come from the ``self.feature_ids``,
    ``self.sample_ids`` and ``self.pc_ids`` fixtures. Results are compared
    to 5 decimals with ``ignore_directionality=True``.
    """
    # p. 460 L&L 1998
    feature_scores = np.array([[0.40887, -0.06955],    # F_hat
                               [-0.11539, 0.29977],
                               [-0.30997, -0.18739]])
    sample_scores = np.array([[-0.84896, -0.88276],    # V_hat
                              [-0.22046, 1.34482],
                              [1.66697, -0.47032]])

    expected = OrdinationResults(
        'CA', 'Correspondance Analysis',
        eigvals=pd.Series(np.array([0.09613302, 0.04094181]), self.pc_ids),
        features=pd.DataFrame(feature_scores, self.feature_ids,
                              self.pc_ids),
        samples=pd.DataFrame(sample_scores, self.sample_ids, self.pc_ids))

    observed = ca(self.contingency, 2)

    assert_ordination_results_equal(expected, observed, decimal=5,
                                    ignore_directionality=True)
def test_scaling2(self):
    """Check ``rda`` with ``scaling=2`` against stored reference data.

    Feature, sample, sample-constraint and biplot scores are loaded from
    the ``example2_*`` data files; eigenvalues and proportions explained
    are hard-coded. The comparison uses 6 decimals and
    ``ignore_directionality=True``.
    """
    scores = rda(self.Y, self.X, scaling=2)

    # Load data as computed with vegan 2.0-8
    vegan_features = pd.DataFrame(
        np.loadtxt(get_data_path('example2_species_scaling2_from_vegan')),
        index=self.feature_ids,
        columns=self.pc_ids)
    vegan_samples = pd.DataFrame(
        np.loadtxt(get_data_path('example2_site_scaling2_from_vegan')),
        index=self.sample_ids,
        columns=self.pc_ids)

    sample_constraints = pd.DataFrame(
        np.loadtxt(get_data_path('example2_sample_constraints_scaling2')),
        index=self.sample_ids,
        columns=self.pc_ids)

    # The biplot file may hold fewer axes than ``self.pc_ids``, so the
    # column labels are cropped to the number of columns actually loaded.
    mat = np.loadtxt(get_data_path('example2_biplot_scaling2'))
    cropped_pc_ids = self.pc_ids[:mat.shape[1]]
    biplot_scores = pd.DataFrame(mat, index=self.env_ids,
                                 columns=cropped_pc_ids)

    proportion_explained = pd.Series([0.44275783, 0.25614586,
                                      0.15280354, 0.10497021,
                                      0.02873375, 0.00987052,
                                      0.00471828],
                                     index=self.pc_ids)

    eigvals = pd.Series([25.897954, 14.982578, 8.937841, 6.139956,
                         1.680705, 0.577350, 0.275984],
                        index=self.pc_ids)

    exp = OrdinationResults(
        'RDA', 'Redundancy Analysis',
        samples=vegan_samples,
        features=vegan_features,
        sample_constraints=sample_constraints,
        biplot_scores=biplot_scores,
        proportion_explained=proportion_explained,
        eigvals=eigvals)

    assert_ordination_results_equal(scores, exp,
                                    ignore_directionality=True,
                                    decimal=6)
def test_ordination_integration_callbacks(self):
    """Check that an ``Empress`` built with an ordination renders callbacks.

    Verifies that the table, sample metadata and ordination held by the
    visualization equal the inputs while being distinct objects (table and
    metadata), that ``_emperor`` is an ``Emperor`` instance, and that the
    output of ``make_empress`` contains both node-menu callback names.
    """
    viz = Empress(self.tree, self.table, self.sample_metadata,
                  ordination=self.pcoa)

    # table should be unchanged and be a different object
    self.assertEqual(self.table, viz.table)
    self.assertIsNot(self.table, viz.table)

    # sample metadata should be unchanged and be a different object
    assert_frame_equal(self.sample_metadata, viz.samples)
    self.assertIsNot(self.sample_metadata, viz.samples)

    self.assertIsNone(viz.features)

    assert_ordination_results_equal(viz.ordination, self.pcoa)

    # emperor is instantiated as needed but not yet setup
    self.assertIsInstance(viz._emperor, Emperor)

    # ensure the callbacks were rendered
    obs = viz.make_empress()
    self.assertIn('setOnNodeMenuVisibleCallback', obs)
    self.assertIn('setOnNodeMenuHiddenCallback', obs)
def test_init_with_ordination(self):
    """Check ``Empress`` initialisation with an ordination, no shearing.

    Verifies the base URL, the tree's ``B`` array and postorder node names,
    that table and sample metadata equal the inputs while being distinct
    objects, that no feature metadata is set, that the ordination equals
    ``self.pcoa`` and that ``_emperor`` is an ``Emperor`` instance.
    """
    viz = Empress(self.tree, self.table, self.sample_metadata,
                  ordination=self.pcoa, shear_to_table=False)

    self.assertEqual(viz.base_url, 'support_files')
    self.assertEqual(
        list(viz.tree.B),
        [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0])

    # postorderselect is called with 1-based positions, hence the offset
    # when looking up the expected name.
    names = ['a', 'e', None, 'b', 'g', None, 'd', 'h', None]
    for i in range(1, len(viz.tree) + 1):
        node = viz.tree.postorderselect(i)
        self.assertEqual(viz.tree.name(node), names[i - 1])

    # table should be unchanged and be a different object
    self.assertEqual(self.table, viz.table)
    self.assertIsNot(self.table, viz.table)

    # sample metadata should be unchanged and be a different object
    assert_frame_equal(self.sample_metadata, viz.samples)
    self.assertIsNot(self.sample_metadata, viz.samples)

    self.assertIsNone(viz.features)

    assert_ordination_results_equal(viz.ordination, self.pcoa)

    # emperor is instantiated as needed but not yet setup
    self.assertIsInstance(viz._emperor, Emperor)
def test_init_with_ordination(self):
    """Check ``Empress`` initialisation with an ordination, no filtering.

    Verifies the base URL, ``_bp_tree``, the postorder node names
    (including the ``EmpressNode*`` names), that the transposed table and
    the sample metadata equal the inputs while being distinct objects,
    that no feature metadata is set, that the ordination equals
    ``self.pcoa`` and that ``_emperor`` is an ``Emperor`` instance.
    """
    viz = Empress(self.tree, self.table, self.sample_metadata,
                  ordination=self.pcoa,
                  filter_unobserved_features_from_phylogeny=False)

    self.assertEqual(viz.base_url, './')
    self.assertEqual(
        viz._bp_tree,
        [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0])

    names = [
        'a', 'e', 'EmpressNode0', 'b', 'g', 'EmpressNode1', 'd', 'h',
        'EmpressNode2'
    ]
    for i, node in enumerate(viz.tree.postorder()):
        self.assertEqual(node.name, names[i])

    # table should be unchanged and be a different object
    assert_frame_equal(self.table, viz.table.T)
    self.assertIsNot(self.table, viz.table)

    # sample metadata should be unchanged and be a different object
    assert_frame_equal(self.sample_metadata, viz.samples)
    self.assertIsNot(self.sample_metadata, viz.samples)

    self.assertIsNone(viz.features)

    assert_ordination_results_equal(viz.ordination, self.pcoa)

    # emperor is instantiated as needed but not yet setup
    self.assertIsInstance(viz._emperor, Emperor)
def test_scaling2(self):
    """Check ``ca`` with scaling 2, including proportions explained.

    Expected eigenvalues, feature scores, sample scores and proportions
    explained are hard-coded and labelled with the ``self.feature_ids``,
    ``self.sample_ids`` and ``self.pc_ids`` fixtures. Results are compared
    to 5 decimals with ``ignore_directionality=True``.
    """
    # p. 460 L&L 1998
    feature_scores = np.array([[0.40887, -0.06955],    # F_hat
                               [-0.11539, 0.29977],
                               [-0.30997, -0.18739]])
    sample_scores = np.array([[-0.84896, -0.88276],    # V_hat
                              [-0.22046, 1.34482],
                              [1.66697, -0.47032]])

    expected = OrdinationResults(
        'CA', 'Correspondance Analysis',
        eigvals=pd.Series(np.array([0.09613302, 0.04094181]), self.pc_ids),
        features=pd.DataFrame(feature_scores, self.feature_ids,
                              self.pc_ids),
        samples=pd.DataFrame(sample_scores, self.sample_ids, self.pc_ids),
        proportion_explained=pd.Series(np.array([0.701318, 0.298682]),
                                       self.pc_ids))

    observed = ca(self.contingency, 2)

    assert_ordination_results_equal(expected, observed, decimal=5,
                                    ignore_directionality=True)
def test_book_example_dataset(self):
    """Check ``pcoa`` on ``self.dm`` against the stored book example.

    ``pcoa`` is expected to emit a ``RuntimeWarning`` for this input. The
    expected coordinates come from ``exp_PCoAzeros_site``.
    """
    # Adapted from PyCogent's `test_principal_coordinate_analysis`:
    #   "I took the example in the book (see intro info), and did
    #   the principal coordinates analysis, plotted the data and it
    #   looked right".
    axes = ['PC%d' % n for n in range(1, 15)]
    ids = [str(n) for n in range(14)]

    expected_eigvals = pd.Series(
        [0.73599103, 0.26260032, 0.14926222, 0.06990457, 0.02956972,
         0.01931184, 0., 0., 0., 0., 0., 0., 0., 0.],
        index=axes)
    expected_proportions = pd.Series(
        [0.58105792, 0.20732046, 0.1178411, 0.05518899, 0.02334502,
         0.01524651, 0., 0., 0., 0., 0., 0., 0., 0.],
        index=axes)
    expected_coords = pd.DataFrame(
        np.loadtxt(get_data_path('exp_PCoAzeros_site')),
        index=ids, columns=axes)

    expected = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=expected_eigvals,
        samples=expected_coords,
        proportion_explained=expected_proportions)

    observed = npt.assert_warns(RuntimeWarning, pcoa, self.dm)

    # Note the absolute value because column can have signs swapped
    observed.samples = np.abs(observed.samples)
    assert_ordination_results_equal(observed, expected,
                                    ignore_directionality=True)
def test_book_example_dataset(self):
    """Compare ``pcoa(self.dm)`` with the stored book-example reference.

    The call is wrapped in ``npt.assert_warns`` so a ``RuntimeWarning`` is
    required. Reference coordinates are read from ``exp_PCoAzeros_site``;
    eigenvalues and proportions explained are literals.
    """
    # Adapted from PyCogent's `test_principal_coordinate_analysis`:
    #   "I took the example in the book (see intro info), and did
    #   the principal coordinates analysis, plotted the data and it
    #   looked right".
    pc_labels = ['PC%d' % k for k in range(1, 15)]
    ids = [str(k) for k in range(14)]

    # Only the first six axes carry non-zero values; the other eight are 0.
    trailing_zeros = [0.] * 8
    eigenvalues = [0.73599103, 0.26260032, 0.14926222, 0.06990457,
                   0.02956972, 0.01931184] + trailing_zeros
    proportions = [0.58105792, 0.20732046, 0.1178411, 0.05518899,
                   0.02334502, 0.01524651] + trailing_zeros

    reference = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=pd.Series(eigenvalues, index=pc_labels),
        samples=pd.DataFrame(
            np.loadtxt(get_data_path('exp_PCoAzeros_site')),
            index=ids, columns=pc_labels),
        proportion_explained=pd.Series(proportions, index=pc_labels))

    observed = npt.assert_warns(RuntimeWarning, pcoa, self.dm)

    # Note the absolute value because column can have signs swapped
    observed.samples = np.abs(observed.samples)
    assert_ordination_results_equal(observed, reference,
                                    ignore_directionality=True)
def test_read_valid_files(self):
    """Read every valid file and compare it with its expected object.

    ``self.valid_fps`` and ``self.ordination_results_objs`` are walked in
    lockstep; method names and axis labels are ignored in the comparison.
    """
    cases = zip(self.valid_fps, self.ordination_results_objs)
    for path, expected in cases:
        observed = _ordination_to_ordination_results(path)
        assert_ordination_results_equal(
            observed, expected,
            ignore_method_names=True,
            ignore_axis_labels=True)
def test_pcoa_fsvd(self):
    """Check that ``pcoa`` agrees with and without ``number_of_dimensions``.

    Both calls use ``self.dm``; the comparison ignores directionality and
    method names.
    """
    # Run fsvd, computing all dimensions.
    all_dims = self.dm.data.shape[0]
    with_dims = pcoa(self.dm, number_of_dimensions=all_dims)

    # Run eigh, which computes all dimensions by default.
    default_run = pcoa(self.dm)

    assert_ordination_results_equal(with_dims, default_run,
                                    ignore_directionality=True,
                                    ignore_method_names=True)
def test_from_seralized_results(self):
    """Check ``pcoa_biplot`` on results read from disk vs. in-memory ones.

    The on-disk results come from the ``PCoA_skbio`` data file; both runs
    use ``self.descriptors``. Directionality, axis labels and method names
    are ignored in the comparison.
    """
    # the current implementation of ordination results loses some
    # information, test that pcoa_biplot works fine regardless
    from_disk = OrdinationResults.read(get_data_path('PCoA_skbio'))

    biplot_from_disk = pcoa_biplot(from_disk, self.descriptors)
    biplot_in_memory = pcoa_biplot(self.ordination, self.descriptors)

    assert_ordination_results_equal(biplot_from_disk, biplot_in_memory,
                                    ignore_directionality=True,
                                    ignore_axis_labels=True,
                                    ignore_method_names=True)
def test_fsvd_inplace(self):
    """Check in-place ``fsvd`` PCoA against in-place ``eigh`` PCoA.

    Each method gets its own copy of ``PCoA_sample_data_3`` (both calls
    pass ``inplace=True``) and computes 3 dimensions. Directionality and
    method names are ignored in the comparison.
    """
    data_path = get_data_path('PCoA_sample_data_3')
    dm_for_eigh = DistanceMatrix.read(data_path)
    dm_for_fsvd = DistanceMatrix.read(data_path)

    eigh_results = pcoa(dm_for_eigh, method="eigh",
                        number_of_dimensions=3, inplace=True)
    fsvd_results = pcoa(dm_for_fsvd, method="fsvd",
                        number_of_dimensions=3, inplace=True)

    assert_ordination_results_equal(fsvd_results, eigh_results,
                                    ignore_directionality=True,
                                    ignore_method_names=True)
def test_scaling2(self):
    """Check ``cca`` with ``scaling=2`` against stored reference data.

    Feature, sample, sample-constraint and biplot scores are loaded from
    the ``example3_*`` data files; eigenvalues and proportions explained
    are hard-coded. The comparison uses 6 decimals.
    """
    def load(name):
        # Read one reference matrix from the test data directory.
        return np.loadtxt(get_data_path(name))

    observed = cca(self.Y, self.X, scaling=2)

    # Load data as computed with vegan 2.0-8
    features = pd.DataFrame(load('example3_species_scaling2_from_vegan'),
                            index=self.feature_ids, columns=self.pc_ids)
    samples = pd.DataFrame(load('example3_site_scaling2_from_vegan'),
                           index=self.sample_ids, columns=self.pc_ids)
    constraints = pd.DataFrame(load('example3_sample_constraints_scaling2'),
                               index=self.sample_ids, columns=self.pc_ids)

    # Column labels are cropped to the number of biplot columns loaded.
    biplot_matrix = load('example3_biplot_scaling2')
    biplot = pd.DataFrame(biplot_matrix, index=self.env_ids,
                          columns=self.pc_ids[:biplot_matrix.shape[1]])

    proportions = pd.Series(
        [0.466911, 0.238327, 0.100548, 0.104937, 0.044805, 0.029747,
         0.012631, 0.001562, 0.000532],
        index=self.pc_ids)
    eigenvalues = pd.Series(
        [0.366136, 0.186888, 0.078847, 0.082288, 0.035135, 0.023327,
         0.009905, 0.001225, 0.000417],
        index=self.pc_ids)

    expected = OrdinationResults(
        'CCA', 'Canonical Correspondence Analysis',
        samples=samples,
        features=features,
        sample_constraints=constraints,
        biplot_scores=biplot,
        proportion_explained=proportions,
        eigvals=eigenvalues)

    assert_ordination_results_equal(observed, expected, decimal=6)
def test_roundtrip_read_write(self):
    """Check that read -> write -> read yields equal ordination results.

    Every path in ``self.valid_fps`` is read, written to an in-memory
    text buffer, read back from that buffer and compared with the first
    read.
    """
    for fp in self.valid_fps:
        # Read.
        obj1 = _ordination_to_ordination_results(fp)

        # The context manager closes the buffer even if writing or
        # re-reading raises.
        with io.StringIO() as fh:
            # Write.
            _ordination_results_to_ordination(obj1, fh)
            fh.seek(0)

            # Read.
            obj2 = _ordination_to_ordination_results(fh)

        assert_ordination_results_equal(obj1, obj2)
def test_pcoa_biplot_from_ape(self):
    """Test against a reference implementation from R's ape package

    The test data was generated with the R script below and using a
    modified version of pcoa.biplot that returns the U matrix.

    library(ape)
    # files can be found in the test data folder of the ordination module
    y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
    dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)

    h = pcoa(dm)

    # biplot.pcoa will only calculate the biplot for two axes at a time
    acc = NULL
    for (axes in c(1, 3, 5, 7)) {
        new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
                          rn = rep('.', length(colnames(dm))) )

        if(is.null(acc)) {
            acc = new
        }
        else {
            b = acc
            acc <- cbind(acc, new)
        }
    }
    write.csv(acc, file='PCoA_biplot_projected_descriptors')
    """
    observed = pcoa_biplot(self.ordination, self.descriptors)

    # we'll build a dummy ordination results object based on the expected
    # the main thing we'll compare and modify is the features dataframe
    expected = deepcopy(observed)

    reference_path = get_data_path('PCoA_biplot_projected_descriptors')
    expected.features = pd.read_table(reference_path, sep=',', index_col=0)

    # R won't calculate the last dimension, so pad with zeros to make the
    # arrays comparable
    expected.features['Axis.9'] = np.zeros_like(expected.features['Axis.8'])

    # make the order comparable
    expected.features = expected.features.reindex(observed.features.index)

    assert_ordination_results_equal(observed, expected,
                                    ignore_directionality=True,
                                    ignore_axis_labels=True)
def test_scaling2(self):
    """Compare ``cca(..., scaling=2)`` with the stored vegan reference.

    Score matrices are loaded from the ``example3_*`` data files, while
    eigenvalues and proportions explained are literals. The comparison is
    made to 6 decimals.
    """
    observed = cca(self.Y, self.X, scaling=2)

    # Load data as computed with vegan 2.0-8
    species_matrix = np.loadtxt(
        get_data_path('example3_species_scaling2_from_vegan'))
    site_matrix = np.loadtxt(
        get_data_path('example3_site_scaling2_from_vegan'))
    constraint_matrix = np.loadtxt(
        get_data_path('example3_sample_constraints_scaling2'))
    biplot_matrix = np.loadtxt(get_data_path('example3_biplot_scaling2'))

    # Column labels are cropped to the number of biplot columns loaded.
    biplot_axes = self.pc_ids[:biplot_matrix.shape[1]]

    expected = OrdinationResults(
        'CCA', 'Canonical Correspondence Analysis',
        samples=pd.DataFrame(site_matrix, index=self.sample_ids,
                             columns=self.pc_ids),
        features=pd.DataFrame(species_matrix, index=self.feature_ids,
                              columns=self.pc_ids),
        sample_constraints=pd.DataFrame(constraint_matrix,
                                        index=self.sample_ids,
                                        columns=self.pc_ids),
        biplot_scores=pd.DataFrame(biplot_matrix, index=self.env_ids,
                                   columns=biplot_axes),
        proportion_explained=pd.Series([
            0.466911, 0.238327, 0.100548, 0.104937, 0.044805, 0.029747,
            0.012631, 0.001562, 0.000532
        ], index=self.pc_ids),
        eigvals=pd.Series([
            0.366136, 0.186888, 0.078847, 0.082288, 0.035135, 0.023327,
            0.009905, 0.001225, 0.000417
        ], index=self.pc_ids))

    assert_ordination_results_equal(observed, expected, decimal=6)
def test_extensive(self):
    """Check ``pcoa`` on ``PCoA_sample_data_2`` for two input types.

    The same expected results (all hard-coded) must be produced whether
    the data is passed as a ``numpy.ndarray`` or as a ``DistanceMatrix``.
    Directionality is ignored in the comparison.
    """
    axes = ['PC%d' % n for n in range(1, 7)]
    ids = [str(n) for n in range(6)]

    expected_eigvals = pd.Series(
        [0.3984635, 0.36405689, 0.28804535, 0.27479983, 0.19165361, 0.0],
        index=axes)
    expected_proportions = pd.Series(
        [0.2626621381, 0.2399817314, 0.1898758748, 0.1811445992,
         0.1263356565, 0.0],
        index=axes)
    expected_coords = pd.DataFrame(
        [[-0.028597, 0.22903853, 0.07055272, 0.26163576, 0.28398669, 0.0],
         [0.37494056, 0.22334055, -0.20892914, 0.05057395, -0.18710366,
          0.0],
         [-0.33517593, -0.23855979, -0.3099887, 0.11521787, -0.05021553,
          0.0],
         [0.25412394, -0.4123464, 0.23343642, 0.06403168, -0.00482608,
          0.0],
         [-0.28256844, 0.18606911, 0.28875631, -0.06455635, -0.21141632,
          0.0],
         [0.01727687, 0.012458, -0.07382761, -0.42690292, 0.1695749, 0.0]],
        index=ids, columns=axes)

    expected = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=expected_eigvals,
        samples=expected_coords,
        proportion_explained=expected_proportions)

    raw = np.loadtxt(get_data_path('PCoA_sample_data_2'))

    # test passing a numpy.ndarray and a DistanceMatrix to pcoa
    # gives same results
    inputs = (raw, DistanceMatrix(raw))
    for candidate in inputs:
        observed = pcoa(candidate)
        assert_ordination_results_equal(observed, expected,
                                        ignore_directionality=True)
def test_scaling1(self):
    """Compare ``ca(self.contingency, 1)`` with literal expected scores.

    Expected eigenvalues, feature scores and sample scores are labelled
    with the ``self.pc_ids``, ``self.feature_ids`` and ``self.sample_ids``
    fixtures. The comparison uses 5 decimals and ignores directionality.
    """
    observed = ca(self.contingency, 1)

    expected_eigvals = pd.Series(np.array([0.09613302, 0.04094181]),
                                 self.pc_ids)
    # p. 458
    expected_features = pd.DataFrame(
        np.array([[1.31871, -0.34374],  # V
                  [-0.37215, 1.48150],
                  [-0.99972, -0.92612]]),
        self.feature_ids, self.pc_ids)
    expected_samples = pd.DataFrame(
        np.array([[-0.26322, -0.17862],  # F
                  [-0.06835, 0.27211],
                  [0.51685, -0.09517]]),
        self.sample_ids, self.pc_ids)

    expected = OrdinationResults('CA', 'Correspondance Analysis',
                                 eigvals=expected_eigvals,
                                 features=expected_features,
                                 samples=expected_samples)

    assert_ordination_results_equal(expected, observed, decimal=5,
                                    ignore_directionality=True)
def test_extensive(self):
    """Compare ``pcoa`` on ``PCoA_sample_data_2`` with literal references.

    The data is passed once as a ``numpy.ndarray`` and once wrapped in a
    ``DistanceMatrix``; both runs must match the same reference, ignoring
    directionality.
    """
    pc_labels = ['PC%d' % k for k in range(1, 7)]
    ids = [str(k) for k in range(6)]

    eigenvalues = [0.3984635, 0.36405689, 0.28804535, 0.27479983,
                   0.19165361, 0.0]
    proportions = [0.2626621381, 0.2399817314, 0.1898758748,
                   0.1811445992, 0.1263356565, 0.0]
    coordinates = [
        [-0.028597, 0.22903853, 0.07055272,
         0.26163576, 0.28398669, 0.0],
        [0.37494056, 0.22334055, -0.20892914,
         0.05057395, -0.18710366, 0.0],
        [-0.33517593, -0.23855979, -0.3099887,
         0.11521787, -0.05021553, 0.0],
        [0.25412394, -0.4123464, 0.23343642,
         0.06403168, -0.00482608, 0.0],
        [-0.28256844, 0.18606911, 0.28875631,
         -0.06455635, -0.21141632, 0.0],
        [0.01727687, 0.012458, -0.07382761,
         -0.42690292, 0.1695749, 0.0],
    ]

    reference = OrdinationResults(
        short_method_name='PCoA',
        long_method_name='Principal Coordinate Analysis',
        eigvals=pd.Series(eigenvalues, index=pc_labels),
        samples=pd.DataFrame(coordinates, index=ids, columns=pc_labels),
        proportion_explained=pd.Series(proportions, index=pc_labels))

    matrix = np.loadtxt(get_data_path('PCoA_sample_data_2'))

    # test passing a numpy.ndarray and a DistanceMatrix to pcoa
    # gives same results
    for pcoa_input in (matrix, DistanceMatrix(matrix)):
        assert_ordination_results_equal(pcoa(pcoa_input), reference,
                                        ignore_directionality=True)
def test_fsvd(self):
    """Check ``fsvd`` PCoA against ``eigh`` and its argument validation.

    Covers: ``fsvd`` vs. ``eigh`` with 3 dimensions, ``inplace=True`` vs.
    ``inplace=False``, ``number_of_dimensions=0`` vs. the full dimension
    count, ``ValueError`` for too-large and negative dimension counts with
    both methods, and the ``RuntimeWarning`` raised for
    ``PCoA_sample_data_12dim`` with ``number_of_dimensions=0``.
    """
    dm1 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
    dm2 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
    dm3 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))

    # Test eigh vs. fsvd pcoa and inplace parameter
    expected_results = pcoa(dm1, method="eigh", number_of_dimensions=3,
                            inplace=False)

    results = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                   inplace=False)

    results_inplace = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                           inplace=True)

    assert_ordination_results_equal(results, expected_results,
                                    ignore_directionality=True,
                                    ignore_method_names=True)

    assert_ordination_results_equal(results, results_inplace,
                                    ignore_directionality=True,
                                    ignore_method_names=True)

    # Test number_of_dimensions edge cases
    results2 = pcoa(dm3, method="fsvd", number_of_dimensions=0,
                    inplace=False)
    expected_results2 = pcoa(dm3, method="fsvd",
                             number_of_dimensions=dm3.data.shape[0],
                             inplace=False)

    assert_ordination_results_equal(results2, expected_results2,
                                    ignore_directionality=True,
                                    ignore_method_names=True)

    # Computed outside the assertRaises contexts so that only the pcoa
    # call itself can satisfy the expected ValueError.
    dim_too_large = dm1.data.shape[0] + 10

    with self.assertRaises(ValueError):
        pcoa(dm2, method="fsvd", number_of_dimensions=dim_too_large)

    with self.assertRaises(ValueError):
        pcoa(dm2, method="fsvd", number_of_dimensions=-1)

    with self.assertRaises(ValueError):
        pcoa(dm2, method="eigh", number_of_dimensions=dim_too_large)

    with self.assertRaises(ValueError):
        pcoa(dm2, method="eigh", number_of_dimensions=-1)

    dm_big = DistanceMatrix.read(get_data_path('PCoA_sample_data_12dim'))
    with self.assertWarnsRegex(RuntimeWarning,
                               "no value for number_of_dimensions"):
        pcoa(dm_big, method="fsvd", number_of_dimensions=0)
def test_biplot_score(self):
    """Check ``rda`` (scaling 1, unscaled Y) against vegan reference files.

    The reference tables are read from the ``vare_rda_*_from_vegan.csv``
    data files. The skbio result is cropped to its first 6 axes, the
    expected biplot scores are rescaled, and the two results are compared
    to 6 decimals without ignoring directionality.
    """
    rda_ = rda(y=self.Y, x=self.X, scale_Y=False, scaling=1)

    # Load data as computed with vegan 2.4-3:
    # library(vegan)
    # data(varechem)
    # data(varespec)
    # rda_ = rda(X=varespec, Y=varechem, scale=FALSE)
    # write.table(summary(rda_, scaling=1)$biplot,
    #             'vare_rda_biplot_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$sites,
    #             'vare_rda_sites_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$species,
    #             'vare_rda_species_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$constraints,
    #             'vare_rda_constraints_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$cont$importance[2, ],
    #             'vare_rda_propexpl_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$cont$importance[1, ],
    #             'vare_rda_eigvals_from_vegan.csv', sep=',')

    vegan_features = pd.read_csv(
        get_data_path('vare_rda_species_from_vegan.csv'))
    vegan_samples = pd.read_csv(
        get_data_path('vare_rda_sites_from_vegan.csv'))
    vegan_biplot = pd.read_csv(
        get_data_path('vare_rda_biplot_from_vegan.csv'))
    vegan_constraints = pd.read_csv(
        get_data_path('vare_rda_constraints_from_vegan.csv'))
    # NOTE: this value is superseded below, once the eigenvalues have
    # been converted.
    vegan_propexpl = pd.read_csv(
        get_data_path('vare_rda_propexpl_from_vegan.csv'))
    vegan_propexpl = pd.Series(
        vegan_propexpl.x.values, index=rda_.eigvals.index)
    vegan_eigvals = pd.read_csv(
        get_data_path('vare_rda_eigvals_from_vegan.csv'))
    vegan_eigvals = pd.Series(
        vegan_eigvals.x.values, index=rda_.eigvals.index)

    # scikit-bio returns singular values, whereas vegan returns eigenvalues
    vegan_eigvals = np.sqrt(vegan_eigvals * vegan_eigvals.shape[0])
    vegan_propexpl = vegan_eigvals / vegan_eigvals.sum()

    # transform the output of rda_ to match column selection of vegan
    res_samples = rda_.samples.iloc[:, 0:6]
    res_features = rda_.features.iloc[:, 0:6]

    rda_ = OrdinationResults(
        'RDA', 'Redundancy Analysis',
        samples=res_samples,
        features=res_features,
        sample_constraints=rda_.sample_constraints.iloc[:, 0:6],
        biplot_scores=rda_.biplot_scores.iloc[:, 0:6],
        proportion_explained=rda_.proportion_explained,
        eigvals=rda_.eigvals)

    exp = OrdinationResults(
        'RDA', 'Redundancy Analysis',
        samples=vegan_samples,
        features=vegan_features,
        sample_constraints=vegan_constraints,
        biplot_scores=vegan_biplot,
        proportion_explained=vegan_propexpl,
        eigvals=vegan_eigvals)

    pdt.assert_frame_equal(res_samples, vegan_samples)

    # This scaling constant is required to make skbio comparable to vegan.
    # ``.iloc`` makes the positional access explicit: the first eigenvalue
    # divided by each of the first six.
    scaling = (rda_.eigvals.iloc[0] / rda_.eigvals.iloc[:6])
    exp.biplot_scores *= scaling
    assert_ordination_results_equal(
        rda_, exp,
        ignore_directionality=False,
        decimal=6)
def test_biplot_score(self):
    """Check ``rda`` (scaling 1, unscaled Y) against vegan reference files.

    The reference tables are read from the ``vare_rda_*_from_vegan.csv``
    data files. The skbio result is cropped to its first 6 axes, the
    expected biplot scores are rescaled, and the two results are compared
    to 6 decimals without ignoring directionality.
    """
    rda_ = rda(y=self.Y, x=self.X, scale_Y=False, scaling=1)

    # Load data as computed with vegan 2.4-3:
    # library(vegan)
    # data(varechem)
    # data(varespec)
    # rda_ = rda(X=varespec, Y=varechem, scale=FALSE)
    # write.table(summary(rda_, scaling=1)$biplot,
    #             'vare_rda_biplot_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$sites,
    #             'vare_rda_sites_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$species,
    #             'vare_rda_species_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$constraints,
    #             'vare_rda_constraints_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$cont$importance[2, ],
    #             'vare_rda_propexpl_from_vegan.csv', sep=',')
    # write.table(summary(rda_, scaling=1)$cont$importance[1, ],
    #             'vare_rda_eigvals_from_vegan.csv', sep=',')

    vegan_features = pd.read_csv(
        get_data_path('vare_rda_species_from_vegan.csv'))
    vegan_samples = pd.read_csv(
        get_data_path('vare_rda_sites_from_vegan.csv'))
    vegan_biplot = pd.read_csv(
        get_data_path('vare_rda_biplot_from_vegan.csv'))
    vegan_constraints = pd.read_csv(
        get_data_path('vare_rda_constraints_from_vegan.csv'))
    # NOTE: this value is superseded below, once the eigenvalues have
    # been converted.
    vegan_propexpl = pd.read_csv(
        get_data_path('vare_rda_propexpl_from_vegan.csv'))
    vegan_propexpl = pd.Series(vegan_propexpl.x.values,
                               index=rda_.eigvals.index)
    vegan_eigvals = pd.read_csv(
        get_data_path('vare_rda_eigvals_from_vegan.csv'))
    vegan_eigvals = pd.Series(vegan_eigvals.x.values,
                              index=rda_.eigvals.index)

    # scikit-bio returns singular values, whereas vegan returns eigenvalues
    vegan_eigvals = np.sqrt(vegan_eigvals * vegan_eigvals.shape[0])
    vegan_propexpl = vegan_eigvals / vegan_eigvals.sum()

    # transform the output of rda_ to match column selection of vegan
    res_samples = rda_.samples.iloc[:, 0:6]
    res_features = rda_.features.iloc[:, 0:6]

    rda_ = OrdinationResults(
        'RDA',
        'Redundancy Analysis',
        samples=res_samples,
        features=res_features,
        sample_constraints=rda_.sample_constraints.iloc[:, 0:6],
        biplot_scores=rda_.biplot_scores.iloc[:, 0:6],
        proportion_explained=rda_.proportion_explained,
        eigvals=rda_.eigvals)

    exp = OrdinationResults('RDA',
                            'Redundancy Analysis',
                            samples=vegan_samples,
                            features=vegan_features,
                            sample_constraints=vegan_constraints,
                            biplot_scores=vegan_biplot,
                            proportion_explained=vegan_propexpl,
                            eigvals=vegan_eigvals)

    # This scaling constant is required to make skbio comparable to vegan.
    # ``.iloc`` makes the positional access explicit: the first eigenvalue
    # divided by each of the first six.
    scaling = (rda_.eigvals.iloc[0] / rda_.eigvals.iloc[:6])
    exp.biplot_scores *= scaling
    assert_ordination_results_equal(rda_,
                                    exp,
                                    ignore_directionality=False,
                                    decimal=6)
def test_read_valid_files(self):
    """Read every valid file and compare it with its expected object.

    ``self.valid_fps`` and ``self.ordination_results_objs`` are walked in
    lockstep; method names, axis labels and biplot-score labels are
    ignored in the comparison.
    """
    comparison_options = dict(ignore_method_names=True,
                              ignore_axis_labels=True,
                              ignore_biplot_scores_labels=True)
    cases = zip(self.valid_fps, self.ordination_results_objs)
    for path, expected in cases:
        observed = _ordination_to_ordination_results(path)
        assert_ordination_results_equal(observed, expected,
                                        **comparison_options)
def test_assert_ordination_results_equal(self):
    """Exercise ``assert_ordination_results_equal`` on minimal results.

    Covers: self-equality, a type mismatch, almost-equal eigenvalues, and
    for each optional numeric attribute (the four data frames and
    ``proportion_explained``) the cases "present in only one object",
    "present in both but different" and "present in both and almost
    equal". Attributes are reset to ``None`` on both objects after each
    case so the cases do not influence each other.
    """
    minimal1 = OrdinationResults('foo', 'bar', pd.Series([1.0, 2.0]),
                                 pd.DataFrame([[1, 2, 3], [4, 5, 6]]))

    # a minimal set of results should be equal to itself
    assert_ordination_results_equal(minimal1, minimal1)

    # type mismatch
    with npt.assert_raises(AssertionError):
        assert_ordination_results_equal(minimal1, 'foo')

    # numeric values should be checked that they're almost equal
    almost_minimal1 = OrdinationResults(
        'foo', 'bar',
        pd.Series([1.0000001, 1.9999999]),
        pd.DataFrame([[1, 2, 3], [4, 5, 6]]))
    assert_ordination_results_equal(minimal1, almost_minimal1)

    # test each of the optional numeric attributes
    for attr in ('features', 'samples', 'biplot_scores',
                 'sample_constraints'):
        # missing optional numeric attribute in one, present in the other
        setattr(almost_minimal1, attr, pd.DataFrame([[1, 2], [3, 4]]))
        with npt.assert_raises(AssertionError):
            assert_ordination_results_equal(minimal1, almost_minimal1)
        setattr(almost_minimal1, attr, None)

        # optional numeric attributes present in both, but not almost equal
        setattr(minimal1, attr, pd.DataFrame([[1, 2], [3, 4]]))
        setattr(almost_minimal1, attr, pd.DataFrame([[1, 2],
                                                     [3.00002, 4]]))
        with npt.assert_raises(AssertionError):
            assert_ordination_results_equal(minimal1, almost_minimal1)
        setattr(minimal1, attr, None)
        setattr(almost_minimal1, attr, None)

        # optional numeric attributes present in both, and almost equal
        setattr(minimal1, attr, pd.DataFrame([[1.0, 2.0], [3.0, 4.0]]))
        setattr(almost_minimal1, attr,
                pd.DataFrame([[1.0, 2.0], [3.00000002, 4]]))
        assert_ordination_results_equal(minimal1, almost_minimal1)
        setattr(minimal1, attr, None)
        setattr(almost_minimal1, attr, None)

    # missing optional numeric attribute in one, present in the other
    almost_minimal1.proportion_explained = pd.Series([1, 2, 3])
    with npt.assert_raises(AssertionError):
        assert_ordination_results_equal(minimal1, almost_minimal1)
    almost_minimal1.proportion_explained = None

    # optional numeric attributes present in both, but not almost equal
    minimal1.proportion_explained = pd.Series([1, 2, 3])
    almost_minimal1.proportion_explained = pd.Series([1, 2, 3.00002])
    with npt.assert_raises(AssertionError):
        assert_ordination_results_equal(minimal1, almost_minimal1)
    # Reset both objects, mirroring the resets in the loop above.
    minimal1.proportion_explained = None
    almost_minimal1.proportion_explained = None

    # optional numeric attributes present in both, and almost equal
    minimal1.proportion_explained = pd.Series([1, 2, 3])
    almost_minimal1.proportion_explained = pd.Series([1, 2, 3.00000002])
    assert_ordination_results_equal(minimal1, almost_minimal1)
    minimal1.proportion_explained = None
    almost_minimal1.proportion_explained = None