def setup(self):
    """Build the CCA ordination shared by the tests.

    Data from table 11.3 in Legendre & Legendre 1998 (p. 590). Loaded
    results as computed with vegan 2.0-8 and compared with table 11.5
    if also there.
    """
    y_table = np.loadtxt(get_data_path('example3_Y'))
    x_table = np.loadtxt(get_data_path('example3_X'))
    # The last column of the X table is left out of the analysis.
    x_without_last = x_table[:, :-1]
    self.ordination = CCA(y_table, x_without_last)
def setUp(self):
    """Create the option tuples, small matrices and vegan-derived data."""
    self.methods = ('pearson', 'spearman')
    self.alternatives = ('two-sided', 'greater', 'less')

    # Minimal (3x3) dataset, deliberately written as plain nested Python
    # lists that mix ints and floats.
    self.minx = [[0, 1, 2],
                 [1, 0, 3],
                 [2, 3, 0]]
    self.miny = [[0, 2, 7],
                 [2, 0, 6],
                 [7, 6, 0]]
    self.minz = [[0, 0.5, 0.25],
                 [0.5, 0, 0.1],
                 [0.25, 0.1, 0]]

    # Distances without any variation, from Figure 10.20(b), pg. 603 in
    # L&L 3rd edition. The book example is 4x4; 3x3 is used here so it
    # lines up with the minimal dataset above.
    self.no_variation = [[0, 0.667, 0.667],
                         [0.667, 0, 0.667],
                         [0.667, 0.667, 0]]

    # Second dataset, derived from vegan::mantel's example data. "veg"
    # holds Bray-Curtis distances computed from the varespec data
    # ("veg.dist" in the example); "env" holds Euclidean distances
    # computed from scaled varechem data ("env.dist" in the example).
    veg_path = get_data_path('mantel_veg_dm_vegan.txt')
    self.veg_dm_vegan = np.loadtxt(veg_path)
    env_path = get_data_path('mantel_env_dm_vegan.txt')
    self.env_dm_vegan = np.loadtxt(env_path)

    # Expected statistics with method='pearson': x against y, then x
    # against z.
    self.exp_x_vs_y = 0.7559289
    self.exp_x_vs_z = -0.9897433
def setUp(self):
    """Prepare paths, raw file contents and in-memory matrix fixtures.

    Three data files are read fully into strings; the remaining fixtures
    are literal nested lists and ``StringIO`` objects wrapping either the
    file contents or module-level string constants.
    """
    self.bad_dm_fp = get_data_path('bad_dm.txt')
    self.dm_2x2_asym_fp = get_data_path('dm_2x2_asym.txt')
    self.dm_3x3_fp = get_data_path('dm_3x3.txt')

    # Context managers guarantee the handles are closed even if a read
    # fails. The former 'U' mode flag is dropped: it is redundant in
    # Python 3 text mode and raises ValueError from Python 3.11 on.
    with open(self.bad_dm_fp) as fd:
        self.bad_dm_f2_lines = fd.read()
    with open(self.dm_2x2_asym_fp) as fd:
        self.dm_2x2_asym_lines = fd.read()
    with open(self.dm_3x3_fp) as fd:
        self.dm_3x3_lines = fd.read()

    self.dm_1x1_data = [[0.0]]
    self.dm_1x1_f = StringIO(DM_1x1_F)

    self.dm_2x2_data = [[0.0, 0.123], [0.123, 0.0]]
    self.dm_2x2_f = StringIO(DM_2x2_F)

    self.dm_2x2_asym_data = [[0.0, 1.0], [-2.0, 0.0]]
    self.dm_2x2_asym_f = StringIO(self.dm_2x2_asym_lines)

    self.dm_3x3_data = [[0.0, 0.01, 4.2],
                        [0.01, 0.0, 12.0],
                        [4.2, 12.0, 0.0]]
    self.dm_3x3_f = StringIO(self.dm_3x3_lines)
    self.dm_3x3_whitespace_f = StringIO('\n'.join(DM_3x3_WHITESPACE_F))

    self.bad_dm_f1 = StringIO(BAD_DM_F1)
    self.bad_dm_f2 = StringIO(self.bad_dm_f2_lines)
    self.bad_dm_f3 = StringIO(BAD_DM_F3)
    self.bad_dm_f4 = StringIO(BAD_DM_F4)
    self.bad_dm_f5 = StringIO(BAD_DM_F5)
    self.bad_dm_f6 = StringIO(BAD_DM_F6)
def setup(self):
    """Build the RDA ordination with explicit site and species ids.

    Data from table 11.3 in Legendre & Legendre 1998.
    """
    y_table = np.loadtxt(get_data_path('example2_Y'))
    x_table = np.loadtxt(get_data_path('example2_X'))
    # 'Site0'..'Site9' and 'Species0'..'Species5'.
    site_ids = ['Site%d' % i for i in range(10)]
    species_ids = ['Species%d' % i for i in range(6)]
    self.ordination = RDA(y_table, x_table, site_ids, species_ids)
def test_from_file_error(self):
    """Malformed files make ``OrdinationResults.from_file`` raise.

    Files listed in ``self.fferror_test_paths`` must raise
    ``FileFormatError``; files listed in ``self.verror_test_paths`` must
    raise ``ValueError``.
    """
    cases = (
        (self.fferror_test_paths, FileFormatError),
        (self.verror_test_paths, ValueError),
    )
    for test_paths, expected_error in cases:
        for test_path in test_paths:
            # Plain text mode: the old 'U' flag is redundant on Python 3
            # and raises ValueError from Python 3.11 on.
            with open(get_data_path(test_path)) as f:
                with npt.assert_raises(expected_error):
                    OrdinationResults.from_file(f)
def setup(self):
    """Create the RDA fixture, labelling sites and species.

    Data from table 11.3 in Legendre & Legendre 1998.
    """
    tables = [np.loadtxt(get_data_path(name))
              for name in ('example2_Y', 'example2_X')]
    y_table, x_table = tables
    self.ordination = RDA(
        y_table,
        x_table,
        ['Site{0}'.format(n) for n in range(10)],
        ['Species{0}'.format(n) for n in range(6)],
    )
def test_scaling2(self):
    """Scaling-2 species and site scores match the stored vegan values."""
    observed = self.ordination.scores(2)

    # Reference values were computed with vegan 2.0-8. Species scores are
    # checked first, then site scores.
    checks = (
        ('species', 'example2_species_scaling2_from_vegan'),
        ('site', 'example2_site_scaling2_from_vegan'),
    )
    for attribute, filename in checks:
        reference = np.loadtxt(get_data_path(filename))
        npt.assert_almost_equal(getattr(observed, attribute), reference,
                                decimal=6)
def test_scaling2(self):
    """Compare scaling-2 scores against values computed with vegan 2.0-8."""
    result = self.ordination.scores(2)

    species_path = get_data_path('example2_species_scaling2_from_vegan')
    expected_species = np.loadtxt(species_path)
    npt.assert_almost_equal(result.species, expected_species, decimal=6)

    site_path = get_data_path('example2_site_scaling2_from_vegan')
    expected_site = np.loadtxt(site_path)
    npt.assert_almost_equal(result.site, expected_site, decimal=6)
def setup(self):
    """Build the CCA ordination with explicit site and species ids.

    Data from table 11.3 in Legendre & Legendre 1998 (p. 590). Loaded
    results as computed with vegan 2.0-8 and compared with table 11.5
    if also there.
    """
    y_table = np.loadtxt(get_data_path('example3_Y'))
    x_table = np.loadtxt(get_data_path('example3_X'))
    # 'Site0'..'Site9' and 'Species0'..'Species8'.
    site_ids = ['Site%d' % i for i in range(10)]
    species_ids = ['Species%d' % i for i in range(9)]
    # The last column of the X table is left out of the analysis.
    self.ordination = CCA(y_table, x_table[:, :-1], site_ids, species_ids)
def setup(self):
    """Create the labelled CCA fixture.

    Data from table 11.3 in Legendre & Legendre 1998 (p. 590). Loaded
    results as computed with vegan 2.0-8 and compared with table 11.5
    if also there.
    """
    tables = [np.loadtxt(get_data_path(name))
              for name in ('example3_Y', 'example3_X')]
    y_table, x_table = tables
    self.ordination = CCA(
        y_table,
        x_table[:, :-1],  # every column of X except the last one
        ['Site{0}'.format(n) for n in range(10)],
        ['Species{0}'.format(n) for n in range(9)],
    )
def test_values(self):
    """Check PCoA eigenvalues, site scores, proportions and site ids.

    Adapted from cogent's `test_principal_coordinate_analysis`: "I took
    the example in the book (see intro info), and did the principal
    coordinates analysis, plotted the data and it looked right".
    """
    # RuntimeWarnings raised while building the ordination are silenced.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', category=RuntimeWarning)
        pcoa = PCoA(self.dist_matrix)
        observed = pcoa.scores()

    # Six non-zero values followed by eight zeros (14 entries in total).
    trailing_zeros = [0.] * 8
    exp_eigvals = np.array([0.73599103, 0.26260032, 0.14926222,
                            0.06990457, 0.02956972, 0.01931184]
                           + trailing_zeros)
    exp_site = np.loadtxt(get_data_path('exp_PCoAzeros_site'))
    exp_prop_expl = np.array([0.58105792, 0.20732046, 0.1178411,
                              0.05518899, 0.02334502, 0.01524651]
                             + trailing_zeros)
    exp_site_ids = [str(index) for index in range(14)]

    npt.assert_almost_equal(observed.eigvals, exp_eigvals)
    # Columns may come back with flipped signs, so compare magnitudes.
    npt.assert_almost_equal(np.abs(observed.site), exp_site)
    npt.assert_almost_equal(observed.proportion_explained, exp_prop_expl)
    npt.assert_equal(observed.site_ids, exp_site_ids)
def setup(self):
    """Load the 'PCoA_sample_data_3' distance matrix and build the PCoA.

    Also stores the sample ids the tests compare against.
    """
    # Plain text mode: the former 'U' flag is redundant on Python 3 and
    # raises ValueError from Python 3.11 on.
    with open(get_data_path('PCoA_sample_data_3')) as lines:
        dist_matrix = DistanceMatrix.from_file(lines)

    self.ordination = PCoA(dist_matrix)

    self.ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                'PC.593', 'PC.355', 'PC.607', 'PC.634']
def test_from_file(self):
    """Yield one equality check per expected result and input kind.

    Each file is parsed twice: first through an open file handle, then
    through its file name.
    """
    for expected, rel_path in zip(self.scores, self.test_paths):
        # Input given as a file-like object.
        with open(get_data_path(rel_path)) as handle:
            observed = OrdinationResults.from_file(handle)
        yield self.check_OrdinationResults_equal, observed, expected

        # Input given as a file name.
        observed = OrdinationResults.from_file(get_data_path(rel_path))
        yield self.check_OrdinationResults_equal, observed, expected
def setUp(self):
    """Resolve the data-file paths used by the tests."""
    fixtures = (
        ('fna1', 'fna1.fasta'),
        ('fna1gz', 'fna1.fna.gz'),
        ('fq1', 'fq1.fq'),
        ('fq1gz', 'fq1.fastq.gz'),
        ('qual1', 'fna1.qual'),
        ('noext', 'noextensionfasta'),
    )
    for attribute, filename in fixtures:
        setattr(self, attribute, get_data_path(filename))
def setUp(self):
    """Create the distance matrices and load the expected result frames."""
    self.minx = DistanceMatrix([[0, 1, 2],
                                [1, 0, 3],
                                [2, 3, 0]])
    self.miny = DistanceMatrix([[0, 2, 7],
                                [2, 0, 6],
                                [7, 6, 0]])
    self.minz = DistanceMatrix([[0, 0.5, 0.25],
                                [0.5, 0, 0.1],
                                [0.25, 0.1, 0]])
    self.min_dms = (self.minx, self.miny, self.minz)

    # Copies of self.minx and self.minz with one additional trailing ID.
    x_extra_data = [[0, 1, 2, 7],
                    [1, 0, 3, 2],
                    [2, 3, 0, 4],
                    [7, 2, 4, 0]]
    self.x_extra = DistanceMatrix(x_extra_data, ['0', '1', '2', 'foo'])
    z_extra_data = [[0, 0.5, 0.25, 3],
                    [0.5, 0, 0.1, 24],
                    [0.25, 0.1, 0, 5],
                    [3, 24, 5, 0]]
    self.z_extra = DistanceMatrix(z_extra_data, ['0', '1', '2', 'bar'])

    # Expected results. The p-value column (column index 3) has to be
    # read as strings: the in-memory results format p-values as strings
    # with the correct number of decimal places, so a float column would
    # make the frames compare unequal.
    p_val_conv = {3: str}

    def load_expected(filename):
        # Tab-separated file indexed by its first two columns.
        return pd.read_csv(get_data_path(filename), sep='\t',
                           index_col=(0, 1), converters=p_val_conv)

    self.exp_results_minimal = load_expected(
        'pwmantel_exp_results_minimal.txt')
    self.exp_results_minimal_with_labels = load_expected(
        'pwmantel_exp_results_minimal_with_labels.txt')
    self.exp_results_duplicate_dms = load_expected(
        'pwmantel_exp_results_duplicate_dms.txt')
    self.exp_results_na_p_value = load_expected(
        'pwmantel_exp_results_na_p_value.txt')
    self.exp_results_too_few_permutations = load_expected(
        'pwmantel_exp_results_too_few_permutations.txt')
    self.exp_results_reordered_distance_matrices = load_expected(
        'pwmantel_exp_results_reordered_distance_matrices.txt')
def test_to_file(self):
    """Yield a check that ``to_file`` output equals each reference file.

    Every result in ``self.scores`` is written twice: once to an
    in-memory ``StringIO`` and once to a named temporary file. Each
    output is compared with the contents of the matching entry of
    ``self.test_paths``.
    """
    for scores, test_path in zip(self.scores, self.test_paths):
        for file_type in ('file like', 'file name'):
            if file_type == 'file like':
                obs_f = StringIO()
                scores.to_file(obs_f)
                obs = obs_f.getvalue()
                obs_f.close()
            elif file_type == 'file name':
                with tempfile.NamedTemporaryFile('r+') as temp_file:
                    scores.to_file(temp_file.name)
                    # Rewind so the written content is read back from
                    # the beginning of the file.
                    temp_file.flush()
                    temp_file.seek(0)
                    obs = temp_file.read()

            # Plain text mode: the former 'U' flag is redundant on
            # Python 3 and raises ValueError from Python 3.11 on.
            with open(get_data_path(test_path)) as f:
                exp = f.read()

            yield npt.assert_equal, obs, exp
def test_values(self):
    """Check site scores, eigenvalues, proportions and ids of the PCoA."""
    observed = self.ordination.scores()

    # Number of eigenvalues must equal the length of the first site row.
    n_eigvals = len(observed.eigvals)
    n_axes = len(observed.site[0])
    npt.assert_almost_equal(n_eigvals, n_axes)

    exp_site = np.loadtxt(get_data_path('exp_PCoAEigenResults_site'))
    normalized = normalize_signs(exp_site, observed.site)
    npt.assert_almost_equal(*normalized)

    exp_eigvals = np.array([0.51236726, 0.30071909, 0.26791207,
                            0.20898868, 0.19169895, 0.16054235,
                            0.15017696, 0.12245775, 0.0])
    npt.assert_almost_equal(observed.eigvals, exp_eigvals)

    exp_proportions = np.array([0.2675738328, 0.157044696, 0.1399118638,
                                0.1091402725, 0.1001110485, 0.0838401162,
                                0.0784269939, 0.0639511764, 0.0])
    npt.assert_almost_equal(observed.proportion_explained,
                            exp_proportions)

    npt.assert_equal(observed.site_ids, self.ids)
def test_values(self):
    """Compare the PCoA scores with the stored reference values."""
    res = self.ordination.scores()

    # One eigenvalue is expected per entry of the first site row.
    npt.assert_almost_equal(len(res.eigvals), len(res.site[0]))

    reference_site = np.loadtxt(
        get_data_path('exp_PCoAEigenResults_site'))
    # normalize_signs returns the arguments for the comparison below.
    npt.assert_almost_equal(*normalize_signs(reference_site, res.site))

    reference_eigvals = np.array(
        [0.51236726, 0.30071909, 0.26791207, 0.20898868, 0.19169895,
         0.16054235, 0.15017696, 0.12245775, 0.0])
    npt.assert_almost_equal(res.eigvals, reference_eigvals)

    reference_proportions = np.array(
        [0.2675738328, 0.157044696, 0.1399118638, 0.1091402725,
         0.1001110485, 0.0838401162, 0.0784269939, 0.0639511764, 0.0])
    npt.assert_almost_equal(res.proportion_explained,
                            reference_proportions)

    npt.assert_equal(res.site_ids, self.ids)
def test_values(self):
    """Compare PCoA output for ``self.dist_matrix`` with stored values.

    Adapted from cogent's `test_principal_coordinate_analysis`: "I took
    the example in the book (see intro info), and did the principal
    coordinates analysis, plotted the data and it looked right".
    """
    with warnings.catch_warnings():
        # RuntimeWarnings are ignored while the ordination is computed.
        warnings.filterwarnings('ignore', category=RuntimeWarning)
        result = PCoA(self.dist_matrix).scores()

    nonzero_eigvals = [0.73599103, 0.26260032, 0.14926222, 0.06990457,
                       0.02956972, 0.01931184]
    nonzero_prop_expl = [0.58105792, 0.20732046, 0.1178411, 0.05518899,
                         0.02334502, 0.01524651]
    # Both vectors are padded with eight zeros, giving 14 entries each.
    padding = [0.] * 8

    exp_eigvals = np.array(nonzero_eigvals + padding)
    exp_site = np.loadtxt(get_data_path('exp_PCoAzeros_site'))
    exp_prop_expl = np.array(nonzero_prop_expl + padding)
    exp_site_ids = ['%d' % number for number in range(14)]

    npt.assert_almost_equal(result.eigvals, exp_eigvals)
    # Absolute values are compared because a column's sign can be swapped.
    npt.assert_almost_equal(np.abs(result.site), exp_site)
    npt.assert_almost_equal(result.proportion_explained, exp_prop_expl)
    npt.assert_equal(result.site_ids, exp_site_ids)
def setup(self):
    """Load the example3 Y and X tables onto the test instance.

    Data from table 11.3 in Legendre & Legendre 1998.
    """
    for attribute, filename in (('Y', 'example3_Y'), ('X', 'example3_X')):
        setattr(self, attribute, np.loadtxt(get_data_path(filename)))
def setup(self):
    """Build the CA ordination with explicit site and species ids.

    Data from table 9.11 in Legendre & Legendre 1998.
    """
    self.X = np.loadtxt(get_data_path('L&L_CA_data'))
    numbers = (1, 2, 3)
    site_ids = ['Site%d' % n for n in numbers]
    species_ids = ['Species%d' % n for n in numbers]
    self.ordination = CA(self.X, site_ids, species_ids)
def setUp(self):
    """Load distance matrices, data frames and expected result tables."""

    def read_indexed(filename):
        # Tab-separated table whose first column becomes the index.
        return pd.read_csv(get_data_path(filename), sep='\t', index_col=0)

    # First dataset: a subset of the Lauber et al. 2009 "88 Soils"
    # dataset, altered to exercise various aspects of the code, including
    # (but not limited to):
    #
    # - distance matrix IDs and data frame (metadata) IDs are not in
    #   exactly the same order
    # - the data frame has an extra sample that is absent from the
    #   distance matrix
    # - that extra sample has non-numeric and missing values in some of
    #   its cells
    #
    # Further variations of the distance matrix and data frame cover
    # different row/column orderings, extra non-numeric data frame
    # columns, etc.
    #
    # The dataset is also non-trivial in size (6 samples, 11 environment
    # variables) and its data frame contains positive/negative floats and
    # integers.
    self.dm = DistanceMatrix.from_file(get_data_path('dm.txt'))

    # Same distance matrix conceptually, with rows and columns (i.e. the
    # ID order) rearranged.
    reordered_path = get_data_path('dm_reordered.txt')
    self.dm_reordered = DistanceMatrix.from_file(reordered_path)

    self.df = read_indexed('df.txt')

    # Like the data frame above, but with an extra non-numeric column and
    # some other rows and columns reordered.
    self.df_extra_column = read_indexed('df_extra_column.txt')

    # Every column of the original data frame (all of them numeric).
    self.cols = self.df.columns.tolist()

    # Second dataset, derived from vegan::bioenv's example dataset
    # (varespec and varechem). The original consists of a site x species
    # table (e.g., OTU table) and a data frame of environmental
    # variables. Because the bioenv function defined here takes a
    # distance matrix, a Bray-Curtis distance matrix derived from the
    # site x species table is used (matching what vegan::bioenv does when
    # given an OTU table with its default distance measure). The data
    # frame holds only the numeric environmental variables of interest
    # for these tests: log(N), P, K, Ca, pH, Al
    vegan_dm_path = get_data_path('bioenv_dm_vegan.txt')
    self.dm_vegan = DistanceMatrix.from_file(vegan_dm_path)
    vegan_df_path = get_data_path('bioenv_df_vegan.txt')
    self.df_vegan = pd.read_csv(vegan_df_path, sep='\t',
                                converters={0: str})
    self.df_vegan.set_index('#SampleID', inplace=True)

    # Expected results.
    self.exp_results = read_indexed('exp_results.txt')
    self.exp_results_single_column = read_indexed(
        'exp_results_single_column.txt')
    self.exp_results_different_column_order = read_indexed(
        'exp_results_different_column_order.txt')
    self.exp_results_vegan = read_indexed('bioenv_exp_results_vegan.txt')
def setup(self):
    """Build the RDA ordination shared by the tests.

    Data from table 11.3 in Legendre & Legendre 1998.
    """
    y_table = np.loadtxt(get_data_path('example2_Y'))
    x_table = np.loadtxt(get_data_path('example2_X'))
    self.ordination = RDA(y_table, x_table)
def setup(self):
    """Build a PCoA ordination from the 'PCoA_sample_data_2' matrix.

    Sample ids are the row indices of the matrix rendered as strings and
    are kept on ``self.ids``.
    """
    matrix = np.loadtxt(get_data_path('PCoA_sample_data_2'))
    # A real list rather than a lazy ``map`` object: on Python 3 a map is
    # a one-shot iterator, so once consumed by DistanceMatrix it would
    # leave self.ids empty for anything that reads it afterwards.
    self.ids = [str(i) for i in range(matrix.shape[0])]
    dist_matrix = DistanceMatrix(matrix, self.ids)
    self.ordination = PCoA(dist_matrix)
def test_scaling2_site(self):
    """Scaling-2 site scores match the stored vegan values (4 decimals)."""
    observed = self.ordination.scores(2)
    reference_path = get_data_path('example3_site_scaling2_from_vegan')
    reference = np.loadtxt(reference_path)
    npt.assert_almost_equal(observed.site, reference, decimal=4)
def setup(self):
    """Load the 'PCoA_sample_data' matrix into ``self.dist_matrix``.

    Sample data set from page 111 of W.J Krzanowski. Principles of
    multivariate analysis, 2000, Oxford University Press.
    """
    matrix = np.loadtxt(get_data_path('PCoA_sample_data'))
    # Ids are the row indices as strings. They are materialized as a list
    # because a Python 3 ``map`` object can only be iterated once.
    ids = [str(i) for i in range(matrix.shape[0])]
    self.dist_matrix = DistanceMatrix(matrix, ids)
def setup_class(cls):
    """Create the expected results and data-file lists on the class.

    Builds one ``OrdinationResults`` each for CA, CCA, PCoA and RDA,
    stores them in ``cls.scores`` and records the matching score files
    plus the names of the files expected to fail parsing.
    """
    # CA results
    ca_scores = OrdinationResults(
        eigvals=np.array([0.0961330159181, 0.0409418140138]),
        species=np.array([[0.408869425742, 0.0695518116298],
                          [-0.1153860437, -0.299767683538],
                          [-0.309967102571, 0.187391917117]]),
        site=np.array([[-0.848956053187, 0.882764759014],
                       [-0.220458650578, -1.34482000302],
                       [1.66697179591, 0.470324389808]]),
        biplot=None,
        site_constraints=None,
        proportion_explained=None,
        species_ids=['Species%d' % n for n in (1, 2, 3)],
        site_ids=['Site%d' % n for n in (1, 2, 3)])

    # CCA results
    cca_scores = OrdinationResults(
        eigvals=np.array([0.366135830393, 0.186887643052,
                          0.0788466514249, 0.082287840501,
                          0.0351348475787, 0.0233265839374,
                          0.0099048981912, 0.00122461669234,
                          0.000417454724117]),
        species=np.loadtxt(get_data_path('exp_OrdRes_CCA_species')),
        site=np.loadtxt(get_data_path('exp_OrdRes_CCA_site')),
        biplot=np.array(
            [[-0.169746767979, 0.63069090084, 0.760769036049],
             [-0.994016563505, 0.0609533148724, -0.0449369418179],
             [0.184352565909, -0.974867543612, 0.0309865007541]]),
        site_constraints=np.loadtxt(
            get_data_path('exp_OrdRes_CCA_site_constraints')),
        proportion_explained=None,
        species_ids=['Species%d' % n for n in range(9)],
        site_ids=['Site%d' % n for n in range(10)])

    # PCoA results
    pcoa_scores = OrdinationResults(
        eigvals=np.array([0.512367260461, 0.300719094427,
                          0.267912066004, 0.208988681078,
                          0.19169895326, 0.16054234528,
                          0.15017695712, 0.122457748167, 0.0]),
        species=None,
        site=np.loadtxt(get_data_path('exp_OrdRes_PCoA_site')),
        biplot=None,
        site_constraints=None,
        proportion_explained=np.array(
            [0.267573832777, 0.15704469605, 0.139911863774,
             0.109140272454, 0.100111048503, 0.0838401161912,
             0.0784269939011, 0.0639511763509, 0.0]),
        species_ids=None,
        site_ids=['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                  'PC.593', 'PC.355', 'PC.607', 'PC.634'])

    # RDA results
    rda_scores = OrdinationResults(
        eigvals=np.array([25.8979540892, 14.9825779819, 8.93784077262,
                          6.13995623072, 1.68070536498, 0.57735026919,
                          0.275983624351]),
        species=np.loadtxt(get_data_path('exp_OrdRes_RDA_species')),
        site=np.loadtxt(get_data_path('exp_OrdRes_RDA_site')),
        biplot=np.array(
            [[0.422650019179, -0.559142585857, -0.713250678211],
             [0.988495963777, 0.150787422017, -0.0117848614073],
             [-0.556516618887, 0.817599992718, 0.147714267459],
             [-0.404079676685, -0.9058434809, -0.127150316558]]),
        site_constraints=np.loadtxt(
            get_data_path('exp_OrdRes_RDA_site_constraints')),
        proportion_explained=None,
        species_ids=['Species%d' % n for n in range(6)],
        site_ids=['Site%d' % n for n in range(10)])

    cls.scores = [ca_scores, cca_scores, pcoa_scores, rda_scores]
    # Score files, listed in the same order as cls.scores.
    cls.test_paths = ['L&L_CA_data_scores', 'example3_scores',
                      'PCoA_sample_data_3_scores', 'example2_scores']

    # 'error1'..'error7' and 'v_error1'..'v_error14'.
    cls.fferror_test_paths = ['error%d' % n for n in range(1, 8)]
    cls.verror_test_paths = ['v_error%d' % n for n in range(1, 15)]
def setup(self):
    """Build a PCoA ordination from the 'PCoA_sample_data_2' matrix.

    ``self.ids`` holds the row indices of the matrix as strings.
    """
    data = np.loadtxt(get_data_path('PCoA_sample_data_2'))
    n_rows = data.shape[0]
    self.ids = list(map(str, range(n_rows)))
    self.ordination = PCoA(DistanceMatrix(data, self.ids))
def setUp(self):
    """Set up both test datasets and the expected result tables."""
    # Options shared by every index-by-first-column table read below.
    table_options = dict(sep='\t', index_col=0)

    # The first dataset is a subset of the Lauber et al. 2009 "88 Soils"
    # dataset. It was altered to exercise various aspects of the code,
    # including (but not limited to):
    #
    # - the order of distance matrix IDs and of IDs in the data frame
    #   (metadata) is not exactly the same
    # - the data frame contains an extra sample missing from the distance
    #   matrix
    # - this extra sample has non-numeric and missing values in some of
    #   its cells
    #
    # Additional variations of the distance matrix and data frame test
    # different orderings of rows/columns, extra non-numeric data frame
    # columns, etc.
    #
    # It is useful as well because it is non-trivial in size (6 samples,
    # 11 environment variables) and includes positive/negative floats and
    # integers in the data frame.
    self.dm = DistanceMatrix.from_file(get_data_path('dm.txt'))

    # Rows and columns reordered (i.e., different ID order); conceptually
    # still the same distance matrix.
    self.dm_reordered = DistanceMatrix.from_file(
        get_data_path('dm_reordered.txt'))

    self.df = pd.read_csv(get_data_path('df.txt'), **table_options)

    # Similar to the data frame above, except for an extra non-numeric
    # column and some reordered rows and columns.
    self.df_extra_column = pd.read_csv(
        get_data_path('df_extra_column.txt'), **table_options)

    # All columns in the original data frame (all numeric).
    self.cols = self.df.columns.tolist()

    # The second dataset comes from vegan::bioenv's example dataset
    # (varespec and varechem), which includes a site x species table
    # (e.g., OTU table) and a data frame of environmental variables. The
    # bioenv function defined here accepts a distance matrix, so a
    # Bray-Curtis distance matrix derived from the site x species table
    # is used (this matches what vegan::bioenv does when provided an OTU
    # table, using its default distance measure). The data frame only
    # includes the numeric environmental variables of interest for these
    # tests: log(N), P, K, Ca, pH, Al
    self.dm_vegan = DistanceMatrix.from_file(
        get_data_path('bioenv_dm_vegan.txt'))
    self.df_vegan = pd.read_csv(get_data_path('bioenv_df_vegan.txt'),
                                sep='\t', converters={0: str})
    self.df_vegan.set_index('#SampleID', inplace=True)

    # Load expected results.
    expected_tables = (
        ('exp_results', 'exp_results.txt'),
        ('exp_results_single_column', 'exp_results_single_column.txt'),
        ('exp_results_different_column_order',
         'exp_results_different_column_order.txt'),
        ('exp_results_vegan', 'bioenv_exp_results_vegan.txt'),
    )
    for attribute, filename in expected_tables:
        frame = pd.read_csv(get_data_path(filename), **table_options)
        setattr(self, attribute, frame)
def test_scaling1_species(self):
    """Scaling-1 species scores match the stored vegan values."""
    observed = self.ordination.scores(1)
    reference_path = get_data_path('example3_species_scaling1_from_vegan')
    reference = np.loadtxt(reference_path)
    npt.assert_almost_equal(observed.species, reference, decimal=6)
def test_scaling2_site(self):
    """Compare scaling-2 site scores with the vegan reference file."""
    result = self.ordination.scores(2)
    expected_site = np.loadtxt(
        get_data_path('example3_site_scaling2_from_vegan')
    )
    # Agreement is only required to four decimal places here.
    npt.assert_almost_equal(result.site, expected_site, decimal=4)
def test_scaling1_species(self):
    """Compare scaling-1 species scores with the vegan reference file."""
    result = self.ordination.scores(1)
    expected_species = np.loadtxt(
        get_data_path('example3_species_scaling1_from_vegan')
    )
    npt.assert_almost_equal(result.species, expected_species, decimal=6)
def setup(self):
    """Build the PCoA ordination for 'PCoA_sample_data_3'.

    The distance matrix is parsed from an open file handle, and the
    sample ids used by the tests are stored on ``self.ids``.
    """
    # Opened in plain text mode; the former 'U' flag is redundant on
    # Python 3 and raises ValueError from Python 3.11 on.
    with open(get_data_path('PCoA_sample_data_3')) as lines:
        dist_matrix = DistanceMatrix.from_file(lines)

    self.ordination = PCoA(dist_matrix)

    self.ids = [
        'PC.636', 'PC.635', 'PC.356',
        'PC.481', 'PC.354', 'PC.593',
        'PC.355', 'PC.607', 'PC.634',
    ]
def setup_class(cls):
    """Populate the class with expected results and data-file names.

    One ``OrdinationResults`` is built per method (CA, CCA, PCoA, RDA)
    and collected in ``cls.scores``; ``cls.test_paths`` lists the score
    files in the same order, and the two ``*error_test_paths`` lists name
    the files expected to fail parsing.
    """
    def numbered(prefix, numbers):
        # e.g. numbered('Site', range(3)) -> ['Site0', 'Site1', 'Site2']
        return ['%s%d' % (prefix, number) for number in numbers]

    # CA results
    ca_eigvals = np.array([0.0961330159181, 0.0409418140138])
    ca_species = np.array([[0.408869425742, 0.0695518116298],
                           [-0.1153860437, -0.299767683538],
                           [-0.309967102571, 0.187391917117]])
    ca_site = np.array([[-0.848956053187, 0.882764759014],
                        [-0.220458650578, -1.34482000302],
                        [1.66697179591, 0.470324389808]])
    ca_scores = OrdinationResults(
        eigvals=ca_eigvals, species=ca_species, site=ca_site,
        biplot=None, site_constraints=None, proportion_explained=None,
        species_ids=numbered('Species', (1, 2, 3)),
        site_ids=numbered('Site', (1, 2, 3)))

    # CCA results
    cca_eigvals = np.array([0.366135830393, 0.186887643052,
                            0.0788466514249, 0.082287840501,
                            0.0351348475787, 0.0233265839374,
                            0.0099048981912, 0.00122461669234,
                            0.000417454724117])
    cca_species = np.loadtxt(get_data_path('exp_OrdRes_CCA_species'))
    cca_site = np.loadtxt(get_data_path('exp_OrdRes_CCA_site'))
    cca_biplot = np.array(
        [[-0.169746767979, 0.63069090084, 0.760769036049],
         [-0.994016563505, 0.0609533148724, -0.0449369418179],
         [0.184352565909, -0.974867543612, 0.0309865007541]])
    cca_constraints = np.loadtxt(
        get_data_path('exp_OrdRes_CCA_site_constraints'))
    cca_scores = OrdinationResults(
        eigvals=cca_eigvals, species=cca_species, site=cca_site,
        biplot=cca_biplot, site_constraints=cca_constraints,
        proportion_explained=None,
        species_ids=numbered('Species', range(9)),
        site_ids=numbered('Site', range(10)))

    # PCoA results
    pcoa_eigvals = np.array([0.512367260461, 0.300719094427,
                             0.267912066004, 0.208988681078,
                             0.19169895326, 0.16054234528,
                             0.15017695712, 0.122457748167, 0.0])
    pcoa_site = np.loadtxt(get_data_path('exp_OrdRes_PCoA_site'))
    pcoa_prop_explained = np.array(
        [0.267573832777, 0.15704469605, 0.139911863774, 0.109140272454,
         0.100111048503, 0.0838401161912, 0.0784269939011,
         0.0639511763509, 0.0])
    pcoa_site_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                     'PC.593', 'PC.355', 'PC.607', 'PC.634']
    pcoa_scores = OrdinationResults(
        eigvals=pcoa_eigvals, species=None, site=pcoa_site,
        biplot=None, site_constraints=None,
        proportion_explained=pcoa_prop_explained,
        species_ids=None, site_ids=pcoa_site_ids)

    # RDA results
    rda_eigvals = np.array([25.8979540892, 14.9825779819, 8.93784077262,
                            6.13995623072, 1.68070536498, 0.57735026919,
                            0.275983624351])
    rda_species = np.loadtxt(get_data_path('exp_OrdRes_RDA_species'))
    rda_site = np.loadtxt(get_data_path('exp_OrdRes_RDA_site'))
    rda_biplot = np.array(
        [[0.422650019179, -0.559142585857, -0.713250678211],
         [0.988495963777, 0.150787422017, -0.0117848614073],
         [-0.556516618887, 0.817599992718, 0.147714267459],
         [-0.404079676685, -0.9058434809, -0.127150316558]])
    rda_constraints = np.loadtxt(
        get_data_path('exp_OrdRes_RDA_site_constraints'))
    rda_scores = OrdinationResults(
        eigvals=rda_eigvals, species=rda_species, site=rda_site,
        biplot=rda_biplot, site_constraints=rda_constraints,
        proportion_explained=None,
        species_ids=numbered('Species', range(6)),
        site_ids=numbered('Site', range(10)))

    cls.scores = [ca_scores, cca_scores, pcoa_scores, rda_scores]
    cls.test_paths = ['L&L_CA_data_scores', 'example3_scores',
                      'PCoA_sample_data_3_scores', 'example2_scores']

    # 'error1'..'error6' and 'v_error1'..'v_error10'.
    cls.fferror_test_paths = numbered('error', range(1, 7))
    cls.verror_test_paths = numbered('v_error', range(1, 11))
def setup(self):
    """Load the CA input table and build the ordination from it.

    Data from table 9.11 in Legendre & Legendre 1998.
    """
    table_path = get_data_path('L&L_CA_data')
    self.X = np.loadtxt(table_path)
    self.ordination = CA(self.X)
def test_get_data_path():
    """``get_data_path`` returns '<this file's directory>/data/<name>'."""
    name = 'parrot'
    here = os.path.dirname(os.path.abspath(__file__))
    expected = os.path.join(here, 'data', name)
    observed = get_data_path(name)
    npt.assert_string_equal(observed, expected)