Пример #1
0
 def setup(self):
     """Build the CCA fixture from the ``example3`` data files.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998 (p. 590); loaded results were computed with vegan 2.0-8 and
     compared with table 11.5 where also available.
     """
     y_matrix = np.loadtxt(get_data_path('example3_Y'))
     x_matrix = np.loadtxt(get_data_path('example3_X'))
     # The last column of the X table is not passed to CCA.
     x_without_last = x_matrix[:, :-1]
     self.ordination = CCA(y_matrix, x_without_last)
Пример #2
0
    def setUp(self):
        """Create the option tuples, input matrices and expected statistics."""
        self.methods = ('pearson', 'spearman')
        self.alternatives = ('two-sided', 'greater', 'less')

        # Minimal 3x3 inputs, written as plain nested Python lists that mix
        # ints and floats.
        self.minx = [
            [0, 1, 2],
            [1, 0, 3],
            [2, 3, 0],
        ]
        self.miny = [
            [0, 2, 7],
            [2, 0, 6],
            [7, 6, 0],
        ]
        self.minz = [
            [0, 0.5, 0.25],
            [0.5, 0, 0.1],
            [0.25, 0.1, 0],
        ]

        # Distances without any variation, after Figure 10.20(b), pg. 603 in
        # L&L 3rd edition. The book example is 4x4; a 3x3 version is used so
        # it lines up with the minimal matrices above.
        self.no_variation = [
            [0, 0.667, 0.667],
            [0.667, 0, 0.667],
            [0.667, 0.667, 0],
        ]

        # Second dataset, derived from the example shipped with
        # vegan::mantel. "veg" holds Bray-Curtis distances computed from the
        # varespec data ("veg.dist" in that example); "env" holds Euclidean
        # distances computed from scaled varechem data ("env.dist").
        veg_path = get_data_path('mantel_veg_dm_vegan.txt')
        self.veg_dm_vegan = np.loadtxt(veg_path)
        env_path = get_data_path('mantel_env_dm_vegan.txt')
        self.env_dm_vegan = np.loadtxt(env_path)

        # Expected test statistics for method='pearson': x against y, then
        # x against z.
        self.exp_x_vs_y = 0.7559289
        self.exp_x_vs_z = -0.9897433
Пример #3
0
 def setup(self):
     """Build the CCA fixture from the ``example3`` data files.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998 (p. 590); loaded results were computed with vegan 2.0-8 and
     compared with table 11.5 where also available.
     """
     y_matrix = np.loadtxt(get_data_path('example3_Y'))
     x_matrix = np.loadtxt(get_data_path('example3_X'))
     # The last column of the X table is not passed to CCA.
     x_without_last = x_matrix[:, :-1]
     self.ordination = CCA(y_matrix, x_without_last)
Пример #4
0
    def setUp(self):
        """Prepare the distance-matrix fixtures used by the tests.

        Resolves the paths of three on-disk fixtures, reads their contents,
        and wraps both those contents and fixture constants defined outside
        this method in ``StringIO`` objects. The ``*_data`` attributes hold
        nested-list matrices defined next to the matching ``*_f`` inputs.
        """
        self.bad_dm_fp = get_data_path('bad_dm.txt')
        self.dm_2x2_asym_fp = get_data_path('dm_2x2_asym.txt')
        self.dm_3x3_fp = get_data_path('dm_3x3.txt')

        # Each file is read inside a ``with`` block so the handle is closed
        # even if reading fails. The legacy 'U' mode flag is dropped: it was
        # removed in Python 3.11 (open() raises ValueError) and universal
        # newlines are already the default for text mode on Python 3.
        with open(self.bad_dm_fp) as fd:
            self.bad_dm_f2_lines = fd.read()
        with open(self.dm_2x2_asym_fp) as fd:
            self.dm_2x2_asym_lines = fd.read()
        with open(self.dm_3x3_fp) as fd:
            self.dm_3x3_lines = fd.read()

        self.dm_1x1_data = [[0.0]]
        self.dm_1x1_f = StringIO(DM_1x1_F)

        self.dm_2x2_data = [[0.0, 0.123], [0.123, 0.0]]
        self.dm_2x2_f = StringIO(DM_2x2_F)

        self.dm_2x2_asym_data = [[0.0, 1.0], [-2.0, 0.0]]
        self.dm_2x2_asym_f = StringIO(self.dm_2x2_asym_lines)

        self.dm_3x3_data = [[0.0, 0.01, 4.2], [0.01, 0.0, 12.0],
                            [4.2, 12.0, 0.0]]
        self.dm_3x3_f = StringIO(self.dm_3x3_lines)
        # DM_3x3_WHITESPACE_F is joined with newlines before being wrapped.
        self.dm_3x3_whitespace_f = StringIO('\n'.join(DM_3x3_WHITESPACE_F))

        self.bad_dm_f1 = StringIO(BAD_DM_F1)
        self.bad_dm_f2 = StringIO(self.bad_dm_f2_lines)
        self.bad_dm_f3 = StringIO(BAD_DM_F3)
        self.bad_dm_f4 = StringIO(BAD_DM_F4)
        self.bad_dm_f5 = StringIO(BAD_DM_F5)
        self.bad_dm_f6 = StringIO(BAD_DM_F6)
Пример #5
0
    def setUp(self):
        """Prepare the distance-matrix fixtures used by the tests.

        Resolves the paths of three on-disk fixtures, reads their contents,
        and wraps both those contents and fixture constants defined outside
        this method in ``StringIO`` objects. The ``*_data`` attributes hold
        nested-list matrices defined next to the matching ``*_f`` inputs.
        """
        self.bad_dm_fp = get_data_path('bad_dm.txt')
        self.dm_2x2_asym_fp = get_data_path('dm_2x2_asym.txt')
        self.dm_3x3_fp = get_data_path('dm_3x3.txt')

        # Each file is read inside a ``with`` block so the handle is closed
        # even if reading fails. The legacy 'U' mode flag is dropped: it was
        # removed in Python 3.11 (open() raises ValueError) and universal
        # newlines are already the default for text mode on Python 3.
        with open(self.bad_dm_fp) as fd:
            self.bad_dm_f2_lines = fd.read()
        with open(self.dm_2x2_asym_fp) as fd:
            self.dm_2x2_asym_lines = fd.read()
        with open(self.dm_3x3_fp) as fd:
            self.dm_3x3_lines = fd.read()

        self.dm_1x1_data = [[0.0]]
        self.dm_1x1_f = StringIO(DM_1x1_F)

        self.dm_2x2_data = [[0.0, 0.123], [0.123, 0.0]]
        self.dm_2x2_f = StringIO(DM_2x2_F)

        self.dm_2x2_asym_data = [[0.0, 1.0], [-2.0, 0.0]]
        self.dm_2x2_asym_f = StringIO(self.dm_2x2_asym_lines)

        self.dm_3x3_data = [[0.0, 0.01, 4.2], [0.01, 0.0, 12.0],
                            [4.2, 12.0, 0.0]]
        self.dm_3x3_f = StringIO(self.dm_3x3_lines)
        # DM_3x3_WHITESPACE_F is joined with newlines before being wrapped.
        self.dm_3x3_whitespace_f = StringIO('\n'.join(DM_3x3_WHITESPACE_F))

        self.bad_dm_f1 = StringIO(BAD_DM_F1)
        self.bad_dm_f2 = StringIO(self.bad_dm_f2_lines)
        self.bad_dm_f3 = StringIO(BAD_DM_F3)
        self.bad_dm_f4 = StringIO(BAD_DM_F4)
        self.bad_dm_f5 = StringIO(BAD_DM_F5)
        self.bad_dm_f6 = StringIO(BAD_DM_F6)
Пример #6
0
    def setUp(self):
        """Create the option tuples, input matrices and expected statistics."""
        self.methods = ('pearson', 'spearman')
        self.alternatives = ('two-sided', 'greater', 'less')

        # Minimal 3x3 inputs, written as plain nested Python lists that mix
        # ints and floats.
        self.minx = [
            [0, 1, 2],
            [1, 0, 3],
            [2, 3, 0],
        ]
        self.miny = [
            [0, 2, 7],
            [2, 0, 6],
            [7, 6, 0],
        ]
        self.minz = [
            [0, 0.5, 0.25],
            [0.5, 0, 0.1],
            [0.25, 0.1, 0],
        ]

        # Distances without any variation, after Figure 10.20(b), pg. 603 in
        # L&L 3rd edition. The book example is 4x4; a 3x3 version is used so
        # it lines up with the minimal matrices above.
        self.no_variation = [
            [0, 0.667, 0.667],
            [0.667, 0, 0.667],
            [0.667, 0.667, 0],
        ]

        # Second dataset, derived from the example shipped with
        # vegan::mantel. "veg" holds Bray-Curtis distances computed from the
        # varespec data ("veg.dist" in that example); "env" holds Euclidean
        # distances computed from scaled varechem data ("env.dist").
        veg_path = get_data_path('mantel_veg_dm_vegan.txt')
        self.veg_dm_vegan = np.loadtxt(veg_path)
        env_path = get_data_path('mantel_env_dm_vegan.txt')
        self.env_dm_vegan = np.loadtxt(env_path)

        # Expected test statistics for method='pearson': x against y, then
        # x against z.
        self.exp_x_vs_y = 0.7559289
        self.exp_x_vs_z = -0.9897433
Пример #7
0
 def setup(self):
     """Build the labelled RDA fixture from the ``example2`` data files.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998.
     """
     y_matrix = np.loadtxt(get_data_path('example2_Y'))
     x_matrix = np.loadtxt(get_data_path('example2_X'))
     site_labels = [
         'Site0', 'Site1', 'Site2', 'Site3', 'Site4',
         'Site5', 'Site6', 'Site7', 'Site8', 'Site9',
     ]
     species_labels = [
         'Species0', 'Species1', 'Species2',
         'Species3', 'Species4', 'Species5',
     ]
     self.ordination = RDA(y_matrix, x_matrix, site_labels, species_labels)
Пример #8
0
    def test_from_file_error(self):
        """Check that ``OrdinationResults.from_file`` raises on bad inputs.

        Every file listed in ``self.fferror_test_paths`` must raise
        ``FileFormatError`` and every file listed in
        ``self.verror_test_paths`` must raise ``ValueError``.
        """
        expectations = (
            (self.fferror_test_paths, FileFormatError),
            (self.verror_test_paths, ValueError),
        )
        for test_paths, error_type in expectations:
            for test_path in test_paths:
                # Default text mode: the legacy 'U' flag was removed in
                # Python 3.11 (open() raises ValueError) and universal
                # newlines are already the default on Python 3.
                with open(get_data_path(test_path)) as f:
                    with npt.assert_raises(error_type):
                        OrdinationResults.from_file(f)
Пример #9
0
    def test_from_file_error(self):
        """Check that ``OrdinationResults.from_file`` raises on bad inputs.

        Every file listed in ``self.fferror_test_paths`` must raise
        ``FileFormatError`` and every file listed in
        ``self.verror_test_paths`` must raise ``ValueError``.
        """
        expectations = (
            (self.fferror_test_paths, FileFormatError),
            (self.verror_test_paths, ValueError),
        )
        for test_paths, error_type in expectations:
            for test_path in test_paths:
                # Default text mode: the legacy 'U' flag was removed in
                # Python 3.11 (open() raises ValueError) and universal
                # newlines are already the default on Python 3.
                with open(get_data_path(test_path)) as f:
                    with npt.assert_raises(error_type):
                        OrdinationResults.from_file(f)
Пример #10
0
 def setup(self):
     """Build the labelled RDA fixture from the ``example2`` data files.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998.
     """
     y_matrix = np.loadtxt(get_data_path('example2_Y'))
     x_matrix = np.loadtxt(get_data_path('example2_X'))
     site_labels = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4',
                    'Site5', 'Site6', 'Site7', 'Site8', 'Site9']
     species_labels = ['Species0', 'Species1', 'Species2',
                       'Species3', 'Species4', 'Species5']
     self.ordination = RDA(y_matrix, x_matrix, site_labels, species_labels)
Пример #11
0
    def test_scaling2(self):
        """Compare ``scores(2)`` output with the stored vegan values."""
        observed = self.ordination.scores(2)

        # Reference values were computed with vegan 2.0-8.
        species_path = get_data_path('example2_species_scaling2_from_vegan')
        expected_species = np.loadtxt(species_path)
        npt.assert_almost_equal(observed.species, expected_species,
                                decimal=6)

        site_path = get_data_path('example2_site_scaling2_from_vegan')
        expected_site = np.loadtxt(site_path)
        npt.assert_almost_equal(observed.site, expected_site, decimal=6)
Пример #12
0
    def test_scaling2(self):
        """Compare ``scores(2)`` output with the stored vegan values."""
        observed = self.ordination.scores(2)

        # Reference values were computed with vegan 2.0-8.
        species_path = get_data_path('example2_species_scaling2_from_vegan')
        expected_species = np.loadtxt(species_path)
        npt.assert_almost_equal(observed.species, expected_species,
                                decimal=6)

        site_path = get_data_path('example2_site_scaling2_from_vegan')
        expected_site = np.loadtxt(site_path)
        npt.assert_almost_equal(observed.site, expected_site, decimal=6)
Пример #13
0
 def setup(self):
     """Build the labelled CCA fixture from the ``example3`` data files.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998 (p. 590); loaded results were computed with vegan 2.0-8 and
     compared with table 11.5 where also available.
     """
     y_matrix = np.loadtxt(get_data_path('example3_Y'))
     x_matrix = np.loadtxt(get_data_path('example3_X'))
     # The last column of the X table is not passed to CCA.
     x_without_last = x_matrix[:, :-1]
     site_labels = [
         'Site0', 'Site1', 'Site2', 'Site3', 'Site4',
         'Site5', 'Site6', 'Site7', 'Site8', 'Site9',
     ]
     species_labels = [
         'Species0', 'Species1', 'Species2',
         'Species3', 'Species4', 'Species5',
         'Species6', 'Species7', 'Species8',
     ]
     self.ordination = CCA(y_matrix, x_without_last, site_labels,
                           species_labels)
Пример #14
0
 def setup(self):
     """Build the labelled CCA fixture from the ``example3`` data files.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998 (p. 590); loaded results were computed with vegan 2.0-8 and
     compared with table 11.5 where also available.
     """
     y_matrix = np.loadtxt(get_data_path('example3_Y'))
     x_matrix = np.loadtxt(get_data_path('example3_X'))
     # The last column of the X table is not passed to CCA.
     x_without_last = x_matrix[:, :-1]
     site_labels = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4',
                    'Site5', 'Site6', 'Site7', 'Site8', 'Site9']
     species_labels = ['Species0', 'Species1', 'Species2',
                       'Species3', 'Species4', 'Species5',
                       'Species6', 'Species7', 'Species8']
     self.ordination = CCA(y_matrix, x_without_last, site_labels,
                           species_labels)
Пример #15
0
    def test_values(self):
        """Compare PCoA output for ``self.dist_matrix`` with stored values.

        Original note, adapted from cogent's
        `test_principal_coordinate_analysis`: "I took the example in the
        book (see intro info), and did the principal coordinates analysis,
        plotted the data and it looked right".
        """
        # RuntimeWarning is silenced only while the PCoA object is built.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            pcoa = PCoA(self.dist_matrix)
        observed = pcoa.scores()

        expected_eigvals = np.array(
            [0.73599103, 0.26260032, 0.14926222, 0.06990457, 0.02956972,
             0.01931184, 0., 0., 0., 0., 0., 0., 0., 0.])
        expected_site = np.loadtxt(get_data_path('exp_PCoAzeros_site'))
        expected_proportions = np.array(
            [0.58105792, 0.20732046, 0.1178411, 0.05518899, 0.02334502,
             0.01524651, 0., 0., 0., 0., 0., 0., 0., 0.])
        expected_site_ids = ['0', '1', '2', '3', '4', '5', '6',
                             '7', '8', '9', '10', '11', '12', '13']

        npt.assert_almost_equal(observed.eigvals, expected_eigvals)
        # Absolute values are compared for the site scores because a column
        # can come back with its signs swapped.
        npt.assert_almost_equal(np.abs(observed.site), expected_site)
        npt.assert_almost_equal(observed.proportion_explained,
                                expected_proportions)
        npt.assert_equal(observed.site_ids, expected_site_ids)
Пример #16
0
 def setup(self):
     """Load the ``PCoA_sample_data_3`` distance matrix and build the PCoA.

     Stores the resulting ``PCoA`` object as ``self.ordination`` and a list
     of nine ID strings as ``self.ids``.
     """
     # Default text mode: the legacy 'U' flag was removed in Python 3.11
     # (open() raises ValueError) and universal newlines are already the
     # default on Python 3.
     with open(get_data_path('PCoA_sample_data_3')) as lines:
         dist_matrix = DistanceMatrix.from_file(lines)
     self.ordination = PCoA(dist_matrix)
     self.ids = [
         'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
         'PC.355', 'PC.607', 'PC.634'
     ]
Пример #17
0
    def test_from_file(self):
        """Yield equality checks for ``OrdinationResults.from_file``.

        Each fixture file is parsed twice, first through an open file handle
        and then through its path, and every parsed object is yielded
        together with ``self.check_OrdinationResults_equal`` and the
        expected scores.
        """
        fixtures = zip(self.scores, self.test_paths)
        for expected, relative_path in fixtures:
            for via_handle in (True, False):
                path = get_data_path(relative_path)
                if via_handle:
                    with open(path) as handle:
                        parsed = OrdinationResults.from_file(handle)
                else:
                    parsed = OrdinationResults.from_file(path)

                yield self.check_OrdinationResults_equal, parsed, expected
Пример #18
0
    def test_from_file(self):
        """Yield equality checks for ``OrdinationResults.from_file``.

        Each fixture file is parsed twice, first through an open file handle
        and then through its path, and every parsed object is yielded
        together with ``self.check_OrdinationResults_equal`` and the
        expected scores.
        """
        fixtures = zip(self.scores, self.test_paths)
        for expected, relative_path in fixtures:
            for via_handle in (True, False):
                path = get_data_path(relative_path)
                if via_handle:
                    with open(path) as handle:
                        parsed = OrdinationResults.from_file(handle)
                else:
                    parsed = OrdinationResults.from_file(path)

                yield self.check_OrdinationResults_equal, parsed, expected
Пример #19
0
 def setUp(self):
     """Store the resolved data paths of the fixture files on the instance."""
     # (attribute name, data file name), resolved in this order.
     fixtures = (
         ('fna1', 'fna1.fasta'),
         ('fna1gz', 'fna1.fna.gz'),
         ('fq1', 'fq1.fq'),
         ('fq1gz', 'fq1.fastq.gz'),
         ('qual1', 'fna1.qual'),
         ('noext', 'noextensionfasta'),
     )
     for attr_name, file_name in fixtures:
         setattr(self, attr_name, get_data_path(file_name))
Пример #20
0
 def setUp(self):
     """Store the resolved data paths of the fixture files on the instance."""
     # (attribute name, data file name), resolved in this order.
     fixtures = (
         ('fna1', 'fna1.fasta'),
         ('fna1gz', 'fna1.fna.gz'),
         ('fq1', 'fq1.fq'),
         ('fq1gz', 'fq1.fastq.gz'),
         ('qual1', 'fna1.qual'),
         ('noext', 'noextensionfasta'),
     )
     for attr_name, file_name in fixtures:
         setattr(self, attr_name, get_data_path(file_name))
Пример #21
0
    def setUp(self):
        """Build the distance matrices and load the expected result frames."""
        self.minx = DistanceMatrix([
            [0, 1, 2],
            [1, 0, 3],
            [2, 3, 0],
        ])
        self.miny = DistanceMatrix([
            [0, 2, 7],
            [2, 0, 6],
            [7, 6, 0],
        ])
        self.minz = DistanceMatrix([
            [0, 0.5, 0.25],
            [0.5, 0, 0.1],
            [0.25, 0.1, 0],
        ])
        self.min_dms = (self.minx, self.miny, self.minz)

        # Variants of self.minx and self.minz with one additional ID
        # appended at the end.
        x_extra_rows = [
            [0, 1, 2, 7],
            [1, 0, 3, 2],
            [2, 3, 0, 4],
            [7, 2, 4, 0],
        ]
        self.x_extra = DistanceMatrix(x_extra_rows, ['0', '1', '2', 'foo'])
        z_extra_rows = [
            [0, 0.5, 0.25, 3],
            [0.5, 0, 0.1, 24],
            [0.25, 0.1, 0, 5],
            [3, 24, 5, 0],
        ]
        self.z_extra = DistanceMatrix(z_extra_rows, ['0', '1', '2', 'bar'])

        # The p-value column (column index 3) has to be read as strings: the
        # in-memory results format p-values as strings with the correct
        # number of decimal places, so a float column would make the frames
        # compare unequal.
        p_val_conv = {3: str}

        def load_expected(file_name):
            # Tab-separated expected results, indexed by the first two
            # columns.
            return pd.read_csv(get_data_path(file_name), sep='\t',
                               index_col=(0, 1), converters=p_val_conv)

        self.exp_results_minimal = load_expected(
            'pwmantel_exp_results_minimal.txt')
        self.exp_results_minimal_with_labels = load_expected(
            'pwmantel_exp_results_minimal_with_labels.txt')
        self.exp_results_duplicate_dms = load_expected(
            'pwmantel_exp_results_duplicate_dms.txt')
        self.exp_results_na_p_value = load_expected(
            'pwmantel_exp_results_na_p_value.txt')
        self.exp_results_too_few_permutations = load_expected(
            'pwmantel_exp_results_too_few_permutations.txt')
        self.exp_results_reordered_distance_matrices = load_expected(
            'pwmantel_exp_results_reordered_distance_matrices.txt')
Пример #22
0
    def test_to_file(self):
        """Yield checks that ``to_file`` output matches the stored files.

        For every (scores, reference file) pair the results are written
        twice -- once to an in-memory ``StringIO`` and once to a temporary
        file addressed by name -- and each output is yielded together with
        ``npt.assert_equal`` and the reference file's contents.
        """
        for scores, test_path in zip(self.scores, self.test_paths):
            for file_type in ('file like', 'file name'):
                if file_type == 'file like':
                    obs_f = StringIO()
                    scores.to_file(obs_f)
                    obs = obs_f.getvalue()
                    obs_f.close()
                elif file_type == 'file name':
                    with tempfile.NamedTemporaryFile('r+') as temp_file:
                        scores.to_file(temp_file.name)
                        # Rewind before reading back what was written
                        # through the file name.
                        temp_file.flush()
                        temp_file.seek(0)
                        obs = temp_file.read()

                # Default text mode: the legacy 'U' flag was removed in
                # Python 3.11 (open() raises ValueError) and universal
                # newlines are already the default on Python 3.
                with open(get_data_path(test_path)) as f:
                    exp = f.read()

                yield npt.assert_equal, obs, exp
Пример #23
0
    def test_to_file(self):
        """Yield checks that ``to_file`` output matches the stored files.

        For every (scores, reference file) pair the results are written
        twice -- once to an in-memory ``StringIO`` and once to a temporary
        file addressed by name -- and each output is yielded together with
        ``npt.assert_equal`` and the reference file's contents.
        """
        for scores, test_path in zip(self.scores, self.test_paths):
            for file_type in ('file like', 'file name'):
                if file_type == 'file like':
                    obs_f = StringIO()
                    scores.to_file(obs_f)
                    obs = obs_f.getvalue()
                    obs_f.close()
                elif file_type == 'file name':
                    with tempfile.NamedTemporaryFile('r+') as temp_file:
                        scores.to_file(temp_file.name)
                        # Rewind before reading back what was written
                        # through the file name.
                        temp_file.flush()
                        temp_file.seek(0)
                        obs = temp_file.read()

                # Default text mode: the legacy 'U' flag was removed in
                # Python 3.11 (open() raises ValueError) and universal
                # newlines are already the default on Python 3.
                with open(get_data_path(test_path)) as f:
                    exp = f.read()

                yield npt.assert_equal, obs, exp
Пример #24
0
    def test_values(self):
        """Compare ``self.ordination.scores()`` with the stored values."""
        observed = self.ordination.scores()

        # There must be as many eigenvalues as entries in the first row of
        # the site scores.
        n_eigvals = len(observed.eigvals)
        n_site_columns = len(observed.site[0])
        npt.assert_almost_equal(n_eigvals, n_site_columns)

        # Site scores are passed through normalize_signs together with the
        # stored values before being compared.
        stored_site = np.loadtxt(get_data_path('exp_PCoAEigenResults_site'))
        npt.assert_almost_equal(*normalize_signs(stored_site, observed.site))

        stored_eigvals = np.array([
            0.51236726, 0.30071909, 0.26791207,
            0.20898868, 0.19169895, 0.16054235,
            0.15017696, 0.12245775, 0.0,
        ])
        npt.assert_almost_equal(observed.eigvals, stored_eigvals)

        stored_proportions = np.array([
            0.2675738328, 0.157044696, 0.1399118638,
            0.1091402725, 0.1001110485, 0.0838401162,
            0.0784269939, 0.0639511764, 0.0,
        ])
        npt.assert_almost_equal(observed.proportion_explained,
                                stored_proportions)

        npt.assert_equal(observed.site_ids, self.ids)
Пример #25
0
    def test_values(self):
        """Compare ``self.ordination.scores()`` with the stored values."""
        observed = self.ordination.scores()

        # There must be as many eigenvalues as entries in the first row of
        # the site scores.
        n_eigvals = len(observed.eigvals)
        n_site_columns = len(observed.site[0])
        npt.assert_almost_equal(n_eigvals, n_site_columns)

        # Site scores are passed through normalize_signs together with the
        # stored values before being compared.
        stored_site = np.loadtxt(get_data_path('exp_PCoAEigenResults_site'))
        npt.assert_almost_equal(*normalize_signs(stored_site, observed.site))

        stored_eigvals = np.array([0.51236726, 0.30071909, 0.26791207,
                                   0.20898868, 0.19169895, 0.16054235,
                                   0.15017696, 0.12245775, 0.0])
        npt.assert_almost_equal(observed.eigvals, stored_eigvals)

        stored_proportions = np.array([0.2675738328, 0.157044696,
                                       0.1399118638, 0.1091402725,
                                       0.1001110485, 0.0838401162,
                                       0.0784269939, 0.0639511764, 0.0])
        npt.assert_almost_equal(observed.proportion_explained,
                                stored_proportions)

        npt.assert_equal(observed.site_ids, self.ids)
Пример #26
0
    def test_values(self):
        """Compare PCoA output for ``self.dist_matrix`` with stored values.

        Original note, adapted from cogent's
        `test_principal_coordinate_analysis`: "I took the example in the
        book (see intro info), and did the principal coordinates analysis,
        plotted the data and it looked right".
        """
        # RuntimeWarning is silenced only while the PCoA object is built.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', category=RuntimeWarning)
            pcoa = PCoA(self.dist_matrix)
        observed = pcoa.scores()

        expected_eigvals = np.array([
            0.73599103, 0.26260032, 0.14926222, 0.06990457, 0.02956972,
            0.01931184, 0., 0., 0., 0., 0., 0., 0., 0.,
        ])
        expected_site = np.loadtxt(get_data_path('exp_PCoAzeros_site'))
        expected_proportions = np.array([
            0.58105792, 0.20732046, 0.1178411, 0.05518899, 0.02334502,
            0.01524651, 0., 0., 0., 0., 0., 0., 0., 0.,
        ])
        expected_site_ids = [
            '0', '1', '2', '3', '4', '5', '6',
            '7', '8', '9', '10', '11', '12', '13',
        ]

        npt.assert_almost_equal(observed.eigvals, expected_eigvals)
        # Absolute values are compared for the site scores because a column
        # can come back with its signs swapped.
        npt.assert_almost_equal(np.abs(observed.site), expected_site)
        npt.assert_almost_equal(observed.proportion_explained,
                                expected_proportions)
        npt.assert_equal(observed.site_ids, expected_site_ids)
Пример #27
0
 def setup(self):
     """Load the ``example3`` Y and X tables onto the instance.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998.
     """
     y_path = get_data_path('example3_Y')
     self.Y = np.loadtxt(y_path)
     x_path = get_data_path('example3_X')
     self.X = np.loadtxt(x_path)
Пример #28
0
 def setup(self):
     """Load the ``example3`` Y and X tables onto the instance.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998.
     """
     y_path = get_data_path('example3_Y')
     self.Y = np.loadtxt(y_path)
     x_path = get_data_path('example3_X')
     self.X = np.loadtxt(x_path)
Пример #29
0
 def setup(self):
     """Build the labelled CA fixture from the ``L&L_CA_data`` file.

     Per the original note: data from table 9.11 in Legendre & Legendre
     1998.
     """
     data_path = get_data_path('L&L_CA_data')
     self.X = np.loadtxt(data_path)
     site_labels = ['Site1', 'Site2', 'Site3']
     species_labels = ['Species1', 'Species2', 'Species3']
     self.ordination = CA(self.X, site_labels, species_labels)
Пример #30
0
    def setUp(self):
        """Load the distance matrices, data frames and expected results.

        Primary dataset (per the original notes): a subset of the Lauber et
        al. 2009 "88 Soils" dataset, altered to exercise various aspects of
        the code, including (but not limited to):

        - the distance matrix IDs and the data frame (metadata) IDs are not
          in exactly the same order
        - the data frame contains an extra sample that is absent from the
          distance matrix
        - that extra sample has non-numeric and missing values in some cells

        Further variations of the distance matrix and data frame cover
        different row/column orderings, extra non-numeric data frame
        columns, etc. The dataset is also non-trivial in size (6 samples, 11
        environment variables) and contains positive/negative floats and
        integers in the data frame.
        """
        def read_indexed_tsv(file_name):
            # Tab-separated data file whose first column becomes the index.
            return pd.read_csv(get_data_path(file_name), sep='\t',
                               index_col=0)

        self.dm = DistanceMatrix.from_file(get_data_path('dm.txt'))

        # Conceptually the same distance matrix, with its rows and columns
        # (i.e. the IDs) in a different order.
        reordered_path = get_data_path('dm_reordered.txt')
        self.dm_reordered = DistanceMatrix.from_file(reordered_path)

        self.df = read_indexed_tsv('df.txt')

        # Like self.df, but with one additional non-numeric column and with
        # some of the other rows and columns reordered.
        self.df_extra_column = read_indexed_tsv('df_extra_column.txt')

        # Every column of the original data frame (all of them numeric).
        self.cols = self.df.columns.tolist()

        # Second dataset, derived from the example dataset of vegan::bioenv
        # (varespec and varechem). The original consists of a site x species
        # table (e.g., OTU table) plus a data frame of environmental
        # variables. Because the bioenv function under test takes a distance
        # matrix, a Bray-Curtis distance matrix derived from the site x
        # species table is used, matching what vegan::bioenv does with its
        # default distance measure when handed an OTU table. The data frame
        # keeps only the numeric environmental variables of interest for
        # these tests: log(N), P, K, Ca, pH, Al
        vegan_dm_path = get_data_path('bioenv_dm_vegan.txt')
        self.dm_vegan = DistanceMatrix.from_file(vegan_dm_path)
        vegan_df_path = get_data_path('bioenv_df_vegan.txt')
        self.df_vegan = pd.read_csv(vegan_df_path, sep='\t',
                                    converters={0: str})
        self.df_vegan.set_index('#SampleID', inplace=True)

        # Expected results.
        self.exp_results = read_indexed_tsv('exp_results.txt')
        self.exp_results_single_column = read_indexed_tsv(
            'exp_results_single_column.txt')
        self.exp_results_different_column_order = read_indexed_tsv(
            'exp_results_different_column_order.txt')
        self.exp_results_vegan = read_indexed_tsv(
            'bioenv_exp_results_vegan.txt')
Пример #31
0
 def setup(self):
     """Build the RDA fixture from the ``example2`` data files.

     Per the original note: data from table 11.3 in Legendre & Legendre
     1998.
     """
     y_matrix = np.loadtxt(get_data_path('example2_Y'))
     x_matrix = np.loadtxt(get_data_path('example2_X'))
     rda = RDA(y_matrix, x_matrix)
     self.ordination = rda
Пример #32
0
 def setup(self):
     """Build the PCoA fixture from the ``PCoA_sample_data_2`` matrix.

     ``self.ids`` holds one string ID per matrix row ('0', '1', ...) and is
     also handed to ``DistanceMatrix`` as the matrix IDs.
     """
     matrix = np.loadtxt(get_data_path('PCoA_sample_data_2'))
     # Materialise the IDs as a list. On Python 3 ``map`` returns a one-shot
     # iterator, so after ``DistanceMatrix`` consumed it ``self.ids`` would
     # be exhausted and useless for later comparisons.
     self.ids = [str(i) for i in range(matrix.shape[0])]
     dist_matrix = DistanceMatrix(matrix, self.ids)
     self.ordination = PCoA(dist_matrix)
Пример #33
0
    def test_scaling2_site(self):
        """Compare the ``scores(2)`` site scores with the stored vegan file."""
        observed = self.ordination.scores(2)

        site_path = get_data_path('example3_site_scaling2_from_vegan')
        expected_site = np.loadtxt(site_path)
        npt.assert_almost_equal(observed.site, expected_site, decimal=4)
Пример #34
0
 def setup(self):
     """Load ``PCoA_sample_data`` into ``self.dist_matrix``.

     Per the original note: sample data set from page 111 of W.J
     Krzanowski, Principles of multivariate analysis, 2000, Oxford
     University Press.
     """
     matrix = np.loadtxt(get_data_path('PCoA_sample_data'))
     n_rows = matrix.shape[0]
     # One string ID per matrix row: '0', '1', ...
     row_ids = map(str, range(n_rows))
     self.dist_matrix = DistanceMatrix(matrix, row_ids)
Пример #35
0
    def setup_class(cls):
        """Build the expected ``OrdinationResults`` fixtures for the class.

        Four results objects (labelled CA, CCA, PCoA and RDA below) are
        created from inline values and from arrays loaded with
        ``np.loadtxt``. They are stored on ``cls`` together with the names
        of the data files the tests pair with them and the names of the
        files used by the error tests.
        """
        # NOTE(review): no ``@classmethod`` decorator is visible directly
        # above this definition in this excerpt -- confirm it is present in
        # the full file.
        # CA results
        eigvals = np.array([0.0961330159181, 0.0409418140138])
        species = np.array([[0.408869425742, 0.0695518116298],
                            [-0.1153860437, -0.299767683538],
                            [-0.309967102571, 0.187391917117]])
        site = np.array([[-0.848956053187, 0.882764759014],
                         [-0.220458650578, -1.34482000302],
                         [1.66697179591, 0.470324389808]])
        # Fields left as None are passed through to OrdinationResults as-is.
        biplot = None
        site_constraints = None
        prop_explained = None
        species_ids = ['Species1', 'Species2', 'Species3']
        site_ids = ['Site1', 'Site2', 'Site3']
        ca_scores = OrdinationResults(eigvals=eigvals, species=species,
                                      site=site, biplot=biplot,
                                      site_constraints=site_constraints,
                                      proportion_explained=prop_explained,
                                      species_ids=species_ids,
                                      site_ids=site_ids)
        # CCA results (species, site and site constraints come from files)
        eigvals = np.array([0.366135830393, 0.186887643052, 0.0788466514249,
                            0.082287840501, 0.0351348475787, 0.0233265839374,
                            0.0099048981912, 0.00122461669234,
                            0.000417454724117])
        species = np.loadtxt(get_data_path('exp_OrdRes_CCA_species'))
        site = np.loadtxt(get_data_path('exp_OrdRes_CCA_site'))
        biplot = np.array([[-0.169746767979, 0.63069090084, 0.760769036049],
                           [-0.994016563505, 0.0609533148724,
                            -0.0449369418179],
                           [0.184352565909, -0.974867543612, 0.0309865007541]])
        site_constraints = np.loadtxt(
            get_data_path('exp_OrdRes_CCA_site_constraints'))
        prop_explained = None
        species_ids = ['Species0', 'Species1', 'Species2', 'Species3',
                       'Species4', 'Species5', 'Species6', 'Species7',
                       'Species8']
        site_ids = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4', 'Site5',
                    'Site6', 'Site7', 'Site8', 'Site9']
        cca_scores = OrdinationResults(eigvals=eigvals, species=species,
                                       site=site, biplot=biplot,
                                       site_constraints=site_constraints,
                                       proportion_explained=prop_explained,
                                       species_ids=species_ids,
                                       site_ids=site_ids)
        # PCoA results (no species, biplot or site constraints; this is the
        # only fixture here with proportion_explained set)
        eigvals = np.array([0.512367260461, 0.300719094427, 0.267912066004,
                            0.208988681078, 0.19169895326, 0.16054234528,
                            0.15017695712, 0.122457748167, 0.0])
        species = None
        site = np.loadtxt(get_data_path('exp_OrdRes_PCoA_site'))
        biplot = None
        site_constraints = None
        prop_explained = np.array([0.267573832777, 0.15704469605,
                                   0.139911863774, 0.109140272454,
                                   0.100111048503, 0.0838401161912,
                                   0.0784269939011, 0.0639511763509, 0.0])
        species_ids = None
        site_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
                    'PC.355', 'PC.607', 'PC.634']
        pcoa_scores = OrdinationResults(eigvals=eigvals, species=species,
                                        site=site, biplot=biplot,
                                        site_constraints=site_constraints,
                                        proportion_explained=prop_explained,
                                        species_ids=species_ids,
                                        site_ids=site_ids)
        # RDA results (species, site and site constraints come from files)
        eigvals = np.array([25.8979540892, 14.9825779819, 8.93784077262,
                            6.13995623072, 1.68070536498, 0.57735026919,
                            0.275983624351])
        species = np.loadtxt(get_data_path('exp_OrdRes_RDA_species'))
        site = np.loadtxt(get_data_path('exp_OrdRes_RDA_site'))
        biplot = np.array([[0.422650019179, -0.559142585857, -0.713250678211],
                           [0.988495963777, 0.150787422017, -0.0117848614073],
                           [-0.556516618887, 0.817599992718, 0.147714267459],
                           [-0.404079676685, -0.9058434809, -0.127150316558]])
        site_constraints = np.loadtxt(
            get_data_path('exp_OrdRes_RDA_site_constraints'))
        prop_explained = None
        species_ids = ['Species0', 'Species1', 'Species2', 'Species3',
                       'Species4', 'Species5']
        site_ids = ['Site0', 'Site1', 'Site2', 'Site3', 'Site4', 'Site5',
                    'Site6', 'Site7', 'Site8', 'Site9']
        rda_scores = OrdinationResults(eigvals=eigvals, species=species,
                                       site=site, biplot=biplot,
                                       site_constraints=site_constraints,
                                       proportion_explained=prop_explained,
                                       species_ids=species_ids,
                                       site_ids=site_ids)

        # Both lists follow the same CA, CCA, PCoA, RDA order, so entry i of
        # test_paths goes with entry i of scores.
        cls.scores = [ca_scores, cca_scores, pcoa_scores, rda_scores]
        cls.test_paths = ['L&L_CA_data_scores', 'example3_scores',
                          'PCoA_sample_data_3_scores', 'example2_scores']

        # Data file names for the error tests, grouped into two lists
        # (``fferror`` and ``verror``).
        cls.fferror_test_paths = ['error1', 'error2', 'error3', 'error4',
                                  'error5', 'error6', 'error7']
        cls.verror_test_paths = ['v_error1', 'v_error2', 'v_error3',
                                 'v_error4', 'v_error5', 'v_error6',
                                 'v_error7', 'v_error8', 'v_error9',
                                 'v_error10', 'v_error11', 'v_error12',
                                 'v_error13', 'v_error14']
Пример #36
0
 def setup(self):
     """Build a PCoA ordination from the 'PCoA_sample_data_2' matrix.

     The IDs stored on ``self.ids`` are the stringified row indices of
     the loaded matrix.
     """
     data = np.loadtxt(get_data_path('PCoA_sample_data_2'))
     n_rows = data.shape[0]
     self.ids = list(map(str, range(n_rows)))
     self.ordination = PCoA(DistanceMatrix(data, self.ids))
Пример #37
0
    def setUp(self):
        def _read_indexed_tsv(name):
            # Tab-separated data file whose first column becomes the index.
            return pd.read_csv(get_data_path(name), sep='\t', index_col=0)

        # --- Primary dataset -------------------------------------------
        # A subset of the Lauber et al. 2009 "88 Soils" dataset, modified
        # to exercise several code paths, including (but not limited to):
        #
        # - the IDs in the distance matrix and in the data frame (metadata)
        #   are not in exactly the same order
        # - the data frame carries one extra sample that is absent from the
        #   distance matrix
        # - that extra sample holds non-numeric and missing values in some
        #   cells
        #
        # Further variants of the distance matrix and data frame (below)
        # cover different row/column orderings, extra non-numeric columns,
        # etc.
        #
        # The dataset is also non-trivial in size (6 samples, 11
        # environment variables) and mixes positive/negative floats and
        # integers in the data frame.
        dm_path = get_data_path('dm.txt')
        self.dm = DistanceMatrix.from_file(dm_path)

        # Conceptually the same distance matrix, but with rows and columns
        # (i.e., the IDs) in a different order.
        dm_reordered_path = get_data_path('dm_reordered.txt')
        self.dm_reordered = DistanceMatrix.from_file(dm_reordered_path)

        self.df = _read_indexed_tsv('df.txt')

        # Like the data frame above, but with one additional non-numeric
        # column and with some rows and columns reordered.
        self.df_extra_column = _read_indexed_tsv('df_extra_column.txt')

        # Every column of the original data frame (all of them numeric).
        self.cols = self.df.columns.tolist()

        # --- vegan-derived dataset --------------------------------------
        # Derived from the example dataset of vegan::bioenv (varespec and
        # varechem). The original consists of a site x species table
        # (e.g., OTU table) plus a data frame of environmental variables.
        # Because the bioenv function defined here takes a distance matrix,
        # a Bray-Curtis distance matrix computed from the site x species
        # table is used (matching what vegan::bioenv does with its default
        # distance measure when handed an OTU table). The data frame is
        # restricted to the numeric environmental variables relevant to
        # these tests: log(N), P, K, Ca, pH, Al
        vegan_dm_path = get_data_path('bioenv_dm_vegan.txt')
        self.dm_vegan = DistanceMatrix.from_file(vegan_dm_path)

        # The first column is forced to str, then '#SampleID' is promoted
        # to the index in place.
        vegan_df_path = get_data_path('bioenv_df_vegan.txt')
        self.df_vegan = pd.read_csv(vegan_df_path, sep='\t',
                                    converters={0: str})
        self.df_vegan.set_index('#SampleID', inplace=True)

        # --- Expected results -------------------------------------------
        self.exp_results = _read_indexed_tsv('exp_results.txt')
        self.exp_results_single_column = _read_indexed_tsv(
            'exp_results_single_column.txt')
        self.exp_results_different_column_order = _read_indexed_tsv(
            'exp_results_different_column_order.txt')
        self.exp_results_vegan = _read_indexed_tsv(
            'bioenv_exp_results_vegan.txt')
Пример #38
0
 def setup(self):
     """Run CA on the data from table 9.11 in Legendre & Legendre 1998.

     The loaded array is kept on ``self.X``; two three-element label
     lists are passed positionally to ``CA`` after it.
     """
     site_labels = ['Site1', 'Site2', 'Site3']
     species_labels = ['Species1', 'Species2', 'Species3']
     data_file = get_data_path('L&L_CA_data')
     self.X = np.loadtxt(data_file)
     self.ordination = CA(self.X, site_labels, species_labels)
Пример #39
0
    def test_scaling1_species(self):
        # Species scores computed with scaling 1 must agree, to 6 decimal
        # places, with the values stored in the vegan reference file.
        computed = self.ordination.scores(1)

        reference_file = get_data_path(
            'example3_species_scaling1_from_vegan')
        expected_species = np.loadtxt(reference_file)
        npt.assert_almost_equal(computed.species, expected_species,
                                decimal=6)
Пример #40
0
    def test_scaling2_site(self):
        # Site scores computed with scaling 2 must agree, to 4 decimal
        # places, with the values stored in the vegan reference file.
        computed = self.ordination.scores(2)

        reference_file = get_data_path('example3_site_scaling2_from_vegan')
        expected_site = np.loadtxt(reference_file)
        npt.assert_almost_equal(computed.site, expected_site, decimal=4)
Пример #41
0
 def setup(self):
     """Sample data set from page 111 of W.J Krzanowski. Principles
     of multivariate analysis, 2000, Oxford University Press.

     Loads the 'PCoA_sample_data' matrix and stores the DistanceMatrix
     built from it on ``self.dist_matrix``. IDs are the stringified row
     indices of the matrix.
     """
     matrix = np.loadtxt(get_data_path('PCoA_sample_data'))
     # Build the IDs as a concrete list: on Python 3 ``map`` returns a
     # lazy, one-shot iterator rather than a sequence, so a list is the
     # safe thing to hand to the constructor.
     ids = [str(i) for i in range(matrix.shape[0])]
     self.dist_matrix = DistanceMatrix(matrix, ids)
Пример #42
0
    def test_scaling1_species(self):
        # Compare the scaling-1 species scores against the stored vegan
        # reference values, requiring agreement to 6 decimal places.
        result = self.ordination.scores(1)

        vegan_file = get_data_path('example3_species_scaling1_from_vegan')
        reference = np.loadtxt(vegan_file)
        npt.assert_almost_equal(result.species, reference, decimal=6)
Пример #43
0
 def setup(self):
     """Build a PCoA ordination from the 'PCoA_sample_data_3' file.

     The distance matrix is parsed from an open file handle, the
     resulting ordination is stored on ``self.ordination`` and a list of
     nine 'PC.*' identifiers is stored on ``self.ids``.
     """
     # Open in plain text mode. The former 'U' (universal newlines) flag
     # is deprecated on Python 3, where text mode already translates
     # newlines by default, and Python 3.11+ rejects it with ValueError.
     with open(get_data_path('PCoA_sample_data_3'), 'r') as lines:
         dist_matrix = DistanceMatrix.from_file(lines)
     self.ordination = PCoA(dist_matrix)
     self.ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
                 'PC.355', 'PC.607', 'PC.634']
Пример #44
0
    def setup_class(cls):
        """Create the expected OrdinationResults fixtures on the class.

        Builds one expected result each for CA, CCA, PCoA and RDA and
        stores them, in that order, in ``cls.scores``. ``cls.test_paths``
        holds four data-file names in the same order (presumably paired
        index-by-index with the scores -- confirm against the tests that
        consume them). ``cls.fferror_test_paths`` and
        ``cls.verror_test_paths`` hold the names 'error1'..'error6' and
        'v_error1'..'v_error10' respectively.
        """
        # CA results: small enough to be spelled out entirely inline.
        expected_ca = OrdinationResults(
            eigvals=np.array([0.0961330159181, 0.0409418140138]),
            species=np.array([[0.408869425742, 0.0695518116298],
                              [-0.1153860437, -0.299767683538],
                              [-0.309967102571, 0.187391917117]]),
            site=np.array([[-0.848956053187, 0.882764759014],
                           [-0.220458650578, -1.34482000302],
                           [1.66697179591, 0.470324389808]]),
            biplot=None,
            site_constraints=None,
            proportion_explained=None,
            species_ids=['Species1', 'Species2', 'Species3'],
            site_ids=['Site1', 'Site2', 'Site3'])

        # CCA results: species/site/site-constraint tables come from files.
        expected_cca = OrdinationResults(
            eigvals=np.array([0.366135830393, 0.186887643052,
                              0.0788466514249, 0.082287840501,
                              0.0351348475787, 0.0233265839374,
                              0.0099048981912, 0.00122461669234,
                              0.000417454724117]),
            species=np.loadtxt(get_data_path('exp_OrdRes_CCA_species')),
            site=np.loadtxt(get_data_path('exp_OrdRes_CCA_site')),
            biplot=np.array(
                [[-0.169746767979, 0.63069090084, 0.760769036049],
                 [-0.994016563505, 0.0609533148724, -0.0449369418179],
                 [0.184352565909, -0.974867543612, 0.0309865007541]]),
            site_constraints=np.loadtxt(
                get_data_path('exp_OrdRes_CCA_site_constraints')),
            proportion_explained=None,
            # 'Species0' .. 'Species8'
            species_ids=['Species%d' % i for i in range(9)],
            # 'Site0' .. 'Site9'
            site_ids=['Site%d' % i for i in range(10)])

        # PCoA results: no species information, but proportion explained
        # is available.
        expected_pcoa = OrdinationResults(
            eigvals=np.array([0.512367260461, 0.300719094427,
                              0.267912066004, 0.208988681078,
                              0.19169895326, 0.16054234528,
                              0.15017695712, 0.122457748167, 0.0]),
            species=None,
            site=np.loadtxt(get_data_path('exp_OrdRes_PCoA_site')),
            biplot=None,
            site_constraints=None,
            proportion_explained=np.array([0.267573832777, 0.15704469605,
                                           0.139911863774, 0.109140272454,
                                           0.100111048503, 0.0838401161912,
                                           0.0784269939011, 0.0639511763509,
                                           0.0]),
            species_ids=None,
            site_ids=['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                      'PC.593', 'PC.355', 'PC.607', 'PC.634'])

        # RDA results: species/site/site-constraint tables come from files.
        expected_rda = OrdinationResults(
            eigvals=np.array([25.8979540892, 14.9825779819, 8.93784077262,
                              6.13995623072, 1.68070536498, 0.57735026919,
                              0.275983624351]),
            species=np.loadtxt(get_data_path('exp_OrdRes_RDA_species')),
            site=np.loadtxt(get_data_path('exp_OrdRes_RDA_site')),
            biplot=np.array(
                [[0.422650019179, -0.559142585857, -0.713250678211],
                 [0.988495963777, 0.150787422017, -0.0117848614073],
                 [-0.556516618887, 0.817599992718, 0.147714267459],
                 [-0.404079676685, -0.9058434809, -0.127150316558]]),
            site_constraints=np.loadtxt(
                get_data_path('exp_OrdRes_RDA_site_constraints')),
            proportion_explained=None,
            # 'Species0' .. 'Species5'
            species_ids=['Species%d' % i for i in range(6)],
            # 'Site0' .. 'Site9'
            site_ids=['Site%d' % i for i in range(10)])

        cls.scores = [expected_ca, expected_cca, expected_pcoa, expected_rda]
        cls.test_paths = ['L&L_CA_data_scores',
                          'example3_scores',
                          'PCoA_sample_data_3_scores',
                          'example2_scores']

        # 'error1' .. 'error6'
        cls.fferror_test_paths = ['error%d' % i for i in range(1, 7)]
        # 'v_error1' .. 'v_error10'
        cls.verror_test_paths = ['v_error%d' % i for i in range(1, 11)]
Пример #45
0
 def setup(self):
     """Run CA on the data from table 9.11 in Legendre & Legendre 1998.

     The loaded array is kept on ``self.X`` and the CA object built from
     it on ``self.ordination``.
     """
     data_file = get_data_path('L&L_CA_data')
     self.X = np.loadtxt(data_file)
     self.ordination = CA(self.X)
Пример #46
0
 def setup(self):
     """Load table 9.11 of Legendre & Legendre 1998 and run CA on it.

     Stores the raw array on ``self.X`` and the CA object on
     ``self.ordination``.
     """
     table_path = get_data_path('L&L_CA_data')
     self.X = np.loadtxt(table_path)
     self.ordination = CA(self.X)
Пример #47
0
def test_get_data_path():
    # get_data_path(name) is expected to return <dir of this file>/data/name.
    name = 'parrot'
    here = os.path.dirname(os.path.abspath(__file__))
    expected = os.path.join(here, 'data', name)
    npt.assert_string_equal(get_data_path(name), expected)
Пример #48
0
 def setup(self):
     """Run RDA on the data from table 11.3 in Legendre & Legendre 1998.

     Loads the 'example2_Y' and 'example2_X' files and passes them, in
     that order, to ``RDA``.
     """
     y_table = np.loadtxt(get_data_path('example2_Y'))
     x_table = np.loadtxt(get_data_path('example2_X'))
     self.ordination = RDA(y_table, x_table)