    def test_simple(self):
        eigvals = [0.51236726, 0.30071909, 0.26791207, 0.20898868,
                   0.19169895, 0.16054235,  0.15017696,  0.12245775,
                   0.0]
        proportion_explained = [0.2675738328, 0.157044696, 0.1399118638,
                                0.1091402725, 0.1001110485,
                                0.0838401162, 0.0784269939,
                                0.0639511764, 0.0]
        sample_ids = ['PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354',
                      'PC.593', 'PC.355', 'PC.607', 'PC.634']
        axis_labels = ['PC%d' % i for i in range(1, 10)]

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=pd.Series(eigvals, index=axis_labels),
            samples=pd.DataFrame(
                np.loadtxt(get_data_path('exp_PCoAEigenResults_site')),
                index=sample_ids, columns=axis_labels),
            proportion_explained=pd.Series(proportion_explained,
                                           index=axis_labels))

        dm = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
        results = pcoa(dm)

        assert_ordination_results_equal(results, expected_results,
                                        ignore_directionality=True)
Example #2
    def test_scaling1(self):
        eigvals = pd.Series(np.array([0.09613302, 0.04094181]), self.pc_ids)
        # p. 458
        features = pd.DataFrame(
            np.array([
                [1.31871, -0.34374],  # V
                [-0.37215, 1.48150],
                [-0.99972, -0.92612]
            ]),
            self.feature_ids,
            self.pc_ids)
        samples = pd.DataFrame(
            np.array([
                [-0.26322, -0.17862],  # F
                [-0.06835, 0.27211],
                [0.51685, -0.09517]
            ]),
            self.sample_ids,
            self.pc_ids)
        exp = OrdinationResults('CA',
                                'Correspondance Analysis',
                                eigvals=eigvals,
                                features=features,
                                samples=samples)
        scores = ca(self.contingency, 1)

        assert_ordination_results_equal(exp,
                                        scores,
                                        decimal=5,
                                        ignore_directionality=True)
Example #3
    def test_simple(self):
        eigvals = [
            0.51236726, 0.30071909, 0.26791207, 0.20898868, 0.19169895,
            0.16054235, 0.15017696, 0.12245775, 0.0
        ]
        proportion_explained = [
            0.2675738328, 0.157044696, 0.1399118638, 0.1091402725,
            0.1001110485, 0.0838401162, 0.0784269939, 0.0639511764, 0.0
        ]
        sample_ids = [
            'PC.636', 'PC.635', 'PC.356', 'PC.481', 'PC.354', 'PC.593',
            'PC.355', 'PC.607', 'PC.634'
        ]
        axis_labels = ['PC%d' % i for i in range(1, 10)]

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=pd.Series(eigvals, index=axis_labels),
            samples=pd.DataFrame(np.loadtxt(
                get_data_path('exp_PCoAEigenResults_site')),
                                 index=sample_ids,
                                 columns=axis_labels),
            proportion_explained=pd.Series(proportion_explained,
                                           index=axis_labels))

        dm = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
        results = pcoa(dm)

        assert_ordination_results_equal(results,
                                        expected_results,
                                        ignore_directionality=True)
Example #4
    def test_scaling2(self):

        eigvals = pd.Series(np.array([0.09613302, 0.04094181]), self.pc_ids)
        # p. 460 L&L 1998
        features = pd.DataFrame(
            np.array([
                [0.40887, -0.06955],  # F_hat
                [-0.11539, 0.29977],
                [-0.30997, -0.18739]
            ]),
            self.feature_ids,
            self.pc_ids)
        samples = pd.DataFrame(
            np.array([
                [-0.84896, -0.88276],  # V_hat
                [-0.22046, 1.34482],
                [1.66697, -0.47032]
            ]),
            self.sample_ids,
            self.pc_ids)
        exp = OrdinationResults('CA',
                                'Correspondance Analysis',
                                eigvals=eigvals,
                                features=features,
                                samples=samples)

        scores = ca(self.contingency, 2)

        assert_ordination_results_equal(exp,
                                        scores,
                                        decimal=5,
                                        ignore_directionality=True)
Example #5
    def test_scaling2(self):

        scores = rda(self.Y, self.X, scaling=2)

        # Load data as computed with vegan 2.0-8
        vegan_features = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_species_scaling2_from_vegan')),
            index=self.feature_ids,
            columns=self.pc_ids)

        vegan_samples = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_site_scaling2_from_vegan')),
            index=self.sample_ids,
            columns=self.pc_ids)

        sample_constraints = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_sample_constraints_scaling2')),
            index=self.sample_ids,
            columns=self.pc_ids)

        mat = np.loadtxt(get_data_path(
            'example2_biplot_scaling2'))
        cropped_pc_ids = self.pc_ids[:mat.shape[1]]
        biplot_scores = pd.DataFrame(mat,
                                     index=self.env_ids,
                                     columns=cropped_pc_ids)

        proportion_explained = pd.Series([0.44275783, 0.25614586,
                                          0.15280354, 0.10497021,
                                          0.02873375, 0.00987052,
                                          0.00471828],
                                         index=self.pc_ids)

        eigvals = pd.Series([25.897954, 14.982578, 8.937841, 6.139956,
                             1.680705, 0.577350, 0.275984],
                            index=self.pc_ids)

        exp = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=vegan_samples,
            features=vegan_features,
            sample_constraints=sample_constraints,
            biplot_scores=biplot_scores,
            proportion_explained=proportion_explained,
            eigvals=eigvals)

        assert_ordination_results_equal(scores, exp,
                                        ignore_directionality=True,
                                        decimal=6)
Example #6
    def test_ordination_integration_callbacks(self):
        viz = Empress(self.tree,
                      self.table,
                      self.sample_metadata,
                      ordination=self.pcoa)

        # table should be unchanged and be a different id instance
        self.assertEqual(self.table, viz.table)
        self.assertNotEqual(id(self.table), id(viz.table))

        # sample metadata should be unchanged and be a different id instance
        assert_frame_equal(self.sample_metadata, viz.samples)
        self.assertNotEqual(id(self.sample_metadata), id(viz.samples))

        self.assertIsNone(viz.features)

        assert_ordination_results_equal(viz.ordination, self.pcoa)

        # emperor is instantiated as needed but not yet setup
        self.assertTrue(isinstance(viz._emperor, Emperor))

        # ensure the callbacks were rendered
        obs = viz.make_empress()
        self.assertTrue('setOnNodeMenuVisibleCallback' in obs)
        self.assertTrue('setOnNodeMenuHiddenCallback' in obs)
Example #7
    def test_init_with_ordination(self):
        viz = Empress(self.tree,
                      self.table,
                      self.sample_metadata,
                      ordination=self.pcoa,
                      shear_to_table=False)

        self.assertEqual(viz.base_url, 'support_files')
        self.assertEqual(
            list(viz.tree.B),
            [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0])

        names = ['a', 'e', None, 'b', 'g', None, 'd', 'h', None]
        for i in range(1, len(viz.tree) + 1):
            node = viz.tree.postorderselect(i)
            self.assertEqual(viz.tree.name(node), names[i - 1])

        # table should be unchanged and be a different id instance
        self.assertEqual(self.table, viz.table)
        self.assertNotEqual(id(self.table), id(viz.table))

        # sample metadata should be unchanged and be a different id instance
        assert_frame_equal(self.sample_metadata, viz.samples)
        self.assertNotEqual(id(self.sample_metadata), id(viz.samples))

        self.assertIsNone(viz.features)

        assert_ordination_results_equal(viz.ordination, self.pcoa)

        # emperor is instantiated as needed but not yet setup
        self.assertTrue(isinstance(viz._emperor, Emperor))
Example #8
    def test_init_with_ordination(self):
        viz = Empress(self.tree,
                      self.table,
                      self.sample_metadata,
                      ordination=self.pcoa,
                      filter_unobserved_features_from_phylogeny=False)

        self.assertEqual(viz.base_url, './')
        self.assertEqual(
            viz._bp_tree,
            [1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0])

        names = [
            'a', 'e', 'EmpressNode0', 'b', 'g', 'EmpressNode1', 'd', 'h',
            'EmpressNode2'
        ]
        for i, node in enumerate(viz.tree.postorder()):
            self.assertEqual(node.name, names[i])

        # table should be unchanged and be a different id instance
        assert_frame_equal(self.table, viz.table.T)
        self.assertNotEqual(id(self.table), id(viz.table))

        # sample metadata should be unchanged and be a different id instance
        assert_frame_equal(self.sample_metadata, viz.samples)
        self.assertNotEqual(id(self.sample_metadata), id(viz.samples))

        self.assertIsNone(viz.features)

        assert_ordination_results_equal(viz.ordination, self.pcoa)

        # emperor is instantiated as needed but not yet setup
        self.assertTrue(isinstance(viz._emperor, Emperor))
Example #9
    def test_scaling2(self):

        eigvals = pd.Series(np.array([0.09613302, 0.04094181]), self.pc_ids)
        # p. 460 L&L 1998
        features = pd.DataFrame(np.array([[0.40887, -0.06955],  # F_hat
                                          [-0.11539, 0.29977],
                                          [-0.30997, -0.18739]]),
                                self.feature_ids,
                                self.pc_ids)
        samples = pd.DataFrame(np.array([[-0.84896, -0.88276],  # V_hat
                                         [-0.22046, 1.34482],
                                         [1.66697, -0.47032]]),
                               self.sample_ids,
                               self.pc_ids)

        proportion_explained = pd.Series(np.array([0.701318, 0.298682]),
                                         self.pc_ids)

        exp = OrdinationResults('CA', 'Correspondance Analysis',
                                eigvals=eigvals, features=features,
                                samples=samples,
                                proportion_explained=proportion_explained)

        scores = ca(self.contingency, 2)

        assert_ordination_results_equal(exp, scores, decimal=5,
                                        ignore_directionality=True)
Example #10
    def test_book_example_dataset(self):
        # Adapted from PyCogent's `test_principal_coordinate_analysis`:
        #   "I took the example in the book (see intro info), and did
        #   the principal coordinates analysis, plotted the data and it
        #   looked right".
        eigvals = [
            0.73599103, 0.26260032, 0.14926222, 0.06990457, 0.02956972,
            0.01931184, 0., 0., 0., 0., 0., 0., 0., 0.
        ]
        proportion_explained = [
            0.58105792, 0.20732046, 0.1178411, 0.05518899, 0.02334502,
            0.01524651, 0., 0., 0., 0., 0., 0., 0., 0.
        ]
        sample_ids = [str(i) for i in range(14)]
        axis_labels = ['PC%d' % i for i in range(1, 15)]

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=pd.Series(eigvals, index=axis_labels),
            samples=pd.DataFrame(np.loadtxt(
                get_data_path('exp_PCoAzeros_site')),
                                 index=sample_ids,
                                 columns=axis_labels),
            proportion_explained=pd.Series(proportion_explained,
                                           index=axis_labels))

        results = npt.assert_warns(RuntimeWarning, pcoa, self.dm)

        # Note the absolute value because column can have signs swapped
        results.samples = np.abs(results.samples)
        assert_ordination_results_equal(results,
                                        expected_results,
                                        ignore_directionality=True)
Example #11
    def test_book_example_dataset(self):
        # Adapted from PyCogent's `test_principal_coordinate_analysis`:
        #   "I took the example in the book (see intro info), and did
        #   the principal coordinates analysis, plotted the data and it
        #   looked right".
        eigvals = [0.73599103, 0.26260032, 0.14926222, 0.06990457,
                   0.02956972, 0.01931184, 0., 0., 0., 0., 0., 0., 0.,
                   0.]
        proportion_explained = [0.58105792, 0.20732046, 0.1178411,
                                0.05518899, 0.02334502, 0.01524651, 0.,
                                0., 0., 0., 0., 0., 0., 0.]
        sample_ids = [str(i) for i in range(14)]
        axis_labels = ['PC%d' % i for i in range(1, 15)]

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=pd.Series(eigvals, index=axis_labels),
            samples=pd.DataFrame(
                np.loadtxt(get_data_path('exp_PCoAzeros_site')),
                index=sample_ids, columns=axis_labels),
            proportion_explained=pd.Series(proportion_explained,
                                           index=axis_labels))

        results = npt.assert_warns(RuntimeWarning, pcoa, self.dm)

        # Note the absolute value because column can have signs swapped
        results.samples = np.abs(results.samples)
        assert_ordination_results_equal(results, expected_results,
                                        ignore_directionality=True)
Example #12
    def test_read_valid_files(self):
        for fp, obj in zip(self.valid_fps, self.ordination_results_objs):
            obs = _ordination_to_ordination_results(fp)
            assert_ordination_results_equal(obs,
                                            obj,
                                            ignore_method_names=True,
                                            ignore_axis_labels=True)
Example #13
    def test_pcoa_fsvd(self):
        # Run fsvd, computing all dimensions.
        fsvd_result = pcoa(self.dm,
                           number_of_dimensions=self.dm.data.shape[0])

        # Run eigh, which computes all dimensions by default.
        eigh_result = pcoa(self.dm)

        assert_ordination_results_equal(fsvd_result, eigh_result,
                                        ignore_directionality=True,
                                        ignore_method_names=True)
Example #14
    def test_pcoa_fsvd(self):
        # Run fsvd, computing all dimensions.
        fsvd_result = pcoa(self.dm, number_of_dimensions=self.dm.data.shape[0])

        # Run eigh, which computes all dimensions by default.
        eigh_result = pcoa(self.dm)

        assert_ordination_results_equal(fsvd_result,
                                        eigh_result,
                                        ignore_directionality=True,
                                        ignore_method_names=True)
Example #15
    def test_from_seralized_results(self):
        # the current implementation of ordination results loses some
        # information, test that pcoa_biplot works fine regardless
        results = OrdinationResults.read(get_data_path('PCoA_skbio'))

        serialized = pcoa_biplot(results, self.descriptors)
        in_memory = pcoa_biplot(self.ordination, self.descriptors)

        assert_ordination_results_equal(serialized, in_memory,
                                        ignore_directionality=True,
                                        ignore_axis_labels=True,
                                        ignore_method_names=True)
Example #16
    def test_fsvd_inplace(self):
        dm1 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
        dm2 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))

        expected_results = pcoa(dm1, method="eigh", number_of_dimensions=3,
                                inplace=True)

        results = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                       inplace=True)

        assert_ordination_results_equal(results, expected_results,
                                        ignore_directionality=True,
                                        ignore_method_names=True)
Example #17
    def test_scaling2(self):
        scores = cca(self.Y, self.X, scaling=2)

        # Load data as computed with vegan 2.0-8
        vegan_features = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example3_species_scaling2_from_vegan')),
            index=self.feature_ids,
            columns=self.pc_ids)

        vegan_samples = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example3_site_scaling2_from_vegan')),
            index=self.sample_ids,
            columns=self.pc_ids)

        sample_constraints = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example3_sample_constraints_scaling2')),
            index=self.sample_ids,
            columns=self.pc_ids)

        mat = np.loadtxt(get_data_path(
            'example3_biplot_scaling2'))

        cropped_pc_ids = self.pc_ids[:mat.shape[1]]
        biplot_scores = pd.DataFrame(mat,
                                     index=self.env_ids,
                                     columns=cropped_pc_ids)

        proportion_explained = pd.Series([0.466911, 0.238327, 0.100548,
                                          0.104937, 0.044805, 0.029747,
                                          0.012631, 0.001562, 0.000532],
                                         index=self.pc_ids)
        eigvals = pd.Series([0.366136, 0.186888, 0.078847, 0.082288,
                             0.035135, 0.023327, 0.009905, 0.001225,
                             0.000417], index=self.pc_ids)

        exp = OrdinationResults(
            'CCA', 'Canonical Correspondence Analysis',
            samples=vegan_samples,
            features=vegan_features,
            sample_constraints=sample_constraints,
            biplot_scores=biplot_scores,
            proportion_explained=proportion_explained,
            eigvals=eigvals)

        assert_ordination_results_equal(scores, exp,
                                        decimal=6)
Example #18
    def test_roundtrip_read_write(self):
        for fp in self.valid_fps:
            # Read.
            obj1 = _ordination_to_ordination_results(fp)

            # Write.
            fh = io.StringIO()
            _ordination_results_to_ordination(obj1, fh)
            fh.seek(0)

            # Read.
            obj2 = _ordination_to_ordination_results(fh)
            fh.close()

            assert_ordination_results_equal(obj1, obj2)
Example #20
    def test_pcoa_biplot_from_ape(self):
        """Test against a reference implementation from R's ape package

        The test data was generated with the R script below and using a
        modified version of pcoa.biplot that returns the U matrix.

        library(ape)
        # files can be found in the test data folder of the ordination module
        y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
        dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)

        h = pcoa(dm)

        # biplot.pcoa will only calculate the biplot for two axes at a time
        acc = NULL
        for (axes in c(1, 3, 5, 7)) {
            new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
                              rn = rep('.', length(colnames(dm))) )

            if(is.null(acc)) {
                acc = new
            }
            else {
                b = acc
                acc <- cbind(acc, new)
            }
        }
        write.csv(acc, file='PCoA_biplot_projected_descriptors')
        """
        obs = pcoa_biplot(self.ordination, self.descriptors)

        # we'll build a dummy ordination results object based on the expected
        # the main thing we'll compare and modify is the features dataframe
        exp = deepcopy(obs)

        fp = get_data_path('PCoA_biplot_projected_descriptors')
        # R won't calculate the last dimension, so pad with zeros to make the
        # arrays comparable
        exp.features = pd.read_table(fp, sep=',', index_col=0)
        exp.features['Axis.9'] = np.zeros_like(exp.features['Axis.8'])

        # make the order comparable
        exp.features = exp.features.reindex(obs.features.index)

        assert_ordination_results_equal(obs,
                                        exp,
                                        ignore_directionality=True,
                                        ignore_axis_labels=True)
Example #21
    def test_scaling2(self):
        scores = cca(self.Y, self.X, scaling=2)

        # Load data as computed with vegan 2.0-8
        vegan_features = pd.DataFrame(np.loadtxt(
            get_data_path('example3_species_scaling2_from_vegan')),
                                      index=self.feature_ids,
                                      columns=self.pc_ids)

        vegan_samples = pd.DataFrame(np.loadtxt(
            get_data_path('example3_site_scaling2_from_vegan')),
                                     index=self.sample_ids,
                                     columns=self.pc_ids)

        sample_constraints = pd.DataFrame(np.loadtxt(
            get_data_path('example3_sample_constraints_scaling2')),
                                          index=self.sample_ids,
                                          columns=self.pc_ids)

        mat = np.loadtxt(get_data_path('example3_biplot_scaling2'))

        cropped_pc_ids = self.pc_ids[:mat.shape[1]]
        biplot_scores = pd.DataFrame(mat,
                                     index=self.env_ids,
                                     columns=cropped_pc_ids)

        proportion_explained = pd.Series([
            0.466911, 0.238327, 0.100548, 0.104937, 0.044805, 0.029747,
            0.012631, 0.001562, 0.000532
        ],
                                         index=self.pc_ids)
        eigvals = pd.Series([
            0.366136, 0.186888, 0.078847, 0.082288, 0.035135, 0.023327,
            0.009905, 0.001225, 0.000417
        ],
                            index=self.pc_ids)

        exp = OrdinationResults('CCA',
                                'Canonical Correspondence Analysis',
                                samples=vegan_samples,
                                features=vegan_features,
                                sample_constraints=sample_constraints,
                                biplot_scores=biplot_scores,
                                proportion_explained=proportion_explained,
                                eigvals=eigvals)

        assert_ordination_results_equal(scores, exp, decimal=6)
Example #22
    def test_extensive(self):
        eigvals = [
            0.3984635, 0.36405689, 0.28804535, 0.27479983, 0.19165361, 0.0
        ]
        proportion_explained = [
            0.2626621381, 0.2399817314, 0.1898758748, 0.1811445992,
            0.1263356565, 0.0
        ]
        sample_ids = [str(i) for i in range(6)]
        axis_labels = ['PC%d' % i for i in range(1, 7)]
        samples = [
            [-0.028597, 0.22903853, 0.07055272, 0.26163576, 0.28398669, 0.0],
            [
                0.37494056, 0.22334055, -0.20892914, 0.05057395, -0.18710366,
                0.0
            ],
            [
                -0.33517593, -0.23855979, -0.3099887, 0.11521787, -0.05021553,
                0.0
            ],
            [0.25412394, -0.4123464, 0.23343642, 0.06403168, -0.00482608, 0.0],
            [
                -0.28256844, 0.18606911, 0.28875631, -0.06455635, -0.21141632,
                0.0
            ],
            [0.01727687, 0.012458, -0.07382761, -0.42690292, 0.1695749, 0.0]
        ]

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=pd.Series(eigvals, index=axis_labels),
            samples=pd.DataFrame(samples,
                                 index=sample_ids,
                                 columns=axis_labels),
            proportion_explained=pd.Series(proportion_explained,
                                           index=axis_labels))

        data = np.loadtxt(get_data_path('PCoA_sample_data_2'))
        # test passing a numpy.ndarray and a DistanceMatrix to pcoa
        # gives same results
        for dm in (data, DistanceMatrix(data)):
            results = pcoa(dm)
            assert_ordination_results_equal(results,
                                            expected_results,
                                            ignore_directionality=True)
Example #23
    def test_pcoa_biplot_from_ape(self):
        """Test against a reference implementation from R's ape package

        The test data was generated with the R script below and using a
        modified version of pcoa.biplot that returns the U matrix.

        library(ape)
        # files can be found in the test data folder of the ordination module
        y = t(read.table('PCoA_biplot_descriptors', row.names = 1, header = 1))
        dm = read.table('PCoA_sample_data_3', row.names = 1, header = 1)

        h = pcoa(dm)

        # biplot.pcoa will only calculate the biplot for two axes at a time
        acc = NULL
        for (axes in c(1, 3, 5, 7)) {
            new = biplot.pcoa(h, y, plot.axes=c(axes, axes+1),
                              rn = rep('.', length(colnames(dm))) )

            if(is.null(acc)) {
                acc = new
            }
            else {
                b = acc
                acc <- cbind(acc, new)
            }
        }
        write.csv(acc, file='PCoA_biplot_projected_descriptors')
        """
        obs = pcoa_biplot(self.ordination, self.descriptors)

        # we'll build a dummy ordination results object based on the expected
        # the main thing we'll compare and modify is the features dataframe
        exp = deepcopy(obs)

        fp = get_data_path('PCoA_biplot_projected_descriptors')
        # R won't calculate the last dimension, so pad with zeros to make the
        # arrays comparable
        exp.features = pd.read_table(fp, sep=',', index_col=0)
        exp.features['Axis.9'] = np.zeros_like(exp.features['Axis.8'])

        # make the order comparable
        exp.features = exp.features.reindex(obs.features.index)

        assert_ordination_results_equal(obs, exp, ignore_directionality=True,
                                        ignore_axis_labels=True)
Example #24
    def test_scaling1(self):
        eigvals = pd.Series(np.array([0.09613302, 0.04094181]), self.pc_ids)
        # p. 458
        features = pd.DataFrame(np.array([[1.31871, -0.34374],  # V
                                          [-0.37215, 1.48150],
                                          [-0.99972, -0.92612]]),
                                self.feature_ids,
                                self.pc_ids)
        samples = pd.DataFrame(np.array([[-0.26322, -0.17862],  # F
                                         [-0.06835, 0.27211],
                                         [0.51685, -0.09517]]),
                               self.sample_ids,
                               self.pc_ids)
        exp = OrdinationResults('CA', 'Correspondance Analysis',
                                eigvals=eigvals, features=features,
                                samples=samples)
        scores = ca(self.contingency, 1)

        assert_ordination_results_equal(exp, scores, decimal=5,
                                        ignore_directionality=True)
Example #25
    def test_extensive(self):
        eigvals = [0.3984635, 0.36405689, 0.28804535, 0.27479983,
                   0.19165361, 0.0]
        proportion_explained = [0.2626621381, 0.2399817314,
                                0.1898758748, 0.1811445992,
                                0.1263356565, 0.0]
        sample_ids = [str(i) for i in range(6)]
        axis_labels = ['PC%d' % i for i in range(1, 7)]
        samples = [[-0.028597, 0.22903853, 0.07055272, 0.26163576,
                    0.28398669, 0.0],
                   [0.37494056, 0.22334055, -0.20892914, 0.05057395,
                    -0.18710366, 0.0],
                   [-0.33517593, -0.23855979, -0.3099887, 0.11521787,
                    -0.05021553, 0.0],
                   [0.25412394, -0.4123464, 0.23343642, 0.06403168,
                    -0.00482608, 0.0],
                   [-0.28256844, 0.18606911, 0.28875631, -0.06455635,
                    -0.21141632, 0.0],
                   [0.01727687, 0.012458, -0.07382761, -0.42690292,
                    0.1695749, 0.0]]

        expected_results = OrdinationResults(
            short_method_name='PCoA',
            long_method_name='Principal Coordinate Analysis',
            eigvals=pd.Series(eigvals, index=axis_labels),
            samples=pd.DataFrame(samples, index=sample_ids,
                                 columns=axis_labels),
            proportion_explained=pd.Series(proportion_explained,
                                           index=axis_labels))

        data = np.loadtxt(get_data_path('PCoA_sample_data_2'))
        # test passing a numpy.ndarray and a DistanceMatrix to pcoa
        # gives same results
        for dm in (data, DistanceMatrix(data)):
            results = pcoa(dm)
            assert_ordination_results_equal(results, expected_results,
                                            ignore_directionality=True)
Example #26
    def test_fsvd(self):
        dm1 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
        dm2 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))
        dm3 = DistanceMatrix.read(get_data_path('PCoA_sample_data_3'))

        # Test eigh vs. fsvd pcoa and inplace parameter
        expected_results = pcoa(dm1, method="eigh", number_of_dimensions=3,
                                inplace=False)

        results = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                       inplace=False)

        results_inplace = pcoa(dm2, method="fsvd", number_of_dimensions=3,
                               inplace=True)

        assert_ordination_results_equal(results, expected_results,
                                        ignore_directionality=True,
                                        ignore_method_names=True)

        assert_ordination_results_equal(results, results_inplace,
                                        ignore_directionality=True,
                                        ignore_method_names=True)

        # Test number_of_dimensions edge cases
        results2 = pcoa(dm3, method="fsvd", number_of_dimensions=0,
                        inplace=False)
        expected_results2 = pcoa(dm3, method="fsvd",
                                 number_of_dimensions=dm3.data.shape[0],
                                 inplace=False)

        assert_ordination_results_equal(results2, expected_results2,
                                        ignore_directionality=True,
                                        ignore_method_names=True)

        with self.assertRaises(ValueError):
            dim_too_large = dm1.data.shape[0] + 10
            pcoa(dm2, method="fsvd", number_of_dimensions=dim_too_large)

        with self.assertRaises(ValueError):
            pcoa(dm2, method="fsvd", number_of_dimensions=-1)

        with self.assertRaises(ValueError):
            dim_too_large = dm1.data.shape[0] + 10
            pcoa(dm2, method="eigh", number_of_dimensions=dim_too_large)

        with self.assertRaises(ValueError):
            pcoa(dm2, method="eigh", number_of_dimensions=-1)

        dm_big = DistanceMatrix.read(get_data_path('PCoA_sample_data_12dim'))
        with self.assertWarnsRegex(RuntimeWarning,
                                   "no value for number_of_dimensions"):
            pcoa(dm_big, method="fsvd", number_of_dimensions=0)
Example #27
    def test_biplot_score(self):

        rda_ = rda(y=self.Y, x=self.X, scale_Y=False, scaling=1)

        # Load data as computed with vegan 2.4-3:
        # library(vegan)
        # data(varechem)
        # data(varespec)
        # rda_ = rda(X=varespec, Y=varechem, scale=FALSE)
        # write.table(summary(rda_, scaling=1)$biplot,
        #             'vare_rda_biplot_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$sites,
        #                     'vare_rda_sites_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$species,
        #                     'vare_rda_species_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$constraints, #
        #                     'vare_rda_constraints_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[2, ],
        #                     'vare_rda_propexpl_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[1, ],
        #                     'vare_rda_eigvals_from_vegan.csv', sep=',')

        vegan_features = pd.read_csv(
            get_data_path('vare_rda_species_from_vegan.csv'))
        vegan_samples = pd.read_csv(
            get_data_path('vare_rda_sites_from_vegan.csv'))
        vegan_biplot = pd.read_csv(
            get_data_path('vare_rda_biplot_from_vegan.csv'))
        vegan_constraints = pd.read_csv(
            get_data_path('vare_rda_constraints_from_vegan.csv'))
        vegan_propexpl = pd.read_csv(
            get_data_path('vare_rda_propexpl_from_vegan.csv'))
        vegan_propexpl = pd.Series(
            vegan_propexpl.x.values, index=rda_.eigvals.index)
        vegan_eigvals = pd.read_csv(
            get_data_path('vare_rda_eigvals_from_vegan.csv'))
        vegan_eigvals = pd.Series(
            vegan_eigvals.x.values, index=rda_.eigvals.index)

        # scikit-bio returns singular values, whereas vegan returns eigenvalues
        vegan_eigvals = np.sqrt(vegan_eigvals*vegan_eigvals.shape[0])
        vegan_propexpl = vegan_eigvals/vegan_eigvals.sum()

        # transform the output of rda_ to match column selection of vegan
        res_samples = rda_.samples.iloc[:, 0:6]
        res_features = rda_.features.iloc[:, 0:6]

        rda_ = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=res_samples,
            features=res_features,
            sample_constraints=rda_.sample_constraints.iloc[:, 0:6],
            biplot_scores=rda_.biplot_scores.iloc[:, 0:6],
            proportion_explained=rda_.proportion_explained,
            eigvals=rda_.eigvals)

        exp = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=vegan_samples,
            features=vegan_features,
            sample_constraints=vegan_constraints,
            biplot_scores=vegan_biplot,
            proportion_explained=vegan_propexpl,
            eigvals=vegan_eigvals)

        pdt.assert_frame_equal(res_samples, vegan_samples)
        # This scaling constant is required to make skbio comparable to vegan.
        scaling = (rda_.eigvals[0] / rda_.eigvals[:6])
        exp.biplot_scores *= scaling
        assert_ordination_results_equal(
            rda_, exp,
            ignore_directionality=False,
            decimal=6)
Example #28
    def test_biplot_score(self):

        rda_ = rda(y=self.Y, x=self.X, scale_Y=False, scaling=1)

        # Load data as computed with vegan 2.4-3:
        # library(vegan)
        # data(varechem)
        # data(varespec)
        # rda_ = rda(X=varespec, Y=varechem, scale=FALSE)
        # write.table(summary(rda_, scaling=1)$biplot,
        #             'vare_rda_biplot_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$sites,
        #                     'vare_rda_sites_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$species,
        #                     'vare_rda_species_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$constraints, #
        #                     'vare_rda_constraints_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[2, ],
        #                     'vare_rda_propexpl_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[1, ],
        #                     'vare_rda_eigvals_from_vegan.csv', sep=',')

        vegan_features = pd.read_csv(
            get_data_path('vare_rda_species_from_vegan.csv'))
        vegan_samples = pd.read_csv(
            get_data_path('vare_rda_sites_from_vegan.csv'))
        vegan_biplot = pd.read_csv(
            get_data_path('vare_rda_biplot_from_vegan.csv'))
        vegan_constraints = pd.read_csv(
            get_data_path('vare_rda_constraints_from_vegan.csv'))
        vegan_propexpl = pd.read_csv(
            get_data_path('vare_rda_propexpl_from_vegan.csv'))
        vegan_propexpl = pd.Series(vegan_propexpl.x.values,
                                   index=rda_.eigvals.index)
        vegan_eigvals = pd.read_csv(
            get_data_path('vare_rda_eigvals_from_vegan.csv'))
        vegan_eigvals = pd.Series(vegan_eigvals.x.values,
                                  index=rda_.eigvals.index)

        # scikit-bio returns singular values, whereas vegan returns eigenvalues
        vegan_eigvals = np.sqrt(vegan_eigvals * vegan_eigvals.shape[0])
        vegan_propexpl = vegan_eigvals / vegan_eigvals.sum()

        # transform the output of rda_ to match column selection of vegan
        res_samples = rda_.samples.iloc[:, 0:6]
        res_features = rda_.features.iloc[:, 0:6]

        rda_ = OrdinationResults(
            'RDA',
            'Redundancy Analysis',
            samples=res_samples,
            features=res_features,
            sample_constraints=rda_.sample_constraints.iloc[:, 0:6],
            biplot_scores=rda_.biplot_scores.iloc[:, 0:6],
            proportion_explained=rda_.proportion_explained,
            eigvals=rda_.eigvals)

        exp = OrdinationResults('RDA',
                                'Redundancy Analysis',
                                samples=vegan_samples,
                                features=vegan_features,
                                sample_constraints=vegan_constraints,
                                biplot_scores=vegan_biplot,
                                proportion_explained=vegan_propexpl,
                                eigvals=vegan_eigvals)

        # This scaling constant is required to make skbio comparable to vegan.
        scaling = (rda_.eigvals[0] / rda_.eigvals[:6])
        exp.biplot_scores *= scaling
        assert_ordination_results_equal(rda_,
                                        exp,
                                        ignore_directionality=False,
                                        decimal=6)
Example #29
    def test_read_valid_files(self):
        for fp, obj in zip(self.valid_fps, self.ordination_results_objs):
            obs = _ordination_to_ordination_results(fp)
            assert_ordination_results_equal(
                obs, obj, ignore_method_names=True,
                ignore_axis_labels=True, ignore_biplot_scores_labels=True)
Example #30
    def test_assert_ordination_results_equal(self):
        minimal1 = OrdinationResults('foo', 'bar', pd.Series([1.0, 2.0]),
                                     pd.DataFrame([[1, 2, 3], [4, 5, 6]]))

        # a minimal set of results should be equal to itself
        assert_ordination_results_equal(minimal1, minimal1)

        # type mismatch
        with npt.assert_raises(AssertionError):
            assert_ordination_results_equal(minimal1, 'foo')

        # numeric values should be checked that they're almost equal
        almost_minimal1 = OrdinationResults(
            'foo', 'bar',
            pd.Series([1.0000001, 1.9999999]),
            pd.DataFrame([[1, 2, 3], [4, 5, 6]]))
        assert_ordination_results_equal(minimal1, almost_minimal1)

        # test each of the optional numeric attributes
        for attr in ('features', 'samples', 'biplot_scores',
                     'sample_constraints'):
            # missing optional numeric attribute in one, present in the other
            setattr(almost_minimal1, attr, pd.DataFrame([[1, 2], [3, 4]]))
            with npt.assert_raises(AssertionError):
                assert_ordination_results_equal(minimal1, almost_minimal1)
            setattr(almost_minimal1, attr, None)

            # optional numeric attributes present in both, but not almost equal
            setattr(minimal1, attr, pd.DataFrame([[1, 2], [3, 4]]))
            setattr(almost_minimal1, attr, pd.DataFrame([[1, 2],
                                                         [3.00002, 4]]))
            with npt.assert_raises(AssertionError):
                assert_ordination_results_equal(minimal1, almost_minimal1)
            setattr(minimal1, attr, None)
            setattr(almost_minimal1, attr, None)

            # optional numeric attributes present in both, and almost equal
            setattr(minimal1, attr, pd.DataFrame([[1.0, 2.0], [3.0, 4.0]]))
            setattr(almost_minimal1, attr,
                    pd.DataFrame([[1.0, 2.0], [3.00000002, 4]]))
            assert_ordination_results_equal(minimal1, almost_minimal1)
            setattr(minimal1, attr, None)
            setattr(almost_minimal1, attr, None)

        # missing optional numeric attribute in one, present in the other
        almost_minimal1.proportion_explained = pd.Series([1, 2, 3])
        with npt.assert_raises(AssertionError):
            assert_ordination_results_equal(minimal1, almost_minimal1)
        almost_minimal1.proportion_explained = None

        # optional numeric attributes present in both, but not almost equal
        minimal1.proportion_explained = pd.Series([1, 2, 3])
        almost_minimal1.proportion_explained = pd.Series([1, 2, 3.00002])
        with npt.assert_raises(AssertionError):
            assert_ordination_results_equal(minimal1, almost_minimal1)
        minimal1.proportion_explained = None
        almost_minimal1.proportion_explained = None

        # optional numeric attributes present in both, and almost equal
        minimal1.proportion_explained = pd.Series([1, 2, 3])
        almost_minimal1.proportion_explained = pd.Series([1, 2, 3.00000002])
        assert_ordination_results_equal(minimal1, almost_minimal1)
        minimal1.proportion_explained = None
        almost_minimal1.proportion_explained = None
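Taken together, the examples above exercise a small set of keyword arguments on assert_ordination_results_equal: decimal for numeric tolerance, plus ignore_directionality, ignore_method_names, ignore_axis_labels and ignore_biplot_scores_labels to relax parts of the comparison. The sketch below shows that pattern on hypothetical toy data rather than any of the test fixtures, assuming ignore_directionality compares coordinates only up to a per-axis sign flip.

# Minimal, self-contained sketch of the comparison pattern used above;
# the toy eigenvalues and coordinates are hypothetical, not from a fixture.
import numpy as np
import pandas as pd

from skbio import OrdinationResults
from skbio.util import assert_ordination_results_equal

axis_labels = ['PC1', 'PC2']
eigvals = pd.Series([0.7, 0.3], index=axis_labels)
samples = pd.DataFrame([[0.1, -0.2], [-0.1, 0.2]],
                       index=['s1', 's2'], columns=axis_labels)

obs = OrdinationResults('PCoA', 'Principal Coordinate Analysis',
                        eigvals=eigvals, samples=samples)

# Flip the sign of the second axis; with ignore_directionality=True the two
# results should still compare equal (coordinates matched up to sign).
exp = OrdinationResults('PCoA', 'Principal Coordinate Analysis',
                        eigvals=eigvals.copy(),
                        samples=samples * np.array([1.0, -1.0]))

assert_ordination_results_equal(obs, exp,
                                decimal=5,
                                ignore_directionality=True,
                                ignore_method_names=True)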