def test_scaling2(self):
        # Compare rda(..., scaling=2) on the fixture's Y and X with the
        # reference tables stored in the example2 data files. The comparison
        # is made with ignore_directionality=True and decimal=6.
        scores = rda(self.Y, self.X, scaling=2)

        # Load data as computed with vegan 2.0-8
        vegan_features = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_species_scaling2_from_vegan')),
            index=self.feature_ids,
            columns=self.pc_ids)

        vegan_samples = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_site_scaling2_from_vegan')),
            index=self.sample_ids,
            columns=self.pc_ids)

        sample_constraints = pd.DataFrame(
            np.loadtxt(get_data_path(
                'example2_sample_constraints_scaling2')),
            index=self.sample_ids,
            columns=self.pc_ids)

        # The axis labels are cropped to the number of columns actually
        # present in the biplot file before they are used as column names.
        mat = np.loadtxt(get_data_path(
            'example2_biplot_scaling2'))
        cropped_pc_ids = self.pc_ids[:mat.shape[1]]
        biplot_scores = pd.DataFrame(mat,
                                     index=self.env_ids,
                                     columns=cropped_pc_ids)

        proportion_explained = pd.Series([0.44275783, 0.25614586,
                                          0.15280354, 0.10497021,
                                          0.02873375, 0.00987052,
                                          0.00471828],
                                         index=self.pc_ids)

        eigvals = pd.Series([25.897954, 14.982578, 8.937841, 6.139956,
                             1.680705, 0.577350, 0.275984],
                            index=self.pc_ids)

        exp = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=vegan_samples,
            features=vegan_features,
            sample_constraints=sample_constraints,
            biplot_scores=biplot_scores,
            proportion_explained=proportion_explained,
            eigvals=eigvals)

        assert_ordination_results_equal(scores, exp,
                                        ignore_directionality=True,
                                        decimal=6)
def do_analysis(df, n_components=-1):
    """Fit a CCA and an RDA on columns of ``df`` and print the results.

    Parameters
    ----------
    df : pandas.DataFrame
        Table that must contain the response columns ``slump``, ``flow``
        and ``compressive_strength`` and the seven predictor columns
        listed in ``X_cols`` below.
    n_components : int, optional
        Number of CCA components to fit. With the default ``-1`` the
        count is selected automatically: ``optimise_cca_cv`` is evaluated
        for every count from 1 to the number of predictor columns, the
        MSE and R2 curves are passed to ``plot_metrics``, and the count
        with the smallest MSE is used.

    Returns
    -------
    None
        The CCA x-loadings, the RDA result and its
        ``proportion_explained`` are printed.
    """
    Y_cols = ["slump", "flow", "compressive_strength"]
    X_cols = [
        "cement", "slag", "fly_ash", "water", "superplasticizer",
        "coarse_aggregate", "fine_aggregate"
    ]
    Y = df[Y_cols]
    X = df[X_cols]

    if n_components == -1:
        r2s = []
        mses = []
        # NOTE(review): candidates run up to the number of predictors (7)
        # although Y has only 3 columns -- confirm that optimise_cca_cv /
        # CCA accept component counts above 3.
        xticks = np.arange(1, X.shape[1] + 1)
        for n_comp in xticks:
            # Only R2 and MSE are used; the first and last return values
            # of optimise_cca_cv are not needed here.
            _, r2, mse, _ = optimise_cca_cv(X, Y, n_comp)
            r2s.append(r2)
            mses.append(mse)
        plot_metrics(mses, 'MSE', 'min', xticks)
        plot_metrics(r2s, 'R2', 'max', xticks)
        # argmin is 0-based while component counts start at 1.
        n_components = int(np.argmin(mses)) + 1

    cca = CCA(n_components=n_components, scale=True)
    cca.fit(X, Y)

    # Label loading rows with the predictor names and columns LV1, LV2, ...
    row_labels = dict(enumerate(X_cols))
    col_labels = {i: 'LV' + str(i + 1) for i in range(n_components)}
    loadings = pd.DataFrame(cca.x_loadings_).rename(index=row_labels,
                                                    columns=col_labels)
    print(loadings)

    rda_res = rda(Y, X, scale_Y=True)
    print(rda_res)
    print(rda_res.proportion_explained)
    def test_biplot_score(self):
        # Compare rda(..., scale_Y=False, scaling=1) on the fixture's Y and
        # X with reference tables exported from vegan, using the first six
        # columns of each score table.

        rda_ = rda(y=self.Y, x=self.X, scale_Y=False, scaling=1)

        # Load data as computed with vegan 2.4-3:
        # library(vegan)
        # data(varechem)
        # data(varespec)
        # rda_ = rda(X=varespec, Y=varechem, scale=FALSE)
        # write.table(summary(rda_, scaling=1)$biplot,
        #             'vare_rda_biplot_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$sites,
        #                     'vare_rda_sites_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$species,
        #                     'vare_rda_species_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$constraints,
        #                     'vare_rda_constraints_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[2, ],
        #                     'vare_rda_propexpl_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[1, ],
        #                     'vare_rda_eigvals_from_vegan.csv', sep=',')

        vegan_features = pd.read_csv(
            get_data_path('vare_rda_species_from_vegan.csv'))
        vegan_samples = pd.read_csv(
            get_data_path('vare_rda_sites_from_vegan.csv'))
        vegan_biplot = pd.read_csv(
            get_data_path('vare_rda_biplot_from_vegan.csv'))
        vegan_constraints = pd.read_csv(
            get_data_path('vare_rda_constraints_from_vegan.csv'))
        vegan_eigvals = pd.read_csv(
            get_data_path('vare_rda_eigvals_from_vegan.csv'))
        vegan_eigvals = pd.Series(vegan_eigvals.x.values,
                                  index=rda_.eigvals.index)

        # scikit-bio returns singular values, whereas vegan returns eigenvalues
        vegan_eigvals = np.sqrt(vegan_eigvals * vegan_eigvals.shape[0])
        # The expected proportions are derived from the converted values, so
        # vare_rda_propexpl_from_vegan.csv does not need to be loaded here.
        vegan_propexpl = vegan_eigvals / vegan_eigvals.sum()

        # transform the output of rda_ to match column selection of vegan
        res_samples = rda_.samples.iloc[:, 0:6]
        res_features = rda_.features.iloc[:, 0:6]

        rda_ = OrdinationResults(
            'RDA',
            'Redundancy Analysis',
            samples=res_samples,
            features=res_features,
            sample_constraints=rda_.sample_constraints.iloc[:, 0:6],
            biplot_scores=rda_.biplot_scores.iloc[:, 0:6],
            proportion_explained=rda_.proportion_explained,
            eigvals=rda_.eigvals)

        exp = OrdinationResults('RDA',
                                'Redundancy Analysis',
                                samples=vegan_samples,
                                features=vegan_features,
                                sample_constraints=vegan_constraints,
                                biplot_scores=vegan_biplot,
                                proportion_explained=vegan_propexpl,
                                eigvals=vegan_eigvals)

        # This scaling constant is required to make skbio comparable to vegan.
        # .iloc selects by position regardless of the index labels; integer
        # keys passed to Series[...] on a labelled index are deprecated in
        # recent pandas.
        scaling = (rda_.eigvals.iloc[0] / rda_.eigvals.iloc[:6])
        exp.biplot_scores *= scaling
        assert_ordination_results_equal(rda_,
                                        exp,
                                        ignore_directionality=False,
                                        decimal=6)
    def test_biplot_score(self):
        # Compare rda(..., scale_Y=False, scaling=1) on the fixture's Y and
        # X with reference tables exported from vegan, using the first six
        # columns of each score table.

        rda_ = rda(y=self.Y, x=self.X, scale_Y=False, scaling=1)

        # Load data as computed with vegan 2.4-3:
        # library(vegan)
        # data(varechem)
        # data(varespec)
        # rda_ = rda(X=varespec, Y=varechem, scale=FALSE)
        # write.table(summary(rda_, scaling=1)$biplot,
        #             'vare_rda_biplot_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$sites,
        #                     'vare_rda_sites_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$species,
        #                     'vare_rda_species_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$constraints,
        #                     'vare_rda_constraints_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[2, ],
        #                     'vare_rda_propexpl_from_vegan.csv', sep=',')
        # write.table(summary(rda_, scaling=1)$cont$importance[1, ],
        #                     'vare_rda_eigvals_from_vegan.csv', sep=',')

        vegan_features = pd.read_csv(
            get_data_path('vare_rda_species_from_vegan.csv'))
        vegan_samples = pd.read_csv(
            get_data_path('vare_rda_sites_from_vegan.csv'))
        vegan_biplot = pd.read_csv(
            get_data_path('vare_rda_biplot_from_vegan.csv'))
        vegan_constraints = pd.read_csv(
            get_data_path('vare_rda_constraints_from_vegan.csv'))
        vegan_eigvals = pd.read_csv(
            get_data_path('vare_rda_eigvals_from_vegan.csv'))
        vegan_eigvals = pd.Series(
            vegan_eigvals.x.values, index=rda_.eigvals.index)

        # scikit-bio returns singular values, whereas vegan returns eigenvalues
        vegan_eigvals = np.sqrt(vegan_eigvals*vegan_eigvals.shape[0])
        # The expected proportions are derived from the converted values, so
        # vare_rda_propexpl_from_vegan.csv does not need to be loaded here.
        vegan_propexpl = vegan_eigvals/vegan_eigvals.sum()

        # transform the output of rda_ to match column selection of vegan
        res_samples = rda_.samples.iloc[:, 0:6]
        res_features = rda_.features.iloc[:, 0:6]

        rda_ = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=res_samples,
            features=res_features,
            sample_constraints=rda_.sample_constraints.iloc[:, 0:6],
            biplot_scores=rda_.biplot_scores.iloc[:, 0:6],
            proportion_explained=rda_.proportion_explained,
            eigvals=rda_.eigvals)

        exp = OrdinationResults(
            'RDA', 'Redundancy Analysis',
            samples=vegan_samples,
            features=vegan_features,
            sample_constraints=vegan_constraints,
            biplot_scores=vegan_biplot,
            proportion_explained=vegan_propexpl,
            eigvals=vegan_eigvals)

        # Check the cropped sample scores directly before the full comparison.
        pdt.assert_frame_equal(res_samples, vegan_samples)
        # This scaling constant is required to make skbio comparable to vegan.
        # .iloc selects by position regardless of the index labels; integer
        # keys passed to Series[...] on a labelled index are deprecated in
        # recent pandas.
        scaling = (rda_.eigvals.iloc[0] / rda_.eigvals.iloc[:6])
        exp.biplot_scores *= scaling
        assert_ordination_results_equal(
            rda_, exp,
            ignore_directionality=False,
            decimal=6)