示例#1
0
    def perform(self):
        """Benchmark a bare neural network, then start the clustering sub-experiment.

        Two grid searches are run through ``self.gs_with_best_estimator`` on a
        pipeline whose only step is an ``MLPClassifier``:

        1. over ``NN__alpha`` / ``NN__hidden_layer_sizes`` values taken from
           ``self._nn_reg`` and ``self._nn_arch``;
        2. over ``self._details.best_nn_params``, tagged ``type='ass1'``.

        The ``cv_results_`` of each search are written as CSV to a path built
        from ``self._out``. Finally ``experiments.run_subexperiment`` is called
        with a ``clustering/`` output location.
        """
        # Adapted from https://github.com/JonathanTay/CS-7641-assignment-3/blob/master/benchmark.py
        self.log("Performing {}".format(self.experiment_name()))

        def make_pipeline():
            # Each search gets its own freshly constructed single-step pipeline.
            network = MLPClassifier(activation='relu', max_iter=2000, early_stopping=True,
                                    random_state=self._details.seed)
            return Pipeline([('NN', network)], memory=experiments.pipeline_memory)

        # %% benchmarking for chart type 2
        param_grid = {
            'NN__alpha': self._nn_reg,
            'NN__hidden_layer_sizes': self._nn_arch,
        }
        benchmark_pipe = make_pipeline()
        benchmark_search, _ = self.gs_with_best_estimator(benchmark_pipe, param_grid)
        self.log("Grid search complete")

        benchmark_results = pd.DataFrame(benchmark_search.cv_results_)
        benchmark_csv = self._out.format('{}_nn_bmk.csv'.format(self._details.ds_name))
        benchmark_results.to_csv(benchmark_csv)
        self.log("Done")

        # benchmark based on best params from assignment 1
        ass1_pipe = make_pipeline()
        ass1_search, _ = self.gs_with_best_estimator(ass1_pipe, self._details.best_nn_params, type='ass1')

        ass1_results = pd.DataFrame(ass1_search.cv_results_)
        ass1_csv = self._out.format('{}_ass1_nn_bmk.csv'.format(self._details.ds_name))
        ass1_results.to_csv(ass1_csv)

        # Run clustering as a subexperiment
        self.log("Running clustering sub-experiment")
        clustering_out = self._out.format('clustering/')
        experiments.run_subexperiment(self, clustering_out)
示例#2
0
    def perform_cluster(self, dim_param):
        """Evaluate the NN on PCA-reduced features, then cluster the reduced data.

        Args:
            dim_param: passed to ``PCA`` as ``n_components``.

        Steps, in order: grid-search a ``PCA -> MLPClassifier`` pipeline over
        ``self._details.best_nn_params`` (tagged ``type='ass1'``) and write its
        ``cv_results_`` to CSV; hand ``self.dump_for_clustering`` a callable
        that PCA-transforms an experiment's training features; reload the
        dataset from the path that call returns; run the clustering
        sub-experiment on the reloaded dataset.
        """
        banner = 'Running clustering for {} with dim param {}'.format(
            self.experiment_name(), dim_param)
        self.log(banner)
        # TODO: USE UNSUPERVISED METHOD TO GET THIS BEST VALUE
        # %% Data for 3
        # Set this from chart 2 and dump, use clustering script to finish up
        reducer = PCA(n_components=dim_param, random_state=self._details.seed)

        # ANN based on best params from assignment 1
        network = MLPClassifier(
            activation='relu',
            max_iter=2000,
            early_stopping=True,
            random_state=self._details.seed,
        )
        steps = [('pca', reducer), ('NN', network)]
        reduced_pipe = Pipeline(steps, memory=experiments.pipeline_memory)
        search, _ = self.gs_with_best_estimator(
            reduced_pipe, self._details.best_nn_params, type='ass1')

        cv_frame = pd.DataFrame(search.cv_results_)
        csv_name = '{}_ass1_dim_red.csv'.format(self._details.ds_name)
        cv_frame.to_csv(self._out.format(csv_name))

        def reduce_training_features(experiment):
            # Fits the same PCA instance used above on the experiment's
            # training features and returns the transformed matrix.
            return reducer.fit_transform(experiment.get_details().ds.training_x)

        hdf_path = self.dump_for_clustering(reduce_training_features)

        # Run clustering as a subexperiment
        self.log("Running clustering sub-experiment")
        reduced_ds = self._details.ds.reload_from_hdf(
            hdf_path=hdf_path,
            hdf_ds_name=self._details.ds_name,
            preprocess=False,
        )
        clustering_out = self._out.format('clustering/')
        experiments.run_subexperiment(self, clustering_out, reduced_ds)
示例#3
0
    def perform_cluster(self, dim_param):
        """Evaluate the NN on importance-filtered features, then cluster the filtered data.

        Args:
            dim_param: second argument given to ``ImportanceSelect`` alongside the fitted forest.

        A ``RandomForestClassifier`` is fitted on the training split and wrapped in an
        ``ImportanceSelect`` filter. A ``filter -> MLPClassifier`` pipeline is grid-searched over
        ``experiments.BEST_NN_PARAMS`` (tagged ``type='ass1'``) and its ``cv_results_`` written to
        CSV. The filter is then handed to ``self.dump_for_clustering``, the dataset is reloaded
        from the path that call returns, and the clustering sub-experiment runs on it.
        """
        banner = 'Running clustering for {} with dim param {}'.format(self.experiment_name(), dim_param)
        self.log(banner)

        # TODO: USE UNSUPERVISED METHOD TO GET THIS BEST VALUE
        # %% Data for 3
        # Set this from chart 2 and dump, use clustering script to finish up
        forest = RandomForestClassifier(
            n_estimators=100,
            class_weight='balanced',
            random_state=self._details.seed,
            n_jobs=self._details.threads,
        )
        forest.fit(self._details.ds.training_x, self._details.ds.training_y)
        selector = ImportanceSelect(forest, dim_param)

        # ANN based on best params from assignment 1
        network = MLPClassifier(activation='relu', max_iter=2000, early_stopping=True,
                                random_state=self._details.seed)
        steps = [('filter', selector), ('NN', network)]
        filtered_pipe = Pipeline(steps, memory=experiments.pipeline_memory)
        search, _ = self.gs_with_best_estimator(filtered_pipe, experiments.BEST_NN_PARAMS, type='ass1')

        cv_frame = pd.DataFrame(search.cv_results_)
        csv_name = '{}_ass1_dim_red.csv'.format(self._details.ds_name)
        cv_frame.to_csv(self._out.format(csv_name))

        def filter_training_features(experiment):
            # Applies the same selector instance to the experiment's training features and labels.
            dataset = experiment.get_details().ds
            return selector.fit_transform(dataset.training_x, experiment.get_details().ds.training_y)

        hdf_path = self.dump_for_clustering(filter_training_features)

        # Run clustering as a subexperiment
        self.log("Running clustering sub-experiment")
        filtered_ds = self._details.ds.reload_from_hdf(
            hdf_path=hdf_path,
            hdf_ds_name=self._details.ds_name,
            preprocess=False,
        )
        clustering_out = self._out.format('clustering/')
        experiments.run_subexperiment(self, clustering_out, filtered_ds)
示例#4
0
文件: ICA.py 项目: yifanguo247/CS7641
    def perform_cluster(self, dim_param):
        """Evaluate the NN on ICA-reduced features, then cluster the reduced data.

        Args:
            dim_param: user-requested dimensionality, passed to ``FastICA``
                as ``n_components``.

        A ``FastICA -> MLPClassifier`` pipeline is grid-searched over
        ``experiments.BEST_NN_PARAMS`` (tagged ``type='ass1'``) and its
        ``cv_results_`` are written to ``<ds_name>_ass1_dim_red.csv`` via
        ``self._out``. The ICA-transformed training features are then passed
        to ``self.dump_for_clustering``, the dataset is reloaded from the path
        that call returns, and the clustering sub-experiment runs on it.
        """
        banner = 'Running clustering for {} with dim param {}'.format(
            self.experiment_name(), dim_param)
        self.log(banner)
        # TODO: USE UNSUPERVISED METHOD TO GET THIS BEST VALUE
        # %% Data for 3
        # Set this from chart 2 and dump, use clustering script to finish up
        reducer = FastICA(n_components=dim_param,
                          random_state=self._details.seed)

        # ANN based on best params from assignment 1
        network = MLPClassifier(
            activation='relu',
            max_iter=2000,
            early_stopping=True,
            random_state=self._details.seed,
        )
        steps = [('ica', reducer), ('NN', network)]
        reduced_pipe = Pipeline(steps, memory=experiments.pipeline_memory)

        # Per the original author's note: performs the experiments and writes
        # the best estimator params to file.
        search, _ = self.gs_with_best_estimator(
            reduced_pipe, experiments.BEST_NN_PARAMS, type='ass1')

        # Assignment-1 NN results on the reduced data. Note the file name is
        # '<ds_name>_ass1_dim_red.csv'; it does not mention ICA.
        cv_frame = pd.DataFrame(search.cv_results_)
        csv_name = '{}_ass1_dim_red.csv'.format(self._details.ds_name)
        cv_frame.to_csv(self._out.format(csv_name))

        def reduce_training_features(experiment):
            # Fits the same FastICA instance used above on the experiment's
            # training features and returns the transformed matrix.
            return reducer.fit_transform(experiment.get_details().ds.training_x)

        # Per the original author's note: writes the transformed features plus
        # class to file and returns the path to that file.
        hdf_path = self.dump_for_clustering(reduce_training_features)

        # Run clustering as a subexperiment
        self.log("Running clustering sub-experiment")
        # Original author's note: this yields a new BankData object.
        reduced_ds = self._details.ds.reload_from_hdf(
            hdf_path=hdf_path,
            hdf_ds_name=self._details.ds_name,
            preprocess=False,
        )
        # Original author's note: output lands in './output/ICA/clustering/'.
        clustering_out = self._out.format('clustering/')
        experiments.run_subexperiment(self, clustering_out, reduced_ds)
示例#5
0
 def perform_cluster(self, dim_param):
     """Run the clustering sub-experiment directly.

     ``dim_param`` is accepted but not used by this implementation; the
     sub-experiment is started with only this experiment and a
     ``clustering/`` output location built from ``self._out``.
     """
     clustering_out = self._out.format('clustering/')
     experiments.run_subexperiment(self, clustering_out)