def test_fs_permute(cvs, X_test1, y_test1, cluster_dir):

    logger = logging.getLogger('log_rbf_cnn_test.log')
    logger.setLevel(logging.INFO)
    handler = logging.FileHandler(
        os.path.join(cluster_dir, 'log_rbf_cnn_test.log'), 'a')
    handler.setLevel(logging.INFO)

    # create a logging format
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    handler.setFormatter(formatter)

    # add the handlers to the logger
    logger.addHandler(handler)

    rated = None

    static_data = write_database()

    logger.info('Permutation Evaluation')
    logger.info('\n')
    method = 'svm'
    model_sklearn = sklearn_model(cluster_dir, rated, method,
                                  static_data['sklearn']['njobs'])
    model_sklearn.train(cvs)
    pred = model_sklearn.predict(X_test1)

    metrics_svm = model_sklearn.compute_metrics(pred, y_test1, rated)
    logger.info('Before feature selection metrics')
    logger.info('sse %s, rms %s, mae %s, mse %s', *metrics_svm)

    fs = FS(cluster_dir, static_data['sklearn']['njobs'])
    features = fs.fit(cvs)
    logger.info('Number of variables %s', str(features.shape[0]))

    for i in range(3):
        cvs[i][0] = cvs[i][0][:, features]
        cvs[i][2] = cvs[i][2][:, features]
        cvs[i][4] = cvs[i][4][:, features]

    model_sklearn = sklearn_model(cluster_dir, rated, method,
                                  static_data['sklearn']['njobs'])
    model_sklearn.train(cvs)
    pred = model_sklearn.predict(X_test1[:, features])

    metrics_svm = model_sklearn.compute_metrics(pred, y_test1, rated)
    logger.info('After feature selection metrics')
    logger.info('sse %s, rms %s, mae %s, mse %s', *metrics_svm)
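# The snippets on this page pass around a `cvs` object; it appears to be a list
# of three cross-validation folds, each of the form
# [X_train, y_train, X_val, y_val, X_test, y_test]. A minimal sketch of building
# such a structure from hypothetical arrays X and y (build_cv_folds is
# illustrative, not part of the library):
from sklearn.model_selection import train_test_split

def build_cv_folds(X, y, n_folds=3, test_size=0.15):
    """Return a list of [X_train, y_train, X_val, y_val, X_test, y_test] folds."""
    cvs = []
    for _ in range(n_folds):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size)
        X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=test_size)
        cvs.append([X_train, y_train, X_val, y_val, X_test, y_test])
    return cvs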
Example #2
    def predict(self, X):
        X_pred = np.array([])
        if not hasattr(self, 'best_methods'):
            self.best_methods = X.keys()
        for method in sorted(self.best_methods):
            if X_pred.shape[0] == 0:
                X_pred = X[method]
            else:
                X_pred = np.hstack((X_pred, X[method]))
        X_pred /= 20
        if not hasattr(self, 'model'):
            raise ValueError('The combine model does not exist')
        pred_combine = dict()
        for combine_method in self.combine_methods:
            if combine_method in ('rls', 'bcp'):
                pred = np.matmul(self.model[combine_method]['w'], X_pred)

            elif combine_method == 'mlp':
                # the sklearn wrapper is assumed to reload its trained MLP from model_dir
                self.model[combine_method] = sklearn_model(
                    self.model_dir, self.rated, 'mlp', self.n_jobs)
                pred = self.model[combine_method].predict(X_pred)

            elif combine_method in ('bayesian_ridge', 'elastic_net', 'ridge',
                                    'isotonic'):
                # use the estimator fitted and stored during train();
                # re-instantiating it here would give an unfitted model
                pred = self.model[combine_method].predict(X_pred)
            else:
                pred = np.mean(X_pred, axis=1).reshape(-1, 1)

            pred_combine[combine_method] = 20 * pred

        return pred_combine
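# A hedged usage sketch for predict(): `X` is assumed to be a dict mapping each
# member-model name to its prediction array for the same samples, and `combiner`
# stands for an already trained/loaded instance of this class (both names are
# illustrative).
import numpy as np

X = {'SVM': 20 * np.random.rand(100, 1),
     'MLP': 20 * np.random.rand(100, 1)}
pred_combine = combiner.predict(X)
for name, pred in pred_combine.items():
    print(name, pred.shape)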
Example #3
    def train(self, X_test, y_test, act_test, X_cnn_test, X_lstm_test):
        if X_test.shape[0] > 0 and len(self.methods) > 1:
            if self.model_type in {'pv', 'wind', 'load', 'fa'}:
                if self.resampling:
                    pred_resample, y_resample, results = self.resampling_for_combine(
                        X_test, y_test, act_test, X_cnn_test, X_lstm_test)
                else:
                    pred_resample, y_resample, results = self.without_resampling(
                        X_test, y_test, act_test, X_cnn_test, X_lstm_test)

            self.best_methods = results.nsmallest(4, 'mae').index.tolist()
            results = results.loc[self.best_methods]
            results['diff'] = results['mae'] - results['mae'].iloc[0]
            best_of_best = results.iloc[np.where(
                results['diff'] <= 0.01)].index.tolist()
            if len(best_of_best) == 1:
                best_of_best.append(self.best_methods[1])
            self.best_methods = best_of_best
            X_pred = np.array([])
            for method in sorted(self.best_methods):
                if X_pred.shape[0] == 0:
                    X_pred = pred_resample[method]
                else:
                    X_pred = np.hstack((X_pred, pred_resample[method]))
            X_pred /= 20
            X_pred[np.where(X_pred < 0)] = 0
            y_resample /= 20
            X_pred, y_resample = shuffle(X_pred, y_resample)
            self.weight_size = len(self.best_methods)
            self.model = dict()
            for combine_method in self.combine_methods:
                if combine_method == 'rls':
                    self.logger.info('RLS training')
                    self.logger.info('\n')
                    self.model[combine_method] = dict()
                    w = self.rls_fit(X_pred, y_resample)

                    self.model[combine_method]['w'] = w

                elif combine_method == 'bcp':
                    self.logger.info('BCP training')
                    self.logger.info('\n')
                    self.model[combine_method] = dict()
                    w = self.bcp_fit(X_pred, y_resample)
                    self.model[combine_method]['w'] = w

                elif combine_method == 'mlp':
                    self.logger.info('MLP training')
                    self.logger.info('\n')
                    cvs = []
                    for _ in range(3):
                        X_train, X_test1, y_train, y_test1 = train_test_split(
                            X_pred, y_resample, test_size=0.15)
                        X_train, X_val, y_train, y_val = train_test_split(
                            X_train, y_train, test_size=0.15)
                        cvs.append(
                            [X_train, y_train, X_val, y_val, X_test1, y_test1])
                    mlp_model = sklearn_model(self.model_dir,
                                              self.rated,
                                              'mlp',
                                              self.n_jobs,
                                              is_combine=True)
                    self.model[combine_method] = mlp_model.train(cvs)

                elif combine_method == 'bayesian_ridge':
                    self.logger.info('bayesian_ridge training')
                    self.logger.info('\n')
                    self.model[combine_method] = BayesianRidge()
                    self.model[combine_method].fit(X_pred, y_resample)

                elif combine_method == 'elastic_net':
                    self.logger.info('elastic_net training')
                    self.logger.info('\n')
                    self.model[combine_method] = ElasticNetCV(cv=5)
                    self.model[combine_method].fit(X_pred, y_resample)
                elif combine_method == 'ridge':
                    self.logger.info('ridge training')
                    self.logger.info('\n')
                    self.model[combine_method] = RidgeCV(cv=5)
                    self.model[combine_method].fit(X_pred, y_resample)
            self.logger.info('End of combine models training')
        else:
            self.combine_methods = ['average']
        self.istrained = True
        self.save(self.model_dir)
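    # A minimal sketch of how the learned 'rls'/'bcp' combination weights are
    # assumed to be applied (consistent with the matmul in the predict()
    # example above): one weight per selected member model, applied to the
    # scaled member predictions. Shapes and values are illustrative assumptions.
    #
    # import numpy as np
    # w = np.array([[0.6], [0.4]])       # hypothetical weights for two members
    # X_pred = np.random.rand(100, 2)    # member predictions scaled to [0, 1]
    # combined = 20 * (X_pred @ w)       # rescaled back, as in predict()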
    def fit_model(self,
                  cvs,
                  method,
                  static_data,
                  cluster_dir,
                  optimize_method,
                  X_cnn=np.array([]),
                  X_lstm=np.array([]),
                  y=np.array([]),
                  rated=1):
        # deap, optuna, skopt, grid_search
        if optimize_method == 'deap':
            from Fuzzy_clustering.ver_tf2.Sklearn_models_deap import sklearn_model
        elif optimize_method == 'optuna':
            from Fuzzy_clustering.ver_tf2.Sklearn_models_optuna import sklearn_model
        elif optimize_method == 'skopt':
            from Fuzzy_clustering.ver_tf2.Sklearn_models_skopt import sklearn_model
        else:
            from Fuzzy_clustering.ver_tf2.SKlearn_models import sklearn_model
        # if (datetime.now().hour>=8 and datetime.now().hour<10):
        #     time.sleep(2*60*60)
        if method in ('ML_RBF_ALL', 'ML_RBF_ALL_CNN'):
            model_rbf = rbf_model(static_data['RBF'], rated, cluster_dir)
            model_rbf_ols = rbf_ols_module(cluster_dir,
                                           rated,
                                           static_data['sklearn']['njobs'],
                                           GA=False)
            model_rbf_ga = rbf_ols_module(cluster_dir,
                                          rated,
                                          static_data['sklearn']['njobs'],
                                          GA=True)

            if not model_rbf_ols.istrained or static_data['train_online']:
                self.logger.info('Start of training of model_rbf_ols')
                self.models['RBF_OLS'] = model_rbf_ols.optimize_rbf(cvs)
            else:
                self.models['RBF_OLS'] = model_rbf_ols.to_dict()
            if not model_rbf_ga.istrained or static_data['train_online']:
                self.logger.info('Start of training of model_rbf_ga')
                self.models['GA_RBF_OLS'] = model_rbf_ga.optimize_rbf(cvs)
            else:
                self.models['GA_RBF_OLS'] = model_rbf_ga.to_dict()
            if not model_rbf.istrained or static_data['train_online']:
                self.logger.info('Start of training of model_rbf_adam')
                self.models['RBFNN'] = model_rbf.rbf_train(cvs)
            else:
                self.models['RBFNN'] = model_rbf.to_dict()

            if method == 'ML_RBF_ALL_CNN':
                rbf_dir = [
                    model_rbf_ols.cluster_dir, model_rbf_ga.cluster_dir,
                    model_rbf.cluster_dir
                ]
                model_cnn = cnn_model(static_data, rated, cluster_dir, rbf_dir)
                if not model_cnn.istrained or static_data['train_online']:
                    self.logger.info('Start of training of model_cnn')
                    self.models['RBF-CNN'] = model_cnn.train_cnn(cvs)
                else:
                    self.models['RBF-CNN'] = model_cnn.to_dict()

        elif method in ('ML_NUSVM', 'ML_MLP', 'ML_SVM', 'ML_RF', 'ML_XGB'):
            name = method.replace('ML_', '')
            model_sklearn = sklearn_model(cluster_dir, rated, name,
                                          static_data['sklearn']['njobs'])
            if not model_sklearn.istrained or static_data['train_online']:
                self.logger.info('Start of training of %s', name)
                self.models[name] = model_sklearn.train(cvs)
            else:
                self.models[name] = model_sklearn.to_dict()
        elif method == 'ML_CNN_3d':
            cnn_model_3d = cnn_3d_model(static_data, rated, cluster_dir)
            if not cnn_model_3d.istrained or static_data['train_online']:
                self.logger.info('Start of training of CNN_3d')
                self.models['CNN_3d'] = cnn_model_3d.train_cnn(X_cnn, y)
            else:
                self.models['CNN_3d'] = cnn_model_3d.to_dict()
        elif method == 'ML_LSTM_3d':
            lstm_model_3d = lstm_3d_model(static_data, rated, cluster_dir)
            if not lstm_model_3d.istrained or static_data['train_online']:
                self.logger.info('Start of training of LSTM_3d')
                self.models['LSTM_3d'] = lstm_model_3d.train_lstm(X_lstm, y)
            else:
                self.models['LSTM_3d'] = lstm_model_3d.to_dict()
        self.save(self.cluster_dir)
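    # A hedged usage sketch for fit_model (argument values are illustrative
    # assumptions; `trainer` stands for an instance of this class and `cvs`,
    # `static_data`, `cluster_dir` come from the surrounding pipeline):
    #
    # trainer.fit_model(cvs,
    #                   method='ML_SVM',
    #                   static_data=static_data,
    #                   cluster_dir=cluster_dir,
    #                   optimize_method='optuna')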
    def fit(self, cvs):
        logger = logging.getLogger('log_fs_permutation')
        logger.setLevel(logging.INFO)
        handler = logging.FileHandler(
            os.path.join(self.log_dir, 'log_fs_perm.log'), 'w')
        handler.setLevel(logging.INFO)

        # create a logging format
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)

        # add the handlers to the logger
        logger.addHandler(handler)

        print()
        print('Training the model (Fitting to the training data) ')
        logger.info('Training the feature extraction ')

        method = 'svm'

        regressor = sklearn_model(self.log_dir, 1, method, self.njobs)
        regressor.train(cvs)

        # Update classifier parameters
        estimator = regressor.model

        features = np.arange(cvs[0][0].shape[1])
        np.random.shuffle(features)
        # features=features[np.argsort(estimator.feature_importances_)]

        acc_test = regressor.acc_test

        cv_result = regressor.cv_results.nlargest(10,
                                                  'acc')['params'].to_list()
        flag = True

        cvs_temp = copy.deepcopy(cvs)

        remove_features = []
        keep_features = []
        unchecked = np.copy(features)
        while flag:
            for f in unchecked:
                features_temp = np.hstack(
                    (np.array(keep_features),
                     np.delete(unchecked,
                               np.where(unchecked == f)))).astype('int')
                reg_temp = sklearn_model(os.path.join(self.log_dir, 'temp'), 1,
                                         method, self.njobs)
                for i in range(3):
                    cvs_temp[i][0] = copy.deepcopy(cvs[i][0][:, features_temp])
                    cvs_temp[i][2] = copy.deepcopy(cvs[i][2][:, features_temp])
                    cvs_temp[i][4] = copy.deepcopy(cvs[i][4][:, features_temp])
                reg_temp.train(cvs_temp)

                cv_result = reg_temp.cv_results.nlargest(
                    5, 'acc')['params'].to_list()
                if reg_temp.acc_test < acc_test:
                    logger.info('Remove feature %s accuracy: %s', str(f),
                                str(reg_temp.acc_test))
                    remove_features.append(f)
                    unchecked = np.delete(unchecked, np.where(unchecked == f))
                    acc_test = reg_temp.acc_test
                    break
                else:
                    logger.info('ADD feature %s accuracy: %s', str(f),
                                str(reg_temp.acc_test))
                    keep_features.append(f)
                    unchecked = np.delete(unchecked, np.where(unchecked == f))

            if unchecked.shape[0] == 0:
                flag = False
            else:
                np.random.shuffle(unchecked)

        features = np.array(keep_features)
        self.features = features

        logger.info('Number of variables %s', str(self.features.shape[0]))
        logger.info('Finish the feature extraction ')
        return features
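# A hedged usage sketch, mirroring the test function at the top of this page:
# fit the permutation-based selector on the CV folds, then keep only the
# selected columns wherever the features are used (`X_test` is an assumed
# held-out matrix).
fs = FS(cluster_dir, static_data['sklearn']['njobs'])
features = fs.fit(cvs)
X_test_selected = X_test[:, features]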
Example #6
    def fit(self, cvs):
        logger = logging.getLogger('log_fs_boruta.log')
        logger.setLevel(logging.INFO)
        handler = logging.FileHandler(
            os.path.join(self.log_dir, 'log_fs_boruta.log'), 'w')
        handler.setLevel(logging.INFO)

        # create a logging format
        formatter = logging.Formatter(
            '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)

        # add the handlers to the logger
        logger.addHandler(handler)

        print()
        print('Training the model (Fitting to the training data) ')
        logger.info('Training the feature extraction ')
        X = np.vstack((cvs[0][0], cvs[0][2], cvs[0][4]))

        if len(cvs[0][1].shape) == 1 and len(cvs[0][5].shape) == 1:
            y = np.hstack((cvs[0][1], cvs[0][3], cvs[0][5]))
        else:
            y = np.vstack((cvs[0][1], cvs[0][3], cvs[0][5])).ravel()
        self.D, self.N = X.shape

        regressor = sklearn_model(self.log_dir, 1, 'rf', self.njobs)
        if not regressor.istrained:
            regressor.train(cvs)

        # Update classifier parameters
        estimator = regressor.model
        estimator.set_params(n_jobs=-1)
        self.init_params = [regressor.best_params]
        # Define steps
        step1 = {'Constant Features': {'frac_constant_values': 0.999}}

        step2 = {'Correlated Features': {'correlation_threshold': 0.999}}

        step3 = {
            'Relevant Features': {
                'cv': 3,
                'estimator': estimator,
                'n_estimators': 500,
                'max_iter': 20,
                'verbose': 0,
                'random_state': 42
            }
        }

        step4 = {
            'RFECV Features': {
                'cv': 3,
                'estimator': estimator,
                'step': 1,
                'scoring': 'neg_root_mean_squared_error',
                'verbose': 50
            }
        }

        # Place steps in a list in the order you want them executed
        steps = [step1, step2, step3]
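        # NOTE: step4 ('RFECV Features') is defined above but not included in
        # `steps`; to also run recursive feature elimination one could
        # presumably use: steps = [step1, step2, step3, step4]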
        columns = ['other_' + str(i) for i in range(X.shape[1])]
        X_df = pd.DataFrame(X, columns=columns)
        # Initialize FeatureSelector()
        fs = FeatureSelector()

        # Apply feature selection methods in the order they appear in steps
        fs.fit(X_df, y.ravel(), steps)
        features = [
            i for i in range(len(X_df.columns))
            if X_df.columns[i] in fs.selected_features
        ]

        # Get selected features
        self.features = np.array(features)

        # logger.info('best score %s', str(best_score))
        logger.info('Number of variables %s', str(self.features.shape[0]))
        logger.info('Finish the feature extraction ')
        return features
Example #7
    def train(self, lstm=False):
        if len(self.combine_methods) > 1:
            if os.path.exists(
                    os.path.join(self.data_dir,
                                 'predictions_by_method.pickle')):
                pred_cluster = joblib.load(
                    os.path.join(self.data_dir,
                                 'predictions_by_cluster.pickle'))
                predictions = joblib.load(
                    os.path.join(self.data_dir,
                                 'predictions_by_method.pickle'))
                y = pd.read_csv(os.path.join(self.data_dir, 'target_test.csv'),
                                index_col=0,
                                header=[0],
                                parse_dates=True,
                                dayfirst=True)

                self.models = dict()
                if lstm:
                    X = np.array([])
                    combine_method = 'lstm_full'

                    for clust in pred_cluster.keys():
                        x = np.array([])
                        for method in pred_cluster[clust]:
                            if method in self.methods:
                                tmp = np.zeros_like(y.values.reshape(-1, 1))
                                try:
                                    tmp[pred_cluster[clust]['index']] = \
                                        pred_cluster[clust][method]
                                except Exception:
                                    tmp[pred_cluster[clust]['index']] = \
                                        pred_cluster[clust][method].reshape(-1, 1)
                                if x.shape[0] == 0:
                                    x = tmp
                                else:
                                    x = np.hstack((x, tmp))
                        if X.shape[0] == 0:
                            X = np.copy(x)
                        elif len(X.shape) == 2:
                            X = np.stack((X, x))
                        else:
                            X = np.vstack((X, x[np.newaxis, :, :]))
                    X = np.transpose(X, [1, 0, 2]).astype('float')
                    y_pred = y.values / 20
                    self.models[combine_method] = self.lstm_fit(X,
                                                                y_pred,
                                                                full=True)

                    X = np.array([])
                    combine_method = 'lstm_combine'

                    for clust in pred_cluster.keys():
                        x = np.array([])
                        for method in pred_cluster[clust]:
                            if method in self.combine_methods:
                                tmp = np.zeros_like(y.values.reshape(-1, 1))
                                try:
                                    tmp[pred_cluster[clust]['index']] = \
                                        pred_cluster[clust][method]
                                except Exception:
                                    tmp[pred_cluster[clust]['index']] = \
                                        pred_cluster[clust][method].reshape(-1, 1)
                                if x.shape[0] == 0:
                                    x = tmp
                                else:
                                    x = np.hstack((x, tmp))
                        if X.shape[0] == 0:
                            X = np.copy(x)
                        elif len(X.shape) == 2:
                            X = np.stack((X, x))
                        else:
                            X = np.vstack((X, x[np.newaxis, :, :]))
                    X = np.transpose(X, [1, 0, 2]).astype('float')
                    y_pred = y.values / 20
                    self.models[combine_method] = self.lstm_fit(X, y_pred)

                for method in self.combine_methods:
                    pred = predictions[method].values.astype('float')
                    pred[np.where(np.isnan(pred))] = 0
                    pred /= 20
                    y_pred = y.values / 20
                    cvs = []
                    for _ in range(3):
                        X_train, X_test1, y_train, y_test1 = train_test_split(
                            pred, y_pred, test_size=0.15)
                        X_train, X_val, y_train, y_val = train_test_split(
                            X_train, y_train, test_size=0.15)
                        cvs.append(
                            [X_train, y_train, X_val, y_val, X_test1, y_test1])
                    mlp_model = sklearn_model(os.path.join(self.model_dir, method),
                                              self.rated, 'mlp', self.n_jobs)
                    if not mlp_model.istrained:
                        self.models['mlp_' + method] = mlp_model.train(cvs)
                    else:
                        self.models['mlp_' + method] = mlp_model.to_dict()
                combine_method = 'bcp'
                for method in self.combine_methods:
                    self.models['bcp_' + method] = self.bcp_fit(
                        predictions[method].values.astype('float'), y.values)

            else:
                raise ValueError('Prediction of regressors missing')
        else:
            self.combine_methods = ['average']
        self.istrained = True
        self.save(self.model_dir)
        return self.to_dict()
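# A hedged note on the array layout built for the LSTM combiners above: per
# cluster, member predictions are hstacked into x of shape (n_samples, n_methods),
# the clusters are stacked along a new leading axis, and the final transpose is
# assumed to yield an LSTM input of shape (n_samples, n_clusters, n_methods).
# Illustrative shape check:
import numpy as np

n_samples, n_clusters, n_methods = 100, 4, 3
X = np.transpose(np.random.rand(n_clusters, n_samples, n_methods), [1, 0, 2])
assert X.shape == (n_samples, n_clusters, n_methods)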