def fit_TL(self):
    """Fit this regressor via transfer learning from the source project.

    Loads the ``sklearn_model`` implementation matching the configured
    optimization backend, opens a handle onto the transfer-learning (TL)
    source project's regressor, and — if this wrapper is not yet trained —
    trains a local model warm-started with the TL model's best
    hyper-parameters.

    Returns:
        str: 'Done' on completion.
    """
    # Select the sklearn_model implementation for the chosen optimizer.
    if self.optimize_method == 'deap':
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_deap import sklearn_model
    elif self.optimize_method == 'optuna':
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_optuna import sklearn_model
    elif self.optimize_method == 'skopt':
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_skopt import sklearn_model
    else:
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_grid import sklearn_model

    # Handle onto the TL source project's regressor for this cluster.
    tl_static_data = self.static_data['tl_project']['static_data']
    tl_cluster_dir = os.path.join(tl_static_data['path_model'],
                                  'Regressor_layer/' + self.cluster_name)
    tl_model = sklearn_model(tl_static_data, tl_cluster_dir,
                             tl_static_data['rated'], self.method, self.njobs)

    if self.istrained == False:
        cvs = self.load_data()
        local_model = sklearn_model(self.static_data, self.sk_models_dir,
                                    self.rated, self.method, self.njobs)
        if local_model.istrained == False:
            # Warm-start with the TL model's best hyper-parameters.
            self.models[self.method] = local_model.train_TL(
                cvs, tl_model.best_params)
        else:
            self.models[self.method] = local_model.to_dict()
        self.istrained = True
        self.save()
    return 'Done'
def fit(self):
    """Train (or reload) the sklearn regressor for this cluster.

    Picks the ``sklearn_model`` implementation that matches the configured
    optimization backend, trains it on the loaded CV folds when needed,
    stores the resulting model dict under ``self.models[self.method]`` and
    persists this wrapper.

    Returns:
        str: 'Done' on completion.
    """
    # Select the sklearn_model implementation for the chosen optimizer.
    if self.optimize_method == 'deap':
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_deap import sklearn_model
    elif self.optimize_method == 'optuna':
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_optuna import sklearn_model
    elif self.optimize_method == 'skopt':
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_skopt import sklearn_model
    else:
        from Fuzzy_clustering.version2.sklearn_models.sklearn_models_grid import sklearn_model

    if self.istrained == False:
        cvs = self.load_data()
        regressor = sklearn_model(self.static_data, self.sk_models_dir,
                                  self.rated, self.method, self.njobs,
                                  path_group=self.path_group)
        if regressor.istrained == False:
            print('Train ', self.method, ' ', self.cluster_name)
            self.models[self.method] = regressor.train(cvs)
        else:
            # Already trained on disk: just re-export its parameters.
            self.models[self.method] = regressor.to_dict()
        self.istrained = True
        self.save()
    return 'Done'
def train(self):
    """Train the meta-models that combine per-method cluster predictions.

    For each available combine method, fits an MLP meta-model on three
    random train/val/test splits of that method's predictions, then fits
    BCP combination weights. Falls back to plain averaging when fewer
    than two combine methods are configured.

    Returns:
        str: 'Done' on completion.
    """
    if len(self.combine_methods) > 1:
        pred_cluster, predictions, y_pred = self.project.predict_clusters()
        # Keep only combine methods for which predictions actually exist.
        self.combine_methods = [method for method in self.combine_methods
                                if method in predictions.keys()]
        self.models = dict()
        for method in self.combine_methods:
            pred = predictions[method].values.astype('float')
            pred[np.where(np.isnan(pred))] = 0  # NaNs would break the fits
            # Three random folds: ~72% train / ~13% val / 15% test each.
            cvs = []
            for _ in range(3):
                X_train, X_test1, y_train, y_test1 = train_test_split(
                    pred, y_pred.values, test_size=0.15)
                X_train, X_val, y_train, y_val = train_test_split(
                    X_train, y_train, test_size=0.15)
                cvs.append(
                    [X_train, y_train, X_val, y_val, X_test1, y_test1])
            mlp_model = sklearn_model(
                self.static_data, self.model_dir + '/' + method, self.rated,
                'mlp', self.n_jobs,
                path_group=self.static_data['path_group'])
            if not mlp_model.istrained:
                self.models['mlp_' + method] = mlp_model.train(cvs)
            else:
                self.models['mlp_' + method] = mlp_model.to_dict()
        # Fit BCP combination weights per method. (The original assigned an
        # unused local `combine_method = 'bcp'` here; removed.)
        for method in self.combine_methods:
            self.models['bcp_' + method] = self.bcp_fit(
                predictions[method].values.astype('float'), y_pred.values)
    else:
        self.combine_methods = ['average']
    self.istrained = True
    self.save(self.model_dir)
    return 'Done'
def train(self):
    """Train the ensemble-combination models on held-out test predictions.

    Ranks the individual methods by MAE on (optionally resampled) test
    predictions, keeps the best four (padding with repeats of the top
    method when fewer fall within 0.02 MAE of the best), stacks their
    predictions column-wise and fits every configured combine method
    (rls, bcp, mlp, bayesian_ridge, elastic_net, ridge).

    Fixes vs. original: the six ``logger.info('/n')`` calls logged the
    literal text "/n"; corrected to a newline. The three byte-identical
    resampling branches for 'pv'/'wind', 'load' and 'fa' are collapsed
    into one branch guarded by the same set of model types.

    Returns:
        str: 'Done' on completion.
    """
    X_test, y_test, act_test, X_cnn_test, X_lstm_test = self.load_data()
    if X_test.shape[0] > 0 and len(self.methods) > 1 and not self.istrained:
        # NOTE(review): for any other model_type, `results` stays undefined
        # and the code below raises NameError — same as the original.
        if self.model_type in {'pv', 'wind', 'load', 'fa'}:
            if self.resampling:
                pred_resample, y_resample, results = \
                    self.resampling_for_combine(
                        X_test, y_test, act_test, X_cnn_test, X_lstm_test)
            else:
                pred_resample, y_resample, results = self.without_resampling(
                    X_test, y_test, act_test, X_cnn_test, X_lstm_test)
        self.best_methods = results.nsmallest(4, 'mae').index.tolist()
        results = results.loc[self.best_methods]
        results['diff'] = results['mae'] - results['mae'].iloc[0]
        # Methods within 0.02 MAE of the best are "best of the best".
        best_of_best = results.iloc[np.where(
            results['diff'] <= 0.02)].index.tolist()
        # Pad the selection to exactly four entries, repeating the winner.
        if len(best_of_best) == 1:
            best_of_best.extend(
                [best_of_best[0], best_of_best[0], self.best_methods[1]])
        elif len(best_of_best) == 2:
            best_of_best.extend([best_of_best[0], best_of_best[0]])
        elif len(best_of_best) == 3:
            best_of_best.append(best_of_best[0])
        self.best_methods = best_of_best
        # Stack the selected methods' predictions column-wise; sorted order
        # keeps the column layout deterministic across runs.
        X_pred = np.array([])
        for method in sorted(self.best_methods):
            if X_pred.shape[0] == 0:
                X_pred = pred_resample[method]
            else:
                X_pred = np.hstack((X_pred, pred_resample[method]))
        X_pred[np.where(X_pred < 0)] = 0  # clip negative predictions
        X_pred, y_resample = shuffle(X_pred, y_resample)
        self.weight_size = len(self.best_methods)
        self.model = dict()
        for combine_method in self.combine_methods:
            if combine_method == 'rls':
                self.logger.info('RLS training')
                self.logger.info('\n')  # fixed: original logged literal '/n'
                self.model[combine_method] = dict()
                w = self.rls_fit(X_pred, y_resample)
                self.model[combine_method]['w'] = w
            elif combine_method == 'bcp':
                self.logger.info('BCP training')
                self.logger.info('\n')
                self.model[combine_method] = dict()
                w = self.bcp_fit(X_pred, y_resample)
                self.model[combine_method]['w'] = w
            elif combine_method == 'mlp':
                self.logger.info('MLP training')
                self.logger.info('\n')
                # Three random train/val/test splits for the MLP meta-model.
                cvs = []
                for _ in range(3):
                    X_train1, X_test1, y_train1, y_test1 = train_test_split(
                        X_pred, y_resample, test_size=0.15)
                    X_train, X_val, y_train, y_val = train_test_split(
                        X_train1, y_train1, test_size=0.15)
                    cvs.append(
                        [X_train, y_train, X_val, y_val, X_test1, y_test1])
                mlp_model = sklearn_model(
                    self.static_data, self.model_dir, self.rated, 'mlp',
                    self.n_jobs, is_combine=True,
                    path_group=self.static_data['path_group'])
                self.model[combine_method] = mlp_model.train(cvs)
            elif combine_method == 'bayesian_ridge':
                self.logger.info('bayesian_ridge training')
                self.logger.info('\n')
                self.model[combine_method] = BayesianRidge()
                self.model[combine_method].fit(X_pred, y_resample)
            elif combine_method == 'elastic_net':
                self.logger.info('elastic_net training')
                self.logger.info('\n')
                self.model[combine_method] = ElasticNetCV(cv=5)
                self.model[combine_method].fit(X_pred, y_resample)
            elif combine_method == 'ridge':
                self.logger.info('ridge training')
                self.logger.info('\n')
                self.model[combine_method] = RidgeCV(cv=5)
                self.model[combine_method].fit(X_pred, y_resample)
        self.logger.info('End of combine models training')
    else:
        self.combine_methods = ['average']
    self.istrained = True
    self.save(self.model_dir)
    return 'Done'
def fit(self, cvs):
    """Run feature selection over the concatenated CV data.

    Stacks the train/val/test partitions of fold 0 into one design matrix,
    trains (or reloads) a random-forest surrogate regressor, then runs a
    FeatureSelector pipeline (constant -> correlated -> relevant features).
    When more than 48 features survive, a dimensionality reducer is also
    fitted.

    Args:
        cvs: list of CV folds; fold 0 is
            [X_train, y_train, X_val, y_val, X_test, y_test].

    Returns:
        tuple: (features, pca) — ``features`` is the list of selected
        column indices, ``pca`` the fitted reducer or None.
    """
    print()
    print('Training the model (Fitting to the training data) ')
    # Concatenate train / validation / test partitions of the first fold.
    X = np.vstack((cvs[0][0], cvs[0][2], cvs[0][4]))
    if len(cvs[0][1].shape) == 1 and len(cvs[0][5].shape) == 1:
        y = np.hstack((cvs[0][1], cvs[0][3], cvs[0][5]))
    else:
        y = np.vstack((cvs[0][1], cvs[0][3], cvs[0][5])).ravel()
    self.D, self.N = X.shape  # D: sample count, N: feature count
    self.N_tot = X.shape[1]   # NOTE(review): duplicates self.N — confirm
    # CPU budget from the shared group status files: total CPUs minus
    # those currently claimed by GPU jobs; publish it for other workers.
    ncpus = joblib.load(os.path.join(self.path_group, 'total_cpus.pickle'))
    gpu_status = joblib.load(
        os.path.join(self.path_group, 'gpu_status.pickle'))
    njobs = int(ncpus - gpu_status)
    joblib.dump(njobs, os.path.join(self.path_group, 'cpu_status.pickle'))
    # Surrogate random-forest regressor used inside feature selection.
    regressor = sklearn_model(self.static_data, self.log_dir, 1, 'rf',
                              njobs, path_group=self.path_group)
    if not regressor.istrained:
        regressor.train(cvs, FS=True)
    estimator = regressor.model
    # NOTE(review): uses self.njobs rather than the locally computed
    # `njobs` budget above — confirm this is intentional.
    estimator.set_params(n_jobs=self.njobs)
    self.init_params = [regressor.best_params]
    # Pipeline steps, executed in list order. (The original also built an
    # 'RFECV Features' step that was never added to `steps`; removed.)
    step1 = {'Constant Features': {'frac_constant_values': 0.999}}
    step2 = {'Correlated Features': {'correlation_threshold': 0.999}}
    step3 = {
        'Relevant Features': {
            'cv': 3,
            'estimator': estimator,
            'n_estimators': 500,
            'max_iter': 20,
            'verbose': 0,
            'random_state': 42
        }
    }
    steps = [step1, step2, step3]
    columns = ['other_' + str(i) for i in range(X.shape[1])]
    X_df = pd.DataFrame(X, columns=columns)
    fs = FeatureSelector()
    fs.fit(X_df, y.ravel(), steps)
    # Map the selected column names back to integer column indices.
    features = [i for i in range(len(X_df.columns))
                if X_df.columns[i] in fs.selected_features]
    self.features = np.array(features)
    # Too many surviving features: fall back to dimensionality reduction.
    if self.features.shape[0] > 48:
        pca = self.reduce_dim(cvs)
    else:
        pca = None
    return features, pca