def recursive_feature_elimination(config_learning, config_data, number_features):
    """Run recursive feature elimination, save the ranking and predict on the test set.

    The ranking is written to ``feature_ranks.txt`` inside the directory named
    by the ``models`` option of the ``Learner`` section of ``config_data``, one
    ``name<TAB>rank`` line per entry; each line is also echoed to stdout.

    :param config_learning: mapping-like learning configuration; the keys
        ``x_train``, ``y_train``, ``x_test`` and ``scale`` are read here.
    :param config_data: configuration object queried as
        ``config_data.get("Learner", "models")``.
    :param number_features: number of features RFE should keep.
    :return: predictions of the fitted RFE model for the test features.
    """
    ranks_path = os.path.join(
        os.path.expanduser(config_data.get("Learner", "models")),
        "feature_ranks.txt")

    # NOTE(review): the labels written next to each rank come from
    # get_combinations_from_config_file_unsorted, whereas
    # recursive_feature_elimination_cv labels its rows with
    # get_features_from_config_file_unsorted -- confirm which is intended.
    feature_names = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

    x_train = read_features_file(config_learning.get('x_train'), '\t')
    y_train = read_reference_file(config_learning.get('y_train'), '\t')
    x_test = read_features_file(config_learning.get('x_test'), '\t')

    estimator, _scorers = learn_model.set_learning_method(config_learning, x_train, y_train)

    # Scaling is applied unless the configuration explicitly disables it.
    if config_learning.get("scale", True):
        x_train, x_test = scale_datasets(x_train, x_test)

    # Pass the feature count by keyword: newer scikit-learn releases reject it
    # as a positional argument, and older releases accept the keyword too.
    rfe = RFE(estimator, n_features_to_select=number_features, step=1)
    rfe.fit(x_train, y_train)

    # Open the file only once the ranking exists, and let the context manager
    # close it even if writing or a lookup fails.
    with open(ranks_path, "w") as output:
        for i, name in enumerate(feature_names):
            line = name + "\t" + str(rfe.ranking_[i])
            output.write(line + "\n")
            print(line)

    return rfe.predict(x_test)
def load_get_coefficients(config_learning, config_data):
    """Load a saved model and write its coefficients next to the feature names.

    The model is read from ``<models>/<method>.pkl`` and the output is written
    to ``<models>/coefficients.txt``, where ``<models>`` is the ``models``
    option of the ``Learner`` section of ``config_data`` and ``<method>`` is
    ``config_learning["learning"]["method"]``. Each output line is
    ``name<TAB>coefficient``.

    :param config_learning: mapping-like learning configuration containing a
        ``learning`` mapping with a ``method`` entry.
    :param config_data: configuration object queried as
        ``config_data.get("Learner", "models")``.
    :return: None
    """
    models_dir = os.path.expanduser(config_data.get("Learner", "models"))

    feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
    combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

    learning_config = config_learning.get("learning", None)
    method_name = learning_config.get("method", None)

    # NOTE: joblib.load is pickle-based and can execute arbitrary code while
    # loading; the models directory must only contain trusted files.
    estimator = joblib.load(os.path.join(models_dir, method_name + ".pkl"))
    coefficients = estimator.coef_

    # A feature whose combination method is 'both' is listed twice
    # (presumably it occupies two columns of the feature matrix -- TODO confirm).
    feature_list = []
    for i, feature_name in enumerate(feature_names):
        repeats = 2 if combination_methods[i] == 'both' else 1
        feature_list.extend([feature_name] * repeats)

    # Only the first row of coef_ is written; assumes a 2-D coef_ -- TODO
    # confirm for the estimators used here.
    # The file is opened after the model has been loaded so that a failed load
    # neither truncates an existing file nor leaks an open handle.
    with open(os.path.join(models_dir, "coefficients.txt"), "w") as output:
        for i, name in enumerate(feature_list):
            output.write(name + "\t" + str(coefficients[0][i]) + "\n")
def recursive_feature_elimination_cv(config_learning, config_data):
    """Run cross-validated recursive feature elimination and predict on the test set.

    The ranking is written to ``feature_ranks.txt`` inside the directory named
    by the ``models`` option of the ``Learner`` section of ``config_data``, one
    ``name<TAB>rank`` line per entry.

    :param config_learning: mapping-like learning configuration; the keys
        ``x_train``, ``y_train``, ``x_test`` and ``scale`` are read here.
    :param config_data: configuration object queried as
        ``config_data.get("Learner", "models")``.
    :return: predictions of the fitted RFECV model for the test features.
    """
    ranks_path = os.path.join(
        os.path.expanduser(config_data.get("Learner", "models")),
        "feature_ranks.txt")

    feature_names = FeatureExtractor.get_features_from_config_file_unsorted(config_data)
    combination_methods = FeatureExtractor.get_combinations_from_config_file_unsorted(config_data)

    x_train = read_features_file(config_learning.get('x_train'), '\t')
    y_train = read_reference_file(config_learning.get('y_train'), '\t')
    x_test = read_features_file(config_learning.get('x_test'), '\t')

    estimator, _scorers = learn_model.set_learning_method(config_learning, x_train, y_train)

    # Scaling is applied unless the configuration explicitly disables it.
    if config_learning.get("scale", True):
        x_train, x_test = scale_datasets(x_train, x_test)

    # Two stratified folds, features scored by accuracy, one feature removed
    # per elimination step.
    rfecv = RFECV(estimator=estimator, step=1, cv=StratifiedKFold(y_train, 2), scoring='accuracy')
    rfecv.fit(x_train, y_train)

    # A feature whose combination method is 'both' is listed twice
    # (presumably it occupies two columns of the feature matrix -- TODO confirm).
    feature_list = []
    for i, feature_name in enumerate(feature_names):
        repeats = 2 if combination_methods[i] == 'both' else 1
        feature_list.extend([feature_name] * repeats)

    # Open the file only once the ranking exists, and let the context manager
    # close it even if writing or a lookup fails.
    with open(ranks_path, "w") as output:
        for i, name in enumerate(feature_list):
            output.write(name + "\t" + str(rfecv.ranking_[i]) + "\n")

    return rfecv.predict(x_test)