def run_rf(self, outputfile):
    """Train and evaluate the random-forest model, then render its plots.

    Builds a ``RandomForestClassifier`` from ``self.x_train``,
    ``self.x_test`` and ``self.y_train`` (prunning=6, 50 estimators, no
    balancing), evaluates its predictions against ``self.y_test`` and
    passes ``outputfile`` to ``PrecisionGraphics`` and ``NDCGGraphics``.

    Args:
        outputfile: Value given to the classifier as ``scores`` and used
            as the prefix of both PDF names. Presumably the path of the
            scores file -- confirm against ``RandomForestClassifier``.

    Returns:
        The ``(f1, prec, rec, acc)`` values produced by
        ``model_evaluation2``. Nothing is printed besides the banner.
    """
    print('Running model...')

    model = RandomForestClassifier(
        train=self.x_train,
        test=self.x_test,
        prunning=6,
        balanced=None,
        n_estimators=50,
        scores=outputfile,
        Y_train=self.y_train,
    )
    model.preprocess_input_vectors()
    predictions, probabilities = model.build_rf_model()

    f1, prec, rec, acc = model.model_evaluation2(
        predictions, probabilities, self.y_test)

    # Both plots take the same first argument and differ only in the
    # suffix appended to it for the PDF name.
    precision_recall_pdf = outputfile + "_precisionvsrecall.pdf"
    ndcg_pdf = outputfile + "_NDCG.pdf"
    PrecisionGraphics(outputfile, precision_recall_pdf)
    NDCGGraphics(outputfile, ndcg_pdf)

    return f1, prec, rec, acc
def run_rf(self, prunning=5, balanced=None, imputer=False):
    """Train, evaluate and report a random-forest run.

    Every output name is prefixed with ``<prunning>_<balanced>_<imputer>``
    so that runs with different settings use different file names.

    Args:
        prunning: Forwarded to ``RandomForestClassifier`` as ``prunning``.
        balanced: Forwarded to ``RandomForestClassifier`` as ``balanced``.
        imputer: When truthy, a median ``Imputer`` (missing value marker
            ``-1``) is fitted on ``self.x_train`` and applied to both
            ``self.x_train`` and ``self.x_test``. The attributes are
            overwritten with the imputed data.

    Returns:
        None. The metrics are printed, not returned.
    """
    scores = '_rf_scores.txt'

    if imputer:
        median_imputer = Imputer(missing_values=-1, strategy='median')
        median_imputer = median_imputer.fit(self.x_train)
        self.x_train = median_imputer.transform(self.x_train)
        self.x_test = median_imputer.transform(self.x_test)

    print('Gerando modelo...')

    # Shared prefix for every artefact name produced by this run.
    run_tag = '_'.join((str(prunning), str(balanced), str(imputer)))
    scores_name = run_tag + scores

    model = RandomForestClassifier(
        train=self.x_train,
        test=self.x_test,
        prunning=prunning,
        n_estimators=100,
        balanced=balanced,
        scores=scores_name,
        Y_train=self.y_train,
    )
    model.preprocess_input_vectors()
    predictions, probabilities = model.build_rf_model()

    f1, prec, rec, acc = model.model_evaluation2(
        predictions, probabilities, self.y_test)
    print("f1_binary: %f" % f1)
    print("Accuracy: %f" % acc)
    print("Precision: %f" % prec)
    print("Recall: %f" % rec)

    PrecisionGraphics(scores_name, run_tag + "_precisionvsrecall.pdf")
    NDCGGraphics(scores_name, run_tag + "_NDCG.pdf")

    id_source = GetIds("files3_claro_mig_model")
    id_source.gen_features(run_tag + "_ids.txt")
def run_rf(self, prunning=5, balanced='balanced', imputer=False):
    """Run an MLP baseline and a random-forest model, then report results.

    Steps, in order:
      1. Optionally impute missing values (marker ``-1``) with the median.
      2. Build the ``RandomForestClassifier`` for this run.
      3. Reassemble the training set as "all class-0 rows followed by all
         class-1 rows" and use it to fit an ``MLPClassifier``, unless a
         previously saved model can be loaded from ``clf_model_tT4.sav``.
      4. Print the MLP classification report on ``self.x_test``.
      5. Train/evaluate the random forest, print its metrics, show a
         histogram of its predicted probabilities and produce the plots
         and the ids file.

    Every output name is prefixed with ``<prunning>_<balanced>_<imputer>``.

    Args:
        prunning: Forwarded to ``RandomForestClassifier`` as ``prunning``.
        balanced: Forwarded to ``RandomForestClassifier`` as ``balanced``.
        imputer: When truthy, ``self.x_train`` and ``self.x_test`` are
            overwritten with median-imputed copies.

    Returns:
        None. The metrics are printed, not returned.
    """
    scores = '_rf_scores.txt'
    if imputer:
        imp = Imputer(missing_values=-1, strategy='median').fit(self.x_train)
        self.x_train = imp.transform(self.x_train)
        self.x_test = imp.transform(self.x_test)

    print('Running model...')

    # Shared prefix for every artefact name produced by this run.
    run_tag = str(prunning) + '_' + str(balanced) + '_' + str(imputer)
    scores_name = run_tag + scores

    forest = RandomForestClassifier(train=self.x_train,
                                    test=self.x_test,
                                    prunning=prunning,
                                    n_estimators=100,
                                    balanced=balanced,
                                    scores=scores_name,
                                    Y_train=self.y_train)

    # Split the training rows by label and stack them back as
    # [class 0 rows, class 1 rows]. All class-0 rows are kept; the
    # original code had a "30x the positives" cap commented out here.
    index_1 = np.where(self.y_train == 1)
    data_y_balanced_1 = self.y_train[index_1]
    index_0 = np.where(self.y_train == 0)
    data_y_balanced_0 = self.y_train[index_0[0]]
    data_y_balanced = np.array(
        np.hstack([data_y_balanced_0, data_y_balanced_1]))

    data_x_balanced_1 = self.x_train[index_1]
    data_x_balanced_0 = self.x_train[index_0[0]]
    data_x_balanced = np.array(
        np.vstack([data_x_balanced_0, data_x_balanced_1]))
    # Drop the intermediate copy before training to lower peak memory.
    del data_x_balanced_0

    clf = MLPClassifier(solver='sgd',
                        alpha=1e-5,
                        hidden_layer_sizes=(50, 100, 50),
                        max_iter=50,
                        random_state=1,
                        verbose=1)
    filename = 'clf_model_tT4.sav'
    try:
        # NOTE(review): joblib.load unpickles the file, so it must only be
        # pointed at model files this project wrote itself.
        clf = joblib.load(filename)
    except Exception:
        # Any load failure (missing or unreadable cache) falls back to
        # training from scratch. Unlike a bare ``except``, this lets
        # KeyboardInterrupt and SystemExit propagate instead of silently
        # starting a training run.
        clf.fit(np.nan_to_num(data_x_balanced), data_y_balanced)
        # save the model to disk
        joblib.dump(clf, filename)

    pred = clf.predict(np.nan_to_num(self.x_test))
    print(classification_report(self.y_test[:len(pred)], pred))

    # From here on ``pred`` refers to the random-forest predictions.
    forest.preprocess_input_vectors()
    pred, predprob = forest.build_rf_model()
    f1, prec, rec, acc = forest.model_evaluation2(pred, predprob,
                                                  self.y_test)
    print("f1_binary: %f" % f1)
    print("Accuracy: %f" % acc)
    print("Precision: %f" % prec)
    print("Recall: %f" % rec)

    plt.hist(predprob)
    plt.title("Histogram")
    plt.xlabel("Value")
    plt.ylabel("Frequency")
    plt.show()

    PrecisionGraphics(scores_name, run_tag + "_precisionvsrecall.pdf")
    NDCGGraphics(scores_name, run_tag + "_NDCG.pdf")

    ids = GetIds("files3_claro_mig_model_2")
    ids.gen_features(run_tag + "_ids.txt")