def f1_average(y_true, y_pred):
    """ returns average of f1 score for both classes. """
    f1_survived = f1_score(y_true, y_pred, pos_label=1, average="binary")
    f1_died = f1_score(y_true, y_pred, pos_label=0, average="binary")
    return np.mean([f1_survived, f1_died])
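# For binary targets this per-class mean coincides with average="macro";
# a minimal sanity check (a sketch, assuming f1_average above is in scope).
import numpy as np
from sklearn.metrics import f1_score

y_true = [1, 0, 1, 1, 0, 0]
y_pred = [1, 0, 0, 1, 0, 1]
# macro-F1 is the unweighted mean of the per-class F1 scores,
# so it agrees with f1_average on binary labels.
assert np.isclose(f1_average(y_true, y_pred),
                  f1_score(y_true, y_pred, average="macro"))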
def test_auto_weight(): # Test class weights for imbalanced data from sklearn.linear_model import LogisticRegression # We take as dataset the two-dimensional projection of iris so # that it is not separable and remove half of predictors from # class 1. # We add one to the targets as a non-regression test: class_weight="balanced" # used to work only when the labels where a range [0..K). from sklearn.utils import compute_class_weight X, y = iris.data[:, :2], iris.target + 1 unbalanced = np.delete(np.arange(y.size), np.where(y > 2)[0][::2]) classes = np.unique(y[unbalanced]) class_weights = compute_class_weight('balanced', classes, y[unbalanced]) assert_true(np.argmax(class_weights) == 2) for clf in (svm.SVC(kernel='linear'), svm.LinearSVC(random_state=0), LogisticRegression()): # check that score is better when class='balanced' is set. y_pred = clf.fit(X[unbalanced], y[unbalanced]).predict(X) clf.set_params(class_weight='balanced') y_pred_balanced = clf.fit(X[unbalanced], y[unbalanced],).predict(X) assert_true(metrics.f1_score(y, y_pred, average='weighted') <= metrics.f1_score(y, y_pred_balanced, average='weighted'))
def f1(self, X, y):
    n_class = len(np.unique(y))
    prediction = self.predict(X)
    if n_class > 2:
        return f1_score(y, prediction, average='weighted')
    else:
        return f1_score(y, prediction)
def evaluation(y_test=None, y_predict=None, n_classes=None): """ Input the predicted results, targets results and the number of class, return the confusion matrix, F1-score of each class, accuracy and macro F1-score. Parameters ---------- y_test : list The target results y_predict : list The predicted results n_classes : int The number of classes Examples -------- >>> c_mat, f1, acc, f1_macro = tl.utils.evaluation(y_test, y_predict, n_classes) """ c_mat = confusion_matrix(y_test, y_predict, labels=[x for x in range(n_classes)]) f1 = f1_score(y_test, y_predict, average=None, labels=[x for x in range(n_classes)]) f1_macro = f1_score(y_test, y_predict, average='macro') acc = accuracy_score(y_test, y_predict) tl.logging.info('confusion matrix: \n%s' % c_mat) tl.logging.info('f1-score : %s' % f1) tl.logging.info('f1-score(macro) : %f' % f1_macro) # same output with > f1_score(y_true, y_pred, average='macro') tl.logging.info('accuracy-score : %f' % acc) return c_mat, f1, acc, f1_macro
def compute_ref(true_tags, out_file, data_type='svm_light'):
    tag_map = {'OK': 1, 'BAD': 0, u'OK': 1, u'BAD': 0}
    predicted = []
    if data_type == 'svm_light':
        tag_map_pred = {'+1': 1, '-1': 0}
        for line in open(out_file):
            label = line[line.find(':')+1:line.find(' ')]
            predicted.append(tag_map_pred[label])
    elif data_type == 'crfpp' or data_type == 'crf_suite':
        for line in open(out_file):
            line = line.strip('\n')
            if line == '':
                continue
            tag = line.split('\t')[-1]
            if tag == 'OK' or tag == 'BAD':
                predicted.append(tag)
        predicted = [tag_map[t] for t in predicted]
    # if (type(true_tags[0]) is str or type(true_tags[0]) is unicode) and not true_tags[0].isdigit():
    true_tags = [tag_map[t] for t in true_tags]
    # if type(predicted[0]) is str and not predicted[0].isdigit():
    print(true_tags[:10])
    print(predicted[:10])
    print(f1_score(predicted, true_tags, average=None))
    print(f1_score(predicted, true_tags, average='weighted', pos_label=None))
def get_f1_and_classification_report(embeddings_dict, classifier):
    xs, ys, y_pred = get_xs_ys_predictions(embeddings_dict, classifier)
    class_names = ['verbs', 'nouns', 'adjectives', 'closed class words']
    report = classification_report(y_true=ys, y_pred=y_pred, target_names=class_names)
    micro_f1 = f1_score(y_true=ys, y_pred=y_pred, average='micro')
    macro_f1 = f1_score(y_true=ys, y_pred=y_pred, average='macro')
    return micro_f1, macro_f1, report
def evaluation(y_test=None, y_predict=None, n_classes=None): """ Input the predicted results, targets results and the number of class, return the confusion matrix, F1-score of each class, accuracy and macro F1-score. Parameters ---------- y_test : numpy.array or list target results y_predict : numpy.array or list predicted results n_classes : int number of classes Examples -------- >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes) """ from sklearn.metrics import confusion_matrix, f1_score, accuracy_score c_mat = confusion_matrix(y_test, y_predict, labels = [x for x in range(n_classes)]) f1 = f1_score(y_test, y_predict, average = None, labels = [x for x in range(n_classes)]) f1_macro = f1_score(y_test, y_predict, average='macro') acc = accuracy_score(y_test, y_predict) print('confusion matrix: \n',c_mat) print('f1-score:',f1) print('f1-score(macro):',f1_macro) # same output with > f1_score(y_true, y_pred, average='macro') print('accuracy-score:', acc) return c_mat, f1, acc, f1_macro
def benchmark(clf, train_X, train_y, test_X, test_y, encoder): """ benchmark based on f1 score """ t0 = time() clf.fit(train_X, train_y) train_time = time() - t0 t0 = time() pred = clf.predict(test_X) test_time = time() - t0 score = metrics.f1_score(test_y, pred, average='micro') scores = metrics.f1_score(test_y, pred, average=None) counter = Counter(train_y) counter = [(k, v) for k, v in counter.iteritems()] counter.sort(key=lambda a: a[1], reverse=True) if len(counter) > 20: tops = [v[0] for v in counter[0:20]] else: tops = [v[0] for v in counter] labels = encoder.inverse_transform(tops) s = [scores[v] for v in tops] labeled_scores = zip(labels, s) return clf, score, labeled_scores, train_time, test_time
def on_epoch_end(self, epoch, logs={}):
    print logs
    corr=0
    tot=0
    preds = self.model.predict(self.dev_data, verbose=1)
    preds_text=[]
    for l in preds:
        preds_text.append(self.index2label[np.argmax(l)])
    print "Micro f-score:", f1_score(self.dev_labels_text,preds_text,average=u"micro")
    print "Macro f-score:", f1_score(self.dev_labels_text,preds_text,average=u"macro")
    macro_recall = recall_score(self.dev_labels_text,preds_text,average=u"macro")
    print "Macro recall:", macro_recall
    if self.best_mr < macro_recall:
        self.best_mr = macro_recall
        self.model.save_weights(self.model_name + '_full_' + str(epoch) + '_MR_' + str(self.best_mr) + '.hdf5')
        print 'Saved Weights!'
    print classification_report(self.dev_labels_text, preds_text)
    for i in xrange(len(self.dev_labels)):
        # next_index = sample(preds[i])
        next_index = np.argmax(preds[i])
        # print preds[i],next_index,index2label[next_index]
        l = self.index2label[next_index]
        # print "correct:", index2label[np.argmax(dev_labels[i])], "predicted:",l
        if self.index2label[np.argmax(self.dev_labels[i])]==l:
            corr+=1
        tot+=1
    print corr,"/",tot
def predict_evaluate_models(fn ,ax=None, sel=["Penalties_Conceeded","Tries_Scored"], goal="Referee", verbosity=0): class_weight = 'auto' X, y, names = data_prepare(fn, sel=sel, goal=goal, verbosity=verbosity-1) if verbosity > 2: y_shuffled = y.copy() np.random.shuffle(y_shuffled) print ("All zeros accuracy:",1.0-np.sum(y)/len(y)) print ("y_shuffled f1_csore:",metrics.f1_score(y, y_shuffled)) n_folds = 10 cv = cross_validation.StratifiedKFold(y, n_folds=n_folds) #cv = cross_validation.LeaveOneOut(n=len(y)) results = [] for sclf in ('svm','svmp','svmr','lgCV','gnb','rf','knc'): clf = get_clf(sclf,class_weight=class_weight) y_pred = cross_validation.cross_val_predict(clf, X, y, cv=cv) #print "pred:",y_pred res = [ metrics.accuracy_score(y, y_pred), metrics.precision_score(y, y_pred), metrics.recall_score(y, y_pred), metrics.f1_score(y, y_pred), ] if verbosity > 0: print (sclf,res) results.append( (sclf,res) ) return results
def compare_2_models(model1, model2, X, y, h):
    h = min(X.shape[0], h)
    hidden_layer = features[np.random.choice(X.shape[0], h, replace=False)]
    print('training 1st model')
    pr = cProfile.Profile()
    pr.enable()
    model1.fit(X, y, hidden_layer=hidden_layer)
    y1 = model1.predict(X)
    pr.disable()
    ps = pstats.Stats(pr).sort_stats('cumulative')
    ps.print_stats()
    print('training 2nd model')
    pr = cProfile.Profile()
    pr.enable()
    model2.fit(X, y, hidden_layer=hidden_layer)
    y2 = model2.predict(X)
    pr.disable()
    ps = pstats.Stats(pr).sort_stats('cumulative')
    ps.print_stats()
    print(f1_score(y, y2))
    print(f1_score(y, y1))
    return np.allclose(y1, y2)
def baseline_graph_experiment(model, data_fn, data_name, model_name): print "Running graph experiment (%s)..." % (data_name,) A, X, Y = data_fn() A = np.asarray(A) X = np.asarray(X) Y = np.asarray(Y) n_nodes = A.shape[0] indices = np.arange(n_nodes) np.random.shuffle(indices) train_indices = indices[: n_nodes // 3] valid_indices = indices[n_nodes // 3 : (2 * n_nodes) // 3] test_indices = indices[(2 * n_nodes) // 3 :] model.fit_with_validation(A, X, Y, train_indices, valid_indices) preds = model.predict(A, X, test_indices) actuals = Y[test_indices, :] accuracy = accuracy_score(actuals, preds) f1_micro = f1_score(actuals, preds, average="micro") f1_macro = f1_score(actuals, preds, average="macro") print "form: name,micro_f,macro_f,accuracy" print "###RESULTS###: %s,%s,%.8f,%.8f,%.8f" % (data_name, model_name, f1_micro, f1_macro, accuracy)
def getScores(y, yPredTrain, yTest, yPredTest):
    scores = dict()
    scores['f1Train'] = f1_score(y, yPredTrain)
    scores['f1Test'] = f1_score(yTest, yPredTest)
    scores['accTrain'] = accuracy_score(y, yPredTrain)
    scores['accTest'] = accuracy_score(yTest, yPredTest)
    scores['rocTrain'] = roc_auc_score(y, yPredTrain)
    scores['rocTest'] = roc_auc_score(yTest, yPredTest)
    scores['cMatrixTrain'] = confusion_matrix(y, yPredTrain)
    scores['cMatrixTest'] = confusion_matrix(yTest, yPredTest)
    proba = float(len(np.where(y==1)[0]))/len(y)
    if proba < 0.50:
        proba = 1 - proba
    scores['random'] = proba
    return scores
def main(): f = open("me.stdout", "r").read() print f (confusionMatrix, labels, ytrue, ypred, trueCount) = readConfusionMatrix.readText(f) for row in confusionMatrix: print row precisionMicro = np.float(metrics.precision_score(ytrue, ypred, average="micro")) recallMicro = np.float(metrics.recall_score(ytrue, ypred, average="micro")) f1Micro = np.float(metrics.f1_score(ytrue, ypred, average="micro")) f1Macro = np.float(metrics.f1_score(ytrue, ypred, pos_label=1, average="macro")) precisionMacro = np.float(metrics.precision_score(ytrue, ypred, average="macro")) recallMacro = np.float(metrics.recall_score(ytrue, ypred, average="macro")) mConf = metrics.confusion_matrix(ytrue, ypred) print mConf print labels print len(ytrue) print len(ypred) print trueCount print metrics.accuracy_score(ytrue, ypred) print precisionMicro print recallMicro print f1Micro print f1Macro print precisionMacro print recallMacro
def kernel_graph_experiment(model, data_fn, data_name, model_name): print "Running graph experiment (%s)..." % (data_name,) A, X, Y = data_fn() n_nodes = len(A) indices = np.arange(n_nodes) np.random.shuffle(indices) print indices train_indices = indices[: n_nodes // 3] valid_indices = indices[n_nodes // 3 : (2 * n_nodes) // 3] test_indices = indices[(2 * n_nodes) // 3 :] # train_indices = indices[:int(n_nodes*0.8)] # valid_indices = indices[int(n_nodes*0.8):int(n_nodes*0.9)] # test_indices = indices[int(n_nodes*0.9):] model.fit_with_validation(Y, train_indices, valid_indices, test_indices) preds = model.predict(Y, np.asarray([]), test_indices) actuals = Y[test_indices, :] accuracy = accuracy_score(actuals, preds) f1_micro = f1_score(actuals, preds, average="micro") f1_macro = f1_score(actuals, preds, average="macro") print "form: name,micro_f,macro_f,accuracy" print "###RESULTS###: %s,%s,%.8f,%.8f,%.8f" % (data_name, model_name, f1_micro, f1_macro, accuracy)
def single_test(feature, attribute): from sklearn.metrics import f1_score from sklearn.metrics import recall_score from sklearn.metrics import accuracy_score from data_generator import load_vector_from_text import random data=merge_different_vectors([feature],attribute) none_attribute_uids=load_vector_from_text('uids_none_attributes.vector',feature,'list') none_attribute_uids=filter(lambda x:x in data[0],none_attribute_uids) alpha=0.2*len(data[0])/len(none_attribute_uids) train_data=[[],[]] test_data=[[],[]] for index,uid in enumerate(data[0]): if uid in none_attribute_uids and random.random()<alpha: #if random.random()<0.2: test_data[0].append(data[1][index]) test_data[1].append(data[2][index]) else: train_data[0].append(data[1][index]) train_data[1].append(data[2][index]) print len(test_data[1]),sum(test_data[1]),len(train_data[1]),sum(train_data[1]) clf=LogisticRegression() clf.fit(train_data[0], train_data[1]) predicted_y=clf.predict(test_data[0]) test_accuracy=accuracy_score(test_data[1],predicted_y) test_recall=recall_score(test_data[1],predicted_y) test_f1=f1_score(test_data[1],predicted_y) print 'F1 of test data (%d %d): %0.2f'%(sum(test_data[1]),len(test_data[1])-sum(test_data[1]),test_f1) print 'Accuracy of test data (%d %d): %0.2f'%(sum(test_data[1]),len(test_data[1])-sum(test_data[1]),test_accuracy) predicted_y=clf.predict(train_data[0]) train_accuracy=accuracy_score(train_data[1],predicted_y) train_recall=recall_score(train_data[1],predicted_y) train_f1=f1_score(train_data[1],predicted_y) print 'F1 of train data (%d %d): %0.2f'%(sum(train_data[1]),len(train_data[1])-sum(train_data[1]),train_f1) return [test_accuracy,test_recall,test_f1,train_accuracy,train_recall,train_f1]
def benchmark(clf_current): print('_' * 80) print("Test performance for: ") clf_descr = str(clf_current).split('(')[0] print(clf_descr) t0 = time() classif = OneVsRestClassifier(clf_current) classif.fit(X_train, Y_train.toarray()) train_time = time() - t0 print("train time: %0.3fs" % train_time) t0 = time() if hasattr(clf_current,"decision_function"): dfmatrix = classif.decision_function(X_test) score = metrics.f1_score(Y_test.toarray(), df_to_preds(dfmatrix, k = 5)) else: probsmatrix = classif.predict_proba(X_test) score = metrics.f1_score(Y_test.toarray(), probs_to_preds(probsmatrix, k = 5)) test_time = time() - t0 print("f1-score: %0.7f" % score) print("test time: %0.3fs" % test_time) print('_' * 80) return clf_descr, score, train_time, test_time
def findBestDistance(self): print '*** start ****' d = 0.1 # y_true = [] # y_pred = [] result = {} for x in range(0,10): y_true = [] y_pred = [] for dataIndex in range(0, len(self.lstTest)): dataTest = self.lstTest[dataIndex] # y_true.append(dataTest[0]) y_true.append(self.scoreConvert(dataTest[0])) isFilter = self.computeWithNoCorpus(dataTest[1], self.lstTrain, d) y_pred.append(self.scoreConvert(isFilter)) print y_true print y_pred f1 = metrics.f1_score(y_true, y_pred) f1_mac = f1_score(y_true, y_pred, average='macro') print 'd : ',d,' f1 : ',f1,' f1 mac : ',f1_mac result[d] = f1 print classification_report(y_true, y_pred) # print 'result ', result d = d+0.1 print result print '*** end ******'
def ternary_metrics(polarities, lexicon, eval_words, tau_lexicon=None): if not tau_lexicon == None: kendall_words = list(set(eval_words).intersection(tau_lexicon)) y_prob, y_true = [], [] polarities = {word:polarities[word] for word in eval_words} for w in polarities: y_prob.append(polarities[w]) y_true.append(lexicon[w]) y_prob = np.array(y_prob) y_true = np.array(y_true) y_prob = 2*(y_prob - np.min(y_prob)) / (np.max(y_prob) - np.min(y_prob)) - 1 neg_prop = np.sum(np.array(lexicon.values()) == -1) / float(len(lexicon)) pos_prop = np.sum(np.array(lexicon.values()) == 1) / float(len(lexicon)) sorted_probs = sorted(y_prob) neg_thresh = sorted_probs[int(np.round(neg_prop*len(sorted_probs)))] pos_thresh = sorted_probs[-int(np.round(pos_prop*len(sorted_probs)))] cmn_labels = [1 if val >= pos_thresh else -1 if val <= neg_thresh else 0 for val in y_prob] if not tau_lexicon == None: tau = kendalltau(*zip(*[(polarities[word], tau_lexicon[word]) for word in kendall_words]))[0] else: tau = None maj_f1 = f1_score(y_true, np.repeat(sp.stats.mode(y_true)[0][0], len(y_true)), average="macro") cmn_f1 = f1_score(y_true, cmn_labels, average="macro") label_func = lambda entry : 1 if entry > pos_thresh else -1 if entry < neg_thresh else 0 conf_mat = confusion_matrix(y_true, [label_func(entry) for entry in y_prob]) return tau, cmn_f1, maj_f1, conf_mat
def compareModels(model): """ This evaluates the pre-trained model agaisnt metamind's API on sentences in `data/validation` Parameters ---------- model: test.MODEL Namedtuple containing model parameters (dictionary, tfidf learner and labels) """ set_api_key("MohJ53r6kUvoPjHS8tStX1vnfssvN5EDetVcp2uCNISwXus2BS") with open('data/validation', 'r') as fin: validations = fin.read() truth = [model.labels.label2class[i] for i in ['positive']*9 + ['negative']*8] scores_mm = [] scores_joe = [] for validation in validations.split('\n'): mmLabel = testMetaMind(validation)[0]['label'] scores_mm.append(model.labels.label2class[mmLabel]) joeLabel = testDeepModel(validation, model) scores_joe.append(model.labels.label2class[joeLabel]) print 'MetaMind F1 score is %s' % f1_score(truth, scores_mm) print 'My F1 score is %s' % f1_score(truth, scores_joe)
def test_standard_svm_blobs_2d_class_weight(): # no edges, reduce to crammer-singer svm X, Y = make_blobs(n_samples=210, centers=3, random_state=1, cluster_std=3, shuffle=False) X = np.hstack([X, np.ones((X.shape[0], 1))]) X, Y = X[:170], Y[:170] X_graphs = [(x[np.newaxis, :], np.empty((0, 2), dtype=np.int)) for x in X] pbl = GraphCRF(n_features=3, n_states=3, inference_method='unary') svm = OneSlackSSVM(pbl, check_constraints=False, C=1000) svm.fit(X_graphs, Y[:, np.newaxis]) weights = 1. / np.bincount(Y) weights *= len(weights) / np.sum(weights) pbl_class_weight = GraphCRF(n_features=3, n_states=3, class_weight=weights, inference_method='unary') svm_class_weight = OneSlackSSVM(pbl_class_weight, C=10, check_constraints=False, break_on_bad=False) svm_class_weight.fit(X_graphs, Y[:, np.newaxis]) assert_greater(f1_score(Y, np.hstack(svm_class_weight.predict(X_graphs))), f1_score(Y, np.hstack(svm.predict(X_graphs))))
def cutoff_f1(clf, X, y):
    y_pred = (clf.predict_proba(X)[:,1] > cutoff_value).astype(int)
    y_pred2 = clf.predict(X)
    s1 = f1_score(y, y_pred)
    s2 = f1_score(y, y_pred2)
    # print 'f1 = %.4f, %.4f' % (s1, s2)
    return s1
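# cutoff_f1 follows the (estimator, X, y) signature scikit-learn accepts for a
# scorer callable, so it can plausibly be passed to cross-validation directly.
# A sketch, assuming cutoff_value is the module-level threshold the function
# reads and that the generated demo data stands in for the real features.
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

cutoff_value = 0.3  # assumed threshold read inside cutoff_f1
X_demo, y_demo = make_classification(n_samples=200, random_state=0)
scores = cross_val_score(LogisticRegression(), X_demo, y_demo, cv=5, scoring=cutoff_f1)
print(scores.mean())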
def on_epoch_end(self, batch, logs={}): # losses self.losses_train.append(self.model.evaluate(X_train, Y_train, batch_size=128,verbose =0)) self.losses_val.append(self.model.evaluate(X_val, Y_val, batch_size=128,verbose = 0)) # Roc train train_preds = self.model.predict_proba(X_train, verbose=0) train_preds = train_preds[:, 1] roc_train = metrics.roc_auc_score(y_train, train_preds) self.roc_train.append(roc_train) # Roc val val_preds = self.model.predict_proba(X_val, verbose=0) val_preds = val_preds[:, 1] roc_val = metrics.roc_auc_score(y_val, val_preds) self.roc_val.append(roc_val) # Metrics train y_preds = self.model.predict_classes(X_train,verbose = 0) self.f1_train.append(metrics.f1_score(y_train,y_preds)) self.recal_train.append(metrics.recall_score(y_train,y_preds)) self.preci_train.append(metrics.precision_score(y_train,y_preds)) # Metrics val y_preds = self.model.predict_classes(X_val,verbose =0) self.f1_val.append(metrics.f1_score(y_val,y_preds)) self.recal_val.append(metrics.recall_score(y_val,y_preds)) self.preci_val.append(metrics.precision_score(y_val,y_preds))
def cv_model(): DATA_FILE = './data/train-set-ru-b64-utf-8.txt' all_data = [] target = [] with open(DATA_FILE) as df: for i, line in enumerate(df): print i line = line.strip() parts = line.split() stats_collector = StatsCollector() #print parts[2] #print base64.b64decode(parts[3])#.decode('utf-8') #print parts[2].decode('utf-8'), parts[3].decode('utf-8'), "\n" stats_collector.collect(int(parts[1]), parts[3], parts[2]) # mark page url all_data.append(stats_collector.get_features()) target.append(stats_collector.get_target()) #print all_data[-1] data = np.asarray(all_data, dtype = np.float) target = np.asarray(target, dtype = np.float) clf = GradientBoostingClassifier(loss='deviance', learning_rate=0.05, n_estimators=400,\ min_samples_split=30, min_samples_leaf=15, max_depth=5) kf = KFold(data.shape[0], n_folds = 3, shuffle = True) for train_index, test_index in kf: X_train, X_test = data[train_index], data[test_index] y_train, y_test = target[train_index], target[test_index] clf.fit(X_train, y_train) y_pred = clf.predict(X_test) print f1_score(y_test, y_pred)
def cross_val(data_x, data_y, classifier, kFold, b_cost=1, h_cost=1, w=0.5): e_h, e_b = 0, 0 y_tests, pred_probas = [], [] for train_index, test_index in kFold: data_x_, data_y_ = np.array(data_x), np.array(data_y) X_train, X_test = list(data_x_[train_index]), list(data_x_[test_index]) y_train, y_test = list(data_y_[train_index]), list(data_y_[test_index]) classifier.fit(X_train, y_train) pred_proba = [r[0] for r in classifier.predict_proba(X_test)] y_tests += y_test pred_probas += pred_proba predictions = [0 if p*b_cost > (1-p)*h_cost else 1 for p in pred_probas] roc_auc = roc_auc_score(y_tests, pred_probas) total_acc = accuracy_score(y_tests, predictions) precision, recall, thresholds = precision_recall_curve(y_tests, pred_probas, pos_label=0) fpr, tpr, thresholds = roc_curve(y_tests, pred_probas, pos_label=0) precision_bots = precision_score(y_tests, predictions, pos_label = 0) precision_humans = precision_score(y_tests, predictions, pos_label = 1) recall_bots = recall_score(y_tests, predictions, pos_label = 0) recall_humans = recall_score(y_tests, predictions, pos_label = 1) f1_bots = f1_score(y_tests, predictions, pos_label = 0) f1_humans = f1_score(y_tests, predictions, pos_label = 1) conf_matrix = np.matrix(list(confusion_matrix(y_tests, predictions))) #plot_curve(fpr, tpr, 'ROC', w) #plot_curve(recall, precision, 'PR', w) return [total_acc, precision_bots, precision_humans, recall_bots, recall_humans, f1_bots, f1_humans, roc_auc, conf_matrix]
def logistic_regression_sklearn(self, features, labels): """Run a logistic regression, evaluate it, return the LR object """ print '\n**** Running logistic regression...' # Split into train / test segments features_train, features_test, target_train, target_test = cross_validation.train_test_split(features, labels, test_size=0.20, random_state=0) lr = LogisticRegression() lr.fit(features_train, target_train) # Evaluate the regression target_predicted = lr.predict(features_test) accuracy = accuracy_score(target_test, target_predicted) print 'Logistic regression accuracy score: {0:.0f}%'.format(100 * accuracy) # coefs = pd.DataFrame(zip(feature_cols, np.transpose(lr.coef_[0])), columns=['Feature', 'Coefficient']) print 'F1: ', print f1_score(target_test, target_predicted) # preds = lr.predict_proba(features_test)[:,1] # fpr, tpr, _ = roc_curve(target_test, preds) # print 'AOC: ', # print '{:.2f}'.format(auc(fpr,tpr)) return lr
def fit_model(): DATA_FILE = './data/train-set-ru-b64-utf-8.txt' stats_collector = StatsCollector() i=0 data = [] target = [] with open (DATA_FILE) as df: for i, line in enumerate(df): print i line = line.strip() parts = line.split() stats_collector = StatsCollector() stats_collector.collect(int(parts[1]), parts[3], parts[2]) data.append(stats_collector.get_features()) target.append(stats_collector.get_target()) #print len(data[-1]) data = np.asarray(data, dtype = np.float) target = np.asarray(target, dtype = np.float) print data.shape, target.shape df.close() clf = GradientBoostingClassifier(loss='deviance', learning_rate=0.07, n_estimators=300, min_samples_split=30,\ min_samples_leaf=15, max_depth=4) clf.fit(data, target) y_pred = clf.predict(data) print f1_score(target, y_pred) joblib.dump(clf, 'model/model.pkl')
def on_epoch_end(self, epoch, logs={}): print logs corr=0 tot=0 preds = self.model.predict(self.dev_data, verbose=1) preds_text=[] for l in preds: preds_text.append(self.index2label[np.argmax(l)]) print "Micro f-score:", f1_score(self.dev_labels_text,preds_text,average=u"micro") print "Macro f-score:", f1_score(self.dev_labels_text,preds_text,average=u"macro") print classification_report(self.dev_labels_text, preds_text) for i in xrange(len(self.dev_labels)): # next_index = sample(preds[i]) next_index = np.argmax(preds[i]) # print preds[i],next_index,index2label[next_index] l = self.index2label[next_index] # print "correct:", index2label[np.argmax(dev_labels[i])], "predicted:",l if self.index2label[np.argmax(self.dev_labels[i])]==l: corr+=1 tot+=1 print corr,"/",tot
def evaluate_fold(clf, X_train, y_train, X_test, y_test): """ This is the business section """ tmp = dict() tmp['X_train.shape'] = X_train.shape tmp['X_test.shape'] = X_test.shape try: pred_test = clf.predict_proba(X_test) pred_train = clf.predict_proba(X_train) tmp['roc'] = roc_info(y_test, pred_test[:,1]) tmp['roc_area'] = roc_auc_score(y_test, pred_test[:,1]) pred_test = clf.predict(X_test) pred_train = clf.predict(X_train) tmp['f1_test'] = f1_score(y_test, pred_test, pos_label=1) tmp['f1_train'] = f1_score(y_train, pred_train, pos_label=1) except (AttributeError, NotImplementedError): pred_test = clf.predict(X_test) pred_train = clf.predict(X_train) tmp['roc'] = roc_info(y_test, pred_test) tmp['roc_area'] = roc_auc_score(y_test, pred_test) tmp['f1_test'] = f1_score(y_test, pred_test, pos_label=1) tmp['f1_train'] = f1_score(y_train, pred_train, pos_label=1) return tmp
def scnn_proportion_experiment(data_fn, name, n_hops, prop_valid, prop_test, transform_fn=util.rw_laplacian, transform_name='rwl'): print 'Running node experiment (%s)...' % (name,) A, X, Y = data_fn() n_nodes = A.shape[0] indices = np.arange(n_nodes) valid_start = int(n_nodes * (1 - (prop_valid + prop_test))) test_start = int(n_nodes * (1 - prop_test)) valid_indices = indices[valid_start:test_start] test_indices = indices[test_start:] for train_prop in [x / 10.0 for x in range(1, 11)]: train_end = int(valid_start * train_prop) train_indices = indices[:train_end] scnn = SCNN(n_hops=n_hops, transform_fn=transform_fn) scnn.fit(A, X, Y, train_indices=train_indices, valid_indices=valid_indices) probs = scnn.predict_proba(X, test_indices) print probs preds = scnn.predict(X, test_indices) actuals = np.argmax(Y[test_indices,:], axis=1) f1_micro = f1_score(actuals, preds, average='micro') f1_macro = f1_score(actuals, preds, average='macro') accuracy = accuracy_score(actuals, preds) print 'form: name,n_hops,transform_name,micro_f,macro_f,accuracy' print '###RESULTS###: %s,%d,%.2f,%s,%.8f,%.8f,%.8f' % (name, n_hops, train_prop, transform_name, f1_micro, f1_macro, accuracy)
train, _ = ColumnInfoExtractor(n_files=num_files, n_rows=num_rows, train_size=1., n_jobs=n_cores, column_sample=True).transform( annotations_file=train_file_path, csv_folder=csv_folder_path) test, _ = ColumnInfoExtractor(n_files=num_files, n_rows=num_rows, train_size=1., n_jobs=n_cores, column_sample=True).transform( annotations_file=test_file_path, csv_folder=csv_folder_path) tqdm.write("Loading data done...") ablation_results = defaultdict(dict) for pp_name, pp in tqdm(all_pipelines.items()): tqdm.write(f"Fitting pipeline {pp_name}") pp.fit(train, train["y"]) y_test = test["y"] y_pred = pp.predict(test) f_score = f1_score(y_true=y_test, y_pred=y_pred, average='macro') ablation_results[pp_name]["f_score"] = f_score ablation_results[pp_name]["confusion_matrix"] = confusion_matrix( y_true=y_test, y_pred=y_pred).tolist() ablation_results["tags"] = list(np.unique(test["y"])) json.dump(ablation_results, open("./data/ablation_results.json", "w"))
from sklearn.metrics import f1_score

y_true = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
y_pred = [0, 1, 1, 1, 1, 0, 0, 0, 1, 1]

print(f1_score(y_true, y_pred))
# 0.3636363636363636
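# The averaged variants on the same toy arrays; a quick sketch,
# the values in the comments are approximate.
print(f1_score(y_true, y_pred, average=None))        # per class: [0.2222..., 0.3636...]
print(f1_score(y_true, y_pred, average='macro'))     # unweighted mean, ~0.2929
print(f1_score(y_true, y_pred, average='weighted'))  # support-weighted, ~0.2929 (equal class counts)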
def train1(): with open(opt.pickle_train_path, 'rb') as inp: word2id = pickle.load(inp) id2word = pickle.load(inp) tag2id = pickle.load(inp) id2tag = pickle.load(inp) x_train = pickle.load(inp) y_train = pickle.load(inp) x_valid = pickle.load(inp) y_valid = pickle.load(inp) print("train len:", len(x_train)) print("valid len", len(x_valid)) # print("test len", len(x_test)) train_dataset = NERDataset(x_train,y_train) valid_dataset = NERDataset(x_valid, y_valid) # valid_dataset = NERDataset(x_valid, y_valid) # test_dataset = NERDataset(x_test, y_test) train_dataloader = DataLoader(train_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers) valid_dataloader = DataLoader(valid_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers) # test_dataloader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=True, num_workers=opt.num_workers) # x = train_dataset[0] # print(x) # for index, batch in enumerate(train_dataloader): # print(index) # print(batch) models = {'NERLSTM': NERLSTM, 'NERLSTM_CRF': NERLSTM_CRF} all_vec = load_vec(opt.load_vec_path) # device = torch.device('cuda') # model = models[opt.model](opt.embedding_dim, opt.hidden_dim, opt.dropout, word2id, tag2id).cuda() model = models[opt.model](opt.word_dim, opt.embedding_dim, opt.hidden_dim, opt.filter_size, opt.cnn_out_dim, opt.dropout, word2id, tag2id, all_vec).cuda() criterion = nn.CrossEntropyLoss(ignore_index=0) optimizer = optim.Adam(model.parameters(), lr=opt.lr, weight_decay=opt.weight_decay) if opt.model == 'NERLSTM': for epoch in range(opt.max_epoch): model.train() for index, batch in enumerate(train_dataloader): optimizer.zero_grad() X = batch['x'].cuda() y = batch['y'].cuda() y = y.view(-1, 1) y = y.squeeze(-1) pred = model(X) pred = pred.view(-1, pred.size(-1)) loss = criterion(pred, y) loss.backward() optimizer.step() if index % 200 == 0: print('epoch:%04d,------------loss:%f' % (epoch, loss.item())) aver_loss = 0 preds, labels = [], [] for index, batch in enumerate(valid_dataloader): model.eval() val_x, val_y = batch['x'].cuda(), batch['y'].cuda() predict = model(val_x) predict = torch.argmax(predict, dim=-1) if index % 500 == 0: print([id2word[i.item()] for i in val_x[0].cpu() if i.item() > 0]) length = [id2tag[i.item()] for i in val_y[0].cpu() if i.item() > 0] print(length) print([id2tag[i.item()] for i in predict[0][:len(length)].cpu() if i.item() > 0]) # 统计非0的,也就是真实标签的长度 leng = [] for i in val_y.cpu(): tmp = [] for j in i: if j.item() > 0: tmp.append(j.item()) leng.append(tmp) # 提取真实长度的预测标签 for index, i in enumerate(predict.tolist()): preds.extend(i[:len(leng[index])]) # 提取真实长度的真实标签 for index, i in enumerate(val_y.tolist()): labels.extend(i[:len(leng[index])]) precision = precision_score(labels, preds, average='macro') recall = recall_score(labels, preds, average='macro') f1 = f1_score(labels, preds, average='macro') report = classification_report(labels, preds) print(report) elif opt.model == 'NERLSTM_CRF': best_score = 0.0 for epoch in range(opt.max_epoch): model.train() for index, batch in enumerate(train_dataloader): optimizer.zero_grad() X = batch['x'].cuda() y = batch['y'].cuda() # CRF loss = model.log_likelihood(X, y) loss.backward() # CRF torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=10) optimizer.step() if index % 200 == 0: print('best_score:%f' % (best_score)) print('epoch:%02d,idnex%4d------------loss:%f' % (epoch, index, loss.item())) aver_loss = 0 preds, labels = [], [] for index, batch in enumerate(valid_dataloader): 
    model.eval()
    val_x, val_y = batch['x'].cuda(), batch['y'].cuda()
    predict = model(val_x)
    # CRF
    loss = model.log_likelihood(val_x, val_y)
    aver_loss += loss.item()
    # count the non-zero entries, i.e. the real length of each label sequence
    leng = []
    for i in val_y.cpu():
        tmp = []
        for j in i:
            if j.item() > 0:
                tmp.append(j.item())
        leng.append(tmp)
    # keep only the predictions within each real sequence length
    for index, i in enumerate(predict):
        preds += i[:len(leng[index])]
    # keep only the gold labels within each real sequence length
    for index, i in enumerate(val_y.tolist()):
        labels += i[:len(leng[index])]

aver_loss /= (len(valid_dataloader) * 64)
precision = precision_score(labels, preds, average='macro')
recall = recall_score(labels, preds, average='macro')
f1 = f1_score(labels, preds, average='macro')
# report = classification_report(labels, preds)
# print(report)
print('p', precision)
print('r', recall)
print('f1', f1)
if f1 > best_score:
    best_score = f1
    path_name = './model/model' + str(epoch) + '----' + str(f1) + '.pkl'
    torch.save(model, path_name)
    print('model has been saved')
def test1(model_path, output_file, output_file1):
    def list2tags(l_list):
        r = []
        for l in l_list:
            r.append(id2tag[l])
        return r

    with open(opt.pickle_train_path, 'rb') as inp:
        word2id = pickle.load(inp)
        id2word = pickle.load(inp)
        tag2id = pickle.load(inp)
        id2tag = pickle.load(inp)
        x_train = pickle.load(inp)
        y_train = pickle.load(inp)
        x_test = pickle.load(inp)
        y_test = pickle.load(inp)
    print("valid len", len(x_test))

    test_dataset = NERDataset(x_test, y_test)
    test_dataloader = DataLoader(test_dataset, batch_size=opt.batch_size, shuffle=False, num_workers=opt.num_workers)

    model = torch.load(model_path)
    model.eval()
    aver_loss = 0
    preds, labels = [], []
    for index, batch in enumerate(test_dataloader):
        model.eval()
        val_x, val_y = batch['x'].cuda(), batch['y'].cuda()
        predict = model(val_x)
        # CRF
        loss = model.log_likelihood(val_x, val_y)
        aver_loss += loss.item()
        # count the non-zero entries, i.e. the real length of each label sequence
        leng = []
        for i in val_y.cpu():
            tmp = []
            for j in i:
                if j.item() > 0:
                    tmp.append(j.item())
            leng.append(tmp)
        for index, i in enumerate(predict):
            preds += i[:len(leng[index])]
        for index, i in enumerate(val_y.tolist()):
            labels += i[:len(leng[index])]

    print('prediction\n' + str(len(list2tags(preds))) + '\n', list2tags(preds))
    print('labels\n' + str(len(list2tags(labels))) + '\n', list2tags(labels))
    aver_loss /= (len(test_dataloader) * 64)
    # precision = precision_score(labels, preds, average='macro')
    # recall = recall_score(labels, preds, average='macro')
    # f1 = f1_score(labels, preds, average='macro')
    precision = precision_score(labels, preds, average='macro')
    recall = recall_score(labels, preds, average='macro')
    f1 = f1_score(labels, preds, average='macro')
    # report = classification_report(labels, preds)
    # print(report)
    print('p', precision)
    print('r', recall)
    print('f1', f1)
    p, r, f = get_f1score(list2tags(preds), list2tags(labels))
    print('p', p)
    print('r', r)
    print('f1', f)
skf = StratifiedKFold(n_splits=5) confusion_matrices = [] accuracies = [] precisions = [] recalls = [] f1s = [] for train, test in skf.split(X, Y): xtrain, xtest = X[train], X[test] ytrain, ytest = Y[train], Y[test] clf.fit(xtrain, ytrain) ypredict = clf.predict(xtest) confusion_matrices.append(confusion_matrix(ytest, ypredict)) accuracies.append(accuracy_score(ytest, ypredict)) precisions.append(precision_score(ytest, ypredict)) recalls.append(recall_score(ytest, ypredict)) f1s.append(f1_score(ytest, ypredict)) print '5-fold cross-validation' print 'sum of confusion matrices' print sum(confusion_matrices) print 'average accuracy' print np.mean(accuracies) print 'average precision' print np.mean(precisions) print 'average recall' print np.mean(recalls) print 'average f1' print np.mean(f1s) # train a classifier on the full training set final_classifier = naive_bayes.BernoulliNB()
def get_metrics(path): precision = [] recall = [] fscore = [] roc_auc = [] results = [] valid_dataset = 67 test_scores = np.load(path) threshold = 0 for i in range(67): values = np.load("data/A1X_" + str(i + 1) + ".npy") labels = np.load("data/A1Y_" + str(i + 1) + ".npy") test_portion = 0.2 test_n = int(len(labels) * test_portion) train_values, test_values = values[:-test_n], values[-test_n:] train_labels, test_labels = labels[:-test_n], labels[-test_n:] test_score = test_scores[i] threshold = np.sum(test_score) / test_score.shape[0] test_correct = np.zeros(len(test_score)) for j in range(len(test_labels) - window_size + 1): for k in range(window_size): if (test_labels[j + k] == 1): test_correct[j] = 1 break # This is used for "threshold" detector (Detector 1 in report) predictions = (test_score < threshold).astype(np.int32) # Use the anomalous_num if taking advanced detection method (Detector 2 in report) #anomalous_num = np.where(test_correct==1)[0].shape[0] #predictions = np.zeros_like(test_correct) #predictions_idx = heapq.nsmallest(anomalous_num, range(len(test_score)), test_score.take) #predictions[predictions_idx] = 1 precision.append( precision_score(test_correct, predictions, average="binary")) recall.append(recall_score(test_correct, predictions, average="binary")) fscore.append(f1_score(test_correct, predictions, average="binary")) if (np.sum(test_correct == 1) == 0 or np.sum(test_correct == 1) == test_correct.shape[0]): roc_auc.append(0) valid_dataset -= 1 else: roc_auc.append(roc_auc_score(test_correct, test_score)) # This part is for augmenting the dataset with infrequent normal sampels """ train_score = np.load("scores_on_trained_S.npy") train_correct = np.zeros(len(train_score[i])) for j in range(len(train_labels)-window_size+1): for k in range(window_size): if (train_labels[j+k] == 1): train_correct[j] = 1 break a_n = np.where(train_correct==1)[0].shape[0] pred = np.zeros_like(train_correct) pred_idx = heapq.nsmallest(a_n, range(len(train_score[i])), train_score[i].take) pred[pred_idx] = 1 augment_data(train_correct, train_values[:-119], pred, 50, "aug_data/A1X_"+str(i+1)+".npy", "aug_data/A1Y_"+str(i+1)+".npy") """ precision = np.array(precision) recall = np.array(recall) fscore = np.array(fscore) roc_auc = np.array(roc_auc) #np.save("precision.npy", precision) #np.save("recall.npy", recall) #np.save("fscore.npy", fscore) #np.save("roc_auc.npy", roc_auc) print("Precision:", float(np.sum(precision)) / valid_dataset) print("Recall:", float(np.sum(recall)) / valid_dataset) print("Fscore:", float(np.sum(fscore)) / valid_dataset) print("AUC_Score:", float(np.sum(roc_auc)) / valid_dataset)
models_report = pd.DataFrame(columns=[ 'Model', 'Precision_score', 'Recall_score', 'F1_score', 'Accuracy' ]) for clf, clf_name in zip(clfs.values(), clfs.keys()): clf.fit(X_train_up, y_train_up) y_pred = clf.predict(X_test_up) y_score = clf.score(X_test_up, y_test_up) #print('Calculating {}'.format(clf_name)) t = pd.Series({ 'Model': clf_name, 'Precision_score': metrics.precision_score(y_test_up, y_pred), 'Recall_score': metrics.recall_score(y_test_up, y_pred), 'F1_score': metrics.f1_score(y_test_up, y_pred), 'Accuracy': metrics.accuracy_score(y_test_up, y_pred) }) models_report = models_report.append(t, ignore_index=True) models_report from sklearn.ensemble import RandomForestClassifier classifier = RandomForestClassifier(n_estimators=300, n_jobs=1, random_state=0, bootstrap=False) classifier.fit(X_train_up, y_train_up) y_pred = classifier.predict(X_test_up)
auc_scores = [] for train_index, test_index in kf.split(idx_train): X_train_fold, X_test_fold = X_train[train_index], X_train[test_index] y_train_fold, y_test_fold = y_train[train_index], y_train[test_index] elm.fit(X_train_fold,y_train_fold) y_pred = elm.predict_proba(X_test_fold)[:, 1] yhat_classes = y_pred.copy() yhat_classes[yhat_classes>=threshold] = np.float64(1) yhat_classes[yhat_classes<threshold] = np.float64(0) accuracy = accuracy_score(y_test_fold, yhat_classes) loss = log_loss(y_test_fold, yhat_classes) f1 = f1_score(y_test_fold, yhat_classes) precision = precision_score(y_test_fold, yhat_classes) recall = recall_score(y_test_fold, yhat_classes) auc_score = roc_auc_score(y_test_fold, y_pred) accuracies.append(accuracy) losses.append(loss) f1s.append(f1) precisions.append(precision) recalls.append(recall) auc_scores.append(auc_score) end = time.time() print('Accuracy: %f' % np.array(accuracies).mean()) print('Precision: %f' % np.array(precisions).mean()) print('Recall: %f' % np.array(recalls).mean())
def CNN_model(X_training, X_test, y_training, y_test, n_epochs=100, batch_size=256, model_name='model', history_file='model_accuracies.csv', conf_matrix=False, accuracy_report=False): while os.path.isfile(model_name + ".h5"): model_name = model_name + str(1) csv_logger = CSVLogger('model_training.log') plot_losses = my_callbacks.PlotLosses() metrics = my_callbacks.Metrics() f1_accuracy = my_callbacks.F1Metric() earlystop = EarlyStopping(monitor='val_acc', patience=10, mode='auto') adam = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False) model = Sequential() model.add( Conv1D(32, 9, input_shape=(X_training.shape[1], 1), kernel_initializer=he_normal(seed=12), activation='relu', W_regularizer=l1_l2(0.01))) model.add(BatchNormalization()) model.add(MaxPooling1D(1)) model.add( Conv1D(32, 3, activation='relu', W_regularizer=l1_l2(0.01), padding='same')) model.add(MaxPooling1D(3, padding='same')) model.add(BatchNormalization()) model.add( Conv1D(9, 3, activation='relu', W_regularizer=l1_l2(0.01), padding='same')) model.add(MaxPooling1D(3, padding='same')) model.add(BatchNormalization()) model.add( Conv1D(9, 3, activation='relu', W_regularizer=l1_l2(0.01), padding='same')) model.add(MaxPooling1D(3, padding='same')) model.add(BatchNormalization()) model.add(Flatten()) model.add(Dense(512, activation='relu')) model.add(Dropout(0.2)) model.add(Dense(256, activation='relu')) model.add(BatchNormalization()) model.add(Dropout(0.2)) model.add(Dense(17, activation='softmax', input_shape=(1, ))) model.compile(optimizer=adam, loss='sparse_categorical_crossentropy', metrics=['accuracy']) print('starts fitting model ...') start = time.time() model.fit(X_training, y_training, batch_size=batch_size, epochs=n_epochs, validation_data=(X_test, y_test), callbacks=[metrics, csv_logger]) end = time.time() delta = end - start print('fitting time: ', delta) print('starts predicting model ...') start_prediction = time.time() model.predict(X_test) end_prediction = time.time() delta_prediction = end_prediction - start_prediction print('prediction time: ', delta_prediction) y_pred = model.predict_classes(X_test) model.save_weights(model_name + ".h5") print('weights saved to disk') model_json = model.to_json() with open(model_name + '.json', 'w') as json_file: json_file.write(model_json) print('model saved to disk') with open(history_file, 'a', newline='') as history: writer = csv.writer(history, delimiter=';') writer.writerow([ model_name, accuracy_score(y_test, y_pred), cohen_kappa_score(y_test, y_pred), f1_score(y_test, y_pred, average='weighted'), delta, delta_prediction ]) if conf_matrix: cm_filename = model_name + '_cm.csv' cm = pd.DataFrame(confusion_matrix(y_test, y_pred)) cm.to_csv(cm_filename) if accuracy_report: raport_filename = model_name + '_report.csv' report = classification_report(y_test, y_pred) with open(raport_filename, 'w') as acc_report: acc_report.write(report) return y_pred
                   'KNeighborsClassifier' ], [pe_v, nb_v, dt_v, rf_v, lr_v, kn_v]):
    print(x)
    error = (Y_test != y).sum()
    p[0].append(error)
    print("Errors : %d" % error)
    acc = accuracy_score(Y_test, y) * 100
    p[1].append(acc)
    print("Accuracy : %.2f%%" % acc)
    ps = precision_score(Y_test, y) * 100
    p[2].append(ps)
    print("Precision : %.2f%%" % ps)
    rs = recall_score(Y_test, y) * 100
    p[3].append(rs)
    print("Recall : %.2f%%" % rs)
    f1 = f1_score(Y_test, y) * 100
    p[4].append(f1)
    print("F1 Score : %.2f%% \n" % f1)

print("\n")
print("WITHOUT PCA")

# Perceptron Model
pe = Perceptron(n_iter=10, eta0=10, n_jobs=-1)
pe.fit(X_train_sd, Y_train)

# Naive Bayes Classification
nb = GaussianNB()
nb.fit(X_train, Y_train)

# Decision Tree Classifier
def main(): # Could/should refactor this whole thing args = parseCmdLine() myModule = sklearnHelperLib.importPyFile(args.pipelineDefs) pipelines = myModule.pipelines if type(pipelines) != type([]): pipelines = [ pipelines ] if args.vote: nPipelinesAndVotes = len(pipelines)+1 # include votes else: nPipelinesAndVotes = len(pipelines) # totals across all the split tries for each pipeline + voted predictions # for computing averages pipelineTotals = [ {'fscores':0, 'precisions': 0, 'f1': 0, 'recalls': 0, } for i in range(nPipelinesAndVotes) ] # formats for output lines, Pipeline line, votes line, avg line pf="Pipeline %d: F1: %5.3f F%d: %5.3f Precision: %4.2f Recall: %4.2f" vf="Votes... %d: F1: %5.3f F%d: %5.3f Precision: %4.2f Recall: %4.2f" af="Average. %d: F1: %5.3f F%d: %5.3f Precision: %4.2f Recall: %4.2f" dataSet = load_files( args.trainingData ) labelIndex = dataSet['target_names'].index(args.label) for sp in range(args.numSplits): docs_train, docs_test, y_train, y_test = \ train_test_split( dataSet.data, dataSet.target, test_size=args.testSize, random_state=None) predictions = [] # predictions[i]= predictions for ith Pipeline # on this split (for voting) print "Sample Split %d" % sp for i, pl in enumerate(pipelines): # for each Pipeline pl.fit(docs_train, y_train) y_pred = pl.predict(docs_test) predictions.append(y_pred) precision, recall, fscore, support = \ precision_recall_fscore_support( \ y_test, y_pred, args.beta, pos_label=labelIndex, average='binary') f1 = f1_score(y_test, y_pred, pos_label=labelIndex, average='binary') pipelineTotals[i]['fscores'] += fscore pipelineTotals[i]['f1'] += f1 pipelineTotals[i]['precisions'] += precision pipelineTotals[i]['recalls'] += recall l = pf % (i, f1, args.beta, fscore, precision, recall) print l if args.vote: vote_pred = y_vote( predictions ) precision, recall, fscore, support = \ precision_recall_fscore_support( \ y_test, vote_pred, args.beta, pos_label=labelIndex, average='binary') f1 = f1_score(y_test, vote_pred, pos_label=labelIndex, average='binary') i = len(pipelines) pipelineTotals[i]['fscores'] += fscore pipelineTotals[i]['f1'] += f1 pipelineTotals[i]['precisions'] += precision pipelineTotals[i]['recalls'] += recall l = vf % (i , f1, args.beta, fscore, precision, recall) print l # averages across all the Splits print for i in range(nPipelinesAndVotes): avgFscore = pipelineTotals[i]['fscores'] / args.numSplits avgF1 = pipelineTotals[i]['f1'] / args.numSplits avgPrecision = pipelineTotals[i]['precisions'] / args.numSplits avgRecall = pipelineTotals[i]['recalls'] / args.numSplits l = af % (i, avgF1, args.beta, avgFscore, avgPrecision, avgRecall) print l # pipeline info print "\nTraining data: %s" % args.trainingData print time.strftime("%Y/%m/%d-%H-%M-%S") for i,p in enumerate(pipelines): print "\nPipeline %d -------------" % i for s in p.steps: print s
model_xgb = XGBClassifier(scale_pos_weight=3, learning_rate=0.2, n_estimators=200, min_child_weight=20, max_depth=3, base_score=0.5, gamma=0, n_jobs=4) # eval = [(X_test, y_test)] # model_xgb.fit(X_train, y_train, eval_set=eval, eval_metric='auc', early_stopping_rounds=20, verbose=True) model_xgb.fit(X, Y) y_pred = model_xgb.predict(X_test) p = precision_score(y_test, y_pred, average='binary') r = recall_score(y_test, y_pred, average='binary') f1score = f1_score(y_test, y_pred, average='binary') print(p) print(r) print(f1score) plot_importance(model_xgb, importance_type='gain') pyplot.rcParams["font.sans-serif"] = ["Microsoft YaHei"] pyplot.rcParams['axes.unicode_minus'] = False pyplot.show() # y_real_pred = model_xgb.predict(X_real_test) # y_real_pred = grid_search.predict(X_real_test) # y_real_id = pd.read_csv('data\df_id_test.csv', index_col=0, names=["个人编码", "result"]) # temp = pd.DataFrame({"result": y_real_pred}, index=X_real_test.index) # print(temp["result"].sum()) # y_real_id["result"] = temp["result"]
for i in range(0, np.shape(features)[0], predict_batch): predictions[i:i+predict_batch] = clf.predict(features[ i:i+predict_batch]) predictions_prob = np.zeros((np.shape(features)[0],len(np.unique(label)))) for i in range(0, np.shape(features)[0], predict_batch): predictions_prob[i:i+predict_batch] = clf.predict_proba(features[ i:i+predict_batch]) np.save('predictions.npy',predictions) np.save('predictions_prob.npy',predictions_prob) predictions=predictions.astype(np.uint8) print("predictions",predictions.shape,np.unique(predictions),predictions.dtype) print("label_test",label.shape,np.unique(label),label.dtype) predictions=predictions.astype(np.uint8) metrics={} metrics['f1_score']=f1_score(label,predictions,average=None) metrics['f1_score_weighted']=f1_score(label,predictions,average='weighted') metrics['overall_acc']=accuracy_score(label,predictions) confusion_matrix_=confusion_matrix(label,predictions) metrics['per_class_acc']=(confusion_matrix_.astype('float') / confusion_matrix_.sum(axis=1)[:, np.newaxis]).diagonal() metrics['average_acc']=np.average(metrics['per_class_acc'][~np.isnan(metrics['per_class_acc'])]) print(metrics) print(confusion_matrix_)
# A prediction is made on the test dataset based on the model fitted on train set y_pred = clf.predict(x_test) # Various performance metrics were found and reported for each k print "k =", k """ Report all performance metrics and append values to their respective arrays """ accuracies.append(accuracy_score(y_test, y_pred)) print 'Accuracy:', accuracy_score(y_test, y_pred) precisions.append(precision_score(y_test, y_pred)) print 'Precision:', precision_score(y_test, y_pred) recalls.append(recall_score(y_test, y_pred)) print 'Recall:', recall_score(y_test, y_pred) fscores.append(f1_score(y_test, y_pred)) print 'F1-Score:', f1_score(y_test, y_pred) # Find best fit k based on their accuracies best_k_index = np.argmax(accuracies) """ Display performance metrics for best fit k value """ print "Best fit k =", best_k_index + 3 print 'Best fit Accuracy:', accuracies[best_k_index] print 'Best fit Precision:', precisions[best_k_index] print 'Best fit Recall:', recalls[best_k_index] print 'Best fit F1-Score:', fscores[best_k_index]
def gini_samples_f1(truth, predictions):
    return f1_score(truth, predictions.argmax(axis=1), average='samples')
def train(self, data_dir): ''' Trains a single layer model on the data contained in the specified directory. Labels found in the directory are augmented with an unknown label. Args: data_dir: Directory containing the training data ''' print("Reading data") # First read the data directory for the features and labels X_all, y_all, new_labels = read_data( data_dir, duration=self.duration, labels=self.labels ) self.labels = new_labels print("Making data splits") # Split the data into training, validation, and testing sets X_train, X_test, y_train, y_test = train_test_split( X_all, y_all, test_size=0.2, random_state=0 ) print("Normalizing features") # Mean normalize the features, saving the means and variances self.means = X_train.mean(axis=0) self.stds = X_train.std(axis=0) # Set the zero standard deviations to 1 zero_stds = self.stds <= 1 self.stds[zero_stds] = 1 # Apply the mean normalization transformation to the training dataj X_normed = X_train - np.expand_dims(self.means, 0) X_normed /= np.expand_dims(self.stds, 0) print("Doing feature selection") # Select the relevant features from the training set self.feature_list = select_features(X_normed, y_train) print(self.feature_list) # If hidden size wasn't specified, default to the mean of the number # of features and the size of the label space if self.hidden_size is None: self.hidden_size = int(1/2*( len(self.labels) + \ len(self.feature_list) ) ) # Augment the data with randomly permuted samples X_aug, y_aug = self._augment_data(X_normed, y_train) # Fit the one layer model to the augmented training data X_input = X_aug[:, self.feature_list] self.model = MLPClassifier( (self.hidden_size), alpha=0.1, activation='relu', max_iter=1000 ) self.model.fit(X_input, y_aug) # Evaulate the model on the augmented test data X_test_input = X_test - np.expand_dims(self.means, 0) X_test_input /= np.expand_dims(self.stds, 0) X_test_aug, y_test_aug = self._augment_data(X_test_input, y_test) predictions = self.model.predict(X_test_aug[:, self.feature_list]) print("F1 score:", f1_score(y_test_aug, predictions, average='weighted'))
def gini_weighted_f1(truth, predictions):
    return f1_score(truth, predictions.argmax(axis=1), average='weighted')
def score(self, X, y, print_report=False):
    predictions = self.predict(X)
    if print_report:
        print(confusion_matrix(y, predictions))
        print(classification_report(y, predictions, digits=3))
    return f1_score(y, predictions, average="macro")
string_files = string_files[int(len(string_files) * args['split']):] struct_files = struct_files[int(len(struct_files) * args['split']):] dynamc_files = dynamc_files[int(len(dynamc_files) * args['split']):] print('---------- ENSEMBLE MODEL ----------') res = ensemble( string_files, struct_files, dynamc_files, string_model.predict(map(itemgetter(1), string_files)), structure_model.predict(map(itemgetter(1), struct_files)), dynamic_model.predict(map(itemgetter(1), dynamc_files))) # Write the sklearn step here please on x=res[1] and y=res[2] print("accuracy:\t\t\t", metrics.accuracy_score(res[1], res[2])) print("f1 score (micro):\t\t", metrics.f1_score(res[1], res[2], average='micro')) print("precision score (micro):\t", metrics.precision_score(res[1], res[2], average='micro')) print("recall score (micro):\t\t", metrics.recall_score(res[1], res[2], average='micro')) print("f1 score (macro):\t\t", metrics.f1_score(res[1], res[2], average='macro')) print("precision score (macro):\t", metrics.precision_score(res[1], res[2], average='macro')) print("recall score (macro):\t\t", metrics.recall_score(res[1], res[2], average='macro')) elif mode == 'validate': if args['choose']: print( 'The parameter "choose" is unavailable for this operation.'
def gini_micro_f1(truth, predictions):
    return f1_score(truth, predictions.argmax(axis=1), average='micro')
                      params, n_jobs=1, cv=5, return_train_score=True,
                      scoring={'f1_score': make_scorer(f1_score, average='macro'),
                               'accuracy': 'accuracy'},
                      refit='f1_score', verbose=10, error_score='raise')

best = search.fit(X[:, :, :], Y[:])
print(best.__dict__)
print("BEST PARAMS: ", best.best_params_)

model_dir = 'models'
if not os.path.exists(model_dir):
    os.makedirs(model_dir)

best_estimator = best.best_estimator_
best_estimator.fit(X, Y)
predicted_y = best_estimator.predict(X_test)

print("TEST EVALUATION")
print("F1-SCORE: ", f1_score(Y_test, predicted_y, average='macro'))
print("ACCURACY: ", accuracy_score(Y_test, predicted_y))

auc_roc = roc_auc_score(Y_test, predicted_y)
print("AUROC score : %s " % auc_roc)
precision, recall, _ = precision_recall_curve(Y_test, predicted_y)
auc_prc = auc(recall, precision)
print("AUPRC score : %s " % auc_prc)
print(confusion_matrix(Y_test, predicted_y))

best_estimator.model.save(os.path.join(model_dir, str(datetime.now().strftime("%Y%m%d-%H%M%S"))))
def gini_f1(truth, predictions, pos_label=1):
    return f1_score(truth, predictions, average=None)[pos_label]
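# A toy run of the wrappers above; a sketch with made-up class probabilities,
# assuming only NumPy and scikit-learn.
import numpy as np

truth = np.array([0, 2, 1, 2])
probs = np.array([[0.7, 0.2, 0.1],
                  [0.1, 0.3, 0.6],
                  [0.2, 0.5, 0.3],
                  [0.3, 0.4, 0.3]])

# argmax over each row gives predicted classes [0, 2, 1, 1]
print(gini_micro_f1(truth, probs))           # micro-averaged F1 (0.75 here)
print(gini_weighted_f1(truth, probs))        # support-weighted F1
print(gini_f1(truth, probs.argmax(axis=1)))  # F1 of the class at pos_label=1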
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, label, test_size=.3, random_state=1984)
print("len(X) = %d" % (len(X)))
print("len(Xtrain) = %d" % (len(Xtrain)))
print("len(Xtest) = %d" % (len(Xtest)))

model = BernoulliNB()
model.fit(Xtrain, Ytrain)
prediction = model.predict(Xtest)

accuracy = (Ytest == prediction).mean()
precision = precision_score(Ytest, prediction, average="macro")
recall = recall_score(Ytest, prediction, average="macro")
f1 = f1_score(Ytest, prediction, average="macro")

print("accuracy %.3f" % accuracy)
print("precision_score %.3f" % precision)
print("recall_score %.3f" % recall)
print("f1_score %.3f" % f1)

conf_matrix = confusion_matrix(Ytest, prediction, labels=[0, 1, 2, 3])
print(conf_matrix)
print("SEE THE RESULT IN THE FILE NBLibrary.xlsx")

############ Write the result to an Excel file ###############
export("NBLibrary.xlsx", conf_matrix, len(X), len(Xtrain), len(Xtest), accuracy,
       precision, recall, f1)

'''
import os
#print(classifier_Y.shape[0]) k_means_classifier = KNeighborsClassifier(n_neighbors=10) k_means_classifier.fit(c_x_train, c_y_train) svm_classifier = SVC(kernel='linear') svm_classifier.fit(c_x_train, c_y_train) k_means_guesses = k_means_classifier.predict(c_x_test) svm_guesses = svm_classifier.predict(c_x_test) print("K-Means Accuracy, Recall, Precision, and F1 score are as follows:") print(accuracy_score(c_y_test, k_means_guesses)) print(recall_score(c_y_test, k_means_guesses, average='macro')) print(precision_score(c_y_test, k_means_guesses, average='macro')) print(f1_score(c_y_test, k_means_guesses, average='macro')) print( "Support Vector Machine Accuracy, Recall, Precision, and F1 score are as follows:" ) print(accuracy_score(c_y_test, svm_guesses)) print(recall_score(c_y_test, svm_guesses, average='macro')) print(precision_score(c_y_test, svm_guesses, average='macro')) print(f1_score(c_y_test, svm_guesses, average='macro')) #print(guesses) ### ----- Normalization of Regression Data # ---------------------------------------------------------------------------------- feature_data1 = df[['sex_code', 'education_code']]
X = pd.read_csv('D:/SKRIPSI/percobaan/1332data9klas/tfidf1332.csv')

# In[2]:

kf = KFold(len(X), n_folds=10, shuffle=True, random_state=9999)
model_train_index = []
model_test_index = []
model = 0
for k, (index_train, index_test) in enumerate(kf):
    X_train, X_test, y_train, y_test = X.ix[index_train,:], X.ix[index_test,:], y[index_train], y[index_test]
    clf = MultinomialNB(alpha=0.1, fit_prior=True, class_prior=None).fit(X_train, y_train)
    score = clf.score(X_test, y_test)
    y_pred = clf.predict(X_test)
    f1score = f1_score(y_test, y_pred, average='weighted')
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    print('Model %d has accuracy %f with | f1score: %f | precision: %f | recall : %f' % (k, score, f1score, precision, recall))
    model_train_index.append(index_train)
    model_test_index.append(index_test)
    model += 1

# In[5]:

temp = df.klasifikasi

# In[ ]:
def main(_): # word_id_mapping_o, w2v_o = load_w2v(FLAGS.embedding_file, FLAGS.embedding_dim, True) word_id_mapping_o, w2v_o = load_word_embedding(FLAGS.word_id_file, FLAGS.embedding_file, FLAGS.embedding_dim, True) word_embedding_o = tf.constant(w2v_o, dtype=tf.float32) # word_id_mapping_r, w2v_r = load_w2v(FLAGS.embedding_file_r, FLAGS.embedding_dim, True) # word_id_mapping_r, w2v_r = load_word_embedding(FLAGS.word_id_file, FLAGS.embedding_file_r, FLAGS.embedding_dim, True) word_id_mapping_r = word_id_mapping_o word_embedding_r = tf.constant(w2v_o, dtype=tf.float32) with tf.name_scope('inputs'): keep_prob1 = tf.placeholder(tf.float32) keep_prob2 = tf.placeholder(tf.float32) x_o = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len, FLAGS.max_sentence_len]) x_r = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len, FLAGS.max_sentence_len]) sen_len_o = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len]) sen_len_r = tf.placeholder(tf.int32, [None, FLAGS.max_doc_len]) doc_len_o = tf.placeholder(tf.int32, None) doc_len_r = tf.placeholder(tf.int32, None) y = tf.placeholder(tf.float32, [None, FLAGS.n_class]) inputs_o = tf.nn.embedding_lookup(word_embedding_o, x_o) inputs_o = tf.reshape(inputs_o, [-1, FLAGS.max_sentence_len, FLAGS.embedding_dim]) inputs_r = tf.nn.embedding_lookup(word_embedding_r, x_r) inputs_r = tf.reshape(inputs_r, [-1, FLAGS.max_sentence_len, FLAGS.embedding_dim]) prob = hn_inter_att(inputs_o, sen_len_o, doc_len_o, inputs_r, sen_len_r, doc_len_r, keep_prob1, keep_prob2) with tf.name_scope('loss'): reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=prob, labels=y)) + tf.add_n(reg_loss) all_vars = [var for var in tf.global_variables()] with tf.name_scope('train'): global_step = tf.Variable(0, name='global_step', trainable=False) optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate) grads, global_norm = tf.clip_by_global_norm(tf.gradients(loss, all_vars), 5.0) train_op = optimizer.apply_gradients(zip(grads, all_vars), name='train_op', global_step=global_step) with tf.name_scope('predict'): cor_pred = tf.equal(tf.argmax(prob, 1), tf.argmax(y, 1)) accuracy = tf.reduce_mean(tf.cast(cor_pred, tf.float32)) accuracy_num = tf.reduce_sum(tf.cast(cor_pred, tf.int32)) true_y = tf.argmax(y, 1) pred_y = tf.argmax(prob, 1) title = '-d1-{}d2-{}b-{}r-{}l2-{}sen-{}dim-{}h-{}c-{}'.format( FLAGS.keep_prob1, FLAGS.keep_prob2, FLAGS.batch_size, FLAGS.learning_rate, FLAGS.l2_reg, FLAGS.max_sentence_len, FLAGS.embedding_dim, FLAGS.n_hidden, FLAGS.n_class ) def get_batch_data(xo, slo, dlo, xr, slr, dlr, yy, batch_size, kp1, kp2, is_shuffle=True): for index in batch_index(len(yy), batch_size, 1, is_shuffle): feed_dict = { x_o: xo[index], x_r: xr[index], y: yy[index], sen_len_o: slo[index], sen_len_r: slr[index], doc_len_o: dlo[index], doc_len_r: dlr[index], keep_prob1: kp1, keep_prob2: kp2, } yield feed_dict, len(index) conf = tf.ConfigProto(allow_soft_placement=True) conf.gpu_options.allow_growth = True with tf.Session(config=conf) as sess: import time timestamp = str(int(time.time())) _dir = 'summary/' + str(timestamp) + '_' + title test_loss = tf.placeholder(tf.float32) test_acc = tf.placeholder(tf.float32) train_summary_op, test_summary_op, validate_summary_op, train_summary_writer, test_summary_writer, \ validate_summary_writer = summary_func(loss, accuracy, test_loss, test_acc, _dir, title, sess) save_dir = 'temp_model/' + str(timestamp) + '_' + title + '/' saver = saver_func(save_dir) init = 
tf.global_variables_initializer() sess.run(init) # saver.restore(sess, '/-') tr_x, tr_y, tr_sen_len, tr_doc_len = load_inputs_document( FLAGS.train_file, word_id_mapping_o, FLAGS.max_sentence_len, FLAGS.max_doc_len ) te_x, te_y, te_sen_len, te_doc_len = load_inputs_document( FLAGS.test_file, word_id_mapping_o, FLAGS.max_sentence_len, FLAGS.max_doc_len ) tr_x_r, tr_y_r, tr_sen_len_r, tr_doc_len_r = load_inputs_document( FLAGS.train_file_r, word_id_mapping_r, FLAGS.max_sentence_len, FLAGS.max_doc_len ) te_x_r, te_y_r, te_sen_len_r, te_doc_len_r = load_inputs_document( FLAGS.test_file_r, word_id_mapping_r, FLAGS.max_sentence_len, FLAGS.max_doc_len ) # v_x, v_y, v_sen_len, v_doc_len = load_inputs_document( # FLAGS.validate_file_path, # word_id_mapping, # FLAGS.max_sentence_len, # FLAGS.max_doc_len # ) # v_x, v_y, v_sen_len, v_doc_len = load_inputs_document( # FLAGS.validate_file_path, # word_id_mapping, # FLAGS.max_sentence_len, # FLAGS.max_doc_len # ) max_acc, max_prob, step = 0., None, None max_ty, max_py = None, None for i in xrange(FLAGS.n_iter): for train, _ in get_batch_data(tr_x, tr_sen_len, tr_doc_len, tr_x_r, tr_sen_len_r, tr_doc_len_r, tr_y, FLAGS.batch_size, FLAGS.keep_prob1, FLAGS.keep_prob2): _, step, summary = sess.run([train_op, global_step, train_summary_op], feed_dict=train) train_summary_writer.add_summary(summary, step) # embed_update = tf.assign(word_embedding, tf.concat([tf.zeros([1, FLAGS.embedding_dim]), word_embedding[1:]]), 0) # sess.run(embed_update) acc, cost, cnt = 0., 0., 0 p, ty, py = [], [], [] for test, num in get_batch_data(te_x, te_sen_len, te_doc_len, te_x_r, te_sen_len_r, te_doc_len_r, te_y, FLAGS.batch_size, 1.0, 1.0, False): _loss, _acc, _p, _ty, _py = sess.run([loss, accuracy_num, prob, true_y, pred_y], feed_dict=test) p += list(_p) ty += list(_ty) py += list(_py) acc += _acc cost += _loss * num cnt += num print 'all samples={}, correct prediction={}'.format(cnt, acc) acc = acc / cnt cost = cost / cnt print 'Iter {}: mini-batch loss={:.6f}, test acc={:.6f}'.format(i, cost, acc) summary = sess.run(test_summary_op, feed_dict={test_loss: cost, test_acc: acc}) test_summary_writer.add_summary(summary, step) if acc > max_acc: max_acc = acc max_prob = p max_ty = ty max_py = py # saver.save(sess, save_dir, global_step=step) print 'P:', precision_score(max_ty, max_py, average=None) print 'R:', recall_score(max_ty, max_py, average=None) print 'F:', f1_score(max_ty, max_py, average=None) fp = open(FLAGS.prob_file, 'w') for item in max_prob: fp.write(' '.join([str(it) for it in item]) + '\n') print 'Optimization Finished! Max acc={}'.format(max_acc) print 'Learning_rate={}, iter_num={}, batch_size={}, hidden_num={}, l2={}'.format( FLAGS.learning_rate, FLAGS.n_iter, FLAGS.batch_size, FLAGS.n_hidden, FLAGS.l2_reg )
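# The final report above prints per-class precision/recall/F1 (average=None). A single
# summary number per run is often useful as well; a small addition reusing the stored
# max_ty / max_py lists from the snippet above:
from sklearn.metrics import f1_score

print('macro-F1: {:.4f}'.format(f1_score(max_ty, max_py, average='macro')))
print('micro-F1: {:.4f}'.format(f1_score(max_ty, max_py, average='micro')))  # equals accuracy here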
for i in features: ## try adding each candidate feature in turn my_list.append(i) X = df.loc[:, my_list].values # data ## hold-out train/test split X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=0) # the chosen machine learning algorithm is applied in this section clf = ml_list[j] clf.fit(X_train, y_train) predict = clf.predict(X_test) f1 = clf.score(X_test, y_test) result = f1_score(y_test, predict, average='macro') accuracy = round(clf.score(X_test, y_test), 2) temp = "[" for ii in my_list: temp += str( my_list.index(ii) + 1 ) + ", " # record each selected feature by its index number to save space if result >= least: # if the macro F1 is at least as high as the best value seen so far, keep the new feature least = result print( '%-17s %-30s %-10s %-10s %-15s %-15s ' % (j, i, result, accuracy, temp, "------> New feature found!!!")) else: # if not, remove it from the list
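# The hand-rolled loop above is a greedy forward feature search scored by macro F1.
# scikit-learn (>= 0.24) ships a closely related built-in, SequentialFeatureSelector,
# which runs the same greedy forward search under cross-validation. A minimal sketch,
# assuming the full feature matrix X, labels y, and estimator clf from above:
from sklearn.feature_selection import SequentialFeatureSelector

sfs = SequentialFeatureSelector(clf, direction='forward', scoring='f1_macro', cv=5)
sfs.fit(X, y)
print(sfs.get_support())   # boolean mask over the columns of X: True = feature kept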
def test(patch_shape, extraction_step): with tf.Graph().as_default(): test_patches = tf.placeholder(tf.float32, [ F.batch_size, patch_shape[0], patch_shape[1], patch_shape[2], F.num_mod ], name='real_patches') phase = tf.placeholder(tf.bool) # Define the network # To use the actual 3-D U-Net, change the ***trained_network*** function in both training and testing #output_soft = trained_network(test_patches, phase, patch_shape, reuse=None) output_soft = trained_network_dis(test_patches, reuse=None) # To convert from one-hot form output = tf.argmax(output_soft, axis=-1) print("Output Patch Shape:", output.get_shape()) # To load the saved checkpoint saver = tf.train.Saver() with tf.Session() as sess: try: load_model(F.best_checkpoint_dir, sess, saver) print(" Checkpoint loaded successfully!....\n") except: print(" [!] Checkpoint loading failed!....\n") return # Get patches from test images patches_test, labels_test = preprocess_dynamic_lab( F.data_directory, F.num_classes, extraction_step, patch_shape, F.number_train_images, validating=F.training, testing=F.testing, num_images_testing=F.number_test_images) total_batches = int(patches_test.shape[0] / F.batch_size) # Array to store the prediction results predictions_test = np.zeros((patches_test.shape[0], patch_shape[0], patch_shape[1], patch_shape[2])) print("max and min of patches_test:", np.min(patches_test), np.max(patches_test)) # Batch-wise prediction print("Total number of Batches: ", total_batches) for batch in range(total_batches): patches_feed = patches_test[batch * F.batch_size:(batch + 1) * F.batch_size, :, :, :, :] preds = sess.run(output, feed_dict={ test_patches: patches_feed, phase: False }) predictions_test[batch * F.batch_size:(batch + 1) * F.batch_size, :, :, :] = preds print(("Processed_batch:[%8d/%8d]") % (batch, total_batches)) print("All patches Predicted") print("Shape of predictions_test, min and max:", predictions_test.shape, np.min(predictions_test), np.max(predictions_test)) #To stitch the image back images_pred = recompose3D_overlap(predictions_test, 144, 192, 256, extraction_step[0], extraction_step[1], extraction_step[2]) print("Shape of Predicted Output Groundtruth Images:", images_pred.shape, np.min(images_pred), np.max(images_pred), np.mean(images_pred), np.mean(labels_test)) # To save the images for i in range(F.number_test_images): pred2d = np.reshape(images_pred[i], (144 * 192 * 256)) lab2d = np.reshape(labels_test[i], (144 * 192 * 256)) save_image(F.results_dir, images_pred[i], F.number_train_images + i + 2) F1_score = f1_score(lab2d, pred2d, labels=[0, 1, 2, 3], average=None) # Evaluation pred2d = np.reshape(images_pred, (images_pred.shape[0] * 144 * 192 * 256)) lab2d = np.reshape(labels_test, (labels_test.shape[0] * 144 * 192 * 256)) F1_score = f1_score(lab2d, pred2d, labels=[0, 1, 2, 3], average=None) print("Testing Dice Coefficient.... ") print("Background:", F1_score[0]) print("CSF:", F1_score[1]) print("GM:", F1_score[2]) print("WM:", F1_score[3]) return
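# The per-class F1 above is exactly the Dice coefficient of each tissue class
# (Dice = 2TP / (2TP + FP + FN) = F1). A small extension that also reports mean Dice
# over the foreground classes, reusing the flattened lab2d / pred2d arrays from above:
import numpy as np
from sklearn.metrics import f1_score

dice = f1_score(lab2d, pred2d, labels=[0, 1, 2, 3], average=None)
print("per-class Dice (BG, CSF, GM, WM):", dice)
print("mean foreground Dice: %.4f" % np.mean(dice[1:]))   # skip label 0 (background)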
def evaluate(self,dev,avg_best,BLEU=False): logging.info("STARTING EVALUATION") acc_avg = 0.0 wer_avg = 0.0 acc_G = 0.0 acc_P = 0.0 acc_V = 0.0 microF1_PRED,microF1_PRED_cal,microF1_PRED_nav,microF1_PRED_wet = [],[],[],[] microF1_TRUE,microF1_TRUE_cal,microF1_TRUE_nav,microF1_TRUE_wet = [],[],[],[] ref = [] hyp = [] ref_s = "" hyp_s = "" pbar = tqdm(enumerate(dev),total=len(dev)) for j, data_dev in pbar: # (T,B) a list of list words = self.evaluate_batch(len(data_dev[1]),data_dev[0],data_dev[1],data_dev[2],data_dev[3],data_dev[4],data_dev[5],data_dev[6]) acc=0 w = 0 temp_gen = [] for i, row in enumerate(np.transpose(words)): # (B,T) st = '' for e in row: if e== '<EOS>': break else: st+= e + ' ' temp_gen.append(st) correct = data_dev[7][i] ### compute F1 SCORE if(len(data_dev)>10): f1_true,f1_pred = computeF1(data_dev[8][i],st.lstrip().rstrip(),correct.lstrip().rstrip()) microF1_TRUE += f1_true microF1_PRED += f1_pred f1_true,f1_pred = computeF1(data_dev[9][i],st.lstrip().rstrip(),correct.lstrip().rstrip()) microF1_TRUE_cal += f1_true microF1_PRED_cal += f1_pred f1_true,f1_pred = computeF1(data_dev[10][i],st.lstrip().rstrip(),correct.lstrip().rstrip()) microF1_TRUE_nav += f1_true microF1_PRED_nav += f1_pred f1_true,f1_pred = computeF1(data_dev[11][i],st.lstrip().rstrip(),correct.lstrip().rstrip()) microF1_TRUE_wet += f1_true microF1_PRED_wet += f1_pred if (correct.lstrip().rstrip() == st.lstrip().rstrip()): acc+=1 w += wer(correct.lstrip().rstrip(),st.lstrip().rstrip()) ref.append(str(correct.lstrip().rstrip())) hyp.append(str(st.lstrip().rstrip())) ref_s+=str(correct.lstrip().rstrip())+ "\n" hyp_s+=str(st.lstrip().rstrip()) + "\n" acc_avg += acc/float(len(data_dev[1])) wer_avg += w/float(len(data_dev[1])) pbar.set_description("R:{:.4f},W:{:.4f}".format(acc_avg/float(len(dev)),wer_avg/float(len(dev)))) if(len(data_dev)>10): logging.info("F1 SCORE:\t"+str(f1_score(microF1_TRUE, microF1_PRED, average='micro'))) logging.info("F1 CAL:\t"+str(f1_score(microF1_TRUE_cal, microF1_PRED_cal, average='micro'))) logging.info("F1 WET:\t"+str(f1_score(microF1_TRUE_wet, microF1_PRED_wet, average='micro'))) logging.info("F1 NAV:\t"+str(f1_score(microF1_TRUE_nav, microF1_PRED_nav, average='micro'))) if (BLEU): bleu_score = moses_multi_bleu(np.array(hyp), np.array(ref), lowercase=True) logging.info("BLEU SCORE:"+str(bleu_score)) if (bleu_score >= avg_best): self.save_model(str(self.name)+str(bleu_score)) logging.info("MODEL SAVED") return bleu_score else: acc_avg = acc_avg/float(len(dev)) if (acc_avg >= avg_best): self.save_model(str(self.name)+str(acc_avg)) logging.info("MODEL SAVED") return acc_avg
def train_or_eval_model(model, loss_function, dataloader, epoch, optimizer=None, train=False): losses = [] preds = [] labels = [] masks = [] alphas, alphas_f, alphas_b, vids = [], [], [], [] assert not train or optimizer != None if train: model.train() else: model.eval() for data in dataloader: if train: optimizer.zero_grad() # import ipdb;ipdb.set_trace() acouf, qmask, umask, label =\ [d.cuda() for d in data[:-1]] if cuda else data[:-1] #log_prob = model(torch.cat((textf,acouf,visuf),dim=-1), qmask,umask) # seq_len, batch, n_classes log_prob, alpha, alpha_f, alpha_b = model( acouf, qmask, umask) # seq_len, batch, n_classes lp_ = log_prob.transpose(0, 1).contiguous().view( -1, log_prob.size()[2]) # batch*seq_len, n_classes labels_ = label.view(-1) # batch*seq_len loss = loss_function(lp_, labels_, umask) pred_ = torch.argmax(lp_, 1) # batch*seq_len preds.append(pred_.data.cpu().numpy()) labels.append(labels_.data.cpu().numpy()) masks.append(umask.view(-1).cpu().numpy()) losses.append(loss.item() * masks[-1].sum()) if train: loss.backward() optimizer.step() else: alphas += alpha alphas_f += alpha_f alphas_b += alpha_b vids += data[-1] if preds != []: preds = np.concatenate(preds) labels = np.concatenate(labels) masks = np.concatenate(masks) else: return float('nan'), float('nan'), [], [], [], float('nan'), [] avg_loss = round(np.sum(losses) / np.sum(masks), 4) avg_accuracy = round( accuracy_score(labels, preds, sample_weight=masks) * 100, 2) avg_fscore = round( f1_score(labels, preds, sample_weight=masks, average='weighted') * 100, 2) return avg_loss, avg_accuracy, labels, preds, masks, avg_fscore, [ alphas, alphas_f, alphas_b, vids ]
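# In the function above the 0/1 utterance mask is passed as sample_weight, so padded
# timesteps contribute nothing to the accuracy or the weighted F1. A small self-contained
# illustration of that behaviour with toy arrays (purely hypothetical values):
import numpy as np
from sklearn.metrics import f1_score

labels = np.array([0, 1, 2, 1, 0, 0])
preds  = np.array([0, 2, 2, 1, 2, 0])
mask   = np.array([1, 1, 1, 1, 0, 0])    # last two positions are padding

masked   = f1_score(labels, preds, sample_weight=mask, average='weighted')
unmasked = f1_score(labels[mask == 1], preds[mask == 1], average='weighted')
print(masked, unmasked)                   # identical: the padded positions are ignored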