import logging

import numpy as np
from keras import backend as K
from keras import metrics
from keras.metrics import fmeasure
from keras.optimizers import Adagrad


def test_fmeasure():
    y_true = K.variable(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]))
    y_pred = K.variable(np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0]))
    # Calculated using sklearn.metrics.f1_score.
    expected = 0.33333333333333331
    actual = K.eval(metrics.fmeasure(y_true, y_pred))
    epsilon = 1e-05
    assert expected - epsilon <= actual <= expected + epsilon
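# Standalone cross-check of the expected value above (a minimal sketch,
# assuming scikit-learn is installed): these arrays give TP=2, FP=3, FN=5,
# so precision = 0.4, recall = 2/7, and F1 = 1/3.
from sklearn.metrics import f1_score

print(f1_score(np.array([0, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0]),
               np.array([1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0])))
# -> 0.3333333333333333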
def predict_and_eval(model, X_train, y_train, X_test, y_test, threshold=None):
    # Note: `batch_size` is assumed to be defined at module level.
    logging.info("Performing prediction on train and test for evaluation ...")
    y_pred_train = model.predict(X_train, batch_size=batch_size, verbose=1)
    y_pred_test = model.predict(X_test, batch_size=batch_size, verbose=1)
    eval_types = ['Train', 'Test']
    logs = {}
    for e, eval_type in enumerate(eval_types):
        metric_prefix = '' if e == 0 else 'val_'
        y_eval = y_train if e == 0 else y_test
        y_pred_eval = y_pred_train if e == 0 else y_pred_test
        y_eval = y_eval.astype(float)
        if threshold is not None:
            # Shift the binary targets by (0.5 - threshold) and clip back
            # into [0, 1]; an earlier variant applied the same shift to the
            # predictions instead:
            # y_pred_eval = (0.5 - threshold) + y_pred_eval
            y_eval = K.clip((0.5 - threshold) + y_eval, 0., 1.)
        logs[metric_prefix + 'loss'] = metrics.binary_crossentropy(y_eval, y_pred_eval).eval()
        logs[metric_prefix + 'acc'] = metrics.binary_accuracy(y_eval, y_pred_eval).eval()
        logs[metric_prefix + 'precision'] = metrics.precision(y_eval, y_pred_eval).eval()
        logs[metric_prefix + 'recall'] = metrics.recall(y_eval, y_pred_eval).eval()
        logs[metric_prefix + 'fbeta_score'] = metrics.fmeasure(y_eval, y_pred_eval).eval()
    metrics_line = ''
    for s in ['loss', 'acc', 'precision', 'recall', 'fbeta_score']:
        metrics_line += "%s: %.5f %s: %.5f - " % (s, logs[s], 'val_' + s, logs['val_' + s])
    logging.info(metrics_line)
    return logs
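# Why the (0.5 - threshold) shift works: rounding flips at 0.5, so adding
# (0.5 - threshold) before rounding is equivalent to thresholding at
# `threshold`. A minimal numeric sketch (illustrative values only), using
# the commented-out variant that shifts the predictions:
def _shift_trick_demo(threshold=0.4):
    y_pred = np.array([0.35, 0.45, 0.55])
    shifted = np.round(np.clip((0.5 - threshold) + y_pred, 0., 1.))
    direct = (y_pred >= threshold).astype(float)
    print(shifted)  # [0. 1. 1.]
    print(direct)   # [0. 1. 1.]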
kmetrics = metrics  # assumed alias for keras.metrics


def batch_pairwise_metrics(y_true, y_pred):
    # assert K.get_variable_shape(y_true)[1] == K.get_variable_shape(y_pred)[1]
    num_classes = K.get_variable_shape(y_pred)[1]
    preds_cats = K.argmax(y_pred, axis=1)
    preds_one_hot = K.one_hot(preds_cats, num_classes)
    overall_precision = [None for _ in range(num_classes)]
    overall_recall = [None for _ in range(num_classes)]
    overall_fmeasure = [None for _ in range(num_classes)]
    out_dict = {}
    for cc in range(num_classes):
        # Metrics should take 1D arrays which are 1 for positive, 0 for negative.
        two_true, two_pred = y_true[:, cc], preds_one_hot[:, cc]
        cur_dict = {
            'precision/%02d' % cc: kmetrics.precision(two_true, two_pred),
            'recall/%02d' % cc: kmetrics.recall(two_true, two_pred),
            'fmeasure/%02d' % cc: kmetrics.fmeasure(two_true, two_pred),
            'binary_accuracy/%02d' % cc: kmetrics.binary_accuracy(two_true, two_pred),
            'act_pos/%02d' % cc: K.sum(two_true),
            'pred_pos/%02d' % cc: K.sum(two_pred)
        }
        out_dict.update(cur_dict)
        overall_precision[cc] = cur_dict['precision/%02d' % cc]
        overall_recall[cc] = cur_dict['recall/%02d' % cc]
        overall_fmeasure[cc] = cur_dict['fmeasure/%02d' % cc]
    out_dict.update(make_stats('precision', overall_precision))
    out_dict.update(make_stats('recall', overall_recall))
    out_dict.update(make_stats('fmeasure', overall_fmeasure))
    return out_dict
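# `make_stats` is not defined in this snippet. A hypothetical sketch of what
# it plausibly does, assuming it reduces the per-class tensors to summary
# statistics (the helper's actual implementation may differ):
def make_stats(name, per_class_values):
    stacked = K.stack(per_class_values)
    return {
        '%s/mean' % name: K.mean(stacked),
        '%s/min' % name: K.min(stacked),
        '%s/max' % name: K.max(stacked),
    }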
def _non_null_accuracy(self, y_true, y_pred):
    # Despite the name, this reports the f-measure, computed over all
    # columns except column 0 (the null class).
    return fmeasure(y_true[:, 1:], y_pred[:, 1:])
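# Illustration (hypothetical one-hot data, column 0 = null class): only the
# last two columns are scored, giving TP=2, FP=1, FN=0 -> F1 = 0.8.
_yt = K.variable(np.array([[1., 0., 0.], [0., 1., 0.], [0., 0., 1.]]))
_yp = K.variable(np.array([[0., 1., 0.], [0., 1., 0.], [0., 0., 1.]]))
print(K.eval(fmeasure(_yt[:, 1:], _yp[:, 1:])))  # ~0.8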
# def fmeasure(y_true, y_pred):
#     """Computes the f-measure, the harmonic mean of precision and recall.
#
#     Here it is only computed as a batch-wise average, not globally.
#     """
#     return fbeta_score(y_true, y_pred, beta=1)

opt = Adagrad(lr=0.0001)
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()

print('train gaurav...')
model.fit(x_train, y_train, epochs=20, batch_size=128)

predictions = model.predict(x_test, batch_size=256)
score = model.evaluate(x_test, y_test, batch_size=256)
# The metric functions return tensors, so evaluate them via the backend.
print(K.eval(metrics.fmeasure(K.variable(y_test), K.variable(predictions))))
print(K.eval(metrics.categorical_accuracy(K.variable(y_test), K.variable(predictions))))
print(score)
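# The commented-out fmeasure above delegates to `fbeta_score`, which is not
# shown. A self-contained sketch matching that description (batch-wise
# precision/recall over rounded predictions, modeled on the Keras 1.x
# implementation; details may differ from the original codebase):
def fbeta_score(y_true, y_pred, beta=1):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    recall = true_positives / (possible_positives + K.epsilon())
    bb = beta ** 2
    return (1 + bb) * (precision * recall) / (bb * precision + recall + K.epsilon())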