def eval_hc_train(hc, labels, print_err = False):
    """
    Score the predictions stored on ``hc`` against its own sentences.

    For every index ``i`` below ``len(hc.res)`` whose sentence
    ``hc.data.sentences[i]`` is non-empty, the labels extracted with
    ``util.get_lab`` are compared with ``hc.res[i]`` through ``eval_ner``.
    The three numbers ``eval_ner`` returns are summed over all sentences
    (presumably true-positive, false-positive and false-negative counts --
    confirm against ``eval_ner``).  Precision, recall and F1 computed from
    those totals are printed on one line; if any of the divisions hits a
    zero denominator, a "DIV BY 0" line with the raw totals is printed
    instead.

    :param hc: object exposing ``res`` and ``data.sentences``, indexed in
        parallel
    :param labels: forwarded unchanged to ``eval_ner``
    :param print_err: accepted but not used by this function
    :return: None (results are only printed)
    """
    true_pos = 0
    false_pos = 0
    false_neg = 0

    for idx in range(len(hc.res)):
        sentence = hc.data.sentences[idx]
        if len(sentence) == 0:
            # Empty sentences contribute nothing to the totals.
            continue
        hit, spurious, missed = eval_ner(
            util.get_lab(sentence), hc.res[idx], labels)
        true_pos += hit
        false_pos += spurious
        false_neg += missed

    try:
        precision = true_pos * 1.0 / (true_pos + false_pos)
        recall = true_pos * 1.0 / (true_pos + false_neg)
        f_score = 2.0 * precision * recall / (precision + recall)
    except ZeroDivisionError:
        # Single-argument print with %-formatting gives the same output as
        # the comma-separated print statement on Python 2.
        print("%s %s %s %s" % ("DIV BY 0 ", true_pos, false_pos, false_neg))
    else:
        print("%s %s %s" % (precision, recall, f_score))
def eval_hc_test(hc, features, labels, print_err=False, decoder='hc',
                 dirname="testa"):
    """
    Evaluate ``hc`` on the sentences read from the ``.txt`` files in a
    directory (the test data, not the training data).

    All lines of every ``*.txt`` file directly inside ``dirname`` are
    concatenated and turned into sentences with ``util.extract``.  Each
    sentence is tagged with ``get_tag`` and compared with its gold labels
    through ``eval_ner``; the three numbers ``eval_ner`` returns are summed
    over all sentences (presumably true-positive, false-positive and
    false-negative counts -- confirm against ``eval_ner``).  Precision,
    recall and F1 computed from those totals are printed on one line; if any
    of the divisions hits a zero denominator, a "DIV BY 0" line with the raw
    totals is printed instead.

    :param hc: model handed to ``get_tag``
    :param features: forwarded to ``util.extract``, ``get_tag`` and
        ``util.get_words``
    :param labels: forwarded to ``util.extract``, ``eval_ner`` and
        ``util.get_lab_name``
    :param print_err: when true, print the words, gold labels and predicted
        labels of every sentence for which the second or third number
        returned by ``eval_ner`` is non-zero
    :param decoder: forwarded unchanged to ``get_tag``
    :param dirname: directory scanned for ``.txt`` input files; defaults to
        the previously hard-coded ``"testa"``
    :return: None (results are only printed)
    """
    tp = 0
    fp = 0
    fn = 0

    lines = []
    for name in os.listdir(dirname):
        if name.endswith(".txt"):
            # ``with`` guarantees the handle is closed even if reading fails.
            with open(os.path.join(dirname, name)) as handle:
                lines.extend(handle)

    sentences = util.extract(lines, features, labels, keep_word = True)

    for sen in sentences:
        predicted = get_tag(hc, sen, features, decoder)
        (x, y, z) = eval_ner(util.get_lab(sen), predicted, labels)
        tp += x
        fp += y
        fn += z
        if print_err and y + z > 0:
            # Single-argument print with %-formatting gives the same output
            # as the comma-separated print statement on Python 2.
            print("%s %s" % (
                "sen: ",
                util.get_words(sen, features) + " OOV = " + str(has_oov(sen))))
            print("%s %s" % (
                "true labels: ",
                util.get_lab_name(util.get_lab(sen), labels)))
            print("%s %s" % (
                "predicted: ", util.get_lab_name(predicted, labels)))

    try:
        pre = tp * 1.0 / (tp + fp)
        rec = tp * 1.0 / (tp + fn)
        f_score = 2.0 * pre * rec / (pre + rec)
    except ZeroDivisionError:
        print("%s %s %s %s" % ("DIV BY 0 ", tp, fp, fn))
    else:
        print("%s %s %s" % (pre, rec, f_score))
def run_test(test, h):
    """
    Print the fraction of positions where ``h`` decodes the gold label.

    For every sentence in ``test`` the observations (``util.get_obs``) are
    decoded with ``h.decode`` and paired position by position with the gold
    labels (``util.get_lab``).  ``zip`` is used for the pairing, so positions
    beyond the shorter of the two sequences are ignored.  The number of equal
    pairs divided by the number of pairs is printed.

    If there are no pairs at all (for example ``test`` is empty), a
    "DIV BY 0" line with the raw counts is printed instead of raising
    ``ZeroDivisionError``, matching how the other evaluation helpers in this
    file report an undefined score.

    :param test: iterable of sentences accepted by ``util.get_obs`` and
        ``util.get_lab``
    :param h: object providing ``decode(observations)``
    :return: None (the result is only printed)
    """
    total = 0
    correct = 0
    for sentence in test:
        observations = util.get_obs(sentence)
        gold = util.get_lab(sentence)
        predicted = h.decode(observations)
        for expected, got in zip(gold, predicted):
            total += 1
            if expected == got:
                correct += 1

    if total == 0:
        # Nothing was compared, so the accuracy is undefined.
        print("%s %s %s" % ("DIV BY 0 ", correct, total))
        return

    print(correct * 1.0 / total)