def evaluate(args):
    sparse_graphs_gold = {}
    sparse_graphs_predicted = {}
    total = 0.0
    correct = 0.0
    total_arcs = 0.0
    correct_arcs = 0.0

    # read the gold and predicted graphs, keyed by sentence index:
    for sentence in sentences(codecs.open(args.gold, encoding='utf-8')):
        sparse_graph_gold = Graph(sentence, "sparse").heads
        sparse_graphs_gold[len(sparse_graphs_gold)] = sparse_graph_gold
    for sentence in sentences(codecs.open(args.in_file, encoding='utf-8')):
        sparse_graph_predicted = Graph(sentence, "sparse").heads
        sparse_graphs_predicted[len(sparse_graphs_predicted)] = sparse_graph_predicted

    # sentence-level accuracy: a graph counts as correct only if it matches the gold graph exactly
    if len(sparse_graphs_gold) == len(sparse_graphs_predicted):
        for gold_graph in sorted(sparse_graphs_gold.keys()):
            total += 1
            if make_graph_compareable(sparse_graphs_gold[gold_graph]) == \
                    make_graph_compareable(sparse_graphs_predicted[gold_graph]):
                correct += 1
    else:
        print "Error in file length, Gold: " + str(len(sparse_graphs_gold)) + \
              ", Predicted: " + str(len(sparse_graphs_predicted))

    # arc-level accuracy: an arc is correct if the predicted head matches the gold head
    for predicted_graph in sorted(sparse_graphs_predicted.keys()):
        rev_predicted = reverse_head_graph(sparse_graphs_predicted[predicted_graph])
        rev_gold = reverse_head_graph(sparse_graphs_gold[predicted_graph])
        for dependent in rev_predicted:
            for arc in rev_predicted[dependent]:
                if arc.head == rev_gold[dependent][0].head:
                    correct_arcs += 1
                total_arcs += 1

    with open(args.out_file, "w") as out:
        print >> out, "Total: " + str(total)
        print >> out, "Correct: " + str(correct)
        print >> out, "%: " + str(round(correct / total * 100, 2))
        print >> out, ""
        print >> out, "Total Arcs: " + str(total_arcs)
        print >> out, "Correct: " + str(correct_arcs)
        print >> out, "%: " + str(round(correct_arcs / total_arcs * 100, 2))
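
# A minimal sketch of how evaluate(args) could be wired up on the command line.
# The option names below are assumptions chosen to mirror the attributes the
# function reads (args.gold, args.in_file, args.out_file), not the project's
# actual CLI.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Evaluate predicted dependency graphs against a gold standard.")
    parser.add_argument('--gold', required=True, help="gold-standard input file")
    parser.add_argument('--in-file', dest='in_file', required=True,
                        help="file containing the predicted graphs")
    parser.add_argument('--out-file', dest='out_file', required=True,
                        help="file to write the evaluation report to")
    evaluate(parser.parse_args())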
def find_affixes(file_in, len_list):
    top_x = [2, 3, 4, 5]  # all the affix lengths that should be computed

    # one frequency dictionary per affix length:
    suffixes = {}
    prefixes = {}
    letter_combs = {}
    pos_tags = {}
    for i in top_x:
        suffixes[i] = {}
        prefixes[i] = {}
        letter_combs[i] = {}

    print "\tReading prefixes and suffixes"
    t0 = time.time()

    # after the following loop, every dictionary maps each affix of the given
    # length to a dictionary of POS tags and their frequencies:
    for sentence in tk.sentences(codecs.open(file_in, encoding='utf-8')):
        for token in sentence:
            if token.gold_pos in pos_tags:
                pos_tags[token.gold_pos] += 1
            else:
                pos_tags[token.gold_pos] = 1
            for i in top_x:  # for every desired affix length
                if len(token.form) > i:  # word must be longer than the suffix length
                    # token.form[-i:] is the suffix of length i,
                    # suffixes[i] is the frequency dictionary for suffixes of length i
                    if token.form[-i:] in suffixes[i]:
                        if token.gold_pos in suffixes[i][token.form[-i:]]:
                            suffixes[i][token.form[-i:]][token.gold_pos] += 1
                        else:
                            suffixes[i][token.form[-i:]][token.gold_pos] = 1
                    else:
                        suffixes[i][token.form[-i:]] = {token.gold_pos: 1}
                if len(token.form) > i:  # word must be longer than the prefix length
                    # the same as for suffixes
                    if token.form[:i] in prefixes[i]:
                        if token.gold_pos in prefixes[i][token.form[:i]]:
                            prefixes[i][token.form[:i]][token.gold_pos] += 1
                        else:
                            prefixes[i][token.form[:i]][token.gold_pos] = 1
                    else:
                        prefixes[i][token.form[:i]] = {token.gold_pos: 1}
                if len(token.form) > i + 1 and i > 2:
                    # letter combinations inside the word,
                    # provided they don't overlap with pre- or suffixes
                    for j in range(i, len(token.form) - (i * 2 - 1)):
                        if token.form[j:j + i] in letter_combs[i]:
                            if token.gold_pos in letter_combs[i][token.form[j:j + i]]:
                                letter_combs[i][token.form[j:j + i]][token.gold_pos] += 1
                            else:
                                letter_combs[i][token.form[j:j + i]][token.gold_pos] = 1
                        else:
                            letter_combs[i][token.form[j:j + i]] = {token.gold_pos: 1}

    t1 = time.time()
    print "\t\t" + str(t1 - t0) + " sec."

    return [suffixes, prefixes, letter_combs]
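
# A small, self-contained sketch of how a caller might read the first element
# of the list returned by find_affixes(): suffixes[length][affix] maps each
# gold POS tag to its frequency. The affixes, tags and counts below are
# invented for illustration only.
suffixes_demo = {
    3: {u'ing': {u'VBG': 412, u'NN': 37},
        u'ion': {u'NN': 280, u'VB': 3}}
}

def most_frequent_tag(tag_counts):
    # pick the POS tag with the highest frequency for one affix
    return max(tag_counts, key=tag_counts.get)

print most_frequent_tag(suffixes_demo[3][u'ing'])  # VBG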
def evaluate(file_in, out_file):
    t0 = time.time()
    print "\tEvaluate predictions"

    pos_dict = {}
    counter = 0
    prediction_count = 0
    # unique_tags will contain every existing POS tag as a key, whether it occurs
    # only in the gold data, only in the predictions, or in both.
    # The value is the dict {'TP': 0, 'FN': 0, 'FP': 0}.
    unique_tags = {}
    unique_tags_scores = {}
    correct_predictions = 0
    false_predictions = 0
    TP = 0.0
    FN = 0.0
    FP = 0.0

    for sentence in tk.sentences(codecs.open(file_in, encoding='utf-8')):
        for tid, token in enumerate(sentence):
            prediction_count += 1
            # add POS tags to the dictionary:
            if token.gold_pos not in unique_tags:
                unique_tags[token.gold_pos] = {'TP': 0, 'FN': 0, 'FP': 0}
            if token.predicted_pos not in unique_tags:
                unique_tags[token.predicted_pos] = {'TP': 0, 'FN': 0, 'FP': 0}
            # if the prediction was correct, the TP of the gold POS is increased by 1;
            # otherwise, the FN of the gold POS and the FP of the predicted POS are increased by 1
            if token.gold_pos == token.predicted_pos:
                correct_predictions += 1
                unique_tags[token.gold_pos]['TP'] += 1
            else:
                false_predictions += 1
                unique_tags[token.gold_pos]['FN'] += 1
                unique_tags[token.predicted_pos]['FP'] += 1

    # compute precision, recall, accuracy and f-score for each tag based on TP, FN, FP:
    for pos in unique_tags:
        TP += unique_tags[pos]['TP']
        FN += unique_tags[pos]['FN']
        FP += unique_tags[pos]['FP']
        unique_tags_scores[pos] = {'precision': 0.00, 'recall': 0.00, 'accuracy': 0.00, 'f-score': 0.00}
        if unique_tags[pos]['TP'] + unique_tags[pos]['FP'] == 0:
            unique_tags_scores[pos]['precision'] = 0.00
        else:
            unique_tags_scores[pos]['precision'] = float(unique_tags[pos]['TP']) / \
                (float(unique_tags[pos]['TP']) + float(unique_tags[pos]['FP'])) * 100.00
        if unique_tags[pos]['TP'] + unique_tags[pos]['FN'] == 0:
            unique_tags_scores[pos]['recall'] = 0.00
        else:
            unique_tags_scores[pos]['recall'] = float(unique_tags[pos]['TP']) / \
                (float(unique_tags[pos]['TP']) + float(unique_tags[pos]['FN'])) * 100.00
        if unique_tags[pos]['TP'] + unique_tags[pos]['FP'] + unique_tags[pos]['FN'] == 0:
            unique_tags_scores[pos]['accuracy'] = 0.00
        else:
            unique_tags_scores[pos]['accuracy'] = float(unique_tags[pos]['TP']) / \
                (float(unique_tags[pos]['TP']) + float(unique_tags[pos]['FN']) +
                 float(unique_tags[pos]['FP'])) * 100.00
        if unique_tags_scores[pos]['precision'] + unique_tags_scores[pos]['recall'] == 0.00:
            unique_tags_scores[pos]['f-score'] = 0.00
        else:
            unique_tags_scores[pos]['f-score'] = \
                (2 * unique_tags_scores[pos]['precision'] * unique_tags_scores[pos]['recall']) / \
                (unique_tags_scores[pos]['precision'] + unique_tags_scores[pos]['recall'])

    # compute overall values, then write the results to file:
    precision_sum = 0.0
    recall_sum = 0.0
    f_score_sum = 0.0
    false_tags = prediction_count - correct_predictions
    for pos in unique_tags_scores:
        precision_sum += unique_tags_scores[pos]['precision']
        recall_sum += unique_tags_scores[pos]['recall']
        f_score_sum += unique_tags_scores[pos]['f-score']
    macro_averaged_precision = precision_sum / float(len(unique_tags_scores))
    macro_averaged_recall = recall_sum / float(len(unique_tags_scores))
    macro_averaged_f_score = f_score_sum / float(len(unique_tags_scores))

    if TP + FP != 0:
        micro_averaged_precision = TP / (TP + FP) * 100
    else:
        micro_averaged_precision = 0.0
    if TP + FN != 0:
        micro_averaged_recall = TP / (TP + FN) * 100
    else:
        micro_averaged_recall = 0.0
    if micro_averaged_precision + micro_averaged_recall != 0:
        micro_averaged_f_score = (2 * micro_averaged_precision * micro_averaged_recall) / \
                                 (micro_averaged_precision + micro_averaged_recall)
    else:
        micro_averaged_f_score = 0.0

    accuracy = (float(correct_predictions) / float(prediction_count)) * 100
    error_rate = (float(false_predictions) / float(prediction_count)) * 100

    t1 = time.time()
    print "\t\t" + str(t1 - t0) + " sec."

    print "\tWrite evaluation results to file"
    z0 = time.time()
    print >> out_file, "Total Predictions:\t" + str(prediction_count)
    print >> out_file, "Correct Predictions:\t" + str(correct_predictions)
    print >> out_file, "False Predictions:\t" + str(false_tags)
    print >> out_file, ""
    print >> out_file, "Accuracy:\t" + str(round(accuracy, 2))
    print >> out_file, "Error rate:\t" + str(round(error_rate, 2))
    print >> out_file, ""
    print >> out_file, "Overall Precision (mac-av):\t" + str(round(macro_averaged_precision, 2))
    print >> out_file, "Overall Recall (mac-av):\t" + str(round(macro_averaged_recall, 2))
    print >> out_file, "Overall F-Score (mac-av):\t" + str(round(macro_averaged_f_score, 2))
    print >> out_file, ""
    print >> out_file, "Overall Precision (mic-av):\t" + str(round(micro_averaged_precision, 2))
    print >> out_file, "Overall Recall (mic-av):\t" + str(round(micro_averaged_recall, 2))
    print >> out_file, "Overall F-Score (mic-av):\t" + str(round(micro_averaged_f_score, 2))
    print >> out_file, ""
    print >> out_file, "Tagwise Accuracy, Precision, Recall and F-Score:\n"
    for pos in unique_tags_scores.keys():
        print >> out_file, pos + "\tAccuracy: " + str(round(unique_tags_scores[pos]['accuracy'], 2)) + \
            "\tPrecision: " + str(round(unique_tags_scores[pos]['precision'], 2)) + \
            "\tRecall: " + str(round(unique_tags_scores[pos]['recall'], 2)) + \
            "\tF-Score: " + str(round(unique_tags_scores[pos]['f-score'], 2))

    print "\t\tTotal Predictions:\t" + str(prediction_count)
    print "\t\tCorrect Predictions:\t" + str(correct_predictions)
    print "\t\tFalse Predictions:\t" + str(false_tags)
    print ""
    print "\t\tAccuracy:\t" + str(round(accuracy, 2))
    print "\t\tError rate:\t" + str(round(error_rate, 2))
    print ""
    print "\t\tOverall Precision (mac-av):\t" + str(round(macro_averaged_precision, 2))
    print "\t\tOverall Recall (mac-av):\t" + str(round(macro_averaged_recall, 2))
    print "\t\tOverall F-Score (mac-av):\t" + str(round(macro_averaged_f_score, 2))
    print ""
    print "\t\tOverall Precision (mic-av):\t" + str(round(micro_averaged_precision, 2))
    print "\t\tOverall Recall (mic-av):\t" + str(round(micro_averaged_recall, 2))
    print "\t\tOverall F-Score (mic-av):\t" + str(round(micro_averaged_f_score, 2))
    print ""
    print "\t\tFor details see the output file."

    z1 = time.time()
    print "\t\t" + str(z1 - z0) + " sec."
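
# Toy numbers (invented) to contrast the macro- and micro-averaged precision
# computed above: macro averaging takes the mean of the per-tag precisions, so
# every tag carries equal weight; micro averaging pools the raw TP/FP counts
# first, so frequent tags dominate.
counts_demo = {'NN': {'TP': 90, 'FP': 10}, 'UH': {'TP': 1, 'FP': 9}}

per_tag = [float(c['TP']) / (c['TP'] + c['FP']) for c in counts_demo.values()]
macro_precision = sum(per_tag) / len(per_tag) * 100             # (0.90 + 0.10) / 2 * 100 = 50.0

TP_pooled = sum(c['TP'] for c in counts_demo.values())
FP_pooled = sum(c['FP'] for c in counts_demo.values())
micro_precision = float(TP_pooled) / (TP_pooled + FP_pooled) * 100   # 91 / 110 * 100 ~ 82.7

print macro_precision, micro_precision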
def evaluate(file_in, out_file):
    t0 = time.time()
    print "\tEvaluate predictions"

    pos_dict = {}
    counter = 0
    prediction_count = 0
    # unique_tags will contain every existing POS tag as a key, whether it occurs
    # only in the gold data, only in the predictions, or in both.
    # The value is the dict {'TP': 0, 'FN': 0, 'FP': 0}.
    unique_tags = {}
    unique_tags_scores = {}
    correct_predictions = 0
    false_predictions = 0
    TP = 0.0
    FN = 0.0
    FP = 0.0

    for sentence in tk.sentences(codecs.open(file_in, encoding='utf-8')):
        # collect the gold and predicted target spans of this sentence;
        # a span is a run of consecutive tokens tagged "I"
        gold_targets = []
        predicted_targets = []
        gold_target = []
        predicted_target = []
        i_found_predicted = False
        i_found_gold = False
        for tid, token in enumerate(sentence):
            if token.predicted_tag_2 == "I":
                i_found_predicted = True
                predicted_target.append(token.t_id_2)
            else:
                if len(predicted_target) > 0:
                    i_found_predicted = False
                    predicted_targets.append(predicted_target)
                    predicted_target = []
            if i_found_predicted:
                predicted_target.append(token.form_2)
            if token.gold_tag_2 == "I":
                i_found_gold = True
                gold_target.append(token.t_id_2)
            else:
                if len(gold_target) > 0:
                    i_found_gold = False
                    gold_targets.append(gold_target)
                    gold_target = []
            if i_found_gold:
                gold_target.append(token.form_2)
        # flush spans that run up to the end of the sentence:
        if len(predicted_target) > 0:
            predicted_targets.append(predicted_target)
        if len(gold_target) > 0:
            gold_targets.append(gold_target)

        # exact-match scoring: a predicted span is a TP only if an identical span
        # exists in the gold list; any other predicted span is an FP, and every
        # unmatched gold span is an FN
        for prediction in predicted_targets:
            if prediction in gold_targets:
                TP += 1.0
                del gold_targets[gold_targets.index(prediction)]
            else:
                FP += 1.0
        FN += len(gold_targets)

    # Per-tag TP/FN/FP bookkeeping (as in the POS evaluate() above, but on
    # gold_tag_2/predicted_tag_2) is currently disabled for this task.

    # compute overall values, then write the results to file:
    if TP + FP != 0:
        micro_averaged_precision = TP / (TP + FP) * 100
    else:
        micro_averaged_precision = 0.0
    if TP + FN != 0:
        micro_averaged_recall = TP / (TP + FN) * 100
    else:
        micro_averaged_recall = 0.0
    if micro_averaged_precision + micro_averaged_recall != 0:
        micro_averaged_f_score = (2 * micro_averaged_precision * micro_averaged_recall) / \
                                 (micro_averaged_precision + micro_averaged_recall)
    else:
        micro_averaged_f_score = 0.0

    t1 = time.time()
    print "\t\t" + str(t1 - t0) + " sec."

    print "\tWrite evaluation results to file"
    z0 = time.time()
    print >> out_file, "Overall Precision (mic-av):\t" + str(round(micro_averaged_precision, 2))
    print >> out_file, "Overall Recall (mic-av):\t" + str(round(micro_averaged_recall, 2))
    print >> out_file, "Overall F-Score (mic-av):\t" + str(round(micro_averaged_f_score, 2))
    print ""
    print "\t\tOverall Precision (mic-av):\t" + str(round(micro_averaged_precision, 2))
    print "\t\tOverall Recall (mic-av):\t" + str(round(micro_averaged_recall, 2))
    print "\t\tOverall F-Score (mic-av):\t" + str(round(micro_averaged_f_score, 2))
    print ""
    print "\t\tFor details see the output file."
    z1 = time.time()
    print "\t\t" + str(z1 - z0) + " sec."
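
# A simplified, self-contained sketch of the span grouping used above:
# consecutive tokens tagged "I" form one target span, and spans are scored by
# exact match against the gold spans. This version tracks only token IDs
# (the function above additionally stores token forms); the IDs and tags below
# are invented for illustration.
def group_targets(tagged):
    # tagged: list of (token_id, tag) pairs; returns a list of ID spans
    spans = []
    current = []
    for t_id, tag in tagged:
        if tag == "I":
            current.append(t_id)
        elif len(current) > 0:
            spans.append(current)
            current = []
    if len(current) > 0:  # flush a span that ends the sentence
        spans.append(current)
    return spans

gold_spans = group_targets([(1, "O"), (2, "I"), (3, "I"), (4, "O"), (5, "I")])
predicted_spans = group_targets([(1, "O"), (2, "I"), (3, "I"), (4, "I"), (5, "I")])
print gold_spans        # [[2, 3], [5]]
print predicted_spans   # [[2, 3, 4, 5]]
# Exact match: the predicted span is not in the gold list (1 FP),
# and both gold spans stay unmatched (2 FN), so TP = 0 here.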