def __init__(self, pos_actual, words_actual, pos_target, words_target,
             refuse_common_threshold=0, junk=None):
    """
    Initializes the phrase and diffs its actual words against its target
    words.

    The positions and word lists are forwarded unchanged to the parent
    initializer. The two options are stored on the instance and passed on
    to doc_diff.doc_diff, whose result is stored in self.sub_diff_result.

    Args:
        pos_actual: Forwarded to the parent initializer.
        words_actual: Forwarded to the parent initializer.
        pos_target: Forwarded to the parent initializer.
        words_target: Forwarded to the parent initializer.
        refuse_common_threshold: Stored as self.refuse_common_threshold and
            passed by keyword to doc_diff.doc_diff. Defaults to 0.
        junk: Stored as self.junk and passed by keyword to
            doc_diff.doc_diff. If omitted, a new empty list is used.
    """
    # A list literal as default value would be created once and then shared
    # by every instance constructed without an explicit junk argument (it is
    # stored on self.junk), so a fresh list is created per call instead.
    if junk is None:
        junk = []

    super(DiffRearrangePhrase, self).__init__(
        pos_actual, words_actual, pos_target, words_target)

    self.refuse_common_threshold = refuse_common_threshold
    self.junk = junk

    # self.words_actual / self.words_target are presumably set by the parent
    # initializer called above -- TODO confirm.
    self.sub_diff_result = doc_diff.doc_diff(
        self.words_actual,
        self.words_target,
        refuse_common_threshold=refuse_common_threshold,
        junk=junk)
def evaluate_by_strings(self, gt_path, gt, actual_path, actual):
    """
    Diffs the actual extraction result against the groundtruth.

    Calls doc_diff with the actual result (actual) as first argument, the
    groundtruth (gt) as second argument and the junk setting taken from
    self.args as third argument, and returns the result of that call
    unchanged. The arguments gt_path and actual_path are accepted but not
    used by this method.

    NOTE(review): The intent of this evaluation is to measure precision and
    recall of words extraction, that is, the number of common words
    relative to the number of extracted words (precision) and relative to
    the number of all words in the groundtruth (recall), without judging
    the order of the extracted words. Neither that computation nor the
    rearrange, ignore_cases and max_dist options mentioned in earlier
    documentation are visible in this method; presumably they are handled
    by doc_diff or by the caller -- confirm.
    """
    diff_result = doc_diff(actual, gt, self.args.junk)
    return diff_result
def evaluate(self, str1, str2, expected, junk=None):
    """
    Asserts that the debug visualization of the diff between str1 and str2
    equals the string form of expected.

    Args:
        str1: First input passed to doc_diff.doc_diff.
        str2: Second input passed to doc_diff.doc_diff.
        expected: The expected outcome; converted with str() before the
            comparison.
        junk: Passed as third positional argument to doc_diff.doc_diff.
            If omitted, a new empty list is used.
    """
    # Avoid a list literal as default value: it would be created once and
    # shared by all calls that omit junk.
    if junk is None:
        junk = []

    diff_result = doc_diff.doc_diff(str1, str2, junk)
    visualization = doc_diff.visualize_diff_result_debug(diff_result)
    self.assertEqual(visualization, str(expected))
def assert_equal(self, input1, input2, expected):
    """
    Asserts that the operation counts of the diff between input1 and input2
    equal the expected dictionary.

    The inputs are diffed with doc_diff.doc_diff, the result is passed to
    doc_diff.count_num_ops, and the counts are converted to a plain dict
    before being compared with expected via assertDictEqual.
    """
    phrases = doc_diff.doc_diff(input1, input2)
    op_counts = doc_diff.count_num_ops(phrases)
    observed = dict(op_counts)
    self.assertDictEqual(observed, expected)
def assert_equal(self, input1, input2, expected):
    """
    Asserts that the operation counts of the diff between input1 and input2
    equal the expected dictionary.

    The inputs are diffed with doc_diff.doc_diff, the result is passed to
    count_num_ops.count_num_ops, and the counts are converted to a plain
    dict before being compared with expected via assertDictEqual.
    """
    diff_result = doc_diff.doc_diff(input1, input2)
    counted = count_num_ops.count_num_ops(diff_result)
    actual_counts = dict(counted)
    self.assertDictEqual(actual_counts, expected)