示例#1
0
 def __init__(self, pos_actual, words_actual, pos_target, words_target,
              refuse_common_threshold=0, junk=None):
     """
     Initialize the phrase and diff its actual words against its target
     words.

     The four positional arguments are forwarded unchanged to the parent
     constructor. The given refuse_common_threshold and junk are stored on
     the instance and passed as keyword arguments to doc_diff.doc_diff,
     whose result is kept in self.sub_diff_result.

     If junk is omitted (or None), a fresh empty list is used for this
     instance.
     """
     # A list literal as default value is created only once, at function
     # definition time, and would be shared (via self.junk) by every
     # instance built without an explicit junk argument.
     if junk is None:
         junk = []
     super(DiffRearrangePhrase, self).__init__(
         pos_actual, words_actual, pos_target, words_target)
     self.refuse_common_threshold = refuse_common_threshold
     self.junk = junk
     # self.words_actual / self.words_target are presumably set by the
     # parent constructor -- confirm against the base class.
     self.sub_diff_result = doc_diff.doc_diff(
         self.words_actual, self.words_target,
         refuse_common_threshold=refuse_common_threshold,
         junk=junk)
示例#2
0
    def evaluate_by_strings(self, gt_path, gt, actual_path, actual):
        """
        Diffs the actual extraction result (actual) against the groundtruth
        (gt) and returns the result of doc_diff unchanged.

        doc_diff is called with actual as first argument, gt as second
        argument and self.args.junk as third argument. The arguments gt_path
        and actual_path are accepted but not used in this method.

        NOTE(review): this method itself neither computes precision/recall
        nor handles rearranging of words, case-insensitive matching
        (ignore_cases) or a maximum word distance (max_dist). Presumably
        these steps are done by doc_diff or by the caller -- confirm.
        """
        junk = self.args.junk
        diff_result = doc_diff(actual, gt, junk)
        return diff_result
示例#3
0
    def evaluate_by_strings(self, gt_path, gt, actual_path, actual):
        """
        Diffs the actual extraction result (actual) against the groundtruth
        (gt) and returns the result of doc_diff unchanged.

        doc_diff is called with actual as first argument, gt as second
        argument and self.args.junk as third argument. The arguments gt_path
        and actual_path are accepted but not used in this method.

        NOTE(review): this method itself neither computes precision/recall
        nor handles rearranging of words, case-insensitive matching
        (ignore_cases) or a maximum word distance (max_dist). Presumably
        these steps are done by doc_diff or by the caller -- confirm.
        """
        junk = self.args.junk
        diff_result = doc_diff(actual, gt, junk)
        return diff_result
示例#4
0
 def evaluate(self, str1, str2, expected, junk=None):
     """
     Diff str1 against str2 and assert that the debug visualization of the
     diff result equals str(expected).

     junk is passed as third argument to doc_diff.doc_diff. If it is
     omitted (or None), a fresh empty list is used for this call.
     """
     # Avoid a mutable default argument: a list literal in the signature
     # would be one shared object across all calls.
     if junk is None:
         junk = []
     diff_result = doc_diff.doc_diff(str1, str2, junk)
     visualized = doc_diff.visualize_diff_result_debug(diff_result)
     self.assertEqual(visualized, str(expected))
示例#5
0
 def assert_equal(self, input1, input2, expected):
     """
     Diff input1 against input2, count the operations of the resulting
     diff phrases and assert that the counts, converted to a dict, equal
     expected.
     """
     phrases = doc_diff.doc_diff(input1, input2)
     op_counts = dict(doc_diff.count_num_ops(phrases))
     self.assertDictEqual(op_counts, expected)
示例#6
0
    def assert_equal(self, input1, input2, expected):
        """
        Diff input1 against input2, count the operations of the resulting
        diff phrases and assert that the counts, converted to a dict, equal
        expected.
        """
        phrases = doc_diff.doc_diff(input1, input2)
        op_counts = dict(count_num_ops.count_num_ops(phrases))
        self.assertDictEqual(op_counts, expected)