Exemplo n.º 1
0
    def handle_candidate(self, reference, info={}):
        """Index one Gold Standard entry in the in-memory lookup tables.

        The entry is filed in the global `pre_gs` mapping under its string
        form (lowercased when `ignore_case` is set). When `lemma_or_surface`
        is set, it is additionally filed in `fuzzy_pre_gs` under its joined
        lemmas and under its joined surface forms. The global `ref_counter`
        is incremented once per call.

        @param reference A `Pattern` contained in the reference Gold Standard.
        When `ignore_pos` is set, its POS tags are overwritten with WILDCARD.
        @param info Not used by this method.
        """
        global ignore_pos, ref_counter, ignore_case
        global pre_gs, lemma_or_surface, fuzzy_pre_gs

        if ignore_pos:
            # Wildcard the POS before building the key, so the key reflects it
            reference.set_all(pos=WILDCARD)

        raw_key = reference.to_string()
        gs_key = raw_key.lower() if ignore_case else raw_key

        # Read, extend and write back explicitly (not setdefault), so that
        # the updated list is always reassigned to the mapping.
        same_key_entries = pre_gs.get(gs_key, [])
        same_key_entries.append(reference)
        pre_gs[gs_key] = same_key_entries

        if lemma_or_surface:
            lemma_key = WORD_SEPARATOR.join([word.lemma for word in reference])
            fuzzy_pre_gs.setdefault(lemma_key, []).append(reference)
            surface_key = WORD_SEPARATOR.join(
                [word.surface for word in reference])
            fuzzy_pre_gs.setdefault(surface_key, []).append(reference)

        ref_counter += 1
Exemplo n.º 2
0
    def handle_candidate(self, reference, info={}):
        """Store one entry of the reference Gold Standard in main memory.

        The entry goes into the global `pre_gs` mapping, keyed by its string
        representation (lowercased if `ignore_case` is set). If
        `lemma_or_surface` is set, the entry is also registered in
        `fuzzy_pre_gs`, once under its lemma sequence and once under its
        surface sequence. Each call adds one to the global `ref_counter`.

        @param reference A `Pattern` contained in the reference Gold Standard.
        Its POS tags are replaced by WILDCARD when `ignore_pos` is set.
        @param info Not used by this method.
        """
        global ignore_pos, ref_counter, ignore_case
        global pre_gs, lemma_or_surface, fuzzy_pre_gs

        if ignore_pos:
            # Must happen before to_string() so the key carries the wildcard
            reference.set_all(pos=WILDCARD)

        entry_key = reference.to_string()
        if ignore_case:
            entry_key = entry_key.lower()

        # Explicit get / append / reassign: the list is always written back
        # to the mapping after being extended.
        bucket = pre_gs.get(entry_key, [])
        bucket.append(reference)
        pre_gs[entry_key] = bucket

        if lemma_or_surface:
            lemmas = WORD_SEPARATOR.join([token.lemma for token in reference])
            fuzzy_pre_gs.setdefault(lemmas, []).append(reference)
            surfaces = WORD_SEPARATOR.join(
                [token.surface for token in reference])
            fuzzy_pre_gs.setdefault(surfaces, []).append(reference)

        ref_counter += 1
Exemplo n.º 3
0
    def handle_candidate(self, candidate_i, info={}):
        """For each candidate, verifies whether it is contained in the reference
        list (in which case it is a *True* positive) or else, it is not in the
        reference list (in which case it is a *False* positive, i.e. a random
        ngram that does not constitute a MWE).

        The verdict is attached to the candidate as a `TPClass` named after
        the global `gs_name`, and the candidate is then forwarded to the next
        handler in the chain. The global `tp_counter` is incremented for each
        true positive and `entity_counter` for every candidate.

        @param candidate_i The `Candidate` that is being read from the XML file.
        @param info Passed through unchanged to the next handler in the chain.
        """
        global ignore_pos, gs_name, ignore_case
        global entity_counter, tp_counter
        global pre_gs, lemma_or_surface, fuzzy_pre_gs

        # Build the lookup key from a word-by-word copy: the POS wildcarding
        # below is applied to the copy, while `candidate_i` itself is what
        # gets annotated and forwarded to the chain.
        candidate = self.candidate_factory.make()
        for w in candidate_i:
            candidate.append(Word(w.surface, w.lemma, w.pos, w.syn))

        if ignore_pos:
            candidate.set_all(pos=WILDCARD)  # applied to the copy only
        pre_gs_key = candidate.to_string()
        if ignore_case:
            pre_gs_key = pre_gs_key.lower()

        # Copy the stored list before extending it. With a plain dict,
        # `pre_gs.get` returns the very list held in `pre_gs`, and an in-place
        # `+=` on it would append the fuzzy entries to the gold standard
        # itself, growing it with duplicates on every candidate processed.
        entries_to_check = list(pre_gs.get(pre_gs_key, []))

        if lemma_or_surface:
            entries_to_check.extend(fuzzy_pre_gs.get(
                WORD_SEPARATOR.join([w.lemma for w in candidate]), []))
            entries_to_check.extend(fuzzy_pre_gs.get(
                WORD_SEPARATOR.join([w.surface for w in candidate]), []))

        # any() stops at the first positive match
        true_positive = any(
            gold_entry.match(candidate,
                             ignore_case=ignore_case,
                             lemma_or_surface=lemma_or_surface)
            for gold_entry in entries_to_check)

        if true_positive:
            candidate_i.add_tpclass(TPClass(gs_name, "True"))
            tp_counter += 1
        else:
            candidate_i.add_tpclass(TPClass(gs_name, "False"))
        self.chain.handle_candidate(candidate_i, info)
        entity_counter += 1
Exemplo n.º 4
0
    def handle_candidate(self, candidate_i, info={}):
        """For each candidate, verifies whether it is contained in the reference
        list (in which case it is a *True* positive) or else, it is not in the
        reference list (in which case it is a *False* positive, i.e. a random
        ngram that does not constitute a MWE).

        The result is recorded on the candidate as a `TPClass` labelled with
        the global `gs_name`; the candidate is then handed to the next handler
        in the chain. The global `tp_counter` counts true positives and
        `entity_counter` counts all candidates seen.

        @param candidate_i The `Candidate` that is being read from the XML file.
        @param info Passed through unchanged to the next handler in the chain.
        """
        global ignore_pos, gs_name, ignore_case
        global entity_counter, tp_counter
        global pre_gs, lemma_or_surface, fuzzy_pre_gs

        # Work on a word-by-word copy so that the POS wildcarding below is
        # applied to the copy rather than to `candidate_i`, which is the
        # object annotated and forwarded to the chain.
        candidate = self.candidate_factory.make()
        for w in candidate_i:
            candidate.append(Word(w.surface, w.lemma, w.pos, w.syn))

        if ignore_pos:
            candidate.set_all(pos=WILDCARD)  # applied to the copy only
        pre_gs_key = candidate.to_string()
        if ignore_case:
            pre_gs_key = pre_gs_key.lower()

        # Take a copy of the stored entries. With a plain dict, `pre_gs.get`
        # hands back the list stored in `pre_gs`; extending that list in place
        # (as `+=` does) would pollute the gold standard with fuzzy entries
        # and make it grow on every candidate processed.
        entries_to_check = list(pre_gs.get(pre_gs_key, []))

        if lemma_or_surface:
            lemma_key = WORD_SEPARATOR.join([w.lemma for w in candidate])
            entries_to_check.extend(fuzzy_pre_gs.get(lemma_key, []))
            surface_key = WORD_SEPARATOR.join([w.surface for w in candidate])
            entries_to_check.extend(fuzzy_pre_gs.get(surface_key, []))

        # any() stops at the first positive match
        true_positive = any(
            gold_entry.match(candidate,
                             ignore_case=ignore_case,
                             lemma_or_surface=lemma_or_surface)
            for gold_entry in entries_to_check)

        if true_positive:
            candidate_i.add_tpclass(TPClass(gs_name, "True"))
            tp_counter += 1
        else:
            candidate_i.add_tpclass(TPClass(gs_name, "False"))
        self.chain.handle_candidate(candidate_i, info)
        entity_counter += 1
Exemplo n.º 5
0
def key(ngram):
    """
        Build the string key for an n-gram given as a list of words (strings).

        The words are joined with `WORD_SEPARATOR`; a single string is needed
        because shelves can only be indexed by strings and integers.
    """
    separator = WORD_SEPARATOR
    return separator.join(ngram)
Exemplo n.º 6
0
def key(ngram):
    """
        Turn a list of words (strings) into a single string key.

        The words are concatenated with `WORD_SEPARATOR` between them, since
        shelves can only be indexed by strings and integers.
    """
    joiner = WORD_SEPARATOR
    return joiner.join(ngram)