Пример #1
0
 def _get_prep_phrases(self, head):
     """Collect the prepositional phrases hanging off ``head``.

     Every 'prep' dependent of ``head`` is considered unless its lower-cased
     word is in ``self._prep_blacklist_for_prep_phrases`` or its index is 3
     or more positions away from ``head``. For each accepted preposition,
     every 'pobj' dependent that passes ``_head_extracting_condition`` is
     expanded (together with the members of its conjunction) and combined
     with the preposition into one phrase.

     :param head: word unit whose prepositional phrases are wanted.
     :return: list of ``WordUnitSequence`` objects (possibly empty); each
         has ``head`` set to the object word and ``nn_head`` copied from the
         expanded object.
     """
     phrases = []
     preps = self._get_dependents(self._dependencies['prep'], head)
     if not preps:
         return phrases
     for prep in preps:
         if prep.word.lower() in self._prep_blacklist_for_prep_phrases:
             continue
         # Ignore those prepositions that are far away from the head
         if not abs(prep.index - head.index) < 3:
             continue
         # Look for the objects governed by this preposition
         pobjs = self._get_dependents(self._dependencies['pobj'], prep)
         if not pobjs:
             continue
         for pobj in pobjs:
             if not self._head_extracting_condition(pobj, pos=True):
                 continue
             for member in self._get_conjunction(pobj):
                 expanded = self._expand_head_word(member)
                 if not expanded:
                     continue
                 # The preposition becomes part of the expanded object itself
                 expanded.add_word_unit(prep)
                 phrase = WordUnitSequence()
                 phrase.extend(expanded)
                 phrase.head = member
                 phrase.nn_head = expanded.nn_head
                 if not phrase:
                     continue
                 self._print_expansion_debug_info(head, 'prep phrase', phrase)
                 phrases.append(phrase)
     return phrases
Пример #2
0
 def _get_neg_modifier(self, head):
     """Return the negation modifier of ``head`` as a sequence.

     Only the first 'neg' dependent is examined, and it is kept only when
     its POS tag equals ``self._pos_tags['dt']``.

     :param head: word unit whose negation is looked up.
     :return: ``WordUnitSequence`` holding that dependent, or an empty
         sequence when there is none or its tag does not match.
     """
     negation = WordUnitSequence()
     candidates = self._get_dependents(self._dependencies['neg'], head)
     if candidates:
         first = candidates[0]
         if first.pos == self._pos_tags['dt']:
             negation.add_word_unit(first)
             self._print_expansion_debug_info(head, 'negation', first)
     return negation
Пример #3
0
 def _get_num_modifier(self, head):
     """Gather all 'num' dependents of ``head`` into one sequence.

     :param head: word unit whose numeric modifiers are looked up.
     :return: ``WordUnitSequence`` with every 'num' dependent added, empty
         when ``head`` has none.
     """
     modifiers = WordUnitSequence()
     dependents = self._get_dependents(self._dependencies['num'], head)
     if not dependents:
         return modifiers
     for dependent in dependents:
         modifiers.add_word_unit(dependent)
         self._print_expansion_debug_info(head, 'numeric modifier', dependent)
     return modifiers
Пример #4
0
 def _get_noun_compound(self, head):
     """Gather all 'nn' dependents of ``head`` into one sequence.

     :param head: word unit that may be the head of a noun compound.
     :return: ``WordUnitSequence`` with every 'nn' dependent added, empty
         when ``head`` has none. ``head`` itself is not added here.
     """
     compound = WordUnitSequence()
     parts = self._get_dependents(self._dependencies['nn'], head)
     # "or ()" skips the loop when the lookup yields nothing
     for part in parts or ():
         compound.add_word_unit(part)
         self._print_expansion_debug_info(head, 'noun compound', part)
     return compound
Пример #5
0
 def _get_vmod_phrase(self, head):
     """Build a phrase from the 'vmod' dependents of ``head``.

     For each 'vmod' dependent, only the first (predicate, object) pair
     returned by ``_get_predicate_object`` is used; its predicate and, when
     present, its object are appended to the result.

     :param head: word unit whose verbal modifiers are looked up.
     :return: ``WordUnitSequence`` with the collected words, empty when
         ``head`` has no usable 'vmod' dependent.
     """
     vmod_phrase = WordUnitSequence()
     vmod_list = self._get_dependents(self._dependencies['vmod'], head)
     for vmod in vmod_list or ():
         predicate_object = self._get_predicate_object(vmod)
         if not predicate_object:
             continue
         predicate, obj = predicate_object[0]
         vmod_phrase.extend(predicate)
         # _get_predicate_object emits (predicate, None) for a predicate
         # without any object, so the object must not be extended blindly.
         if obj is not None:
             vmod_phrase.extend(obj)
         self._print_expansion_debug_info(head, 'vmod', vmod_phrase)
     return vmod_phrase
Пример #6
0
 def _get_predicate_object(self, pred_head):
     """Pair every predicate expanded from ``pred_head`` with its objects.

     For each word of each expanded predicate the lookup proceeds in order:
     direct objects ('dobj'), then adjectival complements ('acomp') with
     their prepositional phrases, then the word's own prepositional
     phrases. A word that has 'dobj' (or 'acomp') dependents does not fall
     through to the later lookups, even if none of them produced a pair.

     :param pred_head: word unit heading the predicate.
     :return: list of ``(predicate, object)`` tuples. For 'acomp' and
         prepositional matches the predicate is a deep copy extended with
         the complement and/or preposition. A predicate for which nothing
         was found is returned once as ``(predicate, None)``.
     """

     def _tail_as_object(phrase):
         # Everything after the first unit of the phrase forms the object;
         # head information is carried over from the phrase.
         tail = WordUnitSequence()
         tail.extend(WordUnitSequence(phrase[1:]))
         tail.head = phrase.head
         tail.nn_head = phrase.nn_head
         return tail

     pairs = []
     for predicate in self._expand_predicate(pred_head):
         has_object = False
         for _, pred in predicate:
             # Look for direct object
             direct_objects = self._get_dependents(self._dependencies['dobj'], pred)
             if direct_objects:
                 for candidate in direct_objects:
                     if not self._head_extracting_condition(candidate, pos=True):
                         continue
                     for member in self._get_conjunction(candidate):
                         expanded = self._expand_head_word(member)
                         if not expanded:
                             continue
                         target = WordUnitSequence()
                         target.extend(expanded)
                         target.head = member
                         target.nn_head = expanded.nn_head
                         has_object = True
                         pairs.append((predicate, target))
                 continue
             # Look for adjective complement
             complements = self._get_dependents(self._dependencies['acomp'], pred)
             if complements:
                 for complement in complements:
                     for phrase in self._get_prep_phrases(complement):
                         if len(phrase) <= 1:
                             continue
                         target = _tail_as_object(phrase)
                         # Work on a copy of the predicate in case it gets expanded
                         extended = deepcopy(predicate)
                         # Merge the acomp and prep into the predicate
                         extended.add_word_unit(complement)
                         extended.add_word_unit(phrase[0])
                         has_object = True
                         pairs.append((extended, target))
                 continue
             # Look for prepositional objects
             for phrase in self._get_prep_phrases(pred):
                 if len(phrase) <= 1:
                     continue
                 target = _tail_as_object(phrase)
                 # Work on a copy of the predicate in case it gets expanded
                 extended = deepcopy(predicate)
                 # Merge the prep into the predicate
                 extended.add_word_unit(phrase[0])
                 has_object = True
                 pairs.append((extended, target))
         # Also return the predicate if it has no object in case it is a
         # conjunction of other predicates.
         if not has_object:
             pairs.append((predicate, None))
     return pairs
Пример #7
0
    def _expand_head_word(self, head):
        """Expand ``head`` into the full word sequence it heads.

        Starting from ``head`` alone, the sequence is extended with its noun
        compound, its negation modifier, the first prepositional phrase (if
        any) and its vmod phrase. ``nn_head`` is a deep copy of the sequence
        taken right after the noun compound has been added.

        :param head: word unit to expand.
        :return: the expanded ``WordUnitSequence``, or ``None`` when the
            result is a single unit of length 1 or when the POS tag of its
            head equals ``self._pos_tags['cd']``.
        """

        def _clean(word_unit_seq):
            # If the sequence is a single letter, ignore it. Return at once:
            # falling through would dereference ``None`` in the check below.
            if len(word_unit_seq) == 1 and len(word_unit_seq[0]) == 1:
                return None
            # If the head of the sequence is a number, ignore it
            if word_unit_seq.head.pos == self._pos_tags['cd']:
                return None
            return word_unit_seq

        expansion = WordUnitSequence(head, head)
        # Find out if the head is in a compound noun
        noun_compound = self._get_noun_compound(head)
        expansion.extend(noun_compound)
        expansion.nn_head = deepcopy(expansion)
        # NOTE: numeric-modifier expansion is currently disabled.
        # # Find out if there is any numeric modifier
        # num_mod = self._get_num_modifier(head)
        # expansion.extend(num_mod)
        # Find out if there is any negation
        neg_mod = self._get_neg_modifier(head)
        expansion.extend(neg_mod)
        # Find out if the head has pobj phrase; only the first one is used
        pobj_phrases = self._get_prep_phrases(head)
        if pobj_phrases:
            pobj_phrase = pobj_phrases[0]
            expansion.extend(pobj_phrase)
            # Transfer the head in the pattern "<num> of <noun>" from "<num>" to "<noun>"
            if (head.pos == self._pos_tags['cd'] or head.word.isdigit()) and pobj_phrase[0].word == 'of':
                expansion.head = pobj_phrase.head
                expansion.nn_head = pobj_phrase.nn_head
        # Find out if the head has vmod phrase
        vmod_phrase = self._get_vmod_phrase(head)
        expansion.extend(vmod_phrase)
        # Cleaning
        return _clean(expansion)