Пример #1
0
 def test01_AndOfSubj(self):
     """Conjoined subjects: 'John and Paul went to the movies'.

     Parses the sentence, builds its DRS, and checks that both proper names
     and the verb are selected as phrases, that each contributes its own
     condition, and that John is attached to the event as _ARG0.
     """
     text = "John and Paul went to the movies"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     # Map phrase text -> referent in a single pass. setdefault keeps the
     # first phrase seen for a given text, like a first-match linear search.
     ref_of = {}
     for r, sp in f.iteritems():
         ref_of.setdefault(sp.text, r)
     self.assertIn('John', ref_of)
     self.assertIn('Paul', ref_of)
     self.assertIn('went', ref_of)
     J = ref_of['John']
     P = ref_of['Paul']
     E = ref_of['went']
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E])))
     # The verb condition is expected under the stem 'go', not 'went'.
     self.assertIsNotNone(d.find_condition(Rel('go', [E])))
     self.assertIsNotNone(d.find_condition(Rel('John', [J])))
     self.assertIsNotNone(d.find_condition(Rel('Paul', [P])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E, J])))
Пример #2
0
 def test10_OrOfVerb_OrInBrackets(self):
     """Disjoined verbs plus a bracketed disjunction.

     Selects phrases non-contiguously so the three 'or'-joined verbs form
     the single phrase 'is perceived known inferred', then checks the
     event/argument structure linking the agent, both verb phrases and the
     possessive theme.
     """
     text = "That which is perceived or known or inferred to have its own distinct existence (living or nonliving)"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # RT_EMPTY_DRS adds 'or' to phrases, so exclude lexemes carrying it
     # (WDT lexemes are always kept).
     f = sentence.select_phrases(
         lambda x: (x.pos is POS.from_cache('WDT')
                    or 0 == (x.mask & RT_EMPTY_DRS)),
         contiguous=False)
     # Map phrase text -> (referent, span) in one pass, first match wins.
     found = {}
     for r, sp in f.iteritems():
         found.setdefault(sp.text, (r, sp))
     self.assertIn('That which', found)
     self.assertIn('have', found)
     self.assertIn('is perceived known inferred', found)
     self.assertIn('its own distinct existence', found)
     X1 = found['That which'][0]
     E1 = found['is perceived known inferred'][0]
     E2 = found['have'][0]
     # The theme referents are read from the first two lexemes of the span
     # rather than from the phrase's own referent.
     theme_span = found['its own distinct existence'][1]
     X2 = theme_span[0].refs[1]
     X3 = theme_span[1].refs[0]
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E1, X1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E1, E2])))
     # TODO: should the theme attach to X2?
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E2, X3])))
     self.assertIsNotNone(d.find_condition(Rel('_POSS', [X2, X3])))
Пример #3
0
 def test02_AndOfObj(self):
     """Conjoined objects: 'He saw John and Paul'.

     Checks that both proper names and the verb are selected as phrases,
     that each contributes its own condition, and that John is attached to
     the event as _ARG1.
     """
     text = "He saw John and Paul"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     # Map phrase text -> referent in a single pass, first match wins.
     ref_of = {}
     for r, sp in f.iteritems():
         ref_of.setdefault(sp.text, r)
     self.assertIn('John', ref_of)
     self.assertIn('Paul', ref_of)
     self.assertIn('saw', ref_of)
     J = ref_of['John']
     P = ref_of['Paul']
     E = ref_of['saw']
     # FIXME: wn lemmatizer does not convert 'saw' to 'see' - presumably due
     # to ambiguity - so the condition is asserted under 'saw'.
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E])))
     self.assertIsNotNone(d.find_condition(Rel('saw', [E])))
     self.assertIsNotNone(d.find_condition(Rel('John', [J])))
     self.assertIsNotNone(d.find_condition(Rel('Paul', [P])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E, J])))
Пример #4
0
 def test03_OrOfObj(self):
     """Disjoined objects: 'To participate in games or sport'.

     Checks that the verb and both nouns are selected as phrases, that each
     contributes its own condition, and that 'sport' is attached to the
     event as _ARG1.
     """
     text = "To participate in games or sport"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_ENTITY | RT_EVENT)
     # Map phrase text -> referent in a single pass, first match wins.
     ref_of = {}
     for r, sp in f.iteritems():
         ref_of.setdefault(sp.text, r)
     self.assertIn('participate', ref_of)
     self.assertIn('games', ref_of)
     self.assertIn('sport', ref_of)
     X1 = ref_of['games']
     X2 = ref_of['sport']
     E = ref_of['participate']
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E])))
     self.assertIsNotNone(d.find_condition(Rel('participate', [E])))
     self.assertIsNotNone(d.find_condition(Rel('games', [X1])))
     self.assertIsNotNone(d.find_condition(Rel('sport', [X2])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E, X2])))
Пример #5
0
 def test05_AndOfVerb_AndOfObj(self):
     """Conjoined verbs with a three-way conjoined object.

     Expects exactly five phrases (agent, merged verb phrase, three themes)
     and checks that the agent is _ARG0 and the last theme is _ARG1 of the
     merged 'makes distributes' event.
     """
     text = "Bell makes and distributes computers, electronics, and building products"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT
                                 | RT_ATTRIBUTE)
     # Keep the full text list for the count check (duplicates matter there)
     # and a first-match text -> referent map for the lookups.
     phrases = []
     ref_of = {}
     for r, sp in f.iteritems():
         phrases.append(sp.text)
         ref_of.setdefault(sp.text, r)
     self.assertIn('Bell', phrases)
     self.assertIn('makes distributes', phrases)
     self.assertIn('computers', phrases)
     self.assertIn('electronics', phrases)
     # Note if we add RT_EMPTY_DRS to the selection criteria then this phrase becomes 'and building products'
     self.assertIn('building products', phrases)
     self.assertEqual(5, len(phrases))
     X1 = ref_of['Bell']
     Y3 = ref_of['building products']
     E1 = ref_of['makes distributes']
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E1, X1])))
     # TODO: should we add proposition for multi NP's conjoined?
     # Only the last conjunct is asserted as _ARG1 for now.
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E1, Y3])))
Пример #6
0
 def test1_Currency_00_0194(self):
     """Currency amounts in a long sentence.

     Verifies that preprocessing tokenizes the '$' amounts and strips the
     final period, that the expected noun and verb phrase nominals are
     found, and that the DRS links them with the expected argument and
     preposition relations.
     """
     text = r"Without the Cray-3 research and development expenses, the company would have been able to report a profit of $19.3 million for the first half of 1989 rather than the $5.9 million it posted."
     etext = r"Without the Cray-3 research and development expenses , the company would have been able to report a profit of $ 19.3 million for the first half of 1989 rather than the $ 5.9 million it posted"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)

     # Noun phrases: text -> referent, first match wins.
     np_ref = {}
     for r, sp in sentence.get_np_nominals():
         np_ref.setdefault(sp.text, r)
     self.assertIn('the Cray-3 research and development expenses', np_ref)
     self.assertIn('the company', np_ref)
     self.assertIn('a profit', np_ref)
     self.assertIn('$ 19.3 million', np_ref)
     self.assertIn('the first half', np_ref)
     self.assertIn('the $ 5.9 million', np_ref)
     self.assertIn('1989', np_ref)

     # Verb phrases: text -> referent, first match wins.
     vp_ref = {}
     for r, sp in sentence.get_vp_nominals():
         vp_ref.setdefault(sp.text, r)
     self.assertIn('would have been', vp_ref)
     self.assertIn('report', vp_ref)
     self.assertIn('posted', vp_ref)

     would_have_been = vp_ref['would have been']
     report = vp_ref['report']
     posted = vp_ref['posted']
     cray_rnd = np_ref['the Cray-3 research and development expenses']
     company = np_ref['the company']
     profit = np_ref['a profit']
     first_half = np_ref['the first half']
     n1989 = np_ref['1989']
     n19_3M = np_ref['$ 19.3 million']
     n5_9M = np_ref['the $ 5.9 million']

     self.assertIsNotNone(
         d.find_condition(Rel('without', [would_have_been, cray_rnd])))
     self.assertIsNotNone(
         d.find_condition(Rel('_ARG0', [would_have_been, company])))
     self.assertIsNotNone(
         d.find_condition(Rel('_ARG0', [report, company])))
     self.assertIsNotNone(
         d.find_condition(Rel('_ARG1', [report, profit])))
     self.assertIsNotNone(
         d.find_condition(Rel('of', [profit, n19_3M])))
     self.assertIsNotNone(
         d.find_condition(Rel('for', [profit, first_half])))
     self.assertIsNotNone(
         d.find_condition(Rel('of', [first_half, n1989])))
     self.assertIsNotNone(
         d.find_condition(Rel('_ARG1', [posted, n5_9M])))
Пример #7
0
 def promote_to_propernoun(self):
     """Promote an entity to a proper noun.

     Does nothing when the RT_PROPERNAME bit is already set. Otherwise the
     stem becomes the title-cased word, RT_ENTITY is cleared and
     RT_PROPERNAME set in the mask, and the DRS is rebuilt over the same
     referents with a single condition named after the new stem.
     """
     if 0 != (self.mask & RT_PROPERNAME):
         # Already a proper noun.
         return
     self.stem = self.word.title()
     self.mask = (self.mask & ~RT_ENTITY) | RT_PROPERNAME
     proper_cond = Rel(self.stem, self.drs.referents)
     self.drs = DRS(self.drs.referents, [proper_cond])
Пример #8
0
 def test10_Brutus(self):
     """Passive sentence: 'Ceasar was stabbed by Brutus'.

     The raw text is parsed without preprocessing. Checks that both names
     are noun phrase nominals, that 'was stabbed' is a verb phrase nominal,
     and that the DRS attaches 'Ceasar' as _ARG0 and 'Brutus' as _ARG1 of
     the event.
     """
     text = "Ceasar was stabbed by Brutus"
     derivation = grpc.ccg_parse(self.stub, text, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # Noun phrases: text -> referent, first match wins.
     np_ref = {}
     for r, sp in sentence.get_np_nominals():
         np_ref.setdefault(sp.text, r)
     self.assertIn('Brutus', np_ref)
     self.assertIn('Ceasar', np_ref)
     # Verb phrases: text -> referent, first match wins.
     vp_ref = {}
     for r, sp in sentence.get_vp_nominals():
         vp_ref.setdefault(sp.text, r)
     self.assertIn('was stabbed', vp_ref)
     E = vp_ref['was stabbed']
     A1 = np_ref['Brutus']
     A0 = np_ref['Ceasar']
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E, A0])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E, A1])))
Пример #9
0
 def test04_AndOfVerb(self):
     """Conjoined verbs: 'Bell makes and distributes computers'.

     Checks that the two verbs merge into the single phrase
     'makes distributes' and that this event takes 'Bell' as _ARG0 and
     'computers' as _ARG1.
     """
     text = "Bell makes and distributes computers"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT)
     # Map phrase text -> referent in a single pass, first match wins.
     ref_of = {}
     for r, sp in f.iteritems():
         ref_of.setdefault(sp.text, r)
     self.assertIn('Bell', ref_of)
     self.assertIn('makes distributes', ref_of)
     self.assertIn('computers', ref_of)
     X1 = ref_of['Bell']
     X2 = ref_of['computers']
     E1 = ref_of['makes distributes']
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E1, X1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E1, X2])))
Пример #10
0
    def _get_noun_drs(self, span):
        """Build the DRS and production for a noun lexeme.

        For nouns that are neither proper nouns nor possessives, the WordNet
        noun synsets of the lemmatized, lower-cased stem are stored in
        ``self.wnsynsets``. In every case ``self.drs`` is set to a DRS whose
        only referent is ``self.refs[0]`` with a single condition named after
        the stem.

        Args:
            span: The span passed through to the DrsProduction.

        Returns:
            A DrsProduction over ``self.refs[0]`` whose lambda refs are
            ``[self.refs[0]]``.
        """
        if not self.isproper_noun and not self.pos == POS_POSSESSIVE:
            # TODO: cache nouns
            # NOTE: an experimental plural/_ISMEMBER production used to live
            # here behind `if False`. It could never run, and it read a plural
            # form that was left unset for single-character stems, so it has
            # been removed along with the unused plural computation.
            self.wnsynsets = wn.wordnet.synsets(
                _Wnl.lemmatize(self.stem.lower(), 'n'), pos='n')

        self.drs = DRS([self.refs[0]], [Rel(self.stem, [self.refs[0]])])
        d = DrsProduction([self.refs[0]],
                          self.refs[1:],
                          category=self.category,
                          span=span)
        d.set_lambda_refs([self.refs[0]])
        return d
Пример #11
0
 def test4_ApposInterrupt(self):
     """Appositive followed by a relative clause.

     Checks that 'Bell' and 'a telecommunications company' are distinct
     noun phrase nominals linked by exactly one _AKA condition.
     """
     text = r"Bell, a telecommunications company, which is located in Los Angeles, makes and distributes electronics, computers, and building products"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET|CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # Noun phrases: text -> referent, first match wins.
     np_ref = {}
     for r, sp in sentence.get_np_nominals():
         np_ref.setdefault(sp.text, r)
     self.assertIn('Bell', np_ref)
     self.assertIn('a telecommunications company', np_ref)
     X = np_ref['Bell']
     Y = np_ref['a telecommunications company']
     self.assertNotEqual(X, Y)
     self.assertIsNotNone(d.find_condition(Rel('_AKA', [X, Y])))
     # '_AKA' must occur exactly once in the printed DRS.
     self.assertEqual(1, repr(d).count('_AKA'))
Пример #12
0
 def test3_ApposInterrupt(self):
     """Appositive interrupting subject and verb.

     Checks that 'Robbie' and 'a hot-tempered tennis player' are distinct
     noun phrase nominals linked by exactly one _AKA condition.
     """
     text = r"Robbie, a hot-tempered tennis player, charged the umpire and tried to crack the poor man's skull with a racket."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET|CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # Noun phrases: text -> referent, first match wins.
     np_ref = {}
     for r, sp in sentence.get_np_nominals():
         np_ref.setdefault(sp.text, r)
     self.assertIn('Robbie', np_ref)
     self.assertIn('a hot-tempered tennis player', np_ref)
     X = np_ref['Robbie']
     Y = np_ref['a hot-tempered tennis player']
     self.assertNotEqual(X, Y)
     self.assertIsNotNone(d.find_condition(Rel('_AKA', [X, Y])))
     # '_AKA' must occur exactly once in the printed DRS.
     self.assertEqual(1, repr(d).count('_AKA'))
Пример #13
0
 def test2_ApposInterrupt(self):
     """Appositive containing a possessive.

     Checks that 'Reliable', 'Diane' and 'eleven-year-old beagle' are noun
     phrase nominals, and that 'Reliable' and 'eleven-year-old beagle' are
     distinct referents linked by exactly one _AKA condition.
     """
     text = r"Reliable, Diane's eleven-year-old beagle, chews holes in the living room carpeting as if he were still a puppy."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET|CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # Noun phrases: text -> referent, first match wins.
     np_ref = {}
     for r, sp in sentence.get_np_nominals():
         np_ref.setdefault(sp.text, r)
     self.assertIn('Reliable', np_ref)
     self.assertIn("eleven-year-old beagle", np_ref)
     self.assertIn("Diane", np_ref)
     X = np_ref['Reliable']
     Y = np_ref["eleven-year-old beagle"]
     self.assertNotEqual(X, Y)
     self.assertIsNotNone(d.find_condition(Rel('_AKA', [X, Y])))
     # '_AKA' must occur exactly once in the printed DRS.
     self.assertEqual(1, repr(d).count('_AKA'))
Пример #14
0
 def _copy_production_from_sample(self, sample, span):
     """Instantiate this lexeme's DRS and production from a sample entry.

     ``sample`` is indexed as ``sample[0]`` (a DRS whose conditions are all
     Rel), ``sample[1]`` (mask bits OR-ed into ``self.mask``) and
     ``sample[2]`` (the referents used as lambda refs).

     Every variable of the sample DRS is re-created exactly once, keyed by
     its string form, so each variable has a single Python DRSRef object -
     this is what makes renaming fast.

     Args:
         sample: The sample entry described above.
         span: The span passed through to the DrsProduction.

     Returns:
         The new DrsProduction. ``self.drs``, ``self.mask`` and
         ``self.refs`` are updated as a side effect.
     """
     self.mask |= sample[1]
     source = sample[0]

     # One fresh DRSRef per distinct variable (free or bound).
     fresh = {}
     for old in set(source.freerefs).union(source.referents):
         fresh[old.var.to_string()] = DRSRef(
             DRSVar(old.var.name, old.var.idx))

     def renew(old):
         # Look up the fresh copy of a sample referent.
         return fresh[old.var.to_string()]

     conds = []
     for c in source.conditions:
         assert isinstance(c, Rel)
         conds.append(Rel(c.relation, [renew(x) for x in c.referents]))

     self.drs = DRS([renew(x) for x in source.referents], conds)
     d = DrsProduction(self.drs.universe, self.drs.freerefs, span=span)
     d.set_lambda_refs([renew(x) for x in sample[2]])
     # refs[0] is always the final_ref (atom)
     self.refs = d.lambda_refs
     # Append any remaining variables that are not lambda refs.
     leftover = [x for x in fresh.itervalues() if x not in self.refs]
     self.refs.extend(leftover)
     return d
Пример #15
0
    def _build_conditions(self, conds, refs, template):
        """Append this lexeme's conditions to ``conds``.

        Referents are taken from ``self.refs``, where ``self.refs[0]`` is the
        functor return value. The ``refs`` argument is accepted for interface
        compatibility but is not read by this method.

        Args:
            conds: The existing DRS conditions; extended in place.
            refs: Unused; see above.
            template: A FunctorTemplate instance. Only ``isfinalevent`` is
                read, and only for month/weekday proper nouns.

        Returns:
            The modified conditions (the same list object as ``conds``).
        """

        # Note. Proper noun handling requires any extra predicates appear after the noun.
        if self.isproper_noun:
            # Months and weekdays are handled identically apart from the
            # table used to map the stem to its condition name.
            if _TypeMonth.match(self.stem):
                date_names = _MONTHS
            elif _TypeWeekday.match(self.stem):
                date_names = _WEEKDAYS
            else:
                date_names = None

            if date_names is None:
                conds.append(Rel(self.stem, [self.refs[0]]))
            else:
                self.mask |= RT_DATE
                if self.stem in date_names:
                    conds.append(Rel(date_names[self.stem], [self.refs[0]]))
                else:
                    conds.append(Rel(self.stem, [self.refs[0]]))
                if template.isfinalevent:
                    conds.append(Rel('_DATE', self.refs[0:2]))
                else:
                    # Always pass a list of referents, as every other Rel
                    # built here does.
                    conds.append(Rel('_DATE', [self.refs[0]]))
        elif self.isnumber:
            self.mask |= RT_NUMBER
            conds.append(Rel(self.stem, [self.refs[0]]))
            conds.append(Rel('_NUM', self.refs))
        elif self.ispunct:
            # Punctuation only yields a condition when it links two referents.
            if len(self.refs) > 1:
                if self.word == ':':
                    conds.append(Rel('_IE', [self.refs[0], self.refs[-1]]))
                elif self.word == ';':
                    conds.append(Rel('_LINK', [self.refs[0], self.refs[-1]]))
        elif self.pos == POS_PREPOSITION and not self.ispreposition:
            conds.append(Rel(self.stem, self.refs))
        else:
            conds.append(Rel(self.stem, [self.refs[0]]))
        return conds
Пример #16
0
    def get_production(self, sentence, options=0):
        """Get the production model for this category.

        Returns:
            A Production instance.
        """
        global _EventPredicates
        no_vn = 0 != (CO_NO_VERBNET & options)
        span = Span(sentence, [self.idx])
        template = self.get_template()

        # Ensure we only have one instance for each referent name. FunctorTemplate's guarantee
        # this. This allows fast renaming by changing the DRSVar embedded in the DRSRef.
        #
        # To take advantage of fast renaming we need to do one rename post functor creation.

        if template is None:
            if not (not self.category.isfunctor
                    or self.category in [CAT_CONJ_CONJ, CAT_CONJCONJ]):
                pass
            assert not self.category.isfunctor or self.category in [
                CAT_CONJ_CONJ, CAT_CONJCONJ
            ]
            # Simple type
            # Handle prepositions
            if self.category in [CAT_CONJ, CAT_NPthr]:
                self.refs = [DRSRef('X1')]
                if self.stem == 'or':
                    self.mask |= RT_UNION
                elif self.stem == 'nor':
                    self.mask |= RT_UNION | RT_NEGATE
                elif self.stem == 'and':
                    self.mask |= RT_INTERSECTION
                # If self.drs is None then we don't include in constituents
                self.drs = DRS([], [])
                return create_empty_drs_production(self.category,
                                                   self.refs[0],
                                                   span=span)
            elif self.category in [CAT_CONJ_CONJ, CAT_CONJCONJ]:
                self.refs = [DRSRef('X1')]
                return identity_functor(self.category, self.refs[0])
            elif self.ispronoun and self.stem in _PRON:
                d = self._copy_production_from_sample(_PRON[self.stem], span)
                d.set_category(self.category)
                return d
            elif self.category == CAT_N:
                self.refs = [DRSRef('X1')]
                # Relative pronouns don't get a universe. This can happen for cases such as 'That which is...'
                if self.stem in _RELPRON:
                    self.mask |= RT_ENTITY
                    self.drs = DRS([], [Rel(self.stem, [self.refs[0]])])
                    d = DrsProduction([],
                                      self.refs,
                                      category=self.category,
                                      span=span)
                    d.set_lambda_refs([self.refs[0]])
                    return d
                else:
                    self._set_noun_mask()
                    return self._get_noun_drs(span)
            elif self.category == CAT_NOUN:
                self.refs = [DRSRef('X1')]
                self._set_noun_mask()
                return self._get_noun_drs(span)
            elif self.category == CAT_CONJ_CONJ or self.category == CAT_CONJCONJ:
                self.refs = [DRSRef('X1')]
                return create_empty_drs_production(CAT_CONJ, self.refs[0])
                #return identity_functor(self.category)
            elif self.isadverb and self.stem in _ADV:
                d = self._copy_production_from_sample(_ADV[self.stem], span)
                d.set_category(self.category)
                return d
            else:
                self.refs = [DRSRef('X1')]
                self.drs = DRS([], [Rel(self.stem, [self.refs[0]])])
                d = DrsProduction([],
                                  self.refs,
                                  category=self.category,
                                  span=span)
                d.set_lambda_refs([self.refs[0]])
                return d

        # else is functor

        # Production templates use tuples so we don't accidentally modify.
        if self.category == CAT_NP_N:  # NP*/N class
            # Ignore template in these cases
            # FIXME: these relations should be added as part of _build_conditions()
            if self.ispronoun and self.stem in _PRON:
                d = self._copy_production_from_sample(_PRON[self.stem], span)
                d.set_category(CAT_NP)
                return FunctorProduction(self.category, d.lambda_refs, d)

            else:
                nref = DRSRef('X1')
                self.refs = [nref]
                if self.stem in ['a', 'an']:
                    #self.drs = DRS([], [Rel('_EXISTS', [nref])])
                    self.drs = DRS([], [])
                    fn = DrsProduction([], [nref], category=CAT_NP, span=span)
                elif self.stem in ['the', 'thy']:
                    self.drs = DRS([], [])
                    fn = DrsProduction([], [nref], category=CAT_NP, span=span)
                else:
                    self.drs = DRS([], [Rel(self.stem, [nref])])
                    fn = DrsProduction([], [nref], category=CAT_NP, span=span)
                fn.set_lambda_refs([nref])
            return FunctorProduction(category=self.category,
                                     referent=nref,
                                     production=fn)

        else:
            compose = None if template is None else template.constructor_rule
            refs = []
            rule_map = template.create_constructor_rule_map()
            rstk = []
            lstk = []
            argcat = self.category
            for c in compose:
                stk = lstk if argcat.isarg_left else rstk
                if isinstance(c[1], tuple):
                    stk.extend([rule_map[x] for x in c[1]])
                else:
                    stk.append(rule_map[c[1]])
                argcat = argcat.result_category()

            final_ref = rule_map[template.final_ref]
            final_atom = template.final_atom.remove_wildcards()
            refs.append(final_ref)
            refs.extend(reversed(lstk))
            refs.extend(rstk)
            refs = remove_dups(refs)
            # refs[0] is always final_ref
            self.refs = refs

            # Verbs can also be adjectives so check event
            isverb = self.isverb
            if self.isgerund:
                result = self.category
                while not isverb and not result.isatom:
                    isverb = result.can_unify(CAT_TV)
                    result = result.result_category()
                    # TODO: Add predicate for NG or change predarg attachments

            if isverb and template.isfinalevent:
                conds = []
                vncond = None
                vnclasses = []
                try:
                    vnclasses = [] if no_vn else VERBNETDB.name_index[
                        self.stem]
                    if len(vnclasses) == 1:
                        vncond = Rel('_vn_' + vnclasses[0].ID.encode('utf-8'),
                                     [refs[0]])
                    elif len(vnclasses) >= 2:
                        xconds = [Rel('_vn_' + vnclasses[-1].ID.encode('utf-8'), [refs[0]])] \
                            if len(vnclasses) & 0X1 else []

                        # TODO: for vn classes A,B,C should really have (A&!B&!C)|(!A&B&!C)|(!A&!B&C)
                        for vna, vnb in zip(vnclasses[0::2], vnclasses[1::2]):
                            xconds.append(
                                Or(
                                    DRS([], [
                                        Rel('_vn_' + vna.ID.encode('utf-8'),
                                            [refs[0]])
                                    ]),
                                    DRS([], [
                                        Rel('_vn_' + vnb.ID.encode('utf-8'),
                                            [refs[0]])
                                    ])))
                        while len(xconds) != 1:
                            c2 = xconds.pop()
                            c1 = xconds.pop()
                            xconds.append(Or(DRS([], [c1]), DRS([], [c2])))
                        vncond = xconds[0]
                        xconds = None

                    if vncond is not None:
                        # Add implication
                        conds.append(
                            Imp(DRS([], [Rel(self.stem, [refs[0]])]),
                                DRS([], [vncond])))
                    else:
                        conds.append(Rel(self.stem, [refs[0]]))

                except Exception:
                    conds.append(Rel(self.stem, [refs[0]]))
                    pass
                rcat = self.category.test_return_and_get(CAT_VPMODX, False)
                if rcat is not None and rcat.argument_category().has_any_features(FEATURE_VARG) \
                        and rcat.result_category().has_any_features(FEATURE_VRES):
                    conds.append(Rel('_EVENT', [refs[0]]))
                    pred = zip(refs[1:], _EventPredicates)
                    for v, e in pred[0:2]:
                        conds.append(Rel(e, [refs[0], v]))
                    self.mask |= RT_EVENT
                    self.vnclasses = vnclasses
                    self.drs = DRS([refs[0]], conds)
                    #d = DrsProduction([refs[0]], self.refs[1:], span=span)
                    d = DrsProduction([], self.refs, span=span)

                elif rcat is not None and (rcat.has_any_features(FEATURE_PSS
                                                                 | FEATURE_TO)
                                           or rcat.ismodifier):
                    if len(refs) > 1:
                        # passive case
                        if rcat.ismodifier or self.stem in ['be', 'get']:
                            self.mask |= RT_EVENT_ATTRIB
                            conds.append(Rel('_MOD', [refs[0], refs[-1]]))
                            self.drs = DRS([], conds)
                            d = DrsProduction([], self.refs, span=span)
                        else:
                            conds.append(Rel('_EVENT', [refs[0]]))
                            pred = zip(refs[1:], _EventPredicates)
                            for v, e in pred[0:2]:
                                conds.append(Rel(e, [refs[0], v]))
                            self.mask |= RT_EVENT
                            self.vnclasses = vnclasses
                            self.drs = DRS([refs[0]], conds)
                            #d = DrsProduction([refs[0]], self.refs[1:], span=span)
                            d = DrsProduction([], self.refs, span=span)
                    else:
                        d = DrsProduction([], self.refs, span=span)

                elif self.category == CAT_MODAL_PAST:
                    self.mask |= RT_EVENT_MODAL
                    conds.append(Rel('_MODAL', [refs[0]]))
                    self.drs = DRS([], conds)
                    d = DrsProduction([], self.refs, span=span)

                elif self.category in CAT_COPULAR:
                    if len(refs) != 3:
                        pass
                    assert len(refs) == 3, "copular expects 3 referents"

                    # Special handling - ARG1 is a ROLE?
                    self.mask |= RT_EVENT
                    self.vnclasses = vnclasses
                    if self.stem == 'be':
                        # Discard conditions
                        conds.extend([
                            Rel('_EVENT', [refs[0]]),
                            Rel('_ARG0', [refs[0], refs[1]]),
                            Rel('_ARG1', [refs[0], refs[2]])
                        ])

                    else:
                        conds.append(Rel('_EVENT', [refs[0]]))
                        conds.append(Rel('_ARG0', [refs[0], refs[1]]))
                        conds.append(Rel('_ARG1', [refs[0], refs[2]]))
                    self.drs = DRS([refs[0]], conds)
                    #d = DrsProduction([refs[0]], refs[1:], category=final_atom, span=span)
                    d = DrsProduction([], refs, category=final_atom, span=span)
                elif self.category == CAT_VPdcl:
                    if len(refs) != 2:
                        pass
                    assert len(refs) == 2, "VP[dcl] expects 2 referents"

                    conds.append(Rel('_EVENT', [refs[0]]))
                    conds.append(Rel('_ARG0', [refs[0], refs[1]]))
                    self.mask |= RT_EVENT
                    self.vnclasses = vnclasses

                    # Special handling
                    self.drs = DRS([refs[0]], conds)
                    #d = DrsProduction([refs[0]], self.refs[1:], category=final_atom, span=span)
                    d = DrsProduction([],
                                      self.refs,
                                      category=final_atom,
                                      span=span)

                else:
                    # TODO: use verbnet to get semantics
                    self.mask |= RT_EVENT
                    self.vnclasses = vnclasses
                    if self.stem == 'be' and self.category.can_unify(CAT_TV):
                        # Discard conditions - ARG1 is a ROLE?
                        conds.extend([
                            Rel('_EVENT', [refs[0]]),
                            Rel('_ARG0', [refs[0], refs[1]]),
                            Rel('_ARG1', [refs[0], refs[2]])
                        ])
                    else:
                        conds.append(Rel('_EVENT', [refs[0]]))
                        pred = zip(refs[1:], _EventPredicates)
                        for v, e in pred:
                            conds.append(Rel(e, [refs[0], v]))
                        if (len(refs) - 1) > len(pred):
                            rx = [refs[0]]
                            rx.extend(refs[len(pred) + 1:])
                            conds.append(Rel('_ARG2', rx))
                    self.drs = DRS([refs[0]], conds)
                    #d = DrsProduction([refs[0]], refs[1:], span=span)
                    d = DrsProduction([], refs, span=span)

            elif self.isadverb and template.isfinalevent:
                if self.stem in _ADV:
                    d = self._copy_production_from_sample(
                        _ADV[self.stem], span)
                    rs = zip(d.variables, refs)
                    d.rename_vars(rs)
                else:
                    self.drs = DRS([], [Rel(self.stem, refs[0])])
                    d = DrsProduction([], self.refs, span=span)

            #elif self.pos == POS_DETERMINER and self.stem == 'a':

            elif self.ispronoun and self.stem in _PRON:
                pron = _PRON[self.stem]
                d = self._copy_production_from_sample(pron, span)
                ers = complement(d.variables, pron[2])
                ors = intersect(refs, ers)
                if len(ors) != 0:
                    # Make disjoint
                    nrs = get_new_drsrefs(ors, union(ers, refs, pron[2]))
                    d.rename_vars(zip(ors, nrs))
                if len(ers) != 0:
                    ers = complement(d.variables, pron[2])
                    d.rename_vars(zip([pron[2][0], ers[0]], refs))
                else:
                    d.rename_vars([(pron[2][0], refs[0])])

            elif self.ispreposition:
                if template.construct_empty:
                    # Make sure we have one freeref. For functors it is a bad idea to use an empty DrsProduction
                    # as the spans can be deleted by ProductionList.flatten().
                    d = DrsProduction([], [refs[0]], span=span)
                else:
                    if len(refs) >= 2:
                        refs = [refs[0], refs[-1]]
                        self.refs = refs
                        self.drs = DRS([], [Rel(self.stem, refs)])
                    else:
                        self.drs = DRS([], [Rel(self.stem, refs)])
                    d = DrsProduction([], self.refs, span=span)

            elif self.pos == POS_PREPOSITION and self.category.test_returns_modifier() \
                    and len(refs) > 1 and not self.category.ismodifier:
                self.drs = DRS([], [Rel(self.stem, [refs[0], refs[-1]])])
                refs = [refs[0], refs[-1]]
                self.refs = refs
                d = DrsProduction([], self.refs, span=span)

            elif final_atom == CAT_Sadj and len(refs) > 1:
                if self.category == CAT_AP_PP or self.category.ismodifier or \
                        self.category.test_returns_modifier():
                    self.drs = DRS([], [Rel(self.stem, refs[0])])
                    d = DrsProduction([], self.refs, span=span)
                elif self.category.test_return(CAT_AP) and self.category.isarg_right and \
                        self.category.argument_category() == CAT_NP:
                    self.mask |= RT_ATTRIBUTE
                    self.drs = DRS([], [Rel(self.stem, [refs[0], refs[-1]])])
                    d = DrsProduction([], self.refs, span=span)
                else:
                    self.mask |= RT_ATTRIBUTE
                    self.drs = DRS([], [Rel(self.stem, refs[0])])
                    d = DrsProduction([], self.refs, span=span)

            else:
                universe = []
                freerefs = self.refs
                if self.isproper_noun:
                    #universe.append(self.refs[0])
                    #freerefs = self.refs[1:]
                    self.mask |= RT_PROPERNAME
                elif final_atom == CAT_N and not self.category.ismodifier \
                        and not self.category.test_returns_modifier():
                    #universe.append(self.refs[0])
                    #freerefs = self.refs[1:]
                    self.mask |= (
                        RT_ENTITY
                        | RT_PLURAL) if self.pos == POS_NOUN_S else RT_ENTITY
                elif len(self.refs) == 1 and final_atom == CAT_N \
                        and (self.category.ismodifier or self.category.test_returns_modifier()):
                    self.mask |= RT_ATTRIBUTE
                if self.pos == POS_POSSESSIVE:
                    self.mask |= RT_POSSESSIVE

                if template.isfinalevent:
                    if self.category == CAT_INFINITIVE:
                        # Make sure we have one freeref. For functors it is a bad idea to use an empty DrsProduction
                        # as the spans can be deleted by ProductionList.flatten().
                        d = DrsProduction([], [self.refs[0]], span=span)
                        # Having a DRS prevents deletion of TO constituent
                        self.drs = DRS([], [])
                    elif self.pos == POS_MODAL:
                        self.mask |= RT_EVENT_MODAL
                        self.drs = DRS([], [
                            Rel(self.stem, [refs[0]]),
                            Rel('_MODAL', [refs[0]])
                        ])
                        d = DrsProduction([], self.refs, span=span)
                    else:
                        self.drs = DRS([],
                                       self._build_conditions([], refs,
                                                              template))
                        d = DrsProduction([], self.refs, span=span)
                else:
                    self.drs = DRS(universe,
                                   self._build_conditions([], refs, template))
                    d = DrsProduction(universe, freerefs, span=span)

            d.set_lambda_refs([final_ref])
            d.set_category(final_atom)
            fn = template.create_functor(rule_map, d)
            return fn
Пример #17
0
 def test10_Ccgbank_00_0099(self):
     """Check NP/VP nominals and argument structure for one long sentence.

     The sentence is parsed via the gRPC CCG parser stub (``self.stub``),
     converted to a DRS, and then checked for:

     * the expected noun-phrase nominals,
     * the expected verb-phrase nominals,
     * the ``_ARG0``/``_ARG1``/``_ARG2`` conditions linking verbs to their
       arguments, and the ``_POSS`` conditions for the two possessives.

     NOTE(review): the test name suggests the sentence is CCGbank section
     00, sentence 0099 -- confirm against the corpus.
     """
     text = "Plans that give advertisers discounts for maintaining or increasing ad spending have become permanent fixtures at the news weeklies and underscore the fierce competition between Newsweek, Time Warner Inc.'s Time magazine, and Mortimer B. Zuckerman's U.S. News & World Report."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)

     def ref_of(pairs, phrase):
         # Referent of the first (referent, span) pair whose span text is
         # `phrase`. A list comprehension is used instead of
         # filter(...)[0] because filter() is not subscriptable on
         # Python 3; a missing phrase still raises IndexError.
         return [r for r, sp in pairs if phrase == sp.text][0]

     # Noun-phrase nominals.
     f = sentence.get_np_nominals()
     phrases = [sp.text for r, sp in f]
     for expected in ('Plans',
                      'advertisers',
                      'discounts',
                      'ad spending',
                      'permanent fixtures',
                      'the news weeklies',
                      'the fierce competition',
                      "Newsweek",
                      "Time-Warner-Inc.",
                      "Time-magazine",
                      "Mortimer-B.-Zuckerman",
                      "U.S.-News-&-World-Report"):
         self.assertIn(expected, phrases)

     # Verb-phrase nominals.
     vf = sentence.get_vp_nominals()
     vphrases = [sp.text for r, sp in vf]
     for expected in ('give',
                      'maintaining increasing',
                      'have become',
                      'underscore'):
         self.assertIn(expected, vphrases)

     # Referents needed for the condition checks below.
     give = ref_of(vf, 'give')
     become = ref_of(vf, 'have become')
     minc = ref_of(vf, 'maintaining increasing')
     plans = ref_of(f, 'Plans')
     advertisers = ref_of(f, 'advertisers')
     discounts = ref_of(f, 'discounts')
     spending = ref_of(f, 'ad spending')
     fixtures = ref_of(f, 'permanent fixtures')
     timeinc = ref_of(f, 'Time-Warner-Inc.')
     timemag = ref_of(f, 'Time-magazine')
     mortimer = ref_of(f, 'Mortimer-B.-Zuckerman')
     uswr = ref_of(f, 'U.S.-News-&-World-Report')

     # (relation name, referents) pairs that must all occur in the DRS.
     expected_conditions = [
         ('_ARG0', [give, plans]),
         ('_ARG1', [give, advertisers]),
         ('_ARG2', [give, discounts]),
         ('_ARG0', [minc, plans]),
         ('_ARG1', [minc, spending]),
         ('_ARG0', [become, plans]),
         ('_ARG1', [become, fixtures]),
         ('_POSS', [mortimer, uswr]),
         ('_POSS', [timeinc, timemag]),
     ]
     for relation, args in expected_conditions:
         self.assertIsNotNone(
             d.find_condition(Rel(relation, args)),
             'DRS is missing an expected %s condition' % relation)
Пример #18
0
 def to_drs(self):
     """Build a ``Rel`` from this sequence.

     Element 0 supplies the relation name and every following element
     supplies one referent name. Each element is UTF-8 encoded before being
     wrapped in ``DRSRelation`` / ``DRSRef`` respectively.
     """
     # Referents are built first, then the relation, matching the original
     # evaluation order.
     referents = []
     for name in self[1:]:
         referents.append(DRSRef(name.encode('utf-8')))
     relation = DRSRelation(self[0].encode('utf-8'))
     return Rel(relation, referents)