def test01_AndOfSubj(self):
    text = "John and Paul went to the movies"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
    phrases = [sp.text for r, sp in f.iteritems()]
    self.assertTrue('John' in phrases)
    self.assertTrue('Paul' in phrases)
    self.assertTrue('went' in phrases)
    john = filter(lambda x: 'John' == x[1].text, f.iteritems())[0]
    paul = filter(lambda x: 'Paul' == x[1].text, f.iteritems())[0]
    went = filter(lambda x: 'went' == x[1].text, f.iteritems())[0]
    J = john[0]
    P = paul[0]
    E = went[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('go', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('John', [J])) is not None)
    self.assertTrue(d.find_condition(Rel('Paul', [P])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E, J])) is not None)
def test10_OrOfVerb_OrInBrackets(self):
    text = "That which is perceived or known or inferred to have its own distinct existence (living or nonliving)"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs(nodups=True)
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    # RT_EMPTY_DRS adds 'or' to phrases
    f = sentence.select_phrases(lambda x: x.pos is POS.from_cache('WDT') or
                                0 == (x.mask & RT_EMPTY_DRS), contiguous=False)
    phrases = [sp.text for r, sp in f.iteritems()]
    self.assertTrue('That which' in phrases)
    self.assertTrue('have' in phrases)
    self.assertTrue('is perceived known inferred' in phrases)
    self.assertTrue('its own distinct existence' in phrases)
    verb1 = filter(lambda x: 'is perceived known inferred' == x[1].text, f.iteritems())[0]
    verb2 = filter(lambda x: 'have' == x[1].text, f.iteritems())[0]
    agent = filter(lambda x: 'That which' == x[1].text, f.iteritems())[0]
    theme = filter(lambda x: 'its own distinct existence' == x[1].text, f.iteritems())[0]
    X1 = agent[0]
    E1 = verb1[0]
    E2 = verb2[0]
    X2 = theme[1][0].refs[1]
    X3 = theme[1][1].refs[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E1, E2])) is not None)
    # TODO: should the theme attach to X2?
    self.assertTrue(d.find_condition(Rel('_ARG1', [E2, X3])) is not None)
    self.assertTrue(d.find_condition(Rel('_POSS', [X2, X3])) is not None)
def test02_AndOfObj(self):
    text = "He saw John and Paul"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
    phrases = [sp.text for r, sp in f.iteritems()]
    self.assertTrue('John' in phrases)
    self.assertTrue('Paul' in phrases)
    self.assertTrue('saw' in phrases)
    john = filter(lambda x: 'John' == x[1].text, f.iteritems())[0]
    paul = filter(lambda x: 'Paul' == x[1].text, f.iteritems())[0]
    saw = filter(lambda x: 'saw' == x[1].text, f.iteritems())[0]
    J = john[0]
    P = paul[0]
    E = saw[0]
    # FIXME: the wn lemmatizer does not convert 'saw' to 'see' - I guess due to ambiguity
    self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('saw', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('John', [J])) is not None)
    self.assertTrue(d.find_condition(Rel('Paul', [P])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E, J])) is not None)
def test03_OrOfObj(self):
    text = "To participate in games or sport"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.select_phrases(RT_ENTITY | RT_EVENT)
    phrases = [sp.text for r, sp in f.iteritems()]
    self.assertTrue('participate' in phrases)
    self.assertTrue('games' in phrases)
    self.assertTrue('sport' in phrases)
    noun1 = filter(lambda x: 'games' == x[1].text, f.iteritems())[0]
    noun2 = filter(lambda x: 'sport' == x[1].text, f.iteritems())[0]
    verb = filter(lambda x: 'participate' == x[1].text, f.iteritems())[0]
    X1 = noun1[0]
    X2 = noun2[0]
    E = verb[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('participate', [E])) is not None)
    self.assertTrue(d.find_condition(Rel('games', [X1])) is not None)
    self.assertTrue(d.find_condition(Rel('sport', [X2])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E, X2])) is not None)
def test05_AndOfVerb_AndOfObj(self):
    text = "Bell makes and distributes computers, electronics, and building products"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT | RT_ATTRIBUTE)
    phrases = [sp.text for r, sp in f.iteritems()]
    self.assertTrue('Bell' in phrases)
    self.assertTrue('makes distributes' in phrases)
    self.assertTrue('computers' in phrases)
    self.assertTrue('electronics' in phrases)
    # Note: if we add RT_EMPTY_DRS to the selection criteria then this phrase becomes 'and building products'
    self.assertTrue('building products' in phrases)
    self.assertEqual(5, len(phrases))
    verb1 = filter(lambda x: 'makes distributes' == x[1].text, f.iteritems())[0]
    agent = filter(lambda x: 'Bell' == x[1].text, f.iteritems())[0]
    theme1 = filter(lambda x: 'computers' == x[1].text, f.iteritems())[0]
    theme2 = filter(lambda x: 'electronics' == x[1].text, f.iteritems())[0]
    theme3 = filter(lambda x: 'building products' == x[1].text, f.iteritems())[0]
    X1 = agent[0]
    Y1 = theme1[0]
    Y2 = theme2[0]
    Y3 = theme3[0]
    E1 = verb1[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
    # TODO: should we add a proposition for multiple conjoined NPs?
    self.assertTrue(d.find_condition(Rel('_ARG1', [E1, Y3])) is not None)
def test1_Currency_00_0194(self):
    text = r"Without the Cray-3 research and development expenses, the company would have been able to report a profit of $19.3 million for the first half of 1989 rather than the $5.9 million it posted."
    etext = r"Without the Cray-3 research and development expenses , the company would have been able to report a profit of $ 19.3 million for the first half of 1989 rather than the $ 5.9 million it posted"
    mtext = preprocess_sentence(text)
    self.assertEqual(etext, mtext)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs(nodups=True)
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    fnps = sentence.get_np_nominals()
    nps = [sp.text for r, sp in fnps]
    self.assertTrue('the Cray-3 research and development expenses' in nps)
    self.assertTrue('the company' in nps)
    self.assertTrue('a profit' in nps)
    self.assertTrue('$ 19.3 million' in nps)
    self.assertTrue('the first half' in nps)
    self.assertTrue('the $ 5.9 million' in nps)
    self.assertTrue('1989' in nps)
    fvps = sentence.get_vp_nominals()
    vps = [sp.text for r, sp in fvps]
    self.assertTrue('would have been' in vps)
    self.assertTrue('report' in vps)
    self.assertTrue('posted' in vps)
    would_have_been = filter(lambda x: 'would have been' == x[1].text, fvps)[0][0]
    report = filter(lambda x: 'report' == x[1].text, fvps)[0][0]
    posted = filter(lambda x: 'posted' == x[1].text, fvps)[0][0]
    cray_rnd = filter(lambda x: 'the Cray-3 research and development expenses' == x[1].text, fnps)[0][0]
    company = filter(lambda x: 'the company' == x[1].text, fnps)[0][0]
    profit = filter(lambda x: 'a profit' == x[1].text, fnps)[0][0]
    first_half = filter(lambda x: 'the first half' == x[1].text, fnps)[0][0]
    n1989 = filter(lambda x: '1989' == x[1].text, fnps)[0][0]
    n19_3M = filter(lambda x: '$ 19.3 million' == x[1].text, fnps)[0][0]
    n5_9M = filter(lambda x: 'the $ 5.9 million' == x[1].text, fnps)[0][0]
    self.assertTrue(d.find_condition(Rel('without', [would_have_been, cray_rnd])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [would_have_been, company])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [report, company])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [report, profit])) is not None)
    self.assertTrue(d.find_condition(Rel('of', [profit, n19_3M])) is not None)
    self.assertTrue(d.find_condition(Rel('for', [profit, first_half])) is not None)
    self.assertTrue(d.find_condition(Rel('of', [first_half, n1989])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [posted, n5_9M])) is not None)
def promote_to_propernoun(self):
    """Promote an entity to a proper noun."""
    if 0 == (self.mask & RT_PROPERNAME):
        self.stem = self.word.title()
        self.mask &= ~RT_ENTITY
        self.mask |= RT_PROPERNAME
        self.drs = DRS(self.drs.referents, [Rel(self.stem, self.drs.referents)])
def test10_Brutus(self):
    text = "Ceasar was stabbed by Brutus"
    derivation = grpc.ccg_parse(self.stub, text, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    fnps = sentence.get_np_nominals()
    nps = [sp.text for r, sp in fnps]
    self.assertTrue('Brutus' in nps)
    self.assertTrue('Ceasar' in nps)
    fvps = sentence.get_vp_nominals()
    vps = [sp.text for r, sp in fvps]
    self.assertTrue('was stabbed' in vps)
    E = filter(lambda x: x[1].text == "was stabbed", fvps)[0][0]
    A1 = filter(lambda x: x[1].text == "Brutus", fnps)[0][0]
    A0 = filter(lambda x: x[1].text == "Ceasar", fnps)[0][0]
    self.assertTrue(d.find_condition(Rel('_ARG0', [E, A0])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E, A1])) is not None)
def test04_AndOfVerb(self):
    text = "Bell makes and distributes computers"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT)
    phrases = [sp.text for r, sp in f.iteritems()]
    self.assertTrue('Bell' in phrases)
    self.assertTrue('makes distributes' in phrases)
    self.assertTrue('computers' in phrases)
    verb1 = filter(lambda x: 'makes distributes' == x[1].text, f.iteritems())[0]
    agent = filter(lambda x: 'Bell' == x[1].text, f.iteritems())[0]
    theme = filter(lambda x: 'computers' == x[1].text, f.iteritems())[0]
    X1 = agent[0]
    X2 = theme[0]
    E1 = verb1[0]
    self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [E1, X2])) is not None)
def _get_noun_drs(self, span):
    global _Wnl, _Ieng
    if not self.isproper_noun and not self.pos == POS_POSSESSIVE:
        # TODO: cache nouns
        # pattern.en.pluralize(self.stem)
        # or use inflect https://pypi.python.org/pypi/inflect
        if self.stem == "'s":
            pass
        # inflect will generate an exception for single character nouns. This can happen
        # due to bad POS tagging (like EasySRL)
        if len(self.stem) > 1:
            sp = _Ieng.plural(self.stem)
            self.wnsynsets = wn.wordnet.synsets(_Wnl.lemmatize(self.stem.lower(), 'n'), pos='n')
            if False and self.stem != sp:
                rp = DRSRef(DRSVar('X', len(self.refs) + 1))
                self.drs = DRS([self.refs[0], rp],
                               [Rel(self.stem, [self.refs[0]]),
                                Rel(sp, [rp]),
                                Rel('_ISMEMBER', [self.refs[0], rp])])
                d = DrsProduction([self.refs[0], rp], self.refs[1:],
                                  category=self.category, span=span)
                d.set_lambda_refs([self.refs[0]])
                return d
    self.drs = DRS([self.refs[0]], [Rel(self.stem, [self.refs[0]])])
    d = DrsProduction([self.refs[0]], self.refs[1:], category=self.category, span=span)
    d.set_lambda_refs([self.refs[0]])
    return d
def test4_ApposInterrupt(self):
    text = r"Bell, a telecommunications company, which is located in Los Angeles, makes and distributes electronics, computers, and building products"
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.get_np_nominals()
    phrases = [sp.text for r, sp in f]
    self.assertTrue('Bell' in phrases)
    self.assertTrue('a telecommunications company' in phrases)
    np1 = filter(lambda x: 'Bell' == x[1].text, f)[0]
    np2 = filter(lambda x: 'a telecommunications company' == x[1].text, f)[0]
    X = np1[0]
    Y = np2[0]
    self.assertNotEqual(X, Y)
    self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
    self.assertTrue(len(repr(d).split('_AKA')) == 2)
def test3_ApposInterrupt(self):
    text = r"Robbie, a hot-tempered tennis player, charged the umpire and tried to crack the poor man's skull with a racket."
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.get_np_nominals()
    phrases = [sp.text for r, sp in f]
    self.assertTrue('Robbie' in phrases)
    self.assertTrue('a hot-tempered tennis player' in phrases)
    robbie = filter(lambda x: 'Robbie' == x[1].text, f)[0]
    temper = filter(lambda x: 'a hot-tempered tennis player' == x[1].text, f)[0]
    X = robbie[0]
    Y = temper[0]
    self.assertNotEqual(X, Y)
    self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
    self.assertTrue(len(repr(d).split('_AKA')) == 2)
def test2_ApposInterrupt(self):
    text = r"Reliable, Diane's eleven-year-old beagle, chews holes in the living room carpeting as if he were still a puppy."
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.get_np_nominals()
    phrases = [sp.text for r, sp in f]
    self.assertTrue('Reliable' in phrases)
    self.assertTrue("eleven-year-old beagle" in phrases)
    self.assertTrue("Diane" in phrases)
    dog = filter(lambda x: 'Reliable' == x[1].text, f)[0]
    breed = filter(lambda x: "eleven-year-old beagle" == x[1].text, f)[0]
    X = dog[0]
    Y = breed[0]
    self.assertNotEqual(X, Y)
    self.assertTrue(d.find_condition(Rel('_AKA', [X, Y])) is not None)
    self.assertTrue(len(repr(d).split('_AKA')) == 2)
def _copy_production_from_sample(self, sample, span):
    # Deepcopy but ensure variables only get one python reference - makes renaming fast
    self.mask |= sample[1]
    ovrs = set(sample[0].freerefs).union(sample[0].referents)
    nvrs = {}
    for x in ovrs:
        nvrs[x.var.to_string()] = DRSRef(DRSVar(x.var.name, x.var.idx))
    conds = []
    for c in sample[0].conditions:
        assert isinstance(c, Rel)
        conds.append(Rel(c.relation, map(lambda x: nvrs[x.var.to_string()], c.referents)))
    refs = map(lambda x: nvrs[x.var.to_string()], sample[0].referents)
    self.drs = DRS(refs, conds)
    d = DrsProduction(self.drs.universe, self.drs.freerefs, span=span)
    d.set_lambda_refs(map(lambda x: nvrs[x.var.to_string()], sample[2]))
    # refs[0] is always the final_ref (atom)
    self.refs = d.lambda_refs
    xtra = filter(lambda x: x not in self.refs, nvrs.itervalues())
    self.refs.extend(xtra)
    return d
def _build_conditions(self, conds, refs, template):
    """Build the DRS conditions for this lexeme.

    Args:
        conds: The existing DRS conditions.
        refs: The referents, reversed so that refs[0] is the functor return value.
        template: A FunctorTemplate instance.

    Returns:
        The modified conditions.
    """
    # Note. Proper noun handling requires any extra predicates appear after the noun.
    global _TypeMonth, _TypeWeekday
    if self.isproper_noun:
        # If we are a functor and a proper noun then the argument type is the
        # correct referent for the noun
        if _TypeMonth.match(self.stem):
            self.mask |= RT_DATE
            if self.stem in _MONTHS:
                conds.append(Rel(_MONTHS[self.stem], [self.refs[0]]))
            else:
                conds.append(Rel(self.stem, [self.refs[0]]))
            if template.isfinalevent:
                conds.append(Rel('_DATE', self.refs[0:2]))
            else:
                conds.append(Rel('_DATE', self.refs[0]))
        elif _TypeWeekday.match(self.stem):
            self.mask |= RT_DATE
            if self.stem in _WEEKDAYS:
                conds.append(Rel(_WEEKDAYS[self.stem], [self.refs[0]]))
            else:
                conds.append(Rel(self.stem, [self.refs[0]]))
            if template.isfinalevent:
                conds.append(Rel('_DATE', self.refs[0:2]))
            else:
                conds.append(Rel('_DATE', self.refs[0]))
        else:
            conds.append(Rel(self.stem, [self.refs[0]]))
    elif self.isnumber:
        self.mask |= RT_NUMBER
        conds.append(Rel(self.stem, [self.refs[0]]))
        conds.append(Rel('_NUM', self.refs))
    elif self.ispunct:
        if len(self.refs) > 1:
            if self.word == ':':
                conds.append(Rel('_IE', [self.refs[0], self.refs[-1]]))
            elif self.word == ';':
                conds.append(Rel('_LINK', [self.refs[0], self.refs[-1]]))
    elif self.pos == POS_PREPOSITION and not self.ispreposition:
        conds.append(Rel(self.stem, self.refs))
    else:
        conds.append(Rel(self.stem, [self.refs[0]]))
    return conds
def get_production(self, sentence, options=0):
    """Get the production model for this category.

    Returns:
        A Production instance.
    """
    global _EventPredicates
    no_vn = 0 != (CO_NO_VERBNET & options)
    span = Span(sentence, [self.idx])
    template = self.get_template()

    # Ensure we only have one instance for each referent name. FunctorTemplates guarantee
    # this. This allows fast renaming by changing the DRSVar embedded in the DRSRef.
    #
    # To take advantage of fast renaming we need to do one rename post functor creation.
    if template is None:
        if not (not self.category.isfunctor or self.category in [CAT_CONJ_CONJ, CAT_CONJCONJ]):
            pass
        assert not self.category.isfunctor or self.category in [CAT_CONJ_CONJ, CAT_CONJCONJ]

        # Simple type
        # Handle prepositions
        if self.category in [CAT_CONJ, CAT_NPthr]:
            self.refs = [DRSRef('X1')]
            if self.stem == 'or':
                self.mask |= RT_UNION
            elif self.stem == 'nor':
                self.mask |= RT_UNION | RT_NEGATE
            elif self.stem == 'and':
                self.mask |= RT_INTERSECTION
            # If self.drs is None then we don't include in constituents
            self.drs = DRS([], [])
            return create_empty_drs_production(self.category, self.refs[0], span=span)
        elif self.category in [CAT_CONJ_CONJ, CAT_CONJCONJ]:
            self.refs = [DRSRef('X1')]
            return identity_functor(self.category, self.refs[0])
        elif self.ispronoun and self.stem in _PRON:
            d = self._copy_production_from_sample(_PRON[self.stem], span)
            d.set_category(self.category)
            return d
        elif self.category == CAT_N:
            self.refs = [DRSRef('X1')]
            # Relative pronouns don't get a universe. This can happen for cases such as 'That which is...'
            if self.stem in _RELPRON:
                self.mask |= RT_ENTITY
                self.drs = DRS([], [Rel(self.stem, [self.refs[0]])])
                d = DrsProduction([], self.refs, category=self.category, span=span)
                d.set_lambda_refs([self.refs[0]])
                return d
            else:
                self._set_noun_mask()
                return self._get_noun_drs(span)
        elif self.category == CAT_NOUN:
            self.refs = [DRSRef('X1')]
            self._set_noun_mask()
            return self._get_noun_drs(span)
        elif self.category == CAT_CONJ_CONJ or self.category == CAT_CONJCONJ:
            self.refs = [DRSRef('X1')]
            return create_empty_drs_production(CAT_CONJ, self.refs[0])
            #return identity_functor(self.category)
        elif self.isadverb and self.stem in _ADV:
            d = self._copy_production_from_sample(_ADV[self.stem], span)
            d.set_category(self.category)
            return d
        else:
            self.refs = [DRSRef('X1')]
            self.drs = DRS([], [Rel(self.stem, [self.refs[0]])])
            d = DrsProduction([], self.refs, category=self.category, span=span)
            d.set_lambda_refs([self.refs[0]])
            return d

    # else is functor

    # Production templates use tuples so we don't accidentally modify.
    if self.category == CAT_NP_N:    # NP*/N class
        # Ignore template in these cases
        # FIXME: these relations should be added as part of _build_conditions()
        if self.ispronoun and self.stem in _PRON:
            d = self._copy_production_from_sample(_PRON[self.stem], span)
            d.set_category(CAT_NP)
            return FunctorProduction(self.category, d.lambda_refs, d)
        else:
            nref = DRSRef('X1')
            self.refs = [nref]
            if self.stem in ['a', 'an']:
                #self.drs = DRS([], [Rel('_EXISTS', [nref])])
                self.drs = DRS([], [])
                fn = DrsProduction([], [nref], category=CAT_NP, span=span)
            elif self.stem in ['the', 'thy']:
                self.drs = DRS([], [])
                fn = DrsProduction([], [nref], category=CAT_NP, span=span)
            else:
                self.drs = DRS([], [Rel(self.stem, [nref])])
                fn = DrsProduction([], [nref], category=CAT_NP, span=span)
            fn.set_lambda_refs([nref])
            return FunctorProduction(category=self.category, referent=nref, production=fn)
    else:
        compose = None if template is None else template.constructor_rule
        refs = []
        rule_map = template.create_constructor_rule_map()
        rstk = []
        lstk = []
        argcat = self.category
        for c in compose:
            stk = lstk if argcat.isarg_left else rstk
            if isinstance(c[1], tuple):
                stk.extend([rule_map[x] for x in c[1]])
            else:
                stk.append(rule_map[c[1]])
            argcat = argcat.result_category()

        final_ref = rule_map[template.final_ref]
        final_atom = template.final_atom.remove_wildcards()
        refs.append(final_ref)
        refs.extend(reversed(lstk))
        refs.extend(rstk)
        refs = remove_dups(refs)
        # refs[0] is always final_ref
        self.refs = refs

        # Verbs can also be adjectives so check event
        isverb = self.isverb
        if self.isgerund:
            result = self.category
            while not isverb and not result.isatom:
                isverb = result.can_unify(CAT_TV)
                result = result.result_category()
            # TODO: Add predicate for NG or change predarg attachments

        if isverb and template.isfinalevent:
            conds = []
            vncond = None
            vnclasses = []
            try:
                vnclasses = [] if no_vn else VERBNETDB.name_index[self.stem]
                if len(vnclasses) == 1:
                    vncond = Rel('_vn_' + vnclasses[0].ID.encode('utf-8'), [refs[0]])
                elif len(vnclasses) >= 2:
                    xconds = [Rel('_vn_' + vnclasses[-1].ID.encode('utf-8'), [refs[0]])] \
                        if len(vnclasses) & 0x1 else []
                    # TODO: for vn classes A,B,C should really have (A&!B&!C)|(!A&B&!C)|(!A&!B&C)
                    for vna, vnb in zip(vnclasses[0::2], vnclasses[1::2]):
                        xconds.append(Or(DRS([], [Rel('_vn_' + vna.ID.encode('utf-8'), [refs[0]])]),
                                         DRS([], [Rel('_vn_' + vnb.ID.encode('utf-8'), [refs[0]])])))
                    while len(xconds) != 1:
                        c2 = xconds.pop()
                        c1 = xconds.pop()
                        xconds.append(Or(DRS([], [c1]), DRS([], [c2])))
                    vncond = xconds[0]
                    xconds = None

                if vncond is not None:
                    # Add implication
                    conds.append(Imp(DRS([], [Rel(self.stem, [refs[0]])]), DRS([], [vncond])))
                else:
                    conds.append(Rel(self.stem, [refs[0]]))
            except Exception:
                conds.append(Rel(self.stem, [refs[0]]))
                pass

            rcat = self.category.test_return_and_get(CAT_VPMODX, False)
            if rcat is not None and rcat.argument_category().has_any_features(FEATURE_VARG) \
                    and rcat.result_category().has_any_features(FEATURE_VRES):
                conds.append(Rel('_EVENT', [refs[0]]))
                pred = zip(refs[1:], _EventPredicates)
                for v, e in pred[0:2]:
                    conds.append(Rel(e, [refs[0], v]))
                self.mask |= RT_EVENT
                self.vnclasses = vnclasses
                self.drs = DRS([refs[0]], conds)
                #d = DrsProduction([refs[0]], self.refs[1:], span=span)
                d = DrsProduction([], self.refs, span=span)
            elif rcat is not None and (rcat.has_any_features(FEATURE_PSS | FEATURE_TO) or rcat.ismodifier):
                if len(refs) > 1:
                    # passive case
                    if rcat.ismodifier or self.stem in ['be', 'get']:
                        self.mask |= RT_EVENT_ATTRIB
                        conds.append(Rel('_MOD', [refs[0], refs[-1]]))
                        self.drs = DRS([], conds)
                        d = DrsProduction([], self.refs, span=span)
                    else:
                        conds.append(Rel('_EVENT', [refs[0]]))
                        pred = zip(refs[1:], _EventPredicates)
                        for v, e in pred[0:2]:
                            conds.append(Rel(e, [refs[0], v]))
                        self.mask |= RT_EVENT
                        self.vnclasses = vnclasses
                        self.drs = DRS([refs[0]], conds)
                        #d = DrsProduction([refs[0]], self.refs[1:], span=span)
                        d = DrsProduction([], self.refs, span=span)
                else:
                    d = DrsProduction([], self.refs, span=span)
            elif self.category == CAT_MODAL_PAST:
                self.mask |= RT_EVENT_MODAL
                conds.append(Rel('_MODAL', [refs[0]]))
                self.drs = DRS([], conds)
                d = DrsProduction([], self.refs, span=span)
            elif self.category in CAT_COPULAR:
                if len(refs) != 3:
                    pass
                assert len(refs) == 3, "copular expects 3 referents"
                # Special handling - ARG1 is a ROLE?
                self.mask |= RT_EVENT
                self.vnclasses = vnclasses
                if self.stem == 'be':
                    # Discard conditions
                    conds.extend([Rel('_EVENT', [refs[0]]),
                                  Rel('_ARG0', [refs[0], refs[1]]),
                                  Rel('_ARG1', [refs[0], refs[2]])])
                else:
                    conds.append(Rel('_EVENT', [refs[0]]))
                    conds.append(Rel('_ARG0', [refs[0], refs[1]]))
                    conds.append(Rel('_ARG1', [refs[0], refs[2]]))
                self.drs = DRS([refs[0]], conds)
                #d = DrsProduction([refs[0]], refs[1:], category=final_atom, span=span)
                d = DrsProduction([], refs, category=final_atom, span=span)
            elif self.category == CAT_VPdcl:
                if len(refs) != 2:
                    pass
                assert len(refs) == 2, "VP[dcl] expects 2 referents"
                conds.append(Rel('_EVENT', [refs[0]]))
                conds.append(Rel('_ARG0', [refs[0], refs[1]]))
                self.mask |= RT_EVENT
                self.vnclasses = vnclasses
                # Special handling
                self.drs = DRS([refs[0]], conds)
                #d = DrsProduction([refs[0]], self.refs[1:], category=final_atom, span=span)
                d = DrsProduction([], self.refs, category=final_atom, span=span)
            else:
                # TODO: use verbnet to get semantics
                self.mask |= RT_EVENT
                self.vnclasses = vnclasses
                if self.stem == 'be' and self.category.can_unify(CAT_TV):
                    # Discard conditions - ARG1 is a ROLE?
                    conds.extend([Rel('_EVENT', [refs[0]]),
                                  Rel('_ARG0', [refs[0], refs[1]]),
                                  Rel('_ARG1', [refs[0], refs[2]])])
                else:
                    conds.append(Rel('_EVENT', [refs[0]]))
                    pred = zip(refs[1:], _EventPredicates)
                    for v, e in pred:
                        conds.append(Rel(e, [refs[0], v]))
                    if (len(refs) - 1) > len(pred):
                        rx = [refs[0]]
                        rx.extend(refs[len(pred) + 1:])
                        conds.append(Rel('_ARG2', rx))
                self.drs = DRS([refs[0]], conds)
                #d = DrsProduction([refs[0]], refs[1:], span=span)
                d = DrsProduction([], refs, span=span)

        elif self.isadverb and template.isfinalevent:
            if self.stem in _ADV:
                d = self._copy_production_from_sample(_ADV[self.stem], span)
                rs = zip(d.variables, refs)
                d.rename_vars(rs)
            else:
                self.drs = DRS([], [Rel(self.stem, refs[0])])
                d = DrsProduction([], self.refs, span=span)

        #elif self.pos == POS_DETERMINER and self.stem == 'a':

        elif self.ispronoun and self.stem in _PRON:
            pron = _PRON[self.stem]
            d = self._copy_production_from_sample(pron, span)
            ers = complement(d.variables, pron[2])
            ors = intersect(refs, ers)
            if len(ors) != 0:
                # Make disjoint
                nrs = get_new_drsrefs(ors, union(ers, refs, pron[2]))
                d.rename_vars(zip(ors, nrs))
            if len(ers) != 0:
                ers = complement(d.variables, pron[2])
                d.rename_vars(zip([pron[2][0], ers[0]], refs))
            else:
                d.rename_vars([(pron[2][0], refs[0])])

        elif self.ispreposition:
            if template.construct_empty:
                # Make sure we have one freeref. For functors it is a bad idea to use an empty DrsProduction
                # as the spans can be deleted by ProductionList.flatten().
                d = DrsProduction([], [refs[0]], span=span)
            else:
                if len(refs) >= 2:
                    refs = [refs[0], refs[-1]]
                    self.refs = refs
                    self.drs = DRS([], [Rel(self.stem, refs)])
                else:
                    self.drs = DRS([], [Rel(self.stem, refs)])
                d = DrsProduction([], self.refs, span=span)

        elif self.pos == POS_PREPOSITION and self.category.test_returns_modifier() \
                and len(refs) > 1 and not self.category.ismodifier:
            self.drs = DRS([], [Rel(self.stem, [refs[0], refs[-1]])])
            refs = [refs[0], refs[-1]]
            self.refs = refs
            d = DrsProduction([], self.refs, span=span)

        elif final_atom == CAT_Sadj and len(refs) > 1:
            if self.category == CAT_AP_PP or self.category.ismodifier or \
                    self.category.test_returns_modifier():
                self.drs = DRS([], [Rel(self.stem, refs[0])])
                d = DrsProduction([], self.refs, span=span)
            elif self.category.test_return(CAT_AP) and self.category.isarg_right and \
                    self.category.argument_category() == CAT_NP:
                self.mask |= RT_ATTRIBUTE
                self.drs = DRS([], [Rel(self.stem, [refs[0], refs[-1]])])
                d = DrsProduction([], self.refs, span=span)
            else:
                self.mask |= RT_ATTRIBUTE
                self.drs = DRS([], [Rel(self.stem, refs[0])])
                d = DrsProduction([], self.refs, span=span)

        else:
            universe = []
            freerefs = self.refs
            if self.isproper_noun:
                #universe.append(self.refs[0])
                #freerefs = self.refs[1:]
                self.mask |= RT_PROPERNAME
            elif final_atom == CAT_N and not self.category.ismodifier \
                    and not self.category.test_returns_modifier():
                #universe.append(self.refs[0])
                #freerefs = self.refs[1:]
                self.mask |= (RT_ENTITY | RT_PLURAL) if self.pos == POS_NOUN_S else RT_ENTITY
            elif len(self.refs) == 1 and final_atom == CAT_N \
                    and (self.category.ismodifier or self.category.test_returns_modifier()):
                self.mask |= RT_ATTRIBUTE

            if self.pos == POS_POSSESSIVE:
                self.mask |= RT_POSSESSIVE

            if template.isfinalevent:
                if self.category == CAT_INFINITIVE:
                    # Make sure we have one freeref. For functors it is a bad idea to use an empty DrsProduction
                    # as the spans can be deleted by ProductionList.flatten().
                    d = DrsProduction([], [self.refs[0]], span=span)
                    # Having a DRS prevents deletion of TO constituent
                    self.drs = DRS([], [])
                elif self.pos == POS_MODAL:
                    self.mask |= RT_EVENT_MODAL
                    self.drs = DRS([], [Rel(self.stem, [refs[0]]),
                                        Rel('_MODAL', [refs[0]])])
                    d = DrsProduction([], self.refs, span=span)
                else:
                    self.drs = DRS([], self._build_conditions([], refs, template))
                    d = DrsProduction([], self.refs, span=span)
            else:
                self.drs = DRS(universe, self._build_conditions([], refs, template))
                d = DrsProduction(universe, freerefs, span=span)

        d.set_lambda_refs([final_ref])
        d.set_category(final_atom)
        fn = template.create_functor(rule_map, d)
        return fn
def test10_Ccgbank_00_0099(self):
    text = "Plans that give advertisers discounts for maintaining or increasing ad spending have become permanent fixtures at the news weeklies and underscore the fierce competition between Newsweek, Time Warner Inc.'s Time magazine, and Mortimer B. Zuckerman's U.S. News & World Report."
    mtext = preprocess_sentence(text)
    derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
    pt = parse_ccg_derivation(derivation)
    sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
    d = sentence.get_drs()
    dprint(pt_to_ccg_derivation(pt))
    dprint(d)
    f = sentence.get_np_nominals()
    phrases = [sp.text for r, sp in f]
    self.assertTrue('Plans' in phrases)
    self.assertTrue('advertisers' in phrases)
    self.assertTrue('discounts' in phrases)
    self.assertTrue('ad spending' in phrases)
    self.assertTrue('permanent fixtures' in phrases)
    self.assertTrue('the news weeklies' in phrases)
    self.assertTrue('the fierce competition' in phrases)
    self.assertTrue("Newsweek" in phrases)
    self.assertTrue("Time-Warner-Inc." in phrases)
    self.assertTrue("Time-magazine" in phrases)
    self.assertTrue("Mortimer-B.-Zuckerman" in phrases)
    self.assertTrue("U.S.-News-&-World-Report" in phrases)
    vf = sentence.get_vp_nominals()
    vphrases = [sp.text for r, sp in vf]
    self.assertTrue('give' in vphrases)
    self.assertTrue('maintaining increasing' in vphrases)
    self.assertTrue('have become' in vphrases)
    self.assertTrue('underscore' in vphrases)
    give = filter(lambda x: 'give' == x[1].text, vf)[0][0]
    become = filter(lambda x: 'have become' == x[1].text, vf)[0][0]
    uscore = filter(lambda x: 'underscore' == x[1].text, vf)[0][0]
    minc = filter(lambda x: 'maintaining increasing' == x[1].text, vf)[0][0]
    plans = filter(lambda x: 'Plans' == x[1].text, f)[0][0]
    advertisers = filter(lambda x: 'advertisers' == x[1].text, f)[0][0]
    discounts = filter(lambda x: 'discounts' == x[1].text, f)[0][0]
    spending = filter(lambda x: 'ad spending' == x[1].text, f)[0][0]
    fixtures = filter(lambda x: 'permanent fixtures' == x[1].text, f)[0][0]
    weeklies = filter(lambda x: 'the news weeklies' == x[1].text, f)[0][0]
    timeinc = filter(lambda x: 'Time-Warner-Inc.' == x[1].text, f)[0][0]
    timemag = filter(lambda x: 'Time-magazine' == x[1].text, f)[0][0]
    mortimer = filter(lambda x: 'Mortimer-B.-Zuckerman' == x[1].text, f)[0][0]
    uswr = filter(lambda x: 'U.S.-News-&-World-Report' == x[1].text, f)[0][0]
    self.assertTrue(d.find_condition(Rel('_ARG0', [give, plans])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [give, advertisers])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG2', [give, discounts])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [minc, plans])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [minc, spending])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG0', [become, plans])) is not None)
    self.assertTrue(d.find_condition(Rel('_ARG1', [become, fixtures])) is not None)
    self.assertTrue(d.find_condition(Rel('_POSS', [mortimer, uswr])) is not None)
    self.assertTrue(d.find_condition(Rel('_POSS', [timeinc, timemag])) is not None)
def to_drs(self):
    """Convert to a DRS relation condition."""
    refs = [DRSRef(r.encode('utf-8')) for r in self[1:]]
    return Rel(DRSRelation(self[0].encode('utf-8')), refs)