示例#1
0
 def test05_AndOfVerb_AndOfObj(self):
     """Coordinated verbs plus a three-way object conjunction.

     'makes and distributes' collapses into the single verb phrase
     'makes distributes'; each conjoined object is its own phrase.
     """
     text = "Bell makes and distributes computers, electronics, and building products"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT
                                 | RT_ATTRIBUTE)
     phrases = [span.text for _, span in f.iteritems()]
     for expected in ('Bell', 'makes distributes', 'computers',
                      'electronics'):
         self.assertTrue(expected in phrases)
     # Note if we add RT_EMPTY_DRS to the selection criteria then this phrase becomes 'and building products'
     self.assertTrue('building products' in phrases)
     self.assertEqual(5, len(phrases))

     def pick(txt):
         # First (referent, span) pair whose phrase text equals txt.
         return [p for p in f.iteritems() if p[1].text == txt][0]

     verb1 = pick('makes distributes')
     agent = pick('Bell')
     theme1 = pick('computers')
     theme2 = pick('electronics')
     theme3 = pick('building products')
     X1 = agent[0]
     Y1 = theme1[0]
     Y2 = theme2[0]
     Y3 = theme3[0]
     E1 = verb1[0]
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E1, X1])))
     # TODO: should we add proposition for multi NP's conjoined?
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E1, Y3])))
示例#2
0
 def test01_AndOfSubj(self):
     """Conjoined subjects: 'John and Paul went to the movies'."""
     text = "John and Paul went to the movies"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     phrases = [span.text for _, span in f.iteritems()]
     for expected in ('John', 'Paul', 'went'):
         self.assertTrue(expected in phrases)
     # Referents for each phrase of interest (first match wins).
     J = [p for p in f.iteritems() if p[1].text == 'John'][0][0]
     P = [p for p in f.iteritems() if p[1].text == 'Paul'][0][0]
     E = [p for p in f.iteritems() if p[1].text == 'went'][0][0]
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E])))
     self.assertIsNotNone(d.find_condition(Rel('go', [E])))
     self.assertIsNotNone(d.find_condition(Rel('John', [J])))
     self.assertIsNotNone(d.find_condition(Rel('Paul', [P])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E, J])))
示例#3
0
 def test10_OrOfVerb_OrInBrackets(self):
     """Coordinated verbs ('perceived or known or inferred') plus a
     parenthesized 'or' clause; checks phrase selection and role linking.
     """
     text = "That which is perceived or known or inferred to have its own distinct existence (living or nonliving)"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     # nodups=True: request the DRS without duplicate conditions.
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     # RT_EMPTY_DRS adds 'or' to phrases
     # Select WDT-tagged tokens (e.g. 'which') or any span whose mask does
     # not include RT_EMPTY_DRS; contiguous=False permits gapped phrases.
     f = sentence.select_phrases(lambda x: x.pos is POS.from_cache('WDT') or \
                                                0 == (x.mask & RT_EMPTY_DRS),
                                 contiguous=False)
     phrases = [sp.text for r, sp in f.iteritems()]
     self.assertTrue('That which' in phrases)
     self.assertTrue('have' in phrases)
     self.assertTrue('is perceived known inferred' in phrases)
     self.assertTrue('its own distinct existence' in phrases)
     verb1 = filter(lambda x: 'is perceived known inferred' == x[1].text,
                    f.iteritems())[0]
     verb2 = filter(lambda x: 'have' == x[1].text, f.iteritems())[0]
     agent = filter(lambda x: 'That which' == x[1].text, f.iteritems())[0]
     theme = filter(lambda x: 'its own distinct existence' == x[1].text,
                    f.iteritems())[0]
     X1 = agent[0]
     E1 = verb1[0]
     E2 = verb2[0]
     # NOTE(review): digs referents out of the theme span's internals and
     # relies on the ordering of .refs - confirm against the Span class.
     X2 = theme[1][0].refs[1]
     X3 = theme[1][1].refs[0]
     self.assertTrue(d.find_condition(Rel('_EVENT', [E1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG0', [E1, X1])) is not None)
     self.assertTrue(d.find_condition(Rel('_ARG1', [E1, E2])) is not None)
     # TODO: should the theme attach to X2?
     self.assertTrue(d.find_condition(Rel('_ARG1', [E2, X3])) is not None)
     self.assertTrue(d.find_condition(Rel('_POSS', [X2, X3])) is not None)
示例#4
0
    def test3_GetSuggestions(self):
        """Build a SymSpell dictionary from WordNet plus the LDC raw corpus
        and verify single- and multi-suggestion spelling lookups.
        """
        ldcpath = os.path.join(PROJDIR, 'data', 'ldc', 'ccgbank_1_1', 'data',
                               'RAW')
        dirlist = os.listdir(ldcpath)
        spellchecker = SymSpell()
        # Seed the dictionary from WordNet; stats accumulates corpus counters
        # (indices 0 and 1 are used below) across the .raw files.
        stats = spellchecker.build_from_wordnet()
        for fname in dirlist:
            f, x = os.path.splitext(fname)
            if x != '.raw':
                continue
            ldcpath1 = os.path.join(ldcpath, fname)
            with open(ldcpath1, 'r') as fp:
                stats = spellchecker.build_from_corpus(fp, stats)

        # Diagnostic output only. (Previously wrapped in a dead 'if True:'
        # block - removed, behavior unchanged.)
        dprint("total words processed: %i" % stats[0])
        dprint("total unique words in corpus: %i" % stats[1])
        dprint(
            "total items in dictionary (corpus words and deletions): %i" %
            len(spellchecker.dictionary))
        dprint("  edit distance for deletions: %i" %
               spellchecker.max_edit_distance)
        dprint("  length of longest word in corpus: %i" %
               spellchecker.longest_word_length)

        # Feed a small ad-hoc corpus through an in-memory stream.
        strm = StringIO.StringIO()
        strm.write("excepted except exception exceptional\n")
        strm.seek(0)
        spellchecker.build_from_corpus(strm)

        # Single best suggestion, then the sorted n-best list.
        suggestion = spellchecker.get_suggestions('acept', BEST_SUGGESTION)
        self.assertEqual('accept', suggestion)
        suggestion = spellchecker.get_suggestions('cepted', NBEST_SUGGESTIONS)
        self.assertListEqual(['accepted', 'excepted', 'sceptred'],
                             sorted(suggestion))
示例#5
0
 def test03_OrOfObj(self):
     """'or' coordination of objects: 'games or sport'."""
     text = "To participate in games or sport"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_ENTITY | RT_EVENT)
     phrases = [span.text for _, span in f.iteritems()]
     for expected in ('participate', 'games', 'sport'):
         self.assertTrue(expected in phrases)
     # Referents for the two objects and the event.
     X1 = [p for p in f.iteritems() if p[1].text == 'games'][0][0]
     X2 = [p for p in f.iteritems() if p[1].text == 'sport'][0][0]
     E = [p for p in f.iteritems() if p[1].text == 'participate'][0][0]
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E])))
     self.assertIsNotNone(d.find_condition(Rel('participate', [E])))
     self.assertIsNotNone(d.find_condition(Rel('games', [X1])))
     self.assertIsNotNone(d.find_condition(Rel('sport', [X2])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E, X2])))
示例#6
0
 def test02_AndOfObj(self):
     """Conjoined objects: 'He saw John and Paul'."""
     text = "He saw John and Paul"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_EVENT)
     phrases = [span.text for _, span in f.iteritems()]
     for expected in ('John', 'Paul', 'saw'):
         self.assertTrue(expected in phrases)
     J = [p for p in f.iteritems() if p[1].text == 'John'][0][0]
     P = [p for p in f.iteritems() if p[1].text == 'Paul'][0][0]
     E = [p for p in f.iteritems() if p[1].text == 'saw'][0][0]
     # FIXME: wn lemmatizer does not convert saw to see - I guess to to ambiguity
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E])))
     self.assertIsNotNone(d.find_condition(Rel('saw', [E])))
     self.assertIsNotNone(d.find_condition(Rel('John', [J])))
     self.assertIsNotNone(d.find_condition(Rel('Paul', [P])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E, J])))
示例#7
0
 def test1_Currency_00_0194(self):
     """Currency handling for WSJ 00_0194.

     Verifies tokenization of '$19.3 million' -> '$ 19.3 million', the
     extracted NP/VP nominals, and the thematic-role conditions in the DRS.
     """
     text = r"Without the Cray-3 research and development expenses, the company would have been able to report a profit of $19.3 million for the first half of 1989 rather than the $5.9 million it posted."
     etext = r"Without the Cray-3 research and development expenses , the company would have been able to report a profit of $ 19.3 million for the first half of 1989 rather than the $ 5.9 million it posted"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs(nodups=True)
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [sp.text for r, sp in fnps]
     self.assertTrue('the Cray-3 research and development expenses' in nps)
     self.assertTrue('the company' in nps)
     self.assertTrue('a profit' in nps)
     self.assertTrue('$ 19.3 million' in nps)
     self.assertTrue('the first half' in nps)
     self.assertTrue('the $ 5.9 million' in nps)
     self.assertTrue('1989' in nps)
     fvps = sentence.get_vp_nominals()
     vps = [sp.text for r, sp in fvps]
     self.assertTrue('would have been' in vps)
     self.assertTrue('report' in vps)
     self.assertTrue('posted' in vps)
     # Extract the referent ([0][0]) of the first phrase matching each text.
     would_have_been = filter(lambda x: 'would have been' == x[1].text,
                              fvps)[0][0]
     report = filter(lambda x: 'report' == x[1].text, fvps)[0][0]
     posted = filter(lambda x: 'posted' == x[1].text, fvps)[0][0]
     cray_rnd = filter(
         lambda x: 'the Cray-3 research and development expenses' == x[1].
         text, fnps)[0][0]
     company = filter(lambda x: 'the company' == x[1].text, fnps)[0][0]
     profit = filter(lambda x: 'a profit' == x[1].text, fnps)[0][0]
     first_half = filter(lambda x: 'the first half' == x[1].text,
                         fnps)[0][0]
     n1989 = filter(lambda x: '1989' == x[1].text, fnps)[0][0]
     n19_3M = filter(lambda x: '$ 19.3 million' == x[1].text, fnps)[0][0]
     n5_9M = filter(lambda x: 'the $ 5.9 million' == x[1].text, fnps)[0][0]
     # Role and preposition conditions expected in the DRS.
     self.assertTrue(
         d.find_condition(Rel('without', [would_have_been, cray_rnd]))
         is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [would_have_been, company]))
         is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [report, company])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [report, profit])) is not None)
     self.assertTrue(
         d.find_condition(Rel('of', [profit, n19_3M])) is not None)
     self.assertTrue(
         d.find_condition(Rel('for', [profit, first_half])) is not None)
     self.assertTrue(
         d.find_condition(Rel('of', [first_half, n1989])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [posted, n5_9M])) is not None)
示例#8
0
    def test1_JsonFiles(self):
        """Round-trip one known JSON article through the CCG pipeline.

        Only the document named 9255a890ffe40c05876d8d402044ab11.json is
        processed: its title and every sentence of every paragraph are
        parsed and re-serialized; the test passes if nothing raises.
        """
        filelist = os.listdir(datapath)
        allfiles = []
        for fn in filelist:
            if not os.path.isfile(os.path.join(datapath, fn)):
                continue
            f, x = os.path.splitext(fn)
            # Restrict to the single known test document.
            if x == '.json' and f == '9255a890ffe40c05876d8d402044ab11':
                allfiles.append(os.path.join(datapath, fn))

        for fn in allfiles:
            with open(fn, 'r') as fd:
                # NOTE(review): json.load(..., encoding=...) is Python 2
                # only; the keyword was removed in Python 3.9.
                body = json.load(fd, encoding='utf-8')

            smod = preprocess_sentence(body['title'])
            ccgbank = grpc.ccg_parse(self.stub, smod, grpc.DEFAULT_SESSION)
            pt = parse_ccg_derivation(ccgbank)
            ccg = process_ccg_pt(pt)

            ccgbody = {}
            ccgbody['story'] = {
                'title': [x.get_json() for x in ccg.get_span()],
                'paragraphs': []
            }
            # Split the article body into stripped, non-empty paragraphs.
            paragraphs = filter(
                lambda y: len(y) != 0,
                map(lambda x: x.strip(), body['content'].split('\n')))
            # i/j only feed the progress dprint; the [i:]/[j:] slices start
            # at 0 and so cover the whole list.
            i = 0
            for p in paragraphs[i:]:
                sentences = filter(lambda x: len(x.strip()) != 0,
                                   sent_tokenize(p))
                sp = []
                j = 0
                for s in sentences[j:]:
                    dprint('p:s = %d:%d' % (i, j))
                    smod = preprocess_sentence(s)
                    ccgbank = grpc.ccg_parse(self.stub, smod,
                                             grpc.DEFAULT_SESSION)
                    pt = parse_ccg_derivation(ccgbank)
                    ccg = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
                    sp.append([x.get_json() for x in ccg.get_span()])
                    j += 1
                ccgbody['story']['paragraphs'].append(sp)
                i += 1

            # Result is discarded; serialization is exercised for errors only.
            msgbody = json.dumps(ccgbody)

        pass
示例#9
0
 def test2_Date_21_0985(self):
     """Date/colon tokenization and NP extraction for WSJ 21_0985."""
     text = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis: 1989 and Wednesday October 4, 1989."
     etext = r"Annualized interest rates on certain investments as reported by the Federal Reserve Board on a weekly-average basis : 1989 and Wednesday October 4 , 1989"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [span.text for _, span in fnps]
     for expected in ('Annualized interest rates', 'certain investments',
                      'the Federal-Reserve-Board', 'a weekly-average basis',
                      'Wednesday October 4'):
         self.assertTrue(expected in nps)
示例#10
0
 def test2_Date_00_1228(self):
     """Abbreviated dates ('Jan. 2', 'Dec. 15') survive preprocessing."""
     text = r"The reduced dividend is payable Jan. 2 to stock of record Dec. 15"
     etext = r"The reduced dividend is payable Jan. 2 to stock of record Dec. 15"
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [span.text for _, span in fnps]
     for expected in ('The reduced dividend', 'payable', 'Jan. 2',
                      'Dec. 15', 'stock', 'record'):
         self.assertTrue(expected in nps)
示例#11
0
 def test4_ApposInterrupt(self):
     """Appositive interrupt links 'Bell' to its description via _AKA."""
     text = r"Bell, a telecommunications company, which is located in Los Angeles, makes and distributes electronics, computers, and building products"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [span.text for _, span in f]
     self.assertTrue('Bell' in phrases)
     self.assertTrue('a telecommunications company' in phrases)
     X = [p for p in f if p[1].text == 'Bell'][0][0]
     Y = [p for p in f if p[1].text == 'a telecommunications company'][0][0]
     self.assertNotEqual(X, Y)
     self.assertIsNotNone(d.find_condition(Rel('_AKA', [X, Y])))
     # Splitting on '_AKA' yields exactly 2 parts => exactly one occurrence.
     self.assertTrue(len(repr(d).split('_AKA')) == 2)
示例#12
0
 def test3_ApposInterrupt(self):
     """Appositive interrupt links 'Robbie' to his description via _AKA."""
     text = r"Robbie, a hot-tempered tennis player, charged the umpire and tried to crack the poor man's skull with a racket."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [span.text for _, span in f]
     self.assertTrue('Robbie' in phrases)
     self.assertTrue('a hot-tempered tennis player' in phrases)
     X = [p for p in f if p[1].text == 'Robbie'][0][0]
     Y = [p for p in f if p[1].text == 'a hot-tempered tennis player'][0][0]
     self.assertNotEqual(X, Y)
     self.assertIsNotNone(d.find_condition(Rel('_AKA', [X, Y])))
     # Splitting on '_AKA' yields exactly 2 parts => exactly one occurrence.
     self.assertTrue(len(repr(d).split('_AKA')) == 2)
示例#13
0
 def test2_ApposInterrupt(self):
     """Appositive with a possessive: 'Reliable' aka the beagle NP."""
     text = r"Reliable, Diane's eleven-year-old beagle, chews holes in the living room carpeting as if he were still a puppy."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [span.text for _, span in f]
     for expected in ('Reliable', 'eleven-year-old beagle', 'Diane'):
         self.assertTrue(expected in phrases)
     X = [p for p in f if p[1].text == 'Reliable'][0][0]
     Y = [p for p in f if p[1].text == 'eleven-year-old beagle'][0][0]
     self.assertNotEqual(X, Y)
     self.assertIsNotNone(d.find_condition(Rel('_AKA', [X, Y])))
     # Splitting on '_AKA' yields exactly 2 parts => exactly one occurrence.
     self.assertTrue(len(repr(d).split('_AKA')) == 2)
示例#14
0
 def test1_Currency_00_0195(self):
     """Currency tokenization '$20.5 million' -> '$ 20.5 million' (WSJ 00_0195)."""
     text = r"On the other hand, had it existed then, Cray Computer would have incurred a $20.5 million loss."
     etext = r"On the other hand , had it existed then , Cray Computer would have incurred a $ 20.5 million loss ."
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [span.text for _, span in fnps]
     for expected in ('the other hand', 'Cray-Computer', '$ 20.5 million'):
         self.assertTrue(expected in nps)
     fvps = sentence.get_vp_nominals()
     vps = [span.text for _, span in fvps]
     for expected in ('had', 'existed', 'would have incurred'):
         self.assertTrue(expected in vps)
示例#15
0
 def test10_Brutus(self):
     """Passive voice role assignment: 'Ceasar was stabbed by Brutus'."""
     text = "Ceasar was stabbed by Brutus"
     derivation = grpc.ccg_parse(self.stub, text, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [span.text for _, span in fnps]
     self.assertTrue('Brutus' in nps)
     self.assertTrue('Ceasar' in nps)
     fvps = sentence.get_vp_nominals()
     vps = [span.text for _, span in fvps]
     self.assertTrue('was stabbed' in vps)
     E = [p for p in fvps if p[1].text == "was stabbed"][0][0]
     A1 = [p for p in fnps if p[1].text == "Brutus"][0][0]
     A0 = [p for p in fnps if p[1].text == "Ceasar"][0][0]
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E, A0])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E, A1])))
示例#16
0
 def test1_EasySRL_BoyGirl2(self):
     """Hand-written EasySRL derivation: 'The boy will want to believe the girl'.

     Builds the DRS from a literal CCG-bank derivation (no parser service)
     and checks the linear DRS string, the constituent list, and the
     verbnet-oriented constituent string.
     """
     txt = r'''(<T S[dcl] 1 2> (<T NP 0 2> (<L NP/N DT DT The NP/N>) (<L N NN NN boy N>) ) (<T S[dcl]\NP 0 2>
     (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP)/(S[b]\NP)>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/(S[to]\NP) VB VB want (S[b]\NP)/(S[to]\NP)>) (<T S[to]\NP 0 2>
     (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP)/(S[b]\NP)>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/NP VB VB believe (S[b]\NP)/NP>) (<T NP 0 2> (<L NP/N DT DT the NP/N>)
     (<L N NN NN girl N>) ) ) ) ) ) )'''
     pt = parse_ccg_derivation(txt)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.resolve_proper_names()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     # Expected linear DRS rendering.
     x = '[X1,E2,E3,X4| boy(X1),will(E2),_MODAL(E2),want(E2),_EVENT(E2),_ARG0(E2,X1),_ARG1(E2,E3),believe(E3),_EVENT(E3),_ARG0(E3,X1),_ARG1(E3,X4),girl(X4)]'
     self.assertEqual(x, s)
     a = get_constituents_string_list(ccg)
     dprint('\n'.join(a))
     # NOTE(review): '#' appears to mark the constituent head - confirm.
     x = [
         'S(The boy #will want to believe the girl)',
         'NP(#The boy)',
         'S_INF(#want to believe the girl)',
         'S_INF(#to believe the girl)',
         'S_INF(#believe the girl)',
         'NP(#the girl)'
     ]
     self.assertListEqual(x, a)
     s = get_constituent_string(ccg.get_verbnet_sentence())
     self.assertEqual('NP(#The boy) VP(#will want) S_INF(#to believe) NP(#the girl)', s)
示例#17
0
 def test1_PP_Attachment(self):
     """PP attachment for 'Eat spaghetti with meatballs'.

     Checks the constituent list, the constituent tree shape, and the
     verbnet constituent string.
     """
     # NCCG get the PP attachment wrong
     txt = "Eat spaghetti with meatballs"
     derivation = grpc.ccg_parse(self.stub, txt, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     sent = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sent.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     a = get_constituents_string_list(sent)
     dprint('\n'.join(a))
     x = [
         'S_INF(#Eat spaghetti with meatballs)',  # 0
         'NP(#spaghetti)',  # 1
         'NP(#meatballs)',  # 2
     ]
     self.assertListEqual(x, a)
     # Tree encoded as (constituent-index, [children...]); compared via
     # repr so tuple/list structure must match exactly.
     x = (0, [(1, []), (2, [])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     self.assertEqual(repr(x), repr(a))
     vsent = get_constituent_string(sent.get_verbnet_sentence())
     self.assertEqual('S_INF(#Eat with) NP(#spaghetti) NP(#meatballs)',
                      vsent)
示例#18
0
 def test10_Ccgbank_00_0036(self):
     """NP/VP nominal extraction for WSJ 00_0036 (possessives, dates)."""
     text = "Average maturity of the funds' investments lengthened by a day to 41 days, the longest since early August, according to Donoghue's."
     etext = "Average maturity of the funds ' investments lengthened by a day to 41 days , the longest since early August , according to Donoghue 's ."
     mtext = preprocess_sentence(text)
     self.assertEqual(etext, mtext)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     fnps = sentence.get_np_nominals()
     nps = [span.text for _, span in fnps]
     for expected in ('the funds', 'a day', '41 days', 'the longest',
                      'early August'):
         self.assertTrue(expected in nps)
     fvps = sentence.get_vp_nominals()
     vps = [span.text for _, span in fvps]
     for expected in ('lengthened', 'according'):
         self.assertTrue(expected in vps)
示例#19
0
 def test04_AndOfVerb(self):
     """Conjoined verbs with one object: 'Bell makes and distributes computers'."""
     text = "Bell makes and distributes computers"
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.select_phrases(RT_PROPERNAME | RT_ENTITY | RT_EVENT)
     phrases = [span.text for _, span in f.iteritems()]
     for expected in ('Bell', 'makes distributes', 'computers'):
         self.assertTrue(expected in phrases)
     # Referents for the merged verb phrase, the agent, and the theme.
     E1 = [p for p in f.iteritems() if p[1].text == 'makes distributes'][0][0]
     X1 = [p for p in f.iteritems() if p[1].text == 'Bell'][0][0]
     X2 = [p for p in f.iteritems() if p[1].text == 'computers'][0][0]
     self.assertIsNotNone(d.find_condition(Rel('_EVENT', [E1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG0', [E1, X1])))
     self.assertIsNotNone(d.find_condition(Rel('_ARG1', [E1, X2])))
示例#20
0
 def test2_Wsj_0056_1(self):
     """Degenerate single-token sentence '@' still yields one constituent (RAW 1043)."""
     txt = '@'
     derivation = grpc.ccg_parse(self.stub, txt, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     sent = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sent.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     a = get_constituents_string_list(sent)
     dprint('\n'.join(a))
     self.assertListEqual(['S(#@)'], a)
示例#21
0
 def test10_Ccgbank_00_0099(self):
     """WSJ 00_0099: nested coordination, possessives, and proper-noun
     compounds in the discount-plan sentence.

     Checks NP/VP phrase extraction and the ARG/_POSS conditions in the DRS.
     """
     text = "Plans that give advertisers discounts for maintaining or increasing ad spending have become permanent fixtures at the news weeklies and underscore the fierce competition between Newsweek, Time Warner Inc.'s Time magazine, and Mortimer B. Zuckerman's U.S. News & World Report."
     mtext = preprocess_sentence(text)
     derivation = grpc.ccg_parse(self.stub, mtext, grpc.DEFAULT_SESSION)
     pt = parse_ccg_derivation(derivation)
     sentence = process_ccg_pt(pt, CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     d = sentence.get_drs()
     dprint(pt_to_ccg_derivation(pt))
     dprint(d)
     f = sentence.get_np_nominals()
     phrases = [sp.text for r, sp in f]
     self.assertTrue('Plans' in phrases)
     self.assertTrue('advertisers' in phrases)
     self.assertTrue('discounts' in phrases)
     self.assertTrue('ad spending' in phrases)
     self.assertTrue('permanent fixtures' in phrases)
     self.assertTrue('the news weeklies' in phrases)
     self.assertTrue('the fierce competition' in phrases)
     self.assertTrue("Newsweek" in phrases)
     # Multi-token proper nouns come back hyphen-joined.
     self.assertTrue("Time-Warner-Inc." in phrases)
     self.assertTrue("Time-magazine" in phrases)
     self.assertTrue("Mortimer-B.-Zuckerman" in phrases)
     self.assertTrue("U.S.-News-&-World-Report" in phrases)
     vf = sentence.get_vp_nominals()
     vphrases = [sp.text for r, sp in vf]
     self.assertTrue('give' in vphrases)
     # 'maintaining or increasing' merges into one verb phrase.
     self.assertTrue('maintaining increasing' in vphrases)
     self.assertTrue('have become' in vphrases)
     self.assertTrue('underscore' in vphrases)
     # Extract the referent ([0][0]) of the first phrase matching each text.
     give = filter(lambda x: 'give' == x[1].text, vf)[0][0]
     become = filter(lambda x: 'have become' == x[1].text, vf)[0][0]
     uscore = filter(lambda x: 'underscore' == x[1].text, vf)[0][0]
     minc = filter(lambda x: 'maintaining increasing' == x[1].text,
                   vf)[0][0]
     plans = filter(lambda x: 'Plans' == x[1].text, f)[0][0]
     advertisers = filter(lambda x: 'advertisers' == x[1].text, f)[0][0]
     discounts = filter(lambda x: 'discounts' == x[1].text, f)[0][0]
     spending = filter(lambda x: 'ad spending' == x[1].text, f)[0][0]
     fixtures = filter(lambda x: 'permanent fixtures' == x[1].text, f)[0][0]
     weeklies = filter(lambda x: 'the news weeklies' == x[1].text, f)[0][0]
     timeinc = filter(lambda x: 'Time-Warner-Inc.' == x[1].text, f)[0][0]
     timemag = filter(lambda x: 'Time-magazine' == x[1].text, f)[0][0]
     mortimer = filter(lambda x: 'Mortimer-B.-Zuckerman' == x[1].text,
                       f)[0][0]
     uswr = filter(lambda x: 'U.S.-News-&-World-Report' == x[1].text,
                   f)[0][0]
     # Verb argument structure and possessive conditions.
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [give, plans])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [give, advertisers])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG2', [give, discounts])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [minc, plans])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [minc, spending])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG0', [become, plans])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_ARG1', [become, fixtures])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_POSS', [mortimer, uswr])) is not None)
     self.assertTrue(
         d.find_condition(Rel('_POSS', [timeinc, timemag])) is not None)
示例#22
0
文件: ccg_test.py 项目: marbles-ai/ie
    def test7_RuleUniquenessLDC(self):
        """Every derivation step in the LDC ccgbank AUTO files should match
        exactly one combinatory rule.

        Walks the AUTO tree, rebuilds each derivation's execution sequence,
        and fails if get_rule() resolves ambiguously for any ExecOp.
        """
        allfiles = []
        # Climb seven directory levels from this file to the project root.
        projdir = os.path.dirname(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(
                        os.path.dirname(
                            os.path.dirname(os.path.dirname(__file__)))))))
        ldcpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data',
                               'AUTO')
        dirlist1 = os.listdir(ldcpath)
        # Collect every regular file exactly two levels below AUTO.
        for dir1 in dirlist1:
            ldcpath1 = os.path.join(ldcpath, dir1)
            if os.path.isdir(ldcpath1):
                dirlist2 = os.listdir(ldcpath1)
                for dir2 in dirlist2:
                    ldcpath2 = os.path.join(ldcpath1, dir2)
                    if os.path.isfile(ldcpath2):
                        allfiles.append(ldcpath2)

        failed_parse = 0
        ambiguous = []
        for fn in allfiles:
            with open(fn, 'r') as fd:
                lines = fd.readlines()
            # NOTE(review): pairs a header line with the derivation line at a
            # stride of 10 lines - confirm against the AUTO file format.
            for hdr, ccgbank in zip(lines[0::10], lines[1::10]):
                dprint(hdr.strip())
                ccg = Ccg2Drs()
                try:
                    pt = parse_ccg_derivation(ccgbank)
                    ccg.build_execution_sequence(pt)
                except Exception:
                    # Count unparseable derivations but keep going.
                    failed_parse += 1
                    continue
                self.assertIsNotNone(pt)

                for op in ccg.exeque:
                    if isinstance(op, PushOp):
                        continue
                    self.assertIsInstance(op, ExecOp)
                    left = op.sub_ops[0].category
                    result = op.category
                    if len(op.sub_ops) == 2:
                        right = op.sub_ops[1].category
                    else:
                        # Unary operation: no right-hand category.
                        right = CAT_EMPTY

                    exclude = []
                    # Should not have ambiguity
                    # NOTE(review): get_rule() appears to record each match in
                    # 'exclude', so repeated calls enumerate alternatives;
                    # more than one entry means the rule set is ambiguous for
                    # this category combination.
                    rule = get_rule(left, right, result, exclude)
                    limit = 5
                    rstr = ''
                    while rule is not None:
                        rstr += repr(rule) + '|'
                        rule = get_rule(left, right, result, exclude)
                        limit -= 1
                        if limit == 0:
                            # Redundant extra call - presumably left as a
                            # convenient breakpoint target when debugging.
                            rule = get_rule(left, right, result, exclude)
                            break
                    if len(exclude) > 1:
                        ambiguous.append(
                            ('%s <- %s <{%s}> %s' %
                             (result, left, rstr, right), exclude))
                    self.assertGreater(limit, 0)

        for x in ambiguous:
            dprint('ambiguous rule: %s {%s}' % x)
        self.assertTrue(len(ambiguous) == 0)
示例#23
0
    def test2_GOLD_Wsj0051_13(self):
        """Constituent extraction for the GOLD derivation of wsj_0051.13.

        Sentence: "The bids, he added, were contrary to common sense."
        Parses the CCGbank derivation, builds the DRS, then checks the flat
        list of constituent strings ('#' marks the head word of each
        constituent).
        """
        txt = r'''
(<T S[dcl] 0 2> 
  (<T S[dcl] 1 2> 
    (<T NP 1 2> 
      (<L NP[nb]/N DT DT The NP[nb]_273/N_273>) 
      (<L N NNS NNS bids N>) 
    ) 
    (<T S[dcl]\NP 1 2> 
      (<T (S\NP)/(S\NP) 1 2> 
        (<L , , , , ,>) 
        (<T (S\NP)/(S\NP) 0 2> 
          (<T S[dcl]/S[dcl] 1 2> 
            (<T S/(S\NP) 0 1> 
              (<L NP PRP PRP he NP>) 
            ) 
            (<L (S[dcl]\NP)/S[dcl] VBD VBD added (S[dcl]\NP_242)/S[dcl]_243>) 
          ) 
          (<L , , , , ,>) 
        ) 
      ) 
      (<T S[dcl]\NP 0 2> 
        (<L (S[dcl]\NP)/(S[adj]\NP) VBD VBD were (S[dcl]\NP_211)/(S[adj]_212\NP_211:B)_212>) 
        (<T S[adj]\NP 0 2> 
          (<L (S[adj]\NP)/PP JJ JJ contrary (S[adj]\NP_219)/PP_220>) 
          (<T PP 0 2> 
            (<L PP/NP TO TO to PP/NP_225>) 
            (<T NP 0 1> 
              (<T N 1 2> 
                (<L N/N JJ JJ common N_234/N_234>) 
                (<L N NN NN sense N>) 
              ) 
            ) 
          ) 
        ) 
      ) 
    ) 
  ) 
  (<L . . . . .>) 
) 
'''
        pt = parse_ccg_derivation(txt)
        s = sentence_from_pt(pt)
        dprint(s)
        self.assertIsNotNone(pt)
        ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
        ccg.build_execution_sequence(pt)
        ccg.create_drs()
        ccg.final_rename()
        d = ccg.get_drs()
        s = d.show(SHOW_LINEAR)
        dprint(s)
        sent = ccg.get_verbnet_sentence()
        a = get_constituents_string_list(sent)
        # Expected constituents in derivation order.
        x = [
            'NP(The #bids)',
            'ADVP(he #added)',
            'VP(#were)',
            'ADJP(#contrary to common sense)',
            'PP(#to)',
            'NP(common #sense)'
        ]
        dprint('\n'.join(a))
        self.assertListEqual(x, a)
示例#24
0
 def test2_GOLD_Wsj0003_1(self):
     """Constituent extraction and constituent tree for GOLD wsj_0003.1.

     Parses the CCGbank derivation for the sentence quoted below, builds
     the DRS, then checks both the flat constituent-string list ('#' marks
     each constituent's head word) and the nested constituent tree
     returned by get_constituent_tree().
     """
     # A form of asbestos once used to make Kent cigarette filters has caused a high percentage of cancer deaths
     # among a group of workers exposed to it more than 30 years ago, researchers reported.
     # ID=wsj_0003.1 PARSER=GOLD NUMPARSE=1
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T S[dcl] 1 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 1 2>
     #                       (<L NP[nb]/N DT DT A NP[nb]_166/N_166>)
     #                       (<L N NN NN form N>)
     #                   )
     #                   (<T NP\NP 0 2>
     #                       (<L (NP\NP)/NP IN IN of (NP_174\NP_174)/NP_175>)
     #                       (<T NP 0 1>
     #                           (<L N NN NN asbestos N>)
     #                       )
     #                   )
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[pss]\NP 1 2>
     #                       (<L (S\NP)/(S\NP) RB RB once (S_235\NP_230)_235/(S_235\NP_230)_235>)
     #                       (<T S[pss]\NP 0 2>
     #                           (<L (S[pss]\NP)/(S[to]\NP) VBN VBN used (S[pss]\NP_187)/(S[to]_188\NP_187:B)_188>)
     #                           (<T S[to]\NP 0 2>
     #                               (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP_197)/(S[b]_198\NP_197:B)_198>)
     #                               (<T S[b]\NP 0 2>
     #                                   (<L (S[b]\NP)/NP VB VB make (S[b]\NP_205)/NP_206>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NNP NNP Kent N_222/N_222>)
     #                                           (<T N 1 2>
     #                                               (<L N/N NN NN cigarette N_215/N_215>)
     #                                               (<L N NNS NNS filters N>)
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<T S[dcl]\NP 0 2>
     #               (<L (S[dcl]\NP)/(S[pt]\NP) VBZ VBZ has (S[dcl]\NP_23)/(S[pt]_24\NP_23:B)_24>)
     #               (<T S[pt]\NP 0 2>
     #                   (<L (S[pt]\NP)/NP VBN VBN caused (S[pt]\NP_31)/NP_32>)
     #                       (<T NP 0 2>
     #                           (<T NP 0 2>
     #                               (<T NP 1 2>
     #                                   (<L NP[nb]/N DT DT a NP[nb]_46/N_46>)
     #                                   (<T N 1 2>
     #                                       (<L N/N JJ JJ high N_41/N_41>)
     #                                       (<L N NN NN percentage N>)
     #                                   )
     #                               )
     #                               (<T NP\NP 0 2>
     #                                   (<L (NP\NP)/NP IN IN of (NP_54\NP_54)/NP_55>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NN NN cancer N_64/N_64>)
     #                                           (<L N NNS NNS deaths N>)
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                           (<T NP\NP 0 2>
     #                               (<L (NP\NP)/NP IN IN among (NP_73\NP_73)/NP_74>)
     #                               (<T NP 0 2>
     #                                   (<T NP 1 2>
     #                                       (<L NP[nb]/N DT DT a NP[nb]_81/N_81>)
     #                                       (<L N NN NN group N>)
     #                                   )
     #                                   (<T NP\NP 0 2>
     #                                       (<L (NP\NP)/NP IN IN of (NP_89\NP_89)/NP_90>)
     #                                       (<T NP 0 2>
     #                                           (<T NP 0 1>
     #                                               (<L N NNS NNS workers N>)
     #                                           )
     #                                           (<T NP\NP 0 1>
     #                                               (<T S[pss]\NP 0 2>
     #                                                   (<T S[pss]\NP 0 2>
     #                                                       (<L (S[pss]\NP)/PP VBN VBN exposed (S[pss]\NP_100)/PP_101>)
     #                                                       (<T PP 0 2>
     #                                                           (<L PP/NP TO TO to PP/NP_106>)
     #                                                           (<L NP PRP PRP it NP>)
     #                                                       )
     #                                                   )
     #                                                   (<T (S\NP)\(S\NP) 1 2>
     #                                                       (<T NP 0 1>
     #                                                           (<T N 1 2>
     #                                                               (<T N/N 1 2>
     #                                                                   (<T (N/N)/(N/N) 1 2>
     #                                                                       (<L S[adj]\NP RBR RBR more S[adj]\NP_153>)
     #                                                                       (<L ((N/N)/(N/N))\(S[adj]\NP) IN IN than ((N_147/N_139)_147/(N_147/N_139)_147)\(S[adj]_148\NP_142)_148>)
     #                                                                   )
     #                                                                   (<L N/N CD CD 30 N_131/N_131>)
     #                                                               )
     #                                                               (<L N NNS NNS years N>)
     #                                                           )
     #                                                       )
     #                                                       (<L ((S\NP)\(S\NP))\NP IN IN ago ((S_121\NP_116)_121\(S_121\NP_116)_121)\NP_122>)
     #                                                   )
     #                                               )
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<T S[dcl]\S[dcl] 1 2>
     #               (<L , , , , ,>)
     #               (<T S[dcl]\S[dcl] 1 2>
     #                   (<T NP 0 1>
     #                       (<L N NNS NNS researchers N>)
     #                   )
     #                   (<L (S[dcl]\S[dcl])\NP VBD VBD reported (S[dcl]\S[dcl]_8)\NP_9>)
     #               )
     #           )
     #       )
     #       (<L . . . . .>)
     #   )
     txt = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 1 2>
     (<L NP[nb]/N DT DT A NP[nb]_166/N_166>) (<L N NN NN form N>) ) (<T NP\NP 0 2>
     (<L (NP\NP)/NP IN IN of (NP_174\NP_174)/NP_175>) (<T NP 0 1> (<L N NN NN asbestos N>) ) ) ) (<T NP\NP 0 1>
     (<T S[pss]\NP 1 2> (<L (S\NP)/(S\NP) RB RB once (S_235\NP_230)_235/(S_235\NP_230)_235>) (<T S[pss]\NP 0 2>
     (<L (S[pss]\NP)/(S[to]\NP) VBN VBN used (S[pss]\NP_187)/(S[to]_188\NP_187:B)_188>) (<T S[to]\NP 0 2>
     (<L (S[to]\NP)/(S[b]\NP) TO TO to (S[to]\NP_197)/(S[b]_198\NP_197:B)_198>) (<T S[b]\NP 0 2>
     (<L (S[b]\NP)/NP VB VB make (S[b]\NP_205)/NP_206>) (<T NP 0 1> (<T N 1 2> (<L N/N NNP NNP Kent N_222/N_222>)
     (<T N 1 2> (<L N/N NN NN cigarette N_215/N_215>) (<L N NNS NNS filters N>) ) ) ) ) ) ) ) ) ) (<T S[dcl]\NP 0 2>
     (<L (S[dcl]\NP)/(S[pt]\NP) VBZ VBZ has (S[dcl]\NP_23)/(S[pt]_24\NP_23:B)_24>) (<T S[pt]\NP 0 2>
     (<L (S[pt]\NP)/NP VBN VBN caused (S[pt]\NP_31)/NP_32>) (<T NP 0 2> (<T NP 0 2> (<T NP 1 2>
     (<L NP[nb]/N DT DT a NP[nb]_46/N_46>) (<T N 1 2> (<L N/N JJ JJ high N_41/N_41>) (<L N NN NN percentage N>) ) )
     (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_54\NP_54)/NP_55>) (<T NP 0 1> (<T N 1 2>
     (<L N/N NN NN cancer N_64/N_64>) (<L N NNS NNS deaths N>) ) ) ) ) (<T NP\NP 0 2>
     (<L (NP\NP)/NP IN IN among (NP_73\NP_73)/NP_74>) (<T NP 0 2> (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_81/N_81>)
     (<L N NN NN group N>) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_89\NP_89)/NP_90>) (<T NP 0 2> (<T NP 0 1>
     (<L N NNS NNS workers N>) ) (<T NP\NP 0 1> (<T S[pss]\NP 0 2> (<T S[pss]\NP 0 2>
     (<L (S[pss]\NP)/PP VBN VBN exposed (S[pss]\NP_100)/PP_101>) (<T PP 0 2> (<L PP/NP TO TO to PP/NP_106>)
     (<L NP PRP PRP it NP>) ) ) (<T (S\NP)\(S\NP) 1 2> (<T NP 0 1> (<T N 1 2> (<T N/N 1 2> (<T (N/N)/(N/N) 1 2>
     (<L S[adj]\NP RBR RBR more S[adj]\NP_153>)
     (<L ((N/N)/(N/N))\(S[adj]\NP) IN IN than ((N_147/N_139)_147/(N_147/N_139)_147)\(S[adj]_148\NP_142)_148>) )
     (<L N/N CD CD 30 N_131/N_131>) ) (<L N NNS NNS years N>) ) )
     (<L ((S\NP)\(S\NP))\NP IN IN ago ((S_121\NP_116)_121\(S_121\NP_116)_121)\NP_122>) ) ) ) ) ) ) ) ) ) ) )
     (<T S[dcl]\S[dcl] 1 2> (<L , , , , ,>) (<T S[dcl]\S[dcl] 1 2> (<T NP 0 1> (<L N NNS NNS researchers N>) )
     (<L (S[dcl]\S[dcl])\NP VBD VBD reported (S[dcl]\S[dcl]_8)\NP_9>) ) ) ) (<L . . . . .>) )'''
     pt = parse_ccg_derivation(txt)
     s = sentence_from_pt(pt)
     dprint(s)
     self.assertIsNotNone(pt)
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     sent = ccg.get_verbnet_sentence()
     a = get_constituents_string_list(sent)
     # Expected constituents; '#' marks the head word of each constituent.
     x = [
         'NP(A #form)',              # 0
         'PP(#of)',                  # 1
         'NP(#asbestos)',            # 2
         'ADVP(once #used to make Kent cigarette filters)',   # 3
         'S_INF(#to make)',          # 4
         'NP(Kent cigarette #filters)',  # 5
         'VP(#has caused)',          # 6
         'NP(a high #percentage)',   # 7
         'PP(#of)',                  # 8
         'NP(cancer #deaths)',       # 9
         'PP(#among)',               #10
         'NP(a #group)',             #11
         'PP(#of)',                  #12
         'NP(#workers)',             #13
         'ADVP(#exposed to it more than 30 years ago)',  #14
         'NP(more than 30 #years)',  #15
         'NP(#researchers)',         #16
         'VP(#reported)',            #17
     ]
     dprint('\n'.join(a))
     self.assertListEqual(x, a)
     # Expected constituent tree; indices refer to the list above:
     # 17 VP(reported.)
     #    06 VP(has caused)
     #       00 NP(A form)
     #          01 PP(of)
     #             02 NP(asbestos)
     #          03 ADVP(once used to make Kent cigarette filters)
     #             04 S_INF(to make)
     #                05 NP(Kent cigarette filters)
     #       07 NP(a high percentage)
     #          08 PP(of)
     #             09 NP(cancer deaths)
     #          10 PP(among)
     #             11 NP(a group)
     #                12 PP(of)
     #                   13 NP(workers)
     #                      14 ADVP(exposed to it more than 30 years ago)
     #                         15 NP(more than 30 years)
     #    16 NP(researchers)
     x = (17, [(6, [(0, [(1, [(2, [])]), (3, [(4, [(5, [])])])]), (7, [(8, [(9, [])]), (10, [(11, [(12, [(13, [(14, [(15, [])])])])])])])]), (16, [])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     self.assertEqual(repr(x), repr(a))
示例#25
0
    def test2_GOLD_Wsj0001_2(self):
        """Constituent extraction and constituent tree for GOLD wsj_0001.2.

        Sentence: "Mr. Vinken is chairman of Elsevier N.V., the Dutch
        publishing group."  Also exercises proper-name resolution, so
        'Mr. Vinken' and 'Elsevier N.V.' appear as single hyphenated tokens
        in the expected constituents.
        """
        # Mr. Vinken is chairman of Elsevier N.V. , the Dutch publishing group .
        #
        # PARG (predicate-argument dependencies from CCGbank)
        # 1      0      N/N             1      Vinken Mr.
        # 1      2      (S[dcl]\NP)/NP  1      Vinken is
        # 3      2      (S[dcl]\NP)/NP  2      chairman is
        # 3      4      (NP\NP)/NP      1      chairman of
        # 6      4      (NP\NP)/NP      2      N.V. of
        # 6      5      N/N             1      N.V. Elsevier
        # 11     4      (NP\NP)/NP      2      group of
        # 11     8      NP[nb]/N        1      group the
        # 11     9      N/N             1      group Dutch
        # 11     10     N/N             1      group publishing
        txt = r'''
(<T S[dcl] 0 2>
    (<T S[dcl] 1 2>
        (<T NP 0 1>
            (<T N 1 2>
                (<L N/N NNP NNP Mr. N_142/N_142>)
                (<L N NNP NNP Vinken N>)
            )
        )
        (<T S[dcl]\NP 0 2>
            (<L (S[dcl]\NP)/NP VBZ VBZ is (S[dcl]\NP_87)/NP_88>)
            (<T NP 0 2>
                (<T NP 0 1>
                    (<L N NN NN chairman N>)
                )
                (<T NP\NP 0 2>
                    (<L (NP\NP)/NP IN IN of (NP_99\NP_99)/NP_100>)
                    (<T NP 0 2>
                        (<T NP 0 1>
                            (<T N 1 2>
                                (<L N/N NNP NNP Elsevier N_109/N_109>)
                                (<L N NNP NNP N.V. N>)
                            )
                        )
                        (<T NP[conj] 1 2>
                            (<L , , , , ,>)
                            (<T NP 1 2>
                                (<L NP[nb]/N DT DT the NP[nb]_131/N_131>)
                                (<T N 1 2>
                                    (<L N/N NNP NNP Dutch N_126/N_126>)
                                    (<T N 1 2>
                                        (<L N/N VBG VBG publishing N_119/N_119>)
                                        (<L N NN NN group N>)
                                    )
                                )
                            )
                        )
                    )
                )
            )
        )
    )
    (<L . . . . .>)
)'''
        pt = parse_ccg_derivation(txt)
        s = sentence_from_pt(pt)
        dprint(s)
        self.assertIsNotNone(pt)
        ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
        ccg.build_execution_sequence(pt)
        ccg.create_drs()
        ccg.resolve_proper_names()
        ccg.final_rename()
        d = ccg.get_drs()
        s = d.show(SHOW_LINEAR)
        dprint(s)
        sent = ccg.get_verbnet_sentence()
        a = get_constituents_string_list(sent)
        # Expected constituents; '#' marks the head word of each constituent.
        x = [
            'NP(#Mr.-Vinken)',
            'VP(#is)',
            'NP(#chairman)',
            'PP(#of)',
            'NP(#Elsevier-N.V.)',
            'NP(the Dutch publishing #group)',
        ]
        dprint('\n'.join(a))
        self.assertListEqual(x, a)
        # Expected constituent tree; indices refer to the list above:
        # 01 VP(is)
        #    00 NP(Mr.-Vinken)
        #    02 NP(chairman)
        #       03 PP(of Elsevier N.V. the Dutch publishing group)
        #          04 NP(Elsevier N.V.)
        #             05 NP(the Dutch publishing group)
        x = (1, [(0, []), (2, [(3, [(4, [(5, [])])])])])
        a = sent.get_constituent_tree()
        dprint_constituent_tree(sent, a)
        self.assertEqual(repr(x), repr(a))
示例#26
0
 def test2_GOLD_Wsj0001_1(self):
     """Constituent extraction and constituent tree for GOLD wsj_0001.1.

     Sentence: "Pierre Vinken, 61 years old, will join the board as a
     nonexecutive director Nov 29."  Exercises proper-name resolution
     ('Pierre-Vinken') plus modal-verb handling (see FIXME below).
     """
     # ID=wsj_0001.1 PARSER=GOLD NUMPARSE=1
     # Pierre Vinken, 61 years old, will join the board as a nonexecutive director Nov 29.
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T NP 0 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 0 1>
     #                       (<T N 1 2>
     #                           (<L N/N NNP NNP Pierre N_73/N_73>)
     #                           (<L N NNP NNP Vinken N>)
     #                       )
     #                   )
     #                   (<L , , , , ,>)
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[adj]\NP 1 2>
     #                       (<T NP 0 1>
     #                           (<T N 1 2>
     #                               (<L N/N CD CD 61 N_93/N_93>)
     #                               (<L N NNS NNS years N>)
     #                           )
     #                       )
     #                       (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_83)\NP_84>)
     #                   )
     #               )
     #           )
     #           (<L , , , , ,>)
     #       )
     #       (<T S[dcl]\NP 0 2>
     #           (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP_10)/(S[b]_11\NP_10:B)_11>)
     #           (<T S[b]\NP 0 2>
     #               (<T S[b]\NP 0 2>
     #                   (<T (S[b]\NP)/PP 0 2>
     #                       (<L ((S[b]\NP)/PP)/NP VB VB join ((S[b]\NP_20)/PP_21)/NP_22>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT the NP[nb]_29/N_29>)
     #                           (<L N NN NN board N>)
     #                       )
     #                   )
     #                   (<T PP 0 2>
     #                       (<L PP/NP IN IN as PP/NP_34>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT a NP[nb]_48/N_48>)
     #                           (<T N 1 2>
     #                               (<L N/N JJ JJ nonexecutive N_43/N_43>)
     #                               (<L N NN NN director N>)
     #                           )
     #                       )
     #                   )
     #               )
     #               (<T (S\NP)\(S\NP) 0 2>
     #                   (<L ((S\NP)\(S\NP))/N[num] NNP NNP Nov. ((S_61\NP_56)_61\(S_61\NP_56)_61)/N[num]_62>)
     #                   (<L N[num] CD CD 29 N[num]>)
     #               )
     #           )
     #       )
     #   )
     #   (<L . . . . .>)
     # )
     txt = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<T N 1 2>
         (<L N/N NNP NNP Pierre N_73/N_73>) (<L N NNP NNP Vinken N>) ) ) (<L , , , , ,>) ) (<T NP\NP 0 1>
         (<T S[adj]\NP 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N CD CD 61 N_93/N_93>) (<L N NNS NNS years N>) ) )
         (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_83)\NP_84>) ) ) ) (<L , , , , ,>) ) (<T S[dcl]\NP 0 2>
         (<L (S[dcl]\NP)/(S[b]\NP) MD MD will (S[dcl]\NP_10)/(S[b]_11\NP_10:B)_11>) (<T S[b]\NP 0 2>
         (<T S[b]\NP 0 2> (<T (S[b]\NP)/PP 0 2> (<L ((S[b]\NP)/PP)/NP VB VB join ((S[b]\NP_20)/PP_21)/NP_22>)
         (<T NP 1 2> (<L NP[nb]/N DT DT the NP[nb]_29/N_29>) (<L N NN NN board N>) ) ) (<T PP 0 2>
         (<L PP/NP IN IN as PP/NP_34>) (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_48/N_48>) (<T N 1 2>
         (<L N/N JJ JJ nonexecutive N_43/N_43>) (<L N NN NN director N>) ) ) ) ) (<T (S\NP)\(S\NP) 0 2>
         (<L ((S\NP)\(S\NP))/N[num] NNP NNP Nov. ((S_61\NP_56)_61\(S_61\NP_56)_61)/N[num]_62>)
         (<L N[num] CD CD 29 N[num]>) ) ) ) ) (<L . . . . .>) )'''
     pt = parse_ccg_derivation(txt)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.resolve_proper_names()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     sent = ccg.get_verbnet_sentence()
     a = get_constituents_string_list(sent)
     # FIXME: VP(will #join) should be S_INF(will #join).
     # Issues occurs because I convert modal-verb combinator categories to modifiers. Must be fixed on functor
     # creation - Lexeme.get_production()
     # will: (S[dcl]\NP)/(S[b]/NP) -> (S\NP)/(S/NP)
     x = [
         'NP(#Pierre-Vinken)',
         'ADJP(61 years #old)',
         'NP(61 #years)',
         'VP(#will join)',
         'NP(the #board)',
         'PP(#as)',
         'NP(a nonexecutive #director)',
         'NP(#Nov. 29)'
     ]
     dprint('\n'.join(a))
     self.assertListEqual(x, a)
     # Expected constituent tree; indices refer to the list above:
     # 03 VP(will join)
     #    00 NP(Pierre-Vinken)
     #       01 ADJP(61 years old)
     #          02 NP(61 years)
     #    04 NP(the board)
     #    05 PP(as)
     #       06 NP(a nonexecutive director)
     #    07 NP(Nov. 29)
     x = (3, [(0, [(1, [(2, [])])]), (4, []), (5, [(6, [])]), (7, [])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     self.assertEqual(repr(x), repr(a))
示例#27
0
 def test2_GOLD_Wsj0002_1(self):
     """Constituent extraction and constituent tree for GOLD wsj_0002.1.

     Sentence: "Rudolph Agnew, 55 years old and former chairman of
     Consolidated Gold Fields PLC, was named a nonexecutive director of
     this British industrial conglomerate."  Exercises proper-name
     resolution of multi-word names and a conjoined adjective phrase.
     """
     # ID=wsj_0002.1 PARSER=GOLD NUMPARSE=1
     # Rudolph Agnew, 55 years old and former chairman of Consolidated Gold Fields PLC, was named a nonexecutive
     # director of this British industrial conglomerate.
     # (<T S[dcl] 0 2>
     #   (<T S[dcl] 1 2>
     #       (<T NP 0 2>
     #           (<T NP 0 2>
     #               (<T NP 0 2>
     #                   (<T NP 0 1>
     #                       (<T N 1 2>
     #                           (<L N/N NNP NNP Rudolph N_72/N_72>)
     #                           (<L N NNP NNP Agnew N>)
     #                       )
     #                   )
     #                   (<L , , , , ,>)
     #               )
     #               (<T NP\NP 0 1>
     #                   (<T S[adj]\NP 0 2>
     #                       (<T S[adj]\NP 1 2>
     #                           (<T NP 0 1>
     #                               (<T N 1 2>
     #                                   (<L N/N CD CD 55 N_92/N_92>)
     #                                   (<L N NNS NNS years N>)
     #                               )
     #                           )
     #                           (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_82)\NP_83>)
     #                       )
     #                       (<T S[adj]\NP[conj] 1 2>
     #                           (<L conj CC CC and conj>)
     #                           (<T NP 0 2>
     #                               (<T NP 0 1>
     #                                   (<T N 1 2>
     #                                       (<L N/N JJ JJ former N_102/N_102>)
     #                                       (<L N NN NN chairman N>)
     #                                   )
     #                               )
     #                               (<T NP\NP 0 2>
     #                                   (<L (NP\NP)/NP IN IN of (NP_111\NP_111)/NP_112>)
     #                                   (<T NP 0 1>
     #                                       (<T N 1 2>
     #                                           (<L N/N NNP NNP Consolidated N_135/N_135>)
     #                                           (<T N 1 2>
     #                                               (<L N/N NNP NNP Gold N_128/N_128>)
     #                                               (<T N 1 2>
     #                                                   (<L N/N NNP NNP Fields N_121/N_121>)
     #                                                   (<L N NNP NNP PLC N>)
     #                                               )
     #                                           )
     #                                       )
     #                                   )
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #           (<L , , , , ,>)
     #       )
     #       (<T S[dcl]\NP 0 2>
     #           (<L (S[dcl]\NP)/(S[pss]\NP) VBD VBD was (S[dcl]\NP_10)/(S[pss]_11\NP_10:B)_11>)
     #           (<T S[pss]\NP 0 2>
     #               (<L (S[pss]\NP)/NP VBN VBN named (S[pss]\NP_18)/NP_19>)
     #                   (<T NP 0 2> (<T NP 1 2>
     #                       (<L NP[nb]/N DT DT a NP[nb]_33/N_33>)
     #                       (<T N 1 2>
     #                           (<L N/N JJ JJ nonexecutive N_28/N_28>)
     #                           (<L N NN NN director N>)
     #                       )
     #                   )
     #                   (<T NP\NP 0 2>
     #                       (<L (NP\NP)/NP IN IN of (NP_41\NP_41)/NP_42>)
     #                       (<T NP 1 2>
     #                           (<L NP[nb]/N DT DT this NP[nb]_63/N_63>)
     #                           (<T N 1 2>
     #                               (<L N/N JJ JJ British N_58/N_58>)
     #                               (<T N 1 2>
     #                                   (<L N/N JJ JJ industrial N_51/N_51>)
     #                                   (<L N NN NN conglomerate N>)
     #                               )
     #                           )
     #                       )
     #                   )
     #               )
     #           )
     #       )
     #   )
     #   (<L . . . . .>)
     # )
     txt = r'''(<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<T N 1 2>
         (<L N/N NNP NNP Rudolph N_72/N_72>) (<L N NNP NNP Agnew N>) ) ) (<L , , , , ,>) ) (<T NP\NP 0 1>
         (<T S[adj]\NP 0 2> (<T S[adj]\NP 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N CD CD 55 N_92/N_92>)
         (<L N NNS NNS years N>) ) ) (<L (S[adj]\NP)\NP JJ JJ old (S[adj]\NP_82)\NP_83>) ) (<T S[adj]\NP[conj] 1 2>
         (<L conj CC CC and conj>) (<T NP 0 2> (<T NP 0 1> (<T N 1 2> (<L N/N JJ JJ former N_102/N_102>)
         (<L N NN NN chairman N>) ) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_111\NP_111)/NP_112>) (<T NP 0 1>
         (<T N 1 2> (<L N/N NNP NNP Consolidated N_135/N_135>) (<T N 1 2> (<L N/N NNP NNP Gold N_128/N_128>)
         (<T N 1 2> (<L N/N NNP NNP Fields N_121/N_121>) (<L N NNP NNP PLC N>) ) ) ) ) ) ) ) ) ) ) (<L , , , , ,>) )
         (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/(S[pss]\NP) VBD VBD was (S[dcl]\NP_10)/(S[pss]_11\NP_10:B)_11>)
         (<T S[pss]\NP 0 2> (<L (S[pss]\NP)/NP VBN VBN named (S[pss]\NP_18)/NP_19>) (<T NP 0 2> (<T NP 1 2>
         (<L NP[nb]/N DT DT a NP[nb]_33/N_33>) (<T N 1 2> (<L N/N JJ JJ nonexecutive N_28/N_28>)
         (<L N NN NN director N>) ) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_41\NP_41)/NP_42>) (<T NP 1 2>
         (<L NP[nb]/N DT DT this NP[nb]_63/N_63>) (<T N 1 2> (<L N/N JJ JJ British N_58/N_58>) (<T N 1 2>
         (<L N/N JJ JJ industrial N_51/N_51>) (<L N NN NN conglomerate N>) ) ) ) ) ) ) ) ) (<L . . . . .>) )'''
     pt = parse_ccg_derivation(txt)
     self.assertIsNotNone(pt)
     s = sentence_from_pt(pt)
     dprint(s)
     ccg = Ccg2Drs(CO_VERIFY_SIGNATURES | CO_NO_VERBNET | CO_NO_WIKI_SEARCH)
     ccg.build_execution_sequence(pt)
     ccg.create_drs()
     ccg.resolve_proper_names()
     ccg.final_rename()
     d = ccg.get_drs()
     s = d.show(SHOW_LINEAR)
     dprint(s)
     sent = ccg.get_verbnet_sentence()
     a = get_constituents_string_list(sent)
     dprint('\n'.join(a))
     # Hash indicates head word in constituent
     x = [
         'NP(#Rudolph-Agnew)',
         'ADJP(55 years #old and former chairman of Consolidated-Gold-Fields-PLC)',
         'NP(55 #years)',
         'NP(former #chairman)',
         'PP(#of)',
         'NP(#Consolidated-Gold-Fields-PLC)',
         'VP(#was named)',
         'NP(a nonexecutive #director)',
         'PP(#of)',
         'NP(this British industrial #conglomerate)'
     ]
     self.assertListEqual(x, a)
     # Expected constituent tree; indices refer to the list above:
     # 6 VP(was named)
     #   0 NP(Rudolph-Agnew)
     #     1 ADVP(55 years old former chairman of Consolidated-Gold-Fields-PLC)
     #       2 NP(55 years)
     #       3 NP(former chairman)
     #         4 PP(of)
     #           5 NP(Consolidated-Gold-Fields-PLC)
     #   7 NP(a nonexecutive director)
     #     8 PP(of)
     #       9 NP(this British industrial conglomerate)
     x = (6, [(0, [(1, [(2, []), (3, [(4, [(5, [])])])])]), (7, [(8, [(9, [])])])])
     a = sent.get_constituent_tree()
     dprint_constituent_tree(sent, a)
     self.assertEqual(repr(x), repr(a))
示例#28
0
文件: ccg_test.py 项目: marbles-ai/ie
    def test8_RuleUniquenessEasySRL(self):
        """Verify CCG rule lookup is unambiguous for EasySRL derivations.

        Walks the EasySRL ccgbank derivation files (sampling every 50th
        line), builds the execution sequence for each derivation, and
        checks that get_rule() resolves each (left, right, result)
        category triple to a single rule.  Ambiguous triples are collected
        and reported; the test fails if any exist.
        """
        allfiles = []
        # Project root is seven directory levels above this file.
        projdir = os.path.dirname(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(
                        os.path.dirname(
                            os.path.dirname(os.path.dirname(__file__)))))))
        ldcpath = os.path.join(projdir, 'data', 'ldc', 'easysrl', 'ccgbank')
        dirlist1 = os.listdir(ldcpath)
        for fname in dirlist1:
            if 'ccg_derivation' not in fname:
                continue
            ldcpath1 = os.path.join(ldcpath, fname)
            if os.path.isfile(ldcpath1):
                allfiles.append(ldcpath1)

        failed_parse = 0
        ambiguous = []
        start = 0
        for fn in allfiles:
            with open(fn, 'r') as fd:
                lines = fd.readlines()

            name, _ = os.path.splitext(os.path.basename(fn))
            # Sample every 50th derivation to keep run time reasonable.
            for i in range(start, len(lines), 50):
                start = 0
                ccgbank = lines[i]
                dprint('%s-%04d' % (name, i))
                ccg = Ccg2Drs()
                try:
                    pt = parse_ccg_derivation(ccgbank)
                    ccg.build_execution_sequence(pt)
                except Exception:
                    # Parse failures are counted but do not fail the test.
                    failed_parse += 1
                    continue

                self.assertIsNotNone(pt)
                for op in ccg.exeque:
                    if isinstance(op, PushOp):
                        continue
                    self.assertIsInstance(op, ExecOp)
                    left = op.sub_ops[0].category
                    result = op.category
                    if len(op.sub_ops) == 2:
                        right = op.sub_ops[1].category
                    else:
                        right = CAT_EMPTY
                    exclude = []
                    # Should not have ambiguity
                    rule = get_rule(left, right, result, exclude)
                    if rule is None and right != CAT_EMPTY:
                        # Retry the binary-rule lookup with features
                        # stripped from all three categories.
                        rule = get_rule(left.remove_features(),
                                        right.remove_features(),
                                        result.remove_features(), exclude)
                    self.assertIsNotNone(rule)
                    rstr = ''
                    # Bound the search so a pathological lookup cannot
                    # loop forever (mirrors the limit used in the ccgbank
                    # variant of this test).
                    limit = 5
                    while rule is not None and limit > 0:
                        rstr += repr(rule) + '|'
                        rule = get_rule(left, right, result, exclude)
                        limit -= 1
                    self.assertGreater(limit, 0)
                    if len(exclude) > 1:
                        ambiguous.append(
                            ('%s <- %s <{%s}> %s' %
                             (result, left, rstr, right), exclude))
        for x in ambiguous:
            # BUG FIX: x is a 2-tuple (description, exclude) but the old
            # format string '%s-%04d: %s {%s}' had four conversion
            # specifiers, so this dprint raised TypeError whenever an
            # ambiguity was actually found.
            dprint('ambiguous rule: %s {%s}' % x)
        self.assertTrue(len(ambiguous) == 0)
Example #29
0
File: ccg_test.py  Project: marbles-ai/ie
    def test9_RuleExecutionEasySRL(self):
        """Check that applying each resolved rule reproduces its category.

        Walks every 50th derivation in the LDC EasySRL ccgbank files and,
        for each non-pass-through ExecOp, applies the recorded rule to the
        operand categories and asserts the produced category unifies with
        the one stored on the operation.
        """
        # Collect all ccg_derivation files under <project>/data/ldc/easysrl/ccgbank.
        allfiles = []
        projdir = os.path.dirname(
            os.path.dirname(
                os.path.dirname(
                    os.path.dirname(
                        os.path.dirname(
                            os.path.dirname(os.path.dirname(__file__)))))))
        ldcpath = os.path.join(projdir, 'data', 'ldc', 'easysrl', 'ccgbank')
        dirlist1 = os.listdir(ldcpath)
        for fname in dirlist1:
            if 'ccg_derivation' not in fname:
                continue
            ldcpath1 = os.path.join(ldcpath, fname)
            if os.path.isfile(ldcpath1):
                allfiles.append(ldcpath1)

        failed_parse = 0
        failed_exec = []
        start = 0
        analysis = []
        for fn in allfiles[0:]:
            with open(fn, 'r') as fd:
                lines = fd.readlines()

            name, _ = os.path.splitext(os.path.basename(fn))
            # Sample every 50th derivation to keep the run time reasonable.
            for i in range(start, len(lines), 50):
                start = 0
                ccgbank = lines[i]
                dprint('%s-%04d' % (name, i))
                ccg = Ccg2Drs()
                try:
                    pt = parse_ccg_derivation(ccgbank)
                    ccg.build_execution_sequence(pt)
                except Exception:
                    # Parse failures are tolerated; only execution mismatches fail.
                    failed_parse += 1
                    continue

                for op in ccg.exeque:
                    if isinstance(op, PushOp):
                        continue
                    self.assertIsInstance(op, ExecOp)
                    left = op.sub_ops[0].category.remove_wildcards()
                    result = op.category.remove_wildcards()
                    if len(op.sub_ops) == 2:
                        right = op.sub_ops[1].category.remove_wildcards()
                    else:
                        right = CAT_EMPTY

                    # Skip pass-through and type-changing rules - they are not
                    # plain category combinations.
                    if op.rule is not None and op.rule not in [RL_TCL_UNARY, RL_TCR_UNARY, RL_TC_ATOM, RL_TC_CONJ, \
                                                               RL_LPASS, RL_RPASS, RL_TYPE_RAISE]:
                        actual = op.rule.apply_rule_to_category(left, right)
                        if not actual.can_unify(result):
                            # Record three lines per failure: mismatch, rule, input.
                            failed_exec.append('%s-%04d: %s <!> %s' %
                                               (name, i, actual, result))
                            failed_exec.append('%s <- %s %s %s' %
                                               (actual, left, op.rule, right))
                            failed_exec.append(ccgbank)
                        else:
                            # Can add analysis here
                            if left == CAT_NP and right == CAT_NP_NP and op.rule == RL_BA and op.head != 0:
                                # Expected: NP(x) <- λx.NP(x) -BA- λxλy.NP(y)\NP(x)
                                # Actual: NP(y) <- λx.NP(x) -BA- λxλy.NP(y)\NP(x)
                                analysis.append('%s-%04d: NP <ba> NP\\NP' %
                                                (name, i))

                        self.assertTrue(actual.can_unify(result))

        if len(analysis) != 0:
            dprint('-----------------------')
            # BUGFIX: the count was never interpolated ('%d' printed literally).
            dprint('%d rules failed analysis' % len(analysis))
            dprint('--')
            for ln in analysis:
                dprint(ln)
        if len(failed_exec) != 0:
            dprint('-----------------------')
            # BUGFIX: precedence made the old code divide a *string* by 3
            # ((fmt % n) / 3 -> TypeError). Each failure adds 3 list entries,
            # so the failure count is len(failed_exec) // 3.
            dprint('%d rules failed exec' % (len(failed_exec) // 3))
            dprint('--')
            for ln in failed_exec:
                dprint(ln)

        self.assertTrue(len(failed_exec) == 0)
        self.assertTrue(len(analysis) == 0)