Exemplo n.º 1
0
 def test8_Wsj0004_3(self):
     """Verify the execution queue for an NP coordinated with an S[em] clause.

     The derivation below covers the words "Compound yields assume
     reinvestment of dividends and that the current yield continues for a
     year ." (presumably WSJ sentence 0004.3, going by the test name - this
     cannot be confirmed from the test itself).

     Two things are asserted:
       * get_rule() maps conj + S[em] => NP[conj] to RL_TC_ATOM.
       * After build_execution_sequence(), the repr() of every entry in
         Ccg2Drs.exeque equals the expected list, element for element.
     """
     txt = r'''
     (<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N NN NN Compound N_309/N_309>) 
     (<L N NNS NNS yields N>) ) ) (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/NP VBP VBP assume (S[dcl]\NP_236)/NP_237>) 
     (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<L N NN NN reinvestment N>) ) (<T NP\NP 0 2> 
     (<L (NP\NP)/NP IN IN of (NP_248\NP_248)/NP_249>) (<T NP 0 1> (<L N NNS NNS dividends N>) ) ) ) (<T NP[conj] 1 2> 
     (<L conj CC CC and conj>) (<T S[em] 0 2> (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]_257>) (<T S[dcl] 1 2> 
     (<T NP 1 2> (<L NP[nb]/N DT DT the NP[nb]_297/N_297>) (<T N 1 2> (<L N/N JJ JJ current N_292/N_292>) 
     (<L N NN NN yield N>) ) ) (<T S[dcl]\NP 0 2> (<L S[dcl]\NP VBZ VBZ continues S[dcl]\NP_262>) 
     (<T (S\NP)\(S\NP) 0 2> (<L ((S\NP)\(S\NP))/NP IN IN for ((S_275\NP_270)_275\(S_275\NP_270)_275)/NP_276>) 
     (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_283/N_283>) (<L N NN NN year N>) ) ) ) ) ) ) ) ) ) (<L . . . . .>) ) '''
     # Turn the bracketed derivation text into a parse tree.
     pt = parse_ccg_derivation(txt)
     ccg = Ccg2Drs()
     # The (left=conj, right=S[em], result=NP[conj]) combination that occurs
     # in the tree above must be classified as RL_TC_ATOM.
     rule = get_rule(Category.from_cache('conj'),
                     Category.from_cache('S[em]'),
                     Category.from_cache('NP[conj]'))
     self.assertEqual(rule, RL_TC_ATOM)
     ccg.build_execution_sequence(pt)
     # Check execution queue
     actual = [repr(x) for x in ccg.exeque]
     # Expected repr() of each queued operation, in queue order.  Note the
     # expected word forms differ from the surface text in places
     # ('Compound' -> 'compound', 'continues' -> 'continue').
     expected = [
         '<PushOp>:(compound, N/N, NN)',
         '<PushOp>:(yields, N, NNS)',
         '<ExecOp>:(2, FA N)',
         '<ExecOp>:(1, LP NP)',
         '<PushOp>:(assume, (S[dcl]\\NP)/NP, VBP)',
         '<PushOp>:(reinvestment, N, NN)',
         '<ExecOp>:(1, LP NP)',
         '<PushOp>:(of, (NP\\NP)/NP, IN)',
         '<PushOp>:(dividends, N, NNS)',
         '<ExecOp>:(1, LP NP)',
         '<ExecOp>:(2, FA NP\\NP)',
         '<ExecOp>:(2, BA NP)',
         '<PushOp>:(and, conj, CC)',
         '<PushOp>:(that, S[em]/S[dcl], IN)',
         '<PushOp>:(the, NP[nb]/N, DT)',
         '<PushOp>:(current, N/N, JJ)',
         '<PushOp>:(yield, N, NN)',
         '<ExecOp>:(2, FA N)',
         '<ExecOp>:(2, FA NP)',
         '<PushOp>:(continue, S[dcl]\\NP, VBZ)',
         '<PushOp>:(for, ((S\\NP)\\(S\\NP))/NP, IN)',
         '<PushOp>:(a, NP[nb]/N, DT)',
         '<PushOp>:(year, N, NN)',
         '<ExecOp>:(2, FA NP)',
         '<ExecOp>:(2, FA (S\\NP)\\(S\\NP))',
         '<ExecOp>:(2, BA S[dcl]\\NP)',
         '<ExecOp>:(2, BA S[dcl])',
         '<ExecOp>:(2, FA S[em])',
         '<ExecOp>:(2, ATOM_TC NP[conj])',
         '<ExecOp>:(2, RCONJ NP)',
         '<ExecOp>:(2, FA S[dcl]\\NP)',
         '<ExecOp>:(2, BA S[dcl])',
         '<PushOp>:(., ., .)',
         '<ExecOp>:(2, LP S[dcl])',
     ]
     self.assertListEqual(expected, actual)
Exemplo n.º 2
0
 def issupported(self, category):
     """Report whether a FunctorTemplate is registered for `category`.

     The category is first tested against `_TEMPLATES` exactly as given.
     On a miss, and only when `category.isfunctor` is true, its signature
     is rewritten with `_Feature.sub('[X]', ...)` and the resulting
     wildcard category is tested instead.

     Args:
         category: The category to test.

     Returns:
         True if the exact or the wildcard key is in `_TEMPLATES`, else
         False.
     """
     templates = self._TEMPLATES
     if category in templates:
         return True
     if not category.isfunctor:
         return False
     # Retry with the wildcard form of the signature.
     wildcard_signature = self._Feature.sub('[X]', category.signature)
     return Category.from_cache(wildcard_signature) in templates
Exemplo n.º 3
0
 def lookup(self, category):
     """Lookup a FunctorTemplate with key=category.

     The conj feature is stripped from `category` first (via
     `remove_conj_feature()`).  The stripped category is tried as an exact
     key; if that misses and the category is a functor, its signature is
     rewritten with `_Feature.sub('[X]', ...)` and the wildcard category
     is tried.

     Args:
         category: The category to look up.

     Returns:
         The matching entry of `_TEMPLATES`, or None if neither the exact
         nor the wildcard key is registered.
     """
     category = category.remove_conj_feature()
     if category in self._TEMPLATES:
         return self._TEMPLATES[category]
     if not category.isfunctor:
         return None
     # Perform wildcard replacements
     wc = Category.from_cache(
         self._Feature.sub('[X]', category.signature))
     try:
         return self._TEMPLATES[wc]
     except KeyError:
         # Only a missing key means "no template"; any other failure is a
         # genuine error and is allowed to propagate.
         return None
Exemplo n.º 4
0
    def __init__(self, rule, predarg_category, finalRef, finalAtom,
                 construct_empty=False):
        """Record the pieces needed to build a functor later.

        Besides storing the arguments, a second category is derived from
        `predarg_category` by passing the result of its `clean(True)` call
        through `Category.from_cache` and is kept as `_clean_category`.

        Args:
            rule: The production constructor rule.
            predarg_category: A predarg category.
            finalRef: The final referent result.
            finalAtom: The final atomic category result.
            construct_empty: If true the functor should be constructed with
                an empty DrsProduction as the final atom.
        """
        self._constructor_rule = rule
        self._predarg_category = predarg_category
        cleaned = predarg_category.clean(True)
        self._clean_category = Category.from_cache(cleaned)
        self._final_ref = finalRef
        self._final_atom = finalAtom
        self._construct_empty = construct_empty
Exemplo n.º 5
0
 def lookup_unary(self, result, argument):
     """Look up the entry of `_UNARY` for a result/argument category pair.

     String arguments are converted with `Category(...)`.  The key built by
     `UnaryRule.create_key(result, argument)` is tried first; on a miss its
     signature is rewritten with `_Feature.sub('[X]', ...)` and the
     wildcard category is tried.

     Args:
         result: The result category, as a Category or a signature string.
         argument: The argument category, as a Category or a signature
             string.

     Returns:
         The matching entry of `_UNARY`, or None if neither key is present.

     Raises:
         TypeError: If `result` or `argument` is neither a string nor a
             Category.
     """
     if isinstance(result, (str, unicode)):
         result = Category(result)
     elif not isinstance(result, Category):
         raise TypeError(
             'Model.lookup_unary() expects signature or Category result')
     if isinstance(argument, (str, unicode)):
         argument = Category(argument)
     elif not isinstance(argument, Category):
         raise TypeError(
             'Model.lookup_unary() expects signature or Category argument')
     key = UnaryRule.create_key(result, argument)
     try:
         return self._UNARY[key]
     except KeyError:
         # Exact key not registered; fall through to the wildcard form.
         pass
     # Perform wildcard replacements
     wc = Category.from_cache(self._Feature.sub('[X]', key.signature))
     try:
         return self._UNARY[wc]
     except KeyError:
         # Only a missing key means "no rule"; other errors propagate.
         return None
Exemplo n.º 6
0
def make_lexicon(daemon):
    """Build the on-disk lexicon for `daemon` from its CCG derivation files.

    Input is every regular file in ``<projdir>/data/ldc/<daemon>/ccgbank``
    whose name contains ``ccg_derivation`` and whose path matches the
    module-level ``idsrch`` pattern.  Each non-blank line not starting with
    ``#`` is parsed as one CCG derivation and folded into a dictionary by
    ``extract_lexicon_from_pt``.

    Output goes to ``<projdir>/data/ldc/<daemon>/lexicon``:

    * ``az/<char>.txt`` - one file per dictionary slot (slot ``i`` is named
      ``unichr(i + 0x40)``), listing each predicate, its usages and the
      sentence ids of every usage.
    * ``rt/<signature>.txt`` - one file per return-type atom, listing each
      cleaned category and the names recorded for it.

    Side effect: rebinds the module-level ``projdir`` to the grandparent
    directory of this file.

    Args:
        daemon: Name of the sub-directory of ``data/ldc`` to process.

    Raises:
        Exception: Any error raised while parsing a derivation or
            extracting its lexicon is propagated; processing stops at the
            first failing line.
    """
    global pypath, projdir, datapath, idsrch
    projdir = os.path.dirname(os.path.dirname(__file__))

    easysrl_path = os.path.join(projdir, 'data', 'ldc', daemon, 'lexicon')
    rt_path = os.path.join(easysrl_path, 'rt')
    az_path = os.path.join(easysrl_path, 'az')
    for outdir in (easysrl_path, rt_path, az_path):
        if not os.path.exists(outdir):
            os.makedirs(outdir)

    # Get files
    ldcpath = os.path.join(projdir, 'data', 'ldc', daemon, 'ccgbank')
    allfiles = []
    for fname in sorted(os.listdir(ldcpath)):
        if 'ccg_derivation' not in fname:
            continue
        ldcpath1 = os.path.join(ldcpath, fname)
        if os.path.isfile(ldcpath1):
            allfiles.append(ldcpath1)

    # progress < 0 selects one printed line per derivation; it is never
    # changed here, so the print_progress() branch is only a manual toggle.
    progress = -1
    dictionary = None
    for fn in allfiles:
        matched = idsrch.match(fn)
        if matched is None:
            continue
        file_id = matched.group('id')

        with open(fn, 'r') as fd:
            lines = fd.readlines()

        name, _ = os.path.splitext(os.path.basename(fn))
        for i, line in enumerate(lines):
            ccgbank = line.strip()
            if len(ccgbank) == 0 or ccgbank[0] == '#':
                continue

            if progress < 0:
                print('%s-%04d' % (name, i))
            else:
                progress = print_progress(progress, 10)

            # CCG parser is Java so output is UTF-8.
            # Parse failures propagate to the caller.
            ccgbank = safe_utf8_decode(ccgbank)
            pt = parse_ccg_derivation(ccgbank)
            # Result is unused; the call is kept so a tree that cannot be
            # rendered as a sentence still fails at this point.
            sentence_from_pt(pt).strip()

            uid = '%s-%04d' % (file_id, i)
            try:
                #dictionary[0-25][stem][set([c]), set(uid)]
                dictionary = extract_lexicon_from_pt(pt, dictionary, uid=uid)
            except Exception as e:
                # Report the error before letting it propagate.
                print(e)
                raise

    if dictionary is None:
        # No derivation was processed, so there is nothing to write.
        return

    rtdict = {}
    for idx in range(len(dictionary)):
        fname = unichr(idx + 0x40)
        filepath = os.path.join(az_path, fname + '.txt')
        with open(filepath, 'w') as fd:
            d = dictionary[idx]
            for k, v in d.iteritems():
                # k == stem, v = {c: set(uid)}
                fd.write(b'<predicate name=\'%s\'>\n' % safe_utf8_encode(k))
                for x, w in v.iteritems():
                    fd.write(b'<usage \'%s\'>\n' % safe_utf8_encode(x))
                    # Usage keys of the form '<name>:<category>' also feed
                    # the return-type index (format presumed from the split;
                    # confirm against extract_lexicon_from_pt).
                    nc = x.split(':')
                    if len(nc) == 2:
                        c = Category.from_cache(
                            Category(nc[1].strip()).clean(True))
                        # Return type atom
                        rt = c.extract_unify_atoms(False)[-1]
                        rtdict.setdefault(rt, {}).setdefault(c, []).append(
                            nc[0])
                    for y in w:
                        fd.write(b'sentence id: ' + safe_utf8_encode(y))
                        fd.write(b'\n')
                    fd.write(b'</usage>\n')
                fd.write(b'</predicate>\n\n')
            # Free up memory
            dictionary[idx] = None
            d = None
    for rt, cdict in rtdict.iteritems():
        fname = rt.signature.replace('[', '_').replace(']', '')
        filepath = os.path.join(rt_path, fname + '.txt')
        with open(filepath, 'w') as fd:
            for c, vs in cdict.iteritems():
                fd.write(b'<category signature=\'%s\'>\n' %
                         safe_utf8_encode(c))
                for v in vs:
                    # Encode like every other write in this function; the
                    # names come from the same usage keys encoded above.
                    fd.write(safe_utf8_encode(v))
                    fd.write(b'\n')
                fd.write(b'</category>\n\n')
Exemplo n.º 7
0
    def test7_Wsj0051_30(self):
        """Verify the execution queue for a sentence coordinated with an S[em] clause.

        The derivation below covers the words "Fujitsu and NEC said they
        were still investigating , and that knowledge of more such bids could
        emerge ." (presumably WSJ sentence 0051.30, going by the test name -
        this cannot be confirmed from the test itself).

        Two things are asserted:
          * get_rule() maps conj + S[em] => S[dcl][conj] to RL_RPASS.
          * After build_execution_sequence(), the repr() of every entry in
            Ccg2Drs.exeque equals the expected list, element for element.
        """
        txt = r'''
(<T S[dcl] 0 2> 
  (<T S[dcl] 1 2> 
    (<T NP 0 1> 
      (<T N 1 2> 
        (<L N NNP NNP Fujitsu N>) 
        (<T N[conj] 1 2> 
          (<L conj CC CC and conj>) 
          (<L N NNP NNP NEC N>) 
        ) 
      ) 
    ) 
    (<T S[dcl]\NP 0 2> 
      (<L (S[dcl]\NP)/S[dcl] VBD VBD said (S[dcl]\NP_146)/S[dcl]_147>) 
      (<T S[dcl] 0 2> 
        (<T S[dcl] 1 2> 
          (<L NP PRP PRP they NP>) 
          (<T S[dcl]\NP 0 2> 
            (<T (S[dcl]\NP)/(S[ng]\NP) 0 2> 
              (<L (S[dcl]\NP)/(S[ng]\NP) VBD VBD were (S[dcl]\NP_156)/(S[ng]_157\NP_156:B)_157>) 
              (<L (S\NP)\(S\NP) RB RB still (S_169\NP_164)_169\(S_169\NP_164)_169>) 
            ) 
            (<L S[ng]\NP VBG VBG investigating S[ng]\NP_174>) 
          ) 
        ) 
        (<T S[dcl][conj] 1 2> 
          (<L , , , , ,>) 
          (<T S[dcl][conj] 1 2> 
            (<L conj CC CC and conj>) 
            (<T S[em] 0 2> 
              (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]_181>) 
              (<T S[dcl] 1 2> 
                (<T NP 0 2> 
                  (<T NP 0 1> 
                    (<L N NN NN knowledge N>) 
                  ) 
                  (<T NP\NP 0 2> 
                    (<L (NP\NP)/NP IN IN of (NP_207\NP_207)/NP_208>) 
                    (<T NP 0 1> 
                      (<T N 1 2> 
                        (<L N/N JJR JJR more N_224/N_224>) 
                        (<T N 1 2> 
                          (<L N/N JJ JJ such N_217/N_217>) 
                          (<L N NNS NNS bids N>) 
                        ) 
                      ) 
                    ) 
                  ) 
                ) 
                (<T S[dcl]\NP 0 2> 
                  (<L (S[dcl]\NP)/(S[b]\NP) MD MD could (S[dcl]\NP_190)/(S[b]_191\NP_190:B)_191>) 
                  (<L S[b]\NP VB VB emerge S[b]\NP_196>) 
                ) 
              ) 
            ) 
          ) 
        ) 
      ) 
    ) 
  ) 
  (<L . . . . .>)
) 
'''
        # Turn the bracketed derivation text into a parse tree.
        pt = parse_ccg_derivation(txt)
        ccg = Ccg2Drs()
        # The (left=conj, right=S[em], result=S[dcl][conj]) combination that
        # occurs in the tree above must be classified as RL_RPASS.
        rule = get_rule(Category.from_cache('conj'),
                        Category.from_cache('S[em]'),
                        Category.from_cache('S[dcl][conj]'))
        self.assertEqual(rule, RL_RPASS)
        ccg.build_execution_sequence(pt)
        # Check execution queue
        actual = [repr(x) for x in ccg.exeque]
        # Expected repr() of each queued operation, in queue order.  Note the
        # expected entries differ from the surface text in places ('said' ->
        # 'say', 'were' -> 'be', 'investigating' -> 'investigate') and the
        # modal 'could' is expected with category (S\NP)/(S\NP).
        expected = [
            '<PushOp>:(Fujitsu, N, NNP)', '<PushOp>:(and, conj, CC)',
            '<PushOp>:(NEC, N, NNP)', '<ExecOp>:(2, RP N[conj])',
            '<ExecOp>:(2, RCONJ N)', '<ExecOp>:(1, LP NP)',
            '<PushOp>:(say, (S[dcl]\\NP)/S[dcl], VBD)',
            '<PushOp>:(they, NP, PRP)',
            '<PushOp>:(be, (S[dcl]\\NP)/(S[ng]\\NP), VBD)',
            '<PushOp>:(still, (S\\NP)\\(S\\NP), RB)',
            '<ExecOp>:(2, BX (S[dcl]\\NP)/(S[ng]\\NP))',
            '<PushOp>:(investigate, S[ng]\\NP, VBG)',
            '<ExecOp>:(2, FA S[dcl]\\NP)', '<ExecOp>:(2, BA S[dcl])',
            '<PushOp>:(,, ,, ,)', '<PushOp>:(and, conj, CC)',
            '<PushOp>:(that, S[em]/S[dcl], IN)', '<PushOp>:(knowledge, N, NN)',
            '<ExecOp>:(1, LP NP)', '<PushOp>:(of, (NP\\NP)/NP, IN)',
            '<PushOp>:(more, N/N, JJR)', '<PushOp>:(such, N/N, JJ)',
            '<PushOp>:(bids, N, NNS)', '<ExecOp>:(2, FA N)',
            '<ExecOp>:(2, FA N)', '<ExecOp>:(1, LP NP)',
            '<ExecOp>:(2, FA NP\\NP)', '<ExecOp>:(2, BA NP)',
            '<PushOp>:(could, (S\\NP)/(S\\NP), MD)',
            '<PushOp>:(emerge, S[b]\\NP, VB)', '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])', '<ExecOp>:(2, FA S[em])',
            '<ExecOp>:(2, RP S[dcl][conj])', '<ExecOp>:(2, RP S[dcl][conj])',
            '<ExecOp>:(2, RCONJ S[dcl])', '<ExecOp>:(2, FA S[dcl]\\NP)',
            '<ExecOp>:(2, BA S[dcl])', '<PushOp>:(., ., .)',
            '<ExecOp>:(2, LP S[dcl])'
        ]
        self.assertListEqual(expected, actual)