def test8_Wsj0004_3(self):
    # Wsj0004-3: "Compound yields assume reinvestment of dividends and
    # that the current yield continues for a year."
    derivation = r''' (<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 1> (<T N 1 2> (<L N/N NN NN Compound N_309/N_309>) (<L N NNS NNS yields N>) ) ) (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/NP VBP VBP assume (S[dcl]\NP_236)/NP_237>) (<T NP 0 2> (<T NP 0 2> (<T NP 0 1> (<L N NN NN reinvestment N>) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_248\NP_248)/NP_249>) (<T NP 0 1> (<L N NNS NNS dividends N>) ) ) ) (<T NP[conj] 1 2> (<L conj CC CC and conj>) (<T S[em] 0 2> (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]_257>) (<T S[dcl] 1 2> (<T NP 1 2> (<L NP[nb]/N DT DT the NP[nb]_297/N_297>) (<T N 1 2> (<L N/N JJ JJ current N_292/N_292>) (<L N NN NN yield N>) ) ) (<T S[dcl]\NP 0 2> (<L S[dcl]\NP VBZ VBZ continues S[dcl]\NP_262>) (<T (S\NP)\(S\NP) 0 2> (<L ((S\NP)\(S\NP))/NP IN IN for ((S_275\NP_270)_275\(S_275\NP_270)_275)/NP_276>) (<T NP 1 2> (<L NP[nb]/N DT DT a NP[nb]_283/N_283>) (<L N NN NN year N>) ) ) ) ) ) ) ) ) ) (<L . . . . .>) ) '''
    parse_tree = parse_ccg_derivation(derivation)
    builder = Ccg2Drs()
    # Combining conj with S[em] to yield NP[conj] must select the atomic
    # type-change rule.
    tc_rule = get_rule(Category.from_cache('conj'),
                       Category.from_cache('S[em]'),
                       Category.from_cache('NP[conj]'))
    self.assertEqual(tc_rule, RL_TC_ATOM)
    builder.build_execution_sequence(parse_tree)
    # Verify the execution queue mirrors the derivation order.
    actual_ops = [repr(op) for op in builder.exeque]
    expected_ops = [
        '<PushOp>:(compound, N/N, NN)',
        '<PushOp>:(yields, N, NNS)',
        '<ExecOp>:(2, FA N)',
        '<ExecOp>:(1, LP NP)',
        '<PushOp>:(assume, (S[dcl]\\NP)/NP, VBP)',
        '<PushOp>:(reinvestment, N, NN)',
        '<ExecOp>:(1, LP NP)',
        '<PushOp>:(of, (NP\\NP)/NP, IN)',
        '<PushOp>:(dividends, N, NNS)',
        '<ExecOp>:(1, LP NP)',
        '<ExecOp>:(2, FA NP\\NP)',
        '<ExecOp>:(2, BA NP)',
        '<PushOp>:(and, conj, CC)',
        '<PushOp>:(that, S[em]/S[dcl], IN)',
        '<PushOp>:(the, NP[nb]/N, DT)',
        '<PushOp>:(current, N/N, JJ)',
        '<PushOp>:(yield, N, NN)',
        '<ExecOp>:(2, FA N)',
        '<ExecOp>:(2, FA NP)',
        '<PushOp>:(continue, S[dcl]\\NP, VBZ)',
        '<PushOp>:(for, ((S\\NP)\\(S\\NP))/NP, IN)',
        '<PushOp>:(a, NP[nb]/N, DT)',
        '<PushOp>:(year, N, NN)',
        '<ExecOp>:(2, FA NP)',
        '<ExecOp>:(2, FA (S\\NP)\\(S\\NP))',
        '<ExecOp>:(2, BA S[dcl]\\NP)',
        '<ExecOp>:(2, BA S[dcl])',
        '<ExecOp>:(2, FA S[em])',
        '<ExecOp>:(2, ATOM_TC NP[conj])',
        '<ExecOp>:(2, RCONJ NP)',
        '<ExecOp>:(2, FA S[dcl]\\NP)',
        '<ExecOp>:(2, BA S[dcl])',
        '<PushOp>:(., ., .)',
        '<ExecOp>:(2, LP S[dcl])',
    ]
    self.assertListEqual(expected_ops, actual_ops)
def issupported(self, category):
    """Test a FunctorTemplate is in TEMPLATES with key=category."""
    # Exact key present?
    if category in self._TEMPLATES:
        return True
    # Only functor categories get a second chance via the wildcard form.
    if not category.isfunctor:
        return False
    # Collapse all features to [X] and retry the lookup.
    wildcard = Category.from_cache(
        self._Feature.sub('[X]', category.signature))
    return wildcard in self._TEMPLATES
def lookup(self, category):
    """Lookup a FunctorTemplate with key=category.

    Args:
        category: A Category instance. Any [conj] feature is stripped
            before the lookup.

    Returns:
        The FunctorTemplate for the category — trying an exact match
        first, then a feature-wildcard ([X]) match for functor
        categories — or None if no template is registered.
    """
    category = category.remove_conj_feature()
    # EAFP: a single dict access instead of the contains-then-subscript
    # pair, and KeyError (what a failed subscript actually raises) instead
    # of a broad Exception that could hide real errors.
    try:
        return self._TEMPLATES[category]
    except KeyError:
        pass
    # Perform wildcard replacements: collapse features to [X] and retry.
    if category.isfunctor:
        wc = Category.from_cache(
            self._Feature.sub('[X]', category.signature))
        try:
            return self._TEMPLATES[wc]
        except KeyError:
            pass
    return None
def __init__(self, rule, predarg_category, finalRef, finalAtom,
             construct_empty=False):
    """Constructor.

    Args:
        rule: The production constructor rule.
        predarg_category: A predarg category.
        finalRef: The final referent result.
        finalAtom: The final atomic category result.
        construct_empty: If true the functor should be constructed with
            an empty DrsProduction as the final atom.
    """
    self._constructor_rule = rule
    self._final_ref = finalRef
    self._final_atom = finalAtom
    self._construct_empty = construct_empty
    self._predarg_category = predarg_category
    # Cache the category with the predarg annotations stripped off.
    self._clean_category = Category.from_cache(
        predarg_category.clean(True))
def lookup_unary(self, result, argument):
    """Lookup a unary rule mapping argument to result.

    Args:
        result: The result category, as a Category or signature string.
        argument: The argument category, as a Category or signature string.

    Returns:
        The matching UnaryRule — trying the exact key first, then the
        feature-wildcard ([X]) key — or None if neither is present.

    Raises:
        TypeError: If result or argument is neither a string nor a
            Category.
    """
    # Coerce signatures to Category; reject anything else.
    if isinstance(result, (str, unicode)):
        result = Category(result)
    elif not isinstance(result, Category):
        raise TypeError(
            'Model.lookup_unary() expects signature or Category result')
    if isinstance(argument, (str, unicode)):
        argument = Category(argument)
    elif not isinstance(argument, Category):
        raise TypeError(
            'Model.lookup_unary() expects signature or Category argument')
    key = UnaryRule.create_key(result, argument)
    # KeyError is what a failed dict subscript raises; catching the broad
    # Exception here could mask unrelated bugs.
    try:
        return self._UNARY[key]
    except KeyError:
        pass
    # Perform wildcard replacements: collapse features to [X] and retry.
    wc = Category.from_cache(self._Feature.sub('[X]', key.signature))
    try:
        return self._UNARY[wc]
    except KeyError:
        return None
def make_lexicon(daemon):
    """Build lexicon files from the CCG derivations of a parser daemon.

    Scans data/ldc/<daemon>/ccgbank for ccg_derivation*.txt files, extracts
    a lexicon from every derivation, and writes the result under
    data/ldc/<daemon>/lexicon in two layouts: 'az' (one file per
    first-letter bucket of the dictionary) and 'rt' (one file per
    return-type atom).

    Args:
        daemon: Name of the parser daemon sub-directory under data/ldc.
    """
    global pypath, projdir, datapath, idsrch
    allfiles = []
    projdir = os.path.dirname(os.path.dirname(__file__))
    easysrl_path = os.path.join(projdir, 'data', 'ldc', daemon, 'lexicon')
    # Create the output directory tree if missing.
    if not os.path.exists(easysrl_path):
        os.makedirs(easysrl_path)
    if not os.path.exists(os.path.join(easysrl_path, 'rt')):
        os.makedirs(os.path.join(easysrl_path, 'rt'))
    if not os.path.exists(os.path.join(easysrl_path, 'az')):
        os.makedirs(os.path.join(easysrl_path, 'az'))
    # Get files
    ldcpath = os.path.join(projdir, 'data', 'ldc', daemon, 'ccgbank')
    dirlist1 = sorted(os.listdir(ldcpath))
    #dirlist1 = ['ccg_derivation00.txt']
    for fname in dirlist1:
        if 'ccg_derivation' not in fname:
            continue
        ldcpath1 = os.path.join(ldcpath, fname)
        if os.path.isfile(ldcpath1):
            allfiles.append(ldcpath1)
    failed_parse = 0
    # NOTE(review): failed_ccg_derivation is never appended to or read
    # anywhere in this function.
    failed_ccg_derivation = []
    start = 0
    progress = -1
    dictionary = None
    for fn in allfiles:
        # File id used to build sentence uids (e.g. '<id>-0007').
        idx = idsrch.match(fn)
        if idx is None:
            continue
        idx = idx.group('id')
        with open(fn, 'r') as fd:
            lines = fd.readlines()
        name, _ = os.path.splitext(os.path.basename(fn))
        # start only offsets the first file processed; it is reset to 0
        # on the first iteration.
        for i in range(start, len(lines)):
            start = 0
            ccgbank = lines[i].strip()
            # Skip blank lines and '#' comment lines.
            if len(ccgbank) == 0 or ccgbank[0] == '#':
                continue
            if progress < 0:
                print('%s-%04d' % (name, i))
            else:
                progress = print_progress(progress, 10)
            try:
                # CCG parser is Java so output is UTF-8.
                ccgbank = safe_utf8_decode(ccgbank)
                pt = parse_ccg_derivation(ccgbank)
                # NOTE(review): s is unused; presumably the call only
                # validates that a sentence can be recovered — confirm.
                s = sentence_from_pt(pt).strip()
            except Exception:
                failed_parse += 1
                # NOTE(review): this raise makes the continue below
                # unreachable and defeats the failed_parse counter; it
                # looks like a debugging leftover — confirm intent.
                raise
                continue
            uid = '%s-%04d' % (idx, i)
            try:
                #dictionary[0-25][stem][set([c]), set(uid)]
                dictionary = extract_lexicon_from_pt(pt, dictionary, uid=uid)
            except Exception as e:
                print(e)
                # NOTE(review): same as above — raise defeats the continue.
                raise
                continue
    rtdict = {}
    # NOTE(review): idx here shadows the file id bound in the loop above.
    for idx in range(len(dictionary)):
        # Bucket file name: unichr(0 + 0x40) is '@' — given the [0-25]
        # comment above, 0x41 ('A') may have been intended; confirm.
        fname = unichr(idx + 0x40)
        filepath = os.path.join(easysrl_path, 'az', fname + '.txt')
        with open(filepath, 'w') as fd:
            d = dictionary[idx]
            for k, v in d.iteritems():
                # k == stem, v = {c: set(uid)}
                fd.write(b'<predicate name=\'%s\'>\n' % safe_utf8_encode(k))
                for x, w in v.iteritems():
                    fd.write(b'<usage \'%s\'>\n' % safe_utf8_encode(x))
                    # Usage keys look like 'word:signature'.
                    nc = x.split(':')
                    if len(nc) == 2:
                        c = Category.from_cache(
                            Category(nc[1].strip()).clean(True))
                        # Return type atom
                        rt = c.extract_unify_atoms(False)[-1]
                        # Group categories by their return-type atom for
                        # the 'rt' output pass below.
                        if rt in rtdict:
                            cdict = rtdict[rt]
                            if c in cdict:
                                cdict[c].append(nc[0])
                            else:
                                cdict[c] = [nc[0]]
                        else:
                            rtdict[rt] = {c: [nc[0]]}
                    for y in w:
                        fd.write(b'sentence id: ' + safe_utf8_encode(y))
                        fd.write(b'\n')
                    fd.write(b'</usage>\n')
                fd.write(b'</predicate>\n\n')
            # Free up memory
            dictionary[idx] = None
            d = None
    for rt, cdict in rtdict.iteritems():
        # Return-type file name: signature with brackets flattened.
        fname = rt.signature.replace('[', '_').replace(']', '')
        filepath = os.path.join(easysrl_path, 'rt', fname + '.txt')
        with open(filepath, 'w') as fd:
            for c, vs in cdict.iteritems():
                fd.write(b'<category signature=\'%s\'>\n' % safe_utf8_encode(c))
                for v in vs:
                    fd.write(v)
                    fd.write(b'\n')
                fd.write(b'</category>\n\n')
def test7_Wsj0051_30(self):
    # Wsj0051-30: "Fujitsu and NEC said they were still investigating,
    # and that knowledge of more such bids could emerge."
    derivation = r''' (<T S[dcl] 0 2> (<T S[dcl] 1 2> (<T NP 0 1> (<T N 1 2> (<L N NNP NNP Fujitsu N>) (<T N[conj] 1 2> (<L conj CC CC and conj>) (<L N NNP NNP NEC N>) ) ) ) (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/S[dcl] VBD VBD said (S[dcl]\NP_146)/S[dcl]_147>) (<T S[dcl] 0 2> (<T S[dcl] 1 2> (<L NP PRP PRP they NP>) (<T S[dcl]\NP 0 2> (<T (S[dcl]\NP)/(S[ng]\NP) 0 2> (<L (S[dcl]\NP)/(S[ng]\NP) VBD VBD were (S[dcl]\NP_156)/(S[ng]_157\NP_156:B)_157>) (<L (S\NP)\(S\NP) RB RB still (S_169\NP_164)_169\(S_169\NP_164)_169>) ) (<L S[ng]\NP VBG VBG investigating S[ng]\NP_174>) ) ) (<T S[dcl][conj] 1 2> (<L , , , , ,>) (<T S[dcl][conj] 1 2> (<L conj CC CC and conj>) (<T S[em] 0 2> (<L S[em]/S[dcl] IN IN that S[em]/S[dcl]_181>) (<T S[dcl] 1 2> (<T NP 0 2> (<T NP 0 1> (<L N NN NN knowledge N>) ) (<T NP\NP 0 2> (<L (NP\NP)/NP IN IN of (NP_207\NP_207)/NP_208>) (<T NP 0 1> (<T N 1 2> (<L N/N JJR JJR more N_224/N_224>) (<T N 1 2> (<L N/N JJ JJ such N_217/N_217>) (<L N NNS NNS bids N>) ) ) ) ) ) (<T S[dcl]\NP 0 2> (<L (S[dcl]\NP)/(S[b]\NP) MD MD could (S[dcl]\NP_190)/(S[b]_191\NP_190:B)_191>) (<L S[b]\NP VB VB emerge S[b]\NP_196>) ) ) ) ) ) ) ) ) (<L . . . . 
.>) ) '''
    parse_tree = parse_ccg_derivation(derivation)
    builder = Ccg2Drs()
    # Here conj + S[em] -> S[dcl][conj] is a plain right pass-through.
    pass_rule = get_rule(Category.from_cache('conj'),
                         Category.from_cache('S[em]'),
                         Category.from_cache('S[dcl][conj]'))
    self.assertEqual(pass_rule, RL_RPASS)
    builder.build_execution_sequence(parse_tree)
    # Verify the execution queue mirrors the derivation order.
    actual_ops = [repr(op) for op in builder.exeque]
    expected_ops = [
        '<PushOp>:(Fujitsu, N, NNP)',
        '<PushOp>:(and, conj, CC)',
        '<PushOp>:(NEC, N, NNP)',
        '<ExecOp>:(2, RP N[conj])',
        '<ExecOp>:(2, RCONJ N)',
        '<ExecOp>:(1, LP NP)',
        '<PushOp>:(say, (S[dcl]\\NP)/S[dcl], VBD)',
        '<PushOp>:(they, NP, PRP)',
        '<PushOp>:(be, (S[dcl]\\NP)/(S[ng]\\NP), VBD)',
        '<PushOp>:(still, (S\\NP)\\(S\\NP), RB)',
        '<ExecOp>:(2, BX (S[dcl]\\NP)/(S[ng]\\NP))',
        '<PushOp>:(investigate, S[ng]\\NP, VBG)',
        '<ExecOp>:(2, FA S[dcl]\\NP)',
        '<ExecOp>:(2, BA S[dcl])',
        '<PushOp>:(,, ,, ,)',
        '<PushOp>:(and, conj, CC)',
        '<PushOp>:(that, S[em]/S[dcl], IN)',
        '<PushOp>:(knowledge, N, NN)',
        '<ExecOp>:(1, LP NP)',
        '<PushOp>:(of, (NP\\NP)/NP, IN)',
        '<PushOp>:(more, N/N, JJR)',
        '<PushOp>:(such, N/N, JJ)',
        '<PushOp>:(bids, N, NNS)',
        '<ExecOp>:(2, FA N)',
        '<ExecOp>:(2, FA N)',
        '<ExecOp>:(1, LP NP)',
        '<ExecOp>:(2, FA NP\\NP)',
        '<ExecOp>:(2, BA NP)',
        '<PushOp>:(could, (S\\NP)/(S\\NP), MD)',
        '<PushOp>:(emerge, S[b]\\NP, VB)',
        '<ExecOp>:(2, FA S[dcl]\\NP)',
        '<ExecOp>:(2, BA S[dcl])',
        '<ExecOp>:(2, FA S[em])',
        '<ExecOp>:(2, RP S[dcl][conj])',
        '<ExecOp>:(2, RP S[dcl][conj])',
        '<ExecOp>:(2, RCONJ S[dcl])',
        '<ExecOp>:(2, FA S[dcl]\\NP)',
        '<ExecOp>:(2, BA S[dcl])',
        '<PushOp>:(., ., .)',
        '<ExecOp>:(2, LP S[dcl])',
    ]
    self.assertListEqual(expected_ops, actual_ops)