def _doExtend(self, syms, s, t):
    """
    build and register a 1-branch (extending) grammar rule  s -> t

    arguments:
        self  -
        syms  - grammar symbol table
        s     - left  part of rule, as a syntax specification string
        t     - right part of rule, as a syntax specification string

    returns:
        the new ExtendingRule on success, otherwise None
        (a diagnostic is written to stderr for every failure)
    """

    if not t:
        print >> sys.stderr, '** incomplete grammar rule'
        return None

    try:
        lhs = syntaxSpecification.SyntaxSpecification(syms, s)
        leftCat = lhs.catg
        leftFeat = lhs.synf
        rhs = syntaxSpecification.SyntaxSpecification(syms, t)
        rightCat = rhs.catg
        rightFeat = rhs.synf

        # feature bits 0 and 1 of the left side are the inheritance flags
        # handed to the consistency check as rh and lh respectively
        if leftFeat == None:
            lh = rh = False
        else:
            rh = leftFeat.positive.test(0)
            lh = leftFeat.positive.test(1)

        consistent = symbolTable.featureConsistencyExtend(
            leftFeat, rightFeat, None, lh, rh)
        if not consistent:
            print >> sys.stderr, '** bad syntactic feature inheritance'
            raise ellyException.FormatFailure
    except ellyException.FormatFailure:
        print >> sys.stderr, '** bad syntactic category or features'
        return None

    if leftCat >= symbolTable.NMAX or rightCat >= symbolTable.NMAX:
        print >> sys.stderr, 'too many syntactic categories'
        return None
    if leftCat < 0 or rightCat < 0:
        print >> sys.stderr, '** bad syntax specification'
        return None

    # NOTE(review): leftFeat is dereferenced here without a None check even
    # though None is allowed for above - confirm it can never be None here
    leftFeat.negative.complement()
    rule = grammarRule.ExtendingRule(leftCat, leftFeat.positive, leftFeat.negative)
    rule.gens = self.d1bp                  # default 1-branch generative procedure
    rule.utfet = rightFeat.makeTest()      # precombined positive and negative features for testing

    if s == '...' and t == '...':          # ... -> ... is not allowed
        print >> sys.stderr, '** bad type 0 rule'
        return None

    self.extens[rightCat].append(rule)     # add rule to grammar table
    self.mat.join(leftCat, rightCat)
    return rule
def __init__(self, ptr, sym):
    """
    initialization: build the extractor list from ellyConfiguration.extractors

    each entry stored in self.exs has the form
        [ procedure , syntactic category , positive syntactic features ]
    followed by positive semantic features when the configuration record
    has a third element, and by the fourth element itself when present

    arguments:
        self  -
        ptr   - parse tree
        sym   - symbol table

    exceptions:
        FormatFailure on error (nothing is caught here, so a failure in
        building a syntax or feature specification propagates to the caller)
    """

    self.ptr = ptr
    self.sym = sym
    self.exs = []

    for record in ellyConfiguration.extractors:
        method = record[0]
        syntax = syntaxSpecification.SyntaxSpecification(sym, record[1].lower())
        extractor = [method, syntax.catg, syntax.synf.positive]

        nfields = len(record)
        if nfields > 2:
            semantic = record[2]
            features = semantic.lower() if semantic != '-' else None   # '-' means none given
            spec = featureSpecification.FeatureSpecification(sym, features, True)
            extractor.append(spec.positive)
            if nfields > 3:
                extractor.append(record[3])

        self.exs.append(extractor)
def __init__(self, symtb, defr):
    """
    initialization: parse one template definition of the form

        ELEMENT ELEMENT ... : SYNTAX
        ELEMENT ELEMENT ... : SYNTAX SEMANTICS BIAS

    arguments:
        self  -
        symtb - symbol table for interpreting syntax
        defr  - definition input string

    notes:
        every problem is reported through self._err() and initialization
        then stops, leaving the remaining attributes unset; this now also
        covers a non-integer BIAS, which used to escape as a ValueError
        after the other attributes had already been stored
    """

    self._errcount = 0

    parts = defr.split(' : ')
    if len(parts) != 2:
        self._err('incomplete template', defr)
        return
    elems, defns = parts

    words = elems.split(' ')
    if len(words) < 2:
        self._err('trivial template', defr)
        return

    listing = []
    for word in words:
        x = word.strip()
        lx = len(x)
        if lx == 0:                        # comes from consecutive spaces
            self._err('null template element', defr)
            return
        # a '%' followed by a letter is rejected when anything follows
        # that letter
        if (x[0] == '%' and lx > 1 and ellyChar.isLetter(x[1])
                and lx > 2 and x[1] != '*'):
            self._err('bad class ID', defr)
            return
        listing.append(x.lower())

    if self._errcount > 0:
        return
    self.listing = listing

    de = defns.split(' ')
    lde = len(de)
    if lde != 1 and lde != 3:              # syntax only, or syntax + semantics + bias
        self._err('bad template definition', defr)
        return
    syns = de[0]
    sems = de[1] if lde > 1 else None

    try:
        spec = syntaxSpecification.SyntaxSpecification(symtb, syns)
        semf = featureSpecification.FeatureSpecification(symtb, sems, True)
    except ellyException.FormatFailure:
        self._err('bad definition', defr)
        return

    # parse the bias before storing anything so that a bad value leaves the
    # object in the same state as every other definition error
    try:
        bias = int(de[2]) if lde > 1 else 0
    except ValueError:
        self._err('bad template definition', defr)
        return

    self.lstg = listing
    self.catg = spec.catg
    self.synf = spec.synf.positive
    self.semf = semf.positive
    self.bias = bias
def _doSplit(self, syms, s, t, u):
    """
    define a 2-branch grammar rule  s -> t u

    arguments:
        self  -
        syms  - grammar symbol table
        s     - left part of rule
        t     - first  half of right part of rule
        u     - second half of right part of rule

    returns:
        2-branch splitting rule on success, None otherwise
        (a diagnostic is written to stderr for every failure, including a
        FormatFailure raised while interpreting s, t, or u, to agree with
        the reporting done for 1-branch rules)
    """

    if t is None or len(t) == 0 or u is None or len(u) == 0:
        print >> sys.stderr, '** incomplete grammar rule'
        return None

    try:
        ss = syntaxSpecification.SyntaxSpecification(syms, s)
        ns = ss.catg
        fs = ss.synf
        st = syntaxSpecification.SyntaxSpecification(syms, t)
        nt = st.catg
        ft = st.synf
        su = syntaxSpecification.SyntaxSpecification(syms, u)
        nu = su.catg
        fu = su.synf

        # feature bits 0 and 1 of the left side are the inheritance flags
        # handed to the consistency check as rh and lh respectively
        if fs is None:
            lh = False
            rh = False
        else:
            rh = fs.positive.test(0)
            lh = fs.positive.test(1)

        if not symbolTable.featureConsistencySplit(fs, ft, fu, lh, rh):
            print >> sys.stderr, '** bad syntactic feature inheritance'
            raise ellyException.FormatFailure
    except ellyException.FormatFailure:
        # previously silent: a bad category or feature string gave no message
        print >> sys.stderr, '** bad syntactic category or features'
        return None

    if ns >= symbolTable.NMAX or nt >= symbolTable.NMAX or nu >= symbolTable.NMAX:
        print >> sys.stderr, 'too many syntactic categories'
        return None
    if ns < 0 or nt < 0 or nu < 0:
        print >> sys.stderr, '** bad syntax specification'
        return None

    # NOTE(review): fs is dereferenced here without a None check even though
    # None is allowed for above - confirm it can never be None at this point
    fs.negative.complement()
    ru = grammarRule.SplittingRule(ns, fs.positive, fs.negative)
    ru.gens = self.d2bp                    # default 2-branch generative procedure
    ru.ltfet = ft.makeTest()               # combine positive and negative features for testing
    ru.rtfet = fu.makeTest()               # combine positive and negative features for testing
    ru.rtyp = nu

    if t == '...':
        if u == '...':
            print >> sys.stderr, '** bad type 0 rule'
            return None                    # cannot have a rule of the form X->... ...
        self.mat.join(ns, nu)              # for rule of form X->... Y, we see X->Y
    else:
        self.mat.join(ns, nt)              # otherwise, treat as normal 2-branch

    self.splits[nt].append(ru)             # add rule to grammar table
    return ru
def define(self, syms, defn): """ process grammar rules from an EllyDefinitionReader arguments: self - syms - grammar symbol table defn - rule definitions returns: True on success, False otherwise """ # print "defining" , defn , len(defn.buffer) , "lines" skp = 0 # skipped lines nor = 0 # number of rules now = 0 # number dictionary entries nop = 0 # number of procedures lno = 0 # line number in definition input eno = 0 # error count while True: line = defn.readline().lower() if len(line) == 0: break lno += 1 # print 'after line' , lno , '[' + line + ']' if not isNewRule(line): print '* skipped: [', line, ']' skp += 1 continue c = line[0] # single char indicating type of rule to define line = line[2:].strip() cogn = [] # for cognitive semantics genr = [] # for generative semantics p = cogn # start with cognitive if c != 'i': # not global variable initialization? dl = line dlno = lno while True: l = defn.readline() # if so, parse semantics lno += 1 if len(l) == 0: print >> sys.stderr, '** unexpected EOF at', lno return False elif l[:2] == '__': # end of semantic procedure? break elif l[:1] == '_': # end of cognitive procedure? p = genr elif isNewRule(l): defn.unreadline(l) lno -= 1 print >> sys.stderr, '** no termination of semantic procedures' print >> sys.stderr, '* on or after line', dlno, '[' + dl + ']' eno += 1 c = '?' break else: p.append(l) # add line to accumulating procedure if c == 'g': # grammar rule? 
nor += 1 dl = definitionLine.DefinitionLine(line) first = dl.nextInTail() if dl.isEmptyTail(): ru = self._doExtend(syms, dl.left, first) # make 1-branch rule if ru == None: print >> sys.stderr, '* on or after line', lno, '[', line, ']' eno += 1 continue ru.gens = self.d1bp # default 1-branch procedure nwy = 1 else: ru = self._doSplit(syms, dl.left, first, dl.nextInTail()) # 2-branch rule if ru == None: print >> sys.stderr, '* on or after line', lno, '[', line, ']' eno += 1 continue ru.gens = self.d2bp # default 2-branch procedure nwy = 2 ru.cogs = compile(syms, 'c', cogn, nwy) # compile semantics if len(genr) > 0: # generative procedure defined? ru.gens = compile(syms, 'g', genr) # if so, replace default if ru.cogs == None or ru.gens == None: print >> sys.stderr, '** ERROR g: [', line, ']' eno += 1 continue elif c == 'd': # internal dictionary entry? now += 1 dl = definitionLine.DefinitionLine(line) # print 'len=' , len(dl.left) , type(dl.left) , dl.left dllf = dl.left # print 'len=' , len(dllf) , type(dllf) , dllf try: ss = syntaxSpecification.SyntaxSpecification(syms, dl.tail) except ellyException.FormatFailure: print >> sys.stderr, '** ERROR d: [', line, ']' eno += 1 continue ru = grammarRule.ExtendingRule(ss.catg, ss.synf.positive) ru.cogs = compile(syms, 'c', cogn) if len(genr) > 0: # generative procedure defined? ru.gens = compile(syms, 'g', genr) # if so, compile it else: ru.gens = compile(syms, 'g', ['obtain']) # otherwise, compile default if not dllf in self.dctn: # make sure word is in dictionary self.dctn[dllf] = [] # self.dctn[dllf].append(ru) # add rule to dictionary if ru.cogs == None or ru.gens == None: print >> sys.stderr, '** ERROR d: [', line, ']' eno += 1 continue elif c == 'p': # semantic subprocedure? 
k = line.find(' ') # name should have no spaces if k > 0 or len(genr) == 0: print >> sys.stderr, '** ERROR p: bad format [', line, ']' eno += 1 continue if line in self.pndx: print >> sys.stderr, '** ERROR p: subprocedure', line, 'redefined' eno += 1 continue nop += 1 self.pndx[line] = compile(syms, 'g', genr) # compile generative procedure elif c == 'i': # global variable initialization? k = line.find('=') if k <= 0: print >> sys.stderr, '** bad initialization:', '[' + line + ']' eno += 1 continue vr = line[:k].strip().lower() va = line[k + 1:].lstrip() self.initzn.append([vr, va]) # add initialization else: print >> sys.stderr, '** unknown rule type=', c + ':' print >> sys.stderr, '* on or after line', lno, '[' + line + ']' eno += 1 continue # print 'SKIP' , skp if skp > 0: print >> sys.stderr, '**', skp, 'uninterpretable input lines skipped' eno += skp if eno > 0: print >> sys.stderr, '**', eno, 'grammar errors in all' return False print "added" print NBSP + '{0:4} grammar rules'.format(nor) print NBSP + '{0:4} dictionary rules'.format(now) print NBSP + '{0:4} procedures'.format(nop) return True
def __init__(self, syms, dfls):
    """
    initialization: build one pattern link from its definition elements

    arguments:
        self  -
        syms  - Elly grammatical symbol table
        dfls  - definition elements in list, one of
                  [ pattern , syntax , next state ]
                  [ pattern , syntax , semantics , next state ]
                  [ pattern , syntax , semantics , bias , next state ]
                where '-' stands for no syntax or no semantics

    exceptions:
        FormatFailure on error
    """

    count = len(dfls)
    if count < 3 or count > 5:             # must have 3 to 5 elements
        raise ellyException.FormatFailure

    pattern = dfls[0]
    if pattern == '\\0':
        self.patn = u'\x00'                # special nul pattern
    elif ellyWildcard.numSpaces(list(pattern)) > 0:
        print >> sys.stderr, '** link pattern includes space:', pattern
        raise ellyException.FormatFailure
    else:
        self.patn = ellyWildcard.convert(pattern)      # encode Elly pattern
        if pattern != '$':
            unusable = self.patn is None or ellyWildcard.minMatch(self.patn) == 0
            if unusable:
                print >> sys.stderr, '** bad link pattern:', pattern
                raise ellyException.FormatFailure

    lastat = dfls[-1]                      # next state, still as a string

    self.catg = None                       # defaults
    self.synf = None
    self.semf = None
    self.bias = 0

    syntax = dfls[1].lower()               # assumed not to be Unicode
    if syntax != '-':                      # allow for no category
        syx = syntaxSpecification.SyntaxSpecification(syms, syntax)
        if syx != None:
            if lastat not in ('-1', '-2'):             # not a stop state for matching
                raise ellyException.FormatFailure      # cannot have syntax here
            self.catg = syx.catg           # syntactic category
            self.synf = syx.synf.positive  # syntactic features

    semantics = None
    if count > 3:
        if lastat != '-1':                 # not a stop state for matching
            raise ellyException.FormatFailure          # cannot have semantics here
        if dfls[2] != '-':
            semantics = dfls[2].lower()
    sem = featureSpecification.FeatureSpecification(syms, semantics, True)
    self.semf = sem.positive               # get semantic features

    if count > 4:
        try:
            self.bias = int(dfls[3])
        except ValueError:
            raise ellyException.FormatFailure          # unrecognizable bias

    try:
        nxt = int(lastat)                  # next state for link
    except ValueError:
        raise ellyException.FormatFailure  # unrecognizable number

    if nxt < 0:                            # final transition?
        if self.patn == u'\x00':
            raise ellyException.FormatFailure          # final state not allowed here
        if nxt == -1:
            last = self.patn[-1]           # if so, get last pattern element
            if last not in (ellyWildcard.cALL, ellyWildcard.cEND):
                # final pattern must end with * or $, default is $
                self.patn += ellyWildcard.cEND
                print >> sys.stderr, '** final $ added to pattern', list(
                    self.patn)

    self.nxts = nxt                        # specify next state