示例#1
0
    def _doExtend(self, syms, s, t):
        """
        build a 1-branch (extending) grammar rule  s -> t  and register it

        arguments:
            self  -
            syms  - grammar symbol table
            s     - left  part of rule (syntax specification string)
            t     - right part of rule (syntax specification string)

        returns:
            the new ExtendingRule on success, otherwise None after writing
            a '**' diagnostic to stderr
        """

        if t is None or len(t) == 0:
            print >> sys.stderr, '** incomplete grammar rule'
            return None

        try:
            lspec = syntaxSpecification.SyntaxSpecification(syms, s)
            lcat = lspec.catg
            lfet = lspec.synf
            rspec = syntaxSpecification.SyntaxSpecification(syms, t)
            rcat = rspec.catg
            rfet = rspec.synf

            # bits 0 and 1 of the left-side positive features are handed to
            # the consistency check as its rh and lh flags (presumably the
            # right/left inheritance markers -- confirm in symbolTable)
            rh = lfet is not None and lfet.positive.test(0)
            lh = lfet is not None and lfet.positive.test(1)

            consistent = symbolTable.featureConsistencyExtend(
                lfet, rfet, None, lh, rh)
            if not consistent:
                print >> sys.stderr, '** bad syntactic feature inheritance'
                raise ellyException.FormatFailure
        except ellyException.FormatFailure:
            print >> sys.stderr, '** bad syntactic category or features'
            return None

        if max(lcat, rcat) >= symbolTable.NMAX:
            print >> sys.stderr, 'too many syntactic categories'
            return None
        if min(lcat, rcat) < 0:
            print >> sys.stderr, '** bad syntax specification'
            return None

        # NOTE(review): lfet is dereferenced here without the None guard
        # used above -- confirm that synf can never actually be None
        lfet.negative.complement()
        rule = grammarRule.ExtendingRule(lcat, lfet.positive, lfet.negative)
        rule.gens = self.d1bp  # default generative procedure
        rule.utfet = rfet.makeTest()  # combined feature test for right side

        if s == '...' and t == '...':
            print >> sys.stderr, '** bad type 0 rule'
            return None

        self.extens[rcat].append(rule)  # index rule by right-side category
        self.mat.join(lcat, rcat)
        return rule
示例#2
0
    def __init__(self, ptr, sym):
        """
        initialization: build the extractor table from
        ellyConfiguration.extractors

        each configured extractor is a sequence
            [ procedure , syntax spec , (semantic features) , (extra) ]
        and is stored in self.exs as
            [ procedure , category , positive syntactic features ,
              (positive semantic features) , (extra, copied as is) ]

        arguments:
            self  -
            ptr   - parse tree
            sym   - symbol table

        exceptions:
            FormatFailure on error
        """

        self.ptr = ptr
        self.sym = sym
        self.exs = []
        for spec in ellyConfiguration.extractors:
            method = spec[0]
            nfields = len(spec)
            syntax = syntaxSpecification.SyntaxSpecification(
                sym, spec[1].lower())
            record = [method, syntax.catg, syntax.synf.positive]
            if nfields > 2:
                # '-' stands for "no semantic features"
                if spec[2] == '-':
                    semstr = None
                else:
                    semstr = spec[2].lower()
                semantic = featureSpecification.FeatureSpecification(
                    sym, semstr, True)
                record.append(semantic.positive)
                if nfields > 3:
                    record.append(spec[3])
            self.exs.append(record)
示例#3
0
    def __init__ ( self , symtb , defr ):

        """
        initialization from a definition string of the form

            ELEMENT ELEMENT ... : SYNTAX
            ELEMENT ELEMENT ... : SYNTAX SEMANTICS BIAS

        any problem is reported through self._err() and leaves the object
        only partly initialized; no exception is raised for malformed input

        arguments:
            self  -
            symtb - symbol table for interpreting syntax
            defr  - definition input string
        """

        self._errcount = 0
        ru = defr.split(' : ')
        if len(ru) != 2:
            self._err('incomplete template',defr)
            return
        [ elems , defns ] = ru
        rw = elems.split(' ')
        if len(rw) < 2:
            self._err('trivial template',defr)
            return
        le = [ ]
        for w in rw:
            x = w.strip()
            lx = len(x)
            if lx == 0:
                self._err('null template element',defr)
                return
            if x[0] == '%':
                if lx > 1 and ellyChar.isLetter(x[1]):
                    if lx > 2:
                        # NOTE(review): x[1] was just tested to be a letter,
                        # so this comparison is always true and every class
                        # ID longer than '%' + one letter is rejected; x[2]
                        # may have been intended -- confirm before changing
                        if x[1] != '*':
                            self._err('bad class ID',defr)
                            return
                    x = x.lower()
            le.append(x)
        if self._errcount > 0: return
        self.listing = le

        de = defns.split(' ')
        lde = len(de)
        if lde != 1 and lde != 3:
            self._err('bad template definition',defr)
            return
        syns = de[0]
        sems = de[1] if lde > 1 else None
        try:
            spec = syntaxSpecification.SyntaxSpecification(symtb,syns)
            semf = featureSpecification.FeatureSpecification(symtb,sems,True)
        except ellyException.FormatFailure:
            self._err('bad definition' , defr)
            return

        # parse the bias before committing any attribute so that a
        # non-numeric value is reported like every other input error
        # instead of escaping as an uncaught ValueError
        try:
            bias = int(de[2]) if lde > 1 else 0
        except ValueError:
            self._err('bad template bias' , defr)
            return

        self.lstg = le
        self.catg = spec.catg
        self.synf = spec.synf.positive
        self.semf = semf.positive
        self.bias = bias
示例#4
0
    def _doSplit(self, syms, s, t, u):
        """
        define a 2-branch grammar rule  s -> t u  and register it

        arguments:
            self  -
            syms  - grammar symbol table
            s     - left  part of rule
            t     - first  half of right part of rule
            u     - second half of right part of rule

        returns:
            2-branch splitting rule on success, None otherwise after
            writing a diagnostic to stderr
        """

        if t is None or len(t) == 0 or u is None or len(u) == 0:
            print >> sys.stderr, '** incomplete grammar rule'
            return None
        try:
            ss = syntaxSpecification.SyntaxSpecification(syms, s)
            ns = ss.catg
            fs = ss.synf
            st = syntaxSpecification.SyntaxSpecification(syms, t)
            nt = st.catg
            ft = st.synf
            su = syntaxSpecification.SyntaxSpecification(syms, u)
            nu = su.catg
            fu = su.synf

            # bits 0 and 1 of the left-side positive features are passed to
            # the consistency check as its rh and lh flags
            if fs is None:
                lh = False
                rh = False
            else:
                rh = fs.positive.test(0)
                lh = fs.positive.test(1)
            if not symbolTable.featureConsistencySplit(fs, ft, fu, lh, rh):
                print >> sys.stderr, '** bad syntactic feature inheritance'
                raise ellyException.FormatFailure
        except ellyException.FormatFailure:
            # report the failure the same way _doExtend does instead of
            # returning silently
            print >> sys.stderr, '** bad syntactic category or features'
            return None
        if ns >= symbolTable.NMAX or nt >= symbolTable.NMAX or nu >= symbolTable.NMAX:
            print >> sys.stderr, 'too many syntactic categories'
            return None
        if ns < 0 or nt < 0 or nu < 0:
            print >> sys.stderr, '** bad syntax specification'
            return None
        fs.negative.complement()
        ru = grammarRule.SplittingRule(ns, fs.positive, fs.negative)
        ru.gens = self.d2bp  # default 2-branch generative procedure
        ru.ltfet = ft.makeTest(
        )  # combine positive and negative features for testing
        ru.rtfet = fu.makeTest(
        )  # combine positive and negative features for testing
        ru.rtyp = nu
        if t == '...':
            if u == '...':
                print >> sys.stderr, '** bad type 0 rule'
                return None  # cannot have a rule of the form X->... ...
            else:
                self.mat.join(ns, nu)  # for rule of form X->... Y, we see X->Y
        else:
            self.mat.join(ns, nt)  # otherwise, treat as normal 2-branch
        self.splits[nt].append(ru)  # add rule to grammar table
        return ru
示例#5
0
    def define(self, syms, defn):
        """
        process grammar rules from an EllyDefinitionReader

        arguments:
            self  -
            syms  - grammar symbol table
            defn  - rule definitions

        returns:
            True on success, False otherwise
        """

        #       print "defining" , defn , len(defn.buffer) , "lines"

        skp = 0  # skipped lines

        nor = 0  # number of rules
        now = 0  # number dictionary entries
        nop = 0  # number of procedures

        lno = 0  # line number in definition input

        eno = 0  # error count

        while True:

            line = defn.readline().lower()
            if len(line) == 0: break
            lno += 1

            #           print 'after line' , lno , '[' + line + ']'

            if not isNewRule(line):
                print '*  skipped: [', line, ']'
                skp += 1
                continue

            c = line[0]  # single char indicating type of rule to define
            line = line[2:].strip()

            cogn = []  # for cognitive  semantics
            genr = []  # for generative semantics
            p = cogn  # start with cognitive

            if c != 'i':  # not global variable initialization?
                dl = line
                dlno = lno
                while True:
                    l = defn.readline()  # if so, parse semantics
                    lno += 1
                    if len(l) == 0:
                        print >> sys.stderr, '** unexpected EOF at', lno
                        return False
                    elif l[:2] == '__':  # end of semantic procedure?
                        break
                    elif l[:1] == '_':  # end of cognitive procedure?
                        p = genr
                    elif isNewRule(l):
                        defn.unreadline(l)
                        lno -= 1
                        print >> sys.stderr, '** no termination of semantic procedures'
                        print >> sys.stderr, '*  on or after line', dlno, '[' + dl + ']'
                        eno += 1
                        c = '?'
                        break
                    else:
                        p.append(l)  # add line to accumulating procedure

            if c == 'g':  # grammar rule?
                nor += 1
                dl = definitionLine.DefinitionLine(line)
                first = dl.nextInTail()
                if dl.isEmptyTail():
                    ru = self._doExtend(syms, dl.left,
                                        first)  # make 1-branch rule
                    if ru == None:
                        print >> sys.stderr, '*  on or after line', lno, '[', line, ']'
                        eno += 1
                        continue
                    ru.gens = self.d1bp  # default 1-branch procedure
                    nwy = 1
                else:
                    ru = self._doSplit(syms, dl.left, first,
                                       dl.nextInTail())  # 2-branch rule
                    if ru == None:
                        print >> sys.stderr, '*  on or after line', lno, '[', line, ']'
                        eno += 1
                        continue
                    ru.gens = self.d2bp  # default 2-branch procedure
                    nwy = 2
                ru.cogs = compile(syms, 'c', cogn, nwy)  # compile semantics
                if len(genr) > 0:  # generative procedure defined?
                    ru.gens = compile(syms, 'g',
                                      genr)  # if so, replace default
                if ru.cogs == None or ru.gens == None:
                    print >> sys.stderr, '** ERROR g: [', line, ']'
                    eno += 1
                    continue
            elif c == 'd':  # internal dictionary entry?
                now += 1
                dl = definitionLine.DefinitionLine(line)
                #               print 'len=' , len(dl.left) , type(dl.left) , dl.left
                dllf = dl.left
                #               print 'len=' , len(dllf) , type(dllf) , dllf
                try:
                    ss = syntaxSpecification.SyntaxSpecification(syms, dl.tail)
                except ellyException.FormatFailure:
                    print >> sys.stderr, '** ERROR d: [', line, ']'
                    eno += 1
                    continue
                ru = grammarRule.ExtendingRule(ss.catg, ss.synf.positive)
                ru.cogs = compile(syms, 'c', cogn)
                if len(genr) > 0:  # generative procedure defined?
                    ru.gens = compile(syms, 'g', genr)  # if so, compile it
                else:
                    ru.gens = compile(syms, 'g',
                                      ['obtain'])  # otherwise, compile default
                if not dllf in self.dctn:  # make sure word is in dictionary
                    self.dctn[dllf] = []  #
                self.dctn[dllf].append(ru)  # add rule to dictionary
                if ru.cogs == None or ru.gens == None:
                    print >> sys.stderr, '** ERROR d: [', line, ']'
                    eno += 1
                    continue
            elif c == 'p':  # semantic subprocedure?
                k = line.find(' ')  # name should have no spaces
                if k > 0 or len(genr) == 0:
                    print >> sys.stderr, '** ERROR p: bad format [', line, ']'
                    eno += 1
                    continue
                if line in self.pndx:
                    print >> sys.stderr, '** ERROR p: subprocedure', line, 'redefined'
                    eno += 1
                    continue
                nop += 1
                self.pndx[line] = compile(syms, 'g',
                                          genr)  # compile generative procedure
            elif c == 'i':  # global variable initialization?
                k = line.find('=')
                if k <= 0:
                    print >> sys.stderr, '** bad initialization:', '[' + line + ']'
                    eno += 1
                    continue
                vr = line[:k].strip().lower()
                va = line[k + 1:].lstrip()
                self.initzn.append([vr, va])  # add initialization
            else:
                print >> sys.stderr, '** unknown rule type=', c + ':'
                print >> sys.stderr, '*  on or after line', lno, '[' + line + ']'
                eno += 1
                continue

#       print 'SKIP' , skp
        if skp > 0:
            print >> sys.stderr, '**', skp, 'uninterpretable input lines skipped'
            eno += skp

        if eno > 0:
            print >> sys.stderr, '**', eno, 'grammar errors in all'
            return False

        print "added"
        print NBSP + '{0:4} grammar rules'.format(nor)
        print NBSP + '{0:4} dictionary rules'.format(now)
        print NBSP + '{0:4} procedures'.format(nop)
        return True
示例#6
0
    def __init__(self, syms, dfls):
        """
        initialization of a link from a list of definition elements

        layout of dfls, by number of elements:
            3 - [ pattern , syntax , next state ]
            4 - [ pattern , syntax , semantics , next state ]
            5 - [ pattern , syntax , semantics , bias , next state ]
        where '-' for syntax or semantics means "none"

        sets self.patn, self.catg, self.synf, self.semf, self.bias and
        self.nxts

        arguments:
            self  -
            syms  - Elly grammatical symbol table
            dfls  - definition elements in list

        exceptions:
            FormatFailure on error
        """

        count = len(dfls)
        if count < 3 or count > 5:  # must have 3 to 5 elements
            raise ellyException.FormatFailure

        ptext = dfls[0]
        if ptext == '\\0':
            self.patn = u'\x00'  # special nul pattern
        elif ellyWildcard.numSpaces(list(ptext)) > 0:
            print >> sys.stderr, '** link pattern includes space:', ptext
            raise ellyException.FormatFailure
        else:
            self.patn = ellyWildcard.convert(ptext)  # encode Elly pattern

        # a lone '$' is exempt from the bad-pattern check
        if ptext != '$':
            if self.patn is None or ellyWildcard.minMatch(self.patn) == 0:
                print >> sys.stderr, '** bad link pattern:', ptext
                raise ellyException.FormatFailure

        target = dfls[-1]  # next state, still as a string

        self.catg = None  # defaults
        self.synf = None
        self.semf = None
        self.bias = 0

        syntax = dfls[1].lower()  # assumed not to be Unicode
        if syntax != '-':  # allow for no category
            syx = syntaxSpecification.SyntaxSpecification(syms, syntax)
            if syx is not None:
                # syntax is allowed only when next state is -1 or -2
                if target not in ('-1', '-2'):
                    raise ellyException.FormatFailure
                self.catg = syx.catg  # syntactic category
                self.synf = syx.synf.positive  # syntactic features

        semantic = None
        if count > 3:
            # semantics is allowed only when next state is -1
            if target != '-1':
                raise ellyException.FormatFailure
            if dfls[2] != '-':
                semantic = dfls[2].lower()
        sem = featureSpecification.FeatureSpecification(syms, semantic, True)
        self.semf = sem.positive  # get semantic features

        if count > 4:
            try:
                self.bias = int(dfls[3])
            except ValueError:
                raise ellyException.FormatFailure  # unrecognizable bias

        try:
            state = int(target)  # next state for link
        except ValueError:
            raise ellyException.FormatFailure  # unrecognizable number

        if state < 0:  # final transition?
            if self.patn == u'\x00':
                raise ellyException.FormatFailure  # nul pattern not allowed here
            if state == -1:
                last = self.patn[-1]  # last pattern element
                # final pattern must end with * or $; default is $
                if last not in (ellyWildcard.cALL, ellyWildcard.cEND):
                    self.patn += ellyWildcard.cEND
                    print >> sys.stderr, '** final $ added to pattern', list(
                        self.patn)

        self.nxts = state  # specify next state