Example #1
0
    def __init__ ( self , syms , fets=None , semantic=False ):

        """
        initialization

        arguments:
            self     -
            syms     - symbol table
            fets     - string representation of feature set
            semantic - flag for semantic features

        exceptions:
            FormatFailure on error
        """

        if syms == None or fets == None:  # special case generating zero feature set
            self.positive = ellyBits.EllyBits(symbolTable.FMAX)
            self.negative = ellyBits.EllyBits(symbolTable.FMAX)
            self.id = ''
            return

        segm = fets.lower()
#       print "features=",segm,"semantic=",semantic
        if segm == None or len(segm) < 3 or segm[0] != '[' or segm[-1] != ']':
            raise ellyException.FormatFailure
        elif segm[1] == ' ' or ellyChar.isLetterOrDigit(segm[1]) or segm[1] == '*':
            raise ellyException.FormatFailure
        else:
            self.id = segm[1]
#           print "id=",self.id
            fs = syms.getFeatureSet(segm[1:-1] , semantic)
#           print 'fs=' , str(fs[0]) , ',' , str(fs[1])
            if fs == None:
                raise ellyException.FormatFailure
            self.positive , self.negative = fs
Example #2
0
 def __init__(self):
     """
     set node attributes to default values

     arguments:
         self
     """
     self.seqn = -1                                    # no sequence number assigned yet
     self.synf = ellyBits.EllyBits(symbolTable.FMAX)   # syntactic feature bits
     self.semf = ellyBits.EllyBits(symbolTable.FMAX)   # semantic  feature bits
     self.reset()                                      # finish with shared reset logic
Example #3
0
    def __init__(self, syms):
        """
        initialization

        arguments:
            self  -
            syms  - Elly symbol table

        exceptions:
            FormatFailure on error
        """

        self.catg = syms.getSyntaxTypeIndexNumber(category)
        self.synf = None
        self.semf = None
        self.hpnc = {}                                # specifications per punctuation char
        zero = ellyBits.EllyBits()                    # all-zero feature bits
        brkg = _FS(syms, '[' + sID + sBRK + ']', True)
        #       print ( 'smfs=' , smfs )
        # compile predefined semantic features for punctuation in place
        for sky in smfs.keys():
            #           print ( 'sky=' , sky , ' : ' , smfs[sky] )
            smfs[sky] = _FS(syms, smfs[sky], True)
        # now fill in syntactic specifications
        for defn in defns:
            pc = defn[0]
            if len(defn) <= 1:                        # no feature string given?
                self.hpnc[pc] = [zero, zero]
                continue
            sxf = _FS(syms, defn[1])
            if pc in smfs:                            # pick semantic features:
                smf = smfs[pc]                        #   predefined for this char,
            elif len(defn) > 2:
                smf = brkg                            #   bracketing default,
            else:
                smf = zero                            #   or none at all
            self.hpnc[pc] = [sxf, smf]
Example #4
0
    def addGoalPositions(self, n=10):
        """
        extend goal lists and goal bits

        arguments:
            self  -
            n     - how many new positions to add
        """

        #       print 'add goals, ntyp=' , self.ntyp
        # append n fresh (distinct) empty goal lists and matching bit strings
        self.goal.extend([] for _ in range(n))
        self.gbits.extend(ellyBits.EllyBits(self.ntyp) for _ in range(n))
Example #5
0
    def __init__(self, nmax):
        """
        initialization

        arguments:
            self  -
            nmax  - how many syntactic types to encode
        """

        # build an identity matrix: row k has only bit k set
        rows = []
        for k in range(nmax):
            bits = ellyBits.EllyBits(nmax)  # zeroed bit string
            bits.set(k)                     # turn on diagonal bit
            rows.append(bits)
        self.dm = rows                      # save completed matrix
Example #6
0
    def __init__(self):
        """
        create environment for testing semantic procedure

        arguments:
            self
        """

        stb = symbolTable.SymbolTable()  # empty
        hry = conceptualHierarchy.ConceptualHierarchy()  # empty
        ctx = interpretiveContext.InterpretiveContext(stb, {}, {}, hry)
        self.context = ctx  # make available

        ptb = parseTreeBase.ParseTreeBase()  # just for generating phrases

        self.toknL = ellyToken.EllyToken(
            'uvwxxyz')  # insert dummy data that might
        self.toknR = ellyToken.EllyToken('abcdefg')  # be replaced from outside

        ctx.addTokenToListing(self.toknL)  # put a token in first position
        ctx.addTokenToListing(self.toknR)  # and a token in second

        x = ctx.syms.getSyntaxTypeIndexNumber(
            'x')  # for consistency, define two
        y = ctx.syms.getSyntaxTypeIndexNumber(
            'y')  # syntactic categories for rules

        fbs = ellyBits.EllyBits(symbolTable.FMAX)  # zero feature bits

        exL = grammarRule.ExtendingRule(x, fbs)  # dummy rules as a place for
        exR = grammarRule.ExtendingRule(x,
                                        fbs)  # attaching semantic procedures
        spl = grammarRule.SplittingRule(y, fbs)  # for testing

        # dummy semantic procedures
        gX = ["left", "right"]  # generative
        gL = ["obtain"]  #
        gR = ["obtain"]  #

        gP = ["append did it!"]  # for standalone generative subprocedure

        cX = []  # cognitive
        cL = [">> +1"]  #
        cR = [">> -1"]  #

        ctx.pushStack()  # needed for local variables usable in testing
        ctx.setLocalVariable(
            "vl", "LLLL")  # make two variables available to work with
        ctx.setLocalVariable("vr", "RRRR")  #

        ctx.setProcedure('do', self._genp(gP))  # define procedure 'do'

        exL.gens = self._genp(gL)  # assign semantic procedures to rules
        exL.cogs = self._cogp(cL)  #

        exR.gens = self._genp(gR)  #
        exR.cogs = self._cogp(cR)  #

        spl.gens = self._genp(gX)  #
        spl.cogs = self._cogp(cX)  #

        phr = ptb.makePhrase(0, spl)  # make phrase for splitting plus
        phr.krnl.lftd = ptb.makePhrase(0, exL)  # left and right descendants
        phr.krnl.rhtd = ptb.makePhrase(1, exR)  # defined by left and right
        # extending rules from above
        phr.ntok = 1

        stb.getFeatureSet('!one,two', True)  # define semantic feature
        print(stb.smindx)  # was a Python 2 print statement; now a py3 call
        smx = stb.smindx['!']  #
        ix = smx['one']  #
        print('ix=', ix)
        phr.krnl.semf.set(ix)  # turn on feature for phrase
        ix = smx['two']  #
        print('ix=', ix)
        phr.krnl.semf.set(ix)  # turn on feature for phrase
        print('semf=', phr.krnl.semf)

        self.phrase = phr  # make phrase available
Example #7
0
    def dumpAll(self):
        """
        dump all tree fragments (overrides superclass method)

        arguments:
            self  -
        """

        #       print ( ' all depth=' , self.dlm + 1 )
        if self.dlm < 0:
            return
        out.write('dump all\n\n')
        n = self.phlim - 1  # index of last node created
        while n >= 0:  # process until oldest node at n=0
            ph = self.phrases[n]  # get phrase
            if ph.dump:  # if not already dumped
                self.dumpTree(ph)  # dump subtree starting at current phrase
            n -= 1

        out.write('rules invoked and associated phrases\n')
        hr = {}  # association of rules with phrase nodes
        lm = N  # maximum number phrase nodes to report
        for k in range(self.phlim):
            ph = self.phrases[k]
            phno = ph.krnl.seqn
            rs = ph.krnl.rule.seqn
            #           print ( '!!!!  ' , phno , ':' , rs )
            if not rs in hr: hr[rs] = []
            hr[rs].append(phno)  # make association
#           print ( '!!!! =' , hr[rs] )
#       print ( len(hr) , 'distinct rules' )
#       print ( 'keys=' , hr.keys() )
        for rs in hr.keys():  # iterate on sequence numbers for rules found
            ls = hr[rs]
            if len(ls) > lm:
                ls = ls[:lm]
                ls.append('...')
            rssn = 'rule {:4d}:'.format(rs)
            out.write(rssn)  # report up to lm phrase nodes for rule
            # with this sequence number
            out.write(str(ls))
            out.write('\n')
        out.write('\n')

        wno = len(self.ctx.tokns)  # set to last parse position in input
        wn = wno
        while wn > 0 and len(self.goal[wn]) == 0:
            wn -= 1
        gs = self.goal[wn]  # goals at end of parsed input
        gl = len(gs)  # number of goals at last position

        out.write(str(gl) + ' final goals at position= ')
        out.write(str(wn) + ' / ' + str(wno) + '\n')
        for g in gs:
            fs = '{:4.4s}'.format(self.stb.getSyntaxTypeName(g.cat))
            grs = str(g)
            kn = grs.find(':') + 1
            out.write(NBSP + grs[:kn] + ' ' + fs + grs[kn:] + '\n')
        out.write("\n")
        out.write(str(self.phlim) + ' phrases altogether\n')
        out.write('\nambiguities\n')

        nph = self.phlim  # number of phrases allocated in parse
        phx = ellyBits.EllyBits(
            nph)  # to keep track of phrases listed as ambiguous
        nam = 0  # ambiguity count

        for i in range(nph):  # scan all phrases for ambiguities
            ph = self.phrases[i]
            if ph.alnk == None or phx.test(i):
                continue  # is this a new ambiguity?
            s = self.stb.getSyntaxTypeName(
                ph.krnl.typx)  # if so, show phrase info
            f = ph.krnl.synf.hexadecimal(
                False)  # convert to packed hexadecimal
            out.write("{0:<5.5s} {1}: ".format(
                s, f))  # show syntax type of ambiguity
            nam += 1
            while ph != None:
                phx.set(ph.krnl.seqn)  # mark phrase as scanned
                out.write("{:2d} ".format(
                    ph.krnl.seqn))  # show its sequence number
                bx = '-' if ph.krnl.rule.bias < 0 else '0'  # show rule bias in effect
                out.write("({0:+2d}/{1}) ".format(ph.krnl.bias, bx))
                ph = ph.alnk  # next phrase in ambiguity list
            out.write('\n')
        if nam == 0: out.write('NONE\n')

        self.showTokens(out)

        ng = self.glim
        out.write(str(nph) + ' phrases, ' + str(ng) + ' goals\n\n')
        out.flush()
Example #8
0
    # NOTE(review): fragment of a larger test driver; 'base', 'name', 'rdr',
    # 'Ctx', and 'ParseTreeWithDisplay' are defined in enclosing code not
    # visible in this excerpt
    print('loading', '[' + base + name + '.g.elly]', len(rdr.buffer), 'lines')

    # build symbol table, grammar table, and interpretive context for a parse
    stbu = symbolTable.SymbolTable()
    gtbu = grammarTable.GrammarTable(stbu, rdr)
    ctxu = Ctx()
    tksu = ctxu.tokns

    tree = ParseTreeWithDisplay(stbu, gtbu, None, ctxu)
    print()
    print(tree)
    print()
    print(dir(tree))
    print()

    # seed the parse with a literal phrase of syntactic type 'num'
    cat = stbu.getSyntaxTypeIndexNumber('num')
    fbs = ellyBits.EllyBits(symbolTable.FMAX)
    tree.addLiteralPhrase(cat, fbs)
    tree.digest()
    tksu.append(ellyToken.EllyToken('66'))
    tree.restartQueue()

    ws = ['nn', 'b', 'aj']  # from test.g.elly
    wu = ['ww', 'wx', 'wy', 'wz']  # unknown terms

    # run each known word through dictionary lookup and digestion
    for w in ws:

        tree.createPhrasesFromDictionary(w, False, False)
        #       print ( '**** to' , tree.phlim , tree.lastph , 'rule=' , tree.lastph.krnl.rule.seqn )
        tree.digest()
        #       print ( '**** to' , tree.phlim , tree.lastph , 'rule=' , tree.lastph.krnl.rule.seqn )
        tksu.append(ellyToken.EllyToken(w))
Example #9
0
 def __init__(self, m, n=0):
     """
     initialize rule attributes

     arguments:
         self  -
         m     - syntactic type to save
         n     - second type, defaulting to 0
     """
     self.seqn = 10000                # placeholder sequence number
     self.styp = m
     self.rtyp = n
     self.sfet = ellyBits.EllyBits()  # empty feature bits
Example #10
0
    def getFeatureSet ( self , fs , ty=False ):

        """
        get feature indices associated with given names in given set

        arguments:
            self  -
            fs    - feature set without enclosing brackets
            ty    - False=syntactic, True=semantic

        returns:
            list of EllyBits [ positive , negative ] on success, None on failure
        """

        if len(fs) < 1: return None
#       print ( 'fs=' , fs )

        bp = ellyBits.EllyBits(FMAX) # all feature bits zeroed
        bn = ellyBits.EllyBits(FMAX) #

        fsx = self.smindx if ty else self.sxindx
#       print ( '--------  fs=' , fs )
        fid = fs[0]                  # feature set ID
        fnm = fs[1:].split(',')      # feature names
        if not fid in fsx:           # known ID?
#           print ( 'new feature set' )
            d = { }                  # new dictionary of feature names
            if ty:
                d['*c'] = 0          # always define '*c' as semantic  feature
                d['*capital'] = 0    # equivalent to '*c'
            else:
                d['*r'] = 0          # always define '*r' as syntactic feature
                d['*right'] = 0      # equivalent to '*r'
                d['*l'] = 1          # always define '*l'
                d['*left']  = 1      # equivalent to '*l'
                d['*x'] = LAST       # always define '*x'
                d['*u'] = LAST       # always define '*u'
                d['*unique'] = LAST  # equivalent to '*u' and '*x'
            fsx[fid] = d             # make new feature set known
        h = fsx[fid]                 # for hashing of feature names
        if len(fnm) == 0:            # check for empty features
            return [ bp , bn ]
        for nm in fnm:
            nm = nm.strip()
            if len(nm) == 0: continue
            if nm[0] == '-':         # negative feature?
                b = bn               # if so, look at negative bits
                nm = nm[1:]
            elif nm[0] == '+':       # positive feature?
                b = bp               # if so, look at positive bits
                nm = nm[1:]
            else:
                b = bp               # positive bits by default

#           print ( '--------  nm=' , nm )
            nmc = nm if nm[0] != '*' else nm[1:]
            for c in nmc:            # check feature name chars
                if not ellyChar.isLetterOrDigit(c):
                    print ( 'bad feature name=' , nm , file=sys.stderr )
                    return None
            if not nm in h:          # new name in feature set?
                if nm[0] == '*':     # user cannot define reserved name
                    print ( 'unknown reserved feature=' , nm , file=sys.stderr )
                    return None
#               print ( 'define new feature' )
                k = len(h)           # yes, this will be next free index
                l = FMAX             # upper limit on feature index
                if ty:               # semantic feature?
                    k -= 1           # if so, adjust for extra name *C
                else:
                    k -= 5           # else,  adjust for *UNIQUE and extra names *L, *R , *U , *X
                    l -= 1           #        adjust upper limit for *UNIQUE
                if k == l:           # overflow check
                    print ( '** ERROR: too many feature names, fid=',fid,'nm=',nm , file=sys.stderr )
                    print ( '**' , end=' ' , file=sys.stderr )
                    print ( h.keys() , file=sys.stderr )
                    return None
                if k < 0:
                    print ( 'bad index=' , k , 'l=' , l , file=sys.stderr )
                    return None
                h[nm] = k            # define new feature
#               print ( 'k=' , k )

#           print ( 'set bit' , h[nm] , 'for' , fid + nm )
            b.set(h[nm])             # set bit for feature
        return [ bp , bn ]
Example #11
0
to be subclassed further
"""

import sys
import ellyBits
import ellyDefinitionReader
import ellyConfiguration
import grammarRule
import symbolTable
import cognitiveProcedure
import generativeProcedure
import parseTreeBase

NPOSNS = 128  # nominal minimum number of tree leaf nodes

ZEROfs = ellyBits.EllyBits(symbolTable.FMAX)  # shared all-zero feature bits


class ParseTreeBottomUp(parseTreeBase.ParseTreeBase):
    """
    parse tree plus supporting structures for table-driven
    bottom-up  parsing algorithm

    attributes:
        newph  - unique new phrases at each position
        ambig  - accumulate ambiguous phrases for reporting
        queue  - of phrases yet to process

        gtb    - basic syntax and internal dictionary
        ptb    - syntax type patterns
Example #12
0
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
"""
runs extraction methods and generates phrases
"""

import ellyBits
import ellyChar
import ellyConfiguration
import syntaxSpecification
import featureSpecification

noBits = ellyBits.EllyBits()


class EntityExtractor(object):
    """
    handler for information extraction (IE)

    attributes:
        ptr   - parse tree for extracted information
        sym   - saved symbol table
        exs   - extraction procedure list
    """
    def __init__(self, ptr, sym):
        """
        initialization
Example #13
0
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# -----------------------------------------------------------------------------
"""
classes for syntax rules with associated semantics
"""

import featureSpecification
import ellyBits

_dfrs = ellyBits.EllyBits()  # module-level default feature bits,
_dfrs.complement()           # set to the complement of a fresh bit string


class BasicRule(object):
    """
    basic rule structure

    attributes:
        cogs  - cognitive  semantics
        gens  - generative semantics
        styp  - syntactic type produced by rule
        sfet  - syntactic features to set
        sftr  -                    to reset
        bias  - for rule ordering in ambiguity handling
        nmrg  - to indicate degree of merging by rule (1 or 2)
Example #14
0
    def __init__(self, syms, defn=None):
        """
        initialization

        arguments:
            self  -
            syms  - symbol table for grammar
            defn  - EllyDefinitionReader grammar definition

        exceptions:
            TableFailure on error
        """

        self.initzn = []  # preset global variables
        self.proc = {}  # named semantic procedures
        self.dctn = {}  # builtin words and semantics
        self.pndx = {}  # standalone procedures
        self.extens = []  # 1-branch rule
        self.splits = []  # 2-branch rule
        for _ in range(symbolTable.NMAX):
            self.extens.append(
                [])  # list of 1-branch rules for each syntax type
            self.splits.append(
                [])  # list of 2-branch rules for each syntax type

        self.mat = derivabilityMatrix.DerivabilityMatrix(symbolTable.NMAX)

        # coding of predefined syntax types

        self.START = syms.getSyntaxTypeIndexNumber('sent')
        self.END = syms.getSyntaxTypeIndexNumber('end')
        self.UNKN = syms.getSyntaxTypeIndexNumber('unkn')
        self.SEPR = syms.getSyntaxTypeIndexNumber('sepr')
        self.XXX = syms.getSyntaxTypeIndexNumber('...')

        # special rule for ... type going to null

        fets = ellyBits.EllyBits(symbolTable.FMAX)
        self.arbr = grammarRule.ExtendingRule(self.XXX, fets)
        self.arbr.cogs = None
        self.arbr.gens = compile(syms, 'g', [])

        # special rule for SENT->SENT END

        ru = grammarRule.SplittingRule(self.START, fets)
        ru.rtyp = self.END
        ru.ltfet = ru.rtfet = ellyBits.join(fets, fets)
        ru.cogs = None
        ru.gens = None
        self.splits[self.START].append(ru)

        # special rule for RS (ASCII record separator)

        ru = grammarRule.ExtendingRule(self.SEPR, fets)
        ru.cogs = None
        ru.gens = None
        self.dctn[ellyChar.RS] = [ru]  # should be only rule here ever

        # predefined generative semantic procedures

        self.pndx['defl'] = compile(syms, 'g', ['left'])
        self.pndx['defr'] = compile(syms, 'g', ['right'])
        self.pndx['deflr'] = compile(syms, 'g', ['left', 'right'])

        self.d1bp = self.pndx['defl']  # default 1-branch generative semantics
        self.d2bp = self.pndx['deflr']  # default 2-branch

        if defn is not None:
            if not self.define(syms, defn):
                # fixed from Python 2 'print >> sys.stderr' statement syntax
                print('grammar table definition FAILed', file=sys.stderr)
                raise ellyException.TableFailure
Example #15
0
 def __init__(self, n):
     """ initialization
     """
     self.cogs = None                 # no cognitive semantics yet
     self.styp = n                    # syntactic type
     self.sfet = ellyBits.EllyBits()  # empty feature bits
Example #16
0
 def __init__(self):
     """ initialization
     """
     self.synf = ellyBits.EllyBits(symbolTable.FMAX)  # zeroed syntactic features
     self.catg = 0                                    # default category
Example #17
0
    def __init__(self, dta):
        """
        initialization of vocabulary object from retrieved record

        arguments:
            self  -
            dta   - what DB support returns

        throws:
            FormatFailure on error
        """

        self._ln = 0
        rec = dta[1]  # data record found for search key
        #       print ( 'voc rec=' , rec , file=sys.stderr )
        r = rec.split('=:')  # split off term in data record
        if len(r) <= 1: return  # the '=:' is mandatory
        d = r[1].strip().split(' ')  # definition is right of '=:'
        #       print ( 'VEntry: define as' , d , file=sys.stderr )
        if len(d) < 4: return  # it should have at least 4 parts
        ur = r[0].strip()  # term left of '=:'
        self.chs = list(ur)  # save it
        self.cat = int(d.pop(0))  # syntactic category
        #       print ( '    full term=' , ''.join(self.chs) , file=sys.stderr )
        sy = d.pop(0)
        nb = len(sy) * 4
        self.syf = ellyBits.EllyBits(nb)  # allocate bits
        self.syf.reinit(sy)  # set syntactic features
        sm = d.pop(0)
        nb = len(sm) * 4
        self.smf = ellyBits.EllyBits(nb)  # allocate bits
        self.smf.reinit(sm)  # set semantic  features
        self.bia = int(d.pop(0))  # save initial plausibility
        if len(d) > 0:  # any concept?
            self.con = d.pop(0).upper()  # if so, save it
        else:
            self.con = '-'

#       print ( '    translation=' , d , file=sys.stderr )

        if len(d) == 0:  # no further definition?
            self.gen = obtnp  # if so, then use default procedure
            self._nt = 0  #   i.e. no translation
        elif d[0][0] == '=':  # simple translation?
            dfs = ' '.join(d)  # just in case translation had spaces
            #           print ( 'def ext=' , dfs , file=sys.stderr )
            pls = ['append ' + dfs[1:]]
            inpts = ellyDefinitionReader.EllyDefinitionReader(pls)
            self.gen = generativeProcedure.GenerativeProcedure(None, inpts)
            self._nt = 1
        elif d[0][0] == '(':  # get predefined procedure
            inpts = ellyDefinitionReader.EllyDefinitionReader([d[0]])
            self.gen = generativeProcedure.GenerativeProcedure(None, inpts)
            self._nt = 0
        else:  # otherwise, set for selection of translation
            #           print ( 'multi selection, d=' , d , file=sys.stderr )
            cm = 'pick LANG ('  # construct instruction to select
            for p in d:
                if p[-1] == ',':
                    p = p[:-1]
                cm += p + '#'  # build selection clauses
            cm += ')'
            gens[0] = cm  # replace action
            #           print ( 'cm=' , cm )
            #           print ( 'gens=' , gens )
            inpts = ellyDefinitionReader.EllyDefinitionReader(gens)
            self.gen = generativeProcedure.GenerativeProcedure(None, inpts)
            if self.gen == None:
                print('vocabulary generative semantic failure',
                      file=sys.stderr)
                print('gens=', gens, file=sys.stderr)
                raise ellyException.FormatFailure
#           print ( 'vocabulary gen.logic' )
#           generativeDefiner.showCode(self.gen.logic)
            self._nt = len(d)

        self._ln = len(self.chs)