Exemplo n.º 1
0
    def __init__ ( self , system , create , rid ):

        """
        load all definitions from binary or text files

        arguments:
            self     -
            system   - which PyElly application
            create   - whether to create new binary
            rid      - PyElly release ID

        exceptions:
            TableFailure on error
        """

        super(Grammar,self).__init__()

        self.rls = rid
        sysf = system + grammar

        if create:
            print "recompiling grammar rules"

            self.stb = symbolTable.SymbolTable()  # new empty table to fill in

            el = [ ]

            try:
                self.mtb = macroTable.MacroTable(self.inpT(system,'m'))
            except ellyException.TableFailure:
                el.append('macro')
            try:
                self.gtb = grammarTable.GrammarTable(self.stb,self.inpT(system,'g'))
                self.stb.setBaseSymbols()
            except ellyException.TableFailure:
                el.append('grammar')
            try:
                self.ptb = patternTable.PatternTable(self.stb,self.inpT(system,'p'))
            except ellyException.TableFailure:
                el.append('pattern')

            try:
                self.hry = conceptualHierarchy.ConceptualHierarchy(self.inpT(system,'h'))
            except ellyException.TableFailure:
                el.append('concept')

            try:
                self.ntb = nameTable.NameTable(self.inpT(system,'n'))
            except ellyException.TableFailure:
                el.append('name')

            sa = self.inpT(system,'stl')
            pa = self.inpT(system,'ptl')
            try:
                self.man = morphologyAnalyzer.MorphologyAnalyzer(sa,pa)
            except ellyException.TableFailure:
                el.append('morphology')

            if len(el) > 0:
                print >> sys.stderr , 'rule FAILures on' , el
                raise ellyException.TableFailure

            if self.rls != None:
                ellyPickle.save(self,sysf)

        else:
            print "loading saved grammar rules from" , sysf

            gram = ellyPickle.load(sysf)
            if gram == None:
                raise ellyException.TableFailure
            if gram.rls != rid:
                print >> sys.stderr , 'inconsistent PyElly version for saved rules'
                sys.exit(1)
            self.stb = gram.stb  # copy in saved language definition objects
            self.mtb = gram.mtb  #
            self.gtb = gram.gtb  #
            self.ptb = gram.ptb  #
            self.ntb = gram.ntb  #
            self.hry = gram.hry  #
            self.man = gram.man  #
Exemplo n.º 2
0
    def __init__ ( self , system , restore=None ):

        """
        initialization

        arguments:
            system   - root name for PyElly tables to load
            restore  - name of session to continue
        """

        nfail = 0          # error count for reporting
        self.rul = None

        self.gundef = [ ]  # record orphan symbols by module
        self.vundef = [ ]  #
        self.pundef = [ ]  #
        self.eundef = [ ]  #

#       print 'EllyBase.__init__()'
#       aid = './' + system
#       try:
#           print 'a rul time=' , _timeModified(aid,rules)
#           print 'a voc time=' , _timeModified(aid,vocabulary)
#       except:
#           print '\n**** a rul or voc time exception'

        sysf = system + rules
        redefine = not _isSaved(system,rules,_rules)
#       print '0 redefine=' , redefine
        try:
            self.rul = ellyDefinition.Grammar(system,redefine,release)
        except ellyException.TableFailure:
            nfail += 1
        if nfail == 0:
            self.gundef = self.rul.stb.findUnknown()
            if redefine:
                ellyPickle.save(self.rul,sysf)

#       try:
#           print 'b rul time=' , _timeModified(aid,rules)
#           print 'b voc time=' , _timeModified(aid,vocabulary)
#       except:
#           print '\n**** b rul or voc time exception'

#       print '1 redefine=' , redefine
        if restore != None:
            self.ses = ellyPickle.load(restore + '.' + system + _session)
        else:
            self.ses = ellySession.EllySession()

        s = self.ses  # session info
        d = self.rul  # language rules

#       print '0:' , len(d.stb.ntname) , 'syntactic categories'

        mtb = d.mtb if d != None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        try:
            inflx = self.sbu.stemmer
#           print 'inflx=' , inflx
        except AttributeError:
            inflx = None
#       print 'inflx=' , inflx
        if d != None:
            d.man.suff.infl = inflx   # define root restoration logic

#       print '2 redefine=' , redefine
        if not redefine:
            if not _isSaved(system,vocabulary,_vocabulary) or _notVocabularyToDate(system):
                redefine = True

        stb = d.stb if d != None else symbolTable.SymbolTable()

#       print self.rul.stb
#       print stb

        if nfail > 0:
            print >> sys.stderr , 'exiting: table generation FAILures'
            sys.exit(1)

#       print '1:' , len(stb.ntname) , 'syntactic categories'

        self.ctx = interpretiveContext.InterpretiveContext(stb,d.gtb.pndx,s.globals,d.hry)

        for z in d.gtb.initzn:        # initialize global symbols for parsing
            self.ctx.glbls[z[0]] = z[1]

#       print '2:' , len(stb.ntname) , 'syntactic categories'

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
        self.pundef = stb.findUnknown()

#       print '3:' , len(stb.ntname) , 'syntactic categories'

        nto = len(stb.ntname)         # for consistency check

        if noParseTree:
            self.ptr = NoParseTree(stb,d.gtb,d.ptb,self.ctx)
        elif ellyConfiguration.treeDisplay:
            self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(stb,d.gtb,d.ptb,self.ctx)
        else:
            self.ptr = parseTree.ParseTree(stb,d.gtb,d.ptb,self.ctx)

        ntabl = d.ntb

        if ntabl != None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            ellyConfiguration.extractors.append( [ nameRecognition.scan , 'name' ] )

        self.iex = entityExtractor.EntityExtractor(self.ptr,stb) # set up extractors

        self.eundef = stb.findUnknown()

        if ellyConfiguration.rewriteNumbers:
            self.trs = simpleTransform.SimpleTransform()
        else:
            self.trs = None           # no automatic conversion of written out numbers

#       print '4:' , len(stb.ntname) , 'syntactic categories'

#       print '3 redefine=' , redefine
        if redefine: print 'recompiling vocabulary rules'
        try:
            voc = ellyDefinition.Vocabulary(system,redefine,stb)
        except ellyException.TableFailure:
            voc = None
            nfail += 1

        if ellyConfiguration.treeDisplay:
            print "tree display ON"
        else:
            print "tree display OFF"

#       try:
#           print 'c rul time=' , _timeModified(aid,rules)
#           print 'c voc time=' , _timeModified(aid,vocabulary)
#       except:
#           print 'rul or voc time exception'

#       print 'vundef=' , self.vundef
        if voc != None: self.vtb = voc.vtb
        self.vundef = stb.findUnknown()
#       print 'vundef=' , self.vundef

        ntn = len(stb.ntname)         # do consistency check on syntactic category count
        if nto != ntn:
            print >> sys.stderr , ''
            print >> sys.stderr , 'WARNING: grammar rules should predefine all syntactic categories'
            print >> sys.stderr , '         referenced in language definition files'
            for i in range(nto,ntn):
                print >> sys.stderr , '        ' , stb.ntname[i].upper() , '=' , i
            print >> sys.stderr , ''

        if nfail > 0:
            print >> sys.stderr , 'exiting: table generation FAILures'
            sys.exit(1)

        sys.stderr.flush()
Exemplo n.º 3
0
    def __init__(self, system, restore=None):
        """
        initialization

        arguments:
            system   - root name for PyElly tables to load
            restore  - name of session to continue

        note:
            calls sys.exit(1) if the grammar rules or the vocabulary
            cannot be generated (TableFailure from either definition)
        """

        nfail = 0  # error count for reporting
        self.rul = None

        self.gundef = []  # record orphan symbols by module
        self.vundef = []  #
        self.pundef = []  #
        self.eundef = []  #

        sysf = system + rules
        redefine = not _isSaved(system, rules, _rules)
        try:
            self.rul = ellyDefinition.Grammar(system, redefine, release)
        except ellyException.TableFailure:
            nfail += 1
        if nfail == 0:
            self.gundef = self.rul.stb.findUnknown()  # unknowns after grammar
            if redefine:
                ellyPickle.save(self.rul, sysf)

        # NOTE(review): the result of ellyPickle.load() is not checked here;
        # if it can return None, s.globals below would fail - confirm
        if restore is not None:
            self.ses = ellyPickle.load(restore + '.' + system + _session)
        else:
            self.ses = ellySession.EllySession()

        s = self.ses  # session info
        d = self.rul  # language rules (None if grammar failed)

        mtb = d.mtb if d is not None else None
        self.sbu = substitutionBuffer.SubstitutionBuffer(mtb)

        inflx = getattr(self.sbu, 'stemmer', None)  # buffer may have no stemmer
        if d is not None:
            d.man.suff.infl = inflx  # define root restoration logic

        # vocabulary must also be rebuilt when its own saved form is
        # missing or out of date, even if grammar did not need recompiling
        if not redefine:
            if not _isSaved(system, vocabulary,
                            _vocabulary) or _notVocabularyToDate(system):
                redefine = True

        stb = d.stb if d is not None else symbolTable.SymbolTable()

        if nfail > 0:  # everything below needs the grammar tables in d
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        self.ctx = extendedContext.ExtendedContext(stb, d.gtb.pndx, s.globals,
                                                   d.hry)

        for z in d.gtb.initzn:  # initialize global symbols for parsing
            self.ctx.glbls[z[0]] = z[1]

        self.pnc = punctuationRecognizer.PunctuationRecognizer(stb)
        self.pundef = stb.findUnknown()  # unknowns after punctuation setup

        nto = len(stb.ntname)  # for consistency check

        if noParseTree:
            self.ptr = NoParseTree(stb, d.gtb, d.ptb, self.ctx)
        elif ellyConfiguration.treeDisplay:
            self.ptr = parseTreeWithDisplay.ParseTreeWithDisplay(
                stb, d.gtb, d.ptb, self.ctx)
        else:
            self.ptr = parseTree.ParseTree(stb, d.gtb, d.ptb, self.ctx)

        ntabl = d.ntb

        if ntabl is not None and ntabl.filled():
            nameRecognition.setUp(ntabl)
            nmx = [nameRecognition.scan, 'name']
            # extractors is a module-level list shared by all instances:
            # register the name extractor only once per process
            if nmx not in ellyConfiguration.extractors:
                ellyConfiguration.extractors.append(nmx)

        self.iex = entityExtractor.EntityExtractor(self.ptr,
                                                   stb)  # set up extractors

        self.eundef = stb.findUnknown()  # unknowns after extractor setup

        if ellyConfiguration.rewriteNumbers:
            self.trs = simpleTransform.SimpleTransform()
        else:
            self.trs = None  # no automatic conversion of written out numbers

        if redefine:
            print('recompiling vocabulary rules')
        try:
            voc = ellyDefinition.Vocabulary(system, redefine, stb)
        except ellyException.TableFailure:
            voc = None
            nfail += 1

        if ellyConfiguration.treeDisplay:
            print("tree display ON")
        else:
            print("tree display OFF")

        if voc is not None:
            self.vtb = voc.vtb
        self.vundef = stb.findUnknown()  # unknowns after vocabulary

        ntn = len(
            stb.ntname)  # do consistency check on syntactic category count
        if nto != ntn:
            print(file=sys.stderr)
            print(
                'WARNING: grammar rules should predefine all syntactic categories',
                file=sys.stderr)
            print('         referenced in language definition files',
                  file=sys.stderr)
            for i in range(nto, ntn):  # only categories added since nto was taken
                print('        ',
                      stb.ntname[i].upper(),
                      '=',
                      i,
                      file=sys.stderr)
            print(file=sys.stderr)

        if nfail > 0:  # vocabulary failure is reported after the warning
            print('exiting: table generation FAILures', file=sys.stderr)
            sys.exit(1)

        sys.stderr.flush()
Exemplo n.º 4
0
    def __init__(self, system, create, rid=None):
        """
        load all definitions from binary or text files

        arguments:
            self     -
            system   - which PyElly application
            create   - whether to create new binary
            rid      - PyElly release ID; when None, newly compiled
                       definitions are not saved

        exceptions:
            TableFailure on error

        note:
            calls sys.exit(1) when saved rules carry a release ID
            different from rid
        """

        super(Grammar, self).__init__()

        self.rls = rid
        sysf = system + grammar

        if create:
            print("recompiling grammar rules")

            self.stb = symbolTable.SymbolTable()  # new empty table to fill in

            el = []  # names of tables that failed

            # each table is attempted even after an earlier failure so that
            # all failures can be reported together below

            try:
                self.mtb = macroTable.MacroTable(self.inpT(system, 'm'))
            except ellyException.TableFailure:
                el.append('macro')
            try:
                self.gtb = grammarTable.GrammarTable(self.stb,
                                                     self.inpT(system, 'g'))
                self.stb.setBaseSymbols()  # only reached if grammar table loaded
            except ellyException.TableFailure:
                el.append('grammar')
            try:
                self.ptb = patternTable.PatternTable(self.stb,
                                                     self.inpT(system, 'p'))
            except ellyException.TableFailure:
                el.append('pattern')

            try:
                self.hry = conceptualHierarchy.ConceptualHierarchy(
                    self.inpT(system, 'h'))
            except ellyException.TableFailure:
                el.append('concept')

            try:
                self.ntb = nameTable.NameTable(self.inpT(system, 'n'))
            except ellyException.TableFailure:
                el.append('name')

            try:
                self.ctb = compoundTable.CompoundTable(self.stb,
                                                       self.inpT(system, 't'))
            except ellyException.TableFailure:
                el.append('compound')

            sa = self.inpT(system, 'stl')  # both inputs are obtained outside
            pa = self.inpT(system, 'ptl')  # of the try block
            try:
                self.man = morphologyAnalyzer.MorphologyAnalyzer(sa, pa)
            except ellyException.TableFailure:
                el.append('morphology')

            if el:
                print('rule FAILures on', el, file=sys.stderr)
                raise ellyException.TableFailure

            if self.rls is not None:  # save only with a release ID
                ellyPickle.save(self, sysf)

        else:
            print("loading saved grammar rules from", sysf)

            gram = ellyPickle.load(sysf)
            if gram is None:  # nothing could be loaded
                raise ellyException.TableFailure
            if gram.rls != rid:
                print('inconsistent PyElly version for saved rules',
                      file=sys.stderr)
                sys.exit(1)
            self.stb = gram.stb  # copy in saved language definition objects
            self.mtb = gram.mtb  #
            self.gtb = gram.gtb  #
            self.ptb = gram.ptb  #
            self.ctb = gram.ctb  #
            self.ntb = gram.ntb  #
            self.hry = gram.hry  #
            self.man = gram.man  #