# Example #1
0
class Parser(object):
    '''
    Wrap a Grammar and a sentence; tokenize the sentence on whitespace and
    produce its parses.

    >>> p = Parser()
    >>> print type(p)
    <class 'parser.Parser'>

    # test exceptions
    >>> p.tokenize()
    Traceback (most recent call last):
        raise ParserError('cannot tokenize empty sentence')
    ParserError: cannot tokenize empty sentence
    >>> 

    >>> p.getParseList()
    Traceback (most recent call last):
        raise ParserError('cannot parse empty sentence')
    ParserError: cannot parse empty sentence
    >>> 


    >>> p.setSentence('I move quickly East')
    '''
    def __init__(self, sentence=''):
        """Build the grammar; optionally store and tokenize a sentence."""
        grammar = Grammar()
        self.grammar = grammar.grammar  # underlying grammar object
        self.parser = None              # created lazily by setParser()
        if sentence:
            self.setSentence(sentence)  # also tokenizes into self._sentence
        else:
            self.sentence = sentence

    def tokenize(self):
        """Whitespace-tokenize self.sentence into self._sentence.

        Raises ParserError if no sentence has been set.
        """
        # (removed an unused function-local 'from copy import copy')
        if not self.sentence:
            # call-form raise: valid in both Python 2 and 3
            raise ParserError('cannot tokenize empty sentence')

        _sentence = tokenizer.Token(TEXT=self.sentence)
        tokenizer.WhitespaceTokenizer().tokenize(_sentence)
        self._sentence = _sentence

    def setParser(self):
        """Create and cache the parser for self.grammar.

        NOTE(review): this instantiates Parser (this very class) with a
        grammar as the 'sentence' argument; presumably a grammar-driven
        parser (cf. RecursiveDescentParser elsewhere in this file) was
        intended -- confirm before relying on getParseList().
        """
        self.parser = Parser(self.grammar)

    def getParser(self):
        """Return the cached parser, creating it on first use."""
        if not self.parser:
            self.setParser()

        return self.parser

    # XXX getParseList is broken right now...
    def getParseList(self):
        """Return the list of parses for the current (tokenized) sentence.

        Raises ParserError if no sentence has been set.
        """
        if not self.sentence:
            raise ParserError('cannot parse empty sentence')

        if not self.parser:
            self.setParser()

        # BUG FIX: the parser API is get_parse_list (was: get_parser_list,
        # which matches no method used anywhere else in this file).
        return self.parser.get_parse_list(self._sentence)

    def setSentence(self, sentence=''):
        """Store *sentence* and tokenize it immediately."""
        self.sentence = sentence
        self.tokenize()
# Example #2
0
 def setParser(self):
     """Build a parser from this object's grammar and cache it."""
     new_parser = Parser(self.grammar)
     self.parser = new_parser
# Example #3
0
    CFGProduction(V, ['saw']),  CFGProduction(P, ['in']),
    CFGProduction(P, ['with']), CFGProduction(N, ['park']),
    CFGProduction(N, ['dog']),   CFGProduction(N, ['telescope'])
)
# Demo script (Python 2): build a toy CFG, tokenize a sentence, parse it,
# then exercise the local Parser wrapper class.
grammar = CFG(S, productions)

# Tokenize a simple sentence 
sentence = 'I saw a man in the park'
sent = Token(TEXT=sentence)
print sent
print type(sent)
# WhitespaceTokenizer mutates `sent` in place, adding the token list.
WhitespaceTokenizer().tokenize(sent)
print sent
        
# Build a parser 
parser = RecursiveDescentParser(grammar)
# Introspection output, useful when exploring the parser API interactively.
print dir(parser)
print parser.__dict__
# Print every parse tree the grammar admits for the tokenized sentence.
for p in parser.get_parse_list(sent):
    print p

print '\n\n\n'

# Exercise the project-local Parser wrapper.
# NOTE(review): 'parser' here shadows the module-level variable above and
# imports from a module named 'parser' -- confirm it is the local module,
# not the (py2) stdlib 'parser' module.
from parser import Parser
p = Parser()
# setSentence stores the sentence and tokenizes it into p._sentence.
p.setSentence('I move slowly south')
print p
print p.sentence
print p._sentence