示例#1
0
文件: telaro.py 项目: davelab6/telaro
class Parser:
    """
    Base class for a lexer/parser that has the rules defined as methods
    """
    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        self.debug = kw.get('debug', 0)
        self.sentences = []
        self.markov = Markov()
        self.clause_starter = {}
        self.para_starter = []
        self.words = kw.get('words', None)

        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
        except:
            modname = "parser"+"_"+self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"
        #print self.debugfile, self.tabmodule

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    def run(self):
        s = sys.stdin.read()
        s = s.replace('\n\n', '\x00')
        s = s.replace('\x00\x00', '\x00')
        s = s.replace('\n\n', '')
        s = s.replace('\n', ' ')
        s = s.replace('  ', ' ')
        yacc.parse(s)
        print self.sentences
        self.markov.printout()
        print
        print "clause starters"
        keys = self.clause_starter.keys()
        keys.sort()
        for k in keys:
            v = self.clause_starter[k]
            print "\t", repr(k), v
        print
        print "para starters", self.para_starter
        print
        self.markov.prepare()
        sentence = random_sentence(self.markov, 800,
                                    starters=self.clause_starter,
                                    para_starters=self.para_starter)
        print_sentence(sentence, word_filter=self.words)
示例#2
0
class Parser:
    """
    Base class for a lexer/parser that has the rules defined as methods
    """
    tokens = ()
    precedence = ()

    def __init__(self, **kw):
        self.debug = kw.get('debug', 0)
        self.sentences = []
        self.markov = Markov()
        self.clause_starter = {}
        self.para_starter = []
        self.word_filter = kw.get('word_filter', None)
        self.letter_priority = kw.get('letter_priority', None)

        try:
            modname = os.path.split(os.path.splitext(__file__)[0])[1] + "_" + self.__class__.__name__
        except:
            modname = "parser"+"_"+self.__class__.__name__
        self.debugfile = modname + ".dbg"
        self.tabmodule = modname + "_" + "parsetab"
        #print self.debugfile, self.tabmodule

        # Build the lexer and parser
        lex.lex(module=self, debug=self.debug)
        yacc.yacc(module=self,
                  debug=self.debug,
                  debugfile=self.debugfile,
                  tabmodule=self.tabmodule)

    def run(self, txt=None, para_starter=False):
        if txt is None:
            s = sys.stdin.read()
        else:
            s = txt
        s = s.replace('\n\n', '\x00')
        s = s.replace('\x00\x00', '\x00')
        s = s.replace('\n\n', '')
        s = s.replace('\n', ' ')
        s = s.replace('  ', ' ')
        yacc.parse(s)
        print self.sentences
        self.markov.printout()
        print
        print "clause starters"
        keys = self.clause_starter.keys()
        keys.sort()
        for k in keys:
            v = self.clause_starter[k]
            print "\t", repr(k), v
        print
        print "para starters", self.para_starter
        print
        word_filter = self.word_filter
        if self.letter_priority and word_filter:
            # certain words are given a higher priority (multiplier)
            # than others.
            states = self.markov.states
            for from_word, fp in states.items():
                for to_word in fp.keys():
                    if word_filter.has_key(to_word.lower()):
                        fp[to_word] *= self.letter_priority
            word_filter = None
        self.markov.prepare()
        if para_starter:
            para_starters = None
        else:
            para_starters = self.para_starter
        sentence = random_sentence(self.markov, 800,
                                    starters=self.clause_starter,
                                    para_starters=para_starters)
        return make_sentence(sentence, word_filter=word_filter)