Example #1
class GrammarObject(object):
    def __init__(self, rules):
        '''
        @param rules: a list of grammar rules in tokenized form, i.e. each
        rule is given as a token stream.
        '''
        self.rules = rules
        self.langlet = GrammarLanglet()
        self.nfagenerator = None
        self.langlet.langlet_id = ls_grammar.langlet_id
        self.grammar = ""

    def set_langlet_id(self, ll_id):
        self.langlet.langlet_id = ll_id

    def create_grammar(self, report=False, expansion=True):
        symobject = SymbolObject(self.langlet.langlet_id)
        # reassemble each token stream into a plain-text rule
        R = [' '.join([g[1] for g in R1]).strip() for R1 in self.rules]
        symobject.create(R)
        self.langlet.parse_symbol = self.langlet.symbol = symobject
        self.nfagenerator = NFAGenerator(self.langlet, "Parser")
        self.grammar = "\n".join(R) + "\n"
        self.nfagenerator.create_all(self.grammar)
        if self.nfagenerator.nfas:
            self.nfagenerator.derive_properties()
            if expansion:
                self.nfagenerator.expand_nfas(report=report)
        self.langlet.parse_nfa = self.nfagenerator.nfadata
        self.langlet.keywords = self.langlet.parse_nfa.keywords

    def get_nfas(self):
        return self.nfagenerator.nfas

    def get_start_symbol(self):
        return self.nfagenerator.nfadata.start_symbols[0]

    @classmethod
    def grammar_from_rule(cls, grammar_rule, report=False):
        rules = []
        R = ls_grammar.tokenize(grammar_rule)
        rules.append(R)
        names = set()
        strings = set()
        # collect nonterminal names and string literals from the right-hand
        # side; R[0] and R[1] hold the rule name and the colon
        for t in R[2:]:
            if t[0] == ls_grammar.parse_token.NAME:
                names.add(t[1])
            elif t[0] == ls_grammar.parse_token.STRING:
                strings.add(t[1])
        for i, name in enumerate(names):
            # invent a dummy terminal for each nonterminal: repeat the index
            # digit until the token clashes with no existing name or string
            n = 1
            k = str(i) * n
            while name + k in names or "'" + name + k + "'" in strings:
                n += 1
                k = str(i) * n
            rules.append(ls_grammar.tokenize("%s: '%s'" % (name, name + k)))
        go = GrammarObject(rules)
        go.create_grammar(report=report)
        return go
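
A minimal usage sketch for the classmethod above. The rule string is a made-up example, and the langscape-style helpers (ls_grammar, GrammarLanglet, SymbolObject, NFAGenerator) are assumed to be importable:

# Hedged sketch: the rule string is a hypothetical example.
go = GrammarObject.grammar_from_rule("expr: term ('+' term)*")
print(go.grammar)             # normalized grammar text, one rule per line
print(go.get_start_symbol())  # id of the first start symbol

Note that grammar_from_rule appends a dummy terminal rule for every name on the right-hand side, so the single input rule becomes a closed grammar.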
Example #2
def create_bnf_langlet(bnf_grammar_file, lexer_file):
    '''
    Construct an ad-hoc langlet from a BNF grammar file.
    '''
    # parser-rules
    cst = bnfreader.parse_file(bnf_grammar_file)
    parser_rules = []
    # normalize the rules of the grammar file by collapsing whitespace
    for rule in find_all(cst, bnfreader.symbol.rule):
        ls_rule = " ".join(bnfreader.unparse(rule)[:-1].split()) + "\n"
        parser_rules.append(ls_rule)
    bnf_grammar = "".join(parser_rules)
    langlet_id = 1000 * 100
    parse_symbol = SymbolObject(langlet_id)
    parse_symbol.create(parser_rules)

    # lexer-rules
    with open(lexer_file) as f_lex:
        lexer_rules = ls_grammar.unparse(ls_grammar.parse(f_lex.read())).split("\n")
    lex_symbol = SymbolObject(langlet_id, 100)
    lex_symbol.create(lexer_rules)
    # Create the NFAs but don't compute properties: left recursion in the
    # BNF grammar prevents the first sets (reachables) from being derived.
    langlet = LangletObject(langlet_id, parse_symbol, lex_symbol)
    nfagen  = NFAGenerator(langlet)
    nfas    = nfagen.create_all(bnf_grammar)
    langlet.nfas = nfas
    langlet.keywords = nfagen.keywords
    return langlet
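
A hedged call sketch for create_bnf_langlet; both file names are placeholders, not files known to ship with the project:

# Hedged sketch: "Grammar.bnf" and "Token.lex" are placeholder file names.
langlet = create_bnf_langlet("Grammar.bnf", "Token.lex")
print(langlet.keywords)  # keywords collected by the NFA generator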
Example #3
class GrammarObject(object):
    def __init__(self, rules):
        self.rules = rules
        self.langlet = GrammarLanglet()
        self.nfagenerator = None

    def create_grammar(self):
        symobject = SymbolObject(ls_grammar.langlet_id)
        # reassemble each token stream into a plain-text rule
        R = [' '.join([g[1] for g in R1]).strip() for R1 in self.rules]
        symobject.create(R)
        self.langlet.parse_symbol = symobject
        self.nfagenerator = NFAGenerator(self.langlet, "Parser")
        self.nfagenerator.create_all("\n".join(R) + "\n")
        if self.nfagenerator.nfas:
            self.nfagenerator.derive_properties()
            self.nfagenerator.expand_nfas()
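
This slimmed variant skips the langlet-id plumbing and the grammar-text bookkeeping of Example #1. A hedged driver, assuming rules is built by tokenizing rule strings with ls_grammar.tokenize; the rule strings are invented examples:

# Hedged sketch: the rule strings are made-up examples.
rules = [ls_grammar.tokenize(r) for r in ("expr: term '+' term", "term: 'x'")]
go = GrammarObject(rules)
go.create_grammar()
nfas = go.nfagenerator.nfas  # populated by create_all during create_grammar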