Example #1
# Imports needed by this snippet; `ebnf` refers to the EBNF-to-pyparsing helper
# module used throughout these examples, and `flatten` is assumed to be a small
# helper defined elsewhere in the original file.
import glob
import re
from os.path import basename
from subprocess import check_output

import ebnf

def rescore(grammar, abs_pen=None):
    g = ebnf.parse(grammar)
    print "Loaded grammar from {}".format(grammar)
    nbests = glob.glob('*.nb.scores.txt')
    for nbest in nbests:
        fn = basename(nbest)
        n = ''.join(re.findall(r'\d+', fn))
        with open(nbest) as nb, open('{}.nb.rescored-abs.txt'.format(n), 'w+') as rescored:
            total = check_output("wc -l {}".format(nbest), shell=True).split()[0]
            for i,line in enumerate(nb):
                line = line.split()
                lsc, asc = line[-2:]
                line = line[:-2]
                score = float(lsc) + 0.001 * float(asc)
                hyp_id = line[0]
                hyp = ' '.join(line[1:])
                print "Parsing sentence {} of {}...".format(i+1, total)
                # find partial parses of hyp and rescore
                matches = []
                for subg in g:
                    match = g[subg].searchString(hyp)
                    matches.append(match.asList())
                matches = list(flatten(matches))
                not_matched = len([word for word in hyp.split() if word not in matches])
                if abs_pen:
                    if not_matched >= 1:
                        score -= 2.5
                else:
                    if not_matched >= 1:
                        score -= 0.5 * not_matched
                rescored.write("{} {}\n".format(hyp_id, score))
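
A hypothetical invocation of the function above; note that `grammar` must be the EBNF text itself (it is passed straight to ebnf.parse), and the call assumes *.nb.scores.txt files exist in the working directory. The grammar file name is only an example:

# Hypothetical usage sketch.
rescore(open('commands.ebnf').read(), abs_pen=True)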
Example #2
  '"', character - '"', {character - '"'}, '"';
 meta_identifier = letter, {letter | digit};
integer = digit, {digit}; 
*)
'''

table = {}
#~ table['character'] = Word(printables, exact=1)
#~ table['letter'] = Word(alphas + '_', exact=1)
#~ table['digit'] = Word(nums, exact=1)
table['terminal_string'] = sglQuotedString
table['meta_identifier'] = Word(alphas+"_", alphas+"_"+nums)
table['integer'] = Word(nums)

print('Parsing EBNF grammar with EBNF parser...')
parsers = ebnf.parse(grammar, table)
ebnf_parser = parsers['syntax']

commentcharcount = 0
commentlocs = set()
def tallyCommentChars(s,l,t):
    global commentcharcount,commentlocs
    # only count this comment if we haven't seen it before
    if l not in commentlocs:
        charCount = ( len(t[0]) - len(list(filter(str.isspace, t[0]))) )
        commentcharcount += charCount
        commentlocs.add(l)
    return l,t

#ordinarily, these lines wouldn't be necessary, but we are doing extra stuff with the comment expression
ebnf.ebnfComment.setParseAction( tallyCommentChars )
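
The snippet stops right after wiring up the comment tally. A minimal continuation sketch, assuming `grammar` holds the complete EBNF text whose tail appears in the truncated string above:

# Run the generated parser over the grammar text itself and report the
# comment-character tally collected by tallyCommentChars.
parsed = ebnf_parser.parseString(grammar)
print('Parsed {} tokens; {} non-whitespace comment characters seen'.format(
    len(parsed), commentcharcount))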
Example #3
 def __init__(self, bnf):
     self.bnf = ebnf.parse(bnf)
     self.attlist = odict()
Example #4
    def get_parser(debug=True):
        # If the cached parser hasn't been built yet, or a parser with a
        # debug state different from the current one is requested,
        # (re)compile it.
        if ClipsEbnf._CACHED_CLIPS_EBNF is None \
            or debug != ClipsEbnf._DEBUG:
            
            from icse.predicates.Predicate import PositivePredicate, TestPredicate,\
                NegativePredicate, NccPredicate
            from icse.predicates.NotEq import NotEq
            from icse.predicates.Eq import Eq
            
            import os
            
            with open(os.path.join(os.path.dirname(__file__), 'clips.ebnf'), 'r') as grammar_file:
                grammar = grammar_file.read()
            
            table = {}
            table['float'] = pp.Regex(r'\d+(\.\d*)?([eE]\d+)?').setParseAction(lambda s,l,t:float(t[0])) 
            table['integer'] = pp.Word(pp.nums).setParseAction(lambda s,l,t:int(t[0][:]))
            table['string'] = pp.Word(pp.printables)
            table['symbol'] = pp.Word("".join( [ c for c in string.printable if c not in string.whitespace and c not in "\"'()&?|<~;" ] ))
            table['variable_symbol'] = pp.Word('?', "".join( [ c for c in string.printable if c not in string.whitespace and c not in "\"'()&?|<~;" ] ), 2)
            table['variable_undef'] = pp.Literal('?')
            table['quoted_text'] = pp.Combine(("'" + pp.CharsNotIn("'") + "'" ^ \
                                '"' + pp.CharsNotIn('"') + '"'))
            
            import icse.actions as actions
            #table['action_name'] = pp.Combine(pp.oneOf(" ".join(actions.Proxy.get_actions().keys())) + pp.Optional( pp.Literal(" ").suppress()) )
            table['action_name'] = pp.Combine(pp.oneOf(actions.Proxy.get_actions().keys()) + pp.FollowedBy( pp.White() |  pp.Literal(")") ) + pp.Optional( pp.Literal(" ").suppress()) )
            
            import icse.functions as functions
            table['function_name'] = pp.Combine(pp.oneOf(" ".join(functions.Proxy.get_functions().keys())) + pp.Literal(" ").suppress())

            import icse.predicates as predicates
            table['predicate_name'] = pp.Combine(pp.oneOf(" ".join(predicates.Proxy.get_predicates().keys())) + pp.Literal(" ").suppress())
            
            table['MYCLIPS_directive'] =  pp.Regex(r'\;\@(?P<command>\w+)\((?P<params>.+?)\)').setParseAction(lambda s,l,t: ('myclips-directive', (t['command'], t['params'])))
            
            parsers = ebnf.parse(grammar, table, debug)
            
            #parsers['comment'].setParseAction(lambda s,l,t:t[0][1:-1])
            parsers['number'].setParseAction(lambda s,l,t:t[0][0])
            parsers['rule_property'].setParseAction(lambda s,l,t: tuple([t[1], t[2][0]])) 
            parsers['declaration'].setParseAction(lambda s,l,t: ('declare', dict(t[1][:])))
            parsers['comment'].setParseAction(lambda s,l,t: ('description', t[0][1:-1]))
            parsers['rule_name'].setParseAction(lambda s,l,t: ('name', t[0]))
            parsers['conditional_element_group'].setParseAction(lambda s,l,t: ('lhs', t[0][:]))
            parsers['action_group'].setParseAction(lambda s,l,t: ('rhs', t[0][:]))
            parsers['defrule_construct'].setParseAction(lambda s,l,t: ('defrule', dict([x for x in t if isinstance(x, tuple)])))
            parsers['constant'].setParseAction(lambda s,l,t:(Eq, t[0]))
            parsers['variable_symbol'].setParseAction(lambda s,l,t:(Variable, t[0][1:]))
            parsers['variable_undef'].setParseAction(lambda s,l,t:(Variable, None))
            parsers['pattern_CE'].setParseAction(lambda s,l,t:(PositivePredicate, t[1][:]))
            parsers['not_term'].setParseAction(lambda s,l,t:(NotEq, t[1][1]) if t[1][0] == Eq else (Variable.withPredicate(NotEq), t[1][1]) if t[1][0] == Variable else (NotEq, t[1]) )
            parsers['test_CE'].setParseAction(lambda s,l,t:(TestPredicate.withPredicate(t[2]), t[3][:]))
            parsers['assigned_pattern_CE'].setParseAction(lambda s,l,t:(PositivePredicate, t[2][1][:], t[0][1]))
            parsers['and_CE'].setParseAction(lambda s,l,t: [t[1][:]] )
            parsers['not_CE'].setParseAction(lambda s,l,t: (NegativePredicate, t[1][1]) if t[1][0] == PositivePredicate else (NccPredicate, t[1]))
            parsers['predicate_name'].setParseAction(lambda s,l,t: _get_predicate_from_string(t[0]) )
            parsers['function_name'].setParseAction(lambda s,l,t: _get_function_from_string(t[0]) )
            # Register the parse action for function_call: it tries to rewrite
            # the call as a constant when all of the function's terms are constants.
            parsers['function_call'].setParseAction(ClipsEbnf._try_rewrite_staticfunction)
            parsers['term_function_call'].setParseAction(lambda s,l,t: t[0] if len(t) == 1 else t[1] )
            parsers['deffacts_name'].setParseAction(lambda s,l,t: ('name', t[0]))
            parsers['rhs_pattern'].setParseAction(lambda s,l,t: [t[1][:]])
            parsers['rhs_pattern_group'].setParseAction(lambda s,l,t: ('facts', t[0][:]))
            parsers['deffacts_construct'].setParseAction(lambda s,l,t: ('deffacts', dict([x for x in t if isinstance(x, tuple)]).get('facts')))
            
            parsers['action_quoted_text'].setParseAction(lambda s,l,t: "".join(t) )
            parsers['action_call'].setParseAction(lambda s,l,t: (t[1],t[2][:]) )
            parsers['action_name'].setParseAction(lambda s,l,t: _get_action_from_string(t[0]) )
            
            parsers['setstrategy_construct'].setParseAction(lambda s,l,t: ('set-strategy', _get_strategy_from_string(t[1]) ))
            
            #parsers['MYCLIPS_directive'].setParseAction(lambda s,l,t: ('myclips-directive', (t[0], t[1][:]) ))
            
            clipsComment = ( ";" + pp.NotAny('@') + pp.SkipTo("\n") ).setName("clips_comment")

            parsers['CLIPS_program'].setParseAction(lambda s,l,t: t[0][:])
            parsers['CLIPS_program'].ignore(clipsComment)
            
        
            if debug:
                # Show information about the loaded predicates, functions and actions
                print "Loaded predicates:"
                print "\t" + "\n\t".join(predicates.Proxy.get_predicates().keys())
                print "Loaded functions:"
                print "\t" + "\n\t".join(functions.Proxy.get_functions().keys())
                print "Loaded actions:"
                print "\t" + "\n\t".join(actions.Proxy.get_actions().keys())
                raw_input()
                
        
            ClipsEbnf._CACHED_CLIPS_EBNF = parsers
            ClipsEbnf._DEBUG = debug
            
        return ClipsEbnf._CACHED_CLIPS_EBNF['CLIPS_program']          
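
A hedged usage sketch: get_parser returns a pyparsing element, so it can be fed CLIPS source text directly. The file name below is illustrative, and whether a given rule parses depends on the predicates, functions, and actions registered in the icse package:

# Hypothetical usage of the cached top-level parser.
program_parser = ClipsEbnf.get_parser(debug=False)
tokens = program_parser.parseString(open('rules.clp').read())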
Example #5
#!/usr/bin/env python3

from pyparsing import *
import ebnf

ParserElement.enablePackrat()

singleRawString = QuotedString(quoteChar='@"', endQuoteChar='"')
doubleRawString = QuotedString(quoteChar='@@"', endQuoteChar='"@@', multiline=True)
rawString = singleRawString | doubleRawString

table = {
    # Non-greedy trick from http://pyparsing.wikispaces.com/share/view/178079
    "Identifier": ~Literal('END') + Word(alphas, alphanums + "_"),
    "Number": Word(nums, nums + "_e.") ^ ("0b" + Word("01_")) ^ ("0o" + Word("01234567_")) ^ ("0x" + Word("0123456789abcdefABCDEF_")),
    "StringLiteral": Regex(r'"(?:[^"\r\n\\]|(?:\\\((?:[^")]|"[^"]*")*\))|(?:\\.))*"') | rawString,
    "restOfLine": restOfLine,
    "ExecBody": Regex(r".*?;"),
}

parsers = ebnf.parse(open("contrib/grammar/neon.ebnf").read(), table)
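
The snippet ends once the parser table is built. A hedged continuation sketch: the start-rule name "Program" and the source path are guesses, since the actual top-level rule is defined in contrib/grammar/neon.ebnf:

# Hypothetical usage: pick the top-level rule from the returned dict and parse a file.
neon_parser = parsers["Program"]
tree = neon_parser.parseFile("examples/hello.neon", parseAll=True)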
Example #6
from pyparsing import *
import ebnf

ParserElement.enablePackrat()

singleRawString = QuotedString(quoteChar='@"', endQuoteChar='"')
doubleRawString = QuotedString(quoteChar='@@"', endQuoteChar='"@@', multiline=True)
rawString = singleRawString | doubleRawString

table = {
    # Non-greedy trick from http://pyparsing.wikispaces.com/share/view/178079
    "Identifier": ~Literal('END') + Word(alphas, alphanums + "_"),
    "Number": Word(nums, nums + "_e.") ^ ("0b" + Word("01_")) ^ ("0o" + Word("01234567_")) ^ ("0x" + Word("0123456789abcdefABCDEF_")) ^ ("0#" + Word(nums) + "#" + Word(alphanums + "_")),
    "StringLiteral": Regex(r'"(?:[^"\r\n\\]|(?:\\\((?:[^")]|"[^"]*")*\))|(?:\\.))*"') | rawString,
    "restOfLine": restOfLine,
    "ExecBody": Regex(r".*?;"),
}

parsers = ebnf.parse(open("contrib/grammar/neon.ebnf").read(), table)