Пример #1
0
class KarmaParser(TextParsers):
    """Grammar for karma changes embedded in free text.

    A karma entry is a topic, immediately followed by a two-character
    operator (``++``, ``--``, ``+-`` or ``-+``) and an optional reason.
    ``parse_all`` scans a whole message, falling back to ``anything`` for
    every character that is not part of a karma entry.
    """

    # Fallback: consume a single character and produce None for it.
    anything = reg(r".") > constant(None)

    # A topic is either a bare word or a double-quoted string; the lookahead
    # requires two '+'/'-' characters right after it without consuming them.
    word_topic = reg(r'[^"\s]+?(?=[+-]{2})')
    string_topic = reg(r'".*?(?<!\\)(\\\\)*?"(?=[+-]{2})')
    # Result is [text, was_quoted]; quoted topics lose their outer quotes.
    topic = (
        (word_topic > (lambda bare: [bare, False]))
        | (string_topic > (lambda quoted: [quoted[1:-1], True]))
    )

    # Operators are exactly two characters long: the lookbehind/lookahead
    # reject any longer run of '+'/'-' around them.
    op_positive = (
        reg(r"(?<![+-])\+\+(?![+-])") > constant(KarmaOperation.POSITIVE)
    )
    op_neutral = (
        reg(r"(?<![+-])\+-(?![+-])") | reg(r"(?<![+-])-\+(?![+-])")
    ) > constant(KarmaOperation.NEUTRAL)
    op_negative = (
        reg(r"(?<![+-])--(?![+-])") > constant(KarmaOperation.NEGATIVE)
    )
    operator = op_positive | op_neutral | op_negative

    # Reasons: "(...)", a quoted string not followed by an operator, or text
    # introduced by "because"/"for" (case-insensitive). The surrounding
    # brackets/quotes are stripped from the first two forms.
    bracket_reason = reg(r"\(.+?\)") > (lambda wrapped: wrapped[1:-1])
    quote_reason = (
        reg(r'".*?(?<!\\)(\\\\)*?"(?![+-]{2})')
        > (lambda wrapped: wrapped[1:-1])
    )
    reason_words = reg(r"(?i)because") | reg(r"(?i)for")
    text_reason = reason_words >> (reg(r'[^",]+') | quote_reason)
    reason = bracket_reason | quote_reason | text_reason

    # make_karma receives [topic, operator, optional-reason].
    karma = (topic & operator & opt(reason)) > make_karma

    # NOTE(review): filter_out_none presumably drops the None placeholders
    # produced by `anything` -- confirm against its definition.
    parse_all = rep(karma | anything) > filter_out_none
Пример #2
0
class PrologParser(TextParsers, whitespace='[ \t\n]*'):
    """Grammar for a small Prolog-like language.

    Token-level parsers produce tagged values (``["Atom", name]``,
    ``["ID", name]``) or marker strings (``"DIS"``, ``"CON"``, ``"DOT"``,
    ``"TSTILE"``); the rules below combine them into module declarations
    (``mod``) and relations (``head``).
    """

    # --- Tokens ---------------------------------------------------------
    module = lit('module') > constant("module")
    literal = reg(r'[a-z_][a-zA-Z_0-9]*') > (lambda x: ["Atom", str(x)])
    id = reg(r'[a-z_][a-zA-Z_0-9]*') > (lambda x: ["ID", str(x)])
    # `id` yields the pair ["ID", name], so the keyword check has to look at
    # the name element. Comparing the whole pair to ['module'] was always
    # unequal and therefore never rejected the reserved word.
    identificator = pred(id, lambda x: x[1] != 'module', 'ID')
    disunction = lit(';') > constant("DIS")
    conjunction = lit(',') > constant("CON")
    dot = lit('.') > constant("DOT")
    lbr = lit('(')
    rbr = lit(')')
    tstile = lit(':-') > constant("TSTILE")

    # ---------------------------------------------------------------

    # Module declaration: "module <identifier> ."
    mod = module & identificator & dot

    # ---------------------------------------------------------------

    # Relation: "head :- body ." or the bare fact "head ."
    head = head_atom & tstile & expression & dot | head_atom & dot
    # Body: disjunction (';') of conjunctions (',') of bracketed
    # expressions or atoms; both operators are right-recursive.
    expression = M & disunction & expression | M
    M = P & conjunction & M | P
    P = lbr & expression & rbr | atom1
    atom = head_atom & dot
    # Atoms: a name optionally followed by arguments, which may themselves
    # be atoms or parenthesised atoms.
    atom1 = literal & atom2 | literal
    head_atom = identificator & atom2 | identificator
    atom2 = lbr & atom3 & rbr & atom2 | atom1 | lbr & atom3 & rbr
    atom3 = atom1 | lbr & atom3 & rbr
Пример #3
0
class FormatTextParsers(TextParsers, whitespace=None):
    """Grammar for format strings made of mode letters with optional orderings.

    ``d`` maps to ``Mode.dense`` and ``s`` to ``Mode.compressed``. A format is
    either a run of mode letters alone, or a run of (mode, integer) pairs.
    """

    integer = reg(r"[0-9]+") > int

    dense = lit("d") > constant(Mode.dense)
    compressed = lit("s") > constant(Mode.compressed)
    mode = dense | compressed

    # Each alternative is anchored with eof so it has to consume the whole
    # input before it can succeed.
    format_without_orderings = (rep(mode) << eof) > (
        # Without explicit orderings the modes are numbered 0..n-1 in order.
        lambda parsed_modes: Format(
            tuple(parsed_modes), tuple(range(len(parsed_modes)))
        )
    )
    format_with_orderings = (
        (rep(mode & integer) << eof) > make_format_with_orderings
    )

    format = format_without_orderings | format_with_orderings
Пример #4
0
class Homework(TextParsers):
    """Two arithmetic grammars over integers, ``+`` and ``*``.

    ``unprecedented`` treats both operators with equal precedence;
    ``multiplication`` makes addition bind tighter than multiplication.
    """

    number = reg(r"\d+") > int
    plus = lit("+") > constant(add)
    times = lit("*") > constant(mul)
    operator = plus | times

    # --- Variant 1: no precedence -----------------------------------------
    # A base is a number or a parenthesised `unprecedented` expression.
    base = (("(" >> unprecedented) << ")") | number
    unprecedented = (base & rep(operator & base)) > reduce

    # --- Variant 2: addition first, then multiplication --------------------
    # `base` is deliberately bound a second time here; `unprecedented` above
    # was already built from the first binding.
    base = (("(" >> multiplication) << ")") | number
    addition = (base & rep(plus & base)) > reduce
    multiplication = (addition & rep(times & addition)) > reduce
Пример #5
0
class JsonParsers(TextParsers, whitespace=r'[ \t\n\r]*'):
    """Grammar for JSON values, producing native Python objects.

    Numbers become ``float``; ``true``/``false``/``null`` become
    ``True``/``False``/``None``; objects become ``dict``. String parsing is
    delegated to ``JsonStringParsers.string``.
    """

    # Scalars
    number = reg(r"-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][-+]?[0-9]+)?") > float
    false = lit("false") > constant(False)
    true = lit("true") > constant(True)
    null = lit("null") > constant(None)
    string = JsonStringParsers.string

    # Containers; `value` is used before it is defined because the grammar
    # is recursive.
    array = ("[" >> repsep(value, ",")) << "]"
    # An entry is the pair [key, value]; the ':' separator is discarded.
    entry = (string << ":") & value
    obj = (("{" >> repsep(entry, ",")) << "}") > dict

    value = number | false | true | null | string | array | obj
Пример #6
0
class TypicalUrlParsers(TextParsers, whitespace=None):
    """Grammar for typical URLs: scheme://[user:pass@]host[:port]path[?query][#fragment].

    Whitespace handling is disabled, so every character of the input is
    significant. The ``url`` parser passes its parts to ``Url``.
    """

    # Percent-escape: '%' plus two upper-case hex digits, decoded to a character.
    encoded = '%' >> reg(r'[0-9A-F]{2}') > (lambda x: chr(int(x, 16)))

    scheme = reg(r'[A-Za-z][-+.A-Za-z0-9]*') > str.lower

    # --- User info: "username:password" -----------------------------------
    username = reg(r'[A-Za-z][-_+A-Za-z0-9]*([.][-_+A-Za-z0-9]+)*') > str.lower
    password = rep(reg(r'[-_.+A-Za-z0-9]+') | encoded) > ''.join
    userinfo = username << ':' & password > splat(UserInfo)

    # --- Host --------------------------------------------------------------
    # Labels are lower-cased and handed to DomainName in reversed order; one
    # trailing '.' is accepted and dropped.
    domain_name = rep1sep(reg('[A-Za-z0-9]+([-]+[A-Za-z0-9]+)*') > str.lower, '.') << opt('.') > (
        lambda x: DomainName(list(reversed(x))))
    ipv4_address = reg(r'[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}') > IPv4Address
    ipv6_address = reg(r'\[([A-Fa-f0-9]{1,4}(:[A-Fa-f0-9]{1,4}){1,7})\]'
                       r'|\[(([A-Fa-f0-9]{1,4}:){1,7})(:[A-Fa-f0-9]{1,4}){1,6}\]'
                       r'|\[:(:[A-Fa-f0-9]{1,4}){0,7}\]'
                       r'|\[([A-Fa-f0-9]{1,4}:){0,7}:\]') > IPv6Address
    host = ipv4_address | ipv6_address | domain_name

    port = ':' >> reg(r'[0-9]+') > int

    # Path: a list of '/'-prefixed segments (a segment may be empty).
    path = rep('/' >> (reg(r'[-._~A-Za-z0-9]*') | encoded))

    # --- Query ---------------------------------------------------------------
    # '-' is listed first so it is a literal. Written as `*-.` it formed a
    # character range that also matched '+' and ',', so '+' was taken
    # verbatim here and never reached `query_space` to become a space.
    query_as_is = reg(r'[-*._A-Za-z0-9]+')
    query_space = lit('+') > constant(' ')
    query_string = rep1(query_as_is | query_space | encoded) > ''.join
    query = '?' >> repsep(query_string << '=' & query_string, '&') > OrderedDict

    fragment = '#' >> reg(r'[-._~/?A-Za-z0-9]*')

    url = scheme << '://' & opt(userinfo << '@') & host & opt(port) & path & opt(query) & opt(fragment) > splat(Url)
Пример #7
0
class LineParser(TextParsers, whitespace=None):
    """Grammar for a single source line: END, a label, a function header or a call.

    Whitespace handling is disabled; tokens are separated explicitly by ``s``.
    """

    # One or more spaces/tabs between tokens.
    s = reg(r"[ \t]+")
    # Identifier: a letter followed by letters, digits or underscores.
    valid = reg(r"[A-Za-z][A-Za-z0-9_]*")

    # "<name>:" -- the colon is discarded.
    label = (valid << ":") > Label
    #arg = reg(r'[vimVIM][A-Za-z0-9_]*')

    # "FUNC|PRIM <name> <param>*"
    function_header = (
        lit("FUNC", "PRIM") & (s >> valid) & rep(s >> valid)
    ) > splat(FunctionHeader)

    # "<name> <arg>*" where an argument is a map, an immediate, an
    # identifier, or one of the literals $POP / $TOP.
    function_call = (
        valid
        & rep(
            s >> (MapParser.map_ | ImmParser.imm2 | valid | "$POP" | "$TOP")
        )
    ) > splat(FunctionCall)

    # Defined here but not part of `line` below.
    memory_command = (
        lit("LOAD", "STORE") & opt("NEXT") & opt("BIG") & opt("RED")
    )

    # The End value is built once, when the class body is evaluated.
    end = lit("END") > constant(End(False, []))

    line = end | label | function_header | function_call
Пример #8
0
class JsonStringParsers(TextParsers, whitespace=None):
    """Grammar for JSON string literals, producing the decoded Python ``str``.

    Whitespace handling is disabled because whitespace inside a string is
    significant. Each escape parser maps a two-character escape sequence to
    the character it denotes.
    """

    # Two-character escapes (the raw-string literals are backslash + char).
    quote = lit(r'\"') > constant('"')
    reverse_solidus = lit(r'\\') > constant('\\')
    solidus = lit(r'\/') > constant('/')
    backspace = lit(r'\b') > constant('\b')
    form_feed = lit(r'\f') > constant('\f')
    line_feed = lit(r'\n') > constant('\n')
    carriage_return = lit(r'\r') > constant('\r')
    tab = lit(r'\t') > constant('\t')
    # \uXXXX escape. The parsed value is the matched text (a str, as
    # everywhere else reg results are used), not a match object, so the four
    # hex digits are taken by slicing off the leading backslash and 'u'.
    uni = reg(r'\\u([0-9a-fA-F]{4})') > (lambda x: chr(int(x[2:], 16)))

    escaped = (quote | reverse_solidus | solidus | backspace | form_feed
               | line_feed | carriage_return | tab | uni)
    # Any run of characters from U+0020 upward except '"' (U+0022) and
    # '\' (U+005C).
    unescaped = reg(r'[\u0020-\u0021\u0023-\u005B\u005D-\U0010FFFF]+')

    # Pieces between the quotes are concatenated into one string.
    string = '"' >> rep(escaped | unescaped) << '"' > ''.join
Пример #9
0
class Parser(TextParsers, whitespace=r'[ \t\n\r]*'):
    # Grammar for a Prolog-like language with optional module and type
    # declarations. Every rule produces a string: the parsers build a textual
    # representation of the syntax tree (e.g. "Atom(ID(x), ...)",
    # "Conj(a, b)") instead of tree objects.
    # NOTE(review): `concatenateall` and `correctprefix` are defined outside
    # this class; their exact effect on the intermediate strings should be
    # confirmed there.

    # --- Tokens -------------------------------------------------------------
    idreg = reg(r'[a-z_A-Z][a-z_A-Z0-9]*')
    # An identifier is any name except the reserved words 'module' and 'type'.
    id = pred(
        idreg, (lambda x: x != 'module' and x != 'type'),
        "Identificator name cannot be 'module' or 'type'") > concatenateall
    # Punctuation is matched and replaced by the empty string.
    rthen = lit(':-') > constant('')
    conj = lit(',') > constant('')
    disj = lit(';') > constant('')
    lbracket = lit('(') > constant('')
    rbracket = lit(')') > constant('')
    dot = lit('.') > constant('')

    # Keywords.
    mod = lit('module')
    type = lit('type')

    # --- Atoms --------------------------------------------------------------
    # Contents of a bracketed argument: an identifier with further arguments,
    # a lone identifier, or another bracketed `atom_in` (extra brackets are
    # dropped by taking x[1]).
    atom_in = (((id & atom_close) > (lambda x: "ID(" + x[0] + "), " + x[1]))
               | (id > (lambda x: "ID(" + x + ")"))
               | ((lbracket & atom_in & rbracket) >
                  (lambda x: x[1]))) > concatenateall
    # A bracketed argument, rendered as "Atom(...)".
    atom_in_gen = lbracket & atom_in & rbracket > (
        lambda x: "Atom(" + x[1] + ")")
    # Argument list following an atom's name: identifiers and bracketed
    # arguments, joined with ", ".
    atom_close = (((id & atom_close) > (lambda x: "ID(" + x[0] + "), " + x[1]))
                  | (id > (lambda x: "ID(" + x + ")"))
                  | ((atom_in_gen & atom_close) >
                     (lambda x: x[0] + ", " + x[1]))
                  | atom_in_gen) > concatenateall
    # Top-level atom: a name with arguments ("Atom(ID(name), args)") or a
    # bare name ("ID(name)").
    atom = (((id & atom_close) >
             (lambda x: "Atom(ID(" + x[0] + "), " + x[1] + ")"))
            | (id > (lambda x: "ID(" + x + ")"))) > concatenateall

    # --- Bodies -------------------------------------------------------------
    # A literal is a parenthesised disjunction or an atom.
    liter = (((lbracket & Disj & rbracket) >
              (lambda x: x[1])) | atom) > concatenateall
    # Conjunction (',') and disjunction (';') are both right-recursive;
    # x[1] is the discarded operator token, hence x[0] and x[2].
    Conj = (((liter & conj & Conj) >
             (lambda x: "Conj(" + x[0] + ", " + x[2] + ")"))
            | liter) > concatenateall
    Disj = (((Conj & disj & Disj) >
             (lambda x: "Disj(" + x[0] + ", " + x[2] + ")"))
            | Conj) > concatenateall
    # Relation: "head :- body ." or "head ."; rendered as "Rel(...)".
    relation = ((((atom & rthen & Disj) >
                  (lambda x: "Head(" + x[0] + "), Body(" + x[2] + ")"))
                 | atom) & dot) > (lambda x: "Rel(" + x[0] + ")")

    # --- Types --------------------------------------------------------------
    # A type expression: atoms and bracketed types chained with '->'.
    # Bracketed or atom heads followed by one or more '->' parts become
    # "Type(head, part, ...)".
    possible_t = (
        ((lbracket & possible_t & rbracket & rep1('->' >> possible_t)) >
         (lambda x: "Type(" + x[1] + ", " + ", ".join(x[3]) + ")"))
        | ((lbracket & possible_t & rbracket) > (lambda x: x[1]))
        | (atom & rep1('->' >> possible_t) >
           (lambda x: "Type(" + x[0] + ", " + ", ".join(x[1]) + ")"))
        | atom) > (lambda x: "".join(x))
    types = (rep1sep(possible_t, '->') >
             (lambda x: ", ".join(x))) > correctprefix
    # "type <name> <types> ." -> "Typedef(name, types)"
    type_block = type & id & types & dot > (
        lambda x: "Typedef(" + x[1] + ", " + x[2] + ")")
    # "module <name> ." -> "Module(name)"
    mod_block = mod & id & dot > (lambda x: "Module(" + x[1] + ")")

    # --- Program ------------------------------------------------------------
    # Optional module declaration, then type definitions, then relations;
    # each section is newline-joined and the three sections are joined with
    # newlines again.
    program = ((opt(mod_block) > (lambda x: "\n".join(x)))\
              & (rep(type_block) > (lambda x: "\n".join(x))) \
              & (rep(relation) > (lambda x: "\n".join(x)))) > (lambda x: "\n".join(x))
Пример #10
0
class ProgramParser(TextParsers):
    """Grammar for an expression language with dice, cases, lets and lambdas.

    Binary operators are layered from loosest to tightest binding:
    equality, comparison, logic, term, factor, power; below those come the
    ternary, case, dice and unary forms and finally primaries. ``main`` is
    the entry point and parses a ';'-separated program.
    """

    # Actual grammar

    # Helper combinators: one-or-more / zero-or-more items with separators,
    # keeping the separators in the result.
    split1 = lambda item, separator: item & rep(separator & item)
    # Written out in full instead of calling split1: names bound in a class
    # body are not visible from inside a function defined in that body, so
    # a call to split1 here would fail with NameError.
    split = lambda item, separator: opt(item & rep(separator & item))

    identifier = reg(r"[a-zA-Z]\w*")

    # Double- or single-quoted string; the outer quotes are stripped.
    string = reg(r'".*?(?<!\\)(\\\\)*?"') | reg(r"'.*?(?<!\\)(\\\\)*?'") > (
        lambda s: TokenString(s[1:-1])
    )

    # --- Numbers ------------------------------------------------------------
    num_int = reg(r"\d+") > int
    num_float = reg(r"(\d*\.\d+|\d+\.\d*)") > float
    num_positive = num_float | num_int
    num_negative = "-" >> num > (lambda x: -x)
    num = num_negative | num_positive
    number = num > TokenNumber

    # --- Operator tokens ------------------------------------------------------
    op_eq = lit("==") > constant(Operator.EQ)
    op_ne = lit("!=") > constant(Operator.NE)
    op_ge = lit(">=") > constant(Operator.GE)
    op_gt = lit(">") > constant(Operator.GT)
    op_le = lit("<=") > constant(Operator.LE)
    op_lt = lit("<") > constant(Operator.LT)
    op_and = lit("&") > constant(Operator.AND)
    op_or = lit("|") > constant(Operator.OR)
    op_add = lit("+") > constant(Operator.ADD)
    op_sub = lit("-") > constant(Operator.SUB)
    op_mul = lit("*") > constant(Operator.MUL)
    op_div = lit("/") > constant(Operator.DIV)
    op_pow = lit("^") > constant(Operator.POW)
    op_not = lit("!") > constant(Operator.NOT)
    op_neg = lit("-") > constant(Operator.NEG)

    # Operators grouped by precedence level.
    equality_op = op_eq | op_ne
    comparison_op = op_ge | op_gt | op_le | op_lt
    logic_op = op_and | op_or
    term_op = op_add | op_sub
    factor_op = op_mul | op_div
    power_op = op_pow
    unary_op = op_neg | op_not

    # "<expr> -> <expr>" inside a case block.
    case_pair = expr << "->" & expr

    # "^ a = e; b = e $ body"
    assignment = identifier << "=" & expr
    let_stmt = "^" >> rep1sep(assignment, ";") << "$" & expr > let

    # Anonymous function: one or two backslashes, parameter names, "->", body.
    anon_func = (lit("\\") | lit("\\\\")) >> rep1(identifier) & "->" >> expr > anon

    variable = identifier > TokenVariable

    # --- Expression levels ------------------------------------------------------
    # Adjacent equality-level expressions are collected into one list and
    # handed to maybe_application.
    expr = rep1sep(equality, reg(r"\s*")) > maybe_application
    equality = split1(comparison, equality_op) > bin_operator
    comparison = split1(logic, comparison_op) > bin_operator
    logic = split1(term, logic_op) > bin_operator
    term = split1(factor, term_op) > bin_operator
    factor = split1(power, factor_op) > bin_operator
    power = split1(ternary, power_op) > bin_operator_right
    ternary = case & opt("?" >> expr << ":" & expr) > maybe_ternary
    case = dice & opt(lit("$") >> "(" >> rep1sep(case_pair, ";") << ")") > maybe_case
    dice = unary & opt("d" >> unary) > maybe_dice
    unary = unary_op & unary | primary > mon_operator
    primary = number | string | bracketed | let_stmt | anon_func | variable
    bracketed = "(" >> expr << ")"

    # Named function definition: "name = body" (prefixed with '@' in a program).
    func = identifier & "=" >> expr > function

    # ';'-separated definitions and expressions, with an optional trailing ';'.
    program = repsep("@" >> func | expr, ";") << opt(";") > Program

    main = program
Пример #11
0
class DiceParser(TextParsers):
    """Grammar for a dice-expression language.

    Binary operators are layered from loosest to tightest binding:
    equality, comparison, logic, term, factor, power; below those come the
    dice, ternary, case and unary forms and finally primaries. ``parse_all``
    is the entry point and parses a ';'-separated program.
    """

    # Helper combinators: one-or-more / zero-or-more items with separators,
    # keeping the separators in the result.
    split1 = lambda item, separator: item & rep(separator & item)
    # Written out in full instead of calling split1: names bound in a class
    # body are not visible from inside a function defined in that body, so
    # a call to split1 here would fail with NameError.
    split = lambda item, separator: opt(item & rep(separator & item))

    # "/* <expr> */" yields None. Not referenced by the other rules here.
    comment = "/*" >> expr << "*/" > constant(None)

    # Double-quoted string; the outer quotes are stripped.
    string = reg(r'".*?(?<!\\)(\\\\)*?"') > (lambda s: ValueString(s[1:-1]))

    # --- Numbers ------------------------------------------------------------
    num_int = reg(r"\d+") > int
    num_float = reg(r"(\d*\.\d+|\d+\.\d*)") > float
    num_positive = num_float | num_int
    num_negative = "-" >> num > (lambda x: -x)
    num = num_negative | num_positive
    number = num > ValueNumber

    # set = "(" >> repsep(expr, ",") << ")" > ValueSet

    # --- Operator tokens ------------------------------------------------------
    op_eq = lit("==") > constant(Operator.EQ)
    op_ne = lit("!=") > constant(Operator.NE)
    op_ge = lit(">=") > constant(Operator.GE)
    op_gt = lit(">") > constant(Operator.GT)
    op_le = lit("<=") > constant(Operator.LE)
    op_lt = lit("<") > constant(Operator.LT)
    op_and = lit("&") > constant(Operator.AND)
    op_or = lit("|") > constant(Operator.OR)
    op_add = lit("+") > constant(Operator.ADD)
    op_sub = lit("-") > constant(Operator.SUB)
    op_mul = lit("*") > constant(Operator.MUL)
    op_div = lit("/") > constant(Operator.DIV)
    op_pow = lit("^") > constant(Operator.POW)
    op_not = lit("!") > constant(Operator.NOT)
    op_neg = lit("-") > constant(Operator.NEG)

    # Operators grouped by precedence level.
    equality_op = op_eq | op_ne
    comparison_op = op_ge | op_gt | op_le | op_lt
    logic_op = op_and | op_or
    term_op = op_add | op_sub
    factor_op = op_mul | op_div
    power_op = op_pow
    unary_op = op_neg | op_not

    # "<expr> , <expr>" inside a case block.
    case_pair = expr << "," & expr

    # --- Expression levels ------------------------------------------------------
    program = repsep(expr, ";") > Program
    expr = equality
    equality = split1(comparison, equality_op) > bin_operator
    comparison = split1(logic, comparison_op) > bin_operator
    logic = split1(term, logic_op) > bin_operator
    term = split1(factor, term_op) > bin_operator
    factor = split1(power, factor_op) > bin_operator
    power = split1(dice, power_op) > bin_operator_right
    # "[count] d sides": the whole "count d" prefix is optional, and so is
    # the count inside it.
    dice = opt(opt(ternary) << "d") & ternary > maybe_dice
    ternary = case & opt("?" >> expr << ":" & expr) > maybe_ternary
    case = unary & opt(
        lit(":") >> "(" >> repsep(case_pair, ";") << ")") > maybe_case
    unary = unary_op & unary | primary > mon_operator
    primary = number | string | bracketed
    bracketed = "(" >> expr << ")"

    parse_all = program