def test_char_from(self):
    """char_from("ab") parses each listed character and names the set on failure."""
    parser = char_from("ab")

    # Each accepted character is returned unchanged.
    for accepted in ("a", "b"):
        self.assertEqual(parser.parse(accepted), accepted)

    # Anything outside the set fails at the first position.
    with self.assertRaises(ParseError) as caught:
        parser.parse('x')
    self.assertEqual(str(caught.exception), """expected '[ab]' at 0:0""")
def calc_num():
    """Generator body that parses an operand and, optionally, ``<op> <rest>``.

    Yields parsers one at a time and receives their results back
    (presumably it is wrapped by a parsy ``generate`` decorator outside this
    view -- confirm).  Returns ``[left]`` when the operand is followed by
    ``)`` or a newline, otherwise ``[left, op] + rest`` where ``rest`` is the
    list produced by a recursive ``calc_num``.  A parenthesised operand is
    itself the list returned by the nested ``calc_num``.
    """
    first = yield lparen | number
    if first == '(':
        # do not return since (5 + 3) * 4 -> after getting 5 + 3, still possible to have a * 4 after
        left = yield calc_num
    else:
        left = first

    # number is definitely there, but operator may not be there
    separator = yield space | char_from(')\n')
    if separator in ')\n':
        return [left]

    op = yield operator
    yield space
    right = yield calc_num
    return [left, op] + right
@dataclasses.dataclass(frozen=True)
class Varying(Attribute):
    """An attribute that may additionally carry a default value.

    ``str()`` returns the parent class's rendering, followed by
    `` = <default_value>`` when a default value is set.
    """

    # None means "no default"; spelled with Union so no new import is needed.
    default_value: Union[TypeConstructor, Float, None] = None

    def __str__(self):
        definition = super().__str__()
        # Compare against None explicitly so that a default which happens to
        # be falsy is still rendered.
        if self.default_value is not None:
            definition = f'{definition} = {self.default_value}'
        return definition


# Matches `// ...` up to the end of the line, or a `/* ... */` block.
# The block body uses [\s\S] rather than `.` because `.` does not match a
# newline, which made block comments spanning several lines never match.
C_COMMENT_PATTERN = re.compile(r'\/\/.*$|\/\*[\s\S]*?\*\/', re.MULTILINE)

# Single-character punctuation tokens, each described by its own character.
EQ = parsy.char_from('=').desc('=')
COMMA = parsy.char_from(',').desc(',')
COLON = parsy.char_from(':').desc(':')
SEMICOLON = parsy.char_from(';').desc(';')
L_BRACE = parsy.char_from('{').desc('{')
R_BRACE = parsy.char_from('}').desc('}')
L_PARENTHESES = parsy.char_from('(').desc('(')
R_PARENTHESES = parsy.char_from(')').desc(')')

# Keyword sets come from the TYPES / SEMANTICS collections defined elsewhere.
TYPE = parsy.string_from(*TYPES)
WHITESPACE = parsy.whitespace.desc('whitespace')
OPTIONAL_WHITESPACE = WHITESPACE.optional()
SEMANTIC = parsy.string_from(*SEMANTICS)

# An identifier is a letter followed by any number of letters, digits or "_".
IDENTIFIER_CHARS = parsy.letter | parsy.decimal_digit | parsy.string("_")
IDENTIFIER = (parsy.letter + IDENTIFIER_CHARS.many().concat()).desc('identifier')
SFIXED64 = "sfixed64"
BOOL = "bool"
STRING = "string"

# Some extra constants to avoid typing
SEMI = lexeme(";")
EQ = lexeme("=")
LPAREN = lexeme("(")
RPAREN = lexeme(")")
LBRACE = lexeme("{")
RBRACE = lexeme("}")

# -- Beginning of following spec --

# Letters and digits
letter = char_from(ascii_letters)
decimalDigit = char_from(digits)
octalDigit = char_from(octdigits)
hexDigit = char_from(hexdigits)

# Identifiers
# Compared to spec, we add some '_' prefixed items which are not wrapped in `lexeme`,
# on the assumption that spaces in the middle of identifiers are not accepted.

# A letter followed by any run of letters, decimal digits or underscores.
_ident = (
    letter + (letter | decimalDigit | string("_")).many().concat()
).desc('ident')
ident = lexeme(_ident)

# NOTE(review): fullIdent is assembled from the lexeme-wrapped `ident`, not
# `_ident`; if `lexeme` skips trailing whitespace, whitespace before a "."
# would be accepted here -- confirm that this is intended.
fullIdent = lexeme(
    ident + (string(".") + ident).many().concat()
).desc('fullIdent')

_messageName = _ident
messageName = lexeme(ident).desc('messageName')
# Zero or more whitespace characters; consumed after every token.
whitespace = P.regex(r'\s*', P.re.MULTILINE)


def lexeme(p):
    """Return parser *p* followed by ``whitespace``, keeping only *p*'s result."""
    return p << whitespace


def reserved(str):
    """Return a lexeme parser for the exact text *str*."""
    return lexeme(P.string(str))


def parens(p):
    """Return *p* wrapped in a reserved "(" and ")", keeping only *p*'s result."""
    return reserved("(").then(p).skip(reserved(")"))


# Non-negative integer literal without leading zeros -> ast.Constant(int).
number = lexeme(P.regex(r'(0|[1-9][0-9]*)')).map(int).map(ast.Constant)

# Identifier: starts with "_" or a letter, then "_", "'", letters or digits.
identifier = lexeme(P.regex(r"[_a-zA-Z][_'a-zA-Z0-9]*"))
variable = identifier.map(ast.Variable)

# Binary operator character -> the matching ast node class.
operator = lexeme(P.char_from("+-*/")).map(
    {"+": ast.Add, "-": ast.Sub, "*": ast.Mul, "/": ast.Div}.__getitem__
)

value = number | variable


@P.generate
def single_expression():
    """Parse a number, a variable, or a parenthesised expression."""
    result = yield value | parens(expression)
    return result


@P.generate
def expression():
    fst = yield single_expression
    rest = yield P.seq(operator, single_expression).map(tuple).many()
def factor():
    """Generator body parsing a ``base`` with an optional repetition marker.

    Yields ``base`` and then an optional single character out of ``*?+``,
    and returns ``Factor(base_result, marker)``.
    """
    atom = yield base
    quantifier = yield char_from("*?+").optional()
    return Factor(atom, quantifier)
if end == "]": # not a real range, the - is the last char in the class builder.add(start) builder.add(poss_range) break if end < start: raise RuntimeError(f"Invalid range: {start}-{end}") builder.add(start, end) start = yield any_char return Lit(builder.min, builder.max) base = alt( char_from(".").result(Lit(CHAR_MIN, CHAR_MAX)), char_from("^$"), # TODO string("\\") >> any_char.map(lambda c: Lit(c, c)), char_class, string("(") >> regex << string(")"), test_char(lambda c: c not in "?+*[()|", "").map(lambda c: Lit(c, c)), ) @attr.s(eq=False) class Node: eps_transitions = attr.ib(factory=list) matcher = attr.ib(default=None) next = attr.ib(default=None) def min_matching(self):
def combine_interpolated(locations, result): return Interpolated( nodes=result, **locations, ) return (locate(parser).combine(combine_interpolated)) tag_name_start_char = P.regex(r'[:a-z]') tag_name_char = tag_name_start_char | P.regex(r'[0-9-_.]') tag_name = tag_name_start_char + tag_name_char.many().concat() dtd = P.regex(r'<![^>]*>') string_attribute_char = P.char_from('-_./+,?=:;#') | P.regex(r'[0-9a-zA-Z]') def make_quoted_string_attribute_parser(quote, jinja): """ quote: A single or a double quote """ def combine(locations, value): return String( value=value, quote=quote, **locations, ) value_char = P.regex(r'[^<]', flags=re.DOTALL) value = interpolated(
from parsy import string, char_from, decimal_digit, seq argument = seq( char_from('+-').map(lambda c: 1 if c == '+' else -1), decimal_digit.many().concat().map(int) ).combine(lambda m, a: m * a) acc = string('acc ') >> argument.map(lambda a: (a, 1, a)) nop = string('nop ') >> argument.map(lambda a: (0, 1, a)) jmp = string('jmp ') >> argument.map(lambda a: (0, a, a)) eol = string('\n') instruction = (acc | nop | jmp) << eol def get_terminates_n_acc_value(instr_list): visited = [0] * len(instr_list) accumulator = 0 idx = 0 while idx < len(instr_list): visited[idx] += 1 if visited[idx] > 1: return False, accumulator acc_mut, idx_mut, _ = instr_list[idx] accumulator += acc_mut idx += idx_mut return True, accumulator def get_answer_pt2(instr_list):
def interpolated(parser): def combine_interpolated(locations, result): return Interpolated(nodes=result, **locations) return locate(parser).combine(combine_interpolated) tag_name_start_char = P.regex(r"[:a-zA-Z]") tag_name_char = tag_name_start_char | P.regex(r"[0-9-_.]") tag_name = tag_name_start_char + tag_name_char.many().concat() dtd = P.regex(r"<![^>]*>") # TODO This is overly restrictive on what can be inside attributes, it’s unclear why. string_attribute_char = P.char_from("-_./+,?=:;#") | P.regex(r"[0-9a-zA-Z]") def make_quoted_string_attribute_parser(quote, jinja): """ quote: A single or a double quote """ def combine(locations, value): return String(value=value, quote=quote, **locations) value_char = P.regex(r"[^<]", flags=re.DOTALL) value = interpolated( P.string(quote) .should_fail("no " + quote) .then(jinja | value_char)
from parsy import string, decimal_digit, seq, char_from
from math import prod

TILE_SIZE = 10

number = decimal_digit.at_least(1).concat().map(int)

# "Tile <id>:" header line -> the integer id.
tile_header = string('Tile ') >> number << string(':\n')

# One image row: exactly TILE_SIZE cells, '#' -> 1 and '.' -> 0, as a tuple.
tile_image_line = (
    char_from('.#')
    .map(lambda v: int(v == '#'))
    .times(TILE_SIZE)
    .map(tuple)
)

# A full image: exactly TILE_SIZE newline-terminated rows, as a tuple of rows.
tile_image = (tile_image_line << string('\n')).times(TILE_SIZE).map(tuple)

# One tile is (id, image) followed by a blank line; all tiles become a dict.
tile_parse = seq(tile_header, tile_image << string('\n')).map(tuple)
tiles_parse = tile_parse.many().map(dict)


def get_nth_col(tile, n):
    """Return column *n* of *tile* (its first TILE_SIZE rows) as a tuple."""
    return tuple(tile[row][n] for row in range(TILE_SIZE))


def get_left_edge(tile):
    """Return the first column of *tile* as a tuple."""
    return get_nth_col(tile, 0)


def get_right_edge(tile):
    """Return the last column (index TILE_SIZE - 1) of *tile* as a tuple."""
    return get_nth_col(tile, TILE_SIZE - 1)


def get_edges(tile):
    top = tile[0]
    bot = tile[TILE_SIZE - 1]
mask_and |= 1 << i else: mask_or |= int(v) << i return mask_and, mask_or # Parsing Combinators optional_whitespace = regex(r"\s*") word = regex(r"\w+") number = regex(r"[-+]?\d+").map(int) mask_inst = seq( string("mask"), whitespace >> match_item("=") >> whitespace >> char_from("X01").at_least(1).map(build_mask), ) mem_inst = seq( string("mem"), match_item("[") >> number << match_item("]"), whitespace >> match_item("=") >> whitespace >> number, ) instruction = mask_inst | mem_inst program = instruction.sep_by(string("\n")) << string("\n").optional() with puzzle_input(14, example2, False) as f: data, remain = program.parse_partial(f.read()) # pprint(data) print("\n\n\nremaining:", repr(remain)) # pprint(data)
return [rule_parts] number = decimal_digit.at_least(1).concat().map(int) rule_part = string(' ') >> (number | string('|')) rule_tail = rule_part.at_least(1).map(tail_from_parts) rule_exact = string(' "') >> any_char << string('"') parse_rule_parse = seq( number << string(':'), rule_exact | rule_tail ).map(tuple) parse_rules_parse = (parse_rule_parse << string('\n')).many().map(dict) message_parse = char_from('ab').many().concat() messages_parse = (message_parse << string('\n')).many() input_parse = seq(parse_rules_parse << string('\n'), messages_parse).map(tuple) def to_parser(rule_defs): @cache() def internal_to_parser(idx): rule_def = rule_defs[idx] if isinstance(rule_def, str): return string(rule_def) else: return alt(*[ seq(*list(map(internal_to_parser, to_seq))).map(lambda l: ''.join(l)) for to_seq in rule_def
from parsy import string, char_from, decimal_digit, seq

MASK_CMD = 'MASK'
MEM_CMD = 'MEM'

# "mask = <X/0/1 characters>" -> (MASK_CMD, or_mask, and_mask).
# The OR mask has a 1 bit only where the text has '1'; the AND mask has a 0 bit
# only where the text has '0'.  At least one mask character is required so
# that an empty mask is a parse failure rather than a ValueError from
# int('', 2).
mask_line = string('mask = ') >> char_from('X01').at_least(1).map(
    lambda mask_chars: (
        MASK_CMD,
        int(''.join(['1' if c == '1' else '0' for c in mask_chars]), 2),  # or mask
        int(''.join(['0' if c == '0' else '1' for c in mask_chars]), 2),  # and mask
    ))

# "mem[<loc>] = <val>" -> (MEM_CMD, loc, val).  Both numbers need at least one
# digit for the same reason: int('') would raise instead of failing the parse.
mem_line = seq(
    string('mem[') >> decimal_digit.at_least(1).concat().map(int),
    string('] = ') >> decimal_digit.at_least(1).concat().map(int)).combine(
        lambda loc, val: (MEM_CMD, loc, val))

# A program is any number of newline-terminated mem/mask lines.
code_line = (mem_line | mask_line) << char_from('\n')
code_lines = code_line.many()

OR_MASK = 'OR'
AND_MASK = 'AND'


def execute_pt1(code, memory):
    if not code:
        return memory
    else:
        cmd, x, y = code[0]
        if cmd == MASK_CMD:
            memory[OR_MASK] = x