# Basis characters (by exclusion) for variable / field names.  The following
# list of characters is from the btparse documentation.
# The patterns are raw strings so that \s and \d reach the regex engine
# without relying on Python passing unknown string escapes through; triple
# quotes let the pattern contain both quote characters unescaped.
any_name = Regex(r'''[^\s"#%'(),={}]+''')

# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs).  Cite keys can start with a digit
not_digname = Regex(r'''[^\d\s"#%'(),={}][^\s"#%'(),={}]*''')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex(r"[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
# A macro reference yields a Macro built from the lower-cased name
macro_ref = not_dig_lower.copy().setParseAction(lambda t: Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower('entry_type')
cite_key = any_name('cite_key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string | curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
# Fields are comma separated; the comma after the last field is optional
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
# NOTE(review): these definitions appear to repeat the ones directly above
# this section -- confirm whether the duplication is intentional.

# Basis characters (by exclusion) for variable / field names.  The following
# list of characters is from the btparse documentation.
# The patterns are raw strings so that \s and \d reach the regex engine
# without relying on Python passing unknown string escapes through; triple
# quotes let the pattern contain both quote characters unescaped.
any_name = Regex(r'''[^\s"#%'(),={}]+''')

# btparse says, and the test bibs show by experiment, that macro and field names
# cannot start with a digit.  In fact entry type names cannot start with a digit
# either (see tests/bibs).  Cite keys can start with a digit
not_digname = Regex(r'''[^\d\s"#%'(),={}][^\s"#%'(),={}]*''')

# Comment comments out to end of line
comment = (AT + CaselessLiteral('comment') +
           Regex(r"[\s{(].*").leaveWhitespace())

# The name types with their digiteyness
not_dig_lower = not_digname.copy().setParseAction(lambda t: t[0].lower())
macro_def = not_dig_lower.copy()
# A macro reference yields a Macro built from the lower-cased name
macro_ref = not_dig_lower.copy().setParseAction(lambda t: Macro(t[0].lower()))
field_name = not_dig_lower.copy()
# Spaces in names mean they cannot clash with field names
entry_type = not_dig_lower('entry_type')
cite_key = any_name('cite_key')
# Number has to be before macro name
string = (number | macro_ref | quoted_string | curly_string)

# There can be hash concatenation
field_value = string + ZeroOrMore(HASH + string)
field_def = Group(field_name + EQUALS + field_value)
# Fields are comma separated; the comma after the last field is optional
entry_contents = Dict(ZeroOrMore(field_def + COMMA) + Optional(field_def))

# Entry is surrounded either by parentheses or curlies
valid_word = Regex(r'([a-zA-Z0-9*_+.-]|\\[!(){}\[\]^"~*?\\:])+').setName("word") valid_word.setParseAction( lambda t : t[0].replace('\\\\',chr(127)).replace('\\','').replace(chr(127),'\\') ) string = QuotedString('"') required_modifier = Literal("+")("required") prohibit_modifier = Literal("-")("prohibit") integer = Regex(r"\d+").setParseAction(lambda t:int(t[0])) proximity_modifier = Group(TILDE + integer("proximity")) number = Regex(r'\d+(\.\d+)?').setParseAction(lambda t:float(t[0])) fuzzy_modifier = TILDE + Optional(number, default=0.5)("fuzzy") term = Forward() field_name = valid_word.copy().setName("fieldname") incl_range_search = Group(LBRACK + term("lower") + to_ + term("upper") + RBRACK) excl_range_search = Group(LBRACE + term("lower") + to_ + term("upper") + RBRACE) range_search = incl_range_search("incl_range") | excl_range_search("excl_range") boost = (CARAT + number("boost")) string_expr = Group(string + proximity_modifier) | string word_expr = Group(valid_word + fuzzy_modifier) | valid_word term << (Optional(field_name("field") + COLON) + (word_expr | string_expr | range_search | Group(LPAR + expression + RPAR)) + Optional(boost)) term.setParseAction(lambda t:[t] if 'field' in t or 'boost' in t else None) expression << operatorPrecedence(term, [ (required_modifier | prohibit_modifier, 1, opAssoc.RIGHT),