def parse_template(template, parameters):
    '''
    Parses `template`, resolving its template parameters from `parameters`.

    While the parse runs, the 'substitute_template_parameter' entry of the
    shared `toolset` is replaced by a closure that forwards each node,
    together with `parameters`, to `substitute_template_parameter`.
    The default handler is put back afterwards, even if building the
    parser or parsing raises, so a failed parse cannot leave this call's
    parameters installed for later templates.

    Returns the `value` attribute of the parse result.
    '''
    def subst_param(node):
        substitute_template_parameter(node, parameters)

    toolset['substitute_template_parameter'] = subst_param
    try:
        parser = preprocessorParser.make_parser(toolset)
        result = parser.parse(template)
    finally:
        # We reinitialize this so that we won't pollute other templates
        # with our values; doing it in `finally` covers the error path too.
        toolset['substitute_template_parameter'] = substitute_template_parameter
    return result.value
def parse_templates_from_text(txt):
    '''
    Parses all templates from the mediawiki text.

    Returns a 2-tuple: the stripped leaf text of the parse result (every
    template node has its value blanked once recorded), and a plain dict
    mapping each template page name to a list of parameter dicts, one
    per occurrence of that template. Standalone parameters map to None;
    name/value parameters map the formatted name to the formatted value.

    Raises Exception("Bad AST shape!") when a parameter node is neither
    standalone nor a parameter_name/parameter_value pair.
    '''
    found = collections.defaultdict(list)

    def read_parameter(param, into):
        # Record a single parameter node in the `into` dict.
        if isinstance(param.value, py3compat.string_types) or \
                len(param.value) == 1:
            # Standalone parameter: it carries no value of its own.
            into['%s' % param.value] = None
            return

        is_named_pair = (
            len(param.value) == 2
            and param.value[0].tag == 'parameter_name'
            and param.value[1].tag == 'parameter_value'
        )
        if not is_named_pair:
            raise Exception("Bad AST shape!")

        key = param.value[0].value
        val = param.value[1].leaf()
        into['%s' % key] = '%s' % val

    def collect_template(node):
        page = node.value[0].value.strip()
        arguments = {}
        if len(node.value) > 1:
            for param in node.value[1].value:
                read_parameter(param, arguments)
        found[page].append(arguments)
        # The template is recorded now, so drop it from the text.
        node.value = ''

    def ignore(node):
        return None

    handlers = {'substitute_template': collect_template}
    for hook in ('substitute_template_parameter',
                 'substitute_named_entity',
                 'substitute_numbered_entity'):
        handlers[hook] = ignore

    tree = preprocessorParser.make_parser(handlers).parse(txt)
    return tree.leaf().strip(), dict(found)
def make_parser(template_dict):
    '''
    Builds a parser from `toolset` after resetting the module globals.

    Stores `template_dict` in the global `templates`, rebinds the global
    `parsed_templates` to a fresh empty dict, and returns the result of
    `preprocessorParser.make_parser(toolset)`.
    '''
    global templates, parsed_templates
    # Start every new parser with an empty `parsed_templates` dict.
    parsed_templates = {}
    templates = template_dict
    parser = preprocessorParser.make_parser(toolset)
    return parser