def main(output, template_path, *xml_files): with open(output, 'wb') as f: xml = make_page(template_path, xml_files) f.write(xml_print(xml))
def _extract_directives(page, names): directive_groups = {name: collections.defaultdict(set) for name in names} t = xml_parse(page) section = t.find('./refmeta/manvolnum').text pagename = t.find('./refmeta/refentrytitle').text formatting = {} storopt = directive_groups['options'] for variablelist in t.iterfind('.//variablelist'): klass = variablelist.attrib.get('class') searchpath = variablelist.attrib.get('xpath', './varlistentry/term/varname') storvar = directive_groups[klass or 'miscellaneous'] # <option>s go in OPTIONS, unless class is specified for xpath, stor in ((searchpath, storvar), ('./varlistentry/term/option', storvar if klass else storopt)): for name in variablelist.iterfind(xpath): text = re.sub(r'([= ]).*', r'\1', name.text).rstrip() if text.startswith('-'): # for options, merge options with and without mandatory arg text = text.partition('=')[0] stor[text].add((pagename, section)) if text not in formatting: # use element as formatted display if name.text[-1] in "= '": name.clear() else: name.tail = '' name.text = text formatting[text] = name extra = variablelist.attrib.get('extra-ref') if extra: stor[extra].add((pagename, section)) if extra not in formatting: elt = tree.Element("varname") elt.text = extra formatting[extra] = elt storfile = directive_groups['filenames'] for xpath, absolute_only in (('.//refsynopsisdiv//filename', False), ('.//refsynopsisdiv//command', False), ('.//filename', True)): for name in t.iterfind(xpath): if absolute_only and not (name.text and name.text.startswith('/')): continue if name.attrib.get('index') == 'false': continue name.tail = '' if name.text: if name.text.endswith('*'): name.text = name.text[:-1] if not name.text.startswith('.'): text = name.text.partition(' ')[0] if text != name.text: name.clear() name.text = text if text.endswith('/'): text = text[:-1] storfile[text].add((pagename, section)) if text not in formatting: # use element as formatted display formatting[text] = name else: text = ' '.join(name.itertext()) storfile[text].add((pagename, section)) formatting[text] = name storfile = directive_groups['constants'] for name in t.iterfind('.//constant'): if name.attrib.get('index') == 'false': continue name.tail = '' if name.text.startswith('('): # a cast, strip it name.text = name.text.partition(' ')[2] storfile[name.text].add((pagename, section)) formatting[name.text] = name storfile = directive_groups['specifiers'] for name in t.iterfind(".//table[@class='specifiers']//entry/literal"): if name.text[0] != '%' or name.getparent().text is not None: continue if name.attrib.get('index') == 'false': continue storfile[name.text].add((pagename, section)) formatting[name.text] = name for name in t.iterfind(".//literal[@class='specifiers']"): storfile[name.text].add((pagename, section)) formatting[name.text] = name # Serialize to allow pickling formatting = {name: xml_print(value) for name, value in formatting.items()} return directive_groups, formatting
""" for name, directives in directive_groups.items(): _make_section(template, name, directives, formatting) _make_colophon(template, directive_groups.values()) return template def make_page(template_path, xml_files): "Extract directives from xml_files and return XML index tree." template = xml_parse(template_path) names = [vl.get('id') for vl in template.iterfind('.//variablelist')] directive_groups = {name: collections.defaultdict(list) for name in names} formatting = {} for page in xml_files: try: _extract_directives(directive_groups, formatting, page) except Exception: raise ValueError("failed to process " + page) return _make_page(template, directive_groups, formatting) if __name__ == '__main__': with open(sys.argv[1], 'wb') as f: template_path = sys.argv[2] xml_files = sys.argv[3:] xml = make_page(template_path, xml_files) f.write(xml_print(xml))
'variable2': ...}, ... } """ for name, directives in directive_groups.items(): _make_section(template, name, directives, formatting) _make_colophon(template, directive_groups.values()) return template def make_page(*xml_files): "Extract directives from xml_files and return XML index tree." template = tree.fromstring(TEMPLATE) names = [vl.get('id') for vl in template.iterfind('.//variablelist')] directive_groups = {name: collections.defaultdict(list) for name in names} formatting = {} for page in xml_files: try: _extract_directives(directive_groups, formatting, page) except Exception: raise ValueError("failed to process " + page) return _make_page(template, directive_groups, formatting) if __name__ == '__main__': with open(sys.argv[1], 'wb') as f: f.write(xml_print(make_page(*sys.argv[2:])))
'class': {'variable': [('manpage', 'manvolume'), ...], 'variable2': ...}, ... } """ for name, directives in directive_groups.items(): _make_section(template, name, directives, formatting) _make_colophon(template, directive_groups.values()) return template def make_page(*xml_files): "Extract directives from xml_files and return XML index tree." template = tree.fromstring(TEMPLATE) names = [vl.get('id') for vl in template.iterfind('.//variablelist')] directive_groups = {name:collections.defaultdict(list) for name in names} formatting = {} for page in xml_files: try: _extract_directives(directive_groups, formatting, page) except Exception: raise ValueError("failed to process " + page) return _make_page(template, directive_groups, formatting) if __name__ == '__main__': with open(sys.argv[1], 'wb') as f: f.write(xml_print(make_page(*sys.argv[2:])))