def add_stylesheet(html, stylesheet):
    """Append a stylesheet <link> for the compiled CSS to the document's <head>.

    html: a parsed document (BeautifulSoup-style) expected to contain a <head>.
    stylesheet: base name of the compiled stylesheet (joined as 'compiled/<name>').

    Raises:
        ValueError: if the document has no <head> element (previously this
        failed with an opaque AttributeError on None).
    """
    head = html.find('head')
    if head is None:
        msg = 'Cannot add stylesheet %r: no <head> element in document.' % stylesheet
        raise ValueError(msg)
    link = Tag(name='link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    # get_css_filename resolves the compiled CSS path/URL for the link href.
    link['href'] = get_css_filename('compiled/%s' % stylesheet)
    # Surrounding newlines keep the serialized <head> readable.
    head.append('\n')
    head.append(link)
    head.append('\n')
def add_style(data_aug, stylesheet):
    """Return a new AugmentedResult whose HTML includes a stylesheet <link>.

    Parses the HTML stored in *data_aug*, inserts a <link> to the compiled
    *stylesheet* into its <head>, and returns a fresh AugmentedResult that
    carries over the notes of *data_aug* plus the re-serialized document.
    """
    from mcdp_report.html import get_css_filename

    document = bs_entire_document(data_aug.get_result())
    head = document.find('head')
    assert head is not None

    css_link = Tag(name='link')
    css_link['rel'] = 'stylesheet'
    css_link['type'] = 'text/css'
    css_link['href'] = get_css_filename('compiled/%s' % stylesheet)
    head.append(css_link)

    out = AugmentedResult()
    out.merge(data_aug)
    out.set_result(to_html_entire_document(document))
    return out
def get_minimal_document(body_contents, title=None,
                         add_markdown_css=False, add_manual_css=False,
                         stylesheet=None, extra_css=None):
    """ Creates the minimal html document with MCDPL css.

        body_contents: an HTML fragment (validated by check_html_fragment)
            that becomes the contents of a <div> inside <body>.
        title: contents of <title>; empty string if None.
        add_markdown_css: language + markdown
        add_manual_css: language + markdown + (manual*)
        stylesheet: base name of the compiled stylesheet; defaults to
            'v_mcdp_render_default'.
        extra_css = additional CSS contents

        Returns the serialized document as a string, prefixed with an
        XHTML+MathML+SVG DOCTYPE.
    """
    check_html_fragment(body_contents)
    # bs() wraps its input in a synthetic <fragment> root.
    soup = bs("")
    assert soup.name == 'fragment'
    if title is None:
        title = ''
    html = Tag(name='html')
    head = Tag(name='head')
    body = Tag(name='body')
    head.append(Tag(name='meta', attrs={'http-equiv': "Content-Type",
                                        'content': "application/xhtml+xml; charset=utf-8"}))
    if stylesheet is None:
        stylesheet = 'v_mcdp_render_default'
    if add_markdown_css or add_manual_css:
        # Both flags currently link the same compiled stylesheet.
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)
    tag_title = Tag(name='title')
    tag_title.append(NavigableString(title))
    head.append(tag_title)
    parsed = bs(body_contents)
    assert parsed.name == 'fragment'
    # Re-label the synthetic <fragment> root as a <div> so it can live in <body>.
    parsed.name = 'div'
    body.append(parsed)
    html.append(head)
    html.append(body)
    soup.append(html)
    if extra_css is not None:
        add_extra_css(soup, extra_css)
    s = to_html_stripping_fragment_document(soup)
    assert not 'DOCTYPE' in s
    #     s = html.prettify() # no: it removes empty text nodes
    #     ns="""<?xml version="1.0" encoding="utf-8" ?>"""
    ns = """<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">"""
    res = ns + '\n' + s
    #     if add_manual_css and MCDPConstants.manual_link_css_instead_of_including:
    #         assert 'manual.css' in res, res
    # Defensive cleanup: drop a stray DOCTYPE that may have been embedded
    # inside the wrapping <div> by the fragment parser.
    res = res.replace('<div><!DOCTYPE html>', '<div>')
    return res
def manual_join(template, files_contents, stylesheet, remove=None,
                extra_css=None, remove_selectors=None, hook_before_toc=None,
                references=None, resolve_references=True,
                hook_before_final_pass=None, require_toc_placeholder=False,
                permalink_prefix=None, crossrefs_aug=None, aug0=None):
    """ Joins several HTML documents into one manual, inside *template*.

        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        Returns an AugmentedResult whose result is the final document encoded
        as a UTF-8 byte string.
    """
    result = AugmentedResult()
    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    # crossrefs is a soup of cross-reference data; a placeholder tag if none given.
    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)
    if aug0 is not None:
        result.merge(aug0)

    # Timing is currently disabled: this no-op context manager keeps the
    # `with timeit(...)` structure in place without measuring anything.
    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):
        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs because entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            # Copy (not move) the standard manual CSS fragment into <head>.
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            # Parse each document fragment, keyed by its unique docname.
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                # contents may be either an AugmentedResult (whose notes we
                # merge) or a plain string.
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')
                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        #         with timeit('fix_duplicate_ids'):
        #             XXX
        #             fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False
            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(
                        Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))
                # Faster path moves the nodes (extract) instead of copying.
                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):
            # Ensure there is a div to host the bibliography; create one at
            # the end of <body> if the template did not provide it.
            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE
            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors, result, location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                # NOTE(review): when require_toc_placeholder is False the
                # exception is deliberately swallowed (no TOC is added).
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    # logger.error(msg)
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d, crossrefs=crossrefs,
                resolve_references=resolve_references, res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            # Rewrite <a href> targets found in the references map; fill in
            # the link title when the anchor is empty.
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    if not a.children:  # empty
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
        return result
def go(self):
    """Command entry point: render one or more Markdown docs of a library to HTML.

    Reads options from self.get_options(); positional arguments are the
    document names. Optionally runs prince on the result to produce a PDF.
    """
    logger.setLevel(logging.DEBUG)

    options = self.get_options()

    # symbols option is a path to a file whose contents are read; empty if unset.
    symbols = self.options.symbols
    if symbols is not None:
        symbols = open(symbols).read()
    else:
        symbols = ''

    if not options.contracts:
        disable_all()

    stylesheet = options.stylesheet
    # make sure it exists (raises early if the compiled CSS cannot be found)
    get_css_filename('compiled/%s' % stylesheet)

    params = options.get_extra()
    if len(params) < 1:
        raise ValueError('Please specify name.')

    config_dirs = options.config_dirs.split(":")
    maindir = options.maindir
    out_dir = options.out

    if options.cache:
        cache_dir = os.path.join(out_dir, '_cached', 'solve')
    else:
        cache_dir = None

    librarian = Librarian()
    for e in config_dirs:
        librarian.find_libraries(e)

    library = librarian.get_library_by_dir(maindir)
    if cache_dir is not None:
        library.use_cache_dir(cache_dir)

    docs = params
    if not docs:
        msg = 'At least one argument required.'
        raise_desc(UserError, msg)

    for docname in docs:
        # Accept a path; keep only the basename for lookup in the library.
        if '/' in docname:
            docname0 = os.path.split(docname)[-1]
            logger.info("Using %r rather than %r" % (docname0, docname))
            docname = docname0

        # Normalize: strip a trailing '.<ext_doc_md>' suffix if present,
        # then re-append it to form the file basename.
        suffix = '.' + MCDPConstants.ext_doc_md
        if docname.endswith(suffix):
            docname = docname.replace(suffix, '')

        basename = docname + suffix
        f = library._get_file_data(basename)
        data = f['data']
        realpath = f['realpath']

        generate_pdf = options.pdf_figures

        if out_dir is None:
            use_out_dir = os.path.dirname(realpath)
        else:
            # NOTE(review): the user-supplied out_dir is ignored here and a
            # fixed 'out/mcdp_render' path is used instead — confirm intended.
            use_out_dir = os.path.join('out', 'mcdp_render')

        raise_errors = not options.forgiving
        use_mathjax = bool(options.mathjax)

        html_filename = render(library, docname, data, realpath, use_out_dir,
                               generate_pdf,
                               stylesheet=stylesheet,
                               symbols=symbols,
                               raise_errors=raise_errors,
                               use_mathjax=use_mathjax)

        if options.pdf:
            run_prince(html_filename)
def manual_join(template, files_contents, bibfile, stylesheet, remove=None,
                extra_css=None, remove_selectors=None, hook_before_toc=None):
    """ Joins several HTML documents into one manual, inside *template*.

        files_contents: iterable of ((libname, docname), data) pairs, where
            data is the HTML contents of the document.
        bibfile: path to an external bibliography file, or None.
        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        Returns the final document as a UTF-8 encoded byte string.
    """
    logger.debug('remove_selectors: %s' % remove_selectors)
    logger.debug('remove: %s' % remove)
    from mcdp_utils_xml import bs

    template = replace_macros(template)

    # cannot use bs because entire document
    template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    d = template_soup
    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    # Copy (not move) the standard manual CSS fragment into <head>.
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    # Parse each document, keyed by docname.
    # FIX: the per-document logging and DOCTYPE check now use the *current*
    # document's data; previously they ran in the later copy loop against the
    # stale loop variable `data` (always the last file's contents).
    from mcdp_docs.latex.latex_preprocess import assert_not_inside
    basename2soup = OrderedDict()
    for (_libname, docname), data in files_contents:
        logger.debug('docname %r -> %s KB' % (docname, len(data) / 1024))
        assert_not_inside(data, 'DOCTYPE')
        frag = bs(data)
        basename2soup[docname] = frag

    fix_duplicated_ids(basename2soup)

    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))
        for x in content:
            x2 = x.__copy__()  # not clone, not extract
            body.append(x2)
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)

    logger.info('external bib')
    if bibfile is not None:
        if not os.path.exists(bibfile):
            logger.error('Cannot find bib file %s' % bibfile)
        else:
            bibliography_entries = get_bibliography(bibfile)
            bibliography_entries['id'] = 'bibliography_entries'
            body.append(bibliography_entries)

    # Ensure there is a div to host the bibliography; create one at the end
    # of <body> if the template did not provide it.
    bibhere = d.find('div', id='put-bibliography-here')
    if bibhere is None:
        logger.warning('Could not find #put-bibliography-here in document.'
                       'Adding one at end of document')
        bibhere = Tag(name='div')
        bibhere.attrs['id'] = 'put-bibliography-here'
        d.find('body').append(bibhere)

    do_bib(d, bibhere)

    if True:
        logger.info('reorganizing contents in <sections>')
        body2 = reorganize_contents(d.find('body'))
        body.replace_with(body2)
    else:
        warnings.warn('fix')
        body2 = body

    # Removing
    all_selectors = []
    if remove is not None and remove != '':
        all_selectors.append(remove)
    if remove_selectors:
        all_selectors.extend(remove_selectors)

    logger.debug('all_selectors: %s' % all_selectors)

    all_removed = ''
    for selector in all_selectors:
        nremoved = 0
        # FIX: log the selector currently being processed (was logging
        # `remove`, which is only the first selector — or None).
        logger.debug('Removing selector %r' % selector)
        toremove = list(body2.select(selector))
        logger.debug('Removing %d objects' % len(toremove))
        for x in toremove:
            nremoved += 1
            nd = len(list(x.descendants))
            logger.debug('removing %s with %s descendants' % (x.name, nd))
            if nd > 1000:
                s = str(x)[:300]
                logger.debug(' it is %s' % s)
            x.extract()
            # Keep a textual record of everything removed, for debugging.
            all_removed += '\n\n' + '-' * 50 + ' chunk %d removed\n' % nremoved
            all_removed += str(x)
            all_removed += '\n\n' + '-' * 100 + '\n\n'
        # FIX: report the selector actually used (was `remove`).
        logger.info('Removed %d elements of selector %r' % (nremoved, selector))

    #     if False:
    with open('all_removed.html', 'w') as f:
        f.write(all_removed)

    if hook_before_toc is not None:
        hook_before_toc(soup=d)

    ###
    logger.info('adding toc')
    toc = generate_toc(body2)
    logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    toc_ul.extract()
    assert toc_ul.name == 'ul'
    toc_ul['class'] = 'toc'
    toc_ul['id'] = 'main_toc'
    toc_selector = 'div#toc'
    tocs = list(d.select(toc_selector))
    if not tocs:
        msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
        logger.warning(msg)
    else:
        toc_place = tocs[0]
        toc_place.replaceWith(toc_ul)

    logger.info('checking errors')
    check_various_errors(d)

    from mcdp_docs.check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(d)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    logger.info('substituting empty links')
    substituting_empty_links(d)

    warn_for_duplicated_ids(d)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    add_footnote_polyfill(d)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res
def manual_join(template, files_contents, stylesheet, remove=None,
                extra_css=None, remove_selectors=None, hook_before_toc=None,
                references=None, resolve_references=True):
    """ Joins several HTML documents into one manual, inside *template*.

        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        references: optional dict mapping href -> object with .url/.title,
        used to rewrite links; defaults to empty.

        Returns the final document as a UTF-8 encoded byte string.
    """
    # FIX: `references={}` was a mutable default argument, shared across
    # calls; use the None sentinel instead (backward compatible).
    if references is None:
        references = {}
    check_isinstance(files_contents, list)
    files_contents = [DocToJoin(*_) for _ in files_contents]

    template0 = template
    template = replace_macros(template)

    # cannot use bs because entire document
    template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    d = template_soup
    if d.html is None:
        s = "Invalid template"
        raise_desc(ValueError, s, template0=template0)
    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    # Copy (not move) the standard manual CSS fragment into <head>.
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    # Parse each document fragment, keyed by its unique docname.
    basename2soup = OrderedDict()
    for doc_to_join in files_contents:
        if doc_to_join.docname in basename2soup:
            msg = 'Repeated docname %r' % doc_to_join.docname
            raise ValueError(msg)
        from .latex.latex_preprocess import assert_not_inside
        assert_not_inside(doc_to_join.contents, '<fragment')
        assert_not_inside(doc_to_join.contents, 'DOCTYPE')
        frag = bs(doc_to_join.contents)
        basename2soup[doc_to_join.docname] = frag

    fix_duplicated_ids(basename2soup)

    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        #         logger.debug('docname %r -> %s KB' % (docname, len(data) / 1024))
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))
        copy_contents_into(content, body)
        # Sanity check: no synthetic <fragment> node should survive the copy.
        f = body.find('fragment')
        if f:
            msg = 'I found a <fragment> in the manual after %r' % docname
            msg += '\n\n' + indent(str(content), '> ')
            raise Exception(msg)
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)

    logger.info('external bib')
    # Ensure there is a div to host the bibliography; create one at the end
    # of <body> if the template did not provide it.
    ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE
    bibhere = d.find('div', id=ID_PUT_BIB_HERE)
    if bibhere is None:
        logger.warning(('Could not find #%s in document. '
                        'Adding one at end of document.') % ID_PUT_BIB_HERE)
        bibhere = Tag(name='div')
        bibhere.attrs['id'] = ID_PUT_BIB_HERE
        d.find('body').append(bibhere)

    do_bib(d, bibhere)

    document_final_pass_before_toc(d, remove, remove_selectors)

    if hook_before_toc is not None:
        hook_before_toc(soup=d)

    generate_and_add_toc(d)

    document_final_pass_after_toc(soup=d, resolve_references=resolve_references)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    document_only_once(d)

    # Rewrite hrefs found in the references map; fill in the link title when
    # the anchor is empty.
    for a in d.select('[href]'):
        href = a.attrs['href']
        if href in references:
            r = references[href]
            a.attrs['href'] = r.url
            if not a.children:  # empty
                a.append(r.title)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res
def go():
    """Build the books index page and the combined cross-references page.

    Reads per-book artefacts from 'duckuments-dist/<book>', aggregates their
    notes (errors/warnings/tasks), and writes:
      sys.argv[1]: the index HTML page (with a PHP header marker injected),
      sys.argv[2]: the combined cross-references HTML page,
      sys.argv[3]: path whose directory receives the error/warning reports
                   and the junit XML.
    Exits with the number of errors if any were found.
    """
    # NOTE(review): yaml.load / pickle.loads below are used on local build
    # artefacts — assumed trusted; do not point them at untrusted input.
    groups = OrderedDict(yaml.load(BOOKS))
    import os
    dist = 'duckuments-dist'

    # --- Build the skeleton of the index page.
    html = Tag(name='html')
    head = Tag(name='head')
    meta = Tag(name='meta')
    meta.attrs['content'] = "text/html; charset=utf-8"
    meta.attrs['http-equiv'] = "Content-Type"

    stylesheet = 'v_manual_split'
    link = Tag(name='link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    link['href'] = get_css_filename('compiled/%s' % stylesheet)
    head.append(link)

    body = Tag(name='body')
    style = Tag(name='style')
    style.append(CSS)
    head.append(style)
    head.append(meta)
    html.append(head)
    html.append(body)

    divgroups = Tag(name='div')
    # Accumulates every [url] element found in the per-book crossref files.
    all_crossrefs = Tag(name='div')

    res = AugmentedResult()

    for id_group, group in groups.items():
        divgroup = Tag(name='div')
        divgroup.attrs['class'] = 'group'
        divgroup.attrs['id'] = id_group

        h0 = Tag(name='h1')
        h0.append(group['title'])
        divgroup.append(h0)

        if 'abstract' in group:
            p = Tag(name='p')
            p.append(group['abstract'])
            divgroup.append(p)

        books = group['books']
        # divbook = Tag(name='div')
        books = OrderedDict(books)

        for id_book, book in books.items():
            d = os.path.join(dist, id_book)
            change_frame(d, '../../', current_slug=id_book)

            d0 = dist

            # Merge this book's recorded notes (errors/warnings) if present.
            errors_and_warnings = os.path.join(d, 'out', 'errors_and_warnings.pickle')
            if os.path.exists(errors_and_warnings):
                resi = pickle.loads(open(errors_and_warnings).read())
                # print(errors_and_warnings)
                resi.update_file_path(prefix=os.path.join(id_book, 'out'))
                res.merge(resi)
            else:
                msg = 'Path does not exist: %s' % errors_and_warnings
                logger.error(msg)

            artefacts = get_artefacts(d0, d)

            div = Tag(name='div')
            div.attrs['class'] = 'book-div'
            div.attrs['id'] = id_book
            div_inside = Tag(name='div')
            div_inside.attrs['class'] = 'div_inside'
            links = get_links2(artefacts)

            # Highlight links whose text mentions errors/warnings/tasks.
            for a in links.select('a'):
                s = gettext(a)
                if 'error' in s or 'warning' in s or 'task' in s:
                    a['class'] = 'EWT'

            if False:
                h = Tag(name='h3')
                h.append(book['title'])
                # div_inside.append(h)

            if 'abstract' in book:
                p = Tag(name='p')
                p.append(book['abstract'])
                div_inside.append(p)

            div_inside.append(links)
            div.append(div_inside)

            # Inline the book's TOC, rebasing its links to the book subdir.
            toc = os.path.join(d, 'out/toc.html')
            if os.path.exists(toc):
                data = open(toc).read()
                x = bs(data)
                for a in x.select('a[href]'):
                    href = a.attrs['href']
                    a.attrs['href'] = id_book + '/out/' + href
                x.name = 'div'  # not fragment
                div.append(x)

            # Collect this book's cross-references.
            crossrefs = os.path.join(d, 'crossref.html')
            if os.path.exists(crossrefs):
                x = bs(open(crossrefs).read())
                for e in x.select('[url]'):
                    all_crossrefs.append('\n\n')
                    all_crossrefs.append(e.__copy__())
            else:
                logger.error('File does not exist %s' % crossrefs)

            divgroup.append(div)
        divgroups.append(divgroup)

    out_pickle = sys.argv[3]

    # Summarize aggregated notes.
    nwarnings = len(res.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_WARNING))
    ntasks = len(res.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_TASK))
    nerrors = len(res.get_notes_by_tag(MCDPManualConstants.NOTE_TAG_ERROR))
    logger.info('%d tasks' % ntasks)
    logger.warning('%d warnings' % nwarnings)
    logger.error('%d nerrors' % nerrors)

    from mcdp_docs.mcdp_render_manual import write_errors_and_warnings_files
    write_errors_and_warnings_files(res, os.path.dirname(out_pickle))

    out_junit = os.path.join(os.path.dirname(out_pickle), 'junit', 'notes', 'junit.xml')
    s = get_junit_xml(res)
    write_data_to_file(s.encode('utf8'), out_junit)

    # write_data_to_file(pickle.dumps(res), out_pickle, quiet=False)

    extra = get_extra_content(res)
    extra.attrs['id'] = 'extra'
    body.append(extra)
    body.append(divgroups)

    embed_css_files(html)

    # Drop interactive notes panels from the static index.
    for e in body.select('.notes-panel'):
        e.extract()

    out = sys.argv[1]
    data = str(html)
    # Inject a PHP call right after <body>; the output is served as PHP.
    data = data.replace('<body>', '<body>\n<?php header1() ?>\n')
    write_data_to_file(data, out)

    manifest = [dict(display='index', filename=os.path.basename(out))]
    mf = os.path.join(os.path.dirname(out), 'summary.manifest.yaml')
    write_data_to_file(yaml.dump(manifest), mf)

    # --- Build the cross-references page (reuses the names html/head/body).
    out_crossrefs = sys.argv[2]

    html = Tag(name='html')
    head = Tag(name='head')
    body = Tag(name='body')
    style = Tag(name='style')
    style.append(CROSSREF_CSS)
    head.append(style)
    html.append(head)

    script = Tag(name='script')
    script.append(CROSSREF_SCRIPT)

    container = Tag(name='div')
    container.attrs['id'] = 'container'
    body.append(container)

    details = Tag(name='details')
    summary = Tag(name='summary')
    summary.append('See all references')
    details.append(summary)
    details.append(all_crossrefs)
    body.append(details)

    body.append(script)
    html.append(body)

    write_data_to_file(str(html), out_crossrefs)

    if nerrors > 0:
        sys.exit(nerrors)