def run_bibtex2html(contents):
    res = AugmentedResult()
    erase = True
    with tmpdir(prefix='bibtex', erase=erase, keep_on_exception=True) as d:
        fn = os.path.join(d, 'input.bib')
        fno = os.path.join(d, 'out')
        fno1 = fno + '.html'
        # fno2 = fno + '_bib.html'

        with open(fn, 'w') as f:
            f.write(contents)

        cmd = ['bibtex2html', '-unicode', '--dl', '-o', fno, fn]
        system_cmd_result('.', cmd,
                          display_stdout=False,
                          display_stderr=False,
                          raise_on_error=True,
                          display_prefix=None,  # leave it there
                          env=None)

        bibtex2html_output = open(fno1).read()
        fixed = bibtex2html_output.replace('<p>\n</dd>', '</dd><!--fix-->')
        with open(os.path.join(d, 'fixed.html'), 'w') as f:
            f.write(fixed)

        out = process_bibtex2html_output(fixed, d)
        write_data_to_file(out, os.path.join(d, 'processed.html'))

    res.set_result(out)
    return res

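# Example usage (a sketch, not part of the original module). This assumes the
# bibtex2html binary is installed and on the PATH; 'refs.bib' is a
# hypothetical input file.
#
#   aug = run_bibtex2html(open('refs.bib').read())
#   html_fragment = aug.get_result()
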
def add_likebtn(joined_aug, likebtn):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_likebtn_(soup, likebtn)
    res.set_result(to_html_entire_document(soup))
    return res

def mark_errors_and_rest(joined_aug):
    soup = bs_entire_document(joined_aug.get_result())
    mark_in_html(joined_aug, soup)
    res = AugmentedResult()
    res.merge(joined_aug)
    res.set_result(to_html_entire_document(soup))
    return res

def add_related(joined_aug):
    res = AugmentedResult()
    res.merge(joined_aug)
    soup = bs_entire_document(joined_aug.get_result())
    add_related_(soup, res)
    res.set_result(to_html_entire_document(soup))
    return res

def prerender(joined_aug, symbols):
    joined = joined_aug.get_result()
    soup = bs_entire_document(joined)
    for details in soup.select('details'):
        details.name = 'div'
        add_class(details, 'transmuted-details')
        # details.attrs['open'] = 1
    joined = to_html_entire_document(soup)

    res = AugmentedResult()
    result = prerender_mathjax(joined, symbols=symbols, res=res)
    res.set_result(result)
    return res

def make_last_modified(files_contents, nmax=100):
    res = AugmentedResult()
    files_contents = [DocToJoin(*x) for x in files_contents]
    files_contents = [_ for _ in files_contents if _.source_info]

    files_contents = list(sorted(files_contents,
                                 key=lambda x: x.source_info.last_modified,
                                 reverse=True))

    r = Tag(name='fragment')
    r.append('\n')

    h = Tag(name='h1')
    h.append('Last modified')
    h.attrs['id'] = 'sec:last-modified'
    r.append(h)
    r.append('\n')

    ul = Tag(name='ul')
    ul.append('\n')
    for d in files_contents[:nmax]:
        li = Tag(name='li')
        when = d.source_info.last_modified
        when_s = time.strftime("%a, %b %d", when)  # %H:%M
        li.append(when_s)
        li.append(': ')

        hid = get_main_header(bs(d.contents))
        if hid is None:
            what = "File %s" % d.docname
        else:
            what = Tag(name='a')
            what.attrs['href'] = '#' + hid
            what.attrs['class'] = MCDPManualConstants.CLASS_NUMBER_NAME
            # The anchor is left empty here; presumably a later pass fills in
            # the section number/name based on CLASS_NUMBER_NAME.
        li.append(what)
        li.append(' (')
        name = d.source_info.author.name
        li.append(name)
        li.append(')')

        ul.append(li)
        ul.append('\n')

    r.append(ul)

    s = to_html_stripping_fragment(r)
    # print s
    res.set_result(s)
    return res

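# Example usage (a sketch). Each tuple must be castable to DocToJoin and carry
# a source_info with .last_modified (a time struct) and .author.name; entries
# without source_info are skipped.
#
#   aug = make_last_modified(files_contents, nmax=50)
#   fragment_html = aug.get_result()
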
def add_style(data_aug, stylesheet):
    soup = bs_entire_document(data_aug.get_result())
    head = soup.find('head')
    assert head is not None
    link = Tag(name='link')
    link['rel'] = 'stylesheet'
    link['type'] = 'text/css'
    from mcdp_report.html import get_css_filename
    link['href'] = get_css_filename('compiled/%s' % stylesheet)
    head.append(link)
    html = to_html_entire_document(soup)
    res = AugmentedResult()
    res.merge(data_aug)
    res.set_result(html)
    return res

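# Example usage (a sketch; 'manual' is a hypothetical stylesheet name, any
# name resolvable by mcdp_report.html.get_css_filename works):
#
#   styled_aug = add_style(data_aug, 'manual')
#   html = styled_aug.get_result()
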
def make_composite(compose_config, joined_aug):
    data = joined_aug.get_result()
    soup = bs_entire_document(data)

    recipe = compose_config.recipe
    remove_status = compose_config.remove_status
    show_removed = compose_config.show_removed
    permalink_prefix = compose_config.purl_prefix
    aug = compose_go2(soup, recipe, permalink_prefix, remove_status,
                      show_removed)
    soup = aug.get_result()
    results = str(soup)

    res = AugmentedResult()
    res.merge(joined_aug)
    res.merge(aug)
    res.set_result(results)
    return res

def manual_join(template, files_contents,
                stylesheet, remove=None, extra_css=None,
                remove_selectors=None,
                hook_before_toc=None,
                references=None,
                resolve_references=True,
                hook_before_final_pass=None,
                require_toc_placeholder=False,
                permalink_prefix=None,
                crossrefs_aug=None,
                aug0=None):
    """
        files_contents: a list of tuples that can be cast to DocToJoin,
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added.

        remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc, if not None, is called with hook_before_toc(soup=soup)
        just before generating the toc.
    """
    result = AugmentedResult()

    if references is None:
        references = {}
    check_isinstance(files_contents, list)

    if crossrefs_aug is None:
        crossrefs = Tag(name='no-cross-refs')
    else:
        crossrefs = bs(crossrefs_aug.get_result())
        result.merge(crossrefs_aug)

    if aug0 is not None:
        result.merge(aug0)

    @contextmanager
    def timeit(_):
        yield

    with timeit('manual_join'):

        files_contents = [DocToJoin(*_) for _ in files_contents]

        # cannot use bs() here because the template is an entire document
        with timeit('parsing template'):
            template0 = template
            template = replace_macros(template)
            template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
            d = template_soup
            if d.html is None:
                s = "Invalid template"
                raise_desc(ValueError, s, template0=template0)

        with timeit('adding head'):
            assert d.html is not None
            assert '<html' in str(d)
            head = d.find('head')
            if head is None:
                msg = 'Could not find <head> in template:'
                logger.error(msg)
                logger.error(str(d))
                raise Exception(msg)
            assert head is not None
            for x in get_manual_css_frag().contents:
                head.append(x.__copy__())

        with timeit('adding stylesheet'):
            if stylesheet is not None:
                link = Tag(name='link')
                link['rel'] = 'stylesheet'
                link['type'] = 'text/css'
                from mcdp_report.html import get_css_filename
                link['href'] = get_css_filename('compiled/%s' % stylesheet)
                head.append(link)

        with timeit('making basename2soup'):
            basename2soup = OrderedDict()
            for doc_to_join in files_contents:
                if doc_to_join.docname in basename2soup:
                    msg = 'Repeated docname %r' % doc_to_join.docname
                    raise ValueError(msg)
                from .latex.latex_preprocess import assert_not_inside
                if isinstance(doc_to_join.contents, AugmentedResult):
                    result.merge(doc_to_join.contents)
                    contents = doc_to_join.contents.get_result()
                else:
                    contents = doc_to_join.contents
                assert_not_inside(contents, '<fragment')
                assert_not_inside(contents, 'DOCTYPE')
                frag = bs(contents)
                basename2soup[doc_to_join.docname] = frag

        # with timeit('fix_duplicate_ids'):
        #     # XXX
        #     fix_duplicated_ids(basename2soup)

        with timeit('copy contents'):
            body = d.find('body')
            add_comments = False

            for docname, content in basename2soup.items():
                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('Beginning of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

                try_faster = True
                if try_faster:
                    for e in list(content.children):
                        body.append(e.extract())
                else:
                    copy_contents_into(content, body)

                if add_comments:
                    body.append(NavigableString('\n\n'))
                    body.append(Comment('End of document dump of %r' % docname))
                    body.append(NavigableString('\n\n'))

        with timeit('extract_bibtex_blocks'):
            extract_bibtex_blocks(d)

        with timeit('ID_PUT_BIB_HERE'):
            ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE
            bibhere = d.find('div', id=ID_PUT_BIB_HERE)
            if bibhere is None:
                msg = ('Could not find #%s in document. '
                       'Adding one at end of document.') % ID_PUT_BIB_HERE
                result.note_warning(msg)
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = ID_PUT_BIB_HERE
                d.find('body').append(bibhere)

            do_bib(d, bibhere)

        with timeit('hook_before_final_pass'):
            if hook_before_final_pass is not None:
                hook_before_final_pass(soup=d)

        with timeit('document_final_pass_before_toc'):
            location = LocationUnknown()
            document_final_pass_before_toc(d, remove, remove_selectors,
                                           result, location)

        with timeit('hook_before_toc'):
            if hook_before_toc is not None:
                hook_before_toc(soup=d)

        with timeit('generate_and_add_toc'):
            try:
                generate_and_add_toc(d, raise_error=True, res=result)
            except NoTocPlaceholder as e:
                if require_toc_placeholder:
                    msg = 'Could not find toc placeholder: %s' % e
                    # logger.error(msg)
                    if aug0 is not None:
                        result.note_error(msg)
                    else:
                        raise Exception(msg)

        with timeit('document_final_pass_after_toc'):
            document_final_pass_after_toc(
                soup=d, crossrefs=crossrefs,
                resolve_references=resolve_references, res=result)

        if extra_css is not None:
            logger.info('adding extra CSS')
            add_extra_css(d, extra_css)

        with timeit('document_only_once'):
            document_only_once(d)

        location = LocationUnknown()
        substitute_github_refs(d, defaults={}, res=result, location=location)

        with timeit('another A pass'):
            for a in d.select('a[href]'):
                href = a.attrs['href']
                if href in references:
                    r = references[href]
                    a.attrs['href'] = r.url
                    # a.children is an iterator, which is always truthy;
                    # materialize it to test whether the link is empty.
                    if not list(a.children):  # empty link: use the title
                        a.append(r.title)

        # do not use to_html_stripping_fragment - this is a complete doc
        # mark_in_html(result, soup=d)

        add_github_links_if_edit_url(soup=d, permalink_prefix=permalink_prefix)

        with timeit('converting to string'):
            res = unicode(d)

        with timeit('encoding'):
            res = res.encode('utf8')

        logger.info('done - %.1f MB' % (len(res) / (1024 * 1024.0)))

        result.set_result(res)
    return result

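# Example usage of manual_join (a sketch; the template and document list are
# hypothetical, and the (docname, contents, source_info) field order is an
# assumption, check the DocToJoin definition):
#
#   template = '<html><head></head><body></body></html>'
#   files_contents = [('chap1', '<h1 id="sec:one">One</h1>', None)]
#   joined_aug = manual_join(template, files_contents, stylesheet=None)
#   html_bytes = joined_aug.get_result()
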
def compose_go2(soup, recipe, permalink_prefix, remove_status, show_removed):
    res = AugmentedResult()

    # Create context
    doc = soup.__copy__()

    body = Tag(name='body')
    doc.body.replace_with(body)
    elements = recipe.make(RecipeContext(soup=soup))
    check_isinstance(elements, list)
    append_all(body, elements)

    # Now remove stuff
    for status in remove_status:
        removed = []
        for section in list(body.select('section[status=%s]' % status)):
            level = section.attrs['level']
            if level not in ['sec', 'part']:
                continue

            section_id = section.attrs['id']
            pure_id = section_id.replace(':section', '')
            removed.append(section.attrs['id'])

            if show_removed:
                # remove everything that is not a header
                keep = ['h1', 'h2', 'h3', 'h4', 'h5']
                for e in list(section.children):
                    if e.name not in keep:
                        e.extract()
                    else:
                        e.append(' [%s]' % status)

                p = Tag(name='p')
                p.append("This section has been removed because it is "
                         "in status %r. " % status)
                a = Tag(name='a')
                a.attrs['href'] = 'http://purl.org/dt/master/%s' % pure_id
                a.append("If you are feeling adventurous, "
                         "you can read it on master.")
                p.append(a)
                section.append(p)

                p = Tag(name='p')
                p.append("To disable this behavior, and completely hide "
                         "the sections, ")
                p.append("set the parameter show_removed to false "
                         "in fall2017.version.yaml.")
                section.append(p)
            else:
                section.extract()
                # section.replace_with(div)

        if not removed:
            logger.info('Found no section with status = %r to remove.' % status)
        else:
            logger.info('I removed %d sections with status %r.' %
                        (len(removed), status))
            logger.debug('Removed: %s' % ", ".join(removed))

    add_github_links_if_edit_url(doc, permalink_prefix=permalink_prefix)

    generate_and_add_toc(doc)
    doc = doc.__copy__()

    # generate_and_add_toc(soup)
    # substituting_empty_links(soup)

    raise_errors = False
    find_links_from_master(master_soup=soup, version_soup=doc,
                           raise_errors=raise_errors, res=res)

    document_final_pass_after_toc(doc)

    res.set_result(doc)
    return res

def get_cross_refs(src_dirs, permalink_prefix, extra_crossrefs, ignore=()):
    res = AugmentedResult()
    files = look_for_files(src_dirs, "crossref.html")

    id2file = {}
    soup = Tag(name='div')

    def add_from_soup(s, f, ignore_already_present, ignore_if_conflict):
        for img in list(s.find_all('img')):
            img.extract()

        for e in s.select('[base_url]'):
            e['external_crossref_file'] = f

        # Remove the ones with the same base_url
        for e in list(s.select('[base_url]')):
            if e.attrs['base_url'] == permalink_prefix:
                e.extract()

        for e in s.select('[id]'):
            id_ = e.attrs['id']
            if id_ == 'container':
                continue  # XXX

            if id_ in id2file:
                if not ignore_already_present:
                    msg = 'Found two elements with same ID "%s":' % id_
                    msg += '\n %s' % id2file[id_]
                    msg += '\n %s' % f
                    res.note_error(msg)
            else:
                id2file[id_] = f
                e2 = e.__copy__()
                if ignore_if_conflict:
                    e2.attrs['ignore_if_conflict'] = '1'
                soup.append(e2)
                soup.append('\n')

    ignore = [os.path.realpath(_) for _ in ignore]

    for _f in files:
        if os.path.realpath(_f) in ignore:
            msg = 'Ignoring file %r' % _f
            logger.info(msg)
            continue
        logger.info('cross ref file %s' % _f)
        data = open(_f).read()
        if permalink_prefix in data:
            msg = 'skipping own file'
            logger.debug(msg)
            continue
        s = bs(data)
        add_from_soup(s, _f, ignore_already_present=False,
                      ignore_if_conflict=False)

    if extra_crossrefs is not None:
        logger.info('Reading external refs\n%s' % extra_crossrefs)
        try:
            r = requests.get(extra_crossrefs)
        except Exception as ex:
            msg = 'Could not read external cross reference links'
            msg += '\n %s' % extra_crossrefs
            msg += '\n\n' + indent(str(ex), ' > ')
            res.note_error(msg)
        else:
            logger.debug('%s %s' % (r.status_code, extra_crossrefs))
            if r.status_code == 404:
                msg = 'Could not read external cross refs: %s' % r.status_code
                msg += '\n url: ' + extra_crossrefs
                msg += '\n This is normal if you have not pushed this branch yet.'
                res.note_warning(msg)
                # logger.error(msg)
            s = bs(r.text)
            add_from_soup(s, extra_crossrefs, ignore_already_present=True,
                          ignore_if_conflict=True)

    # print soup
    res.set_result(str(soup))
    return res

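# Example usage (a sketch; the directory and URL are hypothetical):
#
#   crossrefs_aug = get_cross_refs(
#       ['duckuments-dist'], permalink_prefix='http://purl.org/dth/',
#       extra_crossrefs='https://example.org/branch/crossref.html')
#   crossrefs_html = crossrefs_aug.get_result()
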
def render_book(src_dirs, generate_pdf,
                data, realpath,
                use_mathjax,
                raise_errors,
                filter_soup=None,
                symbols=None,
                ignore_ref_errors=False,
                ):
    """ Returns an AugmentedResult(str). """
    res = AugmentedResult()
    from mcdp_docs.pipeline import render_complete

    librarian = get_test_librarian()
    # XXX: these might need to be changed
    if not MCDPConstants.softy_mode:
        for src_dir in src_dirs:
            librarian.find_libraries(src_dir)

    load_library_hooks = [librarian.load_library]
    library_ = MCDPLibrary(load_library_hooks=load_library_hooks)

    for src_dir in src_dirs:
        library_.add_search_dir(src_dir)

    d = tempfile.mkdtemp()
    library_.use_cache_dir(d)

    location = LocalFile(realpath)
    # print('location:\n%s' % location)

    def filter_soup0(soup, library):
        if filter_soup is not None:
            filter_soup(soup=soup, library=library)
        add_edit_links2(soup, location)
        add_last_modified_info(soup, location)

    try:
        html_contents = render_complete(library=library_,
                                        s=data,
                                        raise_errors=raise_errors,
                                        realpath=realpath,
                                        use_mathjax=use_mathjax,
                                        symbols=symbols,
                                        generate_pdf=generate_pdf,
                                        filter_soup=filter_soup0,
                                        location=location,
                                        res=res,
                                        ignore_ref_errors=ignore_ref_errors)
    except DPSyntaxError as e:
        msg = 'Could not compile %s' % realpath
        location0 = LocationInString(e.where, location)
        res.note_error(msg, locations=location0)
        fail = "<p>This file could not be compiled</p>"
        res.set_result(fail)
        return res
        # raise_wrapped(DPSyntaxError, e, msg, compact=True)

    if False:
        # write minimal doc (dead code: extra_css, main_file and
        # out_part_basename are not defined in this scope)
        doc = get_minimal_document(html_contents,
                                   add_markdown_css=True, extra_css=extra_css)
        dirname = main_file + '.parts'
        if dirname and not os.path.exists(dirname):
            try:
                os.makedirs(dirname)
            except:
                pass
        fn = os.path.join(dirname, '%s.html' % out_part_basename)
        write_data_to_file(doc, fn)

    res.set_result(html_contents)
    return res

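# Example usage (a sketch; paths and flags are hypothetical):
#
#   aug = render_book(src_dirs=['.'], generate_pdf=False,
#                     data=open('chapter.md').read(),
#                     realpath='chapter.md',
#                     use_mathjax=True, raise_errors=False)
#   html = aug.get_result()
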