def generate_and_add_toc(soup, raise_error=False, res=None):
    """Generate a table of contents from <body> and splice it into the placeholder.

    soup: the whole document soup (must contain a <body>).
    raise_error: if True, raise NoTocPlaceholder when no placeholder element exists.
    res: an AugmentedResult collecting notes/errors; created if not given.
    """
    if res is None:
        # BUG FIX: this used to read `aug = AugmentedResult()`, which left
        # `res` as None; every res.note_error() below then raised
        # AttributeError instead of recording the problem.
        res = AugmentedResult()
    logger.info('adding toc')
    body = soup.find('body')
    toc = generate_toc(body, res)
    # logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    if toc_ul is None:
        # empty TOC
        msg = 'Could not find toc.'
        # logger.warning(msg)
        res.note_error(msg)  # XXX
    else:
        toc_ul.extract()
        assert toc_ul.name == 'ul'
        toc_ul['class'] = 'toc'  # XXX: see XXX13
        toc_ul['id'] = MCDPManualConstants.MAIN_TOC_ID
        toc_selector = MCDPManualConstants.TOC_PLACEHOLDER_SELECTOR
        tocs = list(body.select(toc_selector))
        if not tocs:
            msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
            if raise_error:
                raise NoTocPlaceholder(msg)
            logger.warning(msg)
            res.note_error(msg)
        else:
            # Replace the placeholder with the generated <ul>.
            toc_place = tocs[0]
            toc_place.replaceWith(toc_ul)
def generate_and_add_toc(soup, toc_selector='div#toc'):
    """Build a TOC from the document <body> and put it inside `toc_selector`."""
    logger.info('adding toc')
    body = soup.find('body')
    toc_html = generate_toc(body)
    # logger.info('TOC:\n' + str(toc_html))
    toc_list = bs(toc_html).ul
    if toc_list is None:
        # Nothing to insert: the generated TOC was empty.
        logger.warning('Could not find toc')
        # XXX
        return
    toc_list.extract()
    assert toc_list.name == 'ul'
    toc_list['class'] = 'toc'
    toc_list['id'] = 'main_toc'
    placeholders = list(body.select(toc_selector))
    if placeholders:
        # Swap the first placeholder for the generated <ul>.
        placeholders[0].replaceWith(toc_list)
    else:
        logger.warning('Cannot find any element of type %r to put TOC inside.' % toc_selector)
def raise_if_any_error(results):
    """Raise Exception if any job result has an unexpected error.

    results: mapping rid -> (ignored, r), where r has:
        - error_type: None on success, otherwise a short error-type string;
        - error_string: possibly multi-line error description.

    Jobs listed in `expected` are allowed to fail (their *success* is logged
    as a warning instead). Any remaining failure raises Exception whose
    message contains one "rid | type | first-line" summary per job.
    """
    errors = {}
    for rid, (_, r) in results.items():
        if r.error_type is not None:
            # Keep only the first line, truncated so the whole
            # "rid | type | line" summary stays around 160 characters.
            first_line = r.error_string.split('\n')[0]
            first_line = first_line[:150 - len(rid)]
            errors[rid] = rid + ' | ' + r.error_type[:4] + ' | ' + first_line

    # Known/expected failures: tolerated, and their absence is suspicious.
    expected = [
        'local-uav_energetics-pretty-models-batteries',
        'local-uav_energetics-pretty-models-battery_squash',
        'local-unittests-loading_python-models-load1',
        'local-unittests-loading_python-models-load1b',
        'local-unittests-loading_python-posets-load2',
        'local-unittests-loading_python-primitivedps-load_primitivedp',
        'local-unittests-making-models-test1',
        'local-examples_devel-icra17-models-uncertain2',
    ]
    for e in expected:
        if e in results and e not in errors:
            msg = 'Expected a failure for %r' % e
            logger.warning(msg)
        if e in errors:
            del errors[e]

    if errors:
        msg = 'Found %s errors.\n\n' % len(errors)
        # BUG FIX: previously joined sorted(errors) — the dict *keys* only —
        # discarding the formatted "rid | type | line" summaries built above.
        # Join the summary values instead (they start with rid, so sorting
        # them still orders by job id).
        msg += "\n".join(sorted(errors.values()))
        raise Exception(msg)
def _load_spec_data(self, spec_name, thing_name):
    """Load the data for one "thing" (model/poset/...) of kind `spec_name`.

    Looks up thing_name (with soft/fuzzy matching) in the library identified
    by self.repo_name / self.shelf_name / self.library_name.

    Returns dict(data=<raw contents>, realpath=<human-readable provenance>).
    Raises DPSemanticError if the thing cannot be found, or if soft matching
    would be needed but is disabled (MCDPConstants.allow_soft_matching False).
    """
    # Navigate repo -> shelf -> library -> things-of-this-spec.
    shelf = self.the_context.db_view.repos[self.repo_name].shelves[
        self.shelf_name]
    library = shelf.libraries[self.library_name]
    things = library.things.child(spec_name)
    try:
        # Soft match: tolerates e.g. case/format differences in the name.
        # NOTE(review): assumes get_soft_match raises KeyError when there is
        # no acceptable match — confirm against its definition.
        match = get_soft_match(thing_name, list(things))
    except KeyError:
        # Build a helpful message listing what *is* available.
        msg = 'Soft match failed: Could not find %r in %s.' % (thing_name, spec_name)
        available = sorted(things)
        if available:
            msg += ("\n Available %s: %s." %
                    (spec_name, format_list(sorted(available))))
        else:
            msg += "\n None available."
        raise_desc(DPSemanticError, msg)
    else:
        if match != thing_name:
            # The name only matched approximately.
            if MCDPConstants.allow_soft_matching:
                logger.warning('Soft matching %r to %r (deprecated)' % (match, thing_name))
            else:
                msg = 'Found case in which the user relies on soft matching (%r to refer to %r).' % (
                    thing_name, match)
                raise DPSemanticError(msg)
            # TODO: add warning
        data = things[match]
        spec = specs[spec_name]
        basename = match + '.' + spec.extension
        # Human-readable provenance string for error messages downstream.
        realpath = '%s in library %r in shelf %r in repo %r' % (
            basename, self.library_name, self.shelf_name, self.repo_name)
        return dict(data=data, realpath=realpath)
def substituting_empty_links(soup, raise_errors=False):
    '''
        Fills in the text of empty <a href="#..."> links from the attributes
        of the elements they point to.

        default style is [](#sec:systems) "Chapter 10"

        the name is [](#sec:systems?only_name) "My title"

        the number is [](#sec:systems?only_number) "10"

        and full is [](#sec:systems?toc_link) "Chapter 10 - My title"

        You can also use "class":

            <a href='#sec:name' class='only_number'></a>

            or

            <a href='#sec:name?only_number'></a>
    '''
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    logger.debug('substituting_empty_links')

    n = 0        # links processed
    nerrors = 0  # links whose target could not be resolved
    for le in get_empty_links_to_fragment(soup):
        a = le.linker          # the empty <a> element
        element_id = le.eid    # target fragment id
        element = le.linked    # target element (None/falsy if not found)
        n += 1
        if not element:
            msg = ('Cannot find %s' % element_id)
            note_error_msg(a, msg)
            nerrors += 1
            if raise_errors:
                raise ValueError(msg)
            continue
        # if there is a query, remove it
        if le.query is not None:
            new_href = '#' + le.eid
            a.attrs['href'] = new_href
            logger.info('setting new href= %s' % (new_href))

        if (not LABEL_WHAT_NUMBER in element.attrs) or \
                (not LABEL_NAME in element.attrs):
            # Target was never labeled (numbering pass did not reach it).
            msg = (
                'substituting_empty_links: Could not find attributes %s or %s in %s' %
                (LABEL_NAME, LABEL_WHAT_NUMBER, element))
            # NOTE(review): `if True:` makes the else branch dead code; only
            # the warning path ever runs.
            if True:
                logger.warning(msg)
            else:
                note_error_msg(a, msg)
                nerrors += 1
                if raise_errors:
                    raise ValueError(msg)
            continue

        # Labels previously attached by the numbering pass.
        label_what_number = element.attrs[LABEL_WHAT_NUMBER]
        label_number = element.attrs[LABEL_NUMBER]
        label_what = element.attrs[LABEL_WHAT]
        label_name = element.attrs[LABEL_NAME]

        classes = list(a.attrs.get('class', []))  # bug: I was modifying

        # The ?query suffix acts like an extra class (e.g. ?only_number).
        if le.query is not None:
            classes.append(le.query)

        if 'toc_link' in classes:
            # Full form: "<what> <number> - <name>", built from spans.
            s = Tag(name='span')
            s.string = label_what
            add_class(s, 'toc_what')
            a.append(s)

            a.append(' ')

            s = Tag(name='span')
            s.string = label_number
            add_class(s, 'toc_number')
            a.append(s)

            s = Tag(name='span')
            s.string = ' - '
            add_class(s, 'toc_sep')
            a.append(s)

            if label_name is not None and '<' in label_name:
                # The name contains markup: parse and sanitize it.
                contents = bs(label_name)
                # sanitize the label name
                for br in contents.findAll('br'):
                    br.replaceWith(NavigableString(' '))
                for _ in contents.findAll('a'):
                    _.extract()
                a.append(contents)
                # logger.debug('From label_name = %r to a = %r' % (label_name, a))
            else:
                s = Tag(name='span')
                if label_name is None:
                    s.string = '(unnamed)'  # XXX
                else:
                    s.string = label_name
                add_class(s, 'toc_name')
                a.append(s)
        else:
            # Short forms, chosen by class.
            if CLASS_ONLY_NUMBER in classes:
                label = label_number
            elif CLASS_NUMBER_NAME in classes:
                if label_name is None:
                    label = label_what_number + \
                        ' - ' + '(unnamed)'  # warning
                else:
                    label = label_what_number + ' - ' + label_name
            elif CLASS_ONLY_NAME in classes:
                if label_name is None:
                    label = '(unnamed)'  # warning
                else:
                    label = label_name
            else:
                label = label_what_number

            span1 = Tag(name='span')
            add_class(span1, 'reflabel')
            span1.string = label
            a.append(span1)

    logger.debug('substituting_empty_links: %d total, %d errors' % (n, nerrors))
# Messages already emitted by _warn_once(); used to suppress repeats.
_warn_once_seen = set()


def _warn_once(msg):
    """Log `msg` as a warning, but only the first time it is seen.

    BUG FIX: despite its name, this previously warned unconditionally on
    every call; it now deduplicates identical messages.
    """
    if msg not in _warn_once_seen:
        _warn_once_seen.add(msg)
        logger.warning(msg)
def sub_link(a, element_id, element, raise_errors):
    """ Fill in the text of one empty reference link.

        a: the link with href= #element_id
        element: the link to which we refer
        raise_errors: if True, raise ValueError on a missing target instead
        of only annotating the link.
    """
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    if not element:
        # Target element was not found in the document.
        msg = ('Cannot find %s' % element_id)
        note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
        # nerrors += 1
        if raise_errors:
            raise ValueError(msg)
        return
    # if there is a query, remove it
    # if le.query is not None:
    #     new_href = '#' + le.eid
    #     a.attrs['href'] = new_href
    #     logger.info('setting new href= %s' % (new_href))

    if (not LABEL_WHAT_NUMBER in element.attrs) or \
            (not LABEL_NAME in element.attrs):
        # Target was never labeled by the numbering pass.
        msg = (
            'substituting_empty_links: Could not find attributes %s or %s in %s' %
            (LABEL_NAME, LABEL_WHAT_NUMBER, element))
        # NOTE(review): `if True:` makes the else branch dead code; only the
        # warning path ever runs.
        if True:
            logger.warning(msg)
        else:
            # note_error_msg(a, msg)
            note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
            # nerrors += 1
            if raise_errors:
                raise ValueError(msg)
        return

    # Labels previously attached by the numbering pass.
    label_what_number = element.attrs[LABEL_WHAT_NUMBER]
    label_number = element.attrs[LABEL_NUMBER]
    label_what = element.attrs[LABEL_WHAT]
    label_name = element.attrs[LABEL_NAME]

    classes = list(a.attrs.get('class', []))  # bug: I was modifying
    # if le.query is not None:
    #     classes.append(le.query)

    if 'toc_link' in classes:
        # Full form: "<what> <number> - <name>", built from spans.
        s = Tag(name='span')
        s.string = label_what
        add_class(s, 'toc_what')
        a.append(s)

        a.append(' ')

        s = Tag(name='span')
        s.string = label_number
        add_class(s, 'toc_number')
        a.append(s)

        s = Tag(name='span')
        s.string = ' - '
        add_class(s, 'toc_sep')
        a.append(s)

        if label_name is not None and '<' in label_name:
            # The name contains markup: parse and sanitize it.
            contents = bs(label_name)
            # sanitize the label name
            for br in contents.findAll('br'):
                br.replaceWith(NavigableString(' '))
            for _ in contents.findAll('a'):
                _.extract()
            contents.name = 'span'
            add_class(contents, 'toc_name')
            a.append(contents)
            # logger.debug('From label_name = %r to a = %r' % (label_name, a))
        else:
            if label_name is None:
                s = Tag(name='span')
                s.string = '(unnamed)'  # XXX
            else:
                # NOTE(review): assumes bs() wraps parsed text in a
                # <fragment> root element — confirm against bs() helper.
                s = bs(label_name)
                assert s.name == 'fragment'
                s.name = 'span'
                # add_class(s, 'produced-here')  # XXX
            add_class(s, 'toc_name')
            a.append(s)
    else:
        # Short forms, chosen by class.
        if CLASS_ONLY_NUMBER in classes:
            label = label_number
        elif CLASS_NUMBER_NAME in classes:
            if label_name is None:
                label = label_what_number + \
                    ' - ' + '(unnamed)'  # warning
            else:
                label = label_what_number + ' - ' + label_name
        elif CLASS_ONLY_NAME in classes:
            if label_name is None:
                label = '(unnamed)'  # warning
            else:
                label = label_name
        else:
            # default behavior
            if string_starts_with(['fig:', 'tab:', 'bib:', 'code:'], element_id):
                # Figures/tables/etc. get number only, no name.
                label = label_what_number
            elif label_name is None:
                label = label_what_number
            else:
                label = label_what_number + ' - ' + label_name

        frag = bs(label)
        assert frag.name == 'fragment'
        frag.name = 'span'
        add_class(frag, 'reflabel')
        a.append(frag)
def check_if_any_href_is_invalid(soup):
    '''
        Checks if references are invalid and tries to correct them.

        if it is of the form "#frag?query" then query is stripped out

        Returns (errors, math_errors): lists of error-message strings for
        unresolvable references and for MathJax-generated references.
    '''
    logger.debug('check_if_any_href_is_invalid')
    errors = []
    math_errors = []

    # let's first find all the IDs
    id2element, duplicates = get_id2element(soup, 'id')
    _name2element, _duplicates = get_id2element(soup, 'name')

    for a in soup.select('[href^="#"]'):
        href = a['href']
        if a.has_attr('class') and "mjx-svg-href" in a['class']:
            # MathJax-generated link: cannot be resolved here.
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            logger.warning(msg)
            a.insert_before(Comment('Error: %s' % msg))
            math_errors.append(msg)
            continue
        assert href.startswith('#')
        ID = href[1:]
        # remove query if it exists
        if '?' in ID:
            ID = ID[:ID.index('?')]

        if not ID in id2element:
            # try to fix it

            # if there is already a prefix, remove it
            if ':' in href:
                i = href.index(':')
                core = href[i + 1:]
            else:
                core = ID
            # logger.debug('check_if_any_href_is_invalid: not found %r, core %r' % (ID, core))

            # Heuristic: maybe the author forgot / mistyped the prefix;
            # try every known prefix against the bare name.
            possible = ['part', 'sec', 'sub', 'subsub', 'fig', 'tab', 'code',
                        'app', 'appsub', 'appsubsub', 'def', 'eq', 'rem',
                        'lem', 'prob', 'prop', 'exa', 'thm',
                        # 'bib'
                        ]
            matches = []
            others = []
            for possible_prefix in possible:
                why_not = possible_prefix + ':' + core
                others.append(why_not)
                if why_not in id2element:
                    matches.append(why_not)
            # logger.debug('others = %r, matches = %r' % (others, matches))

            if len(matches) > 1:
                # Ambiguous: refuse to guess.
                short = 'Ref. error'
                msg = '%s not found, and multiple matches for heuristics (%s)' % (
                    href, matches)
                note_error2(a, short, msg, ['href-invalid', 'href-invalid-missing'])
            elif len(matches) == 1:
                # Unique match: rewrite the href in place.
                a['href'] = '#' + matches[0]
                if show_debug_message_for_corrected_links:
                    short = 'Ref replaced'
                    msg = '%s not found, but corrected in %s' % (href, matches[0])
                    note_warning2(a, short, msg, ['href-replaced'])
            else:
                if has_class(a, MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT):
                    # Author explicitly opted out of this check.
                    pass
                else:
                    short = 'Ref. error'
                    # msg = 'Not found %r (also tried %s)' % (href, ", ".join(others))
                    msg = 'I do not know the link that is indicated by the link %r.' % href
                    note_error2(a, short, msg, ['href-invalid', 'href-invalid-missing'])
                    errors.append(msg)

        if ID in duplicates:
            # Reference is ambiguous: more than one element carries this id.
            msg = 'More than one element matching %r.' % href
            short = 'Ref. error'
            note_error2(a, short, msg, ['href-invalid', 'href-invalid-multiple'])
            errors.append(msg)

    return errors, math_errors
def manual_join(template, files_contents, bibfile, stylesheet, remove=None,
                extra_css=None, remove_selectors=None, hook_before_toc=None):
    """ Join the individual document fragments into one complete HTML manual.

        template: HTML template string (macros are expanded).
        files_contents: list of ((libname, docname), data) with the HTML of
        each document.
        bibfile: path to bibliography file, or None.
        stylesheet: name of compiled stylesheet to link, or None.
        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        Returns the final document as a UTF-8 encoded byte string.
    """
    logger.debug('remove_selectors: %s' % remove_selectors)
    logger.debug('remove: %s' % remove)
    from mcdp_utils_xml import bs

    template = replace_macros(template)

    # cannot use bs because entire document
    template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    d = template_soup
    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    # Parse and validate each document fragment.
    from mcdp_docs.latex.latex_preprocess import assert_not_inside
    basename2soup = OrderedDict()
    for (_libname, docname), data in files_contents:
        # BUG FIX: this logging and validation used to run in the append loop
        # below, where `data` was a stale reference to the *last* document of
        # this loop; they belong here, where `data` matches `docname`.
        logger.debug('docname %r -> %s KB' % (docname, len(data) / 1024))
        assert_not_inside(data, 'DOCTYPE')
        frag = bs(data)
        basename2soup[docname] = frag

    fix_duplicated_ids(basename2soup)

    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))
        for x in content:
            x2 = x.__copy__()  # not clone, not extract
            body.append(x2)
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)
    logger.info('external bib')
    if bibfile is not None:
        if not os.path.exists(bibfile):
            logger.error('Cannot find bib file %s' % bibfile)
        else:
            bibliography_entries = get_bibliography(bibfile)
            bibliography_entries['id'] = 'bibliography_entries'
            body.append(bibliography_entries)
            bibhere = d.find('div', id='put-bibliography-here')
            if bibhere is None:
                logger.warning('Could not find #put-bibliography-here in document.'
                               'Adding one at end of document')
                bibhere = Tag(name='div')
                bibhere.attrs['id'] = 'put-bibliography-here'
                d.find('body').append(bibhere)
            do_bib(d, bibhere)

    if True:
        logger.info('reorganizing contents in <sections>')
        body2 = reorganize_contents(d.find('body'))
        body.replace_with(body2)
    else:
        warnings.warn('fix')
        body2 = body

    # Removing
    all_selectors = []
    if remove is not None and remove != '':
        all_selectors.append(remove)
    if remove_selectors:
        all_selectors.extend(remove_selectors)

    logger.debug('all_selectors: %s' % all_selectors)

    all_removed = ''
    for selector in all_selectors:
        nremoved = 0
        # BUG FIX: these two messages used to interpolate `remove` instead of
        # the selector currently being processed.
        logger.debug('Removing selector %r' % selector)
        toremove = list(body2.select(selector))
        logger.debug('Removing %d objects' % len(toremove))
        for x in toremove:
            nremoved += 1
            nd = len(list(x.descendants))
            logger.debug('removing %s with %s descendants' % (x.name, nd))
            if nd > 1000:
                s = str(x)[:300]
                logger.debug(' it is %s' % s)
            x.extract()
            # Keep a dump of everything removed, for debugging.
            all_removed += '\n\n' + '-' * 50 + ' chunk %d removed\n' % nremoved
            all_removed += str(x)
            all_removed += '\n\n' + '-' * 100 + '\n\n'
        logger.info('Removed %d elements of selector %r' % (nremoved, selector))

    # if False:
    with open('all_removed.html', 'w') as f:
        f.write(all_removed)

    if hook_before_toc is not None:
        hook_before_toc(soup=d)
    ###
    logger.info('adding toc')
    toc = generate_toc(body2)
    logger.info('TOC:\n' + str(toc))
    toc_ul = bs(toc).ul
    # ROBUSTNESS FIX: previously toc_ul.extract() was called unguarded and
    # crashed with AttributeError when the generated TOC was empty.
    if toc_ul is None:
        logger.warning('Could not find toc.')
    else:
        toc_ul.extract()
        assert toc_ul.name == 'ul'
        toc_ul['class'] = 'toc'
        toc_ul['id'] = 'main_toc'
        toc_selector = 'div#toc'
        tocs = list(d.select(toc_selector))
        if not tocs:
            msg = 'Cannot find any element of type %r to put TOC inside.' % toc_selector
            logger.warning(msg)
        else:
            toc_place = tocs[0]
            toc_place.replaceWith(toc_ul)

    logger.info('checking errors')
    check_various_errors(d)

    from mcdp_docs.check_missing_links import check_if_any_href_is_invalid
    logger.info('checking hrefs')
    check_if_any_href_is_invalid(d)

    # Note that this should be done *after* check_if_any_href_is_invalid()
    # because that one might fix some references
    logger.info('substituting empty links')
    substituting_empty_links(d)

    warn_for_duplicated_ids(d)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    add_footnote_polyfill(d)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res
def manual_join(template, files_contents, stylesheet, remove=None,
                extra_css=None, remove_selectors=None, hook_before_toc=None,
                references=None, resolve_references=True):
    """
        files_contents: a list of tuples that can be cast to DocToJoin:
        where the string is a unique one to be used for job naming.

        extra_css: if not None, a string of more CSS to be added
        Remove_selectors: list of selectors to remove (e.g. ".draft").

        hook_before_toc if not None is called with hook_before_toc(soup=soup)
        just before generating the toc

        references: optional mapping href -> object with .url and .title,
        used to rewrite external references at the end.

        Returns the final document as a UTF-8 encoded byte string.
    """
    # BUG FIX: `references` used to be a mutable default argument ({});
    # normalize None to a fresh dict instead.
    if references is None:
        references = {}
    check_isinstance(files_contents, list)
    files_contents = [DocToJoin(*_) for _ in files_contents]

    template0 = template
    template = replace_macros(template)

    # cannot use bs because entire document
    template_soup = BeautifulSoup(template, 'lxml', from_encoding='utf-8')
    d = template_soup
    if d.html is None:
        s = "Invalid template"
        raise_desc(ValueError, s, template0=template0)
    assert d.html is not None
    assert '<html' in str(d)
    head = d.find('head')
    assert head is not None
    for x in get_manual_css_frag().contents:
        head.append(x.__copy__())

    if stylesheet is not None:
        link = Tag(name='link')
        link['rel'] = 'stylesheet'
        link['type'] = 'text/css'
        from mcdp_report.html import get_css_filename
        link['href'] = get_css_filename('compiled/%s' % stylesheet)
        head.append(link)

    # Parse each fragment, rejecting duplicates and malformed input.
    basename2soup = OrderedDict()
    for doc_to_join in files_contents:
        if doc_to_join.docname in basename2soup:
            msg = 'Repeated docname %r' % doc_to_join.docname
            raise ValueError(msg)
        from .latex.latex_preprocess import assert_not_inside
        assert_not_inside(doc_to_join.contents, '<fragment')
        assert_not_inside(doc_to_join.contents, 'DOCTYPE')
        frag = bs(doc_to_join.contents)
        basename2soup[doc_to_join.docname] = frag

    fix_duplicated_ids(basename2soup)

    body = d.find('body')
    add_comments = False
    for docname, content in basename2soup.items():
        # logger.debug('docname %r -> %s KB' % (docname, len(data) / 1024))
        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('Beginning of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

        copy_contents_into(content, body)

        # A leftover <fragment> means the copy above went wrong.
        f = body.find('fragment')
        if f:
            msg = 'I found a <fragment> in the manual after %r' % docname
            msg += '\n\n' + indent(str(content), '> ')
            raise Exception(msg)

        if add_comments:
            body.append(NavigableString('\n\n'))
            body.append(Comment('End of document dump of %r' % docname))
            body.append(NavigableString('\n\n'))

    extract_bibtex_blocks(d)
    logger.info('external bib')

    ID_PUT_BIB_HERE = MCDPManualConstants.ID_PUT_BIB_HERE
    bibhere = d.find('div', id=ID_PUT_BIB_HERE)
    if bibhere is None:
        logger.warning(('Could not find #%s in document. '
                        'Adding one at end of document.') % ID_PUT_BIB_HERE)
        bibhere = Tag(name='div')
        bibhere.attrs['id'] = ID_PUT_BIB_HERE
        d.find('body').append(bibhere)

    do_bib(d, bibhere)

    document_final_pass_before_toc(d, remove, remove_selectors)

    if hook_before_toc is not None:
        hook_before_toc(soup=d)

    generate_and_add_toc(d)

    document_final_pass_after_toc(soup=d, resolve_references=resolve_references)

    if extra_css is not None:
        logger.info('adding extra CSS')
        add_extra_css(d, extra_css)

    document_only_once(d)

    # Rewrite known external references; fill in titles for empty anchors.
    for a in d.select('[href]'):
        href = a.attrs['href']
        if href in references:
            r = references[href]
            a.attrs['href'] = r.url
            # BUG FIX: this used to test `if not a.children:`, but
            # Tag.children is a generator and is always truthy, so the title
            # was never appended; Tag.contents is the underlying list.
            if not a.contents:  # empty
                a.append(r.title)

    logger.info('converting to string')
    # do not use to_html_stripping_fragment - this is a complete doc
    res = unicode(d)
    res = res.encode('utf8')
    logger.info('done - %d bytes' % len(res))
    return res