def make(self, context):
    """Return the referenced section as a one-element list of tags.

    The element whose ID is ``'<self.id_>:section'`` is looked up in
    ``context.soup`` and copied; every entry ``eid`` of ``self.exceptions``
    is then removed from the copy (searched as ``'<eid>:section'``).

    If the section itself cannot be found, the result is a list holding a
    ``div`` that wraps a ``code`` tag with the requested ID; that ``code``
    tag is annotated through ``note_error2``.

    Raises:
        Exception: if an entry of ``self.exceptions`` is not found inside
            the copied section.
    """
    document = context.soup
    section_id = '%s:section' % self.id_
    try:
        section = soup_find_absolutely(document, section_id)
    except KeyError:
        msg = 'Cannot find ID %r in document.' % section_id
        placeholder = Tag(name='div')
        label = Tag(name='code')
        label.append(self.id_)
        placeholder.append(label)
        note_error2(label, 'ref error', msg)
        return [placeholder]

    logger.info('Adding section %r' % section.attrs['id'])

    # Subsections are removed from a copy, not from the element found above.
    pruned = section.__copy__()
    for excluded in self.exceptions:
        logger.info('Removing sections by id "%s"' % excluded)
        wanted = excluded + ':section'
        child = pruned.find(id=wanted)
        if child is None:
            raise Exception(
                'Could not remove "%s" because could not find element with ID "%s"'
                % (excluded, wanted))
        child.extract()

    return [pruned]
def move_things_around(soup, raise_if_errors=False):
    """Replace every ``<move-here src="#ID"/>`` tag with the element ``ID``.

    Example of a tag that is processed:

        <move-here src="#line_detector2-line_detector_node2-autogenerated"/>

    The referenced element is detached from its current position and put in
    place of the ``move-here`` tag.  When the ID does not exist the tag is
    renamed to ``span`` and annotated through ``note_error2``.

    Raises:
        ValueError: if a tag has no ``src`` attribute, if ``src`` does not
            start with ``#``, or (only when ``raise_if_errors`` is true) if
            the referenced ID cannot be found.
    """
    for placeholder in soup.select('move-here'):
        if 'src' not in placeholder.attrs:
            raise ValueError(
                'Expected attribute "src" for element %s' % str(placeholder))

        src = placeholder.attrs['src']
        if not src.startswith('#'):
            raise ValueError(
                'Expected that attribute "src" started with "#" for element %s.'
                % str(placeholder))

        target_id = src[1:]
        target = soup.find(id=target_id)
        if target:
            # Detach the target first, then drop it where the tag was.
            target.extract()
            placeholder.replace_with(target)
            continue

        msg = 'move-here: Could not find ID %r.' % target_id
        placeholder.name = 'span'
        note_error2(placeholder, "invalid move-here reference", msg)
        if raise_if_errors:
            raise ValueError(msg)
def check_status_codes(soup, realpath):
    """Validate the status attribute of every header in ``soup``.

    Headers carrying a ``notoc`` attribute are skipped.  A header whose
    ``STATUS_ATTR`` value is not in ``allowed_statuses`` is annotated with a
    'syntax error'.  A header without any status gets ``STATUS_UNKNOWN``;
    in addition, an ``h1`` whose id does not contain ``'part:'`` is annotated
    with a 'missing status' note listing the possible values, unless
    ``realpath`` contains ``'catkin_ws'``.
    """
    for header in all_headers(soup):
        if 'notoc' in header.attrs:
            continue

        if STATUS_ATTR in header.attrs:
            status = header.attrs[STATUS_ATTR]
            if status not in allowed_statuses:
                msg = 'Invalid status code %r; expected one of %r' % (
                    status, allowed_statuses)
                msg += '\n' + indent(str(header), ' ')
                note_error2(header, 'syntax error', msg)
            continue

        # Only warn for h1 that are not part.
        top_level = (header.name == 'h1'
                     and 'part:' not in header.attrs.get('id', ''))
        # Let's not worry about the Software repo for now.
        if top_level and 'catkin_ws' not in realpath:
            # Report a copy without the GitHub attributes.
            shown = header.__copy__()
            for noisy in ('github-blob-url', 'github-edit-url'):
                shown.attrs.pop(noisy, None)

            parts = [
                'Status not found for this header:\n\n %s' % str(shown),
                '\n\n in file %s' % realpath,
                '\n\nPlease set the status for all the top-level headers.',
                '\n\nThe syntax is:\n\n # My section {#SECTIONID status=STATUS}',
                '\n\nThese are the possible choices for the status:\n',
            ]
            for code, description in allowed_statuses.items():
                if code == STATUS_UNKNOWN:
                    continue
                bullet = '%23s ' % ('status=%s' % code)
                parts.append('\n' + indent(description, '', bullet))
            note_error2(header, 'missing status', ''.join(parts))

        # NOTE(review): the default is applied to every header lacking a
        # status, not only to the ones reported above - confirm this nesting.
        header.attrs[STATUS_ATTR] = STATUS_UNKNOWN
def check_lang_codes(soup):
    """Annotate every header whose ``LANG_ATTR`` value is not allowed.

    Headers without the attribute are left alone; the others are checked
    against ``allowed_langs`` and flagged with a 'syntax error' note when the
    value is not one of them.
    """
    for header in all_headers(soup):
        if LANG_ATTR not in header.attrs:
            continue

        lang = header.attrs[LANG_ATTR]
        if lang in allowed_langs:
            continue

        msg = 'Invalid lang code %r; expected one of %r' % (
            lang, allowed_langs)
        msg += '\n' + indent(str(header), ' ')
        note_error2(header, 'syntax error', msg)
def display_files(soup, defaults, raise_errors):
    """Expand all ``display-file`` elements and return how many were handled.

    The ``src`` attribute of each element is stripped of surrounding
    whitespace and written back.  Sources starting with ``github:`` are
    passed on to ``display_file``; any other source is an error.

    Raises:
        DPSemanticError: for an unknown schema, when ``raise_errors`` is
            true (otherwise the element is annotated through
            ``note_error2``).
    """
    handled = 0
    for element in soup.find_all('display-file'):
        src = element.attrs.get('src', '').strip()
        element.attrs['src'] = src

        if not src.startswith('github:'):
            msg = 'Unknown schema %r; I only know "github:".' % src
            if raise_errors:
                raise DPSemanticError(msg)
            note_error2(element, 'syntax error', msg)
            continue

        display_file(element, defaults, raise_errors)
        handled += 1

    return handled
def check_no_patently_wrong_links(soup):
    """Flag links whose href is a full URL wrongly prefixed with ``#``.

    Every ``a`` element with an ``href`` starting with ``#http:`` or
    ``#https:`` is annotated with a 'syntax error' note explaining the two
    link syntaxes and suggesting the href without the leading ``#``.
    """
    # The adjacent literals concatenate to a single message template.
    template = (
        ' This link is invalid: URL = %s '
        'I think there is an extra "#" at the beginning. '
        'Note that the Markdown syntax is: [description](URL) '
        'where URL can be: '
        "1) using the fragment notation, such as URL = '#SECTIONID' "
        'for example: Look at [the section](#section-name) '
        "2) a regular URL, such as: URL = 'http://google.com' "
        'that is: Look at [the website](http://google.com) '
        'You have mixed the two syntaxes. '
        'You probably meant to write the url %s '
        'but you added an extra "#" at the beginning '
        'that should have not been there. '
        'Please remove the "#". '
    )
    for link in soup.select('a[href]'):
        href = link.attrs['href']
        if not href.startswith(('#http:', '#https:')):
            continue
        msg = template % (href, href[1:])
        note_error2(link, 'syntax error', msg.lstrip())
def display_file(element, defaults, raise_errors):
    """Replace a ``display-file`` element with a listing of the file it names.

    ``element`` must be a ``display-file`` tag whose ``src`` starts with
    ``github:``.  The reference is parsed and resolved; the selected lines
    (``from_line`` to ``to_line`` inclusive, defaulting to the whole file)
    are placed in a ``pre``/``code`` pair inside a ``div`` that also carries
    a ``figcaption`` linking to ``ref.url``.  The ``div`` takes the place of
    ``element`` in the document.

    If the reference cannot be resolved, the failure is either re-raised as
    ``DPSemanticError`` through ``raise_wrapped`` (``raise_errors`` true) or
    noted on the element, which is then left in place.
    """
    assert element.name == 'display-file'
    assert 'src' in element.attrs
    src = element.attrs['src']
    assert src.startswith('github:')

    ref = parse_github_file_ref(src)
    try:
        ref = resolve_reference(ref, defaults=defaults)
    except CouldNotResolveRef as e:
        msg = 'Could not resolve reference %r' % src
        if raise_errors:
            raise_wrapped(DPSemanticError, e, msg, compact=True)
        else:
            msg += '\n\n' + indent(str(e), '> ')
            note_error2(element, 'reference error', msg)
            return

    lines = ref.contents.split('\n')
    first = 0 if ref.from_line is None else ref.from_line
    last = len(lines) - 1 if ref.to_line is None else ref.to_line
    contents = "\n".join(lines[first:last + 1])

    base = os.path.basename(ref.path)
    short = base + '-%d-%d' % (first, last)

    # Caption: the file name, linking to the resolved URL.
    filename = Tag(name='code')
    filename.append(base)
    link = Tag(name='a')
    link.append(filename)
    link.attrs['href'] = ref.url
    figcaption = Tag(name='figcaption')
    figcaption.append(link)

    # Listing: the selected lines.
    pre = Tag(name='pre')
    code = Tag(name='code')
    pre.append(code)
    code.append(contents)

    div = Tag(name='div')
    div.attrs['figure-id'] = 'code:%s' % short
    div.append(figcaption)
    div.append(pre)

    element.replace_with(div)
def get_id2element(soup, att):
    """Map each value of attribute ``att`` to the element carrying it.

    Values found on ``svg`` elements, on their descendants, or starting with
    ``MathJax`` are ignored.  When a value occurs more than once, both the
    current element and the previously recorded one are annotated through
    ``note_error2``, and the later element wins in the mapping.

    Returns:
        A pair ``(id2element, duplicates)``: the mapping and the set of
        values that were seen more than once.
    """
    # Selectors for values that should not be tracked: nodes below an SVG,
    # the SVG itself, and whatever MathJax generated.
    skip_selectors = ('svg [%s]', 'svg[%s]', '[%s^="MathJax"]')
    ignore = set()
    for selector in skip_selectors:
        for element in soup.select(selector % att):
            ignore.add(element[att])

    id2element = {}
    duplicates = set()
    for element in soup.select('[%s]' % att):
        ID = element[att]
        if ID in ignore:
            continue

        if ID in id2element:
            duplicates.add(ID)
            previous = id2element[ID]
            for clashing in (element, previous):
                note_error2(clashing, 'Naming',
                            'More than one element with id %r.' % ID)

        id2element[ID] = element

    if duplicates:
        listing = ", ".join(sorted(duplicates))
        msg = '%d duplicated %s found (not errored): %s' % (
            len(duplicates), att, listing)
        logger.error(msg)

    return id2element, duplicates
def make_videos_(o, raise_on_errors):
    """Replace the video element ``o`` with web, ebook and print renderings.

    ``o`` must have a ``src`` attribute of the form ``vimeo:<id>``.  The
    video metadata is fetched with ``get_vimeo_info`` and ``o`` is replaced
    by a ``div.video`` containing three alternatives:

    * ``only-web``: an ``iframe`` pointing at the Vimeo player;
    * ``only-ebook``: the large thumbnail, linking to the video URL;
    * ``only-deadtree``: the large thumbnail and a sentence with the URL.

    Invalid input (missing ``src``, or ``src`` not starting with ``vimeo:``)
    is reported through ``raise_desc(ValueError, ...)``.

    Raises:
        VimeoInfoException: re-raised from ``get_vimeo_info`` when
            ``raise_on_errors`` is true; otherwise the failure is noted on
            ``o`` and the element is left untouched.
    """
    if 'src' not in o.attrs:
        msg = 'The video does not have a "src" attribute.'
        raise_desc(ValueError, msg, element=str(o))

    src = o.attrs['src']
    prefix = 'vimeo:'
    if not src.startswith(prefix):
        # The template has a single %r: format only the expected prefix.
        # (Passing a 2-tuple here raised TypeError instead of the intended
        # error; the offending element is already reported via element=.)
        msg = 'Invalid attribute "src": it does not start with %r.' % prefix
        raise_desc(ValueError, msg, element=str(o))

    vimeo_id = src[len(prefix):]

    try:
        vimeo_info = get_vimeo_info(vimeo_id)
    except VimeoInfoException as e:
        if raise_on_errors:
            raise
        note_error2(o, 'Resource error', str(e))
        return

    ONLY_WEB = 'only-web'
    ONLY_EBOOK = 'only-ebook'
    ONLY_DEADTREE = 'only-deadtree'

    d = Tag(name='div')
    d.attrs['class'] = 'video'

    # Web version. The generated markup looks like:
    # <iframe src="https://player.vimeo.com/video/152233002"
    #         class="video-vimeo-player" frameborder="0"
    #         webkitallowfullscreen="1" mozallowfullscreen="1"
    #         allowfullscreen="1">
    d.append(Comment('This is the iframe, for online playing.'))
    web = Tag(name='div')
    web.attrs['class'] = ONLY_WEB
    player = Tag(name='iframe')
    player.attrs['class'] = 'video-vimeo-player'
    player.attrs['src'] = 'https://player.vimeo.com/video/' + vimeo_id
    player.attrs['frameborder'] = 0
    player.attrs['webkitallowfullscreen'] = 1
    player.attrs['mozallowfullscreen'] = 1
    player.attrs['allowfullscreen'] = 1
    web.append(player)
    d.append(web)

    # Ebook version: clickable thumbnail.
    d.append(Comment('This is the thumbnail, for ebook'))
    ebook = Tag(name='div')
    ebook.attrs['class'] = ONLY_EBOOK
    link = Tag(name='a')
    link.attrs['href'] = vimeo_info.url
    img = Tag(name='img')
    img.attrs['class'] = 'video-vimeo-thumbnail-ebook'
    img.attrs['src'] = vimeo_info.thumbnail_large
    img.attrs['title'] = vimeo_info.title
    link.append(img)
    ebook.append(link)
    d.append(ebook)

    # Print version: thumbnail plus the URL spelled out.
    d.append(Comment('This is the textual version for printing.'))
    deadtree = Tag(name='div')
    deadtree.attrs['class'] = ONLY_DEADTREE
    img = Tag(name='img')
    img.attrs['class'] = 'video-vimeo-thumbnail-deadtree'
    img.attrs['src'] = vimeo_info.thumbnail_large
    img.attrs['title'] = vimeo_info.title
    deadtree.append(img)
    p = Tag(name='p')
    p.append("The video is at %s." % vimeo_info.url)
    deadtree.append(p)
    d.append(deadtree)

    o.replace_with(d)
def sub_link(a, element_id, element, raise_errors):
    """Fill the empty link ``a`` with a label describing ``element``.

    Args:
        a: the link tag, whose href is ``#element_id``; content is appended
            to it in place.
        element_id: the ID the link points to (used in error messages and
            to pick the default label style).
        element: the element the link refers to; a false value means that
            it was not found.
        raise_errors: if true, a missing ``element`` raises ``ValueError``
            in addition to being noted on ``a``.

    The label is read from the ``LABEL_*`` attributes of ``element``.  Links
    with class ``toc_link`` get separate ``toc_what`` / ``toc_number`` /
    ``toc_sep`` / ``toc_name`` spans; all other links get one ``reflabel``
    span whose text depends on the ``CLASS_ONLY_NUMBER``,
    ``CLASS_NUMBER_NAME`` and ``CLASS_ONLY_NAME`` classes.
    """
    CLASS_ONLY_NUMBER = MCDPManualConstants.CLASS_ONLY_NUMBER
    CLASS_NUMBER_NAME = MCDPManualConstants.CLASS_NUMBER_NAME
    CLASS_ONLY_NAME = MCDPManualConstants.CLASS_ONLY_NAME

    # The link target does not exist: note it on the link and stop.
    if not element:
        msg = ('Cannot find %s' % element_id)
        note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
        if raise_errors:
            raise ValueError(msg)
        return

    # The target exists but carries no label attributes: nothing to insert.
    if (not LABEL_WHAT_NUMBER in element.attrs) or \
            (not LABEL_NAME in element.attrs):
        msg = (
            'substituting_empty_links: Could not find attributes %s or %s in %s' % (LABEL_NAME, LABEL_WHAT_NUMBER, element))
        # The `else` branch is switched off: the problem is only logged,
        # not attached to the link.
        if True:
            logger.warning(msg)
        else:
            note_error2(a, 'Ref. error', 'substituting_empty_links():\n' + msg)
            if raise_errors:
                raise ValueError(msg)
        return

    label_what_number = element.attrs[LABEL_WHAT_NUMBER]
    label_number = element.attrs[LABEL_NUMBER]
    label_what = element.attrs[LABEL_WHAT]
    label_name = element.attrs[LABEL_NAME]

    # Take a copy of the class list (the author's note says an earlier
    # version modified the original by mistake).
    classes = list(a.attrs.get('class', []))

    if 'toc_link' in classes:
        # Table-of-contents entry: "<what> <number> - <name>", one span each.
        s = Tag(name='span')
        s.string = label_what
        add_class(s, 'toc_what')
        a.append(s)

        a.append(' ')

        s = Tag(name='span')
        s.string = label_number
        add_class(s, 'toc_number')
        a.append(s)

        s = Tag(name='span')
        s.string = ' - '
        add_class(s, 'toc_sep')
        a.append(s)

        if label_name is not None and '<' in label_name:
            # The name contains markup: parse it and sanitize it.
            contents = bs(label_name)
            # Line breaks become spaces ...
            for br in contents.findAll('br'):
                br.replaceWith(NavigableString(' '))
            # ... and anchors found in the name are removed.
            for _ in contents.findAll('a'):
                _.extract()

            contents.name = 'span'
            add_class(contents, 'toc_name')
            a.append(contents)
        else:
            if label_name is None:
                s = Tag(name='span')
                s.string = '(unnamed)'  # XXX
            else:
                s = bs(label_name)
                # bs() is expected to return a 'fragment' wrapper, which is
                # renamed into a span.
                assert s.name == 'fragment'
                s.name = 'span'
            add_class(s, 'toc_name')
            a.append(s)
    else:
        # Regular reference: choose the label text according to the classes.
        if CLASS_ONLY_NUMBER in classes:
            label = label_number
        elif CLASS_NUMBER_NAME in classes:
            if label_name is None:
                label = label_what_number + \
                        ' - ' + '(unnamed)'  # warning
            else:
                label = label_what_number + ' - ' + label_name
        elif CLASS_ONLY_NAME in classes:
            if label_name is None:
                label = '(unnamed)'  # warning
            else:
                label = label_name
        else:
            # Default behavior: figures, tables, bibliography entries and
            # code listings are referenced by number only; everything else
            # by number and name, when a name is available.
            if string_starts_with(['fig:', 'tab:', 'bib:', 'code:'], element_id):
                label = label_what_number
            elif label_name is None:
                label = label_what_number
            else:
                label = label_what_number + ' - ' + label_name

        frag = bs(label)
        assert frag.name == 'fragment'
        frag.name = 'span'
        add_class(frag, 'reflabel')
        a.append(frag)
def substituting_empty_links(soup, raise_errors=False):
    """Fill in empty fragment links and flag the other empty links.

    The default style for ``[](#sec:systems)`` is a label such as
    "Chapter 10".  A different style can be requested with a class:

        <a href='#sec:name' class='only_number'></a>

    Every empty link to a fragment is handed to ``sub_link``.  The links
    reported by ``get_empty_links`` afterwards are annotated with a
    'syntax error' note, except those whose href starts with ``python:``.
    """
    # The adjacent literals concatenate to a single message template.
    external_template = (
        ' This link text is empty: ELEMENT '
        'Note that the syntax for links in Markdown is [link text](URL) '
        'For the internal links (where URL starts with "#"), '
        'then the documentation system can fill in the title automatically, '
        'leading to the format: [](#other-section) '
        'However, this does not work for external sites, such as: [](MYURL) '
        'So, you need to provide some text, such as: '
        '[this useful website](MYURL) '
    )
    other_template = (
        ' This link is empty: ELEMENT '
        'It might be that the writer intended for this link to point '
        'to something, but they got the syntax wrong. '
        'href = %s '
        'As a reminder, to refer to other parts of the document, '
        'use the syntax "#ID", such as: '
        'See [](#fig:my-figure). See [](#section-name). '
    )

    for le in get_empty_links_to_fragment(soup):
        sub_link(le.linker, le.eid, le.linked, raise_errors)

    # Whatever is still empty at this point is reported as an error.
    for a in get_empty_links(soup):
        href = a.attrs.get('href', '(not present)') or '""'
        if href.startswith('python:'):
            continue

        if href.startswith(('http:', 'https:')):
            msg = external_template.replace('ELEMENT', str(a))
            msg = msg.replace('MYURL', href)
        else:
            msg = (other_template % href).replace('ELEMENT', str(a))
        note_error2(a, 'syntax error', msg.strip())
def check_if_any_href_is_invalid(soup):
    """Check all ``#fragment`` links and try to repair the broken ones.

    For every element whose ``href`` starts with ``#``:

    * links with class ``mjx-svg-href`` are reported as math errors (a
      comment is inserted before them) and otherwise skipped;
    * a ``?query`` suffix is ignored when looking up the target ID;
    * if the ID does not exist, the part after the first ``:`` of the ID (or
      the whole ID when there is no prefix) is retried with each known
      prefix (``sec:``, ``fig:``, ...).  A single match rewrites the href
      (dropping any query); several matches, or none, are noted as errors on
      the link, unless, for no match, the link has class
      ``MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT``;
    * if the ID is duplicated in the document, that is noted as an error.

    Returns:
        A pair ``(errors, math_errors)`` of lists of messages.  ``errors``
        collects the "unknown link" and "duplicated target" messages.
    """
    logger.debug('check_if_any_href_is_invalid')

    errors = []
    math_errors = []

    # Let's first find all the IDs.
    id2element, duplicates = get_id2element(soup, 'id')
    # The result for "name" is not used; the call is kept because the
    # function also reports duplicated values.
    get_id2element(soup, 'name')

    # Prefixes tried, in this order, when a reference is not found as-is.
    possible = [
        'part', 'sec', 'sub', 'subsub', 'fig', 'tab', 'code',
        'app', 'appsub', 'appsubsub',
        'def', 'eq', 'rem', 'lem', 'prob', 'prop', 'exa', 'thm',
        # 'bib'
    ]

    for a in soup.select('[href^="#"]'):
        href = a['href']
        if a.has_attr('class') and "mjx-svg-href" in a['class']:
            msg = 'Invalid math reference (sorry, no details): href = %s .' % href
            logger.warning(msg)
            a.insert_before(Comment('Error: %s' % msg))
            math_errors.append(msg)
            continue

        assert href.startswith('#')
        ID = href[1:]
        # Remove the query if it exists.
        if '?' in ID:
            ID = ID[:ID.index('?')]

        if ID not in id2element:
            # Try to fix it: if there is already a prefix, remove it.
            # The core comes from the query-stripped ID, not from the raw
            # href, so that "#foo?query" is retried as "sec:foo" and a ':'
            # inside the query is not mistaken for a prefix separator.
            _, colon, after_prefix = ID.partition(':')
            core = after_prefix if colon else ID

            matches = []
            for possible_prefix in possible:
                candidate = possible_prefix + ':' + core
                if candidate in id2element:
                    matches.append(candidate)

            if len(matches) > 1:
                short = 'Ref. error'
                msg = '%s not found, and multiple matches for heuristics (%s)' % (
                    href, matches)
                note_error2(a, short, msg,
                            ['href-invalid', 'href-invalid-missing'])
            elif len(matches) == 1:
                a['href'] = '#' + matches[0]
                if show_debug_message_for_corrected_links:
                    short = 'Ref replaced'
                    msg = '%s not found, but corrected in %s' % (
                        href, matches[0])
                    note_warning2(a, short, msg, ['href-replaced'])
            elif not has_class(a, MCDPConstants.CLASS_IGNORE_IF_NOT_EXISTENT):
                short = 'Ref. error'
                msg = 'I do not know the link that is indicated by the link %r.' % href
                note_error2(a, short, msg,
                            ['href-invalid', 'href-invalid-missing'])
                errors.append(msg)

        if ID in duplicates:
            msg = 'More than one element matching %r.' % href
            short = 'Ref. error'
            note_error2(a, short, msg,
                        ['href-invalid', 'href-invalid-multiple'])
            errors.append(msg)

    return errors, math_errors