def check_figures(language):
    '''
    Check included figures.

    Gathers the figures used by the documents and compares them with the
    figure files found under FIGURE_DIR, reporting files that are never
    used and references that have no matching file.

    Figures are considered used when they appear either in a
    `{% include figure.html ... src="..." %}` inclusion (joined onto
    FIGURE_DIR/<slug>) or in a Markdown image whose target starts with
    `../` (the remainder of the target is taken as the path).
    '''

    ignored_suffixes = ('.gif', '.odg', '.pdf', '.xml')

    def _ignore(filename):
        # Look at the file's own name rather than the whole path: the paths
        # come from glob and start with FIGURE_DIR, so testing the full path
        # would discard every figure whenever FIGURE_DIR begins with '.'
        # (for example './figures').
        return os.path.basename(filename).startswith('.') or \
            filename.endswith(ignored_suffixes)

    def _redundant(filename, defined):
        # A PNG is redundant when an SVG with the same stem is present.
        # Only the trailing suffix is swapped so that '.png' appearing
        # elsewhere in the path is left alone.
        if not filename.endswith('.png'):
            return False
        return filename[:-len('.png')] + '.svg' in defined

    content = get_all_docs(language)
    by_doc = _match_lines_by_doc(
        content, r'{%\s+include\s+figure.html[^%]+src="([^"]+)"')
    used = set()
    for slug in by_doc:
        used |= {
            os.path.join(FIGURE_DIR, slug, filename)
            for filename in by_doc[slug]
        }
    used |= _match_lines(content, r'^!\[.+\]\(\.\./(.+)\)')

    # glob is called without recursive=True, so '**' behaves like '*' and
    # this matches files exactly one directory below FIGURE_DIR.
    defined = {
        f for f in glob.glob(os.path.join(FIGURE_DIR, '**/*.*'))
        if not _ignore(f)
    }
    defined -= {f for f in defined if _redundant(f, defined)}

    report('Figures', 'unused', defined - used)
    report('Figures', 'missing', used - defined)
def check_crossref(language):
    '''
    Check cross-references.

    Takes the bracketed key of every `[key](#REF)` link found in the
    documents and reports those that are not among the cross-reference
    keys starting with 's:'.
    '''
    docs = get_all_docs(language)
    referenced = _match_lines(docs, r'\[([^\]]+)\]\(#REF\)')

    known = set()
    for key in get_crossref(language).keys():
        if key.startswith('s:'):
            known.add(key)

    report('Cross References', 'missing', referenced - known)
def check_cites(language):
    '''
    Check for unused and undefined citations.

    Citations in use are the bracketed keys of `[keys](#BIB)` links
    (matched with flatten=','); definitions are the keys of `{:#b:key}`
    markers.  Definitions never cited are reported as unused, and cited
    keys with no definition as undefined.
    '''
    docs = get_all_docs(CONFIG_FILE, language)
    cited = _match_lines(docs, r'\[([^\]]+)\]\(#BIB\)', flatten=',')
    known = _match_lines(docs, r'{:#b:([^}]+)}')

    never_cited = known - cited
    report('Citations', 'unused', never_cited)

    not_defined = cited - known
    report('Citations', 'undefined', not_defined)
def check_gloss(language):
    '''
    Check for unused and undefined glossary entries.

    References are the `g:...` anchors of `[text](#g:...)` links found via
    _match_body; definitions are the `g:...` identifiers attached to bold
    terms (`**term**{:#g:...}`) found via _match_lines.
    '''
    docs = get_all_docs(CONFIG_FILE, language)
    referenced = _match_body(docs, r'\[.+?\]\(#(g:.+?)\)')
    entries = _match_lines(docs, r'\*\*.+?\*\*{:#(g:.+?)}')

    never_referenced = entries - referenced
    report('Glossary Entries', 'unused', never_referenced)

    not_defined = referenced - entries
    report('Glossary Entries', 'missing', not_defined)
def check_figref(language):
    '''
    Check figure references.

    Compares the bracketed keys of `[key](#FIG)` links in the documents
    with the cross-reference keys starting with 'f:', reporting keys that
    are referenced but not defined and keys that are defined but never
    referenced.
    '''
    docs = get_all_docs(language)
    referenced = _match_lines(docs, r'\[([^\]]+)\]\(#FIG\)')

    known = set()
    for key in get_crossref(language).keys():
        if key.startswith('f:'):
            known.add(key)

    report('Figure References', 'missing', referenced - known)
    report('Figure References', 'unused', known - referenced)
def main(language, with_undone=False):
    '''
    Main driver.

    Walks every (slug, filename, body, lines) record returned by
    get_all_docs(CONFIG_FILE, language), accumulating per-file counts with
    count_basic and glossary counts with count_gloss, then passes both
    accumulators to display.

    language: forwarded to get_all_docs.
    with_undone: when true, documents for which is_undone(body) is true are
    passed to count_basic as well; by default they are left out.
    '''
    overall = {}    # filled in by count_gloss
    per_file = {}   # filled in by count_basic
    for (slug, filename, body, lines) in get_all_docs(CONFIG_FILE, language):
        if with_undone or not is_undone(body):
            count_basic(per_file, slug, filename, body, lines)
        # NOTE(review): the glossary count is laid out here as independent
        # of the undone check; the original nesting could not be recovered
        # from the collapsed source - confirm.
        if slug == 'gloss':
            count_gloss(overall, slug, filename, body, lines)
    display(overall, per_file)
def check_chars(language):
    '''
    Find and report non-7-bit characters that aren't translated.

    A character is reported when its code point is above 127 and it is not
    a key of CHARACTERS.  Each report entry gives the filename followed by
    the 1-based line and column of the character and the character itself.
    '''
    translated = set(CHARACTERS.keys())
    problems = set()
    for (slug, filename, body, lines) in get_all_docs(language):
        for (row, text) in enumerate(lines, start=1):
            for (col, ch) in enumerate(text, start=1):
                # Plain ASCII and characters with a known translation are fine.
                if ord(ch) <= 127 or ch in translated:
                    continue
                problems.add('{} {} {}: {}'.format(filename, row, col, ch))
    report('Characters', 'non-ascii', problems)
def check_gloss(language):
    '''
    Check for unused and undefined glossary entries and alphabetical order.

    References are the `g:...` anchors of `[text](#g:...)` links found via
    match_body; definitions are the `g:...` identifiers attached to bold
    terms (`**term**{:#g:...}`).  The bold terms themselves, as returned by
    _get_lines, are then handed to _out_of_order and the result reported.
    '''
    docs = get_all_docs(language)
    referenced = match_body(docs, r'\[.+?\]\(#(g:.+?)\)')
    entries = _match_lines(docs, r'\*\*.+?\*\*{:#(g:.+?)}')

    never_referenced = entries - referenced
    report('Glossary Entries', 'unused', never_referenced)

    not_defined = referenced - entries
    report('Glossary Entries', 'missing', not_defined)

    terms = _get_lines(docs, r'\*\*(.+?)\*\*{:#g:.+?}')
    misplaced = _out_of_order(terms)
    report('Glossary Entries', 'out of order', misplaced)
def check_cites(language):
    '''
    Check for unused and undefined citations and for bibliography order.

    Citations in use are the bracketed keys of `[keys](#BIB)` links
    (matched with splitter=','); definitions are the keys of `{:#b:key}`
    markers.  The definition keys as returned by _get_lines are also
    passed to _out_of_order and the result reported.
    '''
    definition = r'{:#b:([^}]+)}'
    docs = get_all_docs(language)
    cited = _match_lines(docs, r'\[([^\]]+)\]\(#BIB\)', splitter=',')
    known = _match_lines(docs, definition)

    never_cited = known - cited
    report('Citations', 'unused', never_cited)

    not_defined = cited - known
    report('Citations', 'undefined', not_defined)

    ordered_keys = _get_lines(docs, definition)
    misplaced = _out_of_order(ordered_keys)
    report('Citations', 'out of order', misplaced)
def check_links(language):
    '''
    Check that external links are defined and used.

    Links in use are the second bracketed part of `[text][key]` references
    found in the documents.  Definitions are read from LINK_FILE: every
    `[key]` that starts a line counts as one definition.  Reports keys
    defined but never used, keys used but never defined, and keys defined
    more than once.
    '''
    docs = get_all_docs(language)
    referenced = match_body(docs, r'\[.+?\]\[(.+?)\]')

    with open(LINK_FILE, 'r') as reader:
        link_text = reader.read()

    definition = re.compile(r'^\[(.+?)\]', re.DOTALL | re.MULTILINE)
    tally = Counter(definition.findall(link_text))
    known = set(tally.keys())
    repeated = {name for (name, count) in tally.items() if count > 1}

    report('External Links', 'unused', known - referenced)
    report('External Links', 'undefined', referenced - known)
    report('External Links', 'duplicated', repeated)
def check_anchors(language):
    '''
    Check that anchors on H2's are properly formatted and include the
    chapter slug.

    For every line of the form `## title {attributes}`, the attributes must
    contain `#s:<slug>` where <slug> (the text up to the first '-') equals
    the slug of the document the line belongs to; otherwise the slug and
    the attributes are reported.
    '''
    heading = re.compile(r'^##\s+[^{]+{([^}]+)}\s*$')
    section = re.compile(r'#s:([^-]+)')
    mismatched = set()
    for (slug, filename, body, lines) in get_all_docs(language):
        for line in lines:
            found = heading.search(line)
            if found is None:
                continue
            attributes = found.group(1)
            target = section.search(attributes)
            if target is not None and target.group(1) == slug:
                continue
            mismatched.add('{}: "{}"'.format(slug, attributes))
    report('Anchors', 'mismatched', mismatched)
def check_pages(language):
    '''
    Check that Markdown pages are properly structured.

    Each page body must begin with a `---` delimited header followed by
    further content, and must end (ignoring trailing whitespace) with a
    `{% include links.md %}` inclusion.  Pages failing either test are
    reported by filename.
    '''
    flags = re.DOTALL | re.MULTILINE
    header = re.compile(r'\A---\n.+\n---\n.+', flags=flags)
    trailer = re.compile(r'{%\s+include\s+links.md\s+%}\s*\Z', flags=flags)

    problems = set()
    for (slug, filename, body, lines) in get_all_docs(language):
        if header.match(body) is None:
            problems.add('{}: bad YAML header'.format(filename))
        if trailer.search(body) is None:
            problems.add('{}: missing links inclusion'.format(filename))
    report('Pages', 'issues', problems)
def main(options, single, multi):
    '''
    Display all requested inclusions.

    options: mapping read for 'language' and 'rejoin_lines', and passed on
    to align.
    single: when set, only the inclusion whose path equals it is selected.
    multi: when set, inclusions whose path starts with it followed by '/'
    are selected.
    If neither single nor multi is set, every inclusion is selected.
    Selected inclusions are aligned in sorted order.
    '''
    if multi:
        multi += '/' # to avoid spurious substring matches
    show_everything = not (single or multi)

    def _selected(path):
        # Same three-way test as a single boolean expression, spelled out.
        if single and path == single:
            return True
        if multi and path.startswith(multi):
            return True
        return show_everything

    docs = get_all_docs(options['language'], remove_code_blocks=False)
    chosen = []
    for (path, body) in get_inclusions(docs, options['rejoin_lines']):
        if _selected(path):
            chosen.append((path, body))
    chosen.sort()

    for (path, body) in chosen:
        align(options, path, body)
def check_langs(language):
    '''
    Check that every fenced code block specifies a language.

    Lines starting with three backticks toggle between being inside and
    outside a code block.  A line that opens a block and consists of
    nothing but the three backticks (after stripping whitespace) is
    reported with its filename and 1-based line number.
    '''
    fence = '```'
    unlabelled = set()
    for (slug, filename, body, lines) in get_all_docs(language):
        inside = False
        for (number, line) in enumerate(lines, start=1):
            if not line.startswith(fence):
                continue
            if inside:
                # Closing fence: nothing to check.
                inside = False
                continue
            inside = True
            if line.strip() == fence:
                unlabelled.add('{} {:4d}'.format(filename, number))
    report('Code Blocks', 'no language', unlabelled)
def check_src(language):
    '''
    Check external source files referenced in title attributes of code
    blocks.

    Referenced files are the values of `{: title="..."}` attributes found
    in the documents (read with code blocks kept).  Actual files are those
    found recursively under SOURCE_DIR that _ignore_file does not reject,
    with the leading SOURCE_DIR + '/' removed.  Files on disk that are
    never referenced and references with no file are both reported.
    '''
    skip = len(SOURCE_DIR + '/')

    docs = get_all_docs(language, remove_code_blocks=False)
    mentioned = match_body(docs, r'{:\s+title="([^"]+)\s*"}')

    on_disk = set()
    for path in glob.iglob('{}/**/*.*'.format(SOURCE_DIR), recursive=True):
        if _ignore_file(path):
            continue
        on_disk.add(path[skip:])

    report('Source Files', 'unused', on_disk - mentioned)
    report('Source Files', 'missing', mentioned - on_disk)
def check_figures(language):
    '''
    Check included figures.

    Figures in use are the file names following `/figures/` in the src
    attribute of `{% include figure.html ... %}` inclusions.  Figures
    available are the entries of FIGURE_DIR, leaving out names starting
    with '.' and names ending in .odg, .pdf or .xml, and leaving out any
    .png for which the corresponding .svg name is also present.
    '''
    skipped_suffixes = ('.odg', '.pdf', '.xml')

    docs = get_all_docs(language)
    referenced = _match_lines(
        docs, r'{%\s+include\s+figure.html[^%]+src=".+/figures/([^"]+)"')

    on_disk = set()
    for name in os.listdir(FIGURE_DIR):
        if name.startswith('.') or name.endswith(skipped_suffixes):
            continue
        on_disk.add(name)

    # Drop PNG files that merely duplicate an SVG.
    shadowed = set()
    for name in on_disk:
        if name.endswith('.png') and name.replace('.png', '.svg') in on_disk:
            shadowed.add(name)
    on_disk -= shadowed

    report('Figures', 'unused', on_disk - referenced)
    report('Figures', 'missing', referenced - on_disk)