def get_sections(notebook):
    """Collect the Markdown heading lines of a notebook.

    The text is obtained via `get_text_contents(notebook)`.  If it holds
    at least five level-1 headings (`# ...`), those are used; otherwise
    the level-2 and level-3 headings (`## ...` / `### ...`) are used.

    Post-processing:
    * if a `## Synopsis` heading is present, exactly one copy of it is
      kept and it is placed first;
    * `## End of Excursion` and `### End of Excursion` are dropped.

    Returns a list of heading strings, including their `#` prefix.
    """
    text = get_text_contents(notebook)

    headings = re.findall(r'^(# .*)', text, flags=re.MULTILINE)
    if len(headings) < 5:
        # Too few top-level headings: fall back to (sub)sections
        headings = re.findall(r'^(###? .*)', text, flags=re.MULTILINE)

    # Strip literal backslash-n sequences from each heading
    titles = []
    for heading in headings:
        titles.append(heading.replace(r'\n', ''))

    # Keep a single synopsis entry, and put it at the front
    synopsis = '## Synopsis'
    if synopsis in titles:
        remaining = [title for title in titles if title != synopsis]
        titles = [synopsis] + remaining

    # Drop the "End of Excursion" marker headings
    excursion_ends = ('## End of Excursion', '### End of Excursion')
    return [title for title in titles if title not in excursion_ends]
def get_description(notebook):
    """Extract the introductory text that follows a notebook's title.

    The text is obtained via `get_text_contents(notebook)`.  The
    description is the `#`-free run of text between a `# ...` title line
    and the next line that starts with `#`; line breaks (real ones and
    literal backslash-n sequences) are removed from it.  If no such span
    is found, the entire text is used instead, with line breaks kept.

    In both cases Markdown link targets `(...)` after a `]`, square
    brackets, underscores and asterisks are stripped before returning.
    """
    text = get_text_contents(notebook)

    found = re.search(r'^# .*$([^#]*)^#', text, flags=re.MULTILINE)
    if found is not None:
        summary = found.group(1)
        summary = summary.replace(r'\n', '')
        summary = summary.replace('\n', '')
    else:
        # No title/next-heading pair: fall back to the whole text
        summary = text

    # Reduce "[label](target)" to "label"
    summary = re.sub(r"\]\([^)]*\)", "]", summary)
    summary = summary.replace('[', '')
    summary = summary.replace(']', '')

    # Remove emphasis markers
    return re.sub(r"[_*]", "", summary)
def get_sections(notebook):
    """Collect the Markdown heading lines of a notebook.

    The text is obtained via `get_text_contents(notebook)`.  If it holds
    at least five level-1 headings (`# ...`), those are returned;
    otherwise the level-2 and level-3 headings (`## ...` / `### ...`)
    are returned.  Literal backslash-n sequences are removed from each
    heading.

    Returns a list of heading strings, including their `#` prefix.
    """
    text = get_text_contents(notebook)

    headings = re.findall(r'^(# .*)', text, flags=re.MULTILINE)
    if len(headings) < 5:
        # Too few top-level headings: fall back to (sub)sections
        headings = re.findall(r'^(###? .*)', text, flags=re.MULTILINE)

    titles = []
    for heading in headings:
        titles.append(heading.replace(r'\n', ''))
    return titles
def get_sections(notebook):
    """Collect the Markdown heading lines of a notebook.

    The text is obtained via `get_text_contents(notebook)`.  If it holds
    at least five level-1 headings (`# ...`), those are used; otherwise
    the level-2 and level-3 headings (`## ...` / `### ...`) are used.

    Post-processing:
    * if `## Synopsis` occurs, the last such section (the heading plus
      the entries that follow it up to the next `## ` heading) is moved
      to the front, and earlier synopsis sections are discarded;
    * headings ending in `# End of Excursion` are dropped.

    Returns a list of heading strings, including their `#` prefix.
    """
    text = get_text_contents(notebook)

    headings = re.findall(r'^(# .*)', text, flags=re.MULTILINE)
    if len(headings) < 5:
        # Too few top-level headings: fall back to (sub)sections
        headings = re.findall(r'^(###? .*)', text, flags=re.MULTILINE)

    titles = [heading.replace(r'\n', '') for heading in headings]

    if '## Synopsis' in titles:
        synopsis_part = []
        body_part = []
        inside_synopsis = False
        for title in titles:
            if title == '## Synopsis':
                # A later synopsis replaces whatever was collected before
                synopsis_part = [title]
                inside_synopsis = True
                continue
            if title.startswith('## '):
                # Any other level-2 heading ends the synopsis
                inside_synopsis = False
            destination = synopsis_part if inside_synopsis else body_part
            destination.append(title)
        titles = synopsis_part + body_part

    # Drop the "End of Excursion" marker headings
    kept = []
    for title in titles:
        if title.endswith('# End of Excursion'):
            continue
        kept.append(title)
    return kept