示例#1
0
def process_file(filepath: str) -> (str, str):
    """
    Retitle a single content file from its first heading.

    The first h2-h6 heading found in the file is handed to
    process_first_heading(); the title info that comes back supplies both a
    new id for the heading's nearest enclosing <section> and the new contents
    of the document's <title> tag.

    INPUTS:
    filepath: path to content file

    OUTPUTS:
    tuple of (document text passed through format_xhtml, new section id);
    both are empty strings when the file has no h2-h6 heading
    """
    document = BeautifulSoup(gethtml(filepath), "html.parser")

    # h1 is left out of the search on purpose: it belongs to the halftitle
    first_heading = document.find(["h2", "h3", "h4", "h5", "h6"])
    if not first_heading:
        # failure, so return blanks
        return "", ""

    title_info = process_first_heading(first_heading)
    title_element = document.find("title")

    # every enclosing section (not just the nearest) goes to get_part_prefix
    get_part_prefix(title_info, first_heading.find_parents("section"))
    section_id = title_info.generate_id()

    # only the nearest enclosing section receives the generated id
    enclosing_section = first_heading.find_parent("section")
    if enclosing_section:
        enclosing_section["id"] = section_id

    # replace whatever the <title> tag held with the freshly built title
    if title_element:
        title_element.clear()
        title_element.append(title_info.output_title_tag())

    return format_xhtml(str(document)), section_id
示例#2
0
def recreate(textpath: str, notes_soup: BeautifulSoup, endnotes: list):
    """
    Rebuild endnotes.xhtml with the matched endnotes in ascending number order.

    The first <ol> of notes_soup is emptied and refilled with one <li> per
    endnote whose ``matched`` flag is set; unmatched endnotes are left out.
    Inside each note, links whose epub:type contains "se:referrer" or
    "backlink" and that carry a non-empty href are re-pointed at
    ``<source_file>#noteref-<number>``. The resulting document is passed
    through format_xhtml and written to ``endnotes.xhtml`` in textpath.

    :param textpath: path to text folder in SE project; endnotes.xhtml is
        written (overwritten) there
    :param notes_soup: parsed endnotes document; its first <ol> is rebuilt
        in place
    :param endnotes: endnote records (objects exposing number, matched,
        contents and source_file); the list is sorted in place by number
    :return: None
    """
    ol = notes_soup.ol
    ol.clear()
    endnotes.sort(key=lambda enote: enote.number)
    for endnote in endnotes:
        if not endnote.matched:
            continue
        li = notes_soup.new_tag("li")
        li["id"] = "note-" + str(endnote.number)
        li["epub:type"] = "endnote"
        # Iterate over a snapshot: appending a node to li re-parents it, and
        # if endnote.contents is a live bs4 .contents list (not confirmed
        # from here) that would mutate the list mid-iteration and skip nodes.
        for content in list(endnote.contents):
            if isinstance(content, Tag):
                for link in content.find_all("a"):
                    epub_type = link.get("epub:type") or ""
                    is_backlink = ("se:referrer" in epub_type
                                   or "backlink" in epub_type)
                    # links without an href are left untouched
                    if is_backlink and link.get("href"):
                        link["href"] = (endnote.source_file + "#noteref-" +
                                        str(endnote.number))
            li.append(content)
        ol.append(li)
    # the context manager closes the file even if formatting or writing fails
    with open(os.path.join(textpath, "endnotes.xhtml"), "w") as new_file:
        new_file.write(format_xhtml(str(notes_soup)))
示例#3
0
def output_toc(item_list: list, landmark_list, toc_path: str, work_type: str,
               work_title: str) -> str:
    """
    Build the text of the new ToC from the lists of items and landmarks found.

    The existing ToC is loaded through get_existing_toc(); the <ol> inside
    its first nav is refilled from process_items() and the <ol> inside its
    second nav from process_landmarks(). Nothing is printed or written here:
    the result of format_xhtml() is returned and the caller decides where it
    goes.

    :param item_list: ToC items; at least two are required
    :param landmark_list: landmarks handed to process_landmarks()
    :param toc_path: path handed to get_existing_toc()
    :param work_type: handed through to process_landmarks()
    :param work_title: handed through to process_landmarks()
    :return: the updated ToC document as a string
    :raises se.InvalidInputException: if fewer than two items were found, the
        existing ToC could not be loaded, it has fewer than two nav elements,
        or one of the first two nav elements has no <ol>
    """

    if len(item_list) < 2:
        raise se.InvalidInputException("Too few ToC items found.")

    existing_toc: BeautifulSoup = get_existing_toc(toc_path)
    if existing_toc is None:
        raise se.InvalidInputException("Existing ToC not found.")

    # At least two nav sections are required; only the first two are used
    # (first = ToC items, second = landmarks).
    navs = existing_toc.find_all("nav")

    if len(navs) < 2:
        raise se.InvalidInputException(
            "Existing ToC has too few nav sections.")

    item_ol = navs[0].find("ol")
    landmark_ol = navs[1].find("ol")
    # find() returns None when a nav has no <ol>; report that as invalid
    # input instead of failing with an AttributeError on None.clear()
    if item_ol is None or landmark_ol is None:
        raise se.InvalidInputException(
            "Existing ToC nav section has no ol element.")

    item_ol.clear()
    landmark_ol.clear()
    new_items = BeautifulSoup(process_items(item_list), "html.parser")
    item_ol.append(new_items)
    new_landmarks = BeautifulSoup(
        process_landmarks(landmark_list, work_type, work_title), "html.parser")
    landmark_ol.append(new_landmarks)
    return format_xhtml(str(existing_toc))
示例#4
0
def process_file(text_path: str, file_name: str, endnotes: list,
                 de_orphan: bool, current_note_number: int) -> int:
    """
    Renumber the endnote references in one content file.

    Every <a> whose epub:type is exactly "noteref" is given the next note
    number in sequence. When its anchor changes, the link's href, id and
    text are rewritten and the module-level ``notes_changed`` counter is
    incremented. The link's previous anchor is then looked up in
    ``endnotes``; a unique match is marked as matched and records its new
    number and source file. The content file is rewritten only if something
    in it changed.

    :param text_path: path to the text files in the project
    :param file_name: the name of the file being processed eg chapter-1.xhtml
    :param endnotes: list of endnote records (objects exposing anchor,
        number, matched and source_file); matched records are updated in place
    :param de_orphan: empty the reference in the text if no matching endnote
    :param current_note_number: the current note number we are allocating
    :return: the next note number to use
    """
    global notes_changed
    file_path = os.path.join(text_path, file_name)
    xhtml = gethtml(file_path)
    soup = BeautifulSoup(xhtml, "lxml")
    needs_rewrite = False
    for link in soup.find_all("a"):
        epub_type = link.get("epub:type") or ""
        if epub_type != "noteref":
            continue

        # a noteref without an href is treated as having an empty anchor
        href = link.get("href") or ""
        old_anchor = extract_anchor(href) if href else ""
        new_anchor = "note-{:d}".format(current_note_number)
        if new_anchor != old_anchor:
            print("Changed " + old_anchor + " to " + new_anchor + " in " +
                  file_path)
            notes_changed += 1
            # update the link in the soup object
            link["href"] = 'endnotes.xhtml#' + new_anchor
            link["id"] = 'noteref-{:d}'.format(current_note_number)
            link.string = str(current_note_number)
            needs_rewrite = True

        # now try to find this in endnotes, using the anchor the link had
        # before renumbering
        matches = [note for note in endnotes if note.anchor == old_anchor]
        if not matches:
            print("Couldn't find endnote with anchor " + old_anchor)
            if de_orphan:
                print("Removing orphan note ref in text")
                # clear() empties the link's contents; the <a> itself stays
                link.clear()
                needs_rewrite = True
        elif len(matches) > 1:
            print("Duplicate anchors in endnotes file for anchor " +
                  old_anchor)
        else:  # found a single match, which is what we want
            listnote = matches[0]
            listnote.number = current_note_number
            listnote.matched = True
            # we don't change the anchor or the back ref just yet
            listnote.source_file = file_name

        # the number advances for every noteref, matched or not
        current_note_number += 1

    # if we need to write back the body text file
    if needs_rewrite:
        # the context manager closes the file even if formatting or writing
        # fails
        with open(file_path, "w") as new_file:
            new_file.write(format_xhtml(str(soup)))
    return current_note_number