示例#1
0
文件: html.py 项目: mgorny/librarian
# -*- coding: utf-8 -*-
#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
import os
import cStringIO
import copy

from lxml import etree
from librarian import XHTMLNS, ParseError, OutputFile
from librarian import functions

from lxml.etree import XMLSyntaxError, XSLTApplyError

# Import-time side effect: invoke the two registration hooks exposed by
# librarian.functions.
# NOTE(review): presumably these register the extension functions that the
# XSLT stylesheets rely on -- confirm against librarian.functions.
functions.reg_substitute_entities()
functions.reg_person_name()

# Stylesheet name -> XSLT file path, relative to the directory containing
# this module (resolved to a full path by get_stylesheet).
STYLESHEETS = dict(
    legacy='xslt/book2html.xslt',
    full='xslt/wl2html_full.xslt',
    partial='xslt/wl2html_partial.xslt',
)

def get_stylesheet(name):
    """Return the path of the XSLT stylesheet registered under `name`.

    The STYLESHEETS entry for `name` is joined onto the directory that
    contains this module. A name missing from STYLESHEETS raises KeyError.
    """
    module_dir = os.path.dirname(__file__)
    relative_path = STYLESHEETS[name]
    return os.path.join(module_dir, relative_path)

def html_has_content(text):
    """Return the `p` and `h1` elements found anywhere in `text`.

    Both un-namespaced elements and elements in the XHTMLNS namespace are
    matched. The raw ETXPath evaluation result is returned, so callers can
    use its truthiness as a "has content" check.
    NOTE(review): `text` is presumably an lxml element or tree -- confirm
    against callers.
    """
    substitutions = {'ns': str(XHTMLNS)}
    expression = '//p|//{%(ns)s}p|//h1|//{%(ns)s}h1' % substitutions
    find_content = etree.ETXPath(expression)
    return find_content(text)

def transform(wldoc, stylesheet='legacy', options=None, flags=None):
示例#2
0
# -*- coding: utf-8 -*-
#
# This file is part of Librarian, licensed under GNU Affero GPLv3 or later.
# Copyright © Fundacja Nowoczesna Polska. See NOTICE for more information.
#
from __future__ import unicode_literals

import os.path
from copy import deepcopy
from lxml import etree
import six

from librarian import functions, OutputFile
from .epub import replace_by_verse

# Import-time side effect: invoke the two registration hooks exposed by
# librarian.functions.
# NOTE(review): presumably these register the extension functions that the
# XSLT stylesheets rely on -- confirm against librarian.functions.
functions.reg_substitute_entities()
functions.reg_person_name()


def sectionify(tree):
    """Finds section headers and adds a tree of _section tags."""
    sections = [
        'naglowek_czesc', 'naglowek_akt', 'naglowek_rozdzial',
        'naglowek_scena', 'naglowek_podrozdzial'
    ]
    section_level = dict((v, k) for (k, v) in enumerate(sections))

    # We can assume there are just subelements and no text at section level.
    for level, section_name in reversed(list(enumerate(sections))):
        for header in tree.findall('//' + section_name):
            section = header.makeelement("_section")