Example #1
def performOPFSourceUpdates(data, newbkpath, oldbkpath, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary
    updates = dict(zip(keylist, valuelist))
    xmlbuilder = LXMLTreeBuilderForXML(parser=None,
                                       empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data,
                         features=None,
                         from_encoding="utf-8",
                         builder=xmlbuilder)
    for tag in soup.find_all(["link", "item", "reference", "site"]):
        if "href" in tag.attrs:
            href = tag["href"]
            if href.find(":") == -1:
                parts = href.split('#')
                ahref = unquoteurl(parts[0])
                fragment = ""
                if len(parts) > 1:
                    fragment = parts[1]
                oldtarget = buildBookPath(ahref, startingDir(oldbkpath))
                newtarget = updates.get(oldtarget, oldtarget)
                attribute_value = buildRelativePath(newbkpath, newtarget)
                if fragment != "":
                    attribute_value = attribute_value + "#" + fragment
                attribute_value = quoteurl(attribute_value)
                tag["href"] = attribute_value
    newdata = soup.decodexml(indent_level=0,
                             formatter='minimal',
                             indent_chars="  ")
    return newdata
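A minimal usage sketch for the function above, assuming the Sigil plugin helpers it relies on (buildBookPath, buildRelativePath, quoteurl, unquoteurl, and the sigil_bs4 builder) are importable; the file names, key/value lists, and the opf_xml variable are illustrative only:

# Hypothetical rename: a stylesheet moved from Styles/ to css/.
keylist = ["OEBPS/Styles/main.css"]       # old bookpaths
valuelist = ["OEBPS/css/main.css"]        # corresponding new bookpaths
with open("OEBPS/content.opf", "r", encoding="utf-8") as f:
    opf_xml = f.read()
new_opf_xml = performOPFSourceUpdates(opf_xml,
                                      "OEBPS/content.opf",   # bookpath of the OPF after the change
                                      "OEBPS/content.opf",   # bookpath of the OPF before the change
                                      keylist, valuelist)
# manifest hrefs such as "Styles/main.css" would now read "css/main.css"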
Example #2
def anchorNCXUpdatesAfterMerge(data, ncx_bookpath, sink_bookpath,
                               merged_bookpaths):
    data = _remove_xml_header(data)
    startdir = startingDir(ncx_bookpath)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    xmlbuilder = LXMLTreeBuilderForXML(parser=None,
                                       empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data,
                         features=None,
                         from_encoding="utf-8",
                         builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                ahref = unquoteurl(parts[0])
                target_bookpath = buildBookPath(ahref, startdir)
                if target_bookpath in merged_bookpaths:
                    attribute_value = buildRelativePath(ncx_bookpath, sink_bookpath)
                    if len(parts) > 1 and parts[1] != "":
                        attribute_value += "#" + parts[1]
                    tag["src"] = quoteurl(attribute_value)
    newdata = soup.decodexml(indent_level=0,
                             formatter='minimal',
                             indent_chars="  ")
    return newdata
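A sketch of the merge case above (paths are illustrative, and the same helper availability is assumed): two chapters were merged into chap01.xhtml, so any NCX <content> element that pointed at them is retargeted at the surviving file:

merged_bookpaths = ["OEBPS/Text/chap02.xhtml", "OEBPS/Text/chap03.xhtml"]
sink_bookpath = "OEBPS/Text/chap01.xhtml"
with open("OEBPS/toc.ncx", "r", encoding="utf-8") as f:
    ncx_xml = f.read()
new_ncx = anchorNCXUpdatesAfterMerge(ncx_xml, "OEBPS/toc.ncx",
                                     sink_bookpath, merged_bookpaths)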
Example #3
def performPageMapUpdates(data, newbkpath, oldbkpath, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild the serialized lookup dictionary of updates, preserving order
    updates = OrderedDict(zip(keylist, valuelist))
    xml_empty_tags = ["page"]
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all("page"):
        if "href" in tag.attrs:
            ref = tag["href"]
            if ref.find(":") == -1:
                parts = ref.split('#')
                apath = urldecodepart(parts[0])
                fragment = ""
                if len(parts) > 1:
                    fragment = urldecodepart(parts[1])
                oldtarget = buildBookPath(apath, startingDir(oldbkpath))
                newtarget = updates.get(oldtarget, oldtarget)
                attribute_value = urlencodepart(buildRelativePath(newbkpath, newtarget))
                if fragment != "":
                    attribute_value = attribute_value + "#" + urlencodepart(fragment)
                tag["href"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata
Example #4
def anchorNCXUpdates(data, ncx_bookpath, originating_bookpath, keylist, valuelist):
    data = _remove_xml_header(data)
    # lxml on a Mac does not seem to handle full unicode properly, so encode as utf-8
    data = data.encode('utf-8')
    # rebuild serialized lookup dictionary
    id_dict = OrderedDict(zip(keylist, valuelist))
    startdir = startingDir(ncx_bookpath)
    xmlbuilder = LXMLTreeBuilderForXML(parser=None, empty_element_tags=ebook_xml_empty_tags)
    soup = BeautifulSoup(data, features=None, from_encoding="utf-8", builder=xmlbuilder)
    for tag in soup.find_all("content"):
        if "src" in tag.attrs:
            src = tag["src"]
            if src.find(":") == -1:
                parts = src.split('#')
                apath = urldecodepart(parts[0])
                # convert this path to its target bookpath
                target_bookpath = buildBookPath(apath, startdir)
                if len(parts) > 1 and target_bookpath == originating_bookpath and parts[1] != "":
                    fragment_id = urldecodepart(parts[1])
                    if fragment_id in id_dict:
                        target_bookpath = id_dict[fragment_id]
                        attribute_value = urlencodepart(buildRelativePath(ncx_bookpath, target_bookpath))
                        attribute_value = attribute_value + "#" + urlencodepart(fragment_id)
                        tag["src"] = attribute_value
    newdata = soup.decodexml(indent_level=0, formatter='minimal', indent_chars="  ")
    return newdata
Example #5
def addfile(self,
            uniqueid,
            basename,
            data,
            mime=None,
            properties=None,
            fallback=None,
            overlay=None):
    uniqueid = unicode_str(uniqueid)
    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    basename = unicode_str(basename)
    mime = unicode_str(mime)
    if mime is None:
        ext = os.path.splitext(basename)[1]
        ext = ext.lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
        raise WrapperException('Can not add or remove an ncx under epub2')
    group = mime_group_map.get(mime, "Misc")
    default_path = self.group_paths[group][0]
    bookpath = basename
    if default_path != "":
        bookpath = default_path + "/" + basename
    href = buildRelativePath(self.opfbookpath, bookpath)
    if href in self.href_to_id:
        raise WrapperException('Basename already exists')
    # now actually write out the new file
    filepath = bookpath.replace("/", os.sep)
    self.id_to_filepath[uniqueid] = filepath
    filepath = os.path.join(self.outdir, filepath)
    base = os.path.dirname(filepath)
    if not unipath.exists(base):
        os.makedirs(base)
    if mime in TEXT_MIMETYPES or isinstance(data, text_type):
        data = utf8_str(data)
    with open(filepath, 'wb') as fp:
        fp.write(data)
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.id_to_props[uniqueid] = properties
    self.id_to_fall[uniqueid] = fallback
    self.id_to_over[uniqueid] = overlay
    self.id_to_bookpath[uniqueid] = bookpath
    self.href_to_id[href] = uniqueid
    self.bookpath_to_id[bookpath] = uniqueid
    self.added.append(uniqueid)
    self.modified[self.opfbookpath] = 'file'
    return uniqueid
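A usage sketch, assuming wrapper is an instance of the container class this method belongs to (the Sigil plugin wrapper); the new stylesheet is written into the default folder configured for its MIME group and registered in the manifest:

css_source = "body { margin: 0 }"
new_id = wrapper.addfile("extra_css",        # manifest id, must be unique
                         "extra.css",        # basename; the group's default folder is prepended
                         css_source,
                         mime="text/css")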
Example #6
def addbookpath(self, uniqueid, bookpath, data, mime=None):
    uniqueid = _unicodestr(uniqueid)
    if uniqueid in self.id_to_href:
        raise WrapperException('Manifest Id is not unique')
    bookpath = _unicodestr(bookpath)
    basename = bookpath.split("/")[-1]
    mime = _unicodestr(mime)
    if mime is None:
        ext = os.path.splitext(basename)[1]
        ext = ext.lower()
        mime = ext_mime_map.get(ext, None)
    if mime is None:
        raise WrapperException("Mime Type Missing")
    if mime == "application/x-dtbncx+xml" and self.epub_version.startswith("2"):
        raise WrapperException('Can not add or remove an ncx under epub2')
    href = buildRelativePath(self.opfbookpath, bookpath)
    if href in self.href_to_id:
        raise WrapperException('bookpath already exists')
    # now actually write out the new file
    filepath = bookpath.replace("/", os.sep)
    self.id_to_filepath[uniqueid] = filepath
    filepath = os.path.join(self.outdir, filepath)
    base = os.path.dirname(filepath)
    if not os.path.exists(base):
        os.makedirs(base)
    if mime in TEXT_MIMETYPES or isinstance(data, str):
        data = _utf8str(data)
    with open(filepath, 'wb') as fp:
        fp.write(data)
    self.id_to_href[uniqueid] = href
    self.id_to_mime[uniqueid] = mime
    self.id_to_props[uniqueid] = None
    self.id_to_fall[uniqueid] = None
    self.id_to_over[uniqueid] = None
    self.id_to_bookpath[uniqueid] = bookpath
    self.href_to_id[href] = uniqueid
    self.bookpath_to_id[bookpath] = uniqueid
    self.added.append(uniqueid)
    self.modified[self.opfbookpath] = 'file'
    return uniqueid
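The bookpath variant lets the caller choose the full path inside the epub instead of relying on the group default; a sketch under the same assumptions (when mime is omitted it is derived from the ".svg" extension via ext_mime_map):

svg_source = '<svg xmlns="http://www.w3.org/2000/svg"/>'
new_id = wrapper.addbookpath("cover_svg",                  # manifest id
                             "OEBPS/Images/cover.svg",     # full bookpath chosen by the caller
                             svg_source)                   # mime omitted: looked up from ".svg"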
Example #7
def get_relativepath(self, from_bookpath, to_bookpath):
    from_bookpath = _unicodestr(from_bookpath)
    to_bookpath = _unicodestr(to_bookpath)
    return buildRelativePath(from_bookpath, to_bookpath)
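A quick illustration of the wrapper above; assuming buildRelativePath resolves the second bookpath against the directory of the first, the call should return a manifest-style relative href (the expected value shown is an assumption):

rel_href = wrapper.get_relativepath("OEBPS/content.opf", "OEBPS/Text/chap01.xhtml")
# expected to be "Text/chap01.xhtml"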