Python unquoteurl примеры, hrefutils.unquoteurl Python примеры использования

Пример #1

0

Показать файл

 def getmime(self, href):
     href = unicode_str(href)
     href = unquoteurl(href)
     filename = os.path.basename(href)
     ext = os.path.splitext(filename)[1]
     ext = ext.lower()
     return ext_mime_map.get(ext, "")

Пример #2

0

Показать файл

Файл: wrapper.py Проект: snoopy520/Sigil

 def deleteotherfile(self, book_href):
     id = unicode_str(book_href)
     id = unquoteurl(id)
     if id is None:
         raise WrapperException('None is not a valid book hrefbook href')
     if id not in self.other and id in self.id_to_href:
         raise WrapperException(
             'Incorrect interface routine - use deletefile')
     filepath = self.book_href_to_filepath.get(id, None)
     if filepath is None:
         raise WrapperException('Book href does not exist')
     if id in PROTECTED_FILES or id == self.opfbookpath:
         raise WrapperException('attempt to delete protected file')
     add_to_deleted = True
     # if file was added or modified delete file from outdir
     if id in self.added or id in self.modified:
         filepath = os.path.join(self.outdir, filepath)
         if unipath.exists(filepath) and unipath.isfile(filepath):
             os.remove(filepath)
         if id in self.added:
             self.added.remove(id)
             add_to_deleted = False
         if id in self.other:
             self.other.remove(id)
         if id in self.modified:
             del self.modified[id]
     if add_to_deleted:
         self.deleted.append(('other', id, book_href))
     del self.book_href_to_filepath[id]

Пример #3

0

Показать файл

Файл: wrapper.py Проект: snoopy520/Sigil

 def readotherfile(self, book_href):
     id = unicode_str(book_href)
     id = unquoteurl(id)
     if id is None:
         raise WrapperException('None is not a valid book href')
     if id not in self.other and id in self.id_to_href:
         raise WrapperException(
             'Incorrect interface routine - use readfile')
     # handle special case of trying to read the opf after it has been modified
     if id == self.opfbookpath:
         if id in self.modified:
             return self.build_opf()
     filepath = self.book_href_to_filepath.get(id, None)
     if filepath is None:
         raise WrapperException('Book href does not exist')
     basedir = self.ebook_root
     if id in self.added or id in self.modified:
         basedir = self.outdir
     filepath = os.path.join(basedir, filepath)
     if not unipath.exists(filepath):
         raise WrapperException('File Does Not Exist')
     basename = os.path.basename(filepath)
     ext = os.path.splitext(basename)[1]
     ext = ext.lower()
     mime = ext_mime_map.get(ext, "")
     data = b''
     with open(filepath, 'rb') as fp:
         data = fp.read()
     if mime in TEXT_MIMETYPES:
         data = unicode_str(data)
     return data

Пример #4

0

Показать файл

 def setguide(self, new_guide):
     guide = []
     for (type, title, href) in new_guide:
         type = unicode_str(type)
         title = unicode_str(title)
         href = unicode_str(href)
         href = unquoteurl(href)
         if type not in _guide_types:
             type = "other." + type
         if title is None:
             title = 'title missing'
         thref = href.split('#')[0]
         if thref not in self.href_to_id:
             raise WrapperException('guide href not in manifest')
         guide.append((type, title, href))
     self.guide = guide
     self.modified[self.opfbookpath] = 'file'

Пример #5

0

Показать файл

Файл: navprocessor.py Проект: trlgoz/Sigil

    def getTOC(self):
        # parse the nav to get the table of contents
        navsrc = self.content
        toclist = []

        qp = QuickXHTMLParser()
        qp.setContent(navsrc)
        lvl = 0
        po = 0
        title = ""
        nav_type = None
        href = None
        for txt, tp, tname, ttype, tattr in qp.parse_iter():
            if txt is not None:
                if ".a." in tp or tp.endswith(".a"):
                    title = title + txt
                else:
                    title = ""
            else:
                if tname == "nav" and ttype == "begin":
                    nav_type = tattr.get("epub:type", None)
                    continue
                if tname == "nav" and ttype == "end":
                    nav_type = None
                    continue
                if nav_type is not None and nav_type == "toc":
                    if tname == "ol":
                        if ttype == "begin": lvl += 1
                        if ttype == "end": lvl -= 1
                        continue
                    if tname == "a" and ttype == "begin":
                        href = tattr.get("href", "")
                        href = unquoteurl(href)
                        continue
                    if tname == "a" and ttype == "end":
                        po += 1
                        title = xmldecode(title)
                        toclist.append((po, lvl, href, title))
                        title = ""
                        href = None
                        continue

        return toclist

Пример #6

0

Показать файл

 def addotherfile(self, book_href, data):
     id = unicode_str(book_href)
     id = unquoteurl(id)
     if id is None:
         raise WrapperException('None is not a valid book href')
     if id in self.other:
         raise WrapperException('Book href must be unique')
     desired_path = id.replace("/", os.sep)
     filepath = os.path.join(self.outdir, desired_path)
     if unipath.isfile(filepath):
         raise WrapperException('Desired path already exists')
     base = os.path.dirname(filepath)
     if not unipath.exists(base):
         os.makedirs(pathof(base))
     if isinstance(data, text_type):
         data = utf8_str(data)
     with open(pathof(filepath), 'wb') as fp:
         fp.write(data)
     self.other.append(id)
     self.added.append(id)
     self.book_href_to_filepath[id] = desired_path

Пример #7

0

Показать файл

Файл: wrapper.py Проект: stjordanis/Sigil

 def writeotherfile(self, book_href, data):
     id = unicode_str(book_href)
     id = unquoteurl(id)
     if id is None:
         raise WrapperException('None is not a valid book href')
     if id not in self.other and id in self.id_to_href:
         raise WrapperException('Incorrect interface routine - use writefile')
     filepath = self.book_href_to_filepath.get(id, None)
     if filepath is None:
         raise WrapperException('Book href does not exist')
     if id in PROTECTED_FILES or id == self.opfbookpath:
         raise WrapperException('Attempt to modify protected file')
     filepath = os.path.join(self.outdir, filepath)
     base = os.path.dirname(filepath)
     if not unipath.exists(base):
         os.makedirs(base)
     if isinstance(data, text_type):
         data = utf8_str(data)
     with open(filepath,'wb') as fp:
         fp.write(data)
     self.modified[id] = 'file'

Пример #8

0

Показать файл

Файл: navprocessor.py Проект: trlgoz/Sigil

    def getLandmarks(self):
        # parse the nav to get the landmarks
        navsrc = self.content
        landmarks = []

        qp = QuickXHTMLParser()
        qp.setContent(navsrc)
        title = ""
        nav_type = None
        href = None
        epubtype = None
        for txt, tp, tname, ttype, tattr in qp.parse_iter():
            if txt is not None:
                if ".a." in tp or tp.endswith(".a"):
                    title = title + txt
                else:
                    title = ""
            else:
                if tname == "nav" and ttype == "begin":
                    nav_type = tattr.get("epub:type", None)
                    continue
                if tname == "nav" and ttype == "end":
                    nav_type = None
                    continue

                if nav_type is not None and nav_type == "landmarks":
                    if tname == "a" and ttype == "begin":
                        href = tattr.get("href", "")
                        href = unquoteurl(href)
                        epubtype = tattr.get("epub:type", None)
                        continue
                    if tname == "a" and ttype == "end":
                        if epubtype is not None:
                            title = xmldecode(title)
                            landmarks.append((epubtype, href, title))
                        title = ""
                        epubtype = None
                        href = None
                        continue
        return landmarks

Пример #9

0

Показать файл

Файл: navprocessor.py Проект: trlgoz/Sigil

    def getPageList(self):
        # parse the nav source to get the page-list
        navsrc = self.content
        pagelist = []

        qp = QuickXHTMLParser()
        qp.setContent(navsrc)
        pgcnt = 0
        nav_type = None
        href = None
        title = ""
        for txt, tp, tname, ttype, tattr in qp.parse_iter():
            if txt is not None:
                if ".a." in tp or tp.endswith(".a"):
                    title = title + txt
                else:
                    title = ""
            else:
                if tname == "nav" and ttype == "begin":
                    nav_type = tattr.get("epub:type", None)
                    continue
                if tname == "nav" and ttype == "end":
                    nav_type = None
                    continue
                if nav_type is not None and nav_type == "page-list":
                    if tname == "a" and ttype == "begin" and nav_type == "page-list":
                        href = tattr.get("href", "")
                        href = unquoteurl(href)
                        continue
                    if tname == "a" and ttype == "end":
                        pgcnt += 1
                        title = xmldecode(title)
                        pagelist.append((pgcnt, href, title))
                        title = ""
                        continue

        return pagelist

Пример #10

0

Показать файл

Файл: opf_parser.py Проект: tirtawn/Sigil

    def _parseData(self):
        cnt = 0
        for prefix, tname, tattr, tcontent in self._opf_tag_iter():
            if self._debug:
                print("   Parsing OPF: ", prefix, tname, tattr, tcontent)
            # package
            if tname == "package":
                ver = tattr.pop("version", "2.0")
                uid = tattr.pop("unique-identifier", "bookid")
                self.package = (ver, uid, tattr)
                continue
            # metadata
            if tname == "metadata":
                self.metadata_attr = tattr
                continue
            if tname in ["meta", "link"
                         ] or tname.startswith("dc:") and "metadata" in prefix:
                self.metadata.append((tname, tattr, tcontent))
                if tattr.get("name", "") == "cover":
                    self.cover_id = tattr.get("content", None)
                continue
            # manifest
            if tname == "item" and "manifest" in prefix:
                nid = "xid%03d" % cnt
                cnt += 1
                id = tattr.pop("id", nid)
                href = tattr.pop("href", '')
                mtype = tattr.pop("media-type", '')
                if mtype == "text/html":
                    mtype = "application/xhtml+xml"
                if mtype not in mime_group_map:
                    print("****Opf_Parser Warning****: Unknown MediaType: ",
                          mtype)
                href = unquoteurl(href)
                properties = tattr.pop("properties", None)
                fallback = tattr.pop("fallback", None)
                overlay = tattr.pop("media-overlay", None)

                # external resources are now allowed in the opf under epub3
                # we can ignore fragments here as these are links to files
                self.manifest_id_to_href[id] = href

                bookpath = ""
                if href.find(":") == -1:
                    bookpath = buildBookPath(href, self.opf_dir)
                self.manifest_id_to_bookpath[id] = bookpath
                self.manifest_id_to_mime[id] = mtype
                # self.bookpaths.append(bookpath)
                group = mime_group_map.get(mtype, '')
                if bookpath != "" and group != "":
                    folderlst = self.group_folder.get(group, [])
                    countlst = self.group_count.get(group, [])
                    sdir = startingDir(bookpath)
                    if sdir not in folderlst:
                        folderlst.append(sdir)
                        countlst.append(1)
                    else:
                        pos = folderlst.index(sdir)
                        countlst[pos] = countlst[pos] + 1
                    self.group_folder[group] = folderlst
                    self.group_count[group] = countlst
                self.manifest_id_to_properties[id] = properties
                self.manifest_id_to_fallback[id] = fallback
                self.manifest_id_to_overlay[id] = overlay
                continue
            # spine
            if tname == "spine":
                if tattr is not None:
                    self.spine_ppd = tattr.get("page-progression-direction",
                                               None)
                continue
            if tname == "itemref" and "spine" in prefix:
                idref = tattr.pop("idref", "")
                linear = tattr.pop("linear", None)
                properties = tattr.pop("properties", None)
                self.spine.append((idref, linear, properties))
                continue
            # guide
            if tname == "reference" and "guide" in prefix:
                type = tattr.pop("type", '')
                title = tattr.pop("title", '')
                href = unquoteurl(tattr.pop("href", ''))
                self.guide.append((type, title, href))
                continue
            # bindings (stored but ignored for now)
            if tname in ["mediaType", "mediatype"] and "bindings" in prefix:
                mtype = tattr.pop("media-type", "")
                handler = tattr.pop("handler", "")
                self.bindings.append((mtype, handler))
                continue

        # determine unique ShortPathName for each bookpath
        # start with filename and work back up the folders
        # spn = {}
        # dupset = set()
        # nameset = {}
        # lvl = 1
        # for bkpath in self.bookpaths:
        #     aname = build_short_name(bkpath, lvl)
        #     spn[bkpath] = aname
        #     if aname in nameset:
        #         dupset.add(aname)
        #         nameset[aname].append(bkpath)
        #     else:
        #         nameset[aname]=[bkpath]
        #
        # now work just through any to-do list of duplicates
        # until all duplicates are gone
        #
        # todolst = list(dupset)
        # while(todolst):
        #     dupset = set()
        #     lvl += 1
        #     for aname in todolst:
        #         bklst = nameset[aname]
        #         del nameset[aname]
        #         for bkpath in bklst:
        #             newname = build_short_name(bkpath, lvl)
        #             spn[bkpath] = newname
        #             if newname in nameset:
        #                 dupset.add(newname)
        #                 nameset[newname].append(bkpath)
        #             else:
        #                 nameset[newname] = [bkpath]
        #     todolst = list(dupset)

        # finally sort by number of files in dir to find default folders for each group
        dirlst = []
        use_lower_case = False
        for group in self.group_folder.keys():
            folders = self.group_folder[group]
            cnts = self.group_count[group]
            folders = [x for _, x in sorted(zip(cnts, folders), reverse=True)]
            self.group_folder[group] = folders
            if group in [
                    "Text", "Styles", "Images", "Audio", "Fonts", "Video",
                    "Misc"
            ]:
                afolder = folders[0]
                if afolder.find(group.lower()) > -1:
                    use_lower_case = True
                dirlst.append(folders[0])

        # now back fill any missing values
        # commonbase will end with a /
        commonbase = longestCommonPath(dirlst)
        if commonbase == "/":
            commonbase = ""
        for group in ["Styles", "Images", "Audio", "Fonts", "Video", "Misc"]:
            folders = self.group_folder.get(group, [])
            gname = group
            if use_lower_case:
                gname = gname.lower()
            if not folders:
                folders = [commonbase + gname]
                self.group_folder[group] = folders

Пример #11

0

Показать файл

def parse_nav(qp, navdata, navbkpath, newdir):
    qp.setContent(navdata)
    toclist = []
    pagelist = []
    landmarks = []
    lvl = 0
    pgcnt = 0
    maxlvl = -1
    nav_type = None
    href = None
    title = ""
    play = 0
    navdir = startingDir(navbkpath)

    for txt, tp, tname, ttype, tattr in qp.parse_iter():
        if txt is not None:
            if ".a." in tp or tp.endswith(".a"):
                title = title + txt
            else:
                title = ""
        else:
            if tname == "nav":
                if ttype == "begin":
                    nav_type = tattr.get("epub:type", None)
                if ttype == "end":
                    nav_type = None
                continue
            if tname == "ol" and nav_type is not None and nav_type in ("toc","page-list","landmarks"):
                if ttype == "begin":
                    lvl += 1
                    if nav_type == "toc":
                        if lvl > maxlvl: maxlvl = lvl
                if ttype == "end": lvl -= 1
                continue
            if tname == "a" and ttype == "begin":
                href = tattr.get("href", "")
                href = unquoteurl(href)
                if href.find(":") == -1:
                    # first strip off any fragment
                    fragment = ""
                    if href.find("#") != -1:
                        href, fragment = href.split("#")
                    # find destination bookpath
                    if href.startswith("./"): href=href[2:]
                    if href == "":
                        destbkpath = navbkpath
                    else:
                        destbkpath = buildBookPath(href, navdir)
                    # create relative path to destbkpath from newdir
                    href = relativePath(destbkpath, newdir)
                    if fragment != "":
                        href = href + "#" + fragment
                epubtype = tattr.get("epub:type", None)
                continue
            if tname == "a" and ttype == "end":
                if nav_type == "toc":
                    play += 1
                    toclist.append((play, lvl, href, title))
                elif nav_type == "page-list":
                    pgcnt += 1
                    pagelist.append((pgcnt, href, title))
                elif nav_type == "landmarks":
                    if epubtype is not None:
                        gtype = _epubtype_guide_map.get(epubtype, None)
                        landmarks.append((gtype, href, title))
                title = ""
                continue

    return toclist, pagelist, landmarks, maxlvl, pgcnt

Пример #12

0

Показать файл

 def map_href_to_id(self, href, ow):
     href = unicode_str(href)
     href = unquoteurl(href)
     return self.href_to_id.get(href, ow)

Пример #13

0

Показать файл

 def build_bookpath(self, href, starting_dir):
     href = unicode_str(href)
     href = unquoteurl(href)
     starting_dir = unicode_str(starting_dir)
     return buildBookPath(href, starting_dir)

Python unquoteurl примеры использования