def parseExceptionsFile(filename):
    """Load the apostrophe-exception word list stored in ``filename``.

    The encoding is chosen by checking the first bytes for a UTF-8 byte
    order mark and, failing that, by trial-decoding the whole file with
    each candidate encoding in turn. If no candidate decodes cleanly the
    last one is kept and the final read reports the failure.

    Returns a list of the file's non-empty lines with trailing whitespace
    stripped, or an empty list when the file cannot be parsed.
    """
    safename = utf8_str(filename)
    words_list = []
    snippet = min(32, os.path.getsize(pathof(safename)))
    with open(pathof(safename), 'rb') as probe:
        raw = probe.read(snippet)
    if raw.startswith(codecs.BOM_UTF8):
        enc = 'utf-8-sig'
    else:
        # The comma after 'utf-16' matters: without it the two adjacent
        # literals are joined into the bogus codec name
        # 'utf-16windows-1252', which raises LookupError when tried.
        encodings = ['utf-8', 'utf-16', 'windows-1252', 'windows-1250']
        for e in encodings:
            try:
                # Decode the whole file once; the handle is closed whether
                # or not decoding succeeds.
                with file_open(pathof(safename), 'r', encoding=e) as fh:
                    fh.readlines()
            except UnicodeError:
                # UnicodeError is the base class of UnicodeDecodeError.
                print('Got unicode error with %s , trying different encoding' %
                      e)
            else:
                break
        enc = e
    try:
        with file_open(pathof(safename), 'r', encoding=enc) as fd:
            stripped = [line.rstrip() for line in fd]
        # drop blank lines
        words_list = [word for word in stripped if word]
        print('Parsing apostrophe exception file %s' % filename)
    except Exception:
        # Best effort: an unreadable exception file is ignored.
        print('Error parsing apostrophe exception file %s: ignoring' %
              filename)
        words_list = []
    return words_list
Exemplo n.º 2
0
def processCRES(i, files, rscnames, sect, data, beg, rsc_ptr, use_hd):
    """Extract the HD image held in CRES section ``i``.

    The first 12 bytes of ``data`` are skipped and the remainder is treated
    as image data. A recognised image is written under the name
    ``rscnames[rsc_ptr]`` in ``files.imgdir`` when ``use_hd`` is true, or as
    a new ``HDimage`` file in ``files.hdimgdir`` otherwise. Unrecognised
    data is only written out when the module-level ``DUMP`` flag is set.

    In every case ``None`` is appended to ``rscnames`` and ``rsc_ptr`` is
    advanced by one. Returns the tuple ``(rscnames, rsc_ptr)``.
    """
    global DUMP
    payload = data[12:]
    imgtype = get_image_type(None, payload)

    if imgtype is not None:
        if use_hd:
            # overwrite corresponding lower res image with hd version
            imgname = rscnames[rsc_ptr]
            imgdest = files.imgdir
        else:
            imgname = "HDimage%05d.%s" % (i-beg+1, imgtype)
            imgdest = files.hdimgdir
        print("Extracting HD image: {0:s} from section {1:d}".format(imgname,i-beg+1))
        with open(pathof(os.path.join(imgdest, imgname)), 'wb') as out:
            out.write(payload)
        rscnames.append(None)
        sect.setsectiondescription(i,"Optional HD Image {0:s}".format(imgname))
        return rscnames, rsc_ptr + 1

    # Not a recognised image: describe it and optionally dump the raw bytes.
    print("Warning: CRES Section %s does not contain a recognised resource" % i)
    rscnames.append(None)
    sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s" % describe(payload[0:4]))
    if DUMP:
        fname = "unknown%05d.dat" % i
        with open(pathof(os.path.join(files.outdir, fname)), 'wb') as out:
            out.write(payload)
        sect.setsectiondescription(i,"Mysterious CRES data, first four bytes %s extracting as %s" % (describe(payload[0:4]), fname))
    return rscnames, rsc_ptr + 1
Exemplo n.º 3
0
 def copy_book_contents_to(self, destdir):
     """Write every file known to the wrapper below ``destdir``.

     Files listed in ``self.id_to_filepath`` are read with ``readfile`` and
     files listed in ``self.book_href_to_filepath`` with ``readotherfile``.
     Missing parent directories are created and text data is converted
     with ``utf8_str`` before being written in binary mode.

     Raises WrapperException when ``destdir`` is not an existing directory.
     """
     destdir = unicode_str(destdir)
     if destdir is None or not unipath.isdir(destdir):
         raise WrapperException('destination directory does not exist')

     def _store(rpath, data):
         # Write one file at its relative path under destdir.
         filepath = os.path.join(destdir, rpath)
         base = os.path.dirname(filepath)
         if not unipath.exists(base):
             os.makedirs(pathof(base))
         if isinstance(data, text_type):
             data = utf8_str(data)
         with open(pathof(filepath), 'wb') as fp:
             fp.write(data)

     for manifest_id, rpath in self.id_to_filepath.items():
         _store(rpath, self.readfile(manifest_id))
     for book_href, rpath in self.book_href_to_filepath.items():
         _store(rpath, self.readotherfile(book_href))
Exemplo n.º 4
0
 def copy_book_contents_to(self, destdir):
     """Copy all book files into the existing directory ``destdir``.

     Entries of ``self.id_to_filepath`` are fetched through ``readfile``;
     entries of ``self.book_href_to_filepath`` through ``readotherfile``.
     Each file is written at its recorded relative path, creating parent
     directories as needed; text data is passed through ``utf8_str`` first.

     Raises WrapperException when ``destdir`` is not an existing directory.
     """
     destdir = unicode_str(destdir)
     if destdir is None or not unipath.isdir(destdir):
         raise WrapperException('destination directory does not exist')

     def _store(rpath, data):
         # Write one file at its relative path under destdir.
         filepath = os.path.join(destdir, rpath)
         base = os.path.dirname(filepath)
         if not unipath.exists(base):
             os.makedirs(pathof(base))
         if isinstance(data, text_type):
             data = utf8_str(data)
         with open(pathof(filepath), 'wb') as fp:
             fp.write(data)

     for manifest_id, rpath in self.id_to_filepath.items():
         _store(rpath, self.readfile(manifest_id))
     for book_href, rpath in self.book_href_to_filepath.items():
         _store(rpath, self.readotherfile(book_href))
def parseExceptionsFile(filename):
    """Read the apostrophe-exception words listed in ``filename``.

    A UTF-8 byte order mark at the start of the file selects 'utf-8-sig'.
    Otherwise each candidate encoding is tried by decoding the whole file,
    and the first one that decodes without error is used. If none does,
    the last candidate is kept and the final read reports the failure.

    Returns the non-empty lines with trailing whitespace removed, or an
    empty list when the file cannot be parsed.
    """
    safename = utf8_str(filename)
    words_list = []
    snippet = min(32, os.path.getsize(pathof(safename)))
    with open(pathof(safename), 'rb') as probe:
        raw = probe.read(snippet)
    if raw.startswith(codecs.BOM_UTF8):
        enc = 'utf-8-sig'
    else:
        # The comma after 'utf-16' matters: without it the two adjacent
        # literals are joined into the bogus codec name
        # 'utf-16windows-1252', which raises LookupError when tried.
        encodings = ['utf-8', 'utf-16', 'windows-1252', 'windows-1250']
        for e in encodings:
            try:
                # Decode the whole file once; the handle is closed whether
                # or not decoding succeeds.
                with file_open(pathof(safename), 'r', encoding=e) as fh:
                    fh.readlines()
            except UnicodeError:
                # UnicodeError is the base class of UnicodeDecodeError.
                print('Got unicode error with %s , trying different encoding' % e)
            else:
                break
        enc = e
    try:
        with file_open(pathof(safename), 'r', encoding=enc) as fd:
            stripped = [line.rstrip() for line in fd]
        # drop blank lines
        words_list = [word for word in stripped if word]
        print('Parsing apostrophe exception file %s' % filename)
    except Exception:
        # Best effort: an unreadable exception file is ignored.
        print('Error parsing apostrophe exception file %s: ignoring' % filename)
        words_list = []
    return words_list
Exemplo n.º 6
0
def processImage(i, files, rscnames, sect, data, beg, rsc_ptr, cover_offset):
    """Extract the image stored in section ``i`` into ``files.imgdir``.

    The image is named ``imageNNNNN.<type>``, or ``coverNNNNN.<type>`` when
    ``cover_offset`` is given and ``i == beg + cover_offset``. The name is
    appended to ``rscnames``, and ``rsc_ptr`` is set to ``i - beg`` if it
    was still -1.

    When the data is not a recognised image, ``None`` is appended to
    ``rscnames`` instead, ``rsc_ptr`` is left untouched, and the raw bytes
    are written out only if the module-level ``DUMP`` flag is set.

    Returns the tuple ``(rscnames, rsc_ptr)``.
    """
    global DUMP
    imgtype = get_image_type(None, data)
    if imgtype is None:
        print("Warning: Section %s does not contain a recognised resource" % i)
        rscnames.append(None)
        sect.setsectiondescription(i,"Mysterious Section, first four bytes %s" % describe(data[0:4]))
        if DUMP:
            fname = "unknown%05d.dat" % i
            with open(pathof(os.path.join(files.outdir, fname)), 'wb') as out:
                out.write(data)
            sect.setsectiondescription(i,"Mysterious Section, first four bytes %s extracting as %s" % (describe(data[0:4]), fname))
        return rscnames, rsc_ptr

    is_cover = cover_offset is not None and i == beg + cover_offset
    if is_cover:
        imgname = "cover%05d.%s" % (i-beg+1, imgtype)
    else:
        imgname = "image%05d.%s" % (i-beg+1, imgtype)
    print("Extracting image: {0:s} from section {1:d}".format(imgname,i-beg+1))
    with open(pathof(os.path.join(files.imgdir, imgname)), 'wb') as out:
        out.write(data)
    rscnames.append(imgname)
    sect.setsectiondescription(i,"Image {0:s}".format(imgname))
    # The first extracted image fixes the resource pointer.
    return rscnames, (i - beg if rsc_ptr == -1 else rsc_ptr)
Exemplo n.º 7
0
    def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug=False):
        """Set up the wrapper state for the book unpacked at ``ebook_root``.

        When ``op`` is not None the manifest, spine, guide, package tag and
        metadata are copied from it; otherwise they start out empty. The
        ebook directory tree is then walked to record the path of every
        file, keyed by manifest id when the file is in the manifest and by
        its book href otherwise.
        """
        self._debug = debug
        self.ebook_root = pathof(ebook_root)
        # plugins and plugin containers can get name and user plugin dir
        self.plugin_dir = pathof(plugin_dir)
        self.plugin_name = plugin_name
        self.outdir = pathof(outdir)
        self.op = op
        if op is None:
            # nothing parsed: opf manifest information starts out empty
            self.id_to_href = {}
            self.id_to_mime = {}
            self.href_to_id = {}
            self.spine_ppd = None
            self.spine = []
            self.guide = []
            self.package_tag = ''
            self.metadataxml = ''
        else:
            # copy in data from parsing of initial opf
            self.opfname = op.opfname
            self.id_to_href = op.get_manifest_id_to_href_dict().copy()
            self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
            self.href_to_id = op.get_href_to_manifest_id_dict().copy()
            self.spine_ppd = op.get_spine_ppd()
            self.spine = op.get_spine()
            self.guide = op.get_guide()
            self.package_tag = op.get_package_tag()
            self.metadataxml = op.get_metadataxml()
        self.other = []  # non-manifest file information
        self.id_to_filepath = {}
        self.modified = {}
        self.added = []
        self.deleted = []

        # walk the ebook directory tree building up initial list of
        # all unmanifested (other) files
        for filepath in unipath.walk(ebook_root):
            # OS X file names and paths use NFD form. The EPUB
            # spec requires all text including filenames to be in NFC form.
            book_href = unicodedata.normalize(
                'NFC', filepath.replace(os.sep, "/"))
            # if book_href file in manifest convert to manifest id
            manifest_id = None
            if book_href.startswith('OEBPS/'):
                manifest_id = self.href_to_id.get(book_href[6:], None)
            if manifest_id is not None:
                self.id_to_filepath[manifest_id] = filepath
                continue
            self.other.append(book_href)
            self.id_to_filepath[book_href] = filepath
Exemplo n.º 8
0
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Zip the tree under ``ebook_path`` into the epub ``epub_filepath``.

    The ``mimetype`` file is written first and stored uncompressed; every
    other file is deflated. The archive is closed even when an error
    occurs part-way through.

    Raises Exception when ``ebook_path`` contains no ``mimetype`` file.
    """
    with zipfile.ZipFile(pathof(epub_filepath), 'w') as outzip:
        files = unipath.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')), pathof('mimetype'), zipfile.ZIP_STORED)
        for name in files:
            if name == 'mimetype':
                # already stored as the first entry
                continue
            filepath = os.path.join(ebook_path, name)
            outzip.write(pathof(filepath), pathof(name), zipfile.ZIP_DEFLATED)
Exemplo n.º 9
0
    def writeXHTML(self):
        """Build the cover page XHTML and write it UTF-8 encoded.

        The file is written to ``self.files.k8text`` under the name
        ``self.cover_page``; an existing file of that name is reported and
        removed first.
        """
        files = self.files
        cover_page = self.cover_page
        markup = self.buildXHTML()

        target = pathof(os.path.join(files.k8text, cover_page))
        if os.path.exists(target):
            print('Warning: {:s} already exists.'.format(cover_page))
            os.remove(target)
        with open(target, 'wb') as out:
            out.write(markup.encode('utf-8'))
Exemplo n.º 10
0
def epub_zip_up_book_contents(ebook_path, epub_filepath):
    """Create the epub ``epub_filepath`` from the files under ``ebook_path``.

    ``mimetype`` becomes the first archive entry and is stored without
    compression; all remaining files are deflated. The archive is closed
    even when an error occurs part-way through.

    Raises Exception when ``ebook_path`` contains no ``mimetype`` file.
    """
    with zipfile.ZipFile(pathof(epub_filepath), 'w') as outzip:
        files = unipath.walk(ebook_path)
        if 'mimetype' not in files:
            raise Exception('mimetype file is missing')
        outzip.write(pathof(os.path.join(ebook_path, 'mimetype')),
                     pathof('mimetype'), zipfile.ZIP_STORED)
        for name in files:
            if name == 'mimetype':
                # already stored as the first entry
                continue
            filepath = os.path.join(ebook_path, name)
            outzip.write(pathof(filepath), pathof(name), zipfile.ZIP_DEFLATED)
Exemplo n.º 11
0
 def writeOPF(self, has_obfuscated_fonts=False):
     """Build the OPF and write it UTF-8 encoded.

     For a K8 book the EPUB OPF is written into ``self.files.k8oebps`` and
     ``self.BookId`` is returned; otherwise the Mobi7 OPF is written to
     ``content.opf`` in ``self.files.mobi7dir`` and 0 is returned.
     """
     is_k8 = self.isK8
     if is_k8:
         data = self.buildEPUBOPF(has_obfuscated_fonts)
         outopf = os.path.join(self.files.k8oebps, EPUB_OPF)
     else:
         data = self.buildMobi7OPF()
         outopf = os.path.join(self.files.mobi7dir, 'content.opf')
     with open(pathof(outopf), 'wb') as out:
         out.write(data.encode('utf-8'))
     return self.BookId if is_k8 else 0
Exemplo n.º 12
0
    def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug=False):
        """Initialise the wrapper for the book unpacked at ``ebook_root``.

        Manifest, spine, guide, package tag and metadata are copied from
        ``op`` when it is not None and start out empty otherwise. Every
        file found under ``ebook_root`` is then recorded in
        ``self.id_to_filepath``, keyed by manifest id if it is in the
        manifest and by its book href (also added to ``self.other``) if not.
        """
        self._debug = debug
        self.ebook_root = pathof(ebook_root)
        # plugins and plugin containers can get name and user plugin dir
        self.plugin_dir = pathof(plugin_dir)
        self.plugin_name = plugin_name
        self.outdir = pathof(outdir)
        self.op = op
        if op is None:
            # nothing parsed: opf manifest information starts out empty
            self.id_to_href = {}
            self.id_to_mime = {}
            self.href_to_id = {}
            self.spine_ppd = None
            self.spine = []
            self.guide = []
            self.package_tag = ""
            self.metadataxml = ""
        else:
            # copy in data from parsing of initial opf
            self.opfname = op.opfname
            self.id_to_href = op.get_manifest_id_to_href_dict().copy()
            self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
            self.href_to_id = op.get_href_to_manifest_id_dict().copy()
            self.spine_ppd = op.get_spine_ppd()
            self.spine = op.get_spine()
            self.guide = op.get_guide()
            self.package_tag = op.get_package_tag()
            self.metadataxml = op.get_metadataxml()
        self.other = []  # non-manifest file information
        self.id_to_filepath = {}
        self.modified = {}
        self.added = []
        self.deleted = []

        # walk the ebook directory tree building up initial list of
        # all unmanifested (other) files
        for filepath in unipath.walk(ebook_root):
            # OS X file names and paths use NFD form. The EPUB
            # spec requires all text including filenames to be in NFC form.
            book_href = unicodedata.normalize(
                "NFC", filepath.replace(os.sep, "/"))
            # if book_href file in manifest convert to manifest id
            manifest_id = None
            if book_href.startswith("OEBPS/"):
                manifest_id = self.href_to_id.get(book_href[6:], None)
            if manifest_id is not None:
                self.id_to_filepath[manifest_id] = filepath
                continue
            self.other.append(book_href)
            self.id_to_filepath[book_href] = filepath
    def quitApp(self):
        """Store the dialog's current settings in ``prefs`` and close it.

        The widget values are copied into ``self.gui_prefs`` and the window
        geometry into ``self.misc_prefs``; the three settings groups are
        then placed in the global ``prefs`` dict before the parent window
        is destroyed.
        """
        global prefs
        gui = self.gui_prefs
        gui['educateQuotes'] = 1 if self.edu_quotes.get() == 'q' else 0
        gui['dashes'] = self.dashBox.current()
        gui['educateEllipses'] = 1 if self.edu_ellipses.get() == 'e' else 0
        gui['useFile'] = self.use_file.get()
        custom_path = self.cust_file_path.get()
        gui['useFilePath'] = pathof(custom_path) if len(custom_path) else ''
        gui['useUnicodeChars'] = self.unicodevar.get()
        self.misc_prefs['windowGeometry'] = self.parent.geometry()

        # copy preferences settings groups pack to global dict
        prefs['gui_selections'] = self.gui_prefs
        prefs['miscellaneous_settings'] = self.misc_prefs
        prefs['update_settings'] = self.update_prefs

        self.parent.destroy()
        self.quit()
Exemplo n.º 14
0
 def deletefile(self, id):
     """Remove the manifest file ``id`` from the book.

     If the file was added or modified, its copy in ``self.outdir`` is
     deleted. The id is removed from the manifest dictionaries and from the
     spine. An id found in ``self.added`` is just dropped from that list;
     any other id is appended to ``self.deleted`` and the OPF is marked
     modified.

     Raises WrapperException when no file path is recorded for ``id``.
     """
     id = unicode_str(id)
     filepath = self.id_to_filepath.get(id, None)
     # The lookup result, not the id, tells whether the file is known.
     if filepath is None:
         raise WrapperException('id does not exist in manifest')
     add_to_deleted = True
     # if file was added or modified, delete file from outdir
     if id in self.added or id in self.modified:
         filepath = os.path.join(self.outdir, filepath)
         if unipath.exists(filepath) and unipath.isfile(filepath):
             os.remove(pathof(filepath))
         if id in self.added:
             self.added.remove(id)
             add_to_deleted = False
         if id in self.modified:
             del self.modified[id]
     # remove from manifest
     href = self.id_to_href[id]
     del self.id_to_href[id]
     del self.id_to_mime[id]
     del self.href_to_id[href]
     # remove from spine
     new_spine = [(sid, linear) for sid, linear in self.spine if sid != id]
     if len(new_spine) != len(self.spine):
         # NOTE(review): the bare name ``setspine`` used previously is not a
         # local or a visible module-level name; this assumes the class
         # provides a ``setspine`` method - confirm.
         self.setspine(new_spine)
     if add_to_deleted:
         self.deleted.append(id)
         self.modified['OEBPS/content.opf'] = 'file'
     del self.id_to_filepath[id]
    def quitApp(self):
        """Save the dialog state into the global ``prefs`` and shut down.

        Widget values go into ``self.gui_prefs``, the window geometry into
        ``self.misc_prefs``; the settings groups are then copied into
        ``prefs`` and the parent window is destroyed.
        """
        global prefs
        gui = self.gui_prefs
        gui['educateQuotes'] = 1 if self.edu_quotes.get() == 'q' else 0
        gui['dashes'] = self.dashBox.current()
        gui['educateEllipses'] = 1 if self.edu_ellipses.get() == 'e' else 0
        gui['useFile'] = self.use_file.get()
        custom_path = self.cust_file_path.get()
        gui['useFilePath'] = pathof(custom_path) if len(custom_path) else ''
        gui['useUnicodeChars'] = self.unicodevar.get()
        self.misc_prefs['windowGeometry'] = self.parent.geometry()

        # copy preferences settings groups pack to global dict
        prefs['gui_selections'] = self.gui_prefs
        prefs['miscellaneous_settings'] = self.misc_prefs
        prefs['update_settings'] = self.update_prefs

        self.parent.destroy()
        self.quit()
Exemplo n.º 16
0
 def writeNAV(self, ncx_data, guidetext, metadata):
     """Build the navigation document and write it UTF-8 encoded.

     The first ``Title`` and ``Language`` entries of ``metadata`` are
     passed to ``buildNAV``; the result is written to ``self.navname``
     inside ``self.files.k8text``.
     """
     title = metadata.get('Title')[0]
     language = metadata.get('Language')[0]
     xhtml = self.buildNAV(ncx_data, guidetext, title, language)
     navpath = os.path.join(self.files.k8text, self.navname)
     with open(pathof(navpath), 'wb') as out:
         out.write(xhtml.encode('utf-8'))
Exemplo n.º 17
0
Arquivo: wrapper.py Projeto: pwr/Sigil
 def deletefile(self, id):
     """Delete the manifest file ``id`` from the book.

     The copy in ``self.outdir`` is removed if the file had been added or
     modified. The id is dropped from the manifest dictionaries and the
     spine. An id found in ``self.added`` is just removed from that list;
     any other id is appended to ``self.deleted`` and the OPF is marked
     modified.

     Raises WrapperException when no file path is recorded for ``id``.
     """
     id = unicode_str(id)
     filepath = self.id_to_filepath.get(id, None)
     # The lookup result, not the id, tells whether the file is known.
     if filepath is None:
         raise WrapperException('id does not exist in manifest')
     add_to_deleted = True
     # if file was added or modified, delete file from outdir
     if id in self.added or id in self.modified:
         filepath = os.path.join(self.outdir,filepath)
         if unipath.exists(filepath) and unipath.isfile(filepath):
             os.remove(pathof(filepath))
         if id in self.added:
             self.added.remove(id)
             add_to_deleted = False
         if id in self.modified:
             del self.modified[id]
     # remove from manifest
     href = self.id_to_href[id]
     del self.id_to_href[id]
     del self.id_to_mime[id]
     del self.href_to_id[href]
     # remove from spine
     new_spine = [(sid, linear) for sid, linear in self.spine if sid != id]
     if len(new_spine) != len(self.spine):
         # NOTE(review): the bare name ``setspine`` used previously is not a
         # local or a visible module-level name; this assumes the class
         # provides a ``setspine`` method - confirm.
         self.setspine(new_spine)
     if add_to_deleted:
         self.deleted.append(id)
         self.modified['OEBPS/content.opf'] = 'file'
     del self.id_to_filepath[id]
Exemplo n.º 18
0
 def zipUpDir(self,
              myzip,
              tdir,
              localname,
              compress_type=zipfile.ZIP_DEFLATED):
     """Recursively add the directory ``tdir/localname`` to ``myzip``.

     ``localname`` is the path relative to ``tdir`` and is also used as the
     path inside the archive; pass "" to add ``tdir`` itself. Every file,
     including those in subdirectories, is written with ``compress_type``.
     """
     currentdir = tdir
     if localname != "":
         currentdir = os.path.join(currentdir, localname)
     for entry in unipath.listdir(currentdir):
         localfilePath = os.path.join(localname, entry)
         realfilePath = os.path.join(currentdir, entry)
         if unipath.isfile(realfilePath):
             myzip.write(pathof(realfilePath), pathof(localfilePath),
                         compress_type)
         elif unipath.isdir(realfilePath):
             # Pass the compression on so nested files do not silently
             # fall back to the default.
             self.zipUpDir(myzip, tdir, localfilePath, compress_type)
Exemplo n.º 19
0
 def write_opf(self):
     """Write the rebuilt OPF to ``OEBPS/<opfname>`` under ``self.outdir``.

     Does nothing when ``self.op`` is None. The parent directory is
     created if it does not exist yet.
     """
     if self.op is None:
         return
     target = pathof(os.path.join(self.outdir, 'OEBPS', self.opfname))
     parent = os.path.dirname(target)
     if not unipath.exists(parent):
         os.makedirs(parent)
     with open(target, 'wb') as out:
         out.write(utf8_str(self.build_opf()))
Exemplo n.º 20
0
 def addotherfile(self, book_href, data) :
     """Add a new non-manifest file at ``book_href`` containing ``data``.

     The file is written below ``self.outdir``, creating parent directories
     as needed; text data is converted with ``utf8_str`` first. The href is
     then recorded in ``self.other``, ``self.added`` and
     ``self.id_to_filepath``.

     Raises WrapperException when the href is already listed in
     ``self.other`` or a file already exists at the target path.
     """
     href = unicode_str(book_href)
     if href in self.other:
         raise WrapperException('book href must be unique')
     desired_path = href.replace("/",os.sep)
     filepath = os.path.join(self.outdir,desired_path)
     if unipath.isfile(filepath):
         raise WrapperException('desired path already exists')
     base = os.path.dirname(filepath)
     if not unipath.exists(base):
         os.makedirs(pathof(base))
     if isinstance(data, text_type):
         data = utf8_str(data)
     with open(pathof(filepath),'wb') as fp:
         fp.write(data)
     self.other.append(href)
     self.added.append(href)
     self.id_to_filepath[href] = desired_path
Exemplo n.º 21
0
 def write_opf(self):
     """Write the rebuilt OPF into the ``OEBPS`` folder of ``self.outdir``.

     Nothing happens when ``self.op`` is None. The parent directory is
     created if it is missing.
     """
     if self.op is None:
         return
     target = pathof(os.path.join(self.outdir, 'OEBPS', self.opfname))
     parent = os.path.dirname(target)
     if not unipath.exists(parent):
         os.makedirs(parent)
     with open(target, 'wb') as out:
         out.write(utf8_str(self.build_opf()))
Exemplo n.º 22
0
def processSRCS(i, files, rscnames, sect, data):
    """Save the kindlegen source archive carried by section ``i``.

    Everything after the first 16 bytes of ``data`` is written to
    ``KINDLEGENSRC_FILENAME`` in ``files.outdir``. ``None`` is appended to
    ``rscnames``, which is returned.
    """
    print("File contains kindlegen source archive, extracting as %s" % KINDLEGENSRC_FILENAME)
    archive_path = os.path.join(files.outdir, KINDLEGENSRC_FILENAME)
    archive_bytes = data[16:]
    with open(pathof(archive_path), 'wb') as out:
        out.write(archive_bytes)
    rscnames.append(None)
    sect.setsectiondescription(i,"Zipped Source Files")
    return rscnames
Exemplo n.º 23
0
def processCMET(i, files, rscnames, sect, data):
    """Save the kindlegen build log carried by section ``i``.

    Everything after the first 10 bytes of ``data`` is written to
    ``KINDLEGENLOG_FILENAME`` in ``files.outdir``. ``None`` is appended to
    ``rscnames``, which is returned.
    """
    print("File contains kindlegen build log, extracting as %s" % KINDLEGENLOG_FILENAME)
    log_path = os.path.join(files.outdir, KINDLEGENLOG_FILENAME)
    log_bytes = data[10:]
    with open(pathof(log_path), 'wb') as out:
        out.write(log_bytes)
    rscnames.append(None)
    sect.setsectiondescription(i,"Kindlegen log")
    return rscnames
Exemplo n.º 24
0
 def addotherfile(self, book_href, data) :
     """Create a new non-manifest file at ``book_href`` holding ``data``.

     The file is written below ``self.outdir`` (parent directories are
     created as needed, text data goes through ``utf8_str``) and the href
     is recorded in ``self.other``, ``self.added`` and
     ``self.id_to_filepath``.

     Raises WrapperException when the href is already listed in
     ``self.other`` or a file already exists at the target path.
     """
     href = unicode_str(book_href)
     if href in self.other:
         raise WrapperException('book href must be unique')
     desired_path = href.replace("/",os.sep)
     filepath = os.path.join(self.outdir,desired_path)
     if unipath.isfile(filepath):
         raise WrapperException('desired path already exists')
     base = os.path.dirname(filepath)
     if not unipath.exists(base):
         os.makedirs(pathof(base))
     if isinstance(data, text_type):
         data = utf8_str(data)
     with open(pathof(filepath),'wb') as fp:
         fp.write(data)
     self.other.append(href)
     self.added.append(href)
     self.id_to_filepath[href] = desired_path
Exemplo n.º 25
0
def processPrintReplica(metadata, files, rscnames, mh):
    """Unpack a Print Replica ebook and write its OPF.

    The raw markup from ``mh`` is optionally saved as ``.rawpr`` (when
    ``DUMP`` or ``WRITE_RAW_DATA`` is set). Its section tables are then
    walked: the first section of each table is written as a ``.pdf`` file
    and the remaining ones as ``.data`` files in ``files.outdir``. When
    ``azw2zip_cfg.isOutputPdf()`` is true the PDF is additionally written
    one directory above ``files.outdir``. Errors during unpacking are
    printed, not raised.
    """
    global DUMP
    global WRITE_RAW_DATA
    rawML = mh.getRawML()
    if DUMP or WRITE_RAW_DATA:
        outraw = os.path.join(files.outdir,files.getInputFileBasename() + '.rawpr')
        with open(pathof(outraw),'wb') as f:
            f.write(rawML)

    print("Print Replica ebook detected")
    try:
        numTables = struct.unpack_from(b'>L', rawML, 0x04)[0]
        tableIndexOffset = 8 + 4*numTables
        # for each table, read in count of sections, assume first section is a PDF
        # and output other sections as binary files
        for i in range(numTables):
            sectionCount = struct.unpack_from(b'>L', rawML, 0x08 + 4*i)[0]
            for j in range(sectionCount):
                sectionOffset, sectionLength = struct.unpack_from(b'>LL', rawML, tableIndexOffset)
                tableIndexOffset += 8
                section = rawML[sectionOffset:(sectionOffset+sectionLength)]
                pdf_fpath = u''
                if j != 0:
                    entryName = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.%03d.data' % ((i+1),j)))
                else:
                    if azw2zip_cfg.isOutputPdf():
                        pdf_fpath = os.path.join(files.outdir, '..', azw2zip_cfg.makeOutputFileName(metadata) + ('.%03d.pdf' % (i+1)))
                    entryName = os.path.join(files.outdir, files.getInputFileBasename() + ('.%03d.pdf' % (i+1)))
                with open(pathof(entryName), 'wb') as f:
                    f.write(section)
                if pdf_fpath:
                    with open(pathof(pdf_fpath), 'wb') as f:
                        f.write(section)
    except Exception as e:
        print('Error processing Print Replica: ' + str(e))

    fileinfo = [[None,'', files.getInputFileBasename() + '.pdf']]
    # mark every named resource as used
    usedmap = dict((name, 'used') for name in rscnames if name is not None)
    opf = OPFProcessor(files, metadata, fileinfo, rscnames, False, mh, usedmap)
    opf.writeOPF()
Exemplo n.º 26
0
 def write_opf(self):
     """Write the rebuilt OPF to its book path under ``self.outdir``.

     ``self.opfbookpath`` uses "/" separators and is converted to the
     platform separator first. Does nothing when ``self.op`` is None; the
     parent directory is created if it is missing.
     """
     if self.op is None:
         return
     relative = self.opfbookpath.replace('/', os.sep)
     target = pathof(os.path.join(self.outdir, relative))
     parent = os.path.dirname(target)
     if not unipath.exists(parent):
         os.makedirs(parent)
     with open(target, 'wb') as out:
         out.write(utf8_str(self.build_opf()))
Exemplo n.º 27
0
 def writeK8NCX(self, ncx_data, metadata):
     """Build the K8 NCX and write it as ``toc.ncx`` in ``files.k8oebps``.

     Uses the first ``Title``, ``UniqueID`` and ``Language`` entries of
     ``metadata`` and sets ``self.isNCX``.
     """
     self.isNCX = True
     print("Write K8 ncx")
     title = metadata['Title'][0]
     unique_id = metadata['UniqueID'][0]
     language = metadata.get('Language')[0]
     xml = self.buildK8NCX(ncx_data, title, unique_id, language)
     ncxname = os.path.join(self.files.k8oebps, 'toc.ncx')
     with open(pathof(ncxname), 'wb') as out:
         out.write(xml.encode('utf-8'))
Exemplo n.º 28
0
def processUnknownSections(mh, sect, files, K8Boundary):
    """Describe every section that has no description yet.

    Sections from ``mh.start`` up to ``sect.num_sections`` (or up to
    ``K8Boundary`` when ``mh.start`` lies before it) are classified by
    their first four bytes as termination markers, INDX sections or
    unknown data. When the module-level ``DUMP`` flag is set, INDX and
    unknown sections are also written to ``files.outdir``.
    """
    global DUMP
    global TERMINATION_INDICATOR1
    global TERMINATION_INDICATOR2
    global TERMINATION_INDICATOR3
    if DUMP:
        print("Unpacking any remaining unknown records")
    beg = mh.start
    end = sect.num_sections
    if beg < K8Boundary:
        # then we're processing the first part of a combination file
        end = K8Boundary
    for i in range(beg, end):
        if sect.sectiondescriptions[i] != "":
            continue
        data = sect.loadSection(i)
        ident = data[0:4]
        fname = None  # set only for sections that may be dumped
        if ident == TERMINATION_INDICATOR3:
            description = "Termination Marker 3 Nulls"
        elif ident == TERMINATION_INDICATOR2:
            description = "Termination Marker 2 Nulls"
        elif ident == TERMINATION_INDICATOR1:
            description = "Termination Marker 1 Null"
        elif ident == b"INDX":
            # Compare against a bytes literal: on Python 3 a bytes slice
            # never equals the str "INDX". NOTE(review): assumes
            # loadSection() returns bytes - confirm.
            fname = "Unknown%05d_INDX.dat" % i
            description = "Unknown INDX section"
        else:
            fname = "unknown%05d.dat" % i
            description = "Mysterious Section, first four bytes %s" % describe(data[0:4])
        if DUMP and fname is not None:
            outname = os.path.join(files.outdir, fname)
            with open(pathof(outname), 'wb') as f:
                f.write(data)
            print("Extracting %s: %s from section %d" % (description, fname, i))
            description = description + ", extracting as %s" % fname
        sect.setsectiondescription(i, description)
Exemplo n.º 29
0
def get_image_type(imgname, imgdata=None):
    """Return the image type of a file or of in-memory data.

    ``imghdr.what`` is consulted first, with "jpeg" reported as "jpg". If
    it finds nothing, data that starts with the JPEG magic bytes and ends
    (ignoring trailing NUL bytes) with the JPEG end marker is reported as
    "jpg". Returns None when the type cannot be determined.
    """
    imgtype = unicode_str(imghdr.what(pathof(imgname), imgdata))
    if imgtype == "jpeg":
        return "jpg"
    if imgtype is not None:
        return imgtype

    # imghdr only checks for JFIF or Exif JPEG files. Apparently, there are some
    # with only the magic JPEG bytes out there...
    # ImageMagick handles those, so, do it too.
    if imgdata is None:
        with open(pathof(imgname), 'rb') as f:
            imgdata = f.read()
    if imgdata[0:2] != b'\xFF\xD8':
        return imgtype
    # Find the end of the data once trailing NUL bytes are ignored.
    end = len(imgdata)
    while imgdata[end - 1:end] == b'\x00':
        end -= 1
    # Be extra safe, check the trailing bytes, too.
    if imgdata[end - 2:end] == b'\xFF\xD9':
        imgtype = "jpg"
    return imgtype
Exemplo n.º 30
0
def unzip_epub_to_dir(path_to_epub, destdir):
    """Extract every member of the epub ``path_to_epub`` below ``destdir``.

    Parent directories are created as needed. Directory entries in the
    archive only create the directory. Both the epub file and the archive
    are closed even when extraction fails.
    """
    # NOTE(review): member names are joined to destdir unchecked, so a
    # crafted archive containing ".." components could write outside
    # destdir - validate names if the epub is untrusted.
    with open(pathof(path_to_epub), 'rb') as f:
        with ZipFile(f) as sz:
            for name in sz.namelist():
                is_directory = name.endswith("/")
                data = sz.read(name)
                name = name.replace("/", os.sep)
                filepath = os.path.join(destdir, name)
                basedir = os.path.dirname(filepath)
                if not os.path.isdir(basedir):
                    os.makedirs(basedir)
                if is_directory:
                    # nothing to write; opening a directory path would fail
                    continue
                with open(filepath, 'wb') as fp:
                    fp.write(data)
Exemplo n.º 31
0
def unzip_epub_to_dir(path_to_epub, destdir):
    """Unpack the epub ``path_to_epub`` into the directory ``destdir``.

    Parent directories are created as needed. Directory entries in the
    archive only create the directory. Both the epub file and the archive
    are closed even when extraction fails.
    """
    # NOTE(review): member names are joined to destdir unchecked, so a
    # crafted archive containing ".." components could write outside
    # destdir - validate names if the epub is untrusted.
    with open(pathof(path_to_epub), 'rb') as f:
        with ZipFile(f) as sz:
            for name in sz.namelist():
                is_directory = name.endswith("/")
                data = sz.read(name)
                name = name.replace("/", os.sep)
                filepath = os.path.join(destdir,name)
                basedir = os.path.dirname(filepath)
                if not os.path.isdir(basedir):
                    os.makedirs(basedir)
                if is_directory:
                    # nothing to write; opening a directory path would fail
                    continue
                with open(filepath,'wb') as fp:
                    fp.write(data)
Exemplo n.º 32
0
Arquivo: wrapper.py Projeto: pwr/Sigil
 def writefile(self, id, data):
     """Write ``data`` as the new content of manifest file ``id``.

     The file is written below ``self.outdir`` at the path recorded for
     the id, creating the parent directory if needed. Data is converted
     with ``utf8_str`` when it is text or the id's media type ends in
     ``+xml``. The id is then marked as modified.

     Raises WrapperException when no file path is recorded for ``id``.
     """
     id = unicode_str(id)
     relpath = self.id_to_filepath.get(id, None)
     if relpath is None:
         raise WrapperException('Id does not exist in manifest')
     mime = self.id_to_mime.get(id,'')
     target = os.path.join(self.outdir, relpath)
     parent = os.path.dirname(target)
     if not unipath.exists(parent):
         os.makedirs(pathof(parent))
     needs_encoding = mime.endswith('+xml') or isinstance(data, text_type)
     payload = utf8_str(data) if needs_encoding else data
     with open(target,'wb') as out:
         out.write(payload)
     self.modified[id] = 'file'
Exemplo n.º 33
0
 def writeNCX(self, metadata):
     """Build the NCX and write it as ``toc.ncx`` in ``files.mobi7dir``.

     The NCX refers to ``book.html`` and uses the first ``Title``,
     ``UniqueID`` and ``Language`` entries of ``metadata``. Sets
     ``self.isNCX``.
     """
     self.isNCX = True
     print("Write ncx")
     title = metadata['Title'][0]
     unique_id = metadata['UniqueID'][0]
     language = metadata.get('Language')[0]
     xml = self.buildNCX('book.html', title, unique_id, language)
     # write the ncx file
     ncxname = os.path.join(self.files.mobi7dir, 'toc.ncx')
     with open(pathof(ncxname), 'wb') as out:
         out.write(xml.encode('utf-8'))
 def fileChooser(self):
     """Let the user pick an exception file and show the chosen path.

     The dialog opens in the directory stored under ``lastDir`` in
     ``self.misc_prefs``. If a file is chosen, its normalised path replaces
     the content of ``self.cust_file_path`` and ``lastDir`` is updated to
     the file's directory.
     """
     file_opt = {
         'parent': None,
         'title': 'Select exception file',
         'defaultextension': '.txt',
         'initialdir': unicode_str(self.misc_prefs['lastDir'], 'utf-8'),
         'multiple': False,
         'filetypes': [('Text Files', '.txt'), ('All files', '.*')],
     }
     inpath = tkinter_filedialog.askopenfilename(**file_opt)
     if not len(inpath):
         return
     entry = self.cust_file_path
     # the widget is kept read-only except while its text is replaced
     entry.config(state="normal")
     entry.delete(0, tkinter_constants.END)
     entry.insert(0, os.path.normpath(inpath))
     self.misc_prefs['lastDir'] = pathof(os.path.dirname(inpath))
     entry.config(state="readonly")
Exemplo n.º 35
0
 def writefile(self, id, data):
     """Replace the content of manifest file ``id`` with ``data``.

     The file is written below ``self.outdir`` at the path recorded for
     the id; the parent directory is created if needed. Text data, and any
     data whose media type ends in ``+xml``, is converted with
     ``utf8_str``. The id is then marked as modified.

     Raises WrapperException when no file path is recorded for ``id``.
     """
     id = unicode_str(id)
     relpath = self.id_to_filepath.get(id, None)
     if relpath is None:
         raise WrapperException('Id does not exist in manifest')
     mime = self.id_to_mime.get(id, '')
     target = os.path.join(self.outdir, relpath)
     parent = os.path.dirname(target)
     if not unipath.exists(parent):
         os.makedirs(pathof(parent))
     needs_encoding = mime.endswith('+xml') or isinstance(data, text_type)
     payload = utf8_str(data) if needs_encoding else data
     with open(target, 'wb') as out:
         out.write(payload)
     self.modified[id] = 'file'
 def fileChooser(self):
     """Open a file dialog for the exception file and display the choice.

     The dialog starts in the directory stored under ``lastDir`` in
     ``self.misc_prefs``. A chosen file's normalised path replaces the
     content of ``self.cust_file_path`` and its directory becomes the new
     ``lastDir``.
     """
     file_opt = {
         'parent': None,
         'title': 'Select exception file',
         'defaultextension': '.txt',
         'initialdir': unicode_str(self.misc_prefs['lastDir'], 'utf-8'),
         'multiple': False,
         'filetypes': [('Text Files', '.txt'), ('All files', '.*')],
     }
     inpath = tkinter_filedialog.askopenfilename(**file_opt)
     if not len(inpath):
         return
     entry = self.cust_file_path
     # the widget is kept read-only except while its text is replaced
     entry.config(state="normal")
     entry.delete(0, tkinter_constants.END)
     entry.insert(0, os.path.normpath(inpath))
     self.misc_prefs['lastDir'] = pathof(os.path.dirname(inpath))
     entry.config(state="readonly")
Exemplo n.º 37
0
 def deletefile(self, id):
     """Remove the manifest file ``id`` from the book.

     The copy in ``self.outdir`` is deleted if the file had been added or
     modified. The id is removed from all manifest dictionaries and from
     the spine. An id found in ``self.added`` is just dropped from that
     list; any other id is recorded in ``self.deleted`` and the OPF is
     marked modified.

     Raises WrapperException when ``id`` is not in the manifest, has no
     recorded file path, or is the ncx of an epub2 book.
     """
     id = unicode_str(id)
     if id not in self.id_to_href:
         raise WrapperException('Id does not exist in manifest')
     filepath = self.id_to_filepath.get(id, None)
     # The lookup result, not the id, tells whether a path is recorded.
     if filepath is None:
         raise WrapperException('Id does not exist in manifest')
     if self.epub_version.startswith("2") and id == self.gettocid():
         raise WrapperException('Can not add or remove an ncx under epub2')
     add_to_deleted = True
     # if file was added or modified, delete file from outdir
     if id in self.added or id in self.modified:
         filepath = os.path.join(self.outdir, filepath)
         if unipath.exists(filepath) and unipath.isfile(filepath):
             os.remove(pathof(filepath))
         if id in self.added:
             self.added.remove(id)
             add_to_deleted = False
         if id in self.modified:
             del self.modified[id]
     # remove from manifest
     href = self.id_to_href[id]
     bookpath = self.id_to_bookpath[id]
     del self.id_to_href[id]
     del self.id_to_mime[id]
     del self.id_to_props[id]
     del self.id_to_fall[id]
     del self.id_to_over[id]
     del self.id_to_bookpath[id]
     del self.href_to_id[href]
     del self.bookpath_to_id[bookpath]
     # remove from spine
     new_spine = [entry for entry in self.spine if entry[0] != id]
     if len(new_spine) != len(self.spine):
         self.setspine_epub3([(sid, linear, properties)
                              for sid, linear, properties in new_spine])
     if add_to_deleted:
         self.deleted.append(('manifest', id, bookpath))
         self.modified[self.opfbookpath] = 'file'
     del self.id_to_filepath[id]
Exemplo n.º 38
0
def processRESC(i, files, rscnames, sect, data, k8resc):
    """Handle a K8 RESC section.

    When the module-level DUMP flag is set the raw section is written to
    ``RESC<i>.dat`` in ``files.outdir``.  The section payload (everything
    after the first 16 bytes) is handed to K8RESCProcessor.

    Returns the updated ``rscnames`` list (a ``None`` placeholder is appended
    for this section) and the new K8RESCProcessor instance.
    """
    global DUMP
    if DUMP:
        dump_name = "RESC%05d.dat" % i
        print("Extracting Resource: ", dump_name)
        dump_path = os.path.join(files.outdir, dump_name)
        with open(pathof(dump_path), 'wb') as out:
            out.write(data)

    # The guarded (try/except) variant of this step is disabled, so any
    # parser error propagates to the caller; the fallback branch is kept
    # for when the guard is switched back on.
    parse_resc = True
    if not parse_resc:
        print("Warning: cannot extract information from RESC.")
        k8resc = None
    else:
        # parse the spine and metadata from RESC
        k8resc = K8RESCProcessor(data[16:], DUMP)

    rscnames.append(None)
    sect.setsectiondescription(i,"K8 RESC section")
    return rscnames, k8resc
Exemplo n.º 39
0
 def __init__(self, filename):
     """Load ``filename`` and index its section table.

     Reads the whole file into ``self.data``, slices out the 78-byte
     header, and unpacks the (offset, attribute) pairs that follow it.
     A final pseudo-section holding the file length is appended so that
     every real section has a following offset.
     """
     self.data = b''
     with open(pathof(filename), 'rb') as infile:
         self.data = infile.read()

     header = self.data[:78]
     self.palmheader = header
     self.palmname = self.data[:32]
     ident_start = 0x3C
     self.ident = header[ident_start:ident_start + 8]
     (self.num_sections,) = struct.unpack_from(b'>H', header, 76)
     self.filelength = len(self.data)

     # two unsigned longs per section: offset then attributes
     table_format = bstr('>%dL' % (self.num_sections * 2))
     table = struct.unpack_from(table_format, self.data, 78)
     table = table + (self.filelength, 0)
     self.sectionoffsets = table[::2]
     self.sectionattributes = table[1::2]

     descriptions = [""] * (self.num_sections + 1)
     descriptions[-1] = "File Length Only"
     self.sectiondescriptions = descriptions
Exemplo n.º 40
0
def processFONT(i, files, rscnames, sect, data, obfuscate_data, beg, rsc_ptr):
    """Extract the font stored in FONT section ``i``.

    The section header (five big-endian unsigned longs starting at offset 4)
    gives the flags, the start of the font data and the location/length of
    the XOR key.  Flag 0x0002 means the first 1040 bytes are XOR-obfuscated,
    flag 0x0001 means the data is zlib compressed.  The decoded font is
    written into ``files.imgdir`` with an extension chosen from its header.

    Args:
        i: section number.
        files: object providing the ``imgdir`` output directory.
        rscnames: list of resource names, one entry per processed section.
        sect: object providing ``setsectiondescription(i, text)``.
        data: raw bytes of the section.
        obfuscate_data: list collecting names of fonts that were obfuscated.
        beg: number of the first resource section.
        rsc_ptr: index of the first resource found so far, or -1.

    Returns:
        The tuple ``(rscnames, obfuscate_data, rsc_ptr)``.  When the header
        can not be parsed a ``None`` placeholder is appended to ``rscnames``
        so the list stays aligned with the section numbering.
    """
    fontname = "font%05d" % i
    ext = '.dat'
    try:
        _usize, fflags, dstart, xor_len, xor_start = struct.unpack_from(b'>LLLLL',data,4)
    except struct.error:
        # header too short to hold the five fields: nothing to extract
        print("Failed to extract font: {0:s} from section {1:d}".format(fontname,i))
        rscnames.append(None)
        sect.setsectiondescription(i,"Unparseable FONT section")
        return rscnames, obfuscate_data, rsc_ptr

    print("Extracting font:", fontname)
    font_data = data[dstart:]
    extent = min(len(font_data), 1040)
    if fflags & 0x0002:
        # obfuscated so need to de-obfuscate the first 1040 bytes
        key = bytearray(data[xor_start: xor_start+ xor_len])
        buf = bytearray(font_data)
        for n in range(extent):
            buf[n] ^=  key[n%xor_len]
        font_data = bytes(buf)
    if fflags & 0x0001:
        # ZLIB compressed data
        font_data = zlib.decompress(font_data)
    hdr = font_data[0:4]
    if hdr == b'\0\1\0\0' or hdr == b'true' or hdr == b'ttcf':
        ext = '.ttf'
    elif hdr == b'OTTO':
        ext = '.otf'
    else:
        print("Warning: unknown font header %s" % hexlify(hdr))
    if (ext == '.ttf' or ext == '.otf') and (fflags & 0x0002):
        obfuscate_data.append(fontname + ext)
    fontname += ext
    outfnt = os.path.join(files.imgdir, fontname)
    with open(pathof(outfnt), 'wb') as f:
        f.write(font_data)
    rscnames.append(fontname)
    sect.setsectiondescription(i,"Font {0:s}".format(fontname))
    if rsc_ptr == -1:
        rsc_ptr = i - beg
    return rscnames, obfuscate_data, rsc_ptr
Exemplo n.º 41
0
    def __init__(self, opf_path, opf_bookpath, debug=False):
        """Read the OPF at ``opf_path`` and parse it.

        ``opf_bookpath`` is the path of the OPF inside the book; its
        starting directory is recorded as ``opf_dir``.  All result
        containers are reset to empty before ``_parseData()`` is invoked.
        """
        self._debug = debug

        # where the OPF lives, on disk and inside the book
        opf_path = pathof(opf_path)
        self.opfname = os.path.basename(opf_path)
        self.opf_bookpath = opf_bookpath
        self.opf_dir = startingDir(opf_bookpath)

        # raw OPF text and the current parse position
        self.opf = None
        with open(opf_path, 'rb') as opf_file:
            raw_opf = opf_file.read()
            self.opf = raw_opf.decode('utf-8')
        self.opos = 0

        # package / metadata results
        self.package = None
        self.metadata_attr = None
        self.metadata = []
        self.cover_id = None

        # invertible manifest maps (inverting is left to downstream code)
        self.manifest_id_to_href = {}
        self.manifest_id_to_bookpath = {}

        # manifest maps that can not be inverted
        self.manifest_id_to_mime = {}
        self.manifest_id_to_properties = {}
        self.manifest_id_to_fallback = {}
        self.manifest_id_to_overlay = {}

        # spine, guide and bindings
        self.spine = []
        self.spine_ppd = None
        self.guide = []
        self.bindings = []

        # folder structure: folders seen per group and their counts
        self.group_folder = {}
        self.group_count = {}
        for group, folder in (("epub", 'META-INF'), ("opf", self.opf_dir)):
            self.group_folder[group] = [folder]
            self.group_count[group] = [1]

        self._parseData()
Exemplo n.º 42
0
 def __init__(self, opf_path, debug=False):
     """Read the OPF at ``opf_path``, reset all result fields and parse it."""
     self._debug = debug

     opf_path = pathof(opf_path)
     self.opfname = os.path.basename(opf_path)

     # raw OPF text (utf-8) and the current parse position
     self.opf = None
     with open(opf_path, 'rb') as opf_file:
         raw_opf = opf_file.read()
         self.opf = raw_opf.decode('utf-8')
     self.opos = 0

     # package and metadata
     self.package = None
     self.metadata_attr = None
     self.metadata = []
     self.cover_id = None

     # manifest lookup tables
     self.manifest_id_to_href = {}
     self.manifest_id_to_mime = {}
     self.href_to_manifest_id = {}
     self.manifest_id_to_properties = {}

     # spine, guide and bindings
     self.spine = []
     self.spine_ppd = None
     self.guide = []
     self.bindings = []

     self._parseData()
Exemplo n.º 43
0
 def __init__(self, opf_path, debug=False):
     """Read the OPF at ``opf_path``, reset all result fields and parse it."""
     self._debug = debug

     opf_path = pathof(opf_path)
     self.opfname = os.path.basename(opf_path)

     # raw OPF text (utf-8) and the current parse position
     self.opf = None
     with open(opf_path, 'rb') as opf_file:
         raw_opf = opf_file.read()
         self.opf = raw_opf.decode('utf-8')
     self.opos = 0

     # package and metadata tags start out as two-slot placeholders
     self.package_tag = [None, None]
     self.metadata_tag = [None, None]
     self.metadata = []
     self.cover_id = None

     # manifest lookup tables
     self.manifest_id_to_href = {}
     self.manifest_id_to_mime = {}
     self.href_to_manifest_id = {}

     # spine and guide
     self.spine_ppd = None
     self.spine = []
     self.guide = []

     self._parseData()
Exemplo n.º 44
0
def run(bk):
    """Plugin entry point: import a Kindlebook into the current book.

    Loads the plugin preferences (filling in defaults), checks for a plugin
    update, asks the user for a Kindlebook, unpacks it to an epub (KF8 /
    combo files) or builds one from the unpacked MOBI sources, optionally
    tweaks the OPF according to the preferences, and finally hands the epub
    bytes to ``bk.addotherfile``.

    Preferences are saved on every exit path reached after they were
    modified.

    Returns:
        0 on success or when no file was selected, -1 when the book can not
        be opened (Topaz, encrypted, Print Replica) or the OPF tweak fails.
    """
    global prefs
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'azw3_epub_version' not in prefs:
        prefs['azw3_epub_version'] = "2"  # A, F, 2 or 3
    if 'use_hd_images' not in prefs:
        prefs['use_hd_images'] = True
    if 'use_src_from_dual_mobi' not in prefs:
        prefs['use_src_from_dual_mobi'] = True
    if 'asin_for_kindlegen_plugin' not in prefs:
        prefs['asin_for_kindlegen_plugin'] = False
    if 'preserve_kindleunpack_meta' not in prefs:
        prefs['preserve_kindleunpack_meta'] = False

    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=7))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
    update_available, online_version, time = chk.update_info()
    # update preferences with latest date/time/version
    prefs['last_time_checked'] = time
    if online_version is not None:
        prefs['last_online_version'] = online_version
    if update_available:
        title = 'Plugin Update Available'
        msg = 'Version {} of the {} plugin is now available.'.format(online_version, bk._w.plugin_name)
        update_msgbox(title, msg)

    if _DEBUG_:
        print('Python sys.path', sys.path)
        print('Default AZW3 epub version:', prefs['azw3_epub_version'])

    inpath = fileChooser()
    if inpath == '' or not os.path.exists(inpath):
        print('No input file selected!')
        bk.savePrefs(prefs)
        return 0

    print ('Path to Kindlebook {0}'.format(inpath))
    from mobi_stuff import mobiProcessor, topaz
    if topaz(inpath):
        print('Kindlebook is in Topaz format: can\'t open!')
        bk.savePrefs(prefs)
        return -1

    mobionly = False
    mp = mobiProcessor(inpath, prefs['azw3_epub_version'],  prefs['use_hd_images'])
    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))
    if mp.isEncrypted:
        print('Kindlebook is encrypted: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    if mp.isPrintReplica:
        print('Kindlebook is a Print Replica: can\'t open!')
        bk.savePrefs(prefs)
        return -1
    if not mp.isComboFile and not mp.isKF8:
        mobionly = True

    with make_temp_directory() as temp_dir:
        TWEAK = True
        asin = None
        if not mobionly:
            epub, opf, src = mp.unpackEPUB(temp_dir)
            if src is not None and isEPUB(src) and prefs['use_src_from_dual_mobi']:
                print ('Using included kindlegen sources.')
                epub = src
            else:
                # If user requested no tweaks through preferences, use standard epub from KindleUnpack
                if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                    TWEAK = False
                elif prefs['asin_for_kindlegen_plugin']:
                    if opf is not None:
                        # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                        asin = get_asin(opf)
                        if asin is not None:
                            asin = unicode_str(asin)
                    else:
                        TWEAK = False
                if TWEAK:
                    # Modify the opf with the requested tweaks and build a new epub
                    if tweak_opf(opf, asin, preserve_comments=prefs['preserve_kindleunpack_meta']):
                        os.remove(epub)
                        with temp_epub_handle(delete=False) as new_epub:
                            epub_zip_up_book_contents(os.path.join(temp_dir,'mobi8'), new_epub)
                        epub = new_epub
        else:
            from quickepub import QuickEpub
            mobidir, mobi_html, mobi_opf, mobiBaseName = mp.unpackMOBI(temp_dir)
            if not prefs['asin_for_kindlegen_plugin'] and not prefs['preserve_kindleunpack_meta']:
                TWEAK = False
            elif prefs['asin_for_kindlegen_plugin']:
                if mobi_opf is not None:
                    # Get asin from metadata and put it in a dc:meta that the Kindlegen plugin can use.
                    asin = get_asin(mobi_opf)
                    if asin is not None:
                        asin = unicode_str(asin)
                    # NOTE(review): unlike the KF8 branch above, tweaking is
                    # switched off when no asin is found rather than when the
                    # opf is missing - confirm this asymmetry is intended.
                    else:
                        TWEAK = False
            if TWEAK:
                if not tweak_opf(mobi_opf, asin, preserve_comments=prefs['preserve_kindleunpack_meta']):
                    print('OPF manipulation failed!')
                    # persist prefs here too, as every other exit path does
                    bk.savePrefs(prefs)
                    return -1
            qe = QuickEpub(mobidir, mobi_html, mobi_opf)
            epub = qe.makeEPUB()

        # Save prefs to json
        bk.savePrefs(prefs)
        print ('Path to epub or src {0}'.format(epub))
        with file_open(epub,'rb')as fp:
            data = fp.read()
        bk.addotherfile('dummy.epub', data)

    return 0
Exemplo n.º 45
0
def run(bk):
    """Plugin entry point: add an iBooks XML file to the book's META-INF.

    Loads the plugin preferences (filling in defaults), optionally checks
    for a plugin update, refuses to continue when ``META-INF/<XMLFILE>`` is
    already present, then asks the user for the file and adds its bytes to
    the book through ``bk.addotherfile``.

    Preferences are saved on every exit path reached after they were
    modified.

    Returns:
        0 on success, when the file is already present, or when the file
        selection was cancelled; -1 when the selected file can not be read.
    """
    global prefs
    prefs = bk.getPrefs()

    # set default preference values
    if 'use_file_path' not in prefs:
        prefs['use_file_path'] = expanduser('~')
    if 'check_for_updates' not in prefs:
        prefs['check_for_updates'] = True
    if 'last_time_checked' not in prefs:
        prefs['last_time_checked'] = str(datetime.now() - timedelta(hours=delta+1))
    if 'last_online_version' not in prefs:
        prefs['last_online_version'] = '0.1.0'

    if prefs['check_for_updates']:
        chk = UpdateChecker(prefs['last_time_checked'], prefs['last_online_version'], bk._w)
        update_available, online_version, time = chk.update_info()
        # update preferences with latest date/time/version
        prefs['last_time_checked'] = time
        if online_version is not None:
            prefs['last_online_version'] = online_version
        if update_available:
            title = 'Plugin Update Available'
            msg = 'Version {} of the {} plugin is now available.'.format(online_version, bk._w.plugin_name)
            show_msgbox(title, msg, 'info')

    # href of the file inside the epub; used for the presence check and the add
    xml_href = 'META-INF/{}'.format(XMLFILE)

    if xml_href in bk._w.other:
        title = 'File Already Present!'
        msg = 'The {} file is already present. Please delete it before trying to add another'.format(XMLFILE)
        show_msgbox(title, msg, 'error')
        # keep the update-check results gathered above
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Python sys.path: {}\n'.format(sys.path))

    inpath = fileChooser()
    if inpath == '' or not os.path.exists(inpath):
        print('iBooks XML file selection canceled!')
        bk.savePrefs(prefs)
        return 0

    if _DEBUG_:
        print('Path to XML file: {}\n'.format(inpath))

    # Save last directory accessed to JSON prefs
    prefs['use_file_path'] = pathof(os.path.dirname(inpath))

    # Save prefs to json
    bk.savePrefs(prefs)

    try:
        with file_open(inpath,'rb')as fp:
            data = fp.read()
    except Exception:
        # any read failure is reported to the user; SystemExit and
        # KeyboardInterrupt are deliberately left to propagate
        title = 'Unexpected error!'
        msg = 'Error reading the {} file. Perhaps it is corrupt or missing?'.format(XMLFILE)
        show_msgbox(title, msg, 'error')
        return -1

    if _DEBUG_:
        print('Internal epub href: META-INF/{}\n'.format(XMLFILE))

    bk.addotherfile(xml_href, data)

    return 0
Exemplo n.º 46
0
    def __init__(self, ebook_root, outdir, op, plugin_dir, plugin_name, debug = False):
        """Set up the wrapper state for one plugin run.

        Reads ``sigil.cfg`` from ``outdir``, copies the manifest, spine,
        guide and metadata information from the parsed OPF ``op`` (when one
        is given), and walks ``ebook_root`` to record the on-disk path of
        every file, keyed by manifest id or, for files not found in the
        manifest, by book href.

        Args:
            ebook_root: root directory of the unpacked ebook.
            outdir: output directory; must contain ``sigil.cfg``.
            op: parsed OPF object, or None to start with empty tables.
            plugin_dir: user plugin directory.
            plugin_name: name of the running plugin.
            debug: stored as ``self._debug``.
        """
        self._debug = debug
        self.ebook_root = pathof(ebook_root)
        # plugins and plugin containers can get name and user plugin dir
        self.plugin_dir = pathof(plugin_dir)
        self.plugin_name = plugin_name
        self.outdir = pathof(outdir)

        # initialize the sigil configuration info passed in outdir with sigil.cfg
        self.appdir = None
        self.usrsupdir = None
        self.selected = []
        cfg = ''
        with open(os.path.join(self.outdir, 'sigil.cfg'), 'rb') as f:
            cfg = f.read().decode('utf-8')
        # normalize line endings before splitting into lines
        cfg = cfg.replace("\r", "")
        cfg_lst = cfg.split("\n")
        # first line -> appdir, second line -> usrsupdir, the rest -> selected
        if len(cfg_lst) >= 2:
            self.appdir = cfg_lst.pop(0)
            self.usrsupdir = cfg_lst.pop(0)
            self.selected = cfg_lst
        # called only after the cfg values above have been set
        os.environ['SigilGumboLibPath'] = self.get_gumbo_path()

        # dictionaries used to map opf manifest information
        self.id_to_href = {}
        self.id_to_mime = {}
        self.href_to_id = {}
        self.id_to_props = {}
        self.id_to_fall = {}
        self.id_to_over = {}
        self.spine_ppd = None
        self.spine = []
        self.guide = []
        self.package_tag = None
        self.epub_version = None
        # self.metadata_attr = None
        # self.metadata = []
        self.metadataxml = ''
        self.op = op
        if self.op is not None:
            # copy in data from parsing of initial opf
            self.opfname = op.opfname
            self.id_to_href = op.get_manifest_id_to_href_dict().copy()
            self.id_to_mime = op.get_manifest_id_to_mime_dict().copy()
            self.href_to_id = op.get_href_to_manifest_id_dict().copy()
            self.id_to_props = op.get_manifest_id_to_properties_dict().copy()
            self.id_to_fall = op.get_manifest_id_to_fallback_dict().copy()
            self.id_to_over = op.get_manifest_id_to_overlay_dict().copy()
            self.spine_ppd = op.get_spine_ppd()
            self.spine = op.get_spine()
            self.guide = op.get_guide()
            self.package_tag = op.get_package_tag()
            self.epub_version = op.get_epub_version()
            # self.metadata = op.get_metadata()
            # self.metadata_attr = op.get_metadata_attr()
            self.metadataxml = op.get_metadataxml()
        self.other = []  # non-manifest file information
        self.id_to_filepath = {}
        # change tracking, all empty at start
        self.modified = {}
        self.added = []
        self.deleted = []

        # walk the ebook directory tree building up initial list of
        # all unmanifested (other) files
        for filepath in unipath.walk(ebook_root):
            book_href = filepath.replace(os.sep, "/")
            # OS X file names and paths use NFD form. The EPUB
            # spec requires all text including filenames to be in NFC form.
            book_href = unicodedata.normalize('NFC', book_href)
            # if book_href file in manifest convert to manifest id
            id = None
            # only hrefs under OEBPS/ are looked up in the manifest,
            # with that prefix stripped
            if book_href.startswith('OEBPS/'):
                href = book_href[6:]
                id = self.href_to_id.get(href,None)
            if id is None:
                # not in the manifest: track it under its book href
                self.other.append(book_href)
                self.id_to_filepath[book_href] = filepath
            else:
                self.id_to_filepath[id] = filepath