def zip_all_splits(filehandles, **kwargs):
    """
    Zip all hourly-split pnn sgt log files produced from decodable files
    in the iterable `filehandles` passed by handle.

    Pass additional arguments as needed to the filter, namely min and/or
    max heart rates (``hr_min`` / ``hr_max``).

    :return: ``(filename, BytesIO)`` — the filename embeds a SHA-1 digest
        of the archive members' payloads, the buffer is rewound to 0.
    """
    hasher = hashlib.sha1()
    memory_file = BytesIO()
    filter_kw = {k: kwargs[k] for k in {'hr_min', 'hr_max'} & kwargs.keys()}
    # Only run the real filter when a heart-rate bound was requested;
    # otherwise pass the data through unchanged.
    if filter_kw:
        Filter = HRTrackerFilter
    else:
        Filter = HRTrackerIdentityTransform
    with ZipFile(memory_file, 'w') as zf:
        for infile in filehandles:
            try:
                data = decode_stream(infile)
            except CannotDecodeFileException:
                # Skip undecodable inputs instead of aborting the batch.
                continue
            for split in HRTrackerSplitter(Filter(data, **filter_kw)):
                sgt_file = PnnSgtLogfile(split)
                f_data = b''.join(l for l in sgt_file)
                # Use a distinct name: the original rebound `data` here,
                # shadowing the decoded stream.
                info = ZipInfo(sgt_file.filename)
                # Set timestamp to end_time of split.  ZipInfo.date_time
                # is documented as a 6-tuple, so slice the struct_time
                # instead of assigning all 9 fields.
                info.date_time = gmtime(
                    sgt_file.start_time + sgt_file.elapsed_time)[:6]
                info.compress_type = ZIP_DEFLATED
                zf.writestr(info, f_data)
                hasher.update(f_data)
    memory_file.seek(0)
    return f'splits-{hasher.hexdigest()}.zip', memory_file
def zip_files(file_names_list, files_content_list, save_type="get_bytes_io",
              file_location="", skip_datetime=False):
    """
    Create a zip archive of one or more files provided as parallel lists.

    :param file_names_list: entry names to create in the archive
    :param files_content_list: matching content for each entry
    :param save_type: "get_bytes_io" builds the zip in memory; any other
        value writes it to `file_location` on disk
    :param file_location: target path when saving to disk
    :param skip_datetime: when True, leave the default ZipInfo timestamp
    :return: a rewound BytesIO for in-memory zips, "Saved to disk" for
        on-disk zips, or "error" on any failure
    """
    try:
        in_memory = save_type == "get_bytes_io"
        destination = BytesIO() if in_memory else file_location
        # One shared timestamp for every entry.
        timestamp = time.localtime(time.time())[:6]
        with ZipFile(destination, "w") as zip_out:
            for entry_name, entry_content in zip(file_names_list,
                                                 files_content_list):
                entry_info = ZipInfo(entry_name)
                if not skip_datetime:
                    entry_info.date_time = timestamp
                entry_info.compress_type = ZIP_DEFLATED
                zip_out.writestr(entry_info, entry_content)
        if in_memory:
            destination.seek(0)
            return destination
        return "Saved to disk"
    except Exception as error:
        logger.primary_logger.error("Zip Files Failed: " + str(error))
        return "error"
def write_blob(self, path, blob, compression=ZIP_DEFLATED, mode=0644):
    """Add something to the zip without adding to manifest"""
    # NOTE: Python 2-only literals (octal 0644 default, long 16L shift).
    zinfo = ZipInfo(path)
    zinfo.external_attr = mode << 16L  # set permissions
    zinfo.compress_type = compression
    # self.now is assumed to be a ZipInfo-compatible 6-tuple — confirm
    zinfo.date_time = self.now
    self.zipfile.writestr(zinfo, blob)
def create_zipinfo(filename, mtime=None, dir=False, executable=False,
                   symlink=False, comment=None):
    """Create a instance of `ZipInfo`.

    :param filename: file name of the entry
    :param mtime: modified time of the entry
    :param dir: if `True`, the entry is a directory
    :param executable: if `True`, the entry is a executable file
    :param symlink: if `True`, the entry is a symbolic link
    :param comment: comment of the entry
    """
    # NOTE(review): Python 2-only block (octal 040755 / long 16L literals).
    from zipfile import ZipInfo, ZIP_DEFLATED, ZIP_STORED
    zipinfo = ZipInfo()
    # The general purpose bit flag 11 is used to denote
    # UTF-8 encoding for path and comment. Only set it for
    # non-ascii files for increased portability.
    # See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
    if any(ord(c) >= 128 for c in filename):
        zipinfo.flag_bits |= 0x0800
    zipinfo.filename = filename.encode('utf-8')
    if mtime is not None:
        mtime = to_datetime(mtime, utc)
        # date_time takes exactly the first six utctimetuple fields
        zipinfo.date_time = mtime.utctimetuple()[:6]
        # The "extended-timestamp" extra field is used for the
        # modified time of the entry in unix time. It avoids
        # extracting wrong modified time if non-GMT timezone.
        # See http://www.opensource.apple.com/source/zip/zip-6/unzip/unzip
        # /proginfo/extra.fld
        zipinfo.extra += struct.pack(
            '<hhBl',
            0x5455,               # extended-timestamp extra block type
            1 + 4,                # size of this block
            1,                    # modification time is present
            to_timestamp(mtime))  # time of last modification
    # external_attr is 4 bytes in size. The high order two
    # bytes represent UNIX permission and file type bits,
    # while the low order two contain MS-DOS FAT file
    # attributes, most notably bit 4 marking directories.
    if dir:
        if not zipinfo.filename.endswith('/'):
            zipinfo.filename += '/'
        zipinfo.compress_type = ZIP_STORED
        zipinfo.external_attr = 040755 << 16L  # permissions drwxr-xr-x
        zipinfo.external_attr |= 0x10          # MS-DOS directory flag
    else:
        zipinfo.compress_type = ZIP_DEFLATED
        zipinfo.external_attr = 0644 << 16L    # permissions -r-wr--r--
        if executable:
            zipinfo.external_attr |= 0755 << 16L  # -rwxr-xr-x
    if symlink:
        zipinfo.compress_type = ZIP_STORED
        zipinfo.external_attr |= 0120000 << 16L   # symlink file type
    if comment:
        zipinfo.comment = comment.encode('utf-8')
    return zipinfo
def _render_zip(self, req, repos, chgset):
    """ZIP archive with all the added and/or modified files."""
    # Headers are sent before the archive is built, so a failure while
    # zipping yields a truncated response (no Content-Length is sent).
    req.send_response(200)
    req.send_header('Content-Type', 'application/zip')
    req.send_header('Content-Disposition',
                    'filename=Changeset%s.zip' % chgset.rev)
    req.end_headers()
    # Python 2: prefer the C implementation of StringIO when available.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
    buf = StringIO()
    zipfile = ZipFile(buf, 'w', ZIP_DEFLATED)
    for path, kind, change, base_path, base_rev in chgset.get_changes():
        # Only files that still exist in this changeset are archived;
        # deletions and directories are skipped.
        if kind == Node.FILE and change != Changeset.DELETE:
            node = repos.get_node(path, chgset.rev)
            zipinfo = ZipInfo()
            zipinfo.filename = node.path
            # date_time: first six fields of the gmtime struct
            zipinfo.date_time = time.gmtime(node.last_modified)[:6]
            zipinfo.compress_type = ZIP_DEFLATED
            zipfile.writestr(zipinfo, node.get_content().read())
    zipfile.close()
    req.write(buf.getvalue())
def _render_zip(self, req, repos, chgset):
    """ZIP archive with all the added and/or modified files."""
    # Response headers go out before the zip is assembled; errors during
    # assembly therefore truncate the body.
    req.send_response(200)
    req.send_header('Content-Type', 'application/zip')
    req.send_header('Content-Disposition', 'attachment;'
                    'filename=Changeset%s.zip' % chgset.rev)
    req.end_headers()
    # Python 2: fall back to the pure-python StringIO if cStringIO is absent.
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
    buf = StringIO()
    zipfile = ZipFile(buf, 'w', ZIP_DEFLATED)
    for path, kind, change, base_path, base_rev in chgset.get_changes():
        # Archive only surviving files; deleted entries are omitted.
        if kind == Node.FILE and change != Changeset.DELETE:
            node = repos.get_node(path, chgset.rev)
            zipinfo = ZipInfo()
            zipinfo.filename = node.path
            # struct_time is sliced to the 6-tuple ZipInfo expects
            zipinfo.date_time = time.gmtime(node.last_modified)[:6]
            zipinfo.compress_type = ZIP_DEFLATED
            zipfile.writestr(zipinfo, node.get_content().read())
    zipfile.close()
    req.write(buf.getvalue())
def add(self, member):
    """Copy one archive `member` (with name/mtime/perm/data attributes,
    e.g. a tar member wrapper) into self.archive."""
    if (member.isdir):
        # FIXME Should be able to add empty directories
        return
    info = ZipInfo(member.name)
    # assumes member.mtime is already a ZipInfo 6-tuple — confirm
    info.date_time = member.mtime
    # py2 long literal; unix permission bits go in the high 16 bits
    info.external_attr = member.perm << 16L
    self.archive.writestr(info, member.data)
def _render_zip(self, req, filename, repos, diff):
    """ZIP archive with all the added and/or modified files."""
    new_rev = diff.new_rev  # NOTE(review): unused — kept for parity
    req.send_response(200)
    req.send_header('Content-Type', 'application/zip')
    req.send_header('Content-Disposition', 'attachment;'
                    'filename=%s.zip' % filename)
    from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
    buf = StringIO()
    zipfile = ZipFile(buf, 'w', ZIP_DEFLATED)
    # `diff` is used as a mapping of keyword arguments here.
    for old_node, new_node, kind, change in repos.get_changes(**diff):
        if kind == Node.FILE and change != Changeset.DELETE:
            assert new_node
            zipinfo = ZipInfo()
            zipinfo.filename = new_node.path.encode('utf-8')
            # Note: unicode filenames are not supported by zipfile.
            # UTF-8 is not supported by all Zip tools either,
            # but as some does, I think UTF-8 is the best option here.
            zipinfo.date_time = time.gmtime(new_node.last_modified)[:6]
            zipinfo.compress_type = ZIP_DEFLATED
            zipfile.writestr(zipinfo, new_node.get_content().read())
    zipfile.close()
    buf.seek(0, 2)  # be sure to be at the end
    # Content-Length is known only after the archive is complete, so
    # headers end here rather than right after Content-Disposition.
    req.send_header("Content-Length", buf.tell())
    req.end_headers()
    req.write(buf.getvalue())
def compileToZip(self):
    """Compile the exam as a .zip file.

    Writes every (destination, source) pair in ``self.files`` into
    ``self.options.output``; sources are either path strings or
    file-like objects.
    """
    def cleanpath(path):
        # Normalise a relative path, dropping any '.' components.
        if path == '':
            return ''
        dirname, basename = os.path.split(path)
        dirname = cleanpath(dirname)
        if basename != '.':
            dirname = os.path.join(dirname, basename)
        return dirname

    f = ZipFile(self.options.output, 'w')
    for (dst, src) in self.files.items():
        dst = ZipInfo(cleanpath(dst))
        dst.external_attr = 0o644 << 16  # -rw-r--r--
        # ZipInfo.date_time is documented as a 6-tuple; timetuple()
        # returns 9 fields, so slice it.
        dst.date_time = datetime.datetime.today().timetuple()[:6]
        if isinstance(src, basestring):  # py2: src is a path on disk
            # close the source promptly (the handle was leaked before)
            with open(src, 'rb') as src_file:
                f.writestr(dst, src_file.read())
        else:
            f.writestr(dst, src.read())
    print("Exam created in %s" % os.path.relpath(self.options.output))
    f.close()
def write_blob(self, path, blob, compression=ZIP_DEFLATED, mode=0644):
    """Add something to the zip without adding to manifest"""
    # Python 2-only octal/long literals; duplicate of the other
    # write_blob in this corpus.
    zinfo = ZipInfo(path)
    zinfo.external_attr = mode << 16L  # set permissions
    zinfo.compress_type = compression
    # presumably self.now is a 6-tuple prepared by the owner — verify
    zinfo.date_time = self.now
    self.zipfile.writestr(zinfo, blob)
def create_zipinfo(filename, mtime=None, dir=False, executable=False,
                   symlink=False, comment=None):
    """Create a instance of `ZipInfo`.

    :param filename: file name of the entry
    :param mtime: modified time of the entry
    :param dir: if `True`, the entry is a directory
    :param executable: if `True`, the entry is a executable file
    :param symlink: if `True`, the entry is a symbolic link
    :param comment: comment of the entry
    """
    # Python-2-only code path (octal 040755 and long 16L literals);
    # duplicate of the other create_zipinfo in this corpus.
    from zipfile import ZipInfo, ZIP_DEFLATED, ZIP_STORED
    zipinfo = ZipInfo()
    # The general purpose bit flag 11 is used to denote
    # UTF-8 encoding for path and comment. Only set it for
    # non-ascii files for increased portability.
    # See http://www.pkware.com/documents/casestudies/APPNOTE.TXT
    if any(ord(c) >= 128 for c in filename):
        zipinfo.flag_bits |= 0x0800
    zipinfo.filename = filename.encode('utf-8')
    if mtime is not None:
        mtime = to_datetime(mtime, utc)
        zipinfo.date_time = mtime.utctimetuple()[:6]
        # The "extended-timestamp" extra field is used for the
        # modified time of the entry in unix time. It avoids
        # extracting wrong modified time if non-GMT timezone.
        # See http://www.opensource.apple.com/source/zip/zip-6/unzip/unzip
        # /proginfo/extra.fld
        zipinfo.extra += struct.pack(
            '<hhBl',
            0x5455,               # extended-timestamp extra block type
            1 + 4,                # size of this block
            1,                    # modification time is present
            to_timestamp(mtime))  # time of last modification
    # external_attr is 4 bytes in size. The high order two
    # bytes represent UNIX permission and file type bits,
    # while the low order two contain MS-DOS FAT file
    # attributes, most notably bit 4 marking directories.
    if dir:
        if not zipinfo.filename.endswith('/'):
            zipinfo.filename += '/'
        zipinfo.compress_type = ZIP_STORED
        zipinfo.external_attr = 040755 << 16L  # permissions drwxr-xr-x
        zipinfo.external_attr |= 0x10          # MS-DOS directory flag
    else:
        zipinfo.compress_type = ZIP_DEFLATED
        zipinfo.external_attr = 0644 << 16L    # permissions -r-wr--r--
        if executable:
            zipinfo.external_attr |= 0755 << 16L  # -rwxr-xr-x
    if symlink:
        zipinfo.compress_type = ZIP_STORED
        zipinfo.external_attr |= 0120000 << 16L   # symlink file type
    if comment:
        zipinfo.comment = comment.encode('utf-8')
    return zipinfo
def _render_zip(self, req, filename, repos, diff):
    """ZIP archive with all the added and/or modified files."""
    # Duplicate of the other diff-based _render_zip in this corpus.
    new_rev = diff.new_rev  # NOTE(review): assigned but never used
    req.send_response(200)
    req.send_header('Content-Type', 'application/zip')
    req.send_header('Content-Disposition', 'attachment;'
                    'filename=%s.zip' % filename)
    from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
    buf = StringIO()
    zipfile = ZipFile(buf, 'w', ZIP_DEFLATED)
    for old_node, new_node, kind, change in repos.get_changes(**diff):
        if kind == Node.FILE and change != Changeset.DELETE:
            assert new_node
            zipinfo = ZipInfo()
            zipinfo.filename = new_node.path.encode('utf-8')
            # Note: unicode filenames are not supported by zipfile.
            # UTF-8 is not supported by all Zip tools either,
            # but as some does, I think UTF-8 is the best option here.
            zipinfo.date_time = time.gmtime(new_node.last_modified)[:6]
            zipinfo.compress_type = ZIP_DEFLATED
            zipfile.writestr(zipinfo, new_node.get_content().read())
    zipfile.close()
    buf.seek(0, 2)  # be sure to be at the end
    # Headers end only once the archive size is known.
    req.send_header("Content-Length", buf.tell())
    req.end_headers()
    req.write(buf.getvalue())
def _render_zip(self, req, filename, repos, data):
    """ZIP archive with all the added and/or modified files."""
    # Python-2-only literals below (0644 << 16L).
    new_rev = data['new_rev']  # NOTE(review): unused — kept for parity
    req.send_response(200)
    req.send_header('Content-Type', 'application/zip')
    req.send_header('Content-Disposition',
                    content_disposition('inline', filename + '.zip'))
    from zipfile import ZipFile, ZipInfo, ZIP_DEFLATED
    buf = StringIO()
    zipfile = ZipFile(buf, 'w', ZIP_DEFLATED)
    for old_node, new_node, kind, change in repos.get_changes(
            new_path=data['new_path'], new_rev=data['new_rev'],
            old_path=data['old_path'], old_rev=data['old_rev']):
        if kind == Node.FILE and change != Changeset.DELETE:
            assert new_node
            zipinfo = ZipInfo()
            # strip leading slash so entries are archive-relative
            zipinfo.filename = new_node.path.strip('/').encode('utf-8')
            # Note: unicode filenames are not supported by zipfile.
            # UTF-8 is not supported by all Zip tools either,
            # but as some do, I think UTF-8 is the best option here.
            zipinfo.date_time = new_node.last_modified.utctimetuple()[:6]
            zipinfo.external_attr = 0644 << 16L  # needed since Python 2.5
            zipinfo.compress_type = ZIP_DEFLATED
            zipfile.writestr(zipinfo, new_node.get_content().read())
    zipfile.close()
    zip_str = buf.getvalue()
    req.send_header("Content-Length", len(zip_str))
    req.end_headers()
    req.write(zip_str)
    # Tell the request framework the response is complete.
    raise RequestDone
def basics_zip():
    """Render every module/screen bitmap of a WOWCube and return them as
    a zip file in an HTTP response.

    POST requests carry a WOWCube JSON payload; any other method uses the
    default cube.  Screens that fail to render or encode are skipped.
    """
    if request.method == 'POST':
        wowcube = WOWCube.from_json(request.data)
    else:
        wowcube = WOWCube.DEFAULT
    stream = io.BytesIO()
    with ZipFile(stream, "w") as zip_file:
        for mid in range(8):        # module index
            for sid in range(3):    # screen index within the module
                output_img = draw_screen(mid, sid, wowcube)
                if output_img is None:
                    continue
                retval, buffer = cv2.imencode('.bmp', output_img, [])
                if not retval:
                    # Skip screens that failed to encode instead of
                    # writing an unchecked buffer into the archive.
                    continue
                zip_info = ZipInfo(f"modules/{mid}/screens/{sid}.bmp")
                zip_info.date_time = time.localtime(time.time())[:6]
                zip_info.compress_type = zipfile.ZIP_DEFLATED
                # Note: compress_size must NOT be pre-set here —
                # ZipFile.writestr computes it; the old hard-coded value
                # of 1 was misleading dead weight.
                zip_file.writestr(zip_info, buffer.tobytes())
    stream.seek(0)
    response = make_response(stream.read())
    stream.close()
    response.headers['Content-Type'] = 'application/zip'
    return response
def compileToZip(self):
    """Compile the exam as a .zip file.

    Creates the output directory (including parents) when missing, then
    writes every (destination, source) pair in ``self.files`` into
    ``self.options.output``.
    """
    def cleanpath(path):
        # Normalise a relative path, dropping any '.' components.
        if path == '':
            return ''
        dirname, basename = os.path.split(path)
        dirname = cleanpath(dirname)
        if basename != '.':
            dirname = os.path.join(dirname, basename)
        return dirname

    try:
        # makedirs (not mkdir) so nested output directories also work;
        # OSError covers "already exists" on both py2 and py3.
        os.makedirs(os.path.dirname(self.options.output))
    except OSError:
        pass
    f = ZipFile(self.options.output, 'w')
    for (dst, src) in self.files.items():
        dst = ZipInfo(cleanpath(dst))
        dst.external_attr = 0o644 << 16  # -rw-r--r--
        # ZipInfo.date_time wants exactly 6 fields, not a 9-field timetuple
        dst.date_time = datetime.datetime.today().timetuple()[:6]
        if isinstance(src, basestring):  # py2: a path string on disk
            # close promptly — the handle was previously leaked
            with open(src, 'rb') as src_file:
                f.writestr(dst, src_file.read())
        else:
            f.writestr(dst, src.read())
    print("Exam created in %s" % os.path.relpath(self.options.output))
    f.close()
def _build_package(self, input_zip: Path, output_zip: Path):
    """Repackage a Chalice deployment zip as a Lambda layer zip.

    Every entry except ``app.py`` is copied, re-rooted under ``python/``
    (the layout Lambda layers require), preserving permissions,
    timestamps and compression method.
    """
    # Delete Chalice's build cache because our layer cache eviction rules
    # are stricter and we want a full rebuild.
    try:
        deployment_dir = self.layer_dir / '.chalice' / 'deployments'
        log.debug("Removing Chalice's deployment cache at %r",
                  str(deployment_dir))
        shutil.rmtree(deployment_dir)
    except FileNotFoundError:
        pass
    command = ['chalice', 'package', self.out_dir]
    log.info('Running %r', command)
    subprocess.run(command, cwd=self.layer_dir).check_returncode()
    log.info('Packaging %s', output_zip)
    with ZipFile(input_zip, 'r') as deployment_zip:
        with ZipFile(output_zip, 'w') as layer_zip:
            for src_zip_info in deployment_zip.infolist():
                if src_zip_info.filename != 'app.py':
                    # ZipFile doesn't copy permissions. Setting permissions
                    # manually also requires setting other fields.
                    dst_zip_info = ZipInfo(filename=str(
                        Path('python') / src_zip_info.filename))
                    dst_zip_info.external_attr = src_zip_info.external_attr
                    dst_zip_info.date_time = src_zip_info.date_time
                    dst_zip_info.compress_type = src_zip_info.compress_type
                    with deployment_zip.open(src_zip_info, 'r') as rf:
                        with layer_zip.open(dst_zip_info, 'w') as wf:
                            # stream in 1 MiB chunks to bound memory use
                            shutil.copyfileobj(rf, wf, length=1024 * 1024)
def download_test_results_zip():
    """Flask view: bundle all previously recorded MTR and iPerf result
    files into an in-memory zip and send it as a download; render a
    "No Results Found" page when there is nothing to send or on error."""
    try:
        if len(app_variables.previous_mtr_results_file_locations) > 0:
            date_time = datetime.date.today().strftime("D%dM%mY%Y")
            return_zip_file = BytesIO()
            zip_name = "TestResults_" + gethostname() + "_" + date_time + ".zip"
            file_meta_data_list = []
            names_of_files = []
            file_to_zip = []
            file_creation_dates = []
            previous_mtr_results_file_locations = app_variables.previous_mtr_results_file_locations
            previous_iperf_results_file_locations = app_variables.previous_iperf_results_file_locations
            # Gather content, bare filename and mtime for every result file.
            for file_location in (previous_mtr_results_file_locations +
                                  previous_iperf_results_file_locations):
                file_to_zip.append(app_generic_functions.get_file_content(file_location))
                names_of_files.append(file_location.split("/")[-1])
                file_creation_dates.append(time.localtime(os.path.getmtime(file_location)))
            for name, modification_date in zip(names_of_files, file_creation_dates):
                name_data = ZipInfo(name)
                # NOTE(review): a full struct_time is assigned where
                # ZipInfo documents a 6-tuple; it works because
                # struct_time is indexable, but [:6] is the contract.
                name_data.date_time = modification_date
                name_data.compress_type = ZIP_DEFLATED
                file_meta_data_list.append(name_data)
            with ZipFile(return_zip_file, "w") as zip_file:
                for file_meta_data, file_content in zip(file_meta_data_list, file_to_zip):
                    zip_file.writestr(file_meta_data, file_content)
            return_zip_file.seek(0)
            return send_file(return_zip_file, as_attachment=True,
                             attachment_filename=zip_name)
    except Exception as error:
        primary_logger.error("Error zipping test results: " + str(error))
    # Reached when no results exist or zipping failed.
    return render_template("message_return.html", URL="/",
                           TextMessage="No Results Found")
def zipadd(zipfile, data, fname):
    """Write `data` into the open ZipFile `zipfile` as entry `fname`,
    stamped with the current local time and -rw-rw-r-- permissions.

    :param zipfile: an open ``zipfile.ZipFile`` in write mode
    :param data: bytes or str payload for the entry
    :param fname: archive member name
    """
    zinfo = ZipInfo()
    zinfo.filename = fname
    # struct_time months and days are already 1-based, so no offset is
    # needed; the old "tlocal[1] + 1, tlocal[2] + 1" stamped entries one
    # month and one day late and could produce month 13 / day 32.
    zinfo.date_time = time.localtime()[:6]
    zinfo.compress_type = ZIP_DEFLATED
    zinfo.external_attr = 0o664 << 16  # -rw-rw-r--
    zipfile.writestr(zinfo, data)
def _add_file_to_zip(zipfile, path, archive_dest=None): with open(path, 'r') as f: file_bytes = f.read() info = ZipInfo(path) info.date_time = time.localtime() # Set permissions to be executable info.external_attr = 0o100755 << 16 # If archive dest was provided, use that as path if archive_dest: info.filename = archive_dest zipfile.writestr(info, file_bytes, ZIP_DEFLATED)
def export():
    """Export all recipes as plaintext in compressed directory."""
    # All recipes owned by the logged-in user.
    owned = db.execute("SELECT recipes.recipe_id FROM recipes JOIN owners ON recipes.recipe_id="
                       "owners.recipe_id WHERE user_id = ?", (session["user_id"],))
    archive_buffer = BytesIO()
    with ZipFile(archive_buffer, "w") as archive:
        for row in owned:
            recipe_id = row[0]
            # One deflated .txt entry per recipe, stamped "now".
            entry = ZipInfo(f"{recipe_id}.txt")
            entry.date_time = time.localtime(time.time())[:6]
            entry.compress_type = ZIP_DEFLATED
            archive.writestr(entry, plaintext(recipe_id, db))
    archive_buffer.seek(0)
    return send_file(archive_buffer, attachment_filename='recipes.zip',
                     as_attachment=True)
def zip_writer(dirpath, zippath):
    """Recursively add `dirpath` (or the single file `dirpath`) to
    self.package, remapping documentation paths along the way."""
    # Closure: relies on `self` (owner with a .package ZipFile and a
    # .source flag) and `compression` from the enclosing scope — confirm
    # against the enclosing function.
    basedir = os.path.dirname(dirpath) + os.sep
    entry = ZipInfo()
    entry.compress_type = compression
    if os.path.isdir(dirpath):
        for root, dirs, files in os.walk(dirpath):
            if os.path.basename(root).startswith('.'):
                # skip hidden directories
                continue
            dirname = root.replace(basedir, '')
            for f in files:
                if f[-1] == '~' or f.startswith('.'):
                    # skip backup files and all hidden files
                    continue
                src = root + '/' + f
                entry = ZipInfo()
                entry.compress_type = compression
                entry.filename = dirname + '/' + f
                entry.date_time = localtime(os.path.getmtime(src))[:6]
                # hacky: remap html/ paths depending on source builds
                if dirname.startswith("html"):
                    if self.source == True:
                        entry.filename = dirname.replace('html', 'doc', 1) + "/" + f
                    else:
                        entry.filename = dirname.replace('html/', '', 1) + "/" + f
                        # NOTE(review): second strip, presumably for
                        # nested html/ components — confirm nesting
                        entry.filename = entry.filename.replace('html/', '', 1)
                if entry.filename.startswith("examples"):
                    entry.filename = "tutorials/" + entry.filename
                file_data = open( src, 'rb').read()
                self.package.writestr(entry, file_data)
    else:
        # top files
        entry.date_time = localtime(os.path.getmtime(dirpath))[:6]
        entry.filename = os.path.basename(zippath)
        file_data = open( dirpath, 'rb').read()
        self.package.writestr(entry, file_data)
def write_file (self, data, filename, description = "") :
    """Write a file into the archive

    :Parameters:
     - `data` (str) - data to write
     - `filename` (str) - name of the file in which to store data
     - `description` (str) - textual description of the data
    """
    # Python-2-only literals (0644 << 16L).
    info = ZipInfo(filename)
    info.comment = description
    info.date_time = localtime()[:6]
    info.external_attr = 0644 << 16L  # -rw-r--r--
    info.compress_type = ZIP_DEFLATED
    # Entry is only staged here; presumably flushed to the zip later
    # by the owner of self._elms — confirm.
    self._elms[filename] = (info,data)
def write_executable(zfile, path, zip_path=None):
    """Copy the file at `path` into the open ZipFile `zfile` with
    executable permissions, stored as `zip_path` (default: `path`).

    :param zfile: an open ``zipfile.ZipFile`` in write mode
    :param path: filesystem path of the file to add
    :param zip_path: member name inside the archive
    """
    if zip_path is None:
        zip_path = path
    with open(path, 'rb') as f:
        fbytes = f.read()
    info = ZipInfo(str(zip_path))
    # date_time must be a 6-tuple, not the full 9-field struct_time
    info.date_time = localtime()[:6]
    # -rwxr-xr-x regular file
    info.external_attr = 0o100755 << 16
    # UNIX host
    info.create_system = 3
    # BUG FIX: the original wrote via the undefined name `zip_f`,
    # raising NameError on every call; the parameter is `zfile`.
    zfile.writestr(info, fbytes, ZIP_DEFLATED)
def make_dir_entry(name=None, date_time=None, mode=MODE_DIRECTORY):
    """Build a ZipInfo describing a directory entry.

    :param name: directory name; a trailing '/' is appended when missing
    :param date_time: a datetime used to stamp the entry
    :param mode: value for ZipInfo.external_attr
    :return: a stored (uncompressed) directory ZipInfo
    """
    tt = date_time.timetuple()
    dir = ZipInfo()
    dir.filename = name + ('/' if name[-1] != '/' else '')
    dir.orig_filename = dir.filename
    # timetuple()[:3] is (year, month, day).  The old isocalendar() call
    # returned (ISO year, ISO week, ISO weekday), stamping nonsense
    # dates such as month 53.
    dir.date_time = tuple(tt[:3]) + (tt.tm_hour, tt.tm_min, tt.tm_sec)
    dir.compress_type = 0     # stored, no compression
    dir.create_system = 0     # MS-DOS host
    dir.create_version = 20
    dir.extract_version = 10
    dir.external_attr = mode
    return dir
def make_file_entry(name=None, date_time=None, mode=MODE_FILE | MODE_ARCHIVE):
    """Build a ZipInfo describing a deflated file entry.

    :param name: member name inside the archive
    :param date_time: a datetime used to stamp the entry
    :param mode: value for ZipInfo.external_attr
    :return: a deflate-compressed file ZipInfo
    """
    tt = date_time.timetuple()
    file = ZipInfo()
    file.filename = name
    file.orig_filename = file.filename
    # (year, month, day) must come from timetuple(), not isocalendar(),
    # which yields ISO week numbers instead of calendar months/days.
    file.date_time = tuple(tt[:3]) + (tt.tm_hour, tt.tm_min, tt.tm_sec)
    file.compress_type = 8    # deflated
    file.create_system = 0    # MS-DOS host
    file.create_version = 20
    file.extract_version = 20
    file.flag_bits = 2        # maximum (-exx/-ex) compression option
    file.external_attr = mode
    return file
def removeCDPwatermark(object, path_to_ebook):
    # "META-INF/cdp.info" is a watermark file used by some Tolino vendors.
    # We don't want that in our eBooks, so lets remove that file.
    # Returns the path of a cleaned temporary epub, or the original path
    # when no watermark is present or anything goes wrong.
    try:
        infile = ZipFile(open(path_to_ebook, 'rb'))
        namelist = infile.namelist()
        if 'META-INF/cdp.info' not in namelist:
            return path_to_ebook
        # Rebuild the epub without the watermark; mimetype must be the
        # first entry, so it is removed here and re-added at the front.
        namelist.remove("mimetype")
        namelist.remove("META-INF/cdp.info")
        output = object.temporary_file(".epub").name
        kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
        with closing(ZipFile(open(output, 'wb'), 'w', **kwds)) as outf:
            for path in (["mimetype"] + namelist):
                data = infile.read(path)
                zi = ZipInfo(path)
                oldzi = infile.getinfo(path)
                # Best-effort copy of the original entry's metadata.
                try:
                    zi.compress_type = oldzi.compress_type
                    if path == "mimetype":
                        # epub spec: mimetype must be stored uncompressed
                        zi.compress_type = ZIP_STORED
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    if any(ord(c) >= 128 for c in path) or any(
                            ord(c) >= 128 for c in zi.comment):
                        # If the file name or the comment contains any
                        # non-ASCII char, set the UTF8-flag
                        zi.flag_bits |= 0x800
                except:
                    pass
                outf.writestr(zi, data)
        print("Watermark: Successfully removed cdp.info watermark")
        return output
    except:
        traceback.print_exc()
        return path_to_ebook
def _zip_info(environment, name):
    """
    @type environment : C{str}
    @param environment : The environment name
    @type name : C{str}
    @param name : The name of the file
    @rtype: C{ZipInfo}
    @return: The Zip Info
    """
    # Python-2-only literals below (octal 0666, long 16L).
    filename = "%s-%s" % (environment, name)
    info = ZipInfo(filename)
    info.date_time = time.localtime(time.time())[:6]  # now
    info.external_attr = 0666 << 16L  # read-write access to everyone
    info.compress_type = ZIP_DEFLATED
    return info
def _zip_info(environment, name):
    """
    @type environment : C{str}
    @param environment : The environment name
    @type name : C{str}
    @param name : The name of the file
    @rtype: C{ZipInfo}
    @return: The Zip Info
    """
    # Duplicate of the other _zip_info; Python-2-only literals (0666, 16L).
    filename = "%s-%s" % (environment, name)
    info = ZipInfo(filename)
    info.date_time = time.localtime(time.time())[:6]  # now
    info.external_attr = 0666 << 16L  # read-write access to everyone
    info.compress_type = ZIP_DEFLATED
    return info
def _build_package(self, input_zip, output_zip):
    """Repackage a Chalice deployment zip as a Lambda layer zip: every
    entry except ``app.py`` is copied under a top-level ``python/``
    directory, preserving permissions, timestamps and compression."""
    command = ['chalice', 'package', self.out_dir]
    log.info('Running %r', command)
    subprocess.run(command, cwd=self.layer_dir).check_returncode()
    log.info('Packaging %s', output_zip)
    with ZipFile(input_zip, 'r') as deployment_zip:
        with ZipFile(output_zip, 'w') as layer_zip:
            for src_zip_info in deployment_zip.infolist():
                if src_zip_info.filename != 'app.py':
                    # ZipFile doesn't copy permissions. Setting permissions
                    # manually also requires setting other fields.
                    dst_zip_info = ZipInfo(filename=str(
                        Path('python') / src_zip_info.filename))
                    dst_zip_info.external_attr = src_zip_info.external_attr
                    dst_zip_info.date_time = src_zip_info.date_time
                    dst_zip_info.compress_type = src_zip_info.compress_type
                    with deployment_zip.open(src_zip_info, 'r') as rf:
                        with layer_zip.open(dst_zip_info, 'w') as wf:
                            # stream copy in 1 MiB chunks
                            shutil.copyfileobj(rf, wf, length=1024 * 1024)
def compileToZip(self):
    """Compile the exam as a .zip file.

    Ensures the output's parent directory exists, then writes every
    (destination, source) pair in ``self.files`` into
    ``self.options.output``.  Sources are either ``Path`` objects or
    file-like objects with a ``read()`` method.
    """
    Path(self.options.output).parent.mkdir(exist_ok=True, parents=True)
    f = ZipFile(self.options.output, 'w')
    for (dst, src) in self.files.items():
        dst = ZipInfo(str(Path(dst).relative_to('.')))
        dst.compress_type = zipfile.ZIP_DEFLATED
        dst.external_attr = 0o644 << 16  # -rw-r--r--
        # ZipInfo.date_time is documented as a 6-tuple; slicing avoids
        # handing it the whole 9-field timetuple.
        dst.date_time = datetime.datetime.today().timetuple()[:6]
        if isinstance(src, Path):
            f.writestr(dst, src.read_bytes())
        else:
            # file-like object (e.g. StringIO) supplied directly
            f.writestr(dst, src.read())
    print("Exam created in %s" % os.path.relpath(self.options.output))
    f.close()
def add_str(self, str_to_add, name, dt=None):
    # type: (str,str,datetime) -> None
    """
    Add a string to the archive as zip entry named 'name'

    :param str_to_add: string to add
    :param name: name of the zip.entry
    :param dt: datetime, optional if not specified, current date time is assumed
    :return: None
    """
    # Evaluate "now" per call: a `dt=datetime.now()` default is computed
    # once at import time and silently reused for every later call,
    # contradicting the documented behavior.
    if dt is None:
        dt = datetime.now()
    # always use forward slash regardless of platform, this allows the
    # calling code to use os.path.join for names.  os.sep is the path
    # component separator ('\\' on Windows); the original used
    # os.pathsep (':' / ';'), which separates PATH-style lists and never
    # appears in os.path.join output.
    if os.sep in name:
        name = name.replace(os.sep, "/")
    info = ZipInfo()
    info.filename = self.uuid + "/" + name
    info.external_attr = 0o644 << 16
    info.compress_type = ZIP_DEFLATED
    info.date_time = (dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)
    self.zip.writestr(info, str_to_add)
def compileToZip(exam, files, options):
    """Write `files` (a mapping of archive path -> source path string or
    file-like object) into a zip at ``options.output``.

    :param exam: unused here; kept for interface compatibility
    :param files: mapping of destination name to source
    :param options: object with an ``output`` path attribute
    """
    def cleanpath(path):
        # Normalise a relative path, dropping any '.' components.
        if path == '':
            return ''
        dirname, basename = os.path.split(path)
        dirname = cleanpath(dirname)
        if basename != '.':
            dirname = os.path.join(dirname, basename)
        return dirname

    f = ZipFile(options.output, 'w')
    for (dst, src) in files.items():
        dst = ZipInfo(cleanpath(dst))
        dst.external_attr = 0o644 << 16  # -rw-r--r--
        # date_time is a 6-tuple; slice off timetuple()'s trailing fields
        dst.date_time = datetime.datetime.today().timetuple()[:6]
        if isinstance(src, basestring):  # py2: a path string on disk
            # close promptly — the handle was previously leaked
            with open(src, 'rb') as src_file:
                f.writestr(dst, src_file.read())
        else:
            f.writestr(dst, src.read())
    print("Exam created in %s" % os.path.relpath(options.output))
    f.close()
def archive_path(self, path=None, report=None):
    """Recursively add everything under `path` (default: the current
    directory) to this archive.

    Empty directories are added as explicit trailing-slash entries;
    `report(name, count, total)` is invoked per entry when given.  The
    caller's working directory is restored even if archiving fails.
    """
    here = os.getcwd()
    if path is None:
        path = here
    os.chdir(path)
    try:
        # First pass: count entries so `report` can show progress.
        total = 0
        for root, dirs, files in os.walk('.', topdown=True):
            total += 1
            total += len(files)
        # the total seems to be one greater (the '.' root itself)
        total -= 1
        count = 1
        for root, dirs, files in os.walk('.', topdown=True):
            for filename in files:
                fullpath = os.path.join(root, filename)
                if report is not None:
                    report(fullpath, count, total)
                self.write(fullpath)
                count += 1
            # do the directories last
            for dirname in dirs:
                fullpath = os.path.join(root, dirname)
                # only add empty directories explicitly; non-empty ones
                # are implied by the entries inside them
                if not os.listdir(fullpath):
                    fullpath = '%s/' % fullpath
                    info = ZipInfo(fullpath)
                    st = os.stat(fullpath)
                    mtime = time.localtime(st.st_mtime)
                    info.date_time = mtime[0:6]
                    self.writestr(info, '')
                if report is not None:
                    report(fullpath, count, total)
                count += 1
    finally:
        # BUG FIX: restore the caller's working directory even when an
        # exception escapes (previously a failure left the process in
        # `path` for good).
        os.chdir(here)
def get_zip_infos(self, *filenames):
    """Read in the table of contents for the ZIP file."""
    # Generator: yields a ZipInfo for each requested member name.  The
    # central directory is parsed record by record so the scan can stop
    # as soon as every requested name has been found; a file-count cap
    # guards against zip bombs.
    fp = self.fp
    max_file_count = self.max_file_count
    if not fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")
    filenames = set(filenames)
    if len(filenames) == 0:
        return
    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    size_cd = endrec[_ECD_SIZE]        # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]    # offset of central directory
    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    # start_dir: Position of start of central directory
    start_dir = offset_cd + concat
    fp.seek(start_dir, 0)
    data = fp.read(size_cd)
    # Parse the whole central directory from an in-memory buffer.
    fp = BytesIO(data)
    total = 0
    file_count = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & _UTF8_EXTENSION_FLAG:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" %
                                      (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        file_count += 1
        if max_file_count is not None and file_count > max_file_count:
            # zip-bomb guard
            raise TooManyFiles('Too many files in egg')
        if x.filename in filenames:
            filenames.discard(x.filename)
            yield x
            if len(filenames) == 0:
                # All requested members found — stop scanning early.
                return
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file."""
    # Python 2 code (print statements, `raise Exc, msg` syntax).  Reads
    # only the central directory over HTTP via a ranged request, so the
    # full remote archive is never downloaded.
    try:
        endrec = _EndRecData(self.url)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile, "File is not a zip file"
    if self.debug > 1:
        print endrec
    size_cd = endrec[_ECD_SIZE]          # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]      # offset of central directory
    self.comment = endrec[_ECD_COMMENT]  # archive comment
    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    # if endrec[_ECD_SIGNATURE] == stringEndArchive64:
    #     # If Zip64 extension structures are present, account for them
    #     concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
    if self.debug > 2:
        inferred = concat + offset_cd
        print "given, inferred, offset", offset_cd, inferred, concat
    # self.start_dir: Position of start of central directory
    self.start_dir = offset_cd + concat
    # Fetch only the central-directory byte range over HTTP.
    ECD = _http_get_partial_data(self.url, self.start_dir,
                                 self.start_dir + size_cd - 1)
    data = ECD.read()
    ECD.close()
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if centdir[0:4] != stringCentralDir:
            raise BadZipfile, "Bad magic number for central directory"
        centdir = struct.unpack(structCentralDir, centdir)
        if self.debug > 2:
            print centdir
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (
            x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size,
        ) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x
        # update total bytes read from central directory
        total = (
            total
            + sizeCentralDir
            + centdir[_CD_FILENAME_LENGTH]
            + centdir[_CD_EXTRA_FIELD_LENGTH]
            + centdir[_CD_COMMENT_LENGTH]
        )
        if self.debug > 2:
            print "total", total
def removeOPFwatermarks(object, path_to_ebook):
    """Strip Amazon and eLibri/LemonInk watermarks from an EPUB's OPF file.

    Locates the main content OPF via META-INF/container.xml, removes known
    watermark markup from it with regexes, and (only if something changed)
    re-packages the EPUB into a temporary file with the cleaned OPF.

    Args:
        object: plugin object providing temporary_file(suffix) for output.
        path_to_ebook: path to the input EPUB.

    Returns:
        Path to the cleaned EPUB on success, otherwise the original path
        (also returned on any error — this function is best-effort).
    """
    contNS = lambda tag: '{%s}%s' % ('urn:oasis:names:tc:opendocument:xmlns:container', tag)
    opf_path = None

    try:
        inf = ZipFile(open(path_to_ebook, 'rb'))
        container = etree.fromstring(inf.read("META-INF/container.xml"))
        rootfiles = container.find(contNS("rootfiles")).findall(contNS("rootfile"))
        for rootfile in rootfiles:
            opf_path = rootfile.get("full-path", None)
            if (opf_path is not None):
                break
    except:
        traceback.print_exc()
        return path_to_ebook

    # If path is None, we didn't find an OPF, so we probably don't have a font key.
    # If path is set, it's the path to the main content OPF file.

    if (opf_path is None):
        # No OPF found - no watermark
        return path_to_ebook
    else:
        try:
            container_str = inf.read(opf_path).decode("utf-8")
            container_str_new = container_str

            had_amazon = False
            had_elibri = False

            # Remove Amazon hex watermarks
            # Match optional newline at the beginning, then spaces, then a "meta" tag with name = "Watermark" or "Watermark_(hex)" and a "content" element.
            # This regex also matches DuMont watermarks with meta name="watermark", with the case-insensitive match on the "w" in watermark.
            pre_remove = container_str_new
            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"[Ww]atermark(_\(hex\))?\"\s+content=\"[0-9a-fA-F]+\"\s*\/>', '', container_str_new)
            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+content=\"[0-9a-fA-F]+\"\s+name=\"[Ww]atermark(_\(hex\))?\"\s*\/>', '', container_str_new)
            if pre_remove != container_str_new:
                had_amazon = True

            # Remove elibri / lemonink watermark
            # Lemonink replaces all "id" fields in the opf with "idX_Y", with X being the watermark and Y being a number for that particular ID.
            # This regex replaces all "idX_Y" IDs with "id_Y", removing the watermark IDs.
            pre_remove = container_str_new
            container_str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<\!\-\-\s*Wygenerowane przez elibri dla zamówienia numer [0-9a-fA-F]+\s*\-\-\>', '', container_str_new)
            if pre_remove != container_str_new:
                # To prevent this Regex from applying to books without that watermark, only do that if the watermark above was found.
                container_str_new = re.sub(r'\=\"id[0-9]+_([0-9]+)\"', r'="id_\1"', container_str_new)
            if pre_remove != container_str_new:
                had_elibri = True
        except:
            traceback.print_exc()
            return path_to_ebook

    if (container_str == container_str_new):
        # container didn't change - no watermark
        return path_to_ebook

    # Re-package without watermark
    namelist = inf.namelist()
    # mimetype must come first (and uncompressed), so re-add it manually below.
    namelist.remove("mimetype")

    try:
        output = object.temporary_file(".epub").name
        kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
        with closing(ZipFile(open(output, 'wb'), 'w', **kwds)) as outf:
            for path in (["mimetype"] + namelist):
                data = inf.read(path)
                if path == opf_path:
                    # Found OPF, replacing ...
                    data = container_str_new
                zi = ZipInfo(path)
                oldzi = inf.getinfo(path)
                # Best-effort copy of the original entry's metadata.
                try:
                    zi.compress_type = oldzi.compress_type
                    if path == "mimetype":
                        zi.compress_type = ZIP_STORED
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                        # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                        zi.flag_bits |= 0x800
                except:
                    pass
                outf.writestr(zi, data)
    except:
        traceback.print_exc()
        return path_to_ebook

    if had_elibri:
        print("Watermark: Successfully stripped eLibri watermark from OPF file.")
    if had_amazon:
        print("Watermark: Successfully stripped Amazon watermark from OPF file.")

    return output
def decryptLCPbook(inpath, passphrases, parent_object):
    """Decrypt a Readium LCP-protected book (EPUB, PDF or generic ZIP).

    Reads META-INF/license.lcpl, derives candidate key hashes from any
    hard-coded license values plus the user-supplied passphrases, finds the
    hash that validates against the license key_check, decrypts the content
    key with it, and writes a decrypted copy to a temporary file.

    Args:
        inpath: path to the LCP-encrypted file.
        passphrases: iterable of candidate passphrase strings.
        parent_object: plugin object providing temporary_file(suffix).

    Returns:
        Path to the decrypted temporary output file.

    Raises:
        LCPError: not an LCP book, unknown encryption profile/algorithm,
            no working passphrase, or content-key decryption failure.
    """
    if not isLCPbook(inpath):
        raise LCPError("This is not an LCP-encrypted book")

    file = ZipFile(open(inpath, 'rb'))
    license = json.loads(file.read('META-INF/license.lcpl'))
    print("LCP: Found LCP-encrypted book {0}".format(license["id"]))

    user_info_string1 = returnUserInfoStringForLicense(license, None)
    if (user_info_string1 is not None):
        print("LCP: Account information: " + user_info_string1)

    # Check algorithm:
    # Select the key-transform matching the declared LCP profile.
    if license["encryption"]["profile"] == "http://readium.org/lcp/basic-profile":
        print("LCP: Book is using lcp/basic-profile encryption.")
        transform_algo = LCPTransform.secret_transform_basic
    elif license["encryption"]["profile"] == "http://readium.org/lcp/profile-1.0":
        print("LCP: Book is using lcp/profile-1.0 encryption")
        transform_algo = LCPTransform.secret_transform_profile10
    else:
        file.close()
        raise LCPError("Book is using an unknown LCP encryption standard: {0}".format(license["encryption"]["profile"]))

    # Only AES-256-CBC content keys are supported.
    if ("algorithm" in license["encryption"]["content_key"] and license["encryption"]["content_key"]["algorithm"] != "http://www.w3.org/2001/04/xmlenc#aes256-cbc"):
        file.close()
        raise LCPError("Book is using an unknown LCP encryption algorithm: {0}".format(license["encryption"]["content_key"]["algorithm"]))

    key_check = license["encryption"]["user_key"]["key_check"]
    encrypted_content_key = license["encryption"]["content_key"]["encrypted_value"]

    # Prepare a list of encryption keys to test:
    password_hashes = []

    # Some providers hard-code the passphrase in the LCPL file. That doesn't happen often,
    # but when it does, these files can be decrypted without knowing any passphrase.
    if "value" in license["encryption"]["user_key"]:
        try:
            password_hashes.append(binascii.hexlify(base64.decodebytes(license["encryption"]["user_key"]["value"].encode())).decode("ascii"))
        except AttributeError:
            # Python 2
            password_hashes.append(binascii.hexlify(base64.decodestring(license["encryption"]["user_key"]["value"].encode())).decode("ascii"))
    if "hex_value" in license["encryption"]["user_key"]:
        password_hashes.append(binascii.hexlify(bytearray.fromhex(license["encryption"]["user_key"]["hex_value"])).decode("ascii"))

    # Hash all the passwords provided by the user:
    for possible_passphrase in passphrases:
        algo = "http://www.w3.org/2001/04/xmlenc#sha256"
        if "algorithm" in license["encryption"]["user_key"]:
            algo = license["encryption"]["user_key"]["algorithm"]
        algo, tmp_pw = LCPTransform.userpass_to_hash(possible_passphrase.encode('utf-8'), algo)
        if tmp_pw is not None:
            password_hashes.append(tmp_pw)

    # For all the password hashes, check if one of them decrypts the book:
    # A hash is correct when key_check decrypts back to the license id.
    correct_password_hash = None
    for possible_hash in password_hashes:
        transformed_hash = transform_algo(possible_hash)
        try:
            decrypted = None
            decrypted = dataDecryptLCP(key_check, transformed_hash)
        except:
            pass
        if (decrypted is not None and decrypted.decode("ascii", errors="ignore") == license["id"]):
            # Found correct password hash, hooray!
            correct_password_hash = transformed_hash
            break

    # Print an error message if none of the passwords worked
    if (correct_password_hash is None):
        print("LCP: Tried {0} passphrases, but none of them could decrypt the book ...".format(len(password_hashes)))
        # Print password hint, if available
        if ("text_hint" in license["encryption"]["user_key"] and license["encryption"]["user_key"]["text_hint"] != ""):
            print("LCP: The book distributor has given you the following passphrase hint: \"{0}\"".format(license["encryption"]["user_key"]["text_hint"]))
        print("LCP: Enter the correct passphrase in the DeDRM plugin settings, then try again.")
        # Print password reset instructions, if available
        for link in license["links"]:
            if ("rel" in link and link["rel"] == "hint"):
                print("LCP: You may be able to find or reset your LCP passphrase on the following webpage: {0}".format(link["href"]))
                break
        file.close()
        raise LCPError("No correct passphrase found")

    print("LCP: Found correct passphrase, decrypting book ...")
    user_info_string2 = returnUserInfoStringForLicense(license, correct_password_hash)
    if (user_info_string2 is not None):
        if (user_info_string1 != user_info_string2):
            print("LCP: Account information: " + user_info_string2)

    # Take the key we found and decrypt the content key:
    decrypted_content_key = dataDecryptLCP(encrypted_content_key, correct_password_hash)
    if decrypted_content_key is None:
        raise LCPError("Decrypted content key is None")

    # Begin decrypting
    encryption = file.read('META-INF/encryption.xml')
    decryptor = Decryptor(decrypted_content_key, encryption)
    kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)

    mimetype = file.read("mimetype").decode("latin-1")

    if mimetype == "application/pdf":
        # Check how many PDF files there are.
        # Usually, an LCP-protected PDF/ZIP is only supposed to contain one
        # PDF file, but if there are multiple, return a ZIP that contains them all.
        pdf_files = []
        for filename in file.namelist():
            if filename.endswith(".pdf"):
                pdf_files.append(filename)

        if len(pdf_files) == 0:
            file.close()
            raise LCPError("Error: Book is an LCP-protected PDF, but doesn't contain any PDF files ...")
        elif len(pdf_files) == 1:
            # One PDF file found - extract and return that.
            pdfdata = file.read(pdf_files[0])
            outputname = parent_object.temporary_file(".pdf").name
            print("LCP: Successfully decrypted, exporting to {0}".format(outputname))
            with open(outputname, 'wb') as f:
                f.write(decryptor.decrypt(pdf_files[0], pdfdata))
            file.close()
            return outputname
        else:
            # Multiple PDFs found
            outputname = parent_object.temporary_file(".zip").name
            with closing(ZipFile(open(outputname, 'wb'), 'w', **kwds)) as outfile:
                for path in pdf_files:
                    data = file.read(path)
                    outfile.writestr(path, decryptor.decrypt(path, data))
            print("LCP: Successfully decrypted a multi-PDF ZIP file, exporting to {0}".format(outputname))
            file.close()
            return outputname
    else:
        # Not a PDF -> EPUB
        if mimetype == "application/epub+zip":
            outputname = parent_object.temporary_file(".epub").name
        else:
            outputname = parent_object.temporary_file(".zip").name

        with closing(ZipFile(open(outputname, 'wb'), 'w', **kwds)) as outfile:
            # mimetype must be 1st file. Remove from list and manually add at the beginning
            namelist = file.namelist()
            namelist.remove("mimetype")
            namelist.remove("META-INF/license.lcpl")

            for path in (["mimetype"] + namelist):
                data = file.read(path)
                zi = ZipInfo(path)

                if path == "META-INF/encryption.xml":
                    # Check if that's still needed
                    if (decryptor.check_if_remaining()):
                        data = decryptor.get_xml()
                        print("LCP: Adding encryption.xml for the remaining files.")
                    else:
                        continue

                # Best-effort copy of the original entry's metadata.
                try:
                    oldzi = file.getinfo(path)
                    if path == "mimetype":
                        zi.compress_type = ZIP_STORED
                    else:
                        zi.compress_type = ZIP_DEFLATED
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                    if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                        # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                        zi.flag_bits |= 0x800
                except:
                    pass

                if path == "META-INF/encryption.xml":
                    outfile.writestr(zi, data)
                else:
                    outfile.writestr(zi, decryptor.decrypt(path, data))

        print("LCP: Successfully decrypted, exporting to {0}".format(outputname))
        file.close()
        return outputname
def get_zip_infos(self, *filenames):
    """Yield ZipInfo objects for the requested *filenames*.

    Generator: scans the archive's central directory via self.fp and yields
    a ZipInfo for each name in *filenames* found, stopping early once all
    requested names have been produced. Returns immediately if no names are
    given.

    Raises:
        RuntimeError: archive already closed.
        BadZipFile: missing/truncated/corrupt central directory.
        NotImplementedError: entry needs a newer extract version.
        TooManyFiles: more than self.max_file_count entries seen.
    """
    fp = self.fp
    max_file_count = self.max_file_count

    if not fp:
        raise RuntimeError("Attempt to read ZIP archive that was already closed")

    filenames = set(filenames)
    if len(filenames) == 0:
        return

    try:
        endrec = _EndRecData(fp)
    except OSError:
        raise BadZipFile("File is not a zip file")
    if not endrec:
        raise BadZipFile("File is not a zip file")
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    # start_dir: Position of start of central directory
    start_dir = offset_cd + concat
    fp.seek(start_dir, 0)
    # Read the whole central directory once and parse it from memory.
    data = fp.read(size_cd)
    fp = BytesIO(data)
    total = 0
    file_count = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipFile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipFile("Bad magic number for central directory")
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        flags = centdir[5]
        if flags & _UTF8_EXTENSION_FLAG:
            # UTF-8 file names extension
            filename = filename.decode('utf-8')
        else:
            # Historical ZIP filename encoding
            filename = filename.decode('cp437')
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system,
         x.extract_version, x.reserved,
         x.flag_bits, x.compress_type, t, d,
         x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        if x.extract_version > MAX_EXTRACT_VERSION:
            raise NotImplementedError("zip file version %.1f" % (x.extract_version / 10))
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ((d >> 9) + 1980, (d >> 5) & 0xF, d & 0x1F,
                       t >> 11, (t >> 5) & 0x3F, (t & 0x1F) * 2)
        x._decodeExtra()
        x.header_offset = x.header_offset + concat

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])
        file_count += 1
        # Guard against zip bombs / runaway archives.
        if max_file_count is not None and file_count > max_file_count:
            raise TooManyFiles('Too many files in egg')

        if x.filename in filenames:
            filenames.discard(x.filename)
            yield x
            # Stop scanning as soon as everything requested was found.
            if len(filenames) == 0:
                return
def removeHTMLwatermarks(object, path_to_ebook):
    """Strip known HTML/XHTML watermarks from an EPUB's content documents.

    Scans every .html/.xhtml/.xml entry for Adobe ADEPT, Pocketbook and
    eLibri/LemonInk watermark markup, removes it with regexes, and — only if
    any file changed — re-packages the EPUB into a temporary file.

    Args:
        object: plugin object providing temporary_file(suffix) for output.
        path_to_ebook: path to the input EPUB.

    Returns:
        Path to the cleaned EPUB, or the original path if nothing changed
        or any error occurred (best-effort).
    """
    try:
        inf = ZipFile(open(path_to_ebook, 'rb'))
        namelist = inf.namelist()

        modded_names = []
        modded_contents = []

        count_adept = 0
        count_pocketbook = 0
        count_lemonink_invisible = 0
        count_lemonink_visible = 0
        lemonink_trackingID = None

        for file in namelist:
            if not (file.endswith('.html') or file.endswith('.xhtml') or file.endswith('.xml')):
                continue
            try:
                file_str = inf.read(file).decode("utf-8")
                str_new = file_str

                # Remove Adobe ADEPT watermarks
                # Match optional newline at the beginning, then a "meta" tag with name = "Adept.expected.resource" or "Adept.resource"
                # and either a "value" or a "content" element with an Adobe UUID
                pre_remove = str_new
                str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s*\/>', '', str_new)
                str_new = re.sub(r'((\r\n|\r|\n)\s*)?\<meta\s+(content|value)=\"urn:uuid:[0-9a-fA-F\-]+\"\s+name=\"(Adept\.resource|Adept\.expected\.resource)\"\s*\/>', '', str_new)
                if (str_new != pre_remove):
                    count_adept += 1

                # Remove Pocketbook watermarks
                pre_remove = str_new
                str_new = re.sub(r'\<div style\=\"padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\"]*opacity:0.0\;[^\"]*text\-decoration\:none\;[^\"]*background\:none\;[^\"]*\"\>(.*?)\<\/div\>', '', str_new)
                if (str_new != pre_remove):
                    count_pocketbook += 1

                # Remove eLibri / LemonInk watermark
                # Run this in a loop, as it is possible a file has been watermarked twice ...
                while True:
                    pre_remove = str_new
                    # The invisible tracking ID is a t0x... token in the body class.
                    unique_id = re.search(r'<body[^>]+class="[^"]*(t0x[0-9a-fA-F]{25})[^"]*"[^>]*>', str_new)
                    if (unique_id):
                        lemonink_trackingID = unique_id.groups()[0]
                        count_lemonink_invisible += 1
                        str_new = re.sub(lemonink_trackingID, '', str_new)
                        pre_remove = str_new
                        # Visible watermark: two styled divs right after <body>.
                        pm = r'(<body[^>]+class="[^"]*"[^>]*>)'
                        pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
                        pm += r'\<div style\=\'padding\:0\;border\:0\;text\-indent\:0\;line\-height\:normal\;margin\:0 1cm 0.5cm 1cm\;[^\']*text\-decoration\:none\;[^\']*background\:none\;[^\']*\'\>(.*?)</div>'
                        str_new = re.sub(pm, r'\1', str_new)
                        if (str_new != pre_remove):
                            count_lemonink_visible += 1
                    else:
                        break
            except:
                traceback.print_exc()
                continue

            if (file_str == str_new):
                continue

            modded_names.append(file)
            modded_contents.append(str_new)

        if len(modded_names) == 0:
            # No file modified, return original
            return path_to_ebook

        if len(modded_names) != len(modded_contents):
            # Something went terribly wrong, return original
            print("Watermark: Error during watermark removal")
            return path_to_ebook

        # Re-package with modified files:
        namelist.remove("mimetype")

        try:
            output = object.temporary_file(".epub").name
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(output, 'wb'), 'w', **kwds)) as outf:
                for path in (["mimetype"] + namelist):
                    data = inf.read(path)

                    try:
                        modded_index = None
                        modded_index = modded_names.index(path)
                    except:
                        pass

                    if modded_index is not None:
                        # Found modified file - replace contents
                        data = modded_contents[modded_index]

                    zi = ZipInfo(path)
                    oldzi = inf.getinfo(path)
                    # Best-effort copy of the original entry's metadata.
                    try:
                        zi.compress_type = oldzi.compress_type
                        if path == "mimetype":
                            zi.compress_type = ZIP_STORED
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                        if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except:
                        pass
                    outf.writestr(zi, data)
        except:
            traceback.print_exc()
            return path_to_ebook

        if (count_adept > 0):
            print("Watermark: Successfully stripped {0} ADEPT watermark(s) from ebook.".format(count_adept))
        if (count_lemonink_invisible > 0 or count_lemonink_visible > 0):
            print("Watermark: Successfully stripped {0} visible and {1} invisible LemonInk watermark(s) (\"{2}\") from ebook.".format(count_lemonink_visible, count_lemonink_invisible, lemonink_trackingID))
        if (count_pocketbook > 0):
            print("Watermark: Successfully stripped {0} Pocketbook watermark(s) from ebook.".format(count_pocketbook))

        return output
    except:
        traceback.print_exc()
        return path_to_ebook
def decryptBook(userkey, inpath, outpath):
    """Decrypt an Adobe ADEPT or Adobe PassHash (B&N) protected EPUB.

    Reads META-INF/rights.xml to recover the encrypted book key, decrypts it
    with *userkey* (RSA PKCS#1 v1.5 for ADEPT, AES-CBC for PassHash), then
    writes a decrypted EPUB to *outpath*.

    Args:
        userkey: RSA private key DER (ADEPT) or base64 key (PassHash).
        inpath: path to the encrypted EPUB.
        outpath: path for the decrypted EPUB.

    Returns:
        0 on success, 1 if the book is DRM-free or not Adobe-protected,
        2 on decryption failure.
    """
    with closing(ZipFile(open(inpath, 'rb'))) as inf:
        namelist = inf.namelist()
        if 'META-INF/rights.xml' not in namelist or \
           'META-INF/encryption.xml' not in namelist:
            print("{0:s} is DRM-free.".format(os.path.basename(inpath)))
            return 1
        for name in META_NAMES:
            namelist.remove(name)
        try:
            rights = etree.fromstring(inf.read('META-INF/rights.xml'))
            adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
            expr = './/%s' % (adept('encryptedKey'),)
            bookkeyelem = rights.find(expr)
            bookkey = bookkeyelem.text
            keytype = bookkeyelem.attrib.get('keyType', '0')
            # The base64 key length distinguishes the DRM flavour.
            if len(bookkey) >= 172 and int(keytype, 10) > 2:
                print("{0:s} is a secure Adobe Adept ePub with hardening.".format(os.path.basename(inpath)))
            elif len(bookkey) == 172:
                print("{0:s} is a secure Adobe Adept ePub.".format(os.path.basename(inpath)))
            elif len(bookkey) == 64:
                print("{0:s} is a secure Adobe PassHash (B&N) ePub.".format(os.path.basename(inpath)))
            else:
                print("{0:s} is not an Adobe-protected ePub!".format(os.path.basename(inpath)))
                return 1
            if len(bookkey) != 64:
                # Normal or "hardened" Adobe ADEPT
                rsakey = RSA.import_key(userkey)  # parses the ASN1 structure
                bookkey = base64.b64decode(bookkey)
                if int(keytype, 10) > 2:
                    bookkey = removeHardening(rights, keytype, bookkey)
                try:
                    bookkey = PKCS1_v1_5.new(rsakey).decrypt(bookkey, None)  # automatically unpads
                except ValueError:
                    bookkey = None
                if bookkey is None:
                    print("Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath)))
                    return 2
            else:
                # Adobe PassHash / B&N
                key = base64.b64decode(userkey)[:16]
                bookkey = base64.b64decode(bookkey)
                bookkey = unpad(AES.new(key, AES.MODE_CBC, b'\x00' * 16).decrypt(bookkey), 16)  # PKCS#7
                if len(bookkey) > 16:
                    bookkey = bookkey[-16:]
            encryption = inf.read('META-INF/encryption.xml')
            decryptor = Decryptor(bookkey, encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
                for path in (["mimetype"] + namelist):
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED
                    if path == "mimetype":
                        zi.compress_type = ZIP_STORED
                    elif path == "META-INF/encryption.xml":
                        # Check if there's still something in there
                        if (decryptor.check_if_remaining()):
                            data = decryptor.get_xml()
                            print("Adding encryption.xml for the remaining embedded files.")
                            # We removed DRM, but there's still stuff like obfuscated fonts.
                        else:
                            continue
                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                        if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except:
                        pass
                    if path == "META-INF/encryption.xml":
                        outf.writestr(zi, data)
                    else:
                        outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print("Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
            return 2
    return 0
def decryptBook(userkey, inpath, outpath):
    """Decrypt an Adobe ADEPT-protected EPUB (Python 2 implementation).

    RSA-decrypts the book key from META-INF/rights.xml with *userkey* and
    writes a decrypted EPUB to *outpath*, preserving per-entry metadata
    where available.

    Returns:
        0 on success, 1 if the book is DRM-free or not ADEPT, 2 on failure.

    Raises:
        ADEPTError: if no crypto backend (PyCrypto/OpenSSL) is available.
    """
    if AES is None:
        raise ADEPTError(u"PyCrypto or OpenSSL must be installed.")
    rsa = RSA(userkey)
    with closing(ZipFile(open(inpath, 'rb'))) as inf:
        namelist = set(inf.namelist())
        if 'META-INF/rights.xml' not in namelist or \
           'META-INF/encryption.xml' not in namelist:
            print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
            return 1
        for name in META_NAMES:
            namelist.remove(name)
        try:
            rights = etree.fromstring(inf.read('META-INF/rights.xml'))
            adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag)
            expr = './/%s' % (adept('encryptedKey'),)
            bookkey = ''.join(rights.findtext(expr))
            # 172 base64 chars == 128-byte RSA block, the ADEPT signature.
            if len(bookkey) != 172:
                print u"{0:s} is not a secure Adobe Adept ePub.".format(os.path.basename(inpath))
                return 1
            bookkey = rsa.decrypt(bookkey.decode('base64'))
            # Padded as per RSAES-PKCS1-v1_5
            if bookkey[-17] != '\x00':
                print u"Could not decrypt {0:s}. Wrong key".format(os.path.basename(inpath))
                return 2
            encryption = inf.read('META-INF/encryption.xml')
            # The AES book key is the last 16 bytes after the padding.
            decryptor = Decryptor(bookkey[-16:], encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
                zi = ZipInfo('mimetype')
                zi.compress_type=ZIP_STORED
                try:
                    # if the mimetype is present, get its info, including time-stamp
                    oldzi = inf.getinfo('mimetype')
                    # copy across fields to be preserved
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    # external attributes are dependent on the create system, so copy both.
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                except:
                    pass
                outf.writestr(zi, inf.read('mimetype'))
                for path in namelist:
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type=ZIP_DEFLATED
                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                    except:
                        pass
                    outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc())
            return 2
    return 0
def _clean_zipinfo(zipinfo: zipfile.ZipInfo) -> zipfile.ZipInfo: zipinfo.create_system = 3 # Linux zipinfo.comment = b'' zipinfo.date_time = (1980, 1, 1, 0, 0, 0) # this is as early as a zipfile can be return zipinfo
def decryptBook(keyb64, inpath, outpath):
    """Decrypt a Barnes & Noble (Ignoble) protected EPUB (Python 2).

    AES-decrypts the book key from META-INF/rights.xml with the first 16
    bytes of the base64-decoded *keyb64*, then writes a decrypted EPUB to
    *outpath*, preserving per-entry metadata where available.

    Returns:
        0 on success, 1 if the book is DRM-free or not a B&N ePub,
        2 on failure.

    Raises:
        IGNOBLEError: if no crypto backend (PyCrypto/OpenSSL) is available.
    """
    if AES is None:
        raise IGNOBLEError(u"PyCrypto or OpenSSL must be installed.")
    key = keyb64.decode("base64")[:16]
    aes = AES(key)
    with closing(ZipFile(open(inpath, "rb"))) as inf:
        namelist = set(inf.namelist())
        if "META-INF/rights.xml" not in namelist or "META-INF/encryption.xml" not in namelist:
            print u"{0:s} is DRM-free.".format(os.path.basename(inpath))
            return 1
        for name in META_NAMES:
            namelist.remove(name)
        try:
            rights = etree.fromstring(inf.read("META-INF/rights.xml"))
            adept = lambda tag: "{%s}%s" % (NSMAP["adept"], tag)
            expr = ".//%s" % (adept("encryptedKey"),)
            bookkey = "".join(rights.findtext(expr))
            # 64 base64 chars is the B&N signature length.
            if len(bookkey) != 64:
                print u"{0:s} is not a secure Barnes & Noble ePub.".format(os.path.basename(inpath))
                return 1
            bookkey = aes.decrypt(bookkey.decode("base64"))
            # Strip PKCS#7-style padding (last byte gives the pad length).
            bookkey = bookkey[: -ord(bookkey[-1])]
            encryption = inf.read("META-INF/encryption.xml")
            decryptor = Decryptor(bookkey[-16:], encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, "wb"), "w", **kwds)) as outf:
                zi = ZipInfo("mimetype")
                zi.compress_type = ZIP_STORED
                try:
                    # if the mimetype is present, get its info, including time-stamp
                    oldzi = inf.getinfo("mimetype")
                    # copy across fields to be preserved
                    zi.date_time = oldzi.date_time
                    zi.comment = oldzi.comment
                    zi.extra = oldzi.extra
                    zi.internal_attr = oldzi.internal_attr
                    # external attributes are dependent on the create system, so copy both.
                    zi.external_attr = oldzi.external_attr
                    zi.create_system = oldzi.create_system
                except:
                    pass
                outf.writestr(zi, inf.read("mimetype"))
                for path in namelist:
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED
                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                    except:
                        pass
                    outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print u"Could not decrypt {0:s} because of an exception:\n{1:s}".format(
                os.path.basename(inpath), traceback.format_exc()
            )
            return 2
    return 0
def decryptFontsBook(inpath, outpath):
    """De-obfuscate embedded fonts in an EPUB.

    Derives the IETF font key (SHA-1 of the package unique-identifier) and
    the Adobe font key (from the UUID identifier) out of the book's OPF,
    then rewrites the EPUB to *outpath* with fonts decrypted via Decryptor.

    Returns:
        0 on success, 1 if nothing to do (no encryption.xml / no OPF),
        2 on decryption failure.
    """
    with closing(ZipFile(open(inpath, 'rb'))) as inf:
        namelist = inf.namelist()
        if 'META-INF/encryption.xml' not in namelist:
            return 1

        # Font key handling:
        font_master_key = None
        adobe_master_encryption_key = None

        contNS = lambda tag: '{%s}%s' % ('urn:oasis:names:tc:opendocument:xmlns:container', tag)
        path = None

        try:
            container = etree.fromstring(inf.read("META-INF/container.xml"))
            rootfiles = container.find(contNS("rootfiles")).findall(contNS("rootfile"))
            for rootfile in rootfiles:
                path = rootfile.get("full-path", None)
                if (path is not None):
                    break
        except:
            pass

        # If path is None, we didn't find an OPF, so we probably don't have a font key.
        # If path is set, it's the path to the main content OPF file.
        if (path is None):
            print("FontDecrypt: No OPF for font obfuscation found")
            return 1
        else:
            packageNS = lambda tag: '{%s}%s' % ('http://www.idpf.org/2007/opf', tag)
            metadataDCNS = lambda tag: '{%s}%s' % ('http://purl.org/dc/elements/1.1/', tag)

            try:
                container = etree.fromstring(inf.read(path))
            except:
                container = []

            ## IETF font key algorithm:
            # The key is the package's unique-identifier dc:identifier text.
            print("FontDecrypt: Checking {0} for IETF font obfuscation keys ... ".format(path), end='')
            secret_key_name = None
            try:
                secret_key_name = container.get("unique-identifier")
            except:
                pass
            try:
                identify_element = container.find(packageNS("metadata")).find(metadataDCNS("identifier"))
                if (secret_key_name is None or secret_key_name == identify_element.get("id")):
                    font_master_key = identify_element.text
            except:
                pass

            if (font_master_key is not None):
                if (secret_key_name is None):
                    print("found '%s'" % (font_master_key))
                else:
                    print("found '%s' (%s)" % (font_master_key, secret_key_name))

                # Trim / remove forbidden characters from the key, then hash it:
                font_master_key = font_master_key.replace(' ', '')
                font_master_key = font_master_key.replace('\t', '')
                font_master_key = font_master_key.replace('\r', '')
                font_master_key = font_master_key.replace('\n', '')
                font_master_key = font_master_key.encode('utf-8')
                font_master_key = hashlib.sha1(font_master_key).digest()
            else:
                print("not found")

            ## Adobe font key algorithm
            # The key is 16 bytes derived from the UUID dc:identifier.
            print("FontDecrypt: Checking {0} for Adobe font obfuscation keys ... ".format(path), end='')

            try:
                metadata = container.find(packageNS("metadata"))
                identifiers = metadata.findall(metadataDCNS("identifier"))
                uid = None
                uidMalformed = False
                for identifier in identifiers:
                    if identifier.get(packageNS("scheme")) == "UUID":
                        if identifier.text[:9] == "urn:uuid:":
                            uid = identifier.text[9:]
                        else:
                            uid = identifier.text
                        break
                    # Fallback: any identifier carrying a urn:uuid: prefix.
                    if identifier.text[:9] == "urn:uuid:":
                        uid = identifier.text[9:]
                        break

                if uid is not None:
                    # Strip whitespace and dashes before validating hex.
                    uid = uid.replace(chr(0x20), '').replace(chr(0x09), '')
                    uid = uid.replace(chr(0x0D), '').replace(chr(0x0A), '').replace('-', '')

                    if len(uid) < 16:
                        uidMalformed = True
                    if not all(c in "0123456789abcdefABCDEF" for c in uid):
                        uidMalformed = True

                    if not uidMalformed:
                        print("found '{0}'".format(uid))
                        # Double the string so short UUIDs still yield 32 hex chars.
                        uid = uid + uid
                        adobe_master_encryption_key = binascii.unhexlify(uid[:32])

                if adobe_master_encryption_key is None:
                    print("not found")
            except:
                print("exception")
                pass

        # Begin decrypting.
        try:
            encryption = inf.read('META-INF/encryption.xml')
            decryptor = Decryptor(font_master_key, adobe_master_encryption_key, encryption)
            kwds = dict(compression=ZIP_DEFLATED, allowZip64=False)
            with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf:
                # Mimetype needs to be the first entry, so remove it from the list
                # whereever it is, then add it at the beginning.
                namelist.remove("mimetype")
                for path in (["mimetype"] + namelist):
                    data = inf.read(path)
                    zi = ZipInfo(path)
                    zi.compress_type = ZIP_DEFLATED

                    if path == "mimetype":
                        # mimetype must not be compressed
                        zi.compress_type = ZIP_STORED

                    elif path == "META-INF/encryption.xml":
                        # Check if there's still other entries not related to fonts
                        if (decryptor.check_if_remaining()):
                            data = decryptor.get_xml()
                            print("FontDecrypt: There's remaining entries in encryption.xml, adding file ...")
                        else:
                            # No remaining entries, no need for that file.
                            continue

                    try:
                        # get the file info, including time-stamp
                        oldzi = inf.getinfo(path)
                        # copy across useful fields
                        zi.date_time = oldzi.date_time
                        zi.comment = oldzi.comment
                        zi.extra = oldzi.extra
                        zi.internal_attr = oldzi.internal_attr
                        # external attributes are dependent on the create system, so copy both.
                        zi.external_attr = oldzi.external_attr
                        zi.create_system = oldzi.create_system
                        if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment):
                            # If the file name or the comment contains any non-ASCII char, set the UTF8-flag
                            zi.flag_bits |= 0x800
                    except:
                        pass

                    if path == "mimetype":
                        outf.writestr(zi, inf.read('mimetype'))
                    elif path == "META-INF/encryption.xml":
                        outf.writestr(zi, data)
                    else:
                        outf.writestr(zi, decryptor.decrypt(path, data))
        except:
            print("FontDecrypt: Could not decrypt fonts in {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc()))
            traceback.print_exc()
            return 2
    return 0