def rebuild(src_dir, dest_path):
    """Rebuild a book at ``dest_path`` from the sources in ``src_dir``.

    The first ``*.opf`` file that ``glob`` finds directly inside ``src_dir``
    is passed, together with ``dest_path``, to ``do_rebuild`` from
    ``calibre.ebooks.mobi.tweak`` through ``fork_job``.

    Raises ValueError when ``src_dir`` contains no ``*.opf`` file.
    """
    candidates = glob.glob(os.path.join(src_dir, '*.opf'))
    if len(candidates) == 0:
        raise ValueError('No OPF file found in %s'%src_dir)
    # For debugging, uncomment the following line
    # def fork_job(a, b, args=None, no_output=True): do_rebuild(*args)
    fork_job(
        'calibre.ebooks.mobi.tweak', 'do_rebuild',
        args=(candidates[0], dest_path), no_output=True)
def rebuild(src_dir, dest_path):
    """Rebuild a book at ``dest_path`` from the sources in ``src_dir``.

    Looks for ``*.opf`` files directly inside ``src_dir`` and passes the
    first match plus ``dest_path`` to ``do_rebuild`` from
    ``calibre.ebooks.mobi.tweak`` through ``fork_job``.

    Raises ValueError when no ``*.opf`` file is found.
    """
    matches = glob.glob(os.path.join(src_dir, '*.opf'))
    if len(matches) == 0:
        raise ValueError('No OPF file found in %s'%src_dir)
    opf_path = matches[0]
    # For debugging, uncomment the following two lines
    # def fork_job(a, b, args=None, no_output=True):
    #     do_rebuild(*args)
    fork_job(
        'calibre.ebooks.mobi.tweak', 'do_rebuild',
        args=(opf_path, dest_path), no_output=True)
def do_convert(path, temp_path, key, instance):
    """Render ``path`` for the viewer and record the size of the output.

    The output directory is ``temp_path`` joined with ``instance['path']``.
    ``render_for_viewer`` from ``calibre.srv.render_book`` is run through
    ``fork_job`` with the file size, mtime and ``key`` as book information.
    Afterwards the summed size of every file yielded by ``walk`` for the
    output directory is stored in ``instance['cache_size']``.
    """
    tdir = os.path.join(temp_path, instance['path'])
    book_info = {
        'size': instance['file_size'],
        'mtime': instance['file_mtime'],
        'hash': key,
    }
    fork_job(
        'calibre.srv.render_book', 'render_for_viewer',
        args=(path, tdir, book_info), timeout=3000, no_output=True)
    instance['cache_size'] = sum(os.path.getsize(f) for f in walk(tdir))
def run(self):
    """Identify the book and store the ranked results on ``self.results``.

    With ``DEBUG_DIALOG`` set, sample results are used. Otherwise
    ``single_identify`` is run through ``fork_job``; each returned OPF blob
    is parsed into book metadata, tagged with its cached-cover flag, and the
    returned caches and log dump are merged into ``self.caches`` and
    ``self.log``. Every result gets its position as ``gui_rank``.

    Nothing is raised: any failure is stored as text in ``self.error``.
    """
    try:
        if DEBUG_DIALOG:
            self.results = self.sample_results()
        else:
            job = fork_job(
                "calibre.ebooks.metadata.sources.worker",
                "single_identify",
                (self.title, self.authors, self.identifiers),
                no_output=True,
                abort=self.abort,
            )
            self.results, covers, caches, log_dump = job["result"]
            parsed = []
            for raw_opf in self.results:
                opf = OPF(BytesIO(raw_opf), basedir=os.getcwdu(), populate_spine=False)
                parsed.append(opf.to_book_metadata())
            self.results = parsed
            for book, has_cover in zip(self.results, covers):
                book.has_cached_cover_url = has_cover
            self.caches.update(caches)
            self.log.load(log_dump)
        for rank, book in enumerate(self.results):
            book.gui_rank = rank
    except WorkerError as e:
        self.error = force_unicode(e.orig_tb)
    except:
        import traceback
        self.error = force_unicode(traceback.format_exc())
def explode(path, dest, question=lambda x:True):
    """Explode the KF8 MOBI file at ``path`` into ``dest``.

    The header is validated first. For a joint KF8/Mobi6 file ``question``
    is called with a confirmation message; a falsy answer makes this
    function return None without exploding anything. Otherwise the result
    of ``do_explode`` (run through ``fork_job``) is returned.

    Raises BadFormat for Topaz files, unreadable headers and files without
    KF8 data, and DRMError when the header reports encryption.
    """
    with open(path, 'rb') as stream:
        magic = stream.read(3)
        stream.seek(0)
        if magic == b'TPZ':
            raise BadFormat(_('This is not a MOBI file. It is a Topaz file.'))

        try:
            header = MetadataHeader(stream, default_log)
        except MobiError:
            raise BadFormat(_('This is not a MOBI file.'))

        if header.encryption_type != 0:
            raise DRMError(_('This file is locked with DRM. It cannot be tweaked.'))

        kf8_type = header.kf8_type
        if kf8_type is None:
            raise BadFormat(_('This MOBI file does not contain a KF8 format '
                'book. KF8 is the new format from Amazon. calibre can '
                'only tweak MOBI files that contain KF8 books. Older '
                'MOBI files without KF8 are not tweakable.'))

        # Only joint files need the user's confirmation
        if kf8_type == 'joint' and not question(_('This MOBI file contains both KF8 and '
                'older Mobi6 data. Tweaking it will remove the Mobi6 data, which '
                'means the file will not be usable on older Kindles. Are you '
                'sure?')):
            return None

    job = fork_job('calibre.ebooks.mobi.tweak', 'do_explode',
                   args=(path, dest), no_output=True)
    return job['result']
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the PDF held by ``stream``.

    The stream is copied to ``input.pdf`` in a temporary directory and
    ``set_metadata_`` from ``calibre.utils.podofo`` is run on it through
    ``fork_job``. When the job reports a change and ``output.pdf`` is
    larger than 100 bytes, the stream is truncated and overwritten with it.
    The stream is always left positioned at offset 0.

    Raises Exception when the job fails with WorkerError.
    """
    with TemporaryDirectory('_podofo_set_metadata') as tdir:
        with open(os.path.join(tdir, 'input.pdf'), 'wb') as pdf_in:
            shutil.copyfileobj(stream, pdf_in)
        from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
        xmp_packet = metadata_to_xmp_packet(mi)
        job_args = (tdir, mi.title, mi.authors, mi.book_producer, mi.tags, xmp_packet)
        try:
            result = fork_job('calibre.utils.podofo', 'set_metadata_', job_args)
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata in (%s): %s' % (mi.title, e.orig_tb))
        else:
            touched = result['result']
        if touched:
            with open(os.path.join(tdir, 'output.pdf'), 'rb') as pdf_out:
                # Measure the output by seeking to its end
                pdf_out.seek(0, 2)
                if pdf_out.tell() > 100:
                    pdf_out.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(pdf_out, stream)
                    stream.flush()
    stream.seek(0)
def open_book(self, pathtoebook):
    """Load the preprocessed HTML of ``pathtoebook`` into the preview.

    ``get_preprocess_html`` is run through ``fork_job`` and writes into a
    temporary file, whose UTF-8 decoded contents are then set as the
    preview's plain text. On any failure an error dialog carrying the
    traceback is shown and its result returned instead.
    """
    with TemporaryFile('_prepprocess_gui') as tf:
        err_msg = _('Failed to generate markup for testing. Click '
                    '"Show Details" to learn more.')
        failed, details = False, None
        try:
            fork_job('calibre.ebooks.oeb.iterator', 'get_preprocess_html',
                     (pathtoebook, tf))
        except WorkerError as e:
            failed, details = True, e.orig_tb
        except:
            import traceback
            failed, details = True, traceback.format_exc()
        if failed:
            return error_dialog(self, _('Failed to generate preview'),
                                err_msg, det_msg=details, show=True)
        with open(tf, 'rb') as markup:
            self.preview.setPlainText(markup.read().decode('utf-8'))
def open_book(self, pathtoebook):
    """Show the preprocessed HTML of ``pathtoebook`` in the preview widget.

    ``get_preprocess_html`` is run through ``fork_job`` with a temporary
    file as its output; the file is then read, decoded as UTF-8 and set as
    the preview's plain text. If the job fails, an error dialog with the
    traceback is shown and its result is returned.
    """
    with TemporaryFile('_prepprocess_gui') as tf:
        err_msg = _('Failed to generate markup for testing. Click '
                    '"Show details" to learn more.')
        failed, details = False, None
        try:
            fork_job('calibre.ebooks.oeb.iterator', 'get_preprocess_html',
                     (pathtoebook, tf))
        except WorkerError as e:
            failed, details = True, e.orig_tb
        except:
            import traceback
            failed, details = True, traceback.format_exc()
        if failed:
            return error_dialog(self, _('Failed to generate preview'),
                                err_msg, det_msg=details, show=True)
        with open(tf, 'rb') as markup:
            self.preview.setPlainText(markup.read().decode('utf-8'))
def get_djvu_metadata(stream, cover=True):
    """Read title, author and optionally the cover of a DjVu ``stream``.

    The stream is copied to ``src.djvu`` in a temporary directory, on which
    ``get_djvu_metadata_worker`` is run through ``fork_job``. A
    ``cover.jpg`` found in that directory afterwards is used as cover data
    when ``cover`` is true.

    Raises RuntimeError when the job fails with WorkerError and ValueError
    when the job returns None.
    """
    with TemporaryDirectory('_djvu_metadata_read') as djvupath:
        stream.seek(0)
        with open(os.path.join(djvupath, 'src.djvu'), 'wb') as djvu_copy:
            shutil.copyfileobj(stream, djvu_copy)
        try:
            res = fork_job('calibre_plugins.djvu_metadata.djvu',
                           'get_djvu_metadata_worker',
                           (djvupath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run djvused')
        info = res['result']
        # Echo the contents of the file named by res['stdout_stderr'], if any
        with open(res['stdout_stderr'], 'rb') as captured:
            worker_output = captured.read().strip()
        if worker_output:
            prints(worker_output)
        if info is None:
            raise ValueError('Could not read metadata from djvu')
        cdata = None
        covpath = os.path.join(djvupath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)
    if cdata:
        mi.cover_data = ('jpg', cdata)
    return mi
def render_html_svg_workaround(path_to_html, log, width=590, height=750):
    """Return cover image data for the HTML file at ``path_to_html``.

    Three strategies are tried in order, each only when the previous one
    produced nothing:

    1. extracting a cover from an embedded SVG (only when the SVG namespace
       occurs in the file),
    2. extracting a calibre-generated cover,
    3. rendering the HTML with ``render_html_data``: directly when
       ``is_ok_to_use_qt()`` allows it, otherwise through ``fork_job``.

    Failures of the extraction steps are ignored on purpose (best effort);
    failures of the forked render are printed. Returns the image data, or
    None when every strategy failed.
    """
    from calibre.ebooks.oeb.base import SVG_NS
    # Read through a context manager so the handle is closed deterministically
    with open(path_to_html, 'rb') as html_file:
        raw = html_file.read()
    base_dir = os.path.dirname(path_to_html)
    data = None
    if SVG_NS in raw:
        try:
            data = extract_cover_from_embedded_svg(raw, base_dir, log)
        except Exception:
            # Best effort: fall through to the next strategy
            pass
    if data is None:
        try:
            data = extract_calibre_cover(raw, base_dir, log)
        except Exception:
            # Best effort: fall through to rendering
            pass

    if data is None:
        from calibre.gui2 import is_ok_to_use_qt
        if is_ok_to_use_qt():
            data = render_html_data(path_to_html, width, height)
        else:
            from calibre.utils.ipc.simple_worker import fork_job, WorkerError
            try:
                result = fork_job('calibre.ebooks', 'render_html_data',
                                  (path_to_html, width, height), no_output=True)
                data = result['result']
            except WorkerError as err:
                prints(err.orig_tb)
            except Exception:
                traceback.print_exc()
    return data
def test_exclusive_file(path=None):
    """Check that ExclusiveFile excludes a second user of the same file.

    Called without ``path`` (the driver): takes the lock on a file in the
    system temporary directory, verifies that a second lock attempt in this
    process fails with LockError, then runs this same function with the
    file path through ``fork_job`` and raises LockError if it reports an
    error.

    Called with ``path`` (the forked side): tries to lock ``path`` and
    returns an error message string if the lock could be taken, otherwise
    None.

    Raises LockError in the driver when either check fails.
    """
    if path is None:
        import tempfile
        f = os.path.join(tempfile.gettempdir(), 'test-exclusive-file')
        with ExclusiveFile(f):
            # Try same process lock. The failure must be raised outside the
            # ``except LockError`` handler, otherwise it is swallowed and
            # this check can never fail.
            try:
                with ExclusiveFile(f, timeout=1):
                    pass
            except LockError:
                pass
            else:
                raise LockError(
                    "ExclusiveFile failed to prevent multiple uses in the same process!"
                )
            # Try different process lock
            from calibre.utils.ipc.simple_worker import fork_job
            err = fork_job('calibre.utils.lock', 'test_exclusive_file', (f, ))['result']
            if err is not None:
                raise LockError('ExclusiveFile failed with error: %s' % err)
    else:
        try:
            with ExclusiveFile(path, timeout=1):
                raise Exception(
                    'ExclusiveFile failed to prevent multiple uses in different processes!'
                )
        except LockError:
            # Expected: the driver still holds the lock
            pass
        except Exception as err:
            return str(err)
def run(self):
    """Identify the book and store the ranked results on ``self.results``.

    With ``DEBUG_DIALOG`` set, sample results are used. Otherwise
    ``single_identify`` is run through ``fork_job``; the returned OPF blobs
    are parsed into book metadata, each result is tagged with its
    cached-cover flag, and the returned caches and log dump are merged into
    ``self.caches`` and ``self.log``. Every result gets its position as
    ``gui_rank``.

    Nothing is raised: any failure is stored as text in ``self.error``.
    """
    try:
        if DEBUG_DIALOG:
            self.results = self.sample_results()
        else:
            job = fork_job('calibre.ebooks.metadata.sources.worker',
                           'single_identify',
                           (self.title, self.authors, self.identifiers),
                           no_output=True, abort=self.abort)
            self.results, covers, caches, log_dump = job['result']
            parsed = []
            for raw_opf in self.results:
                opf = OPF(BytesIO(raw_opf), basedir=os.getcwdu(), populate_spine=False)
                parsed.append(opf.to_book_metadata())
            self.results = parsed
            for book, has_cover in zip(self.results, covers):
                book.has_cached_cover_url = has_cover
            self.caches.update(caches)
            self.log.load(log_dump)
        for rank, book in enumerate(self.results):
            book.gui_rank = rank
    except WorkerError as e:
        self.error = force_unicode(e.orig_tb)
    except:
        import traceback
        self.error = force_unicode(traceback.format_exc())
def render_html_data(path_to_html, width, height):
    """Render ``path_to_html`` to JPEG and return the image bytes.

    ``main`` from ``calibre.ebooks.render_html`` is run through ``fork_job``
    with a temporary output directory; on success the contents of
    ``rendered.jpeg`` from that directory are returned. On failure the
    problem is printed to stderr and None is returned.

    ``width`` and ``height`` are accepted but not used by this function.
    """
    from calibre.ptempfile import TemporaryDirectory
    from calibre.utils.ipc.simple_worker import fork_job, WorkerError
    job = {}

    def report_error(text=''):
        # Print the failure, then the file named by job['stdout_stderr']
        # when a job result is available
        prints('Failed to render', path_to_html, 'with errors:', file=sys.stderr)
        if text:
            prints(text, file=sys.stderr)
        if job and job['stdout_stderr']:
            with open(job['stdout_stderr'], 'rb') as captured:
                prints(captured.read(), file=sys.stderr)

    with TemporaryDirectory('-render-html') as tdir:
        try:
            job = fork_job('calibre.ebooks.render_html', 'main',
                           args=(path_to_html, tdir, 'jpeg'))
        except WorkerError as e:
            report_error(e.orig_tb)
            return None
        if not job['result']:
            report_error()
            return None
        with open(os.path.join(tdir, 'rendered.jpeg'), 'rb') as rendered:
            return rendered.read()
def __init__(self, pathtoazw3, log, clone_data=None, tdir=None):
    """Open an AZW3 (KF8-only MOBI) file as a container.

    When ``clone_data`` is given, the container is initialised from it and
    ``pathtoazw3``/``obfuscated_fonts`` are copied from the clone data.
    Otherwise the file header is validated and the book is exploded into
    ``tdir`` (a new persistent temporary directory when ``tdir`` is None)
    by ``do_explode`` run through ``fork_job``.

    Raises InvalidMobi for Topaz files, unreadable headers, files without
    KF8 data, joint KF8/Mobi6 files and failed explosions; raises DRMError
    when the header reports a non-zero encryption type.
    """
    if clone_data is not None:
        super(AZW3Container, self).__init__(None, None, log, clone_data=clone_data)
        self.pathtoazw3 = clone_data['pathtoazw3']
        self.obfuscated_fonts = clone_data['obfuscated_fonts']
        return

    self.pathtoazw3 = pathtoazw3
    if tdir is None:
        tdir = PersistentTemporaryDirectory('_azw3_container')
    self.root = tdir = os.path.abspath(os.path.realpath(tdir))

    with open(pathtoazw3, 'rb') as stream:
        if stream.read(3) == b'TPZ':
            raise InvalidMobi(
                _('This is not a MOBI file. It is a Topaz file.'))

        try:
            header = MetadataHeader(stream, default_log)
        except MobiError:
            raise InvalidMobi(_('This is not a MOBI file.'))

        if header.encryption_type != 0:
            raise DRMError()

        kf8_type = header.kf8_type
        if kf8_type is None:
            raise InvalidMobi(
                _('This MOBI file does not contain a KF8 format '
                  'book. KF8 is the new format from Amazon. calibre can '
                  'only edit MOBI files that contain KF8 books. Older '
                  'MOBI files without KF8 are not editable.'))
        if kf8_type == 'joint':
            raise InvalidMobi(
                _('This MOBI file contains both KF8 and '
                  'older Mobi6 data. calibre can only edit MOBI files '
                  'that contain only KF8 data.'))

    try:
        exploded = fork_job(
            'calibre.ebooks.oeb.polish.container', 'do_explode',
            args=(pathtoazw3, tdir), no_output=True)
    except WorkerError as e:
        log(e.orig_tb)
        raise InvalidMobi('Failed to explode MOBI')
    opf_path, obfuscated_fonts = exploded['result']

    super(AZW3Container, self).__init__(tdir, opf_path, log)
    # Font names are stored with forward slashes regardless of os.sep
    self.obfuscated_fonts = set(
        name.replace(os.sep, '/') for name in obfuscated_fonts)
def run_extract_book(*args, **kwargs):
    """Run ``extract_book`` through ``fork_job`` and return its result.

    All positional and keyword arguments are forwarded unchanged to
    ``extract_book`` from ``calibre.ebooks.oeb.iterator.book``; the job is
    given a timeout of 3000.
    """
    from calibre.utils.ipc.simple_worker import fork_job
    job = fork_job(
        'calibre.ebooks.oeb.iterator.book', 'extract_book',
        args=args, kwargs=kwargs, timeout=3000, no_output=True)
    return job['result']
def __init__(self, pathtoazw3, log, clone_data=None, tdir=None):
    """Open an AZW3 (KF8-only MOBI) file as a container.

    With ``clone_data`` the container is initialised from it and
    ``pathtoazw3``/``obfuscated_fonts`` are copied from the clone data.
    Otherwise the file header is validated and the book is exploded into
    ``tdir`` (a new persistent temporary directory when ``tdir`` is None)
    by ``do_explode`` run through ``fork_job``.

    Raises InvalidMobi for Topaz files, unreadable headers, files without
    KF8 data, joint KF8/Mobi6 files and failed explosions; raises DRMError
    when the header reports a non-zero encryption type.
    """
    if clone_data is not None:
        super(AZW3Container, self).__init__(None, None, log, clone_data=clone_data)
        self.pathtoazw3 = clone_data["pathtoazw3"]
        self.obfuscated_fonts = clone_data["obfuscated_fonts"]
        return

    self.pathtoazw3 = pathtoazw3
    if tdir is None:
        tdir = PersistentTemporaryDirectory("_azw3_container")
    self.root = tdir = os.path.abspath(os.path.realpath(tdir))

    with open(pathtoazw3, "rb") as stream:
        if stream.read(3) == b"TPZ":
            raise InvalidMobi(_("This is not a MOBI file. It is a Topaz file."))

        try:
            header = MetadataHeader(stream, default_log)
        except MobiError:
            raise InvalidMobi(_("This is not a MOBI file."))

        if header.encryption_type != 0:
            raise DRMError()

        kf8_type = header.kf8_type
        if kf8_type is None:
            raise InvalidMobi(
                _(
                    "This MOBI file does not contain a KF8 format "
                    "book. KF8 is the new format from Amazon. calibre can "
                    "only edit MOBI files that contain KF8 books. Older "
                    "MOBI files without KF8 are not editable."
                )
            )
        if kf8_type == "joint":
            raise InvalidMobi(
                _(
                    "This MOBI file contains both KF8 and "
                    "older Mobi6 data. calibre can only edit MOBI files "
                    "that contain only KF8 data."
                )
            )

    try:
        exploded = fork_job(
            "calibre.ebooks.oeb.polish.container",
            "do_explode",
            args=(pathtoazw3, tdir),
            no_output=True,
        )
    except WorkerError as e:
        log(e.orig_tb)
        raise InvalidMobi("Failed to explode MOBI")
    opf_path, obfuscated_fonts = exploded["result"]

    super(AZW3Container, self).__init__(tdir, opf_path, log)
    # Font names are stored with forward slashes regardless of os.sep
    self.obfuscated_fonts = set(
        name.replace(os.sep, "/") for name in obfuscated_fonts
    )
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from the PDF in ``stream``.

    The stream is copied to ``src.pdf`` in a temporary directory and
    ``read_info`` from ``calibre.ebooks.metadata.pdf`` is run on that
    directory through ``fork_job``. Title, author, creator, keywords and
    subject of the returned info dict are mapped onto a MetaInformation
    object; the first keyword accepted by ``check_isbn`` becomes
    ``mi.isbn`` and is dropped from the tags.

    Raises RuntimeError when the job fails with WorkerError and ValueError
    when the returned info dict is empty or missing.
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as pdf_copy:
            shutil.copyfileobj(stream, pdf_copy)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        # Echo the contents of the file named by res['stdout_stderr'], if any
        with open(res['stdout_stderr'], 'rb') as captured:
            worker_output = captured.read().strip()
        if worker_output:
            prints(worker_output)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        cdata = None
        covpath = os.path.join(pdfpath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get('Keywords', None)
    if keywords:
        mi.tags = [part.strip() for part in keywords.split(',')]
        isbn = [found for found in (check_isbn(tag) for tag in mi.tags) if found]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from the PDF in ``stream``.

    The stream is copied to ``src.pdf`` in a temporary directory and
    ``read_info`` from ``calibre.ebooks.metadata.pdf`` is run on that
    directory through ``fork_job``. Title, author, creator, keywords and
    subject of the returned info dict are mapped onto a MetaInformation
    object; the first keyword accepted by ``check_isbn`` becomes
    ``mi.isbn`` and is dropped from the tags.

    Raises RuntimeError when the job fails with WorkerError and ValueError
    when the returned info dict is empty or missing.
    """
    with TemporaryDirectory("_pdf_metadata_read") as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, "src.pdf"), "wb") as pdf_copy:
            shutil.copyfileobj(stream, pdf_copy)
        try:
            res = fork_job(
                "calibre.ebooks.metadata.pdf", "read_info", (pdfpath, bool(cover))
            )
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError("Failed to run pdfinfo")
        info = res["result"]
        # Echo the contents of the file named by res["stdout_stderr"], if any
        with open(res["stdout_stderr"], "rb") as captured:
            worker_output = captured.read().strip()
        if worker_output:
            prints(worker_output)
        if not info:
            raise ValueError("Could not read info dict from PDF")
        cdata = None
        covpath = os.path.join(pdfpath, "cover.jpg")
        if cover and os.path.exists(covpath):
            with open(covpath, "rb") as cover_file:
                cdata = cover_file.read()

    title = info.get("Title", None)
    author = info.get("Author", None)
    authors = [_("Unknown")] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get("Creator", None)
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get("Keywords", None)
    if keywords:
        mi.tags = [part.strip() for part in keywords.split(",")]
        isbn = [found for found in (check_isbn(tag) for tag in mi.tags) if found]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get("Subject", None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ("jpeg", cdata)
    return mi
def get_image_urls(self, title, author, log, abort, timeout):
    """Return the result of the forked Google Images ``search`` job.

    ``search`` from ``calibre.ebooks.metadata.sources.google_images`` is
    run through ``fork_job`` with the title, author, the configured
    ``size`` preference and ``timeout``. Any exception is logged on ``log``
    and an empty list is returned instead.
    """
    from calibre.utils.ipc.simple_worker import fork_job, WorkerError
    try:
        job = fork_job(
            'calibre.ebooks.metadata.sources.google_images', 'search',
            args=(title, author, self.prefs['size'], timeout),
            no_output=True, abort=abort, timeout=timeout)
        return job['result']
    except WorkerError as e:
        # Log the worker's own traceback first, when it supplied one
        if e.orig_tb:
            log.error(e.orig_tb)
        log.exception('Searching google failed:' + as_unicode(e))
    except Exception as e:
        log.exception('Searching google failed:' + as_unicode(e))
    return []
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from the PDF in ``stream``.

    The stream is copied to ``src.pdf`` in a temporary directory and
    ``read_info`` from ``calibre.ebooks.metadata.pdf`` is run on that
    directory through ``fork_job``. Title, author, creator, keywords and
    subject of the returned info dict are mapped onto a MetaInformation
    object; the subject, when present, becomes the first tag.

    Raises RuntimeError when the job fails with WorkerError and ValueError
    when the returned info dict is empty or missing.
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as pdf_copy:
            shutil.copyfileobj(stream, pdf_copy)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        # Echo the contents of the file named by res['stdout_stderr'], if any
        with open(res['stdout_stderr'], 'rb') as captured:
            worker_output = captured.read().strip()
        if worker_output:
            prints(worker_output)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        cdata = None
        covpath = os.path.join(pdfpath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None)
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get('Keywords', None)
    if keywords:
        mi.tags = [part.strip() for part in keywords.split(',')]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
def __init__(self, pathtoazw3, log, clone_data=None, tdir=None):
    """Open an AZW3 (KF8-only MOBI) file as a container.

    With ``clone_data`` the container is initialised from it and
    ``pathtoazw3``/``obfuscated_fonts`` are copied from the clone data.
    Otherwise the file header is validated and the book is exploded into
    ``tdir`` (a new persistent temporary directory when ``tdir`` is None)
    by ``do_explode`` run through ``fork_job``.

    Raises InvalidMobi for Topaz files, unreadable headers, files without
    KF8 data, joint KF8/Mobi6 files and failed explosions; raises DRMError
    when the header reports a non-zero encryption type.
    """
    if clone_data is not None:
        super(AZW3Container, self).__init__(None, None, log, clone_data=clone_data)
        self.pathtoazw3 = clone_data['pathtoazw3']
        self.obfuscated_fonts = clone_data['obfuscated_fonts']
        return

    self.pathtoazw3 = pathtoazw3
    if tdir is None:
        tdir = PersistentTemporaryDirectory('_azw3_container')
    self.root = tdir = os.path.abspath(os.path.realpath(tdir))

    with open(pathtoazw3, 'rb') as stream:
        if stream.read(3) == b'TPZ':
            raise InvalidMobi(_('This is not a MOBI file. It is a Topaz file.'))

        try:
            header = MetadataHeader(stream, default_log)
        except MobiError:
            raise InvalidMobi(_('This is not a MOBI file.'))

        if header.encryption_type != 0:
            raise DRMError()

        kf8_type = header.kf8_type
        if kf8_type is None:
            raise InvalidMobi(_('This MOBI file does not contain a KF8 format '
                    'book. KF8 is the new format from Amazon. calibre can '
                    'only edit MOBI files that contain KF8 books. Older '
                    'MOBI files without KF8 are not editable.'))
        if kf8_type == 'joint':
            raise InvalidMobi(_('This MOBI file contains both KF8 and '
                    'older Mobi6 data. calibre can only edit MOBI files '
                    'that contain only KF8 data.'))

    try:
        exploded = fork_job(
            'calibre.ebooks.oeb.polish.container', 'do_explode',
            args=(pathtoazw3, tdir), no_output=True)
    except WorkerError as e:
        log(e.orig_tb)
        raise InvalidMobi('Failed to explode MOBI')
    opf_path, obfuscated_fonts = exploded['result']

    super(AZW3Container, self).__init__(tdir, opf_path, log)
    # Font names are stored with forward slashes regardless of os.sep
    self.obfuscated_fonts = set(
        name.replace(os.sep, '/') for name in obfuscated_fonts)
def search(self, query, max_results=10, timeout=60):
    """Yield up to ``max_results`` SearchResult objects from woblink.com.

    The catalogue page for ``query`` is fetched by ``get_results`` run
    through ``fork_job`` (with ``js_browser`` passed as module source
    code), parsed, and one result is yielded per catalogue entry that has
    a detail link. Results whose format list mentions ``DRM`` are marked
    DRM_LOCKED, all others DRM_UNLOCKED.

    Raises Exception when the job fails with WorkerError.
    """
    url = 'http://woblink.com/katalog-ebooki?query=' + urllib.quote_plus(query.encode('utf-8'))
    # Ask for a bigger page when more than the default 10 results are wanted
    if max_results > 20:
        url += '&limit=30'
    elif max_results > 10:
        url += '&limit=20'

    remaining = max_results

    try:
        results = fork_job(js_browser, 'get_results', (url, timeout,), module_is_source_code=True)
    except WorkerError as e:
        raise Exception('Could not get results: %s'%e.orig_tb)
    doc = html.fromstring(strip_encoding_declarations(results['result']))

    cover_box = './/div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]'
    for entry in doc.xpath('//div[@class="nw_katalog_lista_ksiazka"]'):
        if remaining <= 0:
            break

        detail_link = ''.join(entry.xpath(cover_box + '/a[1]/@href'))
        if not detail_link:
            continue

        cover_url = ''.join(entry.xpath(cover_box + '/a[1]/img/@src'))
        title = ''.join(entry.xpath('.//h2[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
        author = ', '.join(entry.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
        price = ''.join(entry.xpath('.//div[@class="nw_opcjezakupu_cena"]/text()'))
        formats = ', '.join(entry.xpath('.//p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()'))

        s = SearchResult()
        s.cover_url = 'http://woblink.com' + cover_url
        s.title = title.strip()
        s.author = author.strip()
        s.price = price + ' zł'
        s.detail_item = detail_link.strip()
        s.formats = formats
        if 'DRM' in formats:
            s.drm = SearchResult.DRM_LOCKED
        else:
            s.drm = SearchResult.DRM_UNLOCKED

        remaining -= 1
        yield s
def run_fork(self):
    """Run ``single_covers`` through ``fork_job`` while watching its output dir.

    A daemon thread running ``self.monitor_tdir`` is started on a temporary
    directory, which is also passed to the job. The job result is appended
    to ``self.log``. Whether or not the job succeeds, ``self.keep_going``
    is reset to False and the thread is joined before the directory is
    left.
    """
    with TemporaryDirectory('_single_metadata_download') as tdir:
        self.keep_going = True
        watcher = Thread(target=self.monitor_tdir, args=(tdir,))
        watcher.daemon = True
        watcher.start()

        job_args = (self.title, self.authors, self.identifiers, self.caches, tdir)
        try:
            res = fork_job('calibre.ebooks.metadata.sources.worker',
                           'single_covers', job_args,
                           no_output=True, abort=self.abort)
            self.log.append_dump(res['result'])
        finally:
            self.keep_going = False
            watcher.join()
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the PDF held by ``stream``.

    The stream is copied to ``input.pdf`` in a temporary directory and
    ``set_metadata_`` from ``calibre.utils.podofo`` is run on it through
    ``fork_job``. When the job reports a change and ``output.pdf`` is
    larger than 100 bytes, the stream is truncated and overwritten with it.
    The stream is always left positioned at offset 0.

    Raises Exception when the job fails with WorkerError.
    """
    with TemporaryDirectory(u'_podofo_set_metadata') as tdir:
        with open(os.path.join(tdir, u'input.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            # fork_job returns a dict; the worker's return value is under
            # 'result'. Testing the dict itself would always be true.
            touched = fork_job('calibre.utils.podofo', 'set_metadata_',
                    (tdir, mi.title, mi.authors, mi.book_producer,
                     mi.tags))['result']
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata: %s'%e.orig_tb)
        if touched:
            with open(os.path.join(tdir, u'output.pdf'), 'rb') as f:
                # Measure the output by seeking to its end
                f.seek(0, 2)
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the PDF held by ``stream``.

    The stream is copied to ``input.pdf`` in a temporary directory and
    ``set_metadata_`` from ``calibre.utils.podofo`` is run on it through
    ``fork_job``. When the job reports a change and ``output.pdf`` is
    larger than 100 bytes, the stream is truncated and overwritten with it.
    The stream is always left positioned at offset 0.

    Raises Exception when the job fails with WorkerError.
    """
    with TemporaryDirectory(u'_podofo_set_metadata') as tdir:
        with open(os.path.join(tdir, u'input.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            # fork_job returns a dict; the worker's return value is under
            # 'result'. Testing the dict itself would always be true.
            touched = fork_job(
                'calibre.utils.podofo', 'set_metadata_',
                (tdir, mi.title, mi.authors, mi.book_producer,
                 mi.tags))['result']
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata: %s' % e.orig_tb)
        if touched:
            with open(os.path.join(tdir, u'output.pdf'), 'rb') as f:
                # Measure the output by seeking to its end
                f.seek(0, 2)
                if f.tell() > 100:
                    f.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(f, stream)
                    stream.flush()
    stream.seek(0)
def set_metadata(stream, mi):
    """Write the metadata in ``mi`` into the PDF held by ``stream``.

    The stream is copied to ``input.pdf`` in a temporary directory and
    ``set_metadata_`` from ``calibre.utils.podofo`` is run on it through
    ``fork_job``, with an XMP packet built from ``mi``. When the job
    reports a change and ``output.pdf`` is larger than 100 bytes, the
    stream is truncated and overwritten with it. The stream is always left
    positioned at offset 0.

    Raises Exception when the job fails with WorkerError.
    """
    with TemporaryDirectory(u'_podofo_set_metadata') as tdir:
        with open(os.path.join(tdir, u'input.pdf'), 'wb') as pdf_in:
            shutil.copyfileobj(stream, pdf_in)
        from calibre.ebooks.metadata.xmp import metadata_to_xmp_packet
        xmp_packet = metadata_to_xmp_packet(mi)
        job_args = (tdir, mi.title, mi.authors, mi.book_producer, mi.tags, xmp_packet)
        try:
            result = fork_job('calibre.utils.podofo', 'set_metadata_', job_args)
        except WorkerError as e:
            raise Exception('Failed to set PDF metadata in (%s): %s'%(mi.title, e.orig_tb))
        else:
            touched = result['result']
        if touched:
            with open(os.path.join(tdir, u'output.pdf'), 'rb') as pdf_out:
                # Measure the output by seeking to its end
                pdf_out.seek(0, 2)
                if pdf_out.tell() > 100:
                    pdf_out.seek(0)
                    stream.seek(0)
                    stream.truncate()
                    shutil.copyfileobj(pdf_out, stream)
                    stream.flush()
    stream.seek(0)
def get_isbn_from_pdf(log, pdf_path):
    '''
    Copy the PDF at pdf_path into a temporary directory and run get_isbn
    on the copy, returning its result.

    On calibre versions before 0.8.55 get_isbn is called in-process (after
    logging a warning); otherwise it is run through fork_job and the
    contents of the file named by the job's 'stdout_stderr' entry are
    logged. The copy is removed again in either case.

    Raises RuntimeError when the job fails with WorkerError.
    '''
    with TemporaryDirectory('_isbn_pdf') as output_dir:
        pdf_copy = os.path.join(output_dir, 'src.pdf')
        with open(pdf_path, 'rb') as original, open(pdf_copy, 'wb') as duplicate:
            shutil.copyfileobj(original, duplicate)
        try:
            # We want to run the scanning of the PDF on a fork_job, however
            # that will only be "fixed" in calibre 0.8.55 to allow calling
            # a calibre plugin from such a job. In the meantime, do it the
            # risky way of calling from in-process.
            if numeric_version < (0, 8, 55):
                log.error(
                    'Warning: PDF analysis may crash, upgrade to calibre 0.8.55 when possible'
                )
                return get_isbn(output_dir, 'src.pdf', log)
            res = fork_job('calibre_plugins.extract_isbn.pdf', 'get_isbn',
                           (output_dir, 'src.pdf'))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo/pdftohtml')
        finally:
            # Best effort removal of the copy
            try:
                os.remove(pdf_copy)
            except:
                pass

    with open(res['stdout_stderr'], 'rb') as captured:
        worker_output = captured.read().strip()
    if worker_output:
        log(worker_output)
    return res['result']
def get_pdf_page_count(book_path):
    '''
    Return the 'Pages' entry of the PDF's info dict as an int, or None when
    the info dict has no such entry.

    The PDF is copied to src.pdf in a temporary directory and read_info
    from calibre.ebooks.metadata.pdf is run on that directory through
    fork_job (without cover extraction).

    Raises RuntimeError when the job fails with WorkerError and ValueError
    when the returned info dict is empty or missing.
    '''
    from calibre.ptempfile import TemporaryDirectory
    with TemporaryDirectory('_pages_pdf') as pdfpath:
        pdf_copy = os.path.join(pdfpath, 'src.pdf')
        shutil.copyfile(book_path, pdf_copy)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, False))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        # Let's try to delete this extra copy straight away
        try:
            os.remove(pdf_copy)
        except:
            pass

    info = res['result']
    if not info:
        raise ValueError('Could not read info dict from PDF')
    if 'Pages' not in info:
        return None
    return int(info['Pages'])
def download(all_ids, tf, db, do_identify, covers, ensure_fields, log=None, abort=None, notifications=None):
    """Download metadata for ``all_ids`` in batches of ten through ``fork_job``.

    For every batch the current metadata is read from ``db``, converted to
    OPF and handed to ``main`` from
    ``calibre.ebooks.metadata.sources.worker``. Per-book ``<id>.log`` files
    found in the working directory afterwards are appended to the file
    ``tf`` under a header line. Processing stops before the next batch once
    ``abort`` is set.

    Returns the tuple ``(aborted, ans, tdir, tf, failed_ids,
    failed_covers, title_map, lm_map, all_failed)``, where ``ans`` holds
    the ids of processed batches that are not in the failed ids.

    Raises Exception carrying the original traceback when a job fails with
    a WorkerError that has one; otherwise the WorkerError is re-raised.
    """
    batch_size = 10
    batches = split_jobs(all_ids, batch_size=batch_size)
    tdir = PersistentTemporaryDirectory('_metadata_bulk')
    heartbeat = HeartBeat(tdir)

    failed_ids, failed_covers, ans = set(), set(), set()
    title_map, lm_map = {}, {}
    all_failed = True
    aborted = False
    count = 0
    banner = '#'*20

    notifier = Notifier(notifications, title_map, tdir, len(all_ids))
    notifier.start()

    try:
        for ids in batches:
            if abort.is_set():
                log.error('Aborting...')
                break

            current = {}
            for book_id in ids:
                current[book_id] = db.get_metadata(
                    book_id, index_is_id=True, get_user_categories=False)
            for book_id in ids:
                title_map[book_id] = current[book_id].title
                lm_map[book_id] = current[book_id].last_modified
            as_opf = {}
            for book_id, mi in iteritems(current):
                as_opf[book_id] = metadata_to_opf(mi, default_lang='und')

            try:
                ret = fork_job('calibre.ebooks.metadata.sources.worker', 'main',
                               (do_identify, covers, as_opf, ensure_fields, tdir),
                               abort=abort, heartbeat=heartbeat, no_output=True)
            except WorkerError as e:
                if e.orig_tb:
                    raise Exception('Failed to download metadata. Original '
                                    'traceback: \n\n'+e.orig_tb)
                raise
            count += batch_size

            fids, fcovs, allf = ret['result']
            if not allf:
                all_failed = False
            failed_ids = failed_ids | set(fids)
            failed_covers = failed_covers | set(fcovs)
            ans = ans | (set(ids) - fids)

            # Append each book's log file, when present, to the combined log
            for book_id in ids:
                lp = os.path.join(tdir, '%d.log'%book_id)
                if not os.path.exists(lp):
                    continue
                header = '\n' + banner + ' Log for %s '%title_map[book_id] + banner + '\n'
                with open(tf, 'ab') as dest, open(lp, 'rb') as src:
                    dest.write(header.encode('utf-8'))
                    shutil.copyfileobj(src, dest)

        if abort.is_set():
            aborted = True
        log('Download complete, with %d failures'%len(failed_ids))
        return (aborted, ans, tdir, tf, failed_ids, failed_covers,
                title_map, lm_map, all_failed)
    finally:
        notifier.keep_going = False
def download(all_ids,
             tf,
             db,
             do_identify,
             covers,
             ensure_fields,
             log=None,
             abort=None,
             notifications=None):
    """Download metadata for ``all_ids`` in batches of ten through ``fork_job``.

    For every batch the current metadata is read from ``db``, converted to
    OPF and handed to ``main`` from
    ``calibre.ebooks.metadata.sources.worker``. Per-book ``<id>.log`` files
    found in the working directory afterwards are appended to the file
    ``tf`` under a header line. Processing stops before the next batch once
    ``abort`` is set.

    Returns the tuple ``(aborted, ans, tdir, tf, failed_ids,
    failed_covers, title_map, lm_map, all_failed)``, where ``ans`` holds
    the ids of processed batches that are not in the failed ids.

    Raises Exception carrying the original traceback when a job fails with
    a WorkerError that has one; otherwise the WorkerError is re-raised.
    """
    batch_size = 10
    batches = split_jobs(all_ids, batch_size=batch_size)
    tdir = PersistentTemporaryDirectory('_metadata_bulk')
    heartbeat = HeartBeat(tdir)

    failed_ids, failed_covers, ans = set(), set(), set()
    title_map, lm_map = {}, {}
    all_failed = True
    aborted = False
    count = 0
    banner = '#' * 20

    notifier = Notifier(notifications, title_map, tdir, len(all_ids))
    notifier.start()

    try:
        for ids in batches:
            if abort.is_set():
                log.error('Aborting...')
                break

            current = {}
            for book_id in ids:
                current[book_id] = db.get_metadata(
                    book_id, index_is_id=True, get_user_categories=False)
            for book_id in ids:
                title_map[book_id] = current[book_id].title
                lm_map[book_id] = current[book_id].last_modified
            as_opf = {}
            for book_id, mi in iteritems(current):
                as_opf[book_id] = metadata_to_opf(mi, default_lang='und')

            try:
                ret = fork_job(
                    'calibre.ebooks.metadata.sources.worker',
                    'main',
                    (do_identify, covers, as_opf, ensure_fields, tdir),
                    abort=abort,
                    heartbeat=heartbeat,
                    no_output=True)
            except WorkerError as e:
                if e.orig_tb:
                    raise Exception('Failed to download metadata. Original '
                                    'traceback: \n\n' + e.orig_tb)
                raise
            count += batch_size

            fids, fcovs, allf = ret['result']
            if not allf:
                all_failed = False
            failed_ids = failed_ids | set(fids)
            failed_covers = failed_covers | set(fcovs)
            ans = ans | (set(ids) - fids)

            # Append each book's log file, when present, to the combined log
            for book_id in ids:
                lp = os.path.join(tdir, '%d.log' % book_id)
                if not os.path.exists(lp):
                    continue
                header = ('\n' + banner + ' Log for %s ' % title_map[book_id] +
                          banner + '\n')
                with open(tf, 'ab') as dest, open(lp, 'rb') as src:
                    dest.write(header.encode('utf-8'))
                    shutil.copyfileobj(src, dest)

        if abort.is_set():
            aborted = True
        log('Download complete, with %d failures' % len(failed_ids))
        return (aborted, ans, tdir, tf, failed_ids, failed_covers, title_map,
                lm_map, all_failed)
    finally:
        notifier.keep_going = False
def auto_add(self):
    """Stage new files from ``self.path`` and pass them to ``self.callback``.

    For every eligible file a temporary directory is created under
    ``self.tdir`` and ``forked_read_metadata`` is run on the file through
    ``fork_job``. Missing or unparsable ``size.txt`` files are replaced by
    one containing ``0``; a missing or too small ``metadata.opf`` is
    replaced by metadata derived from the file name. The file name is then
    added to ``self.staging``. ``self.callback`` receives a dict mapping
    file names to their directories when at least one file was staged.
    """
    from calibre.utils.ipc.simple_worker import fork_job, WorkerError
    from calibre.ebooks.metadata.opf2 import metadata_to_opf
    from calibre.ebooks.metadata.meta import metadata_from_filename

    def eligible(name):
        full = os.path.join(self.path, name)
        return (
            # Must not be in the process of being added to the db
            name not in self.staging and
            # Firefox creates 0 byte placeholder files when downloading
            os.stat(full).st_size > 0 and
            # Must be a file
            os.path.isfile(full) and
            # Must have read and write permissions
            os.access(full, os.R_OK|os.W_OK) and
            # Must be a known ebook file type
            self.is_filename_allowed(name)
        )

    files = [name for name in os.listdir(self.path) if eligible(name)]
    data = {}
    # Give any in progress copies time to complete
    time.sleep(2)

    for fname in files:
        book_path = os.path.join(self.path, fname)

        # Try opening the file for reading, if the OS prevents us, then at
        # least on windows, it means the file is open in another
        # application for writing. We will get notified by
        # QFileSystemWatcher when writing is completed, so ignore for now.
        try:
            open(book_path, 'rb').close()
        except:
            continue

        tdir = tempfile.mkdtemp(dir=self.tdir)
        try:
            fork_job('calibre.ebooks.metadata.meta', 'forked_read_metadata',
                     (book_path, tdir), no_output=True)
        except WorkerError as e:
            prints('Failed to read metadata from:', fname)
            prints(e.orig_tb)
        except:
            import traceback
            traceback.print_exc()

        # Ensure that the pre-metadata file size is present. If it isn't,
        # write 0 so that the file is rescanned
        szpath = os.path.join(tdir, 'size.txt')
        try:
            with open(szpath, 'rb') as size_file:
                int(size_file.read())
        except:
            with open(szpath, 'wb') as size_file:
                size_file.write(b'0')

        opfpath = os.path.join(tdir, 'metadata.opf')
        try:
            if os.stat(opfpath).st_size < 30:
                raise Exception('metadata reading failed')
        except:
            mi = metadata_from_filename(fname)
            with open(opfpath, 'wb') as opf_file:
                opf_file.write(metadata_to_opf(mi))

        self.staging.add(fname)
        data[fname] = tdir

    if data:
        self.callback(data)
def search(self, query, max_results=10, timeout=60):
    """Yield up to ``max_results`` SearchResult objects from woblink.com.

    The catalogue page for ``query`` is fetched by ``get_results`` run
    through ``fork_job`` (with ``js_browser`` passed as module source
    code), parsed, and one result is yielded per catalogue entry that has
    a detail link. Results whose format list mentions ``DRM`` are marked
    DRM_LOCKED, all others DRM_UNLOCKED.

    Raises Exception when the job fails with WorkerError.
    """
    url = 'http://woblink.com/ebooki-kategorie?query=' + urllib.quote_plus(
        query.encode('utf-8'))
    # Ask for a bigger page when more than the default 10 results are wanted
    if max_results > 20:
        url += '&limit=30'
    elif max_results > 10:
        url += '&limit=20'

    remaining = max_results

    try:
        results = fork_job(js_browser, 'get_results', (url, timeout),
                           module_is_source_code=True)
    except WorkerError as e:
        raise Exception('Could not get results: %s' % e.orig_tb)
    doc = html.fromstring(strip_encoding_declarations(results['result']))

    cover_box = './/div[@class="nw_katalog_lista_ksiazka_okladka nw_okladka"]'
    for entry in doc.xpath('//div[@class="nw_katalog_lista_ksiazka "]'):
        if remaining <= 0:
            break

        detail_link = ''.join(entry.xpath(cover_box + '/a[1]/@href'))
        if not detail_link:
            continue

        cover_url = ''.join(entry.xpath(cover_box + '/a[1]/img/@src'))
        title = ''.join(entry.xpath(
            './/h3[@class="nw_katalog_lista_ksiazka_detale_tytul"]/a[1]/text()'))
        author = ', '.join(entry.xpath(
            './/p[@class="nw_katalog_lista_ksiazka_detale_autor"]/a/text()'))
        price = ''.join(entry.xpath(
            './/div[@class="nw_opcjezakupu_cena"]/text()'))
        formats = ', '.join(entry.xpath(
            './/p[@class="nw_katalog_lista_ksiazka_detale_format"]/span/text()'))

        s = SearchResult()
        s.cover_url = 'http://woblink.com' + cover_url
        s.title = title.strip()
        s.author = author.strip()
        s.price = price + ' zł'
        s.detail_item = detail_link.strip()
        s.formats = formats
        if 'DRM' in formats:
            s.drm = SearchResult.DRM_LOCKED
        else:
            s.drm = SearchResult.DRM_UNLOCKED

        remaining -= 1
        yield s
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from the PDF in ``stream``.

    The stream is copied to ``src.pdf`` in a temporary directory and
    ``read_info`` from ``calibre.ebooks.metadata.pdf`` is run on that
    directory through ``fork_job``. Title (falling back to "Unknown"),
    author, creator, keywords and subject of the returned info dict are
    mapped onto a MetaInformation object, which is consolidated with XMP
    metadata when the info dict carries any. DOI and ISBN identifiers not
    yet present are then looked for among the remaining info values.

    Raises RuntimeError when the job fails with WorkerError and ValueError
    when the job returns None.
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as pdf_copy:
            shutil.copyfileobj(stream, pdf_copy)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        # Echo the contents of the file named by res['stdout_stderr'], if any
        with open(res['stdout_stderr'], 'rb') as captured:
            worker_output = captured.read().strip()
        if worker_output:
            prints(worker_output)
        if info is None:
            raise ValueError('Could not read info dict from PDF')
        cdata = None
        covpath = os.path.join(pdfpath, 'cover.jpg')
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as cover_file:
                cdata = cover_file.read()

    title = info.get('Title', None) or _('Unknown')
    author = info.get('Author', None)
    authors = [_('Unknown')] if author is None else string_to_authors(author)
    mi = MetaInformation(title, authors)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    mi.tags = []
    keywords = info.get('Keywords', None)
    if keywords:
        mi.tags = [part.strip() for part in keywords.split(',')]
        isbn = [found for found in (check_isbn(tag) for tag in mi.tags) if found]
        if isbn:
            mi.isbn = isbn = isbn[0]
        mi.tags = [tag for tag in mi.tags if check_isbn(tag) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from calibre.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata
    checkers = {'doi': check_doi, 'isbn': check_isbn}
    for scheme, check_func in iteritems(checkers):
        if scheme in mi.get_identifiers():
            continue
        for key, value in iteritems(info):
            if key == 'xmp_metadata':
                continue
            val = check_func(value)
            if val:
                mi.set_identifier(scheme, val)
                break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
def get_metadata(stream, cover=True):
    """Read metadata (and optionally the cover) from a PDF stream.

    The stream is copied to ``src.pdf`` inside a temporary directory and
    ``read_info`` from ``calibre.ebooks.metadata.pdf`` is run on that
    directory in a worker process. The info dict it returns is converted
    into a MetaInformation object.

    :param stream: seekable file-like object containing the PDF data
    :param cover: if true, the worker is asked for a cover and, when a
        ``cover.jpg`` was produced, it is stored in ``mi.cover_data``
    :return: the populated MetaInformation object
    :raises RuntimeError: if the worker process fails
    :raises ValueError: if the worker returned an empty/missing info dict
    """
    with TemporaryDirectory('_pdf_metadata_read') as pdfpath:
        stream.seek(0)
        with open(os.path.join(pdfpath, 'src.pdf'), 'wb') as f:
            shutil.copyfileobj(stream, f)
        try:
            res = fork_job('calibre.ebooks.metadata.pdf', 'read_info',
                           (pdfpath, bool(cover)))
        except WorkerError as e:
            prints(e.orig_tb)
            raise RuntimeError('Failed to run pdfinfo')
        info = res['result']
        # Relay whatever the worker wrote to stdout/stderr.
        with open(res['stdout_stderr'], 'rb') as f:
            raw = f.read().strip()
            if raw:
                prints(raw)
        if not info:
            raise ValueError('Could not read info dict from PDF')
        # The cover has to be read before the temporary directory goes away.
        covpath = os.path.join(pdfpath, 'cover.jpg')
        cdata = None
        if cover and os.path.exists(covpath):
            with open(covpath, 'rb') as f:
                cdata = f.read()

    # The title is passed through as-is and may be None.
    title = info.get('Title', None)
    au = info.get('Author', None)
    if au is None:
        au = [_('Unknown')]
    else:
        au = string_to_authors(au)
    mi = MetaInformation(title, au)

    creator = info.get('Creator', None)
    if creator:
        mi.book_producer = creator

    keywords = info.get('Keywords', None)
    mi.tags = []
    if keywords:
        mi.tags = [x.strip() for x in keywords.split(',')]
        isbn = [check_isbn(x) for x in mi.tags if check_isbn(x)]
        if isbn:
            mi.isbn = isbn = isbn[0]
        # Drop tags that check_isbn() maps to the chosen ISBN; when none was
        # found ``isbn`` is still the empty list.
        mi.tags = [x for x in mi.tags if check_isbn(x) != isbn]

    subject = info.get('Subject', None)
    if subject:
        mi.tags.insert(0, subject)

    if 'xmp_metadata' in info:
        from calibre.ebooks.metadata.xmp import consolidate_metadata
        mi = consolidate_metadata(mi, info)

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata.
    # dict.items() is used instead of dict.iteritems(): the latter does not
    # exist on Python 3, while items() iterates the same pairs on 2 and 3.
    for scheme, check_func in {'doi': check_doi, 'isbn': check_isbn}.items():
        if scheme not in mi.get_identifiers():
            for k, v in info.items():
                if k != 'xmp_metadata':
                    val = check_func(v)
                    if val:
                        mi.set_identifier(scheme, val)
                        break

    if cdata:
        mi.cover_data = ('jpeg', cdata)
    return mi
def auto_add(self):
    """Stage new ebook files found in ``self.path`` and report them.

    For every eligible file a fresh temporary directory is created under
    ``self.tdir`` and ``forked_read_metadata`` is run on the file in a
    worker process. Afterwards the directory is guaranteed to contain a
    parseable ``size.txt`` and a ``metadata.opf`` (falling back to metadata
    derived from the file name). The file name is added to
    ``self.staging`` and, if anything was staged, ``self.callback`` is
    called with a dict mapping file name to its temporary directory.
    """
    from calibre.utils.ipc.simple_worker import fork_job, WorkerError
    from calibre.ebooks.metadata.opf2 import metadata_to_opf
    from calibre.ebooks.metadata.meta import metadata_from_filename

    def is_candidate(name):
        # Must not be in the process of being added to the db
        if name in self.staging:
            return False
        full_path = os.path.join(self.path, name)
        # Firefox creates 0 byte placeholder files when downloading
        if os.stat(full_path).st_size <= 0:
            return False
        # Must be a file
        if not os.path.isfile(full_path):
            return False
        # Must have read and write permissions
        if not os.access(full_path, os.R_OK | os.W_OK):
            return False
        # Must be a known ebook file type
        return os.path.splitext(name)[1][1:].lower() in self.allowed

    files = [name for name in os.listdir(self.path) if is_candidate(name)]
    data = {}
    # Give any in progress copies time to complete
    time.sleep(2)

    for fname in files:
        book_path = os.path.join(self.path, fname)

        # Try opening the file for reading, if the OS prevents us, then at
        # least on windows, it means the file is open in another
        # application for writing. We will get notified by
        # QFileSystemWatcher when writing is completed, so ignore for now.
        try:
            with open(book_path, 'rb'):
                pass
        except:
            continue

        workdir = tempfile.mkdtemp(dir=self.tdir)
        # A failed metadata read is only reported; the fallbacks below
        # still produce usable size.txt / metadata.opf files.
        try:
            fork_job('calibre.ebooks.metadata.meta', 'forked_read_metadata',
                     (book_path, workdir), no_output=True)
        except WorkerError as e:
            prints('Failed to read metadata from:', fname)
            prints(e.orig_tb)
        except:
            import traceback
            traceback.print_exc()

        # Ensure that the pre-metadata file size is present. If it isn't,
        # write 0 so that the file is rescanned
        size_file = os.path.join(workdir, 'size.txt')
        try:
            with open(size_file, 'rb') as fobj:
                int(fobj.read())
        except:
            with open(size_file, 'wb') as fobj:
                fobj.write(b'0')

        # A missing or implausibly small OPF means metadata reading failed;
        # fall back to metadata derived from the file name.
        opf_file = os.path.join(workdir, 'metadata.opf')
        try:
            if os.stat(opf_file).st_size < 30:
                raise Exception('metadata reading failed')
        except:
            mi = metadata_from_filename(fname)
            with open(opf_file, 'wb') as fobj:
                fobj.write(metadata_to_opf(mi))

        self.staging.add(fname)
        data[fname] = workdir

    if data:
        self.callback(data)