def get_comic_info(tmp_file_path, original_file_name, original_file_extension):
    """Build the upload metadata for a comic archive.

    When ``use_comic_meta`` is truthy and the archive carries CIX or CBI
    metadata, title, writers, comments, series, issue and language are taken
    from that metadata.  In every other case (metadata support disabled, file
    not recognised as a comic archive, or no metadata present) a default
    ``BookMeta`` based on the file name is returned, so the function never
    falls off the end and returns ``None``.

    :param tmp_file_path: path of the uploaded file on disk.
    :param original_file_name: file name, used as fallback title.
    :param original_file_extension: extension stored in the result and
        passed to ``extractCover``.
    :returns: an ``uploader.BookMeta`` instance.
    """
    if use_comic_meta:
        archive = ComicArchive(tmp_file_path)
        if archive.seemsToBeAComicArchive():
            # CIX is preferred over CBI when both are present.
            if archive.hasMetadata(MetaDataStyle.CIX):
                style = MetaDataStyle.CIX
            elif archive.hasMetadata(MetaDataStyle.CBI):
                style = MetaDataStyle.CBI
            else:
                style = None

            if style is not None:
                loadedMetadata = archive.readMetadata(style)

                # A missing language is treated like an unrecognised one
                # instead of crashing on len(None).
                lang = loadedMetadata.language or ""
                if len(lang) == 2:
                    language = isoLanguages.get(part1=lang).name
                elif len(lang) == 3:
                    language = isoLanguages.get(part3=lang).name
                else:
                    language = ""

                writers = [
                    credit["person"]
                    for credit in loadedMetadata.credits
                    if credit["role"] == "Writer"
                ]

                return uploader.BookMeta(
                    file_path=tmp_file_path,
                    extension=original_file_extension,
                    title=loadedMetadata.title or original_file_name,
                    author=" & ".join(writers) or u"Unknown",
                    cover=extractCover(tmp_file_path, original_file_extension),
                    description=loadedMetadata.comments or "",
                    tags="",
                    series=loadedMetadata.series or "",
                    series_id=loadedMetadata.issue or "",
                    languages=language)

    # Fallback for every path that did not yield embedded metadata.
    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=original_file_name,
        author=u"Unknown",
        cover=extractCover(tmp_file_path, original_file_extension),
        description="",
        tags="",
        series="",
        series_id="",
        languages="")
def pdf_meta(tmp_file_path, original_file_name, original_file_extension):
    """Build the upload metadata for a PDF file.

    When ``use_pdf_meta`` is truthy the document information is read with
    ``PdfFileReader``; otherwise, or when the PDF has no document
    information, the file name is used as title and the author is
    ``u"Unknown"``.

    :param tmp_file_path: path of the uploaded PDF on disk.
    :param original_file_name: file name, used as fallback title and passed
        to ``pdf_preview``.
    :param original_file_extension: extension stored in the result.
    :returns: an ``uploader.BookMeta`` instance.
    """
    author = u"Unknown"
    title = original_file_name
    subject = ""

    if use_pdf_meta:
        # The context manager closes the handle, which the previous
        # PdfFileReader(open(...)) call leaked.  The attributes are read
        # while the file is still open in case the reader resolves them
        # lazily.
        with open(tmp_file_path, 'rb') as pdf_file:
            doc_info = PdfFileReader(pdf_file).getDocumentInfo()
            if doc_info is not None:
                if doc_info.author is not None:
                    author = doc_info.author
                if doc_info.title is not None:
                    title = doc_info.title
                # Keep the description a string, matching the fallback
                # branch, when the PDF has no subject entry.
                subject = doc_info.subject or ""

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=pdf_preview(tmp_file_path, original_file_name),
        description=subject,
        tags="",
        series="",
        series_id="")
def get_fb2_info(tmp_file_path, original_file_name, original_file_extension):
    """Build the upload metadata for a FictionBook (FB2) file.

    Authors, book title and the publish-info book name (used as description)
    are read from the document.  Missing elements no longer raise
    ``IndexError``: absent name parts are skipped, an absent title falls back
    to ``original_file_name`` and an absent description becomes empty.

    :param tmp_file_path: path of the uploaded FB2 file on disk.
    :param original_file_name: file name, used as fallback title.
    :param original_file_extension: extension stored in the result.
    :returns: an ``uploader.BookMeta`` instance (no cover is extracted).
    """
    ns = {
        'fb': 'http://www.gribuser.ru/xml/fictionbook/2.0',
        # Was 'ttp://...' (missing leading 'h').
        'l': 'http://www.w3.org/1999/xlink',
    }

    # Read raw bytes so the parser can honour the XML encoding declaration,
    # and close the handle as soon as the document is parsed.
    with open(tmp_file_path, 'rb') as fb2_file:
        tree = etree.fromstring(fb2_file.read())

    def first_text(element, path):
        """Return the first node matched by *path*, or u'' if none."""
        found = element.xpath(path, namespaces=ns)
        return found[0] if found else u''

    def get_author(element):
        """Join the available first/middle/last name parts with spaces."""
        parts = [
            first_text(element, 'fb:%s/text()' % tag)
            for tag in ('first-name', 'middle-name', 'last-name')
        ]
        return ' '.join(part for part in parts if part)

    authors = tree.xpath(
        '/fb:FictionBook/fb:description/fb:title-info/fb:author',
        namespaces=ns)
    author = ", ".join(map(get_author, authors))

    title = first_text(
        tree,
        '/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()')
    # This block targets Python 2 (it relies on the ``unicode`` builtin).
    title = unicode(title) if title else original_file_name

    description = unicode(first_text(
        tree,
        '/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()'))

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title,
        author=author,
        cover=None,
        description=description,
        tags="",
        series="",
        series_id="")
def get_fb2_info(tmp_file_path, original_file_extension):
    """Build the upload metadata for a FictionBook (FB2) file.

    Authors, book title and the publish-info book name (used as description)
    are read from the document; any element that is missing yields an empty
    string.

    :param tmp_file_path: path of the uploaded FB2 file on disk.
    :param original_file_extension: extension stored in the result.
    :returns: an ``uploader.BookMeta`` instance (no cover is extracted).
    """
    ns = {
        'fb': 'http://www.gribuser.ru/xml/fictionbook/2.0',
        'l': 'http://www.w3.org/1999/xlink',
    }

    # Read raw bytes: lxml refuses text input that carries an XML encoding
    # declaration.  The context manager also closes the handle, which the
    # previous bare open() leaked.
    with open(tmp_file_path, 'rb') as fb2_file:
        tree = etree.fromstring(fb2_file.read())

    def first_text(element, path):
        """Return the first node matched by *path*, or None if none."""
        matches = element.xpath(path, namespaces=ns)
        return matches[0] if matches else None

    def get_author(element):
        """Return 'first middle last'; missing parts become empty strings."""
        first_name = first_text(element, 'fb:first-name/text()') or u''
        middle_name = first_text(element, 'fb:middle-name/text()') or u''
        last_name = first_text(element, 'fb:last-name/text()') or u''
        return first_name + ' ' + middle_name + ' ' + last_name

    authors = tree.xpath(
        '/fb:FictionBook/fb:description/fb:title-info/fb:author',
        namespaces=ns)
    author = str(", ".join(map(get_author, authors)))

    title = first_text(
        tree,
        '/fb:FictionBook/fb:description/fb:title-info/fb:book-title/text()')
    title = str(title) if title is not None else u''

    description = first_text(
        tree,
        '/fb:FictionBook/fb:description/fb:publish-info/fb:book-name/text()')
    description = str(description) if description is not None else u''

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        # The encode/decode round trip is kept from the original;
        # presumably it normalises to a text object on Python 2.
        title=title.encode('utf-8').decode('utf-8'),
        author=author.encode('utf-8').decode('utf-8'),
        cover=None,
        description=description,
        tags="",
        series="",
        series_id="",
        languages="")
def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
    """Build the upload metadata for an EPUB file.

    The package document named in ``META-INF/container.xml`` is parsed for
    the Dublin Core title, description and creator.  A cover is extracted
    from the manifest item with id ``cover-image`` or, failing that,
    ``cover``.

    :param tmp_file_path: path of the uploaded EPUB on disk.
    :param original_file_name: file name, used as title when the package
        has no ``dc:title``.
    :param original_file_extension: extension stored in the result.
    :returns: an ``uploader.BookMeta`` instance.
    """
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }

    # ``epub_zip`` avoids shadowing the ``zip`` builtin; the context manager
    # closes the archive once metadata and cover have been read.
    with zipfile.ZipFile(tmp_file_path) as epub_zip:
        txt = epub_zip.read('META-INF/container.xml')
        tree = etree.fromstring(txt)
        cfname = tree.xpath('n:rootfiles/n:rootfile/@full-path',
                            namespaces=ns)[0]
        cf = epub_zip.read(cfname)
        tree = etree.fromstring(cf)

        # Manifest hrefs are joined with the package document's directory.
        coverpath = os.path.dirname(cfname)

        p = tree.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]

        epub_metadata = {}
        for s in ['title', 'description', 'creator']:
            tmp = p.xpath('dc:%s/text()' % s, namespaces=ns)
            if len(tmp) > 0:
                epub_metadata[s] = tmp[0]
            else:
                # A missing title is left empty so the file-name fallback
                # below can actually trigger; the other fields keep the
                # "Unknown" placeholder.
                epub_metadata[s] = "" if s == 'title' else "Unknown"

        coverfile = None
        for item_id in ('cover-image', 'cover'):
            coversection = tree.xpath(
                "/pkg:package/pkg:manifest/pkg:item[@id='%s']/@href" % item_id,
                namespaces=ns)
            if len(coversection) > 0:
                coverfile = extractCover(epub_zip, coversection[0], coverpath,
                                         tmp_file_path)
                break

    if not epub_metadata['title']:
        title = original_file_name
    else:
        title = epub_metadata['title']

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title.encode('utf-8').decode('utf-8'),
        author=epub_metadata['creator'].encode('utf-8').decode('utf-8'),
        cover=coverfile,
        description=epub_metadata['description'],
        tags="",
        series="",
        series_id="")
def default_meta(tmp_file_path, original_file_name, original_file_extension):
    """Return a ``BookMeta`` built only from the arguments and placeholders.

    The title is the original file name, the author is ``"Unknown"``, there
    is no cover, and description, tags, series and series id are empty.

    :param tmp_file_path: stored as ``file_path``.
    :param original_file_name: stored as ``title``.
    :param original_file_extension: stored as ``extension``.
    :returns: an ``uploader.BookMeta`` instance.
    """
    fields = dict(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=original_file_name,
        author="Unknown",
        cover=None,
        description="",
        tags="",
        series="",
        series_id="",
    )
    return uploader.BookMeta(**fields)
def get_comic_info(tmp_file_path, original_file_name, original_file_extension):
    """Return file-name based metadata for a comic, plus an extracted cover.

    The cover comes from ``extractCover``; the title is the original file
    name, the author is ``u"Unknown"`` and all remaining text fields are
    empty.

    :param tmp_file_path: path of the uploaded file; also passed to
        ``extractCover``.
    :param original_file_name: stored as ``title``.
    :param original_file_extension: stored as ``extension``; also passed to
        ``extractCover``.
    :returns: an ``uploader.BookMeta`` instance.
    """
    cover = extractCover(tmp_file_path, original_file_extension)
    fields = dict(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=original_file_name,
        author=u"Unknown",
        cover=cover,
        description="",
        tags="",
        series="",
        series_id="",
        languages="",
    )
    return uploader.BookMeta(**fields)
def get_epub_info(tmp_file_path, original_file_name, original_file_extension):
    """Build the upload metadata for an EPUB file.

    The package document named in ``META-INF/container.xml`` is parsed for
    the Dublin Core title, description, creator, language and subject, and
    for the ``calibre:series`` / ``calibre:series_index`` meta entries.  A
    cover is extracted from the manifest item with id ``cover-image`` or,
    failing that, from the item referenced by the ``cover`` meta entry
    (following an (x)html cover page to its first image).

    :param tmp_file_path: path of the uploaded EPUB on disk.
    :param original_file_name: file name, used as title when the package
        has no ``dc:title``.
    :param original_file_extension: extension stored in the result.
    :returns: an ``uploader.BookMeta`` instance.
    """
    ns = {
        'n': 'urn:oasis:names:tc:opendocument:xmlns:container',
        'pkg': 'http://www.idpf.org/2007/opf',
        'dc': 'http://purl.org/dc/elements/1.1/'
    }

    # The context manager closes the archive once metadata and cover have
    # been read; it was previously left open.
    with zipfile.ZipFile(tmp_file_path) as epub_zip:
        txt = epub_zip.read('META-INF/container.xml')
        tree = etree.fromstring(txt)
        cfname = tree.xpath('n:rootfiles/n:rootfile/@full-path',
                            namespaces=ns)[0]
        cf = epub_zip.read(cfname)
        tree = etree.fromstring(cf)

        # Manifest hrefs are joined with the package document's directory.
        coverpath = os.path.dirname(cfname)

        p = tree.xpath('/pkg:package/pkg:metadata', namespaces=ns)[0]

        epub_metadata = {}
        for s in ['title', 'description', 'creator', 'language', 'subject']:
            tmp = p.xpath('dc:%s/text()' % s, namespaces=ns)
            if len(tmp) > 0:
                epub_metadata[s] = tmp[0]
            else:
                # A missing title is left empty so the file-name fallback
                # below can actually trigger; the other fields keep the
                # "Unknown" placeholder that the checks below rely on.
                epub_metadata[s] = "" if s == 'title' else "Unknown"

        if epub_metadata['subject'] == "Unknown":
            epub_metadata['subject'] = ''

        if epub_metadata['description'] == "Unknown":
            # Fall back to any element named "description", whatever its
            # namespace.
            description = tree.xpath("//*[local-name() = 'description']/text()")
            if len(description) > 0:
                # Store the first text node, not the whole result list.
                epub_metadata['description'] = description[0]
            else:
                epub_metadata['description'] = ""

        if epub_metadata['language'] == "Unknown":
            epub_metadata['language'] = ""
        else:
            # Keep only the primary subtag, e.g. "en" from "en-US".
            lang = epub_metadata['language'].split('-', 1)[0].lower()
            if len(lang) == 2:
                epub_metadata['language'] = iso_languages.get(part1=lang).name
            elif len(lang) == 3:
                epub_metadata['language'] = iso_languages.get(part3=lang).name
            else:
                epub_metadata['language'] = ""

        series = tree.xpath(
            "/pkg:package/pkg:metadata/pkg:meta[@name='calibre:series']/@content",
            namespaces=ns)
        if len(series) > 0:
            epub_metadata['series'] = series[0]
        else:
            epub_metadata['series'] = ''

        series_id = tree.xpath(
            "/pkg:package/pkg:metadata/pkg:meta[@name='calibre:series_index']/@content",
            namespaces=ns)
        if len(series_id) > 0:
            epub_metadata['series_id'] = series_id[0]
        else:
            epub_metadata['series_id'] = '1'

        coversection = tree.xpath(
            "/pkg:package/pkg:manifest/pkg:item[@id='cover-image']/@href",
            namespaces=ns)
        coverfile = None
        if len(coversection) > 0:
            coverfile = extract_cover(epub_zip, coversection[0], coverpath,
                                      tmp_file_path)
        else:
            meta_cover = tree.xpath(
                "/pkg:package/pkg:metadata/pkg:meta[@name='cover']/@content",
                namespaces=ns)
            if len(meta_cover) > 0:
                coversection = tree.xpath(
                    "/pkg:package/pkg:manifest/pkg:item[@id='"
                    + meta_cover[0] + "']/@href",
                    namespaces=ns)
                if len(coversection) > 0:
                    filetype = coversection[0].rsplit('.', 1)[-1]
                    if filetype == "xhtml" or filetype == "html":
                        # The cover is an (x)html page: use its first image.
                        cover_page = os.path.join(coverpath, coversection[0])
                        markup = epub_zip.read(cover_page)
                        markup_tree = etree.fromstring(markup)
                        # Match <img> whether or not the page is namespaced.
                        imgsrc = markup_tree.xpath(
                            "//*[local-name() = 'img']/@src")
                        # A cover page without any image yields no cover
                        # instead of an IndexError.
                        if len(imgsrc) > 0:
                            # The src may start with "../", so join it with
                            # the page's directory and normalise it via
                            # relpath against the current directory.
                            filename = os.path.relpath(
                                os.path.join(os.path.dirname(cover_page),
                                             imgsrc[0]))
                            coverfile = extract_cover(epub_zip, filename, "",
                                                      tmp_file_path)
                    else:
                        coverfile = extract_cover(epub_zip, coversection[0],
                                                  coverpath, tmp_file_path)

    if not epub_metadata['title']:
        title = original_file_name
    else:
        title = epub_metadata['title']

    return uploader.BookMeta(
        file_path=tmp_file_path,
        extension=original_file_extension,
        title=title.encode('utf-8').decode('utf-8'),
        author=epub_metadata['creator'].encode('utf-8').decode('utf-8'),
        cover=coverfile,
        description=epub_metadata['description'],
        tags=epub_metadata['subject'].encode('utf-8').decode('utf-8'),
        series=epub_metadata['series'].encode('utf-8').decode('utf-8'),
        series_id=epub_metadata['series_id'].encode('utf-8').decode('utf-8'),
        languages=epub_metadata['language'])