def _format_metadata(book):
    """Build the metadata mapping for *book*.

    Every ``models.Info`` row of the book is fed to ``add_metadata``; then
    a fixed set of Dublin Core defaults is added for each entry that
    ``get_metadata`` reports as missing. There must be language, creator,
    identifier and title.

    Returns the mapping populated through ``add_metadata``.
    """
    # A stored name is  [ '{' namespace '}' ] keyword [ '{' scheme '}' ]
    name_pattern = re.compile(
        r'^(?:\{([^}]*)\})?'  # namespace
        r'([^{]+)'  # keyword
        r'(?:\{([^}]*)\})?$'  # scheme
    )
    stamp = "%Y.%m.%d-%H.%M"

    metadata = {}
    for entry in models.Info.objects.filter(book=book):
        raw_name = entry.name
        raw_value = entry.getValue()
        parsed = name_pattern.match(raw_name)
        if parsed is not None:
            # Unmatched optional groups come back as '' rather than None.
            namespace, keyword, scheme = parsed.groups('')
        else:
            namespace, keyword, scheme = '', raw_name, ''
        add_metadata(metadata, keyword, raw_value, namespace, scheme)

    now = time.strftime(stamp)
    created = book.created.strftime(stamp)
    history_days = models.BookHistory.objects.filter(book=book).datetimes(
        "modified", "day", order='DESC')
    # Without any history the creation time doubles as last-modified.
    lastmod = history_days[0].strftime(stamp) if history_days else created

    author = 'Author'
    if book.owner and book.owner.first_name:
        author = book.owner.first_name

    # 'abbrevation' (sic) is the attribute name read from the language.
    language = getattr(book.language, 'abbrevation', 'en') if book.language else 'en'

    # add some default values if values are not otherwise specified
    defaults = (
        (DC, "publisher", "", DEFAULT_PUBLISHER),
        (DC, "language", "", language),
        (DC, "creator", "", author),
        (DC, "title", "", book.title),
        (DC, "date", "start", created),
        (DC, "date", "last-modified", lastmod),
        (DC, "date", "published", now),
        (DC, "identifier", "booki.cc",
         "http://%s/%s/%s" % (THIS_BOOKI_SERVER, book.url_title, now)),
    )
    for namespace, keyword, scheme, fallback in defaults:
        if get_metadata(metadata, keyword, namespace, scheme):
            continue
        add_metadata(metadata, keyword, fallback, namespace, scheme)

    # XXX add contributors
    return metadata
def _format_metadata(book):
    """Assemble the metadata mapping describing *book*.

    Stored ``models.Info`` rows are added first (via ``add_metadata``);
    afterwards Dublin Core defaults are added for every entry that
    ``get_metadata`` does not find. There must be language, creator,
    identifier and title.

    Returns the mapping populated through ``add_metadata``.
    """
    time_format = "%Y.%m.%d-%H.%M"
    # Info names follow  [ '{' namespace '}' ] keyword [ '{' scheme '}' ]
    key_pattern = re.compile(
        r'^(?:\{([^}]*)\})?'  # namespace
        r'([^{]+)'  # keyword
        r'(?:\{([^}]*)\})?$'  # scheme
    )

    metadata = {}
    for info_row in models.Info.objects.filter(book=book):
        name = info_row.name
        stored = info_row.getValue()
        hit = key_pattern.match(name)
        if hit is None:
            # Unparseable name: use it verbatim as the keyword.
            namespace, keyword, scheme = '', name, ''
        else:
            namespace, keyword, scheme = hit.groups('')
        add_metadata(metadata, keyword, stored, namespace, scheme)

    now = time.strftime(time_format)
    created = book.created.strftime(time_format)
    modified_days = models.BookHistory.objects.filter(book=book).dates(
        "modified", "day", order='DESC')
    if modified_days:
        lastmod = modified_days[0].strftime(time_format)
    else:
        # No history entries: fall back to the creation timestamp.
        lastmod = created

    author = 'Author'
    if book.owner and book.owner.first_name:
        author = book.owner.first_name

    language = 'en'
    if book.language:
        # 'abbrevation' (sic) is the attribute name read from the language.
        language = getattr(book.language, 'abbrevation', 'en')

    identifier = "http://%s/%s/%s" % (THIS_BOOKI_SERVER, book.url_title, now)

    # add some default values if values are not otherwise specified
    fallbacks = (
        (DC, "publisher", "", DEFAULT_PUBLISHER),
        (DC, "language", "", language),
        (DC, "creator", "", author),
        (DC, "title", "", book.title),
        (DC, "date", "start", created),
        (DC, "date", "last-modified", lastmod),
        (DC, "date", "published", now),
        (DC, "identifier", "booki.cc", identifier),
    )
    for namespace, keyword, scheme, default_value in fallbacks:
        already_set = get_metadata(metadata, keyword, namespace, scheme)
        if not already_set:
            add_metadata(metadata, keyword, default_value, namespace, scheme)

    # XXX add contributors
    return metadata
def _format_metadata(book):
    """Return the metadata mapping for *book*, padded with defaults.

    The book's ``models.Info`` rows are added through ``add_metadata``;
    any Dublin Core entry from the default table that ``get_metadata``
    does not find is then added as well. There must be language, creator,
    identifier and title.
    """
    date_format = "%Y.%m.%d-%H.%M"
    # key is [ '{' namespace '}' ] keyword [ '{' scheme '}' ]
    key_matcher = re.compile(
        r"^(?:\{([^}]*)\})?"  # namespace
        r"([^{]+)"  # keyword
        r"(?:\{([^}]*)\})?$"  # scheme
    )

    metadata = {}
    for record in models.Info.objects.filter(book=book):
        record_name = record.name
        record_value = record.getValue()
        found = key_matcher.match(record_name)
        if found is not None:
            # Missing optional parts are reported as "" instead of None.
            namespace, keyword, scheme = found.groups("")
        else:
            keyword = record_name
            namespace = scheme = ""
        add_metadata(metadata, keyword, record_value, namespace, scheme)

    now = time.strftime(date_format)
    created = book.created.strftime(date_format)
    change_days = models.BookHistory.objects.filter(book=book).dates(
        "modified", "day", order="DESC")
    # A book without history reports its creation time as last-modified.
    lastmod = change_days[0].strftime(date_format) if change_days else created

    # add some default values if values are not otherwise specified
    default_rows = (
        (DC, "publisher", "", DEFAULT_PUBLISHER),
        (DC, "language", "", "en"),
        (DC, "creator", "", "The Contributors"),
        (DC, "title", "", book.title),
        (DC, "date", "start", created),
        (DC, "date", "last-modified", lastmod),
        (DC, "date", "published", now),
        (DC, "identifier", "booki.cc",
         "http://%s/%s/%s" % (THIS_BOOKI_SERVER, book.url_title, now)),
    )
    for namespace, keyword, scheme, default in default_rows:
        if get_metadata(metadata, keyword, namespace, scheme):
            continue
        add_metadata(metadata, keyword, default, namespace, scheme)

    # XXX add contributors
    return metadata
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip filename.

    The archive's ``info.json`` supplies three sections: ``metadata``,
    ``manifest`` and ``TOC``. Chapters are created from the TOC, non-HTML
    manifest items under ``static/`` become attachments, and every
    non-empty metadata value is stored as a ``models.Info`` row.

    Parameters:
        user: passed to ``create_book`` and ``logChapterHistory``.
        zname: file name (or file object) accepted by ``zipfile.ZipFile``.
        createTOC: when true, a ``models.BookToc`` row is created for each
            section and chapter.
        extraOptions: optional ``book_title`` (overrides the title from the
            metadata), ``book_url`` and ``hidden``.

    Returns the newly created book.

    The archive is closed even when the import fails part-way through.
    """
    from booki.utils.log import logChapterHistory
    from django.core.files import File

    # unzip it
    zf = zipfile.ZipFile(zname)
    try:
        # load info.json
        info = json.loads(zf.read('info.json'))
        logWarning("Loaded json file %r" % info)

        metadata = info['metadata']
        manifest = info['manifest']
        TOC = info['TOC']

        if extraOptions.get('book_title', None):
            bookTitle = extraOptions['book_title']
        else:
            bookTitle = get_metadata(metadata, 'title', ns=DC)[0]

        bookTitle = makeTitleUnique(bookTitle)
        logWarning("Chose unique book title %r" % bookTitle)

        if extraOptions.get('book_url', None):
            bookURL = extraOptions['book_url']
        else:
            bookURL = None

        book = create_book(user, bookTitle, status="new", bookURL=bookURL)

        if extraOptions.get("hidden"):
            book.hidden = True
            book.save()

        # what if it does not have status "new"
        stat = models.BookStatus.objects.filter(book=book, name="new")[0]

        chapters = getChaptersFromTOC(TOC)
        n = len(chapters) + 1  # is +1 necessary?
        now = datetime.datetime.now()

        # TOC weights count down from n, so earlier entries weigh more.
        for chapterName, chapterFile, is_section in chapters:
            urlName = booktype_slugify(chapterName)

            if is_section:  # create section
                if createTOC:
                    c = models.BookToc(book=book,
                                       version=book.version,
                                       name=chapterName,
                                       chapter=None,
                                       weight=n,
                                       typeof=2)
                    c.save()
                    n -= 1
            else:  # create chapter
                # check if i can open this file at all
                content = zf.read(chapterFile)

                chapter = models.Chapter(book=book,
                                         version=book.version,
                                         url_title=urlName,
                                         title=chapterName,
                                         status=stat,
                                         content=content,
                                         created=now,
                                         modified=now)
                chapter.save()

                logChapterHistory(chapter=chapter,
                                  content=content,
                                  user=user,
                                  comment="",
                                  revision=chapter.revision)

                if createTOC:
                    c = models.BookToc(book=book,
                                       version=book.version,
                                       name=chapterName,
                                       chapter=chapter,
                                       weight=n,
                                       typeof=1)
                    c.save()
                    n -= 1

        # Re-fetched here as in the original flow.
        # NOTE(review): looks redundant with the lookup above - confirm.
        stat = models.BookStatus.objects.filter(book=book, name="new")[0]

        for item in manifest.values():
            if item["mimetype"] != 'text/html':
                attachmentName = item['url']

                # Only files under static/ are imported as attachments.
                if attachmentName.startswith("static/"):
                    att = models.Attachment(book=book,
                                            version=book.version,
                                            status=stat)

                    s = zf.read(attachmentName)
                    f = StringIO(s)
                    f2 = File(f)
                    f2.size = len(s)
                    att.attachment.save(os.path.basename(attachmentName), f2, save=False)
                    att.save()
                    f.close()

        # metadata
        for namespace in metadata:
            # namespace is something like "http://purl.org/dc/elements/1.1/" or ""
            # in the former case, prepend it to the name, in {}.
            ns = ('{%s}' % namespace if namespace else '')
            for keyword, schemes in metadata[namespace].iteritems():
                for scheme, values in schemes.iteritems():
                    # schema, if it is set, describes the value's format.
                    # for example, an identifier might be an ISBN.
                    sc = ('{%s}' % scheme if scheme else '')
                    key = "%s%s%s" % (ns, keyword, sc)
                    for v in values:
                        if not v:
                            continue
                        try:
                            info_row = models.Info(book=book, name=key)
                            # Values of 2500+ characters go to the text
                            # column (kind 2), shorter ones to the string
                            # column (kind 0).
                            if len(v) >= 2500:
                                info_row.value_text = v
                                info_row.kind = 2
                            else:
                                info_row.value_string = v
                                info_row.kind = 0
                            info_row.save()
                        except Exception as err:
                            # Metadata import stays best effort: one bad
                            # value must not abort the whole import, but
                            # leave a trace instead of failing silently.
                            logWarning("Skipped metadata %r: %r" % (key, err))
    finally:
        zf.close()

    return book
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip filename.

    The archive's ``info.json`` supplies three sections: ``metadata``,
    ``manifest`` and ``TOC``. Chapters are created from the TOC, non-HTML
    manifest items under ``static/`` become attachments, and every
    non-empty metadata value is stored as a ``models.Info`` row.

    Parameters:
        user: passed to ``createBook`` and ``logChapterHistory``.
        zname: file name (or file object) accepted by ``zipfile.ZipFile``.
        createTOC: when true, a ``models.BookToc`` row is created for each
            section and chapter.
        extraOptions: optional ``book_title`` (overrides the title from the
            metadata), ``book_url`` and ``hidden``.

    Returns the newly created book.

    The archive is closed even when the import fails part-way through.
    """
    from booki.utils.log import logChapterHistory
    from django.core.files import File

    # unzip it
    zf = zipfile.ZipFile(zname)
    try:
        # load info.json
        info = json.loads(zf.read("info.json"))
        logWarning("Loaded json file %r" % info)

        metadata = info["metadata"]
        manifest = info["manifest"]
        TOC = info["TOC"]

        if extraOptions.get("book_title", None):
            bookTitle = extraOptions["book_title"]
        else:
            bookTitle = get_metadata(metadata, "title", ns=DC)[0]

        bookTitle = makeTitleUnique(bookTitle)
        logWarning("Chose unique book title %r" % bookTitle)

        if extraOptions.get("book_url", None):
            bookURL = extraOptions["book_url"]
        else:
            bookURL = None

        book = createBook(user, bookTitle, status="new", bookURL=bookURL)

        if extraOptions.get("hidden"):
            book.hidden = True
            book.save()

        # what if it does not have status "new"
        stat = models.BookStatus.objects.filter(book=book, name="new")[0]

        chapters = getChaptersFromTOC(TOC)
        n = len(chapters) + 1  # is +1 necessary?
        now = datetime.datetime.now()

        # TOC weights count down from n, so earlier entries weigh more.
        for chapterName, chapterFile, is_section in chapters:
            urlName = bookiSlugify(chapterName)

            if is_section:  # create section
                if createTOC:
                    c = models.BookToc(
                        book=book,
                        version=book.version,
                        name=chapterName,
                        chapter=None,
                        weight=n,
                        typeof=2,
                    )
                    c.save()
                    n -= 1
            else:  # create chapter
                # check if i can open this file at all
                content = zf.read(chapterFile)

                chapter = models.Chapter(
                    book=book,
                    version=book.version,
                    url_title=urlName,
                    title=chapterName,
                    status=stat,
                    content=content,
                    created=now,
                    modified=now,
                )
                chapter.save()

                logChapterHistory(
                    chapter=chapter,
                    content=content,
                    user=user,
                    comment="",
                    revision=chapter.revision,
                )

                if createTOC:
                    c = models.BookToc(
                        book=book,
                        version=book.version,
                        name=chapterName,
                        chapter=chapter,
                        weight=n,
                        typeof=1,
                    )
                    c.save()
                    n -= 1

        # Re-fetched here as in the original flow.
        # NOTE(review): looks redundant with the lookup above - confirm.
        stat = models.BookStatus.objects.filter(book=book, name="new")[0]

        for item in manifest.values():
            if item["mimetype"] != "text/html":
                attachmentName = item["url"]

                # Only files under static/ are imported as attachments.
                if attachmentName.startswith("static/"):
                    att = models.Attachment(book=book, version=book.version, status=stat)

                    s = zf.read(attachmentName)
                    f = StringIO(s)
                    f2 = File(f)
                    f2.size = len(s)
                    att.attachment.save(os.path.basename(attachmentName), f2, save=False)
                    att.save()
                    f.close()

        # metadata
        for namespace in metadata:
            # namespace is something like "http://purl.org/dc/elements/1.1/" or ""
            # in the former case, prepend it to the name, in {}.
            ns = "{%s}" % namespace if namespace else ""
            for keyword, schemes in metadata[namespace].iteritems():
                for scheme, values in schemes.iteritems():
                    # schema, if it is set, describes the value's format.
                    # for example, an identifier might be an ISBN.
                    sc = "{%s}" % scheme if scheme else ""
                    key = "%s%s%s" % (ns, keyword, sc)
                    for v in values:
                        if not v:
                            continue
                        try:
                            info_row = models.Info(book=book, name=key)
                            # Values of 2500+ characters go to the text
                            # column (kind 2), shorter ones to the string
                            # column (kind 0).
                            if len(v) >= 2500:
                                info_row.value_text = v
                                info_row.kind = 2
                            else:
                                info_row.value_string = v
                                info_row.kind = 0
                            info_row.save()
                        except Exception as err:
                            # Metadata import stays best effort: one bad
                            # value must not abort the whole import, but
                            # leave a trace instead of failing silently.
                            logWarning("Skipped metadata %r: %r" % (key, err))
    finally:
        zf.close()

    return book
def importBookFromFile(user, zname, createTOC=False, **extraOptions):
    """Create a new book from a bookizip filename.

    The archive's ``info.json`` supplies three sections: ``metadata``,
    ``manifest`` and ``TOC``. Chapters are created from the TOC, non-HTML
    manifest items under ``static/`` become attachments, and every
    metadata value is stored as a ``models.Info`` row.

    Parameters:
        user: passed to ``createBook``.
        zname: file name (or file object) accepted by ``zipfile.ZipFile``.
        createTOC: when true, a ``models.BookToc`` row is created for each
            section and chapter.
        extraOptions: optional ``book_title`` (overrides the title from the
            metadata) and ``book_url``.

    Returns the newly created book.

    The archive is closed even when the import fails part-way through.
    """
    from django.core.files import File

    # unzip it
    zf = zipfile.ZipFile(zname)
    try:
        # load info.json
        info = json.loads(zf.read('info.json'))

        metadata = info['metadata']
        manifest = info['manifest']
        TOC = info['TOC']

        if extraOptions.get('book_title', None):
            bookTitle = extraOptions['book_title']
        else:
            bookTitle = get_metadata(metadata, 'title', ns=DC)[0]

        bookTitle = makeTitleUnique(bookTitle)

        if extraOptions.get('book_url', None):
            bookURL = extraOptions['book_url']
        else:
            bookURL = None

        book = createBook(user, bookTitle, status="imported", bookURL=bookURL)

        # what if it does not have status "imported"
        stat = models.BookStatus.objects.filter(book=book, name="imported")[0]

        chapters = getChaptersFromTOC(TOC)
        n = len(chapters) + 1  # is +1 necessary?
        now = datetime.datetime.now()

        # TOC weights count down from n, so earlier entries weigh more.
        for chapterName, chapterFile, is_section in chapters:
            urlName = slugify(chapterName)

            if is_section:  # create section
                if createTOC:
                    c = models.BookToc(book=book,
                                       version=book.version,
                                       name=chapterName,
                                       chapter=None,
                                       weight=n,
                                       typeof=2)
                    c.save()
                    n -= 1
            else:  # create chapter
                # check if i can open this file at all
                content = zf.read(chapterFile)

                chapter = models.Chapter(book=book,
                                         version=book.version,
                                         url_title=urlName,
                                         title=chapterName,
                                         status=stat,
                                         content=content,
                                         created=now,
                                         modified=now)
                chapter.save()

                if createTOC:
                    c = models.BookToc(book=book,
                                       version=book.version,
                                       name=chapterName,
                                       chapter=chapter,
                                       weight=n,
                                       typeof=1)
                    c.save()
                    n -= 1

        # Re-fetched here as in the original flow.
        # NOTE(review): looks redundant with the lookup above - confirm.
        stat = models.BookStatus.objects.filter(book=book, name="imported")[0]

        for item in manifest.values():
            if item["mimetype"] != 'text/html':
                attachmentName = item['url']

                # Only files under static/ are imported as attachments.
                if attachmentName.startswith("static/"):
                    att = models.Attachment(book=book,
                                            version=book.version,
                                            status=stat)

                    s = zf.read(attachmentName)
                    f = StringIO(s)
                    f2 = File(f)
                    f2.size = len(s)
                    att.attachment.save(os.path.basename(attachmentName), f2, save=False)
                    att.save()
                    f.close()

        # metadata
        for namespace in metadata:
            # namespace is something like "http://purl.org/dc/elements/1.1/" or ""
            # in the former case, prepend it to the name, in {}.
            ns = ('{%s}' % namespace if namespace else '')
            for keyword, schemes in metadata[namespace].iteritems():
                for scheme, values in schemes.iteritems():
                    # schema, if it is set, describes the value's format.
                    # for example, an identifier might be an ISBN.
                    sc = ('{%s}' % scheme if scheme else '')
                    key = "%s%s%s" % (ns, keyword, sc)
                    for v in values:
                        info_row = models.Info(book=book, name=key)
                        # Values of 2500+ characters go to the text column
                        # (kind 2), shorter ones to the string column
                        # (kind 0).
                        if len(v) >= 2500:
                            info_row.value_text = v
                            info_row.kind = 2
                        else:
                            info_row.value_string = v
                            info_row.kind = 0
                        info_row.save()
    finally:
        zf.close()

    return book