示例#1
0
    def handle(self, dirpath='', *args, **options):
        """Import every EPUB found under ``dirpath`` as a published ``Book``.

        Each file returned by ``get_epubs(dirpath)`` is parsed with
        ``Epub.get_info()``; files that cannot be parsed are reported and
        skipped. A ``Language`` row is looked up by the EPUB's language code
        and created from the ``langs`` table when it is missing. Nothing is
        imported when ``dirpath`` exists but is not a directory.

        :param dirpath: directory to scan for EPUB files
        :raises CommandError: if ``dirpath`` does not exist, or if saving a
            book fails for any reason other than a duplicate file checksum
        """
        if not os.path.exists(dirpath):
            raise CommandError("%r is not a valid path" % dirpath)

        if not os.path.isdir(dirpath):
            return

        uuid_prefix = 'urn:uuid:'
        for name in get_epubs(dirpath):
            try:
                epub = Epub(name)
                info = epub.get_info()
            except Exception:
                print("%s is not a valid epub file" % name)
                continue

            # Make sure a Language row exists for this book's language code.
            if not Language.objects.filter(code=info.language):
                for data in langs:
                    if data[0] == info.language:
                        lang = Language()
                        # The lookup above filters on `code`; without storing
                        # it the new row could never be found again and would
                        # be re-created on every import.
                        lang.code = data[0]
                        lang.label = data[1]
                        lang.save()
                        break

            #XXX: Hacks below -- normalise missing metadata to empty strings.
            for field in ('title', 'summary', 'creator', 'rights'):
                if not getattr(info, field):
                    setattr(info, field, '')

            # Remove the URN prefix only. str.strip('urn:uuid:') would strip
            # any of those *characters* from both ends, corrupting ids that
            # start or end with e.g. 'd'.
            identifier = info.identifier['value']
            if identifier.startswith(uuid_prefix):
                identifier = identifier[len(uuid_prefix):]

            pub_status = Status.objects.get(status='Published')
            # EPUBs are zip archives: read them as binary, and make sure the
            # handle is released once the book has been stored.
            with open(name, 'rb') as epub_file:
                book = Book(book_file=File(epub_file),
                            a_title=info.title,
                            a_author=info.creator,
                            a_summary=info.summary,
                            a_rights=info.rights,
                            dc_identifier=identifier,
                            dc_issued=info.date,
                            a_status=pub_status)
                try:
                    book.save()
                # FIXME: Find a better way to do this.
                except IntegrityError as err:
                    if str(err) == "column file_sha256sum is not unique":
                        print("The book ( %s ) was not saved because the file already exsists in the database."
                              % (book.book_file,))
                    else:
                        raise CommandError('Error adding file %s: %s' %
                                           (book.book_file, err))
                except Exception as err:
                    raise CommandError('Error adding file %s: %s' %
                                       (book.book_file, err))
示例#2
0
    def handle(self, dirpath='', *args, **options):
        """Import every EPUB found under ``dirpath`` as a published ``Book``.

        Each file returned by ``get_epubs(dirpath)`` is parsed with
        ``Epub.get_info()``; files that cannot be parsed are reported and
        skipped. A ``Language`` row is looked up by the EPUB's language code
        and created from the ``langs`` table when it is missing. Nothing is
        imported when ``dirpath`` exists but is not a directory.

        :param dirpath: directory to scan for EPUB files
        :raises CommandError: if ``dirpath`` does not exist, or if saving a
            book fails for any reason other than a duplicate file checksum
        """
        if not os.path.exists(dirpath):
            raise CommandError("%r is not a valid path" % dirpath)

        if not os.path.isdir(dirpath):
            return

        uuid_prefix = 'urn:uuid:'
        for name in get_epubs(dirpath):
            try:
                epub = Epub(name)
                info = epub.get_info()
            except Exception:
                print("%s is not a valid epub file" % name)
                continue

            # Make sure a Language row exists for this book's language code.
            if not Language.objects.filter(code=info.language):
                for data in langs:
                    if data[0] == info.language:
                        lang = Language()
                        # The lookup above filters on `code`; without storing
                        # it the new row could never be found again and would
                        # be re-created on every import.
                        lang.code = data[0]
                        lang.label = data[1]
                        lang.save()
                        break

            #XXX: Hacks below -- normalise missing metadata to empty strings.
            for field in ('title', 'summary', 'creator', 'rights'):
                if not getattr(info, field):
                    setattr(info, field, '')

            # Remove the URN prefix only. str.strip('urn:uuid:') would strip
            # any of those *characters* from both ends, corrupting ids that
            # start or end with e.g. 'd'.
            identifier = info.identifier['value']
            if identifier.startswith(uuid_prefix):
                identifier = identifier[len(uuid_prefix):]

            pub_status = Status.objects.get(status='Published')
            # EPUBs are zip archives: read them as binary, and make sure the
            # handle is released once the book has been stored.
            with open(name, 'rb') as epub_file:
                book = Book(book_file=File(epub_file),
                            a_title=info.title,
                            a_author=info.creator,
                            a_summary=info.summary,
                            a_rights=info.rights,
                            dc_identifier=identifier,
                            dc_issued=info.date,
                            a_status=pub_status)
                try:
                    book.save()
                # FIXME: Find a better way to do this.
                except IntegrityError as err:
                    if str(err) == "column file_sha256sum is not unique":
                        print("The book ( %s ) was not saved because the file already exsists in the database."
                              % (book.book_file,))
                    else:
                        raise CommandError('Error adding file %s: %s' % (book.book_file, err))
                except Exception as err:
                    raise CommandError('Error adding file %s: %s' % (book.book_file, err))
示例#3
0
    def save(self, *args, **kwargs):
        """Save the book, first extracting a cover image from EPUB files.

        When no cover image is set and the book file name ends in ``.epub``,
        the path reported by ``Epub.get_cover_image_path()`` is stored in
        ``cover_img`` (provided that path exists on disk) before delegating
        to the parent ``save``.

        :raises AssertionError: if ``file_sha256sum`` is empty
        """
        assert self.file_sha256sum
        # FIXME: we should use mimetype
        if not self.cover_img and self.book_file.name.endswith('.epub'):
            # get the cover path from the epub file
            epub_file = Epub(self.book_file)
            try:
                cover_path = epub_file.get_cover_image_path()
                if cover_path is not None and os.path.exists(cover_path):
                    # Close the cover handle as soon as it has been stored.
                    with open(cover_path, "rb") as cover_fh:
                        self.cover_img.save(os.path.basename(cover_path),  # pylint: disable=no-member
                                            File(cover_fh))
            finally:
                # Release the epub even if cover extraction fails.
                epub_file.close()

        super(Book, self).save(*args, **kwargs)
示例#4
0
 def test_simple_import(self):
     """Check that title and creator are read from the example EPUB."""
     epub = Epub("examples/The Dunwich Horror.epub")
     try:
         info = epub.get_info()
         self.assertEqual(info.title, "The Dunwich Horror")
         self.assertEqual(info.creator, "H. P. Lovecraft")
     finally:
         # Close the epub even when an assertion above fails.
         epub.close()
示例#5
0
    def handle(self, *args, **options):
        """Import every EPUB found under ``options['dirpath']`` as a ``Book``.

        Each file returned by ``get_epubs(dirpath)`` is parsed with
        ``Epub.get_info()``; files that cannot be parsed are reported and
        skipped. A ``Language`` row is created from the ``langs`` table when
        none matches the EPUB's language code. Nothing is imported when the
        path exists but is not a directory.

        Recognised options:
          * ``dirpath`` -- directory to scan for EPUB files
          * ``ignore_error`` -- when true, report a failed save and carry on
            with the next file instead of aborting

        :raises CommandError: if ``dirpath`` is missing or does not exist, or
            if saving a book fails (other than a duplicate file checksum)
            while ``ignore_error`` is false
        """
        dirpath = options.get('dirpath')
        if not dirpath or not os.path.exists(dirpath):
            raise CommandError("%r is not a valid path" % dirpath)

        if not os.path.isdir(dirpath):
            return

        uuid_prefix = 'urn:uuid:'
        for name in get_epubs(dirpath):
            try:
                epub = Epub(name)
                try:
                    info = epub.get_info()
                finally:
                    # Release the epub even when its metadata is unreadable.
                    epub.close()
            except Exception:
                print("%s is not a valid epub file" % name)
                continue

            # Make sure a Language row exists for this book's language code.
            if not Language.objects.filter(code=info.language):
                for data in langs:
                    if data[0] == info.language:
                        lang = Language()
                        # The lookup above filters on `code`; without storing
                        # it the new row could never be found again and would
                        # be re-created on every import.
                        lang.code = data[0]
                        lang.label = data[1]
                        lang.save()
                        break

            #XXX: Hacks below -- normalise missing metadata to empty values.
            for field in ('title', 'summary', 'creator', 'rights', 'date'):
                if not getattr(info, field):
                    setattr(info, field, '')
            if not info.identifier:
                info.identifier = {}
            if not info.identifier.get('value'):
                info.identifier['value'] = ''

            # Remove the URN prefix only. str.strip('urn:uuid:') would strip
            # any of those *characters* from both ends, corrupting ids that
            # start or end with e.g. 'd'.
            identifier = info.identifier['value']
            if identifier.startswith(uuid_prefix):
                identifier = identifier[len(uuid_prefix):]

            # Hash through a dedicated handle that is closed right away.
            with open(name, "rb") as hash_fh:
                sha = sha256_sum(hash_fh)

            pub_status = Status.objects.get(status='Published')
            author = Author.objects.get_or_create(a_author=info.creator)[0]
            book = Book(a_title=info.title,
                        a_author=author,
                        a_summary=info.summary,
                        file_sha256sum=sha,
                        a_rights=info.rights,
                        dc_identifier=identifier,
                        dc_issued=info.date,
                        a_status=pub_status,
                        mimetype="application/epub+zip")

            with open(name, "rb") as epub_file:
                try:
                    # Not sure why this errors, book_file.save exists
                    book.book_file.save(os.path.basename(name), File(epub_file))  #pylint: disable=no-member
                    book.validate_unique()
                    book.save()
                # FIXME: Find a better way to do this.
                except IntegrityError as err:
                    if str(err) == "column file_sha256sum is not unique":
                        print("The book (", book.book_file, ") was not saved because the file already exsists in the database.")
                    else:
                        message = 'Error adding file %s: %s' % (book.book_file, err)
                        if options['ignore_error']:
                            print(message)
                            continue
                        raise CommandError(message)
                except Exception as err:
                    message = 'Error adding file %s: %s' % (book.book_file, err)
                    if options['ignore_error']:
                        print(message)
                        continue
                    raise CommandError(message)
示例#6
0
    def handle(self, *args, **options):
        """Import every EPUB found under ``options['dirpath']`` as a ``Book``.

        Each file returned by ``get_epubs(dirpath)`` is parsed with
        ``Epub.get_info()``; files that cannot be parsed are reported and
        skipped. A ``Language`` row is created from the ``langs`` table when
        none matches the EPUB's language code. Nothing is imported when the
        path exists but is not a directory.

        Recognised options:
          * ``dirpath`` -- directory to scan for EPUB files
          * ``ignore_error`` -- when true, report a failed save and carry on
            with the next file instead of aborting

        :raises CommandError: if ``dirpath`` is missing or does not exist, or
            if saving a book fails (other than a duplicate file checksum)
            while ``ignore_error`` is false
        """
        dirpath = options.get('dirpath')
        if not dirpath or not os.path.exists(dirpath):
            raise CommandError("%r is not a valid path" % dirpath)

        if not os.path.isdir(dirpath):
            return

        uuid_prefix = 'urn:uuid:'
        for name in get_epubs(dirpath):
            try:
                epub = Epub(name)
                try:
                    info = epub.get_info()
                finally:
                    # Release the epub even when its metadata is unreadable.
                    epub.close()
            except Exception:
                print("%s is not a valid epub file" % name)
                continue

            # Make sure a Language row exists for this book's language code.
            if not Language.objects.filter(code=info.language):
                for data in langs:
                    if data[0] == info.language:
                        lang = Language()
                        # The lookup above filters on `code`; without storing
                        # it the new row could never be found again and would
                        # be re-created on every import.
                        lang.code = data[0]
                        lang.label = data[1]
                        lang.save()
                        break

            #XXX: Hacks below -- normalise missing metadata to empty values.
            for field in ('title', 'summary', 'creator', 'rights', 'date'):
                if not getattr(info, field):
                    setattr(info, field, '')
            if not info.identifier:
                info.identifier = {}
            if not info.identifier.get('value'):
                info.identifier['value'] = ''

            # Remove the URN prefix only. str.strip('urn:uuid:') would strip
            # any of those *characters* from both ends, corrupting ids that
            # start or end with e.g. 'd'.
            identifier = info.identifier['value']
            if identifier.startswith(uuid_prefix):
                identifier = identifier[len(uuid_prefix):]

            # Hash through a dedicated handle that is closed right away.
            with open(name, "rb") as hash_fh:
                sha = sha256_sum(hash_fh)

            pub_status = Status.objects.get(status='Published')
            author = Author.objects.get_or_create(a_author=info.creator)[0]
            book = Book(
                a_title=info.title,
                a_author=author,
                a_summary=info.summary,
                file_sha256sum=sha,
                a_rights=info.rights,
                dc_identifier=identifier,
                dc_issued=info.date,
                a_status=pub_status,
                mimetype="application/epub+zip")

            with open(name, "rb") as epub_file:
                try:
                    # Not sure why this errors, book_file.save exists
                    book.book_file.save(os.path.basename(name), File(epub_file))  #pylint: disable=no-member
                    book.validate_unique()
                    book.save()
                # FIXME: Find a better way to do this.
                except IntegrityError as err:
                    if str(err) == "column file_sha256sum is not unique":
                        print(
                            "The book (", book.book_file,
                            ") was not saved because the file already exsists in the database."
                        )
                    else:
                        message = 'Error adding file %s: %s' % (
                            book.book_file, err)
                        if options['ignore_error']:
                            print(message)
                            continue
                        raise CommandError(message)
                except Exception as err:
                    message = 'Error adding file %s: %s' % (
                        book.book_file, err)
                    if options['ignore_error']:
                        print(message)
                        continue
                    raise CommandError(message)
示例#7
0
 def test_simple_import(self):
     """Check that title and creator are read from the example EPUB."""
     epub = Epub("examples/The Dunwich Horror.epub")
     try:
         info = epub.get_info()
         self.assertEqual(info.title, "The Dunwich Horror")
         self.assertEqual(info.creator, "H. P. Lovecraft")
     finally:
         # Close the epub even when an assertion above fails.
         epub.close()
示例#8
0
    def process_epub(self, filename, use_symlink=False):
        """Import a single EPUB from `filename`, creating a new `Book` based
        on the information parsed from the epub.

        Authors, publishers, the cover image and subject tags are attached
        after the book has been saved, because they need an existing
        instance. If anything fails once the file has been stored, the stored
        copy is removed again.

        :param filename: ePub file to process
        :param use_symlink: symlink ePub to FileField or process normally
        :return: `True` if the book was imported; `False` if the epub could
            not be parsed or a `ValidationError` reports that it already
            exists
        :raises Exception: any other error raised while creating the book
        """

        # Try to parse the epub file, extracting the relevant info.
        epub = None
        info_dict = {}
        tmp_cover_path = None
        try:
            epub = Epub(filename)
            epub.get_info()
            # Get the information we need for creating the Model.
            info_dict, tmp_cover_path, subjects = epub.as_model_dict()
            # Explicit check instead of `assert`, which is stripped under -O.
            if not info_dict:
                raise ValueError('no metadata could be extracted')
        except Exception as e:
            self.stdout.write(self.style.ERROR(
                "Error while parsing '%s':\n%s" % (filename, unicode(e))))

            # TODO: this is not 100% reliable yet. Further modifications to
            # epub.py are needed.
            # Best-effort cleanup: neither step may mask the parse failure.
            if tmp_cover_path:
                try:
                    os.remove(tmp_cover_path)
                except OSError:
                    pass
            if epub is not None:
                try:
                    # close() can fail itself if _zobject failed to be
                    # initialized.
                    epub.close()
                except Exception:
                    pass
            return False

        # Prepare some model fields that require extra care.
        # Language (dc_language).
        try:
            language = models.Language.objects.get_or_create_by_code(
                info_dict['dc_language']
            )
            info_dict['dc_language'] = language
        except Exception:
            info_dict['dc_language'] = None

        # Original filename (original_path).
        info_dict['original_path'] = filename
        # Published status (a_status).
        info_dict['a_status'] = models.Status.objects.get(
            status=settings.DEFAULT_BOOK_STATUS)

        # Remove authors and publishers from dict.
        authors = info_dict.pop('authors', [])
        publishers = info_dict.pop('publishers', [])

        # Create and save the Book.
        book = None
        source = None
        try:
            # Prepare the Book.
            book = models.Book(**info_dict)
            # EPUBs are zip archives, so read them as binary. The handle is
            # closed in the `finally` clause below.
            source = open(filename, 'rb')
            # Use a symlink or copy the file depending on options.
            wrapper = LinkableFile if use_symlink else File
            book.book_file.save(os.path.basename(filename), wrapper(source),
                                save=False)
            book.file_sha256sum = models.sha256_sum(book.book_file)

            # Validate and save.
            book.full_clean()
            book.save()

            # Handle info that needs existing book instance thru book.save.
            # authors, publishers, cover, and tags

            # Add authors
            for author in authors:
                if author is None:
                    continue
                # One entry may list several people joined by ' and ', '&'
                # or ';'.
                candidates = author.strip().replace(
                    ' and ', ';').replace('&', ';').split(';')
                for candidate in candidates:
                    fixed = fix_authors(candidate)
                    if not fixed:
                        continue
                    # fix_authors may return one name or several.
                    if isinstance(fixed, basestring):
                        fixed = [fixed]
                    for author_name in fixed:
                        self.stdout.write(self.style.NOTICE(
                            'Found author: "%s"' % author_name))
                        book.authors.add(
                            models.Author.objects.get_or_create(
                                name=author_name)[0].pk)

            # Add publishers
            for publisher in publishers:
                self.stdout.write(self.style.NOTICE(
                    'Found publisher: "%s"' % publisher))
                book.publishers.add(
                    models.Publisher.objects.get_or_create(
                        name=publisher)[0].pk)

            # Add cover image (cover_image). It is handled here as the filename
            # depends on instance.pk (which is only present after Book.save()).
            if tmp_cover_path:
                try:
                    cover_filename = '%s%s' % (
                        book.pk, os.path.splitext(tmp_cover_path)[1]
                    )
                    with open(tmp_cover_path, 'rb') as cover_fh:
                        book.cover_img.save(cover_filename,
                                            File(cover_fh),
                                            save=True)
                except Exception as e:
                    # A missing cover is not fatal. The temporary file is
                    # still removed in the `finally` clause if it exists.
                    self.stdout.write(self.style.WARNING(
                        'Error while saving cover image %s:\n%s' % (
                            tmp_cover_path, str(e))))

            # Add subjects as tags
            for subject in (subjects or []):
                # workaround for ePubs with description as subject
                if not subject or len(subject) > 80:
                    break

                subject_split = subject.replace('/', ',') \
                    .replace(';', ',') \
                    .replace(':', '') \
                    .replace('\n', ',') \
                    .replace(' ,', ',') \
                    .replace(' ,', ',') \
                    .split(',')
                for tag in subject_split:
                    # Skip blank fragments. The previous `tag is not ' '` was
                    # an identity test and let empty tags through.
                    if not tag.strip():
                        continue
                    # The specs recommend using unicode for the tags, but
                    # do not enforce it. As a result, tags in exotic
                    # encodings might cause taggit to crash while trying to
                    # create the slug.
                    self.stdout.write(self.style.NOTICE(
                        'Found subject (tag): "%s"' % tag))
                    try:
                        book.tags.add(tag.lower().strip())
                    except Exception:
                        try:
                            book.tags.add(
                                tag.encode('utf-8').lower().strip())
                        except Exception:
                            # No further efforts are made, and the tag is
                            # not added.
                            self.stdout.write(self.style.WARNING(
                                'Tag could not be added'))
        except Exception as e:
            # Delete .epub file in media/, if `book` is a valid object.
            try:
                if book is not None and os.path.isfile(book.book_file.path):
                    os.remove(book.book_file.path)
            except Exception:
                pass

            if isinstance(e, ValidationError) and 'already exists' in str(e):
                self.stdout.write(self.style.WARNING(
                    'The book (%s) was not saved because the file already '
                    'exists in the database:\n%s' % (filename, str(e))))
                return False
            # TODO: check for possible risen exceptions at a finer grain.
            # `raise e` rather than a bare `raise`: on Python 2 a bare raise
            # here could re-raise an error swallowed by the cleanup above.
            raise e
        finally:
            # Release the source handle and delete the temporary files.
            if source is not None:
                source.close()
            try:
                epub.close()
            finally:
                if tmp_cover_path and os.path.isfile(tmp_cover_path):
                    os.remove(tmp_cover_path)

        return True