Exemplo n.º 1
0
    def process_epub(self, filename, replace_strategy='original'):
        """Re-point an already imported `Book` at the EPUB in `filename`.

        The book is looked up by the sha256 sum of `filename`. When found:
            - `original_path` is updated to point to `filename`.
            - `book_file` is replaced according to `replace_strategy`:
                * 'always-link': the stored file/link is always deleted and a
                  new link to `filename` is created.
                * 'always-copy': only if the stored file is a symlink, it is
                  deleted and a new *copy* of `filename` is stored.
                * any other value (including the default): only if the stored
                  file is a symlink, it is deleted and a new *link* to
                  `filename` is created.

        Returns a tuple (Book, success): `(None, False)` when no book with
        that checksum exists, `(book, True)` once the book has been saved.
        Any exception raised while saving propagates to the caller.
        """
        # Check the sha256sum and try to find the Book. The EPUB is binary
        # data, so it is opened in binary mode and closed right after hashing.
        with open(filename, 'rb') as epub_file:
            file_sha256sum = models.sha256_sum(File(epub_file))
        try:
            book = models.Book.objects.get(file_sha256sum=file_sha256sum)
        except models.Book.DoesNotExist:
            return None, False

        # Decide whether the stored file must be replaced, and with what.
        if replace_strategy == 'always-link':
            replace_file = True
            wrapper = LinkableFile
        else:
            # Both remaining strategies only touch the stored file when it is
            # a symlink; they differ in what replaces it.
            replace_file = os.path.islink(
                os.path.join(settings.MEDIA_ROOT, book.book_file.path))
            wrapper = File if replace_strategy == 'always-copy' \
                else LinkableFile

        # TODO: if save fails, files might have been deleted and leave the
        # model in inconsistent state - it's probably a better idea to delete
        # files *after* a successful book.save().
        new_file = None
        if replace_file:
            book.book_file.delete()
            new_file = open(filename, 'rb')

        # Save and return the Book.
        # TODO: check for possible risen exceptions at a finer grain.
        try:
            if new_file is not None:
                book.book_file.save(os.path.basename(filename),
                                    wrapper(new_file),
                                    save=False)
            book.original_path = filename
            book.save()
            book.refresh_from_db()
        finally:
            # The handle is only needed while the file is being stored.
            if new_file is not None:
                new_file.close()

        return book, True
Exemplo n.º 2
0
    def handle(self, *args, **options):
        """Import every EPUB found under ``options['dirpath']`` as a ``Book``.

        For each file returned by ``get_epubs(dirpath)`` the metadata is read
        with ``Epub.get_info()``, a ``Book`` is built from it and the file is
        attached through ``book.book_file.save()``. Files whose metadata
        cannot be read are reported and skipped. Nothing is imported when
        ``dirpath`` exists but is not a directory.

        Raises:
            CommandError: if ``dirpath`` is missing or does not exist, or if
                storing a book fails (for a reason other than the duplicate
                checksum message) and ``options['ignore_error']`` is false.
        """
        dirpath = options.get('dirpath')
        if not dirpath or not os.path.exists(dirpath):
            raise CommandError("%r is not a valid path" % dirpath)

        # An existing path that is not a directory is silently ignored.
        if not os.path.isdir(dirpath):
            return

        urn_prefix = 'urn:uuid:'
        for name in get_epubs(dirpath):
            # Read the metadata; once the Epub was opened it is always closed,
            # even when get_info() fails.
            try:
                epub = Epub(name)
                try:
                    info = epub.get_info()
                finally:
                    epub.close()
            except Exception:
                print("%s is not a valid epub file" % name)
                continue

            # Create a Language row when none matches the book's language
            # code and the code is listed in `langs`.
            # NOTE(review): the new Language only gets a label (no code) and
            # is never attached to the Book below - confirm this is intended.
            if not Language.objects.filter(code=info.language):
                for data in langs:
                    if data[0] == info.language:
                        language = Language()
                        language.label = data[1]
                        language.save()
                        break

            #XXX: Hacks below - replace missing metadata with empty values.
            for field in ('title', 'summary', 'creator', 'rights', 'date'):
                if not getattr(info, field):
                    setattr(info, field, '')
            if not info.identifier:
                info.identifier = {}
            if not info.identifier.get('value'):
                info.identifier['value'] = ''

            # Drop the URN prefix only. str.strip() would treat its argument
            # as a set of characters and eat leading/trailing 'u', 'r', 'n',
            # 'i', 'd' and ':' from the identifier itself.
            identifier = info.identifier['value']
            if identifier.startswith(urn_prefix):
                identifier = identifier[len(urn_prefix):]

            with open(name, "rb") as sha_source:
                sha = sha256_sum(sha_source)
            pub_status = Status.objects.get(status='Published')
            author = Author.objects.get_or_create(a_author=info.creator)[0]
            book = Book(
                a_title=info.title,
                a_author=author,
                a_summary=info.summary,
                file_sha256sum=sha,
                a_rights=info.rights,
                dc_identifier=identifier,
                dc_issued=info.date,
                a_status=pub_status,
                mimetype="application/epub+zip")

            with open(name, "rb") as epub_file:
                try:
                    # Not sure why this errors, book_file.save exists
                    book.book_file.save(os.path.basename(name), File(epub_file))  #pylint: disable=no-member
                    book.validate_unique()
                    book.save()
                except Exception as err:
                    # FIXME: Find a better way to do this (the duplicate is
                    # recognised by the exact database error message).
                    duplicate = (
                        isinstance(err, IntegrityError) and
                        str(err) == "column file_sha256sum is not unique")
                    if duplicate:
                        print(
                            "The book (", book.book_file,
                            ") was not saved because the file already exsists in the database."
                        )
                    else:
                        message = 'Error adding file %s: %s' % (
                            book.book_file, err)
                        if not options['ignore_error']:
                            raise CommandError(message)
                        print(message)
Exemplo n.º 3
0
    def handle(self, *args, **options):
        """Import every EPUB found under ``options['dirpath']`` as a ``Book``.

        For each file returned by ``get_epubs(dirpath)`` the metadata is read
        with ``Epub.get_info()``, a ``Book`` is built from it and the file is
        attached through ``book.book_file.save()``. Files whose metadata
        cannot be read are reported and skipped. Nothing is imported when
        ``dirpath`` exists but is not a directory.

        Raises:
            CommandError: if ``dirpath`` is missing or does not exist, or if
                storing a book fails (for a reason other than the duplicate
                checksum message) and ``options['ignore_error']`` is false.
        """
        dirpath = options.get('dirpath')
        if not dirpath or not os.path.exists(dirpath):
            raise CommandError("%r is not a valid path" % dirpath)

        # An existing path that is not a directory is silently ignored.
        if not os.path.isdir(dirpath):
            return

        urn_prefix = 'urn:uuid:'
        for name in get_epubs(dirpath):
            # Read the metadata; once the Epub was opened it is always closed,
            # even when get_info() fails.
            try:
                epub = Epub(name)
                try:
                    info = epub.get_info()
                finally:
                    epub.close()
            except Exception:
                print("%s is not a valid epub file" % name)
                continue

            # Create a Language row when none matches the book's language
            # code and the code is listed in `langs`.
            # NOTE(review): the new Language only gets a label (no code) and
            # is never attached to the Book below - confirm this is intended.
            if not Language.objects.filter(code=info.language):
                for data in langs:
                    if data[0] == info.language:
                        language = Language()
                        language.label = data[1]
                        language.save()
                        break

            #XXX: Hacks below - replace missing metadata with empty values.
            for field in ('title', 'summary', 'creator', 'rights', 'date'):
                if not getattr(info, field):
                    setattr(info, field, '')
            if not info.identifier:
                info.identifier = {}
            if not info.identifier.get('value'):
                info.identifier['value'] = ''

            # Drop the URN prefix only. str.strip() would treat its argument
            # as a set of characters and eat leading/trailing 'u', 'r', 'n',
            # 'i', 'd' and ':' from the identifier itself.
            identifier = info.identifier['value']
            if identifier.startswith(urn_prefix):
                identifier = identifier[len(urn_prefix):]

            with open(name, "rb") as sha_source:
                sha = sha256_sum(sha_source)
            pub_status = Status.objects.get(status='Published')
            author = Author.objects.get_or_create(a_author=info.creator)[0]
            book = Book(a_title=info.title,
                        a_author=author, a_summary=info.summary,
                        file_sha256sum=sha,
                        a_rights=info.rights, dc_identifier=identifier,
                        dc_issued=info.date,
                        a_status=pub_status, mimetype="application/epub+zip")

            with open(name, "rb") as epub_file:
                try:
                    # Not sure why this errors, book_file.save exists
                    book.book_file.save(os.path.basename(name), File(epub_file)) #pylint: disable=no-member
                    book.validate_unique()
                    book.save()
                except Exception as err:
                    # FIXME: Find a better way to do this (the duplicate is
                    # recognised by the exact database error message).
                    duplicate = (
                        isinstance(err, IntegrityError) and
                        str(err) == "column file_sha256sum is not unique")
                    if duplicate:
                        print("The book (", book.book_file, ") was not saved because the file already exsists in the database.")
                    else:
                        message = 'Error adding file %s: %s' % (book.book_file, err)
                        if not options['ignore_error']:
                            raise CommandError(message)
                        print(message)
Exemplo n.º 4
0
    def process_epub(self, filename, use_symlink=False):
        """Import a single EPUB from `filename`, creating a new `Book` based
        on the information parsed from the epub.

        After the `Book` is saved, its authors, publishers, cover image and
        subject tags are attached to it. The temporary cover file and the
        parsed epub are released before returning.

        :param filename: ePub file to process
        :param use_symlink: symlink ePub to FileField or process normally
        :return: True when the book was created; False when the file could
            not be parsed or when validation reports that it already exists.
            Any other error raised while saving is re-raised.
        """

        # Try to parse the epub file, extracting the relevant info.
        info_dict = {}
        tmp_cover_path = None
        epub = None
        try:
            epub = Epub(filename)
            epub.get_info()
            # Get the information we need for creating the Model.
            info_dict, tmp_cover_path, subjects = epub.as_model_dict()
            # Explicit check instead of `assert`, which is stripped under -O.
            if not info_dict:
                raise ValueError('no metadata could be extracted')
        except Exception as e:
            self.stdout.write(self.style.ERROR(
                "Error while parsing '%s':\n%s" % (filename, unicode(e))))

            # TODO: this is not 100% reliable yet. Further modifications to
            # epub.py are needed.
            # Best-effort cleanup: each step is attempted on its own so that a
            # failure of one does not skip the other.
            if tmp_cover_path:
                try:
                    os.remove(tmp_cover_path)
                except OSError:
                    pass
            if epub is not None:
                try:
                    # close() can fail itself if _zobject failed to be
                    # initialized.
                    epub.close()
                except Exception:
                    pass
            return False

        # Prepare some model fields that require extra care.
        # Language (dc_language): fall back to None when it cannot be
        # resolved.
        try:
            language = models.Language.objects.get_or_create_by_code(
                info_dict['dc_language']
            )
            info_dict['dc_language'] = language
        except Exception:
            info_dict['dc_language'] = None

        # Original filename (original_path).
        info_dict['original_path'] = filename
        # Published status (a_status).
        info_dict['a_status'] = models.Status.objects.get(
            status=settings.DEFAULT_BOOK_STATUS)

        # Remove authors and publishers from dict.
        authors = info_dict.pop('authors', [])
        publishers = info_dict.pop('publishers', [])

        # Create and save the Book.
        try:
            # Prepare the Book.
            book = models.Book(**info_dict)
            # Use a symlink or copy the file depending on options. The epub is
            # binary data; the handle is closed once it has been stored and
            # hashed.
            wrapper = LinkableFile if use_symlink else File
            with open(filename, 'rb') as epub_file:
                book.book_file.save(os.path.basename(filename),
                                    wrapper(epub_file), save=False)
                book.file_sha256sum = models.sha256_sum(book.book_file)

            # Validate and save.
            book.full_clean()
            book.save()

            # Handle info that needs existing book instance thru book.save.
            # authors, publishers, cover, and tags

            # Add authors. One entry may hold several names separated by
            # ' and ', '&' or ';'.
            for author in authors:
                if author is None:
                    continue
                author_split = author.strip().replace(
                    ' and ', ';').replace('&', ';').split(';')
                for auth in author_split:
                    auth = fix_authors(auth)
                    if not auth:
                        continue
                    # fix_authors() may yield one name or several of them.
                    names = [auth] if isinstance(auth, basestring) else auth
                    for a in names:
                        self.stdout.write(self.style.NOTICE(
                            'Found author: "%s"' % a))
                        book.authors.add(
                            models.Author.objects.get_or_create(
                                name=a)[0].pk)

            # Add publishers
            for publisher in publishers:
                self.stdout.write(self.style.NOTICE(
                    'Found publisher: "%s"' % publisher))
                book.publishers.add(
                    models.Publisher.objects.get_or_create(
                        name=publisher)[0].pk)

            # Add cover image (cover_image). It is handled here as the filename
            # depends on instance.pk (which is only present after Book.save()).
            # A failure here is reported but does not abort the import; the
            # temporary file is still removed in the `finally` clause below.
            if tmp_cover_path:
                try:
                    cover_filename = '%s%s' % (
                        book.pk, os.path.splitext(tmp_cover_path)[1]
                    )
                    with open(tmp_cover_path, 'rb') as cover_file:
                        book.cover_img.save(cover_filename,
                                            File(cover_file),
                                            save=True)
                except Exception as cover_error:
                    self.stdout.write(self.style.WARNING(
                        'Error while saving cover image %s:\n%s' % (
                            tmp_cover_path, str(cover_error))))

            # Add subjects as tags
            for subject in (subjects or []):
                # workaround for ePubs with description as subject
                # NOTE(review): `break` also drops every remaining subject -
                # confirm that `continue` was not intended.
                if not subject or len(subject) > 80:
                    break

                subject_split = subject.replace('/', ',') \
                    .replace(';', ',') \
                    .replace(':', '') \
                    .replace('\n', ',') \
                    .replace(' ,', ',') \
                    .replace(' ,', ',') \
                    .split(',')
                for tag in subject_split:
                    # Skip blank fragments. The previous identity test
                    # (`tag is not ' '`) compared object identity, which is
                    # not a reliable way to compare strings.
                    if not tag.strip():
                        continue
                    # The specs recommend using unicode for the tags, but
                    # do not enforce it. As a result, tags in exotic
                    # encodings might cause taggit to crash while trying to
                    # create the slug.
                    self.stdout.write(self.style.NOTICE(
                        'Found subject (tag): "%s"' % tag))
                    try:
                        book.tags.add(tag.lower().strip())
                    except Exception:
                        try:
                            book.tags.add(
                                tag.encode('utf-8').lower().strip())
                        except Exception:
                            # No further efforts are made, and the tag is
                            # not added.
                            self.stdout.write(self.style.WARNING(
                                'Tag could not be added'))
        except Exception as e:
            # Delete .epub file in media/, if `book` is a valid object.
            # Best effort: `book` may not exist yet or may have no file.
            try:
                if os.path.isfile(book.book_file.path):
                    os.remove(book.book_file.path)
            except Exception:
                pass

            if isinstance(e, ValidationError) and 'already exists' in str(e):
                self.stdout.write(self.style.WARNING(
                    'The book (%s) was not saved because the file already '
                    'exists in the database:\n%s' % (filename, str(e))))
                return False
            else:
                # TODO: check for possible risen exceptions at a finer grain.
                # Re-raised by name: the cleanup above may have handled
                # another exception in the meantime.
                raise e
        finally:
            # Delete the temporary files. The cover is removed even when
            # closing the epub fails, and only if it is still on disk.
            try:
                epub.close()
            finally:
                if tmp_cover_path and os.path.exists(tmp_cover_path):
                    os.remove(tmp_cover_path)

        return True