Пример #1
0
class DocParser:
    """
    Converts a document to HTML with wvWare, splits the HTML at its
    centre-aligned elements and registers the resulting files with a Book.
    """

    def __init__(self, name, book_properties):
        """
        Stores the document path and creates the Book to be filled.

        name -- path of the source document; passed to wvWare by parse().
        book_properties -- handed unchanged to Book().
        """
        self.name = name
        self.book = Book(book_properties)

    # Prepended by parse() to every chapter file except the first one.
    HTML_HEADER = """
    <html>
    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
    <head>
    Title
    </head>
    <body>
    """

    def parse(self, output=""):
        """
        Converts the document, writes one HTML file per chapter into a new
        temporary directory, adds the files to the book and packs it.

        output -- path of the HTML file wvWare should produce; when empty,
                  ``tmp.html`` inside the temporary directory is used.

        Returns True after the book has been packed.
        """
        # Local import: shlex.quote on Python 3, pipes.quote on Python 2.
        try:
            from shlex import quote as shell_quote
        except ImportError:
            from pipes import quote as shell_quote

        self.temp_dir = tempfile.mkdtemp()
        if output == "":
            output = self.temp_dir + "/tmp.html"

        # Both paths are quoted so that spaces or shell metacharacters in
        # them cannot break (or inject into) the command line.
        os.system(
            PATH_TO_WV_WARE
            + "wvWare -x"
            + PATH_TO_WV_HTML
            + "/wvHtml.xml --charset=cp1251 %s > %s"
            % (shell_quote(self.name), shell_quote(output))
        )

        # Read back the file wvWare actually wrote; a caller-supplied
        # `output` does not live at <temp_dir>/tmp.html.
        with open(output, "r") as converted:
            temp_file = prettify.remove_spaces(converted.read())
        temp_file = prettify.remove_unnecessary_tags(temp_file)
        soup = BeautifulSoup(temp_file)

        # Every centre-aligned element that contains more than tags and
        # whitespace is treated as a chapter heading: `names` keeps its
        # markup (used to locate the split point), `titles` its text.
        names = []
        titles = []
        for heading in soup.findAll(align="center"):
            markup = str(heading)
            if re.match(r"^(<.*?>|\s+)*$", markup):
                continue
            names.append(re.sub(r"\s+", " ", markup))
            text = re.sub(r"(<.*>|\s+)", " ", heading.prettify())
            titles.append(re.sub(r"\s+", " ", text))

        # Re-encode cp1251 -> utf-8 and collapse whitespace so the headings
        # (normalised the same way above) can be searched for in the text.
        temp_file = re.sub(r"\s+", " ", temp_file.decode("cp1251").encode("utf-8"))

        # Dump of the normalised text and headings; it is not read back
        # anywhere in this class.
        with open(self.temp_dir + "/tmp", "w") as out:
            out.write(temp_file)
            out.write("   \n\n\n")
            for name in names:
                out.write(name + "\n\n\n")

        if not names:
            # No headings found: the whole document becomes one chapter.
            print("not names")
            chapter_path = self.temp_dir + "/0.html"
            with open(chapter_path, "w") as chapter:
                chapter.write(temp_file)
            self.book.add_file(chapter_path, "c0", "")

        # NOTE(review): find() returns -1 when a heading's markup is not
        # present verbatim, in which case the slices below keep everything
        # except the last character. The text that follows the last heading
        # is also never appended to `paragraphs`. Confirm both are intended.
        paragraphs = []
        for i, name in enumerate(names):
            split_index = temp_file.find(name)
            # The first piece still carries the document's own header.
            paragraph = "" if i == 0 else self.HTML_HEADER
            paragraph += temp_file[:split_index]
            paragraphs.append(BeautifulSoup(paragraph).prettify())
            temp_file = temp_file[split_index:]

        for i, paragraph in enumerate(paragraphs):
            chapter_path = self.temp_dir + "/%d.html" % i
            with open(chapter_path, "w") as chapter:
                chapter.write(paragraph)
            self.book.add_file(chapter_path, "c%d" % i, titles[i])

        self.book.pack()
        return True
Пример #2
0
 def __init__(self, name, book_properties):
     """
     Sets up the instance.

     name -- kept on the instance as ``self.name``.
     book_properties -- used to construct the Book kept as ``self.book``.
     """
     self.book = Book(book_properties)
     self.name = name
Пример #3
0
def epub_form(request):
    """
    Displays form for converting from txt files and adding into db.

    On a POST with a valid EpubAddForm the uploaded file is converted into
    an epub (split by the submitted table of contents when one is given),
    the Book, Author, EpubBook and BookFile records are created, and the
    client is redirected to ``/book/id<book id>``.

    The form template is re-rendered with an error when the submitted
    language is unknown or when saving the BookFile fails.
    """
    # Local import so the descriptor returned by mkstemp() can be closed.
    import os

    messages = []
    if request.method == "POST":
        form = EpubAddForm(request.POST, request.FILES)
        if form.is_valid():
            cd = form.cleaned_data
            book_name = cd['caption']
            lang = cd['language']
            author_name = request.user.first_name + " " + request.user.last_name

            # Validate the language first, so an unknown code is rejected
            # before any temporary file is created or conversion is done.
            try:
                language = Language.objects.get(short=lang)
            except Language.DoesNotExist:
                return render_response(request, "epub/epub_form.html",
                                       {'errors': [_('No such language.')],
                                        'user': request.user, 'menu_on': True,
                                        'form': form})

            # mkstemp() returns an open descriptor together with the path;
            # only the path is used, so close the descriptor right away
            # instead of leaking it on every request.
            temp_fd, temp_name = tempfile.mkstemp()
            os.close(temp_fd)

            properties = BookProperties(temp_name, book_name)
            properties.language = lang
            properties.subject = cd['subject']
            properties.description = cd['description']
            properties.genre = cd['type']
            properties.date = cd['date']
            properties.rights = cd['rights']
            properties.author = author_name

            book = BookCreator(properties)
            upload = request.FILES['file']
            toc = cd['toc']
            if toc:
                book.split_by_toc(toc, upload.read())
            else:
                # No table of contents: the whole text becomes one chapter.
                book.txt_to_html(upload.read(), temp_name + "text.html")
                book.add_file(temp_name + "text.html", "c1", "")

            book.pack()

            book_model = Book.objects.create(language=language, pagelink="")
            book_model.title = book_name

            # Reuse an author with this name when one exists; create it only
            # when there is none (no duplicate on unrelated lookup errors).
            existing_authors = list(Author.objects.filter(name=author_name)[:1])
            if existing_authors:
                author = existing_authors[0]
            else:
                author = Author.objects.create(name=author_name)
            book_model.author = [author]
            book_model.save()

            ebook = EpubBook.objects.create(book=book_model)

            exfile = _ExistingFile(temp_name)
            ebook.name.save(book_name + ".epub", exfile)
            print(ebook.name)
            try:
                book_file = BookFile(link="/" + MEDIA_URL + "/" + quote(str(ebook.name)))
                book_file.type = 'epub'
                book_file.save()
            except Exception as e:
                # Any failure while building or saving the BookFile is
                # reported to the user as a name clash.
                print(e)
                return render_response(request, "epub/epub_form.html",
                                       {'errors': [_("Book name should be unique")],
                                        'menu_on': True, 'form': form})
            book_model.book_file = [book_file]
            book_model.save()

            ebook.save()

            messages += [_("Book successfully created.")]
            return HttpResponseRedirect("/book/id%d" % book_model.id)