def import_file(self, file_path, options=None, **kwargs):
    """Import a .docx file into the current book or into a single chapter.

    The file is read with ``ooxml.read_from_file``. In chapter mode
    (``self.is_chapter_mode``) the whole document is serialized and passed
    to ``_import_single_chapter``; otherwise it is split with
    ``importer.get_chapters`` and passed to ``_import_chapters``.
    Attachments and styles are imported in both modes, after which the
    ``chapter_imported`` or ``book_imported`` signal is sent.

    Exceptions raised while importing are caught here and reported through
    ``self.notifier.error``; unexpected ones are also logged.

    Args:
        file_path: Location of the file, handed to ``ooxml.read_from_file``.
        options: Forwarded to ``importer.get_chapters`` (book mode only).
            ``None`` means ``{'scale_font_size': True}``; a fresh dict is
            built per call so the default is never shared between calls.
        **kwargs: Only ``process_mode`` is read (default ``'overwrite'``);
            it is forwarded to ``_import_single_chapter``.
    """
    if options is None:
        options = {'scale_font_size': True}

    self.delegate.notifier = self.notifier
    self.broken_images = []
    self.converted_images = []

    book = self.book
    process_mode = kwargs.get('process_mode', 'overwrite')

    try:
        self.dfile = ooxml.read_from_file(file_path)

        # Close the document even when the import fails half-way, so the
        # underlying file is not left open.
        try:
            if self.is_chapter_mode:
                chapter_content = serialize.serialize(
                    self.dfile.document, self._serialize_options)
                self._import_single_chapter(
                    self.chapter, chapter_content, process_mode)
            else:
                chapters = importer.get_chapters(
                    self.dfile.document,
                    options=options,
                    serialize_options=self._serialize_options)
                self._import_chapters(book, chapters)

            # save attachments and styles
            self._import_attachments(book, self.dfile.document)
            self._import_styles(book)
        finally:
            self.dfile.close()

        self._check_for_elements()

        # trigger signal depending on the import mode
        # TODO: allow attaching user as sender on `book_imported` signal
        if self.is_chapter_mode:
            chapter_imported.send(
                sender=(self.user or self), chapter=self.chapter)
        else:
            book_imported.send(sender=self, book=book)
    except zipfile.BadZipfile:
        notif_msg = _(
            "The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx."  # noqa
        )
        self.notifier.error(notif_msg)
    except Exception as err:
        err_msg = _(
            "The docx file you uploaded contains errors and cannot be converted. Please contact customer support."  # noqa
        )
        self.notifier.error(err_msg)
        # Lazy %-args: logging does the interpolation itself.
        logger.exception("Error trying to import docx file. Msg: %s", err)
# Prints the title and content of every chapter found in a sample .docx file.
import logging

import ooxml
import six
from ooxml import parse, serialize, importer

logging.basicConfig(filename='ooxml.log', level=logging.INFO)

file_name = '../files/02_split.docx'
dfile = ooxml.read_from_file(file_name)

# Close the document once the chapters are printed, including when chapter
# extraction or printing fails.
try:
    chapters = importer.get_chapters(dfile.document)

    for title, content in chapters:
        six.print_('====================================================================')
        six.print_(title)
        six.print_('====================================================================')
        six.print_(content)
finally:
    dfile.close()
# Prints the title and content of every chapter found in a sample .docx file.
import logging
from contextlib import closing

import ooxml
import six
from ooxml import parse, serialize, importer

logging.basicConfig(filename='ooxml.log', level=logging.INFO)

file_name = '../files/02_split.docx'

# closing() calls dfile.close() when the block exits, whether or not the
# chapter extraction or printing raised.
with closing(ooxml.read_from_file(file_name)) as dfile:
    chapters = importer.get_chapters(dfile.document)

    for title, content in chapters:
        six.print_(
            '====================================================================')
        six.print_(title)
        six.print_(
            '====================================================================')
        six.print_(content)
def import_file(self, file_path, book, options=None):
    """Import the chapters, attachments and styles of a .docx file into ``book``.

    The file is read with ``ooxml.read_from_file`` and split into chapters
    with ``importer.get_chapters``, using custom serializers for math,
    footnote and endnote elements. Exceptions raised while importing are
    caught here and reported through ``self.notifier.error``; unexpected
    ones are also logged.

    Args:
        file_path: Location of the file, handed to ``ooxml.read_from_file``.
        book: Target passed to ``_import_attachments``, ``_import_chapters``
            and ``_import_styles``.
        options: Forwarded to ``importer.get_chapters``. A falsy value
            (``None`` or an empty dict) means ``{'scale_font_size': True}``.
    """
    self.delegate.notifier = self.notifier
    self.broken_images = []
    self.converted_images = []

    def serialize_empty(ctx, document, elem, root):
        # Emit nothing for the element; the output tree is returned as-is.
        return root

    def append_note_marker(note_ids, el, root):
        # Produces: <sup class="endnote" data-id="1454855960556">1</sup>
        # One data-id is generated per relationship id and then reused.
        if el.rid not in note_ids:
            note_ids[el.rid] = uuid.uuid1().hex

        note = lxml.etree.SubElement(
            root, 'sup', {'class': 'endnote', 'data-id': note_ids[el.rid]})
        note.text = '1'

        return root

    def serialize_endnote(ctx, document, el, root):
        return append_note_marker(self.endnotes, el, root)

    def serialize_footnote(ctx, document, el, root):
        # Footnotes get the same 'endnote' markup but their own id registry.
        return append_note_marker(self.footnotes, el, root)

    if not options:
        options = {'scale_font_size': True}

    try:
        self.dfile = ooxml.read_from_file(file_path)

        # Close the document even when the import fails half-way, so the
        # underlying file is not left open.
        try:
            serialize_options = {
                'embed_styles': True,
                'embed_fontsize': True,
                # 'empty_paragraph_as_nbsp': True,
                'serializers': {
                    doc.Math: serialize_empty,
                    doc.Footnote: serialize_footnote,
                    doc.Endnote: serialize_endnote,
                },
            }

            chapters = importer.get_chapters(
                self.dfile.document,
                options=options,
                serialize_options=serialize_options)

            self._import_attachments(book, self.dfile.document)
            self._import_chapters(book, chapters)

            # get the styles
            self._import_styles(book)
        finally:
            self.dfile.close()

        self._check_for_elements()
    except zipfile.BadZipfile:
        notif_msg = _("The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx.")  # noqa
        self.notifier.error(notif_msg)
    except Exception as err:
        err_msg = _("The docx file you uploaded contains errors and cannot be converted. Please contact customer support.")  # noqa
        self.notifier.error(err_msg)
        # Lazy %-args: logging does the interpolation itself.
        logger.exception("Error trying to import docx file. Msg: %s", err)
def import_file(self, file_path, book, options=None):
    """Import the chapters, attachments and styles of a .docx file into ``book``.

    The file is read with ``ooxml.read_from_file`` and split into chapters
    with ``importer.get_chapters``. The serialization options set a header
    context, custom serializers for math, footnote and endnote elements,
    and hooks for ``p``, ``h`` and ``table`` output. Exceptions raised while
    importing are caught here and reported through ``self.notifier.error``;
    unexpected ones are also logged.

    Args:
        file_path: Location of the file, handed to ``ooxml.read_from_file``.
        book: Target passed to ``_import_attachments``, ``_import_chapters``
            and ``_import_styles``.
        options: Forwarded to ``importer.get_chapters``. A falsy value
            (``None`` or an empty dict) means ``{'scale_font_size': True}``.
    """
    self.delegate.notifier = self.notifier
    self.broken_images = []
    self.converted_images = []

    def serialize_empty(ctx, document, elem, root):
        # Emit nothing for the element; the output tree is returned as-is.
        return root

    def add_note_reference(registry, el, root):
        # Produces: <sup class="endnote" data-id="1454855960556">1</sup>
        # One data-id is generated per relationship id and then reused.
        if el.rid not in registry:
            registry[el.rid] = uuid.uuid1().hex

        note = lxml.etree.SubElement(root, 'sup', {
            'class': 'endnote',
            'data-id': registry[el.rid],
        })
        note.text = '1'

        return root

    def serialize_endnote(ctx, document, el, root):
        return add_note_reference(self.endnotes, el, root)

    def serialize_footnote(ctx, document, el, root):
        # Footnotes get the same 'endnote' markup but their own id registry.
        return add_note_reference(self.footnotes, el, root)

    if not options:
        options = {'scale_font_size': True}

    try:
        self.dfile = ooxml.read_from_file(file_path)

        # Close the document even when the import fails half-way, so the
        # underlying file is not left open.
        try:
            # TODO: move this into a more customisable place.
            serialize_options = {
                'header': docutils.DocHeaderContext,
                'embed_styles': True,
                'embed_fontsize': True,
                # 'empty_paragraph_as_nbsp': True,
                'serializers': {
                    doc.Math: serialize_empty,
                    doc.Footnote: serialize_footnote,
                    doc.Endnote: serialize_endnote,
                },
                'hooks': {
                    'p': [docutils.hook_p],
                    'h': [docutils.check_h_tags_hook],
                    'table': [docutils.hook_infobox_table],
                },
            }

            chapters = importer.get_chapters(
                self.dfile.document,
                options=options,
                serialize_options=serialize_options)

            self._import_attachments(book, self.dfile.document)
            self._import_chapters(book, chapters)

            # get the styles
            self._import_styles(book)
        finally:
            self.dfile.close()

        self._check_for_elements()
    except zipfile.BadZipfile:
        notif_msg = _(
            "The file could not be imported because it was not saved in the .docx format. Try to open the file in Word and save it as a .docx."  # noqa
        )
        self.notifier.error(notif_msg)
    except Exception as err:
        err_msg = _(
            "The docx file you uploaded contains errors and cannot be converted. Please contact customer support."  # noqa
        )
        self.notifier.error(err_msg)
        # Lazy %-args: logging does the interpolation itself.
        logger.exception("Error trying to import docx file. Msg: %s", err)