def test_textTrimming(self):
    """Check the first paragraph extracted from the ``07_trim.docx`` sample.

    Both the first content entry and the ``text`` of the first formatting
    fragment must equal the expected paragraph string exactly.
    """
    # NOTE(review): presumably the sample paragraph carries surrounding
    # whitespace that the processor strips -- confirm against the fixture.
    expectedText = 'Paragraph with trailing and closing spaces'
    sampleDocxFile = os.path.join(os.getcwd(), 'samples', 'docx', '07_trim.docx')
    document = DocxProcessor(sampleDocxFile).document()

    self.assertEqual(expectedText, document.content()[0])
    # The original statement ended with a stray comma, which wrapped the
    # assertion's result in a throwaway one-element tuple.
    self.assertEqual(expectedText, document.formatting()[0].text)
def test_textWithLineBlocks(self):
    """Compare the processed ``08_line_blocks.docx`` sample with its stored outcome.

    The expected document is loaded from ``samples/expected outcome/docx/test_08``
    and both its content and its formatting must equal what ``DocxProcessor``
    produces for the sample file.
    """
    workingDir = os.getcwd()
    sampleDocxFile = os.path.join(workingDir, 'samples', 'docx', '08_line_blocks.docx')
    document = DocxProcessor(sampleDocxFile).document()

    expectedPath = os.path.join(workingDir, 'samples', 'expected outcome', 'docx', 'test_08')
    expectedDocument = Document().initWithFile(expectedPath)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expectedDocument.content(), document.content())
    self.assertEqual(expectedDocument.formatting(), document.formatting())
def test_textWithLineBreak(self):
    """Check the first three content entries of ``06_new_line_same_paragraph.docx``.

    The second entry is expected to hold both lines of one paragraph joined
    by a carriage return, rather than being split into two entries.
    """
    samplePath = os.path.join(
        os.getcwd(), 'samples', 'docx', '06_new_line_same_paragraph.docx'
    )
    processor = DocxProcessor(samplePath)
    parsed = processor.document()

    self.assertEqual('Title', parsed.content()[0])
    self.assertEqual(
        'Regular paragraph.\rNew line on the same paragraph.',
        parsed.content()[1],
    )
    self.assertEqual('New paragraph.', parsed.content()[2])
def test_textWithWeirdFormatting(self):
    """Compare the processed ``11_weird_formatting.docx`` sample with its stored outcome.

    The expected document is loaded from ``samples/expected outcome/docx/test_11``
    and both its content and its formatting must equal what ``DocxProcessor``
    produces for the sample file.
    """
    workingDir = os.getcwd()
    sampleDocxFile = os.path.join(workingDir, 'samples', 'docx', '11_weird_formatting.docx')
    document = DocxProcessor(sampleDocxFile).document()

    expectedPath = os.path.join(workingDir, 'samples', 'expected outcome', 'docx', 'test_11')
    expectedDocument = Document().initWithFile(expectedPath)

    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(expectedDocument.content(), document.content())
    self.assertEqual(expectedDocument.formatting(), document.formatting())
def test_formattingFragmentSorting(self):
    """Check the formatting fragments produced for ``03_text_formatting.docx``.

    The first fragment must be a ``FragmentIdentifier`` and the full
    formatting list must equal the expected fragments in the exact order
    listed below.
    """
    samplePath = os.path.join(os.getcwd(), 'samples', 'docx', '03_text_formatting.docx')
    processor = DocxProcessor(samplePath)
    document = processor.document()

    # One tuple per expected fragment, holding the four positional
    # arguments passed to self.fragment, in the order the fragments are
    # expected to appear.
    fragmentSpecs = [
        (1, None, "I can do that\n", {'style': 'Heading 1'}),
        (3, None, "I gotta piss\n", {'style': 'Heading 1'}),
        (4, (133, 158), "Do you believe that shit?", {'formattings': ['underline']}),
        (5, None, "We happy?\n", {'style': 'Heading 1'}),
        (
            6,
            (118, 269),
            "Do you see a little Asian child with a blank expression on his face sitting outside on a mechanical helicopter that shakes when you put quarters in it?",
            {'formattings': ['bold']},
        ),
        (7, None, "Uuummmm, this is a tasty burger!\n", {'style': 'Heading 1'}),
        (8, (335, 347), "m**********r", {'formattings': ['italic']}),
        (8, (443, 455), "m**********r", {'formattings': ['italic']}),
        (9, None, "I'm serious as a heart attack\n", {'style': 'Heading 1'}),
        (10, (182, 194), "M**********r", {'formattings': ['italic']}),
    ]
    expectedFormatting = [self.fragment(*spec) for spec in fragmentSpecs]

    # Dump the actual formatting so a failing comparison is easier to read.
    print("DOCUMENT FORMATTING:\n" + str(document.formatting()))

    self.assertIsInstance(document.formatting()[0], FragmentIdentifier)
    self.assertEqual(expectedFormatting, document.formatting())
def processDocx(docxFile, targetFolder):
    """Process a docx file and hand the result to ``writeTo``.

    Builds a ``DocxProcessor`` for ``docxFile``, takes its ``document()``
    and calls ``writeTo(targetFolder)`` on it. Nothing is returned.
    """
    DocxProcessor(docxFile).document().writeTo(targetFolder)