def test_textTrimming (self):
		"""Check the first paragraph of the ``07_trim.docx`` sample.

		Both the first content entry and the ``text`` of the first formatting
		fragment must equal the expected string, which carries no leading or
		trailing whitespace.
		"""
		sampleDocxFile = os.path.join(os.getcwd(), 'samples', 'docx', '07_trim.docx')
		docxProcessor = DocxProcessor(sampleDocxFile)
		document = docxProcessor.document()

		expectedText = 'Paragraph with trailing and closing spaces'
		self.assertEqual(expectedText, document.content()[0])
		# Plain statement on purpose: a trailing comma here would wrap the
		# call result in a throwaway one-element tuple.
		self.assertEqual(expectedText, document.formatting()[0].text)
	def test_textWithLineBlocks (self):
		"""Compare the parsed ``08_line_blocks.docx`` sample with the stored ``test_08`` outcome.

		The expected document is loaded with ``Document().initWithFile`` and
		its content and formatting must equal those produced by
		``DocxProcessor`` for the sample file.
		"""
		sampleDocxFile = os.path.join(os.getcwd(), 'samples', 'docx', '08_line_blocks.docx')
		docxProcessor = DocxProcessor(sampleDocxFile)
		document = docxProcessor.document()
		expectedDocument = Document().initWithFile(os.path.join(os.getcwd(), 'samples', 'expected outcome', 'docx', 'test_08'))

		# assertEqual instead of the deprecated assertEquals alias, which was
		# removed from unittest in Python 3.12.
		self.assertEqual(expectedDocument.content(), document.content())
		self.assertEqual(expectedDocument.formatting(), document.formatting())
	def test_textWithLineBreak (self):
		"""Check the first three content entries of ``06_new_line_same_paragraph.docx``.

		The second expected entry contains a carriage-return character between
		its two sentences; the other two entries are plain single-line strings.
		"""
		docxPath = os.path.join(os.getcwd(), 'samples', 'docx', '06_new_line_same_paragraph.docx')
		parsed = DocxProcessor(docxPath).document()

		expectedParagraphs = (
			'Title',
			'Regular paragraph.\rNew line on the same paragraph.',
			'New paragraph.',
		)
		# content() is queried again for every check, one index at a time.
		for position, paragraph in enumerate(expectedParagraphs):
			self.assertEqual(paragraph, parsed.content()[position])
	def test_textWithWeirdFormatting (self):
		"""Compare the parsed ``11_weird_formatting.docx`` sample with the stored ``test_11`` outcome.

		The expected document is loaded with ``Document().initWithFile`` and
		its content and formatting must equal those produced by
		``DocxProcessor`` for the sample file.
		"""
		sampleDocxFile = os.path.join(os.getcwd(), 'samples', 'docx', '11_weird_formatting.docx')
		docxProcessor = DocxProcessor(sampleDocxFile)
		document = docxProcessor.document()
		expectedDocument = Document().initWithFile(os.path.join(os.getcwd(), 'samples', 'expected outcome', 'docx', 'test_11'))

		# assertEqual instead of the deprecated assertEquals alias, which was
		# removed from unittest in Python 3.12.
		self.assertEqual(expectedDocument.content(), document.content())
		self.assertEqual(expectedDocument.formatting(), document.formatting())
	def test_formattingFragmentSorting (self):
		"""Check the formatting fragments reported for ``03_text_formatting.docx``.

		Builds the expected fragments through ``self.fragment`` and asserts
		that the parsed document's formatting equals that list, and that its
		first entry is a ``FragmentIdentifier``.
		"""
		docxPath = os.path.join(os.getcwd(), 'samples', 'docx', '03_text_formatting.docx')
		parsed = DocxProcessor(docxPath).document()

		# One tuple of positional arguments per expected fragment, in the
		# order the fragments are expected to appear.
		fragmentSpecs = (
			(1, None, "I can do that\n", {'style': 'Heading 1'}),
			(3, None, "I gotta piss\n", {'style': 'Heading 1'}),
			(4, (133, 158), "Do you believe that shit?", {'formattings': ['underline']}),
			(5, None, "We happy?\n", {'style': 'Heading 1'}),
			(6, (118, 269), "Do you see a little Asian child with a blank expression on his face sitting outside on a mechanical helicopter that shakes when you put quarters in it?", {'formattings': ['bold']}),
			(7, None, "Uuummmm, this is a tasty burger!\n", {'style': 'Heading 1'}),
			(8, (335, 347), "m**********r", {'formattings': ['italic']}),
			(8, (443, 455), "m**********r", {'formattings': ['italic']}),
			(9, None, "I'm serious as a heart attack\n", {'style': 'Heading 1'}),
			(10, (182, 194), "M**********r", {'formattings': ['italic']}),
		)
		expected = [self.fragment(*spec) for spec in fragmentSpecs]

		# Dump the actual formatting to stdout before asserting on it.
		print("DOCUMENT FORMATTING:\n" + str(parsed.formatting()))
		self.assertIsInstance(parsed.formatting()[0], FragmentIdentifier)
		self.assertEqual(expected, parsed.formatting())
Пример #6
0
def processDocx (docxFile, targetFolder):
	"""Build a document from *docxFile* via DocxProcessor and call its ``writeTo`` with *targetFolder*."""
	DocxProcessor(docxFile).document().writeTo(targetFolder)