def testSplitBrokenData(self):
    """
    Run cutplace with ``--split`` on broken customer data.

    The run must end with exit code 1, and both the accepted and the
    rejected file must exist and be non-empty. Both files are removed
    afterwards.
    """
    icdPath = dev_test.getTestIcdPath("customers.ods")
    dataPath = dev_test.getTestInputPath("broken_customers.csv")
    exitCode = _cutplace.main(["test_cutplace.py", "--split", icdPath, dataPath])
    # ``assertEquals`` is a deprecated alias; use ``assertEqual`` like the
    # other tests in this file.
    self.assertEqual(exitCode, 1)
    acceptedDataPath = dev_test.getTestInputPath("broken_customers_accepted.csv")
    rejectedDataPath = dev_test.getTestInputPath("broken_customers_rejected.txt")
    self.assertNotEqual(os.path.getsize(acceptedDataPath), 0)
    self.assertNotEqual(os.path.getsize(rejectedDataPath), 0)
    # Remove the split results so they do not linger in the input folder.
    os.remove(acceptedDataPath)
    os.remove(rejectedDataPath)
def testSplitValidData(self):
    """
    Run ``_cutplace.process`` with ``--split`` on valid ISO-8859-1 customer data.

    The call must return 0, the accepted file must be non-empty and the
    rejected file must be empty. Both files are removed afterwards.
    """
    cidPath = dev_test.getTestIcdPath("customers.ods")
    validDataPath = dev_test.getTestInputPath("valid_customers_iso-8859-1.csv")
    arguments = ["test_cutplace.py", "--split", cidPath, validDataPath]
    self.assertEqual(_cutplace.process(arguments), 0)

    acceptedPath = dev_test.getTestInputPath("valid_customers_iso-8859-1_accepted.csv")
    rejectedPath = dev_test.getTestInputPath("valid_customers_iso-8859-1_rejected.txt")
    acceptedSize = os.path.getsize(acceptedPath)
    self.assertNotEqual(acceptedSize, 0)
    rejectedSize = os.path.getsize(rejectedPath)
    self.assertEqual(rejectedSize, 0)

    # Remove the split results, accepted file first.
    for splitResultPath in (acceptedPath, rejectedPath):
        os.remove(splitResultPath)
def testSplitBrokenData(self):
    """
    Split broken customer data using the ``--split`` command line option.

    Expects ``_cutplace.main`` to return exit code 1 and expects both
    ``broken_customers_accepted.csv`` and ``broken_customers_rejected.txt``
    to be non-empty before they are deleted again.
    """
    icdPath = dev_test.getTestIcdPath("customers.ods")
    dataPath = dev_test.getTestInputPath("broken_customers.csv")
    exitCode = _cutplace.main(
        ["test_cutplace.py", "--split", icdPath, dataPath])
    # Use ``assertEqual`` instead of the deprecated ``assertEquals`` alias,
    # consistent with the remaining tests.
    self.assertEqual(exitCode, 1)
    acceptedDataPath = dev_test.getTestInputPath(
        "broken_customers_accepted.csv")
    rejectedDataPath = dev_test.getTestInputPath(
        "broken_customers_rejected.txt")
    self.assertNotEqual(os.path.getsize(acceptedDataPath), 0)
    self.assertNotEqual(os.path.getsize(rejectedDataPath), 0)
    # Clean up the accepted and rejected files checked above.
    os.remove(acceptedDataPath)
    os.remove(rejectedDataPath)
def testSplitValidData(self):
    """
    Split valid customer data using the ``--split`` command line option.

    Expects ``_cutplace.process`` to return 0, a non-empty accepted file
    and an empty rejected file; both files are deleted at the end.
    """
    splitArguments = [
        "test_cutplace.py",
        "--split",
        dev_test.getTestIcdPath("customers.ods"),
        dev_test.getTestInputPath("valid_customers_iso-8859-1.csv"),
    ]
    processResult = _cutplace.process(splitArguments)
    self.assertEqual(processResult, 0)

    pathOfAccepted = dev_test.getTestInputPath(
        "valid_customers_iso-8859-1_accepted.csv")
    pathOfRejected = dev_test.getTestInputPath(
        "valid_customers_iso-8859-1_rejected.txt")
    # Valid data: everything is accepted, nothing is rejected.
    self.assertNotEqual(os.path.getsize(pathOfAccepted), 0)
    self.assertEqual(os.path.getsize(pathOfRejected), 0)

    os.remove(pathOfAccepted)
    os.remove(pathOfRejected)
def testValidExcel(self):
    """
    Validate ``valid_customers.xls`` twice: once with the default data
    format and once with ``data.KEY_SHEET`` set to 2.

    The first run must accept 3 rows, the second 4 rows; neither may
    reject any. A ``CutplaceXlrdImportError`` is logged and ignored.
    """
    dataPath = dev_test.getTestInputPath("valid_customers.xls")
    # Pairs of (sheet to set or None to keep the default, expected accepted count).
    for sheet, expectedAcceptedCount in ((None, 3), (2, 4)):
        # TODO: Reuse a single ICD once icd.validate() calls reset().
        icd = createDefaultTestIcd(data.FORMAT_EXCEL)
        if sheet is not None:
            icd.dataFormat.set(data.KEY_SHEET, sheet)
        icd.addValidationListener(_defaultIcdListener)
        try:
            icd.validate(dataPath)
            self.assertEqual(icd.rejectedCount, 0)
            self.assertEqual(icd.acceptedCount, expectedAcceptedCount)
        except _parsers.CutplaceXlrdImportError:
            _log.warning(u"ignored ImportError caused by missing xlrd")
        finally:
            icd.removeValidationListener(_defaultIcdListener)
def testCanCreateInterfaceControlDocument(self):
    """
    Sniff CID rows from the CSV, ODS and Excel variants of the valid
    customers file and check the sniffed type of selected columns.
    """
    def assertFieldTypeEquals(cidRows, fieldName, typeName):
        """
        Assert that the first field row in ``cidRows`` named ``fieldName``
        has ``typeName`` as type; fail the test if there is no such row.

        A field row has at least 6 items, starts with ``u"f"``, carries the
        field name at index 1 and the type name at index 5.
        """
        for cidRow in cidRows:
            if (len(cidRow) >= 6) and (cidRow[0] == u"f") and (cidRow[1] == fieldName):
                self.assertEqual(typeName, cidRow[5])
                return
        # Use fail() rather than a bare ``assert`` so the check survives
        # ``python -O`` and never continues with a non-matching row.
        self.fail("field must be found in cid rows: %r <-- %s" % (
            fieldName, cidRows))

    testFileNames = [
        "valid_customers.csv", "valid_customers.ods", "valid_customers.xls"
    ]
    for testFileName in testFileNames:
        testFilePath = dev_test.getTestInputPath(testFileName)
        with open(testFilePath, "rb") as testFile:
            cidRows = sniff.createCidRows(testFile)
            assertFieldTypeEquals(cidRows, u"column_a", u"Integer")  # branch
            assertFieldTypeEquals(cidRows, u"column_c", u"Text")  # first name
            assertFieldTypeEquals(cidRows, u"column_f", u"Text")  # date of birth
def testCanCreateInterfaceControlDocument(self):
    """
    Create CID rows by sniffing each valid customers file (CSV, ODS, Excel)
    and verify the types sniffed for ``column_a``, ``column_c`` and
    ``column_f``.
    """
    def assertFieldTypeEquals(cidRows, fieldName, typeName):
        """
        Find the first row in ``cidRows`` that describes field ``fieldName``
        (at least 6 items, ``u"f"`` at index 0, the name at index 1) and
        assert that its item at index 5 equals ``typeName``.
        """
        for cidRowToExamine in cidRows:
            isFieldRow = (len(cidRowToExamine) >= 6) and (cidRowToExamine[0] == u"f")
            if isFieldRow and (cidRowToExamine[1] == fieldName):
                typeNameFromCidRow = cidRowToExamine[5]
                self.assertEqual(typeName, typeNameFromCidRow)
                break
        else:
            # A bare ``assert`` would be stripped under ``python -O``; fail()
            # always reports the missing field.
            self.fail("field must be found in cid rows: %r <-- %s" % (fieldName, cidRows))

    testFileNames = ["valid_customers.csv", "valid_customers.ods", "valid_customers.xls"]
    for testFileName in testFileNames:
        testFilePath = dev_test.getTestInputPath(testFileName)
        with open(testFilePath, "rb") as testFile:
            cidRows = sniff.createCidRows(testFile)
            assertFieldTypeEquals(cidRows, u"column_a", u"Integer")  # branch
            assertFieldTypeEquals(cidRows, u"column_c", u"Text")  # first name
            assertFieldTypeEquals(cidRows, u"column_f", u"Text")  # date of birth
def testCanFindOutDelimitedOptions(self):
    """
    Sniff the delimited options of ``valid_customers.csv`` and compare
    them with the expected item delimiter, escape character, encoding and
    quote character.
    """
    expectedOptionsByFileName = {
        "valid_customers.csv": {
            sniff._ITEM_DELIMITER: ",",
            sniff._ESCAPE_CHARACTER: "\"",
            sniff._ENCODING: 'ascii',
            sniff._QUOTE_CHARACTER: "\"",
        },
    }
    for fileName, expectedOptions in expectedOptionsByFileName.items():
        filePath = dev_test.getTestInputPath(fileName)
        with open(filePath, "rb") as dataFile:
            sniffedOptions = sniff.delimitedOptions(dataFile)
            # The line delimiter depends on the platform the repository has
            # been checked out to, so it cannot be spelled out up front.
            # Only require it to be set, then adopt the sniffed value as
            # the expected one.
            sniffedLineDelimiter = sniffedOptions["lineDelimiter"]
            self.assertTrue(sniffedLineDelimiter)
            expectedOptions["lineDelimiter"] = sniffedLineDelimiter
            failureMessage = "data format for file must be %r but is %r: %r" % (
                expectedOptions, sniffedOptions, filePath)
            self.assertEqual(sniffedOptions, expectedOptions, failureMessage)
def testCanFindOutDelimitedOptions(self):
    """
    Check that ``sniff.delimitedOptions`` detects the expected options for
    each file listed in the expectation map.
    """
    csvExpectations = {
        sniff._ITEM_DELIMITER: ",",
        sniff._ESCAPE_CHARACTER: '"',
        sniff._ENCODING: "ascii",
        sniff._QUOTE_CHARACTER: '"',
    }
    expectationsPerFile = {"valid_customers.csv": csvExpectations}
    for nameOfFile, wantedOptions in expectationsPerFile.items():
        pathOfFile = dev_test.getTestInputPath(nameOfFile)
        with open(pathOfFile, "rb") as sniffableFile:
            foundOptions = sniff.delimitedOptions(sniffableFile)
            # No fixed line delimiter can be expected because the actual
            # value depends on the platform the repository has been checked
            # out to; accept any non-empty value and copy it to the
            # expectations.
            foundLineDelimiter = foundOptions["lineDelimiter"]
            self.assertTrue(foundLineDelimiter)
            wantedOptions["lineDelimiter"] = foundLineDelimiter
            self.assertEqual(
                foundOptions,
                wantedOptions,
                "data format for file must be %r but is %r: %r"
                % (wantedOptions, foundOptions, pathOfFile),
            )
def testCanValidateDataWithPlugins(self):
    """
    Validate ``valid_customers.csv`` against ``customers_with_plugins.ods``
    with ``--plugins`` pointing to the test plugins folder; the exit code
    must be 0.
    """
    pluginIcdPath = dev_test.getTestIcdPath("customers_with_plugins.ods")
    customersPath = dev_test.getTestInputPath("valid_customers.csv")
    pluginsPath = dev_test.getTestPluginsPath()
    arguments = ["test_cutplace.py", "--plugins", pluginsPath, pluginIcdPath, customersPath]
    self.assertEqual(_cutplace.main(arguments), 0)
def testCanSniffAndValidateUsingMainWithHeaderAndEncoding(self):
    """
    Sniff an ICD from ISO-8859-15 data with one header row using
    ``_cutsniff.main``, then read the sniffed ICD and iterate over all
    validated rows of the same data.
    """
    sniffedIcdPath = dev_test.getTestOutputPath("icd_sniffed_valid_customers_with_header_iso-8859-15.csv")
    customersPath = dev_test.getTestInputPath("valid_customers_with_header_iso-8859-15.csv")
    sniffArguments = ["test", "--data-encoding", "iso-8859-15", "--head", "1"]
    sniffArguments.extend([sniffedIcdPath, customersPath])
    self.assertEqual(_cutsniff.main(sniffArguments), 0)

    icdFromSniffing = interface.InterfaceControlDocument()
    icdFromSniffing.read(sniffedIcdPath)
    # Consume every row; only the absence of errors matters.
    for _ in interface.validatedRows(icdFromSniffing, customersPath):
        pass
def testCanSniffAndValidateUsingMain(self):
    """
    Sniff an ICD for ``valid_customers.csv`` using ``_cutsniff.main``, then
    read the sniffed ICD and iterate over all validated rows of the data.
    """
    sniffedIcdPath = dev_test.getTestOutputPath("icd_sniffed_valid_customers.csv")
    customersPath = dev_test.getTestInputPath("valid_customers.csv")
    sniffArguments = ["test", sniffedIcdPath, customersPath]
    self.assertEqual(_cutsniff.main(sniffArguments), 0)

    icdFromSniffing = interface.InterfaceControlDocument()
    icdFromSniffing.read(sniffedIcdPath)
    # Consume every row; only the absence of errors matters.
    for _ in interface.validatedRows(icdFromSniffing, customersPath):
        pass
def testCanSniffAndValidateUsingMain(self):
    """
    Run ``_cutsniff.main`` to write a sniffed ICD for the valid customers
    CSV, expect exit code 0 and validate the data with the resulting ICD.
    """
    icdOutputPath = dev_test.getTestOutputPath(
        "icd_sniffed_valid_customers.csv")
    dataInputPath = dev_test.getTestInputPath("valid_customers.csv")
    sniffExitCode = _cutsniff.main(["test", icdOutputPath, dataInputPath])
    self.assertEqual(sniffExitCode, 0)

    resultingIcd = interface.InterfaceControlDocument()
    resultingIcd.read(icdOutputPath)
    validatedRows = interface.validatedRows(resultingIcd, dataInputPath)
    # Exhaust the rows without using them.
    for _ in validatedRows:
        pass
def testCanSniffStandardFieldFormats(self):
    """
    Sniff a CID from ``valid_alltypes.csv`` using one header row and check
    the resulting field names and the class of each field format.
    """
    expectedFieldNames = [u"customer_id", u"short_name", u"gender", u"date_of_birth", u"balance"]
    # Pairs of (field name, expected class of its field format).
    expectedFormatClasses = (
        ("customer_id", fields.IntegerFieldFormat),
        ("short_name", fields.TextFieldFormat),
        ("gender", fields.TextFieldFormat),
        ("date_of_birth", fields.TextFieldFormat),
        ("balance", fields.DecimalFieldFormat),
    )
    testFilePath = dev_test.getTestInputPath("valid_alltypes.csv")
    with open(testFilePath, "rb") as testFile:
        cid = sniff.createCid(testFile, header=1)
        self.assertEqual(cid.fieldNames, expectedFieldNames)
        for fieldName, expectedFormatClass in expectedFormatClasses:
            actualFormatClass = cid.fieldFormatFor(fieldName).__class__
            self.assertEqual(actualFormatClass, expectedFormatClass)
def testConsumerProducer(self):
    """
    Start an ``_ods.ProducerThread`` on the content of
    ``valid_customers.ods`` and take items from the row queue until a
    ``None`` item arrives, then join the thread.
    """
    odsPath = dev_test.getTestInputPath("valid_customers.ods")
    contentReadable = _ods.odsContent(odsPath)
    rows = Queue.Queue()
    rowProducer = _ods.ProducerThread(contentReadable, rows)
    rowProducer.start()
    # ``None`` marks the end of the rows; the rows themselves are discarded.
    while rows.get() is not None:
        pass
    rowProducer.join()
def testCanCreateSniffedReader(self):
    """
    Create a sniffed reader for the CSV, ODS and Excel variants of the
    valid customers file and check that each yields at least one row.
    """
    for customersFileName in ("valid_customers.csv", "valid_customers.ods", "valid_customers.xls"):
        customersPath = dev_test.getTestInputPath(customersFileName)
        with open(customersPath, "rb") as customersFile:
            sniffedReader = sniff.createReader(customersFile)
            # Count the rows without keeping them.
            numberOfRows = sum(1 for _ in sniffedReader)
            self.assertTrue(numberOfRows > 0)
def testCanSniffAndValidateUsingMainWithHeaderAndEncoding(self):
    """
    Run ``_cutsniff.main`` with ``--data-encoding iso-8859-15`` and
    ``--head 1``, expect exit code 0 and validate the data with the ICD
    written by the sniffer.
    """
    icdOutputPath = dev_test.getTestOutputPath(
        "icd_sniffed_valid_customers_with_header_iso-8859-15.csv")
    dataInputPath = dev_test.getTestInputPath(
        "valid_customers_with_header_iso-8859-15.csv")
    options = ["--data-encoding", "iso-8859-15", "--head", "1"]
    sniffExitCode = _cutsniff.main(["test"] + options + [icdOutputPath, dataInputPath])
    self.assertEqual(sniffExitCode, 0)

    resultingIcd = interface.InterfaceControlDocument()
    resultingIcd.read(icdOutputPath)
    validatedRows = interface.validatedRows(resultingIcd, dataInputPath)
    # Exhaust the rows without using them.
    for _ in validatedRows:
        pass
def testCanCreateSniffedReader(self):
    """
    Check that ``sniff.createReader`` produces a reader with at least one
    row for each of the valid customers files (CSV, ODS, Excel).
    """
    namesOfFilesToSniff = (
        "valid_customers.csv",
        "valid_customers.ods",
        "valid_customers.xls",
    )
    for nameOfFile in namesOfFilesToSniff:
        pathOfFile = dev_test.getTestInputPath(nameOfFile)
        with open(pathOfFile, "rb") as fileToSniff:
            rowsRead = 0
            for _ in sniff.createReader(fileToSniff):
                rowsRead = rowsRead + 1
            self.assertTrue(rowsRead > 0)
def testCellValue(self):
    """
    Read ``fieldtypes.xls`` with ``_parsers.excelReader`` and check that
    every row has 3 items and that, after the first row, the items at
    index 1 and 2 are equal.

    A ``CutplaceXlrdImportError`` is logged and ignored.
    """
    xlsPath = dev_test.getTestInputPath("fieldtypes.xls")
    with open(xlsPath, "rb") as xlsFile:
        try:
            for rowIndex, row in enumerate(_parsers.excelReader(xlsFile)):
                self.assertTrue(row is not None)
                self.assertTrue(len(row) == 3, "row=%r" % row)
                # The first row is treated as title row and not compared.
                if rowIndex > 0:
                    self.assertEqual(row[1], row[2], "row=%r" % row)
        except _parsers.CutplaceXlrdImportError:
            _log.warning("ignored ImportError caused by missing xlrd")
def testCanSniffAndValidateUsingMainWithFieldNames(self):
    """
    Sniff an ICD with explicit ``--names`` containing surrounding blanks,
    expect exit code 0 and the field names of the sniffed ICD without
    those blanks, then validate the data with it.
    """
    icdOutputPath = dev_test.getTestOutputPath(
        "icd_sniffed_valid_customers.csv")
    dataInputPath = dev_test.getTestInputPath("valid_customers.csv")
    namesOption = " branchId,customerId, firstName,surName ,gender,dateOfBirth "
    sniffExitCode = _cutsniff.main(
        ["test", "--names", namesOption, icdOutputPath, dataInputPath])
    self.assertEqual(sniffExitCode, 0)

    resultingIcd = interface.InterfaceControlDocument()
    resultingIcd.read(icdOutputPath)
    expectedFieldNames = [
        "branchId", "customerId", "firstName", "surName", "gender",
        "dateOfBirth"
    ]
    self.assertEqual(resultingIcd.fieldNames, expectedFieldNames)
    # Exhaust the validated rows without using them.
    for _ in interface.validatedRows(resultingIcd, dataInputPath):
        pass
def testCanSniffAndValidateUsingMainWithFieldNames(self):
    """
    Run ``_cutsniff.main`` with a ``--names`` option and check that the
    sniffed ICD uses exactly the given field names, ignoring the blanks
    around them; then iterate over all validated rows of the data.
    """
    sniffedIcdPath = dev_test.getTestOutputPath("icd_sniffed_valid_customers.csv")
    customersPath = dev_test.getTestInputPath("valid_customers.csv")
    sniffArguments = ["test", "--names"]
    sniffArguments.append(" branchId,customerId, firstName,surName ,gender,dateOfBirth ")
    sniffArguments.extend([sniffedIcdPath, customersPath])
    self.assertEqual(_cutsniff.main(sniffArguments), 0)

    icdFromSniffing = interface.InterfaceControlDocument()
    icdFromSniffing.read(sniffedIcdPath)
    namesWithoutBlanks = ["branchId", "customerId", "firstName", "surName", "gender", "dateOfBirth"]
    self.assertEqual(icdFromSniffing.fieldNames, namesWithoutBlanks)
    # Consume every row; only the absence of errors matters.
    for _ in interface.validatedRows(icdFromSniffing, customersPath):
        pass
def testCanFindOutSniffDataFormat(self):
    """
    Sniff the data format of the CSV, ODS and Excel variants of the valid
    customers file and compare its name with the expected format name.
    """
    expectedFormatNameByFileName = {
        "valid_customers.csv": data.FORMAT_DELIMITED,
        "valid_customers.ods": data.FORMAT_ODS,
        "valid_customers.xls": data.FORMAT_EXCEL,
    }
    for fileName, expectedFormatName in expectedFormatNameByFileName.items():
        filePath = dev_test.getTestInputPath(fileName)
        with open(filePath, "rb") as dataFile:
            sniffedFormatName = sniff.createDataFormat(dataFile).name
            failureMessage = "data format for file must be %r but is %r: %r" % (
                expectedFormatName, sniffedFormatName, filePath)
            self.assertEqual(sniffedFormatName, expectedFormatName, failureMessage)
def testCanFindOutSniffDataFormat(self):
    """
    Check that ``sniff.createDataFormat`` yields a data format with the
    expected name for each file in the expectation map.
    """
    wantedFormatPerFile = {
        "valid_customers.csv": data.FORMAT_DELIMITED,
        "valid_customers.ods": data.FORMAT_ODS,
        "valid_customers.xls": data.FORMAT_EXCEL,
    }
    for nameOfFile, wantedFormatName in wantedFormatPerFile.items():
        pathOfFile = dev_test.getTestInputPath(nameOfFile)
        with open(pathOfFile, "rb") as fileToSniff:
            foundFormat = sniff.createDataFormat(fileToSniff)
            foundFormatName = foundFormat.name
            self.assertEqual(
                foundFormatName,
                wantedFormatName,
                "data format for file must be %r but is %r: %r"
                % (wantedFormatName, foundFormatName, pathOfFile))
def testValidOds(self):
    """
    Validate ``valid_customers.ods`` twice: once with the default data
    format and once with ``data.KEY_SHEET`` set to 2.

    The first run must accept 3 rows, the second 4 rows; neither may
    reject any.
    """
    dataPath = dev_test.getTestInputPath("valid_customers.ods")
    # Pairs of (sheet to set or None to keep the default, expected accepted count).
    for sheet, expectedAcceptedCount in ((None, 3), (2, 4)):
        # TODO: Reuse a single ICD once icd.validate() calls reset().
        icd = createDefaultTestIcd(data.FORMAT_ODS)
        if sheet is not None:
            icd.dataFormat.set(data.KEY_SHEET, sheet)
        icd.addValidationListener(_defaultIcdListener)
        try:
            icd.validate(dataPath)
            self.assertEqual(icd.rejectedCount, 0)
            self.assertEqual(icd.acceptedCount, expectedAcceptedCount)
        finally:
            icd.removeValidationListener(_defaultIcdListener)
def testCanSniffStandardFieldFormats(self):
    """
    Create a CID by sniffing ``valid_alltypes.csv`` with ``header=1`` and
    verify its field names and the field format class of every field.
    """
    wantedFieldNames = [
        u'customer_id', u'short_name', u'gender', u'date_of_birth',
        u'balance'
    ]
    # Field names in checking order together with the wanted format class.
    wantedFormatClasses = [
        ('customer_id', fields.IntegerFieldFormat),
        ('short_name', fields.TextFieldFormat),
        ('gender', fields.TextFieldFormat),
        ('date_of_birth', fields.TextFieldFormat),
        ('balance', fields.DecimalFieldFormat),
    ]
    pathOfFile = dev_test.getTestInputPath("valid_alltypes.csv")
    with open(pathOfFile, "rb") as fileToSniff:
        cid = sniff.createCid(fileToSniff, header=1)
        self.assertEqual(cid.fieldNames, wantedFieldNames)
        for nameOfField, wantedClass in wantedFormatClasses:
            self.assertEqual(
                cid.fieldFormatFor(nameOfField).__class__, wantedClass)
def testConvertToCsv(self):
    """
    Call ``_ods.main`` with ``valid_customers.ods`` as source and
    ``valid_customers_from__ods.csv`` as target, without further options.
    """
    odsSourcePath = dev_test.getTestInputPath("valid_customers.ods")
    csvTargetPath = dev_test.getTestOutputPath("valid_customers_from__ods.csv")
    arguments = [odsSourcePath, csvTargetPath]
    _ods.main(arguments)
def testValidNativeExcelFormats(self):
    """
    Validate ``valid_native_excel_formats.xls`` against
    ``native_excel_formats.ods`` using ``_cutplace.main``; the exit code
    must be 0.
    """
    arguments = [
        "test_cutplace.py",
        dev_test.getTestIcdPath("native_excel_formats.ods"),
        dev_test.getTestInputPath("valid_native_excel_formats.xls"),
    ]
    self.assertEqual(_cutplace.main(arguments), 0)
def testBrokenSheet(self):
    """
    Check that ``_ods.main`` raises ``SystemExit`` for the sheet options
    ``--sheet=x`` and ``--sheet=0``.
    """
    odsSourcePath = dev_test.getTestInputPath("valid_customers.ods")
    csvTargetPath = dev_test.getTestOutputPath("valid_customers_from__ods.csv")
    for brokenSheetOption in ("--sheet=x", "--sheet=0"):
        arguments = [brokenSheetOption, odsSourcePath, csvTargetPath]
        self.assertRaises(SystemExit, _ods.main, arguments)
def testCanValidateDataWithPlugins(self):
    """
    Run ``_cutplace.main`` with the ``--plugins`` option set to the test
    plugins path and expect exit code 0 for the valid customers CSV
    checked against ``customers_with_plugins.ods``.
    """
    icdWithPluginsPath = dev_test.getTestIcdPath("customers_with_plugins.ods")
    validDataPath = dev_test.getTestInputPath("valid_customers.csv")
    pluginOptions = ["--plugins", dev_test.getTestPluginsPath()]
    mainResult = _cutplace.main(["test_cutplace.py"] + pluginOptions + [icdWithPluginsPath, validDataPath])
    self.assertEqual(mainResult, 0)
def setUp(self):
    """
    Store the paths of the valid and the broken customers CSV and read
    the ``customers.csv`` ICD into ``self._icd``.
    """
    inputPathFor = dev_test.getTestInputPath
    self._validCostumersCsvPath = inputPathFor("valid_customers.csv")
    self._brokenCostumersCsvPath = inputPathFor("broken_customers.csv")
    customersIcdPath = dev_test.getTestIcdPath("customers.csv")
    self._icd = interface.InterfaceControlDocument()
    self._icd.read(customersIcdPath)
def testBrokenKinkyFileName(self):
    """
    Check that ``_ods.main`` raises ``SystemExit`` when the target file
    name consists of slashes, backslashes, colons and other special
    characters.
    """
    odsSourcePath = dev_test.getTestInputPath("valid_customers.ods")
    kinkyTargetPath = dev_test.getTestOutputPath("kinky_file_name//\\:^$\\::/")
    arguments = [odsSourcePath, kinkyTargetPath]
    self.assertRaises(SystemExit, _ods.main, arguments)
def testConvertToRst(self):
    """
    Call ``_ods.main`` with ``--format=rst`` to convert
    ``valid_customers.ods`` to ``valid_customers_from__ods.rst``.
    """
    odsSourcePath = dev_test.getTestInputPath("valid_customers.ods")
    rstTargetPath = dev_test.getTestOutputPath("valid_customers_from__ods.rst")
    arguments = ["--format=rst", odsSourcePath, rstTargetPath]
    _ods.main(arguments)
def testValidFixedTxt(self):
    """
    Validate ``valid_customers_fixed.txt`` against ``customers_fixed.ods``
    using ``_cutplace.main``; the exit code must be 0.
    """
    arguments = [
        "test_cutplace.py",
        dev_test.getTestIcdPath("customers_fixed.ods"),
        dev_test.getTestInputPath("valid_customers_fixed.txt"),
    ]
    self.assertEqual(_cutplace.main(arguments), 0)
def testConvertToDocBook(self):
    """
    Call ``_ods.main`` with ``--format=docbook`` to convert
    ``valid_customers.ods`` to ``valid_customers_from__ods.xml``.
    """
    odsSourcePath = dev_test.getTestInputPath("valid_customers.ods")
    docBookTargetPath = dev_test.getTestOutputPath("valid_customers_from__ods.xml")
    arguments = ["--format=docbook", odsSourcePath, docBookTargetPath]
    _ods.main(arguments)