def testSqlSimple(self):
    # Round-trip diet and netflow PanDats through SQLite files, including
    # reads/writes via a generic-tables ('*') PanDatFactory on the same file.
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "diet.db")
    pdf.sql.write_file(panDat, filePath)
    sqlPanDat = pdf.sql.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, sqlPanDat))
    # generic-table factory must round trip the same data
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    pdf2.sql.write_file(panDat, filePath)
    sqlPanDat = pdf2.sql.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, sqlPanDat))

    # same checks against the netflow schema
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "netflow.db")
    pdf.sql.write_file(panDat, filePath)
    panDat2 = pdf.sql.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    sqlPanDat = pdf2.sql.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, sqlPanDat))
def testThree(self):
    # Freeze semantics against an external data object: mutating the source
    # after freeze_me must not corrupt the frozen TicDat, frozen tables reject
    # edits, and bad/attribute writes raise.
    objOrig = netflowData()
    staticFactory = TicDatFactory(**netflowSchema())
    goodTable = lambda t: lambda _t: staticFactory.good_tic_dat_table(_t, t)
    tables = set(staticFactory.primary_key_fields)
    ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables}))
    self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(ticDat, t), goodTable(t))
    # mutate the source object; only the mutated tables should now disagree
    objOrig.commodities.append(12.3)
    objOrig.arcs[(1, 2)] = [12]
    self._assertSame(objOrig.nodes, ticDat.nodes, goodTable("nodes"))
    self._assertSame(objOrig.cost, ticDat.cost, goodTable("cost"))
    self.assertTrue(firesException(lambda: self._assertSame(
        objOrig.commodities, ticDat.commodities, goodTable("commodities"))))
    self.assertTrue(firesException(lambda: self._assertSame(
        objOrig.arcs, ticDat.arcs, goodTable("arcs"))))
    # re-freeze picks up the mutations
    ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables}))
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(ticDat, t), goodTable(t))
    self.assertTrue(ticDat.arcs[1, 2]["capacity"] == 12)
    self.assertTrue(12.3 in ticDat.commodities)
    # a scalar key mixed into a multi-key table is rejected with this message
    objOrig.cost[5] = 5
    self.assertTrue("cost cannot be treated as a ticDat table : Inconsistent key lengths" in
                    firesException(lambda: staticFactory.freeze_me(staticFactory.TicDat(
                        **{t: getattr(objOrig, t) for t in tables}))))
    objOrig = netflowData()
    def editMeBadly(t):
        # wrong key arity for the cost table
        def rtn():
            t.cost["hack"] = 12
        return rtn
    def editMeWell(t):
        # correct key arity for the cost table
        def rtn():
            t.cost["hack", "my", "balls"] = 12.12
        return rtn
    # frozen TicDats reject even well-formed edits
    self.assertTrue(all(firesException(editMeWell(t)) and firesException(editMeBadly(t))
                        for t in (ticDat, staticFactory.freeze_me(staticFactory.TicDat()))))
    def attributeMe(t):
        # setting an unknown attribute must raise
        def rtn():
            t.boger = "bogerwoger"
        return rtn
    self.assertTrue(firesException(attributeMe(ticDat)) and
                    firesException(attributeMe(staticFactory.freeze_me(staticFactory.TicDat()))))
    # a mutable (unfrozen) TicDat accepts well-formed edits only
    mutable = staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables})
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(mutable, t), goodTable(t))
    self.assertTrue(firesException(editMeBadly(mutable)))
    self.assertFalse(firesException(editMeWell(mutable)) or firesException(attributeMe(mutable)))
    self.assertTrue(firesException(lambda: self._assertSame(
        objOrig.cost, mutable.cost, goodTable("cost"))))
def testDictConstructions(self):
    # PanDat construction from the various pandas dict/record orientations
    # (to_dict default, orient="list"/"records", raw itertuples rows) must all
    # agree, and extra columns not in the schema are carried through.
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    panDat2 = pdf.PanDat(**{t: getattr(panDat, t).to_dict() for t in pdf.all_tables})
    panDat3 = pdf.PanDat(**{t: getattr(panDat, t).to_dict(orient="list") for t in pdf.all_tables})
    panDat3_1 = pdf.PanDat(**{t: list(map(list, getattr(panDat, t).itertuples(index=False)))
                              for t in pdf.all_tables})
    self.assertTrue(all(pdf._same_data(panDat, _) for _ in [panDat2, panDat3, panDat3_1]))
    # an extra, non-schema column survives reconstruction
    panDat.foods["extra"] = 12
    panDat4 = pdf.PanDat(**{t: getattr(panDat, t).to_dict(orient="list") for t in pdf.all_tables})
    self.assertTrue(pdf._same_data(panDat, panDat4))
    self.assertTrue(set(panDat4.foods["extra"]) == {12})

    # repeat for the netflow schema
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    panDat2 = pdf.PanDat(**{t: getattr(panDat, t).to_dict() for t in pdf.all_tables})
    panDat3 = pdf.PanDat(**{t: getattr(panDat, t).to_dict(orient="records") for t in pdf.all_tables})
    self.assertTrue(all(pdf._same_data(panDat, _) for _ in [panDat2, panDat3]))
    panDat.cost["extra"] = "boger"
    panDat4 = pdf.PanDat(**{t: getattr(panDat, t).to_dict(orient="list") for t in pdf.all_tables})
    self.assertTrue(pdf._same_data(panDat, panDat4))
    self.assertTrue(set(panDat4.cost["extra"]) == {"boger"})
def testXlsSimple(self):
    # xlsx round trip for diet and netflow, plus reading with a shrunken
    # schema (one table removed) and with a generic ('*') schema.
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "diet.xlsx")
    pdf.xls.write_file(panDat, filePath)
    xlsPanDat = pdf.xls.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, xlsPanDat))
    # a factory missing one table can still read the full file
    pdf_shrunk = PanDatFactory(**{k: v for k, v in dietSchema().items() if k != "nutritionQuantities"})
    self.assertTrue(len(pdf_shrunk.all_tables) == len(pdf.all_tables) - 1)
    xlsPanDatShrunk = pdf_shrunk.xls.create_pan_dat(filePath)
    self.assertTrue(pdf_shrunk._same_data(panDat, xlsPanDatShrunk))
    filePathShrunk = os.path.join(_scratchDir, "diet_shrunk.xlsx")
    # reading a not-yet-written file raises
    self.assertTrue(self.firesException(lambda: pdf.xls.create_pan_dat(filePathShrunk)))
    pdf_shrunk.xls.write_file(panDat, filePathShrunk)
    xlsPanDatShrunk = pdf.xls.create_pan_dat(filePathShrunk)
    self.assertTrue(pdf_shrunk._same_data(panDat, xlsPanDatShrunk))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    pdf2.xls.write_file(panDat, filePath)
    xlsPanDat = pdf2.xls.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, xlsPanDat))

    # repeat the basic round trip for netflow
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "netflow.xlsx")
    pdf.xls.write_file(panDat, filePath)
    panDat2 = pdf.xls.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    xlsPanDat = pdf2.xls.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, xlsPanDat))
def testCsvSimple(self):
    # csv-directory round trip for diet and netflow, generic ('*') reads, and
    # the decimal="," option (reading with the wrong decimal must mismatch).
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, dirPath)
    panDat2 = pdf.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    panDat2 = pdf2.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))

    # repeat for netflow, including a generic-factory write
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "netflow_csv")
    pdf.csv.write_directory(panDat, dirPath)
    panDat2 = pdf.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    pdf2.csv.write_directory(panDat, dirPath)
    panDat2 = pdf2.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))

    # decimal="," must be specified symmetrically on write and read
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(dietSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, dirPath, decimal=",")
    panDat2 = pdf.csv.create_pan_dat(dirPath)
    self.assertFalse(pdf._same_data(panDat, panDat2))
    panDat2 = pdf.csv.create_pan_dat(dirPath, decimal=",")
    self.assertTrue(pdf._same_data(panDat, panDat2))
def testNetflow(self):
    # copy_to_pandas: table subsetting, the .sloc accessor cross-checked
    # against foreign-key links, and a full round trip back to TicDat
    # (pk columns dropped vs kept).
    # NOTE(review): this test gates on self.canRun while sibling tests use
    # self.can_run -- confirm the attribute name matches this test fixture.
    if not self.canRun:
        return
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    oldDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    self._test_generic_free_copy(oldDat, tdf)
    self._test_generic_free_copy(oldDat, tdf, ["arcs", "nodes"])
    # only the requested tables appear on the pandas copy
    ticDat = tdf.copy_to_pandas(oldDat, ["arcs", "cost"])
    self.assertTrue(all(hasattr(ticDat, t) == (t in ["arcs", "cost"]) for t in tdf.all_tables))
    self.assertTrue(len(ticDat.arcs.capacity.sloc["Boston", :]) ==
                    len(oldDat.nodes["Boston"].arcs_source) == 0)
    self.assertTrue(len(ticDat.arcs.capacity.sloc[:, "Boston"]) ==
                    len(oldDat.nodes["Boston"].arcs_destination) == 2)
    self.assertTrue(all(ticDat.arcs.capacity.sloc[:, "Boston"][src] == r["capacity"]
                        for src, r in oldDat.nodes["Boston"].arcs_destination.items()))
    ticDat = tdf.copy_to_pandas(oldDat, drop_pk_columns=True)
    rebornTicDat = tdf.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    # because we have single pk field tables, dropping the pk columns is problematic
    self.assertFalse(tdf._same_data(rebornTicDat, oldDat))
    # but with the default argument all is well
    ticDat = tdf.copy_to_pandas(oldDat)
    rebornTicDat = tdf.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    self.assertTrue(set(ticDat.inflow.columns) == {"quantity"})
    self.assertTrue(set(ticDat.nodes.columns) == {"name"})
def testWeirdDiets(self):
    # Access (.accdb) reads tolerate weird table-name casing/spacing; a
    # leftover table with the right name but wrong fields must raise.
    if not _can_accdb_unit_test:
        return
    filePath = os.path.join(_scratchDir, "weirdDiet.accdb")
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(ticDat)
    tdf2.mdb.write_file(dat2, filePath, allow_overwrite=True)
    accdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(ticDat)
    tdf3.mdb.write_file(dat3, makeCleanPath(filePath))
    # rename the table to its space-containing variant on disk
    with py.connect(_connection_str(filePath)) as con:
        con.cursor().execute("SELECT * INTO [nutrition quantities] FROM nutrition_quantities").commit()
        con.cursor().execute("DROP TABLE nutrition_quantities").commit()
    accdbTicDat2 = tdf3.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf3._same_data(dat3, accdbTicDat2))
    # an extra same-named table with the wrong fields makes the read ambiguous
    with py.connect(_connection_str(filePath)) as con:
        con.cursor().execute("create table nutrition_quantities (boger int)").commit()
    self.assertTrue(self.firesException(lambda: tdf3.mdb.create_tic_dat(filePath)))
def testNetflow(self):
    # Access (.accdb) round trip for netflow with FKs: duplicate detection,
    # freeze semantics, and a hacked pk field name failing on read-back.
    if not _can_accdb_unit_test:
        return
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables}))
    filePath = os.path.join(_scratchDir, "netflow.accdb")
    tdf.mdb.write_file(ticDat, filePath)
    #shutil.copy(filePath, "netflow.accdb") #uncomment to make readonly test file as .accdb
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    accdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    def changeIt():
        # edit attempt used to prove frozen vs unfrozen behavior below
        accdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    accdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, accdbTicDat))
    # write with a renamed pk field, then read with the original schema
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    ticDatHacked = tdfHacked.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    tdfHacked.mdb.write_file(ticDatHacked, makeCleanPath(filePath))
    self.assertTrue(self.firesException(lambda: tdfHacked.mdb.write_file(ticDat, filePath)))
    tdfHacked.mdb.write_file(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("Unable to recognize field name in table nodes" in
                    self.firesException(lambda: tdf.mdb.create_tic_dat(filePath)))
def testNetflow(self):
    # Reads a pre-made netflow.accdb from the working directory (see the
    # sibling writer test's commented shutil.copy) and checks freeze
    # semantics plus the hacked-pk error message.
    if not _can_unit_test:
        return
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables}))
    filePath = "netflow.accdb"
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    def changeIt():
        # edit attempt used to prove frozen vs unfrozen behavior below
        mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    mdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, mdbTicDat))
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    self.assertTrue("Unable to recognize field nimrod in table nodes" in
                    self.firesException(lambda: tdfHacked.mdb.create_tic_dat(filePath)))
def testDiet(self):
    # Opalytics inputset reads across all hack/raw_data/activeEnabled
    # combinations: duplicate detection, freeze semantics, tolerance of an
    # extra data field, and the missing "dmy" field error.
    if not self.can_run:
        return
    for hack, raw_data, activeEnabled in list(product(*(([True, False],) * 3))):
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
        inputset = create_inputset_mock(tdf, ticDat, hack, activeEnabled)
        self.assertFalse(tdf.opalytics.find_duplicates(inputset, raw_data=raw_data))
        ticDat2 = tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        def change():
            # closes over ticDat2, so it targets whichever object is bound below
            ticDat2.categories["calories"]["minNutrition"] = 12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, ticDat2))
        ticDat2 = tdf.opalytics.create_tic_dat(inputset, freeze_it=True, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        # a schema with an extra "dmy" data field per table
        tdf2 = TicDatFactory(**{k: [pks, list(dfs) + ["dmy"]] for k, (pks, dfs) in tdf.schema().items()})
        _dat = tdf2.copy_tic_dat(ticDat)
        self.assertTrue(tdf._same_data(ticDat,
                        tdf.opalytics.create_tic_dat(create_inputset_mock(tdf2, _dat, hack),
                                                     raw_data=raw_data)))
        ex = self.firesException(lambda: tdf2.opalytics.create_tic_dat(inputset, raw_data=raw_data))
        self.assertTrue("field dmy can't be found" in ex)
def testXlsSpacey(self):
    # case_space_sheet_names xlsx round trips for the spacey schema and netflow.
    if not self.can_run:
        return
    tdf = TicDatFactory(**spacesSchema())
    pdf = PanDatFactory(**spacesSchema())
    ticDat = tdf.TicDat(**spacesData())
    panDat = pan_dat_maker(spacesSchema(), ticDat)
    ext = ".xlsx"
    filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
    pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
    panDat2 = pdf.xls.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
    pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
    panDat2 = pdf.xls.create_pan_dat(filePath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
def testDietOpalytics(self):
    # PanDat creation from opalytics inputsets: no duplicates, TicDat
    # equivalence, tolerance of an extra data field, and the
    # "(table, field) pairs missing" error for a widened schema.
    if not self.can_run:
        return
    for hack, raw_data, activeEnabled in list(itertools.product(*(([True, False],) * 3))):
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
        inputset = create_inputset_mock(tdf, ticDat, hack, activeEnabled)
        pdf = PanDatFactory(**dietSchema())
        panDat = pdf.opalytics.create_pan_dat(inputset)
        self.assertFalse(pdf.find_duplicates(panDat))
        ticDat2 = pdf.copy_to_tic_dat(panDat)
        self.assertTrue(tdf._same_data(ticDat, ticDat2))
        # widen every table with a "dmy" field; narrow read still works
        tdf2 = TicDatFactory(**{k: [pks, list(dfs) + ["dmy"]]
                                for k, (pks, dfs) in tdf.schema().items()})
        _dat = tdf2.copy_tic_dat(ticDat)
        panDat = pdf.opalytics.create_pan_dat(create_inputset_mock(tdf2, _dat, hack))
        self.assertTrue(tdf._same_data(ticDat, pdf.copy_to_tic_dat(panDat)))
        # but the widened factory cannot read the narrow inputset
        pdf2 = PanDatFactory(**tdf2.schema())
        ex = self.firesException(lambda: pdf2.opalytics.create_pan_dat(inputset, raw_data=raw_data))
        self.assertTrue(all(_ in ex for _ in ["(table, field) pairs missing"] +
                            ["'%s', 'dmy'" % _ for _ in pdf2.all_tables]))
def testNetflow(self):
    # SQLite round trip for netflow plus: FK-aware table ordering, freeze
    # semantics, a hacked pk schema failing on read-back, foreign-key
    # failure reporting, and infinity round-tripping.
    if not self.can_run:
        return
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ordered = tdf.sql._ordered_tables()
    # parent tables must precede their FK children in write order
    self.assertTrue(ordered.index("nodes") < min(ordered.index(_) for _ in ("arcs", "cost", "inflow")))
    self.assertTrue(ordered.index("commodities") < min(ordered.index(_) for _ in ("cost", "inflow")))
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
    filePath = os.path.join(_scratchDir, "netflow.sql")
    tdf.sql.write_db_data(ticDat, filePath)
    self.assertFalse(tdf.sql.find_duplicates(filePath))
    sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    def changeIt():
        # edit attempt used to prove frozen vs unfrozen behavior below
        sqlTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    sqlTicDat = tdf.sql.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, sqlTicDat))
    # write with a renamed pk field, then read with the original schema
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    ticDatHacked = tdfHacked.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    tdfHacked.sql.write_db_data(ticDatHacked, makeCleanPath(filePath))
    self.assertFalse(tdfHacked.sql.find_duplicates(filePath))
    self.assertTrue(self.firesException(lambda: tdfHacked.sql.write_db_data(ticDat, filePath)))
    tdfHacked.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("Unable to recognize field name in table nodes" in
                    self.firesException(lambda: tdf.sql.create_tic_dat(filePath)))
    # foreign key failure reporting on deliberately dangling rows
    ticDatNew = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    ticDatNew.cost['Pencils', 'booger', 'wooger'] = 10
    ticDatNew.cost['junker', 'Detroit', 'New York'] = 20
    ticDatNew.cost['bunker', 'Detroit', 'New Jerk'] = 20
    ticDatNew.arcs['booger', 'wooger'] = 112
    self.assertTrue({f[:2] + f[2][:1]: set(v.native_pks)
                     for f, v in tdf.find_foreign_key_failures(ticDatNew).items()} ==
                    {('arcs', 'nodes', u'destination'): {('booger', 'wooger')},
                     ('arcs', 'nodes', u'source'): {('booger', 'wooger')},
                     ('cost', 'commodities', u'commodity'): {('bunker', 'Detroit', 'New Jerk'),
                                                             ('junker', 'Detroit', 'New York')},
                     ('cost', 'nodes', u'destination'): {('bunker', 'Detroit', 'New Jerk'),
                                                         ('Pencils', 'booger', 'wooger')},
                     ('cost', 'nodes', u'source'): {('Pencils', 'booger', 'wooger')}})
    # infinity values survive the SQLite round trip
    ticDat3 = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    ticDat3.arcs['Detroit', 'Boston'] = float("inf")
    ticDat3.arcs['Denver', 'Boston'] = float("inf")
    self.assertFalse(tdf._same_data(ticDat3, ticDat))
    tdf.sql.write_db_data(ticDat3, makeCleanPath(filePath))
    ticDat4 = tdf.sql.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat3, ticDat4))
def testNetflow(self):
    # .mdb round trip for netflow with FKs: freeze semantics plus the
    # hacked-pk write/read failure modes.
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables}))
    filePath = os.path.join(_scratchDir, "netflow.mdb")
    tdf.mdb.write_file(ticDat, filePath)
    mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    def changeIt():
        # edit attempt used to prove frozen vs unfrozen behavior below
        mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    mdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, mdbTicDat))
    # write with a renamed pk field, then read with the original schema
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    ticDatHacked = tdfHacked.TicDat(**{t: getattr(ticDat, t) for t in tdf.all_tables})
    tdfHacked.mdb.write_file(ticDatHacked, makeCleanPath(filePath))
    self.assertTrue(self.firesException(lambda: tdfHacked.mdb.write_file(ticDat, filePath)))
    tdfHacked.mdb.write_file(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("Unable to recognize field name in table nodes" in
                    self.firesException(lambda: tdf.mdb.create_tic_dat(filePath)))
def testDiet(self):
    # Access round trip for diet: duplicate check, overwrite protection, and
    # frozen read-back rejecting mutation.
    if not _can_accdb_unit_test:
        return
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    filePath = makeCleanPath(os.path.join(_scratchDir, "diet.accdb"))
    tdf.mdb.write_file(ticDat, filePath)
    #shutil.copy(filePath, "diet.accdb") #uncomment to make readonly test file as .accdb
    self.assertFalse(tdf.mdb.find_duplicates(filePath))
    accdbTicDat = tdf.mdb.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    def changeit():
        # edit attempt used against both unfrozen and frozen copies
        accdbTicDat.categories["calories"]["minNutrition"] = 12
    changeit()
    self.assertFalse(tdf._same_data(ticDat, accdbTicDat))
    # file already exists, so writing without allow_overwrite raises
    self.assertTrue(self.firesException(lambda: tdf.mdb.write_file(ticDat, filePath)))
    tdf.mdb.write_file(ticDat, filePath, allow_overwrite=True)
    accdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
    self.assertTrue(self.firesException(changeit))
    self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
def testNetflow(self):
    # .xls round trip for netflow: freeze semantics, duplicate check, and the
    # hacked-pk failure message ("nodes : name").
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    filePath = os.path.join(_scratchDir, "netflow.xls")
    tdf.xls.write_file(ticDat, filePath)
    xlsTicDat = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    def changeIt():
        # edit attempt used to prove frozen vs unfrozen behavior below
        xlsTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    xlsTicDat = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, xlsTicDat))
    # NOTE(review): sibling tests call find_duplicates here -- confirm
    # get_duplicates is the intended (possibly legacy) API for this branch.
    self.assertFalse(tdf.xls.get_duplicates(filePath))
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    self.assertTrue(self.firesException(lambda: tdfHacked.xls.write_file(ticDat, filePath)))
    tdfHacked.xls.write_file(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("nodes : name" in
                    self.firesException(lambda: tdf.xls.create_tic_dat(filePath)))
def testWeirdDiets(self):
    # SQLite reads tolerate weird table-name casing/spacing; a leftover
    # same-named table with the wrong fields must raise.
    if not self.can_run:
        return
    filePath = os.path.join(_scratchDir, "weirdDiet.db")
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(ticDat)
    tdf2.sql.write_db_data(dat2, filePath, allow_overwrite=True)
    self.assertFalse(tdf2.sql.find_duplicates(filePath))
    sqlTicDat = tdf.sql.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(ticDat)
    tdf3.sql.write_db_data(dat3, makeCleanPath(filePath))
    # rename the table on disk to its space-containing variant
    with sql.connect(filePath) as con:
        con.execute("ALTER TABLE nutrition_quantities RENAME TO [nutrition quantities]")
    sqlTicDat2 = tdf3.sql.create_tic_dat(filePath)
    self.assertTrue(tdf3._same_data(dat3, sqlTicDat2))
    # an extra same-named table with the wrong fields makes the read ambiguous
    with sql.connect(filePath) as con:
        con.execute("create table nutrition_quantities(boger)")
    self.assertTrue(self.firesException(lambda: tdf3.sql.create_tic_dat(filePath)))
def testFour(self):
    # Silly-schema round trip: as_dict re-construction, row-by-row rebuild of
    # a mutable TicDat, default-field fill-in on partial rows, and widening
    # the schema with an extra data field.
    objOrig = sillyMeData()
    staticFactory = TicDatFactory(**sillyMeSchema())
    goodTable = lambda t: lambda _t: staticFactory.good_tic_dat_table(_t, t)
    tables = set(staticFactory.primary_key_fields)
    ticDat = staticFactory.freeze_me(staticFactory.TicDat(**objOrig))
    self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
    for t in tables:
        self._assertSame(objOrig[t], getattr(ticDat, t), goodTable(t))
    pickedData = staticFactory.TicDat(**staticFactory.as_dict(ticDat))
    self.assertTrue(staticFactory._same_data(ticDat, pickedData))
    # rebuild a mutable TicDat row by row from the frozen one
    mutTicDat = staticFactory.TicDat()
    for k, v in ticDat.a.items():
        mutTicDat.a[k] = v.values()
    for k, v in ticDat.b.items():
        # row .values() is a tuple here (see the (0, 22, 0) assertion below)
        mutTicDat.b[k] = v.values()[0]
    for r in ticDat.c:
        mutTicDat.c.append(r)
    for t in tables:
        self._assertSame(getattr(mutTicDat, t), getattr(ticDat, t), goodTable(t))
    # a partial row assignment fills the remaining fields with defaults
    self.assertTrue("theboger" not in mutTicDat.a)
    mutTicDat.a["theboger"]["aData2"] = 22
    self.assertTrue("theboger" in mutTicDat.a and mutTicDat.a["theboger"].values() == (0, 22, 0))
    # widen table "a" with an extra data field
    newSchema = sillyMeSchema()
    newSchema["a"][1] += ("aData4",)
    newFactory = TicDatFactory(**newSchema)
    def makeNewTicDat():
        return newFactory.TicDat(a=ticDat.a, b=ticDat.b, c=ticDat.c)
    newTicDat = makeNewTicDat()
    self.assertFalse(staticFactory.good_tic_dat_object(newTicDat))
    self.assertTrue(newFactory.good_tic_dat_object(ticDat))
    self.assertTrue(newFactory._same_data(makeNewTicDat(), newTicDat))
    # Python 3 fix: dict .keys() views aren't subscriptable, so grab the
    # first key via next(iter(...)) instead of keys()[0] (works for list
    # or view return types alike).
    newTicDat.a[next(iter(ticDat.a.keys()))]["aData4"] = 12
    self.assertFalse(newFactory._same_data(makeNewTicDat(), newTicDat))
def testNetflow(self):
    # xls and xlsx round trips for netflow, plus infinity handling
    # (treat_inf_as_infinity) and the hacked-pk failure modes.
    if not self.can_run:
        return
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
    filePath = os.path.join(_scratchDir, "netflow.xls")
    tdf.xls.write_file(ticDat, filePath)
    xlsTicDat = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    # filePath + "x" is the .xlsx variant
    tdf.xls.write_file(ticDat, filePath + "x")
    self.assertTrue(tdf._same_data(ticDat, tdf.xls.create_tic_dat(filePath + "x")))
    def changeIt():
        # edit attempt used to prove frozen vs unfrozen behavior below
        xlsTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    xlsTicDat = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(tdf.xls.find_duplicates(filePath))
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    self.assertTrue(self.firesException(lambda: tdfHacked.xls.write_file(ticDat, filePath)))
    tdfHacked.xls.write_file(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("nodes : name" in self.firesException(lambda: tdf.xls.create_tic_dat(filePath)))
    # +/- infinity round trips, unless treat_inf_as_infinity is disabled
    ticDat = tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    ticDat.arcs["Detroit", "Boston"] = float("inf")
    ticDat.cost['Pencils', 'Detroit', 'Boston'] = -float("inf")
    tdf.xls.write_file(ticDat, makeCleanPath(filePath))
    xlsTicDat = tdf.xls.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    tdf.xls.write_file(ticDat, filePath + "x", allow_overwrite=True)
    self.assertTrue(tdf._same_data(ticDat, tdf.xls.create_tic_dat(filePath + "x")))
    self.assertFalse(tdf._same_data(ticDat,
                     tdf.xls.create_tic_dat(filePath + "x", treat_inf_as_infinity=False)))
def testTwo(self):
    """Diet data loaded through TicDatFactory survives a freeze and still matches every source table."""
    source = dietData()
    factory = TicDatFactory(**dietSchema())
    table_names = set(factory.primary_key_fields)
    frozen = factory.freeze_me(
        factory.TicDat(**{name: getattr(source, name) for name in table_names}))
    self.assertTrue(factory.good_tic_dat_object(frozen))
    for name in table_names:
        # bind the table name as a default so the checker is self-contained
        checker = lambda _t, _name=name: factory.good_tic_dat_table(_t, _name)
        self._assertSame(getattr(source, name), getattr(frozen, name), checker)
def testSilly(self):
    """OPL text round trip for the silly schema, both without and with an opl_prepend prefix."""
    factory = TicDatFactory(**sillyMeSchema())
    factory.enable_foreign_key_links()
    frozen = factory.freeze_me(factory.TicDat(**sillyMeData()))
    for prefix in (None, "ooooo"):
        if prefix is not None:
            factory.opl_prepend = prefix
        rendered = create_opl_text(factory, frozen)
        recovered = read_opl_text(factory, rendered)
        self.assertTrue(factory._same_data(frozen, recovered))
def testDiet(self):
    # JSON round trip for diet in both verbose modes: duplicate detection,
    # freeze semantics, weird-case schemas, and an ambiguous duplicate table
    # name raising on read.
    if not self.can_run:
        return
    for verbose in [True, False]:
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields}))
        writePath = os.path.join(makeCleanDir(os.path.join(_scratchDir, "diet")), "file.json")
        tdf.json.write_file(ticDat, writePath, verbose=verbose)
        self.assertFalse(tdf.json.find_duplicates(writePath))
        jsonTicDat = tdf.json.create_tic_dat(writePath)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
        def change():
            # closes over jsonTicDat, so it targets whichever object is bound below
            jsonTicDat.categories["calories"]["minNutrition"] = 12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, jsonTicDat))
        jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
        # weird-case schema writes are readable with the plain schema
        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.json.write_file(dat2, writePath, allow_overwrite=True, verbose=verbose)
        jsonTicDat2 = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat2))
        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.json.write_file(dat3, writePath, allow_overwrite=True, verbose=verbose)
        # rename the table key to its space-containing variant in the raw JSON
        with open(writePath, "r") as f:
            jdict = json.load(f)
        jdict["nutrition quantities"] = jdict["nutrition_quantities"]
        del (jdict["nutrition_quantities"])
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        jsonDat3 = tdf3.json.create_tic_dat(writePath)
        self.assertTrue(tdf3._same_data(dat3, jsonDat3))
        # both spellings present at once is ambiguous and must raise
        jdict["nutrition_quantities"] = jdict["nutrition quantities"]
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        self.assertTrue(self.firesException(lambda: tdf3.json.create_tic_dat(writePath)))
def testMissingTable(self):
    """A schema table absent from the opalytics inputset reads back empty instead of failing."""
    if not self.can_run:
        return
    base_factory = TicDatFactory(**dietSchema())
    frozen = base_factory.freeze_me(base_factory.copy_tic_dat(dietData()))
    inputset = create_inputset_mock(base_factory, frozen)
    # widen the schema with a table the inputset does not contain
    wider_factory = TicDatFactory(**(dict(dietSchema(), missing_table=[["a"], ["b"]])))
    recovered = wider_factory.opalytics.create_tic_dat(inputset)
    self.assertTrue(base_factory._same_data(frozen, recovered))
    self.assertFalse(recovered.missing_table)
def testSqlSpaceyTwo(self):
    # Writing/reading PanDats through a live SQLite connection (db_file_path
    # of None/"") with case_space_table_names, for the spacey schema and netflow.
    if not self.can_run:
        return
    self.assertTrue(pandatio.sql, "this unit test requires SQLite installed")
    tdf = TicDatFactory(**spacesSchema())
    pdf = PanDatFactory(**spacesSchema())
    ticDat = tdf.TicDat(**{
        "a_table": {1: [1, 2, "3"],
                    22.2: (12, 0.12, "something"),
                    0.23: (11, 12, "thirt")},
        "b_table": {(1, 2, "foo"): 1,
                    (1012.22, 4, "0012"): 12},
        "c_table": (("this", 2, 3, 4),
                    ("that", 102.212, 3, 5.5),
                    ("another", 5, 12.5, 24))})
    panDat = pan_dat_maker(spacesSchema(), ticDat)
    ext = ".db"
    filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
    # write and read through an explicit connection rather than a file path
    with pandatio.sql.connect(filePath) as con:
        pdf.sql.write_file(panDat, db_file_path=None, con=con, case_space_table_names=True)
    with pandatio.sql.connect(filePath) as con:
        panDat2 = pdf.sql.create_pan_dat(db_file_path=None, con=con)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
    # empty-string path variant, again deferring to the connection
    with pandatio.sql.connect(filePath) as con:
        pdf.sql.write_file(panDat, db_file_path="", con=con, case_space_table_names=True)
    with pandatio.sql.connect(filePath) as con:
        panDat2 = pdf.sql.create_pan_dat(None, con)
    self.assertTrue(pdf._same_data(panDat, panDat2))
def testNetflow(self):
    """Netflow data survives an OPL text round trip, with and without a prepend."""
    factory = TicDatFactory(**netflowSchema())
    factory.enable_foreign_key_links()
    source_tables = {t: getattr(netflowData(), t)
                     for t in factory.primary_key_fields}
    baseline = factory.freeze_me(factory.TicDat(**source_tables))
    # round trip once with the default (empty) opl_prepend ...
    self.assertTrue(factory._same_data(
        baseline, read_opl_text(factory, create_opl_text(factory, baseline))))
    # ... and again with a non-trivial name prefix
    factory.opl_prepend = "stuff"
    self.assertTrue(factory._same_data(
        baseline, read_opl_text(factory, create_opl_text(factory, baseline))))
def testDiet(self):
    """Round-trip diet data through .xls/.xlsx and probe row_offsets handling."""
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(
        tdf.TicDat(
            **{t: getattr(dietData(), t)
               for t in tdf.primary_key_fields}))
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
    filePath = os.path.join(_scratchDir, "diet.xls")
    tdf.xls.write_file(ticDat, filePath)
    xlsTicDat = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    # .xlsx path, including the treat_inf_as_infinity=False distinction
    tdf.xls.write_file(ticDat, filePath + "x")
    self.assertTrue(
        tdf._same_data(ticDat, tdf.xls.create_tic_dat(filePath + "x")))
    self.assertFalse(
        tdf._same_data(
            ticDat,
            tdf.xls.create_tic_dat(filePath + "x",
                                   treat_inf_as_infinity=False)))
    xlsTicDat.categories["calories"]["minNutrition"] = 12
    self.assertFalse(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(tdf.xls.find_duplicates(filePath))
    # skipping the header row while still expecting headers must fail
    ex = self.firesException(lambda: tdf.xls.create_tic_dat(
        filePath, row_offsets={t: 1 for t in tdf.all_tables}))
    self.assertTrue("field names could not be found" in ex)
    # offset 1 + headers_present=False lands exactly on the data rows
    xlsTicDat = tdf.xls.create_tic_dat(
        filePath,
        row_offsets={t: 1
                     for t in tdf.all_tables},
        headers_present=False)
    self.assertTrue(tdf._same_data(xlsTicDat, ticDat))
    # offset 2 drops exactly one record per table
    xlsTicDat = tdf.xls.create_tic_dat(
        filePath,
        row_offsets={t: 2
                     for t in tdf.all_tables},
        headers_present=False)
    self.assertFalse(tdf._same_data(xlsTicDat, ticDat))
    self.assertTrue(
        all(
            len(getattr(ticDat, t)) - 1 == len(getattr(xlsTicDat, t))
            for t in tdf.all_tables))
def testDiet(self):
    """Round-trip diet data through csv directories.

    Covers dialects, freeze_it, weird-case schemas, case-insensitive file
    matching, spacey file names, and ambiguous duplicate files on disk.
    """
    if not self.can_run:
        return
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
    dirPath = os.path.join(_scratchDir, "diet")
    tdf.csv.write_directory(ticDat, dirPath)
    self.assertFalse(tdf.csv.find_duplicates(dirPath))
    csvTicDat = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    def change():
        csvTicDat.categories["calories"]["minNutrition"] = 12

    # unfrozen read-back is editable; a frozen read-back (below) is not
    self.assertFalse(firesException(change))
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    # a bogus dialect name is rejected with a descriptive message
    self.assertTrue(self.firesException(lambda: tdf.csv.write_directory(
        ticDat, dirPath, dialect="excel_t")).endswith("Invalid dialect excel_t"))
    tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab",
                            allow_overwrite=True)
    # reading tab-dialect files with the default dialect fails
    self.assertTrue(self.firesException(
        lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True,
                                       dialect="excel-tab")
    self.assertTrue(firesException(change))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
    # weird-case schema writes must still be readable by the base schema
    tdf2 = TicDatFactory(**dietSchemaWeirdCase())
    dat2 = copyDataDietWeirdCase(ticDat)
    tdf2.csv.write_directory(dat2, dirPath, allow_overwrite=True)
    csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat2))
    # file-name matching is case insensitive
    os.rename(os.path.join(dirPath, "nutritionquantities.csv"),
              os.path.join(dirPath, "nutritionquantities.csv".upper()))
    csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat2))
    # a table with a space in its name matches the spacey file name
    tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
    dat3 = copyDataDietWeirdCase2(ticDat)
    tdf3.csv.write_directory(dat3, dirPath, allow_overwrite=True)
    os.rename(os.path.join(dirPath, "nutrition_quantities.csv"),
              os.path.join(dirPath, "nutrition quantities.csv"))
    csvDat3 = tdf3.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf3._same_data(dat3, csvDat3))
    # with both spellings present on disk, the read must fail as ambiguous
    shutil.copy(os.path.join(dirPath, "nutrition quantities.csv"),
                os.path.join(dirPath, "nutrition_quantities.csv"))
    self.assertTrue(self.firesException(
        lambda: tdf3.csv.create_tic_dat(dirPath)))
def testNetflow(self):
    """SQLite round trip for netflow, plus fk ordering and fk failure reporting."""
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    ordered = tdf.sql._ordered_tables()
    # parents must be written before children to satisfy foreign keys
    self.assertTrue(ordered.index("nodes") <
                    min(ordered.index(_) for _ in ("arcs", "cost", "inflow")))
    self.assertTrue(ordered.index("commodities") <
                    min(ordered.index(_) for _ in ("cost", "inflow")))
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t)
                                         for t in tdf.primary_key_fields}))
    filePath = os.path.join(_scratchDir, "netflow.sql")
    tdf.sql.write_db_data(ticDat, filePath)
    sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

    def changeIt():
        sqlTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    # frozen read-back rejects edits; unfrozen read-back accepts them
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    sqlTicDat = tdf.sql.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, sqlTicDat))
    # hack the pk field name so the original schema cannot recognize the db
    pkHacked = netflowSchema()
    pkHacked["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**pkHacked)
    ticDatHacked = tdfHacked.TicDat(**{t: getattr(ticDat, t)
                                       for t in tdf.all_tables})
    tdfHacked.sql.write_db_data(ticDatHacked, makeCleanPath(filePath))
    # overwriting an existing db requires allow_overwrite
    self.assertTrue(self.firesException(
        lambda: tdfHacked.sql.write_db_data(ticDat, filePath)))
    tdfHacked.sql.write_db_data(ticDat, filePath, allow_overwrite=True)
    self.assertTrue("Unable to recognize field name in table nodes" in
                    self.firesException(
                        lambda: tdf.sql.create_tic_dat(filePath)))
    # foreign key failure reporting on deliberately orphaned rows
    ticDatNew = tdf.TicDat(**{t: getattr(netflowData(), t)
                              for t in tdf.primary_key_fields})
    ticDatNew.cost['Pencils', 'booger', 'wooger'] = 10
    ticDatNew.cost['junker', 'Detroit', 'New York'] = 20
    ticDatNew.cost['bunker', 'Detroit', 'New Jerk'] = 20
    ticDatNew.arcs['booger', 'wooger'] = 112
    self.assertTrue(
        {f[:2] + f[2][:1]: set(v.native_pks)
         for f, v in tdf.find_foreign_key_failures(ticDatNew).items()} ==
        {('arcs', 'nodes', u'destination'): {('booger', 'wooger')},
         ('arcs', 'nodes', u'source'): {('booger', 'wooger')},
         ('cost', 'commodities', u'commodity'): {
             ('bunker', 'Detroit', 'New Jerk'),
             ('junker', 'Detroit', 'New York')},
         ('cost', 'nodes', u'destination'): {
             ('bunker', 'Detroit', 'New Jerk'),
             ('Pencils', 'booger', 'wooger')},
         ('cost', 'nodes', u'source'): {('Pencils', 'booger', 'wooger')}})
def testRowOffsets(self):
    """row_offsets skips leading rows per table when reading an xls file."""
    tdf = TicDatFactory(boger=[[], ["the", "big", "boger"]],
                        woger=[[], ["the", "real", "big", "woger"]])
    # the data rows embed a copy of the field-name row mid-table, so an
    # offset read can re-discover a "header" partway down the sheet
    td = tdf.freeze_me(tdf.TicDat(
        boger=([1, 2, 3], [12, 24, 36], tdf.data_fields["boger"],
               [100, 200, 400]),
        woger=([[1, 2, 3, 4]] * 4) + [tdf.data_fields["woger"]] +
              ([[100, 200, 300, 400]] * 5)))
    filePath = os.path.join(_scratchDir, "rowoff.xls")
    tdf.xls.write_file(td, filePath)
    td1 = tdf.xls.create_tic_dat(filePath)
    td2 = tdf.xls.create_tic_dat(filePath, {"woger": 5})
    td3 = tdf.xls.create_tic_dat(filePath, {"woger": 5, "boger": 3})
    self.assertTrue(tdf._same_data(td, td1))
    tdCheck = tdf.TicDat(boger=td2.boger, woger=td.woger)
    self.assertTrue(tdf._same_data(td, tdCheck))
    # offset reads pick up only the rows after the embedded header copy
    self.assertTrue(all(td2.woger[i]["big"] == 300 for i in range(5)))
    self.assertTrue(all(td3.woger[i]["real"] == 200 for i in range(5)))
    self.assertTrue(td3.boger[0]["big"] == 200 and len(td3.boger) == 1)
def testDiet(self):
    """Diet data survives an Access (.mdb) write/read; frozen reads are immutable."""
    factory = TicDatFactory(**dietSchema())
    source = factory.freeze_me(factory.TicDat(
        **{tbl: getattr(dietData(), tbl)
           for tbl in factory.primary_key_fields}))
    db_path = makeCleanPath(os.path.join(_scratchDir, "diet.mdb"))
    factory.mdb.write_file(source, db_path)
    read_back = factory.mdb.create_tic_dat(db_path)
    self.assertTrue(factory._same_data(source, read_back))

    def mutate_read_back():
        read_back.categories["calories"]["minNutrition"] = 12

    # an unfrozen read-back can be mutated, which makes it diverge
    mutate_read_back()
    self.assertFalse(factory._same_data(source, read_back))
    # overwriting an existing file requires allow_overwrite
    self.assertTrue(self.firesException(
        lambda: factory.mdb.write_file(source, db_path)))
    factory.mdb.write_file(source, db_path, allow_overwrite=True)
    # a frozen read-back rejects mutation and stays equal to the source
    read_back = factory.mdb.create_tic_dat(db_path, freeze_it=True)
    self.assertTrue(factory._same_data(source, read_back))
    self.assertTrue(self.firesException(mutate_read_back))
    self.assertTrue(factory._same_data(source, read_back))
def testCsvSpacey(self):
    """case_space_table_names csv round trips, including a custom separator."""
    if not self.can_run:
        return
    self.assertTrue(pandatio.sql, "this unit test requires SQLite installed")
    tdf = TicDatFactory(**spacesSchema())
    pdf = PanDatFactory(**spacesSchema())
    ticDat = tdf.TicDat(
        **{
            "a_table": {
                1: [1, 2, "3"],
                22.2: (12, 0.12, "something"),
                0.23: (11, 12, "thirt")
            },
            "b_table": {
                (1, 2, "foo"): 1,
                (1012.22, 4, "0012"): 12
            },
            "c_table": (("this", 2, 3, 4), ("that", 102.212, 3, 5.5),
                        ("another", 5, 12.5, 24))
        })
    panDat = pan_dat_maker(spacesSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "spaces_2_csv")
    pdf.csv.write_directory(panDat, dirPath, case_space_table_names=True)
    panDat2 = pdf.csv.create_pan_dat(dirPath)
    self.assertTrue(pdf._same_data(panDat, panDat2))
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(
        tdf.TicDat(
            **
            {t: getattr(netflowData(), t)
             for t in tdf.primary_key_fields}))
    panDat = pan_dat_maker(netflowSchema(), ticDat)
    dirPath = os.path.join(_scratchDir, "spaces_2_2_csv")
    # a colon separator must be honored on both write and read
    pdf.csv.write_directory(panDat, dirPath, case_space_table_names=True,
                            sep=":")
    panDat2 = pdf.csv.create_pan_dat(dirPath, sep=":")
    self.assertTrue(pdf._same_data(panDat, panDat2))
def testSilly(self):
    """copy_to_pandas round trips, with and without dropping pk columns."""
    if not self.canRun:
        return
    tdf = TicDatFactory(**dict({"d": [("dData1", "dData2", "dData3", "dData4"), []],
                                "e": [["eData"], []]}, **sillyMeSchema()))
    ticDat = tdf.copy_to_pandas(tdf.TicDat(**sillyMeData()))
    # d and e were not populated, so their DataFrames are empty
    self.assertFalse(len(ticDat.d) + len(ticDat.e))
    oldDat = tdf.freeze_me(tdf.TicDat(
        **dict({"d": {(1, 2, 3, 4): {}, (1, "b", "c", "d"): {},
                      ("a", 2, "c", "d"): {}},
                "e": {11: {}, "boger": {}}}, **sillyMeData())))
    ticDat = tdf.copy_to_pandas(oldDat, drop_pk_columns=True)

    def checkTicDat():
        self.assertTrue(len(ticDat.d) == 3 and len(ticDat.e) == 2)
        self.assertTrue(set(ticDat.d.index.values) ==
                        {(1, 2, 3, 4), (1, "b", "c", "d"), ("a", 2, "c", "d")})
        self.assertTrue(set(ticDat.e.index.values) == {11, "boger"})
        self.assertTrue(len(ticDat.c) == len(oldDat.c) == 3)
        # NOTE(review): this asserts a generator object, which is always
        # truthy — presumably all(...) was intended; confirm and fix.
        self.assertTrue(ticDat.c.loc[i] == oldDat.c[i] for i in range(3))

    checkTicDat()
    # with pks dropped, the pk-named columns are gone from the DataFrames
    self.assertFalse(hasattr(ticDat.d, "dData1") or hasattr(ticDat.e, "eData"))
    ticDat = tdf.copy_to_pandas(oldDat, drop_pk_columns=False)
    checkTicDat()
    self.assertTrue(ticDat.e.loc[11].values[0] == 11)
    if sys.version_info[0] == 2:
        self.assertTrue(len(ticDat.d.dData1.sloc[1, :, :, :]) == 2)
    else:  # very strange infrequent bug issue that I will investigate later
        self.assertTrue(len(ticDat.d.dData1.sloc[1]) == 2)
    # default drop_pk_columns behaves like the False case here
    ticDat = tdf.copy_to_pandas(oldDat)
    checkTicDat()
    if sys.version_info[0] == 2:
        self.assertTrue(len(ticDat.d.dData1.sloc[1, :, :, :]) == 2)
    else:
        self.assertTrue(len(ticDat.d.dData1.sloc[1]) == 2)
    self.assertTrue(ticDat.e.loc[11].values[0] == 11)
    self.assertTrue(set(ticDat.d.columns) == {"dData%s" % s
                                              for s in range(5)[1:]})
    # the pandas copy can be fed back into TicDat and match the original
    rebornTicDat = tdf.TicDat(**{t: getattr(ticDat, t)
                                 for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    # a bare Series also works for a single-data-field table
    ticDat.b = ticDat.b.bData
    rebornTicDat = tdf.TicDat(**{t: getattr(ticDat, t)
                                 for t in tdf.all_tables})
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
def testJsonCross(self):
    """TicDat and PanDat json formats interoperate in both directions."""
    if not self.can_run:
        return
    tic_factory = TicDatFactory(**dietSchema())
    pan_factory = PanDatFactory(**dietSchema())
    tic_dat = tic_factory.freeze_me(tic_factory.TicDat(
        **{tbl: getattr(dietData(), tbl)
           for tbl in tic_factory.primary_key_fields}))
    pan_dat = pan_dat_maker(dietSchema(), tic_dat)
    json_path = os.path.join(_scratchDir, "diet_cross.json")
    # pandas-style json written by PanDat, read back as a TicDat
    pan_factory.json.write_file(pan_dat, json_path)
    tic_round_trip = tic_factory.json.create_tic_dat(json_path,
                                                     from_pandas=True)
    self.assertTrue(tic_factory._same_data(tic_dat, tic_round_trip,
                                           epsilon=0.0001))
    # and the reverse: TicDat writes pandas-style json for PanDat to read
    tic_factory.json.write_file(tic_dat, json_path, allow_overwrite=True,
                                to_pandas=True)
    pan_round_trip = pan_factory.json.create_pan_dat(json_path)
    self.assertTrue(pan_factory._same_data(pan_dat, pan_round_trip,
                                           epsilon=0.0001))
def testDiet(self):
    """xls round trip for diet data, probing row_offsets/headers_present."""
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    filePath = os.path.join(_scratchDir, "diet.xls")
    tdf.xls.write_file(ticDat, filePath)
    xlsTicDat = tdf.xls.create_tic_dat(filePath)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    xlsTicDat.categories["calories"]["minNutrition"] = 12
    self.assertFalse(tdf._same_data(ticDat, xlsTicDat))
    # NOTE(review): other copies of this test call find_duplicates — this one
    # presumably predates that rename; confirm get_duplicates is intended here.
    self.assertFalse(tdf.xls.get_duplicates(filePath))
    # skipping the header row while still expecting headers must fail
    ex = self.firesException(lambda: tdf.xls.create_tic_dat(
        filePath, row_offsets={t: 1 for t in tdf.all_tables}))
    self.assertTrue("field names could not be found" in ex)
    # offset 1 + headers_present=False lands exactly on the data rows
    xlsTicDat = tdf.xls.create_tic_dat(
        filePath, row_offsets={t: 1 for t in tdf.all_tables},
        headers_present=False)
    self.assertTrue(tdf._same_data(xlsTicDat, ticDat))
    # offset 2 drops exactly one record per table
    xlsTicDat = tdf.xls.create_tic_dat(
        filePath, row_offsets={t: 2 for t in tdf.all_tables},
        headers_present=False)
    self.assertFalse(tdf._same_data(xlsTicDat, ticDat))
    self.assertTrue(all(len(getattr(ticDat, t)) - 1 ==
                        len(getattr(xlsTicDat, t)) for t in tdf.all_tables))
def testOne(self):
    """Core TicDat copying/freezing plus good_tic_dat validation messages."""
    def _cleanIt(x):
        # normalize float noise in the two costs that differ across sources
        x.foods['macaroni'] = {"cost": 2.09}
        x.foods['milk'] = {"cost": 0.89}
        return x
    dataObj = dietData()
    tdf = TicDatFactory(**dietSchema())
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    dataObj2 = tdf.copy_tic_dat(dataObj)
    dataObj3 = tdf.copy_tic_dat(dataObj, freeze_it=True)
    dataObj4 = tdf.TicDat(**tdf.as_dict(dataObj3))
    # all copies equal the original but are distinct objects
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3, dataObj4)))
    dataObj = _cleanIt(dataObj)
    self.assertTrue(tdf.good_tic_dat_object(dataObj))
    self.assertTrue(all(tdf._same_data(dataObj, x) and dataObj is not x
                        for x in (dataObj2, dataObj3)))

    def hackit(x):
        x.foods["macaroni"] = 100

    # frozen copy rejects the edit, unfrozen copy accepts it
    self.assertTrue(self.firesException(lambda: hackit(dataObj3)))
    hackit(dataObj2)
    self.assertTrue(not tdf._same_data(dataObj, dataObj2) and
                    tdf._same_data(dataObj, dataObj3))
    # corrupt the object and collect the validation messages
    msg = []
    dataObj.foods[("milk", "cookies")] = {"cost": float("inf")}
    dataObj.boger = object()
    self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                     tdf.good_tic_dat_object(dataObj,
                                             bad_message_handler=msg.append))
    self.assertTrue({"foods : Inconsistent key lengths"} == set(msg))
    self.assertTrue(all(tdf.good_tic_dat_table(getattr(dataObj, t), t)
                        for t in ("categories", "nutritionQuantities")))
    dataObj = dietData()
    dataObj.categories["boger"] = {"cost": 1}
    dataObj.categories["boger"] = {"cost": 1}
    self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                     tdf.good_tic_dat_object(dataObj,
                                             bad_message_handler=msg.append))
    self.assertTrue({'foods : Inconsistent key lengths',
                     'categories : Inconsistent data field name keys.'} ==
                    set(msg))
    # NOTE(review): .message is Python-2-only exception API — confirm this
    # file still targets py2 (see the sys.version_info checks elsewhere).
    ex = firesException(lambda: tdf.freeze_me(tdf.TicDat(
        **{t: getattr(dataObj, t)
           for t in tdf.primary_key_fields}))).message
    self.assertTrue("categories cannot be treated as a ticDat table : "
                    "Inconsistent data field name keys" in ex)
def testOther(self):
    """OPL text round trip for string pks, including an opl_prepend prefix."""
    schema_factory = TicDatFactory(
        table1=[["String_Field"], []],
        table2=[["String_Field", "Num_PK"], ["Num_Field_1", "Num_Field_2"]])
    rows = {
        "table1": {"test1": [], "test2": []},
        "table2": {("test1", 1): [2, 3], ("test2", 2): [3, 4]},
    }
    frozen = schema_factory.freeze_me(schema_factory.TicDat(**rows))
    # round trip with the default (empty) opl_prepend ...
    self.assertTrue(schema_factory._same_data(
        frozen,
        read_opl_text(schema_factory,
                      create_opl_text(schema_factory, frozen))))
    # ... and again with an underscore prefix on the names
    schema_factory.opl_prepend = "_"
    self.assertTrue(schema_factory._same_data(
        frozen,
        read_opl_text(schema_factory,
                      create_opl_text(schema_factory, frozen))))
def testSpacey2(self):
    """case_space_sheet_names xls/xlsx round trips for two schemas."""
    if not self.can_run:
        return

    def round_trip(factory, dat, stem):
        # write with case/space sheet names, then confirm a faithful read-back
        for extension in [".xls", ".xlsx"]:
            target = os.path.join(_scratchDir, "%s%s" % (stem, extension))
            factory.xls.write_file(dat, target, case_space_sheet_names=True)
            self.assertTrue(factory._same_data(
                dat, factory.xls.create_tic_dat(target)))

    spaces_factory = TicDatFactory(**spacesSchema())
    round_trip(spaces_factory, spaces_factory.TicDat(**spacesData()),
               "spaces_2")
    netflow_factory = TicDatFactory(**netflowSchema())
    netflow_dat = netflow_factory.freeze_me(netflow_factory.TicDat(
        **{t: getattr(netflowData(), t)
           for t in netflow_factory.primary_key_fields}))
    round_trip(netflow_factory, netflow_dat, "spaces_2_2")
def testDiet(self):
    """csv directory round trip for diet data, covering dialect handling."""
    tdf = TicDatFactory(**dietSchema())
    ticDat = tdf.freeze_me(tdf.TicDat(**{t: getattr(dietData(), t)
                                         for t in tdf.primary_key_fields}))
    dirPath = os.path.join(_scratchDir, "diet")
    tdf.csv.write_directory(ticDat, dirPath)
    # NOTE(review): other copies of this test call find_duplicates — this one
    # presumably predates that rename; confirm get_duplicates is intended here.
    self.assertFalse(tdf.csv.get_duplicates(dirPath))
    csvTicDat = tdf.csv.create_tic_dat(dirPath)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    def change():
        csvTicDat.categories["calories"]["minNutrition"] = 12

    # unfrozen read-back is editable; a frozen read-back (below) is not
    self.assertFalse(firesException(change))
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
    # a bogus dialect name is rejected with a descriptive message
    self.assertTrue(self.firesException(lambda: tdf.csv.write_directory(
        ticDat, dirPath, dialect="excel_t")).endswith(
        "Invalid dialect excel_t"))
    tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab",
                            allow_overwrite=True)
    # reading tab-dialect files with the default dialect fails
    self.assertTrue(self.firesException(
        lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
    csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True,
                                       dialect="excel-tab")
    self.assertTrue(firesException(change))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
def testDiet(self):
    """Read a pre-built diet.accdb and check frozen vs. unfrozen mutation."""
    if not _can_unit_test:
        return
    factory = TicDatFactory(**dietSchema())
    expected = factory.freeze_me(factory.TicDat(
        **{tbl: getattr(dietData(), tbl)
           for tbl in factory.primary_key_fields}))
    accdb_path = "diet.accdb"
    self.assertFalse(factory.mdb.find_duplicates(accdb_path))
    read_back = factory.mdb.create_tic_dat(accdb_path)
    self.assertTrue(factory._same_data(expected, read_back))

    def poke():
        read_back.categories["calories"]["minNutrition"] = 12

    # a mutable read-back drifts away from the expected data ...
    poke()
    self.assertFalse(factory._same_data(expected, read_back))
    # ... while a frozen read-back rejects the same edit and stays equal
    read_back = factory.mdb.create_tic_dat(accdb_path, freeze_it=True)
    self.assertTrue(factory._same_data(expected, read_back))
    self.assertTrue(self.firesException(poke))
    self.assertTrue(factory._same_data(expected, read_back))
def testLongName(self):
    """Very long table names must either fail fast or still round trip via xls."""
    prefix = "b" * 20
    # doubling the prefix makes the sheet names non-differentiable
    clashing = TicDatFactory(
        **{prefix * 2 + tbl: spec for tbl, spec in dietSchema().items()})
    self.assertTrue(self.firesException(
        lambda: clashing.xls._verify_differentiable_sheet_names()))
    # a single prefix is long but still workable
    long_named = TicDatFactory(
        **{prefix + tbl: spec for tbl, spec in dietSchema().items()})
    dat = long_named.freeze_me(long_named.TicDat(
        **{tbl: getattr(dietData(), tbl.replace(prefix, ""))
           for tbl in long_named.primary_key_fields}))
    out_path = os.path.join(_scratchDir, "longname.xls")
    long_named.xls.write_file(dat, out_path)
    self.assertFalse(long_named.xls.find_duplicates(out_path))
    self.assertTrue(long_named._same_data(
        dat, long_named.xls.create_tic_dat(out_path)))
def testRowOffsets(self):
    """row_offsets skips leading rows per table when reading an xls file."""
    if not self.can_run:
        return
    tdf = TicDatFactory(boger=[[], ["the", "big", "boger"]],
                        woger=[[], ["the", "real", "big", "woger"]])
    # the data rows embed a copy of the field-name row mid-table, so an
    # offset read can re-discover a "header" partway down the sheet
    td = tdf.freeze_me(
        tdf.TicDat(
            boger=([1, 2, 3], [12, 24,
                               36], tdf.data_fields["boger"], [100, 200, 400]),
            woger=([[1, 2, 3, 4]] * 4) + [tdf.data_fields["woger"]] +
            ([[100, 200, 300, 400]] * 5)))
    filePath = os.path.join(_scratchDir, "rowoff.xls")
    tdf.xls.write_file(td, filePath)
    td1 = tdf.xls.create_tic_dat(filePath)
    td2 = tdf.xls.create_tic_dat(filePath, {"woger": 5})
    td3 = tdf.xls.create_tic_dat(filePath, {"woger": 5, "boger": 3})
    self.assertTrue(tdf._same_data(td, td1))
    tdCheck = tdf.TicDat(boger=td2.boger, woger=td.woger)
    self.assertTrue(tdf._same_data(td, tdCheck))
    # offset reads pick up only the rows after the embedded header copy
    self.assertTrue(all(td2.woger[i]["big"] == 300 for i in range(5)))
    self.assertTrue(all(td3.woger[i]["real"] == 200 for i in range(5)))
    self.assertTrue(td3.boger[0]["big"] == 200 and len(td3.boger) == 1)
def testFive(self):
    """obfusimplify renamings plus the foreign key add/clear/cardinality API."""
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t)
                                      for t in tdf.all_tables}))
    obfudat = tdf.obfusimplify(dat, freeze_it=1)
    self.assertFalse(tdf._same_data(dat, obfudat.copy))
    # every obfuscated arc maps back to an original arc via renamings
    for (s, d), r in obfudat.copy.arcs.items():
        self.assertFalse((s, d) in dat.arcs)
        self.assertTrue(dat.arcs[obfudat.renamings[s][1],
                                 obfudat.renamings[d][1]]["capacity"] ==
                        r["capacity"])
    # skipping every entity table makes obfusimplify a no-op on the data
    obfudat = tdf.obfusimplify(dat, freeze_it=1,
                               skip_tables=["commodities", "nodes"])
    self.assertTrue(tdf._same_data(obfudat.copy, dat))
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    mone, one2one = "many-to-one", "one-to-one"
    fk, fkm = _ForeignKey, _ForeignKeyMapping
    # the netflow helper registers exactly these seven many-to-one fks
    self.assertTrue(set(tdf.foreign_keys) ==
                    {fk("arcs", 'nodes', fkm('source', u'name'), mone),
                     fk("arcs", 'nodes', fkm('destination', u'name'), mone),
                     fk("cost", 'nodes', fkm('source', u'name'), mone),
                     fk("cost", 'nodes', fkm('destination', u'name'), mone),
                     fk("cost", 'commodities', fkm('commodity', u'name'),
                        mone),
                     fk("inflow", 'commodities', fkm('commodity', u'name'),
                        mone),
                     fk("inflow", 'nodes', fkm('node', u'name'), mone)})
    # clearing one native table removes only its fks
    tdf.clear_foreign_keys("cost")
    self.assertTrue(set(tdf.foreign_keys) ==
                    {fk("arcs", 'nodes', fkm('source', u'name'), mone),
                     fk("arcs", 'nodes', fkm('destination', u'name'), mone),
                     fk("inflow", 'commodities', fkm('commodity', u'name'),
                        mone),
                     fk("inflow", 'nodes', fkm('node', u'name'), mone)})
    tdf = TicDatFactory(**dietSchema())
    self.assertFalse(tdf.foreign_keys)
    addDietForeignKeys(tdf)
    self.assertTrue(set(tdf.foreign_keys) ==
                    {fk("nutritionQuantities", 'categories',
                        fkm('category', u'name'), mone),
                     fk("nutritionQuantities", 'foods', fkm('food', u'name'),
                        mone)})
    # once a TicDat has been built, fks can no longer be cleared
    tdf.TicDat()
    self.assertTrue(self.firesException(
        lambda: tdf.clear_foreign_keys("nutritionQuantities")))
    self.assertTrue(tdf.foreign_keys)
    tdf = TicDatFactory(**dietSchema())
    addDietForeignKeys(tdf)
    tdf.clear_foreign_keys("nutritionQuantities")
    self.assertFalse(tdf.foreign_keys)
    # cardinality inference: pk->pk full coverage is one-to-one,
    # partial or data-field links are many-to-one, data->data is rejected
    tdf = TicDatFactory(parentTable=[["pk"], ["pd1", "pd2", "pd3"]],
                        goodChild=[["gk"], ["gd1", "gd2"]],
                        badChild=[["bk1", "bk2"], ["bd"]],
                        appendageChild=[["ak"], ["ad1", "ad2"]],
                        appendageBadChild=[["bk1", "bk2"], []])
    tdf.add_foreign_key("goodChild", "parentTable", fkm("gd1", "pk"))
    tdf.add_foreign_key("badChild", "parentTable", ["bk2", "pk"])
    self.assertTrue("many-to-many" in self.firesException(
        lambda: tdf.add_foreign_key("badChild", "parentTable",
                                    ["bd", "pd2"])))
    tdf.add_foreign_key("appendageChild", "parentTable", ["ak", "pk"])
    tdf.add_foreign_key("appendageBadChild", "badChild",
                        (("bk2", "bk2"), ("bk1", "bk1")))
    fks = tdf.foreign_keys
    _getfk = lambda t: next(_ for _ in fks if _.native_table == t)
    self.assertTrue(_getfk("goodChild").cardinality == "many-to-one")
    self.assertTrue(_getfk("badChild").cardinality == "many-to-one")
    self.assertTrue(_getfk("appendageChild").cardinality == "one-to-one")
    self.assertTrue(_getfk("appendageBadChild").cardinality == "one-to-one")
    # targeted clear leaves the other fks intact; blanket clear removes all
    tdf.clear_foreign_keys("appendageBadChild")
    self.assertTrue(tdf.foreign_keys and
                    "appendageBadChild" not in tdf.foreign_keys)
    tdf.clear_foreign_keys()
    self.assertFalse(tdf.foreign_keys)