def testXlsSpacey(self):
    """Round-trip PanDat objects through .xlsx files.

    Files are written with ``case_space_sheet_names=True`` and read back
    with default arguments, first for the spaces schema and then for the
    netflow schema; the data read back must match what was written.
    """
    if not self.can_run:
        return

    extension = ".xlsx"

    # --- spaces schema ---
    tic_factory = TicDatFactory(**spacesSchema())
    pan_factory = PanDatFactory(**spacesSchema())
    tic_source = tic_factory.TicDat(**spacesData())
    written = pan_dat_maker(spacesSchema(), tic_source)
    target = os.path.join(_scratchDir, "spaces_2%s" % extension)
    pan_factory.xls.write_file(written, target, case_space_sheet_names=True)
    reread = pan_factory.xls.create_pan_dat(target)
    self.assertTrue(pan_factory._same_data(written, reread))

    # --- netflow schema ---
    tic_factory = TicDatFactory(**netflowSchema())
    pan_factory = PanDatFactory(**netflowSchema())
    netflow_tables = {
        table: getattr(netflowData(), table)
        for table in tic_factory.primary_key_fields
    }
    tic_source = tic_factory.freeze_me(tic_factory.TicDat(**netflow_tables))
    written = pan_dat_maker(netflowSchema(), tic_source)
    target = os.path.join(_scratchDir, "spaces_2_2%s" % extension)
    pan_factory.xls.write_file(written, target, case_space_sheet_names=True)
    reread = pan_factory.xls.create_pan_dat(target)
    self.assertTrue(pan_factory._same_data(written, reread))
def testDictConstructions(self):
    """Build PanDat objects from dict/list renderings of DataFrames.

    Each table of a reference PanDat is converted (``to_dict()`` with
    several orientations, or a list of row lists) and fed back to
    ``PanDat``; the result must hold the same data. An extra, non-schema
    column must survive the ``orient="list"`` round trip.
    """

    def rebuild(factory, dat, convert):
        # Re-create a PanDat from a per-table conversion of its frames.
        return factory.PanDat(
            **{table: convert(getattr(dat, table)) for table in factory.all_tables}
        )

    def as_default_dict(frame):
        return frame.to_dict()

    def as_list_dict(frame):
        return frame.to_dict(orient="list")

    def as_records(frame):
        return frame.to_dict(orient="records")

    def as_row_lists(frame):
        return [list(row) for row in frame.itertuples(index=False)]

    # --- diet schema ---
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields})
    )
    reference = pan_dat_maker(dietSchema(), frozen)
    from_dict = rebuild(pdf, reference, as_default_dict)
    from_lists = rebuild(pdf, reference, as_list_dict)
    from_rows = rebuild(pdf, reference, as_row_lists)
    self.assertTrue(
        all(
            pdf._same_data(reference, candidate)
            for candidate in [from_dict, from_lists, from_rows]
        )
    )

    reference.foods["extra"] = 12
    with_extra = rebuild(pdf, reference, as_list_dict)
    self.assertTrue(pdf._same_data(reference, with_extra))
    self.assertTrue(set(with_extra.foods["extra"]) == {12})

    # --- netflow schema ---
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    reference = pan_dat_maker(netflowSchema(), frozen)
    from_dict = rebuild(pdf, reference, as_default_dict)
    from_records = rebuild(pdf, reference, as_records)
    self.assertTrue(
        all(
            pdf._same_data(reference, candidate)
            for candidate in [from_dict, from_records]
        )
    )

    reference.cost["extra"] = "boger"
    with_extra = rebuild(pdf, reference, as_list_dict)
    self.assertTrue(pdf._same_data(reference, with_extra))
    self.assertTrue(set(with_extra.cost["extra"]) == {"boger"})
def testNetflow(self):
    """Exercise the SQL reader/writer against the netflow schema.

    Covers table ordering under foreign keys, a write/read round trip,
    frozen vs. editable results, overwrite protection, the error raised on
    an unrecognized primary key field, foreign key failure detection, and
    a round trip of infinite capacities.
    """
    if not self.can_run:
        return

    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)

    # Parent tables must be ordered ahead of the tables that reference them.
    table_order = tdf.sql._ordered_tables()
    self.assertTrue(
        table_order.index("nodes")
        < min(table_order.index(child) for child in ("arcs", "cost", "inflow"))
    )
    self.assertTrue(
        table_order.index("commodities")
        < min(table_order.index(child) for child in ("cost", "inflow"))
    )

    ticDat = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])

    db_path = os.path.join(_scratchDir, "netflow.sql")
    tdf.sql.write_db_data(ticDat, db_path)
    self.assertFalse(tdf.sql.find_duplicates(db_path))

    sqlTicDat = tdf.sql.create_tic_dat(db_path, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

    def changeIt():
        # Looks up sqlTicDat at call time, so it follows the rebinding below.
        sqlTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    # The frozen copy rejects the edit and is left unchanged.
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

    # The unfrozen copy accepts the edit and therefore diverges.
    sqlTicDat = tdf.sql.create_tic_dat(db_path)
    self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

    # Same schema except that the nodes primary key field is renamed.
    hacked_schema = netflowSchema()
    hacked_schema["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**hacked_schema)
    ticDatHacked = tdfHacked.TicDat(
        **{t: getattr(ticDat, t) for t in tdf.all_tables}
    )
    tdfHacked.sql.write_db_data(ticDatHacked, makeCleanPath(db_path))
    self.assertFalse(tdfHacked.sql.find_duplicates(db_path))
    self.assertTrue(
        self.firesException(lambda: tdfHacked.sql.write_db_data(ticDat, db_path))
    )
    tdfHacked.sql.write_db_data(ticDat, db_path, allow_overwrite=True)
    read_error = self.firesException(lambda: tdf.sql.create_tic_dat(db_path))
    self.assertTrue("Unable to recognize field name in table nodes" in read_error)

    # Rows that reference unknown nodes / commodities.
    ticDatNew = tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}
    )
    ticDatNew.cost['Pencils', 'booger', 'wooger'] = 10
    ticDatNew.cost['junker', 'Detroit', 'New York'] = 20
    ticDatNew.cost['bunker', 'Detroit', 'New Jerk'] = 20
    ticDatNew.arcs['booger', 'wooger'] = 112

    found_failures = {
        fk[:2] + fk[2][:1]: set(failure.native_pks)
        for fk, failure in tdf.find_foreign_key_failures(ticDatNew).items()
    }
    expected_failures = {
        ('arcs', 'nodes', u'destination'): {('booger', 'wooger')},
        ('arcs', 'nodes', u'source'): {('booger', 'wooger')},
        ('cost', 'commodities', u'commodity'): {
            ('bunker', 'Detroit', 'New Jerk'),
            ('junker', 'Detroit', 'New York'),
        },
        ('cost', 'nodes', u'destination'): {
            ('bunker', 'Detroit', 'New Jerk'),
            ('Pencils', 'booger', 'wooger'),
        },
        ('cost', 'nodes', u'source'): {('Pencils', 'booger', 'wooger')},
    }
    self.assertTrue(found_failures == expected_failures)

    # Infinite capacities written to the database are read back unchanged.
    ticDat3 = tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}
    )
    ticDat3.arcs['Detroit', 'Boston'] = float("inf")
    ticDat3.arcs['Denver', 'Boston'] = float("inf")
    self.assertFalse(tdf._same_data(ticDat3, ticDat))
    tdf.sql.write_db_data(ticDat3, makeCleanPath(db_path))
    ticDat4 = tdf.sql.create_tic_dat(db_path)
    self.assertTrue(tdf._same_data(ticDat3, ticDat4))
def testNetflow(self):
    """Read netflow data from the file "netflow.accdb".

    The file is only read here, never written. Checks for no duplicates,
    frozen vs. editable reads, and the error raised when the factory
    expects a primary key field named "nimrod" in the nodes table.
    """
    if not _can_unit_test:
        return

    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    expected = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables})
    )

    accdb_path = "netflow.accdb"
    self.assertFalse(tdf.mdb.find_duplicates(accdb_path))

    mdbTicDat = tdf.mdb.create_tic_dat(accdb_path, freeze_it=True)
    self.assertTrue(tdf._same_data(expected, mdbTicDat))

    def changeIt():
        # Looks up mdbTicDat at call time, so it follows the rebinding below.
        mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    # Frozen result: the edit fails and the data is untouched.
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(expected, mdbTicDat))

    # Editable result: the edit succeeds and the data diverges.
    mdbTicDat = tdf.mdb.create_tic_dat(accdb_path)
    self.assertTrue(tdf._same_data(expected, mdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(expected, mdbTicDat))

    hacked_schema = netflowSchema()
    hacked_schema["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**hacked_schema)
    read_error = self.firesException(
        lambda: tdfHacked.mdb.create_tic_dat(accdb_path)
    )
    self.assertTrue("Unable to recognize field nimrod in table nodes" in read_error)
def testNetflow(self):
    """Write netflow data to a scratch .accdb file and read it back.

    Checks the round trip, frozen vs. editable reads, overwrite
    protection, and the error raised when the file's nodes table lacks the
    expected "name" field.
    """
    if not _can_accdb_unit_test:
        return

    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    expected = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables})
    )

    accdb_path = os.path.join(_scratchDir, "netflow.accdb")
    tdf.mdb.write_file(expected, accdb_path)
    # shutil.copy(accdb_path, "netflow.accdb")  # uncomment to make readonly test file as .accdb
    self.assertFalse(tdf.mdb.find_duplicates(accdb_path))

    accdbTicDat = tdf.mdb.create_tic_dat(accdb_path, freeze_it=True)
    self.assertTrue(tdf._same_data(expected, accdbTicDat))

    def changeIt():
        # Looks up accdbTicDat at call time, so it follows the rebinding below.
        accdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    # Frozen result: the edit fails and the data is untouched.
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(expected, accdbTicDat))

    # Editable result: the edit succeeds and the data diverges.
    accdbTicDat = tdf.mdb.create_tic_dat(accdb_path)
    self.assertTrue(tdf._same_data(expected, accdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(expected, accdbTicDat))

    # Same schema except that the nodes primary key field is renamed.
    hacked_schema = netflowSchema()
    hacked_schema["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**hacked_schema)
    ticDatHacked = tdfHacked.TicDat(
        **{t: getattr(expected, t) for t in tdf.all_tables}
    )
    tdfHacked.mdb.write_file(ticDatHacked, makeCleanPath(accdb_path))
    self.assertTrue(
        self.firesException(lambda: tdfHacked.mdb.write_file(expected, accdb_path))
    )
    tdfHacked.mdb.write_file(expected, accdb_path, allow_overwrite=True)
    read_error = self.firesException(lambda: tdf.mdb.create_tic_dat(accdb_path))
    self.assertTrue("Unable to recognize field name in table nodes" in read_error)
def testNetflow(self):
    """Write netflow data to a scratch .mdb file and read it back.

    Checks the round trip, frozen vs. editable reads, overwrite
    protection, and the error raised when the file's nodes table lacks the
    expected "name" field.
    """
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    expected = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables})
    )

    mdb_path = os.path.join(_scratchDir, "netflow.mdb")
    tdf.mdb.write_file(expected, mdb_path)

    mdbTicDat = tdf.mdb.create_tic_dat(mdb_path, freeze_it=True)
    self.assertTrue(tdf._same_data(expected, mdbTicDat))

    def changeIt():
        # Looks up mdbTicDat at call time, so it follows the rebinding below.
        mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    # Frozen result: the edit fails and the data is untouched.
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(expected, mdbTicDat))

    # Editable result: the edit succeeds and the data diverges.
    mdbTicDat = tdf.mdb.create_tic_dat(mdb_path)
    self.assertTrue(tdf._same_data(expected, mdbTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(expected, mdbTicDat))

    # Same schema except that the nodes primary key field is renamed.
    hacked_schema = netflowSchema()
    hacked_schema["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**hacked_schema)
    ticDatHacked = tdfHacked.TicDat(
        **{t: getattr(expected, t) for t in tdf.all_tables}
    )
    tdfHacked.mdb.write_file(ticDatHacked, makeCleanPath(mdb_path))
    self.assertTrue(
        self.firesException(lambda: tdfHacked.mdb.write_file(expected, mdb_path))
    )
    tdfHacked.mdb.write_file(expected, mdb_path, allow_overwrite=True)
    read_error = self.firesException(lambda: tdf.mdb.create_tic_dat(mdb_path))
    self.assertTrue("Unable to recognize field name in table nodes" in read_error)
def testSqlSimple(self):
    """Round-trip diet and netflow PanDat objects through SQL files.

    Each data set is written and read with the fully specified factory,
    and also handled by a factory whose tables are all declared as '*'.
    """
    if not self.can_run:
        return

    def wildcard_factory(factory):
        # A factory with the same table names, each declared as '*'.
        return PanDatFactory(**{table: '*' for table in factory.all_tables})

    # --- diet schema ---
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(dietSchema(), frozen)
    db_path = os.path.join(_scratchDir, "diet.db")
    pdf.sql.write_file(panDat, db_path)
    from_sql = pdf.sql.create_pan_dat(db_path)
    self.assertTrue(pdf._same_data(panDat, from_sql))

    generic = wildcard_factory(pdf)
    generic.sql.write_file(panDat, db_path)
    from_sql = generic.sql.create_pan_dat(db_path)
    self.assertTrue(pdf._same_data(panDat, from_sql))

    # --- netflow schema ---
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(netflowSchema(), frozen)
    db_path = os.path.join(_scratchDir, "netflow.db")
    pdf.sql.write_file(panDat, db_path)
    reread = pdf.sql.create_pan_dat(db_path)
    self.assertTrue(pdf._same_data(panDat, reread))

    # Here the '*' factory only reads the file written above.
    generic = wildcard_factory(pdf)
    from_sql = generic.sql.create_pan_dat(db_path)
    self.assertTrue(pdf._same_data(panDat, from_sql))
def testNetflow(self):
    """Write netflow data to a scratch .xls file and read it back.

    Checks the round trip, frozen vs. editable reads, absence of
    duplicates, overwrite protection, and the "nodes : name" error raised
    after the file is overwritten by a factory with a renamed nodes key.
    """
    tdf = TicDatFactory(**netflowSchema())
    expected = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )

    xls_path = os.path.join(_scratchDir, "netflow.xls")
    tdf.xls.write_file(expected, xls_path)

    xlsTicDat = tdf.xls.create_tic_dat(xls_path, freeze_it=True)
    self.assertTrue(tdf._same_data(expected, xlsTicDat))

    def changeIt():
        # Looks up xlsTicDat at call time, so it follows the rebinding below.
        xlsTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    # Frozen result: the edit fails and the data is untouched.
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(expected, xlsTicDat))

    # Editable result: the edit succeeds and the data diverges.
    xlsTicDat = tdf.xls.create_tic_dat(xls_path)
    self.assertTrue(tdf._same_data(expected, xlsTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(expected, xlsTicDat))

    self.assertFalse(tdf.xls.get_duplicates(xls_path))

    # Same schema except that the nodes primary key field is renamed.
    hacked_schema = netflowSchema()
    hacked_schema["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**hacked_schema)
    self.assertTrue(
        self.firesException(lambda: tdfHacked.xls.write_file(expected, xls_path))
    )
    tdfHacked.xls.write_file(expected, xls_path, allow_overwrite=True)
    read_error = self.firesException(lambda: tdf.xls.create_tic_dat(xls_path))
    self.assertTrue("nodes : name" in read_error)
def testDataTypes(self):
    """Exercise find_data_type_failures / replace_data_type_failures.

    First part uses a small hand-built diet data set, second part uses the
    netflow data with string capacities injected on Detroit arcs.
    """
    if not self.canRun:
        return

    # ---------------- diet ----------------
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticdat = tdf.TicDat()
    ticdat.foods["a"] = 12
    ticdat.foods["b"] = None
    ticdat.categories["1"] = {"maxNutrition": 100, "minNutrition": 40}
    ticdat.categories["2"] = [10, 20]
    for food, category in itertools.product(ticdat.foods, ticdat.categories):
        ticdat.nutritionQuantities[food, category] = 5
    ticdat.nutritionQuantities['a', 2] = 12

    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))

    # With the factory as built here no failures are reported, and
    # replacing failures leaves the data unchanged.
    self.assertFalse(pdf.find_data_type_failures(pandat))
    pandat_copy = pdf.replace_data_type_failures(pdf.copy_pan_dat(pandat))
    self.assertTrue(pdf._same_data(pandat, pandat_copy, epsilon=0.00001))

    # Tightened types: cost not nullable, qty restricted to (5, 12].
    pdf = PanDatFactory(**dietSchema())
    pdf.set_data_type("foods", "cost", nullable=False)
    pdf.set_data_type(
        "nutritionQuantities", "qty",
        min=5, inclusive_min=False, max=12, inclusive_max=True,
    )
    failed = pdf.find_data_type_failures(pandat)
    self.assertTrue(
        set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')}
    )
    self.assertTrue(set(failed['foods', 'cost']["name"]) == {'b'})
    bad_qty_rows = failed['nutritionQuantities', 'qty'].T.to_dict().values()
    bad_qty_keys = {(row["food"], row["category"]) for row in bad_qty_rows}
    self.assertTrue(
        bad_qty_keys == {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')}
    )

    failed = pdf.find_data_type_failures(pandat, as_table=False)
    self.assertTrue(
        4 == failed['nutritionQuantities', 'qty'].value_counts()[True]
    )

    fixed = pdf.replace_data_type_failures(
        pdf.copy_pan_dat(pandat), {("nutritionQuantities", "qty"): 5.15}
    )
    self.assertTrue(set(fixed.foods["cost"]) == {0.0, 12.0})
    self.assertTrue(set(fixed.nutritionQuantities["qty"]) == {5.15, 12.0})

    # ---------------- netflow ----------------
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    pdf = PanDatFactory(**netflowSchema())
    ticdat = tdf.copy_tic_dat(netflowData())
    # Overwrite each arc leaving Detroit with its destination name.
    for destination in ticdat.nodes["Detroit"].arcs_source:
        ticdat.arcs["Detroit", destination] = destination
    pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_data_type_failures(pandat))

    pdf = PanDatFactory(**netflowSchema())
    pdf.set_data_type("arcs", "capacity", strings_allowed="*")
    self.assertFalse(pdf.find_data_type_failures(pandat))

    pdf = PanDatFactory(**netflowSchema())
    pdf.set_data_type(
        "arcs", "capacity",
        strings_allowed=["Boston", "Seattle", "lumberjack"],
    )
    failed = pdf.find_data_type_failures(pandat)
    self.assertTrue(set(failed) == {('arcs', 'capacity')})
    bad_arc_rows = failed['arcs', 'capacity'].T.to_dict().values()
    bad_arc_keys = {(row["source"], row["destination"]) for row in bad_arc_rows}
    self.assertTrue(bad_arc_keys == {("Detroit", "New York")})

    # Called for its effect on pandat; the return value is not used.
    pdf.replace_data_type_failures(pandat)
    self.assertTrue(
        set(pandat.arcs["capacity"]) == {120, 'Boston', 0, 'Seattle'}
    )
def testNetflow(self):
    """Exercise the Excel reader/writer (.xls and .xlsx) on netflow data.

    Covers round trips, frozen vs. editable reads, duplicate detection,
    overwrite protection, the "nodes : name" error after a renamed-key
    overwrite, and handling of +/- infinity with and without
    ``treat_inf_as_infinity``.
    """
    if not self.can_run:
        return

    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])

    xls_path = os.path.join(_scratchDir, "netflow.xls")
    xlsx_path = xls_path + "x"

    tdf.xls.write_file(ticDat, xls_path)
    xlsTicDat = tdf.xls.create_tic_dat(xls_path, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    tdf.xls.write_file(ticDat, xlsx_path)
    self.assertTrue(tdf._same_data(ticDat, tdf.xls.create_tic_dat(xlsx_path)))

    def changeIt():
        # Looks up xlsTicDat at call time, so it follows the rebinding below.
        xlsTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

    # Frozen result: the edit fails and the data is untouched.
    self.assertTrue(self.firesException(changeIt))
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))

    # Editable result: the edit succeeds and the data diverges.
    xlsTicDat = tdf.xls.create_tic_dat(xls_path)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    self.assertFalse(self.firesException(changeIt))
    self.assertFalse(tdf._same_data(ticDat, xlsTicDat))

    self.assertFalse(tdf.xls.find_duplicates(xls_path))

    # Same schema except that the nodes primary key field is renamed.
    hacked_schema = netflowSchema()
    hacked_schema["nodes"][0] = ["nimrod"]
    tdfHacked = TicDatFactory(**hacked_schema)
    self.assertTrue(
        self.firesException(lambda: tdfHacked.xls.write_file(ticDat, xls_path))
    )
    tdfHacked.xls.write_file(ticDat, xls_path, allow_overwrite=True)
    read_error = self.firesException(lambda: tdf.xls.create_tic_dat(xls_path))
    self.assertTrue("nodes : name" in read_error)

    # Infinite values.
    ticDat = tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}
    )
    ticDat.arcs["Detroit", "Boston"] = float("inf")
    ticDat.cost['Pencils', 'Detroit', 'Boston'] = -float("inf")
    tdf.xls.write_file(ticDat, makeCleanPath(xls_path))
    xlsTicDat = tdf.xls.create_tic_dat(xls_path, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
    tdf.xls.write_file(ticDat, xlsx_path, allow_overwrite=True)
    self.assertTrue(tdf._same_data(ticDat, tdf.xls.create_tic_dat(xlsx_path)))
    without_inf = tdf.xls.create_tic_dat(xlsx_path, treat_inf_as_infinity=False)
    self.assertFalse(tdf._same_data(ticDat, without_inf))
def testSqlSpaceyTwo(self):
    """Round-trip PanDat objects through SQLite using explicit connections.

    Data is written through a ``con`` argument (no usable db_file_path)
    with ``case_space_table_names=True`` and then read back through a
    fresh connection, for the spaces schema and the netflow schema.
    """
    if not self.can_run:
        return
    self.assertTrue(pandatio.sql, "this unit test requires SQLite installed")

    extension = ".db"

    # --- spaces schema, with hand-written table contents ---
    tdf = TicDatFactory(**spacesSchema())
    pdf = PanDatFactory(**spacesSchema())
    spaces_tables = {
        "a_table": {
            1: [1, 2, "3"],
            22.2: (12, 0.12, "something"),
            0.23: (11, 12, "thirt"),
        },
        "b_table": {
            (1, 2, "foo"): 1,
            (1012.22, 4, "0012"): 12,
        },
        "c_table": (
            ("this", 2, 3, 4),
            ("that", 102.212, 3, 5.5),
            ("another", 5, 12.5, 24),
        ),
    }
    source = tdf.TicDat(**spaces_tables)
    panDat = pan_dat_maker(spacesSchema(), source)
    db_path = os.path.join(_scratchDir, "spaces_2%s" % extension)
    with pandatio.sql.connect(db_path) as con:
        pdf.sql.write_file(
            panDat, db_file_path=None, con=con, case_space_table_names=True
        )
    with pandatio.sql.connect(db_path) as con:
        reread = pdf.sql.create_pan_dat(db_file_path=None, con=con)
    self.assertTrue(pdf._same_data(panDat, reread))

    # --- netflow schema ---
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    source = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(netflowSchema(), source)
    db_path = os.path.join(_scratchDir, "spaces_2_2%s" % extension)
    with pandatio.sql.connect(db_path) as con:
        # This time an empty string rather than None is passed as the path.
        pdf.sql.write_file(
            panDat, db_file_path="", con=con, case_space_table_names=True
        )
    with pandatio.sql.connect(db_path) as con:
        reread = pdf.sql.create_pan_dat(None, con)
    self.assertTrue(pdf._same_data(panDat, reread))
def testNetflow(self):
    """Exercise the CSV reader/writer on netflow data.

    Covers round trips with and without header rows, duplicate detection,
    overwrite protection, and how a numeric vs. string node name ("12")
    is read back depending on the declared data type of nodes.name.
    """
    if not self.can_run:
        return

    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}
    )
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])

    csv_dir = os.path.join(_scratchDir, "netflow")
    tdf.csv.write_directory(ticDat, csv_dir)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertFalse(tdf.csv.find_duplicates(csv_dir))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # Files have headers, but are read as if they did not.
    csvTicDat = tdf.csv.create_tic_dat(
        csv_dir, freeze_it=True, headers_present=False
    )
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))

    # Files without headers: the default read fails, a header-less read works.
    tdf.csv.write_directory(
        ticDat, csv_dir, write_header=False, allow_overwrite=True
    )
    self.assertTrue(
        self.firesException(
            lambda: tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
        )
    )
    csvTicDat = tdf.csv.create_tic_dat(
        csv_dir, headers_present=False, freeze_it=True
    )
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # the casting to floats is controlled by data types and default values
    ticDat.nodes[12] = {}
    tdf.csv.write_directory(ticDat, csv_dir, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))

    tdf2 = TicDatFactory(**netflowSchema())
    tdf2.set_data_type(
        "nodes", "name", strings_allowed='*', number_allowed=True
    )
    csvTicDat = tdf2.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # Swap the numeric key for its string form.
    del ticDat.nodes[12]
    ticDat.nodes['12'] = {}
    self.assertTrue(
        firesException(lambda: tdf.csv.write_directory(ticDat, csv_dir))
    )
    tdf.csv.write_directory(ticDat, csv_dir, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))
def testXlsSimple(self):
    """Round-trip diet and netflow PanDat objects through .xlsx files.

    Also covers reading and writing with a factory that omits the
    nutritionQuantities table, and with factories whose tables are all
    declared as '*'.
    """
    if not self.can_run:
        return

    def wildcard_factory(factory):
        # A factory with the same table names, each declared as '*'.
        return PanDatFactory(**{table: '*' for table in factory.all_tables})

    # --- diet schema ---
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(dietSchema(), frozen)
    xlsx_path = os.path.join(_scratchDir, "diet.xlsx")
    pdf.xls.write_file(panDat, xlsx_path)
    xlsPanDat = pdf.xls.create_pan_dat(xlsx_path)
    self.assertTrue(pdf._same_data(panDat, xlsPanDat))

    # Factory without the nutritionQuantities table.
    shrunk_schema = {
        name: spec
        for name, spec in dietSchema().items()
        if name != "nutritionQuantities"
    }
    pdf_shrunk = PanDatFactory(**shrunk_schema)
    self.assertTrue(len(pdf_shrunk.all_tables) == len(pdf.all_tables) - 1)
    xlsPanDatShrunk = pdf_shrunk.xls.create_pan_dat(xlsx_path)
    self.assertTrue(pdf_shrunk._same_data(panDat, xlsPanDatShrunk))

    shrunk_path = os.path.join(_scratchDir, "diet_shrunk.xlsx")
    # Reading is expected to fail at this point, before the file is written below.
    self.assertTrue(
        self.firesException(lambda: pdf.xls.create_pan_dat(shrunk_path))
    )
    pdf_shrunk.xls.write_file(panDat, shrunk_path)
    xlsPanDatShrunk = pdf.xls.create_pan_dat(shrunk_path)
    self.assertTrue(pdf_shrunk._same_data(panDat, xlsPanDatShrunk))

    generic = wildcard_factory(pdf)
    generic.xls.write_file(panDat, xlsx_path)
    xlsPanDat = generic.xls.create_pan_dat(xlsx_path)
    self.assertTrue(pdf._same_data(panDat, xlsPanDat))

    # --- netflow schema ---
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(netflowSchema(), frozen)
    xlsx_path = os.path.join(_scratchDir, "netflow.xlsx")
    pdf.xls.write_file(panDat, xlsx_path)
    reread = pdf.xls.create_pan_dat(xlsx_path)
    self.assertTrue(pdf._same_data(panDat, reread))

    # Here the '*' factory only reads the file written above.
    generic = wildcard_factory(pdf)
    xlsPanDat = generic.xls.create_pan_dat(xlsx_path)
    self.assertTrue(pdf._same_data(panDat, xlsPanDat))
def testCsvSimple(self):
    """Round-trip diet and netflow PanDat objects through CSV directories.

    Also covers factories whose tables are all declared as '*', and
    writing with ``decimal=","`` (which must be passed again on read for
    the data to match).
    """
    if not self.can_run:
        return

    def wildcard_factory(factory):
        # A factory with the same table names, each declared as '*'.
        return PanDatFactory(**{table: '*' for table in factory.all_tables})

    # --- diet schema ---
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(dietSchema(), frozen)
    csv_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, csv_dir)
    reread = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    # Here the '*' factory only reads the directory written above.
    generic = wildcard_factory(pdf)
    reread = generic.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    # --- netflow schema ---
    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(netflowSchema(), frozen)
    csv_dir = os.path.join(_scratchDir, "netflow_csv")
    pdf.csv.write_directory(panDat, csv_dir)
    reread = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    # Here the '*' factory both writes and reads.
    generic = wildcard_factory(pdf)
    generic.csv.write_directory(panDat, csv_dir)
    reread = generic.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    # --- diet schema again, with a comma as decimal separator ---
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    frozen = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(dietData(), t) for t in tdf.primary_key_fields})
    )
    panDat = pan_dat_maker(dietSchema(), frozen)
    csv_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, csv_dir, decimal=",")
    reread = pdf.csv.create_pan_dat(csv_dir)
    self.assertFalse(pdf._same_data(panDat, reread))
    reread = pdf.csv.create_pan_dat(csv_dir, decimal=",")
    self.assertTrue(pdf._same_data(panDat, reread))
def testThree(self):
    """Compare frozen and mutable TicDat objects against netflowData().

    Checks that a frozen TicDat is a snapshot (later edits to the source
    object are not reflected), that inconsistent key lengths are rejected,
    that frozen objects refuse edits and new attributes, and that mutable
    objects accept well-formed edits but still reject malformed ones.
    """
    objOrig = netflowData()
    staticFactory = TicDatFactory(**netflowSchema())

    def goodTable(table_name):
        # Table checker handed to _assertSame for the named table.
        def checker(candidate):
            return staticFactory.good_tic_dat_table(candidate, table_name)
        return checker

    tables = set(staticFactory.primary_key_fields)

    def frozen_from(source):
        # Frozen TicDat built from the table attributes of `source`.
        return staticFactory.freeze_me(
            staticFactory.TicDat(**{t: getattr(source, t) for t in tables})
        )

    ticDat = frozen_from(objOrig)
    self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(ticDat, t), goodTable(t))

    # Edit the source object; the frozen TicDat must not follow the edits.
    objOrig.commodities.append(12.3)
    objOrig.arcs[(1, 2)] = [12]
    self._assertSame(objOrig.nodes, ticDat.nodes, goodTable("nodes"))
    self._assertSame(objOrig.cost, ticDat.cost, goodTable("cost"))
    self.assertTrue(
        firesException(
            lambda: self._assertSame(
                objOrig.commodities, ticDat.commodities, goodTable("commodities")
            )
        )
    )
    self.assertTrue(
        firesException(
            lambda: self._assertSame(objOrig.arcs, ticDat.arcs, goodTable("arcs"))
        )
    )

    # Rebuilding from the edited source picks the edits up.
    ticDat = frozen_from(objOrig)
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(ticDat, t), goodTable(t))
    self.assertTrue(ticDat.arcs[1, 2]["capacity"] == 12)
    self.assertTrue(12.3 in ticDat.commodities)

    # A key of the wrong length in cost makes construction fail.
    objOrig.cost[5] = 5
    construction_error = firesException(lambda: frozen_from(objOrig))
    self.assertTrue(
        "cost cannot be treated as a ticDat table : Inconsistent key lengths"
        in construction_error
    )

    objOrig = netflowData()

    def editMeBadly(t):
        def rtn():
            t.cost["hack"] = 12
        return rtn

    def editMeWell(t):
        def rtn():
            t.cost["hack", "my", "balls"] = 12.12
        return rtn

    def attributeMe(t):
        def rtn():
            t.boger = "bogerwoger"
        return rtn

    # Frozen objects reject every edit, well-formed or not.
    frozen_objects = (ticDat, staticFactory.freeze_me(staticFactory.TicDat()))
    self.assertTrue(
        all(
            firesException(editMeWell(t)) and firesException(editMeBadly(t))
            for t in frozen_objects
        )
    )
    self.assertTrue(
        firesException(attributeMe(ticDat))
        and firesException(
            attributeMe(staticFactory.freeze_me(staticFactory.TicDat()))
        )
    )

    # Mutable objects accept the well-formed edit and a new attribute.
    mutable = staticFactory.TicDat(**{t: getattr(objOrig, t) for t in tables})
    for t in tables:
        self._assertSame(getattr(objOrig, t), getattr(mutable, t), goodTable(t))

    self.assertTrue(firesException(editMeBadly(mutable)))
    self.assertFalse(
        firesException(editMeWell(mutable)) or firesException(attributeMe(mutable))
    )
    self.assertTrue(
        firesException(
            lambda: self._assertSame(objOrig.cost, mutable.cost, goodTable("cost"))
        )
    )
def testVariousCoverages(self):
    """Touch assorted PanDatFactory configuration calls.

    Covers set_default_values, clear_foreign_keys, data row predicates
    surviving ``clone()``, and adding foreign keys among three
    single-field tables.
    """
    # Default values are stored as given.
    pdf = PanDatFactory(**dietSchema())
    defaults = {
        "categories": {"minNutrition": 0, "maxNutrition": float("inf")},
        "foods": {"cost": 0},
        "nutritionQuantities": {"qty": 0},
    }
    pdf.set_default_values(**defaults)
    self.assertTrue(pdf._default_values == defaults)

    # Clearing the foreign keys of "arcs" leaves those of cost and inflow.
    pdf = PanDatFactory(**netflowSchema())
    addNetflowForeignKeys(pdf)
    pdf.clear_foreign_keys("arcs")
    remaining_tables = {fk[0] for fk in pdf._foreign_keys}
    self.assertTrue(remaining_tables == {"cost", "inflow"})

    # After the three calls below only the "dummy" predicate remains.
    pdf.add_data_row_predicate("arcs", lambda row: True)
    pdf.add_data_row_predicate("arcs", lambda row: True, "dummy")
    pdf.add_data_row_predicate("arcs", None, 0)
    pdf = pdf.clone()
    self.assertTrue(set(pdf._data_row_predicates["arcs"]) == {"dummy"})

    # Foreign keys one -> two -> three -> one; nothing is asserted here.
    pdf = PanDatFactory(
        pdf_table_one=[["A Field"], []],
        pdf_table_two=[["B Field"], []],
        pdf_table_three=[["C Field"], []],
    )
    pdf.add_foreign_key("pdf_table_one", "pdf_table_two", ["A Field", "B Field"])
    pdf.add_foreign_key("pdf_table_two", "pdf_table_three", ["B Field", "C Field"])
    pdf.add_foreign_key("pdf_table_three", "pdf_table_one", ["C Field", "A Field"])
def testNetflow(self):
    """Exercise the CSV reader/writer on netflow data.

    Covers round trips with and without header rows, duplicate detection,
    overwrite protection, and the float-conversion behavior for a node
    named 12 (number) versus '12' (string).
    """
    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}
    )

    csv_dir = os.path.join(_scratchDir, "netflow")
    tdf.csv.write_directory(ticDat, csv_dir)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertFalse(tdf.csv.get_duplicates(csv_dir))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # Files have headers, but are read as if they did not.
    csvTicDat = tdf.csv.create_tic_dat(
        csv_dir, freeze_it=True, headers_present=False
    )
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))

    # Files without headers: the default read fails, a header-less read works.
    tdf.csv.write_directory(
        ticDat, csv_dir, write_header=False, allow_overwrite=True
    )
    self.assertTrue(
        self.firesException(
            lambda: tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
        )
    )
    csvTicDat = tdf.csv.create_tic_dat(
        csv_dir, headers_present=False, freeze_it=True
    )
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # A numeric node name survives the round trip.
    ticDat.nodes[12] = {}
    tdf.csv.write_directory(ticDat, csv_dir, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # minor flaw - strings that are floatable get turned into floats when reading csvs
    del ticDat.nodes[12]
    ticDat.nodes['12'] = {}
    self.assertTrue(
        firesException(lambda: tdf.csv.write_directory(ticDat, csv_dir))
    )
    tdf.csv.write_directory(ticDat, csv_dir, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
def testNetflow(self):
    """Exercise ``copy_to_pandas`` on frozen netflow data.

    Covers copying a subset of tables, ``sloc`` slicing of the arcs
    capacity column against the foreign key links, and rebuilding a TicDat
    from the pandas copy with and without ``drop_pk_columns=True``.
    """
    if not self.canRun:
        return

    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    oldDat = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    )
    self._test_generic_free_copy(oldDat, tdf)
    self._test_generic_free_copy(oldDat, tdf, ["arcs", "nodes"])

    # Only the requested tables become attributes of the copy.
    requested = ["arcs", "cost"]
    pandas_copy = tdf.copy_to_pandas(oldDat, requested)
    self.assertTrue(
        all(hasattr(pandas_copy, t) == (t in ["arcs", "cost"]) for t in tdf.all_tables)
    )

    boston = oldDat.nodes["Boston"]
    self.assertTrue(
        len(pandas_copy.arcs.capacity.sloc["Boston", :])
        == len(boston.arcs_source)
        == 0
    )
    self.assertTrue(
        len(pandas_copy.arcs.capacity.sloc[:, "Boston"])
        == len(boston.arcs_destination)
        == 2
    )
    self.assertTrue(
        all(
            pandas_copy.arcs.capacity.sloc[:, "Boston"][src] == row["capacity"]
            for src, row in boston.arcs_destination.items()
        )
    )

    # because we have single pk field tables, dropping the pk columns is problematic
    pandas_copy = tdf.copy_to_pandas(oldDat, drop_pk_columns=True)
    rebornTicDat = tdf.TicDat(
        **{t: getattr(pandas_copy, t) for t in tdf.all_tables}
    )
    self.assertFalse(tdf._same_data(rebornTicDat, oldDat))

    # but with the default argument all is well
    pandas_copy = tdf.copy_to_pandas(oldDat)
    rebornTicDat = tdf.TicDat(
        **{t: getattr(pandas_copy, t) for t in tdf.all_tables}
    )
    self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
    self.assertTrue(set(pandas_copy.inflow.columns) == {"quantity"})
    self.assertTrue(set(pandas_copy.nodes.columns) == {"name"})
def testNetflow(self):
    """Exercise the CSV reader/writer on netflow data.

    Covers generic copies, round trips with and without header rows,
    duplicate detection, overwrite protection, and the float-conversion
    behavior for a node named 12 (number) versus '12' (string).
    """
    if not self.can_run:
        return

    tdf = TicDatFactory(**netflowSchema())
    ticDat = tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}
    )
    self._test_generic_copy(ticDat, tdf)
    self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])

    csv_dir = os.path.join(_scratchDir, "netflow")
    tdf.csv.write_directory(ticDat, csv_dir)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertFalse(tdf.csv.find_duplicates(csv_dir))
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # Files have headers, but are read as if they did not.
    csvTicDat = tdf.csv.create_tic_dat(
        csv_dir, freeze_it=True, headers_present=False
    )
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))

    # Files without headers: the default read fails, a header-less read works.
    tdf.csv.write_directory(
        ticDat, csv_dir, write_header=False, allow_overwrite=True
    )
    self.assertTrue(
        self.firesException(
            lambda: tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
        )
    )
    csvTicDat = tdf.csv.create_tic_dat(
        csv_dir, headers_present=False, freeze_it=True
    )
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # A numeric node name survives the round trip.
    ticDat.nodes[12] = {}
    tdf.csv.write_directory(ticDat, csv_dir, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertTrue(tdf._same_data(ticDat, csvTicDat))

    # minor flaw - strings that are floatable get turned into floats when reading csvs
    del ticDat.nodes[12]
    ticDat.nodes['12'] = {}
    self.assertTrue(
        firesException(lambda: tdf.csv.write_directory(ticDat, csv_dir))
    )
    tdf.csv.write_directory(ticDat, csv_dir, allow_overwrite=True)
    csvTicDat = tdf.csv.create_tic_dat(csv_dir, freeze_it=True)
    self.assertFalse(tdf._same_data(ticDat, csvTicDat))
def testSimple(self):
    """Basic PanDatFactory checks on the netflow pandas data.

    Covers good_pan_dat_object (including its failure messages for a
    missing table and a missing field), copy_pan_dat, _same_data, a
    factory whose tables are all declared as '*', and detection of
    duplicated rows by _same_data.
    """
    if not self.canRun:
        return
    # Imported locally (after the guard) because DataFrame.append was
    # removed in pandas 2.0; pd.concat is the supported equivalent.
    import pandas as pd

    pdf = PanDatFactory(**netflowSchema())
    _dat = netflowPandasData()
    dat = pdf.PanDat(**{t: getattr(_dat, t) for t in pdf.all_tables})
    self.assertTrue(pdf.good_pan_dat_object(dat))

    dat2 = pdf.copy_pan_dat(dat)
    self.assertTrue(pdf._same_data(dat, dat2))
    self.assertTrue(pdf.good_pan_dat_object(dat2))

    # A missing table is reported through the message callback.
    delattr(dat2, "nodes")
    msg = []
    self.assertFalse(pdf.good_pan_dat_object(dat2, msg.append))
    self.assertTrue(msg[-1] == "nodes not an attribute.")

    # A missing field is reported as well.
    dat3 = pdf.copy_pan_dat(dat)
    dat3.cost.drop("commodity", axis=1, inplace=True)
    self.assertFalse(pdf.good_pan_dat_object(dat3, msg.append))
    self.assertTrue(
        "The following are (table, field) pairs missing from the data" in msg[-1]
    )

    # Changed values make the data differ.
    dat4 = pdf.copy_pan_dat(dat)
    dat4.cost["cost"] += 1
    self.assertFalse(pdf._same_data(dat, dat4))

    pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    dat5 = pdf2.copy_pan_dat(dat)
    self.assertTrue(pdf._same_data(dat, dat5))
    self.assertTrue(pdf2._same_data(dat, dat5))

    # Duplicate some rows; both factories must then see different data.
    dat.commodities = pd.concat(
        [dat.commodities, dat.commodities[dat.commodities["name"] == "Pencils"]]
    )
    dat.arcs = pd.concat(
        [dat.arcs, dat.arcs[dat.arcs["destination"] == "Boston"]]
    )
    self.assertFalse(pdf2._same_data(dat, dat5))
    self.assertFalse(pdf._same_data(dat, dat5))
def testNetflow(self):
    """Exercise the SQL text reader/writer and foreign-key failure reporting.

    Checks table ordering, frozen versus editable reads, overwrite
    protection, the error for an unrecognised field name, and the content
    of ``find_foreign_key_failures`` for rows that reference missing parents.
    """
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)

    # Referenced tables must be ordered ahead of the tables that point at them.
    table_order = tdf.sql._ordered_tables()
    position = table_order.index
    self.assertTrue(position("nodes") <
                    min(position(child) for child in ("arcs", "cost", "inflow")))
    self.assertTrue(position("commodities") <
                    min(position(child) for child in ("cost", "inflow")))

    frozen = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    sql_path = os.path.join(_scratchDir, "netflow.sql")
    tdf.sql.write_db_data(frozen, sql_path)
    from_sql = tdf.sql.create_tic_dat(sql_path, freeze_it=True)
    self.assertTrue(tdf._same_data(frozen, from_sql))

    def overwrite_quantity():
        from_sql.inflow['Pencils', 'Boston']["quantity"] = 12

    # A frozen read rejects the edit and therefore stays equal.
    self.assertTrue(self.firesException(overwrite_quantity))
    self.assertTrue(tdf._same_data(frozen, from_sql))

    # A default read accepts the edit and therefore diverges.
    from_sql = tdf.sql.create_tic_dat(sql_path)
    self.assertTrue(tdf._same_data(frozen, from_sql))
    self.assertFalse(self.firesException(overwrite_quantity))
    self.assertFalse(tdf._same_data(frozen, from_sql))

    # Rename the primary key of "nodes" and write with that schema.
    hacked_schema = netflowSchema()
    hacked_schema["nodes"][0] = ["nimrod"]
    hacked_tdf = TicDatFactory(**hacked_schema)
    hacked_dat = hacked_tdf.TicDat(
        **{t: getattr(frozen, t) for t in tdf.all_tables})
    hacked_tdf.sql.write_db_data(hacked_dat, makeCleanPath(sql_path))
    rewrite = lambda: hacked_tdf.sql.write_db_data(frozen, sql_path)
    self.assertTrue(self.firesException(rewrite))
    hacked_tdf.sql.write_db_data(frozen, sql_path, allow_overwrite=True)
    read_error = self.firesException(lambda: tdf.sql.create_tic_dat(sql_path))
    self.assertTrue("Unable to recognize field name in table nodes" in read_error)

    # Add rows that reference parents which do not exist.
    orphaned = tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})
    orphaned.cost['Pencils', 'booger', 'wooger'] = 10
    orphaned.cost['junker', 'Detroit', 'New York'] = 20
    orphaned.cost['bunker', 'Detroit', 'New Jerk'] = 20
    orphaned.arcs['booger', 'wooger'] = 112

    reported = {f[:2] + f[2][:1]: set(v.native_pks)
                for f, v in tdf.find_foreign_key_failures(orphaned).items()}
    expected = {
        ('arcs', 'nodes', u'destination'): {('booger', 'wooger')},
        ('arcs', 'nodes', u'source'): {('booger', 'wooger')},
        ('cost', 'commodities', u'commodity'): {('bunker', 'Detroit', 'New Jerk'),
                                                ('junker', 'Detroit', 'New York')},
        ('cost', 'nodes', u'destination'): {('bunker', 'Detroit', 'New Jerk'),
                                            ('Pencils', 'booger', 'wooger')},
        ('cost', 'nodes', u'source'): {('Pencils', 'booger', 'wooger')},
    }
    self.assertTrue(reported == expected)
def testCsvSpacey(self):
    """Round-trip PanDat objects through csv with ``case_space_table_names``.

    Runs once on a hand-built data set for the spaces schema and once on
    the netflow data with a ``:`` separator.
    """
    if not self.can_run:
        return
    self.assertTrue(pandatio.sql, "this unit test requires SQLite installed")

    tdf = TicDatFactory(**spacesSchema())
    pdf = PanDatFactory(**spacesSchema())
    spaced_tables = {
        "a_table": {
            1: [1, 2, "3"],
            22.2: (12, 0.12, "something"),
            0.23: (11, 12, "thirt"),
        },
        "b_table": {
            (1, 2, "foo"): 1,
            (1012.22, 4, "0012"): 12,
        },
        "c_table": (("this", 2, 3, 4),
                    ("that", 102.212, 3, 5.5),
                    ("another", 5, 12.5, 24)),
    }
    tic_source = tdf.TicDat(**spaced_tables)
    written = pan_dat_maker(spacesSchema(), tic_source)
    csv_dir = os.path.join(_scratchDir, "spaces_2_csv")
    pdf.csv.write_directory(written, csv_dir, case_space_table_names=True)
    read_back = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(written, read_back))

    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    tic_source = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    written = pan_dat_maker(netflowSchema(), tic_source)
    csv_dir = os.path.join(_scratchDir, "spaces_2_2_csv")
    pdf.csv.write_directory(written, csv_dir, case_space_table_names=True,
                            sep=":")
    read_back = pdf.csv.create_pan_dat(csv_dir, sep=":")
    self.assertTrue(pdf._same_data(written, read_back))
def testNetflow(self):
    """Read netflow data back from a mocked inputset via ``tdf.opalytics``.

    Every combination of the ``hack`` and ``raw_data`` flags is tried, both
    on the plain data and after an integer-keyed node is added.
    """
    if not self.can_run:
        return
    for hack, raw_data in product([True, False], repeat=2):
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.copy_tic_dat(netflowData())

        inputset = create_inputset_mock(tdf, ticDat, hack)
        recovered = tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, recovered))

        ticDat.nodes[12] = {}
        inputset = create_inputset_mock(tdf, ticDat, hack)
        recovered = tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, recovered))
def testNetflow(self):
    """Round-trip the netflow data through OPL text, with and without a prepend."""
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    tables = {t: getattr(netflowData(), t) for t in tdf.primary_key_fields}
    frozen = tdf.freeze_me(tdf.TicDat(**tables))

    opl_text = create_opl_text(tdf, frozen)
    parsed = read_opl_text(tdf, opl_text)
    self.assertTrue(tdf._same_data(frozen, parsed))

    # Same round trip once opl_prepend has been set on the factory.
    tdf.opl_prepend = "stuff"
    opl_text = create_opl_text(tdf, frozen)
    parsed = read_opl_text(tdf, opl_text)
    self.assertTrue(tdf._same_data(frozen, parsed))
def testJsonSpacey(self):
    """Round-trip PanDat objects through json with ``case_space_table_names``.

    Both the file path and the in-memory string form (empty path) are
    checked, for the spaces schema and for netflow. The netflow data is
    additionally rebuilt from a ``columns``-oriented json string.
    """
    if not self.can_run:
        return
    ext = ".json"

    tdf = TicDatFactory(**spacesSchema())
    pdf = PanDatFactory(**spacesSchema())
    tic_source = tdf.TicDat(**spacesData())
    written = pan_dat_maker(spacesSchema(), tic_source)
    json_path = os.path.join(_scratchDir, "spaces_2%s" % ext)
    pdf.json.write_file(written, json_path, case_space_table_names=True)
    from_file = pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(written, from_file))
    as_text = pdf.json.write_file(written, "", case_space_table_names=True)
    from_text = pdf.json.create_pan_dat(as_text)
    self.assertTrue(pdf._same_data(written, from_text))

    tdf = TicDatFactory(**netflowSchema())
    pdf = PanDatFactory(**netflowSchema())
    tic_source = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    written = pan_dat_maker(netflowSchema(), tic_source)
    json_path = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
    pdf.json.write_file(written, json_path, case_space_table_names=True)
    from_file = pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(written, from_file))
    as_text = pdf.json.write_file(written, "", case_space_table_names=True)
    from_text = pdf.json.create_pan_dat(as_text)
    self.assertTrue(pdf._same_data(written, from_text))

    # A columns-oriented dump can be fed straight to the PanDat constructor.
    dicted = json.loads(pdf.json.write_file(written, "", orient='columns'))
    from_dict = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(written, from_dict, epsilon=1e-5))
def testNetflow_oplrunRequired(self):
    """Solve the sample netflow model with oplrun and check two known results."""
    self.assertTrue(_can_run_oplrun_tests)
    in_tdf = TicDatFactory(**netflowSchema())
    in_tdf.enable_foreign_key_links()
    soln_tdf = TicDatFactory(
        flow=[["source", "destination", "commodity"], ["quantity"]],
        parameters=[["paramKey"], ["value"]])
    tables = {t: getattr(netflowData(), t) for t in in_tdf.primary_key_fields}
    dat = in_tdf.TicDat(**tables)

    model_path = get_testing_file_path("sample_netflow.mod")
    opl_soln = opl_run(model_path, in_tdf, dat, soln_tdf)

    total_cost = opl_soln.parameters["Total Cost"]["value"]
    self.assertTrue(nearlySame(total_cost, 5500))
    pens_flow = opl_soln.flow["Pens", "Detroit", "New York"]["quantity"]
    self.assertTrue(nearlySame(pens_flow, 30))
def testNetflowOpalytics(self):
    """Read netflow data from a mocked inputset via ``pdf.opalytics``.

    Every combination of the ``hack`` and ``raw_data`` flags is tried, both
    on the plain data and after an integer-keyed node is added. The result
    is compared after converting the PanDat back to a TicDat.
    """
    if not self.can_run:
        return
    for hack, raw_data in itertools.product([True, False], repeat=2):
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.copy_tic_dat(netflowData())

        mock = create_inputset_mock(tdf, ticDat, hack)
        pdf = PanDatFactory(**tdf.schema())
        pan_result = pdf.opalytics.create_pan_dat(mock, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, pdf.copy_to_tic_dat(pan_result)))

        ticDat.nodes[12] = {}
        mock = create_inputset_mock(tdf, ticDat, hack)
        pdf = PanDatFactory(**tdf.schema())
        pan_result = pdf.opalytics.create_pan_dat(mock, raw_data=raw_data)
        self.assertTrue(tdf._same_data(ticDat, pdf.copy_to_tic_dat(pan_result)))
def testNetflow(self):
    """Round-trip the netflow data through json, in verbose and terse form.

    For each ``verbose`` setting this checks the in-memory string form, a
    file on disk, duplicate detection, overwrite protection, and that
    numeric-looking string keys are preserved.
    """
    if not self.can_run:
        return
    for verbose in [True, False]:
        tdf = TicDatFactory(**netflowSchema())
        original = tdf.TicDat(
            **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields})

        as_text = tdf.json.write_file(original, "")
        from_text = tdf.json.create_tic_dat(as_text)
        self.assertTrue(tdf._same_data(original, from_text, epsilon=0.0001))

        clean_dir = makeCleanDir(os.path.join(_scratchDir, "netflow"))
        json_path = os.path.join(clean_dir, "file.json")
        tdf.json.write_file(original, json_path, verbose=verbose)
        from_file = tdf.json.create_tic_dat(json_path, freeze_it=True)
        self.assertFalse(tdf.json.find_duplicates(json_path))
        self.assertTrue(tdf._same_data(original, from_file))

        original.nodes[12] = {}
        tdf.json.write_file(original, json_path, verbose=verbose,
                            allow_overwrite=True)
        from_file = tdf.json.create_tic_dat(json_path, freeze_it=True)
        self.assertTrue(tdf._same_data(original, from_file))

        # unlike csv, json format respects strings that are floatable
        del original.nodes[12]
        original.nodes['12'] = {}
        write_without_overwrite = lambda: tdf.json.write_file(
            original, json_path, verbose=verbose)
        self.assertTrue(firesException(write_without_overwrite))
        tdf.json.write_file(original, json_path, allow_overwrite=True,
                            verbose=verbose)
        from_file = tdf.json.create_tic_dat(json_path, freeze_it=True)
        self.assertTrue(tdf._same_data(original, from_file))
def testSpacey2(self):
    """Round-trip TicDat objects through .xls and .xlsx with ``case_space_sheet_names``."""
    if not self.can_run:
        return
    extensions = [".xls", ".xlsx"]

    tdf = TicDatFactory(**spacesSchema())
    written = tdf.TicDat(**spacesData())
    for ext in extensions:
        xls_path = os.path.join(_scratchDir, "spaces_2%s" % ext)
        tdf.xls.write_file(written, xls_path, case_space_sheet_names=True)
        read_back = tdf.xls.create_tic_dat(xls_path)
        self.assertTrue(tdf._same_data(written, read_back))

    tdf = TicDatFactory(**netflowSchema())
    written = tdf.freeze_me(tdf.TicDat(
        **{t: getattr(netflowData(), t) for t in tdf.primary_key_fields}))
    for ext in extensions:
        xls_path = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        tdf.xls.write_file(written, xls_path, case_space_sheet_names=True)
        read_back = tdf.xls.create_tic_dat(xls_path)
        self.assertTrue(tdf._same_data(written, read_back))
def testNetflow_runlingoRequired(self):
    """Solve the sample netflow model with LINGO and check one known flow."""
    self.assertTrue(tlingo._can_run_lingo_run_tests)
    in_tdf = TicDatFactory(**netflowSchema())

    # (native table, foreign table, [native field, foreign field]), added
    # in the same order as the original individual calls.
    foreign_keys = (
        ("arcs", "nodes", ['source', 'name']),
        ("arcs", "nodes", ['destination', 'name']),
        ("cost", "nodes", ['source', 'name']),
        ("cost", "nodes", ['destination', 'name']),
        ("cost", "commodities", ['commodity', 'name']),
        ("inflow", "commodities", ['commodity', 'name']),
        ("inflow", "nodes", ['node', 'name']),
    )
    for native, foreign, mapping in foreign_keys:
        in_tdf.add_foreign_key(native, foreign, mapping)

    solution_variables = TicDatFactory(
        flow=[["Commodity", "Source", "Destination"], ["quantity"]])
    tables = {t: getattr(netflowData(), t) for t in in_tdf.primary_key_fields}
    dat = in_tdf.TicDat(**tables)

    model_path = get_testing_file_path("sample_netflow.lng")
    lingo_soln = tlingo.lingo_run(model_path, in_tdf, dat, solution_variables)
    pens_flow = lingo_soln.flow["Pens", "Detroit", "New York"]["quantity"]
    self.assertTrue(nearlySame(pens_flow, 30))
def testRoundTrips(self):
    """Convert TicDat -> pandas -> TicDat and check what survives.

    Diet and netflow data are expected to round-trip exactly. The remaining
    sections cover duplicate primary keys and ``None``/``nan`` values in key
    and data positions.
    """
    if not self.canRun:
        return

    def check_round_trip(tdf, data_maker):
        # Freeze the fixture, push it through pandas, and compare.
        frozen = tdf.freeze_me(tdf.TicDat(
            **{t: getattr(data_maker(), t) for t in tdf.primary_key_fields}))
        full_pdf = PanDatFactory.create_from_full_schema(
            tdf.schema(include_ancillary_info=True))
        as_pandas = tdf.copy_to_pandas(frozen, drop_pk_columns=False)
        self.assertTrue(full_pdf.good_pan_dat_object(as_pandas))
        back_again = full_pdf.copy_to_tic_dat(as_pandas)
        self.assertTrue(tdf._same_data(frozen, back_again))

    diet_tdf = TicDatFactory(**dietSchema())
    diet_tdf.enable_foreign_key_links()
    check_round_trip(diet_tdf, dietData)

    netflow_tdf = TicDatFactory(**netflowSchema())
    netflow_tdf.enable_foreign_key_links()
    addNetflowForeignKeys(netflow_tdf)
    check_round_trip(netflow_tdf, netflowData)

    # Two rows share the key (1, 10); the TicDat copy ends up one row short.
    pdf = PanDatFactory(table=[["a", "b"], ["c"]])
    pan_dat = pdf.PanDat(table=utils.DataFrame({
        "a": [1, 2, 1, 1],
        "b": [10, 10, 10, 11],
        "c": [101, 102, 103, 104],
    }))
    duplicates = pdf.find_duplicates(pan_dat, keep=False)
    self.assertTrue(len(duplicates["table"]) == 2)
    tic_dat = pdf.copy_to_tic_dat(pan_dat)
    self.assertTrue(len(tic_dat.table) == len(pan_dat.table) - 1)

    # None appearing in a primary key field and in a data field.
    tdf = TicDatFactory(**pdf.schema())
    tic_dat = tdf.TicDat(table=[[1, 2, 3], [None, 2, 3], [2, 1, None]])
    self.assertTrue(len(tic_dat.table) == 3)
    tic_dat_two = pdf.copy_to_tic_dat(
        tdf.copy_to_pandas(tic_dat, drop_pk_columns=False))
    self.assertFalse(tdf._same_data(tic_dat, tic_dat_two))
    nan = float("nan")
    tic_dat3 = tdf.TicDat(table=[[1, 2, 3], [float("nan"), 2, 3], [2, 1, nan]])
    # this fails because _same_data isn't smart enough to check against nan
    # in the keys, because float("nan") != float("nan")
    self.assertFalse(tdf._same_data(tic_dat3, tic_dat_two))

    # Same idea with a single key field, so None/nan only hit data fields.
    pdf = PanDatFactory(table=[["a"], ["b", "c"]])
    tdf = TicDatFactory(**pdf.schema())
    tic_dat = tdf.TicDat(table=[[1, 2, 3], [2, None, 3], [2, 1, None]])
    tic_dat_two = pdf.copy_to_tic_dat(
        tdf.copy_to_pandas(tic_dat, drop_pk_columns=False))
    self.assertFalse(tdf._same_data(tic_dat, tic_dat_two))
    tic_dat3 = tdf.TicDat(
        table=[[1, 2, 3], [2, float("nan"), 3], [2, 1, float("nan")]])
    # _same_data can treat nan as equal to nan in data rows when asked to
    self.assertTrue(tdf._same_data(tic_dat3, tic_dat_two,
                                   nans_are_same_for_data_rows=True))
def testCsvSimple(self):
    """Round-trip PanDat objects through csv directories.

    Covers the diet and netflow data, '*' factories, directories that only
    contain a subset of the tables, and a non-default decimal character.
    """
    if not self.can_run:
        return

    def frozen_tic_dat(schema_maker, data_maker):
        # Build the frozen TicDat fixture for one schema.
        maker_tdf = TicDatFactory(**schema_maker())
        return maker_tdf.freeze_me(maker_tdf.TicDat(
            **{t: getattr(data_maker(), t)
               for t in maker_tdf.primary_key_fields}))

    # --- diet, plain round trip -----------------------------------------
    pdf = PanDatFactory(**dietSchema())
    panDat = pan_dat_maker(dietSchema(), frozen_tic_dat(dietSchema, dietData))
    csv_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, csv_dir)
    read_back = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, read_back))

    star_pdf = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    read_back = star_pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, read_back))

    # --- diet, directory without nutritionQuantities --------------------
    partial_schema = {k: v for k, v in dietSchema().items()
                      if k != "nutritionQuantities"}
    partial_pdf = PanDatFactory(**partial_schema)
    partial_dat = partial_pdf.copy_pan_dat(panDat)
    csv_dir = os.path.join(_scratchDir, "diet_missing_csv")
    # NOTE(review): makeCleanDir's return value is passed as the third
    # positional argument of write_directory - confirm this is intended.
    partial_pdf.csv.write_directory(partial_dat, csv_dir, makeCleanDir(csv_dir))
    full_read = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(partial_pdf._same_data(partial_dat, full_read))
    self.assertTrue(all(hasattr(full_read, x) for x in pdf.all_tables))
    self.assertFalse(len(full_read.nutritionQuantities))
    self.assertTrue(len(full_read.categories) and len(full_read.foods))

    # --- diet, directory with categories only ---------------------------
    partial_schema = {k: v for k, v in dietSchema().items()
                      if k == "categories"}
    partial_pdf = PanDatFactory(**partial_schema)
    partial_dat = partial_pdf.copy_pan_dat(panDat)
    partial_pdf.csv.write_directory(partial_dat, csv_dir, makeCleanDir(csv_dir))
    full_read = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(partial_pdf._same_data(partial_dat, full_read))
    self.assertTrue(all(hasattr(full_read, x) for x in pdf.all_tables))
    self.assertFalse(
        len(full_read.nutritionQuantities) or len(full_read.foods))
    self.assertTrue(len(full_read.categories))

    # --- netflow --------------------------------------------------------
    pdf = PanDatFactory(**netflowSchema())
    panDat = pan_dat_maker(netflowSchema(),
                           frozen_tic_dat(netflowSchema, netflowData))
    csv_dir = os.path.join(_scratchDir, "netflow_csv")
    pdf.csv.write_directory(panDat, csv_dir)
    read_back = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, read_back))

    star_pdf = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    star_pdf.csv.write_directory(panDat, csv_dir)
    read_back = star_pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, read_back))

    # --- diet, comma as the decimal character ---------------------------
    pdf = PanDatFactory(**dietSchema())
    panDat = pan_dat_maker(dietSchema(), frozen_tic_dat(dietSchema, dietData))
    csv_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, csv_dir, decimal=",")
    read_back = pdf.csv.create_pan_dat(csv_dir)
    self.assertFalse(pdf._same_data(panDat, read_back))
    read_back = pdf.csv.create_pan_dat(csv_dir, decimal=",")
    self.assertTrue(pdf._same_data(panDat, read_back))
def testDataPredicates(self):
    """Check ``add_data_row_predicate`` / ``find_data_row_failures`` on PanDat.

    The diet section runs the same predicate checks against the real schema
    and against a '*' schema. The netflow section checks a predicate on
    ``arcs.capacity`` after string capacities are written into the data.
    """
    if not self.canRun:
        return

    # ---- diet ----------------------------------------------------------
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticdat = tdf.TicDat()
    ticdat.foods["a"] = 12
    ticdat.foods["b"] = None
    ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
    ticdat.categories["2"] = [21,20]
    for food, category in itertools.product(ticdat.foods, ticdat.categories):
        ticdat.nutritionQuantities[food, category] = 5
    diet_pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_duplicates(diet_pandat))
    self.assertFalse(pdf.find_data_row_failures(diet_pandat))

    ticdat.nutritionQuantities['a', 2] = 12
    ticdat.categories["3"] = ['a', 100]
    diet_pandat_2 = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))

    def perform_predicate_checks(sch):
        checked_pdf = PanDatFactory(**sch)
        checked_pdf.add_data_row_predicate(
            "foods",
            lambda row: numericish(row["cost"]) and not isnan(row["cost"]),
            "cost")
        checked_pdf.add_data_row_predicate(
            "nutritionQuantities", lambda row: 5 < row["qty"] <= 12, "qty")
        checked_pdf.add_data_row_predicate(
            "categories",
            lambda row: row["maxNutrition"] >= row["minNutrition"],
            "minmax")

        failures = checked_pdf.find_data_row_failures(diet_pandat)
        self.assertTrue(set(failures) == {('foods', 'cost'),
                                          ('nutritionQuantities', 'qty'),
                                          ('categories', 'minmax')})
        self.assertTrue(set(failures['foods', 'cost']["name"]) == {'b'})
        qty_rows = failures['nutritionQuantities', 'qty'].T.to_dict().values()
        self.assertTrue(set({(v["food"], v["category"]) for v in qty_rows}) ==
                        {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
        self.assertTrue(set(failures['categories', 'minmax']["name"]) == {'2'})

        failures = checked_pdf.find_data_row_failures(diet_pandat,
                                                      as_table=False)
        self.assertTrue(
            4 == failures['nutritionQuantities', 'qty'].value_counts()[True])

        failures = checked_pdf.find_data_row_failures(diet_pandat_2)
        self.assertTrue(
            set(failures['categories', 'minmax']["name"]) == {'2', '3'})

    perform_predicate_checks(dietSchema())
    perform_predicate_checks({t:'*' for t in dietSchema()})

    # ---- netflow -------------------------------------------------------
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    pdf = PanDatFactory(**netflowSchema())
    ticdat = tdf.copy_tic_dat(netflowData())
    # Overwrite each Detroit arc with its destination key.
    for n in ticdat.nodes["Detroit"].arcs_source:
        ticdat.arcs["Detroit", n] = n
    netflow_pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_duplicates(netflow_pandat))
    self.assertFalse(pdf.find_data_row_failures(netflow_pandat))

    # A predicate that always passes yields no failures.
    pdf = PanDatFactory(**netflowSchema())
    pdf.add_data_row_predicate("arcs", lambda row: True, "capacity")
    self.assertFalse(pdf.find_data_row_failures(netflow_pandat))

    pdf = PanDatFactory(**netflowSchema())

    def good_capacity(capacity):
        return numericish(capacity) or capacity in ["Boston", "Seattle", "lumberjack"]

    pdf.add_data_row_predicate(
        "arcs", lambda row: good_capacity(row["capacity"]), "capacity")
    failures = pdf.find_data_row_failures(netflow_pandat)
    self.assertTrue(set(failures) == {('arcs', 'capacity')})
    arc_rows = failures['arcs', 'capacity'].T.to_dict().values()
    self.assertTrue(set({(v["source"], v["destination"]) for v in arc_rows}) ==
                    {("Detroit", "New York")})
def testJsonSimple(self):
    """Round-trip PanDat objects through json files and json strings.

    Covers diet and netflow data, '*' factories, a schema whose field order
    differs from the file, and the ``columns`` orientation.
    """
    if not self.can_run:
        return

    def frozen_tic_dat(schema_maker, data_maker):
        # Build the frozen TicDat fixture for one schema.
        maker_tdf = TicDatFactory(**schema_maker())
        return maker_tdf.freeze_me(maker_tdf.TicDat(
            **{t: getattr(data_maker(), t)
               for t in maker_tdf.primary_key_fields}))

    # ---- diet, default orientation -------------------------------------
    pdf = PanDatFactory(**dietSchema())
    panDat = pan_dat_maker(dietSchema(), frozen_tic_dat(dietSchema, dietData))
    json_path = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, json_path)
    read_back = pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(panDat, read_back, epsilon=1e-5))

    star_pdf = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    star_pdf.json.write_file(panDat, json_path)
    read_back = star_pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(panDat, read_back, epsilon=1e-5))

    # Columns come back in the order declared by the reading factory.
    re_fielded_schema = {
        "categories": (("name", ), ["maxNutrition", "minNutrition"]),
        "foods": [["name"], []],
        "nutritionQuantities": (["food", "category"], ["qty"]),
    }
    re_fielded_pdf = PanDatFactory(**re_fielded_schema)
    re_fielded_dat = re_fielded_pdf.json.create_pan_dat(json_path)
    for table, (pks, dfs) in re_fielded_schema.items():
        expected_columns = list(pks) + list(dfs)
        self.assertTrue(
            expected_columns == list(getattr(re_fielded_dat, table).columns))

    # ---- netflow -------------------------------------------------------
    pdf = PanDatFactory(**netflowSchema())
    panDat = pan_dat_maker(netflowSchema(),
                           frozen_tic_dat(netflowSchema, netflowData))
    json_path = os.path.join(_scratchDir, "netflow.json")
    pdf.json.write_file(panDat, json_path)
    read_back = pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(panDat, read_back, epsilon=1e-5))

    from_text = pdf.json.create_pan_dat(pdf.json.write_file(panDat, ""))
    self.assertTrue(pdf._same_data(panDat, from_text))

    dicted = json.loads(pdf.json.write_file(panDat, ""))
    from_dict = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, from_dict))
    star_pdf = PanDatFactory(**{t: '*' for t in pdf.all_tables})
    from_dict_star = star_pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, from_dict_star))

    # ---- diet, columns orientation -------------------------------------
    pdf = PanDatFactory(**dietSchema())
    panDat = pan_dat_maker(dietSchema(), frozen_tic_dat(dietSchema, dietData))
    json_path = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, json_path, orient='columns', index=True)
    # the following doesn't generate a TicDatError, which is fine
    read_with_defaults = lambda: pdf.json.create_pan_dat(json_path)
    self.assertTrue(firesException(read_with_defaults))
    read_back = pdf.json.create_pan_dat(json_path, orient='columns')
    self.assertTrue(pdf._same_data(panDat, read_back, epsilon=1e-5))

    columns_text = pdf.json.write_file(panDat, "", orient='columns')
    from_text = pdf.json.create_pan_dat(columns_text, orient="columns")
    self.assertTrue(pdf._same_data(panDat, from_text, epsilon=1e-5))

    dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
    from_dict = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, from_dict, epsilon=1e-5))
def testDataPredicates(self):
    """Check data-row predicates on PanDat, including error-message predicates.

    The diet section registers three boolean predicates on one factory and
    wraps the same predicates on a second factory so that a failure returns
    a message string. Both factories are expected to report the same
    failing rows, and exceptions raised by a predicate are checked with and
    without ``exception_handling``. The netflow and "silly" sections cover a
    capacity predicate and error-message column naming.
    """
    # this test won't run properly if the -O flag is applied
    if not self.canRun:
        return

    # ---- diet ----------------------------------------------------------
    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    ticdat = tdf.TicDat()
    ticdat.foods["a"] = 12
    ticdat.foods["b"] = None
    ticdat.categories["1"] = {"maxNutrition":100, "minNutrition":40}
    ticdat.categories["2"] = [21,20]
    for food, category in itertools.product(ticdat.foods, ticdat.categories):
        ticdat.nutritionQuantities[food, category] = 5
    diet_pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_duplicates(diet_pandat))
    self.assertFalse(pdf.find_data_row_failures(diet_pandat))

    ticdat.nutritionQuantities['a', 2] = 12
    ticdat.categories["3"] = ['a', 100]
    diet_pandat_2 = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))

    def perform_predicate_checks(sch):
        bool_pdf = PanDatFactory(**sch)
        bool_pdf.add_data_row_predicate(
            "foods",
            lambda row: numericish(row["cost"]) and not isnan(row["cost"]),
            "cost")
        bool_pdf.add_data_row_predicate(
            "nutritionQuantities", lambda row: 5 < row["qty"] <= 12, "qty")
        bool_pdf.add_data_row_predicate(
            "categories",
            lambda row: row["maxNutrition"] >= row["minNutrition"],
            "minmax")

        # Second factory: same predicates, but a failure yields a message.
        msg_pdf = PanDatFactory(**sch)

        def make_error_message_predicate(f, name):
            def error_message_predicate(row):
                if f(row):
                    return True
                return f"{name} failed!"
            return error_message_predicate

        for table, predicates in bool_pdf._data_row_predicates.items():
            for p_name, rpi in predicates.items():
                msg_pdf.add_data_row_predicate(
                    table,
                    make_error_message_predicate(rpi.predicate, p_name),
                    predicate_name=p_name,
                    predicate_failure_response="Error Message")

        bool_failed = bool_pdf.find_data_row_failures(diet_pandat)
        msg_failed = msg_pdf.find_data_row_failures(diet_pandat)
        self.assertTrue(set(bool_failed) == set(msg_failed) ==
                        {('foods', 'cost'), ('nutritionQuantities', 'qty'),
                         ('categories', 'minmax')})
        self.assertTrue(set(bool_failed['foods', 'cost']["name"]) ==
                        set(msg_failed['foods', 'cost']["name"]) == {'b'})
        for result in [bool_failed, msg_failed]:
            qty_rows = result['nutritionQuantities', 'qty'].T.to_dict().values()
            self.assertTrue(
                set({(v["food"], v["category"]) for v in qty_rows}) ==
                {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
            self.assertTrue(
                set(result['categories', 'minmax']["name"]) == {'2'})
        for t, n in msg_failed:
            self.assertTrue(
                set(msg_failed[t, n]["Error Message"]) == {f'{n} failed!'})

        for factory in [bool_pdf, msg_pdf]:
            flags = factory.find_data_row_failures(diet_pandat, as_table=False)
            self.assertTrue(
                4 == flags['nutritionQuantities', 'qty'].value_counts()[True])

            # Without exception handling the predicate's error propagates.
            raised = []
            try:
                factory.find_data_row_failures(diet_pandat_2)
            except Exception as e:
                raised[:] = [str(e.__class__)]
            self.assertTrue("TypeError" in raised[0])

            handled = factory.find_data_row_failures(
                diet_pandat_2, exception_handling="Handled as Failure")
            self.assertTrue(
                set(handled['categories', 'minmax']["name"]) == {'2', '3'})

        handled = msg_pdf.find_data_row_failures(
            diet_pandat_2, exception_handling="Handled as Failure")
        minmax_df = handled['categories', 'minmax']
        err_str = list(minmax_df[minmax_df['name'] == '3']["Error Message"])[0]
        self.assertTrue(err_str=="Exception<'>=' not supported between instances of 'int' and 'str'>")

    perform_predicate_checks(dietSchema())
    perform_predicate_checks({t:'*' for t in dietSchema()})

    # ---- netflow -------------------------------------------------------
    tdf = TicDatFactory(**netflowSchema())
    tdf.enable_foreign_key_links()
    addNetflowForeignKeys(tdf)
    pdf = PanDatFactory(**netflowSchema())
    ticdat = tdf.copy_tic_dat(netflowData())
    # Overwrite each Detroit arc with its destination key.
    for n in ticdat.nodes["Detroit"].arcs_source:
        ticdat.arcs["Detroit", n] = n
    netflow_pandat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticdat))
    self.assertFalse(pdf.find_duplicates(netflow_pandat))
    self.assertFalse(pdf.find_data_row_failures(netflow_pandat))

    # A predicate that always passes yields no failures.
    pdf = PanDatFactory(**netflowSchema())
    pdf.add_data_row_predicate("arcs", lambda row: True, "capacity")
    self.assertFalse(pdf.find_data_row_failures(netflow_pandat))

    pdf = PanDatFactory(**netflowSchema())

    def good_capacity(capacity):
        return numericish(capacity) or capacity in ["Boston", "Seattle", "lumberjack"]

    pdf.add_data_row_predicate(
        "arcs", lambda row: good_capacity(row["capacity"]), "capacity")
    failures = pdf.find_data_row_failures(netflow_pandat)
    self.assertTrue(set(failures) == {('arcs', 'capacity')})
    arc_rows = failures['arcs', 'capacity'].T.to_dict().values()
    self.assertTrue(set({(v["source"], v["destination"]) for v in arc_rows}) ==
                    {("Detroit", "New York")})

    # ---- error-message column naming -----------------------------------
    # The table already has "Error Message" and "Error Message (1)" fields,
    # so the failure text is expected under "Error Message (2)".
    pdf = PanDatFactory(table=[[],["Field", "Error Message", "Error Message (1)"]])
    pdf.add_data_row_predicate(
        "table",
        predicate=lambda row: f"Oops {row['Field']}" if row["Field"] > 1 else True,
        predicate_name="silly",
        predicate_failure_response="Error Message")
    input_df = DataFrame({"Field":[2, 1],
                          "Error Message":["what", "go"],
                          "Error Message (1)": ["now", "go"]})
    fails = pdf.find_data_row_failures(pdf.PanDat(table=input_df))
    silly_df = fails["table", "silly"]
    self.assertTrue(list(silly_df.columns) ==
                    ["Field", "Error Message", "Error Message (1)", "Error Message (2)"])
    self.assertTrue(set(silly_df["Field"]) == {2} and
                    set(silly_df["Error Message (2)"]) == {'Oops 2'})
def testNine(self):
    """Check that ``TicDatFactory.schema()`` echoes the schema it was built from.

    For each sample schema the value returned by ``schema()`` is compared
    with the input schema after every field collection is converted to a
    list.
    """
    for schema in (dietSchema(), sillyMeSchema(), netflowSchema()):
        reported = TicDatFactory(**schema).schema()
        # map() is lazy on Python 3 and never compares equal to a list, so
        # the expected value is built from real lists.
        expected = {table: [list(fields) for fields in parts]
                    for table, parts in schema.items()}
        # assertTrue rather than a bare assert, so the check survives -O.
        self.assertTrue(reported == expected)
def testEight(self):
    """Check ``set_data_type``, ``find_data_type_failures`` and
    ``replace_data_type_failures`` on diet and netflow data.

    ``tdf`` is rebound several times; the nested builders read it at call
    time, so each ``build_*`` call uses whichever factory is current.
    """
    # ---- diet ----------------------------------------------------------
    tdf = TicDatFactory(**dietSchema())

    def build_diet_dat():
        rtn = tdf.TicDat()
        rtn.foods["a"] = 12
        rtn.foods["b"] = None
        rtn.categories["1"] = {"maxNutrition":100, "minNutrition":40}
        rtn.categories["2"] = [10,20]
        for food, category in itertools.product(rtn.foods, rtn.categories):
            rtn.nutritionQuantities[food, category] = 5
        rtn.nutritionQuantities['a', 2] = 12
        return tdf.freeze_me(rtn)

    dat = build_diet_dat()
    self.assertFalse(tdf.find_data_type_failures(dat))

    # Non-nullable cost with a default, and qty restricted to (5, 12].
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=False)
    tdf.set_data_type("nutritionQuantities", "qty", min=5,
                      inclusive_min=False, max=12, inclusive_max=True)
    tdf.set_default_value("foods", "cost", 2)
    dat = build_diet_dat()
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failed) ==
                    {('foods', 'cost'), ('nutritionQuantities', 'qty')})
    qty_failure = failed['nutritionQuantities', 'qty']
    self.assertTrue(set(qty_failure.pks) ==
                    {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
    self.assertTrue(qty_failure.bad_values == (5,))

    # Without a replacement value for qty the repair raises.
    ex = self.firesException(
        lambda: tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(all(_ in ex for _ in
                        ("replacement value", "nutritionQuantities", "qty")))
    fixedDat = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(dat),
        replacement_values={("nutritionQuantities", "qty"):5.001})
    self.assertFalse(tdf.find_data_type_failures(fixedDat) or
                     tdf._same_data(fixedDat, dat))
    self.assertTrue(all(fixedDat.nutritionQuantities[pk]["qty"] == 5.001
                        for pk in failed['nutritionQuantities', 'qty'].pks))
    self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and
                    fixedDat.foods["b"]["cost"] == 2 and
                    fixedDat.nutritionQuantities['a', 2]["qty"] == 12)

    # Same repair with the cost replacement passed explicitly instead of
    # through a default value.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=False)
    tdf.set_data_type("nutritionQuantities", "qty", min=5,
                      inclusive_min=False, max=12, inclusive_max=True)
    fixedDat2 = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(dat),
        replacement_values={("nutritionQuantities", "qty"):5.001,
                            ("foods", "cost") : 2})
    self.assertTrue(tdf._same_data(fixedDat, fixedDat2))

    # Numbers disallowed for qty: every row fails.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=True)
    tdf.set_data_type("nutritionQuantities", "qty",number_allowed=False)
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failed) == {('nutritionQuantities', 'qty')})
    self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                    set(dat.nutritionQuantities))
    ex = self.firesException(
        lambda: tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(all(_ in ex for _ in
                        ("replacement value", "nutritionQuantities", "qty")))

    # Default data type on cost: the None cost is replaced with 0.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost")
    fixedDat = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(build_diet_dat()))
    self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and
                    fixedDat.foods["b"]["cost"] == 0)

    # ---- netflow: link attributes only exist once links are enabled -----
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))

    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    tdf.enable_foreign_key_links()
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))

    # ---- netflow: string capacities ------------------------------------
    tdf = TicDatFactory(**netflowSchema())

    def build_netflow_dat():
        # Foreign keys are set up once per factory, on first use.
        if not tdf.foreign_keys:
            tdf.enable_foreign_key_links()
            addNetflowForeignKeys(tdf)
        orig = netflowData()
        rtn = tdf.copy_tic_dat(orig)
        # Overwrite each Detroit arc with its destination key.
        for n in rtn.nodes["Detroit"].arcs_source:
            rtn.arcs["Detroit", n] = n
        self.assertTrue(all(len(getattr(rtn, t)) == len(getattr(orig, t))
                            for t in tdf.all_tables))
        return tdf.freeze_me(rtn)

    dat = build_netflow_dat()
    self.assertFalse(tdf.find_data_type_failures(dat))

    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity", strings_allowed="*")
    dat = build_netflow_dat()
    self.assertFalse(tdf.find_data_type_failures(dat))

    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity",
                      strings_allowed=["Boston", "Seattle", "lumberjack"])
    dat = build_netflow_dat()
    failed = tdf.find_data_type_failures(dat)
    self.assertTrue(failed == {('arcs', 'capacity'):(("New York",), (("Detroit", "New York"),))})

    fixedDat = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(build_netflow_dat()))
    netflowData_ = tdf.copy_tic_dat(netflowData())
    self.assertFalse(tdf.find_data_type_failures(fixedDat) or
                     tdf._same_data(dat, netflowData_))

    # Repairing with capacity 80 and restoring the two remaining Detroit
    # arcs by hand is expected to reproduce the untouched netflow data.
    repaired = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(build_netflow_dat()),
        {("arcs", "capacity"):80, ("cost","cost") :"imok"})
    fixedDat = tdf.copy_tic_dat(repaired)
    fixedDat.arcs["Detroit", "Boston"] = 100
    fixedDat.arcs["Detroit", "Seattle"] = 120
    self.assertTrue(tdf._same_data(fixedDat, netflowData_))
def testFive(self):
    """Check obfusimplify output and TicDatFactory foreign key bookkeeping.

    Covers, in order:
      * obfusimplify on the netflow data, with and without skip_tables;
      * the foreign keys reported for the netflow and diet schemas, and
        the effect of clear_foreign_keys on them;
      * the cardinality reported for foreign keys on a small hand-built
        parent/child schema, and rejection of a many-to-many mapping.
    """
    # --- obfusimplify -----------------------------------------------------
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.freeze_me(tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables}))
    obfudat = tdf.obfusimplify(dat, freeze_it=1)
    self.assertFalse(tdf._same_data(dat, obfudat.copy))
    for (s, d), r in obfudat.copy.arcs.items():
        # Keys of the copy must not appear in the original; index 1 of a
        # renamings entry is used here to look the original row back up.
        self.assertFalse((s, d) in dat.arcs)
        self.assertTrue(dat.arcs[obfudat.renamings[s][1], obfudat.renamings[d][1]]["capacity"] == r["capacity"])
    # With both of these tables skipped the copy is expected to equal the input.
    obfudat = tdf.obfusimplify(dat, freeze_it=1, skip_tables=["commodities", "nodes"])
    self.assertTrue(tdf._same_data(obfudat.copy, dat))

    # --- foreign keys on the netflow schema -------------------------------
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    mone, one2one = "many-to-one", "one-to-one"
    fk, fkm = _ForeignKey, _ForeignKeyMapping
    self.assertTrue(set(tdf.foreign_keys) == {
        fk("arcs", 'nodes', fkm('source', u'name'), mone),
        fk("arcs", 'nodes', fkm('destination', u'name'), mone),
        fk("cost", 'nodes', fkm('source', u'name'), mone),
        fk("cost", 'nodes', fkm('destination', u'name'), mone),
        fk("cost", 'commodities', fkm('commodity', u'name'), mone),
        fk("inflow", 'commodities', fkm('commodity', u'name'), mone),
        fk("inflow", 'nodes', fkm('node', u'name'), mone)})

    # Clearing by native table removes only the keys declared on "cost".
    tdf.clear_foreign_keys("cost")
    self.assertTrue(set(tdf.foreign_keys) == {
        fk("arcs", 'nodes', fkm('source', u'name'), mone),
        fk("arcs", 'nodes', fkm('destination', u'name'), mone),
        fk("inflow", 'commodities', fkm('commodity', u'name'), mone),
        fk("inflow", 'nodes', fkm('node', u'name'), mone)})

    # --- foreign keys on the diet schema ----------------------------------
    tdf = TicDatFactory(**dietSchema())
    self.assertFalse(tdf.foreign_keys)
    addDietForeignKeys(tdf)
    self.assertTrue(set(tdf.foreign_keys) == {
        fk("nutritionQuantities", 'categories', fkm('category', u'name'), mone),
        fk("nutritionQuantities", 'foods', fkm('food', u'name'), mone)})

    # Once a TicDat has been created from the factory, clearing foreign keys
    # is expected to raise and leave the existing keys in place.
    tdf.TicDat()
    self.assertTrue(self.firesException(lambda: tdf.clear_foreign_keys("nutritionQuantities")))
    self.assertTrue(tdf.foreign_keys)

    # On a fresh factory (no TicDat created yet) the same call succeeds.
    tdf = TicDatFactory(**dietSchema())
    addDietForeignKeys(tdf)
    tdf.clear_foreign_keys("nutritionQuantities")
    self.assertFalse(tdf.foreign_keys)

    # --- cardinality on a hand-built parent/child schema ------------------
    tdf = TicDatFactory(parentTable=[["pk"], ["pd1", "pd2", "pd3"]],
                        goodChild=[["gk"], ["gd1", "gd2"]],
                        badChild=[["bk1", "bk2"], ["bd"]],
                        appendageChild=[["ak"], ["ad1", "ad2"]],
                        appendageBadChild=[["bk1", "bk2"], []])
    tdf.add_foreign_key("goodChild", "parentTable", fkm("gd1", "pk"))
    tdf.add_foreign_key("badChild", "parentTable", ["bk2", "pk"])
    # Mapping one data field onto another data field is rejected with a
    # message mentioning "many-to-many".
    self.assertTrue("many-to-many" in self.firesException(
        lambda: tdf.add_foreign_key("badChild", "parentTable", ["bd", "pd2"])))
    tdf.add_foreign_key("appendageChild", "parentTable", ["ak", "pk"])
    tdf.add_foreign_key("appendageBadChild", "badChild", (("bk2", "bk2"), ("bk1", "bk1")))
    fks = tdf.foreign_keys

    def _getfk(t):
        # First foreign key whose native (child) table is t.
        return next(_ for _ in fks if _.native_table == t)

    self.assertTrue(_getfk("goodChild").cardinality == mone)
    self.assertTrue(_getfk("badChild").cardinality == mone)
    self.assertTrue(_getfk("appendageChild").cardinality == one2one)
    self.assertTrue(_getfk("appendageBadChild").cardinality == one2one)

    tdf.clear_foreign_keys("appendageBadChild")
    # Other tables' keys must survive, and no remaining key may be declared
    # on appendageBadChild. The entries of foreign_keys are key records, not
    # table names, so the check has to go through native_table; a plain
    # `"appendageBadChild" not in tdf.foreign_keys` would always be true.
    self.assertTrue(tdf.foreign_keys)
    self.assertFalse(any(_.native_table == "appendageBadChild" for _ in tdf.foreign_keys))
    tdf.clear_foreign_keys()
    self.assertFalse(tdf.foreign_keys)