def testDiet(self):
    """Round-trip the diet data through a db file and a SQL text file.

    The round trip runs once on a factory without foreign keys and once on
    a factory with default values and the diet foreign keys. In between,
    the test checks table ordering and foreign-key failure detection and
    removal.

    NOTE(review): a second ``testDiet`` appears later in this file; if both
    end up in the same class the later one replaces this one -- confirm.
    """
    def diet_tables(factory):
        # One constructor argument per table named in primary_key_fields.
        return {name: getattr(dietData(), name)
                for name in factory.primary_key_fields}

    def doTheTests(factory):
        expected = factory.freeze_me(factory.TicDat(**diet_tables(factory)))

        # --- db file round trip ---
        db_path = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
        factory.sql.write_db_data(expected, db_path)
        loaded = factory.sql.create_tic_dat(db_path)

        def matches_expected():
            # Reads whatever ``loaded`` is bound to at call time.
            return factory._same_data(expected, loaded)

        def tamper():
            loaded.categories["calories"]["minNutrition"] = 12

        self.assertTrue(matches_expected())
        tamper()
        self.assertFalse(matches_expected())

        # Writing over the existing file must fail unless explicitly allowed.
        self.assertTrue(self.firesException(
            lambda: factory.sql.write_db_data(expected, db_path)))
        factory.sql.write_db_data(expected, db_path, allow_overwrite=True)
        loaded = factory.sql.create_tic_dat(db_path, freeze_it=True)
        self.assertTrue(matches_expected())
        # With freeze_it=True the edit is expected to raise and change nothing.
        self.assertTrue(self.firesException(tamper))
        self.assertTrue(matches_expected())

        # --- SQL text file round trip ---
        sql_path = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
        factory.sql.write_sql_file(expected, sql_path)
        loaded = factory.sql.create_tic_dat_from_sql(sql_path)
        self.assertTrue(matches_expected())
        tamper()
        self.assertFalse(matches_expected())

        factory.sql.write_sql_file(expected, sql_path, include_schema=True)
        loaded = factory.sql.create_tic_dat_from_sql(
            sql_path, includes_schema=True, freeze_it=True)
        self.assertTrue(matches_expected())
        self.assertTrue(self.firesException(tamper))
        self.assertTrue(matches_expected())

    doTheTests(TicDatFactory(**dietSchema()))

    keyed = TicDatFactory(**dietSchema())
    self.assertFalse(keyed.foreign_keys)
    default_values = {
        "categories": {'maxNutrition': float("inf"), 'minNutrition': 0.0},
        "foods": {'cost': 0.0},
        "nutritionQuantities": {'qty': 0.0},
    }
    keyed.set_default_values(**default_values)
    addDietForeignKeys(keyed)

    # Both parent tables must come before the child table.
    table_order = keyed.sql._ordered_tables()
    for parent in ("categories", "foods"):
        self.assertTrue(
            table_order.index(parent) < table_order.index("nutritionQuantities"))

    dat = keyed.TicDat(**diet_tables(keyed))
    pristine = keyed.copy_tic_dat(dat)
    self.assertTrue(keyed._same_data(dat, pristine))
    self.assertFalse(keyed.find_foreign_key_failures(dat))

    # Add two child rows whose parents do not exist (same order as the
    # original chained assignment).
    dat.nutritionQuantities['hot dog', 'boger'] = -12
    dat.nutritionQuantities['junk', 'protein'] = -12
    expected_failures = {
        ('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'):
            (('junk',), (('junk', 'protein'),)),
        ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
            (('boger',), (('hot dog', 'boger'),)),
    }
    self.assertTrue(keyed.find_foreign_key_failures(dat) == expected_failures)
    self.assertFalse(keyed._same_data(dat, pristine))

    # NOTE(review): other tests in this file spell this method
    # ``remove_foreign_key_failures`` -- confirm this spelling exists too.
    keyed.remove_foreign_keys_failures(dat)
    self.assertFalse(keyed.find_foreign_key_failures(dat))
    self.assertTrue(keyed._same_data(dat, pristine))

    doTheTests(keyed)
def testDietCleaningOpalytisThree(self):
    """Compare raw and non-raw pan_dat loads when a row predicate is set.

    The raw load must equal the source data. The non-raw load must differ
    from it, and must equal the source data once the "fat" category is
    dropped and foreign-key failures are removed.
    """
    def max_nutrition_ok(row):
        return row["maxNutrition"] >= 66

    tic_factory = TicDatFactory(**dietSchema())
    tic_factory.add_data_row_predicate("categories", max_nutrition_ok)
    addDietForeignKeys(tic_factory)
    reference = tic_factory.copy_tic_dat(dietData())

    pan_factory = PanDatFactory(**tic_factory.schema())
    pan_factory.add_data_row_predicate("categories", max_nutrition_ok)
    addDietForeignKeys(pan_factory)

    mock_inputs = create_inputset_mock(tic_factory, reference)

    raw = pan_factory.opalytics.create_pan_dat(mock_inputs, raw_data=True)
    self.assertTrue(
        tic_factory._same_data(pan_factory.copy_to_tic_dat(raw), reference))

    purged = pan_factory.opalytics.create_pan_dat(mock_inputs, raw_data=False)

    def purged_equals_reference():
        # Converts on every call, as the original did inline each time.
        return tic_factory._same_data(
            pan_factory.copy_to_tic_dat(purged), reference)

    self.assertFalse(purged_equals_reference())

    reference.categories.pop("fat")
    self.assertFalse(purged_equals_reference())
    tic_factory.remove_foreign_key_failures(reference)
    self.assertTrue(purged_equals_reference())
def testDietCleaningOpalyticsTwo(self):
    """Compare raw and non-raw pan_dat loads when a data type is set.

    Both factories get the diet foreign keys and a ``min=66`` data type on
    ``categories.maxNutrition``. The raw load must equal the source data.
    The non-raw load must equal it only after "fat" is dropped and
    foreign-key failures are removed.
    """
    def constrain(factory):
        addDietForeignKeys(factory)
        factory.set_data_type(
            "categories", "maxNutrition", min=66, inclusive_max=True)

    tic_factory = TicDatFactory(**dietSchema())
    constrain(tic_factory)
    reference = tic_factory.copy_tic_dat(dietData())
    mock_inputs = create_inputset_mock(tic_factory, reference)

    pan_factory = PanDatFactory(**dietSchema())
    constrain(pan_factory)

    raw = pan_factory.opalytics.create_pan_dat(mock_inputs, raw_data=True)
    self.assertTrue(
        tic_factory._same_data(pan_factory.copy_to_tic_dat(raw), reference))

    purged = pan_factory.opalytics.create_pan_dat(mock_inputs, raw_data=False)

    def purged_equals_reference():
        # Converts on every call, as the original did inline each time.
        return tic_factory._same_data(
            pan_factory.copy_to_tic_dat(purged), reference)

    self.assertFalse(purged_equals_reference())

    reference.categories.pop("fat")
    self.assertFalse(purged_equals_reference())
    tic_factory.remove_foreign_key_failures(reference)
    self.assertTrue(purged_equals_reference())
def testDietCleaningFive(self):
    """Compare raw and non-raw tic_dat loads with a predicate and a data type.

    The raw load must equal the source data. The non-raw load must differ
    from it, and must equal the source data once "fat", "calories" and
    "protein" are dropped and foreign-key failures are removed.
    """
    factory = TicDatFactory(**dietSchema())
    factory.add_data_row_predicate(
        "categories", lambda row: row["maxNutrition"] >= 66)
    factory.set_data_type(
        "categories", "minNutrition", max=0, inclusive_max=True)
    addDietForeignKeys(factory)
    reference = factory.copy_tic_dat(dietData())

    mock_inputs = create_inputset_mock(factory, reference)

    raw = factory.opalytics.create_tic_dat(mock_inputs, raw_data=True)
    self.assertTrue(factory._same_data(raw, reference))

    purged = factory.opalytics.create_tic_dat(mock_inputs, raw_data=False)
    self.assertFalse(factory._same_data(purged, reference))

    for category in ("fat", "calories", "protein"):
        reference.categories.pop(category)
    self.assertFalse(factory._same_data(purged, reference))

    # NOTE(review): other tests in this file spell this method
    # ``remove_foreign_key_failures`` -- confirm this spelling exists too.
    factory.remove_foreign_keys_failures(reference)
    self.assertTrue(factory._same_data(purged, reference))
def testDietCleaningThree_2(self):
    """Compare raw and non-raw tic_dat loads when a parent row is missing.

    The "fat" category is removed before the mock input set is built. The
    raw load must equal that data. The non-raw load must differ from it,
    and must equal it once foreign-key failures are removed.
    """
    factory = TicDatFactory(**dietSchema())
    addDietForeignKeys(factory)
    reference = factory.copy_tic_dat(dietData())
    reference.categories.pop("fat")

    mock_inputs = create_inputset_mock(factory, reference)

    raw = factory.opalytics.create_tic_dat(mock_inputs, raw_data=True)
    self.assertTrue(factory._same_data(raw, reference))

    purged = factory.opalytics.create_tic_dat(mock_inputs, raw_data=False)
    self.assertFalse(factory._same_data(purged, reference))

    factory.remove_foreign_key_failures(reference)
    self.assertTrue(factory._same_data(purged, reference))
def test_fk_max_failures(self):
    """Check how ``max_failures`` limits reported foreign-key failures.

    Ten ``nutritionQuantities`` rows are created with no matching parent
    rows. The test asserts the number of result entries and their sizes
    with no limit and with limits of 11, 10 and 9.
    """
    tic_factory = TicDatFactory(**dietSchema())
    addDietForeignKeys(tic_factory)
    orphan_rows = [[f"food_{i}", f"cat_{i}", 10] for i in range(10)]
    dat = tic_factory.TicDat(nutritionQuantities=orphan_rows)
    pan_dat = tic_factory.copy_to_pandas(dat, drop_pk_columns=False)
    pan_factory = PanDatFactory.create_from_full_schema(
        tic_factory.schema(include_ancillary_info=True))

    def failure_sizes(**limits):
        # Sorted sizes of each reported failure entry; the list length is
        # the number of entries.
        found = pan_factory.find_foreign_key_failures(pan_dat, **limits)
        return sorted(len(entry) for entry in found.values())

    self.assertTrue(failure_sizes() == [10, 10])
    self.assertTrue(failure_sizes(max_failures=11) == [1, 10])
    self.assertTrue(failure_sizes(max_failures=10) == [10])
    self.assertTrue(failure_sizes(max_failures=9) == [9])
def testDiet(self):
    """Round-trip the diet data through a db file and a SQL text file.

    Does nothing when ``self.can_run`` is falsy. Otherwise the round trip
    runs once on a factory without foreign keys and once on a factory with
    default values and the diet foreign keys. In between, the test checks
    table ordering, generic copying, and foreign-key failure detection and
    removal.
    """
    if not self.can_run:
        return

    def diet_tables(factory):
        # One constructor argument per table named in primary_key_fields.
        return {name: getattr(dietData(), name)
                for name in factory.primary_key_fields}

    def doTheTests(factory):
        expected = factory.freeze_me(factory.TicDat(**diet_tables(factory)))

        # --- db file round trip ---
        db_path = makeCleanPath(os.path.join(_scratchDir, "diet.db"))
        factory.sql.write_db_data(expected, db_path)
        self.assertFalse(factory.sql.find_duplicates(db_path))
        loaded = factory.sql.create_tic_dat(db_path)

        def matches_expected():
            # Reads whatever ``loaded`` is bound to at call time.
            return factory._same_data(expected, loaded)

        def tamper():
            loaded.categories["calories"]["minNutrition"] = 12

        self.assertTrue(matches_expected())
        tamper()
        self.assertFalse(matches_expected())

        # Writing over the existing file must fail unless explicitly allowed.
        self.assertTrue(self.firesException(
            lambda: factory.sql.write_db_data(expected, db_path)))
        factory.sql.write_db_data(expected, db_path, allow_overwrite=True)
        loaded = factory.sql.create_tic_dat(db_path, freeze_it=True)
        self.assertTrue(matches_expected())
        # With freeze_it=True the edit is expected to raise and change nothing.
        self.assertTrue(self.firesException(tamper))
        self.assertTrue(matches_expected())

        # --- SQL text file round trip ---
        sql_path = makeCleanPath(os.path.join(_scratchDir, "diet.sql"))
        factory.sql.write_sql_file(expected, sql_path)
        loaded = factory.sql.create_tic_dat_from_sql(sql_path)
        self.assertTrue(matches_expected())
        tamper()
        self.assertFalse(matches_expected())

        factory.sql.write_sql_file(
            expected, sql_path, include_schema=True, allow_overwrite=True)
        loaded = factory.sql.create_tic_dat_from_sql(
            sql_path, includes_schema=True, freeze_it=True)
        self.assertTrue(matches_expected())
        self.assertTrue(self.firesException(tamper))
        self.assertTrue(matches_expected())

    doTheTests(TicDatFactory(**dietSchema()))

    keyed = TicDatFactory(**dietSchema())
    self.assertFalse(keyed.foreign_keys)
    default_values = {
        "categories": {'maxNutrition': float("inf"), 'minNutrition': 0.0},
        "foods": {'cost': 0.0},
        "nutritionQuantities": {'qty': 0.0},
    }
    keyed.set_default_values(**default_values)
    addDietForeignKeys(keyed)

    # Both parent tables must come before the child table.
    table_order = keyed.sql._ordered_tables()
    for parent in ("categories", "foods"):
        self.assertTrue(
            table_order.index(parent) < table_order.index("nutritionQuantities"))

    dat = keyed.TicDat(**diet_tables(keyed))
    self._test_generic_copy(dat, keyed)
    self._test_generic_copy(dat, keyed, ["nutritionQuantities"])
    pristine = keyed.copy_tic_dat(dat)
    self.assertTrue(keyed._same_data(dat, pristine))
    self.assertFalse(keyed.find_foreign_key_failures(dat))

    # Add two child rows whose parents do not exist (same order as the
    # original chained assignment).
    dat.nutritionQuantities['hot dog', 'boger'] = -12
    dat.nutritionQuantities['junk', 'protein'] = -12
    expected_failures = {
        ('nutritionQuantities', 'foods', ('food', 'name'), 'many-to-one'):
            (('junk',), (('junk', 'protein'),)),
        ('nutritionQuantities', 'categories', ('category', 'name'), 'many-to-one'):
            (('boger',), (('hot dog', 'boger'),)),
    }
    self.assertTrue(keyed.find_foreign_key_failures(dat) == expected_failures)
    self.assertFalse(keyed._same_data(dat, pristine))

    keyed.remove_foreign_key_failures(dat)
    self.assertFalse(keyed.find_foreign_key_failures(dat))
    self.assertTrue(keyed._same_data(dat, pristine))

    doTheTests(keyed)
def testFive(self):
    """Exercise ``obfusimplify`` and foreign-key bookkeeping on TicDatFactory.

    Sections, in order:
      1. obfusimplify on the netflow data: the copy differs from the
         original, arcs map back through ``renamings``, and skipping the
         "commodities" and "nodes" tables yields data equal to the original.
      2. The foreign keys reported for the netflow and diet schemas, and
         the effect of ``clear_foreign_keys`` on them.
      3. Cardinalities reported for foreign keys added by hand to a small
         parent/child schema, and clearing them by table and in full.
    """
    # --- 1. obfusimplify -------------------------------------------------
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.freeze_me(
        tdf.TicDat(**{t: getattr(netflowData(), t) for t in tdf.all_tables}))
    obfudat = tdf.obfusimplify(dat, freeze_it=1)
    self.assertFalse(tdf._same_data(dat, obfudat.copy))
    for (s, d), r in obfudat.copy.arcs.items():
        # Renamed keys must not collide with original keys.
        self.assertFalse((s, d) in dat.arcs)
        # As used here, renamings[x][1] is the original key for renamed x.
        original_arc = dat.arcs[obfudat.renamings[s][1], obfudat.renamings[d][1]]
        self.assertTrue(original_arc["capacity"] == r["capacity"])

    obfudat = tdf.obfusimplify(dat, freeze_it=1, skip_tables=["commodities", "nodes"])
    self.assertTrue(tdf._same_data(obfudat.copy, dat))

    # --- 2. foreign keys on the netflow and diet schemas ------------------
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    mone, one2one = "many-to-one", "one-to-one"
    fk, fkm = _ForeignKey, _ForeignKeyMapping
    self.assertTrue(set(tdf.foreign_keys) == {
        fk("arcs", 'nodes', fkm('source', u'name'), mone),
        fk("arcs", 'nodes', fkm('destination', u'name'), mone),
        fk("cost", 'nodes', fkm('source', u'name'), mone),
        fk("cost", 'nodes', fkm('destination', u'name'), mone),
        fk("cost", 'commodities', fkm('commodity', u'name'), mone),
        fk("inflow", 'commodities', fkm('commodity', u'name'), mone),
        fk("inflow", 'nodes', fkm('node', u'name'), mone)})

    # Clearing by table must remove only the "cost" foreign keys.
    tdf.clear_foreign_keys("cost")
    self.assertTrue(set(tdf.foreign_keys) == {
        fk("arcs", 'nodes', fkm('source', u'name'), mone),
        fk("arcs", 'nodes', fkm('destination', u'name'), mone),
        fk("inflow", 'commodities', fkm('commodity', u'name'), mone),
        fk("inflow", 'nodes', fkm('node', u'name'), mone)})

    tdf = TicDatFactory(**dietSchema())
    self.assertFalse(tdf.foreign_keys)
    addDietForeignKeys(tdf)
    self.assertTrue(set(tdf.foreign_keys) == {
        fk("nutritionQuantities", 'categories', fkm('category', u'name'), mone),
        fk("nutritionQuantities", 'foods', fkm('food', u'name'), mone)})

    # After a TicDat has been created, clearing is expected to raise and
    # leave the foreign keys in place.
    tdf.TicDat()
    self.assertTrue(self.firesException(
        lambda: tdf.clear_foreign_keys("nutritionQuantities")))
    self.assertTrue(tdf.foreign_keys)

    tdf = TicDatFactory(**dietSchema())
    addDietForeignKeys(tdf)
    tdf.clear_foreign_keys("nutritionQuantities")
    self.assertFalse(tdf.foreign_keys)

    # --- 3. hand-built schema: cardinalities and clearing -----------------
    tdf = TicDatFactory(parentTable=[["pk"], ["pd1", "pd2", "pd3"]],
                        goodChild=[["gk"], ["gd1", "gd2"]],
                        badChild=[["bk1", "bk2"], ["bd"]],
                        appendageChild=[["ak"], ["ad1", "ad2"]],
                        appendageBadChild=[["bk1", "bk2"], []])
    tdf.add_foreign_key("goodChild", "parentTable", fkm("gd1", "pk"))
    tdf.add_foreign_key("badChild", "parentTable", ["bk2", "pk"])
    # firesException's result is searched for text here, so it presumably
    # returns the exception message -- confirm against the helper.
    self.assertTrue("many-to-many" in self.firesException(
        lambda: tdf.add_foreign_key("badChild", "parentTable", ["bd", "pd2"])))
    tdf.add_foreign_key("appendageChild", "parentTable", ["ak", "pk"])
    tdf.add_foreign_key("appendageBadChild", "badChild",
                        (("bk2", "bk2"), ("bk1", "bk1")))

    fks = tdf.foreign_keys

    def _getfk(table):
        # First foreign key whose child table is ``table``.
        return next(_ for _ in fks if _.native_table == table)

    self.assertTrue(_getfk("goodChild").cardinality == mone)
    self.assertTrue(_getfk("badChild").cardinality == mone)
    self.assertTrue(_getfk("appendageChild").cardinality == one2one)
    self.assertTrue(_getfk("appendageBadChild").cardinality == one2one)

    tdf.clear_foreign_keys("appendageBadChild")
    remaining = tdf.foreign_keys
    # Other foreign keys must survive a per-table clear.
    self.assertTrue(remaining)
    # Compare on native_table: foreign_keys holds key objects, so testing
    # membership of the bare table name would always pass.
    self.assertFalse(
        any(_.native_table == "appendageBadChild" for _ in remaining))

    tdf.clear_foreign_keys()
    self.assertFalse(tdf.foreign_keys)