def testVariousCoverages(self):
    """Exercise assorted PanDatFactory bookkeeping calls.

    Asserts that ``set_default_values`` stores exactly the mapping it was
    given, that after ``clear_foreign_keys("arcs")`` the first elements of
    the remaining foreign-key entries are ``"cost"`` and ``"inflow"``, and
    that only the predicate named ``"dummy"`` is registered for ``"arcs"``
    on a clone. It finishes by registering a ring of foreign keys over
    three single-field tables; that last part has no assertion, so it only
    passes if none of the calls raise.
    """
    # --- default values -------------------------------------------------
    diet_factory = PanDatFactory(**dietSchema())
    defaults = {
        "categories": {"minNutrition": 0, "maxNutrition": float("inf")},
        "foods": {"cost": 0},
        "nutritionQuantities": {"qty": 0},
    }
    diet_factory.set_default_values(**defaults)
    self.assertTrue(diet_factory._default_values == defaults)

    # --- clearing foreign keys -------------------------------------------
    netflow_factory = PanDatFactory(**netflowSchema())
    addNetflowForeignKeys(netflow_factory)
    netflow_factory.clear_foreign_keys("arcs")
    fk_heads = {entry[0] for entry in netflow_factory._foreign_keys}
    self.assertTrue(fk_heads == {"cost", "inflow"})

    # --- data row predicates surviving a clone ---------------------------
    netflow_factory.add_data_row_predicate("arcs", lambda row: True)
    netflow_factory.add_data_row_predicate("arcs", lambda row: True, "dummy")
    # NOTE(review): presumably a None predicate removes the entry named 0
    # (the unnamed predicate added first) -- confirm against the factory.
    netflow_factory.add_data_row_predicate("arcs", None, 0)
    cloned_factory = netflow_factory.clone()
    self.assertTrue(set(cloned_factory._data_row_predicates["arcs"]) == {"dummy"})

    # --- ring of foreign keys across three tables ------------------------
    ring_factory = PanDatFactory(
        pdf_table_one=[["A Field"], []],
        pdf_table_two=[["B Field"], []],
        pdf_table_three=[["C Field"], []],
    )
    ring_links = (
        ("pdf_table_one", "pdf_table_two", ["A Field", "B Field"]),
        ("pdf_table_two", "pdf_table_three", ["B Field", "C Field"]),
        ("pdf_table_three", "pdf_table_one", ["C Field", "A Field"]),
    )
    for first_table, second_table, field_pair in ring_links:
        ring_factory.add_foreign_key(first_table, second_table, field_pair)
def testDietWithInfFlagging(self):
    """Write diet data with an infinity I/O flag and read it back per format.

    A factory whose infinity I/O flag is 999999999 writes the diet data to
    sql, csv, json and xls targets under the scratch directory. For each
    format the test asserts that:

    * the writing factory, and a clone of it, read back data equal to the
      original (``epsilon=1e-5``);
    * a factory built only from the schema does not, and sees 999999999
      as the "protein" ``maxNutrition``;
    * overwriting that cell with ``float("inf")`` makes the data match.
    """
    flagged_factory = PanDatFactory(**dietSchema())
    addDietDataTypes(flagged_factory)
    tdf = TicDatFactory(**dietSchema())
    dat = tdf.copy_to_pandas(tdf.copy_tic_dat(dietData()), drop_pk_columns=False)
    flagged_factory.set_infinity_io_flag(999999999)

    core_path = os.path.join(_scratchDir, "diet_with_inf_flagging")
    db_path = core_path + ".db"
    csv_dir = core_path + "_csv"
    json_path = core_path + ".json"
    xlsx_path = core_path + ".xlsx"

    flagged_factory.sql.write_file(dat, db_path)
    flagged_factory.csv.write_directory(dat, csv_dir)
    flagged_factory.json.write_file(dat, json_path)
    flagged_factory.xls.write_file(dat, xlsx_path)

    targets = (
        ("sql", db_path),
        ("csv", csv_dir),
        ("json", json_path),
        ("xls", xlsx_path),
    )
    for io_name, location in targets:
        # The factory that wrote the files reads them back unchanged.
        reread = getattr(flagged_factory, io_name).create_pan_dat(location)
        self.assertTrue(flagged_factory._same_data(dat, reread, epsilon=1e-5))

        # So does a clone of that factory.
        clone = flagged_factory.clone()
        reread = getattr(clone, io_name).create_pan_dat(location)
        self.assertTrue(clone._same_data(dat, reread, epsilon=1e-5))

        # A factory rebuilt from the schema alone reads the raw flag value.
        plain = PanDatFactory(**flagged_factory.schema())
        reread = getattr(plain, io_name).create_pan_dat(location)
        self.assertFalse(plain._same_data(dat, reread, epsilon=1e-5))
        is_protein = reread.categories["name"] == "protein"
        protein_max = list(reread.categories[is_protein]["maxNutrition"])[0]
        self.assertTrue(protein_max == 999999999)

        # Restoring the infinity by hand makes the data match again.
        reread.categories.loc[is_protein, "maxNutrition"] = float("inf")
        self.assertTrue(plain._same_data(dat, reread, epsilon=1e-5))
def testDataTypes_two(self):
    """Check data-type failure detection and replacement with null values.

    The fixture contains a ``None`` food key and a food whose value is
    ``None``. The test asserts that:

    * the schema-only factory reports two failure entries and no
      duplicates, and ``replace_data_type_failures`` leaves the data equal
      to a prior copy;
    * a clone with default ``"a"`` for ``foods.name`` and
      ``nutritionQuantities.food`` changes the copy, after which no type
      failures remain but duplicates appear in both tables;
    * the reported failure rows are the ones with the null food;
    * with both key fields declared nullable there are no failures, and
      making ``foods.cost`` non-nullable flags the ``('b', None)`` row.
    """
    tdf = TicDatFactory(**dietSchema())
    factory = PanDatFactory(**tdf.schema())

    def makeIt():
        # Build a TicDat with deliberately null entries, then convert it.
        tic = tdf.TicDat()
        tic.foods["a"] = 12
        tic.foods["b"] = None
        tic.foods[None] = 101
        tic.categories["1"] = {"maxNutrition": 100, "minNutrition": 40}
        tic.categories["2"] = [10, 20]
        for food, category in itertools.product(tic.foods, tic.categories):
            tic.nutritionQuantities[food, category] = 5
        tic.nutritionQuantities['a', 2] = 12
        return tdf.copy_to_pandas(tic, drop_pk_columns=False)

    dat = makeIt()
    errs = factory.find_data_type_failures(dat)
    self.assertTrue(len(errs) == 2)
    self.assertFalse(factory.find_duplicates(dat))

    # With this factory's settings, replacement must not alter the data.
    dat_copied = factory.copy_pan_dat(dat)
    factory.replace_data_type_failures(dat)
    self.assertTrue(factory._same_data(dat, dat_copied, epsilon=0.00001))

    # A clone with "a" as the default key value rewrites the null keys.
    defaulting = factory.clone()
    defaulting.set_default_value("foods", "name", "a")
    defaulting.set_default_value("nutritionQuantities", "food", "a")
    defaulting.replace_data_type_failures(dat_copied)
    self.assertFalse(factory._same_data(dat, dat_copied, epsilon=0.00001))
    self.assertFalse(factory.find_data_type_failures(dat_copied))
    dups = factory.find_duplicates(dat_copied)
    self.assertTrue(len(dups) == 2)
    self.assertTrue(len(dups["foods"]) == 1)
    self.assertTrue(len(dups["nutritionQuantities"]) == 2)

    from pandas import isnull

    def noneify(iter_of_tuples):
        # Normalise every pandas null to None so rows compare as plain tuples.
        normalised = set()
        for record in iter_of_tuples:
            normalised.add(tuple(None if isnull(cell) else cell for cell in record))
        return normalised

    bad_quantity_rows = noneify(
        errs['nutritionQuantities', 'food'].itertuples(index=False))
    self.assertTrue(bad_quantity_rows == {(None, "1", 5), (None, "2", 5)})
    bad_food_rows = noneify(errs['foods', 'name'].itertuples(index=False))
    self.assertTrue(bad_food_rows == {(None, 101)})

    # Declaring the key fields nullable clears the failures.
    nullable_factory = PanDatFactory(**tdf.schema())
    nullable_factory.set_data_type(
        "foods", "name", nullable=True, strings_allowed='*')
    nullable_factory.set_data_type(
        "nutritionQuantities", "food", nullable=True, strings_allowed='*')
    self.assertFalse(nullable_factory.find_data_type_failures(dat))

    # A non-nullable cost field flags the food stored with a None value.
    nullable_factory.set_data_type("foods", "cost", nullable=False)
    errs = nullable_factory.find_data_type_failures(dat)
    self.assertTrue(len(errs) == 1)
    bad_cost_rows = noneify(errs['foods', 'cost'].itertuples(index=False))
    self.assertTrue(bad_cost_rows == {('b', None)})
def testDataRowPredicatesTwo(self):
    """Exercise data row predicates that receive kwargs from a maker.

    Covered by the assertions:

    * a kwargs maker shared by two predicates has run once after the first
      ``find_data_row_failures`` call and twice after the second;
    * removing the "pizza" food and "fat" category makes the "foodza" and
      "catfat" predicates fail for every remaining row;
    * when the kwargs maker raises, the default call surfaces an
      ``AttributeError``, while ``exception_handling="Handled as Failure"``
      reports failures with ``primary_key == '*'`` and an error message
      (also on a clone of the factory);
    * a predicate returning a string, registered with
      ``predicate_failure_response="Error Message"``, reports that string.
    """
    tdf = TicDatFactory(**dietSchema())
    # NOTE: `factory` and `sabotage` are deliberately *rebound* further down;
    # the nested functions read them through closures and see the new values.
    factory = PanDatFactory(**dietSchema())
    call_counter = [0]
    sabotage = []

    def pre_processor(dat):
        call_counter[0] += 1
        if sabotage:
            # No such attribute exists, so this raises AttributeError.
            dat.messing_it_up += 1
        return {table: len(getattr(dat, table)) for table in tdf.all_tables}

    def is_fat_or_four_categories(row, nutritionQuantities, foods, categories):
        return row["name"] == "fat" or categories == 4

    def is_pizza_or_nine_foods(row, nutritionQuantities, foods, categories):
        return row["name"] == "pizza" or foods == 9

    factory.add_data_row_predicate(
        "foods", lambda row, y: y == 12,
        predicate_kwargs_maker=lambda dat: {"y": 12})
    factory.add_data_row_predicate(
        "categories", is_fat_or_four_categories,
        predicate_name="catfat", predicate_kwargs_maker=pre_processor)
    factory.add_data_row_predicate(
        "foods", is_pizza_or_nine_foods,
        predicate_name="foodza", predicate_kwargs_maker=pre_processor)

    def dummy_kwargs_maker(dat):
        return {"x": 1} if factory.good_pan_dat_object(dat) else None

    for table in tdf.all_tables:
        factory.add_data_row_predicate(
            table, lambda row, x: x == 1,
            predicate_name=f"dummy_{table}",
            predicate_kwargs_maker=dummy_kwargs_maker)

    expected_failures = {('categories', 'catfat'), ('foods', 'foodza')}

    # Intact data: nothing fails, and the shared maker ran exactly once.
    pandat = factory.copy_pan_dat(
        copy_to_pandas_with_reset(tdf, tdf.copy_tic_dat(dietData())))
    self.assertFalse(factory.find_data_row_failures(pandat))
    self.assertTrue(call_counter[0] == 1)

    # Drop one row per table so the table-size escape hatch no longer holds.
    keep_foods = pandat.foods["name"] != "pizza"
    pandat.foods = pandat.foods[keep_foods].copy()
    keep_categories = pandat.categories["name"] != "fat"
    pandat.categories = pandat.categories[keep_categories].copy()
    fails = factory.find_data_row_failures(pandat)
    self.assertTrue(call_counter[0] == 2)
    self.assertTrue(set(map(tuple, fails)) == expected_failures)
    self.assertTrue(
        set(fails['categories', 'catfat']["name"])
        == set(dietData().categories).difference(["fat"]))
    self.assertTrue(
        set(fails['foods', 'foodza']["name"])
        == set(dietData().foods).difference(["pizza"]))

    # Make the kwargs maker blow up: unhandled first, then handled.
    sabotage.append(1)
    raised = []
    try:
        factory.find_data_row_failures(pandat)
    except Exception as err:
        raised.append(str(err.__class__))
    self.assertTrue("AttributeError" in raised[0])
    fails = factory.find_data_row_failures(
        pandat, exception_handling="Handled as Failure")
    self.assertTrue(set(map(tuple, fails)) == expected_failures)
    self.assertTrue(call_counter[0] == 4)
    for failure in fails.values():
        self.assertTrue(
            failure.primary_key == '*' and "no attribute" in failure.error_message)

    # The same handled-failure result must come from a clone.
    factory = factory.clone()
    fails = factory.find_data_row_failures(
        pandat, exception_handling="Handled as Failure")
    self.assertTrue(set(map(tuple, fails)) == expected_failures)

    # Stop sabotaging, then check string-valued predicate results.
    sabotage = []

    def fail_on_bad_name(row, bad_name):
        return f"{bad_name} is bad" if row["name"] == bad_name else True

    factory.add_data_row_predicate(
        "foods", fail_on_bad_name,
        predicate_name="baddy",
        predicate_kwargs_maker=lambda dat: {"bad_name": sorted(dat.foods["name"])[0]},
        predicate_failure_response="Error Message")
    pandat = factory.copy_pan_dat(
        copy_to_pandas_with_reset(tdf, tdf.copy_tic_dat(dietData())))
    fails = factory.find_data_row_failures(pandat)
    self.assertTrue(set(map(tuple, fails)) == {('foods', 'baddy')})
    self.assertTrue(len(fails['foods', 'baddy']) == 1)
    first_message = list(fails['foods', 'baddy']["Error Message"])[0]
    self.assertTrue(first_message == "chicken is bad")