def testDefaults(self):
    """Write a TicDat to a SQL file, then read it back under schemas that add a field.

    The writer schema gives table ``three`` no data fields.  The reader
    schemas declare a data field ``d`` on ``three``: once with no explicit
    default and once with ``float("inf")`` set through ``set_default_value``.
    Each loaded TicDat must match one built in memory from empty ``three``
    rows under the same factory, and the two in-memory TicDats must not
    match each other.
    """
    def reader_factory():
        # Same tables and primary keys as the writer, plus data field "d" on "three".
        return TicDatFactory(one=[["a"], ["b", "c"]],
                             two=[["a", "b"], ["c"]],
                             three=[["a", "b", "c"], ["d"]])

    def expected_under(factory):
        # Rows of "three" are supplied as empty dicts, leaving "d" for the factory to fill.
        return factory.TicDat(one=written.one,
                              two=written.two,
                              three={pk: {} for pk in written.three})

    writer = TicDatFactory(one=[["a"], ["b", "c"]],
                           two=[["a", "b"], ["c"]],
                           three=[["a", "b", "c"], []])
    written = writer.TicDat(one=[[1, 2, 3], [4, 5, 6]],
                            two=[[1, 2, 3], [4, 5, 6]],
                            three=[[1, 2, 3], [4, 5, 6]])
    sql_path = makeCleanPath(os.path.join(_scratchDir, "defaults.sql"))
    writer.sql.write_sql_file(written, sql_path)

    # Reader with no explicit default for "d".
    plain = reader_factory()
    plain_expected = expected_under(plain)
    plain_loaded = plain.sql.create_tic_dat_from_sql(sql_path)
    self.assertTrue(plain._same_data(plain_expected, plain_loaded))

    # Reader whose default for "d" is infinity.
    with_inf = reader_factory()
    with_inf.set_default_value("three", "d", float("inf"))
    inf_expected = expected_under(with_inf)
    inf_loaded = with_inf.sql.create_tic_dat_from_sql(sql_path)
    self.assertTrue(with_inf._same_data(inf_expected, inf_loaded))

    # The two expectations were built under different defaults and must differ.
    self.assertFalse(with_inf._same_data(plain_expected, inf_expected))
def test_empty_text_none(self):
    """Round-trip ``None`` entries of a ``parameters`` table through an xls file.

    With an untyped schema the reloaded data is asserted to equal the
    empty-string variant and not the ``None`` variant.  With ``Key`` nullable
    and ``Value`` defaulting to ``None`` the assertion flips.  Finally the
    same file is read through a ``parameters='*'`` schema and the resulting
    table's ``shape`` is asserted to be ``(4, 2)``.

    NOTE(review): another method with this same name is visible nearby; if
    both live in one class the later definition replaces the earlier one --
    confirm they belong to different test classes.
    """
    # this is a naive data scientist who isn't using the parameters functionality
    xls_path = os.path.join(_scratchDir, "empty.xls")
    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    with_nones = tdf.TicDat(
        parameters=[[None, 100], ["b", 10.01], ["three", 200], ["d", None]])
    with_blanks = tdf.TicDat(
        parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

    def round_trip():
        # ``tdf`` is looked up at call time, so each rebinding below changes
        # which factory performs the write and the read.
        tdf.xls.write_file(with_nones, xls_path, allow_overwrite=True)
        return tdf.xls.create_tic_dat(xls_path)

    reloaded = round_trip()
    self.assertTrue(tdf._same_data(with_blanks, reloaded))
    self.assertFalse(tdf._same_data(with_nones, reloaded))

    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    tdf.set_data_type("parameters", "Key", nullable=True)
    # this default alone will mess with number reading
    tdf.set_default_value("parameters", "Value", None)
    reloaded = round_trip()
    self.assertFalse(tdf._same_data(with_blanks, reloaded))
    self.assertTrue(tdf._same_data(with_nones, reloaded))

    # Re-read the file written just above through a wildcard schema.
    tdf = TicDatFactory(parameters='*')
    wildcard = tdf.xls.create_tic_dat(xls_path)
    self.assertTrue(wildcard.parameters.shape == (4, 2))
def testSeven(self):
    """Check the field values produced by empty row assignments under three default setups.

    The same small diet TicDat is built against a factory with no overrides,
    one configured through ``set_default_values`` (foods cost and
    nutritionQuantities qty), and one configured through two
    ``set_default_value`` calls on the categories fields.
    """
    def makeIt():
        # Uses whichever factory ``tdf`` is bound to when called.
        fresh = tdf.TicDat()
        fresh.foods["a"] = {}
        fresh.categories["1"] = {}
        fresh.categories["2"] = [0, 1]
        # A list assignment populates the data fields positionally.
        self.assertTrue(fresh.categories["2"]["minNutrition"] == 0)
        self.assertTrue(fresh.categories["2"]["maxNutrition"] == 1)
        fresh.nutritionQuantities['junk', 1] = {}
        return tdf.freeze_me(fresh)

    def check(frozen, cost, nutrition_bounds, qty):
        # Checked in the same order as a single short-circuiting conjunction.
        self.assertTrue(frozen.foods["a"]["cost"] == cost)
        self.assertTrue(frozen.categories["1"].values() == nutrition_bounds)
        self.assertTrue(frozen.nutritionQuantities['junk', 1]["qty"] == qty)

    # No overrides.
    tdf = TicDatFactory(**dietSchema())
    check(makeIt(), 0, (0, 0), 0)

    # Bulk override of two tables' defaults.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_default_values(foods={"cost": "dontcare"},
                           nutritionQuantities={"qty": 100})
    check(makeIt(), 'dontcare', (0, 0), 100)

    # Field-by-field override on categories only.
    tdf = TicDatFactory(**dietSchema())
    tdf.set_default_value("categories", "minNutrition", 1)
    tdf.set_default_value("categories", "maxNutrition", 2)
    check(makeIt(), 0, (1, 2), 0)
def test_empty_text_none(self):
    """Round-trip ``None`` entries of a ``parameters`` table through a csv directory.

    Three factory configurations are exercised against the same written data:
    an untyped schema (reload equals the empty-string variant only), nullable
    ``Key`` with a ``None`` default for ``Value`` (reload equals neither
    variant but contains at least one ``None`` value), and nullable ``Key``
    plus nullable integer ``Value`` (reload equals the ``None`` variant only).

    NOTE(review): another method with this same name is visible nearby; if
    both live in one class this definition replaces the earlier one --
    confirm they belong to different test classes.
    """
    csv_dir = os.path.join(_scratchDir, "empty_text")
    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    with_nones = tdf.TicDat(
        parameters=[[None, 100], ["b", 10.01], ["three", 200], ["d", None]])
    with_blanks = tdf.TicDat(
        parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

    def round_trip():
        # ``tdf`` is looked up at call time, so each rebinding below changes
        # which factory performs the write and the read.
        tdf.csv.write_directory(with_nones, makeCleanDir(csv_dir))
        return tdf.csv.create_tic_dat(csv_dir)

    reloaded = round_trip()
    self.assertTrue(tdf._same_data(with_blanks, reloaded))
    self.assertFalse(tdf._same_data(with_nones, reloaded))

    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    tdf.set_data_type("parameters", "Key", nullable=True)
    # this default alone will mess with number reading
    tdf.set_default_value("parameters", "Value", None)
    reloaded = round_trip()
    self.assertFalse(tdf._same_data(with_blanks, reloaded))
    self.assertFalse(tdf._same_data(with_nones, reloaded))
    self.assertTrue(
        any(row["Value"] is None for row in reloaded.parameters.values()))

    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    tdf.set_data_type("parameters", "Key", nullable=True)
    tdf.set_data_type("parameters", "Value", nullable=True, must_be_int=True)
    reloaded = round_trip()
    self.assertFalse(tdf._same_data(with_blanks, reloaded))
    self.assertTrue(tdf._same_data(with_nones, reloaded))
def test_numericish_text(self):
    """Round-trip number-like strings (``"100"``, ``"010"``) through a csv directory.

    With an untyped schema the reloaded data is asserted to differ from what
    was written.  After ``Key`` is typed to accept any string or a number and
    ``Value`` is given ``""`` as its default, the round trip is asserted to
    preserve the data.  The last case types ``Value`` as strings only and uses
    all-string keys, again asserting an exact round trip.
    """
    csv_dir = os.path.join(_scratchDir, "numericish")
    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    params = tdf.TicDat(
        parameters=[["a", "100"], ["b", "010"], [3, "200"], ["d", "020"]])

    def round_trip():
        # Both ``tdf`` and ``params`` are looked up at call time; the
        # rebindings below therefore change what is written and how.
        tdf.csv.write_directory(params, makeCleanDir(csv_dir))
        return tdf.csv.create_tic_dat(csv_dir)

    # Untyped schema: written and reloaded data are expected to differ.
    reloaded = round_trip()
    self.assertFalse(tdf._same_data(params, reloaded))

    # Mixed string/number keys, empty-string default for the value field.
    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    tdf.set_data_type("parameters", "Key",
                      strings_allowed='*', number_allowed=True)
    tdf.set_default_value("parameters", "Value", "")
    reloaded = round_trip()
    self.assertTrue(tdf._same_data(params, reloaded))

    # String-only value field, with every key a string this time.
    tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
    tdf.set_data_type("parameters", "Value",
                      strings_allowed='*', number_allowed=False)
    params = tdf.TicDat(
        parameters=[["a", "100"], ["b", "010"], ["c", "200"], ["d", "020"]])
    reloaded = round_trip()
    self.assertTrue(tdf._same_data(params, reloaded))
def testEight(self):
    """Exercise ``find_data_type_failures`` / ``replace_data_type_failures``.

    The first half works on a small diet data set: it checks which
    (table, field) pairs and primary keys are reported under several data
    type configurations, that replacing failures without a usable replacement
    value raises with a descriptive message, and that explicit replacement
    values and field defaults lead to the same repaired data.

    The second half works on the netflow data set: it checks that the
    ``arcs_source`` attribute only appears on node rows once foreign key
    links are enabled, and then repeats the failure/replacement checks for
    string-valued ``capacity`` entries.
    """
    qty_field = ("nutritionQuantities", "qty")
    cost_field = ("foods", "cost")

    def mentions_replacement_problem(message):
        # The raised text must name the missing replacement and the field.
        return all(fragment in message
                   for fragment in ("replacement value", "nutritionQuantities", "qty"))

    def strict_diet_factory():
        # Non-nullable cost; qty must lie in the interval (5, 12].
        factory = TicDatFactory(**dietSchema())
        factory.set_data_type("foods", "cost", nullable=False)
        factory.set_data_type("nutritionQuantities", "qty", min=5,
                              inclusive_min=False, max=12, inclusive_max=True)
        return factory

    def build_diet():
        # Uses whichever factory ``tdf`` is bound to when called.
        fresh = tdf.TicDat()
        fresh.foods["a"] = 12
        fresh.foods["b"] = None
        fresh.categories["1"] = {"maxNutrition": 100, "minNutrition": 40}
        fresh.categories["2"] = [10, 20]
        for food, category in itertools.product(fresh.foods, fresh.categories):
            fresh.nutritionQuantities[food, category] = 5
        # Note the integer 2 here, as opposed to the string key "2" above.
        fresh.nutritionQuantities['a', 2] = 12
        return tdf.freeze_me(fresh)

    # --- diet data, default data types: nothing is reported ---------------
    tdf = TicDatFactory(**dietSchema())
    dat = build_diet()
    self.assertFalse(tdf.find_data_type_failures(dat))

    # --- strict types plus a default cost of 2 ----------------------------
    tdf = strict_diet_factory()
    tdf.set_default_value("foods", "cost", 2)
    dat = build_diet()
    failures = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failures) == {cost_field, qty_field})
    self.assertTrue(set(failures[qty_field].pks) ==
                    {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
    self.assertTrue(failures[qty_field].bad_values == (5,))

    # Replacing without a replacement value for qty is expected to raise.
    message = self.firesException(
        lambda: tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(mentions_replacement_problem(message))

    repaired = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(dat), replacement_values={qty_field: 5.001})
    self.assertFalse(tdf.find_data_type_failures(repaired))
    self.assertFalse(tdf._same_data(repaired, dat))
    self.assertTrue(all(repaired.nutritionQuantities[pk]["qty"] == 5.001
                        for pk in failures[qty_field].pks))
    self.assertTrue(repaired.foods["a"]["cost"] == 12)
    self.assertTrue(repaired.foods["b"]["cost"] == 2)
    self.assertTrue(repaired.nutritionQuantities['a', 2]["qty"] == 12)

    # --- same types, no default: explicit replacements give the same result
    tdf = strict_diet_factory()
    repaired_explicit = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(dat),
        replacement_values={qty_field: 5.001, cost_field: 2})
    self.assertTrue(tdf._same_data(repaired, repaired_explicit))

    # --- numbers disallowed for qty: every qty row is reported ------------
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost", nullable=True)
    tdf.set_data_type("nutritionQuantities", "qty", number_allowed=False)
    failures = tdf.find_data_type_failures(dat)
    self.assertTrue(set(failures) == {qty_field})
    self.assertTrue(set(failures[qty_field].pks) == set(dat.nutritionQuantities))
    message = self.firesException(
        lambda: tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
    self.assertTrue(mentions_replacement_problem(message))

    # --- argument-free data type on cost: the None cost is replaced by 0 --
    tdf = TicDatFactory(**dietSchema())
    tdf.set_data_type("foods", "cost")
    repaired = tdf.replace_data_type_failures(tdf.copy_tic_dat(build_diet()))
    self.assertTrue(repaired.foods["a"]["cost"] == 12)
    self.assertTrue(repaired.foods["b"]["cost"] == 0)

    # --- netflow: foreign key links are absent unless enabled -------------
    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))

    tdf = TicDatFactory(**netflowSchema())
    addNetflowForeignKeys(tdf)
    tdf.enable_foreign_key_links()
    dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
    self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))

    def build_netflow():
        # Uses whichever factory ``tdf`` is bound to when called; the
        # foreign key setup runs only while that factory has none yet.
        if not tdf.foreign_keys:
            tdf.enable_foreign_key_links()
            addNetflowForeignKeys(tdf)
        original = netflowData()
        copied = tdf.copy_tic_dat(original)
        # Overwrite each Detroit arc row with its destination key.
        for destination in copied.nodes["Detroit"].arcs_source:
            copied.arcs["Detroit", destination] = destination
        self.assertTrue(all(len(getattr(copied, table)) == len(getattr(original, table))
                            for table in tdf.all_tables))
        return tdf.freeze_me(copied)

    # --- netflow, default data types --------------------------------------
    tdf = TicDatFactory(**netflowSchema())
    dat = build_netflow()
    self.assertFalse(tdf.find_data_type_failures(dat))

    # --- any string allowed for capacity ----------------------------------
    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity", strings_allowed="*")
    dat = build_netflow()
    self.assertFalse(tdf.find_data_type_failures(dat))

    # --- only a fixed set of strings allowed for capacity -----------------
    tdf = TicDatFactory(**netflowSchema())
    tdf.set_data_type("arcs", "capacity",
                      strings_allowed=["Boston", "Seattle", "lumberjack"])
    dat = build_netflow()
    failures = tdf.find_data_type_failures(dat)
    self.assertTrue(failures ==
                    {('arcs', 'capacity'): (("New York",), (("Detroit", "New York"),))})

    repaired = tdf.replace_data_type_failures(tdf.copy_tic_dat(build_netflow()))
    pristine = tdf.copy_tic_dat(netflowData())
    self.assertFalse(tdf.find_data_type_failures(repaired))
    self.assertFalse(tdf._same_data(dat, pristine))

    # The replacement dict also carries a ("cost", "cost") entry.
    patched = tdf.replace_data_type_failures(
        tdf.copy_tic_dat(build_netflow()),
        {("arcs", "capacity"): 80, ("cost", "cost"): "imok"})
    repaired = tdf.copy_tic_dat(patched)
    repaired.arcs["Detroit", "Boston"] = 100
    repaired.arcs["Detroit", "Seattle"] = 120
    self.assertTrue(tdf._same_data(repaired, pristine))
def test_diet_amplpy(self):
    """Drive the diet model through amplpy for several variants of the input data.

    Covered in order: the unmodified data; data whose ``Quantity`` is split
    evenly between ``Quantity`` and ``Other Quantity`` (solution compared
    against ``_diet_sln_ticdat``); a ``copy_to_ampl`` call that blanks one
    renaming and excludes a table; reading variables into a solution schema
    without mapping them to a data field; an input asserted to be infeasible;
    and an input asserted to be unbounded.

    NOTE(review): the solution read after the first solve is never asserted
    on -- it is overwritten by the second solve's solution.  Confirm whether
    a check is missing there.
    """
    def to_ampl(tic_dat):
        # A fresh renaming dict is built for every call.
        return _diet_input_tdf.copy_to_ampl(
            tic_dat,
            field_renamings={
                ("foods", "Cost"): "cost",
                ("categories", "Min Nutrition"): "n_min",
                ("categories", "Max Nutrition"): "n_max",
                ("nutrition_quantities", "Quantity"): "amt",
                ("nutrition_quantities", "Other Quantity"): "other_amt"
            })

    def solve(ampl_dat):
        # New AMPL session: choose the solver, load the model and data, solve.
        session = amplpy.AMPL()
        session.setOption('solver', 'gurobi')
        session.eval(_diet_mod)
        _diet_input_tdf.set_ampl_data(ampl_dat, session,
                                      {"categories": "CAT", "foods": "FOOD"})
        session.solve()
        return session

    def read_solution(session):
        # Pull both variables and the objective value into a solution TicDat.
        solution = _diet_sln_tdf.copy_from_ampl_variables({
            ("buy_food", "Quantity"): session.getVariable("Buy"),
            ("consume_nutrition", "Quantity"): session.getVariable("Consume")
        })
        solution.parameters['Total Cost'] = session.getObjective('Total_Cost').value()
        return solution

    # --- unmodified input --------------------------------------------------
    dat = to_ampl(_diet_dat)
    self.assertTrue({"n_min", "n_max"}.issubset(dat.categories.toPandas().columns))
    ampl = solve(dat)
    sln = read_solution(ampl)

    # --- Quantity split evenly across Quantity and Other Quantity ----------
    diet_dat_two = _diet_input_tdf.copy_tic_dat(_diet_dat)
    for row in diet_dat_two.nutrition_quantities.values():
        half = 0.5 * row["Quantity"]
        row["Quantity"] = half
        row["Other Quantity"] = half
    ampl = solve(to_ampl(diet_dat_two))
    self.assertTrue("solved" == ampl.getValue("solve_result"))
    sln = read_solution(ampl)
    self.assertTrue(_nearly_same_dat(_diet_sln_tdf, sln, _diet_sln_ticdat))

    # --- blank renaming for Min Nutrition, nutrition_quantities excluded ---
    dat = _diet_input_tdf.copy_to_ampl(
        _diet_dat,
        {
            ("foods", "Cost"): "cost",
            ("categories", "Min Nutrition"): "",
            ("categories", "Max Nutrition"): "n_max"
        },
        ["nutrition_quantities"])
    self.assertFalse(hasattr(dat, "nutrition_quantities"))
    present = {"n_min", "n_max"}.intersection(dat.categories.toPandas().columns)
    self.assertTrue(present == {"n_max"})

    # --- variables read with ``False`` in place of a data field name -------
    # ``ampl`` and ``sln`` still refer to the split-quantity solve above.
    sln_tdf_2 = TicDatFactory(buy_food=[["Food"], ["Quantity"]],
                              consume_nutrition=[["Category"], []])
    sln_tdf_2.set_default_value("buy_food", "Quantity", 1)
    sln_2 = sln_tdf_2.copy_from_ampl_variables({
        ("buy_food", False): ampl.getVariable("Buy"),
        ("consume_nutrition", False): (ampl.getVariable("Consume"), lambda x: x < 100)
    })
    self.assertTrue(set(sln_2.buy_food) == set(sln.buy_food))
    self.assertTrue(all(row["Quantity"] == 1 for row in sln_2.buy_food.values()))
    self.assertTrue(sln_2.consume_nutrition)
    below_100 = {key for key, row in sln.consume_nutrition.items()
                 if row["Quantity"] < 100}
    self.assertTrue(set(sln_2.consume_nutrition) == below_100)

    # --- calories limited to [0, 200]: asserted infeasible -----------------
    diet_dat_two = _diet_input_tdf.copy_tic_dat(_diet_dat)
    diet_dat_two.categories["calories"] = [0, 200]
    ampl = solve(to_ampl(diet_dat_two))
    self.assertTrue("infeasible" == ampl.getValue("solve_result"))

    # --- no nutrition maximums, hamburger set to -1: asserted unbounded ----
    diet_dat_two = _diet_input_tdf.copy_tic_dat(_diet_dat)
    for row in diet_dat_two.categories.values():
        row["Max Nutrition"] = float("inf")
    diet_dat_two.foods["hamburger"] = -1
    ampl = solve(to_ampl(diet_dat_two))
    self.assertTrue("unbounded" == ampl.getValue("solve_result"))