Пример #1
0
    def testDefaults(self):
        """Read a SQL file into a schema that has one more data field than the file.

        The file is written from a schema whose ``three`` table has no data
        fields, then read with a schema whose ``three`` table has data field
        ``d``.  The loaded data is compared with a TicDat whose ``three`` rows
        are built from empty dicts, once without and once with an explicit
        default value for ``d``.
        """
        writer = TicDatFactory(one=[["a"], ["b", "c"]],
                               two=[["a", "b"], ["c"]],
                               three=[["a", "b", "c"], []])
        source = writer.TicDat(one=[[1, 2, 3], [4, 5, 6]],
                               two=[[1, 2, 3], [4, 5, 6]],
                               three=[[1, 2, 3], [4, 5, 6]])
        sql_path = makeCleanPath(os.path.join(_scratchDir, "defaults.sql"))
        writer.sql.write_sql_file(source, sql_path)

        def wider_factory():
            # same tables as the writer, but "three" gains the data field "d"
            return TicDatFactory(one=[["a"], ["b", "c"]],
                                 two=[["a", "b"], ["c"]],
                                 three=[["a", "b", "c"], ["d"]])

        def expected_for(factory):
            # "three" rows are given as empty dicts, so "d" is never set explicitly
            empty_rows = {pk: {} for pk in source.three}
            return factory.TicDat(one=source.one, two=source.two, three=empty_rows)

        plain = wider_factory()
        expected_plain = expected_for(plain)
        loaded_plain = plain.sql.create_tic_dat_from_sql(sql_path)
        self.assertTrue(plain._same_data(expected_plain, loaded_plain))

        with_inf = wider_factory()
        with_inf.set_default_value("three", "d", float("inf"))
        expected_inf = expected_for(with_inf)
        loaded_inf = with_inf.sql.create_tic_dat_from_sql(sql_path)
        self.assertTrue(with_inf._same_data(expected_inf, loaded_inf))

        # the two expected objects must differ, i.e. the default really was applied
        self.assertFalse(with_inf._same_data(expected_plain, expected_inf))
Пример #2
0
    def test_empty_text_none(self):
        """Round-trip None keys/values through an xls file under different settings.

        With a plain factory the test expects the reloaded data to match the
        empty-string variant; with a nullable Key and a None default for Value
        it expects the reloaded data to match the None variant instead.
        """
        # this is a naive data scientist who isn't using the parameters functionality
        xls_path = os.path.join(_scratchDir, "empty.xls")
        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        with_nones = schema.TicDat(parameters=[[None, 100], ["b", 10.01],
                                               ["three", 200], ["d", None]])
        with_blanks = schema.TicDat(
            parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

        def write_then_read(factory):
            # always writes the None-bearing data, using whichever factory is passed in
            factory.xls.write_file(with_nones, xls_path, allow_overwrite=True)
            return factory.xls.create_tic_dat(xls_path)

        reloaded = write_then_read(schema)
        self.assertTrue(schema._same_data(with_blanks, reloaded))
        self.assertFalse(schema._same_data(with_nones, reloaded))

        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        schema.set_data_type("parameters", "Key", nullable=True)
        # this default alone will mess with number reading
        schema.set_default_value("parameters", "Value", None)
        reloaded = write_then_read(schema)
        self.assertFalse(schema._same_data(with_blanks, reloaded))
        self.assertTrue(schema._same_data(with_nones, reloaded))

        # finally read the same file back as a generic ('*') table and check its shape
        generic = TicDatFactory(parameters='*')
        generic_dat = generic.xls.create_tic_dat(xls_path)
        self.assertTrue(generic_dat.parameters.shape == (4, 2))
Пример #3
0
 def testSeven(self):
     """Check which default values fill rows that are created from empty dicts.

     The same small data set is built three times: with an untouched factory,
     after ``set_default_values`` on foods/nutritionQuantities, and after
     ``set_default_value`` on both categories fields.
     """
     def build_frozen(factory):
         fresh = factory.TicDat()
         fresh.foods["a"] = {}
         fresh.categories["1"] = {}
         # a list assigned to a row is expected to map onto the data fields in order
         fresh.categories["2"] = [0, 1]
         self.assertTrue(fresh.categories["2"]["minNutrition"] == 0)
         self.assertTrue(fresh.categories["2"]["maxNutrition"] == 1)
         fresh.nutritionQuantities['junk', 1] = {}
         return factory.freeze_me(fresh)

     def check(frozen, cost, bounds, qty):
         # same three comparisons, in the same order, as one chained `and`
         self.assertTrue(frozen.foods["a"]["cost"] == cost)
         self.assertTrue(frozen.categories["1"].values() == bounds)
         self.assertTrue(frozen.nutritionQuantities['junk', 1]["qty"] == qty)

     untouched = TicDatFactory(**dietSchema())
     check(build_frozen(untouched), 0, (0, 0), 0)

     bulk_defaults = TicDatFactory(**dietSchema())
     bulk_defaults.set_default_values(foods={"cost": "dontcare"},
                                      nutritionQuantities={"qty": 100})
     check(build_frozen(bulk_defaults), 'dontcare', (0, 0), 100)

     single_defaults = TicDatFactory(**dietSchema())
     single_defaults.set_default_value("categories", "minNutrition", 1)
     single_defaults.set_default_value("categories", "maxNutrition", 2)
     check(build_frozen(single_defaults), 0, (1, 2), 0)
Пример #4
0
    def test_empty_text_none(self):
        """Round-trip None keys/values through a csv directory under three setups.

        Expected outcomes, as asserted below: a plain factory reads None back
        as an empty string; a nullable Key plus a None default for Value
        matches neither variant but yields at least one None Value; a nullable
        Key plus a nullable, integer-only Value reproduces the None variant.
        """
        csv_dir = os.path.join(_scratchDir, "empty_text")
        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        with_nones = schema.TicDat(parameters=[[None, 100], ["b", 10.01],
                                               ["three", 200], ["d", None]])
        with_blanks = schema.TicDat(
            parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

        def write_then_read(factory):
            # always writes the None-bearing data into a freshly cleaned directory
            factory.csv.write_directory(with_nones, makeCleanDir(csv_dir))
            return factory.csv.create_tic_dat(csv_dir)

        reloaded = write_then_read(schema)
        self.assertTrue(schema._same_data(with_blanks, reloaded))
        self.assertFalse(schema._same_data(with_nones, reloaded))

        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        schema.set_data_type("parameters", "Key", nullable=True)
        # this default alone will mess with number reading
        schema.set_default_value("parameters", "Value", None)
        reloaded = write_then_read(schema)
        self.assertFalse(schema._same_data(with_blanks, reloaded))
        self.assertFalse(schema._same_data(with_nones, reloaded))
        found_none = any(row["Value"] is None
                         for row in reloaded.parameters.values())
        self.assertTrue(found_none)

        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        schema.set_data_type("parameters", "Key", nullable=True)
        schema.set_data_type("parameters", "Value",
                             nullable=True, must_be_int=True)
        reloaded = write_then_read(schema)
        self.assertFalse(schema._same_data(with_blanks, reloaded))
        self.assertTrue(schema._same_data(with_nones, reloaded))
Пример #5
0
    def test_numericish_text(self):
        """Round-trip number-looking strings ("010", "100", ...) through csv files.

        A plain factory is expected NOT to reproduce the data; the two
        configured factories below are expected to reproduce it.
        """
        csv_dir = os.path.join(_scratchDir, "numericish")

        def write_then_read(factory, tic_dat):
            factory.csv.write_directory(tic_dat, makeCleanDir(csv_dir))
            return factory.csv.create_tic_dat(csv_dir)

        # plain factory, with one numeric key (3) mixed in among the string keys
        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        mixed_keys = schema.TicDat(
            parameters=[["a", "100"], ["b", "010"], [3, "200"], ["d", "020"]])
        reloaded = write_then_read(schema, mixed_keys)
        self.assertFalse(schema._same_data(mixed_keys, reloaded))

        # Key accepts any string or number; Value gets an empty-string default
        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        schema.set_data_type("parameters", "Key",
                             strings_allowed='*', number_allowed=True)
        schema.set_default_value("parameters", "Value", "")
        reloaded = write_then_read(schema, mixed_keys)
        self.assertTrue(schema._same_data(mixed_keys, reloaded))

        # Value is declared strings-only; this time every key is a string
        schema = TicDatFactory(parameters=[["Key"], ["Value"]])
        schema.set_data_type("parameters", "Value",
                             strings_allowed='*', number_allowed=False)
        string_keys = schema.TicDat(parameters=[["a", "100"], ["b", "010"],
                                                ["c", "200"], ["d", "020"]])
        reloaded = write_then_read(schema, string_keys)
        self.assertTrue(schema._same_data(string_keys, reloaded))
Пример #6
0
    def testEight(self):
        """Exercise data type failure detection and replacement, plus foreign key links.

        The first half uses the diet schema with find_data_type_failures and
        replace_data_type_failures; the second half uses the netflow schema,
        including the ``arcs_source`` attribute that the test expects only
        after enable_foreign_key_links() has been called.
        """
        tdf = TicDatFactory(**dietSchema())
        # NOTE: makeIt reads `tdf` when it is called, not when it is defined, so
        # every rebinding of `tdf` below changes which factory builds the data.
        def makeIt() :
            rtn = tdf.TicDat()
            rtn.foods["a"] = 12
            rtn.foods["b"] = None
            rtn.categories["1"] = {"maxNutrition":100, "minNutrition":40}
            rtn.categories["2"] = [10,20]
            # one quantity row of 5 for every (food, category) pair
            for f, p in itertools.product(rtn.foods, rtn.categories):
                rtn.nutritionQuantities[f,p] = 5
            # keyed with the int 2 (not the string "2" used for the category)
            rtn.nutritionQuantities['a', 2] = 12
            return tdf.freeze_me(rtn)
        dat = makeIt()
        # no data types have been set on this factory; no failures are expected
        self.assertFalse(tdf.find_data_type_failures(dat))

        # cost becomes non-nullable (default 2); qty is restricted to (5, 12]
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        tdf.set_default_value("foods", "cost", 2)
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(set(failed) == {('foods', 'cost'), ('nutritionQuantities', 'qty')})
        # the four rows holding exactly 5 are expected to fail; the ('a', 2) row holding 12 is not
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) ==
                        {('b', '1'), ('a', '2'), ('a', '1'), ('b', '2')})
        self.assertTrue(failed['nutritionQuantities', 'qty'].bad_values == (5,))
        # without replacement_values the call is expected to raise; firesException
        # presumably returns the exception text (it is searched with `in` below)
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001})
        # the fixed copy has no failures and differs from the original data
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(fixedDat, dat))
        self.assertTrue(all(fixedDat.nutritionQuantities[pk]["qty"] == 5.001 for pk in
                            failed['nutritionQuantities', 'qty'].pks))
        # food "b" picked up the default cost of 2; untouched values are unchanged
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 2 and
                        fixedDat.nutritionQuantities['a', 2]["qty"] == 12)

        # same repair, but the cost replacement is passed explicitly instead of
        # coming from a default value; the result is expected to match fixedDat
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=False)
        tdf.set_data_type("nutritionQuantities", "qty", min=5, inclusive_min=False, max=12, inclusive_max=True)
        fixedDat2 = tdf.replace_data_type_failures(tdf.copy_tic_dat(dat),
                            replacement_values={("nutritionQuantities", "qty"):5.001, ("foods", "cost") : 2})
        self.assertTrue(tdf._same_data(fixedDat, fixedDat2))

        # nullable cost, and qty may not be a number: every qty row is expected to fail
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost", nullable=True)
        tdf.set_data_type("nutritionQuantities", "qty",number_allowed=False)
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(set(failed) == {('nutritionQuantities', 'qty')})
        self.assertTrue(set(failed['nutritionQuantities', 'qty'].pks) == set(dat.nutritionQuantities))
        ex = self.firesException(lambda : tdf.replace_data_type_failures(tdf.copy_tic_dat(dat)))
        self.assertTrue(all(_ in ex for _ in ("replacement value", "nutritionQuantities", "qty")))

        # set_data_type with no extra arguments: the None cost of food "b" is
        # expected to be replaced by 0 without any explicit replacement value
        tdf = TicDatFactory(**dietSchema())
        tdf.set_data_type("foods", "cost")
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        self.assertTrue(fixedDat.foods["a"]["cost"] == 12 and fixedDat.foods["b"]["cost"] == 0)

        # netflow: foreign keys alone do not add the arcs_source attribute ...
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertFalse(hasattr(dat.nodes["Detroit"], "arcs_source"))

        # ... it is expected only once enable_foreign_key_links() has been called
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        tdf.enable_foreign_key_links()
        dat = tdf.copy_tic_dat(netflowData(), freeze_it=1)
        self.assertTrue(hasattr(dat.nodes["Detroit"], "arcs_source"))

        tdf = TicDatFactory(**netflowSchema())
        # makeIt is redefined for the netflow schema; like the first version it
        # reads `tdf` at call time
        def makeIt() :
            # set up links and foreign keys the first time a fresh factory is seen
            if not tdf.foreign_keys:
                tdf.enable_foreign_key_links()
                addNetflowForeignKeys(tdf)
            orig = netflowData()
            rtn = tdf.copy_tic_dat(orig)
            # overwrite each arc leaving Detroit with the name of its destination
            for n in rtn.nodes["Detroit"].arcs_source:
                rtn.arcs["Detroit", n] = n
            # the overwrite must not have added or removed any rows
            self.assertTrue(all(len(getattr(rtn, t)) == len(getattr(orig, t)) for t in tdf.all_tables))
            return tdf.freeze_me(rtn)
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        # any string allowed for capacity: still no failures expected
        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed="*")
        dat = makeIt()
        self.assertFalse(tdf.find_data_type_failures(dat))

        # only listed strings allowed; "New York" is not in the list, so the
        # (Detroit, New York) arc is the single expected failure
        tdf = TicDatFactory(**netflowSchema())
        tdf.set_data_type("arcs", "capacity", strings_allowed=["Boston", "Seattle", "lumberjack"])
        dat = makeIt()
        failed = tdf.find_data_type_failures(dat)
        self.assertTrue(failed == {('arcs', 'capacity'):(("New York",), (("Detroit", "New York"),))})
        fixedDat = tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()))
        netflowData_ = tdf.copy_tic_dat(netflowData())
        self.assertFalse(tdf.find_data_type_failures(fixedDat) or tdf._same_data(dat, netflowData_))
        # replace the bad capacity with 80, then restore the two remaining
        # Detroit arcs by hand; the result is expected to equal the pristine data
        fixedDat = tdf.copy_tic_dat(tdf.replace_data_type_failures(tdf.copy_tic_dat(makeIt()),
                                        {("arcs", "capacity"):80, ("cost","cost") :"imok"}))
        fixedDat.arcs["Detroit", "Boston"] = 100
        fixedDat.arcs["Detroit", "Seattle"] = 120
        self.assertTrue(tdf._same_data(fixedDat, netflowData_))
Пример #7
0
    def test_diet_amplpy(self):
        """Drive the diet model through amplpy and check the solve outcomes.

        Covers, in order: copying a TicDat to AMPL data with renamed fields,
        solving and reading variables back into a solution TicDat, dropping a
        field and a table during the copy, reading variables with no data
        field and with a filter function, and two modified inputs for which
        the test expects "infeasible" and "unbounded" solve results.
        """
        standard_renamings = {
            ("foods", "Cost"): "cost",
            ("categories", "Min Nutrition"): "n_min",
            ("categories", "Max Nutrition"): "n_max",
            ("nutrition_quantities", "Quantity"): "amt",
            ("nutrition_quantities", "Other Quantity"): "other_amt"
        }

        def to_ampl(tic_dat):
            # Copy tic_dat to AMPL data using the standard field renamings.
            # A fresh dict is handed over on every call, as each call site
            # previously did with its own literal.
            return _diet_input_tdf.copy_to_ampl(
                tic_dat, field_renamings=dict(standard_renamings))

        def solve(ampl_dat):
            # Build a new AMPL session for the diet model, load ampl_dat, solve.
            session = amplpy.AMPL()
            session.setOption('solver', 'gurobi')
            session.eval(_diet_mod)
            _diet_input_tdf.set_ampl_data(ampl_dat, session, {
                "categories": "CAT",
                "foods": "FOOD"
            })
            session.solve()
            return session

        def read_solution(session):
            # Pull the Buy/Consume variables and the objective into a solution TicDat.
            rtn = _diet_sln_tdf.copy_from_ampl_variables({
                ("buy_food", "Quantity"): session.getVariable("Buy"),
                ("consume_nutrition", "Quantity"): session.getVariable("Consume")
            })
            rtn.parameters['Total Cost'] = session.getObjective(
                'Total_Cost').value()
            return rtn

        # --- unmodified data: renamed columns must show up in the AMPL data
        dat = to_ampl(_diet_dat)
        self.assertTrue({"n_min",
                         "n_max"}.issubset(dat.categories.toPandas().columns))
        ampl = solve(dat)
        # The solution of this first solve was never inspected by the test;
        # the read-back is kept so that it is still exercised.
        read_solution(ampl)

        # --- Quantity split evenly between "Quantity" and "Other Quantity"
        diet_dat_two = _diet_input_tdf.copy_tic_dat(_diet_dat)
        for r in diet_dat_two.nutrition_quantities.values():
            r["Quantity"], r["Other Quantity"] = [0.5 * r["Quantity"]] * 2

        ampl = solve(to_ampl(diet_dat_two))
        self.assertEqual("solved", ampl.getValue("solve_result"))
        sln = read_solution(ampl)
        self.assertTrue(_nearly_same_dat(_diet_sln_tdf, sln, _diet_sln_ticdat))

        # --- an empty renaming drops the field; the listed table is excluded
        dat = _diet_input_tdf.copy_to_ampl(
            _diet_dat, {
                ("foods", "Cost"): "cost",
                ("categories", "Min Nutrition"): "",
                ("categories", "Max Nutrition"): "n_max"
            }, ["nutrition_quantities"])
        self.assertFalse(hasattr(dat, "nutrition_quantities"))
        self.assertEqual(
            {"n_min", "n_max"}.intersection(dat.categories.toPandas().columns),
            {"n_max"})

        # --- read variables with no data field (False) and with a filter;
        #     `ampl` is still the session of the "solved" run above
        sln_tdf_2 = TicDatFactory(buy_food=[["Food"], ["Quantity"]],
                                  consume_nutrition=[["Category"], []])
        sln_tdf_2.set_default_value("buy_food", "Quantity", 1)
        sln_2 = sln_tdf_2.copy_from_ampl_variables({
            ("buy_food", False):
            ampl.getVariable("Buy"),
            ("consume_nutrition", False):
            (ampl.getVariable("Consume"), lambda x: x < 100)
        })
        self.assertEqual(set(sln_2.buy_food), set(sln.buy_food))
        # Quantity was not populated from AMPL, so every row carries the default
        self.assertTrue(
            all(v["Quantity"] == 1 for v in sln_2.buy_food.values()))
        self.assertTrue(sln_2.consume_nutrition)
        self.assertEqual(
            set(sln_2.consume_nutrition),
            {k for k, v in sln.consume_nutrition.items()
             if v["Quantity"] < 100})

        # --- calories capped at [0, 200]: the test expects "infeasible"
        diet_dat_two = _diet_input_tdf.copy_tic_dat(_diet_dat)
        diet_dat_two.categories["calories"] = [0, 200]
        ampl = solve(to_ampl(diet_dat_two))
        self.assertEqual("infeasible", ampl.getValue("solve_result"))

        # --- no nutrition upper bounds and a negative-cost food: "unbounded"
        diet_dat_two = _diet_input_tdf.copy_tic_dat(_diet_dat)
        for v in diet_dat_two.categories.values():
            v["Max Nutrition"] = float("inf")
        diet_dat_two.foods["hamburger"] = -1
        ampl = solve(to_ampl(diet_dat_two))
        self.assertEqual("unbounded", ampl.getValue("solve_result"))