示例#1
0
    def testXToManyTwo(self):
        input_schema = PanDatFactory(parent=[["F1", "F2"], ["F3"]],
                                     child_one=[["F1", "F2", "F3"], []],
                                     child_two=[["F1", "F2"], ["F3"]],
                                     child_three=[[], ["F1", "F2", "F3"]])
        for t in ["child_one", "child_two", "child_three"]:
            input_schema.add_foreign_key(t, "parent",
                                         [["F1"] * 2, ["F2"] * 2, ["F3"] * 2])
        self.assertTrue({fk.cardinality
                         for fk in input_schema.foreign_keys} ==
                        {"one-to-one", "many-to-one"})

        rows = [[1, 2, 3], [1, 2.1, 3], [4, 5, 6], [4, 5.1, 6], [7, 8, 9]]
        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat(parent=rows,
                         child_one=rows,
                         child_two=rows,
                         child_three=rows)
        self.assertTrue(
            all(len(getattr(dat, t)) == 5 for t in input_schema.all_tables))
        orig_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))
        dat.child_one[1, 2, 4] = {}
        dat.child_two[1, 2.2] = 3
        dat.child_three.append([1, 2, 4])
        new_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
        self.assertTrue(len(fk_fails) == 3)
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))

        input_schema = PanDatFactory(parent=[["F1", "F2"], ["F3"]],
                                     child_one=[["F1", "F2", "F3"], []],
                                     child_two=[["F1", "F2"], ["F3"]],
                                     child_three=[[], ["F1", "F2", "F3"]])
        for t in ["child_one", "child_two", "child_three"]:
            input_schema.add_foreign_key(t, "parent", [["F1"] * 2, ["F3"] * 2])
        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat(parent=rows,
                         child_one=rows,
                         child_two=rows,
                         child_three=rows)
        self.assertTrue(
            all(len(getattr(dat, t)) == 5 for t in input_schema.all_tables))
        orig_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))
        dat.child_one[1, 2, 4] = {}
        dat.child_two[1, 2.2] = 4
        dat.child_three.append([1, 2, 4])
        new_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        self.assertTrue(
            len(input_schema.find_foreign_key_failures(new_pan_dat)) == 3)
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))
示例#2
0
    def testAdditionalFKs(self):
        pdf = PanDatFactory(pt1=[["F1"], []],
                            pt2=[["F2"], []],
                            pt3=[["F1", "F2"], []],
                            pt4=[["F1"], ["F2"]],
                            pt5=[[], ["F1", "F2"]])
        for c in ["pt3", "pt4", "pt5"]:
            pdf.add_foreign_key(c, "pt1", ["F1", "F1"])
            pdf.add_foreign_key(c, "pt2", ["F2", "F2"])
        tdf = TicDatFactory(**pdf.schema())

        def pan_dat_(_):
            rtn = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, _))
            self.assertFalse(pdf.find_duplicates(rtn))
            return rtn

        ticDat = tdf.TicDat(pt1=[1, 2, 3, 4], pt2=[5, 6, 7, 8])
        for f1, f2 in itertools.product(range(1, 5), range(5, 9)):
            ticDat.pt3[f1, f2] = {}
            ticDat.pt4[f1] = f2
            ticDat.pt5.append((f1, f2))
        origDat = tdf.copy_tic_dat(ticDat, freeze_it=True)
        self.assertFalse(pdf.find_foreign_key_failures(pan_dat_(origDat)))
        ticDat.pt3["no", 6] = ticDat.pt3[1, "no"] = {}
        ticDat.pt4["no"] = 6
        ticDat.pt4["nono"] = 6.01
        panDat = pan_dat_(ticDat)
        fails1 = pdf.find_foreign_key_failures(panDat)
        self.assertTrue(fails1)
        pdf.remove_foreign_key_failures(panDat)
        self.assertFalse(pdf.find_foreign_key_failures(panDat))
        self.assertTrue(pdf._same_data(panDat, pan_dat_(origDat)))

        orig_lens = {t: len(getattr(origDat, t)) for t in tdf.all_tables}
        ticDat.pt3["no", 6] = ticDat.pt3[1, "no"] = {}
        ticDat.pt4["no"] = 6
        ticDat.pt4["nono"] = 6.01
        ticDat.pt5.append(("no", 6))
        ticDat.pt5.append((1, "no"))
        panDat = pan_dat_(ticDat)
        fails2 = pdf.find_foreign_key_failures(panDat)
        self.assertTrue(
            set(fails1) != set(fails2) and set(fails1).issubset(fails2))
        pdf.remove_foreign_key_failures(panDat)
        self.assertFalse(pdf.find_foreign_key_failures(panDat))
        self.assertTrue({t: len(getattr(panDat, t))
                         for t in tdf.all_tables} == orig_lens)
示例#3
0
    def testBasicFKs(self):
        for cloning in [True, False, "*"]:
            clone_me_maybe = lambda x : x.clone(tdf.all_tables if cloning == "*" else None) if cloning else x

            pdf = PanDatFactory(plants = [["name"], ["stuff", "otherstuff"]],
                                lines = [["name"], ["plant", "weird stuff"]],
                                line_descriptor = [["name"], ["booger"]],
                                products = [["name"],["gover"]],
                                production = [["line", "product"], ["min", "max"]],
                                pureTestingTable = [[], ["line", "plant", "product", "something"]],
                                extraProduction = [["line", "product"], ["extramin", "extramax"]],
                                weirdProduction = [["line1", "line2", "product"], ["weirdmin", "weirdmax"]])
            pdf.add_foreign_key("production", "lines", ("line", "name"))
            pdf.add_foreign_key("production", "products", ("product", "name"))
            pdf.add_foreign_key("lines", "plants", ("plant", "name"))
            pdf.add_foreign_key("line_descriptor", "lines", ("name", "name"))
            for f in set(pdf.data_fields["pureTestingTable"]).difference({"something"}):
                pdf.add_foreign_key("pureTestingTable", "%ss"%f, (f,"name"))
            pdf.add_foreign_key("extraProduction", "production", (("line", "line"), ("product","product")))
            pdf.add_foreign_key("weirdProduction", "production", (("line1", "line"), ("product","product")))
            pdf.add_foreign_key("weirdProduction", "extraProduction", (("line2","line"), ("product","product")))
            self._testPdfReproduction(pdf)
            pdf = clone_me_maybe(pdf)

            tdf = TicDatFactory(**pdf.schema())
            goodDat = tdf.TicDat()
            goodDat.plants["Cleveland"] = ["this", "that"]
            goodDat.plants["Newark"]["otherstuff"] =1
            goodDat.products["widgets"] = goodDat.products["gadgets"] = "shizzle"

            for i,p in enumerate(goodDat.plants):
                goodDat.lines[i]["plant"] = p

            for i,(pl, pd) in enumerate(itertools.product(goodDat.lines, goodDat.products)):
                goodDat.production[pl, pd] = {"min":1, "max":10+i}

            badDat1 = tdf.copy_tic_dat(goodDat)
            badDat1.production["notaline", "widgets"] = [0,1]
            badDat2 = tdf.copy_tic_dat(badDat1)


            def pan_dat_(_):
                rtn = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, _))
                self.assertFalse(pdf.find_duplicates(rtn))
                return rtn
            fk, fkm = ForeignKey, ForeignKeyMapping
            fk_fails1 = pdf.find_foreign_key_failures(pan_dat_(badDat1))
            fk_fails2 = pdf.find_foreign_key_failures(pan_dat_(badDat2))

            self.assertTrue(set(fk_fails1) == set(fk_fails2) ==
                            {fk('production', 'lines', fkm('line', 'name'), 'many-to-one')})
            self.assertTrue(set(pdf.find_foreign_key_failures(pan_dat_(badDat1), verbosity="Low")) ==
                            set(pdf.find_foreign_key_failures(pan_dat_(badDat2), verbosity="Low")) ==
                             {('production', 'lines', ('line', 'name'))})
            for row_fails in [next(iter(_.values())) for _ in [fk_fails1, fk_fails2]]:
                self.assertTrue(set(row_fails["line"]) == {"notaline"} and set(row_fails["product"]) == {"widgets"})

            badDat1.lines["notaline"]["plant"] = badDat2.lines["notaline"]["plant"] = "notnewark"
            fk_fails1 = pdf.find_foreign_key_failures(pan_dat_(badDat1))
            fk_fails2 = pdf.find_foreign_key_failures(pan_dat_(badDat2))
            self.assertTrue(set(fk_fails1) == set(fk_fails2) ==
                            {fk('lines', 'plants', fkm('plant', 'name'), 'many-to-one')})
            for row_fails in [next(iter(_.values())) for _ in [fk_fails1, fk_fails2]]:
                self.assertTrue(set(row_fails["name"]) == {"notaline"} and set(row_fails["plant"]) == {"notnewark"})


            for bad in [badDat1, badDat2]:
                bad_pan = pdf.remove_foreign_key_failures(pan_dat_(bad))
                self.assertFalse(pdf.find_foreign_key_failures(bad_pan))
                self.assertTrue(pdf._same_data(bad_pan, pan_dat_(goodDat)))


            _ = len(goodDat.lines)
            for i,p in enumerate(list(goodDat.plants.keys()) + list(goodDat.plants.keys())):
                goodDat.lines[i+_]["plant"] = p
            for l in goodDat.lines:
                if i%2:
                    goodDat.line_descriptor[l] = i+10

            for i,(l,pl,pdct) in enumerate(sorted(itertools.product(goodDat.lines, goodDat.plants, goodDat.products))):
                goodDat.pureTestingTable.append((l,pl,pdct,i))
            self.assertFalse(pdf.find_foreign_key_failures(pan_dat_(goodDat)))
            badDat = tdf.copy_tic_dat(goodDat)
            badDat.pureTestingTable.append(("j", "u", "nk", "ay"))
            fk_fails = pdf.find_foreign_key_failures(pan_dat_(badDat))
            self.assertTrue(set(fk_fails) ==
                {fk('pureTestingTable', 'plants', fkm('plant', 'name'), 'many-to-one'),
                 fk('pureTestingTable', 'products', fkm('product', 'name'), 'many-to-one'),
                 fk('pureTestingTable', 'lines', fkm('line', 'name'), 'many-to-one')})

            for df in fk_fails.values():
                df = df.T
                c = df.columns[0]
                self.assertTrue({'ay', 'j', 'nk', 'u'} == set(df[c]))
示例#4
0
    def testXToMany(self):
        input_schema = PanDatFactory (roster = [["Name"],["Grade", "Arrival Inning", "Departure Inning",
                                                          "Min Innings Played", "Max Innings Played"]],
                                      positions = [["Position"],["Position Importance", "Position Group",
                                                                 "Consecutive Innings Only"]],
                                      innings = [["Inning"],["Inning Group"]],
                                      position_constraints = [["Position Group", "Inning Group", "Grade"],
                                                              ["Min Players", "Max Players"]])
        input_schema.add_foreign_key("position_constraints", "roster", ["Grade", "Grade"])
        input_schema.add_foreign_key("position_constraints", "positions", ["Position Group", "Position Group"])
        input_schema.add_foreign_key("position_constraints", "innings", ["Inning Group", "Inning Group"])

        self.assertTrue({fk.cardinality for fk in input_schema.foreign_keys} == {"many-to-many"})

        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat()
        for i,p in enumerate(["bob", "joe", "fred", "alice", "lisa", "joean", "ginny"]):
            dat.roster[p]["Grade"] = (i%3)+1
        dat.roster["dummy"]["Grade"]  = "whatevers"
        for i,p in enumerate(["pitcher", "catcher", "1b", "2b", "ss", "3b", "lf", "cf", "rf"]):
            dat.positions[p]["Position Group"] = "PG %s"%((i%4)+1)
        for i in range(1, 10):
            dat.innings[i]["Inning Group"] = "before stretch" if i < 7 else "after stretch"
        dat.innings[0] ={}
        for pg, ig, g in itertools.product(["PG %s"%i for i in range(1,5)], ["before stretch", "after stretch"],
                                           [1, 2, 3]):
            dat.position_constraints[pg, ig, g] = {}

        orig_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))

        dat.position_constraints["no", "no", "no"] = dat.position_constraints[1, 2, 3] = {}
        new_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema._same_data(orig_pan_dat, new_pan_dat))
        fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
        fk_fails_2 = input_schema.find_foreign_key_failures(new_pan_dat, verbosity="Low")
        fk_fails_3 = input_schema.find_foreign_key_failures(new_pan_dat, verbosity="Low", as_table=False)
        self.assertTrue({tuple(k)[:2] + (tuple(k[2]),): len(v) for k,v in fk_fails.items()} ==
                        {k:len(v) for k,v in fk_fails_2.items()} ==
                        {k:v.count(True) for k,v in fk_fails_3.items()} ==
                        {('position_constraints', 'innings', ("Inning Group", "Inning Group")): 2,
                         ('position_constraints', 'positions', ("Position Group", "Position Group")): 2,
                         ('position_constraints', 'roster', ("Grade", "Grade")): 1})
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))

        input_schema = PanDatFactory(table_one=[["One", "Two"], []],
                                     table_two=[["One"], ["Two"]])
        input_schema.add_foreign_key("table_two", "table_one", ["One", "One"])
        self.assertTrue({fk.cardinality for fk in input_schema.foreign_keys} == {"one-to-many"})

        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat(table_one = [[1,2], [3,4], [5,6], [7,8]], table_two = {1:2, 3:4, 5:6})

        orig_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))
        dat.table_two[9]=10
        new_pan_dat = input_schema.copy_pan_dat(copy_to_pandas_with_reset(tdf, dat))
        fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
        self.assertTrue({tuple(k)[:2]:len(v) for k,v in fk_fails.items()} == {('table_two', 'table_one'): 1})
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))