def testXToMany(self):
    """Check foreign key failure detection and removal for x-to-many links.

    Two schemas are exercised: one whose foreign keys all report the
    "many-to-many" cardinality and one whose single foreign key reports
    "one-to-many". For each, clean data must produce no foreign key
    failures, data with deliberately bad child rows must produce the
    expected failure counts, and ``remove_foreign_key_failures`` must
    bring the data back to the clean state.
    """

    def as_pan_dat(schema, tic_factory, tic_dat):
        # Convert the TicDat object to pandas and copy it into ``schema``.
        return schema.copy_pan_dat(
            copy_to_pandas_with_reset(tic_factory, tic_dat))

    # ---------------- many-to-many schema ----------------
    schema_tables = {
        "roster": [["Name"],
                   ["Grade", "Arrival Inning", "Departure Inning",
                    "Min Innings Played", "Max Innings Played"]],
        "positions": [["Position"],
                      ["Position Importance", "Position Group",
                       "Consecutive Innings Only"]],
        "innings": [["Inning"], ["Inning Group"]],
        "position_constraints": [["Position Group", "Inning Group", "Grade"],
                                 ["Min Players", "Max Players"]],
    }
    input_schema = PanDatFactory(**schema_tables)
    # Each link maps a same-named field from position_constraints to a parent.
    for parent_table, shared_field in (("roster", "Grade"),
                                       ("positions", "Position Group"),
                                       ("innings", "Inning Group")):
        input_schema.add_foreign_key("position_constraints", parent_table,
                                     [shared_field, shared_field])
    cardinalities = {fk.cardinality for fk in input_schema.foreign_keys}
    self.assertTrue(cardinalities == {"many-to-many"})

    tdf = TicDatFactory(**input_schema.schema())
    dat = tdf.TicDat()

    player_names = ["bob", "joe", "fred", "alice", "lisa", "joean", "ginny"]
    for idx, player in enumerate(player_names):
        dat.roster[player]["Grade"] = (idx % 3) + 1
    dat.roster["dummy"]["Grade"] = "whatevers"

    position_names = ["pitcher", "catcher", "1b", "2b", "ss", "3b",
                      "lf", "cf", "rf"]
    for idx, position in enumerate(position_names):
        dat.positions[position]["Position Group"] = "PG %s" % ((idx % 4) + 1)

    for inning in range(1, 10):
        if inning < 7:
            inning_group = "before stretch"
        else:
            inning_group = "after stretch"
        dat.innings[inning]["Inning Group"] = inning_group
    dat.innings[0] = {}

    position_groups = ["PG %s" % n for n in range(1, 5)]
    inning_groups = ["before stretch", "after stretch"]
    for constraint_key in itertools.product(position_groups, inning_groups,
                                            [1, 2, 3]):
        dat.position_constraints[constraint_key] = {}

    orig_pan_dat = as_pan_dat(input_schema, tdf, dat)
    self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))

    # Add two child rows whose key values break the foreign keys.
    dat.position_constraints["no", "no", "no"] = dat.position_constraints[1, 2, 3] = {}
    new_pan_dat = as_pan_dat(input_schema, tdf, dat)
    self.assertFalse(input_schema._same_data(orig_pan_dat, new_pan_dat))

    fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
    fk_fails_2 = input_schema.find_foreign_key_failures(new_pan_dat,
                                                        verbosity="Low")
    fk_fails_3 = input_schema.find_foreign_key_failures(new_pan_dat,
                                                        verbosity="Low",
                                                        as_table=False)
    expected = {
        ('position_constraints', 'innings',
         ("Inning Group", "Inning Group")): 2,
        ('position_constraints', 'positions',
         ("Position Group", "Position Group")): 2,
        ('position_constraints', 'roster', ("Grade", "Grade")): 1,
    }
    # All three reporting styles must agree with each other and with the
    # expected per-foreign-key failure counts.
    self.assertTrue(
        {tuple(fk)[:2] + (tuple(fk[2]), ): len(rows)
         for fk, rows in fk_fails.items()}
        == {fk: len(rows) for fk, rows in fk_fails_2.items()}
        == {fk: flags.count(True) for fk, flags in fk_fails_3.items()}
        == expected)

    input_schema.remove_foreign_key_failures(new_pan_dat)
    self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
    self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))

    # ---------------- one-to-many schema ----------------
    input_schema = PanDatFactory(table_one=[["One", "Two"], []],
                                 table_two=[["One"], ["Two"]])
    input_schema.add_foreign_key("table_two", "table_one", ["One", "One"])
    cardinalities = {fk.cardinality for fk in input_schema.foreign_keys}
    self.assertTrue(cardinalities == {"one-to-many"})

    tdf = TicDatFactory(**input_schema.schema())
    dat = tdf.TicDat(table_one=[[1, 2], [3, 4], [5, 6], [7, 8]],
                     table_two={1: 2, 3: 4, 5: 6})
    orig_pan_dat = as_pan_dat(input_schema, tdf, dat)
    self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))

    # 9 does not appear as a "One" value in table_one.
    dat.table_two[9] = 10
    new_pan_dat = as_pan_dat(input_schema, tdf, dat)
    fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
    failure_counts = {tuple(fk)[:2]: len(rows)
                      for fk, rows in fk_fails.items()}
    self.assertTrue(failure_counts == {('table_two', 'table_one'): 1})

    input_schema.remove_foreign_key_failures(new_pan_dat)
    self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
    self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))
def testIssue45(self):
    """Check number-vs-string handling across xlsx and csv round trips.

    Numeric data and numeric-looking string data are written out and read
    back twice per format: once with a factory that has no data types set,
    and once with a factory whose "a" field is declared as strings-only
    (``number_allowed=False, strings_allowed='*'``).
    """

    def untyped_factory():
        return PanDatFactory(data=[["a"], ["b"]])

    def string_key_factory():
        factory = untyped_factory()
        factory.set_data_type("data", "a", number_allowed=False,
                              strings_allowed='*')
        return factory

    xls_plain = untyped_factory()
    tdf = TicDatFactory(**xls_plain.schema())

    def to_frames(rows):
        # Build a TicDat from ``rows`` and convert it, keeping the pk column.
        return tdf.copy_to_pandas(tdf.TicDat(data=rows),
                                  drop_pk_columns=False)

    dat_nums = to_frames([[1, 2], [3, 4], [22, 44]])
    dat_strs = to_frames([["1", "2"], ["3", "4"], ["022", "0044"]])

    # ---------------- xlsx, no data types ----------------
    xls_paths = [os.path.join(_scratchDir, file_name)
                 for file_name in ["dat_nums.xlsx", "dat_strs.xlsx"]]
    for frames, path in zip((dat_nums, dat_strs), xls_paths):
        xls_plain.xls.write_file(frames, path)
    nums_back, strs_back = (xls_plain.xls.create_pan_dat(path)
                            for path in xls_paths)
    self.assertTrue(xls_plain._same_data(dat_nums, nums_back))
    # this is pandas pushing things to be numeric
    self.assertFalse(xls_plain._same_data(dat_strs, strs_back))
    self.assertTrue(xls_plain._same_data(dat_nums, strs_back))

    # ---------------- xlsx, "a" declared strings-only ----------------
    xls_typed = string_key_factory()
    dat_mixed = to_frames([["1", 2], ["3", 4], ["022", 44]])
    nums_back, strs_back = (xls_typed.xls.create_pan_dat(path)
                            for path in xls_paths)
    self.assertFalse(xls_typed._same_data(dat_nums, nums_back))
    self.assertFalse(xls_typed._same_data(dat_strs, strs_back))
    self.assertFalse(xls_typed._same_data(nums_back, dat_mixed))
    self.assertTrue(xls_typed._same_data(strs_back, dat_mixed))

    # ---------------- csv, no data types ----------------
    csv_plain = untyped_factory()
    csv_dirs = [os.path.join(_scratchDir, dir_name)
                for dir_name in ["dat_nums_csv", "dat_strs_csv"]]
    for frames, path in zip((dat_nums, dat_strs), csv_dirs):
        csv_plain.csv.write_directory(frames, path)
    nums_back, strs_back = (csv_plain.csv.create_pan_dat(path)
                            for path in csv_dirs)
    self.assertTrue(csv_plain._same_data(dat_nums, nums_back))
    # this is pandas pushing things to be numeric
    self.assertFalse(csv_plain._same_data(dat_strs, strs_back))
    self.assertTrue(csv_plain._same_data(dat_nums, strs_back))

    # ---------------- csv, "a" declared strings-only ----------------
    csv_typed = string_key_factory()
    nums_back, strs_back = (csv_typed.csv.create_pan_dat(path)
                            for path in csv_dirs)
    self.assertFalse(csv_typed._same_data(dat_nums, nums_back))
    self.assertFalse(csv_typed._same_data(dat_strs, strs_back))
    self.assertFalse(csv_typed._same_data(nums_back, strs_back))
    self.assertTrue(csv_typed._same_data(strs_back, dat_mixed))
def testCsvSimple(self):
    """Round-trip the diet and netflow data sets through csv directories.

    Covers: a plain write/read, reading with a generic ('*') factory,
    reading directories that hold only a subset of the schema's tables,
    and writing/reading with ``decimal=","``.
    Does nothing when ``self.can_run`` is falsy.
    """
    if not self.can_run:
        return

    def load(schema_fn, data_fn):
        # Build the PanDat factory and a PanDat object for one data set.
        tic_factory = TicDatFactory(**schema_fn())
        pan_factory = PanDatFactory(**schema_fn())
        tables = {name: getattr(data_fn(), name)
                  for name in tic_factory.primary_key_fields}
        frozen = tic_factory.freeze_me(tic_factory.TicDat(**tables))
        return pan_factory, pan_dat_maker(schema_fn(), frozen)

    def generic_factory(factory):
        # Same table names as ``factory`` but every table declared as '*'.
        return PanDatFactory(**{name: '*' for name in factory.all_tables})

    # ---------------- diet: plain and generic read ----------------
    pdf, panDat = load(dietSchema, dietData)
    csv_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, csv_dir)
    reread = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    reread = generic_factory(pdf).csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    # ---------------- diet: nutritionQuantities table absent ----------------
    partial_pdf = PanDatFactory(**{
        table: fields for table, fields in dietSchema().items()
        if table != "nutritionQuantities"
    })
    partial_dat = partial_pdf.copy_pan_dat(panDat)
    csv_dir = os.path.join(_scratchDir, "diet_missing_csv")
    # makeCleanDir runs before the write; its return value is forwarded
    # as the third positional argument, exactly as the call was written.
    cleaned = makeCleanDir(csv_dir)
    partial_pdf.csv.write_directory(partial_dat, csv_dir, cleaned)
    full_read = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(partial_pdf._same_data(partial_dat, full_read))
    self.assertTrue(all(hasattr(full_read, name) for name in pdf.all_tables))
    self.assertFalse(len(full_read.nutritionQuantities))
    self.assertTrue(len(full_read.categories) and len(full_read.foods))

    # ---------------- diet: only the categories table present ----------------
    partial_pdf = PanDatFactory(**{
        table: fields for table, fields in dietSchema().items()
        if table == "categories"
    })
    partial_dat = partial_pdf.copy_pan_dat(panDat)
    cleaned = makeCleanDir(csv_dir)
    partial_pdf.csv.write_directory(partial_dat, csv_dir, cleaned)
    full_read = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(partial_pdf._same_data(partial_dat, full_read))
    self.assertTrue(all(hasattr(full_read, name) for name in pdf.all_tables))
    self.assertFalse(
        len(full_read.nutritionQuantities) or len(full_read.foods))
    self.assertTrue(len(full_read.categories))

    # ---------------- netflow: plain and generic round trip ----------------
    pdf, panDat = load(netflowSchema, netflowData)
    csv_dir = os.path.join(_scratchDir, "netflow_csv")
    pdf.csv.write_directory(panDat, csv_dir)
    reread = pdf.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    generic = generic_factory(pdf)
    generic.csv.write_directory(panDat, csv_dir)
    reread = generic.csv.create_pan_dat(csv_dir)
    self.assertTrue(pdf._same_data(panDat, reread))

    # ---------------- diet: comma as the decimal mark ----------------
    pdf, panDat = load(dietSchema, dietData)
    csv_dir = os.path.join(_scratchDir, "diet_csv")
    pdf.csv.write_directory(panDat, csv_dir, decimal=",")
    # Reading without the matching decimal argument must not reproduce
    # the data; reading with it must.
    reread = pdf.csv.create_pan_dat(csv_dir)
    self.assertFalse(pdf._same_data(panDat, reread))
    reread = pdf.csv.create_pan_dat(csv_dir, decimal=",")
    self.assertTrue(pdf._same_data(panDat, reread))
def testJsonSimple(self):
    """Round-trip the diet and netflow data sets through JSON.

    Covers: file round trips, a generic ('*') factory, column ordering
    under an explicitly re-fielded schema, JSON text returned by
    ``write_file`` when given an empty path, building a PanDat straight
    from the parsed JSON dict, and the ``orient='columns'`` layout.
    Does nothing when ``self.can_run`` is falsy.
    """
    if not self.can_run:
        return

    def load(schema_fn, data_fn):
        # Build the PanDat factory and a PanDat object for one data set.
        tic_factory = TicDatFactory(**schema_fn())
        pan_factory = PanDatFactory(**schema_fn())
        tables = {name: getattr(data_fn(), name)
                  for name in tic_factory.primary_key_fields}
        frozen = tic_factory.freeze_me(tic_factory.TicDat(**tables))
        return pan_factory, pan_dat_maker(schema_fn(), frozen)

    def generic_factory(factory):
        # Same table names as ``factory`` but every table declared as '*'.
        return PanDatFactory(**{name: '*' for name in factory.all_tables})

    # ---------------- diet: file round trips ----------------
    pdf, panDat = load(dietSchema, dietData)
    json_path = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, json_path)
    reread = pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(panDat, reread, epsilon=1e-5))

    generic = generic_factory(pdf)
    generic.json.write_file(panDat, json_path)
    reread = generic.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(panDat, reread, epsilon=1e-5))

    # ---------------- diet: column order follows the schema ----------------
    re_fielded_schema = {
        "categories": (("name", ), ["maxNutrition", "minNutrition"]),
        "foods": [["name"], []],
        "nutritionQuantities": (["food", "category"], ["qty"]),
    }
    re_fielded_dat = PanDatFactory(**re_fielded_schema).json.create_pan_dat(
        json_path)
    for table, (pks, dfs) in re_fielded_schema.items():
        wanted_columns = list(pks) + list(dfs)
        actual_columns = list(getattr(re_fielded_dat, table).columns)
        self.assertTrue(wanted_columns == actual_columns)

    # ---------------- netflow: file, text and dict round trips ----------------
    pdf, panDat = load(netflowSchema, netflowData)
    json_path = os.path.join(_scratchDir, "netflow.json")
    pdf.json.write_file(panDat, json_path)
    reread = pdf.json.create_pan_dat(json_path)
    self.assertTrue(pdf._same_data(panDat, reread, epsilon=1e-5))

    # With an empty path, the value returned by write_file is fed back in
    # directly (and, below, parsed with json.loads).
    json_text = pdf.json.write_file(panDat, "")
    from_text = pdf.json.create_pan_dat(json_text)
    self.assertTrue(pdf._same_data(panDat, from_text))

    dicted = json.loads(pdf.json.write_file(panDat, ""))
    from_dict = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, from_dict))

    generic = generic_factory(pdf)
    from_dict_generic = generic.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, from_dict_generic))

    # ---------------- diet: orient='columns' ----------------
    pdf, panDat = load(dietSchema, dietData)
    json_path = os.path.join(_scratchDir, "diet.json")
    pdf.json.write_file(panDat, json_path, orient='columns', index=True)
    # the following doesn't generate a TicDatError, which is fine
    self.assertTrue(
        firesException(lambda: pdf.json.create_pan_dat(json_path)))
    reread = pdf.json.create_pan_dat(json_path, orient='columns')
    self.assertTrue(pdf._same_data(panDat, reread, epsilon=1e-5))

    json_text = pdf.json.write_file(panDat, "", orient='columns')
    from_text = pdf.json.create_pan_dat(json_text, orient="columns")
    self.assertTrue(pdf._same_data(panDat, from_text, epsilon=1e-5))

    dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
    from_dict = pdf.PanDat(**dicted)
    self.assertTrue(pdf._same_data(panDat, from_dict, epsilon=1e-5))
def testDefaultAdd(self):
    """Check reading data that lacks a field the schema declares.

    The diet data is written as xlsx, sql, csv and JSON text. A schema that
    adds an "extra" data field to the foods table must then (a) fail on
    every reader with a message mentioning "missing" and "extra", and
    (b) succeed with ``fill_missing_fields=True``, filling "extra" with 0,
    or with 13 once that is set as the field's default value.
    Does nothing when ``self.can_run`` is falsy.
    """
    if not self.can_run:
        return

    tdf = TicDatFactory(**dietSchema())
    pdf = PanDatFactory(**dietSchema())
    diet_tables = {name: getattr(dietData(), name)
                   for name in tdf.primary_key_fields}
    ticDat = tdf.freeze_me(tdf.TicDat(**diet_tables))
    panDat = pan_dat_maker(dietSchema(), ticDat)

    xls_path = os.path.join(_scratchDir, "diet_add.xlsx")
    pdf.xls.write_file(panDat, xls_path)
    sql_path = os.path.join(_scratchDir, "diet_add.sql")
    pdf.sql.write_file(panDat, sql_path)
    csv_dir = os.path.join(_scratchDir, "diet_add_csv")
    pdf.csv.write_directory(panDat, csv_dir, case_space_table_names=True)

    def readers(factory):
        # Map each format to a callable that reads the diet data written
        # above with ``factory``. The JSON text is regenerated per call.
        return {
            "xls": lambda **kw: factory.xls.create_pan_dat(xls_path, **kw),
            "sql": lambda **kw: factory.sql.create_pan_dat(sql_path, **kw),
            "csv": lambda **kw: factory.csv.create_pan_dat(csv_dir, **kw),
            "json": lambda **kw: factory.json.create_pan_dat(
                pdf.json.write_file(panDat, ""), **kw),
        }

    def check_filled(factory, fill_value):
        # With fill_missing_fields=True every reader must populate "extra"
        # with ``fill_value``; once dropped, the data must match panDat.
        read_with = readers(factory)
        for fmt in ("sql", "xls", "csv", "json"):
            filled = read_with[fmt](fill_missing_fields=True)
            self.assertTrue(set(filled.foods["extra"]) == {fill_value})
            filled.foods.drop("extra", axis=1, inplace=True)
            if fmt == "json":
                self.assertTrue(pdf._same_data(panDat, filled, epsilon=1e-5))
            else:
                self.assertTrue(pdf._same_data(panDat, filled))

    # Diet schema with one additional data field on the foods table.
    extended_schema = {}
    for table, (pks, dfs) in dietSchema().items():
        if table == "foods":
            extended_schema[table] = [pks, list(dfs) + ["extra"]]
        else:
            extended_schema[table] = [pks, dfs]
    pdf2 = PanDatFactory(**extended_schema)

    # Without fill_missing_fields, every reader must complain.
    strict_readers = readers(pdf2)
    for fmt in ("xls", "sql", "csv", "json"):
        ex = self.firesException(strict_readers[fmt])
        self.assertTrue("missing" in ex and "extra" in ex)

    check_filled(pdf2, 0)

    pdf3 = PanDatFactory(**pdf2.schema())
    pdf3.set_default_value("foods", "extra", 13)
    check_filled(pdf3, 13)