Пример #1
0
 def testFindDups(self):
     pdf = PanDatFactory(**sillyMeSchema())
     tdf = TicDatFactory(
         **{
             k: [[], list(pkfs) + list(dfs)]
             for k, (pkfs, dfs) in sillyMeSchema().items()
         })
     rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)]
     ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables})
     panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat))
     dups = pdf.find_duplicates(panDat)
     self.assertTrue(set(dups) == {'a'} and set(dups['a']['aField']) == {1})
     dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
     self.assertTrue(
         set(dups) == {'a'} and dups['a'].value_counts()[True] == 2)
     dups = pdf.find_duplicates(panDat, as_table=False)
     self.assertTrue(
         set(dups) == {'a'} and dups['a'].value_counts()[True] == 1)
     rows = [(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1, 2, 3, 40)]
     ticDat = tdf.TicDat(**{t: rows for t in tdf.all_tables})
     panDat = pdf.copy_pan_dat(copy_to_pandas_with_reset(tdf, ticDat))
     dups = pdf.find_duplicates(panDat, keep=False)
     self.assertTrue(
         set(dups) == {'a', 'b'} and set(dups['a']['aField']) == {1})
     dups = pdf.find_duplicates(panDat, as_table=False, keep=False)
     self.assertTrue({k: v.value_counts()[True]
                      for k, v in dups.items()} == {
                          'a': 3,
                          'b': 2
                      })
Пример #2
0
    def testXToManyTwo(self):
        input_schema = PanDatFactory(parent=[["F1", "F2"], ["F3"]],
                                     child_one=[["F1", "F2", "F3"], []],
                                     child_two=[["F1", "F2"], ["F3"]],
                                     child_three=[[], ["F1", "F2", "F3"]])
        for t in ["child_one", "child_two", "child_three"]:
            input_schema.add_foreign_key(t, "parent",
                                         [["F1"] * 2, ["F2"] * 2, ["F3"] * 2])
        self.assertTrue({fk.cardinality
                         for fk in input_schema.foreign_keys} ==
                        {"one-to-one", "many-to-one"})

        rows = [[1, 2, 3], [1, 2.1, 3], [4, 5, 6], [4, 5.1, 6], [7, 8, 9]]
        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat(parent=rows,
                         child_one=rows,
                         child_two=rows,
                         child_three=rows)
        self.assertTrue(
            all(len(getattr(dat, t)) == 5 for t in input_schema.all_tables))
        orig_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))
        dat.child_one[1, 2, 4] = {}
        dat.child_two[1, 2.2] = 3
        dat.child_three.append([1, 2, 4])
        new_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        fk_fails = input_schema.find_foreign_key_failures(new_pan_dat)
        self.assertTrue(len(fk_fails) == 3)
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))

        input_schema = PanDatFactory(parent=[["F1", "F2"], ["F3"]],
                                     child_one=[["F1", "F2", "F3"], []],
                                     child_two=[["F1", "F2"], ["F3"]],
                                     child_three=[[], ["F1", "F2", "F3"]])
        for t in ["child_one", "child_two", "child_three"]:
            input_schema.add_foreign_key(t, "parent", [["F1"] * 2, ["F3"] * 2])
        tdf = TicDatFactory(**input_schema.schema())
        dat = tdf.TicDat(parent=rows,
                         child_one=rows,
                         child_two=rows,
                         child_three=rows)
        self.assertTrue(
            all(len(getattr(dat, t)) == 5 for t in input_schema.all_tables))
        orig_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        self.assertFalse(input_schema.find_foreign_key_failures(orig_pan_dat))
        dat.child_one[1, 2, 4] = {}
        dat.child_two[1, 2.2] = 4
        dat.child_three.append([1, 2, 4])
        new_pan_dat = input_schema.copy_pan_dat(
            copy_to_pandas_with_reset(tdf, dat))
        self.assertTrue(
            len(input_schema.find_foreign_key_failures(new_pan_dat)) == 3)
        input_schema.remove_foreign_key_failures(new_pan_dat)
        self.assertFalse(input_schema.find_foreign_key_failures(new_pan_dat))
        self.assertTrue(input_schema._same_data(orig_pan_dat, new_pan_dat))
Пример #3
0
    def testNetflow(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        ordered = tdf.sql._ordered_tables()
        self.assertTrue(ordered.index("nodes") < min(ordered.index(_) for _ in ("arcs", "cost", "inflow")))
        self.assertTrue(ordered.index("commodities") < min(ordered.index(_) for _ in ("cost", "inflow")))
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields}))
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
        filePath = os.path.join(_scratchDir, "netflow.sql")
        tdf.sql.write_db_data(ticDat, filePath)
        self.assertFalse(tdf.sql.find_duplicates(filePath))
        sqlTicDat = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        def changeIt() :
            sqlTicDat.inflow['Pencils', 'Boston']["quantity"] = 12
        self.assertTrue(self.firesException(changeIt))
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))

        sqlTicDat = tdf.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))
        self.assertFalse(self.firesException(changeIt))
        self.assertFalse(tdf._same_data(ticDat, sqlTicDat))

        pkHacked = netflowSchema()
        pkHacked["nodes"][0] = ["nimrod"]
        tdfHacked = TicDatFactory(**pkHacked)
        ticDatHacked = tdfHacked.TicDat(**{t : getattr(ticDat, t) for t in tdf.all_tables})
        tdfHacked.sql.write_db_data(ticDatHacked, makeCleanPath(filePath))
        self.assertFalse(tdfHacked.sql.find_duplicates(filePath))
        self.assertTrue(self.firesException(lambda : tdfHacked.sql.write_db_data(ticDat, filePath)))
        tdfHacked.sql.write_db_data(ticDat, filePath, allow_overwrite =True)
        self.assertTrue("Unable to recognize field name in table nodes" in
                        self.firesException(lambda  :tdf.sql.create_tic_dat(filePath)))

        ticDatNew = tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields})

        ticDatNew.cost['Pencils', 'booger', 'wooger'] =  10
        ticDatNew.cost['junker', 'Detroit', 'New York'] =  20
        ticDatNew.cost['bunker', 'Detroit', 'New Jerk'] =  20
        ticDatNew.arcs['booger', 'wooger'] =  112
        self.assertTrue({f[:2] + f[2][:1] : set(v.native_pks) for
                         f,v in tdf.find_foreign_key_failures(ticDatNew).items()} ==
        {('arcs', 'nodes', u'destination'): {('booger', 'wooger')},
         ('arcs', 'nodes', u'source'): {('booger', 'wooger')},
         ('cost', 'commodities', u'commodity'): {('bunker', 'Detroit', 'New Jerk'),
                                                 ('junker', 'Detroit', 'New York')},
         ('cost', 'nodes', u'destination'): {('bunker', 'Detroit', 'New Jerk'),
                                             ('Pencils', 'booger', 'wooger')},
         ('cost', 'nodes', u'source'): {('Pencils', 'booger', 'wooger')}})

        ticDat3 = tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields})
        ticDat3.arcs['Detroit', 'Boston'] = float("inf")
        ticDat3.arcs['Denver',  'Boston'] = float("inf")
        self.assertFalse(tdf._same_data(ticDat3, ticDat))
        tdf.sql.write_db_data(ticDat3, makeCleanPath(filePath))
        ticDat4 = tdf.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat3, ticDat4))
Пример #4
0
    def testNulls(self):
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        dat = tdf.TicDat(table=[[None, 100], [200, "this"], ["that", 300],
                                [300, None], [400, "that"]])
        file_path = os.path.join(_scratchDir, "nulls.accdb")
        tdf.mdb.write_file(dat, file_path)
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertTrue(tdf._same_data(dat, dat_1))

        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table", f, max=float("inf"), inclusive_max=True)
        tdf.set_infinity_io_flag(None)
        dat_inf = tdf.TicDat(
            table=[[float("inf"), 100], [200, "this"], ["that", 300],
                   [300, float("inf")], [400, "that"]])
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        tdf.mdb.write_file(dat_inf, makeCleanPath(file_path))
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))

        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table",
                              f,
                              min=-float("inf"),
                              inclusive_min=True)
        tdf.set_infinity_io_flag(None)
        dat_1 = tdf.mdb.create_tic_dat(file_path)
        self.assertFalse(tdf._same_data(dat_inf, dat_1))
        dat_inf = tdf.TicDat(
            table=[[float("-inf"), 100], [200, "this"], ["that", 300],
                   [300, -float("inf")], [400, "that"]])
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
Пример #5
0
    def testNetflow(self):
        if not self.canRun:
            return
        tdf = TicDatFactory(**netflowSchema())
        tdf.enable_foreign_key_links()
        addNetflowForeignKeys(tdf)
        oldDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields}))
        self._test_generic_free_copy(oldDat, tdf)
        self._test_generic_free_copy(oldDat, tdf, ["arcs", "nodes"])
        ticDat = tdf.copy_to_pandas(oldDat, ["arcs", "cost"])
        self.assertTrue(all(hasattr(ticDat, t) == (t in ["arcs", "cost"]) for t in tdf.all_tables))
        self.assertTrue(len(ticDat.arcs.capacity.sloc["Boston",:]) == len(oldDat.nodes["Boston"].arcs_source) == 0)
        self.assertTrue(len(ticDat.arcs.capacity.sloc[:,"Boston"]) == len(oldDat.nodes["Boston"].arcs_destination) == 2)
        self.assertTrue(all(ticDat.arcs.capacity.sloc[:,"Boston"][src] == r["capacity"]
                            for src, r in oldDat.nodes["Boston"].arcs_destination.items()))
        ticDat = tdf.copy_to_pandas(oldDat, drop_pk_columns=True)
        rebornTicDat = tdf.TicDat(**{t:getattr(ticDat, t) for t in tdf.all_tables})
        # because we have single pk field tables, dropping the pk columns is probelmatic
        self.assertFalse(tdf._same_data(rebornTicDat, oldDat))

        # but with the default argument all is well
        ticDat = tdf.copy_to_pandas(oldDat)
        rebornTicDat = tdf.TicDat(**{t:getattr(ticDat, t) for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
        self.assertTrue(set(ticDat.inflow.columns) == {"quantity"})
        self.assertTrue(set(ticDat.nodes.columns) == {"name"})
Пример #6
0
    def test_numericish_text(self):
        dir_path = os.path.join(_scratchDir, "numericish")
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        dat = tdf.TicDat(
            parameters=[["a", "100"], ["b", "010"], [3, "200"], ["d", "020"]])

        def round_trip():
            tdf.csv.write_directory(dat, makeCleanDir(dir_path))
            return tdf.csv.create_tic_dat(dir_path)

        dat2 = round_trip()
        self.assertFalse(tdf._same_data(dat, dat2))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters",
                          "Key",
                          strings_allowed='*',
                          number_allowed=True)
        tdf.set_default_value("parameters", "Value", "")
        dat2 = round_trip()
        self.assertTrue(tdf._same_data(dat, dat2))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters",
                          "Value",
                          strings_allowed='*',
                          number_allowed=False)
        dat = tdf.TicDat(parameters=[["a", "100"], ["b", "010"], ["c", "200"],
                                     ["d", "020"]])
        dat2 = round_trip()
        self.assertTrue(tdf._same_data(dat, dat2))
Пример #7
0
    def testSqlSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.db")
        pdf.sql.write_file(panDat, filePath)
        sqlPanDat = pdf.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, sqlPanDat))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.sql.write_file(panDat, filePath)
        sqlPanDat = pdf2.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, sqlPanDat))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "netflow.db")
        pdf.sql.write_file(panDat, filePath)
        panDat2 = pdf.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        sqlPanDat = pdf2.sql.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, sqlPanDat))
Пример #8
0
    def _test_generic_copy(self, ticDat, tdf, skip_tables=None):
        assert all(tdf.primary_key_fields.get(t) for t in tdf.all_tables)
        path = makeCleanDir(os.path.join(_scratchDir, "generic_copy"))
        replace_name  = lambda f : "name_" if f == "name" else f
        clean_tdf = TicDatFactory(**{t:[list(map(replace_name, pks)), dfs]
                                     for t,(pks, dfs) in tdf.schema().items()})

        temp_tdf = TicDatFactory(**{t:v if t in (skip_tables or []) else '*'
                                    for t,v in clean_tdf.schema().items()})
        temp_dat = temp_tdf.TicDat(**{t:getattr(ticDat, t) for t in (skip_tables or [])})
        for t in temp_tdf.generic_tables:
            setattr(temp_dat, t, getattr(clean_tdf.copy_to_pandas(ticDat, drop_pk_columns=False) ,t))

        temp_tdf.csv.write_directory(temp_dat, path)
        self.assertFalse(temp_tdf.csv.find_duplicates(path))
        read_dat = temp_tdf.csv.create_tic_dat(path)
        generic_free_dat, _ = utils.create_generic_free(read_dat, temp_tdf)
        check_dat = clean_tdf.TicDat()

        for t in temp_tdf.generic_tables:
            for r in getattr(generic_free_dat, t):
                pks = clean_tdf.primary_key_fields[t]
                getattr(check_dat, t)[r[pks[0]] if len(pks) == 1 else tuple(r[_] for _ in pks)] = \
                    {df:r[df] for df in clean_tdf.data_fields.get(t, [])}
        for t in (skip_tables or []):
            for k,v in getattr(generic_free_dat, t).items():
                getattr(check_dat, t)[k] = v
        self.assertTrue(clean_tdf._same_data(check_dat, clean_tdf.copy_tic_dat(ticDat)))
Пример #9
0
    def testXlsSpacey(self):
        if not self.can_run:
            return

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(**spacesData())
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".xlsx"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        pdf.xls.write_file(panDat, filePath, case_space_sheet_names=True)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
Пример #10
0
    def test_empty_text_none(self):
        # this is a naive data scientist who isn't using the parameters functionality
        filePath = os.path.join(_scratchDir, "empty.xls")
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        dat_n = tdf.TicDat(parameters=[[None, 100], ["b", 10.01],
                                       ["three", 200], ["d", None]])
        dat_s = tdf.TicDat(
            parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

        def round_trip():
            tdf.xls.write_file(dat_n, filePath, allow_overwrite=True)
            return tdf.xls.create_tic_dat(filePath)

        dat2 = round_trip()
        self.assertTrue(
            tdf._same_data(dat_s, dat2) and not tdf._same_data(dat_n, dat2))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters", "Key", nullable=True)
        tdf.set_default_value(
            "parameters", "Value",
            None)  # this default alone will mess with number reading
        dat2 = round_trip()
        self.assertTrue(not tdf._same_data(dat_s, dat2)
                        and tdf._same_data(dat_n, dat2))

        tdf = TicDatFactory(parameters='*')
        dat = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(dat.parameters.shape == (4, 2))
Пример #11
0
 def testDupsOpalytics(self):
     if not self.can_run:
         return
     for hack in [True, False]:
         tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                             two=[["a", "b"], ["c"]],
                             three=[["a", "b", "c"], []])
         tdf2 = TicDatFactory(
             **{t: [[], ["a", "b", "c"]]
                for t in tdf.all_tables})
         td = tdf2.TicDat(
             **{
                 t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                     ["new", 1, 2]]
                 for t in tdf.all_tables
             })
         inputset = create_inputset_mock(tdf2, td, hack)
         pdf = PanDatFactory(**tdf.schema())
         panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=True)
         self.assertTrue(
             all(len(getattr(panDat, t)) == 6 for t in tdf.all_tables))
         panDat = pdf.opalytics.create_pan_dat(inputset, raw_data=False)
         self.assertTrue(
             all(len(getattr(panDat, t)) < 6 for t in tdf.all_tables))
         td_1 = tdf.TicDat(
             **{
                 t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                     ["new", 1, 2]]
                 for t in tdf.all_tables
             })
         td_2 = pdf.copy_to_tic_dat(panDat)
         self.assertTrue(
             all(
                 set(getattr(td_1, t)) == set(getattr(td_2, t))
                 for t in tdf.all_tables))
Пример #12
0
    def test_empty_text_none(self):
        dir_path = os.path.join(_scratchDir, "empty_text")
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        dat_n = tdf.TicDat(parameters=[[None, 100], ["b", 10.01],
                                       ["three", 200], ["d", None]])
        dat_s = tdf.TicDat(
            parameters=[["", 100], ["b", 10.01], ["three", 200], ["d", ""]])

        def round_trip():
            tdf.csv.write_directory(dat_n, makeCleanDir(dir_path))
            return tdf.csv.create_tic_dat(dir_path)

        dat2 = round_trip()
        self.assertTrue(
            tdf._same_data(dat_s, dat2) and not tdf._same_data(dat_n, dat2))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters", "Key", nullable=True)
        tdf.set_default_value(
            "parameters", "Value",
            None)  # this default alone will mess with number reading
        dat2 = round_trip()
        self.assertFalse(
            tdf._same_data(dat_s, dat2) or tdf._same_data(dat_n, dat2))
        self.assertTrue(
            any(r["Value"] is None for r in dat2.parameters.values()))
        tdf = TicDatFactory(parameters=[["Key"], ["Value"]])
        tdf.set_data_type("parameters", "Key", nullable=True)
        tdf.set_data_type("parameters",
                          "Value",
                          nullable=True,
                          must_be_int=True)
        dat2 = round_trip()
        self.assertTrue(not tdf._same_data(dat_s, dat2)
                        and tdf._same_data(dat_n, dat2))
Пример #13
0
    def testDefaults(self):
        tdf = TicDatFactory(one=[["a"], ["b", "c"]],
                            two=[["a", "b"], ["c"]],
                            three=[["a", "b", "c"], []])
        dat = tdf.TicDat(one=[[1, 2, 3], [4, 5, 6]],
                         two=[[1, 2, 3], [4, 5, 6]],
                         three=[[1, 2, 3], [4, 5, 6]])
        filePath = makeCleanPath(os.path.join(_scratchDir, "defaults.sql"))
        tdf.sql.write_sql_file(dat, filePath)

        tdf2 = TicDatFactory(one=[["a"], ["b", "c"]],
                             two=[["a", "b"], ["c"]],
                             three=[["a", "b", "c"], ["d"]])
        dat2 = tdf2.TicDat(one=dat.one,
                           two=dat.two,
                           three={k: {}
                                  for k in dat.three})
        dat22 = tdf2.sql.create_tic_dat_from_sql(filePath)
        self.assertTrue(tdf2._same_data(dat2, dat22))

        tdf2 = TicDatFactory(one=[["a"], ["b", "c"]],
                             two=[["a", "b"], ["c"]],
                             three=[["a", "b", "c"], ["d"]])
        tdf2.set_default_value("three", "d", float("inf"))
        dat2_b = tdf2.TicDat(one=dat.one,
                             two=dat.two,
                             three={k: {}
                                    for k in dat.three})
        dat22_b = tdf2.sql.create_tic_dat_from_sql(filePath)
        self.assertTrue(tdf2._same_data(dat2_b, dat22_b))

        self.assertFalse(tdf2._same_data(dat2, dat2_b))
Пример #14
0
    def testDictConstructions(self):
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        panDat2 = pdf.PanDat(
            **{t: getattr(panDat, t).to_dict()
               for t in pdf.all_tables})
        panDat3 = pdf.PanDat(**{
            t: getattr(panDat, t).to_dict(orient="list")
            for t in pdf.all_tables
        })
        panDat3_1 = pdf.PanDat(
            **{
                t: list(map(list,
                            getattr(panDat, t).itertuples(index=False)))
                for t in pdf.all_tables
            })

        self.assertTrue(
            all(
                pdf._same_data(panDat, _)
                for _ in [panDat2, panDat3, panDat3_1]))
        panDat.foods["extra"] = 12
        panDat4 = pdf.PanDat(**{
            t: getattr(panDat, t).to_dict(orient="list")
            for t in pdf.all_tables
        })
        self.assertTrue(pdf._same_data(panDat, panDat4))
        self.assertTrue(set(panDat4.foods["extra"]) == {12})

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        panDat2 = pdf.PanDat(
            **{t: getattr(panDat, t).to_dict()
               for t in pdf.all_tables})
        panDat3 = pdf.PanDat(
            **{
                t: getattr(panDat, t).to_dict(orient="records")
                for t in pdf.all_tables
            })
        self.assertTrue(
            all(pdf._same_data(panDat, _) for _ in [panDat2, panDat3]))
        panDat.cost["extra"] = "boger"
        panDat4 = pdf.PanDat(**{
            t: getattr(panDat, t).to_dict(orient="list")
            for t in pdf.all_tables
        })
        self.assertTrue(pdf._same_data(panDat, panDat4))
        self.assertTrue(set(panDat4.cost["extra"]) == {"boger"})
Пример #15
0
    def testNetflow(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
        filePath = os.path.join(_scratchDir, "netflow.xls")
        tdf.xls.write_file(ticDat, filePath)
        xlsTicDat = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
        tdf.xls.write_file(ticDat, filePath + "x")
        self.assertTrue(
            tdf._same_data(ticDat, tdf.xls.create_tic_dat(filePath + "x")))

        def changeIt():
            xlsTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

        self.assertTrue(self.firesException(changeIt))
        self.assertTrue(tdf._same_data(ticDat, xlsTicDat))

        xlsTicDat = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
        self.assertFalse(self.firesException(changeIt))
        self.assertFalse(tdf._same_data(ticDat, xlsTicDat))

        self.assertFalse(tdf.xls.find_duplicates(filePath))

        pkHacked = netflowSchema()
        pkHacked["nodes"][0] = ["nimrod"]
        tdfHacked = TicDatFactory(**pkHacked)
        self.assertTrue(
            self.firesException(
                lambda: tdfHacked.xls.write_file(ticDat, filePath)))
        tdfHacked.xls.write_file(ticDat, filePath, allow_overwrite=True)
        self.assertTrue("nodes : name" in self.firesException(
            lambda: tdf.xls.create_tic_dat(filePath)))

        ticDat = tdf.TicDat(
            **{t: getattr(netflowData(), t)
               for t in tdf.primary_key_fields})
        ticDat.arcs["Detroit", "Boston"] = float("inf")
        ticDat.cost['Pencils', 'Detroit', 'Boston'] = -float("inf")
        tdf.xls.write_file(ticDat, makeCleanPath(filePath))
        xlsTicDat = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, xlsTicDat))
        tdf.xls.write_file(ticDat, filePath + "x", allow_overwrite=True)
        self.assertTrue(
            tdf._same_data(ticDat, tdf.xls.create_tic_dat(filePath + "x")))
        self.assertFalse(
            tdf._same_data(
                ticDat,
                tdf.xls.create_tic_dat(filePath + "x",
                                       treat_inf_as_infinity=False)))
Пример #16
0
    def testIssue45(self):
        pdf = PanDatFactory(data=[["a"], ["b"]])
        tdf = TicDatFactory(**pdf.schema())
        dat_nums = tdf.copy_to_pandas(
            tdf.TicDat(data=[[1, 2], [3, 4], [22, 44]]), drop_pk_columns=False)
        dat_strs = tdf.copy_to_pandas(
            tdf.TicDat(data=[["1", "2"], ["3", "4"], ["022", "0044"]]),
            drop_pk_columns=False)
        files = [
            os.path.join(_scratchDir, _)
            for _ in ["dat_nums.xlsx", "dat_strs.xlsx"]
        ]
        pdf.xls.write_file(dat_nums, files[0])
        pdf.xls.write_file(dat_strs, files[1])
        dat_nums_2, dat_strs_2 = [pdf.xls.create_pan_dat(_) for _ in files]
        self.assertTrue(pdf._same_data(dat_nums, dat_nums_2))
        # this is pandas pushing things to be numeric
        self.assertFalse(pdf._same_data(dat_strs, dat_strs_2))
        self.assertTrue(pdf._same_data(dat_nums, dat_strs_2))

        pdf = PanDatFactory(data=[["a"], ["b"]])
        pdf.set_data_type("data",
                          "a",
                          number_allowed=False,
                          strings_allowed='*')
        dat_mixed = tdf.copy_to_pandas(
            tdf.TicDat(data=[["1", 2], ["3", 4], ["022", 44]]),
            drop_pk_columns=False)
        dat_nums_2, dat_strs_2 = [pdf.xls.create_pan_dat(_) for _ in files]
        self.assertFalse(pdf._same_data(dat_nums, dat_nums_2))
        self.assertFalse(pdf._same_data(dat_strs, dat_strs_2))
        self.assertFalse(pdf._same_data(dat_nums_2, dat_mixed))
        self.assertTrue(pdf._same_data(dat_strs_2, dat_mixed))

        pdf = PanDatFactory(data=[["a"], ["b"]])
        csv_dirs = [
            os.path.join(_scratchDir, _)
            for _ in ["dat_nums_csv", "dat_strs_csv"]
        ]
        pdf.csv.write_directory(dat_nums, csv_dirs[0])
        pdf.csv.write_directory(dat_strs, csv_dirs[1])
        dat_nums_2, dat_strs_2 = [pdf.csv.create_pan_dat(_) for _ in csv_dirs]
        self.assertTrue(pdf._same_data(dat_nums, dat_nums_2))
        # this is pandas pushing things to be numeric
        self.assertFalse(pdf._same_data(dat_strs, dat_strs_2))
        self.assertTrue(pdf._same_data(dat_nums, dat_strs_2))
        pdf = PanDatFactory(data=[["a"], ["b"]])
        pdf.set_data_type("data",
                          "a",
                          number_allowed=False,
                          strings_allowed='*')
        dat_nums_2, dat_strs_2 = [pdf.csv.create_pan_dat(_) for _ in csv_dirs]
        self.assertFalse(pdf._same_data(dat_nums, dat_nums_2))
        self.assertFalse(pdf._same_data(dat_strs, dat_strs_2))
        self.assertFalse(pdf._same_data(dat_nums_2, dat_strs_2))
        self.assertTrue(pdf._same_data(dat_strs_2, dat_mixed))
Пример #17
0
    def testBooleansAndNulls(self):
        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        dat = tdf.TicDat(table=[[None, 100], [200, True], [False, 300],
                                [300, None], [400, False]])
        file_one = os.path.join(_scratchDir, "boolDefaults_1.json")
        file_two = os.path.join(_scratchDir, "boolDefaults_2.json")
        tdf.json.write_file(dat, file_one, verbose=True)
        tdf.json.write_file(dat, file_two, verbose=False)
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertTrue(tdf._same_data(dat, dat_1))
        self.assertTrue(tdf._same_data(dat, dat_2))

        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table", f, max=float("inf"), inclusive_max=True)
        tdf.set_infinity_io_flag(None)
        dat_inf = tdf.TicDat(
            table=[[float("inf"), 100], [200, True], [False, 300],
                   [300, float("inf")], [400, False]])
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        self.assertTrue(tdf._same_data(dat_inf, dat_2))
        tdf.json.write_file(dat_inf,
                            file_one,
                            verbose=True,
                            allow_overwrite=True)
        tdf.json.write_file(dat_inf,
                            file_two,
                            verbose=False,
                            allow_overwrite=True)
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        self.assertTrue(tdf._same_data(dat_inf, dat_2))

        tdf = TicDatFactory(table=[["field one"], ["field two"]])
        for f in ["field one", "field two"]:
            tdf.set_data_type("table",
                              f,
                              min=-float("inf"),
                              inclusive_min=True)
        tdf.set_infinity_io_flag(None)
        dat_1 = tdf.json.create_tic_dat(file_one)
        dat_2 = tdf.json.create_tic_dat(file_two)
        self.assertFalse(tdf._same_data(dat_inf, dat_1))
        self.assertFalse(tdf._same_data(dat_inf, dat_2))
        dat_inf = tdf.TicDat(
            table=[[float("-inf"), 100], [200, True], [False, 300],
                   [300, -float("inf")], [400, False]])
        self.assertTrue(tdf._same_data(dat_inf, dat_1))
        self.assertTrue(tdf._same_data(dat_inf, dat_2))
Пример #18
0
    def testSillyTwoTables(self):
        if not self.can_run:
            return
        for hack, raw_data in list(product(*(([True, False],)*2))):
            tdf = TicDatFactory(**sillyMeSchema())
            ticDat = tdf.TicDat(**sillyMeData())
            self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
                create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))

            ticDat = tdf.TicDat(**sillyMeDataTwoTables())
            self.assertTrue(tdf._same_data(ticDat, tdf.opalytics.create_tic_dat(
                create_inputset_mock(tdf, ticDat, hack), raw_data=raw_data)))
Пример #19
0
 def testIssue45(self):
     tdf = TicDatFactory(data=[["a"], ["b"]])
     dat_nums = tdf.TicDat(data=[[1, 2], [3, 4], [22, 44]])
     dat_strs = tdf.TicDat(data=[["1", "2"], ["3", "4"], ["022", "0044"]])
     files = [
         os.path.join(_scratchDir, _)
         for _ in ["dat_nums.xlsx", "dat_strs.xlsx"]
     ]
     tdf.xls.write_file(dat_nums, files[0])
     tdf.xls.write_file(dat_strs, files[1])
     dat_nums_2, dat_strs_2 = [tdf.xls.create_tic_dat(_) for _ in files]
     self.assertTrue(tdf._same_data(dat_nums, dat_nums_2))
     self.assertTrue(tdf._same_data(dat_strs, dat_strs_2))
Пример #20
0
    def testSqlSpaceyTwo(self):
        if not self.can_run:
            return
        self.assertTrue(pandatio.sql,
                        "this unit test requires SQLite installed")

        tdf = TicDatFactory(**spacesSchema())
        pdf = PanDatFactory(**spacesSchema())
        ticDat = tdf.TicDat(
            **{
                "a_table": {
                    1: [1, 2, "3"],
                    22.2: (12, 0.12, "something"),
                    0.23: (11, 12, "thirt")
                },
                "b_table": {
                    (1, 2, "foo"): 1,
                    (1012.22, 4, "0012"): 12
                },
                "c_table": (("this", 2, 3, 4), ("that", 102.212, 3, 5.5),
                            ("another", 5, 12.5, 24))
            })
        panDat = pan_dat_maker(spacesSchema(), ticDat)
        ext = ".db"
        filePath = os.path.join(_scratchDir, "spaces_2%s" % ext)
        with pandatio.sql.connect(filePath) as con:
            pdf.sql.write_file(panDat,
                               db_file_path=None,
                               con=con,
                               case_space_table_names=True)
        with pandatio.sql.connect(filePath) as con:
            panDat2 = pdf.sql.create_pan_dat(db_file_path=None, con=con)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "spaces_2_2%s" % ext)
        with pandatio.sql.connect(filePath) as con:
            pdf.sql.write_file(panDat,
                               db_file_path="",
                               con=con,
                               case_space_table_names=True)
        with pandatio.sql.connect(filePath) as con:
            panDat2 = pdf.sql.create_pan_dat(None, con)
        self.assertTrue(pdf._same_data(panDat, panDat2))
Пример #21
0
    def testXlsSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.xlsx")
        pdf.xls.write_file(panDat, filePath)
        xlsPanDat = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, xlsPanDat))

        pdf_shrunk = PanDatFactory(**{
            k: v
            for k, v in dietSchema().items() if k != "nutritionQuantities"
        })
        self.assertTrue(len(pdf_shrunk.all_tables) == len(pdf.all_tables) - 1)
        xlsPanDatShrunk = pdf_shrunk.xls.create_pan_dat(filePath)
        self.assertTrue(pdf_shrunk._same_data(panDat, xlsPanDatShrunk))
        filePathShrunk = os.path.join(_scratchDir, "diet_shrunk.xlsx")
        self.assertTrue(
            self.firesException(
                lambda: pdf.xls.create_pan_dat(filePathShrunk)))
        pdf_shrunk.xls.write_file(panDat, filePathShrunk)
        xlsPanDatShrunk = pdf.xls.create_pan_dat(filePathShrunk)
        self.assertTrue(pdf_shrunk._same_data(panDat, xlsPanDatShrunk))

        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.xls.write_file(panDat, filePath)
        xlsPanDat = pdf2.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, xlsPanDat))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "netflow.xlsx")
        pdf.xls.write_file(panDat, filePath)
        panDat2 = pdf.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        xlsPanDat = pdf2.xls.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, xlsPanDat))
Пример #22
0
    def testCsvSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "diet_csv")
        pdf.csv.write_directory(panDat, dirPath)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        panDat2 = pdf2.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "netflow_csv")
        pdf.csv.write_directory(panDat, dirPath)
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.csv.write_directory(panDat, dirPath)
        panDat2 = pdf2.csv.create_pan_dat(dirPath)
        self.assertTrue(pdf._same_data(panDat, panDat2))

        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        dirPath = os.path.join(_scratchDir, "diet_csv")
        pdf.csv.write_directory(panDat, dirPath, decimal=",")
        panDat2 = pdf.csv.create_pan_dat(dirPath)
        self.assertFalse(pdf._same_data(panDat, panDat2))
        panDat2 = pdf.csv.create_pan_dat(dirPath, decimal=",")
        self.assertTrue(pdf._same_data(panDat, panDat2))
Пример #23
0
 def testColumnsWithoutData(self):
     tdf = TicDatFactory(data=[["a"], ["b"]])
     for x in ["", "x"]:
         file = os.path.join(_scratchDir, "no_data.xls" + x)
         tdf.xls.write_file(tdf.TicDat(), file)
         dat = tdf.xls.create_tic_dat(file)
         self.assertFalse(dat._len_dict())
Пример #24
0
    def testWeirdDiets(self):
        if not self.can_run:
            return
        filePath = os.path.join(_scratchDir, "weirdDiet.db")
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))

        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.sql.write_db_data(dat2, filePath , allow_overwrite=True)
        self.assertFalse(tdf2.sql.find_duplicates(filePath))
        sqlTicDat = tdf.sql.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, sqlTicDat))


        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.sql.write_db_data(dat3, makeCleanPath(filePath))
        with sql.connect(filePath) as con:
            con.execute("ALTER TABLE nutrition_quantities RENAME TO [nutrition quantities]")

        sqlTicDat2 = tdf3.sql.create_tic_dat(filePath)
        self.assertTrue(tdf3._same_data(dat3, sqlTicDat2))
        with sql.connect(filePath) as con:
            con.execute("create table nutrition_quantities(boger)")

        self.assertTrue(self.firesException(lambda : tdf3.sql.create_tic_dat(filePath)))
Пример #25
0
    def testNetflow(self):
        if not _can_unit_test:
            return
        tdf = TicDatFactory(**netflowSchema())
        addNetflowForeignKeys(tdf)
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(netflowData(), t)
                   for t in tdf.all_tables}))
        filePath = "netflow.accdb"
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        mdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

        def changeIt():
            mdbTicDat.inflow['Pencils', 'Boston']["quantity"] = 12

        self.assertTrue(self.firesException(changeIt))
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

        mdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))
        self.assertFalse(self.firesException(changeIt))
        self.assertFalse(tdf._same_data(ticDat, mdbTicDat))

        pkHacked = netflowSchema()
        pkHacked["nodes"][0] = ["nimrod"]
        tdfHacked = TicDatFactory(**pkHacked)
        self.assertTrue(
            "Unable to recognize field nimrod in table nodes" in self.
            firesException(lambda: tdfHacked.mdb.create_tic_dat(filePath)))
Пример #26
0
    def testSpacey(self):
        if not _can_unit_test:
            return
        tdf = TicDatFactory(**spacesSchema())
        spacesData = {
            "a_table": {
                1: {
                    "a Data 3": 3,
                    "a Data 2": 2,
                    "a Data 1": 1
                },
                22: (1.1, 12, 12),
                0.23: (11, 12, 11)
            },
            "b_table": {
                ("1", "2", "3"): 1,
                ("a", "b", "b"): 12
            },
            "c_table": (("1", "2", "3", 4), {
                "c Data 4": 55,
                "c Data 2": "b",
                "c Data 3": "c",
                "c Data 1": "a"
            }, ("a", "b", "12", 24))
        }

        dat = tdf.TicDat(**spacesData)
        filePath = "spaces.accdb"
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        dat2 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat2))
Пример #27
0
    def testWeirdDiets(self):
        if not _can_accdb_unit_test:
            return
        filePath = os.path.join(_scratchDir, "weirdDiet.accdb")
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))

        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.mdb.write_file(dat2, filePath , allow_overwrite=True)
        accdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))


        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.mdb.write_file(dat3, makeCleanPath(filePath))
        with py.connect(_connection_str(filePath)) as con:
            con.cursor().execute("SELECT * INTO [nutrition quantities] FROM nutrition_quantities").commit()
            con.cursor().execute("DROP TABLE nutrition_quantities").commit()

        accdbTicDat2 = tdf3.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf3._same_data(dat3, accdbTicDat2))
        with py.connect(_connection_str(filePath)) as con:
            con.cursor().execute("create table nutrition_quantities (boger int)").commit()

        self.assertTrue(self.firesException(lambda : tdf3.mdb.create_tic_dat(filePath)))
Пример #28
0
    def testSpacey(self):
        if not _can_accdb_unit_test:
            return
        tdf = TicDatFactory(**spacesSchema())
        spacesData =  {
        "a_table" : {1 : {"a Data 3":3, "a Data 2":2, "a Data 1":1},
                     22 : (1.1, 12, 12), 0.23 : (11, 12, 11)},
        "b_table" : {("1", "2", "3") : 1, ("a", "b", "b") : 12},
        "c_table" : (("1", "2", "3", 4),
                      {"c Data 4":55, "c Data 2":"b", "c Data 3":"c", "c Data 1":"a"},
                      ("a", "b", "12", 24) ) }

        dat = tdf.TicDat(**spacesData)
        filePath = makeCleanPath(os.path.join(_scratchDir, "spacey.accdb"))
        tdf.mdb.write_schema(filePath, a_table = {"a Field":"double"},
                                       c_table = {"c Data 1":"text", "c Data 2":"text",
                                                  "c Data 3":"text", "c Data 4":"int"})
        tdf.mdb.write_file(dat, filePath)
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        dat2 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat,dat2))

        with py.connect(_connection_str(filePath)) as con:
            for t in tdf.all_tables:
                con.cursor().execute("SELECT * INTO [%s] FROM %s"%(t.replace("_", " "), t)).commit()
                con.cursor().execute("DROP TABLE %s"%t).commit()
        #shutil.copy(filePath, "spaces.accdb") #uncomment to make readonly test file as .accdb
        dat3 = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat, dat3))
Пример #29
0
    def testDiet(self):
        if not _can_accdb_unit_test:
            return
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        filePath = makeCleanPath(os.path.join(_scratchDir, "diet.accdb"))
        tdf.mdb.write_file(ticDat, filePath)
        #shutil.copy(filePath, "diet.accdb") #uncomment to make readonly test file as .accdb
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        accdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))

        def changeit():
            accdbTicDat.categories["calories"]["minNutrition"] = 12

        changeit()
        self.assertFalse(tdf._same_data(ticDat, accdbTicDat))

        self.assertTrue(
            self.firesException(lambda: tdf.mdb.write_file(ticDat, filePath)))
        tdf.mdb.write_file(ticDat, filePath, allow_overwrite=True)
        accdbTicDat = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
        self.assertTrue(self.firesException(changeit))
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))
Пример #30
0
 def testDups(self):
     if not _can_accdb_unit_test:
         return
     tdf = TicDatFactory(one=[["a"], ["b, c"]],
                         two=[["a", "b"], ["c"]],
                         three=[["a", "b", "c"], []])
     tdf2 = TicDatFactory(
         **{t: [[], ["a", "b", "c"]]
            for t in tdf.all_tables})
     td = tdf2.TicDat(
         **{
             t: [[1, 2, 1], [1, 2, 2], [2, 1, 3], [2, 2, 3], [1, 2, 2],
                 [11, 1, 2]]
             for t in tdf.all_tables
         })
     f = makeCleanPath(os.path.join(_scratchDir, "testDups.accdb"))
     tdf2.mdb.write_file(td, f)
     #shutil.copy(f, "dups.accdb") #uncomment to make readonly test file as .accdb
     dups = tdf.mdb.find_duplicates(f)
     self.assertTrue(dups == {
         'three': {
             (1, 2, 2): 2
         },
         'two': {
             (1, 2): 3
         },
         'one': {
             1: 3,
             2: 2
         }
     })