Example #1
    def testDiet(self):
        if not self.can_run:
            return
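        # product of three [True, False] lists: every combination of the
        # hack, raw_data and activeEnabled flags gets exercised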
        for hack, raw_data, activeEnabled in list(product(*(([True, False],)*3))):
            tdf = TicDatFactory(**dietSchema())
            ticDat = tdf.freeze_me(tdf.copy_tic_dat(dietData()))
            inputset = create_inputset_mock(tdf, ticDat, hack, activeEnabled)
            self.assertFalse(tdf.opalytics.find_duplicates(inputset, raw_data=raw_data))
            ticDat2 = tdf.opalytics.create_tic_dat(inputset, raw_data=raw_data)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))

            def change() :
                ticDat2.categories["calories"]["minNutrition"]=12
            self.assertFalse(firesException(change))
            self.assertFalse(tdf._same_data(ticDat, ticDat2))

            ticDat2 = tdf.opalytics.create_tic_dat(inputset, freeze_it=True, raw_data=raw_data)
            self.assertTrue(tdf._same_data(ticDat, ticDat2))
            self.assertTrue(firesException(change))
            self.assertTrue(tdf._same_data(ticDat, ticDat2))

            tdf2 = TicDatFactory(**{k:[pks, list(dfs) + ["dmy"]] for k,(pks, dfs) in tdf.schema().items()})
            _dat = tdf2.copy_tic_dat(ticDat)
            self.assertTrue(tdf._same_data(ticDat,
                                           tdf.opalytics.create_tic_dat(create_inputset_mock(tdf2, _dat, hack),
                                                                        raw_data=raw_data)))

            ex = self.firesException(lambda: tdf2.opalytics.create_tic_dat(inputset, raw_data=raw_data))
            self.assertTrue("field dmy can't be found" in ex)
Example #2
    def testDiet(self):
        if not self.can_run:
            return
        for verbose in [True, False]:
            tdf = TicDatFactory(**dietSchema())
            ticDat = tdf.freeze_me(
                tdf.TicDat(**{
                    t: getattr(dietData(), t)
                    for t in tdf.primary_key_fields
                }))
            writePath = os.path.join(
                makeCleanDir(os.path.join(_scratchDir, "diet")), "file.json")
            tdf.json.write_file(ticDat, writePath, verbose=verbose)
            self.assertFalse(tdf.json.find_duplicates(writePath))
            jsonTicDat = tdf.json.create_tic_dat(writePath)
            self.assertTrue(tdf._same_data(ticDat, jsonTicDat))

            def change():
                jsonTicDat.categories["calories"]["minNutrition"] = 12

            self.assertFalse(firesException(change))
            self.assertFalse(tdf._same_data(ticDat, jsonTicDat))
            jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
            self.assertTrue(firesException(change))
            self.assertTrue(tdf._same_data(ticDat, jsonTicDat))

        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.json.write_file(dat2,
                             writePath,
                             allow_overwrite=True,
                             verbose=verbose)
        jsonTicDat2 = tdf.json.create_tic_dat(writePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, jsonTicDat2))

        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.json.write_file(dat3,
                             writePath,
                             allow_overwrite=True,
                             verbose=verbose)
        with open(writePath, "r") as f:
            jdict = json.load(f)
        jdict["nutrition quantities"] = jdict["nutrition_quantities"]
        del (jdict["nutrition_quantities"])
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        jsonDat3 = tdf3.json.create_tic_dat(writePath)
        self.assertTrue(tdf3._same_data(dat3, jsonDat3))
        jdict["nutrition_quantities"] = jdict["nutrition quantities"]
        with open(writePath, "w") as f:
            json.dump(jdict, f)
        self.assertTrue(
            self.firesException(lambda: tdf3.json.create_tic_dat(writePath)))
Example #3
    def testNetflow(self):
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields})
        dirPath = os.path.join(_scratchDir, "netflow")
        tdf.csv.write_directory(ticDat, dirPath)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf.csv.get_duplicates(dirPath))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, headers_present=False)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
        tdf.csv.write_directory(ticDat, dirPath, write_header=False, allow_overwrite=True)
        self.assertTrue(self.firesException(lambda : tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath, headers_present=False, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        ticDat.nodes[12] = {}
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        # minor flaw - strings that are floatable get turned into floats when reading csvs
        del(ticDat.nodes[12])
        ticDat.nodes['12'] = {}
        self.assertTrue(firesException(lambda : tdf.csv.write_directory(ticDat, dirPath)))
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
Example #4
    def testNetflow(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.TicDat(**{t:getattr(netflowData(),t) for t in tdf.primary_key_fields})
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
        dirPath = os.path.join(_scratchDir, "netflow")
        tdf.csv.write_directory(ticDat, dirPath)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf.csv.find_duplicates(dirPath))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, headers_present=False)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
        tdf.csv.write_directory(ticDat, dirPath, write_header=False, allow_overwrite=True)
        self.assertTrue(self.firesException(lambda : tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath, headers_present=False, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        ticDat.nodes[12] = {}
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        # minor flaw - strings that are floatable get turned into floats when reading csvs
        del(ticDat.nodes[12])
        ticDat.nodes['12'] = {}
        self.assertTrue(firesException(lambda : tdf.csv.write_directory(ticDat, dirPath)))
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
Example #5
    def testDiet(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["nutritionQuantities"])
        dirPath = os.path.join(_scratchDir, "diet")
        tdf.csv.write_directory(ticDat,dirPath)
        self.assertFalse(tdf.csv.find_duplicates(dirPath))
        csvTicDat = tdf.csv.create_tic_dat(dirPath)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
        def change() :
            csvTicDat.categories["calories"]["minNutrition"]=12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))

        self.assertTrue(self.firesException(lambda  :
            tdf.csv.write_directory(ticDat, dirPath, dialect="excel_t")
                                    ).endswith("Invalid dialect excel_t"))

        tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab", allow_overwrite=True)
        self.assertTrue(self.firesException(lambda : tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, dialect="excel-tab")
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        tdf2 = TicDatFactory(**dietSchemaWeirdCase())
        dat2 = copyDataDietWeirdCase(ticDat)
        tdf2.csv.write_directory(dat2, dirPath, allow_overwrite=True)
        csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat2))
        os.rename(os.path.join(dirPath, "nutritionquantities.csv"),
                  os.path.join(dirPath, "nutritionquantities.csv".upper()))
        csvTicDat2 = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat2))

        tdf3 = TicDatFactory(**dietSchemaWeirdCase2())
        dat3 = copyDataDietWeirdCase2(ticDat)
        tdf3.csv.write_directory(dat3, dirPath, allow_overwrite=True)
        os.rename(os.path.join(dirPath, "nutrition_quantities.csv"),
                  os.path.join(dirPath, "nutrition quantities.csv"))
        csvDat3 = tdf3.csv.create_tic_dat(dirPath)
        self.assertTrue(tdf3._same_data(dat3, csvDat3))
        shutil.copy(os.path.join(dirPath, "nutrition quantities.csv"),
                    os.path.join(dirPath, "nutrition_quantities.csv"))
        self.assertTrue(self.firesException(lambda : tdf3.csv.create_tic_dat(dirPath)))
Example #6
 def firesException(self, f, troubleshoot=False):
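     # wraps the module-level firesException helper; any exception it catches must
     # be a TicDatError, and its string form is returned (None when nothing fires)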
     if troubleshoot:
         import ipdb
         ipdb.set_trace()
         f()
     e = firesException(f)
     if e:
         self.assertTrue("TicDatError" in e.__class__.__name__)
         return str(e)
Example #7
    def testDiet(self):
        tdf = TicDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(tdf.TicDat(**{t:getattr(dietData(),t) for t in tdf.primary_key_fields}))
        dirPath = os.path.join(_scratchDir, "diet")
        tdf.csv.write_directory(ticDat,dirPath)
        self.assertFalse(tdf.csv.get_duplicates(dirPath))
        csvTicDat = tdf.csv.create_tic_dat(dirPath)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
        def change() :
            csvTicDat.categories["calories"]["minNutrition"]=12
        self.assertFalse(firesException(change))
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))

        self.assertTrue(self.firesException(lambda  :
            tdf.csv.write_directory(ticDat, dirPath, dialect="excel_t")).endswith(
                                                                        "Invalid dialect excel_t"))

        tdf.csv.write_directory(ticDat, dirPath, dialect="excel-tab", allow_overwrite=True)
        self.assertTrue(self.firesException(lambda : tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True, dialect="excel-tab")
        self.assertTrue(firesException(change))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
Example #8
    def testNetflow(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**netflowSchema())
        ticDat = tdf.TicDat(
            **{t: getattr(netflowData(), t)
               for t in tdf.primary_key_fields})
        self._test_generic_copy(ticDat, tdf)
        self._test_generic_copy(ticDat, tdf, ["arcs", "nodes"])
        dirPath = os.path.join(_scratchDir, "netflow")
        tdf.csv.write_directory(ticDat, dirPath)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf.csv.find_duplicates(dirPath))
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
        csvTicDat = tdf.csv.create_tic_dat(dirPath,
                                           freeze_it=True,
                                           headers_present=False)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
        tdf.csv.write_directory(ticDat,
                                dirPath,
                                write_header=False,
                                allow_overwrite=True)
        self.assertTrue(
            self.firesException(
                lambda: tdf.csv.create_tic_dat(dirPath, freeze_it=True)))
        csvTicDat = tdf.csv.create_tic_dat(dirPath,
                                           headers_present=False,
                                           freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        # the casting to floats is controlled by data types and default values
        ticDat.nodes[12] = {}
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertFalse(tdf._same_data(ticDat, csvTicDat))
        tdf2 = TicDatFactory(**netflowSchema())
        tdf2.set_data_type("nodes",
                           "name",
                           strings_allowed='*',
                           number_allowed=True)
        csvTicDat = tdf2.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))

        del (ticDat.nodes[12])
        ticDat.nodes['12'] = {}
        self.assertTrue(
            firesException(lambda: tdf.csv.write_directory(ticDat, dirPath)))
        tdf.csv.write_directory(ticDat, dirPath, allow_overwrite=True)
        csvTicDat = tdf.csv.create_tic_dat(dirPath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, csvTicDat))
Example #9
    def testInjection(self):
        if not self.can_run:
            return
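        # single and double quote characters, used both as keys and as values, to
        # check that the SQL writers escape them when the data round-trips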
        problems = [ "'", "''", '"', '""']
        tdf = TicDatFactory(boger = [["a"], ["b"]])
        dat = tdf.TicDat()
        for v,k in enumerate(problems):
            dat.boger[k]=v
            dat.boger[v]=k
        filePath = makeCleanPath(os.path.join(_scratchDir, "injection.db"))
        tdf.sql.write_db_data(dat, filePath)
        dat2 = tdf.sql.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(dat,dat2))

        filePath = makeCleanPath(os.path.join(_scratchDir, "injection.sql"))
        tdf.sql.write_sql_file(dat, filePath)
        self.assertTrue(firesException(lambda : tdf.sql.create_tic_dat_from_sql(filePath)))
Example #10
    def testNetflow(self):
        if not self.can_run:
            return
        for verbose in [True, False]:
            tdf = TicDatFactory(**netflowSchema())
            ticDat = tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields})
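            # an empty file path makes write_file return the JSON as a string,
            # which create_tic_dat can consume directly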
            self.assertTrue(
                tdf._same_data(ticDat,
                               tdf.json.create_tic_dat(
                                   tdf.json.write_file(ticDat, "")),
                               epsilon=0.0001))

            writePath = os.path.join(
                makeCleanDir(os.path.join(_scratchDir, "netflow")),
                "file.json")
            tdf.json.write_file(ticDat, writePath, verbose=verbose)
            jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
            self.assertFalse(tdf.json.find_duplicates(writePath))
            self.assertTrue(tdf._same_data(ticDat, jsonTicDat))

            ticDat.nodes[12] = {}
            tdf.json.write_file(ticDat,
                                writePath,
                                verbose=verbose,
                                allow_overwrite=True)
            jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, jsonTicDat))

            # unlike csv, json format respects strings that are floatable
            del (ticDat.nodes[12])
            ticDat.nodes['12'] = {}
            self.assertTrue(
                firesException(lambda: tdf.json.write_file(
                    ticDat, writePath, verbose=verbose)))
            tdf.json.write_file(ticDat,
                                writePath,
                                allow_overwrite=True,
                                verbose=verbose)
            jsonTicDat = tdf.json.create_tic_dat(writePath, freeze_it=True)
            self.assertTrue(tdf._same_data(ticDat, jsonTicDat))
Example #11
    def testBiggie(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(boger=[["the"], ["big", "boger"]],
                            moger=[["the", "big"], ["boger"]],
                            woger=[[], ["the", "big", "boger"]])
        smalldat = tdf.TicDat(
            boger={k: [(k + 1) % 10, (k + 2) % 5]
                   for k in range(100)},
            moger={(k, (k + 1) % 10): (k + 2) % 5
                   for k in range(75)},
            woger=[[k, (k + 1) % 10, (k + 2) % 5] for k in range(101)])
        filePath = os.path.join(_scratchDir, "smallBiggie.xls")
        tdf.xls.write_file(smalldat, filePath)
        smalldat2 = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(smalldat, smalldat2))

        filePath = makeCleanPath(filePath + "x")
        tdf.xls.write_file(smalldat, filePath)
        smalldat2 = tdf.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(smalldat, smalldat2))

        bigdat = tdf.TicDat(
            boger={k: [(k + 1) % 10, (k + 2) % 5]
                   for k in range(65537)},
            moger={(k, (k + 1) % 10): (k + 2) % 5
                   for k in range(75)},
            woger=[[k, (k + 1) % 10, (k + 2) % 5] for k in range(65537)])
        filePath = os.path.join(_scratchDir, "bigBiggie.xls")
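        # 65537 rows overflow the 65536-row limit of the legacy .xls format, so
        # this write should raise; the .xlsx write below handles the same data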
        self.assertTrue(
            firesException(lambda: tdf.xls.write_file(bigdat, filePath)))
        filePath = makeCleanPath(filePath + "x")
        tdf.xls.write_file(bigdat, filePath)
        bigdat2 = tdf.xls.create_tic_dat(filePath)
        # the following is just too GD slow
        #self.assertTrue(tdf._same_data(bigdat, bigdat2))
        self.assertTrue(
            all(
                len(getattr(bigdat, t)) == len(getattr(bigdat2, t))
                for t in tdf.all_tables))
Example #12
    def testOne(self):
        def _cleanIt(x) :
            x.foods['macaroni'] = {"cost": 2.09}
            x.foods['milk'] = {"cost":0.89}
            return x
        dataObj = dietData()
        tdf = TicDatFactory(**dietSchema())
        self.assertTrue(tdf.good_tic_dat_object(dataObj))
        dataObj2 = tdf.copy_tic_dat(dataObj)
        dataObj3 = tdf.copy_tic_dat(dataObj, freeze_it=True)
        dataObj4 = tdf.TicDat(**tdf.as_dict(dataObj3))
        self.assertTrue(all (tdf._same_data(dataObj, x) and dataObj is not x for x in (dataObj2, dataObj3, dataObj4)))
        dataObj = _cleanIt(dataObj)
        self.assertTrue(tdf.good_tic_dat_object(dataObj))
        self.assertTrue(all (tdf._same_data(dataObj, x) and dataObj is not x for x in (dataObj2, dataObj3)))
        def hackit(x) :
            x.foods["macaroni"] = 100
        self.assertTrue(self.firesException(lambda :hackit(dataObj3)))
        hackit(dataObj2)
        self.assertTrue(not tdf._same_data(dataObj, dataObj2) and  tdf._same_data(dataObj, dataObj3))

        msg = []
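        # foods is keyed by a single field, so a tuple key makes the table (and
        # therefore the whole object) fail the good_tic_dat checks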
        dataObj.foods[("milk", "cookies")] = {"cost": float("inf")}
        dataObj.boger = object()
        self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                         tdf.good_tic_dat_object(dataObj, bad_message_handler =msg.append))
        self.assertTrue({"foods : Inconsistent key lengths"} == set(msg))
        self.assertTrue(all(tdf.good_tic_dat_table(getattr(dataObj, t), t)
                            for t in ("categories", "nutritionQuantities")))

        dataObj = dietData()
        dataObj.categories["boger"] = {"cost":1}
        dataObj.categories["boger"] = {"cost":1}
        self.assertFalse(tdf.good_tic_dat_object(dataObj) or
                         tdf.good_tic_dat_object(dataObj, bad_message_handler=msg.append))
        self.assertTrue({'foods : Inconsistent key lengths',
                         'categories : Inconsistent data field name keys.'} == set(msg))
        ex = firesException(lambda : tdf.freeze_me(tdf.TicDat(**{t:getattr(dataObj,t)
                                                                for t in tdf.primary_key_fields}))).message
        self.assertTrue("categories cannot be treated as a ticDat table : Inconsistent data field name keys" in ex)
Example #13
    def testJsonSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.json.write_file(panDat, filePath)
        panDat2 = pdf2.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "netflow.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, ""))
        self.assertTrue(pdf._same_data(panDat, panDat3))
        dicted = json.loads(pdf.json.write_file(panDat, ""))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        panDat5 = pdf2.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat5))

        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath, orient='columns', index=True)
        # the following doesn't generate a TicDatError, which is fine
        self.assertTrue(
            firesException(lambda: pdf.json.create_pan_dat(filePath)))
        panDat2 = pdf.json.create_pan_dat(filePath, orient='columns')
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(
            panDat, "", orient='columns'),
                                          orient="columns")
        self.assertTrue(pdf._same_data(panDat, panDat3, epsilon=1e-5))
        dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
Example #14
 def firesException(self, f):
     e = firesException(f)
     if e:
         self.assertTrue("TicDatError" in e.__class__.__name__)
         return str(e)
Example #15
    def testSilly(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x: tuple(x) if utils.containerish(x) else (x, )
        for t in ("a", "b"):
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] = (), []
        schema6 = sillyMeSchema()
        schema6["d"] = [["dField"], ()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x)
                                        for x in (schema2, schema3, schema4,
                                                  schema5, schema6))
        tdf5.set_generator_tables(("a", "c"))
        filePath = os.path.join(_scratchDir, "silly.xls")
        tdf.xls.write_file(ticDat, filePath)

        ticDat2 = tdf2.xls.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.xls.create_tic_dat(filePath)
        for t in ["a", "b"]:
            for k, v in getattr(ticDat4, t).items():
                for _k, _v in v.items():
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]):
                    self.assertTrue(t == "b")
                else:
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.xls.create_tic_dat(filePath,
                                          treat_inf_as_infinity=False)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(
            callable(ticDat5.a) and callable(ticDat5.c)
            and not callable(ticDat5.b))

        ticDat6 = tdf6.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat6))
        self.assertTrue(
            firesException(lambda: tdf6._same_data(ticDat, ticDat6)))
        self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

        def writeData(data, write_header="same"):
            assert filePath.endswith(".xls")
            assert not write_header or write_header in ("lower", "same",
                                                        "duped")
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables:
                sheet = book.add_sheet(t)
                if write_header:
                    all_fields = tdf.primary_key_fields.get(
                        t, ()) + tdf.data_fields.get(t, ())
                    for i, f in enumerate(
                        (2 if write_header == "duped" else 1) * all_fields):
                        sheet.write(
                            0, i,
                            f.lower() if write_header == "lower"
                            or i >= len(all_fields) else f)
                for rowInd, row in enumerate(data):
                    for fieldInd, cellValue in enumerate(
                        (2 if write_header == "duped" else 1) * row):
                        sheet.write(rowInd + (1 if write_header else 0),
                                    fieldInd, cellValue)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)
            if write_header in [
                    "lower", "same"
            ]:  # will use pandas to generate the xlsx file version
                file_path_x = filePath + "x"
                if os.path.exists(file_path_x):
                    os.remove(file_path_x)
                writer = utils.pd.ExcelWriter(file_path_x)
                for t, (pks, dfs) in tdf.schema().items():
                    fields = pks + dfs
                    if write_header == "lower":
                        fields = [_.lower() for _ in fields]
                    d = {f: [] for f in fields}
                    for row in data:
                        for f, c in zip(fields, row):
                            d[f].append(c)
                    utils.pd.DataFrame(d).to_excel(writer, t, index=False)
                writer.save()

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header="duped")
        self.assertTrue(
            self.firesException(
                lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header="lower")
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)],
                  write_header=False)
        self.assertTrue(
            self.firesException(
                lambda: tdf.xls.create_tic_dat(filePath, freeze_it=True)))
        ticDatMan = tdf.xls.create_tic_dat(filePath,
                                           freeze_it=True,
                                           headers_present=False)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.find_duplicates(filePath, headers_present=False)
        self.assertTrue(
            set(rowCount) == {'a'} and set(rowCount["a"]) == {1}
            and rowCount["a"][1] == 2)

        ticDat.a["theboger"] = (1, None, 12)
        tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here.
        # not sure how to handle this, but documenting for now.
        self.assertFalse(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")
        # the workaround for this flaw is to set the data type to be nullable but not allow the empty string
        tdfwa = TicDatFactory(**sillyMeSchema())
        tdfwa.set_data_type("a", "aData2", nullable=True)
        ticDatNone = tdfwa.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)

        # checking the same thing with .xlsx - using openpyxl, None is indeed recovered even without tdfwa munging!
        tdf.xls.write_file(ticDat, filePath + "x", allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath + "x", freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
        ticDatNone = tdfwa.xls.create_tic_dat(filePath + "x", freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40),
                   (1, 20, 30, 12)])
        for f in [filePath, filePath + "x"]:
            rowCount = tdf.xls.find_duplicates(f)
            self.assertTrue(
                set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1}
                and rowCount["a"][1] == 3)
            self.assertTrue(
                set(rowCount["b"]) == {(1, 20, 30)}
                and rowCount["b"][1, 20, 30] == 2)
Example #16
    def testDiet(self):
        if not self.canRun:
            return
        tdf = TicDatFactory(**dietSchema())
        tdf.enable_foreign_key_links()
        oldDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        self._test_generic_free_copy(oldDat, tdf)
        self._test_generic_free_copy(oldDat, tdf, ["nutritionQuantities"])
        ticDat = tdf.copy_to_pandas(oldDat)
        for k in oldDat.foods:
            self.assertTrue(oldDat.foods[k]["cost"] == ticDat.foods.cost[k])
        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            ticDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            ticDat.nutritionQuantities.qty[k1, k2])
        nut = ticDat.nutritionQuantities
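        # .loc raises on labels that aren't present, while .sloc returns an empty
        # selection (hence the zero sums) for the same missing labels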
        self.assertTrue(firesException(lambda: nut.qty.loc[:, "fatty"]))
        self.assertTrue(firesException(lambda: nut.qty.loc["chickeny", :]))
        self.assertFalse(firesException(lambda: nut.qty.sloc[:, "fatty"]))
        self.assertFalse(firesException(lambda: nut.qty.sloc["chickeny", :]))
        self.assertTrue(0 == sum(nut.qty.sloc[:, "fatty"]) == sum(nut.qty.sloc[
            "chickeny", :]))
        self.assertTrue(
            sum(nut.qty.sloc[:, "fat"]) == sum(nut.qty.loc[:, "fat"]) == sum(
                r["qty"] for (f, c), r in oldDat.nutritionQuantities.items()
                if c == "fat"))
        self.assertTrue(
            sum(nut.qty.sloc["chicken", :]) == sum(nut.qty.loc["chicken", :])
            == sum(r["qty"]
                   for (f, c), r in oldDat.nutritionQuantities.items()
                   if f == "chicken"))

        rebornTicDat = tdf.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))

        tdf2 = TicDatFactory(**{t: '*' for t in tdf.all_tables})
        self.assertTrue(
            firesException(
                lambda: tdf2.set_data_type("nutritionQuantities", "qty")))
        genTicDat = tdf2.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})

        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            genTicDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            genTicDat.nutritionQuantities.qty[k1, k2])
        self.assertFalse(tdf.good_tic_dat_object(genTicDat))
        self.assertTrue(tdf2.good_tic_dat_object(genTicDat))
        rebornTicDat = tdf.TicDat(
            **{t: getattr(genTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
        rebornGenTicDat = tdf2.TicDat(**tdf2.as_dict(genTicDat))
        for t, pks in tdf.primary_key_fields.items():
            getattr(rebornGenTicDat, t).index.names = pks
        rebornTicDat = tdf.TicDat(
            **{t: getattr(rebornGenTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))

        tdf3 = TicDatFactory(**dict(dietSchema(), **{"categories": '*'}))
        self.assertFalse(
            firesException(
                lambda: tdf3.set_data_type("nutritionQuantities", "qty")))
        mixTicDat = tdf3.TicDat(
            **{t: getattr(ticDat, t)
               for t in tdf.all_tables})
        for k in oldDat.categories:
            self.assertTrue(oldDat.categories[k]["minNutrition"] ==
                            mixTicDat.categories.minNutrition[k])
        for k1, k2 in oldDat.nutritionQuantities:
            self.assertTrue(oldDat.nutritionQuantities[k1, k2]["qty"] ==
                            mixTicDat.nutritionQuantities[k1, k2]["qty"])
        self.assertFalse(tdf2.good_tic_dat_object(mixTicDat))
        self.assertFalse(tdf3.good_tic_dat_object(genTicDat))
        self.assertTrue(tdf3.good_tic_dat_object(mixTicDat))
        rebornTicDat = tdf.TicDat(
            **{t: getattr(mixTicDat, t)
               for t in tdf.all_tables})
        self.assertTrue(tdf._same_data(rebornTicDat, oldDat))
Example #17
    def testSilly(self):
        if not _can_accdb_unit_test:
            return
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        filePath = os.path.join(_scratchDir, "silly.accdb")
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def sillyMeCleanData() :
            return {
                "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")},
                "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12},
                "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) )
            }
        ticDat = tdf.TicDat(**sillyMeCleanData())
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
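        # the writes above fail until an explicit schema (with text/int column
        # overrides) is laid down via write_schema; after that the data round-trips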
        def makeCleanSchema() :
            tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"},
                        b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"})
            return filePath
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        self.assertFalse(tdf.mdb.find_duplicates(filePath))
        accdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, accdbTicDat))

        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))

        ticDat2 = tdf2.mdb.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.mdb.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(lambda  : tdf6.mdb.create_tic_dat(filePath)))

        ticDat.a["theboger"] = (1, None, "twelve")
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == None)
Example #18
    def testThree(self):
        objOrig = netflowData()
        staticFactory = TicDatFactory(**netflowSchema())
        goodTable = lambda t : lambda _t : staticFactory.good_tic_dat_table(_t, t)
        tables = set(staticFactory.primary_key_fields)
        ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables}))
        self.assertTrue(staticFactory.good_tic_dat_object(ticDat))
        for t in tables :
            self._assertSame(getattr(objOrig, t), getattr(ticDat,t), goodTable(t))

        objOrig.commodities.append(12.3)
        objOrig.arcs[(1, 2)] = [12]
        self._assertSame(objOrig.nodes, ticDat.nodes, goodTable("nodes"))
        self._assertSame(objOrig.cost, ticDat.cost, goodTable("cost"))
        self.assertTrue(firesException(lambda : self._assertSame(
            objOrig.commodities, ticDat.commodities, goodTable("commodities")) ))
        self.assertTrue(firesException(lambda : self._assertSame(
            objOrig.arcs, ticDat.arcs, goodTable("arcs")) ))

        ticDat = staticFactory.freeze_me(staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables}))
        for t in tables :
            self._assertSame(getattr(objOrig, t), getattr(ticDat,t), goodTable(t))

        self.assertTrue(ticDat.arcs[1, 2]["capacity"] == 12)
        self.assertTrue(12.3 in ticDat.commodities)

        objOrig.cost[5]=5

        self.assertTrue("cost cannot be treated as a ticDat table : Inconsistent key lengths" in
            firesException(lambda : staticFactory.freeze_me(staticFactory.TicDat
                                    (**{t:getattr(objOrig,t) for t in tables}))))

        objOrig = netflowData()
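        # cost rows need a three-part key, so the single-key edit always raises,
        # while the full-key edit only raises on frozen objects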
        def editMeBadly(t) :
            def rtn() :
                t.cost["hack"] = 12
            return rtn
        def editMeWell(t) :
            def rtn() :
                t.cost["hack", "my", "balls"] = 12.12
            return rtn
        self.assertTrue(all(firesException(editMeWell(t)) and firesException(editMeBadly(t)) for t in
                            (ticDat, staticFactory.freeze_me(staticFactory.TicDat()))))

        def attributeMe(t) :
            def rtn() :
                t.boger="bogerwoger"
            return rtn

        self.assertTrue(firesException(attributeMe(ticDat)) and firesException(attributeMe(
                staticFactory.freeze_me(staticFactory.TicDat()))))

        mutable = staticFactory.TicDat(**{t:getattr(objOrig,t) for t in tables})
        for t in tables :
            self._assertSame(getattr(objOrig, t), getattr(mutable,t), goodTable(t))

        self.assertTrue(firesException(editMeBadly(mutable)))
        self.assertFalse(firesException(editMeWell(mutable)) or firesException(attributeMe(mutable)))
        self.assertTrue(firesException(lambda : self._assertSame(
            objOrig.cost, mutable.cost, goodTable("cost")) ))
Example #19
 def firesException(self, f):
     e = firesException(f)
     if e :
         self.assertTrue("TicDatError" in e.__class__.__name__)
         return e.message
Example #20
    def testSilly(self):
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))
        filePath = os.path.join(_scratchDir, "silly.xls")
        tdf.xls.write_file(ticDat, filePath)

        ticDat2 = tdf2.xls.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.xls.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.xls.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        ticDat6 = tdf6.xls.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat6))
        self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
        self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))

        def writeData(data, write_header = True):
            import xlwt
            book = xlwt.Workbook()
            for t in tdf.all_tables :
                sheet = book.add_sheet(t)
                if write_header :
                    for i,f in enumerate(tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())) :
                        sheet.write(0, i, f)
                for rowInd, row in enumerate(data) :
                    for fieldInd, cellValue in enumerate(row):
                        sheet.write(rowInd+ (1 if write_header else 0), fieldInd, cellValue)
            if os.path.exists(filePath):
                os.remove(filePath)
            book.save(filePath)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.get_duplicates(filePath)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)], write_header=False)
        self.assertTrue(self.firesException(lambda  : tdf.xls.create_tic_dat(filePath, freeze_it=True)))
        ticDatMan = tdf.xls.create_tic_dat(filePath, freeze_it=True, headers_present=False)
        self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
        self.assertTrue(ticDatMan.b[1, 20, 30]["bData"] == 40)
        rowCount = tdf.xls.get_duplicates(filePath, headers_present=False)
        self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)

        ticDat.a["theboger"] = (1, None, 12)
        tdf.xls.write_file(ticDat, filePath, allow_overwrite=True)
        ticDatNone = tdf.xls.create_tic_dat(filePath, freeze_it=True)
        # THIS IS A FLAW - but a minor one. None's are hard to represent. It is turning into the empty string here.
        # not sure how to handle this, but documenting for now.
        self.assertFalse(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] == "")

        writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
        rowCount = tdf.xls.get_duplicates(filePath)
        self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
        self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
Example #21
    def testJsonSimple(self):
        if not self.can_run:
            return
        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        pdf2.json.write_file(panDat, filePath)
        panDat2 = pdf2.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))

        re_fielded_schema = {
            "categories": (("name", ), ["maxNutrition", "minNutrition"]),
            "foods": [["name"], []],
            "nutritionQuantities": (["food", "category"], ["qty"])
        }
        pdf3 = PanDatFactory(**re_fielded_schema)
        panDat3 = pdf3.json.create_pan_dat(filePath)
        for t, (pks, dfs) in re_fielded_schema.items():
            self.assertTrue(
                list(pks) + list(dfs) == list(getattr(panDat3, t).columns))

        tdf = TicDatFactory(**netflowSchema())
        pdf = PanDatFactory(**netflowSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **
                {t: getattr(netflowData(), t)
                 for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(netflowSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "netflow.json")
        pdf.json.write_file(panDat, filePath)
        panDat2 = pdf.json.create_pan_dat(filePath)
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(panDat, ""))
        self.assertTrue(pdf._same_data(panDat, panDat3))
        dicted = json.loads(pdf.json.write_file(panDat, ""))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4))
        pdf2 = PanDatFactory(**{t: '*' for t in pdf.all_tables})
        panDat5 = pdf2.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat5))

        tdf = TicDatFactory(**dietSchema())
        pdf = PanDatFactory(**dietSchema())
        ticDat = tdf.freeze_me(
            tdf.TicDat(
                **{t: getattr(dietData(), t)
                   for t in tdf.primary_key_fields}))
        panDat = pan_dat_maker(dietSchema(), ticDat)
        filePath = os.path.join(_scratchDir, "diet.json")
        pdf.json.write_file(panDat, filePath, orient='columns', index=True)
        # the following doesn't generate a TicDatError, which is fine
        self.assertTrue(
            firesException(lambda: pdf.json.create_pan_dat(filePath)))
        panDat2 = pdf.json.create_pan_dat(filePath, orient='columns')
        self.assertTrue(pdf._same_data(panDat, panDat2, epsilon=1e-5))
        panDat3 = pdf.json.create_pan_dat(pdf.json.write_file(
            panDat, "", orient='columns'),
                                          orient="columns")
        self.assertTrue(pdf._same_data(panDat, panDat3, epsilon=1e-5))
        dicted = json.loads(pdf.json.write_file(panDat, "", orient='columns'))
        panDat4 = pdf.PanDat(**dicted)
        self.assertTrue(pdf._same_data(panDat, panDat4, epsilon=1e-5))
Example #22
        def doTest(headersPresent) :
            tdf = TicDatFactory(**sillyMeSchema())
            ticDat = tdf.TicDat(**sillyMeData())
            schema2 = sillyMeSchema()
            schema2["b"][0] = ("bField2", "bField1", "bField3")
            schema3 = sillyMeSchema()
            schema3["a"][1] = ("aData2", "aData3", "aData1")
            schema4 = sillyMeSchema()
            schema4["a"][1] = ("aData1", "aData3")
            schema5 = sillyMeSchema()
            _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
            for t in ("a", "b") :
                schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
            schema5["a"][0], schema5["b"][0] = (), []
            schema5b = sillyMeSchema()
            for t in ("a", "b") :
                schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
            schema5b["a"][0], schema5b["b"][0] = (), []
            schema6 = sillyMeSchema()
            schema6["d"] = [("dField",),[]]

            tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in
                            (schema2, schema3, schema4, schema5, schema5b, schema6))
            tdf5.set_generator_tables(["a", "c"])
            tdf5b.set_generator_tables(("a", "c"))


            dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
            tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)

            ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2))

            ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3))

            if headersPresent :
                ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
                for t in ("a", "b") :
                    for k,v in getattr(ticDat4, t).items() :
                        for _k, _v in v.items() :
                            self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                        if set(v) == set(getattr(ticDat, t)[k]) :
                            self.assertTrue(t == "b")
                        else :
                            self.assertTrue(t == "a")
            else :
                self.assertTrue(self.firesException(lambda :
                                    tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)))

            ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(
                                                    tdf5._same_data(tdf._keyless(ticDat), ticDat5))
            self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

            ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
            self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b))


            ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf._same_data(ticDat, ticDat6))
            self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
            self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
            allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())]
                             for t in tdf.all_tables})

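            # a primary-key-free schema lets the raw rows (duplicates included) be
            # written out, so the duplicate detection below has something to find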
            def writeData(data):
                td = allDataTdf.TicDat(a = data, b=data, c=data)
                allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True, write_header=headersPresent)

            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
            ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True)
            self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
            self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
            rowCount = tdf.csv.get_duplicates(dirPath, headers_present= headersPresent)
            self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)


            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
            rowCount = tdf.csv.get_duplicates(dirPath, headers_present=headersPresent)
            self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
            self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)
Example #23
    def testSilly(self):
        tdf = TicDatFactory(**sillyMeSchema())
        ticDat = tdf.TicDat(**sillyMeData())
        filePath = os.path.join(_scratchDir, "silly.mdb")
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def sillyMeCleanData() :
            return {
                "a" : {"1" : (1, 2, "3"), "b" : (12, 12.2, "twelve"), "c" : (11, 12, "thirt")},
                "b" : {(1, 2, "3") : 1, (3, 4, "b") : 12},
                "c" : ((1, "2", 3, 4), (0.2, "b", 0.3, 0.4), (1.2, "b", 12, 24) )
            }
        ticDat = tdf.TicDat(**sillyMeCleanData())
        self.assertTrue(firesException(lambda : tdf.mdb.write_file(ticDat, makeCleanPath(filePath))))
        def makeCleanSchema() :
            tdf.mdb.write_schema(makeCleanPath(filePath), a={"aData3" : "text"},
                        b = {"bField1" : "int", "bField2" : "int"}, c={"cData2" : "text"})
            return filePath
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        mdbTicDat = tdf.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, mdbTicDat))

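        # Schema variants: schema2 reorders b's primary-key fields, schema3 reorders a's
        # data fields, schema4 drops one of a's data fields, schema5 folds the primary
        # keys of a and b into the data fields (keyless tables), and schema6 adds a
        # table "d" that the .mdb file doesn't contain.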
        schema2 = sillyMeSchema()
        schema2["b"][0] = ("bField2", "bField1", "bField3")
        schema3 = sillyMeSchema()
        schema3["a"][1] = ("aData2", "aData3", "aData1")
        schema4 = sillyMeSchema()
        schema4["a"][1] = ("aData1", "aData3")
        schema5 = sillyMeSchema()
        _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
        for t in ("a", "b") :
            schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
        schema5["a"][0], schema5["b"][0] =  (),  []
        schema6 = sillyMeSchema()
        schema6["d"] =  [["dField"],()]

        tdf2, tdf3, tdf4, tdf5, tdf6 = (TicDatFactory(**x) for x in (schema2, schema3, schema4, schema5, schema6))
        tdf5.set_generator_tables(("a","c"))

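        # Reading back with permuted primary-key fields changes the data; permuting the
        # data-field order does not.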
        ticDat2 = tdf2.mdb.create_tic_dat(filePath)
        self.assertFalse(tdf._same_data(ticDat, ticDat2))

        ticDat3 = tdf3.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf._same_data(ticDat, ticDat3))

        ticDat4 = tdf4.mdb.create_tic_dat(filePath)
        for t in ["a","b"]:
            for k,v in getattr(ticDat4, t).items() :
                for _k, _v in v.items() :
                    self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                if set(v) == set(getattr(ticDat, t)[k]) :
                    self.assertTrue(t == "b")
                else :
                    self.assertTrue(t == "a")

        ticDat5 = tdf5.mdb.create_tic_dat(filePath)
        self.assertTrue(tdf5._same_data(tdf._keyless(ticDat), ticDat5))
        self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

        self.assertTrue("table d" in self.firesException(lambda  : tdf6.mdb.create_tic_dat(filePath)))

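        # A None value should round-trip through the .mdb file unchanged.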
        ticDat.a["theboger"] = (1, None, "twelve")
        tdf.mdb.write_file(ticDat, makeCleanSchema())
        ticDatNone = tdf.mdb.create_tic_dat(filePath, freeze_it=True)
        self.assertTrue(tdf._same_data(ticDat, ticDatNone))
        self.assertTrue(ticDatNone.a["theboger"]["aData2"] is None)
Exemplo n.º 24
        def doTest(headersPresent) :
            tdf = TicDatFactory(**sillyMeSchema())
            ticDat = tdf.TicDat(**sillyMeData())
            schema2 = sillyMeSchema()
            schema2["b"][0] = ("bField2", "bField1", "bField3")
            schema3 = sillyMeSchema()
            schema3["a"][1] = ("aData2", "aData3", "aData1")
            schema4 = sillyMeSchema()
            schema4["a"][1] = ("aData1", "aData3")
            schema5 = sillyMeSchema()
            _tuple = lambda x : tuple(x) if utils.containerish(x) else (x,)
            for t in ("a", "b") :
                schema5[t][1] = _tuple(schema5[t][1]) + _tuple(schema5[t][0])
            schema5["a"][0], schema5["b"][0] = (), []
            schema5b = sillyMeSchema()
            for t in ("a", "b") :
                schema5b[t][1] = _tuple(schema5b[t][0]) + _tuple(schema5b[t][1])
            schema5b["a"][0], schema5b["b"][0] = (), []
            schema6 = sillyMeSchema()
            schema6["d"] = [("dField",),[]]

            tdf2, tdf3, tdf4, tdf5, tdf5b, tdf6 = (TicDatFactory(**x) for x in
                            (schema2, schema3, schema4, schema5, schema5b, schema6))
            tdf5.set_generator_tables(["a", "c"])
            tdf5b.set_generator_tables(("a", "c"))


            dirPath = makeCleanDir(os.path.join(_scratchDir, "silly"))
            tdf.csv.write_directory(ticDat, dirPath, write_header=headersPresent)

            ticDat2 = tdf2.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertFalse if headersPresent else self.assertTrue)(tdf._same_data(ticDat, ticDat2))

            ticDat3 = tdf3.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(tdf._same_data(ticDat, ticDat3))

            if headersPresent :
                ticDat4 = tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)
                for t in ("a", "b") :
                    for k,v in getattr(ticDat4, t).items() :
                        for _k, _v in v.items() :
                            self.assertTrue(getattr(ticDat, t)[k][_k] == _v)
                        if set(v) == set(getattr(ticDat, t)[k]) :
                            self.assertTrue(t == "b")
                        else :
                            self.assertTrue(t == "a")
            else :
                self.assertTrue(self.firesException(lambda :
                                    tdf4.csv.create_tic_dat(dirPath, headers_present=headersPresent)))

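            # Tables registered as generator tables come back as callables rather than
            # dicts (see the callable() checks below).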
            ticDat5 = tdf5.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            (self.assertTrue if headersPresent else self.assertFalse)(
                                                    tdf5._same_data(tdf._keyless(ticDat), ticDat5))
            self.assertTrue(callable(ticDat5.a) and callable(ticDat5.c) and not callable(ticDat5.b))

            ticDat5b = tdf5b.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf5b._same_data(tdf._keyless(ticDat), ticDat5b))
            self.assertTrue(callable(ticDat5b.a) and callable(ticDat5b.c) and not callable(ticDat5b.b))


            ticDat6 = tdf6.csv.create_tic_dat(dirPath, headers_present=headersPresent)
            self.assertTrue(tdf._same_data(ticDat, ticDat6))
            self.assertTrue(firesException(lambda : tdf6._same_data(ticDat, ticDat6)))
            self.assertTrue(hasattr(ticDat6, "d") and utils.dictish(ticDat6.d))
            allDataTdf = TicDatFactory(**{t:[[], tdf.primary_key_fields.get(t, ()) + tdf.data_fields.get(t, ())]
                             for t in tdf.all_tables})

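            # With no primary keys declared, allDataTdf can write out rows whose keys
            # collide, which is what find_duplicates is expected to flag.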
            def writeData(data):
                td = allDataTdf.TicDat(a = data, b=data, c=data)
                allDataTdf.csv.write_directory(td, dirPath, allow_overwrite=True, write_header=headersPresent)

            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40)])
            ticDatMan = tdf.csv.create_tic_dat(dirPath, headers_present=headersPresent, freeze_it=True)
            self.assertTrue(len(ticDatMan.a) == 2 and len(ticDatMan.b) == 3)
            self.assertTrue(ticDatMan.b[(1, 20, 30)]["bData"] == 40)
            rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
            self.assertTrue(set(rowCount) == {'a'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==2)


            writeData([(1, 2, 3, 4), (1, 20, 30, 40), (10, 20, 30, 40), (1,20,30,12)])
            rowCount = tdf.csv.find_duplicates(dirPath, headers_present=headersPresent)
            self.assertTrue(set(rowCount) == {'a', 'b'} and set(rowCount["a"]) == {1} and rowCount["a"][1]==3)
            self.assertTrue(set(rowCount["b"]) == {(1,20,30)} and rowCount["b"][1,20,30]==2)