Exemplo n.º 1
0
    def testPerformBinning(self):
        """Re-bin a new cube set with changed QtyBinning bins and check the keys.

        Creates a cube set from a private copy of testdata.csv, replaces the
        bins of the "QtyBinning" entry, calls performBinning, and compares the
        sorted dimension keys of the binned cube rows with the expected ones.
        The copied CSV is removed even when the test fails.
        """
        cubeSetName = "test-" + str(uuid.uuid4())
        csvFilePath = cubeSetName + ".csv"

        expectedDimkeys = [
            "#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-3#Region:West#State:CA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-3#Region:West#State:CA#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-3#Region:West#State:CA#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-3#Region:West#State:CA#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-3#Region:West#State:CA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-3#Region:West#State:CA#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-3#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-3#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-3#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-3#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:3+#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:3+#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:3+#Region:NorthEast#State:MA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:3+#Region:NorthEast#State:MA#Year:Year2015#Date:2015-10-11",
        ]

        try:
            # Fixtures are looked up under cubify/tests/ first, then in the
            # current directory.
            csvSource = "cubify/tests/testdata.csv"
            if not os.path.isfile(csvSource):
                csvSource = "./testdata.csv"
            shutil.copyfile(csvSource, csvFilePath)

            binningFileName = "cubify/tests/test_binnings.json"
            if not os.path.isfile(binningFileName):
                binningFileName = "./test_binnings.json"
            with open(binningFileName) as binnings_file:
                binnings = json.load(binnings_file)

            cs = CubeSetService("testdb")
            cubeSet = cs.createCubeSet("testOwner", cubeSetName, csvFilePath, binnings, None)

            # Change the binning
            for binning in binnings:
                if binning["binningName"] == "QtyBinning":
                    binning["bins"] = [
                        {"label": "0-3", "min": 0, "max": 3},
                        {"label": "3+", "min": 4, "max": 99999999},
                    ]

            cs.performBinning(cubeSet, binnings)

            dimkeys = sorted(row["dimensionKey"] for row in cs.getBinnedCubeRows(cubeSet))

            # Only the leading keys are pinned; a short result now fails the
            # assertion (with a diff) instead of raising IndexError.
            self.assertEqual(dimkeys[: len(expectedDimkeys)], expectedDimkeys)
        finally:
            # Always drop the per-test CSV copy so failed runs leave no litter.
            if os.path.isfile(csvFilePath):
                os.remove(csvFilePath)
Exemplo n.º 2
0
    def testAddRowsToSourceCube(self):
        """Append incremental rows to a cube set and check all derived cubes.

        Creates a cube set from a private copy of testdata.csv, adds the rows
        of testdataIncremental.csv, then checks the source row count, the
        sorted dimension keys of the binned cube rows, and the row and
        dimension counts of the first three aggregated cubes.
        The copied CSV is removed even when the test fails.
        """
        cubeSetName = "test-" + str(uuid.uuid4())
        csvFilePath = cubeSetName + ".csv"

        expectedDimkeys = [
            "#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2016#Date:2016-10-11",
            "#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2016#Date:2016-10-10",
            "#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-5#Region:West#State:CA#Year:Year2016#Date:2016-10-11",
            "#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2016#Date:2016-10-11",
            "#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2016#Date:2016-10-10",
            "#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2016#Date:2016-10-10",
            "#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:5+#Region:NorthEast#State:MA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:5+#Region:NorthEast#State:MA#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:5+#Region:NorthEast#State:MA#Year:Year2016#Date:2016-10-11",
        ]

        # (expected row count, expected dimensions per row) for aggs[0..2]
        expectedAggShapes = [(4, 2), (2, 1), (2, 1)]

        try:
            # Fixtures are looked up under cubify/tests/ first, then in the
            # current directory.
            csvSource = "cubify/tests/testdata.csv"
            if not os.path.isfile(csvSource):
                csvSource = "./testdata.csv"
            shutil.copyfile(csvSource, csvFilePath)

            binningFileName = "cubify/tests/test_binnings.json"
            if not os.path.isfile(binningFileName):
                binningFileName = "./test_binnings.json"
            with open(binningFileName) as binnings_file:
                binnings = json.load(binnings_file)

            aggFileName = "cubify/tests/test_agg.json"
            if not os.path.isfile(aggFileName):
                aggFileName = "./test_agg.json"
            with open(aggFileName) as agg_file:
                aggs = json.load(agg_file)

            cs = CubeSetService("testdb")
            cubeSet = cs.createCubeSet("testOwner", cubeSetName, csvFilePath, binnings, aggs)

            incFileName = "cubify/tests/testdataIncremental.csv"
            if not os.path.isfile(incFileName):
                incFileName = "./testdataIncremental.csv"
            cs.addRowsToSourceCube(cubeSet, incFileName)

            sourceCubeRows = cs.getSourceCubeRows(cubeSet)
            self.assertEqual(sourceCubeRows.count(), 21)

            dimkeys = sorted(row["dimensionKey"] for row in cs.getBinnedCubeRows(cubeSet))

            # Only the leading keys are pinned; a short result now fails the
            # assertion (with a diff) instead of raising IndexError.
            self.assertEqual(dimkeys[: len(expectedDimkeys)], expectedDimkeys)

            for index, (expectedRows, expectedDims) in enumerate(expectedAggShapes):
                # Index explicitly so a missing aggregation definition still
                # raises rather than being skipped silently.
                agg = aggs[index]
                aggCubeRows = cs.getAggregatedCubeRows(cubeSet, agg["name"])
                self.assertEqual(aggCubeRows.count(False), expectedRows)
                for aggCubeRow in aggCubeRows:
                    self.assertEqual(len(aggCubeRow["dimensions"]), expectedDims)
                    # Single-argument print(...) behaves the same on Python 2
                    # and Python 3.
                    print(aggCubeRow)

                print("---------")
        finally:
            # Always drop the per-test CSV copy so failed runs leave no litter.
            if os.path.isfile(csvFilePath):
                os.remove(csvFilePath)
Exemplo n.º 3
0
    def testGetBinnedCubeRows(self):
        """Create a cube set and check the dimension keys of its binned rows.

        Creates a cube set from a private copy of testdata.csv using the
        binnings and aggregations from the JSON fixtures, then compares the
        sorted dimension keys of the binned cube rows with the expected ones.
        The copied CSV is removed even when the test fails.
        """
        cubeSetName = "test-" + str(uuid.uuid4())
        csvFilePath = cubeSetName + ".csv"

        expectedDimkeys = [
            "#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-11",
            "#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10",
            "#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10",
            "#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:5+#Region:NorthEast#State:MA#Year:Year2014#Date:2014-10-11",
            "#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:5+#Region:NorthEast#State:MA#Year:Year2015#Date:2015-10-11",
        ]

        try:
            # Fixtures are looked up under cubify/tests/ first, then in the
            # current directory.
            csvSource = "cubify/tests/testdata.csv"
            if not os.path.isfile(csvSource):
                csvSource = "./testdata.csv"
            shutil.copyfile(csvSource, csvFilePath)

            binningFileName = "cubify/tests/test_binnings.json"
            if not os.path.isfile(binningFileName):
                binningFileName = "./test_binnings.json"
            with open(binningFileName) as binnings_file:
                binnings = json.load(binnings_file)

            aggFileName = "cubify/tests/test_agg.json"
            if not os.path.isfile(aggFileName):
                aggFileName = "./test_agg.json"
            with open(aggFileName) as agg_file:
                aggs = json.load(agg_file)

            cs = CubeSetService("testdb")
            cubeSet = cs.createCubeSet("testOwner", cubeSetName, csvFilePath, binnings, aggs)

            dimkeys = sorted(row["dimensionKey"] for row in cs.getBinnedCubeRows(cubeSet))

            # Only the leading keys are pinned; a short result now fails the
            # assertion (with a diff) instead of raising IndexError.
            self.assertEqual(dimkeys[: len(expectedDimkeys)], expectedDimkeys)
        finally:
            # Always drop the per-test CSV copy so failed runs leave no litter.
            if os.path.isfile(csvFilePath):
                os.remove(csvFilePath)