def testGetStats(self):
    # Build a cube from the sample CSV and verify the computed statistics
    # (std, min, max, median, total, mean) for the Price and Qty measures.
    cubeName = 'test-' + str(uuid.uuid4())
    shutil.copyfile('testdata.csv', cubeName + '.csv')
    cs = CubeService('testdb')
    cs.createCubeFromCsv(cubeName + '.csv', cubeName, cubeName)
    cubeCells = cs.getCubeCells(cubeName)
    stats = cs.getStats(cubeCells)
    for stat in stats:
        self.assertTrue(stat in ['Price', 'Qty'])
        if stat == 'Price':
            self.assertTrue(math.ceil(stats[stat]['std'] * 100) / 100 == 6.42)
            self.assertTrue(stats[stat]['min'] == 1.5)
            self.assertTrue(stats[stat]['max'] == 20.5)
            self.assertTrue(stats[stat]['median'] == 12.5)
            self.assertTrue(stats[stat]['total'] == 164.0)
            self.assertTrue(math.ceil(stats[stat]['mean'] * 100) / 100 == 11.72)
        elif stat == 'Qty':
            self.assertTrue(math.ceil(stats[stat]['std'] * 100) / 100 == 1.81)
            self.assertTrue(stats[stat]['min'] == 1.0)
            self.assertTrue(stats[stat]['max'] == 7.0)
            self.assertTrue(stats[stat]['median'] == 3.0)
            self.assertTrue(stats[stat]['total'] == 44.0)
            self.assertTrue(math.ceil(stats[stat]['mean'] * 100) / 100 == 3.15)
    os.remove(cubeName + '.csv')
def testBinning(self):
    # Bin the source cube using the definitions in test_binnings.json and verify
    # the dimension keys of the resulting binned cells. Each key is a string of
    # '#<dimension>:<value>' pairs, including the PriceBin, QtyBin and Year bins.
    cubeName = 'test-' + str(uuid.uuid4())
    shutil.copyfile('testdata.csv', cubeName + '.csv')
    cs = CubeService('testdb')
    cs.createCubeFromCsv(cubeName + '.csv', cubeName, cubeName)
    with open('test_binnings.json') as binnings_file:
        binnings = json.load(binnings_file)
    cs.binCube(binnings, cubeName, cubeName + '_b', cubeName + '_b')
    binnedCubeCells = cs.getCubeCells(cubeName + '_b')
    dimkeys = []
    for binnedCubeCell in binnedCubeCells:
        dimkeys.append(binnedCubeCell['dimensionKey'])
    dimkeys.sort()
    self.assertTrue(dimkeys[0] == '#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-11')
    self.assertTrue(dimkeys[1] == '#CustomerId:C1#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-11')
    self.assertTrue(dimkeys[2] == '#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-10')
    self.assertTrue(dimkeys[3] == '#CustomerId:C1#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-10')
    self.assertTrue(dimkeys[4] == '#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-5#Region:West#State:CA#Year:Year2014#Date:2014-10-11')
    self.assertTrue(dimkeys[5] == '#CustomerId:C1#PriceBin:10+#ProductId:P2#QtyBin:0-5#Region:West#State:CA#Year:Year2015#Date:2015-10-11')
    self.assertTrue(dimkeys[6] == '#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-11')
    self.assertTrue(dimkeys[7] == '#CustomerId:C2#PriceBin:0-5#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-11')
    self.assertTrue(dimkeys[8] == '#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10')
    self.assertTrue(dimkeys[9] == '#CustomerId:C2#PriceBin:10+#ProductId:P1#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10')
    self.assertTrue(dimkeys[10] == '#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2014#Date:2014-10-10')
    self.assertTrue(dimkeys[11] == '#CustomerId:C2#PriceBin:5-10#ProductId:P2#QtyBin:0-5#Region:NorthEast#State:NY#Year:Year2015#Date:2015-10-10')
    self.assertTrue(dimkeys[12] == '#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:5+#Region:NorthEast#State:MA#Year:Year2014#Date:2014-10-11')
    self.assertTrue(dimkeys[13] == '#CustomerId:C3#PriceBin:10+#ProductId:P1#QtyBin:5+#Region:NorthEast#State:MA#Year:Year2015#Date:2015-10-11')
    os.remove(cubeName + '.csv')
def testAggregation(self):
    # Bin the cube, run the aggregations defined in test_agg.json, and verify
    # the cell count and number of dimensions in each aggregated cube.
    cubeName = 'test-' + str(uuid.uuid4())
    shutil.copyfile('testdata.csv', cubeName + '.csv')
    cs = CubeService('testdb')
    cs.createCubeFromCsv(cubeName + '.csv', cubeName, cubeName)
    with open('test_binnings.json') as binnings_file:
        binnings = json.load(binnings_file)
    cs.binCube(binnings, cubeName, cubeName + '_b', cubeName + '_b')
    with open('test_agg.json') as agg_file:
        aggs = json.load(agg_file)
    cs.aggregateCube(cubeName + '_b', aggs)

    aggCubeCells = cs.getCubeCells(cubeName + '_b_agg1')
    self.assertTrue(aggCubeCells.count() == 4)
    for aggCubeCell in aggCubeCells:
        self.assertTrue(len(aggCubeCell['dimensions']) == 2)
        print(aggCubeCell)
        print('---------')

    aggCubeCells = cs.getCubeCells(cubeName + '_b_agg2')
    self.assertTrue(aggCubeCells.count() == 2)
    print(aggCubeCells.count())
    for aggCubeCell in aggCubeCells:
        self.assertTrue(len(aggCubeCell['dimensions']) == 1)
        print(aggCubeCell)
        print('---------')

    aggCubeCells = cs.getCubeCells(cubeName + '_b_agg3')
    self.assertTrue(aggCubeCells.count() == 2)
    print(aggCubeCells.count())
    for aggCubeCell in aggCubeCells:
        self.assertTrue(len(aggCubeCell['dimensions']) == 1)
        print(aggCubeCell)
        print('---------')

    os.remove(cubeName + '.csv')
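# A minimal usage sketch of CubeService outside the test suite, using only the
# calls exercised by the tests above (createCubeFromCsv, getCubeCells, getStats,
# binCube, aggregateCube). It assumes a local MongoDB instance; the database
# name 'salesdb', the cube name 'sales' and the function name are illustrative.
import json

def build_example_cube():
    cs = CubeService('salesdb')
    cs.createCubeFromCsv('testdata.csv', 'sales', 'sales')

    # Per-measure summary statistics (min, max, mean, median, std, total)
    stats = cs.getStats(cs.getCubeCells('sales'))
    print(stats)

    # Bin the source cube, then aggregate the binned cube
    with open('test_binnings.json') as f:
        binnings = json.load(f)
    cs.binCube(binnings, 'sales', 'sales_b', 'sales_b')

    with open('test_agg.json') as f:
        aggs = json.load(f)
    cs.aggregateCube('sales_b', aggs)

    # Aggregated cubes are named <binned-cube>_<agg-name>, e.g. 'sales_b_agg1'
    for cell in cs.getCubeCells('sales_b_agg1'):
        print(cell)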
class CubeSetService:
    def __init__(self, dbName):
        self.dbName = dbName
        client = MongoClient()
        self.db = client[dbName]
        self.cubeService = CubeService(dbName)

    #
    # Update an arbitrary field in a cube set document
    #
    def __updateCubeSetProperty__(self, cubeSetName, update):
        self.db['cubeset'].update_one({"name": cubeSetName}, update)

    def getCubeSet(self, cubeSetName):
        return self.db['cubeset'].find_one({"name": cubeSetName})

    def createCubeSet(self, owner, cubeSetName, cubeSetDisplayName, csvFileName, binnings, aggs):
        # Make sure cubeSetName is unique
        existing = self.getCubeSet(cubeSetName)
        if existing is not None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' already exists')

        sourceCubeName = cubeSetName + "_source"
        self.cubeService.createCubeFromCsv(csvFileName, sourceCubeName, sourceCubeName)
        if binnings is not None:
            binnedCubeName = cubeSetName + "_binned"
            self.cubeService.binCube(binnings, sourceCubeName, binnedCubeName, binnedCubeName)
            if aggs is not None:
                self.cubeService.aggregateCube(binnedCubeName, aggs)

        # Now save the cube set document
        cubeSet = {}
        cubeSet['name'] = cubeSetName
        cubeSet['displayName'] = cubeSetDisplayName
        cubeSet['owner'] = owner
        cubeSet['csvFileName'] = csvFileName
        cubeSet['createdOn'] = datetime.utcnow()
        cubeSet['sourceCube'] = sourceCubeName
        if binnings is not None:
            cubeSet['binnedCube'] = binnedCubeName
            if aggs is not None:
                aggCubeNames = []
                for agg in aggs:
                    aggCubeNames.append(binnedCubeName + "_" + agg['name'])
                cubeSet['aggCubes'] = aggCubeNames
        self.db['cubeset'].insert_one(cubeSet)

    #
    # Add cells to the source cube
    #
    def addCellsToSourceCube(self, cubeSetName, csvFileName):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')
        self.cubeService.appendToCubeFromCsv(csvFileName, existing['sourceCube'])

        # Re-bin
        if 'binnedCube' in existing:
            binnedCubeName = existing['binnedCube']
            binnedCube = self.cubeService.getCube(binnedCubeName)
            self.cubeService.rebinCube(binnedCube['binnings'], existing['sourceCube'], existing['binnedCube'])
            # Re-aggregate
            if 'aggCubes' in existing:
                for aggCubeName in existing['aggCubes']:
                    aggCube = self.cubeService.getCube(aggCubeName)
                    self.cubeService.aggregateCube(binnedCubeName, [aggCube['agg']])

    #
    # Remove cells from the source cube
    #
    def removeCellsFromSourceCube(self, cubeSetName, filter):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')
        self.cubeService.deleteCubeCells(existing['sourceCube'], filter)

        # Re-bin
        if 'binnedCube' in existing:
            binnedCubeName = existing['binnedCube']
            binnedCube = self.cubeService.getCube(binnedCubeName)
            self.cubeService.rebinCube(binnedCube['binnings'], existing['sourceCube'], existing['binnedCube'])
            # Re-aggregate
            if 'aggCubes' in existing:
                for aggCubeName in existing['aggCubes']:
                    aggCube = self.cubeService.getCube(aggCubeName)
                    self.cubeService.aggregateCube(binnedCubeName, [aggCube['agg']])

    #
    # Update the cube set display name
    #
    def updateCubeSetDisplayName(self, cubeSetName, displayName):
        self.__updateCubeSetProperty__(cubeSetName, {"$set": {"displayName": displayName}})

    def deleteCubeSet(self, cubeSetName):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')

        # Delete the source, binned and aggregated cubes
        self.cubeService.deleteCube(existing['sourceCube'])
        if 'binnedCube' in existing:
            self.cubeService.deleteCube(existing['binnedCube'])
        if 'aggCubes' in existing:
            for aggCube in existing['aggCubes']:
                self.cubeService.deleteCube(aggCube)

        # Delete the cube set document itself
        self.db['cubeset'].delete_one({"name": cubeSetName})

    def getSourceCubeCells(self, cubeSetName):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')
        return self.cubeService.getCubeCells(existing['sourceCube'])

    def getBinnedCubeCells(self, cubeSetName):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')
        if 'binnedCube' in existing:
            return self.cubeService.getCubeCells(existing['binnedCube'])
        else:
            return None

    def getAggregatedCubeCells(self, cubeSetName, aggName):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')
        if 'aggCubes' in existing:
            for aggCube in existing['aggCubes']:
                if existing['binnedCube'] + "_" + aggName == aggCube:
                    return self.cubeService.getCubeCells(aggCube)
        return None

    def performBinning(self, cubeSetName, binnings):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')
        # Re-bin if a binned cube already exists; otherwise bin for the first time
        if 'binnedCube' in existing:
            self.cubeService.rebinCube(binnings, existing['sourceCube'], existing['binnedCube'])
        else:
            binnedCubeName = cubeSetName + "_binned"
            self.cubeService.binCube(binnings, existing['sourceCube'], binnedCubeName, binnedCubeName)
            self.__updateCubeSetProperty__(cubeSetName, {"$set": {"binnedCube": binnedCubeName}})

    def performAggregation(self, cubeSetName, aggs):
        existing = self.getCubeSet(cubeSetName)
        if existing is None:
            raise ValueError('Cube Set with name ' + cubeSetName + ' does not exist')
        binnedCubeName = existing['binnedCube']
        self.cubeService.aggregateCube(binnedCubeName, aggs)
        aggCubeNames = []
        for agg in aggs:
            aggCubeNames.append(binnedCubeName + "_" + agg['name'])
        self.__updateCubeSetProperty__(cubeSetName, {"$set": {"aggCubes": aggCubeNames}})
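# A minimal usage sketch for CubeSetService, assuming a local MongoDB instance.
# The database name, owner, cube set name and CSV file names are illustrative,
# and 'agg1' assumes the aggregation definitions include an aggregation with
# that name; the binnings/aggs JSON follow whatever format CubeService.binCube
# and aggregateCube expect.
import json

def build_example_cube_set():
    css = CubeSetService('salesdb')

    with open('test_binnings.json') as f:
        binnings = json.load(f)
    with open('test_agg.json') as f:
        aggs = json.load(f)

    # Creates sales_source, sales_binned and one sales_binned_<agg-name> cube per
    # aggregation, then stores the cube set document in the 'cubeset' collection.
    css.createCubeSet('owner1', 'sales', 'Sales Cube Set', 'testdata.csv', binnings, aggs)

    # Read back the binned cells and the cells of one aggregated cube.
    for cell in css.getBinnedCubeCells('sales'):
        print(cell)
    aggCells = css.getAggregatedCubeCells('sales', 'agg1')

    # Appending more rows to the source cube re-bins and re-aggregates automatically.
    css.addCellsToSourceCube('sales', 'more_testdata.csv')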