def initialize_model():
    """Load the cube model from MODEL_PATH and open a SQL workspace on DB_URL.

    Publishes the results through the module-level ``model`` and
    ``workspace`` globals; fact tables are prefixed ``ft_`` and dimension
    tables ``dm_`` in the underlying database.
    """
    global model, workspace
    model = cubes.load_model(MODEL_PATH)
    workspace = cubes.create_workspace(
        "sql",
        model,
        url=DB_URL,
        fact_prefix="ft_",
        dimension_prefix="dm_",
    )
def initialize_model():
    """Load the cube model from MODEL_PATH and open a ``sql.star`` workspace.

    Sets the module-level ``model`` and ``workspace`` globals; fact tables
    are expected to carry the ``ft_`` prefix in the database at DB_URL.
    """
    global model, workspace
    model = cubes.load_model(MODEL_PATH)
    workspace = cubes.create_workspace(
        "sql.star",
        model,
        url=DB_URL,
        fact_prefix="ft_",
    )
def initialize_model():
    """Load the cube model from MODEL_PATH and open a ``sql.browser`` workspace.

    Sets the module-level ``model`` and ``workspace`` globals; denormalized
    views are expected to carry the ``vft_`` prefix in the database at DB_URL.
    """
    global model, workspace
    model = cubes.load_model(MODEL_PATH)
    workspace = cubes.create_workspace(
        "sql.browser",
        model,
        url=DB_URL,
        view_prefix="vft_",
    )
def from_cubes(project_id): cnt = Counter() print "-------------- FROM CUBE ------------- " + project_id model = cubes.load_model("../../cida_model.json") ws = cubes.create_workspace("sql", model, url="postgres://localhost/crs") cube = model.cube("cida") browser = ws.browser(cube) cell = cubes.Cell(cube) cut = cubes.PointCut("project_number", [project_id]) cut = cubes.PointCut("project_number", [project_id]) cell2 = cell.slice(cut) result = browser.aggregate(cell2, drilldown=["fiscal_year", "country_region_name", "id"]) for i, c in enumerate(result.cells): cnt[sum] += c["amount_spent_sum"] # print i,c['id'],project_id, c['fiscal_year'],c['country_region_name'],c['amount_spent_sum'] print cnt[sum]
def test_cubes(self): """ Testing that each cube can be instantiated Testing that facts in each cube can be called This will insure that you can aggregate and get raw data from each of the cubes in the model used by the datawarehouse. This should work when you change schema and add when you add cubes. """ filename = 'datawarehouse/fixtures/datawarehouse.json' workspace = cubes.create_workspace('sql',dwmodel,engine=engine,fact_prefix=fact_prefix,dimension_prefix=dimension_prefix) for c_key,c_name in dwmodel.cubes.iteritems(): cube = dwmodel.cube(c_name) self.assertIsInstance(cube,cubes.model.Cube) try: # If mapping is explicitly defined, grab the explicit mapping for key cube.key = cube.mappings['id'] except (TypeError,KeyError): # Otherwise either no mappings are listed or a mapping for 'id' does not exist pass try: browser = workspace.browser(cube) except Exception as error: print "Cube: " + c_key + " Failed" print error assert False self.assertIsInstance(browser,cubes.backends.sql.star.SnowflakeBrowser) cell = cubes.Cell(cube) try: facts = browser.facts(cell) except Exception as error: print "Cube: " + c_key + " Failed" print error assert False assert True print "Cube: " + c_key + " Passed"
# NOTE(review): this chunk opens mid-script -- `out` and `row_string` are
# built earlier (outside this view), and the first append was presumably
# inside the row loop that produced them; confirm placement against the
# full file.
out += row_string + "\n"
out += "</table></body></html>"

# Write the accumulated HTML document to disk.
with open("table.html", "w") as f:
    f.write(out)

########################################################################
# Create browsing context

model = cubes.load_model("vvo_model.json")
cube = model.cube("contracts")
workspace = cubes.create_workspace("sql", model,
                                   url="sqlite:///vvo_data.sqlite",
                                   dimension_prefix="dm_",
                                   fact_prefix="ft_")
browser = workspace.browser(cube)
cell = cubes.Cell(cube)

########################################################################
# Create cross-table (written to table.html in current directory)

# Aggregate over the whole cube, drilled down by process type and date.
result = browser.aggregate(cell, drilldown=["process_type", "date"])

rows = ["process_type.description"]
columns = ["date.year"]
table = result.cross_table(rows, columns, ["contract_amount_sum"])

print_cross_table(rows, columns, table)
import cubes # 1. Prepare SQL data in memory model = cubes.load_model("model.json") workspace = cubes.create_workspace("sql.star", model, url="sqlite:///data.sqlite") # 2. Create a model cube = model.cube("irbd_balance") # 3. Create a browser and get a cell representing the whole cube (all data) browser = workspace.browser(cube) cell = browser.full_cube() # 4. Play with aggregates result = browser.aggregate(cell) print "Total\n" "----------------------" print "Record count: %8d" % result.summary["record_count"] print "Total amount: %8d" % result.summary["amount_sum"] # # The End! # # ... of the Hello World! example # # The following is more than just plain "hello"... uncomment it all to the end. # # # 5. Drill-down through a dimension #
import cubes # 1. Prepare SQL data in memory model = cubes.load_model("model.json") workspace = cubes.create_workspace("sql", model, url='sqlite:///data.sqlite') # 2. Create a model cube = model.cube("irbd_balance") # 3. Create a browser and get a cell representing the whole cube (all data) browser = workspace.browser(cube) cell = browser.full_cube() # 4. Play with aggregates result = browser.aggregate(cell) print "Total\n" \ "----------------------" print "Record count: %8d" % result.summary["record_count"] print "Total amount: %8d" % result.summary["amount_sum"] # # The End! # # ... of the Hello World! example # # The following is more than just plain "hello"... uncomment it all to the end. # #
("category", "string"), ("subcategory", "string"), ("line_item", "string"), ("year", "integer"), ("amount", "integer")], create_id=True ) # 2. Load model and get cube of our interest model = cubes.load_model("models/model_02.json") cube = model.cube("irbd_balance") # 3. Create a browser workspace = cubes.create_workspace("sql.star", model, engine=engine) browser = workspace.browser(cube) # 4. get a cell representing the whole cube (all data) cell = browser.full_cube() result = browser.aggregate(cell) print "Record count: %d" % result.summary["record_count"] print "Total amount: %d" % result.summary["amount_sum"] print "Drill Down by Category - top level:" result = browser.aggregate(cell, drilldown=["item"]) print "%-20s%10s%10s" % ("Category", "Count", "Total")
# NOTE(review): this chunk opens mid-call -- keyword arguments continuing a
# table-creation call whose opening line is outside this view; confirm
# against the full file before moving anything.
    table_name=FACT_TABLE,
    fields=[("category", "string"),
            ("subcategory", "string"),
            ("line_item", "string"),
            ("year", "integer"),
            ("amount", "integer")],
    create_id=True)

# 2. Load model and get cube of our interest
model = cubes.load_model("models/model_02.json")
cube = model.cube("irbd_balance")

# 3. Create a browser
# (`engine` is a SQLAlchemy engine created outside this view -- confirm)
workspace = cubes.create_workspace("sql.star", model, engine=engine)
browser = workspace.browser(cube)

# 4. get a cell representing the whole cube (all data)
cell = browser.full_cube()

result = browser.aggregate(cell)
print "Record count: %d" % result.summary["record_count"]
print "Total amount: %d" % result.summary["amount_sum"]

print "Drill Down by Category - top level:"
result = browser.aggregate(cell, drilldown=["item"])
print "%-20s%10s%10s" % ("Category", "Count", "Total")
A fact is something that is measurable, like a contract.
A dimension is the context for the fact: location, time, type --
who signed the contract, how much was spent on the construction work,
and where the transaction happened.

Also, cubes supports hierarchies, so you can define levels for each
dimension, e.g. year, month, day, or continent, country, city.

Levels and attributes
Hierarchy
Key Attributes
Label Attributes

"Multi-dimensional breadcrumbs"
'''
import cubes

# Build a SQL workspace over the normalized CIDA SQLite database
# (assumes cida_model.json and cida_norm.db sit next to this script).
model = cubes.load_model("cida_model.json")
ws = cubes.create_workspace("sql",model,url="sqlite:///cida_norm.db")
cube = model.cube("projects")
browser = ws.browser(cube)

# Aggregate the whole cube, drilled down by project number,
# then dump the summary and every drilldown row.
result = browser.aggregate(drilldown=["project_number"])
print result.summary
print "---------------------"
for c in result.cells:
    print c
def initialize_model():
    """Load the cube model from MODEL_PATH and open a plain SQL workspace.

    Publishes the results through the module-level ``model`` and
    ``workspace`` globals; the workspace connects to the database at DB_URL
    with no table-name prefixes.
    """
    global model, workspace
    model = cubes.load_model(MODEL_PATH)
    workspace = cubes.create_workspace("sql", model, url=DB_URL)
import cubes # 1. Prepare SQL data in memory model = cubes.load_model("model.json") workspace = cubes.create_workspace("sql", model, url='sqlite:///data.sqlite') # 2. Create a model cube = model.cube("irbd_balance") # 3. Create a browser and get a cell representing the whole cube (all data) browser = workspace.browser(cube) cell = cubes.Cell(cube) # 4. Play with aggregates result = browser.aggregate(cell) print "Total\n" \ "----------------------" print "Record count: %8d" % result.summary["record_count"] print "Total amount: %8d" % result.summary["amount_sum"] # # The End! # # ... of the Hello World! example # # The following is more than just plain "hello"... uncomment it all to the end. # #
import cubes # 1. Prepare SQL data in memory model = cubes.load_model("model.json") workspace = cubes.create_workspace("sql", model, url='sqlite:///test.db') # 2. Create a model cube = model.cube("datatest") # 3. Create a browser and get a cell representing the whole cube (all data) browser = workspace.browser(cube) cell = cubes.Cell(cube) # 4. Play with aggregates result = browser.aggregate(cell) print "Total\n" \ "----------------------" print "Record count: %8d" % result.summary["record_count"] # # The End! # # ... of the Hello World! example # # The following is more than just plain "hello"... uncomment it all to the end. # # # 5. Drill-down through a dimension
import cubes

# Build browsing context over the local SQLite copy of the VVO data.
model = cubes.load_model("vvo_model.json")
cube = model.cube("contracts")
workspace = cubes.create_workspace("sql", model,
                                   url="sqlite:///vvo_data.sqlite",
                                   dimension_prefix="dm_",
                                   fact_prefix="ft_")

# Alternative PostgreSQL workspace, kept for reference:
# workspace = cubes.create_workspace("sql", model, url="postgres://localhost/ep2012",
#                                    schema="vvo",
#                                    dimension_prefix="dm_",
#                                    fact_prefix="ft_",
#                                    denormalized_view_schema="views",
#                                    use_denormalization=False,
#                                    denormalized_view_prefix="mft_")

def drilldown(cell, dimension):
    """Drill-down and aggregate recursively through all levels of `dimension`.

    This function is like recursively traversing directories on a file
    system and aggregating the file sizes, for example.

    * `cell` - cube cell to drill-down
    * `dimension` - dimension to be traversed through all levels
    """
    # Stop once the cell is already at the dimension's deepest level.
    if cell.is_base(dimension):
        return
    # NOTE(review): `browser` is not defined in this chunk -- presumably a
    # module-level name created elsewhere in the file; confirm. The function
    # body also appears to continue past this view (the recursion step over
    # result.cells is not visible here).
    result = browser.aggregate(cell, drilldown=[dimension])
Label Attributes

"Multi-dimensional breadcrumbs"
"""
import sys
import cubes
from pprint import pprint
from itertools import islice
from collections import Counter

model = cubes.load_model("../cida_model.json")

# Remote Heroku URL kept for reference.
# NOTE(review): this commented-out URL embeds live-looking credentials --
# consider scrubbing it from version control.
# postgres://jdcqogwzkevwog:8z47cIJBDcBM3mefOiYfPVNBXy@ec2-23-23-177-33.compute-1.amazonaws.com:5432/de652in13m1noa
ws = cubes.create_workspace("sql", model, url="postgres://localhost/crs")
cube = model.cube("cida")
browser = ws.browser(cube)
cell = cubes.Cell(cube)

# Slice the whole cube down to a single project.
project_id = "A034921002"
cut = cubes.PointCut("project_number", [project_id])
cell2 = cell.slice(cut)

result = browser.aggregate(cell2,
                           drilldown=["fiscal_year", "country_region_name", "id"])

counter = Counter()
# NOTE(review): the loop body continues past this view of the file.
for i, c in enumerate(result.cells):
def get_context_data(self, **kwargs):
    """This method is responsible for creating and returning the context data.

    Reads slicing/drilldown parameters from the GET request, builds cubes
    cuts per dimension, takes the cartesian product of the per-dimension
    cuts, and fills ``self.return_list`` with either raw facts or
    aggregated columns (consumed by a JSON mixin).

    NOTE(review): indentation below is reconstructed from a collapsed
    one-line source -- statement nesting at a few ambiguous points (marked
    inline) should be confirmed against the original file.
    """
    context = super(CubeResultsView, self).get_context_data(**kwargs)  # call the super

    # initialize variables
    mode = None             # mode can be raw or aggregated
    aggregations = None     # a list of the aggregations requested
    agg_paths = []          # the aggregation paths requested
    agg_dict = {}           # holds the aggregations and their respective paths
    num_slices = None       # the number of slices performed
    cube_name = None        # the requested cube name
    cut_list=list()         # a list of the cuts to be performed on the cube
    hashDict = {}           # a dictionary containing slices by dimension
    hashList = []           # a list of just the slices, without dimension information
    hashProduct = []        # a cartesian product of slices
    self.error.clear()      # make sure self.error is initially empty

    # fetch data from GET
    if 'num_slices' in self.request.GET:
        num_slices = self.request.REQUEST['num_slices']
    if 'cube' in self.request.GET:
        cube_name = self.request.REQUEST['cube']
    if 'drilldim' in self.request.GET:
        aggregations = self.request.REQUEST['drilldim'].split("|")
    if 'drillpath' in self.request.GET:
        agg_paths = self.request.REQUEST['drillpath'].split("|")
    if 'mode' in self.request.GET:
        mode = self.request.REQUEST['mode']

    # check for errors in the GET data
    if aggregations == None or aggregations == "":
        self.error['aggregations_error'] = "You must select an aggregation dimension."
    if num_slices == "" or num_slices == None:
        self.error['slices_error'] = "You must provide the number of slices"
    if cube_name == None or cube_name == "":
        self.error['cube_error'] = "You must provide the cube name"

    # couple the aggregations with their paths
    pathLen = len(agg_paths)
    for i in range(0, len(aggregations)):
        if i < pathLen:
            agg_dict[aggregations[i]] = agg_paths[i]
        else:
            agg_dict[aggregations[i]] = ""

    # For each slice, get the path and dimension and create a cut
    for x in range(0,int(num_slices)):
        # initialize the variables. from_path and to_path allow range slicing.
        dimension = None
        from_path = None
        to_path = None
        path = None

        # get the path and dimension
        if 'dimension_'+str(x) in self.request.GET:
            dimension=(self.request.REQUEST['dimension_'+str(x)])  # get the requested dimension
        if 'path_'+str(x) in self.request.GET:
            path=(self.request.REQUEST['path_'+str(x)].split('-'))  # get the requested path

        # check for errors
        if dimension == None or dimension == "":
            self.error['dimension'+str(x)+'_error'] = "You must provide a dimension for each slice."
        if path == None or path == "":
            self.error['path'+str(x)+'_error'] = "You must provide a path for each slice."

        # if a dimension and corresponding path were found, create the cut
        if 'path'+str(x)+'_error' not in self.error and 'dimension'+str(x)+'_error' not in self.error:
            if len(path) > 1:
                # if the path has multiple paths, perform a range cut
                from_path = path[0].split("|")
                to_path = path[1].split("|")
                cut = cubes.RangeCut(dimension,from_path,to_path)  # define the cut
            else:
                cut = cubes.PointCut(dimension,path[0].split("|"))  # define the cut
            # add the cut to the appropriate list
            if dimension in hashDict:
                hashDict[dimension].append(cut)
            else:
                hashDict[dimension] = [cut]

    # if 'REMOTE_USER' in self.request.META and self.request.META['REMOTE_USER'] != "":
    #     user = self.request.META['REMOTE_USER']
    # else:
    #     user = None
    #
    # if user:
    #     cut = cubes.SetCut('testuser',['public',user])
    #     hashDict['user'] = [cut]
    # else:
    #     cut = cubes.PointCut('testuser', ['public'])
    #     hashDict['user'] = [cut]

    for k, v in hashDict.iteritems():  # store cuts by dimension
        hashList.append(v)
    for item in itertools.product(*hashList):
        # compute a cartesian product on the cuts and convert the tuples to
        # lists. Each list in hashProduct is a cut list
        hashProduct.append((list(item)))

    if self.error == {}:  # if there are no errors, perform the aggregations
        # initialize variables
        results = {}        # this dictionary gets serialized into the returned JSON object
        agg_result = None   # this holds the aggregated cubes data
        counter = 0         # a counter variable
        cube = dwmodel.cube(dwmodel.cubes[cube_name])  # Create the cube
        # cube.key = cube.mappings['id'] # enforce cube mappings
        conn = engine.connect()  # Connect to the sqlalchemy engine
        workspace = cubes.create_workspace("sql",dwmodel,engine=engine,
                                           dimension_prefix=dimension_prefix,
                                           fact_prefix=fact_prefix)  # get a workspace
        browser = workspace.browser(cube)  # get a browser
        cell = cubes.Cell(cube)            # create the cell
        results['slices'] = []             # create a holder for the slices

        # for each requested aggregation, we must perform all cuts in the cut
        # list and generate an object containing aggregated data for the cut
        if mode == 'raw':
            result = {}
            for cutlist in hashProduct:
                sliceString = ""
                # create a string containing information about the cuts;
                # this string will be added to measure names.
                # loop over the cuts and create the sliceString and the level
                for cut in cutlist:
                    sliceString += "_"
                    if hasattr(cut, 'path'):
                        sliceString += "|".join(cut.path)
                    elif hasattr(cut, 'paths'):
                        sliceString += "|".join(cut.paths)
                    else:
                        sliceString += "|".join(cut.from_path)
                        sliceString += "-"
                        sliceString += "|".join(cut.to_path)
                results['slices'].append(sliceString)  # add the slicestring to the results object

                new_cell = cell.multi_slice(cutlist)  # Slice up the cube using all of the cuts
                raw_result = browser.facts(new_cell)  # and get the data
                counter = 0  # reinitialize the counter

                # Parse the cubes data into columns which are added to a dictionary
                for row in raw_result:
                    if counter == 0:
                        # on the first pass, create the column lists
                        for k, v in row.iteritems():
                            result[k+sliceString] = []
                        counter = counter + 1  # and increment the counter
                    for k, v in row.iteritems():
                        # add the key to the appropriate column list
                        result[k+sliceString].append(v)
            results['raw'] = result       # append this object to the dict of objects
            self.return_list = results    # set the return_list, used by JSONMixin
        else:
            for a, p in agg_dict.iteritems():
                result = {}  # create an object
                for cutlist in hashProduct:
                    level = None
                    sliceString = ""
                    # create a string containing information about the cuts;
                    # this string will be added to measure names.
                    # loop over the cuts and create the sliceString and the level
                    for cut in cutlist:
                        sliceString += "_"
                        if hasattr(cut, 'path'):
                            sliceString += "|".join(cut.path)
                        else:
                            sliceString += "|".join(cut.from_path)
                            sliceString += "-"
                            sliceString += "|".join(cut.to_path)
                        if a == cut.dimension and p == "":
                            # if the aggregation variable was sliced on and no
                            # path was specified, use the slice path to get
                            # the appropriate level
                            if hasattr(cut, 'path'):
                                level = getNextLevel(cube, a, cut.path)
                            else:
                                # path isn't a key for range cuts, so use from
                                # instead and get the current level instead of
                                # the next level
                                level = getCurrentLevel(cube, a, cut.from_path)
                    # NOTE(review): reconstructed as after the cut loop --
                    # confirm nesting against the original file.
                    sliceString += "_" + str(a)

                    if level == "level not found":
                        # if there was an error getting the level, then the
                        # aggregation will not work, so return an error
                        self.error['level'] = level
                        self.return_list = self.error
                        return context
                    results['slices'].append(sliceString)  # add the slicestring to the results object

                    if level == None and p == "":
                        level = getTopLevel(cube, a)  # otherwise get the first level of the aggregation variable

                    new_cell = cell.multi_slice(cutlist)  # Slice up the cube using all of the cuts
                    if p == "":
                        agg_result = browser.aggregate(new_cell, drilldown=[a])  # and aggregate the data
                    else:
                        level = p
                        agg_result = browser.aggregate(new_cell, drilldown=[(a,'default',p)])

                    # add information about the x-axis to the object
                    if str(a) == level:
                        result['level'] = level
                    else:
                        result['level'] = str(a + "." + level)

                    # get the levels. results are sorted by level up to the
                    # requested level depth
                    arguments = []  # create an empty list for the levels
                    for x in cube.dimension(a).levels:  # loop over the levels for this cube
                        arguments.append(x.attributes[0].ref())  # append the level to the arguments list
                        if x.name == level:  # if we've reached the requested level, then break
                            break

                    counter = 0
                    newlist = []  # create an empty list
                    for row in agg_result:  # create a list copy of the cubes result object
                        newlist.append(row)
                    newlist = sortDictByMultipleKeys(newlist, arguments)  # sort the result object by the levels

                    # Parse the cubes data into columns which are added to a dictionary
                    for row in newlist:
                        if counter == 0:
                            # on the first pass, create the column lists
                            for k, v in row.iteritems():
                                result[k+sliceString] = []
                            counter = counter + 1  # and increment the counter
                        for k, v in row.iteritems():
                            # add the values to the appropriate column list
                            result[k+sliceString].append(v)
                results[a] = result       # append this object to the dict of objects
            self.return_list = results    # set the return_list, used by JSONMixin
    else:
        self.return_list = self.error  # if errors were present, return them
    # NOTE(review): `conn` is only bound inside the success branch above --
    # reaching this line via the error branch would raise NameError; confirm
    # intended placement in the original file.
    conn.close()