def execute(viewCollection: ViewCollection, ldf: LuxDataFrame):
    '''
    Given a ViewCollection, fetch the data required to render the view
    1) Apply filters
    2) Retrieve relevant attributes
    3) Return a DataFrame with the relevant results
    '''
    import pandas as pd
    for view in viewCollection:
        print(view, utils.getFilterSpecs(view.specLst))
        # Select relevant data based on attribute information
        attributes = set()
        for spec in view.specLst:
            if spec.attribute:
                attributes.add(spec.attribute)
        if view.mark not in ["bar", "line", "histogram"]:
            whereClause, filterVars = SQLExecutor.executeFilter(view)
            requiredVariables = attributes | set(filterVars)
            requiredVariables = ",".join(requiredVariables)
            rowCount = list(pd.read_sql("SELECT COUNT(*) FROM {} {}".format(ldf.table_name, whereClause), ldf.SQLconnection)['count'])[0]
            # Sample down to 10,000 rows when the filtered table is large
            if rowCount > 10000:
                query = "SELECT {} FROM {} {} ORDER BY random() LIMIT 10000".format(requiredVariables, ldf.table_name, whereClause)
            else:
                query = "SELECT {} FROM {} {}".format(requiredVariables, ldf.table_name, whereClause)
            data = pd.read_sql(query, ldf.SQLconnection)
            view.data = utils.pandasToLux(data)
        if (view.mark == "bar" or view.mark == "line"):
            SQLExecutor.executeAggregate(view, ldf)
        elif (view.mark == "histogram"):
            SQLExecutor.executeBinning(view, ldf)
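# A minimal, standalone sketch of the sampling logic in execute() above: build the SELECT
# statement from a row count, the required columns, and an optional WHERE clause. The table
# name, column names, and the 10,000-row threshold below are illustrative assumptions; the
# real execute() additionally runs the resulting query via pd.read_sql.
def build_select_query(table_name, required_variables, where_clause, row_count, limit=10000):
    columns = ",".join(required_variables)
    if row_count > limit:
        # Large result: draw a uniform random sample (PostgreSQL-style ORDER BY random())
        return "SELECT {} FROM {} {} ORDER BY random() LIMIT {}".format(columns, table_name, where_clause, limit)
    # Small result: fetch everything
    return "SELECT {} FROM {} {}".format(columns, table_name, where_clause)

# Example with a hypothetical table and columns:
print(build_select_query("cars", ["Horsepower", "Weight"], "WHERE Origin = 'USA'", 25000))
# SELECT Horsepower,Weight FROM cars WHERE Origin = 'USA' ORDER BY random() LIMIT 10000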
def contextToJSON(context):
    from lux.utils import utils
    filterSpecs = utils.getFilterSpecs(context)
    attrsSpecs = utils.getAttrsSpecs(context)

    specs = {}
    specs['attributes'] = [spec.attribute for spec in attrsSpecs]
    specs['filters'] = [spec.attribute for spec in filterSpecs]
    return specs
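# Illustrative sketch of the dictionary shape contextToJSON produces. The Spec stand-in
# below is a simplified assumption (real lux Specs carry more fields); filter specs are
# distinguished here by having a non-empty value, mirroring how the other actions in this
# file split the context with x.value == "".
from collections import namedtuple

Spec = namedtuple("Spec", ["attribute", "value"])

def context_to_json_sketch(context):
    attrs = [s for s in context if s.value == ""]      # attribute specs: no value set
    filters = [s for s in context if s.value != ""]    # filter specs: value set
    return {"attributes": [s.attribute for s in attrs],
            "filters": [s.attribute for s in filters]}

# Hypothetical context: plot Horsepower, filtered to Origin = 'USA'
print(context_to_json_sketch([Spec("Horsepower", ""), Spec("Origin", "USA")]))
# {'attributes': ['Horsepower'], 'filters': ['Origin']}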
def executeFilter(view: View):
    assert view.data is not None, "executeFilter assumes input view.data is populated (if not, populate with LuxDataFrame values)"
    filters = utils.getFilterSpecs(view.specLst)
    if (filters):
        # TODO: Need to handle OR logic
        for filter in filters:
            view.data = PandasExecutor.applyFilter(view.data, filter.attribute, filter.filterOp, filter.value)
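# Minimal pandas sketch of what a single applyFilter step presumably does: reduce the
# DataFrame with a boolean mask built from (attribute, filterOp, value). Only '=' is
# shown; the real PandasExecutor.applyFilter may support more operators.
import pandas as pd

def apply_filter_sketch(df, attribute, filter_op, value):
    if filter_op == "=":
        return df[df[attribute] == value]
    raise NotImplementedError("only '=' is illustrated here")

df = pd.DataFrame({"Origin": ["USA", "Japan", "USA"], "Horsepower": [130, 95, 150]})
print(apply_filter_sketch(df, "Origin", "=", "USA"))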
def executeFilter(view: View):
    whereClause = []
    filters = utils.getFilterSpecs(view.specLst)
    filterVars = []
    if (filters):
        for f in range(0, len(filters)):
            if f == 0:
                whereClause.append("WHERE")
            else:
                whereClause.append("AND")
            whereClause.extend([str(filters[f].attribute), str(filters[f].filterOp), "'" + str(filters[f].value) + "'"])
            if filters[f].attribute not in filterVars:
                filterVars.append(filters[f].attribute)
    if whereClause == []:
        return ("", [])
    else:
        whereClause = " ".join(whereClause)
        return (whereClause, filterVars)
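# Usage sketch for the WHERE-clause builder above, with lightweight stand-ins for filter
# specs (the real ones come from utils.getFilterSpecs). Two hypothetical filters produce
# "WHERE Origin = 'USA' AND Cylinders = '4'" and filter_vars == ['Origin', 'Cylinders'].
from collections import namedtuple

FilterSpec = namedtuple("FilterSpec", ["attribute", "filterOp", "value"])

def build_where_clause(filters):
    clause, filter_vars = [], []
    for i, f in enumerate(filters):
        clause.append("WHERE" if i == 0 else "AND")
        clause.extend([str(f.attribute), str(f.filterOp), "'" + str(f.value) + "'"])
        if f.attribute not in filter_vars:
            filter_vars.append(f.attribute)
    return (" ".join(clause), filter_vars) if clause else ("", [])

print(build_where_clause([FilterSpec("Origin", "=", "USA"), FilterSpec("Cylinders", "=", 4)]))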
def enhance(ldf):
    '''
    Given a set of views, generates possible visualizations when an additional attribute is added to the current view.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the Enhance action.
    '''
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        tic = time.perf_counter()
    recommendation = {"action": "Enhance",
                      "description": "Shows possible visualizations when an additional attribute is added to the current view."}
    filters = utils.getFilterSpecs(ldf.context)
    # Collect variables that already exist in the context
    attrSpecs = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf.context))
    if (len(attrSpecs) > 2):
        # If there are too many column attributes, don't generate Enhance recommendations
        recommendation["collection"] = []
        return recommendation
    query = filters + attrSpecs
    query.append("?")  # wildcard placeholder for the additional attribute
    vc = lux.view.ViewCollection.ViewCollection(query)
    vc = vc.load(ldf)
    # Then use the data populated in the view collection to compute each view's score
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        toc = time.perf_counter()
        print(f"Performed enhance action in {toc - tic:0.4f} seconds")
    return recommendation
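# Standalone sketch of the idea behind Enhance: the "?" wildcard stands for each attribute
# not already in the context, so every remaining column yields one candidate view to score.
# The column names and context below are illustrative assumptions, not lux API calls.
def enhance_candidates(all_columns, context_attributes, filter_attributes):
    used = set(context_attributes) | set(filter_attributes)
    return [col for col in all_columns if col not in used]

print(enhance_candidates(
    all_columns=["Horsepower", "Weight", "Acceleration", "Origin"],
    context_attributes=["Horsepower"],
    filter_attributes=["Origin"]))
# ['Weight', 'Acceleration']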
def filter(ldf):
    '''
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the Filter action.
    '''
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        tic = time.perf_counter()
    recommendation = {"action": "Filter",
                      "description": "Shows possible visualizations when filtered by categorical variables in the data object's dataset."}
    filters = utils.getFilterSpecs(ldf.context)
    filterValues = []
    output = []
    # If a filter is specified, create visualizations where the data is filtered by all values of the filter's categorical variable
    columnSpec = utils.getAttrsSpecs(ldf.viewCollection[0].specLst)
    columnSpecAttr = map(lambda x: x.attribute, columnSpec)
    if len(filters) > 0:
        # Get unique values for each categorical variable specified and create the corresponding filters
        for row in filters:
            uniqueValues = ldf.uniqueValues[row.attribute]
            filterValues.append(row.value)
            # Create new views with the new filters
            for val in uniqueValues:
                if val not in filterValues:
                    newSpec = columnSpec.copy()
                    newFilter = lux.Spec(attribute=row.attribute, value=val)
                    newSpec.append(newFilter)
                    tempView = View(newSpec)
                    output.append(tempView)
    else:
        # If no existing filters, create filters using unique values from all categorical variables in the dataset
        categoricalVars = []
        for col in list(ldf.columns):
            # Only keep columns whose cardinality is not too high and that are not one of the specified X/Y columns
            if ldf.cardinality[col] < 40 and col not in columnSpecAttr:
                categoricalVars.append(col)
        for cat in categoricalVars:
            uniqueValues = ldf.uniqueValues[cat]
            for i in range(0, len(uniqueValues)):
                newSpec = columnSpec.copy()
                newFilter = lux.Spec(attribute=cat, filterOp="=", value=uniqueValues[i])
                newSpec.append(newFilter)
                tempView = View(newSpec)
                output.append(tempView)
    vc = lux.view.ViewCollection.ViewCollection(output)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        toc = time.perf_counter()
        print(f"Performed filter action in {toc - tic:0.4f} seconds")
    return recommendation
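# Pandas sketch of the no-existing-filter branch above: pick categorical columns with
# cardinality under 40 (and not already plotted), then emit one (column, value) filter per
# unique value. The DataFrame and threshold are illustrative.
import pandas as pd

def filter_candidates(df, plotted_columns, max_cardinality=40):
    candidates = []
    for col in df.columns:
        if df[col].nunique() < max_cardinality and col not in plotted_columns:
            candidates.extend((col, val) for val in df[col].unique())
    return candidates

df = pd.DataFrame({"Origin": ["USA", "Japan", "Europe"], "Horsepower": [130, 95, 110]})
print(filter_candidates(df, plotted_columns=["Horsepower"]))
# [('Origin', 'USA'), ('Origin', 'Japan'), ('Origin', 'Europe')]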
def generalize(ldf):
    '''
    Generates all possible visualizations when one attribute or filter from the current view is removed.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        Object with a collection of visualizations that result from the Generalize action.
    '''
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        tic = time.perf_counter()
    # Takes in the current context and generates a list of new views, each with a single attribute or filter removed
    # --> returns the list of views with corresponding interestingness scores
    recommendation = {"action": "Generalize",
                      "description": "Remove one attribute or filter to observe a more general trend."}
    output = []
    excludedColumns = []
    columnSpec = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf.context))
    rowSpecs = utils.getFilterSpecs(ldf.context)
    # If we do not have enough column attributes, or have too many, return no views
    if (len(columnSpec) < 2 or len(columnSpec) > 4):
        recommendation["collection"] = []
        return recommendation
    # Remove one column attribute at a time
    for spec in columnSpec:
        columns = spec.attribute
        if type(columns) == list:
            for column in columns:
                if column not in excludedColumns:
                    tempView = View(ldf.context)
                    tempView.removeColumnFromSpecNew(column)
                    excludedColumns.append(column)
                    output.append(tempView)
        elif type(columns) == str:
            if columns not in excludedColumns:
                tempView = View(ldf.context)
                tempView.removeColumnFromSpecNew(columns)
                excludedColumns.append(columns)
                output.append(tempView)
    # Remove one filter at a time
    for i, spec in enumerate(rowSpecs):
        newSpec = ldf.context.copy()
        newSpec.pop(i)
        tempView = View(newSpec)
        output.append(tempView)
    vc = lux.view.ViewCollection.ViewCollection(output)
    vc = vc.load(ldf)
    recommendation["collection"] = vc
    for view in vc:
        view.score = interestingness(view, ldf)
    vc.sort(removeInvalid=True)
    # for benchmarking
    if ldf.toggleBenchmarking == True:
        toc = time.perf_counter()
        print(f"Performed generalize action in {toc - tic:0.4f} seconds")
    return recommendation
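# Standalone sketch of the Generalize enumeration: given the attributes and filters in the
# current view, produce one candidate view per single removal. All names are illustrative.
def generalize_candidates(attributes, filters):
    candidates = []
    for a in attributes:                      # drop one attribute at a time
        candidates.append(([x for x in attributes if x != a], list(filters)))
    for f in filters:                         # drop one filter at a time
        candidates.append((list(attributes), [x for x in filters if x != f]))
    return candidates

for attrs, filts in generalize_candidates(["Horsepower", "Weight"], ["Origin = 'USA'"]):
    print(attrs, filts)
# ['Weight'] ["Origin = 'USA'"]
# ['Horsepower'] ["Origin = 'USA'"]
# ['Horsepower', 'Weight'] []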
def interestingness(view: View, ldf: LuxDataFrame) -> int:
    """
    Compute the interestingness score of the view.
    The interestingness metric is dependent on the view type.

    Parameters
    ----------
    view : View
    ldf : LuxDataFrame

    Returns
    -------
    int
        Interestingness Score
    """
    if view.data is None:
        raise Exception("View.data needs to be populated before interestingness can be computed. Run Executor.execute(view, ldf).")

    n_dim = 0
    n_msr = 0
    filterSpecs = utils.getFilterSpecs(view.specLst)
    viewAttrsSpecs = utils.getAttrsSpecs(view.specLst)
    for spec in viewAttrsSpecs:
        if (spec.attribute != "Record"):
            if (spec.dataModel == 'dimension'):
                n_dim += 1
            if (spec.dataModel == 'measure'):
                n_msr += 1
    n_filter = len(filterSpecs)
    attr_specs = [spec for spec in viewAttrsSpecs if spec.attribute != "Record"]
    dimensionLst = view.getAttrByDataModel("dimension")
    measureLst = view.getAttrByDataModel("measure")

    # Bar Chart
    if (n_dim == 1 and (n_msr == 0 or n_msr == 1)):
        if (n_filter == 0):
            return unevenness(view, ldf, measureLst, dimensionLst)
        elif (n_filter == 1):
            return deviationFromOverall(view, ldf, filterSpecs, measureLst[0].attribute)
    # Histogram
    elif (n_dim == 0 and n_msr == 1):
        if (n_filter == 0):
            v = view.data["Count of Records"]
            return skewness(v)
        elif (n_filter == 1):
            return deviationFromOverall(view, ldf, filterSpecs, "Count of Records")
    # Scatter Plot
    elif (n_dim == 0 and n_msr == 2):
        if (n_filter == 1):
            v_filter_size = getFilteredSize(filterSpecs, view.data)
            v_size = len(view.data)
            sig = v_filter_size / v_size
        else:
            sig = 1
        return sig * monotonicity(view, attr_specs)
    # Scatterplot colored by dimension
    elif (n_dim == 1 and n_msr == 2):
        colorAttr = view.getAttrByChannel("color")[0].attribute
        C = ldf.cardinality[colorAttr]
        if (C < 40):
            return 1 / C
        else:
            return -1
    # Scatterplot colored by dimension (note: unreachable, same condition as the branch above)
    elif (n_dim == 1 and n_msr == 2):
        return 0.2
    # Scatterplot colored by measure
    elif (n_msr == 3):
        return 0.1
    # Default
    else:
        return -1
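# Hedged sketch of the "deviation from overall" idea used for the filtered bar-chart and
# histogram branches above: compare the filtered view's normalized distribution against
# the overall (unfiltered) distribution and score by their Euclidean distance. This is a
# common formulation of the metric; lux's deviationFromOverall helper may differ in detail.
import numpy as np

def deviation_from_overall_sketch(filtered_counts, overall_counts):
    f = np.asarray(filtered_counts, dtype=float)
    o = np.asarray(overall_counts, dtype=float)
    f, o = f / f.sum(), o / o.sum()           # normalize to probability distributions
    return float(np.linalg.norm(f - o))       # Euclidean distance as the score

# Hypothetical per-category record counts, filtered vs. overall:
print(deviation_from_overall_sketch([5, 1, 1], [10, 9, 8]))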