def determineEncoding(ldf: LuxDataFrame, view: View):
    '''
    Populates View with the appropriate mark type and channel information based on ShowMe logic
    Currently support up to 3 dimensions or measures

    Specs in ``view.specLst`` whose ``value`` is non-empty are treated as
    filters: they are not counted as dimensions/measures and are appended back
    to ``view.specLst`` after the channels have been assigned. The "Record"
    measure is not counted as a measure either.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context; its ``cardinality``,
        ``xMinMax`` and ``yMinMax`` attributes are read here.
    view : lux.view.View
        View to populate. It is modified in place (``mark``, ``specLst`` and,
        for histogram/scatter marks, ``xMinMax``/``yMinMax``).

    Returns
    -------
    None

    Notes
    -----
    Implementing automatic encoding from Tableau's VizQL
    Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
    Show Me: Automatic presentation for visual analysis.
    IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
    https://doi.org/10.1109/TVCG.2007.70594
    '''
    # Count number of measures and dimensions
    n_dim = 0
    n_msr = 0
    filters = []
    for spec in view.specLst:
        if spec.value == "":
            if spec.dataModel == "dimension":
                n_dim += 1
            elif spec.dataModel == "measure" and spec.attribute != "Record":
                n_msr += 1
        else:
            # preserve to add back to specLst later
            filters.append(spec)

    # Helper function (TODO: Move this into utils)
    def lineOrBar(ldf, dimension, measure):
        """Pick "line" or "bar" for one dimension/measure pair and return (mark, channels)."""
        dimType = dimension.dataType
        # If no aggregation function is specified, then default as average
        if measure.aggregation == "":
            measure.aggregation = "mean"
        # Ordered dimensions (time or ordinal) read naturally as a line.
        if dimType in ("temporal", "ordinal"):
            return "line", {"x": dimension, "y": measure}
        # unordered categorical
        # if cardinality larger than 5 then sort bars
        if ldf.cardinality[dimension.attribute] > 5:
            dimension.sort = "ascending"
        return "bar", {"x": measure, "y": dimension}

    # ShowMe logic + additional heuristics
    countCol = Spec(attribute="Record", aggregation="count", dataModel="measure", dataType="quantitative")
    autoChannel = {}
    if n_dim == 0 and n_msr == 1:
        # Histogram with Count
        measure = view.getAttrByDataModel("measure", excludeRecord=True)[0]
        # Only add the count column when the view does not already carry one.
        if not view.getAttrByAttrName("Record"):
            view.specLst.append(countCol)
        # If no bin specified, then default as 10
        if measure.binSize == 0:
            measure.binSize = 10
        autoChannel = {"x": measure, "y": countCol}
        view.xMinMax = ldf.xMinMax
        view.mark = "histogram"
    elif n_dim == 1 and n_msr in (0, 1):
        # Line or Bar Chart
        if n_msr == 0:
            view.specLst.append(countCol)
        dimension = view.getAttrByDataModel("dimension")[0]
        measure = view.getAttrByDataModel("measure")[0]
        view.mark, autoChannel = lineOrBar(ldf, dimension, measure)
    elif n_dim == 2 and n_msr in (0, 1):
        # Line or Bar chart broken down by the dimension
        dimensions = view.getAttrByDataModel("dimension")
        d1 = dimensions[0]
        d2 = dimensions[1]
        # The lower-cardinality dimension goes to color, the other to the axis.
        if ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]:
            view.removeColumnFromSpec(d1.attribute)
            dimension = d2
            colorAttr = d1
        else:
            if d1.attribute == d2.attribute:
                # if same attribute then removeColumnFromSpec will remove both
                # dims, we only want to remove one
                view.specLst.pop(0)
            else:
                view.removeColumnFromSpec(d2.attribute)
            dimension = d1
            colorAttr = d2
        # Colored Bar/Line chart with Count as default measure
        if n_msr == 0:
            view.specLst.append(countCol)
        measure = view.getAttrByDataModel("measure")[0]
        view.mark, autoChannel = lineOrBar(ldf, dimension, measure)
        autoChannel["color"] = colorAttr
    elif n_dim == 0 and n_msr == 2:
        # Scatterplot
        view.xMinMax = ldf.xMinMax
        view.yMinMax = ldf.yMinMax
        view.mark = "scatter"
        # NOTE(review): positional lookup assumes the two measures are the
        # first two entries of specLst — confirm filters cannot precede them.
        autoChannel = {"x": view.specLst[0], "y": view.specLst[1]}
    elif n_dim == 1 and n_msr == 2:
        # Scatterplot broken down by the dimension
        measures = view.getAttrByDataModel("measure")
        m1 = measures[0]
        m2 = measures[1]
        colorAttr = view.getAttrByDataModel("dimension")[0]
        # NOTE(review): the other branches pass `<spec>.attribute` here, this
        # one passes the spec object itself. Left unchanged because
        # removeColumnFromSpec is not visible from this block — confirm which
        # argument type it expects.
        view.removeColumnFromSpec(colorAttr)
        view.xMinMax = ldf.xMinMax
        view.yMinMax = ldf.yMinMax
        view.mark = "scatter"
        autoChannel = {"x": m1, "y": m2, "color": colorAttr}
    elif n_dim == 0 and n_msr == 3:
        # Scatterplot with color
        view.xMinMax = ldf.xMinMax
        view.yMinMax = ldf.yMinMax
        view.mark = "scatter"
        autoChannel = {
            "x": view.specLst[0],
            "y": view.specLst[1],
            "color": view.specLst[2],
        }

    # Unsupported dimension/measure combinations leave autoChannel empty and
    # the view untouched.
    if autoChannel:
        view = Compiler.enforceSpecifiedChannel(view, autoChannel)
        view.specLst.extend(filters)  # add back the preserved filters
def interestingness(view: View, ldf: LuxDataFrame) -> float:
    """
    Compute the interestingness score of the view.
    The interestingness metric is dependent on the view type, which is decided
    from the number of dimensions, measures (the "Record" attribute is not
    counted) and filters found in ``view.specLst``.

    Parameters
    ----------
    view : View
        View to score; ``view.data`` must already be populated.
    ldf : LuxDataFrame
        Dataframe the view was built from; its ``cardinality`` mapping is read
        for colored scatterplots and it is forwarded to the scoring helpers.

    Returns
    -------
    float
        Interestingness score. -1 is returned for view types (or filter
        counts) that have no scoring rule, and when the data needed by a rule
        is empty.

    Raises
    ------
    Exception
        If ``view.data`` is None.
    """
    if view.data is None:
        raise Exception("View.data needs to be populated before interestingness can be computed. Run Executor.execute(view,ldf).")

    filterSpecs = utils.getFilterSpecs(view.specLst)
    viewAttrsSpecs = utils.getAttrsSpecs(view.specLst)
    # "Record" is the synthetic count column and does not count as a measure.
    attr_specs = [spec for spec in viewAttrsSpecs if spec.attribute != "Record"]
    n_dim = sum(1 for spec in attr_specs if spec.dataModel == "dimension")
    n_msr = sum(1 for spec in attr_specs if spec.dataModel == "measure")
    n_filter = len(filterSpecs)

    dimensionLst = view.getAttrByDataModel("dimension")
    measureLst = view.getAttrByDataModel("measure")

    # Bar Chart
    if n_dim == 1 and n_msr in (0, 1):
        if n_filter == 0:
            return unevenness(view, ldf, measureLst, dimensionLst)
        if n_filter == 1:
            return deviationFromOverall(view, ldf, filterSpecs, measureLst[0].attribute)
        # More than one filter has no scoring rule; use the default score
        # instead of implicitly returning None.
        return -1

    # Histogram
    if n_dim == 0 and n_msr == 1:
        if n_filter == 0:
            return skewness(view.data["Count of Records"])
        if n_filter == 1:
            return deviationFromOverall(view, ldf, filterSpecs, "Count of Records")
        return -1

    # Scatter Plot
    if n_dim == 0 and n_msr == 2:
        if n_filter == 1:
            v_size = len(view.data)
            if v_size == 0:
                # No rows to compare against; avoid dividing by zero.
                return -1
            v_filter_size = getFilteredSize(filterSpecs, view.data)
            sig = v_filter_size / v_size
        else:
            sig = 1
        return sig * monotonicity(view, attr_specs)

    # Scatterplot colored by Dimension
    if n_dim == 1 and n_msr == 2:
        colorAttr = view.getAttrByChannel("color")[0].attribute
        C = ldf.cardinality[colorAttr]
        # Fewer colors score higher; 40 or more distinct values (or none at
        # all) get the default score.
        if 0 < C < 40:
            return 1 / C
        return -1

    # Scatterplot colored by measure
    if n_msr == 3:
        return 0.1

    # Default
    return -1