Пример #1
0
    def determineEncoding(ldf: LuxDataFrame, view: View):
        '''
        Populate ``view`` with a mark type and channel assignment using ShowMe logic.

        Only the unfiltered specs (``spec.value == ""``) in ``view.specLst`` are
        counted; the "Record" measure is ignored when counting measures.
        Up to 3 dimensions or measures are currently supported. For any other
        combination the view is left untouched.

        Parameters
        ----------
        ldf : lux.luxDataFrame.LuxDataFrame
            LuxDataFrame with underspecified context. Its ``cardinality``,
            ``xMinMax`` and ``yMinMax`` attributes are read here.
        view : lux.view.View
            View to be completed. Modified in place: ``mark``, ``specLst`` and,
            for histograms/scatterplots, ``xMinMax``/``yMinMax`` are set.

        Returns
        -------
        None

        Notes
        -----
        Implementing automatic encoding from Tableau's VizQL
        Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
        Show Me: Automatic presentation for visual analysis.
        IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
        https://doi.org/10.1109/TVCG.2007.70594
        '''
        # Partition the specs: count dimensions, collect (non-Record) measures,
        # and set aside filters so they can be added back at the end.
        Ndim = 0
        measures = []
        filters = []
        for spec in view.specLst:
            if spec.value == "":
                if spec.dataModel == "dimension":
                    Ndim += 1
                elif spec.dataModel == "measure" and spec.attribute != "Record":
                    measures.append(spec)
            else:  # preserve to add back to specLst later
                filters.append(spec)
        Nmsr = len(measures)

        # Helper function (TODO: Move this into utils)
        def lineOrBar(ldf, dimension, measure):
            """Return (mark, channels) for a one-dimension/one-measure chart."""
            dimType = dimension.dataType
            # If no aggregation function is specified, then default as average
            if measure.aggregation == "":
                measure.aggregation = "mean"
            # Ordered dimensions are drawn as lines. The original compared
            # against the misspelled "oridinal", so ordinal data never matched.
            if dimType in ("temporal", "ordinal"):
                return "line", {"x": dimension, "y": measure}
            # Unordered categorical: horizontal bars,
            # sorted when there are more than 5 distinct values.
            if ldf.cardinality[dimension.attribute] > 5:
                dimension.sort = "ascending"
            return "bar", {"x": measure, "y": dimension}

        # ShowMe logic + additional heuristics
        # Default measure used when the user did not supply one: count of records.
        countCol = Spec(attribute="Record",
                        aggregation="count",
                        dataModel="measure",
                        dataType="quantitative")
        autoChannel = {}
        if Ndim == 0 and Nmsr == 1:
            # Histogram with Count
            measure = view.getAttrByDataModel("measure", excludeRecord=True)[0]
            # Add the count column only when the view has no "Record" spec yet.
            # (The original tested `< 0`, which can never be true.)
            if len(view.getAttrByAttrName("Record")) == 0:
                view.specLst.append(countCol)
            # If no bin specified, then default as 10
            if measure.binSize == 0:
                measure.binSize = 10
            autoChannel = {"x": measure, "y": countCol}
            view.xMinMax = ldf.xMinMax
            view.mark = "histogram"
        elif Ndim == 1 and Nmsr in (0, 1):
            # Line or Bar Chart
            if Nmsr == 0:
                view.specLst.append(countCol)
            dimension = view.getAttrByDataModel("dimension")[0]
            measure = view.getAttrByDataModel("measure")[0]
            view.mark, autoChannel = lineOrBar(ldf, dimension, measure)
        elif Ndim == 2 and Nmsr in (0, 1):
            # Line or Bar chart broken down by the dimension.
            # The lower-cardinality dimension goes on the color channel.
            dimensions = view.getAttrByDataModel("dimension")
            d1 = dimensions[0]
            d2 = dimensions[1]
            if ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]:
                view.removeColumnFromSpec(d1.attribute)
                dimension = d2
                colorAttr = d1
            else:
                if d1.attribute == d2.attribute:
                    # If same attribute then removeColumnFromSpec will remove
                    # both dims, we only want to remove one.
                    # NOTE(review): pop(0) assumes the duplicated dimension is
                    # the first entry of specLst - confirm against callers.
                    view.specLst.pop(0)
                else:
                    view.removeColumnFromSpec(d2.attribute)
                dimension = d1
                colorAttr = d2
            # Colored Bar/Line chart with Count as default measure
            if Nmsr == 0:
                view.specLst.append(countCol)
            measure = view.getAttrByDataModel("measure")[0]
            view.mark, autoChannel = lineOrBar(ldf, dimension, measure)
            autoChannel["color"] = colorAttr
        elif Ndim == 0 and Nmsr == 2:
            # Scatterplot. Use the measures collected above rather than
            # specLst[0]/[1], which could be a filter or the Record spec.
            view.xMinMax = ldf.xMinMax
            view.yMinMax = ldf.yMinMax
            view.mark = "scatter"
            autoChannel = {"x": measures[0], "y": measures[1]}
        elif Ndim == 1 and Nmsr == 2:
            # Scatterplot broken down by the dimension
            measure = view.getAttrByDataModel("measure")
            m1 = measure[0]
            m2 = measure[1]

            colorAttr = view.getAttrByDataModel("dimension")[0]
            # Pass the attribute name, consistent with the other
            # removeColumnFromSpec calls in this function.
            # NOTE(review): the helper is not visible here - confirm it
            # expects the attribute name rather than the Spec object.
            view.removeColumnFromSpec(colorAttr.attribute)
            view.xMinMax = ldf.xMinMax
            view.yMinMax = ldf.yMinMax
            view.mark = "scatter"
            autoChannel = {"x": m1, "y": m2, "color": colorAttr}
        elif Ndim == 0 and Nmsr == 3:
            # Scatterplot with color; measures taken in specLst order.
            view.xMinMax = ldf.xMinMax
            view.yMinMax = ldf.yMinMax
            view.mark = "scatter"
            autoChannel = {
                "x": measures[0],
                "y": measures[1],
                "color": measures[2],
            }
        if autoChannel:
            # Reconcile the automatic assignment with any user-specified channels.
            view = Compiler.enforceSpecifiedChannel(view, autoChannel)
            view.specLst.extend(filters)  # add back the preserved filters
Пример #2
0
def interestingness(view: View, ldf: LuxDataFrame) -> float:
	"""
	Compute the interestingness score of the view.
	The interestingness metric is dependent on the view type, which is
	determined from the number of dimensions, measures (the "Record"
	attribute is not counted) and filters in ``view.specLst``.

	Parameters
	----------
	view : View
		View whose ``data`` has already been populated.
	ldf : LuxDataFrame
		Dataframe the view was generated from.

	Returns
	-------
	float
		Interestingness score; -1 when no metric applies to the view.

	Raises
	------
	Exception
		If ``view.data`` is None.
	"""
	if view.data is None:
		raise Exception("View.data needs to be populated before interestingness can be computed. Run Executor.execute(view,ldf).")

	filterSpecs = utils.getFilterSpecs(view.specLst)
	viewAttrsSpecs = utils.getAttrsSpecs(view.specLst)

	# Attribute specs excluding the synthetic "Record" column.
	attr_specs = [spec for spec in viewAttrsSpecs if spec.attribute != "Record"]
	n_dim = sum(1 for spec in attr_specs if spec.dataModel == 'dimension')
	n_msr = sum(1 for spec in attr_specs if spec.dataModel == 'measure')
	n_filter = len(filterSpecs)
	dimensionLst = view.getAttrByDataModel("dimension")
	measureLst = view.getAttrByDataModel("measure")

	# Bar Chart
	if n_dim == 1 and n_msr in (0, 1):
		if n_filter == 0:
			return unevenness(view, ldf, measureLst, dimensionLst)
		if n_filter == 1:
			return deviationFromOverall(view, ldf, filterSpecs, measureLst[0].attribute)
		# More than one filter: no metric defined, fall through to the default.
	# Histogram
	elif n_dim == 0 and n_msr == 1:
		if n_filter == 0:
			v = view.data["Count of Records"]
			return skewness(v)
		if n_filter == 1:
			return deviationFromOverall(view, ldf, filterSpecs, "Count of Records")
		# More than one filter: no metric defined, fall through to the default.
	# Scatter Plot
	elif n_dim == 0 and n_msr == 2:
		if n_filter == 1:
			# Weight the score by the ratio computed from the filtered size.
			v_filter_size = getFilteredSize(filterSpecs, view.data)
			v_size = len(view.data)
			sig = v_filter_size / v_size
		else:
			sig = 1
		return sig * monotonicity(view, attr_specs)
	# Scatterplot colored by Dimension
	elif n_dim == 1 and n_msr == 2:
		colorAttr = view.getAttrByChannel("color")[0].attribute
		C = ldf.cardinality[colorAttr]
		# Fewer colors score higher; 40 or more colors is treated as not
		# interesting. The lower bound avoids dividing by zero.
		if 0 < C < 40:
			return 1 / C
	# Scatterplot colored by measure
	elif n_msr == 3:
		return 0.1
	# Default: no applicable metric for this view.
	return -1