Example #1
def test_sort_bar():
    import pandas as pd
    import lux
    from lux.compiler.Compiler import Compiler
    from lux.view.View import View
    df = pd.read_csv("lux/data/car.csv")
    view = View([
        lux.Spec(attribute="Acceleration",
                 data_model="measure",
                 data_type="quantitative"),
        lux.Spec(attribute="Origin",
                 data_model="dimension",
                 data_type="nominal")
    ])
    Compiler.determine_encoding(df, view)
    assert view.mark == "bar"
    assert view.spec_lst[1].sort == ''

    df = pd.read_csv("lux/data/car.csv")
    view = View([
        lux.Spec(attribute="Acceleration",
                 data_model="measure",
                 data_type="quantitative"),
        lux.Spec(attribute="Name", data_model="dimension", data_type="nominal")
    ])
    Compiler.determine_encoding(df, view)
    assert view.mark == "bar"
    assert view.spec_lst[1].sort == 'ascending'
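
The two assertions differ because of the cardinality of the dimension. Below is a minimal, self-contained sketch of that heuristic (the helper name pick_sort and its signature are illustrative, not part of the lux API; the cutoff of 5 mirrors the lineOrBar helper shown in Example #15):

def pick_sort(cardinality: int, cutoff: int = 5) -> str:
    # sort bars only when the dimension has many categories
    return "ascending" if cardinality > cutoff else ""

assert pick_sort(3) == ""             # e.g. Origin: USA/Europe/Japan
assert pick_sort(300) == "ascending"  # e.g. Name: hundreds of car models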
Example #2
def test_vary_filter_val():
    import pandas as pd
    from lux.view.View import View
    df = pd.read_csv("lux/data/olympic.csv")
    view = View(["Height", "SportType=Ball"])
    view = view.load(df)
    df.set_context_as_view(view)
    df.show_more()
    assert len(
        df.recommendation["Filter"]) == len(df["SportType"].unique()) - 1
Example #3
File: Compiler.py Project: jaywoo123/lux
    def enforceSpecifiedChannel(view: View, autoChannel: Dict[str, str]):
        """
		Enforces that the channels specified in the View by users override the showMe autoChannels.
		
		Parameters
		----------
		view : lux.view.View
			Input View without channel specification.
		autoChannel : Dict[str,str]
			Key-value pair in the form [channel: attributeName] specifying the showMe recommended channel location.
		
		Returns
		-------
		view : lux.view.View
			View with channel specification combining both original and autoChannel specification.
		
		Raises
		------
		ValueError
			Raised when more than one attribute is placed in the same channel.
		"""
        resultDict = {}  # result of enforcing the specified channels is stored in resultDict
        specifiedDict = {}  # e.g. specifiedDict = {"x": [], "y": [list of specs with y specified as channel]}
        # create a dictionary of specified channels in the given dobj
        for val in autoChannel.keys():
            specifiedDict[val] = view.getAttrByChannel(val)
            resultDict[val] = ""
        # for every element, replace with what's in specifiedDict if specified
        for sVal, sAttr in specifiedDict.items():
            if (len(sAttr) == 1):  # if specified in dobj
                # remove the specified channel from autoChannel (matching by value, since channel key may not be same)
                for i in list(autoChannel.keys()):
                    if (
                        (autoChannel[i].attribute == sAttr[0].attribute)
                            and (autoChannel[i].channel == sVal)
                    ):  # need to ensure that the channel is the same (edge case when duplicate Cols with same attribute name)
                        autoChannel.pop(i)
                        break
                sAttr[0].channel = sVal
                resultDict[sVal] = sAttr[0]
            elif (len(sAttr) > 1):
                raise ValueError(
                    "There should not be more than one attribute specified in the same channel."
                )
        # For the leftover channels that are still unspecified in resultDict,
        # and the leftovers in the autoChannel specification,
        # step through them together and fill it automatically.
        leftover_channels = list(
            filter(lambda x: resultDict[x] == '', resultDict))
        for leftover_channel, leftover_encoding in zip(leftover_channels,
                                                       autoChannel.values()):
            leftover_encoding.channel = leftover_channel
            resultDict[leftover_channel] = leftover_encoding
        view.specLst = list(resultDict.values())
        return view
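
For intuition, here is a stripped-down model of the merge that enforceSpecifiedChannel performs, using plain channel-to-attribute-name dicts instead of lux objects (merge_channels is a hypothetical helper, not part of the lux API): user-specified channels win, and leftover auto-recommended attributes fill the remaining slots in order.

def merge_channels(auto: dict, specified: dict) -> dict:
    # user-specified channels override the auto recommendation
    result = {ch: specified.get(ch, "") for ch in auto}
    leftover_attrs = [a for a in auto.values() if a not in result.values()]
    leftover_channels = [ch for ch, attr in result.items() if attr == ""]
    for ch, attr in zip(leftover_channels, leftover_attrs):
        result[ch] = attr
    return result

# The user pins Horsepower to y; the auto recommendation had it on x.
print(merge_channels({"x": "Horsepower", "y": "Origin"}, {"y": "Horsepower"}))
# {'x': 'Origin', 'y': 'Horsepower'}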
Example #4
    def execute_binning(view: View, ldf: LuxDataFrame):
        import math
        import numpy as np
        import pandas as pd
        bin_attribute = list(filter(lambda x: x.bin_size != 0,
                                    view.spec_lst))[0]
        num_bins = bin_attribute.bin_size
        attr_min = min(ldf.unique_values[bin_attribute.attribute])
        attr_max = max(ldf.unique_values[bin_attribute.attribute])
        attr_type = type(ldf.unique_values[bin_attribute.attribute][0])

        #need to calculate the bin edges before querying for the relevant data
        bin_width = (attr_max - attr_min) / num_bins
        upper_edges = []
        for e in range(1, num_bins):
            curr_edge = attr_min + e * bin_width
            if attr_type == int:
                upper_edges.append(str(math.ceil(curr_edge)))
            else:
                upper_edges.append(str(curr_edge))
        upper_edges = ",".join(upper_edges)
        view_filter, filter_vars = SQLExecutor.execute_filter(view)
        bin_count_query = "SELECT width_bucket, COUNT(width_bucket) FROM (SELECT width_bucket({}, '{}') FROM {}) as Buckets GROUP BY width_bucket ORDER BY width_bucket".format(
            bin_attribute.attribute, '{' + upper_edges + '}', ldf.table_name)
        bin_count_data = pd.read_sql(bin_count_query, ldf.SQLconnection)

        #counts,binEdges = np.histogram(ldf[bin_attribute.attribute],bins=bin_attribute.bin_size)
        #binEdges of size N+1, so need to compute binCenter as the bin location
        upper_edges = [float(i) for i in upper_edges.split(",")]
        if attr_type == int:
            bin_centers = np.array(
                [math.ceil((attr_min + attr_min + bin_width) / 2)])
        else:
            bin_centers = np.array([(attr_min + attr_min + bin_width) / 2])
        bin_centers = np.append(
            bin_centers,
            np.mean(np.vstack([upper_edges[0:-1], upper_edges[1:]]), axis=0))
        if attr_type == int:
            bin_centers = np.append(
                bin_centers,
                math.ceil((upper_edges[len(upper_edges) - 1] + attr_max) / 2))
        else:
            bin_centers = np.append(
                bin_centers,
                (upper_edges[len(upper_edges) - 1] + attr_max) / 2)

        if len(bin_centers) > len(bin_count_data):
            bucket_labels = bin_count_data['width_bucket'].unique()
            for i in range(0, len(bin_centers)):
                if i not in bucket_labels:
                    bin_count_data = bin_count_data.append(
                        pd.DataFrame([[i, 0]], columns=bin_count_data.columns))

        view.data = pd.DataFrame(
            np.array([bin_centers, list(bin_count_data['count'])]).T,
            columns=[bin_attribute.attribute, "Count of Records (binned)"])
        view.data = utils.pandas_to_lux(view.data)
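
To make the query construction concrete, the sketch below reproduces the edge computation for hypothetical values (column mpg, table cars, 5 bins over [0, 10]; both names are made up for illustration). The resulting string uses PostgreSQL's array form of width_bucket, as in the snippet above.

import math

num_bins, attr_min, attr_max = 5, 0, 10
bin_width = (attr_max - attr_min) / num_bins
upper_edges = ",".join(str(math.ceil(attr_min + e * bin_width))
                       for e in range(1, num_bins))
query = ("SELECT width_bucket, COUNT(width_bucket) "
         "FROM (SELECT width_bucket(mpg, '{" + upper_edges + "}') FROM cars) "
         "as Buckets GROUP BY width_bucket ORDER BY width_bucket")
print(query)  # ... width_bucket(mpg, '{2,4,6,8}') ...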
Example #5
    def execute_aggregate(view: View, ldf: LuxDataFrame):
        import pandas as pd
        x_attr = view.get_attr_by_channel("x")[0]
        y_attr = view.get_attr_by_channel("y")[0]
        groupby_attr = ""
        measure_attr = ""
        if (y_attr.aggregation != ""):
            groupby_attr = x_attr
            measure_attr = y_attr
            agg_func = y_attr.aggregation
        if (x_attr.aggregation != ""):
            groupby_attr = y_attr
            measure_attr = x_attr
            agg_func = x_attr.aggregation

        if (measure_attr != ""):
            #barchart case, need count data for each group
            if (measure_attr.attribute == "Record"):
                where_clause, filterVars = SQLExecutor.execute_filter(view)
                count_query = "SELECT {}, COUNT({}) FROM {} {} GROUP BY {}".format(
                    groupby_attr.attribute, groupby_attr.attribute,
                    ldf.table_name, where_clause, groupby_attr.attribute)
                view.data = pd.read_sql(count_query, ldf.SQLconnection)
                view.data = view.data.rename(columns={"count": "Record"})
                view.data = utils.pandas_to_lux(view.data)

            else:
                where_clause, filterVars = SQLExecutor.execute_filter(view)
                # map the aggregation function onto its SQL equivalent,
                # then issue a single formatted GROUP BY query
                sql_func = {"mean": "AVG", "sum": "SUM", "max": "MAX"}.get(agg_func)
                if sql_func is not None:
                    agg_query = "SELECT {}, {}({}) as {} FROM {} {} GROUP BY {}".format(
                        groupby_attr.attribute, sql_func, measure_attr.attribute,
                        measure_attr.attribute, ldf.table_name, where_clause,
                        groupby_attr.attribute)
                    view.data = pd.read_sql(agg_query, ldf.SQLconnection)
                    view.data = utils.pandas_to_lux(view.data)
Example #6
def test_refresh_inplace():
    import lux
    import pandas as pd
    df = pd.DataFrame({
        'date': ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
        'value': [10.5, 15.2, 20.3, 25.2]
    })

    assert df.data_type['nominal'][0] == 'date'

    from lux.view.View import View
    view = View(["date", "value"])
    view.load(df)

    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")

    assert df.data_type['temporal'][0] == 'date'
Example #7
    def execute_binning(view: View):
        '''
        Binning of data points for generating histograms

        Parameters
        ----------
        view: lux.View
            lux.View object that represents a visualization;
            view.data must already be populated with the data to bin.

        Returns
        -------
        None
        '''
        import numpy as np
        import pandas as pd  # is this import going to be conflicting with LuxDf?
        bin_attribute = list(filter(lambda x: x.bin_size != 0,
                                    view.spec_lst))[0]
        #TODO: binning runs for the Name attribute. The Name attribute has datatype quantitative, which is wrong.
        counts, bin_edges = np.histogram(view.data[bin_attribute.attribute],
                                         bins=bin_attribute.bin_size)
        #bin_edges of size N+1, so need to compute bin_center as the bin location
        bin_center = np.mean(np.vstack([bin_edges[0:-1], bin_edges[1:]]),
                             axis=0)
        # TODO: Should view.data be a LuxDataFrame or a Pandas DataFrame?
        view.data = pd.DataFrame(
            np.array([bin_center, counts]).T,
            columns=[bin_attribute.attribute, "Count of Records"])
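
The bin-center computation above can be checked standalone: np.histogram returns N+1 edges for N bins, and averaging consecutive edges yields the N centers used as the bar locations. A small sketch with made-up values:

import numpy as np

values = np.array([1.0, 2.0, 2.5, 3.0, 4.0, 8.0])
counts, edges = np.histogram(values, bins=4)
centers = np.mean(np.vstack([edges[:-1], edges[1:]]), axis=0)
assert len(centers) == len(counts) == 4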
Example #8
    def executeAggregate(view: View):
        '''
        Aggregate data points on an axis for bar or line charts

        Parameters
        ----------
        view: lux.View
            lux.View object that represents a visualization;
            view.data must already be populated with the data to aggregate.

        Returns
        -------
        None
        '''
        import numpy as np
        xAttr = view.getAttrByChannel("x")[0]
        yAttr = view.getAttrByChannel("y")[0]
        groupbyAttr = ""
        measureAttr = ""
        if (yAttr.aggregation != ""):
            groupbyAttr = xAttr
            measureAttr = yAttr
            aggFunc = yAttr.aggregation
        if (xAttr.aggregation != ""):
            groupbyAttr = yAttr
            measureAttr = xAttr
            aggFunc = xAttr.aggregation
        allAttrVals = view.data.uniqueValues[groupbyAttr.attribute]
        if (measureAttr != ""):
            if (measureAttr.attribute == "Record"):
                view.data = view.data.reset_index()
                view.data = view.data.groupby(
                    groupbyAttr.attribute).count().reset_index()
                view.data = view.data.rename(columns={"index": "Record"})
                view.data = view.data[[groupbyAttr.attribute, "Record"]]
            else:
                groupbyResult = view.data.groupby(groupbyAttr.attribute)
                view.data = groupbyResult.agg(aggFunc).reset_index()
            resultVals = list(view.data[groupbyAttr.attribute])
            if (len(resultVals) != len(allAttrVals)):
                # For filtered aggregations with missing groupby-attribute values, set the aggregated value to 0, since there are no matching datapoints
                for vals in allAttrVals:
                    if (vals not in resultVals):
                        view.data.loc[len(view.data)] = [vals, 0]
            assert len(list(view.data[groupbyAttr.attribute])) == len(
                allAttrVals
            ), f"Aggregated data missing values compared to original range of values of `{groupbyAttr.attribute}`."
            view.data = view.data.sort_values(by=groupbyAttr.attribute,
                                              ascending=True)
            view.data = view.data.reset_index()
            view.data = view.data.drop(columns="index")
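
The zero-filling step above, shown in isolation with plain pandas (made-up data): groups that the filter removed entirely still get a row with aggregate 0, so every chart over the same dimension keeps the same category axis.

import pandas as pd

all_vals = ["USA", "Europe", "Japan"]
agg = pd.DataFrame({"Origin": ["USA", "Japan"], "Record": [5, 3]})
for v in all_vals:
    if v not in list(agg["Origin"]):
        agg.loc[len(agg)] = [v, 0]  # append a zero-count row for the missing group
print(agg.sort_values(by="Origin").reset_index(drop=True))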
Example #9
    def execute_filter(view: View):
        assert view.data is not None, "execute_filter assumes input view.data is populated (if not, populate with LuxDataFrame values)"
        filters = utils.get_filter_specs(view.spec_lst)

        if (filters):
            # TODO: Need to handle OR logic
            for filter_spec in filters:
                view.data = PandasExecutor.apply_filter(
                    view.data, filter_spec.attribute, filter_spec.filter_op,
                    filter_spec.value)
Example #10
File: Compiler.py Project: jaywoo123/lux
 def combine(colAttrs, accum):
     last = (len(colAttrs) == 1)
     n = len(colAttrs[0])
     for i in range(n):
         columnList = copy.deepcopy(accum + [colAttrs[0][i]])
         if last:
             # if we have filters, generate combinations for each row
             if len(filters) > 0:
                 for row in filters:
                     specLst = copy.deepcopy(columnList + [row])
                     view = View(specLst,
                                 title=f"{row.attribute} {row.filterOp} {row.value}")
                     collection.append(view)
             else:
                 view = View(columnList)
                 collection.append(view)
         else:
             combine(colAttrs[1:], columnList)
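
What combine computes is the Cartesian product of the attribute lists, with each filter (if any) appended to every row; itertools.product is the standard-library equivalent of the recursion. A self-contained sketch with plain strings:

import itertools

colAttrs = [["Horsepower", "Weight"], ["Origin"]]
filters = ["Origin=USA"]
for columns in itertools.product(*colAttrs):
    for row in filters or [None]:
        spec = list(columns) + ([row] if row else [])
        print(spec)
# ['Horsepower', 'Origin', 'Origin=USA']
# ['Weight', 'Origin', 'Origin=USA']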
Example #11
def interestingness(view: View, ldf: LuxDataFrame) -> int:
    """
	Compute the interestingness score of the view.
	The interestingness metric is dependent on the view type.

	Parameters
	----------
	view : View
	ldf : LuxDataFrame

	Returns
	-------
	int
		Interestingness Score
	"""

    if view.data is None:
        raise Exception(
            "View.data needs to be populated before interestingness can be computed. Run Executor.execute(view,ldf)."
        )

    n_dim = 0
    n_msr = 0

    filter_specs = utils.get_filter_specs(view.spec_lst)
    view_attrs_specs = utils.get_attrs_specs(view.spec_lst)

    for spec in view_attrs_specs:
        if (spec.attribute != "Record"):
            if (spec.data_model == 'dimension'):
                n_dim += 1
            if (spec.data_model == 'measure'):
                n_msr += 1
    n_filter = len(filter_specs)
    attr_specs = [
        spec for spec in view_attrs_specs if spec.attribute != "Record"
    ]
    dimension_lst = view.get_attr_by_data_model("dimension")
    measure_lst = view.get_attr_by_data_model("measure")

    # Bar Chart
    if (n_dim == 1 and (n_msr == 0 or n_msr == 1)):
        if (n_filter == 0):
            return unevenness(view, ldf, measure_lst, dimension_lst)
        elif (n_filter == 1):
            return deviation_from_overall(view, ldf, filter_specs,
                                          measure_lst[0].attribute)
    # Histogram
    elif (n_dim == 0 and n_msr == 1):
        if (n_filter == 0):
            v = view.data["Count of Records"]
            return skewness(v)
        elif (n_filter == 1):
            return deviation_from_overall(view, ldf, filter_specs,
                                          "Count of Records")
    # Scatter Plot
    elif (n_dim == 0 and n_msr == 2):
        if (n_filter == 1):
            v_filter_size = get_filtered_size(filter_specs, view.data)
            v_size = len(view.data)
            sig = v_filter_size / v_size
        else:
            sig = 1
        return sig * monotonicity(view, attr_specs)
    # Scatterplot colored by dimension
    elif (n_dim == 1 and n_msr == 2):
        color_attr = view.get_attr_by_channel("color")[0].attribute

        C = ldf.cardinality[color_attr]
        # reward low-cardinality color attributes; high-cardinality coloring is penalized
        if (C < 40):
            return 1 / C
        else:
            return -1
    # Scatterplot colored by measure
    elif (n_msr == 3):
        return 0.1
    # Default
    else:
        return -1
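
For the histogram branch, the score is the skewness of the binned counts. Assuming lux's skewness wraps scipy.stats.skew (a hedged guess based on the call site, not confirmed by this snippet), a standalone equivalent is:

from scipy.stats import skew

counts = [1, 2, 3, 20]  # a heavily skewed histogram
print(skew(counts))     # large positive value -> scored as more interesting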
Example #12
def test_remove():
    import pandas as pd
    from lux.view.View import View
    df = pd.read_csv("lux/data/car.csv")
    view = View(["Horsepower", "Horsepower"])
    view.load(df)
    view.remove_column_from_spec_new("Horsepower", remove_first=False)
    assert (view.spec_lst == []), "Remove all instances of Horsepower"

    df = pd.read_csv("lux/data/car.csv")
    view = View(["Horsepower", "Horsepower"])
    view.load(df)
    view.remove_column_from_spec_new("Horsepower", remove_first=True)
    assert (len(view.spec_lst) == 1), "Remove only 1 instance of Horsepower"
    assert (view.spec_lst[0].attribute == "Horsepower"
            ), "Remove only 1 instance of Horsepower"
Example #13
def filter(ldf):
    '''
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Filter action.
    '''
    #for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    recommendation = {
        "action": "Filter",
        "description": "Shows possible visualizations when filtered by categorical variables in the dataset."
    }
    filters = utils.get_filter_specs(ldf.context)
    filter_values = []
    output = []
    #if Row is specified, create visualizations where data is filtered by all values of the Row's categorical variable
    column_spec = utils.get_attrs_specs(ldf.current_view[0].spec_lst)
    column_spec_attr = map(lambda x: x.attribute, column_spec)
    if len(filters) > 0:
        #get the unique values for each specified categorical attribute and create corresponding filters
        for row in filters:
            unique_values = ldf.unique_values[row.attribute]
            filter_values.append(row.value)
            #creates views with new filters
            for val in unique_values:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_filter = lux.Spec(attribute=row.attribute, value=val)
                    new_spec.append(new_filter)
                    temp_view = View(new_spec)
                    output.append(temp_view)
    else:  #if no existing filters, create filters using unique values from all categorical variables in the dataset
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and the attribute is not one of the specified (X, Y) columns
            if ldf.cardinality[col] < 40 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            unique_values = ldf.unique_values[cat]
            for i in range(0, len(unique_values)):
                new_spec = column_spec.copy()
                new_filter = lux.Spec(attribute=cat,
                                      filter_op="=",
                                      value=unique_values[i])
                new_spec.append(new_filter)
                temp_view = View(new_spec)
                output.append(temp_view)
    vc = lux.view.ViewCollection.ViewCollection(output)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc

    #for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed filter action in {toc - tic:0.4f} seconds")
    return recommendation
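
A plain-Python model of the enumeration in the no-filter branch above: one candidate view per value of each low-cardinality categorical column (the column names and values here are illustrative, not lux API):

columns = ["Horsepower"]
categorical = {"Origin": ["USA", "Europe", "Japan"]}
candidates = [columns + [f"{col}={val}"]
              for col, vals in categorical.items() for val in vals]
print(candidates)
# [['Horsepower', 'Origin=USA'], ['Horsepower', 'Origin=Europe'], ['Horsepower', 'Origin=Japan']]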
Example #14
def generalize(ldf):
	'''
	Generates all possible visualizations when one attribute or filter from the current view is removed.

	Parameters
	----------
	ldf : lux.luxDataFrame.LuxDataFrame
		LuxDataFrame with underspecified context.

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Generalize action.
	'''
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		tic = time.perf_counter()
	# takes in a dataObject and generates a list of new dataObjects, each with a single measure from the original object removed
	# -->  return list of dataObjects with corresponding interestingness scores

	recommendation = {"action":"Generalize",
						   "description":"Remove one attribute or filter to observe a more general trend."}
	output = []
	excludedColumns = []
	columnSpec = list(filter(lambda x: x.value=="" and x.attribute!="Record", ldf.context))
	rowSpecs = utils.getFilterSpecs(ldf.context)
	# if we do not have enough column attributes, or have too many, return no views.
	if(len(columnSpec)<2 or len(columnSpec)>4):
		recommendation["collection"] = []
		return recommendation
	for spec in columnSpec:
		columns = spec.attribute
		if type(columns) == list:
			for column in columns:
				if column not in excludedColumns:
					tempView = View(ldf.context)
					tempView.removeColumnFromSpecNew(column)
					excludedColumns.append(column)
					output.append(tempView)
		elif type(columns) == str:
			if columns not in excludedColumns:
				tempView = View(ldf.context)
				tempView.removeColumnFromSpecNew(columns)
				excludedColumns.append(columns)
				output.append(tempView)
	for i, spec in enumerate(rowSpecs):
		newSpec = ldf.context.copy()
		newSpec.pop(i)
		tempView = View(newSpec)
		output.append(tempView)
		
	vc = lux.view.ViewCollection.ViewCollection(output)
	vc = vc.load(ldf)
	recommendation["collection"] = vc
	for view in vc:
		view.score = interestingness(view,ldf)
	vc.sort(removeInvalid=True)
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		toc = time.perf_counter()
		print(f"Performed generalize action in {toc - tic:0.4f} seconds")
	return recommendation
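
The enumeration generalize performs is essentially leave-one-out over the current context; a minimal model with plain strings (illustrative, not the lux API):

context = ["Horsepower", "Weight", "Origin=USA"]
for i in range(len(context)):
    print(context[:i] + context[i + 1:])
# ['Weight', 'Origin=USA']
# ['Horsepower', 'Origin=USA']
# ['Horsepower', 'Weight']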
Example #15
File: Compiler.py Project: jaywoo123/lux
    def determineEncoding(ldf: LuxDataFrame, view: View):
        '''
		Populates View with the appropriate mark type and channel information based on ShowMe logic.
		Currently supports up to 3 dimensions or measures.
		
		Parameters
		----------
		ldf : lux.luxDataFrame.LuxDataFrame
			LuxDataFrame with underspecified context
		view : lux.view.View

		Returns
		-------
		None

		Notes
		-----
		Implementing automatic encoding from Tableau's VizQL
		Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
		Show Me: Automatic presentation for visual analysis.
		IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
		https://doi.org/10.1109/TVCG.2007.70594
		'''
        # Count number of measures and dimensions
        Ndim = 0
        Nmsr = 0
        filters = []
        for spec in view.specLst:
            if (spec.value == ""):
                if (spec.dataModel == "dimension"):
                    Ndim += 1
                elif (spec.dataModel == "measure"
                      and spec.attribute != "Record"):
                    Nmsr += 1
            else:  # preserve to add back to specLst later
                filters.append(spec)
        # Helper function (TODO: Move this into utils)
        def lineOrBar(ldf, dimension, measure):
            dimType = dimension.dataType
            # If no aggregation function is specified, default to average
            if (measure.aggregation == ""):
                measure.aggregation = "mean"
            if (dimType == "temporal" or dimType == "ordinal"):
                return "line", {"x": dimension, "y": measure}
            else:  # unordered categorical
                # if cardinality is larger than 5, sort the bars
                if ldf.cardinality[dimension.attribute] > 5:
                    dimension.sort = "ascending"
                return "bar", {"x": measure, "y": dimension}

        # ShowMe logic + additional heuristics
        #countCol = Spec( attribute="count()", dataModel="measure")
        countCol = Spec(attribute="Record",
                        aggregation="count",
                        dataModel="measure",
                        dataType="quantitative")
        # xAttr = view.getAttrByChannel("x") # not used as of now
        # yAttr = view.getAttrByChannel("y")
        # zAttr = view.getAttrByChannel("z")
        autoChannel = {}
        if (Ndim == 0 and Nmsr == 1):
            # Histogram with Count
            measure = view.getAttrByDataModel("measure", excludeRecord=True)[0]
            if (len(view.getAttrByAttrName("Record")) == 0):
                view.specLst.append(countCol)
            # If no bin specified, then default as 10
            if (measure.binSize == 0):
                measure.binSize = 10
            autoChannel = {"x": measure, "y": countCol}
            view.xMinMax = ldf.xMinMax
            view.mark = "histogram"
        elif (Ndim == 1 and (Nmsr == 0 or Nmsr == 1)):
            # Line or Bar Chart
            if (Nmsr == 0):
                view.specLst.append(countCol)
            dimension = view.getAttrByDataModel("dimension")[0]
            measure = view.getAttrByDataModel("measure")[0]
            view.mark, autoChannel = lineOrBar(ldf, dimension, measure)
        elif (Ndim == 2 and (Nmsr == 0 or Nmsr == 1)):
            # Line or Bar chart broken down by the dimension
            dimensions = view.getAttrByDataModel("dimension")
            d1 = dimensions[0]
            d2 = dimensions[1]
            if (ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]):
                # d1.channel = "color"
                view.removeColumnFromSpec(d1.attribute)
                dimension = d2
                colorAttr = d1
            else:
                if (d1.attribute == d2.attribute):
                    view.specLst.pop(
                        0
                    )  # if same attribute then removeColumnFromSpec will remove both dims, we only want to remove one
                else:
                    view.removeColumnFromSpec(d2.attribute)
                dimension = d1
                colorAttr = d2
            # Colored Bar/Line chart with Count as default measure
            if (Nmsr == 0):
                view.specLst.append(countCol)
            measure = view.getAttrByDataModel("measure")[0]
            view.mark, autoChannel = lineOrBar(ldf, dimension, measure)
            autoChannel["color"] = colorAttr
        elif (Ndim == 0 and Nmsr == 2):
            # Scatterplot
            view.xMinMax = ldf.xMinMax
            view.yMinMax = ldf.yMinMax
            view.mark = "scatter"
            autoChannel = {"x": view.specLst[0], "y": view.specLst[1]}
        elif (Ndim == 1 and Nmsr == 2):
            # Scatterplot broken down by the dimension
            measure = view.getAttrByDataModel("measure")
            m1 = measure[0]
            m2 = measure[1]

            colorAttr = view.getAttrByDataModel("dimension")[0]
            view.removeColumnFromSpec(colorAttr.attribute)
            view.xMinMax = ldf.xMinMax
            view.yMinMax = ldf.yMinMax
            view.mark = "scatter"
            autoChannel = {"x": m1, "y": m2, "color": colorAttr}
        elif (Ndim == 0 and Nmsr == 3):
            # Scatterplot with color
            view.xMinMax = ldf.xMinMax
            view.yMinMax = ldf.yMinMax
            view.mark = "scatter"
            autoChannel = {
                "x": view.specLst[0],
                "y": view.specLst[1],
                "color": view.specLst[2]
            }
        if (autoChannel != {}):
            view = Compiler.enforceSpecifiedChannel(view, autoChannel)
            view.specLst.extend(filters)  # add back the preserved filters
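
For reference, the ShowMe dispatch above can be summarized as a lookup from (Ndim, Nmsr) counts to the chosen mark (a plain summary table derived from the branches, not part of the lux API):

SHOWME_MARKS = {
    (0, 1): "histogram",
    (1, 0): "line or bar", (1, 1): "line or bar",
    (2, 0): "line or bar + color", (2, 1): "line or bar + color",
    (0, 2): "scatter",
    (1, 2): "scatter + color",
    (0, 3): "scatter + color",
}
print(SHOWME_MARKS[(1, 1)])  # line or bar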