def test_vis_private_properties():
    """Check that ``data``, ``code``, ``min_max`` and ``mark`` on a Vis cannot be reassigned.

    Each attribute is first read and type/value-checked, then an assignment
    is attempted and must raise ``AttributeError``.
    """
    from lux.vis.Vis import Vis

    # The AttributeError text for a setter-less property depends on the
    # interpreter: CPython <= 3.10 says "can't set attribute", while 3.11+
    # says "property '<name>' of '<cls>' object has no setter". Accept both so
    # the test checks the behaviour rather than the wording.
    read_only_error = r"can't set attribute|has no setter"

    df = pd.read_csv("lux/data/car.csv")
    vis = Vis(["Horsepower", "Weight"], df)
    # Render once before inspecting the attributes below
    # (presumably this is what populates them -- confirm against Vis).
    vis._repr_html_()

    assert isinstance(vis.data, lux.core.frame.LuxDataFrame)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.data = "some val"

    assert isinstance(vis.code, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.code = "some val"

    assert isinstance(vis.min_max, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.min_max = "some val"

    assert vis.mark == "scatter"
    with pytest.raises(AttributeError, match=read_only_error):
        vis.mark = "some val"
def test_vis_private_properties(global_var):
    """Check that ``data``, ``code``, ``min_max`` and ``mark`` on a Vis cannot be reassigned.

    Uses the shared car dataframe stored on the ``pytest`` namespace
    (``pytest.car_df``); ``global_var`` is requested as a fixture,
    presumably the one that sets that dataframe up -- confirm in conftest.
    """
    from lux.vis.Vis import Vis

    # The AttributeError text for a setter-less property depends on the
    # interpreter: CPython <= 3.10 says "can't set attribute", while 3.11+
    # says "property '<name>' of '<cls>' object has no setter". Accept both so
    # the test checks the behaviour rather than the wording.
    read_only_error = r"can't set attribute|has no setter"

    df = pytest.car_df
    vis = Vis(["Horsepower", "Weight"], df)
    # Trigger the display hook once before inspecting the attributes below
    # (presumably this is what populates them -- confirm against Vis).
    vis._ipython_display_()

    assert isinstance(vis.data, lux.core.frame.LuxDataFrame)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.data = "some val"

    assert isinstance(vis.code, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.code = "some val"

    assert isinstance(vis.min_max, dict)
    with pytest.raises(AttributeError, match=read_only_error):
        vis.min_max = "some val"

    assert vis.mark == "scatter"
    with pytest.raises(AttributeError, match=read_only_error):
        vis.mark = "some val"
def determine_encoding(ldf: LuxDataFrame, vis: Vis):
    """
    Populates Vis with the appropriate mark type and channel information based on ShowMe logic
    Currently support up to 3 dimensions or measures

    The number of non-filter dimension and measure clauses in
    ``vis._inferred_intent`` selects the chart:

    ===== ====== =============================================
    ndim  nmsr   result
    ===== ====== =============================================
    0     1      histogram (measure vs. record count)
    1     0 / 1  line or bar chart
    2     0 / 1  line or bar chart colored by second dimension
    0     2      scatterplot
    1     2      scatterplot colored by the dimension
    0     3      scatterplot colored by the third measure
    ===== ====== =============================================

    Any other combination leaves the mark and channels untouched.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent; its ``cardinality`` and
        ``min_max`` mappings are read.
    vis : lux.vis.Vis
        Vis to update in place (``mark``, ``min_max``, ``_inferred_intent``).

    Returns
    -------
    None

    Notes
    -----
    Implementing automatic encoding from Tableau's VizQL
    Mackinlay, J. D., Hanrahan, P., & Stolte, C. (2007).
    Show Me: Automatic presentation for visual analysis.
    IEEE Transactions on Visualization and Computer Graphics, 13(6), 1137–1144.
    https://doi.org/10.1109/TVCG.2007.70594
    """
    # NOTE(review): this function assigns vis.mark and vis.min_max directly;
    # if those are read-only properties on Vis the assignments will raise
    # AttributeError -- confirm against the Vis class.

    # Count number of measures and dimensions; clauses with a value are filters.
    ndim = 0
    nmsr = 0
    filters = []
    for clause in vis._inferred_intent:
        if clause.value == "":
            if clause.data_model == "dimension":
                ndim += 1
            elif clause.data_model == "measure" and clause.attribute != "Record":
                nmsr += 1
        else:
            # preserve to add back to _inferred_intent later
            filters.append(clause)

    # Helper function (TODO: Move this into utils)
    def line_or_bar(ldf, dimension: Clause, measure: Clause):
        """Return ``(mark, channels)`` for one dimension plotted against one measure."""
        dim_type = dimension.data_type
        # If no aggregation function is specified, then default as average
        if measure.aggregation == "":
            measure.set_aggregation("mean")
        # Ordered dimensions read best as a line ("ordinal" was previously
        # misspelled, so ordinal dimensions could never take this branch).
        if dim_type in ("temporal", "ordinal"):
            return "line", {"x": dimension, "y": measure}
        # unordered categorical
        # if cardinality large than 5 then sort bars
        if ldf.cardinality[dimension.attribute] > 5:
            dimension.sort = "ascending"
        return "bar", {"x": measure, "y": dimension}

    # ShowMe logic + additional heuristics
    count_col = Clause(
        attribute="Record",
        aggregation="count",
        data_model="measure",
        data_type="quantitative",
    )
    auto_channel = {}
    if ndim == 0 and nmsr == 1:
        # Histogram with Count
        measure = vis.get_attr_by_data_model("measure", exclude_record=True)[0]
        # Add the count column only when the intent has no "Record" clause yet
        # (the previous `< 0` length check could never be true).
        if len(vis.get_attr_by_attr_name("Record")) == 0:
            vis._inferred_intent.append(count_col)
        # If no bin specified, then default as 10
        if measure.bin_size == 0:
            measure.bin_size = 10
        auto_channel = {"x": measure, "y": count_col}
        vis.mark = "histogram"
    elif ndim == 1 and (nmsr == 0 or nmsr == 1):
        # Line or Bar Chart
        if nmsr == 0:
            vis._inferred_intent.append(count_col)
        dimension = vis.get_attr_by_data_model("dimension")[0]
        measure = vis.get_attr_by_data_model("measure")[0]
        vis.mark, auto_channel = line_or_bar(ldf, dimension, measure)
    elif ndim == 2 and (nmsr == 0 or nmsr == 1):
        # Line or Bar chart broken down by the dimension
        dimensions = vis.get_attr_by_data_model("dimension")
        d1 = dimensions[0]
        d2 = dimensions[1]
        # The lower-cardinality dimension becomes the color; the other one
        # is kept as the chart's main axis.
        if ldf.cardinality[d1.attribute] < ldf.cardinality[d2.attribute]:
            vis.remove_column_from_spec(d1.attribute)
            dimension = d2
            color_attr = d1
        else:
            if d1.attribute == d2.attribute:
                # if same attribute then remove_column_from_spec will remove
                # both dims, we only want to remove one
                vis._inferred_intent.pop(0)
            else:
                vis.remove_column_from_spec(d2.attribute)
            dimension = d1
            color_attr = d2
        # Colored Bar/Line chart with Count as default measure
        if nmsr == 0:
            vis._inferred_intent.append(count_col)
        measure = vis.get_attr_by_data_model("measure")[0]
        vis.mark, auto_channel = line_or_bar(ldf, dimension, measure)
        auto_channel["color"] = color_attr
    elif ndim == 0 and nmsr == 2:
        # Scatterplot
        # Takes the first two clauses of the intent by position as x and y.
        vis.mark = "scatter"
        vis._inferred_intent[0].set_aggregation(None)
        vis._inferred_intent[1].set_aggregation(None)
        auto_channel = {
            "x": vis._inferred_intent[0],
            "y": vis._inferred_intent[1],
        }
    elif ndim == 1 and nmsr == 2:
        # Scatterplot broken down by the dimension
        measure = vis.get_attr_by_data_model("measure")
        m1 = measure[0]
        m2 = measure[1]
        # Clear aggregation on the two measures themselves. Indexing
        # _inferred_intent[0] / [1] instead would hit the dimension whenever
        # it is not the last clause, leaving one measure still aggregated.
        m1.set_aggregation(None)
        m2.set_aggregation(None)
        color_attr = vis.get_attr_by_data_model("dimension")[0]
        # NOTE(review): the other call sites pass `<clause>.attribute` here,
        # this one passes the Clause itself -- confirm which one
        # remove_column_from_spec expects. Left unchanged.
        vis.remove_column_from_spec(color_attr)
        vis.mark = "scatter"
        auto_channel = {"x": m1, "y": m2, "color": color_attr}
    elif ndim == 0 and nmsr == 3:
        # Scatterplot with color
        vis.mark = "scatter"
        auto_channel = {
            "x": vis._inferred_intent[0],
            "y": vis._inferred_intent[1],
            "color": vis._inferred_intent[2],
        }

    # Keep only the min/max entries for attributes that ended up on a channel.
    relevant_attributes = [auto_channel[channel].attribute for channel in auto_channel]
    vis.min_max = {
        attr: ldf.min_max[attr]
        for attr in relevant_attributes
        if attr != "Record" and attr in ldf.min_max
    }
    if auto_channel:
        vis = Compiler.enforce_specified_channel(vis, auto_channel)
        vis._inferred_intent.extend(filters)  # add back the preserved filters