def test_vis_collection_set_intent():
    """Replacing the intent of a VisList must update the filter value of every vis in it."""
    olympic_url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
    df = pd.read_csv(olympic_url)
    vlist = VisList(["Height", "SportType=Ice", "?"], df)
    vlist.set_intent(["Height", "SportType=Boat", "?"])
    for vis in vlist._collection:
        # The first clause without a channel is taken to be the filter clause.
        channelless = [clause for clause in vis._inferred_intent if clause.channel == ""]
        assert channelless[0].value == "Boat"
def exported(self) -> Union[Dict[str, VisList], VisList]:
    """
    Get selected visualizations as exported Vis List

    Notes
    -----
    Convert the _exportedVisIdxs dictionary into a programmable VisList
    Example _exportedVisIdxs :
        {'Correlation': [0, 2], 'Occurrence': [1]}
    indicating the 0th and 2nd vis from the `Correlation` tab is selected,
    and the 1st vis from the `Occurrence` tab is selected.

    Returns
    -------
    Union[Dict[str,VisList], VisList]
        When there is no widget or no exported vis, warn and return empty list
        -> []
        When only the current vis is exported, return ``self.current_vis``.
        When all the exported vis is from the same tab, return a VisList of
        selected visualizations.
        -> VisList(v1, v2...)
        When the exported vis is from the different tabs, return a dictionary
        with the action name as key and selected visualizations in the VisList
        (the current vis, if selected, is stored under "Current Vis").
        -> {"Enhance": VisList(v1, v2...), "Filter": VisList(v5, v7...), ..}
    """
    if not hasattr(self, "_widget"):
        warnings.warn(
            "\nNo widget attached to the dataframe. "
            "Please assign dataframe to an output variable.\n"
            "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips",
            stacklevel=2,
        )
        return []

    exported_vis_lst = self._widget._exportedVisIdxs
    if not exported_vis_lst:
        warnings.warn(
            "\nNo visualization selected to export.\n"
            "See more: https://lux-api.readthedocs.io/en/latest/source/guide/FAQ.html#troubleshooting-tips",
            stacklevel=2,
        )
        return []

    def _selected(action):
        # Pick the exported indices of one tab out of its recommendation list.
        recommended = self.recommendation[action]
        return VisList([recommended[idx] for idx in exported_vis_lst[action]])

    # Exactly one key: either only the current vis, or a single tab.
    if len(exported_vis_lst) == 1:
        if "currentVis" in exported_vis_lst:
            return self.current_vis
        (export_action,) = exported_vis_lst
        return _selected(export_action)

    # Several keys: group the selections by action name.
    exported_vis = {}
    if "currentVis" in exported_vis_lst:
        exported_vis["Current Vis"] = self.current_vis
    for export_action in exported_vis_lst:
        if export_action != "currentVis":
            exported_vis[export_action] = _selected(export_action)
    return exported_vis
def test_specified_vis_collection(global_var):
    """Fully specified intents on the SQL `cars` table expand to the expected charts."""
    sql_df = lux.LuxSQLTable(table_name="cars")

    vlst = VisList(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="brand"),
            lux.Clause(attribute="origin", value=["Japan", "USA"]),
        ],
        sql_df,
    )
    assert len(vlst) == 2

    vlst = VisList(
        [
            lux.Clause(attribute=["horsepower", "weight"]),
            lux.Clause(attribute="brand"),
            lux.Clause(attribute="origin", value=["Japan", "USA"]),
        ],
        sql_df,
    )
    assert len(vlst) == 4

    # test if z axis has been filtered correctly
    chart_titles = [vis.title for vis in vlst]
    # `"a" and "b" in titles` only tests "b" (a non-empty string is always
    # truthy), so every expected title gets its own membership check.
    assert "origin = USA" in chart_titles
    assert "origin = Japan" in chart_titles
    assert "origin = Europe" not in chart_titles
def test_parse(global_var):
    """A wildcard filter on origin yields three charts for pandas frames and for a Postgres-backed table."""
    lux.config.set_executor_type("Pandas")

    # Fixture dataframe, measure passed by keyword.
    car_df = pytest.car_df
    keyword_intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    assert len(VisList(keyword_intent, car_df)) == 3

    # Fixture dataframe, measure passed as a description string.
    car_df = pytest.car_df
    string_intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    assert len(VisList(string_intent, car_df)) == 3

    # Dataframe freshly loaded from disk.
    csv_df = pd.read_csv("lux/data/car.csv")
    string_intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    assert len(VisList(string_intent, csv_df)) == 3

    # SQL table, measure passed by keyword.
    conn = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux")
    lux.config.set_SQL_connection(conn)
    cars_table = lux.LuxSQLTable(table_name="cars")
    keyword_intent = [lux.Clause("origin=?"), lux.Clause(attribute="milespergal")]
    assert len(VisList(keyword_intent, cars_table)) == 3

    # SQL table, measure passed as a description string.
    conn = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux")
    lux.config.set_SQL_connection(conn)
    cars_table = lux.LuxSQLTable(table_name="cars")
    string_intent = [lux.Clause("origin=?"), lux.Clause("milespergal")]
    assert len(VisList(string_intent, cars_table)) == 3
def test_underspecified_vis_collection_zval(global_var):
    """A wildcard filter value expands to three charts on both the pandas frame and the SQL table."""
    lux.config.set_executor_type("Pandas")

    # check if the number of charts is correct
    car_df = pytest.car_df
    pandas_intent = [
        lux.Clause(attribute="Origin", filter_op="=", value="?"),
        lux.Clause(attribute="MilesPerGal"),
    ]
    assert len(VisList(pandas_intent, car_df)) == 3

    # does not work
    # df = pd.read_csv("lux/data/car.csv")
    # vlst = VisList([lux.Clause(attribute = ["Origin","Cylinders"], filter_op="=",value="?"),lux.Clause(attribute = ["Horsepower"]),lux.Clause(attribute = "Weight")],df)
    # assert len(vlst) == 8

    conn = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux")
    lux.config.set_SQL_connection(conn)
    cars_table = lux.LuxSQLTable(table_name="cars")
    sql_intent = [
        lux.Clause(attribute="origin", filter_op="=", value="?"),
        lux.Clause(attribute="milespergal"),
    ]
    assert len(VisList(sql_intent, cars_table)) == 3
def test_specified_vis_collection():
    """Fully specified intents on the remote cars dataset expand to the expected charts."""
    url = 'https://github.com/lux-org/lux-datasets/blob/master/data/cars.csv?raw=true'
    df = pd.read_csv(url)
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format='%Y')

    vlst = VisList(
        [
            lux.Clause(attribute="Horsepower"),
            lux.Clause(attribute="Brand"),
            lux.Clause(attribute="Origin", value=["Japan", "USA"]),
        ],
        df,
    )
    assert len(vlst) == 2

    vlst = VisList(
        [
            lux.Clause(attribute=["Horsepower", "Weight"]),
            lux.Clause(attribute="Brand"),
            lux.Clause(attribute="Origin", value=["Japan", "USA"]),
        ],
        df,
    )
    assert len(vlst) == 4

    # test if z axis has been filtered correctly
    chart_titles = [vis.title for vis in vlst]
    # `"a" and "b" in titles` only tests "b" (a non-empty string is always
    # truthy), so every expected title gets its own membership check.
    assert "Origin = USA" in chart_titles
    assert "Origin = Japan" in chart_titles
    assert "Origin = Europe" not in chart_titles
def test_specified_vis_collection(global_var):
    """Fully specified intents on the shared cars fixture expand to the expected charts."""
    df = pytest.car_df
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    vlst = VisList(
        [
            lux.Clause(attribute="Horsepower"),
            lux.Clause(attribute="Brand"),
            lux.Clause(attribute="Origin", value=["Japan", "USA"]),
        ],
        df,
    )
    assert len(vlst) == 2

    vlst = VisList(
        [
            lux.Clause(attribute=["Horsepower", "Weight"]),
            lux.Clause(attribute="Brand"),
            lux.Clause(attribute="Origin", value=["Japan", "USA"]),
        ],
        df,
    )
    assert len(vlst) == 4

    # test if z axis has been filtered correctly
    chart_titles = [vis.title for vis in vlst]
    # `"a" and "b" in titles` only tests "b" (a non-empty string is always
    # truthy), so every expected title gets its own membership check.
    assert "Origin = USA" in chart_titles
    assert "Origin = Japan" in chart_titles
    assert "Origin = Europe" not in chart_titles
def test_parse(global_var):
    """A wildcard filter on origin yields three charts on the SQL `cars` table."""
    # Measure passed by keyword.
    cars_table = lux.LuxSQLTable(table_name="cars")
    keyword_intent = [lux.Clause("origin=?"), lux.Clause(attribute="milespergal")]
    assert len(VisList(keyword_intent, cars_table)) == 3

    # Measure passed as a description string.
    cars_table = lux.LuxSQLTable(table_name="cars")
    string_intent = [lux.Clause("origin=?"), lux.Clause("milespergal")]
    assert len(VisList(string_intent, cars_table)) == 3
def test_vis_collection_set_intent(global_var):
    """Replacing the intent of a VisList must update the filter value of every vis in it."""
    df = pytest.olympic
    vlist = VisList(["Height", "SportType=Ice", "?"], df)
    vlist.set_intent(["Height", "SportType=Boat", "?"])
    for vis in vlist._collection:
        # The first clause without a channel is taken to be the filter clause.
        channelless = [clause for clause in vis._inferred_intent if clause.channel == ""]
        assert channelless[0].value == "Boat"
    # Reset the intent on the dataframe taken from the pytest namespace.
    df.clear_intent()
def test_vis_collection_set_intent():
    """Replacing the intent of a VisList must update the filter value of every vis in it."""
    df = pd.read_csv("lux/data/olympic.csv")
    from lux.vis.VisList import VisList

    vc = VisList(["Height", "SportType=Ice", "?"], df)
    vc.set_intent(["Height", "SportType=Boat", "?"])
    for vis in vc.collection:
        # The first clause without a channel is taken to be the filter clause.
        channelless = [clause for clause in vis._inferred_intent if clause.channel == ""]
        assert channelless[0].value == "Boat"
def test_vis_collection():
    """Wildcard intents expand to one vis per eligible column, with Year placed on the x channel."""
    olympic_url = "https://github.com/lux-org/lux-datasets/blob/master/data/olympic.csv?raw=true"
    df = pd.read_csv(olympic_url)

    vlist = VisList(["Height", "SportType=Ball", "?"], df)
    year_visses = [vis for vis in vlist if vis.get_attr_by_attr_name("Year") != []]
    x_clause = year_visses[0].get_attr_by_channel("x")[0]
    assert x_clause.attribute == "Year"
    # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute
    assert len(vlist) == len(df.columns) - 1 - 1

    vlist = VisList(["Height", "?"], df)
    # remove 1 for vis with for same attribute
    assert len(vlist) == len(df.columns) - 1
def test_vis_collection(global_var):
    """Wildcard intents expand to one vis per eligible column, with Year placed on the x channel."""
    df = pytest.olympic

    vlist = VisList(["Height", "SportType=Ball", "?"], df)
    year_visses = [vis for vis in vlist if vis.get_attr_by_attr_name("Year") != []]
    x_clause = year_visses[0].get_attr_by_channel("x")[0]
    assert x_clause.attribute == "Year"
    # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute
    assert len(vlist) == len(df.columns) - 1 - 1

    vlist = VisList(["Height", "?"], df)
    # remove 1 for vis with for same attribute
    assert len(vlist) == len(df.columns) - 1
def test_parse():
    """A wildcard filter on Origin yields three charts, however the measure clause is written."""
    # Measure passed by keyword.
    car_df = pd.read_csv("lux/data/car.csv")
    keyword_intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    assert len(VisList(keyword_intent, car_df)) == 3

    # Measure passed as a description string.
    car_df = pd.read_csv("lux/data/car.csv")
    string_intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    assert len(VisList(string_intent, car_df)) == 3
def test_parse(global_var):
    """A wildcard filter on Origin yields three charts, however the measure clause is written."""
    # Measure passed by keyword.
    car_df = pytest.car_df
    keyword_intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    assert len(VisList(keyword_intent, car_df)) == 3

    # Measure passed as a description string.
    car_df = pytest.car_df
    string_intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    assert len(VisList(string_intent, car_df)) == 3
def random_categorical(ldf):
    """
    Build a recommendation made of bar charts over the nominal attributes of ``ldf``.

    Every generated vis is given the same fixed score of 10 before the list is
    truncated with ``topK(15)``.

    Parameters
    ----------
    ldf
        Dataframe handed to ``VisList`` together with the wildcard nominal intent.

    Returns
    -------
    dict
        Recommendation with the keys "action", "description" and "collection".
    """
    candidates = VisList([lux.Clause("?", data_type="nominal")], ldf)
    for candidate in candidates:
        candidate.score = 10
    return {
        "action": "bars",
        "description": "Random list of Bar charts",
        "collection": candidates.topK(15),
    }
def test_vis_collection():
    """Wildcard intents expand to one vis per eligible column, with Year placed on the x channel."""
    df = pd.read_csv("lux/data/olympic.csv")
    from lux.vis.VisList import VisList

    vc = VisList(["Height", "SportType=Ball", "?"], df)
    year_visses = [vis for vis in vc if vis.get_attr_by_attr_name("Year") != []]
    x_clause = year_visses[0].get_attr_by_channel("x")[0]
    assert x_clause.attribute == "Year"
    # remove 1 for vis with same filter attribute and remove 1 vis with for same attribute
    assert len(vc) == len(df.columns) - 1 - 1

    vc = VisList(["Height", "?"], df)
    # remove 1 for vis with for same attribute
    assert len(vc) == len(df.columns) - 1
def test_parse(global_var):
    """With the Pandas executor, a wildcard filter on Origin yields three charts for every input form."""
    lux.config.set_executor_type("Pandas")

    # Fixture dataframe, measure passed by keyword.
    car_df = pytest.car_df
    keyword_intent = [lux.Clause("Origin=?"), lux.Clause(attribute="MilesPerGal")]
    assert len(VisList(keyword_intent, car_df)) == 3

    # Fixture dataframe, measure passed as a description string.
    car_df = pytest.car_df
    string_intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    assert len(VisList(string_intent, car_df)) == 3

    # Dataframe freshly loaded from disk.
    csv_df = pd.read_csv("lux/data/car.csv")
    string_intent = [lux.Clause("Origin=?"), lux.Clause("MilesPerGal")]
    assert len(VisList(string_intent, csv_df)) == 3
def remove_all_invalid(vis_collection: VisList) -> VisList:
    """
    Given an expanded vis list, remove all visualizations that are invalid.

    A visualization is dropped when any of the following holds:
    - two of its inferred clauses share the same attribute (this includes a
      filter on an attribute that is also visualized),
    - it has two or more temporal clauses,
    - it has exactly one temporal clause and ``vis._nmsr == 2``.

    Parameters
    ----------
    vis_collection : list[lux.vis.Vis]
        expanded collection of lux.Vis objects to be screened.

    Returns
    -------
    lux.vis.VisList
        new vis list holding only the valid lux.Vis objects, in their original order.
    """
    valid = []
    for vis in vis_collection:
        clauses = vis._inferred_intent
        n_temporal = sum(1 for clause in clauses if clause.data_type == "temporal")
        distinct_attributes = {clause.attribute for clause in clauses}
        # Fewer distinct attributes than clauses means an attribute is repeated.
        if n_temporal >= 2 or len(distinct_attributes) != len(clauses):
            continue
        if vis._nmsr == 2 and n_temporal == 1:
            continue
        valid.append(vis)
    return VisList(valid)
def get_graphing_code(self, change):
    """
    Push the source code of the vis selected for full-screen view to the widget.

    Reads ``selectedFullScreenIndex`` from the widget, looks the selected vis up
    in ``self._recommendation`` and writes the widget's ``visGraphCode`` and
    ``visStyleCode``. Only the "vegalite" plotting backend produces real code;
    any other backend gets a placeholder comment and an empty style string.

    Parameters
    ----------
    change
        Unused; presumably the change notification passed by the widget
        observer machinery (confirm against the caller).
    """
    widget = self._widget
    full_screen_vis_idx = widget.selectedFullScreenIndex
    full_screen_action = list(full_screen_vis_idx.keys())[0]
    recommended = self._recommendation[full_screen_action]
    # Using visList to support eventual full view display of multiple graphs
    full_screen_vis = VisList(
        [recommended[idx] for idx in full_screen_vis_idx[full_screen_action]]
    )

    if lux.config.plotting_backend == "vegalite":
        # Detach the handler before rewriting the code traits, presumably so
        # these assignments do not trigger it; it is re-attached afterwards.
        widget.unobserve(self.apply_full_view_changes)
        widget.visGraphCode = "import pandas as pd\n" + full_screen_vis[0].get_Altair_vis_code()
        widget.visStyleCode = lux.config.plotting_style_code
        widget.observe(self.apply_full_view_changes, names="visGraphCode")
        widget.observe(self.apply_full_view_changes, names="visStyleCode")
        widget.observe(self.change_style_config, names="configPlottingStyle")
    else:
        widget.visGraphCode = (
            "# " + lux.config.plotting_backend + " not supported in full screen yet"
        )
        widget.visStyleCode = ""
def column_group(ldf):
    """
    Build the "Column Groups" recommendation: one Vis per column of ``ldf``,
    each pairing the (single-level) index with that column, unaggregated.

    Parameters
    ----------
    ldf
        Dataframe whose columns are visualized. NOTE: when its columns are a
        ``pd.DatetimeIndex`` they are replaced in place by their formatted
        string labels.

    Returns
    -------
    dict
        Recommendation with the keys "action", "description" and "collection".
        The collection is empty when the index has more than one level.
    """
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        # ldf_flat is still the same object as ldf here, so this relabels the
        # caller's columns too; the loop below therefore iterates over strings.
        ldf_flat.columns = ldf_flat.columns.format()
    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
    ldf_flat = ldf_flat.reset_index()
    if ldf.index.nlevels == 1:
        # An unnamed index has no usable name; fall back to "index", the label
        # reset_index() normally gives the flattened index column.
        index_column_name = ldf.index.name if ldf.index.name is not None else "index"
        collection = [
            Vis([index_column_name, lux.Clause(str(attribute), aggregation=None)], ldf_flat)
            for attribute in ldf.columns
        ]
    vlst = VisList(collection)
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf
    recommendation["collection"] = vlst
    return recommendation
def row_group(ldf):
    """
    Build the "Row Groups" recommendation: one Vis per row of ``ldf``.

    Each row is turned into its own small frame with ``reset_index()`` and
    plotted against the column dimension, whose name is ``ldf.columns.name``
    or "index" when the columns are unnamed.

    Returns
    -------
    dict
        Recommendation with the keys "action", "description" and "collection".
        The collection is empty when the index has more than one level.
    """
    recommendation = {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
    }
    charts = []
    if ldf.index.nlevels == 1:
        dim_name = "index" if ldf.columns.name is None else ldf.columns.name
        for position in range(len(ldf)):
            row = ldf.iloc[position, ]
            rowdf = row.reset_index()
            # if (dim_name =="index"): #TODO: need to change this to auto-detect
            #     rowdf.data_type_lookup["index"]="nominal"
            #     rowdf.data_model_lookup["index"]="dimension"
            #     rowdf.cardinality["index"]=len(rowdf)
            # if isinstance(ldf.columns,pd.DatetimeIndex):
            #     rowdf.data_type_lookup[dim_name]="temporal"
            measure = lux.Clause(row.name, data_model="measure", aggregation=None)
            charts.append(Vis([dim_name, measure], rowdf))
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated data
    recommendation["collection"] = VisList(charts)
    return recommendation
def show_all_column_vis(self):
    """
    Set ``self.current_vis`` to a single vis over all columns of a small frame.

    The vis is only built when the frame has 2 or 3 columns and no intent is
    set (``self.intent`` is an empty list or ``None``). It is kept only if the
    resulting vis has a non-empty ``mark``.
    """
    # `and` binds tighter than `or`, so the empty-intent test has to be grouped
    # explicitly; otherwise a None intent would skip the column-count guard.
    no_intent = self.intent is None or self.intent == []
    if 1 < len(self.columns) < 4 and no_intent:
        vis = Vis(list(self.columns), self)
        if vis.mark != "":
            vis._all_column = True
            self.current_vis = VisList([vis])
def remove_all_invalid(vis_collection: VisList) -> VisList:
    """
    Given an expanded vis list, remove all visualizations that are invalid.

    A visualization is dropped when either of the following holds:
    - two of its inferred clauses share the same attribute (this includes a
      filter on an attribute that is also visualized),
    - it has two or more temporal clauses.

    Parameters
    ----------
    vis_collection : list[lux.vis.Vis]
        expanded collection of lux.Vis objects to be screened.

    Returns
    -------
    lux.vis.VisList
        new vis list holding only the valid lux.Vis objects, in their original order.
    """
    valid = []
    for vis in vis_collection:
        clauses = vis._inferred_intent
        n_temporal = sum(1 for clause in clauses if clause.data_type == "temporal")
        distinct_attributes = {clause.attribute for clause in clauses}
        # Fewer distinct attributes than clauses means an attribute is repeated.
        if n_temporal >= 2 or len(distinct_attributes) != len(clauses):
            continue
        valid.append(vis)
    return VisList(valid)
def test_underspecified_vis_collection_zval():
    """A wildcard filter value on Origin expands to three charts."""
    # check if the number of charts is correct
    car_df = pd.read_csv("lux/data/car.csv")
    intent = [
        lux.Clause(attribute="Origin", filter_op="=", value="?"),
        lux.Clause(attribute="MilesPerGal"),
    ]
    assert len(VisList(intent, car_df)) == 3
def test_specified_channel_enforced_vis_collection(global_var):
    """An explicitly requested channel must be honoured by every vis of the expanded list."""
    cars_table = lux.LuxSQLTable(table_name="cars")
    intent = [
        lux.Clause(attribute="?"),
        lux.Clause(attribute="milespergal", channel="x"),
    ]
    for vis in VisList(intent, cars_table):
        check_attribute_on_channel(vis, "milespergal", "x")
def test_underspecified_vis_collection_zval(global_var):
    """A wildcard filter value on origin expands to three charts on the SQL `cars` table."""
    cars_table = lux.LuxSQLTable(table_name="cars")
    intent = [
        lux.Clause(attribute="origin", filter_op="=", value="?"),
        lux.Clause(attribute="milespergal"),
    ]
    assert len(VisList(intent, cars_table)) == 3
def test_autoencoding_scatter(global_var):
    """Check channel assignment for two-attribute intents with no, partial, full and conflicting channels."""
    lux.config.set_executor_type("Pandas")
    df = pytest.car_df
    # change pandas dtype for the column "Year" to datetype
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    # No channel specified
    unconstrained = [lux.Clause(attribute="MilesPerGal"), lux.Clause(attribute="Weight")]
    vis = Vis(unconstrained, df)
    check_attribute_on_channel(vis, "MilesPerGal", "x")
    check_attribute_on_channel(vis, "Weight", "y")

    # Partial channel specified
    partially_constrained = [
        lux.Clause(attribute="MilesPerGal", channel="y"),
        lux.Clause(attribute="Weight"),
    ]
    vis = Vis(partially_constrained, df)
    check_attribute_on_channel(vis, "MilesPerGal", "y")
    check_attribute_on_channel(vis, "Weight", "x")

    # Full channel specified
    fully_constrained = [
        lux.Clause(attribute="MilesPerGal", channel="y"),
        lux.Clause(attribute="Weight", channel="x"),
    ]
    vis = Vis(fully_constrained, df)
    check_attribute_on_channel(vis, "MilesPerGal", "y")
    check_attribute_on_channel(vis, "Weight", "x")

    # Duplicate channel specified
    with pytest.raises(ValueError):
        # Should throw error because there should not be columns with the same channel specified
        df.set_intent(
            [
                lux.Clause(attribute="MilesPerGal", channel="x"),
                lux.Clause(attribute="Weight", channel="x"),
            ]
        )
    df.clear_intent()

    # Same channel enforcement against the Postgres-backed table.
    conn = psycopg2.connect("host=localhost dbname=postgres user=postgres password=lux")
    lux.config.set_SQL_connection(conn)
    cars_table = lux.LuxSQLTable(table_name="cars")
    sql_intent = [
        lux.Clause(attribute="?"),
        lux.Clause(attribute="milespergal", channel="x"),
    ]
    for vis in VisList(sql_intent, cars_table):
        check_attribute_on_channel(vis, "milespergal", "x")
def test_vis_list_set_intent():
    """Assigning ``VisList.intent`` must regenerate the list around the new attribute."""
    from lux.vis.VisList import VisList

    car_df = pd.read_csv("lux/data/car.csv")
    vislist = VisList(["Horsepower", "?"], car_df)
    vislist._repr_html_()
    for vis in vislist:
        assert vis.get_attr_by_attr_name("Horsepower") != []

    vislist.intent = ["Weight", "?"]
    vislist._repr_html_()
    for vis in vislist:
        assert vis.get_attr_by_attr_name("Weight") != []
def test_underspecified_vis_collection_zval(global_var):
    """A wildcard filter value on Origin expands to three charts."""
    # check if the number of charts is correct
    car_df = pytest.car_df
    intent = [
        lux.Clause(attribute="Origin", filter_op="=", value="?"),
        lux.Clause(attribute="MilesPerGal"),
    ]
    assert len(VisList(intent, car_df)) == 3
def test_selection():
    """Every vis built on a SQL table carries a LuxDataFrame holding only the selected columns."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")
    measures = lux.Clause(attribute=["horsepower", "weight", "acceleration"])
    year = lux.Clause(attribute="year")
    vislist = VisList([measures, year], tbl)

    data_types = [type(vis.data) for vis in vislist]
    assert all(kind == lux.core.frame.LuxDataFrame for kind in data_types)
    third_columns = vislist[2].data.columns
    assert all(third_columns == ["year", "acceleration"])