def test_sort_bar():
    from lux.compiler.Compiler import Compiler
    from lux.view.View import View
    # Acceleration (measure) by Origin (low-cardinality dimension): bar chart with no sort applied.
    df = pd.read_csv("lux/data/car.csv")
    view = View([lux.Spec(attribute="Acceleration", data_model="measure", data_type="quantitative"),
                 lux.Spec(attribute="Origin", data_model="dimension", data_type="nominal")])
    Compiler.determine_encoding(df, view)
    assert view.mark == "bar"
    assert view.spec_lst[1].sort == ''

    # Acceleration (measure) by Name (high-cardinality dimension): bar chart sorted ascending.
    df = pd.read_csv("lux/data/car.csv")
    view = View([lux.Spec(attribute="Acceleration", data_model="measure", data_type="quantitative"),
                 lux.Spec(attribute="Name", data_model="dimension", data_type="nominal")])
    Compiler.determine_encoding(df, view)
    assert view.mark == "bar"
    assert view.spec_lst[1].sort == 'ascending'
def test_remove():
    from lux.view.View import View
    df = pd.read_csv("lux/data/car.csv")
    view = View(["Horsepower", "Horsepower"])
    view.load(df)
    view.remove_column_from_spec_new("Horsepower", remove_first=False)
    assert view.spec_lst == [], "Remove all instances of Horsepower"

    df = pd.read_csv("lux/data/car.csv")
    view = View(["Horsepower", "Horsepower"])
    view.load(df)
    view.remove_column_from_spec_new("Horsepower", remove_first=True)
    assert len(view.spec_lst) == 1, "Remove only 1 instance of Horsepower"
    assert view.spec_lst[0].attribute == "Horsepower", "Remove only 1 instance of Horsepower"
def test_vary_filter_val():
    from lux.view.View import View
    df = pd.read_csv("lux/data/olympic.csv")
    view = View(["Height", "SportType=Ball"])
    view = view.load(df)
    df.set_context_as_view(view)
    df.show_more()
    # One recommended view per SportType value other than the one already filtered on.
    assert len(df.recommendation["Filter"]) == len(df["SportType"].unique()) - 1
def combine(colAttrs, accum):
    # Recursive helper: each call consumes one list of candidate column attributes and
    # appends one choice to the accumulator; `filters`, `collection`, and `View` are
    # resolved from the enclosing scope.
    last = (len(colAttrs) == 1)
    n = len(colAttrs[0])
    for i in range(n):
        columnList = copy.deepcopy(accum + [colAttrs[0][i]])
        if last:
            if len(filters) > 0:
                # if we have filters, generate combinations for each row.
                for row in filters:
                    specLst = copy.deepcopy(columnList + [row])
                    view = View(specLst, title=f"{row.attribute} {row.filterOp} {row.value}")
                    collection.append(view)
            else:
                view = View(columnList)
                collection.append(view)
        else:
            combine(colAttrs[1:], columnList)
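# Hedged standalone sketch (not library code; the helper name and sample lists below
# are illustrative only) of the recursion `combine` performs: each level consumes one
# list of candidate attributes, appends one choice to the accumulator, and the leaves
# of the recursion enumerate every attribute combination.
def enumerate_combinations_sketch(colAttrs, accum, results):
    last = (len(colAttrs) == 1)
    for attr in colAttrs[0]:
        chosen = accum + [attr]
        if last:
            results.append(chosen)  # leaf: one fully specified combination
        else:
            enumerate_combinations_sketch(colAttrs[1:], chosen, results)

# e.g. enumerate_combinations_sketch([["Horsepower", "Weight"], ["Origin"]], [], out)
# leaves out == [["Horsepower", "Origin"], ["Weight", "Origin"]]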
def test_refresh_inplace():
    df = pd.DataFrame({
        'date': ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
        'value': [10.5, 15.2, 20.3, 25.2]
    })
    # Before conversion, the string-valued date column is inferred as nominal.
    assert df.data_type['nominal'][0] == 'date'

    from lux.view.View import View
    view = View(["date", "value"])
    view.load(df)

    # Converting the column in place should refresh the inferred data type to temporal.
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")
    assert df.data_type['temporal'][0] == 'date'
def filter(ldf):
    '''
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Filter action.
    '''
    #for benchmarking
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    recommendation = {"action": "Filter",
                      "description": "Shows possible visualizations when filtered by categorical variables in the dataset."}
    filters = utils.get_filter_specs(ldf.context)
    filter_values = []
    output = []
    #if Row is specified, create visualizations where data is filtered by all values of the Row's categorical variable
    column_spec = utils.get_attrs_specs(ldf.current_view[0].spec_lst)
    # materialize as a list so the membership tests below can be repeated
    column_spec_attr = list(map(lambda x: x.attribute, column_spec))
    if len(filters) > 0:
        #get unique values for all categorical values specified and creates corresponding filters
        for row in filters:
            unique_values = ldf.unique_values[row.attribute]
            filter_values.append(row.value)
            #creates views with new filters
            for val in unique_values:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_filter = lux.Spec(attribute=row.attribute, value=val)
                    new_spec.append(new_filter)
                    temp_view = View(new_spec)
                    output.append(temp_view)
    else:
        #if no existing filters, create filters using unique values from all categorical variables in the dataset
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) columns
            if ldf.cardinality[col] < 40 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            unique_values = ldf.unique_values[cat]
            for i in range(0, len(unique_values)):
                new_spec = column_spec.copy()
                new_filter = lux.Spec(attribute=cat, filter_op="=", value=unique_values[i])
                new_spec.append(new_filter)
                temp_view = View(new_spec)
                output.append(temp_view)
    vc = lux.view.ViewCollection.ViewCollection(output)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    #for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed filter action in {toc - tic:0.4f} seconds")
    return recommendation
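# Hedged usage sketch (mirrors test_vary_filter_val above; not part of the library
# itself): the Filter action is not invoked directly, it runs through the recommendation
# workflow, after which df.recommendation["Filter"] holds one view per remaining value
# of the filtered categorical attribute.
def example_filter_usage():
    from lux.view.View import View
    df = pd.read_csv("lux/data/olympic.csv")
    view = View(["Height", "SportType=Ball"])
    view = view.load(df)
    df.set_context_as_view(view)
    df.show_more()
    return df.recommendation["Filter"]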
def generalize(ldf):
    '''
    Generates all possible visualizations when one attribute or filter from the current view is removed.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Generalize action.
    '''
    #for benchmarking
    if ldf.toggleBenchmarking == True:
        tic = time.perf_counter()
    # takes in a dataObject and generates a list of new dataObjects, each with a single measure from the original object removed
    # --> return list of dataObjects with corresponding interestingness scores
    recommendation = {"action": "Generalize",
                      "description": "Remove one attribute or filter to observe a more general trend."}
    output = []
    excludedColumns = []
    columnSpec = list(filter(lambda x: x.value == "" and x.attribute != "Record", ldf.context))
    rowSpecs = utils.getFilterSpecs(ldf.context)
    # if we do not have enough column attributes, or have too many, return no views.
    if len(columnSpec) < 2 or len(columnSpec) > 4:
        recommendation["collection"] = []
        return recommendation
    for spec in columnSpec:
        columns = spec.attribute
        if type(columns) == list:
            for column in columns:
                if column not in excludedColumns:
                    tempView = View(ldf.context)
                    tempView.removeColumnFromSpecNew(column)
                    excludedColumns.append(column)
                    output.append(tempView)
        elif type(columns) == str:
            if columns not in excludedColumns:
                tempView = View(ldf.context)
                tempView.removeColumnFromSpecNew(columns)
                excludedColumns.append(columns)
                output.append(tempView)
    for i, spec in enumerate(rowSpecs):
        newSpec = ldf.context.copy()
        newSpec.pop(i)
        tempView = View(newSpec)
        output.append(tempView)
    vc = lux.view.ViewCollection.ViewCollection(output)
    vc = vc.load(ldf)
    recommendation["collection"] = vc
    for view in vc:
        view.score = interestingness(view, ldf)
    vc.sort(removeInvalid=True)
    #for benchmarking
    if ldf.toggleBenchmarking == True:
        toc = time.perf_counter()
        print(f"Performed generalize action in {toc - tic:0.4f} seconds")
    return recommendation
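# Hedged usage sketch (same pattern as the Filter example; not part of the library
# itself, and the "Generalize" key is assumed to match the action name set in the
# recommendation dict above): with at least two attributes in the context, show_more()
# runs the Generalize action and each resulting view has one attribute or filter removed.
def example_generalize_usage():
    from lux.view.View import View
    df = pd.read_csv("lux/data/car.csv")
    view = View(["Horsepower", "Acceleration", "Origin=USA"])
    view = view.load(df)
    df.set_context_as_view(view)
    df.show_more()
    return df.recommendation["Generalize"]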