Example #1
def test_sort_bar():
    import pandas as pd
    import lux
    from lux.compiler.Compiler import Compiler
    from lux.view.View import View
    df = pd.read_csv("lux/data/car.csv")
    view = View([
        lux.Spec(attribute="Acceleration",
                 data_model="measure",
                 data_type="quantitative"),
        lux.Spec(attribute="Origin",
                 data_model="dimension",
                 data_type="nominal")
    ])
    Compiler.determine_encoding(df, view)
    # Low-cardinality dimension (Origin): bar chart, no sorting applied.
    assert view.mark == "bar"
    assert view.spec_lst[1].sort == ''

    df = pd.read_csv("lux/data/car.csv")
    view = View([
        lux.Spec(attribute="Acceleration",
                 data_model="measure",
                 data_type="quantitative"),
        lux.Spec(attribute="Name", data_model="dimension", data_type="nominal")
    ])
    Compiler.determine_encoding(df, view)
    # High-cardinality dimension (Name): bars are sorted ascending.
    assert view.mark == "bar"
    assert view.spec_lst[1].sort == 'ascending'
Example #2
def test_remove():
    import pandas as pd
    from lux.view.View import View
    df = pd.read_csv("lux/data/car.csv")
    view = View(["Horsepower", "Horsepower"])
    view.load(df)
    view.remove_column_from_spec_new("Horsepower", remove_first=False)
    assert (view.spec_lst == []), "Remove all instances of Horsepower"

    df = pd.read_csv("lux/data/car.csv")
    view = View(["Horsepower", "Horsepower"])
    view.load(df)
    view.remove_column_from_spec_new("Horsepower", remove_first=True)
    assert (len(view.spec_lst) == 1), "Remove only 1 instance of Horsepower"
    assert (view.spec_lst[0].attribute == "Horsepower"
            ), "Remove only 1 instance of Horsepower"
Example #3
def test_vary_filter_val():
    import pandas as pd
    from lux.view.View import View
    df = pd.read_csv("lux/data/olympic.csv")
    view = View(["Height", "SportType=Ball"])
    view = view.load(df)
    df.set_context_as_view(view)
    df.show_more()
    # One recommendation per SportType value other than the one already filtered on.
    assert len(df.recommendation["Filter"]) == len(df["SportType"].unique()) - 1
Example #4
def combine(colAttrs, accum):
    # Recursive helper: `filters`, `collection`, `copy`, and `View` are defined
    # in the enclosing scope from which this function was extracted.
    last = (len(colAttrs) == 1)
    n = len(colAttrs[0])
    for i in range(n):
        columnList = copy.deepcopy(accum + [colAttrs[0][i]])
        if last:
            if len(filters) > 0:  # if we have filters, generate combinations for each row
                for row in filters:
                    specLst = copy.deepcopy(columnList + [row])
                    view = View(specLst,
                                title=f"{row.attribute} {row.filterOp} {row.value}")
                    collection.append(view)
            else:
                view = View(columnList)
                collection.append(view)
        else:
            combine(colAttrs[1:], columnList)
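
The combine helper above relies on names from its enclosing scope (filters, collection, copy, and View). For illustration only, here is a standalone sketch of the same enumeration idea using just the standard library; the attribute and filter strings are made-up placeholders, not lux.Spec objects.

import itertools

# Standalone sketch of what combine() enumerates: the cross product of the
# candidate attributes for each slot, optionally paired with each filter.
def enumerate_specs(col_attrs, filters=()):
    for columns in itertools.product(*col_attrs):
        if filters:
            for row in filters:
                yield list(columns) + [row]
        else:
            yield list(columns)

# Two attribute slots and one filter (placeholder strings).
for spec_lst in enumerate_specs([["Horsepower", "MPG"], ["Origin"]], ["Origin = USA"]):
    print(spec_lst)
# ['Horsepower', 'Origin', 'Origin = USA']
# ['MPG', 'Origin', 'Origin = USA']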
Example #5
def test_refresh_inplace():
    import pandas as pd
    import lux
    df = pd.DataFrame({
        'date': ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01'],
        'value': [10.5, 15.2, 20.3, 25.2]
    })

    # Before the conversion, the 'date' column is inferred as nominal (plain strings).
    assert df.data_type['nominal'][0] == 'date'

    from lux.view.View import View
    view = View(["date", "value"])
    view.load(df)

    # Converting the column in place should refresh the inferred type to temporal.
    df['date'] = pd.to_datetime(df['date'], format="%Y-%m-%d")

    assert df.data_type['temporal'][0] == 'date'
Example #6
def filter(ldf):
    '''
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Filter action.
    '''
    # For benchmarking (time, lux, utils, View, and interestingness are
    # module-level imports in the original source file).
    if ldf.toggle_benchmarking == True:
        tic = time.perf_counter()
    recommendation = {
        "action": "Filter",
        "description": "Shows possible visualizations when filtered by categorical variables in the dataset."
    }
    filters = utils.get_filter_specs(ldf.context)
    filter_values = []
    output = []
    # If a filter (Row) is specified, create visualizations where the data is filtered by all values of that filter's categorical variable.
    column_spec = utils.get_attrs_specs(ldf.current_view[0].spec_lst)
    column_spec_attr = map(lambda x: x.attribute, column_spec)
    if len(filters) > 0:
            # Get the unique values of each specified categorical filter attribute and create corresponding filters.
        for row in filters:
            unique_values = ldf.unique_values[row.attribute]
            filter_values.append(row.value)
            #creates views with new filters
            for val in unique_values:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_filter = lux.Spec(attribute=row.attribute, value=val)
                    new_spec.append(new_filter)
                    temp_view = View(new_spec)
                    output.append(temp_view)
    else:  #if no existing filters, create filters using unique values from all categorical variables in the dataset
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if ldf.cardinality[col] < 40 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            unique_values = ldf.unique_values[cat]
            for i in range(0, len(unique_values)):
                new_spec = column_spec.copy()
                new_filter = lux.Spec(attribute=cat,
                                      filter_op="=",
                                      value=unique_values[i])
                new_spec.append(new_filter)
                temp_view = View(new_spec)
                output.append(temp_view)
    vc = lux.view.ViewCollection.ViewCollection(output)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc

    #for benchmarking
    if ldf.toggle_benchmarking == True:
        toc = time.perf_counter()
        print(f"Performed filter action in {toc - tic:0.4f} seconds")
    return recommendation
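
A hedged usage sketch for the Filter action above, mirroring the test in Example #3: the dataset path, column names, and the filter value "USA" are assumptions, and the action is triggered indirectly through show_more() rather than by calling filter() directly.

import pandas as pd
import lux
from lux.view.View import View

# Set a context with one measure and one filter, then ask lux for recommendations.
df = pd.read_csv("lux/data/car.csv")
view = View(["Horsepower", "Origin=USA"])
view = view.load(df)
df.set_context_as_view(view)
df.show_more()
# One recommended view per remaining value of the filtered categorical variable.
print(len(df.recommendation["Filter"]))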
Example #7
def generalize(ldf):
	'''
	Generates all possible visualizations when one attribute or filter from the current view is removed.

	Parameters
	----------
	ldf : lux.luxDataFrame.LuxDataFrame
		LuxDataFrame with underspecified context.

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Generalize action.
	'''
	# Takes in a data object and generates a list of new data objects, each with a single
	# attribute or filter from the original object removed, together with interestingness scores.
	# For benchmarking (time, lux, utils, View, and interestingness are module-level imports
	# in the original source file).
	if ldf.toggleBenchmarking == True:
		tic = time.perf_counter()

	recommendation = {"action": "Generalize",
					  "description": "Remove one attribute or filter to observe a more general trend."}
	output = []
	excludedColumns = []
	columnSpec = list(filter(lambda x: x.value=="" and x.attribute!="Record", ldf.context))
	rowSpecs = utils.getFilterSpecs(ldf.context)
	# if we do not have enough column attributes, or too many, return no views.
	if(len(columnSpec)<2 or len(columnSpec)>4):
		recommendation["collection"] = []
		return recommendation
	for spec in columnSpec:
		columns = spec.attribute
		if type(columns) == list:
			for column in columns:
				if column not in excludedColumns:
					tempView = View(ldf.context)
					tempView.removeColumnFromSpecNew(column)
					excludedColumns.append(column)
					output.append(tempView)
		elif type(columns) == str:
			if columns not in excludedColumns:
				tempView = View(ldf.context)
				tempView.removeColumnFromSpecNew(columns)
				excludedColumns.append(columns)
				output.append(tempView)
	for i, spec in enumerate(rowSpecs):
		newSpec = ldf.context.copy()
		newSpec.pop(i)
		tempView = View(newSpec)
		output.append(tempView)
		
	vc = lux.view.ViewCollection.ViewCollection(output)
	vc = vc.load(ldf)
	recommendation["collection"] = vc
	for view in vc:
		view.score = interestingness(view,ldf)
	vc.sort(removeInvalid=True)
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		toc = time.perf_counter()
		print(f"Performed generalize action in {toc - tic:0.4f} seconds")
	return recommendation
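
For intuition, the core of the generalize step can be sketched without lux at all: each candidate view is the current attribute list with exactly one entry removed (the attribute names below are placeholders).

# Library-free sketch of the "remove one attribute at a time" idea.
def drop_one(attrs):
    return [attrs[:i] + attrs[i + 1:] for i in range(len(attrs))]

print(drop_one(["Horsepower", "Origin", "Cylinders"]))
# [['Origin', 'Cylinders'], ['Horsepower', 'Cylinders'], ['Horsepower', 'Origin']]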