Exemplo n.º 1
0
def test_interestingness_deviation_nan():
    """Score two filtered date/value charts on data whose dates include NaN.

    The vis filtered to category "A" is expected to score about 0.29 and the
    one filtered to category "B" about 0.94, so "A" must rank below "B".
    """
    import numpy as np

    aug = "2017-08-25 09:06:11+00:00"
    jul = "2017-07-25 15:06:11+00:00"
    # (date, category, value) triples, expanded into records below.
    rows = [
        (aug, "A", 25.0),
        (aug, "B", 1.2),
        (aug, "C", 1.3),
        (aug, "D", 1.4),
        (aug, "E", 1.5),
        (aug, "F", 0.1),
        (np.nan, "C", 0.2),
        (np.nan, "B", 0.2),
        (np.nan, "F", 0.3),
        (np.nan, "E", 0.3),
        (np.nan, "D", 0.4),
        (np.nan, "A", 10.4),
        (jul, "A", 15.5),
        (jul, "F", 1.0),
        (jul, "B", 0.1),
    ]
    records = [
        {"date": stamp, "category": label, "value": amount}
        for stamp, label, amount in rows
    ]
    frame = pd.DataFrame(records)
    from lux.vis.Vis import Vis

    vis_a = Vis(["date", "value", "category=A"], frame)
    vis_b = Vis(["date", "value", "category=B"], frame)
    from lux.interestingness.interestingness import interestingness

    score_a = interestingness(vis_a, frame)
    score_b = interestingness(vis_b, frame)
    assert np.isclose(score_a, 0.29, rtol=0.1)
    assert np.isclose(score_b, 0.94, rtol=0.1)
    assert score_a < score_b
Exemplo n.º 2
0
def test_interestingness_1_1_1(global_var):
    """Check Enhance ordering and Filter scoring for Horsepower with Origin = USA."""
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Origin", filter_op="=", value="USA",
                   bin_size=20),
    ])
    df._repr_html_()
    # check that top recommended Enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(enhance_recs):
        clauses = vis._inferred_intent
        if str(clauses[2].value) != "USA":
            continue
        added_attribute = str(clauses[1].attribute)
        if added_attribute == "Cylinders":
            rank1 = position
        if added_attribute == "Weight":
            rank2 = position
        if added_attribute == "Horsepower":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check for top recommended Filter graph score is not none
    assert interestingness(df.recommendation["Filter"][0], df) is not None
    # pytest.car_df is shared state: reset the intent so it does not leak
    # into tests that run afterwards (sibling tests do the same).
    df.clear_intent()
Exemplo n.º 3
0
def test_interestingness_0_2_0(global_var):
    """Check Enhance ordering and Filter/Generalize scoring for two measures."""
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration")
    ])
    df._repr_html_()
    # check that top recommended enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    for position, vis in enumerate(enhance_recs):
        third_attribute = str(vis._inferred_intent[2].attribute)
        if third_attribute == "Origin" and str(vis.mark) == "scatter":
            rank1 = position
        if third_attribute == "Displacement" and str(vis.mark) == "scatter":
            rank2 = position
    assert rank1 < rank2

    # check that top recommended filter graph score is not none
    assert interestingness(df.recommendation["Filter"][0], df) is not None
    # check that top recommended Generalize graph score is not none
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
    df.clear_intent()
Exemplo n.º 4
0
def test_interestingness_1_1_0():
    """Check Enhance scoring and Filter ordering for Horsepower over Year."""
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Year"),
    ])
    df._repr_html_()
    # check that top recommended Enhance graph score is not none (all graphs here have same score)
    assert interestingness(df.recommendation["Enhance"][0], df) is not None

    # check that top recommended filter graph score is not none and that ordering makes intuitive sense
    filter_recs = df.recommendation["Filter"]
    assert interestingness(filter_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(filter_recs):
        if len(vis.get_attr_by_attr_name("Cylinders")) > 0:
            cylinders = int(vis._inferred_intent[2].value)
            if cylinders == 6:
                rank1 = position
            if cylinders == 5:
                rank3 = position
        if len(vis.get_attr_by_attr_name("Origin")) > 0:
            if str(vis._inferred_intent[2].value) == "Europe":
                rank2 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # NOTE(review): this was labelled as the Generalize check, but it scores
    # the top Filter vis again; confirm which action was intended.
    assert interestingness(filter_recs[0], df) is not None
Exemplo n.º 5
0
def test_interestingness_1_1_1():
    """Check Enhance ordering and Filter scoring for Horsepower with Origin = USA."""
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Origin", filter_op="=", value="USA", bin_size=20)
    ])
    df.show_more()
    # check that top recommended Enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(enhance_recs):
        clauses = vis._inferred_intent
        if str(clauses[2].value) != "USA":
            continue
        added_attribute = str(clauses[1].attribute)
        if added_attribute == "Cylinders":
            rank1 = position
        if added_attribute == "Weight":
            rank2 = position
        if added_attribute == "Horsepower":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check for top recommended Filter graph score is not none
    assert interestingness(df.recommendation["Filter"][0], df) is not None
Exemplo n.º 6
0
def test_interestingness_0_1_0():
    """Check Enhance and Filter ordering when the intent is Horsepower alone."""
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([lux.Clause(attribute="Horsepower")])
    df._repr_html_()
    # check that top recommended enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(enhance_recs):
        clauses = vis._inferred_intent
        if vis.mark == "scatter" and clauses[1].attribute == "Weight":
            rank1 = position
        if vis.mark == "scatter" and clauses[1].attribute == "Acceleration":
            rank2 = position
        if vis.mark == "line" and clauses[0].attribute == "Year":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check that top recommended filter graph score is not none and that ordering makes intuitive sense
    filter_recs = df.recommendation["Filter"]
    assert interestingness(filter_recs[0], df) is not None
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(filter_recs):
        filter_value = vis._inferred_intent[2].value
        if filter_value == 4:
            rank1 = position
        if str(filter_value) == "Europe":
            rank2 = position
        if "1971" in str(filter_value):
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
Exemplo n.º 7
0
def test_interestingness_1_1_0(global_var):
    """Check Enhance scoring and Filter ordering for Horsepower over Year (Pandas executor)."""
    lux.config.set_executor_type("Pandas")
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent(
        [lux.Clause(attribute="Horsepower"),
         lux.Clause(attribute="Year")])
    df._ipython_display_()
    # check that top recommended Enhance graph score is not none (all graphs here have same score)
    assert interestingness(df.recommendation["Enhance"][0], df) is not None

    # check that top recommended filter graph score is not none and that ordering makes intuitive sense
    filter_recs = df.recommendation["Filter"]
    assert interestingness(filter_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(filter_recs):
        if len(vis.get_attr_by_attr_name("Cylinders")) > 0:
            cylinders = int(vis._inferred_intent[2].value)
            if cylinders == 6:
                rank1 = position
            if cylinders == 8:
                rank2 = position
        if len(vis.get_attr_by_attr_name("Origin")) > 0:
            if str(vis._inferred_intent[2].value) == "Europe":
                rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # NOTE(review): this was labelled as the Generalize check, but it scores
    # the top Filter vis again; confirm which action was intended.
    assert interestingness(filter_recs[0], df) is not None
    df.clear_intent()
Exemplo n.º 8
0
def filter(dobj):
    """Build the "Filter" recommendation for a data object.

    When the data object already has Row specs, one candidate is produced for
    every other unique value of each filtered attribute. Otherwise one
    candidate is produced for every unique value of every categorical
    variable in the dataset. Each candidate is scored, compiled, and the
    top 5 of the resulting collection are attached to the returned result.
    """
    result = lux.Result()
    recommendation = {
        "action": "Filter",
        "description": "Shows possible visualizations when filtered by categorical variables in the data object's dataset.",
    }
    row_filters = dobj.getObjByRowColType("Row")
    column_spec = dobj.getObjByRowColType("Column")
    compiled_views = []

    def add_candidate(attribute, value):
        # Column specs plus one new Row filter form the candidate's spec.
        candidate_spec = column_spec.copy()
        candidate_spec.append(lux.Row(fAttribute=attribute, fVal=value))
        candidate = lux.DataObj(dobj.dataset, candidate_spec)
        candidate.score = interestingness(candidate)
        # Compile the new data object before collecting its compiled form.
        candidate.compile()
        compiled_views.append(candidate.compiled)

    if len(row_filters) > 0:
        # Row specified: vary the value of each already-filtered attribute.
        handled_attributes = []
        excluded_values = []
        for row in row_filters:
            if row.fAttribute in handled_attributes:
                continue
            unique_values = dobj.dataset.df[row.fAttribute].unique()
            excluded_values.append(row.fVal)
            for value in unique_values:
                if value not in excluded_values:
                    add_candidate(row.fAttribute, value)
            handled_attributes.append(row.fAttribute)
    else:
        # No Row specified: filter on every value of every categorical variable.
        for category in dobj.dataset.dataType['categorical']:
            for value in dobj.dataset.df[category].unique():
                add_candidate(category, value)

    top_views = lux.DataObjCollection(compiled_views).topK(5)
    recommendation["collection"] = top_views
    result.addResult(recommendation, dobj)
    return result
Exemplo n.º 9
0
def test_interestingness_0_2_0():
    """Check Enhance/Filter ordering and Generalize scoring for two measures."""
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="Acceleration")
    ])
    df.show_more()
    # check that top recommended enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(enhance_recs):
        third_attribute = str(vis._inferred_intent[2].attribute)
        if third_attribute == "Origin" and str(vis.mark) == "scatter":
            rank1 = position
        if third_attribute == "Displacement" and str(vis.mark) == "scatter":
            rank2 = position
        if third_attribute == "Year" and str(vis.mark) == "scatter":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check that top recommended filter graph score is not none and that ordering makes intuitive sense
    filter_recs = df.recommendation["Filter"]
    assert interestingness(filter_recs[0], df) is not None
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(filter_recs):
        filter_value = str(vis._inferred_intent[2].value)
        if "1973" in filter_value:
            rank1 = position
        if "1976" in filter_value:
            rank2 = position
        if filter_value == "Europe":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check that top recommended Generalize graph score is not none
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
Exemplo n.º 10
0
def user_defined(ldf):
    '''
    Executes, scores and sorts the views currently defined by the context.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        dictionary with the "Current Views" action, its description and the
        frame's current view collection.
    '''
    recommendation = {
        "action": "Current Views",
        "description": "Shows a view collection defined by the context",
        "collection": ldf.current_view,
    }

    current_views = ldf.current_view
    PandasExecutor.execute(current_views, ldf)
    for current in current_views:
        current.score = interestingness(current, ldf)
    current_views.sort(remove_invalid=True)
    return recommendation
Exemplo n.º 11
0
def test_interestingness_1_1_1(global_var):
    """Check that the top Enhance and Filter vis of a SQL-backed table can be scored."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    tbl.set_intent(
        [
            lux.Clause(attribute="horsepower"),
            lux.Clause(attribute="origin", filter_op="=", value="USA", bin_size=20),
        ]
    )
    tbl._repr_html_()
    # check that top recommended Enhance graph score is not none
    assert interestingness(tbl.recommendation["Enhance"][0], tbl) is not None

    # check for top recommended Filter graph score is not none
    assert interestingness(tbl.recommendation["Filter"][0], tbl) is not None
    tbl.clear_intent()
Exemplo n.º 12
0
Arquivo: custom.py Projeto: zie225/lux
def custom(ldf):
    """
    Executes, scores and sorts the visualizations generated from the user's intent.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        dictionary with the "Current Vis" action, its description and the
        frame's current vis list.
    """
    recommendation = {
        "action": "Current Vis",
        "description": "Shows the list of visualizations generated based on user specified intent",
        "collection": ldf.current_vis,
    }

    current_list = ldf.current_vis
    PandasExecutor.execute(current_list, ldf)
    for current in current_list:
        current.score = interestingness(current, ldf)
    current_list.sort(remove_invalid=True)
    return recommendation
Exemplo n.º 13
0
def enhance(ldf):
    """
    Given a set of vis, generates possible visualizations when an additional attribute is added to the current vis.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Enhance action.
            The collection is empty (and no description is set) unless the
            intent holds exactly one or two attribute clauses.
    """

    filters = utils.get_filter_specs(ldf._intent)
    # Collect variables that already exist in the intent
    attr_specs = [
        clause for clause in ldf._intent
        if clause.value == "" and clause.attribute != "Record"
    ]
    # Enhance is only defined for one or two attributes. Bail out early for
    # anything else; with zero attributes the recommendation used to be left
    # unbound and the call ended in an UnboundLocalError.
    if len(attr_specs) not in (1, 2):
        return {"action": "Enhance", "collection": []}

    fltr_str = [
        fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters
    ]
    attr_str = [str(clause.attribute) for clause in attr_specs]
    intended_attrs = f'<p class="highlight-intent">{", ".join(attr_str + fltr_str)}</p>'
    if len(attr_specs) == 1:
        description = f"Augmenting current {intended_attrs} intent with additional attribute."
    else:
        description = f"Further breaking down current {intended_attrs} intent by additional attribute."
    recommendation = {"action": "Enhance", "description": description}

    # Clear channel so that channel not enforced based on input vis intent.
    # The clause objects are shared with `filters` and `attr_specs`.
    for clause in ldf._intent:
        clause.channel = ""
    intent = filters + attr_specs
    intent.append("?")
    vlist = lux.vis.VisList.VisList(intent, ldf)

    # Then use the data populated in the vis list to compute score
    for vis in vlist:
        vis.score = interestingness(vis, ldf)

    vlist.sort()
    vlist = vlist.showK()
    recommendation["collection"] = vlist
    return recommendation
Exemplo n.º 14
0
def test_interestingness_0_1_1():
    """Check that the top "Current Vis" for a wildcard Origin filter is scorable and is USA."""
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Origin", filter_op="=", value="?"),
        lux.Clause(attribute="MilesPerGal"),
    ])
    df._repr_html_()
    top_vis = df.recommendation["Current Vis"][0]
    assert interestingness(top_vis, df) is not None
    assert str(top_vis._inferred_intent[2].value) == "USA"
Exemplo n.º 15
0
def test_interestingness_0_2_1():
    """Check that the top Generalize vis for two measures plus a filter can be scored."""
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause(attribute="Acceleration", filter_op=">", value=10),
    ])
    df._repr_html_()
    # check that top recommended Generalize graph score is not none
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
Exemplo n.º 16
0
def enhance(dobj):
    """Build the "Enhance" recommendation for a data object.

    For every quantitative and then every categorical variable of the dataset
    that the data object's spec does not use yet, a candidate is created with
    that variable appended as a Column, then scored and compiled. The compiled
    candidates are attached to the returned result as a collection.
    """
    result = lux.Result()
    recommendation = {
        "action": "Enhance",
        "description": "Shows possible visualizations when an additional attribute is added to the current view.",
    }
    measure_names = dobj.dataset.dataType['quantitative']
    category_names = dobj.dataset.dataType['categorical']
    compiled_views = []

    # Names already referenced by the spec, through Column or Row entries.
    used_names = []
    for entry in dobj.spec:
        if entry.className == "Column":
            used_names.append(entry.columnName)
        elif entry.className == "Row":
            used_names.append(entry.fAttribute)

    # Quantitative variables are added first, then categorical ones.
    for group in (measure_names, category_names):
        for name in group:
            if name in used_names:
                continue
            extended_spec = dobj.spec.copy()
            extended_spec.append(Column(name))
            candidate = DataObj(dobj.dataset, extended_spec)
            candidate.score = interestingness(candidate)

            candidate.compile()
            compiled_views.append(candidate.compiled)

    recommendation["collection"] = DataObjCollection(compiled_views)
    result.addResult(recommendation, dobj)
    return result
Exemplo n.º 17
0
def test_interestingness_0_1_0(global_var):
    """Check Enhance and Filter ordering for Horsepower alone (Pandas executor)."""
    lux.config.set_executor_type("Pandas")
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([lux.Clause(attribute="Horsepower")])
    df._ipython_display_()
    # check that top recommended enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(enhance_recs):
        clauses = vis._inferred_intent
        if vis.mark == "scatter" and clauses[1].attribute == "Weight":
            rank1 = position
        if vis.mark == "scatter" and clauses[1].attribute == "Acceleration":
            rank2 = position
        if vis.mark == "line" and clauses[0].attribute == "Year":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check that top recommended filter graph score is not none and that ordering makes intuitive sense
    filter_recs = df.recommendation["Filter"]
    assert interestingness(filter_recs[0], df) is not None
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(filter_recs):
        filter_value = vis._inferred_intent[2].value
        if filter_value == 4:
            rank1 = position
        if str(filter_value) == "Europe":
            rank2 = position
        if "1970" in str(filter_value):
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
    df.clear_intent()
Exemplo n.º 18
0
def test_interestingness_0_1_1(global_var):
    """Check that the top "Current Vis" for a wildcard Origin filter is scorable and is USA."""
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Origin", filter_op="=", value="?"),
        lux.Clause(attribute="MilesPerGal"),
    ])
    df._repr_html_()
    top_vis = df.recommendation["Current Vis"][0]
    assert interestingness(top_vis, df) is not None
    assert str(top_vis._inferred_intent[2].value) == "USA"
    # pytest.car_df is shared state: reset the intent so it does not leak
    # into tests that run afterwards (sibling tests do the same).
    df.clear_intent()
Exemplo n.º 19
0
def test_interestingness_0_2_1(global_var):
    """Check that the top Generalize vis for two measures plus a filter can be scored."""
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([
        lux.Clause(attribute="Horsepower"),
        lux.Clause(attribute="MilesPerGal"),
        lux.Clause(attribute="Acceleration", filter_op=">", value=10),
    ])
    df._repr_html_()
    # check that top recommended Generalize graph score is not none
    assert interestingness(df.recommendation["Generalize"][0], df) is not None
    # pytest.car_df is shared state: reset the intent so it does not leak
    # into tests that run afterwards (sibling tests do the same).
    df.clear_intent()
Exemplo n.º 20
0
def test_interestingness_1_0_0(global_var):
    """Check Enhance and Filter ordering when the intent is Origin alone."""
    df = pytest.car_df
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([lux.Clause(attribute="Origin")])
    df._repr_html_()
    # check that top recommended enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(enhance_recs):
        x_attribute = vis.get_attr_by_channel("x")[0].attribute
        if x_attribute == "Displacement":
            rank1 = position
        if x_attribute == "Weight":
            rank2 = position
        if x_attribute == "Acceleration":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check that top recommended filter graph score is not none and that ordering makes intuitive sense
    filter_recs = df.recommendation["Filter"]
    assert interestingness(filter_recs[0], df) is not None
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(filter_recs):
        if len(vis.get_attr_by_attr_name("Cylinders")) > 0:
            cylinders = int(vis._inferred_intent[2].value)
            if cylinders == 8:
                rank1 = position
            if cylinders == 6:
                rank3 = position
        if "ford" in str(vis._inferred_intent[2].value):
            rank2 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
    df.clear_intent()
Exemplo n.º 21
0
def test_interestingness_0_1_1(global_var):
    """Check the top "Current Vis" of a SQL-backed table for a wildcard origin filter."""
    tbl = lux.LuxSQLTable()
    tbl.set_SQL_table("cars")

    tbl.set_intent(
        [
            lux.Clause(attribute="origin", filter_op="=", value="?"),
            lux.Clause(attribute="milespergal"),
        ]
    )
    tbl._repr_html_()
    top_vis = tbl.recommendation["Current Vis"][0]
    assert interestingness(top_vis, tbl) is not None
    assert str(top_vis._inferred_intent[2].value) == "USA"
    tbl.clear_intent()
Exemplo n.º 22
0
def enhance(ldf):
    '''
    Given a set of views, generates possible visualizations when an additional attribute is added to the current view.

    When ``ldf.toggle_benchmarking`` is set, the elapsed time is printed
    (not on the early return taken for more than two attributes).

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Enhance action.
    '''
    # for benchmarking: read the flag once so start and stop stay paired
    benchmarking = bool(ldf.toggle_benchmarking)
    if benchmarking:
        tic = time.perf_counter()
    recommendation = {
        "action":
        "Enhance",
        "description":
        "Shows possible visualizations when an additional attribute is added to the current view."
    }
    filters = utils.get_filter_specs(ldf.context)
    # Collect variables that already exist in the context
    attr_specs = [
        spec for spec in ldf.context
        if spec.value == "" and spec.attribute != "Record"
    ]
    # if there are too many column attributes, don't generate Enhance recommendations
    if len(attr_specs) > 2:
        recommendation["collection"] = []
        return recommendation
    query = filters + attr_specs
    query.append("?")
    vc = lux.view.ViewCollection.ViewCollection(query)
    vc = vc.load(ldf)

    # Then use the data populated in the view collection to compute score
    for view in vc:
        view.score = interestingness(view, ldf)

    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed enhance action in {toc - tic:0.4f} seconds")
    return recommendation
Exemplo n.º 23
0
def test_interestingness_1_0_0():
    """Check Enhance and Filter ordering when the intent is Origin alone."""
    df = pd.read_csv("lux/data/car.csv")
    df["Year"] = pd.to_datetime(df["Year"], format="%Y")

    df.set_intent([lux.Clause(attribute="Origin")])
    df.show_more()
    # check that top recommended enhance graph score is not none and that ordering makes intuitive sense
    enhance_recs = df.recommendation["Enhance"]
    assert interestingness(enhance_recs[0], df) is not None
    # Each rank holds the index of the last matching vis; -1 means no match.
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(enhance_recs):
        first_attribute = vis._inferred_intent[0].attribute
        if first_attribute == "Displacement":
            rank1 = position
        if first_attribute == "Weight":
            rank2 = position
        if first_attribute == "Acceleration":
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3

    # check that top recommended filter graph score is not none and that ordering makes intuitive sense
    filter_recs = df.recommendation["Filter"]
    assert interestingness(filter_recs[0], df) is not None
    rank1 = -1
    rank2 = -1
    rank3 = -1
    for position, vis in enumerate(filter_recs):
        filter_value = vis._inferred_intent[2].value
        # NOTE(review): int() is applied to every filter value, so this
        # raises if any recommended filter value is not int-convertible.
        numeric_value = int(filter_value)
        if numeric_value == 8:
            rank1 = position
        if numeric_value == 6:
            rank2 = position
        if "1972" in str(filter_value):
            rank3 = position
    assert rank1 < rank2 and rank1 < rank3 and rank2 < rank3
Exemplo n.º 24
0
def test_interestingness_1_2_0():
    """Check that a vis of Name on y coloured by Cylinders scores below 0.01."""
    from lux.vis.Vis import Vis, Clause
    from lux.interestingness.interestingness import interestingness

    df = pd.read_csv("lux/data/car.csv")
    y_clause = Clause(attribute="Name", channel="y")
    color_clause = Clause(attribute="Cylinders", channel="color")

    new_vis = Vis([y_clause, color_clause])
    new_vis.refresh_source(df)

    assert interestingness(new_vis, df) < 0.01
Exemplo n.º 25
0
def test_interestingness_1_2_0(global_var):
    """Check that a vis of Name on y coloured by Cylinders scores below 0.01."""
    from lux.vis.Vis import Vis, Clause
    from lux.interestingness.interestingness import interestingness

    df = pytest.car_df
    y_clause = Clause(attribute="Name", channel="y")
    color_clause = Clause(attribute="Cylinders", channel="color")

    new_vis = Vis([y_clause, color_clause])
    new_vis.refresh_source(df)

    assert interestingness(new_vis, df) < 0.01
Exemplo n.º 26
0
def distribution(ldf, dataTypeConstraint="quantitative"):
    '''
    Generates bar chart distributions of different attributes in the dataset.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified context.

    dataTypeConstraint: str
        The variable that controls the type of distribution chart that will be rendered.
        Must be "quantitative" or "nominal".

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Distribution action.

    Raises
    ------
    ValueError
        If dataTypeConstraint is neither "quantitative" nor "nominal"
        (this used to surface as an UnboundLocalError).
    '''
    if dataTypeConstraint == "quantitative":
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate count distributions of different attributes in the dataset.",
        }
    elif dataTypeConstraint == "nominal":
        recommendation = {
            "action": "Category",
            "description": "Show bar chart distributions of different attributes in the dataset.",
        }
    else:
        raise ValueError(
            f"Unsupported dataTypeConstraint {dataTypeConstraint!r}; "
            "expected 'quantitative' or 'nominal'."
        )

    # for benchmarking: read the flag once so start and stop stay paired
    benchmarking = bool(ldf.toggleBenchmarking)
    if benchmarking:
        tic = time.perf_counter()

    # One wildcard spec of the requested data type, plus the frame's filters.
    query = [lux.Spec("?", dataType=dataTypeConstraint)]
    query.extend(ldf.filterSpecs)
    vc = ViewCollection(query)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed distribution action in {toc - tic:0.4f} seconds")
    return recommendation
Exemplo n.º 27
0
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generates bivariate visualizations that represent all pairwise relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
            LuxDataFrame with underspecified intent.

    ignore_transpose: bool
            Boolean flag to ignore pairs of attributes whose transpose are already computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Correlation action.
            The collection is empty when the dataframe has fewer than 5 rows.

    Raises
    ------
    ValueError
            If a generated visualization has fewer than 2 measure attributes.
    """
    filter_specs = utils.get_filter_specs(ldf._intent)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }
    # Doesn't make sense to compute correlation with fewer than 5 data values,
    # so return before building and scoring visualizations that would be discarded.
    if len(ldf) < 5:
        recommendation["collection"] = []
        return recommendation

    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    vlist = VisList(intent, ldf)

    # Then use the data populated in the vis list to compute score
    for vis in vlist:
        measures = vis.get_attr_by_data_model("measure")
        if len(measures) < 2:
            # Report the measures actually found on this vis. (ldf.columns holds
            # column labels, which have no `.attribute`.)
            raise ValueError(
                f"Can not compute correlation between {[x.attribute for x in measures]} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute

        # Score the pair unless its transpose was already computed and we were asked to skip it
        should_score = (not ignore_transpose) or check_transpose_not_computed(vlist, msr1, msr2)
        if should_score:
            vis.score = interestingness(vis, ldf)
        else:
            vis.score = -1

    vlist.sort()
    vlist = vlist.showK()
    recommendation["collection"] = vlist
    return recommendation
Exemplo n.º 28
0
def enhance(ldf):
    """
    Given a set of views, generates possible visualizations when an additional attribute is added to the current vis.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Enhance action.
        The collection is empty when the intent has no attribute clauses or more than two.
    """
    # for benchmarking: read the flag once so `tic` is guaranteed to exist
    # whenever `toc` is taken at the end
    benchmarking = bool(ldf.toggle_benchmarking)
    if benchmarking:
        tic = time.perf_counter()

    filters = utils.get_filter_specs(ldf.intent)
    # Collect variables that already exist in the intent
    attr_specs = [
        clause for clause in ldf.intent
        if clause.value == "" and clause.attribute != "Record"
    ]
    fltr_str = [
        fltr.attribute + fltr.filter_op + str(fltr.value) for fltr in filters
    ]
    attr_str = [clause.attribute for clause in attr_specs]
    intended_attrs = '<p class="highlight-intent">' + ', '.join(
        attr_str + fltr_str) + '</p>'

    if len(attr_specs) == 1:
        description = f"Augmenting current {intended_attrs} intent with additional attribute."
    elif len(attr_specs) == 2:
        description = f"Further breaking down current {intended_attrs} intent by additional attribute."
    else:
        # With no attribute there is nothing to enhance, and with too many column
        # attributes we don't generate Enhance recommendations: return an empty collection.
        return {"action": "Enhance", "collection": []}
    recommendation = {"action": "Enhance", "description": description}

    # Existing filters and attributes plus one wildcard for the additional attribute
    intent = filters + attr_specs
    intent.append("?")
    vc = lux.vis.VisList.VisList(intent, ldf)

    # Then use the data populated in the vis list to compute score
    for view in vc:
        view.score = interestingness(view, ldf)

    vc = vc.topK(15)
    recommendation["collection"] = vc

    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed enhance action in {toc - tic:0.4f} seconds")
    return recommendation
Exemplo n.º 29
0
def filter(ldf):
    """
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Filter action.
            The collection is empty when the single existing filter is on an attribute
            that is neither nominal nor quantitative.
    """
    filters = utils.get_filter_specs(ldf._intent)
    output = []
    column_spec = utils.get_attrs_specs(ldf.current_vis[0]._inferred_intent)
    # Materialize the attribute names as a list: a lazy `map` iterator would be
    # consumed by the first membership test below, so later columns would never
    # be recognized as already specified.
    column_spec_attr = [clause.attribute for clause in column_spec]

    # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical variable
    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]
        fltr_type = ldf.data_type_lookup[fltr.attribute]

        if fltr_type == "nominal":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative value.",
            }
            # The value already used by the current filter is not offered again
            filter_values = [fltr.value]
            # creates vis with new filters
            for val in ldf.unique_values[fltr.attribute]:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_spec.append(
                        lux.Clause(attribute=fltr.attribute, value=val))
                    output.append(Vis(new_spec))
        elif fltr_type == "quantitative":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative inequality operation.",
            }
            # Maps each inequality to its complement; any other operator maps to None.
            # TODO: need to support case where fltr_op is "=" --> auto-binned ranges
            complementary_ops = {">": "<=", "<": ">=", ">=": "<", "<=": ">"}

            # Create vis with complementary filter operations
            new_spec = column_spec.copy()
            new_spec.append(
                lux.Clause(
                    attribute=fltr.attribute,
                    filter_op=complementary_ops.get(fltr.filter_op),
                    value=fltr.value,
                ))
            output.append(Vis(new_spec, score=1))
        else:
            # No alternative-filter strategy exists for other data types; return an
            # empty recommendation rather than failing on an unbound `recommendation`.
            return {"action": "Filter", "collection": []}
    # otherwise (no single existing filter), create filters using unique values from all categorical variables in the dataset
    else:
        intended_attrs = ", ".join([
            clause.attribute for clause in ldf._intent
            if clause.value == "" and clause.attribute != "Record"
        ])
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
        }
        # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
        categorical_vars = [
            col for col in ldf.columns
            if ldf.cardinality[col] < 30 and col not in column_spec_attr
        ]
        for cat in categorical_vars:
            for val in ldf.unique_values[cat]:
                new_spec = column_spec.copy()
                new_spec.append(
                    lux.Clause(attribute=cat, filter_op="=", value=val))
                output.append(Vis(new_spec))

    vlist = lux.vis.VisList.VisList(output, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist = vlist.topK(15)
    recommendation["collection"] = vlist
    return recommendation
Exemplo n.º 30
0
def univariate(ldf, data_type_constraint="quantitative"):
    """
    Generates univariate visualizations of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    data_type_constraint: str
        Controls the type of distribution chart that will be rendered.
        One of "quantitative" (action "Distribution"), "nominal" (action
        "Occurrence") or "temporal" (action "Temporal").

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Distribution action.
        The collection is empty when the dataframe has too few rows for the chart type.

    Raises
    ------
    ValueError
        If data_type_constraint is not one of the supported values.
    """
    # Validate up front: without this, an unsupported constraint would leave
    # `intent` and `recommendation` unbound and fail later with an UnboundLocalError.
    if data_type_constraint not in ("quantitative", "nominal", "temporal"):
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            "expected 'quantitative', 'nominal' or 'temporal'."
        )

    filter_specs = utils.get_filter_specs(ldf._intent)
    # Minimum number of rows for the chart type to make sense (None = no minimum)
    min_rows = None
    if data_type_constraint == "quantitative":
        intent = [
            lux.Clause("?",
                       data_type="quantitative",
                       exclude="Number of Records")
        ]
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes."
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        min_rows = 5
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes."
        }
    else:  # "temporal"
        intent = [lux.Clause("?", data_type="temporal")]
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes."
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        min_rows = 3
    intent.extend(filter_specs)

    if min_rows is not None and len(ldf) < min_rows:
        recommendation["collection"] = []
        return recommendation

    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    # Basic visualizations should not be capped, so sort without applying topK
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation