Пример #1
0
 def intent(self, inten):
     """
     Set the intent from ``inten``, dispatching on the type of the elements it yields.

     Parameters
     ----------
     inten : iterable
         Iterated element by element. The code distinguishes ``lux.Clause`` elements,
         ``str`` elements and everything else, and also checks whether ``inten`` itself
         is a ``list`` or a ``Vis``.

     Raises
     ------
     TypeError
         If a ``lux.Clause`` element is found while ``inten`` is neither a list nor a ``Vis``.
     """
     for intent_input in inten:
         if isinstance(intent_input, lux.Clause):
             # Clause element: validate the container type and hand the WHOLE container over.
             # NOTE(review): there is no `break` in this branch, so the setter below is
             # called again for every further Clause element - confirm this is intended.
             is_list_input = isinstance(inten, list)
             is_vis_input = isinstance(inten, Vis)
             if not (is_list_input or is_vis_input):
                 raise TypeError(
                     "Input intent must be either a list (of strings or lux.Clause) or a lux.Vis object."
                     "\nSee more at: https://lux-api.readthedocs.io/en/latest/source/guide/intent.html"
                 )
             if is_list_input:
                 self.set_intent(inten)
             elif is_vis_input:
                 self.set_intent_as_vis(inten)
         elif isinstance(intent_input, str):
             if len(intent_input) <= 1:
                 # Element of length 0 or 1 (iterating a plain string yields such elements):
                 # the whole of `inten` is formatted into a single Clause.
                 inten = [lux.Clause(f"{inten}")]
                 # `inten` was just rebound to a list, so is_list_input is True here and
                 # the Vis branch below cannot be taken.
                 is_list_input = isinstance(inten, list)
                 is_vis_input = isinstance(inten, Vis)
                 if is_list_input:
                     self.set_intent(inten)
                 elif is_vis_input:
                     self.set_intent_as_vis(inten)
                 break
             else:
                 # Longer string element: only THIS element is wrapped in a Clause.
                 # NOTE(review): the loop stops right after, so any remaining elements of
                 # `inten` are not processed - confirm this is intended.
                 intent_input = [lux.Clause(f"{intent_input}")]
                 # `intent_input` was just rebound to a list, so the Vis branch is not taken.
                 is_list_input = isinstance(intent_input, list)
                 is_vis_input = isinstance(intent_input, Vis)
                 if is_list_input:
                     self.set_intent(intent_input)
                 elif is_vis_input:
                     self.set_intent_as_vis(intent_input)
                 break
         else:
             # Any other element type is only reported on stdout; iteration continues.
             print("other")
Пример #2
0
def test_case2():
    """A mixed list of a column name and a ``lux.Clause`` is stored as ``lux.Clause`` objects."""
    frame = pd.read_csv("lux/data/car.csv")
    frame.set_intent(["Horsepower", lux.Clause("MilesPerGal", channel="x")])
    # Walk the stored intent in order: every entry must be exactly a lux.Clause
    # carrying the attribute that was supplied at that position.
    for position, expected_attribute in enumerate(("Horsepower", "MilesPerGal")):
        assert type(frame._intent[position]) is lux.Clause
        assert frame._intent[position].attribute == expected_attribute
Пример #3
0
def row_group(ldf):
    """
    Build the "Row Groups" recommendation: one visualization per row of ``ldf``.

    Every row is converted into its own small frame with ``reset_index()`` and
    paired with an intent made of the dimension name and a measure clause for
    the row. Rows are only visualized when the index has a single level;
    otherwise the returned collection is empty.

    Parameters
    ----------
    ldf : lux.core.frame
        Dataframe whose rows are visualized.

    Returns
    -------
    recommendation : Dict[str,obj]
        Dictionary with the "action", "description" and "collection" keys.
    """
    charts = []
    if ldf.index.nlevels == 1:
        # Name under which the former column labels are referenced in the intent;
        # "index" is used when the column axis is unnamed.
        dim_name = "index" if ldf.columns.name is None else ldf.columns.name
        # TODO: data type / data model of the "index" column still has to be auto-detected
        for position in range(len(ldf)):
            series = ldf.iloc[position, ]
            series_frame = series.reset_index()
            measure_clause = lux.Clause(series.name, data_model="measure", aggregation=None)
            charts.append(Vis([dim_name, measure_clause], series_frame))
    # No interestingness score is computed on purpose: the arrangement of the
    # aggregated data has to be preserved.
    return {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
        "collection": VisList(charts),
    }
Пример #4
0
def column_group(ldf):
    """
    Build the "Column Groups" recommendation: one visualization per column, against the index.

    Parameters
    ----------
    ldf : lux.core.frame
        Dataframe whose columns are visualized. When its columns are a
        ``pd.DatetimeIndex`` they are replaced IN PLACE by their formatted
        string labels (``ldf_flat`` is an alias of ``ldf`` at that point).

    Returns
    -------
    recommendation : Dict[str,obj]
        Dictionary with the "action", "description" and "collection" keys. The
        collection is empty when the index has more than one level.
    """
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        # Vis attributes are referenced by string, so datetime labels are turned into strings.
        # After this assignment ldf.columns is no longer a DatetimeIndex, which is why no
        # second conversion is needed further down.
        ldf_flat.columns = ldf_flat.columns.format()
    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
    ldf_flat = ldf_flat.reset_index()
    if ldf.index.nlevels == 1:
        # reset_index() labels an unnamed index "index"; use the same name in the intent
        # instead of passing None as an attribute.
        index_column_name = ldf.index.name if ldf.index.name is not None else "index"
        for attribute in ldf.columns:
            vis = Vis(
                [index_column_name, lux.Clause(str(attribute), aggregation=None)],
                ldf_flat,
            )
            collection.append(vis)
    vlst = VisList(collection)
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf

    recommendation["collection"] = vlst
    return recommendation
Пример #5
0
def test_case2(global_var):
    """
    A mixed list of a column name and a ``lux.Clause`` is stored as ``lux.Clause`` objects.

    The dataframe is the one stored on the ``pytest`` module (``pytest.car_df``), so
    the intent is always cleared afterwards, even when an assertion fails, to avoid
    leaking state into whatever uses that object next.
    """
    df = pytest.car_df
    try:
        df.set_intent(["Horsepower", lux.Clause("MilesPerGal", channel="x")])
        assert type(df._intent[0]) is lux.Clause
        assert df._intent[0].attribute == "Horsepower"
        assert type(df._intent[1]) is lux.Clause
        assert df._intent[1].attribute == "MilesPerGal"
    finally:
        # Runs on success and on failure alike.
        df.clear_intent()
Пример #6
0
def column_group(ldf):
    """
    Build the "Column Groups" recommendation: one visualization per non-object column,
    plotted against the index.

    Parameters
    ----------
    ldf : lux.core.frame
        Dataframe whose columns are visualized. When its columns are a
        ``pd.DatetimeIndex`` they are replaced IN PLACE by their formatted
        string labels (``ldf_flat`` is an alias of ``ldf`` at that point).

    Returns
    -------
    recommendation : Dict[str,obj]
        Dictionary with the "action", "description" and "collection" keys. The
        collection is empty when the index has more than one level.
    """
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    ldf_flat = ldf
    if isinstance(ldf.columns, pd.DatetimeIndex):
        # After this assignment ldf.columns holds plain string labels, so no further
        # datetime-to-string conversion is required below.
        ldf_flat.columns = ldf_flat.columns.format()

    # use a single shared ldf_flat so that metadata doesn't need to be computed for every vis
    ldf_flat = ldf_flat.reset_index()
    if ldf.index.nlevels == 1:
        # An unnamed index falls back to "index" as its column name.
        index_column_name = ldf.index.name if ldf.index.name else "index"
        for attribute in ldf.columns:
            # Only non-object columns are charted as measures; a column literally
            # called "index" is skipped.
            if ldf[attribute].dtype != "object" and (attribute != "index"):
                vis = Vis([
                    lux.Clause(
                        attribute=index_column_name,
                        data_type="nominal",
                        data_model="dimension",
                        aggregation=None,
                    ),
                    lux.Clause(
                        attribute=str(attribute),
                        data_type="quantitative",
                        data_model="measure",
                        aggregation=None,
                    ),
                ])
                collection.append(vis)
    vlst = VisList(collection, ldf_flat)
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated ldf

    recommendation["collection"] = vlst
    return recommendation
Пример #7
0
 def random_categorical(ldf):
     """
     Recommend bar charts for nominal attributes of ``ldf``, all with the same score.

     Every generated visualization gets the constant score 10 before the list is
     cut down with ``topK(15)``.

     Returns
     -------
     Dict[str,obj]
         Dictionary with the "action", "description" and "collection" keys.
     """
     bar_charts = VisList([lux.Clause("?", data_type="nominal")], ldf)
     for chart in bar_charts:
         chart.score = 10
     return {
         "action": "bars",
         "description": "Random list of Bar charts",
         "collection": bar_charts.topK(15),
     }
Пример #8
0
def row_group(ldf):
    """
    Build the "Row Groups" recommendation: one visualization per row of ``ldf``.

    Every row is converted into its own small frame with ``reset_index()`` and
    paired with an intent made of the dimension name and a measure clause for
    the row. Rows are only visualized when the index has a single level;
    otherwise the returned collection is empty.

    Parameters
    ----------
    ldf : lux.core.frame
        Dataframe whose rows are visualized.

    Returns
    -------
    recommendation : Dict[str,obj]
        Dictionary with the "action", "description", "long_description" and
        "collection" keys.
    """
    charts = []
    if ldf.index.nlevels == 1:
        # Name under which the former column labels are referenced in the intent;
        # "index" is used when the column axis is unnamed.
        dim_name = "index" if ldf.columns.name is None else ldf.columns.name
        # TODO: data type / data model of the "index" column still has to be auto-detected
        for position in range(len(ldf)):
            series = ldf.iloc[position, ]
            series_frame = series.reset_index()
            measure_clause = lux.Clause(series.name, data_model="measure", aggregation=None)
            charts.append(Vis([dim_name, measure_clause], series_frame))
    # No interestingness score is computed on purpose: the arrangement of the
    # aggregated data has to be preserved.
    return {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
        "long_description": 'A row index can be thought of as an extra row that indicates the values that the user is interested in. \
            Lux focuses on visualizing named dataframe indices, i.e., indices with a non-null name property, as a proxy of the attribute \
                that the user is interested in or have operated on (e.g., group-by attribute). In particular, dataframes with named indices \
                    are often pre-aggregated, so Lux visualizes exactly the values that the dataframe portrays. \
                        <a href="https://lux-api.readthedocs.io/en/latest/source/advanced/indexgroup.html" target="_blank">More details</a>',
        "collection": VisList(charts),
    }
Пример #9
0
def row_group(ldf):
    """
    Build the "Row Groups" recommendation: one visualization per row of ``ldf``.

    Every row is converted into its own small frame with ``reset_index()``. When
    the columns of ``ldf`` are a ``pd.DatetimeIndex`` the dimension column of that
    frame is marked as "temporal". Rows are only visualized when the index has a
    single level; otherwise the returned collection is empty.

    Parameters
    ----------
    ldf : lux.core.frame
        Dataframe whose rows are visualized. If ``ldf.toggle_benchmarking`` is
        True the elapsed time is printed.

    Returns
    -------
    recommendation : Dict[str,obj]
        Dictionary with the "action", "description" and "collection" keys.
    """
    # for benchmarking: the flag is read once, so the start time always exists when it is reported
    tic = time.perf_counter() if ldf.toggle_benchmarking == True else None
    recommendation = {
        "action": "Row Groups",
        "description": "Shows charts of possible visualizations with respect to the row-wise index.",
    }
    collection = []

    if ldf.index.nlevels == 1:
        # "index" is used when the column axis is unnamed.
        dim_name = ldf.columns.name if ldf.columns.name is not None else "index"
        # Loop-invariant: the column axis does not change while rows are visited.
        has_datetime_columns = isinstance(ldf.columns, pd.DatetimeIndex)
        # TODO: data type / data model of the "index" column still has to be auto-detected
        for row_id in range(len(ldf)):
            row = ldf.iloc[row_id, ]
            rowdf = row.reset_index()
            if has_datetime_columns:
                rowdf.data_type_lookup[dim_name] = "temporal"
            vis = Vis([dim_name, lux.Clause(row.name, aggregation=None)], rowdf)
            collection.append(vis)
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated data
    recommendation["collection"] = VisList(collection)

    # for benchmarking
    if tic is not None:
        toc = time.perf_counter()
        # The message names this action (it previously said "enhance", a copy-paste slip).
        print(f"Performed row group action in {toc - tic:0.4f} seconds")
    return recommendation
Пример #10
0
def column_group(ldf):
    """
    Build the "Column Groups" recommendation: one visualization per column, against the index.

    Works on a copy of ``ldf``; each visualization receives a two-column frame
    made of the index and the column itself.

    Parameters
    ----------
    ldf : lux.core.frame
        Dataframe whose columns are visualized. If ``ldf.toggle_benchmarking`` is
        True the elapsed time is printed.

    Returns
    -------
    recommendation : Dict[str,obj]
        Dictionary with the "action", "description" and "collection" keys. The
        collection is empty when the index has more than one level.
    """
    # for benchmarking: the flag is read once, so the start time always exists when it is reported
    tic = time.perf_counter() if ldf.toggle_benchmarking == True else None
    recommendation = {
        "action": "Column Groups",
        "description": "Shows charts of possible visualizations with respect to the column-wise index.",
    }
    collection = []
    data = ldf.copy()
    if ldf.index.nlevels == 1:
        # reset_index() labels an unnamed index "index"; use the same name in the intent
        # instead of passing None as an attribute.
        index_column_name = ldf.index.name if ldf.index.name is not None else "index"
        if isinstance(ldf.columns, pd.DatetimeIndex):
            # Index.to_native_types() was deprecated in pandas 1.2 and later removed;
            # astype(str) is the documented replacement for string labels.
            data.columns = ldf.columns.astype(str)
        for attribute in data.columns:
            vis = Vis(
                [index_column_name, lux.Clause(str(attribute), aggregation=None)],
                data[attribute].reset_index(),
            )
            collection.append(vis)
    # Note that we are not computing interestingness score here because we want to preserve the arrangement of the aggregated data
    recommendation["collection"] = VisList(collection)

    # for benchmarking
    if tic is not None:
        toc = time.perf_counter()
        # The message names this action (it previously said "enhance", a copy-paste slip).
        print(f"Performed column group action in {toc - tic:0.4f} seconds")
    return recommendation
Пример #11
0
def add_filter(ldf):
    """
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical
    value filters the data.

    Three situations are handled:

    * exactly one filter in the intent: alternative values (nominal attribute) or
      complementary inequality operators (quantitative attribute) are proposed; for any
      other data type an empty "Filter" recommendation is returned;
    * otherwise: filters are generated from the unique values of every low-cardinality
      column that is not already part of the current visualization;
    * a single current "line" visualization with at least one filter: the result is
      replaced by a "Similarity" recommendation.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Filter action.
    """
    filters = utils.get_filter_specs(ldf._intent)
    filter_values = []
    output = []
    # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical
    # variable
    # NOTE(review): current_vis is dereferenced here although it is checked against None
    # further down - confirm callers always provide a current vis.
    column_spec = utils.get_attrs_specs(ldf.current_vis[0].intent)
    column_spec_attr = [clause.attribute for clause in column_spec]
    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]

        if ldf.data_type[fltr.attribute] == "nominal":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an "
                               f"alternative value.",
                "long_description": f"Swap out the filter value for {fltr.attribute} to other possible values, while "
                                    f"keeping all else the same. Visualizations are ranked based on interestingness",
            }
            unique_values = ldf.unique_values[fltr.attribute]
            filter_values.append(fltr.value)
            # creates vis with new filters
            for val in unique_values:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_filter = lux.Clause(attribute=fltr.attribute, value=val)
                    new_spec.append(new_filter)
                    temp_vis = Vis(new_spec)
                    output.append(temp_vis)
        elif ldf.data_type[fltr.attribute] == "quantitative":
            recommendation = {
                "action": "Filter",
                "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an "
                               f"alternative inequality operation.",
                "long_description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an "
                                    f"alternative inequality operation.",
            }

            # Create vis with complementary filter operations
            # NOTE: This section of code has been modified to allow for the rendering of multiple vis
            for op in get_complementary_ops(fltr.filter_op):
                new_spec = column_spec.copy()
                new_filter = lux.Clause(
                    attribute=fltr.attribute,
                    filter_op=op,
                    value=fltr.value,
                )
                new_spec.append(new_filter)
                temp_vis = Vis(new_spec, score=1)
                output.append(temp_vis)
        else:
            # No alternatives are generated for other data types. Without this branch
            # `recommendation` stayed unbound and the function crashed further down;
            # now an empty Filter recommendation is returned instead.
            recommendation = {
                "action": "Filter",
                "description": f"No alternative filters are available for the "
                               f"<p class='highlight-intent'>{fltr.attribute}</p> filter.",
                "long_description": f"Alternative filters are only generated for nominal and quantitative "
                                    f"attributes, so no recommendations are shown for {fltr.attribute}.",
            }

    # if no existing filters, create filters using unique values from all categorical variables in the dataset
    else:
        intended_attrs = ", ".join(
            [
                str(clause.attribute)
                for clause in ldf._intent
                if clause.value == "" and clause.attribute != "Record"
            ]
        )
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
            "long_description": f"Adding any filter while keeping the attributes on the x and y axes fixed. "
                                f"Visualizations are ranked based on interestingness",
        }
        categorical_vars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if 1 < ldf.cardinality[col] < 30 and col not in column_spec_attr:
                categorical_vars.append(col)
        for cat in categorical_vars:
            unique_values = ldf.unique_values[cat]
            for val in unique_values:
                new_spec = column_spec.copy()
                new_filter = lux.Clause(attribute=cat, filter_op="=", value=val)
                new_spec.append(new_filter)
                temp_vis = Vis(new_spec)
                output.append(temp_vis)
    if (
        ldf.current_vis is not None
        and len(ldf.current_vis) == 1
        and ldf.current_vis[0].mark == "line"
        and len(get_filter_specs(ldf.intent)) > 0
    ):
        # A filtered line chart overrides whatever was prepared above: `output` becomes
        # an intent (list of clauses) instead of a list of Vis objects.
        recommendation = {
            "action": "Similarity",
            "description": "Show other charts that are visually similar to the Current vis.",
            "long_description": "Show other charts that are visually similar to the Current vis.",
        }
        last = get_filter_specs(ldf.intent)[-1]
        output = ldf.intent.copy()[0:-1]
        # array of possible values for attribute
        arr = ldf[last.attribute].unique().tolist()
        output.append(lux.Clause(last.attribute, last.attribute, arr))
    vlist = lux.vis.VisList.VisList(output, ldf)
    # Scores are computed on a second, independently built list and written onto the first.
    vlist_copy = lux.vis.VisList.VisList(output, ldf)
    for i in range(len(vlist_copy)):
        vlist[i].score = interestingness(vlist_copy[i], ldf)
    vlist.sort()
    vlist = vlist.showK()
    if recommendation["action"] == "Similarity":
        # The first entry of the sorted list is dropped for the Similarity action.
        recommendation["collection"] = vlist[1:]
    else:
        recommendation["collection"] = vlist
    return recommendation
Пример #12
0
def filter(ldf):
    """
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    With exactly one filter in the intent, one visualization is produced for every
    other value of the filtered attribute. Otherwise filters are generated from the
    unique values of every column with cardinality below 30 that is not already part
    of the current visualization. The result is scored and cut down with ``topK(15)``.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent. If ``ldf.toggle_benchmarking`` is
        True the elapsed time is printed.

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Filter action.
    """
    # for benchmarking: the flag is read once, so the start time always exists when it is reported
    tic = time.perf_counter() if ldf.toggle_benchmarking == True else None

    filters = utils.get_filter_specs(ldf.intent)
    output = []
    # if fltr is specified, create visualizations where data is filtered by all values of the fltr's categorical variable
    column_spec = utils.get_attrs_specs(ldf.current_vis[0]._inferred_intent)
    # Must be a real list: a bare map() iterator is exhausted by the first `in` test,
    # after which every column would wrongly look like "not part of the current vis".
    column_spec_attr = [clause.attribute for clause in column_spec]
    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]
        recommendation = {
            "action": "Filter",
            "description": f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative value.",
        }
        filter_values = [fltr.value]
        # creates views with new filters
        for val in ldf.unique_values[fltr.attribute]:
            if val not in filter_values:
                new_spec = column_spec.copy()
                new_spec.append(lux.Clause(attribute=fltr.attribute, value=val))
                output.append(Vis(new_spec))
    else:
        # if no existing filters, create filters using unique values from all categorical variables in the dataset
        # str() keeps the join working when an attribute is not a plain string.
        intended_attrs = '<b>' + ', '.join(
            str(clause.attribute)
            for clause in ldf.intent
            if clause.value == '' and clause.attribute != "Record"
        ) + '</b>'
        recommendation = {
            "action": "Filter",
            "description": f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
        }
        # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
        categorical_vars = [
            col for col in list(ldf.columns)
            if ldf.cardinality[col] < 30 and col not in column_spec_attr
        ]
        for cat in categorical_vars:
            for val in ldf.unique_values[cat]:
                new_spec = column_spec.copy()
                new_spec.append(lux.Clause(attribute=cat, filter_op="=", value=val))
                output.append(Vis(new_spec))
    vc = lux.vis.VisList.VisList(output, ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc

    # for benchmarking
    if tic is not None:
        toc = time.perf_counter()
        print(f"Performed filter action in {toc - tic:0.4f} seconds")
    return recommendation
Пример #13
0
def univariate(ldf, data_type_constraint="quantitative"):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent. If ``ldf.toggle_benchmarking`` is
        True the elapsed time is printed.

    data_type_constraint: str
        Controls the type of distribution chart that will be rendered. One of
        "quantitative" (default), "nominal" or "temporal".

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Distribution action.
        The collection is an empty list when the dataframe has too few rows for the
        requested chart type (fewer than 5 for quantitative, fewer than 3 for temporal).

    Raises
    ------
    ValueError
        If ``data_type_constraint`` is not one of the supported values.
    """
    # Reject unknown constraints up front; they used to fall through every branch and
    # crash later on an undefined local variable.
    if data_type_constraint not in ("quantitative", "nominal", "temporal"):
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            "expected 'quantitative', 'nominal' or 'temporal'."
        )

    # for benchmarking: the flag is read once, so the start time always exists when it is reported
    tic = time.perf_counter() if ldf.toggle_benchmarking == True else None
    filter_specs = utils.get_filter_specs(ldf.intent)
    ignore_rec_flag = False
    intent = [lux.Clause("?", data_type=data_type_constraint)]
    intent.extend(filter_specs)
    if data_type_constraint == "quantitative":
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes.",
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        if len(ldf) < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
        }
    else:  # "temporal"
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if len(ldf) < 3:
            ignore_rec_flag = True
    if ignore_rec_flag:
        # Early exit: nothing is scored and no benchmark line is printed.
        recommendation["collection"] = []
        return recommendation
    vc = VisList(intent, ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc
    # for benchmarking
    if tic is not None:
        toc = time.perf_counter()
        print(f"Performed distribution action in {toc - tic:0.4f} seconds")
    return recommendation
Пример #14
0
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generates bivariate visualizations that represent all pairwise relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
            LuxDataFrame with underspecified intent.

    ignore_transpose: bool
            Boolean flag to ignore pairs of attributes whose transpose are already computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Correlation action.
            The collection is an empty list when the dataframe has fewer than 5 rows.

    Raises
    ------
    ValueError
            If a generated visualization contains fewer than two measure attributes.
    """
    filter_specs = utils.get_filter_specs(ldf._intent)
    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    vlist = VisList(intent, ldf)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }
    # Doesn't make sense to compute correlation with fewer than 5 data values; return
    # before scoring, since the scores would be thrown away anyway.
    if len(ldf) < 5:
        recommendation["collection"] = []
        return recommendation
    # Then use the data populated in the vis list to compute score
    for vis in vlist:
        measures = vis.get_attr_by_data_model("measure")
        if len(measures) < 2:
            # ldf.columns holds column labels, not clauses, so they are listed directly
            # (accessing `.attribute` on them raised AttributeError instead of this error).
            raise ValueError(
                f"Can not compute correlation between {list(ldf.columns)} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute

        # A pair whose transpose was already computed is pushed to the bottom with score -1.
        if ignore_transpose:
            check_transpose = check_transpose_not_computed(vlist, msr1, msr2)
        else:
            check_transpose = True
        if check_transpose:
            vis.score = interestingness(vis, ldf)
        else:
            vis.score = -1
    vlist.sort()
    vlist = vlist.showK()
    recommendation["collection"] = vlist
    return recommendation
Пример #15
0
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generates bivariate visualizations that represent all pairwise relationships in the data.

    Parameters
    ----------
    ldf : LuxDataFrame
        LuxDataFrame with underspecified intent. If ``ldf.toggle_benchmarking`` is
        True the elapsed time is printed.

    ignore_transpose: bool
        Boolean flag to ignore pairs of attributes whose transpose are already computed (i.e., {X,Y} will be ignored if {Y,X} is already computed)

    Returns
    -------
    recommendations : Dict[str,obj]
        object with a collection of visualizations that result from the Correlation action.
        The collection is an empty list when the dataframe has fewer than 5 rows.

    Raises
    ------
    ValueError
        If a generated visualization contains fewer than two measure attributes.
    """
    # for benchmarking: the flag is read once, so the start time always exists when it is reported
    tic = time.perf_counter() if ldf.toggle_benchmarking == True else None
    filter_specs = utils.get_filter_specs(ldf.intent)
    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    vc = VisList(intent, ldf)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }
    # Doesn't make sense to compute correlation with fewer than 5 data values; return
    # before scoring, since the scores would be thrown away anyway. As before, no
    # benchmark line is printed on this path.
    if len(ldf) < 5:
        recommendation["collection"] = []
        return recommendation
    # Then use the data populated in the vis list to compute score
    for view in vc:
        measures = view.get_attr_by_data_model("measure")
        if len(measures) < 2:
            # ldf.columns holds column labels, not clauses, so they are listed directly
            # (accessing `.attribute` on them raised AttributeError instead of this error).
            raise ValueError(
                f"Can not compute correlation between {list(ldf.columns)} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute

        # A pair whose transpose was already computed is pushed to the bottom with score -1.
        if ignore_transpose:
            check_transpose = check_transpose_not_computed(vc, msr1, msr2)
        else:
            check_transpose = True
        if check_transpose:
            view.score = interestingness(view, ldf)
        else:
            view.score = -1
    vc = vc.topK(15)
    recommendation["collection"] = vc

    # for benchmarking
    if tic is not None:
        toc = time.perf_counter()
        print(f"Performed correlation action in {toc - tic:0.4f} seconds")
    return recommendation
Пример #16
0
def univariate(ldf, *args):
    """
    Generates bar chart distributions of different attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    *args
            Optional. The first positional argument carries the data type constraint,
            either as a sequence whose first element is the constraint or as a bare
            string. Supported constraints: "quantitative" (default when no argument
            is given), "nominal", "geographical" and "temporal".

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Distribution action.
            The collection is an empty list when the dataframe has too few rows for the
            requested chart type (fewer than 5 for quantitative, fewer than 3 for temporal).

    Raises
    ------
    ValueError
            If the data type constraint is not one of the supported values.
    """
    if len(args) == 0:
        data_type_constraint = "quantitative"
    elif isinstance(args[0], str):
        # A bare string used to be indexed like a sequence, leaving only its first character.
        data_type_constraint = args[0]
    else:
        data_type_constraint = args[0][0]

    # Reject unknown constraints up front; they used to fall through every branch and
    # crash later on an undefined local variable.
    if data_type_constraint not in ("quantitative", "nominal", "geographical", "temporal"):
        raise ValueError(
            f"Unsupported data type constraint {data_type_constraint!r}; expected "
            "'quantitative', 'nominal', 'geographical' or 'temporal'."
        )

    def _candidate_attributes(data_type):
        # Columns of the requested type with more than 5 distinct values, excluding the record counter.
        return [
            c for c in ldf.columns
            if ldf.data_type[c] == data_type
            and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]

    def _example_suffix(attributes):
        # Text appended to the long description, naming the first candidate if there is one.
        return f" (e.g., {attributes[0]})" if len(attributes) >= 1 else ""

    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False
    if data_type_constraint == "quantitative":
        possible_attributes = _candidate_attributes("quantitative")
        # Only this branch restricts the intent to the candidate attributes.
        intent = [lux.Clause(possible_attributes)]
        intent.extend(filter_specs)
        examples = _example_suffix(possible_attributes)
        recommendation = {
            "action": "Distribution",
            "description": "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes.",
            "long_description": f"Distribution displays univariate histogram distributions of all quantitative attributes{examples}. Visualizations are ranked from most to least skewed.",
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        if ldf.length < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        # Candidates are only used to pick an example for the description.
        examples = _example_suffix(_candidate_attributes("nominal"))
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Occurrence",
            "description": "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
            "long_description": f"Occurence displays bar charts of counts for all categorical attributes{examples}. Visualizations are ranked from most to least uneven across the bars. ",
        }
    elif data_type_constraint == "geographical":
        # Candidates are only used to pick an example for the description.
        examples = _example_suffix(_candidate_attributes("geographical"))
        intent = [
            lux.Clause("?", data_type="geographical"),
            lux.Clause("?", data_model="measure"),
        ]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Geographical",
            "description": "Show choropleth maps of <p class='highlight-descriptor'>geographic</p> attributes",
            "long_description": f"Occurence displays choropleths of averages for some geographic attribute{examples}. Visualizations are ranked by diversity of the geographic attribute.",
        }
    else:  # "temporal"
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action": "Temporal",
            "description": "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
            "long_description": "Temporal displays line charts for all attributes related to datetimes in the dataframe.",
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if ldf.length < 3:
            ignore_rec_flag = True
    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation
    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
Пример #17
0
def univariate(ldf, data_type_constraint="quantitative"):
    """
    Build the univariate recommendation for attributes of a single data type.

    Parameters
    ----------
    ldf : lux.core.frame
        LuxDataFrame with underspecified intent.

    data_type_constraint: str
        Controls the type of distribution chart that will be rendered.
        Must be "quantitative" (default), "nominal" or "temporal".

    Returns
    -------
    recommendations : Dict[str,obj]
        Dictionary with the keys "action", "description" and "collection".
        "collection" is a VisList whose visualizations were scored with
        `interestingness` and then sorted, or an empty list when the
        dataframe has too few rows for the requested chart type.

    Raises
    ------
    ValueError
        If `data_type_constraint` is not one of the supported values.
    """
    # Filters already present in the intent are carried over to every generated vis.
    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False

    if data_type_constraint == "quantitative":
        intent = [
            lux.Clause("?",
                       data_type="quantitative",
                       exclude="Number of Records")
        ]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Distribution",
            "description":
            "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes."
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        if len(ldf) < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Occurrence",
            "description":
            "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes."
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Temporal",
            "description":
            "Show trends over <p class='highlight-descriptor'>time-related</p> attributes."
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if len(ldf) < 3:
            ignore_rec_flag = True
    else:
        # Without this branch `intent` and `recommendation` would stay unbound and the
        # code below would fail with an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            "expected 'quantitative', 'nominal' or 'temporal'.")

    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation

    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    # Basic visualizations should not be capped, so no topK() is applied here.
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
Пример #18
0
def univariate(ldf, *args):
    """
    Build the univariate recommendation for attributes of a single data type.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    *args
            Optional. When at least one positional argument is given, the first
            element of the first argument (``args[0][0]``) is used as the data
            type constraint: "quantitative", "nominal" or "temporal". When no
            argument is given the constraint defaults to "quantitative".

    Returns
    -------
    recommendations : Dict[str,obj]
            Dictionary with the keys "action", "description" and "collection".
            "collection" is a VisList whose visualizations were scored with
            `interestingness` and then sorted, or an empty list when the
            dataframe has too few rows for the requested chart type.

    Raises
    ------
    ValueError
            If the data type constraint is not one of the supported values.
    """
    if len(args) == 0:
        data_type_constraint = "quantitative"
    else:
        data_type_constraint = args[0][0]

    # Filters already present in the intent are carried over to every generated vis.
    filter_specs = utils.get_filter_specs(ldf._intent)
    ignore_rec_flag = False

    if data_type_constraint == "quantitative":
        # Only quantitative columns with more than 5 distinct values are considered,
        # and the synthetic "Number of Records" column is left out.
        possible_attributes = [
            c for c in ldf.columns if ldf.data_type[c] == "quantitative"
            and ldf.cardinality[c] > 5 and c != "Number of Records"
        ]
        intent = [lux.Clause(possible_attributes)]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Distribution",
            "description":
            "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes.",
        }
        # Doesn't make sense to generate a histogram if there is less than 5 datapoints (pre-aggregated)
        if len(ldf) < 5:
            ignore_rec_flag = True
    elif data_type_constraint == "nominal":
        intent = [lux.Clause("?", data_type="nominal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Occurrence",
            "description":
            "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
        }
    elif data_type_constraint == "temporal":
        intent = [lux.Clause("?", data_type="temporal")]
        intent.extend(filter_specs)
        recommendation = {
            "action":
            "Temporal",
            "description":
            "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
        }
        # Doesn't make sense to generate a line chart if there is less than 3 datapoints (pre-aggregated)
        if len(ldf) < 3:
            ignore_rec_flag = True
    else:
        # Without this branch `intent` and `recommendation` would stay unbound and the
        # code below would fail with an opaque UnboundLocalError.
        raise ValueError(
            f"Unsupported data type constraint {data_type_constraint!r}; "
            "expected 'quantitative', 'nominal' or 'temporal'.")

    if ignore_rec_flag:
        recommendation["collection"] = []
        return recommendation

    vlist = VisList(intent, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist.sort()
    recommendation["collection"] = vlist
    return recommendation
Пример #19
0
def filter(ldf):
    """
    Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

    Three situations are handled:

    * exactly one filter on a nominal attribute: one visualization per
      alternative value of that attribute;
    * exactly one filter on a quantitative attribute: a single visualization
      using the complementary inequality operator;
    * any other number of filters (including none): one visualization per
      unique value of every low-cardinality column that is not already part
      of the current visualization.

    Parameters
    ----------
    ldf : lux.core.frame
            LuxDataFrame with underspecified intent.

    Returns
    -------
    recommendations : Dict[str,obj]
            object with a collection of visualizations that result from the Filter action.
            The collection is limited to the top 15 visualizations by `topK`.
    """
    filters = utils.get_filter_specs(ldf._intent)
    output = []
    # Attribute clauses of the current visualization; every generated vis reuses them.
    column_spec = utils.get_attrs_specs(ldf.current_vis[0]._inferred_intent)
    # Materialize the attribute names. A bare map() iterator is exhausted by the first
    # membership test, so every later `not in` check would succeed and already-specified
    # columns would no longer be excluded.
    column_spec_attr = [clause.attribute for clause in column_spec]

    if len(filters) == 1:
        # get unique values for all categorical values specified and creates corresponding filters
        fltr = filters[0]
        fltr_data_type = ldf.data_type_lookup[fltr.attribute]
        # NOTE(review): a single filter on an attribute that is neither nominal nor
        # quantitative leaves `recommendation` unassigned, so the final assignment
        # below raises UnboundLocalError. Left as is because no description text
        # exists for that case.

        if fltr_data_type == "nominal":
            recommendation = {
                "action":
                "Filter",
                "description":
                f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative value.",
            }
            # The value currently filtered on is skipped; only alternatives are proposed.
            filter_values = [fltr.value]
            for val in ldf.unique_values[fltr.attribute]:
                if val not in filter_values:
                    new_spec = column_spec.copy()
                    new_spec.append(
                        lux.Clause(attribute=fltr.attribute, value=val))
                    output.append(Vis(new_spec))
        elif fltr_data_type == "quantitative":
            recommendation = {
                "action":
                "Filter",
                "description":
                f"Changing the <p class='highlight-intent'>{fltr.attribute}</p> filter to an alternative inequality operation.",
            }

            # Any operator missing from this table (e.g. "=") maps to None.
            # TODO: need to support case where fltr_op is "=" --> auto-binned ranges
            complementary_ops = {">": "<=", "<": ">=", ">=": "<", "<=": ">"}

            # Create vis with complementary filter operations
            new_spec = column_spec.copy()
            new_spec.append(
                lux.Clause(
                    attribute=fltr.attribute,
                    filter_op=complementary_ops.get(fltr.filter_op),
                    value=fltr.value,
                ))
            output.append(Vis(new_spec, score=1))
    # zero or several existing filters: create filters using unique values from
    # all low-cardinality columns in the dataset
    else:
        intended_attrs = ", ".join([
            clause.attribute for clause in ldf._intent
            if clause.value == "" and clause.attribute != "Record"
        ])
        recommendation = {
            "action":
            "Filter",
            "description":
            f"Applying filters to the <p class='highlight-intent'>{intended_attrs}</p> intent.",
        }
        # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
        categorical_vars = [
            col for col in ldf.columns
            if ldf.cardinality[col] < 30 and col not in column_spec_attr
        ]
        for cat in categorical_vars:
            for val in ldf.unique_values[cat]:
                new_spec = column_spec.copy()
                new_spec.append(
                    lux.Clause(attribute=cat, filter_op="=", value=val))
                output.append(Vis(new_spec))

    vlist = lux.vis.VisList.VisList(output, ldf)
    for vis in vlist:
        vis.score = interestingness(vis, ldf)
    vlist = vlist.topK(15)
    recommendation["collection"] = vlist
    return recommendation
Пример #20
0
            start = time.perf_counter()
            vis = Vis(test, df)
            end = time.perf_counter()
            t = end - start
            trial.append([nPts, t, test[0], test[1]])
    ################# Color Scatterplot ############################
    elif (experiment == "colorscatter"):
        lux.config.heatmap = False
        for attr in [
                'host_id', 'host_name', 'neighbourhood_group', 'neighbourhood',
                'room_type', 'number_of_reviews'
        ]:
            start = time.perf_counter()
            vis = Vis(
                ['price', 'minimum_nights',
                 lux.Clause(attr, channel="color")], df)
            end = time.perf_counter()
            t = end - start
            trial.append([nPts, t, attr])
    ################# Regular Histogram ############################
    elif (experiment == "histogram"):
        for b in list(range(5, 205, 10)):
            start = time.perf_counter()
            vis = Vis([lux.Clause("number_of_reviews", bin_size=b)], df)
            end = time.perf_counter()
            t = end - start
            trial.append([nPts, t, b])

    # ################# Regular bar ############################
    elif (experiment == "bar"):
        for attr in [