Exemplo n.º 1
0
def correlation(ldf: LuxDataFrame, ignore_transpose: bool = True):
    """
    Generate bivariate visualizations for all pairwise measure relationships.

    Parameters
    ----------
    ldf : LuxDataFrame
        LuxDataFrame with underspecified intent.

    ignore_transpose : bool
        Boolean flag to ignore pairs of attributes whose transpose are already
        computed (i.e., {X,Y} will be ignored if {Y,X} is already computed).
        Ignored pairs are given a score of -1 instead of an interestingness
        score.

    Returns
    -------
    recommendations : Dict[str,obj]
        Dictionary with the keys "action", "description" and "collection".
        "collection" is the result of ``topK(15)`` on the scored vis list, or
        an empty list when ``ldf`` has fewer than 5 rows.

    Raises
    ------
    ValueError
        If a generated visualization carries fewer than two measure
        attributes.
    """
    # for benchmarking: read the flag once so `tic` is guaranteed to exist
    # whenever the closing report is printed.
    benchmarking = ldf.toggle_benchmarking
    if benchmarking:
        tic = time.perf_counter()

    filter_specs = utils.get_filter_specs(ldf.intent)
    intent = [
        lux.Clause("?", data_model="measure"),
        lux.Clause("?", data_model="measure"),
    ]
    intent.extend(filter_specs)
    recommendation = {
        "action": "Correlation",
        "description": "Show relationships between two <p class='highlight-descriptor'>quantitative</p> attributes.",
    }

    # Doesn't make sense to compute correlation with fewer than 5 data values.
    # Return before building and scoring the vis list so no work is wasted
    # on results that would be discarded anyway.
    if len(ldf) < 5:
        recommendation["collection"] = []
        return recommendation

    vc = VisList(intent, ldf)
    # Use the data populated in the vis list to compute each view's score.
    for view in vc:
        measures = view.get_attr_by_data_model("measure")
        if len(measures) < 2:
            raise ValueError(
                f"Can not compute correlation between {[m.attribute for m in measures]} since less than 2 measure values present."
            )
        msr1 = measures[0].attribute
        msr2 = measures[1].attribute

        # The transpose check is only consulted when the caller asked for it.
        if not ignore_transpose or check_transpose_not_computed(vc, msr1, msr2):
            view.score = interestingness(view, ldf)
        else:
            # Presumably the transposed pair is already covered; -1 pushes
            # this view to the bottom of the ranking.
            view.score = -1

    vc = vc.topK(15)
    recommendation["collection"] = vc

    # for benchmarking
    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed correlation action in {toc - tic:0.4f} seconds")
    return recommendation
Exemplo n.º 2
0
# Settings for each data type constraint accepted by `univariate`:
# (action name, description, minimum row count or None when there is no minimum).
_UNIVARIATE_ACTIONS = {
    "quantitative": (
        "Distribution",
        "Show univariate histograms of <p class='highlight-descriptor'>quantitative</p>  attributes.",
        5,  # a histogram of fewer than 5 datapoints (pre-aggregated) is not useful
    ),
    "nominal": (
        "Occurrence",
        "Show frequency of occurrence for <p class='highlight-descriptor'>categorical</p> attributes.",
        None,
    ),
    "temporal": (
        "Temporal",
        "Show trends over <p class='highlight-descriptor'>time-related</p> attributes.",
        3,  # a line chart of fewer than 3 datapoints (pre-aggregated) is not useful
    ),
}


def univariate(ldf, data_type_constraint="quantitative"):
    """
    Generate univariate visualizations of the attributes in the dataframe.

    Parameters
    ----------
    ldf : lux.luxDataFrame.LuxDataFrame
        LuxDataFrame with underspecified intent.

    data_type_constraint : str
        Controls the type of distribution chart that will be rendered. One of
        "quantitative" (Distribution action), "nominal" (Occurrence action)
        or "temporal" (Temporal action).

    Returns
    -------
    recommendations : Dict[str,obj]
        Dictionary with the keys "action", "description" and "collection".
        "collection" is the result of ``topK(15)`` on the scored vis list, or
        an empty list when ``ldf`` has fewer rows than the minimum for the
        requested constraint (5 for "quantitative", 3 for "temporal").

    Raises
    ------
    ValueError
        If ``data_type_constraint`` is not one of the supported values.
    """
    # Validate the constraint up front so an unsupported value fails with a
    # clear message instead of an unbound-variable error later on.
    try:
        action, description, min_rows = _UNIVARIATE_ACTIONS[data_type_constraint]
    except (KeyError, TypeError):
        raise ValueError(
            f"Unsupported data_type_constraint {data_type_constraint!r}; "
            f"expected one of {sorted(_UNIVARIATE_ACTIONS)}."
        ) from None

    # for benchmarking: read the flag once so `tic` is guaranteed to exist
    # whenever the closing report is printed.
    benchmarking = ldf.toggle_benchmarking
    if benchmarking:
        tic = time.perf_counter()

    filter_specs = utils.get_filter_specs(ldf.intent)
    intent = [lux.Clause("?", data_type=data_type_constraint)]
    intent.extend(filter_specs)
    recommendation = {"action": action, "description": description}

    # Too few rows for this chart type: return an empty collection without
    # building or scoring any visualization.
    if min_rows is not None and len(ldf) < min_rows:
        recommendation["collection"] = []
        return recommendation

    vc = VisList(intent, ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc

    # for benchmarking
    if benchmarking:
        toc = time.perf_counter()
        print(f"Performed distribution action in {toc - tic:0.4f} seconds")
    return recommendation