Example #1
 def execute(viewCollection:ViewCollection, ldf: LuxDataFrame):
     import pandas as pd
     '''
     Given a ViewCollection, fetch the data required to render the view
     1) Apply filters
     2) Retrieve relevant attributes
     3) Return a DataFrame with relevant results
     '''
     for view in viewCollection:
         print(view, utils.getFilterSpecs(view.specLst))
         # Select relevant data based on attribute information
         attributes = set([])
         for spec in view.specLst:
             if (spec.attribute):
                 attributes.add(spec.attribute)
         if view.mark not in ["bar", "line", "histogram"]:
             whereClause, filterVars = SQLExecutor.executeFilter(view)
             requiredVariables = attributes | set(filterVars)
             requiredVariables = ",".join(requiredVariables)
             rowCount = list(pd.read_sql("SELECT COUNT(*) FROM {} {}".format(ldf.table_name, whereClause), ldf.SQLconnection)['count'])[0]
             if rowCount > 10000:
                 query = "SELECT {} FROM {} {} ORDER BY random() LIMIT 10000".format(requiredVariables, ldf.table_name, whereClause)
             else:
                 query = "SELECT {} FROM {} {}".format(requiredVariables, ldf.table_name, whereClause)
             data = pd.read_sql(query, ldf.SQLconnection)
             view.data = utils.pandasToLux(data)
         if (view.mark =="bar" or view.mark =="line"):
             SQLExecutor.executeAggregate(view, ldf)
         elif (view.mark =="histogram"):
             SQLExecutor.executeBinning(view, ldf)
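The sampling step above (count the matching rows first, then fall back to ORDER BY random() LIMIT when the result is large) can be tried outside Lux. A minimal, self-contained sketch using an in-memory SQLite table in place of ldf.SQLconnection; the table, columns, and filter below are illustrative, not taken from Lux:

import sqlite3
import pandas as pd

conn = sqlite3.connect(":memory:")
pd.DataFrame({"origin": ["USA", "Japan", "Europe"] * 5,
              "horsepower": range(15)}).to_sql("cars", conn, index=False)

where_clause = "WHERE origin = 'USA'"
required_variables = "origin,horsepower"

# Count the matching rows before deciding whether to sample
row_count = pd.read_sql(
    "SELECT COUNT(*) AS n FROM cars {}".format(where_clause), conn)["n"].iloc[0]

if row_count > 10000:
    # Large result set: pull a random sample to keep rendering cheap
    query = "SELECT {} FROM cars {} ORDER BY random() LIMIT 10000".format(
        required_variables, where_clause)
else:
    query = "SELECT {} FROM cars {}".format(required_variables, where_clause)

print(pd.read_sql(query, conn).head())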
Example #2
    def contextToJSON(context):
        from lux.utils import utils

        filterSpecs = utils.getFilterSpecs(context)
        attrsSpecs = utils.getAttrsSpecs(context)

        specs = {}
        specs['attributes'] = [spec.attribute for spec in attrsSpecs]
        specs['filters'] = [spec.attribute for spec in filterSpecs]
        return specs
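getFilterSpecs and getAttrsSpecs themselves are not shown on this page. Judging from the later examples, which treat a spec with a non-empty value as a filter, they split the context roughly as in the hypothetical mock below; the Spec fields here are assumptions, not the actual lux.Spec definition:

from dataclasses import dataclass
from typing import Any, List

@dataclass
class Spec:                      # hypothetical stand-in for lux.Spec
    attribute: str = ""
    filterOp: str = "="
    value: Any = ""              # a non-empty value makes the spec act as a filter

def getFilterSpecs(specs: List[Spec]) -> List[Spec]:
    return [s for s in specs if s.value != ""]

def getAttrsSpecs(specs: List[Spec]) -> List[Spec]:
    return [s for s in specs if s.value == ""]

context = [Spec(attribute="MilesPerGal"), Spec(attribute="Origin", value="USA")]
print([s.attribute for s in getAttrsSpecs(context)])   # ['MilesPerGal']
print([s.attribute for s in getFilterSpecs(context)])  # ['Origin']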
Example #3
    def executeFilter(view: View):
        assert view.data is not None, "executeFilter assumes input view.data is populated (if not, populate with LuxDataFrame values)"
        filters = utils.getFilterSpecs(view.specLst)

        if (filters):
            # TODO: Need to handle OR logic
            for filter in filters:
                view.data = PandasExecutor.applyFilter(view.data,
                                                       filter.attribute,
                                                       filter.filterOp,
                                                       filter.value)
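PandasExecutor.applyFilter is not part of this listing; presumably it reduces to a boolean-mask selection on view.data. A sketch of that idea, with the operator set assumed rather than copied from Lux:

import operator
import pandas as pd

OPS = {"=": operator.eq, "!=": operator.ne,
       ">": operator.gt, "<": operator.lt,
       ">=": operator.ge, "<=": operator.le}

def apply_filter(df: pd.DataFrame, attribute: str, op: str, value) -> pd.DataFrame:
    # Keep only the rows where the comparison against `value` holds
    return df[OPS[op](df[attribute], value)]

df = pd.DataFrame({"Origin": ["USA", "Japan", "USA"], "Horsepower": [130, 95, 98]})
print(apply_filter(df, "Origin", "=", "USA"))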
Example #4
 def executeFilter(view:View):
     whereClause = []
     filters = utils.getFilterSpecs(view.specLst)
     filterVars = []
     if (filters):
         for i, f in enumerate(filters):
             # The first filter opens the WHERE clause; subsequent ones are ANDed on
             whereClause.append("WHERE" if i == 0 else "AND")
             whereClause.extend([str(f.attribute), str(f.filterOp), "'" + str(f.value) + "'"])
             if f.attribute not in filterVars:
                 filterVars.append(f.attribute)
     if whereClause == []:
         return("", [])
     else:
         whereClause = " ".join(whereClause)
     return(whereClause, filterVars)
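The same clause-building loop can be run standalone with plain named tuples standing in for the filter specs (the values below are made up). Note that the original interpolates values directly into the SQL string, which is workable for trusted internal specs; a parameterized query is the safer general-purpose pattern.

from collections import namedtuple

Filter = namedtuple("Filter", ["attribute", "filterOp", "value"])   # hypothetical spec stand-in
filters = [Filter("Origin", "=", "USA"), Filter("Cylinders", "=", 4)]

whereClause, filterVars = [], []
for i, f in enumerate(filters):
    whereClause.append("WHERE" if i == 0 else "AND")
    whereClause.extend([str(f.attribute), str(f.filterOp), "'" + str(f.value) + "'"])
    if f.attribute not in filterVars:
        filterVars.append(f.attribute)

print(" ".join(whereClause))   # WHERE Origin = 'USA' AND Cylinders = '4'
print(filterVars)              # ['Origin', 'Cylinders']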
Example #5
def enhance(ldf):
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		tic = time.perf_counter()
	'''
	Given a set of views, generates possible visualizations when an additional attribute is added to the current view.

	Parameters
	----------
	ldf : lux.luxDataFrame.LuxDataFrame
		LuxDataFrame with underspecified context.

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Enhance action.
	'''
	recommendation = {"action":"Enhance",
					"description":"Shows possible visualizations when an additional attribute is added to the current view."}
	filters = utils.getFilterSpecs(ldf.context)
	# Collect variables that already exist in the context
	attrSpecs = list(filter(lambda x: x.value=="" and x.attribute!="Record", ldf.context))
	if(len(attrSpecs)>2): # if there are too many column attributes, don't generate Enhance recommendations
		recommendation["collection"] = []
		return recommendation
	query = filters + attrSpecs
	query.append("?")
	vc = lux.view.ViewCollection.ViewCollection(query)
	vc = vc.load(ldf)
		
	# Then use the data populated in the view collection to compute score
	for view in vc: view.score = interestingness(view,ldf)
	
	vc = vc.topK(15)
	recommendation["collection"] = vc
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		toc = time.perf_counter()
		print(f"Performed enhance action in {toc - tic:0.4f} seconds")
	return recommendation
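The distinctive step in Enhance is appending the "?" wildcard, which expands the view collection over every attribute not already in the context. A mock of that expansion with plain strings (not Lux's actual expansion logic):

existing = ["Horsepower"]                                  # attributes already in the context
all_columns = ["Horsepower", "Weight", "Acceleration", "Origin"]

# One candidate view per attribute the wildcard can bind to
candidates = [existing + [col] for col in all_columns if col not in existing]
print(candidates)
# [['Horsepower', 'Weight'], ['Horsepower', 'Acceleration'], ['Horsepower', 'Origin']]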
Example #6
def filter(ldf):
    #for benchmarking
    if ldf.toggleBenchmarking == True:
        tic = time.perf_counter()
    '''
	Iterates over all possible values of a categorical variable and generates visualizations where each categorical value filters the data.

	Parameters
	----------
	ldf : lux.luxDataFrame.LuxDataFrame
		LuxDataFrame with underspecified context.

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Filter action.
	'''
    recommendation = {
        "action": "Filter",
        "description": "Shows possible visualizations when filtered by categorical variables in the data object's dataset."
    }
    filters = utils.getFilterSpecs(ldf.context)
    filterValues = []
    output = []
    #if a filter is already specified, create visualizations where the data is filtered by every other value of that categorical variable
    columnSpec = utils.getAttrsSpecs(ldf.viewCollection[0].specLst)
    columnSpecAttr = list(map(lambda x: x.attribute, columnSpec))  # materialize so the membership checks below can be repeated
    if len(filters) > 0:
        #get unique values for all categorical values specified and creates corresponding filters
        for row in filters:
            uniqueValues = ldf.uniqueValues[row.attribute]
            filterValues.append(row.value)
            #creates new data objects with new filters
            for val in uniqueValues:
                if val not in filterValues:
                    newSpec = columnSpec.copy()
                    newFilter = lux.Spec(attribute=row.attribute, value=val)
                    newSpec.append(newFilter)
                    tempView = View(newSpec)
                    output.append(tempView)
    else:  #if no existing filters, create filters using unique values from all categorical variables in the dataset
        categoricalVars = []
        for col in list(ldf.columns):
            # if cardinality is not too high, and attribute is not one of the X,Y (specified) column
            if ldf.cardinality[col] < 40 and col not in columnSpecAttr:
                categoricalVars.append(col)
        for cat in categoricalVars:
            uniqueValues = ldf.uniqueValues[cat]
            for val in uniqueValues:
                newSpec = columnSpec.copy()
                newFilter = lux.Spec(attribute=cat,
                                     filterOp="=",
                                     value=val)
                newSpec.append(newFilter)
                tempView = View(newSpec)
                output.append(tempView)
    vc = lux.view.ViewCollection.ViewCollection(output)
    vc = vc.load(ldf)
    for view in vc:
        view.score = interestingness(view, ldf)
    vc = vc.topK(15)
    recommendation["collection"] = vc

    #for benchmarking
    if ldf.toggleBenchmarking == True:
        toc = time.perf_counter()
        print(f"Performed filter action in {toc - tic:0.4f} seconds")
    return recommendation
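The no-filter branch above boils down to: treat low-cardinality columns as categorical and propose one equality filter per unique value. A self-contained pandas sketch of that enumeration (the DataFrame is made up; the 40-value threshold mirrors the example, and the real action additionally skips attributes already on the view):

import pandas as pd

df = pd.DataFrame({"Origin": ["USA", "Japan", "Europe", "USA"],
                   "Horsepower": [130, 95, 88, 98]})

candidate_filters = []
for col in df.columns:
    if df[col].nunique() < 40:                 # low cardinality => treat as categorical
        for val in df[col].unique():
            candidate_filters.append((col, "=", val))

print(candidate_filters)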
Example #7
def generalize(ldf):
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		tic = time.perf_counter()
	'''
	Generates all possible visualizations when one attribute or filter from the current view is removed.

	Parameters
	----------
	ldf : lux.luxDataFrame.LuxDataFrame
		LuxDataFrame with underspecified context.

	Returns
	-------
	recommendations : Dict[str,obj]
		object with a collection of visualizations that result from the Generalize action.
	'''
	# takes in a dataObject and generates a list of new dataObjects, each with a single measure from the original object removed
	# -->  return list of dataObjects with corresponding interestingness scores

	recommendation = {"action":"Generalize",
						   "description":"Remove one attribute or filter to observe a more general trend."}
	output = []
	excludedColumns = []
	columnSpec = list(filter(lambda x: x.value=="" and x.attribute!="Record", ldf.context))
	rowSpecs = utils.getFilterSpecs(ldf.context)
	# if we do not have enough column attributes, or have too many, return no views.
	if(len(columnSpec)<2 or len(columnSpec)>4):
		recommendation["collection"] = []
		return recommendation
	for spec in columnSpec:
		columns = spec.attribute
		if type(columns) == list:
			for column in columns:
				if column not in excludedColumns:
					tempView = View(ldf.context)
					tempView.removeColumnFromSpecNew(column)
					excludedColumns.append(column)
					output.append(tempView)
		elif type(columns) == str:
			if columns not in excludedColumns:
				tempView = View(ldf.context)
				tempView.removeColumnFromSpecNew(columns)
				excludedColumns.append(columns)
				output.append(tempView)
	for i, spec in enumerate(rowSpecs):
		newSpec = ldf.context.copy()
		newSpec.pop(i)
		tempView = View(newSpec)
		output.append(tempView)
		
	vc = lux.view.ViewCollection.ViewCollection(output)
	vc = vc.load(ldf)
	recommendation["collection"] = vc
	for view in vc:
		view.score = interestingness(view,ldf)
	vc.sort(removeInvalid=True)
	#for benchmarking
	if ldf.toggleBenchmarking == True:
		toc = time.perf_counter()
		print(f"Performed generalize action in {toc - tic:0.4f} seconds")
	return recommendation
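Stripped of the View machinery, Generalize is a drop-one enumeration over the current specification; a tiny illustration with plain strings standing in for specs:

context = ["Horsepower", "MilesPerGal", "Origin = USA"]   # hypothetical current view
candidates = [context[:i] + context[i + 1:] for i in range(len(context))]
for c in candidates:
    print(c)
# ['MilesPerGal', 'Origin = USA']
# ['Horsepower', 'Origin = USA']
# ['Horsepower', 'MilesPerGal']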
Example #8
def interestingness(view: View, ldf: LuxDataFrame) -> float:
	"""
	Compute the interestingness score of the view.
	The interestingness metric is dependent on the view type.

	Parameters
	----------
	view : View
	ldf : LuxDataFrame

	Returns
	-------
	float
		Interestingness Score
	"""	
	

	if view.data is None:
		raise Exception("View.data needs to be populated before interestingness can be computed. Run Executor.execute(view,ldf).")

	n_dim = 0
	n_msr = 0
	
	filterSpecs = utils.getFilterSpecs(view.specLst)
	viewAttrsSpecs = utils.getAttrsSpecs(view.specLst)

	for spec in viewAttrsSpecs:
		if (spec.attribute!="Record"):
			if (spec.dataModel == 'dimension'):
				n_dim += 1
			if (spec.dataModel == 'measure'):
				n_msr += 1
	n_filter = len(filterSpecs)
	attr_specs = [spec for spec in viewAttrsSpecs if spec.attribute != "Record"]
	dimensionLst = view.getAttrByDataModel("dimension")
	measureLst = view.getAttrByDataModel("measure")

	# Bar Chart
	if (n_dim == 1 and (n_msr == 0 or n_msr==1)):
		if (n_filter == 0):
			return unevenness(view, ldf, measureLst, dimensionLst)
		elif(n_filter==1):
			return deviationFromOverall(view,ldf,filterSpecs,measureLst[0].attribute)
	# Histogram
	elif (n_dim == 0 and n_msr == 1):
		if (n_filter == 0):
			v = view.data["Count of Records"]
			return skewness(v)
		elif (n_filter == 1):
			return deviationFromOverall(view,ldf,filterSpecs,"Count of Records")
	# Scatter Plot
	elif (n_dim == 0 and n_msr == 2):
		if (n_filter==1):
			v_filter_size = getFilteredSize(filterSpecs,view.data)
			v_size = len(view.data)
			sig = v_filter_size/v_size
		else:
			sig = 1
		return sig * monotonicity(view,attr_specs)
	# Scatterplot colored by Dimension
	elif (n_dim == 1 and n_msr == 2):
		colorAttr = view.getAttrByChannel("color")[0].attribute
		
		C = ldf.cardinality[colorAttr]
		if (C<40):
			return 1/C
		else:
			return -1
	# Scatterplot colored by measure
	elif (n_msr == 3):
		return 0.1
	# Default
	else:
		return -1
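For the histogram branch, the unfiltered score is the skewness of the bin counts. The skewness helper is not shown on this page; scipy.stats.skew is one plausible implementation, illustrated here on made-up bin counts:

import numpy as np
from scipy.stats import skew

bin_counts = np.array([120, 80, 55, 30, 20, 10, 5, 4, 1, 1])   # "Count of Records" per bin
print(skew(bin_counts))   # more skewed count distributions yield a higher score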