def inv_eval(vtrace):
    """Invert an area-mark visual trace into candidate (table, chart) pairs.

    Args:
        vtrace: re-iterable collection of trace elements. Each element is
            read for ``x1``, ``x2``, ``yt1``, ``yt2``, ``yb1``, ``yb2`` and
            ``color``.

    Returns:
        A one-element list ``[(SymTable, chart)]``, or ``[]`` when the
        coloured (one column per colour) table would have a missing cell.
    """
    use_color = all(vt.color is not None for vt in vtrace)

    if not use_color:
        # Simple visual trace: one row per segment end point. The bottom
        # column is only emitted when some segment does not start at zero.
        all_start_from_zero = all(
            vt.yb1 == 0 and vt.yb2 == 0 for vt in vtrace)
        table_content = []
        for vt in vtrace:
            for x, top, bot in ((vt.x1, vt.yt1, vt.yb1),
                                (vt.x2, vt.yt2, vt.yb2)):
                row = {"c_x": x, "c_top": top}
                if not all_start_from_zero:
                    row["c_bot"] = bot
                table_content.append(row)
        chart = MpAreaChart(
            c_x="c_x",
            c_tops=["c_top"],
            c_bots=None if all_start_from_zero else ["c_bot"])
        return [(SymTable(values=table_content), chart)]

    # Coloured trace: map every x to one column per colour. The stored value
    # is the band height (top - bottom) when a bottom is present.
    color_names = list({vt.color for vt in vtrace})
    table_dict = {}
    for vt in vtrace:
        column = str(vt.color)
        for x, top, bot in ((vt.x1, vt.yt1, vt.yb1),
                            (vt.x2, vt.yt2, vt.yb2)):
            row = table_dict.setdefault(x, {"c_x": x})
            row[column] = top - bot if bot is not None else top

    table_content = list(table_dict.values())
    # Every row must carry "c_x" plus a value for every colour; a table with
    # holes cannot be expressed, so no candidate is produced.
    expected_width = len(color_names) + 1
    if any(len(row) != expected_width for row in table_content):
        return []

    # NOTE(review): this branch returns an MpScatterPlot although the
    # function inverts an area trace (the other branch builds MpAreaChart).
    # Looks like a copy/paste slip -- confirm the intended chart class.
    chart = MpScatterPlot("c_x", [str(c) for c in color_names])
    return [(SymTable(values=table_content), chart)]
def inv_eval(vtrace):
    """Invert a line-mark visual trace into a (table, chart) candidate.

    Every segment contributes its two end points as table rows; end points
    with a missing x or y are skipped and duplicate points are kept once, in
    first-seen order.

    Args:
        vtrace: iterable of line-segment trace elements. Each element is read
            for ``x1``, ``y1``, ``x2``, ``y2``, ``size``, ``color`` and
            ``column``.

    Returns:
        A one-element list ``[(SymTable, LineChart)]``.
    """
    # There should not be any points between the two ends of a segment.
    # NOTE(review): the text mentions vt.x1 / vt.x2 / vt.color / vt.column
    # literally and is never interpolated, so every segment adds the same
    # constraint string -- confirm whether formatting was intended.
    no_point_between = """ (not (exists (r Tuple) (and (> r.c_x vt.x1) (< r.c_x vt.x2) (= r.c_color vt.color) (= r.c_column vt.column))))"""

    constraints = []
    # Points are frozen to JSON strings so they can be compared and hashed;
    # `seen` gives O(1) duplicate checks while `frozen_data` keeps the order.
    frozen_data = []
    seen = set()

    for vt in vtrace:
        for x, y in ((vt.x1, vt.y1), (vt.x2, vt.y2)):
            # fault tolerance: ignore an end point whose x or y is null
            if x is None or y is None:
                continue
            point = json.dumps(
                {
                    "c_x": x,
                    "c_y": y,
                    "c_size": vt.size,
                    "c_color": vt.color,
                    "c_column": vt.column,
                },
                sort_keys=True)
            if point not in seen:
                seen.add(point)
                frozen_data.append(point)
        constraints.append(no_point_between)

    data_values = [json.loads(r) for r in frozen_data]
    # presumably reports (and strips) the fields that are None in every row
    # -- helper is defined elsewhere, verify
    unused_fields = remove_unused_fields(data_values)

    encodings = []
    for channel, enc_ty in [("x", "_"), ("y", "_"), ("size", "nominal"),
                            ("color", "nominal"), ("column", "nominal")]:
        field_name = "c_{}".format(channel)
        if field_name in unused_fields:
            continue
        if channel in ["x", "y"]:
            # positional channels take their type from the data itself
            dtype = table_utils.infer_dtype(
                [r[field_name] for r in data_values])
            enc_ty = "nominal" if dtype == "string" else "quantitative"
        encodings.append(Encoding(channel, field_name, enc_ty))

    chart = LineChart(encodings=encodings)
    return [(SymTable(values=data_values, constraints=constraints), chart)]
def sample_symbolic_table(symtable, size, strategy="diversity"):
    """Given a symbolic table, sample a smaller symbolic table contained by it.

    Args:
        symtable: the input symbolic table; only ``symtable.values`` is read.
        size: the number of rows wanted for the output table. Values larger
            than the table are clamped to the table length.
        strategy: ``"uniform"`` draws row indices uniformly without
            replacement; ``"diversity"`` (default) picks rows one at a time,
            each time letting ``pick_diverse_candidate_index`` choose among
            up to 20 randomly drawn unused rows.

    Returns:
        The output table sample, a ``SymTable`` built from the chosen rows.

    Raises:
        ValueError: if ``strategy`` is not one of the supported names.
    """
    if strategy not in ("uniform", "diversity"):
        # Previously an unknown strategy fell through and failed later with
        # an UnboundLocalError on `chosen_indices`.
        raise ValueError(
            "unknown sampling strategy: {!r}".format(strategy))

    row_count = len(symtable.values)
    size = min(size, row_count)

    if strategy == "uniform":
        chosen_indices = np.random.choice(row_count, size, replace=False)
    else:
        indices = set(range(row_count))
        chosen_indices = set()
        for _ in range(size):
            pool = indices - chosen_indices
            candidate_size = min(20, len(pool))
            candidates = np.random.choice(
                list(pool), size=candidate_size, replace=False)
            index = pick_diverse_candidate_index(
                candidates, chosen_indices, symtable.values)
            chosen_indices.add(index)

    sample_values = [symtable.values[i] for i in chosen_indices]
    return SymTable(sample_values)
def inv_eval(vtrace, orientation):
    """Invert a bar visual trace into a grouped-bar (table, chart) candidate.

    The table maps each category to one column per colour, holding the bar
    length (end minus start).

    Args:
        vtrace: re-iterable collection of bar trace elements. For
            ``orientation == "vertical"`` each element is read for ``x``,
            ``y1``, ``y2`` and ``color``; for any other orientation for
            ``y``, ``x1``, ``x2`` and ``color``.
        orientation: ``"vertical"`` selects the vertical layout; it is also
            forwarded to the chart as ``orient``.

    Returns:
        A one-element list ``[(SymTable, MpGroupBarChart)]``, or ``[]`` when
        a bar has no end value or no colour, or when some category lacks a
        value for some colour.
    """
    vertical = orientation == "vertical"
    y_cols = list({vt.color for vt in vtrace})

    # map each category to multiple y columns (one per colour)
    table_dict = {}
    for vt in vtrace:
        if vertical:
            category, start, end = vt.x, vt.y1, vt.y2
        else:
            category, start, end = vt.y, vt.x1, vt.x2
        if end is None or vt.color is None:
            return []
        row = table_dict.setdefault(category, {"c_x": category})
        row[vt.color] = end - start

    table_content = list(table_dict.values())
    # each row needs "c_x" plus every colour; otherwise the trace cannot be
    # represented in this format
    expected_width = len(y_cols) + 1
    if any(len(row) != expected_width for row in table_content):
        return []

    return [(SymTable(values=table_content),
             MpGroupBarChart("c_x", y_cols, orient=orientation))]
def inv_eval(vtrace, orientation):
    """Invert a bar visual trace into a single-series (table, chart) pair.

    Args:
        vtrace: iterable of bar trace elements. Vertical bars are read for
            ``x``, ``y1``, ``y2`` and ``color``; horizontal bars for ``y``,
            ``x1``, ``x2`` and ``color``.
        orientation: ``"vertical"`` or ``"horizontal"``; any other value
            yields an empty table. Forwarded to the chart as ``orient``.

    Returns:
        A one-element list ``[(SymTable, MpBarChart)]``.
    """

    def as_row(category, start, end, color):
        # Without an end value the single coordinate is the bar length and
        # there is no explicit bottom.
        if end is None:
            base, extent = None, start
        else:
            base, extent = start, end - start
        return {
            "c_x": category,
            "c_bot": base,
            "c_height": extent,
            "c_color": color,
        }

    if orientation == "vertical":
        data_values = [
            as_row(vt.x, vt.y1, vt.y2, vt.color) for vt in vtrace]
    elif orientation == "horizontal":
        data_values = [
            as_row(vt.y, vt.x1, vt.x2, vt.color) for vt in vtrace]
    else:
        data_values = []

    # remove fields that contain none values
    dropped = remove_unused_fields(data_values)

    chart = MpBarChart(
        c_x="c_x",
        c_bot=None if "c_bot" in dropped else "c_bot",
        c_height="c_height",
        c_color=None if "c_color" in dropped else "c_color",
        orient=orientation)
    return [(SymTable(values=data_values), chart)]
def inv_eval(vtrace):
    """Invert a line visual trace into a wide-format (table, chart) pair.

    Segment end points are de-duplicated (first-seen order kept). When a
    colour is used, the table holds one y column per colour; otherwise the
    de-duplicated points are used as rows directly.

    Args:
        vtrace: iterable of line-segment trace elements. Each element is read
            for ``x1``, ``y1``, ``x2``, ``y2``, ``size``, ``color`` and
            ``column``.

    Returns:
        A one-element list ``[(SymTable, MpLineChart)]``, or ``[]`` when the
        per-colour table would have a missing cell.

    Raises:
        AssertionError: if the trace uses both colour and size, which this
            inverter does not support.
    """
    # Points are frozen to JSON strings so duplicates can be detected;
    # `seen` gives O(1) membership tests while `frozen_data` keeps the order.
    frozen_data = []
    seen = set()
    for vt in vtrace:
        # each end of a segment will only be added once
        for x, y in ((vt.x1, vt.y1), (vt.x2, vt.y2)):
            point = json.dumps(
                {
                    "c_x": x,
                    "c_y": y,
                    "c_size": vt.size,
                    "c_color": vt.color,
                    "c_column": vt.column,
                },
                sort_keys=True)
            if point not in seen:
                seen.add(point)
                frozen_data.append(point)

    data_values = [json.loads(r) for r in frozen_data]
    # presumably reports (and strips) the fields that are None in every row
    # -- helper is defined elsewhere, verify
    unused_fields = remove_unused_fields(data_values)
    color_used = "c_color" not in unused_fields
    size_used = "c_size" not in unused_fields

    if color_used and size_used:
        # Raised explicitly so the guard survives `python -O`, which strips
        # `assert` statements.
        raise AssertionError(
            "line traces using both color and size are not supported")

    if color_used:
        y_cols = list({r["c_color"] for r in data_values})
        col_num = 1 + len(y_cols)
        # map x to multiple y
        table_dict = {}
        for r in data_values:
            row = table_dict.setdefault(r["c_x"], {"c_x": r["c_x"]})
            row[r["c_color"]] = r["c_y"]
        table_content = list(table_dict.values())
        # every row must have a value for every colour (no missing cells)
        if any(len(row) != col_num for row in table_content):
            return []
    else:
        y_cols = ["c_y"]
        if size_used:
            # NOTE(review): this appends a nested list, giving
            # ["c_y", ["c_size"]]; a plain "c_size" may have been intended.
            # Kept as-is -- confirm against MpLineChart.
            y_cols.append(["c_size"])
        table_content = data_values

    return [(SymTable(values=table_content, constraints=[]),
             MpLineChart("c_x", y_cols))]
def inv_eval(vtrace):
    """Invert a box-plot visual trace into a (table, chart) candidate.

    Each box contributes two rows (its min and its max) and five textual
    constraints (min, max, Q1, Q3, median) on the output table.

    Args:
        vtrace: iterable of box trace elements. Each element is read for
            ``x``, ``min``, ``max``, ``Q1``, ``Q3``, ``median``, ``color``
            and ``column``.

    Returns:
        A one-element list ``[(SymTable, BoxPlot)]``.
    """
    # (trace attribute, constraint template), in emission order; every
    # template is filled with (color, column, attribute value).
    constraint_specs = (
        ("min",
         "min([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}"),
        ("max",
         "max([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}"),
        ("Q1",
         "Q1([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}"),
        ("Q3",
         "Q3([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}"),
        ("median",
         "median([r.c_y for r in T if r.c_color == {} and r.c_column == {}]) = {}"),
    )

    data_values = []
    constraints = []
    for vt in vtrace:
        # the min and max of each box appear in the table as rows
        for extreme in (vt.min, vt.max):
            data_values.append({
                "c_x": vt.x,
                "c_y": extreme,
                "c_color": vt.color,
                "c_column": vt.column,
            })
        # the output table should satisfy these constraints
        for attr, template in constraint_specs:
            constraints.append(
                template.format(vt.color, vt.column, getattr(vt, attr)))

    # remove fields that contain none values
    unused_fields = remove_unused_fields(data_values)

    encodings = []
    channel_defaults = (("x", "_"), ("y", "_"),
                        ("color", "nominal"), ("column", "nominal"))
    for channel, default_ty in channel_defaults:
        field_name = "c_{}".format(channel)
        if field_name in unused_fields:
            continue
        enc_ty = default_ty
        if channel in ("x", "y"):
            # the type needs to be determined by datatype
            column_values = [r[field_name] for r in data_values]
            if table_utils.infer_dtype(column_values) == "string":
                enc_ty = "nominal"
            else:
                enc_ty = "quantitative"
        encodings.append(Encoding(channel, field_name, enc_ty))

    chart = BoxPlot(encodings=encodings)
    return [(SymTable(data_values, constraints), chart)]
def inv_eval(vtrace):
    """Invert a point visual trace into a wide-format (table, chart) pair.

    Args:
        vtrace: re-iterable collection of point trace elements. Each element
            is read for ``x``, ``y``, ``color``, ``size`` and ``shape``.

    Returns:
        A one-element list ``[(SymTable, MpScatterPlot)]``, or ``[]`` when
        the trace uses a shape, combines several colours with size, or would
        leave a missing cell in the per-colour table.
    """
    y_cols = list({vt.color for vt in vtrace})
    size_used = any(vt.size is not None for vt in vtrace)
    shape_used = any(vt.shape is not None for vt in vtrace)
    multi_color = len(y_cols) > 1

    if shape_used or (multi_color and size_used):
        # does not support shape or size + color
        return []

    if multi_color:
        # map x to multiple y (one column per colour)
        table_dict = {}
        for vt in vtrace:
            row = table_dict.setdefault(vt.x, {"c_x": vt.x})
            row[str(vt.color)] = vt.y
        table_content = list(table_dict.values())
        # every row must have a value for every colour (no missing cells)
        expected_width = len(y_cols) + 1
        if any(len(row) != expected_width for row in table_content):
            return []
        chart = MpScatterPlot("c_x", [str(y) for y in y_cols])
        return [(SymTable(values=table_content), chart)]

    # Decided once, outside the loop: it used to be assigned per row, which
    # left it unbound when the trace was empty.
    c_size = "c_size" if size_used else None
    table_content = []
    for vt in vtrace:
        r = {"c_x": vt.x, "c_y": vt.y}
        if size_used:
            r["c_size"] = vt.size
        table_content.append(r)
    chart = MpScatterPlot("c_x", ["c_y"], c_size)
    return [(SymTable(values=table_content), chart)]
def inv_eval(vtrace):
    """Invert an area visual trace into an encoding-based (table, chart) pair.

    Every segment contributes its two end points as rows; duplicate points
    are kept once, in first-seen order.

    Args:
        vtrace: iterable of area trace elements. Each element is read for
            ``x1``, ``x2``, ``yt1``, ``yt2``, ``yb1``, ``yb2``, ``color`` and
            ``column``.

    Returns:
        A one-element list ``[(SymTable, AreaChart)]``.
    """
    # Points are frozen to JSON strings so duplicates can be detected;
    # `seen` gives O(1) membership tests while `frozen_data` keeps the order.
    frozen_data = []
    seen = set()
    for vt in vtrace:
        # each end of a segment will only be added once
        for x, top, bottom in ((vt.x1, vt.yt1, vt.yb1),
                               (vt.x2, vt.yt2, vt.yb2)):
            point = json.dumps(
                {
                    "c_x": x,
                    "c_y": top,
                    "c_y2": bottom,
                    "c_color": vt.color,
                    "c_column": vt.column,
                },
                sort_keys=True)
            if point not in seen:
                seen.add(point)
                frozen_data.append(point)

    data_values = [json.loads(r) for r in frozen_data]

    channel_types = [("x", "_"), ("y", "quantitative"),
                     ("y2", "quantitative"), ("color", "nominal"),
                     ("column", "nominal")]

    # remove fields that contain none values
    unused_fields = remove_unused_fields(data_values)

    encodings = []
    for channel, enc_ty in channel_types:
        field_name = "c_{}".format(channel)
        if field_name in unused_fields:
            continue
        if channel == "x":
            # only the x channel takes its type from the data
            dtype = table_utils.infer_dtype(
                [r[field_name] for r in data_values])
            enc_ty = "nominal" if dtype == "string" else "quantitative"
        encodings.append(Encoding(channel, field_name, enc_ty))

    chart = AreaChart(encodings=encodings)
    return [(SymTable(values=data_values), chart)]
def inv_eval(vtrace, vty):
    """Invert a visual trace of mark type ``vty`` into (table, chart) pairs.

    Without any ``column`` value the trace is handed to the inverter(s) for
    its mark type. Otherwise the trace is partitioned by ``column``, each
    partition is inverted separately, and the resulting tables are merged per
    chart class (rows tagged with ``"c_column"``) and wrapped in an
    ``MpSubplot``.

    Args:
        vtrace: re-iterable collection of trace elements; ``column`` is read
            on every element here, the remaining attributes by the per-type
            inverters.
        vty: mark type name: ``"BarV"``, ``"BarH"``, ``"Point"``, ``"Line"``
            or ``"Area"``.

    Returns:
        A list of ``(SymTable, chart)`` candidates; ``[]`` when ``vty`` is
        not recognised or when some column partition has no candidate.
    """

    def synth_per_case(_vtrace, _vty):
        # Candidates for one (sub)trace of the given mark type.
        if _vty == "BarV":
            l2 = MpGroupBarChart.inv_eval(_vtrace, orientation="vertical")
            l1 = MpBarChart.inv_eval(_vtrace, orientation="vertical")
            return l1 + l2
        if _vty == "BarH":
            l2 = MpGroupBarChart.inv_eval(_vtrace, orientation="horizontal")
            l1 = MpBarChart.inv_eval(_vtrace, orientation="horizontal")
            return l1 + l2
        if _vty == "Point":
            return MpScatterPlot.inv_eval(_vtrace)
        if _vty == "Line":
            return MpLineChart.inv_eval(_vtrace)
        if _vty == "Area":
            return MpAreaChart.inv_eval(_vtrace)
        # Unknown mark type: no candidates. This used to fall through and
        # return None, which crashed the partition loop below.
        return []

    if not any(vt.column is not None for vt in vtrace):
        return synth_per_case(vtrace, vty)

    # group trace elements by the column (subplot) they belong to
    partition = {}
    for vt in vtrace:
        partition.setdefault(vt.column, []).append(vt)

    chart_by_type = {}
    for col, col_trace in partition.items():
        layer_cand = synth_per_case(col_trace, vty)
        if not layer_cand:
            return []
        for table, chart in layer_cand:
            # the first chart seen for a chart class represents that class
            entry = chart_by_type.setdefault(
                type(chart), {"table": [], "chart": chart})
            col_table = table.values
            for r in col_table:
                r["c_column"] = col
            entry["table"] += col_table

    return [(SymTable(values=entry["table"]),
             MpSubplot(entry["chart"], "c_column"))
            for entry in chart_by_type.values()]
def inv_eval(vtrace, orientation):
    """Invert a bar visual trace into an encoding-based (table, chart) pair.

    Args:
        vtrace: iterable of bar trace elements. Vertical bars are read for
            ``x``, ``y1``, ``y2``, ``column`` and ``color``; horizontal bars
            for ``x1``, ``x2``, ``y``, ``column`` and ``color``.
        orientation: ``"horizontal"`` or ``"vertical"`` (asserted).

    Returns:
        A one-element list ``[(SymTable, BarChart)]``.
    """
    assert orientation in ("horizontal", "vertical")

    data_values = []
    if orientation == "vertical":
        # category on x, bar extent on y / y2
        data_values.extend(
            {
                "c_x": vt.x,
                "c_y": vt.y1,
                "c_y2": vt.y2,
                "c_column": vt.column,
                "c_color": vt.color,
            }
            for vt in vtrace)
        channel_types = [
            ("x", "nominal"),
            ("y", "quantitative"),
            ("y2", "quantitative"),
            ("color", "nominal"),
            ("column", "nominal"),
        ]
    elif orientation == "horizontal":
        # category on y, bar extent on x / x2
        data_values.extend(
            {
                "c_x": vt.x1,
                "c_x2": vt.x2,
                "c_y": vt.y,
                "c_column": vt.column,
                "c_color": vt.color,
            }
            for vt in vtrace)
        channel_types = [
            ("x", "quantitative"),
            ("x2", "quantitative"),
            ("y", "nominal"),
            ("color", "nominal"),
            ("column", "nominal"),
        ]

    # remove fields that contain none values
    unused_fields = remove_unused_fields(data_values)

    # one encoding per channel whose field survived the clean-up
    encodings = []
    for channel, enc_ty in channel_types:
        field_name = "c_{}".format(channel)
        if field_name not in unused_fields:
            encodings.append(Encoding(channel, field_name, enc_ty))

    bar_chart = BarChart(encodings=encodings, orientation=orientation)
    return [(SymTable(values=data_values), bar_chart)]
def inv_eval(vtrace):
    """Invert a point visual trace into an encoding-based (table, chart) pair.

    The mark type is ``"rect"`` when the first trace element has
    ``point_shape == "rect"`` and ``"point"`` otherwise.

    Args:
        vtrace: non-empty sequence of point trace elements. Each element is
            read for ``x``, ``y``, ``size``, ``color``, ``shape`` and
            ``column``; the first one also for ``point_shape``.

    Returns:
        A one-element list ``[(SymTable, ScatterPlot)]``.
    """
    is_rect = vtrace[0].point_shape == "rect"
    mark_ty = "rect" if is_rect else "point"

    data_values = [
        {
            "c_x": vt.x,
            "c_y": vt.y,
            "c_size": vt.size,
            "c_color": vt.color,
            "c_shape": vt.shape,
            "c_column": vt.column,
        }
        for vt in vtrace
    ]

    # remove fields that contain none values
    unused_fields = remove_unused_fields(data_values)

    # Channels whose encoding type is determined by the datatype of their
    # values; colour joins them only for rect marks.
    inferred_channels = ["x", "y", "size"]
    if is_rect:
        inferred_channels.append("color")

    channel_defaults = (("x", "_"), ("y", "_"), ("size", "_"),
                        ("color", "nominal"), ("shape", "nominal"),
                        ("column", "nominal"))
    encodings = []
    for channel, default_ty in channel_defaults:
        field_name = "c_{}".format(channel)
        if field_name in unused_fields:
            continue
        enc_ty = default_ty
        if channel in inferred_channels:
            column_values = [r[field_name] for r in data_values]
            if table_utils.infer_dtype(column_values) == "string":
                enc_ty = "nominal"
            else:
                enc_ty = "quantitative"
        encodings.append(Encoding(channel, field_name, enc_ty))

    chart = ScatterPlot(mark_ty=mark_ty, encodings=encodings)
    return [(SymTable(values=data_values), chart)]