def spec(self, spec):
    """Assign a new spec to these axes.

    When a VegaLite object already exists, its ``spec`` attribute is
    replaced.  Otherwise the spec is stored on the instance and a VegaLite
    object is created from it together with the stored data.
    """
    vlspec = self._vlspec
    if vlspec is None:
        # Now that a spec is available, the VegaLite object can be built.
        self._spec = spec
        self._vlspec = VegaLite(self._spec, self._data)
        return
    vlspec.spec = spec
def __init__(self, spec=None, data=None):
    """Store the spec and data, building the VegaLite object if possible.

    Parameters
    ----------
    spec : dictionary, optional
        the initial spec. A falsy value is stored as an empty dict.
    data : optional
        data handed to VegaLite together with the spec.
    """
    self._spec = spec or {}
    self._data = data
    # With an explicit spec the VegaLite object is created right away,
    # because it will make some modifications to the spec that should be
    # visible through ``ax.spec``.  Without one it stays None and is
    # computed on demand, which allows instantiating an empty axis and
    # building from there.
    self._vlspec = None if spec is None else VegaLite(spec, data)
def vgplot_series_hist(ser, bins=10, interactive=True, width=450, height=300):
    """Build a binned bar-chart (histogram) spec for a single series.

    Parameters
    ----------
    ser : series-like
        object providing ``to_frame()``; its first column is binned.
    bins : int
        value used for the ``maxbins`` setting of the x binning.
    interactive : bool
        if True, the entries of INTERACTIVE_SCALES are merged into the spec.
    width, height : int
        values stored under the spec's "width" and "height" keys.

    Returns
    -------
    VegaLite object built from the spec and the one-column frame.
    """
    frame = ser.to_frame()
    # Column labels are converted to strings before being used as fields.
    frame.columns = [str(label) for label in frame.columns]

    x_channel = {
        "bin": {"maxbins": bins},
        "field": frame.columns[0],
        "type": "quantitative",
    }
    y_channel = {"aggregate": "count", "type": "quantitative"}
    chart = {
        "mark": "bar",
        "encoding": {"x": x_channel, "y": y_channel},
        "width": width,
        "height": height,
    }

    if interactive:
        chart.update(INTERACTIVE_SCALES)
    return VegaLite(chart, data=frame)
def vgplot_series_barh(ser, interactive=True, width=450, height=300):
    """Build a horizontal bar-chart spec for a single series.

    The series index is moved into a column with ``reset_index()``; the
    resulting frame must have exactly two columns.  The second column
    (the values) is drawn along x and the first (the former index) along y.

    Parameters
    ----------
    ser : series-like
        object providing ``reset_index()``.
    interactive : bool
        if True, the entries of INTERACTIVE_SCALES are merged into the spec.
    width, height : int
        values stored under the spec's "width" and "height" keys.

    Returns
    -------
    VegaLite object built from the spec and the two-column frame.
    """
    frame = ser.reset_index()
    frame.columns = [str(label) for label in frame.columns]
    index_col, value_col = frame.columns

    x_channel = {
        "field": value_col,
        "type": infer_vegalite_type(frame[value_col]),
    }
    y_channel = {
        "field": index_col,
        "type": infer_vegalite_type(frame[index_col], ordinal_threshold=50),
    }
    chart = {
        "$schema": "https://vega.github.io/schema/vega-lite/v2.json",
        "mark": "bar",
        "encoding": {"x": x_channel, "y": y_channel},
        "width": width,
        "height": height,
    }

    if interactive:
        chart.update(INTERACTIVE_SCALES)
    return VegaLite(chart, data=frame)
def series_plot(self, data, interactive=True, width=450, height=300, **kwds):
    """Build an area-chart spec for a single series.

    The index of ``data`` is moved into a column with ``reset_index()``;
    the resulting frame must have exactly two columns, used as the x and y
    fields respectively.

    Parameters
    ----------
    data : series-like
        object providing ``reset_index()``.
    interactive, width, height :
        forwarded to ``self.vgl_spec`` along with the base spec.
    **kwds :
        extra keywords, handed to ``self._warn_if_unused_keywords``.

    Returns
    -------
    VegaLite object built from the finished spec and the frame.
    """
    self._warn_if_unused_keywords(kwds)

    frame = data.reset_index()
    frame.columns = [str(label) for label in frame.columns]
    x_field, y_field = frame.columns

    x_channel = {"field": x_field, "type": infer_vegalite_type(frame[x_field])}
    y_channel = {"field": y_field, "type": infer_vegalite_type(frame[y_field])}
    base = {"mark": "area", "encoding": {"x": x_channel, "y": y_channel}}

    finished = self.vgl_spec(base, interactive=interactive,
                             width=width, height=height)
    return VegaLite(finished, data=frame)
def frame_plot(self, data, x=None, y=None, stacked=True,
               var_name='variable', value_name='value',
               interactive=True, width=450, height=300, **kwds):
    """Build an area-chart spec for a frame, colored by variable.

    The frame is reshaped through ``self._melt_frame``; the first column of
    the result becomes the x field, ``value_name`` the y field and
    ``var_name`` the color field.

    Parameters
    ----------
    data : dataframe
        the data passed to ``self._melt_frame``.
    x : optional
        passed to ``self._melt_frame`` as ``index``.
    y : optional
        if truthy, ``[y]`` is passed to ``self._melt_frame`` as ``usecols``.
    stacked : bool
        if truthy the y channel gets ``stack='zero'``; otherwise the stack
        is set to None and a fixed opacity of 0.7 is added.
    var_name, value_name : str
        column names passed to ``self._melt_frame`` and used as fields.
    interactive, width, height :
        forwarded to ``self.vgl_spec`` along with the base spec.
    **kwds :
        extra keywords, handed to ``self._warn_if_unused_keywords``.

    Returns
    -------
    VegaLite object built from the finished spec and the melted frame.
    """
    self._warn_if_unused_keywords(kwds)

    if y:
        usecols = [y]
    else:
        usecols = None
    melted = self._melt_frame(data, index=x, usecols=usecols,
                              var_name=var_name, value_name=value_name)
    x = melted.columns[0]

    encoding = {
        "x": {
            "field": x,
            "type": infer_vegalite_type(melted[x]),
        },
        "y": {
            "field": value_name,
            "type": infer_vegalite_type(melted[value_name]),
        },
        "color": {
            "field": var_name,
            "type": infer_vegalite_type(melted[var_name],
                                        ordinal_threshold=10),
        },
    }

    encoding['y']['stack'] = 'zero' if stacked else None
    if not stacked:
        # Unstacked areas are drawn with reduced opacity.
        encoding['opacity'] = {'value': 0.7}

    base = {"mark": "area", "encoding": encoding}
    finished = self.vgl_spec(base, interactive=interactive,
                             width=width, height=height)
    return VegaLite(finished, data=melted)
def vgplot_df_barh(df, x, y, stacked=False, interactive=True,
                   width=450, height=300):
    """Build a horizontal bar-chart spec for a dataframe.

    The frame is melted to long form with ``x`` as the identifier column;
    the melted "value" column is drawn along the x axis, ``x`` along the
    y axis, and the "variable" column selects the color.

    Parameters
    ----------
    df : dataframe
        the input data. It is not modified.
    x : str or None
        column used for the categorical (vertical) axis. If None, the index
        is used; an unnamed index is labelled 'index'.
    y : str or None
        if given, only this column is plotted against ``x``.
    stacked : bool
        if truthy the bars get ``stack='zero'``; otherwise the stack is set
        to None and a fixed opacity of 0.7 is added.
    interactive : bool
        if True, the entries of INTERACTIVE_SCALES are merged into the spec.
    width, height : int
        values stored under the spec's "width" and "height" keys.

    Returns
    -------
    VegaLite object built from the spec and the melted frame.

    Raises
    ------
    AssertionError : if ``x`` or ``y`` is not a column of the frame.
    """
    if x is None:
        if df.index.name is None:
            # Label the index on a renamed copy: assigning to
            # ``df.index.name`` would rename the caller's index in place.
            df = df.rename_axis('index')
        x = df.index.name
        df = df.reset_index()
    assert x in df.columns

    if y is not None:
        assert y in df.columns
        df = df[[x, y]]

    df = df.melt([x], var_name='variable', value_name='value')

    spec = {
        "$schema": "https://vega.github.io/schema/vega-lite/v2.json",
        "mark": "bar",
        "encoding": {
            "x": {
                "field": "value",
                "type": infer_vegalite_type(df["value"]),
            },
            "y": {
                "field": x,
                "type": infer_vegalite_type(df[x], ordinal_threshold=50),
            },
            "color": {
                "field": "variable",
                "type": infer_vegalite_type(df["variable"]),
            },
        },
        "width": width,
        "height": height,
    }

    if stacked:
        spec['encoding']['x']['stack'] = 'zero'
    else:
        spec['encoding']['x']['stack'] = None
        spec['encoding']['opacity'] = {"value": 0.7}

    if interactive:
        spec.update(INTERACTIVE_SCALES)

    return VegaLite(spec, data=df)
def frame_plot(self, data, x=None, y=None,
               var_name='variable', value_name='value',
               interactive=True, width=450, height=300, **kwds):
    """Build a line-chart spec for a frame, colored by variable.

    The frame is reshaped through ``self._melt_frame``; the first column of
    the result becomes the x field, ``value_name`` the y field and
    ``var_name`` the color field.

    Parameters
    ----------
    data : dataframe
        the data passed to ``self._melt_frame``.
    x : optional
        passed to ``self._melt_frame`` as ``index``.
    y : optional
        if truthy, ``[y]`` is passed to ``self._melt_frame`` as ``usecols``.
    var_name, value_name : str
        column names passed to ``self._melt_frame`` and used as fields.
    interactive, width, height :
        forwarded to ``self.vgl_spec`` along with the base spec.
    **kwds :
        extra keywords, handed to ``self._warn_if_unused_keywords``.

    Returns
    -------
    VegaLite object built from the finished spec and the melted frame.
    """
    self._warn_if_unused_keywords(kwds)

    usecols = [y] if y else None
    melted = self._melt_frame(data, index=x, usecols=usecols,
                              var_name=var_name, value_name=value_name)
    x = melted.columns[0]

    x_channel = {"field": x, "type": infer_vegalite_type(melted[x])}
    y_channel = {
        "field": value_name,
        "type": infer_vegalite_type(melted[value_name]),
    }
    color_channel = {
        "field": var_name,
        "type": infer_vegalite_type(melted[var_name], ordinal_threshold=10),
    }
    base = {
        "mark": "line",
        "encoding": {"x": x_channel, "y": y_channel, "color": color_channel},
    }

    finished = self.vgl_spec(base, interactive=interactive,
                             width=width, height=height)
    return VegaLite(finished, data=melted)
def frame_plot(self, data, x, y, c=None, s=None,
               interactive=True, width=450, height=300, **kwds):
    """Build a scatter ("circle" mark) spec from columns of a frame.

    Parameters
    ----------
    data : dataframe
        the source data; only the referenced columns are passed on.
    x, y :
        columns used for the x and y channels.
    c : optional
        column used for the color channel, if not None.
    s : optional
        column used for the size channel, if not None.
    interactive, width, height :
        forwarded to ``self.vgl_spec`` along with the base spec.
    **kwds :
        extra keywords, handed to ``self._warn_if_unused_keywords``.

    Returns
    -------
    VegaLite object built from the finished spec and ``data[cols]``.
    """
    self._warn_if_unused_keywords(kwds)

    used_columns = [x, y]
    channels = {
        "x": {"field": x, "type": infer_vegalite_type(data[x])},
        "y": {"field": y, "type": infer_vegalite_type(data[y])},
    }

    # Optional channels are added in a fixed order: color first, then size.
    for channel, column in (('color', c), ('size', s)):
        if column is None:
            continue
        used_columns.append(column)
        channels[channel] = {
            'field': column,
            'type': infer_vegalite_type(data[column]),
        }

    base = {"mark": "circle", "encoding": channels}
    finished = self.vgl_spec(base, interactive=interactive,
                             width=width, height=height)
    return VegaLite(finished, data=data[used_columns])
def _add_layer(self, spec, data=None):
    """Add spec as a layer to the current axes.

    Parameters
    ----------
    spec : dictionary
        the spec to be added. If this is the first spec in the axis, every
        part of it will be added. Otherwise, only the 'encoding', 'mark',
        'data', 'transform', and 'description' attributes will be added.
    data : dataframe, optional
        if specified, add this data to the layer.

    Returns
    -------
    self : Axes instance
    """
    layer_keys = ['encoding', 'mark', 'data', 'transform', 'description']
    incoming = VegaLite(spec, data).spec

    if self.spec:
        if 'layer' not in self.spec:
            # The current axes spec is unlayered: move its plot-level
            # entries (including any selection) into a first layer.
            first_layer = {}
            for key in layer_keys + ['selection']:
                if key in self.spec:
                    first_layer[key] = self.spec.pop(key)
            self.spec['layer'] = [first_layer]

        # Competing selections in a single layer cause problems, so
        # selections are limited to the first layer for simplicity.
        new_layer = {}
        for key in layer_keys:
            if key in incoming:
                new_layer[key] = incoming[key]
        self.spec['layer'].append(new_layer)
    else:
        # The current axes spec is empty; replace it entirely.
        self.spec = incoming

    # TODO: vega/vega3 raises an error without data defined at top level.
    # This needs an upstream fix; in the meantime we get around it this way:
    if 'data' not in self.spec:
        self.spec['data'] = {'name': 'no-toplevel-data'}

    return self
def vgplot_df_hist(df, by=None, bins=10, stacked=False, interactive=True,
                   width=450, height=300):
    """Build a binned bar-chart (histogram) spec for a dataframe.

    The frame is melted into "variable"/"value" columns; values are binned
    along x, counted along y and colored by variable.

    Parameters
    ----------
    df : dataframe
        the input data.
    by : optional
        not supported; any value other than None raises.
    bins : int
        value used for the ``maxbins`` setting of the x binning.
    stacked : bool
        if truthy the y channel uses ``stack='zero'``, otherwise None.
    interactive : bool
        if True, the entries of INTERACTIVE_SCALES are merged into the spec.
    width, height : int
        values stored under the spec's "width" and "height" keys.

    Returns
    -------
    VegaLite object built from the spec and the melted frame.

    Raises
    ------
    NotImplementedError : if ``by`` is not None.
    """
    if by is not None:
        raise NotImplementedError('vgplot.hist `by` keyword')

    melted = df.melt(var_name='variable', value_name='value')

    if stacked:
        stack_mode = 'zero'
    else:
        stack_mode = None

    x_channel = {
        "bin": {"maxbins": bins},
        "field": "value",
        "type": "quantitative",
    }
    y_channel = {
        "aggregate": "count",
        "type": "quantitative",
        "stack": stack_mode,
    }
    color_channel = {"field": "variable", "type": "nominal"}
    chart = {
        "mark": "bar",
        "encoding": {"x": x_channel, "y": y_channel, "color": color_channel},
        "width": width,
        "height": height,
    }

    if interactive:
        chart.update(INTERACTIVE_SCALES)
    return VegaLite(chart, data=melted)
def vgplot_df_hexbin(df, x, y, C=None, reduce_C_function=None,
                     gridsize=100, interactive=True, width=450, height=300):
    """Build a binned 2D heatmap spec standing in for a hexbin plot.

    Parameters
    ----------
    df : dataframe
        the input data; only the referenced columns are kept.
    x, y :
        columns binned along the x and y axes.
    C : optional
        if None, cells are colored by count; otherwise by the mean of
        this column.
    reduce_C_function : optional
        not supported; any value other than None raises.
    gridsize : int
        value used for the ``maxbins`` setting on both axes.
    interactive : bool
        if True, the entries of INTERACTIVE_SCALES are merged into the spec.
    width, height : int
        values stored under the spec's "width" and "height" keys.

    Returns
    -------
    VegaLite object built from the spec and the column-restricted frame.

    Raises
    ------
    NotImplementedError : if ``reduce_C_function`` is not None.
    """
    # TODO: Use actual hexbins rather than a grid heatmap
    if reduce_C_function is not None:
        raise NotImplementedError("Custom reduce_C_function in hexbin")

    columns = [x, y] if C is None else [x, y, C]
    df = df[columns]

    if C is None:
        color_channel = {"aggregate": "count", "type": "quantitative"}
    else:
        color_channel = {
            "field": C,
            "aggregate": "mean",
            "type": "quantitative",
        }

    x_channel = {
        "field": x,
        "bin": {"maxbins": gridsize},
        "type": "quantitative",
    }
    y_channel = {
        "field": y,
        "bin": {"maxbins": gridsize},
        "type": "quantitative",
    }
    chart = {
        "$schema": "https://vega.github.io/schema/vega-lite/v2.json",
        "encoding": {"x": x_channel, "y": y_channel, "color": color_channel},
        "config": {
            "range": {"heatmap": {"scheme": "greenblue"}},
            "view": {"stroke": "transparent"},
        },
        "mark": "rect",
        "width": width,
        "height": height,
    }

    if interactive:
        chart.update(INTERACTIVE_SCALES)
    return VegaLite(chart, data=df)
def display(self):
    """Display the plot, creating the VegaLite object first if needed.

    Returns whatever the VegaLite object's ``display()`` returns.
    """
    vlspec = self._vlspec
    if vlspec is None:
        # Lazily build the VegaLite object from the stored spec and data.
        vlspec = self._vlspec = VegaLite(self._spec, self._data)
    return vlspec.display()
def display(self):
    """Display the plot after checking the row limit.

    The VegaLite object is created from the stored spec and data if it does
    not exist yet; ``self._check_max_rows()`` runs before displaying.

    Returns whatever the VegaLite object's ``display()`` returns.
    """
    needs_build = self._vlspec is None
    if needs_build:
        self._vlspec = VegaLite(self._spec, self._data)

    # The row check must come after VegaLite has modified the spec.
    self._check_max_rows()

    renderer = self._vlspec
    return renderer.display()
class Axes(object):
    """Class representing a pdvega plot axes"""
    max_rows = 10000  # default value; can be overridden by the class instances

    def __init__(self, spec=None, data=None):
        """Store the spec and data, building the VegaLite object if possible.

        Parameters
        ----------
        spec : dictionary, optional
            the initial spec. A falsy value is stored as an empty dict.
        data : optional
            data handed to VegaLite together with the spec.
        """
        self._spec = spec or {}
        self._data = data
        if spec is not None:
            # If spec is specified, we need to immediately instantiate the
            # VegaLite object, because it will make some modifications to
            # the spec that we'd like to be able to see by doing ax.spec
            self._vlspec = VegaLite(spec, data)
        else:
            # If the spec is not specified, we set the vegalite object to None
            # and compute it on demand. This allows us to instantiate an empty
            # axis and build from there.
            self._vlspec = None

    @property
    def spec(self):
        """The VegaLite object's spec if it exists, else the stored spec."""
        if self._vlspec is not None:
            return self._vlspec.spec
        else:
            return self._spec

    @spec.setter
    def spec(self, spec):
        if self._vlspec is not None:
            self._vlspec.spec = spec
        else:
            # if we are setting the spec, then we can instantiate the
            # VegaLite object.
            self._spec = spec
            self._vlspec = VegaLite(self._spec, self._data)

    @property
    def data(self):
        """The VegaLite object's data if it exists, else the stored data."""
        if self._vlspec is not None:
            return self._vlspec.data
        else:
            return self._data

    @data.setter
    def data(self, data):
        # A property setter receives the assigned value as its second
        # argument; without it ``ax.data = ...`` fails with a TypeError.
        if self._vlspec is not None:
            self._vlspec.data = data
        else:
            self._data = data

    @property
    def spec_no_data(self):
        """A shallow copy of the spec without its top-level 'data' entry."""
        return {key: val for key, val in self.spec.items() if key != 'data'}

    def _check_max_rows(self):
        """
        Ensure that the number of rows in the largest dataset embedded
        in the spec or its layers is smaller than self.max_rows,
        which defaults to 10000 unless overridden by the Axes instance.

        Raises
        ------
        MaxRowsExceeded : if the dataset is too large.
        """
        nrows = 0
        # Look at the top-level spec as well as each of its layers.
        specs = [self.spec] + self.spec.get('layer', [])
        for spec in specs:
            if 'data' in spec and 'values' in spec['data']:
                nrows = max(nrows, len(spec['data']['values']))
        if nrows > self.max_rows:
            raise MaxRowsExceeded(nrows, self.max_rows)

    def _ipython_display_(self):
        """Display the plot after checking the row limit.

        Returns whatever the VegaLite object's ``_ipython_display_()``
        returns.
        """
        if self._vlspec is None:
            self._vlspec = VegaLite(self._spec, self._data)
        # check max rows after VegaLite modifies the spec
        self._check_max_rows()
        return self._vlspec._ipython_display_()

    def display(self):
        """Display the plot after checking the row limit.

        Returns whatever the VegaLite object's ``display()`` returns.
        """
        if self._vlspec is None:
            self._vlspec = VegaLite(self._spec, self._data)
        # check max rows after VegaLite modifies the spec
        self._check_max_rows()
        return self._vlspec.display()

    def _add_layer(self, spec, data=None):
        """Add spec as a layer to the current axes.

        Parameters
        ----------
        spec : dictionary
            the spec to be added. If this is the first spec in the axis,
            every part of it will be added. Otherwise, only the 'encoding',
            'mark', 'data', 'transform', and 'description' attributes will
            be added.
        data : dataframe, optional
            if specified, add this data to the layer.

        Returns
        -------
        self : Axes instance
        """
        spec = VegaLite(spec, data).spec
        if not self.spec:
            # current axes spec is empty; replace it entirely with the new one
            self.spec = spec
        else:
            if 'layer' not in self.spec:
                # current axes spec is unlayered; move it to a layer
                keys = [
                    'encoding', 'mark', 'data', 'transform', 'description',
                    'selection'
                ]
                self.spec['layer'] = [{
                    key: self.spec.pop(key)
                    for key in keys if key in self.spec
                }]
            # Competing selections in a single layer cause problems, so we
            # limit selections to the first layer for simplicity.
            keys = ['encoding', 'mark', 'data', 'transform', 'description']
            self.spec['layer'].append(
                {key: spec[key] for key in keys if key in spec})
        # TODO: vega/vega3 raises an error without data defined at top level.
        # This needs an upstream fix; in the meantime we get around it this way:
        if 'data' not in self.spec:
            self.spec['data'] = {'name': 'no-toplevel-data'}
        return self
def display(self):
    """Wrap this object's dict form in a vega3 ``VegaLite`` object.

    Returns the ``VegaLite`` instance built from ``self.to_dict()``.
    """
    # Imported here rather than at module level, as in the original code.
    from vega3 import VegaLite

    spec_dict = self.to_dict()
    return VegaLite(spec_dict)
def _ipython_display_(self):
    """Display the plot, creating the VegaLite object first if needed.

    Returns whatever the VegaLite object's ``_ipython_display_()`` returns.
    """
    vlspec = self._vlspec
    if vlspec is None:
        # Lazily build the VegaLite object from the stored spec and data.
        vlspec = self._vlspec = VegaLite(self._spec, self._data)
    return vlspec._ipython_display_()