def test_auto_bin_count(ds): b = Bins(source=ds, column='mpg') assert len(b.bins) == 12 # this should test it still matches # http://stats.stackexchange.com/questions/114490/optimal-bin-width-for-two-dimensional-histogram # with iterables with the same value b = Bins(values=[5, 5, 5, 5, 5], bins=None) assert len(b.bins) == 3
class ColorAttr(AttrSpec): """An attribute specification for mapping unique data values to colors. .. note:: Should be expanded to support more complex coloring options. """ attrname = Override(default='color') iterable = Override(default=DEFAULT_PALETTE) bin = Bool(default=False) def __init__(self, **kwargs): iterable = kwargs.pop('palette', None) if iterable is not None: kwargs['iterable'] = iterable super(ColorAttr, self).__init__(**kwargs) def _generate_items(self, df, columns): """Produce list of unique tuples that identify each item.""" if not self.bin: super(ColorAttr, self)._generate_items(df, columns) else: if len(columns) == 1 and ChartDataSource.is_number(df[columns[0]]): self.bins = Bins(source=ColumnDataSource(df), column=columns[0], bins=len(self.iterable), aggregate=False) if self.sort: self.bins.sort(ascending=self.ascending) self.items = [bin.label[0] for bin in self.bins] else: raise ValueError( 'Binned colors can only be created for one column of \ numerical data.') def add_bin_labels(self, data): col = self.columns[0] # save original values into new column data._data[col + '_values'] = data._data[col] for bin in self.bins: # set all rows associated to each bin to the bin label being mapped to colors data._data.ix[data._data[col + '_values'].isin(bin.values), col] = bin.label[0] data._data[col] = pd.Categorical(data._data[col], categories=list(self.items), ordered=self.sort)
class ColorAttr(AttrSpec): """An attribute specification for mapping unique data values to colors. .. note:: Should be expanded to support more complex coloring options. """ attrname = Override(default='color') iterable = Override(default=DEFAULT_PALETTE) bin = Bool(default=False) def __init__(self, **kwargs): iterable = kwargs.pop('palette', None) if iterable is not None: kwargs['iterable'] = iterable super(ColorAttr, self).__init__(**kwargs) def _generate_items(self, df, columns): """Produce list of unique tuples that identify each item.""" if not self.bin: super(ColorAttr, self)._generate_items(df, columns) else: if len(columns) == 1 and ChartDataSource.is_number(df[columns[0]]): self.bins = Bins(source=ColumnDataSource(df), column=columns[0], bin_count=len(self.iterable), aggregate=False) if self.sort: self.bins.sort(ascending=self.ascending) self.items = [bin.label[0] for bin in self.bins] else: raise ValueError('Binned colors can only be created for one column of \ numerical data.') def add_bin_labels(self, data): col = self.columns[0] # save original values into new column data._data[col + '_values'] = data._data[col] for bin in self.bins: # set all rows associated to each bin to the bin label being mapped to colors data._data.ix[data._data[col + '_values'].isin(bin.values), col] = bin.label[0] data._data[col] = pd.Categorical(data._data[col], categories=list(self.items), ordered=self.sort)
def _generate_items(self, df, columns): """Produce list of unique tuples that identify each item.""" if not self.bin: super(ColorAttr, self)._generate_items(df, columns) else: if len(columns) == 1 and ChartDataSource.is_number(df[columns[0]]): self.bins = Bins(source=ColumnDataSource(df), column=columns[0], bin_count=len(self.iterable), aggregate=False) if self.sort: self.bins.sort(ascending=self.ascending) self.items = [bin.label[0] for bin in self.bins] else: raise ValueError('Binned colors can only be created for one column of \ numerical data.')
def test_bin_labeling(ds): Bins(source=ds, column='cyl', bins=2) assert len(pd.Series(ds.data['cyl_bin']).drop_duplicates()) == 2
def test_explicit_bin_count(ds): b = Bins(source=ds, column='mpg', bins=2) assert len(b.bins) == 2
def test_auto_bin_count(ds): b = Bins(source=ds, column='mpg') assert len(b.bins) == 12