def test_auto_bin_count(ds):
    """Check how many bins ``Bins`` creates when no bin count is requested."""
    auto_binned = Bins(source=ds, column='mpg')
    assert len(auto_binned.bins) == 12

    # Degenerate input where every value is identical: the computed bin count
    # should still agree with the approach discussed at
    # http://stats.stackexchange.com/questions/114490/optimal-bin-width-for-two-dimensional-histogram
    constant_values = [5, 5, 5, 5, 5]
    constant_binned = Bins(values=constant_values, bins=None)
    assert len(constant_binned.bins) == 3
class ColorAttr(AttrSpec):
    """An attribute specification for mapping unique data values to colors.

    When ``bin`` is true, a single column is split into as many bins as there
    are entries in ``iterable`` (the palette), and the bin labels, rather than
    the raw values, become the items that colors are assigned to.

    .. note::
        Should be expanded to support more complex coloring options.
    """
    attrname = Override(default='color')
    iterable = Override(default=DEFAULT_PALETTE)
    bin = Bool(default=False)

    def __init__(self, **kwargs):
        """Create the spec, accepting ``palette`` as an alias for ``iterable``.

        Args:
            **kwargs: forwarded to the parent initializer. A ``palette`` entry
                is always removed; when it is not ``None`` it is passed on as
                ``iterable`` (replacing any ``iterable`` given explicitly).
        """
        palette = kwargs.pop('palette', None)
        if palette is not None:
            kwargs['iterable'] = palette
        super(ColorAttr, self).__init__(**kwargs)

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item.

        Without binning this defers entirely to the parent implementation.
        With binning, ``self.bins`` is built from the single column and
        ``self.items`` is set to the first label of each bin.

        Args:
            df: data indexable by column name (``df[columns[0]]``).
            columns: names of the columns assigned to this attribute.

        Raises:
            ValueError: if binning is enabled and ``columns`` is not exactly
                one column accepted by ``ChartDataSource.is_number``.
        """
        if not self.bin:
            super(ColorAttr, self)._generate_items(df, columns)
            return

        if len(columns) != 1 or not ChartDataSource.is_number(df[columns[0]]):
            # Adjacent literals instead of a backslash continuation inside the
            # string, which embedded the source indentation in the message.
            raise ValueError('Binned colors can only be created for one '
                             'column of numerical data.')

        # one bin per available color
        self.bins = Bins(source=ColumnDataSource(df),
                         column=columns[0],
                         bins=len(self.iterable),
                         aggregate=False)

        if self.sort:
            self.bins.sort(ascending=self.ascending)

        self.items = [bin_.label[0] for bin_ in self.bins]

    def add_bin_labels(self, data):
        """Replace the binned column's values in ``data`` with bin labels.

        The original values are kept in a new ``<column>_values`` column, and
        the binned column is converted to a ``pd.Categorical`` whose
        categories are ``self.items``.

        Args:
            data: object whose ``_data`` attribute is the pandas DataFrame to
                modify in place (presumably a ChartDataSource; confirm
                against callers).
        """
        col = self.columns[0]
        values_col = col + '_values'
        # save original values into new column
        data._data[values_col] = data._data[col]

        for bin_ in self.bins:
            # set all rows associated to each bin to the bin label being
            # mapped to colors; ``.loc`` replaces ``.ix``, which was
            # deprecated in pandas 0.20 and removed in pandas 1.0
            in_bin = data._data[values_col].isin(bin_.values)
            data._data.loc[in_bin, col] = bin_.label[0]

        data._data[col] = pd.Categorical(data._data[col],
                                         categories=list(self.items),
                                         ordered=self.sort)
示例#3
0
文件: attributes.py 项目: 0-T-0/bokeh
class ColorAttr(AttrSpec):
    """An attribute specification for mapping unique data values to colors.

    When ``bin`` is true, a single column is split into as many bins as there
    are entries in ``iterable`` (the palette), and the bin labels, rather than
    the raw values, become the items that colors are assigned to.

    .. note::
        Should be expanded to support more complex coloring options.
    """
    attrname = Override(default='color')
    iterable = Override(default=DEFAULT_PALETTE)
    bin = Bool(default=False)

    def __init__(self, **kwargs):
        """Create the spec, accepting ``palette`` as an alias for ``iterable``.

        Args:
            **kwargs: forwarded to the parent initializer. A ``palette`` entry
                is always removed; when it is not ``None`` it is passed on as
                ``iterable`` (replacing any ``iterable`` given explicitly).
        """
        palette = kwargs.pop('palette', None)
        if palette is not None:
            kwargs['iterable'] = palette
        super(ColorAttr, self).__init__(**kwargs)

    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item.

        Without binning this defers entirely to the parent implementation.
        With binning, ``self.bins`` is built from the single column and
        ``self.items`` is set to the first label of each bin.

        Args:
            df: data indexable by column name (``df[columns[0]]``).
            columns: names of the columns assigned to this attribute.

        Raises:
            ValueError: if binning is enabled and ``columns`` is not exactly
                one column accepted by ``ChartDataSource.is_number``.
        """
        if not self.bin:
            super(ColorAttr, self)._generate_items(df, columns)
            return

        if len(columns) != 1 or not ChartDataSource.is_number(df[columns[0]]):
            # Adjacent literals instead of a backslash continuation inside the
            # string, which embedded the source indentation in the message.
            raise ValueError('Binned colors can only be created for one '
                             'column of numerical data.')

        # one bin per available color
        self.bins = Bins(source=ColumnDataSource(df), column=columns[0],
                         bin_count=len(self.iterable), aggregate=False)

        if self.sort:
            self.bins.sort(ascending=self.ascending)

        self.items = [bin_.label[0] for bin_ in self.bins]

    def add_bin_labels(self, data):
        """Replace the binned column's values in ``data`` with bin labels.

        The original values are kept in a new ``<column>_values`` column, and
        the binned column is converted to a ``pd.Categorical`` whose
        categories are ``self.items``.

        Args:
            data: object whose ``_data`` attribute is the pandas DataFrame to
                modify in place (presumably a ChartDataSource; confirm
                against callers).
        """
        col = self.columns[0]
        values_col = col + '_values'
        # save original values into new column
        data._data[values_col] = data._data[col]

        for bin_ in self.bins:
            # set all rows associated to each bin to the bin label being
            # mapped to colors; ``.loc`` replaces ``.ix``, which was
            # deprecated in pandas 0.20 and removed in pandas 1.0
            in_bin = data._data[values_col].isin(bin_.values)
            data._data.loc[in_bin, col] = bin_.label[0]

        data._data[col] = pd.Categorical(data._data[col], categories=list(self.items),
                                         ordered=self.sort)
示例#4
0
    def _generate_items(self, df, columns):
        """Produce list of unique tuples that identify each item.

        Without binning this defers entirely to the parent implementation.
        With binning, ``self.bins`` is built from the single column and
        ``self.items`` is set to the first label of each bin.

        Args:
            df: data indexable by column name (``df[columns[0]]``).
            columns: names of the columns assigned to this attribute.

        Raises:
            ValueError: if binning is enabled and ``columns`` is not exactly
                one column accepted by ``ChartDataSource.is_number``.
        """
        if not self.bin:
            super(ColorAttr, self)._generate_items(df, columns)
            return

        if len(columns) != 1 or not ChartDataSource.is_number(df[columns[0]]):
            # Adjacent literals instead of a backslash continuation inside the
            # string, which embedded the source indentation in the message.
            raise ValueError('Binned colors can only be created for one '
                             'column of numerical data.')

        # one bin per available color
        self.bins = Bins(source=ColumnDataSource(df), column=columns[0],
                         bin_count=len(self.iterable), aggregate=False)

        if self.sort:
            self.bins.sort(ascending=self.ascending)

        self.items = [bin_.label[0] for bin_ in self.bins]
def test_bin_labeling(ds):
    """After binning 'cyl' into two bins, 'cyl_bin' holds two distinct labels."""
    # Constructed only for its effect on ``ds``; the instance is not needed.
    Bins(source=ds, column='cyl', bins=2)

    labels = pd.Series(ds.data['cyl_bin'])
    distinct_labels = labels.drop_duplicates()
    assert len(distinct_labels) == 2
def test_explicit_bin_count(ds):
    """Requesting two bins explicitly must yield exactly two bins."""
    requested = 2
    binned = Bins(source=ds, column='mpg', bins=requested)
    assert len(binned.bins) == 2
示例#7
0
def test_auto_bin_count(ds):
    """Check how many bins ``Bins`` creates when no bin count is requested."""
    b = Bins(source=ds, column='mpg')
    assert len(b.bins) == 12