def __init__(self, low, high):
    """
    Validate and store the lower and upper limits.

    Both limits are required and ``high - low`` must be a valid
    operation.

    Raises GgplotError if either limit is None or if the two limits
    cannot be subtracted from one another.
    """
    if any(limit is None for limit in (low, high)):
        raise GgplotError("Arguments to",
                          self.__class__.__name__,
                          "cannot be None")

    # Subtraction is used as a cheap "is this numeric?" probe
    try:
        high - low
    except TypeError:
        raise GgplotError("Arguments to",
                          self.__class__.__name__,
                          "must be of a numeric type")

    self.low = low
    self.high = high
def _calculate(self, data):
    """
    Build a dataframe of slopes and intercepts.

    ``slope`` and ``intercept`` are looked up in ``data`` first
    (aesthetics to geom_abline) and fall back to ``self.params``
    (parameter settings to stat_abline). Either of them may be a
    callable, in which case it is called with the x and y values.

    The first value of every column still left in ``data`` is
    repeated for each row of the returned dataframe.

    Raises GgplotError if a callable is given without both x and y,
    if the callable raises a TypeError, or if the number of slopes
    differs from the number of intercepts.
    """
    x = pop(data, 'x', None)
    y = pop(data, 'y', None)
    slope = pop(data, 'slope', self.params['slope'])
    intercept = pop(data, 'intercept', self.params['intercept'])

    def _evaluate(value, missing_msg):
        # Non-callables are used as they are
        if not hasattr(value, '__call__'):
            return value
        if x is None or y is None:
            raise GgplotError(missing_msg)
        try:
            return value(x, y)
        except TypeError as err:
            raise GgplotError(*err.args)

    def _count(value):
        # Scalars have no len(); they count as a single value
        try:
            return len(value)
        except TypeError:
            return 1

    slope = _evaluate(
        slope, 'To compute the slope, x & y aesthetics are needed')
    intercept = _evaluate(
        intercept,
        'To compute the intercept, x & y aesthetics are needed')

    n_slopes = _count(slope)
    n_intercepts = _count(intercept)
    if n_slopes != n_intercepts:
        raise GgplotError('Specified {} slopes but {} intercepts'.format(
            n_slopes, n_intercepts))

    slope = make_iterable(slope)
    intercept = make_iterable(intercept)
    result = pd.DataFrame({'slope': slope, 'intercept': intercept})

    # Copy the other aesthetics into the new dataframe
    n_rows = len(slope)
    for column in data:
        result[column] = make_iterable_ntimes(data[column].iloc[0], n_rows)
    return result
def _calculate(self, data):
    """
    Compute a gaussian kernel density estimate of the x aesthetic.

    The density is evaluated on an evenly stepped grid running from
    the minimum to the maximum of x (step = range / 1000).

    ``data`` is modified in place: 'x' and, if present, 'weight' are
    popped. The first value of every remaining column is repeated
    for each row of the returned dataframe, which has the columns
    'x' and 'y' plus those remaining aesthetics.

    Raises GgplotError if the x values can neither be converted to
    float nor to ordinals with ``toordinal()``.
    """
    x = data.pop('x')

    # ``except Exception`` instead of a bare ``except`` so that
    # KeyboardInterrupt and SystemExit are not swallowed.
    try:
        float(x.iloc[0])
    except Exception:
        try:
            # try to use it as a pandas.tslib.Timestamp
            x = [ts.toordinal() for ts in x]
        except Exception:
            raise GgplotError("stat_density(): aesthetic x mapping " +
                              "needs to be convertable to float!")

    # TODO: Implement weight
    # The weights are not used yet, but the column is still removed
    # so that it is not copied over as an ordinary aesthetic below.
    try:
        data.pop('weight')
    except KeyError:
        pass

    # TODO: Get "full" range of densities
    # i.e tail off to zero like ggplot2? But there is nothing
    # wrong with the current state.
    kde = gaussian_kde(x)
    bottom = np.min(x)
    top = np.max(x)
    step = (top - bottom) / 1000.0

    x = np.arange(bottom, top, step)
    y = kde.evaluate(x)
    new_data = pd.DataFrame({'x': x, 'y': y})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _find_aes_and_data(self, args, kwargs):
    """
    Identify the aes and data objects.

    Return a tuple ``(aes_mappings, data, kwargs)`` where
    ``aes_mappings`` is a dictionary of the recognised aesthetic
    mappings, ``data`` is the dataframe (or None if none was given)
    and ``kwargs`` is the remaining keyword arguments.

    - args is a list
    - kwargs is a dictionary

    Raises GgplotError if more than one aes is supplied, if a
    positional argument is neither an aes nor a dataframe, or if a
    mapped aesthetic is not recognised.

    Note: This is a helper function for self.__init__
    It modifies the kwargs ('mapping' and 'data' are popped
    when they are used).
    """
    passed_aes = {}
    data = None
    aes_err = 'Found more than one aes argument. Expecting zero or one'

    for arg in args:
        if isinstance(arg, aes):
            if passed_aes:
                # Was `raise Execption(...)`, a typo that surfaced
                # as a NameError instead of the intended error.
                raise GgplotError(aes_err)
            passed_aes = arg
        elif isinstance(arg, pd.DataFrame):
            data = arg
        else:
            raise GgplotError(
                'Unknown argument of type "{0}".'.format(type(arg)))

    if 'mapping' in kwargs and passed_aes:
        raise GgplotError(aes_err)
    elif not passed_aes and 'mapping' in kwargs:
        passed_aes = kwargs.pop('mapping')

    if data is None and 'data' in kwargs:
        data = kwargs.pop('data')

    _aes = {}
    # To make mapping of columns to geom/stat or stat parameters
    # possible
    _keep = set(self.DEFAULT_PARAMS) | set(self._stat_type.DEFAULT_PARAMS)
    for k, v in passed_aes.items():
        if k in self.valid_aes or k in _keep:
            _aes[k] = v
        else:
            raise GgplotError('Cannot recognize aesthetic: %s' % k)
    return _aes, data, kwargs
def __init__(self, low=None, high=None):
    """
    Validate and store the optional lower and upper limits.

    Each limit may be None or any value that supports subtraction
    with an integer.

    Raises GgplotError if a limit that is not None fails that
    subtraction with a TypeError.
    """
    for name, value in (('low', low), ('high', high)):
        # Identity check: `!= None` is evaluated element-wise by
        # array-like values and cannot be used as a condition.
        if value is None:
            continue
        # Subtraction is used as a cheap "is this numeric?" probe
        try:
            value - 0
        except TypeError:
            raise GgplotError("The '%s' argument to" % name,
                              self.__class__.__name__,
                              "must be of a numeric type or None")
    self.low, self.high = low, high
def __init__(self, x=None, y=None, ncol=None, nrow=None, scales="free"):
    """
    Store the facetting variables and the layout options.

    At least one of ``x`` and ``y`` must be given.

    Raises GgplotError if both ``x`` and ``y`` are None.
    """
    nothing_to_facet_by = x is None and y is None
    if nothing_to_facet_by:
        raise GgplotError(
            "You need to specify a variable name: facet_wrap('var')")

    # Called before the attributes below are assigned
    add_ggplotrc_params(self)

    self.x, self.y = x, y
    self.ncol, self.nrow = ncol, nrow
    self.scales = scales
def _calculate_global(self, data):
    """
    Prepare binning information from the x column of ``data``.

    Sets ``self.breaks`` from the 'breaks' parameter. Then:

    - for categorical x, sets ``self.labels`` (the 'labels'
      parameter, or the sorted unique x values when that is None)
      and ``self.length`` (the number of labels);
    - for non-categorical x with no breaks given, computes
      ``self.breaks`` with ``pd.cut`` and sets ``self.length`` to
      the number of breaks.

    Raises GgplotError if x is neither categorical, a date/datetime,
    nor numeric.
    """
    binwidth = self.params['binwidth']
    self.breaks = self.params['breaks']
    right = self.params['right']
    x = data['x'].values
    categorical = is_categorical(x)

    # For categorical data we set labels and x-vals
    if categorical:
        labels = self.params['labels']
        # Identity check: `== None` is evaluated element-wise by
        # array-like labels and cannot be used as a condition.
        if labels is None:
            labels = sorted(set(x))
        self.labels = labels
        self.length = len(self.labels)

    # For non-categorical data we set breaks
    if not (categorical or self.breaks):
        has_values = len(x) > 0
        # Check that x is numerical
        # NOTE(review): datetime.datetime is a subclass of
        # datetime.date, so datetime values take the first branch
        # and lose their time of day; the second branch looks
        # unreachable. Left unchanged here because any other code
        # converting x the same way must be changed together.
        if has_values and isinstance(x[0], datetime.date):
            def convert(d):
                d = datetime.datetime.combine(
                    d, datetime.datetime.min.time())
                return time.mktime(d.timetuple())
            x = [convert(d) for d in x]
        elif has_values and isinstance(x[0], datetime.datetime):
            x = [time.mktime(d.timetuple()) for d in x]
        elif has_values and isinstance(x[0], datetime.time):
            raise GgplotError("Cannot recognise the type of x")
        elif not cbook.is_numlike(x[0]):
            raise GgplotError("Cannot recognise the type of x")

        if binwidth is None:
            _bin_count = 30
            self._print_warning(_MSG_BINWIDTH)
        else:
            # Divide before rounding up and truncating, so that the
            # result is an integer number of bins covering the range
            _bin_count = int(np.ceil(np.ptp(x) / binwidth))
        _, self.breaks = pd.cut(x, bins=_bin_count, labels=False,
                                right=right, retbins=True)
        self.length = len(self.breaks)
def _verify_aesthetics(self, data):
    """
    Make sure every required aesthetic is a column of ``data``.

    Raises GgplotError naming the aesthetics in
    ``self.REQUIRED_AES`` that are not among ``data.columns``.
    """
    absent = self.REQUIRED_AES - set(data.columns)
    if not absent:
        return

    template = '{} requires the following missing aesthetics: {}'
    owner = self.__class__.__name__
    raise GgplotError(template.format(owner, ', '.join(absent)))
def __radd__(self, gg):
    """
    Add this facet specification to a plot object.

    Works on a deep copy of ``gg`` and sets its ``n_rows``,
    ``n_columns``, ``facets``, ``facet_type`` ("grid"),
    ``facet_scales`` and ``facet_pairs`` attributes. The copy is
    returned; ``gg`` itself is left untouched.

    ``facet_pairs`` holds one ``(x_value, y_value)`` tuple per
    combination of the sorted unique values. When only one of the
    two variables is found in the data, the missing side of each
    pair is the placeholder ``1``.

    Raises GgplotError if neither facet variable is found in
    ``gg.data``.
    """
    x = gg.data.get(self.x)
    y = gg.data.get(self.y)
    if x is None and y is None:
        raise GgplotError("No facets provided!")

    # only do the deepcopy after the check
    gg = deepcopy(gg)

    n_dim_x = 1 if x is None else x.nunique()
    n_dim_y = 1 if y is None else y.nunique()
    n_dim = n_dim_x * n_dim_y

    # NOTE(review): nrow/ncol are assigned to the opposite
    # dimension below (n_rows from self.ncol, n_cols from
    # self.nrow). Kept exactly as found; confirm it is intended.
    if self.ncol is None and self.nrow is None:
        n_rows = n_dim_x
        n_cols = n_dim_y
    elif self.nrow is None:
        n_rows = self.ncol
        n_cols = math.ceil(float(n_dim) / n_rows)
    elif self.ncol is None:
        n_cols = self.nrow
        n_rows = math.ceil(float(n_dim) / n_cols)
    else:
        n_rows = self.ncol
        n_cols = self.nrow
    gg.n_rows, gg.n_columns = int(n_rows), int(n_cols)

    facets = []
    if self.x:
        facets.append(self.x)
    if self.y:
        facets.append(self.y)
    gg.facets = facets
    gg.facet_type = "grid"
    gg.facet_scales = self.scales

    if x is None:
        # Facetting by y only used to crash on `x.unique()`. The
        # pairs mirror the (x_i, 1) placeholder of the x-only case.
        combos = [(1, y_i) for y_i in sorted(y.unique())]
    elif y is None:
        combos = [(x_i, 1) for x_i in sorted(x.unique())]
    else:
        y_values = sorted(y.unique())
        combos = [(x_i, y_i)
                  for x_i in sorted(x.unique())
                  for y_i in y_values]
    gg.facet_pairs = combos

    return gg
def __init__(self, *args, **kwargs):
    """
    Sort the arguments into aesthetic mappings, data, manually set
    aesthetics, geom parameters and stat parameters.

    Positional arguments and the 'mapping'/'data' keywords are
    handled by ``self._find_aes_and_data``. Every other keyword is
    stored in one of ``self._stat_params``, ``self.manual_aes`` or
    ``self.params``. The keyword 'colour' is treated as 'color'.

    Raises GgplotError if a keyword repeats a mapped aesthetic or
    is not recognised at all.
    """
    # NOTE(review): `^` is a symmetric difference, so a name that is
    # in both DEFAULT_AES and REQUIRED_AES is left out of valid_aes.
    # A union may have been intended; confirm.
    self.valid_aes = set(self.DEFAULT_AES) ^ self.REQUIRED_AES
    self._stat_type = self._get_stat_type(kwargs)
    self.aes, self.data, kwargs = self._find_aes_and_data(args, kwargs)

    # This set will list the geoms that were uniquely set in this
    # geom (not specified already i.e. in the ggplot aes).
    self.aes_unique_to_geom = set(self.aes.keys())

    if 'colour' in kwargs:
        kwargs['color'] = kwargs.pop('colour')

    # When a geom is created, some of the parameters may be meant
    # for the stat and some for the layer.
    # Some arguments can be identified as either aesthetics to
    # the geom or parameter settings to the stat; in this case
    # if the argument has a scalar value it is a setting for the stat.
    self._stat_params = {}
    self.params = deepcopy(self.DEFAULT_PARAMS)
    self.manual_aes = {}
    for name, value in kwargs.items():
        if name in self.aes:
            raise GgplotError('Aesthetic, %s, specified twice' % name)

        is_aesthetic = name in self.valid_aes
        if (is_aesthetic and
                name in self._stat_type.DEFAULT_PARAMS and
                is_scalar_or_string(value)):
            self._stat_params[name] = value
        elif is_aesthetic:
            self.manual_aes[name] = value
        elif name in self.DEFAULT_PARAMS:
            self.params[name] = value
        elif name in self._stat_type.DEFAULT_PARAMS:
            self._stat_params[name] = value
        else:
            raise GgplotError('Cannot recognize argument: %s' % name)

    self._cache = {}
    # When putting together the plot information for the geoms,
    # we need the aesthetics names to be matplotlib compatible.
    # These are created and stored in self._cache and so would
    # go stale if users or geoms change geom.manual_aes
    self._create_aes_with_mpl_names()
def _calculate(self, data):
    """
    Build a dataframe of x intercepts.

    ``xintercept`` is looked up in ``data`` first (aesthetic to
    geom_vline) and falls back to ``self.params`` (parameter
    setting to stat_vline). It may be a callable, in which case it
    is called with the x values.

    The first value of every column still left in ``data`` is
    repeated for each row of the returned dataframe.

    Raises GgplotError if a callable is given without x, or if the
    callable raises a TypeError.
    """
    x = pop(data, 'x', None)
    xintercept = pop(data, 'xintercept', self.params['xintercept'])

    is_function = hasattr(xintercept, '__call__')
    if is_function and x is None:
        raise GgplotError(
            'To compute the intercept, x aesthetic is needed')
    if is_function:
        try:
            xintercept = xintercept(x)
        except TypeError as err:
            raise GgplotError(*err.args)

    xintercept = make_iterable(xintercept)
    result = pd.DataFrame({'xintercept': xintercept})

    # Copy the other aesthetics into the new dataframe
    n_rows = len(xintercept)
    for column in data:
        result[column] = make_iterable_ntimes(data[column].iloc[0], n_rows)
    return result
def __init__(self, title):
    """
    Store the title.

    Raises GgplotError if ``title`` is None.
    """
    title_missing = title is None
    if title_missing:
        raise GgplotError("Arguments to",
                          self.__class__.__name__,
                          "cannot be None")
    self.title = title
def __init__(self, ylab):
    """
    Store the y axis label.

    Raises GgplotError if ``ylab`` is None.
    """
    label_missing = ylab is None
    if label_missing:
        raise GgplotError("Arguments to",
                          self.__class__.__name__,
                          "cannot be None")
    self.ylab = ylab
def _calculate(self, data):
    """
    Compute the weighted frequency of x per category or per bin.

    Reads ``self.labels`` and ``self.length`` for categorical x and
    ``self.breaks`` for numeric x; these must have been set before
    this method is called (presumably by ``_calculate_global``;
    confirm against the caller).

    ``data`` is modified in place: 'x' is popped, 'y' is deleted
    (with a warning) and 'weight' is popped when present. The first
    value of every remaining column is repeated for each row of the
    returned dataframe, which has the columns 'x', 'y' and 'width'
    plus those remaining aesthetics.

    Raises GgplotError if x is neither categorical, a date/datetime,
    nor numeric.
    """
    x = data.pop('x')
    right = self.params['right']

    # y values are not needed
    try:
        del data['y']
    except KeyError:
        pass
    else:
        self._print_warning(_MSG_YVALUE)

    # NOTE(review): datetime.datetime is a subclass of
    # datetime.date, so datetime values take the first branch and
    # lose their time of day; the second branch looks unreachable.
    # Left unchanged here because any other code converting x the
    # same way must be changed together.
    if len(x) > 0 and isinstance(x.get(0), datetime.date):
        def convert(d):
            d = datetime.datetime.combine(d, datetime.datetime.min.time())
            return time.mktime(d.timetuple())
        x = x.apply(convert)
    elif len(x) > 0 and isinstance(x.get(0), datetime.datetime):
        x = x.apply(lambda d: time.mktime(d.timetuple()))
    elif len(x) > 0 and isinstance(x.get(0), datetime.time):
        raise GgplotError("Cannot recognise the type of x")

    # If weight not mapped to, use one (no weight)
    try:
        weights = data.pop('weight')
    except KeyError:
        weights = np.ones(len(x))
    else:
        weights = make_iterable_ntimes(weights, len(x))

    if is_categorical(x.values):
        x_assignments = x
        x = self.labels
        width = make_iterable_ntimes(self.params['width'], self.length)
    elif cbook.is_numlike(x.iloc[0]):
        x_assignments = pd.cut(x, bins=self.breaks, labels=False,
                               right=right)
        width = np.diff(self.breaks)
        # Bin centres; zip stops at the shorter `width`, so the
        # last break is only ever used as a right edge
        x = [left + w / 2 for left, w in zip(self.breaks, width)]
    else:
        raise GgplotError("Cannot recognise the type of x")

    # Create a dataframe with two columns:
    #   - the bins to which each x is assigned
    #   - the weights of each x value
    # Then create a weighted frequency table
    _df = pd.DataFrame({'assignments': x_assignments,
                        'weights': weights
                        })
    _wfreq_table = pd.pivot_table(_df, values='weights',
                                  rows=['assignments'], aggfunc=np.sum)

    # For numerical x values, empty bins have no value in the
    # computed frequency table. We need to add the zeros and
    # since frequency table is a Series object, we need to keep it
    # ordered.
    # `except Exception` instead of a bare `except`, so that
    # KeyboardInterrupt and SystemExit are not swallowed; any
    # failure to use self.labels falls back to the bin indices.
    try:
        empty_bins = set(self.labels) - set(x_assignments)
    except Exception:
        empty_bins = set(range(len(width))) - set(x_assignments)
    _wfreq_table = _wfreq_table.to_dict()
    for _b in empty_bins:
        _wfreq_table[_b] = 0
    _wfreq_table = pd.Series(_wfreq_table).sort_index()

    y = list(_wfreq_table)
    new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data
def _calculate(self, data):
    """
    Compute the weighted frequency of x per category or per bin.

    Categorical x is counted per sorted unique value. Numeric x is
    binned with ``pd.cut`` using the 'breaks' parameter when it is
    given, and otherwise a bin count derived from 'binwidth' (30
    bins, with a warning, when neither is given).

    ``data`` is modified in place: 'x' is popped, 'y' is deleted
    (with a warning) and 'weight' is popped when present. The first
    value of every remaining column is repeated for each row of the
    returned dataframe, which has the columns 'x', 'y' and 'width'
    plus those remaining aesthetics.

    Raises GgplotError if x is neither categorical nor numeric.
    """
    x = data.pop('x')
    breaks = self.params['breaks']
    right = self.params['right']
    binwidth = self.params['binwidth']

    # y values are not needed
    try:
        del data['y']
    except KeyError:
        pass
    else:
        self._print_warning(_MSG_YVALUE)

    # If weight not mapped to, use one (no weight)
    try:
        weights = data.pop('weight')
    except KeyError:
        weights = np.ones(len(x))
    else:
        weights = make_iterable_ntimes(weights, len(x))

    categorical = is_categorical(x.values)
    if categorical:
        x_assignments = x
        x = sorted(set(x))
        width = make_iterable_ntimes(self.params['width'], len(x))
    elif cbook.is_numlike(x.iloc[0]):
        if breaks is None and binwidth is None:
            _bin_count = 30
            self._print_warning(_MSG_BINWIDTH)
        if binwidth:
            # Divide before rounding up and truncating, so that the
            # result is an integer number of bins covering the range
            _bin_count = int(np.ceil(np.ptp(x) / binwidth))

        # Breaks have a higher precedence and,
        # pandas accepts either the breaks or the number of bins
        _bins_info = breaks or _bin_count
        x_assignments, breaks = pd.cut(x, bins=_bins_info, labels=False,
                                       right=right, retbins=True)
        width = np.diff(breaks)
        # Bin centres; zip stops at the shorter `width`, so the
        # last break is only ever used as a right edge
        x = [left + w / 2 for left, w in zip(breaks, width)]
    else:
        raise GgplotError("Cannot recognise the type of x")

    # Create a dataframe with two columns:
    #   - the bins to which each x is assigned
    #   - the weights of each x value
    # Then create a weighted frequency table
    _df = pd.DataFrame({'assignments': x_assignments,
                        'weights': weights
                        })
    _wfreq_table = pd.pivot_table(_df, values='weights',
                                  rows=['assignments'], aggfunc=np.sum)

    # For numerical x values, empty bins have no value in the
    # computed frequency table. We need to add the zeros and
    # since frequency table is a Series object, we need to keep it
    # ordered.
    if len(_wfreq_table) < len(x):
        empty_bins = set(range(len(x))) - set(x_assignments)
        for _b in empty_bins:
            _wfreq_table[_b] = 0
        _wfreq_table = _wfreq_table.sort_index()

    y = list(_wfreq_table)
    new_data = pd.DataFrame({'x': x, 'y': y, 'width': width})

    # Copy the other aesthetics into the new dataframe
    n = len(x)
    for ae in data:
        new_data[ae] = make_iterable_ntimes(data[ae].iloc[0], n)
    return new_data