def makeQuantiles(series, n):
    """
    Compute quantiles of input series.

    Missing values are dropped with ``remove_na``, the remaining values are
    sorted, and the sorted values are cut by position into ``n`` contiguous
    buckets.

    Parameters
    ----------
    series: Series
        Must provide ``sort_values`` (or the legacy ``order`` method) and
        positional access through ``.iloc``
    n: int
        Number of quantile buckets

    Returns
    -------
    (edges, quantiles)
       edges: ith bucket --> (left edge, right edge)
       quantiles: ith bucket --> set of values

    Both dicts are keyed 1..n.

    Raises
    ------
    ZeroDivisionError
        If ``n`` is 0.
    IndexError
        If a bucket ends up empty (more buckets than non-missing values).
    """
    cleaned = remove_na(series).copy()

    # ``Series.order`` was replaced by ``sort_values``; support both spellings.
    sorter = getattr(cleaned, 'sort_values', None)
    if sorter is None:
        sorter = cleaned.order
    ordered = sorter()

    size = len(ordered)
    # Integer bucket boundaries: floor(size * i / n). Float bounds are not
    # valid slice indices and can lose the last element to rounding.
    bounds = [(size * i) // n for i in range(n + 1)]

    quantiles = {}
    edges = {}
    for i in range(n):
        # Slice and look up by position; plain ``[]`` is label-based when the
        # index holds integers, which it usually does after sorting.
        bucket = ordered.iloc[bounds[i]:bounds[i + 1]]
        quantiles[i + 1] = bucket
        edges[i + 1] = bucket.iloc[0], bucket.iloc[-1]

    return edges, quantiles
def makeQuantiles(series, n):
    """
    Compute quantiles of input series.

    Missing values are dropped with ``remove_na``, the remaining values are
    sorted, and the sorted values are cut by position into ``n`` contiguous
    buckets.

    Parameters
    ----------
    series: Series
        Must provide ``sort_values`` (or the legacy ``order`` method) and
        positional access through ``.iloc``
    n: int
        Number of quantile buckets

    Returns
    -------
    (edges, quantiles)
       edges: ith bucket --> (left edge, right edge)
       quantiles: ith bucket --> set of values

    Both dicts are keyed 1..n.

    Raises
    ------
    ZeroDivisionError
        If ``n`` is 0.
    IndexError
        If a bucket ends up empty (more buckets than non-missing values).
    """
    cleaned = remove_na(series).copy()

    # ``Series.order`` was replaced by ``sort_values``; support both spellings.
    sorter = getattr(cleaned, 'sort_values', None)
    if sorter is None:
        sorter = cleaned.order
    ordered = sorter()

    size = len(ordered)
    # Integer bucket boundaries: floor(size * i / n). Float bounds are not
    # valid slice indices and can lose the last element to rounding.
    bounds = [(size * i) // n for i in range(n + 1)]

    quantiles = {}
    edges = {}
    for i in range(n):
        # Slice and look up by position; plain ``[]`` is label-based when the
        # index holds integers, which it usually does after sorting.
        bucket = ordered.iloc[bounds[i]:bounds[i + 1]]
        quantiles[i + 1] = bucket
        edges[i + 1] = bucket.iloc[0], bucket.iloc[-1]

    return edges, quantiles
def percentileRank(frame, column=None, kind='mean'):
    """
    Return the percentile rank of a score within each cross-section (row)
    of the frame, i.e. for each point in time

    Missing values in a row are dropped (``remove_na``) before the row is
    handed to ``scipy.stats.percentileofscore``.

    Parameters
    ----------
    frame: DataFrame
    column: string or Series, optional
        Column name or specific Series to compute percentiles for.
        If not provided, percentiles are computed for all values at each
        point in time. Note that this can take a LONG time.
        When a Series is given, the score for a row is looked up by the row
        label and falls back to NaN if the label is absent.
    kind: {'rank', 'weak', 'strict', 'mean'}, optional
        This optional parameter specifies the interpretation of the
        resulting score:

        - "rank": Average percentage ranking of score. In case of
                  multiple matches, average the percentage rankings of
                  all matching scores.
        - "weak": This kind corresponds to the definition of a cumulative
                  distribution function. A percentileofscore of 80%
                  means that 80% of values are less than or equal
                  to the provided score.
        - "strict": Similar to "weak", except that only values that are
                    strictly less than the given score are counted.
        - "mean": The average of the "weak" and "strict" scores, often used
                  in testing. See

                  http://en.wikipedia.org/wiki/Percentile_rank

    See also
    --------
    scipy.stats.percentileofscore

    Returns
    -------
    Series (keyed by row label) when ``column`` is given, otherwise a
    DataFrame with the same row and column labels as ``frame``
    """
    from scipy.stats import percentileofscore

    framet = frame.T
    # ``DataFrame.iteritems`` was removed in pandas 2.0; fall back to it only
    # when ``items`` is unavailable.
    if hasattr(framet, 'items'):
        rows = framet.items()
    else:
        rows = framet.iteritems()

    results = {}

    if column is None:
        names = frame.columns
        for date, xs in rows:
            # Clean each cross-section once and reuse it for every column.
            valid = remove_na(xs)
            for name in names:
                results.setdefault(date, {})[name] = percentileofscore(
                    valid, xs[name], kind=kind)
        return DataFrame(results).T

    if isinstance(column, Series):
        for date, xs in rows:
            results[date] = percentileofscore(
                remove_na(xs), column.get(date, NaN), kind=kind)
    else:
        for date, xs in rows:
            results[date] = percentileofscore(
                remove_na(xs), xs[column], kind=kind)

    return Series(results)
def plot_group(grouped, ax):
    """
    Draw one box per group on ``ax`` and label the ticks with the group keys.

    ``grouped`` is an iterable of (key, values) pairs. ``kwds``, ``rot`` and
    ``fontsize`` are free names resolved outside this function.
    """
    keys, values = zip(*grouped)
    labels = list(map(_stringify, keys))
    samples = list(map(remove_na, values))
    ax.boxplot(samples, **kwds)

    # Group labels go on the x axis when 'vert' is truthy (default 1),
    # otherwise on the y axis.
    if kwds.get('vert', 1):
        label_ticks = ax.set_xticklabels
    else:
        label_ticks = ax.set_yticklabels
    label_ticks(labels, rotation=rot, fontsize=fontsize)
def boxplot(self, df, axis=0, secondary_y=False, *args, **kwargs):
    """
    Draw one box per column of ``df`` (per row when ``axis == 1``).

    Missing values are removed from every column before plotting. The boxes
    are drawn on ``self.ax``, or on the axes returned by
    ``self.get_right_ax()`` when ``secondary_y`` is true. ``*args`` and
    ``**kwargs`` are accepted but not used.
    """
    frame = df.T if axis == 1 else df
    labels = frame.columns
    self.set_index(labels)
    samples = list(map(remove_na, frame.values.T))

    target = self.ax
    if secondary_y:
        target = self.get_right_ax()

    # positions need to start at 0 to align with DateLocator
    target.boxplot(samples, positions=np.arange(len(labels)))
    self.setup_datetime(labels)
    self.set_formatter()
def boxplot(self, df, axis=0, secondary_y=False, *args, **kwargs):
    """
    Box plot of a DataFrame, one box per column (per row when ``axis == 1``).

    Only DataFrames are supported at the moment, which means every box is
    built from a column of the same frame. To plot groups of varying sizes
    you would need boxplot(list()) instead; creating a
    SeriesGroupBy.boxplot is an example of that.

    Missing values are removed from every column before plotting. The axes
    to draw on are obtained from ``self.find_ax(secondary_y, kwargs)``.
    """
    frame = df.T if axis == 1 else df
    labels = frame.columns
    self.set_index(labels)
    samples = list(map(remove_na, frame.values.T))

    target = self.find_ax(secondary_y, kwargs)

    # positions need to start at 0 to align with TimestampLocator
    target.boxplot(samples, positions=np.arange(len(labels)))
    self.setup_datetime(labels)
    self.set_formatter()
def skipna_wrapper(x):
    """
    Apply ``alternative`` to the non-missing values of ``x``.

    Returns NaN without calling ``alternative`` when nothing is left after
    the missing values are removed.
    """
    present = remove_na(x)
    return alternative(present) if len(present) else np.nan
def test_remove_na_deprecation(self):
    """Calling ``remove_na`` is expected to emit a ``FutureWarning``."""
    # see gh-16971
    expected_warning = FutureWarning
    with tm.assert_produces_warning(expected_warning):
        empty = Series([])
        remove_na(empty)
def boxplot(data, column=None, by=None, ax=None, fontsize=None, rot=0,
            grid=True, figsize=None, **kwds):
    """
    Make a box plot from DataFrame column optionally grouped by some columns
    or other inputs

    Parameters
    ----------
    data : DataFrame or Series
        A Series is wrapped in a one-column DataFrame named 'x'
    column : column name or list of names, or vector
        Column(s) to plot. When omitted and ``by`` is not given, every
        numeric column of ``data`` is plotted
    by : string or sequence
        Column in the DataFrame to group by; a single value is wrapped in a
        list before being handed to the grouped-plot helper
    ax : matplotlib axes, optional
        Axes to draw on when ``by`` is not given; defaults to the current
        axes. Not used when ``by`` is given
    fontsize : int or string
        Font size of the tick labels
    rot : label rotation angle
    grid : bool
        Passed to ``ax.grid`` (or to the grouped-plot helper when ``by`` is
        given)
    figsize : tuple, optional
        Only forwarded to the grouped-plot helper, i.e. when ``by`` is given
    kwds : other plotting keyword arguments to be passed to matplotlib boxplot
           function

    Returns
    -------
    The axes returned by the grouped-plot helper when ``by`` is given,
    otherwise the value returned by ``ax.boxplot``
    """
    from pandas import Series, DataFrame
    if isinstance(data, Series):
        data = DataFrame({'x': data})
        column = 'x'

    def _label_ticks(axes, labels):
        # Labels go on the x axis when 'vert' is truthy (default 1),
        # otherwise on the y axis.
        if kwds.get('vert', 1):
            axes.set_xticklabels(labels, rotation=rot, fontsize=fontsize)
        else:
            axes.set_yticklabels(labels, rotation=rot, fontsize=fontsize)

    def plot_group(grouped, ax):
        keys, values = zip(*grouped)
        keys = [_stringify(x) for x in keys]
        values = [remove_na(v) for v in values]
        ax.boxplot(values, **kwds)
        _label_ticks(ax, keys)

    # Identity test: ``column == None`` compares element-wise when ``column``
    # is a vector, which makes the condition ambiguous.
    if column is None:
        columns = None
    elif isinstance(column, (list, tuple)):
        columns = column
    else:
        columns = [column]

    if by is not None:
        if not isinstance(by, (list, tuple)):
            by = [by]

        fig, axes = _grouped_plot_by_column(plot_group, data, columns=columns,
                                            by=by, grid=grid, figsize=figsize)

        # Return axes in multiplot case, maybe revisit later # 985
        ret = axes
    else:
        if ax is None:
            ax = _gca()
        fig = ax.get_figure()
        data = data._get_numeric_data()
        if columns:
            cols = columns
        else:
            cols = data.columns
        keys = [_stringify(x) for x in cols]

        # Return boxplot dict in single plot case
        clean_values = [remove_na(x) for x in data[cols].values.T]
        bp = ax.boxplot(clean_values, **kwds)
        _label_ticks(ax, keys)
        ax.grid(grid)

        ret = bp

    fig.subplots_adjust(bottom=0.15, top=0.9, left=0.1, right=0.9, wspace=0.2)
    return ret