def relabel(key):
    """Build the "<x>x<y>" label for ``key``.

    ``index_map``, ``xlabels`` and ``ylabels`` are read from the enclosing
    scope: ``index_map[key]`` gives the position used to look up both labels.
    A null label is rendered as the text "NULL", anything else is cast to int.
    """
    loc = index_map[key]
    x_raw, y_raw = xlabels[loc], ylabels[loc]

    def _render(label):
        # Null labels cannot be cast to int, so they are spelled out instead.
        if notnull(label):
            return int(label)
        return "NULL"

    return "%sx%s" % (_render(x_raw), _render(y_raw))
def relabel(key):
    """Return the '<x>x<y>' label for ``key``.

    The position comes from ``index_map[key]`` and the labels from
    ``xlabels`` / ``ylabels`` (all taken from the enclosing scope). Null
    labels become the text 'NULL'; the rest are converted with ``int``.
    """
    where = index_map[key]
    raw = (xlabels[where], ylabels[where])

    parts = []
    for lab in raw:
        # int() would fail on a null label, hence the placeholder
        parts.append(int(lab) if notnull(lab) else 'NULL')

    return '%sx%s' % tuple(parts)
def test_series_setitem(self):
    """Assigning NaN through tuple keys nulls the expected rows of column 'A'.

    Uses ``assertTrue`` rather than ``assert_``: the latter is a deprecated
    unittest alias that no longer exists on Python 3.12+.
    """
    s = self.ymd['A']

    # Two-element key: afterwards positions 42..64 must be null and
    # everything on either side untouched.
    # NOTE(review): presumably the fixture is indexed by (year, month, day)
    # so that (2000, 3) selects a whole month -- confirm against the fixture.
    s[2000, 3] = np.nan
    self.assertTrue(isnull(s[42:65]).all())
    self.assertTrue(notnull(s[:42]).all())
    self.assertTrue(notnull(s[65:]).all())

    # Three-element key: the value at position 49 must be null afterwards.
    s[2000, 3, 10] = np.nan
    self.assertTrue(isnull(s[49]))
def _bucketpanel_cat(series, xcat, ycat):
    """
    Bucket `series` by every observed combination of two categorizations

    Parameters
    ----------
    series : Series
        Passed through to `bucketcat`
    xcat, ycat : category values, one per element of `series`
        Each is converted by `_intern` into integer codes plus a mapping
        array; the codes are used as positions into that mapping

    Returns
    -------
    DataFrame
        Result of `bucketcat` on the combined (x, y) codes, with columns
        renamed to '(<x value>, <y value>)'
    """
    xlabels, xmapping = _intern(xcat)
    ylabels, ymapping = _intern(ycat)

    # Pack each (x, y) code pair into one number: x * shift + y.  shift must
    # be strictly greater than the largest y code, otherwise different pairs
    # collide.  The codes are 0-based positions (they are fed to .take on the
    # mapping), so use max + 1: without it a largest code of 0, 1 or any
    # power of ten gives a shift that is too small.
    shift = 10 ** np.ceil(np.log10(ylabels.max() + 1))
    labels = xlabels * shift + ylabels

    sorter = labels.argsort()
    sorted_labels = labels.take(sorter)
    sorted_xlabels = xlabels.take(sorter)
    sorted_ylabels = ylabels.take(sorter)

    unique_labels = np.unique(labels)
    unique_labels = unique_labels[notnull(unique_labels)]

    # First position of every distinct combined label in the sorted order;
    # the x and y codes at that position identify the pair.
    locs = sorted_labels.searchsorted(unique_labels)
    xkeys = sorted_xlabels.take(locs)
    ykeys = sorted_ylabels.take(locs)

    stringified = ['(%s, %s)' % arg
                   for arg in zip(xmapping.take(xkeys), ymapping.take(ykeys))]

    result = bucketcat(series, labels)
    result.columns = stringified

    return result
def bucketcat(series, cats):
    """
    Produce DataFrame with one column per distinct category in `cats`

    Parameters
    ----------
    series : Series
        Anything that is not a Series is wrapped in one with an integer
        index 0..len(series)-1
    cats : Series or same-length array
        Category of each element of `series`; converted with np.asarray, so
        it is matched to `series` by position. Null categories are dropped

    Returns
    -------
    DataFrame
        One column per non-null category (in np.unique order) holding the
        elements of `series` that belong to that category
    """
    if not isinstance(series, Series):
        series = Series(series, index=np.arange(len(series)))

    cats = np.asarray(cats)

    unique_labels = np.unique(cats)
    unique_labels = unique_labels[notnull(unique_labels)]

    # group by
    data = {}
    for label in unique_labels:
        data[label] = series[cats == label]

    return DataFrame(data, columns=unique_labels)
def _bucketpanel_cat(series, xcat, ycat):
    """
    Bucket `series` by every observed combination of two categorizations

    Parameters
    ----------
    series : Series
        Passed through to `bucketcat`
    xcat, ycat : category values, one per element of `series`
        Each is converted by `_intern` into integer codes plus a mapping
        array; the codes are used as positions into that mapping

    Returns
    -------
    DataFrame
        Result of `bucketcat` on the combined (x, y) codes, with columns
        renamed to '(<x value>, <y value>)'
    """
    xlabels, xmapping = _intern(xcat)
    ylabels, ymapping = _intern(ycat)

    # The combined code is x * shift + y, which is only unique per pair when
    # shift exceeds the largest y code.  Codes are 0-based positions into the
    # mapping arrays, so size the shift from max + 1; sizing it from max alone
    # under-shoots whenever max is 0, 1 or a power of ten.
    shift = 10 ** np.ceil(np.log10(ylabels.max() + 1))
    labels = xlabels * shift + ylabels

    sorter = labels.argsort()
    sorted_labels = labels.take(sorter)
    sorted_xlabels = xlabels.take(sorter)
    sorted_ylabels = ylabels.take(sorter)

    unique_labels = np.unique(labels)
    unique_labels = unique_labels[notnull(unique_labels)]

    # Locate the first occurrence of each distinct combined code in sorted
    # order and read back the x / y codes found there.
    locs = sorted_labels.searchsorted(unique_labels)
    xkeys = sorted_xlabels.take(locs)
    ykeys = sorted_ylabels.take(locs)

    stringified = [
        '(%s, %s)' % arg
        for arg in zip(xmapping.take(xkeys), ymapping.take(ykeys))
    ]

    result = bucketcat(series, labels)
    result.columns = stringified

    return result
def f(x):
    """Sample standard deviation (ddof=1) of the non-null entries of ``x``.

    Returns NaN when fewer than two non-null values are present.
    """
    x = np.asarray(x)
    nona = x[notnull(x)]
    if len(nona) < 2:
        # np.nan rather than np.NaN: the capitalised alias was removed in
        # NumPy 2.0, the lowercase name works on every version.
        return np.nan
    return nona.std(ddof=1)
def f(x):
    """Minimum of the non-null entries of ``x``.

    Returns NaN when there are no non-null values.
    """
    x = np.asarray(x)
    nona = x[notnull(x)]
    if len(nona) == 0:
        # np.nan rather than np.NaN: the capitalised alias was removed in
        # NumPy 2.0, the lowercase name works on every version.
        return np.nan
    return nona.min()
def test_longpanel_series_combo(self):
    """Fit ``ols`` with a column popped from a long panel as ``y``.

    Uses ``assertTrue`` rather than ``assert_``: the latter is a deprecated
    unittest alias that no longer exists on Python 3.12+.
    """
    wp = tm.makePanel()
    lp = wp.to_long()

    # pop() hands back 'ItemA' as the dependent variable; whatever is left
    # in lp is passed as the regressors.
    y = lp.pop('ItemA')
    model = ols(y=y, x=lp, entity_effects=True, window=20)

    self.assertTrue(notnull(model.beta.values).all())
    self.assertTrue(isinstance(model, PanelOLS))

    # Bare attribute access kept on purpose as a smoke check.
    # NOTE(review): presumably `summary` is a property that builds the
    # report when read -- confirm.
    model.summary
def ewmcov(seriesA, seriesB, com, minCom=0, correctBias=True):
    """
    Calculates the exponentially weighted moving covariance of two series.

    Computed as ewma(A * B) - ewma(A) * ewma(B) over the observations where
    both inputs are non-null, optionally scaled by a bias correction.

    Parameters
    ----------
    seriesA : Series
    seriesB : Series
        Must be an instance of the type of seriesA. Series inputs with
        different index objects are first reindexed to their common index
    com : integer
        Center of Mass for exponentially weighted moving average
        decay = com / (1 + com) maps center of mass to decay parameter
    minCom : int, default 0
        Optionally require that at least a certain number of periods as
        a multiple of the Center of Mass be included in the sample.
        Passed through to ewma
    correctBias : boolean
        Use a standard bias correction, (1 + 2 * com) / (2 * com)

    Raises
    ------
    Exception
        If seriesB is not an instance of the type of seriesA
    """
    if correctBias:
        biasCorrection = (1.0 + 2.0 * com) / (2.0 * com)
    else:
        biasCorrection = 1.0

    if not isinstance(seriesB, type(seriesA)):
        raise Exception('Input arrays must be of the same type!')

    if isinstance(seriesA, Series):
        if seriesA.index is not seriesB.index:
            commonIndex = seriesA.index.intersection(seriesB.index)
            seriesA = seriesA.reindex(commonIndex)
            seriesB = seriesB.reindex(commonIndex)

    okLocs = notnull(seriesA) & notnull(seriesB)

    # Both inputs are aligned at this point, so the same mask selects the
    # matching observations from each.  Masking (instead of calling
    # seriesB.reindex) also works for the non-Series inputs that the type
    # check above lets through.
    cleanSeriesA = seriesA[okLocs]
    cleanSeriesB = seriesB[okLocs]

    XY = ewma(cleanSeriesA * cleanSeriesB, com=com, minCom=minCom)
    X = ewma(cleanSeriesA, com=com, minCom=minCom)
    Y = ewma(cleanSeriesB, com=com, minCom=minCom)

    return biasCorrection * (XY - X * Y)
def ewmcov(seriesA, seriesB, com, minCom=0, correctBias=True):
    """
    Calculates the exponentially weighted moving covariance of two series.

    Computed as ewma(A * B) - ewma(A) * ewma(B) over the observations where
    both inputs are non-null, optionally scaled by a bias correction.

    Parameters
    ----------
    seriesA : Series
    seriesB : Series
        Must be an instance of the type of seriesA. Series inputs with
        different index objects are first reindexed to their common index
    com : integer
        Center of Mass for exponentially weighted moving average
        decay = com / (1 + com) maps center of mass to decay parameter
    minCom : int, default 0
        Optionally require that at least a certain number of periods as
        a multiple of the Center of Mass be included in the sample.
        Passed through to ewma
    correctBias : boolean
        Use a standard bias correction, (1 + 2 * com) / (2 * com)

    Raises
    ------
    Exception
        If seriesB is not an instance of the type of seriesA
    """
    if correctBias:
        biasCorrection = (1.0 + 2.0 * com) / (2.0 * com)
    else:
        biasCorrection = 1.0

    if not isinstance(seriesB, type(seriesA)):
        raise Exception('Input arrays must be of the same type!')

    if isinstance(seriesA, Series):
        if seriesA.index is not seriesB.index:
            commonIndex = seriesA.index.intersection(seriesB.index)
            seriesA = seriesA.reindex(commonIndex)
            seriesB = seriesB.reindex(commonIndex)

    okLocs = notnull(seriesA) & notnull(seriesB)

    # The inputs share their alignment here, so one mask picks the matching
    # observations out of both.  Unlike seriesB.reindex(...), masking does
    # not fail for the non-Series inputs allowed by the type check above.
    cleanSeriesA = seriesA[okLocs]
    cleanSeriesB = seriesB[okLocs]

    XY = ewma(cleanSeriesA * cleanSeriesB, com=com, minCom=minCom)
    X = ewma(cleanSeriesA, com=com, minCom=minCom)
    Y = ewma(cleanSeriesB, com=com, minCom=minCom)

    return biasCorrection * (XY - X * Y)
def _cat_labels(labels):
    """
    Split `series` into one column per distinct non-null label

    `series` is not a parameter; it is read from the enclosing scope.

    Parameters
    ----------
    labels : array
        Label of each element of `series`; compared element-wise against
        every distinct label to build the selection mask

    Returns
    -------
    DataFrame
        Indexed like `series`, one column per distinct non-null label
    """
    # group by
    data = {}

    unique_labels = np.unique(labels)
    unique_labels = unique_labels[notnull(unique_labels)]

    for label in unique_labels:
        mask = labels == label

        # Key every group by its own label.  The previous key, `stringified`,
        # did not depend on the loop variable (and is not defined in this
        # function), so at best only the last group could survive.
        data[label] = series[mask]

    return DataFrame(data, index=series.index)
def ewmcorr(seriesA, seriesB, com, minCom=0):
    """
    Calculates a rolling exponentially weighted moving correlation of
    2 series.

    Computed as (ewma(A * B) - ewma(A) * ewma(B)) / sqrt(var(A) * var(B))
    over the observations where both inputs are non-null, with the
    variances taken from ewmvar without bias correction.

    Parameters
    ----------
    seriesA : Series
    seriesB : Series
        Must be an instance of the type of seriesA. Series inputs with
        different index objects are first reindexed to their common index
    com : integer
        Center of Mass for exponentially weighted moving average
        decay = com / (1 + com) maps center of mass to decay parameter
    minCom : int, default 0
        Optionally require that at least a certain number of periods as
        a multiple of the Center of Mass be included in the sample.
        Passed through to ewma and ewmvar

    Raises
    ------
    Exception
        If seriesB is not an instance of the type of seriesA
    """
    if not isinstance(seriesB, type(seriesA)):
        raise Exception('Input arrays must be of the same type!')

    if isinstance(seriesA, Series):
        if seriesA.index is not seriesB.index:
            commonIndex = seriesA.index.intersection(seriesB.index)
            seriesA = seriesA.reindex(commonIndex)
            seriesB = seriesB.reindex(commonIndex)

    okLocs = notnull(seriesA) & notnull(seriesB)

    # Both inputs are aligned at this point, so the same mask selects the
    # matching observations from each.  Masking (instead of calling
    # seriesB.reindex) also works for the non-Series inputs that the type
    # check above lets through.
    cleanSeriesA = seriesA[okLocs]
    cleanSeriesB = seriesB[okLocs]

    XY = ewma(cleanSeriesA * cleanSeriesB, com=com, minCom=minCom)
    X = ewma(cleanSeriesA, com=com, minCom=minCom)
    Y = ewma(cleanSeriesB, com=com, minCom=minCom)

    # The numerator carries no bias correction, so neither may the variances.
    varX = ewmvar(cleanSeriesA, com=com, minCom=minCom, correctBias=False)
    varY = ewmvar(cleanSeriesB, com=com, minCom=minCom, correctBias=False)

    return (XY - X * Y) / np.sqrt(varX * varY)
def _ewmoment(values, func, min_periods=None, biasCorrection=None): """ Generic rolling exponential moment function using blended accumulator method. Parameters ---------- values : ndarray or Series func : function taking previous value and next value biasCorrection : float Optional bias correction min_periods : int, optional require a particular number of periods "in window" to compute statistic If provided, overrides the minPct argument Returns ------- Same type and length as values argument """ okLocs = notnull(values) cleanValues = values[okLocs] result = np.frompyfunc(func, 2, 1).accumulate(cleanValues) result = result.astype(float) if min_periods is not None: if min_periods < 0: raise Exception('min_periods cannot be less than 0!') result[:min_periods] = np.NaN output = values.copy() output[okLocs] = result if biasCorrection is not None: if biasCorrection <= 0: raise Exception('Bias correction cannot be negative!') output *= biasCorrection return output
def f(x):
    """Skewness (computed with ``bias=False``) of the non-null entries of ``x``."""
    arr = np.asarray(x)
    valid = arr[notnull(arr)]
    return skew(valid, bias=False)
def f(x):
    """Standard deviation with ``ddof=1`` over the non-null entries of ``x``."""
    arr = np.asarray(x)
    present = notnull(arr)
    kept = arr[present]
    return kept.std(ddof=1)
def f(x):
    """Largest of the non-null entries of ``x``."""
    arr = np.asarray(x)
    keep = notnull(arr)
    return arr[keep].max()
def f(x):
    """Median of the non-null entries of ``x``."""
    arr = np.asarray(x)
    observed = arr[notnull(arr)]
    return np.median(observed)
def f(x):
    """Product of the non-null entries of ``x``."""
    arr = np.asarray(x)
    factors = arr[notnull(arr)]
    return np.prod(factors)
def f(x):
    """Arithmetic mean of the non-null entries of ``x``."""
    arr = np.asarray(x)
    mask = notnull(arr)
    return arr[mask].mean()
def f(x):
    """Sum of the non-null entries of ``x``."""
    arr = np.asarray(x)
    terms = arr[notnull(arr)]
    return terms.sum()
def _first_valid_index(arr): # argmax scans from left return notnull(arr).argmax() if len(arr) else 0
def test_count(self):
    """Check the 'count' statistic of the frame against a non-null tally."""
    def count_valid(s):
        # reference implementation: number of non-null entries
        return notnull(s).sum()

    self._check_statistic(self.frame, 'count', count_valid)
def test_count(self):
    """Check the "count" stat op of the panel against a non-null tally."""
    def tally(s):
        # reference implementation: number of non-null entries
        return notnull(s).sum()

    self._check_stat_op("count", tally, obj=self.panel, has_skipna=False)
def test_count(self):
    """Check the 'count' statistic of the panel against a non-null tally."""
    def n_valid(s):
        # reference implementation: number of non-null entries
        return notnull(s).sum()

    self._check_statistic(self.panel, 'count', n_valid)