def test_series_setitem(self):
    """Assigning NaN through partial and full tuple keys blanks the
    matching entries of column 'A' of the ``ymd`` fixture and nothing else.
    """
    s = self.ymd['A']

    # Two-element key: positions 42..64 are expected to become null while
    # everything before and after stays populated.
    # NOTE(review): presumably the key is (year, month) on a hierarchical
    # index -- confirm against the fixture definition.
    s[2000, 3] = np.nan
    self.assertTrue(isnull(s[42:65]).all())
    self.assertTrue(notnull(s[:42]).all())
    self.assertTrue(notnull(s[65:]).all())

    # Three-element key: a single entry, checked at position 49.
    s[2000, 3, 10] = np.nan
    self.assertTrue(isnull(s[49]))
def test_merge_int(self):
    """Merging an integer-valued Series into a float Series must yield a
    float result, with null where no match exists.
    """
    left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4})
    right = Series({1: 11, 2: 22, 3: 33})

    # Preconditions: the two inputs really do have different dtypes.
    self.assertTrue(left.dtype == np.float64)
    self.assertTrue(issubclass(right.dtype.type, np.integer))

    merged = left.merge(right)
    self.assertTrue(merged.dtype == np.float64)

    # NOTE(review): presumably ``merge`` looks up the values of ``left`` as
    # keys of ``right``; 4 is not a key of ``right`` while 3 is -- confirm.
    self.assertTrue(isnull(merged['d']))
    self.assertFalse(isnull(merged['c']))
def test_merge_int(self):
    """Check that merging an int Series into a float Series gives a float
    Series whose unmatched entries are null.
    """
    left = Series({'a': 1., 'b': 2., 'c': 3., 'd': 4})
    right = Series({1: 11, 2: 22, 3: 33})

    # Sanity-check the input dtypes before exercising the merge.
    self.assertTrue(left.dtype == np.float64)
    self.assertTrue(issubclass(right.dtype.type, np.integer))

    merged = left.merge(right)

    # The result must be float so that missing matches can be stored as NaN.
    self.assertTrue(merged.dtype == np.float64)
    # NOTE(review): assumes the values of ``left`` are matched against the
    # keys of ``right`` (4 is absent, 3 is present) -- confirm.
    self.assertTrue(isnull(merged['d']))
    self.assertFalse(isnull(merged['c']))
def _bucketpanel_by(series, xby, yby, xbins, ybins):
    """
    Bucket ``series`` by the combination of two other series and name the
    resulting columns '<xlabel>x<ylabel>'.

    Parameters
    ----------
    series : Series
        Data to bucket; its index drives the alignment of ``xby``/``yby``.
    xby, yby : Series
        Series used to derive the two bucket labels; both are reindexed to
        ``series.index`` before labelling.
    xbins, ybins
        Passed through to ``_bucket_labels`` and ``_uniquify``.

    Returns
    -------
    The result of ``bucketcat`` with its columns renamed; a missing x or y
    label is rendered as 'NULL' in the column name.
    """
    # Align once; the labelling helpers receive the aligned series.
    xby = xby.reindex(series.index)
    yby = yby.reindex(series.index)

    xlabels = _bucket_labels(xby, xbins)
    ylabels = _bucket_labels(yby, ybins)

    labels = _uniquify(xlabels, ylabels, xbins, ybins)

    # Null combined labels are collapsed into the single sentinel -1.
    labels[isnull(labels)] = -1

    bucketed = bucketcat(series, labels)

    # Map every distinct label to the position of its first occurrence in a
    # single pass (instead of one list.index scan per distinct label).
    first_position = {}
    for position, label in enumerate(labels):
        first_position.setdefault(label, position)

    def relabel(key):
        # Recover the x/y labels that produced this combined label.
        pos = first_position[key]
        xlab = xlabels[pos]
        ylab = ylabels[pos]

        return '%sx%s' % (int(xlab) if notnull(xlab) else 'NULL',
                          int(ylab) if notnull(ylab) else 'NULL')

    return bucketed.rename(columns=relabel)
def predict(self, beta=None, x=None, fill_value=None,
            fill_method=None, axis=0):
    """
    Compute predicted values from coefficients and regressors.

    Parameters
    ----------
    beta : Series, optional
        Coefficients, reindexed to the fitted coefficients' index.  The
        fitted coefficients are used when omitted.
    x : Series or DataFrame, optional
        Regressor values.  The fitted regressors are used when omitted.
    fill_value : scalar or dict, default None
    fill_method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
    axis : {0, 1}, default 0
        ``fill_value``, ``fill_method`` and ``axis`` are forwarded to
        ``x.fillna``; see DataFrame.fillna for more details

    Notes
    -----
    1. If both fill_value and fill_method are None then rows of ``x``
       containing NaNs are dropped (this is the default behavior)
    2. An 'intercept' column of ones is added to a supplied ``x`` if the
       model was fitted using an intercept
    3. If neither ``beta`` nor ``x`` is given, the stored ``y_predict`` is
       returned unchanged

    Returns
    -------
    Series of predicted values, reindexed to the index of the original
    ``x`` (so dropped rows reappear as missing)

    Raises
    ------
    ValueError
        If ``beta`` has no value for one of the fitted variables.
    """
    if x is None and beta is None:
        return self.y_predict

    if beta is not None:
        coefs = beta.reindex(self.beta.index)
        if isnull(coefs).any():
            raise ValueError('Must supply betas for same variables')
    else:
        coefs = self.beta

    if x is not None:
        source = x
        if fill_value is None and fill_method is None:
            regressors = x.dropna(how='any')
        else:
            regressors = x.fillna(value=fill_value, method=fill_method,
                                  axis=axis)

        # A lone Series is treated as a single regressor named 'x'.
        if isinstance(regressors, Series):
            regressors = DataFrame({'x': regressors})

        if self._intercept:
            regressors['intercept'] = 1.
    else:
        source = regressors = self._x

    # Put the columns in the same order as the fitted design matrix so they
    # line up with the coefficient vector.
    regressors = regressors.reindex(columns=self._x.columns)

    predicted = np.dot(regressors.values, coefs.values)
    return Series(predicted, regressors.index).reindex(source.index)
def test_reindex_bool(self):
    """Reindexing a boolean frame so that new rows or columns appear must
    upcast the values to float, with the new entries missing.
    """
    # Rows are labelled 0, 2, ..., 18 and columns 0 and 2, so label 1 is
    # absent on both axes.
    frame = DataMatrix(np.ones((10, 2), dtype=bool),
                       index=np.arange(0, 20, 2),
                       columns=[0, 2])

    # New row labels: row 1 does not exist in the original index.
    reindexed = frame.reindex(np.arange(10))
    self.assertTrue(reindexed.values.dtype == np.float64)
    self.assertTrue(np.isnan(reindexed[0][1]))

    # New column labels: column 1 does not exist in the original columns.
    reindexed = frame.reindex(columns=range(3))
    self.assertTrue(reindexed.values.dtype == np.float64)
    self.assertTrue(isnull(reindexed[1]).all())
def format_query(sql, *args):
    """
    Interpolate formatted arguments into a SQL template.

    Each argument is rendered by the entry of ``_formatters`` registered
    for its exact type, and the rendered values are substituted into
    ``sql`` with the ``%`` operator.

    Parameters
    ----------
    sql : str
        Template containing ``%``-style placeholders.
    *args
        Values to render, in placeholder order.  A float that is null
        (NaN) is rendered as if it were None.

    Returns
    -------
    The template with all placeholders filled in.

    Raises
    ------
    KeyError
        If no formatter is registered for an argument's type.
    """
    def _render(value):
        # NaN floats go through the NoneType formatter rather than the
        # float one.
        if isinstance(value, float) and isnull(value):
            value = None
        return _formatters[type(value)](value)

    return sql % tuple(_render(value) for value in args)
def test_reindex_bool_pad(self):
    """Pad-filling a boolean Series onto a longer index must leave the
    leading entries, which have no prior value to pad from, as null.
    """
    # fail
    # NOTE(review): the original marker above presumably flags this as a
    # case that was failing when written -- confirm whether it still is.
    ts = self.ts[5:]
    bool_ts = Series(np.zeros(len(ts), dtype=bool), index=ts.index)

    # The first five labels of self.ts were sliced off above, so they come
    # before any data in ``bool_ts``.
    filled_bool = bool_ts.reindex(self.ts.index, fillMethod='pad')
    self.assertTrue(isnull(filled_bool[:5]).all())
def wrapper(x):
    """Return the median of ``x``, or NaN if any element of ``x`` is null."""
    has_missing = isnull(x).any()
    return np.nan if has_missing else np.median(x)