def _reindex_columns(self, columns):
    """
    Build a new DataMatrix whose columns are the requested labels.

    Parameters
    ----------
    columns : Index or sequence
        Desired column labels. Anything that is not already an Index is
        wrapped in one.

    Returns
    -------
    DataMatrix
        Shares this matrix's index. Requested labels that also appear in
        ``self.objects.columns`` are served by reindexing ``self.objects``
        and are removed from the labels looked up in ``self.values``.

    Notes
    -----
    When some requested labels are flagged as missing by
    ``common.get_indexer``, integer and boolean values are cast to float
    before ``common.null_out_axis`` is applied to the missing columns
    (presumably so they can hold NaN -- confirm against ``null_out_axis``).
    """
    if len(columns) == 0:
        return DataMatrix(index=self.index)

    if not isinstance(columns, Index):
        columns = Index(columns)

    objects = None
    if self.objects is not None:
        # Columns held by the objects matrix are reindexed there and
        # excluded from the lookup against self.values below.
        object_columns = columns.intersection(self.objects.columns)
        columns = columns - object_columns
        objects = self.objects._reindex_columns(object_columns)

    if len(columns) > 0 and len(self.columns) == 0:
        # Nothing to take values from; let the constructor build the frame.
        return DataMatrix(index=self.index, columns=columns, objects=objects)

    indexer, mask = common.get_indexer(self.columns, columns, None)
    mat = self.values.take(indexer, axis=1)

    # Unary minus on a boolean array raises TypeError on current NumPy;
    # logical_not is the explicit, version-independent negation.
    missing = np.logical_not(mask)
    if missing.any():
        # Any integer width (not only the platform default) and booleans
        # must be upcast before the missing columns are nulled out.
        if issubclass(mat.dtype.type, (np.integer, np.bool_)):
            mat = mat.astype(float)
        common.null_out_axis(mat, missing, 1)

    return DataMatrix(mat, index=self.index, columns=columns, objects=objects)
def _reindex_index(self, index, method):
    """
    Build a new DataMatrix whose rows follow the requested index.

    Parameters
    ----------
    index : Index or sequence
        Desired row labels. Anything that is not already an Index is
        wrapped in one.
    method : object
        Passed straight through to ``common.get_indexer`` as its fill
        method argument.

    Returns
    -------
    DataMatrix
        A copy of ``self`` when ``index`` is the very same object as
        ``self.index``; otherwise a new matrix with this matrix's columns
        and the rows selected by the indexer.

    Notes
    -----
    When some requested labels are flagged as missing by
    ``common.get_indexer``, integer and boolean values are cast to float
    before ``common.null_out_axis`` is applied to the missing rows
    (presumably so they can hold NaN -- confirm against ``null_out_axis``).
    """
    if index is self.index:
        return self.copy()

    if not isinstance(index, Index):
        index = Index(index)

    if len(self.index) == 0:
        # Nothing to take values from; let the constructor build the frame.
        return DataMatrix(index=index, columns=self.columns)

    indexer, mask = common.get_indexer(self.index, index, method)
    mat = self.values.take(indexer, axis=0)

    # Unary minus on a boolean array raises TypeError on current NumPy;
    # logical_not is the explicit, version-independent negation.
    missing = np.logical_not(mask)
    if missing.any():
        # Any integer width (not only the platform default) and booleans
        # must be upcast before the missing rows are nulled out.
        if issubclass(mat.dtype.type, (np.integer, np.bool_)):
            mat = mat.astype(float)
        common.null_out_axis(mat, missing, 0)

    if self.objects is not None and len(self.objects.columns) > 0:
        # NOTE(review): `method` is not forwarded here -- confirm intended.
        newObjects = self.objects.reindex(index)
    else:
        newObjects = None

    return DataMatrix(mat, index=index, columns=self.columns,
                      objects=newObjects)
def transform(self, applyfunc):
    """
    Apply a function to every group and write the results back in place.

    For the grouped Series, each group (a sub-Series) is handed to
    ``applyfunc``; whatever it returns is stored at the positions that the
    group's labels occupy in the original index.

    Parameters
    ----------
    applyfunc : function
        Called once per group with the sub-Series for that group.

    Note
    ----
    This does not aggregate: the value returned for each group should be
    another Series (one result per element of the group), not a scalar
    summary. Each group is given a ``groupName`` attribute holding its key
    before ``applyfunc`` is called.

    Example
    -------
    With ``grouped`` being this grouping object:

        grouped.transform(lambda x: (x - mean(x)) / std(x))

    Returns
    -------
    A copy of the grouped object with each group's entries replaced by the
    corresponding ``applyfunc`` output.
    """
    source = self.obj
    transformed = source.copy()

    for key, piece in self:
        # XXX
        piece.groupName = key
        piece_result = applyfunc(piece)

        # Positions of this group's labels within the full index.
        positions, _ = common.get_indexer(source.index, piece.index, None)
        np.put(transformed, positions, piece_result)

    return transformed
def _reorder_columns(mat, current, desired):
    """
    Take columns of ``mat`` in the order given by ``desired``.

    ``current`` and ``desired`` are handed to ``common.get_indexer``; only
    the indexer entries selected by the returned mask are used (presumably
    the ``desired`` labels that exist in ``current`` -- confirm against
    ``get_indexer``).

    Returns the result of ``mat.take(..., axis=1)`` on those positions.
    """
    positions, found = common.get_indexer(current, desired, None)
    selected = positions[found]
    return mat.take(selected, axis=1)
def transform(self, func):
    """
    Apply a function to every group and assemble a same-shaped DataFrame.

    For each entry of ``self.groups`` the grouped DataFrame is reindexed to
    that group's labels (rows for axis 0, columns for axis 1), ``func`` is
    applied to the resulting subframe, and the output values are written
    into the positions those labels occupy in the original frame.

    Note: this function does not aggregate; the result of ``func`` on each
    subframe should be another DataFrame-like object exposing ``.values``
    with the subframe's shape.

    Parameters
    ----------
    func : function
        Function to apply to each subframe. It is first applied via
        ``subframe.apply(func, axis=self.axis)``; if that raises, it is
        called directly as ``func(subframe)`` instead.

    Returns
    -------
    DataFrame
        Carries the index and columns of the grouped frame.

    Raises
    ------
    ValueError
        If ``self.axis`` is neither 0 nor 1.

    Note
    ----
    Each subframe is endowed the attribute 'groupName' in case you need to
    know which group you are working on.

    Example
    --------
    >>> grouped = df.groupby(lambda x: mapping[x])
    >>> grouped.transform(lambda x: (x - x.mean()) / x.std())
    """
    axis = self.axis
    if axis not in (0, 1):
        # Previously this surfaced later as an opaque UnboundLocalError.
        raise ValueError("axis must be 0 or 1, got %r" % (axis,))

    obj = self.obj
    by_rows = axis == 0
    labels = obj.index if by_rows else obj.columns

    def orient(values):
        # Present the grouped axis as axis 0 so groups can be assigned by
        # row position; for axis 1 this is a transposed view of the buffer.
        return values if by_rows else values.T

    # DataMatrix objects?
    result_values = orient(np.empty_like(obj.values))

    # .items() works on both Python 2 and Python 3 dicts.
    for val, group in self.groups.items():
        if not isinstance(group, list):  # pragma: no cover
            group = list(group)

        if by_rows:
            subframe = obj.reindex(group)
            sub_labels = subframe.index
        else:
            subframe = obj.reindex(columns=group)
            sub_labels = subframe.columns
        indexer, _ = common.get_indexer(labels, sub_labels, None)

        subframe.groupName = val

        try:
            res = subframe.apply(func, axis=axis)
        except Exception:  # pragma: no cover
            # Deliberate fallback: func may only work on the whole subframe.
            res = func(subframe)

        result_values[indexer] = orient(res.values)

    # Undo the orientation so the values line up with obj's own layout.
    return DataFrame(orient(result_values), index=obj.index,
                     columns=obj.columns)