def get_result(self):
    """Produce the concatenated pandas object.

    Series inputs are special-cased (axis 0 stacks values; otherwise the
    Series become columns of a frame); all other inputs go through the
    block-manager concatenation path.
    """
    if self._is_series:
        if self.axis == 0:
            # Stack the raw values of every Series end to end.
            new_data = com._concat_compat([x.get_values()
                                           for x in self.objs])
            # The result keeps a name only when all inputs agree on it.
            name = com._consensus_name_attr(self.objs)
            return (Series(new_data, index=self.new_axes[0], name=name)
                    .__finalize__(self, method='concat'))
        else:
            # Combine the Series as columns of a temporary frame, keyed
            # by position, then relabel with the computed columns.
            data = dict(zip(range(len(self.objs)), self.objs))
            index, columns = self.new_axes
            tmpdf = DataFrame(data, index=index)
            if columns is not None:
                tmpdf.columns = columns
            return tmpdf.__finalize__(self, method='concat')
    else:
        # Higher-dimensional objects: build (manager, indexers) pairs,
        # reindexing every axis except the concatenation axis.
        mgrs_indexers = []
        for obj in self.objs:
            mgr = obj._data
            indexers = {}
            for ax, new_labels in enumerate(self.new_axes):
                if ax == self.axis:
                    # Suppress reindexing on concat axis
                    continue

                obj_labels = mgr.axes[ax]
                if not new_labels.equals(obj_labels):
                    indexers[ax] = obj_labels.reindex(new_labels)[1]

            mgrs_indexers.append((obj._data, indexers))

        new_data = concatenate_block_managers(
            mgrs_indexers, self.new_axes, concat_axis=self.axis,
            copy=self.copy)
        if not self.copy:
            # No copy was made, so consolidate in place to leave the
            # manager in a well-formed state.
            new_data._consolidate_inplace()

        return (self.objs[0]._from_axes(new_data, self.new_axes)
                .__finalize__(self, method='concat'))
def _concat_blocks(self, blocks):
    # Combine one per-dtype group of blocks (``None`` where a source
    # object lacks this dtype) into a single result block.
    values_list = [b.values for b in blocks if b is not None]
    concat_values = com._concat_compat(values_list, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([b.items for b in blocks]):
            raise Exception('dtypes are not consistent throughout '
                            'DataFrames')
        return make_block(concat_values, blocks[0].items,
                          self.new_axes[0])
    else:
        # Row-wise concat: compute where each source block's rows land in
        # the stacked result (each object's rows are offset by the total
        # length of the objects before it).
        offsets = np.r_[0, np.cumsum([len(x._data.axes[0])
                                      for x in self.objs])]
        indexer = np.concatenate([offsets[i] + b.ref_locs
                                  for i, b in enumerate(blocks)
                                  if b is not None])
        if self.ignore_index:
            concat_items = indexer
        else:
            concat_items = self.new_axes[0].take(indexer)

        if self.ignore_index:
            # ignore_index: items come from a brand-new default axis
            ref_items = self._get_fresh_axis()
            return make_block(concat_values, concat_items, ref_items)

        return make_block(concat_values, concat_items, self.new_axes[0])
def _concat_single_item(self, item):
    """Concatenate the values for ``item`` across ``self.objs``.

    Objects missing the item contribute a NaN-filled array of the widest
    dtype seen, so shapes line up.

    Raises
    ------
    AssertionError
        If ``self.axis < 1`` — row-wise concatenation is handled
        elsewhere.
    """
    all_values = []
    dtypes = set()
    for obj in self.objs:
        try:
            values = obj._data.get(item)
            dtypes.add(values.dtype)
            all_values.append(values)
        except KeyError:
            all_values.append(None)

    # this stinks
    have_object = False
    for dtype in dtypes:
        if issubclass(dtype.type, (np.object_, np.bool_)):
            have_object = True
    if have_object:
        empty_dtype = np.object_
    else:
        empty_dtype = np.float64

    to_concat = []
    for obj, item_values in zip(self.objs, all_values):
        if item_values is None:
            # Item missing from this object: substitute NaN filler.
            shape = obj._data.shape[1:]
            missing_arr = np.empty(shape, dtype=empty_dtype)
            missing_arr.fill(np.nan)
            to_concat.append(missing_arr)
        else:
            to_concat.append(item_values)

    # this method only gets called with axis >= 1; raise explicitly with
    # a message instead of a bare assert (asserts vanish under -O)
    if self.axis < 1:
        raise AssertionError("axis must be >= 1, input was"
                             " {0}".format(self.axis))
    return com._concat_compat(to_concat, axis=self.axis - 1)
def _concat_blocks(self, blocks):
    # Combine one per-dtype group of blocks (``None`` where a source
    # object lacks this dtype) into a single result block.
    values_list = [b.values for b in blocks if b is not None]
    concat_values = com._concat_compat(values_list, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([b.items for b in blocks]):
            raise Exception("dtypes are not consistent throughout "
                            "DataFrames")
        return make_block(concat_values, blocks[0].items,
                          self.new_axes[0])
    else:
        # Row-wise concat: map each source block's rows into positions in
        # the stacked result, offset by the lengths of preceding objects.
        offsets = np.r_[0, np.cumsum([len(x._data.axes[0])
                                      for x in self.objs])]
        indexer = np.concatenate([offsets[i] + b.ref_locs
                                  for i, b in enumerate(blocks)
                                  if b is not None])
        if self.ignore_index:
            concat_items = indexer
        else:
            concat_items = self.new_axes[0].take(indexer)

        if self.ignore_index:
            # ignore_index: items come from a brand-new default axis
            ref_items = self._get_fresh_axis()
            return make_block(concat_values, concat_items, ref_items)

        block = make_block(concat_values, concat_items, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block._ref_locs = indexer
        return block
def _fast_union(self, other):
    """Union of two sorted ranges without a full set operation.

    Picks the earlier-starting operand as ``left`` and, when the offset
    is not cacheable, splices the non-overlapping tail of ``right`` onto
    it; otherwise rebuilds the range from its endpoints.
    """
    # Trivial cases: one side is empty.
    if not len(other):
        return self.view(type(self))
    if not len(self):
        return other.view(type(self))

    # Order the operands so that ``left`` starts first.
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    left_start = left[0]
    left_end = left[-1]
    right_end = right[-1]

    if self.offset._should_cache():
        # Cacheable offset: regenerate the range from its endpoints.
        return type(self)(start=left_start,
                          end=max(left_end, right_end),
                          freq=left.offset)

    # Otherwise concatenate ``left`` with the tail of ``right`` that
    # extends past it.
    if left_end < right_end:
        cut = right.searchsorted(left_end, side='right')
        tail = right.values[cut:]
        joined = com._concat_compat((left.values, tail))
        return self._view_like(joined)
    return left
def get_result(self):
    """Produce the concatenated object for Series or block-manager input."""
    if self._is_series:
        if self.axis == 0:
            # Stack the raw Series values end to end.
            new_data = com._concat_compat([x.get_values()
                                           for x in self.objs])
            # Name survives only when all inputs agree on it.
            name = com._consensus_name_attr(self.objs)
            return (Series(new_data, index=self.new_axes[0], name=name)
                    .__finalize__(self, method='concat'))
        else:
            # Series combined as columns of a temporary DataFrame.
            data = dict(zip(range(len(self.objs)), self.objs))
            index, columns = self.new_axes
            tmpdf = DataFrame(data, index=index)
            if columns is not None:
                tmpdf.columns = columns
            return tmpdf.__finalize__(self, method='concat')
    else:
        # Higher-dimensional objects: concatenate the block managers,
        # reindexing every non-concat axis to the new labels.
        mgrs_indexers = []
        for obj in self.objs:
            mgr = obj._data
            indexers = {}
            for ax, new_labels in enumerate(self.new_axes):
                if ax == self.axis:
                    # Suppress reindexing on concat axis
                    continue

                obj_labels = mgr.axes[ax]
                if not new_labels.equals(obj_labels):
                    indexers[ax] = obj_labels.reindex(new_labels)[1]

            mgrs_indexers.append((obj._data, indexers))

        # NOTE: this variant always copies (copy=True), unlike siblings
        # that honor a ``self.copy`` flag.
        new_data = concatenate_block_managers(
            mgrs_indexers, self.new_axes, concat_axis=self.axis,
            copy=True)

        return (self.objs[0]._from_axes(new_data, self.new_axes)
                .__finalize__(self, method='concat'))
def _concat_single_item(self, item):
    """Concatenate the values for ``item`` across ``self.objs``.

    Objects missing the item contribute a NaN-filled array of the widest
    dtype seen, so shapes line up.

    Raises
    ------
    AssertionError
        If ``self.axis < 1`` — row-wise concatenation is handled
        elsewhere.
    """
    all_values = []
    dtypes = set()
    for obj in self.objs:
        try:
            values = obj._data.get(item)
            dtypes.add(values.dtype)
            all_values.append(values)
        except KeyError:
            all_values.append(None)

    # this stinks
    have_object = False
    for dtype in dtypes:
        if issubclass(dtype.type, (np.object_, np.bool_)):
            have_object = True
    if have_object:
        empty_dtype = np.object_
    else:
        empty_dtype = np.float64

    to_concat = []
    for obj, item_values in zip(self.objs, all_values):
        if item_values is None:
            # Item missing from this object: substitute NaN filler.
            shape = obj._data.shape[1:]
            missing_arr = np.empty(shape, dtype=empty_dtype)
            missing_arr.fill(np.nan)
            to_concat.append(missing_arr)
        else:
            to_concat.append(item_values)

    # this method only gets called with axis >= 1; raise explicitly with
    # a message instead of a bare assert (asserts vanish under -O)
    if self.axis < 1:
        raise AssertionError("axis must be >= 1, input was"
                             " {0}".format(self.axis))
    return com._concat_compat(to_concat, axis=self.axis - 1)
def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
    """
    from pandas.core.index import _ensure_compat_concat

    name = self.name
    if isinstance(other, (list, tuple)):
        to_concat = [self] + list(other)
    else:
        to_concat = [self, other]

    # The result keeps a name only when every Index in the input agrees.
    for piece in to_concat:
        if isinstance(piece, Index) and piece.name != name:
            name = None
            break

    to_concat = _ensure_compat_concat(to_concat)
    arrays = [piece.values if isinstance(piece, Index) else piece
              for piece in to_concat]
    return Index(com._concat_compat(arrays), name=name)
def _concat_blocks(self, blocks):
    # Combine one per-dtype group of blocks (``None`` where a source
    # object lacks this dtype) into a single result block.
    values_list = [b.values for b in blocks if b is not None]
    concat_values = com._concat_compat(values_list, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([b.items for b in blocks]):
            raise Exception('dtypes are not consistent throughout '
                            'DataFrames')
        return make_block(concat_values, blocks[0].items,
                          self.new_axes[0])
    else:
        # Row-wise concat: compute where each source block's rows land in
        # the stacked result, offset by preceding objects' lengths.
        offsets = np.r_[
            0, np.cumsum([len(x._data.axes[0]) for x in self.objs])]
        indexer = np.concatenate([
            offsets[i] + b.ref_locs
            for i, b in enumerate(blocks) if b is not None
        ])
        if self.ignore_index:
            concat_items = indexer
        else:
            concat_items = self.new_axes[0].take(indexer)

        if self.ignore_index:
            # ignore_index: items come from a brand-new default axis
            ref_items = self._get_fresh_axis()
            return make_block(concat_values, concat_items, ref_items)

        return make_block(concat_values, concat_items, self.new_axes[0])
def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
    """
    name = self.name
    if isinstance(other, (list, tuple)):
        to_concat = [self] + list(other)
    else:
        to_concat = [self, other]

    # The result keeps a name only when every Index in the input agrees.
    for piece in to_concat:
        if isinstance(piece, Index) and piece.name != name:
            name = None
            break

    to_concat = self._ensure_compat_concat(to_concat)
    return Index(com._concat_compat(to_concat), name=name)
def get_result(self):
    """Build the final concatenated object."""
    # series only
    if self._is_series:

        # stack blocks
        if self.axis == 0:
            new_data = com._concat_compat([x._values for x in self.objs])
            name = com._consensus_name_attr(self.objs)
            return (Series(new_data, index=self.new_axes[0], name=name,
                           dtype=new_data.dtype)
                    .__finalize__(self, method='concat'))

        # combine as columns in a frame
        else:
            data = dict(zip(range(len(self.objs)), self.objs))
            index, columns = self.new_axes
            tmpdf = DataFrame(data, index=index)

            # checks if the column variable already stores valid column
            # names (because set via the 'key' argument in the 'concat'
            # function call. If that's not the case, use the series names
            # as column names
            if (columns.equals(Index(np.arange(len(self.objs)))) and
                    not self.ignore_index):
                columns = np.array([data[i].name
                                    for i in range(len(data))],
                                   dtype='object')
                indexer = isnull(columns)
                if indexer.any():
                    # unnamed Series fall back to positional labels
                    columns[indexer] = np.arange(len(indexer[indexer]))
            tmpdf.columns = columns
            return tmpdf.__finalize__(self, method='concat')

    # combine block managers
    else:
        mgrs_indexers = []
        for obj in self.objs:
            mgr = obj._data
            indexers = {}
            for ax, new_labels in enumerate(self.new_axes):
                if ax == self.axis:
                    # Suppress reindexing on concat axis
                    continue

                obj_labels = mgr.axes[ax]
                if not new_labels.equals(obj_labels):
                    indexers[ax] = obj_labels.reindex(new_labels)[1]

            mgrs_indexers.append((obj._data, indexers))

        new_data = concatenate_block_managers(
            mgrs_indexers, self.new_axes, concat_axis=self.axis,
            copy=self.copy)
        if not self.copy:
            # No copy was made; consolidate the manager in place.
            new_data._consolidate_inplace()

        return (self.objs[0]._from_axes(new_data, self.new_axes)
                .__finalize__(self, method='concat'))
def get_result(self):
    """Assemble the concatenated result object."""
    # series only
    if self._is_series:

        # stack blocks
        if self.axis == 0:
            # concat Series with length to keep dtype as much
            non_empties = [x for x in self.objs if len(x) > 0]
            if len(non_empties) > 0:
                values = [x._values for x in non_empties]
            else:
                values = [x._values for x in self.objs]

            new_data = com._concat_compat(values)
            name = com._consensus_name_attr(self.objs)
            return (Series(new_data, index=self.new_axes[0], name=name,
                           dtype=new_data.dtype).__finalize__(
                self, method='concat'))

        # combine as columns in a frame
        else:
            data = dict(zip(range(len(self.objs)), self.objs))
            index, columns = self.new_axes
            tmpdf = DataFrame(data, index=index)
            tmpdf.columns = columns
            return tmpdf.__finalize__(self, method='concat')

    # combine block managers
    else:
        mgrs_indexers = []
        for obj in self.objs:
            mgr = obj._data
            indexers = {}
            for ax, new_labels in enumerate(self.new_axes):
                if ax == self.axis:
                    # Suppress reindexing on concat axis
                    continue

                obj_labels = mgr.axes[ax]
                if not new_labels.equals(obj_labels):
                    indexers[ax] = obj_labels.reindex(new_labels)[1]

            mgrs_indexers.append((obj._data, indexers))

        new_data = concatenate_block_managers(
            mgrs_indexers, self.new_axes, concat_axis=self.axis,
            copy=self.copy)
        if not self.copy:
            # No copy was made; consolidate the manager in place.
            new_data._consolidate_inplace()

        return (self.objs[0]._from_axes(
            new_data, self.new_axes).__finalize__(self, method='concat'))
def get_result(self):
    """Build the concatenated result object."""
    if self._is_series:
        if self.axis == 0:
            # Stack all Series values end to end.
            stacked = com._concat_compat([obj.values
                                          for obj in self.objs])
            name = com._consensus_name_attr(self.objs)
            return Series(stacked, index=self.new_axes[0], name=name)

        # axis != 0: lay the Series out as columns of a frame.
        columns = self.new_axes[1]
        data = dict(zip(columns, self.objs))
        return DataFrame(data, index=self.new_axes[0], columns=columns)

    new_data = self._get_concatenated_data()
    return self.objs[0]._from_axes(new_data, self.new_axes)
def get_result(self):
    """Assemble the concatenated result object."""
    # series only
    if self._is_series:

        # stack blocks
        if self.axis == 0:
            # concat Series with length to keep dtype as much
            non_empties = [x for x in self.objs if len(x) > 0]
            if len(non_empties) > 0:
                values = [x._values for x in non_empties]
            else:
                values = [x._values for x in self.objs]

            new_data = com._concat_compat(values)
            name = com._consensus_name_attr(self.objs)
            return (Series(new_data, index=self.new_axes[0], name=name,
                           dtype=new_data.dtype)
                    .__finalize__(self, method='concat'))

        # combine as columns in a frame
        else:
            data = dict(zip(range(len(self.objs)), self.objs))
            index, columns = self.new_axes
            tmpdf = DataFrame(data, index=index)
            tmpdf.columns = columns
            return tmpdf.__finalize__(self, method='concat')

    # combine block managers
    else:
        mgrs_indexers = []
        for obj in self.objs:
            mgr = obj._data
            indexers = {}
            for ax, new_labels in enumerate(self.new_axes):
                if ax == self.axis:
                    # Suppress reindexing on concat axis
                    continue

                obj_labels = mgr.axes[ax]
                if not new_labels.equals(obj_labels):
                    indexers[ax] = obj_labels.reindex(new_labels)[1]

            mgrs_indexers.append((obj._data, indexers))

        new_data = concatenate_block_managers(
            mgrs_indexers, self.new_axes, concat_axis=self.axis,
            copy=self.copy)
        if not self.copy:
            # No copy was made; consolidate the manager in place.
            new_data._consolidate_inplace()

        return (self.objs[0]._from_axes(new_data, self.new_axes)
                .__finalize__(self, method='concat'))
def get_result(self):
    """Assemble the concatenation result."""
    if self._is_series and self.axis == 0:
        # Plain stacking of the Series values.
        stacked = com._concat_compat([obj.values for obj in self.objs])
        name = com._consensus_name_attr(self.objs)
        return Series(stacked, index=self.new_axes[0], name=name)

    if self._is_series:
        # Series combined column-wise: key by position, then relabel.
        frame = DataFrame(dict(enumerate(self.objs)),
                          index=self.new_axes[0])
        frame.columns = self.new_axes[1]
        return frame

    new_data = self._get_concatenated_data()
    return self.objs[0]._from_axes(new_data, self.new_axes)
def get_result(self):
    """Assemble the concatenated object, applying the post-merge hook."""
    if self._is_series and self.axis == 0:
        # Stack the Series values, then run the post-merge hook.
        stacked = com._concat_compat([obj.get_values()
                                      for obj in self.objs])
        name = com._consensus_name_attr(self.objs)
        stacked = self._post_merge(stacked)
        return Series(stacked, index=self.new_axes[0], name=name)

    if self._is_series:
        # Column-wise combination: key by position, then relabel.
        index, columns = self.new_axes
        frame = DataFrame(dict(enumerate(self.objs)), index=index)
        if columns is not None:
            frame.columns = columns
        return frame

    new_data = self._post_merge(self._get_concatenated_data())
    return self.objs[0]._from_axes(new_data, self.new_axes)
def _concat_blocks(self, blocks):
    # Merge one per-dtype group of blocks (``None`` where a source object
    # lacks this dtype) into a single result block.
    values_list = [b.get_values() for b in blocks if b is not None]
    concat_values = com._concat_compat(values_list, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([b.items for b in blocks]):
            # TODO: Either profile this piece or remove.
            # FIXME: Need to figure out how to test whether this line
            # exists or does not...(unclear if even possible or maybe
            # would require performance test)
            raise PandasError('dtypes are not consistent throughout '
                              'DataFrames')
        return make_block(concat_values,
                          blocks[0].items,
                          self.new_axes[0],
                          placement=blocks[0]._ref_locs)
    else:
        # Row-wise concat: compute where each source block's rows land in
        # the stacked result, offset by preceding objects' lengths.
        offsets = np.r_[
            0, np.cumsum([len(x._data.axes[0]) for x in self.objs])]
        indexer = np.concatenate([
            offsets[i] + b.ref_locs
            for i, b in enumerate(blocks) if b is not None
        ])
        if self.ignore_index:
            concat_items = indexer
        else:
            concat_items = self.new_axes[0].take(indexer)

        if self.ignore_index:
            # ignore_index: items come from a brand-new default axis
            ref_items = self._get_fresh_axis()
            return make_block(concat_values, concat_items, ref_items)

        block = make_block(concat_values, concat_items, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block.set_ref_locs(indexer)

        return block
def _concat_blocks(self, blocks):
    # Merge one per-dtype group of blocks (``None`` where a source object
    # lacks this dtype) into a single result block.
    values_list = [b.get_values() for b in blocks if b is not None]
    concat_values = com._concat_compat(values_list, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([b.items for b in blocks]):
            # TODO: Either profile this piece or remove.
            # FIXME: Need to figure out how to test whether this line
            # exists or does not...(unclear if even possible or maybe
            # would require performance test)
            raise PandasError('dtypes are not consistent throughout '
                              'DataFrames')
        return make_block(concat_values,
                          blocks[0].items,
                          self.new_axes[0],
                          placement=blocks[0]._ref_locs)
    else:
        # Row-wise concat: map each source block's rows into positions in
        # the stacked result, offset by preceding objects' lengths.
        offsets = np.r_[0, np.cumsum([len(x._data.axes[0])
                                      for x in self.objs])]
        indexer = np.concatenate([offsets[i] + b.ref_locs
                                  for i, b in enumerate(blocks)
                                  if b is not None])
        if self.ignore_index:
            concat_items = indexer
        else:
            concat_items = self.new_axes[0].take(indexer)

        if self.ignore_index:
            # ignore_index: items come from a brand-new default axis
            ref_items = self._get_fresh_axis()
            return make_block(concat_values, concat_items, ref_items)

        block = make_block(concat_values, concat_items, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block.set_ref_locs(indexer)

        return block
def _concat_single_item(self, objs, item):
    """Concatenate the values of ``item`` across data managers.

    Parameters
    ----------
    objs : list of data managers corresponding to ``self.objs``
    item : label to pull from each manager

    Raises
    ------
    AssertionError
        If ``self.axis < 1`` — this helper only handles non-row axes.
    """
    all_values = []
    dtypes = set()

    # le sigh
    if isinstance(self.objs[0], SparseDataFrame):
        objs = [x._data for x in self.objs]

    for data, orig in zip(objs, self.objs):
        if item in orig:
            values = data.get(item)
            dtypes.add(values.dtype)
            all_values.append(values)
        else:
            all_values.append(None)

    # this stinks
    have_object = False
    for dtype in dtypes:
        if issubclass(dtype.type, (np.object_, np.bool_)):
            have_object = True
    if have_object:
        empty_dtype = np.object_
    else:
        empty_dtype = np.float64

    to_concat = []
    for obj, item_values in zip(objs, all_values):
        if item_values is None:
            # Item missing from this object: substitute NaN filler of the
            # appropriate shape and dtype.
            shape = obj.shape[1:]
            missing_arr = np.empty(shape, dtype=empty_dtype)
            missing_arr.fill(np.nan)
            to_concat.append(missing_arr)
        else:
            to_concat.append(item_values)

    # this method only gets called with axis >= 1; raise with a useful
    # message instead of a bare AssertionError
    if self.axis < 1:
        raise AssertionError("axis must be >= 1, input was"
                             " {0}".format(self.axis))
    return com._concat_compat(to_concat, axis=self.axis - 1)
def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
    """
    name = self.name
    if isinstance(other, (list, tuple)):
        to_concat = [self] + list(other)
    else:
        to_concat = [self, other]

    # The result keeps a name only when every Index in the input agrees.
    for piece in to_concat:
        if isinstance(piece, Index) and piece.name != name:
            name = None
            break

    to_concat = self._ensure_compat_concat(to_concat)

    if isinstance(to_concat[0], PeriodIndex):
        freqs = set(x.freq for x in to_concat)
        if len(freqs) > 1:
            # Mixed frequencies: fall back to object dtype ("box").
            to_concat = [x.asobject.values for x in to_concat]
        else:
            cat_values = np.concatenate([x.values for x in to_concat])
            return PeriodIndex(cat_values, freq=self.freq, name=name)

    arrays = [piece.values if isinstance(piece, Index) else piece
              for piece in to_concat]
    return Index(com._concat_compat(arrays), name=name)
def append(self, other):
    """
    Append a collection of Index options together

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    appended : Index
    """
    name = self.name
    if isinstance(other, (list, tuple)):
        pieces = [self] + list(other)
    else:
        pieces = [self, other]

    # Keep the name only if every Index among the inputs shares it.
    for idx in pieces:
        if isinstance(idx, Index) and idx.name != name:
            name = None
            break

    pieces = self._ensure_compat_concat(pieces)

    if isinstance(pieces[0], PeriodIndex):
        if len(set(x.freq for x in pieces)) > 1:
            # Mixed frequencies cannot stay periodic: box to objects.
            pieces = [x.asobject.values for x in pieces]
        else:
            cat_values = np.concatenate([x.values for x in pieces])
            return PeriodIndex(cat_values, freq=self.freq, name=name)

    raw = [x.values if isinstance(x, Index) else x for x in pieces]
    return Index(com._concat_compat(raw), name=name)
def _concat_blocks(self, blocks):
    # Combine one per-dtype group of blocks (``None`` where a source
    # object lacks this dtype) into a single result block.
    values_list = [b.values for b in blocks if b is not None]
    concat_values = com._concat_compat(values_list, axis=self.axis)

    if self.axis > 0:
        # Not safe to remove this check, need to profile
        if not _all_indexes_same([b.items for b in blocks]):
            raise Exception('dtypes are not consistent throughout '
                            'DataFrames')
        return make_block(concat_values, blocks[0].items,
                          self.new_axes[0])
    else:
        # Row-wise concat: map each source block's rows into positions in
        # the stacked result, offset by preceding objects' lengths.
        offsets = np.r_[
            0, np.cumsum([len(x._data.axes[0]) for x in self.objs])]
        indexer = np.concatenate([
            offsets[i] + b.ref_locs
            for i, b in enumerate(blocks) if b is not None
        ])
        if self.ignore_index:
            concat_items = indexer
        else:
            concat_items = self.new_axes[0].take(indexer)

        if self.ignore_index:
            # ignore_index: items come from a brand-new default axis
            ref_items = self._get_fresh_axis()
            return make_block(concat_values, concat_items, ref_items)

        block = make_block(concat_values, concat_items, self.new_axes[0])

        # we need to set the ref_locs in this block so we have the mapping
        # as we now have a non-unique index across dtypes, and we need to
        # map the column location to the block location
        # GH3602
        if not self.new_axes[0].is_unique:
            block._ref_locs = indexer
        return block
def _fast_union(self, other):
    """Union two pre-sorted ranges by splicing, avoiding a full set union."""
    if not len(other):
        return self.view(type(self))
    if not len(self):
        return other.view(type(self))

    # Order the operands so that ``left`` starts first.
    left, right = (self, other) if self[0] <= other[0] else (other, self)

    left_end = left[-1]
    right_end = right[-1]

    if left_end >= right_end:
        # ``right`` is entirely covered by ``left``.
        return left

    # Append the portion of ``right`` that extends past ``left``.
    cut = right.searchsorted(left_end, side='right')
    tail = right.values[cut:]
    joined = com._concat_compat((left.values, tail))
    return self._shallow_copy(joined)
def _fast_union(self, other):
    """Union two pre-sorted ranges without a full set operation.

    Parameters
    ----------
    other : same type as ``self``

    Returns
    -------
    The earlier-starting range extended with the tail of the other, or a
    view of the non-empty operand when one side is empty.
    """
    if len(other) == 0:
        return self.view(type(self))

    if len(self) == 0:
        return other.view(type(self))

    # to make our life easier, "sort" the two ranges
    if self[0] <= other[0]:
        left, right = self, other
    else:
        left, right = other, self

    # only the endpoints matter for the splice below
    # (a previously computed ``left_start`` was unused and is removed)
    left_end = left[-1]
    right_end = right[-1]

    # concatenate
    if left_end < right_end:
        loc = right.searchsorted(left_end, side='right')
        right_chunk = right.values[loc:]
        dates = com._concat_compat((left.values, right_chunk))
        return self._shallow_copy(dates)
    else:
        # ``right`` is entirely covered by ``left``
        return left
def lreshape(data, groups, dropna=True, label=None):
    """
    Reshape long-format data to wide. Generalized inverse of DataFrame.pivot

    Parameters
    ----------
    data : DataFrame
    groups : dict {new_name : list_of_columns}
    dropna : boolean, default True

    Examples
    --------
    >>> data
       hr1  hr2     team  year1  year2
    0  514  545  Red Sox   2007   2008
    1  573  526  Yankees   2007   2008

    >>> pd.lreshape(data, {'year': ['year1', 'year2'],
    ...                    'hr': ['hr1', 'hr2']})
          team   hr  year
    0  Red Sox  514  2007
    1  Yankees  573  2007
    2  Red Sox  545  2008
    3  Yankees  526  2008

    Returns
    -------
    reshaped : DataFrame
    """
    if isinstance(groups, dict):
        keys = groups.keys()
        values = groups.values()
    else:
        keys, values = zip(*groups)

    # Columns participating in some group vs. the identifier columns.
    all_cols = list(set.union(*[set(cols) for cols in values]))
    id_cols = list(data.columns.diff(all_cols))

    # Every group must list the same number of source columns.
    ncols = len(values[0])
    for column_list in values:
        if len(column_list) != ncols:
            raise ValueError('All column lists must be same length')

    mdata = {}
    pivot_cols = []
    for new_col, source_cols in zip(keys, values):
        # Stack the source columns end-to-end under the new name.
        mdata[new_col] = com._concat_compat([data[col].values
                                             for col in source_cols])
        pivot_cols.append(new_col)

    # Identifier columns repeat once per stacked source column.
    for col in id_cols:
        mdata[col] = np.tile(data[col].values, ncols)

    if dropna:
        # Drop rows where any pivoted value is missing.
        mask = np.ones(len(mdata[pivot_cols[0]]), dtype=bool)
        for col in pivot_cols:
            mask &= notnull(mdata[col])
        if not mask.all():
            mdata = dict((k, v[mask]) for k, v in mdata.iteritems())

    return DataFrame(mdata, columns=id_cols + pivot_cols)
def _concat_single_item(self, objs, item):
    """Concatenate ``item`` across ``objs`` when row-wise dtypes differ.

    Determines the combined result dtype (object / datetime / timedelta /
    numeric), fills objects missing the item with an appropriately typed
    NA array, and concatenates along ``self.axis - 1``.

    Raises
    ------
    AssertionError
        If ``self.axis < 1`` or dtype determination fails.
    """
    # this is called if we don't have consistent dtypes in a row-wise
    # append
    all_values = []
    dtypes = set()

    for data, orig in zip(objs, self.objs):
        if item in orig:
            values = data.get(item)
            if hasattr(values, 'to_dense'):
                # densify sparse values before combining
                values = values.to_dense()
            dtypes.add(values.dtype)
            all_values.append(values)
        else:
            all_values.append(None)

    # figure out the resulting dtype of the combination
    alls = set()
    seen = []
    for dtype in dtypes:
        d = dict([(t, False)
                  for t in ['object', 'datetime', 'timedelta', 'other']])
        if issubclass(dtype.type, (np.object_, np.bool_)):
            d['object'] = True
            alls.add('object')
        elif is_datetime64_dtype(dtype):
            d['datetime'] = True
            alls.add('datetime')
        elif is_timedelta64_dtype(dtype):
            d['timedelta'] = True
            alls.add('timedelta')
        else:
            d['other'] = True
            alls.add('other')
        seen.append(d)

    if 'datetime' in alls or 'timedelta' in alls:
        if 'object' in alls or 'other' in alls:
            for v, s in zip(all_values, seen):
                if s.get('datetime') or s.get('timedelta'):
                    pass

                # if we have all null, then leave a date/time like type
                # if we have only that type left
                elif isnull(v).all():
                    # BUG FIX: use discard, not remove -- the guard above
                    # is an ``or``, so only one of 'other'/'object' may be
                    # present (and a second all-null column would try to
                    # remove again); remove() would raise KeyError
                    alls.discard('other')
                    alls.discard('object')

    # create the result
    if 'object' in alls:
        empty_dtype, fill_value = np.object_, np.nan
    elif 'other' in alls:
        empty_dtype, fill_value = np.float64, np.nan
    elif 'datetime' in alls:
        empty_dtype, fill_value = 'M8[ns]', tslib.iNaT
    elif 'timedelta' in alls:
        empty_dtype, fill_value = 'm8[ns]', tslib.iNaT
    else:  # pragma
        raise AssertionError("invalid dtype determination in "
                             "concat_single_item")

    to_concat = []
    for obj, item_values in zip(objs, all_values):
        if item_values is None:
            # item missing from this object: substitute an NA-filled
            # array of the determined dtype
            shape = obj.shape[1:]
            missing_arr = np.empty(shape, dtype=empty_dtype)
            missing_arr.fill(fill_value)
            to_concat.append(missing_arr)
        else:
            to_concat.append(item_values)

    # this method only gets called with axis >= 1; raise with a clear
    # message rather than a bare AssertionError
    if self.axis < 1:
        raise AssertionError("axis must be >= 1, input was"
                             " {0}".format(self.axis))
    return com._concat_compat(to_concat, axis=self.axis - 1)
def _concat_single_item(self, objs, item):
    """Concatenate ``item`` across ``objs`` when row-wise dtypes differ.

    Determines the combined result dtype (object / datetime / timedelta /
    numeric), fills objects missing the item with an appropriately typed
    NA array, and concatenates along ``self.axis - 1``.

    Raises
    ------
    AssertionError
        If ``self.axis < 1`` or dtype determination fails.
    """
    # this is called if we don't have consistent dtypes in a row-wise
    # append
    all_values = []
    dtypes = set()

    for data, orig in zip(objs, self.objs):
        if item in orig:
            values = data.get(item)
            if hasattr(values, 'to_dense'):
                # densify sparse values before combining
                values = values.to_dense()
            dtypes.add(values.dtype)
            all_values.append(values)
        else:
            all_values.append(None)

    # figure out the resulting dtype of the combination
    alls = set()
    seen = []
    for dtype in dtypes:
        d = dict([(t, False)
                  for t in ['object', 'datetime', 'timedelta', 'other']])
        if issubclass(dtype.type, (np.object_, np.bool_)):
            d['object'] = True
            alls.add('object')
        elif is_datetime64_dtype(dtype):
            d['datetime'] = True
            alls.add('datetime')
        elif is_timedelta64_dtype(dtype):
            d['timedelta'] = True
            alls.add('timedelta')
        else:
            d['other'] = True
            alls.add('other')
        seen.append(d)

    if 'datetime' in alls or 'timedelta' in alls:
        if 'object' in alls or 'other' in alls:
            for v, s in zip(all_values, seen):
                if s.get('datetime') or s.get('timedelta'):
                    pass

                # if we have all null, then leave a date/time like type
                # if we have only that type left
                elif isnull(v).all():
                    # BUG FIX: use discard, not remove -- the guard above
                    # is an ``or``, so only one of 'other'/'object' may be
                    # present (and a second all-null column would try to
                    # remove again); remove() would raise KeyError
                    alls.discard('other')
                    alls.discard('object')

    # create the result
    if 'object' in alls:
        empty_dtype, fill_value = np.object_, np.nan
    elif 'other' in alls:
        empty_dtype, fill_value = np.float64, np.nan
    elif 'datetime' in alls:
        empty_dtype, fill_value = 'M8[ns]', tslib.iNaT
    elif 'timedelta' in alls:
        empty_dtype, fill_value = 'm8[ns]', tslib.iNaT
    else:  # pragma
        raise AssertionError("invalid dtype determination in "
                             "concat_single_item")

    to_concat = []
    for obj, item_values in zip(objs, all_values):
        if item_values is None:
            # item missing from this object: substitute an NA-filled
            # array of the determined dtype
            shape = obj.shape[1:]
            missing_arr = np.empty(shape, dtype=empty_dtype)
            missing_arr.fill(fill_value)
            to_concat.append(missing_arr)
        else:
            to_concat.append(item_values)

    # this method only gets called with axis >= 1
    if self.axis < 1:
        raise AssertionError("axis must be >= 1, input was"
                             " {0}".format(self.axis))
    return com._concat_compat(to_concat, axis=self.axis - 1)
def _concat_single_item(self, objs, item):
    """Concatenate ``item`` across ``objs`` when row-wise dtypes differ.

    Determines the combined result dtype (object / datetime / timedelta /
    numeric), fills objects missing the item with an appropriately typed
    NA array, and concatenates along ``self.axis - 1``.

    Raises
    ------
    AssertionError
        If ``self.axis < 1`` or dtype determination fails.
    """
    # this is called if we don't have consistent dtypes in a row-wise
    # append
    all_values = []
    dtypes = set()

    for data, orig in zip(objs, self.objs):
        if item in orig:
            values = data.get(item)
            if hasattr(values, "to_dense"):
                # densify sparse values before combining
                values = values.to_dense()
            dtypes.add(values.dtype)
            all_values.append(values)
        else:
            all_values.append(None)

    # figure out the resulting dtype of the combination
    alls = set()
    seen = []
    for dtype in dtypes:
        d = dict([(t, False)
                  for t in ["object", "datetime", "timedelta", "other"]])
        if issubclass(dtype.type, (np.object_, np.bool_)):
            d["object"] = True
            alls.add("object")
        elif is_datetime64_dtype(dtype):
            d["datetime"] = True
            alls.add("datetime")
        elif is_timedelta64_dtype(dtype):
            d["timedelta"] = True
            alls.add("timedelta")
        else:
            d["other"] = True
            alls.add("other")
        seen.append(d)

    if "datetime" in alls or "timedelta" in alls:
        if "object" in alls or "other" in alls:
            for v, s in zip(all_values, seen):
                if s.get("datetime") or s.get("timedelta"):
                    pass

                # if we have all null, then leave a date/time like type
                # if we have only that type left
                elif isnull(v).all():
                    # BUG FIX: use discard, not remove -- the guard above
                    # is an ``or``, so only one of "other"/"object" may be
                    # present (and a second all-null column would try to
                    # remove again); remove() would raise KeyError
                    alls.discard("other")
                    alls.discard("object")

    # create the result
    if "object" in alls:
        empty_dtype, fill_value = np.object_, np.nan
    elif "other" in alls:
        empty_dtype, fill_value = np.float64, np.nan
    elif "datetime" in alls:
        empty_dtype, fill_value = "M8[ns]", tslib.iNaT
    elif "timedelta" in alls:
        empty_dtype, fill_value = "m8[ns]", tslib.iNaT
    else:  # pragma
        raise AssertionError("invalid dtype determination in "
                             "concat_single_item")

    to_concat = []
    for obj, item_values in zip(objs, all_values):
        if item_values is None:
            # item missing from this object: substitute an NA-filled
            # array of the determined dtype
            shape = obj.shape[1:]
            missing_arr = np.empty(shape, dtype=empty_dtype)
            missing_arr.fill(fill_value)
            to_concat.append(missing_arr)
        else:
            to_concat.append(item_values)

    # this method only gets called with axis >= 1
    if self.axis < 1:
        raise AssertionError("axis must be >= 1, input was"
                             " {0}".format(self.axis))
    return com._concat_compat(to_concat, axis=self.axis - 1)