def get_new_values(self): values = self.values # place the values length, width = self.full_shape stride = values.shape[1] result_width = width * stride result_shape = (length, result_width) # if our mask is all True, then we can use our existing dtype if self.mask.all(): dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: dtype, fill_value = _maybe_promote(values.dtype) new_values = np.empty(result_shape, dtype=dtype) new_values.fill(fill_value) new_mask = np.zeros(result_shape, dtype=bool) # is there a simpler / faster way of doing this? for i in xrange(values.shape[1]): chunk = new_values[:, i * width: (i + 1) * width] mask_chunk = new_mask[:, i * width: (i + 1) * width] chunk.flat[self.mask] = self.sorted_values[:, i] mask_chunk.flat[self.mask] = True return new_values, new_mask
def block2d_to_blocknd(values, items, shape, labels, ref_items=None): """ pivot to the labels shape """ from pandas.core.internals import make_block panel_shape = (len(items),) + shape # TODO: lexsort depth needs to be 2!! # Create observation selection vector using major and minor # labels, for converting to panel format. selector = factor_indexer(shape[1:], labels) mask = np.zeros(np.prod(shape), dtype=bool) mask.put(selector, True) if mask.all(): pvalues = np.empty(panel_shape, dtype=values.dtype) else: dtype, fill_value = _maybe_promote(values.dtype) pvalues = np.empty(panel_shape, dtype=dtype) pvalues.fill(fill_value) values = values for i in xrange(len(items)): pvalues[i].flat[mask] = values[:, i] if ref_items is None: ref_items = items return make_block(pvalues, items, ref_items)
def block2d_to_blocknd(values, items, shape, labels, ref_items=None): """ pivot to the labels shape """ from pandas.core.internals import make_block panel_shape = (len(items), ) + shape # TODO: lexsort depth needs to be 2!! # Create observation selection vector using major and minor # labels, for converting to panel format. selector = factor_indexer(shape[1:], labels) mask = np.zeros(np.prod(shape), dtype=bool) mask.put(selector, True) if mask.all(): pvalues = np.empty(panel_shape, dtype=values.dtype) else: dtype, fill_value = _maybe_promote(values.dtype) pvalues = np.empty(panel_shape, dtype=dtype) pvalues.fill(fill_value) values = values for i in range(len(items)): pvalues[i].flat[mask] = values[:, i] if ref_items is None: ref_items = items return make_block(pvalues, items, ref_items)
def get_new_values(self): values = self.values # place the values length, width = self.full_shape stride = values.shape[1] result_width = width * stride result_shape = (length, result_width) # if our mask is all True, then we can use our existing dtype if self.mask.all(): dtype = values.dtype new_values = np.empty(result_shape, dtype=dtype) else: dtype, fill_value = _maybe_promote(values.dtype) new_values = np.empty(result_shape, dtype=dtype) new_values.fill(fill_value) new_mask = np.zeros(result_shape, dtype=bool) # is there a simpler / faster way of doing this? for i in range(values.shape[1]): chunk = new_values[:, i * width:(i + 1) * width] mask_chunk = new_mask[:, i * width:(i + 1) * width] chunk.flat[self.mask] = self.sorted_values[:, i] mask_chunk.flat[self.mask] = True return new_values, new_mask
def get_new_values(self): values = self.values # place the values length, width = self.full_shape stride = values.shape[1] result_width = width * stride dtype, fill_value = _maybe_promote(values.dtype) new_values = np.empty((length, result_width), dtype=dtype) new_values.fill(fill_value) new_mask = np.zeros((length, result_width), dtype=bool) # is there a simpler / faster way of doing this? for i in xrange(values.shape[1]): chunk = new_values[:, i * width: (i + 1) * width] mask_chunk = new_mask[:, i * width: (i + 1) * width] chunk.flat[self.mask] = self.sorted_values[:, i] mask_chunk.flat[self.mask] = True return new_values, new_mask
def take_2d_multi(arr, indexer, out=None, fill_value=np.nan, mask_info=None, allow_fill=True): """ Specialized Cython take which sets NaN values in one pass """ if indexer is None or (indexer[0] is None and indexer[1] is None): row_idx = np.arange(arr.shape[0], dtype=np.int64) col_idx = np.arange(arr.shape[1], dtype=np.int64) indexer = row_idx, col_idx dtype, fill_value = arr.dtype, arr.dtype.type() else: row_idx, col_idx = indexer if row_idx is None: row_idx = np.arange(arr.shape[0], dtype=np.int64) else: row_idx = com._ensure_int64(row_idx) if col_idx is None: col_idx = np.arange(arr.shape[1], dtype=np.int64) else: col_idx = com._ensure_int64(col_idx) indexer = row_idx, col_idx if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False else: # check for promotion based on types only (do this first because # it's faster than computing a mask) dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: (row_mask, col_mask), (row_needs, col_needs) = mask_info else: row_mask = row_idx == -1 col_mask = col_idx == -1 row_needs = row_mask.any() col_needs = col_mask.any() mask_info = (row_mask, col_mask), (row_needs, col_needs) if row_needs or col_needs: if out is not None and out.dtype != dtype: raise TypeError('Incompatible type for fill_value') else: # if not, then depromote, set fill_value to dummy # (it won't be used but we don't want the cython code # to crash when trying to cast it to dtype) dtype, fill_value = arr.dtype, arr.dtype.type() # at this point, it's guaranteed that dtype can hold both the arr values # and the fill_value if out is None: out_shape = len(row_idx), len(col_idx) out = np.empty(out_shape, dtype=dtype) func = _take_2d_multi_dict.get((arr.dtype.name, out.dtype.name), None) if func is None and arr.dtype != out.dtype: func = _take_2d_multi_dict.get((out.dtype.name, out.dtype.name), None) if func is not None: func = _convert_wrapper(func, out.dtype) if func is None: def func(arr, indexer, out, fill_value=np.nan): _take_2d_multi_generic(arr, indexer, out, fill_value=fill_value, mask_info=mask_info) func(arr, indexer, out=out, fill_value=fill_value) return out
def take_nd(arr, indexer, axis=0, out=None, fill_value=np.nan, mask_info=None, allow_fill=True): """ Specialized Cython take which sets NaN values in one pass Parameters ---------- arr : ndarray Input array indexer : ndarray 1-D array of indices to take, subarrays corresponding to -1 value indicies are filed with fill_value axis : int, default 0 Axis to take from out : ndarray or None, default None Optional output array, must be appropriate type to hold input and fill_value together, if indexer has any -1 value entries; call common._maybe_promote to determine this type for any fill_value fill_value : any, default np.nan Fill value to replace -1 values with mask_info : tuple of (ndarray, boolean) If provided, value should correspond to: (indexer != -1, (indexer != -1).any()) If not provided, it will be computed internally if necessary allow_fill : boolean, default True If False, indexer is assumed to contain no -1 values so no filling will be done. This short-circuits computation of a mask. Result is undefined if allow_fill == False and -1 is present in indexer. """ # dispatch to internal type takes if com.is_categorical(arr): return arr.take_nd(indexer, fill_value=fill_value, allow_fill=allow_fill) elif com.is_datetimetz(arr): return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) if indexer is None: indexer = np.arange(arr.shape[axis], dtype=np.int64) dtype, fill_value = arr.dtype, arr.dtype.type() else: indexer = com._ensure_int64(indexer) if not allow_fill: dtype, fill_value = arr.dtype, arr.dtype.type() mask_info = None, False else: # check for promotion based on types only (do this first because # it's faster than computing a mask) dtype, fill_value = com._maybe_promote(arr.dtype, fill_value) if dtype != arr.dtype and (out is None or out.dtype != dtype): # check if promotion is actually required based on indexer if mask_info is not None: mask, needs_masking = mask_info else: mask = indexer == -1 needs_masking = mask.any() mask_info = mask, needs_masking if needs_masking: if out is not None and out.dtype != dtype: raise TypeError('Incompatible type for fill_value') else: # if not, then depromote, set fill_value to dummy # (it won't be used but we don't want the cython code # to crash when trying to cast it to dtype) dtype, fill_value = arr.dtype, arr.dtype.type() flip_order = False if arr.ndim == 2: if arr.flags.f_contiguous: flip_order = True if flip_order: arr = arr.T axis = arr.ndim - axis - 1 if out is not None: out = out.T # at this point, it's guaranteed that dtype can hold both the arr values # and the fill_value if out is None: out_shape = list(arr.shape) out_shape[axis] = len(indexer) out_shape = tuple(out_shape) if arr.flags.f_contiguous and axis == arr.ndim - 1: # minor tweak that can make an order-of-magnitude difference # for dataframes initialized directly from 2-d ndarrays # (s.t. df.values is c-contiguous and df._data.blocks[0] is its # f-contiguous transpose) out = np.empty(out_shape, dtype=dtype, order='F') else: out = np.empty(out_shape, dtype=dtype) func = _get_take_nd_function(arr.ndim, arr.dtype, out.dtype, axis=axis, mask_info=mask_info) indexer = com._ensure_int64(indexer) func(arr, indexer, out, fill_value) if flip_order: out = out.T return out
def putmask(self, mask, new, align=True, inplace=False): """ putmask the data to the block; it is possible that we may create a new dtype of block return the resulting block(s) Parameters ---------- mask : the condition to respect new : a ndarray/object align : boolean, perform alignment on other/cond, default is True inplace : perform inplace modification, default is False Returns ------- a new block(s), the result of the putmask """ new_values = self.values if inplace else self.values.copy() # may need to align the new if hasattr(new, 'reindex_axis'): new = new.values.T # may need to align the mask if hasattr(mask, 'reindex_axis'): mask = mask.values.T # if we are passed a scalar None, convert it here if not is_list_like(new) and isnull(new) and not self.is_object: new = self.fill_value if self._can_hold_element(new): new = self._try_cast(new) # pseudo-broadcast if isinstance(new, np.ndarray) and new.ndim == self.ndim - 1: new = np.repeat(new, self.shape[-1]).reshape(self.shape) np.putmask(new_values, mask, new) # maybe upcast me elif mask.any(): # need to go column by column new_blocks = [] if self.ndim > 1: for i, ref_loc in enumerate(self.mgr_locs): m = mask[i] v = new_values[i] # need a new block if m.any(): n = new[i] if isinstance( new, np.ndarray) else np.array(new) # type of the new block dtype, _ = com._maybe_promote(n.dtype) # we need to exiplicty astype here to make a copy n = n.astype(dtype) nv = _putmask_smart(v, m, n) else: nv = v if inplace else v.copy() # Put back the dimension that was taken from it and make # a block out of the result. block = make_block(values=nv[np.newaxis], placement=[ref_loc], fastpath=True) new_blocks.append(block) else: nv = _putmask_smart(new_values, mask, new) new_blocks.append(make_block(values=nv, placement=self.mgr_locs, fastpath=True)) return new_blocks if inplace: return [self] return [make_block(new_values, placement=self.mgr_locs, fastpath=True)]
def setitem(self, indexer, value): """ set the value inplace; return a new block (of a possibly different dtype) indexer is a direct slice/positional indexer; value must be a compatible shape """ # coerce None values, if appropriate if value is None: if self.is_numeric: value = np.nan # coerce args values, value = self._try_coerce_args(self.values, value) arr_value = np.array(value) # cast the values to a type that can hold nan (if necessary) if not self._can_hold_element(value): dtype, _ = com._maybe_promote(arr_value.dtype) values = values.astype(dtype) transf = (lambda x: x.T) if self.ndim == 2 else (lambda x: x) values = transf(values) l = len(values) # length checking # boolean with truth values == len of the value is ok too if isinstance(indexer, (np.ndarray, list)): if is_list_like(value) and len(indexer) != len(value): if not (isinstance(indexer, np.ndarray) and indexer.dtype == np.bool_ and len(indexer[indexer]) == len(value)): raise ValueError("cannot set using a list-like indexer " "with a different length than the value") # slice elif isinstance(indexer, slice): if is_list_like(value) and l: if len(value) != length_of_indexer(indexer, values): raise ValueError("cannot set using a slice indexer with a " "different length than the value") try: def _is_scalar_indexer(indexer): # return True if we are all scalar indexers if arr_value.ndim == 1: if not isinstance(indexer, tuple): indexer = tuple([indexer]) return all([ np.isscalar(idx) for idx in indexer ]) return False def _is_empty_indexer(indexer): # return a boolean if we have an empty indexer if arr_value.ndim == 1: if not isinstance(indexer, tuple): indexer = tuple([indexer]) return any(isinstance(idx, np.ndarray) and len(idx) == 0 for idx in indexer) return False # empty indexers # 8669 (empty) if _is_empty_indexer(indexer): pass # setting a single element for each dim and with a rhs that could be say a list # GH 6043 elif _is_scalar_indexer(indexer): values[indexer] = value # if we are an exact match (ex-broadcasting), # then use the resultant dtype elif len(arr_value.shape) and arr_value.shape[0] == values.shape[0] and np.prod(arr_value.shape) == np.prod(values.shape): values[indexer] = value values = values.astype(arr_value.dtype) # set else: values[indexer] = value # coerce and try to infer the dtypes of the result if np.isscalar(value): dtype, _ = _infer_dtype_from_scalar(value) else: dtype = 'infer' values = self._try_coerce_and_cast_result(values, dtype) block = make_block(transf(values), ndim=self.ndim, placement=self.mgr_locs, fastpath=True) # may have to soft convert_objects here if block.is_object and not self.is_object: block = block.convert(numeric=False) return block except (ValueError, TypeError) as detail: raise except Exception as detail: pass return [self]