def nankurt(values, axis=None, skipna=True):
    """
    Compute the sample excess kurtosis.

    The statistic computed here is the adjusted Fisher-Pearson
    standardized moment coefficient G2, computed directly from the
    second and fourth central moment.

    (Docstring fixed: it previously claimed to compute skewness.)
    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        # zero out missing slots so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        # re-zero: subtracting the mean re-populated the masked slots
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted4 = adjusted2 ** 2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
    numer = count * (count + 1) * (count - 1) * m4
    denom = (count - 2) * (count - 3) * m2 ** 2

    # floating point error: zero out tiny numerator/denominator before
    # dividing.  (The original computed ``numer / denom - adj`` once
    # before this guard as well; that first computation was dead code
    # and has been removed.)
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result
def nankurt(values, axis=None, skipna=True):
    """
    Compute the sample excess kurtosis.

    The statistic computed here is the adjusted Fisher-Pearson
    standardized moment coefficient G2, computed directly from the
    second and fourth central moment.

    (Docstring fixed: it previously claimed to compute skewness.)
    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        # zero out missing slots so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        # re-zero: subtracting the mean re-populated the masked slots
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted4 = adjusted2 ** 2
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m4 = adjusted4.sum(axis, dtype=np.float64)

    adj = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))
    numer = count * (count + 1) * (count - 1) * m4
    denom = (count - 2) * (count - 3) * m2 ** 2

    # floating point error: zero out tiny numerator/denominator before
    # dividing.  (The original computed ``numer / denom - adj`` once
    # before this guard as well; that first computation was dead code
    # and has been removed.)
    numer = _zero_out_fperr(numer)
    denom = _zero_out_fperr(denom)

    if not isinstance(denom, np.ndarray):
        # if ``denom`` is a scalar, check these corner cases first before
        # doing division
        if count < 4:
            return np.nan
        if denom == 0:
            return 0

    result = numer / denom - adj

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        result = np.where(denom == 0, 0, result)
        result[count < 4] = np.nan

    return result
def nanskew(values, axis=None, skipna=True):
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson
    standardized moment coefficient G1. The algorithm computes this
    coefficient directly from the second and third central moment.
    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        # zero out missing slots so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        # re-zero: subtracting the mean re-populated the masked slots
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted ** 2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    result = (count * (count - 1) ** 0.5 / (count - 2)) * (m3 / m2 ** 1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        # all-equal inputs (m2 == 0) are defined to have zero skew;
        # G1 is undefined for fewer than 3 observations
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
def nanskew(values, axis=None, skipna=True):
    """
    Compute the sample skewness.

    The statistic computed here is the adjusted Fisher-Pearson
    standardized moment coefficient G1. The algorithm computes this
    coefficient directly from the second and third central moment.
    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        # zero out missing slots so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    mean = values.sum(axis, dtype=np.float64) / count
    if axis is not None:
        mean = np.expand_dims(mean, axis)

    adjusted = values - mean
    if skipna:
        # re-zero: subtracting the mean re-populated the masked slots
        np.putmask(adjusted, mask, 0)
    adjusted2 = adjusted**2
    adjusted3 = adjusted2 * adjusted
    m2 = adjusted2.sum(axis, dtype=np.float64)
    m3 = adjusted3.sum(axis, dtype=np.float64)

    # floating point error
    m2 = _zero_out_fperr(m2)
    m3 = _zero_out_fperr(m3)

    result = (count * (count - 1)**0.5 / (count - 2)) * (m3 / m2**1.5)

    dtype = values.dtype
    if is_float_dtype(dtype):
        result = result.astype(dtype)

    if isinstance(result, np.ndarray):
        # all-equal inputs (m2 == 0) are defined to have zero skew;
        # G1 is undefined for fewer than 3 observations
        result = np.where(m2 == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if m2 == 0 else result
        if count < 3:
            return np.nan
        return result
def _prep_values(self, values=None, kill_inf=True, how=None):
    """
    Coerce the input (or the currently selected object) to a float64
    ndarray for the rolling/window ops.

    Parameters
    ----------
    values : array-like, optional
        Defaults to ``self._selected_obj`` (its ``.values`` if present).
    kill_inf : bool, default True
        Replace +/-inf with NaN (on a copy).
    how : unused here; accepted for interface compatibility.

    Raises
    ------
    NotImplementedError for datetime-like dtypes, TypeError for
    uncoercible dtypes.
    """
    if values is None:
        values = getattr(self._selected_obj, 'values', self._selected_obj)

    # GH #12373 : rolling functions error on float32 data
    # make sure the data is coerced to float64
    # (float and integer branches were identical duplicates; merged)
    if (com.is_float_dtype(values.dtype) or
            com.is_integer_dtype(values.dtype)):
        values = com._ensure_float64(values)
    elif com.needs_i8_conversion(values.dtype):
        raise NotImplementedError("ops for {action} for this "
                                  "dtype {dtype} are not "
                                  "implemented".format(
                                      action=self._window_type,
                                      dtype=values.dtype))
    else:
        try:
            values = com._ensure_float64(values)
        except (ValueError, TypeError):
            raise TypeError("cannot handle this type -> {0}"
                            "".format(values.dtype))

    if kill_inf:
        # copy so we never mutate the caller's data
        values = values.copy()
        values[np.isinf(values)] = np.NaN

    return values
def test_delevel_infer_dtype(self):
    """reset_index must preserve int/float dtypes of MultiIndex levels."""
    # list(...) replaces the identity comprehension
    # ``[tuple for tuple in ...]`` which also shadowed the builtin ``tuple``
    tuples = list(cart_product(["foo", "bar"], [10, 20], [1.0, 1.1]))
    index = MultiIndex.from_tuples(tuples, names=["prm0", "prm1", "prm2"])
    df = DataFrame(np.random.randn(8, 3), columns=["A", "B", "C"],
                   index=index)
    deleveled = df.reset_index()
    self.assert_(com.is_integer_dtype(deleveled["prm1"]))
    self.assert_(com.is_float_dtype(deleveled["prm2"]))
def _get_data_algo(values, func_map):
    """
    Select the dtype-specialized function from ``func_map`` and coerce
    ``values`` to the matching representation.

    Returns (func, coerced_values).
    """
    mask = None
    if com.is_float_dtype(values):
        f = func_map['float64']
        values = com._ensure_float64(values)
    elif com.needs_i8_conversion(values):
        # if we have NaT, punt to object dtype
        mask = com.isnull(values)
        if mask.ravel().any():
            f = func_map['generic']
            values = com._ensure_object(values)
            values[mask] = np.nan
        else:
            # no missing values: safe to work on the int64 view
            f = func_map['int64']
            values = values.view('i8')
    elif com.is_integer_dtype(values):
        f = func_map['int64']
        values = com._ensure_int64(values)
    else:
        f = func_map['generic']
        values = com._ensure_object(values)
    return f, values
def na_value_for_dtype(dtype):
    """
    Return a dtype compat na value

    Parameters
    ----------
    dtype : string / dtype

    Returns
    -------
    dtype compat na value
    """
    from pandas.core import common as com
    from pandas import NaT

    dtype = pandas_dtype(dtype)

    if (com.is_datetime64_dtype(dtype) or com.is_datetime64tz_dtype(dtype) or
            com.is_timedelta64_dtype(dtype)):
        return NaT
    elif com.is_float_dtype(dtype):
        return np.nan
    elif com.is_integer_dtype(dtype):
        # integers cannot hold NaN; 0 is the conventional fill
        return 0
    elif com.is_bool_dtype(dtype):
        return False
    # object / anything else: default to NaN
    return np.nan
def _sqlalchemy_type(self, arr_or_dtype):
    """
    Map a pandas dtype / array (or the ``date`` type) to an appropriate
    SQLAlchemy column type.  Unrecognized dtypes fall back to Text.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Interval)

    if arr_or_dtype is date:
        return Date
    if com.is_datetime64_dtype(arr_or_dtype):
        try:
            # objects exposing ``tzinfo`` get a timezone-aware DATETIME;
            # narrowed from a bare ``except:`` which swallowed everything
            arr_or_dtype.tzinfo
            return DateTime(timezone=True)
        except AttributeError:
            return DateTime
    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def _convert_DataFrame(rdf):
    """
    Convert an rpy2 R data.frame into a pandas DataFrame.

    Factor columns are mapped back through their levels; NaN-coded
    factor values become NaN in an object column.
    """
    columns = list(rdf.colnames)
    rows = np.array(rdf.rownames)

    data = {}
    for i, col in enumerate(columns):
        vec = rdf.rx2(i + 1)
        values = _convert_vector(vec)

        if isinstance(vec, robj.FactorVector):
            levels = np.asarray(vec.levels)
            if com.is_float_dtype(values):
                mask = np.isnan(values)
                # bitwise-not replaces ``-mask``: unary minus on a
                # boolean array is deprecated/invalid in numpy
                notmask = ~mask
                result = np.empty(len(values), dtype=object)
                result[mask] = np.nan

                # R factor codes are 1-based
                locs = (values[notmask] - 1).astype(np.int_)
                result[notmask] = levels.take(locs)
                values = result
            else:
                values = np.asarray(vec.levels).take(values - 1)

        data[col] = values

    return pd.DataFrame(data, index=_check_int(rows), columns=columns)
def nanskew(values, axis=None, skipna=True):
    """
    Compute sample skewness from one-pass raw moments.

    NOTE(review): A, B, C are the mean and the 2nd/3rd central moments
    derived from raw power sums; this is less numerically stable than a
    two-pass computation.
    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')

    count = _get_counts(mask, axis)

    if skipna:
        # zero out missing slots so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B

    # floating point error
    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)

    result = ((np.sqrt((count ** 2 - count)) * C) /
              ((count - 2) * np.sqrt(B) ** 3))
    if isinstance(result, np.ndarray):
        # zero variance => zero skew; undefined below 3 observations
        result = np.where(B == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 3:
            return np.nan
        return result
def _isfinite(values):
    """Return a boolean mask marking entries that are not finite."""
    # datetime-like values: missing means NaT, detected via isnull
    if is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    numeric_kind = (is_complex_dtype(values) or is_float_dtype(values) or
                    is_integer_dtype(values) or is_bool_dtype(values))
    if not numeric_kind:
        # coerce non-numeric input so np.isfinite is applicable
        values = values.astype('float64')
    return ~np.isfinite(values)
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    """
    Format an array of values as strings, dispatching the formatter
    class on dtype.  Options left as None fall back to the global
    ``print.*`` options.
    """
    if com.is_float_dtype(values.dtype):
        fmt_klass = FloatArrayFormatter
    elif com.is_integer_dtype(values.dtype):
        fmt_klass = IntArrayFormatter
    elif com.is_datetime64_dtype(values.dtype):
        fmt_klass = Datetime64Formatter
    else:
        fmt_klass = GenericArrayFormatter

    if space is None:
        space = get_option("print.column_space")

    if float_format is None:
        float_format = get_option("print.float_format")

    if digits is None:
        digits = get_option("print.precision")

    fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
                        float_format=float_format,
                        formatter=formatter, space=space,
                        justify=justify)

    return fmt_obj.get_result()
def _sqlalchemy_type(self, col):
    """
    Map a pandas column to an appropriate SQLAlchemy column type,
    falling back to dtype inference for object columns (date/time).
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time, Interval)

    if com.is_datetime64_dtype(col):
        try:
            # columns exposing ``tzinfo`` get a timezone-aware DATETIME;
            # narrowed from a bare ``except:`` which swallowed everything
            col.tzinfo
            return DateTime(timezone=True)
        except AttributeError:
            return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def _sqlalchemy_type(self, col):
    """
    Map a pandas column to an appropriate SQLAlchemy column type,
    falling back to dtype inference for object columns (date/time).
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Time)

    if com.is_datetime64_dtype(col):
        try:
            # columns exposing ``tzinfo`` get a timezone-aware DATETIME;
            # narrowed from a bare ``except:`` which swallowed everything
            col.tzinfo
            return DateTime(timezone=True)
        except AttributeError:
            return DateTime
    if com.is_timedelta64_dtype(col):
        warnings.warn(
            "the 'timedelta' type is not supported, and will be "
            "written as integer values (ns frequency) to the "
            "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(col):
        return Float
    elif com.is_integer_dtype(col):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(col):
        return Boolean
    inferred = lib.infer_dtype(com._ensure_object(col))
    if inferred == 'date':
        return Date
    if inferred == 'time':
        return Time
    return Text
def backfill_2d(values, limit=None, mask=None, dtype=None):
    """
    Backward-fill missing entries of a 2-D array in place.

    Parameters
    ----------
    values : 2-D ndarray
    limit : int, optional
        Maximum number of consecutive fills.
    mask : ndarray of bool, optional
        Marks the missing entries; computed via ``com.isnull`` if absent.
    dtype : optional, defaults to ``values.dtype``.

    Raises ValueError for dtypes with no fill implementation.
    """
    if dtype is None:
        dtype = values.dtype
    _method = None
    if com.is_float_dtype(values):
        # name lookup keeps the float32/float64 specializations
        _method = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif dtype in com._DATELIKE_DTYPES or com.is_datetime64_dtype(values):
        _method = _backfill_2d_datetime
    elif com.is_integer_dtype(values):
        # integers cannot represent NaN; upcast to float64 first
        values = com._ensure_float64(values)
        _method = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_2d_inplace_object

    if _method is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    if mask is None:
        mask = com.isnull(values)
    mask = mask.view(np.uint8)

    if np.all(values.shape):
        _method(values, mask, limit=limit)
    else:
        # for test coverage
        pass
    return values
def _isfinite(values):
    """Return a boolean mask marking entries that are not finite."""
    # datetime-like values: missing means NaT, detected via isnull
    if _is_datetime_or_timedelta_dtype(values):
        return isnull(values)

    numeric_kind = (is_complex_dtype(values) or is_float_dtype(values) or
                    is_integer_dtype(values) or is_bool_dtype(values))
    if not numeric_kind:
        # coerce non-numeric input so np.isfinite is applicable
        values = values.astype('float64')
    return ~np.isfinite(values)
def format_array(values, formatter, float_format=None, na_rep='NaN',
                 digits=None, space=None, justify='right'):
    """
    Format an array of values as strings, dispatching the formatter
    class on dtype.  Options left as None fall back to ``print_config``.
    """
    if com.is_float_dtype(values.dtype):
        fmt_klass = FloatArrayFormatter
    elif com.is_integer_dtype(values.dtype):
        fmt_klass = IntArrayFormatter
    elif com.is_datetime64_dtype(values.dtype):
        fmt_klass = Datetime64Formatter
    else:
        fmt_klass = GenericArrayFormatter

    if space is None:
        space = print_config.column_space

    if float_format is None:
        float_format = print_config.float_format

    if digits is None:
        digits = print_config.precision

    fmt_obj = fmt_klass(values, digits, na_rep=na_rep,
                        float_format=float_format,
                        formatter=formatter, space=space,
                        justify=justify)

    return fmt_obj.get_result()
def _sqlalchemy_type(self, arr_or_dtype):
    """
    Map a pandas dtype / array (or the ``date`` type) to an appropriate
    SQLAlchemy column type.  Unrecognized dtypes fall back to Text.
    """
    from sqlalchemy.types import (BigInteger, Float, Text, Boolean,
                                  DateTime, Date, Interval)

    if arr_or_dtype is date:
        return Date
    if com.is_datetime64_dtype(arr_or_dtype):
        try:
            # objects exposing ``tzinfo`` get a timezone-aware DATETIME;
            # narrowed from a bare ``except:`` which swallowed everything
            arr_or_dtype.tzinfo
            return DateTime(timezone=True)
        except AttributeError:
            return DateTime
    if com.is_timedelta64_dtype(arr_or_dtype):
        warnings.warn("the 'timedelta' type is not supported, and will be "
                      "written as integer values (ns frequency) to the "
                      "database.", UserWarning)
        return BigInteger
    elif com.is_float_dtype(arr_or_dtype):
        return Float
    elif com.is_integer_dtype(arr_or_dtype):
        # TODO: Refine integer size.
        return BigInteger
    elif com.is_bool_dtype(arr_or_dtype):
        return Boolean
    return Text
def nankurt(values, axis=None, skipna=True):
    """
    Compute sample kurtosis from one-pass raw moments.

    NOTE(review): A..D are the mean and 2nd-4th central moments derived
    from raw power sums; less numerically stable than a two-pass scheme.
    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')

    count = _get_counts(mask, axis)

    if skipna:
        # zero out missing slots so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    A = values.sum(axis) / count
    B = (values ** 2).sum(axis) / count - A ** 2
    C = (values ** 3).sum(axis) / count - A ** 3 - 3 * A * B
    D = (values ** 4).sum(axis) / count - A ** 4 - 6 * B * A * A - 4 * C * A

    B = _zero_out_fperr(B)
    D = _zero_out_fperr(D)

    if not isinstance(B, np.ndarray):
        # if B is a scalar, check these corner cases first before doing
        # division
        if count < 4:
            return np.nan
        if B == 0:
            return 0

    result = (((count * count - 1.) * D / (B * B) - 3 * ((count - 1.) ** 2)) /
              ((count - 2.) * (count - 3.)))

    if isinstance(result, np.ndarray):
        # zero variance => zero kurtosis; undefined below 4 observations
        result = np.where(B == 0, 0, result)
        result[count < 4] = np.nan

    return result
def nanskew(values, axis=None, skipna=True):
    """
    Compute sample skewness from one-pass raw moments.

    The ``typ(...)`` scalar casts keep the arithmetic in the input's
    own float precision (e.g. float32 stays float32).
    """
    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
        count = _get_counts(mask, axis)
    else:
        count = _get_counts(mask, axis, dtype=values.dtype)

    if skipna:
        # zero out missing slots so they do not contribute to the sums
        values = values.copy()
        np.putmask(values, mask, 0)

    typ = values.dtype.type
    A = values.sum(axis) / count
    B = (values**2).sum(axis) / count - A**typ(2)
    C = (values**3).sum(axis) / count - A**typ(3) - typ(3) * A * B

    # floating point error
    B = _zero_out_fperr(B)
    C = _zero_out_fperr(C)

    result = ((np.sqrt(count * count - count) * C) /
              ((count - typ(2)) * np.sqrt(B)**typ(3)))

    if isinstance(result, np.ndarray):
        # zero variance => zero skew; undefined below 3 observations
        result = np.where(B == 0, 0, result)
        result[count < 3] = np.nan
        return result
    else:
        result = 0 if B == 0 else result
        if count < 3:
            return np.nan
        return result
def convert(values, unit, axis):
    """
    Convert datetime-like input to matplotlib float date ordinals.

    Scalars (numbers) pass through unchanged; strings and sequences are
    parsed to datetimes on a best-effort basis.
    """
    def try_parse(values):
        # best-effort parse; return the raw input on failure
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (com.is_integer(values) or com.is_float(values)):
        return values
    elif isinstance(values, compat.string_types):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            values = com._asarray_tuplesafe(values)

        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            # already numeric ordinals
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            # leave values as-is if parsing fails
            pass

    return values
def convert(values, unit, axis):
    """
    Convert datetime-like input to matplotlib float date ordinals.

    Scalars (numbers) pass through unchanged; strings and sequences are
    parsed to datetimes on a best-effort basis.
    """
    from pandas.tseries.index import DatetimeIndex

    def try_parse(values):
        # best-effort parse; return the raw input on failure
        try:
            return _dt_to_float_ordinal(tools.to_datetime(values))
        except Exception:
            return values

    if isinstance(values, (datetime, pydt.date)):
        return _dt_to_float_ordinal(values)
    elif isinstance(values, pydt.time):
        return dates.date2num(values)
    elif (com.is_integer(values) or com.is_float(values)):
        return values
    elif isinstance(values, basestring):
        return try_parse(values)
    elif isinstance(values, (list, tuple, np.ndarray)):
        if not isinstance(values, np.ndarray):
            # BUG FIX: ``_asarray_tuplesafe`` lives in pandas.core.common
            # (``com``), not numpy -- ``np._asarray_tuplesafe`` raised
            # AttributeError
            values = com._asarray_tuplesafe(values)

        if com.is_integer_dtype(values) or com.is_float_dtype(values):
            # already numeric ordinals
            return values

        try:
            values = tools.to_datetime(values)
            if isinstance(values, Index):
                values = values.map(_dt_to_float_ordinal)
            else:
                values = [_dt_to_float_ordinal(x) for x in values]
        except Exception:
            pass

    return values
def nansum(values, axis=None, skipna=True):
    """Sum along ``axis``; missing values are treated as 0 when skipna."""
    values, mask, dtype, dtype_max = _get_values(values, skipna, 0)
    # accumulate in the platform-max dtype, but keep float inputs at
    # their own precision
    acc_dtype = dtype if is_float_dtype(dtype) else dtype_max
    total = values.sum(axis, dtype=acc_dtype)
    total = _maybe_null_out(total, axis, mask)

    return _wrap_results(total, dtype)
def _pad(values):
    """Forward-fill missing entries of a 2-D array in place."""
    if com.is_float_dtype(values):
        filler = lib.pad_2d_inplace_float64
    elif values.dtype == np.object_:
        filler = lib.pad_2d_inplace_object
    else:
        raise ValueError('Invalid dtype for padding')

    filler(values, com.isnull(values).view(np.uint8))
def nansem(values, axis=None, skipna=True, ddof=1):
    """
    Standard error of the mean along ``axis``: sqrt(var) / sqrt(count).
    """
    var = nanvar(values, axis, skipna, ddof=ddof)

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
    # only the observation count is needed here; the divisor is unused
    count, _ = _get_counts_nanvar(mask, axis, ddof, values.dtype)

    return np.sqrt(var) / np.sqrt(count)
def nansem(values, axis=None, skipna=True, ddof=1):
    """
    Standard error of the mean along ``axis``: sqrt(var) / sqrt(count).
    """
    var = nanvar(values, axis, skipna, ddof=ddof)

    mask = isnull(values)
    if not is_float_dtype(values.dtype):
        values = values.astype('f8')
    # only the observation count is needed here; the divisor is unused
    count, _ = _get_counts_nanvar(mask, axis, ddof)

    return np.sqrt(var)/np.sqrt(count)
def _backfill(values):
    """Backward-fill missing entries of a 2-D array in place."""
    if com.is_float_dtype(values):
        filler = lib.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        filler = lib.backfill_2d_inplace_object
    else:  # pragma: no cover
        raise ValueError('Invalid dtype for padding')

    filler(values, com.isnull(values).view(np.uint8))
def _hashtable_algo(f, dtype):
    """
    f(HashTable, type_caster) -> result
    """
    # pick the hash table class and coercion function for the dtype
    if com.is_float_dtype(dtype):
        table, caster = htable.Float64HashTable, com._ensure_float64
    elif com.is_integer_dtype(dtype):
        table, caster = htable.Int64HashTable, com._ensure_int64
    else:
        table, caster = htable.PyObjectHashTable, com._ensure_object
    return f(table, caster)
def test_delevel_infer_dtype(self):
    """delevel must preserve int/float dtypes of MultiIndex levels."""
    # list(...) replaces the identity comprehension
    # ``[tuple for tuple in ...]`` which also shadowed the builtin ``tuple``
    tuples = list(cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1]))
    index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2'])
    df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                   index=index)
    deleveled = df.delevel()
    self.assert_(com.is_integer_dtype(deleveled['prm1']))
    self.assert_(com.is_float_dtype(deleveled['prm2']))
def test_delevel_infer_dtype(self):
    """reset_index must preserve int/float dtypes of MultiIndex levels."""
    # list(...) replaces the identity comprehension
    # ``[tuple for tuple in ...]`` which also shadowed the builtin ``tuple``
    tuples = list(cart_product(['foo', 'bar'], [10, 20], [1.0, 1.1]))
    index = MultiIndex.from_tuples(tuples, names=['prm0', 'prm1', 'prm2'])
    df = DataFrame(np.random.randn(8, 3), columns=['A', 'B', 'C'],
                   index=index)
    deleveled = df.reset_index()
    self.assert_(com.is_integer_dtype(deleveled['prm1']))
    self.assert_(com.is_float_dtype(deleveled['prm2']))
def count(values, uniques=None):
    """Count value occurrences, choosing a hash table by dtype."""
    if uniques is not None:
        raise NotImplementedError

    # dtype-specialized dispatch; guard clauses instead of elif chain
    if com.is_float_dtype(values):
        return _count_generic(values, lib.Float64HashTable,
                              _ensure_float64)
    if com.is_integer_dtype(values):
        return _count_generic(values, lib.Int64HashTable, _ensure_int64)
    return _count_generic(values, lib.PyObjectHashTable, _ensure_object)
def astype(self, dtype):
    """
    Cast the index values to ``dtype``.

    Float and integer targets cast the underlying values; an object
    target reuses them as-is.  Anything else raises TypeError.
    """
    dtype = pandas_dtype(dtype)
    if is_float_dtype(dtype) or is_integer_dtype(dtype):
        values = self._values.astype(dtype)
    elif is_object_dtype(dtype):
        values = self._values
    else:
        # message fixed: integer dtypes are accepted above, so the old
        # "float64 or object" wording was misleading
        raise TypeError('Setting %s dtype to anything other than '
                        'float, integer or object is not supported' %
                        self.__class__)
    return Index(values, name=self.name, dtype=dtype)
def _get_hash_table_and_cast(values):
    """Return (hash table class, dtype-coerced values)."""
    # guard clauses replace the elif chain; same dispatch order
    if com.is_float_dtype(values):
        return lib.Float64HashTable, com._ensure_float64(values)
    if com.is_integer_dtype(values):
        return lib.Int64HashTable, com._ensure_int64(values)
    return lib.PyObjectHashTable, com._ensure_object(values)
def _value_counts_arraylike(values, dropna=True):
    """
    Return (keys, counts) of the unique values, dispatching the hashing
    on dtype.  Datetime/timedelta/period values are counted via their
    int64 representation and converted back afterwards.
    """
    is_datetimetz = com.is_datetimetz(values)
    is_period = (isinstance(values, gt.ABCPeriodIndex) or
                 com.is_period_arraylike(values))

    # keep the original around: tz / freq metadata is recovered from it
    orig = values

    from pandas.core.series import Series
    values = Series(values).values
    dtype = values.dtype

    if com.is_datetime_or_timedelta_dtype(dtype) or is_period:
        from pandas.tseries.index import DatetimeIndex
        from pandas.tseries.period import PeriodIndex

        if is_period:
            values = PeriodIndex(values)
            freq = values.freq

        values = values.view(np.int64)
        keys, counts = htable.value_count_scalar64(values, dropna)

        if dropna:
            # NaT is represented as iNaT in the int64 view
            msk = keys != iNaT
            keys, counts = keys[msk], counts[msk]

        # convert the keys back to the dtype we came in
        keys = keys.astype(dtype)

        # dtype handling
        if is_datetimetz:
            if isinstance(orig, gt.ABCDatetimeIndex):
                tz = orig.tz
            else:
                tz = orig.dt.tz
            keys = DatetimeIndex._simple_new(keys, tz=tz)
        if is_period:
            keys = PeriodIndex._simple_new(keys, freq=freq)

    elif com.is_integer_dtype(dtype):
        values = com._ensure_int64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    elif com.is_float_dtype(dtype):
        values = com._ensure_float64(values)
        keys, counts = htable.value_count_scalar64(values, dropna)
    else:
        values = com._ensure_object(values)
        mask = com.isnull(values)
        keys, counts = htable.value_count_object(values, mask)
        if not dropna and mask.any():
            # prepend an explicit missing-value bucket
            keys = np.insert(keys, 0, np.NaN)
            counts = np.insert(counts, 0, mask.sum())

    return keys, counts
def _get_data_algo(values, func_map):
    """Select the dtype-specialized function and coerce ``values``."""
    if com.is_float_dtype(values):
        key = 'float64'
        coerced = com._ensure_float64(values)
    elif com.is_integer_dtype(values):
        key = 'int64'
        coerced = com._ensure_int64(values)
    else:
        key = 'generic'
        coerced = com._ensure_object(values)
    return func_map[key], coerced
def fill_zeros(result, x, y, name, fill):
    """
    if this is a reversed op, then flip x,y

    if we have an integer value (or array in y)
    and we have 0's, fill them with the fill,
    return the result

    mask the nan's from x
    """
    # float results (or no fill requested): nothing to do
    if fill is None or com.is_float_dtype(result):
        return result

    if name.startswith(('r', '__r')):
        x, y = y, x

    is_typed_variable = (hasattr(y, 'dtype') or hasattr(y, 'type'))
    is_scalar = lib.isscalar(y)

    if not is_typed_variable and not is_scalar:
        return result

    if is_scalar:
        y = np.array(y)

    if com.is_integer_dtype(y):

        if (y == 0).any():

            # GH 7325, mask and nans must be broadcastable (also: PR 9308)
            # Raveling and then reshaping makes np.putmask faster
            mask = ((y == 0) & ~np.isnan(result)).ravel()

            shape = result.shape
            result = result.astype('float64', copy=False).ravel()

            np.putmask(result, mask, fill)

            # if we have a fill of inf, then sign it correctly
            # (GH 6178 and PR 9308)
            if np.isinf(fill):
                signs = np.sign(y if name.startswith(('r', '__r')) else x)
                negative_inf_mask = (signs.ravel() < 0) & mask
                np.putmask(result, negative_inf_mask, -fill)

            if "floordiv" in name:  # (PR 9308)
                # 0 // 0 is NaN, not +/-inf
                nan_mask = ((y == 0) & (x == 0)).ravel()
                np.putmask(result, nan_mask, np.nan)

            result = result.reshape(shape)

    return result
def count(values, uniques=None):
    """Count value occurrences, choosing a hash table by dtype."""
    if uniques is not None:
        raise NotImplementedError

    # dtype-specialized dispatch; guard clauses instead of elif chain
    if com.is_float_dtype(values):
        return _count_generic(values, lib.Float64HashTable,
                              com._ensure_float64)
    if com.is_integer_dtype(values):
        return _count_generic(values, lib.Int64HashTable,
                              com._ensure_int64)
    return _count_generic(values, lib.PyObjectHashTable,
                          com._ensure_object)
def _get_data_algo(values, func_map):
    """Select the dtype-specialized function and coerce ``values``."""
    if com.is_float_dtype(values):
        key = "float64"
        coerced = com._ensure_float64(values)
    elif com.is_datetime64_dtype(values):
        # datetimes hash/sort via their int64 nanosecond view
        key = "int64"
        coerced = values.view("i8")
    elif com.is_integer_dtype(values):
        key = "int64"
        coerced = com._ensure_int64(values)
    else:
        key = "generic"
        coerced = com._ensure_object(values)
    return func_map[key], coerced
def nanvar(values, axis=None, skipna=True, ddof=1):
    """
    Variance along ``axis`` with ``ddof`` degrees-of-freedom correction,
    optionally skipping missing values.
    """
    dtype = values.dtype
    mask = isnull(values)
    if is_any_int_dtype(values):
        # integers cannot hold NaN; upcast before masking
        values = values.astype('f8')
        values[mask] = np.nan

    if is_float_dtype(values):
        count, d = _get_counts_nanvar(mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(mask, axis, ddof)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)

    sqr = _ensure_numeric((avg - values) ** 2)
    np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype)
    return _wrap_results(result, values.dtype)
def test_set_value(self):
    # set_value on existing coordinates writes in place
    for item in self.panel.items:
        for mjr in self.panel.major_axis[::2]:
            for mnr in self.panel.minor_axis:
                self.panel.set_value(item, mjr, mnr, 1.0)
                assert_almost_equal(self.panel[item][mnr][mjr], 1.0)

    # resize
    res = self.panel.set_value("ItemE", "foo", "bar", 1.5)
    self.assert_(isinstance(res, Panel))
    # enlargement returns a new object rather than mutating in place
    self.assert_(res is not self.panel)
    self.assertEqual(res.get_value("ItemE", "foo", "bar"), 1.5)

    # setting an int into an enlarged panel yields a float block
    res3 = self.panel.set_value("ItemE", "foobar", "baz", 5)
    self.assert_(com.is_float_dtype(res3["ItemE"].values))
def nanvar(values, axis=None, skipna=True, ddof=1):
    """
    Variance along ``axis`` with ``ddof`` degrees-of-freedom correction,
    optionally skipping missing values.
    """
    dtype = values.dtype
    mask = isnull(values)
    if is_any_int_dtype(values):
        # integers cannot hold NaN; upcast before masking
        values = values.astype('f8')
        values[mask] = np.nan

    if is_float_dtype(values):
        count, d = _get_counts_nanvar(mask, axis, ddof, values.dtype)
    else:
        count, d = _get_counts_nanvar(mask, axis, ddof)

    if skipna:
        values = values.copy()
        np.putmask(values, mask, 0)

    # xref GH10242
    # Compute variance via two-pass algorithm, which is stable against
    # cancellation errors and relatively accurate for small numbers of
    # observations.
    #
    # See https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    avg = _ensure_numeric(values.sum(axis=axis, dtype=np.float64)) / count
    if axis is not None:
        avg = np.expand_dims(avg, axis)

    sqr = _ensure_numeric((avg - values)**2)
    np.putmask(sqr, mask, 0)
    result = sqr.sum(axis=axis, dtype=np.float64) / d

    # Return variance as np.float64 (the datatype used in the accumulator),
    # unless we were dealing with a float array, in which case use the same
    # precision as the original values array.
    if is_float_dtype(dtype):
        result = result.astype(dtype)
    return _wrap_results(result, values.dtype)
def astype(self, dtype, copy=True):
    """
    Cast the index to ``dtype``.

    Float and integer targets cast the underlying values (integer
    targets reject indexes containing NaN); an object target casts to
    'object'.  Anything else raises TypeError.
    """
    dtype = pandas_dtype(dtype)
    if is_float_dtype(dtype):
        values = self._values.astype(dtype, copy=copy)
    elif is_integer_dtype(dtype):
        if self.hasnans:
            # NaN has no integer representation
            raise ValueError('cannot convert float NaN to integer')
        values = self._values.astype(dtype, copy=copy)
    elif is_object_dtype(dtype):
        values = self._values.astype('object', copy=copy)
    else:
        # NOTE(review): message omits the accepted integer dtypes --
        # consider rewording "float64 or object"
        raise TypeError('Setting %s dtype to anything other than '
                        'float64 or object is not supported' %
                        self.__class__)
    return Index(values, name=self.name, dtype=dtype)
def test_set_value(self):
    # set_value on existing coordinates writes in place
    for item in self.panel.items:
        for mjr in self.panel.major_axis[::2]:
            for mnr in self.panel.minor_axis:
                self.panel.set_value(item, mjr, mnr, 1.)
                assert_almost_equal(self.panel[item][mnr][mjr], 1.)

    # resize
    res = self.panel.set_value('ItemE', 'foo', 'bar', 1.5)
    self.assert_(isinstance(res, Panel))
    # enlargement returns a new object rather than mutating in place
    self.assert_(res is not self.panel)
    self.assertEqual(res.get_value('ItemE', 'foo', 'bar'), 1.5)

    # setting an int into an enlarged panel yields a float block
    res3 = self.panel.set_value('ItemE', 'foobar', 'baz', 5)
    self.assert_(com.is_float_dtype(res3['ItemE'].values))
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True):
    """ utility to get the values view, mask, dtype
    if necessary copy and mask using the specified fill_value
    copy = True will force the copy

    Returns (values, mask, dtype, dtype_max) where dtype_max is the
    platform-independent accumulator dtype for the input's kind.
    """
    values = _values_from_object(values)
    if isfinite:
        # treat inf/-inf as missing too
        mask = _isfinite(values)
    else:
        mask = isnull(values)

    dtype = values.dtype
    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if skipna:
        if copy:
            values = values.copy()
        if dtype_ok:
            np.putmask(values, mask, fill_value)

        # promote if needed
        else:
            values, changed = _maybe_upcast_putmask(values, mask,
                                                    fill_value)

    elif copy:
        values = values.copy()

    values = _view_if_needed(values)

    # return a platform independent precision dtype
    dtype_max = dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64

    return values, mask, dtype, dtype_max
def _get_data_algo(values, func_map):
    """Select the dtype-specialized function and coerce ``values``."""
    # guard clauses replace the elif chain; same dispatch order
    if com.is_float_dtype(values):
        return func_map['float64'], com._ensure_float64(values)
    if com.needs_i8_conversion(values):
        # datetime-like values hash/sort via their int64 view
        return func_map['int64'], values.view('i8')
    if com.is_integer_dtype(values):
        return func_map['int64'], com._ensure_int64(values)
    return func_map['generic'], com._ensure_object(values)
def _hashtable_algo(f, dtype, return_dtype=None):
    """
    f(HashTable, type_caster) -> result
    """
    # guard clauses replace the elif chain; same dispatch order
    if com.is_float_dtype(dtype):
        return f(htable.Float64HashTable, com._ensure_float64)
    if com.is_integer_dtype(dtype):
        return f(htable.Int64HashTable, com._ensure_int64)
    if com.is_datetime64_dtype(dtype):
        # hashed as int64, then viewed back as datetime64[ns]
        view_as = return_dtype if return_dtype is not None else 'M8[ns]'
        return f(htable.Int64HashTable, com._ensure_int64).view(view_as)
    if com.is_timedelta64_dtype(dtype):
        # hashed as int64, then viewed back as timedelta64[ns]
        view_as = return_dtype if return_dtype is not None else 'm8[ns]'
        return f(htable.Int64HashTable, com._ensure_int64).view(view_as)
    return f(htable.PyObjectHashTable, com._ensure_object)
def test_set_value(self):
    # set_value on existing coordinates writes in place
    for label in self.panel4d.labels:
        for item in self.panel4d.items:
            for mjr in self.panel4d.major_axis[::2]:
                for mnr in self.panel4d.minor_axis:
                    self.panel4d.set_value(label, item, mjr, mnr, 1.)
                    assert_almost_equal(
                        self.panel4d[label][item][mnr][mjr], 1.)

    # resize
    res = self.panel4d.set_value('l4', 'ItemE', 'foo', 'bar', 1.5)
    tm.assertIsInstance(res, Panel4D)
    # enlargement returns a new object rather than mutating in place
    self.assertIsNot(res, self.panel4d)
    self.assertEqual(res.get_value('l4', 'ItemE', 'foo', 'bar'), 1.5)

    # setting an int into an enlarged panel yields a float block
    res3 = self.panel4d.set_value('l4', 'ItemE', 'foobar', 'baz', 5)
    self.assertTrue(com.is_float_dtype(res3['l4'].values))