def assert_series_equal( left, right, check_dtype=True, check_index_type="equiv", check_series_type=True, check_less_precise=no_default, check_names=True, check_exact=False, check_datetimelike_compat=False, check_categorical=True, check_category_order=True, check_freq=True, check_flags=True, rtol=1.0e-5, atol=1.0e-8, obj="Series", *, check_index=True, ): """ Check that left and right Series are equal. Parameters ---------- left : Series right : Series check_dtype : bool, default True Whether to check the Series dtype is identical. check_index_type : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. check_series_type : bool, default True Whether to check the Series class is identical. check_less_precise : bool or int, default False Specify comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. If int, then specify the digits to compare. When comparing two numbers, if the first number has magnitude less than 1e-5, we compare the two numbers directly and check whether they are equivalent within the specified precision. Otherwise, we compare the **ratio** of the second number to the first number and check whether it is equivalent to 1 within the specified precision. .. deprecated:: 1.1.0 Use `rtol` and `atol` instead to define relative/absolute tolerance, respectively. Similar to :func:`math.isclose`. check_names : bool, default True Whether to check the Series and Index names attribute. check_exact : bool, default False Whether to compare number exactly. check_datetimelike_compat : bool, default False Compare datetime-like which is comparable ignoring dtype. check_categorical : bool, default True Whether to compare internal Categorical exactly. check_category_order : bool, default True Whether to compare category order of internal Categoricals. .. versionadded:: 1.0.2 check_freq : bool, default True Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. .. versionadded:: 1.1.0 check_flags : bool, default True Whether to check the `flags` attribute. .. versionadded:: 1.2.0 rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 atol : float, default 1e-8 Absolute tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 obj : str, default 'Series' Specify object name being compared, internally used to show appropriate assertion message. check_index : bool, default True Whether to check index equivalence. If False, then compare only values. .. versionadded:: 1.3.0 Examples -------- >>> from pandas import testing as tm >>> a = pd.Series([1, 2, 3, 4]) >>> b = pd.Series([1, 2, 3, 4]) >>> tm.assert_series_equal(a, b) """ __tracebackhide__ = True if check_less_precise is not no_default: warnings.warn( "The 'check_less_precise' keyword in testing.assert_*_equal " "is deprecated and will be removed in a future version. " "You can stop passing 'check_less_precise' to silence this warning.", FutureWarning, stacklevel=find_stack_level(), ) rtol = atol = _get_tol_from_less_precise(check_less_precise) # instance validation _check_isinstance(left, right, Series) if check_series_type: assert_class_equal(left, right, obj=obj) # length comparison if len(left) != len(right): msg1 = f"{len(left)}, {left.index}" msg2 = f"{len(right)}, {right.index}" raise_assert_detail(obj, "Series length are different", msg1, msg2) if check_flags: assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" if check_index: # GH #38183 assert_index_equal( left.index, right.index, exact=check_index_type, check_names=check_names, check_exact=check_exact, check_categorical=check_categorical, rtol=rtol, atol=atol, obj=f"{obj}.index", ) if check_freq and isinstance(left.index, (DatetimeIndex, TimedeltaIndex)): lidx = left.index ridx = right.index assert lidx.freq == ridx.freq, (lidx.freq, ridx.freq) if check_dtype: # We want to skip exact dtype checking when `check_categorical` # is False. We'll still raise if only one is a `Categorical`, # regardless of `check_categorical` if (isinstance(left.dtype, CategoricalDtype) and isinstance(right.dtype, CategoricalDtype) and not check_categorical): pass else: assert_attr_equal("dtype", left, right, obj=f"Attributes of {obj}") if check_exact and is_numeric_dtype(left.dtype) and is_numeric_dtype( right.dtype): left_values = left._values right_values = right._values # Only check exact if dtype is numeric if isinstance(left_values, ExtensionArray) and isinstance( right_values, ExtensionArray): assert_extension_array_equal( left_values, right_values, check_dtype=check_dtype, index_values=np.asarray(left.index), ) else: assert_numpy_array_equal( left_values, right_values, check_dtype=check_dtype, obj=str(obj), index_values=np.asarray(left.index), ) elif check_datetimelike_compat and (needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype)): # we want to check only if we have compat dtypes # e.g. integer and M|m are NOT compat, but we can simply check # the values in that case # datetimelike may have different objects (e.g. datetime.datetime # vs Timestamp) but will compare equal if not Index(left._values).equals(Index(right._values)): msg = (f"[datetimelike_compat=True] {left._values} " f"is not equal to {right._values}.") raise AssertionError(msg) elif is_interval_dtype(left.dtype) and is_interval_dtype(right.dtype): assert_interval_array_equal(left.array, right.array) elif isinstance(left.dtype, CategoricalDtype) or isinstance( right.dtype, CategoricalDtype): _testing.assert_almost_equal( left._values, right._values, rtol=rtol, atol=atol, check_dtype=check_dtype, obj=str(obj), index_values=np.asarray(left.index), ) elif is_extension_array_dtype(left.dtype) and is_extension_array_dtype( right.dtype): assert_extension_array_equal( left._values, right._values, rtol=rtol, atol=atol, check_dtype=check_dtype, index_values=np.asarray(left.index), ) elif is_extension_array_dtype_and_needs_i8_conversion( left.dtype, right.dtype) or is_extension_array_dtype_and_needs_i8_conversion( right.dtype, left.dtype): assert_extension_array_equal( left._values, right._values, check_dtype=check_dtype, index_values=np.asarray(left.index), ) elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype): # DatetimeArray or TimedeltaArray assert_extension_array_equal( left._values, right._values, check_dtype=check_dtype, index_values=np.asarray(left.index), ) else: _testing.assert_almost_equal( left._values, right._values, rtol=rtol, atol=atol, check_dtype=check_dtype, obj=str(obj), index_values=np.asarray(left.index), ) # metadata comparison if check_names: assert_attr_equal("name", left, right, obj=obj) if check_categorical: if isinstance(left.dtype, CategoricalDtype) or isinstance( right.dtype, CategoricalDtype): assert_categorical_equal( left._values, right._values, obj=f"{obj} category", check_category_order=check_category_order, )
def assert_almost_equal( left, right, check_dtype: bool | str = "equiv", check_less_precise: bool | int | NoDefault = no_default, rtol: float = 1.0e-5, atol: float = 1.0e-8, **kwargs, ): """ Check that the left and right objects are approximately equal. By approximately equal, we refer to objects that are numbers or that contain numbers which may be equivalent to specific levels of precision. Parameters ---------- left : object right : object check_dtype : bool or {'equiv'}, default 'equiv' Check dtype if both a and b are the same type. If 'equiv' is passed in, then `RangeIndex` and `Int64Index` are also considered equivalent when doing type checking. check_less_precise : bool or int, default False Specify comparison precision. 5 digits (False) or 3 digits (True) after decimal points are compared. If int, then specify the number of digits to compare. When comparing two numbers, if the first number has magnitude less than 1e-5, we compare the two numbers directly and check whether they are equivalent within the specified precision. Otherwise, we compare the **ratio** of the second number to the first number and check whether it is equivalent to 1 within the specified precision. .. deprecated:: 1.1.0 Use `rtol` and `atol` instead to define relative/absolute tolerance, respectively. Similar to :func:`math.isclose`. rtol : float, default 1e-5 Relative tolerance. .. versionadded:: 1.1.0 atol : float, default 1e-8 Absolute tolerance. .. versionadded:: 1.1.0 """ if check_less_precise is not no_default: warnings.warn( "The 'check_less_precise' keyword in testing.assert_*_equal " "is deprecated and will be removed in a future version. " "You can stop passing 'check_less_precise' to silence this warning.", FutureWarning, stacklevel=find_stack_level(), ) # https://github.com/python/mypy/issues/7642 # error: Argument 1 to "_get_tol_from_less_precise" has incompatible # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]" rtol = atol = _get_tol_from_less_precise( check_less_precise # type: ignore[arg-type] ) if isinstance(left, Index): assert_index_equal( left, right, check_exact=False, exact=check_dtype, rtol=rtol, atol=atol, **kwargs, ) elif isinstance(left, Series): assert_series_equal( left, right, check_exact=False, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs, ) elif isinstance(left, DataFrame): assert_frame_equal( left, right, check_exact=False, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs, ) else: # Other sequences. if check_dtype: if is_number(left) and is_number(right): # Do not compare numeric classes, like np.float64 and float. pass elif is_bool(left) and is_bool(right): # Do not compare bool classes, like np.bool_ and bool. pass else: if isinstance(left, np.ndarray) or isinstance( right, np.ndarray): obj = "numpy array" else: obj = "Input" assert_class_equal(left, right, obj=obj) # if we have "equiv", this becomes True check_dtype = bool(check_dtype) _testing.assert_almost_equal(left, right, check_dtype=check_dtype, rtol=rtol, atol=atol, **kwargs)
def assert_extension_array_equal( left, right, check_dtype=True, index_values=None, check_less_precise=no_default, check_exact=False, rtol: float = 1.0e-5, atol: float = 1.0e-8, ): """ Check that left and right ExtensionArrays are equal. Parameters ---------- left, right : ExtensionArray The two arrays to compare. check_dtype : bool, default True Whether to check if the ExtensionArray dtypes are identical. index_values : numpy.ndarray, default None Optional index (shared by both left and right), used in output. check_less_precise : bool or int, default False Specify comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. If int, then specify the digits to compare. .. deprecated:: 1.1.0 Use `rtol` and `atol` instead to define relative/absolute tolerance, respectively. Similar to :func:`math.isclose`. check_exact : bool, default False Whether to compare number exactly. rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 atol : float, default 1e-8 Absolute tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 Notes ----- Missing values are checked separately from valid values. A mask of missing values is computed for each and checked to match. The remaining all-valid values are cast to object dtype and checked. Examples -------- >>> from pandas import testing as tm >>> a = pd.Series([1, 2, 3, 4]) >>> b, c = a.array, a.array >>> tm.assert_extension_array_equal(b, c) """ if check_less_precise is not no_default: warnings.warn( "The 'check_less_precise' keyword in testing.assert_*_equal " "is deprecated and will be removed in a future version. " "You can stop passing 'check_less_precise' to silence this warning.", FutureWarning, stacklevel=find_stack_level(), ) rtol = atol = _get_tol_from_less_precise(check_less_precise) assert isinstance(left, ExtensionArray), "left is not an ExtensionArray" assert isinstance(right, ExtensionArray), "right is not an ExtensionArray" if check_dtype: assert_attr_equal("dtype", left, right, obj="ExtensionArray") if (isinstance(left, DatetimeLikeArrayMixin) and isinstance(right, DatetimeLikeArrayMixin) and type(right) == type(left)): # Avoid slow object-dtype comparisons # np.asarray for case where we have a np.MaskedArray assert_numpy_array_equal(np.asarray(left.asi8), np.asarray(right.asi8), index_values=index_values) return left_na = np.asarray(left.isna()) right_na = np.asarray(right.isna()) assert_numpy_array_equal(left_na, right_na, obj="ExtensionArray NA mask", index_values=index_values) left_valid = np.asarray(left[~left_na].astype(object)) right_valid = np.asarray(right[~right_na].astype(object)) if check_exact: assert_numpy_array_equal(left_valid, right_valid, obj="ExtensionArray", index_values=index_values) else: _testing.assert_almost_equal( left_valid, right_valid, check_dtype=check_dtype, rtol=rtol, atol=atol, obj="ExtensionArray", index_values=index_values, )
def assert_index_equal( left: Index, right: Index, exact: bool | str = "equiv", check_names: bool = True, check_less_precise: bool | int | NoDefault = no_default, check_exact: bool = True, check_categorical: bool = True, check_order: bool = True, rtol: float = 1.0e-5, atol: float = 1.0e-8, obj: str = "Index", ) -> None: """ Check that left and right Index are equal. Parameters ---------- left : Index right : Index exact : bool or {'equiv'}, default 'equiv' Whether to check the Index class, dtype and inferred_type are identical. If 'equiv', then RangeIndex can be substituted for Int64Index as well. check_names : bool, default True Whether to check the names attribute. check_less_precise : bool or int, default False Specify comparison precision. Only used when check_exact is False. 5 digits (False) or 3 digits (True) after decimal points are compared. If int, then specify the digits to compare. .. deprecated:: 1.1.0 Use `rtol` and `atol` instead to define relative/absolute tolerance, respectively. Similar to :func:`math.isclose`. check_exact : bool, default True Whether to compare number exactly. check_categorical : bool, default True Whether to compare internal Categorical exactly. check_order : bool, default True Whether to compare the order of index entries as well as their values. If True, both indexes must contain the same elements, in the same order. If False, both indexes must contain the same elements, but in any order. .. versionadded:: 1.2.0 rtol : float, default 1e-5 Relative tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 atol : float, default 1e-8 Absolute tolerance. Only used when check_exact is False. .. versionadded:: 1.1.0 obj : str, default 'Index' Specify object name being compared, internally used to show appropriate assertion message. Examples -------- >>> from pandas import testing as tm >>> a = pd.Index([1, 2, 3]) >>> b = pd.Index([1, 2, 3]) >>> tm.assert_index_equal(a, b) """ __tracebackhide__ = True def _check_types(left, right, obj="Index") -> None: if not exact: return assert_class_equal(left, right, exact=exact, obj=obj) assert_attr_equal("inferred_type", left, right, obj=obj) # Skip exact dtype checking when `check_categorical` is False if is_categorical_dtype(left.dtype) and is_categorical_dtype( right.dtype): if check_categorical: assert_attr_equal("dtype", left, right, obj=obj) assert_index_equal(left.categories, right.categories, exact=exact) return assert_attr_equal("dtype", left, right, obj=obj) def _get_ilevel_values(index, level): # accept level number only unique = index.levels[level] level_codes = index.codes[level] filled = take_nd(unique._values, level_codes, fill_value=unique._na_value) return unique._shallow_copy(filled, name=index.names[level]) if check_less_precise is not no_default: warnings.warn( "The 'check_less_precise' keyword in testing.assert_*_equal " "is deprecated and will be removed in a future version. " "You can stop passing 'check_less_precise' to silence this warning.", FutureWarning, stacklevel=find_stack_level(), ) # https://github.com/python/mypy/issues/7642 # error: Argument 1 to "_get_tol_from_less_precise" has incompatible # type "Union[bool, int, NoDefault]"; expected "Union[bool, int]" rtol = atol = _get_tol_from_less_precise( check_less_precise # type: ignore[arg-type] ) # instance validation _check_isinstance(left, right, Index) # class / dtype comparison _check_types(left, right, obj=obj) # level comparison if left.nlevels != right.nlevels: msg1 = f"{obj} levels are different" msg2 = f"{left.nlevels}, {left}" msg3 = f"{right.nlevels}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) # length comparison if len(left) != len(right): msg1 = f"{obj} length are different" msg2 = f"{len(left)}, {left}" msg3 = f"{len(right)}, {right}" raise_assert_detail(obj, msg1, msg2, msg3) # If order doesn't matter then sort the index entries if not check_order: left = Index(safe_sort(left)) right = Index(safe_sort(right)) # MultiIndex special comparison for little-friendly error messages if left.nlevels > 1: left = cast(MultiIndex, left) right = cast(MultiIndex, right) for level in range(left.nlevels): # cannot use get_level_values here because it can change dtype llevel = _get_ilevel_values(left, level) rlevel = _get_ilevel_values(right, level) lobj = f"MultiIndex level [{level}]" assert_index_equal( llevel, rlevel, exact=exact, check_names=check_names, check_exact=check_exact, rtol=rtol, atol=atol, obj=lobj, ) # get_level_values may change dtype _check_types(left.levels[level], right.levels[level], obj=obj) # skip exact index checking when `check_categorical` is False if check_exact and check_categorical: if not left.equals(right): mismatch = left._values != right._values diff = np.sum(mismatch.astype(int)) * 100.0 / len(left) msg = f"{obj} values are different ({np.round(diff, 5)} %)" raise_assert_detail(obj, msg, left, right) else: # if we have "equiv", this becomes True exact_bool = bool(exact) _testing.assert_almost_equal( left.values, right.values, rtol=rtol, atol=atol, check_dtype=exact_bool, obj=obj, lobj=left, robj=right, ) # metadata comparison if check_names: assert_attr_equal("names", left, right, obj=obj) if isinstance(left, PeriodIndex) or isinstance(right, PeriodIndex): assert_attr_equal("freq", left, right, obj=obj) if isinstance(left, IntervalIndex) or isinstance(right, IntervalIndex): assert_interval_array_equal(left._values, right._values) if check_categorical: if is_categorical_dtype(left.dtype) or is_categorical_dtype( right.dtype): assert_categorical_equal(left._values, right._values, obj=f"{obj} category")