def assert_categorical_equal(left, right, *args, **kwargs):
    """Assert that two categorical extension arrays are equal.

    Verifies that both ``left`` and ``right`` carry a categorical dtype
    *before* comparing contents, so a dtype mismatch yields a targeted
    message instead of a generic array-comparison failure.

    All extra positional/keyword arguments are forwarded to
    ``tm.assert_extension_array_equal``.
    """
    # isinstance on the dtype replaces pd.api.types.is_categorical_dtype,
    # which is deprecated since pandas 2.1; behavior is identical here.
    assert isinstance(
        left.dtype, pd.CategoricalDtype
    ), f"{left} is not categorical dtype"
    assert isinstance(
        right.dtype, pd.CategoricalDtype
    ), f"{right} is not categorical dtype"
    tm.assert_extension_array_equal(left, right, *args, **kwargs)
def assert_categorical_equal(left, right, *args, **kwargs):
    """Compare two categoricals, dispatching on the installed pandas version."""
    if not PANDAS_GT_100:
        # Older pandas ships a dedicated categorical comparison helper.
        return tm.assert_categorical_equal(left, right, *args, **kwargs)
    tm.assert_extension_array_equal(left, right, *args, **kwargs)
    assert pd.api.types.is_categorical_dtype(
        left.dtype
    ), "{} is not categorical dtype".format(left)
    assert pd.api.types.is_categorical_dtype(
        right.dtype
    ), "{} is not categorical dtype".format(right)
def test_ior(fletcher_array):
    """In-place ``|=`` works with scalar and array operands (needed for ``.replace()``)."""
    # Scalar operand
    data = fletcher_array([True, False, None])
    data |= True
    pdt.assert_extension_array_equal(data, fletcher_array([True, True, True]))

    # Array operand
    data = fletcher_array([True, False, None, None])
    data |= fletcher_array([False, True, False, True])
    pdt.assert_extension_array_equal(
        data, fletcher_array([True, True, None, True])
    )
def test_tx_prior_week(self) -> None:
    """Handle a signal transmitted 0.01 s before the start of the week."""
    tx_ = pd.DataFrame({
        'week': [1999],
        'day': [6],
        'time': [cn.nanos_in_day - 1 * 10**7],
    })
    tx_gps = tm.gpsweek_to_gps(tx_.week, tx_.day, tx_.time).convert_dtypes()
    raw = self.input
    # Received time expressed with week/day set to zero (within-week offset).
    raw.loc[:, 'ReceivedSvTimeNanos'] = tm.gpsweek_to_gps(
        0, 0, tx_.day * cn.nanos_in_day + tx_.time)
    processed = log.process_raw(raw)
    pt.assert_extension_array_equal(
        processed.tx.array, tx_gps.array, check_exact=True)
def df_categories_equals(df1, df2):
    """Assert that the categorical content of ``df1`` and ``df2`` matches.

    Handles three input flavors:
    * bare ``CategoricalDtype`` objects — delegated to ``categories_equals``;
    * array/series-like objects whose ``dtype`` is categorical on both sides;
    * DataFrames, whose ``category`` columns are compared element-wise.

    Returns ``True`` (or the ``categories_equals`` result) for non-DataFrame
    inputs; for DataFrames it asserts and implicitly returns ``None``.
    """
    if not hasattr(df1, "select_dtypes"):
        if isinstance(df1, pandas.CategoricalDtype):
            return categories_equals(df1, df2)
        # BUG FIX: the second isinstance previously re-tested df1's dtype,
        # so df2 was never actually checked for being categorical.
        elif isinstance(df1.dtype, pandas.CategoricalDtype) and isinstance(
            getattr(df2, "dtype", None), pandas.CategoricalDtype
        ):
            return categories_equals(df1.dtype, df2.dtype)
        else:
            return True

    categories_columns = df1.select_dtypes(include="category").columns
    for column in categories_columns:
        assert_extension_array_equal(
            df1[column].values,
            df2[column].values,
            check_dtype=False,
        )
def df_categories_equals(df1, df2):
    """Assert that the categorical content of ``df1`` and ``df2`` matches.

    Handles three input flavors:
    * bare ``CategoricalDtype`` objects — delegated to ``categories_equals``;
    * array/series-like objects whose ``dtype`` is categorical on both sides;
    * DataFrames, whose ``category`` columns are compared positionally.

    Returns ``True`` (or the ``categories_equals`` result) for non-DataFrame
    inputs; for DataFrames it asserts and implicitly returns ``None``.
    """
    if not hasattr(df1, "select_dtypes"):
        if isinstance(df1, pandas.CategoricalDtype):
            return categories_equals(df1, df2)
        # BUG FIX: the second isinstance previously re-tested df1's dtype,
        # so df2 was never actually checked for being categorical.
        elif isinstance(df1.dtype, pandas.CategoricalDtype) and isinstance(
            getattr(df2, "dtype", None), pandas.CategoricalDtype
        ):
            return categories_equals(df1.dtype, df2.dtype)
        else:
            return True

    df1_categorical = df1.select_dtypes(include="category")
    df2_categorical = df2.select_dtypes(include="category")
    assert df1_categorical.columns.equals(df2_categorical.columns)
    # Use an index instead of a column name to iterate through columns. There
    # may be duplicate column names. e.g. if two columns are named col1,
    # selecting df1_categorical["col1"] gives a dataframe of width 2 instead
    # of a series.
    for i in range(len(df1_categorical.columns)):
        assert_extension_array_equal(
            df1_categorical.iloc[:, i].values,
            df2_categorical.iloc[:, i].values,
            check_dtype=False,
        )
def test_clonotype_convergence(adata_clonotype):
    """Out-of-place and in-place results of ``clonotype_convergence`` agree."""
    result = ir.tl.clonotype_convergence(
        adata_clonotype,
        key_coarse="clonotype_cluster",
        key_fine="clone_id",
        inplace=False,
    )
    ir.tl.clonotype_convergence(
        adata_clonotype,
        key_coarse="clonotype_cluster",
        key_fine="clone_id",
        inplace=True,
        key_added="is_convergent_",
    )
    pdt.assert_extension_array_equal(
        result, adata_clonotype.obs["is_convergent_"].values
    )
    expected = pd.Categorical(
        ["not convergent"] * 3
        + ["nan"] * 2
        + ["not convergent"]
        + ["convergent"] * 2
        + ["not convergent"] * 2,
        categories=["convergent", "not convergent", "nan"],
    )
    pdt.assert_extension_array_equal(result, expected)

    # Same call with the coarse and fine keys swapped.
    result = ir.tl.clonotype_convergence(
        adata_clonotype,
        key_fine="clonotype_cluster",
        key_coarse="clone_id",
        inplace=False,
    )
    expected = pd.Categorical(
        ["not convergent"] * 3 + ["nan"] * 2 + ["not convergent"] * 5,
        categories=["convergent", "not convergent", "nan"],
    )
    pdt.assert_extension_array_equal(result, expected)
def categories_equals(left, right):
    """Assert two categoricals agree on orderedness and on contents."""
    # Both ordered, or both unordered (truthiness parity).
    assert bool(left.ordered) == bool(right.ordered)
    assert_extension_array_equal(left, right)
def test_tx(self) -> None:
    """Processed transmit times match the precomputed expectation."""
    processed = log.process_raw(self.input)
    pt.assert_extension_array_equal(
        processed.tx.array, self.tx_gps.array, check_exact=True
    )
def test_rx(self) -> None:
    """Processed receive times match values rebuilt from the rx fixture."""
    processed = log.process_raw(self.input)
    want = tm.gpsweek_to_gps(self.rx.week, self.rx.day, self.rx.time)
    pt.assert_extension_array_equal(
        processed.rx.array, want.array, check_exact=True
    )
def test_or(fletcher_array):
    """Element-wise ``|`` against scalars, fletcher arrays and numpy arrays."""
    # --- scalar right-hand sides ---
    actual = fletcher_array([True, False]) | pd.NA
    pdt.assert_extension_array_equal(actual, fletcher_array([True, None]))

    actual = fletcher_array([True, False, None]) | pd.NA
    pdt.assert_extension_array_equal(actual, fletcher_array([True, None, None]))

    actual = fletcher_array([True, False, None]) | True
    pdt.assert_extension_array_equal(actual, fletcher_array([True, True, True]))

    actual = fletcher_array([True, False, None]) | False
    pdt.assert_extension_array_equal(actual, fletcher_array([True, False, None]))

    # --- array right-hand sides ---
    # no nulls on either side
    actual = fletcher_array([True, False, False]) | fletcher_array(
        [False, True, False]
    )
    pdt.assert_extension_array_equal(actual, fletcher_array([True, True, False]))

    # nulls on the left side only
    want = fletcher_array([True, True, None, True])
    actual = fletcher_array([True, False, None, None]) | fletcher_array(
        [False, True, False, True]
    )
    pdt.assert_extension_array_equal(actual, want)

    # nulls on both sides
    actual = fletcher_array([True, False, None, None]) | fletcher_array(
        [None, True, False, True]
    )
    pdt.assert_extension_array_equal(actual, want)

    # numpy array right-hand side
    actual = fletcher_array([True, False, None, None]) | np.array(
        [False, True, False, True]
    )
    pdt.assert_extension_array_equal(actual, want)
def test_gps_gpsweek(self) -> None:
    """Round trip between GPS nanoseconds and (week, day, time) fields."""
    ns = pd.Series([604800 * 2000 * 10**9 + 1 * 10**7, pd.NA], dtype='Int64')
    ts = pd.DataFrame(
        {
            'week': [2000, pd.NA],
            'day': [0, pd.NA],
            'time': [1 * 10**7, pd.NA],
        },
        dtype='Int64',
    )
    pt.assert_frame_equal(
        tm.gps_to_gpsweek(ns).astype('float64'),
        ts.astype('float64'),
        check_exact=True,
    )
    pt.assert_extension_array_equal(
        tm.gpsweek_to_gps(ts.week, ts.day, ts.time).array,
        ns.array,
        check_exact=True,
    )
def test_gps_doy(self) -> None:
    """Round trip between GPS nanoseconds and day-of-year representation."""
    ns = pd.Series([1, 2, pd.NA], dtype='Int64')
    ts = pd.DataFrame(
        {
            'date': ['1980006', '1980006', pd.NA],
            'time': [1, 2, np.nan],
        }
    ).convert_dtypes()
    pt.assert_frame_equal(
        tm.gps_to_doy(ns).astype('float64'),
        ts.astype('float64'),
        check_exact=True,
        check_dtype=False,
    )
    pt.assert_extension_array_equal(
        tm.doy_to_gps(ts.date, ts.time).array,
        ns.array,
        check_exact=True,
    )
def test_gps_utc(self) -> None:
    """GPS <-> UTC conversions round trip, including missing values."""
    ns = pd.Series([1167264018 * 10**9, pd.NA], dtype='Int64')
    ts = pd.Series(
        [
            pd.Timestamp(
                year=2017, month=1, day=1,
                hour=0, minute=0, second=0, nanosecond=0,
            ),
            pd.NaT,
        ]
    )
    pt.assert_extension_array_equal(
        tm.gps_to_utc(ns).array, ts.array, check_exact=True
    )
    pt.assert_extension_array_equal(
        tm.utc_to_gps(ts).array, ns.array, check_exact=True
    )