def test_json(self):
    """Round-trip station statistics through a JSON file and compare fields.

    Every ``calc_*_stats`` method is run, the statistics are written with
    ``to_json`` into a temporary file, read back with
    ``StationStatistics.from_json`` and compared attribute by attribute.
    """
    original = self.station.statistics
    original.calc_temperature_stats()
    original.calc_precipitation_stats()
    original.calc_humidity_stats()
    original.calc_radiation_stats()
    original.calc_wind_stats()

    with tempfile.NamedTemporaryFile() as tmp:
        original.to_json(tmp.name)
        tmp.seek(0)
        restored = melodist.StationStatistics.from_json(tmp.name)

        # Temperature statistics.
        assert_series_equal(original.temp.max_delta, restored.temp.max_delta)
        assert_frame_equal(original.temp.mean_course,
                           restored.temp.mean_course)

        # Precipitation statistics.
        assert_equal(original.precip.months, restored.precip.months)
        stat_pairs = zip(original.precip.stats, restored.precip.stats)
        assert all([cs1 == cs2 for cs1, cs2 in stat_pairs])

        # Humidity statistics.
        assert original.hum.a0 == restored.hum.a0
        assert original.hum.a1 == restored.hum.a1
        assert original.hum.kr == restored.hum.kr
        assert_series_equal(original.hum.month_hour_precip_mean,
                            restored.hum.month_hour_precip_mean)

        # Global radiation statistics.
        assert_frame_equal(original.glob.angstroem, restored.glob.angstroem)
        assert_frame_equal(original.glob.bristcamp, restored.glob.bristcamp)
        assert_frame_equal(original.glob.mean_course,
                           restored.glob.mean_course)

        # Wind statistics.
        assert original.wind.a == restored.wind.a
        assert original.wind.b == restored.wind.b
        assert original.wind.t_shift == restored.wind.t_shift
def test_make_forecasting_frame_list(self):
    """Check ``make_forecasting_frame`` for a ``range`` input and ``max_timeshift=1``."""
    frame, target = dataframe_functions.make_forecasting_frame(
        x=range(4), kind="test", max_timeshift=1, rolling_direction=1)

    wanted_frame = pd.DataFrame({
        "id": [1, 2, 3],
        "kind": ["test"] * 3,
        "value": [0., 1., 2.],
        "time": [0., 1., 2.],
    })
    wanted_target = pd.Series(data=[1, 2, 3], index=[1, 2, 3], name="value")

    # Columns are sorted on both sides, so the comparison ignores their order.
    assert_frame_equal(frame.sort_index(axis=1),
                       wanted_frame.sort_index(axis=1))
    assert_series_equal(target, wanted_target)
def test_establish_variables_from_mix(self):
    """Pass ``x`` as a column name and ``y`` as a Series alongside the frame."""
    frame = self.df
    plotter = lm._LinearPlotter()
    plotter.establish_variables(frame, x="x", y=frame.y)

    pdt.assert_series_equal(plotter.x, frame.x)
    pdt.assert_series_equal(plotter.y, frame.y)
    pdt.assert_frame_equal(plotter.data, frame)
def test_establish_variables_from_series(self):
    """Pass both variables as Series with no data frame; ``data`` stays ``None``."""
    x_values = self.df.x
    y_values = self.df.y

    plotter = lm._LinearPlotter()
    plotter.establish_variables(None, x=x_values, y=y_values)

    pdt.assert_series_equal(plotter.x, self.df.x)
    pdt.assert_series_equal(plotter.y, self.df.y)
    nt.assert_is(plotter.data, None)
def test_remove_na():
    """``utils.remove_na`` drops NaN from an ndarray and from a Series."""
    raw_values = [1, 2, np.nan, 3]

    # ndarray input: the NaN entry disappears.
    cleaned_array = utils.remove_na(np.array(raw_values))
    npt.assert_array_equal(cleaned_array, np.array([1, 2, 3]))

    # Series input: the surviving values keep their original index labels.
    cleaned_series = utils.remove_na(pd.Series(raw_values))
    wanted_series = pd.Series([1., 2, 3], [0, 1, 3])
    pdt.assert_series_equal(cleaned_series, wanted_series)
def test_make_forecasting_frame_pdSeries(self):
    """Check ``make_forecasting_frame`` for a Series with an hourly DatetimeIndex."""
    hourly_index = pd.date_range('1/1/2011', periods=4, freq='H')
    source = pd.Series(data=range(4), index=hourly_index)

    df, y = dataframe_functions.make_forecasting_frame(
        x=source, kind="test", max_timeshift=1, rolling_direction=1)

    target_stamps = ["2011-01-01 01:00:00", "2011-01-01 02:00:00",
                     "2011-01-01 03:00:00"]
    input_stamps = ["2011-01-01 00:00:00", "2011-01-01 01:00:00",
                    "2011-01-01 02:00:00"]

    wanted_y = pd.Series(data=[1, 2, 3],
                         index=pd.DatetimeIndex(target_stamps),
                         name="value")
    wanted_df = pd.DataFrame({
        "id": pd.DatetimeIndex(target_stamps),
        "kind": ["test"] * 3,
        "value": [0., 1., 2.],
        "time": pd.DatetimeIndex(input_stamps),
    })

    # Columns are sorted on both sides, so the comparison ignores their order.
    assert_frame_equal(df.sort_index(axis=1), wanted_df.sort_index(axis=1))
    assert_series_equal(y, wanted_y)
def test_series_roundtrip(self): if self.should_skip: return self.skip('pandas is not importable') ser = pd.Series({ 'an_int': np.int_(1), 'a_float': np.float_(2.5), 'a_nan': np.nan, 'a_minus_inf': -np.inf, 'an_inf': np.inf, 'a_str': np.str_('foo'), 'a_unicode': np.unicode_('bar'), 'date': np.datetime64('2014-01-01'), 'complex': np.complex_(1 - 2j), # TODO: the following dtypes are not currently supported. # 'object': np.object_({'a': 'b'}), }) decoded_ser = self.roundtrip(ser) assert_series_equal(decoded_ser, ser)
def test_dropna(self):
    """``dropna`` keeps only the rows where ``y_na`` is not null."""
    frame = self.df
    plotter = lm._LinearPlotter()
    plotter.establish_variables(frame, x="x", y_na="y_na")

    # Before dropping, both variables mirror the frame columns.
    pdt.assert_series_equal(plotter.x, frame.x)
    pdt.assert_series_equal(plotter.y_na, frame.y_na)

    plotter.dropna("x", "y_na")

    # Afterwards both variables are restricted to the non-null rows of y_na.
    keep = self.df.y_na.notnull()
    pdt.assert_series_equal(plotter.x, self.df.x[keep])
    pdt.assert_series_equal(plotter.y_na, self.df.y_na[keep])
def test_variables_from_frame(self):
    """Column names given to ``_RegressionPlotter`` resolve against ``data``."""
    frame = self.df
    plotter = lm._RegressionPlotter("x", "y", data=frame, units="s")

    pdt.assert_series_equal(plotter.x, frame.x)
    pdt.assert_series_equal(plotter.y, frame.y)
    pdt.assert_series_equal(plotter.units, frame.s)
    pdt.assert_frame_equal(plotter.data, frame)
def test_covers(self):
    """``g7.covers(g8)`` is expected to yield ``[True, False]``."""
    expected = Series([True, False])
    actual = self.g7.covers(self.g8)
    assert_series_equal(actual, expected)
def test_covers_itself(self):
    """Each polygon in a Series covers itself."""
    geoms = self.g1
    expected = Series([True, True])
    assert_series_equal(geoms.covers(geoms), expected)
def test_orientations(self):
    """Check ``orientations`` for single ids, id lists and alternative node files.

    Covers the default test population, a file without x/z rotation angles,
    a file without rotation angles, a file with quaternions, and a
    quaternion file that is expected to raise ``BluepySnapError``.
    """
    _call = self.test_obj.orientations
    id_order = [2, 0, 1]

    # Single node, addressed by plain id and by CircuitNodeId.
    node0_rotation = [
        [0.738219, 0.0, 0.674560],
        [0.0, 1.0, 0.0],
        [-0.674560, 0.0, 0.738219],
    ]
    npt.assert_almost_equal(_call(0), node0_rotation, decimal=6)
    npt.assert_almost_equal(
        _call(CircuitNodeId("default", 0)), node0_rotation, decimal=6)

    # Several nodes: a Series of 3x3 arrays indexed in the requested order.
    node2_rotation = np.array([
        [0.462986, 0.0, 0.886365],
        [0.0, 1.0, 0.0],
        [-0.886365, 0.0, 0.462986],
    ])
    node1_rotation = np.array([
        [-0.86768965, -0.44169042, 0.22808825],
        [0.48942842, -0.8393853, 0.23641518],
        [0.0870316, 0.31676788, 0.94450178],
    ])
    pdt.assert_series_equal(
        _call(id_order),
        pd.Series(
            [node2_rotation, np.array(node0_rotation), node1_rotation],
            index=[2, 0, 1],
            name="orientation",
        ),
    )

    # NodeCircuitIds
    circuit_ids = CircuitNodeIds.from_arrays(
        ["default", "default", "default"], [2, 0, 1], sort_index=False)
    pdt.assert_series_equal(_call(circuit_ids), _call([2, 0, 1]))

    # NodePopulation without rotation_angle[x|z]
    _call_no_xz = create_node_population(
        str(TEST_DATA_DIR / "nodes_no_xz_rotation.h5"),
        "default").orientations
    # 0 and 2 node_ids have x|z rotation angles equal to zero
    for node_id in (0, 2):
        npt.assert_almost_equal(_call_no_xz(node_id), _call(node_id))
    npt.assert_almost_equal(
        _call_no_xz(1),
        [[0.97364046, -0.0, 0.22808825],
         [0.0, 1.0, -0.0],
         [-0.22808825, 0.0, 0.97364046]],
        decimal=6,
    )

    # NodePopulation without rotation_angle
    _call_no_rot = create_node_population(
        str(TEST_DATA_DIR / "nodes_no_rotation.h5"), "default").orientations
    pdt.assert_series_equal(
        _call_no_rot([2, 0, 1]),
        pd.Series([np.eye(3), np.eye(3), np.eye(3)],
                  index=[2, 0, 1], name="orientation"),
    )

    # NodePopulation with quaternions
    _call_quat = create_node_population(
        str(TEST_DATA_DIR / "nodes_quaternions.h5"), "default").orientations
    npt.assert_almost_equal(
        _call_quat(0),
        [
            [1, 0.0, 0.0],
            [0.0, 0, -1.0],
            [0.0, 1.0, 0],
        ],
        decimal=6,
    )

    # Round every matrix to one decimal before the exact Series comparison.
    rounded = _call_quat([2, 0, 1])
    for position in range(len(rounded)):
        matrix = rounded.iloc[position]
        rounded.iloc[position] = np.around(
            matrix, decimals=1).astype(np.float64)

    quat_expected = pd.Series(
        [
            np.array([
                [0.0, -1.0, 0.0],
                [1.0, 0.0, 0.0],
                [0.0, 0.0, 1.0],
            ]),
            np.array([
                [1.0, 0.0, 0.0],
                [0.0, 0.0, -1.0],
                [0.0, 1.0, 0.0],
            ]),
            np.array([
                [0.0, 0.0, 1.0],
                [0.0, 1.0, 0.0],
                [-1.0, 0.0, 0.0],
            ]),
        ],
        index=[2, 0, 1],
        name="orientation",
    )
    pdt.assert_series_equal(rounded, quat_expected)

    # This quaternion file is expected to make the lookup fail.
    _call_missing_quat = create_node_population(
        str(TEST_DATA_DIR / "nodes_quaternions_w_missing.h5"),
        "default").orientations
    with pytest.raises(BluepySnapError):
        _call_missing_quat(0)
def test_cagr(self):
    """Compare ``portfolio.cagr`` with reference values at ``rtol=1e-4``."""
    reference = pd.Series({
        'portfolio': 0.1303543,
        'RUB.INFL': 0.05548082428015655,
    })
    actual = self.portfolio.cagr
    assert_series_equal(actual, reference, rtol=1e-4)
def test_tables(df):
    """Exercise a frame-backed table and a function-backed table in orca.

    ``test_frame`` wraps ``df`` directly; ``test_func`` is a decorated
    function returning ``test_frame / 2``.  The function-backed table is
    inspected both before it has been evaluated (no index, no columns,
    length zero) and after ``to_frame`` has been called.
    """
    wrapped_df = orca.add_table('test_frame', df)

    @orca.table()
    def test_func(test_frame):
        return test_frame.to_frame() / 2

    assert set(orca.list_tables()) == {'test_frame', 'test_func'}

    # Frame-backed table.
    table = orca.get_table('test_frame')
    assert table is wrapped_df
    assert table.columns == ['a', 'b']
    assert table.local_columns == ['a', 'b']
    assert len(table) == 3
    pdt.assert_index_equal(table.index, df.index)
    pdt.assert_series_equal(table.get_column('a'), df.a)
    pdt.assert_series_equal(table.a, df.a)
    pdt.assert_series_equal(table['b'], df['b'])

    # Function-backed table, read straight from the registry before use.
    table = orca._TABLES['test_func']
    assert table.index is None
    assert table.columns == []
    # Value comparison: ``is 0`` would test object identity against an int
    # literal, which only works through int caching and triggers a
    # SyntaxWarning on Python 3.8+.
    assert len(table) == 0

    pdt.assert_frame_equal(table.to_frame(), df / 2)
    pdt.assert_frame_equal(table.to_frame([]), df[[]])
    pdt.assert_frame_equal(table.to_frame(columns=['a']), df[['a']] / 2)
    pdt.assert_frame_equal(table.to_frame(columns='a'), df[['a']] / 2)
    pdt.assert_index_equal(table.index, df.index)
    pdt.assert_series_equal(table.get_column('a'), df.a / 2)
    pdt.assert_series_equal(table.a, df.a / 2)
    pdt.assert_series_equal(table['b'], df['b'] / 2)
    assert len(table) == 3
    assert table.columns == ['a', 'b']
def test_categorize_column_into_5_categories(self):
    """Five values spread over five bins map to categories 1 through 5."""
    # bins: [ -0.1, 20. , 40. , 60. , 80. , 100. ]
    values = pd.Series([0, 30, 50, 80, 100])
    wanted = pd.Series([1, 2, 3, 4, 5])
    assert_series_equal(wanted, categorize_column(values, num_bins=5))
def assert_series_equal(
    cls, left: pd.Series, right: pd.Series, *args: Any, **kwargs: Any
) -> None:
    """Compare two Series using ``cls`` defaults for dtype and name checks.

    ``check_dtype`` and ``check_names`` are taken from ``cls`` unless the
    caller passes them explicitly; everything else is forwarded unchanged to
    ``tm.assert_series_equal``.
    """
    class_defaults = {
        'check_dtype': cls.check_dtype,
        'check_names': cls.check_names,
    }
    # Explicit keyword arguments win over the class-level defaults.
    options = {**class_defaults, **kwargs}
    tm.assert_series_equal(left, right, *args, **options)
def test_fresnel(self):
    """Compare ``geo.fresnel`` on the box scenario with a hand-built Series."""
    actual = geo.fresnel(self.map_box, self.rays_box)

    # Expected value for ray 1, obtained through geo.fresnel_integral.
    argument = 5 * (2 / (0.1903 * 10))**0.5
    first_value = geo.fresnel_integral([argument])[0]
    wanted = pd.Series(data=[first_value, 0], index=[1, 2])

    pdt.assert_series_equal(actual, wanted)
def check_func(self, func):
    """Apply ``func`` to ``self.pds1`` as Koalas and as pandas and compare.

    The Koalas result is converted with ``toPandas`` first; Series names are
    not compared.
    """
    koalas_input = koalas.from_pandas(self.pds1)
    from_koalas = func(koalas_input).toPandas()
    from_pandas = func(self.pds1)
    mt.assert_series_equal(from_koalas, from_pandas, check_names=False)
def test_is_los(self):
    """Check ``geo.is_los`` for the box scenario and the canyon scenario."""
    ray_ids = [1, 2]

    box_result = geo.is_los(self.map_box, self.rays_box)
    pdt.assert_series_equal(box_result,
                            pd.Series([True, False], index=ray_ids))

    canyon_result = geo.is_los(self.map_canyon, self.rays_canyon)
    pdt.assert_series_equal(canyon_result,
                            pd.Series([False, False], index=ray_ids))
def check_func(self, func):
    """Apply ``func`` to both start-date Series and compare the results.

    The result for ``self.ks_start_date`` is converted with ``to_pandas``
    before the comparison; Series names are not compared.
    """
    converted = func(self.ks_start_date).to_pandas()
    reference = func(self.pd_start_date)
    mt.assert_series_equal(converted, reference, check_names=False)
def df_equals(df1, df2):
    """Assert that df1 and df2 are equal.

    Args:
        df1: (pandas or modin DataFrame or series) dataframe to test if equal.
        df2: (pandas or modin DataFrame or series) dataframe to test if equal.

    Returns:
        None. The function communicates through exceptions only.

    Raises:
        AssertionError: if the two objects are not considered equal.
    """
    # Gets AttributeError if modin's groupby object is not imported like this
    from modin.pandas.groupby import DataFrameGroupBy

    groupby_types = (pandas.core.groupby.DataFrameGroupBy, DataFrameGroupBy)

    # The typing behavior of how pandas treats its index is not consistent when the
    # length of the DataFrame or Series is 0, so we just verify that the contents are
    # the same.
    if (hasattr(df1, "index") and hasattr(df2, "index") and len(df1) == 0
            and len(df2) == 0):
        if type(df1).__name__ == type(df2).__name__:
            if (hasattr(df1, "name") and hasattr(df2, "name")
                    and df1.name == df2.name):
                return
            if (hasattr(df1, "columns") and hasattr(df2, "columns")
                    and df1.columns.equals(df2.columns)):
                return
        raise AssertionError(
            "Empty objects differ in type, name or columns: "
            f"{type(df1)} vs {type(df2)}")

    if isinstance(df1, (list, tuple)) and all(
            isinstance(d, (pd.DataFrame, pd.Series, pandas.DataFrame,
                           pandas.Series)) for d in df1):
        assert isinstance(df2, type(df1)), "Different type of collection"
        assert len(df1) == len(df2), "Different length result"
        # Compare eagerly.  Returning a generator expression here would hand
        # back a lazy object that no caller consumes, so the elements would
        # never actually be compared.
        for d1, d2 in zip(df1, df2):
            df_equals(d1, d2)
        return

    # Convert to pandas
    if isinstance(df1, (pd.DataFrame, pd.Series)):
        df1 = to_pandas(df1)
    if isinstance(df2, (pd.DataFrame, pd.Series)):
        df2 = to_pandas(df2)

    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
        if (df1.empty and not df2.empty) or (df2.empty and not df1.empty):
            raise AssertionError(
                "One of the passed frames is empty, when other isn't")
        elif df1.empty and df2.empty and type(df1) != type(df2):
            raise AssertionError(
                f"Empty frames have different types: {type(df1)} != {type(df2)}"
            )

    if isinstance(df1, pandas.DataFrame) and isinstance(df2, pandas.DataFrame):
        assert_frame_equal(
            df1,
            df2,
            check_dtype=False,
            check_datetimelike_compat=True,
            check_index_type=False,
            check_column_type=False,
            check_categorical=False,
        )
        df_categories_equals(df1, df2)
    elif isinstance(df1, pandas.Index) and isinstance(df2, pandas.Index):
        assert_index_equal(df1, df2)
    elif isinstance(df1, pandas.Series) and isinstance(df2, pandas.Series):
        # This branch handles every pair of Series, empty ones included, so no
        # separate empty-Series branch is needed further down the chain.
        assert_series_equal(df1, df2, check_dtype=False,
                            check_series_type=False)
    elif isinstance(df1, groupby_types) and isinstance(df2, groupby_types):
        # Each iteration item is indexed as (group key, group data).
        for g1, g2 in zip(df1, df2):
            assert g1[0] == g2[0]
            df_equals(g1[1], g2[1])
    elif isinstance(df1, pandas.core.arrays.numpy_.PandasArray):
        assert isinstance(df2, pandas.core.arrays.numpy_.PandasArray)
        assert df1 == df2
    elif isinstance(df1, np.recarray) and isinstance(df2, np.recarray):
        np.testing.assert_array_equal(df1, df2)
    else:
        # Fall back to an approximate comparison when the values are not
        # exactly equal.
        if df1 != df2:
            np.testing.assert_almost_equal(df1, df2)
def test_categorize_column_into_2_categories(self):
    """Four values split over two bins map to categories 1 and 2."""
    values = pd.Series([5, 20, 10, 25])
    # bins: [ 4.98 15. 25. ]
    wanted = pd.Series([1, 2, 1, 2])
    assert_series_equal(wanted, categorize_column(values, num_bins=2))
def test_column_cache(df):
    """A cached column keeps its value until one of the caches is cleared.

    The injectable ``x`` is changed between checks; the column result is
    expected to follow only after ``clear_cached`` on the column,
    ``orca.clear_cache``, ``clear_cached`` on the table, or re-registering
    the column.
    """
    orca.add_injectable('x', 2)
    series = pd.Series([1, 2, 3], index=['x', 'y', 'z'])
    key = ('table', 'col')

    @orca.table()
    def table():
        return df

    @orca.column(*key, cache=True)
    def column(variable='x'):
        return series * variable

    def registered_column():
        return orca._COLUMNS[key]

    def expect_multiplier(factor):
        pdt.assert_series_equal(registered_column()(), series * factor)

    expect_multiplier(2)

    # Changing the injectable alone does not refresh the cached value.
    orca.add_injectable('x', 3)
    expect_multiplier(2)

    # Clearing the column's own cache.
    registered_column().clear_cached()
    expect_multiplier(3)
    orca.add_injectable('x', 4)
    expect_multiplier(3)

    # Clearing the global cache.
    orca.clear_cache()
    expect_multiplier(4)
    orca.add_injectable('x', 5)
    expect_multiplier(4)

    # Clearing the cache through the owning table.
    orca.get_table('table').clear_cached()
    expect_multiplier(5)
    orca.add_injectable('x', 6)
    expect_multiplier(5)

    # Registering the column again.
    orca.add_column(*key, column=column, cache=True)
    expect_multiplier(6)
def assertPandasEqual(self, left, right, check_exact=True):
    """Assert equality of two DataFrames, two Series or two Indexes.

    On failure the ``AssertionError`` is re-raised with both operands and
    their dtypes appended to the message.  Any other combination of argument
    types raises ``ValueError``.
    """
    both_frames = (isinstance(left, pd.DataFrame)
                   and isinstance(right, pd.DataFrame))
    both_series = isinstance(left, pd.Series) and isinstance(right, pd.Series)
    both_indexes = isinstance(left, pd.Index) and isinstance(right, pd.Index)
    if not (both_frames or both_series or both_indexes):
        raise ValueError("Unexpected values: (%s, %s)" % (left, right))

    installed = LooseVersion(pd.__version__)
    # ``check_freq`` is only passed along on pandas 1.1 and newer.
    if installed >= LooseVersion("1.1"):
        freq_kwargs = dict(check_freq=False)
    else:
        freq_kwargs = dict()
    # Due to https://github.com/pandas-dev/pandas/issues/35446
    exact_needs_numeric = installed < LooseVersion("1.1.1")

    def describe(label, obj, dtype_info):
        return "\n\n%s:\n%s\n%s" % (label, obj, dtype_info)

    if both_frames:
        try:
            if exact_needs_numeric:
                check_exact = (
                    check_exact
                    and all([is_numeric_dtype(dtype)
                             for dtype in left.dtypes])
                    and all([is_numeric_dtype(dtype)
                             for dtype in right.dtypes]))
            index_check = "equiv" if len(left.index) > 0 else False
            column_check = "equiv" if len(left.columns) > 0 else False
            assert_frame_equal(
                left,
                right,
                check_index_type=index_check,
                check_column_type=column_check,
                check_exact=check_exact,
                **freq_kwargs,
            )
        except AssertionError as err:
            detail = (str(err) + describe("Left", left, left.dtypes)
                      + describe("Right", right, right.dtypes))
            raise AssertionError(detail) from err
    elif both_series:
        try:
            if exact_needs_numeric:
                check_exact = (check_exact and is_numeric_dtype(left.dtype)
                               and is_numeric_dtype(right.dtype))
            index_check = "equiv" if len(left.index) > 0 else False
            assert_series_equal(
                left,
                right,
                check_index_type=index_check,
                check_exact=check_exact,
                **freq_kwargs,
            )
        except AssertionError as err:
            detail = (str(err) + describe("Left", left, left.dtype)
                      + describe("Right", right, right.dtype))
            raise AssertionError(detail) from err
    else:
        try:
            if exact_needs_numeric:
                check_exact = (check_exact and is_numeric_dtype(left.dtype)
                               and is_numeric_dtype(right.dtype))
            assert_index_equal(left, right, check_exact=check_exact)
        except AssertionError as err:
            detail = (str(err) + describe("Left", left, left.dtype)
                      + describe("Right", right, right.dtype))
            raise AssertionError(detail) from err
def test_frame_int():
    """Selecting a row with ``iloc`` gives a Series named after the row label."""
    frame = pd.DataFrame(
        {"col1": [1, 2], "col2": [3, 4]},
        index=["row1", "row2"],
    )
    second_row = frame.iloc[1]
    wanted = pd.Series([2, 4], name="row2", index=['col1', 'col2'])
    assert_series_equal(second_row, wanted)
def test_step(test_table, test_column='test_table2.b'):
    """Write ``a + b`` back into column ``a`` and check the injected column.

    ``df2`` is not defined in this function; it is read from the enclosing
    scope.
    """
    frame = test_table.to_frame()
    column_sum = frame['a'] + frame['b']
    test_table['a'] = column_sum
    pdt.assert_series_equal(test_column, df2['b'])
def test_set():
    """Assigning through an ``iloc`` slice overwrites the first two values."""
    values = pd.Series([1, 2, 3])
    values.iloc[:2] = 0
    wanted = pd.Series([0, 0, 3])
    assert_series_equal(values, wanted)
def test_npv_tax_depr(df, r, pi, land_expensing, expected_df):
    """Compare ``cf.npv_tax_depr`` output with the expected Series."""
    result = cf.npv_tax_depr(df, r, pi, land_expensing)
    # Print both types to ease debugging when the comparison fails.
    print('Types = ', type(result), type(expected_df))
    assert_series_equal(result, expected_df)
def test_RangeScal_ranges():
    "Testing invariance for features with null range."
    feature = '205/2790.89'
    scaled = sca.RangeScal(MetAna_O2_I_RS)
    # The row for this feature must be identical before and after scaling.
    assert_series_equal(scaled.loc[feature], MetAna_O2_I_RS.loc[feature])
def test_one_unique(self):
    """One distinct value plus a NaN is expected to map to ``[3, -1, 3]``."""
    from pandas.testing import assert_series_equal
    from dataworkspaces.kits.jupyter import _metric_col_to_colormap

    metric = pandas.Series([1.2, numpy.nan, 1.2])
    wanted = pandas.Series([3, -1, 3])
    assert_series_equal(wanted, _metric_col_to_colormap(metric))
def test_ror(self):
    """Compare ``portfolio.returns_ts`` with the pickled reference sample.

    The sample is loaded from the relative path ``data/portfolio.pkl``.
    """
    reference = pd.read_pickle('data/portfolio.pkl')
    actual = self.portfolio.returns_ts
    assert_series_equal(actual, reference)
def test_slice():
    """An ``iloc`` slice returns the first three values of the Series."""
    full = pd.Series([1, 3, 5, 7, 11])
    head = full.iloc[:3]
    wanted = pd.Series([1, 3, 5])
    assert_series_equal(head, wanted)
def test_table_copy(df):
    """Check column sharing for tables registered with and without ``copy_col``.

    Tables are registered from a frame, from lambdas, from decorated
    functions (plain and cached) and with computed columns.  For every table
    ``to_frame`` must give a fresh object; column access must give the very
    same Series object for ``copy_col=False`` tables and a distinct one
    otherwise.
    """
    orca.add_table('test_frame_copied', df, copy_col=True)
    orca.add_table('test_frame_uncopied', df, copy_col=False)
    orca.add_table('test_func_copied', lambda: df, copy_col=True)
    orca.add_table('test_func_uncopied', lambda: df, copy_col=False)

    @orca.table(copy_col=True)
    def test_funcd_copied():
        return df

    @orca.table(copy_col=False)
    def test_funcd_uncopied():
        return df

    @orca.table(copy_col=True)
    def test_funcd_copied2(test_frame_copied):
        # local returns original, but it is copied by copy_col.
        return test_frame_copied.local

    @orca.table(copy_col=True)
    def test_funcd_copied3(test_frame_uncopied):
        # local returns original, but it is copied by copy_col.
        return test_frame_uncopied.local

    @orca.table(copy_col=False)
    def test_funcd_uncopied2(test_frame_copied):
        # local returns original.
        return test_frame_copied.local

    @orca.table(copy_col=False)
    def test_funcd_uncopied3(test_frame_uncopied):
        # local returns original.
        return test_frame_uncopied.local

    orca.add_table('test_cache_copied', lambda: df, cache=True, copy_col=True)
    orca.add_table(
        'test_cache_uncopied', lambda: df, cache=True, copy_col=False)

    @orca.table(cache=True, copy_col=True)
    def test_cached_copied():
        return df

    @orca.table(cache=True, copy_col=False)
    def test_cached_uncopied():
        return df

    # Create tables with computed columns.
    orca.add_table(
        'test_copied_columns', pd.DataFrame(index=df.index), copy_col=True)
    orca.add_table(
        'test_uncopied_columns', pd.DataFrame(index=df.index), copy_col=False)

    computed_tables = ['test_copied_columns', 'test_uncopied_columns']
    for column_name in ['a', 'b']:
        label = "test_frame_uncopied.{}".format(column_name)

        # The label is bound as a default argument of the column function.
        def func(col=label):
            return col

        for table_name in computed_tables:
            orca.add_column(table_name, column_name, func)

    uncopied_names = [
        'test_frame_uncopied',
        'test_func_uncopied',
        'test_funcd_uncopied',
        'test_funcd_uncopied2',
        'test_funcd_uncopied3',
        'test_cache_uncopied',
        'test_cached_uncopied',
        'test_uncopied_columns',
    ]
    copied_names = [
        'test_frame_copied',
        'test_func_copied',
        'test_funcd_copied',
        'test_funcd_copied2',
        'test_funcd_copied3',
        'test_cache_copied',
        'test_cached_copied',
        'test_copied_columns',
    ]

    for name in uncopied_names + copied_names:
        table = orca.get_table(name)
        table2 = orca.get_table(name)

        # to_frame will always return a copy.
        if 'columns' in name:
            assert_frames_equal(table.to_frame(), df)
        else:
            pdt.assert_frame_equal(table.to_frame(), df)
        assert table.to_frame() is not df
        pdt.assert_frame_equal(table.to_frame(), table.to_frame())
        assert table.to_frame() is not table.to_frame()
        pdt.assert_series_equal(table.to_frame()['a'], df['a'])
        assert table.to_frame()['a'] is not df['a']
        pdt.assert_series_equal(table.to_frame()['a'],
                                table.to_frame()['a'])
        assert table.to_frame()['a'] is not table.to_frame()['a']

        # Column access yields the identical object only for uncopied tables.
        shares_columns = 'uncopied' in name
        pdt.assert_series_equal(table['a'], df['a'])
        assert (table['a'] is df['a']) is shares_columns
        pdt.assert_series_equal(table['a'], table2['a'])
        assert (table['a'] is table2['a']) is shares_columns