def test_summary(self): infl1 = self.infl1 infl0 = self.infl0 df0 = infl0.summary_frame() df1 = infl1.summary_frame() assert_allclose(df0.values, df1.values, rtol=5e-5) pdt.assert_index_equal(df0.index, df1.index)
def test_summary(self): infl1 = self.infl1 infl0 = self.infl0 df0 = infl0.summary_frame() df1 = infl1.summary_frame() # just some basic check on overlap except for dfbetas cols = ['cooks_d', 'standard_resid', 'hat_diag', 'dffits_internal'] assert_allclose(df0[cols].values, df1[cols].values, rtol=1e-5) pdt.assert_index_equal(df0.index, df1.index)
def test_me_breakpoints(self):
    # Fetches the Fama-French "ME Breakpoints" dataset over the network and
    # validates its structure -- requires connectivity to the data source.
    results = web.DataReader("ME_Breakpoints", "famafrench",
                             start="2010-01-01", end="2010-12-01")
    assert isinstance(results, dict)
    assert len(results) == 2
    # 12 monthly rows, 21 columns for 2010.
    assert results[0].shape == (12, 21)
    # Column 0 is a firm count; the rest are 5%-wide percentile buckets
    # expressed as (low, high) tuples.
    exp_columns = pd.Index(
        [
            "Count",
            (0, 5), (5, 10), (10, 15), (15, 20), (20, 25),
            (25, 30), (30, 35), (35, 40), (40, 45), (45, 50),
            (50, 55), (55, 60), (60, 65), (65, 70), (70, 75),
            (75, 80), (80, 85), (85, 90), (90, 95), (95, 100),
        ],
        dtype="object",
    )
    tm.assert_index_equal(results[0].columns, exp_columns)
    # Monthly PeriodIndex named "Date" over the requested span.
    exp_index = pd.period_range("2010-01-01", "2010-12-01", freq="M",
                                name="Date")
    tm.assert_index_equal(results[0].index, exp_index)
def test_laps(self):
    # Build an Activity whose laps frame mixes columns carrying a 'mine'
    # prefix, a 'mine' suffix, and an unrelated 'other_' prefix.
    activity = heartandsole.Activity(
        pd.DataFrame([]),
        laps=pd.DataFrame.from_dict({
            'mine_a': ['a', 'a'],
            'mine_1': [1, 1],
            'other_b': ['b', 'b'],
            'c_mine': ['c', 'c']
        }))
    # The accessor keeps only the 'mine'-tagged columns with the affix
    # stripped; 'other_b' is excluded.
    result = activity.mine.laps
    expected = pd.DataFrame.from_dict({
        'a': ['a', 'a'],
        '1': [1, 1],
        'c': ['c', 'c'],
    })
    tm.assert_frame_equal(result, expected)
    # lap_cols reports the original (unstripped) matching column names.
    result = activity.mine.lap_cols
    expected = pd.Index(['mine_a', 'mine_1', 'c_mine'])
    tm.assert_index_equal(result, expected)
def test_swap_time_to_year(test_df, inplace):
    """Swap time column for year (int) dropping subannual time resolution (default)"""
    if test_df.time_col == "year":
        pytest.skip(
            "IamDataFrame with time domain `year` not relevant for this test.")
    # Expected frame: replace the datetime `time` column with its year.
    exp = test_df.data
    exp["year"] = exp["time"].apply(lambda x: x.year)
    exp = exp.drop("time", axis="columns")
    exp = IamDataFrame(exp, meta=test_df.meta)
    obs = test_df.swap_time_for_year(inplace=inplace)
    if inplace:
        # The in-place variant returns None and mutates test_df itself.
        assert obs is None
        obs = test_df
    assert_iamframe_equal(obs, exp)
    # Fixture data covers exactly the years 2005 and 2010.
    pdt.assert_index_equal(obs.time, pd.Index([2005, 2010], name="time"))
def test_premerge(self):
    # COINS continuity analysis: _premerge exposes the per-segment
    # intermediate table computed before strokes are merged.
    coins = mm.COINS(self.gdf)
    result = coins._premerge()
    assert result.shape == (89, 8)
    expected_columns = pd.Index(
        [
            "orientation",
            "links_p1",
            "links_p2",
            "best_p1",
            "best_p2",
            "p1_final",
            "p2_final",
            "geometry",
        ]
    )
    assert_index_equal(result.columns, expected_columns)
    # Every cell of the intermediate table must be populated.
    assert not result.isna().any().any()
def test_jsonfetcher_get_evaluation_mapping(self):
    """
    get_evaluation() should return an Evaluation corresponding to the small
    dataset and mapping.

    Tests whether exclusion and renaming works when remapping.
    """
    # The mapping renames "project" -> "proj"; columns absent from the
    # mapping are excluded from the result.
    fetcher = JSONFetcher('tests/sample_data/dataframe_small.json',
                          mapping={"project": "proj"})
    result = fetcher.get_evaluation().get_df()
    # test renaming
    expected_projects = pd.Series(
        ["ibex", "oneblink", "litex-linux", "oneblink", "litex-linux"],
        name="proj")
    assert_series_equal(result["proj"], expected_projects)
    # test exclusion
    expected_columns = pd.Index(["proj"])  # should not have other columns
    assert_index_equal(result.columns, expected_columns)
def test_columns(self): """ Test if history.to_dataframe has the expected columns. """ df_history = self._get_df_history() expected = pd.Index( [ "trade_id", "asset", "lot", "open_bar", "close_bar", "shut_bar", "take", "stop", "pnl", ] ) assert df_history.index.name == "order_id" assert_index_equal(df_history.columns, expected)
def morton_distance_dask(geoseries):
    # Compare dask-geopandas' morton_distance against a direct computation:
    # discretize each geometry's bounds to level-16 integer coordinates and
    # interleave the x/y bits per row.
    bounds = geoseries.bounds.to_numpy()
    total_bounds = geoseries.total_bounds
    x_coords, y_coords = _continuous_to_discrete_coords(
        bounds, level=16, total_bounds=total_bounds)
    ddf = from_geopandas(geoseries, npartitions=1)
    result = ddf.morton_distance().compute()
    expected = []
    for i in range(len(x_coords)):
        x = int(x_coords[i])
        y = int(y_coords[i])
        expected.append(interleave(x, y))
    assert list(result) == expected
    assert isinstance(result, pd.Series)
    # The original index must survive the dask round-trip.
    assert_index_equal(ddf.index.compute(), result.index)
def test_baseline_with_backcast_projection_off(self) -> None:
    """Assert that microsim simulation results only have positive time steps"""
    population_simulation = PopulationSimulationFactory.build_population_simulation(
        self.test_outflows_data,
        self.test_transitions_data,
        self.test_total_population_data,
        self.simulation_architecture,
        ["crime"],
        self.user_inputs,
        [],
        0,
        self.test_transitions_data,
        True,
        False,
        None,
    )
    projection = population_simulation.simulate_policies()
    # With back-cast projection disabled, the projected time steps must be
    # exactly 0..9.  pd.Index(..., dtype="int64") replaces pd.Int64Index,
    # which was deprecated in pandas 1.4 and removed in 2.0; the comparison
    # is equivalent under assert_index_equal's default exact="equiv".
    assert_index_equal(
        projection.index.unique().sort_values(),
        pd.Index(range(10), dtype="int64"),
    )
def assert_eq(left, right, **kwargs):
    """
    Assert that two cudf-like things are equivalent

    This equality test works for pandas/cudf dataframes/series/indexes/scalars
    in the same way, and so makes it easier to perform parametrized testing
    without switching between assert_frame_equal/assert_series_equal/...
    functions.
    """
    __tracebackhide__ = True
    # Normalize device-side objects to their host (pandas/numpy) equivalents
    # before comparing.
    if hasattr(left, "to_pandas"):
        left = left.to_pandas()
    if hasattr(right, "to_pandas"):
        right = right.to_pandas()
    if isinstance(left, cupy.ndarray):
        left = cupy.asnumpy(left)
    if isinstance(right, cupy.ndarray):
        right = cupy.asnumpy(right)
    # Dispatch on the normalized type of `left`.
    if isinstance(left, pd.DataFrame):
        tm.assert_frame_equal(left, right, **kwargs)
    elif isinstance(left, pd.Series):
        tm.assert_series_equal(left, right, **kwargs)
    elif isinstance(left, pd.Index):
        tm.assert_index_equal(left, right, **kwargs)
    elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        if np.issubdtype(left.dtype, np.floating) and np.issubdtype(
            right.dtype, np.floating
        ):
            # Float arrays: tolerance-based comparison, NaNs compare equal.
            assert np.allclose(left, right, equal_nan=True)
        else:
            assert np.array_equal(left, right)
    else:
        # Scalar fallback.  NOTE(review): when `left == right` is falsy and
        # `left` is a non-numeric scalar, np.isnan raises -- this branch
        # appears to assume numeric scalars; confirm callers never pass
        # unequal non-numeric scalars.
        if left == right:
            return True
        else:
            if np.isnan(left):
                assert np.isnan(right)
            else:
                assert np.allclose(left, right, equal_nan=True)
    return True
def _assert_frame_equal(left_df, right_df, ignore_index=False, ignore_columns=False, ignore_directionality=False, decimal=7): # assert_frame_equal doesn't like None... if left_df is None or right_df is None: assert left_df is None and right_df is None else: left_values = left_df.values right_values = right_df.values if ignore_directionality: left_values, right_values = _normalize_signs( left_values, right_values) npt.assert_almost_equal(left_values, right_values, decimal=decimal) if not ignore_index: pdt.assert_index_equal(left_df.index, right_df.index) if not ignore_columns: pdt.assert_index_equal(left_df.columns, right_df.columns)
def test_cv_genes(self):
    # Split along axis 0 (presumably the gene axis, per the test name) with
    # a 50% ratio and seed 42.
    npr1, ngs1 = ManagePriors.cross_validate_gold_standard(
        self.priors_data, self.gold_standard, 0, 0.5, 42)
    self.assertEqual(npr1.shape, ngs1.shape)
    # The priors half and gold-standard half must be disjoint on the index.
    self.assertEqual(len(npr1.index.intersection(ngs1.index)), 0)
    pdt.assert_index_equal(npr1.columns, self.priors_data.columns)
    pdt.assert_index_equal(ngs1.columns, self.gold_standard.columns)
    # Same seed (42) must reproduce the identical split; a different seed
    # (43) must produce a different one.
    npr2, ngs2 = ManagePriors.cross_validate_gold_standard(
        self.priors_data, self.gold_standard, 0, 0.5, 43)
    npr3, ngs3 = ManagePriors.cross_validate_gold_standard(
        self.priors_data, self.gold_standard, 0, 0.5, 42)
    pdt.assert_frame_equal(npr1, npr3)
    pdt.assert_frame_equal(ngs1, ngs3)
    with self.assertRaises(AssertionError):
        pdt.assert_frame_equal(npr1, npr2)
    with self.assertRaises(AssertionError):
        pdt.assert_frame_equal(ngs1, ngs2)
def test_create_ids(self):
    # from_dict maps {population: [ids]} to a circuit-id index; each entry
    # expands to (population, id) pairs.
    tested = self.ids_cls.from_dict({'a': [0]})
    pdt.assert_index_equal(tested.index, self._circuit_ids(['a'], [0]))
    tested = self.ids_cls.from_dict({'a': [0, 1]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(['a', 'a'], [0, 1]))
    tested = self.ids_cls.from_dict({'a': [0], 'b': [0]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(['a', 'b'], [0, 0]))
    tested = self.ids_cls.from_dict({'a': [0], 'b': [1]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(['a', 'b'], [0, 1]))
    # keep duplicates
    tested = self.ids_cls.from_dict({'a': [0, 0]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(['a', 'a'], [0, 0]))
    assert tested.index.size == 2
def test_append_index():
    """Assert that appending and re-ordering to an index works as expected"""
    source = pd.MultiIndex(
        codes=[[0, 1]],
        levels=[["scen_a", "scen_b"]],
        names=["scenario"],
    )
    expected = pd.MultiIndex(
        codes=[[0, 0], [0, 1]],
        levels=[["World"], ["scen_a", "scen_b"]],
        names=["region", "scenario"],
    )
    # Prepend a constant "World" region level and order it first.
    observed = append_index_level(
        source, 0, "World", "region", order=["region", "scenario"]
    )
    pdt.assert_index_equal(observed, expected)
def test_create_ids(self):
    # from_dict maps {population: [ids]} to a circuit-id index; each entry
    # expands to (population, id) pairs.
    tested = self.ids_cls.from_dict({"a": [0]})
    pdt.assert_index_equal(tested.index, self._circuit_ids(["a"], [0]))
    tested = self.ids_cls.from_dict({"a": [0, 1]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(["a", "a"], [0, 1]))
    tested = self.ids_cls.from_dict({"a": [0], "b": [0]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(["a", "b"], [0, 0]))
    tested = self.ids_cls.from_dict({"a": [0], "b": [1]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(["a", "b"], [0, 1]))
    # keep duplicates
    tested = self.ids_cls.from_dict({"a": [0, 0]})
    pdt.assert_index_equal(tested.index,
                           self._circuit_ids(["a", "a"], [0, 0]))
    assert tested.index.size == 2
def assert_option_result(self, df):
    """
    Validate returned option data has expected format.
    """
    assert isinstance(df, pd.DataFrame)
    assert len(df) > 1
    exp_columns = pd.Index([
        "Last", "Bid", "Ask", "Chg", "PctChg", "Vol", "Open_Int", "IV",
        "Root", "IsNonstandard", "Underlying", "Underlying_Price",
        "Quote_Time", "Last_Trade_Date", "JSON",
    ])
    tm.assert_index_equal(df.columns, exp_columns)
    assert df.index.names == [u"Strike", u"Expiry", u"Type", u"Symbol"]
    # First seven columns are floats; the remainder mix floats, objects,
    # a bool and two timestamps, in column order.
    dtypes = [
        np.dtype(x)
        for x in ["float64"] * 7 + [
            "float64", "object", "bool", "object", "float64",
            "datetime64[ns]", "datetime64[ns]", "object",
        ]
    ]
    tm.assert_series_equal(df.dtypes, pd.Series(dtypes, index=exp_columns))
def test_must_not_reset_index(self):
    """HashingEncoder.transform must preserve a non-default input index."""
    cols = ['column1', 'column2', 'column3', 'column4']
    frame = pd.DataFrame([[i] * 4 for i in range(10)], columns=cols)
    # Slice so the index does not start at 0 -- an accidental reset would
    # then be detectable.
    frame = frame.iloc[2:8, :]
    hashed_cols = ['column1', 'column2', 'column3']

    encoded = {}
    for label, encoder in (
        ('single', encoders.HashingEncoder(max_process=1, cols=hashed_cols)),
        ('multi', encoders.HashingEncoder(cols=hashed_cols)),
    ):
        encoder.fit(frame, None)
        out = encoder.transform(frame)
        assert_index_equal(frame.index, out.index)
        # An aligned concat keeps the row count only when the indexes match.
        assert frame.shape[0] == pd.concat([frame, out], axis=1).shape[0]
        encoded[label] = out

    # Single- and multi-process paths must agree exactly.
    assert_frame_equal(encoded['single'], encoded['multi'])
def main(file_path1: str, file_path2: str):
    """
    Load two files, compare their contents.

    Each file should be a series of pickled objects, mostly real-Pandas
    objects but potentially other types as well.
    """
    # zip_longest with a NotInFile sentinel makes a length mismatch fail the
    # comparison instead of silently truncating the shorter stream.
    for obj1, obj2 in zip_longest(load_contents(file_path1),
                                  load_contents(file_path2),
                                  fillvalue=NotInFile()):
        print(obj1, obj2)
        # Dispatch on the type of the first object; pandas/numpy containers
        # need their dedicated equality helpers.
        if isinstance(obj1, pd.DataFrame):
            assert_frame_equal(obj1, obj2)
        elif isinstance(obj1, pd.Series):
            assert_series_equal(obj1, obj2)
        elif isinstance(obj1, pd.Index):
            assert_index_equal(obj1, obj2)
        elif isinstance(obj1, np.ndarray):
            assert np.array_equal(obj1, obj2), f"{obj1} != {obj2}"
        else:
            assert obj1 == obj2, f"{obj1} != {obj2}"
    print("Everything looks the same, hurrah.")
def test_dict_specified_geometry(self):
    # Construct from a dict using a non-default geometry column name.
    data = {
        "A": range(3),
        "B": np.arange(3.0),
        "other_geom": [Point(x, x) for x in range(3)],
    }
    df = GeoDataFrame(data, geometry="other_geom")
    check_geodataframe(df, "other_geom")

    # Naming a geometry column that does not exist must fail.
    with pytest.raises(ValueError):
        df = GeoDataFrame(data, geometry="geometry")

    # when no geometry specified -> works but raises error once
    # trying to access geometry
    df = GeoDataFrame(data)
    with pytest.raises(AttributeError):
        _ = df.geometry
    df = df.set_geometry("other_geom")
    check_geodataframe(df, "other_geom")

    # combined with custom args
    df = GeoDataFrame(data, geometry="other_geom",
                      columns=["B", "other_geom"])
    check_geodataframe(df, "other_geom")
    assert_index_equal(df.columns, pd.Index(["B", "other_geom"]))
    assert_series_equal(df["B"], pd.Series(np.arange(3.0), name="B"))

    df = GeoDataFrame(data, geometry="other_geom",
                      columns=["other_geom", "A"])
    check_geodataframe(df, "other_geom")
    assert_index_equal(df.columns, pd.Index(["other_geom", "A"]))
    assert_series_equal(df["A"], pd.Series(range(3), name="A"))
def assertPandasEqual(self, left, right, check_exact=True):
    """Assert two pandas objects (DataFrame/Series/Index) are equal,
    re-raising with both operands and their dtypes appended to the message
    for easier debugging."""
    if isinstance(left, pd.DataFrame) and isinstance(right, pd.DataFrame):
        try:
            # Skip index/column type checks on empty axes, where dtype
            # inference makes the comparison unreliable.
            assert_frame_equal(
                left,
                right,
                check_index_type=("equiv" if len(left.index) > 0 else False),
                check_column_type=("equiv" if len(left.columns) > 0 else False),
                check_exact=check_exact,
            )
        except AssertionError as e:
            msg = (str(e) +
                   "\n\nLeft:\n%s\n%s" % (left, left.dtypes) +
                   "\n\nRight:\n%s\n%s" % (right, right.dtypes))
            raise AssertionError(msg) from e
    elif isinstance(left, pd.Series) and isinstance(right, pd.Series):
        try:
            assert_series_equal(
                left,
                right,
                check_index_type=("equiv" if len(left.index) > 0 else False),
                check_exact=check_exact,
            )
        except AssertionError as e:
            msg = (str(e) +
                   "\n\nLeft:\n%s\n%s" % (left, left.dtype) +
                   "\n\nRight:\n%s\n%s" % (right, right.dtype))
            raise AssertionError(msg) from e
    elif isinstance(left, pd.Index) and isinstance(right, pd.Index):
        try:
            assert_index_equal(left, right, check_exact=check_exact)
        except AssertionError as e:
            msg = (str(e) +
                   "\n\nLeft:\n%s\n%s" % (left, left.dtype) +
                   "\n\nRight:\n%s\n%s" % (right, right.dtype))
            raise AssertionError(msg) from e
    else:
        # Anything that is not a matching pandas pair is a caller error.
        raise ValueError("Unexpected values: (%s, %s)" % (left, right))
def test_empty_df():
    """Empty frames keep whichever axis (index or columns) was supplied, and
    assigning a column onto an empty frame matches pandas behavior."""
    # Index-only frame: index preserved, no columns.
    df = pd.DataFrame(index=["a", "b"])
    df_is_empty(df)
    assert_index_equal(df.index, pd.Index(["a", "b"]))
    assert len(df.columns) == 0

    # Columns-only frame: columns preserved, no rows.
    df = pd.DataFrame(columns=["a", "b"])
    df_is_empty(df)
    assert len(df.index) == 0
    assert_index_equal(df.columns, pd.Index(["a", "b"]))

    # Fully empty frame.
    df = pd.DataFrame()
    df_is_empty(df)
    assert len(df.index) == 0
    assert len(df.columns) == 0

    # NOTE(review): the three checks above were previously duplicated
    # verbatim; the copy-pasted repeat added no coverage and was removed.

    # Column assignment onto an empty frame must match pandas: ints...
    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = [1, 2, 3, 4, 5]
    pd_df["a"] = [1, 2, 3, 4, 5]
    df_equals(df, pd_df)

    # ...strings...
    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = list("ABCDEF")
    pd_df["a"] = list("ABCDEF")
    df_equals(df, pd_df)

    # ...and Series.
    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = pd.Series([1, 2, 3, 4, 5])
    pd_df["a"] = pandas.Series([1, 2, 3, 4, 5])
    df_equals(df, pd_df)
def test_getitem():
    # Column selection on a SparseFrame backed by a 10x10 identity matrix,
    # so column k is the k-th unit vector.
    id_ = np.identity(10)
    sf = SparseFrame(id_, columns=list('abcdefghij'))
    # Single-label selection returns the corresponding unit column.
    assert sf['a'].data.todense()[0] == 1
    assert sf['j'].data.todense()[9] == 1
    # List selection preserves the requested column order.
    assert np.all(sf[['a', 'b']].data.todense() ==
                  np.asmatrix(id_[:, [0, 1]]))
    tmp = sf[['j', 'a']].data.todense()
    assert tmp[9, 0] == 1
    assert tmp[0, 1] == 1
    assert (sf[list('abcdefghij')].data.todense() == np.identity(10)).all()
    # Empty selection yields zero columns but keeps the column-index type.
    assert sf[[]].shape == (10, 0)
    assert len(sf[[]].columns) == 0
    assert isinstance(sf.columns, type(sf[[]].columns))
    # None is not a valid key.
    with pytest.raises(ValueError):
        sf[None]
    # Any iterable of labels (Index, Series, list, tuple, ndarray) selects
    # the same columns.
    idx = pd.Index(list('abc'))
    pdt.assert_index_equal(idx, sf[idx].columns)
    pdt.assert_index_equal(idx, sf[idx.to_series()].columns)
    pdt.assert_index_equal(idx, sf[idx.tolist()].columns)
    pdt.assert_index_equal(idx, sf[tuple(idx)].columns)
    pdt.assert_index_equal(idx, sf[idx.values].columns)
def test_computation_input_custom_index(index):
    """Predictions from a ComponentGraph fed a custom-index frame come back
    with a fresh default RangeIndex and no missing values."""
    graph_spec = {
        'OneHot': [OneHotEncoder],
        'Random Forest': [RandomForestClassifier, 'OneHot.x'],
        'Elastic Net': [ElasticNetClassifier, 'OneHot.x'],
        'Logistic Regression': [
            LogisticRegressionClassifier, 'Random Forest', 'Elastic Net'
        ],
    }
    features = pd.DataFrame(
        {
            "categories": [f"cat_{i}" for i in range(5)],
            "numbers": np.arange(5),
        },
        index=index,
    )
    target = pd.Series([1, 2, 1, 2, 1])

    component_graph = ComponentGraph(graph_spec)
    component_graph.instantiate({})
    component_graph.fit(features, target)

    predictions = component_graph.predict(features).to_series()
    assert_index_equal(predictions.index,
                       pd.RangeIndex(start=0, stop=5, step=1))
    assert not predictions.isna().any(axis=None)
def test_get_x_orientation(self, stack, params_getx):
    """Exercise x-oriented chains: frame contents, paint/toggle round-trips,
    string form and contents metadata."""
    x, y, expected = params_getx
    chains = complex_chain(stack, x, y, self._VIEWS, self._VIEW_KEYS, 'x',
                           incl_tests=False, incl_sum=False)
    for chain, args in zip(chains, expected):
        values, index, columns, pindex, pcolumns, chain_str = args
        expected_dataframe = frame(values, multi_index(index),
                                   multi_index(columns))
        painted_index = multi_index(pindex)
        painted_columns = multi_index(pcolumns)
        ### Test Chain.dataframe is Chain._frame (intentional identity check)
        assert chain.dataframe is chain._frame
        ### Test Chain attributes.  FIX: was `is 'x'` -- identity comparison
        ### with a string literal is implementation-dependent and raises a
        ### SyntaxWarning on CPython >= 3.8; equality is the intended check.
        assert chain.orientation == 'x'
        ### Test Chain.get
        assert_frame_equal(chain.dataframe, expected_dataframe)
        ### Test Chain.paint
        chain.paint()
        assert_index_equal(chain.dataframe.index, painted_index)
        assert_index_equal(chain.dataframe.columns, painted_columns)
        ### Test Chain.toggle_labels: one toggle restores the raw labels,
        ### a second re-applies the painted ones.
        chain.toggle_labels()
        assert_frame_equal(chain.dataframe, expected_dataframe)
        chain.toggle_labels()
        assert_index_equal(chain.dataframe.index, painted_index)
        assert_index_equal(chain.dataframe.columns, painted_columns)
        ### Test Chain str/ len
        assert str(chain) == chain_str
        ### Test Contents
        assert chain.contents == parameters.CONTENTS
def test_dict_of_series(self):
    # GeoDataFrame built from a dict of Series aligns on the series indexes.
    data = {
        "A": pd.Series(range(3)),
        "B": pd.Series(np.arange(3.0)),
        "geometry": GeoSeries([Point(x, x) for x in range(3)]),
    }
    df = GeoDataFrame(data)
    check_geodataframe(df)

    # An explicit index selects/aligns the matching rows from each series.
    df = GeoDataFrame(data, index=pd.Index([1, 2]))
    check_geodataframe(df)
    assert_index_equal(df.index, pd.Index([1, 2]))
    assert df["A"].tolist() == [1, 2]

    # one non-series -> length is not correct
    data = {
        "A": pd.Series(range(3)),
        "B": np.arange(3.0),
        "geometry": GeoSeries([Point(x, x) for x in range(3)]),
    }
    with pytest.raises(ValueError):
        GeoDataFrame(data, index=[1, 2])
def test_market_breaks(self):
    # Drive a minute-resolution simulation clock over two XTKS (Tokyo)
    # sessions and check that lunch-break minutes are excluded.
    calendar = get_calendar("XTKS")
    sessions = calendar.sessions_in_range(
        pd.Timestamp("2021-06-14", tz="utc"),
        pd.Timestamp("2021-06-15", tz="utc"))
    trading_o_and_c = calendar.schedule.loc[sessions]
    opens = trading_o_and_c['market_open']
    closes = trading_o_and_c['market_close']
    break_starts = trading_o_and_c['break_start']
    break_ends = trading_o_and_c['break_end']
    clock = MinuteSimulationClock(
        sessions, opens, closes,
        days_at_time(sessions, time(8, 45), "Japan"),
        break_starts, break_ends, False)
    all_events = list(clock)
    all_events = pd.DataFrame(all_events,
                              columns=["date", "event"]).set_index("date")
    bar_events = all_events[all_events.event == BAR]
    # XTKS is open 9am - 3pm with a 1 hour lunch break from 11:30am - 12:30pm
    # 2 days x 300 minutes per day
    self.assertEqual(len(bar_events), 600)
    # The bars straddling the break: the last two before 11:30 and the
    # first two after 12:30 are adjacent in the event stream.
    assert_index_equal(
        bar_events.tz_convert("Japan").iloc[148:152].index,
        pd.DatetimeIndex([
            '2021-06-14 11:29:00', '2021-06-14 11:30:00',
            '2021-06-14 12:31:00', '2021-06-14 12:32:00'
        ], tz="Japan", name="date"))
def test_stroke_gdf(self):
    # COINS stroke aggregation: the fixture's segments collapse into 10
    # strokes, indexed by stroke_group.
    coins = mm.COINS(self.gdf)
    result = coins.stroke_gdf()
    assert result.shape == (10, 2)
    expected_index = pd.Index(list(range(10)), name="stroke_group")
    assert_index_equal(result.index, expected_index)
    expected_segments = pd.Series(
        [8, 19, 17, 13, 5, 14, 2, 3, 3, 5],
        name="n_segments",
        index=expected_index,
    )
    assert_series_equal(result["n_segments"], expected_segments)
    # Total geometry length must be conserved by the merge.
    assert result.length.sum() == pytest.approx(self.gdf.length.sum())
    expected = pd.Series(
        [
            839.5666838320316,
            759.0900425060918,
            744.7579337248078,
            1019.7095084794428,
            562.2466914415573,
            1077.3606756995746,
            193.04063727323836,
            187.49184699173748,
            182.6849740039611,
            382.50195042922803,
        ],
        index=expected_index,
    )
    # NOTE(review): check_less_precise is deprecated (removed in newer
    # pandas); rtol/atol is the modern replacement -- confirm the pinned
    # pandas version before changing this call.
    assert_series_equal(
        result.length, expected, check_less_precise=6, check_exact=False
    )
def test_index(tmp_path):
    # set up dataset
    df = geopandas.read_file(
        geopandas.datasets.get_path("naturalearth_lowres"))
    # get meaningful index by shuffling (hilbert distance)
    df = dask_geopandas.from_geopandas(
        df, npartitions=2).spatial_shuffle().compute()
    ddf = dask_geopandas.from_geopandas(df, npartitions=4)

    # roundtrip preserves the index by default
    basedir = tmp_path / "dataset"
    ddf.to_feather(basedir)
    result = dask_geopandas.read_feather(basedir)
    assert "hilbert_distance" not in result.columns
    assert result.index.name == "hilbert_distance"
    assert_index_equal(result.index.compute(), df.index)

    # TODO not setting the index
    with pytest.raises(NotImplementedError):
        result = dask_geopandas.read_feather(basedir, index=False)
    # assert "hilbert_distance" in result.columns
    # assert result.index.name is None

    # setting specific columns as the index
    result = dask_geopandas.read_feather(basedir, index="iso_a3")
    assert "iso_a3" not in result.columns
    assert result.index.name == "iso_a3"
    assert_geodataframe_equal(result.compute(), df.set_index("iso_a3"))

    # not writing the index
    basedir = tmp_path / "dataset"
    ddf.to_feather(basedir, write_index=False)
    result = dask_geopandas.read_feather(basedir)
    assert "hilbert_distance" not in result.columns
    assert result.index.name is None
    # Without a stored index, reading yields a default 0-based index.
    assert result.index.compute()[0] == 0
def test_groupby_dataframe_mad(self):
    # mad() on the ed_ (presumably Elasticsearch-backed) frame must mirror
    # pandas both standalone and inside a multi-function aggregate --
    # compared on columns, index and dtypes.
    pd_flights = self.pd_flights().filter(self.filter_data + ["DestCountry"])
    ed_flights = self.ed_flights().filter(self.filter_data + ["DestCountry"])

    pd_mad = pd_flights.groupby("DestCountry").mad()
    ed_mad = ed_flights.groupby("DestCountry").mad()
    assert_index_equal(pd_mad.columns, ed_mad.columns)
    assert_index_equal(pd_mad.index, ed_mad.index)
    assert_series_equal(pd_mad.dtypes, ed_mad.dtypes)

    pd_min_mad = pd_flights.groupby("DestCountry").aggregate(["min", "mad"])
    ed_min_mad = ed_flights.groupby("DestCountry").aggregate(["min", "mad"])
    assert_index_equal(pd_min_mad.columns, ed_min_mad.columns)
    assert_index_equal(pd_min_mad.index, ed_min_mad.index)
    assert_series_equal(pd_min_mad.dtypes, ed_min_mad.dtypes)
def test_all_full_day_holidays_since_1928(request): """ Perform a full comparison of all known full day NYSE holidays since 1928/01/01 and make sure that it matches. """ # get the expected dates from the csv file expected = pd.read_csv(os.path.join(request.fspath.dirname, 'data', 'nyse_all_full_day_holidays_since_1928.csv'), index_col=0, parse_dates=True, header=None).index expected.name = None # calculated expected going direct to the underlying regular and ad_hoc calendars nyse = xnys_cal actual = pd.DatetimeIndex(nyse.adhoc_holidays).tz_convert(None).sort_values() slice_locs = actual.slice_locs(expected[0], expected[-1]) actual = actual[slice_locs[0]:slice_locs[1]] actual = actual.append(nyse.regular_holidays.holidays(expected[0], expected[-1])) actual = actual.sort_values().unique() assert_index_equal(expected, actual) # using the holidays method actual = pd.DatetimeIndex(nyse.holidays().holidays).unique() slice_locs = actual.slice_locs(expected[0], expected[-1]) actual = actual[slice_locs[0]:slice_locs[1]] assert_index_equal(expected, actual)
def test_date_range_lower_freq():
    # Frequencies below one day must be produced by converting a 1D range,
    # not requested from date_range directly.
    cal = mcal.get_calendar("NYSE")
    schedule = cal.schedule(pd.Timestamp('2017-09-05 20:00', tz='UTC'),
                            pd.Timestamp('2017-10-23 20:00', tz='UTC'))

    # cannot get date range of frequency lower than 1D
    with pytest.raises(ValueError):
        mcal.date_range(schedule, frequency='3D')

    # instead get for 1D and convert to lower frequency
    short = mcal.date_range(schedule, frequency='1D')
    actual = mcal.convert_freq(short, '3D')
    expected = pd.date_range('2017-09-05 20:00', '2017-10-23 20:00',
                             freq='3D', tz='UTC')
    assert_index_equal(actual, expected)

    actual = mcal.convert_freq(short, '1W')
    expected = pd.date_range('2017-09-05 20:00', '2017-10-23 20:00',
                             freq='1W', tz='UTC')
    assert_index_equal(actual, expected)
def _مکمل(سے, تک, اعداد):
    # NOTE(review): identifiers are Urdu -- roughly _complete(start, end,
    # figures); confirm translations with the maintainers.  Checks that the
    # object's روزانہ() (apparently "daily") series, after dropping all-NaN
    # rows, covers exactly the daily period range [start, end].
    pdt.assert_index_equal(
        اعداد.روزانہ().dropna(how='all').index,
        pd.period_range(سے, تک, freq='D'))
def assert_geodataframe_equal(left, right, check_dtype=True,
                              check_index_type='equiv',
                              check_column_type='equiv',
                              check_frame_type=True,
                              check_like=False,
                              check_less_precise=False,
                              check_geom_type=False,
                              check_crs=True):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If true, ignore the order of rows & columns
    check_less_precise : bool, default False
        If True, use geom_almost_equals. if False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs: bool, default True
        If `check_frame_type` is True, then also check that the crs matches.
    """
    try:
        # added from pandas 0.20
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if check_frame_type:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))
        if check_crs:
            # no crs can be either None or {}
            if not left.crs and not right.crs:
                pass
            else:
                assert left.crs == right.crs
    else:
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)

    # shape comparison
    assert left.shape == right.shape, (
        'GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n'
        'Left columns: {lcols!r}, right columns: {rcols!r}'.format(
            lshape=left.shape, rshape=right.shape,
            lcols=left.columns, rcols=right.columns))

    if check_like:
        # Reorder left's rows/columns to match right so order is ignored.
        left, right = left.reindex_like(right), right

    # column comparison
    assert_index_equal(left.columns, right.columns,
                       exact=check_column_type, obj='GeoDataFrame.columns')

    # geometry comparison (crs already checked above, so skip it here)
    assert_geoseries_equal(
        left.geometry, right.geometry, check_dtype=check_dtype,
        check_less_precise=check_less_precise,
        check_geom_type=check_geom_type, check_crs=False)

    # drop geometries and check remaining columns
    left2 = left.drop([left._geometry_column_name], axis=1)
    right2 = right.drop([right._geometry_column_name], axis=1)
    assert_frame_equal(left2, right2, check_dtype=check_dtype,
                       check_index_type=check_index_type,
                       check_column_type=check_column_type,
                       obj='GeoDataFrame')