Example #1
    def test_summary(self):
        infl1 = self.infl1
        infl0 = self.infl0

        df0 = infl0.summary_frame()
        df1 = infl1.summary_frame()
        assert_allclose(df0.values, df1.values, rtol=5e-5)
        pdt.assert_index_equal(df0.index, df1.index)
Example #2
    def test_summary(self):
        infl1 = self.infl1
        infl0 = self.infl0

        df0 = infl0.summary_frame()
        df1 = infl1.summary_frame()
        # just some basic check on overlap except for dfbetas
        cols = ['cooks_d', 'standard_resid', 'hat_diag', 'dffits_internal']
        assert_allclose(df0[cols].values, df1[cols].values, rtol=1e-5)
        pdt.assert_index_equal(df0.index, df1.index)
Example #3
    def test_me_breakpoints(self):
        results = web.DataReader("ME_Breakpoints",
                                 "famafrench",
                                 start="2010-01-01",
                                 end="2010-12-01")
        assert isinstance(results, dict)
        assert len(results) == 2
        assert results[0].shape == (12, 21)

        exp_columns = pd.Index(
            [
                "Count",
                (0, 5),
                (5, 10),
                (10, 15),
                (15, 20),
                (20, 25),
                (25, 30),
                (30, 35),
                (35, 40),
                (40, 45),
                (45, 50),
                (50, 55),
                (55, 60),
                (60, 65),
                (65, 70),
                (70, 75),
                (75, 80),
                (80, 85),
                (85, 90),
                (90, 95),
                (95, 100),
            ],
            dtype="object",
        )
        tm.assert_index_equal(results[0].columns, exp_columns)

        exp_index = pd.period_range("2010-01-01",
                                    "2010-12-01",
                                    freq="M",
                                    name="Date")
        tm.assert_index_equal(results[0].index, exp_index)
Example #4
    def test_laps(self):
        activity = heartandsole.Activity(pd.DataFrame([]),
                                         laps=pd.DataFrame.from_dict({
                                             'mine_a': ['a', 'a'],
                                             'mine_1': [1, 1],
                                             'other_b': ['b', 'b'],
                                             'c_mine': ['c', 'c']
                                         }))

        result = activity.mine.laps
        expected = pd.DataFrame.from_dict({
            'a': ['a', 'a'],
            '1': [1, 1],
            'c': ['c', 'c'],
        })
        tm.assert_frame_equal(result, expected)

        result = activity.mine.lap_cols
        expected = pd.Index(['mine_a', 'mine_1', 'c_mine'])
        tm.assert_index_equal(result, expected)
Example #5
def test_swap_time_to_year(test_df, inplace):
    """Swap time column for year (int) dropping subannual time resolution (default)"""

    if test_df.time_col == "year":
        pytest.skip(
            "IamDataFrame with time domain `year` not relevant for this test.")

    exp = test_df.data
    exp["year"] = exp["time"].apply(lambda x: x.year)
    exp = exp.drop("time", axis="columns")
    exp = IamDataFrame(exp, meta=test_df.meta)

    obs = test_df.swap_time_for_year(inplace=inplace)

    if inplace:
        assert obs is None
        obs = test_df

    assert_iamframe_equal(obs, exp)
    pdt.assert_index_equal(obs.time, pd.Index([2005, 2010], name="time"))
Example #6
    def test_premerge(self):
        coins = mm.COINS(self.gdf)
        result = coins._premerge()

        assert result.shape == (89, 8)
        expected_columns = pd.Index(
            [
                "orientation",
                "links_p1",
                "links_p2",
                "best_p1",
                "best_p2",
                "p1_final",
                "p2_final",
                "geometry",
            ]
        )
        assert_index_equal(result.columns, expected_columns)

        assert not result.isna().any().any()
Example #7
    def test_jsonfetcher_get_evaluation_mapping(self):
        """
        get_evaluation() should return an Evaluation corresponding to the small
        dataset and mapping.

        Tests whether exclusion and renaming works when remapping.
        """
        fetcher = JSONFetcher('tests/sample_data/dataframe_small.json',
                              mapping={"project": "proj"})
        result = fetcher.get_evaluation().get_df()

        # test renaming
        expected_projects = pd.Series(
            ["ibex", "oneblink", "litex-linux", "oneblink", "litex-linux"],
            name="proj")
        assert_series_equal(result["proj"], expected_projects)

        # test exclusion
        expected_columns = pd.Index(["proj"])  # should not have other columns
        assert_index_equal(result.columns, expected_columns)
Example #8
 def test_columns(self):
     """
     Test if history.to_dataframe has the expected columns.
     """
     df_history = self._get_df_history()
     expected = pd.Index(
         [
             "trade_id",
             "asset",
             "lot",
             "open_bar",
             "close_bar",
             "shut_bar",
             "take",
             "stop",
             "pnl",
         ]
     )
     assert df_history.index.name == "order_id"
     assert_index_equal(df_history.columns, expected)
Example #9
def morton_distance_dask(geoseries):

    bounds = geoseries.bounds.to_numpy()
    total_bounds = geoseries.total_bounds
    x_coords, y_coords = _continuous_to_discrete_coords(
        bounds, level=16, total_bounds=total_bounds)

    ddf = from_geopandas(geoseries, npartitions=1)
    result = ddf.morton_distance().compute()

    expected = []

    for i in range(len(x_coords)):
        x = int(x_coords[i])
        y = int(y_coords[i])
        expected.append(interleave(x, y))

    assert list(result) == expected
    assert isinstance(result, pd.Series)
    assert_index_equal(ddf.index.compute(), result.index)
Example #10
    def test_baseline_with_backcast_projection_off(self) -> None:
        """Assert that microsim simulation results only have positive time steps"""
        population_simulation = PopulationSimulationFactory.build_population_simulation(
            self.test_outflows_data,
            self.test_transitions_data,
            self.test_total_population_data,
            self.simulation_architecture,
            ["crime"],
            self.user_inputs,
            [],
            0,
            self.test_transitions_data,
            True,
            False,
            None,
        )
        projection = population_simulation.simulate_policies()

        assert_index_equal(projection.index.unique().sort_values(),
                           pd.Int64Index(range(10)))
Example #11
def assert_eq(left, right, **kwargs):
    """ Assert that two cudf-like things are equivalent

    This equality test works for pandas/cudf dataframes/series/indexes/scalars
    in the same way, and so makes it easier to perform parametrized testing
    without switching between assert_frame_equal/assert_series_equal/...
    functions.
    """
    __tracebackhide__ = True

    if hasattr(left, "to_pandas"):
        left = left.to_pandas()
    if hasattr(right, "to_pandas"):
        right = right.to_pandas()
    if isinstance(left, cupy.ndarray):
        left = cupy.asnumpy(left)
    if isinstance(right, cupy.ndarray):
        right = cupy.asnumpy(right)

    if isinstance(left, pd.DataFrame):
        tm.assert_frame_equal(left, right, **kwargs)
    elif isinstance(left, pd.Series):
        tm.assert_series_equal(left, right, **kwargs)
    elif isinstance(left, pd.Index):
        tm.assert_index_equal(left, right, **kwargs)
    elif isinstance(left, np.ndarray) and isinstance(right, np.ndarray):
        if np.issubdtype(left.dtype, np.floating) and np.issubdtype(
            right.dtype, np.floating
        ):
            assert np.allclose(left, right, equal_nan=True)
        else:
            assert np.array_equal(left, right)
    else:
        if left == right:
            return True
        else:
            if np.isnan(left):
                assert np.isnan(right)
            else:
                assert np.allclose(left, right, equal_nan=True)
    return True
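For context, a helper like this is usually called from parametrized tests so one assertion covers frames, series, indexes, and scalars alike. The following is a minimal illustrative sketch (not part of the original example) that assumes cudf is installed and that assert_eq is the function defined above:

import cudf
import pandas as pd
import pytest

@pytest.mark.parametrize("pobj", [
    pd.DataFrame({"a": [1, 2, 3]}),
    pd.Series([1.0, 2.0, 3.0], name="a"),
    pd.Index([1, 2, 3]),
])
def test_roundtrip(pobj):
    # cudf.from_pandas accepts DataFrame, Series and Index objects;
    # assert_eq converts the cudf result back to pandas before comparing.
    gobj = cudf.from_pandas(pobj)
    assert_eq(gobj, pobj)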
Example #12
def _assert_frame_equal(left_df,
                        right_df,
                        ignore_index=False,
                        ignore_columns=False,
                        ignore_directionality=False,
                        decimal=7):
    # assert_frame_equal doesn't like None...
    if left_df is None or right_df is None:
        assert left_df is None and right_df is None
    else:
        left_values = left_df.values
        right_values = right_df.values
        if ignore_directionality:
            left_values, right_values = _normalize_signs(
                left_values, right_values)
        npt.assert_almost_equal(left_values, right_values, decimal=decimal)

        if not ignore_index:
            pdt.assert_index_equal(left_df.index, right_df.index)
        if not ignore_columns:
            pdt.assert_index_equal(left_df.columns, right_df.columns)
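A helper with an ignore_directionality flag like this is typical for ordination-style results, where axes are only defined up to a sign flip. Below is a small illustrative call (an assumption about how _normalize_signs behaves, not part of the original code):

import pandas as pd

left = pd.DataFrame({"PC1": [0.5, -0.5], "PC2": [0.7, 0.7]})
right = pd.DataFrame({"PC1": [-0.5, 0.5], "PC2": [0.7, 0.7]})  # PC1 flipped

# The sign flip in PC1 is expected to be normalized away before the
# element-wise almost-equal comparison; index and columns are still checked.
_assert_frame_equal(left, right, ignore_directionality=True)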
Example #13
    def test_cv_genes(self):
        npr1, ngs1 = ManagePriors.cross_validate_gold_standard(
            self.priors_data, self.gold_standard, 0, 0.5, 42)
        self.assertEqual(npr1.shape, ngs1.shape)
        self.assertEqual(len(npr1.index.intersection(ngs1.index)), 0)
        pdt.assert_index_equal(npr1.columns, self.priors_data.columns)
        pdt.assert_index_equal(ngs1.columns, self.gold_standard.columns)

        npr2, ngs2 = ManagePriors.cross_validate_gold_standard(
            self.priors_data, self.gold_standard, 0, 0.5, 43)
        npr3, ngs3 = ManagePriors.cross_validate_gold_standard(
            self.priors_data, self.gold_standard, 0, 0.5, 42)

        pdt.assert_frame_equal(npr1, npr3)
        pdt.assert_frame_equal(ngs1, ngs3)

        with self.assertRaises(AssertionError):
            pdt.assert_frame_equal(npr1, npr2)

        with self.assertRaises(AssertionError):
            pdt.assert_frame_equal(ngs1, ngs2)
Example #14
    def test_create_ids(self):
        tested = self.ids_cls.from_dict({'a': [0]})
        pdt.assert_index_equal(tested.index, self._circuit_ids(['a'], [0]))

        tested = self.ids_cls.from_dict({'a': [0, 1]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(['a', 'a'], [0, 1]))

        tested = self.ids_cls.from_dict({'a': [0], 'b': [0]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(['a', 'b'], [0, 0]))

        tested = self.ids_cls.from_dict({'a': [0], 'b': [1]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(['a', 'b'], [0, 1]))

        # keep duplicates
        tested = self.ids_cls.from_dict({'a': [0, 0]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(['a', 'a'], [0, 0]))
        assert tested.index.size == 2
Example #15
def test_append_index():
    """Assert that appending and re-ordering to an index works as expected"""

    index = pd.MultiIndex(
        codes=[[0, 1]],
        levels=[["scen_a", "scen_b"]],
        names=["scenario"],
    )

    obs = append_index_level(index,
                             0,
                             "World",
                             "region",
                             order=["region", "scenario"])

    exp = pd.MultiIndex(
        codes=[[0, 0], [0, 1]],
        levels=[["World"], ["scen_a", "scen_b"]],
        names=["region", "scenario"],
    )
    pdt.assert_index_equal(obs, exp)
Example #16
    def test_create_ids(self):
        tested = self.ids_cls.from_dict({"a": [0]})
        pdt.assert_index_equal(tested.index, self._circuit_ids(["a"], [0]))

        tested = self.ids_cls.from_dict({"a": [0, 1]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(["a", "a"], [0, 1]))

        tested = self.ids_cls.from_dict({"a": [0], "b": [0]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(["a", "b"], [0, 0]))

        tested = self.ids_cls.from_dict({"a": [0], "b": [1]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(["a", "b"], [0, 1]))

        # keep duplicates
        tested = self.ids_cls.from_dict({"a": [0, 0]})
        pdt.assert_index_equal(tested.index,
                               self._circuit_ids(["a", "a"], [0, 0]))
        assert tested.index.size == 2
Example #17
    def assert_option_result(self, df):
        """
        Validate returned option data has expected format.
        """
        assert isinstance(df, pd.DataFrame)
        assert len(df) > 1

        exp_columns = pd.Index([
            "Last",
            "Bid",
            "Ask",
            "Chg",
            "PctChg",
            "Vol",
            "Open_Int",
            "IV",
            "Root",
            "IsNonstandard",
            "Underlying",
            "Underlying_Price",
            "Quote_Time",
            "Last_Trade_Date",
            "JSON",
        ])
        tm.assert_index_equal(df.columns, exp_columns)
        assert df.index.names == [u"Strike", u"Expiry", u"Type", u"Symbol"]

        dtypes = [
            np.dtype(x) for x in ["float64"] * 7 + [
                "float64",
                "object",
                "bool",
                "object",
                "float64",
                "datetime64[ns]",
                "datetime64[ns]",
                "object",
            ]
        ]
        tm.assert_series_equal(df.dtypes, pd.Series(dtypes, index=exp_columns))
Example #18
    def test_must_not_reset_index(self):
        columns = ['column1', 'column2', 'column3', 'column4']
        df = pd.DataFrame([[i, i, i, i] for i in range(10)], columns=columns)
        df = df.iloc[2:8, :]
        target_columns = ['column1', 'column2', 'column3']

        single_process_encoder = encoders.HashingEncoder(max_process=1,
                                                         cols=target_columns)
        single_process_encoder.fit(df, None)
        df_encoded_single_process = single_process_encoder.transform(df)
        assert_index_equal(df.index, df_encoded_single_process.index)
        assert df.shape[0] == pd.concat([df, df_encoded_single_process],
                                        axis=1).shape[0]

        multi_process_encoder = encoders.HashingEncoder(cols=target_columns)
        multi_process_encoder.fit(df, None)
        df_encoded_multi_process = multi_process_encoder.transform(df)
        assert_index_equal(df.index, df_encoded_multi_process.index)
        assert df.shape[0] == pd.concat([df, df_encoded_multi_process],
                                        axis=1).shape[0]

        assert_frame_equal(df_encoded_single_process, df_encoded_multi_process)
Example #19
def main(file_path1: str, file_path2: str):
    """
    Load two files, compare their contents.

    Each file should be a series of pickled objects, mostly real-Pandas objects
    but potentially other types as well.
    """
    for obj1, obj2 in zip_longest(load_contents(file_path1),
                                  load_contents(file_path2),
                                  fillvalue=NotInFile()):
        print(obj1, obj2)
        if isinstance(obj1, pd.DataFrame):
            assert_frame_equal(obj1, obj2)
        elif isinstance(obj1, pd.Series):
            assert_series_equal(obj1, obj2)
        elif isinstance(obj1, pd.Index):
            assert_index_equal(obj1, obj2)
        elif isinstance(obj1, np.ndarray):
            assert np.array_equal(obj1, obj2), f"{obj1} != {obj2}"
        else:
            assert obj1 == obj2, f"{obj1} != {obj2}"
    print("Everything looks the same, hurrah.")
Example #20
    def test_dict_specified_geometry(self):

        data = {
            "A": range(3),
            "B": np.arange(3.0),
            "other_geom": [Point(x, x) for x in range(3)],
        }

        df = GeoDataFrame(data, geometry="other_geom")
        check_geodataframe(df, "other_geom")

        with pytest.raises(ValueError):
            df = GeoDataFrame(data, geometry="geometry")

        # when no geometry specified -> works but raises error once
        # trying to access geometry
        df = GeoDataFrame(data)

        with pytest.raises(AttributeError):
            _ = df.geometry

        df = df.set_geometry("other_geom")
        check_geodataframe(df, "other_geom")

        # combined with custom args
        df = GeoDataFrame(data,
                          geometry="other_geom",
                          columns=["B", "other_geom"])
        check_geodataframe(df, "other_geom")
        assert_index_equal(df.columns, pd.Index(["B", "other_geom"]))
        assert_series_equal(df["B"], pd.Series(np.arange(3.0), name="B"))

        df = GeoDataFrame(data,
                          geometry="other_geom",
                          columns=["other_geom", "A"])
        check_geodataframe(df, "other_geom")
        assert_index_equal(df.columns, pd.Index(["other_geom", "A"]))
        assert_series_equal(df["A"], pd.Series(range(3), name="A"))
Example #21
 def assertPandasEqual(self, left, right, check_exact=True):
     if isinstance(left, pd.DataFrame) and isinstance(right, pd.DataFrame):
         try:
             assert_frame_equal(
                 left,
                 right,
                 check_index_type=("equiv"
                                   if len(left.index) > 0 else False),
                 check_column_type=("equiv"
                                    if len(left.columns) > 0 else False),
                 check_exact=check_exact,
             )
         except AssertionError as e:
             msg = (str(e) + "\n\nLeft:\n%s\n%s" % (left, left.dtypes) +
                    "\n\nRight:\n%s\n%s" % (right, right.dtypes))
             raise AssertionError(msg) from e
     elif isinstance(left, pd.Series) and isinstance(right, pd.Series):
         try:
             assert_series_equal(
                 left,
                 right,
                 check_index_type=("equiv"
                                   if len(left.index) > 0 else False),
                 check_exact=check_exact,
             )
         except AssertionError as e:
             msg = (str(e) + "\n\nLeft:\n%s\n%s" % (left, left.dtype) +
                    "\n\nRight:\n%s\n%s" % (right, right.dtype))
             raise AssertionError(msg) from e
     elif isinstance(left, pd.Index) and isinstance(right, pd.Index):
         try:
             assert_index_equal(left, right, check_exact=check_exact)
         except AssertionError as e:
             msg = (str(e) + "\n\nLeft:\n%s\n%s" % (left, left.dtype) +
                    "\n\nRight:\n%s\n%s" % (right, right.dtype))
             raise AssertionError(msg) from e
     else:
         raise ValueError("Unexpected values: (%s, %s)" % (left, right))
Example #22
def test_empty_df():
    df = pd.DataFrame(index=["a", "b"])
    df_is_empty(df)
    assert_index_equal(df.index, pd.Index(["a", "b"]))
    assert len(df.columns) == 0

    df = pd.DataFrame(columns=["a", "b"])
    df_is_empty(df)
    assert len(df.index) == 0
    assert_index_equal(df.columns, pd.Index(["a", "b"]))

    df = pd.DataFrame()
    df_is_empty(df)
    assert len(df.index) == 0
    assert len(df.columns) == 0

    df = pd.DataFrame(index=["a", "b"])
    df_is_empty(df)
    assert_index_equal(df.index, pd.Index(["a", "b"]))
    assert len(df.columns) == 0

    df = pd.DataFrame(columns=["a", "b"])
    df_is_empty(df)
    assert len(df.index) == 0
    assert_index_equal(df.columns, pd.Index(["a", "b"]))

    df = pd.DataFrame()
    df_is_empty(df)
    assert len(df.index) == 0
    assert len(df.columns) == 0

    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = [1, 2, 3, 4, 5]
    pd_df["a"] = [1, 2, 3, 4, 5]
    df_equals(df, pd_df)

    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = list("ABCDEF")
    pd_df["a"] = list("ABCDEF")
    df_equals(df, pd_df)

    df = pd.DataFrame()
    pd_df = pandas.DataFrame()
    df["a"] = pd.Series([1, 2, 3, 4, 5])
    pd_df["a"] = pandas.Series([1, 2, 3, 4, 5])
    df_equals(df, pd_df)
Example #23
def test_getitem():
    id_ = np.identity(10)
    sf = SparseFrame(id_, columns=list('abcdefghij'))

    assert sf['a'].data.todense()[0] == 1
    assert sf['j'].data.todense()[9] == 1
    assert np.all(sf[['a', 'b']].data.todense() == np.asmatrix(id_[:, [0, 1]]))
    tmp = sf[['j', 'a']].data.todense()
    assert tmp[9, 0] == 1
    assert tmp[0, 1] == 1
    assert (sf[list('abcdefghij')].data.todense() == np.identity(10)).all()
    assert sf[[]].shape == (10, 0)
    assert len(sf[[]].columns) == 0
    assert isinstance(sf.columns, type(sf[[]].columns))
    with pytest.raises(ValueError):
        sf[None]

    idx = pd.Index(list('abc'))
    pdt.assert_index_equal(idx, sf[idx].columns)
    pdt.assert_index_equal(idx, sf[idx.to_series()].columns)
    pdt.assert_index_equal(idx, sf[idx.tolist()].columns)
    pdt.assert_index_equal(idx, sf[tuple(idx)].columns)
    pdt.assert_index_equal(idx, sf[idx.values].columns)
Example #24
def test_computation_input_custom_index(index):
    graph = {
        'OneHot': [OneHotEncoder],
        'Random Forest': [RandomForestClassifier, 'OneHot.x'],
        'Elastic Net': [ElasticNetClassifier, 'OneHot.x'],
        'Logistic Regression':
        [LogisticRegressionClassifier, 'Random Forest', 'Elastic Net']
    }

    X = pd.DataFrame(
        {
            "categories": [f"cat_{i}" for i in range(5)],
            "numbers": np.arange(5)
        },
        index=index)
    y = pd.Series([1, 2, 1, 2, 1])
    component_graph = ComponentGraph(graph)
    component_graph.instantiate({})
    component_graph.fit(X, y)

    X_t = component_graph.predict(X).to_series()
    assert_index_equal(X_t.index, pd.RangeIndex(start=0, stop=5, step=1))
    assert not X_t.isna().any(axis=None)
Example #25
    def test_get_x_orientation(self, stack, params_getx):
        x, y, expected = params_getx

        chains = complex_chain(stack,
                               x,
                               y,
                               self._VIEWS,
                               self._VIEW_KEYS,
                               'x',
                               incl_tests=False,
                               incl_sum=False)

        for chain, args in zip(chains, expected):

            values, index, columns, pindex, pcolumns, chain_str = args

            expected_dataframe = frame(values, multi_index(index),
                                       multi_index(columns))
            painted_index = multi_index(pindex)
            painted_columns = multi_index(pcolumns)

            ### Test Chain.dataframe is Chain._frame
            assert chain.dataframe is chain._frame

            ### Test Chain attributes
            assert chain.orientation == 'x'

            ### Test Chain.get
            assert_frame_equal(chain.dataframe, expected_dataframe)

            ### Test Chain.paint
            chain.paint()
            assert_index_equal(chain.dataframe.index, painted_index)
            assert_index_equal(chain.dataframe.columns, painted_columns)

            ### Test Chain.toggle_labels
            chain.toggle_labels()
            assert_frame_equal(chain.dataframe, expected_dataframe)
            chain.toggle_labels()
            assert_index_equal(chain.dataframe.index, painted_index)
            assert_index_equal(chain.dataframe.columns, painted_columns)

            ### Test Chain str/ len
            assert str(chain) == chain_str

            ### Test Contents
            assert chain.contents == parameters.CONTENTS
Example #26
    def test_dict_of_series(self):

        data = {
            "A": pd.Series(range(3)),
            "B": pd.Series(np.arange(3.0)),
            "geometry": GeoSeries([Point(x, x) for x in range(3)]),
        }

        df = GeoDataFrame(data)
        check_geodataframe(df)

        df = GeoDataFrame(data, index=pd.Index([1, 2]))
        check_geodataframe(df)
        assert_index_equal(df.index, pd.Index([1, 2]))
        assert df["A"].tolist() == [1, 2]

        # one non-series -> length is not correct
        data = {
            "A": pd.Series(range(3)),
            "B": np.arange(3.0),
            "geometry": GeoSeries([Point(x, x) for x in range(3)]),
        }
        with pytest.raises(ValueError):
            GeoDataFrame(data, index=[1, 2])
Example #27
    def test_market_breaks(self):

        calendar = get_calendar("XTKS")

        sessions = calendar.sessions_in_range(
            pd.Timestamp("2021-06-14", tz="utc"),
            pd.Timestamp("2021-06-15", tz="utc"))

        trading_o_and_c = calendar.schedule.loc[sessions]
        opens = trading_o_and_c['market_open']
        closes = trading_o_and_c['market_close']
        break_starts = trading_o_and_c['break_start']
        break_ends = trading_o_and_c['break_end']

        clock = MinuteSimulationClock(
            sessions, opens, closes,
            days_at_time(sessions, time(8, 45), "Japan"), break_starts,
            break_ends, False)

        all_events = list(clock)
        all_events = pd.DataFrame(all_events,
                                  columns=["date", "event"]).set_index("date")
        bar_events = all_events[all_events.event == BAR]

        # XTKS is open 9am - 3pm with a 1 hour lunch break from 11:30am - 12:30pm
        # 2 days x 300 minutes per day
        self.assertEqual(len(bar_events), 600)

        assert_index_equal(
            bar_events.tz_convert("Japan").iloc[148:152].index,
            pd.DatetimeIndex([
                '2021-06-14 11:29:00', '2021-06-14 11:30:00',
                '2021-06-14 12:31:00', '2021-06-14 12:32:00'
            ],
                             tz="Japan",
                             name="date"))
Example #28
    def test_stroke_gdf(self):
        coins = mm.COINS(self.gdf)
        result = coins.stroke_gdf()

        assert result.shape == (10, 2)

        expected_index = pd.Index(list(range(10)), name="stroke_group")
        assert_index_equal(result.index, expected_index)

        expected_segments = pd.Series(
            [8, 19, 17, 13, 5, 14, 2, 3, 3, 5],
            name="n_segments",
            index=expected_index,
        )
        assert_series_equal(result["n_segments"], expected_segments)

        assert result.length.sum() == pytest.approx(self.gdf.length.sum())

        expected = pd.Series(
            [
                839.5666838320316,
                759.0900425060918,
                744.7579337248078,
                1019.7095084794428,
                562.2466914415573,
                1077.3606756995746,
                193.04063727323836,
                187.49184699173748,
                182.6849740039611,
                382.50195042922803,
            ],
            index=expected_index,
        )
        assert_series_equal(
            result.length, expected, check_less_precise=6, check_exact=False
        )
Example #29
def test_index(tmp_path):
    # set up dataset
    df = geopandas.read_file(
        geopandas.datasets.get_path("naturalearth_lowres"))
    # get meaningful index by shuffling (hilbert distance)
    df = dask_geopandas.from_geopandas(
        df, npartitions=2).spatial_shuffle().compute()
    ddf = dask_geopandas.from_geopandas(df, npartitions=4)

    # roundtrip preserves the index by default
    basedir = tmp_path / "dataset"
    ddf.to_feather(basedir)
    result = dask_geopandas.read_feather(basedir)
    assert "hilbert_distance" not in result.columns
    assert result.index.name == "hilbert_distance"
    assert_index_equal(result.index.compute(), df.index)

    # TODO not setting the index
    with pytest.raises(NotImplementedError):
        result = dask_geopandas.read_feather(basedir, index=False)
    # assert "hilbert_distance" in result.columns
    # assert result.index.name is None

    # setting specific columns as the index
    result = dask_geopandas.read_feather(basedir, index="iso_a3")
    assert "iso_a3" not in result.columns
    assert result.index.name == "iso_a3"
    assert_geodataframe_equal(result.compute(), df.set_index("iso_a3"))

    # not writing the index
    basedir = tmp_path / "dataset"
    ddf.to_feather(basedir, write_index=False)
    result = dask_geopandas.read_feather(basedir)
    assert "hilbert_distance" not in result.columns
    assert result.index.name is None
    assert result.index.compute()[0] == 0
Example #30
    def test_groupby_dataframe_mad(self):
        pd_flights = self.pd_flights().filter(self.filter_data + ["DestCountry"])
        ed_flights = self.ed_flights().filter(self.filter_data + ["DestCountry"])

        pd_mad = pd_flights.groupby("DestCountry").mad()
        ed_mad = ed_flights.groupby("DestCountry").mad()

        assert_index_equal(pd_mad.columns, ed_mad.columns)
        assert_index_equal(pd_mad.index, ed_mad.index)
        assert_series_equal(pd_mad.dtypes, ed_mad.dtypes)

        pd_min_mad = pd_flights.groupby("DestCountry").aggregate(["min", "mad"])
        ed_min_mad = ed_flights.groupby("DestCountry").aggregate(["min", "mad"])

        assert_index_equal(pd_min_mad.columns, ed_min_mad.columns)
        assert_index_equal(pd_min_mad.index, ed_min_mad.index)
        assert_series_equal(pd_min_mad.dtypes, ed_min_mad.dtypes)
Example #31
def test_all_full_day_holidays_since_1928(request):
    """
    Perform a full comparison of all known full day NYSE holidays since 1928/01/01 and
    make sure that it matches.
    """
    # get the expected dates from the csv file
    expected = pd.read_csv(os.path.join(request.fspath.dirname, 'data', 'nyse_all_full_day_holidays_since_1928.csv'),
                           index_col=0, parse_dates=True, header=None).index
    expected.name = None

    # calculated expected going direct to the underlying regular and ad_hoc calendars
    nyse = xnys_cal
    actual = pd.DatetimeIndex(nyse.adhoc_holidays).tz_convert(None).sort_values()
    slice_locs = actual.slice_locs(expected[0], expected[-1])
    actual = actual[slice_locs[0]:slice_locs[1]]
    actual = actual.append(nyse.regular_holidays.holidays(expected[0], expected[-1]))
    actual = actual.sort_values().unique()
    assert_index_equal(expected, actual)

    # using the holidays method
    actual = pd.DatetimeIndex(nyse.holidays().holidays).unique()
    slice_locs = actual.slice_locs(expected[0], expected[-1])
    actual = actual[slice_locs[0]:slice_locs[1]]
    assert_index_equal(expected, actual)
Example #32
def test_date_range_lower_freq():
    cal = mcal.get_calendar("NYSE")
    schedule = cal.schedule(pd.Timestamp('2017-09-05 20:00', tz='UTC'),
                            pd.Timestamp('2017-10-23 20:00', tz='UTC'))

    # cannot get date range of frequency lower than 1D
    with pytest.raises(ValueError):
        mcal.date_range(schedule, frequency='3D')

    # instead get for 1D and convert to lower frequency
    short = mcal.date_range(schedule, frequency='1D')
    actual = mcal.convert_freq(short, '3D')
    expected = pd.date_range('2017-09-05 20:00',
                             '2017-10-23 20:00',
                             freq='3D',
                             tz='UTC')
    assert_index_equal(actual, expected)

    actual = mcal.convert_freq(short, '1W')
    expected = pd.date_range('2017-09-05 20:00',
                             '2017-10-23 20:00',
                             freq='1W',
                             tz='UTC')
    assert_index_equal(actual, expected)
Example #33
 def _مکمل(سے, تک, اعداد):
     pdt.assert_index_equal(اعداد.روزانہ().dropna(how='all').index, pd.period_range(سے, تک, freq='D'))
Example #34
def assert_geodataframe_equal(left, right,
                              check_dtype=True,
                              check_index_type='equiv',
                              check_column_type='equiv',
                              check_frame_type=True,
                              check_like=False,
                              check_less_precise=False,
                              check_geom_type=False,
                              check_crs=True):
    """
    Check that two GeoDataFrames are equal.

    Parameters
    ----------
    left, right : two GeoDataFrames
    check_dtype : bool, default True
        Whether to check the DataFrame dtype is identical.
    check_index_type, check_column_type : bool, default 'equiv'
        Check that index types are equal.
    check_frame_type : bool, default True
        Check that both are same type (*and* are GeoDataFrames). If False,
        will attempt to convert both into GeoDataFrame.
    check_like : bool, default False
        If true, ignore the order of rows & columns
    check_less_precise : bool, default False
        If True, use geom_almost_equals. If False, use geom_equals.
    check_geom_type : bool, default False
        If True, check that all the geom types are equal.
    check_crs: bool, default True
        If `check_frame_type` is True, then also check that the
        crs matches.
    """
    try:
        # added from pandas 0.20
        from pandas.testing import assert_frame_equal, assert_index_equal
    except ImportError:
        from pandas.util.testing import assert_frame_equal, assert_index_equal

    # instance validation
    if check_frame_type:
        assert isinstance(left, GeoDataFrame)
        assert isinstance(left, type(right))

        if check_crs:
            # no crs can be either None or {}
            if not left.crs and not right.crs:
                pass
            else:
                assert left.crs == right.crs
    else:
        if not isinstance(left, GeoDataFrame):
            left = GeoDataFrame(left)
        if not isinstance(right, GeoDataFrame):
            right = GeoDataFrame(right)

    # shape comparison
    assert left.shape == right.shape, (
        'GeoDataFrame shape mismatch, left: {lshape!r}, right: {rshape!r}.\n'
        'Left columns: {lcols!r}, right columns: {rcols!r}'.format(
            lshape=left.shape, rshape=right.shape,
            lcols=left.columns, rcols=right.columns))

    if check_like:
        left, right = left.reindex_like(right), right

    # column comparison
    assert_index_equal(left.columns, right.columns, exact=check_column_type,
                       obj='GeoDataFrame.columns')

    # geometry comparison
    assert_geoseries_equal(
        left.geometry, right.geometry, check_dtype=check_dtype,
        check_less_precise=check_less_precise,
        check_geom_type=check_geom_type, check_crs=False)

    # drop geometries and check remaining columns
    left2 = left.drop([left._geometry_column_name], axis=1)
    right2 = right.drop([right._geometry_column_name], axis=1)
    assert_frame_equal(left2, right2, check_dtype=check_dtype,
                       check_index_type=check_index_type,
                       check_column_type=check_column_type,
                       obj='GeoDataFrame')
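
A brief usage sketch (illustrative only, assuming geopandas and shapely are importable): two GeoDataFrames with the same content but reordered rows only compare equal once check_like=True ignores the row order.

from geopandas import GeoDataFrame
from shapely.geometry import Point

left = GeoDataFrame({"name": ["a", "b"],
                     "geometry": [Point(0, 0), Point(1, 1)]})
right = left.iloc[[1, 0]]  # same rows, different order

# Passes because check_like reindexes left to match right before comparing;
# with the default check_like=False the index comparison would fail.
assert_geodataframe_equal(left, right, check_like=True)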