示例#1
0
    def test_es_if_exists_append_es_type_coerce_error(self):
        """Appending a value that does not fit the mapped type raises ``BulkIndexError``."""
        # First append: column 'a' is explicitly overridden to the 'byte' type.
        initial = pandas_to_eland(
            pd_df,
            es_client=ES_TEST_CLIENT,
            es_dest_index="test-index",
            es_if_exists="append",
            es_refresh=True,
            es_type_overrides={"a": "byte"},
        )
        assert_pandas_eland_frame_equal(pd_df, initial)

        # Single-row frame whose 'a' value (128) is too large for 'byte'.
        overflow_columns = {
            "a": [128],
            "b": [-1.0],
            "c": ["A"],
            "d": [dt],
        }
        pd_df_short = pd.DataFrame(overflow_columns, index=["3"])

        # Second append carries no type overrides.
        append_options = dict(
            es_client=ES_TEST_CLIENT,
            es_dest_index="test-index",
            es_if_exists="append",
        )
        with pytest.raises(BulkIndexError) as exc_info:
            pandas_to_eland(pd_df_short, **append_options)

        # The error text must point at the offending value.
        error_text = str(exc_info.value)
        assert "Value [128] is out of range for a byte" in error_text
示例#2
0
    def test_es_if_exists_replace(self):
        """Exercise ``es_if_exists="replace"`` three times against the same index."""

        def replace_index_with(frame):
            # Every step of this test issues the same call; only the frame varies.
            return pandas_to_eland(
                frame,
                es_client=ES_TEST_CLIENT,
                es_dest_index="test-index",
                es_if_exists="replace",
                es_refresh=True,
            )

        # 'replace' must allow the index to be created.
        first_round = replace_index_with(pd_df2).to_pandas()
        assert_frame_equal(pd_df2, first_round)

        # 'replace' must swap out the existing mapping and entries.
        second_round = replace_index_with(pd_df)
        assert_pandas_eland_frame_equal(pd_df, second_round)

        # Writing the first frame again must reproduce the first result.
        third_round = replace_index_with(pd_df2).to_pandas()
        assert_frame_equal(first_round, third_round)
示例#3
0
    def test_head_0(self):
        """Compare ``head(0)`` of the eland flights frame with the pandas one."""
        eland_flights, pandas_flights = self.ed_flights(), self.pd_flights()

        empty_eland = eland_flights.head(0)
        empty_pandas = pandas_flights.head(0)

        assert_pandas_eland_frame_equal(empty_pandas, empty_eland)
示例#4
0
    def test_es_if_exists_append_mapping_mismatch(self):
        """Appending a frame with a mismatching schema raises ``ValueError``."""
        baseline = pandas_to_eland(
            pd_df,
            es_client=ES_TEST_CLIENT,
            es_dest_index="test-index",
            es_if_exists="append",
            es_refresh=True,
        )

        # The full message is compared, so every reported mismatch is pinned.
        expected_message = (
            "DataFrame dtypes and Elasticsearch index mapping aren't compatible:\n"
            "- 'b' is missing from DataFrame columns\n"
            "- 'c' is missing from DataFrame columns\n"
            "- 'd' is missing from DataFrame columns\n"
            "- 'Z' is missing from ES index mapping\n"
            "- 'a' column type ('keyword') not compatible with ES index mapping type ('long')"
        )

        with pytest.raises(ValueError) as exc_info:
            pandas_to_eland(
                pd_df2,
                es_client=ES_TEST_CLIENT,
                es_dest_index="test-index",
                es_if_exists="append",
            )
        assert str(exc_info.value) == expected_message

        # The rejected append must not have modified the index.
        assert_pandas_eland_frame_equal(pd_df, baseline)
示例#5
0
    def test_notna(self):
        """Filter by ``notna()`` on each of ``self.columns`` and compare eland with pandas."""
        eland_frame = self.ed_ecommerce()
        pandas_frame = eland_to_pandas(eland_frame)

        for name in self.columns:
            eland_mask = eland_frame[name].notna()
            eland_rows = eland_frame[eland_mask]

            pandas_mask = pandas_frame[name].notna()
            pandas_rows = pandas_frame[pandas_mask]

            assert_pandas_eland_frame_equal(pandas_rows, eland_rows)
示例#6
0
    def test_isna(self):
        """Filter by ``isna()`` on ``geoip.region_name`` and compare eland with pandas."""
        field = "geoip.region_name"

        eland_frame = self.ed_ecommerce()
        pandas_frame = eland_to_pandas(eland_frame)

        eland_mask = eland_frame[field].isna()
        eland_rows = eland_frame[eland_mask]

        pandas_mask = pandas_frame[field].isna()
        pandas_rows = pandas_frame[pandas_mask]

        assert_pandas_eland_frame_equal(pandas_rows, eland_rows)
示例#7
0
    def test_select_dtypes_exclude_number(self):
        """Compare ``select_dtypes(exclude=[np.number])`` between eland and pandas."""
        excluded = [np.number]
        sample_size = 103  # only the first 103 rows are compared

        eland_flights = self.ed_flights()
        pandas_flights = self.pd_flights()

        eland_selected = eland_flights.select_dtypes(exclude=excluded)
        pandas_selected = pandas_flights.select_dtypes(exclude=excluded)

        pandas_sample = pandas_selected.head(sample_size)
        eland_sample = eland_selected.head(sample_size)
        assert_pandas_eland_frame_equal(pandas_sample, eland_sample)
示例#8
0
    def test_getitem_query(self):
        """Check boolean-mask ``[]`` filtering on an eland frame against pandas.

        A small frame is written to its own index; the index is deleted when
        the test finishes, including when one of the assertions fails.
        """
        # Examples from:
        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.query.html
        pd_df = pd.DataFrame(
            {
                "A": range(1, 6),
                "B": range(10, 0, -2),
                "C": range(10, 5, -1),
            },
            index=["0", "1", "2", "3", "4"],
        )
        # >>> pd_df
        #    A   B   C
        # 0  1  10  10
        # 1  2   8   9
        # 2  3   6   8
        # 3  4   4   7
        # 4  5   2   6

        # Now create index
        index_name = "eland_test_query"

        ed_df = ed.pandas_to_eland(
            pd_df,
            ES_TEST_CLIENT,
            index_name,
            es_if_exists="replace",
            es_refresh=True,
        )

        try:
            assert_pandas_eland_frame_equal(pd_df, ed_df)

            pd_df.info()
            ed_df.info()

            # Scalar comparison and column-to-column comparisons.
            pd_q1 = pd_df[pd_df.A > 2]
            pd_q2 = pd_df[pd_df.A > pd_df.B]
            pd_q3 = pd_df[pd_df.B == pd_df.C]

            ed_q1 = ed_df[ed_df.A > 2]
            ed_q2 = ed_df[ed_df.A > ed_df.B]
            ed_q3 = ed_df[ed_df.B == ed_df.C]

            assert_pandas_eland_frame_equal(pd_q1, ed_q1)
            assert_pandas_eland_frame_equal(pd_q2, ed_q2)
            assert_pandas_eland_frame_equal(pd_q3, ed_q3)

            # Two conditions combined with '&'.
            pd_q4 = pd_df[(pd_df.A > 2) & (pd_df.B > 3)]
            ed_q4 = ed_df[(ed_df.A > 2) & (ed_df.B > 3)]

            assert_pandas_eland_frame_equal(pd_q4, ed_q4)
        finally:
            # Pass the index by keyword: newer Elasticsearch clients reject
            # positional arguments on API methods.
            ES_TEST_CLIENT.indices.delete(index=index_name)
示例#9
0
    def test_datetime_to_ms(self):
        """Check the generated ES mappings for a mixed-dtype frame, then round-trip it."""
        row_labels = ["0", "1", "2"]
        df = pd.DataFrame(
            data={
                "A": np.random.rand(3),
                "B": 1,
                "C": "foo",
                "D": pd.Timestamp("20190102"),
                "E": [1.0, 2.0, 3.0],
                "F": False,
                "G": [1, 2, 3],
            },
            index=row_labels,
        )

        # Expected ES field type for each column of the frame above.
        es_type_by_column = {
            "A": "double",
            "B": "long",
            "C": "keyword",
            "D": "date",
            "E": "double",
            "F": "boolean",
            "G": "long",
        }
        expected_mappings = {
            "mappings": {
                "properties": {
                    column: {"type": es_type}
                    for column, es_type in es_type_by_column.items()
                }
            }
        }

        mappings = FieldMappings._generate_es_mappings(df)
        assert expected_mappings == mappings

        # Now create index
        index_name = "eland_test_generate_es_mappings"
        ed_df = ed.pandas_to_eland(
            df,
            ES_TEST_CLIENT,
            index_name,
            es_if_exists="replace",
            es_refresh=True,
        )

        # Both the dtypes and the full contents must match the pandas frame.
        assert_series_equal(df.dtypes, ed_df.dtypes)
        assert_pandas_eland_frame_equal(df, ed_df)
示例#10
0
    def test_es_if_exists_append(self):
        """Append two frames to the same index and check the combined result.

        The first append overrides column 'a' to 'short'; the second append
        passes no overrides.
        """
        df1 = pandas_to_eland(
            pd_df,
            es_client=ES_TEST_CLIENT,
            es_dest_index="test-index",
            es_if_exists="append",
            es_refresh=True,
            # We use 'short' here specifically so that the
            # assumed type of 'long' is coerced into a 'short'
            # by append mode.
            es_type_overrides={"a": "short"},
        )
        assert_pandas_eland_frame_equal(pd_df, df1)
        assert df1.shape == (3, 4)

        pd_df2 = pd.DataFrame(
            {
                "a": [4, 5, 6],
                "b": [-1.0, -2.0, -3.0],
                "c": ["A", "B", "C"],
                "d": [dt, dt - timedelta(1), dt - timedelta(2)],
            },
            index=["3", "4", "5"],
        )
        df2 = pandas_to_eland(
            pd_df2,
            es_client=ES_TEST_CLIENT,
            es_dest_index="test-index",
            es_if_exists="append",
            es_refresh=True,
        )

        # Assert that the second pandas dataframe is actually appended
        assert df2.shape == (6, 4)
        # DataFrame.append() was removed in pandas 2.0; pd.concat() with its
        # default arguments builds the same row-wise combination.
        pd_df3 = pd.concat([pd_df, pd_df2])
        assert_pandas_eland_frame_equal(pd_df3, df2)
示例#11
0
    def test_tail_head(self):
        """Alternate ``tail``/``head`` on successively smaller frames, comparing each step."""
        eland_frame = self.ed_flights()
        pandas_frame = self.pd_flights()

        # Each step is applied to the result of the previous one.
        steps = (("tail", 10), ("head", 8), ("tail", 5), ("head", 4))
        for method_name, row_count in steps:
            eland_frame = getattr(eland_frame, method_name)(row_count)
            pandas_frame = getattr(pandas_frame, method_name)(row_count)
            assert_pandas_eland_frame_equal(pandas_frame, eland_frame)
示例#12
0
    def test_tail(self):
        """Compare ``tail(10)``, then ``tail(8)`` and ``tail(20)`` taken from that result."""
        eland_flights = self.ed_flights()
        pandas_flights = self.pd_flights()

        eland_last_10 = eland_flights.tail(10)
        pandas_last_10 = pandas_flights.tail(10)
        assert_pandas_eland_frame_equal(pandas_last_10, eland_last_10)

        # Both follow-up tails start from the 10-row result: one asks for
        # fewer rows than it holds, the other for more.
        for row_count in (8, 20):
            eland_subset = eland_last_10.tail(row_count)
            pandas_subset = pandas_last_10.tail(row_count)
            assert_pandas_eland_frame_equal(pandas_subset, eland_subset)
示例#13
0
    def test_head(self):
        """Compare ``head(10)``, then ``head(8)`` and ``head(20)`` taken from that result."""
        eland_flights = self.ed_flights()
        pandas_flights = self.pd_flights()

        eland_first_10 = eland_flights.head(10)
        pandas_first_10 = pandas_flights.head(10)
        assert_pandas_eland_frame_equal(pandas_first_10, eland_first_10)

        # Both follow-up heads start from the 10-row result: one asks for
        # fewer rows than it holds, the other for more.
        for row_count in (8, 20):
            eland_subset = eland_first_10.head(row_count)
            pandas_subset = pandas_first_10.head(row_count)
            assert_pandas_eland_frame_equal(pandas_subset, eland_subset)