def test_str_add_ser(self):
    """Check that ``str + Series`` gives the same result in eland as in pandas."""
    prefix = "The last name is: "

    ed_result = prefix + self.ed_ecommerce()["customer_last_name"]
    pd_result = prefix + self.pd_ecommerce()["customer_last_name"]

    assert_pandas_eland_series_equal(pd_result, ed_result)
def test_bad_str_add_ser(self):
    """Check ``str + Series`` when the string prefix contains a ``*`` character."""
    # TODO encode special characters better
    # Elasticsearch accepts this, but it will cause problems
    prefix = " *"

    ed_result = prefix + self.ed_ecommerce()["customer_last_name"]
    pd_result = prefix + self.pd_ecommerce()["customer_last_name"]

    assert_pandas_eland_series_equal(pd_result, ed_result)
def test_ser_add_str_add_ser(self):
    """Check ``Series + str + Series`` (first name, a space, last name)."""
    pd_first = self.pd_ecommerce()["customer_first_name"]
    pd_full_name = pd_first + " " + self.pd_ecommerce()["customer_last_name"]

    ed_first = self.ed_ecommerce()["customer_first_name"]
    ed_full_name = ed_first + " " + self.ed_ecommerce()["customer_last_name"]

    assert_pandas_eland_series_equal(pd_full_name, ed_full_name)
def test_ser_add_str(self):
    """Check that ``Series + str`` gives the same result in eland as in pandas."""
    suffix = " is the first name."

    ed_result = self.ed_ecommerce()["customer_first_name"] + suffix
    pd_result = self.pd_ecommerce()["customer_first_name"] + suffix

    assert_pandas_eland_series_equal(pd_result, ed_result)
def test_flights_filter_index_items(self, items):
    """Filter the ``FlightDelayType`` series by index labels on both backends.

    ``items`` is forwarded unchanged to ``Series.filter(items=..., axis=0)``
    for the eland and the pandas series, and the two results are compared.
    """
    column = "FlightDelayType"
    ed_column = self.ed_flights_small()[column]
    pd_column = self.pd_flights_small()[column]

    ed_filtered = ed_column.filter(items=items, axis=0)
    pd_filtered = pd_column.filter(items=items, axis=0)

    assert_pandas_eland_series_equal(pd_filtered, ed_filtered)
def test_ecommerce_series_simple_series_addition(self):
    """Check that adding two columns gives the same series in eland and pandas.

    Only the first 100 rows of the ecommerce data are compared.
    """
    pd_df = self.pd_ecommerce().head(100)
    ed_df = self.ed_ecommerce().head(100)

    pd_series = pd_df["taxful_total_price"] + pd_df["total_quantity"]
    ed_series = ed_df["taxful_total_price"] + ed_df["total_quantity"]

    # ``rtol`` is a float relative tolerance. The previous ``rtol=True`` is
    # coerced to 1.0 (a 100% tolerance), which lets almost any pair of values
    # compare equal and makes the assertion close to meaningless.
    # NOTE(review): assumes the helper forwards ``rtol`` to pandas'
    # ``assert_series_equal`` - confirm.
    assert_pandas_eland_series_equal(pd_series, ed_series, rtol=0.5e-3)
def test_ecommerce_series_simple_integer_addition(self):
    """Check ``Series + int`` on the first 100 ecommerce rows of both backends."""
    column = "taxful_total_price"
    pd_head = self.pd_ecommerce().head(100)
    ed_head = self.ed_ecommerce().head(100)

    pd_result = pd_head[column] + 5
    ed_result = ed_head[column] + 5

    assert_pandas_eland_series_equal(
        pd_result, ed_result, check_less_precise=True
    )
def test_head_tail(self):
    """Check ``head(10)`` and then ``tail(10)`` of the ``Carrier`` series."""
    pd_s = self.pd_flights()["Carrier"]
    ed_s = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

    # Same comparison for both slicing methods, head first and tail second.
    for method in ("head", "tail"):
        pd_slice = getattr(pd_s, method)(10)
        ed_slice = getattr(ed_s, method)(10)
        assert_pandas_eland_series_equal(pd_slice, ed_slice)
def test_ecommerce_series_simple_arithmetics(self):
    """Check a mixed ``+ - * /`` expression over several ecommerce columns.

    The same expression is evaluated on the first 100 rows of the pandas and
    eland frames and the resulting series are compared.
    """

    def mixed_expression(df):
        # One definition shared by both backends, so the two sides cannot
        # drift apart when the expression is edited.
        return (
            df["taxful_total_price"]
            + 5
            + df["total_quantity"] / df["taxless_total_price"]
            - df["total_unique_products"] * 10.0
            + df["total_quantity"]
        )

    pd_series = mixed_expression(self.pd_ecommerce().head(100))
    ed_series = mixed_expression(self.ed_ecommerce().head(100))

    # ``rtol`` is a float relative tolerance. The previous ``rtol=True`` is
    # coerced to 1.0 (a 100% tolerance), which lets almost any pair of values
    # compare equal and makes the assertion close to meaningless.
    # NOTE(review): assumes the helper forwards ``rtol`` to pandas'
    # ``assert_series_equal`` - confirm.
    assert_pandas_eland_series_equal(pd_series, ed_series, rtol=0.5e-3)
def test_simple_lat_lon(self):
    """
    Compare the first ``DestLocation`` value between pandas and eland.

    Note on nested object order: the key order of a nested object can change
    depending on how ``_source`` is requested (this could be a bug in ES).

    PUT my_index/doc/1
    {
      "location": {
        "lat": "50.033333",
        "lon": "8.570556"
      }
    }

    GET my_index/_search

    "_source": {
      "location": {
        "lat": "50.033333",
        "lon": "8.570556"
      }
    }

    GET my_index/_search
    {
      "_source": "location"
    }

    "_source": {
      "location": {
        "lon": "8.570556",
        "lat": "50.033333"
      }
    }

    Hence we store the pandas df source json as 'lon', 'lat'
    """
    pd_dest_location = self.pd_flights()["DestLocation"].head(1)
    ed_dest_location = self.ed_flights()["DestLocation"].head(1)

    # ``rtol`` is a relative tolerance, so the previous ``rtol=2`` allowed a
    # 200% deviation on any numeric value.
    # NOTE(review): 2 looked like a digit count rather than a tolerance;
    # 0.5e-2 keeps a roughly two-digit comparison - confirm the intent.
    assert_pandas_eland_series_equal(
        pd_dest_location, ed_dest_location, check_exact=False, rtol=0.5e-2
    )
def test_all_formats(self):
    """Compare every column of the time index against a pandas series.

    For each entry of ``self.time_formats`` the strings in ``self.times`` are
    parsed, re-rendered with that entry's format string and parsed back with
    ``pd.to_datetime``; the resulting series is compared with the eland
    column of the same name.
    """
    ed_df = ed.DataFrame(ES_TEST_CLIENT, self.time_index_name)

    for format_name, time_format in self.time_formats.items():
        times = []
        for dt in self.times:
            parsed = datetime.strptime(dt, "%Y-%m-%dT%H:%M:%S.%f%z")
            rendered = parsed.strftime(time_format)
            times.append(pd.to_datetime(rendered, format=time_format))

        ed_series = ed_df[format_name]

        # The expected index is the row position rendered as a string.
        labels = list(map(str, range(len(self.times))))
        pd_series = pd.Series(times, index=labels, name=format_name)

        assert_pandas_eland_series_equal(pd_series, ed_series)
def test_ecommerce_series_basic_arithmetics(self):
    """Run each binary arithmetic method against several right-hand operands.

    Every operator name (dunder and plain spelling) is applied to
    ``taxful_total_price`` with, in order, another series, a Python float, a
    ``np.float32`` and a Python int; the eland result is compared with the
    pandas result each time.
    """
    pd_df = self.pd_ecommerce().head(100)
    ed_df = self.ed_ecommerce().head(100)

    ops = (
        "__add__",
        "__truediv__",
        "__floordiv__",
        "__pow__",
        "__mod__",
        "__mul__",
        "__sub__",
        "add",
        "truediv",
        "floordiv",
        "pow",
        "mod",
        "mul",
        "sub",
    )

    for op in ops:
        # (pandas operand, eland operand) pairs, rebuilt for every operator.
        operand_pairs = (
            (pd_df["total_quantity"], ed_df["total_quantity"]),
            (10.56, 10.56),
            (np.float32(1.879), np.float32(1.879)),
            (int(8), int(8)),
        )
        for pd_operand, ed_operand in operand_pairs:
            pd_series = getattr(pd_df["taxful_total_price"], op)(pd_operand)
            ed_series = getattr(ed_df["taxful_total_price"], op)(ed_operand)
            assert_pandas_eland_series_equal(
                pd_series, ed_series, check_less_precise=True
            )
def test_ecommerce_series_basic_rarithmetics(self):
    """Run each reflected arithmetic method against several operands.

    Every reflected operator name (dunder and plain spelling) is applied to
    ``taxful_total_price`` with, in order, another series, a Python float, a
    ``np.float32`` and a Python int; the eland result is compared with the
    pandas result each time. Only the first 10 rows are used.
    """
    pd_df = self.pd_ecommerce().head(10)
    ed_df = self.ed_ecommerce().head(10)

    ops = (
        "__radd__",
        "__rtruediv__",
        "__rfloordiv__",
        "__rpow__",
        "__rmod__",
        "__rmul__",
        "__rsub__",
        "radd",
        "rtruediv",
        "rfloordiv",
        "rpow",
        "rmod",
        "rmul",
        "rsub",
    )

    for op in ops:
        # (pandas operand, eland operand) pairs, rebuilt for every operator.
        operand_pairs = (
            (pd_df["total_quantity"], ed_df["total_quantity"]),
            (3.141, 3.141),
            (np.float32(2.879), np.float32(2.879)),
            (int(6), int(6)),
        )
        for pd_operand, ed_operand in operand_pairs:
            pd_series = getattr(pd_df["taxful_total_price"], op)(pd_operand)
            ed_series = getattr(ed_df["taxful_total_price"], op)(ed_operand)
            assert_pandas_eland_series_equal(
                pd_series, ed_series, check_less_precise=True
            )
def test_name(self):
    """Check that assigning ``.name`` behaves the same on eland and pandas."""
    # deep copy pandas DataFrame as .name alters this reference frame
    pd_series = self.pd_flights()["Carrier"].copy(deep=True)
    ed_series = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

    # Baseline comparison before any rename.
    assert_pandas_eland_series_equal(pd_series, ed_series)
    assert ed_series.name == pd_series.name

    # Rename twice in a row; both backends must stay in step each time.
    for new_name in ("renamed1", "renamed2"):
        pd_series.name = new_name
        ed_series.name = new_name

        assert_pandas_eland_series_equal(pd_series, ed_series)
        assert ed_series.name == pd_series.name
def test_rename(self):
    """Check ``Series.rename`` once, then again on the already renamed series."""
    pd_original = self.pd_flights()["Carrier"]
    ed_original = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")
    assert_pandas_eland_series_equal(pd_original, ed_original)

    # First rename.
    pd_first = pd_original.rename("renamed")
    ed_first = ed_original.rename("renamed")

    print(pd_first)
    print(ed_first)
    print(ed_first.es_info())

    assert_pandas_eland_series_equal(pd_first, ed_first)

    # Second rename, applied to the result of the first one.
    pd_second = pd_first.rename("renamed2")
    ed_second = ed_first.rename("renamed2")

    print(ed_second.es_info())

    assert "renamed2" == ed_second.name
    assert_pandas_eland_series_equal(pd_second, ed_second)
def test_sample(self):
    """Check that sampling with ``self.SEED`` twice yields matching series.

    The first sample is passed through ``self.build_from_index`` to obtain
    the expected series; a second sample with the same ``n`` and
    ``random_state`` is then compared against it.
    """
    carrier = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

    first_sample = carrier.sample(n=10, random_state=self.SEED)
    expected = self.build_from_index(first_sample)

    second_sample = carrier.sample(n=10, random_state=self.SEED)

    assert_pandas_eland_series_equal(expected, second_sample)
def test_supported_series_dtypes_rops(self):
    """Check reflected operators across column combinations.

    The first three column pairings must give equal results in eland and
    pandas; the pairings that involve the ``currency`` column must raise
    ``TypeError`` on both backends.
    """
    pd_df = self.pd_ecommerce().head(100)
    ed_df = self.ed_ecommerce().head(100)

    # Test some specific operations that are and aren't supported
    numeric_ops = [
        "__radd__",
        "__rtruediv__",
        "__rfloordiv__",
        "__rpow__",
        "__rmod__",
        "__rmul__",
        "__rsub__",
    ]

    # __rmul__ is supported for int * str in pandas
    non_string_numeric_ops = [op for op in numeric_ops if op != "__rmul__"]

    supported_pairs = (
        ("taxful_total_price", "taxless_total_price"),  # float op float
        ("total_quantity", "taxless_total_price"),  # int op float
        ("taxful_total_price", "total_quantity"),  # float op int
    )
    for left, right in supported_pairs:
        for op in numeric_ops:
            pd_series = getattr(pd_df[left], op)(pd_df[right])
            ed_series = getattr(ed_df[left], op)(ed_df[right])
            assert_pandas_eland_series_equal(
                pd_series, ed_series, check_less_precise=True
            )

    # str op int (throws)
    for op in non_string_numeric_ops:
        # Series operand first (pandas, then eland) ...
        for df in (pd_df, ed_df):
            with pytest.raises(TypeError):
                getattr(df["currency"], op)(df["total_quantity"])
        # ... then a scalar operand (pandas, then eland).
        for df in (pd_df, ed_df):
            with pytest.raises(TypeError):
                getattr(df["currency"], op)(10.0)

    # int op str (throws)
    for op in non_string_numeric_ops:
        for df in (pd_df, ed_df):
            with pytest.raises(TypeError):
                getattr(df["total_quantity"], op)(df["currency"])