def test_str_add_ser(self):
    """Check ``str + series`` gives matching pandas and eland results.

    A literal prefix is placed on the left of the ``customer_last_name``
    column of the ecommerce data, once per backend.
    """
    prefix = "The last name is: "

    ed_result = prefix + self.ed_ecommerce()["customer_last_name"]
    pd_result = prefix + self.pd_ecommerce()["customer_last_name"]

    assert_pandas_eland_series_equal(pd_result, ed_result)
def test_ser_add_str(self):
    """Check ``series + str`` gives matching pandas and eland results.

    A literal suffix is appended to the ``customer_first_name`` column of
    the ecommerce data, once per backend.
    """
    suffix = " is the first name."

    ed_result = self.ed_ecommerce()["customer_first_name"] + suffix
    pd_result = self.pd_ecommerce()["customer_first_name"] + suffix

    assert_pandas_eland_series_equal(pd_result, ed_result)
def test_bad_str_add_ser(self):
    """Check ``str + series`` when the string holds a special character.

    The prefix is a space followed by ``*``; it is placed on the left of
    the ``customer_last_name`` column for both backends.
    """
    # TODO encode special characters better
    # Elasticsearch accepts this, but it will cause problems
    awkward_prefix = " *"

    ed_result = awkward_prefix + self.ed_ecommerce()["customer_last_name"]
    pd_result = awkward_prefix + self.pd_ecommerce()["customer_last_name"]

    assert_pandas_eland_series_equal(pd_result, ed_result)
def test_ser_add_str_add_ser(self):
    """Check ``series + str + series`` gives matching results.

    The first and last name columns are joined with a single space, first
    for pandas and then for eland.
    """
    pd_first = self.pd_ecommerce()["customer_first_name"]
    pd_last = self.pd_ecommerce()["customer_last_name"]
    pd_full_name = pd_first + " " + pd_last

    ed_first = self.ed_ecommerce()["customer_first_name"]
    ed_last = self.ed_ecommerce()["customer_last_name"]
    ed_full_name = ed_first + " " + ed_last

    assert_pandas_eland_series_equal(pd_full_name, ed_full_name)
def test_flights_filter_index_items(self, items):
    """Filter the ``FlightDelayType`` series along axis 0 and compare.

    ``items`` is forwarded unchanged as ``filter(items=items, axis=0)`` to
    both the eland and the pandas series.
    """
    column = "FlightDelayType"
    ed_delay_type = self.ed_flights_small()[column]
    pd_delay_type = self.pd_flights_small()[column]

    filter_kwargs = {"items": items, "axis": 0}
    ed_filtered = ed_delay_type.filter(**filter_kwargs)
    pd_filtered = pd_delay_type.filter(**filter_kwargs)

    assert_pandas_eland_series_equal(pd_filtered, ed_filtered)
def test_ecommerce_series_simple_series_addition(self):
    """Add two columns of the first 100 ecommerce rows and compare.

    ``taxful_total_price + total_quantity`` is computed per backend; the
    comparison is made with ``check_less_precise=True``.
    """
    row_limit = 100
    pd_head = self.pd_ecommerce().head(row_limit)
    ed_head = self.ed_ecommerce().head(row_limit)

    pd_sum = pd_head["taxful_total_price"] + pd_head["total_quantity"]
    ed_sum = ed_head["taxful_total_price"] + ed_head["total_quantity"]

    assert_pandas_eland_series_equal(pd_sum, ed_sum, check_less_precise=True)
def test_getitem_one_argument(self):
    """Read ``OriginAirportID`` from the first 89 flights and compare.

    NOTE(review): the column is read through attribute access here, while
    ``test_getitem_one_attribute`` uses ``[...]``; the two names look
    swapped -- confirm before renaming.
    """
    row_limit = 89
    ed_head = self.ed_flights().head(row_limit)
    pd_head = self.pd_flights().head(row_limit)

    ed_origin_id = ed_head.OriginAirportID
    pd_origin_id = pd_head.OriginAirportID

    assert_pandas_eland_series_equal(pd_origin_id, ed_origin_id)
def test_getitem_one_attribute(self):
    """Read ``OriginAirportID`` from the first 103 flights and compare.

    NOTE(review): the column is read through ``[...]`` here, while
    ``test_getitem_one_argument`` uses attribute access; the two names
    look swapped -- confirm before renaming.
    """
    row_limit = 103
    column = "OriginAirportID"
    ed_head = self.ed_flights().head(row_limit)
    pd_head = self.pd_flights().head(row_limit)

    ed_origin_id = ed_head[column]
    pd_origin_id = pd_head[column]

    assert_pandas_eland_series_equal(pd_origin_id, ed_origin_id)
def test_head_tail(self):
    """Compare ``head(10)`` and then ``tail(10)`` of the ``Carrier`` series."""
    column = "Carrier"
    row_count = 10

    pd_carrier = self.pd_flights()[column]
    ed_carrier = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, column)

    # Same order as before: head is checked first, then tail.
    for method_name in ("head", "tail"):
        pd_slice = getattr(pd_carrier, method_name)(row_count)
        ed_slice = getattr(ed_carrier, method_name)(row_count)
        assert_pandas_eland_series_equal(pd_slice, ed_slice)
def test_ecommerce_series_simple_arithmetics(self):
    """Evaluate one mixed arithmetic expression on both backends.

    The expression combines series/scalar and series/series operations
    (``+``, ``/``, ``-``, ``*``) over the first 100 ecommerce rows; results
    are compared with ``check_less_precise=True``.
    """

    def _expression(frame):
        # One definition of the expression so both backends evaluate
        # exactly the same operator sequence and precedence.
        return (
            frame["taxful_total_price"]
            + 5
            + frame["total_quantity"] / frame["taxless_total_price"]
            - frame["total_unique_products"] * 10.0
            + frame["total_quantity"]
        )

    pd_head = self.pd_ecommerce().head(100)
    ed_head = self.ed_ecommerce().head(100)

    pd_result = _expression(pd_head)
    ed_result = _expression(ed_head)

    assert_pandas_eland_series_equal(pd_result, ed_result, check_less_precise=True)
def test_getitem_multiple_calls(self):
    """Index a frame that was already narrowed to a list of columns.

    The first 89 flights are restricted to four ``Dest*`` columns. The test
    then checks two things on the narrowed eland frame:

    * asking for ``Carrier`` (not among the selected columns) raises
      ``KeyError``;
    * asking for ``DestCountry`` yields the same series as pandas.
    """
    ed_flights = self.ed_flights().head(89)
    pd_flights = self.pd_flights().head(89)

    ed_col0 = ed_flights[
        ["DestCityName", "DestCountry", "DestLocation", "DestRegion"]
    ]

    # The previous ``try/except KeyError: pass`` let the test succeed even
    # when no error was raised; make the expectation explicit instead.
    with pytest.raises(KeyError):
        ed_col0["Carrier"]

    pd_col1 = pd_flights["DestCountry"]
    ed_col1 = ed_col0["DestCountry"]

    assert_pandas_eland_series_equal(pd_col1, ed_col1)
def test_all_formats(self):
    """Compare every time-format column of the time index against pandas.

    For each ``(name, format)`` entry of ``self.time_formats`` the expected
    values are built by parsing each string in ``self.times``, re-rendering
    it with that format and parsing the rendered text back with
    ``pd.to_datetime``. The expected series is indexed by the stringified
    row positions and named after the format.
    """
    source_format = "%Y-%m-%dT%H:%M:%S.%f%z"

    index_name = self.time_index_name
    ed_df = ed.DataFrame(ES_TEST_CLIENT, index_name)

    for format_name, target_format in self.time_formats.items():
        expected_times = []
        for raw_time in self.times:
            rendered = datetime.strptime(raw_time, source_format).strftime(
                target_format
            )
            expected_times.append(pd.to_datetime(rendered, format=target_format))

        ed_series = ed_df[format_name]

        row_labels = [str(position) for position in range(len(self.times))]
        pd_series = pd.Series(expected_times, index=row_labels, name=format_name)

        assert_pandas_eland_series_equal(pd_series, ed_series)
def test_simple_lat_lon(self):
    """Compare the first ``DestLocation`` value between pandas and eland.

    Note on nested object order: the key order inside a nested object can
    change depending on how ``_source`` is requested (this could be a bug
    in ES). Given::

        PUT my_index/doc/1
        {
          "location": {"lat": "50.033333", "lon": "8.570556"}
        }

    a plain ``GET my_index/_search`` returns::

        "_source": {"location": {"lat": "50.033333", "lon": "8.570556"}}

    whereas ``GET my_index/_search {"_source": "location"}`` returns::

        "_source": {"location": {"lon": "8.570556", "lat": "50.033333"}}

    Hence we store the pandas df source json as 'lon', 'lat'.
    """
    column = "DestLocation"

    pd_first_location = self.pd_flights()[column].head(1)
    ed_first_location = self.ed_flights()[column].head(1)

    # NOTE(review): rtol=2 is a very loose relative tolerance -- confirm
    # that it is intentional.
    assert_pandas_eland_series_equal(
        pd_first_location,
        ed_first_location,
        check_exact=False,
        rtol=2,
    )
def test_ecommerce_series_basic_rarithmetics(self):
    """Run every reflected arithmetic operator against several operands.

    Each dunder and named reflected operator is applied to
    ``taxful_total_price`` (first 10 ecommerce rows) with, in order: the
    ``total_quantity`` series, a Python float, a ``np.float32`` and a
    Python int. Every result is compared with ``check_less_precise=True``.
    """
    pd_df = self.pd_ecommerce().head(10)
    ed_df = self.ed_ecommerce().head(10)

    reflected_ops = (
        "__radd__",
        "__rtruediv__",
        "__rfloordiv__",
        "__rpow__",
        "__rmod__",
        "__rmul__",
        "__rsub__",
        "radd",
        "rtruediv",
        "rfloordiv",
        "rpow",
        "rmod",
        "rmul",
        "rsub",
    )
    scalar_operands = (3.141, np.float32(2.879), 6)

    for op_name in reflected_ops:
        # The series operand differs per backend; scalars are shared.
        operand_pairs = [(pd_df["total_quantity"], ed_df["total_quantity"])]
        operand_pairs.extend((scalar, scalar) for scalar in scalar_operands)

        for pd_operand, ed_operand in operand_pairs:
            pd_series = getattr(pd_df["taxful_total_price"], op_name)(pd_operand)
            ed_series = getattr(ed_df["taxful_total_price"], op_name)(ed_operand)

            assert_pandas_eland_series_equal(
                pd_series, ed_series, check_less_precise=True
            )
def test_ecommerce_series_basic_arithmetics(self):
    """Run every forward arithmetic operator against several operands.

    Each dunder and named operator is applied to ``taxful_total_price``
    (first 100 ecommerce rows) with, in order: the ``total_quantity``
    series, a Python float, a ``np.float32`` and a Python int. Every result
    is compared with ``check_less_precise=True``.
    """
    pd_df = self.pd_ecommerce().head(100)
    ed_df = self.ed_ecommerce().head(100)

    forward_ops = (
        "__add__",
        "__truediv__",
        "__floordiv__",
        "__pow__",
        "__mod__",
        "__mul__",
        "__sub__",
        "add",
        "truediv",
        "floordiv",
        "pow",
        "mod",
        "mul",
        "sub",
    )
    scalar_operands = (10.56, np.float32(1.879), 8)

    for op_name in forward_ops:
        # The series operand differs per backend; scalars are shared.
        operand_pairs = [(pd_df["total_quantity"], ed_df["total_quantity"])]
        operand_pairs.extend((scalar, scalar) for scalar in scalar_operands)

        for pd_operand, ed_operand in operand_pairs:
            pd_series = getattr(pd_df["taxful_total_price"], op_name)(pd_operand)
            ed_series = getattr(ed_df["taxful_total_price"], op_name)(ed_operand)

            assert_pandas_eland_series_equal(
                pd_series, ed_series, check_less_precise=True
            )
def test_name(self):
    """Check that assigning ``.name`` keeps pandas and eland in step.

    The ``Carrier`` series is compared as loaded, then again after each of
    two successive renames applied to both objects.
    """
    # deep copy pandas DataFrame as .name alters this reference frame
    pd_carrier = self.pd_flights()["Carrier"].copy(deep=True)
    ed_carrier = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

    def _check_in_sync():
        # Contents first, then the name attribute itself.
        assert_pandas_eland_series_equal(pd_carrier, ed_carrier)
        assert ed_carrier.name == pd_carrier.name

    _check_in_sync()

    for new_name in ("renamed1", "renamed2"):
        pd_carrier.name = new_name
        ed_carrier.name = new_name
        _check_in_sync()
def test_rename(self):
    """Check ``rename`` on the ``Carrier`` series, applied twice in a chain.

    The series are compared before renaming, after renaming to
    ``"renamed"`` and after renaming that result to ``"renamed2"``. The
    intermediate objects and ``info_es()`` output are printed along the way.
    """
    pd_original = self.pd_flights()["Carrier"]
    ed_original = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")
    assert_pandas_eland_series_equal(pd_original, ed_original)

    # First rename.
    pd_first = pd_original.rename("renamed")
    ed_first = ed_original.rename("renamed")

    print(pd_first)
    print(ed_first)
    print(ed_first.info_es())

    assert_pandas_eland_series_equal(pd_first, ed_first)

    # Second rename, starting from the already renamed series.
    pd_second = pd_first.rename("renamed2")
    ed_second = ed_first.rename("renamed2")

    print(ed_second.info_es())

    assert "renamed2" == ed_second.name
    assert_pandas_eland_series_equal(pd_second, ed_second)
def test_supported_series_dtypes_rops(self):
    """Check which reflected operators work for which column combinations.

    Numeric column pairs must produce equal pandas and eland results for
    every reflected operator. Combinations involving the ``currency``
    column must raise ``TypeError`` on both backends for every operator
    except ``__rmul__``.
    """
    pd_df = self.pd_ecommerce().head(100)
    ed_df = self.ed_ecommerce().head(100)

    # Test some specific operations that are and aren't supported
    numeric_ops = [
        "__radd__",
        "__rtruediv__",
        "__rfloordiv__",
        "__rpow__",
        "__rmod__",
        "__rmul__",
        "__rsub__",
    ]

    # __rmul__ is supported for int * str in pandas
    non_string_numeric_ops = [op for op in numeric_ops if op != "__rmul__"]

    numeric_column_pairs = (
        ("taxful_total_price", "taxless_total_price"),  # float op float
        ("total_quantity", "taxless_total_price"),  # int op float
        ("taxful_total_price", "total_quantity"),  # float op int
    )

    for left_column, right_column in numeric_column_pairs:
        for op_name in numeric_ops:
            pd_series = getattr(pd_df[left_column], op_name)(pd_df[right_column])
            ed_series = getattr(ed_df[left_column], op_name)(ed_df[right_column])

            assert_pandas_eland_series_equal(
                pd_series, ed_series, check_less_precise=True
            )

    # str op int (throws)
    for op_name in non_string_numeric_ops:
        with pytest.raises(TypeError):
            getattr(pd_df["currency"], op_name)(pd_df["total_quantity"])

        with pytest.raises(TypeError):
            getattr(ed_df["currency"], op_name)(ed_df["total_quantity"])

        with pytest.raises(TypeError):
            getattr(pd_df["currency"], op_name)(10.0)

        with pytest.raises(TypeError):
            getattr(ed_df["currency"], op_name)(10.0)

    # int op str (throws)
    for op_name in non_string_numeric_ops:
        with pytest.raises(TypeError):
            getattr(pd_df["total_quantity"], op_name)(pd_df["currency"])

        with pytest.raises(TypeError):
            getattr(ed_df["total_quantity"], op_name)(ed_df["currency"])
def test_sample(self):
    """Check that sampling the ``Carrier`` series with a fixed seed repeats.

    The expected series is built by passing one seeded sample to
    ``self.build_from_index``; a second sample drawn with the same ``n``
    and ``random_state`` is then compared against it.
    """
    sample_kwargs = {"n": 10, "random_state": self.SEED}

    ed_carrier = ed.Series(ES_TEST_CLIENT, FLIGHTS_INDEX_NAME, "Carrier")

    pd_expected = self.build_from_index(ed_carrier.sample(**sample_kwargs))
    ed_sampled = ed_carrier.sample(**sample_kwargs)

    assert_pandas_eland_series_equal(pd_expected, ed_sampled)