def test_select_columns_from_two_tables_with_same_column_name(): """ Test selecting tables :return: """ my_frame = query( """select * from forest_fires table1, forest_fires table2""") table1 = FOREST_FIRES.copy() table2 = FOREST_FIRES.copy() pandas_frame = merge(table1.assign(__=1), table2.assign(__=1), on="__", how="inner", copy=False) del pandas_frame["__"] renamed = {} for column in pandas_frame.columns: if "_x" in column: renamed[column] = "table1." + column.replace("_x", "") if "_y" in column: renamed[column] = "table2." + column.replace("_y", "") pandas_frame.rename(columns=renamed, inplace=True) for column in my_frame.columns: tm.assert_series_equal(pandas_frame[column], my_frame[column]) tm.assert_frame_equal(my_frame, pandas_frame)
def test_select_columns_from_two_tables_with_same_column_name(): """ Test selecting tables :return: """ my_frame = query( """select * from forest_fires table1, forest_fires table2""") table1 = FOREST_FIRES.copy() table2 = FOREST_FIRES.copy() table1["_temp_id"] = 1 table2["_temp_id"] = 1 pandas_frame = merge(table1, table2, on="_temp_id").drop(columns=["_temp_id"]) tm.assert_frame_equal(pandas_frame, my_frame)
def test_rank_statement_many_columns(): """ Test rank statement :return: """ my_frame = query(""" select wind, rain, month, rank() over(order by wind desc, rain asc, month) as rank from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind", "rain", "month"]] pandas_frame.sort_values(by=["wind", "rain", "month"], ascending=[False, True, True], inplace=True) pandas_frame.reset_index(inplace=True) rank_map = {} rank_counter = 1 rank_offset = 0 pandas_frame["rank"] = 0 rank_series = pandas_frame["rank"].copy() for row_num, row in enumerate(pandas_frame.iterrows()): key = "".join(map(str, list(list(row)[1])[1:4])) if rank_map.get(key): rank_offset += 1 rank = rank_map[key] else: rank = rank_counter + rank_offset rank_map[key] = rank rank_counter += 1 rank_series[row_num] = rank pandas_frame["rank"] = rank_series pandas_frame.sort_values(by="index", ascending=True, inplace=True) pandas_frame.drop(columns=["index"], inplace=True) pandas_frame.reset_index(drop=True, inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_limit(): """ Test limit clause :return: """ my_frame = query("""select * from forest_fires limit 10""") pandas_frame = FOREST_FIRES.copy().head(10) tm.assert_frame_equal(pandas_frame, my_frame)
def test_maintain_case_in_query(): """ Test nested subqueries :return: """ my_frame = query("""select wind, rh from forest_fires""") pandas_frame = FOREST_FIRES.copy()[["wind", "RH"]].rename(columns={"RH": "rh"}) tm.assert_frame_equal(pandas_frame, my_frame)
def test_union_all(): """ Test union distinct in queries :return: """ my_frame = query(""" select * from forest_fires order by wind desc limit 5 union all select * from forest_fires order by wind asc limit 5 """) pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[False ]).head(5)) pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[True]).head(5)) pandas_frame = concat([pandas_frame1, pandas_frame2], ignore_index=True).reset_index(drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_min(): """ Test the min :return: """ my_frame = query("select min(temp) from forest_fires") pandas_frame = (FOREST_FIRES.agg({ "temp": np.min }).to_frame("_col0").reset_index().drop(columns=["index"])) tm.assert_frame_equal(pandas_frame, my_frame)
def test_except_all(): """ Test except distinct in queries :return: """ my_frame = query(""" select * from forest_fires order by wind desc limit 5 except all select * from forest_fires order by wind desc limit 3 """) pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[False ]).head(5)) pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[False ]).head(3)) pandas_frame = pandas_frame1[~pandas_frame1.isin(pandas_frame2).all( axis=1)].reset_index(drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_where_clause(): """ Test where clause :return: """ my_frame = query("""select * from forest_fires where month = 'mar'""") pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[pandas_frame.month == "mar"].reset_index( drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_set_string_value_as_column_value(): """ Select a string like 'Yes' as a column value :return: """ my_frame = query(""" select wind, 'yes' as wind_yes from forest_fires""") pandas_frame = FOREST_FIRES.copy() pandas_frame["wind_yes"] = "yes" pandas_frame = pandas_frame[["wind", "wind_yes"]] tm.assert_frame_equal(pandas_frame, my_frame)
def test_count_star(): """ Test the count aggregation :return: """ my_frame = query("select count(*) from forest_fires") pandas_frame = FOREST_FIRES.copy() pandas_frame = (pandas_frame[[ "month" ]].count().to_frame("_col0").reset_index(drop=True)) tm.assert_frame_equal(pandas_frame, my_frame)
def test_in_operator_expression_numerical(): """ Test using in operator in a sql query :return: """ my_frame = query(""" select * from forest_fires where X in (5, 9) """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[(pandas_frame["X"]).isin( (5, 9))].reset_index(drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_date_cast(): """ Select casting a string as a date :return: """ my_frame = query(""" select wind, cast('2019-01-01' as datetime64) as my_date from forest_fires""" ) pandas_frame = FOREST_FIRES.copy() pandas_frame["my_date"] = datetime.strptime("2019-01-01", "%Y-%m-%d") pandas_frame = pandas_frame[["wind", "my_date"]] tm.assert_frame_equal(pandas_frame, my_frame)
def test_not_in_operator(): """ Test using in operator in a sql query :return: """ my_frame = query(""" select * from forest_fires where day not in ('fri', 'sun') """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[~pandas_frame.day.isin( ("fri", "sun"))].reset_index(drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_operations_between_columns_and_numbers(): """ Tests operations between columns :return: """ my_frame = query( """select temp * wind + rain / dmc + 37 from forest_fires""") pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = (pandas_frame["temp"] * pandas_frame["wind"] + pandas_frame["rain"] / pandas_frame["DMC"] + 37) pandas_frame = pandas_frame["_col0"].to_frame() tm.assert_frame_equal(pandas_frame, my_frame)
def test_select_star_from_multiple_tables(): """ Test selecting from two different tables :return: """ my_frame = query("""select * from forest_fires, digimon_mon_list""") forest_fires = FOREST_FIRES.copy() digimon_mon_list_new = DIGIMON_MON_LIST.copy() forest_fires["_temp_id"] = 1 digimon_mon_list_new["_temp_id"] = 1 pandas_frame = merge(forest_fires, digimon_mon_list_new, on="_temp_id").drop(columns=["_temp_id"]) tm.assert_frame_equal(pandas_frame, my_frame)
def test_rank_statement_one_column(): """ Test rank statement :return: """ my_frame = query(""" select wind, rank() over(order by wind) as wind_rank from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame["wind_rank"] = pandas_frame.wind.rank( method="min").astype("int") tm.assert_frame_equal(pandas_frame, my_frame)
def test_order_by(): """ Test order by clause :return: """ my_frame = query( """select * from forest_fires order by temp desc, wind asc, area""") pandas_frame = FOREST_FIRES.copy() pandas_frame.sort_values(by=["temp", "wind", "area"], ascending=[0, 1, 1], inplace=True) pandas_frame.reset_index(drop=True, inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_having_one_condition(): """ Test having clause :return: """ my_frame = query("select min(temp) from forest_fires having min(temp) > 2") pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = FOREST_FIRES["temp"] aggregated_df = pandas_frame.aggregate({ "_col0": "min" }).to_frame().transpose() pandas_frame = aggregated_df[aggregated_df["_col0"] > 2] tm.assert_frame_equal(pandas_frame, my_frame)
def test_case_statement_with_same_conditions(): """ Test using case statements :return: """ my_frame = query(""" select case when wind > 5 then month when wind > 5 then 'mid' else day end from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = FOREST_FIRES["month"] pandas_frame.loc[~(pandas_frame.wind > 5), "_col0"] = FOREST_FIRES["day"] pandas_frame.drop(columns=["wind"], inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_having_with_group_by(): """ Test having clause :return: """ my_frame = query( "select day, min(temp) from forest_fires group by day having min(temp) > 5" ) pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = FOREST_FIRES["temp"] pandas_frame = (pandas_frame[["day", "_col0"]].groupby("day").aggregate( {"_col0": np.min})) pandas_frame = pandas_frame[pandas_frame["_col0"] > 5].reset_index() tm.assert_frame_equal(pandas_frame, my_frame)
def test_intersect_distinct(): """ Test union distinct in queries :return: """ my_frame = query(""" select * from forest_fires order by wind desc limit 5 intersect distinct select * from forest_fires order by wind desc limit 3 """) pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[False ]).head(5)) pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"], ascending=[False ]).head(3)) pandas_frame = merge( left=pandas_frame1, right=pandas_frame2, how="inner", on=list(pandas_frame1.columns), ) tm.assert_frame_equal(pandas_frame, my_frame)
def test_timestamps(): """ Select now() as date :return: """ with freeze_time(datetime.now()): my_frame = query(""" select wind, now(), today(), timestamp('2019-01-31', '23:20:32') from forest_fires""") pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame["now()"] = datetime.now() pandas_frame["today()"] = date.today() pandas_frame["_literal0"] = datetime(2019, 1, 31, 23, 20, 32) tm.assert_frame_equal(pandas_frame, my_frame)
def test_between_operator(): """ Test using between operator :return: """ my_frame = query(""" select * from forest_fires where wind between 5 and 6 """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[(pandas_frame.wind >= 5) & (pandas_frame.wind <= 6)].reset_index( drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_rank_over_partition_by(): """ Test rank partition by statement :return: """ my_frame = query(""" select wind, rain, month, day, rank() over(partition by day order by wind desc, rain asc, month) as rank from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind", "rain", "month", "day"]] partition_slice = 4 rank_map = {} partition_rank_counter = {} partition_rank_offset = {} pandas_frame.sort_values(by=["wind", "rain", "month"], ascending=[False, True, True], inplace=True) pandas_frame.reset_index(inplace=True) pandas_frame["rank"] = 0 rank_series = pandas_frame["rank"].copy() for row_num, series_tuple in enumerate(pandas_frame.iterrows()): row = series_tuple[1] row_list = list(row)[1:partition_slice] partition_list = list(row)[partition_slice:5] key = str(row_list) partition_key = str(partition_list) if rank_map.get(partition_key): if rank_map[partition_key].get(key): partition_rank_counter[partition_key] += 1 rank = rank_map[partition_key][key] else: partition_rank_counter[partition_key] += 1 rank = (partition_rank_counter[partition_key] + partition_rank_offset[partition_key]) rank_map[partition_key][key] = rank else: rank = 1 rank_map[partition_key] = {} partition_rank_counter[partition_key] = 1 partition_rank_offset[partition_key] = 0 rank_map[partition_key][key] = rank rank_series[row_num] = rank pandas_frame["rank"] = rank_series pandas_frame.sort_values(by="index", ascending=True, inplace=True) pandas_frame.drop(columns=["index"], inplace=True) pandas_frame.reset_index(drop=True, inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_case_statement_w_no_name(): """ Test using case statements :return: """ my_frame = query(""" select case when wind > 5 then 'strong' when wind = 5 then 'mid' else 'weak' end from forest_fires """) pandas_frame = FOREST_FIRES.copy()[["wind"]] pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = "strong" pandas_frame.loc[pandas_frame.wind == 5, "_col0"] = "mid" pandas_frame.loc[~((pandas_frame.wind == 5) | (pandas_frame.wind > 5)), "_col0"] = "weak" pandas_frame.drop(columns=["wind"], inplace=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_agg_w_groupby(): """ Test using aggregates and group by together :return: """ my_frame = query( "select day, month, min(temp), max(temp) from forest_fires group by day, month" ) pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = pandas_frame.temp pandas_frame["_col1"] = pandas_frame.temp pandas_frame = (pandas_frame.groupby(["day", "month"]).aggregate({ "_col0": np.min, "_col1": np.max }).reset_index()) tm.assert_frame_equal(pandas_frame, my_frame)
def test_boolean_order_of_operations_with_parens(): """ Test boolean order of operations with parentheses :return: """ my_frame = query("select * from forest_fires " "where (month = 'oct' and day = 'fri') or " "(month = 'nov' and day = 'tue')") pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[((pandas_frame["month"] == "oct") & (pandas_frame["day"] == "fri")) | ((pandas_frame["month"] == "nov") & (pandas_frame["day"] == "tue"))].reset_index( drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_all_boolean_ops_clause(): """ Test where clause :return: """ my_frame = query( """select * from forest_fires where month = 'mar' and temp > 8 and rain >= 0 and area != 0 and dc < 100 and ffmc <= 90.1 """) pandas_frame = FOREST_FIRES.copy() pandas_frame = pandas_frame[(pandas_frame.month == "mar") & (pandas_frame.temp > 8.0) & (pandas_frame.rain >= 0) & (pandas_frame.area != 0) & (pandas_frame.DC < 100) & (pandas_frame.FFMC <= 90.1)].reset_index( drop=True) tm.assert_frame_equal(pandas_frame, my_frame)
def test_multiple_aggs(): """ Test multiple aggregations :return: """ my_frame = query( "select min(temp), max(temp), avg(temp), max(wind) from forest_fires") pandas_frame = FOREST_FIRES.copy() pandas_frame["_col0"] = FOREST_FIRES.temp.copy() pandas_frame["_col1"] = FOREST_FIRES.temp.copy() pandas_frame["_col2"] = FOREST_FIRES.temp.copy() pandas_frame = pandas_frame.agg({ "_col0": np.min, "_col1": np.max, "_col2": np.mean, "wind": np.max }) pandas_frame.rename({"wind": "_col3"}, inplace=True) pandas_frame = pandas_frame.to_frame().transpose() tm.assert_frame_equal(pandas_frame, my_frame)