Пример #1
0
def test_select_columns_from_two_tables_with_same_column_name():
    """
    Test selecting tables
    :return:
    """
    my_frame = query(
        """select * from forest_fires table1, forest_fires table2""")
    table1 = FOREST_FIRES.copy()
    table2 = FOREST_FIRES.copy()
    pandas_frame = merge(table1.assign(__=1),
                         table2.assign(__=1),
                         on="__",
                         how="inner",
                         copy=False)
    del pandas_frame["__"]

    renamed = {}
    for column in pandas_frame.columns:
        if "_x" in column:
            renamed[column] = "table1." + column.replace("_x", "")
        if "_y" in column:
            renamed[column] = "table2." + column.replace("_y", "")
    pandas_frame.rename(columns=renamed, inplace=True)

    for column in my_frame.columns:
        tm.assert_series_equal(pandas_frame[column], my_frame[column])

    tm.assert_frame_equal(my_frame, pandas_frame)
Пример #2
0
def test_select_columns_from_two_tables_with_same_column_name():
    """
    Test selecting tables
    :return:
    """
    my_frame = query(
        """select * from forest_fires table1, forest_fires table2""")
    table1 = FOREST_FIRES.copy()
    table2 = FOREST_FIRES.copy()
    table1["_temp_id"] = 1
    table2["_temp_id"] = 1
    pandas_frame = merge(table1, table2,
                         on="_temp_id").drop(columns=["_temp_id"])
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #3
0
def test_rank_statement_many_columns():
    """
    Test rank statement
    :return:
    """
    my_frame = query("""
    select wind, rain, month, rank() over(order by wind desc, rain asc, month) as rank
    from forest_fires
    """)
    pandas_frame = FOREST_FIRES.copy()[["wind", "rain", "month"]]
    pandas_frame.sort_values(by=["wind", "rain", "month"],
                             ascending=[False, True, True],
                             inplace=True)
    pandas_frame.reset_index(inplace=True)
    rank_map = {}
    rank_counter = 1
    rank_offset = 0
    pandas_frame["rank"] = 0
    rank_series = pandas_frame["rank"].copy()
    for row_num, row in enumerate(pandas_frame.iterrows()):
        key = "".join(map(str, list(list(row)[1])[1:4]))
        if rank_map.get(key):
            rank_offset += 1
            rank = rank_map[key]
        else:
            rank = rank_counter + rank_offset
            rank_map[key] = rank
            rank_counter += 1
        rank_series[row_num] = rank
    pandas_frame["rank"] = rank_series
    pandas_frame.sort_values(by="index", ascending=True, inplace=True)
    pandas_frame.drop(columns=["index"], inplace=True)
    pandas_frame.reset_index(drop=True, inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #4
0
def test_limit():
    """
    Test limit clause
    :return:
    """
    my_frame = query("""select * from forest_fires limit 10""")
    pandas_frame = FOREST_FIRES.copy().head(10)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #5
0
def test_maintain_case_in_query():
    """
    Test nested subqueries
    :return:
    """
    my_frame = query("""select wind, rh from forest_fires""")
    pandas_frame = FOREST_FIRES.copy()[["wind",
                                        "RH"]].rename(columns={"RH": "rh"})
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #6
0
def test_union_all():
    """
    Test union distinct in queries
    :return:
    """
    my_frame = query("""
        select * from forest_fires order by wind desc limit 5
         union all
        select * from forest_fires order by wind asc limit 5
        """)
    pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[False
                                                                ]).head(5))
    pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[True]).head(5))
    pandas_frame = concat([pandas_frame1, pandas_frame2],
                          ignore_index=True).reset_index(drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #7
0
def test_min():
    """
    Test the min
    :return:
    """
    my_frame = query("select min(temp) from forest_fires")
    pandas_frame = (FOREST_FIRES.agg({
        "temp": np.min
    }).to_frame("_col0").reset_index().drop(columns=["index"]))
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #8
0
def test_except_all():
    """
    Test except distinct in queries
    :return:
    """
    my_frame = query("""
                select * from forest_fires order by wind desc limit 5
                 except all
                select * from forest_fires order by wind desc limit 3
                """)
    pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[False
                                                                ]).head(5))
    pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[False
                                                                ]).head(3))
    pandas_frame = pandas_frame1[~pandas_frame1.isin(pandas_frame2).all(
        axis=1)].reset_index(drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #9
0
def test_where_clause():
    """
    Test where clause
    :return:
    """
    my_frame = query("""select * from forest_fires where month = 'mar'""")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[pandas_frame.month == "mar"].reset_index(
        drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #10
0
def test_set_string_value_as_column_value():
    """
    Select a string like 'Yes' as a column value
    :return:
    """
    my_frame = query("""
    select wind, 'yes' as wind_yes from forest_fires""")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["wind_yes"] = "yes"
    pandas_frame = pandas_frame[["wind", "wind_yes"]]
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #11
0
def test_count_star():
    """
    Test the count aggregation
    :return:
    """
    my_frame = query("select count(*) from forest_fires")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = (pandas_frame[[
        "month"
    ]].count().to_frame("_col0").reset_index(drop=True))
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #12
0
def test_in_operator_expression_numerical():
    """
    Test using in operator in a sql query
    :return:
    """
    my_frame = query("""
    select * from forest_fires where X in (5, 9)
    """)
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[(pandas_frame["X"]).isin(
        (5, 9))].reset_index(drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #13
0
def test_date_cast():
    """
    Select casting a string as a date
    :return:
    """
    my_frame = query("""
    select wind, cast('2019-01-01' as datetime64) as my_date from forest_fires"""
                     )
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["my_date"] = datetime.strptime("2019-01-01", "%Y-%m-%d")
    pandas_frame = pandas_frame[["wind", "my_date"]]
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #14
0
def test_not_in_operator():
    """
    Test using in operator in a sql query
    :return:
    """
    my_frame = query("""
    select * from forest_fires where day not in ('fri', 'sun')
    """)
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[~pandas_frame.day.isin(
        ("fri", "sun"))].reset_index(drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #15
0
def test_operations_between_columns_and_numbers():
    """
    Tests operations between columns
    :return:
    """
    my_frame = query(
        """select temp * wind + rain / dmc + 37 from forest_fires""")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = (pandas_frame["temp"] * pandas_frame["wind"] +
                             pandas_frame["rain"] / pandas_frame["DMC"] + 37)
    pandas_frame = pandas_frame["_col0"].to_frame()
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #16
0
def test_select_star_from_multiple_tables():
    """
    Test selecting from two different tables
    :return:
    """
    my_frame = query("""select * from forest_fires, digimon_mon_list""")
    forest_fires = FOREST_FIRES.copy()
    digimon_mon_list_new = DIGIMON_MON_LIST.copy()
    forest_fires["_temp_id"] = 1
    digimon_mon_list_new["_temp_id"] = 1
    pandas_frame = merge(forest_fires, digimon_mon_list_new,
                         on="_temp_id").drop(columns=["_temp_id"])
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #17
0
def test_rank_statement_one_column():
    """
    Test rank statement
    :return:
    """
    my_frame = query("""
    select wind, rank() over(order by wind) as wind_rank
    from forest_fires
    """)
    pandas_frame = FOREST_FIRES.copy()[["wind"]]
    pandas_frame["wind_rank"] = pandas_frame.wind.rank(
        method="min").astype("int")
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #18
0
def test_order_by():
    """
    Test order by clause
    :return:
    """
    my_frame = query(
        """select * from forest_fires order by temp desc, wind asc, area""")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame.sort_values(by=["temp", "wind", "area"],
                             ascending=[0, 1, 1],
                             inplace=True)
    pandas_frame.reset_index(drop=True, inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #19
0
def test_having_one_condition():
    """
    Test having clause
    :return:
    """
    my_frame = query("select min(temp) from forest_fires having min(temp) > 2")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = FOREST_FIRES["temp"]
    aggregated_df = pandas_frame.aggregate({
        "_col0": "min"
    }).to_frame().transpose()
    pandas_frame = aggregated_df[aggregated_df["_col0"] > 2]
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #20
0
def test_case_statement_with_same_conditions():
    """
    Test using case statements
    :return:
    """
    my_frame = query("""
        select case when wind > 5 then month when wind > 5 then 'mid' else day end
        from forest_fires
        """)
    pandas_frame = FOREST_FIRES.copy()[["wind"]]
    pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = FOREST_FIRES["month"]
    pandas_frame.loc[~(pandas_frame.wind > 5), "_col0"] = FOREST_FIRES["day"]
    pandas_frame.drop(columns=["wind"], inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #21
0
def test_having_with_group_by():
    """
    Test having clause
    :return:
    """
    my_frame = query(
        "select day, min(temp) from forest_fires group by day having min(temp) > 5"
    )
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = FOREST_FIRES["temp"]
    pandas_frame = (pandas_frame[["day", "_col0"]].groupby("day").aggregate(
        {"_col0": np.min}))
    pandas_frame = pandas_frame[pandas_frame["_col0"] > 5].reset_index()
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #22
0
def test_intersect_distinct():
    """
    Test union distinct in queries
    :return:
    """
    my_frame = query("""
            select * from forest_fires order by wind desc limit 5
             intersect distinct
            select * from forest_fires order by wind desc limit 3
            """)
    pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[False
                                                                ]).head(5))
    pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[False
                                                                ]).head(3))
    pandas_frame = merge(
        left=pandas_frame1,
        right=pandas_frame2,
        how="inner",
        on=list(pandas_frame1.columns),
    )
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #23
0
def test_timestamps():
    """
    Select now() as date
    :return:
    """
    with freeze_time(datetime.now()):
        my_frame = query("""
        select wind, now(), today(), timestamp('2019-01-31', '23:20:32')
        from forest_fires""")
        pandas_frame = FOREST_FIRES.copy()[["wind"]]
        pandas_frame["now()"] = datetime.now()
        pandas_frame["today()"] = date.today()
        pandas_frame["_literal0"] = datetime(2019, 1, 31, 23, 20, 32)
        tm.assert_frame_equal(pandas_frame, my_frame)
Пример #24
0
def test_between_operator():
    """
    Test using between operator
    :return:
    """
    my_frame = query("""
    select * from forest_fires
    where wind between 5 and 6
    """)
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[(pandas_frame.wind >= 5)
                                & (pandas_frame.wind <= 6)].reset_index(
                                    drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #25
0
def test_rank_over_partition_by():
    """
    Test rank partition by statement
    :return:
    """
    my_frame = query("""
    select wind, rain, month, day,
    rank() over(partition by day order by wind desc, rain asc, month) as rank
    from forest_fires
    """)
    pandas_frame = FOREST_FIRES.copy()[["wind", "rain", "month", "day"]]
    partition_slice = 4
    rank_map = {}
    partition_rank_counter = {}
    partition_rank_offset = {}
    pandas_frame.sort_values(by=["wind", "rain", "month"],
                             ascending=[False, True, True],
                             inplace=True)
    pandas_frame.reset_index(inplace=True)
    pandas_frame["rank"] = 0
    rank_series = pandas_frame["rank"].copy()
    for row_num, series_tuple in enumerate(pandas_frame.iterrows()):
        row = series_tuple[1]
        row_list = list(row)[1:partition_slice]
        partition_list = list(row)[partition_slice:5]
        key = str(row_list)
        partition_key = str(partition_list)
        if rank_map.get(partition_key):
            if rank_map[partition_key].get(key):
                partition_rank_counter[partition_key] += 1
                rank = rank_map[partition_key][key]
            else:
                partition_rank_counter[partition_key] += 1
                rank = (partition_rank_counter[partition_key] +
                        partition_rank_offset[partition_key])
                rank_map[partition_key][key] = rank
        else:
            rank = 1
            rank_map[partition_key] = {}
            partition_rank_counter[partition_key] = 1
            partition_rank_offset[partition_key] = 0
            rank_map[partition_key][key] = rank
        rank_series[row_num] = rank
    pandas_frame["rank"] = rank_series
    pandas_frame.sort_values(by="index", ascending=True, inplace=True)
    pandas_frame.drop(columns=["index"], inplace=True)
    pandas_frame.reset_index(drop=True, inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #26
0
def test_case_statement_w_no_name():
    """
    Test using case statements
    :return:
    """
    my_frame = query("""
        select case when wind > 5 then 'strong' when wind = 5 then 'mid' else 'weak' end
        from forest_fires
        """)
    pandas_frame = FOREST_FIRES.copy()[["wind"]]
    pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = "strong"
    pandas_frame.loc[pandas_frame.wind == 5, "_col0"] = "mid"
    pandas_frame.loc[~((pandas_frame.wind == 5) | (pandas_frame.wind > 5)),
                     "_col0"] = "weak"
    pandas_frame.drop(columns=["wind"], inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #27
0
def test_agg_w_groupby():
    """
    Test using aggregates and group by together
    :return:
    """
    my_frame = query(
        "select day, month, min(temp), max(temp) from forest_fires group by day, month"
    )
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = pandas_frame.temp
    pandas_frame["_col1"] = pandas_frame.temp
    pandas_frame = (pandas_frame.groupby(["day", "month"]).aggregate({
        "_col0":
        np.min,
        "_col1":
        np.max
    }).reset_index())
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #28
0
def test_boolean_order_of_operations_with_parens():
    """
    Test boolean order of operations with parentheses
    :return:
    """
    my_frame = query("select * from forest_fires "
                     "where (month = 'oct' and day = 'fri') or "
                     "(month = 'nov' and day = 'tue')")

    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[((pandas_frame["month"] == "oct") &
                                 (pandas_frame["day"] == "fri"))
                                |
                                ((pandas_frame["month"] == "nov") &
                                 (pandas_frame["day"] == "tue"))].reset_index(
                                     drop=True)

    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #29
0
def test_all_boolean_ops_clause():
    """
    Test where clause
    :return:
    """
    my_frame = query(
        """select * from forest_fires where month = 'mar' and temp > 8 and rain >= 0
        and area != 0 and dc < 100 and ffmc <= 90.1
        """)

    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[(pandas_frame.month == "mar")
                                & (pandas_frame.temp > 8.0)
                                & (pandas_frame.rain >= 0)
                                & (pandas_frame.area != 0)
                                & (pandas_frame.DC < 100)
                                & (pandas_frame.FFMC <= 90.1)].reset_index(
                                    drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Пример #30
0
def test_multiple_aggs():
    """
    Test multiple aggregations
    :return:
    """
    my_frame = query(
        "select min(temp), max(temp), avg(temp), max(wind) from forest_fires")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = FOREST_FIRES.temp.copy()
    pandas_frame["_col1"] = FOREST_FIRES.temp.copy()
    pandas_frame["_col2"] = FOREST_FIRES.temp.copy()
    pandas_frame = pandas_frame.agg({
        "_col0": np.min,
        "_col1": np.max,
        "_col2": np.mean,
        "wind": np.max
    })
    pandas_frame.rename({"wind": "_col3"}, inplace=True)
    pandas_frame = pandas_frame.to_frame().transpose()
    tm.assert_frame_equal(pandas_frame, my_frame)