Exemplo n.º 1
0
def test_in_operator_expression_numerical():
    """
    Test using in operator in a sql query
    :return:
    """
    my_frame = query("""
    select * from forest_fires where X in (5, 9)
    """)
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[(pandas_frame["X"]).isin(
        (5, 9))].reset_index(drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 2
0
def test_not_in_operator():
    """
    Test using in operator in a sql query
    :return:
    """
    my_frame = query("""
    select * from forest_fires where day not in ('fri', 'sun')
    """)
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[~pandas_frame.day.isin(
        ("fri", "sun"))].reset_index(drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 3
0
def test_operations_between_columns_and_numbers():
    """
    Tests operations between columns
    :return:
    """
    my_frame = query(
        """select temp * wind + rain / dmc + 37 from forest_fires""")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = (pandas_frame["temp"] * pandas_frame["wind"] +
                             pandas_frame["rain"] / pandas_frame["DMC"] + 37)
    pandas_frame = pandas_frame["_col0"].to_frame()
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 4
0
def test_rank_statement_one_column():
    """
    Test rank statement
    :return:
    """
    my_frame = query("""
    select wind, rank() over(order by wind) as wind_rank
    from forest_fires
    """)
    pandas_frame = FOREST_FIRES.copy()[["wind"]]
    pandas_frame["wind_rank"] = pandas_frame.wind.rank(
        method="min").astype("int")
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 5
0
def test_order_by():
    """
    Test order by clause
    :return:
    """
    my_frame = query(
        """select * from forest_fires order by temp desc, wind asc, area""")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame.sort_values(by=["temp", "wind", "area"],
                             ascending=[0, 1, 1],
                             inplace=True)
    pandas_frame.reset_index(drop=True, inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 6
0
def test_select_star_from_multiple_tables():
    """
    Test selecting from two different tables
    :return:
    """
    my_frame = query("""select * from forest_fires, digimon_mon_list""")
    forest_fires = FOREST_FIRES.copy()
    digimon_mon_list_new = DIGIMON_MON_LIST.copy()
    forest_fires["_temp_id"] = 1
    digimon_mon_list_new["_temp_id"] = 1
    pandas_frame = merge(forest_fires, digimon_mon_list_new,
                         on="_temp_id").drop(columns=["_temp_id"])
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 7
0
def test_having_one_condition():
    """
    Test having clause
    :return:
    """
    my_frame = query("select min(temp) from forest_fires having min(temp) > 2")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = FOREST_FIRES["temp"]
    aggregated_df = pandas_frame.aggregate({
        "_col0": "min"
    }).to_frame().transpose()
    pandas_frame = aggregated_df[aggregated_df["_col0"] > 2]
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 8
0
def test_intersect_distinct():
    """
    Test union distinct in queries
    :return:
    """
    my_frame = query("""
            select * from forest_fires order by wind desc limit 5
             intersect distinct
            select * from forest_fires order by wind desc limit 3
            """)
    pandas_frame1 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[False
                                                                ]).head(5))
    pandas_frame2 = (FOREST_FIRES.copy().sort_values(by=["wind"],
                                                     ascending=[False
                                                                ]).head(3))
    pandas_frame = merge(
        left=pandas_frame1,
        right=pandas_frame2,
        how="inner",
        on=list(pandas_frame1.columns),
    )
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 9
0
def test_having_with_group_by():
    """
    Test having clause
    :return:
    """
    my_frame = query(
        "select day, min(temp) from forest_fires group by day having min(temp) > 5"
    )
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = FOREST_FIRES["temp"]
    pandas_frame = (pandas_frame[["day", "_col0"]].groupby("day").aggregate(
        {"_col0": np.min}))
    pandas_frame = pandas_frame[pandas_frame["_col0"] > 5].reset_index()
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 10
0
def test_case_statement_with_same_conditions():
    """
    Test using case statements
    :return:
    """
    my_frame = query("""
        select case when wind > 5 then month when wind > 5 then 'mid' else day end
        from forest_fires
        """)
    pandas_frame = FOREST_FIRES.copy()[["wind"]]
    pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = FOREST_FIRES["month"]
    pandas_frame.loc[~(pandas_frame.wind > 5), "_col0"] = FOREST_FIRES["day"]
    pandas_frame.drop(columns=["wind"], inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 11
0
def test_timestamps():
    """
    Select now() as date
    :return:
    """
    with freeze_time(datetime.now()):
        my_frame = query("""
        select wind, now(), today(), timestamp('2019-01-31', '23:20:32')
        from forest_fires""")
        pandas_frame = FOREST_FIRES.copy()[["wind"]]
        pandas_frame["now()"] = datetime.now()
        pandas_frame["today()"] = date.today()
        pandas_frame["_literal0"] = datetime(2019, 1, 31, 23, 20, 32)
        tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 12
0
def test_between_operator():
    """
    Test using between operator
    :return:
    """
    my_frame = query("""
    select * from forest_fires
    where wind between 5 and 6
    """)
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[(pandas_frame.wind >= 5)
                                & (pandas_frame.wind <= 6)].reset_index(
                                    drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 13
0
def test_rank_over_partition_by():
    """
    Test rank partition by statement
    :return:
    """
    my_frame = query("""
    select wind, rain, month, day,
    rank() over(partition by day order by wind desc, rain asc, month) as rank
    from forest_fires
    """)
    pandas_frame = FOREST_FIRES.copy()[["wind", "rain", "month", "day"]]
    partition_slice = 4
    rank_map = {}
    partition_rank_counter = {}
    partition_rank_offset = {}
    pandas_frame.sort_values(by=["wind", "rain", "month"],
                             ascending=[False, True, True],
                             inplace=True)
    pandas_frame.reset_index(inplace=True)
    pandas_frame["rank"] = 0
    rank_series = pandas_frame["rank"].copy()
    for row_num, series_tuple in enumerate(pandas_frame.iterrows()):
        row = series_tuple[1]
        row_list = list(row)[1:partition_slice]
        partition_list = list(row)[partition_slice:5]
        key = str(row_list)
        partition_key = str(partition_list)
        if rank_map.get(partition_key):
            if rank_map[partition_key].get(key):
                partition_rank_counter[partition_key] += 1
                rank = rank_map[partition_key][key]
            else:
                partition_rank_counter[partition_key] += 1
                rank = (partition_rank_counter[partition_key] +
                        partition_rank_offset[partition_key])
                rank_map[partition_key][key] = rank
        else:
            rank = 1
            rank_map[partition_key] = {}
            partition_rank_counter[partition_key] = 1
            partition_rank_offset[partition_key] = 0
            rank_map[partition_key][key] = rank
        rank_series[row_num] = rank
    pandas_frame["rank"] = rank_series
    pandas_frame.sort_values(by="index", ascending=True, inplace=True)
    pandas_frame.drop(columns=["index"], inplace=True)
    pandas_frame.reset_index(drop=True, inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 14
0
def test_case_statement_w_no_name():
    """
    Test using case statements
    :return:
    """
    my_frame = query("""
        select case when wind > 5 then 'strong' when wind = 5 then 'mid' else 'weak' end
        from forest_fires
        """)
    pandas_frame = FOREST_FIRES.copy()[["wind"]]
    pandas_frame.loc[pandas_frame.wind > 5, "_col0"] = "strong"
    pandas_frame.loc[pandas_frame.wind == 5, "_col0"] = "mid"
    pandas_frame.loc[~((pandas_frame.wind == 5) | (pandas_frame.wind > 5)),
                     "_col0"] = "weak"
    pandas_frame.drop(columns=["wind"], inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 15
0
def test_agg_w_groupby():
    """
    Test using aggregates and group by together
    :return:
    """
    my_frame = query(
        "select day, month, min(temp), max(temp) from forest_fires group by day, month"
    )
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = pandas_frame.temp
    pandas_frame["_col1"] = pandas_frame.temp
    pandas_frame = (pandas_frame.groupby(["day", "month"]).aggregate({
        "_col0":
        np.min,
        "_col1":
        np.max
    }).reset_index())
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 16
0
def test_boolean_order_of_operations_with_parens():
    """
    Test boolean order of operations with parentheses
    :return:
    """
    my_frame = query("select * from forest_fires "
                     "where (month = 'oct' and day = 'fri') or "
                     "(month = 'nov' and day = 'tue')")

    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[((pandas_frame["month"] == "oct") &
                                 (pandas_frame["day"] == "fri"))
                                |
                                ((pandas_frame["month"] == "nov") &
                                 (pandas_frame["day"] == "tue"))].reset_index(
                                     drop=True)

    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 17
0
def test_all_boolean_ops_clause():
    """
    Test where clause
    :return:
    """
    my_frame = query(
        """select * from forest_fires where month = 'mar' and temp > 8 and rain >= 0
        and area != 0 and dc < 100 and ffmc <= 90.1
        """)

    pandas_frame = FOREST_FIRES.copy()
    pandas_frame = pandas_frame[(pandas_frame.month == "mar")
                                & (pandas_frame.temp > 8.0)
                                & (pandas_frame.rain >= 0)
                                & (pandas_frame.area != 0)
                                & (pandas_frame.DC < 100)
                                & (pandas_frame.FFMC <= 90.1)].reset_index(
                                    drop=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 18
0
def test_having_multiple_conditions():
    """
    Test having clause
    :return:
    """
    my_frame = query(
        "select min(temp) from forest_fires having min(temp) > 2 and "
        "max(dc) < 200 or max(dc) > 1000")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = FOREST_FIRES["temp"]
    aggregated_df = pandas_frame.aggregate({
        "_col0": "min"
    }).to_frame().transpose()
    max_dc_df = (FOREST_FIRES["DC"].aggregate({
        "DC": "max"
    }).to_frame().reset_index(drop=True))
    pandas_frame = aggregated_df[(aggregated_df["_col0"] > 2) &
                                 (max_dc_df["DC"] < 200)
                                 | (max_dc_df["DC"] > 1000)]
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 19
0
def test_multiple_aggs():
    """
    Test multiple aggregations
    :return:
    """
    my_frame = query(
        "select min(temp), max(temp), avg(temp), max(wind) from forest_fires")
    pandas_frame = FOREST_FIRES.copy()
    pandas_frame["_col0"] = FOREST_FIRES.temp.copy()
    pandas_frame["_col1"] = FOREST_FIRES.temp.copy()
    pandas_frame["_col2"] = FOREST_FIRES.temp.copy()
    pandas_frame = pandas_frame.agg({
        "_col0": np.min,
        "_col1": np.max,
        "_col2": np.mean,
        "wind": np.max
    })
    pandas_frame.rename({"wind": "_col3"}, inplace=True)
    pandas_frame = pandas_frame.to_frame().transpose()
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 20
0
def test_select_star_from_multiple_tables():
    """
    Test selecting from two different tables
    :return:
    """
    my_frame = query("""select * from forest_fires, digimon_mon_list""")
    forest_fires = FOREST_FIRES.copy()
    digimon_mon_list = DIGIMON_MON_LIST.copy()
    pandas_frame = merge(forest_fires.assign(__=1),
                         digimon_mon_list.assign(__=1),
                         on="__",
                         how="inner")
    del pandas_frame["__"]
    renamed = {}
    for column in pandas_frame.columns:
        if "_x" in column:
            renamed[column] = "forest_fires." + column.replace("_x", "")
        if "_y" in column:
            renamed[column] = "digimon_mon_list." + column.replace("_y", "")
    pandas_frame.rename(columns=renamed, inplace=True)
    tm.assert_frame_equal(pandas_frame, my_frame)
Exemplo n.º 21
0
def pandas_frame2_for_set_ops():
    return (FOREST_FIRES.copy().sort_values(by=["wind"],
                                            ascending=[True],
                                            kind="mergesort").head(5))