Exemplo n.º 1
0
# Transform the population table (together with its peripheral table)
# using the model and keep the result in `features`.
features = model.transform(
    population_table=population_table,
    peripheral_tables=[peripheral_table],
)

# Same transformation, this time also passing a name via `df_name`.
# NOTE(review): presumably `df_name` makes the result available under
# that name rather than (only) returning it - confirm against the
# model API.
features = model.transform(
    population_table=population_table,
    peripheral_tables=[peripheral_table],
    df_name="features",
)

# Predictions for the same pair of tables.
yhat = model.predict(
    population_table=population_table,
    peripheral_tables=[peripheral_table],
)

# Print whatever `to_sql()` returns for the model.
print(model.to_sql())

# Score the model on the same pair of tables and show the result.
scores = model.score(
    population_table=population_table,
    peripheral_tables=[peripheral_table],
)

print(scores)

# Clean up: remove the "examples" project.
engine.delete_project("examples")
Exemplo n.º 2
0
def test_relboost_same_units():
    """Check that units set on the columns and units given to the model agree.

    The same artificial dataset is generated twice from the same seed:

    1. The units are set directly on the columns of both tables and an
       empty units dict is handed to ``_fit_model``.
    2. The tables are left untouched and the units dict is handed to
       ``_fit_model`` instead.

    The scores and the predictions of both runs must be identical.

    The "examples" project is deleted when the test ends, regardless of
    whether it passed, so a failing run does not leave the project behind.
    """

    engine.set_project("examples")

    try:
        seed = 33231

        # Single source of truth for the units used by both runs.
        # NOTE(review): presumably maps column name -> unit; key and value
        # are identical here, so the orientation does not matter for this
        # test - confirm before adding entries where they differ.
        units = {"column_01": "column_01"}

        # ------------------------------------------------------------

        # Generate artificial dataset
        # The problem we create looks like this:
        #
        # SELECT COUNT( * )
        # FROM POPULATION t1
        # LEFT JOIN PERIPHERAL t2
        # ON t1.join_key = t2.join_key
        # WHERE (
        #    ( t1.time_stamp - t2.time_stamp <= 0.5 )
        # ) AND t2.time_stamp <= t1.time_stamp
        # GROUP BY t1.join_key,
        #          t1.time_stamp;

        # ------------------------------------------------------------

        # Run 1: assign the units to the columns.
        population_table_columns, peripheral_table_columns = (
            datasets.make_same_units_numerical(random_state=seed))

        for column_name, unit in units.items():
            population_table_columns.set_unit(column_name, unit)
            peripheral_table_columns.set_unit(column_name, unit)

        population_placeholder_columns = (
            population_table_columns.to_placeholder())
        peripheral_placeholder_columns = (
            peripheral_table_columns.to_placeholder())
        population_placeholder_columns.join(
            peripheral_placeholder_columns, "join_key", "time_stamp")

        # Only the predictions and the scores are compared below, so the
        # model and the features returned by _fit_model are discarded.
        # The units already live on the columns, hence the empty dict.
        _, _, yhat_columns, scores_columns = _fit_model(
            population_table_columns, peripheral_table_columns,
            population_placeholder_columns, peripheral_placeholder_columns,
            seed, {})

        # ------------------------------------------------------------

        # Run 2: assign the units to the model.
        population_table_model, peripheral_table_model = (
            datasets.make_same_units_numerical(random_state=seed))

        population_placeholder_model = population_table_model.to_placeholder()
        peripheral_placeholder_model = peripheral_table_model.to_placeholder()
        population_placeholder_model.join(
            peripheral_placeholder_model, "join_key", "time_stamp")

        _, _, yhat_model, scores_model = _fit_model(
            population_table_model, peripheral_table_model,
            population_placeholder_model, peripheral_placeholder_model,
            seed, units)

        # ------------------------------------------------------------

        # Check whether the results are the same.
        assert scores_model == scores_columns

        assert (yhat_model == yhat_columns).all()

    finally:
        # Always clean up, even when fitting or an assertion fails.
        engine.delete_project("examples")