Пример #1
0
# Give both tables a common time-stamp column name. `index=str` additionally
# maps every row label through `str`.
population_table = population_table.rename(
    index=str,
    columns={"time_stamp_population": "time_stamp"},
)
peripheral_table = peripheral_table.rename(
    index=str,
    columns={"time_stamp_peripheral": "time_stamp"},
)

# Target values may never be NaN, so every NaN target is replaced with 0.
population_table["targets"] = np.where(
    np.isnan(population_table["targets"]),
    0,
    population_table["targets"],
)

# ----------------
# Build model

# Placeholder for the population table: one numerical column, one join key
# and the target column.
population_placeholder = data.Placeholder(
    name="POPULATION",
    numerical=["column_01"],
    join_keys=["join_key"],
    targets=["targets"],
)

# Placeholder for the peripheral table: same numerical column and join key,
# but no targets.
peripheral_placeholder = data.Placeholder(
    name="PERIPHERAL",
    numerical=["column_01"],
    join_keys=["join_key"],
)

# Relate the peripheral placeholder to the population placeholder through
# the shared "join_key" column.
population_placeholder.join(peripheral_placeholder, "join_key")

# Predictor to be handed to the model below.
# Alternative that was tried here: predictors.XGBoostRegressor()
predictor = predictors.LinearRegression()

model = models.RelboostModel(population=population_placeholder,
                             peripheral=[peripheral_placeholder],
                             loss_function=loss_functions.SquareLoss(),
Пример #2
0
# The population table is uploaded in two steps. First, the frame is created
# with its column roles and filled from the slice `population_table[:20]`...
population_on_engine = data.DataFrame(
    name="POPULATION",
    roles={
        "join_key": ["join_key"],
        "numerical": ["column_01"],
        "time_stamp": ["time_stamp"],
        "target": ["targets"],
    },
).read_pandas(population_table[:20])

# ...then the remaining slice `population_table[20:]` is appended to the
# same frame.
population_on_engine.read_pandas(population_table[20:], append=True)

# ----------------
# Build model

# Name-only placeholders; no roles are passed at this point.
population_placeholder = data.Placeholder(name="numerical_population")
peripheral_placeholder = data.Placeholder(name="numerical_peripheral")

# Relate the two placeholders, passing "join_key" and "time_stamp"
# positionally (presumably the join key and the time stamp column --
# confirm against the Placeholder.join signature).
population_placeholder.join(peripheral_placeholder, "join_key", "time_stamp")

# Predictor to be handed to the model below.
predictor = predictors.LinearRegression()

model = models.MultirelModel(
    aggregation=[aggregations.Count, aggregations.Sum],
    population=population_placeholder,
    peripheral=[peripheral_placeholder],
    loss_function=loss_functions.SquareLoss(),
    predictor=predictor,
    num_features=10,
    share_aggregations=1.0,
                                          "target": ["column_01"],
                                          "time_stamp": ["time_stamp_lagged"]
                                      }).read_pandas(time_series)

# Upload `time_series` as the peripheral frame. Both "time_stamp" and
# "upper_time_stamp" are registered under the time_stamp role.
peripheral_on_engine = data.DataFrame(
    name="PERIPHERAL",
    roles={
        "join_key": ["join_key"],
        "numerical": ["column_01"],
        "time_stamp": ["time_stamp", "upper_time_stamp"],
    },
).read_pandas(time_series)

# ----------------
# Build model

# Both placeholders carry the same name, "TIME_SERIES".
population_placeholder = data.Placeholder(name="TIME_SERIES")
peripheral_placeholder = data.Placeholder(name="TIME_SERIES")

# Join on "join_key". The population side uses "time_stamp_lagged", the
# peripheral side uses "time_stamp", and "upper_time_stamp" is passed as the
# upper time stamp (presumably bounding how long a peripheral row stays
# eligible -- confirm against the Placeholder.join documentation).
population_placeholder.join(
    peripheral_placeholder,
    join_key="join_key",
    time_stamp="time_stamp_lagged",
    other_time_stamp="time_stamp",
    upper_time_stamp="upper_time_stamp",
)

# Predictor to be handed to the model below.
predictor = predictors.LinearRegression()

model = models.MultirelModel(
    aggregation=[aggregations.Count, aggregations.Sum],
    population=population_placeholder,
    peripheral=[peripheral_placeholder],