# NOTE(review): whitespace-collapsed fragment. It ends inside an unclosed
# `model_0 = api.Model(` call, so the remainder of that definition is not
# visible here.
# Visible content: features_test aliases test_features; features_0..features_4
# are cumulative lists, each adding one more variable group to
# basic_features + mdums + cdums (structural, then political, then survey, then
# corona); estimators = 200; api.Model specs for outcome "ged_dummy_sb" with
# outcome_type="real", each given its own RandomForestRegressor.
# NOTE(review): features_benchmark, steps and periods are used but not defined
# in this fragment.
# NOTE(review): scikit-learn renamed criterion="mse" to "squared_error" in 1.0
# and removed the old spelling in 1.2 -- confirm the pinned version.
features_test = test_features features_0 = basic_features + mdums + cdums features_1 = basic_features + mdums + cdums + structural_variables features_2 = basic_features + mdums + cdums + structural_variables + political_variables features_3 = basic_features + mdums + cdums + structural_variables + political_variables + survey_variables features_4 = basic_features + mdums + cdums + structural_variables + political_variables + survey_variables + corona_variables estimators = 200 model_baseline = api.Model(name="benchmark model", col_outcome="ged_dummy_sb", cols_features=features_benchmark, steps=steps, periods=periods, outcome_type="real", estimator=RandomForestRegressor( n_jobs=-1, criterion="mse", n_estimators=estimators), tags=["sb"]) model_0 = api.Model(name="basic model", col_outcome="ged_dummy_sb", cols_features=features_0, steps=steps, periods=periods, outcome_type="real", estimator=RandomForestRegressor(n_jobs=-1, criterion="mse", n_estimators=estimators),
# NOTE(review): whitespace-collapsed fragment. It ends inside an unclosed
# `model_d0 = api.Model(` call, so the remainder of that definition is not
# visible here. As collapsed, Python reads everything from `#features_4`
# onward on this physical line as a comment.
# Visible content: features_0..features_3 built from basic_features + mdums +
# cdums plus further variable groups (features_1 and features_3 include
# corona_variables; the features_4 assignment is commented out);
# estimators = 200; api.Model specs for outcome "ged_dummy_sb" with
# outcome_type="prob" and delta_outcome=True, each given its own
# RandomForestClassifier.
# NOTE(review): features_benchmark, steps and periods are used but not defined
# in this fragment.
features_test = test_features features_0 = basic_features + mdums + cdums features_1 = basic_features + mdums + cdums + structural_variables + corona_variables features_2 = basic_features + mdums + cdums + structural_variables + political_variables features_3 = basic_features + mdums + cdums + structural_variables + corona_variables + political_variables + survey_variables #features_4 = basic_features + mdums + cdums + structural_variables + political_variables + survey_variables + corona_variables estimators = 200 model_baseline = api.Model( name = "benchmark model", col_outcome= "ged_dummy_sb", cols_features = features_benchmark, steps = steps, periods = periods, outcome_type = "prob", delta_outcome = True, estimator=RandomForestClassifier(n_jobs=-1, n_estimators=estimators), tags=["sb"] ) model_d0 = api.Model( name = "basic model", col_outcome = "ged_dummy_sb", cols_features = features_0, steps = steps, periods = periods, outcome_type = "prob", delta_outcome = True, estimator = RandomForestClassifier(n_jobs=-1, n_estimators=estimators),
# NOTE(review): whitespace-collapsed fragment. It starts inside a branch of a
# conditional chain on `task` whose opening `if` is not visible; the first two
# assignments belong to that earlier branch.
# Visible content: the `task == 4` branch sets features_m1..features_m3
# (features_m3 is all_vars); then estimators = 200 and two api.Model specs for
# outcome "ged_dummy_sb" with outcome_type="real", each given its own
# RandomForestRegressor.
# NOTE(review): features_m0, steps and periods are used but not defined in this
# fragment.
# NOTE(review): the model names "basic_model " and "structural_model " end with
# a space -- confirm that is intended before relying on them as lookup keys.
# NOTE(review): scikit-learn renamed criterion="mse" to "squared_error" in 1.0
# and removed the old spelling in 1.2 -- confirm the pinned version.
features_m2 = basic_features + structural_variables + corona_variables + political_variables features_m3 = all_vars elif task == 4: features_m1 = basic_features + structural_variables + corona_variables features_m2 = basic_features + structural_variables + corona_variables + political_variables features_m3 = all_vars #number of estimator estimators = 200 #normal models model_0 = api.Model(name="basic_model ", col_outcome="ged_dummy_sb", cols_features=features_m0, steps=steps, periods=periods, outcome_type="real", estimator=RandomForestRegressor(n_jobs=-1, criterion="mse", n_estimators=estimators), tags=["sb"]) model_1 = api.Model(name="structural_model ", col_outcome="ged_dummy_sb", cols_features=features_m1, steps=steps, periods=periods, outcome_type="real", estimator=RandomForestRegressor(n_jobs=-1, criterion="mse", n_estimators=estimators), tags=["sb"])
# NOTE(review): whitespace-collapsed fragment. The final
# `cm_sb_wdi_global = api.Model(` call is missing its closing parenthesis, so
# the fragment is cut off here. As collapsed, Python reads everything from the
# first `#` onward on this physical line as a comment.
# Visible content: one RandomForestClassifier `rf` (n_estimators=10_000);
# run_id "d_2020_04_01" passed to get_periods(); a fixed list of ten steps;
# `fullsample`, an api.Downsampling with share_positive=1.0 and
# share_negative=1.0; api.Model specs whose col_outcome / cols_features are
# read from the `cm` mapping, with outcome_type="prob" and tag "train_global".
# NOTE(review): the same `rf` instance is passed to every model -- confirm that
# api.Model copies the estimator rather than sharing fitted state.
rf = RandomForestClassifier(n_jobs=-1, n_estimators=10_000) # The currently latest model development run id run_id = "d_2020_04_01" periods: List[api.Period] = get_periods(run_id=run_id) steps = [1, 3, 6, 9, 12, 18, 24, 30, 36, 38] fullsample = api.Downsampling(share_positive=1.0, share_negative=1.0) cm_sb_vdem_global = api.Model( name="cm_sb_vdem_global", col_outcome=cm["sb_vdem_global"]["col_outcome"], cols_features=cm["sb_vdem_global"]["cols_features"], steps=steps, outcome_type="prob", estimator=rf, periods=periods, downsampling=fullsample, tags=["train_global"], ) cm_sb_wdi_global = api.Model( name="cm_sb_wdi_global", col_outcome=cm["sb_wdi_global"]["col_outcome"], cols_features=cm["sb_wdi_global"]["cols_features"], steps=steps, outcome_type="prob", estimator=rf, periods=periods, downsampling=fullsample, tags=["train_global"],
# Feature sets for the t4 models. Every list starts from
# basic_features + mdums + cdums, except features_1, which is the benchmark set.
features_0 = basic_features + mdums + cdums
features_1 = benchmark_features
features_2 = (
    basic_features
    + mdums
    + cdums
    + structural_variables
    + corona_variables
    + political_variables
)
# Earlier alternatives, kept for reference:
#features_1 = political_variables_part
#features_2 = basic_features + mdums + cdums + structural_variables + political_variables
features_3 = (
    basic_features
    + mdums
    + cdums
    + structural_variables
    + political_variables
    + survey_variables
)
#features_4 = basic_features + mdums + cdums + structural_variables

# Number of trees in every random forest below.
estimators = 200

# Each model gets its own estimator instance so no fitted state is shared.
# The split criterion is left at the scikit-learn default (mean squared error
# in every release): the explicit spelling "mse" was renamed to "squared_error"
# in 1.0 and is rejected from 1.2 on, so omitting it keeps the same criterion
# on old and new versions alike.
model_0 = api.Model(
    name="t4_model_basic",
    col_outcome="ged_dummy_sb",
    cols_features=features_0,
    steps=steps,
    periods=periods_t4,
    outcome_type="real",
    estimator=RandomForestRegressor(n_jobs=-1, n_estimators=estimators),
    tags=["sb"],
)

model_1 = api.Model(
    name="t4_model_benchmark",
    col_outcome="ged_dummy_sb",
    cols_features=features_1,
    steps=steps,
    periods=periods_t4,
    outcome_type="real",
    estimator=RandomForestRegressor(n_jobs=-1, n_estimators=estimators),
    tags=["sb"],
)
# NOTE(review): whitespace-collapsed fragment. It ends at a bare
# `pgm_sb_pgd_social = api.Model(`, so that definition is not visible here. As
# collapsed, Python reads everything from the first `#` onward on this physical
# line as a comment.
# Visible content: get_periods import; module logger `log`; one
# RandomForestClassifier `rf` (n_estimators=1_000); run_id "d_2020_04_01"
# passed to get_periods(); a fixed list of ten steps; api.Model specs whose
# col_outcome / cols_features are read from the `pgm` mapping, with
# outcome_type="prob" and tag "train_africa".
# NOTE(review): the same `rf` instance is passed to every model -- confirm that
# api.Model copies the estimator rather than sharing fitted state.
from views.specs.periods import get_periods log = logging.getLogger(__name__) rf = RandomForestClassifier(n_jobs=-1, n_estimators=1_000) # The currently latest model development run id run_id = "d_2020_04_01" periods: List[api.Period] = get_periods(run_id=run_id) steps = [1, 3, 6, 9, 12, 18, 24, 30, 36, 38] pgm_sb_allthemes = api.Model( name="pgm_sb_allthemes", col_outcome=pgm["sb_allthemes"]["col_outcome"], cols_features=pgm["sb_allthemes"]["cols_features"], steps=steps, outcome_type="prob", estimator=rf, periods=periods, tags=["train_africa"], ) pgm_sb_pgd_natural = api.Model( name="pgm_sb_pgd_natural", col_outcome=pgm["sb_pgd_natural"]["col_outcome"], cols_features=pgm["sb_pgd_natural"]["cols_features"], steps=steps, outcome_type="prob", estimator=rf, periods=periods, tags=["train_africa"], ) pgm_sb_pgd_social = api.Model(
# In[13]:

# Specify number of estimators in RF estimator
n_estimators = 200

# In[14]:

# Define the benchmark models.
# The split criterion is left at the scikit-learn default (mean squared error
# in every release): the explicit spelling "mse" was renamed to "squared_error"
# in 1.0 and is rejected from 1.2 on, so omitting it keeps the same criterion
# on old and new versions alike.
benchmark_delta = api.Model(
    name="benchmark_delta",
    col_outcome="ln_ged_best_sb",
    cols_features=cols_features,
    steps=steps,
    outcome_type="real",
    periods=periods,
    estimator=RandomForestRegressor(
        n_estimators=n_estimators,
        n_jobs=-1,
    ),
    delta_outcome=True,
    downsampling=downsampling,
)
models = [benchmark_delta]

# ## Model fit, prediction, and evaluation

# In[15]:

# NOTE(review): the training cell below was a notebook cell magic and is
# commented out in this export, so nothing in this fragment fits the models.
#get_().run_cell_magic('time', '', '# Train all models\nfor model in models:\n model.fit_estimators(df)')
# NOTE(review): whitespace-collapsed fragment. It ends inside an unclosed
# `RandomForestRegressor(` call within `model_1_t1 = api.Model(`, so the
# remainder of that definition is not visible here. As collapsed, Python reads
# everything from the first `##` onward on this physical line as a comment.
# Visible content: features_m0_t3..features_m3_t3 are cumulative lists, each
# adding one more variable group to basic_features (structural, then political,
# then survey); estimators = 200; task-1 api.Model specs for outcome
# "ged_dummy_sb" with outcome_type="real".
# NOTE(review): features_m0_t1, features_m1_t1, steps and periods_t1 are used
# but not defined in this fragment.
# NOTE(review): scikit-learn renamed criterion="mse" to "squared_error" in 1.0
# and removed the old spelling in 1.2 -- confirm the pinned version.
features_m0_t3 = basic_features features_m1_t3 = basic_features + structural_variables features_m2_t3 = basic_features + structural_variables + political_variables features_m3_t3 = basic_features + structural_variables + political_variables + survey_variables ##number of estimator estimators = 200 ##task 1, normal models model_0_t1 = api.Model( name="basic_model_t1", col_outcome="ged_dummy_sb", cols_features=features_m0_t1, steps=steps, periods=periods_t1, outcome_type="real", estimator=RandomForestRegressor(n_jobs=-1, criterion="mse", n_estimators=estimators), tags=["sb"]) model_1_t1 = api.Model( name="structural_model_t1", col_outcome="ged_dummy_sb", cols_features=features_m1_t1, steps=steps, periods=periods_t1, outcome_type="real", estimator=RandomForestRegressor(n_jobs=-1, criterion="mse",