# # Train Baseline Model
#
# Trains the distant-supervision baseline model that later models are
# compared against.
# NOTE(review): the original header named the CbG databases, but this cell
# reads GiG marginals and `curated_gig` labels -- confirm which relation is
# intended.

# In[16]:

# `pd.np` was deprecated in pandas 1.0 and removed in pandas 2.0, so NumPy is
# imported directly instead of being reached through the pandas namespace.
import numpy as np

# Half-open range [ds_start, ds_end) of indices handed to
# train_baseline_model below (presumably the distant-supervision label
# columns -- verify against train_baseline_model).
ds_start = 0
ds_end = 9

# Five evenly spaced values from 0.01 to 5 (inclusive), rounded to 2 decimals.
regularization_grid = np.round(np.linspace(0.01, 5, num=5), 2)

# In[17]:

# train_baseline_model is defined elsewhere; its result is unpacked into a
# dev grid and a test grid.
dev_ds_grid, test_ds_grid = train_baseline_model(
    correct_L,
    correct_L_dev,
    candidate_dfs['dev'].curated_gig.values,
    correct_L_test,
    list(range(ds_start, ds_end)),
    regularization_grid,
    train_marginal_dir="data/random_sampling/GiG/marginals/",
)

# Keep column 0 of the first entry of each grid. This must happen before
# dev_ds_grid is rebound to the results table below.
dev_baseline_marginals = list(dev_ds_grid.values())[0][:, 0]
test_baseline_marginals = list(test_ds_grid.values())[0][:, 0]

# Turn the dev grid into a table: the former index becomes "l2_param" and the
# numeric columns 0 and 1 are relabelled AUPRC and AUROC.
dev_ds_grid = (
    generate_results_df(dev_ds_grid, candidate_dfs['dev'].curated_gig.values)
    .reset_index()
    .rename(index=str, columns={0: "AUPRC", 1: "AUROC", "index": "l2_param"})
)
# NOTE(review): this notebook cell has been collapsed onto a single physical
# line, so Python reads everything below as one comment and nothing executes.
# NOTE(review): the cell is also truncated -- the final
# `test_ds_grid = ( generate_results_df( ...` expression is never closed, so
# the remainder must be recovered from the original notebook before the cell
# is restored to multi-line form.
# NOTE(review): `pd.np` was deprecated in pandas 1.0 and removed in pandas
# 2.0; when restoring this cell, import numpy directly and call
# `np.round(np.linspace(...))` instead.
# This block trains the baseline model (Distant Supervision of CbG Databases) that will be used as a reference to compare against. # In[16]: ds_start = 0 ds_end = 9 regularization_grid = pd.np.round(pd.np.linspace(0.01, 5, num=5), 2) # In[17]: dev_ds_grid, test_ds_grid = train_baseline_model( correct_L, correct_L_dev, correct_L_test, list(range(ds_start, ds_end)), regularization_grid ) dev_ds_grid = ( generate_results_df( dev_ds_grid, candidate_dfs['dev'].curated_cbg.values ) .reset_index() .rename(index=str, columns={0:"AUPRC", 1:"AUROC", "index":"l2_param"}) ) test_ds_grid = ( generate_results_df( test_ds_grid, candidate_dfs['test'].curated_cbg.values
# This block trains the baseline model (Distant Supervision of GiG Databases)
# that will be used as a reference to compare against.

# In[16]:

# `pd.np` was deprecated in pandas 1.0 and removed in pandas 2.0, so NumPy is
# imported directly instead of being reached through the pandas namespace.
import numpy as np

# Half-open range [ds_start, ds_end) of indices passed to
# train_baseline_model below (presumably the distant-supervision label
# columns -- verify against train_baseline_model).
ds_start = 0
ds_end = 9

# Five evenly spaced values from 0.01 to 5 (inclusive), rounded to 2 decimals.
regularization_grid = np.round(np.linspace(0.01, 5, num=5), 2)

# In[17]:

# train_baseline_model is defined elsewhere; its result is unpacked into a
# dev grid and a test grid.
dev_ds_grid, test_ds_grid = train_baseline_model(
    correct_L,
    correct_L_dev,
    correct_L_test,
    list(range(ds_start, ds_end)),
    regularization_grid,
    train_marginal_dir="data/random_sampling/GiG/marginals/",
)

# Keep column 0 of the first entry of each grid. This must happen before
# dev_ds_grid is rebound to the results table below.
dev_baseline_marginals = list(dev_ds_grid.values())[0][:, 0]
test_baseline_marginals = list(test_ds_grid.values())[0][:, 0]

# Turn the dev grid into a table: the former index becomes "l2_param" and the
# numeric columns 0 and 1 are relabelled AUPRC and AUROC.
dev_ds_grid = (
    generate_results_df(dev_ds_grid, candidate_dfs['dev'].curated_gig.values)
    .reset_index()
    .rename(index=str, columns={0: "AUPRC", 1: "AUROC", "index": "l2_param"})
)