# # Train Baseline Model

# This block trains the baseline model (Distant Supervision of GiG Databases) that will be used as a reference to compare against.

# In[16]:

# NumPy is imported directly: the `pd.np` alias was deprecated in pandas 1.0
# and removed in pandas 2.0, so `pd.np.linspace` fails on current releases.
import numpy as np

# Half-open index range [ds_start, ds_end) that is expanded with range() and
# forwarded to train_baseline_model in the next cell.
ds_start = 0
ds_end = 9

# Five evenly spaced values from 0.01 to 5 (inclusive), rounded to two
# decimals; presumably L2 regularization strengths -- confirm against
# train_baseline_model.
regularization_grid = np.round(np.linspace(0.01, 5, num=5), 2)

# In[17]:

# Train the baseline, forwarding the label matrices, the curated dev labels,
# the index range [ds_start, ds_end) and the regularization grid. Two result
# collections come back: one for the dev split and one for the test split.
dev_ds_grid, test_ds_grid = train_baseline_model(
    correct_L, correct_L_dev,
    candidate_dfs['dev'].curated_gig.values,
    correct_L_test,
    list(range(ds_start, ds_end)), regularization_grid,
    train_marginal_dir="data/random_sampling/GiG/marginals/",
)

# Grab column 0 of the first entry in each collection while dev_ds_grid still
# holds the raw results (it is rebound to a summary table just below).
# NOTE(review): presumably the entries are keyed by regularization value and
# hold per-candidate marginals -- confirm against train_baseline_model.
dev_baseline_marginals = list(dev_ds_grid.values())[0][:, 0]
test_baseline_marginals = list(test_ds_grid.values())[0][:, 0]

# Score the dev results against the curated GiG labels, then reshape the
# output step by step: move the index into a column, stringify the row labels,
# and give the metric columns readable names.
dev_ds_grid = generate_results_df(
    dev_ds_grid, candidate_dfs['dev'].curated_gig.values)
dev_ds_grid = dev_ds_grid.reset_index()
dev_ds_grid = dev_ds_grid.rename(
    index=str,
    columns={0: "AUPRC", 1: "AUROC", "index": "l2_param"},
)
# Example no. 2
# 0
# This block trains the baseline model (Distant Supervision of CbG Databases) that will be used as a reference to compare against.

# In[16]:


# NumPy is imported directly: the `pd.np` alias was deprecated in pandas 1.0
# and removed in pandas 2.0, so `pd.np.linspace` fails on current releases.
import numpy as np

# Half-open index range [ds_start, ds_end) that is expanded with range() and
# forwarded to train_baseline_model in the next cell.
ds_start = 0
ds_end = 9

# Five evenly spaced values from 0.01 to 5 (inclusive), rounded to two
# decimals; presumably L2 regularization strengths -- confirm against
# train_baseline_model.
regularization_grid = np.round(np.linspace(0.01, 5, num=5), 2)


# In[17]:


# Train the baseline, forwarding the three label matrices, the index range
# [ds_start, ds_end) and the regularization grid. Two result collections come
# back: one for the dev split and one for the test split.
dev_ds_grid, test_ds_grid = train_baseline_model(
    correct_L,
    correct_L_dev,
    correct_L_test,
    list(range(ds_start, ds_end)),
    regularization_grid,
)

# Score the dev results against the curated CbG labels, then reshape the
# output step by step: move the index into a column, stringify the row labels,
# and give the metric columns readable names.
dev_ds_grid = generate_results_df(
    dev_ds_grid, candidate_dfs['dev'].curated_cbg.values)
dev_ds_grid = dev_ds_grid.reset_index()
dev_ds_grid = dev_ds_grid.rename(
    index=str,
    columns={0: "AUPRC", 1: "AUROC", "index": "l2_param"},
)

test_ds_grid = (
    generate_results_df(
        test_ds_grid, 
        candidate_dfs['test'].curated_cbg.values
# This block trains the baseline model (Distant Supervision of GiG Databases) that will be used as a reference to compare against.

# In[16]:


# NumPy is imported directly: the `pd.np` alias was deprecated in pandas 1.0
# and removed in pandas 2.0, so `pd.np.linspace` fails on current releases.
import numpy as np

# Half-open index range [ds_start, ds_end) that is expanded with range() and
# forwarded to train_baseline_model in the next cell.
ds_start = 0
ds_end = 9

# Five evenly spaced values from 0.01 to 5 (inclusive), rounded to two
# decimals; presumably L2 regularization strengths -- confirm against
# train_baseline_model.
regularization_grid = np.round(np.linspace(0.01, 5, num=5), 2)


# In[17]:


# Train the baseline, forwarding the three label matrices, the index range
# [ds_start, ds_end) and the regularization grid. Two result collections come
# back: one for the dev split and one for the test split.
dev_ds_grid, test_ds_grid = train_baseline_model(
    correct_L,
    correct_L_dev,
    correct_L_test,
    list(range(ds_start, ds_end)),
    regularization_grid,
    train_marginal_dir="data/random_sampling/GiG/marginals/",
)

# Grab column 0 of the first entry in each collection while dev_ds_grid still
# holds the raw results (it is rebound to a summary table just below).
# NOTE(review): presumably the entries are keyed by regularization value and
# hold per-candidate marginals -- confirm against train_baseline_model.
dev_baseline_marginals = list(dev_ds_grid.values())[0][:, 0]
test_baseline_marginals = list(test_ds_grid.values())[0][:, 0]

# Score the dev results against the curated GiG labels, then reshape the
# output step by step: move the index into a column, stringify the row labels,
# and give the metric columns readable names.
dev_ds_grid = generate_results_df(
    dev_ds_grid, candidate_dfs['dev'].curated_gig.values)
dev_ds_grid = dev_ds_grid.reset_index()
dev_ds_grid = dev_ds_grid.rename(
    index=str,
    columns={0: "AUPRC", 1: "AUROC", "index": "l2_param"},
)