Пример #1
0
# Report the number of rows held by each entry of candidate_dfs.
for key in candidate_dfs:
    print("Size of {} set: {}".format(key, len(candidate_dfs[key].index)))

# In[8]:

# Labeling functions stored under the "DaG" entry of DG_LFS, in mapping order.
label_functions = list(DG_LFS["DaG"].values())

if quick_load:
    # Reuse previously saved label matrices instead of relabeling.
    # NOTE: unpickling can execute arbitrary code; only load a file that was
    # produced by a trusted run of this pipeline.
    # The context manager guarantees the file handle is closed after loading.
    with open("label_matricies.pkl", "rb") as matrix_file:
        label_matricies = pickle.load(matrix_file)
else:
    # Label every split in a single batch: batch_size is set to the number of
    # candidate ids in that split.
    label_matricies = {
        key: label_candidates(
            session,
            candidate_dfs[key]['candidate_id'],
            label_functions,
            num_threads=10,
            batch_size=candidate_dfs[key]['candidate_id'].shape[0])
        for key in candidate_dfs
    }

# In[9]:

# Names of the labeling functions: the keys of the "DaG" entry of DG_LFS.
lf_names = list(DG_LFS["DaG"])

# ## Visualize Label Functions

# Before training the generative model, here are some visualizations for the given label functions. These visualizations are helpful in determining the efficacy of each label function as well as observing the overlaps and conflicts between functions.

# In[10]:
Пример #2
0
)


# In[10]:


if not quick_load:
    # Build the 'train' label matrix from the candidate ids of the rows where
    # split is 6 and both mention-count columns equal 1.
    label_matricies = dict(
        train=label_candidates(
            session,
            (
                total_candidates_df
                .query("split==6&compound_mention_count==1&gene_mention_count==1")
                ["candidate_id"]
                .values
                .tolist()
            ),
            lfs,
            lf_names,
            num_threads=10,
            batch_size=50000,
            multitask=False,
        )
    )


# In[11]:


# NOTE(review): this cell does not parse as Python. The
# `label_matricies.update({` call below is never closed, and the line after it
# starts a new `label_matricies = {...}` assignment inside the open brackets.
# It looks like two different snippets were spliced together here (the
# `update({` call is cut off and the following dict comprehension has lost its
# own `if` header) -- TODO confirm against the original notebooks before
# repairing.
# NOTE(review): the comprehension calls `labeler.load_matrix`, but within the
# visible code `labeler` is only assigned in the `else` branch; presumably it
# was defined earlier in the original source -- verify.
if not quick_load:
    label_matricies.update({
    label_matricies = {
        key:
        labeler.load_matrix(session,
                            cids_query=make_cids_query(session,
                                                       candidate_dfs[key]))
        for key in candidate_dfs
    }

else:
    # Create the annotator from the labeling functions, then label every
    # entry of candidate_dfs with it.
    labeler = LabelAnnotator(lfs=label_functions)

    label_matricies = {
        split_name: label_candidates(
            labeler,
            cids_query=make_cids_query(session, candidate_dfs[split_name]),
            label_functions=label_functions,
            # apply_existing is False only for the 'train' entry.
            apply_existing=(split_name != 'train'),
        )
        for split_name in candidate_dfs
    }

# In[9]:

# Collect the `.name` of the key for every column of the 'test' label matrix.
lf_names = [
    label_matricies['test'].get_key(session, column).name
    for column in range(label_matricies['test'].shape[1])
]

# ## Visualize Label Functions

# Before training the generative model, here are some visualizations for the given label functions. These visualizations are helpful in determining the efficacy of each label function as well as observing the overlaps and conflicts between functions.