# Class ids for the inside / outside / begin tags (presumably an IOB-style
# tagging scheme -- confirm against the model code).
inside_labels = [0]
outside_labels = [1]
begin_labels = [2]

# L is used below in the (L - 1) / 2 divisor when computing `f`
# (presumably the number of label classes -- confirm).
L = 3

# Created empty here; nothing is stored in it within this fragment.
confmats = {}

# Handle one worker model per iteration. `worker_models` and `output_dir` are
# defined outside this fragment.
for w, worker_model in enumerate(worker_models):

    # Reuse a previously pickled EPi_list for this worker model if the file exists.
    outputfile = output_dir + '/EPi_list_%s.pkl' % worker_model
    if os.path.exists(outputfile):
        # NOTE(review): pickle.load can execute arbitrary code from the file, so
        # this is only safe while the cache directory is trusted.
        with open(outputfile, 'rb') as fh:
            EPi_list = pickle.load(fh)
    else:
        # No cached file: load the biomedical data instead.
        # NOTE(review): `s` is not defined anywhere in this fragment -- confirm
        # where it comes from.
        gt, annos, doc_start, text, gt_task1_dev, gt_dev, doc_start_dev, text_dev = load_data.load_biomedical_data(
            False, s)

        # get all the gold-labelled data together: wherever gt_task1_dev is not -1,
        # its value overwrites the corresponding entry of gt
        # (presumably -1 marks "no gold label" -- confirm).
        goldidxs_dev = gt_task1_dev != -1
        gt[goldidxs_dev] = gt_task1_dev[goldidxs_dev]

        # Hyperparameter settings; nu0_factor is not read within this fragment.
        nu0_factor = 0.1
        alpha0_diags = 0.1
        alpha0_factor = 0.1

        # matrices are repeated for the different annotators/previous label conditions inside the BAC code itself.
        if worker_model == 'seq' or worker_model == 'ibcc' or worker_model == 'vec':

            # f is alpha0_factor divided by (L - 1) / 2.
            f = alpha0_factor / ((L - 1) / 2)
            # The added and subtracted alpha0_factor terms cancel, so d equals
            # alpha0_diags (up to floating-point rounding).
            d = alpha0_diags + alpha0_factor - alpha0_factor
'''
Created on April 27, 2018

@author: Edwin Simpson
'''
import os
import evaluation.experiment
from evaluation.experiment import Experiment
import data.load_data as load_data

# Load the biomedical data; regen_data is passed straight through to the loader.
# The last three returned values are not needed and are bound to `_`.
regen_data = False
(gt, annos, doc_start, features, gt_val,
 _, _, _) = load_data.load_biomedical_data(regen_data)  # , debug_subset_size=900)

# Hyperparameter values. The first three are embedded in the output directory
# name below; best_begin_factor is not read within this fragment.
beta0_factor = 1
alpha0_diags = alpha0_factor = best_begin_factor = 10

# Results go to a sub-directory of the experiment output root whose name
# records the hyperparameter values.
output_dir = os.path.join(evaluation.experiment.output_root_dir,
                          'pico3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))
exp = Experiment(output_dir,
                 3,
                 annos,
                 gt,
                 doc_start,
                 features,
                 annos,
                 gt_val,
                 doc_start,
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, load the gold labels for
    # the requested dataset and derive the number of classes from them.

    parser = ArgumentParser()
    parser.add_argument("dir", help="Directory containing the pred_xxx.csv files for recomputing performance.")
    parser.add_argument("dataset", help="use NER or PICO to select on of the two loaders for the gold labels")
    parser.add_argument("strict", help="STRICT=use strict span-level precision, recall and f1, or RELAXED=count fractions of span matches")
    args = parser.parse_args()

    if args.dataset == 'NER':
        gt, annos, doc_start, _, gt_nocrowd, doc_start_nocrowd, _, _, gt_val, doc_start_val, _ = \
            load_data.load_ner_data(False)

    elif args.dataset == 'PICO':  # task 1
        gt, annos, doc_start, _, _, gt_nocrowd, doc_start_nocrowd, _ = load_data.load_biomedical_data(False)

    elif args.dataset == 'PICO2':  # task 2
        gt, annos, doc_start, _, _, _, _, _ = load_data.load_biomedical_data(False)

        # Keep only the positions whose gold label is not -1.
        gold_labelled = gt.flatten() != -1

        gt_nocrowd = gt[gold_labelled]
        doc_start_nocrowd = doc_start[gold_labelled]
    else:
        # An unknown dataset leaves `gt` unbound, so carrying on would only fail
        # with a NameError at np.max(gt) below. Abort with the message instead;
        # parser.error() prints usage plus the message and exits with status 2.
        parser.error('Invalid dataset %s' % args.dataset)

    # Labels are taken to be 0-based class ids, so the class count is max + 1.
    nclasses = np.max(gt) + 1

    if not os.path.isdir(args.dir):
        print('The dir argument must specify a directory.')
示例#4
0
'''
Created on April 27, 2018

@author: Edwin Simpson
'''

from evaluation.experiment import Experiment
import data.load_data as load_data
import numpy as np

# Output location for the pico2 runs; it is not passed to Experiment within
# this fragment.
output_dir = '../../data/bayesian_sequence_combination/output/pico2/'
regen_data = False

# Load the biomedical data; regen_data is passed straight through to the loader.
(gt, annos, doc_start, text,
 gt_task1_dev, gt_dev, doc_start_dev, text_dev) = load_data.load_biomedical_data(regen_data)

# this is the one we used in the paper with O | O multiplier of 5.
best_nu0factor, best_diags, best_factor = 1, 10, 100

# First experiment object: results are saved, opt_hyper is switched off.
exp = Experiment(None, 3, annos.shape[1], None, max_iter=20)
exp.save_results = True
exp.opt_hyper = False

# ------------------------------------------------------------------------------------------------
# A fresh experiment object replaces the one above; only save_results is set
# on it within this fragment.
exp = Experiment(None, 3, annos.shape[1], None, max_iter=20)
exp.save_results = True
示例#5
0
'''
Created on April 27, 2018

@author: Edwin Simpson
'''
import os

import evaluation.experiment
from evaluation.experiment import Experiment
import data.load_data as load_data
import numpy as np

# Load the biomedical data; regen_data is passed straight through to the loader.
# The last three returned values are not needed and are bound to `_`.
# To debug with a small dataset, additionally pass debug_subset_size=1000.
regen_data = False
(gt, annos, doc_start, features, gt_val,
 _, _, _) = load_data.load_biomedical_data(regen_data)

# ------------------------------------------------------------------------------------------------

# only hmm_Crowd actually uses these hyperparameters
beta0_factor = alpha0_diags = alpha0_factor = 0.1

# Results go to the 'pico3' sub-directory of the experiment output root.
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'pico3',
)
exp = Experiment(output_dir,
                 3,
                 annos,
                 gt,
                 doc_start,
                 features,
                 annos,