def _run_synth_exp(self):
        # initialise result array
        results = np.zeros((self.param_values.shape[0], len(SCORE_NAMES), len(self.methods), self.num_runs))
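        # axes of the results array: (parameter setting, evaluation score, method, run)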

        # iterate through parameter settings
        for param_idx in range(self.param_values.shape[0]):

            print('parameter setting: {0}'.format(param_idx))

            # pattern for the per-parameter, per-run data directories
            path_pattern = self.output_dir + 'data/param{0}/set{1}/'

            # iterate through data sets
            for run_idx in range(self.num_runs):
                print('data set number: {0}'.format(run_idx))

                data_path = path_pattern.format(param_idx, run_idx)

                # remove stale prediction and probability files left over from previous runs
                for stale_name in ('pred_.csv', 'probs_.csv'):
                    stale_path = os.path.join(data_path, stale_name)
                    if os.path.exists(stale_path):
                        os.remove(stale_path)

                # read data
                doc_start, gt, annos = self.generator.read_data_file(data_path + 'full_data.csv')
                exp = Experiment(data_path, self.generator.num_labels, annos, gt, doc_start, None,
                                 alpha0_factor=self.alpha0_factor, alpha0_diags=self.alpha0_diags,
                                 beta0_factor=self.beta0_factor, begin_factor=self.begin_factor,
                                 max_iter=self.max_iter, crf_probs=self.crf_probs, bootstrapping=False)
                exp.methods = self.methods
                # run methods
                results[param_idx, :, :, run_idx], preds, probabilities, _, _, _ = exp.run_methods(
                    new_data=True, save_with_timestamp=False)


        if self.save_results:
            if not os.path.exists(self.output_dir):
                os.makedirs(self.output_dir)

            print('Saving results...')
            # np.savetxt(self.output_dir + 'results.csv', np.mean(results, 3)[:, :, 0])
            results.dump(self.output_dir + 'results')

        if self.show_plots or self.save_plots:
            plot_results(self.param_values, self.methods, self.param_idx, results, self.show_plots,
                         self.save_plots, self.output_dir)

        return results
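
# --- minimal sketch (not part of the original class) of reloading the dumped results ---
# It assumes only the array layout created above, (parameter setting, score, method, run),
# i.e. the same layout that the commented-out np.savetxt line averages over.
import numpy as np

output_dir = './output/'  # hypothetical path; use the experiment's real output_dir
results = np.load(output_dir + 'results', allow_pickle=True)  # counterpart of results.dump()
mean_over_runs = results.mean(axis=3)  # average over the num_runs axis
print(mean_over_runs[:, :, 0])  # all scores for the first method, one row per parameter setting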
Example #2
best_diags = 1
best_factor = 1
best_acc_bias = 0

exp.alpha0_diags = best_diags
exp.alpha0_factor = best_factor
exp.nu0_factor = best_nu0factor
exp.alpha0_acc_bias = best_acc_bias

exp.methods = [
    'majority',
    # 'mace',
    # 'ds',
    # 'ibcc',
    # 'bac_vec_integrateIF',
    # 'bac_ibcc_integrateIF',
    # 'bac_mace_integrateIF',
    # 'HMM_crowd',
    # 'best',
    # 'worst',
    # best_bac_wm,
    'bac_seq_integrateIF',
]

exp.run_methods(annos,
                gt,
                doc_start,
                output_dir,
                text,
                ground_truth_val=gt_val,
                doc_start_val=doc_start_val,
                text_val=text_val)
Example #3
for rep in range(1, num_reps):

    output_dir = '../data/bayesian_sequence_combination/output/ner_al_super_new/'
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)

    exp = Experiment(None, 9, annos.shape[1], None, max_iter=20, crf_probs=True, rep=rep)
    exp.save_results = True
    exp.opt_hyper = False  # True

    exp.nu0_factor = 0.1
    exp.alpha0_diags = 1  # best_diags
    exp.alpha0_factor = 1  # 9 # best_factor

    exp.methods = [
        'bac_seq_integrateIF',
        'HMM_crowd',
    ]

    results, preds, probs, results_nocrowd, preds_nocrowd, probs_nocrowd = exp.run_methods(
                        annos, gt, doc_start, output_dir, text,
                        ground_truth_val=gt_val, doc_start_val=doc_start_val, text_val=text_val,
                        ground_truth_nocrowd=gt_nocrowd, doc_start_nocrowd=doc_start_nocrowd, text_nocrowd=text_nocrowd,
                        active_learning=True, AL_batch_fraction=batch_frac, max_AL_iters=AL_iters
    )

    # exp = Experiment(None, 9, annos.shape[1], None, max_iter=10, crf_probs=True, rep=rep)
    # exp.save_results = True
    # exp.opt_hyper = False#True

    # exp.nu0_factor = 0.1
    # exp.alpha0_diags = 100 # best_diags
                 gt,
                 doc_start,
                 features,
                 annos,
                 gt_val,
                 doc_start,
                 features,
                 alpha0_factor=alpha0_factor,
                 alpha0_diags=alpha0_diags,
                 beta0_factor=beta0_factor,
                 max_iter=20,
                 begin_factor=best_begin_factor)
# run all the methods that don't require tuning here
exp.methods = [
    # 'bac_seq_integrateIF',
    'bac_seq_integrateIF_thenLSTM',
    'bac_seq_integrateIF_integrateLSTM_atEnd',
]
# this will run task 1 -- train on all crowdsourced data, test on the labelled portion thereof
exp.run_methods(new_data=regen_data)

# ------------------------------------------------------------------------------------------------
beta0_factor = 0.1  # 100
alpha0_diags = 0.1  # 1
alpha0_factor = 0.1  # 1
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'pico3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))
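# with the values above this yields a directory name like 'pico3_0.100000_0.100000_0.100000',
# since '%f' always formats with six decimal places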
exp = Experiment(output_dir,
                 3,
                 annos,
Example #5
                 gt,
                 doc_start,
                 features,
                 annos,
                 gt_val,
                 doc_start,
                 features,
                 alpha0_factor=alpha0_factor,
                 alpha0_diags=alpha0_diags,
                 beta0_factor=beta0_factor,
                 max_iter=20)

exp.methods = [
    'best',  # does not use the hyperparameters
    'worst',  # does not use the hyperparameters
    'majority',  # does not use the hyperparameters
    'mace',  # worked best with its own default hyperparameters, smoothing=0.001, alpha=0.5, beta=0.5
    'ds',  # does not use the hyperparameters
    'HMM_crowd',  # does not use alpha0_diags; default values happened to work best
]

# should run both task 1 and 2.
exp.run_methods(new_data=regen_data)

# ----------------------------------------------------------------------------

beta0_factor = 0.1
alpha0_diags = 0.1
alpha0_factor = 1
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'ner3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))
    exp.save_results = True
    exp.opt_hyper = False  # True

    # values obtained from tuning on dev:
    best_nu0factor = 1000  # 1
    best_diags = 1000
    best_factor = 0.1

    exp.nu0_factor = best_nu0factor
    exp.alpha0_diags = best_diags
    exp.alpha0_factor = best_factor

    exp.methods = [
        # 'bac_seq_integrateIF',
        # 'bac_seq',
        'bac_seq_integrateIF_noHMM',
    ]

    exp.run_methods(annos,
                    gt,
                    doc_start,
                    output_dir,
                    text,
                    rerun_all=True,
                    test_no_crowd=False)
    #
    # # values obtained from tuning on dev:
    # best_nu0factor = 0.1
    # best_diags = 10
    # best_factor = 0.1
Example #7
    # doc_start_dev = doc_start_dev[:s]
    # text_dev = text_dev[:s]
    # -------------------------

    exp = Experiment(None, 3, annos.shape[1], None)

    exp.alpha0_factor = 1
    exp.alpha0_diags = 100

    exp.save_results = True
    exp.opt_hyper = False  # True

    # run all the methods that don't require tuning here
    exp.methods = [
        'ibcc',
        'majority',
        'best',
        'worst',
    ]

    seeds = [10]  # np.arange(100)

    # keep only tokens that have a gold label (assuming -1 marks missing ground truth)
    annos = annos[gt.flatten() != -1]
    text = text[gt.flatten() != -1]
    doc_start = doc_start[gt.flatten() != -1]
    gt = gt[gt.flatten() != -1]

    annos_sample = annos[:, :252]

    print('Mean annotations per token: %f' %
          np.mean(np.sum(annos_sample != -1, axis=1)))
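
    # Toy illustration (not in the original script) of the statistic printed above, assuming
    # each row of annos is one token, each column one annotator, and -1 means "no annotation":
    toy_annos = np.array([[1, -1], [0, 2], [-1, -1]])  # 3 tokens, 2 annotators
    print(np.mean(np.sum(toy_annos != -1, axis=1)))  # -> 1.0 labels per token on average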
                 9,
                 annos,
                 gt,
                 doc_start,
                 features,
                 annos,
                 gt_val,
                 doc_start,
                 features,
                 alpha0_factor=alpha0_factor,
                 alpha0_diags=alpha0_diags,
                 beta0_factor=beta0_factor,
                 max_iter=20)
exp.methods = [
    # 'bac_seq_integrateIF',
    'bac_seq_integrateIF_thenLSTM',
    'bac_seq_integrateIF_integrateLSTM_atEnd',
]

# should run both task 1 and 2.
exp.run_methods(new_data=regen_data)

# -------------------------------------------------------------------------------------

beta0_factor = 0.1
alpha0_diags = 0.1
alpha0_factor = 0.1
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'ner3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))
exp = Experiment(output_dir,
Example #9
                 gt,
                 doc_start,
                 features,
                 annos,
                 gt_val,
                 doc_start,
                 features,
                 alpha0_factor=alpha0_factor,
                 alpha0_diags=alpha0_diags,
                 beta0_factor=beta0_factor,
                 max_iter=20)
# run all the methods that don't require tuning here
exp.methods = [
    'best',
    'worst',
    'majority',
    'ds',
    'mace',
    'HMM_crowd',
]
# this will run task 1 -- train on all crowdsourced data, test on the labelled portion thereof
exp.run_methods(new_data=regen_data)

# ------------------------------------------------------------------------------------------------

beta0_factor = 0.1
alpha0_diags = 10
alpha0_factor = 1
output_dir = os.path.join(
    evaluation.experiment.output_root_dir,
    'pico3_%f_%f_%f' % (beta0_factor, alpha0_diags, alpha0_factor))
exp = Experiment(output_dir,