Example #1
def evaluate_model(results, dataset, all_predictions, all_y_true,
                   stats_graph_folder, epoch_number, epoch_start_time,
                   output_filepaths):
    results['epoch'][epoch_number] = []
    assess_and_save(results, dataset, None, all_predictions, all_y_true,
                    stats_graph_folder, epoch_number, 0, epoch_start_time)
    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score')
    plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score')
    # CoNLL evaluation script
    for dataset_type in ['train', 'valid', 'test']:
        conll_evaluation_script = os.path.join('.', 'conlleval')
        conll_output_filepath = '{0}_conll_evaluation.txt'.format(
            output_filepaths[dataset_type])
        shell_command = 'perl {0} < {1} > {2}'.format(
            conll_evaluation_script, output_filepaths[dataset_type],
            conll_output_filepath)
        print('shell_command: {0}'.format(shell_command))
        os.system(shell_command)
        conll_parsed_output = utils_nlp.get_parsed_conll_output(
            conll_output_filepath)
        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = \
            conll_parsed_output['all']['f1']

    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll', from_json=False)
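
This example shells out with os.system. A safer pattern is subprocess with explicit redirection instead of interpolating paths into a shell command string. A minimal sketch, with an illustrative helper name (not part of NeuroNER):

import subprocess

def run_conll_evaluation(conll_script, input_filepath, output_filepath):
    # conlleval reads the token/gold/predicted columns from stdin and writes
    # its report to stdout, so wire both up as file handles.
    with open(input_filepath) as infile, open(output_filepath, 'w') as outfile:
        subprocess.run(['perl', conll_script],
                       stdin=infile, stdout=outfile, check=True)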
Example #2
File: evaluate.py  Project: Por123/NeuroNER
def evaluate_model(results, dataset, y_pred_all, y_true_all, stats_graph_folder,
                   epoch_number, epoch_start_time, output_filepaths, parameters,
                   verbose=False):
    results['epoch'][epoch_number] = []
    result_update = {}

    for dataset_type in ['train', 'valid', 'test']:
        print('Generating plots for the {0} set'.format(dataset_type))
        result_update[dataset_type] = {}
        y_pred_original = y_pred_all[dataset_type]
        y_true_original = y_true_all[dataset_type]
        
        for evaluation_mode in ['BIO', 'token', 'binary']:
            y_pred, y_true, label_indices, label_names = remap_labels(y_pred_original, y_true_original, dataset, evaluation_mode=evaluation_mode)
            result_update[dataset_type][evaluation_mode] = assess_model(
                y_pred, y_true, label_indices, label_names, dataset_type,
                stats_graph_folder, epoch_number,
                evaluation_mode=evaluation_mode, verbose=verbose)
            if parameters['main_evaluation_mode'] == evaluation_mode:
                result_update[dataset_type].update(result_update[dataset_type][evaluation_mode])
                
    result_update['time_elapsed_since_epoch_start'] = time.time() - epoch_start_time
    result_update['time_elapsed_since_train_start'] = time.time() - results['execution_details']['train_start']
    results['epoch'][epoch_number].append(result_update)
    
    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score')
    plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score')
    
    # CoNLL evaluation script
    for dataset_type in ['train', 'valid', 'test']:
        conll_evaluation_script = os.path.join('.', 'conlleval')
        conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepaths[dataset_type])
        shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script, output_filepaths[dataset_type], conll_output_filepath)
        print('shell_command: {0}'.format(shell_command))
        
        os.system(shell_command)
        conll_parsed_output = utils_nlp.get_parsed_conll_output(conll_output_filepath)
        
        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = results['epoch'][epoch_number][0][dataset_type]['conll']['all']['f1']

    plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll')

    results['execution_details']['train_duration'] = time.time() - results['execution_details']['train_start']
    save_results(results, stats_graph_folder)
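
The lookups into results[...]['conll']['all']['f1'] imply that utils_nlp.get_parsed_conll_output returns a dictionary with at least an 'all' entry holding the overall metrics. Below is a minimal sketch of such a parser, assuming the standard conlleval summary line; it is an illustration, not NeuroNER's actual implementation:

import re

def parse_conll_summary(conll_output_filepath):
    # conlleval's overall summary line looks like:
    #   accuracy:  97.53%; precision:  85.97%; recall:  86.22%; FB1:  86.10
    pattern = re.compile(r'accuracy:\s+([\d.]+)%; precision:\s+([\d.]+)%; '
                         r'recall:\s+([\d.]+)%; FB1:\s+([\d.]+)')
    with open(conll_output_filepath) as f:
        for line in f:
            match = pattern.search(line)
            if match:
                accuracy, precision, recall, f1 = map(float, match.groups())
                return {'all': {'accuracy': accuracy, 'precision': precision,
                                'recall': recall, 'f1': f1}}
    return {'all': {}}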
Example #3
def evaluate_model(results,
                   dataset,
                   y_pred_all,
                   y_true_all,
                   stats_graph_folder,
                   epoch_number,
                   epoch_start_time,
                   output_filepaths,
                   parameters,
                   verbose=False):
    results['execution_details']['num_epochs'] = epoch_number
    results['epoch'][epoch_number] = []
    result_update = {}

    for dataset_type in ['train', 'valid', 'test']:
        if dataset_type not in output_filepaths.keys():
            continue
        print('Generating plots for the {0} set'.format(dataset_type))
        result_update[dataset_type] = {}
        y_pred_original = y_pred_all[dataset_type]
        y_true_original = y_true_all[dataset_type]

        for evaluation_mode in ['bio', 'token', 'binary']:
            y_pred, y_true, label_indices, label_names, label_indices_with_o, label_names_with_o = remap_labels(
                y_pred_original,
                y_true_original,
                dataset,
                evaluation_mode=evaluation_mode)
            result_update[dataset_type][evaluation_mode] = assess_model(
                y_pred,
                y_true,
                label_indices,
                label_names,
                label_indices_with_o,
                label_names_with_o,
                dataset_type,
                stats_graph_folder,
                epoch_number,
                parameters,
                evaluation_mode=evaluation_mode,
                verbose=verbose)
            if parameters['main_evaluation_mode'] == evaluation_mode:
                result_update[dataset_type].update(
                    result_update[dataset_type][evaluation_mode])

    result_update['time_elapsed_since_epoch_start'] = time.time() - epoch_start_time
    result_update['time_elapsed_since_train_start'] = (
        time.time() - results['execution_details']['train_start'])
    results['epoch'][epoch_number].append(result_update)

    # CoNLL evaluation script
    for dataset_type in ['train', 'valid', 'test']:
        if dataset_type not in output_filepaths.keys():
            continue
        conll_evaluation_script = os.path.join('.', 'conlleval')
        conll_output_filepath = '{0}_conll_evaluation.txt'.format(
            output_filepaths[dataset_type])
        shell_command = 'perl {0} < {1} > {2}'.format(
            conll_evaluation_script, output_filepaths[dataset_type],
            conll_output_filepath)
        print('shell_command: {0}'.format(shell_command))
        os.system(shell_command)
        conll_parsed_output = utils_nlp.get_parsed_conll_output(
            conll_output_filepath)
        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = \
            conll_parsed_output['all']['f1']
        if parameters['main_evaluation_mode'] == 'conll':
            results['epoch'][epoch_number][0][dataset_type]['f1_score'] = {}
            results['epoch'][epoch_number][0][dataset_type]['f1_score']['micro'] = \
                conll_parsed_output['all']['f1']
            results['epoch'][epoch_number][0][dataset_type]['accuracy_score'] = \
                conll_parsed_output['all']['accuracy']
            utils_plots.plot_classification_report(
                results['epoch'][epoch_number][0][dataset_type]['conll'],
                title='Classification report for epoch {0} in {1} '
                      '({2} evaluation)\n'.format(epoch_number, dataset_type,
                                                  'conll'),
                cmap='RdBu',
                from_conll_json=True)
            plt.savefig(
                os.path.join(
                    stats_graph_folder,
                    'classification_report_for_epoch_{0:04d}_in_{1}_conll_evaluation.{2}'
                    .format(epoch_number, dataset_type,
                            parameters['plot_format'])),
                dpi=300,
                format=parameters['plot_format'],
                bbox_inches='tight')
            plt.close()

    if (parameters['train_model'] and 'train' in output_filepaths
            and 'valid' in output_filepaths):
        plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score', parameters)
        plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score',
                         parameters)
        plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll', parameters)

    results['execution_details']['train_duration'] = (
        time.time() - results['execution_details']['train_start'])
    save_results(results, stats_graph_folder)
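
All three variants write into the same nested structure: results['epoch'][epoch_number] is a one-element list whose dictionary maps each split to its metric dictionaries. A minimal sketch with invented values, showing the shape a helper like plot_f1_vs_epoch would traverse to build one curve per split:

# Invented numbers; only the nesting mirrors the code above.
results = {
    'execution_details': {'train_start': 0.0, 'num_epochs': 1},
    'epoch': {
        0: [{'valid': {'f1_score': {'micro': 79.4},
                       'f1_conll': {'micro': 79.0},
                       'accuracy_score': 95.8}}],
        1: [{'valid': {'f1_score': {'micro': 82.1},
                       'f1_conll': {'micro': 81.6},
                       'accuracy_score': 96.4}}],
    },
}

# One point per epoch for the validation micro F1:
valid_f1_per_epoch = [results['epoch'][n][0]['valid']['f1_score']['micro']
                      for n in sorted(results['epoch'])]
print(valid_f1_per_epoch)  # [79.4, 82.1]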
Example #4
def main():


    #### Parameters - start
    conf_parameters = configparser.ConfigParser()
    conf_parameters.read(os.path.join('.','parameters.ini'))
    nested_parameters = utils.convert_configparser_to_dictionary(conf_parameters)
    parameters = {}
    for k,v in nested_parameters.items():
        parameters.update(v)
    for k,v in parameters.items():
        if k in ['remove_unknown_tokens','character_embedding_dimension','character_lstm_hidden_state_dimension','token_embedding_dimension','token_lstm_hidden_state_dimension',
                 'patience','maximum_number_of_epochs','maximum_training_time','number_of_cpu_threads','number_of_gpus']:
            parameters[k] = int(v)
        if k in ['dropout_rate']:
            parameters[k] = float(v)
        if k in ['use_character_lstm','is_character_lstm_bidirect','is_token_lstm_bidirect','use_crf']:
            parameters[k] = distutils.util.strtobool(v)
    pprint(parameters)

    # Load dataset
    dataset_filepaths = {}
    dataset_filepaths['train'] = os.path.join(parameters['dataset_text_folder'], 'train.txt')
    dataset_filepaths['valid'] = os.path.join(parameters['dataset_text_folder'], 'valid.txt')
    dataset_filepaths['test']  = os.path.join(parameters['dataset_text_folder'], 'test.txt')
    dataset = ds.Dataset()
    dataset.load_dataset(dataset_filepaths, parameters)


    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
          device_count={'CPU': 1, 'GPU': 1},
          allow_soft_placement=True, #  automatically choose an existing and supported device to run the operations in case the specified one doesn't exist
          log_device_placement=False
          )

        sess = tf.Session(config=session_conf)

        with sess.as_default():
            model = EntityLSTM(dataset, parameters)

            # Define training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            if parameters['optimizer'] == 'adam':
                optimizer = tf.train.AdamOptimizer(1e-3)
            elif parameters['optimizer'] == 'sgd':
                optimizer = tf.train.GradientDescentOptimizer(0.005)
            else:
                raise ValueError("The lr_method parameter must be either adam or sgd.")

            # https://github.com/google/prettytensor/issues/6
            # https://www.tensorflow.org/api_docs/python/framework/graph_collections

            # https://github.com/blei-lab/edward/issues/286#ref-pullrequest-181330211 : utility function to get all tensorflow variables a node depends on


            grads_and_vars = optimizer.compute_gradients(model.loss)

            # By defining a global_step variable and passing it to the optimizer, we let
            # TensorFlow handle the counting of training steps for us. The global step is
            # incremented by one every time train_op is executed.
            train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)


            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            # Load pretrained token embeddings
            if parameters['token_pretrained_embedding_filepath'] != '':
                load_token_embeddings(sess, model.W, dataset, parameters)


            estop = False  # early stop
            start_time = time.time()
            experiment_timestamp = utils.get_current_time_in_miliseconds()
            results = {}
            results['epoch'] = {}
            # save/initialize execution details
            results['execution_details'] = {}
            results['execution_details']['train_start'] = start_time
            results['execution_details']['time_stamp'] = experiment_timestamp
            results['execution_details']['early_stop'] = False
            results['execution_details']['keyboard_interrupt'] = False
            results['execution_details']['num_epochs'] = 0
            results['model_options'] = copy.copy(parameters)

            dataset_name = utils.get_basename_without_extension(parameters['dataset_text_folder'])  # e.g. 'conll2003en'
            model_name = '{0}_{1}'.format(dataset_name, results['execution_details']['time_stamp'])

            output_folder = os.path.join('..', 'output')
            stats_graph_folder = os.path.join(output_folder, model_name)  # Folder where the graphs are saved
            utils.create_folder_if_not_exists(output_folder)
            print('stats_graph_folder: {0}'.format(stats_graph_folder))
            utils.create_folder_if_not_exists(stats_graph_folder)
            model_folder = os.path.join(stats_graph_folder, 'model')
            utils.create_folder_if_not_exists(model_folder)

            step = 0
            bad_counter = 0
            previous_best_valid_f1_score = 0
            transition_params_trained = np.random.rand(len(dataset.unique_labels),len(dataset.unique_labels))
            try:
                while True:
                    epoch_number = math.floor(step / len(dataset.token_indices['train']))
                    print('epoch_number: {0}'.format(epoch_number))

                    epoch_start_time = time.time()

                    # Train model: loop over all sequences of the training set, with shuffling
                    sequence_numbers = list(range(len(dataset.token_indices['train'])))
                    random.shuffle(sequence_numbers)
                    for sequence_number in sequence_numbers:
                        transition_params_trained = train_step(sess, dataset, sequence_number, train_op, global_step, model, transition_params_trained, parameters)
                        step += 1
                        if sequence_number % 100 == 0:
                            print('.', end='', flush=True)

                    # Evaluate model
                    print('step: {0}'.format(step))
                    all_predictions = {}
                    all_y_true = {}
                    output_filepaths = {}
                    for dataset_type in ['train', 'valid', 'test']:
                        print('dataset_type:     {0}'.format(dataset_type))
                        all_predictions[dataset_type], all_y_true[dataset_type], output_filepaths[dataset_type] = evaluate_model(sess, dataset, dataset_type, model, transition_params_trained, step, stats_graph_folder, epoch_number, parameters)
                        model_options = None

                    # Save and plot results
                    # TODO: remove uidx
                    uidx = 0
                    results['epoch'][epoch_number] = []
                    results['execution_details']['num_epochs'] = epoch_number

                    epoch_elapsed_training_time = time.time() - epoch_start_time
                    print('epoch_elapsed_training_time: {0:.2f} seconds'.format(epoch_elapsed_training_time))

                    assess_model.assess_and_save(results, dataset, model_options, all_predictions, all_y_true, stats_graph_folder, epoch_number, uidx, epoch_start_time)
                    assess_model.plot_f1_vs_epoch(results, stats_graph_folder, 'f1_score')
                    assess_model.plot_f1_vs_epoch(results, stats_graph_folder, 'accuracy_score')

                    # CoNLL evaluation script
                    for dataset_type in ['train', 'valid', 'test']:
                        conll_evaluation_script = os.path.join('.', 'conlleval')
                        conll_output_filepath = '{0}_conll_evaluation.txt'.format(output_filepaths[dataset_type])
                        shell_command = 'perl {0} < {1} > {2}'.format(conll_evaluation_script, output_filepaths[dataset_type], conll_output_filepath)
                        print('shell_command: {0}'.format(shell_command))
                        os.system(shell_command)
                        conll_parsed_output = utils_nlp.get_parsed_conll_output(conll_output_filepath)
                        print('conll_parsed_output: {0}'.format(conll_parsed_output))
                        results['epoch'][epoch_number][0][dataset_type]['conll'] = conll_parsed_output
                        results['epoch'][epoch_number][0][dataset_type]['f1_conll'] = {}
                        results['epoch'][epoch_number][0][dataset_type]['f1_conll']['micro'] = results['epoch'][epoch_number][0][dataset_type]['conll']['all']['f1']
                    assess_model.plot_f1_vs_epoch(results, stats_graph_folder, 'f1_conll', from_json=False)

                    # Early stop
                    valid_f1_score = results['epoch'][epoch_number][0]['valid']['f1_score']['micro']
                    if valid_f1_score > previous_best_valid_f1_score:
                        bad_counter = 0
                        previous_best_valid_f1_score = valid_f1_score
                    else:
                        bad_counter += 1


                    if bad_counter > parameters['patience']:
                        print('Early Stop!')
                        results['execution_details']['early_stop'] = True
                        break

                    if epoch_number > parameters['maximum_number_of_epochs']:
                        break

            except KeyboardInterrupt:
                results['execution_details']['keyboard_interrupt'] = True
                print('Training interrupted')

            print('Finishing the experiment')
            end_time = time.time()
            results['execution_details']['train_duration'] = end_time - start_time
            results['execution_details']['train_end'] = end_time
            assess_model.save_results(results, stats_graph_folder)

    sess.close()  # Release the session's resources.
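
Example #4 interleaves its patience-based early stopping with the training loop. Distilled into a standalone function (a sketch for clarity, not NeuroNER code): training stops once the validation F1 has failed to improve on its best value for more than patience consecutive epochs.

def should_stop_early(valid_f1_history, patience):
    # Mirrors the bad_counter logic above: reset on improvement,
    # otherwise count epochs without improvement.
    best_so_far = float('-inf')
    bad_counter = 0
    for f1 in valid_f1_history:
        if f1 > best_so_far:
            best_so_far = f1
            bad_counter = 0
        else:
            bad_counter += 1
        if bad_counter > patience:
            return True
    return False

print(should_stop_early([70.1, 72.5, 72.0, 71.8, 72.3], patience=2))  # True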