Example #1
def process_sentences(writer, merged_summary, epoch, data, model, is_training, learningrate, config, name):
    """
    Process all the sentences with the labeler, return evaluation metrics.
    """
    evaluator = MLTEvaluator(config)
    batches_of_sentence_ids = create_batches_of_sentence_ids(data, config["batch_equal_size"], config["max_batch_size"])
    if is_training:
        random.shuffle(batches_of_sentence_ids)

    # Iterate over the batches; the batch size is controlled by config["max_batch_size"].
    for count, sentence_ids_in_batch in enumerate(batches_of_sentence_ids):
        print('############### Epoch', epoch + 1, 'Batch', count + 1, 'of',
              len(batches_of_sentence_ids), '###############')
        batch = [data[i] for i in sentence_ids_in_batch]
        summary, cost, sentence_scores, token_scores_list = model.process_batch(merged_summary, batch, is_training, learningrate)
        evaluator.append_data(cost, batch, sentence_scores, token_scores_list)
        
        writer.add_summary(summary, len(batches_of_sentence_ids) * epoch + count)
        
        while config["garbage_collection"] == True and gc.collect() > 0:
            pass

    results = evaluator.get_results(name)
    for key in results:
        print(key + ": " + str(results[key]))

    return results
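
Every variant on this page builds its batches with create_batches_of_sentence_ids, which is not shown here. Below is a minimal sketch of what such a helper might look like, assuming each element of data is a tokenised sentence, that batch_equal_size buckets sentences of the same length together, and that max_batch_size caps the number of sentences per batch; all three assumptions are illustrative, not taken from the source.

def create_batches_of_sentence_ids(data, batch_equal_size, max_batch_size):
    """Hypothetical sketch: group sentence indices into batches of at most
    max_batch_size, optionally bucketing sentences of equal length together."""
    batches = []
    if batch_equal_size:
        # Bucket sentence ids by sentence length so each batch is homogeneous.
        buckets = {}
        for i, sentence in enumerate(data):
            buckets.setdefault(len(sentence), []).append(i)
        id_groups = list(buckets.values())
    else:
        id_groups = [list(range(len(data)))]
    for ids in id_groups:
        for start in range(0, len(ids), max_batch_size):
            batches.append(ids[start:start + max_batch_size])
    return batches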
Example #2
def process_sentences(data_train, data, model, is_training, learningrate,
                      config, name, epoch):
    """
    Process all the sentences with the labeler, return evaluation metrics.
    """
    evaluator = MLTEvaluator(config)
    batches_of_sentence_ids = create_batches_of_sentence_ids(
        data, config["batch_equal_size"], config["max_batch_size"])
    if is_training:
        random.shuffle(batches_of_sentence_ids)

    for sentence_ids_in_batch in batches_of_sentence_ids:
        batch = [data_train[i] for i in sentence_ids_in_batch]
        cost, sentence_scores, token_scores = model.process_batch(
            data_train, batch, is_training, learningrate)
        evaluator.append_data(cost, batch, sentence_scores, token_scores, name,
                              epoch)

    while config["garbage_collection"] == True and gc.collect() > 0:
        pass

    results = evaluator.get_results(name)
    for key in results:
        print(key + ": " + str(results[key]))

    if name == "dev":
        f1 = codecs.open("./result_with_know_c2h/result_train_" + str(epoch) +
                         ".txt",
                         encoding="utf-8",
                         mode="w")
        f1.write(str(results))
        f1.close()
    return results
Example #3
def process_sentences(data, model, is_training, learningrate, config, name):
    """
	Process all the sentences with the labeler, return evaluation metrics.
	"""
    evaluator = MLTEvaluator(config)
    batches_of_sentence_ids = create_batches_of_sentence_ids(
        data, config["batch_equal_size"], config["max_batch_size"])
    if is_training:
        random.shuffle(batches_of_sentence_ids)

    for sentence_ids_in_batch in batches_of_sentence_ids:
        batch = [data[i] for i in sentence_ids_in_batch]
        (cost, sentence_scores, token_scores_list, token_probs, token_probs_all_labels,
         unsup_weights, selective_weights, indicative_weights) = model.process_batch(
             batch, is_training, learningrate)

        evaluator.append_data(cost, batch, sentence_scores, token_scores_list,
                              token_probs)

        while config["garbage_collection"] == True and gc.collect() > 0:
            pass

    results = evaluator.get_results(name)
    for key in results:
        print(key + ": " + str(results[key]))
    return results
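
process_sentences is normally driven by an outer experiment loop that alternates training and development passes. The following is a minimal sketch of such a loop for the signature in Example #3, assuming data_train and data_dev have already been loaded and model has been built from config; the config keys, the "dev_f" metric name and the model.save call are illustrative assumptions, not taken from the source.

# Hypothetical driver loop; config keys, metric name and save path are assumptions.
best_dev_score = -1.0
for epoch in range(config["epochs"]):
    process_sentences(data_train, model, is_training=True,
                      learningrate=config["learningrate"], config=config,
                      name="train")
    dev_results = process_sentences(data_dev, model, is_training=False,
                                    learningrate=0.0, config=config,
                                    name="dev")
    # Keep the model that scores best on the development set.
    if dev_results["dev_f"] > best_dev_score:
        best_dev_score = dev_results["dev_f"]
        model.save(config["save"])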
Example #4
def process_sentences(data_train, data, model_data, is_training, learningrate, name, epoch):
    """
    Process all the sentences with the labeler, return evaluation metrics.
    """
    evaluator = MLTEvaluator()
    # Each id represents a combination of (id, sentences, context, label_distribution).
    batches_of_sentence_ids = create_batches_of_sentence_ids(data, hyp.batch_equal_size, hyp.max_batch_size)
    
    all_batches = list()
    if is_training:
        random.shuffle(batches_of_sentence_ids)

    for sentence_ids_in_batch in batches_of_sentence_ids:
        batch = [data_train[i] for i in sentence_ids_in_batch]
        
        # Each batch contains a combination of (id, sentences, context, label_distribution).
        batch_data = model_data.process_batch(data_train, batch, is_training, learningrate)
        all_batches.append(batch_data)

    while hyp.garbage_collection and gc.collect() > 0:
        pass

    return all_batches
Example #5
def process_sentences(data,
                      model,
                      is_training,
                      learningrate,
                      config,
                      name,
                      epoch=0,
                      write_out=""):
    """
    Process all the sentences with the labeler, return evaluation metrics.
    """
    evaluators = {task: MLTEvaluator(config) for task in data.keys()}
    task_sent_id_batches = create_batches_of_sentence_ids(
        data, config, is_training, epoch)
    if is_training:
        for task, sentence_ids_in_batch in task_sent_id_batches:
            random.shuffle(sentence_ids_in_batch)

    for task, sentence_ids_in_batch in tqdm(task_sent_id_batches):
        batch = [data[task][i] for i in sentence_ids_in_batch]
        cost, sentence_scores, token_scores_list, attention_scores_list = \
            model.process_batch(task, batch, is_training, learningrate)

        evaluators[task].append_data(cost, batch, sentence_scores,
                                     token_scores_list, attention_scores_list)

        while config["garbage_collection"] and gc.collect() > 0:
            pass

    results = {}
    for task in data.keys():
        print("\n=== TASK: {} ===".format(task))
        results[task] = evaluators[task].get_results(name)
        for key, res in results[task].items():
            print(task + " " + key + ": " + str(res))

        if write_out:
            write_out_task = write_out + "_" + task
            if not os.path.exists(write_out_task):
                os.mkdir(write_out_task)
            evaluators[task].write_predictions(write_out_task)

    return results
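
Unlike the earlier variants, this one expects data to be a dict mapping task names to sentence lists and returns one result dict per task. A hedged usage sketch follows; the task names, file paths, learning rate, model and config are purely illustrative, and read_input_files is reused from Example #6 under the assumption that it returns a list of sentences.

# Illustrative multi-task call; paths, task names, model and config are assumptions.
data = {
    "pos": read_input_files("pos_train.txt", -1),
    "ner": read_input_files("ner_train.txt", -1),
}
results = process_sentences(data, model, is_training=True, learningrate=0.001,
                            config=config, name="train", epoch=0,
                            write_out="predictions")
for task, task_results in results.items():
    print(task, task_results)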
Example #6
import sys

from model import MLTModel
from evaluator import MLTEvaluator
from experiment import read_input_files


if __name__ == "__main__":
    model = MLTModel.load(sys.argv[1])
    data = read_input_files(sys.argv[2], -1)
    batch_size = 32
    # Accumulate evaluation metrics over all batches.
    evaluator = MLTEvaluator(model.config)
    
    for i in range(0, len(data), batch_size):
        batch = data[i:i+batch_size]
        cost, sentence_scores, token_scores_list = model.process_batch_inference(batch, False, 0.0)

        # Print each token with its token-level score and the sentence-level score.
        for j in range(len(batch)):
            for k in range(len(batch[j])):
                print(" ".join([str(x) for x in batch[j][k]]) + "\t"
                      + str(token_scores_list[0][j][k]) + "\t"
                      + str(sentence_scores[j]))
            print("")

        # Feed this batch's outputs into the evaluator.
        evaluator.append_data(cost, batch, sentence_scores, token_scores_list)

    # Report the aggregated evaluation metrics on stderr.
    results = evaluator.get_results("test")
    for key in results:
        sys.stderr.write(key + ": " + str(results[key]) + "\n")
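
Assuming the script above is saved as, say, print_output.py (the filename is an assumption), it would be invoked with a saved model and an input file, e.g. python print_output.py saved_model.bin test.txt, printing per-token scores to stdout and the evaluation summary to stderr.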