Пример #1
0
def iteration_model(model, dataset, parameter, train=True):
    """Run one (truncated) pass over ``dataset`` and accumulate tagging metrics.

    Each batch produced by ``dataset.get_data_batch_size`` is fed to ``model``
    through the module-level TensorFlow session ``sess``.  When ``train`` is
    true the optimiser op ``model.train_op`` is executed as well.

    Args:
        model: Object exposing the placeholders fed below plus ``cost``,
            ``viterbi_sequence`` and ``train_op``.
        dataset: Object providing ``get_data_batch_size(batch_size, train)``,
            which yields ``(morph, ne_dict, character, seq_len, char_len,
            label, step)`` tuples.
        parameter: Mapping with ``batch_size``, ``sentence_length``,
            ``learning_rate`` and, for training, the ``*keep_prob`` entries.
        train: Whether to update the model (``True``) or only evaluate.

    Returns:
        Tuple ``(cost_sum / (step + 1), accuracy_percent, precision_count,
        recall_count)`` where ``step`` is the last step value yielded by the
        dataset and the two count arrays are whatever ``diff_model_label``
        returned for the final batch.

    Raises:
        ValueError: If the dataset yields no batches.
    """
    precision_count = np.array([0., 0.])
    recall_count = np.array([0., 0.])

    total_cost = 0.0
    total_correct = 0.0
    total_labels = 0.0

    def keep(name):
        # Dropout must only be active while training.  The placeholders are
        # fed keep probabilities, so 1.0 switches dropout off for evaluation
        # (previously the training values were fed on dev runs as well).
        return parameter[name] if train else 1.0

    fetches = [model.cost, model.viterbi_sequence]
    if train:
        fetches.append(model.train_op)

    if train:
        log_template = '[Train step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'
    else:
        log_template = '[Dev step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'

    # Token positions 0..sentence_length-1, shaped (1, sentence_length) so
    # they broadcast against the per-sentence lengths.
    positions = np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)

    step = None  # stays None when the dataset yields nothing
    for morph, ne_dict, character, seq_len, char_len, label, step in dataset.get_data_batch_size(
            parameter["batch_size"], train):
        feed_dict = {
            model.morph: morph,
            model.ne_dict: ne_dict,
            model.character: character,
            model.sequence: seq_len,
            model.character_len: char_len,
            model.label: label,
            model.dropout_rate: keep("keep_prob"),
            model.weight_dropout_keep_prob: keep("weight_keep_prob"),
            model.lstm_dropout_keep_prob: keep("lstm_keep_prob"),
            model.emb_dropout_keep_prob: keep("emb_keep_prob"),
            model.dense_dropout_keep_prob: keep("dense_keep_prob"),
            model.learning_rate: parameter["learning_rate"]
        }

        # Only the first two results are needed; the train op's output
        # (present when training) is discarded.
        cost, tf_viterbi_sequence = sess.run(fetches, feed_dict=feed_dict)[:2]
        total_cost += cost

        # True for real tokens, False for padding beyond each sentence length.
        mask = positions < np.expand_dims(seq_len, axis=1)
        total_labels += np.sum(seq_len)
        total_correct += np.sum((label == tf_viterbi_sequence) * mask)

        precision_count, recall_count = diff_model_label(
            dataset, precision_count, recall_count, tf_viterbi_sequence, label,
            seq_len)

        if step % 5 == 0:
            print(log_template.format(
                step + 1, total_cost / (step + 1),
                100.0 * total_correct / float(total_labels)))

        # NOTE(review): hard cap on the number of batches per call; this looks
        # like a debugging leftover -- confirm before relying on the metrics.
        if step > 5:
            break

    if step is None:
        raise ValueError("dataset produced no batches; nothing to iterate over")

    return total_cost / (step + 1), 100.0 * total_correct / float(
        total_labels), precision_count, recall_count
Пример #2
0
def iteration_model(models, dataset, parameter, train=True):
    """Run one pass over ``dataset`` for every model and for their majority vote.

    Every batch is fed to each model in ``models`` through the module-level
    TensorFlow session ``sess`` (running ``train_op`` too when ``train`` is
    true).  The per-model predictions are then combined element-wise with
    ``scipy.stats.mode`` to obtain the ensemble prediction.

    Args:
        models: Iterable of model objects exposing the placeholders fed below
            plus ``cost``, ``viterbi_sequence`` and ``train_op``.
        dataset: Object providing ``shuffle_data()``, ``__len__`` and
            ``get_data_batch_size(batch_size, train)``, which yields
            ``(morph, ne_dict, character, seq_len, char_len, label, step)``.
        parameter: Mapping with ``num_ensemble``, ``keep_prob``,
            ``batch_size`` and ``sentence_length``.
        train: Whether to update the models (``True``) or only evaluate.

    Returns:
        Tuple ``(cost_sum / (step + 1), accuracy_percent, precision_count,
        recall_count, ensemble_accuracy_percent, ensemble_precision_count,
        ensemble_recall_count)``.  The first four entries are arrays with one
        row/element per model.

    Raises:
        ValueError: If the dataset yields no batches.
    """
    num_models = parameter["num_ensemble"]
    precision_count = np.zeros((num_models, 2))
    recall_count = np.zeros((num_models, 2))
    avg_cost = np.zeros(num_models)
    avg_correct = np.zeros(num_models)
    total_labels = np.zeros(num_models)
    dataset.shuffle_data()

    e_precision_count = np.array([0., 0.])
    e_recall_count = np.array([0., 0.])
    e_avg_correct = 0.0
    e_total_labels = 0.0

    # Dropout is only applied while training.
    keep_prob = parameter["keep_prob"] if train else 1.0

    batch_gen = dataset.get_data_batch_size(parameter["batch_size"], train)
    total_iter = int(len(dataset) / parameter["batch_size"])

    positions = np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)

    step = None  # stays None when the dataset yields nothing
    for morph, ne_dict, character, seq_len, char_len, label, step in tqdm(batch_gen, total=total_iter):
        # The padding mask and token count depend only on the batch, not on
        # the model, so compute them once per batch.
        mask = positions < np.expand_dims(seq_len, axis=1)
        num_tokens = np.sum(seq_len)

        predictions = []
        for i, model in enumerate(models):
            feed_dict = {model.morph: morph,
                         model.ne_dict: ne_dict,
                         model.character: character,
                         model.sequence: seq_len,
                         model.character_len: char_len,
                         model.label: label,
                         model.dropout_rate: keep_prob
                         }
            fetches = [model.cost, model.viterbi_sequence]
            if train:
                fetches.append(model.train_op)
            cost, tf_viterbi_sequence = sess.run(fetches, feed_dict=feed_dict)[:2]
            predictions.append(tf_viterbi_sequence)

            avg_cost[i] += cost
            total_labels[i] += num_tokens
            avg_correct[i] += np.sum((label == tf_viterbi_sequence) * mask)
            precision_count[i], recall_count[i] = diff_model_label(
                dataset, precision_count[i], recall_count[i],
                tf_viterbi_sequence, label, seq_len)

        # Majority vote across models.  Older SciPy keeps the reduced axis
        # (result shape (1, ...)) while SciPy >= 1.11 drops it by default, in
        # which case the former ``[0][0]`` indexing silently returned only the
        # first sentence.  Reshaping to the per-model prediction shape gives
        # the same result on both.
        votes = np.asarray(predictions)
        ensemble = np.asarray(stats.mode(votes, axis=0)[0]).reshape(votes.shape[1:])

        e_total_labels += num_tokens
        e_avg_correct += np.sum((label == ensemble) * mask)
        e_precision_count, e_recall_count = diff_model_label(
            dataset, e_precision_count, e_recall_count, ensemble, label, seq_len)

    if step is None:
        raise ValueError("dataset produced no batches; nothing to iterate over")

    return avg_cost / (step + 1), 100.0 * avg_correct / total_labels.astype(float), precision_count, recall_count, \
        100.0 * e_avg_correct / np.float64(e_total_labels), e_precision_count, e_recall_count
Пример #3
0
def iteration_model(model, dataset, parameter, train=True):
    """Run a training pass followed by a validation pass and report both.

    Both passes feed batches to ``model`` through the module-level TensorFlow
    session ``sess``.  The first pass also executes ``model.train_op``; the
    second pass requests batches with ``valid=True`` and only evaluates.

    NOTE(review): ``train`` is only forwarded to
    ``dataset.get_data_batch_size``; the first pass runs ``train_op`` even
    when ``train`` is false -- confirm this is intended.

    Args:
        model: Object exposing the placeholders fed below plus ``cost``,
            ``viterbi_sequence`` and ``train_op``.
        dataset: Object providing ``get_data_batch_size(batch_size, train,
            valid=...)``, which yields ``(morph, ne_dict, character, seq_len,
            char_len, label, step)`` tuples.
        parameter: Mapping with ``batch_size``, ``sentence_length`` and
            ``keep_prob``.
        train: Forwarded to ``dataset.get_data_batch_size``.

    Returns:
        Tuple ``(train_result, valid_result)``; each is a list
        ``[cost_sum / (step + 1), accuracy_percent, precision_cnt,
        recall_cnt, last_step]``.

    Raises:
        ValueError: If either pass receives no batches.
    """
    # Token positions 0..sentence_length-1, shaped (1, sentence_length) so
    # they broadcast against the per-sentence lengths.
    positions = np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)

    def run_pass(batches, fetches, keep_prob, log_template):
        # Shared loop for both passes: feed every batch, accumulate cost,
        # token accuracy and precision/recall counts, and log periodically.
        total_cost = 0.0
        total_correct = 0.0
        total_labels = 0.0
        precision_cnt = np.array([0., 0.])
        recall_cnt = np.array([0., 0.])

        step = None  # stays None when no batch is produced
        for morph, ne_dict, character, seq_len, char_len, label, step in batches:
            feed_dict = {
                model.morph: morph,
                model.ne_dict: ne_dict,
                model.character: character,
                model.sequence: seq_len,
                model.character_len: char_len,
                model.label: label,
                model.dropout_rate: keep_prob,
            }

            # Only cost and predictions are needed; a trailing train op
            # result (if requested) is discarded.
            cost, tf_viterbi_sequence = sess.run(
                fetches, feed_dict=feed_dict)[:2]
            total_cost += cost

            # True for real tokens, False for padding.
            mask = positions < np.expand_dims(seq_len, axis=1)
            total_labels += np.sum(seq_len)
            total_correct += np.sum((label == tf_viterbi_sequence) * mask)

            precision_cnt, recall_cnt = diff_model_label(
                dataset, precision_cnt, recall_cnt, tf_viterbi_sequence,
                label, seq_len)

            if step % 100 == 0:
                print(log_template.format(
                    step + 1, total_cost / (step + 1),
                    100.0 * total_correct / float(total_labels)))

        if step is None:
            raise ValueError(
                "dataset produced no batches; nothing to iterate over")

        return [total_cost / (step + 1),
                100.0 * total_correct / float(total_labels),
                precision_cnt, recall_cnt, step]

    # train
    train_result = run_pass(
        dataset.get_data_batch_size(parameter["batch_size"], train),
        [model.cost, model.viterbi_sequence, model.train_op],
        parameter["keep_prob"],
        '[Train step: {:>4}] cost = {:>.9} Accuracy = {:>.6}')

    # valid -- dropout is switched off (keep probability 1.0) so validation
    # metrics are deterministic; previously the training keep_prob was fed.
    valid_result = run_pass(
        dataset.get_data_batch_size(parameter["batch_size"], train, valid=True),
        [model.cost, model.viterbi_sequence],
        1.0,
        '[valid step: {:>4}] cost = {:>.9} Accuracy = {:>.6}')

    return train_result, valid_result
Пример #4
0
def iteration_model(model, dataset, parameter, train=True):
    """Run one (truncated) pass over ``dataset`` and accumulate tagging metrics.

    Each batch produced by ``dataset.get_data_batch_size`` is fed to ``model``
    through the module-level TensorFlow session ``sess``.  When ``train`` is
    true the optimiser op ``model.train_op`` is executed as well.

    Args:
        model: Object exposing the placeholders fed below plus ``cost``,
            ``viterbi_sequence`` and ``train_op``.
        dataset: Object providing ``get_data_batch_size(batch_size, train)``,
            which yields ``(morph, ne_dict, character, seq_len, char_len,
            label, step)`` tuples.
        parameter: Mapping with ``batch_size``, ``sentence_length``,
            ``learning_rate`` and, for training, the ``*keep_prob`` entries.
        train: Whether to update the model (``True``) or only evaluate.

    Returns:
        Tuple ``(cost_sum / (step + 1), accuracy_percent, precision_count,
        recall_count)`` where ``step`` is the last step value yielded by the
        dataset and the two count arrays are whatever ``diff_model_label``
        returned for the final batch.

    Raises:
        ValueError: If the dataset yields no batches.
    """
    precision_count = np.array([0., 0.])
    recall_count = np.array([0., 0.])

    total_cost = 0.0
    total_correct = 0.0
    total_labels = 0.0

    # TODO(review): a disabled "test" mode used to collect each sentence with
    # its gold and predicted tags and write them to ./data/labeled.txt.  The
    # commented-out draft was removed; re-implement it if the dump is needed.

    def keep(name):
        # Dropout must only be active while training.  The placeholders are
        # fed keep probabilities, so 1.0 switches dropout off for evaluation
        # (previously the training values were fed on dev runs as well).
        return parameter[name] if train else 1.0

    fetches = [model.cost, model.viterbi_sequence]
    if train:
        fetches.append(model.train_op)

    if train:
        log_template = '[Train step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'
    else:
        log_template = '[Dev step: {:>4}] cost = {:>.9} Accuracy = {:>.6}'

    # Token positions 0..sentence_length-1, shaped (1, sentence_length) so
    # they broadcast against the per-sentence lengths.
    positions = np.expand_dims(np.arange(parameter["sentence_length"]), axis=0)

    step = None  # stays None when the dataset yields nothing
    for morph, ne_dict, character, seq_len, char_len, label, step in dataset.get_data_batch_size(
            parameter["batch_size"], train):
        feed_dict = {
            model.morph: morph,
            model.ne_dict: ne_dict,
            model.character: character,
            model.sequence: seq_len,
            model.character_len: char_len,
            model.label: label,
            model.dropout_rate: keep("keep_prob"),
            model.weight_dropout_keep_prob: keep("weight_keep_prob"),
            model.lstm_dropout_keep_prob: keep("lstm_keep_prob"),
            model.emb_dropout_keep_prob: keep("emb_keep_prob"),
            model.dense_dropout_keep_prob: keep("dense_keep_prob"),
            model.learning_rate: parameter["learning_rate"]
        }

        # Only the first two results are needed; the train op's output
        # (present when training) is discarded.
        cost, tf_viterbi_sequence = sess.run(fetches, feed_dict=feed_dict)[:2]
        total_cost += cost

        # True for real tokens, False for padding beyond each sentence length.
        mask = positions < np.expand_dims(seq_len, axis=1)
        total_labels += np.sum(seq_len)
        total_correct += np.sum((label == tf_viterbi_sequence) * mask)

        precision_count, recall_count = diff_model_label(
            dataset, precision_count, recall_count, tf_viterbi_sequence, label,
            seq_len)

        if step % 5 == 0:
            print(log_template.format(
                step + 1, total_cost / (step + 1),
                100.0 * total_correct / float(total_labels)))

        # NOTE(review): hard cap on the number of batches per call; this looks
        # like a debugging leftover -- confirm before relying on the metrics.
        if step > 10:
            break

    if step is None:
        raise ValueError("dataset produced no batches; nothing to iterate over")

    return total_cost / (step + 1), 100.0 * total_correct / float(
        total_labels), precision_count, recall_count