# Example no. 1 (scraped-snippet marker, originally "Exemplo n.º 1" / vote count 0)
    def _predict_pos_tags(self, session, model, words, data_path):
        '''
        Predict POS tags for a sequence of words with a trained model.

        Args:
            session: active tf.Session holding the trained weights.
            model: model object exposing input_data/targets placeholders,
                initial_state, final_state, cost and logits.
            words: sequence of word strings to tag.
            data_path: path used by pos_reader to load vocabulary mappings.

        Returns:
            list of (word, tag) tuples.
        '''
        word_data = pos_reader.sentence_to_word_ids(data_path, words)
        # Dummy targets: only needed so the graph's target placeholder is fed.
        tag_data = [0] * len(word_data)
        state = session.run(model.initial_state)

        predict_id = []
        for step, (x, y) in enumerate(
                pos_reader.iterator(word_data, tag_data, model.batch_size,
                                    model.num_steps)):
            fetches = [model.cost, model.final_state, model.logits]
            feed_dict = {}
            feed_dict[model.input_data] = x
            feed_dict[model.targets] = y
            # Feed the recurrent state per layer (LSTM c/h pairs).
            for i, (c, h) in enumerate(model.initial_state):
                feed_dict[c] = state[i].c
                feed_dict[h] = state[i].h

            # BUG FIX: the final state was fetched but discarded, so every
            # batch restarted from the initial state. Carry it over, exactly
            # as the training loop (run_epoch) does.
            _, state, logits = session.run(fetches, feed_dict)
            # BUG FIX: logits is 2-D ([batch_size * num_steps, target_num]);
            # the original np.argmax(logits) flattened it and appended a single
            # flat index per step. Take the argmax per row so each position
            # gets its own predicted tag id.
            predict_id.extend(int(idx) for idx in np.argmax(logits, axis=1))
        predict_tag = pos_reader.word_ids_to_sentence(data_path, predict_id)
        # Materialize the pairs: the contract is a list of tuples, and a lazy
        # zip object would be exhausted after one iteration under Python 3.
        return list(zip(words, predict_tag))
def run_epoch(session, model, word_data, tag_data, eval_op, verbose=False):
    """Runs the model on the given data for one epoch.

    Args:
        session: active tf.Session.
        model: model object (placeholders, cost, final_state, saver, ...).
        word_data, tag_data: parallel id sequences consumed by reader.iterator.
        eval_op: op executed each step (train op, or a no-op for evaluation).
        verbose: if True, print progress roughly every 10% of the epoch.

    Returns:
        Perplexity (exp of mean cost per timestep) over the epoch.
    """
    epoch_size = ((len(word_data) // model.batch_size) - 1) // model.num_steps
    # BUG FIX: for small datasets epoch_size // 10 is 0 and the original
    # `step % (epoch_size // 10)` raised ZeroDivisionError. Clamp to >= 1.
    log_interval = max(epoch_size // 10, 1)
    start_time = time.time()
    costs = 0.0
    iters = 0
    state = session.run(model.initial_state)
    for step, (x, y) in enumerate(
            reader.iterator(word_data, tag_data, model.batch_size,
                            model.num_steps)):
        fetches = [model.cost, model.final_state, eval_op]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        # Carry the recurrent state across batches (LSTM c/h per layer).
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        cost, state, _ = session.run(fetches, feed_dict)
        costs += cost
        iters += model.num_steps

        if verbose and step % log_interval == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))

        # Save Model to CheckPoint when is_training is True
        if model.is_training:
            if step % log_interval == 10:
                checkpoint_path = os.path.join(FLAGS.pos_train_dir, "pos.ckpt")
                model.saver.save(session, checkpoint_path)
                print("Model Saved... at time step " + str(step))

    # BUG FIX: guard against an empty iterator (iters == 0) instead of
    # raising ZeroDivisionError; report infinite perplexity in that case.
    return np.exp(costs / iters) if iters else float("inf")
# Example no. 3 (scraped-snippet marker, originally "Exemplo n.º 3" / vote count 0)
def run_epoch(session, model, word_data, tag_data, eval_op, verbose=False):
    """Runs the model on the given data for one epoch (BiLSTM, optional CRF).

    Args:
        session: active tf.Session.
        model: model object; if model.crf_layer is truthy, Viterbi decoding is
            applied per sentence to measure tagging accuracy.
        word_data, tag_data: parallel id sequences consumed by reader.iterator.
        eval_op: op executed each step (train op, or a no-op for evaluation).
        verbose: if True, print progress and accuracy every ~10% of the epoch.

    Returns:
        Perplexity (exp of mean cost per timestep) over the epoch.
    """
    epoch_size = ((len(word_data) // model.batch_size) - 1) // model.num_steps
    # BUG FIX: for small datasets epoch_size // 10 is 0 and the original
    # `step % (epoch_size // 10)` raised ZeroDivisionError. Clamp to >= 1.
    log_interval = max(epoch_size // 10, 1)

    start_time = time.time()
    costs = 0.0
    iters = 0
    correct_labels = 0  # prediction accuracy numerator (CRF path only)
    total_labels = 0

    for step, (x, y) in enumerate(
            reader.iterator(word_data, tag_data, model.batch_size,
                            model.num_steps)):
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y

        if model.crf_layer:  # model has the CRF decoding layer
            fetches = [
                model.cost, model.logits, model.transition_params, eval_op
            ]
            cost, logits, transition_params, _ = session.run(
                fetches, feed_dict)
            # Iterate over the batch: logits is [batch_size, num_steps,
            # target_num]; each unary_score_ is [num_steps, target_num] and
            # each y_ is the golden tag sequence of length num_steps.
            for unary_score_, y_ in zip(logits, y):
                # viterbi_decode returns (best_path_ids, path_score).
                viterbi_prediction = tf.contrib.crf.viterbi_decode(
                    unary_score_, transition_params)
                # Compare the predicted sequence with the golden sequence.
                correct_labels += np.sum(np.equal(
                    viterbi_prediction[0], y_))
                total_labels += len(y_)
        else:
            fetches = [model.cost, model.logits, eval_op]
            cost, logits, _ = session.run(fetches, feed_dict)

        costs += cost
        iters += model.num_steps

        if verbose and step % log_interval == 10:
            print("%.3f perplexity: %.3f speed: %.0f wps" %
                  (step * 1.0 / epoch_size, np.exp(costs / iters),
                   iters * model.batch_size / (time.time() - start_time)))

            # Accuracy (only meaningful on the CRF path).
            # BUG FIX: without the total_labels guard, the non-CRF path — where
            # the counters are never incremented — raised ZeroDivisionError.
            if total_labels > 0:
                accuracy = 100.0 * correct_labels / float(total_labels)
                print("Accuracy: %.2f%%" % accuracy)

        # Save Model to CheckPoint when is_training is True
        if model.is_training:
            if step % log_interval == 10:
                checkpoint_path = os.path.join(FLAGS.pos_train_dir,
                                               "pos_bilstm_crf.ckpt")
                model.saver.save(session, checkpoint_path)
                print("Model Saved... at time step " + str(step))

    # BUG FIX: guard against an empty iterator (iters == 0) instead of
    # raising ZeroDivisionError; report infinite perplexity in that case.
    return np.exp(costs / iters) if iters else float("inf")