Example #1
def predict():
    word_weights, tag_weights = load_embedding()
    word_voc, tag_voc, label_voc = load_voc()

    # train data
    sentences, tags, labels = load_train_data(word_voc, tag_voc, label_voc)
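    # reseed with the same value before each shuffle so that sentences,
    # tags and labels are permuted in unison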
    seed = 137
    np.random.seed(seed)
    np.random.shuffle(sentences)
    np.random.seed(seed)
    np.random.shuffle(tags)
    np.random.seed(seed)
    np.random.shuffle(labels)

    # test data (labels are unknown at prediction time)
    sentences_test, tags_test = load_test_data(word_voc, tag_voc, label_voc)
    labels_test = None
    
    # clear previous results
    command = 'rm ./Data/result/*'
    os.popen(command)

    # split into train, dev and test sets
    kf = KFold(n_splits=config.KFOLD)
    train_indices, dev_indices = [], []
    for train_index, dev_index in kf.split(labels):
        train_indices.append(train_index)
        dev_indices.append(dev_index)
    for num in range(config.KFOLD):
        train_index, dev_index = train_indices[num], dev_indices[num]
        sentences_train, sentences_dev = sentences[train_index], sentences[dev_index]
        tags_train, tags_dev = tags[train_index], tags[dev_index]
        labels_train, labels_dev = labels[train_index], labels[dev_index]

        # init model
        model = DCModel(
            config.MAX_LEN, word_weights, tag_weights, result_path='./Data/result/result.txt',
            label_voc=label_voc)

        # fit model
        model.fit(
            sentences_train, tags_train, labels_train,
            sentences_dev, tags_dev, labels_dev,
            sentences_test, tags_test, labels_test,
            config.BATCH_SIZE, config.NB_EPOCH, keep_prob=config.KEEP_PROB,
            word_keep_prob=config.WORD_KEEP_PROB, tag_keep_prob=config.TAG_KEEP_PROB)
        best_score = model.get_best_score()
        print(best_score)
        [p_test, r_test, f_test], nb_epoch = best_score
        command = 'cp ./Data/result/epoch_%d.csv ./Data/result/best_%d' % (nb_epoch+1, num)
        print(command)
        os.popen(command)
        print(p_test, r_test, f_test, '\n')
        # evaluate
        # result_path_k = result_path % k
        # p_test, r_test, f_test = model.evaluate(sentences_test, tags_test, positions_test,
        #    labels_test, simple_compute=False, ignore_label=IGNORE_LABEL,
        #    label_voc=relation_voc, result_path=result_path_k)
        # clear model
        model.clear_model()
        del model
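
Reseeding np.random with the same value before every shuffle does keep the three arrays aligned, since equal seeds produce equal permutations for equal-length arrays; a single shared permutation index expresses the same intent more directly. A minimal sketch, assuming sentences, tags and labels are equal-length NumPy arrays as in Example #1:

# Shuffle sentences, tags and labels in unison with one permutation
# instead of reseeding the RNG before each np.random.shuffle call.
import numpy as np

rng = np.random.RandomState(137)
perm = rng.permutation(len(labels))
sentences, tags, labels = sentences[perm], tags[perm], labels[perm]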
Example #2
def main_lstmatt(args, DEVICE):
    train_iter, dev_iter, test_iter, text_field, label_field = load_data(args, DEVICE)
    pretrained_embeddings = load_embedding(text_field)

    model = LSTMAttBC(args.dim_embd, args.dim, args.batch_size, pretrained_embeddings,
                      vocab_size=len(text_field.vocab), label_size=len(label_field.vocab) - 1,
                      DEVICE=DEVICE)
Example #3
def main_lstmatt(args, DEVICE):
    train_iter, dev_iter, test_iter, text_field, label_field = load_data(
        args, DEVICE)
    pretrained_embeddings = load_embedding(text_field)

    model = LSTMAttBC(args.dim_embd,
                      args.dim,
                      args.batch_size,
                      pretrained_embeddings,
                      vocab_size=len(text_field.vocab),
                      label_size=len(label_field.vocab) - 1,
                      DEVICE=DEVICE)
    # Train if no pre-trained model is given
    if args.new_train:
        train(args, [train_iter, dev_iter, test_iter], model, DEVICE)

    interpret_lstmatt(args, test_iter, text_field, label_field, DEVICE)
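
Both LSTMAttBC snippets pass label_size=len(label_field.vocab) - 1. With the classic torchtext Field API, a non-sequential label field still carries the default '<unk>' token in its vocabulary, so one slot is not a real class; the sketch below is a hypothetical field setup illustrating that assumption (the legacy import path depends on the torchtext version):

# Hypothetical sketch: why one vocab slot is not a real label.
from torchtext.legacy import data  # plain `torchtext.data` on older releases

text_field = data.Field(lower=True)
label_field = data.Field(sequential=False)  # keeps the default '<unk>' token
# After label_field.build_vocab(train_split), the vocab is roughly
# ['<unk>', 'class_a', 'class_b', ...], leaving len(label_field.vocab) - 1
# real classes for the classifier head.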
Example #4
        # predictions: [candidate_answer_num, 2 * hidden_dim]
        with tf.variable_scope('projection_layer', reuse=True):
            softmax_w = tf.get_variable('softmax_w')
            softmax_b = tf.get_variable('softmax_b')
            scores = tf.matmul(predictions, softmax_w) + softmax_b
            scores = tf.squeeze(scores)  # [candidate_answer_num]
            scores = tf.expand_dims(scores, 0)  # [1, candidate_answer_num]
            # one-hot target over the candidates (tf.one_hot, not tf.onehot)
            truth = tf.one_hot(answer, tf.shape(predictions)[0])
            truth = tf.expand_dims(truth, 0)
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=scores, labels=truth)
            cross_entropy = tf.squeeze(cross_entropy)
            return cross_entropy
    def add_training_op(self):
        opt = tf.train.AdagradOptimizer(self.config.lr)  # tf, not tr
        train_op = opt.minimize(self.loss)
        return train_op
if __name__ == '__main__':
    logging.basicConfig(filename="ccrc_model_test.log", level=logging.WARNING)
    from my_main import Config
    config = Config()
    word2idx, embedding = load_data.load_embedding()
    config.embedding = embedding
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True
    with tf.Session(config=gpu_config) as sess:
        logging.warning('begin building the model')
        model = ccrc_model(config)
        logging.warning('model build done')
        sess.run(tf.global_variables_initializer())
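
The projection layer above materializes a one-hot target only to feed it to softmax_cross_entropy_with_logits. With a single integer answer index, TF1's sparse variant computes the same loss without the one-hot step; a minimal sketch, reusing scores and answer from the snippet and assuming answer is a scalar int tensor:

# Same loss without building the one-hot target: scores is
# [1, candidate_answer_num] and answer is the index of the true answer.
labels = tf.expand_dims(answer, 0)  # shape [1]
cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=labels, logits=scores)
cross_entropy = tf.squeeze(cross_entropy)  # scalar loss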

# -*- coding: utf-8 -*-
from load_data import load_embedding, load_voc, load_train_data, load_test_data
import time
from generator import BatchGenerator
from TFNN.layers.EmbeddingLayer import Embedding
from sklearn.model_selection import KFold
from triggerType_to_trigger import get_trigger
'''
For Chinese word segmentation.
'''

############################# 1. load data ######################################
class_type = 3
training_count = 16796
test_count = 2570
word_weights, tag_weights = load_embedding()  # as matrices
word_voc, tag_voc, label_voc = load_voc()  # as dicts
sentences, tags, labels = load_train_data(word_voc, tag_voc, label_voc,
                                          class_type, training_count)
Xend_sentence, Xend_tag_test, yend_test = load_test_data(
    word_voc, tag_voc, label_voc, class_type, test_count)

# split into train and test sets (here y is the POS tag)

kf = KFold(n_splits=10)
train_indices, dev_indices = [], []
for train_index, dev_index in kf.split(labels):
    train_indices.append(train_index)
    dev_indices.append(dev_index)
for num in range(10):
    train_index, dev_index = train_indices[num], dev_indices[num]
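
Example #1 keeps its arrays aligned by reseeding before each shuffle, while this script splits the data in load order; KFold(shuffle=True, random_state=...) covers both needs reproducibly. A minimal sketch, assuming the same sentences, tags and labels arrays:

# Sketch: let KFold do the shuffling with a reproducible random_state.
from sklearn.model_selection import KFold

kf = KFold(n_splits=10, shuffle=True, random_state=137)
for fold, (train_index, dev_index) in enumerate(kf.split(labels)):
    sentences_train, sentences_dev = sentences[train_index], sentences[dev_index]
    tags_train, tags_dev = tags[train_index], tags[dev_index]
    labels_train, labels_dev = labels[train_index], labels[dev_index]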