Example #1
def model_init(vocab_size, embedding_size, n_past_words, n_pos_tags):
    # build the tagger graph plus an Adam train op and a global step counter
    pos_tagger = model.Tagger(vocab_size, embedding_size, n_past_words, n_pos_tags)

    global_step = tf.Variable(
        initial_value=0, name="global_step", trainable=False)
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(pos_tagger.loss, global_step=global_step)

    return pos_tagger, train_op, global_step
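
A minimal driver sketch for the helper above, TF1-style; the Tagger's placeholder names (x, y) and the batch arrays are assumptions for illustration, not part of the original snippet:

# Hypothetical usage of model_init; pos_tagger.x / pos_tagger.y and
# batch_x / batch_y are assumptions made for this sketch.
pos_tagger, train_op, global_step = model_init(
    vocab_size=10000, embedding_size=50, n_past_words=3, n_pos_tags=45)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    _, step = sess.run([train_op, global_step],
                       feed_dict={pos_tagger.x: batch_x, pos_tagger.y: batch_y})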
Example #2
def init(args):
    def parse(line):
        attr, pos_id = line.split()
        attr = tuple(attr.split(','))
        return (attr, int(pos_id))

    model = md.Analyzer(
        md.BidirectionalRecognizer(md.Recognizer(256, 100, 100, 100),
                                   md.Recognizer(256, 100, 100, 100)),
        md.Tagger(md.BiClassifier(100), chainer.ChainList()))
    optimizer = optimizers.AdaGrad(lr=0.01)
    optimizer.setup(model)
    return Storage(model, optimizer)
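
A minimal one-step update sketch for the Storage returned above; the attribute names on Storage and the Analyzer's call signature are assumptions, only the Chainer calls themselves are standard API:

# Hypothetical single update step; `storage.model(batch)` returning a
# loss Variable and the Storage attribute names are assumptions.
storage = init(args)
loss = storage.model(batch)   # forward pass (assumed signature)
storage.model.cleargrads()    # reset accumulated gradients
loss.backward()               # backprop through the recognizer/tagger
storage.optimizer.update()    # one AdaGrad step (lr=0.01)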
Example #3
def init(args):
    def parse(line):
        attr, pos_id = line.split()
        attr = tuple(attr.split(','))
        return (attr, int(pos_id))

    mappings = Attribute(
        util.OneToOneMapping(parse(line) for line in args.pos_def),
        util.OneToOneMapping(
            (row[1], int(row[0])) for row in csv.reader(args.conj_type_def)),
        util.OneToOneMapping(
            (row[1], int(row[0])) for row in csv.reader(args.conj_form_def)))
    model = md.Analyzer(
        md.BidirectionalRecognizer(
            md.Recognizer(256, 256, 256, 256),
            md.Recognizer(256, 256, 256, 64 + 256 + 128 + 128)),
        md.Tagger(
            md.BiClassifier(64),
            chainer.ChainList(md.Classifier(256, len(mappings.pos)),
                              md.Classifier(128, len(mappings.conj_type)),
                              md.Classifier(128, len(mappings.conj_form)))))
    optimizer = optimizers.AdaGrad(lr=0.01)
    optimizer.setup(model)
    return Storage(mappings, model, optimizer)
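
For reference, the inner parse helper turns one line of args.pos_def into a (feature-tuple, id) pair; the sample line below is made up for illustration:

# parse("noun,common 10") -> (("noun", "common"), 10)
# The comma-separated attribute string becomes the key tuple and the
# trailing column becomes the integer id stored in util.OneToOneMapping.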
Example #4
def train(flag, file_to_save_model, data_dir):
    num_of_training_dt = 10
    config = parameter.Config()
    if flag == '1':
        tagger = model.Tagger(config=config)
    elif flag == '2':
        tagger = model_biRNN.biRNNTagger(config=config)
    elif flag == '3':
        tagger = model_LSTMCRF.CRFTagger(config=config)
    elif flag == '4':
        tagger = model_BLSTMCRF.BCRFTagger(config=config)
    else:
        print("No such model!")
        exit()

    # Initialize the variables (i.e. assign their default value)
    init = tf.global_variables_initializer()
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=sess_config)  
    sess.run(init)
    
    # create the path to save the model
    if not os.path.exists( os.path.join('.',file_to_save_model) ):
        os.mkdir(os.path.join('.',file_to_save_model))
        
    saver = tf.train.Saver(max_to_keep=3)
    trainconfig = parameter.TrainConfig()
    batch_size = trainconfig.batch_size
    num_epoch = trainconfig.num_epoch
    eps = trainconfig.lr_eps
    lr_decay_rate = trainconfig.lr_decay_rate
    loss_eps = trainconfig.loss_eps
    try:
        saver.restore(sess, tf.train.latest_checkpoint(os.path.join('.',file_to_save_model)))
        print("restore previous model")
        # create the file to save training & testing accuracy and loss
        curve = open(os.path.join('.',file_to_save_model,'loss_acc_curve.txt'),'a+')
        log = open(os.path.join('.',file_to_save_model,'train_log.txt'),'a+')
        curve.seek(0)
        lines = curve.readlines()
        lastline = lines[-1].strip('\n').split('\t')
        last_loss = float(lastline[-1])
        epoch_base = int(float(lastline[0])) + 1
        learning_rate = float(lastline[1])
        learning_rate = learning_rate * lr_decay_rate
        max_acc = float(lastline[2])
        log.write('[PARAMETER SETTING] initial_learning_rate: %f, lr_eps: %f, lr_decay_rate: %f, batch_size: %i, num_epoch: %i \n'%(learning_rate, eps, lr_decay_rate, batch_size, num_epoch))
        print('start from epoch: %i, learning rate: %f, last_loss: %f, max_acc: %f' %(epoch_base,learning_rate,last_loss,max_acc) )
    except Exception:
        # no checkpoint found (or restore failed): start a fresh model
        print("build new model")
        # create the file to save training & testing accuracy and loss
        curve = open(os.path.join('.',file_to_save_model,'loss_acc_curve.txt'),'w')
        curve.write('epoch'+'\t'+'learning rate'+'\t'+'training accuracy'+'\t'+'testing accuracy'+'\t'+'loss'+'\n')
        log = open(os.path.join('.',file_to_save_model,'train_log.txt'),'w')
        learning_rate = trainconfig.initial_learning_rate
        max_acc = 0  # best training accuracy so far; used to decide when to save the model
        last_loss = 0
        epoch_base = 0
        log.write('[PARAMETER SETTING] initial_learning_rate: %f, lr_eps: %f, lr_decay_rate: %f, batch_size: %i, num_epoch: %i \n'%(learning_rate, eps, lr_decay_rate, batch_size, num_epoch))
    LR_decay=True
    ###----- loading all data -----###
    X_test, y_test, _, _ = model.readdata(data_dir+'testing_dt.json')
    testinputs, testlabels, testseq_length = tagger.pre_process(X_test, y_test)
    
    X_train_ls=[]
    y_train_ls=[]
    train_input_ls=[]
    train_label_ls=[]
    train_seqlength_ls=[]

    for num in range(num_of_training_dt):
        #dataname = 'training_dt'+str(num)+'.json'
        dataname = 'training_dt.json'
        log.write('start to load '+dataname+'\n');log.flush()
        start_mini = timeit.default_timer()
        X_train, y_train, _, _ = model.readdata(data_dir+dataname)
        traininputs, trainlabels, trainseq_length = tagger.pre_process(X_train, y_train)
        X_train_ls.append(X_train)
        y_train_ls.append(y_train)
        train_input_ls.append(traininputs)
        train_label_ls.append(trainlabels)
        train_seqlength_ls.append(trainseq_length)
        log.write(dataname +' loaded. i/o time:'+str(timeit.default_timer()-start_mini)+' seconds'+'\n');log.flush()

    for epoch in range(epoch_base, num_epoch):
        start = timeit.default_timer()
        
        ###-----training step-----###
        for num in range(num_of_training_dt):
            start_mini = timeit.default_timer()
            X_train = X_train_ls[num]
            y_train = y_train_ls[num]
            #shuffle(X_train, y_train)   

            # split the data into minibatch
            [x_batch, y_batch] = model.multiminibatch([X_train, y_train], batch_size)
            for x, y in zip(x_batch, y_batch):
                inputs, labels, seq_length = tagger.pre_process(x, y)
                sess.run([tagger.train_op], feed_dict={
                    tagger.x: inputs,
                    tagger.y: labels,
                    tagger.length: seq_length,
                    tagger.dropout: 0.2,
                    tagger.lr: learning_rate})
            log.write('epoch: '+str(epoch+1)+', training_dt'+str(num)+', train_op run time: '+str(timeit.default_timer()-start_mini)+' seconds'+'\n'); log.flush()

        
        ###-----calculate loss and training accuracy-----###
        loss = []; acc = []  # accumulate over every training set, not just the last one
        for num in range(num_of_training_dt):
            start_mini = timeit.default_timer()
            [inputs, labels, seq_length] = model.multiminibatch([train_input_ls[num], train_label_ls[num], train_seqlength_ls[num]], batch_size=1000)
            for x, y, z in zip(inputs, labels, seq_length):
                loss += sess.run([tagger.loss], feed_dict={tagger.x: x, tagger.y: y, tagger.length: z, tagger.dropout: 0.0})  # sess.run returns a one-element list
                acc += sess.run([tagger.accuracy], feed_dict={tagger.x: x, tagger.y: y, tagger.length: z, tagger.dropout: 0.0})  # sess.run returns a one-element list
            log.write('epoch: '+str(epoch+1)+', training_dt'+str(num)+', calculating loss & acc run time: '+str(timeit.default_timer()-start_mini)+' seconds'+'\n'); log.flush()
        LOSS = sum(loss)/len(loss)
        ACC = sum(acc)/len(acc)
        
        log.write('epoch: '+str(epoch+1)+', loss: '+str(LOSS)+', training accuracy: '+str(ACC)+'\n'); log.flush()
        
        ###-----calculate testing accuracy-----###
        start_mini = timeit.default_timer() 
        test_acc=0
        [inputs,labels,seq_length] = model.multiminibatch([testinputs, testlabels, testseq_length], batch_size=1000)
        for x,y,z in zip(inputs,labels,seq_length):
            test_acc += sess.run(tagger.accuracy, feed_dict={tagger.x: x, tagger.y: y, tagger.length: z, tagger.dropout: 0.0})
        test_acc = test_acc/len(inputs)
        log.write('epoch: '+str(epoch+1)+', testing accuracy: '+str(test_acc)+', learning rate: '+str(learning_rate)+', run time: '+str(timeit.default_timer()-start_mini)+' seconds'+'\n'); log.flush()
        print('epoch: %d, loss: %f, learning rate: %f, training accuracy: %f, testing accuracy: %f, run time: %f seconds' %(epoch+1, LOSS, learning_rate, ACC, test_acc, timeit.default_timer()-start))

        ###-----write training information into loss_acc_curve.txt-----###
        curve.write(str(epoch+1)+'\t'+str(learning_rate)+'\t'+str(ACC)+'\t'+str(test_acc)+'\t'+str(LOSS)+'\n');curve.flush()
        
        ###-----learning rate decay-----###
        if learning_rate < 1e-6:
            LR_decay = False
        if LR_decay:
            if last_loss != 0 and abs(last_loss - LOSS)/last_loss < eps:
                learning_rate = learning_rate * lr_decay_rate
            last_loss = LOSS
        
        ###-----save better model-----###        
        if ACC > max_acc:
            max_acc = ACC
            saver.save(sess,os.path.join('.',file_to_save_model,'tagging-model.ckpt'))
            log.write('epoch: '+str(epoch+1)+', better model saved.'+'\n'); log.flush()

    log.close()
    curve.close()
    sess.close()
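
The schedule at the bottom of the epoch loop multiplies the learning rate by lr_decay_rate whenever the relative loss change falls below eps, and freezes it once it drops under 1e-6. A standalone sketch of the same rule; eps and lr_decay_rate here are illustrative values, not the ones in parameter.TrainConfig:

# Standalone sketch of the decay rule used in train() above.
def decayed_lr(lr, last_loss, loss, eps=1e-3, lr_decay_rate=0.5):
    if lr < 1e-6:
        return lr  # decay disabled once the rate bottoms out
    if last_loss != 0 and abs(last_loss - loss) / last_loss < eps:
        lr *= lr_decay_rate  # loss plateaued: shrink the step size
    return lr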
Example #5
        "--model_type",
        type=str,
        default='3',
        dest="flag",
        help=
        "the model to be trained \n1: LSTM-RNN \n2: BiLSTM-RNN \n3: LSTM+CRF \n4: BLSTM+CRF \n"
    )
    arg = parser.parse_args()
    data_dir = arg.data_dir
    file_to_save_model = arg.file_to_save_model
    flag = arg.flag

    config = parameter.Config()

    if flag == '1':
        tagger = model.Tagger(config=config)
    elif flag == '2':
        tagger = model_biRNN.biRNNTagger(config=config)
    elif flag == '3':
        tagger = model_LSTMCRF.CRFTagger(config=config)
    elif flag == '4':
        tagger = model_BLSTMCRF.BCRFTagger(config=config)
    else:
        print("No such model")
        exit()

    init = tf.global_variables_initializer()
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    saver = tf.train.Saver()
def save_label(flag, file_to_save_model, data_dir, data_name, batch_size=100):
    posdt_dir = os.path.join(data_dir, 'posdt.json')
    with open(posdt_dir, 'r') as f:
        posdt = json.load(f)
    inv_map = dict(zip(posdt.values(), posdt.keys()))

    config = parameter.Config()
    if flag == '1':
        tagger = model.Tagger(config=config)
    elif flag == '2':
        tagger = model_biRNN.biRNNTagger(config=config)
    elif flag == '3':
        tagger = model_LSTMCRF.CRFTagger(config=config)
    elif flag == '4':
        tagger = model_BLSTMCRF.BCRFTagger(config=config)
    else:
        print("No such model")
        return

    init = tf.global_variables_initializer()
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.allow_growth = True
    saver = tf.train.Saver()
    with tf.Session(config=sess_config) as sess:
        sess.run(init)
        saver.restore(
            sess,
            tf.train.latest_checkpoint(
                os.path.join(data_dir, file_to_save_model)))

        # str.strip('.json') strips characters, not the suffix; use splitext
        f = open(os.path.join(data_dir, file_to_save_model,
                              'pred_' + os.path.splitext(data_name)[0] + '.txt'),
                 'w',
                 encoding='utf-8')
        f.write('true' + '\n' + 'prediction' + '\n' + '\n')

        X_train, y_train, data_seg, label_seg = model.readdata(
            os.path.join(data_dir, data_name))
        [x_batch, y_batch, data_seg_batch, label_seg_batch
         ] = model.multiminibatch([X_train, y_train, data_seg, label_seg],
                                  batch_size)
        acc = 0
        for step in range(len(x_batch)):
            batch_xs = x_batch[step]
            batch_ys = y_batch[step]
            batch_data_seg = data_seg_batch[step]
            batch_label_seg = label_seg_batch[step]
            traininputs, trainlabels, trainseq_length = tagger.pre_process(
                batch_xs, batch_ys)
            pred = sess.run(tagger.prediction,
                            feed_dict={
                                tagger.x: traininputs,
                                tagger.length: trainseq_length,
                                tagger.dropout: 0.0
                            })
            acc += sess.run(tagger.accuracy,
                            feed_dict={
                                tagger.x: traininputs,
                                tagger.y: trainlabels,
                                tagger.length: trainseq_length,
                                tagger.dropout: 0.0
                            })
            if flag == '1' or flag == '2':
                # the LSTM/BiLSTM models return per-class scores: take the argmax
                tag_ls = np.argmax(pred, axis=1)
            elif flag == '3' or flag == '4':
                # the CRF models already return the decoded label sequence
                tag_ls = pred

            ix = 0
            for count in range(len(trainseq_length)):
                sen = batch_data_seg[count]
                sentence = sen.split(' ')
                lab = batch_label_seg[count]
                lals = lab.split(' ')
                # gold sequence: word_tag pairs joined by spaces
                true = ''
                for word, tag in zip(sentence, lals):
                    true = true + word + '_' + tag + ' '
                f.write(true + '\n')
                # predicted sequence for the same sentence
                result = ''
                for i in range(ix, ix + trainseq_length[count]):
                    result = result + sentence[i - ix] + '_' + inv_map[
                        tag_ls[i]] + ' '
                f.write(result + '\n')
                f.write('\n')
                ix = ix + trainseq_length[count]

            f.flush()

        acc = acc / len(x_batch)
        f.write('accuracy: ' + str(acc))
        print('accuracy: %f' % acc)
        f.close()
        print(data_name + ' saved!')
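
Each record save_label writes is three lines (gold word_tag sequence, predicted sequence, blank separator) after a three-line header and before a final accuracy line. A minimal sketch for reading the file back; the path argument is hypothetical:

# Minimal reader for the pred_*.txt format written above. Records are
# (true, prediction) line pairs separated by a blank line, following
# the 'true' / 'prediction' / blank header and before "accuracy: ...".
def read_predictions(path):
    with open(path, encoding='utf-8') as f:
        lines = [line.rstrip('\n') for line in f]
    pairs = []
    i = 3  # skip the three header lines
    while i + 1 < len(lines) and not lines[i].startswith('accuracy:'):
        pairs.append((lines[i], lines[i + 1]))
        i += 3  # step over the blank separator
    return pairs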