def main():
    # limit gpu memory usage
    def get_session(gpu_fraction):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    K.set_session(get_session(args.gpu_fraction))

    save_path = os.path.join(args.save_dir, args.model)
    if args.load_model is not None:
        load_path = os.path.join(args.save_dir, args.load_model)

    ##### read data #####
    dm = DataManager()
    print('Loading data...')
    if args.action == 'train':
        dm.add_data('train_data', train_path, True)
    elif args.action == 'semi':
        dm.add_data('train_data', train_path, True)
        dm.add_data('semi_data', semi_path, False)
    elif args.action == 'test':
        dm.add_data('train_data', train_path, True)
        dm.add_data('test_data', test_path, True)
    else:
        raise Exception('Action must be one of train, semi, or test')

    # prepare tokenizer
    print('get Tokenizer...')
    if args.load_model is not None:
        # read the existing tokenizer
        dm.load_tokenizer(os.path.join(load_path, 'token.pk'))
    else:
        # create a tokenizer on the new data
        dm.tokenize(args.vocab_size)

    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.exists(os.path.join(save_path, 'token.pk')):
        dm.save_tokenizer(os.path.join(save_path, 'token.pk'))

    # convert to sequences
    dm.to_sequence(args.max_length)

    # initialize model
    print('initial model...')
    model = simpleRNN(args)
    model.summary()

    print("args.load_model =", args.load_model)
    if args.load_model is not None:
        if args.action == 'train':
            print('Warning : load an existing model and keep training')
            path = os.path.join(load_path, 'model.h5')
            if os.path.exists(path):
                print('load model from %s' % path)
                model.load_weights(path)
            else:
                raise ValueError("Can't find the file %s" % path)
        elif args.action == 'test':
            #print('Warning : testing without loading any model')
            print('args.action is %s' % (args.action))
            path = os.path.join(load_path, 'model.h5')
            if os.path.exists(path):
                print('load model from %s' % path)
                model.load_weights(path)
            else:
                raise ValueError("Can't find the file %s" % path)

    # training
    if args.action == 'train':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        #earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='max')
        save_path = os.path.join(save_path, 'model.h5')
        """
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_loss',
                                     mode='max')
        """
        # the three model inputs: tweet text, news snippet, and target entity
        tweets = X[0, :]
        snippets = X[1, :]
        targets = X[2, :]
        print("tweets's shape = ", tweets.shape)
        print("snippets's shape = ", snippets.shape)
        print("targets's shape = ", targets.shape)
        print("Y's shape = ", Y.shape)

        #model = Model(inputs=[main_input, auxiliary_input], outputs=[main_output, auxiliary_output])
        history = model.fit([tweets, snippets, targets], Y,
                            validation_data=([X_val[0, :], X_val[1, :], X_val[2, :]], Y_val),
                            epochs=args.nb_epoch,
                            batch_size=args.batch_size)
                            #callbacks=[checkpoint, earlystopping])

        predictions = model.predict([tweets, snippets, targets])
        #print(predictions.shape)
        #print(predictions)
        model.save(save_path)

    # testing
    elif args.action == 'test':
        args.val_ratio = 0
        (X, Y), (X_val, Y_val) = dm.split_data('test_data', args.val_ratio)
        tweets = X[0, :]
        snippets = X[1, :]
        targets = X[2, :]
        #print("tweets.shape =", tweets.shape)
        #print("snippets.shape =", snippets.shape)
        #print("targets.shape =", targets.shape)

        predictions = model.predict([tweets, snippets, targets])
        predictions = predictions.reshape(-1)
        #print(predictions)
        #print(Y.shape)

        #scores = np.sum((predictions - Y)**2) / len(Y)
        scores = model.evaluate([tweets, snippets, targets], Y)
        print("test data mse by keras = %f" % scores[1])
        print("test data mse by sklearn = %f" % mean_squared_error(Y, predictions))

        # map regression outputs and labels to the three sentiment classes {-1, 0, 1}
        for idx, value in enumerate(predictions):
            if value > 0:
                predictions[idx] = 1
            elif value == 0:
                predictions[idx] = 0
            elif value < 0:
                predictions[idx] = -1
        for idx, value in enumerate(Y):
            if value > 0:
                Y[idx] = 1
            elif value == 0:
                Y[idx] = 0
            elif value < 0:
                Y[idx] = -1
        print("test data micro f1 score by sklearn = %f" % f1_score(Y, predictions, average='micro'))
        print("test data macro f1 score by sklearn = %f" % f1_score(Y, predictions, average='macro'))
        #print("test data scores[1](loss = mse) = %f" % scores[1])
        #raise Exception('Implement your testing function')

        # repeat the evaluation on the training split
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        tweets = X[0, :]
        snippets = X[1, :]
        targets = X[2, :]

        predictions = model.predict([tweets, snippets, targets])
        predictions = predictions.reshape(-1)

        #scores = np.sum((predictions - Y)**2) / len(Y)
        scores = model.evaluate([tweets, snippets, targets], Y)
        print("train data mse by keras = %f" % scores[1])
        print("train data mse by sklearn = %f" % mean_squared_error(Y, predictions))

        for idx, value in enumerate(predictions):
            if value > 0:
                predictions[idx] = 1
            elif value == 0:
                predictions[idx] = 0
            elif value < 0:
                predictions[idx] = -1
        for idx, value in enumerate(Y):
            if value > 0:
                Y[idx] = 1
            elif value == 0:
                Y[idx] = 0
            elif value < 0:
                Y[idx] = -1
        print("train data micro f1 score by sklearn = %f" % f1_score(Y, predictions, average='micro'))
        print("train data macro f1 score by sklearn = %f" % f1_score(Y, predictions, average='macro'))

    # semi-supervised training
    elif args.action == 'semi':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        [semi_all_X] = dm.get_data('semi_data')

        # mode is 'min' because the monitored quantity is a loss
        earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')
        save_path = os.path.join(save_path, 'model.h5')
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_loss',
                                     mode='min')

        # repeat 10 times
        for i in range(10):
            # label the semi-data
            semi_pred = model.predict(semi_all_X, batch_size=1024, verbose=True)
            semi_X, semi_Y = dm.get_semi_data('semi_data', semi_pred,
                                              args.threshold, args.loss_function)
            semi_X = np.concatenate((semi_X, X))
            semi_Y = np.concatenate((semi_Y, Y))
            print('-- iteration %d semi_data size: %d' % (i + 1, len(semi_X)))

            # train
            history = model.fit(semi_X, semi_Y,
                                validation_data=(X_val, Y_val),
                                epochs=2,
                                batch_size=args.batch_size,
                                callbacks=[checkpoint, earlystopping])

            if os.path.exists(save_path):
                print('load model from %s' % save_path)
                model.load_weights(save_path)
            else:
                raise ValueError("Can't find the file %s" % save_path)
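# The element-wise loops above collapse the regression outputs and the labels
# into the three sentiment classes {-1, 0, 1} before computing the F1 scores.
# A minimal vectorized sketch of the same mapping, assuming 1-D numpy float
# arrays (`to_sign_labels` is a hypothetical helper, not part of this project):
import numpy as np

def to_sign_labels(values):
    """Map every value to -1, 0, or 1 according to its sign."""
    return np.sign(values).astype(int)

# usage sketch:
#   predictions = to_sign_labels(predictions.reshape(-1))
#   Y = to_sign_labels(np.asarray(Y))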
def main():
    # limit gpu memory usage
    def get_session(gpu_fraction):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    K.set_session(get_session(args.gpu_fraction))

    save_path = os.path.join(args.save_dir, args.model)
    if args.load_model is not None:
        load_path = os.path.join(args.save_dir, args.load_model)

    ##### read data #####
    dm = DataManager()
    print('Loading data...')
    if args.action == 'train':
        dm.add_data('train_data', train_path, True)
    elif args.action == 'semi':
        dm.add_data('train_data', train_path, True)
        dm.add_data('semi_data', semi_path, False)
    else:
        dm.add_data('test_data', test_path, False)

    # prepare tokenizer
    print('get Tokenizer...')
    if args.load_model is not None:
        # read the existing tokenizer
        dm.load_tokenizer(os.path.join(load_path, 'token.pk'))
    else:
        # create a tokenizer on the new data
        dm.tokenize(args.vocab_size)

    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.exists(os.path.join(save_path, 'token.pk')):
        dm.save_tokenizer(os.path.join(save_path, 'token.pk'))

    # convert to sequences
    dm.to_sequence(args.max_length)

    # initialize model
    print('initial model...')
    model = simpleRNN(args)
    print(model.summary())

    if args.load_model is not None:
        if args.action == 'train':
            print('Warning : load an existing model and keep training')
            path = os.path.join(load_path, 'model.h5')
            if os.path.exists(path):
                print('load model from %s' % path)
                model.load_weights(path)
            else:
                raise ValueError("Can't find the file %s" % path)
        elif args.action == 'test':
            print('Warning : testing without loading any model')

    # training
    if args.action == 'train':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        earlystopping = EarlyStopping(monitor='val_acc', patience=3, verbose=1, mode='max')
        save_path = os.path.join(save_path, 'model.h5')
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_acc',
                                     mode='max')

        # create a LossHistory instance to record the training curves
        history = LossHistory()
        hist = model.fit(X, Y,
                         validation_data=(X_val, Y_val),
                         epochs=args.nb_epoch,
                         batch_size=args.batch_size,
                         callbacks=[checkpoint, earlystopping, history])
        # plot the acc-loss curves
        history.loss_plot('epoch')

    # testing
    elif args.action == 'test':
        id = dm.data['test_data'][1]
        out = model.predict(dm.data['test_data'][0])
        out = np.squeeze(out)
        # threshold the sigmoid outputs into binary sentiment labels
        out[out <= 0.5] = 0
        out[out > 0.5] = 1
        out = out.astype(int)
        print("pred shape:", np.array(out).shape)
        print("id shape:", np.array(id).shape)

        result = pd.concat(
            [pd.DataFrame({'id': id}), pd.DataFrame({'sentiment': out})],
            axis=1)
        wd = pd.DataFrame(result)
        wd.to_csv("submission.csv", index=None)

        newZip = zipfile.ZipFile('submission.zip', 'w')
        newZip.write('submission.csv', compress_type=zipfile.ZIP_DEFLATED)
        newZip.close()

    # semi-supervised training
    elif args.action == 'semi':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        [semi_all_X] = dm.get_data('semi_data')

        earlystopping = EarlyStopping(monitor='val_acc', patience=3, verbose=1, mode='max')
        save_path = os.path.join(save_path, 'model.h5')
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_acc',
                                     mode='max')

        # repeat 10 times
        for i in range(10):
            # label the semi-data
            semi_pred = model.predict(semi_all_X, batch_size=1024, verbose=True)
            semi_X, semi_Y = dm.get_semi_data('semi_data', semi_pred,
                                              args.threshold, args.loss_function)
            semi_X = np.concatenate((semi_X, X))
            semi_Y = np.concatenate((semi_Y, Y))
            print('-- iteration %d semi_data size: %d' % (i + 1, len(semi_X)))

            history = LossHistory()
            # train
            hist = model.fit(semi_X, semi_Y,
                             validation_data=(X_val, Y_val),
                             epochs=2,
                             batch_size=args.batch_size,
                             callbacks=[checkpoint, earlystopping, history])
            history.loss_plot('epoch')

            if os.path.exists(save_path):
                print('load model from %s' % save_path)
                model.load_weights(save_path)
            else:
                raise ValueError("Can't find the file %s" % save_path)
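# `LossHistory` and its `loss_plot` method are defined elsewhere in this
# project. A minimal sketch of the callback interface this script assumes
# (records per-epoch loss/accuracy and plots them with matplotlib; the
# attribute names below are assumptions, not the project's implementation):
import matplotlib.pyplot as plt
from keras.callbacks import Callback

class LossHistory(Callback):
    def on_train_begin(self, logs=None):
        self.losses = {'epoch': []}
        self.accuracy = {'epoch': []}
        self.val_loss = {'epoch': []}
        self.val_acc = {'epoch': []}

    def on_epoch_end(self, epoch, logs=None):
        logs = logs or {}
        self.losses['epoch'].append(logs.get('loss'))
        self.accuracy['epoch'].append(logs.get('acc'))
        self.val_loss['epoch'].append(logs.get('val_loss'))
        self.val_acc['epoch'].append(logs.get('val_acc'))

    def loss_plot(self, loss_type='epoch'):
        iters = range(len(self.losses[loss_type]))
        plt.plot(iters, self.accuracy[loss_type], 'r', label='train acc')
        plt.plot(iters, self.losses[loss_type], 'g', label='train loss')
        plt.plot(iters, self.val_acc[loss_type], 'b', label='val acc')
        plt.plot(iters, self.val_loss[loss_type], 'k', label='val loss')
        plt.grid(True)
        plt.xlabel(loss_type)
        plt.ylabel('acc-loss')
        plt.legend(loc='upper right')
        plt.show()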
def main():
    # limit gpu memory usage
    def get_session(gpu_fraction):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    K.set_session(get_session(args.gpu_fraction))

    save_path = os.path.join(args.save_dir, args.model)
    if args.load_model is not None:
        load_path = os.path.join(args.save_dir, args.load_model)

    ##### read data #####
    dm = DataManager()
    print('Loading data...')
    if args.action == 'train':
        dm.add_data('train_data', train_path, True)
    elif args.action == 'semi':
        dm.add_data('train_data', train_path, True)
        dm.add_data('semi_data', semi_path, False)
    elif args.action == 'test':
        dm.add_data('test_data', test_path, True)
    else:
        raise Exception('Action must be one of train, semi, or test')

    # prepare tokenizer
    print('get Tokenizer...')
    if args.load_model is not None:
        # read the existing tokenizer
        dm.load_tokenizer(os.path.join(load_path, 'token.pk'))
    else:
        # create a tokenizer on the new data
        dm.tokenize(args.vocab_size)

    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.exists(os.path.join(save_path, 'token.pk')):
        dm.save_tokenizer(os.path.join(save_path, 'token.pk'))

    # convert to sequences
    dm.to_sequence(args.max_length)

    # prepare GloVe embedding
    embedding_matrix = preEB(dm)

    # initialize model
    print('initial model...')
    model = simpleRNN(args, embedding_matrix, dm.tokenizer.word_index)
    model.summary()

    print("args.load_model =", args.load_model)
    if args.load_model is not None:
        if args.action == 'train':
            print('Warning : load an existing model and keep training')
            path = os.path.join(load_path, 'model.h5')
            if os.path.exists(path):
                print('load model from %s' % path)
                model.load_weights(path)
            else:
                raise ValueError("Can't find the file %s" % path)
        elif args.action == 'test':
            #print('Warning : testing without loading any model')
            print('args.action is %s' % (args.action))
            path = os.path.join(load_path, 'model.h5')
            if os.path.exists(path):
                print('load model from %s' % path)
                model.load_weights(path)
            else:
                raise ValueError("Can't find the file %s" % path)

    # training
    if args.action == 'train':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        #earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='max')
        save_path = os.path.join(save_path, 'model.h5')
        """
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_loss',
                                     mode='max')
        """
        history = model.fit(X, Y,
                            validation_data=(X_val, Y_val),
                            epochs=args.nb_epoch,
                            batch_size=args.batch_size)
                            #callbacks=[checkpoint, earlystopping])
        model.save(save_path)

    # testing
    elif args.action == 'test':
        args.val_ratio = 0
        (X, Y), (X_val, Y_val) = dm.split_data('test_data', args.val_ratio)
        pred = model.predict(X)
        scores = model.evaluate(X, Y)
        print("test data scores(loss = mse) = %f" % scores[1])
        print("mse: ", evaluation(pred, Y, 'mse'))
        print("micro: ", evaluation(pred, Y, 'f1_micro'))
        print("macro: ", evaluation(pred, Y, 'f1_macro'))

    # semi-supervised training
    elif args.action == 'semi':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        [semi_all_X] = dm.get_data('semi_data')

        # mode is 'min' because the monitored quantity is a loss
        earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')
        save_path = os.path.join(save_path, 'model.h5')
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_loss',
                                     mode='min')

        # repeat 10 times
        for i in range(10):
            # label the semi-data
            semi_pred = model.predict(semi_all_X, batch_size=1024, verbose=True)
            semi_X, semi_Y = dm.get_semi_data('semi_data', semi_pred,
                                              args.threshold, args.loss_function)
            semi_X = np.concatenate((semi_X, X))
            semi_Y = np.concatenate((semi_Y, Y))
            print('-- iteration %d semi_data size: %d' % (i + 1, len(semi_X)))

            # train
            history = model.fit(semi_X, semi_Y,
                                validation_data=(X_val, Y_val),
                                epochs=2,
                                batch_size=args.batch_size,
                                callbacks=[checkpoint, earlystopping])

            if os.path.exists(save_path):
                print('load model from %s' % save_path)
                model.load_weights(save_path)
            else:
                raise ValueError("Can't find the file %s" % save_path)
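# `preEB` builds the GloVe embedding matrix passed to `simpleRNN` above; its
# implementation lives elsewhere in this project. A hypothetical sketch of the
# usual pattern (the GloVe file name and embedding dimension are assumptions):
import numpy as np

def preEB(dm, glove_path='glove.6B.100d.txt', emb_dim=100):
    """Build an embedding matrix aligned with the tokenizer's word index."""
    embeddings = {}
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            embeddings[parts[0]] = np.asarray(parts[1:], dtype='float32')
    word_index = dm.tokenizer.word_index
    matrix = np.zeros((len(word_index) + 1, emb_dim))
    for word, idx in word_index.items():
        vec = embeddings.get(word)
        if vec is not None:
            matrix[idx] = vec
    return matrix

# Inside `simpleRNN`, such a matrix is typically wired into a Keras Embedding
# layer, e.g. Embedding(input_dim=matrix.shape[0], output_dim=matrix.shape[1],
#                       weights=[matrix], trainable=False).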
def main():
    # limit gpu memory usage
    def get_session(gpu_fraction):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    K.set_session(get_session(args.gpu_fraction))

    save_path = os.path.join(args.save_dir, args.model)
    if args.load_model is not None:
        load_path = os.path.join(args.save_dir, args.load_model)

    ##### read data #####
    dm = DataManager()
    print('Loading data...')
    if args.action == 'train':
        dm.add_data('train_data', train_path, True)
        dm.add_data('test_data', test_path, True)
    elif args.action == 'semi':
        dm.add_data('train_data', train_path, True)
        dm.add_data('semi_data', semi_path, False)
    elif args.action == 'test':
        dm.add_data('train_data', train_path, True)
        dm.add_data('test_data', test_path, True)
    else:
        raise Exception('Action must be one of train, semi, or test')

    """
    # prepare tokenizer
    print('get Tokenizer...')
    if args.load_model is not None:
        # read the existing tokenizer
        dm.load_tokenizer(os.path.join(load_path, 'token.pk'))
    else:
        # create a tokenizer on the new data
        dm.tokenize(args.vocab_size)
    """

    if not os.path.isdir(save_path):
        os.makedirs(save_path)

    """
    if not os.path.exists(os.path.join(save_path, 'token.pk')):
        dm.save_tokenizer(os.path.join(save_path, 'token.pk'))
    """

    # convert to sequences
    token_corpus = dm.to_token_corpus(args.max_length)

    #word2vec = to_word2vec(token_corpus)
    if args.action == "train":
        word2vec = to_word2vec(token_corpus)
        save_path_word2vec_model = os.path.join(save_path, 'word2vec.model')
        word2vec.save(save_path_word2vec_model)
    elif args.action == "test":
        path = os.path.join(load_path, 'word2vec.model')
        if os.path.exists(path):
            print('load model from %s' % path)
            word2vec = Word2Vec.load(path)
        else:
            raise ValueError("Can't find the file %s" % path)
    word2vec = word2vec.wv
    #print(word2vec['downgrades'])

    # pad sentences and map tokens to word2vec vectors
    dm.padding_sent(args.max_length)
    dm.sent_to_word2vec(word2vec)
    #(X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)

    # initialize model
    print('initial model...')
    model = simpleRNN(args)
    model.summary()

    print("args.load_model =", args.load_model)
    if args.load_model is not None:
        if args.action == 'train':
            print('Warning : load an existing model and keep training')
            path = os.path.join(load_path, 'model.h5')
            if os.path.exists(path):
                print('load model from %s' % path)
                model.load_weights(path)
            else:
                raise ValueError("Can't find the file %s" % path)
        elif args.action == 'test':
            #print('Warning : testing without loading any model')
            print('args.action is %s' % (args.action))
            path = os.path.join(load_path, 'model.h5')
            if os.path.exists(path):
                print('load model from %s' % path)
                model.load_weights(path)
            else:
                raise ValueError("Can't find the file %s" % path)

    # training
    if args.action == 'train':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        #print(type(X))
        #print(type(X[0]))
        #print(X[0][0])
        #print(X)
        #earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='max')
        #X, Y, X_val, Y_val = np.array(X), np.array(Y), np.array(X_val), np.array(Y_val)
        #print(X)
        #print(X[0])
        #X_val = np.reshape(X_val, (X_val.shape[0], args.max_length, X_val.shape[2]))
        save_path_model_h5 = os.path.join(save_path, 'model.h5')
        """
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_loss',
                                     mode='max')
        """
        history = model.fit(X, Y,
                            validation_data=(X_val, Y_val),
                            epochs=args.nb_epoch,
                            batch_size=args.batch_size)
                            #callbacks=[checkpoint, earlystopping])
        model.save(save_path_model_h5)

    # testing
    elif args.action == 'test':
        args.val_ratio = 0
        (X, Y), (X_val, Y_val) = dm.split_data('test_data', args.val_ratio)

        predictions = model.predict(X)
        predictions = predictions.reshape(-1)

        scores = model.evaluate(X, Y)
        print("test data mse by keras = %f" % scores[1])
        print("test data mse by sklearn = %f" % mean_squared_error(Y, predictions))

        # map regression outputs and labels to the three sentiment classes {-1, 0, 1}
        for idx, value in enumerate(predictions):
            if value > 0:
                predictions[idx] = 1
            elif value == 0:
                predictions[idx] = 0
            elif value < 0:
                predictions[idx] = -1
        for idx, value in enumerate(Y):
            if value > 0:
                Y[idx] = 1
            elif value == 0:
                Y[idx] = 0
            elif value < 0:
                Y[idx] = -1
        print("test data micro f1 score by sklearn = %f" % f1_score(Y, predictions, average='micro'))
        print("test data macro f1 score by sklearn = %f" % f1_score(Y, predictions, average='macro'))

        # repeat the evaluation on the training split
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)

        predictions = model.predict(X)
        predictions = predictions.reshape(-1)

        scores = model.evaluate(X, Y)
        print("train data mse by keras = %f" % scores[1])
        print("train data mse by sklearn = %f" % mean_squared_error(Y, predictions))

        for idx, value in enumerate(predictions):
            if value > 0:
                predictions[idx] = 1
            elif value == 0:
                predictions[idx] = 0
            elif value < 0:
                predictions[idx] = -1
        for idx, value in enumerate(Y):
            if value > 0:
                Y[idx] = 1
            elif value == 0:
                Y[idx] = 0
            elif value < 0:
                Y[idx] = -1
        print("train data micro f1 score by sklearn = %f" % f1_score(Y, predictions, average='micro'))
        print("train data macro f1 score by sklearn = %f" % f1_score(Y, predictions, average='macro'))
        #raise Exception('Implement your testing function')

    # semi-supervised training
    elif args.action == 'semi':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        [semi_all_X] = dm.get_data('semi_data')

        # mode is 'min' because the monitored quantity is a loss
        earlystopping = EarlyStopping(monitor='val_loss', patience=3, verbose=1, mode='min')
        save_path = os.path.join(save_path, 'model.h5')
        checkpoint = ModelCheckpoint(filepath=save_path,
                                     verbose=1,
                                     save_best_only=True,
                                     save_weights_only=True,
                                     monitor='val_loss',
                                     mode='min')

        # repeat 10 times
        for i in range(10):
            # label the semi-data
            semi_pred = model.predict(semi_all_X, batch_size=1024, verbose=True)
            semi_X, semi_Y = dm.get_semi_data('semi_data', semi_pred,
                                              args.threshold, args.loss_function)
            semi_X = np.concatenate((semi_X, X))
            semi_Y = np.concatenate((semi_Y, Y))
            print('-- iteration %d semi_data size: %d' % (i + 1, len(semi_X)))

            # train
            history = model.fit(semi_X, semi_Y,
                                validation_data=(X_val, Y_val),
                                epochs=2,
                                batch_size=args.batch_size,
                                callbacks=[checkpoint, earlystopping])

            if os.path.exists(save_path):
                print('load model from %s' % save_path)
                model.load_weights(save_path)
            else:
                raise ValueError("Can't find the file %s" % save_path)
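# `to_word2vec` trains the gensim Word2Vec model that `Word2Vec.load` reads
# back above; it is defined elsewhere in this project. A minimal sketch,
# assuming a pre-4.0 gensim API (the `size` keyword became `vector_size` in
# gensim 4) and an arbitrary embedding dimension:
from gensim.models import Word2Vec

def to_word2vec(token_corpus, emb_dim=100):
    """Train Word2Vec on a corpus given as a list of token lists."""
    return Word2Vec(token_corpus, size=emb_dim, window=5,
                    min_count=1, workers=4)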
def main():
    parser = argparse.ArgumentParser(description='Text OHCA recognition')
    parser.add_argument('model')
    parser.add_argument('action', choices=['train', 'test'])

    # training arguments
    parser.add_argument('--batch_size', default=256, type=int)  # must be an integer: used as a tensor dimension below
    parser.add_argument('--nb_epoch', default=2000, type=int)
    parser.add_argument('--val_ratio', default=0.1, type=float)
    parser.add_argument('--gpu_fraction', default=0.6, type=float)
    parser.add_argument('--vocab_size', default=50000, type=int)
    parser.add_argument('--max_length', default=400, type=int)
    parser.add_argument('--patience', default=30, type=int)

    # model parameters
    parser.add_argument('--loss_function', default='binary_crossentropy')
    parser.add_argument('--cell', default='LSTM', choices=['LSTM', 'GRU'])
    parser.add_argument('-num_lay', '--num_layers', default=2, type=int)
    parser.add_argument('-emb_dim', '--embedding_dim', default=256, type=int)
    parser.add_argument('-hid_siz', '--hidden_size', default=400, type=int)
    parser.add_argument('--pretrain_emb', default=True, type=bool)
    parser.add_argument('--emb_matrix', default='cbowemb.npz')
    # parser.add_argument('--dropout_rate', default=0.3, type=float)
    parser.add_argument('--keep_prob', default=1.0, type=float)
    parser.add_argument('-lr', '--learning_rate', default=0.013, type=float)
    parser.add_argument('--threshold', default=0.5, type=float)

    # output path for your prediction
    parser.add_argument('--result_path', default='result.csv')

    # put the model in the same directory
    parser.add_argument('--load_model', default=None)
    parser.add_argument('--load_token', default=True, type=bool)
    parser.add_argument('--save_dir', default='model/')

    # log dir for tensorboard
    parser.add_argument('--log_dir', default='log_dir/')

    # testing output
    parser.add_argument('--testfile', default='data/ohca_scripts.txt')
    parser.add_argument('--testout', default='data/script_test.txt')

    args = parser.parse_args()

    train_path = 'data/ohca_scripts.txt'
    test_path = args.testfile
    save_path = 'token/'

    # load token path
    if args.load_token is not None:
        load_path = os.path.join(save_path)

    # limit gpu memory usage
    def get_session(gpu_fraction):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=gpu_fraction)
        return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))

    sess = get_session(args.gpu_fraction)

    ##### read data #####
    dm = DataManager()
    print('Loading data...')
    if args.action == 'train':
        dm.add_data('train_data', train_path, with_label=True)
    else:
        # the test set also comes with labels
        dm.add_data('test_data', test_path, with_label=True)

    # prepare tokenizer
    print('get Tokenizer...')
    if args.load_token is not None:
        # read the existing tokenizer
        dm.load_tokenizer(os.path.join(load_path, 'token.pk'))
    else:
        # create a tokenizer on the new data
        dm.tokenize(args.vocab_size)

    if not os.path.isdir(save_path):
        os.makedirs(save_path)
    if not os.path.exists(os.path.join(save_path, 'token.pk')):
        dm.save_tokenizer(os.path.join(save_path, 'token.pk'))

    # convert to sequences
    dm.to_sequence(args.max_length)

    # create the graph object
    tf.reset_default_graph()

    # initialize model
    print('initial model...')
    rnnmodel = simpleRNN(args)
    #print(model.summary())

    with tf.name_scope('inputs'):
        # create placeholders for the training (testing) data
        X_ = tf.placeholder(tf.int32, [None, args.max_length], name='X')
        y_ = tf.placeholder(tf.int32, [args.batch_size, ], name='y_')
        keep_prob = tf.placeholder_with_default(1.0, shape=(), name="keep_prob")

    y_predict = rnnmodel.model(args, X_, keep_prob)

    # prepare for saving the model for later evaluation
    train_var = [X_, y_, keep_prob, y_predict]
    tf.add_to_collection('train_var', train_var[0])
    tf.add_to_collection('train_var', train_var[1])
    tf.add_to_collection('train_var', train_var[2])
    tf.add_to_collection('train_var', train_var[3])

    # loss (MSE)
    mse = rnnmodel.loss(y_, y_predict)
    # optimizer
    train_op = rnnmodel.optimizer(args, mse)
    # accuracy for validation
    accuracy = rnnmodel.accuracy(y_, y_predict)
    # initial state of the LSTM
    init_state = rnnmodel.initial_state
    # merge the histogram summaries for tensorboard
    merged = tf.summary.merge_all()
    # check the outputs of the LSTM
    routputs = rnnmodel.outputs

    if args.load_model is not None:
        load_path = os.path.join(args.save_dir)
        if args.action == 'train':
            print('Warning : load existing model variables and keep training')
            path = os.path.join(load_path, 'Sentimen_rnn_final')
            if os.path.exists(path + ".meta"):
                print('load model from %s' % path)
                #model.load_weights(path)  # replaced by the tensorflow saver below
            else:
                raise ValueError("Can't find the file %s" % path)
        elif args.action == 'test':
            print('Warning : testing without loading any model')
            raise Exception('Not loading model for testing...')

    # prepare training / testing data
    if args.action == 'train':
        (X, Y), (X_val, Y_val) = dm.split_data('train_data', args.val_ratio)
        print("Shape of X is {}, and y is {}".format(
            np.array(X).shape, np.array(Y).shape))
    elif args.action == 'test':
        (X, Y) = dm.get_labeldata('test_data')
        print("Load test data (shape {})".format(X.shape))
        #raise Exception('Implement your testing function')

    init = tf.global_variables_initializer()

    # prepare to save the model
    save_vars = tf.trainable_variables()
    saver = tf.train.Saver(save_vars, max_to_keep=7,
                           keep_checkpoint_every_n_hours=1)
    last_loss = 1000000.0

    with tf.Session() as sess:
        init.run()

        # if pre-trained, load the embedding matrix
        if args.pretrain_emb:
            emb_npfn = save_path + args.emb_matrix
            emb_matrix = np.load(emb_npfn)['embed_m']
            if (emb_matrix.shape[0] != args.vocab_size
                    or emb_matrix.shape[1] != args.embedding_dim):
                print("Imported embedding matrix shape {} does not match the expected shape of ({},{})..."
                      .format(emb_matrix.shape, args.vocab_size, args.embedding_dim))
                exit(1)
            else:
                print("Loading embedding matrix.....")
                sess.run(rnnmodel.embedding_mat.assign(emb_matrix))

        train_writer = tf.summary.FileWriter(args.log_dir + 'train', sess.graph)
        valid_writer = tf.summary.FileWriter(args.log_dir + 'valid', sess.graph)

        # load variables into the graph if assigned
        if args.load_model is not None:
            saver.restore(sess, path)

        # if semi-learning, first apply the model to the semi-learning data
        if args.action == 'train':
            # training
            early_stop_counter = 0
            generation_num = 0
            # repeat nb_epoch times
            for e in range(args.nb_epoch):
                # reset the initial LSTM state every epoch
                state = sess.run([init_state])
                semi_preds = []
                if e == 0:
                    # hard copy
                    X_train = X.copy()
                    Y_train = Y.copy()
                #elif (args.action = 'train'):

                n_batches = len(X) // args.batch_size
                for ix, (X_batch, y_batch) in enumerate(
                        get_batches(X_train, Y_train, args.batch_size), 1):
                    generation_num += 1
                    train_dict = {
                        X_: X_batch,
                        y_: y_batch,
                        keep_prob: args.keep_prob,
                        init_state: state
                    }
                    # for each training generation, reload zero initial states
                    _, summary, mse_train, accu_train = sess.run(
                        [train_op, merged, mse, accuracy], feed_dict=train_dict)
                    train_writer.add_summary(summary, generation_num)
                    outputs_ = routputs.eval(feed_dict=train_dict)
                    if ix == 1:
                        print(X_batch.shape)
                        #print("shape of outputs is {}".format(outputs_[:, -1].shape))
                    if generation_num % 10 == 0:
                        print("Epoch: {}/{}".format(e, args.nb_epoch),
                              "Iteration: {}".format(generation_num),
                              "Train loss: {:.3f}".format(mse_train))

                    # validate every 50 generations or at the end of each epoch
                    if generation_num % 50 == 0 or ix == n_batches:
                        val_acc = []
                        val_loss = []
                        val_state = sess.run([init_state])
                        for iv, (X_batch, y_batch) in enumerate(
                                get_batches(X_val, Y_val, args.batch_size), 1):
                            val_dict = {
                                X_: X_batch,
                                y_: y_batch,
                                keep_prob: 1,
                                init_state: val_state
                            }
                            summary, batch_acc, batch_loss = sess.run(
                                [merged, accuracy, mse], feed_dict=val_dict)
                            # print out some answers for checking
                            val_predict = sess.run(y_predict, feed_dict=val_dict)
                            #print("shape of val_predict is {}".format(np.array(val_predict).shape))
                            # last nine elements of each batch
                            for y_true, y_pre in zip(y_batch[-9:], val_predict[-9:]):
                                print("y_true: {}, y_predict: {}".format(y_true, y_pre))
                            val_loss.append(batch_loss)
                            val_acc.append(batch_acc)
                            sys.stdout.flush()

                        print("Iteration: {}".format(generation_num),
                              "Val acc: {:.3f}".format(np.mean(val_acc)),
                              "Val mse: {:.3f}".format(np.mean(val_loss)))
                        valid_writer.add_summary(summary, generation_num)
                        loss_val_avg = np.mean(val_loss)

                        # save variables every 50 generations
                        saver.save(sess,
                                   os.path.join(args.save_dir, "Sentimen_rnn"),
                                   global_step=generation_num)

                        if ix == n_batches:
                            # early stopping count here
                            if last_loss > loss_val_avg:
                                last_loss = loss_val_avg
                                early_stop_counter = 0
                            else:
                                early_stop_counter += 1

                if early_stop_counter >= args.patience or e == (args.nb_epoch - 1):
                    # save the final model together with the 'train_var' collection
                    saver.save(sess, os.path.join(args.save_dir, "Sentimen_rnn_final"))
                    saver.export_meta_graph(
                        os.path.join(args.save_dir, "Sentimen_rnn_final.meta"),
                        collection_list=['train_var'])
                    break

            print("End of training.....")

        # testing
        elif args.action == 'test':
            # hard copy
            X_test = X.copy()
            Y_test = Y.copy()
            state = sess.run([init_state])
            with open(args.testout, 'w+') as outfile:
                for ix, (X_batch, y_batch) in enumerate(
                        get_batches(X_test, Y_test, args.batch_size), 1):
                    test_dict = {
                        X_: X_batch,
                        y_: y_batch,
                        keep_prob: args.keep_prob,
                        init_state: state
                    }
                    # for each batch, reload zero initial states
                    _, y_prebatch, accu_train = sess.run(
                        [train_op, y_predict, accuracy], feed_dict=test_dict)
                    for y_true, y_pre in zip(y_batch, y_prebatch):
                        strout = "%d\t%f\n" % (y_true, y_pre)
                        outfile.write(strout)
            print("Testing finished, wrote output file {}".format(args.testout))
            #raise Exception('Implement your testing function')

    return
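# `get_batches` is assumed to be a plain mini-batch generator that yields
# (X_batch, y_batch) pairs of exactly `batch_size` rows, since the `y_`
# placeholder above fixes the batch dimension. A minimal sketch under that
# assumption (the remainder that does not fill a full batch is dropped):
import numpy as np

def get_batches(X, y, batch_size):
    """Yield consecutive full-size mini-batches from X and y."""
    X, y = np.asarray(X), np.asarray(y)
    n_batches = len(X) // batch_size
    for i in range(n_batches):
        start = i * batch_size
        yield X[start:start + batch_size], y[start:start + batch_size]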