def load_tf_model(path, embedding_dim=128, graph_size=20, n_encode_layers=2):
    """Load model weights from an HDF5 (.h5) file."""
    # Subclassed Keras models create their variables lazily, so run a dummy
    # forward pass before loading weights:
    # https://stackoverflow.com/questions/51806852/cant-save-custom-subclassed-model
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}
    data_random = [
        tf.random.uniform((2, 2), minval=0, maxval=1,
                          dtype=tf.dtypes.float32),
        tf.random.uniform((2, graph_size, 2), minval=0, maxval=1,
                          dtype=tf.dtypes.float32),
        tf.cast(
            tf.random.uniform(minval=1, maxval=10, shape=(2, graph_size),
                              dtype=tf.int32), tf.float32) /
        tf.cast(CAPACITIES[graph_size], tf.float32)
    ]
    model_loaded = AttentionModel(embedding_dim,
                                  n_encode_layers=n_encode_layers)
    set_decode_type(model_loaded, "greedy")
    _, _ = model_loaded(data_random)
    model_loaded.load_weights(path)
    return model_loaded
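# Hypothetical usage of the loader above: the checkpoint path is made up, and
# embedding_dim / graph_size must match the values the weights were trained
# with. The returned model decodes greedily (see set_decode_type above).
vrp_model = load_tf_model('checkpoints/vrp_20.h5',
                          embedding_dim=128, graph_size=20)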
def __init__(self, input_dim, second_hidden_size, minute_hidden_size,
             rnn_layers, batch_size, bidirectional, use_lstm):
    super(SafetyModel, self).__init__()
    self.batch_size = batch_size
    self.rnn_layers = rnn_layers
    self.use_lstm = use_lstm
    self.second_hidden_size = second_hidden_size
    self.minute_hidden_size = minute_hidden_size
    # Hierarchical attention: a second-level net feeds a minute-level net.
    self.second_att_net = AttentionModel(input_dim, second_hidden_size,
                                         bidirectional, use_lstm)
    if bidirectional:  # bidirectional RNNs double the output width
        second_hidden_size *= 2
    self.minute_att_net = AttentionModel(second_hidden_size,
                                         minute_hidden_size, bidirectional,
                                         use_lstm)
    if bidirectional:
        minute_hidden_size *= 2
    self.fc = nn.Linear(minute_hidden_size, 2)
    self.init_hidden()
def copy_of_tf_model(model, embedding_dim=128, graph_size=20):
    """Copy model weights to a new model."""
    # Run a dummy forward pass so the new model's variables exist before
    # assignment:
    # https://stackoverflow.com/questions/56841736/how-to-copy-a-network-in-tensorflow-2-0
    CAPACITIES = {10: 20., 20: 30., 50: 40., 100: 50.}
    data_random = [
        tf.random.uniform((2, 2), minval=0, maxval=1,
                          dtype=tf.dtypes.float32),
        tf.random.uniform((2, graph_size, 2), minval=0, maxval=1,
                          dtype=tf.dtypes.float32),
        tf.cast(
            tf.random.uniform(minval=1, maxval=10, shape=(2, graph_size),
                              dtype=tf.int32), tf.float32) /
        tf.cast(CAPACITIES[graph_size], tf.float32)
    ]
    new_model = AttentionModel(embedding_dim)
    set_decode_type(new_model, "sampling")
    _, _ = new_model(data_random)
    for a, b in zip(new_model.variables, model.variables):
        a.assign(b)
    return new_model
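# Both helpers above rely on the same trick: a subclassed Keras model has no
# variables until it has been called once, so loading or copying weights needs
# a dummy forward pass first. A self-contained sketch of the pattern (TinyNet
# is a made-up stand-in, not part of this code base):
import tensorflow as tf

class TinyNet(tf.keras.Model):
    def __init__(self):
        super().__init__()
        self.dense = tf.keras.layers.Dense(4)

    def call(self, x):
        return self.dense(x)

src, dst = TinyNet(), TinyNet()
_ = src(tf.zeros((1, 8)))  # builds src.variables
_ = dst(tf.zeros((1, 8)))  # builds dst.variables
for a, b in zip(dst.variables, src.variables):
    a.assign(b)  # the same variable-by-variable copy as copy_of_tf_model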
def main():
    args = parse_args()
    print(args)

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # Training process
        if args.attention:
            model = AttentionModel(hidden_size, src_vocab_size,
                                   tar_vocab_size, batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale, dropout=dropout)
        else:
            model = BaseModel(hidden_size, src_vocab_size, tar_vocab_size,
                              batch_size, num_layers=num_layers,
                              init_scale=init_scale, dropout=dropout)
        loss = model.build_graph()
        inference_program = train_program.clone(for_test=True)
        clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr, grad_clip=clip)
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr, grad_clip=clip)
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not supported")
        optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(startup_program)

    device_count = len(fluid.cuda_places()) if args.use_gpu else len(
        fluid.cpu_places())

    # renamed from CompiledProgram to avoid shadowing the class name
    compiled_program = fluid.CompiledProgram(
        train_program).with_data_parallel(loss_name=loss.name)

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]
        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))
        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask
        return res, np.sum(tar_mask)

    # get train epoch size
    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch, epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)
            cost_train = np.array(fetch_outs[0])
            total_loss += cost_train * batch_size
            word_count += word_num
        ppl = np.exp(total_loss / word_count)
        return ppl

    def train():
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            time_interval = 0.0
            batch_start_time = time.time()
            epoch_word_count = 0.0
            total_reader_cost = 0.0
            batch_read_start = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(
                    batch, epoch_id=epoch_id)
                word_count += word_num
                total_reader_cost += time.time() - batch_read_start
                fetch_outs = exe.run(program=compiled_program,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)
                cost_train = np.mean(fetch_outs[0])
                total_loss += cost_train * batch_size

                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)
                time_interval += batch_time
                epoch_word_count += word_num

                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; "
                        "ppl: %.5f; reader cost: %0.5f s; "
                        "ips: %0.5f tokens/sec" %
                        (epoch_id, batch_id, batch_time,
                         np.exp(total_loss / word_count),
                         total_reader_cost / 100,
                         word_count / time_interval))
                    ce_ppl.append(np.exp(total_loss / word_count))
                    total_loss = 0.0
                    word_count = 0.0
                    time_interval = 0.0
                    total_reader_cost = 0.0

                # profiler tools
                if args.profile and epoch_id == 0 and batch_id == 100:
                    profiler.reset_profiler()
                elif args.profile and epoch_id == 0 and batch_id == 105:
                    return
                batch_start_time = time.time()
                batch_read_start = time.time()

            end_time = time.time()
            epoch_time = end_time - start_time
            ce_time.append(epoch_time)
            print(
                "\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f "
                "s/step; ips: %0.5f tokens/sec\n" %
                (epoch_id, epoch_time,
                 sum(batch_times) / len(batch_times),
                 epoch_word_count / sum(batch_times)))

            if not args.profile:
                save_path = os.path.join(args.model_path,
                                         "epoch_" + str(epoch_id),
                                         "checkpoint")
                print("begin to save", save_path)
                fluid.save(train_program, save_path)
                print("save finished")
                dev_ppl = eval(valid_data)
                print("dev ppl", dev_ppl)
                test_ppl = eval(test_data)
                print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))

    with profile_context(args.profile, args.profiler_path):
        train()
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    # Training process
    if args.attention:
        model = AttentionModel(hidden_size, src_vocab_size, tar_vocab_size,
                               batch_size, num_layers=num_layers,
                               init_scale=init_scale, dropout=dropout)
    else:
        model = BaseModel(hidden_size, src_vocab_size, tar_vocab_size,
                          batch_size, num_layers=num_layers,
                          init_scale=init_scale, dropout=dropout)
    loss = model.build_graph()
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
        clip_norm=max_grad_norm))

    lr = args.learning_rate
    opt_type = args.optimizer
    if opt_type == "sgd":
        optimizer = fluid.optimizer.SGD(lr)
    elif opt_type == "adam":
        optimizer = fluid.optimizer.Adam(lr)
    else:
        print("only support [sgd|adam]")
        raise Exception("opt type not supported")

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]
        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1], 1))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))
        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask
        return res, np.sum(tar_mask)

    # get train epoch size
    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch, epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)
            cost_train = np.array(fetch_outs[0])
            total_loss += cost_train * batch_size
            word_count += word_num
        ppl = np.exp(total_loss / word_count)
        return ppl

    ce_time = []
    ce_ppl = []
    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        if args.enable_ce:
            train_data_iter = reader.get_data_iter(train_data, batch_size,
                                                   enable_ce=True)
        else:
            train_data_iter = reader.get_data_iter(train_data, batch_size)

        total_loss = 0
        word_count = 0.0
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed, word_num = prepare_input(batch,
                                                      epoch_id=epoch_id)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=True)
            cost_train = np.array(fetch_outs[0])

            total_loss += cost_train * batch_size
            word_count += word_num

            if batch_id > 0 and batch_id % 100 == 0:
                print("ppl", batch_id, np.exp(total_loss / word_count))
                ce_ppl.append(np.exp(total_loss / word_count))
                total_loss = 0.0
                word_count = 0.0

        end_time = time.time()
        time_gap = end_time - start_time
        ce_time.append(time_gap)

        dir_name = args.model_path + "/epoch_" + str(epoch_id)
        print("begin to save", dir_name)
        fluid.io.save_params(exe, dir_name)
        print("save finished")
        dev_ppl = eval(valid_data)
        print("dev ppl", dev_ppl)
        test_ppl = eval(test_data)
        print("test ppl", test_ppl)

    if args.enable_ce:
        card_num = get_cards()
        _ppl = 0
        _time = 0
        try:
            _time = ce_time[-1]
            _ppl = ce_ppl[-1]
        except:
            print("ce info error")
        print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
        print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
from attention_model import AttentionModel

if __name__ == '__main__':
    debug = 0
    if len(sys.argv) > 1:
        if sys.argv[1] == '--debug':
            debug = 1

    print('-> Starting Bot!')
    with open("token.txt", "r") as f:
        token = f.read().strip()

    if debug == 0:
        checkpoint_path = "models/char_att7/"
        model = AttentionModel(checkpoint_path=checkpoint_path,
                               load_model=True).model
    elif debug == 1:
        model = 'yolo'  # placeholder model while debugging

    # Earlier Keras-based loading path, kept for reference:
    # with CustomObjectScope({'SeqSelfAttention': SeqSelfAttention,
    #                         'MultiHead': MultiHead,
    #                         'root_mean_squared_error': root_mean_squared_error}):
    #     model = keras.models.load_model('models/char_18_epoch_5.h5')
    # graph = tf.get_default_graph()
    graph = None

    pp = PicklePersistence(filename='data/conversationbot')
    updater = Updater(token, persistence=pp, use_context=True)
    bot = NeuralBot(updater, model, graph)
    updater.dispatcher.add_handler(CommandHandler('hello', bot.hello))
def train(self):
    self.max_acc = 1
    self.is_training = True
    with tf.Graph().as_default():
        data_processor = DataProcessor()
        vocab_size = data_processor.get_vocabulary_size(FLAGS.vocab_path)
        vocab, revocab = DataProcessor.initialize_vocabulary(
            FLAGS.vocab_path)
        data_processor.get_init(FLAGS.input_training_data_path,
                                FLAGS.input_validation_data_path, vocab,
                                vocab_size, FLAGS.max_length, revocab)
        models = AttentionModel()

        input_q = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length),
                                 name="input_x1")  # FLAGS.train_batch_size
        input_ap = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
        input_an = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))

        q_encode = models.embed(
            inputs=input_q, vocab_size=vocab_size + 1,
            num_units=hp.hidden_units)  # embedding size plus 1 for padding
        ap_encode = models.embed(inputs=input_ap,
                                 vocab_size=vocab_size + 1,
                                 num_units=hp.hidden_units)
        an_encode = models.embed(inputs=input_an,
                                 vocab_size=vocab_size + 1,
                                 num_units=hp.hidden_units)

        # apply dropout
        q_encode = tf.layers.dropout(
            q_encode, hp.dropout_rate,
            training=tf.convert_to_tensor(self.is_training))
        ap_encode = tf.layers.dropout(
            ap_encode, hp.dropout_rate,
            training=tf.convert_to_tensor(self.is_training))
        an_encode = tf.layers.dropout(
            an_encode, hp.dropout_rate,
            training=tf.convert_to_tensor(self.is_training))

        # multihead blocks
        for i in range(hp.num_blocks):
            with tf.variable_scope("num_blocks_{}".format(i)):
                q_encode = models.multihead_attention(
                    query=q_encode, key=q_encode, value=q_encode,
                    num_heads=hp.num_heads,
                    is_training=tf.convert_to_tensor(self.is_training),
                    dropout_rate=hp.dropout_rate, mask_future=False)
                q_encode = models.feed_forward(
                    q_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                ap_encode = models.multihead_attention(
                    query=ap_encode, key=ap_encode, value=ap_encode,
                    num_heads=hp.num_heads,
                    is_training=tf.convert_to_tensor(self.is_training),
                    dropout_rate=hp.dropout_rate, mask_future=False)
                ap_encode = models.feed_forward(
                    ap_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                an_encode = models.multihead_attention(
                    query=an_encode, key=an_encode, value=an_encode,
                    num_heads=hp.num_heads,
                    is_training=tf.convert_to_tensor(self.is_training),
                    dropout_rate=hp.dropout_rate, mask_future=False)
                an_encode = models.feed_forward(
                    an_encode, units=[hp.hidden_units * 4, hp.hidden_units])

        # output layer
        with tf.name_scope('output_layer'):
            dims = q_encode.get_shape().as_list()
            q_encode = tf.reshape(q_encode, [-1, dims[1] * dims[2]])
            ap_encode = tf.reshape(ap_encode, [-1, dims[1] * dims[2]])
            an_encode = tf.reshape(an_encode, [-1, dims[1] * dims[2]])
            weight = tf.get_variable(
                'output_weight',
                [q_encode.get_shape().as_list()[-1], hp.hidden_units])
            q_encode = tf.matmul(q_encode, weight)
            ap_encode = tf.matmul(ap_encode, weight)
            an_encode = tf.matmul(an_encode, weight)
            q_encode = models.vec_normalize(q_encode)
            ap_encode = models.vec_normalize(ap_encode)
            an_encode = models.vec_normalize(an_encode)

        # calculate similarity and loss
        cos_12 = tf.reduce_sum(tf.multiply(q_encode, ap_encode),
                               1)  # element-wise multiply, then sum
        cos_13 = tf.reduce_sum(tf.multiply(q_encode, an_encode), 1)
        zero = tf.constant(0, shape=[FLAGS.train_batch_size],
                           dtype=tf.float32)
        margin = tf.constant(FLAGS.loss_margin,
                             shape=[FLAGS.train_batch_size],
                             dtype=tf.float32)
        losses = tf.maximum(
            zero, tf.subtract(margin, tf.subtract(cos_12, cos_13)))
        loss_sum = tf.reduce_sum(losses)
        loss_avg = tf.div(loss_sum, FLAGS.train_batch_size)
        correct = tf.equal(zero, losses)
        accuracy = tf.reduce_mean(tf.cast(correct, "float"),
                                  name="accuracy")

        # The global step is incremented automatically each time the train
        # op is executed.
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        # optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss_avg)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
        saver = tf.train.Saver(tf.global_variables())

        # session start point
        with tf.Session() as session:
            session.run(tf.local_variables_initializer())
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())

            # Load pre-trained model
            ckpt = tf.train.get_checkpoint_state(
                FLAGS.input_previous_model_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
                print("Load Model From ", ckpt.model_checkpoint_path)
            else:
                print("No model found")

            print("Begin to train model.")
            max_acc = 0
            for step in range(FLAGS.training_steps):
                train_data_batch = data_processor.next_batch_train_random(
                    FLAGS.train_batch_size)
                (train_q_vec_b, train_q_vec_len_b, train_d_vec_b,
                 train_d_vec_len_b, train_dneg_vec_b,
                 train_dneg_vec_len_b) = train_data_batch
                feed_dict = {
                    input_q: train_q_vec_b,
                    input_ap: train_d_vec_b,
                    input_an: train_dneg_vec_b
                }
                _, loss_avg_, accuracy_, step_ = session.run(
                    [train_op, loss_avg, accuracy, global_step],
                    feed_dict=feed_dict)
                print('=' * 10 + 'step{}, loss_avg = {}, acc={}'.format(
                    step_, loss_avg_, accuracy_))  # loss for all batches

                if step_ % FLAGS.eval_every == 0:
                    print('\n============================> begin to test ')
                    eval_size = FLAGS.validation_size

                    def test_step(input_y1, input_y2, input_y3, label_list,
                                  sess):
                        feed_dict = {
                            input_q: input_y1,
                            input_ap: input_y2,
                            input_an: input_y3
                        }
                        correct_flag = 0
                        cos_12_ = sess.run(cos_12, feed_dict)
                        cos_max = max(cos_12_)
                        index_max = list(cos_12_).index(cos_max)
                        if label_list[index_max] == '1':
                            correct_flag = 1
                        return correct_flag

                    def evaluate(eval_size):
                        correct_num = 0
                        for i in range(eval_size):
                            print('evaluation step %d ' % i)
                            batches = data_processor.loadValData_step(
                                vocab, vocab_size,
                                FLAGS.input_validation_data_path,
                                FLAGS.max_length,
                                eval_size)  # batch_size * seq_len
                            # display/save test data
                            batch_y1, batch_y2, label_list = batches[i]
                            correct_flag = test_step(batch_y1, batch_y2,
                                                     batch_y2, label_list,
                                                     session)
                            correct_num += correct_flag
                            print('correct_num', correct_num)
                        acc = correct_num / float(eval_size)
                        return acc

                    # Note: self.is_training was baked into the graph above
                    # via tf.convert_to_tensor at construction time, so
                    # toggling it here does not change the built graph.
                    self.is_training = False
                    acc_ = evaluate(eval_size=eval_size)
                    self.is_training = True
                    print(
                        '--------The test result among the test data sets: '
                        'acc = {0}, test size = {1}----------'.format(
                            acc_, eval_size))
                    if acc_ >= max_acc:
                        max_acc = acc_
                        path = saver.save(
                            session, FLAGS.output_model_path + '/step' +
                            str(step_) + '_loss' + str(loss_avg_) +
                            '_trainAcc' + str(accuracy_) + '_evalAcc' +
                            str(acc_))
                        saver.export_meta_graph(
                            FLAGS.output_model_path + '/meta_' + 'step' +
                            str(step_) + '_loss' + str(loss_avg_) +
                            '_trainAcc' + str(accuracy_) + '_evalAcc' +
                            str(acc_))
                        print("Save checkpoint(model) to {}".format(path))
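# The loss built above is a cosine triplet hinge,
# max(0, margin - (cos(q, a+) - cos(q, a-))), which is zero once the positive
# answer beats the negative by at least the margin. A tiny NumPy check of the
# formula (the cosine values below are invented):
import numpy as np

margin = 0.3
cos_pos = np.array([0.9, 0.6, 0.2])  # cos(q, positive answer)
cos_neg = np.array([0.1, 0.5, 0.4])  # cos(q, negative answer)
losses = np.maximum(0.0, margin - (cos_pos - cos_neg))
print(losses)        # [0.  0.2 0.5]
print(losses.mean()) # 0.2333... -- the analogue of loss_avg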
def main():
    args = parse_args()
    print(args)

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        if args.enable_ce:
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        # Training process
        if args.attention:
            model = AttentionModel(hidden_size, src_vocab_size,
                                   tar_vocab_size, batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale, dropout=dropout)
        else:
            model = BaseModel(hidden_size, src_vocab_size, tar_vocab_size,
                              batch_size, num_layers=num_layers,
                              init_scale=init_scale, dropout=dropout)

        global_norm_clip = GradientClipByGlobalNorm(max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(
                lr, parameter_list=model.parameters(),
                grad_clip=global_norm_clip)
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(
                lr, parameter_list=model.parameters(),
                grad_clip=global_norm_clip)
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not supported")

        train_data_prefix = args.train_data_prefix
        eval_data_prefix = args.eval_data_prefix
        test_data_prefix = args.test_data_prefix
        vocab_prefix = args.vocab_prefix
        src_lang = args.src_lang
        tar_lang = args.tar_lang
        print("begin to load data")
        raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                                   train_data_prefix, eval_data_prefix,
                                   test_data_prefix, args.max_len)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]
            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        # get train epoch size
        def eval(data, epoch_id=0):
            model.eval()
            eval_data_iter = reader.get_data_iter(data, batch_size,
                                                  mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch_id, batch in enumerate(eval_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id)
                loss = model(input_data_feed)
                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl

        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            epoch_start = time.time()
            model.train()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            total_reader_cost = 0.0
            interval_time_start = time.time()
            batch_start = time.time()
            for batch_id, batch in enumerate(train_data_iter):
                batch_reader_end = time.time()
                total_reader_cost += batch_reader_end - batch_start
                input_data_feed, word_num = prepare_input(
                    batch, epoch_id=epoch_id)
                word_count += word_num
                loss = model(input_data_feed)
                loss.backward()
                optimizer.minimize(loss)
                model.clear_gradients()
                total_loss += loss * batch_size
                total_loss_value = total_loss.numpy()
                batch_times.append(time.time() - batch_start)

                if batch_id > 0 and batch_id % 100 == 0:
                    print(
                        "-- Epoch:[%d]; Batch:[%d]; ppl: %.5f, "
                        "batch_cost: %.5f sec, reader_cost: %.5f sec, "
                        "ips: %.5f words/sec" %
                        (epoch_id, batch_id,
                         np.exp(total_loss_value / word_count),
                         (time.time() - interval_time_start) / 100,
                         total_reader_cost / 100,
                         word_count / (time.time() - interval_time_start)))
                    ce_ppl.append(np.exp(total_loss_value / word_count))
                    total_loss = 0.0
                    word_count = 0.0
                    total_reader_cost = 0.0
                    interval_time_start = time.time()
                batch_start = time.time()

            train_epoch_cost = time.time() - epoch_start
            print(
                "\nTrain epoch:[%d]; epoch_cost: %.5f sec; "
                "avg_batch_cost: %.5f s/step\n" %
                (epoch_id, train_epoch_cost,
                 sum(batch_times) / len(batch_times)))
            ce_time.append(train_epoch_cost)

            dir_name = os.path.join(args.model_path,
                                    "epoch_" + str(epoch_id))
            print("begin to save", dir_name)
            paddle.fluid.save_dygraph(model.state_dict(), dir_name)
            print("save finished")
            dev_ppl = eval(valid_data)
            print("dev ppl", dev_ppl)
            test_ppl = eval(test_data)
            print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
'''
set up model, loss criterion, optimizer
'''
# Instantiate the model
Exp_model = TextEncoder(embedding_dim=1024, hidden_size=256, num_layers=1,
                        bidir=True, dropout1=0.5)
Query_model = TextEncoder(embedding_dim=1024, hidden_size=256, num_layers=1,
                          bidir=True, dropout1=0.5)
Attn_model = AttentionModel(para_encoder_input_dim=512, query_dim=512,
                            output_dim=256)
para_encoder_attn_model = AttentionModel(para_encoder_input_dim=512,
                                         query_dim=512, output_dim=512)
para_encoder = ParaEncoder(input_dim=1024, hidden_size=256, num_layers=1,
                           attn_model=para_encoder_attn_model, bidir=True,
                           dropout1=0.5)
linearfc = LinearFC(num_classes=2, encoded_embedding_dim=512,
                    context_dim=512, dropout1=0.2)
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    # inference process
    print("src", src_vocab_size)

    # Dropout uses the upscale_in_train type, so it can be removed at
    # inference time; set dropout to 0 here.
    if args.attention:
        model = AttentionModel(hidden_size, src_vocab_size, tar_vocab_size,
                               batch_size, num_layers=num_layers,
                               init_scale=init_scale, dropout=0.0)
    else:
        model = BaseModel(hidden_size, src_vocab_size, tar_vocab_size,
                          batch_size, num_layers=num_layers,
                          init_scale=init_scale, dropout=0.0)

    beam_size = args.beam_size
    trans_res = model.build_graph(mode='beam_search', beam_size=beam_size)
    # clone from default main program and use it as the validation program
    main_program = fluid.default_main_program()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    source_vocab_file = args.vocab_prefix + "." + args.src_lang
    infer_file = args.infer_file
    infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]
        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1], 1))
        in_tar = np.zeros_like(in_tar, dtype='int64')
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))
        label_tar = np.zeros_like(label_tar, dtype='int64')
        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask
        return res, np.sum(tar_mask)

    dir_name = args.reload_model
    print("dir name", dir_name)
    fluid.io.load_params(exe, dir_name)

    train_data_iter = reader.get_data_iter(infer_data, 1, mode='eval')

    tar_id2vocab = []
    tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
    with open(tar_vocab_file, "r") as f:
        for line in f.readlines():
            tar_id2vocab.append(line.strip())

    infer_output_file = args.infer_output_file
    out_file = open(infer_output_file, 'w')
    for batch_id, batch in enumerate(train_data_iter):
        input_data_feed, word_num = prepare_input(batch, epoch_id=0)
        fetch_outs = exe.run(feed=input_data_feed,
                             fetch_list=[trans_res.name],
                             use_program_cache=False)
        res = [tar_id2vocab[e] for e in fetch_outs[0].reshape(-1)]
        res = res[1:]
        new_res = []
        for ele in res:
            if ele == "</s>":
                break
            new_res.append(ele)
        out_file.write(' '.join(new_res))
        out_file.write('\n')
    out_file.close()
def main():
    h5_list = [
        h5py.File('data/processed0.h5', 'r'),
        h5py.File('data/processed1.h5', 'r'),
        h5py.File('data/processed3.h5', 'r'),
        h5py.File('data/processed4.h5', 'r')
    ]
    h5_list_test = [h5py.File('data/processed2.h5', 'r')]

    batch_size = 100
    bg_train = BatchGenerator(h5_list, batch_size)
    bg_test = BatchGenerator(h5_list_test, batch_size)  # maxlen = 400000

    # if we want to change batch size during training
    # dynamic_batch = True
    dynamic_batch = False
    n_epochs = 30

    opt = keras.optimizers.Adam()
    # opt = keras.optimizers.Adadelta()
    # opt = keras.optimizers.RMSprop(lr=0.001)

    checkpoint_path = "models/char_att7/"
    # Create checkpoint callback
    cp_callback = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                  monitor='val_accuracy',
                                                  verbose=1,
                                                  save_weights_only=True,
                                                  mode='max',
                                                  save_best_only=True)

    load_model = False
    # load_model = True
    am = AttentionModel(checkpoint_path=checkpoint_path,
                        rnn_size=512,
                        rnn_style='GRU',  # 'CuDNNLSTM'
                        # bidirectional=True,
                        dropout_rate=0.4,
                        load_model=load_model)

    am.model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=['accuracy', perplexity, categorical_accuracy],
    )
    am.save_config()

    # fit using the batch generators
    am.model.fit_generator(
        bg_train,
        validation_data=bg_test,
        callbacks=[cp_callback],
        # use_multiprocessing=True,
        # workers=4,
        epochs=n_epochs)
src_vocab_table = lookup_ops.index_table_from_file('data/vocab.txt',
                                                   default_value=0)
train_x, train_y = load_data(train_df)
splitVal = int(len(train_y) * VAL_RATE)
# Note: only the validation slice is taken here; src_dataset below keeps the
# full arrays, so validation examples overlap the training data.
val_x, val_y = train_x[splitVal:], train_y[splitVal:]
src_dataset = tf.contrib.data.Dataset.from_tensor_slices((train_x, train_y))
val_dataset = tf.contrib.data.Dataset.from_tensor_slices((val_x, val_y))
src_iterator = train_iterator(src_dataset, src_vocab_table, BATCH_SIZE,
                              max_length=500)
val_iterator = train_iterator(val_dataset, src_vocab_table, BATCH_SIZE,
                              max_length=500)
attention_model = AttentionModel(src_iterator, val_iterator, 4, 200, 200,
                                 False)
sess_config = tf.ConfigProto(log_device_placement=False,
                             allow_soft_placement=True)
sess_config.gpu_options.allow_growth = True
with tf.Session(config=sess_config) as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())
    for i in range(MAX_EPOCHS):
        sess.run(src_iterator.initializer)
        sess.run(val_iterator.initializer)
        print("epoch:", i)
        step = 0
        while True:
            try:
                attention_model.train(sess, step)
                step += 1
            except tf.errors.OutOfRangeError:
                # end of epoch: the iterator is exhausted
                break
def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    # inference process
    print("src", src_vocab_size)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # Dropout uses the upscale_in_train type, so it can be removed at
        # inference time; set dropout to 0 here.
        if args.attention:
            model = AttentionModel(hidden_size, src_vocab_size,
                                   tar_vocab_size, batch_size,
                                   beam_size=args.beam_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale, dropout=0.0,
                                   mode='beam_search')
        else:
            model = BaseModel(hidden_size, src_vocab_size, tar_vocab_size,
                              batch_size, beam_size=args.beam_size,
                              num_layers=num_layers, init_scale=init_scale,
                              dropout=0.0, mode='beam_search')

        source_vocab_file = args.vocab_prefix + "." + args.src_lang
        infer_file = args.infer_file
        infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]
            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        dir_name = args.reload_model
        print("dir name", dir_name)
        state_dict, _ = fluid.dygraph.load_dygraph(dir_name)
        model.set_dict(state_dict)
        model.eval()

        train_data_iter = reader.get_data_iter(infer_data, batch_size,
                                               mode='infer')

        tar_id2vocab = []
        tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
        with io.open(tar_vocab_file, "r", encoding='utf-8') as f:
            for line in f.readlines():
                tar_id2vocab.append(line.strip())

        infer_output_file = args.infer_output_file
        infer_output_dir = infer_output_file.split('/')[0]
        if not os.path.exists(infer_output_dir):
            os.mkdir(infer_output_dir)

        with io.open(infer_output_file, 'w', encoding='utf-8') as out_file:
            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id=0)
                outputs = model(input_data_feed)
                for i in range(outputs.shape[0]):
                    ins = outputs[i].numpy()
                    res = [tar_id2vocab[int(e)]
                           for e in ins[:, 0].reshape(-1)]
                    new_res = []
                    for ele in res:
                        if ele == "</s>":
                            break
                        new_res.append(ele)
                    out_file.write(space_tok.join(new_res))
                    out_file.write(line_tok)
def main():
    # load test data
    filename = "data/mixed.txt"
    raw_text = open(filename, 'r', encoding='utf-8').read()
    print('-> Raw text length:', len(raw_text))
    raw_text = raw_text.lower()[:700000]
    raw_text = re.sub('\n', " ", raw_text)

    # create mapping of unique chars to integers
    chars = sorted(list(set(raw_text)))
    char_to_int = dict((c, i) for i, c in enumerate(chars))
    int_to_char = dict((i, c) for i, c in enumerate(chars))
    print('-> int to char:', int_to_char)

    char_list = list(char_to_int.keys())
    raw_text = ''.join([i for i in raw_text if i in char_list])
    print('-> char to int:', char_to_int)

    # summarize the loaded data
    n_chars = len(raw_text)
    n_vocab = len(char_list)
    print("-> Total Characters: ", n_chars)
    print("-> Total Vocab: ", n_vocab)

    # prepare the dataset of input to output pairs encoded as integers
    seq_length = 100
    sentences = []
    next_chars = []
    for i in range(0, n_chars - seq_length, 1):
        sentences.append(raw_text[i:i + seq_length])
        next_chars.append(raw_text[i + seq_length])
    n_patterns = len(sentences)
    print("Total Patterns: ", n_patterns)

    # one-hot encode inputs and targets (np.bool is deprecated; use bool)
    X = np.zeros((n_patterns, seq_length, n_vocab), dtype=bool)
    y = np.zeros((n_patterns, n_vocab), dtype=bool)
    for i, sentence in enumerate(sentences):
        for t, char in enumerate(sentence):
            X[i, t, char_to_int[char]] = 1
        y[i, char_to_int[next_chars[i]]] = 1
    print('- Input:', X[0, :, :].shape)
    print('- Output:', y[0].shape)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        test_size=0.2,
                                                        random_state=42)

    batch_size = 100
    # if we want to change batch size during training
    # dynamic_batch = True
    dynamic_batch = False
    n_epochs = 40

    # opt = keras.optimizers.Adam()
    # opt = keras.optimizers.Adadelta()
    opt = keras.optimizers.RMSprop(lr=0.001)

    sen_len = seq_length
    emb_len = n_vocab

    checkpoint_path = "models/char_att4/"
    # Create checkpoint callback
    cp_callback = keras.callbacks.ModelCheckpoint(checkpoint_path,
                                                  monitor='val_accuracy',
                                                  verbose=1,
                                                  save_weights_only=True,
                                                  mode='max',
                                                  save_best_only=True)

    load_model = False
    # load_model = True
    am = AttentionModel(checkpoint_path=checkpoint_path,
                        rnn_size=512,
                        rnn_style='CuDNNLSTM',
                        dropout_rate=0.3,
                        load_model=load_model)

    am.model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=['accuracy', perplexity, categorical_accuracy],
    )
    am.save_config()

    am.model.fit(X_train,
                 y_train,
                 batch_size=batch_size,
                 validation_data=(X_test, y_test),
                 callbacks=[cp_callback],
                 epochs=n_epochs)
def main():
    start_time = time.time()
    random.seed(42)
    # os.environ["CUDA_VISIBLE_DEVICES"] = '-1'

    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=int,
                        help='Preprocess or execute the data.', default=None)
    args = vars(parser.parse_args())  # Convert the arguments to a dict

    if args['mode'] == 1:
        train = pd.read_csv('../data/proppy_1.0.train.tsv', sep='\t',
                            header=None)
        train_processed = Preprocessing.pipeline(train[train.columns[0]])
        train_processed_df = pd.DataFrame(
            columns=['text_stem', 'text_join', 'text', 'label'])
        (train_processed_df['text_stem'],
         train_processed_df['text_join']) = train_processed
        train_processed_df['label'] = train[
            train.columns[len(train.columns) - 1]]
        train_processed_df['text'] = Preprocessing.pipeline_simple(
            train[train.columns[0]])
        train_processed_df.to_csv('../data/train_preprocessed.tsv',
                                  sep='\t', index=False, index_label=False)

        test = pd.read_csv('../data/proppy_1.0.test.tsv', sep='\t',
                           header=None)
        test_processed = Preprocessing.pipeline(test[test.columns[0]])
        test_processed_df = pd.DataFrame(
            columns=['text_stem', 'text_join', 'text', 'label'])
        (test_processed_df['text_stem'],
         test_processed_df['text_join']) = test_processed
        test_processed_df['label'] = test[
            test.columns[len(test.columns) - 1]]
        test_processed_df['text'] = Preprocessing.pipeline_simple(
            test[test.columns[0]])
        test_processed_df.to_csv('../data/test_preprocessed.tsv', sep='\t',
                                 index=False, index_label=False)

        dev = pd.read_csv('../data/proppy_1.0.dev.tsv', sep='\t',
                          header=None)
        dev_processed = Preprocessing.pipeline(dev[dev.columns[0]])
        dev_processed_df = pd.DataFrame(
            columns=['text_stem', 'text_join', 'text', 'label'])
        (dev_processed_df['text_stem'],
         dev_processed_df['text_join']) = dev_processed
        dev_processed_df['label'] = dev[dev.columns[len(dev.columns) - 1]]
        dev_processed_df['text'] = Preprocessing.pipeline_simple(
            dev[dev.columns[0]])
        dev_processed_df.to_csv('../data/dev_preprocessed.tsv', sep='\t',
                                index=False, index_label=False)
    elif args['mode'] == 2:
        # Create the model with fasttext or glove embeddings
        config = ModelConfig.AttentionConfig.value
        model = AttentionModel(batch_size=config['batch_size'],
                               epochs=config['epochs'],
                               vocab_size=config['vocab_size'],
                               max_len=config['max_len'],
                               filters=config['filters'],
                               kernel_size=config['kernel_size'],
                               optimizer=config['optimizer'],
                               learning_rate=config['learning_rate'],
                               max_sequence_len=config['max_sequence_len'],
                               lstm_units=config['lstm_units'],
                               embedding_size=config['embedding_size'],
                               load_embeddings=config['load_embeddings'],
                               pool_size=config['pool_size'],
                               path_train=config['path_train'],
                               path_test=config['path_test'],
                               path_dev=config['path_dev'],
                               emb_type=config['emb_type'],
                               buffer_size=config['buffer_size'],
                               rate=config['rate'],
                               length_type=config['length_type'],
                               dense_units=config['dense_units'],
                               att_units=config['att_units'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit_as_tensors(with_validation=False)
        print('Before predict.')
        model.predict_test_dev()
    elif args['mode'] == 3:
        # Create the model with fasttext or glove embeddings
        config = ModelConfig.TrainEmbeddings.value
        model = BiLSTMModel(batch_size=config['batch_size'],
                            epochs=config['epochs'],
                            vocab_size=config['vocab_size'],
                            max_len=config['max_len'],
                            filters=config['filters'],
                            kernel_size=config['kernel_size'],
                            optimizer=config['optimizer'],
                            learning_rate=config['learning_rate'],
                            max_sequence_len=config['max_sequence_len'],
                            lstm_units=config['lstm_units'],
                            embedding_size=config['embedding_size'],
                            load_embeddings=config['load_embeddings'],
                            pool_size=config['pool_size'],
                            path_train=config['path_train'],
                            path_test=config['path_test'],
                            path_dev=config['path_dev'],
                            emb_type=config['emb_type'],
                            buffer_size=config['buffer_size'],
                            rate=config['rate'],
                            length_type=config['length_type'],
                            dense_units=config['dense_units'],
                            concat=config['concat'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        # model.fit_as_tensors(with_validation=False)
        print('Before predict.')
        # model.predict_test_dev()
        # print('Saving the loss history:')
        # model.save_plot_history()
    elif args['mode'] == 4:
        config = ModelConfig.SecondExperiment.value
        model = BiLSTMModel(batch_size=config['batch_size'],
                            epochs=config['epochs'],
                            vocab_size=config['vocab_size'],
                            max_len=config['max_len'],
                            filters=config['filters'],
                            kernel_size=config['kernel_size'],
                            optimizer=config['optimizer'],
                            learning_rate=config['learning_rate'],
                            max_sequence_len=config['max_sequence_len'],
                            lstm_units=config['lstm_units'],
                            embedding_size=config['embedding_size'],
                            load_embeddings=config['load_embeddings'],
                            pool_size=config['pool_size'],
                            path_train=config['path_train'],
                            path_test=config['path_test'],
                            path_dev=None,
                            emb_type=config['emb_type'],
                            buffer_size=config['buffer_size'],
                            rate=config['rate'],
                            length_type=config['length_type'],
                            dense_units=config['dense_units'],
                            concat=config['concat'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit_as_tensors(with_validation=False)
        print('Before predict.')
        model.predict()
    elif args['mode'] == 5:
        config = ModelConfig.BertConfig.value
        model = BertModel(max_len=config['max_len'],
                          path_train=config['path_train'],
                          path_test=config['path_test'],
                          path_dev=config['path_dev'],
                          epochs=config['epochs'],
                          optimizer=config['optimizer'],
                          load_embeddings=False,
                          batch_size=config['batch_size'],
                          max_sequence_len=config['max_sequence_len'],
                          rate=config['rate'],
                          learning_rate=config['learning_rate'],
                          length_type=config['length_type'])
        print('Loading the data.')
        model.load_data()
        print('Creating the model.')
        model.call()
        print('Fitting the model.')
        # model.fit(with_validation=True)
        print('Predict the test set.')
        # model.predict()
    elif args['mode'] == 6:
        config = ModelConfig.TransformerConfig.value
        model = TransformerModel(batch_size=config['batch_size'],
                                 epochs=config['epochs'],
                                 vocab_size=config['vocab_size'],
                                 max_len=config['max_len'],
                                 filters=config['filters'],
                                 kernel_size=config['kernel_size'],
                                 optimizer=config['optimizer'],
                                 learning_rate=config['learning_rate'],
                                 max_sequence_len=config['max_sequence_len'],
                                 lstm_units=config['lstm_units'],
                                 embedding_size=config['embedding_size'],
                                 load_embeddings=config['load_embeddings'],
                                 pool_size=config['pool_size'],
                                 path_train=config['path_train'],
                                 path_test=config['path_test'],
                                 path_dev=config['path_dev'],
                                 emb_type=config['emb_type'],
                                 buffer_size=config['buffer_size'],
                                 rate=config['rate'],
                                 length_type=config['length_type'],
                                 dense_units=config['dense_units'],
                                 attheads=config['attheads'],
                                 att_layers=config['att_layers'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit_as_tensors(with_validation=True)
        print('Before predict.')
        model.predict()
    elif args['mode'] == 7:
        # Create the model with fasttext or glove embeddings
        config = ModelConfig.MeanModelConfig.value
        model = LocalAttentionModel(batch_size=config['batch_size'],
                                    epochs=config['epochs'],
                                    vocab_size=config['vocab_size'],
                                    max_len=config['max_len'],
                                    filters=config['filters'],
                                    kernel_size=config['kernel_size'],
                                    optimizer=config['optimizer'],
                                    learning_rate=config['learning_rate'],
                                    max_sequence_len=config['max_sequence_len'],
                                    lstm_units=config['lstm_units'],
                                    embedding_size=config['embedding_size'],
                                    load_embeddings=config['load_embeddings'],
                                    pool_size=config['pool_size'],
                                    path_train=config['path_train'],
                                    path_test=config['path_test'],
                                    path_dev=config['path_dev'],
                                    emb_type=config['emb_type'],
                                    buffer_size=config['buffer_size'],
                                    rate=config['rate'],
                                    length_type=config['length_type'],
                                    dense_units=config['dense_units'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit_as_tensors(with_validation=False)
        print('Before predict.')
        model.predict_test_dev()
        print('Showing the attention:')
        # model.plot_attention()
    elif args['mode'] == 8:
        config = ModelConfig.SecondExperiment.value
        model = AttentionModel(batch_size=config['batch_size'],
                               epochs=config['epochs'],
                               vocab_size=config['vocab_size'],
                               max_len=config['max_len'],
                               filters=config['filters'],
                               kernel_size=config['kernel_size'],
                               optimizer=config['optimizer'],
                               learning_rate=config['learning_rate'],
                               max_sequence_len=config['max_sequence_len'],
                               lstm_units=config['lstm_units'],
                               embedding_size=config['embedding_size'],
                               load_embeddings=config['load_embeddings'],
                               pool_size=config['pool_size'],
                               path_train=config['path_train'],
                               path_test=config['path_test'],
                               path_dev=None,
                               emb_type=config['emb_type'],
                               buffer_size=config['buffer_size'],
                               rate=config['rate'],
                               length_type=config['length_type'],
                               dense_units=config['dense_units'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit_as_tensors(with_validation=True)
        print('Before predict.')
        model.predict()
    elif args['mode'] == 9:
        config = ModelConfig.AttentionConfig.value
        model = AttentionModel(batch_size=config['batch_size'],
                               epochs=config['epochs'],
                               vocab_size=config['vocab_size'],
                               max_len=config['max_len'],
                               filters=config['filters'],
                               kernel_size=config['kernel_size'],
                               optimizer=config['optimizer'],
                               learning_rate=config['learning_rate'],
                               max_sequence_len=config['max_sequence_len'],
                               lstm_units=config['lstm_units'],
                               embedding_size=config['embedding_size'],
                               load_embeddings=config['load_embeddings'],
                               pool_size=config['pool_size'],
                               path_train=config['path_train'],
                               path_test=config['path_test'],
                               path_dev=config['path_dev'],
                               emb_type=config['emb_type'],
                               buffer_size=config['buffer_size'],
                               rate=config['rate'],
                               length_type=config['length_type'],
                               dense_units=config['dense_units'],
                               both_embeddings=config['both_embeddings'],
                               att_units=config['att_units'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit_as_tensors(with_validation=False)
        print('Before predict.')
        model.predict_test_dev()
    elif args['mode'] == 10:
        config = ModelConfig.BertConfigSecondExp.value
        model = BertModel(max_len=config['max_len'],
                          path_train=config['path_train'],
                          path_test=config['path_test'],
                          epochs=config['epochs'],
                          optimizer=config['optimizer'],
                          load_embeddings=False,
                          batch_size=config['batch_size'],
                          max_sequence_len=config['max_sequence_len'],
                          rate=config['rate'],
                          learning_rate=config['learning_rate'],
                          length_type=config['length_type'])
        print('Loading the data.')
        model.load_data()
        print('Creating the model.')
        model.call()
        print('Fitting the model.')
        model.fit(with_validation=False)
        print('Predict the test set.')
        model.predict()
    elif args['mode'] == 11:
        config = ModelConfig.SecondExperiment.value
        model = LocalAttentionModel(batch_size=config['batch_size'],
                                    epochs=config['epochs'],
                                    vocab_size=config['vocab_size'],
                                    max_len=config['max_len'],
                                    filters=config['filters'],
                                    kernel_size=config['kernel_size'],
                                    optimizer=config['optimizer'],
                                    learning_rate=config['learning_rate'],
                                    max_sequence_len=config['max_sequence_len'],
                                    lstm_units=config['lstm_units'],
                                    embedding_size=config['embedding_size'],
                                    load_embeddings=config['load_embeddings'],
                                    pool_size=config['pool_size'],
                                    path_train=config['path_train'],
                                    path_test=config['path_test'],
                                    path_dev=None,
                                    emb_type=config['emb_type'],
                                    buffer_size=config['buffer_size'],
                                    rate=config['rate'],
                                    length_type=config['length_type'],
                                    dense_units=config['dense_units'])
        model.prepare_data_as_tensors()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit_as_tensors(with_validation=False)
        print('Before predict.')
        model.predict()
    elif args['mode'] == 12:
        config = ModelConfig.MeanModelConfig.value
        model = LocalAttentionModelNela(batch_size=config['batch_size'],
                                        epochs=config['epochs'],
                                        vocab_size=config['vocab_size'],
                                        max_len=config['max_len'],
                                        filters=config['filters'],
                                        kernel_size=config['kernel_size'],
                                        optimizer=config['optimizer'],
                                        learning_rate=config['learning_rate'],
                                        max_sequence_len=config['max_sequence_len'],
                                        lstm_units=config['lstm_units'],
                                        embedding_size=config['embedding_size'],
                                        load_embeddings=config['load_embeddings'],
                                        pool_size=config['pool_size'],
                                        path_train=config['path_train'],
                                        path_test=config['path_test'],
                                        path_dev=config['path_dev'],
                                        emb_type=config['emb_type'],
                                        buffer_size=config['buffer_size'],
                                        rate=config['rate'],
                                        length_type=config['length_type'],
                                        dense_units=config['dense_units'])
        model.prepare_data()
        print('Building the model.')
        model.call()
        print('Before fit.')
        model.fit(with_validation=False)
        print('Before predict.')
        model.predict_test_dev()
    else:
        print('No other mode implemented yet.')

    elapsed_time = time.time() - start_time
    print('The execution took: ' + str(elapsed_time) + ' seconds.')
    print('End of execution.')
def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array;
    # T < 1 sharpens the distribution, T > 1 flattens it
    preds = np.asarray(preds).astype('float64')
    preds = np.log(preds) / temperature
    exp_preds = np.exp(preds)
    preds = exp_preds / np.sum(exp_preds)
    probas = np.random.multinomial(1, preds, 1)
    return np.argmax(probas)


if __name__ == '__main__':
    print('-> Loading att model')
    checkpoint_path = "models/char_att7/"
    model = AttentionModel(checkpoint_path=checkpoint_path,
                           load_model=True).model

    filename = "data/mixed.txt"
    raw_text = open(filename, 'r', encoding='utf-8').read()
    raw_text = raw_text.lower()
    raw_text = re.sub('\n', " ", raw_text)[:10000]

    # create mapping of unique chars to integers
    chars = sorted(list(set(raw_text)))
    # char_to_int = dict((c, i) for i, c in enumerate(chars))
    # int_to_char = dict((i, c) for i, c in enumerate(chars))
    int_to_char = {
        0: ' ',
        1: '!',
        2: '"',
        3: "'",
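# The sample() helper above is standard temperature sampling: dividing the
# log-probabilities by a temperature before re-normalizing sharpens the
# distribution for T < 1 and flattens it for T > 1. A standalone check of
# that effect (the input probabilities below are made up):
import numpy as np

def rescale(preds, temperature):
    preds = np.log(np.asarray(preds, dtype='float64')) / temperature
    exp_preds = np.exp(preds)
    return exp_preds / np.sum(exp_preds)

p = [0.7, 0.2, 0.1]
print(rescale(p, 0.5))  # sharper: ~[0.91, 0.07, 0.02]
print(rescale(p, 2.0))  # flatter: ~[0.52, 0.28, 0.20]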
def eval(self):
    self.max_acc = 1
    self.is_training = False
    with tf.Graph().as_default():
        data_processor = DataProcessor()
        vocab_size = data_processor.get_vocabulary_size(FLAGS.vocab_path)
        vocab, revocab = DataProcessor.initialize_vocabulary(
            FLAGS.vocab_path)
        data_processor.get_init(FLAGS.input_training_data_path,
                                FLAGS.input_validation_data_path, vocab,
                                vocab_size, FLAGS.max_length, revocab)
        models = AttentionModel()

        input_q = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length),
                                 name="input_x1")  # FLAGS.train_batch_size
        input_ap = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))
        input_an = tf.placeholder(tf.int32, shape=(None, FLAGS.max_length))

        q_encode = models.embed(
            inputs=input_q, vocab_size=vocab_size + 1,
            num_units=hp.hidden_units)  # embedding size plus 1 for padding
        ap_encode = models.embed(inputs=input_ap,
                                 vocab_size=vocab_size + 1,
                                 num_units=hp.hidden_units)
        an_encode = models.embed(inputs=input_an,
                                 vocab_size=vocab_size + 1,
                                 num_units=hp.hidden_units)

        # multihead blocks
        for i in range(hp.num_blocks):
            with tf.variable_scope("num_blocks_{}".format(i)):
                q_encode = models.multihead_attention(
                    query=q_encode, key=q_encode, value=q_encode,
                    num_heads=hp.num_heads, mask_future=False)
                q_encode = models.feed_forward(
                    q_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                ap_encode = models.multihead_attention(
                    query=ap_encode, key=ap_encode, value=ap_encode,
                    num_heads=hp.num_heads, mask_future=False)
                ap_encode = models.feed_forward(
                    ap_encode, units=[hp.hidden_units * 4, hp.hidden_units])
                an_encode = models.multihead_attention(
                    query=an_encode, key=an_encode, value=an_encode,
                    num_heads=hp.num_heads, mask_future=False)
                an_encode = models.feed_forward(
                    an_encode, units=[hp.hidden_units * 4, hp.hidden_units])

        # output layer
        with tf.name_scope('output_layer'):
            dims = q_encode.get_shape().as_list()
            q_encode = tf.reshape(q_encode, [-1, dims[1] * dims[2]])
            ap_encode = tf.reshape(ap_encode, [-1, dims[1] * dims[2]])
            an_encode = tf.reshape(an_encode, [-1, dims[1] * dims[2]])
            weight = tf.get_variable(
                'output_weight',
                [q_encode.get_shape().as_list()[-1], hp.hidden_units])
            q_encode = tf.matmul(q_encode, weight)
            ap_encode = tf.matmul(ap_encode, weight)
            an_encode = tf.matmul(an_encode, weight)
            q_encode = models.vec_normalize(q_encode)
            ap_encode = models.vec_normalize(ap_encode)
            an_encode = models.vec_normalize(an_encode)

        # calculate similarity and loss
        cos_12 = tf.reduce_sum(tf.multiply(q_encode, ap_encode),
                               1)  # element-wise multiply, then sum
        cos_13 = tf.reduce_sum(tf.multiply(q_encode, an_encode), 1)
        zero = tf.constant(0, shape=[FLAGS.train_batch_size],
                           dtype=tf.float32)
        margin = tf.constant(FLAGS.loss_margin,
                             shape=[FLAGS.train_batch_size],
                             dtype=tf.float32)
        losses = tf.maximum(
            zero, tf.subtract(margin, tf.subtract(cos_12, cos_13)))
        loss_sum = tf.reduce_sum(losses)
        loss_avg = tf.div(loss_sum, FLAGS.train_batch_size)
        correct = tf.equal(zero, losses)
        accuracy = tf.reduce_mean(tf.cast(correct, "float"),
                                  name="accuracy")

        # The global step is incremented automatically each time the train
        # op is executed.
        global_step = tf.Variable(0, name="global_step", trainable=False)
        optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
        # optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        grads_and_vars = optimizer.compute_gradients(loss_avg)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)
        saver = tf.train.Saver(tf.global_variables())

        # session start point
        with tf.Session() as session:
            session.run(tf.local_variables_initializer())
            session.run(tf.global_variables_initializer())
            session.run(tf.tables_initializer())

            # Load pre-trained model
            ckpt = tf.train.get_checkpoint_state(
                FLAGS.input_previous_model_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
                print("Load Model From ", ckpt.model_checkpoint_path)
            else:
                print("No model found and exit.")
                exit()

            print('\n============================> begin to evaluate '
                  'model. ')
            eval_size = FLAGS.evaluation_size

            def test_step(input_y1, input_y2, input_y3, label_list, sess):
                feed_dict = {
                    input_q: input_y1,
                    input_ap: input_y2,
                    input_an: input_y3
                }
                correct_flag = 0
                cos_12_ = sess.run(cos_12, feed_dict)
                cos_max = max(cos_12_)
                index_max = list(cos_12_).index(cos_max)
                if label_list[index_max] == '1':
                    correct_flag = 1
                return correct_flag

            def evaluate(eval_size):
                correct_num = 0
                batches = data_processor.loadValData_step(
                    vocab, vocab_size, FLAGS.input_validation_data_path,
                    FLAGS.max_length, eval_size)  # batch_size * seq_len
                for i in range(eval_size):
                    # display/save test data
                    batch_y1, batch_y2, label_list = batches[i]
                    correct_flag = test_step(batch_y1, batch_y2, batch_y2,
                                             label_list, session)
                    correct_num += correct_flag
                    if correct_flag == 1:
                        print('step %d ==== correct prediction' % i)
                    else:
                        print('step %d ==== wrong prediction' % i)
                print('correct_num', correct_num)
                acc = correct_num / float(eval_size)
                return acc

            acc_ = evaluate(eval_size=eval_size)
            print(
                '--------The test result among the test data sets: '
                'acc = {0}, test size = {1}----------'.format(
                    acc_, eval_size))
            exit()
    json.dump(vars(opts), f, indent=True)

# Load data from load_path
load_data = {}
if opts.load_path is not None:
    print(' [*] Loading data from {}'.format(opts.load_path))
    load_data = torch.load(
        opts.load_path,
        map_location=lambda storage, loc: storage)  # Load on CPU

# Initialize model
model = maybe_cuda_model(
    AttentionModel(opts.embedding_dim,
                   opts.hidden_dim,
                   problem,
                   n_encode_layers=opts.n_encode_layers,
                   mask_inner=True,
                   mask_logits=True,
                   normalization=opts.normalization,
                   tanh_clipping=opts.tanh_clipping),
    opts.use_cuda)

# Overwrite model parameters by parameters to load
model.load_state_dict({**model.state_dict(), **load_data.get('model', {})})

# Initialize baseline
if opts.baseline == 'exponential':
    baseline = ExponentialBaseline(opts.exp_beta)
elif opts.baseline == 'critic':
    baseline = CriticBaseline(
        maybe_cuda_model(
            CriticNetwork(problem.NODE_DIM,
                          opts.embedding_dim,
                          opts.hidden_dim,
                          opts.n_encode_layers,
        wordlist = Word2VecUtil.review_to_wordlist(review)
        train_x.append(' '.join(wordlist))
    return train_x


if __name__ == '__main__':
    test_df = pd.read_csv("data/testData.tsv", delimiter="\t", quoting=3)
    src_vocab_table = lookup_ops.index_table_from_file('data/vocab.txt',
                                                       default_value=0)
    test_x = load_data(test_df)
    test_dataset = tf.contrib.data.Dataset.from_tensor_slices(test_x)
    test_iterator = infer_iterator(test_dataset, src_vocab_table, BATCH_SIZE,
                                   max_length=500)
    attention_model = AttentionModel(None, test_iterator, 4, 200, 200, False)
    sess_config = tf.ConfigProto(log_device_placement=False,
                                 allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True
    with tf.Session(config=sess_config) as sess:
        attention_model.load_model(sess, "model/attention_model.ckpt")
        sess.run(tf.tables_initializer())
        sess.run(test_iterator.initializer)
        test_preds = []
        while True:
            try:
                batch_preds = attention_model.test_infer(sess)
                test_preds.append(batch_preds)
            except tf.errors.OutOfRangeError:
                test_preds = (np.concatenate(test_preds, axis=0))[:, 1]
                submission = pd.DataFrame({