# Shared module-level imports assumed by the train() variants below. Each
# function originally lives in its own example script; hyperparameters such as
# batch_size, num_epochs, latent_dim, n_frames, paths, etc. are module-level
# globals there, and BaseModel comes from each script's local model definition.
import os
import shutil
import time

import matplotlib.pyplot as plt
import numpy as np

import paddle.fluid as fluid
import paddle.fluid.framework as framework
from paddle.fluid.executor import Executor
from visualdl import LogWriter  # VisualDL 1.x scalar-logging API

import reader  # local data-loading helpers shipped with the examples


def train():
    startup_program = fluid.default_startup_program()
    main_program = fluid.default_main_program()

    raw_data = reader.raw_data('fra.txt', num_samples=num_samples)
    train_data = raw_data[0]
    data_vars = raw_data[1]

    model = BaseModel(
        hidden_size=latent_dim,
        src_vocab_size=data_vars['num_encoder_tokens'],
        tar_vocab_size=data_vars['num_decoder_tokens'],
        batch_size=batch_size,
        batch_first=True)
    loss = model.build_graph()

    optimizer = fluid.optimizer.Adam(learning_rate=0.001)
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_program)

    ce_ppl = []
    for epoch_id in range(num_epochs):
        print("epoch ", epoch_id)
        train_data_iter = reader.get_data_iter(train_data, batch_size)

        total_loss = 0
        word_count = 0.0
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            total_loss += cost_train * batch_size
            word_count += word_num

            # Report perplexity every `batch_size` steps, then reset the
            # accumulators so each report covers only the last window.
            if batch_id > 0 and batch_id % batch_size == 0:
                print(" ppl", batch_id, np.exp(total_loss / word_count))
                ce_ppl.append(np.exp(total_loss / word_count))
                total_loss = 0.0
                word_count = 0.0
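# The `prepare_input` used above is defined at module level in the original
# seq2seq script and not shown here. Below is a minimal sketch of what such a
# helper could look like; the feed names ('src', 'tar', 'label', ...) and the
# batch layout are assumptions for illustration, not the original code.
def prepare_input_sketch(batch, epoch_id=0):
    src_ids, src_len, tar_ids, tar_len, label = batch  # hypothetical layout
    res = {
        'src': np.array(src_ids).astype("int64"),
        'src_sequence_length': np.array(src_len).astype("int64"),
        'tar': np.array(tar_ids).astype("int64"),
        'tar_sequence_length': np.array(tar_len).astype("int64"),
        'label': np.array(label).astype("int64"),
    }
    # The trainer divides accumulated loss by the number of target words to
    # report perplexity, so return that count alongside the feed dict.
    return res, np.sum(tar_len)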
def train():
    model = BaseModel(batch_size=batch_size, maxlen=n_frames)
    loss, acc, output, no_grad_set = model.build_graph()

    main_program = fluid.default_main_program()
    # Clone before optimization so the clone runs only the forward pass.
    inference_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adadelta(0.001)
    optimizer.minimize(loss, no_grad_set=no_grad_set)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    log_writer = LogWriter(log_path, sync_cycle=10)
    with log_writer.mode("train") as logger:
        log_train_loss = logger.scalar(tag="train_loss")
        log_train_acc = logger.scalar(tag="train_acc")
    with log_writer.mode("validation") as logger:
        log_valid_loss = logger.scalar(tag="validation_loss")
        log_valid_acc = logger.scalar(tag="validation_acc")

    def prepare_input(batch):
        x, y, x_seqlen = batch
        res = {}
        res['input'] = np.array(x).astype("float32")
        res['input_seqlen'] = np.array(x_seqlen).astype("int64")
        res['label'] = np.array(y).astype("float32")
        return res

    # (samples, seq, width, height, pixel)
    noisy_movies, shifted_movies = reader.generate_movies(n_samples, n_frames)
    data = noisy_movies[:1000], shifted_movies[:1000]
    train_data, validation_data = split(data, validation_split)

    step_id = 0
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        valid_data_iter = reader.get_data_iter(validation_data, batch_size)
        train_data_iter = reader.get_data_iter(train_data, batch_size)

        # train
        total_loss = 0
        batch_id = 0
        for batch in train_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = fetch_outs[1]
            total_loss += cost_train

            if batch_id > 0 and batch_id % 5 == 0:
                log_train_loss.add_record(step_id, total_loss)
                log_train_acc.add_record(step_id, acc_train)
                step_id += 1
                print("current loss: %.7f, for batch %d" % (total_loss, batch_id))
                total_loss = 0.0
            batch_id += 1

        # validate
        total_loss = 0
        total_acc = 0
        batch_id = 0
        for batch in valid_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_valid = np.array(fetch_outs[0])
            acc_valid = fetch_outs[1]
            total_loss += cost_valid
            # Accumulate accuracy; the original never did, so the logged
            # validation accuracy was always 0.
            total_acc += acc_valid
            batch_id += 1

        log_valid_loss.add_record(epoch_id, total_loss)
        log_valid_acc.add_record(epoch_id, total_acc / batch_id)
        print("validation loss: %.7f" % (total_loss))

        fluid.io.save_inference_model(
            dirname=params_path,
            feeded_var_names=['input', 'input_seqlen'],
            target_vars=[loss, acc],
            executor=exe)
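# `split` is not defined in this snippet. A minimal sketch, assuming
# `validation_split` is a fraction (as in Keras) and `data` is the
# (noisy_movies, shifted_movies) pair built above:
def split(data, validation_split):
    x, y = data
    n_valid = int(len(x) * validation_split)
    # Hold out the last `n_valid` samples for validation.
    train_data = (x[:-n_valid], y[:-n_valid])
    validation_data = (x[-n_valid:], y[-n_valid:])
    return train_data, validation_data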
def train():
    def prepare_input(batch):
        src_ids, label = batch
        res = {}
        res['src'] = src_ids
        res['label'] = label
        return res

    # Set parameters:
    # ngram_range = 2 will add bi-gram features
    ngram_range = 2
    max_features = 20000
    maxlen = 400
    batch_size = 32
    embedding_dims = 50
    epochs = 5

    print('Loading data...')
    all_data = reader.raw_data(num_words=max_features)
    x_train, y_train, x_test, y_test = all_data
    print(len(x_train), 'train sequences')
    print(len(x_test), 'test sequences')
    print('Average train sequence length: {}'.format(
        np.mean(list(map(len, x_train)), dtype=int)))
    print('Average test sequence length: {}'.format(
        np.mean(list(map(len, x_test)), dtype=int)))

    if ngram_range > 1:
        print('Adding {}-gram features'.format(ngram_range))
        # Create a set of unique n-grams from the training set.
        ngram_set = set()
        for input_list in x_train:
            for i in range(2, ngram_range + 1):
                set_of_ngram = create_ngram_set(input_list, ngram_value=i)
                ngram_set.update(set_of_ngram)

        # Dictionary mapping each n-gram token to a unique integer.
        # Integer values are greater than max_features in order
        # to avoid collisions with existing features.
        start_index = max_features + 1
        token_indice = {v: k + start_index for k, v in enumerate(ngram_set)}
        indice_token = {token_indice[k]: k for k in token_indice}

        # max_features is now the highest integer that can appear in the dataset.
        max_features = np.max(list(indice_token.keys())) + 1

        # Augment x_train and x_test with n-gram features.
        x_train = add_ngram(x_train, token_indice, ngram_range)
        x_test = add_ngram(x_test, token_indice, ngram_range)
        print('Average train sequence length: {}'.format(
            np.mean(list(map(len, x_train)), dtype=int)))
        print('Average test sequence length: {}'.format(
            np.mean(list(map(len, x_test)), dtype=int)))

    print('Pad sequences (samples x time)')
    x_train = reader.pad_sequences(x_train, maxlen=maxlen)
    x_test = reader.pad_sequences(x_test, maxlen=maxlen)
    print('x_train shape:', x_train.shape)
    print('x_test shape:', x_test.shape)
    all_data = x_train, y_train, x_test, y_test

    print('Build model...')
    model = BaseModel(max_features=max_features)
    loss, acc = model.build_graph()

    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    optimizer = fluid.optimizer.Adam(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    for epoch_id in range(epochs):
        start_time = time.time()
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(all_data, batch_size)

        total_loss = 0
        total_acc = 0
        batch_id = 0
        for batch in train_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train
            total_acc += acc_train

            if batch_id > 0 and batch_id % 10 == 0:
                # total_acc * 0.1 averages accuracy over the last 10 batches.
                print("current loss: %.3f, current acc: %.3f for step %d" %
                      (total_loss, total_acc * 0.1, batch_id))
                total_loss = 0.0
                total_acc = 0.0
            batch_id += 1

        test_data_iter = reader.get_data_iter(all_data, batch_size, mode='test')
        all_acc = []
        for batch in test_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)
            all_acc.append(fetch_outs[1])

        all_acc = np.array(all_acc).astype("float32")
        print("test acc: %.3f" % all_acc.mean())
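# `create_ngram_set` and `add_ngram` are assumed to match the helpers from the
# Keras IMDB fastText example that this script mirrors:
def create_ngram_set(input_list, ngram_value=2):
    """Extract the set of n-grams (as tuples) from a list of token ids."""
    return set(zip(*[input_list[i:] for i in range(ngram_value)]))


def add_ngram(sequences, token_indice, ngram_range=2):
    """Append the id of every known n-gram to each sequence."""
    new_sequences = []
    for input_list in sequences:
        new_list = input_list[:]
        for ngram_value in range(2, ngram_range + 1):
            for i in range(len(new_list) - ngram_value + 1):
                ngram = tuple(new_list[i:i + ngram_value])
                if ngram in token_indice:
                    new_list.append(token_indice[ngram])
        new_sequences.append(new_list)
    return new_sequences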
def train():
    raw_data, raw_data_test = reader.get_lt5_data()
    model = BaseModel(fine_tune=False)
    loss, acc, output = model.build_graph()

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.Adadelta(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def prepare_input(batch, epoch_id=0):
        x, y = batch
        res = {}
        # Scale raw pixel values into [0, 1].
        res['img'] = np.array(x).astype("float32") / 255
        res['label'] = np.array(y).astype("int64")
        return res

    def train_test(test_batch):
        total_acc = []
        input_data_feed = prepare_input(test_batch)
        fetch_outs = exe.run(program=test_program,
                             feed=input_data_feed,
                             fetch_list=[acc.name],
                             use_program_cache=True)
        acc_train = np.array(fetch_outs[0])
        total_acc.append(acc_train)
        print("test avg acc: {0:.2%}".format(np.mean(total_acc)))

    for epoch_id in range(epochs):
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(raw_data, batch_size)
        test_data_iter = reader.get_data_iter(raw_data_test, batch_size)
        # zip truncates to the shorter iterator, pairing each training batch
        # with a test batch for on-the-fly evaluation.
        data_iter = zip(train_data_iter, test_data_iter)

        total_loss = 0
        total_acc = []
        for batch_id, batch in enumerate(data_iter):
            batch_train, batch_test = batch
            input_data_feed = prepare_input(batch_train)
            fetch_outs = exe.run(program=main_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train * batch_size
            total_acc.append(acc_train)

            print("train total loss: ", total_loss, np.mean(total_acc))
            train_test(batch_test)
            print()

    # Persist the trained parameters so a later run can fine-tune from them.
    shutil.rmtree(temp_model_path, ignore_errors=True)
    os.makedirs(temp_model_path)
    fluid.io.save_params(executor=exe, dirname=temp_model_path)
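# A later fine-tuning run can rebuild the graph and restore the parameters
# saved above before training. A minimal sketch, assuming
# BaseModel(fine_tune=True) defines variables with the same names and shapes
# as the ones saved to temp_model_path (the flag value is a guess):
def finetune_sketch():
    model = BaseModel(fine_tune=True)
    loss, acc, output = model.build_graph()
    exe = Executor(fluid.CPUPlace())
    exe.run(framework.default_startup_program())
    # Overwrite the freshly initialized parameters with the saved ones.
    fluid.io.load_params(executor=exe, dirname=temp_model_path)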
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    model = BaseModel(
        hidden_size,
        src_vocab_size,
        batch_size,
        num_layers=num_layers,
        init_scale=init_scale,
        dropout=dropout)
    loss, acc = model.build_graph()

    # Clone from the default main program and use the clone as the
    # validation program.
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    lr = args.learning_rate
    opt_type = args.optimizer
    if opt_type == "sgd":
        optimizer = fluid.optimizer.SGD(lr)
    elif opt_type == "adam":
        optimizer = fluid.optimizer.Adam(lr)
    else:
        print("only [sgd|adam] optimizers are supported")
        raise Exception("unsupported optimizer type")
    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, label = batch
        res = {}
        # The model expects ids with a trailing unit dimension.
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        res['src'] = src_ids
        res['label'] = label
        res['src_sequence_length'] = src_mask
        return res

    all_data = reader.raw_data()
    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)
        train_data_iter = reader.get_data_iter(all_data, batch_size)

        total_loss = 0
        word_count = 0.0
        batch_id = 0
        for batch in train_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train

            if batch_id > 0 and batch_id % 100 == 0:
                print("current loss: %.3f, for step %d" % (total_loss, batch_id))
                total_loss = 0.0
            batch_id += 1

        test_data_iter = reader.get_data_iter(all_data, batch_size, mode='test')
        all_acc = []
        for batch in test_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[acc.name],
                                 use_program_cache=False)
            all_acc.append(fetch_outs[0])

        all_acc = np.array(all_acc).astype("float32")
        print("test acc: %.3f" % all_acc.mean())
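# `parse_args` is defined elsewhere in the original script. A minimal sketch
# covering the flags this trainer reads; all default values here are
# placeholders for illustration, not the original script's defaults:
def parse_args_sketch():
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument("--num_layers", type=int, default=2)
    parser.add_argument("--src_vocab_size", type=int, default=10000)
    parser.add_argument("--batch_size", type=int, default=32)
    parser.add_argument("--dropout", type=float, default=0.0)
    parser.add_argument("--init_scale", type=float, default=0.1)
    parser.add_argument("--max_grad_norm", type=float, default=5.0)
    parser.add_argument("--hidden_size", type=int, default=200)
    parser.add_argument("--learning_rate", type=float, default=1.0)
    parser.add_argument("--optimizer", type=str, default="sgd")
    parser.add_argument("--use_gpu", action="store_true")
    parser.add_argument("--max_epoch", type=int, default=10)
    return parser.parse_args()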
def train():
    # Despite the name, this variant runs inference and visualization only.
    model = BaseModel(batch_size=batch_size, maxlen=7)
    pred = model.build_graph(mode='test')
    inference_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())
    fluid.io.load_params(executor=exe, dirname=infer_model_path)

    def prepare_input(batch):
        x, y, x_seqlen = batch
        res = {}
        res['input'] = np.array(x).astype("float32")
        res['input_seqlen'] = np.array(x_seqlen).astype("int64")
        res['label'] = np.array(y).astype("float32")
        return res

    # (samples, seq, width, height, pixel)
    noisy_movies, shifted_movies = reader.generate_movies(n_samples, n_frames)

    # Test the network on one movie: feed it the first 7 positions,
    # then predict the following positions frame by frame.
    which = 1004
    track_test = noisy_movies[which][:7, ::, ::, ::]
    track_res = shifted_movies[which][:7, ::, ::, ::]
    track_test = track_test[np.newaxis, ::, ::, ::, ::]
    track_res = track_res[np.newaxis, ::, ::, ::, ::]

    # Grow the sequence by 16 predicted frames.
    for j in range(16):
        track_raw = track_test, track_res
        data_iter = reader.get_data_iter(track_raw, 1)  # batch
        for batch in data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[pred.name],
                                 use_program_cache=False)
            guess = fetch_outs[0]
            last_seq = guess[0][-1]
            temp = []
            for row in last_seq:
                temp_row = []
                for ele in row:
                    # With two pixel states, np.argsort(ele)[1] is the index
                    # of the higher-probability class (i.e. the argmax).
                    pred_label = np.argsort(ele)[1]
                    temp_row.append([pred_label])
                temp.append(temp_row)
            guess = [[temp]]
            new = np.array(guess)
            # Append the predicted frame and feed the grown sequence back in.
            track_test = np.concatenate((track_test, new), axis=1)

    # Compare the predictions to the ground truth.
    track2 = noisy_movies[which][::, ::, ::, ::]
    for i in range(15):
        fig = plt.figure(figsize=(10, 5))

        ax = fig.add_subplot(121)
        if i >= 7:
            ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
        else:
            ax.text(1, 3, 'Initial trajectory', fontsize=20)
        toplot = track_test[0][i, ::, ::, 0]
        plt.imshow(toplot)

        ax = fig.add_subplot(122)
        plt.text(1, 3, 'Ground truth', fontsize=20)
        toplot = track2[i, ::, ::, 0]
        if i >= 2:
            toplot = shifted_movies[which][i - 1, ::, ::, 0]
        plt.imshow(toplot)

        plt.savefig('./res/%i_animate.png' % (i + 1))
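# Optional: the per-frame PNGs written above can be stitched into a single
# animation. A sketch assuming the third-party `imageio` package is installed
# (it is not a dependency of the original script):
def make_gif_sketch():
    import imageio
    frames = [imageio.imread('./res/%i_animate.png' % (i + 1))
              for i in range(15)]
    # ~0.3 s per frame gives a slow, readable animation.
    imageio.mimsave('./res/animate.gif', frames, duration=0.3)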