train_data = loader.load_data('train')
valid_data = loader.load_data('valid')
test_data = loader.load_data('test')
args.n_train = len(train_data[0])
print("Number of train:{}, valid:{}, test:{}.".format(
    len(train_data[0]), len(valid_data[0]), len(test_data[0])))
plot_config(args)

heads, tails = loader.heads_tails()
head_idx, tail_idx, head_cache, tail_cache, head_pos, tail_pos = loader.get_cache_list()
caches = [head_idx, tail_idx, head_cache, tail_cache, head_pos, tail_pos]

train_data = [torch.LongTensor(vec) for vec in train_data]
valid_data = [torch.LongTensor(vec) for vec in valid_data]
test_data = [torch.LongTensor(vec) for vec in test_data]

# the testers close over `model`, which is bound below before they are called
tester_val = lambda: model.test_link(valid_data, n_ent, heads, tails, args.filter)
tester_tst = lambda: model.test_link(test_data, n_ent, heads, tails, args.filter)

corrupter = BernCorrupter(train_data, n_ent, n_rel)
model = BaseModel(n_ent, n_rel, args)
best_str = model.train(train_data, caches, corrupter, tester_val, tester_tst)
with open(args.perf_file, 'a') as f:
    print('Training finished and best performance:', best_str)
    f.write('best_performance: ' + best_str)
def infer():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    # inference process
    print("src", src_vocab_size)
    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        # dropout uses the upscale_in_train type, so it can be removed at
        # inference time by setting it to 0
        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   beam_size=args.beam_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=0.0,
                                   mode='beam_search')
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              beam_size=args.beam_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=0.0,
                              mode='beam_search')

        source_vocab_file = args.vocab_prefix + "." + args.src_lang
        infer_file = args.infer_file
        infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]
            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        dir_name = args.reload_model
        print("dir name", dir_name)
        state_dict, _ = fluid.dygraph.load_dygraph(dir_name)
        model.set_dict(state_dict)
        model.eval()

        train_data_iter = reader.get_data_iter(infer_data, batch_size,
                                               mode='infer')

        tar_id2vocab = []
        tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
        with io.open(tar_vocab_file, "r", encoding='utf-8') as f:
            for line in f.readlines():
                tar_id2vocab.append(line.strip())

        infer_output_file = args.infer_output_file
        infer_output_dir = infer_output_file.split('/')[0]
        if not os.path.exists(infer_output_dir):
            os.mkdir(infer_output_dir)

        with io.open(infer_output_file, 'w', encoding='utf-8') as out_file:
            for batch_id, batch in enumerate(train_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id=0)
                outputs = model(input_data_feed)
                for i in range(outputs.shape[0]):
                    ins = outputs[i].numpy()
                    res = [tar_id2vocab[int(e)] for e in ins[:, 0].reshape(-1)]
                    new_res = []
                    for ele in res:
                        # stop at the end-of-sentence token
                        if ele == "</s>":
                            break
                        new_res.append(ele)
                    out_file.write(space_tok.join(new_res))
                    out_file.write(line_tok)
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    model = BaseModel(hidden_size,
                      src_vocab_size,
                      batch_size,
                      num_layers=num_layers,
                      init_scale=init_scale,
                      dropout=dropout)

    loss, acc = model.build_graph()
    # clone from the default main program and use it as the validation program
    main_program = fluid.default_main_program()
    inference_program = fluid.default_main_program().clone(for_test=True)

    lr = args.learning_rate
    opt_type = args.optimizer
    if opt_type == "sgd":
        optimizer = fluid.optimizer.SGD(lr)
    elif opt_type == "adam":
        optimizer = fluid.optimizer.Adam(lr)
    else:
        print("only support [sgd|adam]")
        raise Exception("opt type not support")

    optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, label = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        res['src'] = src_ids
        res['label'] = label
        res['src_sequence_length'] = src_mask
        return res

    all_data = reader.raw_data()
    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        start_time = time.time()
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(all_data, batch_size)

        total_loss = 0
        word_count = 0.0
        batch_id = 0
        for batch in train_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train

            if batch_id > 0 and batch_id % 100 == 0:
                print("current loss: %.3f, for step %d" % (total_loss, batch_id))
                total_loss = 0.0
            batch_id += 1

        test_data_iter = reader.get_data_iter(all_data, batch_size, mode='test')
        all_acc = []
        for batch in test_data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[acc.name],
                                 use_program_cache=False)
            all_acc.append(fetch_outs[0])

        all_acc = np.array(all_acc).astype("float32")
        print("test acc:%.3f" % all_acc.mean())
def main(args):
    # set the number of threads in pytorch
    torch.set_num_threads(6)

    logger_init(args)

    # select which gpu to use
    if args.GPU:
        torch.cuda.set_device(args.gpu)

    # the default settings for the corresponding dataset
    args = default_search_hyper(args)

    # load data: read n-ary data when arity > 2, otherwise binary triples
    if args.n_arity > 2:
        d = nary_dataloader(args.task_dir)

        entity_idxs = {d.entities[i]: i for i in range(len(d.entities))}
        relation_idxs = {d.relations[i]: i for i in range(len(d.relations))}
        n_ent, n_rel = len(entity_idxs), len(relation_idxs)
        print("Number of train:{}, valid:{}, test:{}.".format(
            len(d.train_data), len(d.valid_data), len(d.test_data)))

        train_data = torch.LongTensor(
            get_data_idxs(d.train_data, entity_idxs, relation_idxs))
        valid_data = torch.LongTensor(
            get_data_idxs(d.valid_data, entity_idxs, relation_idxs))
        test_data = torch.LongTensor(
            get_data_idxs(d.test_data, entity_idxs, relation_idxs))

        e1_sp, e2_sp, e3_sp = n_ary_heads(train_data, valid_data, test_data)
    else:
        loader = DataLoader(args.task_dir)
        n_ent, n_rel = loader.graph_size()

        train_data = loader.load_data('train')
        valid_data = loader.load_data('valid')
        test_data = loader.load_data('test')
        print("Number of train:{}, valid:{}, test:{}.".format(
            len(train_data[0]), len(valid_data[0]), len(test_data[0])))

        heads, tails = loader.heads_tails()

        train_data = torch.LongTensor(train_data).transpose(0, 1)
        valid_data = torch.LongTensor(valid_data).transpose(0, 1)
        test_data = torch.LongTensor(test_data).transpose(0, 1)

    file_path = "oas_nary" + "_" + str(args.num_blocks)
    directory = os.path.join("results", args.dataset, file_path)
    args.out_dir = directory
    if not os.path.exists(directory):
        os.makedirs(directory)

    os.environ["OMP_NUM_THREADS"] = "4"
    os.environ["MKL_NUM_THREADS"] = "4"

    args.perf_file = os.path.join(
        directory,
        args.dataset + '_oas_nary_' + str(args.num_blocks) + "_" +
        str(args.trial) + '.txt')
    print('output file name:', args.perf_file)

    plot_config(args)

    def tester_val(facts=None, arch=None):
        if args.n_arity == 2:
            if facts is None:
                return model.test_link(test_data=valid_data, n_ent=n_ent,
                                       heads=heads, tails=tails,
                                       filt=args.filter, arch=arch)
            return model.test_link(test_data=facts, n_ent=n_ent,
                                   heads=heads, tails=tails,
                                   filt=args.filter, arch=arch)
        elif args.n_arity > 2:
            if facts is None:
                return model.evaluate(valid_data, e1_sp, e2_sp, e3_sp, arch)
            return model.evaluate(facts, e1_sp, e2_sp, e3_sp, arch)

    def tester_tst():
        if args.n_arity == 2:
            return model.test_link(test_data=test_data, n_ent=n_ent,
                                   heads=heads, tails=tails, filt=args.filter)
        elif args.n_arity > 2:
            return model.evaluate(test_data, e1_sp, e2_sp, e3_sp)

    tester_trip_class = None

    model = BaseModel(n_ent, n_rel, args)
    model.train(train_data, valid_data, tester_val, tester_tst,
                tester_trip_class)
#!/usr/bin/python3
from base_model import BaseModel

my_model = BaseModel()
my_model.name = "Holberton"
my_model.my_number = 89
print(my_model.id)
print(my_model)
print(type(my_model.created_at))
print("--")
my_model_json = my_model.to_dict()
print(my_model_json)
print("JSON of my_model:")
for key in my_model_json.keys():
    print("\t{}: ({}) - {}".format(
        key, type(my_model_json[key]), my_model_json[key]))
print("--")
my_new_model = BaseModel(**my_model_json)
print(my_new_model.id)
print(my_new_model)
print(type(my_new_model.created_at))
print("--")
print(my_model is my_new_model)
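# For context: a minimal, hypothetical sketch of the BaseModel interface the
# test script above exercises (auto-generated id/created_at, __str__, to_dict,
# and re-creation from a dict). This is an illustrative assumption, not the
# project's actual base_model module.
import uuid
from datetime import datetime


class BaseModelSketch:
    def __init__(self, **kwargs):
        if kwargs:
            # rebuild an instance from a to_dict()-style dictionary
            for key, value in kwargs.items():
                if key == "__class__":
                    continue
                if key in ("created_at", "updated_at"):
                    value = datetime.fromisoformat(value)
                setattr(self, key, value)
        else:
            self.id = str(uuid.uuid4())
            self.created_at = datetime.now()
            self.updated_at = datetime.now()

    def __str__(self):
        return "[{}] ({}) {}".format(type(self).__name__, self.id, self.__dict__)

    def to_dict(self):
        # serializable copy: datetimes become ISO strings, class name is added
        d = dict(self.__dict__)
        d["__class__"] = type(self).__name__
        d["created_at"] = self.created_at.isoformat()
        d["updated_at"] = self.updated_at.isoformat()
        return d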
    # hyper-parameter search branch; the opening `if` of this block is not
    # part of this excerpt
    space4kge = {
        "lr": hp.uniform("lr", 0, 1),
        "lamb": hp.uniform("lamb", -5, 0),
        "decay_rate": hp.uniform("decay_rate", 0.99, 1.0),
        "n_batch": hp.choice("n_batch", [128, 256, 512, 1024]),
        "n_dim": hp.choice("n_dim", [64]),
    }

    trials = Trials()
    best = fmin(run_kge,
                space4kge,
                algo=partial(tpe.suggest, n_startup_jobs=30),
                max_evals=200,
                trials=trials)
else:
    plot_config(args)
    model = BaseModel(n_ent, n_rel, args, struct)
    tester_val = lambda: model.test_link(valid_data, valid_head_filter,
                                         valid_tail_filter)
    tester_tst = lambda: model.test_link(test_data, test_head_filter,
                                         test_tail_filter)
    best_mrr, best_str = model.train(train_data, tester_val, tester_tst)
    with open(args.perf_file, 'a') as f:
        print('structure:', struct, best_str)
        for s in struct:
            f.write(str(s) + ' ')
        f.write('\t\tbest_performance: ' + best_str + '\n')
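# A hedged sketch of the objective `run_kge` that fmin consumes above: hyperopt
# calls it with one sampled point (a dict keyed like space4kge) and minimizes
# the returned float. The names reused here come from the surrounding script;
# mapping `lamb` through 10**x (log-scale sampling) and returning negative MRR
# are assumptions for illustration, not the original implementation.
def run_kge(space):
    args.lr = space["lr"]
    args.lamb = 10 ** space["lamb"]   # assumed log-scale sampling in [1e-5, 1]
    args.decay_rate = space["decay_rate"]
    args.n_batch = space["n_batch"]
    args.n_dim = space["n_dim"]
    model = BaseModel(n_ent, n_rel, args, struct)
    best_mrr, _ = model.train(train_data, tester_val, tester_tst)
    return -best_mrr                  # fmin minimizes, so negate the MRR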
def predict(save_dir):
    warnings.filterwarnings("ignore")
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.GPU_device[len(FLAGS.GPU_device) - 1]
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # Load the useful files to build the architecture
    print("Loading the connection matrix...")
    start = time.time()
    adj_matrix = pd.read_csv(os.path.join(FLAGS.dir_data, "adj_matrix.csv"), index_col=0)
    first_matrix_connection = pd.read_csv(os.path.join(FLAGS.dir_data, "first_matrix_connection_GO.csv"), index_col=0)
    csv_go = pd.read_csv(os.path.join(FLAGS.dir_data, "go_level.csv"), index_col=0)

    connection_matrix = [np.array(first_matrix_connection.values, dtype=np.float32)]
    # one connection matrix per pair of consecutive GO levels (7->6 down to 3->2)
    for level in range(7, 2, -1):
        rows = csv_go[str(level)].loc[lambda x: x == 1].index
        cols = csv_go[str(level - 1)].loc[lambda x: x == 1].index
        connection_matrix.append(np.array(adj_matrix.loc[rows, cols].values, dtype=np.float32))
    connection_matrix.append(np.ones((FLAGS.n_hidden_6, FLAGS.n_classes), dtype=np.float32))

    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    # Load the data
    print("Loading the test dataset...")
    loaded = np.load(os.path.join(FLAGS.dir_data, "X_test.npz"))
    X_test = loaded['x']
    y_test = loaded['y']
    if FLAGS.n_classes >= 2:
        y_test = to_categorical(y_test)
    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    # Launch the model
    print("Launching the evaluation")
    if FLAGS.type_training != "":
        print("with {} and ALPHA={}".format(FLAGS.type_training, FLAGS.alpha))

    tf.reset_default_graph()

    # -- Inputs of the model --
    X = tf.placeholder(tf.float32, shape=[None, FLAGS.n_input])
    Y = tf.placeholder(tf.float32, shape=[None, FLAGS.n_classes])

    # -- Hyperparameters of the neural network --
    is_training = tf.placeholder(tf.bool, name="is_training")  # Batch Norm hyperparameter
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # Dropout hyperparameter

    # Model instantiation
    network = BaseModel(X=X, n_input=FLAGS.n_input, n_classes=FLAGS.n_classes,
                        n_hidden_1=FLAGS.n_hidden_1, n_hidden_2=FLAGS.n_hidden_2,
                        n_hidden_3=FLAGS.n_hidden_3, n_hidden_4=FLAGS.n_hidden_4,
                        n_hidden_5=FLAGS.n_hidden_5, n_hidden_6=FLAGS.n_hidden_6,
                        keep_prob=keep_prob, is_training=is_training)
    pred = network()

    # -- Compute the predictions --
    if FLAGS.n_classes >= 2:
        y_hat = tf.argmax(pred, 1)
    else:
        y_hat = tf.nn.sigmoid(pred)
        y_hat = tf.cast(y_hat > 0.5, dtype=tf.int64)

    # -- Configure the use of the gpu --
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)

    if FLAGS.restore:
        saver = tf.train.Saver()

    start = time.time()
    with tf.device(FLAGS.GPU_device):
        with tf.Session(config=config) as sess:
            if FLAGS.restore:
                saver.restore(sess, os.path.join(save_dir, "model"))

            # -- Predict the outcomes of the samples from the test set --
            y_hat = sess.run([y_hat],
                             feed_dict={X: X_test, Y: y_test,
                                        is_training: FLAGS.is_training,
                                        keep_prob: 1})
    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec ".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    return y_hat
def train():
    raw_data, raw_data_test = reader.get_gte5_data()

    model = BaseModel(fine_tune=True)
    loss, acc, output = model.build_graph()

    main_program = fluid.default_main_program()
    test_program = main_program.clone(for_test=True)

    optimizer = fluid.optimizer.Adadelta(0.01)
    optimizer.minimize(loss)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())
    fluid.io.load_params(executor=exe, dirname=temp_model_path)

    def prepare_input(batch):
        x, y = batch
        res = {}
        res['img'] = np.array(x).astype("float32") / 255
        res['label'] = np.array(y).astype("int64")
        return res

    def train_test(test_batch):
        total_acc = []
        input_data_feed = prepare_input(test_batch)
        fetch_outs = exe.run(program=test_program,
                             feed=input_data_feed,
                             fetch_list=[acc.name],
                             use_program_cache=True)
        acc_train = np.array(fetch_outs[0])
        total_acc.append(acc_train)
        print("test avg acc: {0:.2%}".format(np.mean(total_acc)))

    for epoch_id in range(epochs):
        print("epoch id", epoch_id)

        train_data_iter = reader.get_data_iter(raw_data, batch_size)
        test_data_iter = reader.get_data_iter(raw_data_test, batch_size)
        data_iter = zip(train_data_iter, test_data_iter)

        total_loss = 0
        total_acc = []
        for batch_id, batch in enumerate(data_iter):
            batch_train, batch_test = batch
            input_data_feed = prepare_input(batch_train)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[loss.name, acc.name],
                                 use_program_cache=True)

            cost_train = np.array(fetch_outs[0])
            acc_train = np.array(fetch_outs[1])
            total_loss += cost_train * batch_size
            total_acc.append(acc_train)

        print("train total loss: ", total_loss, np.mean(total_acc))
        train_test(batch_test)
        print()
def train():
    model = BaseModel(batch_size=batch_size, maxlen=7)
    pred = model.build_graph(mode='test')
    inference_program = fluid.default_main_program().clone(for_test=True)

    place = fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())
    fluid.io.load_params(executor=exe, dirname=infer_model_path)

    def prepare_input(batch):
        x, y, x_seqlen = batch
        res = {}
        res['input'] = np.array(x).astype("float32")
        res['input_seqlen'] = np.array(x_seqlen).astype("int64")
        res['label'] = np.array(y).astype("float32")
        return res

    # (samples, seq, width, height, pixel)
    noisy_movies, shifted_movies = reader.generate_movies(n_samples, n_frames)

    # Test the network on one movie: feed it the first 7 positions,
    # then predict the new positions
    which = 1004
    track_test = noisy_movies[which][:7, ::, ::, ::]
    track_res = shifted_movies[which][:7, ::, ::, ::]
    track_test = track_test[np.newaxis, ::, ::, ::, ::]
    track_res = track_res[np.newaxis, ::, ::, ::, ::]

    for j in range(16):
        track_raw = track_test, track_res
        data_iter = reader.get_data_iter(track_raw, 1)  # batch
        for batch in data_iter:
            input_data_feed = prepare_input(batch)
            fetch_outs = exe.run(program=inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[pred.name],
                                 use_program_cache=False)
            guess = fetch_outs[0]
            last_seq = guess[0][-1]

            temp = []
            for row in last_seq:
                temp_row = []
                for ele in row:
                    # for a length-2 score vector, argsort(...)[1] is the argmax
                    pred_label = np.argsort(ele)[1]
                    temp_row.append([pred_label])
                temp.append(temp_row)
            guess = [[temp]]
            new = np.array(guess)
            track_test = np.concatenate((track_test, new), axis=1)

    # And then compare the predictions to the ground truth
    track2 = noisy_movies[which][::, ::, ::, ::]
    for i in range(15):
        fig = plt.figure(figsize=(10, 5))

        ax = fig.add_subplot(121)
        if i >= 7:
            ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
        else:
            ax.text(1, 3, 'Initial trajectory', fontsize=20)
        toplot = track_test[0][i, ::, ::, 0]
        plt.imshow(toplot)

        ax = fig.add_subplot(122)
        plt.text(1, 3, 'Ground truth', fontsize=20)
        toplot = track2[i, ::, ::, 0]
        if i >= 2:
            toplot = shifted_movies[which][i - 1, ::, ::, 0]
        plt.imshow(toplot)
        plt.savefig('./res/%i_animate.png' % (i + 1))
def test_fixed_init_xgb(self):
    model = BaseModel('xgb', 'test_params_set')
    model.fit(X, y)
    self.assertEqual(model.predict(X).shape[0], n_samples)
    self.assertEqual(model.predict(X).shape[1], num_classes)

def test_fixed_init_rf(self):
    model = BaseModel('c-rf', 'test_params_rf_set')
    model.fit(X, y)
    self.assertEqual(model.predict_proba(X).shape[0], n_samples)
    self.assertEqual(model.predict_proba(X).shape[1], num_classes)

def test_init_rf(self):
    model = BaseModel('c-rf', 'test_params_rf_random')
    self.assertEqual(model.name, 'c-rf')
    self.assertEqual(model.params['n_jobs'], -1)

def test_init_xgb(self):
    model = BaseModel('xgb', 'test_params_random')
    self.assertEqual(model.name, 'xgb')
    self.assertEqual(model.params['num_class'], 10)
    self.assertTrue(model.num_rounds >= 10)
    self.assertTrue(model.num_rounds <= 75)
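# The four tests above pin down the wrapper interface they assume: BaseModel
# takes a model name ('xgb' or 'c-rf') plus a params-set key, exposes .name,
# .params and (for xgb) .num_rounds, and forwards fit/predict/predict_proba to
# the underlying estimator. A minimal sketch under those assumptions; the
# PARAM_SETS contents and the rf-only coverage are hypothetical, not the
# project's actual code.
from sklearn.ensemble import RandomForestClassifier


class BaseModelSketch:
    PARAM_SETS = {
        "test_params_rf_set": {"n_jobs": -1, "n_estimators": 100},
    }

    def __init__(self, name, params_key):
        self.name = name
        self.params = self.PARAM_SETS[params_key]
        self._clf = RandomForestClassifier(**self.params)

    def fit(self, X, y):
        self._clf.fit(X, y)
        return self

    def predict_proba(self, X):
        # shape (n_samples, num_classes), as the rf test checks
        return self._clf.predict_proba(X)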
def train(save_dir):
    warnings.filterwarnings("ignore")
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.GPU_device[len(FLAGS.GPU_device) - 1]
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # Load the useful files to build the architecture
    print("Loading the connection matrix...")
    start = time.time()
    adj_matrix = pd.read_csv(os.path.abspath(os.path.join(FLAGS.dir_data, "adj_matrix.csv")), index_col=0)
    first_matrix_connection = pd.read_csv(os.path.abspath(os.path.join(FLAGS.dir_data, "first_matrix_connection_GO.csv")), index_col=0)
    csv_go = pd.read_csv(os.path.abspath(os.path.join(FLAGS.dir_data, "go_level.csv")), index_col=0)

    connection_matrix = [np.array(first_matrix_connection.values, dtype=np.float32)]
    # one connection matrix per pair of consecutive GO levels (7->6 down to 3->2)
    for level in range(7, 2, -1):
        rows = csv_go[str(level)].loc[lambda x: x == 1].index
        cols = csv_go[str(level - 1)].loc[lambda x: x == 1].index
        connection_matrix.append(np.array(adj_matrix.loc[rows, cols].values, dtype=np.float32))
    connection_matrix.append(np.ones((FLAGS.n_hidden_6, FLAGS.n_classes), dtype=np.float32))

    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    # Load the data
    print("Loading the data...")
    start = time.time()
    loaded = np.load(os.path.abspath(os.path.join(FLAGS.dir_data, "X_train.npz")))
    X_train = loaded['x']
    y_train = loaded['y']
    if FLAGS.n_classes >= 2:
        y_train = to_categorical(y_train)
    loaded = np.load(os.path.abspath(os.path.join(FLAGS.dir_data, "X_test.npz")))
    X_test = loaded['x']
    y_test = loaded['y']
    if FLAGS.n_classes >= 2:
        y_test = to_categorical(y_test)
    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    # Launch the model
    print("Launching the learning")
    if FLAGS.type_training != "":
        print("with {} and ALPHA={}".format(FLAGS.type_training, FLAGS.alpha))

    tf.reset_default_graph()

    # -- Inputs of the model --
    X = tf.placeholder(tf.float32, shape=[None, FLAGS.n_input])
    Y = tf.placeholder(tf.float32, shape=[None, FLAGS.n_classes])

    # -- Hyperparameters of the neural network --
    is_training = tf.placeholder(tf.bool, name="is_training")  # Batch Norm hyperparameter
    learning_rate = tf.placeholder(tf.float32, name="learning_rate")  # Optimizer hyperparameter
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # Dropout hyperparameter

    total_batches = len(X_train) // FLAGS.batch_size

    # Model instantiation
    network = BaseModel(X=X, n_input=FLAGS.n_input, n_classes=FLAGS.n_classes,
                        n_hidden_1=FLAGS.n_hidden_1, n_hidden_2=FLAGS.n_hidden_2,
                        n_hidden_3=FLAGS.n_hidden_3, n_hidden_4=FLAGS.n_hidden_4,
                        n_hidden_5=FLAGS.n_hidden_5, n_hidden_6=FLAGS.n_hidden_6,
                        keep_prob=keep_prob, is_training=is_training)
    pred = network()

    # -- Loss function --
    # ---- CE loss: average of the loss across all the dimensions ----
    if FLAGS.n_classes >= 2:
        ce_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y))
    else:
        ce_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=Y))

    # ---- Regularization loss (LGO, L2, L1) ----
    additional_loss = 0
    if FLAGS.type_training == "LGO":
        # Penalization of the noGO connections
        for idx, weight in enumerate(network.weights.values()):
            additional_loss += l2_loss_func(weight * (1 - connection_matrix[idx]))
    elif FLAGS.type_training == "L2":
        for weight in network.weights.values():
            additional_loss += l2_loss_func(weight)
    elif FLAGS.type_training == "L1":
        for idx, weight in enumerate(network.weights.values()):
            additional_loss += l1_loss_func(weight)

    # ---- Total loss ----
    if FLAGS.type_training != '':
        total_loss = ce_loss + FLAGS.alpha * additional_loss
    else:
        total_loss = ce_loss

    # ---- Norm of the weights of the connections ----
    norm_no_go_connections = 0
    norm_go_connections = 0
    for idx, weight in enumerate(list(network.weights.values())[:-1]):
        norm_no_go_connections += tf.norm(weight * (1 - connection_matrix[idx]), ord=1) / np.count_nonzero(1 - connection_matrix[idx])
        norm_go_connections += tf.norm(weight * connection_matrix[idx], ord=1) / np.count_nonzero(connection_matrix[idx])
    norm_no_go_connections /= FLAGS.n_layers
    norm_go_connections /= FLAGS.n_layers

    # -- Optimizer --
    with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
        if FLAGS.lr_method == "adam":
            trainer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        elif FLAGS.lr_method == "momentum":
            trainer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                                 momentum=0.09,
                                                 use_nesterov=True)
        elif FLAGS.lr_method == "adagrad":
            trainer = tf.train.AdagradOptimizer(learning_rate=learning_rate)
        elif FLAGS.lr_method == "rmsprop":
            trainer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        optimizer = trainer.minimize(total_loss)

    # -- Compute the prediction error --
    if FLAGS.n_classes >= 2:
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    else:
        sig_pred = tf.nn.sigmoid(pred)
        sig_pred = tf.cast(sig_pred > 0.5, dtype=tf.int64)
        ground_truth = tf.cast(Y, dtype=tf.int64)
        correct_prediction = tf.equal(sig_pred, ground_truth)

    # -- Calculate the accuracy across all the given batches and average them out --
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # -- Initialize the variables --
    init = tf.global_variables_initializer()

    # -- Configure the use of the gpu --
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)

    if FLAGS.save or FLAGS.restore:
        saver = tf.train.Saver()

    start = time.time()
    with tf.device(FLAGS.GPU_device):
        with tf.Session(config=config) as sess:
            sess.run(init)

            train_c_accuracy = []
            train_c_total_loss = []
            test_c_accuracy = []
            test_c_total_loss = []
            c_l1_norm_go = []
            c_l1_norm_no_go = []
            if FLAGS.type_training != "":
                train_c_ce_loss = []
                test_c_ce_loss = []
                train_c_additional_loss = []
                test_c_additional_loss = []

            for epoch in tqdm(np.arange(0, FLAGS.epochs)):
                index = np.arange(X_train.shape[0])
                np.random.shuffle(index)
                batch_X = np.array_split(X_train[index], total_batches)
                batch_Y = np.array_split(y_train[index], total_batches)

                # -- Optimization --
                for batch in range(total_batches):
                    batch_x, batch_y = batch_X[batch], batch_Y[batch]
                    sess.run(optimizer,
                             feed_dict={X: batch_x, Y: batch_y,
                                        is_training: FLAGS.is_training,
                                        keep_prob: FLAGS.keep_prob,
                                        learning_rate: FLAGS.learning_rate})

                if ((epoch + 1) % FLAGS.display_step == 0) or (epoch == 0):
                    if not ((FLAGS.display_step == FLAGS.epochs) and (epoch == 0)):
                        # -- Calculate loss and accuracy after a specific epoch
                        # on the train and test set --
                        avg_cost, avg_acc, l1_norm_no_go, l1_norm_go = sess.run(
                            [total_loss, accuracy, norm_no_go_connections, norm_go_connections],
                            feed_dict={X: X_train, Y: y_train,
                                       is_training: False, keep_prob: 1.0})
                        train_c_total_loss.append(avg_cost)
                        train_c_accuracy.append(avg_acc)
                        c_l1_norm_go.append(l1_norm_go)
                        c_l1_norm_no_go.append(l1_norm_no_go)
                        if FLAGS.type_training != "":
                            avg_ce_loss, avg_additional_loss = sess.run(
                                [ce_loss, additional_loss],
                                feed_dict={X: X_train, Y: y_train,
                                           is_training: False, keep_prob: 1.0})
                            train_c_additional_loss.append(avg_additional_loss)
                            train_c_ce_loss.append(avg_ce_loss)

                        avg_cost, avg_acc = sess.run(
                            [total_loss, accuracy],
                            feed_dict={X: X_test, Y: y_test,
                                       is_training: False, keep_prob: 1.0})
                        test_c_total_loss.append(avg_cost)
                        test_c_accuracy.append(avg_acc)
                        if FLAGS.type_training != "":
                            avg_ce_loss, avg_additional_loss = sess.run(
                                [ce_loss, additional_loss],
                                feed_dict={X: X_test, Y: y_test,
                                           is_training: False, keep_prob: 1.0})
                            test_c_additional_loss.append(avg_additional_loss)
                            test_c_ce_loss.append(avg_ce_loss)

                        current_idx = len(train_c_total_loss) - 1
                        print('| Epoch: {}/{} | Train: Loss {:.6f} Accuracy : {:.6f} '
                              '| Test: Loss {:.6f} Accuracy : {:.6f}\n'.format(
                                  epoch + 1, FLAGS.epochs,
                                  train_c_total_loss[current_idx],
                                  train_c_accuracy[current_idx],
                                  test_c_total_loss[current_idx],
                                  test_c_accuracy[current_idx]))

            if FLAGS.save:
                saver.save(sess=sess, save_path=os.path.join(save_dir, "model"))

    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec ".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    performances = {
        'total_loss': train_c_total_loss, 'test_total_loss': test_c_total_loss,
        'acc': train_c_accuracy, 'test_acc': test_c_accuracy
    }
    performances['norm_go'] = c_l1_norm_go
    performances['norm_no_go'] = c_l1_norm_no_go
    if FLAGS.type_training != "":
        performances['additional_loss'] = train_c_additional_loss
        performances['test_additional_loss'] = test_c_additional_loss
        performances['ce_loss'] = train_c_ce_loss
        performances['test_ce_loss'] = test_c_ce_loss

    return performances
def main():
    args = parse_args()
    print(args)

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    if args.enable_ce:
        fluid.default_main_program().random_seed = 102
        framework.default_startup_program().random_seed = 102

    train_program = fluid.Program()
    startup_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # Training process
        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=dropout)
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=dropout)
        loss = model.build_graph()
        inference_program = train_program.clone(for_test=True)
        fluid.clip.set_gradient_clip(clip=fluid.clip.GradientClipByGlobalNorm(
            clip_norm=max_grad_norm))
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr)
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr)
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not support")
        optimizer.minimize(loss)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(startup_program)

    device_count = len(fluid.cuda_places()) if args.use_gpu else len(
        fluid.cpu_places())

    compiled_program = fluid.CompiledProgram(train_program).with_data_parallel(
        loss_name=loss.name)

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    # get train epoch size
    def eval(data, epoch_id=0):
        eval_data_iter = reader.get_data_iter(data, batch_size, mode='eval')
        total_loss = 0.0
        word_count = 0.0
        for batch_id, batch in enumerate(eval_data_iter):
            input_data_feed, word_num = prepare_input(batch, epoch_id,
                                                      with_lr=False)
            fetch_outs = exe.run(inference_program,
                                 feed=input_data_feed,
                                 fetch_list=[loss.name],
                                 use_program_cache=False)

            cost_train = np.array(fetch_outs[0])
            total_loss += cost_train * batch_size
            word_count += word_num

        ppl = np.exp(total_loss / word_count)
        return ppl

    def train():
        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                fetch_outs = exe.run(program=compiled_program,
                                     feed=input_data_feed,
                                     fetch_list=[loss.name],
                                     use_program_cache=True)

                cost_train = np.mean(fetch_outs[0])
                total_loss += cost_train * batch_size

                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)

                if batch_id > 0 and batch_id % 100 == 0:
                    print("-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f"
                          % (epoch_id, batch_id, batch_time,
                             np.exp(total_loss / word_count)))
                    ce_ppl.append(np.exp(total_loss / word_count))
                    total_loss = 0.0
                    word_count = 0.0

                # profiler tools
                if args.profile and epoch_id == 0 and batch_id == 100:
                    profiler.reset_profiler()
                elif args.profile and epoch_id == 0 and batch_id == 105:
                    return

            end_time = time.time()
            epoch_time = end_time - start_time
            ce_time.append(epoch_time)
            print("\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step\n"
                  % (epoch_id, epoch_time, sum(batch_times) / len(batch_times)))

            if not args.profile:
                save_path = os.path.join(args.model_path,
                                         "epoch_" + str(epoch_id),
                                         "checkpoint")
                print("begin to save", save_path)
                fluid.save(train_program, save_path)
                print("save finished")
                dev_ppl = eval(valid_data)
                print("dev ppl", dev_ppl)
                test_ppl = eval(test_data)
                print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))

    with profile_context(args.profile, args.profiler_path):
        train()
def main():
    args = parse_args()
    print(args)

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    with fluid.dygraph.guard(place):
        if args.enable_ce:
            fluid.default_startup_program().random_seed = 102
            fluid.default_main_program().random_seed = 102
            np.random.seed(102)
            random.seed(102)

        # Training process
        if args.attention:
            model = AttentionModel(hidden_size,
                                   src_vocab_size,
                                   tar_vocab_size,
                                   batch_size,
                                   num_layers=num_layers,
                                   init_scale=init_scale,
                                   dropout=dropout)
        else:
            model = BaseModel(hidden_size,
                              src_vocab_size,
                              tar_vocab_size,
                              batch_size,
                              num_layers=num_layers,
                              init_scale=init_scale,
                              dropout=dropout)

        global_norm_clip = GradClipByGlobalNorm(max_grad_norm)
        lr = args.learning_rate
        opt_type = args.optimizer
        if opt_type == "sgd":
            optimizer = fluid.optimizer.SGD(lr,
                                            parameter_list=model.parameters())
        elif opt_type == "adam":
            optimizer = fluid.optimizer.Adam(lr,
                                             parameter_list=model.parameters())
        else:
            print("only support [sgd|adam]")
            raise Exception("opt type not support")

        train_data_prefix = args.train_data_prefix
        eval_data_prefix = args.eval_data_prefix
        test_data_prefix = args.test_data_prefix
        vocab_prefix = args.vocab_prefix
        src_lang = args.src_lang
        tar_lang = args.tar_lang
        print("begin to load data")
        raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                                   train_data_prefix, eval_data_prefix,
                                   test_data_prefix, args.max_len)
        print("finished load data")
        train_data, valid_data, test_data, _ = raw_data

        def prepare_input(batch, epoch_id=0):
            src_ids, src_mask, tar_ids, tar_mask = batch
            src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
            in_tar = tar_ids[:, :-1]
            label_tar = tar_ids[:, 1:]
            in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
            label_tar = label_tar.reshape(
                (label_tar.shape[0], label_tar.shape[1], 1))
            inputs = [src_ids, in_tar, label_tar, src_mask, tar_mask]
            return inputs, np.sum(tar_mask)

        # get train epoch size
        def eval(data, epoch_id=0):
            model.eval()
            eval_data_iter = reader.get_data_iter(data, batch_size,
                                                  mode='eval')
            total_loss = 0.0
            word_count = 0.0
            for batch_id, batch in enumerate(eval_data_iter):
                input_data_feed, word_num = prepare_input(batch, epoch_id)
                loss = model(input_data_feed)
                total_loss += loss * batch_size
                word_count += word_num
            ppl = np.exp(total_loss.numpy() / word_count)
            model.train()
            return ppl

        ce_time = []
        ce_ppl = []
        max_epoch = args.max_epoch
        for epoch_id in range(max_epoch):
            model.train()
            start_time = time.time()
            if args.enable_ce:
                train_data_iter = reader.get_data_iter(train_data,
                                                       batch_size,
                                                       enable_ce=True)
            else:
                train_data_iter = reader.get_data_iter(train_data, batch_size)

            total_loss = 0
            word_count = 0.0
            batch_times = []
            for batch_id, batch in enumerate(train_data_iter):
                batch_start_time = time.time()
                input_data_feed, word_num = prepare_input(batch,
                                                          epoch_id=epoch_id)
                word_count += word_num
                loss = model(input_data_feed)
                loss.backward()
                optimizer.minimize(loss, grad_clip=global_norm_clip)
                model.clear_gradients()
                total_loss += loss * batch_size

                batch_end_time = time.time()
                batch_time = batch_end_time - batch_start_time
                batch_times.append(batch_time)

                if batch_id > 0 and batch_id % 100 == 0:
                    print("-- Epoch:[%d]; Batch:[%d]; Time: %.5f s; ppl: %.5f"
                          % (epoch_id, batch_id, batch_time,
                             np.exp(total_loss.numpy() / word_count)))
                    ce_ppl.append(np.exp(total_loss.numpy() / word_count))
                    total_loss = 0.0
                    word_count = 0.0

            end_time = time.time()
            epoch_time = end_time - start_time
            print("\nTrain epoch:[%d]; Epoch Time: %.5f; avg_time: %.5f s/step\n"
                  % (epoch_id, epoch_time, sum(batch_times) / len(batch_times)))
            ce_time.append(epoch_time)

            dir_name = os.path.join(args.model_path, "epoch_" + str(epoch_id))
            print("begin to save", dir_name)
            paddle.fluid.save_dygraph(model.state_dict(), dir_name)
            print("save finished")

            dev_ppl = eval(valid_data)
            print("dev ppl", dev_ppl)
            test_ppl = eval(test_data)
            print("test ppl", test_ppl)

        if args.enable_ce:
            card_num = get_cards()
            _ppl = 0
            _time = 0
            try:
                _time = ce_time[-1]
                _ppl = ce_ppl[-1]
            except:
                print("ce info error")
            print("kpis\ttrain_duration_card%s\t%s" % (card_num, _time))
            print("kpis\ttrain_ppl_card%s\t%f" % (card_num, _ppl))
def train():
    args = parse_args()

    num_layers = args.num_layers
    src_vocab_size = args.src_vocab_size
    tar_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    dropout = args.dropout
    init_scale = args.init_scale
    max_grad_norm = args.max_grad_norm
    hidden_size = args.hidden_size

    # inference process
    print("src", src_vocab_size)

    # dropout uses the upscale_in_train type, so it can be removed at
    # inference time by setting it to 0
    if args.attention:
        model = AttentionModel(hidden_size,
                               src_vocab_size,
                               tar_vocab_size,
                               batch_size,
                               num_layers=num_layers,
                               init_scale=init_scale,
                               dropout=0.0)
    else:
        model = BaseModel(hidden_size,
                          src_vocab_size,
                          tar_vocab_size,
                          batch_size,
                          num_layers=num_layers,
                          init_scale=init_scale,
                          dropout=0.0)

    beam_size = args.beam_size
    trans_res = model.build_graph(mode='beam_search', beam_size=beam_size)
    # clone from the default main program and use it as the validation program
    main_program = fluid.default_main_program()

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = Executor(place)
    exe.run(framework.default_startup_program())

    source_vocab_file = args.vocab_prefix + "." + args.src_lang
    infer_file = args.infer_file
    infer_data = reader.raw_mono_data(source_vocab_file, infer_file)

    def prepare_input(batch, epoch_id=0, with_lr=True):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1], 1))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1], 1))
        in_tar = np.zeros_like(in_tar, dtype='int64')

        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))
        label_tar = np.zeros_like(label_tar, dtype='int64')

        res['src'] = src_ids
        res['tar'] = in_tar
        res['label'] = label_tar
        res['src_sequence_length'] = src_mask
        res['tar_sequence_length'] = tar_mask

        return res, np.sum(tar_mask)

    dir_name = args.reload_model
    print("dir name", dir_name)
    fluid.io.load_params(exe, dir_name)

    train_data_iter = reader.get_data_iter(infer_data, 1, mode='eval')

    tar_id2vocab = []
    tar_vocab_file = args.vocab_prefix + "." + args.tar_lang
    with open(tar_vocab_file, "r") as f:
        for line in f.readlines():
            tar_id2vocab.append(line.strip())

    infer_output_file = args.infer_output_file
    with open(infer_output_file, 'w') as out_file:
        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed, word_num = prepare_input(batch, epoch_id=0)
            fetch_outs = exe.run(feed=input_data_feed,
                                 fetch_list=[trans_res.name],
                                 use_program_cache=False)

            res = [tar_id2vocab[e] for e in fetch_outs[0].reshape(-1)]
            res = res[1:]
            new_res = []
            for ele in res:
                # stop at the end-of-sentence token
                if ele == "</s>":
                    break
                new_res.append(ele)

            out_file.write(' '.join(new_res))
            out_file.write('\n')
def train_dense_nodes():
    # sweep over dense-layer widths, largest first
    for dn in [4096, 2048, 1024, 512, 256, 128]:
        m = BaseModel('data/', dense_nodes=dn)
        train_one(m)
def test_should_raise_exception_on_renderer_type(self):
    # given
    model = BaseModel(standalone=False)
    # then
    self.assertRaises(AttributeError, lambda: model.renderer_type)
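# One way the behaviour tested above can arise: the backing attribute is only
# set for standalone models, so the property lookup raises AttributeError
# otherwise. A minimal sketch assuming this design, not the library's actual
# implementation.
class RendererModelSketch:
    def __init__(self, standalone=False):
        if standalone:
            self._renderer_type = "standalone"

    @property
    def renderer_type(self):
        # raises AttributeError when _renderer_type was never set
        return self._renderer_type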
def evaluate(save_dir):
    warnings.filterwarnings("ignore")
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
    os.environ["CUDA_VISIBLE_DEVICES"] = FLAGS.GPU_device[len(FLAGS.GPU_device) - 1]
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

    # Load the useful files to build the architecture
    print("Loading the connection matrix...")
    start = time.time()
    adj_matrix = pd.read_csv(os.path.join(FLAGS.dir_data, "adj_matrix.csv"), index_col=0)
    first_matrix_connection = pd.read_csv(os.path.join(FLAGS.dir_data, "first_matrix_connection_GO.csv"), index_col=0)
    csv_go = pd.read_csv(os.path.join(FLAGS.dir_data, "go_level.csv"), index_col=0)

    connection_matrix = [np.array(first_matrix_connection.values, dtype=np.float32)]
    # one connection matrix per pair of consecutive GO levels (7->6 down to 3->2)
    for level in range(7, 2, -1):
        rows = csv_go[str(level)].loc[lambda x: x == 1].index
        cols = csv_go[str(level - 1)].loc[lambda x: x == 1].index
        connection_matrix.append(np.array(adj_matrix.loc[rows, cols].values, dtype=np.float32))
    connection_matrix.append(np.ones((FLAGS.n_hidden_6, FLAGS.n_classes), dtype=np.float32))

    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    # Load the data
    print("Loading the test dataset...")
    loaded = np.load(os.path.join(FLAGS.dir_data, "X_test.npz"))
    X_test = loaded['x']
    y_test = loaded['y']
    if FLAGS.n_classes >= 2:
        y_test = to_categorical(y_test)
    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))

    # Launch the model
    print("Launching the evaluation")
    if FLAGS.type_training != "":
        print("with {} and ALPHA={}".format(FLAGS.type_training, FLAGS.alpha))

    tf.reset_default_graph()

    # -- Inputs of the model --
    X = tf.placeholder(tf.float32, shape=[None, FLAGS.n_input])
    Y = tf.placeholder(tf.float32, shape=[None, FLAGS.n_classes])

    # -- Hyperparameters of the neural network --
    is_training = tf.placeholder(tf.bool, name="is_training")  # Batch Norm hyperparameter
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")  # Dropout hyperparameter

    # Model instantiation
    network = BaseModel(X=X, n_input=FLAGS.n_input, n_classes=FLAGS.n_classes,
                        n_hidden_1=FLAGS.n_hidden_1, n_hidden_2=FLAGS.n_hidden_2,
                        n_hidden_3=FLAGS.n_hidden_3, n_hidden_4=FLAGS.n_hidden_4,
                        n_hidden_5=FLAGS.n_hidden_5, n_hidden_6=FLAGS.n_hidden_6,
                        keep_prob=keep_prob, is_training=is_training)
    pred = network()

    # -- Loss function --
    # ---- CE loss: average of the loss across all the dimensions ----
    if FLAGS.n_classes >= 2:
        ce_loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits_v2(logits=pred, labels=Y))
    else:
        ce_loss = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(logits=pred, labels=Y))

    # ---- Regularization loss (LGO, L2, L1) ----
    additional_loss = 0
    if FLAGS.type_training == "LGO":
        # Penalization of the noGO connections
        for idx, weight in enumerate(network.weights.values()):
            additional_loss += l2_loss_func(weight * (1 - connection_matrix[idx]))
    elif FLAGS.type_training == "L2":
        for weight in network.weights.values():
            additional_loss += l2_loss_func(weight)
    elif FLAGS.type_training == "L1":
        for idx, weight in enumerate(network.weights.values()):
            additional_loss += l1_loss_func(weight)

    # ---- Total loss ----
    if FLAGS.type_training != '':
        total_loss = ce_loss + FLAGS.alpha * additional_loss
    else:
        total_loss = ce_loss

    # -- Compute the prediction error --
    if FLAGS.n_classes >= 2:
        correct_prediction = tf.equal(tf.argmax(pred, 1), tf.argmax(Y, 1))
    else:
        sig_pred = tf.nn.sigmoid(pred)
        sig_pred = tf.cast(sig_pred > 0.5, dtype=tf.int64)
        ground_truth = tf.cast(Y, dtype=tf.int64)
        correct_prediction = tf.equal(sig_pred, ground_truth)

    # -- Calculate the accuracy across all the given batches and average them out --
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    # -- Configure the use of the gpu --
    config = tf.ConfigProto(log_device_placement=False, allow_soft_placement=True)

    if FLAGS.restore:
        saver = tf.train.Saver()

    start = time.time()
    with tf.device(FLAGS.GPU_device):
        with tf.Session(config=config) as sess:
            if FLAGS.restore:
                saver.restore(sess, os.path.join(save_dir, "model"))

            # -- Calculate the final loss and the final accuracy on the test set --
            avg_cost, avg_acc = sess.run(
                [total_loss, accuracy],
                feed_dict={X: X_test, Y: y_test,
                           is_training: FLAGS.is_training, keep_prob: 1})
            print('Test loss {:.6f}, test accuracy : {:.6f}\n'.format(avg_cost, avg_acc))

    end = time.time()
    elapsed = end - start
    print("Total time: {}h {}min {}sec ".format(
        time.gmtime(elapsed).tm_hour, time.gmtime(elapsed).tm_min,
        time.gmtime(elapsed).tm_sec))