def train(self):
    data = load_carla_data(
        os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']),
        self.config['model']['classes'])
    np.random.shuffle(data)
    train_instances, validation_instances = data[:1655], data[1655:]
    np.random.shuffle(train_instances)
    np.random.shuffle(validation_instances)

    train_generator = BatchGenerator(self.config, train_instances, jitter=True)
    validation_generator = BatchGenerator(self.config, validation_instances, jitter=False)

    checkpoint = ModelCheckpoint(
        'checkpoints' + os.path.sep + 'model.{epoch:02d}-{val_loss:.2f}.h5',
        monitor='val_loss', verbose=1, save_best_only=True, mode='auto', period=1)
    checkpoint_all = ModelCheckpoint(
        'checkpoints' + os.path.sep + 'all_models.{epoch:02d}-{loss:.2f}.h5',
        monitor='loss', verbose=1, save_best_only=True, mode='auto', period=1)

    # optimizer = RMSprop(lr=1e-3, rho=0.9, epsilon=1e-08, decay=0.0)
    # optimizer = Adam(lr=1e-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    optimizer = SGD(lr=1e-5, momentum=0.9, decay=0.0005)

    self.model.compile(loss=self.custom_loss, optimizer=optimizer)  # , metrics=['accuracy'])
    self.model.summary()

    history = self.model.fit_generator(
        generator=train_generator,
        steps_per_epoch=len(train_generator),
        epochs=self.config['train']['nb_epochs'],
        verbose=1,
        validation_data=validation_generator,
        validation_steps=len(validation_generator),
        callbacks=[checkpoint, checkpoint_all],  # map_evaluator_cb], # checkpoint, tensorboard
        max_queue_size=10,
        workers=3)
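# The Keras-style snippets in this collection (the train()/evaluate() routines here and the
# TrainModel/fit_generator blocks further down) pass a BatchGenerator directly to
# fit_generator and take len() of it, which suggests a keras.utils.Sequence-like interface.
# Below is a minimal sketch under that assumption; the constructor arguments, instance
# format, and the jitter hook are illustrative, not the original implementation.
import numpy as np
from tensorflow.keras.utils import Sequence

class SequenceBatchGenerator(Sequence):
    def __init__(self, instances, batch_size, jitter=False):
        self.instances = instances      # list of (input, target) pairs (assumed format)
        self.batch_size = batch_size
        self.jitter = jitter            # hypothetical augmentation switch

    def __len__(self):
        # number of batches per epoch, as consumed by steps_per_epoch=len(generator)
        return int(np.ceil(len(self.instances) / self.batch_size))

    def __getitem__(self, idx):
        batch = self.instances[idx * self.batch_size:(idx + 1) * self.batch_size]
        x, y = zip(*batch)
        return np.asarray(x), np.asarray(y)

    def on_epoch_end(self):
        np.random.shuffle(self.instances)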
def main():
    features = np.load('0_feat.npy')
    labels = np.load('0_label.npy')
    num_instances = 32
    batch_size = 128
    Batch = BatchGenerator(labels, num_instances=num_instances, batch_size=batch_size)
    batch = Batch.batch()
    inputs = Variable(torch.FloatTensor(features[batch, :])).cuda()
    targets = Variable(torch.LongTensor(labels[batch])).cuda()
    print(KmeanLoss(n_cluster=32)(inputs, targets))
def evaluate(self):
    data = load_carla_data(
        os.path.join(ANNOT_DIR, self.config['train']['annot_file_name']),
        self.config['model']['classes'])
    np.random.shuffle(data)
    validation_instances = data  # [1400:]
    validation_generator = BatchGenerator(self.config, validation_instances, jitter=False)

    map_evaluator_cb = self.MAP_evaluation(
        self, validation_generator,
        save_best=True,
        save_name='checkpoints\\best-mAP.h5',  # os.path.join(BASE_DIR, 'best_mAP\\weights.{epoch:02d}-{val_loss:.2f}.h5'),
        tensorboard=None,
        iou_threshold=0.4)

    self.model.compile(loss=self.custom_loss,
                       optimizer=SGD(lr=1e-5, momentum=0.9, decay=0.0005))
    self.model.summary()

    history = self.model.fit_generator(
        generator=validation_generator,
        steps_per_epoch=len(validation_generator),
        epochs=1,
        verbose=1,
        callbacks=[map_evaluator_cb])
def main(_):
    """Trains model from data"""
    if not os.path.exists(FLAGS.train_dir):
        os.mkdir(FLAGS.train_dir)
        print('Created training directory', FLAGS.train_dir)

    data_reader = BatchGenerator(FLAGS.data_dir, FLAGS.batch_size, FLAGS.max_word_length,
                                 sentence_limit=FLAGS.max_word_length * FLAGS.num_unroll_steps,
                                 num_valid=FLAGS.num_valid,
                                 threshold=FLAGS.report_threshold)
    print('initialized all dataset readers')

    FLAGS.char_vocab_size = len(data_reader.chars_dict)
    FLAGS.char_embed_size = round(FLAGS.char_vocab_size * 0.66)
    FLAGS.num_classes = data_reader.num_classes

    with tf.Graph().as_default(), tf.Session() as sess:
        # tensorflow seed must be inside graph
        tf.set_random_seed(FLAGS.seed)
        np.random.seed(seed=FLAGS.seed)

        model = Classifier()
        sess.run(tf.global_variables_initializer())  # initialize model

        # start training
        for current_epoch in range(FLAGS.max_epochs):
            start = time.time()

            # training step
            for batch_x, batch_y in data_reader.batches():
                current_step = tf.train.global_step(sess, model.global_step)
                feed = {model.train_data: batch_x,
                        model.targets: batch_y,
                        model.learning_rate: FLAGS.learning_rate,
                        model.dropout_keep_prob: FLAGS.dropout_keep_prob}
                _, step, loss, accuracy = sess.run(
                    [model.train_op, model.global_step, model.loss, model.accuracy],
                    feed_dict=feed)
                print("{}/{} ({} epochs) step, loss : {:.6f}, accuracy : {:.3f}, time/batch : {:.3f}sec"
                      .format(current_step, data_reader.num_batches * FLAGS.max_epochs,
                              current_epoch, loss, accuracy, time.time() - start))
                start = time.time()

            # model test step
            avg_loss, avg_accuracy = 0.0, 0.0
            start = time.time()
            for valid_x, valid_y in data_reader.valid_batches():
                feed = {model.train_data: valid_x,
                        model.targets: valid_y,
                        model.dropout_keep_prob: 1.0,
                        model.learning_rate: FLAGS.learning_rate}
                loss, accuracy = sess.run([model.loss, model.accuracy], feed_dict=feed)
                avg_accuracy += accuracy * len(valid_x)
                avg_loss += loss * len(valid_x)
            print("({} epochs) evaluation step, loss : {:.6f}, accuracy : {:.3f}, time/batch : {:.3f}sec"
                  .format(current_epoch,
                          avg_loss / len(data_reader.valid_data),
                          avg_accuracy / len(data_reader.valid_data),
                          time.time() - start))
def evaluate(step, sess, model, summary_writer):
    batch_gen = BatchGenerator(VALIDATION_DATA, FLAGS, isTest=True)
    eval_step = 0
    summary = None
    while True:
        batch = batch_gen.get_batch()
        if batch is None:
            break
        recalls, loss, summary = model.validate(batch)
        if eval_step != 0 and eval_step % FLAGS.save_summary_every == 0:
            print("Loss:{0:.5f} Recall@1 = {1:.5f} Recall@2 = {2:.5f} "
                  "Recall@3 = {3:.5f} Recall@5 = {4:.5f}".format(
                      loss, recalls[0], recalls[1], recalls[2], recalls[3]))
        eval_step += 1
    summary_writer.add_summary(summary, step)
def train_loop(sess, model, summary_writer, model_saver):
    batch_gen = BatchGenerator(TRAIN_DATA, FLAGS)
    epoch = 0
    while True:
        batch = batch_gen.get_batch()
        if batch is None:
            tf.logging.info("Epoch {0} is over".format(epoch))
            epoch += 1
            del batch_gen
            # create the batch generator again and proceed
            batch_gen = BatchGenerator(TRAIN_DATA, FLAGS)
            continue
        loss, step, summary = model.batch_fit(batch)
        if step % FLAGS.save_summary_every == 0:
            summary_writer.add_summary(summary, step)
            tf.logging.info("Step: {0} Loss: {1}".format(step, loss))
        if step != 0 and step % FLAGS.eval_every == 0:
            save_signal = evaluate(step, sess, model, summary_writer)
            if save_signal:
                model.save_model(model_saver, MODEL_DIR, step)
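# Both evaluate() and train_loop() above rely on get_batch() returning None once the data is
# exhausted, and recreate the generator to start a new epoch. A minimal sketch of a generator
# honoring that contract; the data format and constructor fields are assumptions, not the
# original class.
import numpy as np

class EpochBatchGenerator:
    def __init__(self, data, flags, isTest=False):
        self.data = list(data)
        self.batch_size = flags.batch_size
        self.cursor = 0
        if not isTest:
            np.random.shuffle(self.data)

    def get_batch(self):
        if self.cursor >= len(self.data):
            return None  # signals the caller that the epoch is over
        batch = self.data[self.cursor:self.cursor + self.batch_size]
        self.cursor += self.batch_size
        return batch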
def main():
    features = np.load('0_feat.npy')
    labels = np.load('0_label.npy')
    centers, center_labels = cluster_(features, labels, n_clusters=3)
    centers = Variable(torch.FloatTensor(centers).cuda(), requires_grad=True)
    center_labels = Variable(torch.LongTensor(center_labels)).cuda()
    cluster_counter = np.zeros([100, 3])

    num_instances = 3
    batch_size = 120
    Batch = BatchGenerator(labels, num_instances=num_instances, batch_size=batch_size)
    batch = Batch.batch()

    # _mask = Variable(torch.ByteTensor(np.ones([num_class_dict[args.data], args.n_cluster])).cuda())
    _mask = Variable(torch.ByteTensor(np.ones([100, 3])).cuda())
    inputs = Variable(torch.FloatTensor(features[batch, :])).cuda()
    targets = Variable(torch.LongTensor(labels[batch])).cuda()
    # print(torch.mean(inputs))

    mca = MCALoss(alpha=16, centers=centers, center_labels=center_labels,
                  cluster_counter=cluster_counter)
    for i in range(2):
        # loss, accuracy, dist_ap, dist_an = \
        #     MCALoss(alpha=16, centers=centers, center_labels=center_labels)(inputs, targets)
        loss, accuracy, dist_ap, dist_an = mca(inputs, targets, _mask)
        # print(loss.data[0])
        loss.backward()
        # print(centers.grad.data)
        centers.data -= centers.grad.data
        centers.grad.data.zero_()
        # print(centers.grad)
    print(cluster_counter)
def TrainModel(model, args, X_train, X_valid, y_train, y_valid):
    """Train the model"""
    checkpoint = ModelCheckpoint('model-{epoch:03d}.h5',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=args.saveBestModel,
                                 mode='auto')
    model.compile(loss='mean_squared_error', optimizer=Adam(lr=args.learningRate))
    model.fit_generator(BatchGenerator(args.dataDir, X_train, y_train, args.batchSize, True),
                        args.samplesPerEpoch,
                        args.nbEpoch,
                        max_q_size=1,
                        validation_data=BatchGenerator(args.dataDir, X_valid, y_valid, args.batchSize, False),
                        nb_val_samples=len(X_valid),
                        callbacks=[checkpoint],
                        verbose=1)
label_field.build_vocab(trainds)

traindl = data.BucketIterator(dataset=trainds, batch_size=4,
                              sort_key=lambda x: len(x.text),
                              device=device, sort_within_batch=True, repeat=False)
validdl = data.BucketIterator(dataset=validds, batch_size=6,
                              sort_key=lambda x: len(x.text),
                              device=device, sort_within_batch=True, repeat=False)

train_batch_it = BatchGenerator(traindl, 'text', 'Category')
valid_batch_it = BatchGenerator(validdl, 'text', 'Category')

vocab_size = len(txt_field.vocab)
model = SimpleGRU(vocab_size, embedding_dim, n_hidden, n_out,
                  trainds.fields['text'].vocab.vectors,
                  dropout=dropout).to(device)

opt = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), 1e-3)
fit(model=model, train_dl=train_batch_it,
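# BatchGenerator(traindl, 'text', 'Category') above looks like the common thin wrapper that
# turns a torchtext BucketIterator into plain (x, y) tuples keyed by field name. A minimal
# sketch under that assumption; the class name is illustrative.
class IteratorBatchGenerator:
    def __init__(self, dl, x_field, y_field):
        self.dl = dl            # a torchtext BucketIterator (or similar iterator of Batch objects)
        self.x_field = x_field  # name of the input field, e.g. 'text'
        self.y_field = y_field  # name of the label field, e.g. 'Category'

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        for batch in self.dl:
            x = getattr(batch, self.x_field)
            y = getattr(batch, self.y_field)
            yield x, y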
def train(self, n_rows, n_cols, rows, cols, vals, n_factors, d_pairwise,
          hidden_layer_sizes, n_iterations, batch_size, holdout_ratio, learning_rate,
          reg_param, l2_param, root_savedir, root_logdir,
          no_train_metric=False, seed=None):
    """
    Training routine.

    :param n_rows: Number of rows
    :param n_cols: Number of cols
    :param rows:
    :param cols:
    :param vals:
    :param n_factors: Number of non-bilinear terms
    :param d_pairwise: Number of bilinear terms
    :param hidden_layer_sizes:
    :param n_iterations:
    :param batch_size:
    :param holdout_ratio:
    :param learning_rate:
    :param reg_param: Frobenius norm regularization terms for the features
    :param l2_param: L2 regularization parameter for the nnet weights
    :param root_savedir:
    :param root_logdir:
    :param no_train_metric:
    :param seed:
    :return:
    """

    self.n_rows = n_rows
    self.n_cols = n_cols
    self.n_factors = n_factors
    self.d_pairwise = d_pairwise
    self.hidden_layer_sizes = hidden_layer_sizes
    self.reg_param = reg_param
    self.l2_param = l2_param

    if not os.path.exists(root_savedir):
        os.makedirs(root_savedir)

    ### Data handling ###

    # here we only train on positive examples, so all pairs are only the "on" values
    pairs = np.vstack([rows, cols, vals]).T  # (n_obs, 3)
    batch_generator = BatchGenerator(pairs, batch_size, holdout_ratio=holdout_ratio, seed=seed)

    ### Construct the TF graph ###

    self.construct_graph()

    all_vars = tf.trainable_variables()
    latent_vars = [self.U, self.V, self.Up, self.Vp]  # the inputs to the nnets
    nnet_vars = [x for x in all_vars if x not in latent_vars]  # the nnet variables

    print("\nlatent vars:", latent_vars)
    print("\nnnet vars:", nnet_vars)

    train_lvars = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss, var_list=latent_vars)
    train_nnet = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(self.loss, var_list=nnet_vars)

    ### Training ###

    if not no_train_metric:
        train_loss = tf.placeholder(dtype=tf.float32, shape=[], name='train_loss')
        train_loss_summary = tf.summary.scalar('train_loss', train_loss)

    if holdout_ratio is not None:
        test_mse = tf.placeholder(dtype=tf.float32, shape=[], name='test_mse')
        test_mse_summary = tf.summary.scalar('test_mse', test_mse)

    # create tensorboard summary objects
    scalar_summaries = [tf.summary.scalar(var_.name, var_) for var_ in all_vars if len(var_.shape) == 0]
    array_summaries = [tf.summary.histogram(var_.name, var_) for var_ in all_vars if len(var_.shape) > 0]

    writer = tf.summary.FileWriter(root_logdir)
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        init.run()

        if not no_train_metric:
            train_dict = {self.row: batch_generator.train[:, 0],
                          self.col: batch_generator.train[:, 1],
                          self.val: batch_generator.train[:, 2]}

        if holdout_ratio is not None:
            test_dict = {self.row: batch_generator.test[:, 0],
                         self.col: batch_generator.test[:, 1],
                         self.val: batch_generator.test[:, 2]}

        for iteration in range(n_iterations):
            batch = batch_generator.next_batch()
            batch_dict = {self.row: batch[:, 0],
                          self.col: batch[:, 1],
                          self.val: batch[:, 2]}

            # alternate between optimizing inputs and nnet vars
            sess.run(train_lvars, feed_dict=batch_dict)
            sess.run(train_nnet, feed_dict=batch_dict)

            if iteration % 20 == 0:
                print(iteration, end="")

                if not no_train_metric:
                    train_loss_ = sess.run(self.loss, feed_dict=train_dict)
                    train_loss_summary_str = sess.run(train_loss_summary, feed_dict={train_loss: train_loss_})
                    writer.add_summary(train_loss_summary_str, iteration)
                    print("\ttrain loss: %.4f" % train_loss_, end="")

                if holdout_ratio is not None:
                    test_sse_ = sess.run(self.sse, feed_dict=test_dict)
                    test_mse_ = test_sse_ / len(batch_generator.test)
                    test_mse_summary_str = sess.run(test_mse_summary, feed_dict={test_mse: test_mse_})
                    writer.add_summary(test_mse_summary_str, iteration)
                    print("\ttest mse: %.4f" % test_mse_)

                scalar_summaries_str = sess.run(scalar_summaries)
                array_summaries_str = sess.run(array_summaries)
                for summary_ in scalar_summaries_str + array_summaries_str:
                    writer.add_summary(summary_, iteration)

        # save the model
        saver.save(sess, os.path.join(root_savedir, "model.ckpt"))

    # close the file writer
    writer.close()
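# The matrix-factorization trainers in this collection (the routine above and the variational
# variants further down) assume a generator built from an (n_obs, 3) pairs array that keeps
# train/test splits as attributes and serves shuffled minibatches via next_batch(). A minimal
# sketch under those assumptions; the shuffling and split details are illustrative.
import numpy as np

class PairsBatchGenerator:
    def __init__(self, pairs, batch_size, holdout_ratio=None, seed=None):
        rng = np.random.RandomState(seed)
        pairs = pairs[rng.permutation(len(pairs))]
        n_test = int(len(pairs) * holdout_ratio) if holdout_ratio else 0
        self.test = pairs[:n_test]    # held-out (row, col, val) triples
        self.train = pairs[n_test:]   # training (row, col, val) triples
        self.batch_size = batch_size
        self.rng = rng
        self.cursor = 0

    def next_batch(self):
        if self.cursor + self.batch_size > len(self.train):
            self.rng.shuffle(self.train)  # reshuffle and start a new pass
            self.cursor = 0
        batch = self.train[self.cursor:self.cursor + self.batch_size]
        self.cursor += self.batch_size
        return batch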
def main():
    args = config_train()

    # Specifying location to store model, best model and tensorboard log.
    args.save_model = os.path.join(args.output_dir, 'save_model/model')
    args.save_best_model = os.path.join(args.output_dir, 'best_model/model')
    args.tb_log_dir = os.path.join(args.output_dir, 'tensorboard_log/')
    args.vocab_file = ''

    # Create necessary directories.
    if len(args.init_dir) != 0:
        args.output_dir = args.init_dir
    else:
        if os.path.exists(args.output_dir):
            shutil.rmtree(args.output_dir)
        for paths in [args.save_model, args.save_best_model, args.tb_log_dir]:
            os.makedirs(os.path.dirname(paths))

    logging.basicConfig(stream=sys.stdout,
                        format='%(asctime)s %(levelname)s:%(message)s',
                        level=logging.INFO, datefmt='%I:%M:%S')

    print('=' * 60)
    print('All final and intermediate outputs will be stored in %s/' % args.output_dir)
    print('=' * 60 + '\n')

    if args.debug:
        logging.info('args are:\n%s', args)

    if len(args.init_dir) != 0:
        with open(os.path.join(args.init_dir, 'result.json'), 'r') as f:
            result = json.load(f)
        params = result['params']
        args.init_model = result['latest_model']
        best_model = result['best_model']
        best_valid_ppl = result['best_valid_ppl']
        if 'encoding' in result:
            args.encoding = result['encoding']
        else:
            args.encoding = 'utf-8'
        args.vocab_file = os.path.join(args.init_dir, 'vocab.json')
    else:
        params = {'batch_size': args.batch_size,
                  'num_unrollings': args.num_unrollings,
                  'hidden_size': args.hidden_size,
                  'max_grad_norm': args.max_grad_norm,
                  'embedding_size': args.embedding_size,
                  'num_layers': args.num_layers,
                  'learning_rate': args.learning_rate,
                  'model': args.model,
                  'dropout': args.dropout,
                  'input_dropout': args.input_dropout}
        best_model = ''
    logging.info('Parameters are:\n%s\n', json.dumps(params, sort_keys=True, indent=4))

    # Read and split data.
    logging.info('Reading data from: %s', args.data_file)
    with codecs.open(args.data_file, 'r', encoding=args.encoding) as f:
        text = f.read()

    if args.test:
        text = text[:50000]
    logging.info('Number of characters: %s', len(text))

    if args.debug:
        logging.info('First %d characters: %s', 10, text[:10])

    logging.info('Creating train, valid, test split')
    train_size = int(args.train_frac * len(text))
    valid_size = int(args.valid_frac * len(text))
    test_size = len(text) - train_size - valid_size
    train_text = text[:train_size]
    valid_text = text[train_size:train_size + valid_size]
    test_text = text[train_size + valid_size:]

    vocab_loader = VocabularyLoader()
    if len(args.vocab_file) != 0:
        vocab_loader.load_vocab(args.vocab_file, args.encoding)
    else:
        logging.info('Creating vocabulary')
        vocab_loader.create_vocab(text)
        vocab_file = os.path.join(args.output_dir, 'vocab.json')
        vocab_loader.save_vocab(vocab_file, args.encoding)
        logging.info('Vocabulary is saved in %s', vocab_file)
        args.vocab_file = vocab_file

    params['vocab_size'] = vocab_loader.vocab_size
    logging.info('Vocab size: %d', vocab_loader.vocab_size)

    # Create batch generators.
    batch_size = params['batch_size']
    num_unrollings = params['num_unrollings']
    train_batches = BatchGenerator(vocab_loader.vocab_index_dict, train_text, batch_size, num_unrollings)
    valid_batches = BatchGenerator(vocab_loader.vocab_index_dict, valid_text, batch_size, num_unrollings)
    test_batches = BatchGenerator(vocab_loader.vocab_index_dict, test_text, batch_size, num_unrollings)

    if args.debug:
        logging.info('Test batch generators')
        x, y = train_batches.next_batch()
        logging.info((str(x[0]), str(batche2string(x[0], vocab_loader.index_vocab_dict))))
        logging.info((str(y[0]), str(batche2string(y[0], vocab_loader.index_vocab_dict))))

    # Create graphs
    logging.info('Creating graph')
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope('training'):
            train_model = CharRNNLM(is_training=True, infer=False, **params)
        tf.get_variable_scope().reuse_variables()
        with tf.name_scope('validation'):
            valid_model = CharRNNLM(is_training=False, infer=False, **params)
        with tf.name_scope('evaluation'):
            test_model = CharRNNLM(is_training=False, infer=False, **params)
        saver = tf.train.Saver(name='model_saver')
        best_model_saver = tf.train.Saver(name='best_model_saver')

    logging.info('Start training\n')

    result = {}
    result['params'] = params
    result['vocab_file'] = args.vocab_file
    result['encoding'] = args.encoding

    try:
        with tf.Session(graph=graph) as session:
            # Version 8 changed the api of summary writer to use
            # graph instead of graph_def.
            if TF_VERSION >= 8:
                graph_info = session.graph
            else:
                graph_info = session.graph_def

            train_writer = tf.train.SummaryWriter(args.tb_log_dir + 'train/', graph_info)
            valid_writer = tf.train.SummaryWriter(args.tb_log_dir + 'valid/', graph_info)

            # load a saved model or start from random initialization.
            if len(args.init_model) != 0:
                saver.restore(session, args.init_model)
            else:
                tf.initialize_all_variables().run()

            learning_rate = args.learning_rate
            for epoch in range(args.num_epochs):
                logging.info('=' * 19 + ' Epoch %d ' + '=' * 19 + '\n', epoch)
                logging.info('Training on training set')
                # training step
                ppl, train_summary_str, global_step = train_model.run_epoch(
                    session, train_batches, is_training=True,
                    learning_rate=learning_rate, verbose=args.verbose,
                    freq=args.progress_freq)
                # record the summary
                train_writer.add_summary(train_summary_str, global_step)
                train_writer.flush()
                # save model
                saved_path = saver.save(session, args.save_model,
                                        global_step=train_model.global_step)
                logging.info('Latest model saved in %s\n', saved_path)

                logging.info('Evaluate on validation set')
                valid_ppl, valid_summary_str, _ = valid_model.run_epoch(
                    session, valid_batches, is_training=False,
                    learning_rate=learning_rate, verbose=args.verbose,
                    freq=args.progress_freq)

                # save and update best model
                if (len(best_model) == 0) or (valid_ppl < best_valid_ppl):
                    best_model = best_model_saver.save(session, args.save_best_model,
                                                       global_step=train_model.global_step)
                    best_valid_ppl = valid_ppl
                else:
                    learning_rate /= 2.0
                    logging.info('Decay the learning rate: ' + str(learning_rate))

                valid_writer.add_summary(valid_summary_str, global_step)
                valid_writer.flush()
                logging.info('Best model is saved in %s', best_model)
                logging.info('Best validation ppl is %f\n', best_valid_ppl)
                result['latest_model'] = saved_path
                result['best_model'] = best_model
                # Convert to float because numpy.float is not json serializable.
                result['best_valid_ppl'] = float(best_valid_ppl)

                result_path = os.path.join(args.output_dir, 'result.json')
                if os.path.exists(result_path):
                    os.remove(result_path)
                with open(result_path, 'w') as f:
                    json.dump(result, f, indent=2, sort_keys=True)

            logging.info('Latest model is saved in %s', saved_path)
            logging.info('Best model is saved in %s', best_model)
            logging.info('Best validation ppl is %f\n', best_valid_ppl)

            logging.info('Evaluate the best model on test set')
            saver.restore(session, best_model)
            test_ppl, _, _ = test_model.run_epoch(
                session, test_batches, is_training=False,
                learning_rate=learning_rate, verbose=args.verbose,
                freq=args.progress_freq)
            result['test_ppl'] = float(test_ppl)
    finally:
        result_path = os.path.join(args.output_dir, 'result.json')
        if os.path.exists(result_path):
            os.remove(result_path)
        with open(result_path, 'w') as f:
            json.dump(result, f, indent=2, sort_keys=True)
import os

import tensorflow as tf

import CONFIG
from keras_model import create_model
from utils import BatchGenerator, load_data, save_json

train_data, valid_data, total_words, indexToString, stringToIndex = load_data()

train_data_generator = BatchGenerator(train_data, CONFIG.number_of_words,
                                      CONFIG.batch_size, total_words,
                                      skip_step=CONFIG.number_of_words)
valid_data_generator = BatchGenerator(valid_data, CONFIG.number_of_words,
                                      CONFIG.batch_size, total_words,
                                      skip_step=CONFIG.number_of_words)

optimizer = tf.keras.optimizers.Adam(lr=CONFIG.learning_rate, decay=CONFIG.learning_rate_decay)

model = create_model(total_words=total_words,
                     hidden_size=CONFIG.hidden_size,
                     num_steps=CONFIG.number_of_words,
                     optimizer=optimizer)

print(model.summary())
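# The language-model setup above constructs BatchGenerator(data, num_steps, batch_size,
# vocab_size, skip_step=...), which matches the usual sliding-window text generator that
# yields (input window, one-hot shifted targets). A minimal sketch under that assumption;
# the generate() method, windowing, and one-hot targets are illustrative, not the original code.
import numpy as np
from tensorflow.keras.utils import to_categorical

class TextBatchGenerator:
    def __init__(self, data, num_steps, batch_size, vocab_size, skip_step=5):
        self.data = data            # sequence of word indices
        self.num_steps = num_steps  # window length fed to the model
        self.batch_size = batch_size
        self.vocab_size = vocab_size
        self.skip_step = skip_step  # how far the window slides between samples
        self.current_idx = 0

    def generate(self):
        x = np.zeros((self.batch_size, self.num_steps))
        y = np.zeros((self.batch_size, self.num_steps, self.vocab_size))
        while True:
            for i in range(self.batch_size):
                if self.current_idx + self.num_steps + 1 >= len(self.data):
                    self.current_idx = 0  # wrap around at the end of the data
                x[i, :] = self.data[self.current_idx:self.current_idx + self.num_steps]
                targets = self.data[self.current_idx + 1:self.current_idx + self.num_steps + 1]
                y[i, :, :] = to_categorical(targets, num_classes=self.vocab_size)
                self.current_idx += self.skip_step
            yield x, y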
                    help='Learning rate decay', type=float)
parser.add_argument('--batch_size', default=80, help='Batch Size', type=int)
parser.add_argument('--data_augmentation', default=True, help='Using data augmentation', type=int)
parser.add_argument('--grayscale', default=True, help='Using grayscale images', type=int)
parser.add_argument('--keep_prob', default=0.8, help='Keep probability for dropout', type=float)
config = parser.parse_args()

trainBG = BatchGenerator(X_train, y_train, config.batch_size, config.grayscale, 'train')
validBG = BatchGenerator(X_valid, y_valid, config.batch_size, config.grayscale)
# note: the test generator reuses the validation split
testBG = BatchGenerator(X_valid, y_valid, config.batch_size, config.grayscale)

config.decay_steps = trainBG.num_batches * config.num_epoch

label_dict = {}
with open('signnames.csv') as f:
    reader = csv.DictReader(f)
    for row in reader:
        label_dict[row['ClassId']] = row['SignName']

# vgg = VGGsimple(config, label_dict)
# vgg.train(trainBG, validBG, config.num_epoch)

lenet = LeNet(config, label_dict)
lenet.train(trainBG, validBG, config.num_epoch)
def train(self):
    run_flags = self.flags
    sess = self.sess
    sw = self.sw

    hr_img = tf.placeholder(tf.float32, [
        None, run_flags.input_width * run_flags.scale,
        run_flags.input_length * run_flags.scale, 3
    ])  # 128*128*3 as default
    lr_img = tf.placeholder(tf.float32, [
        None, run_flags.input_width, run_flags.input_length, 3
    ])  # 64*64*3 as default

    myModel = Model(locals())
    out_gen = Model.generative(myModel, lr_img)
    real_out_dis = Model.discriminative(myModel, hr_img)
    fake_out_dis = Model.discriminative(myModel, out_gen, reuse=True)

    cost_gen, cost_dis, var_train, var_gen, var_dis = \
        Model.costs_and_vars(myModel, hr_img, out_gen, real_out_dis, fake_out_dis)
    # cost_gen, cost_dis, var_train, var_gen, var_dis = \
    #     Model.wgan_loss(myModel, hr_img, out_gen, real_out_dis, fake_out_dis)

    optimizer_gen = tf.train.AdamOptimizer(learning_rate=run_flags.lr_gen). \
        minimize(cost_gen, var_list=var_gen)
    optimizer_dis = tf.train.AdamOptimizer(learning_rate=run_flags.lr_dis). \
        minimize(cost_dis, var_list=var_dis)

    init = tf.global_variables_initializer()

    with sess:
        sess.run(init)
        saver = tf.train.Saver()

        if not exists('models'):
            makedirs('models')

        passed_iters = 0
        for epoch in range(1, run_flags.epochs + 1):
            print('Epoch:', str(epoch))

            for batch in BatchGenerator(run_flags.batch_size, self.datasize):
                batch_hr = self.dataset[batch] / 255.0
                batch_lr = array([imresize(img, size=(run_flags.input_width, run_flags.input_length, 3)) \
                                  for img in batch_hr])

                _, gc, dc = sess.run([optimizer_gen, cost_gen, cost_dis],
                                     feed_dict={hr_img: batch_hr, lr_img: batch_lr})
                sess.run([optimizer_dis],
                         feed_dict={hr_img: batch_hr, lr_img: batch_lr})

                passed_iters += 1
                if passed_iters % run_flags.sample_iter == 0:
                    print('Passed iterations=%d, Generative cost=%.9f, Discriminative cost=%.9f' % \
                          (passed_iters, gc, dc))

                plot.plot('train_dis_cost_gan', abs(dc))
                plot.plot('train_gen_cost_gan', abs(gc))
                if (passed_iters < 5) or (passed_iters % 100 == 99):
                    plot.flush()
                plot.tick()

            if run_flags.checkpoint_iter and epoch % run_flags.checkpoint_iter == 0:
                saver.save(sess, '/'.join(['models', run_flags.model, run_flags.model]))
                print('Model \'%s\' saved in: \'%s/\'' \
                      % (run_flags.model, '/'.join(['models', run_flags.model])))

        print('Optimization finished.')

        saver.save(sess, '/'.join(['models', run_flags.model, run_flags.model]))
        print('Model \'%s\' saved in: \'%s/\'' \
              % (run_flags.model, '/'.join(['models', run_flags.model])))
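# The GAN scripts here iterate `for batch in BatchGenerator(batch_size, datasize)` and use the
# result to index the dataset, so the generator apparently yields arrays of sample indices
# (with a possibly shorter final batch, which the test loop checks for). A minimal sketch
# under that assumption; the shuffling choice is illustrative.
import numpy as np

def IndexBatchGenerator(batch_size, datasize):
    indices = np.random.permutation(datasize)
    for start in range(0, datasize, batch_size):
        yield indices[start:start + batch_size]  # index array, used as dataset[batch]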
def test(self):
    run_flags = self.flags
    sess = self.sess
    sw = self.sw
    img_size = run_flags.scale * run_flags.input_length

    real_data = tf.placeholder(tf.float32, [
        None, run_flags.input_width * run_flags.scale,
        run_flags.input_width * run_flags.scale, 3
    ])  # 128*128*3 as default
    lr_img = tf.placeholder(tf.float32, [
        None, run_flags.input_width, run_flags.input_length, 3
    ])  # 64*64*3 as default

    myModel = Model(locals())

    if run_flags.train_model == 'wgangp':
        fake_data = Model.generative_res(myModel, lr_img)
        real_out_dis = Model.discriminative(myModel, real_data)
        fake_out_dis = Model.discriminative(myModel, fake_data, reuse=True)

        t_vars = tf.trainable_variables()
        var_gen = [var for var in t_vars if 'g_' in var.name]
        var_dis = [var for var in t_vars if 'd_' in var.name]

        cost_gen = -tf.reduce_mean(fake_out_dis)
        cost_dis = tf.reduce_mean(fake_out_dis) - tf.reduce_mean(real_out_dis)

        alpha = tf.random_uniform(shape=[run_flags.batch_size, 1], minval=0., maxval=1.)
        differences = fake_data - real_data
        differences = tf.reshape(differences, [run_flags.batch_size, img_size * img_size * 3])
        interpolates = tf.reshape(real_data, [run_flags.batch_size, img_size * img_size * 3]) + (alpha * differences)
        gradients = tf.gradients(
            Model.discriminative(myModel, interpolates, reuse=True), [interpolates])[0]
        slopes = tf.sqrt(tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
        cost_dis += 10 * gradient_penalty

        # add L1 difference to penalty
        fake_data_downsampled = downsample(fake_data)
        real_data_downsampled = downsample(real_data)
        gen_l1_cost = tf.reduce_mean(tf.abs(fake_data_downsampled - real_data_downsampled))
        cost_gen = run_flags.loss_weight * gen_l1_cost + (1 - run_flags.loss_weight) * cost_gen

        # edge detection
        edge_detector1 = EdgeDetector('configs/hed.yaml', real_data)
        edge_detector1.setup(sess)
        real_edge = edge_detector1.run(sess, real_data)
        edge_detector2 = NewEdgeDetector('configs/hed.yaml', fake_data)
        edge_detector2.setup(sess)
        fake_edge = edge_detector2.run(sess, fake_data, reuse=True)

        real_edge_downsampled = downsample(real_edge)
        fake_edge_downsampled = downsample(fake_edge)
        edge_cost = tf.reduce_mean(tf.abs(fake_edge_downsampled - real_edge_downsampled))
        cost_gen = 0.8 * cost_gen + edge_cost * 20

        optimizer_gen = tf.train.RMSPropOptimizer(learning_rate=run_flags.lr_gen). \
            minimize(cost_gen, var_list=var_gen)
        optimizer_dis = tf.train.RMSPropOptimizer(learning_rate=run_flags.lr_dis). \
            minimize(cost_dis, var_list=var_dis)
        # optimizer_dis = tf.train.AdamOptimizer(learning_rate=run_flags.lr_dis, beta1=0.5, beta2=0.9). \
        #     minimize(cost_dis, var_list=var_dis)

    if run_flags.train_model == 'gan':
        fake_data = Model.generative_res(myModel, lr_img)
        real_out_dis = Model.discriminative_gan(myModel, real_data)
        fake_out_dis = Model.discriminative_gan(myModel, fake_data, reuse=True)

        cost_gen, cost_dis, _, var_gen, var_dis = Model.costs_and_vars(
            myModel, real_data, fake_data, real_out_dis, fake_out_dis)

        optimizer_gen = tf.train.AdamOptimizer(learning_rate=run_flags.lr_gen). \
            minimize(cost_gen, var_list=var_gen)
        optimizer_dis = tf.train.AdamOptimizer(learning_rate=run_flags.lr_dis). \
            minimize(cost_dis, var_list=var_dis)

    # init = tf.global_variables_initializer()
    var_all = tf.global_variables()
    var_gan = [var for var in var_all if 'g_' in var.name or 'd_' in var.name]
    init = tf.variables_initializer(var_gan)

    with tf.Session() as sess:
        sess.run(init)
        saver = tf.train.Saver()

        try:
            saver.restore(sess, '/'.join(['models', run_flags.model, run_flags.model]))
        except:
            print('Model could not be restored. Exiting.')
            exit()

        makedirs(run_flags.out_path)
        print('Saving test results ...')

        start = 0
        for batch in BatchGenerator(run_flags.batch_size, self.datasize):
            batch_big = self.dataset[batch] / 255.0
            batch_sml = array([imresize(img, size=(run_flags.input_width, run_flags.input_length, 3)) \
                               for img in batch_big])

            if batch.shape[0] != run_flags.batch_size:
                break

            superres_imgs = sess.run(fake_data, feed_dict={lr_img: batch_sml})

            gc, dc, re, fe = sess.run([cost_gen, cost_dis, real_edge, fake_edge], \
                                      feed_dict={real_data: batch_big, lr_img: batch_sml})

            for i, img in enumerate(superres_imgs):
                img = img * 255
                img = img.astype(int)
                img_gray = np.dot(img[..., :3], [0.299, 0.587, 0.114])
                img_sobel = scipy.ndimage.filters.sobel(img_gray)
                imsave('%s/%d_edge_fake.png' % (run_flags.out_path, start + i), img_sobel)

            for i, img in enumerate(batch_big):
                img = img * 255
                img = img.astype(int)
                img_gray = np.dot(img[..., :3], [0.299, 0.587, 0.114])
                img_sobel = scipy.ndimage.filters.sobel(img_gray)
                imsave('%s/%d_edge_real.png' % (run_flags.out_path, start + i), img_sobel)

            for i, img in enumerate(batch_sml):
                img = imresize(img, size=(img_size, img_size, 3))
                img = img * 255
                img = img.astype(int)
                img_gray = np.dot(img[..., :3], [0.299, 0.587, 0.114])
                img_sobel = scipy.ndimage.filters.sobel(img_gray)
                imsave('%s/%d_edge_low.png' % (run_flags.out_path, start + i), img_sobel)

            images = concatenate( \
                ( \
                    array([imresize(img, size=(img_size, img_size, 3)) / 255.0 for img in batch_sml]), \
                    superres_imgs,
                    batch_big \
                ), 2)

            criteria_psnr = mean(array([psnr.psnr(sr_img * 255, real_img * 255)
                                        for sr_img in superres_imgs
                                        for real_img in batch_big]))
            bicubic_criteria_psnr = mean(array([psnr.psnr(imresize(lr_img, size=(img_size, img_size, 3)) * 255,
                                                          real_img * 255)
                                                for lr_img in batch_sml
                                                for real_img in batch_big]))

            for idx, image in enumerate(images):
                imsave('%s/%d.png' % (run_flags.out_path, start + idx), image)

            start += run_flags.batch_size

            print('%d/%d saved successfully: Generative cost=%.9f, Discriminative cost=%.9f, psnr=%.9f, bicubic_psnr = %.9f' % \
                  (min(start, self.datasize), self.datasize, gc, dc, criteria_psnr, bicubic_criteria_psnr))
def train(self, n_rows, n_cols, rows, cols, vals, n_factors, d_pairwise,
          hidden_layer_sizes, n_iterations, batch_size, holdout_ratio, learning_rate,
          n_samples, root_savedir, root_logdir,
          no_train_metric=False, seed=None):
    """
    Training routine.

    :param n_rows: Number of rows
    :param n_cols:
    :param rows: Rows for "on" entries
    :param cols: Corresponding columns for "on" entries
    :param vals:
    :param n_factors: Number of non-bilinear terms
    :param d_pairwise: Number of bilinear terms
    :param hidden_layer_sizes:
    :param n_iterations:
    :param batch_size:
    :param holdout_ratio:
    :param learning_rate:
    :param n_samples:
    :param root_savedir:
    :param root_logdir:
    :param no_train_metric:
    :param seed:
    :return:
    """

    self.n_rows = n_rows
    self.n_cols = n_cols
    self.n_factors = n_factors
    self.d_pairwise = d_pairwise
    self.hidden_layer_sizes = hidden_layer_sizes

    if not os.path.exists(root_savedir):
        os.makedirs(root_savedir)

    ### Data handling ###

    pairs = np.vstack([rows, cols, vals]).T  # (n_obs, 3)
    batch_generator = BatchGenerator(pairs, batch_size, holdout_ratio=holdout_ratio, seed=seed)

    ### Construct the TF graph ###

    self.construct_graph()
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(-self.elbo)

    ### Training ###

    if not no_train_metric:
        train_elbo = tf.placeholder(dtype=tf.float32, shape=[], name='train_elbo')
        train_elbo_summary = tf.summary.scalar('train_elbo', train_elbo)

        train_ll = tf.placeholder(dtype=tf.float32, shape=[], name='train_ll')
        train_ll_summary = tf.summary.scalar('train_ll', train_ll)

    if holdout_ratio is not None:
        test_ll = tf.placeholder(dtype=tf.float32, shape=[], name='test_ll')
        test_ll_summary = tf.summary.scalar('test_ll', test_ll)

    # create tensorboard summary objects
    all_vars = tf.trainable_variables()
    scalar_summaries = [tf.summary.scalar(var_.name, var_) for var_ in all_vars if len(var_.shape) == 0]
    array_summaries = [tf.summary.histogram(var_.name, var_) for var_ in all_vars if len(var_.shape) > 0]

    writer = tf.summary.FileWriter(root_logdir)
    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        init.run()

        if not no_train_metric:
            train_dict = {self.row: batch_generator.train[:, 0],
                          self.col: batch_generator.train[:, 1],
                          self.val: batch_generator.train[:, 2],
                          self.n_samples: 100,
                          self.batch_scale: 1.0}

        if holdout_ratio is not None:
            test_dict = {self.row: batch_generator.test[:, 0],
                         self.col: batch_generator.test[:, 1],
                         self.val: batch_generator.test[:, 2],
                         self.n_samples: 100,
                         self.batch_scale: 1.0}

        for iteration in range(n_iterations):
            batch = batch_generator.next_batch()
            sess.run(train_op, feed_dict={self.row: batch[:, 0],
                                          self.col: batch[:, 1],
                                          self.val: batch[:, 2],
                                          self.n_samples: n_samples,
                                          self.batch_scale: len(batch_generator.train) / len(batch)})

            if iteration % 20 == 0:
                print(iteration, end="")

                if not no_train_metric:
                    train_ll_, train_elbo_ = sess.run([self.data_loglikel, self.elbo], feed_dict=train_dict)
                    train_ll_summary_str, train_elbo_summary_str = sess.run(
                        [train_ll_summary, train_elbo_summary],
                        feed_dict={train_ll: train_ll_, train_elbo: train_elbo_})
                    writer.add_summary(train_ll_summary_str, iteration)
                    writer.add_summary(train_elbo_summary_str, iteration)
                    print("\tTrain ELBO: %.4f" % train_elbo_, end="")
                    print("\tTrain LL: %.4f" % train_ll_, end="")

                if holdout_ratio is not None:
                    test_ll_ = sess.run(self.data_loglikel, feed_dict=test_dict)
                    test_ll_summary_str = sess.run(test_ll_summary, feed_dict={test_ll: test_ll_})
                    writer.add_summary(test_ll_summary_str, iteration)
                    print("\tTest LL: %.4f" % test_ll_)

                scalar_summaries_str = sess.run(scalar_summaries)
                array_summaries_str = sess.run(array_summaries)
                for summary_ in scalar_summaries_str + array_summaries_str:
                    writer.add_summary(summary_, iteration)

        # save the model
        saver.save(sess, os.path.join(root_savedir, "model.ckpt"))

    # close the file writer
    writer.close()
def run(
    n_samples: int,
    version: str,
    task: str,
    modality: str,
    results_dir: str,
    triplets_dir: str,
    lmbda: float,
    batch_size: int,
    embed_dim: int,
    rnd_seed: int,
    device: torch.device,
) -> None:
    # load train triplets
    train_triplets, _ = load_data(device=device,
                                  triplets_dir=os.path.join(triplets_dir, modality))
    # number of unique items in the data matrix
    n_items = torch.max(train_triplets).item() + 1
    # initialize an identity matrix of size n_items x n_items for one-hot-encoding of triplets
    I = torch.eye(n_items)
    # get mini-batches for training to sample an equally sized synthetic dataset
    train_batches = BatchGenerator(I=I, dataset=train_triplets,
                                   batch_size=batch_size,
                                   sampling_method=None, p=None)
    # initialise model
    for i in range(n_samples):
        if version == 'variational':
            model = VSPoSE(in_size=n_items, out_size=embed_dim)
        else:
            model = SPoSE(in_size=n_items, out_size=embed_dim)
        # load weights of pretrained model
        model = load_model(
            model=model,
            results_dir=results_dir,
            modality=modality,
            version=version,
            dim=embed_dim,
            lmbda=lmbda,
            rnd_seed=rnd_seed,
            device=device,
        )
        # move model to current device
        model.to(device)
        # probabilistically sample triplet choices given model output PMFs
        sampled_choices = validation(
            model=model,
            val_batches=train_batches,
            version=version,
            task=task,
            device=device,
            embed_dim=embed_dim,
            sampling=True,
            batch_size=batch_size,
        )
        PATH = os.path.join(triplets_dir, 'synthetic', f'sample_{i+1:02d}')
        if not os.path.exists(PATH):
            os.makedirs(PATH)
        np.savetxt(os.path.join(PATH, 'train_90.txt'), sampled_choices)
from utils import BatchGenerator
from net import Vgg19
import time
import numpy as np

#----------BatchGenerator----------#
BATCH_SIZE = 16
epoch = 5
BG = BatchGenerator(batch_size=BATCH_SIZE)
data = BG.get_data()
NUM_BATCH = BG.len()
print(NUM_BATCH, 'batches.')

#----------Load pretrained network----------#
vgg19 = Vgg19(vgg19_npy_path='./vgg19_fine_tuning.npy')
rec_loss = np.load('rec_loss.npy').tolist()
print('record loss loaded.')

for e in range(epoch):
    print('epoch', e)
    BG.generate_batch(batch_size=BATCH_SIZE)
    loss_train = 0
    accuracy_train = 0
    for i in range(NUM_BATCH):
        batch_x, batch_y = BG.get(i)
        loss_batch, accuracy_batch = vgg19.train(batch_x, batch_y, lr=1e-5,
                    save_weights_only=False),
    ModelCheckpoint(monitor='val_loss',
                    filepath=config["wieghts_file_bestval"],
                    save_best_only=True,
                    save_weights_only=True),
    ModelCheckpoint(monitor='val_loss',
                    filepath=config["wieghts_file_lasttrain"],
                    save_best_only=False,
                    save_weights_only=True),
    TensorBoard(log_dir=config["tensorboar_log_dir"]),
    CSVLogger(config["logging_file"], append=True)
]

# training
from utils import BatchGenerator
import numpy as np

model.fit_generator(
    generator=BatchGenerator(trainX, trainY, config["batch_size"], augment=True),
    steps_per_epoch=np.ceil(float(len(trainX)) / config["batch_size"]),
    epochs=config["n_epochs"],
    verbose=1,
    callbacks=callbacks,
    validation_data=BatchGenerator(testX, testY, config["batch_size"], augment=False),
    validation_steps=np.ceil(float(len(testX)) / config["batch_size"]))
def main(args):
    train_df = pd.read_pickle(args.train_data)
    valid_df = pd.read_pickle(args.valid_data)
    train_df.fillna('NO_SUBTITLE', inplace=True)
    valid_df.fillna('NO_SUBTITLE', inplace=True)

    tokenizer = get_tokenizer(
        args.transfer,
        train_df.repl_words.tolist() + valid_df.repl_words.tolist())

    model = Att_BiLSTM_CRF(vocab_size=len(tokenizer.vocab_word),
                           tag_to_ix=tokenizer.vocab_tag,
                           embedding_dim=args.word_emb_size,
                           lstm1_units=args.lstm1_units,
                           lstm2_units=args.lstm2_units)

    if args.transfer:
        bilm_model = BiLM(embedding_dim=args.bilm_emb_size,
                          lstm_units=args.bilm_lstm_units,
                          vocab_size=len(tokenizer.vocab_word))
        model = transfer_weight(model, bilm_model, args.bilm_model_path)

    # choose CPU / GPU mode
    if torch.cuda.device_count() > 1:
        print("Use", torch.cuda.device_count(), "GPUs.")
        model = torch.nn.DataParallel(model)
    elif torch.cuda.device_count() == 1:
        print("Use single GPU.")
    else:
        print("Use CPU.")
    model.to(DEVICE)

    train_sentences, train_sentembs_hash, train_tag_seq = get_data(
        train_df, args.target_col, tokenizer)
    valid_sentences, valid_sentembs_hash, valid_tag_seq = get_data(
        valid_df, args.target_col, tokenizer)

    # create mini-batch generator
    if args.under_sampling:
        batch_generator = BatchGeneratorWithUnderSampling(
            tokenizer.vocab_tag,
            batch_size=args.batch_size,
            shuffle=True,
            negative_rate=args.under_sampling)
    else:
        batch_generator = BatchGenerator(batch_size=args.batch_size, shuffle=True)
    batch_generator.get_section_embs(train_df)
    batch_generator.get_section_embs(valid_df)

    print("Start training...")
    if args.early_stopping:
        early_stopping = EarlyStopping(patience=args.early_stopping)
    else:
        early_stopping = None

    train(model,
          (train_sentences, train_sentembs_hash, train_tag_seq),
          (valid_sentences, valid_sentembs_hash, valid_tag_seq),
          epochs=args.epochs,
          batch_generator=batch_generator,
          early_stopping=early_stopping)

    print("Save model")
    torch.save(model.state_dict(), args.output)
def test(self):
    run_flags = self.flags
    sess = self.sess
    sw = self.sw

    hr_img = tf.placeholder(tf.float32, [
        None, run_flags.input_width * run_flags.scale,
        run_flags.input_width * run_flags.scale, 3
    ])  # 128*128*3 as default
    lr_img = tf.placeholder(tf.float32, [
        None, run_flags.input_width, run_flags.input_length, 3
    ])  # 64*64*3 as default

    myModel = Model(locals())
    out_gen = Model.generative(myModel, lr_img)
    real_out_dis = Model.discriminative(myModel, hr_img)
    fake_out_dis = Model.discriminative(myModel, out_gen, reuse=True)

    cost_gen, cost_dis, var_train, var_gen, var_dis = \
        Model.costs_and_vars(myModel, hr_img, out_gen, real_out_dis, fake_out_dis)
    # cost_gen, cost_dis, var_train, var_gen, var_dis = \
    #     Model.wgan_loss(myModel, hr_img, out_gen, real_out_dis, fake_out_dis)

    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        saver = tf.train.Saver()

        try:
            saver.restore(sess, '/'.join(['models', run_flags.model, run_flags.model]))
        except:
            print('Model could not be restored. Exiting.')
            exit()

        makedirs(run_flags.out_path)
        print('Saving test results ...')

        start = 0
        for batch in BatchGenerator(run_flags.batch_size, self.datasize):
            batch_big = self.dataset[batch] / 255.0
            batch_sml = array([imresize(img, size=(run_flags.input_width, run_flags.input_length, 3)) \
                               for img in batch_big])

            superres_imgs = sess.run(out_gen, feed_dict={lr_img: batch_sml})

            gc, dc = sess.run([cost_gen, cost_dis], \
                              feed_dict={hr_img: batch_big, lr_img: batch_sml})

            images = concatenate( \
                ( \
                    array([imresize(img, size=(128, 128, 3)) / 255.0 for img in batch_sml]), \
                    superres_imgs,
                    batch_big \
                ), 2)

            for idx, image in enumerate(images):
                imsave('%s/%d.png' % (run_flags.out_path, start + idx), image)

            start += run_flags.batch_size

            print('%d/%d saved successfully: Generative cost=%.9f, Discriminative cost=%.9f' % \
                  (min(start, self.datasize), self.datasize, gc, dc))
def train(self, N, row, col, T, n_features, n_pairwise_features, hidden_layer_sizes,
          n_iterations, batch_size, n_samples, holdout_ratio_valid, learning_rate,
          root_savedir, log_interval=10, no_train_metric=False, seed=None, debug=False):
    """
    Training routine.

    Note about the data: the (row, col) tuples of the ON (i.e., one-valued) entries of the
    graph are to be passed, and they should correspond to the upper triangle of the graph.
    (Recall we do not allow self-links.) Regardless, the code will make a symmetric graph
    out of all passed entries (within the upper triangle or not) and only the upper triangle
    of the resulting matrix will be kept.

    :param N: Number of nodes in the graph.
    :param row: row indices corresponding to the ON entries (in the upper triangle).
    :param col: col indices corresponding to the ON entries (in the upper triangle).
    :param T: Truncation level for the DP.
    :param n_features:
    :param hidden_layer_sizes:
    :param n_iterations:
    :param batch_size: HALF the minibatch size. In particular, we will always add the
        symmetric entry in the graph (i.e., the corresponding entry in the lower triangle)
        in the minibatch.
    :param n_samples:
    :param holdout_ratio_valid:
    :param learning_rate:
    :param root_savedir:
    :param no_train_metric:
    :param seed:
    :param debug:
    :return:
    """

    self.N = N
    self.T = T
    self.n_features = n_features
    self.n_pairwise_features = n_pairwise_features
    self.hidden_layer_sizes = hidden_layer_sizes

    if not os.path.exists(root_savedir):
        os.makedirs(root_savedir)

    # Data handling.
    X_sp = sp.csr_matrix((np.ones(len(row)), (row, col)), shape=[N, N])
    X_sp = X_sp + X_sp.transpose()
    X_sp = sp.triu(X_sp, k=1)
    row, col = X_sp.nonzero()
    pairs = get_pairs(N, row, col)
    pairs = pairs.astype(int)
    batch_generator = BatchGenerator(pairs, batch_size,
                                     holdout_ratio=holdout_ratio_valid, seed=seed)

    # Construct the TF graph.
    self.construct_graph()

    all_vars = tf.trainable_variables()
    print("\nTrainable variables:")
    pprint([var_.name for var_ in all_vars])

    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(-self.elbo)

    ### Create q(Z) variational parameters ###
    # before this was uniformly initialized
    # self.qZ_ = np.ones([N, T]) / T
    self.qZ_ = np.random.dirichlet(np.ones(T), size=N)  # (N, T)

    # the following quantity needs to be passed to the TF graph and must be updated
    # after every update to qZ
    sum_qZ_above = np.zeros([N, T - 1])
    for k in range(T - 1):
        sum_qZ_above[:, k] = np.sum(self.qZ_[:, k + 1:], axis=1)

    # Training.
    if not no_train_metric:
        train_elbo = tf.placeholder(dtype=tf.float32, shape=[], name='train_elbo')
        train_elbo_summary = tf.summary.scalar('train_elbo', train_elbo)

        train_ll = tf.placeholder(dtype=tf.float32, shape=[], name='train_ll')
        train_ll_summary = tf.summary.scalar('train_ll', train_ll)

    if holdout_ratio_valid is not None:
        test_ll = tf.placeholder(dtype=tf.float32, shape=[], name='test_ll')
        test_ll_summary = tf.summary.scalar('test_ll', test_ll)

    # Grab all scalar variables, to track in Tensorboard.
    trainable_vars = tf.trainable_variables()
    scalar_summaries = [tf.summary.scalar(tensor_.name, tensor_)
                        for tensor_ in trainable_vars if len(tensor_.shape) == 0]
    tensor_summaries = [tf.summary.histogram(tensor_.name, tensor_)
                        for tensor_ in trainable_vars if len(tensor_.shape) > 0]

    root_logdir = os.path.join(root_savedir, "tf_logs")
    writer = tf.summary.FileWriter(root_logdir)

    saver = tf.train.Saver()
    init = tf.global_variables_initializer()

    with tf.Session() as sess:
        init.run()

        if not no_train_metric:
            # add symmetric entries from the lower triangle
            train_data = batch_generator.train
            row = np.concatenate([train_data[:, 0], train_data[:, 1]])
            col = np.concatenate([train_data[:, 1], train_data[:, 0]])
            val = np.concatenate([train_data[:, 2], train_data[:, 2]])
            train_dict = {self.row: row, self.col: col, self.val: val, self.batch_scale: 1.0}

        if holdout_ratio_valid is not None:
            test_data = batch_generator.test
            row = np.concatenate([test_data[:, 0], test_data[:, 1]])
            col = np.concatenate([test_data[:, 1], test_data[:, 0]])
            val = np.concatenate([test_data[:, 2], test_data[:, 2]])
            test_dict = {self.row: row, self.col: col, self.val: val, self.batch_scale: 1.0}

        logging.info("Starting training...")

        for iteration in range(n_iterations):
            batch = batch_generator.next_batch()
            batch_dict = {
                self.row: np.concatenate([batch[:, 0], batch[:, 1]]),
                self.col: np.concatenate([batch[:, 1], batch[:, 0]]),
                self.val: np.concatenate([batch[:, 2], batch[:, 2]]),
                self.qZ: self.qZ_,
                self.n_samples: n_samples,
                self.batch_scale: len(pairs) / len(batch),
                self.sum_qZ_above: sum_qZ_above,
            }

            # make a gradient update
            sess.run(train_op, feed_dict=batch_dict)

            # update qZ analytically
            self.update_qZ(sess=sess, batch=batch, n_samples=n_samples, debug=debug)

            # This update to sum_qZ_above used to happen at the beginning of the iteration.
            # Doing it here refreshes sum_qZ_above before logging the intermediate losses, and
            # one more time before saving the model, which makes more sense. (It could also be
            # computed inside construct_graph(), at the cost of recomputing it a few more
            # times, which would make the code cleaner.)
            for k in range(T - 1):
                sum_qZ_above[:, k] = np.sum(self.qZ_[:, k + 1:], axis=1)

            if iteration % log_interval == 0:
                # Add scalar variables to Tensorboard.
                for summ_str in sess.run(scalar_summaries):
                    writer.add_summary(summ_str, iteration)
                # Add tensor variables to Tensorboard.
                for summ_str in sess.run(tensor_summaries):
                    writer.add_summary(summ_str, iteration)

                if not no_train_metric:
                    train_dict.update({self.qZ: self.qZ_,
                                       self.sum_qZ_above: sum_qZ_above,
                                       self.n_samples: 100})
                    train_ll_, train_elbo_ = sess.run([self.data_loglikel, self.elbo],
                                                      feed_dict=train_dict)
                    train_ll_summary_str, train_elbo_summary_str = sess.run(
                        [train_ll_summary, train_elbo_summary],
                        feed_dict={train_ll: train_ll_, train_elbo: train_elbo_})
                    writer.add_summary(train_ll_summary_str, iteration)
                    writer.add_summary(train_elbo_summary_str, iteration)

                if holdout_ratio_valid is not None:
                    test_dict.update({self.qZ: self.qZ_,
                                      self.sum_qZ_above: sum_qZ_above,
                                      self.n_samples: 100})
                    test_ll_ = sess.run(self.data_loglikel, feed_dict=test_dict)
                    test_ll_summary_str = sess.run(test_ll_summary, feed_dict={test_ll: test_ll_})
                    writer.add_summary(test_ll_summary_str, iteration)

                # Log training overview.
                log_str = "%-4d" % iteration
                if not no_train_metric:
                    log_str += "  ELBO: %.4e  Train ll: %.4e" % (train_elbo_, train_ll_)
                if holdout_ratio_valid is not None:
                    log_str += "  Valid ll: %.4e" % test_ll_
                logging.info(log_str)

        # save the model
        saver.save(sess, os.path.join(root_savedir, "model.ckpt"))

    # close the file writer
    writer.close()