def __init__(self):
    """Build the capsule-network graph, its margin loss and optimizer, then open a session.

    Exposes ``inp_holder`` / ``lab_holder`` (input and one-hot label
    placeholders), ``length`` (norm of the output capsules along axis 2),
    ``loss``, ``accuracy``, ``train_op`` and ``sess``.
    """
    with tf.variable_scope('holders'):
        images = tf.placeholder(tf.float32, [None, 28, 28, 1])
        labels = tf.placeholder(tf.float32, [None, 10])
        self.inp_holder = images
        self.lab_holder = labels

    with tf.variable_scope('mainMod'):
        net = M.Model(images)
        # Two strided convolutions, then the capsule stack.
        for kernel, channels in ((7, 64), (5, 128)):
            net.convLayer(kernel, channels, stride=2, activation=M.PARAM_RELU)
        net.capsulization(dim=16, caps=8)
        net.caps_conv(3, 8, 16, activation=None, usebias=False)
        net.caps_flatten()
        net.squash()
        net.capsLayer(10, 8, 3, BSIZE=128)
        net.squash()
        capsules = net.capsDown()

    with tf.variable_scope('loss'):
        caps_len = tf.norm(capsules, axis=2)
        self.length = caps_len
        # Margin loss: push the labelled capsule above 0.9 and the others
        # below 0.1, with the "absent" term down-weighted by 0.5.
        present_term = labels * tf.square(tf.maximum(0., 0.9 - caps_len))
        absent_term = 0.5 * (1 - labels) * tf.square(
            tf.maximum(0., caps_len - 0.1))
        per_class = present_term + absent_term
        self.loss = tf.reduce_mean(tf.reduce_sum(per_class, 1))
        self.accuracy = M.accuracy(caps_len, tf.argmax(labels, 1))

    with tf.variable_scope('opti'):
        optimizer = tf.train.AdamOptimizer(0.001)
        self.train_op = optimizer.minimize(self.loss)

    self.sess = tf.Session()
    M.loadSess(self.sess, './model/', init=True)
def build_graph():
    """Assemble the training graph around ``build_model``.

    Returns:
        Tuple ``(img_holder, lab_holder, loss, train_step, accuracy)``:
        the flattened-image placeholder, the label placeholder, the mean
        sigmoid cross-entropy loss, the gradient-descent training op
        (learning rate 0.1) and the accuracy tensor.
    """
    images = tf.placeholder(tf.float32, [None, 28 * 28])
    targets = tf.placeholder(tf.float32, [None, 10])
    logits = build_model(images)

    per_element = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=targets, logits=logits)
    mean_loss = tf.reduce_mean(per_element)

    acc = M.accuracy(logits, tf.argmax(targets, 1))

    sgd = tf.train.GradientDescentOptimizer(0.1)
    step_op = sgd.minimize(mean_loss)
    return images, targets, mean_loss, step_op, acc
def __init__(self, class_num, is_training=True, mod_dir='./model/'):
    """Build the residual classifier graph and open a session.

    Args:
        class_num: Width of the one-hot label placeholder.
        is_training: Passed to ``set_bn_training``; when true the loss and
            the Adam training op are also built.
        mod_dir: Directory handed to ``M.loadSess``.

    Sets ``img_holder``, ``lab_holder``, ``feat``, ``accuracy``, ``sess``
    and ``saver``; ``loss`` and ``train_op`` exist only when
    ``is_training`` is true.
    """
    self.mod_dir = mod_dir
    with tf.variable_scope('Input'):
        self.img_holder = tf.placeholder(tf.float32, [None, 128, 128, 3])
        self.lab_holder = tf.placeholder(tf.float32, [None, class_num])
    with tf.variable_scope('Res_101_cy'):
        mod = M.Model(self.img_holder)
        mod.set_bn_training(is_training)
        # 64x64
        mod.convLayer(7, 64, stride=2, activation=M.PARAM_LRELU,
                      batch_norm=True)
        mod.res_block(256, stride=1, activation=M.PARAM_LRELU)
        mod.res_block(256, stride=1, activation=M.PARAM_LRELU)
        mod.res_block(256, stride=1, activation=M.PARAM_LRELU)
        # 32x32
        mod.res_block(512, stride=2, activation=M.PARAM_LRELU)
        mod.res_block(512, stride=1, activation=M.PARAM_LRELU)
        mod.res_block(512, stride=1, activation=M.PARAM_LRELU)
        mod.res_block(512, stride=1, activation=M.PARAM_LRELU)
        # 16x16
        # Only the first block of the stage downsamples, as in the other
        # stages. Previously all 14 blocks used stride=2, which shrank the
        # map far below the 16x16 this stage (and the 8x8 average pool
        # below) is written for.
        # NOTE(review): checkpoints saved from the old all-stride-2 graph
        # may not restore if M.res_block creates different variables
        # depending on stride -- verify before reusing old weights.
        for block_idx in range(14):
            mod.res_block(1024, stride=2 if block_idx == 0 else 1,
                          activation=M.PARAM_LRELU)
        # 8x8
        mod.res_block(2048, stride=2, activation=M.PARAM_LRELU)
        mod.res_block(2048, stride=1, activation=M.PARAM_LRELU)
        mod.res_block(2048, stride=1, activation=M.PARAM_LRELU)
        mod.avgpoolLayer(8)
        mod.flatten()
        #mod.fcLayer(256,nobias=True)
        self.feat = mod.get_current_layer()
    with tf.variable_scope('Classification'):
        logit_layer, eval_layer = M.enforcedClassifier(
            self.feat, self.lab_holder, dropout=1, multi=None, L2norm=False)
        self.accuracy = M.accuracy(eval_layer,
                                   tf.argmax(self.lab_holder, -1))
    if is_training:
        print('Building optimizer...')
        self.loss = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(
                logits=logit_layer, labels=self.lab_holder))
        # Run the collected update ops together with every training step.
        with tf.control_dependencies(M.get_update_ops()):
            self.train_op = tf.train.AdamOptimizer(0.0001).minimize(
                self.loss)
    self.sess = tf.Session()
    self.saver = tf.train.Saver()
    M.loadSess(mod_dir, self.sess, init=True)
def train():
    """Build the ResNet training graph and run it in a monitored session.

    Training stops at ``max_steps`` or when the loss becomes NaN;
    checkpoints are written to ``train_dir``. Progress is printed every
    ``write_frequency`` steps by a session hook.
    """
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Input pipeline is pinned to the CPU.
        with tf.device('/cpu:0'):
            images, labels = model.altered_input()

        logits = model.resnet(images, scope='resnet1', istrain=True)
        loss = model.loss(logits, labels, scope='loss1')
        accuracy = model.accuracy(logits, labels, scope='accuracy')
        train_op = model.train(loss, global_step)

        class Log(tf.train.SessionRunHook):
            """Print accuracy, loss, throughput and elapsed time periodically."""

            def begin(self):
                self.step = -1
                self.start_time = time.time()
                self.total_time = time.time()

            def before_run(self, run_context):
                self.step += 1
                # Ask the session to also fetch loss and accuracy.
                return tf.train.SessionRunArgs([loss, accuracy])

            def after_run(self, run_context, run_values):
                """Log the fetched metrics every ``write_frequency`` steps."""
                if self.step % write_frequency:
                    return
                now = time.time()
                window = now - self.start_time
                elapsed = now - self.total_time
                self.start_time = now

                # Split the elapsed seconds into h / m / s.
                whole_minutes, secs = divmod(elapsed, 60)
                mins = whole_minutes % 60
                hours = elapsed // 3600

                loss_val, acc_val = run_values.results
                ex_per_sec = write_frequency * batch_size / window
                sec_per_batch = float(window / write_frequency)
                template = (
                    'step: %d, accuracy:%.3f loss: %.3f, examples/sec: %.2f, sec/batch: %1f, total time: %dh %dm %ds'
                )
                print(template % (self.step, acc_val, loss_val, ex_per_sec,
                                  sec_per_batch, hours, mins, secs))

        session_hooks = [
            tf.train.StopAtStepHook(last_step=max_steps),
            tf.train.NanTensorHook(loss),
            Log(),
        ]
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=train_dir,
                hooks=session_hooks,
                save_summaries_steps=100) as sess:
            while not sess.should_stop():
                sess.run(train_op)
def main(ckpt = None):
    """Train the model, optionally resuming from a checkpoint.

    Args:
        ckpt: Optional checkpoint path; when truthy the variables are
            restored from it after initialization.

    Logs loss every 10 steps, writes summaries every 100 steps and saves
    a checkpoint every 1000 steps, on the last step, or when the loss
    reaches exactly zero (which also ends training).
    """
    # Previously `tf.Session().graph.as_default()`: that created a throwaway
    # Session (never closed) only to reach the default graph.
    with tf.get_default_graph().as_default():
        keep_prob = tf.placeholder("float")

        # Prepare the data
        images, labels, _ = data_input.load_data([FLAGS.train], FLAGS.batch_size, shuffle = True, distored = True)

        # Build the model
        logits = model.inference_deep(images, keep_prob, data_input.DST_LONG_SIZE,data_input.DST_SHORT_SIZE, data_input.NUM_CLASS)
        loss_value = model.loss(logits, labels)
        train_op = model.training(loss_value, FLAGS.learning_rate)
        acc = model.accuracy(logits, labels)

        saver = tf.train.Saver(max_to_keep = 0)

        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        if ckpt:
            # Single-argument print() emits the same text on Python 2 and 3.
            print('restore ckpt %s' % (ckpt,))
            saver.restore(sess, ckpt)
        tf.train.start_queue_runners(sess)

        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph_def)

        # Monitor the model while it trains
        for step in range(FLAGS.max_steps):
            start_time = time.time()
            _, loss_result, acc_res = sess.run([train_op, loss_value, acc], feed_dict={keep_prob: 0.99})
            duration = time.time() - start_time

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_result, examples_per_sec, sec_per_batch))
                print('acc_res %s' % (acc_res,))

            if step % 100 == 0:
                # Summaries are evaluated with dropout disabled.
                summary_str = sess.run(summary_op,feed_dict={keep_prob: 1.0})
                summary_writer.add_summary(summary_str, step)

            if step % 1000 == 0 or (step + 1) == FLAGS.max_steps or loss_result == 0:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                save_path = saver.save(sess, checkpoint_path, global_step=step)
                print('%s saved' % save_path)

            if loss_result == 0:
                print('loss is zero')
                break
def __init__(self, age_size, id_num, model_path='./aim_model/'):
    """Wire up encoder, discriminators, attention generator and all losses.

    Args:
        age_size: Width of the age placeholder; also passed to the
            age-related networks.
        id_num: Width of the identity placeholder; also passed to
            ``N.discriminator_f``.
        model_path: Directory handed to ``M.loadSess``.
    """
    self.model_path = model_path

    def float_input(shape):
        return tf.placeholder(tf.float32, shape)

    self.inp_holder = float_input([None, 128, 128, 3])
    # self.real_holder = tf.placeholder(tf.float32,[None,128,128,3])
    self.uni_holder = float_input([None, 2, 2, 512])
    self.age_holder = float_input([None, age_size])
    self.target_holder = float_input([None, 128, 128, 3])
    self.id_holder = float_input([None, id_num])

    # Encoded feature of the input image.
    self.feat = N.feat_encoder(self.inp_holder)

    # Feature-level discriminator outputs: adv1 and identity prediction.
    adv_feat, id_pred = N.discriminator_f(self.feat, id_num)
    adv_feat_uni, _ = N.discriminator_f(self.uni_holder, id_num)

    # Attention map A and content C from the age-conditioned feature.
    age_map = self.expand(self.age_holder, self.feat)
    conditioned = tf.concat([age_map, self.feat], -1)
    self.A, self.C = N.generator_att(conditioned)

    # Synthesized image: attention-weighted mix of C and the input.
    from_content = self.A * self.C
    from_input = (1. - self.A) * self.inp_holder
    self.generated = from_content + from_input

    # Image-level discriminator outputs: adv2 and age prediction.
    adv_img, age_fake = N.discriminator(self.generated, age_size)
    adv_img_real, age_real = N.discriminator(self.target_holder, age_size)

    # Age classifiers on the encoded feature (ai1 / ai2).
    age_cls_r = N.age_classify_r(self.feat, age_size)
    age_cls = N.age_classify(self.feat, age_size)

    # Loss builders.
    print('Building losses...')
    self.build_loss_mc()
    self.build_loss_adv1(adv_feat, adv_feat_uni)
    self.build_loss_ip(id_pred)
    self.build_loss_adv2(adv_img, adv_img_real)
    self.build_loss_ae(age_fake, age_real)
    self.build_loss_ai1(age_cls_r)
    self.build_loss_ai2(age_cls, age_size)
    self.build_loss_A()
    self.update_ops()

    self.accuracy = M.accuracy(id_pred, tf.argmax(self.id_holder, -1))

    self.sess = tf.Session()
    M.loadSess(model_path, self.sess, init=True)
    self.saver = tf.train.Saver()
def test_accuracy(self):
    """Run a freshly built ``Net`` over the validation set and log its running accuracy.

    The forward pass is executed under ``torch.no_grad()`` so no autograd
    graph is recorded during evaluation.
    """
    # Local import keeps this block self-contained.
    import torch

    batches = self.loader.load(self.valset,
                               batch_size=10,
                               shuffle=True,
                               to_tensor=True)
    model = Net(word_vocab_size=self.params.word_vocab_size,
                tag_vocab_size=self.params.tag_vocab_size,
                embedding_dim=self.params.embedding_dim,
                lstm_hidden_dim=self.params.lstm_hidden_dim)
    running_avg = RunningAvg()
    for batch in batches:
        inputs, targets = batch
        # Evaluation only: skip gradient tracking for the forward pass.
        with torch.no_grad():
            outputs = model(inputs)
        acc = accuracy(outputs.data, targets.data)
        running_avg.step(acc.item())
    self.logger.debug('running accuracy: {}'.format(running_avg()))
def build_graph():
    """Assemble the margin-loss training graph around ``build_model``.

    Returns:
        Tuple ``(inpholder, labholder, marg_loss, train_step, acc)``: the
        flattened-image placeholder, the label placeholder, the scalar
        margin loss, the Adam training op (learning rate 0.0001) and the
        accuracy tensor.
    """
    images = tf.placeholder(tf.float32, [None, 28 * 28])
    targets = tf.placeholder(tf.float32, [None, 10])
    capsules = build_model(images)

    with tf.variable_scope('length'):
        # Euclidean norm over the last axis.
        squared_sum = tf.reduce_sum(tf.square(capsules), -1)
        caps_len = tf.sqrt(squared_sum)

    with tf.variable_scope('marg_loss'):
        # Labelled class should exceed 0.9; the others should stay below
        # 0.1, with that second term down-weighted by 0.5.
        present_term = targets * tf.square(tf.maximum(0., 0.9 - caps_len))
        absent_term = 0.5 * (1 - targets) * tf.square(
            tf.maximum(0., caps_len - 0.1))
        per_class = present_term + absent_term
        total = tf.reduce_mean(tf.reduce_sum(per_class, 1))

    with tf.variable_scope('opti'):
        step_op = tf.train.AdamOptimizer(0.0001).minimize(total)

    with tf.variable_scope('Accuracy'):
        accuracy_op = M.accuracy(caps_len, tf.argmax(targets, 1))

    return images, targets, total, step_op, accuracy_op
def eval_sensitivity(model, dataloder, use_cuda=True):
    """Evaluate loss, accuracy and input-gradient sensitivity over a dataset.

    Sensitivity is the norm of each sample's gradient of the batch-mean
    cross-entropy loss with respect to the input, averaged over the dataset.

    Args:
        model: Network to evaluate; it is moved to the chosen device and
            put in eval mode.
        dataloder: Iterable of ``(data, target)`` batches that exposes
            ``.dataset`` (its length is the averaging denominator).
        use_cuda: Use ``cuda:0`` when available; otherwise fall back to CPU.

    Returns:
        Tuple ``(avg_loss, avg_acc, avg_sensitivity)``.
    """
    if use_cuda and torch.cuda.is_available():
        from torch.backends import cudnn
        cudnn.benchmark = True
        device = torch.device('cuda:0')
    else:
        # Previously `device` was left undefined on this path, so the
        # first `data.to(device)` raised NameError.
        device = torch.device('cpu')
    model.to(device)
    model.eval()

    # Summed (not averaged) loss so totals can be divided by dataset size.
    criterion = nn.CrossEntropyLoss(size_average=False)
    total_loss, total_sensitivity, total_acc = 0, 0, 0
    for data, target in dataloder:
        # Gradients are taken with respect to the input itself.
        data = data.to(device).requires_grad_()
        target = target.to(device)
        output = model(data)
        loss_elem = criterion(output, target)
        total_acc += accuracy(output, target)
        total_loss += loss_elem.item()
        grad_x, = autograd.grad(loss_elem / data.size(0), data)
        for sample_grad in grad_x:
            total_sensitivity += torch.norm(sample_grad).item()

    num_samples = len(dataloder.dataset)
    avg_loss = total_loss / num_samples
    avg_acc = total_acc / num_samples
    avg_sensitivity = total_sensitivity / num_samples
    return avg_loss, avg_acc, avg_sensitivity
def run_training():
    """Build the model graph and train it inside a monitored session."""
    with tf.Graph().as_default():
        # Counts optimizer runs, i.e. the number of batches processed.
        step_counter = tf.Variable(0, trainable=False, name='global_step')

        # Number of digits the current batch trains on.
        digit_count = tf.placeholder(tf.int32, name='num_digits')

        # Image / label input pipeline.
        batch_images, batch_labels = input.training_input(
            digit_count, FLAGS.batch_size)

        # Predictions from the inference model.
        second_arg = tf.constant(0.5)
        batch_logits = model.inference(batch_images, second_arg)

        # Loss and accuracy.
        loss_op = model.loss(digit_count, batch_logits, batch_labels)
        accuracy_op = model.accuracy(digit_count, batch_logits, batch_labels)

        # One optimization step on one batch of examples.
        optimize_op = model.training(loss_op, step_counter)

        session_hooks = [
            NumDigitsHook(digit_count, accuracy_op),
            LoggerHook(digit_count, loss_op, accuracy_op),
            tf.train.NanTensorHook(loss_op),
        ]
        # A non-positive max_steps means "train until interrupted".
        if FLAGS.max_steps > 0:
            stop_hook = tf.train.StopAtStepHook(last_step=FLAGS.max_steps)
            session_hooks.append(stop_hook)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.logdir, hooks=session_hooks) as sess:
            while not sess.should_stop():
                sess.run([optimize_op])
def main(ckpt_path, csv='test.txt', list_files=False):
    """Restore a checkpoint, evaluate one batch and print its accuracy.

    Args:
        ckpt_path: Checkpoint to restore.
        csv: File list passed to ``data_input.load_data_for_test``.
        list_files: When true, additionally print a ``cp <file> out_goods``
            or ``cp <file> out_bads`` line for every evaluated file,
            depending on whether its prediction matched. Defaults to False,
            which keeps the previous behaviour: that listing sat behind an
            unconditional ``return`` and was unreachable.
    """
    with tf.Graph().as_default():
        images, labels, filename = data_input.load_data_for_test([csv], 610)
        #print 'start', images, labels
        keep_prob = tf.placeholder("float")

        logits = model.inference_deep(images, keep_prob,
                                      data_input.DST_LONG_SIZE,
                                      data_input.DST_SHORT_SIZE,
                                      data_input.NUM_CLASS)
        acc = model.accuracy(logits, labels)

        saver = tf.train.Saver()
        sess = tf.Session()
        sess.run(tf.initialize_all_variables())
        saver.restore(sess, ckpt_path)
        tf.train.start_queue_runners(sess)

        acc_res, filename_res, actual_res, expect_res = sess.run(
            [acc, filename, logits, labels], feed_dict={keep_prob: 1.0})

        # Single-argument print() emits the same text on Python 2 and 3.
        print('accuracy %s' % (acc_res,))
        if not list_files:
            return

        goods = []
        bads = []
        for idx, (act, exp) in enumerate(zip(actual_res, expect_res)):
            if np.argmax(act) == np.argmax(exp):
                goods.append(filename_res[idx])
            else:
                bads.append(filename_res[idx])

        print('good')
        for f in goods:
            print('cp %s out_goods' % (f,))
        print('bad')
        for f in bads:
            print('cp %s out_bads' % (f,))
def run_training():
    """Train the Classy model for ``FLAGS.num_epochs`` epochs.

    Prints progress every 10 steps, writes summaries and the loss /
    accuracy histories every 100 steps, and checkpoints twice per epoch,
    on the last step, or when the module-level ``_shutdown`` flag is set
    (which also ends training).

    Raises:
        AssertionError: If the loss becomes NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for runway
        images, labels = rw.inputs(FLAGS.batch_size,
                                   NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)

        # Batch normalization
        if FLAGS.batch_norm:
            phase_train = tf.Variable(True, trainable=False, dtype=tf.bool)
            images = batch_norm(images, 3, phase_train=phase_train)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = cl.inference(images,
                              keep_prob=FLAGS.keep_prob,
                              overlap_pool=FLAGS.overlap_pool)

        # Calculate loss.
        loss = cl.loss(logits, labels)

        # Calculate accuracy
        accuracy = cl.accuracy(logits, labels)
        cl.add_accuracy_summaries(accuracy)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = train(loss, global_step)

        # Create a saver. Store 2 files per epoch, plus 2 for the beginning
        # and end of training
        saver = tf.train.Saver(tf.all_variables(),
                               max_to_keep=FLAGS.num_epochs*2+2)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # start the summary writer
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # start the training!
        accuracies = []
        losses = []
        steps_per_epoch = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                              FLAGS.batch_size)
        # At least 1: an epoch with fewer than two steps used to give 0
        # here and crash on `step % steps_per_checkpoint`.
        steps_per_checkpoint = max(1, int(steps_per_epoch / 2))
        max_steps = FLAGS.num_epochs * steps_per_epoch
        for step in range(max_steps):
            start_time = time.time()
            _, loss_value, acc_value = sess.run([train_op, loss, accuracy])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            losses.append(loss_value)
            accuracies.append(acc_value)

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f, train_acc = %.2f, (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    acc_value, examples_per_sec,
                                    sec_per_batch))

            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)
                # Persist the running histories alongside the summaries.
                np.save(os.path.join(FLAGS.train_dir, 'tr_losses'),
                        np.array(losses))
                np.save(os.path.join(FLAGS.train_dir, 'tr_accuracies'),
                        np.array(accuracies))

            # Save the model checkpoint periodically.
            if step % steps_per_checkpoint == 0 or (step + 1) == max_steps or _shutdown:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

            if _shutdown:
                break

        print('Classy training finished!')
CLASS, dropout=0.15, enforced=True) with tf.name_scope('loss'): loss = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=labholder, logits=classlayer)) tf.summary.scalar('loss', loss) return classlayer, loss, imgholder, labholder, featurelayer, evallayer with tf.variable_scope('MainModel'): classlayer, loss, imgholder, labholder, featurelayer, evallayer = res_18() with tf.name_scope('accuracy'): acc = M.accuracy(evallayer, tf.argmax(labholder, 1)) tf.summary.scalar('accuracy', acc) with tf.name_scope('optimizer'): extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) train = tf.train.AdamOptimizer(0.0001).minimize(loss) modelpath = './resnet/' logpath = './log/' listFile = 'list.txt' validationDB = '/home/psl/7T/ms_clean_0/train1_235.hd5' def training(EPOC): with tf.Session() as sess: writer = tf.summary.FileWriter(logpath, sess.graph)
def main():  # pylint: disable=too-many-locals, too-many-statements
    """Build the character-level RNN, train it and print progress.

    At every display interval this prints the averaged training loss and
    accuracy, runs an evaluation pass, and lets the network generate a
    sample of text.
    """
    corpus = utils.retrieve_text(params.TEXT_FILE)
    chars = set(corpus)
    chars_size = len(chars)
    dictionary, reverse_dictionary = utils.build_dataset(chars)
    train_one_hots, eval_one_hots = utils.create_one_hots(corpus, dictionary)

    x = tf.placeholder(tf.float32, [None, params.N_INPUT * chars_size])
    labels = tf.placeholder(tf.float32, [None, chars_size])
    logits = model.inference(x, chars_size)
    cost = model.cost(logits, labels)
    optimizer = model.optimizer(cost)
    accuracy = model.accuracy(logits, labels)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(
            params.SUMMARIES_DIR + '/train', sess.graph)
        eval_writer = tf.summary.FileWriter(params.SUMMARIES_DIR + '/eval')

        loss_total = 0
        accuracy_total = 0
        fetches = [optimizer, accuracy, cost, merged]

        for epoch in range(params.EPOCHS):
            usable = len(train_one_hots) - params.N_INPUT
            batch_starts = range(0, usable - params.BATCH_SIZE,
                                 params.BATCH_SIZE)
            for index in batch_starts:
                input_x, output_y = utils.create_training_io(
                    train_one_hots, index, params.BATCH_SIZE, chars_size)
                _, acc, loss, summary = sess.run(
                    fetches, feed_dict={x: input_x, labels: output_y})

                step = epoch * usable + index
                train_writer.add_summary(summary, step)
                loss_total += loss
                accuracy_total += acc

                # Report only at non-zero multiples of the display step.
                if index % params.TRAINING_DISPLAY_STEP != 0 or not index:
                    continue

                scale = params.BATCH_SIZE / params.TRAINING_DISPLAY_STEP
                print(
                    'Epoch: {} Training Step: {}\n'
                    'Training Set: Loss: {:.3f} '
                    'Accuracy: {:.3f}'.format(
                        epoch,
                        index,
                        loss_total * scale,
                        accuracy_total * scale,
                    )
                )
                loss_total = accuracy_total = 0

                evaluation.evaluation(sess, step, eval_one_hots, x, labels,
                                      accuracy, cost, eval_writer,
                                      chars_size, merged)
                utils.create_example_text(sess, x, logits, chars,
                                          dictionary, reverse_dictionary)
def __init__(self, model_path='./aim_model_gen/'):
    """Build the two-pass attention generator graph with all of its losses.

    The input is passed through the attention generator once with
    ``age_holder`` and the result a second time with ``age_holder2``.
    Adversarial, age, feature-consistency and attention losses are built
    on top, then a session is opened and weights are loaded.

    Args:
        model_path: Directory handed to the first ``M.loadSess`` call.
    """
    self.model_path = model_path
    self.inp_holder = tf.placeholder(tf.float32, [None, 128, 128, 3])
    self.age_holder = tf.placeholder(tf.float32, [None, 1])
    self.age_holder2 = tf.placeholder(tf.float32, [None, 1])

    def attend(age, image):
        """Run one generator pass; return (attention, content, blended image)."""
        age_map = self.expand(age, image)
        conditioned = tf.concat([age_map, image], -1)
        att, content = N.generator_att(conditioned)
        blended = att * content + (1. - att) * image
        return att, content, blended

    # First pass on the input, second pass on the first synthesized image.
    A, C, generated = attend(self.age_holder, self.inp_holder)
    A2, C2, generated2 = attend(self.age_holder2, generated)

    # Discriminator outputs (adv2 score and age prediction) per image.
    adv2, age_pred = N.discriminator(generated)
    adv2_real, age_pred_real = N.discriminator(self.inp_holder)
    adv2_2, age_pred2 = N.discriminator(generated2)

    # Keep encoder features of both synthesized images close to the input's.
    feat_in = N.feat_encoder(self.inp_holder)
    feat_gen1 = N.feat_encoder(generated)
    feat_gen2 = N.feat_encoder(generated2)
    self.feat_loss = tf.reduce_mean(
        tf.square(feat_in - feat_gen1) + tf.square(feat_in - feat_gen2))
    feat_optimizer = tf.train.AdamOptimizer(0.00001)
    self.train_feat = feat_optimizer.minimize(
        self.feat_loss, var_list=M.get_all_vars('gen_att'))

    # get gradient penalty
    # gamma1 = tf.random_uniform([],0.0,1.0)
    # interp1 = gamma1 * generated + (1. - gamma1) * self.inp_holder
    # interp1_y, _ = N.discriminator(interp1, 7)
    # grad_p1 = tf.gradients(interp1_y, interp1)[0]
    # grad_p1 = tf.sqrt(tf.reduce_sum(tf.square(grad_p1),axis=[1,2,3]))
    # grad_p1 = tf.reduce_mean(tf.square(grad_p1 - 1.) * 10.)
    # gamma2 = tf.random_uniform([],0.0,1.0)
    # interp2 = gamma2 * generated + (1. - gamma2) * self.inp_holder
    # interp2_y, _ = N.discriminator(interp2, 7)
    # grad_p2 = tf.gradients(interp2_y, interp2)[0]
    # grad_p2 = tf.sqrt(tf.reduce_sum(tf.square(grad_p2),axis=[1,2,3]))
    # grad_p2 = tf.reduce_mean(tf.square(grad_p2 - 1.) * 10.)
    # Gradient penalty is currently disabled; zeros are passed instead.
    grad_p1 = grad_p2 = 0.

    # Loss builders.
    self.mc_loss, self.train_mc = self.build_loss_mc(
        generated2, self.inp_holder)

    (self.adv2_loss_d1,
     self.adv2_loss_g1,
     self.train_adv2_1) = self.build_loss_adv2(adv2, adv2_real, grad_p1)
    (self.adv2_loss_d2,
     self.adv2_loss_g2,
     self.train_adv2_2) = self.build_loss_adv2(adv2_2, adv2_real, grad_p2)

    self.age_cls_loss_dis, self.train_ae_dis = self.build_loss_ae_dis(
        age_pred_real, self.age_holder2)
    self.age_cls_loss_gen, self.train_ae_gen = self.build_loss_ae_gen(
        age_pred, self.age_holder)
    self.age_cls_loss_gen2, self.train_ae_gen2 = self.build_loss_ae_gen(
        age_pred2, self.age_holder2)

    self.loss_A, self.train_A = self.build_loss_A(A)
    self.loss_A2, self.train_A2 = self.build_loss_A(A2)
    self.update_ops()

    self.accuracy = M.accuracy(age_pred_real,
                               tf.argmax(self.age_holder2, -1))

    # Mean squared attention of each pass.
    self.A1_l = tf.reduce_mean(tf.square(A))
    self.A2_l = tf.reduce_mean(tf.square(A2))

    self.generated = generated
    self.A = A
    self.C = C

    self.sess = tf.Session()
    M.loadSess(model_path, self.sess, init=True)
    # Encoder variables are loaded separately from './aim_model/'.
    M.loadSess('./aim_model/', self.sess,
               var_list=M.get_all_vars('encoder'))
    self.saver = tf.train.Saver()
def train():
    """Load (or extract and cache) features, then train and evaluate the model.

    Features for the training and test folds are cached as pickles. The
    training features are split at random, roughly 70/30, into the sets
    actually used for fitting and for the per-step test report. Each outer
    step prints sample predictions, train accuracy, loss and micro F-score,
    and a checkpoint is saved every 1000 steps.
    """
    # NOTE: pickle.load must only ever be used on cache files this script
    # wrote itself; never point these paths at untrusted data.
    if not os.path.isfile(train_data_pickle):
        # training data
        train_features, train_labels = features(['fold0', 'fold1', 'fold2'])
        traindata = TrainData(train_features, train_labels)
        with open(train_data_pickle, mode='wb') as f:
            pickle.dump(traindata, f)
    else:
        print("loading: %s" % (train_data_pickle))
        with open(train_data_pickle, mode='rb') as f:
            traindata = pickle.load(f)
            train_features = traindata.train_inputs
            train_labels = traindata.train_targets

    if not os.path.isfile(test_data_pickle):
        test_features, test_labels = features(['fold3'])
        testdata = TestData(test_features, test_labels)
        with open(test_data_pickle, mode='wb') as f:
            pickle.dump(testdata, f)
    else:
        print("loading: %s" % (test_data_pickle))
        with open(test_data_pickle, mode='rb') as f:
            testdata = pickle.load(f)
            test_features = testdata.test_inputs
            test_labels = testdata.test_targets

    # TODO change to use train and test
    train_labels = one_hot_encode(train_labels)
    test_labels = one_hot_encode(test_labels)

    # random train and test sets.
    train_test_split = np.random.rand(len(train_features)) < 0.70
    train_x = train_features[train_test_split]
    train_y = train_labels[train_test_split]
    test_x = train_features[~train_test_split]
    test_y = train_labels[~train_test_split]

    n_dim = train_features.shape[1]
    print("input dim: %s" % (n_dim))

    # create placeholder
    X = tf.placeholder(tf.float32, [None, n_dim])
    Y = tf.placeholder(tf.float32, [None, FLAGS.num_classes])

    # build graph
    logits = model.inference(X, n_dim)
    weights = tf.all_variables()
    saver = tf.train.Saver(weights)

    # create loss
    loss = model.loss(logits, Y)
    tf.scalar_summary('loss', loss)
    accracy = model.accuracy(logits, Y)
    tf.scalar_summary('test accuracy', accracy)

    # train operation
    train_op = model.train_op(loss)

    # Built once here: creating tf.argmax inside the loop added new ops to
    # the graph on every step, so the graph grew without bound.
    predict_op = tf.argmax(logits, 1)
    # Labels are plain arrays, so their argmax needs no session at all.
    t_true = np.argmax(train_labels, 1)
    y_true = np.argmax(test_y, 1)

    # variable initializer
    init = tf.initialize_all_variables()

    # get Session
    sess = tf.Session()

    # summary merge and writer
    merged = tf.merge_all_summaries()
    train_writer = tf.train.SummaryWriter(FLAGS.summaries_dir)

    # initialize
    sess.run(init)

    for step in xrange(MAX_STEPS):
        t_pred = sess.run(predict_op, feed_dict={X: train_features})
        print("train samples pred: %s" % t_pred[:30])
        print("train samples target: %s" % t_true[:30])
        print('Train accuracy: ', sess.run(accracy, feed_dict={
            X: train_x,
            Y: train_y
        }))

        for epoch in xrange(training_epochs):
            summary, logits_val, _, loss_val = sess.run(
                [merged, logits, train_op, loss],
                feed_dict={
                    X: train_x,
                    Y: train_y
                })
        train_writer.add_summary(summary, step)
        print("step:%d, loss: %s" % (step, loss_val))

        y_pred = sess.run(predict_op, feed_dict={X: test_x})
        print("test samples pred: %s" % y_pred[:10])
        print("test samples target: %s" % y_true[:10])
        accracy_val = sess.run([accracy], feed_dict={X: test_x, Y: test_y})
        # print('Test accuracy: ', accracy_val)
        # train_writer.add_summary(accracy_val, step)
        p, r, f, s = precision_recall_fscore_support(y_true, y_pred,
                                                     average='micro')
        print("F-score: %s" % f)

        if step % 1000 == 0:
            saver.save(sess, FLAGS.ckpt_dir, global_step=step)
labels_placeholder = tf.placeholder("float", shape=(None, NUM_CLASSES)) # dropout率を入れる仮のTensor keep_prob = tf.placeholder("float") # inference()を呼び出してモデルを作る logits = model.inference(images_placeholder, keep_prob) # loss()を呼び出して損失を計算 loss_value = model.loss(logits, labels_placeholder) # training()を呼び出して訓練して学習モデルのパラメーターを調整する train_op = model.training(loss_value, FLAGS.learning_rate) # 精度の計算 acc = model.accuracy(logits, labels_placeholder) # 保存の準備 saver = tf.train.Saver() # Sessionの作成(TensorFlowの計算は絶対Sessionの中でやらなきゃだめ) sess = tf.Session() # 変数の初期化(Sessionを開始したらまず初期化) sess.run(tf.global_variables_initializer()) # TensorBoard表示の設定(TensorBoardの宣言的な?) summary_op = tf.summary.merge_all() # train_dirでTensorBoardログを出力するpathを指定 summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)
def run_training():
    """Train the Listnr model for ``FLAGS.num_epochs`` epochs.

    Prints progress and writes summaries every 100 steps. Once per epoch,
    near the end of training, or when the module-level ``_shutdown`` flag
    is set, it saves a checkpoint together with the per-batch loss and
    accuracy histories.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for runway
        # tr_frames_t, tr_labels_t = tm.inputs(FLAGS.batch_size)
        # ts_frames_t, ts_labels_t = tm.inputs(FLAGS.batch_size, train=False)
        # frames, labels = placeholder_inputs()
        frames, labels = tm.inputs(FLAGS.batch_size,
                                   NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)

        # Inference, loss and accuracy.
        logits = md.inference(frames)
        loss_op = md.loss(logits, labels)
        accuracy_op = md.accuracy(logits, labels)

        # One optimization step on one batch of examples.
        train_op = train(loss_op, global_step)

        saver = tf.train.Saver(tf.all_variables(),
                               max_to_keep=FLAGS.num_epochs)
        summary_op = tf.merge_all_summaries()
        init_op = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init_op)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        # run the training
        steps_per_epoch = int(NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
                              FLAGS.batch_size)
        max_steps = FLAGS.num_epochs * steps_per_epoch
        losses_epochs = []
        losses_batches = []
        accuracies_epochs = []
        accuracies_batches = []
        progress_fmt = ('%s: step %d, loss = %.2f, train_acc = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')

        for step in range(max_steps+1):
            tick = time.time()
            _, loss_value, acc_value = sess.run(
                [train_op, loss_op, accuracy_op])
            duration = time.time() - tick

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                examples_per_sec = FLAGS.batch_size / duration
                sec_per_batch = float(duration)
                print(progress_fmt % (datetime.now(), step, loss_value,
                                      acc_value, examples_per_sec,
                                      sec_per_batch))
                summary_writer.add_summary(sess.run(summary_op), step)

            losses_batches.append(loss_value)
            accuracies_batches.append(acc_value)

            # Save the model checkpoint periodically.
            epoch_boundary = (step-1) % steps_per_epoch == 0
            if epoch_boundary or (step + 1) == max_steps or _shutdown:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                #accuracies_epochs.append(np.mean(accuracies_batches))
                #losses_epochs.append(np.mean(losses_batches))
                # save accuracy and loss
                np.save(os.path.join(FLAGS.train_dir, 'tr_loss'),
                        np.array(losses_batches))
                np.save(os.path.join(FLAGS.train_dir, 'tr_accuracy'),
                        np.array(accuracies_batches))
                print('Saving model: ', (step-1) / steps_per_epoch)

            if _shutdown:
                break

        print('Listnr training finished!')
def evaluate_model(model, x_test, y_test):
    """Score ``model`` on a test set.

    Predictions are cast to float32 before being handed to the
    module-level ``accuracy`` and ``loss`` functions.

    Returns:
        ``[current_loss, current_accuracy]``.
    """
    predictions = backend.cast(model.predict(x_test), 'float32')
    # Accuracy is computed before loss, as in the original call order.
    acc_value = accuracy(y_test, predictions)
    loss_value = loss(y_test, predictions)
    return [loss_value, acc_value]
def train():
    """Train the galaxy classifier and periodically evaluate it.

    Batches are pulled from the queue-based input pipelines and fed through
    placeholders. The training summary is written every 50 steps, a test
    batch is evaluated every 200 steps, and a checkpoint is saved every
    2000 steps; all three also fire on the final step.
    """
    train_dir='/home/daijiaming/Galaxy/data3/trainset/'
    train_label_dir='/home/daijiaming/Galaxy/data3/train_label.csv'
    test_dir='/home/daijiaming/Galaxy/data3/testset/'
    test_label_dir='/home/daijiaming/Galaxy/data3/test_label.csv'
    train_log_dir = '/home/daijiaming/Galaxy/Dieleman/logs/train/'
    val_log_dir = '/home/daijiaming/Galaxy/Dieleman/logs//val/'

    # Input pipelines; each also yields a third value that is unused here.
    tra_image_batch, tra_label_batch, tra_galalxyid_batch = input_data.read_galaxy11(
        data_dir=train_dir, label_dir=train_label_dir, batch_size= BATCH_SIZE)
    val_image_batch, val_label_batch, val_galalxyid_batch = input_data.read_galaxy11_test(
        data_dir=test_dir, label_dir=test_label_dir, batch_size= BATCH_SIZE)

    x = tf.placeholder(tf.float32, [BATCH_SIZE, 64, 64, 3])
    y_ = tf.placeholder(tf.float32, [BATCH_SIZE, N_CLASSES])
    keep_prob = tf.placeholder(tf.float32)

    logits, fc_output = model.inference(x, BATCH_SIZE, N_CLASSES, keep_prob)
    loss = model.loss(logits, y_)
    accuracy = model.accuracy(logits, y_)

    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    train_op = model.optimize(loss, learning_rate, my_global_step)

    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()
    init = tf.global_variables_initializer()

    sess = tf.Session()
    sess.run(init)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    tra_summary_writer = tf.summary.FileWriter(train_log_dir, sess.graph)
    val_summary_writer = tf.summary.FileWriter(val_log_dir, sess.graph)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            final_step = (step + 1) == MAX_STEP

            # Training step with keep_prob 0.5.
            tra_images, tra_labels = sess.run(
                [tra_image_batch, tra_label_batch])
            train_feed = {x: tra_images, y_: tra_labels, keep_prob: 0.5}
            _, tra_loss, tra_acc, summary_str = sess.run(
                [train_op, loss, accuracy, summary_op], feed_dict=train_feed)

            if step % 50 == 0 or final_step:
                print ('Step: %d, tra_loss: %.4f, tra_accuracy: %.2f%%' % (step, tra_loss, tra_acc))
                # summary_str = sess.run(summary_op,feed_dict={x:tra_images, y_:tra_labels})
                tra_summary_writer.add_summary(summary_str, step)

            if step % 200 == 0 or final_step:
                # Evaluation batch with keep_prob 1.
                val_images, val_labels = sess.run(
                    [val_image_batch, val_label_batch])
                val_feed = {x: val_images, y_: val_labels, keep_prob: 1}
                val_loss, val_acc, summary_str = sess.run(
                    [loss, accuracy, summary_op], feed_dict=val_feed)
                print('** Step %d, test_loss = %.4f, test_accuracy = %.2f%% **' %(step, val_loss, val_acc))
                # summary_str = sess.run([summary_op],feed_dict={x:val_images,y_:val_labels})
                val_summary_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or final_step:
                checkpoint_path = os.path.join(train_log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        # Always stop the queue-runner threads before closing the session.
        coord.request_stop()
        coord.join(threads)
        sess.close()
def main():
    """Train AlexNet and periodically evaluate it on the test batches.

    Parses the command-line options, builds the graph, loads the pre-batched
    data and runs ``--max_epoch`` epochs. After every epoch the averaged
    training metrics are written to the train summary directory; every
    ``--eval_freq`` epochs the test batches are evaluated, and every
    ``--save_freq`` epochs a checkpoint is written under ``--summary_dir``.
    """
    _IMG_SIZE = 32  # alternative: _IMG_SIZE = 28
    _IMG_CHANNEL = 3
    _IMG_CLASS = 10

    parser = argparse.ArgumentParser(description='alexnet')
    parser.add_argument('--data_dir', type=str,
                        default='./cifar-10-batches-py/')
    parser.add_argument('--learning_rate', type=float, default=0.01)
    # batch_size must evenly divide the total number of images.
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--keepPro', type=float, default=0.5)
    parser.add_argument('--summary_dir', type=str,
                        default='./summary/alexnetlog/')
    parser.add_argument('--max_epoch', type=int, default=40)
    parser.add_argument('--eval_freq', type=int, default=1)
    parser.add_argument('--save_freq', type=int, default=100)
    args = parser.parse_args()
    print(args)

    # -------------------------------------------------------------------------
    # BUILD GRAPH
    # -------------------------------------------------------------------------
    # NOTE(review): the extent of the device scope was reconstructed from a
    # whitespace-collapsed source; the session uses allow_soft_placement, so
    # ops without a kernel on this device fall back automatically.
    with tf.device('/gpu:1'):
        (inputs, labels, dropout_keep_prob, learning_rate,
         is_training) = alexnet.input_placeholder(
             _IMG_SIZE, _IMG_CHANNEL, _IMG_CLASS)
        logits = alexnet.interface(inputs, args.keepPro, _IMG_CLASS,
                                   is_training)
        accuracy = alexnet.accuracy(logits, labels)
        loss = alexnet.loss(logits, labels)
        train_op = alexnet.train(loss, learning_rate, 'RMSProp')

    # ready for summary or save
    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    print("[BUILD GRAPH] memory_usage=%f"
          % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024),
          file=sys.stderr)

    # -------------------------------------------------------------------------
    # LOAD DATA
    # -------------------------------------------------------------------------
    train_images, train_labels, test_images, test_labels = \
        loader.load_batch_data(args.data_dir, args.batch_size)
    print("[LOAD DATA] memory_usage=%f"
          % (resource.getrusage(resource.RUSAGE_SELF).ru_maxrss / 1024),
          file=sys.stderr)

    # -------------------------------------------------------------------------
    # START THE SESSION
    # -------------------------------------------------------------------------
    cur_lr = args.learning_rate  # current learning rate
    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    with tf.Session(config=config) as sess:
        sess.run(tf.global_variables_initializer())
        train_writer = tf.summary.FileWriter(
            logdir=args.summary_dir + 'norm' + '/train/', graph=sess.graph)
        test_writer = tf.summary.FileWriter(
            logdir=args.summary_dir + 'norm' + '/test/')

        try:
            for epoch in range(args.max_epoch):
                start = timeit.default_timer()
                train_accs = []
                train_losses = []

                # train
                for index, images_batch in enumerate(train_images):
                    _, summary, train_loss, train_acc = sess.run(
                        fetches=[train_op, merged, loss, accuracy],
                        feed_dict={
                            inputs: images_batch,
                            labels: train_labels[index],
                            dropout_keep_prob: args.keepPro,
                            learning_rate: cur_lr,
                            is_training: True,
                        })
                    train_accs.append(train_acc)
                    train_losses.append(train_loss)
                    print('[batch] {} done'.format(index), end='\r')

                train_avg_acc = float(np.mean(np.asarray(train_accs)))
                train_avg_loss = float(np.mean(np.asarray(train_losses)))
                train_summary = tf.Summary(value=[
                    tf.Summary.Value(tag="accuracy",
                                     simple_value=train_avg_acc),
                    tf.Summary.Value(tag="avg_loss",
                                     simple_value=train_avg_loss),
                    tf.Summary.Value(tag="learning_rate",
                                     simple_value=cur_lr),
                ])
                # `summary` is the merged summary of the epoch's last batch.
                train_writer.add_summary(summary, epoch)
                train_writer.add_summary(train_summary, epoch)
                print('=' * 20 + 'EPOCH {} [TRAIN]'.format(epoch) + '=' * 20)
                print('cost time: {:.3f}s'.format(
                    timeit.default_timer() - start))
                print('acc: {0}, avg_loss: {1}'.format(
                    train_avg_acc, train_avg_loss))

                # evaluate
                if (epoch + 1) % args.eval_freq == 0:
                    test_accs = []
                    test_losses = []
                    for index, test_images_batch in enumerate(test_images):
                        # Feed a keep probability of 1.0 so dropout is not
                        # applied while measuring test metrics.
                        # NOTE(review): the graph above is built with
                        # args.keepPro rather than this placeholder; confirm
                        # that alexnet.interface honours is_training too.
                        test_loss, test_acc = sess.run(
                            fetches=[loss, accuracy],
                            feed_dict={
                                inputs: test_images_batch,
                                labels: test_labels[index],
                                dropout_keep_prob: 1.0,
                                learning_rate: cur_lr,
                                is_training: False,
                            })
                        test_accs.append(test_acc)
                        test_losses.append(test_loss)

                    test_avg_acc = float(np.mean(np.asarray(test_accs)))
                    test_avg_loss = float(np.mean(np.asarray(test_losses)))
                    test_summary = tf.Summary(value=[
                        tf.Summary.Value(tag="accuracy",
                                         simple_value=test_avg_acc),
                        tf.Summary.Value(tag="avg_loss",
                                         simple_value=test_avg_loss),
                    ])
                    test_writer.add_summary(test_summary, epoch)
                    print('=' * 20 + 'EPOCH {} [EVAL]'.format(epoch)
                          + '=' * 20)
                    print('acc: {0}, avg_loss: {1}'.format(
                        test_avg_acc, test_avg_loss))

                # lr decay
                cur_lr = lr(cur_lr, epoch)

                # save
                if (epoch + 1) % args.save_freq == 0:
                    checkpoint_file = args.summary_dir + 'model.ckpt'
                    saver.save(sess, checkpoint_file, global_step=epoch)
                    print('Saved checkpoint')
        finally:
            # Flush and close the event files even if training is interrupted.
            train_writer.close()
            test_writer.close()
# Training configuration.
IMAGE_WIDTH = 100
IMAGE_HEIGHT = 100
LEARNING_RATE = 1e-3
BATCH_SIZE = 150
ITERATOR = 300  # step = ITERATOR * TOTAL_IMAGE_COUNT / BATCH_SIZE
TOTAL_IMAGE_COUNT = 1.4e5
# TOTAL_IMAGE_COUNT = 10000

if __name__ == '__main__':
    # Build the training graph on top of the batched input tensors.
    train_batch_images, train_batch_labels = image_reader.get_train_batch(
        PATH_TRAIN, IMAGE_WIDTH, IMAGE_HEIGHT, BATCH_SIZE)
    logits = model.inference(train_batch_images, 4, 0.75)
    loss = model.loss(logits, train_batch_labels)
    # accuracy = model.accuracy(model.inference(test_batch_images, 4), test_batch_labels)
    # Accuracy is measured on the same training batch that feeds the loss.
    accuracy = model.accuracy(logits, train_batch_labels)
    train = model.train(loss, LEARNING_RATE)
    summary = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(PATH_SUMMARY, tf.get_default_graph())
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialise all variables first, then overwrite them with the latest
        # checkpoint from PATH_MODEL when one exists.
        sess.run(tf.global_variables_initializer())
        checkpoint = tf.train.get_checkpoint_state(PATH_MODEL)
        if checkpoint and checkpoint.model_checkpoint_path:
            saver.restore(sess, checkpoint.model_checkpoint_path)
            print("Load last model successfully.")
        # Start the queue-runner threads for this session.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord)
def run_training():
    """Run the Listnr training loop for ``FLAGS.num_epochs`` epochs.

    Builds the graph (inputs, inference, loss, accuracy, train op), then
    executes ``max_steps + 1`` training steps. Progress is printed and a
    summary written every 100 steps. A checkpoint plus the per-batch loss and
    accuracy histories are saved when ``(step - 1)`` is a multiple of the
    steps per epoch, on step ``max_steps - 1``, or when the module-level
    ``_shutdown`` flag is set; the flag also ends the loop.
    """
    with tf.Graph().as_default():
        step_counter = tf.Variable(0, trainable=False)

        # Input batches, model outputs and the training op.
        batch_frames, batch_labels = tm.inputs(
            FLAGS.batch_size, NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
        logits = md.inference(batch_frames)
        loss_op = md.loss(logits, batch_labels)
        accuracy_op = md.accuracy(logits, batch_labels)
        train_op = train(loss_op, step_counter)

        saver = tf.train.Saver(tf.all_variables(),
                               max_to_keep=FLAGS.num_epochs)
        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)

        # The input pipeline is queue based, so its threads must be running.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir, sess.graph)

        steps_per_epoch = int(
            NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN / FLAGS.batch_size)
        max_steps = FLAGS.num_epochs * steps_per_epoch

        # Per-epoch aggregates are declared but not populated.
        losses_epochs = []
        accuracies_epochs = []
        losses_batches = []
        accuracies_batches = []

        log_format = (
            '%s: step %d, loss = %.2f, train_acc = %.2f (%.1f examples/sec; %.3f '
            'sec/batch)')

        for step in range(max_steps + 1):
            tic = time.time()
            _, loss_value, acc_value = sess.run(
                [train_op, loss_op, accuracy_op])
            duration = time.time() - tic

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 100 == 0:
                examples_per_sec = FLAGS.batch_size / duration
                print(log_format % (datetime.now(), step, loss_value,
                                    acc_value, examples_per_sec,
                                    float(duration)))
                # Evaluating the summary op triggers one more run of the graph.
                summary_writer.add_summary(sess.run(summary_op), step)

            losses_batches.append(loss_value)
            accuracies_batches.append(acc_value)

            # Save the model checkpoint periodically.
            at_epoch_boundary = (step - 1) % steps_per_epoch == 0
            at_final_step = (step + 1) == max_steps
            if at_epoch_boundary or at_final_step or _shutdown:
                saver.save(sess,
                           os.path.join(FLAGS.train_dir, 'model.ckpt'),
                           global_step=step)
                # Persist the metric histories next to the checkpoint.
                np.save(os.path.join(FLAGS.train_dir, 'tr_loss'),
                        np.array(losses_batches))
                np.save(os.path.join(FLAGS.train_dir, 'tr_accuracy'),
                        np.array(accuracies_batches))
                print('Saving model: ', (step - 1) / steps_per_epoch)

            if _shutdown:
                break

        print('Listnr training finished!')
def train():
    """Train the model, evaluating on the eval set every 10th step.

    For each of ``FLAGS.max_steps`` steps:

    * ``step % 10 == 0``: fetch an evaluation batch, write a summary and
      print the accuracy (no training on these steps).
    * ``step % 100 == 99`` or last step: run a training step, write a
      summary and save a checkpoint to ``FLAGS.train_dir``.
    * otherwise: run a training step and print the loss and throughput.

    Raises:
        AssertionError: if the training loss becomes NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get train images and labels
        float_image, label = tfrecord.train_data_read(
            tfrecord_path=FLAGS.train_data)
        images, labels = tfrecord.create_batch(
            float_image, label, count_num=FLAGS.train_num)

        # Get evaluate images and labels
        eval_float_image, eval_label = tfrecord.eval_data_read(
            tfrecord_path=FLAGS.eval_data_dir)
        eval_images, eval_labels = tfrecord.create_batch(
            eval_float_image, eval_label, count_num=FLAGS.eval_num)

        # The model is fed through placeholders so that the same graph can
        # consume both training and evaluation batches.
        x = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, FLAGS.image_width, FLAGS.image_height,
             FLAGS.image_channels],
            name='x_input')
        y_ = tf.placeholder(tf.int32, [FLAGS.batch_size, None],
                            name='y_input')
        keep_prob = tf.placeholder(tf.float32)

        logits = model.inference(x, keep_prob)
        loss = model.loss(logits, y_)
        accuracy = model.accuracy(logits, y_)
        train_op = model.train(loss, global_step)

        saver = tf.train.Saver(tf.global_variables())
        summary_op = tf.summary.merge_all()
        # tf.initialize_all_variables() is deprecated in favour of
        # tf.global_variables_initializer().
        init = tf.global_variables_initializer()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # start queue runners
        tf.train.start_queue_runners(sess=sess)

        # The `graph_def` argument of FileWriter is deprecated; pass the
        # graph itself instead.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                               graph=sess.graph)

        def make_feed(batch_images, batch_labels, keep):
            """Build the feed dict for one batch with the given keep prob."""
            return {
                x: batch_images,
                y_: np.reshape(batch_labels, (FLAGS.batch_size, -1)),
                keep_prob: keep,
            }

        for step in xrange(FLAGS.max_steps):
            if step % 10 == 0:
                # Evaluation step: dropout disabled, no parameter update.
                imgs, lbls = sess.run([eval_images, eval_labels])
                summary_str, acc = sess.run(
                    [summary_op, accuracy],
                    feed_dict=make_feed(imgs, lbls, 1.0))
                summary_writer.add_summary(summary_str, step)
                print('%s: step %d, accuracy = %.3f'
                      % (datetime.now(), step, acc))
                continue

            imgs, lbls = sess.run([images, labels])
            feed = make_feed(imgs, lbls, FLAGS.keep_prob)

            if step % 100 == 99 or (step + 1) == FLAGS.max_steps:
                # Training step that also records a summary and a checkpoint.
                summary_str, _ = sess.run([summary_op, train_op],
                                          feed_dict=feed)
                summary_writer.add_summary(summary_str, step)
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                continue

            start_time = time.time()
            _, loss_value = sess.run([train_op, loss], feed_dict=feed)
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            examples_per_sec = FLAGS.batch_size / duration
            sec_per_batch = float(duration)
            format_str = (
                '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                'sec/batch)')
            print(format_str % (datetime.now(), step, loss_value,
                                examples_per_sec, sec_per_batch))
def main(_):
    """Train the sequence model with a CRF loss and report validation accuracy.

    Reads training and validation batches from the TFRecord files under
    ``FLAGS.tfrecords_path``, builds the training and (variable-sharing)
    validation graphs, and trains until the input queues are exhausted.
    Every 100 global steps the validation accuracy, summaries and learning
    rate are evaluated and printed; every 1000 steps the summary is written
    and a checkpoint is saved under ``FLAGS.checkpoint_path``. Training
    resumes from the latest checkpoint in that directory when one exists.
    """
    learning_rate = FLAGS.learning_rate
    learning_rate_decay_factor = FLAGS.learning_rate_decay_factor
    num_layers = FLAGS.num_layers
    num_steps = FLAGS.num_steps
    embedding_size = FLAGS.embedding_size
    hidden_size = FLAGS.hidden_size
    keep_prob = FLAGS.keep_prob
    batch_size = FLAGS.batch_size
    vocab_size = FLAGS.vocab_size
    clip_norm = FLAGS.clip_norm
    num_classes = FLAGS.num_classes
    checkpoint_path = FLAGS.checkpoint_path
    tensorboard_path = FLAGS.tensorboard_path
    tfrecords_path = FLAGS.tfrecords_path

    train_tfrecords_filename = os.path.join(tfrecords_path,
                                            'train.tfrecords')
    validate_tfrecords_filename = os.path.join(tfrecords_path,
                                               'validate.tfrecords')

    train_data = tfrecords_utils.read_and_decode(train_tfrecords_filename)
    (train_batch_features, train_batch_labels,
     train_words_len_batch) = train_data
    validate_data = tfrecords_utils.read_and_decode(
        validate_tfrecords_filename)
    (validate_batch_features, validate_batch_labels,
     validate_words_len_batch) = validate_data

    # NOTE(review): the extent of this device scope was reconstructed from a
    # whitespace-collapsed source; confirm that only the step counter is
    # meant to be pinned to the CPU.
    with tf.device('/cpu:0'):
        global_step = tf.Variable(0, name='global_step', trainable=False)

    # Decay the learning rate exponentially based on the number of steps.
    decay_steps = FLAGS.decay_steps
    lr = tf.train.exponential_decay(learning_rate,
                                    global_step,
                                    decay_steps,
                                    learning_rate_decay_factor,
                                    staircase=True)
    optimizer = tf.train.RMSPropOptimizer(lr)

    with tf.variable_scope('model'):
        logits, _ = model.inference(train_batch_features,
                                    batch_size,
                                    num_steps,
                                    vocab_size,
                                    embedding_size,
                                    hidden_size,
                                    keep_prob,
                                    num_layers,
                                    num_classes,
                                    is_training=True)
    train_batch_labels = tf.to_int64(train_batch_labels)

    # Loss of crf
    loss = model.crf_loss(logits, train_batch_labels, batch_size, num_steps,
                          num_classes)

    # The validation graph reuses the variables created in the 'model' scope.
    with tf.variable_scope('model', reuse=True):
        accuracy_logits, _ = model.inference(validate_batch_features,
                                             batch_size,
                                             num_steps,
                                             vocab_size,
                                             embedding_size,
                                             hidden_size,
                                             keep_prob,
                                             num_layers,
                                             num_classes,
                                             is_training=False)
    validate_batch_labels = tf.to_int64(validate_batch_labels)
    slice_accuracy_logits, slice_validate_batch_labels = model.slice_seq(
        accuracy_logits, validate_batch_labels, validate_words_len_batch,
        batch_size, num_steps)
    accuracy = model.accuracy(slice_accuracy_logits,
                              slice_validate_batch_labels)

    # summary
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    tf.summary.scalar('lr', lr)
    if not os.path.exists(tensorboard_path):
        os.makedirs(tensorboard_path)

    # compute, clip and update gradient
    gradients, variables = zip(*optimizer.compute_gradients(loss))
    clip_gradients, _ = tf.clip_by_global_norm(gradients, clip_norm)
    train_op = optimizer.apply_gradients(zip(clip_gradients, variables),
                                         global_step=global_step)

    init_op = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=None)
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    checkpoint_filename = os.path.join(checkpoint_path, 'model.ckpt')

    with tf.Session() as sess:
        summary_op = tf.summary.merge_all()
        writer = tf.summary.FileWriter(tensorboard_path, sess.graph)
        sess.run(init_op)

        ckpt = tf.train.get_checkpoint_state(checkpoint_path)
        if ckpt and ckpt.model_checkpoint_path:
            print('Continue training from the model {}'.format(
                ckpt.model_checkpoint_path))
            saver.restore(sess, ckpt.model_checkpoint_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        start_time = datetime.datetime.now()
        try:
            while not coord.should_stop():
                _, loss_value, step = sess.run([train_op, loss, global_step])
                if step % 100 == 0:
                    accuracy_value, summary_value, lr_value = sess.run(
                        [accuracy, summary_op, lr])
                    end_time = datetime.datetime.now()
                    print('[{}] Step: {}, loss: {}, accuracy: {}, lr: {}'
                          .format(end_time - start_time, step, loss_value,
                                  accuracy_value, lr_value))
                    # 1000 is a multiple of 100, so the values computed just
                    # above are always available here.
                    if step % 1000 == 0:
                        writer.add_summary(summary_value, step)
                        saver.save(sess, checkpoint_filename,
                                   global_step=step)
                        # A single parenthesised argument prints identically
                        # on Python 2 and parses on Python 3.
                        print('save model to ' + checkpoint_filename + '-'
                              + str(step))
                    start_time = end_time
        except tf.errors.OutOfRangeError:
            print('Done training after reading all data')
        finally:
            coord.request_stop()
            # Flush pending summaries before the session goes away.
            writer.close()
        coord.join(threads)
import model # PATH_EVAL = "data/train" PATH_EVAL = "data/test" PATH_SUMMARY = "log/summary" PATH_MODEL = "log/model_dog&cat" BATCH_SIZE = 100 TOTAL_IMAGE_COUNT = 10000 if __name__ == '__main__': test_batch_images, test_batch_labels = image_reader.get_eval_batch( PATH_EVAL, BATCH_SIZE) logits = model.inference(test_batch_images, 4, 1) accuracy = model.accuracy(logits, test_batch_labels) saver = tf.train.Saver() with tf.Session() as sess: sess.run(tf.global_variables_initializer()) checkpoint = tf.train.get_checkpoint_state(PATH_MODEL) if checkpoint and checkpoint.model_checkpoint_path: saver.restore(sess, checkpoint.model_checkpoint_path) print("Load last model successfully.") coord = tf.train.Coordinator() threads = tf.train.start_queue_runners(sess, coord) try: print("Start eval...") step_count = int(TOTAL_IMAGE_COUNT / BATCH_SIZE) accuracy_total = []
for epoch_cnt in range(10): idxs = np.arange(len(x_train)) np.random.shuffle(idxs) for batch_cnt in range(0, len(x_train) // batch_size): optim.zero_grad() start_ind = batch_cnt*batch_size batch_indices = idxs[start_ind : start_ind+batch_size] batch = x_train[batch_indices] # random batch of our training data pred = model(torch.Tensor(batch).to(device)) pred_true = y_train[batch_indices] loss = loss_fn(pred, torch.from_numpy(pred_true).long().to(device)) loss.backward() acc = m.accuracy(pred.cpu(), pred_true) optim.step() # TODO uncomment in jupyter notebook plotter.set_train_batch({"loss" : loss.item(), "accuracy" : acc}, batch_size = batch_size) # TODO uncomment in jupyter notebook test_idxs = np.arange(len(x_test)) for batch_cnt in range(0, (len(x_test) // batch_size)): start_ind = batch_cnt*batch_size batch_indices = test_idxs[start_ind : start_ind+batch_size] batch = x_test[batch_indices]
def main(unused_args):
    """Run one task of a distributed, synchronous training job.

    A task whose ``FLAGS.job_name`` is ``'ps'`` starts a parameter server and
    blocks in ``server.join()``. Any other task acts as a worker: it builds
    the input pipeline and the model under a replica device setter, wraps a
    gradient-descent optimizer in ``SyncReplicasOptimizer`` and trains inside
    a ``MonitoredTrainingSession`` until ``FLAGS.max_steps`` is reached.

    Args:
        unused_args: ignored.
    """
    if FLAGS.force_use_cpu:
        # Hide all GPUs from TensorFlow.
        os.environ['CUDA_VISIBLE_DEVICES'] = ''

    ps_hosts = FLAGS.ps_hosts.split(',')
    worker_hosts = FLAGS.worker_hosts.split(',')
    tf.logging.info('Worker hosts are: %s' % worker_hosts)

    cluster_spec = tf.train.ClusterSpec({
        'ps': ps_hosts,
        'worker': worker_hosts,
    })
    server = tf.train.Server(cluster_spec.as_dict(),
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_id,
                             protocol=FLAGS.protocol)

    # All print calls below pass one parenthesised argument, so they produce
    # the same output on Python 2 and are valid syntax on Python 3.
    if FLAGS.job_name == 'ps':
        print("I'm a parameter server.")
        server.join()
        return

    dataset = CarsData(subset=FLAGS.subset)
    assert dataset.data_files()

    # Only task 0 creates the training directory.
    if FLAGS.task_id == 0:
        if not tf.gfile.Exists(FLAGS.train_dir):
            tf.gfile.MakeDirs(FLAGS.train_dir)

    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:worker/task:%d" % FLAGS.task_id,
                cluster=cluster_spec)):
        print("I'm a worker")
        images_placeholder = tf.placeholder(dtype=tf.float32,
                                            shape=(None, 3, 224, 224),
                                            name="images_placeholder")
        labels_placeholder = tf.placeholder(dtype=tf.int32,
                                            shape=(None),
                                            name="labels_placeholder")
        images, labels = image_processing.batch_inputs(
            dataset=dataset,
            batch_size=FLAGS.batch_size,
            num_preprocess_threads=FLAGS.num_preprocess_threads,
            train=True,
            regular=True)

        # The second output of model.inference is not used here.
        logits, _ = model.inference(images_placeholder)
        loss = model.loss(logits, labels_placeholder)
        accuracy = model.accuracy(logits, labels_placeholder)

        global_step = tf.contrib.framework.get_or_create_global_step()
        opt = tf.train.GradientDescentOptimizer(learning_rate=0.05)

        num_workers = len(cluster_spec.as_dict()['worker'])
        print("Number of workers: %d" % num_workers)

        # Aggregate one gradient from every worker before each update.
        opt = tf.train.SyncReplicasOptimizer(
            opt,
            replicas_to_aggregate=num_workers,
            total_num_replicas=num_workers,
            use_locking=True,
            name='sync_replicas')
        train_op = opt.minimize(loss=loss, global_step=global_step)

        is_chief = (FLAGS.task_id == 0)
        print("IsChief: %s" % is_chief)

        sync_replicas_hook = opt.make_session_run_hook(
            is_chief=is_chief, num_tokens=num_workers)
        last_step_hook = tf.train.StopAtStepHook(num_steps=FLAGS.max_steps)
        hooks = [sync_replicas_hook, last_step_hook]

    # NOTE(review): whether the session was originally opened inside the
    # device scope cannot be recovered from the collapsed source.
    with tf.train.MonitoredTrainingSession(
            config=tf.ConfigProto(
                allow_soft_placement=True,
                log_device_placement=FLAGS.log_device_placement),
            master=server.target,
            is_chief=is_chief,
            checkpoint_dir=FLAGS.train_dir,
            hooks=hooks,
            stop_grace_period_secs=120) as mon_session:
        while not mon_session.should_stop():
            image_feed, label_feed = mon_session.run([images, labels])
            # Need to check mon_session.should_stop after each session.run
            # to avoid errors when calling run after stop
            if mon_session.should_stop():
                break

            # Convert from NHWC to NCHW format
            image_feed_NCHW = np.transpose(image_feed, (0, 3, 1, 2))
            feed_dict = {
                images_placeholder: image_feed_NCHW,
                labels_placeholder: label_feed,
            }

            print("===================================================")
            print("Running train op")
            _, current_loss, current_step, current_accuracy = \
                mon_session.run([train_op, loss, global_step, accuracy],
                                feed_dict=feed_dict)
            print("Current step: %s" % current_step)
            print("Current loss: %.2f" % current_loss)
            print("Current accuracy: %.4f" % current_accuracy)
            print("===================================================")
x = tf.placeholder(tf.float32, shape=[None, w, h, c], name='x') # tensor y_ = tf.placeholder(tf.int32, shape=[ None, ], name='y_') keep_prob = tf.placeholder(tf.float32, name='kp') regularizer = tf.contrib.layers.l2_regularizer(0.0001) logits = model.inference(x, regularizer, keep_prob) # (小处理)将logits乘以1赋值给logits_eval,定义name,方便在后续调用模型时通过tensor名字调用输出tensor b = tf.constant(value=1, dtype=tf.float32) logits_eval = tf.multiply(logits, b, name='logits_eval') cross_entropy = model.loss(logits, y_) train_op = model.training(cross_entropy, 0.001) acc = model.accuracy(logits, y_) merged = tf.summary.merge_all() saver = tf.train.Saver(max_to_keep=1) x_train, y_train, x_val, y_val = input_data.get_batch() iter_num = int(len(x_train) / batch_size) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) coord = tf.train.Coordinator() # 开启多线程 threads = tf.train.start_queue_runners( sess=sess, coord=coord) # 这里指代的是读取数据的线程,如果不加的话队列一直挂起 train_writer = tf.summary.FileWriter('log' + '/train', sess.graph) test_writer = tf.summary.FileWriter('log' + '/val')
def main(argv=None):
    """Train the image classifier, then sanity-check the saved artefacts.

    Steps:

    1. Build the training graph and train for ``FLAGS.max_steps`` steps,
       writing summaries and a Chrome timeline every step and a checkpoint
       every 100 steps (plus a final one).
    2. If ``FLAGS.save_as_pb == 1``, export a frozen single-layer softmax
       graph built from the trained weight and bias as ``<date>.pb``.
    3. Reload the latest checkpoint and classify the most recently modified
       image in ``FLAGS.test_image_dir``.
    4. If a pb file was exported, run the same image through it.

    Unless specified otherwise, images have 3 (colour) channels.

    Args:
        argv: unused.
    """
    graph = tf.Graph()
    with graph.as_default():
        # placeholders for image_batch and label batch
        image_placeholder = tf.placeholder(tf.float32,
                                           shape=[None, IMAGE_PIXELS])
        label_placeholder = tf.placeholder(tf.int32,
                                           shape=[FLAGS.batch_size])
        keep_prob = tf.placeholder(tf.float32)

        # logits for learning
        logits, weight, bias = model.inference(image_placeholder,
                                               keep_prob=keep_prob)
        logits = tf.identity(logits, name='out_node')

        # loss for learning
        loss = model.loss(label_placeholder, logits)

        # optimization for learning
        global_step = tf.Variable(0, name="global_step", trainable=False)
        train_op = model.training(loss, global_step)

        # accuracy
        acc = model.accuracy(logits, label_placeholder)

        # read tfrecords
        tfrecords = helper.getTFrecords(FLAGS.data_dir_mo)
        labels, image_paths = helper.readTFrecord(tfrecords)
        labels_batch, images_batch = helper.batched(labels, image_paths,
                                                    FLAGS.batch_size)

        # create saver
        saver = tf.train.Saver(tf.global_variables(),
                               max_to_keep=FLAGS.max_to_keep)

        # for tensorboard and learned data
        tf.summary.FileWriterCache.clear()

        # performance tracing
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        # merge all summaries
        summary = tf.summary.merge_all()

    with tf.Session(graph=graph) as sess:
        # Restore the last checkpoint when asked to continue an old model and
        # one exists; otherwise start from freshly initialised variables.
        ckpt = None
        if FLAGS.restudy_old_model == 1:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt is not None:
            last_model = ckpt.model_checkpoint_path
            print("Loading Last saved Model: " + last_model)
            saver.restore(sess, last_model)
        else:
            # initialize both global and local variables
            sess.run([
                tf.global_variables_initializer(),
                tf.local_variables_initializer()
            ])

        # write train summaries for tensorboard
        writer = tf.summary.FileWriter(FLAGS.checkpoint_dir, sess.graph)

        # prepare threads to obtain data from batches
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Defined before the try block so the finally clause can always use
        # them, even if the very first step fails.
        step = 0
        start_time_total = time.time()
        try:
            for step in range(1, FLAGS.max_steps + 1):
                start_time = time.time()

                batch_labels, batch_images = sess.run(
                    [labels_batch, images_batch])

                # Train with dropout enabled (keep_prob 0.5) ...
                sess.run(
                    [train_op],
                    feed_dict={
                        image_placeholder: batch_images,
                        label_placeholder: batch_labels,
                        keep_prob: 0.5
                    })
                # ... and measure accuracy/loss with dropout disabled. The
                # original code had these two keep_prob values swapped.
                trained_accurcy, train_loss, summary_str = sess.run(
                    [acc, loss, summary],
                    feed_dict={
                        image_placeholder: batch_images,
                        label_placeholder: batch_labels,
                        keep_prob: 1.0
                    },
                    options=run_options,
                    run_metadata=run_metadata)
                writer.add_summary(summary_str, step)
                writer.flush()

                # Dump the trace of this step in Chrome trace format.
                tl = timeline.Timeline(run_metadata.step_stats)
                ctf = tl.generate_chrome_trace_format(show_memory=False,
                                                      show_dataflow=True)
                with open(FLAGS.log_dir + "timeline.json", "w") as f:
                    f.write(ctf)

                duration = time.time() - start_time
                format_learning = '%s: step %d, loss = %.5f , accuracy %.5f (%.3f sec/batch)'
                print(format_learning % (datetime.now(), step, train_loss,
                                         trained_accurcy, duration))

                if step % 100 == 0:
                    save_path = saver.save(sess,
                                           FLAGS.checkpoint_dir + 'model',
                                           global_step=step)
                    print("The model is saved to the file: %s" % save_path)
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # print total time took to learn
            total_time = time.time() - start_time_total
            print('total time took to learn (%.3f sec)' % (total_time))

            # saving the final result
            save_path = saver.save(sess,
                                   FLAGS.checkpoint_dir + 'model',
                                   global_step=step)
            print("The model is saved to the file: %s" % save_path)

            # for creating pb file
            if FLAGS.save_as_pb == 1:
                g_2 = tf.Graph()
                with g_2.as_default():
                    x_2 = tf.placeholder(tf.float32,
                                         shape=[None, IMAGE_PIXELS],
                                         name='input')
                    x_2 = tf.reshape(x_2, [-1, 1024])
                    # Bake the trained parameters into constants.
                    W_2 = tf.constant(sess.run(weight), name='weight')
                    b_2 = tf.constant(sess.run(bias), name='bias')
                    y_2 = tf.nn.softmax(tf.matmul(x_2, W_2) + b_2,
                                        name='output')
                    # The session is closed on exit instead of being leaked.
                    with tf.Session() as sess_2:
                        init_2 = tf.global_variables_initializer()
                        sess_2.run(init_2)
                    graph_def = g_2.as_graph_def()
                    date = datetime.now().strftime('%Y%m%d')
                    tf.train.write_graph(graph_def,
                                         FLAGS.checkpoint_dir,
                                         date + '.pb',
                                         as_text=False)
                    print('Pb file was save to and as ' +
                          FLAGS.checkpoint_dir + date + '.pb')

            coord.request_stop()
            coord.join(threads)

    # testing the checkpoint file accuracy
    graph_3 = tf.Graph()
    with graph_3.as_default():
        image_placeholder_3 = tf.placeholder(tf.float32,
                                             shape=[None, IMAGE_PIXELS])
        keep_prob_3 = tf.placeholder(tf.float32)

        image_path = helper.get_latest_modified_file_path(
            FLAGS.test_image_dir)
        print("Checkpoint: using image is: " + image_path)
        image_tensor = helper._getImage(image_path)
        image_tensor = tf.reshape(image_tensor, [-1, IMAGE_PIXELS])

        logits_3 = predictor.pred_inference(image_placeholder_3,
                                            keep_prob=keep_prob_3)

        with tf.Session() as sess_3:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt is not None:
                saver = tf.train.Saver(tf.global_variables(),
                                       max_to_keep=FLAGS.max_to_keep)
                last_model = ckpt.model_checkpoint_path
                print("Loading Last saved Model: " + last_model)
                saver.restore(sess_3, last_model)

                pred = np.argmax(
                    logits_3.eval(feed_dict={
                        image_placeholder_3: image_tensor.eval(),
                        keep_prob_3: 1.0
                    })[0])
                if pred in LABELS:
                    print("Predcition using checkpoint is " +
                          str(LABELS[pred]))
                else:
                    print("failed to predict")
            else:
                print("Failed to load the last saved Model")

    # testing pb file
    if FLAGS.save_as_pb == 1:
        with tf.gfile.FastGFile(FLAGS.checkpoint_dir + date + '.pb',
                                'rb') as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())
            _ = tf.import_graph_def(graph_def, name='')

        with tf.Session() as sess_4:
            image_path = helper.get_latest_modified_file_path(
                FLAGS.test_image_dir)
            print("Protocal Buff: using image is: " + image_path)
            image_tensor = helper._getImage(image_path)
            image_tensor = tf.reshape(image_tensor, [-1, IMAGE_PIXELS])

            prediction = np.argmax(
                sess_4.run('output:0', {'input:0': image_tensor.eval()}))
            # Check the pb prediction itself; the original tested `pred`
            # from the checkpoint run, which may not even be defined.
            if prediction in LABELS:
                print("Predcition using pb is " + str(LABELS[prediction]))
            else:
                print("failed to predict using pb file")
# Evaluate a saved model on the 'dunnings' image-folder dataset and print the
# average per-batch accuracy.
test_dataset = torchvision.datasets.ImageFolder(
    root=dataset_paths['dunnings'], transform=tr)
#test_dataset.class_to_idx = {'fire': 1, 'nofire': 0} # for dunnings
test = torch.utils.data.DataLoader(test_dataset,
                                   batch_size=64,
                                   num_workers=0,
                                   shuffle=True)
device = torch.device("cuda:0")
test_acc = []

print('loading model...')
model = torch.load(weight_path)
model = model.to(device)
# Switch layers such as dropout and batch norm to inference behaviour; a
# model restored with torch.load keeps whatever mode it was saved in.
model.eval()
for param in model.parameters():
    param.requires_grad = False

# No gradients are needed for testing, so skip building the autograd graph.
with torch.no_grad():
    for i, data in enumerate(test):
        print(f'testing batch {i}/{len(test)}')
        inputs = data[0].to(device)
        # The folder-derived labels are inverted before comparison
        # (presumably because the folder class order is the opposite of what
        # the model expects -- see the commented-out mapping above; confirm).
        # `.to(torch.bool)` avoids the copy-construct warning raised by
        # torch.tensor(<tensor>, ...).
        labels = (~data[1].to(torch.bool)).to(device)
        outputs = model(inputs)
        a = accuracy(outputs, labels)
        test_acc.append(a)

print(average(test_acc))
def eval(num_units, threshold):
    """Evaluate the network built from the active filters on MNIST.

    The filters returned by ``read_active_filters(num_units, threshold)`` are
    embedded in a fresh graph as constants.  Validation accuracy is averaged
    over the first 5 slices of 1000 validation examples, test accuracy over
    the first 10 slices of 1000 test examples, and the loss is computed on one
    training batch of ``BATCH_SIZE`` examples.  Merged summaries, if any, are
    written to ``EVAL_DIR``.

    Args:
        num_units: forwarded unchanged to ``read_active_filters``.
        threshold: forwarded unchanged to ``read_active_filters``.

    Returns:
        A tuple ``(loss_value, accuracy_val_value, accuracy_test_value,
        indices[NUM_CLASSES])``.
    """
    # Load the active filters
    w1, w2, indices = read_active_filters(num_units, threshold)

    with tf.Graph().as_default() as g:
        # Load the MNIST dataset
        mnist = input_data.read_data_sets(DATA_DIR, one_hot=False)
        np.random.seed(seed=SEED)

        # Insert placeholders for input images and labels.
        with tf.device('/cpu:0'):
            images_train = tf.placeholder(tf.float32,
                                          shape=[None, IMAGE_SIZE**2],
                                          name='train_images')
            labels_train = tf.placeholder(tf.int64,
                                          shape=[None],
                                          name='train_labels')
            images_val = tf.placeholder(tf.float32,
                                        shape=[None, IMAGE_SIZE**2],
                                        name='validation_images')
            labels_val = tf.placeholder(tf.int64,
                                        shape=[None],
                                        name='validation_labels')
            images_test = tf.placeholder(tf.float32,
                                         shape=[None, IMAGE_SIZE**2],
                                         name='test_images')
            labels_test = tf.placeholder(tf.int64,
                                         shape=[None],
                                         name='test_labels')

        # Build a Graph to perform the inference.
        weights = tf.constant(w1)
        biases = tf.constant(w2)
        logits_train = model.inference_eval(images_train, weights, biases,
                                            indices)
        logits_val = model.inference_eval(images_val, weights, biases,
                                          indices)
        logits_test = model.inference_eval(images_test, weights, biases,
                                           indices)

        # Compute the total loss.
        loss = model.loss(logits_train, labels_train)

        # Compute the accuracy
        accuracy_val = model.accuracy(logits_val, labels_val)
        accuracy_test = model.accuracy(logits_test, labels_test)

        # Write summaries to the disk
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(EVAL_DIR, g)

        try:
            # Demand GPU resources
            gpu_options = tf.GPUOptions(allow_growth=True)
            with tf.Session(config=tf.ConfigProto(
                    gpu_options=gpu_options,
                    log_device_placement=False)) as sess:
                # Load the dataset
                val_images = mnist.validation.images
                val_labels = mnist.validation.labels
                test_images = mnist.test.images
                test_labels = mnist.test.labels
                batch_images, batch_labels = mnist.train.next_batch(BATCH_SIZE)

                def _feed(slice_idx):
                    """Feed dict for the train batch plus the slice_idx-th
                    1000-example slice of the validation and test sets."""
                    lo = slice_idx * 1000
                    hi = (slice_idx + 1) * 1000
                    return {
                        images_train: batch_images,
                        labels_train: batch_labels,
                        images_val: val_images[lo:hi],
                        labels_val: val_labels[lo:hi],
                        images_test: test_images[lo:hi],
                        labels_test: test_labels[lo:hi]
                    }

                accuracy_val_value = 0.0
                accuracy_test_value = 0.0
                for i in range(10):
                    # Only the first 5 slices contribute to validation
                    # accuracy; all 10 contribute to test accuracy.
                    if i < 5:
                        accuracy_val_value += sess.run(accuracy_val,
                                                       feed_dict=_feed(i))
                    accuracy_test_value += sess.run(accuracy_test,
                                                    feed_dict=_feed(i))

                # The loss and the summaries use the first slice (0:1000).
                loss_value = sess.run(loss, feed_dict=_feed(0))
                accuracy_val_value /= 5.0
                accuracy_test_value /= 10.0

                # merge_all() returns None when the graph holds no summaries;
                # running None would raise, so only write when there is
                # something to write.
                if summary_op is not None:
                    summary_writer.add_summary(
                        sess.run(summary_op, feed_dict=_feed(0)))
                summary_writer.flush()
        finally:
            # Release the event file and the writer's background thread even
            # if evaluation fails; otherwise every call leaks one writer.
            summary_writer.close()

    return (loss_value, accuracy_val_value, accuracy_test_value,
            indices[NUM_CLASSES])
max_steps = 2000 train_dir = '../aerial_cactus_identification/data/train/' model_path = '../aerial_cactus_identification/model' model_name = 'cactus.ckpt' train_data = pd.read_csv('../aerial_cactus_identification/data/train.csv') input_image = tf.placeholder(dtype=tf.float32, shape=[batch_size, w, h, 3], name='x_input') output = tf.placeholder(dtype=tf.int64, shape=[batch_size], name='y_output') is_training = tf.placeholder(dtype=tf.bool) lr = tf.placeholder(dtype=tf.float32) logit = model.resnet50(input_image, is_training) sum_loss = model.total_loss(prediction=logit, labels=output) tf.summary.scalar('loss', sum_loss) acc = model.accuracy(pred=logit, labels=output) tf.summary.scalar('accuracy', acc) global_step = tf.Variable(0, trainable=False) train_op = model.train(lr, sum_loss, global_step) coord = tf.train.Coordinator() merged = tf.summary.merge_all() saver = tf.train.Saver() with tf.Session() as sess: tf.global_variables_initializer().run() writer = tf.summary.FileWriter('../aerial_cactus_identification/logs/', graph=sess.graph) image_list = [] label_list = [] images = train_data['id'].values for image_id in images: