def evaluate():
    """Eval network for a number of steps."""
    with tf.Graph().as_default():
        # Get images and labels for network.
        eval_data = FLAGS.eval_data == 'test'
        print(eval_data)
        print("evaluating model...")
        images, labels = network.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = network.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            network.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                                graph_def=graph_def)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
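# `eval_once` is not defined in this snippet. A minimal sketch of what it
# typically looks like, modeled on the TensorFlow CIFAR-10 tutorial; the
# FLAGS names and the usual math/numpy/datetime imports are assumptions:
def eval_once(saver, summary_writer, top_k_op, summary_op):
    """Run one pass over the eval data and report precision @ 1."""
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return
        saver.restore(sess, ckpt.model_checkpoint_path)
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
            true_count = 0  # Counts the number of correct predictions.
            total_sample_count = num_iter * FLAGS.batch_size
            step = 0
            while step < num_iter and not coord.should_stop():
                predictions = sess.run([top_k_op])
                true_count += np.sum(predictions)
                step += 1
            precision = true_count / total_sample_count
            print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))

            summary = tf.Summary()
            summary.ParseFromString(sess.run(summary_op))
            summary.value.add(tag='Precision @ 1', simple_value=precision)
            summary_writer.add_summary(summary, global_step)
        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)
        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)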
def test(test_dir, checkpoint_dir='./checkpoint/'):
    # Predict the result.
    test_images = os.listdir(test_dir)
    features = tf.placeholder("float32",
                              shape=[None, IMAGE_SIZE, IMAGE_SIZE, IMAGE_CHANNEL],
                              name="features")
    labels = tf.placeholder("float32", [None], name="labels")
    one_hot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=80)
    train_step, cross_entropy, logits, keep_prob = network.inference(
        features, one_hot_labels)
    values, indices = tf.nn.top_k(logits, 3)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print('Restore the model from checkpoint %s' %
                  ckpt.model_checkpoint_path)
            # Restores from checkpoint.
            saver.restore(sess, ckpt.model_checkpoint_path)
            start_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            raise Exception('no checkpoint found')

        result = []
        for test_image in test_images:
            temp_dict = {}
            x = scene_input.img_resize(os.path.join(test_dir, test_image),
                                       IMAGE_SIZE)
            predictions = np.squeeze(
                sess.run(indices,
                         feed_dict={features: np.expand_dims(x, axis=0),
                                    keep_prob: 1}),
                axis=0)
            temp_dict['image_id'] = test_image
            temp_dict['label_id'] = predictions.tolist()
            result.append(temp_dict)
            print('image %s is %d,%d,%d' %
                  (test_image, predictions[0], predictions[1], predictions[2]))

        with open('submit.json', 'w') as f:
            json.dump(result, f)
            print('write result json, num is %d' % len(result))
def train():
    """Train network for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        # Get images and labels for network.
        images, labels = network.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = network.inference(images)

        # Calculate loss.
        loss = network.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = network.train(loss, global_step)

        # Create a saver.
        saver = tf.train.Saver(tf.all_variables())

        # Build the summary operation based on the TF collection of Summaries.
        # summary_op = tf.merge_all_summaries()

        # Build an initialization operation to run below.
        init = tf.initialize_all_variables()

        # Start running operations on the Graph.
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        # summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
        #                                         graph_def=sess.graph_def)

        for step in xrange(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = FLAGS.batch_input_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = ('%s: step %d, loss = %.2f '
                              '(%.1f examples/sec; %.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
def test():
    with tf.Graph().as_default() as g:
        images, labels = dataset.process_inputs("testing")
        logits = network.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        variable_averages = tf.train.ExponentialMovingAverage(
            arg_parsing.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.model_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            else:
                raise ValueError("No checkpoint file found")

            coord = tf.train.Coordinator()
            try:
                threads = []
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(qr.create_threads(sess, coord=coord,
                                                     daemon=True, start=True))

                num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
                true_count = 0
                total_sample_count = num_iter * FLAGS.batch_size
                step = 0
                while step < num_iter and not coord.should_stop():
                    predictions = sess.run([top_k_op])
                    true_count += np.sum(predictions)
                    step += 1

                precision = true_count / total_sample_count
                print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
            except Exception as e:
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
def _build_training_graph(images, labels, num_classes, reuse_variables=None):
    with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
        logits, features = network.inference(
            images, num_classes, for_training=True,
            feature_name=FLAGS.feature_name)

        losses = [network.loss_ce(logits, labels)]
        regularization_losses = tf.get_collection(
            tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n(losses + regularization_losses, name='total_loss')

        # Track a moving average of the raw losses for smoother summaries.
        loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
        loss_averages_op = loss_averages.apply(losses + [total_loss])

        for l in losses + [total_loss]:
            loss_name = re.sub('%s_[0-9]*/' % network.TOWER_NAME, '', l.op.name)
            tf.summary.scalar(loss_name + ' (raw)', l)
            tf.summary.scalar(loss_name, loss_averages.average(l))

        with tf.control_dependencies([loss_averages_op]):
            total_loss = tf.identity(total_loss)

    return total_loss, logits
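# The `reuse_variables` argument and the TOWER_NAME-based summary renaming
# above suggest this graph is built once per GPU tower. A minimal usage
# sketch under that assumption (`num_gpus`, `images`, `labels`, and
# `num_classes` are hypothetical here):
for i in range(num_gpus):
    with tf.device('/gpu:%d' % i):
        with tf.name_scope('%s_%d' % (network.TOWER_NAME, i)):
            # Reuse variables for every tower after the first.
            total_loss, logits = _build_training_graph(
                images, labels, num_classes,
                reuse_variables=(i > 0) or None)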
def train(train_dir, annotations, max_step, checkpoint_dir='./checkpoint/'):
    # Train the model.
    scene_data = scene_input.scene_data_fn(train_dir, annotations)
    features = tf.placeholder("float32",
                              shape=[None, IMAGE_SIZE, IMAGE_SIZE, IMAGE_CHANNEL],
                              name="features")
    labels = tf.placeholder("float32", [None], name="labels")
    one_hot_labels = tf.one_hot(indices=tf.cast(labels, tf.int32), depth=80)
    train_step, cross_entropy, logits, keep_prob = network.inference(
        features, one_hot_labels)
    correct_prediction = tf.equal(tf.argmax(logits, 1),
                                  tf.argmax(one_hot_labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    with tf.Session() as sess:
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print('Restore the model from checkpoint %s' %
                  ckpt.model_checkpoint_path)
            # Restores from checkpoint.
            saver.restore(sess, ckpt.model_checkpoint_path)
            start_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            sess.run(tf.global_variables_initializer())
            start_step = 0
            print('start training from new state')

        logger = scene_input.train_log(LOGNAME)

        for step in range(start_step, start_step + max_step):
            start_time = time.time()
            x, y = scene_data.next_batch(BATCH_SIZE, IMAGE_SIZE)
            sess.run(train_step,
                     feed_dict={features: x, labels: y, keep_prob: 0.5})
            if step % 50 == 0:
                train_accuracy = sess.run(
                    accuracy,
                    feed_dict={features: x, labels: y, keep_prob: 1})
                train_loss = sess.run(
                    cross_entropy,
                    feed_dict={features: x, labels: y, keep_prob: 1})
                duration = time.time() - start_time
                logger.info(
                    "step %d: training accuracy %g, loss is %g (%0.3f sec)" %
                    (step, train_accuracy, train_loss, duration))
            if step % 1000 == 1:
                saver.save(sess, CHECKFILE, global_step=step)
                print('writing checkpoint at step %s' % step)
def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        float_image, label = tfrecord.eval_data_read(
            tfrecord_path=FLAGS.eval_data)
        images, labels = tfrecord.create_batch(float_image, label,
                                               count_num=FLAGS.num_examples)
        logits = network.inference(images)

        # tf.nn.in_top_k: compares the predictions with the actual labels and
        # returns a bool tensor.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        variable_averages = tf.train.ExponentialMovingAverage(
            network.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        summary_op = tf.summary.merge_all()
        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                               graph_def=graph_def)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def main(argv=None):
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    X = tf.placeholder(tf.float32, [4, sys_param.N_USER])
    Y = tf.placeholder(tf.int32, [None, 2 * sys_param.N_USER])

    # Initial UL & DL deadlines, in time slots.
    class deadline:
        True_UL_deadline = np.zeros(shape=[sys_param.N_USER, ])
        for i in range(sys_param.N_USER):
            True_UL_deadline[i] = random.randint(5 * i + 1, 5 * i + 6)
        Expected_UL_deadline = np.zeros(shape=True_UL_deadline.shape)
        True_DL_deadline = np.zeros(shape=[sys_param.N_USER, ])
        for i in range(sys_param.N_USER):
            True_DL_deadline[i] = random.randint(5 * i + 1, 5 * i + 6)

    # Initial battery level.
    class bat_level:
        # Expected_bat_level = np.random.randint(low=0, high=40, size=20)
        True_bat_level = sys_param.Tx_power * np.ones(shape=[sys_param.N_USER, ])
        Expected_bat_level = np.zeros(shape=True_bat_level.shape)

    # Initial penalty.
    class penalty:
        UL_deadline_penalty = np.zeros(shape=(1, 20), dtype=int)
        DL_deadline_penalty = np.zeros(shape=(1, 20), dtype=int)
        UL_bat_level_penalty = np.zeros(shape=(1, 20), dtype=int)
        count = np.zeros(shape=(3, 20), dtype=int)

    h_5, selection, var_dict = network.inference(keep_probability,
                                                 FLAGS.logs_dir, FLAGS.weight,
                                                 X, sys_param.N_USER)

    # Loss function.
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=h_5, labels=Y))
    trainable_var = tf.trainable_variables()
    train_op = train(loss, trainable_var)

    print("Session Open")
    sess = tf.Session()
    print("Weight Initialization")
    sess.run(tf.global_variables_initializer())

    if FLAGS.mode == "train":
        print("Start Training")
        # Load channel information.
        mat = scipy.io.loadmat('/home/mukim/Desktop/EH/H_H_hermitian.mat')
        channel = mat['H_H_hermitian']
        train_data_size = channel.shape[0]

        total_penalty = np.zeros([1, 40])
        cumul_expected_penalty = np.zeros([1, 40])
        save_loss = np.zeros([MAX_ITERATION, train_data_size // 100])
        save_penalty_count = np.zeros([MAX_ITERATION, 3, sys_param.N_USER])

        for itr in xrange(MAX_ITERATION):
            print("Process: %d iteration, Current time: %s" %
                  (itr + 1, datetime.datetime.now()))
            penalty.count = np.zeros(shape=(3, 20), dtype=int)
            for train_count in xrange(train_data_size):
                # Make the training dataset.
                UL_d_p = np.reshape(penalty.UL_deadline_penalty,
                                    newshape=[sys_param.N_USER, ])
                DL_d_p = np.reshape(penalty.DL_deadline_penalty,
                                    newshape=[sys_param.N_USER, ])
                UL_b_p = np.reshape(penalty.UL_bat_level_penalty,
                                    newshape=[sys_param.N_USER, ])
                channel_info = np.zeros(shape=[sys_param.N_USER, ])
                for i in range(sys_param.N_USER):
                    ind = np.argsort(channel[train_count, :])[i]
                    # The largest channel gain maps to sys_param.N_USER - 1,
                    # the smallest to 0.
                    channel_info[ind] = i
                # input = np.array([channel[train_count, :],
                #                   deadline.Expected_UL_deadline,
                #                   deadline.True_DL_deadline,
                #                   bat_level.Expected_bat_level])
                input = np.array([channel_info, UL_d_p, DL_d_p, UL_b_p])
                input = np.reshape(input, newshape=(4, sys_param.N_USER))

                if (train_count % 1000) < 2 * sys_param.N_USER:
                    nd_array_selection = np.zeros(shape=[1, 2 * sys_param.N_USER])
                    nd_array_selection[0][train_count % 1000] = 1
                    nd_array_selection = np.reshape(
                        nd_array_selection, newshape=[2, sys_param.N_USER])
                    deadline, bat_level, penalty, Expected_label, Expected_total_penalty = \
                        info_update(sys_param, deadline, bat_level, penalty,
                                    nd_array_selection, input)
                else:
                    feed_dict = {keep_probability: 0.7, X: input}
                    nd_array_selection = sess.run(selection, feed_dict=feed_dict)
                    deadline, bat_level, penalty, Expected_label, Expected_total_penalty = \
                        info_update(sys_param, deadline, bat_level, penalty,
                                    nd_array_selection, input)

                feed_dict = {keep_probability: 0.7, X: input, Y: Expected_label}
                sess.run(train_op, feed_dict=feed_dict)

                if train_count % (train_data_size - 1) == 0:
                    if (itr + 1) % 10 == 0:
                        weight_dict_ = sess.run(var_dict, feed_dict=feed_dict)
                        np.save("/home/mukim/Desktop/EH/weight" + "_" +
                                str(itr + 1) + ".npy", weight_dict_)
                        print("Weight saved!")

                if train_count % 100 == 0:
                    train_loss, weight_dict_ = sess.run([loss, var_dict],
                                                        feed_dict=feed_dict)
                    # print("-----------------Penalty count-------------------")
                    # print(penalty.count)
                    # print("------------True battery level----------------")
                    # print(bat_level.True_bat_level)
                    # print("------------Expected battery level----------------")
                    # print(bat_level.Expected_bat_level)
                    # current_penalty = cal_penalty(penalty)
                    # total_penalty = total_penalty + current_penalty
                    # cumul_expected_penalty = cumul_expected_penalty + Expected_total_penalty
                    # print(current_penalty)
                    # print("--------------EXPECTATION---------------")
                    # print(Expected_total_penalty)
                    save_loss[itr][train_count // 100] = train_loss
                    print("Time: %s, Round: %d, Batch: %d, Train_loss:%g" %
                          (datetime.datetime.now(), itr + 1, train_count,
                           train_loss))

            save_penalty_count[itr, :, :] = penalty.count
            np.save('/home/mukim/Desktop/EH/h7_channelinfo_loss.npy', save_loss)
            np.save('/home/mukim/Desktop/EH/h7_channelinfo_penalty_count.npy',
                    save_penalty_count)

    elif FLAGS.mode == "test":
        print("To be continued...")
TOTAL_STEPS = int(os.environ.get('TOTAL_STEPS') or 1000)
LEARNING_RATE = float(os.environ.get('LEARNING_RATE') or 0.1)
RESTORE = (os.environ.get('RESTORE') or '') == 'true'

learning_rate_value = LEARNING_RATE

session_config = tf.ConfigProto(log_device_placement=True)
session_config.gpu_options.allow_growth = True
# This is required if we want to use the GPU as the device.
# See: https://github.com/tensorflow/tensorflow/issues/2292
session_config.allow_soft_placement = True

if __name__ == "__main__":
    with tf.Graph().as_default() as g, tf.device(USE_DEVICE):
        # inference()
        input, deep_features = network.inference()
        labels, logits, cross_entropy = network.loss(deep_features)
        centroid_loss, centroids, spread = network.center_loss(
            deep_features, labels)

        # Combine the two losses.
        _lambda = tf.placeholder(dtype=tf.float32)
        total_loss = cross_entropy + _lambda / 2. * centroid_loss

        learning_rate, train, global_step = network.training(total_loss)
        eval = network.evaluation(logits, labels)

        init = tf.initialize_all_variables()

        with tf.Session(config=session_config) as sess, \
                h5py.File(DUMP_FILE, 'a', libver='latest', swmr=True) as h5_file:
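            # The body of this session block is not shown. Since `_lambda` is
            # a placeholder, each training step presumably feeds the
            # center-loss weight explicitly; a minimal sketch under that
            # assumption (the 0.01 weight and feeding `learning_rate` as a
            # placeholder are hypothetical):
            sess.run(init)
            for step in range(TOTAL_STEPS):
                _, loss_value = sess.run(
                    [train, total_loss],
                    feed_dict={_lambda: 0.01,
                               learning_rate: learning_rate_value})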
def train(networkmodel, MODEL_SAVE_PATH, MODEL_NAME):
    # with tf.device('/gpu:0'):
    with tf.device('/cpu:0'):
        train_start = time.time()
        # Generate training data with labels.
        x, y_ = readdata.get_batch(train=True, batch_size=BATCH_SIZE,
                                   num_epochs=None)
        # Generate test data with labels.
        text_x, text_y = readdata.get_batch(train=False, batch_size=BATCH_SIZE,
                                            num_epochs=None)
        if networkmodel:
            # Fully connected network: flatten the input to one dimension
            # (-1 means the batch size is unknown).
            x = tf.reshape(x, [-1, x.shape[1] * x.shape[2] * x.shape[3]])
            # Training output.
            y = network.inference(x, avg_class=None, reuse=False, lamada=None)
        else:
            # Convolutional model: training input/output tensors.
            y = cnn.inference(x, False, False, regularizer=None)

        # Global step: starts at 0 and is incremented by 1 per batch.
        global_step = tf.Variable(0, trainable=False)

        if networkmodel:
            # Flatten the test data to one dimension for the network input.
            text_x = tf.reshape(
                text_x, [-1, text_x.shape[1] * text_x.shape[2] * text_x.shape[3]])
            # Test output.
            average_y = network.inference(text_x, avg_class=None, reuse=True,
                                          lamada=None)
        else:
            # Convolutional model: test input/output tensors.
            average_y = cnn.inference(text_x, True, False, regularizer=None)

        # Cross entropy measures how well the model estimates the true
        # distribution; average it over the batch.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        # Loss function.
        loss = cross_entropy_mean
        # Training op: gradient descent with learning rate LEARNING_RATE,
        # minimizing the loss.
        train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            loss, global_step=global_step)
        # Build the graph.
        with tf.control_dependencies([train_step]):
            train_op = tf.no_op(name='train')

        # Check whether the predicted digit class is the true class;
        # tf.argmax extracts the true class.
        correct_prediction = tf.equal(tf.argmax(average_y, 1),
                                      tf.argmax(text_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Saver for model persistence.
        saver = tf.train.Saver()

        # Create a session and start training.
        with tf.Session() as sess:
            # Initialize the model parameters.
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())
            # Create a coordinator to manage threads and start all queue
            # runner threads.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            # Train the network iteratively.
            for i in range(TRAINING_STEPS):
                start_time = time.time()
                _, loss_value, step = sess.run([train_op, loss, global_step])
                end_time = time.time()
                print('Training elapsed each step time:%f s' %
                      (end_time - start_time))
                # Print the training loss.
                if (i + 1) % 10 == 0:
                    print("After %d training step(s), loss on training batch "
                          "is %g." % (step, loss_value))
                # Print the validation accuracy.
                if (i + 1) % 100 == 0:
                    validate_acc = sess.run(accuracy)
                    print("After %d training step(s), validation accuracy "
                          "using average model is %g." % (step, validate_acc))
                    # Save the model.
                    saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                               global_step=global_step)

            train_end = time.time()
            print('Training elapsed total time:%f s' % (train_end - train_start))
            coord.request_stop()  # Ask all threads to stop.
            coord.join(threads)
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # Force input pipelines to CPU:0 to avoid operations sometimes ending
        # up on GPU and resulting in a slowdown.
        with tf.device('/cpu:0'):
            images, labels = network.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = network.inference(images)
        # print(logits.get_shape())
        # print(labels.get_shape())
        # os.system('pause')

        # Calculate loss.
        loss = network.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = network.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = (FLAGS.log_frequency *
                                        FLAGS.batch_size / duration)
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = ('%s: step %d, loss = %.2f '
                                  '(%.1f examples/sec; %.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    log_device_placement=FLAGS.log_device_placement)) as mon_sess:
            print('Successfully created session')
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def train(networkmodel, MODEL_SAVE_PATH, MODEL_NAME):
    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name!')
    else:
        print('job_name : %s' % FLAGS.job_name)
    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    else:
        print('task_index : %d' % FLAGS.task_index)

    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    # Create the cluster.
    # num_worker = len(worker_spec)
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    server = tf.train.Server(cluster, job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)
    if FLAGS.job_name == 'ps':
        server.join()

    is_chief = (FLAGS.task_index == 0)
    # worker_device = '/job:worker/task%d/cpu:0' % FLAGS.task_index
    with tf.device(tf.train.replica_device_setter(cluster=cluster)):
        # Generate training data with labels.
        x, y_ = readdata.get_batch(train=True, batch_size=BATCH_SIZE,
                                   num_epochs=None)
        # Generate test data with labels.
        text_x, text_y = readdata.get_batch(train=False, batch_size=BATCH_SIZE,
                                            num_epochs=50)
        if networkmodel:
            # Fully connected network: flatten the input to one dimension
            # (-1 means the batch size is unknown).
            x = tf.reshape(x, [-1, x.shape[1] * x.shape[2] * x.shape[3]])
            # Training output.
            y = network.inference(x, avg_class=None, reuse=False, lamada=None)
        else:
            # Convolutional model: training input/output tensors.
            y = cnn.inference(x, False, False, regularizer=None)

        # Global training step: starts at 0, incremented by 1 per batch.
        global_step = tf.Variable(0, name='global_step', trainable=False)

        if networkmodel:
            # Flatten the test data to one dimension for the network input.
            text_x = tf.reshape(
                text_x, [-1, text_x.shape[1] * text_x.shape[2] * text_x.shape[3]])
            # Test output.
            average_y = network.inference(text_x, avg_class=None, reuse=True,
                                          lamada=None)
        else:
            # Convolutional model: test input/output tensors.
            average_y = cnn.inference(text_x, True, False, regularizer=None)

        # Cross entropy measures how well the model estimates the true
        # distribution; average it over the batch to get the loss.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        loss = tf.reduce_mean(cross_entropy)
        # Training op: gradient descent with learning rate LEARNING_RATE,
        # minimizing the loss.
        train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            loss, global_step=global_step)

        # Check whether the predicted digit class is the true class;
        # tf.argmax extracts the true class.
        correct_prediction = tf.equal(tf.argmax(average_y, 1),
                                      tf.argmax(text_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # # Build the graph.
        # with tf.control_dependencies([train_step]):
        #     train_op = tf.no_op(name='train')

        # Local parameter initialization op.
        init_op = tf.global_variables_initializer()
        train_dir = tempfile.mkdtemp()
        sv = tf.train.Supervisor(is_chief=is_chief, logdir=train_dir,
                                 init_op=init_op, recovery_wait_secs=1,
                                 global_step=global_step)

        if is_chief:
            print('Worker %d: Initializing session...' % FLAGS.task_index)
        else:
            print('Worker %d: Waiting for session to be initialized...' %
                  FLAGS.task_index)
        sess = sv.prepare_or_wait_for_session(server.target)
        print('Worker %d: Session initialization complete.' % FLAGS.task_index)

        time_begin = time.time()
        print('Training begins @ %f' % time_begin)

        # Create a coordinator and start all queue runner threads once,
        # before the training loop (not once per step).
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        local_step = 0
        for i in range(TRAINING_STEPS):
            _, step, loss_value = sess.run([train_step, global_step, loss])
            local_step += 1
            now = time.time()
            print('%f: Worker %d: training step %d done (global step: %d)' %
                  (now, FLAGS.task_index, local_step, step))
            # Print the validation accuracy.
            if (i + 1) % 100 == 0:
                validate_acc = sess.run(accuracy)
                print("After %d training step(s), validation accuracy using "
                      "average model is %g." % (step, validate_acc))

        coord.request_stop()  # Ask all threads to stop.
        coord.join(threads)

        time_end = time.time()
        print('Training ends @ %f' % time_end)
        train_time = time_end - time_begin
        print('Training elapsed time:%f s' % train_time)
        sess.close()
def train():
    total_epoch, total_iter = 100, 0
    best_loss, init_lr = 1e10, 5e-5
    batch_size, image_h, image_w = 8, 512, 512

    image = tf.placeholder(tf.float32, [None, image_h, image_w, 3])
    label = tf.placeholder(tf.float32, [None, image_h, image_w, 3])
    lr = tf.placeholder(tf.float32)

    pred = inference(image, width=0.75, is_training=True)

    c_loss = color_loss(pred, label)
    s_loss = smoothness_loss(pred)
    r_loss = reconstruct_loss(pred, label)
    total_loss = 1e-2 * c_loss + 1e2 * s_loss + r_loss
    # total_loss = c_loss + r_loss

    all_vars = tf.trainable_variables()
    backbone_vars = [var for var in all_vars if 'backbone' in var.name]

    train_psnr = cal_psnr(pred, label)

    tf.summary.scalar('loss', total_loss)
    tf.summary.scalar('color_loss', c_loss)
    tf.summary.scalar('smoothness_loss', s_loss)
    tf.summary.scalar('reconstruct_loss', r_loss)
    tf.summary.scalar('psnr', train_psnr)

    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    train_op = optimizer.minimize(total_loss)
    # Run the batch-norm update ops together with the training step.
    train_op = tf.group([train_op, update_ops])

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    '''
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.75)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options))
    '''

    train_writer = tf.summary.FileWriter('train_log', sess.graph)
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()

    with tf.device('/device:GPU:0'):
        sess.run(tf.global_variables_initializer())
        # Load the pretrained backbone weights.
        weight = np.load('mobilenetv2_075.npy', allow_pickle=True)
        assign_ops = []
        for var, para in zip(backbone_vars, weight):
            assign_ops.append(var.assign(para))
        sess.run(assign_ops)

        data_dir = 'data_location_in_your_computer'
        dataloader = get_train_loader((image_h, image_w), batch_size, data_dir)

        for epoch in range(total_epoch):
            for batch in tqdm(dataloader):
                total_iter += 1
                _, train_info, loss = sess.run(
                    [train_op, summary_op, total_loss],
                    feed_dict={image: batch[0], label: batch[1], lr: init_lr})
                train_writer.add_summary(train_info, total_iter)
                if np.mod(total_iter, 20) == 0:
                    print('{}th epoch, {}th iter, loss: {}'.format(
                        epoch, total_iter, loss))
                    if loss < best_loss:
                        best_loss = loss
                        saver.save(sess, 'saved_models/model',
                                   global_step=total_iter)
def main(_):
    with tf.Graph().as_default():
        images, labels = utils.prepare_testdata(FLAGS.dataset_dir,
                                                FLAGS.batch_size)
        logits, _ = network.inference(images, FLAGS.num_classes,
                                      for_training=False,
                                      feature_name=FLAGS.feature_name)
        top_1_op = tf.nn.in_top_k(logits, labels, 1)
        top_5_op = tf.nn.in_top_k(logits, labels, 5)

        var_averages = tf.train.ExponentialMovingAverage(FLAGS.ema_decay)
        var_to_restore = var_averages.variables_to_restore()
        saver = tf.train.Saver(var_to_restore)

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
        model_checkpoint_path = ckpt.model_checkpoint_path
        init = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init)
            saver.restore(sess, model_checkpoint_path)
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
            print('Successfully loaded model from %s at step=%s.' %
                  (model_checkpoint_path, global_step))

            coord = tf.train.Coordinator()
            try:
                threads = []
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(qr.create_threads(sess, coord=coord,
                                                     daemon=True, start=True))

                num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
                print('num_iter = ' + str(num_iter))
                # Counts the number of correct predictions.
                count_top_1 = count_top_5 = 0.0
                total_sample_count = num_iter * FLAGS.batch_size
                step = 0

                print('%s: starting evaluation on (%s).' %
                      (datetime.now(), 'test'))
                start_time = time.time()
                while step < num_iter and not coord.should_stop():
                    top_1, top_5 = sess.run([top_1_op, top_5_op])
                    count_top_1 += np.sum(top_1)
                    count_top_5 += np.sum(top_5)
                    step += 1
                    # Print progress every 20 batches.
                    if step % 20 == 0:
                        duration = time.time() - start_time
                        sec_per_batch = duration / 20.0
                        examples_per_sec = FLAGS.batch_size / sec_per_batch
                        print('%s: [%d batches out of %d] (%.1f examples/sec; '
                              '%.3f sec/batch)' %
                              (datetime.now(), step, num_iter,
                               examples_per_sec, sec_per_batch))
                        start_time = time.time()

                # Compute precision @ 1 (accuracy) and recall @ 5, and print
                # the results.
                precision_at_1 = count_top_1 / total_sample_count
                recall_at_5 = count_top_5 / total_sample_count
                print('%s: precision @ 1 = %.4f recall @ 5 = %.4f [%d examples]' %
                      (datetime.now(), precision_at_1, recall_at_5,
                       total_sample_count))

                # Save the results into a txt file.
                file_path = FLAGS.eval_dir + FLAGS.save_txt
                text_file = open(file_path, 'a')
                text_file.write(FLAGS.checkpoint_path)
                text_file.write('\n')
                text_file.write('%s: precision @ 1 = %.4f recall @ 5 = %.4f' %
                                (datetime.now(), precision_at_1, recall_at_5))
                text_file.write('\n')
                text_file.close()
            except Exception as e:  # pylint: disable=broad-except
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
def main():
    args = get_parser().parse_args()

    observation_length = 17
    action_length = 6

    # Read the expert rollouts from disk.
    observations, actions = load_data(args.rollouts_file)
    print("observations shape = " + str(observations.shape))
    print("actions shape = " + str(actions.shape))

    # Make sure our files exist!
    assert os.path.exists(os.path.dirname(os.path.abspath(args.stats_file)))

    # Load the expert.
    print("Loading and building expert policy.")
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print("Expert policy loaded and built.")

    # Assemble the network.
    opl = tf.placeholder(tf.float32, shape=(None, observation_length),
                         name="observations")
    apl = tf.placeholder(tf.float32, shape=(None, action_length),
                         name="actions")
    logits = network.inference(opl, observation_length, args.hidden1,
                               args.hidden2, action_length)
    errors, loss = network.loss(logits, apl)
    global_step, train_op = network.training(loss, args.learning_rate)

    with tf.Session() as sess:
        # Initialize the network.
        tf_util.initialize()
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(args.checkpoint_dir))

        env = gym.make("Walker2d-v1")
        max_steps = env.spec.timestep_limit

        avg_returns = []
        stddev_returns = []
        observations = list(observations)
        actions = list(actions)

        for iteration in range(args.num_iterations):
            obs = np.array(observations)
            acts = np.array(actions)
            assert obs.shape[0] == acts.shape[0]

            # Train the network.
            if iteration != 0:
                num_batches = int(obs.shape[0] / args.batch_size)
                for step in range(args.training_steps):
                    i = step % num_batches
                    if i == 0:
                        p = np.random.permutation(obs.shape[0])
                        obs = obs[p]
                        acts = acts[p]
                    start = int(i * args.batch_size)
                    stop = int((i + 1) * args.batch_size)
                    feed_dict = {opl: obs[start:stop], apl: acts[start:stop]}
                    _, loss_value, step_value = sess.run(
                        [train_op, loss, global_step], feed_dict=feed_dict)
                    if step % 100 == 0:
                        loss_value = sess.run(loss,
                                              feed_dict={opl: obs, apl: acts})
                        msg = "Iteration {}; step {}; loss = {}".format(
                            iteration, step_value, loss_value)
                        print(msg)

            # Generate new rollouts.
            rewards = []
            for i in range(args.num_rollouts):
                print("Iteration {}; rollout {}".format(iteration, i))
                obs = env.reset()
                done = False
                steps = 0
                totalr = 0
                while not done:
                    expert_action = policy_fn(obs[None, :])
                    observations.append(obs)
                    actions.append(expert_action[0])
                    action = sess.run(logits, feed_dict={opl: obs[None, :]})
                    obs, r, done, _ = env.step(action)
                    totalr += r
                    steps += 1
                    if steps >= max_steps:
                        break
                rewards.append(totalr)

            print("Iteration {}; average return {}".format(
                iteration, np.mean(rewards)))
            print("Iteration {}; stddev return {}".format(
                iteration, np.std(rewards)))
            avg_returns.append(np.mean(rewards))
            stddev_returns.append(np.std(rewards))

            with open(args.stats_file, "w") as f:
                stats = {"mean_return": avg_returns,
                         "stddev_returns": stddev_returns}
                json.dump(stats, f, indent=4)
def main():
    ################################ USER INPUT ################################
    # Training parameters:
    if len(sys.argv) >= 8:
        IMAGE_NAME = sys.argv[1]      # e.g. '1'
        NETWORK_NAME = sys.argv[2]    # 'unet', 'deep_decoder'
        LOSS_NAME = sys.argv[3]       # 'mse', 'l1', 'mse_l1', 'mse_with_tv_reg', 'mse_with_edge_reg'
        OPTIMIZER_TYPE = sys.argv[4]  # 'sgd', 'adam'
        LEARNING_RATE = float(sys.argv[5])
        NUM_ITERATIONS = int(sys.argv[6])
        ITERATIONS_TO_SAVE = int(sys.argv[7])
        if len(sys.argv) == 11:
            w_h = float(sys.argv[8])
            w_v = float(sys.argv[9])
            w_mse = float(sys.argv[10])
        else:
            w_h = None
            w_v = None
            w_mse = None
    else:
        print('Not enough input parameters.')
        return
    #############################################################################

    # Load images:
    RAW_FILENAME = os.path.join('Raw', '{}_Raw Image.tif'.format(IMAGE_NAME))
    AVERAGED_FILENAME = os.path.join('Averaged',
                                     '{}_Averaged Image.tif'.format(IMAGE_NAME))
    try:
        input_image = hf.get_training_image(RAW_FILENAME)
    except Exception:
        print("Error loading {}".format(RAW_FILENAME))
        return
    try:
        ground_truth = hf.get_training_image(AVERAGED_FILENAME)
    except Exception:
        print("Error loading {}".format(AVERAGED_FILENAME))
        return

    # Validate settings:
    VALID_NETWORK_NAMES = ["unet", "deep_decoder"]
    VALID_OPTIMIZER_TYPES = ["sgd", "adam"]
    VALID_LOSS_NAMES = ["mse", "l1", "mse_l1", "mse_with_tv_reg",
                        "mse_with_edge_reg"]
    if NETWORK_NAME not in VALID_NETWORK_NAMES:
        print("Error: {} network does not exist.".format(NETWORK_NAME))
        return
    if OPTIMIZER_TYPE not in VALID_OPTIMIZER_TYPES:
        print("Error: {} optimizer does not exist.".format(OPTIMIZER_TYPE))
        return
    if LOSS_NAME not in VALID_LOSS_NAMES:
        print("Error: {} loss does not exist.".format(LOSS_NAME))
        return

    # Create folder to save results:
    SAVE_FOLDER = os.path.join('./results', IMAGE_NAME)
    count = 0
    CHECK_FOLDER = SAVE_FOLDER
    while os.path.exists(CHECK_FOLDER):
        count += 1
        CHECK_FOLDER = '{}({})'.format(SAVE_FOLDER, count)
    SAVE_FOLDER = CHECK_FOLDER
    os.mkdir(SAVE_FOLDER)

    WRITE_FILENAME = os.path.join(SAVE_FOLDER, 'metrics.txt')
    with open(WRITE_FILENAME, 'a') as wf:
        wf.write('PARAMETERS\nNetwork: {}\nLoss: {}\nOptimizer: {}\n'
                 'Learning rate: {}\nNumber of iterations: {}'.format(
                     NETWORK_NAME, LOSS_NAME, OPTIMIZER_TYPE, LEARNING_RATE,
                     NUM_ITERATIONS))
        wf.write('\n\nw_h: {}\nw_v: {}\nw_mse: {}'.format(w_h, w_v, w_mse))
        wf.write('\n\nIteration\tLoss\tSNR\tCNR\tSSIM')

    # Get input noise:
    if NETWORK_NAME == "unet":
        input_noise = hf.get_noise_matrix(input_image.shape[1],
                                          input_image.shape[2], 32)
    elif NETWORK_NAME == "deep_decoder":
        input_noise = hf.get_noise_matrix(input_image.shape[1] // (2 ** 4),
                                          input_image.shape[2] // (2 ** 4), 64)

    # Save inputs:
    save_filename = os.path.join(SAVE_FOLDER, 'input_image.tif')
    imsave(save_filename, input_image[0, :, :, 0], cmap='gray')
    save_filename = os.path.join(SAVE_FOLDER, 'ground_truth.tif')
    imsave(save_filename, ground_truth[0, :, :, 0], cmap='gray')

    # Calculate initial metrics:
    snr_i = hf.calculate_metrics(ground_truth, input_image, 'snr', IMAGE_NAME)
    cnr_i = hf.calculate_metrics(ground_truth, input_image, 'cnr', IMAGE_NAME)
    ssim_i = hf.calculate_metrics(ground_truth, input_image, 'ssim', IMAGE_NAME)
    with open(WRITE_FILENAME, 'a') as wf:
        wf.write('\ninput_image\tN/A\t{}\t{}\t{}'.format(snr_i, cnr_i, ssim_i))

    # Placeholders:
    z = tf.placeholder(tf.float32,
                       shape=[1, None, None, input_noise.shape[3]])  # input noise
    x = tf.placeholder(tf.float32, shape=[1, None, None, 1])  # input image

    # Network:
    y = network.inference(NETWORK_NAME, z,
                          height=input_noise.shape[1],
                          width=input_noise.shape[2],
                          channels=input_noise.shape[3])
    if LOSS_NAME in ("mse_with_edge_reg", "mse_with_tv_reg"):
        loss, mse, edge_h, edge_v = network.loss(y, x, LOSS_NAME, w_h, w_v,
                                                 w_mse)
    else:
        loss = network.loss(y, x, LOSS_NAME)

    # Update moving mean and variance for batch normalization (if required):
    if NETWORK_NAME == "deep_decoder":
        update_op = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Create different optimizers here:
    if OPTIMIZER_TYPE == "sgd":
        train_op = tf.train.GradientDescentOptimizer(
            learning_rate=LEARNING_RATE).minimize(loss)
    elif OPTIMIZER_TYPE == "adam":
        train_op = tf.train.AdamOptimizer(
            learning_rate=LEARNING_RATE).minimize(loss)

    # Start session:
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Keep track of metrics:
        track_iter = []
        track_loss = []
        track_snr = []
        track_cnr = []
        track_ssim = []

        for i in range(NUM_ITERATIONS + 1):
            if NETWORK_NAME == "unet":
                if LOSS_NAME in ("mse_with_edge_reg", "mse_with_tv_reg"):
                    _, output_image, loss_i, mse_i, edge_h_i, edge_v_i = sess.run(
                        [train_op, y, loss, mse, edge_h, edge_v],
                        feed_dict={z: input_noise, x: input_image})
                else:
                    _, output_image, loss_i = sess.run(
                        [train_op, y, loss],
                        feed_dict={z: input_noise, x: input_image})
            elif NETWORK_NAME == "deep_decoder":
                if LOSS_NAME in ("mse_with_edge_reg", "mse_with_tv_reg"):
                    _, _, output_image, loss_i, mse_i, edge_h_i, edge_v_i = sess.run(
                        [update_op, train_op, y, loss, mse, edge_h, edge_v],
                        feed_dict={z: input_noise, x: input_image})
                else:
                    _, _, output_image, loss_i = sess.run(
                        [update_op, train_op, y, loss],
                        feed_dict={z: input_noise, x: input_image})

            if i % ITERATIONS_TO_SAVE == 0:
                # Save image:
                save_filename = os.path.join(SAVE_FOLDER,
                                             'iteration_{}.tif'.format(i))
                imsave(save_filename, output_image[0, :, :, 0], cmap='gray')

                # Calculate metrics:
                snr_i = hf.calculate_metrics(ground_truth, output_image,
                                             'snr', IMAGE_NAME)
                cnr_i = hf.calculate_metrics(ground_truth, output_image,
                                             'cnr', IMAGE_NAME)
                ssim_i = hf.calculate_metrics(ground_truth, output_image,
                                              'ssim', IMAGE_NAME)
                with open(WRITE_FILENAME, 'a') as wf:
                    wf.write('\n{}\t{}\t{}\t{}\t{}'.format(
                        i, loss_i, snr_i, cnr_i, ssim_i))

                # Display:
                if LOSS_NAME in ("mse_with_edge_reg", "mse_with_tv_reg"):
                    print('Iteration {}/{}\t| Loss: {}\tSNR: {}\tCNR: {}\t'
                          'SSIM: {}\tMSE: {}\tEdge_h: {}\tEdge_v: {}'.format(
                              i, NUM_ITERATIONS, loss_i, snr_i, cnr_i, ssim_i,
                              mse_i, edge_h_i, edge_v_i))
                else:
                    print('Iteration {}/{}\t| Loss: {}\tSNR: {}\tCNR: {}\t'
                          'SSIM: {}'.format(i, NUM_ITERATIONS, loss_i, snr_i,
                                            cnr_i, ssim_i))

                # Track:
                track_iter.append(i)
                track_loss.append(loss_i)
                track_snr.append(snr_i)
                track_cnr.append(cnr_i)
                track_ssim.append(ssim_i)

        # Plot:
        hf.plot_metrics(track_iter, track_loss, 'loss',
                        os.path.join(SAVE_FOLDER, 'loss.tif'))
        hf.plot_metrics(track_iter, track_snr, 'snr',
                        os.path.join(SAVE_FOLDER, 'snr.tif'))
        hf.plot_metrics(track_iter, track_cnr, 'cnr',
                        os.path.join(SAVE_FOLDER, 'cnr.tif'))
        hf.plot_metrics(track_iter, track_ssim, 'ssim',
                        os.path.join(SAVE_FOLDER, 'ssim.tif'))
        print('Completed.')
import os
import sys

sys.path.append(os.path.abspath(os.path.join(
    os.path.dirname(__file__), os.path.pardir, 'tracker')))

import tensorflow as tf

import network

# Load tensorflow.
tf.Graph().as_default()

batchSize = 1
delta = 1

imagePlaceholder = tf.placeholder(tf.float32,
                                  shape=(batchSize * delta * 2, 227, 227, 3))
labelsPlaceholder = tf.placeholder(tf.float32, shape=(batchSize * delta, 4))
learningRate = tf.placeholder(tf.float32)

tfOutputs = network.inference(imagePlaceholder, num_unrolls=delta, train=True)
tfLossFull, tfLoss = network.loss(tfOutputs, labelsPlaceholder)
train_op = network.training(tfLossFull, learningRate)
summary = tf.summary.merge_all()
init = tf.global_variables_initializer()
saver = tf.train.Saver()

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
sess = tf.Session(config=config)
summary_writer = tf.summary.FileWriter('logs/train/caffe_copy', sess.graph)
ops = []

with sess.as_default():
    sess.run(init)
    import caffe
    caffe.set_mode_cpu()
if not convolutional:
    x = tf.placeholder(tf.float32, shape=[None, img_size_flat], name='x')
else:
    x = tf.placeholder(tf.float32,
                       shape=[None, img_shape[0], img_shape[1], num_channels],
                       name='x')

x_image = tf.reshape(x, [-1, img_size, img_size, num_channels])
tf.summary.image('input', x_image, 3)

y_true = tf.placeholder(tf.float32, shape=[None, num_classes], name='y_true')
y_true_cls = tf.argmax(y_true, dimension=1)

"""
Inference (Forward Pass)
"""
logits, features = inference(x_image, num_classes=num_classes)
y_probs = tf.nn.softmax(logits)
tf.summary.histogram('probs', y_probs)
y_pred = tf.argmax(logits, dimension=1)

"""
Restore variables
"""
saver = tf.train.Saver()
saver.restore(session, tf.train.latest_checkpoint('./saved_models'))

"""
Data set Configuration
"""
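# With the variables restored, predictions can be pulled from the graph by
# feeding a batch through the `x` placeholder; a minimal sketch (the
# `batch_x` stand-in array is hypothetical, and assumes the non-convolutional
# flattened input):
batch_x = np.zeros((2, img_size_flat), dtype=np.float32)  # stand-in batch
pred_classes, pred_probs = session.run([y_pred, y_probs],
                                       feed_dict={x: batch_x})
print(pred_classes)       # predicted class index per image
print(pred_probs.max(1))  # confidence of each prediction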
def run_training():
    """Train network for a number of epochs."""
    # Tell TensorFlow that the model will be built into the default Graph.
    with tf.Graph().as_default():
        with tf.name_scope('input'):
            # Input data; pin to the CPU because the rest of the pipeline is
            # CPU-only.
            with tf.device('/cpu:0'):
                input_data = tf.constant(training_data)
                input_labels = tf.constant(training_labels)

            input, label = tf.train.slice_input_producer(
                [input_data, input_labels], num_epochs=FLAGS.num_epochs)
            label = tf.cast(label, tf.int32)
            input, labels = tf.train.batch([input, label],
                                           batch_size=FLAGS.batch_size)

        # Build a Graph that computes predictions from the inference model.
        logits = network.inference(input, FLAGS.hidden1, FLAGS.hidden2)

        # Add to the Graph the Ops for loss calculation.
        loss = network.loss(logits, labels)

        # Add to the Graph the Ops that calculate and apply gradients.
        train_op = network.training(loss, FLAGS.learning_rate)

        # Add the Op to compare the logits to the labels during evaluation.
        eval_correct = network.evaluation(logits, labels)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Create a saver for writing training checkpoints.
        saver = tf.train.Saver()

        # Create the op for initializing variables.
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

        # Create a session for running Ops on the Graph.
        sess = tf.Session()

        # Run the Op to initialize the variables.
        sess.run(init_op)

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, sess.graph)

        # Start input enqueue threads.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # And then after everything is built, start the training loop.
        for ep in xrange(FLAGS.num_epochs):
            for step in xrange(FLAGS.max_steps):
                start_time = time.time()
                _, loss_value = sess.run([train_op, loss])
                duration = time.time() - start_time

                # Stop early once the loss is effectively zero.
                if loss_value <= 1e-5:
                    print('Loss value: %.4f, done training for %d epochs, '
                          '%d steps.' % (loss_value, ep,
                                         ep * FLAGS.max_steps + step))
                    return

                # Write the summaries and print an overview fairly often.
                if step % 100 == 0:
                    # Print status to stdout.
                    print('Epochs %d: loss = %.4f (%.3f sec)' %
                          (ep, loss_value, duration))
                    # Update the events file.
                    summary_str = sess.run(summary_op)
                    summary_writer.add_summary(summary_str, step)

                # Save a checkpoint periodically.
                if (step + 1) % 1000 == 0 or (step + 1) == FLAGS.max_steps:
                    print('Saving')
                    saver.save(sess, FLAGS.train_dir, global_step=step)
print('Train shape: ', x_train.shape)
print('Test shape: ', x_test.shape)
print(x_test[0:3, :])

x = tf.placeholder(tf.float32, [None, predict_slide])
y = tf.placeholder(tf.float32, [None])

# Create train & test data pipeline.
total_train_data = x_train.shape[0]
batch_size = 256
epochs = 10
learn_rate = 0.05
total_batch = int(np.floor(total_train_data / batch_size)) + 1

predict_y = net.inference(x, reuse=tf.AUTO_REUSE)
predict_y = tf.squeeze(predict_y)  # Remove size-1 dimensions.
mse = tf.losses.mean_squared_error(labels=y, predictions=predict_y)
# mse = tf.reduce_mean(tf.squared_difference(predict_y, y))
with tf.variable_scope('opt', reuse=tf.AUTO_REUSE):
    train_op = tf.train.AdamOptimizer(learn_rate).minimize(mse)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # print(tf.global_variables())
    print('Train data size: ', x_train.shape[0])
    print('Total batch:', total_batch)
    print('Start training...')
    print(x_test[0:3, :])
    loss_curve = []
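    # The snippet is cut off after `loss_curve = []`. A minimal sketch of the
    # batched training loop this setup implies; the existence of `y_train`
    # and the slicing of x_train into batches are assumptions:
    for epoch in range(epochs):
        for b in range(total_batch):
            batch_x = x_train[b * batch_size:(b + 1) * batch_size]
            batch_y = y_train[b * batch_size:(b + 1) * batch_size]
            if batch_x.shape[0] == 0:
                continue  # last slice can be empty
            _, loss_val = sess.run([train_op, mse],
                                   feed_dict={x: batch_x, y: batch_y})
        loss_curve.append(loss_val)
        print('Epoch {}: mse = {}'.format(epoch, loss_val))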
BATCH_SIZE = 100
LEARNING_RATE_BASE = 0.01
LEARNING_RATE_DECAY = 0.99
REGULARIZATION_RATE = 0.0001
TRAINING_STEPS = 30000
MOVING_AVERAGE_DECAY = 0.99
MODEL_SAVE_PATH = "net/"
MODEL_NAME = "model"

# Define the input and output placeholders.
x = tf.placeholder(tf.float32, [None, network.INPUT_NODE], name='x-input')
y_ = tf.placeholder(tf.float32, [None, network.OUTPUT_NODE], name='y-input')

# Build the model.
regularizer = tf.contrib.layers.l2_regularizer(REGULARIZATION_RATE)
y = network.inference(x, regularizer)

# Define the moving-average operation.
global_step = tf.Variable(0, trainable=False)
variable_averages = tf.train.ExponentialMovingAverage(MOVING_AVERAGE_DECAY,
                                                      global_step)
variables_averages_op = variable_averages.apply(tf.trainable_variables())

# Define the loss function.
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_)
cross_entropy_mean = tf.reduce_mean(cross_entropy)
loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))

# Define the learning-rate schedule.
learning_rate = tf.train.exponential_decay(LEARNING_RATE_BASE, global_step, 1,
                                           LEARNING_RATE_DECAY, staircase=True)
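# The script is cut off here. In the standard pattern this setup follows,
# the remainder builds the training op, groups it with the moving-average
# update, and runs the session loop; a minimal sketch under that assumption
# (`next_batch` is a hypothetical data source yielding (xs, ys)):
train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
    loss, global_step=global_step)
# Update the shadow variables together with each training step.
with tf.control_dependencies([train_step, variables_averages_op]):
    train_op = tf.no_op(name='train')

saver = tf.train.Saver()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for i in range(TRAINING_STEPS):
        xs, ys = next_batch(BATCH_SIZE)
        _, loss_value, step = sess.run([train_op, loss, global_step],
                                       feed_dict={x: xs, y_: ys})
        if i % 1000 == 0:
            print("After %d training step(s), loss on training batch is %g."
                  % (step, loss_value))
            saver.save(sess, os.path.join(MODEL_SAVE_PATH, MODEL_NAME),
                       global_step=global_step)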