def inference_simple(dataset):
    """Build a small two-convolution classifier graph and return its logits.

    Layers, in order: 5x5 conv (1 -> 32 channels) + ReLU + 2x2 max-pool,
    3x3 conv (32 -> 64 channels) + ReLU + 2x2 max-pool, then a single fully
    connected layer with ``NUM_LABELS`` outputs.  A histogram summary is
    registered for every weight and bias tensor.

    Args:
        dataset: image tensor fed to the first convolution (presumably NHWC
            with a single channel, given the [5, 5, 1, 32] kernel -- confirm
            against the caller).

    Returns:
        The un-normalized class scores; no softmax is applied here.
    """
    with tf.name_scope("conv1"):
        kernel1 = utils.weight_variable([5, 5, 1, 32], name="W1")
        offset1 = utils.bias_variable([32], name="b1")
        tf.histogram_summary("W1", kernel1)
        tf.histogram_summary("b1", offset1)
        conv1 = utils.conv2d_basic(dataset, kernel1, offset1)
        activated1 = tf.nn.relu(conv1, name="h_conv1")
        pooled1 = utils.max_pool_2x2(activated1)

    with tf.name_scope("conv2"):
        kernel2 = utils.weight_variable([3, 3, 32, 64], name="W2")
        offset2 = utils.bias_variable([64], name="b2")
        tf.histogram_summary("W2", kernel2)
        tf.histogram_summary("b2", offset2)
        conv2 = utils.conv2d_basic(pooled1, kernel2, offset2)
        activated2 = tf.nn.relu(conv2, name="h_conv2")
        pooled2 = utils.max_pool_2x2(activated2)

    with tf.name_scope("fc"):
        # Assumes each 2x2 pool halves the spatial size -- TODO confirm in utils.
        side = IMAGE_SIZE // 4
        flat_dim = side * side * 64
        flattened = tf.reshape(pooled2, [-1, flat_dim])
        fc_weights = utils.weight_variable([flat_dim, NUM_LABELS], name="W_fc")
        fc_bias = utils.bias_variable([NUM_LABELS], name="b_fc")
        tf.histogram_summary("W_fc", fc_weights)
        tf.histogram_summary("b_fc", fc_bias)
        logits = tf.matmul(flattened, fc_weights) + fc_bias

    return logits
def read_dataset(data_dir):
    """Return the flower image lists, building a pickle cache on first use.

    If ``flowers_data.pickle`` is missing from ``data_dir`` the archive at
    ``DATA_URL`` is downloaded/extracted, the image lists are created with
    ``create_image_lists`` and the result is pickled.  The lists are then
    always read back from the pickle file.

    Args:
        data_dir: directory holding (or receiving) the dataset and the cache.

    Returns:
        A ``(training_images, testing_images, validation_images)`` tuple taken
        from the ``'train'``, ``'test'`` and ``'validation'`` entries.
    """
    pickle_filepath = os.path.join(data_dir, "flowers_data.pickle")

    if os.path.exists(pickle_filepath):
        print("Found pickle file!")
    else:
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_tarfile=True)
        archive_name = DATA_URL.split("/")[-1]
        flower_folder = os.path.splitext(archive_name)[0]
        result = create_image_lists(os.path.join(data_dir, flower_folder))
        print("Training set: %d" % len(result['train']))
        print("Test set: %d" % len(result['test']))
        print("Validation set: %d" % len(result['validation']))
        print("Pickling ...")
        with open(pickle_filepath, 'wb') as cache_file:
            pickle.dump(result, cache_file, pickle.HIGHEST_PROTOCOL)

    # The cache is a locally generated file; it is read back on both paths.
    with open(pickle_filepath, 'rb') as cache_file:
        result = pickle.load(cache_file)
    training_images = result['train']
    testing_images = result['test']
    validation_images = result['validation']
    del result

    summary = "Training: %d, Validation: %d, Test: %d" % (
        len(training_images), len(validation_images), len(testing_images))
    print(summary)
    return training_images, testing_images, validation_images
def vgg_net(weights, image):
    """Build the first three VGG blocks on top of ``image``.

    Only layers up to ``relu3_3`` are built; the deeper layers (conv3_4
    onwards, pool3, blocks 4 and 5) are intentionally left out.

    Args:
        weights: per-layer parameter array indexed by layer position; conv
            entries are unpacked as ``weights[i][0][0][0][0]`` into
            ``(kernels, bias)``.
        image: input tensor or array fed to the first convolution.

    Returns:
        A dict mapping every layer name to its output.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3', 'relu3_3',
    )

    net = {}
    current = image
    for index, layer_name in enumerate(layers):
        if layer_name.startswith('conv'):
            kernels, bias = weights[index][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            current = utils.conv2d_basic(current, kernels, bias.reshape(-1))
        elif layer_name.startswith('relu'):
            current = tf.nn.relu(current)
        elif layer_name.startswith('pool'):
            current = utils.avg_pool_2x2(current)
        net[layer_name] = current

    assert len(net) == len(layers)
    return net
def activation_function(x, name=""):
    """Apply the activation selected by ``FLAGS.activation`` to ``x``.

    Only the selected activation op is created.  Building all five eagerly
    would add four unused nodes to the graph on every call and, presumably,
    let an unused op claim the requested ``name`` before the selected one.

    Args:
        x: input tensor.
        name: op name passed to the chosen activation.

    Returns:
        The activated tensor; an activation summary is registered for it.

    Raises:
        KeyError: if ``FLAGS.activation`` is not one of 'relu', 'elu',
            'lrelu', 'tanh' or 'sigmoid'.
    """
    # Deferred builders: nothing touches the graph until one is called.
    builders = {
        'relu': lambda: tf.nn.relu(x, name),
        'elu': lambda: tf.nn.elu(x, name),
        'lrelu': lambda: utils.leaky_relu(x, 0.2, name),
        'tanh': lambda: tf.nn.tanh(x, name),
        'sigmoid': lambda: tf.nn.sigmoid(x, name),
    }
    act = builders[FLAGS.activation]()
    utils.add_activation_summary(act)
    return act
def main(argv=None):
    """Fetch the pretrained model and run ``visualize_layer`` on it.

    Args:
        argv: unused.
    """
    utils.maybe_download_and_extract(FLAGS.model_dir, DATA_URL)
    model_data = get_model_data()

    mean = model_data['normalization'][0][0][0]
    model_params = {
        # Averaging over the first two axes leaves one value per remaining axis.
        "mean_pixel": np.mean(mean, axis=(0, 1)),
        "weights": np.squeeze(model_data['layers']),
    }
    visualize_layer(model_params)
def train(loss_val, var_list):
    """Create the Adam training op for ``loss_val``.

    Args:
        loss_val: scalar loss tensor to minimize.
        var_list: variables the gradients are computed for.

    Returns:
        The op that applies the computed gradients.
    """
    optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss_val, var_list=var_list)
    if FLAGS.debug:
        # Gradient summaries are only registered in debug runs.
        for gradient, variable in grads_and_vars:
            utils.add_gradient_summary(gradient, variable)
    return optimizer.apply_gradients(grads_and_vars)
def visualize():
    """Sample 20 latent vectors, generate images from them and save each one.

    Relies on ``sess``, ``gen_images``, ``z_vec`` and ``train_phase`` from the
    enclosing scope.  Latent values are drawn uniformly from [-1, 1).
    """
    count = 20
    latent = np.random.uniform(-1.0, 1.0, size=(count, FLAGS.z_dim))
    z_feed = latent.astype(np.float32)

    feed = {z_vec: z_feed, train_phase: False}
    image = sess.run(gen_images, feed_dict=feed)

    for idx in xrange(count):
        print(image.shape)
        single = image[idx, :, :, :]
        utils.save_image(single, IMAGE_SIZE, FLAGS.logs_dir, name=str(idx))
        print("Saving image" + str(idx))
def main(argv=None):
    """Load the model and the input image, then run DeepDream on 3 octaves.

    Args:
        argv: unused.
    """
    utils.maybe_download_and_extract(FLAGS.model_dir, DATA_URL)
    model_data = get_model_data()

    dream_image = get_image(FLAGS.image_path)
    print(dream_image.shape)

    mean = model_data['normalization'][0][0][0]
    model_params = {
        "mean_pixel": np.mean(mean, axis=(0, 1)),
        "weights": np.squeeze(model_data['layers']),
    }
    deepdream_image(model_params, dream_image, no_of_octave=3)
def main(argv=None):
    """Train (in "train" mode) and evaluate the notMNIST model.

    Builds the graph, restores the latest checkpoint from ``FLAGS.logs_dir``
    if there is one, optionally runs the training loop, and finally prints
    the loss on the test split.

    Args:
        argv: unused.
    """
    print("Reading notMNIST data...")
    (train_dataset, train_labels,
     valid_dataset, valid_labels,
     test_dataset, test_labels) = read_notMNIST.get_notMNISTData(FLAGS.data_dir)

    print("Setting up tf model...")
    dataset = tf.placeholder(tf.float32, shape=(None, IMAGE_SIZE * IMAGE_SIZE))
    labels = tf.placeholder(tf.float32, shape=(None, NUMBER_OF_CLASSES))
    global_step = tf.Variable(0, trainable=False)

    logits = inference_fully_convolutional(dataset)
    for trainable in tf.trainable_variables():
        utils.add_to_regularization_and_summary(trainable)

    loss_val = loss(logits, labels)
    train_op = train(loss_val, global_step)
    summary_op = tf.merge_all_summaries()

    with tf.Session() as sess:
        print("Setting up summary and saver...")
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.logs_dir, sess.graph)
        saver = tf.train.Saver()

        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored!")

        if FLAGS.mode == "train":
            num_examples = train_labels.shape[0]
            for step in xrange(MAX_ITERATIONS):
                # Slide a BATCH_SIZE window over the training set, wrapping around.
                start = (step * BATCH_SIZE) % (num_examples - BATCH_SIZE)
                stop = start + BATCH_SIZE
                feed_dict = {dataset: train_dataset[start:stop, :],
                             labels: train_labels[start:stop, :]}

                if step % 100 == 0:
                    batch_loss, summary_str = sess.run([loss_val, summary_op], feed_dict=feed_dict)
                    print("Step: %d Mini batch loss: %g" % (step, batch_loss))
                    summary_writer.add_summary(summary_str, step)

                if step % 1000 == 0:
                    valid_feed = {dataset: valid_dataset, labels: valid_labels}
                    valid_loss = sess.run(loss_val, feed_dict=valid_feed)
                    print("-- Validation loss %g" % valid_loss)
                    saver.save(sess, FLAGS.logs_dir + "model.ckpt", global_step=step)

                sess.run(train_op, feed_dict=feed_dict)

        test_feed = {dataset: test_dataset, labels: test_labels}
        test_loss = sess.run(loss_val, feed_dict=test_feed)
        print("Test loss: %g" % test_loss)
def read_input(model_params):
    """Return the network input for the current ``FLAGS.mode``.

    In "test" mode a single image is loaded from ``FLAGS.test_image_path``,
    processed with the model's mean pixel and divided by 255.  Otherwise a
    shuffled batch is built from the five CIFAR-10 training batch files.

    Args:
        model_params: dict providing ``"mean_pixel"`` (and whatever
            ``read_cifar10`` reads from it).

    Returns:
        ``(processed_image, None)`` in test mode, else
        ``(input_images, input_content_features)`` batch tensors.

    Raises:
        ValueError: if one of the CIFAR-10 batch files is missing.
    """
    if FLAGS.mode == "test":
        content_image = get_image(FLAGS.test_image_path)
        print(content_image.shape)
        processed = utils.process_image(content_image, model_params["mean_pixel"])
        processed_content = processed.astype(np.float32) / 255.0
        return processed_content, None

    data_directory = os.path.join(FLAGS.data_dir, 'cifar-10-batches-bin')
    filenames = [os.path.join(data_directory, 'data_batch_%d.bin' % batch_no)
                 for batch_no in xrange(1, 6)]
    for path in filenames:
        if not tf.gfile.Exists(path):
            raise ValueError('Failed to find file: ' + path)

    filename_queue = tf.train.string_input_producer(filenames)
    print("Reading cifar10 data")
    record = read_cifar10(model_params, filename_queue)

    num_preprocess_threads = 8
    min_queue_examples = int(0.4 * NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN)
    print("Shuffling train batch")
    input_images, input_content_features = tf.train.shuffle_batch(
        [record.image, record.content_features],
        batch_size=FLAGS.batch_size,
        num_threads=num_preprocess_threads,
        capacity=min_queue_examples + 3 * FLAGS.batch_size,
        min_after_dequeue=min_queue_examples)
    return input_images, input_content_features
def read_cifar10(model_params, filename_queue):
    """Read one fixed-length CIFAR-10 record and compute its VGG content features.

    Each record is 1 label byte followed by ``IMAGE_SIZE * IMAGE_SIZE * 3``
    image bytes stored depth-major; the label byte is skipped.

    Args:
        model_params: dict with ``'mean_pixel'`` and ``"weights"``.
        filename_queue: queue of binary file names to read from.

    Returns:
        A record object with ``height``, ``width``, ``depth``, ``key``,
        ``image`` (processed image divided by 255), ``net`` (the VGG layer
        dict) and ``content_features`` (output of ``CONTENT_LAYER``).
    """
    class CIFAR10Record(object):
        pass

    result = CIFAR10Record()
    result.height = IMAGE_SIZE
    result.width = IMAGE_SIZE
    result.depth = 3

    label_bytes = 1  # 2 for CIFAR-100
    image_bytes = result.height * result.width * result.depth
    record_length = label_bytes + image_bytes

    reader = tf.FixedLengthRecordReader(record_bytes=record_length)
    result.key, value = reader.read(filename_queue)
    raw_record = tf.decode_raw(value, tf.uint8)

    # Drop the label byte, then reshape to [depth, height, width].
    pixel_bytes = tf.slice(raw_record, [label_bytes], [image_bytes])
    depth_major = tf.cast(
        tf.reshape(pixel_bytes, [result.depth, result.height, result.width]),
        tf.float32)

    # Move depth last before mean-processing and scaling.
    height_width_depth = tf.transpose(depth_major, [1, 2, 0])
    result.image = utils.process_image(height_width_depth, model_params['mean_pixel']) / 255.0

    # The VGG graph gets a batch of one, scaled back up by 255.
    batch_shape = (1, result.height, result.width, result.depth)
    extended_image = 255 * tf.reshape(result.image, batch_shape)
    result.net = vgg_net(model_params["weights"], extended_image)
    result.content_features = result.net[CONTENT_LAYER]
    return result
def inference(data, keep_prob):
    """Build a 4-hidden-layer MLP whose parameters live in one flat variable.

    All weights and biases are slices of a single 1-D variable named
    "variables": weights come first, then biases.  Each hidden layer has 50
    ReLU units followed by dropout; the output layer has 10 softmax units.

    Args:
        data: input batch with ``IMAGE_SIZE * IMAGE_SIZE`` features per row.
        keep_prob: keep probability passed to ``tf.nn.dropout``.

    Returns:
        The softmax output tensor (op name 'h_final').
    """
    with tf.variable_scope("inference"):
        input_dim = IMAGE_SIZE * IMAGE_SIZE
        weight_variable_size = input_dim * 50 + 50 * 50 * 3 + 50 * 10
        bias_variable_size = 4 * 50 + 10
        total_size = weight_variable_size + bias_variable_size
        print(total_size)

        variable = utils.weight_variable([total_size], name="variables")
        weight_variable = tf.slice(variable, [0], [weight_variable_size], name="weights")
        bias_variable = tf.slice(variable, [weight_variable_size], [bias_variable_size], name="biases")

        weight_offset = 0
        bias_offset = 0
        activations = data
        # Hidden layers 1..4: only the first differs, in its fan-in.
        for layer_no, fan_in in enumerate((input_dim, 50, 50, 50), 1):
            flat_w = tf.slice(weight_variable, [weight_offset], [fan_in * 50],
                              name="W_%d" % layer_no)
            bias = tf.slice(bias_variable, [bias_offset], [50], name="b_%d" % layer_no)
            pre_activation = tf.matmul(activations, tf.reshape(flat_w, [fan_in, 50])) + bias
            rectified = tf.nn.relu(pre_activation, name='h_%d' % layer_no)
            activations = tf.nn.dropout(rectified, keep_prob)
            utils.add_activation_summary(activations)
            weight_offset += fan_in * 50
            bias_offset += 50

        W_final = tf.slice(weight_variable, [weight_offset], [50 * 10], name="W_final")
        b_final = tf.slice(bias_variable, [bias_offset], [10], name="b_final")
        scores = tf.matmul(activations, tf.reshape(W_final, [50, 10])) + b_final
        pred = tf.nn.softmax(scores, name='h_final')
    return pred
def deepdream_image(model_params, image, octave_scale=1.4, no_of_octave=4):
    """Run multi-octave DeepDream on ``image`` and save one result per pass.

    Five passes are made.  In each pass the image is split into octaves
    (a downscaled image plus the detail lost at every downscale), gradient
    ascent on the mean of one feature channel of ``DREAM_LAYER`` is run at
    each octave for ``MAX_ITERATIONS`` steps, and the detail is added back
    when moving to the next larger octave.  Each pass writes
    ``<pass>_<image>_deepdream_<layer>.jpg`` under
    ``FLAGS.logs_dir/checkpoints``.

    Args:
        model_params: dict with ``"mean_pixel"`` and ``"weights"``.
        image: input image array (presumably with a leading batch axis of
            size 1, since axes 1 and 2 are used as height/width -- confirm).
        octave_scale: size ratio between consecutive octaves.
        no_of_octave: number of octaves per pass.
    """
    image_stem = os.path.splitext(FLAGS.image_path.split("/")[-1])[0]
    filename = "%s_deepdream_%s.jpg" % (image_stem, DREAM_LAYER)
    processed_image = utils.process_image(image, model_params["mean_pixel"]).astype(np.float32)
    input_image = tf.placeholder(tf.float32)
    dream_net = vgg_net(model_params["weights"], input_image)

    def calc_grad_tiled(img, gradient, tile_size=512):
        """Evaluate ``gradient`` tile by tile on a randomly shifted ``img``."""
        sz = tile_size
        h, w = img.shape[1:3]
        # A random roll moves the tile seams to a different place on each call.
        sx, sy = np.random.randint(sz, size=2)
        img_shift = np.roll(np.roll(img, sx, 2), sy, 1)
        gradient_val = np.zeros_like(img)
        for y in xrange(0, max(h - sz // 2, sz), sz):
            for x in xrange(0, max(w - sz // 2, sz), sz):
                sub_img = img_shift[:, y:y + sz, x:x + sz]
                g = sess.run(gradient, {input_image: sub_img})
                gradient_val[:, y:y + sz, x:x + sz] = g
        # Undo the roll so the gradient lines up with the original image.
        return np.roll(np.roll(gradient_val, -sx, 2), -sy, 1)

    step = LEARNING_RATE
    feature = DREAM_FEATURE
    with tf.Session() as sess:
        dream_layer_features = dream_net[DREAM_LAYER][:, :, :, feature]
        feature_score = tf.reduce_mean(dream_layer_features)
        grad_op = tf.gradients(feature_score, input_image)[0]

        dummy_image = processed_image.copy() + 100.0
        for itr in xrange(5):
            # Split into octaves: keep the detail lost at every downscale.
            octaves = []
            for i in xrange(no_of_octave - 1):
                hw = dummy_image.shape[1:3]
                lo = resize_image(dummy_image, np.int32(np.float32(hw) / octave_scale))
                # Detail = image minus the upsampled low-res version.  Resizing
                # the image to its own size here would leave no detail to restore.
                hi = dummy_image - resize_image(lo, hw)
                dummy_image = lo
                octaves.append(hi)

            # Work from the smallest octave up, restoring detail on the way.
            for octave in xrange(no_of_octave):
                if octave > 0:
                    hi = octaves[-octave]
                    dummy_image = resize_image(dummy_image, hi.shape[1:3]) + hi
                for i in xrange(MAX_ITERATIONS):
                    grad = calc_grad_tiled(dummy_image, grad_op)
                    # Normalize the step by the mean gradient magnitude.
                    dummy_image += grad * (step / (np.abs(grad).mean() + 1e-8))
                    print('.'),
                print(".")

            # NOTE(review): grad_op was built before the loop, so this increment
            # does not change which feature channel is maximized.
            feature += 15
            temp_file = "%d_%s" % (itr, filename)
            output = dummy_image.reshape(processed_image.shape[1:]) - 100.0
            save_image(os.path.join(FLAGS.logs_dir, "checkpoints", temp_file), output,
                       model_params["mean_pixel"])
def read_dataset(data_dir):
    """Return the scene-parsing record lists, caching them in a pickle file.

    If ``MITSceneParsing.pickle`` is missing from ``data_dir`` the archive at
    ``DATA_URL`` is downloaded/extracted, the record lists are built with
    ``create_image_lists`` and pickled.  The records are then always read
    back from the pickle file.

    Args:
        data_dir: directory holding (or receiving) the dataset and the cache.

    Returns:
        A ``(training_records, validation_records)`` tuple.
    """
    pickle_filepath = os.path.join(data_dir, "MITSceneParsing.pickle")

    if os.path.exists(pickle_filepath):
        print("Found pickle file!")
    else:
        utils.maybe_download_and_extract(data_dir, DATA_URL, is_zipfile=True)
        archive_name = DATA_URL.split("/")[-1]
        scene_parsing_folder = os.path.splitext(archive_name)[0]
        result = create_image_lists(os.path.join(data_dir, scene_parsing_folder))
        print("Pickling ...")
        with open(pickle_filepath, 'wb') as cache_file:
            pickle.dump(result, cache_file, pickle.HIGHEST_PROTOCOL)

    # The cache is a locally generated file; it is read back on both paths.
    with open(pickle_filepath, 'rb') as cache_file:
        result = pickle.load(cache_file)
    training_records = result['training']
    validation_records = result['validation']
    del result

    return training_records, validation_records
def main(argv=None):
    """Train the colorization model from the queue-based input pipeline.

    Restores the latest checkpoint from ``FLAGS.logs_dir`` if present, logs
    loss and summaries every 400 steps and saves a checkpoint every 1000.

    Args:
        argv: unused.
    """
    utils.maybe_download_and_extract(FLAGS.data_dir, DATA_URL, is_tarfile=True)
    print("Setting up model...")
    global_step = tf.Variable(0, trainable=False)

    gray, color = inputs()
    pred = 255 * inference(gray) + 128
    for tag, tensor in (("Gray", gray), ("Ground_truth", color), ("Prediction", pred)):
        tf.image_summary(tag, tensor, max_images=1)

    image_loss = loss(pred, color)
    train_op = train(image_loss, global_step)
    summary_op = tf.merge_all_summaries()

    with tf.Session() as sess:
        print("Setting up summary writer, queue, saver...")
        sess.run(tf.initialize_all_variables())
        summary_writer = tf.train.SummaryWriter(FLAGS.logs_dir, sess.graph)
        saver = tf.train.Saver()

        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Restoring model from checkpoint...")
            saver.restore(sess, ckpt.model_checkpoint_path)

        tf.train.start_queue_runners(sess)
        for step in xrange(MAX_ITERATIONS):
            if step % 400 == 0:
                loss_val, summary_str = sess.run([image_loss, summary_op])
                print("Step %d, Loss: %g" % (step, loss_val))
                summary_writer.add_summary(summary_str, global_step=step)

            if step % 1000 == 0:
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", global_step=step)
                print("%s" % datetime.now())

            sess.run(train_op)
def decoder_fc(z):
    """Decode latent vectors into image logits with a 3-layer MLP.

    Two hidden layers of 50 units use ``activation_function``; the output
    layer is linear with ``IMAGE_SIZE * IMAGE_SIZE`` units.

    Args:
        z: latent batch with ``FLAGS.z_dim`` features per row.

    Returns:
        The output-layer tensor; no output non-linearity is applied here.
    """
    num_pixels = IMAGE_SIZE * IMAGE_SIZE
    with tf.variable_scope("decoder"):
        weights1 = utils.weight_variable([FLAGS.z_dim, 50], name="Wd_fc1")
        bias1 = utils.bias_variable([50], name="bd_fc1")
        hidden1 = activation_function(tf.matmul(z, weights1) + bias1, name="hdfc_1")

        weights2 = utils.weight_variable([50, 50], name="Wd_fc2")
        bias2 = utils.bias_variable([50], name="bd_fc2")
        hidden2 = activation_function(tf.matmul(hidden1, weights2) + bias2, name="hdfc_2")

        weights3 = utils.weight_variable([50, num_pixels], name="Wd_fc3")
        bias3 = utils.bias_variable([num_pixels], name="bd_fc3")
        pred_image = tf.matmul(hidden2, weights3) + bias3
    return pred_image
def main(argv=None):
    """Reconstruct an image from its ``INVERT_LAYER`` VGG features.

    A trainable image is optimized with Adam so that its features match
    those of the input image.  The best image so far is written to
    "invert_check.png" whenever the loss improves, and to "output.png"
    at the end.

    Args:
        argv: unused.
    """
    utils.maybe_download_and_extract(FLAGS.model_dir, DATA_URL)
    model_data = get_model_data()

    invert_image = get_image(FLAGS.image_path)
    print(invert_image.shape)

    mean = model_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    processed_image = utils.process_image(invert_image, mean_pixel).astype(np.float32)
    weights = np.squeeze(model_data['layers'])

    # Target network: features of the real image.
    invert_net = vgg_net(weights, processed_image)

    # Trainable image, initialised with a fraction of the input's std-dev.
    dummy_image = utils.weight_variable(invert_image.shape, stddev=np.std(invert_image) * 0.1)
    tf.histogram_summary("Image Output", dummy_image)
    image_net = vgg_net(weights, dummy_image)

    with tf.Session() as sess:
        invert_layer_features = invert_net[INVERT_LAYER].eval()
        feature_diff = image_net[INVERT_LAYER] - invert_layer_features
        loss = 2 * tf.nn.l2_loss(feature_diff) / invert_layer_features.size
        tf.scalar_summary("Loss", loss)

        summary_op = tf.merge_all_summaries()
        train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

        best_loss = float('inf')
        best = None
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir)
        sess.run(tf.initialize_all_variables())

        last_step = MAX_ITERATIONS - 1
        for i in range(1, MAX_ITERATIONS):
            train_op.run()

            if i % 10 == 0 or i == last_step:
                this_loss = loss.eval()
                print('Step %d' % (i)),
                print(' total loss: %g' % this_loss)
                summary_writer.add_summary(summary_op.eval(), global_step=i)
                if this_loss < best_loss:
                    best_loss = this_loss
                    best = dummy_image.eval()
                    output = utils.unprocess_image(best.reshape(invert_image.shape[1:]), mean_pixel)
                    scipy.misc.imsave("invert_check.png", output)

    output = utils.unprocess_image(best.reshape(invert_image.shape[1:]), mean_pixel)
    scipy.misc.imsave("output.png", output)
def vgg_net(weights, image):
    """Build the VGG graph on ``image``, skipping the ``*_4`` conv/relu pairs.

    The layer table keeps the full 19-layer ordering so that positions still
    index ``weights`` correctly; the conv/relu ``3_4``, ``4_4`` and ``5_4``
    layers are skipped rather than removed.

    Args:
        weights: per-layer parameter array indexed by layer position; conv
            entries are unpacked as ``weights[i][0][0][0][0]`` into
            ``(kernels, bias)``.
        image: input tensor fed to the first convolution.

    Returns:
        A dict mapping each built layer name to its output tensor.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4'
    )
    skipped = ('conv3_4', 'relu3_4', 'conv4_4', 'relu4_4', 'conv5_4', 'relu5_4')

    net = {}
    current = image
    for index, layer_name in enumerate(layers):
        if layer_name in skipped:
            continue

        if layer_name.startswith('conv'):
            kernels, bias = weights[index][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernel_var = utils.get_variable(np.transpose(kernels, (1, 0, 2, 3)),
                                            name=layer_name + "_w")
            bias_var = utils.get_variable(bias.reshape(-1), name=layer_name + "_b")
            current = utils.conv2d_basic(current, kernel_var, bias_var)
        elif layer_name.startswith('relu'):
            current = tf.nn.relu(current, name=layer_name)
            if FLAGS.debug:
                utils.add_activation_summary(current)
        elif layer_name.startswith('pool'):
            current = utils.avg_pool_2x2(current)

        net[layer_name] = current

    return net
def inference(data):
    """Build a 4-hidden-layer MLP classifier with flat weight variables.

    Every weight matrix is stored as a 1-D variable and reshaped before use.
    Hidden layers have 50 ReLU units each; the output layer has 10 softmax
    units.

    Args:
        data: input batch with ``IMAGE_SIZE * IMAGE_SIZE`` features per row.

    Returns:
        The softmax output tensor (op name 'h_final').
    """
    with tf.variable_scope("inference"):
        input_dim = IMAGE_SIZE * IMAGE_SIZE
        hidden = data
        # Hidden layers 1..4: only the first differs, in its fan-in.
        for layer_no, fan_in in enumerate((input_dim, 50, 50, 50), 1):
            flat_w = utils.weight_variable([fan_in * 50], name="W_%d" % layer_no)
            bias = utils.bias_variable([50], name="b_%d" % layer_no)
            pre_activation = tf.matmul(hidden, tf.reshape(flat_w, [fan_in, 50])) + bias
            hidden = tf.nn.relu(pre_activation, name='h_%d' % layer_no)
            utils.add_activation_summary(hidden)

        W_final = utils.weight_variable([50 * 10], name="W_final")
        b_final = utils.bias_variable([10], name="b_final")
        scores = tf.matmul(hidden, tf.reshape(W_final, [50, 10])) + b_final
        pred = tf.nn.softmax(scores, name='h_final')
    return pred
def main(argv=None):
    """Train (or restore) the MNIST MLP, then measure accuracy under pruning.

    After training/restoring, for k = 1..19 the k percent of entries of each
    trainable variable with the lowest saliency are zeroed, test accuracy is
    printed, and the zeroed entries are restored from a snapshot.

    Args:
        argv: unused.
    """
    input_data = tf.placeholder(tf.float32, [None, 784])
    truth_labels = tf.placeholder(tf.float32, [None, 10])
    dataset = mnist.input_data.read_data_sets(FLAGS.data_dir, one_hot=True)

    pred_labels = inference(input_data)
    entropy = -tf.reduce_sum(truth_labels * tf.log(pred_labels))
    tf.scalar_summary('Cross_entropy', entropy)

    train_vars = tf.trainable_variables()
    for trainable in train_vars:
        utils.add_to_regularization_and_summary(trainable)
    # ``hess`` holds one saliency tensor per trainable variable.
    train_op, hess = train(entropy, train_vars)

    correct = tf.equal(tf.argmax(pred_labels, 1), tf.argmax(truth_labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

    # Session start
    sess = tf.InteractiveSession()
    sess.run(tf.initialize_all_variables())
    saver = tf.train.Saver()
    summary_writer = tf.train.SummaryWriter(FLAGS.logs_dir, sess.graph)
    summary_op = tf.merge_all_summaries()

    def test():
        """Return the accuracy on the full test split."""
        return accuracy.eval(feed_dict={
            input_data: dataset.test.images,
            truth_labels: dataset.test.labels
        })

    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
    else:
        for i in xrange(MAX_ITERATIONS):
            batch = dataset.train.next_batch(FLAGS.batch_size)
            feed_dict = {input_data: batch[0], truth_labels: batch[1]}
            train_op.run(feed_dict=feed_dict)

            if i % 10 == 0:
                summary_str = sess.run(summary_op, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, i)

            if i % 100 == 0:
                train_accuracy = accuracy.eval(feed_dict=feed_dict)
                print("step: %d, training accuracy: %g" % (i, train_accuracy))

            if i % 5000 == 0:
                saver.save(sess, FLAGS.logs_dir + 'model.ckpt', i)

    # Snapshot of the trained values, used to undo each pruning step.
    train_vars_copy = sess.run([tf.identity(var) for var in train_vars])
    print('Variables Perecent: %d, Test accuracy: %g' % (100, test()))

    # Percentage of entries to prune, fed at run time.
    k = tf.placeholder(tf.int32)

    def lowest_saliency_indices(saliency, variables):
        """Return (values, indices) of the k percent lowest-saliency entries."""
        shape = utils.get_tensor_size(variables)
        return tf.nn.top_k(-1 * saliency, tf.cast(k * shape / 100, tf.int32))

    def scatter_update(saliency, variables):
        """Zero the selected entries of ``variables``."""
        values, indices = lowest_saliency_indices(saliency, variables)
        return tf.scatter_update(variables, indices, tf.zeros_like(values))

    def scatter_restore(saliency, variables1, variables2):
        """Copy the selected entries from ``variables1`` back into ``variables2``."""
        _, indices = lowest_saliency_indices(saliency, variables2)
        saved_values = tf.gather(variables1, indices)
        return tf.scatter_update(variables2, indices, saved_values)

    scatter_update_op = [
        scatter_update(sal, var) for sal, var in zip(hess, train_vars)
    ]
    scatter_restore_op = [
        scatter_restore(sal, var1, var2)
        for sal, var1, var2 in zip(hess, train_vars_copy, train_vars)
    ]

    for count in range(1, 20):
        batch = dataset.train.next_batch(FLAGS.batch_size)
        feed_dict = {input_data: batch[0], truth_labels: batch[1], k: count}
        sess.run(scatter_update_op, feed_dict=feed_dict)
        print('Variables Perecent: %d, Test accuracy: %g' % ((100 - count), test()))
        sess.run(scatter_restore_op, feed_dict=feed_dict)
def main(argv=None):
    """Train or visualize the segmentation network.

    In "train" mode the training loss is logged every 10 steps; every 500
    steps the validation loss is logged and a checkpoint is saved.  In
    "visualize" mode a random validation batch is predicted and the input,
    ground-truth and predicted images are saved to ``FLAGS.logs_dir``.

    Args:
        argv: unused.
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image")
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)

    per_pixel_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy")
    loss = tf.reduce_mean(per_pixel_entropy)
    loss_summary = tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for variable in trainable_var:
            utils.add_to_regularization_and_summary(variable)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()

    # create two summary writers to show training loss and validation loss in the same graph
    # need to create two folders 'train' and 'validation' inside FLAGS.logs_dir
    train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/validation')

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == "train":
        for itr in xrange(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images,
                         annotation: train_annotations,
                         keep_probability: 0.85}

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                train_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
                valid_feed = {image: valid_images,
                              annotation: valid_annotations,
                              keep_probability: 1.0}
                valid_loss, summary_sva = sess.run([loss, loss_summary], feed_dict=valid_feed)
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))

                # add validation loss to TensorBoard
                validation_writer.add_summary(summary_sva, itr)
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        valid_images, valid_annotations = validation_dataset_reader.get_random_batch(FLAGS.batch_size)
        visual_feed = {image: valid_images,
                       annotation: valid_annotations,
                       keep_probability: 1.0}
        pred = sess.run(pred_annotation, feed_dict=visual_feed)
        # Drop the trailing singleton axis before saving.
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            suffix = str(5 + itr)
            utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + suffix)
            utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + suffix)
            utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="pred_" + suffix)
            print("Saved image: %d" % itr)
def encoder_fc(images):
    """Encode images into the mean and log-variance of the latent Gaussian.

    Two hidden layers of 50 units use ``activation_function``; two separate
    linear heads on the second hidden layer produce ``mu`` and ``log_var``,
    each with ``FLAGS.z_dim`` units.

    Args:
        images: batch with ``IMAGE_SIZE * IMAGE_SIZE`` features per row.

    Returns:
        A ``(mu, log_var)`` tuple of tensors.
    """
    num_pixels = IMAGE_SIZE * IMAGE_SIZE
    with tf.variable_scope("encoder"):
        weights1 = utils.weight_variable([num_pixels, 50], name="W_fc1")
        bias1 = utils.bias_variable([50], name="b_fc1")
        hidden1 = activation_function(tf.matmul(images, weights1) + bias1, name="hfc_1")

        weights2 = utils.weight_variable([50, 50], name="W_fc2")
        bias2 = utils.bias_variable([50], name="b_fc2")
        hidden2 = activation_function(tf.matmul(hidden1, weights2) + bias2, name="hfc_2")

        # Head 1: latent mean.
        mu_weights = utils.weight_variable([50, FLAGS.z_dim], name="W_fc3")
        mu_bias = utils.bias_variable([FLAGS.z_dim], name="b_fc3")
        mu = tf.add(tf.matmul(hidden2, mu_weights), mu_bias, name="mu")
        utils.add_activation_summary(mu)

        # Head 2: latent log-variance.
        var_weights = utils.weight_variable([50, FLAGS.z_dim], name="W_fc4")
        var_bias = utils.bias_variable([FLAGS.z_dim], name="b_fc4")
        log_var = tf.add(tf.matmul(hidden2, var_weights), var_bias, name="log_var")
        utils.add_activation_summary(log_var)

    return mu, log_var
def main(argv=None):
    """Train a fully connected VAE on MNIST and dump test-set latent codes.

    The loss is the mean of (KL divergence + sigmoid cross-entropy) plus a
    weighted regularization term.  After training, the latent vector of
    every test image is written with its label to "z_vae_output.csv".

    Args:
        argv: unused.
    """
    data = mnist.input_data.read_data_sets("MNIST_data", one_hot=False)
    images = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE * IMAGE_SIZE], name="input_image")
    image_shape = [-1, IMAGE_SIZE, IMAGE_SIZE, 1]
    tf.image_summary("Input", tf.reshape(images, image_shape), max_images=2)

    mu, log_var = encoder_fc(images)
    # Reparameterization: z = mu + sigma * epsilon, with sigma = exp(log_var / 2).
    epsilon = tf.random_normal(tf.shape(mu), name="epsilon")
    z = mu + tf.mul(tf.exp(log_var * 0.5), epsilon)

    pred_image = decoder_fc(z)
    per_pixel_entropy = tf.nn.sigmoid_cross_entropy_with_logits(pred_image, images, name="entropy_loss")
    entropy_loss = tf.reduce_sum(per_pixel_entropy, reduction_indices=1)
    tf.histogram_summary("Entropy_loss", entropy_loss)

    pred_image_sigmoid = tf.nn.sigmoid(pred_image)
    tf.image_summary("Output", tf.reshape(pred_image_sigmoid, image_shape), max_images=2)

    KL_loss = -0.5 * tf.reduce_sum(1 + log_var - tf.pow(mu, 2) - tf.exp(log_var), reduction_indices=1)
    tf.histogram_summary("KL_Divergence", KL_loss)

    train_variables = tf.trainable_variables()
    for trainable in train_variables:
        utils.add_to_regularization_and_summary(var=trainable)

    reg_loss = tf.add_n(tf.get_collection("reg_loss"))
    tf.scalar_summary("Reg_loss", reg_loss)
    total_loss = tf.reduce_mean(KL_loss + entropy_loss) + FLAGS.regularization * reg_loss
    tf.scalar_summary("total_loss", total_loss)
    train_op = train(total_loss, train_variables)

    sess = tf.Session()
    summary_op = tf.merge_all_summaries()
    saver = tf.train.Saver()
    summary_writer = tf.train.SummaryWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.initialize_all_variables())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    for itr in xrange(MAX_ITERATIONS):
        batch_images, batch_labels = data.train.next_batch(FLAGS.batch_size)
        batch_feed = {images: batch_images}
        sess.run(train_op, feed_dict=batch_feed)

        if itr % 500 == 0:
            entr_loss, KL_div, tot_loss, summary_str = sess.run(
                [entropy_loss, KL_loss, total_loss, summary_op], feed_dict=batch_feed)
            print("Step: %d, Entropy loss: %g, KL Divergence: %g, Total loss: %g" % (
                itr, np.mean(entr_loss), np.mean(KL_div), tot_loss))
            summary_writer.add_summary(summary_str, itr)

        if itr % 1000 == 0:
            saver.save(sess, FLAGS.logs_dir + "model.ckpt", global_step=itr)

    def test():
        """Write one CSV row per test image: latent vector, then label."""
        z_vec = sess.run(z, feed_dict={images: data.test.images})
        label_column = np.reshape(data.test.labels, (-1, 1))
        write_array = np.hstack((z_vec, label_column))
        pd.DataFrame(write_array).to_csv("z_vae_output.csv", header=False, index=False)

    test()
def inferece(dataset, prob):
    """Build the conv-conv-fc-fc classifier graph and return its logits.

    Layers, in order: 5x5 conv (1 -> 32) + ReLU + 2x2 max-pool, 3x3 conv
    (32 -> 64) + ReLU + 2x2 max-pool, a 256-unit ReLU layer with dropout,
    and a linear output layer with ``NUM_LABELS`` units.  The conv
    parameters are passed to ``add_to_regularization_loss`` and histogram
    summaries are registered for all weights and biases.

    Args:
        dataset: image tensor fed to the first convolution (presumably NHWC
            with a single channel, given the [5, 5, 1, 32] kernel -- confirm
            against the caller).
        prob: dropout keep probability for the first fully connected layer.

    Returns:
        The un-normalized class scores; no softmax is applied here.
    """
    with tf.name_scope("conv1"):
        W_conv1 = utils.weight_variable([5, 5, 1, 32])
        b_conv1 = utils.bias_variable([32])
        tf.histogram_summary("W_conv1", W_conv1)
        tf.histogram_summary("b_conv1", b_conv1)
        h_conv1 = utils.conv2d_basic(dataset, W_conv1, b_conv1)
        h_1 = tf.nn.relu(h_conv1)
        h_pool1 = utils.max_pool_2x2(h_1)
        add_to_regularization_loss(W_conv1, b_conv1)

    with tf.name_scope("conv2"):
        W_conv2 = utils.weight_variable([3, 3, 32, 64])
        b_conv2 = utils.bias_variable([64])
        tf.histogram_summary("W_conv2", W_conv2)
        tf.histogram_summary("b_conv2", b_conv2)
        h_conv2 = utils.conv2d_basic(h_pool1, W_conv2, b_conv2)
        h_2 = tf.nn.relu(h_conv2)
        h_pool2 = utils.max_pool_2x2(h_2)
        add_to_regularization_loss(W_conv2, b_conv2)

    with tf.name_scope("fc_1"):
        # Floor division keeps the shape an integer under true division too.
        image_size = IMAGE_SIZE // 4
        flat_dim = image_size * image_size * 64
        h_flat = tf.reshape(h_pool2, [-1, flat_dim])
        W_fc1 = utils.weight_variable([flat_dim, 256])
        b_fc1 = utils.bias_variable([256])
        tf.histogram_summary("W_fc1", W_fc1)
        tf.histogram_summary("b_fc1", b_fc1)
        h_fc1 = tf.nn.relu(tf.matmul(h_flat, W_fc1) + b_fc1)
        h_fc1_dropout = tf.nn.dropout(h_fc1, prob)

    with tf.name_scope("fc_2"):
        W_fc2 = utils.weight_variable([256, NUM_LABELS])
        b_fc2 = utils.bias_variable([NUM_LABELS])
        tf.histogram_summary("W_fc2", W_fc2)
        tf.histogram_summary("b_fc2", b_fc2)
        # Use the dropout output; feeding h_fc1 here would leave ``prob``
        # without any effect on the network.
        pred = tf.matmul(h_fc1_dropout, W_fc2) + b_fc2

    return pred
def main(argv=None):
    """Train or test the colourisation generator, depending on FLAGS.mode.

    In ``train`` mode the generator is optimised on batches from the image
    list; the MSE is logged every 10 steps and a checkpoint plus one
    ground-truth/prediction image pair is saved every 100 steps.
    In ``test`` mode 10 random images are run through the generator and the
    ground-truth/prediction pairs are saved under ``<logs_dir>/image_pred``.

    :param argv: not used by this function.
    """
    MSE = []
    ITR = []
    ERROR = {}

    print("Setting up network...")
    train_phase = tf.placeholder(tf.bool, name="train_phase")
    images = tf.placeholder(tf.float32, shape=[None, None, None, 1], name='L_image')
    lab_images = tf.placeholder(tf.float32, shape=[None, None, None, 3], name="LAB_image")

    pred_image = generator(images, train_phase)

    gen_loss_mse = tf.reduce_mean(
        2 * tf.nn.l2_loss(pred_image - lab_images)) / (IMAGE_SIZE * IMAGE_SIZE * 100 * 100)
    tf.summary.scalar("Generator_loss_MSE", gen_loss_mse)

    train_variables = tf.trainable_variables()
    for v in train_variables:
        utils.add_to_regularization_and_summary(var=v)

    train_op = train(gen_loss_mse, train_variables)

    print("Reading image dataset...")
    # NOTE(review): the dataset location is a hard-coded absolute path rather
    # than FLAGS.data_dir - confirm before running on another machine.
    file_glob = os.path.join('/home/shikhar/Desktop/full run/lamem', "images", '*.' + 'jpg')
    train_images = glob.glob(file_glob)
    image_options = {"resize": True, "resize_size": IMAGE_SIZE, "color": "LAB"}
    batch_reader = dataset.BatchDatset(train_images, image_options)

    print("Setting up session")
    sess = tf.Session()
    summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)
    # Non-deprecated initializer, matching the tf.summary.* API level used here.
    sess.run(tf.global_variables_initializer())

    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == 'train':
        # Join instead of concatenating so the checkpoint lands inside
        # logs_dir (where it is looked up on restore) without a trailing slash.
        checkpoint_path = os.path.join(FLAGS.logs_dir, "model.ckpt")
        save_dir = os.path.join(FLAGS.logs_dir, "image_checkpoints")

        for itr in xrange(MAX_ITERATION):
            l_image, color_images = batch_reader.next_batch(FLAGS.batch_size)
            feed_dict = {images: l_image, lab_images: color_images, train_phase: True}

            if itr % 10 == 0:
                mse, summary_str = sess.run([gen_loss_mse, summary_op], feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, itr)
                MSE.append(mse)
                ITR.append(itr // 10)
                print("Step: %d, MSE: %g" % (itr, mse))

            if itr % 100 == 0:
                saver.save(sess, checkpoint_path, global_step=itr)
                pred = sess.run(pred_image, feed_dict=feed_dict)
                # Save one randomly chosen ground-truth/prediction pair.
                idx = np.random.randint(0, FLAGS.batch_size)
                print("shape of color images ", len(color_images))
                utils.save_image(color_images[idx], save_dir, "gt" + str(itr // 100))
                utils.save_image(pred[idx].astype(np.float64), save_dir,
                                 "pred" + str(itr // 100))
                print("%s --> Model saved" % datetime.datetime.now())

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10000 == 0:
                # NOTE(review): this mutates the flag after train() has built
                # the optimizer (and also fires at itr == 0); whether the
                # running optimizer sees the change depends on train() - confirm.
                FLAGS.learning_rate /= 2

        # Collected for later persistence; nothing writes ERROR to disk here.
        ERROR["mse"] = MSE
        ERROR["itr"] = ITR

    elif FLAGS.mode == "test":
        count = 10
        l_image, color_images = batch_reader.get_random_batch(count)
        feed_dict = {images: l_image, lab_images: color_images, train_phase: False}
        save_dir = os.path.join(FLAGS.logs_dir, "image_pred")
        pred = sess.run(pred_image, feed_dict=feed_dict)
        for itr in range(count):
            utils.save_image(color_images[itr], save_dir, "gt" + str(itr))
            utils.save_image(pred[itr].astype(np.float64), save_dir, "pred" + str(itr))
        print("--- Images saved on test run ---")
def main(argv=None):
    """Train an FCN segmentation model or visualise its predictions.

    In ``train`` mode the training loss is logged every 10 steps; every 500
    steps a validation batch is evaluated and a checkpoint is written.
    In ``visualize`` mode a random validation batch is predicted and the
    input, ground truth and prediction images are saved to FLAGS.logs_dir.

    :param argv: not used by this function.
    """
    import os  # local import: only needed to build the checkpoint path

    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy")))
    loss_summary = tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    # Built for parity with the original graph; only loss_summary is written.
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    # Needed by both the train and the visualize branches.
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()

    # Two writers so training and validation loss show up in the same chart.
    train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/validation')

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == "train":
        # Join instead of concatenating so the checkpoint lands inside
        # logs_dir (where it is looked up on restore) without a trailing slash.
        checkpoint_path = os.path.join(FLAGS.logs_dir, "model.ckpt")

        for itr in xrange(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images,
                         annotation: train_annotations,
                         keep_probability: 0.85}

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                train_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = validation_dataset_reader.next_batch(
                    FLAGS.batch_size)
                valid_loss, summary_sva = sess.run(
                    [loss, loss_summary],
                    feed_dict={image: valid_images,
                               annotation: valid_annotations,
                               keep_probability: 1.0})
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
                validation_writer.add_summary(summary_sva, itr)
                saver.save(sess, checkpoint_path, global_step=itr)

    elif FLAGS.mode == "visualize":
        valid_images, valid_annotations = validation_dataset_reader.get_random_batch(
            FLAGS.batch_size)
        pred = sess.run(pred_annotation,
                        feed_dict={image: valid_images,
                                   annotation: valid_annotations,
                                   keep_probability: 1.0})
        # Drop the trailing channel axis before saving as images.
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.logs_dir,
                             name="inp_" + str(5 + itr))
            utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir,
                             name="gt_" + str(5 + itr))
            utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir,
                             name="pred_" + str(5 + itr))
            print("Saved image: %d" % itr)
def main(argv=None):
    """Train, visualise or test a class-weighted FCN segmentation model.

    * ``train``: logs the loss every 100 steps; every 5000 steps evaluates a
      validation batch and writes a checkpoint.
    * ``visualize``: predicts a random validation batch and saves input,
      ground truth and prediction images to FLAGS.vis_dir.
    * ``test``: predicts every test record one at a time and saves input,
      ground truth and prediction images to FLAGS.vis_dir.

    :param argv: not used by this function.
    """
    import os  # local import: only needed to build the checkpoint path

    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.int32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 1],
                                name="annotation")

    pred_annotation, pred_annotation_no0, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)

    labels = tf.squeeze(annotation, squeeze_dims=[3])
    logits = FixLogitsWithIgnoreClass(logits, labels)

    # Per-pixel weight = weight of that pixel's label (9 classes).
    class_weights = tf.constant([0.1, 1., 1., 0.1, 20., 0.1, 8., 8., 0.1])
    onehot_labels = tf.one_hot(labels, depth=9)
    weights = tf.reduce_sum(class_weights * onehot_labels, axis=3)
    unweighted_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name="entropy")
    weighted_loss = unweighted_loss * weights
    loss = tf.reduce_mean(weighted_loss)
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records, test_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))
    print(len(test_records))

    print("Setting up dataset reader")
    image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    # Needed by both the train and the visualize branches.
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)
    if FLAGS.mode == 'test':
        test_dataset_reader = dataset.BatchDatset(test_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver(max_to_keep=20)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == "train":
        # Join instead of concatenating so the checkpoint lands inside
        # logs_dir (where it is looked up on restore) without a trailing slash.
        checkpoint_path = os.path.join(FLAGS.logs_dir, "model.ckpt")

        for itr in xrange(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
            feed_dict = {image: train_images,
                         annotation: train_annotations,
                         keep_probability: 0.85}

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 100 == 0:
                train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                summary_writer.add_summary(summary_str, itr)

            if itr % 5000 == 0:
                valid_images, valid_annotations = validation_dataset_reader.next_batch(
                    FLAGS.batch_size)
                valid_loss = sess.run(loss, feed_dict={image: valid_images,
                                                       annotation: valid_annotations,
                                                       keep_probability: 1.0})
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
                saver.save(sess, checkpoint_path, global_step=itr)

    elif FLAGS.mode == "visualize":
        valid_images, valid_annotations, valid_files = \
            validation_dataset_reader.get_random_batch(FLAGS.batch_size)
        pred = sess.run(pred_annotation, feed_dict={image: valid_images,
                                                    annotation: valid_annotations,
                                                    keep_probability: 1.0})
        # Drop the trailing channel axis before saving as images.
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            utils.save_image(valid_images[itr].astype(np.uint8), FLAGS.vis_dir,
                             name="inp_" + valid_files[itr])
            utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.vis_dir,
                             name="gt_" + valid_files[itr])
            utils.save_image(pred[itr].astype(np.uint8), FLAGS.vis_dir,
                             name="pred_" + valid_files[itr])
            print("Saved image: %d" % itr)

    elif FLAGS.mode == "test":
        # One record per step; output files are named after the record at
        # the same position in test_records.
        for itr, record in enumerate(test_records):
            test_images, test_annotations = test_dataset_reader.next_batch(1)
            pred = sess.run(pred_annotation_no0, feed_dict={image: test_images,
                                                            annotation: test_annotations,
                                                            keep_probability: 1.0})
            test_annotations = np.squeeze(test_annotations, axis=3)
            pred = np.squeeze(pred, axis=3)
            print(itr)

            utils.save_image(test_images[0].astype(np.uint8), FLAGS.vis_dir,
                             name="inp_" + record['filename'])
            utils.save_image(test_annotations[0].astype(np.uint8), FLAGS.vis_dir,
                             name="gt_" + record['filename'])
            utils.save_image(pred[0].astype(np.uint8), FLAGS.vis_dir,
                             name="pred_" + record['filename'])
def main(argv=None):
    """Train or visualise an inpainting network conditioned on a latent ``z``.

    The central 64x64 region of each input is painted 255 and the network
    reconstructs the image. For every batch a random ``z`` is first refined
    for 10 gradient steps on the reconstruction loss; in ``train`` mode the
    network weights are then updated once, in ``visualize`` mode the
    predicted patch is pasted into the masked image and saved.

    :param argv: not used by this function.
    """
    import os  # local import: only needed to build the checkpoint path

    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                                name="annotation")
    z = tf.placeholder(tf.float32, shape=[None, 4, 4, 10], name="z")

    # mask: 1 inside the central 64x64 hole, 0 elsewhere (64 + 2*32 = 128).
    mask_ = tf.ones([FLAGS.batch_size, 64, 64, 3])
    mask = tf.pad(mask_, [[0, 0], [32, 32], [32, 32], [0, 0]])
    # mask2: 1 on the 7-pixel band around the hole (78x78 square minus hole).
    mask2__ = tf.ones([FLAGS.batch_size, 78, 78, 3])
    mask2_ = tf.pad(mask2__, [[0, 0], [25, 25], [25, 25], [0, 0]])
    mask2 = mask2_ - mask

    # The network sees the image with the hole painted 255.
    pred_annotation, logits = inference((1 - mask) * image + mask * 255, keep_probability, z)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)

    # Squared error inside the hole (weighted 0.1) plus on the surrounding band.
    loss1 = 0.1 * tf.reduce_mean(tf.square((image - logits) * mask))
    loss2 = tf.reduce_mean(tf.square((image - logits) * mask2))
    loss = loss1 + loss2
    loss_summary = tf.summary.scalar("entropy", loss)

    grads = train_z(loss, z)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    # Built for parity with the original graph; only loss_summary is written.
    summary_op = tf.summary.merge_all()

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    # Needed by both the train and the visualize branches.
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()

    # Two writers so training and validation loss show up in the same chart.
    train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train', sess.graph)
    validation_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/validation')

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == "train":
        # Join instead of concatenating so the checkpoint lands inside
        # logs_dir (where it is looked up on restore) without a trailing slash.
        checkpoint_path = os.path.join(FLAGS.logs_dir, "model_z_center.ckpt")

        for itr in xrange(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
            z_ = np.random.uniform(low=-1.0, high=1.0, size=(FLAGS.batch_size, 4, 4, 10))
            # z_ is updated in place below, so this dict always feeds the
            # current latent code.
            feed_dict = {image: train_images,
                         annotation: train_annotations,
                         keep_probability: 0.85,
                         z: z_}

            # Refine z for 10 momentum steps before touching the weights.
            v = 0
            for p in range(10):
                z_loss = sess.run(loss, feed_dict=feed_dict)
                print("Step: %d, z_step: %d, Train_loss:%g" % (itr, p, z_loss))
                g = sess.run([grads], feed_dict=feed_dict)
                v_prev = np.copy(v)
                v = 0.001 * v - 0.1 * g[0][0]
                z_ += 0.001 * v_prev + (1 + 0.001) * v

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))
                train_writer.add_summary(summary_str, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = validation_dataset_reader.next_batch(
                    FLAGS.batch_size)
                valid_loss, summary_sva = sess.run(
                    [loss, loss_summary],
                    feed_dict={image: valid_images,
                               annotation: valid_annotations,
                               keep_probability: 1.0,
                               z: z_})
                print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
                validation_writer.add_summary(summary_sva, itr)
                # NOTE(review): global_step is the constant 500, so every save
                # reuses the same checkpoint name - confirm this is intended.
                saver.save(sess, checkpoint_path, 500)

    elif FLAGS.mode == "visualize":
        # The masks and z are sized by FLAGS.batch_size, so the image batch
        # must be too (a fixed batch of 2 only worked when batch_size == 2).
        valid_images, valid_annotations = validation_dataset_reader.get_random_batch(
            FLAGS.batch_size)
        z_ = np.random.uniform(low=-1.0, high=1.0, size=(FLAGS.batch_size, 4, 4, 10))
        # z_ is updated in place below, so this dict always feeds the
        # current latent code.
        feed_dict = {image: valid_images,
                     annotation: valid_annotations,
                     keep_probability: 0.85,
                     z: z_}

        v = 0
        for p in range(10):
            z_loss = sess.run(loss, feed_dict=feed_dict)
            print("z_step: %d, Train_loss:%g" % (p, z_loss))
            g = sess.run([grads], feed_dict=feed_dict)
            v_prev = np.copy(v)
            v = 0.001 * v - 0.1 * g[0][0]
            z_ += 0.001 * v_prev + (1 + 0.001) * v

        pred = sess.run(logits, feed_dict={image: valid_images,
                                           annotation: valid_annotations,
                                           z: z_,
                                           keep_probability: 1.0})
        # Evaluate the constant mask once and reuse it for both composites.
        mask_value = sess.run(mask)
        valid_images_masked = (1 - mask_value) * valid_images
        predicted_patch = mask_value * pred
        pred = valid_images_masked + predicted_patch

        print(valid_images.shape)
        print(valid_annotations.shape)
        print(pred.shape)

        for itr in range(FLAGS.batch_size):
            utils.save_image(valid_images_masked[itr].astype(np.uint8), FLAGS.logs_dir,
                             name="inp_" + str(5 + itr))
            utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir,
                             name="gt_" + str(5 + itr))
            utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir,
                             name="pred_" + str(5 + itr))
            print("Saved image: %d" % itr)
def main(argv=None):
    """Train an FCN with a temperature-softened loss and log metrics to a file.

    Besides TensorBoard summaries (every 10 steps), every 500 steps the loss
    and pixel/IoU accuracy are computed on a random training batch and on a
    validation batch, printed, appended to a text log inside FLAGS.logs_dir,
    and a checkpoint is written.

    :param argv: not used by this function.
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                name="annotation")

    pred_annotation_value, pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2)

    # logits: output of the last conv layer; annotation: ground truth.
    # Channel 1 of the temperature-scaled softmax is compared to the labels.
    soft_logits = tf.nn.softmax(logits / FLAGS.temperature)
    loss = tf.reduce_mean((tf.nn.sigmoid_cross_entropy_with_logits(
        logits=soft_logits[:, :, :, 1],
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy_soft")))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    # Make sure the log directory exists before writing into it.
    if not os.path.exists(FLAGS.logs_dir):
        print("The logs path '%s' is not found" % FLAGS.logs_dir)
        print("Create now..")
        os.makedirs(FLAGS.logs_dir)
        print("%s is created successfully!" % FLAGS.logs_dir)

    # Create the text log and write the run header.
    filename = "logs_%s%s.txt" % (FLAGS.mode, datetime.datetime.now())
    path_ = os.path.join(FLAGS.logs_dir, filename)
    with open(path_, 'w') as logs_file:
        logs_file.write("The logs file is created at %s.\n" % datetime.datetime.now())
        logs_file.write("The model is ---%s---.\n" % FLAGS.logs_dir)
        logs_file.write("The mode is %s\n" % (FLAGS.mode))
        logs_file.write(
            "The train data batch size is %d and the validation batch size is %d\n."
            % (FLAGS.batch_size, FLAGS.v_batch_size))
        logs_file.write("The train data is %s.\n" % (FLAGS.data_dir))
        logs_file.write("The data size is %d and the MAX_ITERATION is %d.\n"
                        % (IMAGE_SIZE, MAX_ITERATION))
        logs_file.write("Setting up image reader...")

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.my_read_dataset(FLAGS.data_dir)
    print('number of train_records', len(train_records))
    print('number of valid_records', len(valid_records))
    with open(path_, 'a') as logs_file:
        logs_file.write('number of train_records %d\n' % len(train_records))
        logs_file.write('number of valid_records %d\n' % len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    # Resume from the latest checkpoint when one exists.
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    # Join instead of concatenating so the checkpoint lands inside logs_dir
    # (where it is looked up on restore) even without a trailing slash.
    checkpoint_path = os.path.join(FLAGS.logs_dir, "model.ckpt")

    # NOTE(review): the loop below is not guarded by FLAGS.mode, while
    # train_dataset_reader only exists in 'train' mode - confirm intent.
    num_ = 0
    for itr in xrange(MAX_ITERATION):
        train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
        feed_dict = {image: train_images,
                     annotation: train_annotations,
                     keep_probability: 0.85}

        sess.run(train_op, feed_dict=feed_dict)

        if itr % 10 == 0:
            train_loss, summary_str = sess.run([loss, summary_op], feed_dict=feed_dict)
            print("Step: %d, Train_loss:%g" % (itr, train_loss))
            summary_writer.add_summary(summary_str, itr)

        if itr % 500 == 0:
            # Accuracy on a random training batch.
            train_random_images, train_random_annotations = \
                train_dataset_reader.get_random_batch_for_train(FLAGS.v_batch_size)
            train_loss, train_pred_anno = sess.run(
                [loss, pred_annotation],
                feed_dict={image: train_random_images,
                           annotation: train_random_annotations,
                           keep_probability: 1.0})
            accu_iou_, accu_pixel_ = accu.caculate_accurary(
                train_pred_anno, train_random_annotations)
            print("%s ---> Training_loss: %g" % (datetime.datetime.now(), train_loss))
            print("%s ---> Training_pixel_accuary: %g" % (datetime.datetime.now(), accu_pixel_))
            print("%s ---> Training_iou_accuary: %g" % (datetime.datetime.now(), accu_iou_))
            print("---------------------------")

            num_ = num_ + 1
            log_lines = [
                "No.%d the itr number is %d.\n" % (num_, itr),
                "%s ---> Training_loss: %g.\n" % (datetime.datetime.now(), train_loss),
                "%s ---> Training_pixel_accuary: %g.\n" % (datetime.datetime.now(), accu_pixel_),
                "%s ---> Training_iou_accuary: %g.\n" % (datetime.datetime.now(), accu_iou_),
                "---------------------------\n",
            ]

            # Accuracy on the next validation batch.
            valid_images, valid_annotations = validation_dataset_reader.next_batch(
                FLAGS.v_batch_size)
            valid_loss, pred_anno = sess.run(
                [loss, pred_annotation],
                feed_dict={image: valid_images,
                           annotation: valid_annotations,
                           keep_probability: 1.0})
            accu_iou, accu_pixel = accu.caculate_accurary(pred_anno, valid_annotations)
            print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
            print("%s ---> Validation_pixel_accuary: %g" % (datetime.datetime.now(), accu_pixel))
            print("%s ---> Validation_iou_accuary: %g" % (datetime.datetime.now(), accu_iou))

            log_lines += [
                "%s ---> Validation_loss: %g.\n" % (datetime.datetime.now(), valid_loss),
                "%s ---> Validation_pixel_accuary: %g.\n" % (datetime.datetime.now(), accu_pixel),
                "%s ---> Validation_iou_accuary: %g.\n" % (datetime.datetime.now(), accu_iou),
            ]

            # Open the log only when there is something to write, and let the
            # context manager close it, instead of re-opening the file on
            # every iteration.
            with open(path_, 'a') as logs_file:
                logs_file.writelines(log_lines)

            saver.save(sess, checkpoint_path, global_step=itr)
def build(self, rgb, NUM_CLASSES, keep_prob):
    """Build the fully convolutional network on top of a VGG16-style encoder.

    Encoder layers are created through ``self.conv_layer`` / ``self.max_pool``
    (per the original author's notes these load the trained VGG16 weights);
    the fully convolutional head and the transpose-convolution decoder are
    created here with fresh variables.

    :param rgb: rgb image [batch, height, width, 3] values 0-255
    :param NUM_CLASSES: depth of the final score map. The map is split into
        groups of 2, 3, 4 and 15 channels, so this has to be 24.
    :param keep_prob: keep probability passed to both dropout layers.
    """
    # Sum of weights of all filters, for the weight-decay loss.
    self.SumWeights = tf.constant(0.0, name="SumFiltersWeights")
    print("build model started")

    # Convert RGB to BGR and subtract the per-channel mean.
    red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb)
    bgr = tf.concat(axis=3, values=[
        blue - VGG_MEAN[0],
        green - VGG_MEAN[1],
        red - VGG_MEAN[2],
    ])

    # ---- Encoder ------------------------------------------------------
    # Layer 1
    self.conv1_1 = self.conv_layer(bgr, "conv1_1")
    self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
    self.pool1 = self.max_pool(self.conv1_2, 'pool1')
    # Layer 2
    self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
    self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
    self.pool2 = self.max_pool(self.conv2_2, 'pool2')
    # Layer 3
    self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
    self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
    self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
    self.pool3 = self.max_pool(self.conv3_3, 'pool3')
    # Layer 4
    self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
    self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
    self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
    self.pool4 = self.max_pool(self.conv4_3, 'pool4')
    # Layer 5
    self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
    self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
    self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
    self.pool5 = self.max_pool(self.conv5_3, 'pool5')

    # ---- Fully convolutional head ---------------------------------------
    W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
    b6 = utils.bias_variable([4096], name="b6")
    self.conv6 = utils.conv2d_basic(self.pool5, W6, b6)
    self.relu6 = tf.nn.relu(self.conv6, name="relu6")
    self.relu_dropout6 = tf.nn.dropout(self.relu6, keep_prob=keep_prob)

    W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")  # 1x1 convolution
    b7 = utils.bias_variable([4096], name="b7")
    self.conv7 = utils.conv2d_basic(self.relu_dropout6, W7, b7)
    self.relu7 = tf.nn.relu(self.conv7, name="relu7")
    self.relu_dropout7 = tf.nn.dropout(self.relu7, keep_prob=keep_prob)

    # 1x1 convolution down to one score channel per class.
    W8 = utils.weight_variable([1, 1, 4096, NUM_CLASSES], name="W8")
    b8 = utils.bias_variable([NUM_CLASSES], name="b8")
    self.conv8 = utils.conv2d_basic(self.relu_dropout7, W8, b8)

    # ---- Decoder: upscale back to the input image size -------------------
    # First transpose convolution: output takes the shape (and depth) of
    # pool4 so the two can be added element-wise.
    deconv_shape1 = self.pool4.get_shape()
    W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_CLASSES], name="W_t1")
    b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
    self.conv_t1 = utils.conv2d_transpose_strided(
        self.conv8, W_t1, b_t1, output_shape=tf.shape(self.pool4))
    self.fuse_1 = tf.add(self.conv_t1, self.pool4, name="fuse_1")

    # Second transpose convolution: same idea with pool3.
    deconv_shape2 = self.pool3.get_shape()
    W_t2 = utils.weight_variable(
        [4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
    b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
    self.conv_t2 = utils.conv2d_transpose_strided(
        self.fuse_1, W_t2, b_t2, output_shape=tf.shape(self.pool3))
    self.fuse_2 = tf.add(self.conv_t2, self.pool3, name="fuse_2")

    # Final stride-8 transpose convolution to the spatial size of the input.
    shape = tf.shape(rgb)
    W_t3 = utils.weight_variable(
        [16, 16, NUM_CLASSES, deconv_shape2[3].value], name="W_t3")
    b_t3 = utils.bias_variable([NUM_CLASSES], name="b_t3")
    self.conv_t3 = utils.conv2d_transpose_strided(
        self.fuse_2, W_t3, b_t3,
        output_shape=[shape[0], shape[1], shape[2], NUM_CLASSES],
        stride=8)

    # Split the final score map into one group of channels per label set.
    self.VesselProb, self.PhaseProb, self.LiquidSolidProb, self.AllPhasesProb = tf.split(
        self.conv_t3, [2, 3, 4, 15], 3)

    # Turn each score group into a label map. `axis` replaces the deprecated
    # `dimension` keyword, in line with the axis= calls used above.
    self.AllPhasesPred = tf.argmax(self.AllPhasesProb, axis=3, name="AllPhasesPred")
    self.LiquidSolidPred = tf.argmax(self.LiquidSolidProb, axis=3, name="LiquidSolidPred")
    self.PhasePred = tf.argmax(self.PhaseProb, axis=3, name="PhasePred")
    self.VesselPred = tf.argmax(self.VesselProb, axis=3, name="VesselPred")
    print("FCN model built")
def unetinference(image, keep_prob):
    """Build the U-Net style segmentation graph.

    The encoder runs four stages of two ``utils.conv`` layers followed by
    ``utils.pool`` (64, 128, 256 and 512 filters).  A two-layer 1024-filter
    stage follows, and the decoder mirrors the encoder: every stage applies
    ``utils.conv_transpose``, concatenates the result with the matching
    encoder output along axis 3 and applies two more ``utils.conv`` layers.
    Layers are created in exactly the same order as before, so automatically
    generated variable names are unchanged.

    :param image: input tensor fed to the first convolution.
    :param keep_prob: accepted for interface compatibility; not used here.
    :return: ``(prediction, outputs, net)`` where ``outputs`` is the result of
        the final 1x1 convolution with 18 filters, ``prediction`` is the
        argmax of ``outputs`` over axis 3 with that axis re-added, and ``net``
        is a dict holding the ``'global_step'`` variable.
    """
    net = {}
    # NOTE(review): the L2 regularisation scale is read from
    # FLAGS.learning_rate -- confirm that this coupling is intended.
    l2_reg = FLAGS.learning_rate

    # added for resume better
    global_iter_counter = tf.Variable(0, name='global_step', trainable=False)
    net['global_step'] = global_iter_counter

    with tf.variable_scope("inference"):
        # Not consumed in this function; still created so the graph keeps the
        # same set of nodes as before.
        teacher = tf.placeholder(tf.float32,
                                 [None, IMAGE_SIZE, IMAGE_SIZE, 18])
        # Only the encoder layers receive this flag (as in the original code).
        is_training = True

        # Encoder: conv, conv, pool per stage; the second conv output of each
        # stage is remembered for the skip connections.
        features = image
        skips = []
        for filters in (64, 128, 256, 512):
            features = utils.conv(features, filters=filters,
                                  l2_reg_scale=l2_reg,
                                  batchnorm_istraining=is_training)
            features = utils.conv(features, filters=filters,
                                  l2_reg_scale=l2_reg,
                                  batchnorm_istraining=is_training)
            skips.append(features)
            features = utils.pool(features)

        # 1/16, 1/16, 512
        features = utils.conv(features, filters=1024, l2_reg_scale=l2_reg)
        features = utils.conv(features, filters=1024, l2_reg_scale=l2_reg)

        # Decoder: transpose conv, concat with the skip tensor, conv, conv.
        for filters, skip in zip((512, 256, 128, 64), reversed(skips)):
            upsampled = utils.conv_transpose(features, filters=filters,
                                             l2_reg_scale=l2_reg)
            features = tf.concat([upsampled, skip], axis=3)
            features = utils.conv(features, filters=filters,
                                  l2_reg_scale=l2_reg)
            features = utils.conv(features, filters=filters,
                                  l2_reg_scale=l2_reg)

        outputs = utils.conv(features, filters=18, kernel_size=[1, 1],
                             activation=None)
        # `axis` replaces the deprecated `dimension` keyword of tf.argmax.
        annotation_pred = tf.argmax(outputs, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword of tf.expand_dims.
    return tf.expand_dims(annotation_pred, axis=3), outputs, net
def main(argv=None):
    """Build the U-Net graph, restore a checkpoint if present, run one mode.

    ``FLAGS.mode`` selects ``"train"``, ``"visualize"`` or ``"test"``; each
    mode is delegated to the matching ``fd.mode_*`` helper.  Once the session
    has been created it is always closed, including when checkpoint restore or
    the selected mode raises.

    :param argv: unused.
    """
    # 1. input placeholders
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=(None, IMAGE_SIZE, IMAGE_SIZE, 3),
                           name="input_image")
    annotation = tf.placeholder(tf.int32,
                                shape=(None, IMAGE_SIZE, IMAGE_SIZE, 1),
                                name="annotation")
    # global_step = tf.Variable(0, trainable=False, name='global_step')

    # 2. construct inference network
    pred_annotation, logits, net = unetinference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=3)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8),
                     max_outputs=3)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8),
                     max_outputs=3)

    # 3. loss measure
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy")))
    tf.summary.scalar("entropy", loss)

    # 4. optimizing
    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var, net['global_step'])

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    print("Setting up image reader from ", FLAGS.data_dir, "...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print("data dir:", FLAGS.data_dir)
    print("train_records length :", len(train_records))
    print("valid_records length :", len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    # The training reader is only needed (and only created) in train mode;
    # the validation reader is used by every mode.
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records,
                                                   image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records,
                                                    image_options)

    sess = tf.Session()
    try:
        print("Setting up Saver...")
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

        # 5. paramter setup
        # 5.1 init params
        sess.run(tf.global_variables_initializer())
        # 5.2 restore params if possible
        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored...")

        # 6. train-mode
        if FLAGS.mode == "train":
            fd.mode_train(sess, FLAGS, net, train_dataset_reader,
                          validation_dataset_reader, train_records,
                          pred_annotation, image, annotation,
                          keep_probability, logits, train_op, loss,
                          summary_op, summary_writer, saver, DISPLAY_STEP)
        # test-random-validation-data mode
        elif FLAGS.mode == "visualize":
            fd.mode_visualize(sess, FLAGS, TEST_DIR,
                              validation_dataset_reader, pred_annotation,
                              image, annotation, keep_probability,
                              NUM_OF_CLASSESS)
        # test-full-validation-dataset mode
        elif FLAGS.mode == "test":  # heejune added
            fd.mode_test(sess, FLAGS, TEST_DIR, validation_dataset_reader,
                         valid_records, pred_annotation, image, annotation,
                         keep_probability, logits, NUM_OF_CLASSESS)
    finally:
        # Release the session's resources on every exit path.
        sess.close()
def main(argv=None):
    """Build the segmentation graph and train until the reader runs dry.

    Batches are pulled from ``BatchDatset('data/trainlist.mat')``; training
    stops when ``next_batch()`` returns an empty annotation batch.  Every 100
    iterations the loss, the merged summaries and the sums of the prediction
    and the annotations are reported.

    Code that the original kept disabled inside triple-quoted string literals
    (including a trailing literal that was never terminated) is preserved as
    ``#`` comments so the function parses.

    :param argv: unused.
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.int32,
                                shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 1],
                                name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8),
                     max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8),
                     max_outputs=2)
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy")))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    # Disabled: record-based dataset setup, replaced by the .mat reader below.
    # print("Setting up image reader...")
    # train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    # print(len(train_records))
    # print(len(valid_records))
    #
    # print("Setting up dataset reader")
    # image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    # if FLAGS.mode == 'train':
    #     train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    # validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)
    train_dataset_reader = BatchDatset('data/trainlist.mat')

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    # if FLAGS.mode == "train":
    itr = 0
    train_images, train_annotations = train_dataset_reader.next_batch()
    while len(train_annotations) > 0:
        # train_images, train_annotations = train_dataset_reader.next_batch(FLAGS.batch_size)
        # print('==> batch data: ', train_images[0][100][100], '===', train_annotations[0][100][100])
        feed_dict = {image: train_images,
                     annotation: train_annotations,
                     keep_probability: 0.5}
        sess.run(train_op, feed_dict=feed_dict)

        if itr % 100 == 0:
            train_loss, summary_str, rpred = sess.run(
                [loss, summary_op, pred_annotation], feed_dict=feed_dict)
            print("Step: %d, Train_loss:%g" % (itr, train_loss))
            summary_writer.add_summary(summary_str, itr)
            print(np.sum(rpred))
            print('=============')
            print(np.sum(train_annotations))
            print('------------>>>')

        # Disabled: periodic validation.
        # if itr % 10000 == 0 and itr > 0:
        #     valid_images, valid_annotations = validation_dataset_reader.next_batch(FLAGS.batch_size)
        #     valid_loss = sess.run(loss, feed_dict={image: valid_images, annotation: valid_annotations,
        #                                            keep_probability: 1.0})
        #     print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss))
        itr += 1

        train_images, train_annotations = train_dataset_reader.next_batch()

    # NOTE(review): the original indentation was lost; saving once after the
    # loop is assumed here -- confirm it was not meant to run per iteration.
    saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    # Disabled: the original continued with a string literal starting
    # `elif FLAGS.mode == "visualize":` whose remainder is not available.
def inference(image, keep_prob):
    """
    Build the VGG-initialised fully convolutional segmentation network.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability passed to both dropout layers
    :return: ``(prediction, prediction_no0, logits)``. ``logits`` is the
        output of the last transposed convolution, ``prediction`` its argmax
        over axis 3, and ``prediction_no0`` the argmax computed with channel 0
        excluded (shifted by +1 so it indexes the original channels); both
        predictions get axis 3 re-added.
    """
    print("setting up vgg initialized conv layers ...")
    vgg_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    mean_image = vgg_data['normalization'][0][0][0]
    mean_pixel = np.mean(mean_image, axis=(0, 1))
    vgg_weights = np.squeeze(vgg_data['layers'])

    normalized_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(vgg_weights, normalized_image)
        pool4 = image_net["pool4"]
        pool3 = image_net["pool3"]
        pool5 = utils.max_pool_2x2(image_net["conv5_3"])

        # 7x7 convolution + ReLU + dropout
        kernel6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        bias6 = utils.bias_variable([4096], name="b6")
        relu6 = tf.nn.relu(utils.conv2d_basic(pool5, kernel6, bias6),
                           name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        # 1x1 convolution + ReLU + dropout
        kernel7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        bias7 = utils.bias_variable([4096], name="b7")
        relu7 = tf.nn.relu(utils.conv2d_basic(dropout6, kernel7, bias7),
                           name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        # 1x1 convolution down to one channel per class
        kernel8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS],
                                        name="W8")
        bias8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        class_scores = utils.conv2d_basic(dropout7, kernel8, bias8)
        # annotation_pred1 = tf.argmax(conv8, dimension=3, name="prediction1")

        # now to upscale to actual image size
        pool4_channels = pool4.get_shape()[3].value
        kernel_t1 = utils.weight_variable(
            [4, 4, pool4_channels, NUM_OF_CLASSESS], name="W_t1")
        bias_t1 = utils.bias_variable([pool4_channels], name="b_t1")
        pool4_shape = tf.shape(pool4)
        up_to_pool4 = utils.conv2d_transpose_strided(
            class_scores, kernel_t1, bias_t1, output_shape=pool4_shape)
        fused_pool4 = tf.add(up_to_pool4, pool4, name="fuse_1")

        pool3_channels = pool3.get_shape()[3].value
        kernel_t2 = utils.weight_variable(
            [4, 4, pool3_channels, pool4_channels], name="W_t2")
        bias_t2 = utils.bias_variable([pool3_channels], name="b_t2")
        pool3_shape = tf.shape(pool3)
        up_to_pool3 = utils.conv2d_transpose_strided(
            fused_pool4, kernel_t2, bias_t2, output_shape=pool3_shape)
        fused_pool3 = tf.add(up_to_pool3, pool3, name="fuse_2")

        input_shape = tf.shape(image)
        logits_shape = tf.stack(
            [input_shape[0], input_shape[1], input_shape[2], NUM_OF_CLASSESS])
        kernel_t3 = utils.weight_variable(
            [16, 16, NUM_OF_CLASSESS, pool3_channels], name="W_t3")
        bias_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        logits = utils.conv2d_transpose_strided(
            fused_pool3, kernel_t3, bias_t3,
            output_shape=logits_shape, stride=8)

        # Best class when channel 0 is left out; +1 maps the index back onto
        # the full channel range.
        prediction_no0 = tf.argmax(logits[:, :, :, 1:], dimension=3,
                                   name="prediction_no0") + 1
        prediction = tf.argmax(logits, dimension=3, name="prediction")

    return (tf.expand_dims(prediction, dim=3),
            tf.expand_dims(prediction_no0, dim=3),
            logits)
# In[ ]:

# NOTE: pickle.load can execute arbitrary code stored in the file -- only
# load dataset pickles that come from a trusted source.
# The file is opened in a `with` block so the handle is closed after loading.
with open(data_dir + conf.dataset + ".pickle", "rb") as dataset_file:
    db_helen = pickle.load(dataset_file)

# print the data structure
print(db_helen.keys())
print(db_helen['trainset'].keys())

# print the shape of the training set
print(db_helen['trainset']['pts'].shape)
print(db_helen['trainset']['img'].shape)

# print the shape of the testing set
print(db_helen['testset']['pts'].shape)
print(db_helen['testset']['img'].shape)

# declare the data iterators
train_batches = utils.get_batch(db_helen['trainset']['img'],
                                db_helen['trainset']['pts'],
                                batch_size=conf.batch_size)
valid_batches = utils.get_batch(db_helen['testset']['img'],
                                db_helen['testset']['pts'],
                                batch_size=conf.batch_size)

# In[ ]:

# load the gau map stored in gau_filter.pickle (same trust caveat as above)
with open(data_dir + "gau_filter.pickle", "rb") as gaumap_file:
    gaumap = pickle.load(gaumap_file)

# # Loss for Heatmap

# In[ ]:
def test(sess, output_image, mean_pixel):
    """Evaluate ``output_image`` and write the result to ``output.jpg``.

    :param sess: session used to evaluate ``output_image``.
    :param output_image: tensor to evaluate.
    :param mean_pixel: value forwarded to ``utils.unprocess_image``.
    """
    evaluated = sess.run(output_image)
    # Reshape to the dimensions that follow the leading axis.
    without_leading_axis = evaluated.reshape(evaluated.shape[1:])
    restored = utils.unprocess_image(without_leading_axis, mean_pixel)
    scipy.misc.imsave("output.jpg", restored.astype(np.float32))
def generator(images, train_phase):
    """Build the generator graph and return ``images`` joined with its output.

    A 3x3 convolution with 64 filters and a ReLU are applied to ``images``
    before the result is passed to ``vgg_net``.  The pooled ``relu5_3`` output
    is then brought back up through three strided transposed convolutions,
    with the ``pool4`` and ``pool3`` activations added in after the first two.

    :param images: input tensor; the first convolution expects one channel.
    :param train_phase: accepted but not used in this function.
    :return: ``images`` concatenated with the 2-channel prediction on axis 3.
    """
    print("setting up vgg initialized conv layers ...")
    vgg_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
    vgg_weights = np.squeeze(vgg_data['layers'])

    with tf.variable_scope("generator"):
        stem_kernel = utils.weight_variable([3, 3, 1, 64], name="W0")
        stem_bias = utils.bias_variable([64], name="b0")
        stem = tf.nn.relu(
            utils.conv2d_basic(images, stem_kernel, stem_bias), name="relu")

        image_net = vgg_net(vgg_weights, stem)
        pool4 = image_net["pool4"]
        pool3 = image_net["pool3"]
        pool5 = utils.max_pool_2x2(image_net["relu5_3"])

        # now to upscale to actual image size
        pool4_channels = pool4.get_shape()[3].value
        kernel_t1 = utils.weight_variable(
            [4, 4, pool4_channels, pool5.get_shape()[3].value], name="W_t1")
        bias_t1 = utils.bias_variable([pool4_channels], name="b_t1")
        pool4_shape = tf.shape(pool4)
        up_to_pool4 = utils.conv2d_transpose_strided(
            pool5, kernel_t1, bias_t1, output_shape=pool4_shape)
        fused_pool4 = tf.add(up_to_pool4, pool4, name="fuse_1")

        pool3_channels = pool3.get_shape()[3].value
        kernel_t2 = utils.weight_variable(
            [4, 4, pool3_channels, pool4_channels], name="W_t2")
        bias_t2 = utils.bias_variable([pool3_channels], name="b_t2")
        pool3_shape = tf.shape(pool3)
        up_to_pool3 = utils.conv2d_transpose_strided(
            fused_pool4, kernel_t2, bias_t2, output_shape=pool3_shape)
        fused_pool3 = tf.add(up_to_pool3, pool3, name="fuse_2")

        input_shape = tf.shape(images)
        pred_shape = tf.stack(
            [input_shape[0], input_shape[1], input_shape[2], 2])
        kernel_t3 = utils.weight_variable(
            [16, 16, 2, pool3_channels], name="W_t3")
        bias_t3 = utils.bias_variable([2], name="b_t3")
        pred = utils.conv2d_transpose_strided(
            fused_pool3, kernel_t3, bias_t3,
            output_shape=pred_shape, stride=8)

    return tf.concat(axis=3, values=[images, pred], name="pred_image")
# main(argv=None): driver that builds the graph, then trains or visualizes it.
#
# Graph construction
#   * Placeholders: keep_probability, image and annotation (float32,
#     [None, IMAGE_SIZE, IMAGE_SIZE, 3]), z and z_new ([None, 1, 1, 40]),
#     mask and mask2 ([None, 64, 64, 1]), istrain (bool), e
#     ([None, 4, 4, 552]) and e_p ([None, 1, 1, 8232]).
#   * inference() is called twice on `(1 - mask) * image + mask * 1.0`, once
#     with 0.0 and once with `e`; predictor() is called twice on the first
#     call's `h`, once with a zero tensor and once with `e_p`.
#   * loss_  = 0.8 * loss_mask + loss_mask2, where each term is the batch mean
#     of the summed |(annotation - logits) * mask| (resp. mask2).
#     loss   = loss_ + 0.1 * lip_loss_dec   (|logits - logits_e|)
#     zloss  = zloss_ + 0.1 * lip_loss_pred (|z_pred - z_new|, |z_pred - z_pred_e|)
#   * grads = train_z(loss_mask, z); train_op = train(loss, trainable_var);
#     train_pred = train_predictor(zloss, <variables in scope "predictor">).
#
# FLAGS.mode == "train"
#   For each of MAX_ITERATION iterations a batch is read, z_ is drawn
#   uniformly from [-1, 1], and a 20-step loop updates z_ from `grads` using a
#   velocity term `v` and clips it to [-10, 10]; train_op is run, the loss is
#   written to train_writer when itr % 10 == 0, and when itr % 500 == 0 a
#   validation batch is evaluated and a checkpoint is saved.
#
# FLAGS.mode == "visualize"
#   A random validation batch is read, z_ is updated for 20 steps in the same
#   way, `logits` is evaluated and the masked input, the blended prediction
#   and the ground truth are written with utils.save_image.
#
# NOTE(review): this function is stored on collapsed lines, so the nesting of
# the inner loops, the `if False:` branch and the triple-quoted (disabled)
# regions cannot be confirmed from here -- check against a formatted copy
# before changing any logic.
def main(argv=None): keep_probability = tf.placeholder(tf.float32, name="keep_probabilty") image = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="input_image") annotation = tf.placeholder(tf.float32, shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3], name="annotation") z = tf.placeholder(tf.float32, shape=[None, 1, 1, 40], name="z") mask = tf.placeholder(tf.float32, shape=[None, 64, 64, 1], name="mask") mask2 = tf.placeholder(tf.float32, shape=[None, 64, 64, 1], name="mask2") z_new = tf.placeholder(tf.float32, shape=[None, 1, 1, 40], name="z_new") istrain = tf.placeholder(tf.bool) #z_lip = tf.placeholder(tf.float32, shape=[None, 1, 1, 10], name="z_lip") #z_lip_inv = tf.placeholder(tf.float32, shape=[None, 1, 1, 10], name="z_lip_inv") e = tf.placeholder(tf.float32, shape=[None, 4, 4, 552], name="e") e_p = tf.placeholder(tf.float32, shape=[None, 1, 1, 8232], name="e_p") # pred_annotation, logits = inference(image, keep_probability,z) # tf.summary.image("input_image", image, max_outputs=2) # tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2) # tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2) # loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, # labels=tf.squeeze(annotation, squeeze_dims=[3]), # name="entropy"))) # mask_ = tf.ones([FLAGS.batch_size,32,64,3]) # mask = tf.pad(mask_, [[0,0],[0,32],[0,0],[0,0]]) # mask2__ = tf.ones([FLAGS.batch_size,78,78,3]) # mask2_ = tf.pad(mask2__, [[0,0],[25,25],[25,25],[0,0]]) # mask2 = mask2_ - mask zero = tf.zeros([FLAGS.batch_size, 1, 1, 8232]) logits, h = inference((1 - mask) * image + mask * 1.0, keep_probability, z, 0.0, istrain) logits_e, h_e = inference((1 - mask) * image + mask * 1.0, keep_probability, z, e, istrain) #logits_lip,_ = inference((1-mask)*image + mask*0.0, keep_probability,z_lip,istrain ) #logits_lip_inv,_ = inference((1-mask)*image + mask*0.0, keep_probability,z_lip_inv,istrain ) z_pred = 
predictor(h, z, zero, istrain) z_pred_e = predictor(h, z, e_p, istrain) # z_pred_lip = predictor(h,z_lip,istrain) # z_pred_lip_inv = predictor(h,z_lip_inv,istrain) # logits = inference(image, keep_probability,z,istrain) tf.summary.image("input_image", image, max_outputs=2) tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2) # tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8), max_outputs=2) # lossz = 0.1 * tf.reduce_mean(tf.reduce_sum(tf.abs(z),[1,2,3])) # lossz = 0.1 * tf.reduce_mean(tf.abs(z)) # loss_all = tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square((image - logits)),[1,2,3]))) # loss_all = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs(image - logits)),1)) # loss_mask = 0.8*tf.reduce_mean(tf.sqrt(tf.reduce_sum(tf.square((image - logits)*mask),[1,2,3]))) loss_mask = tf.reduce_mean( tf.reduce_sum( tf.contrib.layers.flatten(tf.abs((annotation - logits) * mask)), 1)) loss_mask2 = tf.reduce_mean( tf.reduce_sum( tf.contrib.layers.flatten(tf.abs((annotation - logits) * mask2)), 1)) loss_ = 0.8 * loss_mask + loss_mask2 # loss = tf.reduce_mean(tf.squared_difference(logits ,annotation )) loss_summary = tf.summary.scalar("entropy", loss_) # zloss = tf.reduce_mean(tf.losses.cosine_distance(tf.contrib.layers.flatten(z_new) ,tf.contrib.layers.flatten(z_pred),axis =1)) zloss_ = tf.reduce_mean( tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_new))), 1)) # zloss_lip = tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_pred_lip))),1)) # zloss_lip_inv = -tf.reduce_mean(tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_pred_lip_inv))),1)) # z_loss = zloss_ + 0.1* zloss_lip# + zloss_lip_inv lip_loss_dec = tf.reduce_mean( tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((logits - logits_e))), 1)) loss = loss_ + 0.1 * lip_loss_dec lip_loss_pred = tf.reduce_mean( tf.reduce_sum(tf.contrib.layers.flatten(tf.abs((z_pred - z_pred_e))), 1)) zloss = zloss_ + 0.1 * lip_loss_pred grads = 
train_z(loss_mask, z) trainable_var = tf.trainable_variables() trainable_z_pred_var = tf.trainable_variables(scope="predictor") trainable_d_pred_var = tf.trainable_variables(scope="decoder") print(trainable_z_pred_var) if FLAGS.debug: for var in trainable_var: utils.add_to_regularization_and_summary(var) train_op = train(loss, trainable_var) train_pred = train_predictor(zloss, trainable_z_pred_var) print("Setting up summary op...") summary_op = tf.summary.merge_all() print("Setting up image reader...") train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir) print(len(train_records)) print(len(valid_records)) print("Setting up dataset reader") image_options = {'resize': True, 'resize_size': IMAGE_SIZE} if FLAGS.mode == 'train': train_dataset_reader = dataset.BatchDatset(train_records, image_options) validation_dataset_reader = dataset.BatchDatset(valid_records, image_options) sess = tf.Session() print("Setting up Saver...") saver = tf.train.Saver() # create two summary writers to show training loss and validation loss in the same graph # need to create two folders 'train' and 'validation' inside FLAGS.logs_dir train_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/train', sess.graph) validation_writer = tf.summary.FileWriter(FLAGS.logs_dir + '/validation') sess.run(tf.global_variables_initializer()) ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir) if ckpt and ckpt.model_checkpoint_path: saver.restore(sess, ckpt.model_checkpoint_path) print("Model restored...") saved = True if FLAGS.mode == "train": for itr in xrange(MAX_ITERATION): train_images, train_annotations = train_dataset_reader.next_batch( FLAGS.batch_size) print(np.max(train_images)) # z_ = np.reshape(signal.gaussian(200, std=1),(FLAGS.batch_size,1,1,10))-0.5 z_ = np.random.uniform(low=-1.0, high=1.0, size=(FLAGS.batch_size, 1, 1, 40)) # train_images[train_images < 0.] = -1. # train_annotations[train_annotations < 0.] = -1. # train_images[train_images >= 0.] 
= 1.0 # train_annotations[train_annotations >= 0.] = 1.0 x1 = random.randint(0, 10) w1 = random.randint(30, 54) y1 = random.randint(0, 10) h1 = random.randint(30, 54) cond = random.randint(0, 10) # saved = True if False: saved = False train_images_m, train_annotations_m = train_dataset_reader.get_random_batch( FLAGS.batch_size) train_images_m[train_images_m < 0.] = -1. train_annotations_m[train_annotations_m < 0.] = -1. train_images_m[train_images_m >= 0.] = 1.0 train_annotations_m[train_annotations_m >= 0.] = 1.0 train_images = (train_images + 1.) / 2.0 * 255.0 train_annotations = (train_annotations + 1.) / 2.0 * 255.0 train_images_m = (train_images_m + 1.) / 2.0 * 255.0 train_annotations_m = (train_annotations_m + 1.) / 2.0 * 255.0 train_images_m[:, 32:, :, :] = 0 train_annotations_m[:, 32:, :, :] = 0 train_images = np.clip((train_images + train_images_m), 0.0, 255.0) train_annotations = np.clip( (train_annotations + train_annotations_m), 0.0, 255.0) ''' train_images[train_images < 0.] = -1. train_annotations[train_annotations < 0.] = -1. train_images[train_images >= 0.] = 1.0 train_annotations[train_annotations >= 0.] 
= 1.0 ''' train_annotations_ = np.squeeze(train_annotations, axis=3) train_images_ = train_images train_images = train_images / 127.5 - 1.0 train_annotations = train_annotations / 127.5 - 1.0 # for itr_ in range(FLAGS.batch_size): # utils.save_image(train_images_[itr_].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr_) ) # utils.save_image(train_annotations_[itr_].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr_) ) # train_images[:,x1:w1,y1:h1,:] = 0 # print(train_images) r_m, r_m2 = random_mask(64) #feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85, z: z_,mask:r_m, istrain:True } #train_images[:,50:100,50:100,:] =0 v = 0 # print(train_images) error_dec = np.random.normal(0.0, 0.001, (FLAGS.batch_size, 4, 4, 552)) error_dec_ = np.random.normal(0.0, 0.001, (FLAGS.batch_size, 1, 1, 8232)) # z_l_inv = z_ + np.random.normal(0.0,0.1) # feed_dict = {image: train_images, annotation: train_annotations, keep_probability: 0.85, z: z_, e:error_dec, mask:r_m, istrain:True } # z_l = z_ + np.random.normal(0.0,0.001) # lloss,_ = sess.run([lip_loss, train_lip ], feed_dict=feed_dict) # z_l = z_ + np.random.normal(0.0,0.001) # print("Step: %d, lip_loss:%g" % (itr,lloss)) for p in range(20): z_ol = np.copy(z_) # z_l = z_ol + np.random.normal(0.0,0.001) # print("666666666666666666666666666666666666666") feed_dict = { image: train_images, annotation: train_annotations, keep_probability: 0.85, z: z_, e: error_dec, mask: r_m, mask2: r_m2, istrain: True } # lloss,_ = sess.run([lip_loss, train_lip ], feed_dict=feed_dict) # print("Step: %d, z_step: %d, lip_loss:%g" % (itr,p,lloss)) z_loss, summ = sess.run([loss, loss_summary], feed_dict=feed_dict) print("Step: %d, z_step: %d, Train_loss:%g" % (itr, p, z_loss)) # print(z_) g = sess.run([grads], feed_dict=feed_dict) v_prev = np.copy(v) # print(g[0][0].shape) v = 0.001 * v - 0.1 * g[0][0] z_ += 0.001 * v_prev + (1 + 0.001) * v z_ = np.clip(z_, -10.0, 10.0) ''' m = 
interp1d([-10.0,10.0],[-1.0,1.0]) print(np.max(z_)) print(np.min(z_)) z_ol_interp = m(z_ol) z_interp = m(z_) _,z_pred_loss =sess.run([train_pred,zloss],feed_dict={image: train_images,mask:r_m,z:z_ol_interp,z_new:z_interp,e_p:error_dec_,istrain:True,keep_probability: 0.85}) print("Step: %d, z_step: %d, z_pred_loss:%g" % (itr,p,z_pred_loss)) ''' # _,z_pred_loss =sess.run([train_pred,zloss],feed_dict={image: train_images,mask:r_m,z:z_ol,z_new:z_,istrain:True,keep_probability: 0.85}) # print("Step: %d, z_step: %d, z_pred_loss:%g" % (itr,p,z_pred_loss)) # z_ = np.clip(z_, -1.0, 1.0) # print(v.shape) # print(z_.shape) feed_dict = { image: train_images, annotation: train_annotations, keep_probability: 0.85, mask: r_m, e: error_dec, z: z_, mask2: r_m2, istrain: True } sess.run(train_op, feed_dict=feed_dict) if itr % 10 == 0: train_loss, summary_str = sess.run([loss, loss_summary], feed_dict=feed_dict) print("Step: %d, Train_loss:%g" % (itr, train_loss)) train_writer.add_summary(summary_str, itr) if itr % 500 == 0: valid_images, valid_annotations = validation_dataset_reader.next_batch( FLAGS.batch_size) # valid_annotations[valid_annotations < 0.] = -1. # valid_images[valid_images < 0.] = -1. # valid_annotations[valid_annotations >= 0.] = 1.0 # valid_images[valid_images >= 0.] 
= 1.0 x1 = random.randint(0, 10) w1 = random.randint(30, 54) y1 = random.randint(0, 10) h1 = random.randint(30, 54) # valid_images[:,x1:w1,y1:h1,:] = 0 valid_loss, summary_sva = sess.run( [loss, loss_summary], feed_dict={ image: valid_images, mask: r_m, annotation: valid_annotations, keep_probability: 1.0, z: z_, e: error_dec, istrain: False, mask2: r_m2 }) print("%s ---> Validation_loss: %g" % (datetime.datetime.now(), valid_loss)) # add validation loss to TensorBoard validation_writer.add_summary(summary_sva, itr) saver.save(sess, FLAGS.logs_dir + "model_z_center_7.ckpt", 500) elif FLAGS.mode == "visualize": valid_images, valid_annotations = validation_dataset_reader.get_random_batch( FLAGS.batch_size) # valid_annotations[valid_annotations < 0.] = -1.0 # valid_images[valid_images < 0.] = -1.0 # valid_annotations[valid_annotations >= 0.] = 1.0 # valid_images[valid_images >= 0.] = 1.0 x1 = random.randint(0, 10) w1 = random.randint(30, 54) y1 = random.randint(0, 10) h1 = random.randint(30, 54) # valid_images[:,x1:w1,y1:h1,:] = 0 r_m, r_m2 = random_mask(64) # z_ = np.zeros(low=-1.0, high=1.0, size=(FLAGS.batch_size,1,1,10)) # z_ = np.reshape(signal.gaussian(200, std=1),(FLAGS.batch_size,1,1,10))-0.5 z_ = np.random.uniform(low=-1.0, high=1.0, size=(FLAGS.batch_size, 1, 1, 40)) feed_dict = { image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z: z_, istrain: False, mask: r_m, mask2: r_m2 } v = 0 m__ = interp1d([-10.0, 10.0], [-1.0, 1.0]) z_ = m__(z_) # feed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z: z_, istrain:False,mask:r_m } for p in range(20): z_ol = np.copy(z_) # print("666666666666666666666666666666666666666") # print(z_) # feed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z: z_, istrain:False,mask:r_m } # z_loss, summ = sess.run([loss,loss_summary], feed_dict=feed_dict) # print("z_step: %d, Train_loss:%g" % (p,z_loss)) # z_, z_pred_loss = 
sess.run(z_pred,zlossfeed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 1.0, z:z_ol, istrain:False,mask:r_m}) # print(z_) g = sess.run([grads], feed_dict=feed_dict) v_prev = np.copy(v) # print(g[0][0].shape) v = 0.001 * v - 0.1 * g[0][0] z_ = z_ol + 0.001 * v_prev + (1 + 0.001) * v # z_ = z_ol + 0.001 * v_prev + (1+0.001)*v # print("z_____________") # print(z__) # print("z_") # print(z_) # m__ = interp1d([-10.0,10.0],[-1.0,1.0]) # z_ol = m__(z_ol) # z_ = sess.run(z_pred,feed_dict = {image: valid_images, annotation: valid_annotations, keep_probability: 0.85, z:z_ol, istrain:False,mask:r_m}) # m_ = interp1d([-1.0,1.0],[-10.0,10.0]) # z_ = m_(z_) # z_ = np.clip(z_, -1.0, 1.0) # print(z_pred_loss) # m_ = interp1d([-1.0,1.0],[-10.0,10.0]) # z_ = m_(z_) pred = sess.run(logits, feed_dict={ image: valid_images, annotation: valid_annotations, z: z_, istrain: False, mask: r_m, mask2: r_m2, keep_probability: 0.85 }) valid_images_masked = ((1 - r_m) * valid_images + 1.) / 2.0 * 255 # valid_images = (valid_images +1.)/2.0*255 # predicted_patch = sess.run(mask) * pred # pred = valid_images_masked + predicted_patch pred_ = (pred + 1.) / 2.0 * 255 # pred = pred + 1./2.0*255 pred = valid_images_masked * (1 - r_m) + pred_ * r_m valid_annotations_ = (valid_annotations + 1.) 
/ 2.0 * 255 # pred = np.squeeze(pred, axis=3) print(np.max(pred)) print(valid_images.shape) print(valid_annotations.shape) print(pred.shape) # for itr in range(FLAGS.batch_size): # utils.save_image(valid_images_masked[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr)) # utils.save_image(valid_annotations[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr)) # utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="predz_" + str(5+itr)) # utils.save_image(valid_images_masked[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5+itr)+'_' + str(p) ) # utils.save_image(valid_annotations_[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5+itr)+'_' + str(p) ) # utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="predz_" + str(5+itr)+'_' + str(p) ) # print("Saved image: %d" % itr) for itr in range(FLAGS.batch_size): utils.save_image(valid_images_masked[itr].astype(np.uint8), FLAGS.logs_dir, name="inp_" + str(5 + itr)) utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir, name="predz_" + str(5 + itr)) utils.save_image(valid_annotations_[itr].astype(np.uint8), FLAGS.logs_dir, name="gt_" + str(5 + itr))
def _prepare_result_dir(logs_file):
    """Create the timestamped result directory and record it in the run log.

    The directory name is ``FLAGS.result_dir`` immediately followed by the
    current ``datetime.datetime.now()`` string.

    :param logs_file: open, writable text file used as the run log.
    :return: path of the result directory.
    """
    re_save_dir = "%s%s" % (FLAGS.result_dir, datetime.datetime.now())
    logs_file.write("The result is save at file'%s'.\n" % re_save_dir)
    logs_file.write("The number of part visualization is %d.\n" %
                    FLAGS.v_batch_size)
    # Check the result path if exists.
    if not os.path.exists(re_save_dir):
        print("The path '%s' is not found." % re_save_dir)
        print("Create now ...")
        os.makedirs(re_save_dir)
        print("Create '%s' successfully." % re_save_dir)
        logs_file.write("Create '%s' successfully.\n" % re_save_dir)
    return re_save_dir


def main(argv=None):
    """Build the segmentation graph and run the mode selected by FLAGS.mode.

    Handled modes:

    * ``"visualize"``     - predict one random validation batch and save the
      overlay, ground truth, prediction and heat images.
    * ``"accurary"``      - walk the validation set batch by batch and report
      per-batch and averaged pixel / IoU accuracy.
    * ``"all_visualize"`` - walk the validation set and save one overlay image
      (prediction + annotation) per sample.

    A text log is written under ``FLAGS.logs_dir``; it is closed even when
    the run aborts with an exception.

    :param argv: unused; kept for the ``tf.app.run`` calling convention.
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.int32,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                name="annotation")

    pred_annotation_value, pred_annotation, logits = inference(
        image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8),
                     max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8),
                     max_outputs=2)
    # logits: output of the last layer of the network.
    # labels: the ground truth annotation with its channel axis removed.
    loss = tf.reduce_mean(
        (tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf.squeeze(annotation, squeeze_dims=[3]),
            name="entropy")))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    # The training op is not run below, but building it adds the optimizer's
    # ops to the graph before the Saver is created, so it is kept.
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    # Create a file to write logs.
    filename = "logs%s%s.txt" % (FLAGS.mode, datetime.datetime.now())
    path_ = os.path.join(FLAGS.logs_dir, filename)
    # "with" guarantees the log is flushed and closed on every exit path.
    with open(path_, 'w') as logs_file:
        logs_file.write("The logs file is created at %s\n" %
                        datetime.datetime.now())
        logs_file.write("The mode is %s\n" % (FLAGS.mode))
        logs_file.write(
            "The train data batch size is %d and the validation batch size is %d.\n"
            % (FLAGS.batch_size, FLAGS.v_batch_size))
        logs_file.write("The train data is %s.\n" % (FLAGS.data_dir))
        logs_file.write("The data size is %d and the MAX_ITERATION is %d.\n" %
                        (IMAGE_SIZE, MAX_ITERATION))
        logs_file.write("The model is ---%s---.\n" % FLAGS.logs_dir)

        print("Setting up image reader...")
        logs_file.write("Setting up image reader...\n")
        train_records, valid_records = scene_parsing.my_read_dataset(
            FLAGS.data_dir)
        print('number of train_records', len(train_records))
        print('number of valid_records', len(valid_records))
        logs_file.write('number of train_records %d\n' % len(train_records))
        logs_file.write('number of valid_records %d\n' % len(valid_records))

        print("Setting up dataset reader")
        image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
        if FLAGS.mode == 'train':
            train_dataset_reader = dataset.BatchDatset(train_records,
                                                       image_options)
        validation_dataset_reader = dataset.BatchDatset(valid_records,
                                                        image_options)

        sess = tf.Session()

        print("Setting up Saver...")
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
        # Restore the most recent checkpoint from logs_dir when there is one.
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored...")

        if FLAGS.mode == "visualize":
            # The reader also returns per-sample records; their 'filename'
            # entry names the saved images.
            valid_images, valid_annotations, valid_filename = \
                validation_dataset_reader.get_random_batch(FLAGS.v_batch_size)
            t_start = time.time()
            pred_value, pred, logits_ = sess.run(
                [pred_annotation_value, pred_annotation, logits],
                feed_dict={
                    image: valid_images,
                    annotation: valid_annotations,
                    keep_probability: 1.0
                })
            print('the shape of pred_value', pred_value.shape)
            print('the shape of pred', pred.shape)
            valid_annotations = np.squeeze(valid_annotations, axis=3)
            pred = np.squeeze(pred, axis=3)
            pred_value = np.squeeze(pred_value, axis=3)
            print('the shape of pred_value after squeeze', pred_value.shape)
            print('the shape of pred after squeeze', pred.shape)
            # Throughput of the forward pass alone (images per second).
            t_elapsed_s = time.time() - t_start
            speed_s = len(pred) / t_elapsed_s
            print('the num of picture', len(pred))
            print('speed_neddle', speed_s)

            re_save_dir = _prepare_result_dir(logs_file)

            for itr in range(FLAGS.v_batch_size):
                filename = valid_filename[itr]['filename']
                valid_images_ = pred_visualize(valid_images[itr], pred[itr])
                utils.save_image(valid_images_.astype(np.uint8), re_save_dir,
                                 name="inp_" + filename)
                utils.save_image(valid_annotations[itr].astype(np.uint8),
                                 re_save_dir, name="gt_" + filename)
                utils.save_image(pred[itr].astype(np.uint8), re_save_dir,
                                 name="pred_" + filename)
                utils.save_image(pred_value[itr].astype(np.uint8),
                                 re_save_dir, name="heat_" + filename)
                print("Saved image: %d" % itr)

            # Throughput including visualisation and image writing.
            t_elapsed_ = time.time() - t_start
            speed = len(pred) / t_elapsed_
            print('the num of picture', len(pred))
            print('speed_neddle', speed)

        elif FLAGS.mode == "accurary":
            count = 0
            if_con = True
            accu_iou_t = 0
            accu_pixel_t = 0
            # next_batch_valid returns if_con; the loop ends once it is falsy.
            while if_con:
                count = count + 1
                (valid_images, valid_annotations, valid_filenames, if_con,
                 start, end) = validation_dataset_reader.next_batch_valid(
                     FLAGS.v_batch_size)
                valid_loss, pred_anno = sess.run(
                    [loss, pred_annotation],
                    feed_dict={
                        image: valid_images,
                        annotation: valid_annotations,
                        keep_probability: 1.0
                    })
                accu_iou, accu_pixel = accu.caculate_accurary(
                    pred_anno, valid_annotations)
                print("Ture %d ---> the data from %d to %d" %
                      (count, start, end))
                print("%s ---> Validation_pixel_accuary: %g" %
                      (datetime.datetime.now(), accu_pixel))
                print("%s ---> Validation_iou_accuary: %g" %
                      (datetime.datetime.now(), accu_iou))
                # Output logs.
                logs_file.write("Ture %d ---> the data from %d to %d\n" %
                                (count, start, end))
                logs_file.write("%s ---> Validation_pixel_accuary: %g\n" %
                                (datetime.datetime.now(), accu_pixel))
                logs_file.write("%s ---> Validation_iou_accuary: %g\n" %
                                (datetime.datetime.now(), accu_iou))
                accu_iou_t = accu_iou_t + accu_iou
                accu_pixel_t = accu_pixel_t + accu_pixel

            # Unweighted mean over batches (every batch counts equally).
            print("%s ---> Total validation_pixel_accuary: %g" %
                  (datetime.datetime.now(), accu_pixel_t / count))
            print("%s ---> Total validation_iou_accuary: %g" %
                  (datetime.datetime.now(), accu_iou_t / count))
            # Output logs
            logs_file.write("%s ---> Total validation_pixel_accurary: %g\n" %
                            (datetime.datetime.now(), accu_pixel_t / count))
            logs_file.write("%s ---> Total validation_iou_accurary: %g\n" %
                            (datetime.datetime.now(), accu_iou_t / count))

        elif FLAGS.mode == "all_visualize":
            re_save_dir = _prepare_result_dir(logs_file)

            if_con = True
            while if_con:
                (valid_images, valid_annotations, valid_filename, if_con,
                 start, end) = validation_dataset_reader.next_batch_valid(
                     FLAGS.v_batch_size)
                pred_value, pred, logits_ = sess.run(
                    [pred_annotation_value, pred_annotation, logits],
                    feed_dict={
                        image: valid_images,
                        annotation: valid_annotations,
                        keep_probability: 1.0
                    })
                valid_annotations = np.squeeze(valid_annotations, axis=3)
                pred = np.squeeze(pred, axis=3)
                pred_value = np.squeeze(pred_value, axis=3)

                # len(pred) rather than v_batch_size: the batch length comes
                # from the data actually returned by the reader.
                for itr in range(len(pred)):
                    filename = valid_filename[itr]['filename']
                    valid_images_ = pred_visualize(valid_images[itr],
                                                   pred[itr])
                    valid_images_ = anno_visualize(valid_images_,
                                                   valid_annotations[itr])
                    # save result
                    utils.save_image(valid_images_.astype(np.uint8),
                                     re_save_dir, name="inp_" + filename)
def _band_to_batch(band):
    """Replicate one 2-D band three times and return it as a (1, H, W, 3) batch."""
    return np.array([np.array([band for _ in range(3)])]).transpose(0, 2, 3, 1)


def main(argv=None):
    """Build the segmentation graph and run the mode selected by FLAGS.mode.

    Handled modes:

    * ``"train"``     - optimise for ``MAX_ITERATION`` steps, writing a train
      summary every 10 steps and a checkpoint every 100 steps.
    * ``"visualize"`` - predict one random validation batch and save input,
      ground truth and prediction images into ``FLAGS.logs_dir``.
    * ``"test"``      - tile band 3 of the module-level ``new_2017`` array,
      predict every tile, refine the softmax output with ``crf.crf`` and
      write both mosaics as TIFF files.

    :param argv: unused; kept for the ``tf.app.run`` calling convention.
    """
    import os  # local import: only needed to build the checkpoint path

    # NOTE(review): this variable is not used by anything in this function,
    # but it is created before the Saver and is therefore part of the saved
    # variable set - confirm before removing.
    regularization = tf.Variable(0, dtype=tf.float32)

    keep_probability = tf.placeholder(tf.float32,
                                      name="keep_probabilty")  # dropout
    image = tf.placeholder(tf.float32, shape=[None, None, None, 3],
                           name="input_image")  # network input
    annotation = tf.placeholder(tf.int32, shape=[None, None, None, 1],
                                name="annotation")  # label
    mean = tf.placeholder(tf.float32, name='mean')

    pred_annotation, logits, softmax = inference(image, keep_probability,
                                                 mean)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8),
                     max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8),
                     max_outputs=2)
    # sparse_softmax_cross_entropy_with_logits applies the softmax to the
    # logits itself, so the raw logits are passed here.
    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits,
            labels=tf.squeeze(annotation, squeeze_dims=[3]),
            name="entropy"))
    # Training and validation share this single loss node.
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_RESIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records,
                                                   image_options)
        # Train mode also loads the validation set. Visualize mode builds
        # its own reader below and test mode does not use one.
        validation_dataset_reader = dataset.BatchDatset(valid_records,
                                                        image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver()
    summary_op = tf.summary.merge_all()
    summary_writer_train = tf.summary.FileWriter(FLAGS.logs_dir + '/train',
                                                 sess.graph)
    # The graph is only attached to the train writer.
    summary_writer_val = tf.summary.FileWriter(FLAGS.logs_dir + '/val')

    sess.run(tf.global_variables_initializer())
    # Resume from the latest checkpoint recorded in logs_dir, if any.
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    # Per-channel mean fed to the network; must be updated per data set.
    mymean = [73.9524, 73.9524, 73.9524]

    if FLAGS.mode == "train":
        # os.path.join keeps the checkpoint inside logs_dir whether or not
        # the flag ends with a path separator, so get_checkpoint_state above
        # can find it on the next run.
        checkpoint_prefix = os.path.join(FLAGS.logs_dir, "model.ckpt")
        for itr in range(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(
                FLAGS.batch_size)
            feed_dict = {
                image: train_images,
                annotation: train_annotations,
                keep_probability: 0.85,
                mean: mymean
            }

            sess.run(train_op, feed_dict=feed_dict)

            if itr % 10 == 0:
                train_loss, summary_str = sess.run([loss, summary_op],
                                                   feed_dict=feed_dict)
                summary_writer_train.add_summary(summary_str, itr)
                print("Step: %d, Train_loss:%g" % (itr, train_loss))

            if itr % 100 == 0:
                # The validation pass is disabled; only a checkpoint is
                # written at this interval.
                saver.save(sess, checkpoint_prefix, itr)
        summary_writer_train.close()

    elif FLAGS.mode == "visualize":
        validation_dataset_reader = dataset.BatchDatset(valid_records,
                                                        image_options)
        valid_images, valid_annotations = \
            validation_dataset_reader.get_random_batch(FLAGS.batch_size)
        pred = sess.run(pred_annotation,
                        feed_dict={
                            image: valid_images,
                            annotation: valid_annotations,
                            keep_probability: 1.0,
                            mean: mymean
                        })
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            utils.save_image(valid_images[itr].astype(np.uint8),
                             FLAGS.logs_dir, name="inp_" + str(5 + itr))
            utils.save_image(valid_annotations[itr].astype(np.uint8),
                             FLAGS.logs_dir, name="gt_" + str(5 + itr))
            utils.save_image(pred[itr].astype(np.uint8), FLAGS.logs_dir,
                             name="pred_" + str(5 + itr))
            print("Saved image: %d" % itr)

    elif FLAGS.mode == "test":
        global new_2017

        # Cut rows 0..2999 of band 3 into 25 tiles of 600 columns, plus one
        # last narrow tile covering columns 15000..15105.
        im_2017_list = []
        for i in range(25):
            tile = _band_to_batch(new_2017[0:3000, i * 600:i * 600 + 600, 3])
            im_2017_list.append(tile)
            print(tile.shape)
        im_2017_list.append(_band_to_batch(new_2017[0:3000, 15000:15106, 3]))

        allImg = []
        allImg_soft = []
        for im_2017_part in im_2017_list:
            feed_dict_test = {
                image: im_2017_part,
                keep_probability: 1.0,
                mean: mymean
            }
            # One forward pass yields both the label map and the softmax
            # probabilities (keep_probability is 1.0, so it is deterministic).
            a, soft = sess.run([pred_annotation, softmax],
                               feed_dict=feed_dict_test)
            allImg.append(np.mean(a, axis=(0, 3)))

            # CRF refinement: drop the batch axis and move the class axis
            # first before handing the probabilities to crf.crf.
            soft = np.mean(soft, axis=0).transpose(2, 0, 1)
            im_2017_mean = np.mean(im_2017_part, axis=0)
            allImg_soft.append(crf.crf(im_2017_mean, soft))

        # Stitch the tiles back together along the column axis.
        Crf = np.concatenate(tuple(allImg_soft), axis=1)
        tiff.imsave(
            '/home/lenovo/2Tdisk/Wkyao/_/2017/crf_build7_1030_23200.tif', Crf)
        res1 = np.concatenate(tuple(allImg), axis=1).astype(np.uint8)
        tiff.imsave('/home/lenovo/2Tdisk/Wkyao/_/2017/build7_1030_23200.tif',
                    res1)
def train(loss, var_list):
    """Build the Adam update op for ``loss`` restricted to ``var_list``.

    The learning rate and ``beta1`` come from ``FLAGS``. A gradient summary
    is registered for every (gradient, variable) pair.

    :param loss: scalar tensor to minimise.
    :param var_list: variables the optimizer is allowed to update.
    :return: the op produced by ``apply_gradients``.
    """
    adam = tf.train.AdamOptimizer(FLAGS.learning_rate, beta1=FLAGS.beta1)
    grads_and_vars = adam.compute_gradients(loss, var_list=var_list)
    for gradient, variable in grads_and_vars:
        utils.add_gradient_summary(gradient, variable)
    train_step = adam.apply_gradients(grads_and_vars)
    return train_step
def birdview_proposal_net(birdview, gt_anchor_labels, gt_anchor_regs, anchor_cls_masks, anchor_reg_masks, weight, reg_weight, model_dir, batch_size, debug, keep_prob = 1.0):
    """3D region proposal network on the bird's-eye view.

    Runs the VGG-16 based backbone on ``birdview``, upsamples its
    ``birdview_relu4_3`` output and attaches one classification head
    (2 outputs) and one regression head (``NUM_OF_REGRESSION_VALUE`` outputs)
    per anchor. The masked classification / regression losses and an L2
    weight-decay term over all trainable variables are combined into the
    overall loss.

    :param birdview: input bird's-eye-view tensor.
    :param gt_anchor_labels: ground-truth anchor class labels.
    :param gt_anchor_regs: ground-truth anchor regression targets.
    :param anchor_cls_masks: mask selecting anchors for the classification loss.
    :param anchor_reg_masks: mask selecting anchors for the regression loss.
    :param weight: weight of the classification loss in the overall loss.
    :param reg_weight: weight of the L2 weight-decay term.
    :param model_dir: directory holding (or receiving) the VGG-16 model file.
    :param batch_size: batch size used to reshape the head outputs.
    :param debug: when true, activation summaries are added.
    :param keep_prob: dropout keep probability forwarded to the layers.
    :return: ``(net, classification_loss, regression_loss, Overall_loss,
        all_rpn_logits_softmax, all_rpn_regs)``.
    """
    # Miscellaneous definition
    MODEL_URL = 'http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-16.mat'
    NUM_OF_REGRESSION_VALUE = 6
    NUM_OF_ANCHOR = 4
    FCN_KERNEL_SIZE = 3

    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(model_dir, MODEL_URL)
    weights = np.squeeze(model_data['layers'])

    # Outputs of the classification and regression heads, one entry per anchor.
    all_rpn_logits = []
    all_rpn_regs = []

    # vgg
    with tf.name_scope("Vgg-16"):
        net = vgg_net_birdview(weights, birdview, debug, keep_prob)
        current = net["birdview_relu4_3"]

    # upsample, output 256 channels
    with tf.name_scope("Upsample_layer"):
        kernels = utils.weight_variable([3, 3, 256, 256], name="upsample_w")
        bias = utils.bias_variable([256], name="upsample_b")
        net['upsample'] = utils.conv2d_transpose_strided(current, kernels, bias, name='upsample', keep_prob=keep_prob)
        current = net['upsample']
        if debug:
            utils.add_activation_summary(current)

    with tf.name_scope("Fully_conv_layer"):
        # Fully convolutional heads of the 3D proposal network, similar to
        # the last layer of a Region Proposal Network.
        for j in range(NUM_OF_ANCHOR):
            kernels_cls = utils.weight_variable([FCN_KERNEL_SIZE, FCN_KERNEL_SIZE, 256, 2], name="FCN_cls_w" + str(j))
            kernels_reg = utils.weight_variable([FCN_KERNEL_SIZE, FCN_KERNEL_SIZE, 256, NUM_OF_REGRESSION_VALUE], name="FCN_reg_w" + str(j))
            bias_cls = utils.bias_variable([2], name="FCN_cls_b" + str(j))
            # Sized from the same constant as kernels_reg so the two cannot
            # drift apart when the number of regression values changes.
            bias_reg = utils.bias_variable([NUM_OF_REGRESSION_VALUE], name="FCN_reg_b" + str(j))
            rpn_logits = utils.conv2d_basic(current, kernels_cls, bias_cls)
            rpn_regs = utils.conv2d_basic(current, kernels_reg, bias_reg)
            net["FCN_cls_" + str(j)] = rpn_logits
            net["FCN_reg_" + str(j)] = rpn_regs
            if debug:
                utils.add_activation_summary(rpn_logits)

            rpn_logits = tf.reshape(rpn_logits, [batch_size, -1, 2])
            all_rpn_logits.append(rpn_logits)

            # Values required clip might be different
            # rpn_regs = tf.clip_by_value(rpn_regs, -0.2, 0.2)
            rpn_regs = tf.reshape(rpn_regs, [batch_size, -1, NUM_OF_REGRESSION_VALUE])
            all_rpn_regs.append(rpn_regs)

    with tf.name_scope("Cls_and_reg_loss"):
        all_rpn_logits = tf.concat(all_rpn_logits, 1)
        all_rpn_regs = tf.concat(all_rpn_regs, 1)
        all_rpn_logits = tf.reshape(all_rpn_logits, [-1, 2])
        all_rpn_logits_softmax = tf.nn.softmax(all_rpn_logits, dim=-1)
        all_rpn_regs = tf.reshape(all_rpn_regs, [-1, NUM_OF_REGRESSION_VALUE])

        # Flatten the targets and masks to line up with the flattened outputs.
        gt_anchor_labels = tf.reshape(gt_anchor_labels, [-1])
        gt_anchor_regs = tf.reshape(gt_anchor_regs, [-1, NUM_OF_REGRESSION_VALUE])
        anchor_cls_masks = tf.reshape(anchor_cls_masks, [-1])
        anchor_reg_masks = tf.reshape(anchor_reg_masks, [-1])

        # Classification loss: masked mean; the maximum(..., 1) guards
        # against dividing by zero when no anchor is selected.
        classification_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=gt_anchor_labels, logits=all_rpn_logits) * anchor_cls_masks
        classification_loss = tf.reduce_sum(classification_loss) / tf.maximum(tf.reduce_sum(anchor_cls_masks), 1)

        # Regression loss, masked and normalised the same way.
        regression_loss = tf.reduce_sum(l2_loss(all_rpn_regs, gt_anchor_regs) * anchor_reg_masks) / tf.maximum(tf.reduce_sum(anchor_reg_masks), 1)

        # Regularization: sum of L2 norms over every trainable variable.
        trainable_var = tf.trainable_variables()
        weight_decay = 0
        for var in trainable_var:
            weight_decay = weight_decay + tf.nn.l2_loss(var)

        Overall_loss = weight * classification_loss + regression_loss + reg_weight * weight_decay

    return net, classification_loss, regression_loss, Overall_loss, all_rpn_logits_softmax, all_rpn_regs
# Placeholders sized (batch, height, width) from params.res: a boolean one
# (y_) and a float one (prev_y).
# NOTE(review): tf.sub and tf.concat(axis, values) are the pre-1.0 TensorFlow
# spellings (later tf.subtract and tf.concat(values, axis)) - confirm the
# TensorFlow version this file targets.
y_ = tf.placeholder(tf.bool, shape=[None, params.res["height"], params.res["width"]])
prev_y = tf.placeholder(
    tf.float32, shape=[None, params.res["height"], params.res["width"]])
# Add a trailing channel axis to prev_y and shift its values by -0.5.
prev_y_in = tf.sub(tf.expand_dims(prev_y, 3), 0.5)
# Stack x and the shifted prev_y along axis 3 (the channel axis).
x_in = tf.concat(3, [x, prev_y_in])
# Input channel count of the first convolution.
# Presumably x has one channel plus one for prev_y_in - TODO confirm.
ch_in = 2

##############################
# Section 1

# Convolution
layer_name = "s1_conv1"
with tf.name_scope(layer_name):
    # conv (ch_in -> ch channels, k_size x k_size kernel) -> tanh -> dropout
    W = utils.weight_variable([k_size, k_size, ch_in, ch])
    b = utils.bias_variable([ch])
    conv = utils.conv2d(x_in, W, b, 1)
    tanh = tf.nn.tanh(conv)
    s1_conv1 = tf.nn.dropout(tanh, keep_prob)

# Convolution
layer_name = "s1_conv2"
with tf.name_scope(layer_name):
    # Same pattern as s1_conv1, now ch -> ch channels on top of s1_conv1.
    W = utils.weight_variable([k_size, k_size, ch, ch])
    b = utils.bias_variable([ch])
    conv = utils.conv2d(s1_conv1, W, b, 1)
    tanh = tf.nn.tanh(conv)
    s1_conv2 = tf.nn.dropout(tanh, keep_prob)
def inference(image, keep_prob):
    """
    Semantic segmentation network definition.

    VGG features are followed by three convolutional "fully connected" layers
    (conv6-conv8) and three transposed convolutions that fuse ``pool4`` and
    ``pool3`` and restore the input resolution.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: dropout keep probability applied after relu6 and relu7
    :return: tuple ``(annotation_pred, logits)``: the per-pixel argmax over
        classes with a trailing axis of size 1, and the full-resolution
        class score tensor it was taken from
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
    weights = np.squeeze(model_data['layers'])

    # NOTE: mean-pixel preprocessing is disabled here; the raw image is fed
    # straight into the VGG layers.
    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, image)
        conv_final_layer = image_net["conv5_3"]

        pool5 = utils.max_pool_2x2(conv_final_layer)

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        conv6 = utils.conv2d_basic(pool5, W6, b6)
        relu6 = tf.nn.relu(conv6, name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        conv7 = utils.conv2d_basic(relu_dropout6, W7, b7)
        relu7 = tf.nn.relu(conv7, name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        # Now upscale to the actual image size. Transposed-convolution
        # filters are laid out [h, w, out_channels, in_channels].
        deconv_shape1 = image_net["pool4"].get_shape()
        W_t1 = utils.weight_variable([4, 4, deconv_shape1[3].value, NUM_OF_CLASSESS], name="W_t1")
        b_t1 = utils.bias_variable([deconv_shape1[3].value], name="b_t1")
        conv_t1 = utils.conv2d_transpose_strided(conv8, W_t1, b_t1, output_shape=tf.shape(image_net["pool4"]))
        fuse_1 = tf.add(conv_t1, image_net["pool4"], name="fuse_1")

        deconv_shape2 = image_net["pool3"].get_shape()
        W_t2 = utils.weight_variable([4, 4, deconv_shape2[3].value, deconv_shape1[3].value], name="W_t2")
        b_t2 = utils.bias_variable([deconv_shape2[3].value], name="b_t2")
        conv_t2 = utils.conv2d_transpose_strided(fuse_1, W_t2, b_t2, output_shape=tf.shape(image_net["pool3"]))
        fuse_2 = tf.add(conv_t2, image_net["pool3"], name="fuse_2")

        # The final output takes batch / height / width from the input image
        # at run time and has one channel per class.
        shape = tf.shape(image)
        deconv_shape3 = tf.stack([shape[0], shape[1], shape[2], NUM_OF_CLASSESS])
        W_t3 = utils.weight_variable([16, 16, NUM_OF_CLASSESS, deconv_shape2[3].value], name="W_t3")
        b_t3 = utils.bias_variable([NUM_OF_CLASSESS], name="b_t3")
        conv_t3 = utils.conv2d_transpose_strided(fuse_2, W_t3, b_t3, output_shape=deconv_shape3, stride=8)

        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def save_image(filename, image, mean_pixel):
    """Undo the preprocessing on ``image`` and write it to ``filename``.

    :param filename: destination path of the image file.
    :param image: preprocessed image array.
    :param mean_pixel: mean pixel handed to ``utils.unprocess_image``.
    """
    output = utils.unprocess_image(image, mean_pixel)
    # Clamp to the displayable range before converting to 8-bit.
    output = np.uint8(np.clip(output, 0, 255))
    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2; this call
    # needs an older SciPy (or a replacement writer) - confirm the pinned
    # version.
    scipy.misc.imsave(filename, output)
    # Function-call form parses on both Python 2 and Python 3.
    print("Image saved!")
def inference_res(input_image):
    """Build the residual network and return its tanh output.

    Layout: 3x3 conv (3 -> 32) + ReLU, local response normalisation, four
    bottleneck units (two with ``down_stride``, two with ``up_stride``) and a
    final 3x3 conv (32 -> 3) followed by tanh.

    :param input_image: input tensor with 3 channels.
    :return: output tensor with 3 channels.
    """
    stem_w = utils.weight_variable([3, 3, 3, 32])
    stem_b = utils.bias_variable([32])
    stem = utils.conv2d_basic(input_image, stem_w, stem_b)
    features = utils.local_response_norm(tf.nn.relu(stem))

    # (width, stride keyword) for units res_1 .. res_4, in order.
    stages = (
        (16, {"down_stride": True}),
        (8, {"down_stride": True}),
        (16, {"up_stride": True}),
        (32, {"up_stride": True}),
    )
    for index, (width, stride_kw) in enumerate(stages, 1):
        features = utils.bottleneck_unit(features, width, width,
                                         name="res_%d" % index, **stride_kw)

    head_w = utils.weight_variable([3, 3, 32, 3])
    head_b = utils.bias_variable([3])
    return tf.nn.tanh(utils.conv2d_basic(features, head_w, head_b))
def inference_fully_convolutional(dataset):
    '''
    Fully convolutional inference on notMNIST dataset

    Four strided 3x3 convolutions (1 -> 32 -> 64 -> 128 -> 256 channels, each
    followed by ReLU), a 2x2 average pool and a 1x1 convolution down to 10
    channels.

    :param dataset: [batch_size, 28*28*1] tensor
    :return: logits, reshaped to [-1, 10]
    '''
    current = tf.reshape(dataset, [-1, 28, 28, 1])

    # conv1 .. conv4 share one pattern and differ only in channel counts, so
    # they are built in a loop with the same scope and variable names.
    channels = (1, 32, 64, 128, 256)
    for index, (c_in, c_out) in enumerate(zip(channels, channels[1:]), 1):
        with tf.name_scope("conv%d" % index):
            W_conv = utils.weight_variable_xavier_initialized(
                [3, 3, c_in, c_out], name="W_conv%d" % index)
            b_conv = utils.bias_variable([c_out], name="b_conv%d" % index)
            current = tf.nn.relu(utils.conv2d_strided(current, W_conv, b_conv))

    with tf.name_scope("conv5"):
        # A fifth strided convolution was replaced by average pooling.
        h_conv5 = utils.avg_pool_2x2(current)

    with tf.name_scope("conv6"):
        W_conv6 = utils.weight_variable_xavier_initialized([1, 1, 256, 10], name="W_conv6")
        b_conv6 = utils.bias_variable([10], name="b_conv6")
        # NOTE(review): the ReLU clamps negative logits to zero before they
        # are returned - confirm this is intended.
        logits = tf.nn.relu(utils.conv2d_basic(h_conv5, W_conv6, b_conv6))

    # Function-call form parses on both Python 2 and Python 3.
    print(logits.get_shape())
    logits = tf.reshape(logits, [-1, 10])
    return logits
def vis_one_im(self):
    """Save the visualisations enabled in ``cfgs`` for the current image.

    Each ``cfgs`` switch controls one output, written with ``utils.save_image``
    into the matching ``self.re_save_dir_*`` directory:

    * ``anno``            - prediction overlay (``inp_``)
    * ``fit_ellip``       - ellipse fit on the prediction (``ellip_``)
    * ``heatmap``         - density heat map of class-1 probability (``heat_``)
    * ``trans_heat``      - translucent heat map (``trans_heat_``), needs
      ``heatmap``; also re-saves the overlay as ``fuse_`` when one exists
    * ``lower_anno``      - overlay of the lower prediction (``inp_lower_``)
    * ``fit_ellip_lower`` - contour ellipse fit on the lower prediction
      (``ellip_lower``)
    """
    # Stays None unless cfgs.anno produced an overlay, so the "fuse_" save
    # below cannot hit an unbound name.
    im_ = None

    if cfgs.anno:
        im_ = pred_visualize(self.vis_image.copy(), self.vis_pred).astype(np.uint8)
        utils.save_image(im_, self.re_save_dir_im, name='inp_' + self.filename + '.jpg')

    if cfgs.fit_ellip:
        im_ellip = fit_ellipse(self.vis_image.copy(), np.expand_dims(self.vis_pred, axis=2).astype(np.uint8))
        utils.save_image(im_ellip, self.re_save_dir_ellip, name='ellip_' + self.filename + '.jpg')

    if cfgs.heatmap:
        heat_map = density_heatmap(self.vis_pred_prob[:, :, 1])
        utils.save_image(heat_map, self.re_save_dir_heat, name='heat_' + self.filename + '.jpg')

    # heat_map only exists when cfgs.heatmap is set, hence the combined test.
    if cfgs.trans_heat and cfgs.heatmap:
        trans_heat_map = translucent_heatmap(self.vis_image.astype(np.uint8).copy(), heat_map.astype(np.uint8).copy())
        utils.save_image(trans_heat_map, self.re_save_dir_transheat, name='trans_heat_' + self.filename + '.jpg')
        # NOTE(review): this writes the plain overlay under the "fuse_" name;
        # confirm whether a fused overlay/heat image was intended.
        if im_ is not None:
            utils.save_image(im_, self.re_save_dir_im, name='fuse_' + self.filename + '.jpg')

    if cfgs.lower_anno:
        im_ = pred_visualize(self.vis_image.copy(), self.vis_pred_lower).astype(np.uint8)
        utils.save_image(im_, self.re_save_dir_im, name='inp_lower_' + self.filename + '.jpg')

    if cfgs.fit_ellip_lower:
        im_ellip = fit_ellipse_findContours(self.vis_image.copy(), np.expand_dims(self.vis_pred_lower, axis=2).astype(np.uint8))
        utils.save_image(im_ellip, self.re_save_dir_ellip, name='ellip_lower' + self.filename + '.jpg')
def scatter_restore(saliency, variables1, variables2):
    """Copy the lowest-saliency entries of ``variables1`` into ``variables2``.

    The number of entries is ``k * size(variables2) / 100``, where ``k`` is a
    name defined outside this function (presumably a percentage - TODO
    confirm).

    :param saliency: saliency score per entry; the smallest ones are selected.
    :param variables1: tensor the replacement values are gathered from.
    :param variables2: variable updated in place via ``tf.scatter_update``.
    :return: the result of the scatter update on ``variables2``.
    """
    total = utils.get_tensor_size(variables2)
    # top_k on the negated scores yields the indices of the smallest scores.
    negated = -1 * saliency
    how_many = tf.cast(k * total / 100, tf.int32)
    _, lowest = tf.nn.top_k(negated, how_many)
    replacement = tf.gather(variables1, lowest)
    return tf.scatter_update(variables2, lowest, replacement)
def vgg_net_rgb(weights, image, debug, keep_prob):
    """Build the RGB-view VGG backbone up to ``rgb_relu4_3``.

    Convolution layers are initialised from ``weights``. The three conv4
    layers are narrowed by keeping 256 randomly chosen channels out of 512:
    ``rgb_conv4_1`` samples its output channels only, while ``rgb_conv4_2``
    and ``rgb_conv4_3`` sample input and output channels independently.

    :param weights: per-layer weight records, indexed like ``layers`` below.
    :param image: input tensor.
    :param debug: when true, an activation summary is added after each ReLU.
    :param keep_prob: forwarded to ``utils.conv2d_basic``.
    :return: dict mapping each layer name to its output tensor.
    """
    # The conv5 block is deliberately left out of this backbone.
    layers = (
        'rgb_conv1_1', 'rgb_relu1_1', 'rgb_conv1_2', 'rgb_relu1_2', 'rgb_pool1',

        'rgb_conv2_1', 'rgb_relu2_1', 'rgb_conv2_2', 'rgb_relu2_2', 'rgb_pool2',

        'rgb_conv3_1', 'rgb_relu3_1', 'rgb_conv3_2', 'rgb_relu3_2',
        'rgb_conv3_3', 'rgb_relu3_3', 'rgb_pool3',

        'rgb_conv4_1', 'rgb_relu4_1', 'rgb_conv4_2', 'rgb_relu4_2',
        'rgb_conv4_3', 'rgb_relu4_3'
    )

    # Layers whose input channels are subsampled as well as their outputs.
    resample_both = ('rgb_conv4_2', 'rgb_conv4_3')

    net = {}
    current = image
    for i, name in enumerate(layers):
        # Characters 4..7 of the name give the layer type: conv / relu / pool.
        kind = name[4:8]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))

            if name == 'rgb_conv4_1':
                keep_out = random.sample(range(512), 256)
                kernels = kernels[:, :, :, keep_out]
                bias = bias[:, keep_out]
            elif name in resample_both:
                # Input indices are drawn first, then output indices.
                keep_in = random.sample(range(512), 256)
                keep_out = random.sample(range(512), 256)
                kernels = kernels[:, :, keep_in, :]
                kernels = kernels[:, :, :, keep_out]
                bias = bias[:, keep_out]

            kernels = utils.get_variable(kernels, name=name + "_w")
            bias = utils.get_variable(bias.reshape(-1), name=name + "_b")
            current = utils.conv2d_basic(current, kernels, bias, keep_prob)
        elif kind == 'relu':
            current = tf.nn.relu(current, name=name)
            if debug:
                utils.add_activation_summary(current)
        elif kind == 'pool':
            current = utils.avg_pool_2x2(current)
        net[name] = current

    return net
def region_fusion_net(birdview_region, frontview_region, rgbview_region, NUM_OF_REGRESSION_VALUE, ROI_H, ROI_W):
    """Fuse the three view regions and predict class and box regression.

    Each region is flattened to ``ROI_W * ROI_H * 256`` features. Three
    fusion stages follow: the per-view features are combined with
    ``utils.join`` and each view gets its own 1024-unit fully connected layer
    on the joint. A fourth join feeds the classification head (2 outputs) and
    the regression head, both created with ``relu = False``.

    :param birdview_region: bird's-eye-view region features.
    :param frontview_region: front-view region features.
    :param rgbview_region: RGB-view region features.
    :param NUM_OF_REGRESSION_VALUE: number of outputs of the regression head.
    :param ROI_H: region height used for flattening.
    :param ROI_W: region width used for flattening.
    :return: ``(logits_cls, logits_reg)``.
    """
    flat_shape = [-1, ROI_W * ROI_H * 256]
    bird = tf.reshape(birdview_region, flat_shape, name='birdview_flatregion')
    front = tf.reshape(frontview_region, flat_shape, name='frontview_flatregion')
    rgb = tf.reshape(rgbview_region, flat_shape, name='rgbview_flatregion')

    with tf.name_scope("fusion-1"):
        # First fusion: the feature transformation is a fully connected layer.
        joint = utils.join(bird, front, rgb, name='joint_1')
        bird = utils.fully_connected(joint, 1024, name='fusion_birdview_1')
        front = utils.fully_connected(joint, 1024, name='fusion_frontview_1')
        rgb = utils.fully_connected(joint, 1024, name='fusion_rgbview_1')

    with tf.name_scope("fusion-2"):
        # Second fusion.
        joint = utils.join(bird, front, rgb, name='joint_2')
        bird = utils.fully_connected(joint, 1024, name='fusion_birdview_2')
        front = utils.fully_connected(joint, 1024, name='fusion_frontview_2')
        rgb = utils.fully_connected(joint, 1024, name='fusion_rgbview_2')

    with tf.name_scope("fusion-3"):
        # Third fusion; the join name is passed positionally, as before.
        joint = utils.join(bird, front, rgb, 'joint_3')
        bird = utils.fully_connected(joint, 1024, name='fusion_birdview_3')
        front = utils.fully_connected(joint, 1024, name='fusion_frontview_3')
        rgb = utils.fully_connected(joint, 1024, name='fusion_rgbview_3')

    with tf.name_scope("fusion-4"):
        joint = utils.join(bird, front, rgb, name='joint_4')
        logits_cls = utils.fully_connected(joint, 2, name='fusion_cls_4', relu=False)
        logits_reg = utils.fully_connected(joint, NUM_OF_REGRESSION_VALUE, name='fusion_reg_4', relu=False)

    return logits_cls, logits_reg
def inference(image, keep_prob,z):
    """
    Build a VGG encoder followed by a linear + transposed-convolution decoder.

    The VGG ``conv5_3`` activations go through pooling and three further conv
    layers (with dropout after the first two). The result is flattened,
    concatenated with ``z`` and decoded back to ``IMAGE_SIZE x IMAGE_SIZE``.

    :param image: input image. Should have values in range 0-255
    :param keep_prob: keep probability passed to both ``tf.nn.dropout`` calls
    :param z: extra tensor; reshaped to ``[-1, 4*4*10]`` and concatenated with
        the flattened conv8 features
    :return: ``(prediction, conv_t3)`` where ``conv_t3`` is the tanh output of
        the last deconv layer and ``prediction`` is its argmax over axis 3 with
        that axis re-added
    """
    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)

    mean_pixel = np.mean(model_data['normalization'][0][0][0], axis=(0, 1))
    vgg_weights = np.squeeze(model_data['layers'])
    processed_image = utils.process_image(image, mean_pixel)

    with tf.variable_scope("inference"):
        image_net = vgg_net(vgg_weights, processed_image)
        pool5 = utils.max_pool_2x2(image_net["conv5_3"])

        W6 = utils.weight_variable([7, 7, 512, 4096], name="W6")
        b6 = utils.bias_variable([4096], name="b6")
        relu6 = tf.nn.relu(utils.conv2d_basic(pool5, W6, b6), name="relu6")
        if FLAGS.debug:
            utils.add_activation_summary(relu6)
        relu_dropout6 = tf.nn.dropout(relu6, keep_prob=keep_prob)

        W7 = utils.weight_variable([1, 1, 4096, 4096], name="W7")
        b7 = utils.bias_variable([4096], name="b7")
        relu7 = tf.nn.relu(utils.conv2d_basic(relu_dropout6, W7, b7), name="relu7")
        if FLAGS.debug:
            utils.add_activation_summary(relu7)
        relu_dropout7 = tf.nn.dropout(relu7, keep_prob=keep_prob)

        W8 = utils.weight_variable([1, 1, 4096, 150], name="W8")
        b8 = utils.bias_variable([150], name="b8")
        conv8 = utils.conv2d_basic(relu_dropout7, W8, b8)

        # presumably conv8 is 4x4 spatially here (the reshape hard-codes it) - confirm
        flat_features = tf.reshape(conv8, [-1, 4 * 4 * 150])
        flat_z = tf.reshape(z, [-1, 4 * 4 * 10])
        concat1 = tf.concat([flat_features, flat_z], axis=1)

        # spatial sizes at full, 1/2, 1/4, 1/8 and 1/16 resolution
        sizes = [(IMAGE_SIZE, IMAGE_SIZE)]
        for _ in range(4):
            prev_h, prev_w = sizes[-1]
            sizes.append((conv_out_size_same(prev_h, 2),
                          conv_out_size_same(prev_w, 2)))
        s_h16, s_w16 = sizes[4]

        projected, _, _ = linear(
            concat1, 64 * 8 * s_h16 * s_w16, 'g_h0_lin', with_w=True)
        hidden = tf.nn.relu(tf.reshape(projected, [-1, s_h16, s_w16, 64 * 8]))

        # NOTE(review): g_h3 asks for 63 channels, not 64 - may be a typo, but
        # changing it alters variable shapes; confirm before touching.
        decoder_stages = (
            ('g_h1', sizes[3], 64 * 4),
            ('g_h2', sizes[2], 64 * 2),
            ('g_h3', sizes[1], 63 * 1),
        )
        for stage_name, (out_h, out_w), channels in decoder_stages:
            hidden, _, _ = deconv2d(
                hidden, [FLAGS.batch_size, out_h, out_w, channels],
                name=stage_name, with_w=True)
            hidden = tf.nn.relu(hidden)

        full_h, full_w = sizes[0]
        h4, _, _ = deconv2d(
            hidden, [FLAGS.batch_size, full_h, full_w, 1],
            name='g_h4', with_w=True)
        conv_t3 = tf.nn.tanh(h4)

        print("###########################################################")

        # NOTE(review): h4 is requested with a single channel, so this argmax
        # over axis 3 would presumably always be 0 - confirm intent.
        annotation_pred = tf.argmax(conv_t3, dimension=3, name="prediction")

    return tf.expand_dims(annotation_pred, dim=3), conv_t3
def main(argv=None):
    """Build the weighted-loss segmentation graph and run the mode in ``FLAGS.mode``.

    Modes handled here:

    * ``"train"``: optimise ``train_op``; every 50 steps write summaries, every
      2000 steps run one validation batch, save a checkpoint, run one test
      batch and accumulate a confusion matrix over every test record.
    * ``"test"``: save input / ground truth / weak ground truth / prediction /
      cost images for every test record into ``FLAGS.vis_dir``.
    * ``"test_trainset"``: save input / ground truth / prediction for every
      training record into ``FLAGS.vis_train_dir``.

    :param argv: unused; present for the ``tf.app.run``-style entry signature
    """
    # OutputResult() writes through this module-level handle.
    global fout

    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 3],
                           name="input_image")
    annotation = tf.placeholder(tf.int32,
                                shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, 1],
                                name="annotation")

    pred_annotation, logits = inference(image, keep_probability)
    costProb = tf.nn.softmax(logits)
    labels = tf.squeeze(annotation, squeeze_dims=[3])

    annotation_BGR = Gray2BGR(annotation)
    pred_annotation_BGR = Gray2BGR(pred_annotation)
    input_sum = tf.summary.image("input_image", image, max_outputs=4)
    gt_sum = tf.summary.image("ground_truth",
                              tf.cast(annotation_BGR, tf.uint8), max_outputs=4)
    pred_sum = tf.summary.image("pred_annotation",
                                tf.cast(pred_annotation_BGR, tf.uint8),
                                max_outputs=4)

    # Calculate loss: per-pixel cross entropy, with class 0 weighted to zero.
    class_weights = tf.constant([0.0, 1.0, 1.0, 1.0])
    onehot_labels = tf.one_hot(labels, depth=NUM_OF_CLASSESS)
    weights = tf.reduce_sum(class_weights * onehot_labels, axis=3)
    unweighted_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits, name="entropy")
    loss = tf.reduce_mean(unweighted_loss * weights)
    loss_sum = tf.summary.scalar("loss", loss)

    # The second positional argument of tf.Variable is `trainable`, not the
    # dtype; passing tf.float64 there made this bookkeeping variable trainable
    # and leaked it into `trainable_var` below. Mark it non-trainable instead.
    # NOTE(review): the variable is never re-assigned, so this summary keeps
    # showing the initial rate even after FLAGS.learning_rate is decayed.
    lr_sum = tf.summary.scalar(
        "learning_rate", tf.Variable(FLAGS.learning_rate, trainable=False))

    trainable_var = tf.trainable_variables()
    if FLAGS.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, trainable_var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge([input_sum, gt_sum, pred_sum, lr_sum])

    print("Setting up image reader...")
    train_records, valid_records, test_records = image_reader.read_dataset(
        FLAGS.data_dir)
    print('Train num:', len(train_records))
    print('Val num:', len(valid_records))
    print('Test num:', len(test_records))

    print("Setting up dataset reader")
    image_options = {'resize': False, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode in ('train', 'test_trainset'):
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)
    test_dataset_reader = dataset.BatchDatset(test_records, image_options)

    sess = tf.Session()

    print("Setting up Model Saver...")
    saver = tf.train.Saver(max_to_keep=20)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)
    summary_train_writer = tf.summary.FileWriter(FLAGS.logs_dir + "/train/", sess.graph)
    summary_val_writer = tf.summary.FileWriter(FLAGS.logs_dir + "/val/", sess.graph)
    summary_test_writer = tf.summary.FileWriter(FLAGS.logs_dir + "/test/", sess.graph)

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")
        print("and path is : " + ckpt.model_checkpoint_path)

    if FLAGS.mode == "train":
        fout = open(FLAGS.result_log, 'w')
        try:
            for itr in xrange(MAX_ITERATION):
                # decrease learning_rate after enough iteration
                if itr % LR_DECREASE_ITER == 0 and itr > 0:
                    FLAGS.learning_rate *= LR_DECREASE_RATE

                train_images, train_annotations, train_weak_annotations = \
                    train_dataset_reader.next_batch_weak(
                        FLAGS.batch_size, random_rotate=1)
                feed_dict = {
                    image: train_images,
                    annotation: train_annotations,
                    keep_probability: 0.85
                }
                sess.run(train_op, feed_dict=feed_dict)

                if itr % 50 == 0:
                    train_loss, summary_str, summary_loss = sess.run(
                        [loss, summary_op, loss_sum], feed_dict=feed_dict)
                    print("Step: %d, Train_loss:%g" % (itr, train_loss))
                    summary_writer.add_summary(summary_str, itr)
                    summary_train_writer.add_summary(summary_loss, itr)

                if itr % 2000 == 0:
                    valid_images, valid_annotations, valid_weak_annotations = \
                        validation_dataset_reader.next_batch_weak(
                            FLAGS.batch_size, random_rotate=0)
                    valid_loss, summary_loss = sess.run(
                        [loss, loss_sum],
                        feed_dict={
                            image: valid_images,
                            annotation: valid_annotations,
                            keep_probability: 1.0
                        })
                    print("%s ---> Validation_loss: %g" %
                          (datetime.datetime.now(), valid_loss))
                    saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)
                    summary_val_writer.add_summary(summary_loss, itr)

                    # run test for evaluation result
                    test_images, test_annotations, test_weak_annotations = \
                        test_dataset_reader.next_batch_weak(
                            FLAGS.batch_size, random_rotate=0)
                    test_loss, summary_loss = sess.run(
                        [loss, loss_sum],
                        feed_dict={
                            image: test_images,
                            annotation: test_annotations,
                            keep_probability: 1.0
                        })
                    summary_test_writer.add_summary(summary_loss, itr)

                    # confusion matrix accumulated one test record at a time
                    cntTable = np.zeros((NUM_OF_CLASSESS, NUM_OF_CLASSESS))
                    for _ in range(len(test_records)):
                        test_images, test_annotations, test_weak_annotations = \
                            test_dataset_reader.next_batch_weak(1, random_rotate=0)
                        pred = sess.run(pred_annotation,
                                        feed_dict={
                                            image: test_images,
                                            annotation: test_annotations,
                                            keep_probability: 1.0
                                        })
                        test_annotations = np.squeeze(test_annotations, axis=3)
                        pred = np.squeeze(pred, axis=3)
                        cntTable += CalcConfusionMatrix(test_annotations[0], pred[0])
                    OutputResult(itr, cntTable)
        finally:
            # close the result log even when training is interrupted by an error
            fout.close()

    elif FLAGS.mode == "test":
        if not os.path.exists(FLAGS.vis_dir):
            os.makedirs(FLAGS.vis_dir)

        total = len(test_records)
        for itr, record in enumerate(test_records):
            test_images, test_annotations, test_weak_annotations = \
                test_dataset_reader.next_batch_weak(1, random_rotate=0)
            # One forward pass yields both tensors (previously two runs).
            # costMap is not consumed below: the per-pixel cost rendering is
            # disabled, so costImg is written out as an all-zero image.
            pred, costMap = sess.run(
                [pred_annotation, costProb],
                feed_dict={
                    image: test_images,
                    annotation: test_annotations,
                    keep_probability: 1.0
                })
            test_annotations = np.squeeze(test_annotations, axis=3)
            test_weak_annotations = np.squeeze(test_weak_annotations, axis=3)
            pred = np.squeeze(pred, axis=3)
            costImg = np.zeros((IMAGE_HEIGHT, IMAGE_WIDTH))

            print(str(itr) + '/' + str(total))
            filename = record['filename']
            utils.save_image(test_images[0].astype(np.uint8), FLAGS.vis_dir,
                             name="inp_" + filename)
            utils.save_image(test_annotations[0].astype(np.uint8), FLAGS.vis_dir,
                             name="gt_" + filename)
            utils.save_image(test_weak_annotations[0].astype(np.uint8),
                             FLAGS.vis_dir, name="wgt_" + filename)
            utils.save_image(pred[0].astype(np.uint8), FLAGS.vis_dir,
                             name="pred_" + filename)
            utils.save_image(costImg.astype(np.uint8), FLAGS.vis_dir,
                             name="cost_" + filename)

    elif FLAGS.mode == "test_trainset":
        total = len(train_records)
        for itr, record in enumerate(train_records):
            test_images, test_annotations = train_dataset_reader.next_batch(
                1, random_rotate=0)
            pred = sess.run(pred_annotation,
                            feed_dict={
                                image: test_images,
                                annotation: test_annotations,
                                keep_probability: 1.0
                            })
            test_annotations = np.squeeze(test_annotations, axis=3)
            pred = np.squeeze(pred, axis=3)

            print(str(itr) + '/' + str(total))
            filename = record['filename']
            utils.save_image(test_images[0].astype(np.uint8),
                             FLAGS.vis_train_dir, name="inp_" + filename)
            utils.save_image(test_annotations[0].astype(np.uint8),
                             FLAGS.vis_train_dir, name="gt_" + filename)
            utils.save_image(pred[0].astype(np.uint8),
                             FLAGS.vis_train_dir, name="pred_" + filename)
def _add_upsample_layer(net, prefix, scale, debug, keep_prob, **deconv_kwargs):
    """Append a 256-channel transposed-conv upsample of ``<prefix>_relu4_3`` to ``net``.

    The result is stored under ``net[prefix + '_upsample']``. ``scale``
    multiplies the static height and width of the source tensor to obtain the
    requested output shape; ``deconv_kwargs`` is forwarded untouched to
    ``utils.conv2d_transpose_strided`` (used for the optional ``stride``).
    """
    source = net[prefix + "_relu4_3"]
    with tf.name_scope(prefix + "_Upsample_layer"):
        kernels = utils.weight_variable([3, 3, 256, 256], name=prefix + "_upsample_w")
        bias = utils.bias_variable([256], name=prefix + "_upsample_b")

        target_shape = source.get_shape().as_list()
        target_shape[1] *= scale
        target_shape[2] *= scale
        target_shape[3] = kernels.get_shape().as_list()[2]

        upsampled = utils.conv2d_transpose_strided(
            source, kernels, bias, output_shape=target_shape,
            name=prefix + '_upsample', keep_prob=keep_prob, **deconv_kwargs)
        net[prefix + '_upsample'] = upsampled
        if debug:
            utils.add_activation_summary(upsampled)


def Proposal_net(birdview, frontview, rgbview, model_dir, MODEL_URL, debug, keep_prob):
    """3D region proposal network: one VGG branch plus an upsample layer per view.

    The same pretrained ``layers`` array is handed to ``vgg_net_birdview``,
    ``vgg_net_frontview`` and ``vgg_net_rgb``. Each branch's ``*_relu4_3``
    output is then upsampled to 256 channels (x4 with ``stride=4`` for the
    bird and front views, x2 with the helper's default stride for RGB).

    :param birdview: bird-view input tensor
    :param frontview: front-view input tensor
    :param rgbview: RGB input tensor
    :param model_dir: directory handed to ``utils.get_model_data``
    :param MODEL_URL: NOTE(review): currently ignored - it is overwritten with
        the VGG-16 URL below; confirm whether that pin is intentional
    :param debug: when truthy, add an activation summary for each upsample output
    :param keep_prob: forwarded to the VGG builders and the upsample layers
    :return: ``(birdview_net, frontview_net, rgbview_net)`` dictionaries, each
        extended with its ``*_upsample`` entry
    """
    MODEL_URL = 'http://www.vlfeat.org/matconvnet/models/beta16/imagenet-vgg-verydeep-16.mat'

    print("setting up vgg initialized conv layers ...")
    model_data = utils.get_model_data(model_dir, MODEL_URL)
    # no mean-pixel preprocessing is applied to the inputs here
    weights = np.squeeze(model_data['layers'])

    # vgg-birdview
    with tf.name_scope("birdview-Vgg-16"):
        birdview_net = vgg_net_birdview(weights, birdview, debug, keep_prob)
    _add_upsample_layer(birdview_net, "birdview", 4, debug, keep_prob, stride=4)

    # vgg-frontview
    with tf.name_scope("frontview-Vgg-16"):
        frontview_net = vgg_net_frontview(weights, frontview, debug, keep_prob)
    _add_upsample_layer(frontview_net, "frontview", 4, debug, keep_prob, stride=4)

    # vgg-rgb (no explicit stride: the helper's default is used)
    with tf.name_scope("rgb-Vgg-16"):
        rgbview_net = vgg_net_rgb(weights, rgbview, debug, keep_prob)
    _add_upsample_layer(rgbview_net, "rgb", 2, debug, keep_prob)

    return birdview_net, frontview_net, rgbview_net
def inference_strided(input_image):
    """Build the six-layer strided conv / transposed-conv image network.

    Layers, in order: a 9x9 conv, two strided 3x3 convs, two strided 3x3
    transposed convs and a final 9x9 conv. Every layer but the last is followed
    by ReLU; the last is followed by tanh. Histogram summaries ``W<i>`` /
    ``b<i>`` are registered for every layer's parameters.

    :param input_image: input tensor handed to the first conv layer
        (its filter expects 3 input channels)
    :return: tanh-activated output of the last layer
    """
    # (filter shape, bias size, conv helper, activation), applied top to bottom.
    # NOTE(review): the transposed layers list the smaller channel count third
    # in the filter shape, presumably the [h, w, out, in] layout - confirm
    # against utils.conv2d_transpose_strided.
    layer_specs = (
        ([9, 9, 3, 32], 32, utils.conv2d_basic, tf.nn.relu),
        ([3, 3, 32, 64], 64, utils.conv2d_strided, tf.nn.relu),
        ([3, 3, 64, 128], 128, utils.conv2d_strided, tf.nn.relu),
        # upstrides
        ([3, 3, 64, 128], 64, utils.conv2d_transpose_strided, tf.nn.relu),
        ([3, 3, 32, 64], 32, utils.conv2d_transpose_strided, tf.nn.relu),
        ([9, 9, 32, 3], 3, utils.conv2d_basic, tf.nn.tanh),
    )

    activations = input_image
    for index, (filter_shape, bias_size, conv_fn, activation_fn) in enumerate(layer_specs, 1):
        kernel = utils.weight_variable(filter_shape)
        offset = utils.bias_variable([bias_size])
        tf.histogram_summary("W%d" % index, kernel)
        tf.histogram_summary("b%d" % index, offset)
        activations = activation_fn(conv_fn(activations, kernel, offset))

    return activations
def main(argv=None):
    """Build the 7-channel segmentation graph and run the mode in ``cfgs.mode``.

    Modes handled here:

    * ``"accurary"``: iterate the validation reader, report per-batch and mean
      pixel / IoU accuracy to stdout and to the run's log file.
    * ``"all_visualize"``: iterate the validation reader and save prediction
      overlays, plus ellipse fits, heat maps and translucent heat maps when
      the corresponding ``cfgs`` switches are set, under a time-stamped
      directory.

    A text log is created in ``cfgs.logs_dir`` for every run and is closed when
    the function exits, including on error.

    :param argv: unused; present for the ``tf.app.run``-style entry signature
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 7],
                           name="input_image")
    annotation = tf.placeholder(tf.int32,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                name="annotation")

    pred_annotation_value, pred_annotation, logits, pred_prob = inference(
        image, keep_probability)
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth", tf.cast(annotation, tf.uint8), max_outputs=2)
    tf.summary.image("pred_annotation", tf.cast(pred_annotation, tf.uint8),
                     max_outputs=2)

    # logits: the last layer of the conv net; labels: the ground truth
    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy")))
    tf.summary.scalar("entropy", loss)

    trainable_var = tf.trainable_variables()
    if cfgs.debug:
        for var in trainable_var:
            utils.add_to_regularization_and_summary(var)

    print("Setting up summary op...")
    summary_op = tf.summary.merge_all()

    # Create a file to write logs.
    filename = "logs_%s%s.txt" % (cfgs.mode, datetime.datetime.now())
    path_ = os.path.join(cfgs.logs_dir, filename)
    logs_file = open(path_, 'w')
    try:
        logs_file.write("The logs file is created at %s\n" % datetime.datetime.now())
        logs_file.write("The mode is %s\n" % (cfgs.mode))
        logs_file.write(
            "The train data batch size is %d and the validation batch size is %d.\n"
            % (cfgs.batch_size, cfgs.v_batch_size))
        logs_file.write("The train data is %s.\n" % (cfgs.data_dir))
        logs_file.write("The model is ---%s---.\n" % cfgs.logs_dir)

        print("Setting up image reader...")
        logs_file.write("Setting up image reader...\n")
        train_records, valid_records = scene_parsing.my_read_dataset(
            cfgs.seq_list_path, cfgs.anno_path)
        print('number of train_records', len(train_records))
        print('number of valid_records', len(valid_records))
        logs_file.write('number of train_records %d\n' % len(train_records))
        logs_file.write('number of valid_records %d\n' % len(valid_records))

        print("Setting up dataset reader")
        image_options = {
            'resize': True,
            'resize_size': IMAGE_SIZE,
            'visualize': cfgs.mode == 'all_visualize'
        }
        if cfgs.mode == 'train':
            train_dataset_reader = dataset.BatchDatset(train_records, image_options)
        validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

        sess = tf.Session()

        print("Setting up Saver...")
        saver = tf.train.Saver()
        summary_writer = tf.summary.FileWriter(cfgs.logs_dir, sess.graph)

        sess.run(tf.global_variables_initializer())
        ckpt = tf.train.get_checkpoint_state(cfgs.logs_dir)
        # if not train, restore the model trained before
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored...")

        if cfgs.mode == "accurary":
            count = 0
            if_con = True
            accu_iou_t = 0
            accu_pixel_t = 0

            while if_con:
                count = count + 1
                valid_images, valid_annotations, valid_filenames, if_con, start, end = \
                    validation_dataset_reader.next_batch_valid(cfgs.v_batch_size)
                # only the prediction is consumed, so the loss is not fetched
                pred_anno = sess.run(pred_annotation,
                                     feed_dict={
                                         image: valid_images,
                                         keep_probability: 1.0
                                     })
                accu_iou, accu_pixel = accu.caculate_accurary(
                    pred_anno, valid_annotations)

                print("Ture %d ---> the data from %d to %d" % (count, start, end))
                print("%s ---> Validation_pixel_accuary: %g" %
                      (datetime.datetime.now(), accu_pixel))
                print("%s ---> Validation_iou_accuary: %g" %
                      (datetime.datetime.now(), accu_iou))
                # Output logs.
                logs_file.write("Ture %d ---> the data from %d to %d\n" %
                                (count, start, end))
                logs_file.write("%s ---> Validation_pixel_accuary: %g\n" %
                                (datetime.datetime.now(), accu_pixel))
                logs_file.write("%s ---> Validation_iou_accuary: %g\n" %
                                (datetime.datetime.now(), accu_iou))

                accu_iou_t = accu_iou_t + accu_iou
                accu_pixel_t = accu_pixel_t + accu_pixel

            print("%s ---> Total validation_pixel_accuary: %g" %
                  (datetime.datetime.now(), accu_pixel_t / count))
            print("%s ---> Total validation_iou_accuary: %g" %
                  (datetime.datetime.now(), accu_iou_t / count))
            # Output logs
            logs_file.write("%s ---> Total validation_pixel_accurary: %g\n" %
                            (datetime.datetime.now(), accu_pixel_t / count))
            logs_file.write("%s ---> Total validation_iou_accurary: %g\n" %
                            (datetime.datetime.now(), accu_iou_t / count))

        elif cfgs.mode == "all_visualize":
            re_save_dir = "%s%s" % (cfgs.result_dir, datetime.datetime.now())
            logs_file.write("The result is save at file'%s'.\n" % re_save_dir)
            logs_file.write("The number of part visualization is %d.\n" %
                            cfgs.v_batch_size)

            # Check the result path if exists.
            if not os.path.exists(re_save_dir):
                print("The path '%s' is not found." % re_save_dir)
                print("Create now ...")
                os.makedirs(re_save_dir)
                print("Create '%s' successfully." % re_save_dir)
                logs_file.write("Create '%s' successfully.\n" % re_save_dir)

            re_save_dir_im = os.path.join(re_save_dir, 'images')
            re_save_dir_heat = os.path.join(re_save_dir, 'heatmap')
            re_save_dir_ellip = os.path.join(re_save_dir, 'ellip')
            re_save_dir_transheat = os.path.join(re_save_dir, 'transheat')
            for sub_dir in (re_save_dir_im, re_save_dir_heat,
                            re_save_dir_ellip, re_save_dir_transheat):
                if not os.path.exists(sub_dir):
                    os.makedirs(sub_dir)

            count = 0
            if_con = True

            while if_con:
                count = count + 1
                valid_images, valid_annotaions, valid_filename, valid_cur_images, if_con, start, end = \
                    validation_dataset_reader.next_batch_val_vis(cfgs.v_batch_size)
                # only the label map and the probabilities are used below
                pred, pred_prob_ = sess.run(
                    [pred_annotation, pred_prob],
                    feed_dict={
                        image: valid_images,
                        keep_probability: 1.0
                    })
                print("Turn %d :----start from %d ------- to %d" %
                      (count, start, end))
                pred = np.squeeze(pred, axis=3)

                for itr, pred_mask in enumerate(pred):
                    filename = valid_filename[itr]['filename']

                    valid_images_ = pred_visualize(valid_cur_images[itr].copy(),
                                                   pred_mask)
                    utils.save_image(valid_images_.astype(np.uint8),
                                     re_save_dir_im, name="inp_" + filename)

                    if cfgs.fit_ellip:
                        valid_images_ellip = fit_ellipse_findContours(
                            valid_cur_images[itr].copy(),
                            np.expand_dims(pred_mask, axis=2).astype(np.uint8))
                        utils.save_image(valid_images_ellip.astype(np.uint8),
                                         re_save_dir_ellip,
                                         name="ellip_" + filename)

                    heat_map = None
                    if cfgs.heatmap:
                        heat_map = density_heatmap(pred_prob_[itr, :, :, 1])
                        utils.save_image(heat_map.astype(np.uint8),
                                         re_save_dir_heat,
                                         name="heat_" + filename)

                    if cfgs.trans_heat:
                        # The translucent overlay needs this image's heat map
                        # even when plain heat-map output is switched off;
                        # previously that combination hit an undefined (or
                        # stale, previous-image) heat_map.
                        if heat_map is None:
                            heat_map = density_heatmap(pred_prob_[itr, :, :, 1])
                        trans_heat_map = translucent_heatmap(
                            valid_cur_images[itr],
                            heat_map.astype(np.uint8).copy())
                        utils.save_image(trans_heat_map, re_save_dir_transheat,
                                         name="trans_heat_" + filename)
    finally:
        # the log handle used to be left open for the life of the process
        logs_file.close()
def main(argv=None):
    """Train (or, in ``FLAGS.mode == "test"``, run) the feed-forward style network.

    Steps: download model/data if needed, compute Gram matrices of the style
    image for every layer in ``STYLE_LAYERS``, build the generator with
    ``inference_strided``, then minimise content + style + total-variation
    loss with Adam. Summaries are written every 10 steps and a checkpoint
    every 100 steps.

    :param argv: unused; present for the ``tf.app.run``-style entry signature
    """
    # builtin on Python 2, only available from functools on Python 3
    from functools import reduce

    utils.maybe_download_and_extract(FLAGS.model_dir, MODEL_URL)
    utils.maybe_download_and_extract(FLAGS.data_dir, DATA_URL, is_tarfile=True)

    model_data = get_model_data()
    model_params = {}
    mean = model_data['normalization'][0][0][0]
    model_params['mean_pixel'] = np.mean(mean, axis=(0, 1))
    model_params['weights'] = np.squeeze(model_data['layers'])

    style_image = get_image(FLAGS.style_path)
    processed_style = utils.process_image(
        style_image, model_params['mean_pixel']).astype(np.float32)
    style_net = vgg_net(model_params['weights'], processed_style)
    tf.image_summary("Style_Image", style_image)

    with tf.Session() as sess:
        print("Evaluating style features...")
        style_features = {}
        for layer in STYLE_LAYERS:
            features = style_net[layer].eval()
            features = np.reshape(features, (-1, features.shape[3]))
            style_features[layer] = np.matmul(features.T, features) / features.size

        print("Reading image inputs")
        input_image, input_content = read_input(model_params)

        print("Setting up inference")
        output_image = 255 * inference_strided(input_image)

        print("Creating saver..")
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(FLAGS.log_dir)
        restored = bool(ckpt and ckpt.model_checkpoint_path)
        if restored:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Model restored...")

        if FLAGS.mode == "test":
            test(sess, output_image, model_params['mean_pixel'])
            return

        print("Calculating content loss...")
        image_net = vgg_net(model_params['weights'], output_image)
        content_loss = CONTENT_WEIGHT * tf.nn.l2_loss(
            image_net[CONTENT_LAYER] - input_content) / utils.get_tensor_size(
                input_content)
        print(content_loss.get_shape())
        tf.scalar_summary("Content_loss", content_loss)

        print("Calculating style loss...")
        style_losses = []
        for layer in STYLE_LAYERS:
            image_layer = image_net[layer]
            _, height, width, number = map(lambda i: i.value,
                                           image_layer.get_shape())
            size = height * width * number
            feats = tf.reshape(image_layer, (-1, number))
            image_gram = tf.matmul(tf.transpose(feats), feats) / size
            style_losses.append(
                0.5 * tf.nn.l2_loss(image_gram - style_features[layer]))
        style_loss = STYLE_WEIGHT * reduce(tf.add, style_losses)
        print(style_loss.get_shape())
        tf.scalar_summary("Style_loss", style_loss)

        print("Calculating variational loss...")
        tv_y_size = utils.get_tensor_size(output_image[:, 1:, :, :])
        tv_x_size = utils.get_tensor_size(output_image[:, :, 1:, :])
        tv_loss = VARIATION_WEIGHT * (
            (tf.nn.l2_loss(output_image[:, 1:, :, :] -
                           output_image[:, :IMAGE_SIZE - 1, :, :]) / tv_y_size) +
            (tf.nn.l2_loss(output_image[:, :, 1:, :] -
                           output_image[:, :, :IMAGE_SIZE - 1, :]) / tv_x_size))
        print(tv_loss.get_shape())
        tf.scalar_summary("Variation_loss", tv_loss)

        loss = content_loss + style_loss + tv_loss
        tf.scalar_summary("Total_loss", loss)

        print("Setting up train operation...")
        train_step = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

        print("Setting up summary write")
        summary_writer = tf.train.SummaryWriter(FLAGS.log_dir, sess.graph_def)
        summary_op = tf.merge_all_summaries()

        print("initializing all variables")
        sess.run(tf.initialize_all_variables())
        if restored:
            # The initializer above re-runs for every variable, including the
            # ones loaded from the checkpoint, so load them again; otherwise a
            # resumed run silently restarts from freshly initialised weights.
            saver.restore(sess, ckpt.model_checkpoint_path)

        tf.train.start_queue_runners(sess=sess)

        print("Running training...")
        for step in range(MAX_ITERATIONS):
            if step % 10 == 0:
                this_loss, summary_str = sess.run([loss, summary_op])
                summary_writer.add_summary(summary_str, global_step=step)
                # single line, same text the chained py2 prints produced
                print('%s : Step %d total loss: %g' %
                      (datetime.now(), step, this_loss))

            if step % 100 == 0:
                # one run so the three terms come from the same forward pass
                content_val, style_val, tv_val = sess.run(
                    [content_loss, style_loss, tv_loss])
                print("Step %d  content loss: %g  style loss: %g  tv loss: %g" %
                      (step, content_val, style_val, tv_val))
                saver.save(sess, FLAGS.log_dir + "model.ckpt", global_step=step)

            sess.run(train_step)
def main(argv=None):
    """Build the segmentation graph with accuracy tracking and run ``FLAGS.mode``.

    Modes handled here:

    * ``"train"``: optimise ``train_op``; every 10 steps write train and
      validation loss/accuracy summaries, every 500 steps print the validation
      loss and save a checkpoint.
    * ``"visualize"``: save input / ground truth / prediction images for one
      random validation batch into ``FLAGS.logs_dir``.

    :param argv: unused; present for the ``tf.app.run``-style entry signature
    """
    keep_probability = tf.placeholder(tf.float32, name="keep_probabilty")
    image = tf.placeholder(tf.float32,
                           shape=[None, IMAGE_SIZE, IMAGE_SIZE, 3],
                           name="input_image")
    # NOTE(review): phase_train is a constant-True variable and is never
    # switched for evaluation - confirm that is intended.
    phase_train = tf.Variable(True, name="phase_train", trainable=False)
    annotation = tf.placeholder(tf.int32,
                                shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1],
                                name="annotation")

    pred_annotation, logits = inference(image, keep_probability, phase_train)

    # pixel accuracy: fraction of positions where prediction equals the label
    correct_pred = tf.equal(tf.cast(pred_annotation, tf.int32), annotation)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    loss = tf.reduce_mean((tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits,
        labels=tf.squeeze(annotation, squeeze_dims=[3]),
        name="entropy")))

    trainable_var = tf.trainable_variables()
    skipLayers = []

    def _is_skipped(var):
        # Layer key = first 7 chars of the second name-scope component.
        # Variables without a scope have no such component and are never
        # skipped (indexing [1] unconditionally raised IndexError for them).
        parts = var.name.split('/')
        return len(parts) > 1 and parts[1][0:7] in skipLayers

    var_list = [v for v in trainable_var if not _is_skipped(v)]
    if FLAGS.debug:
        for var in var_list:
            utils.add_to_regularization_and_summary(var)
    train_op = train(loss, var_list)

    print("Setting up summary op...")
    tf.summary.image("input_image", image, max_outputs=2)
    tf.summary.image("ground_truth",
                     tf.cast(tf.image.grayscale_to_rgb(annotation), tf.float32),
                     max_outputs=2)
    tf.summary.image("pred_annotation",
                     tf.cast(tf.image.grayscale_to_rgb(pred_annotation), tf.float32),
                     max_outputs=2)
    # merged here, before the scalars below exist, so they stay out of
    # summary_op and can be evaluated on train and validation feeds separately
    summary_op = tf.summary.merge_all()

    # two kinds of loss
    trainLoss_summary = tf.summary.scalar("entropy_train", loss)
    validationLoss_summary = tf.summary.scalar("entropy_validaton", loss)
    # two kinds of acc
    trainAcc_summary = tf.summary.scalar("acc_train", accuracy)
    valAcc_summary = tf.summary.scalar("acc_val", accuracy)

    print("Setting up image reader...")
    train_records, valid_records = scene_parsing.read_dataset(FLAGS.data_dir)
    print(len(train_records))
    print(len(valid_records))

    print("Setting up dataset reader")
    image_options = {'resize': True, 'resize_size': IMAGE_SIZE}
    if FLAGS.mode == 'train':
        train_dataset_reader = dataset.BatchDatset(train_records, image_options)
    validation_dataset_reader = dataset.BatchDatset(valid_records, image_options)

    sess = tf.Session()

    print("Setting up Saver...")
    saver = tf.train.Saver(trainable_var)
    summary_writer = tf.summary.FileWriter(FLAGS.logs_dir, sess.graph)

    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(FLAGS.logs_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print("Model restored...")

    if FLAGS.mode == "train":
        for itr in xrange(MAX_ITERATION):
            train_images, train_annotations = train_dataset_reader.next_batch(
                FLAGS.batch_size)
            valid_images, valid_annotations = validation_dataset_reader.get_random_batch(
                FLAGS.batch_size)
            feed_dict1 = {
                image: train_images,
                annotation: train_annotations,
                keep_probability: 0.85
            }
            # Validation metrics are measured with dropout disabled, matching
            # the 500-step validation pass below (this feed used 0.85 before).
            feed_dict2 = {
                image: valid_images,
                annotation: valid_annotations,
                keep_probability: 1.0
            }

            sess.run(train_op, feed_dict=feed_dict1)

            if itr % 10 == 0:
                # one run per feed, so loss and accuracy summaries describe the
                # same forward pass (previously five separate runs)
                summary_merge, summary_val, val_acc, valAccSumm = sess.run(
                    [summary_op, validationLoss_summary, accuracy, valAcc_summary],
                    feed_dict=feed_dict2)
                summary_train, train_acc, trainAccSumm = sess.run(
                    [trainLoss_summary, accuracy, trainAcc_summary],
                    feed_dict=feed_dict1)

                print("Step: %d, Train_acc:%g" % (itr, train_acc))
                print("Step: %d, Val_acc:%g" % (itr, val_acc))
                print("==================>")

                for summary in (summary_train, summary_val, trainAccSumm,
                                valAccSumm, summary_merge):
                    summary_writer.add_summary(summary, itr)

            if itr % 500 == 0:
                valid_images, valid_annotations = validation_dataset_reader.next_batch(
                    FLAGS.batch_size)
                valid_loss = sess.run(loss,
                                      feed_dict={
                                          image: valid_images,
                                          annotation: valid_annotations,
                                          keep_probability: 1.0
                                      })
                print("%s ---> Validation_loss: %g" %
                      (datetime.datetime.now(), valid_loss))
                saver.save(sess, FLAGS.logs_dir + "model.ckpt", itr)

    elif FLAGS.mode == "visualize":
        valid_images, valid_annotations = validation_dataset_reader.get_random_batch(
            FLAGS.batch_size)
        pred = sess.run(pred_annotation,
                        feed_dict={
                            image: valid_images,
                            annotation: valid_annotations,
                            keep_probability: 1.0
                        })
        valid_annotations = np.squeeze(valid_annotations, axis=3)
        pred = np.squeeze(pred, axis=3)

        for itr in range(FLAGS.batch_size):
            suffix = str(5 + itr)
            utils.save_image(valid_images[itr].astype(np.uint8),
                             FLAGS.logs_dir, name="inp_" + suffix)
            utils.save_image(valid_annotations[itr].astype(np.uint8),
                             FLAGS.logs_dir, name="gt_" + suffix)
            utils.save_image(pred[itr].astype(np.uint8),
                             FLAGS.logs_dir, name="pred_" + suffix)
            print("Saved image: %d" % itr)
def u_net(image, phase_train):
    """Build a U-Net style encoder/decoder graph under the "u_net" variable scope.

    Encoder: four levels with 32, 64, 128 and 256 filters. Each level applies
    two 3x3 convolutions with ReLU, then ``utils.max_pool_2x2`` and
    ``utils.batch_norm``. A fifth level with 512 filters applies the two
    convolutions and batch norm without pooling.

    Decoder: four levels. Each level upsamples with
    ``utils.conv2d_transpose_strided`` to the dynamic shape of the matching
    encoder activation (the pre-pooling ReLU output), concatenates the two
    along axis 3, applies two 3x3 convolutions with ReLU and batch norm.

    A final 1x1 convolution maps the 32 decoder channels to
    ``NUM_OF_CLASSESS`` score channels.

    Variable names (``w1_1`` ... ``w9_2``, ``W_t1`` ... ``W_t4``, ``w10`` and
    the matching biases), ReLU op names and batch-norm scopes
    (``bn1`` ... ``bn9``) are created in a fixed order, level by level.

    Args:
        image: Input tensor. Its size along axis 3 is read with
            ``int(image.shape[3])`` and therefore must be statically known
            (presumably NHWC image batches -- confirm against the caller).
        phase_train: Forwarded unchanged to every ``utils.batch_norm`` call.

    Returns:
        A tuple ``(annotation_pred, conv10)`` where ``conv10`` is the output
        of the final 1x1 convolution and ``annotation_pred`` is
        ``tf.argmax`` of ``conv10`` over axis 3 (op name ``"prediction"``).
    """

    def conv_relu(inputs, in_channels, out_channels, tag):
        # One 3x3 convolution + ReLU; `tag` is the "<level>_<n>" name suffix.
        kernel = utils.weight_variable([3, 3, in_channels, out_channels],
                                       name="w" + tag)
        bias = utils.bias_variable([out_channels], name="b" + tag)
        conv = utils.conv2d_basic(inputs, kernel, bias)
        return tf.nn.relu(conv, name="relu" + tag)

    def double_conv(inputs, in_channels, out_channels, level):
        # The two stacked conv+ReLU layers that make up one U-Net level.
        first = conv_relu(inputs, in_channels, out_channels, "%d_1" % level)
        return conv_relu(first, out_channels, out_channels, "%d_2" % level)

    def batch_norm(inputs, level):
        # Batch norm over the channel count taken from the tensor's axis 3.
        return utils.batch_norm(inputs, inputs.get_shape()[3], phase_train,
                                scope="bn%d" % level)

    def upsample_and_merge(inputs, skip, in_channels, out_channels, index):
        # Transposed conv up to `skip`'s shape, then channel-wise concat.
        # The kernel is laid out as [h, w, out_channels, in_channels].
        kernel = utils.weight_variable([2, 2, out_channels, in_channels],
                                       name="W_t%d" % index)
        bias = utils.bias_variable([out_channels], name="b_t%d" % index)
        upsampled = utils.conv2d_transpose_strided(
            inputs, kernel, bias, output_shape=tf.shape(skip))
        return tf.concat([upsampled, skip], 3)

    with tf.variable_scope("u_net"):
        # ---- contracting path (levels 1-4) ----
        skips = []
        net = image
        in_channels = int(image.shape[3])
        for level, out_channels in enumerate((32, 64, 128, 256), start=1):
            features = double_conv(net, in_channels, out_channels, level)
            skips.append(features)  # kept for the matching decoder level
            net = batch_norm(utils.max_pool_2x2(features), level)
            in_channels = out_channels

        # ---- bottleneck (level 5): no pooling ----
        net = batch_norm(double_conv(net, in_channels, 512, 5), 5)
        in_channels = 512

        # ---- expanding path (levels 6-9), deepest skip first ----
        for index, skip in enumerate(reversed(skips), start=1):
            level = index + 5
            out_channels = in_channels // 2
            merged = upsample_and_merge(net, skip, in_channels, out_channels,
                                        index)
            # After the concat the channel count is back to `in_channels`.
            net = batch_norm(
                double_conv(merged, in_channels, out_channels, level), level)
            in_channels = out_channels

        # ---- output score map ----
        w10 = utils.weight_variable([1, 1, in_channels, NUM_OF_CLASSESS],
                                    name="w10")
        b10 = utils.bias_variable([NUM_OF_CLASSESS], name="b10")
        conv10 = utils.conv2d_basic(net, w10, b10)
        # `axis` replaces the deprecated `dimension` keyword.
        annotation_pred = tf.argmax(conv10, axis=3, name="prediction")

    return annotation_pred, conv10
def inference(image, keep_prob, mean):
    """Build the VGG-initialised segmentation graph with four atrous branches.

    The preprocessed image is run through ``vgg_net``; its ``"conv5_4"``
    output is passed through ``utils.max_pool_1x1`` and fed to four parallel
    branches that differ only in the rate given to
    ``utils.atrous_conv2d_basic`` (6, 12, 18, 24). All branches share the
    same ``W6``/``b6``, ``W7``/``b7`` and ``W8``/``b8`` variables. Each
    branch is::

        atrous 3x3 conv -> batch norm -> ReLU -> dropout
        -> 1x1 conv     -> batch norm -> ReLU -> dropout
        -> 1x1 conv to NUM_OF_CLASSESS channels

    The four score maps are summed (``"Fc8"``), resized to the spatial size
    of ``image`` and passed through softmax.

    Args:
        image: Input tensor; ``tf.shape(image)[1]`` and ``[2]`` are used as
            the target size of the final resize.
        keep_prob: Dropout keep probability (a placeholder, per the original
            author's note), passed to every ``tf.nn.dropout`` call.
        mean: Mean pixel value handed to ``utils.process_image``.

    Returns:
        A tuple ``(prediction, resize_Fc8, softmax)``: the argmax of
        ``softmax`` over axis 3 with a trailing axis added, the resized
        summed scores, and their softmax.
    """
    print("setting up vgg initialized conv layers ...")
    # Fetch (downloading if needed, per the original note) the VGG model.
    model_data = utils.get_model_data(FLAGS.model_dir, MODEL_URL)
    weights = np.squeeze(model_data['layers'])
    # Per the original note: image pixel values minus the mean pixel value.
    processed_image = utils.process_image(image, mean)

    atrous_rates = (6, 12, 18, 24)

    def bn_relu_dropout(branches, stage):
        # Apply BN -> ReLU -> (optional summary) -> dropout to every branch,
        # one operation at a time across all branches so that ops are created
        # in the same stage-by-stage order as the hand-unrolled version.
        # Batch norm is deliberately applied before the ReLU.
        # NOTE(review): every call uses scope_bn='Bn' and passes the string
        # FLAGS.mode as the second argument -- confirm that
        # utils.batch_norm_layer handles both as intended.
        normed = [utils.batch_norm_layer(branch, FLAGS.mode, scope_bn='Bn')
                  for branch in branches]
        activated = [tf.nn.relu(tensor, name="relu%d_%d" % (stage, idx))
                     for idx, tensor in enumerate(normed, start=1)]
        if FLAGS.debug:
            for tensor in activated:
                utils.add_activation_summary(tensor)
        return [tf.nn.dropout(tensor, keep_prob=keep_prob)
                for tensor in activated]

    with tf.variable_scope("inference"):
        image_net = vgg_net(weights, processed_image)
        conv_final_layer = image_net["conv5_4"]
        pool5 = utils.max_pool_1x1(conv_final_layer)

        # Original author's notes: W6-W8 could presumably all be regularised
        # since they act as fully connected layers; the kernel was reduced
        # from 7x7 to 3x3 and the width from 4096 to 1024, which may leave
        # too few features. The newly added W6-W8 / b6-b8 carry their own
        # initialisation.
        W6 = utils.weight_variable([3, 3, 512, 1024], name="W6")
        b6 = utils.bias_variable([1024], name="b6")
        # Atrous convolutions with different rates over the same input
        # (data_format "channels_last" is the default, per the original note).
        # An earlier single-branch variant used a plain
        # utils.conv2d_basic(pool5, W6, b6) here instead.
        fc6 = [utils.atrous_conv2d_basic(pool5, W6, b6, rate)
               for rate in atrous_rates]
        # Dropout is used on these fully-connected-style layers to zero some
        # activations and reduce overfitting.
        dropped6 = bn_relu_dropout(fc6, 6)

        W7 = utils.weight_variable([1, 1, 1024, 1024], name="W7")
        b7 = utils.bias_variable([1024], name="b7")
        fc7 = [utils.conv2d_basic(branch, W7, b7) for branch in dropped6]
        dropped7 = bn_relu_dropout(fc7, 7)

        W8 = utils.weight_variable([1, 1, 1024, NUM_OF_CLASSESS], name="W8")
        b8 = utils.bias_variable([NUM_OF_CLASSESS], name="b8")
        # No ReLU/dropout is applied to the score maps (an earlier variant
        # that did so was disabled).
        fc8 = [utils.conv2d_basic(branch, W8, b8) for branch in dropped7]

        # The branch outputs have the same size but different receptive
        # fields; fuse them by summation.
        Fc8 = tf.add_n(fc8, name="Fc8")

        shape = tf.shape(image)
        # TF's built-in resize (bilinear by default) back to the input's
        # spatial size; the original note describes this as an 8x upscale to
        # 256x256.
        resize_Fc8 = tf.image.resize_images(Fc8, (shape[1], shape[2]))
        # Turn the forward-pass scores into a per-pixel class distribution.
        softmax = tf.nn.softmax(resize_Fc8)
        # `axis` replaces the deprecated `dimension` keyword.
        annotation_pred = tf.argmax(softmax, axis=3, name="prediction")

    # `axis` replaces the deprecated `dim` keyword.
    return tf.expand_dims(annotation_pred, axis=3), resize_Fc8, softmax