def main(args):
    """Train resfcn256 on position-map regression data.

    Bug fixes vs. the original:
      * ``Saver.restore`` was called as ``restore(sess.model_path)`` —
        the session and the checkpoint path are separate arguments.
      * ``xrange`` (Python 2 only) replaced by ``range``.
      * The progress print referenced undefined names ``iters`` and
        ``learn_rate``; both are now real local values.
      * Loss and the train step are fetched in one ``sess.run`` so each
        batch does a single forward/backward pass instead of two.
    """
    # Some arguments
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    batch_size = args.batch_size
    epochs = args.epochs
    train_data_file = args.train_data_file
    learning_rate = args.learning_rate
    model_path = args.model_path
    save_dir = args.checkpoint
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Training data
    data = TrainData(train_data_file)

    x = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    label = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])

    # Train net
    net = resfcn256(256, 256)
    x_op = net(x, is_training=True)

    # Loss
    loss = tf.losses.mean_squared_error(label, x_op)

    # This is for batch norm layer
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(
            learning_rate=learning_rate).minimize(loss)

    sess = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))
    sess.run(tf.global_variables_initializer())

    if os.path.exists(model_path):
        # Fixed: restore takes (session, path), not an attribute of sess.
        tf.train.Saver(net.vars).restore(sess, model_path)

    saver = tf.train.Saver(var_list=tf.global_variables())
    save_path = model_path

    # Begin training; one checkpoint per epoch.
    iters_per_epoch = int(math.ceil(1.0 * data.num_data / batch_size))
    for epoch in range(epochs):
        for iters in range(iters_per_epoch):
            batch = data(batch_size)
            # Single combined run: train and fetch the pre-update loss.
            _, loss_res = sess.run(
                [train_step, loss],
                feed_dict={x: batch[0], label: batch[1]})
            print('iters:%d/epoch:%d,learning rate:%f,loss:%f' %
                  (iters, epoch, learning_rate, loss_res))
        saver.save(sess=sess, save_path=save_path)
def main(args):
    """Feed identical random batches through the TF reference network and
    the converted PyTorch model and report how closely the outputs agree.
    """
    # Converted PyTorch model in eval mode.
    torch_model = PRNet(3, 3)
    torch_model.load_state_dict(torch.load('from_tf.pth'))
    torch_model.eval()

    # TF reference graph, CPU-only session, pretrained weights.
    sys.path.append(args.prnet_dir)
    from predictor import resfcn256
    tf_network_def = resfcn256(256, 256)
    x = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    tf_model = tf_network_def(x, is_training=False)
    sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))
    tf.train.Saver(tf_network_def.vars).restore(
        sess,
        os.path.join(args.prnet_dir, 'Data', 'net-data',
                     '256_256_resfcn256_weight'))

    for step in range(args.step):
        # Fresh random NHWC batch each step.
        batch_nhwc = np.random.randn(4, 256, 256, 3).astype(np.float32)

        # TF forward pass.
        tf_watched_out = sess.run(tf_model, feed_dict={x: batch_nhwc})

        # torch forward pass on the same data (NCHW layout).
        batch_nchw = torch.tensor(np.transpose(batch_nhwc, (0, 3, 1, 2)))
        torch_result = torch_model(batch_nchw)
        torch_watched_out = np.transpose(
            torch_result.cpu().detach().numpy(), (0, 2, 3, 1))

        print('step {}| is_close: {}| mse: {:.4f}'.format(
            step,
            np.allclose(tf_watched_out, torch_watched_out, rtol=1e-4,
                        atol=1e-5),
            np.sum(np.square(tf_watched_out - torch_watched_out))))
def main(args):
    """Run one matched SGD training loop on the TF reference network and
    the PyTorch port, printing per-step output agreement and both losses.
    """
    TRAIN_CONFIG = {
        'learning_rate': 1e-3,
    }

    # PyTorch side: converted weights, train mode, plain SGD with L2.
    torch_model = PRNet(3, 3)
    torch_model.load_state_dict(torch.load('from_tf.pth'))
    torch_model.train()
    torch_optimizer = torch.optim.SGD(
        torch_model.parameters(),
        lr=TRAIN_CONFIG['learning_rate'],
        weight_decay=0.0002  # equivalent to tcl.L2_regularizer
    )
    torch_mse = torch.nn.MSELoss()

    sys.path.append(args.prnet_dir)
    from predictor import resfcn256

    def tf_train_def(loss_val, var_list, train_config):
        # Plain gradient descent, mirroring the torch optimizer above.
        lr = train_config['learning_rate']
        global_step = tf.Variable(0, trainable=False)
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)
        grads = optimizer.compute_gradients(loss_val, var_list=var_list)
        return (optimizer.apply_gradients(grads, global_step=global_step),
                global_step)

    # TF side: build the graph in training mode with an MSE loss.
    tf_network_def = resfcn256(256, 256)
    x = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    y_ = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    tf_model = tf_network_def(x, is_training=True)
    tf_loss = tf.reduce_mean(tf.square(y_ - tf_model))
    tf_trainable_var = tf.trainable_variables()
    tf_train_op, tf_global_step_op = tf_train_def(tf_loss, tf_trainable_var,
                                                  TRAIN_CONFIG)
    tf_watched_op = tf.get_default_graph().get_operation_by_name(
        'resfcn256/Conv/Relu').outputs[0]

    sess = tf.Session(config=tf.ConfigProto(device_count={'GPU': 0}))
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(tf_network_def.vars)
    saver.restore(
        sess,
        os.path.join(args.prnet_dir, 'Data', 'net-data',
                     '256_256_resfcn256_weight'))

    # One fixed random batch reused for every step.
    random_image = np.random.randn(4, 256, 256, 3).astype(np.float32)
    random_label = np.random.randn(4, 256, 256, 3).astype(np.float32)

    for i in range(args.step):
        # TF: train one step and fetch the outputs.
        tf_out, _, tf_train_loss, tf_watched_out = sess.run(
            [tf_model, tf_train_op, tf_loss, tf_watched_op],
            feed_dict={x: random_image, y_: random_label})
        tf_watched_out = tf_out  # compare the final output, not the inner relu

        # torch: same data, NHWC -> NCHW.
        image_tensor = torch.tensor(np.transpose(random_image, (0, 3, 1, 2)))
        label_tensor = torch.tensor(np.transpose(random_label, (0, 3, 1, 2)))
        torch_out = torch_model(image_tensor)
        torch_train_loss = torch_mse(torch_out, label_tensor)
        torch_watched_out = np.transpose(
            torch_out.cpu().detach().numpy(), (0, 2, 3, 1))

        torch_optimizer.zero_grad()
        torch_train_loss.backward()
        torch_optimizer.step()
        torch_train_loss = torch_train_loss.item()

        print('step {}| is_close: {}| mse: {:.4f}| loss {:.6f}/{:.6f}'.format(
            i,
            np.allclose(tf_watched_out, torch_watched_out, atol=1e-3,
                        rtol=1e-3),
            np.sum(np.square(tf_watched_out - torch_watched_out)),
            tf_train_loss, torch_train_loss))
def main(args):
    """Train resfcn256 with cosine-restarts LR decay and TensorBoard logging.

    Bug fix: ``global_step`` was created for ``tf.train.cosine_decay_restarts``
    but never handed to ``minimize``, so it stayed at 0 and the learning rate
    never decayed; the optimizer now increments it.  A duplicate
    ``tf.summary.image('x_op', ...)`` registration was also removed.
    """
    # Some arguments
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    batch_size = args.batch_size
    epochs = args.epochs
    train_data_file = args.train_data_file
    learning_rate = args.learning_rate
    model_path = args.model_path
    save_dir = args.checkpoint
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Training data
    data = TrainData(train_data_file)

    x = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    label = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])

    # Train net
    net = resfcn256(256, 256)
    x_op = net(x, is_training=True)

    # NOTE(review): this weight mask is built but never applied to the loss
    # below — presumably meant for a weighted MSE; confirm before removing.
    weights = imageio.imread(
        'C:\\Users\\CVPR_01\\PRNet\\Data\\uv-data\\outfile.jpg')
    weights = weights.reshape(1, weights.shape[0], weights.shape[1], 1)
    weights = tf.constant(weights)
    weights = tf.broadcast_to(weights, [1, 256, 256, 3])

    # Loss
    loss = tf.losses.mean_squared_error(label, x_op)

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.cosine_decay_restarts(
        learning_rate=learning_rate,
        global_step=global_step,
        first_decay_steps=100000)

    # This is for batch norm layer
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # Fixed: pass global_step so it advances and the LR actually decays.
        train_step = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=0.9,
            beta2=0.999).minimize(loss, global_step=global_step)

    sess = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))

    # TensorBoard summaries.
    tf.summary.image('label', label)
    tf.summary.scalar('loss', loss)
    tf.summary.image('x_op', x_op)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter("./logs/hello_tf_190704-1")
    writer.add_graph(sess.graph)

    sess.run(tf.global_variables_initializer())

    saver = tf.train.Saver(var_list=tf.global_variables())
    save_path = model_path

    # Begin training; one checkpoint per epoch.
    fig1 = plt.figure()
    for epoch in range(epochs):
        for i in range(int(math.ceil(1.0 * data.num_data / batch_size))):
            batch = data(batch_size)
            _, loss_res, uv_rec = sess.run(
                [train_step, loss, x_op],
                feed_dict={x: batch[0], label: batch[1]})
            print('epoch:%d,loss:%f' % (epoch, loss_res))
        saver.save(sess=sess, save_path=save_path)
    plt.show()
def main(args):
    """Convert the pretrained TF resfcn256 checkpoint into a PyTorch state
    dict ('from_tf.pth'), then sanity-check the converted model against the
    TF network on one test image.

    Bug fix: a ``tf.ConfigProto(gpu_options=...)`` was built and immediately
    discarded by a second assignment; only the CPU-only config is kept.  The
    redundant second ``resize`` call was also hoisted into ``img_np``.
    """
    sys.path.append(args.prnet_dir)
    # Imported from prnet_dir; maybe using importlib is a better idea.
    from predictor import resfcn256

    # TF network forward pass (inference mode).
    tf_network_def = resfcn256(256, 256)
    net_input = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    tf_model = tf_network_def(net_input, is_training=False)

    # CPU-only session; removed the dead allow_growth config.
    sess = tf.Session(config=tf.ConfigProto(device_count={"GPU": 0}))
    saver = tf.train.Saver(tf_network_def.vars)
    saver.restore(
        sess,
        os.path.join(args.prnet_dir, 'Data', 'net-data',
                     '256_256_resfcn256_weight'))

    graph = sess.graph
    torch_model = PRNetFull(3, 3)  # provides tf_map: tf node name -> torch key
    torch_dict = OrderedDict()
    for node in graph.as_graph_def().node:
        if node.name not in torch_model.tf_map:
            continue
        torch_name = torch_model.tf_map[node.name]
        data_np = sess.run(graph.get_operation_by_name(node.name).outputs[0])
        if len(data_np.shape) > 1:
            # weight layouts   | tensorflow       | pytorch        | transpose
            # conv2d_transpose (H, W, out, in) -> (in, out, H, W)  (3, 2, 0, 1)
            # conv2d           (H, W, in, out) -> (out, in, H, W)  (3, 2, 0, 1)
            torch_dict[torch_name] = torch.tensor(
                np.transpose(data_np, (3, 2, 0, 1)).astype(np.float32))
        else:
            torch_dict[torch_name] = torch.tensor(data_np.astype(np.float32))
    torch.save(torch_dict, 'from_tf.pth')

    # Reload the converted weights into the lean model for verification.
    del torch_model
    torch_model = PRNet(3, 3)
    torch_model.load_state_dict(torch_dict)
    torch_model.eval()

    # Test with images
    from skimage.io import imread
    from skimage.transform import resize
    img = imread(os.path.join(args.prnet_dir, 'TestImages', '0.jpg')) / 255.
    img_np = resize(img, (256, 256))[np.newaxis, :, :, :]  # simply using resize
    img_bchw = np.transpose(img_np, (0, 3, 1, 2)).astype(np.float32)

    torch_input = torch.from_numpy(img_bchw)
    torch_out = torch_model(torch_input).cpu().detach().numpy()
    torch_out = np.transpose(torch_out, (0, 2, 3, 1)).squeeze()

    net_out = sess.run(tf_model, feed_dict={net_input: img_np})
    tf_out = net_out.squeeze()
    print('shape', tf_out.shape, torch_out.shape)
    print('mse', np.sum(np.square(tf_out - torch_out)))
    print('close', np.allclose(tf_out, torch_out))
def main(args):
    """Resumable training of resfcn256 with a weighted MSE loss and an
    exponential learning-rate schedule (halved every 5 epochs).

    Bug fix: ``epoch_iters = data.num_data / batch_size`` is float division
    on Python 3, which makes ``global_step`` a float Variable and skews
    ``decay_steps``; floor division keeps the Python 2 integer behavior.
    """
    # Some arguments
    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    batch_size = args.batch_size
    epochs = args.epochs
    train_data_file = args.train_data_file
    model_path = args.model_path
    save_dir = args.checkpoint
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)

    # Training data
    data = TrainData(train_data_file)

    # Resume from a per-epoch checkpoint named ..._<epoch> if present.
    begin_epoch = 0
    if os.path.exists(model_path + '.data-00000-of-00001'):
        begin_epoch = int(model_path.split('_')[-1]) + 1

    epoch_iters = data.num_data // batch_size  # int on both Py2 and Py3
    global_step = tf.Variable(epoch_iters * begin_epoch, trainable=False)
    # Decay learning rate by half every 5 epochs:
    # learning_rate = learning_rate * 0.5 ^ (global_step / decay_steps)
    decay_steps = 5 * epoch_iters
    learning_rate = tf.train.exponential_decay(args.learning_rate, global_step,
                                               decay_steps, 0.5, staircase=True)

    x = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])
    label = tf.placeholder(tf.float32, shape=[None, 256, 256, 3])

    # Train net
    net = resfcn256(256, 256)
    x_op = net(x, is_training=True)

    # Loss: MSE weighted by a per-pixel mask image.
    weights = cv2.imread("Data/uv-data/weight_mask_final.jpg")  # [256, 256, 3]
    weights_data = np.zeros([1, 256, 256, 3], dtype=np.float32)
    weights_data[0, :, :, :] = weights / 16.0
    loss = tf.losses.mean_squared_error(label, x_op, weights_data)

    # This is for batch norm layer
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_step = tf.train.AdamOptimizer(
            learning_rate=learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08,
            use_locking=False).minimize(loss, global_step=global_step)

    sess = tf.Session(config=tf.ConfigProto(
        gpu_options=tf.GPUOptions(allow_growth=True)))
    sess.run(tf.global_variables_initializer())

    if os.path.exists(model_path + '.data-00000-of-00001'):
        tf.train.Saver(net.vars).restore(sess, model_path)

    saver = tf.train.Saver(var_list=tf.global_variables())
    save_path = model_path

    # Begin training; checkpoints are suffixed with the epoch index.
    for epoch in range(begin_epoch, epochs):
        for iters in range(int(math.ceil(1.0 * data.num_data / batch_size))):
            batch = data(batch_size)
            loss_res, _, global_step_res, learning_rate_res = sess.run(
                [loss, train_step, global_step, learning_rate],
                feed_dict={x: batch[0], label: batch[1]})
            print('global_step:%d:iters:%d/epoch:%d,learning rate:%f,loss:%f' %
                  (global_step_res, iters, epoch, learning_rate_res, loss_res))
        saver.save(sess=sess, save_path=save_path + '_' + str(epoch))