def process(args, image_path):
    """Estimate a pose for one image and, unless quiet, render it to disk.

    Args:
        args: options object; ``model_file``, ``verbose`` and ``quiet`` are
            read here.
        image_path: path of the image to analyse.

    Returns:
        The ``elapsed`` value reported by ``PoseNet.estimate_pose``.

    Side effects:
        When ``args.quiet`` is falsy, the annotated image is written to
        ``www.jpg`` in the current working directory.
    """
    estimator = PoseNet(
        model_path=args.model_file,
        image_path=image_path,
    )
    person, elapsed = estimator.estimate_pose(verbose=args.verbose)
    if args.quiet:
        return elapsed

    picture = Image.open(image_path)
    canvas = ImageDraw.Draw(picture)
    key_points = person.key_points

    # Limbs: draw a segment only when both of its end points are confident.
    for joint in JOINTS:
        first = key_points[joint[0].value[0]]
        if not first.score > MIN_CONFIDENCE:
            continue
        second = key_points[joint[1].value[0]]
        if not second.score > MIN_CONFIDENCE:
            continue
        segment = (
            int(first.position.x),
            int(first.position.y),
            int(second.position.x),
            int(second.position.y),
        )
        canvas.line(segment, fill=(255, 255, 0), width=3)

    # Joints: a 10x10 px disc centred on every confident key point.
    for key_point in key_points:
        if not key_point.score > MIN_CONFIDENCE:
            continue
        centre_x = int(key_point.position.x)
        centre_y = int(key_point.position.y)
        canvas.ellipse(
            (centre_x - 5, centre_y - 5, centre_x + 5, centre_y + 5),
            fill=(0, 128, 0),
            outline=(255, 255, 0),
        )

    picture.save("www.jpg")
    return elapsed
def main():
    """Predict an xyz vector for every test scene and write them as CSV.

    Reads the module-level ``path`` (checkpoint), ``test_image_list``,
    ``test_dir``, ``max_train``/``min_train`` (de-normalisation range) and
    ``result_dir`` (output file path).  Each output row is
    ``<scene id>,<x>,<y>,<z>`` with four decimals, sorted by scene id, and
    is also echoed to stdout.
    """
    image = tf.placeholder(tf.float32, [1, 224, 224, 30])
    net = PoseNet({'data': image})
    xyz_output = net.layers['cls3_fc_pose_xyz']
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(init)
        saver.restore(sess, path)

        scene_ids = list(test_image_list)
        thumbnails = [
            test_dir + "/" + scene_id + '/thumbnail.jpg'
            for scene_id in scene_ids
        ]

        predictions = {}
        for position, np_image in enumerate(preprocess(thumbnails)):
            raw = sess.run([xyz_output], feed_dict={image: np_image})
            # Undo the [0, 100] normalisation applied to the training targets.
            predictions[scene_ids[position]] = (
                np.squeeze(raw) / 100.0 * (max_train - min_train) + min_train)

        with open(result_dir, 'w') as csv_file:
            for scene_id in sorted(predictions):
                xyz = predictions[scene_id]
                row = ','.join(
                    [scene_id] + ['%.4f' % xyz[axis] for axis in range(3)]
                ) + '\n'
                print(row)
                csv_file.write(row)
def main():
    """Evaluate a shift-conditioned PoseNet on the configured test set.

    The JSON config path is taken from ``sys.argv[1]`` when given, otherwise
    a built-in default is used.  The raw test data is expanded into four
    datasets (indices 0..3, matching the width of the ``shift`` placeholder)
    and each one is handed to ``process``.
    """
    import sys

    argv = sys.argv
    config_file = (argv[1] if len(argv) > 1
                   else "/home/weihao/posenet/paranet/config.json")

    js = Utils.load_json_file(config_file)
    location = js['directory']
    tr_dataset = js['training_dataset']  # read but not used below
    dataset = js['testing_dataset']
    netFile = js['netFile']

    images = tf.placeholder(tf.float32, [1, 224, 224, 3])
    shift = tf.placeholder(tf.float32, [1, 1, 1, 4])
    net = PoseNet({'data': images, 'shift': shift})

    rds = Utils.get_raw_data(location, dataset)
    datasets = [Utils.get_data(rds, index) for index in range(4)]

    for data in datasets:
        process(data, netFile, net, images, shift)
def test():
    """Run the interactive pose-estimation loop until ESC is pressed.

    Each iteration increments the module-level ``frames`` counter, reads one
    image, submits it to the processor and, when a pose result is available,
    draws it together with the FPS/score overlay and the chart.

    The processor is shut down and all OpenCV windows are destroyed when the
    loop ends, including when it ends because of an exception.
    """
    global frames
    net = PoseNet(model_path)
    Proc = Process(net, ActionCallback)
    drawer = PoseDrawer(net.InputSize)
    queue = PoseQueue()

    try:
        while True:
            frames += 1
            # `iter` is whatever this module binds that name to (not shown
            # in this view).
            image = read(iter, 1)
            Proc.setImage(image)
            result = Proc.getPose()
            # Identity test: `== None` can be overridden by the result type.
            if result is not None:
                img, pose, score = result
                queue.push(pose)
                drawer.Draw(img, pose)
                fps = Proc.getFPS()
                ShowFPS(img, fps, score)
                cv2.imshow("pic", img)
                ShowChart(queue)
            if cv2.waitKey(1) == 27:  # ESC
                break
    finally:
        # Spelling follows the method name used by the original call site.
        Proc.Shutdwon()
        cv2.destroyAllWindows()
def __init__(self):
    """Set up the ROS node, build the PoseNet graph and start listening.

    Loads ROS parameters from ``params.yaml``, restores the network weights
    from ``PoseNet.ckpt`` (both relative to the working directory) and then
    calls ``self.listener()``.
    """
    rospy.init_node("PoseNet_ROS")
    self.bridge = CvBridge()
    self.predictedOdom = Odometry()
    self.predictedOdom.header.stamp = rospy.Time.now()
    self.predictedOdom.header.frame_id = "odom"

    # Per-instance state is initialised BEFORE listener() is started:
    # previously these attributes were assigned only after listener()
    # returned, so anything triggered from inside listener() would not
    # find them.
    # NOTE(review): assumes listener() does not itself assign these three
    # attributes -- confirm against its implementation.
    self.initial = False
    self.predicted_q_init = []
    self.predicted_p_init = []

    subprocess.call("rosparam load params.yaml", shell=True)

    self.image_tf = tf.placeholder(tf.float32, [1, 224, 224, 3])
    net = PoseNet({'data': self.image_tf})
    self.p3_x = net.layers['cls3_fc_pose_xyz']
    self.p3_q = net.layers['cls3_fc_pose_wpqr']

    # tf.initialize_all_variables() is the deprecated spelling of this call.
    init = tf.global_variables_initializer()

    # To make TensorFlow run properly on the TX2
    # https://devtalk.nvidia.com/default/topic/1029742/jetson-tx2/tensorflow-1-6-not-working-with-jetpack-3-2/
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    saver = tf.train.Saver()
    self.sess = tf.Session(config=config)
    self.sess.run(init)
    saver.restore(self.sess, 'PoseNet.ckpt')

    self.listener()
def __init__(self, params, output_dir):
    """Build datasets, models, optimizer, logging and checkpoint state.

    Args:
        params: hyper-parameter object; ``data_dir``, ``batch_size``,
            ``epochs``, ``input_h``, ``input_w`` and ``num_input_frames``
            are read here.
        output_dir: directory receiving TensorBoard logs and checkpoints.
    """
    self.strategy = tf.distribute.MirroredStrategy()
    self.params = params

    # Datasets
    # os.listdir() returns entries in arbitrary order; sort so the
    # train/validation split below is the same on every run and machine.
    tf_records = sorted(
        os.path.join(params.data_dir, file_name)
        for file_name in os.listdir(params.data_dir)
        if file_name.endswith('.tfrecords'))
    train_records = tf_records[:30]
    val_records = tf_records[30:]
    self.train_dataset = self.strategy.experimental_distribute_dataset(
        input_fn(train_records))
    self.val_dataset = self.strategy.experimental_distribute_dataset(
        input_fn(val_records))

    # NOTE(review): this counts record FILES (at most 30), not examples, so
    # total_iteration (and the decay horizon) may be far smaller than
    # intended and is 0 when batch_size > 30 -- confirm.
    num_samples = len(train_records)
    self.total_iteration = (num_samples // params.batch_size) * params.epochs

    # NOTE(review): the original indentation was lost; everything that
    # followed the `with` is kept inside the strategy scope -- confirm the
    # intended extent.
    with self.strategy.scope():
        # Models
        self.models = {}
        self.models['disparity'] = DisparityNet(
            input_shape=(params.input_h, params.input_w, 3))
        self.models['pose'] = PoseNet(
            input_shape=(params.input_h, params.input_w,
                         3 * params.num_input_frames),
            num_input_frames=params.num_input_frames)

        # Optimizer
        learning_rate_fn = tf.keras.optimizers.schedules.PolynomialDecay(
            0.0002,
            end_learning_rate=0.000001,
            decay_steps=self.total_iteration,
            power=0.5)
        self.optimizer = tf.keras.optimizers.Adam(learning_rate_fn)

        # Tensorboard & Meters
        train_log_dir = os.path.join(output_dir, 'train_logs')
        val_log_dir = os.path.join(output_dir, 'val_logs')
        self.train_summary_writer = tf.summary.create_file_writer(
            train_log_dir)
        self.test_summary_writer = tf.summary.create_file_writer(val_log_dir)
        self.train_meter = {
            'ssim': tf.keras.metrics.Mean(name='ssim'),
            'l1': tf.keras.metrics.Mean(name='l1'),
            'smooth': tf.keras.metrics.Mean(name='smooth'),
        }
        self.val_meter = {
            'ssim': tf.keras.metrics.Mean(name='ssim'),
            'l1': tf.keras.metrics.Mean(name='l1'),
            'smooth': tf.keras.metrics.Mean(name='smooth'),
        }
        self.step = 0

        # Load states from optimiser and model if available
        self.ckpt_disp, self.manager_disp = self.setup_logger(
            self.models['disparity'],
            os.path.join(output_dir, 'disparity_model'))
        self.ckpt_pose, self.manager_pose = self.setup_logger(
            self.models['pose'], os.path.join(output_dir, 'pose_model'))
        # Continue after the stored step when a checkpoint exists.
        if self.manager_disp.latest_checkpoint:
            self.start_epoch = int(self.ckpt_disp.step) + 1
        else:
            self.start_epoch = int(self.ckpt_disp.step)

        # Helpers
        self.pix_coords = pixel_coord(params.batch_size, params.input_h,
                                      params.input_w, True)  # [b, 3, npoints]
        print("Starting training step {}".format(
            self.ckpt_disp.step.numpy()))
def inference():
    """Run PoseNet on every configured dataset and write per-image results.

    For each entry of the module-level ``dataset`` list a result file
    ``directory + <entry without its last 4 chars> + dataset_predict`` is
    written with one line per image: file name, predicted position (3),
    predicted quaternion (4), position error and angular error in degrees.
    The median errors of each dataset are printed.
    """
    image = tf.placeholder(tf.float32, [1, 224, 224, 3])
    net = PoseNet({'data': image})
    p3_x = net.layers['cls3_fc_pose_xyz']
    p3_q = net.layers['cls3_fc_pose_wpqr']

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Load the data
        sess.run(init)
        saver.restore(sess, directory + "PoseNet.ckpt")

        for name in dataset:
            datasource = get_data(directory + name)
            results = np.zeros((len(datasource.images), 2))

            # `with` guarantees the result file is closed even when a
            # sample fails half-way through.
            with open(directory + name[:-4] + dataset_predict, 'w') as f:
                f.write('Visual Landmark Dataset V1\nImageFile, Camera Position [X Y Z W P Q R]\n')

                for i, np_image in enumerate(datasource.images):
                    pose_q = np.asarray(datasource.poses[i][3:7])
                    pose_x = np.asarray(datasource.poses[i][0:3])
                    predicted_x, predicted_q = sess.run(
                        [p3_x, p3_q], feed_dict={image: np_image})
                    predicted_q = np.squeeze(predicted_q).reshape([1, 4])
                    predicted_x = np.squeeze(predicted_x).reshape([1, 3])

                    # Compute Individual Sample Error
                    q1 = pose_q / np.linalg.norm(pose_q)
                    q2 = predicted_q / np.linalg.norm(predicted_q)
                    # Rounding can push |<q1, q2>| marginally above 1, which
                    # makes arccos return NaN; clamp to its domain first.
                    d = np.clip(abs(np.sum(np.multiply(q1, q2))), 0.0, 1.0)
                    theta = 2 * np.arccos(d) * 180 / math.pi
                    error_x = np.linalg.norm(pose_x - predicted_x)
                    results[i, :] = [error_x, theta]

                    f.write('\n%s %s %s %s %s %s %s %s %s %s' % (
                        datasource.fname[i],
                        predicted_x[0, 0], predicted_x[0, 1], predicted_x[0, 2],
                        predicted_q[0, 0], predicted_q[0, 1],
                        predicted_q[0, 2], predicted_q[0, 3],
                        error_x, theta))

            median_result = np.median(results, axis=0)
            print('Median error ', median_result[0], 'm and ',
                  median_result[1], 'degrees.')
def main():
    """Train the position-only PoseNet with an exponentially decaying LR.

    Uses the module-level ``batch_size``, ``max_iterations``, ``outputFile``
    (checkpoint path prefix) and ``time_start``.  Saves a checkpoint every
    1000 iterations, a ``PoseNet_best.ckpt`` whenever the mini-batch loss
    reaches a new minimum, and a final checkpoint after the last iteration.
    """
    images = tf.placeholder(tf.float32, [batch_size, 224, 224, 30])
    poses_x = tf.placeholder(tf.float32, [batch_size, 3])
    datasource = get_data()

    net = PoseNet({'data': images})
    p1_x = net.layers['cls1_fc_pose_xyz']
    p2_x = net.layers['cls2_fc_pose_xyz']
    p3_x = net.layers['cls3_fc_pose_xyz']

    # Auxiliary heads are down-weighted relative to the final head.
    l1_x = tf.reduce_mean(tf.square(tf.subtract(p1_x, poses_x))) * 0.3
    l2_x = tf.reduce_mean(tf.square(tf.subtract(p2_x, poses_x))) * 0.3
    l3_x = tf.reduce_mean(tf.square(tf.subtract(p3_x, poses_x))) * 1
    loss = l1_x + l2_x + l3_x

    global_ = tf.Variable(tf.constant(0))
    lr = tf.train.exponential_decay(0.0001, global_, 500, 0.1,
                                    staircase=False)
    opt = tf.train.AdamOptimizer(learning_rate=lr,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=0.00000001,
                                 use_locking=False,
                                 name='Adam').minimize(loss)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.7)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)
        # Start from +inf so the first iteration always yields a "best"
        # checkpoint, whatever the scale of the loss.
        min_loss = float('inf')
        data_gen = gen_data_batch(datasource)
        last_iteration = None

        for i in range(max_iterations):
            last_iteration = i
            np_images, np_poses_x = next(data_gen)
            # `global_` has to be fed on the optimizer step itself.  Feeding
            # it only while evaluating `lr` (as was done before) overrides
            # the value for that single run() call; the variable stays 0 and
            # the learning rate never decays.
            feed = {images: np_images, poses_x: np_poses_x, global_: i}
            sess.run(opt, feed_dict=feed)
            np_loss = sess.run(loss, feed_dict=feed)

            if i % 20 == 0:
                print("iteration: " + str(i) + "\n\t" + "Loss is: " +
                      str(np_loss))
            if np_loss < min_loss:
                min_loss = np_loss
                saver.save(sess, outputFile + 'PoseNet_best.ckpt')
                print("loss minest model saved: " + outputFile +
                      'PoseNet_best.ckpt')
            if i % 1000 == 0:
                saver.save(sess, outputFile + 'PoseNet' + '_' + str(i) +
                           '.ckpt')
                print("Intermediate file saved at: " + outputFile +
                      'PoseNet' + '_' + str(i) + '.ckpt')

        # Final checkpoint; skipped when no iteration ran at all.
        if last_iteration is not None:
            saver.save(sess, outputFile + 'PoseNet' + '_' +
                       str(last_iteration) + '.ckpt')
            print("Intermediate file saved at: " + outputFile + 'PoseNet' +
                  '_' + str(last_iteration) + '.ckpt')

    time_end = time.time()
    print('totally cost', time_end - time_start)
def main():
    """Fine-tune PoseNet (position + orientation) from ``posenet.npy``.

    Uses the module-level ``batch_size`` and ``max_iterations``.  The loss is
    the weighted sum of the Euclidean position and quaternion errors of the
    three output heads.  The model is checkpointed every 5000 iterations and
    once more after the last iteration.
    """
    images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
    poses_x = tf.placeholder(tf.float32, [batch_size, 3])
    poses_q = tf.placeholder(tf.float32, [batch_size, 4])
    datasource = get_data()

    net = PoseNet({'data': images})

    def weighted_distance(layer_name, target, weight):
        """Weighted L2 distance between one output layer and its target."""
        difference = tf.subtract(net.layers[layer_name], target)
        return tf.sqrt(tf.reduce_sum(tf.square(difference))) * weight

    # (output layer, target placeholder, loss weight), in summation order.
    loss_spec = [
        ('cls1_fc_pose_xyz', poses_x, 0.3),
        ('cls1_fc_pose_wpqr', poses_q, 150),
        ('cls2_fc_pose_xyz', poses_x, 0.3),
        ('cls2_fc_pose_wpqr', poses_q, 150),
        ('cls3_fc_pose_xyz', poses_x, 1),
        ('cls3_fc_pose_wpqr', poses_q, 500),
    ]
    terms = [weighted_distance(*spec) for spec in loss_spec]
    loss = terms[0]
    for term in terms[1:]:
        loss = loss + term

    opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=0.00000001,
                                 use_locking=False,
                                 name='Adam').minimize(loss)

    # Set GPU options
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.6833))

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    outputFile = "/home/prasenjit/posenet/trainingresult/PoseNet.ckpt"

    with tf.Session(config=session_config) as sess:
        # Load the data
        sess.run(init)
        net.load('posenet.npy', sess)

        batches = gen_data_batch(datasource)
        for i in range(max_iterations):
            np_images, np_poses_x, np_poses_q = next(batches)
            feed = {
                images: np_images,
                poses_x: np_poses_x,
                poses_q: np_poses_q,
            }
            sess.run(opt, feed_dict=feed)
            np_loss = sess.run(loss, feed_dict=feed)

            if i % 20 == 0:
                print("iteration: " + str(i) + "\n\t" + "Loss is: " +
                      str(np_loss))
            if i % 5000 == 0:
                saver.save(sess, outputFile)
                print("Intermediate file saved at: " + outputFile)

        saver.save(sess, outputFile)
        print("Intermediate file saved at: " + outputFile)
def main():
    """Evaluate a trained PoseNet and print per-sample and summary errors.

    Restores ``path + 'PoseNet_2019-01-30_60000.ckpt'`` (``path`` is a
    module-level name), runs every image of the data source through the
    network and prints the position error and the angular error in degrees,
    followed by the median, mean and maximum over all samples.
    """
    def emit(*parts):
        # One pre-joined string prints identically with the Python 2 print
        # statement and the Python 3 print function; the single-space join
        # reproduces the layout of the former `print a, b, c` statements.
        print(' '.join(str(part) for part in parts))

    image = tf.placeholder(tf.float32, [1, 224, 224, 3])
    datasource = get_data()
    results = np.zeros((len(datasource.images), 2))

    net = PoseNet({'data': image})
    p3_x = net.layers['cls3_fc_pose_xyz']
    p3_q = net.layers['cls3_fc_pose_wpqr']

    # tf.initialize_all_variables() is the deprecated spelling of this call.
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Load the data
        sess.run(init)
        saver.restore(sess, path + 'PoseNet_2019-01-30_60000.ckpt')

        for i, np_image in enumerate(datasource.images):
            pose_q = np.squeeze(np.asarray(datasource.poses[i][3:7]))
            pose_x = np.squeeze(np.asarray(datasource.poses[i][0:3]))
            predicted_x, predicted_q = sess.run([p3_x, p3_q],
                                                feed_dict={image: np_image})
            predicted_q = np.squeeze(predicted_q)
            predicted_x = np.squeeze(predicted_x)

            # Compute Individual Sample Error
            q1 = pose_q / np.linalg.norm(pose_q)
            q2 = predicted_q / np.linalg.norm(predicted_q)
            # Rounding can push |<q1, q2>| marginally above 1, which makes
            # arccos return NaN; clamp to its domain first.
            d = np.clip(abs(np.sum(np.multiply(q1, q2))), 0.0, 1.0)
            theta = 2 * np.arccos(d) * 180 / math.pi
            error_x = np.linalg.norm(pose_x - predicted_x)
            results[i, :] = [error_x, theta]
            emit('Iteration: ', i, ' Error XYZ (m): ', error_x,
                 ' Error Q (degrees): ', theta, ' predicted_x: ',
                 predicted_x, ' predicted_q: ', predicted_q)

    median_result = np.median(results, axis=0)
    mean_result = np.mean(results, axis=0)
    max_result = np.max(results, axis=0)
    emit('Median error ', median_result[0], 'm and ', median_result[1],
         'degrees.')
    emit('Mean error ', mean_result[0], 'm and ', mean_result[1], 'degrees.')
    emit('Maximum error ', max_result[0], 'm and ', max_result[1],
         'degrees.')
def main():
    """Evaluate per-class PoseNet checkpoints selected via the command line.

    Positional arguments (all optional):
        1. path of the JSON config (default ``config.json``)
        2. ``rep`` -- class index to evaluate; ``-1`` evaluates every class;
           when omitted the value of the config key ``rep`` is used
        3. ``0`` selects the testing dataset, any other integer the training
           dataset (default: testing)
        4. explicit checkpoint path, overriding the one derived from the
           config
    """
    import sys

    argv = sys.argv
    argc = len(argv)

    config_file = argv[1] if argc > 1 else "config.json"
    rep = int(argv[2]) if argc > 2 else None
    test_data = (int(argv[3]) == 0) if argc > 3 else True

    js = Utils.load_json_file(config_file)
    location = js['directory']
    dataset = js['testing_dataset' if test_data else 'training_dataset']
    netFile = js['netFile']

    if rep is None:
        rep = int(js['rep'])

    netFile = ('{}/Net_{}/PNet'.format(netFile, rep) if rep > -1
               else '{}/Net/PNet'.format(netFile))
    if argc > 4:
        netFile = argv[4]

    # One class per entry directly under the data directory.
    num_class = len(glob.glob(os.path.join(location, '*')))

    images = tf.placeholder(tf.float32, [1, 224, 224, 3])
    shift = tf.placeholder(tf.float32, [1, 1, 1, num_class])
    net = PoseNet({'data': images, 'shift': shift})

    for class_index in range(num_class):
        if rep != -1 and rep != class_index:
            continue
        rds = Utils.get_raw_data_indoor(location, dataset, class_index)
        ds = Utils.get_data(rds, class_index, 2.0, ss=480)
        process(ds, netFile, net, images, shift)
def main():
    """Train (or only evaluate) a PoseNet model according to ``parser`` args.

    Builds the model and Adam optimizer, optionally resumes from
    ``args.resume``, splits the training data 75/25 into training and
    validation sets, then either evaluates on the test set
    (``args.evaluate``) or runs the training loop with periodic validation,
    checkpointing, logging and optional early stopping.

    Raises:
        ValueError: if ``args.optimizer`` is not ``'adam'``.
    """
    global args
    args = parser.parse_args()
    os.makedirs(args.output, exist_ok=True)

    # if don't call torch.cuda.current_device(), fails later with
    # "RuntimeError: cuda runtime error (30) : unknown error at ..\aten\src\THC\THCGeneral.cpp:87"
    torch.cuda.current_device()
    use_cuda = torch.cuda.is_available() and True
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # try to get consistent results across runs
    #   => currently still fails, however, makes runs a bit more consistent
    _set_random_seed()

    # create model
    model = PoseNet(arch=args.arch, num_features=args.features,
                    dropout=args.dropout, pretrained=True,
                    cache_dir=args.cache, loss=args.loss,
                    excl_bn_affine=args.excl_bn, beta=args.beta,
                    sx=args.sx, sq=args.sq)

    # create optimizer
    #  - currently only Adam supported
    if args.optimizer != 'adam':
        # Raised explicitly: an `assert` disappears under `python -O` and
        # would leave `optimizer` undefined.
        raise ValueError('Invalid optimizer: %s' % args.optimizer)

    eps = 0.1
    if args.split_opt_params:
        new_biases, new_weights, biases, weights, others = \
            model.params_to_optimize(split=True,
                                     excl_batch_norm=args.excl_bn)
        optimizer = torch.optim.Adam([
            {'params': new_biases, 'lr': args.lr * 2, 'weight_decay': 0.0, 'eps': eps},
            {'params': new_weights, 'lr': args.lr, 'weight_decay': args.weight_decay, 'eps': eps},
            {'params': biases, 'lr': args.lr * 2, 'weight_decay': 0.0, 'eps': eps},
            {'params': weights, 'lr': args.lr, 'weight_decay': args.weight_decay, 'eps': eps},
            {'params': others, 'lr': 0, 'weight_decay': 0, 'eps': eps},
        ])
    else:
        params = model.params_to_optimize(excl_batch_norm=args.excl_bn)
        optimizer = torch.optim.Adam(params, lr=args.lr,
                                     weight_decay=args.weight_decay, eps=eps)

    # optionally resume from a checkpoint
    best_loss = float('inf')
    best_epoch = -1
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_epoch = checkpoint['best_epoch']
            best_loss = checkpoint['best_loss']
            model.load_state_dict(checkpoint['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))
            quit()

    # define overall training dataset, set output normalization, load model to gpu
    all_tr_data = PoseDataset(args.data, 'dataset_train.txt',
                              random_crop=not args.center_crop)
    model.set_target_transform(all_tr_data.target_mean,
                               all_tr_data.target_std)
    model.to(device)

    # split overall training data to training and validation sets
    # validation set is used for early stopping, or possibly in future for hyper parameter optimization
    # The validation size is the remainder so both lengths always sum to
    # len(all_tr_data), which random_split requires.
    n_train = round(len(all_tr_data) * 0.75)
    lengths = [n_train, len(all_tr_data) - n_train]
    tr_data, val_data = torch.utils.data.random_split(all_tr_data, lengths)

    # define data loaders
    loader_options = dict(batch_size=args.batch_size,
                          num_workers=args.workers, pin_memory=True,
                          worker_init_fn=_worker_init_fn)
    train_loader = DataLoader(tr_data, shuffle=True, **loader_options)
    val_loader = DataLoader(val_data, shuffle=False, **loader_options)
    test_loader = DataLoader(
        PoseDataset(args.data, 'dataset_test.txt', random_crop=False),
        shuffle=False, **loader_options)

    # evaluate model only
    if args.evaluate:
        # `device` is passed like in every other validate() call below.
        validate(test_loader, model, device)
        return

    def snapshot(epoch):
        """Checkpoint payload describing the current training state."""
        return {
            'epoch': epoch + 1,
            'best_epoch': best_epoch,
            'best_loss': best_loss,
            'arch': args.arch,
            'model': model.state_dict(),
            'optimizer': optimizer.state_dict(),
        }

    # training loop
    for epoch in range(args.start_epoch, args.epochs):
        # train for one epoch
        lss, pos, ori = process(train_loader, model, optimizer, epoch,
                                device, adv_tr_eps=args.adv_tr_eps)

        # stats layout: [0:6] training, [6:11] validation, [11:16] test
        stats = np.zeros(16)
        stats[:6] = [epoch, lss.avg, pos.avg, pos.median, ori.avg,
                     ori.median]

        # evaluate on validation set
        is_best = False
        if (epoch + 1) % args.test_freq == 0:
            lss, pos, ori = validate(val_loader, model, device)
            stats[6:11] = [lss.avg, pos.avg, pos.median, ori.avg,
                           ori.median]

            # remember best loss and save checkpoint
            is_best = lss.avg < best_loss
            if is_best:
                best_epoch = epoch
                best_loss = lss.avg
                # save best model
                _save_checkpoint(snapshot(epoch), True)

        # maybe save a checkpoint even if not best model
        if (epoch + 1) % args.save_freq == 0 and not is_best:
            _save_checkpoint(snapshot(epoch), False)

        # evaluate on test set if best yet result on validation set
        if is_best:
            lss, pos, ori = validate(test_loader, model, device)
            stats[11:] = [lss.avg, pos.avg, pos.median, ori.avg, ori.median]

        # add row to log file
        _save_log(stats, epoch == 0)

        # early stopping
        if args.early_stopping > 0 and epoch - best_epoch >= args.early_stopping:
            print('=====\nEARLY STOPPING CRITERION MET (%d epochs since best validation loss)' % args.early_stopping)
            break

        print('=====\n')

        if epoch + 1 == args.epochs:
            print('MAX EPOCHS (%d) REACHED' % args.epochs)

    print('BEST VALIDATION LOSS: %.3f' % best_loss)
def optimization_run(optparams, debug=True):
    """Invert every configured layer of every configured model.

    For each model in ``settings.model`` and each of its layers, the
    reference image is pushed through the reference network, a truncated
    network ending at that layer is generated, and ``inversion`` reconstructs
    an image from the layer's activation.  Each result is saved on its own
    and placed into a models-by-layers overview figure, which is written to
    ``results/results_all_<image name>.png``.

    Args:
        optparams: list of optimisation-stage dicts; ``optparams[0]`` and
            ``optparams[1]`` are updated in place (``jitterT``, ``C``).
        debug: forwarded to ``inversion``.
    """
    def ensure_dir(path):
        # makedirs also creates missing parents (os.mkdir would fail there).
        if not os.path.isdir(path):
            os.makedirs(path)

    # create a plot for all results:
    nrow = settings.nModels
    ncol = 8

    # figure
    plt.ioff()
    # squeeze=False keeps `axs` two-dimensional even for a single model, so
    # the axs[i, j] indexing below is always valid.
    f1, axs = plt.subplots(nrows=nrow, ncols=ncol, squeeze=False)
    # distance between subplots
    f1.subplots_adjust(wspace=0, hspace=0.1)

    # hide the axes of every cell
    for i in range(nrow):
        for j in range(ncol):
            plt.sca(axs[i, j])
            plt.axis('off')

    # columns names
    cols = ['conv1', 'conv2', 'conv3', 'conv4', 'conv5', 'pool5', 'fc6',
            'fc7']
    for ax, col in zip(axs[0], cols):
        ax.set_title(col)

    # iterate over all models
    for m in range(settings.nModels):
        model = settings.model[m]
        if model is None:
            continue

        # =============== MODEL m ====================================
        # models means
        mean = np.load(model['mean'])
        mean = mean.squeeze()
        transformer.set_mean('data', mean.mean(1).mean(1))

        # Load reference network which one want to investigate
        net = caffe.Classifier(model['prototxt'], model['weights'],
                               caffe.TEST)
        print(net.blobs.keys())
        # Expose an input blob named 'X' under the name 'data' used below.
        if 'X' in net.blobs.keys():
            net.blobs['data'] = net.blobs['X']
            net.blobs.pop('X')

        # get original input size of network
        original_w = net.blobs['data'].width
        original_h = net.blobs['data'].height

        # setup the output path
        ensure_dir(model['vis2folder'])
        output_folder = model['vis2folder'] + '/img_inv_' + \
            os.path.splitext(settings.refimage_name)[0] + '/'
        ensure_dir(output_folder)

        # which class to visualize
        nLayers = len(model['layers'])
        for l in range(nLayers):
            layer = model['layers'][l].name
            filename = 'layer_' + layer
            refimage_path = settings.refimage_path + settings.refimage_name

            print("----------")
            print("layer: %s\tref_image: %s\tfilename: %s" % (
                layer, refimage_path, filename))
            print("----------")

            # if a specific output folder is provided
            if len(sys.argv) == 4:
                output_folder = str(sys.argv[3])
            print("Output dir: %s" % output_folder)
            print("-----------")

            # get the reference image
            ref_image = np.float32(PIL.Image.open(refimage_path))
            image = transformer.preprocess('data', ref_image)
            net.blobs['data'].data[0] = image.copy()
            acts = net.forward(end=layer)
            phi_x0 = acts[layer][0]  # reference representation
            # Two spaces after the colon reproduce the former
            # `print 'text ', value` output.
            print('shape of the reference layer:  %s' % (phi_x0.shape,))

            ensure_dir('./models/' + model['name'])

            # initialize a new network
            params = {
                'path2net': os.getcwd() + '/models/' + model['name'] +
                            '/test_' + layer + '.prototxt',
                'path2solver': os.getcwd() + '/models/' + model['name'] +
                               '/solver_' + layer + '.prototxt',
                'useGPU': settings.gpu,
                'DEVICE_ID': 0
            }

            data_shape = net.blobs['data'].data.shape
            layer_shape = net.blobs[layer].data.shape
            # caffenet
            if model['name'] == 'alexnet':
                AlexNet(data_shape, layer_shape, last_layer=layer,
                        params=params)
            # cliqueCNN
            elif model['name'] == 'cliqueCNN_long_jump':
                CliqueCNN(data_shape, layer_shape,
                          num_classes=model['nLabels'], last_layer=layer,
                          params=params)
            # posenet
            elif model['name'] == 'posenet':
                PoseNet(data_shape, layer_shape, last_layer=layer,
                        params=params)
            # videonet
            elif model['name'] == 'videonet':
                VideoNet(data_shape, layer_shape, last_layer=layer,
                         params=params)
            # CNN_LSTM-Net
            elif model['name'] == 'cnn_lstm':
                CNN_LSTN_Net(data_shape, layer_shape, last_layer=layer,
                             params=params)

            new_net = caffe.Net(params['path2net'], model['weights'],
                                caffe.TEST)

            # !!!!! Adaptive jitter range
            # splitext removes only the final extension; splitting the whole
            # absolute path at its first '.' broke as soon as any directory
            # name contained a dot.
            receptiveFieldStride = np.load(
                os.path.splitext(params['path2net'])[0] + '_stride.npy')
            # NOTE(review): `/` floors on Python 2 when the stride is stored
            # as an integer but not on Python 3 -- confirm the array dtype
            # before relying on this under Python 3.
            optparams[0]['jitterT'] = np.max(
                [1, int(round(receptiveFieldStride[-1] / 4))]) - 1

            # !!!! Adaptive weight factor
            optparams[0]['C'] = model['layers'][l].C
            optparams[1]['C'] = model['layers'][l].C

            assert new_net.blobs['data'].data.shape[2] == original_h
            assert new_net.blobs['data'].data.shape[3] == original_w

            # generate class visualization via octavewise gradient ascent
            output_image = inversion(new_net, phi_x0, optparams, debug=debug)

            # normalize image = vl_imsc
            output_image = output_image - output_image.min()
            output_image = output_image / output_image.max()
            output_image = 255 * np.clip(output_image, 0, 1)

            # save result image
            path = save_image(output_folder, filename, output_image)
            print("Saved to %s" % path)

            # add result image to the common plot
            plt.sca(axs[m, l])
            plt.imshow(np.uint8(output_image))
            print('----------------------------------------------------------------------------------------------------')

    ensure_dir('./results/')
    f1.savefig('results/results_all_' +
               settings.refimage_name.split('.')[0] + '.png', dpi=600)
def main():
    """Resume PoseNet training from the latest checkpoint in ``preResPath``.

    Checkpoints go to ``path + <yymmdd>/PoseNet_<i>.ckpt`` every 5000
    iterations (iteration 0 excluded); progress is printed and appended to a
    log file named ``<mmdd>`` in the current working directory.  ``path``,
    ``preResPath``, ``batch_size`` and ``max_iterations`` are module-level
    names.

    Raises:
        ValueError: if ``preResPath`` contains no checkpoint.
    """
    today = datetime.date.today()
    outputFoldPath = path + today.strftime('%y%m%d') + '/'
    if not os.path.exists(outputFoldPath):
        os.makedirs(outputFoldPath)
    resFile = os.getcwd() + '/' + today.strftime('%m%d')

    def log(message):
        # Re-opened per message so the log is complete on disk even if the
        # run is killed.
        with open(resFile, 'a') as resfo:
            resfo.write(message + '\n')

    images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
    poses_x = tf.placeholder(tf.float32, [batch_size, 3])
    poses_q = tf.placeholder(tf.float32, [batch_size, 4])
    datasource = get_data()

    net = PoseNet({'data': images})
    p1_x = net.layers['cls1_fc_pose_xyz']
    p1_q = net.layers['cls1_fc_pose_wpqr']
    p2_x = net.layers['cls2_fc_pose_xyz']
    p2_q = net.layers['cls2_fc_pose_wpqr']
    p3_x = net.layers['cls3_fc_pose_xyz']
    p3_q = net.layers['cls3_fc_pose_wpqr']

    l1_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p1_x, poses_x)))) * 0.3
    l1_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p1_q, poses_q)))) * 150
    l2_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p2_x, poses_x)))) * 0.3
    l2_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p2_q, poses_q)))) * 150
    l3_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p3_x, poses_x)))) * 1
    l3_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p3_q, poses_q)))) * 500

    loss = l1_x + l1_q + l2_x + l2_q + l3_x + l3_q
    opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=0.00000001,
                                 use_locking=False,
                                 name='Adam').minimize(loss)

    # Set GPU options
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6833)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver(max_to_keep=5, keep_checkpoint_every_n_hours=2)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        sess.run(init)

        previous_checkpoint = tf.train.latest_checkpoint(preResPath)
        if previous_checkpoint is None:
            raise ValueError('no checkpoint found in ' + str(preResPath))
        saver.restore(sess, previous_checkpoint)

        # Start a fresh log and record the checkpoint that was actually
        # restored (the output base `path` was logged here before).
        with open(resFile, 'w') as resfo:
            resfo.write('load previous training weight file:' +
                        previous_checkpoint + '\n')

        data_gen = gen_data_batch(datasource)
        for i in range(max_iterations):
            np_images, np_poses_x, np_poses_q = next(data_gen)
            feed = {
                images: np_images,
                poses_x: np_poses_x,
                poses_q: np_poses_q
            }
            sess.run(opt, feed_dict=feed)
            np_loss = sess.run(loss, feed_dict=feed)

            if i % 20 == 0:
                time_str = time.strftime('%m-%d %H:%M:%S',
                                         time.localtime(time.time()))
                message = (time_str + "\titeration: " + str(i) + "\n\t" +
                           "Loss is: " + str(np_loss))
                print(message)
                log(message)

            if i > 0 and i % 5000 == 0:
                outputFile = outputFoldPath + 'PoseNet_' + str(i) + '.ckpt'
                saver.save(sess, outputFile, write_meta_graph=False)
                message = "Intermediate file saved at: " + outputFile
                print(message)
                log(message)
def __init__(self, params, output_dir):
    """Create models, KITTI datasets, optimizer, logging and checkpoints.

    Args:
        params: hyper-parameter object; ``input_h``, ``input_w``,
            ``num_input_frames``, ``data_dir``, ``batch_size``,
            ``frame_ids`` and ``epochs`` are read here.
        output_dir: directory receiving TensorBoard logs and checkpoints.
    """
    self.params = params
    frame_size = (params.input_h, params.input_w)

    # Models
    self.models = {
        'disparity': DisparityNet(input_shape=frame_size + (3,)),
        'pose': PoseNet(
            input_shape=frame_size + (3 * params.num_input_frames,),
            num_input_frames=params.num_input_frames),
    }

    # Datasets
    train_source = KittiSFMDataset(params.data_dir,
                                   'train',
                                   frame_size,
                                   batch_size=params.batch_size,
                                   frame_idx=params.frame_ids)
    val_source = KittiSFMDataset(params.data_dir,
                                 'val',
                                 frame_size,
                                 frame_idx=params.frame_ids,
                                 batch_size=params.batch_size)
    self.train_dataset = train_source.load_tfdataset()
    self.val_dataset = val_source.load_tfdataset()

    # Optimizer
    steps_per_epoch = train_source.num_samples // params.batch_size
    self.total_iteration = steps_per_epoch * params.epochs
    schedule = tf.keras.optimizers.schedules.PolynomialDecay(
        0.0002,
        end_learning_rate=0.000001,
        decay_steps=self.total_iteration,
        power=0.5)
    self.optimizer = tf.keras.optimizers.Adam(schedule)

    # Tensorboard & Meters
    self.train_summary_writer = tf.summary.create_file_writer(
        os.path.join(output_dir, 'train_logs'))
    self.test_summary_writer = tf.summary.create_file_writer(
        os.path.join(output_dir, 'val_logs'))

    meter_names = ('ssim', 'l1', 'smooth')
    self.train_meter = {
        name: tf.keras.metrics.Mean(name=name) for name in meter_names
    }
    self.val_meter = {
        name: tf.keras.metrics.Mean(name=name) for name in meter_names
    }
    self.step = 0

    # Load states from optimiser and model if available
    self.ckpt_disp, self.manager_disp = self.setup_logger(
        self.models['disparity'],
        os.path.join(output_dir, 'disparity_model'))
    self.ckpt_pose, self.manager_pose = self.setup_logger(
        self.models['pose'], os.path.join(output_dir, 'pose_model'))

    # Continue after the stored step when a checkpoint exists.
    if self.manager_disp.latest_checkpoint:
        self.start_epoch = int(self.ckpt_disp.step) + 1
    else:
        self.start_epoch = int(self.ckpt_disp.step)
    print("Starting training step {}".format(self.ckpt_disp.step.numpy()))

    # Helpers
    self.pix_coords = pixel_coord(params.batch_size, params.input_h,
                                  params.input_w, True)  # [b, 3, npoints]
def process(config_file, rep):
    """Train the position head of a GoogLeNet-based PoseNet from a JSON config.

    The config supplies ``directory``, ``batch_size``, ``training_dataset``,
    ``netFile`` and optionally ``retrain`` (a checkpoint to restore first)
    and ``rep``. Three Adam optimizers are built with learning rates 1e-3,
    1e-4 and 1e-5 and run one after another, each for ``iterations`` steps.

    Args:
        config_file: Path passed to ``Utils.load_json_file``.
        rep: Repetition index; when ``None`` it is read from ``js['rep']``.
            ``-1`` selects 3000 epochs and checkpoint paths without the
            repetition suffix; any other value trains for 50 epochs.

    NOTE(review): the source of this function was flattened onto a single
    line; the block nesting below is reconstructed and should be confirmed
    against the original file, in particular the reload/checkpoint branch
    inside the training loop and the final save.
    """
    # Hard-coded path to the project's network definitions.
    sys.path.append('/home/weihao/posenet/my_nets')
    from posenet import GoogLeNet as PoseNet
    from utils import Utils

    js = Utils.load_json_file(config_file)
    location = js['directory']
    batch_size = int(js['batch_size'])
    dataset = js['training_dataset']
    netFile_base = js['netFile']
    retrain = None
    if 'retrain' in js:
        retrain = js['retrain']
    if rep is None:
        rep = int(js['rep'])
    lr = 1e-3
    rg = 3  # number of optimizer stages
    # The number of entries directly under `location` sizes the last axis
    # of the `shift` placeholder.
    classes = glob.glob(os.path.join(location, '*'))
    num_class = len(classes)
    images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
    poses_x = tf.placeholder(tf.float32, [batch_size, 3])
    poses_q = tf.placeholder(tf.float32, [batch_size, 4])
    shift = tf.placeholder(tf.float32, [batch_size, 1, 1, num_class])
    net = PoseNet({'data': images, 'shift': shift})
    # p1_x = net.layers['cls1_fc_pose_xyz']
    # p1_q = net.layers['cls1_fc_pose_wpqr']
    # p2_x = net.layers['cls2_fc_pose_xyz']
    # p2_q = net.layers['cls2_fc_pose_wpqr']
    p3_x = net.layers['cls3_fc_pose_xyz']
    p3_q = net.layers['cls3_fc_pose_wpqr']
    # l1_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p1_x, poses_x)))) * 0.3
    # l1_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p1_q, poses_q)))) * 150
    # l2_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p2_x, poses_x)))) * 0.3
    # l2_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p2_q, poses_q)))) * 150
    l3_x = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p3_x, poses_x)))) * 1
    # l3_q is built but not part of the optimized loss below.
    l3_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p3_q, poses_q)))) * 500.0
    loss = l3_x  # + l3_q # l1_x + l1_q + l2_x + l2_q + l3_x + l3_q
    # One minimize op per stage; the learning rate drops tenfold each stage.
    opts = []
    for A in range(rg):
        ao = tf.train.AdamOptimizer(learning_rate=lr,
                                    beta1=0.9,
                                    beta2=0.999,
                                    epsilon=0.00000001,
                                    use_locking=False,
                                    name='Adam')
        opts.append(ao.minimize(loss))
        lr /= 10
    # learning_rate = tf.placeholder(tf.float32, shape=[])
    # opt = tf.train.GradientDescentOptimizer(
    #     learning_rate=learning_rate).minimize(loss)
    # Set GPU options
    # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6833)
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)
    # NOTE(review): config_g is never used; the session below is created
    # with `config`, so the GPU memory fraction is not applied.
    config_g = tf.ConfigProto(gpu_options=gpu_options)
    # device_count = {'CPU': 4},
    config = tf.ConfigProto(inter_op_parallelism_threads=6,
                            intra_op_parallelism_threads=6)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    rds = Utils.get_raw_data_indoor(location, dataset, rep)
    with tf.Session(config=config) as sess:
        # Load the data
        sess.run(init)
        if retrain:
            saver.restore(sess, retrain)
        ds = Utils.get_data(rds, rep)
        data_gen = Utils.gen_data_batch(ds, batch_size)
        # Number of steps in one pass over `ds`.
        # NOTE(review): `/` yields a float on Python 3, so the modulo tests
        # below only fire on exact multiples — confirm the intended
        # interpreter / whether `//` was meant.
        reload = len(ds.images) / batch_size
        epoch = 50.0
        if rep == -1:
            epoch = 3000.0
        iterations = int(len(ds.images) * epoch / batch_size)
        print("Total images {}, rep {}, iter {}, reload {}".format(
            len(ds.images), rep, iterations, reload))
        for A in range(rg):
            import datetime
            # Per-stage checkpoint location.
            if rep > -1:
                netFile = '{}/Net{}_{}/PNet'.format(netFile_base, A, rep)
            else:
                netFile = '{}/Net{}/PNet'.format(netFile_base, A)
            t0 = datetime.datetime.now()
            for i in range(iterations):
                np_images, np_poses_x, np_poses_q, np_shift = next(data_gen)
                feed = {
                    images: np_images,
                    poses_x: np_poses_x,
                    poses_q: np_poses_q,
                    shift: np_shift
                }  # , learning_rate: lr}
                sess.run(opts[A], feed_dict=feed)
                # Loss is re-evaluated on the same batch after the update.
                np_loss = sess.run(loss, feed_dict=feed)
                if (i + 1) % reload == 0:
                    # Every 20 passes: report progress and checkpoint.
                    if (i + 1) % (reload * 20) == 0:
                        t1 = datetime.datetime.now()
                        print("iteration: {} loss {} time {} lr {}".format(
                            i, np_loss, t1 - t0, A))
                        t0 = t1
                        saver.save(sess, netFile)
                    # After each pass: rebuild the dataset and its batch
                    # generator.
                    ds = Utils.get_data(rds, rep)
                    data_gen = Utils.gen_data_batch(ds, batch_size)
        # Final checkpoint after all stages.
        if rep > -1:
            netFile = '{}/Net_{}/PNet'.format(netFile_base, rep)
        else:
            netFile = '{}/Net/PNet'.format(netFile_base)
        saver.save(sess, netFile)
        print("Intermediate file saved at: " + netFile)
def main():
    """Fine-tune PoseNet on the orientation (quaternion) heads only.

    Builds the TF1 graph, initialises it from ``path + 'posenet.npy'``,
    trains for ``max_iterations`` steps with Adam, logs the loss every 20
    steps to stdout and to a results file named after today's date (MMDD)
    in the current working directory, and writes a checkpoint into a
    dated folder under ``path`` every 5000 steps and once more at the end.

    Relies on the module-level names ``path``, ``batch_size``,
    ``max_iterations``, ``get_data`` and ``gen_data_batch``.
    """
    today = datetime.date.today()
    outputFoldPath = path + today.strftime('%y%m%d') + '/'
    if not os.path.exists(outputFoldPath):
        os.makedirs(outputFoldPath)
    resFile = os.getcwd() + '/' + today.strftime('%m%d')

    images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
    # poses_x is still fed each step although no position loss is built.
    poses_x = tf.placeholder(tf.float32, [batch_size, 3])
    poses_q = tf.placeholder(tf.float32, [batch_size, 4])
    datasource = get_data()
    net = PoseNet({'data': images})

    # Only the three quaternion outputs contribute to the loss; the two
    # auxiliary heads are down-weighted by 0.3.
    p1_q = net.layers['cls1_fc_pose_wpqr']
    p2_q = net.layers['cls2_fc_pose_wpqr']
    p3_q = net.layers['cls3_fc_pose_wpqr']
    l1_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p1_q, poses_q)))) * 0.3
    l2_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p2_q, poses_q)))) * 0.3
    l3_q = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(p3_q, poses_q))))
    loss = l1_q + l2_q + l3_q
    opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=0.00000001,
                                 use_locking=False,
                                 name='Adam').minimize(loss)

    # Set GPU options
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6833)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    # Fix: bind a default checkpoint path up front. Previously `outputFile`
    # only existed after the first 5000-step checkpoint, so the final save
    # raised NameError whenever max_iterations <= 5000. For longer runs the
    # final save still targets the most recent intermediate path, as before.
    outputFile = outputFoldPath + 'PoseNet.ckpt'

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        # Load the data
        sess.run(init)
        net.load(path + 'posenet.npy', sess)
        # Start a fresh results log; context managers guarantee the handle
        # is closed even if a write fails.
        with open(resFile, 'w') as resfo:
            resfo.write('load initial weight file:' + path + 'posenet.npy\n')

        data_gen = gen_data_batch(datasource)
        for i in range(max_iterations):
            np_images, np_poses_x, np_poses_q = next(data_gen)
            feed = {
                images: np_images,
                poses_x: np_poses_x,
                poses_q: np_poses_q
            }
            sess.run(opt, feed_dict=feed)
            # Loss is evaluated on the same batch after the update step.
            np_loss = sess.run(loss, feed_dict=feed)

            if i % 20 == 0:
                time_str = time.strftime('%m-%d %H:%M:%S',
                                         time.localtime(time.time()))
                report = (time_str + "\titeration: " + str(i) + "\n\t" +
                          "Loss is: " + str(np_loss))
                print(report)
                with open(resFile, 'a') as resfo:
                    resfo.write(report + '\n')

            if i > 0 and i % 5000 == 0:
                outputFile = outputFoldPath + 'PoseNet_' + str(i) + '.ckpt'
                saver.save(sess, outputFile)
                print("Intermediate file saved at: " + outputFile)
                with open(resFile, 'a') as resfo:
                    resfo.write("Intermediate file saved at: " + outputFile +
                                '\n')

        # Final checkpoint after the last iteration.
        saver.save(sess, outputFile)
        print("Intermediate file saved at: " + outputFile)
import os
import cv2
import csv
import numpy as np
from posenet import PoseNet
import utils

predictor = PoseNet.Predictor('models\\posenet101.pkl', 0)


def load(path):
    """Parse the numeric rows enclosed by the '{' and '}' lines of a text file.

    Every line strictly between the first line equal to '{' and the first
    line equal to '}' (after stripping whitespace) is split on whitespace
    and converted to floats.

    Args:
        path: File to read.

    Returns:
        A NumPy array built from one tuple of floats per enclosed line.

    Raises:
        ValueError: If either brace line is missing or a token is not a
            valid float.
    """
    with open(path) as handle:
        stripped = [raw.strip() for raw in handle]
    first = stripped.index('{') + 1
    last = stripped.index('}')
    parsed = []
    for entry in stripped[first:last]:
        parsed.append(tuple(float(token) for token in entry.split()))
    return np.array(parsed)


thetas = []
# Walk the validation CSV; the first column of each row is a file name.
with open('Data\\RAW\\valid_set\\validation_set.csv', newline='') as csvfile:
    spamreader = csv.reader(csvfile, delimiter=',', quotechar='|')
    for row in spamreader:
        filename = row[0]
def loadModel() -> typing.Dict[str, typing.Union[
        torch.nn.Module, torch.optim.Optimizer, typing.List, float, float, int,
        typing.Tuple[float, float], typing.Tuple[float, float], typing.Tuple[
            float, float], typing.Tuple[float, float], typing.Tuple[float, float]]]:
    """
    Build the network and its learning objects, restoring them from a checkpoint when possible.

    A fresh PoseNet, Adam optimizer and two LR schedulers are created first.
    If ``Config.getArgs().pretrained_model`` is set (or ``resume`` is true)
    the checkpoint is read through ``util.getPretrainedModel``:

    * checkpoint has a ``"version"`` key: its ``"model"`` state dict is
      loaded, retrying once with the network wrapped in ``nn.DataParallel``
      and finally falling back to the default pretrained GoogLeNet weights.
      Optimizer, schedulers, loss/difference histories and the epoch counter
      are restored only when the version equals ``Config.version``.
    * no ``"version"`` key, file not found, or no checkpoint requested: the
      network is re-created from the default pretrained model.

    The network is always returned wrapped in ``nn.DataParallel`` and moved
    to the GPU when ``Config.useCuda()`` is true.

    NOTE(review): this function was flattened onto a few lines in the source;
    the nesting below is reconstructed from the tokens and comments and
    should be confirmed against the original file.

    NOTE(review): ``optimizer`` is built from the parameters of the first
    ``PoseNet`` instance. The fallback branches rebind ``network`` to a new
    ``PoseNet``, so in those paths the returned optimizer does not appear to
    reference the returned network's parameters — confirm whether callers
    rebuild it.

    :return: Dict with keys ``network``, ``optimizer``, ``schedulers``,
        ``trainingLoss``, ``validationLoss``, ``startingEpoch``,
        ``validationDifference``, ``trainingDifference``,
        ``testingDifference``, ``uncertainty`` and ``anees``.
    """
    # Create the network.
    network: torch.nn.Module = PoseNet(input_nc=3)
    # Default network should be placed on the GPU so that the other default things
    # can expect that to be the case.
    if Config.useCuda():
        network = network.cuda()
    Logger.log("Creating learning objects.", logger="main")
    optimizer: torch.optim.Optimizer = torch.optim.Adam(
        network.parameters(), lr=Config.getArgs().learning_rate)
    scheduler1: torch.optim.lr_scheduler.ReduceLROnPlateau = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer=optimizer,
        threshold=Config.getArgs().beta,
        verbose=Logger.shouldLog(),
        threshold_mode="abs",
        factor=Config.getArgs().factor)
    scheduler2: torch.optim.lr_scheduler.StepLR = torch.optim.lr_scheduler.StepLR(
        optimizer=optimizer, step_size=80, gamma=Config.getArgs().factor)
    # Defaults used when nothing is restored from a checkpoint.
    trainingLoss: typing.List[float] = []
    validationLoss: typing.List[float] = []
    trainingDiff: typing.List[float] = []
    validationDiff: typing.List[float] = []
    testingDiff: typing.List[float] = []
    uncertainty: typing.List[float] = []
    anees: typing.List[float] = []
    starting_epoch: int = 0
    # Tracks whether `network` is already wrapped in nn.DataParallel.
    hasParallelilized: bool = False
    defaultPretrainedModel: str = "pretrained-models/places-googlenet.pickle"
    Logger.log("Loading from pretrained model {}".format(
        Config.getArgs().pretrained_model),
               logger="main")
    # Load from pretrained model.
    path: str = Config.getArgs().pretrained_model
    if path is not None or Config.getArgs().resume:
        try:
            # Load checkpoint from file
            checkpoint: typing.Dict[str, typing.Any] = util.getPretrainedModel(
                path=path)
            # If we have version in the data of checkpoint
            if "version" in checkpoint:
                # But it's not the current version...
                if checkpoint["version"] != Config.version:
                    # Only a warning is emitted here; the state dict is
                    # still tried below.
                    Logger.warn("User wants to load outdated model file!")
                # Try to load the PoseNet weights stored in the checkpoint.
                try:
                    # We can't load in from googlenet here because googlenet doesn't have a version key
                    network.load_state_dict(checkpoint["model"])
                except RuntimeError:
                    # If we get a RuntimeError it's probably because we're trying to load something
                    # that was wrapped in the nn.DataParallel layer.
                    # We could prewrap it all beforehand, but that would mean we would need to
                    # circumnavigate GoogLeNet and its keys.
                    network = torch.nn.DataParallel(network)
                    hasParallelilized = True
                    try:
                        network.load_state_dict(checkpoint["model"])
                    except RuntimeError:
                        # if we still get an error message, that means we changed the architecture.
                        Logger.warn(
                            "Trying to load from version that has different architecture than current version."
                        )
                        Logger.log("Loading from default model.", logger="min")
                        network = torch.nn.DataParallel(
                            PoseNet(input_nc=3,
                                    weights=util.getPretrainedModel(
                                        defaultPretrainedModel)))
                if checkpoint["version"] == Config.version:
                    # We wouldn't want to load in these things from a different version.
                    optimizer.load_state_dict(checkpoint["optimizer"])
                    # scheduler1 is replaced by the stored object itself,
                    # whereas scheduler2 only has its state restored.
                    scheduler1 = checkpoint["schedulers"][0]
                    scheduler2.load_state_dict(checkpoint["schedulers"][1])
                    trainingLoss = checkpoint["trainingLoss"]
                    validationLoss = checkpoint["validationLoss"]
                    trainingDiff = checkpoint["trainingDifference"]
                    validationDiff = checkpoint["validationDifference"]
                    # Resume from the epoch after the one that was saved.
                    starting_epoch = checkpoint["epoch"] + 1
                    testingDiff = checkpoint["testingDifference"]
                    uncertainty = checkpoint["uncertainty"]
                    anees = checkpoint["anees"]
            else:
                # If we don't have a version in the model file, we'll load GoogLeNet.
                Logger.log("Loading network from default pretrained model.")
                network = PoseNet(
                    input_nc=3,
                    weights=util.getPretrainedModel(defaultPretrainedModel))
        except FileNotFoundError:
            Logger.error("Cannot find pretrained model file!")
            Logger.log("Loading from default pretrained model.")
            # Load from googleNet
            network = PoseNet(
                input_nc=3,
                weights=util.getPretrainedModel(defaultPretrainedModel))
    else:
        Logger.log("Loading from default pretrained model.")
        # Load from googleNet
        network = PoseNet(
            input_nc=3,
            weights=util.getPretrainedModel(defaultPretrainedModel))
    # Wrap in DataParallel unless a branch above already did; the wrapper
    # is applied on both GPU and CPU.
    if not hasParallelilized:
        network = torch.nn.DataParallel(network)
    # If we have access to GPUs, put PoseNet on them.
    if Config.useCuda():
        network = network.cuda()
    return {
        "network": network,
        "optimizer": optimizer,
        "schedulers": [scheduler1, scheduler2],
        "trainingLoss": trainingLoss,
        "validationLoss": validationLoss,
        "startingEpoch": starting_epoch,
        "validationDifference": validationDiff,
        "trainingDifference": trainingDiff,
        "testingDifference": testingDiff,
        "uncertainty": uncertainty,
        "anees": anees
    }
# save the longitude binarizer to disk
print("[INFO] serializing longitude label binarizer...")
# A context manager closes the file even if pickling or writing fails;
# the previous open()/close() pair leaked the handle on error.
with open(args["longitudebin"], "wb") as f:
    f.write(pickle.dumps(longitudeLB))

# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for testing
split = train_test_split(data,
                         latitudeLabels,
                         longitudeLabels,
                         test_size=0.2)
(trainX, testX, trainLatitudeY, testLatitudeY, trainLongitudeY,
 testLongitudeY) = split

# initialize VGG multi-output network; each output head gets one unit per
# class known to the corresponding label binarizer
model = PoseNet.VGG16_mod(128,
                          128,
                          numLatitudes=len(latitudeLB.classes_),
                          numLongitudes=len(longitudeLB.classes_),
                          finalAct="softmax")

# define two dictionaries: one that specifies the loss method for
# each output of the network along with a second dictionary that
# specifies the weight per loss
losses = {
    "latitude_output": "categorical_crossentropy",
    "longitude_output": "categorical_crossentropy",
}
lossWeights = {"latitude_output": 1.0, "longitude_output": 1.0}

# initialize the optimizer and compile the model
print("[INFO] compiling model...")
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
def train():
    """Pretrain a stacked autoencoder, fine-tune it, plot diagnostics and save a PoseNet.

    Loads three image arrays from ``data/``, scales them by 1/255, augments
    them with a flip along axis 2, shuffles, and holds out the last ten
    samples for a visual reconstruction check. The layers are pretrained
    via ``pretrain_layers``, fine-tuned as a ``DeepAE`` on the GPU, and the
    resulting network is wrapped in ``PoseNet`` and written to
    ``models/posenet_01.model``.

    NOTE(review): the source was flattened onto one line; the per-layer
    ``print`` inside the encode loop is a reconstruction and may originally
    have followed the loop.
    """
    layers = [
        PoolAE(3, 40, 7, stride=3, padding=6),
        PoolAE(40, 80, 5),
        PoolAE(80, 160, 3),
        AE(160, 256, 1),
        AE(256, 256, 1),
    ]

    voc_data = np.load('data/pretrain_data.npy')
    pose_data = np.load('data/pose_data.npy')
    google_data = np.load('data/google_data.npy')
    samples = np.concatenate((voc_data, pose_data, google_data)) / 255
    # Double the set with a copy flipped along axis 2.
    samples = np.concatenate((samples, np.flip(samples, 2)))
    np.random.shuffle(samples)
    held_out = samples[-10:]
    #data = data[:30000] # posenet_00
    samples = samples[10000:-10]  # posenet_01
    print(samples.shape)

    epochs = 15
    curves = pretrain_layers(layers, epochs, samples)
    deep_net = DeepAE(layers)
    deep_net.cuda()
    deep_net.train()
    finetune_loss = finetune(deep_net, 30, samples)
    curves.append(np.array(finetune_loss))

    # Figure 0: one learning curve per pretraining stage plus fine-tuning.
    plt.figure(0)
    for slot, curve in enumerate(curves, start=1):
        plt.subplot(2, 4, slot)
        plt.plot(curve)

    # Push the held-out images through every encoder, then back through the
    # decoders in reverse order.
    X = torch.from_numpy(held_out.transpose(0, 3, 1, 2)).type(dtype)
    X = Variable(X, requires_grad=False)
    print(X.size())
    code = X
    for stage in layers:
        code = stage.encode(code)
        print(code.size())
    for stage in reversed(layers):
        code = stage.decode(code)
    y = code.data.cpu().numpy().transpose(0, 2, 3, 1)
    X = X.data.cpu().numpy().transpose(0, 2, 3, 1)

    def stretch(values):
        # Min-max rescale so the array spans [0, 1] for display.
        return (values - values.min()) / (values.max() - values.min())

    # Figure 1: originals on the top row, reconstructions underneath.
    plt.figure(1)
    for col in range(10):
        plt.subplot(2, 10, col + 1)
        plt.imshow(X[col])
        plt.axis('off')
        plt.subplot(2, 10, col + 1 + 10)
        plt.imshow(stretch(y[col]))
        plt.axis('off')

    # Figure 2: the 40 filters of the first layer.
    first_weights = list(layers[0].parameters())[0]
    params = stretch(first_weights.data.cpu().numpy().transpose(0, 2, 3, 1))
    plt.figure(2)
    for idx in range(40):
        plt.subplot(8, 5, idx + 1)
        plt.imshow(params[idx])
        plt.axis('off')

    posenet = PoseNet(deep_net)
    #print(posenet)
    torch.save(posenet, 'models/posenet_01.model')
import tensorflow as tf
import numpy as np
import cv2
from posenet import PoseNet, detect_pose, draw_pose, draw_keypoints

# initialize posenet from the package
model_path = 'posenet_resnet50float_stride16'
posenet = PoseNet(model_path)

# SET UP WEBCAM
# -------------
cap = cv2.VideoCapture(0)
# Request a 1280x720 capture. Named property ids replace the bare 3 / 4
# and match the cap.get() calls below.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)
# Read the resolution back from the capture object instead of assuming the
# request was applied.
CAMERA_RESOLUTION_WIDTH = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
CAMERA_RESOLUTION_HEIGHT = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
CENTER_X = CAMERA_RESOLUTION_WIDTH // 2
CENTER_Y = CAMERA_RESOLUTION_HEIGHT // 2

# MAIN LOOP
# ---------
while True:
    success, img = cap.read()  # read webcam capture
    if not success:
        # The frame grab failed, so there is no usable image; stop instead
        # of handing an invalid frame to the model.
        break
    # get keypoints for single pose estimation. it is a list of 17 keypoints
    keypoints = posenet.predict_singlepose(img)
    # track nose: horizontal offset of keypoint 0 from the frame centre
    nose_pos = keypoints[0]['position']
    nose_x = nose_pos[0] - CENTER_X

cap.release()
def main():
    """Train PoseNet on all six pose heads with an added L2 weight penalty.

    Builds the TF1 graph, initialises it from the ``posenet.npy`` weights,
    and runs ``max_iterations`` Adam steps. Every 500 steps the training
    and test losses (without the weight penalty) are printed and appended
    to ``practiceCurves.txt``; every 2000 steps and once at the end a
    checkpoint is written.

    Relies on the module-level names ``batch_size``, ``directory``,
    ``dataset``, ``dataset_test``, ``max_iterations``, ``get_data`` and
    ``gen_data_batch``.
    """
    random.seed(27)
    images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
    poses_x = tf.placeholder(tf.float32, [batch_size, 3])
    poses_q = tf.placeholder(tf.float32, [batch_size, 4])
    net = PoseNet({'data': images})

    # (output layer, target placeholder, loss weight) for each head.
    head_specs = (
        ('cls1_fc_pose_xyz', poses_x, 0.3),
        ('cls1_fc_pose_wpqr', poses_q, 225),
        ('cls2_fc_pose_xyz', poses_x, 0.3),
        ('cls2_fc_pose_wpqr', poses_q, 225),
        ('cls3_fc_pose_xyz', poses_x, 1),
        ('cls3_fc_pose_wpqr', poses_q, 500),
    )
    predictions = [net.layers[layer_name] for layer_name, _, _ in head_specs]
    # Weighted Euclidean norm of the residual for every head.
    terms = [
        tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(predicted, target)))) *
        weight
        for predicted, (_, target, weight) in zip(predictions, head_specs)
    ]

    WEIGHT_DECAY_FACTOR = 0.005
    # Create your variables
    #weights = tf.get_variable('weights', collections=['variables'])
    with tf.variable_scope('weights_norm'):
        penalties = [tf.nn.l2_loss(w) for w in tf.get_collection('weights')]
        weights_norm = tf.reduce_sum(input_tensor=WEIGHT_DECAY_FACTOR *
                                     tf.stack(penalties),
                                     name='weights_norm')
    # Add the weight decay loss to another collection called losses
    #tf.add_to_collection('losses', weights_norm)
    # To calculate your total loss
    #tf.add_n(tf.get_collection('losses'), name='total_loss')

    # `loss` is what gets reported; `losses` (with the penalty) is optimized.
    loss = terms[0]
    for term in terms[1:]:
        loss = loss + term
    losses = loss + weights_norm
    opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=0.00000001,
                                 use_locking=False,
                                 name='Adam').minimize(losses)

    # Set GPU options
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.6833))
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    outputFile = directory + "PoseNet.ckpt"
    #f = open('D:/PythonWorkSpace/posenet/PoseNet_AllDatasets_Closer/practiceCurves.txt','w')

    with tf.Session(config=session_config) as sess:
        # Load the data
        sess.run(init)
        net.load('.../weights/posenet.npy', sess)
        #saver.restore(sess, directory + "PoseNet.ckpt")
        datasource = get_data(directory + dataset)
        datasource_test = get_data(directory + dataset_test)
        data_gen = gen_data_batch(datasource)
        data_gen_test = gen_data_batch(datasource_test)

        for i in range(max_iterations):
            batch_images, batch_x, batch_q = next(data_gen)
            feed = {
                images: batch_images,
                poses_x: batch_x,
                poses_q: batch_q,
            }
            sess.run(opt, feed_dict=feed)

            if i % 500 == 0:
                # Compare the loss on the current training batch with the
                # loss on one batch drawn from the test generator.
                np_loss = sess.run(loss, feed_dict=feed)
                test_images, test_x, test_q = next(data_gen_test)
                feed_test = {
                    images: test_images,
                    poses_x: test_x,
                    poses_q: test_q,
                }
                np_loss_test = sess.run(loss, feed_dict=feed_test)
                print("iteration: " + str(i) + "\n\t" + "Loss is: " +
                      str(np_loss) + "\n\t" + "test Loss is: " +
                      str(np_loss_test))
                curve_row = (str(i) + " " + str(np_loss) + " " +
                             str(np_loss_test) + "\n")
                curve_log = open(directory + 'practiceCurves.txt', 'a')
                curve_log.write(curve_row)
                curve_log.close()

            if i % 2000 == 0:
                saver.save(sess, outputFile, global_step=i)
                print("Intermediate file saved at: " + outputFile)

        saver.save(sess, outputFile)
        print("Intermediate file saved at: " + outputFile)
def train():
    """Train PoseNet on all six pose heads and stream the loss to TensorBoard.

    Runs 3000 Adam steps with batches of 75 images. The loss is printed
    every 20 steps, a checkpoint is written to ``path_ckpt`` every 100
    steps and once at the end, and a scalar summary is added to
    ``./logs`` on every step.

    Relies on the module-level names ``utils``, ``PoseNet`` and
    ``path_ckpt``.
    """
    batch_size = 75
    max_iterations = 3000

    images = tf.placeholder(tf.float32, [batch_size, 224, 224, 3])
    poses_x = tf.placeholder(tf.float32, [batch_size, 3])
    poses_q = tf.placeholder(tf.float32, [batch_size, 4])
    datasource = utils.get_data("train")
    net = PoseNet({'data': images})

    # (output layer, target placeholder, loss weight) for each head.
    head_specs = (
        ('cls1_fc_pose_xyz', poses_x, 0.3),
        ('cls1_fc_pose_wpqr', poses_q, 150),
        ('cls2_fc_pose_xyz', poses_x, 0.3),
        ('cls2_fc_pose_wpqr', poses_q, 150),
        ('cls3_fc_pose_xyz', poses_x, 1),
        ('cls3_fc_pose_wpqr', poses_q, 500),
    )
    predictions = [net.layers[layer_name] for layer_name, _, _ in head_specs]
    # Weighted Euclidean norm of the residual for every head.
    terms = [
        tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(predicted, target)))) *
        weight
        for predicted, (_, target, weight) in zip(predictions, head_specs)
    ]
    loss = terms[0]
    for term in terms[1:]:
        loss = loss + term

    opt = tf.train.AdamOptimizer(learning_rate=0.0001,
                                 beta1=0.9,
                                 beta2=0.999,
                                 epsilon=0.00000001,
                                 use_locking=False,
                                 name='Adam').minimize(loss)

    # ---- create a summary to monitor cost tensor
    tf.summary.scalar("loss", loss)
    # merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()
    logs_path = './logs'
    # op to write logs to Tensorboard
    summary_writer = tf.summary.FileWriter(logs_path,
                                           graph=tf.get_default_graph())
    print("Run the command line: --> tensorboard --logdir=./logs "
          "\nThen open http://0.0.0.0:6006/ into your web browser")

    # ---- Set GPU options
    session_config = tf.ConfigProto(
        gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.6833))
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session(config=session_config) as sess:
        # Load the data
        sess.run(init)
        net.load('posenet.npy', sess)
        data_gen = utils.gen_data_batch(datasource, batch_size)

        for step in range(max_iterations):
            batch_images, batch_x, batch_q = next(data_gen)
            feed = {
                images: batch_images,
                poses_x: batch_x,
                poses_q: batch_q,
            }
            sess.run(opt, feed_dict=feed)  # run the optimizer
            np_loss = sess.run(loss, feed_dict=feed)  # get the loss

            # ---- print the logs
            if step % 20 == 0:
                print("iteration: " + str(step) + "\n\t" + "Loss is: " +
                      str(np_loss))
            if step % 100 == 0:
                saver.save(sess, path_ckpt)
                print("Intermediate file saved at: " + path_ckpt)

            # ---- write logs at every iteration
            summary = merged_summary_op.eval(feed_dict=feed)
            summary_writer.add_summary(summary, step)

        saver.save(sess, path_ckpt)
        print("Intermediate file saved at: " + path_ckpt)
from config import Config
from posenet import PoseNet
from torch import save
from logger import Logger

# Write the state dict of a freshly constructed PoseNet to the configured
# model file, using "test" and 0 as the format arguments of the file name.
fileName = Config.getArgs().model_file.format("test", 0)
network = PoseNet(input_nc=3)
try:
    save(obj={"model": network.state_dict()}, f=fileName)
except FileNotFoundError:
    Logger.log("ERROR: Can not find file.", logger="min")
    Logger.log(
        "\tCould it be that your working directory doesn't have the directory you specified?",
        logger="min")
else:
    # Fix: report success only when save() did not raise. Previously the
    # success message was logged even after the error branch had run.
    Logger.log("Saving was successful.", logger="min")
## LOAD DATASETS print('\nDATASET INFO.') train_data = MPII('../data/mpii_poses.npy') print('Train size: {} x {}'.format(len(train_data), train_data[0].size())) ## LOAD MODEL print('\nLOADING GAN.') def weights_init(m): if type(m) == torch.nn.Linear: torch.nn.init.xavier_uniform_(m.weight) torch.nn.init.constant_(m.bias, 0.0) netG = PoseNet(n_hidden=N_HIDDEN, mode='generator').to(device) netD = PoseNet(n_hidden=N_HIDDEN, mode='discriminator').to(device) if args.model: netG.load_state_dict(torch.load(args.model)['netG']) netD.load_state_dict(torch.load(args.model)['netD']) print('=> Loaded models from {:s}'.format(args.model)) else: netG.apply(weights_init) netD.apply(weights_init) print('Model params: {:.2f}M'.format( sum(p.numel() for p in netG.parameters()) / 1e6)) ## TRAINING print('\nTRAINING.') data_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size,