def get_idx_hands_up():
    """Scan the COCO pose LMDB and count samples whose wrists sit above the neck.

    Prints the index of each "hands up" sample (and optionally displays it),
    then prints the ratio ``handup_cnt / total_cnt``.

    NOTE(review): relies on CocoPoseLMDB / CocoPart / pose_to_img from the
    surrounding project. Assumes image y grows downward, so a *smaller* y
    means "higher"; the comparisons below use ``neck.y > wrist.y`` for
    "wrist above neck" -- TODO confirm coordinate convention.
    """
    from pose_augment import set_network_input_wh
    set_network_input_wh(368, 368)

    show_sample = True  # also decode images so failing samples can be displayed
    db = CocoPoseLMDB('/data/public/rw/coco-pose-estimation-lmdb/', is_train=True, decode_img=show_sample)
    db.reset_state()
    total_cnt = 0
    handup_cnt = 0
    for idx, metas in enumerate(db.get_data()):
        meta = metas[0]
        if len(meta.joint_list) <= 0:
            continue
        # only the first annotated person is considered
        body = meta.joint_list[0]
        # skip samples where neck or either wrist is missing (non-positive y)
        if body[CocoPart.Neck.value][1] <= 0:
            continue
        if body[CocoPart.LWrist.value][1] <= 0:
            continue
        if body[CocoPart.RWrist.value][1] <= 0:
            continue
        if body[CocoPart.Neck.value][1] > body[CocoPart.LWrist.value][1] or body[CocoPart.Neck.value][1] > body[CocoPart.RWrist.value][1]:
            print(meta.idx)
            handup_cnt += 1
            if show_sample:
                l1, l2, l3 = pose_to_img(metas)
                CocoPoseLMDB.display_image(l1, l2, l3)
        # NOTE(review): reconstructed indentation -- total_cnt appears to count
        # every sample that passed the visibility checks, not only hand-up ones.
        total_cnt += 1
    print('%d / %d' % (handup_cnt, total_cnt))
# NOTE(review): chunk starts mid-script -- `parser`, `args.gpus`, `args.model`,
# `args.batchsize`, `logger` and the imports are defined above the visible region.
parser.add_argument('--lr', type=str, default='0.001')  # kept as str; presumably converted to float downstream -- TODO confirm
parser.add_argument('--tag', type=str, default='test')
parser.add_argument('--checkpoint', type=str, default='')
parser.add_argument('--input-width', type=int, default=432)
parser.add_argument('--input-height', type=int, default=368)
parser.add_argument('--quant-delay', type=int, default=-1)
args = parser.parse_args()

modelpath = logpath = '../models/train/'

if args.gpus <= 0:
    raise Exception('gpus <= 0')

# define input placeholder
set_network_input_wh(args.input_width, args.input_height)
scale = 4

# CMU/VGG and MobileNet variants downsample by 8 instead of 4
if args.model in ['cmu', 'vgg'] or 'mobilenet' in args.model:
    scale = 8

set_network_scale(scale)
output_w, output_h = args.input_width // scale, args.input_height // scale

logger.info('define model+')
# placeholders are pinned to CPU; 38 = PAF channels, 19 = heatmap channels
with tf.device(tf.DeviceSpec(device_type="CPU")):
    input_node = tf.placeholder(tf.float32, shape=(args.batchsize, args.input_height, args.input_width, 3), name='image')
    vectmap_node = tf.placeholder(tf.float32, shape=(args.batchsize, output_h, output_w, 38), name='vectmap')
    heatmap_node = tf.placeholder(tf.float32, shape=(args.batchsize, output_h, output_w, 19), name='heatmap')

# prepare data
# NOTE(review): chunk starts mid-method -- this is the cleanup tail of a queue
# worker (the enclosing class/def is above the visible region).
        try:
            self.close_op.run()
        except Exception:
            pass
        logger.info("{} Exited.".format(self.name))

    def dequeue(self):
        # Return the dequeue op of the wrapped TF queue.
        return self.queue.dequeue()


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # force CPU-only for this throughput test

    from pose_augment import set_network_input_wh, set_network_scale
    # set_network_input_wh(368, 368)
    set_network_input_wh(480, 320)
    set_network_scale(8)

    # df = get_dataflow('/data/public/rw/coco/annotations', True, '/data/public/rw/coco/')
    df = _get_dataflow_onlyread('/data/public/rw/coco/annotations', True, '/data/public/rw/coco/')
    # df = get_dataflow('/root/coco/annotations', False, img_path='http://gpu-twg.kakaocdn.net/braincloud/COCO/')

    from tensorpack.dataflow.common import TestDataSpeed
    TestDataSpeed(df).start()
    sys.exit(0)

    # NOTE(review): unreachable after sys.exit(0) -- presumably kept as a
    # manual debugging path; the chunk is also truncated mid-loop below.
    with tf.Session() as sess:
        df.reset_state()
        t1 = time.time()
        for idx, dp in enumerate(df.get_data()):
def train(): parser = argparse.ArgumentParser( description='Training codes for Openpose using Tensorflow') parser.add_argument('--batch_size', type=str, default=10) parser.add_argument('--continue_training', type=bool, default=False) parser.add_argument('--checkpoint_path', type=str, default='checkpoints/train/mn_sepconv_33') # parser.add_argument('--backbone_net_ckpt_path', type=str, default='checkpoints/vgg/vgg_19.ckpt') parser.add_argument( '--backbone_net_ckpt_path', type=str, default='checkpoints/mobilenet/mobilenet_v2_1.0_96.ckpt') parser.add_argument('--train_vgg', type=bool, default=True) parser.add_argument('--annot_path', type=str, default='./COCO/annotations/') parser.add_argument('--img_path', type=str, default='./COCO/images/') # parser.add_argument('--annot_path_val', type=str, # default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/' # 'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/annotations/' # 'person_keypoints_val2017.json') # parser.add_argument('--img_path_val', type=str, # default='/run/user/1000/gvfs/smb-share:server=192.168.1.2,share=data/yzy/dataset/' # 'Realtime_Multi-Person_Pose_Estimation-master/training/dataset/COCO/images/val2017/') parser.add_argument('--save_checkpoint_frequency', type=str, default=1000) parser.add_argument('--save_summary_frequency', type=str, default=100) parser.add_argument('--stage_num', type=str, default=6) parser.add_argument('--hm_channels', type=str, default=19) parser.add_argument('--paf_channels', type=str, default=38) parser.add_argument('--input-width', type=int, default=368) parser.add_argument('--input-height', type=int, default=368) parser.add_argument('--max_echos', type=str, default=5) parser.add_argument('--use_bn', type=bool, default=False) parser.add_argument('--loss_func', type=str, default='l2') args = parser.parse_args() if not args.continue_training: start_time = time.localtime(time.time()) checkpoint_path = args.checkpoint_path + 
('%d-%d-%d-%d-%d-%d' % start_time[0:6]) os.mkdir(checkpoint_path) else: checkpoint_path = args.checkpoint_path logger = logging.getLogger('train') logger.setLevel(logging.DEBUG) fh = logging.FileHandler(checkpoint_path + '/train_log.log') fh.setLevel(logging.DEBUG) ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) formatter = logging.Formatter( '[%(asctime)s] [%(name)s] [%(levelname)s] %(message)s') fh.setFormatter(formatter) ch.setFormatter(formatter) logger.addHandler(ch) logger.addHandler(fh) logger.info(args) logger.info('checkpoint_path: ' + checkpoint_path) # define input placeholder with tf.name_scope('inputs'): raw_img = tf.placeholder(tf.float32, shape=[args.batch_size, 368, 368, 3]) # mask_hm = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.hm_channels]) # mask_paf = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.paf_channels]) hm = tf.placeholder(dtype=tf.float32, shape=[args.batch_size, 46, 46, args.hm_channels]) paf = tf.placeholder( dtype=tf.float32, shape=[args.batch_size, 46, 46, args.paf_channels]) # defien data loader logger.info('initializing data loader...') set_network_input_wh(args.input_width, args.input_height) scale = 8 set_network_scale(scale) df = get_dataflow_batch(args.annot_path, True, args.batch_size, img_path=args.img_path) steps_per_echo = df.size() enqueuer = DataFlowToQueue(df, [raw_img, hm, paf], queue_size=100) q_inp, q_heat, q_vect = enqueuer.dequeue() q_inp_split, q_heat_split, q_vect_split = tf.split(q_inp, 1), tf.split( q_heat, 1), tf.split(q_vect, 1) img_normalized = q_inp_split[0] / 255 - 0.5 # [-0.5, 0.5] df_valid = get_dataflow_batch(args.annot_path, False, args.batch_size, img_path=args.img_path) df_valid.reset_state() validation_cache = [] logger.info('initializing model...') # define vgg19 # with slim.arg_scope(vgg.vgg_arg_scope()): # vgg_outputs, end_points = vgg.vgg_19(img_normalized) # with slim.arg_scope(mobilenet_v2.training_scope(is_training=False)): # logits, 
endpoints = mobilenet_v2.mobilenet(img_normalized) layers = {} name = "" with tf.contrib.slim.arg_scope(mobilenet_v2.training_scope()): logits, endpoints = mobilenet_v2.mobilenet(img_normalized) for k, tensor in sorted(list(endpoints.items()), key=lambda x: x[0]): layers['%s%s' % (name, k)] = tensor # print(k, tensor.shape) def upsample(input, target): return tf.image.resize_bilinear( input, tf.constant([target.shape[1].value, target.shape[2].value]), align_corners=False) mobilenet_feature = tf.concat([ layers['layer_7/output'], upsample(layers['layer_14/output'], layers['layer_7/output']) ], 3) # pdb.set_trace() # get net graph net = PafNet(inputs_x=mobilenet_feature, stage_num=args.stage_num, hm_channel_num=args.hm_channels, use_bn=args.use_bn) hm_pre, paf_pre, added_layers_out = net.gen_net() # two kinds of loss losses = [] with tf.name_scope('loss'): for idx, (l1, l2), in enumerate(zip(hm_pre, paf_pre)): if args.loss_func == 'square': hm_loss = tf.reduce_sum( tf.square(tf.concat(l1, axis=0) - q_heat_split[0])) paf_loss = tf.reduce_sum( tf.square(tf.concat(l2, axis=0) - q_vect_split[0])) losses.append(tf.reduce_sum([hm_loss, paf_loss])) logger.info('use square loss') else: hm_loss = tf.nn.l2_loss( tf.concat(l1, axis=0) - q_heat_split[0]) paf_loss = tf.nn.l2_loss( tf.concat(l2, axis=0) - q_vect_split[0]) losses.append(tf.reduce_mean([hm_loss, paf_loss])) logger.info('use l2 loss') loss = tf.reduce_sum(losses) / args.batch_size global_step = tf.Variable(0, name='global_step', trainable=False) learning_rate = tf.train.exponential_decay(1e-4, global_step, steps_per_echo, 0.5, staircase=True) trainable_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='openpose_layers') if args.train_vgg: trainable_var_list = trainable_var_list + tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='MobilenetV2') with tf.name_scope('train'): train = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=1e-8).minimize( loss=loss, global_step=global_step, 
var_list=trainable_var_list) logger.info('initialize saver...') restorer = tf.train.Saver(tf.get_collection( tf.GraphKeys.TRAINABLE_VARIABLES, scope='MobilenetV2'), name='mobilenet_restorer') saver = tf.train.Saver(trainable_var_list) logger.info('initialize tensorboard') tf.summary.scalar("lr", learning_rate) tf.summary.scalar("loss2", loss) tf.summary.histogram('img_normalized', img_normalized) tf.summary.histogram('mobilenet_outputs', logits) tf.summary.histogram('added_layers_out', added_layers_out) tf.summary.image('mobilenet_out', tf.transpose(logits[0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=512) tf.summary.image('added_layers_out', tf.transpose(added_layers_out[0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=128) tf.summary.image('paf_gt', tf.transpose(q_vect_split[0][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=38) tf.summary.image('hm_gt', tf.transpose(q_heat_split[0][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=19) for i in range(args.stage_num): tf.summary.image('hm_pre_stage_%d' % i, tf.transpose(hm_pre[i][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=19) tf.summary.image('paf_pre_stage_%d' % i, tf.transpose(paf_pre[i][0:1, :, :, :], perm=[3, 1, 2, 0]), max_outputs=38) tf.summary.image('input', img_normalized, max_outputs=4) logger.info('initialize session...') merged = tf.summary.merge_all() config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: writer = tf.summary.FileWriter(checkpoint_path, sess.graph) sess.run(tf.group(tf.global_variables_initializer())) if args.backbone_net_ckpt_path is not None: logger.info('restoring mobilenet weights from %s' % args.backbone_net_ckpt_path) restorer.restore(sess, args.backbone_net_ckpt_path) if args.continue_training: saver.restore( sess, tf.train.latest_checkpoint(checkpoint_dir=checkpoint_path)) logger.info('restoring from checkpoint...') logger.info('start training...') coord = tf.train.Coordinator() enqueuer.set_coordinator(coord) enqueuer.start() 
while True: best_checkpoint = float('inf') for _ in tqdm(range(steps_per_echo), ): total_loss, _, gs_num = sess.run([loss, train, global_step]) echo = gs_num / steps_per_echo if gs_num % args.save_summary_frequency == 0: total_loss, gs_num, summary, lr = sess.run( [loss, global_step, merged, learning_rate]) writer.add_summary(summary, gs_num) logger.info('echos=%f, setp=%d, total_loss=%f, lr=%f' % (echo, gs_num, total_loss, lr)) if gs_num % args.save_checkpoint_frequency == 0: valid_loss = 0 if len(validation_cache) == 0: for images_test, heatmaps, vectmaps in tqdm( df_valid.get_data()): validation_cache.append( (images_test, heatmaps, vectmaps)) df_valid.reset_state() del df_valid df_valid = None for images_test, heatmaps, vectmaps in validation_cache: valid_loss += sess.run(loss, feed_dict={ q_inp: images_test, q_vect: vectmaps, q_heat: heatmaps }) if valid_loss / len(validation_cache) <= best_checkpoint: best_checkpoint = valid_loss / len(validation_cache) saver.save(sess, save_path=checkpoint_path + '/' + 'model', global_step=gs_num) logger.info( 'best_checkpoint = %f, saving checkpoint to ' % best_checkpoint + checkpoint_path + '/' + 'model-%d' % gs_num) else: logger.info('loss = %f drop' % valid_loss / len(validation_cache)) if echo >= args.max_echos: sess.close() return 0
# NOTE(review): chunk starts mid-method -- cleanup tail of a queue worker
# (the enclosing class/def is above the visible region).
        try:
            self.close_op.run()
        except Exception:
            pass
        logging.info("{} Exited.".format(self.name))

    def dequeue(self):
        # Return the dequeue op of the wrapped TF queue.
        return self.queue.dequeue()


if __name__ == '__main__':
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # force CPU-only for this smoke test

    from pose_augment import set_network_input_wh
    set_network_input_wh(368, 368)

    # df = get_dataflow('/data/public/rw/coco-pose-estimation-lmdb/', False)
    df = get_dataflow('/data/public/rw/coco-pose-estimation-lmdb/', True)

    # input_node = tf.placeholder(tf.float32, shape=(None, 368, 368, 3), name='image')
    with tf.Session() as sess:
        # net = CmuNetwork({'image': input_node}, trainable=False)
        # net.load('./models/numpy/openpose_coco.npy', sess)
        df.reset_state()
        t1 = time.time()
        for idx, dp in enumerate(df.get_data()):
            if idx == 0:
                for d in dp:
                    logging.info('%d dp shape={}'.format(d.shape))
# NOTE(review): chunk starts mid-function -- tail of a hand-keypoint
# rescaling routine; `rbox`, `rbox_w`, `left_hand_parts`, `right_hand_parts`
# and the enclosing def are above the visible region.
    rbox_h = rbox[3] - rbox[1]
    for id in right_hand_parts:
        # map normalized hand coords back into the right-hand box (absolute coords)
        right_hand_parts[id].x = rbox[0] + right_hand_parts[id].x * rbox_w
        right_hand_parts[id].y = rbox[1] + right_hand_parts[id].y * rbox_h
    return (left_hand_parts, right_hand_parts)


from pose_augment import set_network_input_wh, set_network_scale

model_path = './2018-2-23/mobilenet_thin_batch_8_lr_0.01_gpus_1_184x184_/'
# model_path = 'D:/wzchen/PythonProj/keras-openpose/checkpoints'
net_w = net_h = 184  # square 184x184 network input
scale = 8
set_network_input_wh(net_w, net_h)
set_network_scale(scale)

if __name__ == '__main__':
    from networks import get_network
    # NOTE(review): 'OpenOoseHand' looks like a typo for 'OpenPoseHand', but it
    # must match the name actually exported by pose_dataset -- verify before renaming.
    from pose_dataset import _get_dataflow_onlyread, OpenOoseHand

    input_node = tf.placeholder(tf.float32, shape=(1, net_h, net_w, 3), name='image')
    net, pretrain_path, last_layer = get_network('mobilenet_thin', input_node)
    net_var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    handpredictor = HandPose(input_node, net.loss_last(), scale, net_var_list)

    # load most recent checkpoint from model_path
    print('load from: ' + tf.train.latest_checkpoint(model_path))
    handpredictor.load_weights(tf.train.latest_checkpoint(model_path))
import argparse

from tensorpack.dataflow.remote import send_dataflow_zmq

from pose_dataset import get_dataflow_batch
from pose_augment import set_network_input_wh, set_network_scale


def _str2bool(value):
    """Parse a command-line boolean.

    argparse's ``type=bool`` treats every non-empty string (including
    ``'False'``) as True; this helper gives the intended semantics.
    """
    if isinstance(value, bool):
        return value
    return value.lower() in ('true', '1', 'yes', 'y', 't')


if __name__ == '__main__':
    """
    OpenPose Data Preparation might be a bottleneck for training.
    You can run multiple workers to generate input batches in multi-nodes to make training process faster.
    """
    parser = argparse.ArgumentParser(description='Worker for preparing input batches.')
    parser.add_argument('--datapath', type=str, default='/coco/annotations/')
    parser.add_argument('--imgpath', type=str, default='/coco/')
    parser.add_argument('--batchsize', type=int, default=64)
    # FIX: type=bool made any non-empty value truthy, so '--train False'
    # still selected the training split.
    parser.add_argument('--train', type=_str2bool, default=True)
    parser.add_argument('--master', type=str, default='tcp://csi-cluster-gpu20.dakao.io:1027')
    parser.add_argument('--input-width', type=int, default=368)
    parser.add_argument('--input-height', type=int, default=368)
    parser.add_argument('--scale-factor', type=int, default=2)
    args = parser.parse_args()

    # configure augmentation geometry before building the dataflow
    set_network_input_wh(args.input_width, args.input_height)
    set_network_scale(args.scale_factor)

    # build the batched dataflow and stream batches to the master over ZMQ
    df = get_dataflow_batch(args.datapath, args.train, args.batchsize, args.imgpath)
    send_dataflow_zmq(df, args.master, hwm=10)
# NOTE(review): chunk starts mid-method -- `finally:` cleanup of a queue
# worker (the enclosing try/def/class is above the visible region).
        finally:
            try:
                self.close_op.run()
            except Exception:
                pass
            logger.info("{} Exited.".format(self.name))

    def dequeue(self):
        # Return the dequeue op of the wrapped TF queue.
        return self.queue.dequeue()


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # force CPU-only for this smoke test

    from pose_augment import set_network_input_wh
    set_network_input_wh(256, 144)
    df = get_dataflow('labels/', True, img_path='data/')

    with tf.Session() as sess:
        df.reset_state()
        t1 = time.time()
        for idx, dp in enumerate(df.get_data()):
            if idx == 0:
                for d in dp:
                    logger.info('%d dp shape={}'.format(d.shape))
            # per-batch timing, then visualize image + heatmap + PAF tensors
            print(time.time() - t1)
            t1 = time.time()
            CocoPose.display_image(dp[0], dp[1].astype(np.float32), dp[2].astype(np.float32))
            print(dp[1].shape, dp[2].shape)
# NOTE(review): chunk starts mid-method -- cleanup tail of a queue worker
# (the enclosing class/def is above the visible region).
        try:
            self.close_op.run()
        except Exception:
            pass
        logger.info("{} Exited.".format(self.name))

    def dequeue(self):
        # Return the dequeue op of the wrapped TF queue.
        return self.queue.dequeue()


if __name__ == '__main__':
    os.environ['CUDA_VISIBLE_DEVICES'] = ''  # force CPU-only for this throughput test

    from pose_augment import set_network_input_wh, set_network_scale
    # set_network_input_wh(368, 368)
    set_network_input_wh(432, 368)
    set_network_scale(8)

    # df = get_dataflow('/data/public/rw/coco/annotations', True, '/data/public/rw/coco/')
    df = _get_dataflow_onlyread('dataset/annotations', True, 'dataset/')
    # df = get_dataflow('/root/coco/annotations', False, img_path='http://gpu-twg.kakaocdn.net/braincloud/COCO/')

    from tensorpack.dataflow.common import TestDataSpeed
    TestDataSpeed(df).start()
    sys.exit(0)

    # NOTE(review): unreachable after sys.exit(0) -- presumably kept as a
    # manual debugging path; the chunk is truncated mid-loop below.
    with tf.Session() as sess:
        df.reset_state()
        t1 = time.time()
        for idx, dp in enumerate(df.get_data()):
            if idx == 0:
# NOTE(review): chunk starts mid-method -- exception handler and cleanup
# tail of a queue worker thread (the enclosing try/def/class is above the view).
        except Exception as e:
            logging.exception("Exception in {}:{}".format(self.name, str(e)))
        finally:
            try:
                self.close_op.run()
            except Exception:
                pass
            logging.info("{} Exited.".format(self.name))

    def dequeue(self):
        # Return the dequeue op of the wrapped TF queue.
        return self.queue.dequeue()


if __name__ == '__main__':
    from pose_augment import set_network_input_wh
    set_network_input_wh(320, 240)

    df = get_dataflow('/data/public/rw/coco-pose-estimation-lmdb/', False)
    # df = get_dataflow('/data/public/rw/coco-pose-estimation-lmdb/', True)

    # input_node = tf.placeholder(tf.float32, shape=(None, 368, 368, 3), name='image')
    with tf.Session() as sess:
        # net = CmuNetwork({'image': input_node}, trainable=False)
        # net.load('./models/numpy/openpose_coco.npy', sess)
        df.reset_state()
        t1 = time.time()
        for idx, dp in enumerate(df.get_data()):
            if idx == 0:
                for d in dp:
                    logging.info('%d dp shape={}'.format(d.shape))
            # NOTE(review): chunk is truncated here, mid-loop.
            if idx % 100 == 0: