def make_data(self):
    """Build the shuffled, batched training iterator over PoseTrack + COCO samples."""
    from AllJoints_PoseTrack import PoseTrackJoints
    from AllJoints_COCO import PoseTrackJoints_COCO
    from dataset import Preprocessing

    # PoseTrack annotations.
    pose_track = PoseTrackJoints()
    train_data, _ = pose_track.load_data(cfg.min_kps)
    print(len(train_data))

    # COCO annotations, appended onto the PoseTrack samples.
    coco = PoseTrackJoints_COCO()
    train_data_coco, _ = coco.load_data(cfg.min_kps)
    print(len(train_data_coco))
    train_data.extend(train_data_coco)
    print(len(train_data))

    from random import shuffle
    shuffle(train_data)

    from tfflat.data_provider import DataFromList, MultiProcessMapDataZMQ, BatchData, MapData
    pipeline = DataFromList(train_data)
    if cfg.dpflow_enable:
        # Parallel preprocessing workers communicating over ZMQ.
        pipeline = MultiProcessMapDataZMQ(pipeline, cfg.nr_dpflows, Preprocessing)
    else:
        pipeline = MapData(pipeline, Preprocessing)
    pipeline = BatchData(pipeline, cfg.batch_size // cfg.nr_aug)
    pipeline.reset_state()
    return pipeline.get_data()
def make_data(self):
    """Create the batched training-data iterator over the COCO keypoint set."""
    from COCOAllJoints import COCOJoints
    from dataset import Preprocessing
    from tfflat.data_provider import DataFromList, MultiProcessMapDataZMQ, BatchData, MapData

    loader = COCOJoints()
    train_data, _ = loader.load_data(cfg.min_kps)

    stream = DataFromList(train_data)
    if cfg.dpflow_enable:
        # Preprocess in parallel worker processes via ZMQ.
        stream = MultiProcessMapDataZMQ(stream, cfg.nr_dpflows, Preprocessing)
    else:
        stream = MapData(stream, Preprocessing)
    stream = BatchData(stream, cfg.batch_size // cfg.nr_aug)
    stream.reset_state()
    return stream.get_data()
def _make_data(self):
    """Return (batch iterator, iterations-per-epoch) for cfg.database's training data."""
    train_data = cfg.database.load_train_data()

    pipeline = DataFromList(train_data)
    if cfg.multi_thread_enable:
        # strict=True keeps sample order deterministic across worker processes.
        pipeline = MultiProcessMapDataZMQ(pipeline, cfg.num_thread,
                                          generate_batch, strict=True)
    else:
        pipeline = MapData(pipeline, generate_batch)
    pipeline = BatchData(pipeline, cfg.batch_size)
    pipeline.reset_state()

    itrs_per_epoch = math.ceil(len(train_data) / cfg.batch_size / cfg.num_gpus)
    return pipeline.get_data(), itrs_per_epoch
def _make_data(self):
    """Return (batch iterator, iterations-per-epoch) for the dataset01 training set."""
    from dataset01 import Dataset
    from gen_batch import generate_batch
    from tfflat.data_provider import DataFromList, MultiProcessMapDataZMQ, BatchData, MapData

    train_data = Dataset().load_train_data()

    source = DataFromList(train_data)
    if self.cfg.multi_thread_enable:
        # strict=True preserves ordering across the worker processes.
        mapped = MultiProcessMapDataZMQ(source, self.cfg.num_thread,
                                        generate_batch, strict=True)
    else:
        mapped = MapData(source, generate_batch)
    batched = BatchData(mapped, self.cfg.batch_size)
    batched.reset_state()

    steps = math.ceil(len(train_data) / self.cfg.batch_size / self.cfg.num_gpus)
    return batched.get_data(), steps
def make_data(self, img_dir='/home/lanhao/FashionAI/train/',
              ann_path='/home/lanhao/tf-cpn-each/blouse/train_blouse.csv'):
    """Build the batched training-data iterator for FashionAI keypoints.

    The image directory and annotation CSV were previously hard-coded
    user-specific absolute paths; they are now keyword parameters whose
    defaults keep the original behavior, so existing callers are unaffected.

    Args:
        img_dir: directory containing the training images.
        ann_path: CSV annotation file for the clothing category.

    Returns:
        An iterator yielding preprocessed training batches.
    """
    from AllJoints import AllJoints
    from dataset import Preprocessing
    from tfflat.data_provider import DataFromList, MultiProcessMapDataZMQ, BatchData, MapData

    d = AllJoints(img_dir, ann_path)
    train_data, _ = d.load_data(cfg.min_kps)

    dp = DataFromList(train_data)
    if cfg.dpflow_enable:
        # Parallel preprocessing workers via ZMQ.
        dp = MultiProcessMapDataZMQ(dp, cfg.nr_dpflows, Preprocessing)
    else:
        dp = MapData(dp, Preprocessing)
    dp = BatchData(dp, cfg.batch_size)
    dp.reset_state()
    return dp.get_data()
def make_data(self):
    """Create a training batch iterator over COCO keypoint data.

    Returns either a plain Python generator (single-process path) or a
    dpflow pipeline iterator, each yielding per-batch lists of stacked
    numpy arrays.
    """
    from COCOAllJoints import COCOJoints
    from dataset import Preprocessing
    d = COCOJoints()
    train_data, _ = d.load_data(cfg.min_kps)

    def dataiter(train_data):
        # Single-process iterator that cycles through the whole dataset.
        ind = 0
        while True:
            batch_data = []
            for _ in range(cfg.batch_size // cfg.nr_aug):
                ind += 1
                if ind > len(train_data):
                    ind %= len(train_data)
                # BUG FIX: the original indexed train_data[i] with the
                # batch-slot index, so only the first batch_size // nr_aug
                # samples were ever used and the cursor `ind` was dead.
                # Index with the rotating cursor (ind is 1-based here,
                # wrapping back to 1, so subtract 1 to cover 0..len-1).
                data = Preprocessing(train_data[ind - 1])
                batch_data.append(data)
            ret = []
            # Aggregate: stack the k-th component of every sample in the batch.
            for k in range(len(batch_data[0])):
                ret.append(
                    np.asarray([
                        batch_data[j][k] for j in range(len(batch_data))
                    ]))
            yield ret

    if not cfg.dpflow_enable:
        return dataiter(train_data)
    else:
        from tfflat.data_provider import DataFromList, MultiProcessMapDataZMQ, BatchData
        dp = MultiProcessMapDataZMQ(DataFromList(train_data), 10, Preprocessing)
        dp = BatchData(dp, cfg.batch_size // cfg.nr_aug)
        dp.reset_state()
        dataiter = dp.get_data()
        return dataiter
def train(self):
    """Run cross-validation training: one full training pass per validation split.

    For each split this rebuilds the graph and data pipeline from scratch,
    trains for cfg.end_epoch epochs inside a fresh tf.Session, checkpoints
    per epoch (plus a best-loss checkpoint), then tears the graph down and
    runs validation for the split.
    """
    from gen_batch import generate_batch
    from tfflat.data_provider import DataFromList, MultiProcessMapDataZMQ, BatchData, MapData
    from test import test

    # Allow resuming from a specific validation split; negative means start at 0.
    start_val_itr = self.cfg.cnt_val_itr if self.cfg.cnt_val_itr >= 0 else 0
    for out_itr in range(start_val_itr, self.d.num_val_split):
        # reset input and output lists
        self._input_list = []
        self._output_list = []
        self._outputs = []
        self.graph_ops = None
        # reset current epoch
        self.cur_epoch = 0
        #reset summary dict
        self.summary_dict = {}
        # timer
        self.tot_timer = Timer()
        self.gpu_timer = Timer()
        self.read_timer = Timer()

        # Per-split output sub-directory name (1-based).
        run_pref = "run_{}".format(out_itr + 1)
        lr_eval = self.cfg.lr
        save_summary_steps = self.cfg.save_summary_steps
        summary_dir = os.path.join(self.cfg.summary_dir, run_pref)
        # train_data, val_data = self.d.load_train_data(out_itr)
        train_data, val_data = self.d.load_train_data()

        with tf.Session(config=self.tfconfig) as sess:
            # Learning rate lives in a TF variable so it can be reassigned mid-run.
            lr = tf.Variable(self.cfg.lr, trainable=False)
            self._optimizer = get_optimizer(lr, self.cfg.optimizer)
            if self.cfg.equal_random_seed:
                # set random seed for the python pseudo random number generator in order to obtain comparable results
                tf.set_random_seed(2223)
                random.seed(2223)
            # build_graph
            self.build_graph()

            # Data pipeline: list source -> (multi-process | in-process) map -> batching.
            data_load_thread = DataFromList(train_data)
            if self.cfg.multi_thread_enable:
                data_thread = MultiProcessMapDataZMQ(data_load_thread,
                                                     self.cfg.num_thread,
                                                     generate_batch,
                                                     strict=True)
            else:
                data_thread = MapData(data_load_thread, generate_batch)
            data_load_thread = BatchData(data_thread, self.cfg.batch_size)
            # NOTE(review): reset_state() is only called on the seeded path here;
            # other *_make_data variants in this file call it unconditionally — confirm intended.
            if self.cfg.equal_random_seed:
                data_load_thread.reset_state()
            dataiter = data_load_thread.get_data()
            itr_per_epoch = math.ceil(
                len(train_data) / self.cfg.batch_size / self.cfg.num_gpus)

            # summaries
            # merge all summaries, run this operation later in order to retain the added summaries
            merged_sums = tf.summary.merge_all()
            writer = tf.summary.FileWriter(summary_dir, sess.graph)

            # saver
            self.logger.info('Initialize saver ...')
            model_dump_dir = os.path.join(self.cfg.model_dump_dir, run_pref)
            train_saver = Saver(sess, tf.global_variables(), model_dump_dir)
            best_model_dir = os.path.join(model_dump_dir, "best_model")
            val_dir = os.path.join(self.cfg.val_dir, run_pref)
            if not os.path.isdir(best_model_dir):
                os.makedirs(best_model_dir)
            if not os.path.isdir(val_dir):
                os.makedirs(val_dir)
            # Keeps only the single best checkpoint (max_to_keep=1).
            best_saver = Saver(sess, tf.global_variables(), best_model_dir,
                               max_to_keep=1)

            # initialize weights
            self.logger.info('Initialize all variables ...')
            sess.run(
                tf.variables_initializer(tf.global_variables(), name='init'))
            self.load_weights('last_epoch' if self.cfg.continue_train else
                              self.cfg.init_model, model_dump_dir, sess=sess)
            # self.cfg.continue_train = False

            self.logger.info(
                'Start training; validation iteration #{}...'.format(
                    out_itr))
            start_itr = self.cur_epoch * itr_per_epoch + 1
            end_itr = itr_per_epoch * self.cfg.end_epoch + 1
            best_loss = self.cfg.min_save_loss
            for itr in range(start_itr, end_itr):
                self.tot_timer.tic()
                self.cur_epoch = itr // itr_per_epoch
                setproctitle.setproctitle(
                    'val_it {};train epoch{}:'.format(
                        out_itr, self.cur_epoch))

                # apply current learning policy
                cur_lr = self.cfg.get_lr(self.cur_epoch)
                if not approx_equal(cur_lr, lr_eval):
                    print(lr_eval, cur_lr)
                    sess.run(tf.assign(lr, cur_lr))

                # input data
                self.read_timer.tic()
                feed_dict = self.next_feed(dataiter)
                self.read_timer.toc()

                # train one step
                self.gpu_timer.tic()
                # Runs the train op plus the current lr, all scalar summaries,
                # and the merged TensorBoard summary in a single sess.run.
                _, lr_eval, *summary_res, tb_summaries = sess.run(
                    [
                        self.graph_ops[0], lr, *self.summary_dict.values(),
                        merged_sums
                    ],
                    feed_dict=feed_dict)
                self.gpu_timer.toc()

                # write summary values to event file at disk
                if itr % save_summary_steps == 0:
                    writer.add_summary(tb_summaries, itr)

                # Pair each summary value back with its key for display.
                itr_summary = dict()
                for i, k in enumerate(self.summary_dict.keys()):
                    itr_summary[k] = summary_res[i]

                screen = [
                    'Validation itr %d' % (out_itr),
                    'Epoch %d itr %d/%d:' %
                    (self.cur_epoch, itr, itr_per_epoch),
                    'lr: %g' % (lr_eval),
                    'speed: %.2f(%.2fs r%.2f)s/itr' %
                    (self.tot_timer.average_time,
                     self.gpu_timer.average_time,
                     self.read_timer.average_time),
                    '%.2fh/epoch' %
                    (self.tot_timer.average_time / 3600. * itr_per_epoch),
                    ' '.join(
                        map(lambda x: '%s: %.4f' % (x[0], x[1]),
                            itr_summary.items())),
                ]
                #TODO(display stall?)
                if itr % self.cfg.display == 0:
                    self.logger.info(' '.join(screen))

                # save best model
                loss = itr_summary['loss']
                # print('current loss is:', loss, 'best loss is:', best_loss)
                if loss < best_loss:
                    best_loss = loss
                    print(
                        "Saving model because best loss was undergone; Value is {}."
                        .format(loss))
                    # Best model is saved under a sentinel epoch number (end_epoch + 1).
                    best_saver.save_model(self.cfg.end_epoch + 1)

                # Regular per-epoch checkpoint.
                if itr % itr_per_epoch == 0:
                    train_saver.save_model(self.cur_epoch)
                self.tot_timer.toc()

        #clean up
        sess.close()
        tf.reset_default_graph()
        if self.cfg.multi_thread_enable:
            # Explicitly tear down the ZMQ worker processes before the next split.
            data_thread.__del__()
        print("Finish training for val run #{}; Apply validation".format(
            out_itr + 1))
        if self.cfg.additional_name == "CrowdPose":
            print(
                "Training on CrowdPose, no additional validation required!"
            )
        else:
            # Validate this split using the best checkpoint saved above.
            self.cross_val(val_data, self.cfg.end_epoch + 1, val_dir,
                           best_model_dir)
def _make_data(self):
    """Build the training iterator, augmenting ground truth with initial-model outputs.

    Loads the ground-truth training set plus the initial model's predictions on
    the same images (``test_on_trainset_path``), aligns the two per image id,
    and attaches to every ground-truth person: the best-IoU-matched estimated
    keypoints, the count of overlapping persons, and the joints of nearby
    persons (for swap-style augmentation). Returns (data iterator,
    iterations-per-epoch).
    """
    from dataset import Dataset
    from gen_batch import generate_batch
    d = Dataset()
    train_data = d.load_train_data()

    ## modify train_data to the result of the decoupled initial model
    with open(d.test_on_trainset_path, 'r') as f:
        test_on_trainset = json.load(f)

    # Normalize string image ids like "123.jpg" to the integer 123.
    for data in test_on_trainset:
        if isinstance(data['image_id'], str):
            data['image_id'] = int(data['image_id'].split('.')[0])

    # sort list by img_id
    train_data = sorted(train_data, key=lambda k: k['image_id'])
    test_on_trainset = sorted(test_on_trainset,
                              key=lambda k: k['image_id'])

    # cluster train_data and test_on_trainset by img_id
    # (relies on the sort above: consecutive runs share an image_id)
    cur_img_id = train_data[0]['image_id']
    data_gt = []
    data_gt_per_img = []
    for i in range(len(train_data)):
        if train_data[i]['image_id'] == cur_img_id:
            data_gt_per_img.append(train_data[i])
        else:
            data_gt.append(data_gt_per_img)
            cur_img_id = train_data[i]['image_id']
            data_gt_per_img = [train_data[i]]
    # Flush the trailing group.
    if len(data_gt_per_img) > 0:
        data_gt.append(data_gt_per_img)

    cur_img_id = test_on_trainset[0]['image_id']
    data_out = []
    data_out_per_img = []
    for i in range(len(test_on_trainset)):
        if test_on_trainset[i]['image_id'] == cur_img_id:
            data_out_per_img.append(test_on_trainset[i])
        else:
            data_out.append(data_out_per_img)
            cur_img_id = test_on_trainset[i]['image_id']
            data_out_per_img = [test_on_trainset[i]]
    if len(data_out_per_img) > 0:
        data_out.append(data_out_per_img)

    # remove false positive images
    # Two-pointer merge over the sorted per-image groups: keep only
    # prediction groups whose image id also appears in the ground truth.
    i = 0
    j = 0
    aligned_data_out = []
    while True:
        gt_img_id = data_gt[i][0]['image_id']
        out_img_id = data_out[j][0]['image_id']
        if gt_img_id > out_img_id:
            j = j + 1
        elif gt_img_id < out_img_id:
            i = i + 1
        else:
            aligned_data_out.append(data_out[j])
            i = i + 1
            j = j + 1
        if j == len(data_out) or i == len(data_gt):
            break
    data_out = aligned_data_out

    # add false negative images
    # Insert an empty prediction group for each GT image with no predictions,
    # so data_out lines up index-for-index with data_gt.
    # NOTE(review): the early break when j == len(data_out) assumes the last
    # GT images are covered by predictions; otherwise the assert below would
    # fire — confirm against the data this runs on.
    j = 0
    aligned_data_out = []
    for i in range(len(data_gt)):
        gt_img_id = data_gt[i][0]['image_id']
        out_img_id = data_out[j][0]['image_id']
        if gt_img_id == out_img_id:
            aligned_data_out.append(data_out[j])
            j = j + 1
        else:
            aligned_data_out.append([])
        if j == len(data_out):
            break
    data_out = aligned_data_out

    # they should contain annotations from all the images
    assert len(data_gt) == len(data_out)

    # for each img
    for i in range(len(data_gt)):
        bbox_out_per_img = np.zeros((len(data_out[i]), 4))
        joint_out_per_img = np.zeros(
            (len(data_out[i]), self.cfg.num_kps * 3))

        # for each data_out in an img
        for j in range(len(data_out[i])):
            joint = data_out[i][j]['keypoints']
            if 'bbox' in data_out[i][j]:
                bbox = data_out[i][j]['bbox']  #x, y, width, height
            else:
                # No detector bbox: derive one from the keypoints, padded
                # by 10% around the keypoint extent (min extent 20 px).
                coords = np.array(joint).reshape(-1, 3)
                xmin = np.min(coords[:, 0])
                xmax = np.max(coords[:, 0])
                width = xmax - xmin if xmax > xmin else 20
                center = (xmin + xmax) / 2.
                xmin = center - width / 2. * 1.1
                xmax = center + width / 2. * 1.1
                ymin = np.min(coords[:, 1])
                ymax = np.max(coords[:, 1])
                height = ymax - ymin if ymax > ymin else 20
                center = (ymin + ymax) / 2.
                ymin = center - height / 2. * 1.1
                ymax = center + height / 2. * 1.1
                # NOTE(review): this derived box is [xmin, xmax, ymin, ymax],
                # unlike the detector's x/y/width/height layout above —
                # confirm compute_iou handles both conventions.
                bbox = [xmin, xmax, ymin, ymax]
            bbox_out_per_img[j, :] = bbox
            joint_out_per_img[j, :] = joint

        # for each gt in an img
        for j in range(len(data_gt[i])):
            bbox_gt = np.array(data_gt[i][j]['bbox'])  #x, y, width, height
            joint_gt = np.array(data_gt[i][j]['joints'])

            # IoU calculate with detection outputs of other methods
            iou = self.compute_iou(bbox_gt.reshape(1, 4), bbox_out_per_img)
            if len(iou) == 0:
                continue
            # Attach the estimated joints of the best-overlapping detection.
            out_idx = np.argmax(iou)
            data_gt[i][j]['estimated_joints'] = [
                joint_out_per_img[out_idx, :]
            ]

            # for swap
            # Count other GT persons overlapping this one (IoU >= 0.1) and
            # collect their joints for swap augmentation.
            num_overlap = 0
            near_joints = []
            for k in range(len(data_gt[i])):
                bbox_gt_k = np.array(data_gt[i][k]['bbox'])
                iou_with_gt_k = self.compute_iou(bbox_gt.reshape(1, 4),
                                                 bbox_gt_k.reshape(1, 4))
                if k == j or iou_with_gt_k < 0.1:
                    continue
                num_overlap += 1
                near_joints.append(
                    np.array(data_gt[i][k]['joints']).reshape(
                        self.cfg.num_kps, 3))
            data_gt[i][j]['overlap'] = num_overlap
            if num_overlap > 0:
                data_gt[i][j]['near_joints'] = near_joints
            else:
                # Placeholder so downstream code always finds an array here.
                data_gt[i][j]['near_joints'] = [
                    np.zeros([self.cfg.num_kps, 3])
                ]

    # flatten data_gt
    train_data = [y for x in data_gt for y in x]

    from tfflat.data_provider import DataFromList, MultiProcessMapDataZMQ, BatchData, MapData
    data_load_thread = DataFromList(train_data)
    if self.cfg.multi_thread_enable:
        data_load_thread = MultiProcessMapDataZMQ(data_load_thread,
                                                  self.cfg.num_thread,
                                                  generate_batch,
                                                  strict=True,
                                                  add_paf=self.cfg.add_paf)
    else:
        data_load_thread = MapData(data_load_thread, generate_batch,
                                   add_paf=self.cfg.add_paf)
    data_load_thread = BatchData(data_load_thread, self.cfg.batch_size)
    data_load_thread.reset_state()
    dataiter = data_load_thread.get_data()
    return dataiter, math.ceil(
        len(train_data) / self.cfg.batch_size / self.cfg.num_gpus)