def processor(self, sess):
    sess.run(tf.global_variables_initializer())
    self.net.load_weigths(self.arg.weights, sess, self.saver)
    timer = Timer()
    vispy_init()
    positive_cnt = 0
    negative_cnt = 0
    data_use_for = 'train'
    if data_use_for == 'valid':
        length = self.dataset.validing_rois_length
    elif data_use_for == 'train':
        length = self.dataset.training_rois_length
    else:
        assert False, 'There is something wrong in dataset description'
    for idx in range(length):
        blobs = self.dataset.get_minibatch(idx, data_use_for)
        feed_dict = {
            self.net.lidar3d_data: blobs['lidar3d_data'],
            self.net.lidar_bv_data: blobs['lidar_bv_data'],
            self.net.im_info: blobs['im_info'],
            self.net.keep_prob: 0.5,
            self.net.gt_boxes_bv: blobs['gt_boxes_bv'],
            self.net.gt_boxes_3d: blobs['gt_boxes_3d'],
            self.net.gt_boxes_corners: blobs['gt_boxes_corners'],
            self.net.calib: blobs['calib']
        }
        timer.tic()
        result_, label_ = sess.run([self.result, self.label], feed_dict=feed_dict)
        timer.toc()
        print('Begin to save data_cnt: ', idx)
        pos_p = os.path.join(self.arg.box_savepath, data_use_for, 'POSITIVE')
        neg_p = os.path.join(self.arg.box_savepath, data_use_for, 'NEGATIVE')
        if not os.path.exists(pos_p):
            os.makedirs(pos_p)
        if not os.path.exists(neg_p):
            os.makedirs(neg_p)
        for box_cnt in range(result_.shape[0]):
            box = result_[box_cnt].astype(np.int8)
            if label_[box_cnt]:
                filename = os.path.join(pos_p, str(positive_cnt).zfill(6) + '.npy')
                positive_cnt += 1
            else:
                filename = os.path.join(neg_p, str(negative_cnt).zfill(6) + '.npy')
                negative_cnt += 1
            np.save(filename, box)
def train_model(self):
    """Network training loop."""
    timer = Timer()
    model_paths = []
    while self.solver.iter < self.max_iters:
        # Make one SGD update
        timer.tic()
        self.solver.step(1)
        timer.toc()
        if self.solver.iter % (10 * self.solver_param.display) == 0:
            print 'speed: {:.3f}s / iter'.format(timer.average_time)
        if self.solver.iter % self.snapshot_iters == 0:
            model_paths.append(self.snapshot())
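# The loops in this file repeatedly call a Timer object with tic()/toc(), and
# read average_time, total_time and remain(step, max_step) from it.  The class
# itself is defined elsewhere in the repo; the following is only a minimal
# sketch inferred from those call sites (class and method names assumed), not
# the project's actual implementation.
import time
import datetime

class SimpleTimer(object):
    """Hypothetical stand-in for the Timer helper used throughout this file."""

    def __init__(self):
        self.total_time = 0.
        self.calls = 0
        self.start_time = 0.
        self.average_time = 0.

    def tic(self):
        # start (or restart) the stopwatch
        self.start_time = time.time()

    def toc(self):
        # accumulate elapsed time and keep a running average
        diff = time.time() - self.start_time
        self.total_time += diff
        self.calls += 1
        self.average_time = self.total_time / self.calls
        return diff

    def remain(self, current_step, max_step):
        # rough ETA string based on the running average
        remaining = (max_step - current_step) * self.average_time
        return str(datetime.timedelta(seconds=int(remaining)))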
def training(self, sess):
    sess.run(tf.global_variables_initializer())
    reader = pywrap_tensorflow.NewCheckpointReader(self.weights)
    var_to_shape_map = reader.get_variable_to_shape_map()
    glb_var = tf.global_variables()
    with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope:
        for key in var_to_shape_map:
            try:
                var = tf.get_variable(key, trainable=False)
                sess.run(var.assign(reader.get_tensor(key)))
                print " Assign pretrain model: " + key
            except ValueError:
                print " Ignore variable:" + key
    cubic_cls_score = tf.nn.softmax(self.result)
    timer = Timer()
    vispy_init()
    res = []
    loop_parameters = np.arange(0, 360, 2)
    for data_idx in loop_parameters:  # DO NOT EDIT the "training_series", for the latter shuffle
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        feed_dict = self.cubic_rpn_grid(30,
                                        box_idx=0,
                                        angel=data_idx,
                                        scalar=1.0,  # float(data_idx)/180.*1.0,
                                        translation=[0, 0, 0])
        timer.tic()
        cubic_cls_score_ = sess.run(cubic_cls_score,
                                    feed_dict=feed_dict,
                                    options=run_options,
                                    run_metadata=run_metadata)
        timer.toc()
        cubic_cls_score_ = np.array(cubic_cls_score_)
        cubic_result = cubic_cls_score_.argmax(axis=1)
        res.append(cubic_cls_score_[0, 1])
        # print 'rotation: {:3d} score: {:>8,.7f} {:>8,.7f} result: {}'.format(data_idx, cubic_cls_score_[0, 0], cubic_cls_score_[0, 1], cubic_result[0])
    plt.plot(loop_parameters, res)
    plt.grid(True, color='black', linestyle='--', linewidth='1')
    plt.title('Robust Test')
    plt.xlabel('rotated angle (degree)')
    plt.ylabel('score')
    plt.legend(['positive'])
    plt.savefig('Rotation.png')
    plt.show()
def get_minibatch(self, _idx=0):
    """Given a roidb, construct a minibatch sampled from it."""
    index_dataset = self.test_set
    fname = index_dataset[_idx]
    timer = Timer()

    timer.tic()
    lidar_data = pcd2np.from_path(fname)
    angel = 0  # (np_random.rand() - 0.500) * np.pi * 0.9
    points_rot = self.rotation(lidar_data.pc_data, angel)
    timer.toc()
    time1 = timer.average_time

    timer.tic()
    grid_voxel = voxel_grid(points_rot, cfg, thread_sum=cfg.CPU_CNT)
    timer.toc()
    time2 = timer.average_time

    timer.tic()
    apollo_8feature = np.load(
        path_add(self.data_path, fname.split('/')[-3], 'feature_pcd_name',
                 fname.split('/')[-1][0:-4] + '.npy')).reshape(
                     -1, cfg.CUBIC_SIZE[0], cfg.CUBIC_SIZE[1], 8)
    apollo_8feature_rot = self.apollo_feature_rotation(apollo_8feature,
                                                       degree=angel * 57.29578)
    timer.toc()
    time3 = timer.average_time

    blob = dict({
        'serial_num': fname.split('/')[-1],
        'lidar3d_data': lidar_data.pc_data,
        'grid_stack': grid_voxel['feature_buffer'],
        'coord_stack': grid_voxel['coordinate_buffer'],
        'ptsnum_stack': grid_voxel['number_buffer'],
        'apollo_8feature': apollo_8feature_rot,
        'voxel_gen_time': (time1, time2, time3)
    })
    return blob
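# get_minibatch() augments the scan with self.rotation(points, angel) before
# voxelisation.  That helper is not shown in this file; below is only a sketch
# of a yaw (z-axis) rotation applied to the xyz columns of an (N, 3) array,
# which is the usual form of this augmentation -- the repo's own rotation()
# may differ in signature and details.
import numpy as np

def rotate_points_z(points_xyz, angle_rad):
    """Rotate an (N, 3) point array about the z axis by angle_rad (radians)."""
    c, s = np.cos(angle_rad), np.sin(angle_rad)
    rot = np.array([[c, -s, 0.],
                    [s,  c, 0.],
                    [0., 0., 1.]], dtype=np.float32)
    return points_xyz.dot(rot.T)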
def train(self): """ now tf_records are no used for the full image. :return: """ train_holder, seg_holder, dst_holder = self.provider.get_train_holder() if self.model_name == 'cnn_v2': model = self.model_class(self.is_training) model.build_model(train_holder, seg_holder) total_loss = model.total_loss total_dice_loss = model.total_dice_loss total_weight_loss = model.total_weight_loss #main_dice_loss = model.main_dice_loss #dice = model.dice_coefficient loss_op = model.entropy_loss train_op = self._get_optimizer(total_loss) else: model = self.model_class(self.is_training) inference_op = model.inference_op(train_holder) if cfg.use_dst_weight == True: loss_op = model.loss_op(inference_op, seg_holder, dst_holder) else: loss_op = model.loss_op(inference_op, seg_holder) #loss_op = model.loss_op(inference_op, seg_holder) total_dice_loss = model.total_dice_loss total_weight_loss = model.total_weight_loss main_weight_loss = model.main_weight_loss main_dice_loss = model.main_dice_loss train_op = self._get_optimizer(loss_op) merged = tf.summary.merge_all() self._count_trainables() log_output_path = os.path.join(self.output_path, "log") if not os.path.exists(log_output_path): os.makedirs(log_output_path) model_output_path = os.path.join(self.output_path, "model") if not os.path.exists(model_output_path): os.makedirs(model_output_path) loss_txt_path = os.path.join(self.output_path, "loss") if not os.path.exists(loss_txt_path): os.makedirs(loss_txt_path) train_writer = tf.summary.FileWriter( os.path.join(log_output_path, "train")) test_writer = tf.summary.FileWriter( os.path.join(log_output_path, "val")) line_buffer = 1 config = tf.ConfigProto() config.gpu_options.allow_growth = True with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) saver = tf.train.Saver() train_timer = Timer() load_timer = Timer() # if model checkpoint exist, then load last checkpoint #self._load_model(saver, sess, model_output_path) with open(file=loss_txt_path + '/loss_' + cfg.name + '.txt', mode='w', buffering=line_buffer) as loss_log: for step in range(self.train_step): load_timer.tic() image, label, weight = self.provider.get_train_value( with_weight=cfg.use_weight) image_val, label_val, weight = self.provider.get_val_value( with_weight=cfg.use_weight) load_timer.toc() train_timer.tic() train_merge, train_loss, t_dice_loss, t_weight_loss, m_dice_loss, m_weight_loss, _ = sess.run( [ merged, loss_op, total_dice_loss, total_weight_loss, main_dice_loss, main_weight_loss, train_op ], feed_dict={ train_holder: image, seg_holder: label, dst_holder: weight }) valid_merge, val_loss = sess.run( [merged, loss_op], feed_dict={ train_holder: image_val, seg_holder: label_val, dst_holder: weight, self.is_training: False }) train_timer.toc() output_format = '[Epoch]%d, Speed: %.3fs/iter,Load: %.3fs/iter, Remain: %s' \ ' train_loss: %.8f, valid_loss: %.8f\n' \ '[Loss]dice_loss: %.8f, weight_loss: %.8f, main_dice_loss: %.8f, main_weight_loss: %.8f\n' \ % (step, train_timer.average_time, load_timer.average_time, train_timer.remain(step,self.train_step),train_loss, val_loss, t_dice_loss, t_weight_loss, m_dice_loss, m_weight_loss) print(output_format) train_writer.add_summary(train_merge, step) test_writer.add_summary(valid_merge, step) if step % 10 == 0: loss_log.write( 'train loss: %.5f, valid_loss: %.5f, glabl step: %d' % (train_loss, val_loss, step) + '\n') if np.mod(step + 1, self.save_interval) == 0: saver.save( sess, os.path.join(self.output_path, "model/model_saved")) train_writer.close() test_writer.close()
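# The segmentation trainer above logs several Dice-based losses that come out
# of the model class (total_dice_loss, main_dice_loss, ...).  Those ops are
# defined elsewhere; this is only a generic soft-Dice sketch in TF 1.x style to
# show the idea behind them, not the model's actual loss_op.
import tensorflow as tf

def soft_dice_loss(logits, labels, eps=1e-5):
    """1 - Dice coefficient for a binary mask.

    logits: (N, H, W, 1) raw scores; labels: (N, H, W, 1) in {0, 1}.
    """
    probs = tf.nn.sigmoid(logits)
    labels = tf.cast(labels, tf.float32)
    intersection = tf.reduce_sum(probs * labels, axis=[1, 2, 3])
    union = tf.reduce_sum(probs, axis=[1, 2, 3]) + tf.reduce_sum(labels, axis=[1, 2, 3])
    dice = (2. * intersection + eps) / (union + eps)
    return tf.reduce_mean(1. - dice)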
def train_per_epoch(self, epoch): conf_loss = 0 _t = Timer() conf_loss_v = 0 epoch_size = int( len(self.train_loader) ) train_end = int( epoch_size); batch_iterator = iter(self.train_loader) # print('epoch_size ', epoch_size, " train_end ", train_end) for iteration in range(epoch_size): images, targets,targets_src = next(batch_iterator) # print('images ', images.shape) if len (images) == 1: continue # print('imgs from data_load shape ', images.shape) targets = np.array(targets) # print('iteration ', iteration) if iteration == (train_end - 2): if self.use_gpu: images = Variable(images.cuda()) self.visualize_epoch(images, epoch) if iteration <= train_end: if self.use_gpu: images = Variable(images.cuda()) # targets = [Variable(anno.cuda(), volatile=True) for anno in targets] else: images = Variable(images) self.model.train() #train: _t.tic() # print('---img shape 2 ', images.shape) out = self.model(images, phase='train') self.optimizer.zero_grad() # print('tr_out ', out) # print('targets ', targets.shape) loss_c = self.criterion(out, targets) # some bugs in coco train2017. maybe the annonation bug. if loss_c.data[0] == float("Inf"): continue if math.isnan(loss_c.data[0]): continue # if loss_c.data[0] > 10000: # continue loss_c.backward() self.optimizer.step() time = _t.toc() conf_loss += loss_c.data[0] # log per iter log = '\r==>Train_class{}: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(self.train_class, prograss='#'*int(round(10*iteration/epoch_size)) + '-'*int(round(10*(1-iteration/epoch_size))), iters=iteration, epoch_size=epoch_size, time=time, cls_loss=loss_c.data[0]) sys.stdout.write(log) sys.stdout.flush() if iteration == (train_end-2): # log per epoch sys.stdout.write('\r') sys.stdout.flush() lr = self.optimizer.param_groups[0]['lr'] log = '\r==>Train: || Total_time: {time:.3f}s || conf_loss: {conf_loss:.4f} || lr: {lr:.6f}\n'.format(lr=lr, time=_t.total_time, conf_loss=conf_loss/epoch_size) sys.stdout.write(log) sys.stdout.flush() # print(log) # log for tensorboard title = str(self.train_class) + '/conf_loss' # title = str(self.train_class)+'/conf_loss' self.writer.add_scalar(title, conf_loss/epoch_size, epoch) title = str(self.train_class) + '/lr' self.writer.add_scalar(title, lr, epoch) conf_loss = 0 val_epoch_size = int( len(self.val_loader) ) val_batch_iterator = iter(self.val_loader) pre_for_f1 = [] t_for_f1 = [] for iteration in range(val_epoch_size): images, targets, tar_srcs = next(val_batch_iterator) if iteration < (val_epoch_size - 1): # self.visualize_epoch(model, images[0], targets[0], self.priorbox, writer, epoch, use_gpu) #eval: # print('tar_srcs ', tar_srcs) targets = np.array(targets) if self.use_gpu: images = Variable(images.cuda()) else: images = Variable(images) self.model.eval() out = self.model(images, phase='eval') # loss loss_c = self.criterion(out, targets) if loss_c.data[0] == float("Inf"): continue if math.isnan(loss_c.data[0]): continue # if loss_c.data[0] > 100000000: # continue print('out ', out) for i_ys, ys in enumerate( out ): tail = '' mid = '' t_val = 0 targets_t = [int (tthis) for tthis in tar_srcs[i_ys].split(' ')] if self.train_class in targets_t: tail = '-----------' t_val = 1 t_for_f1.append(t_val) if ys[1] >= 0.5: mid = '||||||||' pre_for_f1.append(1) print('ci ', self.train_class, ' i_ys ', i_ys, ' pre ' , ys[1], mid, ' t ', tar_srcs[i_ys], tail) else: pre_for_f1.append(0) print('ci ', self.train_class, ' i_ys ', i_ys, ' pre ' , ys[1], ' t ', tar_srcs[i_ys], tail) time = _t.toc() conf_loss_v += 
loss_c.data[0] # log per iter log = '\r==>Eval_class{}: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format(self.train_class, prograss='#'*int(round(10*iteration/val_epoch_size)) + '-'*int(round(10*(1-iteration/val_epoch_size))), iters=iteration, epoch_size=val_epoch_size, time=time, cls_loss=loss_c.data[0]) #print(log) sys.stdout.write(log) sys.stdout.flush() # self.writer.add_scalar('Eval/conf_loss', conf_loss_v/epoch_size, epoch) # if iteration == (val_epoch_size - 1): # eval mAP # prec, rec, ap = cal_pr(label, score, npos) # log per epoch sys.stdout.write('\r') sys.stdout.flush() log = '\r==>Eval: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format( prograss='#'*int(round(10*iteration/val_epoch_size)) + '-'*int(round(10*(1-iteration/val_epoch_size))), iters=iteration, epoch_size=val_epoch_size, time=time, cls_loss=loss_c.data[0]) sys.stdout.write(log) sys.stdout.flush() # log for tensorboard title = str(self.train_class) +'/e_conf_loss' self.writer.add_scalar(title, conf_loss_v/epoch_size, epoch) f1 = f1_score(t_for_f1, pre_for_f1, average = "macro") print('c--- ',self.train_class, '---------f1 ',f1) title = str(self.train_class) + '/f' # title = str(self.train_class) + '/f' self.writer.add_scalar(title, f1, epoch)
def get_minibatch(self, _idx=0, name='train'):
    """Given a roidb, construct a minibatch sampled from it."""
    if name == 'train':
        index_dataset = self.train_set
    elif name == 'valid':
        index_dataset = self.valid_set
    else:
        index_dataset = self.test_set
    fname = index_dataset[_idx]['files_name']
    timer = Timer()

    timer.tic()
    lidar_data = pcd2np.from_path(
        path_add(self.data_path, fname.split('/')[0], 'pcd', fname.split('/')[1]))
    angel = (np_random.rand() - 0.500) * np.pi * 0.95
    points_rot = self.rotation(lidar_data.pc_data, angel)
    boxes_rot = np.add(index_dataset[_idx]['boxes_labels'],
                       [0., 0., 0., 0., 0., 0., angel, 0.])  # yaw
    category_rot = self.label_rotation(index_dataset[_idx]['object_labels'],
                                       degree=angel * 57.29578)
    timer.toc()
    time1 = timer.average_time

    timer.tic()
    grid_voxel = voxel_grid(points_rot, cfg, thread_sum=cfg.CPU_CNT)
    timer.toc()
    time2 = timer.average_time

    timer.tic()
    apollo_8feature = np.load(
        path_add(self.data_path, fname.split('/')[0], 'feature_pcd_name',
                 fname.split('/')[1][0:-4] + '.npy')).reshape(
                     -1, cfg.CUBIC_SIZE[0], cfg.CUBIC_SIZE[1], 8)
    apollo_8feature_rot = self.apollo_feature_rotation(apollo_8feature,
                                                       degree=angel * 57.29578)
    timer.toc()
    time3 = timer.average_time

    blob = dict({
        'serial_num': fname,
        'voxel_gen_time': (time1, time2, time3),
        'lidar3d_data': np.hstack((points_rot, lidar_data.pc_data[:, 3:4])),
        'boxes_labels': boxes_rot,
        'object_labels': category_rot,
        'grid_stack': grid_voxel['feature_buffer'],
        'coord_stack': grid_voxel['coordinate_buffer'],
        'ptsnum_stack': grid_voxel['number_buffer'],
        'apollo_8feature': apollo_8feature_rot,
    })
    return blob
def training(self, sess, train_writer): with tf.name_scope('loss_cubic'): rpn_cls_score = tf.reshape(self.net.get_output('rpn_cls_score'), [-1, 2]) rpn_label = tf.reshape( self.net.get_output('rpn_anchors_label')[0], [-1]) rpn_keep = tf.where(tf.not_equal(rpn_label, -1)) rpn_bbox_keep = tf.where(tf.equal( rpn_label, 1)) # only regression positive anchors rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_keep), [-1, 2]) rpn_label = tf.reshape(tf.gather(rpn_label, rpn_keep), [-1]) # cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2]) # cubic_cls_labels = tf.reshape(tf.cast(self.net.get_output('rpn_rois')[0][:, -2], tf.int64), [-1]) if not cfg.TRAIN.FOCAL_LOSS: rpn_cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=rpn_cls_score, labels=rpn_label)) # cubic_cross_entropy = tf.reduce_mean( # tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cubic_cls_score, labels=cubic_cls_labels)) else: #### use as reference for pos&neg proposal balance # self.cls_loss = alpha * ( # -self.pos_equal_one * tf.log(self.p_pos + small_addon_for_BCE)) / self.pos_equal_one_sum \ # + beta * (-self.neg_equal_one * tf.log( # 1 - self.p_pos + small_addon_for_BCE)) / self.neg_equal_one_sum # self.cls_loss = tf.reduce_sum(self.cls_loss) #### # alpha = [0.75,0.25] # 0.25 for label=1 gamma = 3 rpn_cls_probability = tf.nn.softmax(rpn_cls_score) # cubic_cls_probability = tf.nn.softmax(cubic_cls_score) # formula : Focal Loss for Dense Object Detection: FL(p)= -((1-p)**gama)*log(p) rpn_cross_entropy = tf.reduce_mean(-tf.reduce_sum( tf.one_hot(rpn_label, depth=2) * ((1 - rpn_cls_probability)**gamma) * tf.log([cfg.EPS, cfg.EPS] + rpn_cls_probability), axis=1)) # cubic_cross_entropy = tf.reduce_mean(-tf.reduce_sum( # tf.one_hot(cubic_cls_labels, depth=2) * ((1 - cubic_cls_probability) ** gamma) * tf.log( # [cfg.EPS, cfg.EPS] + cubic_cls_probability), axis=1)) # bounding box regression L1 loss rpn_bbox_pred = self.net.get_output('rpn_bbox_pred') rpn_bbox_targets = self.net.get_output('rpn_anchors_label')[1] rpn_bbox_pred = tf.reshape( tf.gather(tf.reshape(rpn_bbox_pred, [-1, 3]), rpn_bbox_keep), [-1, 3]) rpn_bbox_targets = tf.reshape( tf.gather(tf.reshape(rpn_bbox_targets, [-1, 3]), rpn_bbox_keep), [-1, 3]) rpn_smooth_l1 = self.modified_smooth_l1(3.0, rpn_bbox_pred, rpn_bbox_targets) rpn_loss_box = tf.multiply( tf.reduce_mean( tf.reduce_sum(rpn_smooth_l1, reduction_indices=[1])), 1.0) # loss = rpn_cross_entropy + rpn_loss_box + cubic_cross_entropy loss = rpn_cross_entropy with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.92, name='decay-Lr') # train_op = tf.train.AdamOptimizer(lr).minimize(loss, global_step=global_step) with tf.name_scope('train_cubic'): tf.summary.scalar('total_loss', loss) # tf.summary.scalar('rpn_loss_box', rpn_loss_box) # tf.summary.scalar('rpn_cross_entropy', rpn_cross_entropy) # tf.summary.scalar('cubic_cross_entropy', cubic_cross_entropy) recall_RPN = 0. 
# bv_anchors = self.net.get_output('rpn_anchors_label')[2] # roi_bv = self.net.get_output('rpn_rois')[0] # (x1,y1),(x2,y2),score,label # data_bv = self.net.lidar_bv_data # data_gt = self.net.gt_boxes_bv # (x1,y1),(x2,y2),label # # gt_box = tf.concat([data_gt,data_gt[:, 4]], axis=1) # bbox = tf.concat([roi_bv,data_gt],axis=0) # image_rpn = tf.reshape(show_rpn_tf(data_bv, bbox), (1, 601, 601, -1)) # tf.summary.image('lidar_bv_test', image_rpn) glb_var = tf.global_variables() for i in range(len(glb_var)): # print glb_var[i].name if 'moving' not in str(glb_var[i].name): if 'Adam' not in str(glb_var[i].name): if 'weights' not in str(glb_var[i].name): if 'rpn' not in str(glb_var[i].name): if 'biases' not in str(glb_var[i].name): if 'beta' not in str(glb_var[i].name): if 'gamma' not in str(glb_var[i].name): if 'batch' not in str( glb_var[i].name): tf.summary.histogram( glb_var[i].name, glb_var[i]) merged = tf.summary.merge_all() with tf.name_scope('valid_cubic'): epoch_rpn_recall = tf.placeholder(dtype=tf.float32) rpn_recall_smy_op = tf.summary.scalar('rpn_recall', epoch_rpn_recall) epoch_cubic_recall = tf.placeholder(dtype=tf.float32) cubic_recall_smy_op = tf.summary.scalar('cubic_recall', epoch_cubic_recall) epoch_cubic_precise = tf.placeholder(dtype=tf.float32) cubic_prec_smy_op = tf.summary.scalar('cubic_precise', epoch_cubic_precise) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load_weigths(self.args.weights, sess, self.saver) trainable_var_for_chk = tf.trainable_variables( ) #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) print 'Variables to train: ', trainable_var_for_chk timer = Timer() rpn_rois = self.net.get_output('rpn_rois') cubic_grid = self.net.get_output('cubic_grid') # cubic_cnn= self.net.get_output('cubic_cnn') if DEBUG: vispy_init( ) # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow # station = pcd_vispy_client(MSG_QUEUE,title='Vision') # vision_qt = Process(target=station.get_thread_data, args=(MSG_QUEUE,)) # vision_qt.start() # print 'Process vision_qt started ...' 
training_series = range(17, self.epoch) # self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = self.dataset.get_minibatch(data_idx, 'train') # get one batch feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'] } run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() rpn_rois_, cubic_grid_, loss_, merged_ = sess.run( [rpn_rois, cubic_grid, loss, merged], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() recall_RPN = recall_RPN + rpn_rois_[2][0] # cubic_result = cubic_cls_score_.argmax(axis=1) # one_hist = fast_hist(cubic_cls_labels_, cubic_result) cubic_car_cls_prec = 0 #one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1]+1e-5) cubic_car_cls_recall = 0 #one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0]+1e-5) if iter % cfg.TRAIN.ITER_DISPLAY == 0: print 'Iter: %d/%d, Serial_num: %s, speed: %.3fs/iter, loss: %.3f, rpn_recall: %.3f, cubic classify precise: %.3f,recall: %.3f' % \ (iter,self.args.epoch_iters * self.epoch, blobs['serial_num'],timer.average_time,loss_,recall_RPN / cfg.TRAIN.ITER_DISPLAY,cubic_car_cls_prec,cubic_car_cls_recall) recall_RPN = 0. # print 'divine: ', str(cubic_result).translate(None,'\n') # print 'labels: ', str(cubic_cls_labels_).translate(None,'\n'),'\n' if iter % 20 == 0 and cfg.TRAIN.TENSORBOARD: train_writer.add_summary(merged_, iter) pass if (iter % 4000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or (iter == 100): #chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if DEBUG: scan = blobs['lidar3d_data'] gt_box3d = blobs['gt_boxes_3d'][:, (0, 1, 2, 3, 4, 5, 6, 7)] gt_box3d = np.hstack( (gt_box3d, np.ones([gt_box3d.shape[0], 2]) * 4)) pred_boxes = rpn_rois_[1] # pred_boxes = np.hstack((rpn_rois_[1],cubic_result.reshape(-1,1)*2)) # bbox = np.vstack((pred_boxes, gt_box3d)) # pcd_vispy(scan, boxes=BoxAry_Theta(gt_box3d,pred_boxes,pre_cube_cls=cubic_result), name='CubicNet training') if cfg.TRAIN.EPOCH_MODEL_SAVE: self.snapshot(sess, iter) pass if cfg.TRAIN.USE_VALID: with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) # roi_bv = self.net.get_output('rpn_rois')[0] # cubu_bv = np.hstack((roi_bv,cubic_cls_labels.reshape(-1,1))) # pred_rpn_ = show_rpn_tf(self.net.lidar_bv_data,cubu_bv) # pred_rpn = tf.reshape(pred_rpn_,(1, 601, 601, -1)) # predicted_bbox = tf.summary.image('predict_bbox_bv', pred_rpn) # valid_result = tf.summary.merge([predicted_bbox]) recalls = self.net.get_output('rpn_rois')[2] pred_tp_cnt, gt_cnt = 0., 0. 
hist = np.zeros((cfg.NUM_CLASS, cfg.NUM_CLASS), dtype=np.float32) for data_idx in range(self.val_epoch): # self.val_epoch blobs = self.dataset.get_minibatch(data_idx, 'valid') feed_dict_ = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'] } cubic_cls_score_, cubic_cls_labels_, recalls_ = sess.run( [cubic_cls_score, cubic_cls_labels, recalls], feed_dict=feed_dict_) # train_writer.add_summary(valid, data_idx) pred_tp_cnt = pred_tp_cnt + recalls_[1] gt_cnt = gt_cnt + recalls_[2] cubic_class = cubic_cls_score_.argmax(axis=1) one_hist = fast_hist(cubic_cls_labels_, cubic_class) if not math.isnan(one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1])): if not math.isnan( one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0])): hist += one_hist if cfg.TRAIN.VISUAL_VALID: print 'Valid step: {:d}/{:d} , rpn recall = {:.3f}'\ .format(data_idx + 1,self.val_epoch,float(recalls_[1]) / recalls_[2]) print( ' class bg precision = {:.3f} recall = {:.3f}' .format( (one_hist[0, 0] / (one_hist[0, 0] + one_hist[1, 0] + 1e-6)), (one_hist[0, 0] / (one_hist[0, 0] + one_hist[0, 1] + 1e-6))) ) print( ' class car precision = {:.3f} recall = {:.3f}' .format( (one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1] + 1e-6)), (one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0] + 1e-6))) ) if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD: pass # train_writer.add_summary(valid_result_, data_idx/20+epo_cnt*1000) precise_total = hist[1, 1] / (hist[1, 1] + hist[0, 1] + 1e-6) recall_total = hist[1, 1] / (hist[1, 1] + hist[1, 0] + 1e-6) recall_rpn = pred_tp_cnt / gt_cnt valid_summary = tf.summary.merge([ rpn_recall_smy_op, cubic_recall_smy_op, cubic_prec_smy_op ]) valid_res = sess.run(valid_summary, feed_dict={ epoch_rpn_recall: recall_rpn, epoch_cubic_recall: recall_total, epoch_cubic_precise: precise_total }) train_writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}: rpn_recall {:.3f} cubic_precision = {:.3f} cubic_recall = {:.3f}'\ .format(epo_cnt + 1,recall_rpn,precise_total,recall_total) random.shuffle(training_series) # shuffle the training series print 'Training process has done, enjoy every day !'
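# The validation loop above accumulates a 2 x 2 confusion matrix with
# fast_hist(labels, predictions) and then reads class precision/recall from it
# (e.g. hist[1, 1] / (hist[1, 1] + hist[0, 1])).  fast_hist is defined
# elsewhere in the repo; the usual bincount-based form of such a helper looks
# like the sketch below (assumed, not copied from the project).
import numpy as np

def fast_hist_sketch(labels, preds, num_class=2):
    """Confusion matrix where hist[i, j] counts samples with label i predicted as j."""
    labels = np.asarray(labels).astype(np.int64).ravel()
    preds = np.asarray(preds).astype(np.int64).ravel()
    mask = (labels >= 0) & (labels < num_class)
    hist = np.bincount(num_class * labels[mask] + preds[mask],
                       minlength=num_class ** 2).reshape(num_class, num_class)
    return hist

# With that layout, car precision = hist[1, 1] / (hist[1, 1] + hist[0, 1]) and
# car recall = hist[1, 1] / (hist[1, 1] + hist[1, 0]), matching the expressions
# printed during validation above.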
def processor(self, sess, train_writer): with tf.name_scope('loss_design'): epsilon = tf.constant(value=1e-10) scores = tf.reshape(self.net.predicted_map, (-1, 2)) + epsilon labels = tf.reshape( tf.one_hot(tf.reshape(self.net.gt_map, (-1, 1)), depth=2, dtype=tf.float32), (-1, 2)) scores_softmax = tf.nn.softmax(scores) # focal loss # balance = np.array([1,1],dtype=np.float32) balance = 50.0 if cfg.TRAIN.FOCAL_LOSS: # TODO:add +- balance cross_entropy = -tf.reduce_sum(tf.multiply( labels * ((1 - scores_softmax)**3) * tf.log(scores_softmax + epsilon), balance), axis=[1]) else: pass cross_entropy = -tf.reduce_sum(tf.multiply( labels * tf.log(scores_softmax + epsilon), balance), axis=[1]) cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') loss = cross_entropy_mean with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.90, name='decay-Lr') Optimizer = tf.train.AdamOptimizer(lr) # var_and_grad = Optimizer.compute_gradients(loss, var_list=tf.trainable_variables()) train_op = Optimizer.minimize(loss, global_step=global_step) with tf.name_scope('TrainingBoard'): res_map = tf.cast(tf.reshape( tf.argmax(self.net.predicted_map, axis=3), [-1, 640, 640, 1]), dtype=tf.float32) gt_map = tf.reshape(tf.cast(self.net.gt_map, dtype=tf.float32), (-1, 640, 640, 1)) cnt = tf.shape(self.net.coordinate)[0] updates = tf.ones([cnt], dtype=tf.float32) input_map = tf.reshape( tf.scatter_nd(self.net.coordinate, updates, shape=[640, 640]), (-1, 640, 640, 1)) tf.summary.image('InputData', input_map) tf.summary.image('PredMap', res_map) tf.summary.image('GtMap', gt_map) # apollo_feature = tf.transpose(self.net.apollo_8feature, perm=[3, 1, 2, 0]) # tf.summary.image('ApolloFeature', apollo_feature, max_outputs=8) tf.summary.scalar('TrainLoss', loss) glb_var = tf.trainable_variables() for i in range(len(glb_var)): tf.summary.histogram(glb_var[i].name, glb_var[i]) main_merged = tf.summary.merge_all( ) # hxd: before the next summary ops with tf.name_scope('TrainingMonitor'): epoch_valid_loss = tf.placeholder(dtype=tf.float32) epoch_valid_loss_sum_op = tf.summary.scalar( 'epoch_loss', epoch_valid_loss) epoch_valid_ac = tf.placeholder(dtype=tf.float32) epoch_valid_ac_sum_op = tf.summary.scalar('epoch_accurate', epoch_valid_ac) epoch_valid_rc = tf.placeholder(dtype=tf.float32) epoch_valid_rc_sum_op = tf.summary.scalar('epoch_recall', epoch_valid_rc) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load_weigths(self.args.weights, sess, self.saver) # trainable_var_for_chk = tf.trainable_variables() # tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) # print 'Variables to train: ', trainable_var_for_chk timer = Timer() if DEBUG: pass # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow vispy_init() training_series = range(self.epoch) # self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = self.dataset.get_minibatch(data_idx, 'train') # get one batch feed_dict = { self.net.pc_input: blobs['lidar3d_data'], self.net.voxel_feature: blobs['grid_stack'], self.net.coordinate: blobs['coord_stack'], self.net.number: blobs['ptsnum_stack'], self.net.gt_map: 
blobs['object_labels'], self.net.apollo_8feature: blobs['apollo_8feature'], } run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() res_map_, loss_, merged_, _ = sess.run( [res_map, loss, main_merged, train_op], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() if iter % cfg.TRAIN.ITER_DISPLAY == 0: print 'Iter: %d/%d, Serial_num: %s, Speed: %.3fs/iter, Loss: %.3f ' % ( iter, self.args.epoch_iters * self.epoch, blobs['serial_num'], timer.average_time, loss_) print 'Loading pcd use: {:.3}s, and generating voxel points use: {:.3}s'.format( blobs['voxel_gen_time'][0], blobs['voxel_gen_time'][1]) if iter % 10 == 0 and cfg.TRAIN.TENSORBOARD: train_writer.add_summary(merged_, iter) # train_writer.add_run_metadata(run_metadata, 'step%03d' % iter,iter) pass if (iter % 4000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or (iter == 300): # chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if DEBUG: scan = blobs['lidar3d_data'] pcd_vispy(scan, boxes=None, name='CubicNet training', index=iter, vis_size=(800, 600), save_img=False, visible=False) if cfg.TRAIN.EPOCH_MODEL_SAVE: self.snapshot(sess, epo_cnt + 1) pass if cfg.TRAIN.USE_VALID: # TODO: to complete the valid process with tf.name_scope('Validations_' + str(epo_cnt + 1)): epoch_valid_gt_sum_op = tf.summary.image( 'valid_gt', gt_map) epoch_valid_det_sum_op = tf.summary.image( 'valid_predict', res_map) epoch_valid_input_sum_op = tf.summary.image( 'valid_input_data', input_map) valid_image_summary = tf.summary.merge([ epoch_valid_gt_sum_op, epoch_valid_det_sum_op, epoch_valid_input_sum_op ]) print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) valid_loss_total = 0.0 hist = np.zeros((cfg.NUM_CLASS, cfg.NUM_CLASS)) for data_idx in range(self.val_epoch): # self.val_epoch blobs = self.dataset.get_minibatch(data_idx, 'valid') feed_dict_ = { self.net.pc_input: blobs['lidar3d_data'], self.net.voxel_feature: blobs['grid_stack'], self.net.coordinate: blobs['coord_stack'], self.net.number: blobs['ptsnum_stack'], self.net.gt_map: blobs['object_labels'], self.net.apollo_8feature: blobs['apollo_8feature'], } res_map_, valid_sum_, loss_valid_ = sess.run( [res_map, valid_image_summary, loss], feed_dict=feed_dict_) # train_writer.add_summary(valid, data_idx) valid_loss_total += loss_valid_ one_hist = get_hist(blobs['object_labels'], res_map_) # [[TN,FP],[FN,TP]] hist += one_hist if data_idx % 10 == 0 and cfg.TRAIN.TENSORBOARD: pass train_writer.add_summary(valid_sum_, data_idx) if cfg.TRAIN.VISUAL_VALID and data_idx % 20 == 0: print 'Valid step: {:d}/{:d} , theta_loss = {:.3f}'.format( data_idx + 1, self.val_epoch, float(loss_valid_)) print( 'Object precision = {:.4f} recall = {:.4f}'. 
format( (one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1] + 1e-5)), (one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0] + 1e-5))) ) object_valid_ac = (hist[1, 1] / (hist[1, 1] + hist[0, 1] + 1e-5)) object_valid_rc = (hist[1, 1] / (hist[1, 1] + hist[1, 0] + 1e-5)) valid_summary = tf.summary.merge([ epoch_valid_loss_sum_op, epoch_valid_ac_sum_op, epoch_valid_rc_sum_op ]) valid_res = sess.run(valid_summary, feed_dict={ epoch_valid_loss: float(valid_loss_total) / self.val_epoch, epoch_valid_ac: object_valid_ac, epoch_valid_rc: object_valid_rc }) train_writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}:theta_loss_total = {:.3f}\n'.format( epo_cnt + 1, float(valid_loss_total) / self.val_epoch) random.shuffle(training_series) # shuffle the training series print 'Training process has done, enjoy every day !'
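# The loss built in processor() above is a focal-style cross entropy: each
# pixel's one-hot label is weighted by (1 - p)^gamma (gamma = 3 in the code)
# and by a scalar class balance (50.0), i.e. FL(p) = -balance * (1 - p)^gamma * log(p).
# A small numpy sketch of that formula on softmax probabilities, for
# illustration only (not the graph ops used above):
import numpy as np

def focal_loss_sketch(probs, labels_onehot, gamma=3.0, balance=50.0, eps=1e-10):
    """probs: (N, 2) softmax outputs; labels_onehot: (N, 2) one-hot targets."""
    per_sample = -np.sum(labels_onehot * ((1. - probs) ** gamma)
                         * np.log(probs + eps) * balance, axis=1)
    return per_sample.mean()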
def training(self, sess):
    sess.run(tf.global_variables_initializer())
    reader = pywrap_tensorflow.NewCheckpointReader(self.weights)
    var_to_shape_map = reader.get_variable_to_shape_map()
    glb_var = tf.global_variables()
    with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope:
        for key in var_to_shape_map:
            try:
                var = tf.get_variable(key, trainable=False)
                sess.run(var.assign(reader.get_tensor(key)))
                print " Assign pretrain model: " + key
            except ValueError:
                print " Ignore variable:" + key
    timer = Timer()
    vispy_init()
    res = []
    input_series = []
    merge_op = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(cfg.LOG_DIR, sess.graph, max_queue=1000, flush_secs=1)
    loop_parameters = np.arange(-90, 90, 1)
    data_id = 1
    box_cnt = 0
    for data_idx in loop_parameters:  # DO NOT EDIT the "training_series", for the latter shuffle
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()
        debug_mod = True if data_idx == 0 else False
        # debug_mod = True
        feed_dict = self.cubic_rpn_grid(data_id,
                                        box_idx=box_cnt,
                                        angel=data_idx,
                                        scalar=1.00,  # float(data_idx)/180.*1.0,
                                        translation=[0, 0, 0],
                                        DEBUG=debug_mod)
        timer.tic()
        img_tf_, cubic_theta_, merge_op_ = sess.run(
            [self.cubic_theta.img_tf, self.cubic_theta.res, merge_op],
            feed_dict=feed_dict,
            options=run_options,
            run_metadata=run_metadata)
        timer.toc()
        input_series.append(img_tf_)
        res.append(cubic_theta_[0] * 180 / 3.1415926)
        # print 'rotation: {:3d} score: {:>8,.7f} {:>8,.7f} result: {}'.format(data_idx, cubic_cls_score_[0, 0], cubic_cls_score_[0, 1], cubic_result[0])
        train_writer.add_summary(merge_op_, data_idx)
    imge_op = tf.summary.image("imagesss",
                               np.array(input_series, dtype=np.float32).reshape(-1, 30, 30, 1),
                               max_outputs=180)
    imge_op_ = sess.run(imge_op)
    train_writer.add_summary(imge_op_, 1)

    plt.plot(loop_parameters, res)
    plt.grid(True, color='black', linestyle='--', linewidth='1')
    plt.title('Car_{}_{}'.format(data_id, box_cnt))
    plt.xlabel('gt_yaw+')
    plt.ylabel('pred-yaw')
    plt.legend(['positive'])
    plt.savefig('Rotation_of_Car2.png')

    xmajorLocator = MultipleLocator(10)            # major x ticks every 10 units
    xmajorFormatter = FormatStrFormatter('%1.0f')  # format of the x-axis tick labels
    xminorLocator = MultipleLocator(5)             # minor x ticks every 5 units
    ymajorLocator = MultipleLocator(10)            # major y ticks every 10 units
    ymajorFormatter = FormatStrFormatter('%1.0f')  # format of the y-axis tick labels
    yminorLocator = MultipleLocator(5)             # minor y ticks every 5 units
    ax = plt.axes()
    # set the locations and text format of the major tick labels
    ax.xaxis.set_major_locator(xmajorLocator)
    ax.xaxis.set_major_formatter(xmajorFormatter)
    ax.yaxis.set_major_locator(ymajorLocator)
    ax.yaxis.set_major_formatter(ymajorFormatter)
    # show minor tick positions without labels
    ax.xaxis.set_minor_locator(xminorLocator)
    ax.yaxis.set_minor_locator(yminorLocator)
    ax.xaxis.grid(True, which='major')  # x-axis grid follows the major ticks
    ax.yaxis.grid(True, which='minor')  # y-axis grid follows the minor ticks
    plt.show()
def test_epoch(self): self.model.train() test_image_dir = os.path.join('../', 'test/') # vis = visdom.Visdom(server="http://localhost", port=8888) check_i = 0 _t = Timer() df = pd.DataFrame(columns=["Id", "Predicted"]) self.idx_df = 0 test_image_merge_list = self.get_testimg_merge_list(test_image_dir) banch_num = int(self.config.v('batch_size')) img_list = [] name_list = [] print('len ', len(test_image_merge_list)) for i, img_name in enumerate(test_image_merge_list): img = self.get_merge_image(test_image_dir + img_name) img = Variable(img, volatile=True) if self.use_gpu: img = img.cuda() if i % banch_num > 0 and i <= (len(test_image_merge_list) - 1): img_list.append(img.unsqueeze(0)) name_list.append(img_name) if i < (len(test_image_merge_list) - 1): continue if i % banch_num == 0: if i == 0: img_list.append(img.unsqueeze(0)) name_list.append(img_name) continue # images = images.unsqueeze(0) _t.tic() img_list = torch.cat(img_list, 0) # if check_i == 3: vis.images(img_list[0], win=2, opts={'title': 'Reals'}) self.visTest(self.model, img_list[0], self.priorbox, self.writer, 1, self.use_gpu) # print('imglist ', img_list.shape) out = self.model(img_list, phase='eval') # print('out ', out) for i_im, imname in enumerate(name_list): df.set_value(self.idx_df, 'Id', imname) data = out[i_im] result = '' cla = data.argmax(0).item() result = str(cla) data[cla] = 0 cla = data.argmax(0).item() if data[cla] > 0.5: result += ' ' result += str(cla) df.set_value(self.idx_df, 'Predicted', result) self.idx_df += 1 img_list = [] img_list.append(img.unsqueeze(0)) name_list = [] name_list.append(img_name) # check_i += 1 df.to_csv('pred.csv', index=None) df.head(10) print('Evaluating detections')
def training(self, sess, train_writer): with tf.name_scope('loss_function'): RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1] RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1] RNet_rpn_yaw_gt = self.net.get_output( 'rpn_rois' )[1][:, -1] #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw RNet_rpn_yaw_gt_new = RNet_rpn_yaw_gt - RNet_rpn_yaw_gt_delta RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred + RNet_rpn_yaw_gt_delta rpn_cls_labels = self.net.get_output( 'rpn_rois' )[1][:, -2] #rpn_3d_boxes:(x1,y1,z1),(x2,y2,z2),score,rpn_cls_label,yaw RNet_rpn_yaw_pred = self.angle_trans(RNet_rpn_yaw_pred) RNet_rpn_yaw_gt_new = self.angle_trans(RNet_rpn_yaw_gt_new) debug_pred = tf.multiply(rpn_cls_labels, self.angle_trans(RNet_rpn_yaw_pred)) debug_gt = tf.multiply(rpn_cls_labels, self.angle_trans(RNet_rpn_yaw_gt_new)) tower_l1_loss = self.Rnet_modified_smooth_l1( sigma=3, bbox_pred=RNet_rpn_yaw_pred, bbox_targets=RNet_rpn_yaw_gt_new) tower_l1_loss_keep_positive = tf.multiply(rpn_cls_labels, tower_l1_loss) loss = tf.reduce_sum(tower_l1_loss_keep_positive) / ( 1e-5 + tf.reduce_sum( tf.cast(tf.not_equal(tower_l1_loss_keep_positive, 0.0), dtype=tf.float32))) with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.90, name='decay-Lr') Optimizer = tf.train.AdamOptimizer(lr) var_and_grad = Optimizer.compute_gradients( loss, var_list=tf.trainable_variables()) train_op = Optimizer.minimize(loss, global_step=global_step) with tf.name_scope('debug_board'): tf.summary.scalar('total_loss', loss) glb_var = tf.trainable_variables() for i in range(len(glb_var)): tf.summary.histogram(glb_var[i].name, glb_var[i]) tf.summary.image('theta', self.net.get_output('RNet_theta')[0], max_outputs=50) merged = tf.summary.merge_all() #hxd: before the next summary ops with tf.name_scope('epoch_valid'): epoch_cube_theta = tf.placeholder(dtype=tf.float32) epoch_cube_theta_sum_op = tf.summary.scalar( 'valid_los', epoch_cube_theta) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: if True: # #full graph restore print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load(self.args.weights, sess, self.saver, True) else: # #part graph restore # # METHOD one # ref_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=['vgg_feat_fc']) # saver1 = tf.train.Saver(ref_vars) # saver1.restore(sess, self.args.weights) # # METHOD two reader = pywrap_tensorflow.NewCheckpointReader( self.args.weights) var_to_shape_map = reader.get_variable_to_shape_map() with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope: for key in var_to_shape_map: try: var = tf.get_variable(key, trainable=False) sess.run(var.assign(reader.get_tensor(key))) print " Assign pretrain model: " + key except ValueError: print " Ignore variable:" + key trainable_var_for_chk = tf.trainable_variables( ) #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) print 'Variables to train: ', trainable_var_for_chk timer = Timer() rpn_rois_3d = self.net.get_output('rpn_rois')[1] if DEBUG: pass # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow vispy_init() i = 0 training_series = range(10) #self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = 
self.dataset.get_minibatch(data_idx, 'train') # get one batch feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'], } run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() # debug_pred_,delta_,RNet_rpn_yaw_gt_delta_,rpn_rois_3d_,loss_,RNet_rpn_yaw_pred_toshow_,debug_gt_,merged_,_ = \ # sess.run([debug_pred,tower_l1_loss_keep_positive,RNet_rpn_yaw_gt_delta,rpn_rois_3d,loss,RNet_rpn_yaw_pred_toshow,debug_gt,merged,train_op,] # ,feed_dict=feed_dict,options=run_options, run_metadata=run_metadata) debug_pred_,delta_,RNet_rpn_yaw_gt_delta_,rpn_rois_3d_,RNet_rpn_yaw_pred_toshow_,debug_gt_,merged_, = \ sess.run([debug_pred,tower_l1_loss_keep_positive,RNet_rpn_yaw_gt_delta,rpn_rois_3d,RNet_rpn_yaw_pred_toshow,debug_gt,merged,] ,feed_dict=feed_dict,options=run_options, run_metadata=run_metadata) loss_ = 0 timer.toc() if iter % cfg.TRAIN.ITER_DISPLAY == 0: print 'Iter: %d/%d, Serial_num: %s, Speed: %.3fs/iter, Loss: %.3f ' % ( iter, self.args.epoch_iters * self.epoch, blobs['serial_num'], timer.average_time, loss_) print 'theta_delta: ', for i in range(50): if delta_[i] != 0.0: print '%6.3f' % (delta_[i]), print '\nPredicted angle: ', for j in range(50): if debug_pred_[j] != 0.0: print '%6.3f' % (debug_pred_[j]), print '\nGt yaw angle: ', for j in range(50): if debug_gt_[j] != 0.0: print '%6.3f' % (debug_gt_[j]), print '\n' if iter % 20 == 0 and cfg.TRAIN.TENSORBOARD: train_writer.add_summary(merged_, iter) pass if (iter % 4000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or (iter == 100): #chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if DEBUG: scan = blobs['lidar3d_data'] cubic_cls_value = np.ones([cfg.TRAIN.RPN_POST_NMS_TOP_N], dtype=np.float32) * 0 boxes = BoxAry_Theta( gt_box3d=blobs['gt_boxes_3d'], pre_box3d=rpn_rois_3d_, pre_theta_value=RNet_rpn_yaw_pred_toshow_, pre_cube_cls=cubic_cls_value ) # RNet_rpn_yaw_pred_toshow_ rpn_rois_3d_[:,-1] pcd_vispy(scan, boxes=boxes, name='CubicNet training', index=i, vis_size=(800, 600), save_img=False, visible=False) i += 1 if cfg.TRAIN.EPOCH_MODEL_SAVE: #iter % 2000==0 and : self.snapshot(sess, iter) pass if cfg.TRAIN.USE_VALID and True: #TODO: to complete the valid process with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) valid_loss_total = 0.0 for data_idx in range(self.val_epoch): # self.val_epoch blobs = self.dataset.get_minibatch(data_idx, 'valid') feed_dict_ = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'], } loss_valid = sess.run(loss, feed_dict=feed_dict_) # train_writer.add_summary(valid, data_idx) valid_loss_total += loss_valid if cfg.TRAIN.VISUAL_VALID and data_idx % 20 == 0: print 'Valid step: {:d}/{:d} , theta_loss = {:.3f}'\ .format(data_idx + 
1,self.val_epoch,float(loss_valid)) if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD: pass # train_writer.add_summary(valid_result_, data_idx/20+epo_cnt*1000) valid_summary = tf.summary.merge([epoch_cube_theta_sum_op]) valid_res = sess.run(valid_summary, feed_dict={ epoch_cube_theta: float(valid_loss_total) / self.val_epoch }) train_writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}:theta_loss_total = {:.3f}\n'\ .format(epo_cnt + 1,float(valid_loss_total)/self.val_epoch) random.shuffle(training_series) # shuffle the training series print 'Training process has done, enjoy every day !'
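# Both the RPN trainer and the yaw-regression trainer above regress offsets with
# a "modified smooth L1" (sigma = 3).  The helper itself lives elsewhere in the
# repo; the standard Faster R-CNN form of that loss, which these call sites
# appear to follow, is sketched here in TF 1.x (an assumption, not the
# project's exact code).
import tensorflow as tf

def smooth_l1_sketch(sigma, bbox_pred, bbox_targets):
    """Element-wise smooth L1: 0.5*(sigma*x)^2 if |x| < 1/sigma^2, else |x| - 0.5/sigma^2."""
    sigma2 = sigma * sigma
    diff = bbox_pred - bbox_targets
    abs_diff = tf.abs(diff)
    small = tf.cast(tf.less(abs_diff, 1.0 / sigma2), tf.float32)
    return small * 0.5 * sigma2 * tf.square(diff) + (1.0 - small) * (abs_diff - 0.5 / sigma2)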
def testing(self, sess, test_writer): with tf.name_scope('view_cubic_rpn'): roi_bv = self.net.get_output('rpn_rois')[0] data_bv = self.net.lidar_bv_data image_rpn = tf.reshape(test_show_rpn_tf(data_bv, roi_bv), (1, 601, 601, -1)) tf.summary.image('lidar_bv_test', image_rpn) merged = tf.summary.merge_all() with tf.name_scope('load_weights'): weights = self.args.weights if weights.endswith('.ckpt'): print 'Loading test model weights from {:s}'.format( self.args.weights) self.saver.restore(sess, weights) else: print "error: Function [combinet_test.testing] can not load weights {:s}!".format( self.args.weights) return 0 cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2]) rpn_3d = tf.reshape(self.net.get_output('rpn_rois')[1], [-1, 8]) vispy_init( ) # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow timer = Timer() for idx in range(self.epoch): blobs = self.dataset.get_minibatch(idx) feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.calib: blobs['calib'] } run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() cubic_cls_score_,rpn_3d_,summary = \ sess.run([cubic_cls_score, rpn_3d, merged], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() cubic_result = cubic_cls_score_.argmax(axis=1) if idx % 3 == 0 and cfg.TEST.DEBUG_TIMELINE: # chrome://tracing trace = timeline.Timeline(step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'testing-step-' + str(idx).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if idx % cfg.TEST.ITER_DISPLAY == 0: pass print 'Test: %06d/%06d speed: %.4f s / iter' % ( idx + 1, self.epoch, timer.average_time) if VISION_DEBUG: scan = blobs['lidar3d_data'] img = blobs['image_data'] pred_boxes = np.hstack( (rpn_3d_, cubic_result.reshape(-1, 1) * 2)) pcd_vispy(scan, img, pred_boxes, no_gt=True, index=idx, save_img=cfg.TEST.SAVE_IMAGE, visible=True, name='CubicNet testing') if idx % 1 == 0 and cfg.TEST.TENSORBOARD: test_writer.add_summary(summary, idx) pass print 'Testing process has done, happy every day !'
def testing(self, sess, test_writer): ##======================================= if USE_ROS: import rospy from sensor_msgs.msg import PointCloud,Image from visualization_msgs.msg import MarkerArray, Marker from tools.data_visualize import Boxes_labels_Gen, Image_Gen,PointCloud_Gen rospy.init_node('rostensorflow') pub = rospy.Publisher('prediction', PointCloud, queue_size=1000) img_pub = rospy.Publisher('images_rgb', Image, queue_size=1000) box_pub = rospy.Publisher('label_boxes', MarkerArray, queue_size=1000) rospy.loginfo("ROS begins ...") #======================================= with tf.name_scope("Inference"): RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1] RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1] RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred+RNet_rpn_yaw_gt_delta rpn_rois_3d = self.net.get_output('rpn_rois')[1] with tf.name_scope('view_rpn_bv_tb'): roi_bv = self.net.get_output('rpn_rois')[0] data_bv = self.net.lidar_bv_data image_rpn = tf.reshape(test_show_rpn_tf(data_bv,roi_bv), (1, 601, 601, -1)) tf.summary.image('lidar_bv_test', image_rpn) merged = tf.summary.merge_all() with tf.name_scope('load_weights'): weights = self.args.weights if weights.endswith('.ckpt'): print 'Loading test model weights from {:s}'.format(self.args.weights) self.saver.restore(sess, weights) else: print "error: Function [combinet_test.testing] can not load weights {:s}!".format(self.args.weights) return 0 vispy_init() # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow timer = Timer() for idx in range(0,self.epoch): # index_ = input('Type a new index: ') blobs = self.dataset.get_minibatch(idx) feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.calib: blobs['calib']} run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() pred_yaw_toshow_,rpn_rois_3d_,summary = \ sess.run([RNet_rpn_yaw_pred_toshow,rpn_rois_3d,merged], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() if idx % 3 ==0 and cfg.TEST.DEBUG_TIMELINE: # chrome://tracing trace = timeline.Timeline(step_stats=run_metadata.step_stats) trace_file = open(cfg.LOG_DIR + '/' +'testing-step-'+ str(idx).zfill(7) + '.ctf.json', 'w') trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if idx % cfg.TEST.ITER_DISPLAY == 0: pass print 'Test: %06d/%06d speed: %.4f s / iter' % (idx+1, self.epoch, timer.average_time) if VISION_DEBUG: scan = blobs['lidar3d_data'] img = blobs['image_data'] cubic_cls_value = np.ones([cfg.TRAIN.RPN_POST_NMS_TOP_N],dtype=np.float32)*0 boxes=BoxAry_Theta(pre_box3d=rpn_rois_3d_,pre_theta_value=pred_yaw_toshow_,pre_cube_cls=cubic_cls_value)# RNet_rpn_yaw_pred_toshow_ rpn_rois_3d_[:,-1] if USE_ROS: from tools.data_visualize import PointCloud_Gen,Boxes_labels_Gen,Image_Gen pointcloud = PointCloud_Gen(scan) label_boxes = Boxes_labels_Gen(boxes, ns='Predict') img_ros = Image_Gen(img) pub.publish(pointcloud) img_pub.publish(img_ros) box_pub.publish(label_boxes) else: pcd_vispy(scan, img, boxes,index=idx, save_img=True,#cfg.TEST.SAVE_IMAGE, visible=False, name='CubicNet testing') if idx % 1 == 0 and cfg.TEST.TENSORBOARD: test_writer.add_summary(summary, idx) pass print 'Testing process has done, happy every day !'
def testing(self, sess, test_writer): # ======================================= if USE_ROS: import rospy from sensor_msgs.msg import PointCloud,Image from visualization_msgs.msg import MarkerArray, Marker from tools.data_visualize import Boxes_labels_Gen, Image_Gen,PointCloud_Gen rospy.init_node('rostensorflow') pub = rospy.Publisher('prediction', PointCloud, queue_size=1000) img_pub = rospy.Publisher('images_rgb', Image, queue_size=1000) box_pub = rospy.Publisher('label_boxes', MarkerArray, queue_size=1000) rospy.loginfo("ROS begins ...") # ======================================= with tf.name_scope("Inference"): # RNet_rpn_yaw_pred = self.net.get_output('RNet_theta')[1] # RNet_rpn_yaw_gt_delta = self.net.get_output('cubic_grid')[1] # RNet_rpn_yaw_pred_toshow = RNet_rpn_yaw_pred+RNet_rpn_yaw_gt_delta rpn_rois_3d = self.net.get_output('rpn_rois')[1] with tf.name_scope('view_rpn_bv_tb'): # roi_bv = self.net.get_output('rpn_rois')[0] # data_bv = self.net.lidar_bv_data # image_rpn = tf.reshape(test_show_rpn_tf(data_bv,roi_bv), (1, 601, 601, -1)) # tf.summary.image('lidar_bv_test', image_rpn) feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[0],axis=-2),[2,0,1]),[-1,30,30,1]) tf.summary.image('shape_extractor_P1', feature,max_outputs=50) # feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[1],axis=-1),[2,0,1]),[-1,30,30,1]) # tf.summary.image('shape_extractor_P2', feature,max_outputs=10) # feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[-1],axis=-1),[2,0,1]),[-1,30,30,1]) # tf.summary.image('shape_extractor_N1', feature,max_outputs=3) # feature = tf.reshape(tf.transpose(tf.reduce_sum(self.net.watcher[-2],axis=-1),[2,0,1]),[-1,30,30,1]) # tf.summary.image('shape_extractor_N2', feature,max_outputs=3) merged = tf.summary.merge_all() with tf.name_scope('load_weights'): print 'Loading pre-trained model weights from {:s}'.format(self.args.weights) self.net.load_weigths(self.args.weights, sess, self.saver) self.net.load_weigths(self.args.weights_cube, sess, self.saver,specical_flag=True) vispy_init() # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow timer = Timer() cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2]) for idx in range(0,self.epoch,1): # index_ = input('Type a new index: ') blobs = self.dataset.get_minibatch(idx) feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], # self.net.calib: blobs['calib'] } run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() cubic_cls_score_,rpn_rois_3d_,summary = sess.run([cubic_cls_score,rpn_rois_3d,merged] ,feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() if idx % 3 ==0 and cfg.TEST.DEBUG_TIMELINE: # chrome://tracing trace = timeline.Timeline(step_stats=run_metadata.step_stats) trace_file = open(cfg.LOG_DIR + '/' +'testing-step-'+ str(idx).zfill(7) + '.ctf.json', 'w') trace_file.write(trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if idx % cfg.TEST.ITER_DISPLAY == 0: pass print 'Test: %06d/%06d speed: %.4f s / iter' % (idx+1, self.epoch, timer.average_time) if VISION_DEBUG: scan = blobs['lidar3d_data'] img = blobs['image_data'] cubic_cls_value = cubic_cls_score_.argmax(axis=1) if USE_ROS: import numpy as np from tools.data_visualize import PointCloud_Gen,Boxes_labels_Gen,Image_Gen pointcloud = PointCloud_Gen(scan) label_boxes = 
Boxes_labels_Gen(rpn_rois_3d_, ns='Predict') img_ros = Image_Gen(img) pub.publish(pointcloud) img_pub.publish(img_ros) box_pub.publish(label_boxes) else: boxes = BoxAry_Theta(pre_box3d=rpn_rois_3d_,pre_cube_cls=cubic_cls_value) # RNet_rpn_yaw_pred_toshow_ rpn_rois_3d_[:,-1] pcd_vispy(scan, img, boxes,index=idx, save_img=False,#cfg.TEST.SAVE_IMAGE, visible=True, name='CubicNet testing') if idx % 1 == 0 and cfg.TEST.TENSORBOARD: test_writer.add_summary(summary, idx) pass print 'Testing process has done, happy every day !'
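# When USE_ROS is set, the test loops above publish the scan through a helper
# called PointCloud_Gen from tools.data_visualize.  Its implementation is not
# shown in this file; roughly, such a helper has to wrap the (N, 4) lidar array
# into a sensor_msgs/PointCloud message, e.g. along these lines (sketch only,
# frame_id and function name are assumptions):
import rospy
from sensor_msgs.msg import PointCloud
from geometry_msgs.msg import Point32

def pointcloud_from_scan(scan, frame_id='velodyne'):
    """scan: (N, >=3) numpy array with x, y, z in the first three columns."""
    msg = PointCloud()
    msg.header.stamp = rospy.Time.now()
    msg.header.frame_id = frame_id
    msg.points = [Point32(x=float(p[0]), y=float(p[1]), z=float(p[2])) for p in scan]
    return msg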
def train_per_epoch(self, epoch): epoch_size = int(len(self.train_loader)) batch_iterator = iter(self.train_loader) train_end = int(epoch_size * 0.8) print('epoch_size ', epoch_size, " train_end ", train_end) conf_loss = 0 _t = Timer() conf_loss_v = 0 for iteration in range(epoch_size): images, targets = next(batch_iterator) # print('imgs from data_load shape ', images.shape) targets = np.array(targets) # print('iteration ', iteration) if iteration > train_end and iteration < train_end + 10: if self.use_gpu: images = Variable(images.cuda()) self.visualize_epoch(images, epoch) if iteration <= train_end: if self.use_gpu: images = Variable(images.cuda()) # targets = [Variable(anno.cuda(), volatile=True) for anno in targets] else: images = Variable(images) self.model.train() #train: _t.tic() out = self.model(images, phase='train', targets=targets) self.optimizer.zero_grad() # print('out ', out) # print('targets ', targets.shape) loss_c = self.criterion(out, targets) # some bugs in coco train2017. maybe the annonation bug. if loss_c.data[0] == float("Inf"): continue if math.isnan(loss_c.data[0]): continue # if loss_c.data[0] > 100000000: # continue loss_c.backward() self.optimizer.step() time = _t.toc() conf_loss += loss_c.data[0] # log per iter log = '\r==>Train: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format( prograss='#' * int(round(10 * iteration / epoch_size)) + '-' * int(round(10 * (1 - iteration / epoch_size))), iters=iteration, epoch_size=epoch_size, time=time, cls_loss=loss_c.data[0]) sys.stdout.write(log) sys.stdout.flush() if iteration == train_end: # log per epoch sys.stdout.write('\r') sys.stdout.flush() lr = self.optimizer.param_groups[0]['lr'] log = '\r==>Train: || Total_time: {time:.3f}s || conf_loss: {conf_loss:.4f} || lr: {lr:.6f}\n'.format( lr=lr, time=_t.total_time, conf_loss=conf_loss / epoch_size) sys.stdout.write(log) sys.stdout.flush() # print(log) # log for tensorboard self.writer.add_scalar('Train/conf_loss', conf_loss / epoch_size, epoch) self.writer.add_scalar('Train/lr', lr, epoch) conf_loss = 0 if iteration > train_end: # self.visualize_epoch(model, images[0], targets[0], self.priorbox, writer, epoch, use_gpu) #eval: if self.use_gpu: images = Variable(images.cuda()) else: images = Variable(images) # self.model.eval() out = self.model(images, phase='eval') # loss loss_c = self.criterion(out, targets) if loss_c.data[0] == float("Inf"): continue if math.isnan(loss_c.data[0]): continue # if loss_c.data[0] > 100000000: # continue time = _t.toc() conf_loss_v += loss_c.data[0] # log per iter log = '\r==>Eval: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format( prograss='#' * int(round(10 * iteration / epoch_size)) + '-' * int(round(10 * (1 - iteration / epoch_size))), iters=iteration, epoch_size=epoch_size, time=time, cls_loss=loss_c.data[0]) #print(log) sys.stdout.write(log) sys.stdout.flush() # self.writer.add_scalar('Eval/conf_loss', conf_loss_v/epoch_size, epoch) if iteration == (epoch_size - 1): # eval mAP # prec, rec, ap = cal_pr(label, score, npos) # log per epoch sys.stdout.write('\r') sys.stdout.flush() log = '\r==>Eval: || {iters:d}/{epoch_size:d} in {time:.3f}s [{prograss}] || cls_loss: {cls_loss:.4f}\r'.format( prograss='#' * int(round(10 * iteration / epoch_size)) + '-' * int(round(10 * (1 - iteration / epoch_size))), iters=iteration, epoch_size=epoch_size, time=time, cls_loss=loss_c.data[0]) sys.stdout.write(log) sys.stdout.flush() # log for tensorboard 
self.writer.add_scalar('Eval/conf_loss', conf_loss_v / epoch_size, epoch)
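# Sketch: skipping non-finite losses.
# train_per_epoch above drops any batch whose classification loss is Inf or NaN
# (e.g. a corrupt COCO train2017 annotation) instead of back-propagating it.
# A minimal sketch of that guard, assuming a newer PyTorch where loss.item()
# replaces the loss.data[0] indexing used above; `model`, `criterion` and
# `optimizer` stand in for the attributes used in the loop.
import math

def safe_step(model, criterion, optimizer, images, targets):
    """Run one update; return the loss value, or None if the batch was skipped."""
    out = model(images, phase='train', targets=targets)
    loss = criterion(out, targets)
    value = loss.item()
    if math.isinf(value) or math.isnan(value):
        return None  # skip the update rather than poisoning the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return value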
def train(self, fold_num): train_holder, seg_holder, dst_holder = self.provider.get_train_holder() model = self.model_class(self.is_training) inference_op = model.inference_op(train_holder) if cfg.use_dst_weight == True: loss_op, acc_op = model.loss_op(inference_op, seg_holder, dst_holder) else: loss_op, acc_op = model.loss_op(inference_op, seg_holder) train_op = self._get_optimizer(loss_op) merged = tf.summary.merge_all() self._count_trainables() log_output_path = os.path.join(self.output_path, "log") if not os.path.exists(log_output_path): os.makedirs(log_output_path) model_output_path = os.path.join(self.output_path, "model") if not os.path.exists(model_output_path): os.makedirs(model_output_path) loss_txt_path = os.path.join(self.output_path, "loss") if not os.path.exists(loss_txt_path): os.makedirs(loss_txt_path) train_writer = tf.summary.FileWriter( os.path.join(log_output_path, "train")) test_writer = tf.summary.FileWriter( os.path.join(log_output_path, "val")) line_buffer = 1 config = tf.ConfigProto() config.gpu_options.allow_growth = True config = config with tf.Session(config=config) as sess: sess.run(tf.global_variables_initializer()) saver = tf.train.Saver(max_to_keep=1) train_timer = Timer() load_timer = Timer() # if model checkpoint exist, then load last checkpoint #self._load_model(saver, sess, model_output_path) with open(file=loss_txt_path + '/loss_' + cfg.name + str(fold_num) + '.txt', mode='w', buffering=line_buffer) as loss_log: for step in range(self.train_step): if cfg.use_dst_weight == True: load_timer.tic() image, label, weights = self.provider.get_train_value( with_weight=cfg.use_dst_weight) image_val, label_val, val_weights = self.provider.get_val_value( with_weight=cfg.use_dst_weight) load_timer.toc() train_timer.tic() train_merge, train_loss, _, train_acc = sess.run( [merged, loss_op, train_op, acc_op], feed_dict={ train_holder: image, seg_holder: label, dst_holder: weights }) valid_merge, val_loss, val_acc = sess.run( [merged, loss_op, acc_op], feed_dict={ train_holder: image_val, seg_holder: label_val, dst_holder: val_weights, self.is_training: False }) train_timer.toc() else: load_timer.tic() image, label = self.provider.get_train_value( with_weight=cfg.use_dst_weight) image_val, label_val = self.provider.get_val_value( with_weight=cfg.use_dst_weight) load_timer.toc() train_timer.tic() train_merge, train_loss, _, train_acc = sess.run( [merged, loss_op, train_op, acc_op], feed_dict={ train_holder: image, seg_holder: label }) valid_merge, val_loss, val_acc = sess.run( [merged, loss_op, acc_op], feed_dict={ train_holder: image_val, seg_holder: label_val, self.is_training: False }) train_timer.toc() #if val_loss < self.min_valid_loss: #self.min_valid_loss = val_loss #saver.save(sess, os.path.join(self.output_path, "model/model_%d_%.6f"%(fold_num,self.min_valid_loss))) if np.mod(step + 1, self.save_interval) == 0: #saver_final = tf.train.Saver(max_to_keep=1) saver.save( sess, os.path.join(self.output_path, "model/model_saved_%d" % fold_num)) #saver_final.save(sess, os.path.join(self.output_path, "model_final/model_saved_%d"%fold_num)) '''train_merge, train_loss, t_dice_loss, t_weight_loss, m_dice_loss, m_weight_loss,_ = sess.run([merged, loss_op, total_dice_loss, total_weight_loss, main_dice_loss, main_weight_loss,train_op], feed_dict={train_holder: image, seg_holder: label})''' '''train_merge, train_loss, t_dice_loss, t_focal_loss, m_dice_loss, m_focal_loss, _ = sess.run( [merged, loss_op, total_dice_loss, total_focal_loss, main_dice_loss, main_focal_loss, 
train_op], feed_dict={train_holder: image, seg_holder: label})''' '''train_merge, train_loss, t_dice_loss, m_dice_loss, _ = sess.run( [merged, loss_op, total_dice_loss, main_dice_loss, train_op], feed_dict={train_holder: image, seg_holder: label})''' '''output_format = '[Epoch]%d, Speed: %.3fs/iter,Load: %.3fs/iter, Remain: %s' \ ' train_loss: %.8f, valid_loss: %.8f\n' \ '[Loss]dice_loss: %.8f,main_dice_loss: %.8f \n' \ % (step, train_timer.average_time, load_timer.average_time, train_timer.remain(step, self.train_step), train_loss, val_loss, t_dice_loss, m_dice_loss)''' '''output_format = '[Epoch]%d, Speed: %.3fs/iter,Load: %.3fs/iter, Remain: %s' \ ' train_loss: %.8f, valid_loss: %.8f\n' \ '[Loss]dice_loss: %.8f, focal_loss: %.8f, main_dice_loss: %.8f, main_focal_loss: %.8f\n' \ % (step, train_timer.average_time, load_timer.average_time, train_timer.remain(step,self.train_step),train_loss, val_loss, t_dice_loss, t_focal_loss, m_dice_loss, m_focal_loss)''' '''output_format = 'Epoch:%d,Speed: %.3fs/iter,Load: %.3fs/iter,Remain: %s\n'\ 'train_loss: %.8f,valid_loss: %.8f,main_dice_loss: %.8f,main_weight_loss: %.8f'\ % (step, train_timer.average_time, load_timer.average_time, train_timer.remain(step, self.train_step), train_loss, val_loss, m_dice_loss, m_weight_loss)''' '''output_format = '[Epoch]%d, Speed: %.3fs/iter,Load: %.3fs/iter, Remain: %s' \ ' train_loss: %.8f, valid_loss: %.8f\n' \ '[Loss] main_jacc_loss: %.8f, auxi_jacc_loss: %.8f\n' \ % (step, train_timer.average_time, load_timer.average_time, train_timer.remain(step, self.train_step), train_loss, val_loss, main_jacc_loss, auxi_jacc_loss)''' output_format = "train loss: %f, valid loss: %f, train accuracy: %f, val accuracy: %f, step: %d" % \ (train_loss, val_loss, train_acc, val_acc, step) print(output_format) train_writer.add_summary(train_merge, step) test_writer.add_summary(valid_merge, step) if step % 5 == 0: loss_log.write(output_format + '\n') #if np.mod(step + 1, self.save_interval) == 0: #saver.save(sess, os.path.join(self.output_path, "model/model_saved_%d"%fold_num)) train_writer.close() test_writer.close()
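# Sketch: periodic checkpointing.
# The training loop above snapshots the model every `save_interval` steps,
# always writing to the same path, so only the latest snapshot survives on
# disk. A minimal sketch of that pattern under the same `output_path` and
# `fold_num` naming used above; it is a sketch, not the project's exact helper.
import os

def maybe_snapshot(saver, sess, step, save_interval, output_path, fold_num):
    """Save a checkpoint when (step + 1) is a multiple of save_interval."""
    if (step + 1) % save_interval == 0:
        save_path = os.path.join(output_path, "model", "model_saved_%d" % fold_num)
        saver.save(sess, save_path)  # same path each time, previous snapshot is overwritten

# usage inside the step loop, with a single-slot saver as above:
#   saver = tf.train.Saver(max_to_keep=1)
#   for step in range(self.train_step):
#       ...
#       maybe_snapshot(saver, sess, step, self.save_interval, self.output_path, fold_num)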
def training(self, sess): with tf.name_scope('loss_cube'): cube_score = self.network.cube_score cube_label = self.network.cube_label if self.arg.focal_loss: alpha = [1.0, 1.0] gamma = 2 cube_probi = tf.nn.softmax(cube_score) tmp = tf.one_hot(cube_label, depth=2) * ( (1 - cube_probi)** gamma) * tf.log([cfg.EPS, cfg.EPS] + cube_probi) * alpha cube_cross_entropy = tf.reduce_mean( -tf.reduce_sum(tmp, axis=1)) else: cube_probi = tf.nn.softmax(cube_score) # use for debug tmp = tf.nn.sparse_softmax_cross_entropy_with_logits( logits=cube_score, labels=cube_label) cube_cross_entropy = tf.reduce_mean(tmp) loss = cube_cross_entropy with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(self.arg.lr, global_step, 1000, 0.90, name='decay-Lr') train_op = tf.train.MomentumOptimizer(lr, momentum=0.9).minimize( loss, global_step=global_step) with tf.name_scope('train_cubic'): extractor_int = self.network.extractor_int extractor_float = self.network.extractor_weighs_float extractor_outs = self.network.extractor_outs #(160, 30, 30, 15, 32) # extractor_F_grad = tf.gradients(loss, extractor_float) # extractor_Int_grad = tf.gradients(loss, extractor_int) # conv1_grad = tf.gradients(loss, self.network.conv1) # conv2_grad = tf.gradients(loss, self.network.conv2) # conv3_grad = tf.gradients(loss, self.network.conv3) # fc1_grad = tf.gradients(loss, self.network.fc1) # fc2_grad = tf.gradients(loss, self.network.fc2) watch_data_idx = 0 inputs_cube = tf.reshape( tf.reduce_sum(tf.squeeze( self.network.cube_input[watch_data_idx, ...]), axis=-1, keep_dims=True), [-1, 30, 30, 1]) tf.summary.image('extractor_int', tf.reshape(extractor_int, [1, 27, -1, 1])) data0_kernel0_outs = tf.transpose( tf.reshape(extractor_outs[0, :, :, 2, :], [1, 30, 30, -1]), [3, 1, 2, 0]) data0_kernel1_outs = tf.transpose( tf.reshape(extractor_outs[1, :, :, 2, :], [1, 30, 30, -1])) data0_kernel2_outs = tf.transpose( tf.reshape(extractor_outs[2, :, :, 2, :], [1, 30, 30, -1])) data0_kernel3_outs = tf.transpose( tf.reshape(extractor_outs[3, :, :, 2, :], [1, 30, 30, -1])) tf.summary.image('extractor_inputs_cube', inputs_cube) tf.summary.image('extractor_outs1', data0_kernel0_outs, max_outputs=50) # tf.summary.image('extractor_outs2', data0_kernel1_outs,max_outputs=50) # tf.summary.image('extractor_outs3', data0_kernel2_outs,max_outputs=50) # tf.summary.image('extractor_outs2', data0_kernel3_outs,max_outputs=50) # tf.summary.image('extractor_two', tf.reshape(tf.transpose(extractor_int),[32,9,3,1])) # tf.summary.image('extractor_float', tf.reshape(extractor_float, [-1, 27, 32, 1])) # tf.summary.image('conv1_kernel', tf.reshape(self.network.conv1[0], [-1, 27, 32, 1]), max_outputs=3) # tf.summary.image('conv2_kernel', tf.reshape(self.network.conv2[0], [-1, 27, 64, 1]), max_outputs=3) # tf.summary.image('conv3_kernel', tf.reshape(self.network.conv3[0], [-1, 27, 128, 1]), max_outputs=3) # # tf.summary.histogram('float_grad', extractor_F_grad) # tf.summary.histogram('Int_grad', extractor_Int_grad) # tf.summary.histogram('conv1_grad', conv1_grad[0]) # tf.summary.histogram('conv2_grad', conv2_grad[0]) # tf.summary.histogram('conv3_grad', conv3_grad[0]) # tf.summary.histogram('fc1_grad', fc1_grad[0]) # tf.summary.histogram('fc2_grad', fc2_grad[0]) tf.summary.scalar('total_loss', loss) glb_var = tf.global_variables() # for var in glb_var: # tf.summary.histogram(var.name, var) merged_op = tf.summary.merge_all() with tf.name_scope('valid_cubic'): epoch_cubic_recall = 
tf.placeholder(dtype=tf.float32) cubic_recall_smy_op = tf.summary.scalar('cubic_recall', epoch_cubic_recall) epoch_cubic_precise = tf.placeholder(dtype=tf.float32) cubic_precise_smy_op = tf.summary.scalar('cubic_precise', epoch_cubic_precise) epoch_extractor_occupy = tf.placeholder(dtype=tf.float32) cubic_occupy_smy_op = tf.summary.scalar('extractor_occupy', epoch_extractor_occupy) valid_summary_op = tf.summary.merge([ cubic_recall_smy_op, cubic_precise_smy_op, cubic_occupy_smy_op ]) with tf.name_scope('load_weights'): sess.run(tf.global_variables_initializer()) if self.arg.weights is not None: self.network.load_weigths(self.arg.weights, sess, self.saver) print 'Loading pre-trained model weights from {:s}'.format( red(self.arg.weights)) else: print 'The network will be {} from default initialization!'.format( yellow('re-trained')) timer = Timer() if DEBUG: pass vispy_init() cube_label_gt = np.concatenate( (np.ones([self.arg.batch_size]), np.zeros([self.arg.batch_size ]))).astype(np.int32) train_epoch_cnt = int(self.dataset.train_positive_cube_cnt / self.arg.batch_size / 2) training_series = range( train_epoch_cnt) # range(train_epoch_cnt) # train_epoch_cnt for epo_cnt in range(self.arg.epoch_iters): for data_idx in training_series: iter = global_step.eval() timer.tic() series = self.train_series_Gen(self.arg.batch_size, 'train') data_batchP = self.dataset.get_minibatch(series[0], data_type='train', classify='positive') data_batchN = self.dataset.get_minibatch(series[1], data_type='train', classify='negative') data_batch = np.vstack((data_batchP, data_batchN)) timer.toc() time1 = timer.average_time timer.tic() if self.arg.use_aug_data_method: data_aug = self.cube_augmentation(data_batch, aug_data=True, DEBUG=False) else: data_aug = data_batch timer.toc() time2 = timer.average_time if DEBUG: a = data_batch[data_idx].sum() b = data_batch[data_idx].sum() if a != b: print 'There is some points loss' else: print 'points cnt: ', a box_np_view(data_aug[data_idx], data_aug[data_idx + self.arg.batch_size]) feed_dict = { self.network.cube_input: data_aug, self.network.cube_label: cube_label_gt, } timer.tic() extractor_outs_,extractor_int_, extractor_float_, cube_probi_, cube_label_, loss_, merge_op_, _ = \ sess.run([extractor_outs, extractor_int, extractor_float, cube_probi, cube_label, loss, merged_op, train_op], feed_dict=feed_dict) timer.toc() # print extractor_outs_.shape,"Look here!" if iter % 4 == 0: predict_result = cube_probi_.argmax(axis=1) one_train_hist = fast_hist(cube_label_gt, predict_result) occupy_part_pos = (extractor_int_.reshape( -1) == 1.0).astype(float).sum() / extractor_int_.size occupy_part_neg = (extractor_int_.reshape( -1) == -1.0).astype(float).sum() / extractor_int_.size print 'Training step: {:3d} loss: {:.4f} occupy: +{}% vs -{}% inference_time: {:.3f} '. 
\ format(iter, loss_, int(occupy_part_pos * 100), int(occupy_part_neg * 100), timer.average_time) # print(' class bg precision = {:.3f} recall = {:.3f}'.format( # (one_train_hist[0, 0] / (one_train_hist[0, 0] + one_train_hist[1, 0] + 1e-6)), # (one_train_hist[0, 0] / (one_train_hist[0, 0] + one_train_hist[0, 1] + 1e-6)))) print ' class car precision = {:.3f} recall = {:.3f}'.format( (one_train_hist[1, 1] / (one_train_hist[1, 1] + one_train_hist[0, 1] + 1e-6)), (one_train_hist[1, 1] / (one_train_hist[1, 1] + one_train_hist[1, 0] + 1e-6))), '\n' if socket.gethostname() == "szstdzcp0325" and False: with self.printoptions(precision=2, suppress=False, linewidth=10000): print 'scores: {}'.format(cube_probi_[:, 1]) print 'divine:', str(predict_result) print 'labels:', str(cube_label_), '\n' if iter % 1 == 0 and cfg.TRAIN.TENSORBOARD: pass self.writer.add_summary(merge_op_, iter) if (iter % 3000 == 0 and cfg.TRAIN.DEBUG_TIMELINE) or iter == 200: if socket.gethostname() == "szstdzcp0325": run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() _ = sess.run([cube_score], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) # chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format( show_memory=False)) trace_file.close() if epo_cnt % 10 == 0 and cfg.TRAIN.EPOCH_MODEL_SAVE: pass self.snapshot(sess, epo_cnt) if cfg.TRAIN.USE_VALID: with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) hist = np.zeros((cfg.NUM_CLASS, cfg.NUM_CLASS), dtype=np.float32) valid_epoch_cnt = int( self.dataset.valid_positive_cube_cnt / self.arg.batch_size / 2) for data_idx in range(valid_epoch_cnt): series = self.train_series_Gen(self.arg.batch_size, 'valid') data_batchP = self.dataset.get_minibatch( series[0], data_type='valid', classify='positive') data_batchN = self.dataset.get_minibatch( series[1], data_type='valid', classify='negative') data_batch = np.vstack((data_batchP, data_batchN)) feed_dict_ = { self.network.cube_input: data_batch, self.network.cube_label: cube_label_gt, } valid_cls_score_ = sess.run(cube_score, feed_dict=feed_dict_) valid_result = valid_cls_score_.argmax(axis=1) one_hist = fast_hist(cube_label_gt, valid_result) hist += one_hist if cfg.TRAIN.VISUAL_VALID: print 'Valid step: {:d}/{:d}'.format( data_idx + 1, valid_epoch_cnt) print( ' class bg precision = {:.3f} recall = {:.3f}' .format( (one_hist[0, 0] / (one_hist[0, 0] + one_hist[1, 0] + 1e-6)), (one_hist[0, 0] / (one_hist[0, 0] + one_hist[0, 1] + 1e-6))) ) print( ' class car precision = {:.3f} recall = {:.3f}' .format( (one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1] + 1e-6)), (one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0] + 1e-6))) ) if data_idx % 20 == 0 and cfg.TRAIN.TENSORBOARD: pass # train_writer.add_summary(valid_result_, data_idx/20+epo_cnt*1000) valid_extractor_int_ = sess.run(extractor_int) extractor_occupy = valid_extractor_int_.sum( ) / valid_extractor_int_.size precise_total = hist[1, 1] / (hist[1, 1] + hist[0, 1] + 1e-6) recall_total = hist[1, 1] / (hist[1, 1] + hist[1, 0] + 1e-6) valid_res = sess.run(valid_summary_op, feed_dict={ epoch_cubic_recall: recall_total, epoch_cubic_precise: precise_total, epoch_extractor_occupy: extractor_occupy }) self.writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}: 
cubic_precision = {:.3f} cubic_recall = {:.3f}' \ .format(epo_cnt + 1, precise_total, recall_total) self.shuffle_series() print yellow('Training process has done, enjoy every day !')
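# Sketch: the two-class focal loss used in the loss_cube scope above.
# The focal-loss branch implements FL(p) = -alpha * (1 - p)**gamma * log(p) on
# the softmax probabilities, with a small epsilon added before the log for
# numerical stability. A minimal TF 1.x sketch of that computation; `eps`
# stands in for cfg.EPS and the default alpha/gamma match the values above.
import tensorflow as tf

def focal_loss(logits, labels, alpha=(1.0, 1.0), gamma=2.0, eps=1e-5):
    """logits: [N, 2] class scores; labels: [N] integer class ids."""
    probs = tf.nn.softmax(logits)                     # p for each class
    onehot = tf.one_hot(labels, depth=2)              # selects p of the true class
    weighted = (onehot
                * tf.pow(1.0 - probs, gamma)
                * tf.log(probs + eps)
                * tf.constant(alpha, dtype=tf.float32))
    return tf.reduce_mean(-tf.reduce_sum(weighted, axis=1))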
def training(self, sess, train_writer): with tf.name_scope('loss_cubic'): cubic_cls_score = tf.reshape(self.net.get_output('cubic_cnn'), [-1, 2]) cubic_cls_labels = tf.reshape( tf.cast(self.net.get_output('rpn_rois')[:, -2], tf.int64), [-1]) if not cfg.TRAIN.FOCAL_LOSS: cubic_cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( logits=cubic_cls_score, labels=cubic_cls_labels)) else: # alpha = [0.75,0.25] # 0.25 for label=1 gamma = 2 cubic_cls_probability = tf.nn.softmax(cubic_cls_score) # formula : Focal Loss for Dense Object Detection: FL(p)= -((1-p)**gama)*log(p) cubic_cross_entropy = tf.reduce_mean(-tf.reduce_sum( tf.one_hot(cubic_cls_labels, depth=2) * ((1 - cubic_cls_probability)**gamma) * tf.log([cfg.EPS, cfg.EPS] + cubic_cls_probability), axis=1)) loss = cubic_cross_entropy with tf.name_scope('train_op'): global_step = tf.Variable(1, trainable=False, name='Global_Step') lr = tf.train.exponential_decay(cfg.TRAIN.LEARNING_RATE, global_step, 10000, 0.996, name='decay-Lr') train_op = tf.train.AdamOptimizer(lr).minimize( loss, global_step=global_step) with tf.name_scope('train_cubic'): tf.summary.scalar('total_loss', loss) # bv_anchors = self.net.get_output('rpn_anchors_label')[2] # roi_bv = self.net.get_output('rpn_rois')[0] # data_bv = self.net.lidar_bv_data # data_gt = self.net.gt_boxes_bv # image_rpn = tf.reshape(show_rpn_tf(data_bv, data_gt, bv_anchors, roi_bv), (1, 601, 601, -1)) # tf.summary.image('lidar_bv_test', image_rpn) glb_var = tf.global_variables() for i in range(len(glb_var)): # print glb_var[i].name if 'moving' not in str(glb_var[i].name): if 'Adam' not in str(glb_var[i].name): if 'weights' not in str(glb_var[i].name): if 'rpn' not in str(glb_var[i].name): if 'biases' not in str(glb_var[i].name): if 'beta' not in str(glb_var[i].name): if 'gamma' not in str(glb_var[i].name): if 'batch' not in str( glb_var[i].name): tf.summary.histogram( glb_var[i].name, glb_var[i]) merged = tf.summary.merge_all() with tf.name_scope('valid_cubic'): epoch_rpn_recall = tf.placeholder(dtype=tf.float32) rpn_recall_smy_op = tf.summary.scalar('rpn_recall', epoch_rpn_recall) epoch_cubic_recall = tf.placeholder(dtype=tf.float32) cubic_recall_smy_op = tf.summary.scalar('cubic_recall', epoch_cubic_recall) epoch_cubic_precise = tf.placeholder(dtype=tf.float32) cubic_prec_smy_op = tf.summary.scalar('cubic_precise', epoch_cubic_precise) sess.run(tf.global_variables_initializer()) if self.args.fine_tune: if True: # #full graph restore print 'Loading pre-trained model weights from {:s}'.format( self.args.weights) self.net.load(self.args.weights, sess, self.saver, True) else: # #part graph restore # # METHOD one # ref_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,scope=['vgg_feat_fc']) # saver1 = tf.train.Saver(ref_vars) # saver1.restore(sess, self.args.weights) # # METHOD two reader = pywrap_tensorflow.NewCheckpointReader( self.args.weights) var_to_shape_map = reader.get_variable_to_shape_map() with tf.variable_scope('', reuse=tf.AUTO_REUSE) as scope: for key in var_to_shape_map: try: var = tf.get_variable(key, trainable=False) sess.run(var.assign(reader.get_tensor(key))) print " Assign pretrain model: " + key except ValueError: print " Ignore variable:" + key trainable_var_for_chk = tf.trainable_variables( ) #tf.get_collection(ops.GraphKeys.TRAINABLE_VARIABLES) print 'Variables to training: ', trainable_var_for_chk timer = Timer() rpn_rois = self.net.get_output('rpn_rois') cubic_grid = self.net.get_output('cubic_grid') cubic_cnn = self.net.get_output('cubic_cnn') 
if DEBUG: vispy_init( ) # TODO: Essential step(before sess.run) for using vispy beacuse of the bug of opengl or tensorflow # vision_qt = Process(target=pcd_vispy_client, args=(MSG_QUEUE,)) # vision_qt.start() # print 'Process vision_qt started ...' training_series = range(self.epoch) # self.epoch for epo_cnt in range(self.args.epoch_iters): for data_idx in training_series: # DO NOT EDIT the "training_series",for the latter shuffle iter = global_step.eval( ) # function "minimize()"will increase global_step blobs = self.dataset.get_minibatch(data_idx, 'train') # get one batch feed_dict = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'] } run_options = tf.RunOptions( trace_level=tf.RunOptions.FULL_TRACE) run_metadata = tf.RunMetadata() timer.tic() cubic_cls_score_, cubic_cls_labels_, rpn_rois_, cubic_cnn_, cubic_grid_, loss_, merged_, _ = sess.run( [ cubic_cls_score, cubic_cls_labels, rpn_rois, cubic_cnn, cubic_grid, loss, merged, train_op ], feed_dict=feed_dict, options=run_options, run_metadata=run_metadata) timer.toc() cubic_result = cubic_cls_score_.argmax(axis=1) one_hist = fast_hist(cubic_cls_labels_, cubic_result) cubic_car_cls_prec = one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1] + 1e-5) cubic_car_cls_recall = one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0] + 1e-5) if iter % 1000 == 0 and cfg.TRAIN.DEBUG_TIMELINE: #chrome://tracing trace = timeline.Timeline( step_stats=run_metadata.step_stats) trace_file = open( cfg.LOG_DIR + '/' + 'training-StiData-step-' + str(iter).zfill(7) + '.ctf.json', 'w') trace_file.write( trace.generate_chrome_trace_format(show_memory=False)) trace_file.close() if iter % cfg.TRAIN.ITER_DISPLAY == 0: print 'Iter: %d / %d, loss: %.3f' % ( iter, self.args.epoch_iters * self.epoch, loss_, ) print 'Cubic classify precise: {:.3f} recall: {:.3f}'.format( cubic_car_cls_prec, cubic_car_cls_recall) print 'Speed: {:.3f}s / iter'.format(timer.average_time) print 'divine: ', cubic_result print 'labels: ', cubic_cls_labels_ if iter % 10 == 0 and cfg.TRAIN.TENSORBOARD: train_writer.add_summary(merged_, iter) pass if iter % cfg.TRAIN.SNAPSHOT_ITERS == 0: self.snapshot(sess, iter) pass if DEBUG: scan = blobs['lidar3d_data'] gt_box3d = blobs['gt_boxes_3d'][:, (0, 1, 2, 3, 4, 5, 6)] gt_box3d = np.hstack( (gt_box3d, np.ones([gt_box3d.shape[0], 2]) * 4)) pred_boxes = np.hstack( (rpn_rois_, cubic_result.reshape(-1, 1) * 2)) bbox = np.vstack((pred_boxes, gt_box3d)) # msg = msg_qt(scans=scan, boxes=bbox,name='CubicNet training') # MSG_QUEUE.put(msg) pcd_vispy(scan, boxes=bbox, name='CubicNet training') random.shuffle(training_series) # shuffle the training series if cfg.TRAIN.USE_VALID: with tf.name_scope('valid_cubic_' + str(epo_cnt + 1)): print 'Valid the net at the end of epoch_{} ...'.format( epo_cnt + 1) # roi_bv = self.net.get_output('rpn_rois')[0] # bv_anchors = self.net.get_output('rpn_anchors_label')[2] # pred_rpn_ = show_rpn_tf(self.net.lidar_bv_data, self.net.gt_boxes_bv, bv_anchors, roi_bv) # pred_rpn = tf.reshape(pred_rpn_,(1, 601, 601, -1)) # predicted_bbox = tf.summary.image('predict_bbox_bv', pred_rpn) # valid_result = tf.summary.merge([predicted_bbox]) recalls = self.net.get_output('rpn_rois')[2] pred_tp_cnt, gt_cnt = 0., 0. 
hist = np.zeros((cfg.NUM_CLASS, cfg.NUM_CLASS), dtype=np.float32) for data_idx in range(self.val_epoch): # self.val_epoch blobs = self.dataset.get_minibatch(data_idx, 'valid') feed_dict_ = { self.net.lidar3d_data: blobs['lidar3d_data'], self.net.lidar_bv_data: blobs['lidar_bv_data'], self.net.im_info: blobs['im_info'], self.net.keep_prob: 0.5, self.net.gt_boxes_bv: blobs['gt_boxes_bv'], self.net.gt_boxes_3d: blobs['gt_boxes_3d'], self.net.gt_boxes_corners: blobs['gt_boxes_corners'], self.net.calib: blobs['calib'] } cubic_cls_score_, cubic_cls_labels_, recalls_ = sess.run( [cubic_cls_score, cubic_cls_labels, recalls], feed_dict=feed_dict_) # train_writer.add_summary(valid, data_idx) pred_tp_cnt = pred_tp_cnt + recalls_[1] gt_cnt = gt_cnt + recalls_[2] cubic_class = cubic_cls_score_.argmax(axis=1) one_hist = fast_hist(cubic_cls_labels_, cubic_class) if not math.isnan(one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1])): if not math.isnan( one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0])): hist += one_hist if cfg.TRAIN.VISUAL_VALID: print 'Valid step: {:d}/{:d} , rpn recall = {:.3f}'\ .format(data_idx + 1,self.val_epoch,float(recalls_[1]) / recalls_[2]) print( ' class bg precision = {:.3f} recall = {:.3f}' .format((one_hist[0, 0] / (one_hist[0, 0] + one_hist[1, 0])), (one_hist[0, 0] / (one_hist[0, 0] + one_hist[0, 1])))) print( ' class car precision = {:.3f} recall = {:.3f}' .format((one_hist[1, 1] / (one_hist[1, 1] + one_hist[0, 1])), (one_hist[1, 1] / (one_hist[1, 1] + one_hist[1, 0])))) precise_total = hist[1, 1] / (hist[1, 1] + hist[0, 1]) recall_total = hist[1, 1] / (hist[1, 1] + hist[1, 0]) recall_rpn = pred_tp_cnt / gt_cnt valid_summary = tf.summary.merge([ rpn_recall_smy_op, cubic_recall_smy_op, cubic_prec_smy_op ]) valid_res = sess.run(valid_summary, feed_dict={ epoch_rpn_recall: recall_rpn, epoch_cubic_recall: recall_total, epoch_cubic_precise: precise_total }) train_writer.add_summary(valid_res, epo_cnt + 1) print 'Validation of epoch_{}: rpn_recall {:.3f} cubic_precision = {:.3f} cubic_recall = {:.3f}'\ .format(epo_cnt + 1,recall_rpn,precise_total,recall_total) self.snapshot(sess, iter, final=True) print 'Training process has done, enjoy every day !'
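# Sketch: the confusion-matrix bookkeeping behind fast_hist.
# Both validation loops above accumulate a 2x2 confusion matrix with fast_hist
# and read the car precision/recall off it. fast_hist itself is defined
# elsewhere in the project; a common NumPy implementation and the derived
# metrics are sketched here under that assumption (rows = ground truth,
# columns = prediction, matching the hist[1, 1] / hist[0, 1] / hist[1, 0]
# indexing used above).
import numpy as np

def fast_hist(labels, preds, num_class=2):
    """Confusion matrix with rows = ground truth, columns = prediction."""
    labels = np.asarray(labels).ravel().astype(np.int64)
    preds = np.asarray(preds).ravel().astype(np.int64)
    return np.bincount(num_class * labels + preds,
                       minlength=num_class ** 2).reshape(num_class, num_class)

def precision_recall(hist, cls=1, eps=1e-6):
    """Precision/recall of class `cls` (1 = car in the loops above)."""
    precision = hist[cls, cls] / (hist[:, cls].sum() + eps)
    recall = hist[cls, cls] / (hist[cls, :].sum() + eps)
    return precision, recall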
def processor(self, sess, train_writer):
    with tf.name_scope('test_debug_board'):
        res_map = tf.cast(tf.reshape(tf.argmax(self.net.predicted_map, axis=3),
                                     [-1, 640, 640, 1]), dtype=tf.float32)
        cnt = tf.shape(self.net.coordinate)[0]
        updates = tf.ones([cnt], dtype=tf.float32)
        input_map = tf.reshape(tf.scatter_nd(self.net.coordinate, updates, shape=[640, 640]),
                               (-1, 640, 640, 1))
        apollo_feature = tf.transpose(self.net.apollo_8feature, perm=[3, 1, 2, 0])
        tf.summary.image('ApolloFeature', apollo_feature, max_outputs=8)
        tf.summary.image('Input_Data', input_map)
        tf.summary.image('Pred-Map', res_map)
        merged = tf.summary.merge_all()  # hxd: before the next summary ops

    sess.run(tf.global_variables_initializer())
    print 'Loading pre-trained model weights from {:s}'.format(self.args.weights)
    self.net.load_weigths(self.args.weights, sess, self.saver)

    timer = Timer()
    training_series = range(self.epoch)
    for data_idx in training_series:  # DO NOT EDIT the "training_series", for the latter shuffle
        blobs = self.dataset.get_minibatch(data_idx)  # get one batch
        feed_dict = {
            # self.net.pc_input: blobs['lidar3d_data'],
            self.net.voxel_feature: blobs['grid_stack'],
            self.net.coordinate: blobs['coord_stack'],
            self.net.number: blobs['ptsnum_stack'],
            self.net.apollo_8feature: blobs['apollo_8feature'],
        }
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        run_metadata = tf.RunMetadata()

        timer.tic()
        res_map_, merged_ = sess.run([res_map, merged],
                                     feed_dict=feed_dict,
                                     options=run_options,
                                     run_metadata=run_metadata)
        timer.toc()

        self.save_pred_as_npy(res_map_, save_path=cfg.TEST_RESULT, folder=SUFFIX, idx_=data_idx)

        if data_idx % cfg.TEST.ITER_DISPLAY == 0:
            print 'Iter: %d/%d, Serial_num: %s, Speed: %.3fs/iter' % (
                data_idx, self.epoch, blobs['serial_num'], timer.average_time)
            print 'Loading pcd use: {:.3}s, and generating voxel points use: {:.3}s'.format(
                blobs['voxel_gen_time'][0], blobs['voxel_gen_time'][1])
        if data_idx % 1 == 0 and cfg.TEST.TENSORBOARD:
            train_writer.add_summary(merged_, data_idx)
        if (data_idx + 1) % 200 == 0 and cfg.TEST.DEBUG_TIMELINE:
            # view with chrome://tracing
            trace = timeline.Timeline(step_stats=run_metadata.step_stats)
            trace_file = open(cfg.LOG_DIR + '/' + 'testing-step-' + str(data_idx).zfill(7) + '.ctf.json', 'w')
            trace_file.write(trace.generate_chrome_trace_format(show_memory=False))
            trace_file.close()
    print 'Testing process has done, enjoy every day !'
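# Sketch: building the 640x640 occupancy image with tf.scatter_nd.
# The test_debug_board scope in processor above turns the per-point grid
# coordinates into a bird's-eye occupancy image by scattering ones into a
# 640x640 canvas and reshaping it to NHWC for tf.summary.image. A minimal
# TF 1.x sketch of that step; `coordinate` is assumed to be an integer tensor
# of shape [num_points, 2] holding (row, col) indices, as fed above via
# blobs['coord_stack'].
import tensorflow as tf

def occupancy_image(coordinate, size=640):
    """Scatter one 'hit' per point into a [1, size, size, 1] float image."""
    num_points = tf.shape(coordinate)[0]
    hits = tf.ones([num_points], dtype=tf.float32)
    canvas = tf.scatter_nd(coordinate, hits, shape=[size, size])  # duplicate indices accumulate
    return tf.reshape(canvas, (-1, size, size, 1))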