def train(self, sess, data_gen):
    """Train the network for one epoch and return the averaged statistics.

    Batches are pulled from ``data_gen`` until ``has_data()`` returns false. Every
    batch runs ``self.train_op`` and fetches the classification loss, the
    segmentation loss and ``self.digit_preds``.

    Args:
        sess: session whose ``run`` method executes the training step.
        data_gen: batch source exposing ``has_data()`` and ``get_batch(batch_size)``;
            ``get_batch`` is unpacked as ``(x_batch, bbox_batch, y_batch)``.

    Returns:
        ``(avg_classification_loss, avg_segmentation_loss, avg_accuracy)``, each
        divided by the number of processed batches, or ``(-1, -1, -1)`` as soon as
        the first prediction value of a batch is NaN.
    """
    start_time = time.time()

    # continues until no more training data is generated
    losses, batch, acc, s_losses = 0, 0, 0, 0

    # NOTE(review): 1434 is a hard-coded progress-bar total - presumably the number of
    # batches per epoch for one specific dataset / batch size; confirm before reuse.
    pbar = tqdm(total=1434)
    try:
        while data_gen.has_data():
            x_batch, bbox_batch, y_batch = data_gen.get_batch(config.batch_size)

            # runs network on batch
            _, loss, s_loss, preds = sess.run(
                [self.train_op, self.class_loss, self.segmentation_loss, self.digit_preds],
                feed_dict={
                    self.x_input: x_batch,
                    self.y_input: y_batch,
                    self.m: self.cur_m,
                    self.is_train: True,
                    self.y_bbox: bbox_batch,
                })
            pbar.update()

            # accumulates losses and accuracies
            # NOTE(review): accuracy is normalised by config.batch_size, so a shorter
            # final batch (if the generator produces one) is under-weighted - confirm.
            acc += np.count_nonzero(
                np.argmax(preds, axis=1) == np.array(y_batch)) / config.batch_size
            losses += loss
            s_losses += s_loss
            batch += 1

            # aborts the epoch on numerical failure; callers receive the -1 sentinel
            if np.isnan(preds[0][0]):
                print(preds[0][:10])
                print('NAN encountered.')
                config.write_output('NAN encountered.\n')
                return -1, -1, -1

            # prints the loss and accuracy statistics after a certain number of batches
            if batch % config.batches_until_print == 0:
                print(preds[0][:10])  # prints activations just in case of numerical instability
                print(
                    'Finished %d batches. %d(s) since start. Avg Classification Loss is %.4f. '
                    'Avg Segmentation Loss is %.4f. Accuracy is %.4f.' %
                    (batch, time.time() - start_time, losses / batch,
                     s_losses / batch, acc / batch))

        # activations of the last batch; assumes at least one batch was produced
        print(preds[0][:10])
    finally:
        # the bar is released on every exit path, including the NaN early return
        # and any exception raised while running a batch
        pbar.close()

    print(
        'Epoch finished in %d(s). Avg Classification loss is %.4f. Avg Segmentation Loss is %.4f. '
        'Accuracy is %.4f.' %
        (time.time() - start_time, losses / batch, s_losses / batch, acc / batch))

    return losses / batch, s_losses / batch, acc / batch
def train_network(gpu_config):
    """Train a ``Caps3d`` network on UCF101 and then run the final ``iou()`` evaluation.

    For every epoch the network is trained with a fresh ``TrainDataGen``, the margin
    ``capsnet.cur_m`` is raised every ``config.n_eps_for_m`` epochs (capped at 0.9),
    and, once the accuracy/epoch thresholds from ``config`` are met, the network is
    validated with ``TestDataGen``. The checkpoint is written to
    ``config.save_file_name`` whenever the summed validation loss improves.

    Args:
        gpu_config: session configuration passed to ``tf.Session``.
    """
    capsnet = Caps3d()

    with tf.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()

        get_num_params()
        config.clear_output()

        n_eps_after_acc, best_loss = -1, 100000
        print('Training on UCF101')
        for ep in range(1, config.n_epochs + 1):
            print(20 * '*', 'epoch', ep, 20 * '*')

            # trains network for one epoch
            # NOTE(review): if capsnet.train signals a NaN failure through negative
            # return values, they are logged and training simply continues - confirm
            # this is intended.
            data_gen = TrainDataGen(config.wait_for_data, frame_skip=config.frame_skip)
            margin_loss, seg_loss, acc = capsnet.train(sess, data_gen)
            config.write_output('CL: %.4f. SL: %.4f. Acc: %.4f\n' %
                                (margin_loss, seg_loss, acc))

            # increments the margin
            if ep % config.n_eps_for_m == 0:
                capsnet.cur_m += config.m_delta
                capsnet.cur_m = min(capsnet.cur_m, 0.9)

            # only validates after a certain number of epochs and when the training accuracy is greater than a threshold
            # this is mainly used to save time, since validation takes about 10 minutes
            if (acc >= config.acc_for_eval or n_eps_after_acc >= 0) and ep >= config.n_eps_until_eval:
                n_eps_after_acc += 1

            # validates the network
            if (acc >= config.acc_for_eval and n_eps_after_acc % config.n_eps_for_eval == 0) \
                    or ep == config.n_epochs:
                data_gen = TestDataGen(config.wait_for_data, frame_skip=1)
                margin_loss, seg_loss, accuracy, _ = capsnet.eval(
                    sess, data_gen, validation=True)

                config.write_output(
                    'Validation\tCL: %.4f. SL: %.4f. Acc: %.4f.\n' %
                    (margin_loss, seg_loss, accuracy))

                # saves the network when validation loss is minimized
                t_loss = margin_loss + seg_loss
                if t_loss < best_loss:
                    try:
                        capsnet.save(sess, config.save_file_name)
                        # only record the new best once the checkpoint really exists,
                        # so a failed save is retried on the next improvement
                        best_loss = t_loss
                        config.write_output('Saved Network\n')
                    except Exception as err:
                        # best-effort save: keep training, but do not swallow
                        # KeyboardInterrupt/SystemExit and do report the cause
                        print('Failed to save network!!!')
                        print('Reason: %r' % (err,))

    # calculate final test accuracy, f-mAP, and v-mAP
    iou()
def train_one_epoch(sess, capsnet, data_gen, epoch):
    """Run one training epoch and report averaged segmentation/regression statistics.

    Args:
        sess: session used to execute the training step.
        capsnet: network object providing the ops and placeholders fed below.
        data_gen: batch source exposing ``has_data()`` and ``get_batch(batch_size)``;
            a batch is unpacked as ``(x_batch, seg_batch, crop1_batch, crop2_batch)``.
        epoch: current epoch number; compared against ``config.n_epochs_for_gt_seg``
            and ``config.n_epochs_for_gt_crop`` to set the ``use_gt_*`` feeds.

    Returns:
        ``(avg_segmentation_loss, avg_regression_loss, avg_segmentation_accuracy)``
        over the processed batches, or ``(-1, -1, -1)`` when the first predicted
        capsule value of a batch is NaN.
    """
    t_begin = time.time()

    # running totals; the batch counter is kept as a float like the sums it divides
    batch, s_losses, seg_acc, reg_losses = 0.0, 0, 0, 0

    fetches = [
        capsnet.train_op,
        capsnet.segmentation_loss,
        capsnet.pred_caps,
        capsnet.seg_acc,
        capsnet.regression_loss,
    ]

    # continues until no more training data is generated
    while data_gen.has_data():
        x_batch, seg_batch, crop1_batch, crop2_batch = data_gen.get_batch(config.batch_size)

        # single-sample batches are skipped in multi-GPU mode
        if config.multi_gpu and len(x_batch) == 1:
            print('Batch size of one, not running')
            continue

        n_samples = len(x_batch)

        # zero-filled conditioning inputs sized to the current batch
        hr_shape = (n_samples, config.hr_lstm_size[0], config.hr_lstm_size[1], config.hr_lstm_feats)
        lr_shape = (n_samples, config.lr_lstm_size[0], config.lr_lstm_size[1], config.lr_lstm_feats)

        feed = {
            capsnet.x_input_video: x_batch,
            capsnet.y_segmentation: seg_batch,
            capsnet.hr_cond_input: np.zeros(hr_shape),
            capsnet.lr_cond_input: np.zeros(lr_shape),
            capsnet.use_gt_seg: epoch <= config.n_epochs_for_gt_seg,
            capsnet.use_gt_crop: epoch <= config.n_epochs_for_gt_crop,
            capsnet.gt_crops1: crop1_batch,
            capsnet.gt_crops2: crop2_batch,
        }

        _, s_loss, cap_vals, s_acc, reg_loss = sess.run(fetches, feed_dict=feed)

        s_losses += s_loss
        seg_acc += s_acc
        reg_losses += reg_loss
        batch += 1

        # numerical failure: report it and hand the -1 sentinel back to the caller
        if np.isnan(cap_vals[0][0]):
            print(cap_vals[0][:10])
            print('NAN encountered.')
            config.write_output('NAN encountered.\n')
            return -1, -1, -1

        # periodic progress report
        if batch % config.batches_until_print == 0:
            elapsed = time.time() - t_begin
            print('Finished %d batches. %d(s) since start. Avg Segmentation Loss is %.4f. Avg Regression Loss is %.4f. '
                  'Seg Acc is %.4f.' % (batch, elapsed, s_losses / batch, reg_losses / batch, seg_acc / batch))
            sys.stdout.flush()

    elapsed = time.time() - t_begin
    print('Finish Epoch in %d(s). Avg Segmentation Loss is %.4f. Avg Regression Loss is %.4f. Seg Acc is %.4f.'
          % (elapsed, s_losses / batch, reg_losses / batch, seg_acc / batch))
    sys.stdout.flush()

    return s_losses / batch, reg_losses / batch, seg_acc / batch
def train_network(gpu_config):
    """Train a ``CapsNet`` on YoutubeVOS with NaN recovery, validation and checkpointing.

    Training starts at ``config.start_at_epoch``; when that is greater than 1 the
    checkpoint ``config.save_file_best_name % (start_at_epoch - 1)`` is loaded first.
    Each epoch is attempted up to three times: when ``train_one_epoch`` returns a
    negative loss or accuracy (its NaN sentinel), the best checkpoint is reloaded,
    the remaining batches of the generator are drained, and the epoch is retried.
    After three failures the process exits with status 1.

    After every epoch the network is validated; a checkpoint is written every
    ``config.save_every_n_epochs`` epochs and whenever the validation segmentation
    loss reaches a new minimum.

    Args:
        gpu_config: session configuration passed to ``tf.Session``.
    """
    capsnet = CapsNet()

    with tf.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()

        get_num_params()
        if config.start_at_epoch <= 1:
            config.clear_output()
        else:
            capsnet.load(sess, config.save_file_best_name % (config.start_at_epoch - 1))
            print('Loading from epoch %d.' % (config.start_at_epoch - 1))

        best_loss = 1000000
        # NOTE(review): best_epoch starts at 1 even when resuming or before any best
        # checkpoint has been written, so NaN recovery may try to load a checkpoint
        # that does not exist - confirm.
        best_epoch = 1
        print('Training on YoutubeVOS')
        for ep in range(config.start_at_epoch, config.n_epochs + 1):
            print(20 * '*', 'epoch', ep, 20 * '*')
            sys.stdout.flush()

            # Trains network for 1 epoch
            nan_tries = 0
            while nan_tries < 3:
                data_gen = TrainDataGen(config.wait_for_data,
                                        crop_size=config.hr_frame_size,
                                        n_frames=config.n_frames,
                                        rand_frame_skip=config.rand_frame_skip,
                                        multi_objects=config.multiple_objects)
                seg_loss, reg_loss, seg_acc = train_one_epoch(sess, capsnet, data_gen, ep)

                if seg_loss < 0 or seg_acc < 0:
                    nan_tries += 1
                    # restores the best saved weights before retrying the epoch
                    capsnet.load(sess, config.save_file_best_name % best_epoch)
                    # drains whatever the aborted generator still holds
                    while data_gen.has_data():
                        data_gen.get_batch(config.batch_size)
                else:
                    config.write_output(
                        'Epoch %d: SL: %.4f. RL: %.4f. SegAcc: %.4f.\n' %
                        (ep, seg_loss, reg_loss, seg_acc))
                    break

            if nan_tries == 3:
                print('Network cannot be trained. Too many NaN issues.')
                # sys.exit instead of the interactive-shell helper exit(); a non-zero
                # status lets wrapper scripts detect the failed run
                sys.exit(1)

            # Validates network
            data_gen = ValidDataGen(config.wait_for_data,
                                    crop_size=config.hr_frame_size,
                                    n_frames=config.n_frames)
            seg_loss, seg_acc = validate(sess, capsnet, data_gen)

            config.write_output('Validation\tSL: %.4f. SA: %.4f.\n' % (seg_loss, seg_acc))

            # saves every config.save_every_n_epochs epochs
            if ep % config.save_every_n_epochs == 0:
                try:
                    capsnet.save(sess, config.save_file_name % ep)
                    config.write_output('Saved Network\n')
                except Exception as err:
                    # best-effort save: keep training but report why it failed
                    print('Failed to save network!!!')
                    print('Reason: %r' % (err,))
                    sys.stdout.flush()

            # saves when validation loss becomes smaller
            t_loss = seg_loss
            if t_loss < best_loss:
                try:
                    capsnet.save(sess, config.save_file_best_name % ep)
                    # best_loss and best_epoch move together, and only once the
                    # checkpoint exists, so NaN recovery always targets a saved epoch
                    best_loss = t_loss
                    best_epoch = ep
                    config.write_output('Saved Network - Minimum val\n')
                except Exception as err:
                    print('Failed to save network!!!')
                    print('Reason: %r' % (err,))
                    sys.stdout.flush()

    tf.reset_default_graph()
def iou():
    """
    Evaluates the saved network on the test set: video-level classification accuracy
    plus frame-level (f-mAP) and video-level (v-mAP) IoU scores at 20 thresholds
    (0.00, 0.05, ..., 0.95). Results are printed and passed to config.write_output.

    Each video is cut into 8-frame clips (zero-padded past the end of the video),
    clips whose ground-truth mask sums to zero are dropped, and videos left with no
    clips are skipped.
    """
    gpu_config = tf.ConfigProto()
    gpu_config.gpu_options.allow_growth = True

    capsnet = Caps3d()
    with tf.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.global_variables_initializer().run()
        capsnet.load(sess, config.save_file_name)

        data_gen = TestDataGen(config.wait_for_data)

        # per-class counters
        n_correct = 0
        n_vids = np.zeros((config.n_classes, 1))
        n_tot_frames = np.zeros((config.n_classes, 1))

        # per-class, per-threshold hit counts
        frame_ious = np.zeros((config.n_classes, 20))
        video_ious = np.zeros((config.n_classes, 20))
        iou_threshs = np.arange(0, 20, dtype=np.float32) / 20

        while data_gen.has_data():
            video, bbox, label = data_gen.get_next_video()

            f_skip = config.frame_skip
            n_frames = video.shape[0]

            # builds the 8-frame clips, sampling every f_skip-th frame
            clips = []
            for base in range(0, n_frames, 8 * f_skip):
                for offset in range(f_skip):
                    clip_frames, clip_masks = [], []
                    for step in range(8):
                        ind = base + offset + step * f_skip
                        if ind < n_frames:
                            clip_frames.append(video[ind:ind + 1, :, :, :])
                            clip_masks.append(bbox[ind:ind + 1, :, :, :])
                        else:
                            # pads past the end of the video with empty frames/masks
                            clip_frames.append(np.zeros((1, 112, 112, 3), dtype=np.float32))
                            clip_masks.append(np.zeros((1, 112, 112, 1), dtype=np.float32))

                    clip_video = np.concatenate(clip_frames, axis=0)
                    clip_bbox = np.concatenate(clip_masks, axis=0)
                    # clips without any ground-truth mask are not kept
                    if np.sum(clip_bbox) != 0:
                        clips.append((clip_video, clip_bbox, label))

            if len(clips) == 0:
                print('Video has no bounding boxes')
                continue

            # groups the clips into batches of at most config.batch_size
            batches, gt_segmentations = [], []
            for start in range(0, len(clips), config.batch_size):
                chunk = clips[start:start + config.batch_size]
                x_batch = [clip[0] for clip in chunk]
                bb_batch = [clip[1] for clip in chunk]
                y_batch = [clip[2] for clip in chunk]
                batches.append((x_batch, bb_batch, y_batch))
                gt_segmentations.append(np.stack(bb_batch))

            gt_segmentations = np.concatenate(gt_segmentations, axis=0)
            gt_segmentations = gt_segmentations.reshape((-1, 112, 112, 1))  # Shape N_FRAMES, 112, 112, 1

            # runs the network on every batch
            segmentations, predictions = [], []
            for x_batch, bb_batch, y_batch in batches:
                feed = {
                    capsnet.x_input: x_batch,
                    capsnet.y_input: y_batch,
                    capsnet.m: 0.9,
                    capsnet.is_train: False,
                }
                segmentation, pred = sess.run(
                    [capsnet.segment_layer_sig, capsnet.digit_preds], feed_dict=feed)
                segmentations.append(segmentation)
                predictions.append(pred)

            # video-level prediction: argmax of the mean clip prediction
            predictions = np.concatenate(predictions, axis=0)
            predictions = predictions.reshape((-1, config.n_classes))
            fin_pred = np.argmax(np.mean(predictions, axis=0))
            if fin_pred == label:
                n_correct += 1

            # thresholds the predicted masks and overlays them on the ground truth;
            # a value of 2 marks pixels set in both (assumes 0/1 ground-truth masks
            # - TODO confirm)
            pred_segmentations = np.concatenate(segmentations, axis=0)
            pred_segmentations = pred_segmentations.reshape((-1, 112, 112, 1))
            pred_segmentations = (pred_segmentations >= 0.5).astype(np.int32)
            seg_plus_gt = pred_segmentations + gt_segmentations

            vid_inter, vid_union = 0, 0

            # calculates f_map
            for frame_idx in range(gt_segmentations.shape[0]):
                # frames without ground truth do not count
                if np.sum(gt_segmentations[frame_idx]) == 0:
                    continue

                n_tot_frames[label] += 1

                overlay = seg_plus_gt[frame_idx]
                inter = np.count_nonzero(overlay == 2)
                union = np.count_nonzero(overlay)
                vid_inter += inter
                vid_union += union

                i_over_u = inter / union
                for k, thresh in enumerate(iou_threshs):
                    if i_over_u >= thresh:
                        frame_ious[label, k] += 1

            # calculates v_map from the intersections/unions summed over the video
            n_vids[label] += 1
            i_over_u = vid_inter / vid_union
            for k, thresh in enumerate(iou_threshs):
                if i_over_u >= thresh:
                    video_ious[label, k] += 1

            if np.sum(n_vids) % 100 == 0:
                print('Finished %d videos' % np.sum(n_vids))

        total_vids = np.sum(n_vids)
        print('Accuracy:', n_correct / total_vids)
        config.write_output('Test Accuracy: %.4f\n' % float(n_correct / total_vids))

        # per-class average precision, then the mean over classes per threshold
        fAP = frame_ious / n_tot_frames
        fmAP = np.mean(fAP, axis=0)
        vAP = video_ious / n_vids
        vmAP = np.mean(vAP, axis=0)

        print('IoU f-mAP:')
        config.write_output('IoU f-mAP:\n')
        for thresh, score in zip(iou_threshs, fmAP):
            print(thresh, score)
            config.write_output('%.4f\t%.4f\n' % (thresh, score))
        # per-class values at threshold index 10 (0.5)
        config.write_output(str(fAP[:, 10]) + '\n')
        print(fAP[:, 10])

        print('IoU v-mAP:')
        config.write_output('IoU v-mAP:\n')
        for thresh, score in zip(iou_threshs, vmAP):
            print(thresh, score)
            config.write_output('%.4f\t%.4f\n' % (thresh, score))
        config.write_output(str(vAP[:, 10]) + '\n')
        print(vAP[:, 10])
def train_network(gpu_config):
    """Train a ``Caps3d`` network on UCF101 with resumable epochs and periodic checkpoints.

    Training starts at ``config.start_at_epoch``; when that is greater than 1 the
    checkpoint ``config.save_file_name % (start_at_epoch - 1)`` is loaded first.
    Each epoch is attempted up to three times when ``capsnet.train`` returns a
    negative loss or accuracy (its NaN sentinel); after three failures the process
    exits with status 1. A checkpoint is written every
    ``config.save_every_n_epochs`` epochs and again whenever the accuracy/epoch
    conditions from ``config`` are met or the last epoch is reached.

    Args:
        gpu_config: session configuration passed to ``tf.compat.v1.Session``.
    """
    capsnet = Caps3d()

    with tf.compat.v1.Session(graph=capsnet.graph, config=gpu_config) as sess:
        tf.compat.v1.global_variables_initializer().run()

        get_num_params()
        if config.start_at_epoch <= 1:
            config.clear_output()
        else:
            capsnet.load(sess, config.save_file_name % (config.start_at_epoch - 1))
            print('Loading from epoch %d.' % (config.start_at_epoch - 1))

        n_eps_after_acc = -1
        print('Training on UCF101')
        for ep in range(config.start_at_epoch, config.n_epochs + 1):
            print(20 * '*', 'epoch', ep, 20 * '*')

            nan_tries = 0
            while nan_tries < 3:
                # trains network for one epoch
                data_gen = TrainDataGen(config.wait_for_data, frame_skip=config.frame_skip)
                margin_loss, seg_loss, acc = capsnet.train(sess, data_gen)

                if margin_loss < 0 or acc < 0:
                    # NOTE(review): no checkpoint is reloaded here, so the retry runs
                    # on the weights left by the failed attempt - confirm intended.
                    nan_tries += 1
                else:
                    config.write_output('CL: %.4f. SL: %.4f. Acc: %.4f\n' %
                                        (margin_loss, seg_loss, acc))
                    break

            if nan_tries == 3:
                print('Network cannot be trained. Too many NaN issues.')
                # SystemExit instead of the interactive-shell helper exit(); a non-zero
                # status lets wrapper scripts detect the failed run
                raise SystemExit(1)

            # periodic checkpoint
            if ep % config.save_every_n_epochs == 0:
                try:
                    capsnet.save(sess, config.save_file_name % ep)
                    config.write_output('Saved Network\n')
                except Exception as err:
                    # best-effort save: keep training but report why it failed
                    print('Failed to save network!!!')
                    print('Reason: %r' % (err,))

            # increments the margin
            if ep % config.n_eps_for_m == 0:
                capsnet.cur_m += config.m_delta
                capsnet.cur_m = min(capsnet.cur_m, 0.9)

            # counts the epochs since the training accuracy first reached the threshold
            # (only once the minimum number of epochs has passed)
            if (acc >= config.acc_for_eval or n_eps_after_acc >= 0) and ep >= config.n_eps_until_eval:
                n_eps_after_acc += 1

            # NOTE(review): this branch performs no validation; it only writes a
            # checkpoint, and may repeat the periodic save above in the same epoch.
            if (acc >= config.acc_for_eval and n_eps_after_acc % config.n_eps_for_eval == 0) \
                    or ep == config.n_epochs:
                try:
                    capsnet.save(sess, config.save_file_name % ep)
                    config.write_output('Saved Network\n')
                except Exception as err:
                    print('Failed to save network!!!')
                    print('Reason: %r' % (err,))