def test(opt):
    logger = Logger(opt)
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.test()
    test_loader = DataLoader(dataset, batch_size=opt.batch_size,
                             shuffle=False, num_workers=opt.workers)

    evaluator = Evaluator(opt, 'test')
    model = models.setup(opt)
    model.cuda()
    predictions, metrics = evaluator.test_story(model, dataset, test_loader, opt)
def test_model(self):
    self.output_dir = self.create_output_dir()
    save_root = os.path.join('results', self.cfg.EXP_DIR)
    save_dir = os.path.join(save_root, self.cfg.DATASET.DATASET)
    self.writer = SummaryWriter(log_dir=self.output_dir)

    previous = self.find_previous()
    iteration = 0
    eval_args = {
        'checkpoints_dir': save_root,
        'K': self.cfg.MODEL.K,
        'dataset': self.cfg.DATASET.DATASET,
        'path': self.cfg.DATASET.DATASET_DIR,
        'split': self.test_loader.dataset.split,
        'redo': False,
        'eval_mode': self.cfg.DATASET.INPUT_TYPE
    }

    if previous:
        for iteration, resume_checkpoint in zip(previous[0], previous[1]):
            if self.cfg.TEST.TEST_SCOPE[0] <= iteration <= self.cfg.TEST.TEST_SCOPE[1]:
                self.resume_checkpoint(resume_checkpoint)
                self.validation_loop(iteration, phase=self.cfg.PHASE, save=save_dir)

                # Call evaluator object
                eval_args['eval_iter'] = iteration
                # eval_args['checkpoints_dir'] = os.path.join(save_dir,
                #     self.test_loader.dataset.input_type + '-'
                #     + str(self.test_loader.dataset.split).zfill(2) + '-'
                #     + str(iteration).zfill(6))
                evaluator_ = Evaluator(munchify(eval_args))
                evaluator_.normal_summarize()
    else:
        print("Loading pretrained checkpoint")
        if len(self.cfg.RESUME_CHECKPOINT) == 2:
            self.resume_checkpoint_separate(self.cfg.RESUME_CHECKPOINT,
                                            self.cfg.TRAIN.RESUME_SCOPE)
        elif len(self.cfg.RESUME_CHECKPOINT) == 1:
            self.resume_checkpoint_pretrained(self.model,
                                              self.cfg.RESUME_CHECKPOINT[0],
                                              self.cfg.TRAIN.RESUME_SCOPE)
        self.validation_loop(iteration, phase=self.cfg.PHASE, save=save_dir)

        # Call evaluator object
        eval_args['eval_iter'] = iteration
        # eval_args['checkpoints_dir'] = os.path.join(save_dir,
        #     self.test_loader.dataset.input_type + '-'
        #     + str(self.test_loader.dataset.split).zfill(2) + '-'
        #     + str(iteration).zfill(6))
        evaluator_ = Evaluator(munchify(eval_args))
        evaluator_.normal_summarize()
def train(opt):
    logger = Logger(opt)
    flag = Flag(D_iters=opt.D_iter, G_iters=opt.G_iter, always=opt.always)

    ################### set up dataset and dataloader ########################
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.set_option(data_type={'whole_story': False, 'split_story': True, 'caption': False})

    dataset.train()
    train_loader = DataLoader(dataset, batch_size=opt.batch_size,
                              shuffle=opt.shuffle, num_workers=opt.workers)
    dataset.val()
    val_loader = DataLoader(dataset, batch_size=opt.batch_size,
                            shuffle=False, num_workers=opt.workers)

    ##################### set up model, criterion and optimizer ######
    bad_valid = 0

    # set up evaluator
    evaluator = Evaluator(opt, 'val')

    # set up criterion
    crit = criterion.LanguageModelCriterion()
    rl_crit = criterion.ReinforceCriterion(opt, dataset)

    # set up model
    model = models.setup(opt)
    model.cuda()
    disc_opt = copy.copy(opt)
    disc_opt.model = 'RewardModel'
    disc = models.setup(disc_opt)
    if os.path.exists(os.path.join(logger.log_dir, 'disc-model.pth')):
        logging.info("loading pretrained RewardModel")
        disc.load_state_dict(torch.load(os.path.join(logger.log_dir, 'disc-model.pth')))
    disc.cuda()

    # set up optimizer
    optimizer = setup_optimizer(opt, model)
    disc_optimizer = setup_optimizer(opt, disc)

    dataset.train()
    model.train()
    disc.train()

    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):
        # Assign the scheduled sampling prob
        start = time.time()
        for iter, batch in enumerate(train_loader):
            logger.iteration += 1
            torch.cuda.synchronize()

            feature_fc = Variable(batch['feature_fc']).cuda()
            target = Variable(batch['split_story']).cuda()
            index = batch['index']

            optimizer.zero_grad()
            disc_optimizer.zero_grad()

            if flag.flag == "Disc":
                model.eval()
                disc.train()
                if opt.decoding_method_DISC == 'sample':
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc, sample_max=False, rl_training=True, pad=True)
                elif opt.decoding_method_DISC == 'greedy':
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc, sample_max=True, rl_training=True, pad=True)
            else:
                model.train()
                disc.eval()
                seq, seq_log_probs, baseline = model.sample(
                    feature_fc, sample_max=False, rl_training=True, pad=True)

            seq = Variable(seq).cuda()
            mask = (seq > 0).float()
            mask = to_contiguous(
                torch.cat([
                    Variable(mask.data.new(mask.size(0), mask.size(1), 1).fill_(1)),
                    mask[:, :, :-1]
                ], 2))
            normed_seq_log_probs = (seq_log_probs * mask).sum(-1) / mask.sum(-1)

            gen_score = disc(seq.view(-1, seq.size(2)),
                             feature_fc.view(-1, feature_fc.size(2)))

            if flag.flag == "Disc":
                gt_score = disc(target.view(-1, target.size(2)),
                                feature_fc.view(-1, feature_fc.size(2)))
                loss = -torch.sum(gt_score) + torch.sum(gen_score)

                avg_pos_score = torch.mean(gt_score)
                avg_neg_score = torch.mean(gen_score)

                if logger.iteration % 5 == 0:
                    logging.info("pos reward {} neg reward {}".format(
                        avg_pos_score.data[0], avg_neg_score.data[0]))
                    print("PREDICTION: ",
                          utils.decode_story(dataset.get_vocab(), seq[:1].data)[0])
                    print("GROUND TRUTH: ",
                          utils.decode_story(dataset.get_vocab(), target[:1].data)[0])
            else:
                rewards = Variable(gen_score.data - 0.001 * normed_seq_log_probs.data)
                # with open("/tmp/reward.txt", "a") as f:
                #     print(" ".join(map(str, rewards.data.cpu().numpy())), file=f)
                loss, avg_score = rl_crit(seq.data, seq_log_probs, baseline, index, rewards)
                # if logger.iteration % opt.losses_log_every == 0:
                avg_pos_score = torch.mean(gen_score)
                logging.info("average reward: {} average IRL score: {}".format(
                    avg_score.data[0], avg_pos_score.data[0]))

            if flag.flag == "Disc":
                loss.backward()
                nn.utils.clip_grad_norm(disc.parameters(), opt.grad_clip, norm_type=2)
                disc_optimizer.step()
            else:
                tf_loss = crit(model(feature_fc, target), target)
                print("rl_loss / tf_loss = ", loss.data[0] / tf_loss.data[0])
                loss = opt.rl_weight * loss + (1 - opt.rl_weight) * tf_loss
                loss.backward()
                nn.utils.clip_grad_norm(model.parameters(), opt.grad_clip, norm_type=2)
                optimizer.step()

            train_loss = loss.data[0]
            torch.cuda.synchronize()

            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate, model.ss_prob)
                logging.info(
                    "Epoch {} Train {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s".format(
                        epoch, flag.flag, iter, len(train_loader), train_loss,
                        time.time() - start))
                start = time.time()

            if logger.iteration % opt.save_checkpoint_every == 0:
                if opt.always is None:
                    # Evaluate on validation dataset and save model for every epoch
                    val_loss, predictions, metrics = evaluator.eval_story(
                        model, crit, dataset, val_loader, opt)
                    if opt.metric == 'XE':
                        score = -val_loss
                    else:
                        score = metrics[opt.metric]
                    logger.log_checkpoint(epoch, val_loss, metrics, predictions,
                                          opt, model, dataset, optimizer)
                    # halve the learning rate if not improving for a long time
                    if logger.best_val_score > score:
                        bad_valid += 1
                        if bad_valid >= 10:
                            opt.learning_rate = opt.learning_rate / 2.0
                            logging.info("halve learning rate to {}".format(opt.learning_rate))
                            checkpoint_path = os.path.join(logger.log_dir, 'model-best.pth')
                            model.load_state_dict(torch.load(checkpoint_path))
                            utils.set_lr(optimizer, opt.learning_rate)  # set the decayed rate
                            bad_valid = 0
                        logging.info("bad valid : {}".format(bad_valid))
                    else:
                        logging.info("achieving best {} score: {}".format(opt.metric, score))
                        bad_valid = 0
                else:
                    torch.save(disc.state_dict(),
                               os.path.join(logger.log_dir, 'disc-model.pth'))
            flag.inc()
def train(opt):
    logger = Logger(opt)  # set up the logger
    flag = Flag(D_iters=opt.D_iter, G_iters=opt.G_iter, always=opt.always)  # initialize the D/G training flag

    dataset = VISTDataset(opt)  # load the dataset
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.set_option(data_type={'whole_story': False, 'split_story': True, 'caption': False})

    dataset.train()
    train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=opt.shuffle)
    dataset.val()
    val_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False)

    bad_valid = 0

    evaluator = Evaluator(opt, 'val')
    crit = criterion.LanguageModelCriterion()
    rl_crit = criterion.ReinforceCriterion(opt, dataset)  # loss function for reinforcement learning

    # set up model
    model = models.setup(opt)
    model.cuda()
    disc_opt = copy.copy(opt)
    disc_opt.model = 'RewardModel'  # set the model attribute for the discriminator
    disc = models.setup(disc_opt)  # discriminator model; decides which model class is instantiated
    if os.path.exists(os.path.join('./data/save/', 'disc-model.pth')):  # load pretrained parameters if a checkpoint exists
        logging.info("loading pretrained RewardModel")
        disc.load_state_dict(torch.load(os.path.join(logger.log_dir, 'disc-model.pth')))
    disc.cuda()

    # two optimizers for two completely independent models
    optimizer = setup_optimizer(opt, model)
    disc_optimizer = setup_optimizer(disc_opt, disc)  # fix

    dataset.train()
    model.train()
    disc.train()

    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):  # maximum number of epochs (50 here)
        start = time.time()
        for iter, batch in enumerate(train_loader):  # iterate over batches
            logger.iteration += 1  # track the iteration count
            torch.cuda.synchronize()

            # fetch the batch data
            feature_fc = Variable(batch['feature_fc']).cuda()
            target = Variable(batch['split_story']).cuda()
            index = batch['index']

            optimizer.zero_grad()
            disc_optimizer.zero_grad()

            if flag.flag == "Disc":
                model.eval()  # keep the policy model parameters fixed
                disc.train()  # update the discriminator parameters
                if opt.decoding_method_DISC == 'sample':  # sample the sequence from the output distribution
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc, sample_max=False, rl_training=True, pad=True)
                elif opt.decoding_method_DISC == 'greedy':
                    seq, seq_log_probs, baseline = model.sample(
                        feature_fc, sample_max=True, rl_training=True, pad=True)
            else:
                model.train()  # update the generator
                disc.eval()  # keep the discriminator fixed
                seq, seq_log_probs, baseline = model.sample(
                    feature_fc, sample_max=False, rl_training=True, pad=True)

            seq = Variable(seq).cuda()
            mask = (seq > 0).float()  # (64, 5, 30)
            mask = to_contiguous(
                torch.cat([
                    Variable(mask.data.new(mask.size(0), mask.size(1), 1).fill_(1)),
                    mask[:, :, :-1]
                ], 2))
            normed_seq_log_probs = (seq_log_probs * mask).sum(-1) / mask.sum(-1)  # (64, 5): log-probability of each whole sequence

            gen_score = disc(seq.view(-1, seq.size(2)),
                             feature_fc.view(-1, feature_fc.size(2)))  # reward scores for the sampled sequences

            if flag.flag == "Disc":
                # Train the discriminator first (the generator is pretrained) so that it
                # learns to score ground-truth versus generated stories.
                gt_score = disc(target.view(-1, target.size(2)),
                                feature_fc.view(-1, feature_fc.size(2)))  # reward for the ground-truth sequences
                loss = -torch.sum(gt_score) + torch.sum(gen_score)  # discriminator loss; a negative value is expected

                # average rewards; training pushes the positive (ground-truth) score higher
                avg_pos_score = torch.mean(gt_score)
                avg_neg_score = torch.mean(gen_score)

                if logger.iteration % 5 == 0:
                    logging.info("pos reward {} neg reward {}".format(
                        avg_pos_score.item(), avg_neg_score.item()))
                    # print("PREDICTION: ", utils.decode_story(dataset.get_vocab(), seq[:1].data)[0])
                    # print("GROUND TRUTH: ", utils.decode_story(dataset.get_vocab(), target[:1].data)[0])
            else:
                rewards = Variable(gen_score.data - 0 * normed_seq_log_probs.view(-1).data)
                # with open("/tmp/reward.txt", "a") as f:
                #     print(" ".join(map(str, rewards.data.cpu().numpy())), file=f)
                loss, avg_score = rl_crit(seq.data, seq_log_probs, baseline, index,
                                          rewards.view(-1, seq.size(1)))
                # if logger.iteration % opt.losses_log_every == 0:
                avg_pos_score = torch.mean(gen_score)
                # logging.info("average reward: {} average IRL score: {}".format(avg_score.item(), avg_pos_score.item()))

            if flag.flag == "Disc":
                loss.backward()
                nn.utils.clip_grad_norm(disc.parameters(), opt.grad_clip, norm_type=2)
                disc_optimizer.step()
            else:
                tf_loss = crit(model(feature_fc, target), target)
                # print("rl_loss / tf_loss = ", loss.item() / tf_loss.item())
                loss = opt.rl_weight * loss + (1 - opt.rl_weight) * tf_loss
                loss.backward()
                nn.utils.clip_grad_norm(model.parameters(), opt.grad_clip, norm_type=2)
                optimizer.step()

            train_loss = loss.item()
            torch.cuda.synchronize()

            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate, model.ss_prob)
                logging.info(
                    "Epoch {} Train {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s".format(
                        epoch, flag.flag, iter, len(train_loader), train_loss,
                        time.time() - start))
                start = time.time()

            if logger.iteration % opt.save_checkpoint_every == 0:
                if opt.always is None:
                    # Evaluate on validation dataset and save model for every epoch
                    val_loss, predictions, metrics = evaluator.eval_story(
                        model, crit, dataset, val_loader, opt)
                    if opt.metric == 'XE':
                        score = -val_loss
                    else:
                        score = metrics[opt.metric]
                    logger.log_checkpoint(epoch, val_loss, metrics, predictions,
                                          opt, model, dataset, optimizer)
                    # halve the learning rate if not improving for a long time
                    if logger.best_val_score > score:
                        bad_valid += 1
                        if bad_valid >= 10:
                            opt.learning_rate = opt.learning_rate / 2.0
                            logging.info("halve learning rate to {}".format(opt.learning_rate))
                            checkpoint_path = os.path.join(logger.log_dir, 'model-best.pth')
                            model.load_state_dict(torch.load(checkpoint_path))
                            utils.set_lr(optimizer, opt.learning_rate)  # set the decayed rate
                            bad_valid = 0
                        logging.info("bad valid : {}".format(bad_valid))
                    else:
                        logging.info("achieving best {} score: {}".format(opt.metric, score))
                        bad_valid = 0
                else:
                    torch.save(disc.state_dict(),
                               os.path.join(logger.log_dir, 'disc-model.pth'))
            flag.inc()
def main(args):
    tf.config.experimental.list_physical_devices('GPU')
    # tf.device(f'/gpu:{args.gpu_num}')

    train_path = args.train_dataset
    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to text? file containing all classes, 1 per line
    classes_file = args.classes
    # Usually fit
    # mode = 'fit'  # Can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = args.mode
    '''
    'fit: model.fit, '
    'eager_fit: model.fit(run_eagerly=True), '
    'eager_tf: custom GradientTape'
    '''

    # Usually darknet
    transfer = args.transfer
    '''
    'none: Training from scratch, '
    'darknet: Transfer darknet, '
    'no_output: Transfer all but output, '
    'frozen: Transfer and freeze all, '
    'fine_tune: Transfer all and freeze darknet only'),
    'pre': Use a pre-trained model for validation
    '''

    image_size = cfg.IMAGE_SIZE
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = cfg.LEARNING_RATE
    num_classes = args.num_classes
    # num class for `weights` file if different, useful in transfer learning with different number of classes
    weight_num_classes = args.num_weight_class

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/trained_{}.tf'.format(num_epochs)
    saved_weights_path = args.saved_weights

    # Original Anchors below
    anchors = np.array([(10, 13), (16, 30), (33, 23), (30, 61), (62, 45),
                        (59, 119), (116, 90), (156, 198), (373, 326)],
                       np.float32) / 608
    anchors = cfg.YOLO_ANCHORS
    anchor_masks = cfg.YOLO_ANCHOR_MASKS

    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    for physical_device in physical_devices:
        tf.config.experimental.set_memory_growth(physical_device, True)

    if args.no_train:
        print('Skipping training...')
    else:
        start_time = time.time()
        model = YoloV3(image_size, training=True, classes=num_classes)

        train_dataset = dataset.load_tfrecord_dataset(train_path, classes_file, image_size)
        train_dataset = train_dataset.shuffle(buffer_size=512)
        train_dataset = train_dataset.batch(batch_size)
        train_dataset = train_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))
        train_dataset = train_dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)

        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        # Configure the model for transfer learning
        if transfer == 'none':
            pass  # Nothing to do
        elif transfer in ['darknet', 'no_output']:
            # Darknet transfer is a special case that works
            # with incompatible number of classes
            # reset top layers
            model_pretrained = YoloV3(image_size, training=True,
                                      classes=weight_num_classes or num_classes)
            model_pretrained.load_weights(weights_path)
            if transfer == 'darknet':
                model.get_layer('yolo_darknet').set_weights(
                    model_pretrained.get_layer('yolo_darknet').get_weights())
                freeze_all(model.get_layer('yolo_darknet'))
            elif transfer == 'no_output':
                for layer in model.layers:
                    if not layer.name.startswith('yolo_output'):
                        layer.set_weights(model_pretrained.get_layer(layer.name).get_weights())
                        freeze_all(layer)
        elif transfer == 'pre':
            model = YoloV3(image_size, training=False, classes=num_classes)
            model.load_weights(weights_path)
        else:
            # All other transfer require matching classes
            model.load_weights(weights_path)
            if transfer == 'fine_tune':
                # freeze darknet and fine tune other layers
                darknet = model.get_layer('yolo_darknet')
                freeze_all(darknet)
            elif transfer == 'frozen':
                # freeze everything
                freeze_all(model)

        optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
        loss = [YoloLoss(anchors[mask], classes=num_classes) for mask in anchor_masks]
        # Passing loss as a list might sometimes fail? dict might be better?

        if mode == 'eager_tf':
            # Eager mode is great for debugging
            # Non eager graph mode is recommended for real training
            avg_loss = tf.keras.metrics.Mean('loss', dtype=tf.float32)
            avg_val_loss = tf.keras.metrics.Mean('val_loss', dtype=tf.float32)

            for epoch in range(1, num_epochs + 1):
                for batch, (images, labels) in enumerate(train_dataset):
                    with tf.GradientTape() as tape:
                        outputs = model(images, training=True)
                        regularization_loss = tf.reduce_sum(model.losses)
                        pred_loss = []
                        for output, label, loss_fn in zip(outputs, labels, loss):
                            pred_loss.append(loss_fn(label, output))
                        total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                    grads = tape.gradient(total_loss, model.trainable_variables)
                    optimizer.apply_gradients(zip(grads, model.trainable_variables))

                    print("{}_train_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_loss.update_state(total_loss)

                for batch, (images, labels) in enumerate(val_dataset):
                    outputs = model(images)
                    regularization_loss = tf.reduce_sum(model.losses)
                    pred_loss = []
                    for output, label, loss_fn in zip(outputs, labels, loss):
                        pred_loss.append(loss_fn(label, output))
                    total_loss = tf.reduce_sum(pred_loss) + regularization_loss

                    print("{}_val_{}, {}, {}".format(
                        epoch, batch, total_loss.numpy(),
                        list(map(lambda x: np.sum(x.numpy()), pred_loss))))
                    avg_val_loss.update_state(total_loss)

                print("{}, train: {}, val: {}".format(
                    epoch, avg_loss.result().numpy(), avg_val_loss.result().numpy()))

                avg_loss.reset_states()
                avg_val_loss.reset_states()
                model.save_weights('checkpoints/yolov3_train_{}.tf'.format(epoch))
        elif mode == 'valid':
            pass  # Pass this step for validation only
        else:
            model.compile(optimizer=optimizer, loss=loss,
                          run_eagerly=(mode == 'eager_fit'))
            callbacks = [
                ReduceLROnPlateau(verbose=1, min_lr=1e-4, patience=50),
                # EarlyStopping(patience=3, verbose=1),
                ModelCheckpoint('checkpoints/midpoints/yolov3_train_{epoch}.tf',
                                verbose=1, save_weights_only=True),
                TensorBoard(log_dir=f'logs/{saved_weights_path[:-3]}')
            ]

            history = model.fit(train_dataset,
                                epochs=num_epochs,
                                callbacks=callbacks,
                                validation_data=val_dataset)
            print(f'Saving weights to: {saved_weights_path}')
            model.save_weights(saved_weights_path)

        finish_time = time.time()
        train_time = finish_time - start_time
        print('Training time elapsed: {}'.format(train_time))

    # Calculate mAP
    if args.validate:
        print('Validating...')

        model = YoloV3(image_size, training=False, classes=num_classes)
        model.load_weights(saved_weights_path).expect_partial()

        batch_size = 1

        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(batch_size)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        images = []
        for img, labs in val_dataset:
            img = np.squeeze(img)
            images.append(img)

        predictions = []

        evaluator = Evaluator(iou_thresh=args.iou)

        # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
        boxes, scores, classes, num_detections = model.predict(val_dataset)
        print(boxes.shape)
        print(boxes[0])
        # boxes -> (num_imgs, num_detections, box coords)

        filtered_labels = []
        for _, label in val_dataset:
            filt_labels = flatten_labels(label)
            filtered_labels.append(filt_labels)

        # i is the num_images index
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(np.hstack([boxes[img][sc] * image_size,
                                          scores[img][sc],
                                          classes[img][sc]]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filtered_labels, images, roc=False)  # Check gts box coords

    if args.valid_imgs:  # Predictions
        print('Valid Images...')

        # yolo = YoloV3(classes=num_classes)
        yolo = YoloV3(image_size, training=False, classes=num_classes)
        yolo.load_weights(saved_weights_path).expect_partial()
        print('weights loaded')

        print('Validation Image...')
        # Find better way to do this so not requiring manual changes
        class_dict = cfg.CLASS_DICT
        class_names = list(class_dict.values())
        print('classes loaded')

        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        # boxes, scores, classes, num_detections
        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')
            # img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)
            img = img * 255

            boxes, scores, classes, nums = yolo(img)

            filt_labels = flatten_labels(_label)

            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            # img = img * 255

            output = 'test_images/test_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_{}.jpg'.format(index)
            # print('detections:')
            # for i in range(nums[index]):
            #     print('\t{}, {}, {}'.format(class_names[int(classes[index][i])],
            #                                 np.array(scores[index][i]),
            #                                 np.array(boxes[index][i])))
            #     if i > 10:
            #         continue
            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255
            cv2.imwrite(output, img)
            index = index + 1

    if args.visual_data:
        print('Visual Data...')

        val_dataset = dataset.load_tfrecord_dataset(valid_path, classes_file, image_size)
        val_dataset = val_dataset.batch(1)
        val_dataset = val_dataset.map(lambda x, y: (
            dataset.transform_images(x, image_size),
            dataset.transform_targets(y, anchors, anchor_masks, image_size)))

        index = 0
        for img_raw, _label in val_dataset.take(5):
            print(f'Index {index}')
            # img = tf.expand_dims(img_raw, 0)
            img = transform_images(img_raw, image_size)

            output = 'test_images/test_labels_{}.jpg'.format(index)
            # output = '/Users/justinbutler/Desktop/test/test_images/test_labels_{}.jpg'.format(index)

            filt_labels = flatten_labels(_label)

            boxes = tf.expand_dims(filt_labels[:, 0:4], 0)
            scores = tf.expand_dims(filt_labels[:, 4], 0)
            classes = tf.expand_dims(filt_labels[:, 5], 0)
            nums = tf.expand_dims(filt_labels.shape[0], 0)

            img = cv2.cvtColor(img_raw[0].numpy(), cv2.COLOR_RGB2BGR)
            img = draw_outputs(img, (boxes, scores, classes, nums), class_names, thresh=0)
            img = img * 255
            cv2.imwrite(output, img)
            index = index + 1
    return
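# flatten_labels() is called in main() above but is not defined in this snippet. Judging from the
# inline label-flattening loops in the later functions of this file, a plausible sketch (an
# assumption, not the original implementation) that turns the 3-scale grid labels into a single
# [num_boxes, 6] array of (x1, y1, x2, y2, score, class) rows would be:
def flatten_labels(label, image_size=cfg.IMAGE_SIZE):
    filt_labels = []
    # label is a tuple of 3 grid scales, each shaped [1, grid, grid, anchors, 6]
    for scale in label:
        scale = np.asarray(scale)
        for box in scale.reshape(-1, 6):
            if box[0] > 0:  # keep only entries that actually contain an object
                filt_labels.append([box[0] * image_size, box[1] * image_size,
                                    box[2] * image_size, box[3] * image_size,
                                    box[4], box[5]])
    return np.asarray(filt_labels, dtype=np.float32)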
def init_module(morph):
    global ev
    ev = Evaluator(morph=morph)
def train(opt): """ 模型训练函数 """ # 自定义的类,日志记录 logger = Logger(opt) # 获取数据 dataset = VISTDataset(opt) opt.vocab_size = dataset.get_vocab_size() opt.seq_length = dataset.get_story_length() # print(dataset.get_word2id()['the']) dataset.set_option(data_type={ 'whole_story': False, 'split_story': True, 'caption': True }) # 若不使用caption数据,则将其设为False dataset.train() train_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=opt.shuffle) dataset.test() # 改为valid val_loader = DataLoader(dataset, batch_size=opt.batch_size, shuffle=False) # m = dataset.word2id # 记录上升的 valid_loss 次数 bad_valid = 0 # 创建Evaluator evaluator = Evaluator(opt, 'val') # 损失 crit = criterion.LanguageModelCriterion() # 是否使用强化学习,默认为-1 if opt.start_rl >= 0: rl_crit = criterion.ReinforceCriterion(opt, dataset) # set up model,函数在init文件中,若有原来模型,则加载模型参数 model = models.setup(opt) model.cuda() optimizer = setup_optimizer(opt, model) dataset.train() model.train() for epoch in range(logger.epoch_start, opt.max_epochs): # 默认为 0-20 # scheduled_sampling_start表示在第几个epoch,衰减gt使用概率,最大到0.25,5个epoch之内还是0 if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0: frac = ( epoch - opt.scheduled_sampling_start ) // opt.scheduled_sampling_increase_every # 后者默认值为5,//为向下取整除 opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac, opt.scheduled_sampling_max_prob) # 0.05、0.25 model.ss_prob = opt.ss_prob # 对数据进行一个batch一个batch的迭代 for iter, batch in enumerate(train_loader): start = time.time() logger.iteration += 1 torch.cuda.synchronize() # 获取batch中的数据,图像特征、caption、以及target features = Variable(batch['feature_fc']).cuda() # 64*5*2048 caption = None if opt.caption: caption = Variable(batch['caption']).cuda() # 64*5*20 target = Variable(batch['split_story']).cuda() # 64*5*30 index = batch['index'] optimizer.zero_grad() # 模型运行,返回一个概率分布,然后计算交叉熵损失 output = model(features, target, caption) loss = crit(output, target) if opt.start_rl >= 0 and epoch >= opt.start_rl: # reinforcement learning # 获取 sample 数据和 baseline 数据 seq, seq_log_probs, baseline = model.sample(features, caption=caption, sample_max=False, rl_training=True) rl_loss, avg_score = rl_crit(seq, seq_log_probs, baseline, index) print(rl_loss.data[0] / loss.data[0]) loss = opt.rl_weight * rl_loss + (1 - opt.rl_weight) * loss logging.info("average {} score: {}".format( opt.reward_type, avg_score)) # 反向传播 loss.backward() train_loss = loss.item() # 梯度裁剪,第二个参数为梯度最大范数,大于该值则进行裁剪 nn.utils.clip_grad_norm(model.parameters(), opt.grad_clip, norm_type=2) optimizer.step() torch.cuda.synchronize() # 日志记录时间以及损失 logging.info( "Epoch {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s". 
format(epoch, iter, len(train_loader), train_loss, time.time() - start)) # Write the training loss summary,tensorboard记录 if logger.iteration % opt.losses_log_every == 0: logger.log_training(epoch, iter, train_loss, opt.learning_rate, model.ss_prob) # validation验证,每迭代save_checkpoint_every轮评测一次 if logger.iteration % opt.save_checkpoint_every == 0: val_loss, predictions, metrics = evaluator.eval_story( model, crit, dataset, val_loader, opt) if opt.metric == 'XE': score = -val_loss else: score = metrics[opt.metric] logger.log_checkpoint(epoch, val_loss, metrics, predictions, opt, model, dataset, optimizer) # halve the learning rate if not improving for a long time if logger.best_val_score > score: bad_valid += 1 if bad_valid >= 4: opt.learning_rate = opt.learning_rate / 2.0 logging.info("halve learning rate to {}".format( opt.learning_rate)) checkpoint_path = os.path.join(logger.log_dir, 'model-best.pth') model.load_state_dict(torch.load(checkpoint_path)) utils.set_lr(optimizer, opt.learning_rate) # set the decayed rate bad_valid = 0 logging.info("bad valid : {}".format(bad_valid)) else: logging.info("achieving best {} score: {}".format( opt.metric, score)) bad_valid = 0
def train(opt):
    # utils.setup_seed()
    logger = Logger(opt, save_code=opt.save_code)

    ################### set up dataset and dataloader ########################
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.set_option(data_type={'whole_story': False, 'split_story': True,
                                  'caption': False, 'prefix_story': True})

    dataset.train()
    train_loader = DataLoader(dataset, batch_size=opt.batch_size,
                              shuffle=opt.shuffle, num_workers=opt.workers)
    dataset.val()
    val_loader = DataLoader(dataset, batch_size=opt.batch_size,
                            shuffle=False, num_workers=opt.workers)

    ##################### set up model, criterion and optimizer ######
    bad_valid = 0

    # set up evaluator
    evaluator = Evaluator(opt, 'val')

    # set up criterion
    crit = criterion.LanguageModelCriterion()

    # set up model
    model = models.setup(opt)
    model.cuda()

    # set up optimizer
    optimizer = setup_optimizer(opt, model)

    dataset.train()
    model.train()
    initial_lr = opt.learning_rate
    logging.info(model)

    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):
        # Assign the scheduled sampling prob
        if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
            frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
            opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                              opt.scheduled_sampling_max_prob)
            model.ss_prob = opt.ss_prob

        for iter, batch in enumerate(train_loader):
            start = time.time()
            logger.iteration += 1
            torch.cuda.synchronize()

            feature_fc = batch['feature_fc'].cuda()
            if opt.use_obj:
                feature_obj = batch['feature_obj'].cuda()
            if opt.use_spatial:
                feature_obj_spatial = batch['feature_obj_spatial'].cuda()
            else:
                feature_obj_spatial = None
            if opt.use_classes:
                feature_obj_classes = batch['feature_obj_classes'].cuda()
            else:
                feature_obj_classes = None
            if opt.use_attrs:
                feature_obj_attrs = batch['feature_obj_attrs'].cuda()
            else:
                feature_obj_attrs = None
            target = batch['split_story'].cuda()
            prefix = batch['prefix_story'].cuda()
            history_count = batch['history_counter'].cuda()
            index = batch['index']

            optimizer.zero_grad()

            # cross entropy loss
            output = model(feature_fc, feature_obj, target, history_count,
                           spatial=feature_obj_spatial, clss=feature_obj_classes,
                           attrs=feature_obj_attrs)
            loss = crit(output, target)

            loss.backward()
            train_loss = loss.item()

            nn.utils.clip_grad_norm_(model.parameters(), opt.grad_clip, norm_type=2)
            optimizer.step()
            torch.cuda.synchronize()

            if iter % opt.log_step == 0:
                logging.info("Epoch {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s".format(
                    epoch, iter, len(train_loader), train_loss, time.time() - start))

            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate, model.ss_prob)

            if logger.iteration % opt.save_checkpoint_every == 0:
                # Evaluate on validation dataset and save model for every epoch
                val_loss, predictions, metrics = evaluator.eval_story(
                    model, crit, dataset, val_loader, opt)
                if opt.metric == 'XE':
                    score = -val_loss
                else:
                    score = metrics[opt.metric]
                logger.log_checkpoint(epoch, val_loss, metrics, predictions,
                                      opt, model, dataset, optimizer)
                # halve the learning rate if not improving for a long time
                if logger.best_val_score > score:
                    bad_valid += 1
                    if bad_valid >= opt.bad_valid_threshold:
                        opt.learning_rate = opt.learning_rate * opt.learning_rate_decay_rate
                        logging.info("halve learning rate to {}".format(opt.learning_rate))
                        checkpoint_path = os.path.join(logger.log_dir, 'model-best.pth')
                        model.load_state_dict(torch.load(checkpoint_path))
                        utils.set_lr(optimizer, opt.learning_rate)  # set the decayed rate
                        bad_valid = 0
                    logging.info("bad valid : {}".format(bad_valid))
                else:
                    opt.learning_rate = initial_lr
                    logging.info("achieving best {} score: {}".format(opt.metric, score))
                    bad_valid = 0
def train(opt):
    setup_seed()
    logger = Logger(opt)

    ################### set up dataset and dataloader ########################
    dataset = VISTDataset(opt)
    opt.vocab_size = dataset.get_vocab_size()
    opt.seq_length = dataset.get_story_length()

    dataset.set_option(data_type={'whole_story': False, 'split_story': True, 'caption': False})

    dataset.train()
    train_loader = DataLoader(dataset, batch_size=opt.batch_size,
                              shuffle=opt.shuffle, num_workers=opt.workers)
    dataset.val()
    val_loader = DataLoader(dataset, batch_size=opt.batch_size,
                            shuffle=False, num_workers=opt.workers)

    ##################### set up model, criterion and optimizer ######
    bad_valid = 0

    # set up evaluator
    evaluator = Evaluator(opt, 'val')

    # set up criterion
    crit = criterion.LanguageModelCriterion()
    if opt.start_rl >= 0:
        rl_crit = criterion.ReinforceCriterion(opt, dataset)

    # set up model
    model = models.setup(opt)
    model.cuda()

    # set up optimizer
    optimizer = setup_optimizer(opt, model)

    dataset.train()
    model.train()

    ############################## training ##################################
    for epoch in range(logger.epoch_start, opt.max_epochs):
        # Assign the scheduled sampling prob
        if epoch > opt.scheduled_sampling_start and opt.scheduled_sampling_start >= 0:
            frac = (epoch - opt.scheduled_sampling_start) // opt.scheduled_sampling_increase_every
            opt.ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                              opt.scheduled_sampling_max_prob)
            model.ss_prob = opt.ss_prob

        for iter, batch in enumerate(train_loader):
            start = time.time()
            logger.iteration += 1
            torch.cuda.synchronize()

            feature_fc = Variable(batch['feature_fc']).cuda()
            target = Variable(batch['split_story']).cuda()
            index = batch['index']
            semantic = batch['semantic']

            optimizer.zero_grad()

            # cross entropy loss
            output = model(feature_fc, target, semantic)
            loss = crit(output, target)

            if opt.start_rl >= 0 and epoch >= opt.start_rl:  # reinforcement learning
                seq, seq_log_probs, baseline = model.sample(feature_fc,
                                                            sample_max=False, rl_training=True)
                rl_loss, avg_score = rl_crit(seq, seq_log_probs, baseline, index)
                print(rl_loss.data[0] / loss.data[0])
                loss = opt.rl_weight * rl_loss + (1 - opt.rl_weight) * loss
                logging.info("average {} score: {}".format(opt.reward_type, avg_score))

            loss.backward()
            train_loss = loss.data[0]

            nn.utils.clip_grad_norm(model.parameters(), opt.grad_clip, norm_type=2)
            optimizer.step()
            torch.cuda.synchronize()

            logging.info("Epoch {} - Iter {} / {}, loss = {:.5f}, time used = {:.3f}s".format(
                epoch, iter, len(train_loader), train_loss, time.time() - start))

            # Write the training loss summary
            if logger.iteration % opt.losses_log_every == 0:
                logger.log_training(epoch, iter, train_loss, opt.learning_rate, model.ss_prob)

            if logger.iteration % opt.save_checkpoint_every == 0:
                # Evaluate on validation dataset and save model for every epoch
                val_loss, predictions, metrics = evaluator.eval_story(
                    model, crit, dataset, val_loader, opt)
                if opt.metric == 'XE':
                    score = -val_loss
                else:
                    score = metrics[opt.metric]
                logger.log_checkpoint(epoch, val_loss, metrics, predictions,
                                      opt, model, dataset, optimizer)
                # halve the learning rate if not improving for a long time
                if logger.best_val_score > score:
                    bad_valid += 1
                    if bad_valid >= 4:
                        opt.learning_rate = opt.learning_rate / 2.0
                        logging.info("halve learning rate to {}".format(opt.learning_rate))
                        checkpoint_path = os.path.join(logger.log_dir, 'model-best.pth')
                        model.load_state_dict(torch.load(checkpoint_path))
                        utils.set_lr(optimizer, opt.learning_rate)  # set the decayed rate
                        bad_valid = 0
                    logging.info("bad valid : {}".format(bad_valid))
                else:
                    logging.info("achieving best {} score: {}".format(opt.metric, score))
                    bad_valid = 0
def main():
    train_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_train.record-00000-of-00001'
    valid_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/Datasets/Shapes/tfrecord_single/coco_val.record-00000-of-00001'
    weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/checkpoints/yolov3.tf'
    # Path to text? file containing all classes, 1 per line
    classes = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/shapes/shapes.names'
    # Usually fit
    # mode = 'fit'  # Can be 'fit', 'eager_fit', 'eager_tf', 'valid'
    mode = 'fit'
    '''
    'fit: model.fit, '
    'eager_fit: model.fit(run_eagerly=True), '
    'eager_tf: custom GradientTape'
    '''

    # Usually darknet
    transfer = 'none'
    '''
    'none: Training from scratch, '
    'darknet: Transfer darknet, '
    'no_output: Transfer all but output, '
    'frozen: Transfer and freeze all, '
    'fine_tune: Transfer all and freeze darknet only'),
    'pre': Use a pre-trained model for validation
    '''
    image_size = 416
    num_epochs = 1
    batch_size = 8
    learning_rate = 1e-3
    num_classes = 4
    # num class for `weights` file if different, useful in transfer learning with different number of classes
    weight_num_classes = 80
    iou_threshold = 0.5

    # saved_weights_path = '/Users/justinbutler/Desktop/school/Calgary/ML_Work/yolov3-tf2/weights/'
    saved_weights_path = '/home/justin/ml_models/yolov3-tf2/weights/shapes_{}.tf'.format(num_epochs)

    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    # Training dataset
    # dataset_train = tf.data.TFRecordDataset(train_path)
    # dataset_val = tf.data.TFRecordDataset(valid_path)

    dataset_train = load_tfrecord_dataset(train_path, classes, image_size)
    dataset_train = dataset_train.shuffle(buffer_size=512)
    dataset_train = dataset_train.batch(batch_size)
    # dataset_train = dataset_train.map(lambda x, y: (
    #     transform_images(x, image_size),
    #     transform_targets(y, anchors, anchor_masks, image_size)))
    # dataset_train = dataset_train.prefetch(
    #     buffer_size=tf.data.experimental.AUTOTUNE)

    dataset_val = load_tfrecord_dataset(valid_path, classes, image_size)
    dataset_val = dataset_val.shuffle(buffer_size=512)
    dataset_val = dataset_val.batch(batch_size)
    # dataset_val = dataset_val.map(lambda x, y: (
    #     transform_images(x, image_size),
    #     transform_targets(y, anchors, anchor_masks, image_size)))

    # Create model in training mode
    yolo = models.YoloV3(image_size, training=True, classes=num_classes)

    model_pretrained = YoloV3(image_size, training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    # Which weights to start with?
    print('Loading Weights...')
    # yolo.load_weights(weights_path)

    yolo.get_layer('yolo_darknet').set_weights(
        model_pretrained.get_layer('yolo_darknet').get_weights())
    freeze_all(yolo.get_layer('yolo_darknet'))

    optimizer = tf.keras.optimizers.Adam(lr=learning_rate)
    loss = [YoloLoss(anchors[mask], classes=num_classes) for mask in anchor_masks]
    # Passing loss as a list might sometimes fail? dict might be better?

    yolo.compile(optimizer=optimizer, loss=loss, run_eagerly=(mode == 'eager_fit'))
    callbacks = [
        ReduceLROnPlateau(verbose=1),
        EarlyStopping(patience=3, verbose=1),
        ModelCheckpoint('checkpoints/yolov3_train_{epoch}.tf',
                        verbose=1, save_weights_only=True),
        TensorBoard(log_dir='logs')
    ]

    history = yolo.fit(dataset_train,
                       epochs=num_epochs,
                       callbacks=callbacks,
                       validation_data=dataset_val)
    yolo.save_weights(saved_weights_path)

    # Detect/ROC
    model = YoloV3(image_size, training=False, classes=num_classes)
    model.load_weights(saved_weights_path).expect_partial()

    batch_size = 1

    val_dataset = load_tfrecord_dataset(valid_path, classes, image_size)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(
        lambda x, y: (transform_images(x, image_size),
                      transform_targets(y, anchors, anchor_masks, image_size)))

    images = []
    for img, labs in val_dataset:
        img = np.squeeze(img)
        images.append(img)

    predictions = []

    evaluator = Evaluator(iou_thresh=iou_threshold)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections (200), box coords (4))
    # scores -> (num_imgs, num_detections)
    # classes -> (num_imgs, num_detections)
    # num_detections -> num_imgs

    # Aim for labels shape (per batch): [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]
    # full_labels = [label for _, label in val_dataset]
    # Shape : [Num images, 3 scales, grid, grid, anchor, 6]

    filtered_labels = []
    for _, label in val_dataset:
        img_labels = []
        # Label has shape [3 scales x [1, grid, grid, 3, 6]]
        for scale in label:
            # Shape [1, grid, grid, 3, 6]
            scale = np.asarray(scale)
            grid = scale.shape[1]
            scale2 = np.reshape(scale, (3, grid * grid, 6))
            # Shape: [3, grid*grid, 6]
            for anchor in scale2:
                filtered_anchors = []
                for box in anchor:
                    if box[4] > 0:
                        filtered_anchors.append(np.asarray(box))
                img_labels.append(filtered_anchors)

        img_labels = np.asarray(img_labels)
        filtered_labels.append(img_labels)

    print(len(filtered_labels))
    print(len(filtered_labels[0]))
    print(len(filtered_labels[0][2]))

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]])
    #                for i in range(len(num_detections))
    #                for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(np.hstack([boxes[img][sc] * image_size,
                                      scores[img][sc],
                                      classes[img][sc]]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # Predictions shape: [num_imgs x num_preds x [box coords(4), conf, classes]]
    # Box coords should be in format x1 y1 x2 y2
    # Labels shape: [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]

    evaluator(predictions, filtered_labels, images)  # Check gts box coords
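# freeze_all() comes from the yolov3-tf2 utilities and is not reproduced in this snippet. A short
# sketch consistent with how it is used here (recursively marking a layer or sub-model as not
# trainable) is shown below; treat it as an approximation rather than the exact upstream code.
def freeze_all(model, frozen=True):
    model.trainable = not frozen
    if isinstance(model, tf.keras.Model):
        for layer in model.layers:
            freeze_all(layer, frozen)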
def main(args):
    image_size = 416  # 416
    num_epochs = args.epochs
    batch_size = args.batch_size
    learning_rate = 1e-3
    num_classes = args.num_classes
    # num class for `weights` file if different, useful in transfer learning with different number of classes
    weight_num_classes = args.num_weight_class

    valid_path = args.valid_dataset
    weights_path = args.weights
    # Path to text? file containing all classes, 1 per line
    classes = args.classes

    anchors = yolo_anchors
    anchor_masks = yolo_anchor_masks

    val_dataset = dataset.load_tfrecord_dataset(valid_path, classes, image_size)
    val_dataset = val_dataset.batch(batch_size)
    val_dataset = val_dataset.map(lambda x, y: (
        dataset.transform_images(x, image_size),
        dataset.transform_targets(y, anchors, anchor_masks, image_size)))

    model = YoloV3(image_size, training=True, classes=num_classes)

    # Darknet transfer is a special case that works
    # with incompatible number of classes
    # reset top layers
    model_pretrained = YoloV3(image_size, training=True,
                              classes=weight_num_classes or num_classes)
    model_pretrained.load_weights(weights_path)

    if transfer == 'darknet':
        model.get_layer('yolo_darknet').set_weights(
            model_pretrained.get_layer('yolo_darknet').get_weights())
        freeze_all(model.get_layer('yolo_darknet'))

    predictions = []

    evaluator = Evaluator(iou_thresh=args.iou)

    # labels - (N, grid, grid, anchors, [x, y, w, h, obj, class])
    boxes, scores, classes, num_detections = model.predict(val_dataset)
    # boxes -> (num_imgs, num_detections, box coords)

    # Full labels shape -> [num_batches, grid scale, imgs]
    # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
    full_labels = np.asarray([label for _, label in val_dataset])

    # Shape -> [num_batches, num_imgs_in_batch, 3]
    # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
    full_labels_trans = full_labels.transpose(0, 2, 1)

    full_labels_flat = []
    for batch in full_labels_trans:
        for img in batch:
            row = []
            for scale in img:
                row.append(scale)
            full_labels_flat.append(row)

    # Shape -> [num_imgs x 3]
    full_labels_flat = np.asarray(full_labels_flat)

    # Remove any labels consisting of all 0's
    filt_labels = []
    # for img in range(len(full_labels_flat)):
    for img in full_labels_flat:
        test = []
        # for scale in full_labels_flat[img]:
        for scale in img:
            lab_list = []
            for g1 in scale:
                for g2 in g1:
                    for anchor in g2:
                        if anchor[0] > 0:
                            temp = [anchor[0] * image_size,
                                    anchor[1] * image_size,
                                    anchor[2] * image_size,
                                    anchor[3] * image_size,
                                    anchor[4],
                                    anchor[5]]
                            temp = [float(x) for x in temp]
                            lab_list.append(np.asarray(temp))
            test.append(np.asarray(lab_list))
        filt_labels.append(np.asarray(test))
    filt_labels = np.asarray(filt_labels)  # Numpy array of shape [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]]
    # filt_labels = filt_labels[:, :4] * image_size

    # i is the num_images index
    # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]])
    #                for i in range(len(num_detections))
    #                for x in range(len(scores[i])) if scores[i][x] > 0]
    for img in range(len(num_detections)):
        row = []
        for sc in range(len(scores[img])):
            if scores[img][sc] > 0:
                row.append(np.hstack([boxes[img][sc] * image_size,
                                      scores[img][sc],
                                      classes[img][sc]]))
        predictions.append(np.asarray(row))

    predictions = np.asarray(predictions)  # numpy array of shape [num_imgs x num_preds x 6]

    if len(predictions) == 0:  # No predictions made
        print('No predictions made - exiting.')
        exit()

    # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
    # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
    # Box coords should be in format x1 y1 x2 y2

    evaluator(predictions, filt_labels, images)  # Check gts box coords

    confidence_thresholds = np.linspace(0.1, 1, 15)
    confidence_thresholds = [0.5]
    all_tp_rates = []
    all_fp_rates = []

    # Compute ROCs for above range of thresholds
    # Compute one for each class vs. the other classes
    for index, conf in enumerate(confidence_thresholds):
        tp_of_img = []
        fp_of_img = []
        all_classes = []

        tp_rates = {}
        fp_rates = {}

        boxes, scores, classes, num_detections = model.predict(val_dataset)

        # Full labels shape -> [num_batches, grid scale, imgs]
        # Full labels shape -> [num_batches, [grid, grid, anchors, [x,y,w,h,obj,class]]]
        full_labels = np.asarray([label for _, label in val_dataset])

        # Shape -> [num_batches, num_imgs_in_batch, 3]
        # Shape -> [num_batches, num_imgs, 3x[grid,grid,anchors,[x,y,w,h,score,class]]]
        full_labels_trans = full_labels.transpose(0, 2, 1)

        full_labels_flat = []
        for batch in full_labels_trans:
            for img in batch:
                row = []
                for scale in img:
                    row.append(scale)
                full_labels_flat.append(row)

        # Shape -> [num_imgs x 3]
        full_labels_flat = np.asarray(full_labels_flat)

        # Remove any labels consisting of all 0's
        filt_labels = []
        # for img in range(len(full_labels_flat)):
        for img in full_labels_flat:
            test = []
            # for scale in full_labels_flat[img]:
            for scale in img:
                lab_list = []
                for g1 in scale:
                    for g2 in g1:
                        for anchor in g2:
                            if anchor[0] > 0:
                                temp = [anchor[0] * image_size,
                                        anchor[1] * image_size,
                                        anchor[2] * image_size,
                                        anchor[3] * image_size,
                                        anchor[4],
                                        anchor[5]]
                                temp = [float(x) for x in temp]
                                lab_list.append(np.asarray(temp))
                test.append(np.asarray(lab_list))
            filt_labels.append(np.asarray(test))
        filt_labels = np.asarray(filt_labels)  # Numpy array of shape [num_imgs, 3x[num_boxes x [x1,y1,x2,y2,score,class]]]
        # filt_labels = filt_labels[:, :4] * image_size

        # i is the num_images index
        # predictions = [np.hstack([boxes[i][x], scores[i][x], classes[i][x]])
        #                for i in range(len(num_detections))
        #                for x in range(len(scores[i])) if scores[i][x] > 0]
        for img in range(len(num_detections)):
            row = []
            for sc in range(len(scores[img])):
                if scores[img][sc] > 0:
                    row.append(np.hstack([boxes[img][sc] * image_size,
                                          scores[img][sc],
                                          classes[img][sc]]))
            predictions.append(np.asarray(row))

        predictions = np.asarray(predictions)  # numpy array of shape [num_imgs x num_preds x 6]

        if len(predictions) == 0:  # No predictions made
            print('No predictions made - exiting.')
            exit()

        # predictions[:, :, 0:4] = predictions[:, :, 0:4] * image_size
        # Predictions format - [num_imgs x num_preds x [box coords x4, score, classes]]
        # Box coords should be in format x1 y1 x2 y2

        evaluator(predictions, filt_labels, images)  # Check gts box coords

        classes = list(set(r['class_ids']))  # All unique class ids
        for c in classes:
            if c not in all_classes:
                all_classes.append(c)

        complete_classes = dataset_val.class_ids[1:]
        # Need TPR and FPR rates for each class versus the other classes
        # Recall == TPR
        tpr = utils.compute_ap_indiv_class(gt_bbox, gt_class_id, gt_mask,
                                           r["rois"], r["class_ids"], r["scores"],
                                           r['masks'], complete_classes)
        total_fpr = utils.compute_fpr_indiv_class(gt_bbox, gt_class_id, gt_mask,
                                                  r["rois"], r["class_ids"], r["scores"],
                                                  r['masks'], complete_classes)
        # print(f'For Image: TPR: {tpr} -- FPR: {total_fpr}')
        tp_of_img.append(tpr)
        fp_of_img.append(total_fpr)

        all_classes = dataset_val.class_ids[1:]

        # Need to get average TPR and FPR for number of images used
        for c in all_classes:
            tp_s = 0
            for item in tp_of_img:
                if c in item.keys():
                    tp_s += item[c]
                else:
                    tp_s += 0
            tp_rates[c] = tp_s / len(image_ids)
            # tp_rates[c] = tp_s

        # print(tp_rates)

        for c in all_classes:
            fp_s = 0
            for item in fp_of_img:
                if c in item.keys():
                    fp_s += item[c]
                else:
                    fp_s += 0
            fp_rates[c] = fp_s / len(image_ids)
            # fp_rates[c] = fp_s

        all_fp_rates.append(fp_rates)
        all_tp_rates.append(tp_rates)

    print(f'TP Rates: {all_tp_rates}')
    print(f'FP Rates: {all_fp_rates}')