def forward(self, class_preds, class_targets, loc_preds, loc_targets, anchors):
    """Compute classification and localization losses with hard negative mining.

    Ground truth is assigned to anchors image by image through ``match``.
    Anchors whose assigned class is greater than 0 are the positives.  The
    localization loss is a summed smooth L1 over the positives.  The
    classification loss is a summed cross entropy over the positives plus
    the highest-loss negatives of each image, keeping at most
    ``self.pos_neg_ratio`` negatives per positive.

    Args:
        class_preds: class scores; flattened to ``(-1, self.class_count)``.
        class_targets: ground-truth classes, indexed per image.
        loc_preds: 3-D box predictions ``(batch, num_anchors, _)``; the
            positive entries are viewed as ``(-1, 4)``.
        loc_targets: ground-truth boxes, indexed per image.
        anchors: anchor boxes; ``anchors.data`` is handed to ``match``.

    Returns:
        Tuple ``(class_loss, loc_loss, loss)``.  ``loss`` is the sum of the
        other two, and both are divided by the number of positive anchors
        in the batch (treated as 1 when there are none).
    """
    b, num_anchors, _ = loc_preds.shape

    # per-anchor targets; the buffers are uninitialised until match() fills
    # them one image at a time
    class_m = to_var(torch.LongTensor(b, num_anchors), self.use_gpu)
    loc_m = to_var(torch.Tensor(b, num_anchors, 4), self.use_gpu)
    for i in range(b):
        class_m[i], loc_m[i] = match(threshold=self.threshold,
                                     class_target=class_targets[i],
                                     loc_target=loc_targets[i],
                                     anchors=anchors.data,
                                     variances=self.variance)
    class_targets = class_m
    loc_targets = loc_m

    pos = class_targets > 0

    # loc loss, positives only
    pos_mask = pos.unsqueeze(pos.dim()).expand_as(loc_preds)
    loc_loss = F.smooth_l1_loss(loc_preds[pos_mask].view(-1, 4),
                                loc_targets[pos_mask].view(-1, 4),
                                size_average=False)

    # per-anchor classification loss, used only to rank the negatives
    batch_conf = class_preds.view(-1, self.class_count)
    class_loss = self.log_sum_exp(batch_conf) - batch_conf.gather(
        1, class_targets.view(-1, 1))

    # hard negative mining: zero the positives so only negatives compete,
    # then turn the descending sort order into a per-anchor rank
    class_loss = class_loss.view(b, -1)
    class_loss[pos] = 0
    _, loss_index = class_loss.sort(1, descending=True)
    _, index_rank = loss_index.sort(1)
    num_pos = pos.long().sum(1, keepdim=True)
    num_neg = torch.clamp(self.pos_neg_ratio * num_pos,
                          max=pos.shape[1] - 1)
    neg = index_rank < num_neg.expand_as(index_rank)

    # class loss including positive and negative examples
    pos_index = pos.unsqueeze(2).expand_as(class_preds)
    neg_index = neg.unsqueeze(2).expand_as(class_preds)
    preds = class_preds[(pos_index + neg_index).gt(0)]
    preds = preds.view(-1, self.class_count)
    targets_weighted = class_targets[(pos + neg).gt(0)]
    class_loss = F.cross_entropy(preds, targets_weighted,
                                 size_average=False)

    # A batch without any positive anchor would otherwise divide by zero
    # and turn the losses into NaN/inf; normalise by at least 1.
    num_matched = num_pos.data.sum().float().clamp(min=1)
    class_loss /= num_matched
    loc_loss /= num_matched

    loss = class_loss + loc_loss
    return class_loss, loc_loss, loss
def forward(self, class_preds, class_targets, loc_preds, loc_targets, anchors):
    """Compute the focal classification loss and smooth L1 localization loss.

    Ground truth is assigned to anchors image by image through ``match``.
    Anchors whose assigned class is greater than 0 are the positives.  The
    localization loss is a summed smooth L1 over the positives.  The
    classification loss is ``self.focal_loss`` over all anchors.

    Args:
        class_preds: class scores; flattened to ``(-1, self.class_count)``.
        class_targets: ground-truth classes, indexed per image.
        loc_preds: 3-D box predictions ``(batch, num_anchors, _)``.
        loc_targets: ground-truth boxes, indexed per image.
        anchors: anchor boxes; ``anchors.data`` is handed to ``match``.

    Returns:
        Tuple ``(class_loss, loc_loss, loss)``.  ``loss`` is the sum of the
        other two, and both are divided by the number of positive anchors
        in the batch (treated as 1 when there are none).
    """
    b, num_anchors, _ = loc_preds.shape

    # per-anchor targets; the buffers are uninitialised until match() fills
    # them one image at a time
    # NOTE(review): class targets live in a float tensor here; confirm that
    # self.focal_loss expects that dtype.
    class_m = to_var(torch.Tensor(b, num_anchors), self.use_gpu)
    loc_m = to_var(torch.Tensor(b, num_anchors, 4), self.use_gpu)
    for i in range(b):
        class_m[i], loc_m[i] = match(threshold=self.threshold,
                                     class_target=class_targets[i],
                                     loc_target=loc_targets[i],
                                     anchors=anchors.data,
                                     variances=self.variance)
    class_targets = class_m
    loc_targets = loc_m

    pos = class_targets > 0
    # A batch without any positive anchor would otherwise divide by zero
    # and turn the losses into NaN/inf; normalise by at least 1.
    num_matched = pos.data.long().sum().float().clamp(min=1)

    # loc_loss, positives only
    pos_mask = pos.unsqueeze(2).expand_as(loc_preds)
    loc_loss = F.smooth_l1_loss(loc_preds[pos_mask],
                                loc_targets[pos_mask],
                                size_average=False)

    # class loss over every anchor
    class_preds = class_preds.view(-1, self.class_count)
    class_targets = class_targets.view(-1)
    class_loss = self.focal_loss(class_preds, class_targets)

    class_loss /= num_matched
    loc_loss /= num_matched

    loss = class_loss + loc_loss
    return class_loss, loc_loss, loss
def train_iter(self, start):
    """Train for iterations ``start`` up to ``self.num_iterations``.

    Batches are drawn from ``self.train_loader``; when the loader is
    exhausted it is restarted.  The learning rate is adjusted whenever the
    iteration index is one of ``self.sched_milestones``.  Losses are logged
    every ``self.loss_log_step`` iterations and the model is saved every
    ``self.model_save_step`` iterations, plus once after the last iteration.

    Args:
        start: index of the first iteration to run.  When it is not below
            ``self.num_iterations`` nothing is trained and nothing is saved.
    """
    # NOTE(review): step_index restarts at 0 even when ``start`` is past
    # some milestones - confirm adjust_learning_rate copes with resuming.
    step_index = 0
    start_time = time.time()
    batch_iterator = iter(self.train_loader)
    count = 0
    last_iter = None

    for i in range(start, self.num_iterations):
        if i in self.sched_milestones:
            step_index += 1
            self.adjust_learning_rate(optimizer=self.optimizer,
                                      gamma=self.sched_gamma,
                                      step=step_index)

        # restart the loader once it runs dry
        try:
            images, targets = next(batch_iterator)
        except StopIteration:
            batch_iterator = iter(self.train_loader)
            images, targets = next(batch_iterator)

        images = to_var(images, self.use_gpu)
        targets = [to_var(target, self.use_gpu) for target in targets]
        class_loss, loc_loss, loss, count = self.model_step(
            images, targets, count)

        # print out loss log
        if (i + 1) % self.loss_log_step == 0:
            self.print_loss_log(start_time=start_time,
                                cur=i,
                                total=self.num_iterations,
                                class_loss=class_loss,
                                loc_loss=loc_loss,
                                loss=loss)
            self.losses.append([i, class_loss, loc_loss, loss])

        # save model
        if (i + 1) % self.model_save_step == 0:
            self.save_model(i)

        last_iter = i

    # Final checkpoint.  Skipped when the loop never ran, where the loop
    # variable would be unbound.
    if last_iter is not None:
        self.save_model(last_iter)
def train_epoch(self, start):
    """Train for epochs ``start`` up to ``self.num_epochs``.

    The schedule step is advanced whenever the epoch index is one of
    ``self.sched_milestones``, and ``adjust_learning_rate`` is called before
    every batch.  After each epoch the losses of its last batch are logged
    every ``self.loss_log_step`` epochs and the model is saved every
    ``self.model_save_step`` epochs, plus once after the last epoch.

    Args:
        start: index of the first epoch to run.  When it is not below
            ``self.num_epochs`` nothing is trained and nothing is saved.
    """
    # NOTE(review): step_index restarts at 0 even when ``start`` is past
    # some milestones - confirm adjust_learning_rate copes with resuming.
    step_index = 0
    start_time = time.time()
    iters_per_epoch = len(self.train_loader)
    last_epoch = None

    for e in range(start, self.num_epochs):
        if e in self.sched_milestones:
            step_index += 1

        for i, (images, targets) in enumerate(tqdm(self.train_loader)):
            self.adjust_learning_rate(optimizer=self.optimizer,
                                      gamma=self.sched_gamma,
                                      step=step_index,
                                      i=i,
                                      iters_per_epoch=iters_per_epoch,
                                      epoch=e)

            images = to_var(images, self.use_gpu)
            targets = [to_var(target, self.use_gpu) for target in targets]
            class_loss, loc_loss, loss = self.model_step(images, targets)

        # print out loss log (values come from the epoch's last batch)
        if (e + 1) % self.loss_log_step == 0:
            self.print_loss_log(start_time=start_time,
                                cur=e,
                                total=self.num_epochs,
                                class_loss=class_loss,
                                loc_loss=loc_loss,
                                loss=loss)
            self.losses.append([e, class_loss, loc_loss, loss])

        # save model
        if (e + 1) % self.model_save_step == 0:
            self.save_model(e)

        last_epoch = e

    # Final checkpoint.  Skipped when the loop never ran, where the loop
    # variable would be unbound.
    if last_epoch is not None:
        self.save_model(last_epoch)
def eval(self, dataset, max_per_image, score_threshold):
    """Detect objects in every image of ``dataset`` and report the results.

    For each image the model output is scaled to the image size, filtered
    per class by ``score_threshold``, passed through NMS and optionally
    capped at ``max_per_image`` detections.  All detections are pickled to
    ``detections.pkl`` under the results path.  For the ``'voc'`` dataset
    the detections are additionally saved and scored, and the per-class APs
    and their mean are written to the output log.  Detection, NMS and
    combined rates are written in every case.

    Args:
        dataset: indexable dataset providing ``len()`` and ``pull_item(i)``
            returning ``(image, target, h, w)``.
        max_per_image: maximum number of detections kept per image across
            all classes; values ``<= 0`` disable the cap.
        score_threshold: a detection is kept only if its class score is
            strictly greater than this value.
    """
    num_images = len(dataset)
    # all_boxes[class][image] -> (N, 5) array: 4 box values + score
    all_boxes = [[[] for _ in range(num_images)]
                 for _ in range(self.class_count)]

    # prepare timers, paths, and files
    timer = {'detection': Timer(), 'nms': Timer()}
    results_path = osp.join(self.model_test_path, self.pretrained_model)
    detection_file = osp.join(results_path, 'detections.pkl')

    detect_times = []
    nms_times = []

    with torch.no_grad():
        for i in range(num_images):
            image, _, h, w = dataset.pull_item(i)
            image = to_var(image.unsqueeze(0), self.use_gpu)

            # get and time detection
            timer['detection'].tic()
            bboxes, scores = self.model(image)
            detect_time = timer['detection'].toc(average=False)
            detect_times.append(detect_time)

            # convert the single-image batch to numpy arrays
            bboxes = bboxes[0].cpu().numpy()
            scores = scores[0].cpu().numpy()

            # scale each detection back up to the image
            scale = torch.Tensor([w, h, w, h]).cpu().numpy()
            bboxes *= scale

            # perform and time NMS; class 0 is skipped
            timer['nms'].tic()
            for j in range(1, self.class_count):
                selected_i = np.where(scores[:, j] > score_threshold)[0]

                if len(selected_i) == 0:
                    all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                    continue

                bboxes_i = bboxes[selected_i]
                scores_i = scores[selected_i, j]
                detections_i = np.hstack((bboxes_i, scores_i[:, np.newaxis]))
                detections_i = detections_i.astype(np.float32, copy=False)

                keep = nms(detections=detections_i,
                           threshold=0.45,
                           force_cpu=True)
                keep = keep[:50]
                all_boxes[j][i] = detections_i[keep, :]

            # limit the number of detections per image if requested
            if max_per_image > 0:
                scores_i = np.hstack([
                    all_boxes[j][i][:, -1]
                    for j in range(1, self.class_count)
                ])

                if len(scores_i) > max_per_image:
                    # score of the max_per_image-th best detection
                    threshold_i = np.sort(scores_i)[-max_per_image]

                    for j in range(1, self.class_count):
                        keep = np.where(
                            all_boxes[j][i][:, -1] >= threshold_i)[0]
                        all_boxes[j][i] = all_boxes[j][i][keep, :]

            nms_time = timer['nms'].toc(average=False)
            nms_times.append(nms_time)

            temp_string = 'detection: {:d}/{:d} {:.4f}s {:.4f}s'
            temp_string = temp_string.format(i + 1, num_images,
                                             detect_time, nms_time)
            write_print(self.output_txt, temp_string)

    with open(detection_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    write_print(self.output_txt, '\nEvaluating detections')

    # perform evaluation; APs only exist for the 'voc' dataset, so keep a
    # sentinel instead of leaving the name unbound for other datasets
    aps = None
    if self.dataset == 'voc':
        voc_save(all_boxes=all_boxes,
                 dataset=dataset,
                 results_path=results_path,
                 output_txt=self.output_txt)

        aps, _ = do_python_eval(results_path=results_path,
                                dataset=dataset,
                                output_txt=self.output_txt,
                                mode='test',
                                use_07_metric=self.use_07_metric)

    detect_times = np.asarray(detect_times)
    nms_times = np.asarray(nms_times)
    total_times = np.add(detect_times, nms_times)

    # the first image's timings are excluded from every average
    detect_fps = 1 / np.mean(detect_times[1:])
    nms_fps = 1 / np.mean(nms_times[1:])
    total_fps = 1 / np.mean(total_times[1:])

    # NOTE(review): all three rates share the label 'fps[all]'; they are
    # detection, NMS and combined, in that order.  Labels left untouched in
    # case the log is parsed downstream.
    write_print(self.output_txt, '\nfps[all]: ' + str(detect_fps))
    write_print(self.output_txt, 'fps[all]:' + str(nms_fps))
    write_print(self.output_txt, 'fps[all]:' + str(total_fps))

    write_print(self.output_txt, '\nResults:')
    if aps is not None:
        for ap in aps:
            write_print(self.output_txt, '{:.4f}'.format(ap))
        write_print(self.output_txt, '{:.4f}'.format(np.mean(aps)))
    write_print(self.output_txt, str(detect_fps))
    write_print(self.output_txt, str(nms_fps))
    write_print(self.output_txt, str(total_fps))
def train(self):
    """
    training process

    Runs epochs from the resume point up to ``self.num_epochs``, logs and
    checkpoints on the configured epoch intervals, lowers the learning rate
    at the epochs listed in ``self.learning_sched`` and finally writes all
    recorded losses to the output log.
    """
    # set model in training mode
    self.model.train()

    self.losses = []
    batches_per_epoch = len(self.data_loader)

    # resume from the epoch encoded in the pretrained model's file name
    first_epoch = 0
    if self.pretrained_model:
        first_epoch = int(self.pretrained_model.split('/')[-1])

    sched = 0

    # start training
    began_at = time.time()
    for epoch in range(first_epoch, self.num_epochs):
        for batch_idx, (images, targets) in enumerate(tqdm(self.data_loader)):
            images = to_var(images, self.use_gpu)
            targets = [to_var(item, self.use_gpu) for item in targets]

            class_loss, loc_loss, loss = self.model_step(images, targets)

        epoch_number = epoch + 1

        # print out loss log (once per epoch, values from the last batch)
        if epoch_number % self.loss_log_step == 0:
            self.print_loss_log(start_time=began_at,
                                iters_per_epoch=batches_per_epoch,
                                e=epoch,
                                i=batch_idx,
                                class_loss=class_loss,
                                loc_loss=loc_loss,
                                loss=loss)
            self.losses.append([epoch, class_loss, loc_loss, loss])

        # save model
        if epoch_number % self.model_save_step == 0:
            self.save_model(epoch)

        # learning-rate schedule: fire when the next pending entry of the
        # schedule equals the epoch that just finished
        schedule_pending = sched < len(self.learning_sched)
        if schedule_pending and epoch_number == self.learning_sched[sched]:
            self.lr /= 10
            write_print(self.output_txt,
                        'Learning rate reduced to ' + str(self.lr))
            sched += 1
            self.adjust_learning_rate(optimizer=self.optimizer,
                                      gamma=self.sched_gamma,
                                      step=sched)

    # print losses
    write_print(self.output_txt, '\n--Losses--')
    for logged_epoch, cls_value, loc_value, total_value in self.losses:
        row = str(logged_epoch) + ' {:.4f} {:.4f} {:.4f}'.format(
            cls_value, loc_value, total_value)
        write_print(self.output_txt, row)
def eval(self, dataset, max_per_image, threshold):
    """Detect objects in every image of ``dataset`` and print timing results.

    Per image, the model output is scaled to the image size, filtered per
    class by ``threshold``, passed through NMS and optionally capped at
    ``max_per_image`` detections.  All detections are pickled to
    ``detections.pkl`` under the results path; for the ``'voc'`` dataset
    they are also saved and evaluated.  Three rates (detection, NMS,
    combined) are printed at the end.
    """
    image_count = len(dataset)
    # all_boxes[class][image] -> (N, 5) array: 4 box values + score
    all_boxes = [[[] for _ in range(image_count)]
                 for _ in range(self.class_count)]

    timers = {'im_detect': Timer(), 'misc': Timer()}
    results_path = osp.join(self.result_save_path, self.pretrained_model)
    det_file = os.path.join(results_path, 'detections.pkl')

    # class 0 is skipped everywhere below
    foreground = range(1, self.class_count)

    detect_times = []
    nms_times = []

    with torch.no_grad():
        for idx in range(image_count):
            image, target, h, w = dataset.pull_item(idx)
            batch = to_var(image.unsqueeze(0), self.use_gpu)

            timers['im_detect'].tic()
            boxes, scores = self.model(batch)
            detect_time = timers['im_detect'].toc(average=False)
            detect_times.append(detect_time)

            boxes = boxes[0].cpu().numpy()
            scores = scores[0].cpu().numpy()

            # scale each detection back up to the image
            boxes *= torch.Tensor([w, h, w, h]).cpu().numpy()

            timers['misc'].tic()
            for cls in foreground:
                hits = np.where(scores[:, cls] > threshold)[0]
                if len(hits) > 0:
                    stacked = np.hstack(
                        (boxes[hits], scores[hits, cls][:, np.newaxis]))
                    dets = stacked.astype(np.float32, copy=False)
                    kept = nms(dets, 0.45, force_cpu=True)[:50]
                    all_boxes[cls][idx] = dets[kept, :]
                else:
                    all_boxes[cls][idx] = np.empty([0, 5],
                                                   dtype=np.float32)

            if max_per_image > 0:
                image_scores = np.hstack(
                    [all_boxes[cls][idx][:, -1] for cls in foreground])
                if len(image_scores) > max_per_image:
                    # score of the max_per_image-th best detection
                    cutoff = np.sort(image_scores)[-max_per_image]
                    for cls in foreground:
                        dets = all_boxes[cls][idx]
                        survivors = np.where(dets[:, -1] >= cutoff)[0]
                        all_boxes[cls][idx] = dets[survivors, :]

            nms_time = timers['misc'].toc(average=False)
            nms_times.append(nms_time)

            print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                idx + 1, image_count, detect_time, nms_time))

    with open(det_file, 'wb') as f:
        pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

    print('Evaluating detections')

    if self.dataset == 'voc':
        voc_save(all_boxes, dataset, results_path)
        do_python_eval(results_path, dataset)

    detect_times = np.asarray(detect_times)
    nms_times = np.asarray(nms_times)
    total_times = np.add(detect_times, nms_times)

    # detection, NMS and combined rate; the first image is left out of
    # each average
    for series in (detect_times, nms_times, total_times):
        print('fps[all]:', (1 / np.mean(series[1:])))