Example #1
    def forward(self, class_preds, class_targets, loc_preds, loc_targets,
                anchors):

        b, num_anchors, _ = loc_preds.shape

        # per-image matching buffers, filled in by match() below
        class_m = torch.LongTensor(b, num_anchors)
        loc_m = torch.Tensor(b, num_anchors, 4)

        class_m = to_var(class_m, self.use_gpu)
        loc_m = to_var(loc_m, self.use_gpu)

        for i in range(b):
            class_m[i], loc_m[i] = match(threshold=self.threshold,
                                         class_target=class_targets[i],
                                         loc_target=loc_targets[i],
                                         anchors=anchors.data,
                                         variances=self.variance)

        class_targets = class_m
        loc_targets = loc_m

        # positive anchors: those matched to a ground-truth object
        pos = class_targets > 0

        # loc loss
        pos_mask = pos.unsqueeze(pos.dim()).expand_as(loc_preds)
        loc_loss = F.smooth_l1_loss(loc_preds[pos_mask].view(-1, 4),
                                    loc_targets[pos_mask].view(-1, 4),
                                    reduction='sum')  # size_average=False is deprecated

        # per-anchor classification loss, used to rank negatives for mining
        batch_conf = class_preds.view(-1, self.class_count)
        class_loss = self.log_sum_exp(batch_conf) - batch_conf.gather(
            1, class_targets.view(-1, 1))

        # hard negative mining
        class_loss = class_loss.view(b, -1)
        class_loss[pos] = 0
        _, loss_index = class_loss.sort(1, descending=True)
        _, index_rank = loss_index.sort(1)
        num_pos = pos.long().sum(1, keepdim=True)
        num_neg = torch.clamp(self.pos_neg_ratio * num_pos,
                              max=pos.shape[1] - 1)
        neg = index_rank < num_neg.expand_as(index_rank)

        # class loss including positive and negative examples
        pos_index = pos.unsqueeze(2).expand_as(class_preds)
        neg_index = neg.unsqueeze(2).expand_as(class_preds)
        preds = class_preds[(pos_index | neg_index)]
        preds = preds.view(-1, self.class_count)
        targets_weighted = class_targets[(pos | neg)]
        class_loss = F.cross_entropy(preds,
                                     targets_weighted,
                                     reduction='sum')  # size_average=False is deprecated

        num_matched = num_pos.data.sum()
        class_loss /= num_matched.float()
        loc_loss /= num_matched.float()
        loss = class_loss + loc_loss

        return class_loss, loc_loss, loss
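
The helper self.log_sum_exp is not shown in the snippet. A minimal, numerically stable sketch consistent with how it is used above (one log-sum-exp value per anchor row of batch_conf, assuming torch is imported as in the snippet) could be:

    def log_sum_exp(self, x):
        """Stable log(sum(exp(x), dim=1)) for (N, C) logits -> (N, 1)."""
        x_max = x.max(dim=1, keepdim=True)[0]  # per-row max guards against overflow
        return torch.log(torch.sum(torch.exp(x - x_max), dim=1,
                                   keepdim=True)) + x_max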
Example #2
    def forward(self, class_preds, class_targets, loc_preds, loc_targets,
                anchors):
        b, num_anchors, _ = loc_preds.shape

        # class targets must be integer labels for the loss below
        class_m = torch.LongTensor(b, num_anchors)
        loc_m = torch.Tensor(b, num_anchors, 4)

        class_m = to_var(class_m, self.use_gpu)
        loc_m = to_var(loc_m, self.use_gpu)

        for i in range(b):
            class_m[i], loc_m[i] = match(threshold=self.threshold,
                                         class_target=class_targets[i],
                                         loc_target=loc_targets[i],
                                         anchors=anchors.data,
                                         variances=self.variance)

        class_targets = class_m
        loc_targets = loc_m

        pos = class_targets > 0

        num_matched = pos.data.long().sum()

        # loc_loss
        pos_mask = pos.unsqueeze(2).expand_as(loc_preds)
        loc_loss = F.smooth_l1_loss(loc_preds[pos_mask],
                                    loc_targets[pos_mask],
                                    reduction='sum')  # size_average=False is deprecated

        # class loss
        class_preds = class_preds.view(-1, self.class_count)
        class_targets = class_targets.view(-1)
        class_loss = self.focal_loss(class_preds, class_targets)

        class_loss /= num_matched.float()
        loc_loss /= num_matched.float()

        loss = class_loss + loc_loss

        return class_loss, loc_loss, loss
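
self.focal_loss is defined elsewhere in the class. A minimal sketch of the standard focal loss (Lin et al., 2017) matching the call above, with a summed reduction so the num_matched normalization works as in the snippet; the gamma default is an assumption:

    def focal_loss(self, preds, targets, gamma=2.0):
        """preds: (N, C) raw logits; targets: (N,) long class labels."""
        ce = F.cross_entropy(preds, targets, reduction='none')  # per-anchor CE
        pt = torch.exp(-ce)                                     # prob. of true class
        return ((1.0 - pt) ** gamma * ce).sum()                 # down-weight easy anchors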
Example #3
    def train_iter(self, start):
        step_index = 0
        start_time = time.time()
        batch_iterator = iter(self.train_loader)
        count = 0

        for i in range(start, self.num_iterations):

            if i in self.sched_milestones:
                step_index += 1
                self.adjust_learning_rate(optimizer=self.optimizer,
                                          gamma=self.sched_gamma,
                                          step=step_index)

            # restart the data iterator once it is exhausted
            try:
                images, targets = next(batch_iterator)
            except StopIteration:
                batch_iterator = iter(self.train_loader)
                images, targets = next(batch_iterator)

            images = to_var(images, self.use_gpu)
            targets = [to_var(target, self.use_gpu) for target in targets]

            class_loss, loc_loss, loss, count = self.model_step(
                images, targets, count)

            # print out loss log
            if (i + 1) % self.loss_log_step == 0:
                self.print_loss_log(start_time=start_time,
                                    cur=i,
                                    total=self.num_iterations,
                                    class_loss=class_loss,
                                    loc_loss=loc_loss,
                                    loss=loss)
                self.losses.append([i, class_loss, loc_loss, loss])

            # save model
            if (i + 1) % self.model_save_step == 0:
                self.save_model(i)

        self.save_model(i)
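
adjust_learning_rate is not included in the snippet. For the milestone-driven call in train_iter, a minimal step-decay sketch (assuming the base learning rate is stored as self.lr) would be:

    def adjust_learning_rate(self, optimizer, gamma, step):
        """Decay the base learning rate by gamma**step at each milestone."""
        lr = self.lr * (gamma ** step)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr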
Example #4
    def train_epoch(self, start):
        step_index = 0
        start_time = time.time()
        iters_per_epoch = len(self.train_loader)

        for e in range(start, self.num_epochs):

            if e in self.sched_milestones:
                step_index += 1

            for i, (images, targets) in enumerate(tqdm(self.train_loader)):
                self.adjust_learning_rate(optimizer=self.optimizer,
                                          gamma=self.sched_gamma,
                                          step=step_index,
                                          i=i,
                                          iters_per_epoch=iters_per_epoch,
                                          epoch=e)

                images = to_var(images, self.use_gpu)
                targets = [to_var(target, self.use_gpu) for target in targets]

                class_loss, loc_loss, loss = self.model_step(images, targets)

            # print out loss log
            if (e + 1) % self.loss_log_step == 0:
                self.print_loss_log(start_time=start_time,
                                    cur=e,
                                    total=self.num_epochs,
                                    class_loss=class_loss,
                                    loc_loss=loc_loss,
                                    loss=loss)
                self.losses.append([e, class_loss, loc_loss, loss])

            # save model
            if (e + 1) % self.model_save_step == 0:
                self.save_model(e)

        self.save_model(e)
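
Unlike Example #3, this trainer calls adjust_learning_rate on every batch and passes the batch index and epoch, which suggests a warm-up schedule. A hedged sketch, assuming linear warm-up over the first epoch followed by the same step decay:

    def adjust_learning_rate(self, optimizer, gamma, step, i,
                             iters_per_epoch, epoch):
        """Assumed schedule: linear warm-up in epoch 0, then gamma**step decay."""
        if epoch == 0:
            lr = self.lr * (i + 1) / iters_per_epoch  # ramp up to self.lr
        else:
            lr = self.lr * (gamma ** step)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr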
Example #5
    def eval(self, dataset, max_per_image, score_threshold):

        num_images = len(dataset)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(self.class_count)]

        # prepare timers, paths, and files
        timer = {'detection': Timer(), 'nms': Timer()}
        results_path = osp.join(self.model_test_path, self.pretrained_model)
        detection_file = osp.join(results_path, 'detections.pkl')

        detect_times = []
        nms_times = []

        with torch.no_grad():

            # for each image
            for i in range(num_images):

                # get image
                image, target, h, w = dataset.pull_item(i)
                image = to_var(image.unsqueeze(0), self.use_gpu)

                # get and time detection
                timer['detection'].tic()
                bboxes, scores = self.model(image)
                detect_time = timer['detection'].toc(average=False)
                detect_times.append(detect_time)

                # move to CPU and convert to NumPy
                bboxes = bboxes[0]
                scores = scores[0]
                bboxes = bboxes.cpu().numpy()
                scores = scores.cpu().numpy()

                # scale each detection back up to the image
                scale = torch.Tensor([w, h, w, h]).cpu().numpy()
                bboxes *= scale

                # perform and time NMS
                timer['nms'].tic()

                for j in range(1, self.class_count):

                    # get scores greater than score_threshold
                    selected_i = np.where(scores[:, j] > score_threshold)[0]

                    # if there are scores greater than score_threshold
                    if len(selected_i) > 0:
                        bboxes_i = bboxes[selected_i]
                        scores_i = scores[selected_i, j]
                        detections_i = (bboxes_i, scores_i[:, np.newaxis])
                        detections_i = np.hstack(detections_i)
                        detections_i = detections_i.astype(np.float32,
                                                           copy=False)

                        keep = nms(detections=detections_i,
                                   threshold=0.45,
                                   force_cpu=True)

                        # keep = nms(boxes=bboxes_i,
                        #            scores=scores_i,
                        #            iou_threshold=0.45)

                        keep = keep[:50]
                        detections_i = detections_i[keep, :]
                        # if len(detections_i.shape) == 1:
                        #     all_boxes[j][i] = np.expand_dims(detections_i, 0)
                        # else:
                        all_boxes[j][i] = detections_i

                    else:
                        all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)

                # if we need to limit the maximum per image
                if max_per_image > 0:

                    # get all the scores for the image across all classes
                    scores_i = np.hstack([
                        all_boxes[j][i][:, -1]
                        for j in range(1, self.class_count)
                    ])

                    # if the number of detections is greater than max_per_image
                    if len(scores_i) > max_per_image:

                        # score of the max_per_image-th highest detection
                        threshold_i = np.sort(scores_i)[-max_per_image]

                        # keep detections with score greater than threshold_i
                        for j in range(1, self.class_count):
                            keep = np.where(
                                all_boxes[j][i][:, -1] >= threshold_i)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]

                nms_time = timer['nms'].toc(average=False)
                nms_times.append(nms_time)

                temp_string = 'detection: {:d}/{:d} {:.4f}s {:.4f}s'
                temp_string = temp_string.format(i + 1, num_images,
                                                 detect_time, nms_time)

                write_print(self.output_txt, temp_string)

        with open(detection_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

        write_print(self.output_txt, '\nEvaluating detections')

        # perform evaluation
        if self.dataset == 'voc':

            voc_save(all_boxes=all_boxes,
                     dataset=dataset,
                     results_path=results_path,
                     output_txt=self.output_txt)

            aps, mAP = do_python_eval(results_path=results_path,
                                      dataset=dataset,
                                      output_txt=self.output_txt,
                                      mode='test',
                                      use_07_metric=self.use_07_metric)

        detect_times = np.asarray(detect_times)
        nms_times = np.asarray(nms_times)
        total_times = np.add(detect_times, nms_times)

        write_print(self.output_txt,
                    '\nfps[detect]: ' + str(1 / np.mean(detect_times[1:])))
        write_print(self.output_txt,
                    'fps[nms]: ' + str(1 / np.mean(nms_times[1:])))
        write_print(self.output_txt,
                    'fps[total]: ' + str(1 / np.mean(total_times[1:])))

        write_print(self.output_txt, '\nResults:')
        for ap in aps:
            write_print(self.output_txt, '{:.4f}'.format(ap))
        write_print(self.output_txt, '{:.4f}'.format(np.mean(aps)))
        write_print(self.output_txt, str(1 / np.mean(detect_times[1:])))
        write_print(self.output_txt, str(1 / np.mean(nms_times[1:])))
        write_print(self.output_txt, str(1 / np.mean(total_times[1:])))
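
write_print is the logging helper used throughout these snippets. A minimal sketch that appends each message to the log file and echoes it to stdout:

def write_print(path, text):
    """Append text as one line to the file at path, then print it."""
    with open(path, 'a') as f:
        f.write(text + '\n')
    print(text)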
Example #6
    def train(self):
        """
        training process
        """

        # set model in training mode
        self.model.train()

        self.losses = []

        iters_per_epoch = len(self.data_loader)

        # start with a trained model if exists
        if self.pretrained_model:
            start = int(self.pretrained_model.split('/')[-1])
        else:
            start = 0

        sched = 0

        # start training
        start_time = time.time()
        for e in range(start, self.num_epochs):
            for i, (images, targets) in enumerate(tqdm(self.data_loader)):
                images = to_var(images, self.use_gpu)
                targets = [to_var(target, self.use_gpu) for target in targets]

                class_loss, loc_loss, loss = self.model_step(images, targets)

            # print out loss log
            if (e + 1) % self.loss_log_step == 0:
                self.print_loss_log(start_time=start_time,
                                    iters_per_epoch=iters_per_epoch,
                                    e=e,
                                    i=i,
                                    class_loss=class_loss,
                                    loc_loss=loc_loss,
                                    loss=loss)

                self.losses.append([e, class_loss, loc_loss, loss])

            # save model
            if (e + 1) % self.model_save_step == 0:
                self.save_model(e)

            num_sched = len(self.learning_sched)
            if num_sched != 0 and sched < num_sched:
                if (e + 1) == self.learning_sched[sched]:

                    self.lr /= 10
                    write_print(self.output_txt,
                                'Learning rate reduced to ' + str(self.lr))
                    sched += 1
                    self.adjust_learning_rate(optimizer=self.optimizer,
                                              gamma=self.sched_gamma,
                                              step=sched)

        # print losses
        write_print(self.output_txt, '\n--Losses--')
        for e, class_loss, loc_loss, loss in self.losses:
            loss_string = ' {:.4f} {:.4f} {:.4f}'.format(
                class_loss, loc_loss, loss)
            write_print(self.output_txt, str(e) + loss_string)
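
to_var appears in every snippet; the name dates from the pre-0.4 PyTorch Variable API. A minimal sketch that works on modern PyTorch simply moves the tensor to the GPU when requested:

def to_var(x, use_gpu):
    """Move x to the GPU if requested; Variable wrapping is no longer needed."""
    if use_gpu and torch.cuda.is_available():
        x = x.cuda()
    return x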
Example #7
    def eval(self, dataset, max_per_image, threshold):
        num_images = len(dataset)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(self.class_count)]

        _t = {'im_detect': Timer(), 'misc': Timer()}
        results_path = osp.join(self.result_save_path, self.pretrained_model)
        det_file = osp.join(results_path, 'detections.pkl')

        detect_times = []
        nms_times = []

        with torch.no_grad():
            for i in range(num_images):
                image, target, h, w = dataset.pull_item(i)
                image = to_var(image.unsqueeze(0), self.use_gpu)

                _t['im_detect'].tic()
                boxes, scores = self.model(image)
                detect_time = _t['im_detect'].toc(average=False)
                detect_times.append(detect_time)
                boxes = boxes[0]
                scores = scores[0]

                boxes = boxes.cpu().numpy()
                scores = scores.cpu().numpy()
                # scale each detection back up to the image
                scale = torch.Tensor([w, h, w, h]).cpu().numpy()
                boxes *= scale

                _t['misc'].tic()

                for j in range(1, self.class_count):
                    inds = np.where(scores[:, j] > threshold)[0]
                    if len(inds) == 0:
                        all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)
                        continue
                    c_bboxes = boxes[inds]
                    c_scores = scores[inds, j]
                    c_dets = np.hstack(
                        (c_bboxes, c_scores[:, np.newaxis])).astype(np.float32,
                                                                    copy=False)

                    keep = nms(c_dets, 0.45, force_cpu=True)
                    keep = keep[:50]
                    c_dets = c_dets[keep, :]
                    all_boxes[j][i] = c_dets
                if max_per_image > 0:
                    image_scores = np.hstack([
                        all_boxes[j][i][:, -1]
                        for j in range(1, self.class_count)
                    ])
                    if len(image_scores) > max_per_image:
                        image_thresh = np.sort(image_scores)[-max_per_image]
                        for j in range(1, self.class_count):
                            keep = np.where(
                                all_boxes[j][i][:, -1] >= image_thresh)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]

                nms_time = _t['misc'].toc(average=False)
                nms_times.append(nms_time)

                print('im_detect: {:d}/{:d} {:.3f}s {:.3f}s'.format(
                    i + 1, num_images, detect_time, nms_time))

        with open(det_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

        print('Evaluating detections')
        if self.dataset == 'voc':
            voc_save(all_boxes, dataset, results_path)
            do_python_eval(results_path, dataset)

        detect_times = np.asarray(detect_times)
        nms_times = np.asarray(nms_times)
        total_times = np.add(detect_times, nms_times)

        print('fps[detect]:', (1 / np.mean(detect_times[1:])))
        print('fps[nms]:', (1 / np.mean(nms_times[1:])))
        print('fps[total]:', (1 / np.mean(total_times[1:])))
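
The Timer used for the detection/NMS timings follows the tic/toc pattern of the py-faster-rcnn utilities. A minimal sketch consistent with the toc(average=False) calls above:

import time

class Timer(object):
    """Simple tic/toc timer; toc(average=True) returns the mean over calls."""

    def __init__(self):
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.diff = 0.0

    def tic(self):
        self.start_time = time.time()

    def toc(self, average=True):
        self.diff = time.time() - self.start_time
        self.total_time += self.diff
        self.calls += 1
        return self.total_time / self.calls if average else self.diff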