Example #1
    def print_loss_log(self, start_time, iters_per_epoch, e, i, class_loss,
                       loc_loss, loss):
        """
        Prints the loss and elapsed time for each epoch
        """
        """
        Prints the loss and elapsed time for each epoch
        """
        total_iter = self.num_epochs * iters_per_epoch
        cur_iter = e * iters_per_epoch + i

        elapsed = time.time() - start_time
        total_time = (total_iter - cur_iter) * elapsed / (cur_iter + 1)
        epoch_time = (iters_per_epoch - i) * elapsed / (cur_iter + 1)

        epoch_time = str(datetime.timedelta(seconds=epoch_time))
        total_time = str(datetime.timedelta(seconds=total_time))
        elapsed = str(datetime.timedelta(seconds=elapsed))

        log = "Elapsed {}/{} -- {}, Epoch [{}/{}], Iter [{}/{}], " \
              "class_loss: {:.4f}, loc_loss: {:.4f}, " \
              "loss: {:.4f}".format(elapsed,
                                    epoch_time,
                                    total_time,
                                    e + 1,
                                    self.num_epochs,
                                    i + 1,
                                    iters_per_epoch,
                                    class_loss.item(),
                                    loc_loss.item(),
                                    loss.item())

        write_print(self.output_txt, log)
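
The remaining-time figures above extrapolate the average time per iteration observed so far. A minimal standalone sketch of the same arithmetic, using made-up numbers (10 epochs of 100 iterations, 1200 seconds elapsed midway through epoch 4); none of these values come from the original code:

import datetime

# hypothetical progress counters
num_epochs, iters_per_epoch = 10, 100
e, i, elapsed = 3, 49, 1200.0

cur_iter = e * iters_per_epoch + i
total_iter = num_epochs * iters_per_epoch
per_iter = elapsed / (cur_iter + 1)  # average seconds per iteration so far

# time remaining in the current epoch and in the whole run
print(datetime.timedelta(seconds=(iters_per_epoch - i) * per_iter))
print(datetime.timedelta(seconds=(total_iter - cur_iter) * per_iter))
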
Example #2
def save_results(all_boxes, dataset, results_path, output_txt):

    # for each class
    for class_i, class_name in enumerate(VOC_CLASSES):

        text = 'Writing {:s} VOC results file'.format(class_name)
        write_print(output_txt, text)
        filename = osp.join(results_path, class_name + '.txt')

        with open(filename, 'wt') as f:

            # get detections for the class in an image
            for image_i, image_id in enumerate(dataset.ids):
                detections = all_boxes[class_i + 1][image_i]

                # if there are detections for the class in the image
                if len(detections) != 0:
                    for k in range(detections.shape[0]):
                        output = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'

                        # the VOCdevkit expects 1-based indices
                        output = output.format(image_id[2], detections[k, -1],
                                               detections[k, 0] + 1,
                                               detections[k, 1] + 1,
                                               detections[k, 2] + 1,
                                               detections[k, 3] + 1)

                        f.write(output)
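
Each line written above follows the VOCdevkit detection-results format: image id, confidence, then the four 1-based corner coordinates. A minimal sketch of one such line, using the format string from save_results with hypothetical values (the id '000001' and the numbers are made up):

# hypothetical detection line; format string copied from save_results above
output = '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'
line = output.format('000001', 0.912, 48.0 + 1, 240.0 + 1, 195.0 + 1, 371.0 + 1)
print(line, end='')  # -> 000001 0.912 49.0 241.0 196.0 372.0
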
Example #3
    def load_pretrained_model(self):
        """
        Loads a pre-trained model from a .pth file
        """
        self.model.load_state_dict(
            torch.load(
                os.path.join(self.model_save_path,
                             '{}.pth'.format(self.pretrained_model))))
        write_print(self.output_txt,
                    'loaded trained model {}'.format(self.pretrained_model))
Example #4
    def print_network(self, model):
        """
        Prints the structure of the network and the total number of parameters
        """
        num_params = 0
        for p in model.parameters():
            num_params += p.numel()
        write_print(self.output_txt, str(model))
        write_print(self.output_txt,
                    'The number of parameters: {}'.format(num_params))
Example #5
def do_python_eval(results_path, dataset, output_txt, mode, use_07_metric):

    # annotation cache directory
    cache_dir = osp.join(results_path, 'annotations_cache')

    # path to VOC + year
    path = osp.join(dataset.data_path,
                    'VOC{}'.format(dataset.image_sets[0][0]))

    # path to XML annotation folder
    annotation_path = dataset.annotation_path

    # text file containing the list of (test) images
    list_path = dataset.text_path.format(path, mode, mode)

    # The PASCAL VOC metric changed in 2010
    write_print(output_txt,
                '\nVOC07 metric? ' + ('Yes\n' if use_07_metric else 'No\n'))

    # for each class, compute the recall, precision, and ap
    aps = []
    for class_name in VOC_CLASSES:
        detection_path = osp.join(results_path, class_name + '.txt')
        recall, precision, ap = voc_eval(detection_path=detection_path,
                                         path=path,
                                         annotation_path=annotation_path,
                                         list_path=list_path,
                                         class_name=class_name,
                                         cache_dir=cache_dir,
                                         output_txt=output_txt,
                                         overlap_threshold=0.5,
                                         use_07_metric=use_07_metric)
        aps += [ap]

        write_print(output_txt, 'AP for {} = {:.4f}'.format(class_name, ap))

        pickle_file = osp.join(results_path, class_name + '_pr.pkl')
        with open(pickle_file, 'wb') as f:
            pickle.dump({'rec': recall, 'prec': precision, 'ap': ap}, f)

    write_print(output_txt, 'Mean AP = {:.4f}'.format(np.mean(aps)))

    return aps, np.mean(aps)
Example #6
    def eval(self, dataset, max_per_image, score_threshold):

        num_images = len(dataset)
        all_boxes = [[[] for _ in range(num_images)]
                     for _ in range(self.class_count)]

        # prepare timers, paths, and files
        timer = {'detection': Timer(), 'nms': Timer()}
        results_path = osp.join(self.model_test_path, self.pretrained_model)
        detection_file = osp.join(results_path, 'detections.pkl')

        detect_times = []
        nms_times = []

        with torch.no_grad():

            # for each image
            for i in range(num_images):

                # get image
                image, target, h, w = dataset.pull_item(i)
                image = to_var(image.unsqueeze(0), self.use_gpu)

                # get and time detection
                timer['detection'].tic()
                bboxes, scores = self.model(image)
                detect_time = timer['detection'].toc(average=False)
                detect_times.append(detect_time)

                # move detections to the CPU and convert to numpy arrays
                bboxes = bboxes[0]
                scores = scores[0]
                bboxes = bboxes.cpu().numpy()
                scores = scores.cpu().numpy()

                # scale each detection back up to the image
                scale = torch.Tensor([w, h, w, h]).cpu().numpy()
                bboxes *= scale

                # perform and time NMS
                timer['nms'].tic()

                for j in range(1, self.class_count):

                    # get indices of scores greater than score_threshold
                    selected_i = np.where(scores[:, j] > score_threshold)[0]

                    # if there are scores greater than score_threshold
                    if len(selected_i) > 0:
                        bboxes_i = bboxes[selected_i]
                        scores_i = scores[selected_i, j]
                        detections_i = (bboxes_i, scores_i[:, np.newaxis])
                        detections_i = np.hstack(detections_i)
                        detections_i = detections_i.astype(np.float32,
                                                           copy=False)

                        keep = nms(detections=detections_i,
                                   threshold=0.45,
                                   force_cpu=True)

                        keep = keep[:50]
                        detections_i = detections_i[keep, :]
                        all_boxes[j][i] = detections_i

                    elif len(selected_i) == 0:
                        all_boxes[j][i] = np.empty([0, 5], dtype=np.float32)

                # if we need to limit the maximum per image
                if max_per_image > 0:

                    # get all the scores for the image across all classes
                    scores_i = np.hstack([
                        all_boxes[j][i][:, -1]
                        for j in range(1, self.class_count)
                    ])

                    # if the number of detections is greater than max_per_image
                    if len(scores_i) > max_per_image:

                        # get the max_per_image-th highest score as the cutoff
                        threshold_i = np.sort(scores_i)[-max_per_image]

                        # keep detections with score greater than threshold_i
                        for j in range(1, self.class_count):
                            keep = np.where(
                                all_boxes[j][i][:, -1] >= threshold_i)[0]
                            all_boxes[j][i] = all_boxes[j][i][keep, :]

                nms_time = timer['nms'].toc(average=False)
                nms_times.append(nms_time)

                temp_string = 'detection: {:d}/{:d} {:.4f}s {:.4f}s'
                temp_string = temp_string.format(i + 1, num_images,
                                                 detect_time, nms_time)

                write_print(self.output_txt, temp_string)

        with open(detection_file, 'wb') as f:
            pickle.dump(all_boxes, f, pickle.HIGHEST_PROTOCOL)

        write_print(self.output_txt, '\nEvaluating detections')

        # perform evaluation
        if self.dataset == 'voc':

            voc_save(all_boxes=all_boxes,
                     dataset=dataset,
                     results_path=results_path,
                     output_txt=self.output_txt)

            aps, mAP = do_python_eval(results_path=results_path,
                                      dataset=dataset,
                                      output_txt=self.output_txt,
                                      mode='test',
                                      use_07_metric=self.use_07_metric)

        detect_times = np.asarray(detect_times)
        nms_times = np.asarray(nms_times)
        total_times = np.add(detect_times, nms_times)

        write_print(self.output_txt,
                    '\nfps[detection]: ' + str(1 / np.mean(detect_times[1:])))
        write_print(self.output_txt,
                    'fps[nms]: ' + str(1 / np.mean(nms_times[1:])))
        write_print(self.output_txt,
                    'fps[total]: ' + str(1 / np.mean(total_times[1:])))

        write_print(self.output_txt, '\nResults:')
        for ap in aps:
            write_print(self.output_txt, '{:.4f}'.format(ap))
        write_print(self.output_txt, '{:.4f}'.format(np.mean(aps)))
        write_print(self.output_txt, str(1 / np.mean(detect_times[1:])))
        write_print(self.output_txt, str(1 / np.mean(nms_times[1:])))
        write_print(self.output_txt, str(1 / np.mean(total_times[1:])))
Example #7
    def train(self):
        """
        training process
        """

        # set model in training mode
        self.model.train()

        self.losses = []

        iters_per_epoch = len(self.data_loader)

        # start from a pre-trained model if one exists
        if self.pretrained_model:
            start = int(self.pretrained_model.split('/')[-1])
        else:
            start = 0

        sched = 0

        # start training
        start_time = time.time()
        for e in range(start, self.num_epochs):
            for i, (images, targets) in enumerate(tqdm(self.data_loader)):
                images = to_var(images, self.use_gpu)
                targets = [to_var(target, self.use_gpu) for target in targets]

                class_loss, loc_loss, loss = self.model_step(images, targets)

            # print out loss log
            if (e + 1) % self.loss_log_step == 0:
                self.print_loss_log(start_time=start_time,
                                    iters_per_epoch=iters_per_epoch,
                                    e=e,
                                    i=i,
                                    class_loss=class_loss,
                                    loc_loss=loc_loss,
                                    loss=loss)

                self.losses.append([e, class_loss, loc_loss, loss])

            # save model
            if (e + 1) % self.model_save_step == 0:
                self.save_model(e)

            num_sched = len(self.learning_sched)
            if num_sched != 0 and sched < num_sched:
                if (e + 1) == self.learning_sched[sched]:

                    self.lr /= 10
                    write_print(self.output_txt,
                                'Learning rate reduced to ' + str(self.lr))
                    sched += 1
                    self.adjust_learning_rate(optimizer=self.optimizer,
                                              gamma=self.sched_gamma,
                                              step=sched)

        # print losses
        write_print(self.output_txt, '\n--Losses--')
        for e, class_loss, loc_loss, loss in self.losses:
            loss_string = ' {:.4f} {:.4f} {:.4f}'.format(
                class_loss, loc_loss, loss)
            write_print(self.output_txt, str(e) + loss_string)
Example #8
                        help='Epoch interval for saving the model')

    config = parser.parse_args()

    args = vars(config)
    output_txt = ''

    if args['mode'] == 'train':
        version = str(datetime.now()).replace(':', '_')
        version = '{}_train'.format(version)
        path = args['model_save_path']
        path = osp.join(path, version)
        output_txt = osp.join(path, '{}.txt'.format(version))

    elif args['mode'] == 'test':
        model = args['pretrained_model'].split('/')
        version = '{}_test_{}'.format(model[0], model[1])
        path = args['model_test_path']
        path = osp.join(path, model[0])
        output_txt = osp.join(path, '{}.txt'.format(version))

    mkdir(path)
    save_config(path, version, args)

    write_print(output_txt, '------------ Options -------------')
    for k, v in args.items():
        write_print(output_txt, '{}: {}'.format(str(k), str(v)))
    write_print(output_txt, '-------------- End ----------------')

    main(version, config, output_txt)
Example #9
def voc_eval(detection_path,
             path,
             annotation_path,
             list_path,
             class_name,
             cache_dir,
             output_txt,
             overlap_threshold=0.5,
             use_07_metric=True):

    # create or get the cache_file
    if not osp.isdir(cache_dir):
        os.mkdir(cache_dir)
    cache_file = osp.join(cache_dir, 'annotations.pkl')

    # read list of images
    with open(list_path, 'r') as f:
        lines = f.readlines()
    image_names = [x.strip() for x in lines]

    # if cache_file does not exist
    if not osp.isfile(cache_file):
        targets = {}

        # per image, read annotations from XML file
        write_print(output_txt, 'Reading annotations')
        for i, image_name in enumerate(image_names):
            temp_path = annotation_path.format(path, 'test', image_name)
            targets[image_name] = parse_annotation(temp_path)

        # save annotations to cache_file
        temp_string = 'Saving cached annotations to {:s}\n'.format(cache_file)
        write_print(output_txt, temp_string)
        with open(cache_file, 'wb') as f:
            pickle.dump(targets, f)

    # else if cache_file exists
    else:
        with open(cache_file, 'rb') as f:
            targets = pickle.load(f)

    class_targets = {}
    n_positive = 0

    # collect the ground-truth objects whose class is class_name in each image
    for image_name in image_names:
        target = [x for x in targets[image_name] if x['name'] == class_name]
        bbox = np.array([x['bbox'] for x in target])
        difficult = np.array([x['difficult'] for x in target]).astype(bool)
        det = [False] * len(target)
        n_positive += sum(~difficult)
        class_targets[image_name] = {
            'bbox': bbox,
            'difficult': difficult,
            'det': det
        }

    # read detections from class_name.txt
    detection_file = detection_path.format(class_name)
    with open(detection_file, 'r') as f:
        lines = f.readlines()

    # if there are detections
    if any(lines):

        # get ids, confidences, and bounding boxes
        values = [x.strip().split(' ') for x in lines]
        image_ids = [x[0] for x in values]
        confidences = np.array([float(x[1]) for x in values])
        bboxes = np.array([[float(z) for z in x[2:]] for x in values])

        # sort by confidence
        sorted_index = np.argsort(-confidences)
        bboxes = bboxes[sorted_index, :]
        image_ids = [image_ids[x] for x in sorted_index]

        num_detections = len(image_ids)
        tp = np.zeros(num_detections)
        fp = np.zeros(num_detections)

        # go through detections and mark TPs and FPs
        for i in range(num_detections):

            # get target bounding box
            image_target = class_targets[image_ids[i]]
            bbox_target = image_target['bbox'].astype(float)

            # get detected bounding box
            bbox = bboxes[i, :].astype(float)
            overlap_max = -np.inf

            if bbox_target.size > 0:

                # get the overlapping region
                # compute the area of intersection
                x_min = np.maximum(bbox_target[:, 0], bbox[0])
                y_min = np.maximum(bbox_target[:, 1], bbox[1])
                x_max = np.minimum(bbox_target[:, 2], bbox[2])
                y_max = np.minimum(bbox_target[:, 3], bbox[3])
                width = np.maximum(x_max - x_min, 0.)
                height = np.maximum(y_max - y_min, 0.)
                intersection = width * height

                # get the area of the gt and the detection
                # compute the union
                area_bbox = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                area_bbox_target = ((bbox_target[:, 2] - bbox_target[:, 0]) *
                                    (bbox_target[:, 3] - bbox_target[:, 1]))
                union = area_bbox + area_bbox_target - intersection

                # compute the iou
                iou = intersection / union
                overlap_max = np.max(iou)
                j_max = np.argmax(iou)

            # if the maximum overlap is over the overlap threshold
            if overlap_max > overlap_threshold:
                # if it is not difficult
                if not image_target['difficult'][j_max]:
                    # if it is not yet detected, count as a true positive
                    if not image_target['det'][j_max]:
                        tp[i] = 1.
                        image_target['det'][j_max] = 1
                    # else, it is a duplicate detection; count as a false positive
                    else:
                        fp[i] = 1.

            # else, count as a false positive
            else:
                fp[i] = 1.

        # compute precision and recall
        # avoid divide by zero if the first detection matches a difficult gt
        tp = np.cumsum(tp)
        fp = np.cumsum(fp)
        recall = tp / float(n_positive)
        precision = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)

        ap = voc_ap(recall=recall,
                    precision=precision,
                    use_07_metric=use_07_metric)

    else:
        recall = -1.
        precision = -1.
        ap = -1.

    return recall, precision, ap
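
The intersection-over-union block above can be exercised on its own. A minimal sketch, assuming boxes are given as [x_min, y_min, x_max, y_max] arrays; the helper name and the sample boxes are illustrative and not part of the original code:

import numpy as np

def iou_one_to_many(box, boxes):
    # IoU of one detected box against an array of ground-truth boxes,
    # mirroring the arithmetic used inside voc_eval
    x_min = np.maximum(boxes[:, 0], box[0])
    y_min = np.maximum(boxes[:, 1], box[1])
    x_max = np.minimum(boxes[:, 2], box[2])
    y_max = np.minimum(boxes[:, 3], box[3])
    intersection = np.maximum(x_max - x_min, 0.) * np.maximum(y_max - y_min, 0.)
    area_box = (box[2] - box[0]) * (box[3] - box[1])
    area_boxes = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return intersection / (area_box + area_boxes - intersection)

# hypothetical boxes: the detection matches the first ground truth exactly
# and misses the second entirely
print(iou_one_to_many(np.array([0., 0., 10., 10.]),
                      np.array([[0., 0., 10., 10.], [20., 20., 30., 30.]])))
# -> [1. 0.]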