    def _load_rpn_roidb(self, gt_roidb):
        filename = self.config['rpn_file']
        print('loading {}'.format(filename))
        check_if_exist('rpn data', filename)
        with open(filename, 'rb') as f:
            box_list = cPickle.load(f)
        return self.create_roidb_from_box_list(box_list, gt_roidb)
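A minimal sketch of what the rpn_file is assumed to contain, inferred from the call to create_roidb_from_box_list: a pickled list holding one (N, 4) array of [x1, y1, x2, y2] proposals per image, in image-index order. The file name and box values below are illustrative:

import cPickle
import numpy as np

# One array of proposals per image, in the same order as the image index
box_list = [np.array([[10, 10, 100, 120]], dtype=np.float32)
            for _ in range(3)]
with open('rpn_proposals.pkl', 'wb') as f:
    cPickle.dump(box_list, f, cPickle.HIGHEST_PROTOCOL)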
Example #2
def evaluate_faster_rcnn(conf_thresh, nms_thresh):
    """Evaluate a Faster R-CNN network on a image database."""
    # Set prototxt
    prototxt = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                        cfg.MODEL_NAME, 'test.prototxt')
    check_if_exist('Prototxt', prototxt)

    # Get most recent model
    test_model = get_model_path(cfg.OUTPUT_DIR, '.caffemodel', '_iter_')

    if test_model is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    caffe.set_mode_gpu()
    caffe.set_device(cfg.GPU_ID)
    net = caffe.Net(prototxt, caffe.TEST, weights=test_model)
    net.name = osp.splitext(osp.basename(test_model))[0]

    # Get imdb
    imdb_name = '{:s}_val'.format(cfg.DATASET_NAME)
    imdb = get_imdb(imdb_name)

    # results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
    # imdb._do_pascal_voc_eval(results_dir)

    if not cfg.TEST.HAS_RPN:
        imdb.set_proposal_method(cfg.TEST.PROPOSAL_METHOD)

    test_net(net, imdb, conf_thresh, nms_thresh)
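A usage sketch, assuming cfg has already been populated (e.g. via cfg_from_file, as in the main-block fragment further down); the threshold values are illustrative:

evaluate_faster_rcnn(conf_thresh=0.05, nms_thresh=0.3)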
    def _write_pascal_voc_results_files(self, all_boxes, results_dir):
        check_if_exist('Result directory', results_dir)
        # result structure: image_id score xmin ymin xmax ymax
        print(
            'Writing results in PASCAL VOC format to {:s}'.format(results_dir))
        for cls_ind, classname in enumerate(self.classes):
            if classname == 'background':
                continue
            print('Writing {:s} {:s} results file'.format(
                classname, cfg.DATASET_NAME))
            filename = osp.join(
                results_dir,
                '{:s}_det_test_{:s}.txt'.format(cfg.DATASET_NAME, classname))

            with open(filename, 'wt') as f:
                for im_ind, index in enumerate(self.image_index):
                    dets = all_boxes[cls_ind][im_ind]
                    if len(dets) == 0:
                        continue
                    # the VOCdevkit expects 1-based indices
                    for k in xrange(dets.shape[0]):
                        f.write(
                            '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format(
                                index, dets[k, -1], dets[k, 0] + 1,
                                dets[k, 1] + 1, dets[k, 2] + 1,
                                dets[k, 3] + 1))
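For reference, one line of the file written above looks like this (illustrative values; the coordinates are already shifted to the 1-based VOC convention):

IMG_0001 0.912 46.0 121.0 231.0 311.0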
def evaluate_ssd():
    """Evaluate a SSD network."""

    # Set results directory and solver
    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
    test_solver_file = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME,
                                cfg.METHOD_NAME, cfg.MODEL_NAME,
                                'test_solver.prototxt')

    make_if_not_exist(results_dir)
    check_if_exist('Solver', test_solver_file)

    # Find most recent model
    test_model = get_model_path(cfg.OUTPUT_DIR, '.caffemodel', '_iter_')

    if test_model is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    # Test model
    cmd = './frameworks/caffe-rcnn-ssd/build/tools/caffe train \
           --solver="{}" --weights="{}" --gpu="{}"\
          '.format(test_solver_file, test_model, cfg.GPU_ID)

    subprocess.call(cmd, shell=True)

    # Set imdb and do evaluation
    imdb_name = '{:s}_val'.format(cfg.DATASET_NAME)
    imdb = get_imdb(imdb_name)
    imdb._do_pascal_voc_eval(results_dir)
    def __init__(self, image_set, dataset_path=None):
        imdb.__init__(self, image_set)
        self._image_set = image_set
        if dataset_path is None:
            self._dataset_path = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME)
        else:
            self._dataset_path = dataset_path

        self._classes = cfg.CLASSES

        self._class_to_ind = dict(zip(self.classes, xrange(self.num_classes)))

        # Assume all images have same extension
        # self._image_ext = '.JPG'
        self._image_ext = osp.splitext(
            glob.glob(osp.join(self._dataset_path, 'images', '*'))[0])[1]

        self._image_index = self._load_image_set_index()
        # Default to roidb handler
        self._roidb_handler = self.selective_search_roidb

        # specific config options
        self.config = {
            'cleanup': False,
            'use_diff': False,
            'rpn_file': None,
            'min_size': 2
        }

        check_if_exist('Dataset path', self._dataset_path)
def create_yolov2_names_data_config():
    train_set = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'yolov2_ImageSets',
                         'train.txt')
    val_set = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'yolov2_ImageSets',
                       'val.txt')

    check_if_exist('YOLOv2 train set', train_set)
    check_if_exist('YOLOv2 validation set', val_set)

    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')

    data_cfg = osp.join(cfg.OUTPUT_DIR, '{}.data'.format(cfg.DATASET_NAME))
    names_cfg = osp.join(cfg.OUTPUT_DIR, '{}.names'.format(cfg.DATASET_NAME))

    num_classes = cfg.NUM_CLASSES - 1  # No background class

    # Create names file for yolov2
    with open(names_cfg, 'w') as f:
        for classname in cfg.CLASSES[1:]:  # No background class
            print(classname, file=f)

    # Create data configuration file for yolov2
    with open(data_cfg, 'w') as f:
        print('classes = {}'.format(num_classes), file=f)
        print('train = {}'.format(train_set), file=f)
        print('valid = {}'.format(val_set), file=f)
        # print('test = {}'.format(test_set), file=f)
        print('names = {}'.format(names_cfg), file=f)
        print('backup = {}'.format(cfg.OUTPUT_DIR), file=f)
        print('results = {}'.format(results_dir), file=f)
        print('eval = voc', file=f)
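For illustration, the generated .data file comes out roughly like this (nine classes assumed; all paths depend on cfg and are placeholders):

classes = 9
train = data/<DATASET_NAME>/yolov2_ImageSets/train.txt
valid = data/<DATASET_NAME>/yolov2_ImageSets/val.txt
names = <OUTPUT_DIR>/<DATASET_NAME>.names
backup = <OUTPUT_DIR>
results = <OUTPUT_DIR>/results
eval = voc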
Example #7
def train_faster_rcnn(no_pretrained, max_iters):
    """Train a Faster R-CNN network on a region of interest database."""
    # Set pretrained model
    if no_pretrained:
        pretrained_model = None
    else:
        pretrained_model = osp.join(cfg.DATA_DIR, 'imagenet_models',
                                    '{:s}.caffemodel'.format(cfg.MODEL_NAME))
        check_if_exist('Pretrained model', pretrained_model)

    # Change solver if OHEM is used
    postfix = ''
    if cfg.TRAIN.USE_OHEM:
        if cfg.MODEL_NAME != 'VGG16' and \
           cfg.MODEL_NAME != 'ResNet101_bn-scale-merged':

            print('Faster RCNN framework with OHEM does not currently '
                  'support model: {:s} (supported models: VGG16, '
                  'ResNet101_bn-scale-merged).'.format(cfg.MODEL_NAME))
            sys.exit()
        else:
            postfix = '_ohem'

    # Check if custom anchors exist and copy them to output dir
    if cfg.CUSTOM_ANCHORS:
        anchor_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                               'custom_anchor_boxes', '9_anchor_boxes.txt')
        if not osp.exists(anchor_file):
            print('Custom anchor box file `{:s}` does not exist.'.format(
                anchor_file))
            print('Generate custom anchor boxes with '
                  'data/data_utils/k_means_anchor_boxes.py')
            sys.exit()

        copy(anchor_file, osp.join(cfg.OUTPUT_DIR, '9_anchor_boxes.txt'))

    # Set solver
    solver = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                      cfg.MODEL_NAME, 'solver{}.prototxt'.format(postfix))
    check_if_exist('Solver', solver)

    # Set up caffe
    caffe.set_mode_gpu()
    caffe.set_device(cfg.GPU_ID)

    # Set imdb
    imdb_name = '{:s}_train'.format(cfg.DATASET_NAME)

    imdb = get_imdb(imdb_name)
    print('Loaded dataset `{:s}` for training'.format(imdb.name))
    imdb.set_proposal_method(cfg.TRAIN.PROPOSAL_METHOD)
    print('Set proposal method: {:s}'.format(cfg.TRAIN.PROPOSAL_METHOD))
    roidb = get_training_roidb(imdb)

    # Start training
    train_net(solver,
              roidb,
              pretrained_model=pretrained_model,
              max_iters=max_iters)
    def image_path_from_index(self, index):
        """
        Construct an image path from the image's "index" identifier.
        """
        image_path = osp.join(self._dataset_path, 'images',
                              index + self._image_ext)
        check_if_exist('Path', image_path)
        return image_path
def create_lmdb(dataset_dir, label_map_file, caffe_root, imagesets,
                resize_height, resize_width):
    anno_type = 'detection'
    label_type = 'xml'
    check_label = True
    min_dim = 0
    max_dim = 0
    backend = 'lmdb'
    shuffle = False
    check_size = False
    encode_type = 'jpg'
    encoded = True
    gray = False

    check_if_exist('Label map file', label_map_file)
    # Check if label map file has classes
    get_classnames_from_labelmap(label_map_file)

    # Assumed location of the SSD image set lists (ssd_imageset_dir is
    # referenced in the loop below)
    ssd_imageset_dir = osp.join(dataset_dir, 'ssd_ImageSets')

    # Create lmdb data for SSD
    for imageset in imagesets:
        print('Creating lmdb for Imageset: {:s}'.format(imageset))
        imageset_file = osp.join(ssd_imageset_dir, imageset)
        out_dir = osp.join(
            dataset_dir,
            '{}_{}'.format(os.path.splitext(imageset)[0], backend))

        if osp.exists(out_dir):
            shutil.rmtree(out_dir)

        cmd = "{}/build/tools/convert_annoset" \
              " --anno_type={}" \
              " --label_type={}" \
              " --label_map_file={}" \
              " --check_label={}" \
              " --min_dim={}" \
              " --max_dim={}" \
              " --resize_height={}" \
              " --resize_width={}" \
              " --backend={}" \
              " --shuffle={}" \
              " --check_size={}" \
              " --encode_type={}" \
              " --encoded={}" \
              " --gray={}" \
              " {}/ {} {}" \
              .format(caffe_root, anno_type, label_type, label_map_file, check_label,
                  min_dim, max_dim, resize_height, resize_width, backend, shuffle,
                  check_size, encode_type, encoded, gray, dataset_dir, imageset_file, out_dir)

        print(cmd)
        process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE)
        output = process.communicate()[0]
    def _load_image_set_index(self):
        """
        Load the indexes listed in this dataset's image set file.
        """
        # Example path to image set file:
        # self._dataset_path + pascal_ImageSets/val.txt
        image_set_file = osp.join(self._dataset_path, 'pascal_ImageSets',
                                  self._image_set + '.txt')
        check_if_exist('Path', image_set_file)
        with open(image_set_file) as f:
            image_index = [x.strip() for x in f.readlines()]
        return image_index
def train_ssd(no_pretrained, resume_training=True):
    """Train a SSD network."""

    train_param = ''

    # Set pretrained model
    if not no_pretrained:
        pretrained_model = osp.join(cfg.DATA_DIR, 'imagenet_models',
                                    '{:s}.caffemodel'.format(cfg.MODEL_NAME))
        check_if_exist('Pretrained model', pretrained_model)
        train_param = '--weights="{:s}"'.format(pretrained_model)

    # Set solver
    train_solver_file = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME,
                                 cfg.METHOD_NAME, cfg.MODEL_NAME,
                                 'train_solver.prototxt')
    check_if_exist('Solver', train_solver_file)

    # Find the most recent snapshot
    snapshot_file = get_model_path(cfg.OUTPUT_DIR, '.solverstate', '_iter_')

    # Load from the most recently saved snapshot, if it exists
    if resume_training and snapshot_file is not None:
        train_param = '--snapshot="{:s}"'.format(snapshot_file)

    # Train model
    cmd = './frameworks/caffe-rcnn-ssd/build/tools/caffe train \
           --solver="{}" {} --gpu="{}"\
          '.format(train_solver_file, train_param, cfg.GPU_ID)

    # subprocess.call(cmd, shell=True)

    process = subprocess.Popen(cmd,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               bufsize=1,
                               shell=True)

    # Log training
    try:
        with process.stdout, open(cfg.OUTPUT_DIR + '/logfile.txt', 'ab') as f:
            for line in iter(process.stdout.readline, b''):
                print(line, end='')
                f.write(line)
    except Exception as e:
        print(e)

    process.wait()
def detect_yolov2(image_paths, result_file, conf_thresh, nms_thresh):
    """Detect object classes in given images with a YOLOv2 network."""

    data_cfg = osp.join(cfg.OUTPUT_DIR, '{}.data'.format(cfg.DATASET_NAME))
    model_cfg = osp.join(cfg.OUTPUT_DIR, '{}.cfg'.format(cfg.DATASET_NAME))

    check_if_exist('YOLOv2 data config', data_cfg)
    check_if_exist('YOLOv2 model config', model_cfg)

    # Change model config for detection
    with open(model_cfg, 'r') as f:
        data = f.readlines()

    for i in range(len(data)):
        if 'height' in data[i]:
            data[i] = 'height={:d}\n'.format(cfg.TEST.SCALES[0])
            data[i + 1] = 'width={:d}\n'.format(cfg.TEST.MAX_SIZE)

    with open(model_cfg, 'w') as f:
        f.writelines(data)

    # Get model weights
    model_weights = get_model_path(cfg.OUTPUT_DIR, '.weights', '_batch_')

    if model_weights is None:
        print('No model weights found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    # Create temporary list file with image paths
    detect_list_file = osp.join(os.getcwd(), 'detect_files.txt')
    with open(detect_list_file, "w") as f:
        for path in image_paths:
            print(path, file=f)

    # Add detection list file to data config
    with open(data_cfg, "a") as f:
        print('detect = {:s}'.format(detect_list_file), file=f)

    cmd = ('./frameworks/darknet/darknet-cpp detector detect {} {} {} -out {} '
           '-thresh {} -nms_thresh {} -gpus {}').format(
               data_cfg, model_cfg, model_weights, result_file, conf_thresh,
               nms_thresh, cfg.GPU_ID)

    subprocess.call(cmd, shell=True)

    # Remove temporary list file with image paths
    os.remove(detect_list_file)
    def _load_selective_search_roidb(self, gt_roidb):
        filename = osp.abspath(
            osp.join(cfg.DATA_DIR, 'selective_search_data',
                     self.name + '.mat'))
        check_if_exist('Selective search data', filename)
        raw_data = sio.loadmat(filename)['boxes'].ravel()

        box_list = []
        for i in xrange(raw_data.shape[0]):
            boxes = raw_data[i][:, (1, 0, 3, 2)] - 1
            keep = ds_utils.unique_boxes(boxes)
            boxes = boxes[keep, :]
            keep = ds_utils.filter_small_boxes(boxes, self.config['min_size'])
            boxes = boxes[keep, :]
            box_list.append(boxes)

        return self.create_roidb_from_box_list(box_list, gt_roidb)
def evaluate_yolov2(conf_thresh, nms_thresh):
    """Evaluate a YOLOv2 network."""

    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')
    data_cfg = osp.join(cfg.OUTPUT_DIR, '{}.data'.format(cfg.DATASET_NAME))
    model_cfg = osp.join(cfg.OUTPUT_DIR, '{}.cfg'.format(cfg.DATASET_NAME))

    make_if_not_exist(results_dir)
    check_if_exist('YOLOv2 data config', data_cfg)
    check_if_exist('YOLOv2 model config', model_cfg)

    # Change model config for testing
    with open(model_cfg, 'r') as f:
        data = f.readlines()

    for i in range(len(data)):
        if 'height' in data[i]:
            data[i] = 'height={:d}\n'.format(cfg.TEST.SCALES[0])
            data[i + 1] = 'width={:d}\n'.format(cfg.TEST.MAX_SIZE)

    with open(model_cfg, 'w') as f:
        f.writelines(data)

    # Find most recent model
    test_model = get_model_path(cfg.OUTPUT_DIR, '.weights', '_batch_')

    if test_model is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    result_file_prefix = '{}_det_test_'.format(cfg.DATASET_NAME)

    # Test model
    cmd = ('./frameworks/darknet/darknet-cpp detector valid {} {} {} -out {} '
           '-gpus {} -nms_thresh {:f}').format(data_cfg, model_cfg, test_model,
                                               result_file_prefix, cfg.GPU_ID,
                                               nms_thresh)

    subprocess.call(cmd, shell=True)

    # Set imdb and evaluate
    imdb_name = '{:s}_val'.format(cfg.DATASET_NAME)
    imdb = get_imdb(imdb_name)
    imdb._do_pascal_voc_eval(results_dir)
def train_yolov2(no_pretrained=False, resume_training=True):
    """Train a YOLOv2 network."""

    data_cfg = osp.join(cfg.OUTPUT_DIR, '{}.data'.format(cfg.DATASET_NAME))
    model_cfg = osp.join(cfg.OUTPUT_DIR, '{}.cfg'.format(cfg.DATASET_NAME))

    check_if_exist('YOLOv2 data config', data_cfg)
    check_if_exist('YOLOv2 model config', model_cfg)

    # Set pretrained model
    if no_pretrained:
        pretrained_model = None
    else:
        pretrained_model = osp.join(cfg.DATA_DIR, 'imagenet_models',
                                    '{:s}.weights'.format(cfg.MODEL_NAME))
        check_if_exist('Pretrained model', pretrained_model)

    # Find most recent snapshot
    snapshot_file = get_model_path(cfg.OUTPUT_DIR, '.weights', '_batch_')

    # Load from the most recently saved snapshot, if it exists
    if resume_training and snapshot_file is not None:
        pretrained_model = snapshot_file

    snapshot_prefix = cfg.MODEL_NAME + '_' + cfg.METHOD_NAME

    # Train model
    cmd = ('./frameworks/darknet/darknet-cpp detector train {:s} {:s} {} '
           '-gpus {:d} -out {:s}').format(data_cfg, model_cfg,
                                          pretrained_model, cfg.GPU_ID,
                                          snapshot_prefix)
    # subprocess.call(cmd, shell=True)

    process = subprocess.Popen(cmd.split(),
                               stdout=subprocess.PIPE,
                               stderr=subprocess.STDOUT,
                               bufsize=1)

    # Log training
    try:
        with process.stdout, open(cfg.OUTPUT_DIR + '/logfile.txt', 'ab') as f:
            for line in iter(process.stdout.readline, b''):
                print(line, end='')
                f.write(line)
    except Exception as e:
        print(e)

    process.wait()
    def _do_pascal_voc_eval(self, results_dir):
        check_if_exist('Result directory', results_dir)

        annopath = osp.join(self._dataset_path, 'pascal_Annotations',
                            '{:s}.xml')

        imagesetfile = osp.join(self._dataset_path, 'pascal_ImageSets',
                                self._image_set + '.txt')

        cachedir = osp.join(self._dataset_path, 'annotations_cache')
        aps = []

        fig = plt.figure()
        fig.set_size_inches(18.75, 10.25)
        colors = plt.cm.hsv(np.linspace(0, 1, self.num_classes)).tolist()

        f = open(osp.join(results_dir, 'mAP_results.txt'), 'w')

        for i in xrange(self.num_classes):
            classname = self._classes[i]

            if classname == 'background':
                continue
            filename = osp.join(
                results_dir,
                '{:s}_det_test_{:s}.txt'.format(cfg.DATASET_NAME, classname))
            rec, prec, ap = custom_dataset_eval(filename,
                                                annopath,
                                                imagesetfile,
                                                classname,
                                                cachedir,
                                                ovthresh=0.5)
            aps += [ap]

            print('Average Precision for {} = {:.4f}'.format(classname, ap))
            print('Average Precision for {} = {:.4f}'.format(classname, ap),
                  file=f)

            color = colors[i % self.num_classes]

            plt.plot(rec,
                     prec,
                     color=color,
                     lw=1.5,
                     label='{0:s} (AP = {1:0.2f})'.format(classname, ap))

        mAP = np.mean(aps)
        print('Mean Average Precision = {:.4f}'.format(mAP))
        print('Mean Average Precision = {:.4f}'.format(mAP), file=f)

        f.close()

        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('Recall')
        plt.ylabel('Precision')
        plt.title('Precision-Recall curve')
        plt.legend(loc="lower left")
        print('Saving Precision-Recall curve to {:s}'.format(results_dir))
        fig.savefig(osp.join(results_dir, 'PRcurve.png'))

        new_output_dir = cfg.OUTPUT_DIR + '_{:.2f}'.format(mAP * 100)
        os.rename(cfg.OUTPUT_DIR, new_output_dir)
        cfg.OUTPUT_DIR = new_output_dir
def detect_ssd(image_paths, result_file, conf_thresh, cpu_mode=False):
    """Detect object classes in given images with a SSD network."""

    prototxt = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                        cfg.MODEL_NAME, 'deploy.prototxt')

    check_if_exist('Model file', prototxt)

    # Get model weights
    caffemodel = get_model_path(cfg.OUTPUT_DIR, '.caffemodel', '_iter_')

    if caffemodel is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    if cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.GPU_ID)

    # Load network
    net = caffe.Net(prototxt, caffe.TEST, weights=caffemodel)

    # input preprocessing: 'data' is the name of the input blob == net.inputs[0]
    transformer = caffe.io.Transformer({'data': net.blobs['data'].data.shape})
    # Change input order to caffe format
    transformer.set_transpose('data', (2, 0, 1))
    # Set pixel means
    transformer.set_mean('data', cfg.PIXEL_MEANS[0][0])

    # Detect with a batch size of 1
    image_resize = cfg.TRAIN.MAX_SIZE
    net.blobs['data'].reshape(1, 3, image_resize, image_resize)

    _t = Timer()
    f = open(result_file, 'w')

    num_images = len(image_paths)
    for i in range(0, num_images):
        path = image_paths[i]
        im = cv2.imread(path)
        image_name = path.split("/")[-1]

        # Crop borders for original baitcam images
        # if cfg.DATASET_NAME == 'baitcam':
        #     im = im[32:1504, 0:2043]

        # Preprocess image
        transformed_image = transformer.preprocess('data', im)
        net.blobs['data'].data[...] = transformed_image

        # Forward pass.
        _t.tic()
        detections = net.forward()['detection_out']
        _t.toc()
        print(
            'Detection took {:.3f}s for {:d} object proposals (image {:d}/{:d})'
            .format(_t.diff, detections.shape[2], i + 1, num_images))

        #### Feature Map visualization for Thesis
        # i=0
        # for layer_name, param in net.params.iteritems():
        #     if i==23:
        #         break
        #     print(layer_name + '\t' + str(param[0].data.shape), str(param[1].data.shape))
        #     i+=1
        # # filters = net.blobs['conv5_3'].data[0]
        # filters = net.blobs['conv6_2'].data[0, 5:14]
        # visualize_filters(filters)
        # break

        # Only keep detections with score higher than confidence threshold
        inds = np.where(detections[0, 0, :, 2] >= conf_thresh)[0]

        # Write to results file
        for j in inds:
            label = int(detections[0, 0, :, 1][j])
            score = float(detections[0, 0, :, 2][j])
            xmin = int(np.around(detections[0, 0, :, 3][j] * im.shape[1]))
            ymin = int(np.around(detections[0, 0, :, 4][j] * im.shape[0]))
            xmax = int(np.around(detections[0, 0, :, 5][j] * im.shape[1]))
            ymax = int(np.around(detections[0, 0, :, 6][j] * im.shape[0]))

            # Compensate for cropped borders in original baitcam images
            # if cfg.DATASET_NAME == 'baitcam':
            #     ymin += 32
            #     ymax += 32

            # Detection format: [image_id, label, score, xmin, ymin, xmax, ymax].
            print('{:s} {:d} {:f} {:d} {:d} {:d} {:d}'.format(
                path, label, score, xmin, ymin, xmax, ymax),
                  file=f)

    f.close()
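A small companion sketch (an assumption, not part of the original tooling) for reading the result_file written above back into per-image detections; each line is 'path label score xmin ymin xmax ymax':

from collections import defaultdict

def load_detections(result_file):
    """Group detections by image path."""
    dets = defaultdict(list)
    with open(result_file) as f:
        for line in f:
            parts = line.split()
            label, score = int(parts[1]), float(parts[2])
            box = [int(v) for v in parts[3:7]]
            dets[parts[0]].append((label, score, box))
    return dets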
def create_yolov2_model_definition(max_iters):
    """Create YOLOv2 model definition and config files."""

    default_model_cfg = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME,
                                 cfg.METHOD_NAME, cfg.MODEL_NAME,
                                 '{}.cfg'.format(cfg.MODEL_NAME))

    check_if_exist('YOLOv2 default model config', default_model_cfg)

    # Create model config in output dir
    model_cfg = osp.join(cfg.OUTPUT_DIR, '{}.cfg'.format(cfg.DATASET_NAME))

    num_classes = cfg.NUM_CLASSES - 1  # No background class

    # Get custom anchors
    if cfg.CUSTOM_ANCHORS:
        anchor_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                               'custom_anchor_boxes', '5_anchor_boxes.txt')
        if not osp.exists(anchor_file):
            print('Custom anchor box file `{:s}` does not exist.'.format(
                anchor_file))
            print('Generate custom anchor boxes with '
                  'data/data_utils/k_means_anchor_boxes.py')
            sys.exit()

        # Copy them to output dir
        copy(anchor_file, osp.join(cfg.OUTPUT_DIR, '5_anchor_boxes.txt'))

        # Read anchor file
        with open(anchor_file, 'r') as f:
            data = f.readlines()

        # Get custom anchors
        custom_anchors = ''
        for i in range(1, len(data)):
            splt = data[i].split(',')
            anchor_width = (float(splt[0]) * cfg.TRAIN.MAX_SIZE) / 32
            anchor_height = (float(splt[1]) * cfg.TRAIN.SCALES[0]) / 32

            custom_anchors += '{:.6f}, {:.6f}, '.format(
                anchor_width, anchor_height)

        # Remove last comma
        custom_anchors = custom_anchors[:-2]

    # Get default model settings
    with open(default_model_cfg, 'r') as f:
        data = f.readlines()

    # Change model settings according to our dataset and config
    for i in range(len(data)):
        if 'batch' in data[i] and 'subdivisions' in data[i + 1]:
            data[i] = 'batch={:d}\n'.format(cfg.TRAIN.IMS_PER_BATCH)
            data[i + 1] = 'subdivisions={:d}\n'.format(cfg.TRAIN.BATCH_SIZE)
            data[i + 2] = 'height={:d}\n'.format(cfg.TRAIN.SCALES[0])
            data[i + 3] = 'width={:d}\n'.format(cfg.TRAIN.MAX_SIZE)
        elif 'max_batches' in data[i]:
            data[i] = 'max_batches={:d}\n'.format(max_iters)
            step1 = int(np.ceil(0.5 * max_iters))
            step2 = int(np.ceil(0.75 * max_iters))
            data[i + 2] = 'steps={:d},{:d}\n'.format(step1, step2)
        elif 'filters' in data[i]:
            last_filters_idx = i
        elif 'anchors' in data[i]:
            if cfg.CUSTOM_ANCHORS:
                data[i] = 'anchors={:s}\n'.format(custom_anchors)
            data[i + 2] = 'classes={:d}\n'.format(num_classes)
            num_anchors = len(data[i].split(',')) / 2
            data[i + 4] = 'num={:d}\n'.format(num_anchors)
        elif 'random' in data[i]:
            data[i] = 'random=0\n'

    # last filter size is (num_classes + num_coords + 1) * num_anchors
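    # Worked example: with 9 object classes and 5 anchors,
    # (9 + 4 + 1) * 5 = 70 output filters in the final conv layer.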
    last_filter_size = (num_classes + 5) * num_anchors
    data[last_filters_idx] = 'filters={:d}\n'.format(last_filter_size)

    # Write to our own model config
    with open(model_cfg, 'w') as f:
        f.writelines(data)
def create_ssd_model_definition(max_iters, conf_thresh, nms_thresh):
    """Create SSD network definition files based on config settings."""

    # Training and testing data created by data/data_utils/pascal_voc_to_ssd.py
    train_data = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'train_lmdb')
    test_data = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'val_lmdb')

    models_dir = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                          cfg.MODEL_NAME)

    make_if_not_exist(models_dir)
    check_if_exist('Training data', train_data)
    check_if_exist('Test data', test_data)

    # Directory which stores the detection results
    results_dir = osp.join(cfg.OUTPUT_DIR, 'results')

    # Model definition files.
    train_net_file = osp.join(models_dir, 'train.prototxt')
    test_net_file = osp.join(models_dir, 'test.prototxt')
    deploy_net_file = osp.join(models_dir, 'deploy.prototxt')
    train_solver_file = osp.join(models_dir, 'train_solver.prototxt')
    test_solver_file = osp.join(models_dir, 'test_solver.prototxt')

    # The name of the model
    model_name = '{}_ssd'.format(cfg.MODEL_NAME.lower())

    # Snapshot prefix.
    snapshot_prefix = osp.join(cfg.OUTPUT_DIR, model_name)

    # Stores the test image names and sizes
    name_size_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME, 'ssd_ImageSets',
                              'val_name_size.txt')

    label_map_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                              '{}_labelmap.prototxt'.format(cfg.DATASET_NAME))

    # Specify the batch sampler.
    resize_width = cfg.TRAIN.MAX_SIZE
    resize_height = cfg.TRAIN.MAX_SIZE
    resize = '{}x{}'.format(resize_width, resize_height)
    batch_sampler = [
        {
            'sampler': {},
            'max_trials': 1,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.1,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.3,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.5,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.7,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'min_jaccard_overlap': 0.9,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
        {
            'sampler': {
                'min_scale': 0.3,
                'max_scale': 1.0,
                'min_aspect_ratio': 0.5,
                'max_aspect_ratio': 2.0,
            },
            'sample_constraint': {
                'max_jaccard_overlap': 1.0,
            },
            'max_trials': 50,
            'max_sample': 1,
        },
    ]
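    # The sampler list above mirrors the SSD data-augmentation scheme: an
    # identity sampler plus random crops constrained to increasing jaccard
    # overlap with ground truth (0.1, 0.3, 0.5, 0.7, 0.9, then a max-overlap crop).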
    train_transform_param = {
        'mirror': True,
        # 'mean_value': [104, 117, 124],
        'mean_value': list(cfg.PIXEL_MEANS[0][0]),
        'force_color': True,
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            # 'resize_mode': P.Resize.FIT_SMALL_SIZE,
            # 'height': resize_height,
            # 'width': resize_width,
            # 'height_scale': resize_height,
            # 'width_scale': resize_width,
            'interp_mode': [
                P.Resize.LINEAR,
                P.Resize.AREA,
                P.Resize.NEAREST,
                P.Resize.CUBIC,
                P.Resize.LANCZOS4,
            ],
        },
        'distort_param': {
            'brightness_prob': 0.5,
            'brightness_delta': 32,
            'contrast_prob': 0.5,
            'contrast_lower': 0.5,
            'contrast_upper': 1.5,
            'hue_prob': 0.5,
            'hue_delta': 18,
            'saturation_prob': 0.5,
            'saturation_lower': 0.5,
            'saturation_upper': 1.5,
            'random_order_prob': 0.0,
        },
        'expand_param': {
            'prob': 0.5,
            'max_expand_ratio': 4.0,
        },
        'emit_constraint': {
            'emit_type': caffe_pb2.EmitConstraint.CENTER,
        }
    }
    test_transform_param = {
        # 'mean_value': [104, 117, 124],
        'mean_value': list(cfg.PIXEL_MEANS[0][0]),
        'force_color': True,
        'resize_param': {
            'prob': 1,
            'resize_mode': P.Resize.WARP,
            'height': resize_height,
            'width': resize_width,
            # 'resize_mode': P.Resize.FIT_SMALL_SIZE,
            # 'height': resize_height,
            # 'width': resize_width,
            # 'height_scale': resize_height,
            # 'width_scale': resize_height,
            'interp_mode': [P.Resize.LINEAR],
        },
    }

    # If true, use batch norm for all newly added layers.
    # Currently only the non batch norm version has been tested.
    use_batchnorm = False
    lr_mult = 1
    # Use different initial learning rate.
    if use_batchnorm:
        base_lr = 0.0004
    else:
        # A learning rate for batch_size = 1, num_gpus = 1.
        base_lr = 0.00004

    # MultiBoxLoss parameters.
    num_classes = cfg.NUM_CLASSES
    share_location = True
    background_label_id = 0
    output_name_prefix = '{}_det_test_'.format(cfg.DATASET_NAME)
    train_on_diff_gt = False
    normalization_mode = P.Loss.VALID
    code_type = P.PriorBox.CENTER_SIZE
    ignore_cross_boundary_bbox = False
    mining_type = P.MultiBoxLoss.MAX_NEGATIVE
    neg_pos_ratio = 3.
    loc_weight = (neg_pos_ratio + 1.) / 4.
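    # With the default neg_pos_ratio of 3: loc_weight = (3 + 1) / 4 = 1.0,
    # i.e. localization and confidence losses are weighted equally.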
    multibox_loss_param = {
        'loc_loss_type': P.MultiBoxLoss.SMOOTH_L1,
        'conf_loss_type': P.MultiBoxLoss.SOFTMAX,
        'loc_weight': loc_weight,
        'num_classes': num_classes,
        'share_location': share_location,
        'match_type': P.MultiBoxLoss.PER_PREDICTION,
        'overlap_threshold': 0.5,
        'use_prior_for_matching': True,
        'background_label_id': background_label_id,
        'use_difficult_gt': train_on_diff_gt,
        'mining_type': mining_type,
        'neg_pos_ratio': neg_pos_ratio,
        'neg_overlap': 0.5,
        'code_type': code_type,
        'ignore_cross_boundary_bbox': ignore_cross_boundary_bbox,
    }
    loss_param = {
        'normalization': normalization_mode,
    }

    # parameters for generating priors.
    # minimum dimension of input image
    min_dim = cfg.TRAIN.MAX_SIZE
    # conv4_3 ==> 38 x 38 (300x300) ==> 64 x 64 (512x512)  ==> 76 x 76 (608x608)
    # fc7 ==> 19 x 19 (300x300) ==> 32 x 32 (512x512) ==> 38 x 38 (608x608)
    # conv6_2 ==> 10 x 10 (300x300) ==> 16 x 16 (512x512) ==> 19 x 19 (608x608)
    # conv7_2 ==> 5 x 5 (300x300) ==> 8 x 8 (512x512) ==> 10 x 10 (608x608)
    # conv8_2 ==> 3 x 3 (300x300) ==> 4 x 4 (512x512) ==> 5 x 5 (608x608)
    # conv9_2 ==> 1 x 1 (300x300) ==> 2 x 2 (512x512) ==> 3 x 3 (608x608)
    #                    conv10_2 ==> 1 x 1 (512x512) ==> 1 x 1 (608x608)

    if cfg.CUSTOM_ANCHORS:
        anchor_file = osp.join(cfg.DATA_DIR, cfg.DATASET_NAME,
                               'custom_anchor_boxes', '6_anchor_boxes.txt')
        if not osp.exists(anchor_file):
            print('Custom anchor box file `{:s}` does not exist.'.format(
                anchor_file))
            print('Generate custom anchor boxes with '
                  'data/data_utils/k_means_anchor_boxes.py')
            sys.exit()

        # Read anchor file
        with open(anchor_file, 'r') as f:
            data = f.readlines()

        custom_anchors = []
        # aspect_ratio = []
        for i in range(1, len(data)):
            splt = data[i].split(',')
            anchor_width = float(splt[0]) * min_dim
            anchor_height = float(splt[1]) * min_dim
            # aspect_ratio.append(anchor_height/anchor_width)

            custom_anchors.append([anchor_width, anchor_height])

        custom_anchors = np.asarray(custom_anchors)
        print(custom_anchors)

        min_ratio = int(np.floor(np.min(custom_anchors) / min_dim * 100))
        max_ratio = int(np.ceil(np.amax(custom_anchors) / min_dim * 100))

        nb = 1
    else:
        # in percent %
        min_const = 10
        max_const = 20
        min_ratio = 20
        max_ratio = 90

        if min_dim == 512 or min_dim == 608:
            max_const = 10
            min_ratio = 10
            min_const = 4

        nb = 2

    mbox_source_layers = [
        'conv4_3', 'fc7', 'conv6_2', 'conv7_2', 'conv8_2', 'conv9_2'
    ]

    if min_dim == 512 or min_dim == 608:
        mbox_source_layers.append('conv10_2')

    step = int(
        np.floor((max_ratio - min_ratio) / (len(mbox_source_layers) - nb)))

    min_sizes = []
    max_sizes = []
    for ratio in xrange(min_ratio, max_ratio + 1, step):
        print(ratio)
        min_sizes.append(min_dim * ratio / 100.)
        max_sizes.append(min_dim * (ratio + step) / 100.)
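    # Worked example for min_dim=300 with the default (non-custom) anchors:
    # min_ratio=20, max_ratio=90, 6 source layers, nb=2
    #   -> step = floor((90 - 20) / 4) = 17, ratios 20, 37, 54, 71, 88
    #   -> min_sizes = [60, 111, 162, 213, 264]
    #   -> max_sizes = [111, 162, 213, 264, 315]
    # (the extra 30/60 pair for conv4_3 is prepended below)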

    steps = [8, 16, 32, 64, 100, 300]
    aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
    # L2 normalize conv4_3.
    normalizations = [20, -1, -1, -1, -1, -1]

    if min_dim == 512:
        steps = [8, 16, 32, 64, 128, 256, 512]
        aspect_ratios.insert(2, [2, 3])
        normalizations.append(-1)
    elif min_dim == 608:
        steps = [8, 16, 32, 61, 122, 203, 608]
        aspect_ratios.insert(2, [2, 3])
        normalizations.append(-1)

    print("minsize: ", min_sizes)
    print("maxsize: ", max_sizes)
    if not cfg.CUSTOM_ANCHORS:
        min_sizes = [min_dim * min_const / 100.] + min_sizes
        max_sizes = [min_dim * max_const / 100.] + max_sizes
    print("minsize: ", min_sizes)
    print("maxsize: ", max_sizes)

    if min_dim != 300 and min_dim != 512:
        print('SSD anchor boxes are not optimized for size {}'.format(min_dim))

    # variance used to encode/decode prior bboxes.
    if code_type == P.PriorBox.CENTER_SIZE:
        prior_variance = [0.1, 0.1, 0.2, 0.2]
    else:
        prior_variance = [0.1]

    flip = True
    clip = False

    ### PRIOR CALCULATIONS THAT ARE DONE IN CAFFE LAYER
    for s in range(0, len(min_sizes)):
        min_size = min_sizes[s]
        # first prior: aspect_ratio = 1, size = min_size
        box_width = min_size
        box_height = min_size
        print('\nfirst: {} X {}'.format(box_width, box_height))

        if len(max_sizes) > 0:
            max_size = max_sizes[s]
            box_width = np.sqrt(min_size * max_size)
            box_height = np.sqrt(min_size * max_size)
            print('second: {} X {}'.format(box_width, box_height))

        for r in range(0, len(aspect_ratios[s])):
            ar = aspect_ratios[s][r]
            if np.fabs(ar - 1.) < 1e-6:
                continue

            box_width = min_size * np.sqrt(ar)
            box_height = min_size / np.sqrt(ar)
            print('rest: {} X {}'.format(box_width, box_height))

    # sys.exit()

    # Solver parameters.
    # Defining which GPUs to use.
    gpus = '{:d}'.format(cfg.GPU_ID)
    gpulist = gpus.split(',')
    num_gpus = len(gpulist)

    # Divide the mini-batch to different GPUs.
    batch_size = cfg.TRAIN.IMS_PER_BATCH
    accum_batch_size = cfg.TRAIN.BATCH_SIZE
    iter_size = accum_batch_size / batch_size
    solver_mode = P.Solver.CPU
    device_id = 0
    batch_size_per_device = batch_size
    if num_gpus > 0:
        batch_size_per_device = int(np.ceil(float(batch_size) / num_gpus))
        iter_size = int(
            np.ceil(
                float(accum_batch_size) / (batch_size_per_device * num_gpus)))
        solver_mode = P.Solver.GPU
        device_id = int(gpulist[0])

    if normalization_mode == P.Loss.NONE:
        base_lr /= batch_size_per_device
    elif normalization_mode == P.Loss.VALID:
        base_lr *= 25. / loc_weight
    elif normalization_mode == P.Loss.FULL:
        # Roughly there are 2000 prior bboxes per image.
        # TODO(weiliu89): Estimate the exact # of priors.
        base_lr *= 2000.

    # Get number of test images from name_size_file
    num_test_image = sum(1 for line in open(name_size_file))
    test_batch_size = 8

    # Ideally num_test_image should be divisible by test_batch_size.
    test_iter = int(np.ceil(float(num_test_image) / test_batch_size))
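    # e.g. 1000 test images with test_batch_size = 8 -> test_iter = 125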

    stepvalue = []
    stepvalue.append(int(np.ceil(max_iters * 0.6667)))
    stepvalue.append(int(np.ceil(max_iters * 0.8333)))
    stepvalue.append(max_iters)

    train_solver_param = {
        # Train parameters
        'base_lr': base_lr,
        'weight_decay': 0.0005,
        'lr_policy': 'multistep',
        'stepvalue': stepvalue,
        'gamma': 0.1,
        'momentum': 0.9,
        'iter_size': iter_size,
        'max_iter': max_iters,
        'snapshot': cfg.TRAIN.SNAPSHOT_ITERS,
        'display': 20,
        'average_loss': 10,
        'type': 'SGD',
        'solver_mode': solver_mode,
        'device_id': device_id,
        'debug_info': False,
        'snapshot_after_train': True,
    }

    test_solver_param = {
        # Test parameters
        'snapshot': 1,
        'snapshot_after_train': False,
        'test_iter': [test_iter],
        'test_interval': 1,
        'eval_type': 'detection',
        'ap_version': 'MaxIntegral',
        'test_initialization': True,
    }

    # Parameters for generating detection output.
    det_out_param = {
        'num_classes': num_classes,
        'share_location': share_location,
        'background_label_id': background_label_id,
        'nms_param': {
            'nms_threshold': nms_thresh,
            'top_k': 200
        },
        'save_output_param': {
            'output_directory': results_dir,
            'output_name_prefix': output_name_prefix,
            'output_format': 'VOC',
            'label_map_file': label_map_file,
            'name_size_file': name_size_file,
            'num_test_image': num_test_image,
        },
        'keep_top_k': 50,
        'confidence_threshold': conf_thresh,
        'code_type': code_type,
    }

    # Parameters for evaluating detection results.
    det_eval_param = {
        'num_classes': num_classes,
        'background_label_id': background_label_id,
        'overlap_threshold': 0.5,
        'evaluate_difficult_gt': False,
        'name_size_file': name_size_file,
    }

    # Create train net.
    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        train_data,
        batch_size=batch_size_per_device,
        train=True,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=train_transform_param,
        batch_sampler=batch_sampler)

    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False)

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

    mbox_layers = CreateMultiBoxHead(net,
                                     data_layer='data',
                                     from_layers=mbox_source_layers,
                                     use_batchnorm=use_batchnorm,
                                     min_sizes=min_sizes,
                                     max_sizes=max_sizes,
                                     aspect_ratios=aspect_ratios,
                                     steps=steps,
                                     normalizations=normalizations,
                                     num_classes=num_classes,
                                     share_location=share_location,
                                     flip=flip,
                                     clip=clip,
                                     prior_variance=prior_variance,
                                     kernel_size=3,
                                     pad=1,
                                     lr_mult=lr_mult)

    # Create the MultiBoxLossLayer.
    name = "mbox_loss"
    mbox_layers.append(net.label)
    net[name] = L.MultiBoxLoss(
        *mbox_layers,
        multibox_loss_param=multibox_loss_param,
        loss_param=loss_param,
        include=dict(phase=caffe_pb2.Phase.Value('TRAIN')),
        propagate_down=[True, True, False, False])

    with open(train_net_file, 'w') as f:
        print('name: "{}_train"'.format(model_name), file=f)
        print(net.to_proto(), file=f)

    # Create test net.
    net = caffe.NetSpec()
    net.data, net.label = CreateAnnotatedDataLayer(
        test_data,
        batch_size=test_batch_size,
        train=False,
        output_label=True,
        label_map_file=label_map_file,
        transform_param=test_transform_param)

    VGGNetBody(net,
               from_layer='data',
               fully_conv=True,
               reduced=True,
               dilated=True,
               dropout=False)

    AddExtraLayers(net, use_batchnorm, lr_mult=lr_mult)

    mbox_layers = CreateMultiBoxHead(net,
                                     data_layer='data',
                                     from_layers=mbox_source_layers,
                                     use_batchnorm=use_batchnorm,
                                     min_sizes=min_sizes,
                                     max_sizes=max_sizes,
                                     aspect_ratios=aspect_ratios,
                                     steps=steps,
                                     normalizations=normalizations,
                                     num_classes=num_classes,
                                     share_location=share_location,
                                     flip=flip,
                                     clip=clip,
                                     prior_variance=prior_variance,
                                     kernel_size=3,
                                     pad=1,
                                     lr_mult=lr_mult)

    conf_name = 'mbox_conf'
    if multibox_loss_param['conf_loss_type'] == P.MultiBoxLoss.SOFTMAX:
        reshape_name = '{}_reshape'.format(conf_name)
        net[reshape_name] = L.Reshape(net[conf_name],
                                      shape=dict(dim=[0, -1, num_classes]))
        softmax_name = '{}_softmax'.format(conf_name)
        net[softmax_name] = L.Softmax(net[reshape_name], axis=2)
        flatten_name = '{}_flatten'.format(conf_name)
        net[flatten_name] = L.Flatten(net[softmax_name], axis=1)
        mbox_layers[1] = net[flatten_name]
    elif multibox_loss_param['conf_loss_type'] == P.MultiBoxLoss.LOGISTIC:
        sigmoid_name = '{}_sigmoid'.format(conf_name)
        net[sigmoid_name] = L.Sigmoid(net[conf_name])
        mbox_layers[1] = net[sigmoid_name]

    net.detection_out = L.DetectionOutput(
        *mbox_layers,
        detection_output_param=det_out_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))
    net.detection_eval = L.DetectionEvaluate(
        net.detection_out,
        net.label,
        detection_evaluate_param=det_eval_param,
        include=dict(phase=caffe_pb2.Phase.Value('TEST')))

    with open(test_net_file, 'w') as f:
        print('name: "{}_test"'.format(model_name), file=f)
        print(net.to_proto(), file=f)

    # Create deploy net.
    # Remove the first and last layer from test net.
    deploy_net = net
    with open(deploy_net_file, 'w') as f:
        net_param = deploy_net.to_proto()
        # Remove the first (AnnotatedData) and last (DetectionEvaluate) layer from test net.
        del net_param.layer[0]
        del net_param.layer[-1]
        net_param.name = '{}_deploy'.format(model_name)
        net_param.input.extend(['data'])
        net_param.input_shape.extend(
            [caffe_pb2.BlobShape(dim=[1, 3, resize_height, resize_width])])
        print(net_param, file=f)

    # Create training solver.
    train_solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                             snapshot_prefix=snapshot_prefix,
                                             **train_solver_param)

    with open(train_solver_file, 'w') as f:
        print(train_solver, file=f)

    # Create testing solver.
    test_solver = caffe_pb2.SolverParameter(train_net=train_net_file,
                                            test_net=[test_net_file],
                                            snapshot_prefix=snapshot_prefix,
                                            **test_solver_param)

    with open(test_solver_file, 'w') as f:
        print(test_solver, file=f)
    # eval_set = args.eval_set
    detect = args.detect
    output_dir = args.output_dir
    image_dir = args.image_dir
    conf_thresh = args.conf_thresh
    nms_thresh = args.nms_thresh

    _t = Timer()

    if not evaluate and not detect:
        if cfg_file is None:
            cfg_file = osp.join('configs', method_name, 'default.yml')
            print('No config file given, '
                  'using default config: {:s}'.format(cfg_file))

        check_if_exist('Config', cfg_file)

        extra_cfg = ('METHOD_NAME {:s} MODEL_NAME {:s} '
                     'DATASET_NAME {:s} GPU_ID {:d}'.format(
                         method_name, model_name, dataset_name, gpu_id))

        set_cfgs = extra_cfg.split()

        # Update config
        cfg_from_file(cfg_file)
        cfg_from_list(set_cfgs)

        # Set and create output dir
        cfg.OUTPUT_DIR = osp.join(cfg.OUTPUT_DIR, cfg.DATASET_NAME,
                                  cfg.METHOD_NAME, cfg.MODEL_NAME)
        make_if_not_exist(cfg.OUTPUT_DIR)
    args = parse_args()

    print('Called with args:')
    print(args)

    dataset_dir = args.dataset_dir
    label_map_file = args.label_map_file

    # Remove trailing slash
    dataset_dir = dataset_dir.rstrip('/')

    image_dir = osp.join(dataset_dir, 'images')
    pascal_annotations_dir = osp.join(dataset_dir, 'pascal_Annotations')
    pascal_imageset_dir = osp.join(dataset_dir, 'pascal_ImageSets')

    check_if_exist('Dataset directory', dataset_dir)
    check_if_exist('Image directory', image_dir)
    check_if_exist('Annotation directory', pascal_annotations_dir)
    check_if_exist('Imageset directory', pascal_imageset_dir)

    imagesets = [
        osp.basename(s) for s in glob.glob(pascal_imageset_dir + '/*.txt')
    ]
    # imagesets = ['train', 'val']

    classnames = get_classnames_from_labelmap(label_map_file)
    # classnames = ['ArcticFox', 'Crow', 'Eagle', 'GoldenEagle', 'Raven',
    #                 'RedFox', 'Reindeer', 'SnowyOwl', 'Wolverine']

    # Remove background class
    if 'background' in classnames:
        classnames.remove('background')
Example #22
def detect_faster_rcnn(image_paths,
                       result_file,
                       conf_thresh,
                       nms_thresh,
                       cpu_mode=False):
    """Detect object classes in given images with a Faster R-CNN network."""

    prototxt = osp.join(cfg.MODELS_DIR, cfg.DATASET_NAME, cfg.METHOD_NAME,
                        cfg.MODEL_NAME, 'test.prototxt')
    check_if_exist('Prototxt', prototxt)

    # Get model weights
    caffemodel = get_model_path(cfg.OUTPUT_DIR, '.caffemodel', '_iter_')

    if caffemodel is None:
        print('No model found in `{:s}`.'.format(cfg.OUTPUT_DIR))
        sys.exit()

    if cpu_mode:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(cfg.GPU_ID)

    # Load network
    net = caffe.Net(prototxt, caffe.TEST, weights=caffemodel)

    f = open(result_file, "w")
    _t = Timer()

    num_images = len(image_paths)
    num_classes = len(cfg.CLASSES)

    for i in range(0, num_images):
        # Load image
        path = image_paths[i]
        im = cv2.imread(path)
        image_name = path.split("/")[-1]

        # Crop borders for original baitcam images
        # if cfg.DATASET_NAME == 'baitcam':
        #     im = im[32:1504, 0:2043]

        # Detect all object classes and regress object bounds
        _t.tic()
        scores, boxes = im_detect(net, im)
        _t.toc()
        print(
            'Detection took {:.3f}s for {:d} object proposals (image {:d}/{:d})'
            .format(_t.diff, boxes.shape[0], i + 1, num_images))

        for cls_ind in range(1, num_classes):  # skip background
            # Get results for class
            cls_boxes = boxes[:, 4 * cls_ind:4 * (cls_ind + 1)]
            cls_scores = scores[:, cls_ind]
            detections = np.hstack(
                (cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)

            # Non maximum suppression to remove redundant overlapping detections
            keep = nms(detections, nms_thresh)
            detections = detections[keep, :]

            # Only keep detections with score higher than confidence threshold
            inds = np.where(detections[:, -1] >= conf_thresh)[0]

            # Write to results file
            for j in inds:
                bbox = detections[j, :4]
                score = float(detections[j, -1])
                xmin = int(np.around(bbox[0]))
                ymin = int(np.around(bbox[1]))
                xmax = int(np.around(bbox[2]))
                ymax = int(np.around(bbox[3]))

                # Compensate for cropped borders in original baitcam images
                # if cfg.DATASET_NAME == 'baitcam':
                #     ymin += 32
                #     ymax += 32

                # Detection format: [image_id, label, score, xmin, ymin, xmax, ymax].
                print('{:s} {:d} {:f} {:d} {:d} {:d} {:d}'.format(
                    path, cls_ind, score, xmin, ymin, xmax, ymax),
                      file=f)

    f.close()
if __name__ == "__main__":
    args = parse_args()

    print('Called with args:')
    print(args)

    result_file = args.result_file
    labelmap_file = args.labelmap
    vis_thresh = args.vis_thresh
    vis_class = args.vis_class
    save = args.save
    skip = args.skip

    classnames = []
    if labelmap_file:
        check_if_exist('Label map file', labelmap_file)
        labelmap = cpb2.LabelMap()
        with open(labelmap_file, 'r') as f:
            text_format.Merge(str(f.read()), labelmap)

        for item in labelmap.item:
            classname = str(item.display_name)
            classnames.append(classname)

    if save:
        save_dir = osp.splitext(result_file)[0]
        make_if_not_exist(save_dir)
        print('Saving to directory: {}'.format(save_dir))

    img_results = OrderedDict()
    with open(result_file, "r") as f:
    args = parse_args()

    print('Called with args:')
    print(args)

    dataset_dir = args.dataset_dir
    k = args.k
    INFO = args.info

    # Remove trailing slash
    dataset_dir = dataset_dir.rstrip('/')

    yolov2_annotations_dir = osp.join(dataset_dir, 'yolov2_Annotations')
    yolov2_imageset_dir = osp.join(dataset_dir, 'yolov2_ImageSets')

    check_if_exist('Dataset directory', dataset_dir)
    check_if_exist('Annotation directory', yolov2_annotations_dir)
    check_if_exist('Imageset directory', yolov2_imageset_dir)

    train_imageset = osp.join(yolov2_imageset_dir, 'train.txt')
    bb_data = []
    extensions = ['.png', '.jpg', '.JPEG', '.JPG']

    # Load dataset bounding box annotation data (yolov2 format)
    # shape: [[x1,y1,w1,h1],...,[xn,yn,wn,hn]]
    with open(train_imageset, 'r') as f:
        for line in f:
            line = line.replace('images', 'yolov2_Annotations')
            for ext in extensions:
                if ext in line:
                    line = line.replace(ext, '.txt').strip()