Example #1
    def __init__(self, net, checkpoint, cfg):
        super().__init__("TrackerDefault")

        self.cfg = cfg

        self.net = net
        if checkpoint is not None:
            utils.load_checkpoint(checkpoint, self.net)

        self.net.eval()

        self.anchors = utils.generate_anchors(cfg)

        if torch.cuda.is_available():
            self.net.cuda()
            self.anchors = self.anchors.cuda()

        self.z_transform = Compose([
            ToAbsoluteCoords(),
            Crop(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_Z, make_square=False),
            ToPercentCoords(),
            Resize(cfg.MODEL.Z_SIZE),
        ])

        self.x_crop = Crop(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_X, return_rect=True, make_square=True)
        self.x_resize = Resize(size=cfg.MODEL.X_SIZE)

        self.z_crop = Crop(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_Z, return_rect=True, make_square=False)
        self.z_resize = Resize(size=cfg.MODEL.Z_SIZE)

        self.criterion = MultiBoxLoss(self.anchors, self.cfg)
Example #2
    def __init__(self,
                 resized_image_size,
                 test_images_list=cfg.TEST_IMAGE_LIST.copy(),
                 anchor_box_file=cfg.ANCHOR_BOXES_STORE,
                 subsampled_ratio=cfg.SUBSAMPLED_RATIO,
                 transform=None):

        self.anchor_box_file = anchor_box_file
        self.subsampled_ratio = subsampled_ratio
        self.resized_image_size = resized_image_size
        self.test_images_list = test_images_list
        self.transform = transform

        with open(self.anchor_box_file, 'r') as anchor_file:
            anchor_tuples = anchor_file.read().replace('\n', '')

        # assert the condition itself: wrapping (condition, message) in parentheses creates a tuple that is always truthy
        assert len(anchor_tuples) > 0, "Anchor data is empty!"

        self.all_anchor_sizes = dict(eval(anchor_tuples))
        self.anchor_sizes = np.asarray(eval(self.all_anchor_sizes[str(
            self.resized_image_size)]),
                                       dtype=np.float32)

        self.anchors_list = generate_anchors(
            anchor_sizes=self.anchor_sizes,
            subsampled_ratio=self.subsampled_ratio,
            resized_image_size=self.resized_image_size)
Example #3
    def initialize(self, feat, label_map, gt_bbox):

        # build initial target classifier
        init_params, init_lrs = self.meta_init.initialize()
        self.meta_opti.initialize(init_lrs)
        target_cell_sz = torch.ceil(gt_bbox[:, 0, 2:]/config.cell_sz)
        filter_size = compute_filter_size(target_cell_sz)
        base_anchor_sizes = filter_size/config.filter_scale*config.cell_sz
        self.map_size = feat.shape[3]
        self.anchors = generate_anchors(self.map_size, base_anchor_sizes)

        # calculate initial update loss
        pred_map, pred_bbox = adaptable_conv2d(feat, init_params, filter_size)
        map_loss = self.l2loss(pred_map, label_map)
        bbox_loss = self.smoothl1loss(pred_bbox, gt_bbox, self.anchors)
        loss = map_loss + bbox_loss
        # meta update
        grads = torch.autograd.grad(loss, init_params)
        self.updated_params = init_params - init_lrs * grads[0]
        pred_map, pred_bbox = adaptable_conv2d(feat, self.updated_params, filter_size)
        lh_map_loss = self.l2loss(pred_map, label_map)
        lh_bbox_loss = self.smoothl1loss(pred_bbox, gt_bbox, self.anchors)
        lh_loss = lh_map_loss + lh_bbox_loss
        print(map_loss.data.item(), lh_map_loss.data.item())
        print(bbox_loss.data.item(), lh_bbox_loss.data.item())
        print(loss.data.item(), lh_loss.data.item())

        self.updating_feats = []
        self.updating_maps = []
        self.updating_bboxes = []
        self.filter_size = filter_size
        self.ref_score = np.max(pred_map.data.cpu().numpy())
Example #4
 def anchorboxes(self):
     from utils import generate_anchors
     layer_anchors = []
     for feat_shape, step, scale, ratio in zip(
             self.params.feat_shapes, self.params.anchor_steps,
             self.params.anchor_scales, self.params.anchor_aspectratios):
         layer_anchors.append(
             generate_anchors(feat_shape, step, scale, ratio))
     return layer_anchors
class FaceDetector:
    # load the model
    ort_session = onnxruntime.InferenceSession(
        "./face_recognition/data/ssd_mini_w360.onnx")

    # anchor configuration
    feature_map_sizes = [[45, 45], [23, 23], [12, 12], [6, 6], [4, 4]]
    anchor_sizes = [[0.04, 0.056], [0.08, 0.11], [0.16, 0.22], [0.32, 0.45],
                    [0.64, 0.72]]
    anchor_ratios = [[1, 0.62, 0.42]] * 5

    # generate anchors
    anchors = generate_anchors(feature_map_sizes, anchor_sizes, anchor_ratios)

    # for inference, the batch size is 1 and the model output shape is [1, N, 4],
    # so we expand the anchors to [1, anchor_num, 4] to match
    anchors_exp = np.expand_dims(anchors, axis=0)

    id2class = {0: 'Mask', 1: 'NoMask'}

    def detect(self,
               image,
               conf_thresh=0.6,
               iou_thresh=0.4,
               target_shape=(360, 360)):
        height, width, _ = image.shape
        image_resized = cv2.resize(image, target_shape)
        image_np = image_resized / 255.0
        image_exp = np.expand_dims(image_np, axis=0)
        image_transposed = image_exp.transpose((0, 3, 1, 2)).astype(np.float32)
        ort_inputs = {self.ort_session.get_inputs()[0].name: image_transposed}
        y_bboxes_output, y_cls_output = self.ort_session.run(None, ort_inputs)
        # remove the batch dimension, since the batch size is always 1 at inference.
        y_bboxes = decode_bbox(self.anchors_exp, y_bboxes_output)[0]
        y_cls = y_cls_output[0]
        # To speed up, do single class NMS, not multiple classes NMS.
        bbox_max_scores = np.max(y_cls, axis=1)
        bbox_max_score_classes = np.argmax(y_cls, axis=1)
        # keep_idxs holds the indices of the boxes that survive NMS.
        keep_idxs = single_class_non_max_suppression(y_bboxes, bbox_max_scores,
                                                     conf_thresh, iou_thresh)
        max_area, r_item = -1, None
        for idx in keep_idxs:
            # conf = float(bbox_max_scores[idx])
            class_id = bbox_max_score_classes[idx]
            bbox = y_bboxes[idx]
            # clip the coordinates so they do not exceed the image boundary.
            xmin = max(0, int(bbox[0] * width))
            ymin = max(0, int(bbox[1] * height))
            xmax = min(int(bbox[2] * width), width)
            ymax = min(int(bbox[3] * height), height)
            item = (xmin, ymin, xmax, ymax), class_id
            area = (xmax - xmin) * (ymax - ymin)
            if max_area < area:
                max_area, r_item = area, item
        return r_item
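A minimal usage sketch for the detector above (the module name, image path, and output file are hypothetical; it assumes cv2 and the ONNX model referenced in the snippet are available):

import cv2
from face_detector import FaceDetector  # hypothetical module containing the class above

detector = FaceDetector()
frame = cv2.imread("sample.jpg")  # hypothetical test image
result = detector.detect(frame, conf_thresh=0.6, iou_thresh=0.4)
if result is not None:
    (xmin, ymin, xmax, ymax), class_id = result
    label = FaceDetector.id2class[int(class_id)]
    # draw the largest detected face and its Mask/NoMask label
    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)
    cv2.putText(frame, label, (xmin, max(ymin - 4, 0)), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 1)
    cv2.imwrite("annotated.jpg", frame)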
Example #6
    def __init__(self, args):
        super().__init__()
        self.args = args

        self.dropout = args.dropout
        self.max_num_frames = args.max_num_frames

        self.anchors = generate_anchors(dataset=args.dataset)
        self.num_anchors = self.anchors.shape[0]
        widths = (self.anchors[:, 1] - self.anchors[:, 0] + 1)  # [num_anchors]
        centers = np.arange(0, args.max_num_frames)  # [video_len]
        start = np.expand_dims(centers,
                               1) - 0.5 * (np.expand_dims(widths, 0) - 1)
        end = np.expand_dims(centers,
                             1) + 0.5 * (np.expand_dims(widths, 0) - 1)
        self.proposals = np.stack([start, end],
                                  -1)  # [video_len, num_anchors, 2]

        # VideoEncoder
        self.video_encoder = VideoEncoder(args)

        # SentenceEncoder
        self.sentence_encoder = SentenceEncoder(args)

        #attentive graph
        self.atten = CoAttention(args.d_model, args.d_model)
        self.intra_v = CoAttention_intra(args.max_num_frames, args.d_model)
        self.intra_s = CoAttention_intra(args.max_num_words, args.d_model)

        self.update_v = ConvGRUCell(args.d_model, args.d_model)
        self.update_s = ConvGRUCell(args.d_model, args.d_model)
        self.update_v_intra = ConvGRUCell(args.d_model, args.d_model)
        self.update_s_intra = ConvGRUCell(args.d_model, args.d_model)
        self.v2s = TanhAttention(args.d_model)

        self.rnn = DynamicGRU(args.d_model << 1,
                              args.d_model >> 1,
                              bidirectional=True,
                              batch_first=True)

        self.fc_score = nn.Conv1d(args.d_model,
                                  self.num_anchors,
                                  kernel_size=1,
                                  padding=0,
                                  stride=1)
        self.fc_reg = nn.Conv1d(args.d_model,
                                self.num_anchors << 1,
                                kernel_size=1,
                                padding=0,
                                stride=1)

        # loss function
        self.criterion1 = nn.BCELoss()
        self.criterion2 = nn.SmoothL1Loss()
Example #7
def rpn_model_fn(features, labels, mode):
    feature_maps = ResNet_w_FPN.forward(features['img'])
    pred_rpn_anchor_logits, pred_rpn_anchor_probs, pred_rpn_anchor_deltas = RPN.forward(
        feature_maps)
    class_loss, bbox_loss = tf.map_fn(
        RPN.rpn_loss,
        (pred_rpn_anchor_logits, pred_rpn_anchor_deltas, labels['rpn_labels'],
         labels['rpn_deltas'], labels['rpn_mask'],
         labels['rpn_positive_range'], labels['rpn_positive_mask']),
        dtype=(tf.float32, tf.float32))
    class_loss = tf.reduce_sum(class_loss)
    # shape = tf.shape(bbox_loss, name='shape')
    bbox_loss = tf.reduce_sum(bbox_loss)
    loss = tf.identity(class_loss + bbox_loss, name='loss')

    all_anchors = tf.convert_to_tensor(
        utils.generate_anchors(format=utils.BBOX_FORMAT.YXYX),
        dtype=tf.float32)
    all_anchors = tf.tile(tf.expand_dims(all_anchors, 0),
                          [tf.shape(pred_rpn_anchor_logits)[0], 1, 1])
    proposals = tf.map_fn(
        RPN.inference,
        (pred_rpn_anchor_probs, pred_rpn_anchor_deltas, all_anchors),
        dtype=tf.float32)
    # get proposals in YXYX format
    proposals = tf.identity(proposals, name='proposals')
    rois, target_class, target_deltas, target_mask = tf.map_fn(
        utils.generate_mask_rcnn_x_y_tf,
        (proposals, labels['bboxs'], labels['cls'], labels['masks'],
         labels['valid_label_ranges']),
        dtype=(tf.float32, tf.int32, tf.float32, tf.float32))
    # rois = tf.Print(rois, [tf.shape(rois), tf.shape(target_class), tf.shape(target_deltas), tf.shape(target_mask)])
    mrcnn_cls_bbox_in = RoiAlign.forward(rois, feature_maps[:-1],
                                         config.CLS_BBOX_ROI_POOL_SIZE)
    mrcnn_mask_in = RoiAlign.forward(rois, feature_maps[:-1],
                                     config.MASK_ROI_POOL_SIZE)
    mrcnn_mask_out_logits, _ = rcnn_head.forward_mask(mrcnn_mask_in)
    mrcnn_cls_logits, _, mrcnn_deltas = rcnn_head.forward_cls_bbox(
        mrcnn_cls_bbox_in)
    cls_loss, bbox_loss, mask_loss = tf.map_fn(
        rcnn_head.mrcnn_loss,
        (mrcnn_cls_logits, mrcnn_deltas, mrcnn_mask_out_logits, target_class,
         target_deltas, target_mask, rois),
        dtype=(tf.float32, tf.float32, tf.float32))

    # return proposals, rois, target_class, target_mask, mrcnn_cls_bbox_in, mrcnn_deltas, cls_loss, bbox_loss, mask_loss

    global_step = tf.train.get_global_step()
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.AdamOptimizer()
        train_op = optimizer.minimize(loss, global_step=global_step)
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)
Example #8
 def __init__(self, model_path, gpu_id):
     self.gpu_id = gpu_id
     with torch.cuda.device(gpu_id):
         self.model = SiamRPN()
         self.model.load_model(model_path)
         self.model = self.model.cuda()
         self.model.eval()
     self.response_sz = config.response_sz
     self.anchors = generate_anchors(config.total_stride,
                                     config.anchor_base_size,
                                     config.anchor_scales,
                                     config.anchor_ratios, self.response_sz)
     self.transforms = transforms.Compose([ToTensor()])
Example #9
 def __init__(self, feature_path, data_path, word2vec,
              max_num_frames, max_num_words, max_num_nodes, is_training=True):
     data = load_json(data_path)
     super().__init__(feature_path, data, word2vec, is_training)
     self.max_num_frames = max_num_frames
     self.max_num_words = max_num_words
     self.max_num_nodes = max_num_nodes
     self.anchors = generate_anchors(dataset='ActivityNet')
     widths = (self.anchors[:, 1] - self.anchors[:, 0] + 1)  # [num_anchors]
     centers = np.arange(0, max_num_frames)  # [video_len]
     start = np.expand_dims(centers, 1) - 0.5 * (np.expand_dims(widths, 0) - 1)
     end = np.expand_dims(centers, 1) + 0.5 * (np.expand_dims(widths, 0) - 1)
     self.proposals = np.stack([start, end], -1)  # [video_len, num_anchors, 2]
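A toy illustration of how these anchor widths and frame centers expand into per-frame proposals (a self-contained sketch with made-up numbers, not the real generate_anchors output):

import numpy as np

max_num_frames = 4
anchors = np.array([[0, 3], [0, 7]])        # toy [start, end] anchors
widths = anchors[:, 1] - anchors[:, 0] + 1  # [4, 8]
centers = np.arange(max_num_frames)         # [0, 1, 2, 3]

start = np.expand_dims(centers, 1) - 0.5 * (np.expand_dims(widths, 0) - 1)
end = np.expand_dims(centers, 1) + 0.5 * (np.expand_dims(widths, 0) - 1)
proposals = np.stack([start, end], -1)      # shape (4, 2, 2) = [video_len, num_anchors, 2]

print(proposals[2])  # proposals centered on frame 2: [[0.5, 3.5], [-1.5, 5.5]]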
Example #10
 def get_anchors(self, image_shape):
     """Returns anchor pyramid for the given image size."""
     feature_map_size = image_shape[0] // config.RPN_DOWNSCALE
     # Cache anchors and reuse if image shape is the same
     if tuple(image_shape) not in self._anchor_cache:
         # Generate Anchors
         a = utils.generate_anchors(config.RPN_ANCHOR_HEIGHTS,
                                    config.RPN_ANCHOR_WIDTHS,
                                    feature_map_size, config.RPN_DOWNSCALE,
                                    config.RPN_ANCHOR_STRIDE)
         # Normalize coordinates
         self._anchor_cache[tuple(image_shape)] = utils.norm_boxes(
             a, image_shape[:2])
     return self._anchor_cache[tuple(image_shape)]
Example #11
    def __init__(self,
                 resized_image_size,
                 k=cfg.K,
                 classes=cfg.CLASSES.copy(),
                 list_images=cfg.LIST_IMAGES.copy(),
                 list_annotations=cfg.LIST_ANNOTATIONS.copy(),
                 total_images=cfg.TOTAL_IMAGES,
                 subsampled_ratio=cfg.SUBSAMPLED_RATIO,
                 detection_conv_size=cfg.DETECTION_CONV_SIZE,
                 excluded_classes=cfg.EXCLUDED_CLASSES.copy(),
                 anchor_box_write=cfg.ANCHOR_BOXES_STORE,
                 transform=None):
        '''
        Initialize parameters and anchors using KMeans.
        '''

        self.resized_image_size = resized_image_size
        self.classes = classes
        self.list_images = list_images
        self.list_annotations = list_annotations
        self.total_images = total_images
        self.k = k
        self.subsampled_ratio = subsampled_ratio
        self.detection_conv_size = detection_conv_size
        self.excluded_classes = excluded_classes
        self.transform = transform
        self.anchor_boxes_write = anchor_box_write

        # get the top-k anchor sizes using modified K-Means clustering.
        self.anchor_sizes = cluster_bounding_boxes(
            k=self.k,
            total_images=self.total_images,
            resized_image_size=self.resized_image_size,
            list_annotations=cfg.LIST_ANNOTATIONS,
            classes=cfg.CLASSES,
            excluded_classes=cfg.EXCLUDED_CLASSES)

        # Use Python's dbm to store the anchor sizes computed for this training set at every image size.
        # The stored anchor sizes are needed later at evaluation time, when no training data is available for clustering.
        yolo_db = dbm.open(cfg.YOLO_DB, 'c')
        # Each image size gets its own anchor set; every set is stored in the database for later evaluation.
        yolo_db[str(resized_image_size)] = str(self.anchor_sizes.tolist())
        yolo_db.close()

        self.anchors_list = generate_anchors(
            anchor_sizes=self.anchor_sizes,
            subsampled_ratio=self.subsampled_ratio,
            resized_image_size=self.resized_image_size)
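The dbm store above keeps one anchor set per image size so that evaluation can recover it without re-clustering; a minimal round-trip sketch (file name and anchor values are made up):

import dbm

# store: one anchor list per resized image size
yolo_db = dbm.open('yolo_anchors.db', 'c')  # hypothetical file name
yolo_db['320'] = str([[0.05, 0.07], [0.12, 0.20], [0.33, 0.41]])
yolo_db.close()

# retrieve later, e.g. at evaluation time
yolo_db = dbm.open('yolo_anchors.db', 'r')
anchor_sizes = eval(yolo_db['320'].decode())
yolo_db.close()
print(anchor_sizes)  # [[0.05, 0.07], [0.12, 0.2], [0.33, 0.41]]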
Example #12
def generate_proposals(data):
    # Extract feature map
    feature_map = CNN_model_cut.predict(
        data.reshape(-1, data.shape[0], data.shape[1], data.shape[2]))
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')

    # Extract RPN results
    RPN_results = RPN_model.predict(padded_fcmap)
    anchor_probs = RPN_results[0].reshape((-1, 1))
    anchor_targets = RPN_results[1].reshape((-1, 4))

    # Original anchors
    feature_size = feature_map.shape[1]
    number_feature_points = feature_size * feature_size
    feature_stride = int(image_size / feature_size)
    base_anchors = generate_anchors(feature_stride,
                                    feature_stride,
                                    ratios=ANCHOR_RATIOS,
                                    scales=ANCHOR_SCALES)
    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    original_anchors = (base_anchors.reshape(
        (1, anchor_number, 4)) + shifts.reshape(
            (1, number_feature_points, 4)).transpose((1, 0, 2)))
    original_anchors = original_anchors.reshape((-1, 4))

    # Proposals by the RPN
    proposals = bbox_transform_inv(original_anchors, anchor_targets)
    proposals = clip_boxes(proposals,
                           (data.shape[0], data.shape[1]))  # clip to image.
    high_to_low_scores = anchor_probs.ravel().argsort()[::-1]  # highest scores
    high_to_low_scores = high_to_low_scores[0:N]
    proposals = proposals[high_to_low_scores, :]
    anchor_probs = anchor_probs[high_to_low_scores]

    del original_anchors
    del RPN_results
    del feature_map
    del padded_fcmap

    return proposals, anchor_probs
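The base-anchor-plus-shifts broadcasting used above (and again in Examples #20 and #25) is easier to see with toy numbers; a self-contained NumPy sketch, independent of any particular generate_anchors implementation:

import numpy as np

# two base anchors anchored at the first cell, in (x1, y1, x2, y2) form
base_anchors = np.array([[0, 0, 15, 15],
                         [0, 0, 31, 31]], dtype=np.float32)
feature_size, feature_stride = 3, 16

# one (x, y, x, y) shift per feature-map cell
shift = np.arange(feature_size) * feature_stride
shift_x, shift_y = np.meshgrid(shift, shift)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (9, 4)

# broadcasting (1, A, 4) + (P, 1, 4) -> (P, A, 4), then flatten to (P*A, 4)
all_anchors = base_anchors.reshape((1, -1, 4)) + shifts.reshape((-1, 1, 4))
all_anchors = all_anchors.reshape((-1, 4))
print(all_anchors.shape)  # (18, 4) == feature_size**2 * num_base_anchors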
Example #13
 def __init__(self,in_channel=512,out_channel=512,feature_stride=16):
     super().__init__()
     self.in_channel=in_channel
     self.out_channel=out_channel
     base_anchors=utils.generate_base_anchors(feature_stride=feature_stride)
     self.anchors=utils.generate_anchors(base_anchors)
     self.num_anchors=self.anchors.size(0)
     self.proposal_layer=Proposal()
     self.rpn_conv=nn.Sequential(nn.Conv2d(in_channel,out_channel,
                                           kernel_size=3,
                                           stride=1,
                                           padding=1),
                                 nn.LeakyReLU(0.2,inplace=True))
     self.cls_conv=nn.Conv2d(in_channel,self.num_anchors,
                             kernel_size=1,
                             stride=1,
                             padding=0)
     self.reg_conv=nn.Conv2d(in_channel,self.num_anchors*4,
                             kernel_size=1,
                             stride=1,
                             padding=0)
      self.softmax=nn.Sigmoid()  # nn.Sigmoid takes no dim argument; cls_conv emits one objectness score per anchor
Example #14
from networks import backbone
import tensorflow as tf
import numpy as np

from utils import generate_anchors, read_batch_data
from ops import smooth_l1, focal_loss
from config import BATCH_SIZE, IMG_H, IMG_W, K, WEIGHT_DECAY, LEARNING_RATE

anchors_p3 = generate_anchors(area=32, stride=8)
anchors_p4 = generate_anchors(area=64, stride=16)
anchors_p5 = generate_anchors(area=128, stride=32)
anchors_p6 = generate_anchors(area=256, stride=64)
anchors_p7 = generate_anchors(area=512, stride=128)
anchors = np.concatenate(
    (anchors_p3, anchors_p4, anchors_p5, anchors_p6, anchors_p7), axis=0)


def train():
    inputs = tf.placeholder(tf.float32, [BATCH_SIZE, IMG_H, IMG_W, 3])
    labels = tf.placeholder(tf.float32, [BATCH_SIZE, None, K])
    target_bbox = tf.placeholder(tf.float32, [BATCH_SIZE, None, 4])
    foreground_mask = tf.placeholder(tf.float32, [BATCH_SIZE, None])
    valid_mask = tf.placeholder(
        tf.float32, [BATCH_SIZE, None]
    )  # valid_mask = foreground_mask + background_mask, remove the influence of the bbox iou in [0.4, 0.5]
    is_training = tf.placeholder(tf.bool)
    learning_rate = tf.placeholder(tf.float32)
    class_logits, box_logits, _, _ = backbone(inputs, is_training)
    class_loss = tf.reduce_sum(focal_loss(class_logits, labels) *
                               valid_mask) / tf.reduce_sum(foreground_mask)
    box_loss = tf.reduce_sum(
Example #15
def data_generator(dataset,
                   config,
                   shuffle=True,
                   augment=True,
                   random_rois=0,
                   batch_size=1,
                   detection_targets=False):
    b = 0  # batch item index
    image_index = -1
    image_ids = np.copy(dataset.image_ids)
    error_count = 0

    anchors = []
    for i, scale in enumerate(config.ANCHOR_SCALES):
        anchors.append(
            utils.generate_anchors(scales=scale,
                                   ratios=config.ANCHOR_RATIOS,
                                   shape=[8 * 2**i, 8 * 2**i],
                                   feature_stride=config.FEATURE_STRIDE /
                                   (2**i),
                                   anchor_stride=1))
    anchors = np.concatenate(anchors, axis=0)

    while True:
        try:
            # Increment index to pick next image. Shuffle if at the start of an epoch.
            image_index = (image_index + 1) % len(image_ids)
            if shuffle and image_index == 0:
                np.random.shuffle(image_ids)

            # Get GT bounding boxes and masks for image.
            image_id = image_ids[image_index]
            image, image_shape = dataset.load_image(image_id)
            image_bbox, image_class_id, attribute, class_attribute = dataset.load_bbox_class_attr(
                image_id)

            image_bbox = translate_bbox(image_bbox,
                                        input_shape=image_shape[:2],
                                        output_shape=config.IMAGE_SHAPE[:2])
            image = cv2.resize(image, tuple(config.IMAGE_SHAPE[:2]))
            gt_target_object, gt_target_bbox, gt_target_bbox_object = assign_bbox_to_anchors(
                config, image_bbox, anchors, image_id)

            # Skip images that have no instances. This can happen in cases
            # where we train on a subset of classes and the image doesn't
            # have any of the classes we care about.
            # if not np.any(gt_class_ids > 0):
            #     continue

            # Init batch arrays
            if b == 0:
                batch_images = np.zeros((batch_size, ) + image.shape,
                                        dtype=np.float32)
                batch_gt_class_ids = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
                batch_gt_object = np.zeros((batch_size, anchors.shape[0]),
                                           dtype=np.int32)
                batch_gt_boxes = np.zeros((batch_size, anchors.shape[0], 4),
                                          dtype=np.float32)
                batch_gt_boxes_object = np.zeros(
                    (batch_size, anchors.shape[0]), dtype=np.int32)
                batch_gt_attribute = np.zeros(
                    (batch_size, config.NUM_ATTRIBUTE))

            # Add to batch
            batch_images[b] = image.astype(
                np.float32)  # mold_image(image.astype(np.float32), config)
            batch_gt_class_ids[b, 0] = image_class_id
            batch_gt_object[b] = gt_target_object
            batch_gt_boxes[b, :gt_target_bbox.shape[0]] = gt_target_bbox
            batch_gt_boxes_object[b] = gt_target_bbox_object
            batch_gt_attribute[b] = attribute

            b += 1

            # Batch full?
            if b >= batch_size:
                inputs = [
                    batch_images, batch_gt_attribute, batch_gt_object,
                    batch_gt_boxes, batch_gt_boxes_object
                ]
                outputs = []
                yield inputs, outputs

                # start a new batch
                b = 0
        except (GeneratorExit, KeyboardInterrupt):
            raise
        except:
            # Log it and skip the image
            logging.exception("Error processing image {}".format(
                dataset.print_image_info[image_id]))
            error_count += 1
            if error_count > 5:
                raise
Example #16
    def forward(self, patches, label_maps, gt_bboxes, iter_step=0):
        # ----------------- feature extraction -----------------------------------------
        with torch.no_grad():
            feats = self.feat_extractor(patches.contiguous().view([-1] + list(patches.shape[2:])))
        win_cell_sz = label_maps.size(3)
        feats = F.interpolate(feats, (win_cell_sz, win_cell_sz), mode='bilinear', align_corners=True)
        feats = feats.view(list(patches.shape[0:2]) + list(feats.shape[1:]))

        map_loss_total, bbox_loss_total, meta_loss_total = 0, 0, 0
        # --------------- initial frame meta updating ----------------------------------
        init_params, init_lrs = self.meta_init.initialize()
        self.meta_opti.initialize(init_lrs)
        # random scale params to prevent overfitting
        offset = (config.feat_channels + 1) * config.cf_channels + \
                 config.base_filter_size[0] * config.base_filter_size[1] * config.cf_channels + 1
        rand_scales_cf = torch.exp(config.rand_scale_radius_cf * (2 * torch.rand(offset) - 1))
        rand_scales_reg = torch.exp(config.rand_scale_radius_reg * (2 * torch.rand(len(init_params)-offset) - 1))
        rand_scales = torch.cat([rand_scales_cf, rand_scales_reg], 0)
        if torch.cuda.is_available():
            rand_scales = rand_scales.cuda()
        init_params = init_params / rand_scales

        filter_size = compute_filter_size(gt_bboxes[:, 0, 2:])
        base_anchor_sizes = filter_size/config.filter_scale*config.cell_sz
        anchors = generate_anchors(win_cell_sz, base_anchor_sizes)
        # calculate initial update loss
        n_init_aug = len(config.aug_init_scales)*len(config.aug_init_ratios)
        pred_map, pred_bbox = adaptable_conv2d(feats[:, 0:n_init_aug], init_params*rand_scales, filter_size)
        map_loss = self.l2loss(pred_map, label_maps[:, 0:n_init_aug])
        bbox_loss = self.smoothl1loss(pred_bbox, gt_bboxes[:, 0:n_init_aug], anchors)
        loss = map_loss + bbox_loss
        grads = torch.autograd.grad(loss, init_params, create_graph=True)
        updated_params = init_params - init_lrs * grads[0]

        # calculate initial meta loss
        if self.training:
            lh_idx = np.random.randint(config.look_ahead) + n_init_aug
            lh_pred_map, lh_pred_bbox = adaptable_conv2d(feats[:, lh_idx: lh_idx + 1], updated_params * rand_scales, filter_size)
            lh_map_loss = self.l2loss(lh_pred_map, label_maps[:, lh_idx:lh_idx + 1])
            lh_bbox_loss = self.smoothl1loss(lh_pred_bbox, gt_bboxes[:, lh_idx: lh_idx + 1], anchors)
        else:
            lh_pred_map, lh_pred_bbox = adaptable_conv2d(feats[:, n_init_aug:config.look_ahead + n_init_aug], updated_params * rand_scales, filter_size)
            lh_map_loss = self.l2loss(lh_pred_map, label_maps[:, n_init_aug:config.look_ahead + n_init_aug])
            lh_bbox_loss = self.smoothl1loss(lh_pred_bbox, gt_bboxes[:, n_init_aug:config.look_ahead + n_init_aug], anchors)

        meta_loss_init = lh_map_loss + lh_bbox_loss
        if self.training and iter_step % config.disp_inter == 0 and torch.cuda.current_device() == 0:
            self.writer.add_scalar('roam_training/init_update_loss', loss.data.item(), iter_step)
            self.writer.add_scalar('roam_training/init_meta_loss', meta_loss_init.data.item(), iter_step)
            self.writer.add_histogram('roam_training/init_cf_params', init_params[:offset], iter_step)
            self.writer.add_histogram('roam_training/init_cf_lrs', init_lrs[:offset], iter_step)
            self.writer.add_histogram('roam_training/init_reg_params', init_params[offset:], iter_step)
            self.writer.add_histogram('roam_training/init_reg_lrs', init_lrs[offset:], iter_step)
        print(bbox_loss.data.item(), lh_bbox_loss.data.item())
        map_loss_total += lh_map_loss
        bbox_loss_total += lh_bbox_loss
        meta_loss_total += meta_loss_init

        # --------------- subsequent frames meta updating ------------------------------
        for k in range(1, config.time_step):
            # adapt to new size
            filter_size = compute_filter_size(gt_bboxes[:, k * config.look_ahead + n_init_aug, 2:])
            base_anchor_sizes = filter_size / config.filter_scale * config.cell_sz
            anchors = generate_anchors(win_cell_sz, base_anchor_sizes)

            # calculate update loss
            training_idxes = range((k - 1) * config.look_ahead + n_init_aug, k * config.look_ahead + n_init_aug)
            pred_map, pred_bbox = adaptable_conv2d(feats[:, training_idxes], updated_params * rand_scales, filter_size)
            map_loss = self.l2loss(pred_map, label_maps[:, training_idxes])
            bbox_loss = self.smoothl1loss(pred_bbox, gt_bboxes[:, training_idxes], anchors)
            loss = map_loss + bbox_loss
            # meta update
            grads = torch.autograd.grad(loss, updated_params, retain_graph=True)
            updated_params = self.meta_opti.meta_update(updated_params, loss, grads[0], self.writer, iter_step)

            # calculate meta loss
            if self.training:
                delta_lh_idx = np.random.randint(config.look_ahead)
                lh_idx = k * config.look_ahead + n_init_aug + delta_lh_idx
                lh_pred_map, lh_pred_bbox = adaptable_conv2d(feats[:, lh_idx: lh_idx + 1], updated_params * rand_scales, filter_size)
                lh_map_loss = self.l2loss(lh_pred_map, label_maps[:, lh_idx:lh_idx + 1])
                lh_bbox_loss = self.smoothl1loss(lh_pred_bbox, gt_bboxes[:, lh_idx:lh_idx + 1], anchors)
            else:
                chosen_idxes = range(k * config.look_ahead + n_init_aug, (k + 1) * config.look_ahead + n_init_aug)
                lh_pred_map, lh_pred_bbox= adaptable_conv2d(feats[:, chosen_idxes], updated_params * rand_scales, filter_size)
                lh_map_loss = self.l2loss(lh_pred_map, label_maps[:, chosen_idxes])
                lh_bbox_loss = self.smoothl1loss(lh_pred_bbox, gt_bboxes[:, chosen_idxes], anchors)
            meta_loss_update = lh_map_loss + lh_bbox_loss

            map_loss_total += lh_map_loss
            bbox_loss_total += lh_bbox_loss
            meta_loss_total += meta_loss_update

        map_loss_avg = map_loss_total / config.time_step
        bbox_loss_avg = bbox_loss_total / config.time_step
        meta_loss_avg = meta_loss_total / config.time_step
        return map_loss_avg, bbox_loss_avg, meta_loss_avg
Example #17
nms_thresh = 0.4
box_num = 5
use_fpn = 1
box_thresh = 0.6
cls_num = len(classes)

(input_w, input_h) = input_size = (672, 224)
(out_w, out_h) = output_size = (input_w // 16, input_h // 16)  # integer division keeps the feature-map size integral
biases = [8.00, 10.27, 15.51, 26.77, 35.73, 44.89, 59.50, 103.39, 160.69, 162.92]
biases = np.array(biases, dtype=np.float32)
if use_fpn == 1:
    biases = 1.0 * biases  * out_w / input_w

box_num = len(biases) // 2  # integer division: number of (w, h) anchor pairs

anchors = generate_anchors(output_size, box_num, biases)

def_img_path = '../assets/000000.png'
def_model = '../models_maskyolo/mb_v2_t4_cls5_yolo/mb_v2_t4_cls5_deploy.prototxt'
def_weights = '../models_maskyolo/pretrained_models/mb_v2_t4_cls5.caffemodel'

def run(img_path=def_img_path, model=def_model, weights=def_weights):
    net = caffe.Net(model, weights, caffe.TEST) 

    img = cv2.imread(img_path)
    (height, width, channel) = img.shape

    img_org = img.copy()
    #img = cv2.resize(img, (480, 480))#, interpolation=cv2.INTER_AREA)
    img = cv2.resize(img, input_size, interpolation=cv2.INTER_AREA)
    (height, width, channel) = img.shape
Example #18
from data import create_data
from svhn_dataset import SVHN
from train import RetinaTrainer, parse_args, output_predictions
import efficientdet
import utils
from coco_eval import CocoEvaluation


if __name__ == '__main__':
    args, argstr = parse_args(skip_name = True)

    # Prepare data
    num_classes = SVHN.LABELS
    pyramid_levels = args.pyramid_levels
    anchors = utils.generate_anchors(pyramid_levels, args.image_size, 
            num_scales=args.num_scales, aspect_ratios=args.aspect_ratios)

    train_dataset, dev_dataset, _ = create_data(args.batch_size, 
            anchors, image_size = args.image_size,
            test=args.test, augmentation=args.augmentation)

    # Prepare network and trainer
    anchors_per_level = args.num_scales * len(args.aspect_ratios)
    network = efficientdet.EfficientDet(num_classes, anchors_per_level,
            input_size = args.image_size, pyramid_levels = pyramid_levels,
            filters=args.efficientdet_filters, num_layers = args.efficientdet_layers) 
    model = RetinaTrainer(network, anchors, train_dataset, dev_dataset, args)

    # Load weights
    model.model.load_weights('model.h5')
    
def view(dataset, config, shuffle=True, augment=True, batch_size=1):
    """
    shuffle: shuffle image every epoch
    return:
    - input_image
    - input_image_meta
    - input_rpn_match
    - input_rpn_bbox
    - input_gt_class_ids
    - input_gt_boxes
    """
    anchors = utils.generate_anchors(config.ANCHOR_SCALES,
                                     config.ANCHOR_RATIOS,
                                     config.ANCHOR_STRIDE,
                                     config.BACKBONE_SHAPES,
                                     config.BACKBONE_STRIDES)

    b = 0 #batch index
    image_ids = np.copy(dataset.image_ids)
    print(len(image_ids))
    error_count = 0

    index=1723

    while True:
        image_id = image_ids[index]

        input_image, input_image_meta, input_gt_class_ids, input_gt_boxes =\
            load_image_gt(dataset, config, image_id)

        print(input_image.shape)
        print(input_image_meta)
        print(input_gt_class_ids)
        print(input_gt_boxes)

        if not np.any(input_gt_class_ids > 0):
            continue

        #RPN targets
        rpn_match, rpn_bbox = RPN.build_targets(input_image.shape, anchors, input_gt_class_ids, input_gt_boxes, config)

        #print(input_gt_boxes)
        for gt_box in input_gt_boxes:
            y1,x1,y2,x2 = gt_box
            print(y1,x1,y2,x2)

            y1 = int(y1)
            x1 = int(x1)
            y2 = int(y2)
            x2 = int(x2)

            for y in range(y1,y2+1):
                input_image[y][x1][0] = 255.0
                input_image[y][x1][1] = 0.0
                input_image[y][x1][2] = 0.0

            for y in range(y1,y2+1):
                input_image[y][x2][0] = 255.0
                input_image[y][x2][1] = 0.0
                input_image[y][x2][2] = 0.0

            for x in range(x1,x2+1):
                input_image[y1][x][0] = 255.0
                input_image[y1][x][1] = 0.0
                input_image[y1][x][2] = 0.0
            for x in range(x1,x2+1):
                input_image[y2][x][0] = 255.0
                input_image[y2][x][1] = 0.0
                input_image[y2][x][2] = 0.0

        for i,anchor in enumerate(anchors):
            #anchor = utils.clip_boxes(anchor, np.array([0,0,832,832]))
            if rpn_match[i]==0:
                continue
            y1,x1,y2,x2 = anchor

            y1 = max(min(y1,512-1), 0)
            x1 = max(min(x1,512-1), 0)
            y2 = max(min(y2,512-1), 0)
            x2 = max(min(x2,512-1), 0)

            y1 = int(y1)
            x1 = int(x1)
            y2 = int(y2)
            x2 = int(x2)
            #print(y1,x1,y2,x2)
            if rpn_match[i]==1:
                for y in range(y1,y2+1):
                    input_image[y][x1][0] = 0.0
                    input_image[y][x1][1] = 255.0
                    input_image[y][x1][2] = 0.0
                    input_image[y][x2][0] = 0.0
                    input_image[y][x2][1] = 255.0
                    input_image[y][x2][2] = 0.0

                for x in range(x1,x2+1):
                    input_image[y1][x][0] = 0.0
                    input_image[y1][x][1] = 255.0
                    input_image[y1][x][2] = 0.0
                    input_image[y2][x][0] = 0.0
                    input_image[y2][x][1] = 255.0
                    input_image[y2][x][2] = 0.0
            else:
                for y in range(y1,y2+1):
                    input_image[y][x1][0] = 0.0
                    input_image[y][x1][1] = 0.0
                    input_image[y][x1][2] = 255.0
                    input_image[y][x2][0] = 0.0
                    input_image[y][x2][1] = 0.0
                    input_image[y][x2][2] = 255.0

                for x in range(x1,x2+1):
                    input_image[y1][x][0] = 0.0
                    input_image[y1][x][1] = 0.0
                    input_image[y1][x][2] = 255.0
                    input_image[y2][x][0] = 0.0
                    input_image[y2][x][1] = 0.0
                    input_image[y2][x][2] = 255.0
        """
        f = open("todo.txt", "w")
        for x in rpn_match:
            f.write(str(x))
        for x in rpn_bbox:
            f.write(str(x))
        f.close()
        """
        from skimage.io import imsave
        imsave('GT_RPN_input_val.png',input_image)

        break
Example #20
def produce_batch(image_file, true_boxes):

    image_name = image_file.replace('.jpg','').replace(trainDIR ,'')
    image = Image.open(image_file).resize((image_size ,image_size ), Image.NEAREST)
    data =  asarray(image)/255.0
    del image
    feature_map = pretrained_model.predict(data.reshape(-1,data.shape[0],data.shape[1],data.shape[2]))
    del data  

    feature_size = feature_map.shape[1]
    feature_stride = int( image_size / feature_size ) 
    number_feature_points = feature_size * feature_size 
    shift = np.arange(0, feature_size) * feature_stride
    shift_x, shift_y = np.meshgrid(shift, shift)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose() 
    base_anchors = generate_anchors(feature_stride, feature_stride,ratios = ANCHOR_RATIOS, scales = ANCHOR_SCALES)
    all_anchors = (base_anchors.reshape((1, anchor_number, 4)) + shifts.reshape((1, number_feature_points, 4)).transpose((1, 0, 2)))
    total_anchor_number = anchor_number*number_feature_points
    all_anchors = all_anchors.reshape((total_anchor_number , 4))

    #only keep anchors inside image+border.
    border=0 # could also be FILTER_SIZE x feature stride
    inds_inside = np.where(
            (all_anchors[:, 0] >= -border) &
            (all_anchors[:, 1] >= -border) &
            (all_anchors[:, 2] < image_size+border ) &  
            (all_anchors[:, 3] < image_size+border)    
    )[0]
    anchors=all_anchors[inds_inside]
    useful_anchor_number = len(inds_inside)


    overlaps = bbox_overlaps(anchors, true_boxes) 

    which_box = overlaps.argmax(axis=1) # Which true box overlaps each anchor the most?
    anchor_max_overlaps = overlaps[np.arange(overlaps.shape[0]), which_box] 

    which_anchor = overlaps.argmax(axis=0) # Which anchor overlaps each true box the most?
    box_max_overlaps = overlaps[which_anchor, np.arange(overlaps.shape[1])] 
    which_anchor_v2 = np.where(overlaps == box_max_overlaps)[0]

    labels = np.empty((useful_anchor_number, ), dtype=np.float32)
    labels.fill(-1)

    labels[ which_anchor_v2 ] = 1
    labels[ anchor_max_overlaps >= FG_THRESHOLD] = 1
    labels[ anchor_max_overlaps <= BG_THRESHOLD] = 0

    fg_inds = np.where(labels == 1)[0]
    bg_inds = np.where(labels == 0)[0]

    num_fg = int(BATCH_SIZE/(1+BG_FG_FRAC))
    if len(fg_inds) > num_fg:
      disable_inds = np.random.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
      labels[disable_inds] = -1
    fg_inds = np.where(labels == 1)[0]

    num_bg = int(len(fg_inds) * BG_FG_FRAC) 
    if len(bg_inds) > num_bg:
        disable_inds = np.random.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
        labels[disable_inds] = -1
    bg_inds = np.where(labels == 0)[0]

    anchor_batch_inds = inds_inside[labels!=-1]
    np.random.shuffle(anchor_batch_inds)  
    feature_batch_inds=(anchor_batch_inds // anchor_number).astype(int)  # floor division gives the feature-map index; np.int is removed in modern NumPy

    pad_size = int((FILTER_SIZE-1)/2)
    padded_fcmap=np.pad(feature_map,((0,0),(pad_size,pad_size),(pad_size,pad_size),(0,0)),mode='constant')
    padded_fcmap=np.squeeze(padded_fcmap)
    batch_tiles=[]  
    for ind in feature_batch_inds:
        # x,y are the point in the feature map pointed at by feature_batch_inds indices
        x = ind % feature_size
        y = int(ind/feature_size)
        fc_snip=padded_fcmap[y:y+FILTER_SIZE,x:x+FILTER_SIZE,:] 
        batch_tiles.append(fc_snip)

    # unmap creates another array of labels that includes a -1 for the originally deleted anchors for being out of bounds.
    full_labels = unmap(labels, total_anchor_number , inds_inside, fill=-1)
    batch_labels =full_labels.reshape(-1,1,1,1*anchor_number)[feature_batch_inds]


    targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    pos_anchors=all_anchors[inds_inside[labels==1]] # positive anchors
    targets = bbox_transform(pos_anchors, true_boxes[which_box, :][labels==1])
    targets = unmap(targets, total_anchor_number, inds_inside[labels==1], fill=0)
    batch_targets = targets.reshape(-1,1,1,4*anchor_number)[feature_batch_inds]

    return np.asarray(batch_tiles), batch_labels.tolist(), batch_targets.tolist()
Example #21
def main():


    train_z_transforms = transforms.Compose([
        # RandomStretch(),
        # CenterCrop((config.exemplar_size, config.exemplar_size)),
        ToTensor()
    ])
    train_x_transforms = transforms.Compose([
        # RandomStretch(),
        # RandomCrop((config.instance_size, config.instance_size),
        #            config.max_translate),
        # ColorAug(config.color_ratio),
        ToTensor()
    ])
    val_z_transforms = transforms.Compose([
        # CenterCrop((config.exemplar_size, config.exemplar_size)),
        ToTensor()
    ])
    val_x_transforms = transforms.Compose([
        ToTensor()
    ])

    score_size = int((config.instance_size - config.exemplar_size) / config.total_stride + 1)

    anchors = generate_anchors(config.total_stride, config.anchor_base_size, config.anchor_scales,
                                    config.anchor_ratios,
                                    score_size)
    # create dataset
    train_dataset = GOT_10KDataset(train_data_dir, train_z_transforms, train_x_transforms, anchors)
    valid_dataset = GOT_10KDataset(val_data_dir, val_z_transforms, val_x_transforms, anchors)

    trainloader = DataLoader(train_dataset, batch_size=config.train_batch_size,
                             shuffle=True, pin_memory=True, num_workers=config.train_num_workers, drop_last=True)
    validloader = DataLoader(valid_dataset, batch_size=config.valid_batch_size,
                             shuffle=False, pin_memory=True, num_workers=config.valid_num_workers, drop_last=True)
    # create summary writer
    if not os.path.exists(config.log_dir):
        os.mkdir(config.log_dir)
    summary_writer = SummaryWriter(config.log_dir)

    # start training
    with torch.cuda.device(config.gpu_id):
        model = SiamRPN()
        model.load_pretrain(pretrain_model_dir)
        model.freeze_layers()
        model = model.cuda()
        optimizer = torch.optim.SGD(model.parameters(), lr=config.lr,
                                    momentum=config.momentum, weight_decay=config.weight_decay)
        # scheduler = StepLR(optimizer, step_size=config.step_size, gamma=config.gamma)

        scheduler = np.logspace(math.log10(config.lr), math.log10(config.end_lr), config.epoch)


        for epoch in range(config.epoch):
            train_loss = []
            model.train()
            curlr = scheduler[epoch]
            for param_group in optimizer.param_groups:
                param_group['lr'] = curlr
            for i, data in enumerate(tqdm(trainloader)):
                z, x, reg_label, cls_label = data
                z, x = Variable(z.cuda()), Variable(x.cuda())
                reg_label, cls_label = Variable(reg_label.cuda()), Variable(cls_label.cuda())
                pred_cls, pred_reg = model(z, x)
                optimizer.zero_grad()
                # permute
                pred_cls = pred_cls.reshape(-1, 1, config.anchor_num * score_size * score_size).permute(0,2,1)
                pred_reg = pred_reg.reshape(-1, 4, config.anchor_num * score_size * score_size).permute(0,2,1)
                cls_loss = rpn_cross_entropy_balance(pred_cls, cls_label, config.num_pos, config.num_neg)
                reg_loss = rpn_smoothL1(pred_reg, reg_label, cls_label, config.num_pos)
                loss = cls_loss + config.lamb * reg_loss
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), config.clip)
                optimizer.step()
                step = epoch * len(trainloader) + i
                summary_writer.add_scalar('train/loss', loss.data, step)
                train_loss.append(loss.data.cpu().numpy())
            train_loss = np.mean(train_loss)
            valid_loss = []
            model.eval()
            for i, data in enumerate(tqdm(validloader)):
                z, x, reg_label, cls_label = data
                z, x = Variable(z.cuda()), Variable(x.cuda())
                reg_label, cls_label = Variable(reg_label.cuda()), Variable(cls_label.cuda())
                pred_cls, pred_reg = model(z, x)
                pred_cls = pred_cls.reshape(-1, 1, config.anchor_num * score_size * score_size).permute(0, 2, 1)
                pred_reg = pred_reg.reshape(-1, 4, config.anchor_num * score_size * score_size).permute(0, 2, 1)
                cls_loss = rpn_cross_entropy_balance(pred_cls, cls_label, config.num_pos, config.num_neg)
                reg_loss = rpn_smoothL1(pred_reg, reg_label, cls_label, config.num_pos)
                loss = cls_loss + config.lamb * reg_loss
                valid_loss.append(loss.data.cpu().numpy())
            valid_loss = np.mean(valid_loss)
            print("EPOCH %d valid_loss: %.4f, train_loss: %.4f, learning_rate: %.4f" %
                  (epoch, valid_loss, train_loss, optimizer.param_groups[0]["lr"]))
            summary_writer.add_scalar('valid/loss',
                                      valid_loss, epoch + 1)
            torch.save(model.cpu().state_dict(),
                       "./models/siamrpn_{}.pth".format(epoch + 1))
            model.cuda()
Example #22
def verify_label(h5_path):
    hdf5_dataset = h5py.File(h5_path, 'r')
    hdf5_gt_boxes = hdf5_dataset['gt_boxes']
    hdf5_num_gt_boxes = hdf5_dataset['num_gt_boxes']
    for i in range(len(hdf5_num_gt_boxes)):
        num_gt_boxes = hdf5_num_gt_boxes[i]
        gt_boxes = hdf5_gt_boxes[i].reshape((num_gt_boxes, 4))
        w = gt_boxes[:, 3] - gt_boxes[:, 1]
        print(np.min(w))


if __name__ == '__main__':
    generator = data_generator('ctpn/train_art_0722_4482_1121.h5')
    anchors = utils.generate_anchors(config.RPN_ANCHOR_HEIGHTS,
                                     config.RPN_ANCHOR_WIDTHS,
                                     config.RPN_INPUT_SIZE // config.RPN_DOWNSCALE,
                                     config.RPN_DOWNSCALE,
                                     config.RPN_ANCHOR_STRIDE,
                                     )
    while True:
        # next(generator)
        batch_images, batch_rpn_match, batch_rpn_bbox, batch_gt_boxes = next(generator)[0]
        for i in range(len(batch_images)):
            image = batch_images[i]
            image = utils.unmold_image(image)[:, :, ::-1].astype(np.uint8)
            rpn_match = batch_rpn_match[i]
            rpn_bbox = batch_rpn_bbox[i]
            gt_boxes = batch_gt_boxes[i]
            # anchors = utils.generate_anchors_2(config.RPN_ANCHOR_HEIGHTS,
            #                                    config.RPN_ANCHOR_WIDTHS,
            #                                    image.shape[0] // config.RPN_DOWNSCALE,
            #                                    image.shape[1] // config.RPN_DOWNSCALE,
Example #23
def data_generator(h5_path, batch_size=config.BATCH_SIZE, input_size=config.RPN_INPUT_SIZE, is_training=False):
    """
    A generator that returns images and corresponding target class ids, bounding box deltas.

    Args:
        h5_path:
        batch_size: How many images to return in each call
        input_size:
        is_training:

    Returns:
        Returns a Python generator. Upon calling next() on it, the generator returns two lists, inputs and outputs.
        The contents of the lists differ depending on the received arguments:
        inputs list:
            - images: [batch, H, W, C]
            - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral)
            - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas.
            - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs
            - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]

        outputs list: Usually empty in regular training. But if detection_targets
            is True then the outputs list contains target class_ids, bbox deltas,
            and masks.

    """
    # Anchors
    # num_anchors = feature_map_size * feature_map_size * num_anchor_scales * num_anchor_ratios
    # [num_anchors, (y1, x1, y2, x2)]

    feature_map_size = input_size // config.RPN_DOWNSCALE
    anchors = utils.generate_anchors(config.RPN_ANCHOR_HEIGHTS,
                                     config.RPN_ANCHOR_WIDTHS,
                                     feature_map_size,
                                     config.RPN_DOWNSCALE,
                                     config.RPN_ANCHOR_STRIDE,
                                     )
    current_idx = 0
    hdf5_dataset = h5py.File(h5_path, 'r')
    hdf5_images = hdf5_dataset['images']
    dataset_size = len(hdf5_images)
    # hdf5_image_shapes = hdf5_dataset['image_shapes']
    hdf5_gt_boxes = hdf5_dataset['gt_boxes']
    hdf5_num_gt_boxes = hdf5_dataset['num_gt_boxes']
    indicies = np.arange(dataset_size)
    if is_training:
        np.random.shuffle(indicies)
    # batch item index
    b = 0
    # Keras requires a generator to run indefinitely.
    while True:
        if current_idx >= dataset_size:
            if is_training:
                np.random.shuffle(indicies)
            current_idx = 0
        if b == 0:
            # Init batch arrays
            batch_rpn_match = np.zeros([batch_size, anchors.shape[0], 1], dtype=np.int32)
            batch_rpn_bbox = np.zeros([batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=np.float32)
            batch_images = np.zeros((batch_size, input_size, input_size, 3), dtype=np.float32)
            # batch_gt_boxes = np.zeros((batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
        i = indicies[current_idx]
        image = hdf5_images[i]
        # image_shape = hdf5_image_shapes[i]
        # image = image.reshape(image_shape)
        num_gt_boxes = hdf5_num_gt_boxes[i]
        gt_boxes = hdf5_gt_boxes[i].reshape((num_gt_boxes, 4))
        # image, scale, pad, window = utils.resize_and_pad_image(image, input_size)
        # gt_boxes = np.round(gt_boxes * scale).astype(np.int32)
        # (y1, x1, y2, x2)
        # gt_boxes = gt_boxes[:, [1, 0, 3, 2]]
        # + pad_left
        # gt_boxes[:, [1, 3]] += pad[1][0]
        # + pad top
        # gt_boxes[:, [0, 2]] += pad[0][0]
        # gt_boxes[:, [0, 2]] = np.clip(gt_boxes[:, [0, 2]], window[0], window[2])
        # gt_boxes[:, [1, 3]] = np.clip(gt_boxes[:, [1, 3]], window[1], window[3])

        # for gt_box in gt_boxes:
        #     cv2.rectangle(image, (gt_box[1], gt_box[0]), (gt_box[3], gt_box[2]), (0, 255, 0), 1)
        # cv2.namedWindow('image', cv2.WINDOW_NORMAL)
        # cv2.imshow('image', image)
        # cv2.waitKey(0)

        # RPN Targets
        rpn_match, rpn_bbox = build_rpn_targets(anchors, gt_boxes)

        # Add to batch
        batch_rpn_match[b] = rpn_match[:, np.newaxis]
        batch_rpn_bbox[b] = rpn_bbox
        batch_images[b] = image
        # batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
        b += 1
        if b == batch_size:
            # inputs = [batch_images, batch_rpn_match, batch_rpn_bbox, batch_gt_boxes]
            inputs = [batch_images, batch_rpn_match, batch_rpn_bbox]
            outputs = []
            yield inputs, outputs
            b = 0
        current_idx += 1
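A minimal consumption sketch for this generator (the HDF5 path is hypothetical; the shapes follow the batch arrays initialized above):

gen = data_generator('ctpn/train.h5', batch_size=2, is_training=True)  # hypothetical file
inputs, outputs = next(gen)
batch_images, batch_rpn_match, batch_rpn_bbox = inputs
print(batch_images.shape)     # (2, RPN_INPUT_SIZE, RPN_INPUT_SIZE, 3)
print(batch_rpn_match.shape)  # (2, num_anchors, 1)
print(batch_rpn_bbox.shape)   # (2, RPN_TRAIN_ANCHORS_PER_IMAGE, 4)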
Example #24
    def __init__(self, net, cfg):

        self.cfg = cfg

        self.net = net
        self.anchors = generate_anchors(cfg)

        if torch.cuda.is_available():
            self.net.cuda()
            self.anchors = self.anchors.cuda()

        # Dataset transform
        transform = [
            Transform(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_Z, size=cfg.MODEL.Z_SIZE),
            Transform(context_amount=cfg.TRAIN.CROP_CONTEXT_AMOUNT_X, size=cfg.MODEL.X_SIZE,
                      random_translate=True, random_resize=True, motion_blur=True,
                      random_translate_range=cfg.TRAIN.DATA_AUG_TRANSLATE_RANGE,
                      random_resize_scale_min=cfg.TRAIN.DATA_AUG_RESIZE_SCALE_MIN,
                      random_resize_scale_max=cfg.TRAIN.DATA_AUG_RESIZE_SCALE_MAX
                      )
        ]

        # Training dataset
        trackingnet = TrackingNet(cfg.PATH.TRACKINGNET, subset="train", debug_seq=cfg.TRAIN.DEBUG_SEQ)
        imagenet = ImageNetVID(cfg.PATH.ILSVRC, subset="train")
        sampler = PairSampler([trackingnet, imagenet], cfg=cfg, transform=transform, pairs_per_video=cfg.TRAIN.PAIRS_PER_VIDEO,
                              frame_range=cfg.TRAIN.FRAME_RANGE)
        # Distractor dataset
        coco = CocoDetection(cfg.PATH.COCO, cfg.PATH.COCO_ANN_FILE)
        # coco_distractor = COCODistractor(coco, 4000)
        coco_positive = COCOPositivePair(coco, 4000, cfg=cfg, transform=transform)
        coco_negative = COCONegativePair(coco, 12000, cfg=cfg, transform=transform)

        dataset = ConcatDataset([sampler, coco_positive, coco_negative])
        self.dataloader = DataLoader(dataset, batch_size=cfg.TRAIN.BATCH_SIZE, num_workers=4, shuffle=True,
                                     pin_memory=True, drop_last=True)

        # Validation dataset
        val_trackingnet = TrackingNet(cfg.PATH.TRACKINGNET, subset="val")
        val_imagenet = ImageNetVID(cfg.PATH.ILSVRC, subset="val")
        validation_sampler = PairSampler([val_trackingnet, val_imagenet], cfg=cfg, transform=transform,
                                         pairs_per_video=1, frame_range=cfg.TRAIN.FRAME_RANGE)
        val_coco_positive = COCOPositivePair(coco, 100, cfg=cfg, transform=transform)
        val_dataset = ConcatDataset([validation_sampler, val_coco_positive])

        if cfg.TRAIN.DEBUG_SEQ >= 0:  # When debugging on a single sequence, the validation is performed on the same one
            val_dataset = PairSampler([trackingnet], cfg=cfg, transform=transform, pairs_per_video=200)

        self.validation_dataloader = DataLoader(val_dataset, batch_size=min(cfg.TRAIN.BATCH_SIZE, 20), num_workers=4,
                                                shuffle=True, pin_memory=True, drop_last=False)

        # Loss
        self.criterion = MultiBoxLoss(self.anchors, cfg)

        self.optimizer = optim.Adam(self.net.parameters(), lr=cfg.TRAIN.LR, weight_decay=cfg.TRAIN.WEIGHT_DECAY)
        self.scheduler = optim.lr_scheduler.StepLR(self.optimizer, step_size=cfg.TRAIN.SCHEDULER_STEP_SIZE,
                                                   gamma=cfg.TRAIN.SCHEDULER_GAMMA)

        # Summary Writer
        self.run_id = datetime.now().strftime('%b%d_%H-%M-%S')
        if not cfg.DEBUG:
            self.save_config()
            self.save_code()
            self.writer = SummaryWriter(log_dir=os.path.join(cfg.PATH.DATA_DIR, "runs", self.run_id))

        self.start_epoch = 0

        if cfg.TRAIN.RESUME_CHECKPOINT:
            self.start_epoch = utils.load_checkpoint(cfg.TRAIN.RESUME_CHECKPOINT, self.net, self.optimizer)

        if torch.cuda.is_available():
            self.net = nn.DataParallel(self.net)

        self.best_IOU = 0.
Example #25
def produce_batch(filepath, gt_boxes, w_h):
    # first load the feature map
    feature_map=np.load(filepath)["fc"]
    # print("load feature map done.")
    # get the feature map's height and width, i.e. the total number of feature points
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map=width*height
    # divide the image size by the feature map size to get the stride
    img_width = w_h[0]
    img_height = w_h[1]
    w_stride = img_width / width
    h_stride = img_height / height
    # print("w_stride, h_stride", w_stride, h_stride)
    # compute the anchors from the stride
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    # base_anchors = generate_anchors(w_stride, h_stride, scales=np.asarray([1, 2, 4]))
    base_anchors = generate_anchors(16, 16, ratios=[0.5, 1], scales=np.asarray([1, 2, 8, 16]))
    k = base_anchors.shape[0]  # number of base anchors per tile
    #slice tiles according to image size and stride.
    #each 1x1x1532 feature vector maps to one tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y) # all x and y coordinates of the tile grid points
    # build all (x, y, x, y) offsets; x and y are repeated because base_anchors are in (x1, y1, x2, y2) form, so both corners must be shifted together
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(), shift_y.ravel())).transpose()

    # broadcasting a (1, k, 4) array against a (num_feature_map, 1, 4) array yields shape (num_feature_map, k, 4)
    all_anchors = (base_anchors.reshape((1, k, 4)) + shifts.reshape((1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map*k
    all_anchors = all_anchors.reshape((total_anchors, 4))
    #only keep anchors inside image+border.
    border=0
    inds_inside = np.where(
            (all_anchors[:, 0] >= -border) &
            (all_anchors[:, 1] >= -border) &
            (all_anchors[:, 2] < img_width+border ) &  # width
            (all_anchors[:, 3] < img_height+border)    # height
    )[0]
    anchors=all_anchors[inds_inside]
    if len(anchors) == 0:
        return None, None, None
    # calculate overlaps each anchors to each gt boxes,
    # a matrix with shape [len(anchors) x len(gt_boxes)]
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # find the gt box with biggest overlap to each anchors,
    # and the overlap ratio. result (len(anchors),)
    argmax_overlaps = overlaps.argmax(axis=1) # index of the best-matching gt box for each anchor (1-D array)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] # best IoU for each anchor (its score with the best-matching gt box)
    # find the anchor with biggest overlap to each gt boxes,
    # and the overlap ratio. result (len(gt_boxes),)
    gt_argmax_overlaps = overlaps.argmax(axis=0) # index of the best-matching anchor for each gt box
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                                np.arange(overlaps.shape[1])] # best IoU achieved by any anchor for each gt box
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] # anchor indices that attain each gt box's best IoU (ties included)
    #labels, 1=fg/0=bg/-1=ignore, for the anchors inside the image
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # Following the paper, assign a positive label to two kinds of anchors:
    # (1) the anchor(s) with the highest IoU with each gt box
    # (2) any anchor whose best IoU with some gt box is at least 0.7
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # set negative labels
    labels[max_overlaps <= .3] = 0
    # subsample positive labels if we have too many
    # num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    # if len(fg_inds) > num_fg:
    #     disable_inds = npr.choice(
    #         fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #     labels[disable_inds] = -1
    # subsample negative labels if we have too many
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        # too many background anchors: randomly mark the surplus as ignore
        disable_inds = npr.choice(
            bg_inds, size=(len(bg_inds) - num_bg), replace=False) # randomly pick len(bg_inds) - num_bg indices from bg_inds
        labels[disable_inds] = -1
    # build the batch: batch_inds are the positions of all non-ignored anchors
    batch_inds=inds_inside[labels!=-1]
    # batch_inds holds the indices of anchors inside the image; since anchors are laid out k per position,
    # dividing by k maps each anchor back to its spatial position on the feature map
    batch_inds=(batch_inds / k).astype(int)
    # map the labels back onto the full set of anchors
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    # batch_label_targets: one 1x1xk label map per sampled position
    batch_label_targets=full_labels.reshape(-1,1,1,1*k)[batch_inds]

    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :]
    # anchors labeled as foreground
    pos_anchors=all_anchors[inds_inside[labels==1]]
    # encode regression targets (dx, dy, dw, dh) of the matched gt boxes relative to the positive anchors
    bbox_targets = bbox_transform(pos_anchors, gt_boxes[argmax_overlaps, :][labels==1])
    bbox_targets = unmap(bbox_targets, total_anchors, inds_inside[labels==1], fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1,1,1,4*k)[batch_inds]
    # pad the spatial (height/width) axes of feature_map by one on each side
    padded_fcmap=np.pad(feature_map,((0,0),(1,1),(1,1),(0,0)),mode='constant')
    # drop the singleton batch axis of padded_fcmap; the result should be 3-D
    padded_fcmap=np.squeeze(padded_fcmap)
    batch_tiles=[]
    for ind in batch_inds:
        x = ind % width
        y = int(ind/width)
        fc_3x3=padded_fcmap[y:y+3,x:x+3,:]
        batch_tiles.append(fc_3x3)
    # print("produce batch done.")
    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()
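
The key step in produce_batch is the broadcast that tiles the k base anchors over every cell of the feature map. The self-contained sketch below reproduces only that step with toy numbers so the resulting shapes are easy to verify; it is an illustration, not part of the original pipeline.

import numpy as np

# Toy illustration of tiling base anchors over a feature map by broadcasting.
height, width, stride = 2, 3, 16          # a 2x3 feature map with stride 16
base_anchors = np.array([[0, 0, 15, 15],  # k = 2 base anchors in (x1, y1, x2, y2)
                         [-8, -8, 23, 23]], dtype=np.float32)
k = base_anchors.shape[0]

shift_x, shift_y = np.meshgrid(np.arange(width) * stride,
                               np.arange(height) * stride)
shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
                    shift_x.ravel(), shift_y.ravel())).transpose()  # (6, 4)

# (1, k, 4) + (6, 1, 4) broadcasts to (6, k, 4): every cell gets every base anchor.
all_anchors = base_anchors.reshape((1, k, 4)) + shifts.reshape((-1, 1, 4))
print(all_anchors.reshape((-1, 4)).shape)  # (12, 4) == height * width * k anchors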
Example #26
def produce_batch(filepath, gt_boxes, scale):
    img = load_img(filepath)
    img_width = np.shape(img)[1] * scale[1]
    img_height = np.shape(img)[0] * scale[0]
    img = img.resize((int(img_width), int(img_height)))
    #feed image to pretrained model and get feature map
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    feature_map = pretrained_model.predict(img)
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height
    #calculate output w, h stride
    w_stride = img_width / width
    h_stride = img_height / height
    #generate base anchors according output stride.
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    k = base_anchors.shape[0]  # anchors per tile (9 by default)
    #slice tiles according to image size and stride.
    #each 1x1x1532 feature vector maps to one tile.
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()
    #apply base anchors to all tiles, to have a num_feature_map*9 anchors.
    all_anchors = (base_anchors.reshape((1, 9, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * 9
    all_anchors = all_anchors.reshape((total_anchors, 4))
    #only keep anchors inside image+border.
    border = 0
    inds_inside = np.where((all_anchors[:, 0] >= -border)
                           & (all_anchors[:, 1] >= -border)
                           & (all_anchors[:, 2] < img_width + border)
                           &  # width
                           (all_anchors[:, 3] < img_height + border)  # height
                           )[0]
    anchors = all_anchors[inds_inside]
    # calculate overlaps each anchors to each gt boxes,
    # a matrix with shape [len(anchors) x len(gt_boxes)]
    overlaps = bbox_overlaps(anchors, gt_boxes)
    # find the gt box with biggest overlap to each anchors,
    # and the overlap ratio. result (len(anchors),)
    argmax_overlaps = overlaps.argmax(axis=1)
    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
    # find the anchor with biggest overlap to each gt boxes,
    # and the overlap ratio. result (len(gt_boxes),)
    gt_argmax_overlaps = overlaps.argmax(axis=0)
    gt_max_overlaps = overlaps[gt_argmax_overlaps,
                               np.arange(overlaps.shape[1])]
    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
    #labels, 1=fg/0=bg/-1=ignore
    labels = np.empty((len(inds_inside), ), dtype=np.float32)
    labels.fill(-1)
    # set positive labels, as defined in section 3.1.2 of the paper:
    # We assign a positive label to two kinds of anchors: (i) the
    # anchor/anchors with the highest Intersection-over-Union
    # (IoU) overlap with a ground-truth box, or (ii) an
    # anchor that has an IoU overlap higher than 0.7 with any gt box
    labels[gt_argmax_overlaps] = 1
    labels[max_overlaps >= .7] = 1
    # set negative labels
    labels[max_overlaps <= .3] = 0
    # subsample positive labels if we have too many
    #     num_fg = int(RPN_FG_FRACTION * RPN_BATCHSIZE)
    fg_inds = np.where(labels == 1)[0]
    #     if len(fg_inds) > num_fg:
    #         disable_inds = npr.choice(
    #             fg_inds, size=(len(fg_inds) - num_fg), replace=False)
    #         labels[disable_inds] = -1
    # subsample negative labels if we have too many
    num_bg = int(len(fg_inds) * BG_FG_FRAC)
    bg_inds = np.where(labels == 0)[0]
    if len(bg_inds) > num_bg:
        disable_inds = npr.choice(bg_inds,
                                  size=(len(bg_inds) - num_bg),
                                  replace=False)
        labels[disable_inds] = -1
    # batch_inds are the positions of all non-ignored anchors
    batch_inds = inds_inside[labels != -1]
    batch_inds = (batch_inds / k).astype(int)
    full_labels = unmap(labels, total_anchors, inds_inside, fill=-1)
    batch_label_targets = full_labels.reshape(-1, 1, 1, 1 * k)[batch_inds]
    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
    # bbox_targets = bbox_transform(anchors, gt_boxes[argmax_overlaps, :]
    pos_anchors = all_anchors[inds_inside[labels == 1]]
    bbox_targets = bbox_transform(pos_anchors,
                                  gt_boxes[argmax_overlaps, :][labels == 1])
    bbox_targets = unmap(bbox_targets,
                         total_anchors,
                         inds_inside[labels == 1],
                         fill=0)
    batch_bbox_targets = bbox_targets.reshape(-1, 1, 1, 4 * k)[batch_inds]
    padded_fcmap = np.pad(feature_map, ((0, 0), (1, 1), (1, 1), (0, 0)),
                          mode='constant')
    padded_fcmap = np.squeeze(padded_fcmap)
    batch_tiles = []
    for ind in batch_inds:
        x = ind % width
        y = int(ind / width)
        fc_3x3 = padded_fcmap[y:y + 3, x:x + 3, :]
        batch_tiles.append(fc_3x3)
    return np.asarray(batch_tiles), batch_label_targets.tolist(), batch_bbox_targets.tolist()
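
A caller for the function above is not shown in the snippet. The hypothetical driver below yields one RPN training batch per image; it assumes the surrounding module already provides pretrained_model and the Keras image helpers that produce_batch relies on.

import numpy as np

def rpn_training_batches(image_paths, gt_boxes_per_image, scale=(1.0, 1.0)):
    # Hypothetical helper: one (tiles, [labels, bbox_targets]) pair per image.
    for path, gt_boxes in zip(image_paths, gt_boxes_per_image):
        tiles, label_targets, bbox_targets = produce_batch(
            path, np.asarray(gt_boxes, dtype=np.float32), scale)
        if len(tiles) == 0:
            continue  # no usable anchors for this image
        yield tiles, [np.asarray(label_targets), np.asarray(bbox_targets)]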
Example #27
def gen(dataset, config, shuffle=True, batch_size=1):
    """
    shuffle: shuffle image every epoch
    return:
    - input_image
    - input_image_meta
    - input_rpn_match
    - input_rpn_bbox
    - input_gt_class_ids
    - input_gt_boxes
    """
    anchors = utils.generate_anchors(config.ANCHOR_SCALES,
                                     config.ANCHOR_RATIOS,
                                     config.ANCHOR_STRIDE,
                                     config.BACKBONE_SHAPES,
                                     config.BACKBONE_STRIDES)

    b = 0  #batch index
    image_ids = np.copy(dataset.image_ids)
    #print(image_ids)
    error_count = 0

    index = -1

    while True:
        try:
            index = (index + 1) % len(image_ids)

            if shuffle and index == 0:
                np.random.shuffle(image_ids)

            image_id = image_ids[index]

            input_image, input_image_meta, input_gt_class_ids, input_gt_boxes =\
                load_image_gt(dataset, config, image_id)

            #print(input_image_meta)
            #print(input_gt_class_ids)

            if not np.any(input_gt_class_ids > 0):
                continue

            #RPN targets
            rpn_match, rpn_bbox = RPN.build_targets(input_image.shape, anchors,
                                                    input_gt_class_ids,
                                                    input_gt_boxes, config)
            if b == 0:
                # initialize the batch arrays on the first sample of each batch
                batch_image = np.zeros((batch_size, ) + input_image.shape,
                                       dtype=np.float32)
                batch_image_meta = np.zeros(
                    (batch_size, ) + input_image_meta.shape,
                    dtype=input_image_meta.dtype)
                batch_gt_class_ids = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
                batch_gt_boxes = np.zeros(
                    (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.float32)
                batch_rpn_match = np.zeros([batch_size, anchors.shape[0], 1],
                                           dtype=rpn_match.dtype)
                batch_rpn_bbox = np.zeros(
                    [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4],
                    dtype=rpn_bbox.dtype)

            if input_gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
                ids = np.random.choice(np.arange(input_gt_boxes.shape[0]),
                                       config.MAX_GT_INSTANCES,
                                       replace=False)
                input_gt_class_ids = input_gt_class_ids[ids]
                input_gt_boxes = input_gt_boxes[ids]

            batch_image[b] = utils.mold_image(input_image.astype(np.float32),
                                              config)
            batch_image_meta[b] = input_image_meta
            batch_gt_class_ids[
                b, :input_gt_class_ids.shape[0]] = input_gt_class_ids
            batch_gt_boxes[b, :input_gt_boxes.shape[0]] = input_gt_boxes
            batch_rpn_match[b] = rpn_match[:, np.newaxis]
            batch_rpn_bbox[b] = rpn_bbox

            b += 1

            if b >= batch_size:
                """
                inputs = [batch_image, batch_image_meta, batch_rpn_match, batch_gt_boxes,
                          batch_gt_class_ids,batch_gt_boxes, batch_gt_masks]
                outputs = []
                """
                inputs = [
                    batch_image, batch_image_meta, batch_rpn_match,
                    batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes
                ]
                outputs = []

                yield inputs, outputs

                b = 0

        except (GeneratorExit, KeyboardInterrupt):
            raise
        except:
            # Log it and skip the image
            print("Error processing image: ", image_id)
            error_count += 1
            if error_count > 5:
                raise
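
The generator above is meant to be consumed batch by batch. A quick smoke test might look like the sketch below; config.BATCH_SIZE and the commented fit_generator call are assumptions about the surrounding project, not part of the snippet.

# Hypothetical smoke test: pull a single batch and inspect its shapes.
train_gen = gen(dataset, config, shuffle=True, batch_size=config.BATCH_SIZE)
inputs, outputs = next(train_gen)
(batch_image, batch_image_meta, batch_rpn_match,
 batch_rpn_bbox, batch_gt_class_ids, batch_gt_boxes) = inputs
print(batch_image.shape, batch_rpn_match.shape, batch_rpn_bbox.shape)

# The same generator could then be handed to Keras training, e.g.
# keras_model.fit_generator(train_gen, steps_per_epoch=1000, epochs=10)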
Example #28
    def build(self, mode, config):

        # build anchors
        anchors = []
        for i, scale in enumerate(config.ANCHOR_SCALES):
            anchors.append(
                utils.generate_anchors(scales=scale,
                                       ratios=config.ANCHOR_RATIOS,
                                       shape=[8 * 2**i, 8 * 2**i],
                                       feature_stride=config.FEATURE_STRIDE /
                                       (2**i),
                                       anchor_stride=1))
        self.anchors = np.concatenate(anchors, axis=0)

        # Define the input layers
        input_image = KL.Input(shape=config.IMAGE_SHAPE, name="input_image")
        if not self.mode == 'detection':
            input_attribute = KL.Input(shape=[config.NUM_ATTRIBUTE],
                                       name="input_features")

        # darknet53
        S1, S2, S3, S4, S5 = darknet_graph(input_image,
                                           architecture='darknet53')

        def output_detection_layers(x, num_filters, out_filters, name):
            for i in range(2):
                conv_name_base = "last_conv_" + name + "_" + str(i)
                x = KL.Conv2D(num_filters, (1, 1),
                              padding="SAME",
                              name=conv_name_base + '_a',
                              use_bias=True)(x)
                # x = BatchNorm(axis=3, name=bn_name_base + 'b')(x)
                x = KL.LeakyReLU(alpha=0.1)(x)

                x = KL.Conv2D(num_filters * 2, (3, 3),
                              padding="SAME",
                              name=conv_name_base + '_b',
                              use_bias=True)(x)
                # x = BatchNorm(axis=3, name=bn_name_base + 'b')(x)
                x = KL.LeakyReLU(alpha=0.1)(x)
            x = KL.Conv2D(num_filters, (1, 1),
                          padding="SAME",
                          name=conv_name_base + "_out",
                          use_bias=True)(x)
            # x = BatchNorm(axis=3, name=bn_name_base + 'b')(x)
            x = KL.LeakyReLU(alpha=0.1)(x)

            conv_name_base = "detection_head_" + name
            y = KL.Conv2D(num_filters * 2, (3, 3),
                          padding="SAME",
                          name=conv_name_base + 'a',
                          use_bias=True)(x)
            # x = BatchNorm(axis=3, name=bn_name_base + 'b')(x)
            y = KL.LeakyReLU(alpha=0.1)(y)
            y = KL.Conv2D(out_filters, (1, 1),
                          padding="SAME",
                          name=conv_name_base + 'b',
                          use_bias=True)(y)
            # x = BatchNorm(axis=3, name=bn_name_base + 'b')(x)
            y = KL.LeakyReLU(alpha=0.1)(y)

            return x, y

        # FPN: top to bottom
        # stage 1
        x, y1 = output_detection_layers(
            S5,
            num_filters=512,
            out_filters=len(config.ANCHOR_SCALES[0]) * 3 * (4 + 1),
            name="fpn1")

        # stage 2
        # x = KL.Conv2D(256, (1, 1), padding="SAME", name="last", use_bias=True)(x)
        # # x = BatchNorm(axis=3, name=bn_name_base + 'b')(x)
        # x = KL.LeakyReLU(alpha=0.1)(x)
        x = KL.UpSampling2D(2)(x)
        x = KL.Concatenate()([x, S4])
        x, y2 = output_detection_layers(
            x,
            num_filters=256,
            out_filters=len(config.ANCHOR_SCALES[1]) * 3 * (4 + 1),
            name="fpn2")

        # stage 3
        # x = KL.Conv2D(128, (1, 1), padding="SAME", name="last", use_bias=True)(x)
        # # x = BatchNorm(axis=3, name=bn_name_base + 'b')(x)
        # x = KL.LeakyReLU(alpha=0.1)(x)
        x = KL.UpSampling2D(2)(x)
        x = KL.Concatenate()([x, S3])
        x, y3 = output_detection_layers(
            x,
            num_filters=128,
            out_filters=len(config.ANCHOR_SCALES[2]) * 3 * (4 + 1),
            name="fpn3")

        # detection: 3 * (4 + 1) for each anchor
        y1 = KL.Reshape((-1, 5))(y1)
        y2 = KL.Reshape((-1, 5))(y2)
        y3 = KL.Reshape((-1, 5))(y3)
        detection = KL.Concatenate(name="final_detection",
                                   axis=1)([y1, y2, y3])

        # image feature
        image_feature = x

        if mode == 'training':
            # Define the input layers for ground truth
            num_anchors = K.int_shape(detection)[0]
            gt_bbox = KL.Input(shape=[num_anchors, 4],
                               name="ground_truth_bbox")
            gt_bbox_object = KL.Input(shape=[num_anchors],
                                      name="ground_truth_bbox_object")
            gt_object = KL.Input(shape=[num_anchors],
                                 name="ground_truth_object")

            # Define the loss
            object_loss = KL.Lambda(lambda x: yolo_object_loss(*x),
                                    name='yolo_object_loss')(
                                        [detection, gt_object])
            bbox_loss = KL.Lambda(lambda x: yolo_bbox_loss(*x),
                                  name='yolo_bbox_loss')(
                                      [detection, gt_bbox, gt_bbox_object])
            bbox_object_loss = KL.Lambda(
                lambda x: yolo_bbox_object_loss(*x),
                name='yolo_bbox_object_loss')([detection, gt_bbox_object])

            return KM.Model([
                input_image, input_attribute, gt_object, gt_bbox,
                gt_bbox_object
            ], [
                image_feature, detection, object_loss, bbox_loss,
                bbox_object_loss
            ])

        if mode == 'detection':

            def transform_detection(detection):
                to = K.expand_dims(detection[..., 0], axis=-1)
                to = KL.Activation('sigmoid')(to)

                tx = K.expand_dims(detection[..., 1], axis=-1)
                ty = K.expand_dims(detection[..., 2], axis=-1)
                tw = K.expand_dims(detection[..., 3], axis=-1)
                th = K.expand_dims(detection[..., 4], axis=-1)

                tx = KL.Activation('sigmoid')(tx)
                ty = KL.Activation('sigmoid')(ty)

                detection = KL.Concatenate(name="final_transformed_detection",
                                           axis=-1)([to, tx, ty, tw, th])

                return detection

            detection = KL.Lambda(lambda x: transform_detection(x),
                                  name="transform_final_detection")(detection)

            return KM.Model([input_image], [detection])
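
In detection mode the model squashes the objectness score and the x/y offsets with a sigmoid while leaving the width/height terms raw. The standalone NumPy sketch below mirrors that transform_detection behaviour on a random array; the anchor count is arbitrary and only serves the illustration.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

raw = np.random.randn(4032, 5).astype(np.float32)   # rows of [to, tx, ty, tw, th]
transformed = np.concatenate([sigmoid(raw[:, :3]),   # to, tx, ty squashed into (0, 1)
                              raw[:, 3:]], axis=-1)  # tw, th left unbounded
assert transformed.shape == raw.shape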
Example #29
def data_generator():
    ann_file = '{}/annotations/instances_{}.json'.format(
        config.DATASET_DIR, config.DATASET_TYPE)
    coco = COCO(ann_file)
    categories = coco.loadCats(coco.getCatIds())
    nms = [cat['name'] for cat in categories]
    print('COCO categories: \n{}\n'.format(' '.join(nms)))

    img_ids = coco.getImgIds()
    all_anchors = utils.generate_anchors()
    while True:
        rand = np.random.randint(0, len(img_ids))
        # rand = 3118
        # print(rand)

        img_info = coco.loadImgs(img_ids[rand])[0]
        img = scipy.ndimage.imread(config.DATASET_DIR + '\\' +
                                   config.DATASET_TYPE + '\\' +
                                   img_info['file_name'])
        img = img.astype(np.float32) / 255.
        ratio, img, offset = utils.resize_keep_ratio(img, (1024, 1024))

        ann_ids = coco.getAnnIds(imgIds=img_info['id'], iscrowd=0)
        anns = coco.loadAnns(ann_ids)
        bboxs = [ann['bbox'] for ann in anns]
        bboxs = np.vstack(bboxs)
        # offset category ids by one to reserve 0 for the background class
        cls = np.array([ann['category_id'] + 1 for ann in anns])
        masks = np.array([
            utils.annToMask(ann, img_info['height'], img_info['width'])
            for ann in anns
        ])

        # resize masks to desired shape
        bboxs_ind = bboxs.astype(int)
        masks = np.array([
            cv2.resize(
                mask[bboxs_ind[i, 1]:bboxs_ind[i, 1] + bboxs_ind[i, 3],
                     bboxs_ind[i, 0]:bboxs_ind[i, 0] + bboxs_ind[i, 2]],
                (config.MASK_OUTPUT_SHAPE, config.MASK_OUTPUT_SHAPE))
            for i, mask in enumerate(masks)
        ])
        bboxs = bboxs * ratio
        bboxs[:, :2] += offset
        bboxs_rpn = bboxs

        valid_label_range = 0
        # we pad or trim all labels to MAX_GT_TRAIN_INSTANCES so they can be batched
        if bboxs.shape[0] > config.MAX_GT_TRAIN_INSTANCES:
            valid_label_range = config.MAX_GT_TRAIN_INSTANCES
            bboxs = bboxs[:config.MAX_GT_TRAIN_INSTANCES, :]
            cls = cls[:config.MAX_GT_TRAIN_INSTANCES]
            masks = masks[:config.MAX_GT_TRAIN_INSTANCES, :, :]
        else:
            valid_label_range = bboxs.shape[0]
            bboxs = np.pad(
                bboxs,
                ((0, config.MAX_GT_TRAIN_INSTANCES - bboxs.shape[0]), (0, 0)),
                mode='constant',
                constant_values=((0, 0), (0, 0)))
            cls = np.pad(cls,
                         (0, config.MAX_GT_TRAIN_INSTANCES - cls.shape[0]),
                         mode='constant',
                         constant_values=(0, 0))
            masks = np.pad(
                masks, ((0, config.MAX_GT_TRAIN_INSTANCES - masks.shape[0]),
                        (0, 0), (0, 0)),
                mode='constant',
                constant_values=((0, 0), (0, 0), (0, 0)))

        # pre compute rpn targets
        anchor_types, matches = utils.generate_anchor_types(
            all_anchors, bboxs_rpn)
        rpn_positive_mask, rpn_mask = utils.get_mask(anchor_types)
        rpn_labels = utils.generate_rpn_labels(anchor_types, rpn_mask)
        rpn_deltas = utils.generate_rpn_deltas(all_anchors, bboxs_rpn,
                                               rpn_positive_mask, matches)
        rpn_positive_range = rpn_deltas.shape[0]
        # do some padding
        rpn_deltas = np.pad(
            rpn_deltas,
            ((0, config.RPN_ANCHORS_TRAIN_PER_IMAGE - rpn_positive_range),
             (0, 0)), 'constant')
        rpn_positive_mask = np.pad(
            rpn_positive_mask,
            (0, config.RPN_ANCHORS_TRAIN_PER_IMAGE - rpn_positive_range),
            'constant',
            constant_values=-1)

        if config.DEBUG:
            fig = plt.figure()
            ax = fig.add_subplot(111)
            plt.imshow(img)
            # coco.showAnns(anns)
            for bbox in bboxs:
                ax.add_patch(
                    patches.Rectangle(
                        (bbox[0], bbox[1]),
                        bbox[2],
                        bbox[3],
                        edgecolor="red",
                        fill=False  # remove background
                    ))
            for m in matches:
                ax.add_patch(
                    patches.Rectangle(
                        (all_anchors[m][0], all_anchors[m][1]),
                        all_anchors[m][2],
                        all_anchors[m][3],
                        edgecolor="blue",
                        fill=False  # remove background
                    ))
            plt.show()
        # we feed precomputed rpn masks on multi-threaded cpu
        print()
        yield img, bboxs, rpn_labels, rpn_deltas, rpn_mask, rpn_positive_range, rpn_positive_mask, cls, masks, valid_label_range
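
Before wiring data_generator() into a training loop, it can help to pull a single sample and check the shapes. The sketch below does exactly that; the shapes in the comments follow from the config constants used above and are stated as assumptions.

# Hypothetical smoke test for data_generator().
sample_gen = data_generator()
(img, bboxs, rpn_labels, rpn_deltas, rpn_mask, rpn_positive_range,
 rpn_positive_mask, cls, masks, valid_label_range) = next(sample_gen)
print(img.shape,          # assumed (1024, 1024, 3) after resize_keep_ratio
      bboxs.shape,        # (MAX_GT_TRAIN_INSTANCES, 4) after padding/trimming
      masks.shape,        # (MAX_GT_TRAIN_INSTANCES, MASK_OUTPUT_SHAPE, MASK_OUTPUT_SHAPE)
      valid_label_range)  # number of real (non-padded) instances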
def produce_batch(feature_map, gt_boxes, h_w=None, category=None):
    height = np.shape(feature_map)[1]
    width = np.shape(feature_map)[2]
    num_feature_map = width * height

    w_stride = h_w[1] / width
    h_stride = h_w[0] / height
    #base anchors are 9 anchors wrt a tile (0,0,w_stride-1,h_stride-1)
    base_anchors = generate_anchors(w_stride, h_stride)
    anchors_num = base_anchors.shape[0]  # anchors per tile (9 by default)
    shift_x = np.arange(0, width) * w_stride
    shift_y = np.arange(0, height) * h_stride
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), shift_x.ravel(),
                        shift_y.ravel())).transpose()

    all_anchors = (base_anchors.reshape((1, anchors_num, 4)) + shifts.reshape(
        (1, num_feature_map, 4)).transpose((1, 0, 2)))
    total_anchors = num_feature_map * anchors_num
    all_anchors = all_anchors.reshape((total_anchors, 4))
    # run the trained RPN on the feature map to get scores and deltas
    res = rpn_model.query_cnn(feature_map)
    scores = res[0]
    scores = scores.reshape(-1, 1)
    deltas = res[1]
    deltas = np.reshape(deltas, (-1, 4))
    # decode the deltas into absolute box coordinates and clip the proposals to the image
    proposals = bbox_transform_inv(all_anchors, deltas)
    proposals = clip_boxes(proposals, (h_w[0], h_w[1]))
    # remove small boxes
    keep = filter_boxes(proposals,
                        small_box_threshold)  # here the threshold is 40 pixels
    proposals = proposals[keep, :]
    scores = scores[keep]

    # sort scores and only keep the top 6000.
    pre_nms_topN = 6000
    order = scores.ravel().argsort()[::-1]
    if pre_nms_topN > 0:
        order = order[:pre_nms_topN]
    proposals = proposals[order, :]
    scores = scores[order]
    # apply NMS to the top 6000, then keep the top 300
    post_nms_topN = 300
    keep = py_cpu_nms(np.hstack((proposals, scores)), 0.7)
    if post_nms_topN > 0:
        keep = keep[:post_nms_topN]
    proposals = proposals[keep, :]
    scores = scores[keep]
    # also add the ground-truth boxes to the proposals
    proposals = np.vstack((proposals, gt_boxes))
    # calculate overlaps of proposal and gt_boxes
    overlaps = bbox_overlaps(proposals, gt_boxes)
    gt_assignment = overlaps.argmax(axis=1)
    max_overlaps = overlaps.max(axis=1)
    # labels = gt_labels[gt_assignment] #?

    # sub sample
    fg_inds = np.where(max_overlaps >= FG_THRESH)[0]
    fg_rois_per_this_image = min(int(BATCH * FG_FRAC), fg_inds.size)
    # Sample foreground regions without replacement
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds,
                             size=fg_rois_per_this_image,
                             replace=False)
    bg_inds = np.where((max_overlaps < BG_THRESH_HI)
                       & (max_overlaps >= BG_THRESH_LO))[0]
    bg_rois_per_this_image = BATCH - fg_rois_per_this_image
    bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
    # Sample background regions without replacement
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds,
                             size=bg_rois_per_this_image,
                             replace=False)
    # The indices that we're selecting (both fg and bg)
    keep_inds = np.append(fg_inds, bg_inds)
    # Select sampled values from various arrays:
    # labels = labels[keep_inds]
    rois = proposals[keep_inds]
    gt_rois = gt_boxes[gt_assignment[keep_inds]]
    targets = bbox_transform(rois, gt_rois)  # regression targets from the sampled rois to their matched gt boxes
    rois_num = targets.shape[0]
    batch_box = np.zeros((rois_num, 200, 4))
    for i in range(rois_num):
        batch_box[i, category] = targets[i]
    batch_box = np.reshape(batch_box, (rois_num, -1))
    # get gt category
    batch_categories = np.zeros((rois_num, 200, 1))
    for i in range(rois_num):
        batch_categories[i, category] = 1
    batch_categories = np.reshape(batch_categories, (rois_num, -1))
    return rois, batch_box, batch_categories
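
The foreground/background sampling in the middle of this function is independent of the rest of the pipeline, so it can be exercised on its own. The sketch below restates it as a standalone helper; the thresholds and batch size mirror common Faster R-CNN defaults and are assumptions here, since the snippet takes them from module-level constants.

import numpy as np
import numpy.random as npr

def sample_rois(max_overlaps, batch=256, fg_frac=0.25,
                fg_thresh=0.5, bg_hi=0.5, bg_lo=0.1):
    # Foreground: proposals whose best IoU with any gt box clears fg_thresh.
    fg_inds = np.where(max_overlaps >= fg_thresh)[0]
    fg_per_image = min(int(batch * fg_frac), fg_inds.size)
    if fg_inds.size > 0:
        fg_inds = npr.choice(fg_inds, size=fg_per_image, replace=False)

    # Background: proposals with IoU in [bg_lo, bg_hi); fill the rest of the batch.
    bg_inds = np.where((max_overlaps < bg_hi) & (max_overlaps >= bg_lo))[0]
    bg_per_image = min(batch - fg_per_image, bg_inds.size)
    if bg_inds.size > 0:
        bg_inds = npr.choice(bg_inds, size=bg_per_image, replace=False)

    return np.append(fg_inds, bg_inds)

keep = sample_rois(np.random.rand(3000))  # toy IoU scores
print(keep.shape)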