Пример #1
0
def iou_matrix(preds):
    '''
    Compute intersection-over-union (IOU) between the boxes in ``preds``.

    Parameters
    ----------
    preds : theano.tensor
        One box per row. Only the first four columns are read, as the
        corner coordinates :math:`(x_i, y_i, x_f, y_f)`.

    Returns
    -------
    theano.tensor
        Matrix of IOU values.
    '''
    n_boxes = preds.shape[0]

    # Index grids pairing rows of `preds` with each other
    # (layout is whatever the `meshgrid` helper returns).
    idx1, idx2 = meshgrid(T.arange(n_boxes), T.arange(n_boxes))
    boxes_a, boxes_b = preds[idx1, :], preds[idx2, :]

    a_x0, a_y0, a_x1, a_y1 = [boxes_a[:, :, k] for k in range(4)]
    b_x0, b_y0, b_x1, b_y1 = [boxes_b[:, :, k] for k in range(4)]

    # Corners of the overlap rectangle.
    left, top = T.maximum(a_x0, b_x0), T.maximum(a_y0, b_y0)
    right, bottom = T.minimum(a_x1, b_x1), T.minimum(a_y1, b_y1)

    # Clamp at zero so that disjoint boxes have no intersection.
    overlap_w = T.maximum(right - left, 0.)
    overlap_h = T.maximum(bottom - top, 0.)
    intersection = overlap_w * overlap_h

    area_a = (a_x1 - a_x0) * (a_y1 - a_y0)
    area_b = (b_x1 - b_x0) * (b_y1 - b_y0)
    union = area_a + area_b - intersection

    return intersection / union
Пример #2
0
    def _get_cost(self, input, truth, alpha=1., min_iou=0.5):
        """
        Build the symbolic training cost accumulated over all predictive maps.

        For every predictive map, default boxes whose best IOU with a ground
        truth reaches ``min_iou`` are positives (coordinate and class terms);
        boxes with a best IOU in ``[0.1, min_iou)`` form the pool from which
        negatives are drawn at random (no-object term).

        Parameters
        ----------
        input :
            Not read by this method.
        truth : theano.tensor
            Ground truth; the first four entries of axis 2 are used as box
            parameters and the last ``num_classes + 1`` as class targets.
        alpha : float
            Weight applied to the class and no-object terms.
        min_iou : float
            IOU threshold separating positives from negatives.

        Returns
        -------
        tuple
            ``(cost, [cost_coord, cost_class, cost_noobj])`` where ``cost`` is
            the sum of the three listed terms.
        """
        n_scores = self.num_classes + 1
        n_ratios = len(self.ratios)

        # Target distribution for the non-object class: all mass on the last
        # score slot, broadcastable against the class scores of a map.
        background = theano.shared(
            np.zeros(n_scores, dtype=theano.config.floatX))
        background = T.set_subtensor(background[-1], 1.)
        background = background.dimshuffle('x', 'x', 0, 'x', 'x')

        cost_coord = 0.
        cost_class = 0.
        cost_noobj = 0.

        for i in range(len(self._predictive_maps)):
            dmap = self._default_maps[i]
            fmap = self._predictive_maps[i]
            shape = layers.get_output_shape(self.network['detection'][i])[2:]

            # IOU between every default box and every ground truth.
            iou_all = self._get_iou(
                dmap.dimshuffle('x', 'x', 0, 1, 2, 3),
                truth.dimshuffle(0, 1, 'x', 2, 'x', 'x'))

            # Best-overlapping ground truth for each cell/box.
            idx_match = T.argmax(iou_all, axis=1)

            # Tile the truth over boxes and spatial cells ...
            tiled = truth.dimshuffle(0, 1, 'x', 2, 'x', 'x')
            tiled = T.repeat(tiled, n_ratios, axis=2)
            tiled = T.repeat(tiled, shape[0], axis=4)
            tiled = T.repeat(tiled, shape[1], axis=5)

            idx1, idx2, idx3, idx4 = meshgrid(T.arange(truth.shape[0]),
                                              T.arange(n_ratios),
                                              T.arange(shape[0]),
                                              T.arange(shape[1]))

            # ... and keep, per cell/box, only the matched ground truth.
            truth_extended = tiled[idx1, idx_match, idx2, :, idx3, idx4]
            truth_extended = truth_extended.dimshuffle(0, 1, 4, 2, 3)

            best_iou = iou_all.max(axis=1)
            is_pos = best_iou >= min_iou
            pos = is_pos.nonzero()

            dmap_extended = dmap.dimshuffle('x', 0, 1, 2, 3)

            # Regression targets: offsets scaled by the default box size and
            # log-ratios of the sizes.
            target_x = (truth_extended[:, :, 0] -
                        dmap_extended[:, :, 0]) / dmap_extended[:, :, 2]
            target_y = (truth_extended[:, :, 1] -
                        dmap_extended[:, :, 1]) / dmap_extended[:, :, 3]
            target_w = T.log(truth_extended[:, :, 2] / dmap_extended[:, :, 2])
            target_h = T.log(truth_extended[:, :, 3] / dmap_extended[:, :, 3])

            residuals = [
                fmap[:, :, 0] - target_x,
                fmap[:, :, 1] - target_y,
                fmap[:, :, 2] - target_w,
                fmap[:, :, 3] - target_h,
            ]

            # Squared coordinate error over the positives only.
            cost_coord_fmap = 0.
            for residual in residuals:
                cost_coord_fmap += ((residual[pos])**2).sum()

            # Cross-entropy between matched truth classes and predictions.
            xent = -(truth_extended[:, :, -n_scores:] *
                     T.log(fmap[:, :, -n_scores:])).sum(axis=2)
            cost_class_fmap = xent[pos].sum()

            # Candidate negatives: some overlap, but below the threshold.
            flat_iou = best_iou.reshape((-1, ))
            is_neg = T.bitwise_and(flat_iou >= 0.1, flat_iou < min_iou)

            # Draw three negatives per positive.
            pos_size = is_pos[pos].size
            neg_size = pos_size * 3

            idx_neg = T.arange(flat_iou.shape[0])[is_neg.nonzero()]
            # Sample with replacement when the pool is not larger than the
            # number of negatives requested.
            replace = T.le(idx_neg.shape[0], neg_size)
            idx_neg = theano.ifelse.ifelse(
                idx_neg.shape[0] > 0,
                self._random_stream.choice((neg_size, ),
                                           a=idx_neg,
                                           replace=replace),
                T.arange(0))

            # Guard the normalisers below against division by zero.
            neg_size = T.maximum(1., neg_size)
            pos_size = T.maximum(1., pos_size)

            # Cross-entropy against the non-object target on the negatives.
            noobj = -(background *
                      T.log(fmap[:, :, -n_scores:])).sum(axis=2).reshape(
                          (-1, ))
            cost_noobj_fmap = noobj[idx_neg].sum()

            cost_coord += cost_coord_fmap / pos_size
            cost_class += alpha * cost_class_fmap / pos_size
            cost_noobj += alpha * cost_noobj_fmap / neg_size

        cost = cost_coord + cost_class + cost_noobj

        return cost, [cost_coord, cost_class, cost_noobj]
Пример #3
0
	def _get_cost(
			self,
			output,
			truth,
			rescore=True
		):
		'''
		Build the symbolic anchor-based detection cost.

		Each ground-truth box is assigned a grid cell (via `argmin_unique` on the
		squared distance between box centre and cell centre) and, within that cell,
		the anchor with the highest IOU against the truth.

		Parameters
		----------
		output : theano.tensor
			Network output, indexed here as (example, box, channel, row, col).
			Channels 0-3 are box parameters, channel 4 the objectness score and
			the last `num_classes` channels the class scores.
		truth : theano.tensor
			Ground truth indexed as (example, object, channel); channels 0-3 are
			read as (x, y, w, h) with (x, y) the box origin, the last
			`num_classes` channels as class targets.
		rescore : bool
			If True the objectness target is the IOU of the matched prediction,
			otherwise it is 1.

		Returns
		-------
		tuple
			`(cost, [iou], extras)` where `extras` is
			`[row_idx, col_idx, acr_idx, cost_noobject, cost_anchor, cost_coord, cost_class, cost_obj]`.
		'''
		# Create every weight placeholder independently: another cost variant may
		# already have created `_lambda_obj` without `_lambda_anchor`, in which
		# case a single hasattr check on `_lambda_obj` would lead to an
		# AttributeError when reading the missing one.
		for attr in ('_lambda_obj', '_lambda_noobj', '_lambda_anchor'):
			if not hasattr(self, attr):
				setattr(self, attr, T.scalar(attr[1:]))
		lambda_obj, lambda_noobj, lambda_anchor = self._lambda_obj, self._lambda_noobj, self._lambda_anchor

		def box_iou(boxes, targets):
			# IOU along axis 2, both arguments read as (x, y, w, h).
			xi = T.maximum(boxes[:,:,0], targets[:,:,0])
			yi = T.maximum(boxes[:,:,1], targets[:,:,1])
			xf = T.minimum(boxes[:,:,[0,2]].sum(axis=2), targets[:,:,[0,2]].sum(axis=2))
			yf = T.minimum(boxes[:,:,[1,3]].sum(axis=2), targets[:,:,[1,3]].sum(axis=2))
			w, h = T.maximum(xf - xi, 0), T.maximum(yf - yi, 0)

			isec = w * h
			union = T.prod(boxes[:,:,[2,3]], axis=2) + T.prod(targets[:,:,[2,3]], axis=2) - isec
			return isec / union

		# cell centres in normalised image coordinates
		w_cell, h_cell = 1./self.output_shape[1], 1./self.output_shape[0]
		x, y = T.arange(w_cell/2, 1., w_cell), T.arange(h_cell/2, 1., h_cell)
		y, x = meshgrid(x, y)
		x, y = x.dimshuffle('x','x','x',0,1), y.dimshuffle('x','x','x',0,1)

		# anchor sizes, shared once and reshaped where needed
		w_box = theano.shared(np.asarray([b[0] for b in self.boxes]), name='w_acr')
		h_box = theano.shared(np.asarray([b[1] for b in self.boxes]), name='h_acr')

		# create anchors for later: (cell x, cell y, anchor w, anchor h) per box and cell
		w_acr = w_box.dimshuffle('x',0,'x','x','x') * T.ones_like(x)
		h_acr = h_box.dimshuffle('x',0,'x','x','x') * T.ones_like(y)
		anchors = T.concatenate((x * T.ones_like(w_acr), y * T.ones_like(h_acr), w_acr, h_acr), axis=2)
		anchors = T.repeat(anchors, truth.shape[0], axis=0)

		# squared distance between every truth centre and every cell centre
		cell_coord = T.concatenate((x,y), axis=2)
		gt_coord = (truth[:,:,:2] + truth[:,:,2:4]/2).dimshuffle(0,1,2,'x','x')

		gt_dist = T.sum((gt_coord - cell_coord)**2, axis=2).reshape((truth.shape[0],truth.shape[1],-1))

		cell_idx = argmin_unique(gt_dist, 1, 2).reshape((-1,)) # assign unique cell to each obj per example
		row_idx = T.cast(cell_idx // self.output_shape[1], 'int64')
		col_idx = cell_idx - row_idx * self.output_shape[1]
		num_idx = T.repeat(T.arange(truth.shape[0]).reshape((-1,1)), truth.shape[1], axis=1).reshape((-1,))
		obj_idx = T.repeat(T.arange(truth.shape[1]).reshape((1,-1)), truth.shape[0], axis=0).reshape((-1,))

		valid_example = gt_dist[num_idx, obj_idx, cell_idx] < 1 # if example further than 1 away from cell it's a garbage example

		keep = valid_example.nonzero()
		num_idx, obj_idx = num_idx[keep], obj_idx[keep]
		row_idx, col_idx = row_idx[keep], col_idx[keep]

		truth_flat = truth[num_idx, obj_idx, :].dimshuffle(0,'x',1)

		# predictions and cell centres at the matched cells
		pred_matched = output[num_idx,:,:,row_idx, col_idx]
		x, y = x[:,0,0,row_idx, col_idx].dimshuffle(1,0), y[:,0,0,row_idx, col_idx].dimshuffle(1,0)
		w_acr = w_box.dimshuffle('x',0)
		h_acr = h_box.dimshuffle('x',0)

		# reformat prediction: sizes are exp-scaled anchors, origins are the
		# cell centre plus predicted offset minus half the size
		pred_shift = pred_matched
		pred_shift = T.set_subtensor(pred_shift[:,:,2], w_acr * T.exp(pred_shift[:,:,2]))
		pred_shift = T.set_subtensor(pred_shift[:,:,3], h_acr * T.exp(pred_shift[:,:,3]))
		pred_shift = T.set_subtensor(pred_shift[:,:,0], pred_shift[:,:,0] + T.repeat(x, pred_shift.shape[1], axis=1) - pred_shift[:,:,2]/2)
		pred_shift = T.set_subtensor(pred_shift[:,:,1], pred_shift[:,:,1] + T.repeat(y, pred_shift.shape[1], axis=1) - pred_shift[:,:,3]/2)

		# iou between rescaled predictions and truth
		iou = box_iou(pred_shift, truth_flat)

		# iou between anchors and truth, used to pick the responsible anchor
		anchors_matched = anchors[num_idx,:,:,row_idx,col_idx]
		iou_acr = box_iou(anchors_matched, truth_flat)

		# get max iou
		acr_idx = T.argmax(iou_acr, axis=1)

		# reformat truth into the space the network predicts in
		truth_formatted = truth_flat
		truth_formatted = T.repeat(truth_formatted, self.boxes.__len__(), axis=1)
		truth_formatted = T.set_subtensor(truth_formatted[:,:,0], truth_formatted[:,:,0] + truth_formatted[:,:,2]/2 - T.repeat(x, truth_formatted.shape[1], axis=1))
		truth_formatted = T.set_subtensor(truth_formatted[:,:,1], truth_formatted[:,:,1] + truth_formatted[:,:,3]/2 - T.repeat(y, truth_formatted.shape[1], axis=1))
		truth_formatted = T.set_subtensor(truth_formatted[:,:,2], T.log(truth_formatted[:,:,2] / w_acr))
		truth_formatted = T.set_subtensor(truth_formatted[:,:,3], T.log(truth_formatted[:,:,3] / h_acr))
		truth_formatted = truth_formatted[T.arange(truth_formatted.shape[0]),acr_idx,:]

		#
		# calculate cost
		#
		item_idx = T.arange(pred_matched.shape[0])

		# objectness / box parameters are pushed towards zero everywhere except
		# at the matched boxes, whose contribution is subtracted again
		cost_noobject = lambda_noobj * (T.mean(output[:,:,4]**2) - T.sum(pred_matched[item_idx, acr_idx,4]**2) / output[:,:,4].size)
		cost_anchor = lambda_anchor * (T.mean(T.sum(output[:,:,:4]**2, axis=2)) - T.sum(T.sum(pred_matched[item_idx,acr_idx,:4]**2, axis=1)) / output[:,:,0].size)
		cost_coord = lambda_obj * T.mean(T.sum((pred_matched[item_idx,acr_idx,:4] - truth_formatted[:,:4])**2, axis=1))
		cost_class = lambda_obj * T.mean(T.sum(-truth_formatted[:,-self.num_classes:] * T.log(pred_matched[item_idx, acr_idx, -self.num_classes:]), axis=1))

		if rescore:
			cost_obj = lambda_obj * T.mean((pred_matched[item_idx, acr_idx,4] - iou[item_idx, acr_idx])**2)
		else:
			cost_obj = lambda_obj * T.mean((pred_matched[item_idx, acr_idx,4] - 1)**2)

		cost = cost_noobject + cost_obj + cost_anchor + cost_coord + cost_class

		return cost, [iou], [row_idx, col_idx, acr_idx, cost_noobject, cost_anchor, cost_coord, cost_class, cost_obj]
Пример #4
0
	def _get_cost3(
			self,
			output,
			truth,
			rescore=True
		):
		'''
		Build a symbolic detection cost where each truth box is assigned to the
		grid cell containing its centre and to the best-overlapping box there.

		Parameters
		----------
		output : theano.tensor
			Network output, indexed here as (example, box, channel, row, col).
			Channels 0-3 are box parameters, channel 4 the objectness score and
			the last `num_classes` channels the class scores.
		truth : theano.tensor
			Ground truth indexed as (example, object, channel); channels 0-3 are
			read as (x, y, w, h), the last `num_classes` as class targets.
		rescore : bool
			If True the objectness target is the IOU of the matched prediction,
			otherwise it is 1.

		Returns
		-------
		tuple
			`(cost, [iou])`.
		'''
		# Only the two weights used below are needed. The previous else-branch
		# also read `self._thresh`, which this method never creates, so a second
		# call raised AttributeError.
		for attr in ('_lambda_obj', '_lambda_noobj'):
			if not hasattr(self, attr):
				setattr(self, attr, T.scalar(attr[1:]))
		lambda_obj, lambda_noobj = self._lambda_obj, self._lambda_noobj

		cost = 0.

		# penalize everything, this will be undone if box matches ground truth
		cost += lambda_noobj * T.mean(output[:,:,4]**2)

		# get index for each truth
		# NOTE(review): `row_idx` is derived from the x centre scaled by
		# output_shape[1] but later indexes the axis tiled with output_shape[0]
		# (and vice versa for `col_idx`) -- confirm this is intended for
		# non-square grids.
		row_idx = T.cast(T.floor((truth[:,:,0] + 0.5 * truth[:,:,2]) * self.output_shape[1]), 'int32')
		col_idx = T.cast(T.floor((truth[:,:,1] + 0.5 * truth[:,:,3]) * self.output_shape[0]), 'int32')

		# image index, shape (examples, objects)
		img_idx = T.repeat(T.arange(truth.shape[0]).dimshuffle(0,'x'), truth.shape[1], axis=1)

		# index for each object in an image, shape (examples, objects).
		# The arange must be a row vector before repeating along axis 0:
		# repeating the 1-D arange yields [0,0,..,1,1,..], which does not line
		# up with the image-major flattening of img_idx/row_idx/col_idx.
		obj_idx = T.repeat(T.arange(truth.shape[1]).dimshuffle('x',0), truth.shape[0], axis=0)

		# reshape to flat
		row_idx = row_idx.reshape((-1,))
		col_idx = col_idx.reshape((-1,))
		img_idx = img_idx.reshape((-1,))
		obj_idx = obj_idx.reshape((-1,))

		# use only valid indices (i.e. greater or equal to zero)
		keep = T.bitwise_and(row_idx >= 0, col_idx >= 0).reshape((-1,)).nonzero()
		row_idx = row_idx[keep]
		col_idx = col_idx[keep]
		img_idx = img_idx[keep]
		obj_idx = obj_idx[keep]

		def at_cells(tensor, channel):
			# Values of `channel` for every box at each truth's assigned cell.
			return tensor[img_idx, obj_idx, :, channel, row_idx, col_idx]

		# reshape output and truth to (example, object, box, channel, row, col)
		output = output.dimshuffle(0,'x',1,2,3,4)
		truth = truth.dimshuffle(0,1,'x',2,'x','x')

		output = T.repeat(output, truth.shape[1], axis=1)
		truth = T.repeat(truth, self.boxes.__len__(), axis=2)
		truth = T.repeat(T.repeat(truth, self.output_shape[0], axis=4), self.output_shape[1], axis=5)

		# reformat ground truth labels so that they are relative to offsets
		# and that the width/height are log scale relative to the box height.

		# add offset to the x,y coordinates
		x_diff, y_diff = 1./self.output_shape[0], 1./self.output_shape[1]
		y, x = meshgrid(T.arange(0 + x_diff/2,1,x_diff), T.arange(0 + y_diff/2,1,y_diff))
		x, y = x.dimshuffle('x','x',0,1), y.dimshuffle('x','x',0,1)

		# scaling from each anchor box
		x_scale = theano.shared(np.asarray([b[0] for b in self.boxes]), name='x_scale', borrow=True).dimshuffle('x',0,'x','x')
		y_scale = theano.shared(np.asarray([b[1] for b in self.boxes]), name='y_scale', borrow=True).dimshuffle('x',0,'x','x')

		# change predicted output to proper scale
		pred = T.set_subtensor(output[:,:,:,0], output[:,:,:,0] + x)
		pred = T.set_subtensor(pred[:,:,:,1], pred[:,:,:,1] + y)
		pred = T.set_subtensor(pred[:,:,:,2], x_scale * T.exp(pred[:,:,:,2]))
		pred = T.set_subtensor(pred[:,:,:,3], y_scale * T.exp(pred[:,:,:,3]))

		# determine iou of chosen boxes
		pred_x, pred_y, pred_w, pred_h = [at_cells(pred, c) for c in range(4)]
		true_x, true_y, true_w, true_h = [at_cells(truth, c) for c in range(4)]

		xi = T.maximum(pred_x, true_x)
		yi = T.maximum(pred_y, true_y)
		xf = T.minimum(pred_x + pred_w, true_x + true_w)
		yf = T.minimum(pred_y + pred_h, true_y + true_h)
		w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

		isec = w * h
		iou = isec / (pred_w * pred_h + true_w * true_h - isec)

		# get index for matched boxes
		match_idx = T.argmax(iou, axis=1)

		# change truth to proper scale for error
		truth = T.set_subtensor(truth[:,:,:,0,:,:], truth[:,:,:,0,:,:] - x)
		truth = T.set_subtensor(truth[:,:,:,1,:,:], truth[:,:,:,1,:,:] - y)
		truth = T.set_subtensor(truth[:,:,:,2,:,:], T.log(truth[:,:,:,2,:,:] / x_scale))
		truth = T.set_subtensor(truth[:,:,:,3,:,:], T.log(truth[:,:,:,3,:,:] / y_scale))

		# add to cost boxes which have been matched
		# NOTE(review): `[:,match_idx]` gathers, for every truth, the boxes
		# matched by *all* truths (a K x K selection) rather than one box per
		# truth; `[T.arange(K), match_idx]` may have been intended -- confirm
		# before relying on the absolute scale of these terms.

		# correct for matched boxes
		cost -= lambda_noobj * T.mean(at_cells(output, 4)[:,match_idx]**2)

		# coordinate errors, one term per box parameter
		for channel in range(4):
			cost += lambda_obj * T.mean(
				(at_cells(output, channel)[:,match_idx] - at_cells(truth, channel)[:,match_idx])**2
			)

		# objectness error
		if rescore:
			cost += lambda_obj * T.mean(
				(at_cells(output, 4)[:,match_idx] - iou[:,match_idx])**2
			)
		else:
			cost += lambda_obj * T.mean(
				(at_cells(output, 4)[:,match_idx] - 1)**2
			)

		# class error (cross-entropy over the last `num_classes` channels)
		class_channels = slice(-self.num_classes, None)
		cost += lambda_obj * T.mean(
			(
				-at_cells(truth, class_channels)[:,match_idx] * \
				T.log(at_cells(output, class_channels)[:,match_idx])
			)
		)

		return cost, [iou]
Пример #5
0
	def _get_cost2(
			self,
			output,
			truth,
			rescore=True
		):
		'''
		Build a symbolic detection cost that assigns objects to anchor boxes by
		IOU, restricted to cells that the truth box covers sufficiently.

		A (object, box, cell) triple is a match when the box is the best box
		for that object, the object is the best object for that box, and the
		fraction of the cell covered by the truth box is at least `thresh`.

		Parameters
		----------
		output : theano.tensor
			Network output, indexed here as (example, box, channel, row, col).
			Channels 0-3 are box parameters, channel 4 the objectness score and
			the last `num_classes` channels the class scores.
		truth : theano.tensor
			Ground truth indexed as (example, object, channel); channels 0-3 are
			read as (x, y, w, h), the last `num_classes` as class targets.
		rescore : bool
			If True the objectness target is the IOU of the prediction with the
			truth (returned in the constants list), otherwise it is 1.

		Returns
		-------
		tuple
			`(cost, constants)`; `constants` holds the IOU tensor when
			`rescore` is True and is empty otherwise.
		'''
		# Create every placeholder independently: another cost variant may
		# already have created `_lambda_obj` without `_thresh`, in which case a
		# single hasattr check on `_lambda_obj` would lead to an AttributeError.
		for attr in ('_lambda_obj', '_lambda_noobj', '_thresh'):
			if not hasattr(self, attr):
				setattr(self, attr, T.scalar(attr[1:]))
		lambda_obj, lambda_noobj, thresh = self._lambda_obj, self._lambda_noobj, self._thresh

		cost = 0.
		# create grid for cells (cell centres in normalised coordinates)
		w_cell, h_cell =  1. / self.output_shape[1], 1. / self.output_shape[0]
		x, y = T.arange(w_cell / 2, 1., w_cell), T.arange(h_cell / 2, 1., h_cell)
		y, x = meshgrid(x, y)

		# reshape truth to match with cell
		truth_cell = truth.dimshuffle(0, 1, 2, 'x','x')
		x, y = x.dimshuffle('x','x',0,1), y.dimshuffle('x','x',0,1)

		# calculate overlap between cell and ground truth boxes
		xi, yi = T.maximum(truth_cell[:,:,0], x - w_cell/2), T.maximum(truth_cell[:,:,1], y - h_cell/2)
		xf = T.minimum(truth_cell[:,:,[0,2]].sum(axis=2), x + w_cell/2)
		yf = T.minimum(truth_cell[:,:,[1,3]].sum(axis=2), y + h_cell/2)
		w, h = T.maximum(xf - xi, 0), T.maximum(yf - yi, 0)

		# overlap between cell and ground truth box, as a fraction of the cell area
		overlap = (w * h) / (w_cell * h_cell)

		# repeat truth boxes
		truth_boxes = truth.dimshuffle(0, 1, 'x', 2, 'x', 'x')

		# create grid for anchor boxes: origin at the cell corner, unit size
		# that is scaled by the anchor dimensions below
		anchors = T.concatenate((x.dimshuffle(0,1,'x','x',2,3) - w_cell/2, y.dimshuffle(0,1,'x','x',2,3) - h_cell/2), axis=3)
		anchors = T.concatenate((anchors, T.ones_like(anchors)), axis=3)
		anchors = T.repeat(anchors, self.boxes.__len__(), axis=2)

		w_acr = theano.shared(np.asarray([b[0] for b in self.boxes]), name='w_acr', borrow=True).dimshuffle('x','x',0,'x','x')
		h_acr = theano.shared(np.asarray([b[1] for b in self.boxes]), name='h_acr', borrow=True).dimshuffle('x','x',0,'x','x')

		anchors = T.set_subtensor(anchors[:,:,:,2], anchors[:,:,:,2] * w_acr)
		anchors = T.set_subtensor(anchors[:,:,:,3], anchors[:,:,:,3] * h_acr)

		# find iou between anchors and ground truths
		xi, yi = T.maximum(truth_boxes[:,:,:,0], anchors[:,:,:,0]), T.maximum(truth_boxes[:,:,:,1], anchors[:,:,:,1])
		xf = T.minimum(truth_boxes[:,:,:,[0,2]].sum(axis=3), anchors[:,:,:,[0,2]].sum(axis=3))
		yf = T.minimum(truth_boxes[:,:,:,[1,3]].sum(axis=3), anchors[:,:,:,[1,3]].sum(axis=3))
		w, h = T.maximum(xf - xi, 0), T.maximum(yf - yi, 0)

		isec = w * h
		iou = isec / (T.prod(truth_boxes[:,:,:,[2,3]], axis=3) + T.prod(anchors[:,:,:,[2,3]], axis=3) - isec)

		overlap = overlap.dimshuffle(0,1,'x',2,3)

		# best object per box/cell and best box per object/cell
		best_iou_obj_idx = T.argmax(iou, axis=1).dimshuffle(0,'x',1,2,3)
		best_iou_box_idx = T.argmax(iou, axis=2).dimshuffle(0,1,'x',2,3)

		_,obj_idx,box_idx,_,_ = meshgrid(
			T.arange(truth.shape[0]),
			T.arange(truth.shape[1]),
			T.arange(self.boxes.__len__()),
			T.arange(self.output_shape[0]),
			T.arange(self.output_shape[1])
		)

		# define logical matrix assigning object to correct anchor box and cell.
		best_iou_idx = T.bitwise_and(
			T.bitwise_and(
				T.eq(best_iou_box_idx, box_idx),
				T.eq(best_iou_obj_idx, obj_idx)
			),
			overlap >= thresh
		)

		constants = []
		if rescore:
			# scale predictions correctly
			pred = output.dimshuffle(0,'x',1,2,3,4)
			pred = T.set_subtensor(pred[:,:,:,0], pred[:,:,:,0] + x.dimshuffle(0,1,'x',2,3))
			pred = T.set_subtensor(pred[:,:,:,1], pred[:,:,:,1] + y.dimshuffle(0,1,'x',2,3))
			pred = T.set_subtensor(pred[:,:,:,2], w_acr * T.exp(pred[:,:,:,2]))
			pred = T.set_subtensor(pred[:,:,:,3], h_acr * T.exp(pred[:,:,:,3]))

			# iou between rescaled predictions and truth replaces the anchor iou
			xi, yi = T.maximum(pred[:,:,:,0], truth_boxes[:,:,:,0]), T.maximum(pred[:,:,:,1], truth_boxes[:,:,:,1])
			xf = T.minimum(pred[:,:,:,[0,2]].sum(axis=3), truth_boxes[:,:,:,[0,2]].sum(axis=3))
			yf = T.minimum(pred[:,:,:,[1,3]].sum(axis=3), truth_boxes[:,:,:,[1,3]].sum(axis=3))
			w, h = T.maximum(xf - xi, 0.), T.maximum(yf - yi, 0.)

			isec = w * h
			iou = isec / (pred[:,:,:,[2,3]].prod(axis=3) + truth_boxes[:,:,:,[2,3]].prod(axis=3) - isec)

			# make sure iou is considered constant when taking gradient
			constants.append(iou)

		# format ground truths correctly: tile over boxes and cells ...
		truth_boxes = T.repeat(
			T.repeat(
				T.repeat(truth_boxes, self.boxes.__len__(), axis=2),
				self.output_shape[0], axis=4
			),
			self.output_shape[1], axis=5
		)

		# ... then express them relative to the anchors (offset / log-ratio)
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,0], truth_boxes[:,:,:,0] - anchors[:,:,:,0])
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,1], truth_boxes[:,:,:,1] - anchors[:,:,:,1])
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,2], T.log(truth_boxes[:,:,:,2] / anchors[:,:,:,2]))
		truth_boxes = T.set_subtensor(truth_boxes[:,:,:,3], T.log(truth_boxes[:,:,:,3] / anchors[:,:,:,3]))

		# add dimension for objects per image (raw network output, not rescaled)
		pred = T.repeat(output.dimshuffle(0,'x',1,2,3,4), truth.shape[1], axis=1)

		# penalize coordinates
		cost += lambda_obj * T.mean(((pred[:,:,:,:4] - truth_boxes[:,:,:,:4])**2).sum(axis=3)[best_iou_idx.nonzero()])

		# penalize class scores
		cost += lambda_obj * T.mean((-truth_boxes[:,:,:,-self.num_classes:] * T.log(pred[:,:,:,-self.num_classes:])).sum(axis=3)[best_iou_idx.nonzero()])

		# penalize objectness score
		if rescore:
			cost += lambda_obj * T.mean(((pred[:,:,:,4] - iou)**2)[best_iou_idx.nonzero()])
		else:
			cost += lambda_obj * T.mean(((pred[:,:,:,4] - 1.)**2)[best_iou_idx.nonzero()])

		# flip all matched and penalize all un-matched objectness scores
		not_matched_idx = best_iou_idx.sum(axis=1) > 0
		# `bitwise_not` is referenced unqualified here; presumably a project
		# helper imported at file level -- left as is.
		not_matched_idx = bitwise_not(not_matched_idx)

		# penalize objectness score for non-matched boxes
		cost += lambda_noobj * T.mean((pred[:,0,:,4]**2)[not_matched_idx.nonzero()])

		return cost, constants
Пример #6
0
	def detect(self, im, thresh=0.75, overlap=0.5, num_to_label=None, return_iou=False):
		'''
		Run the detector on a single image and return the boxes whose
		confidence exceeds `thresh`.

		Parameters
		----------
		im :
			Input image; passed through `format_image` and resized to
			`self.input_shape` before being fed to the network.
		thresh : float
			Minimum confidence (objectness times best class score) for a
			prediction to be kept.
		overlap : float
			Not read by this method.
		num_to_label : optional
			Lookup from integer class index to label; when given, the class
			of every box is replaced by `num_to_label[index]`.
		return_iou : bool
			If True also return the IOU matrix of the kept predictions.

		Returns
		-------
		list or tuple
			The list of `utils.BoundingBox` objects, multiplied by the original
			image size, or `(boxes, iou_matrix)` when `return_iou` is True.
		'''
		im = format_image(im, dtype=theano.config.floatX)

		old_size = im.shape[:2]
		im = cv2.resize(im, self.input_shape[::-1], interpolation=cv2.INTER_LINEAR).swapaxes(2,1).swapaxes(1,0).reshape((1,3) + self.input_shape)

		if not hasattr(self, '_detect_fn'):
			# Make theano do all the heavy lifting for detection, this should
			# speed up the process marginally. The compiled function is cached
			# on the instance.
			output = self.output_test

			if self.use_custom_cost:
				# regroup the flat channel axis into (box, channel)
				new_output = None
				for i in range(len(self.boxes)):
					cls_idx = T.arange(i * (5 + self.num_classes), (i+1) * (5 + self.num_classes))
					if new_output is None:
						new_output = output[:,cls_idx,:,:].dimshuffle(0,'x',1,2,3)
					else:
						new_output = T.concatenate((new_output, output[:,cls_idx,:,:].dimshuffle(0,'x',1,2,3)), axis=1)
				output = new_output

			thresh_var = T.scalar(name='thresh')

			# define offsets to predictions (cell centres)
			w_cell, h_cell =  1. / self.output_shape[1], 1. / self.output_shape[0]
			x, y = T.arange(w_cell / 2, 1., w_cell), T.arange(h_cell / 2, 1., h_cell)
			y, x = meshgrid(x, y)

			x, y = x.dimshuffle('x','x',0,1), y.dimshuffle('x','x',0,1)

			# define scale
			w_acr = theano.shared(np.asarray([b[0] for b in self.boxes]), name='w_acr', borrow=True).dimshuffle('x',0,'x','x')
			h_acr = theano.shared(np.asarray([b[1] for b in self.boxes]), name='h_acr', borrow=True).dimshuffle('x',0,'x','x')

			# rescale output: sizes first, then origins, then convert the sizes
			# into far corners so that channels 0-3 are (xi, yi, xf, yf)
			output = T.set_subtensor(output[:,:,2], w_acr * T.exp(output[:,:,2]))
			output = T.set_subtensor(output[:,:,3], h_acr * T.exp(output[:,:,3]))
			output = T.set_subtensor(output[:,:,0], output[:,:,0] + x - output[:,:,2] / 2)
			output = T.set_subtensor(output[:,:,1], output[:,:,1] + y - output[:,:,3] / 2)
			output = T.set_subtensor(output[:,:,2:4], output[:,:,2:4] + output[:,:,:2])

			# define confidence in prediction
			conf = output[:,:,4] * T.max(output[:,:,-self.num_classes:], axis=2)
			cls = T.argmax(output[:,:,-self.num_classes:], axis=2)

			# filter out all below thresh; one row per kept prediction:
			# (xi, yi, xf, yf, confidence, class index)
			above_thresh_idx = conf > thresh_var
			pred = T.concatenate(
				(
					output[:,:,0][above_thresh_idx.nonzero()].dimshuffle(0,'x'),
					output[:,:,1][above_thresh_idx.nonzero()].dimshuffle(0,'x'),
					output[:,:,2][above_thresh_idx.nonzero()].dimshuffle(0,'x'),
					output[:,:,3][above_thresh_idx.nonzero()].dimshuffle(0,'x'),
					conf[above_thresh_idx.nonzero()].dimshuffle(0,'x'),
					cls[above_thresh_idx.nonzero()].dimshuffle(0,'x')
				),
				axis=1
			)

			iou_matrix = utils.iou_matrix(pred)

			self._detect_fn = theano.function([self.input, thresh_var], [pred, iou_matrix])

		output, iou_matrix = self._detect_fn(im, thresh)

		boxes = []
		for i in range(output.shape[0]):
			# The graph above already emits corner coordinates, so the origin
			# must not be added to the far corner a second time here.
			# Assumes utils.BoundingBox takes (xi, yi, xf, yf) -- TODO confirm.
			coord, conf = output[i,:4], output[i,4]
			# The class index comes back as a float because it shares an array
			# with the coordinates; make it an integer so it can index a list.
			cls = int(output[i,5])
			if num_to_label is not None:
				cls = num_to_label[cls]
			box = utils.BoundingBox(*coord.tolist(), confidence=conf, cls=cls)
			boxes.append(box)

		# scale back to the size of the original image
		boxes = [b * old_size for b in boxes]

		if return_iou:
			return boxes, iou_matrix
		else:
			return boxes