def boxRegressionLoss(boxes, refBoxes, boxSizes):
    """Smooth-L1 regression loss between predicted and reference boxes.

    Both box tensors are converted from corner form to centre/size form with
    ``BoxUtils.x0y0x1y1_to_xywh`` and then compared in the network-output
    space (the same holds for y and h):

        tx - tx' = (x - x') / wa
        tw - tw' = log(w / w')

    Args:
        boxes: predicted boxes; split into 4 columns along axis 1
            (presumably [N, 4] as x0, y0, x1, y1 -- confirm with the caller).
        refBoxes: reference boxes, split the same way as ``boxes``.
        boxSizes: per-box normalisation sizes; split into 2 columns along
            axis 1 and read as (height, width).

    Returns:
        The sum of the four ``smooth_l1`` terms (x, y, w and h).
    """
    with tf.name_scope("boxRegressionLoss"):
        # tf.split takes (value, num_or_size_splits, axis) since TF 1.0; the
        # old (split_dim, num_split, value) order is rejected by that API.
        x, y, w, h = BoxUtils.x0y0x1y1_to_xywh(*tf.split(boxes, 4, axis=1))
        boxH, boxW = tf.split(boxSizes, 2, axis=1)
        ref_x, ref_y, ref_w, ref_h = BoxUtils.x0y0x1y1_to_xywh(
            *tf.split(refBoxes, 4, axis=1))

        # Every split column is flattened to rank 1 before the arithmetic.
        x, y, w, h, boxH, boxW, ref_x, ref_y, ref_w, ref_h = [
            tf.reshape(t, [-1])
            for t in (x, y, w, h, boxH, boxW, ref_x, ref_y, ref_w, ref_h)
        ]

        # Smooth L1 loss is defined on NN output values, which are not
        # available here, so the differences are mapped back into that space.
        # NOTE(review): log(w / ref_w) is not finite for zero-sized boxes.
        return (smooth_l1((x - ref_x) / boxW)
                + smooth_l1((y - ref_y) / boxH)
                + smooth_l1(tf.log(w / ref_w))
                + smooth_l1(tf.log(h / ref_h)))
def define(self, immediateSize, weightDecay):
    """Build the RPN head and publish its outputs as attributes of ``self``.

    Args:
        immediateSize: number of output channels of the 3x3 convolution
            that feeds both 1x1 prediction layers.
        weightDecay: scale handed to ``slim.l2_regularizer`` for every
            ``slim.conv2d`` created here.

    Sets:
        self.rawSizes: raw width/height outputs merged by
            ``BoxUtils.mergeBoxData`` (kept for the loss).
        self.boxes: result of ``BoxUtils.nnToImageBoxes`` on the raw outputs.
        self.boxSizes: ``self.tfAnchors`` tiled over the ``hIn`` x ``wIn``
            grid and reshaped to [-1, 2].
        self.scores: score layer output reshaped to [-1, 2].
    """
    with tf.name_scope('RPN'), slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weightDecay),
            padding='SAME'):
        # Prediction layers: one shared 3x3 conv, then two linear 1x1 heads.
        with tf.name_scope('NN'):
            hidden = slim.conv2d(self.input, immediateSize, 3,
                                 activation_fn=tf.nn.relu)
            rawScores = slim.conv2d(hidden, 2 * self.nAnchors, 1,
                                    activation_fn=None)
            rawCoords = slim.conv2d(hidden, 4 * self.nAnchors, 1,
                                    activation_fn=None)

        # The channel axis holds four equally sized groups: x, y, w, h.
        tx, ty, tw, th = tf.split(rawCoords, 4, axis=3)

        # Raw box sizes are kept for the loss.
        self.rawSizes = BoxUtils.mergeBoxData([tw, th])

        # Raw network outputs -> bounding box coordinates.
        self.boxes = BoxUtils.nnToImageBoxes(
            tx, ty, tw, th,
            self.wA, self.hA, self.inputDownscale, self.offset)

        # One anchor-size pair per grid cell and anchor.
        with tf.name_scope('box_sizes'):
            anchorSizes = tf.reshape(self.tfAnchors, [1, 1, 1, -1, 2])
            gridRepeats = tf.stack([1, self.hIn, self.wIn, 1, 1])
            anchorSizes = tf.tile(anchorSizes, gridRepeats)
            self.boxSizes = tf.reshape(anchorSizes, [-1, 2])

        # Two score values per box.
        self.scores = tf.reshape(rawScores, [-1, 2])
def filterOutputBoxes(self, boxes, scores, others=None, preNmsCount=6000,
                      maxOutSize=300, nmsThreshold=0.7):
    """Clip, prune and NMS-filter boxes, keeping extra tensors in step.

    Steps: softmax the scores and keep column 1, clip the boxes with
    ``self.clipBoxesToEdge``, drop small boxes, keep at most ``preNmsCount``
    top-scoring boxes, then run non-maximum suppression.

    Args:
        boxes: candidate boxes, passed to ``tf.image.non_max_suppression``.
        scores: per-box two-column scores; column 1 of the softmax is the
            ranking score (presumably the foreground class -- confirm).
        others: optional list of extra per-box tensors to filter alongside
            ``boxes`` and ``scores``. ``None`` (the default) means no extras.
        preNmsCount: maximum number of boxes entering NMS.
        maxOutSize: ``max_output_size`` for NMS.
        nmsThreshold: ``iou_threshold`` for NMS.

    Returns:
        The result of ``MultiGather.gather`` over ``[boxes, scores] + others``
        at the indices selected by NMS.
    """
    # A None default avoids sharing one list object between calls; the copy
    # keeps the caller's list untouched.
    others = [] if others is None else list(others)

    with tf.name_scope("filter_output_boxes"):
        scores = tf.nn.softmax(scores)[:, 1]
        scores = tf.reshape(scores, [-1])

        # Clip boxes to edge
        boxes = self.clipBoxesToEdge(boxes)

        # Remove empty boxes. The extras go through the same filter so that
        # they stay row-aligned with boxes/scores; otherwise the NMS indices
        # computed below would address the wrong rows of them.
        # NOTE(review): relies on filterSmallBoxes returning
        # [boxes] + filtered extras for any number of extras -- confirm.
        kept = list(BoxUtils.filterSmallBoxes(boxes, [scores] + others))
        boxes, scores, others = kept[0], kept[1], kept[2:]

        def keepTopK():
            # NOTE(review): relies on gatherTopK returning
            # [scores] + gathered extras -- confirm.
            return tf.tuple(MultiGather.gatherTopK(
                scores, preNmsCount, [boxes] + others))

        def keepAll():
            return tf.tuple([scores, boxes] + others)

        top = list(tf.cond(tf.shape(scores)[0] > preNmsCount,
                           keepTopK, keepAll))
        scores, boxes, others = top[0], top[1], top[2:]

        # NMS filter
        nmsIndices = tf.image.non_max_suppression(
            boxes, scores,
            iou_threshold=nmsThreshold,
            max_output_size=maxOutSize)
        nmsIndices = tf.expand_dims(nmsIndices, axis=-1)

        return MultiGather.gather([boxes, scores] + others, nmsIndices)
def getPositiveBoxes(boxes):
    """Flag the boxes that count as positives against ``refBoxes``.

    A box is positive when its IoU with any reference box exceeds
    ``self.positiveIouThreshold``, or when it is the arg-max (along axis 0 of
    the IoU matrix) for some reference box. ``refBoxes`` and ``self`` come
    from the enclosing scope.

    Assumes ``BoxUtils.iou(boxes, refBoxes)`` returns a matrix with one row
    per box and one column per reference box -- TODO confirm.

    Returns:
        A tuple ``(oneIfPositive, maxIou, bestIou)``: the 0/1 float flag per
        row, the row-wise maximum IoU, and the row-wise arg-max cast to int32
        with a trailing unit axis.
    """
    with tf.name_scope('getPositiveBoxes'):
        overlaps = BoxUtils.iou(boxes, refBoxes)

        rowBest = tf.reduce_max(overlaps, axis=1)
        rowBestIndex = tf.cast(tf.argmax(overlaps, axis=1), tf.int32)
        rowBestIndex = tf.expand_dims(rowBestIndex, axis=-1)
        columnBestIndex = tf.argmax(overlaps, axis=0)

        # Box matching matrix: thresholded overlaps plus one forced match per
        # column, clamped back to 1.0 where the two coincide.
        overThreshold = tf.cast(overlaps > self.positiveIouThreshold,
                                tf.float32)
        forcedMatches = tf.transpose(
            tf.one_hot(columnBestIndex, tf.shape(overThreshold)[0]))
        matchMatrix = tf.stop_gradient(
            tf.minimum(overThreshold + forcedMatches, 1.0))

        # A row is positive if it matches at least one column.
        positiveFlags = tf.stop_gradient(tf.reduce_max(matchMatrix, axis=1))

        return positiveFlags, rowBest, rowBestIndex
def refineBoxes(self, boxes):
    """Apply the regression-map corrections to ``boxes``.

    The corrections are read with ``self.roiMean(self.regressionMap, boxes)``
    and applied in centre/size form: the centre moves by a fraction of the
    box size and the size is scaled by ``exp`` of the correction.

    Args:
        boxes: boxes unstacked along axis 1 into four columns (presumably
            [N, 4] as x0, y0, x1, y1 -- confirm with the caller).

    Returns:
        A tuple ``(refined, refSizes)``: the refined boxes converted back
        with ``BoxUtils.xywh_to_x0y0x1y1`` and stacked along axis 1, and the
        pre-refinement sizes stacked as (h, w) along axis 1.
    """
    with tf.name_scope("refineBoxes"):
        boxFineData = self.roiMean(self.regressionMap, boxes)

        # tf.unpack / tf.pack were renamed to tf.unstack / tf.stack; the new
        # names are required on TensorFlow >= 1.0.
        x, y, w, h = BoxUtils.x0y0x1y1_to_xywh(*tf.unstack(boxes, axis=1))
        x_rel, y_rel, w_rel, h_rel = tf.unstack(boxFineData, axis=1)

        # Captured before w and h are overwritten below.
        refSizes = tf.stack([h, w], axis=1)

        x = x + x_rel * w
        y = y + y_rel * h
        w = w * tf.exp(w_rel)
        h = h * tf.exp(h_rel)

        refined = tf.stack(BoxUtils.xywh_to_x0y0x1y1(x, y, w, h), axis=1)
        return refined, refSizes
def boxRegressionLoss(boxes, rawSizes, refBoxes, boxSizes):
    """Smooth-L1 box regression loss using the raw size outputs.

    The centre terms are mapped back into the network-output space
    (``tx - tx' = (x - x') / wa``). The size terms compare the raw outputs in
    ``rawSizes`` directly against ``log(ref_size / box_size)``, so the
    decoded width and height of ``boxes`` are never used.

    Args:
        boxes: predicted boxes, unstacked along axis 1 into four columns.
        rawSizes: raw size outputs, unstacked along axis 1 as (w, h).
        refBoxes: reference boxes, unstacked the same way as ``boxes``.
        boxSizes: normalisation sizes, unstacked along axis 1 as (h, w).

    Returns:
        The sum of the four ``smooth_l1`` terms (x, y, w and h).
    """
    with tf.name_scope("rawBoxRegressionLoss"):
        # Only the centres of the decoded boxes take part in the loss.
        cx, cy, _, _ = BoxUtils.x0y0x1y1_to_xywh(*tf.unstack(boxes, axis=1))
        rawW, rawH = tf.unstack(rawSizes, axis=1)
        anchorH, anchorW = tf.unstack(boxSizes, axis=1)
        gtX, gtY, gtW, gtH = BoxUtils.x0y0x1y1_to_xywh(
            *tf.unstack(refBoxes, axis=1))

        (cx, cy, rawW, rawH, anchorH, anchorW,
         gtX, gtY, gtW, gtH) = reshapeAll(
            [cx, cy, rawW, rawH, anchorH, anchorW, gtX, gtY, gtW, gtH])

        # Size targets expressed in the network-output space.
        targetW = tf.log(gtW / anchorW)
        targetH = tf.log(gtH / anchorH)

        # Smooth L1 loss is defined on NN output values, but only the box
        # sizes are available in that form here; the centre coordinates are
        # transformed back in a numerically stable way.
        total = smooth_l1((cx - gtX) / anchorW)
        total = total + smooth_l1((cy - gtY) / anchorH)
        total = total + smooth_l1(rawW - targetW)
        total = total + smooth_l1(rawH - targetH)
        return total
def getRefinementLoss():
    """Build the refinement loss over positive and negative proposals.

    Reads ``proposals``, ``refBoxes``, ``refClasses``, ``getPosLoss``,
    ``getNegLoss`` and ``self`` from the enclosing scope. Proposals are split
    by their best IoU against ``refBoxes``; the reference boxes themselves
    are appended to the positives.

    With ``self.hardMining`` the per-box losses of both groups are
    concatenated and the mean of ``gatherTopK(..., self.nTrainBoxes)`` is
    returned. Otherwise the two mean losses are combined, weighted by the
    length of each loss tensor.

    Returns:
        A scalar loss tensor.
    """
    with tf.name_scope("getRefinementLoss"):
        overlaps = BoxUtils.iou(proposals, refBoxes)
        bestOverlap = tf.reduce_max(overlaps, axis=1)
        bestRef = tf.cast(tf.argmax(overlaps, axis=1), tf.int32)
        bestRef = tf.expand_dims(bestRef, axis=-1)

        # Positive / negative proposal indices, chosen by best IoU.
        posIdx = tf.cast(
            tf.where(bestOverlap > self.posIouTheshold), tf.int32)
        inNegativeBand = tf.logical_and(
            bestOverlap < self.negIouThesholdHi,
            bestOverlap > self.negIouThesholdLo)
        negIdx = tf.cast(tf.where(inNegativeBand), tf.int32)

        # Split the boxes and their reference indices.
        posBoxes, posRefs = MultiGather.gather([proposals, bestRef], posIdx)
        negBoxes = tf.gather_nd(proposals, negIdx)

        # Add the GT boxes as positives, each pointing at its own index.
        posBoxes = tf.concat([posBoxes, refBoxes], 0)
        gtRefs = tf.reshape(tf.range(tf.shape(refClasses)[0]), [-1, 1])
        posRefs = tf.concat([posRefs, gtRefs], 0)

        # The loss builders are only invoked for non-empty collections.
        posTotal = tf.shape(posBoxes)[0]
        negTotal = tf.shape(negBoxes)[0]

        if self.hardMining:
            def noLosses():
                return tf.zeros((0,), tf.float32)

            posLoss = tf.cond(
                posTotal > 0,
                lambda: getPosLoss(posBoxes, posRefs, 0)[0],
                noLosses)
            negLoss = tf.cond(
                negTotal > 0,
                lambda: getNegLoss(negBoxes, 0),
                noLosses)
            allLoss = tf.concat([posLoss, negLoss], 0)

            def meanOfTopLosses():
                return tf.reduce_mean(
                    Utils.MultiGather.gatherTopK(allLoss, self.nTrainBoxes))

            return tf.cond(tf.shape(allLoss)[0] > 0,
                           meanOfTopLosses,
                           lambda: tf.constant(0.0))

        posLoss, posCount = tf.cond(
            posTotal > 0,
            lambda: getPosLoss(posBoxes, posRefs, self.nTrainPositives),
            lambda: tf.tuple([tf.constant(0.0), tf.constant(0, tf.int32)]))
        # The negatives receive whatever budget the positives left over.
        negLoss = tf.cond(
            negTotal > 0,
            lambda: getNegLoss(negBoxes, self.nTrainBoxes - posCount),
            lambda: tf.constant(0.0))

        # Each mean is weighted by the number of entries in its loss tensor.
        posWeight = tf.cast(tf.shape(posLoss)[0], tf.float32)
        negWeight = tf.cond(
            negTotal > 0,
            lambda: tf.cast(tf.shape(negLoss)[0], tf.float32),
            lambda: tf.constant(0.0))

        weighted = (tf.reduce_mean(posLoss) * posWeight
                    + tf.reduce_mean(negLoss) * negWeight)
        return weighted / (negWeight + posWeight)
def genAllAnchors(self):
    """Return the boxes produced by all-zero network outputs.

    A zero tensor of shape [1, hIn, wIn, nAnchors] is passed as each of the
    four raw outputs of ``BoxUtils.nnToImageBoxes``, together with
    ``self.wA``, ``self.hA``, ``self.inputDownscale`` and ``self.offset``.
    """
    with tf.name_scope('genAllAnchors'):
        gridShape = [1, self.hIn, self.wIn, self.nAnchors]
        zeroOutput = tf.zeros(gridShape, tf.float32)
        return BoxUtils.nnToImageBoxes(
            zeroOutput, zeroOutput, zeroOutput, zeroOutput,
            self.wA, self.hA, self.inputDownscale, self.offset)