def forward(self, predictions):
    """Decode raw network outputs into boxes and per-class scores.

    Args:
        predictions: tuple of network outputs. When ``self.use_arm`` is
            True it is ``(arm_loc, arm_conf, loc, conf, priors)``,
            otherwise ``(loc, conf, priors)``.
            loc:    (tensor) loc preds, shape [batch, num_priors, 4]
            conf:   (tensor) conf preds, shape [batch*num_priors, num_classes]
            priors: (tensor) prior boxes/variances, shape [num_priors, 4]

    Returns:
        (self.boxes, self.scores): decoded boxes of shape
        [batch, num_priors, 4] and class scores of shape
        [batch, num_priors, num_classes].
    """
    if self.use_arm:
        arm_loc, arm_conf, loc, conf, priors = predictions
        arm_loc_data = arm_loc.data
        arm_conf_data = arm_conf.data
        # Zero the ODM class scores wherever the ARM objectness score says
        # there is no object (hard negative filtering).
        arm_object_conf = arm_conf_data[:, 1:]
        no_object_index = arm_object_conf <= 0.01  # self.object_score
        conf.data[no_object_index.expand_as(conf.data)] = 0
    else:
        loc, conf, priors = predictions
    loc_data = loc.data
    conf_data = conf.data
    prior_data = priors.data

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)
    # BUG FIX: the original allocated size-1 buffers and called the
    # non-existent in-place `expand_`, and referenced an undefined local
    # `num_priors` when num > 1. Allocate with the real batch size and use
    # `self.num_priors` instead.
    self.boxes = torch.zeros(num, self.num_priors, 4)
    self.scores = torch.zeros(num, self.num_priors, self.num_classes)

    if num == 1:
        # size 1 x num_priors x num_classes
        conf_preds = conf_data.unsqueeze(0)
    else:
        conf_preds = conf_data.view(num, self.num_priors, self.num_classes)

    # Decode predictions into bboxes, one image at a time.
    for i in range(num):
        if self.use_arm:
            # Cascade decoding: refine the priors with the ARM offsets
            # first, then decode the ODM offsets relative to them.
            default = decode(arm_loc_data[i], prior_data, self.variance)
            default = center_size(default)
            decoded_boxes = decode(loc_data[i], default, self.variance)
        else:
            decoded_boxes = decode(loc_data[i], prior_data, self.variance)
        conf_scores = conf_preds[i].clone()
        self.boxes[i] = decoded_boxes
        self.scores[i] = conf_scores
    return self.boxes, self.scores
def forward(self, predictions):
    """Decode raw network outputs into boxes and per-class softmax scores.

    Args:
        predictions: tuple of network outputs. When
            ``self.cfg.MODEL.CASCADE`` is True it is
            ``(arm_loc, arm_conf, loc, conf, priors)``, otherwise
            ``(loc, conf, priors)``.
            loc:    (tensor) loc preds, shape [batch, num_priors, 4]
            conf:   (tensor) conf preds, shape [batch*num_priors, num_classes]
            priors: (tensor) prior boxes/variances, shape [num_priors, 4]

    Returns:
        (self.boxes, self.scores): decoded boxes [batch, num_priors, 4]
        and class scores [batch, num_priors, num_classes].
    """
    if self.cfg.MODEL.CASCADE:
        arm_loc, arm_conf, loc, conf, priors = predictions
        arm_conf = F.softmax(arm_conf.view(-1, 2), 1)
        conf = F.softmax(conf.view(-1, self.num_classes), 1)
        arm_loc_data = arm_loc.data
        arm_conf_data = arm_conf.data
        # Zero the class scores wherever the ARM objectness score says
        # there is no object.
        arm_object_conf = arm_conf_data[:, 1:]
        no_object_index = arm_object_conf <= self.object_score
        conf.data[no_object_index.expand_as(conf.data)] = 0
    else:
        loc, conf, priors = predictions
        conf = F.softmax(conf.view(-1, self.num_classes), 1)
    loc_data = loc.data
    conf_data = conf.data
    # Keep only as many priors as there are location predictions.
    prior_data = priors[:loc_data.size(1), :]

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)
    # NOTE: the original pre-allocated self.boxes/self.scores with
    # torch.zeros and immediately overwrote them below; the dead
    # allocations have been removed.
    conf_preds = conf_data.view(num, self.num_priors, self.num_classes)
    # Tile the priors across the batch so the whole batch decodes in one
    # vectorized call.
    batch_prior = prior_data.view(-1, self.num_priors, 4).expand(
        (num, self.num_priors, 4))
    batch_prior = batch_prior.contiguous().view(-1, 4)

    if self.cfg.MODEL.CASCADE:
        # Cascade decoding: refine priors with ARM offsets, then decode the
        # ODM offsets relative to the refined boxes (note the second stage
        # uses self.variance1).
        default = decode(arm_loc_data.view(-1, 4), batch_prior, self.variance)
        default = center_size(default)
        decoded_boxes = decode(loc_data.view(-1, 4), default, self.variance1)
    else:
        decoded_boxes = decode(loc_data.view(-1, 4), batch_prior, self.variance)

    self.scores = conf_preds.view(num, self.num_priors, self.num_classes)
    self.boxes = decoded_boxes.view(num, self.num_priors, 4)
    return self.boxes, self.scores
def lincomb_mask_loss(positive_bool, prior_max_index, coef_p, proto_p, mask_gt, prior_max_box): proto_h = proto_p.size(1) # 138 proto_w = proto_p.size(2) # 138 loss_m = 0 for i in range(coef_p.size(0)): # coef_p.shape: (n, 19248, 32) with torch.no_grad(): # downsample the gt mask to the size of 'proto_p' downsampled_masks = F.interpolate(mask_gt[i].unsqueeze(0), (proto_h, proto_w), mode='bilinear', align_corners=False).squeeze(0) downsampled_masks = downsampled_masks.permute(1, 2, 0).contiguous() # (138, 138, num_objects) # binarize the gt mask because of the downsample operation downsampled_masks = downsampled_masks.gt(0.5).float() pos_prior_index = prior_max_index[i, positive_bool[i]] # pos_prior_index.shape: [num_positives] pos_prior_box = prior_max_box[i, positive_bool[i]] pos_coef = coef_p[i, positive_bool[i]] if pos_prior_index.size(0) == 0: continue # If exceeds the number of masks for training, select a random subset old_num_pos = pos_coef.size(0) if old_num_pos > cfg.masks_to_train: perm = torch.randperm(pos_coef.size(0)) select = perm[:cfg.masks_to_train] pos_coef = pos_coef[select] pos_prior_index = pos_prior_index[select] pos_prior_box = pos_prior_box[select] num_pos = pos_coef.size(0) pos_mask_gt = downsampled_masks[:, :, pos_prior_index] # mask assembly by linear combination # @ means dot product mask_p = torch.sigmoid(proto_p[i] @ pos_coef.t()) # mask_p.shape: (138, 138, num_pos) mask_p = crop(mask_p, pos_prior_box) # pos_prior_box.shape: (num_pos, 4) mask_loss = F.binary_cross_entropy(torch.clamp(mask_p, 0, 1), pos_mask_gt, reduction='none') # Normalize the mask loss to emulate roi pooling's effect on loss. pos_get_csize = center_size(pos_prior_box) mask_loss = mask_loss.sum(dim=(0, 1)) / pos_get_csize[:, 2] / pos_get_csize[:, 3] if old_num_pos > num_pos: mask_loss *= old_num_pos / num_pos loss_m += torch.sum(mask_loss) loss_m *= cfg.mask_alpha / proto_h / proto_w return loss_m
def detection_out(self, input, num_classes, objectness_score, name):
    """TensorFlow port of the cascade (ARM + ODM) detection decoder.

    Args:
        input: sequence of tensors [loc, conf, priors, arm_conf, arm_loc]
            (note arm_loc/arm_conf come from indices 4 and 3 respectively).
        num_classes: number of ODM classes.
        objectness_score: ARM objectness threshold below which class
            scores are zeroed out.
        name: unused scope name (kept for interface compatibility).

    Returns:
        (boxes, scores): decoded boxes [batch, num_priors, 4] and class
        scores [batch, num_priors, num_classes].
    """
    variance = [0.1, 0.2]
    loc, conf = input[0], input[1]
    prior_data = input[2]
    arm_loc, arm_conf = input[4], input[3]

    arm_loc = tf.reshape(arm_loc, [arm_loc.shape[0], -1, 4])
    arm_conf = tf.reshape(arm_conf, [-1, 2])
    # conf preds
    loc = tf.reshape(loc, [loc.shape[0], -1, 4])
    conf = tf.reshape(conf, [-1, num_classes])
    prior_data = tf.reshape(prior_data, [-1, 4])

    loc_data = loc
    conf_data = conf
    num = loc_data.shape[0]  # batch size
    arm_loc_data = arm_loc
    arm_conf_data = arm_conf

    # Zero the class scores wherever the ARM objectness says "no object".
    arm_object_conf = arm_conf_data[:, 1:]
    no_object_index = arm_object_conf <= objectness_score
    expands = tf.tile(no_object_index, [1, num_classes])
    conf_data = tf.where(expands, tf.zeros_like(conf_data), conf_data)

    # BUG FIX: conf_preds was previously only defined for num == 1
    # (NameError for larger batches), and the loop below overwrote
    # boxes/scores on every iteration so only the last image survived.
    # Reshape for any batch size and collect per-image results instead.
    conf_preds = tf.reshape(conf_data, [num, -1, num_classes])

    all_boxes = []
    all_scores = []
    # Decode predictions into bboxes, one image at a time.
    for i in range(num):
        # Cascade decoding: refine priors with ARM offsets, then decode
        # the ODM offsets relative to the refined boxes.
        default = decode(arm_loc_data[i], prior_data, variance)
        default = center_size(default)
        decoded_boxes = decode(loc_data[i], default, variance)
        conf_scores = conf_preds[i]
        all_boxes.append(tf.expand_dims(decoded_boxes, 0))
        all_scores.append(tf.expand_dims(conf_scores, 0))
    boxes = tf.concat(all_boxes, axis=0)
    scores = tf.concat(all_scores, axis=0)
    return boxes, scores
def forward(self, predictions):
    """Decode raw network outputs into boxes and per-class softmax scores.

    Args:
        predictions: tuple of network outputs. When
            ``self.cfg.MODEL.REFINE`` is True it is
            ``(arm_loc, arm_conf, loc, conf, priors)``, otherwise
            ``(loc, conf, priors)``.
            loc:    (tensor) loc preds, shape [batch, num_priors, 4]
            conf:   (tensor) conf preds, shape [batch*num_priors, num_classes]
            priors: (tensor) prior boxes/variances, shape [num_priors, 4]

    Returns:
        (self.boxes, self.scores): decoded boxes [batch, num_priors, 4]
        and class scores [batch, num_priors, num_classes].
    """
    if self.cfg.MODEL.REFINE:
        arm_loc, arm_conf, loc, conf, priors = predictions
        arm_conf = F.softmax(arm_conf.view(-1, 2), 1)
        conf = F.softmax(conf.view(-1, self.num_classes), 1)
        arm_loc_data = arm_loc.data
        arm_conf_data = arm_conf.data
        # Zero the class scores wherever the ARM objectness score says
        # there is no object.
        arm_object_conf = arm_conf_data[:, 1:]
        no_object_index = arm_object_conf <= self.object_score
        conf.data[no_object_index.expand_as(conf.data)] = 0
    else:
        loc, conf, priors = predictions
        conf = F.softmax(conf.view(-1, self.num_classes), 1)
    loc_data = loc.data
    conf_data = conf.data
    # Keep only as many priors as there are location predictions.
    prior_data = priors[:loc_data.size(1), :]

    num = loc_data.size(0)  # batch size
    self.num_priors = prior_data.size(0)
    # NOTE: the original pre-allocated self.boxes/self.scores with
    # torch.zeros and immediately overwrote them; the dead allocations and
    # the commented-out timing instrumentation have been removed.
    conf_preds = conf_data.view(num, self.num_priors, self.num_classes)
    # Tile the priors across the batch so the whole batch decodes in one
    # vectorized call.
    batch_prior = prior_data.view(-1, self.num_priors, 4).expand(
        (num, self.num_priors, 4))
    batch_prior = batch_prior.contiguous().view(-1, 4)

    if self.cfg.MODEL.REFINE:
        # Cascade decoding: refine priors with ARM offsets, then decode the
        # ODM offsets relative to the refined boxes.
        default = decode(
            arm_loc_data.view(-1, 4), batch_prior, self.variance)
        default = center_size(default)
        decoded_boxes = decode(
            loc_data.view(-1, 4), default, self.variance)
    else:
        decoded_boxes = decode(
            loc_data.view(-1, 4), batch_prior, self.variance)

    self.scores = conf_preds.view(num, self.num_priors, self.num_classes)
    self.boxes = decoded_boxes.view(num, self.num_priors, 4)
    return self.boxes, self.scores