def forward_global_domain_cls(self, base_feat, eta, target):
    # Image-level (global) domain classifier; `gc` adds a context branch.
    if self.gc:
        domain_global, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        if not target:
            # The context vector is only extracted for source images.
            _, feat = self.netD(base_feat.detach())
        else:
            feat = None
    else:
        domain_global = self.netD(grad_reverse(base_feat, lambd=eta))
        feat = None
    return domain_global, feat  # , diff
def forward_local_domain_cls(self, base_feat, eta, target):
    # Pixel-level (local) domain classifier; `lc` adds a context branch.
    if self.lc:
        d_pixel, _ = self.netD_pixel(grad_reverse(base_feat, lambd=eta))
        # print(d_pixel.mean())
        if not target:
            _, feat_pixel = self.netD_pixel(base_feat.detach())
        else:
            feat_pixel = None
    else:
        d_pixel = self.netD_pixel(grad_reverse(base_feat, lambd=eta))
        feat_pixel = None
    return d_pixel, feat_pixel
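# `grad_reverse` is used by every adversarial branch in this file but defined
# elsewhere in the repo. Below is a minimal sketch of a gradient reversal layer
# (Ganin & Lempitsky, 2015) compatible with the calls here, assuming the modern
# torch.autograd.Function API; the repo's own implementation may differ.
import torch


class _GradReverse(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x, lambd):
        # Identity on the forward pass.
        ctx.lambd = lambd
        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        # Negated, scaled gradient on the backward pass, which trains the
        # feature extractor adversarially against the domain classifier.
        return grad_output.neg() * ctx.lambd, None


def grad_reverse(x, lambd=1.0):
    return _GradReverse.apply(x, lambd)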
def adv_forward(self, base_feat1, base_feat2, base_feat, pooled_feat, adv_num, eta=1.0):
    # Local (pixel-level) domain classifier for adversary `adv_num`.
    if self.lc:
        d_pixel, _ = self.netD_pixels[adv_num](grad_reverse(base_feat1, lambd=eta))
        # print(d_pixel)
        _, feat_pixel = self.netD_pixels[adv_num](base_feat1.detach())
    else:
        d_pixel = self.netD_pixels[adv_num](grad_reverse(base_feat1, lambd=eta))
    # Mid-level domain classifier.
    if self.gc:
        domain_mid, _ = self.netD_mids[adv_num](grad_reverse(base_feat2, lambd=eta))
        _, feat_mid = self.netD_mids[adv_num](base_feat2.detach())
    else:
        domain_mid = self.netD_mids[adv_num](grad_reverse(base_feat2, lambd=eta))
    # Global domain classifier.
    if self.gc:
        domain_p, _ = self.netDs[adv_num](grad_reverse(base_feat, lambd=eta))
        # if target:
        #     return d_pixel, domain_p, domain_mid  # , diff
        _, feat = self.netDs[adv_num](base_feat.detach())
    else:
        domain_p = self.netDs[adv_num](grad_reverse(base_feat, lambd=eta))

    # NOTE: everything below assumes self.lc and self.gc are both enabled;
    # otherwise feat_pixel / feat_mid / feat are undefined.
    feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
    feat_mid = feat_mid.view(1, -1).repeat(pooled_feat.size(0), 1)
    feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
    # concatenate the context vectors from all three levels
    feat = torch.cat((feat_mid, feat), 1)
    feat = torch.cat((feat_pixel, feat), 1)
    # randomized multilinear map of instance features and context, followed by
    # an instance-level domain classifier
    feat_random = self.RandomLayers[adv_num]([pooled_feat, feat])
    d_ins = self.netD_das[adv_num](grad_reverse(feat_random, lambd=eta))
    return d_pixel, domain_p, domain_mid, d_ins
def forward(self, x):
    # Global-pooled summary of the raw input feature map (no gradient reversal).
    x_base = F.relu(self.bn0(x))
    x_base = F.avg_pool2d(x_base, (x_base.size(2), x_base.size(3)))
    x_base = x_base.view(-1, 1024)
    # Adversarial branch: gradients are reversed before the conv stack.
    x = F.relu(self.bn1(self.conv1(grad_reverse(x, lambd=1.0))))
    x = F.relu(self.bn2(self.conv2(x)))
    x = F.relu(self.bn3(self.conv3(x)))
    x = F.avg_pool2d(x, (x.size(2), x.size(3)))
    x1 = x.view(-1, 256)
    x2 = self.fc(x1)
    return x_base, x1, x2
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)
    if self.context:
        domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return domain_p  # , diff
        _, feat = self.netD(base_feat.detach())
    else:
        domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return domain_p  # , diff

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    if self.context:
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        # select the corresponding columns according to roi labels
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, domain_p  # , diff
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, test=False, eta=1.0, hints=False):
    if test:
        self.training = False
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat1 = self.RCNN_base1(im_data)
    # NOTE: lc/gc context vectors are only produced when self.dc == 'swda'.
    if self.dc == 'swda':
        if self.lc:
            d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
            # print(d_pixel)
            if not target:
                _, feat_pixel = self.netD_pixel(base_feat1.detach())
        else:
            d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
    base_feat = self.RCNN_base2(base_feat1)
    if self.dc == 'vanilla':
        domain = self.netD_dc(grad_reverse(base_feat, lambd=eta))
        if target:
            return None, domain
    elif self.dc == 'swda':
        if self.gc:
            domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p
            _, feat = self.netD(base_feat.detach())
        else:
            domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
            if target:
                return d_pixel, domain_p

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
    if self.lc:
        feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
    if self.gc:
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    if self.conf:
        # confidence branch
        confidence = torch.sigmoid(self.netD_confidence(pooled_feat))
        # make sure we don't have any numerical instability
        eps = 1e-12
        pred_original = torch.clamp(cls_prob, 0. + eps, 1. - eps)
        confidence = torch.clamp(confidence, 0. + eps, 1. - eps)
        confidence_loss = -torch.log(confidence)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        if self.conf and hints:
            # randomly set half of the confidences to 1 (i.e. no hints)
            b = torch.bernoulli(torch.Tensor(confidence.size()).uniform_(0, 1)).cuda()
            conf = confidence * b + (1 - b)
            labels_onehot = encode_onehot(rois_label, pred_original.size(1))
            # interpolate between the prediction and the one-hot label,
            # weighted by the predicted confidence
            pred_new = pred_original * conf.expand_as(pred_original) + \
                labels_onehot * (1 - conf.expand_as(labels_onehot))
            pred_new = torch.log(pred_new)
            RCNN_loss_cls = F.nll_loss(pred_new, rois_label)
        else:
            RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)
    if test:
        self.training = True

    if self.dc == 'swda' and self.conf is None:
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, None, None
    elif self.dc == 'vanilla' and self.conf is None:
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, None, None
    elif self.conf and self.dc is None:
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, confidence_loss, confidence
    elif self.conf and self.dc == "swda":
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, confidence_loss, confidence
    elif self.conf and self.dc == "vanilla":
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, domain, confidence_loss, confidence
    else:
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, None, None, None, None
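# `encode_onehot`, used by the confidence-hint loss above, is imported from
# elsewhere in the repo. A minimal sketch of a compatible helper, assuming
# `labels` is a 1-D LongTensor of class indices on the same device as the
# predictions:
def encode_onehot(labels, n_classes):
    # One row per RoI, one column per class, 1.0 at the ground-truth index.
    onehot = torch.zeros(labels.size(0), n_classes, device=labels.device)
    onehot.scatter_(1, labels.view(-1, 1), 1.0)
    return onehot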
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data
    lossQ = -1

    # feed image data to base model to obtain base feature map
    base_feat = self.RCNN_base(im_data)
    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes, target)

    # if it is the training phase on source data, use ground-truth bboxes for refining
    if self.training and not target:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0
        lossQ = -1

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # instance-level domain classifier on the pooled (still spatial) RoI features
    if self.context:
        d_instance, _ = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta))
        d_score_total, feat = self.netD_pixel(pooled_feat.detach())
    else:
        d_score_total = self.netD_pixel(pooled_feat.detach())
        d_instance = self.netD_pixel(grad_reverse(pooled_feat, lambd=eta))

    # split per image (128 RoIs per image) and reduce the domain score maps
    # to one score per RoI by averaging over the spatial dimensions
    d_instance_q = d_instance.split(128, 0)
    d_score_total_q = d_score_total.split(128, 0)
    d_score_total_qs = []
    for img in range(batch_size):
        temp = torch.mean(d_score_total_q[img], dim=3)
        d_score_total_qs.append(torch.mean(temp, dim=2))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)

    if self.training and self.S_agent:
        # DQN agent selects source RoIs, episode by episode
        pooled_feat_s = pooled_feat.split(128, 0)
        for img in range(batch_size):
            pooled_feat_d = pooled_feat_s[img]
            select_iter = int(pooled_feat_d.shape[0] / self.candidate_num)
            total_index = list(range(0, pooled_feat_d.shape[0]))
            np.random.shuffle(total_index)
            select_index = []
            for eposide in range(select_iter):
                select_list = list(range(0, self.candidate_num))
                batch_idx = total_index[eposide * self.candidate_num:(eposide + 1) * self.candidate_num]
                state = pooled_feat_d[batch_idx]
                d_score = d_score_total_qs[img][batch_idx]
                for it in range(self.select_num):
                    epsilon = self.epsilon_by_epoch(self.iter_dqn)
                    action_index = self.current_model.act(state, epsilon, select_list)
                    try:
                        select_list.remove(action_index)
                    except ValueError:
                        print("select_list:", select_list)
                        print("action_index:", action_index)
                        print("error: action already removed from select_list")
                        continue
                    # reward: discard RoIs whose domain score exceeds the source threshold
                    if d_score[action_index] > self.ts:
                        reward = -1
                    else:
                        reward = 1
                    # detached copy (was torch.tensor(state), which warns in newer PyTorch)
                    next_state = state.clone().detach()
                    next_state[action_index] = torch.zeros(1, next_state.shape[1])
                    done = 1 if it == (self.select_num - 1) else 0
                    self.replay_buffer.push(state, action_index, reward, next_state, done, select_list)
                    self.iter_dqn = self.iter_dqn + 1
                    state = next_state
                select_index = select_index + [batch_idx[i] for i in select_list]
                if len(self.replay_buffer) > cfg.BATCH_SIZE_DQN:
                    lossQ = DQN.compute_td_loss(self.current_model, self.target_model,
                                                self.replay_buffer, cfg.BATCH_SIZE_DQN)
                if np.mod(self.iter_dqn, cfg.replace_target_iter) == 0:
                    DQN.update_target(self.current_model, self.target_model)
            if img == 0:
                d_instance_refine = d_instance_q[img][select_index]
            else:
                d_instance_refine = torch.cat((d_instance_refine, d_instance_q[img][select_index]), 0)

    # detached copy kept for the target-domain agent below
    pooled_feat_original = pooled_feat.clone().detach()
    if self.context:
        feat = feat.view(feat.size(0), -1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic and not target:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    if self.training and target and self.T_agent:
        # DQN agent selects target RoIs, mirroring the source agent
        pooled_feat_t = pooled_feat_original.split(128, 0)
        for img in range(batch_size):
            pooled_feat_d = pooled_feat_t[img]
            select_iter_T = int(pooled_feat_d.shape[0] / self.candidate_num)
            total_index_T = list(range(0, pooled_feat_d.shape[0]))
            np.random.shuffle(total_index_T)
            select_index_T = []
            for eposide_T in range(select_iter_T):
                select_list_T = list(range(0, self.candidate_num))
                batch_idx_T = total_index_T[eposide_T * self.candidate_num:(eposide_T + 1) * self.candidate_num]
                state_T = pooled_feat_d[batch_idx_T]
                d_score_T = d_score_total_qs[img][batch_idx_T]
                for it in range(self.select_num):
                    epsilon_T = self.epsilon_by_epoch_T(self.iter_dqn_T)
                    action_index_T = self.current_model_T.act(state_T, epsilon_T, select_list_T)
                    try:
                        select_list_T.remove(action_index_T)
                    except ValueError:
                        print("select_list_T:", select_list_T)
                        print("action_index:", action_index_T)
                        print("error: action already removed from select_list_T")
                        continue
                    # reward: keep RoIs whose domain score exceeds the target threshold
                    if d_score_T[action_index_T] > self.tt:
                        reward = 1
                    else:
                        reward = -1
                    next_state_T = state_T.clone().detach()
                    next_state_T[action_index_T] = torch.zeros(1, next_state_T.shape[1])
                    done = 1 if it == (self.select_num - 1) else 0
                    self.replay_buffer_T.push(state_T, action_index_T, reward, next_state_T, done, select_list_T)
                    self.iter_dqn_T = self.iter_dqn_T + 1
                    state_T = next_state_T
                select_index_T = select_index_T + [batch_idx_T[i] for i in select_list_T]
                if len(self.replay_buffer_T) > cfg.BATCH_SIZE_DQN:
                    lossQ = DQN.compute_td_loss(self.current_model_T, self.target_model_T,
                                                self.replay_buffer_T, cfg.BATCH_SIZE_DQN)
                if np.mod(self.iter_dqn_T, cfg.replace_target_iter) == 0:
                    DQN.update_target(self.current_model_T, self.target_model_T)
            if img == 0:
                d_instance_refine = d_instance_q[img][select_index_T]
            else:
                d_instance_refine = torch.cat((d_instance_refine, d_instance_q[img][select_index_T]), 0)

    if target:
        return d_instance_refine, lossQ

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    if self.S_agent:
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance_refine, lossQ  # , diff
    else:
        return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_instance, lossQ
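# `epsilon_by_epoch` / `epsilon_by_epoch_T` above are defined elsewhere in the
# repo. A minimal sketch of the usual exponentially decaying epsilon-greedy
# schedule they presumably implement; the constants here are illustrative
# assumptions, not the repo's values:
import math


def make_epsilon_schedule(eps_start=1.0, eps_final=0.01, eps_decay=500):
    # Maps the DQN iteration counter to an exploration rate that decays
    # from eps_start toward eps_final.
    def epsilon_by_epoch(iter_dqn):
        return eps_final + (eps_start - eps_final) * math.exp(-1.0 * iter_dqn / eps_decay)
    return epsilon_by_epoch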
def forward(self, im_data, im_info, im_cls_lb, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat1 = self.RCNN_base1(im_data)
    if self.lc:
        d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        # print(d_pixel)
        if not target:
            _, feat_pixel = self.netD_pixel(base_feat1.detach())
    else:
        d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
    base_feat = self.RCNN_base2(base_feat1)
    if self.gc:
        domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return d_pixel, domain_p  # , diff
        _, feat = self.netD(base_feat.detach())
    else:
        domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return d_pixel, domain_p  # , diff

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # supervise the base feature map with image-level (category) labels
    cls_feat = self.avg_pool(base_feat)
    cls_feat = self.conv_lst(cls_feat).squeeze(-1).squeeze(-1)
    # cls_feat = self.conv_lst(self.bn1(self.avg_pool(base_feat))).squeeze(-1).squeeze(-1)
    category_loss_cls = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == "align":
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == "pool":
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
    if self.lc:
        feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
    if self.gc:
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return (rois, cls_prob, bbox_pred, category_loss_cls, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p)  # , diff
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat1 = self.RCNN_base1(im_data)
    if self.lc:
        d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        # print(d_pixel)
        _, feat_pixel = self.netD_pixel(base_feat1.detach())
    else:
        d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
    if self.la_attention:
        base_feat1 = local_attention(base_feat1, d_pixel.detach())
    base_feat2 = self.RCNN_base2(base_feat1)
    if self.gc:
        domain_mid, _ = self.netD_mid(grad_reverse(base_feat2, lambd=eta))
        _, feat_mid = self.netD_mid(base_feat2.detach())
    else:
        domain_mid = self.netD_mid(grad_reverse(base_feat2, lambd=eta))
    if self.mid_attention:
        base_feat2 = middle_attention(base_feat2, domain_mid.detach())
    base_feat = self.RCNN_base3(base_feat2)
    if self.gc:
        domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        # if target:
        #     return d_pixel, domain_p, domain_mid  # , diff
        _, feat = self.netD(base_feat.detach())
    else:
        domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
        # if target:
        #     return d_pixel, domain_p, domain_mid  # , diff

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
    # NOTE: the context vectors below assume self.lc and self.gc are enabled.
    feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
    feat_mid = feat_mid.view(1, -1).repeat(pooled_feat.size(0), 1)
    feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
    # concatenate the context vectors from all three levels
    feat = torch.cat((feat_mid, feat), 1)
    feat = torch.cat((feat_pixel, feat), 1)
    # randomized multilinear map of instance features and context, then an
    # instance-level domain classifier
    feat_random = self.RandomLayer([pooled_feat, feat])
    d_ins = self.netD_da(grad_reverse(feat_random, lambd=eta))
    if target:
        return d_pixel, domain_p, domain_mid, d_ins
    pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, domain_mid, d_ins  # , diff
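# `RandomLayer` / `RandomLayers`, used above to fuse instance features with the
# concatenated context vectors, are defined elsewhere in the repo. A minimal
# sketch of the CDAN-style randomized multilinear map (Long et al., 2018) they
# appear to implement; dimensions and naming here are illustrative assumptions:
import torch
import torch.nn as nn


class RandomLayerSketch(nn.Module):
    def __init__(self, input_dims, output_dim=1024):
        super(RandomLayerSketch, self).__init__()
        self.output_dim = output_dim
        # One fixed random projection per input feature.
        self.random_matrices = [torch.randn(d, output_dim) for d in input_dims]

    def forward(self, inputs):
        # Project each input, then combine with an element-wise product,
        # approximating the tensor (multilinear) product of the features.
        projected = [x.mm(m.to(x.device)) for x, m in zip(inputs, self.random_matrices)]
        out = projected[0] / float(self.output_dim) ** 0.5
        for p in projected[1:]:
            out = out * p
        return out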
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # build the multi-hot class-label vector for the image
    if self.training and target:
        cls_label_ind = torch.unique(gt_boxes[:, :, 4].cpu())
        cls_label = torch.zeros(self.n_classes)
        cls_label[cls_label_ind.long()] = 1
        # assume the background category is always present
        cls_label[0] = 1
        cls_label = cls_label.cuda()
        cls_label.requires_grad = False

    # feed image data to base model to obtain base feature map
    base_feat1 = self.RCNN_base1(im_data)
    if self.lc:
        d_pixel, _ = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))
        # print(d_pixel)
        if not target:
            _, feat_pixel = self.netD_pixel_1(base_feat1.detach())
    else:
        d_pixel = self.netD_pixel_1(grad_reverse(base_feat1, lambd=eta))
    base_feat2 = self.RCNN_base2(base_feat1)
    if self.lc:
        d_pixel_2, _ = self.netD_pixel_2(grad_reverse(base_feat2, lambd=eta))
    else:
        d_pixel_2 = self.netD_pixel_2(grad_reverse(base_feat2, lambd=eta))
    base_feat3 = self.RCNN_base3(base_feat2)
    if self.lc:
        d_pixel_3, _ = self.netD_pixel_3(grad_reverse(base_feat3, lambd=eta))
    else:
        d_pixel_3 = self.netD_pixel_3(grad_reverse(base_feat3, lambd=eta))
    # print(d_pixel_3.mean())
    base_feat4 = self.RCNN_base4(base_feat3)
    if self.gc:
        d_pixel_4, _ = self.netD_1(grad_reverse(base_feat4, lambd=eta))
    else:
        d_pixel_4 = self.netD_1(grad_reverse(base_feat4, lambd=eta))  # something wrong
    base_feat = self.RCNN_base5(base_feat4)
    # for target-domain training, only the domain outputs are needed
    if self.gc:
        domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p
        _, feat = self.netD(base_feat.detach())
    else:
        domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
        if target:
            return d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase on source data, use ground-truth bboxes for refining
    if self.training and not target:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
    if self.lc:
        feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
    if self.gc:
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic and not target:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    # disabled weakly-supervised image-level BCE branch
    if False:
        # cls_prob_sum = torch.sum(cls_prob, 0)  # x = max(1, x)
        # cls_prob_sum = cls_prob_sum.repeat(2, 1)
        # cls_prob_sum = torch.min(cls_prob_sum, 0)[0]
        max_roi_cls_prob = torch.max(cls_prob, 0)[0]
        if not (max_roi_cls_prob.data.cpu().numpy().all() >= 0. and
                max_roi_cls_prob.data.cpu().numpy().all() <= 1.):
            pdb.set_trace()
        if not (cls_label.data.cpu().numpy().all() >= 0. and
                cls_label.data.cpu().numpy().all() <= 1.):
            pdb.set_trace()
        BCE_loss = F.binary_cross_entropy(max_roi_cls_prob, cls_label)
        return d_pixel, domain_p, BCE_loss

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, d_pixel_2, d_pixel_3, d_pixel_4, domain_p  # , diff
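# `_smooth_l1_loss`, used throughout these forward passes, comes from the
# repo's net_utils. A minimal sketch of the standard Faster R-CNN smooth-L1
# box regression loss with inside/outside weights, assuming sigma=1; the
# repo's version may differ in reduction details:
def _smooth_l1_loss_sketch(bbox_pred, bbox_targets, bbox_inside_weights,
                           bbox_outside_weights, sigma=1.0):
    sigma_2 = sigma ** 2
    box_diff = bbox_inside_weights * (bbox_pred - bbox_targets)
    abs_diff = torch.abs(box_diff)
    # quadratic below 1/sigma^2, linear above
    smooth_sign = (abs_diff < 1.0 / sigma_2).detach().float()
    loss_box = (box_diff ** 2) * (sigma_2 / 2.0) * smooth_sign \
        + (abs_diff - 0.5 / sigma_2) * (1.0 - smooth_sign)
    loss_box = bbox_outside_weights * loss_box
    return loss_box.mean()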
def forward(self, im_data, im_info, im_cls_lb, gt_boxes, num_boxes, target=False, eta=1.0, weight_value=1.0):
    if target:
        need_backprop = torch.Tensor([0]).cuda()
        self.RCNN_rpn.eval()
    else:
        need_backprop = torch.Tensor([1]).cuda()
        self.RCNN_rpn.train()
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # feed image data to base model to obtain base feature map
    base_feat1 = self.RCNN_base1(im_data)
    if self.lc:
        d_pixel, _ = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
        # print(d_pixel)
        # if not target:
        if True:
            _, feat_pixel = self.netD_pixel(base_feat1.detach())
    else:
        d_pixel = self.netD_pixel(grad_reverse(base_feat1, lambd=eta))
    base_feat = self.RCNN_base2(base_feat1)
    if self.gc:
        domain_p, _ = self.netD(grad_reverse(base_feat, lambd=eta))
        # if target:
        #     return d_pixel, domain_p  # , diff
        _, feat = self.netD(base_feat.detach())
    else:
        domain_p = self.netD(grad_reverse(base_feat, lambd=eta))
        # if target:
        #     return d_pixel, domain_p  # , diff

    # feed base feature map to RPN to obtain rois
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # supervise the base feature map with image-level (category) labels
    cls_feat = self.avg_pool(base_feat)
    cls_feat = self.conv_lst(cls_feat).squeeze(-1).squeeze(-1)
    # cls_feat = self.conv_lst(self.bn1(self.avg_pool(base_feat))).squeeze(-1).squeeze(-1)
    category_loss_cls = nn.BCEWithLogitsLoss()(cls_feat, im_cls_lb)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == "align":
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == "pool":
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    instance_pooled_feat = pooled_feat
    # feat_pixel = torch.zeros(feat_pixel.size()).cuda()
    if self.lc:
        feat_pixel = feat_pixel.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat_pixel, pooled_feat), 1)
        if self.da_use_contex:
            instance_pooled_feat = torch.cat((feat_pixel.detach(), instance_pooled_feat), 1)
    if self.gc:
        feat = feat.view(1, -1).repeat(pooled_feat.size(0), 1)
        pooled_feat = torch.cat((feat, pooled_feat), 1)
        if self.da_use_contex:
            instance_pooled_feat = torch.cat((feat.detach(), instance_pooled_feat), 1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    # instance-level domain adaptation
    instance_sigmoid, same_size_label = self.RCNN_instanceDA(instance_pooled_feat, need_backprop)
    if target:
        # weight each target instance by the gap between the image-level
        # category prediction and the instance-level class probability
        cls_pre_label = cls_prob.argmax(1).detach()
        cls_feat_sig = torch.sigmoid(cls_feat[0]).detach()
        target_weight = []
        for i in range(len(cls_pre_label)):
            label_i = cls_pre_label[i].item()
            if label_i > 0:
                diff_value = torch.exp(
                    weight_value * torch.abs(cls_feat_sig[label_i - 1] - cls_prob[i][label_i])).item()
                target_weight.append(diff_value)
            else:
                target_weight.append(1.0)
        instance_loss = nn.BCELoss(weight=torch.Tensor(target_weight).view(-1, 1).cuda())
    else:
        instance_loss = nn.BCELoss()
    DA_ins_loss_cls = instance_loss(instance_sigmoid, same_size_label)
    if target:
        return d_pixel, domain_p, DA_ins_loss_cls

    # compute bbox offset
    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return (rois, cls_prob, bbox_pred, category_loss_cls, rpn_loss_cls, rpn_loss_bbox,
            RCNN_loss_cls, RCNN_loss_bbox, rois_label, d_pixel, domain_p, DA_ins_loss_cls)
def forward(self, im_data, im_info, gt_boxes, num_boxes, target=False, eta=1.0):
    batch_size = im_data.size(0)
    im_info = im_info.data
    gt_boxes = gt_boxes.data
    num_boxes = num_boxes.data

    # -------------------------------------------------------------
    # feed image data to base model to obtain base feature map
    base_feat1 = self.RCNN_base1(im_data)
    # base_feat1: torch.Size([1, 256, 150, 184])
    domain_p1 = self.netD1(grad_reverse(base_feat1, lambd=eta))
    # re-weight base_feat1 by the entropy of the domain prediction
    # domain_p1: torch.Size([1, 1, 150, 200])
    domain_p1_en = prob2entropy2(domain_p1)
    base_feat1 = base_feat1 * domain_p1_en
    feat1 = self.netD_forward1(base_feat1.detach())
    # base_feat1.detach(): the gradients of self.netD_forward1() only update
    # its own parameters, not the backbone. Example:
    #     def forward(self, x):
    #         x = self.net1(x)
    #         return self.net2(x.detach())
    # training will only update parameters of net2, not net1.
    feat1_p = F.softmax(feat1, 1)
    feat1_en = prob2entropy(feat1_p)
    feat1 = feat1 * feat1_en
    # feat1: torch.Size([1, 128, 1, 1])

    # ----------------------------------------------------------------
    base_feat2 = self.RCNN_base2(base_feat1)
    domain_p2 = self.netD2(grad_reverse(base_feat2, lambd=eta))
    # domain_p2: torch.Size([1, 2]); base_feat2: torch.Size([1, 512, 75, 92])
    feat2 = self.netD_forward2(base_feat2.detach())
    feat2_p = self.fc2(feat2.view(-1, 128))  # nn.Linear(128, 2)
    feat2 = global_attention(feat2, feat2_p)
    # feat2: torch.Size([1, 128, 1, 1])

    # ----------------------------------------------------------------
    base_feat = self.RCNN_base3(base_feat2)
    domain_p3 = self.netD3(grad_reverse(base_feat, lambd=eta))
    # domain_p3: torch.Size([1, 2]); base_feat: torch.Size([1, 1024, 38, 46])
    feat3 = self.netD_forward3(base_feat.detach())
    feat3_p = self.fc3(feat3.view(-1, 128))
    feat3 = global_attention(feat3, feat3_p)
    # feat3: torch.Size([1, 128, 1, 1])

    # ----------------------------------------------------------------
    rois, rpn_loss_cls, rpn_loss_bbox = self.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

    # if it is the training phase, use ground-truth bboxes for refining
    if self.training:
        roi_data = self.RCNN_proposal_target(rois, gt_boxes, num_boxes)
        rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data
        rois_label = Variable(rois_label.view(-1).long())
        rois_target = Variable(rois_target.view(-1, rois_target.size(2)))
        rois_inside_ws = Variable(rois_inside_ws.view(-1, rois_inside_ws.size(2)))
        rois_outside_ws = Variable(rois_outside_ws.view(-1, rois_outside_ws.size(2)))
    else:
        rois_label = None
        rois_target = None
        rois_inside_ws = None
        rois_outside_ws = None
        rpn_loss_cls = 0
        rpn_loss_bbox = 0

    rois = Variable(rois)
    # do roi pooling based on predicted rois
    if cfg.POOLING_MODE == 'crop':
        # pdb.set_trace()
        # pooled_feat_anchor = _crop_pool_layer(base_feat, rois.view(-1, 5))
        grid_xy = _affine_grid_gen(rois.view(-1, 5), base_feat.size()[2:], self.grid_size)
        grid_yx = torch.stack([grid_xy.data[:, :, :, 1], grid_xy.data[:, :, :, 0]], 3).contiguous()
        pooled_feat = self.RCNN_roi_crop(base_feat, Variable(grid_yx).detach())
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            pooled_feat = F.max_pool2d(pooled_feat, 2, 2)
    elif cfg.POOLING_MODE == 'align':
        pooled_feat = self.RCNN_roi_align(base_feat, rois.view(-1, 5))
    elif cfg.POOLING_MODE == 'pool':
        pooled_feat = self.RCNN_roi_pool(base_feat, rois.view(-1, 5))

    # feed pooled features to top model
    pooled_feat = self._head_to_tail(pooled_feat)
    feat1 = feat1.view(1, -1).repeat(pooled_feat.size(0), 1)
    pooled_feat = torch.cat((feat1, pooled_feat), 1)
    feat2 = feat2.view(1, -1).repeat(pooled_feat.size(0), 1)
    pooled_feat = torch.cat((feat2, pooled_feat), 1)
    feat3 = feat3.view(1, -1).repeat(pooled_feat.size(0), 1)
    pooled_feat = torch.cat((feat3, pooled_feat), 1)

    # ---------------------------------------------------------------
    # instance-level domain classifier on the fused RoI features
    d_inst = self.netD_inst(grad_reverse(pooled_feat, lambd=eta))
    # d_inst: torch.Size([128, 2])
    # ---------------------------------------------------------------
    if target:
        return d_inst, domain_p1, domain_p2, domain_p3, \
            feat1_p, feat2_p, feat3_p

    bbox_pred = self.RCNN_bbox_pred(pooled_feat)
    if self.training and not self.class_agnostic:
        bbox_pred_view = bbox_pred.view(bbox_pred.size(0), int(bbox_pred.size(1) / 4), 4)
        bbox_pred_select = torch.gather(
            bbox_pred_view, 1,
            rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4))
        bbox_pred = bbox_pred_select.squeeze(1)

    # compute object classification probability
    cls_score = self.RCNN_cls_score(pooled_feat)
    cls_prob = F.softmax(cls_score, 1)

    RCNN_loss_cls = 0
    RCNN_loss_bbox = 0
    if self.training:
        # classification loss
        RCNN_loss_cls = F.cross_entropy(cls_score, rois_label)
        # bounding box regression L1 loss
        RCNN_loss_bbox = _smooth_l1_loss(bbox_pred, rois_target, rois_inside_ws, rois_outside_ws)

    cls_prob = cls_prob.view(batch_size, rois.size(1), -1)
    bbox_pred = bbox_pred.view(batch_size, rois.size(1), -1)

    return rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, \
        RCNN_loss_cls, RCNN_loss_bbox, rois_label, \
        d_inst, domain_p1, domain_p2, domain_p3, \
        feat1_p, feat2_p, feat3_p
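# `prob2entropy` / `prob2entropy2`, used above to turn domain predictions into
# attention weights, are defined elsewhere in the repo. A minimal sketch of the
# usual conversion of a probability map into a normalized self-information
# (entropy) weighting, as in ADVENT (Vu et al., 2019); this is an assumption
# about the repo's implementation, not a verbatim copy:
import math


def prob2entropy_sketch(prob, eps=1e-12):
    # Per-element self-information -p * log2(p), normalized by log2(C) with
    # C >= 2 channels so the attention weights stay in [0, 1].
    c = prob.size(1)
    return -prob * torch.log2(prob + eps) / math.log2(max(c, 2))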