def estimation(self, img):
    # activate GPU if available
    CUDA = torch.cuda.is_available()
    torch.manual_seed(self.seed)
    if CUDA:
        torch.cuda.manual_seed(self.seed)
        self.eval_net.cuda()

    cv2.imshow('Raw Image',
               cv2.resize(img, (img.shape[1], img.shape[0]),
                          interpolation=cv2.INTER_LINEAR))
    cv2.waitKey(1)

    # transform image from numpy array (BGR) to PIL image (RGB)
    img = PIL_Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    img = self.transform(img)

    if self.model.find('mapnet') >= 0:
        # keep a sliding window of the most recent 2 * skip frames
        if len(self.tmp_img) > 2 * self.skip:
            self.tmp_img.pop(0)
        self.tmp_img.append(img)

        # frame offsets fed to MapNet, relative to the newest frame
        skips = self.skip * np.ones(self.steps - 1)
        offsets = np.insert(skips, 0, 0).cumsum()
        offsets -= offsets[-1]
        offsets = offsets.astype(int)
        if self.idx > 2 * self.skip:
            index = 2 * self.skip + offsets
        else:
            index = self.idx + offsets
        index = np.minimum(np.maximum(index, 0), len(self.tmp_img) - 1)

        clip = [self.tmp_img[i] for i in index]
        img = torch.stack(clip, dim=0)

    img = img.unsqueeze(0)

    # output: 1 x 6 or 1 x STEPS x 6
    _, pose = step_feedfwd(img, self.eval_net, CUDA, train=False)
    s = pose.size()
    pose = pose.cpu().data.numpy().reshape((-1, s[-1]))

    # normalize the predicted quaternions
    q = [qexp(p[3:]) for p in pose]
    pose = np.hstack((pose[:, :3], np.asarray(q)))

    # un-normalize the predicted translations
    pose[:, :3] = pose[:, :3] * self.max_value

    # for MapNet, the last pose in the window corresponds to the current frame
    if self.model.find('mapnet') >= 0:
        pred_pose = pose[-1]
    else:
        pred_pose = pose[0]

    self.idx += 1
    return pred_pose
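# Hedged usage sketch (not part of the original code): one way `estimation`
# might be driven from a live camera loop. The `estimator` argument and the
# OpenCV capture setup are assumptions for illustration only.
import cv2

def run_live_demo(estimator, camera_id=0):
    cap = cv2.VideoCapture(camera_id)
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                break
            # pred_pose is a 7-vector: translation (3) followed by a unit quaternion (4)
            pred_pose = estimator.estimation(frame)
            print('t = {}, q = {}'.format(pred_pose[:3], pred_pose[3:]))
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()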
targ_poses = np.zeros((L, 7))  # store all target poses

# inference loop
for batch_idx, (data, target) in enumerate(loader):
    if batch_idx % 200 == 0:
        print('Image {:d} / {:d}'.format(batch_idx, len(loader)))

    # indices into the global arrays storing poses
    if (args.model.find('vid') >= 0) or args.pose_graph:
        idx = data_set.get_indices(batch_idx)
    else:
        idx = [batch_idx]
    idx = idx[len(idx) // 2]

    # output: 1 x 6 or 1 x STEPS x 6
    _, output = step_feedfwd(data, model, CUDA, train=False)
    s = output.size()
    output = output.cpu().data.numpy().reshape((-1, s[-1]))
    target = target.numpy().reshape((-1, s[-1]))

    # normalize the predicted quaternions
    q = [qexp(p[3:]) for p in output]
    output = np.hstack((output[:, :3], np.asarray(q)))
    q = [qexp(p[3:]) for p in target]
    target = np.hstack((target[:, :3], np.asarray(q)))

    if args.pose_graph:  # do pose graph optimization
        kwargs = {'sax': sax, 'saq': saq, 'srx': srx, 'srq': srq}
        # target includes both absolute poses and vos
        vos = target[len(output):]
        target = target[:len(output)]
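# Hedged sketch (assumption): `qexp` above is the quaternion exponential map used
# in MapNet-style code, converting a 3-dim log quaternion into a 4-dim unit
# quaternion (w, x, y, z). A minimal implementation consistent with that usage:
import numpy as np

def qexp_sketch(q):
    """Map a 3-vector log quaternion to a unit quaternion [w, x, y, z]."""
    n = np.linalg.norm(q)
    # np.sinc(n / np.pi) == sin(n) / n, with the n == 0 case handled gracefully
    return np.hstack((np.cos(n), np.sinc(n / np.pi) * q))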
def forward(self, data, target, criterion, retain_graph=False):
    """
    Args:
        data: input image with shape (1, 3, H, W)
        target: ground-truth pose(s) consumed by the criterion
        criterion: loss function; its value is the score that is backpropagated
        retain_graph: whether to retain the computation graph after backward()
    Return:
        saliency_map: saliency map with the same spatial dimensions as the input
        score: loss value used for the backward pass
    """
    b, c, h, w = data.size()
    # WARNING: backpropagation is on the loss, not on a classification score
    score, output = step_feedfwd(data, self.model_arch,
                                 torch.cuda.is_available(),
                                 criterion=criterion, target=target,
                                 train=True, activation_maps=True)
    # Previous inline implementation, kept for reference:
    """
    data_var = Variable(data, requires_grad=False)
    if torch.cuda.is_available():
        data_var = data_var.cuda(async=True)
    output = self.model_arch(data_var)
    ## WARNING: Backpropagation on loss not classification
    dual_target = type(target) is list or type(target) is tuple
    if torch.cuda.is_available():
        if dual_target:
            target = tuple(single_target.cuda(async=True) for single_target in target)
        else:
            target = target.cuda(async=True)
    if dual_target:
        target = tuple(Variable(t, requires_grad=False) for t in target)
        for i in range(len(output)):
            print('Output shape[%d]: %s' % (i, output[i].shape))
            print('Target shape[%d]: %s' % (i, target[i].shape))
    else:
        target = Variable(target, requires_grad=False)
    score = criterion(output, target)
    """

    self.model_arch.zero_grad()
    score.backward(retain_graph=retain_graph)
    gradients = self.gradients['value']      # dS/dA
    activations = self.activations['value']  # A
    b, k, u, v = gradients.size()

    # Grad-CAM++ channel weights
    alpha_num = gradients.pow(2)
    alpha_denom = gradients.pow(2).mul(2) + \
        activations.mul(gradients.pow(3)).view(b, k, u * v).sum(-1, keepdim=True).view(b, k, 1, 1)
    alpha_denom = torch.where(alpha_denom != 0.0, alpha_denom,
                              torch.ones_like(alpha_denom))
    alpha = alpha_num.div(alpha_denom + 1e-7)

    positive_gradients = F.relu(score.exp() * gradients)  # ReLU(dY/dA) == ReLU(exp(S) * dS/dA)
    weights = (alpha * positive_gradients).view(b, k, u * v).sum(-1).view(b, k, 1, 1)

    saliency_map = (weights * activations).sum(1, keepdim=True)
    saliency_map = F.relu(saliency_map)
    saliency_map = F.interpolate(saliency_map, size=(h, w),
                                 mode='bilinear', align_corners=False)
    saliency_map_min, saliency_map_max = saliency_map.min(), saliency_map.max()
    saliency_map = (saliency_map - saliency_map_min).div(
        saliency_map_max - saliency_map_min).data

    return saliency_map, score
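# Hedged usage sketch (assumption: the surrounding class is constructed with a
# model and registers forward/backward hooks that fill self.activations and
# self.gradients). `gradcam_pp`, `data`, `target`, `criterion`, and
# `raw_bgr_image` are placeholders for illustration only.
import cv2
import numpy as np

def overlay_saliency(img_bgr, saliency_map, alpha=0.5):
    """Blend a (1, 1, H, W) saliency map onto an HxWx3 BGR image for inspection."""
    heat = (saliency_map.squeeze().cpu().numpy() * 255).astype(np.uint8)
    heat = cv2.applyColorMap(heat, cv2.COLORMAP_JET)
    heat = cv2.resize(heat, (img_bgr.shape[1], img_bgr.shape[0]))
    return cv2.addWeighted(img_bgr, 1 - alpha, heat, alpha, 0)

# saliency_map, score = gradcam_pp.forward(data, target, criterion)
# vis = overlay_saliency(raw_bgr_image, saliency_map)
# cv2.imshow('Grad-CAM++ saliency', vis); cv2.waitKey(0)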