def visualize(self, parameters=None):
    """Show the input image and, per mask channel, the target and output masks.

    Args:
        parameters: optional dict of display options. Only the ``'title'``
            key is read; it is used as a prefix for every image title and
            defaults to the empty string.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if parameters is None:
        parameters = {}
    title = parameters.get('title', '')

    image_original = PTImage.from_cwh_torch(self.data[0])
    ImageVisualizer().set_image(image_original, title + ' : Input')

    # need to draw the mask layers ontop of the data with transparency
    target_mask_chw = self.target[0]
    # Last element of output[0] -- presumably the final step of a sequence
    # of masks; TODO confirm against the producing model.
    output_mask_chw = self.output[0][-1]

    # draw a separate image for each channel for now
    for i in range(target_mask_chw.size(0)):
        class_name = self.class_lookup[i]
        # unsqueeze(0) restores a leading channel axis on the single mask
        imt = PTImage.from_cwh_torch(target_mask_chw[i, :, :].unsqueeze(0))
        imo = PTImage.from_cwh_torch(output_mask_chw[i, :, :].unsqueeze(0))
        ImageVisualizer().set_image(
            imt, title + ' : Target-{}'.format(class_name))
        ImageVisualizer().set_image(
            imo, title + ' : LOutput-{}'.format(class_name))
def visualize(self, parameters=None):
    """Visualize a sequence: every image in ``data[0]`` and every map in ``output[2]``.

    Args:
        parameters: optional dict of display options. Only the ``'title'``
            key is read; it prefixes every image title and defaults to ''.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if parameters is None:
        parameters = {}
    title = parameters.get('title', '')

    # Iterating walks the first axis, same as indexing 0..shape[0]-1.
    for i, frame in enumerate(self.data[0]):
        img = PTImage.from_cwh_torch(frame)
        ImageVisualizer().set_image(img, title + ' : Image {}'.format(i))

    for i, dmap in enumerate(self.output[2]):
        depth_map = PTImage.from_2d_wh_torch(dmap)
        ImageVisualizer().set_image(
            depth_map, title + ' : DepthMap {}'.format(i))
def visualize(self, parameters=None):
    """Show the target image followed by the output image(s).

    ``self.output[0]`` may be either a single image or a ``list`` of
    images; a list is shown as ``Output00``, ``Output01``, ... while a
    single image is shown as ``Output``.

    Args:
        parameters: optional dict of display options. Only the ``'title'``
            key is read; it prefixes every image title and defaults to ''.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if parameters is None:
        parameters = {}
    title = parameters.get('title', '')

    # The target is drawn the same way regardless of the output's form.
    image_target = PTImage.from_cwh_torch(self.target[0])
    ImageVisualizer().set_image(image_target, title + ' : Target')

    output = self.output[0]
    if isinstance(output, list):
        for i, o in enumerate(output):
            image_output = PTImage.from_cwh_torch(o)
            ImageVisualizer().set_image(
                image_output, title + ' : Output{:02d}'.format(i))
    else:
        image_output = PTImage.from_cwh_torch(output)
        ImageVisualizer().set_image(image_output, title + ' : Output')
def visualize(self, parameters=None):
    """Show the images and response maps for this sample.

    When ``parameters['mode']`` is ``'train'`` (the default), seven images
    are shown: the anchor (``output[0]``), the positive and negative frames
    (``data[0]``, ``data[1]``), their targets (``target[0]``,
    ``target[1]``) and the sigmoid of the two response maps
    (``output[1]``, ``output[2]``). In any other mode only the frame
    (``data[0]``) and the sigmoid of ``output[0]`` are shown.

    Args:
        parameters: optional dict of display options. Reads ``'mode'``
            (default ``'train'``) and ``'title'`` (default ''), the latter
            prefixing every image title.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if parameters is None:
        parameters = {}
    title = parameters.get('title', '')

    if parameters.get('mode', 'train') == 'train':
        image_pos = PTImage.from_cwh_torch(self.data[0])
        image_neg = PTImage.from_cwh_torch(self.data[1])
        image_anchor = PTImage.from_cwh_torch(self.output[0])
        # The tensor method replaces the deprecated F.sigmoid; detach()
        # keeps the visualization out of the autograd graph.
        image_pos_map = PTImage.from_2d_wh_torch(
            self.output[1].detach().sigmoid())
        image_neg_map = PTImage.from_2d_wh_torch(
            self.output[2].detach().sigmoid())
        image_pos_tar = PTImage.from_2d_wh_torch(self.target[0])
        image_neg_tar = PTImage.from_2d_wh_torch(self.target[1])

        labelled_images = [
            (image_anchor, ' : anchor'),
            (image_pos, ' : pos_frame'),
            (image_neg, ' : neg_frame'),
            (image_pos_tar, ' : pos_target'),
            (image_neg_tar, ' : neg_target'),
            (image_pos_map, ' : pos_res'),
            (image_neg_map, ' : neg_res'),
        ]
    else:
        img_frame = PTImage.from_cwh_torch(self.data[0])
        img_frame_xcor = PTImage.from_2d_wh_torch(
            self.output[0].detach().sigmoid())

        labelled_images = [
            (img_frame, ' : Frame'),
            (img_frame_xcor, ' : Frame xcor'),
        ]

    for image, suffix in labelled_images:
        ImageVisualizer().set_image(image, title + suffix)
def visualize(self, parameters=None):
    """Draw target and predicted boxes on the input image and display it.

    Targets are drawn in ``(255, 0, 0)``; post-processed predictions are
    drawn with ``color=None`` (whatever ``draw_objects_on_np_image`` does
    for that value).

    Args:
        parameters: optional dict of display options. Only the ``'title'``
            key is read; it prefixes the image title and defaults to ''.
    """
    # A fresh dict per call avoids sharing one mutable default between calls.
    if parameters is None:
        parameters = {}

    image_original = PTImage.from_cwh_torch(self.data[0])
    # Work on a copy so drawing does not touch the converted image's data.
    drawing_image = image_original.to_order_and_class(
        Ordering.HWC, ValueClass.BYTE0255).get_data().copy()

    boxes, classes = self.output[1:]  # Nx4 boxes and N class tensor
    valid_boxes, valid_classes = MultiObjectDetector.post_process_boxes(
        self.data[0], boxes, classes, len(self.class_lookup))

    # convert targets: rows whose first column is > -1 are kept; column 0
    # is used as the class and the remaining columns as the box.
    real_targets = self.target[0][:, 0] > -1
    filtered_targets = self.target[0][real_targets].reshape(
        -1, self.target[0].shape[1])
    target_boxes = filtered_targets[:, 1:]
    target_classes = filtered_targets[:, 0]

    if target_boxes.shape[0] > 0:
        draw_objects_on_np_image(
            drawing_image,
            self.__convert_to_objects(target_boxes, target_classes),
            color=(255, 0, 0))
    if valid_boxes.shape[0] > 0:
        draw_objects_on_np_image(
            drawing_image,
            self.__convert_to_objects(valid_boxes, valid_classes),
            color=None)

    ImageVisualizer().set_image(
        PTImage(drawing_image), parameters.get('title', '') + ' : Output')
def forward(self, x):
    """Run the recurrent attention loop and return the mask canvas per step.

    For ``self.timesteps`` steps the hidden state is decoded into attention
    parameters, a glimpse is read from ``x``, encoded, fed to the RNN,
    decoded into a partial mask and written additively onto a canvas.

    Args:
        x: 4-D input tensor, unpacked as (batch, channels, height, width).

    Returns:
        A list of ``self.timesteps + 1`` tensors: the sigmoid of the
        initial all-zero canvas followed by the sigmoid of the accumulated
        canvas after each step.
    """
    batch_size, chans, height, width = x.size()

    # need to first determine the hidden state size, which is tied to the
    # cnn feature size: push a dummy glimpse through the encoder and RNN.
    # Zeros (rather than uninitialised memory) keep this pass deterministic.
    dummy_glimpse = torch.zeros(
        batch_size, chans, self.attn_grid_size, self.attn_grid_size,
        device=x.device)
    dummy_feature_map = self.encoder.forward(dummy_glimpse)
    # view(batch_size, -1) flattens per sample; the former
    # nelement()/batch_size is a float on Python 3 and is rejected by view.
    self.att_rnn.forward(dummy_feature_map.view(batch_size, -1))
    self.att_rnn.reset_hidden_state(batch_size, x.is_cuda)

    # The canvas starts empty; every step appends the running sum.
    outputs = [
        torch.zeros(batch_size, self.num_classes, height, width,
                    device=x.device)
    ]

    self.init_weights(self.att_rnn.get_hidden_state())

    for t in range(self.timesteps):
        # 1) decode hidden state to generate gaussian attention parameters
        state = self.att_rnn.get_hidden_state()
        gauss_attn_params = torch.tanh(
            F.linear(state, self.att_decoder_weights))

        # 2) extract glimpse
        glimpse = self.attn_reader.forward(
            x, gauss_attn_params, self.attn_grid_size)

        # visualize first glimpse in batch for all t
        torch_glimpses = torch.chunk(glimpse, batch_size, dim=0)
        ImageVisualizer().set_image(
            PTImage.from_cwh_torch(torch_glimpses[0].squeeze().detach()),
            'zGlimpse {}'.format(t))

        # 3) use conv stack or resnet to extract features
        feature_map = self.encoder.forward(glimpse)
        # Encoder output dims without the last entry, reversed, with the
        # glimpse size appended, are handed to the decoder.
        conv_output_dims = self.encoder.get_output_dims()[:-1][::-1]
        conv_output_dims.append(glimpse.size())

        # 4) update hidden state
        # think about this connection a bit more
        self.att_rnn.forward(feature_map.view(batch_size, -1))

        # 5) use deconv network to get partial masks
        partial_mask = self.decoder.forward(feature_map, conv_output_dims)

        # 6) write masks additively to mask canvas
        partial_canvas = self.attn_writer.forward(
            partial_mask, gauss_attn_params, (height, width))
        outputs.append(torch.add(outputs[-1], partial_canvas))

    # return the sigmoided versions; accumulation above used the raw sums
    return [torch.sigmoid(canvas) for canvas in outputs]
def train(self):
    """Run ``self.args.iterations`` training iterations.

    Each iteration loads a batch, runs the forward pass, computes the loss,
    takes an optimizer step, logs timings and the loss, and periodically
    saves a checkpoint and dumps visualizations.
    """
    # load after a forward call for dynamic models
    batched_data, _, _ = load_samples(
        self.model.get_loader(), self.model.cuda, self.args.batch_size)
    self.evaluate_model(batched_data)
    self.iteration = load(
        self.args.output_dir, self.model.get_model(), self.iteration,
        self.model.get_optimizer())

    # Remember where this run starts: self.iteration advances in lockstep
    # with the loop index, so comparing the two can never single out the
    # first pass.
    first_iteration = self.iteration
    for i in range(first_iteration, first_iteration + self.args.iterations):
        #################### LOAD INPUTS ############################
        # TODO, make separate timer class if more complex timings arise
        t0 = time.time()
        batched_data, batched_targets, sample_array = load_samples(
            self.model.get_loader(), self.model.cuda, self.args.batch_size)
        self.logger.set('timing.input_loading_time', time.time() - t0)

        #################### FORWARD ################################
        t1 = time.time()
        outputs = self.evaluate_model(batched_data)
        self.logger.set('timing.foward_pass_time', time.time() - t1)

        #################### BACKWARD AND SGD #######################
        t2 = time.time()
        loss = self.model.get_lossfn()(*(outputs + batched_targets))
        self.model.get_optimizer().zero_grad()
        loss.backward()
        self.model.get_optimizer().step()
        self.logger.set(
            'timing.loss_backward_update_time', time.time() - t2)

        #################### LOGGING, VIZ and SAVE ##################
        # .item() reads the scalar loss; indexing a 0-dim tensor with [0]
        # is rejected by current PyTorch.
        loss_value = loss.item()
        print('iteration: {0} loss: {1}'.format(self.iteration, loss_value))

        # Dump the compute graph once, on the first iteration of this run.
        if self.args.compute_graph and i == first_iteration:
            compute_graph(
                loss,
                output_file=os.path.join(
                    self.args.output_dir, self.args.compute_graph))

        if self.iteration % self.args.save_iter == 0:
            save(self.model.get_model(), self.model.get_optimizer(),
                 self.iteration, self.args.output_dir)

        self.logger.set('time', time.time())
        self.logger.set('date', str(datetime.now()))
        self.logger.set('loss', loss_value)
        self.logger.set('iteration', self.iteration)
        self.logger.dump_line()
        self.iteration += 1

        if (self.args.visualize_iter > 0
                and self.iteration % self.args.visualize_iter == 0):
            Batcher.debatch_outputs(sample_array, outputs)
            # An explicit loop: a bare map() is lazy on Python 3 and would
            # never call visualize().
            for sample in sample_array:
                sample.visualize({'title': random_str(5)})
            ImageVisualizer().dump_image(
                os.path.join(
                    self.args.output_dir,
                    'visualizations_{0:08d}.svg'.format(self.iteration)))
def test(self):
    """Run ``self.args.iterations`` evaluation iterations with visualization.

    Every iteration loads a batch, runs the forward pass, logs timings,
    and calls ``visualize`` on every sample in ``'test'`` mode. The
    accumulated images are dumped to an SVG whenever the iteration counter
    hits a multiple of ``self.args.visualize_iter``.
    """
    loader = self.model.get_loader

    # A forward call must precede loading so dynamic models are built.
    warmup_batch, _, _ = load_samples(
        loader(), self.model.cuda, self.args.batch_size)
    self.evaluate_model(warmup_batch)
    self.iteration = load(
        self.args.output_dir, self.model.get_model(), self.iteration)

    start = self.iteration
    stop = start + self.args.iterations
    for _ in range(start, stop):
        # ---- load inputs ----
        load_started = time.time()
        batch, _targets, samples = load_samples(
            loader(), self.model.cuda, self.args.batch_size)
        self.logger.set(
            'timing.input_loading_time', time.time() - load_started)

        # ---- forward ----
        forward_started = time.time()
        outputs = self.evaluate_model(batch)
        self.logger.set(
            'timing.foward_pass_time', time.time() - forward_started)

        # ---- logging ----
        print('iteration: {0}'.format(self.iteration))
        self.logger.set('time', time.time())
        self.logger.set('date', str(datetime.now()))
        self.logger.set('iteration', self.iteration)
        self.logger.dump_line()
        self.iteration += 1

        # ---- visualization ----
        Batcher.debatch_outputs(samples, outputs)
        for sample in samples:
            sample.visualize({'title': random_str(5), 'mode': 'test'})

        interval = self.args.visualize_iter
        if interval > 0 and self.iteration % interval == 0:
            viz_name = 'testviz_{0:08d}.svg'.format(self.iteration)
            print('dumping {}'.format(viz_name))
            ImageVisualizer().dump_image(
                os.path.join(self.args.output_dir, viz_name))
def process_single_batch(original_images,
                         ego_motion_vectors,
                         disp_maps,
                         calib_frames,
                         batch_number=0,
                         mask_loss_factor=0.1):
    """Warp each frame toward the next one and score the reconstruction.

    Every frame is lifted to camera coordinates with ``image_to_cam``,
    moved by the transform built from its ego-motion vector, and projected
    back to an image with ``cam_to_image``. The warped image is compared to
    the following frame under the returned mask using a smooth-L1 term and
    an SSIM term. Intermediate images and loss components are pushed to
    ``ImageVisualizer`` and ``Logger``.

    Args:
        original_images: per-frame images, indexed along the first axis.
        ego_motion_vectors: per-transition motion vectors passed to
            ``six_dof_vec_to_matrix``.
        disp_maps: per-frame maps passed to ``image_to_cam``.
        calib_frames: per-frame calibration matrices; ``shape[0]`` gives the
            frame count. Each is padded with one column and one row below,
            so it must be 3x3.
        batch_number: index used only to label logged/visualized items.
        mask_loss_factor: currently unused (the mask loss is disabled).

    Returns:
        ``(loss, out_images)``: the sum of the reconstruction, SSIM and
        (always zero) mask losses, and the list of warped images.
    """
    frame_count = calib_frames.shape[0]

    Logger().set('loss_component.disp_maps_mean',
                 disp_maps.data.mean().item())
    Logger().set('loss_component.disp_maps_min',
                 disp_maps.data.min().item())
    Logger().set('loss_component.disp_maps_max',
                 disp_maps.data.max().item())
    Logger().set(
        'loss_component.ego_motion_vectors[0]',
        np.array2string(ego_motion_vectors[0].detach().cpu().numpy()))

    # Stage 1: lift every frame into camera coordinates.
    cam_coords = [
        image_to_cam(original_images[k], disp_maps[k], calib_frames[k])
        for k in range(frame_count)
    ]

    # Stage 2: one transformation matrix per frame-to-frame transition.
    transforms = [
        six_dof_vec_to_matrix(ego_motion_vectors[k])
        for k in range(frame_count - 1)
    ]

    def scalar_zero():
        # 0-dim accumulator matching the images' dtype and device.
        return torch.zeros([],
                           dtype=original_images.dtype,
                           device=original_images.device)

    # Stage 3: move frame k's points into frame k+1 and re-project them.
    total_re_loss = scalar_zero()
    total_ssim_loss = scalar_zero()
    total_mask_loss = scalar_zero()
    out_images = []

    for k in range(frame_count - 1):
        # Append a row of ones so the points become 4-D homogeneous vectors.
        points = cam_coords[k]
        ones_row = torch.ones_like(points)[0, :].unsqueeze(0)
        homogeneous_points = torch.cat((points, ones_row), dim=0)
        moved_points = torch.matmul(transforms[k], homogeneous_points)

        # Pad the calibration matrix with a zero column on the right and a
        # [0, 0, 0, 1] row below.
        zero_column = torch.zeros(3,
                                  dtype=original_images.dtype,
                                  device=original_images.device).unsqueeze(1)
        last_row = torch.zeros(4,
                               dtype=original_images.dtype,
                               device=original_images.device).unsqueeze(0)
        last_row[0, 3] = 1
        hom_calib = torch.cat((calib_frames[k], zero_column), dim=1)
        hom_calib = torch.cat((hom_calib, last_row), dim=0)

        warped_image, mask = cam_to_image(hom_calib, moved_points,
                                          original_images[k])
        out_images.append(warped_image)

        # Push the warped image and its mask to the visualizer and logger.
        warped_pt = PTImage.from_cwh_torch(warped_image)
        mask_pt = PTImage.from_2d_wh_torch(mask)
        # Result is not displayed; the conversion call is kept as-is.
        _source_pt = PTImage.from_cwh_torch(original_images[k])
        ImageVisualizer().set_image(
            warped_pt, 'warped_image {}-{}'.format(batch_number, k))
        ImageVisualizer().set_image(
            mask_pt, 'mask {}-{}'.format(batch_number, k))
        Logger().set(
            'loss_component.mask_mean.{}-{}'.format(batch_number, k),
            mask.mean().data.item())

        # Compare against the next real frame; both sides are multiplied
        # by the mask before the losses are computed.
        masked_warp_image = warped_image.unsqueeze(0) * mask
        masked_gt_image = original_images[k + 1].unsqueeze(0) * mask

        elementwise_loss = F.smooth_l1_loss(masked_warp_image,
                                            masked_gt_image,
                                            reduction='none')
        total_re_loss += elementwise_loss.mean()

        dissimilarity = 1 - ssim(masked_warp_image, masked_gt_image)
        total_ssim_loss += old_div(dissimilarity, 2)

    Logger().set('loss_component.mask_loss.{}'.format(batch_number),
                 total_mask_loss.data.item())
    Logger().set('loss_component.batch_re_loss.{}'.format(batch_number),
                 total_re_loss.data.item())
    Logger().set('loss_component.batch_ssim_loss.{}'.format(batch_number),
                 total_ssim_loss.data.item())

    combined_loss = total_re_loss + total_ssim_loss + total_mask_loss
    return combined_loss, out_images