def verify_img_data(img_data, expected_output, mode):
    if mode is None:
        img = transforms.ToPILImage()(img_data)
        assert img.mode == 'RGB'  # default should assume RGB
    else:
        img = transforms.ToPILImage(mode=mode)(img_data)
        assert img.mode == mode
    split = img.split()
    for i in range(3):
        assert np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy())
model_g = DSGAN.Generator(n_res_blocks=opt.num_res_blocks)
model_g.load_state_dict(torch.load(model_path), strict=True)
model_g.eval()
model_g = model_g.cuda()
print('# generator parameters:', sum(param.numel() for param in model_g.parameters()))

# generate the noisy images
idx = 0
with torch.no_grad():
    for file_hr, file_lr in zip(target_hr_files, target_lr_files):
        idx += 1
        print('Image No.:', idx)

        # load HR image
        input_img_hr = Image.open(file_hr)
        input_img_hr = TF.to_tensor(input_img_hr)

        # Save input_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file_hr))
        TF.to_pil_image(input_img_hr).save(path, 'PNG')

        # load LR image
        input_img_lr = Image.open(file_lr)
        input_img_lr = TF.to_tensor(input_img_lr)

        # Apply model to generate the noisy resize_img
        if torch.cuda.is_available():
            input_img_lr = input_img_lr.unsqueeze(0).cuda()
        resize_noisy_img = model_g(input_img_lr).squeeze(0).cpu()
def preprocess_image(image_path):
    """Load image, normalize and convert to tensor."""
    img = Image.open(image_path)
    img_tensor = F.to_tensor(np.float32(img))
    return fixed_image_standardization(image_tensor=img_tensor)  # in [-1, 1]
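# For reference only (an assumption, not shown in the snippet above): a fixed image
# standardization of this kind, e.g. the one used by facenet-pytorch, maps 8-bit pixel
# values to roughly [-1, 1]. A minimal sketch:
def fixed_image_standardization(image_tensor):
    # assumes image_tensor holds values in [0, 255]
    return (image_tensor - 127.5) / 128.0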
def __call__(self, image, target):
    return F.to_tensor(image), target
import argparse

from PIL import Image
from torchvision.transforms.functional import to_tensor

import torchjpeg.codec

parser = argparse.ArgumentParser("Tests the pytorch DCT loader by reading an image, quantizing its pixels, and writing the DCT coefficients to a JPEG")
parser.add_argument("input", help="Input image, should be lossless")
parser.add_argument("output", help="Output image, must be a JPEG")
parser.add_argument("quality", type=int, help="Output quality on the 0-100 scale")
args = parser.parse_args()

im = to_tensor(Image.open(args.input))
if im.shape[0] > 3:
    im = im[:3]

dimensions, quantization, Y_coefficients, CbCr_coefficients = torchjpeg.codec.quantize_at_quality(im, args.quality)
torchjpeg.codec.write_coefficients(args.output, dimensions, quantization, Y_coefficients, CbCr_coefficients)
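# Example invocation of the script above (script and file names are hypothetical):
#   python quantize_to_jpeg.py lossless_input.png quantized_output.jpg 50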
def __call__(self, results):
    results['img'] = TF.to_tensor(results['img'].copy())
    return results
def __call__(self, image, mask, joints, area):
    return F.to_tensor(image), mask, joints, area
cur_land[1] = proj_num_rows - 1 - cur_land[1]

# Indicators that there is enough of a femur visible in the field of view
# that we can use ground truth pose for any additional experiments, such as
# evaluating another femur registration method
left_femur_good_fov = cur_proj_g['gt-poses/left-femur-good-fov'][()]
right_femur_good_fov = cur_proj_g['gt-poses/right-femur-good-fov'][()]

# Next the segmentation labels and landmark locations will be overlaid on the projections
pil = TF.to_pil_image(cur_proj)
pil = pil.convert('RGB')
cur_proj = TF.to_tensor(pil)
pil = None

# alpha blending for segmentation overlay of pixels that are not background
#   0 --> seg. not visible, only projection shows
#   1 --> only seg. shows, proj. not visible in seg. regions
alpha = 0.35

label_colors = [[0.0, 1.0, 0.0],  # green
                [1.0, 0.0, 0.0],  # red
                [0.0, 0.0, 1.0],  # blue
                [1.0, 1.0, 0.0],  # yellow
                [0.0, 1.0, 1.0],  # cyan
                [1.0, 0.5, 0.0]]  # orange
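# Hypothetical sketch (not from the original source) of the alpha blending described by
# the comments above: blend every labelled pixel of the projection with its label color.
import torch

def overlay_segmentation(proj, seg, label_colors, alpha=0.35):
    """proj: (3, H, W) float tensor in [0, 1]; seg: (H, W) integer label map, 0 = background."""
    out = proj.clone()
    for lbl_idx, color in enumerate(label_colors, start=1):
        mask = seg == lbl_idx  # pixels belonging to this label
        for c in range(3):
            out[c][mask] = (1 - alpha) * proj[c][mask] + alpha * color[c]
    return out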
def test_image(img):
    global model
    global params

    model.eval()

    # img = Image.open(img_path).convert('RGB')
    img = cv2.resize(img, dsize=(1280, 720))
    img_pil = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img_pil = Image.fromarray(img_pil)  # build the PIL image from the RGB array, not the BGR `img`
    # w, h = img.size
    h, w, _ = img.shape
    print(img.shape)
    img_pil = F.crop(img_pil, h - 640, 0, 640, w)
    img_pil = F.resize(img_pil, size=(256, 512), interpolation=Image.BILINEAR)

    input = F.to_tensor(img_pil).float()
    # print("input:")
    # print(type(input))
    # print("input size:")
    # print(input.size())
    print("##################")
    input = torch.Tensor([torch.Tensor.numpy(input)])
    # print("input:")
    # print(type(input))
    # print("input size:")
    # print(input.size())

    # Reset coordinates
    x_cal0, x_cal1, x_cal2, x_cal3 = [None] * 4

    # Put inputs on gpu if possible
    if not args.no_cuda:
        input = input.cuda(non_blocking=True).float()

    # Run model
    torch.cuda.synchronize()
    a = time.time()
    start1 = time.time()
    beta0, beta1, beta2, beta3, weightmap_zeros, \
        output_net, outputs_line, outputs_horizon, output_seg = model(
            input, gt_line=np.array([1, 1]), end_to_end=args.end_to_end, gt=None)
    torch.cuda.synchronize()
    b = time.time()

    # Horizon task & Line classification task
    if args.clas:
        horizon_pred = nn.Sigmoid()(outputs_horizon).sum(dim=1)
        horizon_pred = (torch.round((resize_coordinates(horizon_pred) + 80) / 10) * 10).int()
        line_pred = torch.round(nn.Sigmoid()(outputs_line))
    else:
        assert False

    # Calculate X coordinates
    x_cal0 = params.compute_coordinates(beta0)
    x_cal1 = params.compute_coordinates(beta1)
    x_cal2 = params.compute_coordinates(beta2)
    x_cal3 = params.compute_coordinates(beta3)
    lanes_pred = torch.stack((x_cal0, x_cal1, x_cal2, x_cal3), dim=1)
    print("DL FPS: {0}".format(1.0 / (time.time() - start1)))

    # Check line type branch
    line_pred = line_pred[:, [1, 2, 0, 3]]
    lanes_pred[(1 - line_pred[:, :, None]).byte().expand_as(lanes_pred)] = -2

    # Check horizon branch
    bounds = ((horizon_pred - 160) / 10)
    for k, bound in enumerate(bounds):
        lanes_pred[k, :, :bound.item()] = -2

    # TODO check intersections
    lanes_pred[lanes_pred > 1279] = -2
    lanes_pred[lanes_pred < 0] = -2
    lanes_pred = np.int_(np.round(lanes_pred.data.cpu().numpy())).tolist()

    num_el = input.size(0)
    for j in range(num_el):
        lanes_to_write = lanes_pred[j]
        if args.draw_testset:
            test = weightmap_zeros[j]
            weight0 = test[0]
            weight1 = test[1]
            weight2 = test[2]
            weight3 = test[3]
            # img_name = img_path
            h_samples = [160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270,
                         280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390,
                         400, 410, 420, 430, 440, 450, 460, 470, 480, 490, 500, 510,
                         520, 530, 540, 550, 560, 570, 580, 590, 600, 610, 620, 630,
                         640, 650, 660, 670, 680, 690, 700, 710]
            colormap = [(255, 0, 0), (0, 255, 0), (255, 255, 0), (0, 0, 255), (0, 128, 128)]
            # with open(img_name, 'rb') as f:
            #     img = np.array(Image.open(f).convert('RGB'))
            for lane_i in range(len(lanes_to_write)):
                x_orig = lanes_to_write[lane_i]
                pt_or = [(xcord, ycord) for (xcord, ycord) in zip(x_orig, h_samples) if xcord != -2]
                for point in pt_or:
                    img = cv2.circle(img, tuple(np.int32(point)), thickness=-1,
                                     color=colormap[lane_i], radius=3)
            # img = Image.fromarray(np.uint8(img))
            # img.show()
    return img
def read_im():
    im = Image.open('data/night.jpg')
    im = TF.crop(im, 16, 0, 704, 1280)
    im = TF.to_tensor(im)
    im = im - 0.5
    return im[None, ...]
def __call__(self, sample):
    image, image_seg, label = sample['image'], sample['image_seg'], sample['label']
    img = trF.to_tensor(image)
    img_seg = trF.to_tensor(image_seg)
    return {'image': img, 'image_seg': img_seg, 'label': label}
def __call__(self, data):
    data['input'] = F.to_tensor(data['input']).float()
    data['label'] = F.to_tensor(data['label']).float()
    return data
def __call__(self, image, *args):
    return (F.to_tensor(image), ) + args
def get_contour_gain_vs_length(
        model, device_to_use, g_params, k_idx, ch_mus, ch_sigmas, rslt_dir, c_len_arr,
        frag_size=np.array([7, 7]), full_tile_size=np.array([14, 14]),
        img_size=np.array([256, 256, 3]), n_images=50, epsilon=1e-5, iou_results=True):
    """
    :param iou_results:
    :param c_len_arr:
    :param rslt_dir:
    :param epsilon:
    :param model:
    :param device_to_use:
    :param g_params:
    :param k_idx:
    :param ch_mus:
    :param ch_sigmas:
    :param frag_size:
    :param full_tile_size:
    :param img_size:
    :param n_images:
    :return:
    """
    global edge_extract_act
    global cont_int_in_act
    global cont_int_out_act

    # tracking variables -------------------------------------------------
    iou_arr = []

    tgt_n = k_idx
    max_act_n_idx = g_params[0]['extra_info']['max_active_neuron_idx']

    tgt_n_out_acts = np.zeros((n_images, len(c_len_arr)))
    max_act_n_acts = np.zeros_like(tgt_n_out_acts)
    # -----------------------------------------------------------------

    frag = gabor_fits.get_gabor_fragment(g_params, spatial_size=frag_size)
    bg = g_params[0]['bg']

    for c_len_idx, c_len in enumerate(c_len_arr):
        print("Processing contour length = {}".format(c_len))
        iou = 0

        for img_idx in range(n_images):

            # (1) Create Test Image
            test_img, test_img_label, contour_frags_starts, end_acc_angle, start_acc_angle = \
                fields1993_stimuli.generate_contour_image(
                    frag=frag,
                    frag_params=g_params,
                    c_len=c_len,
                    beta=0,
                    alpha=0,
                    f_tile_size=full_tile_size,
                    img_size=img_size,
                    random_alpha_rot=True,
                    rand_inter_frag_direction_change=True,
                    use_d_jitter=False,
                    bg_frag_relocate=True,
                    bg=bg
                )

            test_img = transform_functional.to_tensor(test_img)
            test_img_label = torch.from_numpy(np.array(test_img_label)).unsqueeze(0)

            # # Debug - Plot Test Image
            # # ------------------------
            # if img_idx == 0:
            #     disp_img = np.transpose(test_img.numpy(), axes=(1, 2, 0))
            #     disp_img = (disp_img - disp_img.min()) / (disp_img.max() - disp_img.min()) * 255.
            #     disp_img = disp_img.astype('uint8')
            #     disp_label = test_img_label.numpy()
            #
            #     print(disp_label)
            #     print("Label is valid? {}".format(fields1993_stimuli.is_label_valid(disp_label)))
            #
            #     plt.figure()
            #     plt.imshow(disp_img)
            #     plt.title("Input Image. Contour Length = {}".format(c_len))
            #
            #     # Highlight Label Tiles
            #     disp_label_image = fields1993_stimuli.plot_label_on_image(
            #         disp_img,
            #         disp_label,
            #         full_tile_size,
            #         edge_color=(250, 0, 0),
            #         edge_width=2,
            #         display_figure=False
            #     )
            #
            #     # Highlight All background Tiles
            #     full_tile_starts = fields1993_stimuli.get_background_tiles_locations(
            #         frag_len=full_tile_size[0],
            #         img_len=img_size[1],
            #         row_offset=0,
            #         space_bw_tiles=0,
            #         tgt_n_visual_rf_start=img_size[0] // 2 - (full_tile_size[0] // 2)
            #     )
            #
            #     disp_label_image = fields1993_stimuli.highlight_tiles(
            #         disp_label_image,
            #         full_tile_size,
            #         full_tile_starts,
            #         edge_color=(255, 255, 0))
            #
            #     plt.figure()
            #     plt.imshow(disp_label_image)
            #     plt.title("Labeled Image.\nContour Length = {}".format(c_len))

            # (2) Get output Activations
            if iou_results:
                label = test_img_label
                iou += process_image(model, device_to_use, ch_mus, ch_sigmas, test_img, label)
            else:
                label = None
                process_image(model, device_to_use, ch_mus, ch_sigmas, test_img, label)

            center_n_acts = \
                cont_int_out_act[0, :, cont_int_out_act.shape[2] // 2, cont_int_out_act.shape[3] // 2]

            tgt_n_out_acts[img_idx, c_len_idx] = center_n_acts[tgt_n]
            max_act_n_acts[img_idx, c_len_idx] = center_n_acts[max_act_n_idx]

        iou_arr.append(iou / n_images)

        # # ---------------------------------
        # import pdb
        # pdb.set_trace()
        # plt.close('all')

    # IOU
    if iou_results:
        # print("IoU per length {}".format(iou_arr))
        f_title = "Iou vs length - Neuron {}".format(k_idx)
        f_name = "neuron {}".format(k_idx)
        plot_iou_vs_contour_length(c_len_arr, iou_arr, rslt_dir, f_title, f_name)

    # -------------------------------------------
    # Gain
    # -------------------------------------------
    # In Li2006, Gain was defined as output of neuron / mean output to noise pattern
    # where the noise pattern was defined as optimal stimulus at center of RF and all
    # other fragments were random. This corresponds to resp c_len=x / mean resp c_len=1
    tgt_n_avg_noise_resp = np.mean(tgt_n_out_acts[:, 0])
    max_active_n_avg_noise_resp = np.mean(max_act_n_acts[:, 0])

    tgt_n_gains = tgt_n_out_acts / (tgt_n_avg_noise_resp + epsilon)
    max_active_n_gains = max_act_n_acts / (max_active_n_avg_noise_resp + epsilon)

    tgt_n_mean_gain_arr = np.mean(tgt_n_gains, axis=0)
    tgt_n_std_gain_arr = np.std(tgt_n_gains, axis=0)

    max_act_n_mean_gain_arr = np.mean(max_active_n_gains, axis=0)
    max_act_n_std_gain_arr = np.std(max_active_n_gains, axis=0)

    # -----------------------------------------------------------------------------------
    # Plots
    # -----------------------------------------------------------------------------------
    # Gain vs Length
    # f = plt.figure()
    f, ax_arr = plt.subplots(1, 2)

    ax_arr[0].errorbar(c_len_arr, tgt_n_mean_gain_arr, tgt_n_std_gain_arr,
                       label='Target Neuron {}'.format(tgt_n))
    ax_arr[1].errorbar(c_len_arr, max_act_n_mean_gain_arr, max_act_n_std_gain_arr,
                       label='Max Active Neuron {}'.format(max_act_n_idx))

    ax_arr[0].set_xlabel("Contour Length")
    ax_arr[1].set_xlabel("Contour Length")
    ax_arr[0].set_ylabel("Gain")
    ax_arr[1].set_ylabel("Gain")
    ax_arr[0].set_ylim(bottom=0)
    ax_arr[1].set_ylim(bottom=0)
    ax_arr[0].grid()
    ax_arr[1].grid()
    ax_arr[0].legend()
    ax_arr[1].legend()

    f.suptitle("Contour Gain Vs length - Neuron {}".format(k_idx))
    f.savefig(os.path.join(rslt_dir, 'gain_vs_len.jpg'), format='jpg')
    plt.close(f)

    # Output activations vs Length
    f = plt.figure()
    plt.errorbar(c_len_arr, np.mean(tgt_n_out_acts, axis=0), np.std(tgt_n_out_acts, axis=0),
                 label='target_neuron_{}'.format(tgt_n))
    plt.errorbar(c_len_arr, np.mean(max_act_n_acts, axis=0), np.std(max_act_n_acts, axis=0),
                 label='max_active_neuron_{}'.format(max_act_n_idx))
    plt.legend()
    plt.grid()
    plt.xlabel("Contour Length")
    plt.ylabel("Activations")
    plt.title("Output Activations")
    f.savefig(os.path.join(rslt_dir, 'output_activations_vs_len.jpg'), format='jpg')
    plt.close(f)

    # Save output Activations
    tgt_n_mean_out_acts = np.mean(tgt_n_out_acts, axis=0)
    tgt_n_std_out_acts = np.std(tgt_n_out_acts, axis=0)

    return iou_arr, tgt_n_mean_gain_arr, tgt_n_std_gain_arr, max_act_n_mean_gain_arr, \
        max_act_n_std_gain_arr, tgt_n_avg_noise_resp, max_active_n_avg_noise_resp, \
        tgt_n_mean_out_acts, tgt_n_std_out_acts
def find_optimal_stimulus(
        model, device_to_use, k_idx, ch_mus, ch_sigmas, extract_point,
        frag_size=np.array([7, 7]), img_size=np.array([256, 256, 3])):
    """
    :return:
    """
    global edge_extract_act
    global cont_int_in_act
    global cont_int_out_act

    orient_arr = np.arange(0, 180, 5)

    img_center = img_size[0:2] // 2

    tgt_n_acts = np.zeros((len(base_gabor_parameters), len(orient_arr)))
    tgt_n_max_act = 0
    tgt_n_opt_params = None

    for base_gp_idx, base_gabor_params in enumerate(base_gabor_parameters):
        print("Processing Base Gabor Param Set {}".format(base_gp_idx))
        for o_idx, orient in enumerate(orient_arr):

            # Change orientation
            g_params = copy.deepcopy(base_gabor_params)
            for c_idx in range(len(g_params)):
                g_params[c_idx]["theta_deg"] = orient

            # Create Test Image - Single fragment @ center
            frag = gabor_fits.get_gabor_fragment(g_params, spatial_size=frag_size)

            bg = base_gabor_params[0]['bg']
            if bg is None:
                bg = fields1993_stimuli.get_mean_pixel_value_at_boundary(frag)

            test_img = np.ones(img_size, dtype='uint8') * bg

            add_one = 1
            if frag_size[0] % 2 == 0:
                add_one = 0

            test_img[
                img_center[0] - frag_size[0] // 2:img_center[0] + frag_size[0] // 2 + add_one,
                img_center[0] - frag_size[0] // 2:img_center[0] + frag_size[0] // 2 + add_one,
                :,
            ] = frag

            test_img = transform_functional.to_tensor(test_img)

            # # Debug - Show Test Image
            # plt.figure()
            # plt.imshow(np.transpose(test_img, axes=(1, 2, 0)))
            # plt.title("Input Image - Find optimal stimulus")
            # import pdb
            # pdb.set_trace()

            # Get target activations
            process_image(model, device_to_use, ch_mus, ch_sigmas, test_img)

            if extract_point == 'edge_extract_layer_out':
                center_n_acts = \
                    edge_extract_act[0, :, edge_extract_act.shape[2] // 2, edge_extract_act.shape[3] // 2]
            elif extract_point == 'contour_integration_layer_in':
                center_n_acts = \
                    cont_int_in_act[0, :, cont_int_in_act.shape[2] // 2, cont_int_in_act.shape[3] // 2]
            else:  # 'contour_integration_layer_out'
                center_n_acts = \
                    cont_int_out_act[0, :, cont_int_out_act.shape[2] // 2, cont_int_out_act.shape[3] // 2]

            tgt_n_act = center_n_acts[k_idx]
            tgt_n_acts[base_gp_idx, o_idx] = tgt_n_act

            # # Debug - Display all channel responses to individual test image
            # plt.figure()
            # plt.plot(center_n_acts)
            # plt.title("Center Neuron Activations. Base Gabor Set {}. Orientation {}".format(
            #     base_gp_idx, orient))

            if tgt_n_act > tgt_n_max_act:
                tgt_n_max_act = tgt_n_act
                tgt_n_opt_params = copy.deepcopy(g_params)

                max_active_n = int(np.argmax(center_n_acts))

                extra_info = {
                    'optim_stim_act_value': tgt_n_max_act,
                    'optim_stim_base_gabor_set': base_gp_idx,
                    'optim_stim_act_orient': orient,
                    'max_active_neuron_is_target': (max_active_n == k_idx),
                    'max_active_neuron_value': center_n_acts[max_active_n],
                    'max_active_neuron_idx': max_active_n
                }

                for item in tgt_n_opt_params:
                    item['extra_info'] = extra_info

        # # -----------------------------------------
        # # Debug - Tuning Curve for Individual base Gabor Params
        # plt.figure()
        # plt.plot(orient_arr, tgt_n_acts[base_gp_idx, :])
        # plt.title("Neuron {}: responses vs Orientation.\nGabor Set {}".format(k_idx, base_gp_idx))
        # import pdb
        # pdb.set_trace()

    # ---------------------------
    if tgt_n_opt_params is not None:
        # Save optimal tuning curve
        for item in tgt_n_opt_params:
            opt_base_g_params_set = item['extra_info']['optim_stim_base_gabor_set']
            item['extra_info']['orient_tuning_curve_x'] = orient_arr
            item['extra_info']['orient_tuning_curve_y'] = tgt_n_acts[opt_base_g_params_set, ]

        # # Debug: plot tuning curves for all gabor sets
        # # ------------------------------------------------
        # plt.figure()
        # for base_gp_idx, base_gabor_params in enumerate(base_gabor_parameters):
        #
        #     if base_gp_idx == tgt_n_opt_params[0]['extra_info']['optim_stim_base_gabor_set']:
        #         line_width = 5
        #         plt.plot(
        #             tgt_n_opt_params[0]['extra_info']['optim_stim_act_orient'],
        #             tgt_n_opt_params[0]['extra_info']['max_active_neuron_value'],
        #             marker='x', markersize=10,
        #             label='max active neuron Index {}'.format(
        #                 tgt_n_opt_params[0]['extra_info']['max_active_neuron_idx'])
        #         )
        #     else:
        #         line_width = 2
        #
        #     plt.plot(
        #         orient_arr, tgt_n_acts[base_gp_idx, ],
        #         label='param set {}'.format(base_gp_idx), linewidth=line_width
        #     )
        #
        # plt.legend()
        # plt.grid(True)
        # plt.title(
        #     "Kernel {}. Max Active Base Set {}. Is most responsive to this stimulus {}".format(
        #         k_idx,
        #         tgt_n_opt_params[0]['extra_info']['optim_stim_base_gabor_set'],
        #         tgt_n_opt_params[0]['extra_info']['max_active_neuron_is_target'])
        # )
        #
        # import pdb
        # pdb.set_trace()

    return tgt_n_opt_params
def to_tensor(self, img):
    img = Image.fromarray(img)
    img_t = F.to_tensor(img).float()
    return img_t
def __call__(self, image, target):
    image = F.to_tensor(image).contiguous()
    return image, target
def load(cfg):
    all_imgs, all_poses = [], []
    counts = [0]

    for s in ['train', 'val', 'test']:
        meta = None
        fname = os.path.join(cfg.dataset.path, 'transforms_{}.json'.format(s))
        with open(fname, 'r') as fp:
            meta = json.load(fp)

        imgs = []
        poses = []
        if s == 'train' or cfg.dataset.testskip < 2:
            skip = 1
        else:
            skip = cfg.dataset.testskip

        for frame in meta['frames'][::skip]:
            fname = os.path.join(cfg.dataset.path, frame['file_path'] + '.png')
            im = Image.open(fname)
            if cfg.dataset.half_res:
                im = TF.resize(im, (400, 400))
            imgs.append(TF.to_tensor(im))
            poses.append(torch.Tensor(frame['transform_matrix']))

        # keep all 4 channels (RGBA)
        imgs = torch.stack(imgs, dim=0).permute(0, 2, 3, 1)
        poses = torch.stack(poses, dim=0)
        counts.append(counts[-1] + imgs.shape[0])
        all_imgs.append(imgs)
        all_poses.append(poses)

    i_split = [torch.arange(counts[i], counts[i + 1]) for i in range(3)]
    imgs = torch.cat(all_imgs, dim=0)
    poses = torch.cat(all_poses, dim=0)

    H, W = imgs.shape[1:3]
    camera_angle_x = float(meta['camera_angle_x'])
    focal = .5 * W / torch.tan(torch.Tensor([.5 * camera_angle_x]))
    hwf = [int(H), int(W), focal.numpy()[0]]

    render_poses = torch.stack([
        pose_spherical(angle, -30., 4.)
        for angle in torch.linspace(-180., 180., 41)[:-1]
    ], dim=0)

    if cfg.train.white_background:
        imgs = imgs[..., :3] * imgs[..., -1:] + (1. - imgs[..., -1:])
    else:
        imgs = imgs[..., :3]

    print('Loaded blender', cfg.dataset.path, imgs.shape, poses.shape, render_poses.shape, hwf)

    return imgs, poses, render_poses, i_split, hwf
def data_transform(img, im_size):
    img = img.resize(im_size, Image.BILINEAR)
    img = F.to_tensor(img)             # convert to tensor (values between 0 and 1)
    img = F.normalize(img, MEAN, STD)  # normalize the tensor
    return img
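# Hypothetical usage of data_transform above. MEAN/STD are not defined in the snippet;
# the ImageNet statistics below are a common choice, and 'example.jpg' is a placeholder.
MEAN, STD = [0.485, 0.456, 0.406], [0.229, 0.224, 0.225]
img = Image.open('example.jpg').convert('RGB')
batch = data_transform(img, (224, 224)).unsqueeze(0)  # 1 x 3 x 224 x 224 input batch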
def __call__(self, img, target):
    return F.to_tensor(img), target
def __call__(self, images, intrinsics):
    tensors = [F.to_tensor(im) for im in images]
    return tensors, torch.from_numpy(intrinsics)
def __call__(self, sample):
    return {'original_image': F.to_tensor(sample['original_image']),
            'downsampled_image': F.to_tensor(sample['downsampled_image']),
            'label': sample['label']}
def transform_image(self, image):
    # img_resized_tensor = TF.to_tensor(image)
    normalize_img = (image - 127.5) / 127.5
    return TF.to_tensor(normalize_img).type(torch.FloatTensor)
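# Illustration (an aside, not from the original class): to_tensor only rescales uint8
# inputs to [0, 1]; float inputs keep their values, which is why the snippet above
# normalizes to [-1, 1] *before* converting to a tensor.
import numpy as np
from torchvision.transforms import functional as TF

arr = np.full((4, 4, 3), 255, dtype=np.uint8)
assert TF.to_tensor(arr).max() == 1.0                        # uint8 -> scaled to [0, 1]
assert TF.to_tensor(arr.astype(np.float32)).max() == 255.0   # float -> values unchanged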
def to_tensor(im1, im2):
    a, b = im1
    a = tvF.to_tensor(a)
    b = tvF.to_tensor(b)
    im2 = tvF.to_tensor(im2)
    return torch.cat([a, b]), im2
def resize_batch(self, label, size):
    """Resize and convert to tensor"""
    resized = F.resize(label, size, interpolation=Image.NEAREST)
    # to_tensor scales the uint8 label map to [0, 1]; multiply by 255 to restore class ids
    return F.to_tensor(resized) * 255
def main():
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    parser = argparse.ArgumentParser(description='Test trained models')
    parser.add_argument('--options-file', '-o', default='options-and-config.pickle', type=str,
                        help='The file where the simulation options are stored.')
    parser.add_argument('--checkpoint-file', '-c', required=True, type=str,
                        help='Model checkpoint file')
    parser.add_argument('--batch-size', '-b', default=12, type=int, help='The batch size.')
    parser.add_argument('--source-image', '-s', required=True, type=str,
                        help='The image to watermark')
    # parser.add_argument('--times', '-t', default=10, type=int,
    #                     help='Number iterations (insert watermark->extract).')

    args = parser.parse_args()

    train_options, hidden_config, noise_config = utils.load_options(args.options_file)
    noiser = Noiser(noise_config)

    checkpoint = torch.load(args.checkpoint_file)
    hidden_net = Hidden(hidden_config, device, noiser, None)
    utils.model_from_checkpoint(hidden_net, checkpoint)

    image_pil = Image.open(args.source_image)
    image = randomCrop(np.array(image_pil), hidden_config.H, hidden_config.W)
    image_tensor = TF.to_tensor(image).to(device)
    image_tensor = image_tensor * 2 - 1  # transform from [0, 1] to [-1, 1]
    image_tensor.unsqueeze_(0)

    # for t in range(args.times):
    message = torch.Tensor(np.random.choice(
        [0, 1], (image_tensor.shape[0], hidden_config.message_length))).to(device)
    losses, (encoded_images, noised_images, decoded_messages) = hidden_net.validate_on_batch(
        [image_tensor, message])
    decoded_rounded = decoded_messages.detach().cpu().numpy().round().clip(0, 1)
    message_detached = message.detach().cpu().numpy()
    print('original: {}'.format(message_detached))
    print('decoded : {}'.format(decoded_rounded))
    print('error : {:.3f}'.format(np.mean(np.abs(decoded_rounded - message_detached))))
    utils.save_images(image_tensor.cpu(), encoded_images.cpu(), 'test', '.', resize_to=(256, 256))
def compute_rcnn_embs(ds_root, mode='train', debug=False, world_size=1, rank=0):
    # type: (str, str, bool, int, int) -> None
    """
    :param ds_root: Cityflow-NL dataset root (absolute path)
    :return:
    """
    print(f"Started process {rank}")

    # Load Faster R-CNN detector
    gpu_id = rank % torch.cuda.device_count()
    model = fasterrcnn_resnet50_fpn(pretrained=True).to(f"cuda:{gpu_id}")
    model.eval()
    useful_classes = [1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14]

    # Load train json
    if mode == 'train':
        tracks_root = os.path.join(ds_root, 'data/train-tracks.json')
    elif mode == 'test':
        tracks_root = os.path.join(ds_root, 'data/test-tracks.json')
    else:
        raise Exception(f"Only train and test are valid split/modes")
    with open(tracks_root, "r") as f:
        tracks = json.load(f)
    keys = list(tracks.keys())

    # Output
    out_dir = os.path.join(ds_root, f"rcnn_embs_{mode}")
    if os.path.isdir(out_dir):
        # Remove already computed keys
        prec_keys = [k.split('.')[0] for k in os.listdir(out_dir)]
        keys = [k for k in keys if k not in prec_keys]

    for key_idx, id in tqdm(enumerate(keys), total=len(keys)):
        if (key_idx % world_size) != rank:
            continue
        result = {"frames": [], "detected_boxes": [], "features": [], "ego_ind": []}
        frames = tracks[id]['frames']
        ego = tracks[id]['boxes']
        for frame_path, ego in zip(frames, ego):
            frame_abspath = os.path.join(ds_root, frame_path)
            frame_orig = Image.open(frame_abspath)
            frame = to_tensor(frame_orig).to(f"cuda:{gpu_id}")
            with torch.no_grad():
                # object detection
                predictions = model([frame, ])
            boxes = predictions[0]["boxes"].cpu()
            features = predictions[0]["features"].cpu()
            labels = predictions[0]["labels"].cpu()
            scores = predictions[0]["scores"].cpu()

            # Filter boxes based on class and confidence score
            labels_filter = [i for i, l in enumerate(labels) if l in useful_classes]
            scores_filter = [i for i, s in enumerate(scores) if s > 0.65]
            indices = [i for i in range(len(boxes)) if i in labels_filter and i in scores_filter]

            # determine index of the ego vehicle (if detected)
            ego_bb = np.array(ego)[np.newaxis, :]
            # (x,y,w,h) -> (x1,y1,x2,y2)
            ego_bb[:, 2], ego_bb[:, 3] = ego_bb[:, 2] + ego_bb[:, 0], ego_bb[:, 3] + ego_bb[:, 1]
            if len(labels) > 1:  # at least one box is detected
                ious = iou_of(boxes.numpy(), ego_bb)
                ego_ind = np.argmax(ious)
                if ious[ego_ind] < 0.2:
                    ego_ind = -1
                else:
                    if ego_ind not in indices:
                        indices.append(ego_ind.item())
                        ego_ind = len(indices) - 1  # the last element
                    else:
                        ego_ind = indices.index(ego_ind)  # position relative to indices not labels
            else:
                ego_ind = -1

            # prepare result
            boxes[:, 2], boxes[:, 3] = boxes[:, 2] - boxes[:, 0], boxes[:, 3] - boxes[:, 1]  # (x1,y1,x2,y2) -> (x,y,w,h)
            boxes = torch.cat([labels[:, None], boxes, scores[:, None]], dim=-1)  # class, (x,y,w,h), score

            if len(indices) == 0:
                print(1)

            # filter
            boxes = boxes[indices]
            features = features[indices]

            result["frames"].append(frame_path)
            result["detected_boxes"].append(boxes)
            result["features"].append(features)
            result["ego_ind"].append(ego_ind)

        # save
        out_file = os.path.join(out_dir, f'{id}.pt')
        torch.save(result, out_file)
def evaluate(cls, env, model, r_idx, resnet, traj_data, args, lock, successes, failures, results):
    # reset model
    model.reset()

    # setup scene
    reward_type = 'dense'
    cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type)

    # extract language features
    feat = model.featurize([(traj_data, False)], load_mask=False)

    # goal instr
    goal_instr = traj_data['turk_annotations']['anns'][r_idx]['task_desc']

    maskrcnn = maskrcnn_resnet50_fpn(num_classes=119)
    maskrcnn.eval()
    maskrcnn.load_state_dict(torch.load('weight_maskrcnn.pt'))
    maskrcnn = maskrcnn.cuda()

    prev_image = None
    prev_action = None
    nav_actions = ['MoveAhead_25', 'RotateLeft_90', 'RotateRight_90', 'LookDown_15', 'LookUp_15']

    prev_class = 0
    prev_center = torch.zeros(2)

    done, success = False, False
    fails = 0
    t = 0
    reward = 0
    while not done:
        # break if max_steps reached
        if t >= args.max_steps:
            break

        # extract visual features
        curr_image = Image.fromarray(np.uint8(env.last_event.frame))
        feat['frames'] = resnet.featurize([curr_image], batch=1).unsqueeze(0)

        # forward model
        m_out = model.step(feat)
        m_pred = model.extract_preds(m_out, [(traj_data, False)], feat, clean_special_tokens=False)
        m_pred = list(m_pred.values())[0]

        # action prediction
        action = m_pred['action_low']
        if prev_image == curr_image and prev_action == action and prev_action in nav_actions and action in nav_actions and action == 'MoveAhead_25':
            dist_action = m_out['out_action_low'][0][0].detach().cpu()
            idx_rotateR = model.vocab['action_low'].word2index('RotateRight_90')
            idx_rotateL = model.vocab['action_low'].word2index('RotateLeft_90')
            action = 'RotateLeft_90' if dist_action[idx_rotateL] > dist_action[idx_rotateR] else 'RotateRight_90'

        if action == cls.STOP_TOKEN:
            print("\tpredicted STOP")
            break

        # mask prediction
        mask = None
        if model.has_interaction(action):
            class_dist = m_pred['action_low_mask'][0]
            pred_class = np.argmax(class_dist)

            # mask generation
            with torch.no_grad():
                out = maskrcnn([to_tensor(curr_image).cuda()])[0]
                for k in out:
                    out[k] = out[k].detach().cpu()

            if sum(out['labels'] == pred_class) == 0:
                mask = np.zeros((constants.SCREEN_WIDTH, constants.SCREEN_HEIGHT))
            else:
                masks = out['masks'][out['labels'] == pred_class].detach().cpu()
                scores = out['scores'][out['labels'] == pred_class].detach().cpu()

                # Instance selection based on the minimum distance between the prev. and
                # cur. instance of the same class.
                if prev_class != pred_class:
                    scores, indices = scores.sort(descending=True)
                    masks = masks[indices]
                    prev_class = pred_class
                    prev_center = masks[0].squeeze(dim=0).nonzero().double().mean(dim=0)
                else:
                    cur_centers = torch.stack(
                        [m.nonzero().double().mean(dim=0) for m in masks.squeeze(dim=1)])
                    distances = ((cur_centers - prev_center)**2).sum(dim=1)
                    distances, indices = distances.sort()
                    masks = masks[indices]
                    prev_center = cur_centers[0]

                mask = np.squeeze(masks[0].numpy(), axis=0)

        # print action
        if args.debug:
            print(action)

        # use predicted action and mask (if available) to interact with the env
        t_success, _, _, err, _ = env.va_interact(
            action, interact_mask=mask, smooth_nav=args.smooth_nav, debug=args.debug)

        if not t_success:
            fails += 1
            if fails >= args.max_fails:
                print("Interact API failed %d times" % fails + "; latest error '%s'" % err)
                break

        # next time-step
        t_reward, t_done = env.get_transition_reward()
        reward += t_reward
        t += 1

        prev_image = curr_image
        prev_action = action

    # check if goal was satisfied
    goal_satisfied = env.get_goal_satisfied()
    if goal_satisfied:
        print("Goal Reached")
        success = True

    # goal_conditions
    pcs = env.get_goal_conditions_met()
    goal_condition_success_rate = pcs[0] / float(pcs[1])

    # SPL
    path_len_weight = len(traj_data['plan']['low_actions'])
    s_spl = (1 if goal_satisfied else 0) * min(1., path_len_weight / (float(t) + 1e-4))
    pc_spl = goal_condition_success_rate * min(1., path_len_weight / (float(t) + 1e-4))

    # path length weighted SPL
    plw_s_spl = s_spl * path_len_weight
    plw_pc_spl = pc_spl * path_len_weight

    # log success/fails
    lock.acquire()
    log_entry = {'trial': traj_data['task_id'],
                 'type': traj_data['task_type'],
                 'repeat_idx': int(r_idx),
                 'goal_instr': goal_instr,
                 'completed_goal_conditions': int(pcs[0]),
                 'total_goal_conditions': int(pcs[1]),
                 'goal_condition_success': float(goal_condition_success_rate),
                 'success_spl': float(s_spl),
                 'path_len_weighted_success_spl': float(plw_s_spl),
                 'goal_condition_spl': float(pc_spl),
                 'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
                 'path_len_weight': int(path_len_weight),
                 'reward': float(reward)}
    if success:
        successes.append(log_entry)
    else:
        failures.append(log_entry)

    # overall results
    results['all'] = cls.get_metrics(successes, failures)

    print("-------------")
    print("SR: %d/%d = %.5f" % (results['all']['success']['num_successes'],
                                results['all']['success']['num_evals'],
                                results['all']['success']['success_rate']))
    print("PLW SR: %.5f" % (results['all']['path_length_weighted_success_rate']))
    print("GC: %d/%d = %.5f" % (results['all']['goal_condition_success']['completed_goal_conditions'],
                                results['all']['goal_condition_success']['total_goal_conditions'],
                                results['all']['goal_condition_success']['goal_condition_success_rate']))
    print("PLW GC: %.5f" % (results['all']['path_length_weighted_goal_condition_success_rate']))
    print("-------------")

    # task type specific results
    task_types = ['pick_and_place_simple', 'pick_clean_then_place_in_recep',
                  'pick_heat_then_place_in_recep', 'pick_cool_then_place_in_recep',
                  'pick_two_obj_and_place', 'look_at_obj_in_light',
                  'pick_and_place_with_movable_recep']
    for task_type in task_types:
        task_successes = [s for s in (list(successes)) if s['type'] == task_type]
        task_failures = [f for f in (list(failures)) if f['type'] == task_type]
        if len(task_successes) > 0 or len(task_failures) > 0:
            results[task_type] = cls.get_metrics(task_successes, task_failures)
        else:
            results[task_type] = {}

    lock.release()
def __call__(self, image, target):
    image = F.to_tensor(image)
    return image, target
def act(self, image, player_info):
    """
    :param image: numpy array of shape (300, 400, 3)
    :param player_info: pystk.Player object for the current kart.
    return: Dict describing the action
    """
    global FIRST, IM, BACKUP

    score_goal = None
    if self.team == 0:
        score_goal = GOAL_0
    else:
        score_goal = GOAL_1

    front = np.array(HACK_DICT['kart'].front)[[0, 2]]
    kart = np.array(HACK_DICT['kart'].location)[[0, 2]]
    puck = np.array(HACK_DICT['state'].soccer.ball.location)[[0, 2]]

    # player vector
    u = front - kart
    u = u / np.linalg.norm(u)

    # player to puck
    v = puck - kart
    v = v / np.linalg.norm(v)

    # goal to puck
    w = puck - score_goal
    w = w / np.linalg.norm(w)

    # adjust for scoring
    v2 = v + (w / 2)
    v2 = v2 / np.linalg.norm(v2)

    theta = np.arccos(np.dot(u, v2))
    signed_theta = -np.sign(np.cross(u, v2)) * theta
    steer = 5 * signed_theta
    accel = random.uniform(0.4, 0.8)
    brake = False
    drift = False

    if np.degrees(theta) > 20 and np.degrees(theta) < 90:
        drift = True

    if np.degrees(theta) > 90 and not BACKUP:
        BACKUP = True

    if BACKUP:
        if np.degrees(theta) > 30:
            accel = 0
            brake = True
            steer = -steer
        else:
            BACKUP = False

    p_info = []
    mask = (HACK_DICT['race'].render_data[self.player_id].instance == 134217729)
    mask = F.to_tensor(mask)
    nz = torch.nonzero(mask)
    if nz.numel() == 0:
        HACK_DICT['player_bool_%d' % self.player_id] = False
    else:
        HACK_DICT['player_bool_%d' % self.player_id] = True
        p_info.extend(kart)
        p_info.extend(u)
        peak = self.extract_peak(nz)
        p_info.extend(peak)
        # test = self.get_puck_coords(image)
        # p_info.extend(self.get_puck_coords(image))

    HACK_DICT['player_info_%d' % self.player_id] = p_info
    HACK_DICT['puck_vec_%d' % self.player_id] = (puck - kart)

    # visualize the controller in real time
    # if player_info.kart.id == 0:
    #     ax1 = plt.subplot(111)
    #     if FIRST:
    #         IM = ax1.imshow(image)
    #         FIRST = False
    #     else:
    #         IM.set_data(image)
    #     # print('p_to_fron: ', u)
    #     # print('WORLD p_to_puck: ', (puck - kart))
    #     # puck_xy = self.get_puck_coords(image)
    #     # screen_p_to_puck = np.array([200, 180]) - puck_xy
    #     # screen_p_to_puck[0] = -screen_p_to_puck[0]
    #     # print('SCREEN p_to_puck: ', screen_p_to_puck)
    #     # print('puck_to_g: ', w)
    #     # print('aim_vec__: ', v2)
    #     # print('signed theta: ', signed_theta)
    #     # print('steer: ', steer)
    #     # print('loc: ' + str(kart))
    #     # print('-----------------------')
    #     plt.pause(0.001)

    action = {
        'steer': steer,
        'acceleration': accel,
        'brake': brake,
        'drift': drift,
        'nitro': False,
        'rescue': False
    }
    return action
height = scene_size[1]
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

traj_old = traj.clone().numpy()
traj[:, 0] = traj[:, 0] - width // 2
traj[:, 1] = traj[:, 1] - height // 2
traj = traj.unsqueeze(0)
traj = traj.permute(0, 2, 1)
traj = traj.to(device)
rel_traj = absolute_to_rel_traj(traj).to(device)

logger.info("Moving static image to the device")
image = Image.open(path_of_static_image)
scene = TF.to_tensor(image)
scene.unsqueeze_(0)
scene = scene.to(device)

# Create model.
desire = DESIRE(IOCParams(), SGMParams())
logger.info("Created model")
logger.debug(desire)
logger.info("Moving to device: {}".format(device))
desire.to(device)
logger.info("Loading state dict")
state_dict_checkpoint = torch.load(restore_model_path)
desire.load_state_dict(state_dict_checkpoint)
state_dict_checkpoint = torch.load(restore_model_path)