Example #1
 def verify_img_data(img_data, expected_output, mode):
     if mode is None:
         img = transforms.ToPILImage()(img_data)
         assert img.mode == 'RGB'  # default should assume RGB
     else:
         img = transforms.ToPILImage(mode=mode)(img_data)
         assert img.mode == mode
     split = img.split()
     for i in range(3):
         assert np.allclose(expected_output[i].numpy(), F.to_tensor(split[i]).numpy())
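
A minimal usage sketch for the helper above (tensor shape and values are illustrative, and the usual torchvision imports are assumed):

import torch
import numpy as np
from torchvision import transforms
import torchvision.transforms.functional as F

# A 3 x 4 x 4 uint8 tensor; to_tensor() rescales each extracted channel into [0, 1],
# so the expected per-channel outputs are the original values divided by 255.
img_data = torch.randint(0, 256, (3, 4, 4), dtype=torch.uint8)
expected_output = [img_data[i].float() / 255.0 for i in range(3)]

verify_img_data(img_data, expected_output, mode=None)   # default mode inferred as RGB
verify_img_data(img_data, expected_output, mode='RGB')  # explicit mode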
Example #2
model_g = DSGAN.Generator(n_res_blocks=opt.num_res_blocks)
model_g.load_state_dict(torch.load(model_path), strict=True)
model_g.eval()
if torch.cuda.is_available():
    model_g = model_g.cuda()
print('# generator parameters:',
      sum(param.numel() for param in model_g.parameters()))

# generate the noisy images
idx = 0
with torch.no_grad():
    for file_hr, file_lr in zip(target_hr_files, target_lr_files):
        idx += 1
        print('Image No.:', idx)
        # load HR image
        input_img_hr = Image.open(file_hr)
        input_img_hr = TF.to_tensor(input_img_hr)

        # Save input_img as HR image for TDSR
        path = os.path.join(tdsr_hr_dir, os.path.basename(file_hr))
        TF.to_pil_image(input_img_hr).save(path, 'PNG')

        # load LR image
        input_img_lr = Image.open(file_lr)
        input_img_lr = TF.to_tensor(input_img_lr)

        # Apply model to generate the noisy resize_img
        input_img_lr = input_img_lr.unsqueeze(0)  # add a batch dimension: N x C x H x W
        if torch.cuda.is_available():
            input_img_lr = input_img_lr.cuda()

        resize_noisy_img = model_g(input_img_lr).squeeze(0).cpu()
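
The excerpt stops right after the generator produces resize_noisy_img; a plausible continuation, assuming an LR output directory analogous to tdsr_hr_dir (here called tdsr_lr_dir, which is not part of the original snippet), would clamp and save the result:

        # Save the generated noisy LR image (tdsr_lr_dir is a hypothetical output directory)
        path = os.path.join(tdsr_lr_dir, os.path.basename(file_lr))
        TF.to_pil_image(resize_noisy_img.clamp(0.0, 1.0)).save(path, 'PNG')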
Example #3
def preprocess_image(image_path):
    """Load an image, convert it to a tensor, and normalize it to [-1, 1]."""
    img = Image.open(image_path)
    # to_tensor() does not rescale float arrays, so pixel values stay in [0, 255]
    img_tensor = F.to_tensor(np.float32(img))
    return fixed_image_standardization(image_tensor=img_tensor)  # in [-1, 1]
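
For reference, fixed_image_standardization is assumed here to be the facenet_pytorch helper, which is roughly equivalent to:

def fixed_image_standardization(image_tensor):
    # assumed implementation: map a [0, 255] tensor to roughly [-1, 1]
    return (image_tensor - 127.5) / 128.0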
Example #4
 def __call__(self, image, target):
     return F.to_tensor(image), target
Example #5
import argparse

from PIL import Image
from torchvision.transforms.functional import to_tensor

import torchjpeg.codec

parser = argparse.ArgumentParser("Tests the pytorch DCT loader by reading an image, quantizing its pixels, and writing the DCT coefficients to a JPEG")
parser.add_argument("input", help="Input image, should be lossless")
parser.add_argument("output", help="Output image, must be a JPEG")
parser.add_argument("quality", type=int, help="Output quality on the 0-100 scale")
args = parser.parse_args()

im = to_tensor(Image.open(args.input))

if im.shape[0] > 3:
    im = im[:3]  # drop the alpha channel if present

dimensions, quantization, Y_coefficients, CbCr_coefficients = torchjpeg.codec.quantize_at_quality(im, args.quality)
torchjpeg.codec.write_coefficients(args.output, dimensions, quantization, Y_coefficients, CbCr_coefficients)
Example #6
    def __call__(self, results):
        results['img'] = TF.to_tensor(results['img'].copy())

        return results
Example #7
 def __call__(self, image, mask, joints, area):
     return F.to_tensor(image), mask, joints, area
Example #8
                        cur_land[1] = proj_num_rows - 1 - cur_land[1]

                # Indicators that there is enough of a femur visible in the field of view
                # that we can use ground truth pose for any additional experiments, such as
                # evaluating another femur registration method
                left_femur_good_fov = cur_proj_g[
                    'gt-poses/left-femur-good-fov'][()]
                right_femur_good_fov = cur_proj_g[
                    'gt-poses/right-femur-good-fov'][()]

                # Next the segmentation labels and landmark locations will be overlaid on the projections

                pil = TF.to_pil_image(cur_proj)
                pil = pil.convert('RGB')

                cur_proj = TF.to_tensor(pil)
                pil = None

                # alpha blending for segmentation overlay of pixels that are not background
                # 0 --> seg. not visible, only projection shows
                # 1 --> only seg. shows, proj. not visible in seg. regions
                alpha = 0.35

                label_colors = [
                    [0.0, 1.0, 0.0],  # green
                    [1.0, 0.0, 0.0],  # red
                    [0.0, 0.0, 1.0],  # blue
                    [1.0, 1.0, 0.0],  # yellow
                    [0.0, 1.0, 1.0],  # cyan
                    [1.0, 0.5, 0.0],  # orange
                ]
Example #9
def test_image(img):
    global model
    global params

    model.eval()

    # img = Image.open(img_path).convert('RGB')
    img = cv2.resize(img, dsize=(1280, 720))
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; convert before building the PIL image
    img_pil = Image.fromarray(img_rgb)

    # w, h = img.size
    h, w, _ = img.shape
    print(img.shape)

    img_pil = F.crop(img_pil, h-640, 0, 640, w)
    img_pil = F.resize(img_pil, size=(256, 512), interpolation=Image.BILINEAR)
    input = F.to_tensor(img_pil).float()
    # print("input:")
    # print(type(input))
    # print("input size:")
    # print(input.size())

    print("##################")

    input = input.unsqueeze(0)  # add a batch dimension
    # print("input:")
    # print(type(input))
    # print("input size:")
    # print(input.size())

    # Reset coordinates
    x_cal0, x_cal1, x_cal2, x_cal3 = [None]*4

    # Put inputs on gpu if possible
    if not args.no_cuda:
        input = input.cuda(non_blocking=True).float()

    # Run model
    torch.cuda.synchronize()
    a = time.time()
    start1 = time.time()
    beta0, beta1, beta2, beta3, weightmap_zeros, \
                    output_net, outputs_line, outputs_horizon, output_seg = model(input, gt_line=np.array([1,1]), 
                                                                                  end_to_end=args.end_to_end, gt=None)
    
    torch.cuda.synchronize()
    b = time.time()

    # Horizon task & Line classification task
    if args.clas:
        horizon_pred = nn.Sigmoid()(outputs_horizon).sum(dim=1)
        horizon_pred = (torch.round((resize_coordinates(horizon_pred) + 80)/10)*10).int()
        line_pred = torch.round(nn.Sigmoid()(outputs_line))
    else:
        assert False
    
    # Calculate X coordinates
    x_cal0 = params.compute_coordinates(beta0)
    x_cal1 = params.compute_coordinates(beta1)
    x_cal2 = params.compute_coordinates(beta2)
    x_cal3 = params.compute_coordinates(beta3)
    lanes_pred = torch.stack((x_cal0, x_cal1, x_cal2, x_cal3), dim=1)

    print("DL FPS: {0}".format(1.0/(time.time()-start1)))

    # Check line type branch
    line_pred = line_pred[:, [1, 2, 0, 3]]
    lanes_pred[(1 - line_pred[:, :, None]).byte().expand_as(lanes_pred)] = -2

    # Check horizon branch
    bounds = ((horizon_pred - 160) / 10)
    for k, bound in enumerate(bounds):
        lanes_pred[k, :, :bound.item()] = -2

    # TODO check intersections
    lanes_pred[lanes_pred > 1279] = -2
    lanes_pred[lanes_pred < 0] = -2

    lanes_pred = np.int_(np.round(lanes_pred.data.cpu().numpy())).tolist()
    num_el = input.size(0)

    for j in range(num_el):
        lanes_to_write = lanes_pred[j]
        
        if args.draw_testset:
            test = weightmap_zeros[j]
            weight0 = test[0]
            weight1 = test[1]
            weight2 = test[2]
            weight3 = test[3]
            
            # img_name = img_path
            h_samples = [160, 170, 180, 190, 200, 210, 220, 230, 240, 250, 260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570, 580, 590, 600, 610, 620, 630, 640, 650, 660, 670, 680, 690, 700, 710]
            colormap = [(255,0,0), (0,255,0), (255,255,0), (0,0,255), (0, 128, 128)]

            # with open(img_name, 'rb') as f:
            #     img = np.array(Image.open(f).convert('RGB'))
            for lane_i in range(len(lanes_to_write)):
                x_orig = lanes_to_write[lane_i]
                pt_or = [(xcord, ycord) for (xcord, ycord) in zip(x_orig, h_samples) if xcord!=-2]
                for point in pt_or:
                    img = cv2.circle(img, tuple(np.int32(point)), thickness=-1, color=colormap[lane_i], radius = 3)
            # img = Image.fromarray(np.uint8(img))
            # img.show()
            return img
Example #10
File: eval.py  Project: brianlan/HR-Net
def read_im():
    im = Image.open('data/night.jpg')
    im = TF.crop(im, 16, 0, 704, 1280)
    im = TF.to_tensor(im)
    im = im - 0.5
    return im[None, ...]
Example #11
 def __call__(self, sample):
     image, image_seg, label = sample['image'], sample['image_seg'], sample['label']
     img = trF.to_tensor(image)
     img_seg = trF.to_tensor(image_seg)
     return {'image': img, 'image_seg': img_seg, 'label': label}
Example #12
 def __call__(self, data):
     data['input'] = F.to_tensor(data['input']).float()
     data['label'] = F.to_tensor(data['label']).float()
     return data
Example #13
 def __call__(self, image, *args):
     return (F.to_tensor(image), ) + args
Example #14
def get_contour_gain_vs_length(model,
                               device_to_use,
                               g_params,
                               k_idx,
                               ch_mus,
                               ch_sigmas,
                               rslt_dir,
                               c_len_arr,
                               frag_size=np.array([7, 7]),
                               full_tile_size=np.array([14, 14]),
                               img_size=np.array([256, 256, 3]),
                               n_images=50,
                               epsilon=1e-5,
                               iou_results=True):
    """

    :param iou_results:
    :param c_len_arr:
    :param rslt_dir:
    :param epsilon:
    :param model:
    :param device_to_use:
    :param g_params:
    :param k_idx:
    :param ch_mus:
    :param ch_sigmas:
    :param frag_size:
    :param full_tile_size:
    :param img_size:
    :param n_images:
    :return:
    """
    global edge_extract_act
    global cont_int_in_act
    global cont_int_out_act

    # tracking variables  -------------------------------------------------
    iou_arr = []

    tgt_n = k_idx
    max_act_n_idx = g_params[0]['extra_info']['max_active_neuron_idx']

    tgt_n_out_acts = np.zeros((n_images, len(c_len_arr)))
    max_act_n_acts = np.zeros_like(tgt_n_out_acts)
    # -----------------------------------------------------------------
    frag = gabor_fits.get_gabor_fragment(g_params, spatial_size=frag_size)
    bg = g_params[0]['bg']

    for c_len_idx, c_len in enumerate(c_len_arr):
        print("Processing contour length = {}".format(c_len))
        iou = 0

        for img_idx in range(n_images):

            # (1) Create Test Image
            test_img, test_img_label, contour_frags_starts, end_acc_angle, start_acc_angle = \
                fields1993_stimuli.generate_contour_image(
                    frag=frag,
                    frag_params=g_params,
                    c_len=c_len,
                    beta=0,
                    alpha=0,
                    f_tile_size=full_tile_size,
                    img_size=img_size,
                    random_alpha_rot=True,
                    rand_inter_frag_direction_change=True,
                    use_d_jitter=False,
                    bg_frag_relocate=True,
                    bg=bg
                )

            test_img = transform_functional.to_tensor(test_img)
            test_img_label = torch.from_numpy(
                np.array(test_img_label)).unsqueeze(0)

            # # Debug - Plot Test Image
            # # ------------------------
            # if img_idx == 0:
            #     disp_img = np.transpose(test_img.numpy(), axes=(1, 2, 0))
            #     disp_img = (disp_img - disp_img.min()) / (disp_img.max() - disp_img.min()) * 255.
            #     disp_img = disp_img.astype('uint8')
            #     disp_label = test_img_label.numpy()
            #
            #     print(disp_label)
            #     print("Label is valid? {}".format(fields1993_stimuli.is_label_valid(disp_label)))
            #
            #     plt.figure()
            #     plt.imshow(disp_img)
            #     plt.title("Input Image. Contour Length = {}".format(c_len))
            #
            #     # Highlight Label Tiles
            #     disp_label_image = fields1993_stimuli.plot_label_on_image(
            #         disp_img,
            #         disp_label,
            #         full_tile_size,
            #         edge_color=(250, 0, 0),
            #         edge_width=2,
            #         display_figure=False
            #     )
            #
            #     # Highlight All background Tiles
            #     full_tile_starts = fields1993_stimuli.get_background_tiles_locations(
            #         frag_len=full_tile_size[0],
            #         img_len=img_size[1],
            #         row_offset=0,
            #         space_bw_tiles=0,
            #         tgt_n_visual_rf_start=img_size[0] // 2 - (full_tile_size[0] // 2)
            #     )
            #
            #     disp_label_image = fields1993_stimuli.highlight_tiles(
            #         disp_label_image,
            #         full_tile_size,
            #         full_tile_starts,
            #         edge_color=(255, 255, 0))
            #
            #     plt.figure()
            #     plt.imshow(disp_label_image)
            #     plt.title("Labeled Image. Countour Length = {}".format(c_len))

            # (2) Get output Activations
            if iou_results:
                label = test_img_label
                iou += process_image(model, device_to_use, ch_mus, ch_sigmas,
                                     test_img, label)
            else:
                label = None
                process_image(model, device_to_use, ch_mus, ch_sigmas,
                              test_img, label)

            center_n_acts = \
                cont_int_out_act[
                    0, :, cont_int_out_act.shape[2] // 2, cont_int_out_act.shape[3] // 2]

            tgt_n_out_acts[img_idx, c_len_idx] = center_n_acts[tgt_n]
            max_act_n_acts[img_idx, c_len_idx] = center_n_acts[max_act_n_idx]

        iou_arr.append(iou / n_images)

    # # ---------------------------------
    # import pdb
    # pdb.set_trace()
    # plt.close('all')

    # IOU
    if iou_results:
        # print("IoU per length {}".format(iou_arr))
        f_title = "Iou vs length - Neuron {}".format(k_idx)
        f_name = "neuron {}".format(k_idx)
        plot_iou_vs_contour_length(c_len_arr, iou_arr, rslt_dir, f_title,
                                   f_name)

    # -------------------------------------------
    # Gain
    # -------------------------------------------
    # In Li2006, gain was defined as the output of a neuron / its mean output to a noise
    # pattern, where the noise pattern had the optimal stimulus at the center of the RF and
    # all other fragments random. This corresponds to resp(c_len=x) / mean resp(c_len=1).
    tgt_n_avg_noise_resp = np.mean(tgt_n_out_acts[:, 0])
    max_active_n_avg_noise_resp = np.mean(max_act_n_acts[:, 0])

    tgt_n_gains = tgt_n_out_acts / (tgt_n_avg_noise_resp + epsilon)
    max_active_n_gains = max_act_n_acts / (max_active_n_avg_noise_resp +
                                           epsilon)

    tgt_n_mean_gain_arr = np.mean(tgt_n_gains, axis=0)
    tgt_n_std_gain_arr = np.std(tgt_n_gains, axis=0)

    max_act_n_mean_gain_arr = np.mean(max_active_n_gains, axis=0)
    max_act_n_std_gain_arr = np.std(max_active_n_gains, axis=0)

    # -----------------------------------------------------------------------------------
    # Plots
    # -----------------------------------------------------------------------------------
    # Gain vs Length
    # f = plt.figure()
    f, ax_arr = plt.subplots(1, 2)
    ax_arr[0].errorbar(c_len_arr,
                       tgt_n_mean_gain_arr,
                       tgt_n_std_gain_arr,
                       label='Target Neuron {}'.format(tgt_n))
    ax_arr[1].errorbar(c_len_arr,
                       max_act_n_mean_gain_arr,
                       max_act_n_std_gain_arr,
                       label='Max Active Neuron {}'.format(max_act_n_idx))
    ax_arr[0].set_xlabel("Contour Length")
    ax_arr[1].set_xlabel("Contour Length")
    ax_arr[0].set_ylabel("Gain")
    ax_arr[1].set_ylabel("Gain")
    ax_arr[0].set_ylim(bottom=0)
    ax_arr[1].set_ylim(bottom=0)
    ax_arr[0].grid()
    ax_arr[1].grid()
    ax_arr[0].legend()
    ax_arr[1].legend()
    f.suptitle("Contour Gain Vs length - Neuron {}".format(k_idx))
    f.savefig(os.path.join(rslt_dir, 'gain_vs_len.jpg'), format='jpg')
    plt.close(f)

    # Output activations vs Length
    f = plt.figure()
    plt.errorbar(c_len_arr,
                 np.mean(tgt_n_out_acts, axis=0),
                 np.std(tgt_n_out_acts, axis=0),
                 label='target_neuron_{}'.format(tgt_n))
    plt.errorbar(c_len_arr,
                 np.mean(max_act_n_acts, axis=0),
                 np.std(max_act_n_acts, axis=0),
                 label='max_active_neuron_{}'.format(max_act_n_idx))
    plt.legend()
    plt.grid()
    plt.xlabel("Contour Length")
    plt.ylabel("Activations")
    plt.title("Output Activations")
    f.savefig(os.path.join(rslt_dir, 'output_activations_vs_len.jpg'),
              format='jpg')
    plt.close(f)

    # Save output Activations
    tgt_n_mean_out_acts = np.mean(tgt_n_out_acts, axis=0)
    tgt_n_std_out_acts = np.std(tgt_n_out_acts, axis=0)

    return iou_arr, tgt_n_mean_gain_arr, tgt_n_std_gain_arr, max_act_n_mean_gain_arr, \
        max_act_n_std_gain_arr, tgt_n_avg_noise_resp, max_active_n_avg_noise_resp, \
        tgt_n_mean_out_acts, tgt_n_std_out_acts
Example #15
def find_optimal_stimulus(model,
                          device_to_use,
                          k_idx,
                          ch_mus,
                          ch_sigmas,
                          extract_point,
                          frag_size=np.array([7, 7]),
                          img_size=np.array([256, 256, 3])):
    """

    :return:
    """
    global edge_extract_act
    global cont_int_in_act
    global cont_int_out_act

    orient_arr = np.arange(0, 180, 5)

    img_center = img_size[0:2] // 2

    tgt_n_acts = np.zeros((len(base_gabor_parameters), len(orient_arr)))
    tgt_n_max_act = 0
    tgt_n_opt_params = None

    for base_gp_idx, base_gabor_params in enumerate(base_gabor_parameters):
        print("Processing Base Gabor Param Set {}".format(base_gp_idx))
        for o_idx, orient in enumerate(orient_arr):

            # Change orientation
            g_params = copy.deepcopy(base_gabor_params)
            for c_idx in range(len(g_params)):
                g_params[c_idx]["theta_deg"] = orient

            # Create Test Image - Single fragment @ center
            frag = gabor_fits.get_gabor_fragment(g_params,
                                                 spatial_size=frag_size)
            bg = base_gabor_params[0]['bg']
            if bg is None:
                bg = fields1993_stimuli.get_mean_pixel_value_at_boundary(frag)

            test_img = np.ones(img_size, dtype='uint8') * bg

            add_one = 1
            if frag_size[0] % 2 == 0:
                add_one = 0

            r_start = img_center[0] - frag_size[0] // 2
            r_stop = img_center[0] + frag_size[0] // 2 + add_one
            test_img[r_start:r_stop, r_start:r_stop, :] = frag

            test_img = transform_functional.to_tensor(test_img)

            # # Debug - Show Test Image
            # plt.figure()
            # plt.imshow(np.transpose(test_img,axes=(1, 2, 0)))
            # plt.title("Input Image - Find optimal stimulus")
            # import pdb
            # pdb.set_trace()

            # Get target activations
            process_image(model, device_to_use, ch_mus, ch_sigmas, test_img)

            if extract_point == 'edge_extract_layer_out':
                center_n_acts = \
                    edge_extract_act[
                        0, :, edge_extract_act.shape[2]//2, edge_extract_act.shape[3]//2]
            elif extract_point == 'contour_integration_layer_in':
                center_n_acts = \
                    cont_int_in_act[
                        0, :, cont_int_in_act.shape[2]//2, cont_int_in_act.shape[3]//2]
            else:  # 'contour_integration_layer_out'
                center_n_acts = \
                    cont_int_out_act[
                        0, :, cont_int_out_act.shape[2]//2, cont_int_out_act.shape[3]//2]

            tgt_n_act = center_n_acts[k_idx]
            tgt_n_acts[base_gp_idx, o_idx] = tgt_n_act

            # # # Debug - Display all channel responses to individual test image
            # plt.figure()
            # plt.plot(center_n_acts)
            # plt.title("Center Neuron Activations. Base Gabor Set {}. Orientation {}".format(
            #     base_gp_idx, orient))

            if tgt_n_act > tgt_n_max_act:

                tgt_n_max_act = tgt_n_act
                tgt_n_opt_params = copy.deepcopy(g_params)

                max_active_n = int(np.argmax(center_n_acts))

                extra_info = {
                    'optim_stim_act_value': tgt_n_max_act,
                    'optim_stim_base_gabor_set': base_gp_idx,
                    'optim_stim_act_orient': orient,
                    'max_active_neuron_is_target': (max_active_n == k_idx),
                    'max_active_neuron_value': center_n_acts[max_active_n],
                    'max_active_neuron_idx': max_active_n
                }

                for item in tgt_n_opt_params:
                    item['extra_info'] = extra_info

        # # -----------------------------------------
        # # Debug - Tuning Curve for Individual base Gabor Params
        # plt.figure()
        # plt.plot(orient_arr, tgt_n_acts[base_gp_idx, :])
        # plt.title("Neuron {}: responses vs Orientation. Gabor Set {}".format(k_idx, base_gp_idx))
        # import pdb
        # pdb.set_trace()

    # ---------------------------
    if tgt_n_opt_params is not None:

        # Save optimal tuning curve
        for item in tgt_n_opt_params:
            opt_base_g_params_set = item['extra_info'][
                'optim_stim_base_gabor_set']
            item['extra_info']['orient_tuning_curve_x'] = orient_arr
            item['extra_info']['orient_tuning_curve_y'] = tgt_n_acts[
                opt_base_g_params_set, ]

        # # Debug: plot tuning curves for all gabor sets
        # # ------------------------------------------------
        # plt.figure()
        # for base_gp_idx, base_gabor_params in enumerate(base_gabor_parameters):
        #
        #     if base_gp_idx == tgt_n_opt_params[0]['extra_info']['optim_stim_base_gabor_set']:
        #         line_width = 5
        #         plt.plot(
        #             tgt_n_opt_params[0]['extra_info']['optim_stim_act_orient'],
        #             tgt_n_opt_params[0]['extra_info']['max_active_neuron_value'],
        #             marker='x', markersize=10,
        #             label='max active neuron Index {}'.format(
        #                 tgt_n_opt_params[0]['extra_info']['max_active_neuron_idx'])
        #         )
        #     else:
        #         line_width = 2
        #
        #     plt.plot(
        #         orient_arr, tgt_n_acts[base_gp_idx, ],
        #         label='param set {}'.format(base_gp_idx), linewidth=line_width
        #     )
        #
        # plt.legend()
        # plt.grid(True)
        # plt.title(
        #     "Kernel {}. Max Active Base Set {}. Is most responsive to this stimulus {}".format(
        #         k_idx,
        #         tgt_n_opt_params[0]['extra_info']['optim_stim_base_gabor_set'],
        #         tgt_n_opt_params[0]['extra_info']['max_active_neuron_is_target'])
        # )
        #
        # import pdb
        # pdb.set_trace()

    return tgt_n_opt_params
Example #16
 def to_tensor(self, img):
     img = Image.fromarray(img)
     img_t = F.to_tensor(img).float()
     return img_t
Example #17
 def __call__(self, image, target):
     image = F.to_tensor(image).contiguous()
     return image, target
Example #18
def load(cfg):
    all_imgs, all_poses = [], []
    counts = [0]

    for s in ['train', 'val', 'test']:
        meta = None
        fname = os.path.join(cfg.dataset.path, 'transforms_{}.json'.format(s))
        with open(fname, 'r') as fp:
            meta = json.load(fp)

        imgs = []
        poses = []
        if s == 'train' or cfg.dataset.testskip < 2:
            skip = 1
        else:
            skip = cfg.dataset.testskip

        for frame in meta['frames'][::skip]:
            fname = os.path.join(cfg.dataset.path, frame['file_path'] + '.png')

            im = Image.open(fname)
            if cfg.dataset.half_res:
                im = TF.resize(im, (400, 400))
            imgs.append(TF.to_tensor(im))
            poses.append(torch.Tensor(frame['transform_matrix']))

        # keep all 4 channels (RGBA)
        imgs = torch.stack(imgs, dim=0).permute(0, 2, 3, 1)
        poses = torch.stack(poses, dim=0)
        counts.append(counts[-1] + imgs.shape[0])
        all_imgs.append(imgs)
        all_poses.append(poses)

    i_split = [torch.arange(counts[i], counts[i + 1]) for i in range(3)]

    imgs = torch.cat(all_imgs, dim=0)
    poses = torch.cat(all_poses, dim=0)

    H, W = imgs.shape[1:3]
    camera_angle_x = float(meta['camera_angle_x'])
    focal = .5 * W / torch.tan(torch.Tensor([.5 * camera_angle_x]))
    hwf = [int(H), int(W), focal.numpy()[0]]

    render_poses = torch.stack([
        pose_spherical(angle, -30., 4.)
        for angle in torch.linspace(-180., 180., 41)[:-1]
    ], dim=0)

    if cfg.train.white_background:
        imgs = imgs[..., :3] * imgs[..., -1:] + (1. - imgs[..., -1:])
    else:
        imgs = imgs[..., :3]

    print(
        'Loaded blender',
        cfg.dataset.path,
        imgs.shape,
        poses.shape,
        render_poses.shape,
        hwf
    )
    return imgs, poses, render_poses, i_split, hwf
Example #19
def data_transform(img, im_size):
    img = img.resize(im_size, Image.BILINEAR)
    img = F.to_tensor(img)  # convert to tensor (values between 0 and 1)
    img = F.normalize(img, MEAN, STD)  # normalize the tensor
    return img
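
A short usage sketch; MEAN and STD are not defined in the snippet, so the usual ImageNet statistics are assumed here, and the input path is illustrative:

from PIL import Image
import torchvision.transforms.functional as F

MEAN = [0.485, 0.456, 0.406]  # assumed ImageNet mean
STD = [0.229, 0.224, 0.225]   # assumed ImageNet std

img = Image.open('example.jpg').convert('RGB')    # hypothetical input image
tensor = data_transform(img, im_size=(512, 256))  # PIL resize takes (width, height)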
Example #20
 def __call__(self, img, target):
     return F.to_tensor(img), target
Example #21
 def __call__(self, images, intrinsics):
     tensors = [F.to_tensor(im) for im in images]
     return tensors, torch.from_numpy(intrinsics)
Example #22
 def __call__(self, sample):
     return {'original_image': F.to_tensor(sample['original_image']),
             'downsampled_image': F.to_tensor(sample['downsampled_image']),
             'label': sample['label']}
Example #23
 def transform_image(self, image):
     # img_resized_tensor = TF.to_tensor(image)
     normalize_img = (image - 127.5) / 127.5  # scale uint8 pixels to [-1, 1]
     # to_tensor() leaves float arrays unscaled, so the [-1, 1] range is preserved
     return TF.to_tensor(normalize_img).type(torch.FloatTensor)
Example #24
def to_tensor(im1, im2):
    a, b = im1
    a = tvF.to_tensor(a)
    b = tvF.to_tensor(b)
    im2 = tvF.to_tensor(im2)
    return torch.cat([a, b]), im2
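
A usage sketch for the pair-concatenating variant above (the images are illustrative and must share the same size):

from PIL import Image

a = Image.new('RGB', (32, 32))
b = Image.new('RGB', (32, 32))
target = Image.new('RGB', (32, 32))

x, y = to_tensor((a, b), target)
print(x.shape)  # torch.Size([6, 32, 32]) -- the image pair is stacked along the channel dimension
print(y.shape)  # torch.Size([3, 32, 32])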
Example #25
 def resize_batch(self, label, size):
     """Resize with nearest-neighbour interpolation and convert to a tensor."""
     resized = F.resize(label, size, interpolation=Image.NEAREST)
     # to_tensor() scales 8-bit values into [0, 1]; multiply by 255 to restore the label IDs
     return F.to_tensor(resized) * 255
Example #26
def main():
    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    parser = argparse.ArgumentParser(description='Test trained models')
    parser.add_argument(
        '--options-file',
        '-o',
        default='options-and-config.pickle',
        type=str,
        help='The file where the simulation options are stored.')
    parser.add_argument('--checkpoint-file',
                        '-c',
                        required=True,
                        type=str,
                        help='Model checkpoint file')
    parser.add_argument('--batch-size',
                        '-b',
                        default=12,
                        type=int,
                        help='The batch size.')
    parser.add_argument('--source-image',
                        '-s',
                        required=True,
                        type=str,
                        help='The image to watermark')
    # parser.add_argument('--times', '-t', default=10, type=int,
    #                     help='Number iterations (insert watermark->extract).')

    args = parser.parse_args()

    train_options, hidden_config, noise_config = utils.load_options(
        args.options_file)
    noiser = Noiser(noise_config)

    checkpoint = torch.load(args.checkpoint_file)
    hidden_net = Hidden(hidden_config, device, noiser, None)
    utils.model_from_checkpoint(hidden_net, checkpoint)

    image_pil = Image.open(args.source_image)
    image = randomCrop(np.array(image_pil), hidden_config.H, hidden_config.W)
    image_tensor = TF.to_tensor(image).to(device)
    image_tensor = image_tensor * 2 - 1  # transform from [0, 1] to [-1, 1]
    image_tensor.unsqueeze_(0)

    # for t in range(args.times):
    message = torch.Tensor(
        np.random.choice(
            [0, 1],
            (image_tensor.shape[0], hidden_config.message_length))).to(device)
    losses, (encoded_images, noised_images,
             decoded_messages) = hidden_net.validate_on_batch(
                 [image_tensor, message])
    decoded_rounded = decoded_messages.detach().cpu().numpy().round().clip(
        0, 1)
    message_detached = message.detach().cpu().numpy()
    print('original: {}'.format(message_detached))
    print('decoded : {}'.format(decoded_rounded))
    print('error : {:.3f}'.format(
        np.mean(np.abs(decoded_rounded - message_detached))))
    utils.save_images(image_tensor.cpu(),
                      encoded_images.cpu(),
                      'test',
                      '.',
                      resize_to=(256, 256))
Example #27
def compute_rcnn_embs(ds_root,
                      mode='train',
                      debug=False,
                      world_size=1,
                      rank=0):
    # type: (str, str, bool, int, int) -> None
    """
    :param ds_root: Cityflow-NL dataset root (absolute path)
    :return:
    """
    print(f"Started process {rank}")
    # Load BERT model
    gpu_id = rank % torch.cuda.device_count()
    model = fasterrcnn_resnet50_fpn(pretrained=True).to(f"cuda:{gpu_id}")
    model.eval()

    useful_classes = [1, 2, 3, 4, 5, 7, 8, 10, 11, 12, 13, 14]

    # Load train json
    if mode == 'train':
        tracks_root = os.path.join(ds_root, 'data/train-tracks.json')
    elif mode == 'test':
        tracks_root = os.path.join(ds_root, 'data/test-tracks.json')
    else:
        raise Exception(f"Only train and test are valid split/modes")

    with open(tracks_root, "r") as f:
        tracks = json.load(f)

    keys = list(tracks.keys())

    # Output
    out_dir = os.path.join(ds_root, f"rcnn_embs_{mode}")
    if os.path.isdir(out_dir):
        # Remove already computed keys
        prec_keys = [k.split('.')[0] for k in os.listdir(out_dir)]
        keys = [k for k in keys if k not in prec_keys]
    else:
        os.makedirs(out_dir)

    for key_idx, id in tqdm(enumerate(keys), total=len(keys)):

        if (key_idx % world_size) != rank:
            continue

        result = {
            "frames": [],
            "detected_boxes": [],
            "features": [],
            "ego_ind": []
        }

        frames = tracks[id]['frames']
        ego_boxes = tracks[id]['boxes']

        for frame_path, ego in zip(frames, ego_boxes):
            frame_abspath = os.path.join(ds_root, frame_path)
            frame_orig = Image.open(frame_abspath)
            frame = to_tensor(frame_orig).to(f"cuda:{gpu_id}")

            with torch.no_grad():
                # object detection
                predictions = model([
                    frame,
                ])

            boxes = predictions[0]["boxes"].cpu()
            features = predictions[0]["features"].cpu()
            labels = predictions[0]["labels"].cpu()
            scores = predictions[0]["scores"].cpu()

            # Filter boxes based on class and confidence score
            labels_filter = [
                i for i, l in enumerate(labels) if l in useful_classes
            ]
            scores_filter = [i for i, s in enumerate(scores) if s > 0.65]

            indices = [
                i for i in range(len(boxes))
                if i in labels_filter and i in scores_filter
            ]

            # determine index of the ego vehicle (if detected)
            ego_bb = np.array(ego)[np.newaxis, :]
            # (x, y, w, h) -> (x1, y1, x2, y2)
            ego_bb[:, 2] = ego_bb[:, 2] + ego_bb[:, 0]
            ego_bb[:, 3] = ego_bb[:, 3] + ego_bb[:, 1]

            if len(labels) > 1:  # at least one box is detected
                ious = iou_of(boxes.numpy(), ego_bb)
                ego_ind = np.argmax(ious)

                if ious[ego_ind] < 0.2:
                    ego_ind = -1
                else:
                    if ego_ind not in indices:
                        indices.append(ego_ind.item())
                        ego_ind = len(indices) - 1  # the last element
                    else:
                        ego_ind = indices.index(
                            ego_ind)  # position relative to indices not labels
            else:
                ego_ind = -1

            # prepare result: (x1, y1, x2, y2) -> (x, y, w, h)
            boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
            boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
            boxes = torch.cat([labels[:, None], boxes, scores[:, None]],
                              dim=-1)  # class, (x, y, w, h), score

            if len(indices) == 0:
                print(f"Warning: no boxes kept for frame {frame_path}")

            # filter
            boxes = boxes[indices]
            features = features[indices]

            result["frames"].append(frame_path)
            result["detected_boxes"].append(boxes)
            result["features"].append(features)
            result["ego_ind"].append(ego_ind)

        # save
        out_file = os.path.join(out_dir, f'{id}.pt')
        torch.save(result, out_file)
Example #28
    def evaluate(cls, env, model, r_idx, resnet, traj_data, args, lock,
                 successes, failures, results):
        # reset model
        model.reset()

        # setup scene
        reward_type = 'dense'
        cls.setup_scene(env, traj_data, r_idx, args, reward_type=reward_type)

        # extract language features
        feat = model.featurize([(traj_data, False)], load_mask=False)

        # goal instr
        goal_instr = traj_data['turk_annotations']['anns'][r_idx]['task_desc']

        maskrcnn = maskrcnn_resnet50_fpn(num_classes=119)
        maskrcnn.eval()
        maskrcnn.load_state_dict(torch.load('weight_maskrcnn.pt'))
        maskrcnn = maskrcnn.cuda()

        prev_image = None
        prev_action = None
        nav_actions = [
            'MoveAhead_25', 'RotateLeft_90', 'RotateRight_90', 'LookDown_15',
            'LookUp_15'
        ]

        prev_class = 0
        prev_center = torch.zeros(2)

        done, success = False, False
        fails = 0
        t = 0
        reward = 0
        while not done:
            # break if max_steps reached
            if t >= args.max_steps:
                break

            # extract visual features
            curr_image = Image.fromarray(np.uint8(env.last_event.frame))
            feat['frames'] = resnet.featurize([curr_image],
                                              batch=1).unsqueeze(0)

            # forward model
            m_out = model.step(feat)
            m_pred = model.extract_preds(m_out, [(traj_data, False)],
                                         feat,
                                         clean_special_tokens=False)
            m_pred = list(m_pred.values())[0]

            # action prediction
            action = m_pred['action_low']
            if prev_image == curr_image and prev_action == action and prev_action in nav_actions and action in nav_actions and action == 'MoveAhead_25':
                dist_action = m_out['out_action_low'][0][0].detach().cpu()
                idx_rotateR = model.vocab['action_low'].word2index(
                    'RotateRight_90')
                idx_rotateL = model.vocab['action_low'].word2index(
                    'RotateLeft_90')
                action = 'RotateLeft_90' if dist_action[
                    idx_rotateL] > dist_action[
                        idx_rotateR] else 'RotateRight_90'

            if action == cls.STOP_TOKEN:
                print("\tpredicted STOP")
                break

            # mask prediction
            mask = None
            if model.has_interaction(action):
                class_dist = m_pred['action_low_mask'][0]
                pred_class = np.argmax(class_dist)

                # mask generation
                with torch.no_grad():
                    out = maskrcnn([to_tensor(curr_image).cuda()])[0]
                    for k in out:
                        out[k] = out[k].detach().cpu()

                if sum(out['labels'] == pred_class) == 0:
                    mask = np.zeros(
                        (constants.SCREEN_WIDTH, constants.SCREEN_HEIGHT))
                else:
                    masks = out['masks'][out['labels'] ==
                                         pred_class].detach().cpu()
                    scores = out['scores'][out['labels'] ==
                                           pred_class].detach().cpu()

                    # Instance selection based on the minimum distance between the prev. and cur. instance of a same class.
                    if prev_class != pred_class:
                        scores, indices = scores.sort(descending=True)
                        masks = masks[indices]
                        prev_class = pred_class
                        prev_center = masks[0].squeeze(
                            dim=0).nonzero().double().mean(dim=0)
                    else:
                        cur_centers = torch.stack([
                            m.nonzero().double().mean(dim=0)
                            for m in masks.squeeze(dim=1)
                        ])
                        distances = ((cur_centers - prev_center)**2).sum(dim=1)
                        distances, indices = distances.sort()
                        masks = masks[indices]
                        prev_center = cur_centers[0]

                    mask = np.squeeze(masks[0].numpy(), axis=0)

            # print action
            if args.debug:
                print(action)

            # use predicted action and mask (if available) to interact with the env
            t_success, _, _, err, _ = env.va_interact(
                action,
                interact_mask=mask,
                smooth_nav=args.smooth_nav,
                debug=args.debug)

            if not t_success:
                fails += 1
                if fails >= args.max_fails:
                    print("Interact API failed %d times" % fails +
                          "; latest error '%s'" % err)
                    break

            # next time-step
            t_reward, t_done = env.get_transition_reward()
            reward += t_reward
            t += 1

            prev_image = curr_image
            prev_action = action

        # check if goal was satisfied
        goal_satisfied = env.get_goal_satisfied()
        if goal_satisfied:
            print("Goal Reached")
            success = True

        # goal_conditions
        pcs = env.get_goal_conditions_met()
        goal_condition_success_rate = pcs[0] / float(pcs[1])

        # SPL
        path_len_weight = len(traj_data['plan']['low_actions'])
        s_spl = (1 if goal_satisfied else 0) * min(
            1., path_len_weight / (float(t) + 1e-4))
        pc_spl = goal_condition_success_rate * min(
            1., path_len_weight / (float(t) + 1e-4))

        # path length weighted SPL
        plw_s_spl = s_spl * path_len_weight
        plw_pc_spl = pc_spl * path_len_weight

        # log success/fails
        lock.acquire()
        log_entry = {
            'trial': traj_data['task_id'],
            'type': traj_data['task_type'],
            'repeat_idx': int(r_idx),
            'goal_instr': goal_instr,
            'completed_goal_conditions': int(pcs[0]),
            'total_goal_conditions': int(pcs[1]),
            'goal_condition_success': float(goal_condition_success_rate),
            'success_spl': float(s_spl),
            'path_len_weighted_success_spl': float(plw_s_spl),
            'goal_condition_spl': float(pc_spl),
            'path_len_weighted_goal_condition_spl': float(plw_pc_spl),
            'path_len_weight': int(path_len_weight),
            'reward': float(reward)
        }
        if success:
            successes.append(log_entry)
        else:
            failures.append(log_entry)

        # overall results
        results['all'] = cls.get_metrics(successes, failures)

        print("-------------")
        print("SR: %d/%d = %.5f" % (results['all']['success']['num_successes'],
                                    results['all']['success']['num_evals'],
                                    results['all']['success']['success_rate']))
        print("PLW SR: %.5f" %
              (results['all']['path_length_weighted_success_rate']))
        print(
            "GC: %d/%d = %.5f" %
            (results['all']['goal_condition_success']
             ['completed_goal_conditions'],
             results['all']['goal_condition_success']['total_goal_conditions'],
             results['all']['goal_condition_success']
             ['goal_condition_success_rate']))
        print(
            "PLW GC: %.5f" %
            (results['all']['path_length_weighted_goal_condition_success_rate']
             ))
        print("-------------")

        # task type specific results
        task_types = [
            'pick_and_place_simple', 'pick_clean_then_place_in_recep',
            'pick_heat_then_place_in_recep', 'pick_cool_then_place_in_recep',
            'pick_two_obj_and_place', 'look_at_obj_in_light',
            'pick_and_place_with_movable_recep'
        ]
        for task_type in task_types:
            task_successes = [
                s for s in (list(successes)) if s['type'] == task_type
            ]
            task_failures = [
                f for f in (list(failures)) if f['type'] == task_type
            ]
            if len(task_successes) > 0 or len(task_failures) > 0:
                results[task_type] = cls.get_metrics(task_successes,
                                                     task_failures)
            else:
                results[task_type] = {}

        lock.release()
Example #29
 def __call__(self, image, target):
     image = F.to_tensor(image)
     return image, target
Example #30
    def act(self, image, player_info):
        """
        :param image: numpy array of shape (300, 400, 3)
        :param player_info: pystk.Player object for the current kart.
        return: Dict describing the action
        """
        global FIRST, IM, BACKUP

        score_goal = None
        if self.team == 0:
            score_goal = GOAL_0
        else:
            score_goal = GOAL_1

        front = np.array(HACK_DICT['kart'].front)[[0, 2]]
        kart = np.array(HACK_DICT['kart'].location)[[0, 2]]
        puck = np.array(HACK_DICT['state'].soccer.ball.location)[[0, 2]]

        # player vector
        u = front - kart
        u = u / np.linalg.norm(u)

        # player to puck
        v = puck - kart
        v = v / np.linalg.norm(v)

        # goal to puck
        w = puck - score_goal
        w = w / np.linalg.norm(w)

        # adjust for scoring
        v2 = v + (w / 2)
        v2 = v2 / np.linalg.norm(v2)

        theta = np.arccos(np.dot(u, v2))
        signed_theta = -np.sign(np.cross(u, v2)) * theta

        steer = 5 * signed_theta
        accel = random.uniform(0.4, 0.8)
        brake = False
        drift = False

        if np.degrees(theta) > 20 and np.degrees(theta) < 90:
            drift = True

        if np.degrees(theta) > 90 and not BACKUP:
            BACKUP = True

        if BACKUP:
            if np.degrees(theta) > 30:
                accel = 0
                brake = True
                steer = -steer
            else:
                BACKUP = False

        p_info = []
        mask = (HACK_DICT['race'].render_data[self.player_id].instance ==
                134217729)
        mask = F.to_tensor(mask)
        nz = torch.nonzero(mask)

        if nz.numel() == 0:
            HACK_DICT['player_bool_%d' % self.player_id] = False
        else:
            HACK_DICT['player_bool_%d' % self.player_id] = True

            p_info.extend(kart)
            p_info.extend(u)
            peak = self.extract_peak(nz)
            p_info.extend(peak)

        #test = self.get_puck_coords(image)
        #p_info.extend(self.get_puck_coords(image))

        HACK_DICT['player_info_%d' % self.player_id] = p_info
        HACK_DICT['puck_vec_%d' % self.player_id] = (puck - kart)

        # visualize the controller in real time
        # if player_info.kart.id == 0:
        #     ax1 = plt.subplot(111)
        #     if FIRST:
        #         IM = ax1.imshow(image)
        #         FIRST = False
        #     else:
        #         IM.set_data(image)

        #     # print('p_to_fron: ', u)
        #     # print('WORLD p_to_puck: ', (puck - kart))
        #     # puck_xy = self.get_puck_coords(image)
        #     # screen_p_to_puck = np.array([200, 180]) - puck_xy
        #     # screen_p_to_puck[0] = -screen_p_to_puck[0]
        #     # print('SCREEN p_to_puck: ', screen_p_to_puck)
        #     # print('puck_to_g: ', w)
        #     # print('aim_vec__: ', v2)
        #     # print('signed theta: ', signed_theta)
        #     # print('steer: ', steer)
        #     # print('loc: ' + str(kart))
        #     # print('-----------------------')
        #     plt.pause(0.001)

        action = {
            'steer': steer,
            'acceleration': accel,
            'brake': brake,
            'drift': drift,
            'nitro': False,
            'rescue': False
        }

        return action
Example #31
width = scene_size[0]
height = scene_size[1]

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
traj_old = traj.clone().numpy()

traj[:, 0] = traj[:, 0] - width // 2
traj[:, 1] = traj[:, 1] - height // 2
traj = traj.unsqueeze(0)
traj = traj.permute(0, 2, 1)
traj = traj.to(device)
rel_traj = absolute_to_rel_traj(traj).to(device)


logger.info("Moving static image to the device")
image = Image.open(path_of_static_image)
scene = TF.to_tensor(image)
scene.unsqueeze_(0)
scene = scene.to(device)

# Create model.

desire = DESIRE(IOCParams(),
                SGMParams())
logger.info("Created model")
logger.debug(desire)
logger.info("Moving to device: {}".format(device))
desire.to(device)
logger.info("Loading state dict")
state_dict_checkpoint = torch.load(restore_model_path)
desire.load_state_dict(state_dict_checkpoint)