Example #1
def bbox_detector_single_frame(detector, frame):
    img = Image.fromarray(frame)
    pil_image = ImageOps.fit(img, (256, 256), Image.LANCZOS)  # Image.ANTIALIAS was removed in Pillow 10.
    pil_image_rgb = pil_image.convert("RGB")
    converted_img = tf.image.convert_image_dtype(
        np.array(pil_image_rgb), dtype=tf.float32)[tf.newaxis, ...]  # Feed the resized RGB image, not the raw frame.
    print('converted_img - type: {} shape: {}'.format(type(converted_img), converted_img.shape))
    result = detector(converted_img)
    result = {key: value.numpy() for key, value in result.items()}
    print("Found %d objects." % len(result["detection_scores"]))
    detection_class_entities = result["detection_class_entities"]
    detection_scores = result['detection_scores']
    detection_boxes = result['detection_boxes']

    person_detection_scores = []
    person_class_entities = []
    person_bounding_boxes = []
    for entity, box, score in zip(detection_class_entities, detection_boxes,
                                  detection_scores):
        if entity == b'Person':
            person_class_entities.append(entity)
            person_bounding_boxes.append(box)
            person_detection_scores.append(score)

    image_with_boxes = draw_boxes(
        np.array(img),
        np.array(person_bounding_boxes),
        np.array(person_class_entities),
        np.array(person_detection_scores))
    display_image(image_with_boxes)

    return result
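
A minimal usage sketch for the function above. The TF-Hub module handle is an assumption; its output keys (detection_class_entities, detection_scores, detection_boxes) match what the function reads:

import numpy as np
import tensorflow_hub as hub

# Hedged sketch: module URL and frame source are placeholders, not from the original.
detector = hub.load(
    "https://tfhub.dev/google/openimages_v4/ssd_mobilenet_v2/1").signatures['default']
frame = np.zeros((480, 640, 3), dtype=np.uint8)  # Stand-in for a real video frame.
result = bbox_detector_single_frame(detector, frame)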
Example #2
def download_and_resize_image(url, new_width=512, new_height=512, save_path=None,
                              show=False):
    if save_path is None:
        _, save_path = tempfile.mkstemp(suffix=".jpg")
    else:
        dirname = os.path.dirname(save_path)
        if dirname:  # A bare filename has no directory component to create.
            os.makedirs(dirname, exist_ok=True)
    pil_image = get_image(url, rotate='auto')
    pil_image = ImageOps.fit(pil_image, (new_width, new_height), Image.LANCZOS)  # Image.ANTIALIAS was removed in Pillow 10.
    pil_image_rgb = pil_image.convert("RGB")
    pil_image_rgb.save(save_path, format="JPEG", quality=90)
    if show:
        display_image(pil_image)
    return save_path
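
A quick usage sketch; the URL is a placeholder, and get_image/display_image are assumed to come from the surrounding module:

# Hedged example call; the URL is not from the original.
path = download_and_resize_image('https://example.com/sample.jpg',
                                 new_width=512, new_height=512, show=True)
print('Saved to', path)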
Example #3
    def capture(self, frame):
        if self.processing:
            return
        self.processing = True

        # Discard the canvas.
        self.canvas = None

        # Display the captured frame.
        self.im_captured = frame
        util.display_image(CAP_WINDOW, self.im_captured)

        # Segment the captured frame.
        self.model_process.submit(COMMAND_SEGMENT, (EVENT_SEGMENT, frame))
Example #4
    def cb_result(self, data):
        if data is None:
            sys.stdout.write(']\n')
            self.processing = False
            self.model_process.submit(COMMAND_ACCURACY,
                                      (self.canvas.get_map(), self.im_processed, self.loss))
            return

        _, self.im_processed, self.loss = data

        # Display the processed image.
        util.display_image(RES_WINDOW, self.im_processed)

        # Update the progress bar.
        sys.stdout.write('=')
        sys.stdout.flush()
Example #5
def build_model_input():

    print("Import class names")
    cols = [
        'Label', 'Latin Name', 'Common Name', 'Train Images',
        'Validation Images'
    ]
    info = pd.read_csv("monkey_labels.txt", names=cols, skiprows=1)
    global LABELS
    LABELS = info['Common Name']

    util.display_image('training/n0/n0018.jpg')

    print("Data augmentation/Preprocessing")
    height, width, channels = 299, 299, 3

    train_datagen = ImageDataGenerator(rescale=1. / 255)
    train_generator = train_datagen.flow_from_directory(
        TRAIN_DIR,
        target_size=(height, width),
        batch_size=BATCH_SIZE,
        class_mode='categorical')

    test_datagen = ImageDataGenerator(rescale=1. / 255)
    test_generator = test_datagen.flow_from_directory(TEST_DIR,
                                                      target_size=(height,
                                                                   width),
                                                      batch_size=BATCH_SIZE,
                                                      class_mode='categorical')

    print("Import pretrained Inception module")
    base_model = Xception(weights=INCEPTION_DIR,
                          include_top=False,
                          input_shape=(height, width, channels))

    print("Extract features")
    train_features, train_labels = util.extract_features(
        1097, BATCH_SIZE, train_generator, base_model)
    test_features, test_labels = util.extract_features(272, BATCH_SIZE,
                                                       test_generator,
                                                       base_model)

    return train_features, train_labels, test_features, test_labels
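
A possible next step after build_model_input(), sketched under the assumption that extract_features returns feature maps plus one-hot labels; the dense-head sizes are illustrative, not from the original:

from tensorflow.keras import layers, models

train_features, train_labels, test_features, test_labels = build_model_input()

# Hedged sketch: a small dense head on top of the frozen Xception features.
model = models.Sequential([
    layers.Flatten(input_shape=train_features.shape[1:]),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(train_labels.shape[1], activation='softmax'),
])
model.compile(optimizer='adam', loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(train_features, train_labels, epochs=10,
          validation_data=(test_features, test_labels))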
Example #6
    def update(self):
        # Overlay the segmented frame.
        segmented_image = util.label_to_color_image(self.segment_map).astype(
            np.uint8)
        self.combined = self.image // 6 + segmented_image

        # Resize the frame.
        self.combined = cv2.resize(self.combined, (CAP_WIDTH, CAP_HEIGHT))

        # Update the legend.
        legend = self.legend.copy()
        if self.selected is not None:
            x = PALETTE_SPACING
            y = PALETTE_SPACING + (PALETTE_SIZE +
                                   PALETTE_SPACING) * self.selected
            w = PALETTE_SIZE
            h = PALETTE_SIZE
            cv2.rectangle(legend, (x, y), (x + w, y + h), (255, 255, 255), 2)
            cv2.rectangle(legend, (x, y), (x + w, y + h), (0, 0, 0), 1)

        # Show the frame.
        util.display_image(CAP_WINDOW,
                           np.concatenate((self.combined, legend), axis=1))
Example #7
def fit_earth(max_iter=20000,
              log_interval=10,
              display_interval=None,
              display_res=1024,
              enable_mip=True,
              res=512,
              ref_res=4096,
              lr_base=1e-2,
              lr_ramp=0.1,
              out_dir=None,
              log_fn=None,
              texsave_interval=None,
              texsave_fn=None,
              imgsave_interval=None,
              imgsave_fn=None):

    log_file = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
    else:
        imgsave_interval, texsave_interval = None, None

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32) / 255.0
    max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1).  Drop
    # the last column in that case.
    if pos.shape[1] == 4: pos = pos[:, 0:3]

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    uv_idx = torch.from_numpy(uv_idx.astype(np.int32)).cuda()
    vtx_uv = torch.from_numpy(uv.astype(np.float32)).cuda()

    tex = torch.from_numpy(tex.astype(np.float32)).cuda()
    tex_opt = torch.full(tex.shape, 0.2, device='cuda', requires_grad=True)
    glctx = dr.RasterizeGLContext()

    # Adam optimizer for texture with a learning rate ramp.
    optimizer = torch.optim.Adam([tex_opt], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda=lambda x: lr_ramp**(float(x) / float(max_iter)))

    # Render.
    ang = 0.0
    texloss_avg = []
    for it in range(max_iter + 1):
        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Measure texture-space RMSE loss
        with torch.no_grad():
            texmask = torch.zeros_like(tex)
            tr = tex.shape[1] // 4
            texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
            texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
            # Measure only relevant portions of texture when calculating texture
            # PSNR.
            texloss = (torch.sum(texmask * (tex - tex_opt)**2) /
                       torch.sum(texmask))**0.5  # RMSE within masked area.
            texloss_avg.append(float(texloss))

        # Render reference and optimized frames. Always enable mipmapping for reference.
        color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx, tex,
                       ref_res, True, max_mip_level)
        color_opt = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_uv, uv_idx,
                           tex_opt, res, enable_mip, max_mip_level)

        # Reduce the reference to correct size.
        while color.shape[1] > res:
            color = util.bilinear_downsample(color)

        # Compute loss and perform a training step.
        loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val = np.mean(np.asarray(texloss_avg))
            texloss_avg = []
            psnr = -10.0 * np.log10(texloss_val**2)  # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image or save_texture:
            ang = ang + 0.1

            with torch.no_grad():
                result_image = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_uv,
                                      uv_idx, tex_opt, res, enable_mip,
                                      max_mip_level)[0].cpu().numpy()

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='%d / %d' % (it, max_iter))
                if save_image:
                    util.save_image(out_dir + '/' + (imgsave_fn % it),
                                    result_image)

                if save_texture:
                    texture = tex_opt.cpu().numpy()[::-1]
                    util.save_image(out_dir + '/' + (texsave_fn % it), texture)

    # Done.
    if log_file:
        log_file.close()
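
fit_earth calls a render() helper that is not shown in this excerpt. Below is a minimal sketch consistent with its call sites, assuming torch, numpy as np and nvdiffrast.torch as dr are imported as elsewhere; the clip-space transform is an assumption:

def render(glctx, mtx, pos, pos_idx, uv, uv_idx, tex, resolution, enable_mip, max_mip_level):
    # Hedged sketch: clip-space transform, rasterize, sample texture.
    mtx = torch.as_tensor(mtx, dtype=torch.float32, device='cuda')
    posw = torch.cat([pos, torch.ones([pos.shape[0], 1], device='cuda')], dim=1)
    pos_clip = torch.matmul(posw, mtx.t())[None, ...]
    rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx,
                                         resolution=[resolution, resolution])
    if enable_mip:
        texc, texd = dr.interpolate(uv[None, ...], rast_out, uv_idx,
                                    rast_db=rast_out_db, diff_attrs='all')
        color = dr.texture(tex[None, ...], texc, texd,
                           filter_mode='linear-mipmap-linear',
                           max_mip_level=max_mip_level)
    else:
        texc, _ = dr.interpolate(uv[None, ...], rast_out, uv_idx)
        color = dr.texture(tex[None, ...], texc, filter_mode='linear')
    return color * torch.clamp(rast_out[..., -1:], 0, 1)  # Mask out background.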
Example #8
def fit_pose(max_iter=10000,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             lr_base=0.01,
             lr_falloff=1.0,
             nr_base=1.0,
             nr_falloff=1e-4,
             grad_phase_start=0.5,
             resolution=256,
             out_dir=None,
             log_fn=None,
             mp4save_interval=None,
             mp4save_fn=None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval and mp4save_fn:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}',
                                        mode='I',
                                        fps=30,
                                        codec='libx264',
                                        bitrate='16M')
        else:
            mp4save_interval = None  # No filename given: disable mp4 saving.
    else:
        mp4save_interval = None

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/cube_p.npz') as f:
        pos_idx, pos, col_idx, col = f.values()
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Some input geometry contains vertex positions in (N, 4) (with v[:,3]==1).  Drop
    # the last column in that case.
    if pos.shape[1] == 4: pos = pos[:, 0:3]

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(pos.astype(np.float32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_col = torch.from_numpy(col.astype(np.float32)).cuda()

    glctx = dr.RasterizeGLContext()

    for rep in range(repeats):
        pose_target = torch.tensor(q_rnd(), device='cuda')
        pose_init = q_rnd()
        pose_opt = torch.tensor(pose_init / np.sum(pose_init**2)**0.5,
                                dtype=torch.float32,
                                device='cuda',
                                requires_grad=True)

        loss_best = np.inf
        pose_best = pose_opt.detach().clone()

        # Modelview + projection matrix.
        mvp = torch.tensor(np.matmul(util.projection(x=0.4),
                                     util.translate(0, 0,
                                                    -3.5)).astype(np.float32),
                           device='cuda')

        # Adam optimizer for pose with a learning rate ramp.
        optimizer = torch.optim.Adam([pose_opt],
                                     betas=(0.9, 0.999),
                                     lr=lr_base)
        # Render.
        for it in range(max_iter + 1):
            # Set learning rate.
            itf = 1.0 * it / max_iter
            nr = nr_base * nr_falloff**itf
            lr = lr_base * lr_falloff**itf
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

            # Noise input.
            if itf >= grad_phase_start:
                noise = q_unit()
            else:
                noise = q_scale(q_rnd(), nr)
                noise = q_mul(noise, q_rnd_S4())  # Orientation noise.

            # Render.
            color = render(glctx, torch.matmul(mvp, q_to_mtx(pose_target)),
                           vtx_pos, pos_idx, vtx_col, col_idx, resolution)
            pose_total_opt = q_mul_torch(pose_opt, noise)
            mtx_total_opt = torch.matmul(mvp, q_to_mtx(pose_total_opt))
            color_opt = render(glctx, mtx_total_opt, vtx_pos, pos_idx, vtx_col,
                               col_idx, resolution)

            # Image-space loss.
            diff = (color_opt - color)**2  # L2 norm.
            diff = torch.tanh(5.0 * torch.max(diff, dim=-1)[0])
            loss = torch.mean(diff)

            # Measure image-space loss and update best found pose.
            loss_val = float(loss)
            if (loss_val < loss_best) and (loss_val > 0.0):
                pose_best = pose_total_opt.detach().clone()
                loss_best = loss_val
                if itf < grad_phase_start:
                    with torch.no_grad():
                        pose_opt[:] = pose_best

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                err = q_angle_deg(pose_opt, pose_target)
                ebest = q_angle_deg(pose_best, pose_target)
                s = "rep=%d,iter=%d,err=%f,err_best=%f,loss=%f,loss_best=%f,lr=%f,nr=%f" % (
                    rep, it, err, ebest, loss_val, loss_best, lr, nr)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Run gradient training step.
            if itf >= grad_phase_start:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            with torch.no_grad():
                pose_opt /= torch.sum(pose_opt**2)**0.5

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                img_ref = color[0].detach().cpu().numpy()
                img_opt = color_opt[0].detach().cpu().numpy()
                img_best = render(glctx, torch.matmul(mvp,
                                                      q_to_mtx(pose_best)),
                                  vtx_pos, pos_idx, vtx_col, col_idx,
                                  resolution)[0].detach().cpu().numpy()
                result_image = np.concatenate([img_ref, img_best, img_opt],
                                              axis=1)

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='(%d) %d / %d' %
                                       (rep, it, max_iter))
                if save_mp4:
                    writer.append_data(
                        np.clip(np.rint(result_image * 255.0), 0,
                                255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
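
fit_pose assumes a family of quaternion helpers (q_rnd, q_unit, q_mul, ...). A hedged sketch of three of them using the standard formulas, with a scalar-first convention assumed here; the originals may differ:

# Hedged sketch of assumed quaternion helpers; not the original implementations.
def q_unit():
    return np.asarray([1.0, 0.0, 0.0, 0.0], np.float32)  # Identity rotation.

def q_rnd():
    # Uniform random unit quaternion (Shoemake's subgroup method).
    u, v, w = np.random.uniform(0.0, 1.0, size=[3])
    v *= 2.0 * np.pi
    w *= 2.0 * np.pi
    return np.asarray([(1.0 - u)**0.5 * np.sin(v), (1.0 - u)**0.5 * np.cos(v),
                       u**0.5 * np.sin(w), u**0.5 * np.cos(w)], np.float32)

def q_mul(p, q):
    # Hamilton product, scalar-first convention (an assumption).
    s1, v1 = p[0], p[1:]
    s2, v2 = q[0], q[1:]
    s = s1 * s2 - np.dot(v1, v2)
    v = s1 * v2 + s2 * v1 + np.cross(v1, v2)
    return np.asarray([s, v[0], v[1], v[2]], np.float32)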
Example #9
meta = pd.read_csv(  # Assumed opener; the original call was truncated in this excerpt.
    os.path.join('..', 'open_images_', 'validation-images-with-rotation.csv'))
meta.set_index('ImageID', inplace=True)

print('data loaded')

for index, row in meta.iterrows():
    if index not in id2url:
        continue

    #if not np.isnan(row.Rotation):
    if row.Rotation != 270:
        continue
    '''
    image_url = id2url[index]
    try:
        image = get_image(image_url, rotate=id2rot[index])
    except:
        print('error downloading: ' + image_url)
        continue
    '''

    image = get_image_from_s3(index)

    result = get_image_boxes(objects, index)
    print(row.Rotation)
    print(result)

    image_with_boxes = draw_boxes(image, result["detection_boxes"],
                                  result["detection_class_names"])
    display_image(image_with_boxes)
def fit_cube(max_iter          = 5000,
             resolution        = 4, 
             discontinuous     = False,
             repeats           = 1,
             log_interval      = 10, 
             display_interval  = None,
             display_res       = 512,
             out_dir           = '.',
             log_fn            = None,
             imgsave_interval  = None,
             imgsave_fn        = None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], vtxp.shape[0]))
        
    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Setup TF graph for reference.
    vtxw = np.concatenate([vtxp, np.ones([vtxp.shape[0], 1])], axis=1).astype(np.float32)
    pos_clip = tf.matmul(vtxw, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip, pos_idx, resolution=[resolution, resolution], output_db=False)
    color, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Optimized variables.
    vtxc_opt = tf.get_variable('vtxc', initializer=tf.zeros_initializer(), shape=vtxc.shape)
    vtxp_opt = tf.get_variable('vtxp', initializer=tf.zeros_initializer(), shape=vtxp.shape)

    # Optimization variable setters for initialization.
    vtxc_opt_in = tf.placeholder(tf.float32, vtxc.shape)
    vtxp_opt_in = tf.placeholder(tf.float32, vtxp.shape)
    opt_set = tf.group(tf.assign(vtxc_opt, vtxc_opt_in), tf.assign(vtxp_opt, vtxp_opt_in))

    # Set up TF graph for the optimization candidate.
    vtxw_opt = tf.concat([vtxp_opt, tf.ones([vtxp.shape[0], 1], tf.float32)], axis=1)
    pos_clip_opt = tf.matmul(vtxw_opt, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[resolution, resolution], output_db=False)
    color_opt, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss and optimizer.
    loss = tf.reduce_mean((color_opt - color)**2)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.999).minimize(loss, var_list=[vtxp_opt, vtxc_opt])

    # Setup TF graph for display.
    rast_out_disp, _ = dr.rasterize(pos_clip_opt, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp, _ = dr.interpolate(vtxc_opt[tf.newaxis, ...], rast_out_disp, col_idx)
    color_disp = dr.antialias(color_disp, rast_out_disp, pos_clip_opt, pos_idx)
    rast_out_disp_ref, _ = dr.rasterize(pos_clip, pos_idx, resolution=[display_res, display_res], output_db=False)
    color_disp_ref, _ = dr.interpolate(vtxc[tf.newaxis, ...], rast_out_disp_ref, col_idx)
    color_disp_ref = dr.antialias(color_disp_ref, rast_out_disp_ref, pos_clip, pos_idx)

    # Geometric error calculation
    geom_loss = tf.reduce_mean(tf.reduce_sum((tf.abs(vtxp_opt) - .5)**2, axis=1)**0.5)

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        ang = 0.0
        gl_avg = []
        util.init_uninitialized_vars()
        for it in range(max_iter + 1):
            # Initialize optimization.
            if it == 0:
                vtxp_init = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
                vtxc_init = np.random.uniform(0.0, 1.0, size=vtxc.shape)
                util.run(opt_set, {vtxc_opt_in: vtxc_init.astype(np.float32), vtxp_opt_in: vtxp_init.astype(np.float32)})

            # Learning rate ramp.
            lr = 1e-2
            lr = lr * max(0.01, 10**(-it*0.0005))

            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj  = util.projection(x=0.4)
            r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        
            # Run training and measure geometric error.
            gl_val, _ = util.run([geom_loss, train_op], {mtx_in: r_mvp, lr_in: lr})
            gl_avg.append(gl_val)

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val, gl_avg = np.mean(np.asarray(gl_avg)), []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                ang = ang + 0.1
                img_o = util.run(color_opt,      {mtx_in: r_mvp})[0]
                img_b = util.run(color,          {mtx_in: r_mvp})[0]
                img_d = util.run(color_disp,     {mtx_in: a_mvp})[0]
                img_r = util.run(color_disp_ref, {mtx_in: a_mvp})[0]

                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = np.concatenate([img_o, img_b, img_d, img_r], axis=1)

            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # All repeats done.
    if log_file:
        log_file.close()
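
The TensorFlow examples above and below lean on util.run and util.init_uninitialized_vars. A plausible minimal sketch of those helpers as thin wrappers over the default TF1 session; the names match the call sites, but the structure is an assumption:

# Hedged sketch of the assumed util helpers (TF1 sessions).
def run(*args, **kwargs):
    return tf.get_default_session().run(*args, **kwargs)

def init_uninitialized_vars():
    # Initialize only the variables the session has not yet initialized.
    sess = tf.get_default_session()
    names = set(sess.run(tf.report_uninitialized_variables()))
    uninit = [v for v in tf.global_variables()
              if v.name.split(':')[0].encode() in names]
    sess.run(tf.variables_initializer(uninit))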
def fit_cube(max_iter=5000,
             resolution=4,
             discontinuous=False,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             out_dir=None,
             log_fn=None,
             mp4save_interval=None,
             mp4save_fn=None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(f'{out_dir}/{log_fn}', 'wt')
        if mp4save_interval and mp4save_fn:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}',
                                        mode='I',
                                        fps=30,
                                        codec='libx264',
                                        bitrate='16M')
        else:
            mp4save_interval = None  # No filename given: disable mp4 saving.
    else:
        mp4save_interval = None

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    fn = 'cube_%s.npz' % ('d' if discontinuous else 'c')
    with np.load(f'{datadir}/{fn}') as f:
        pos_idx, vtxp, col_idx, vtxc = f.values()
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], vtxp.shape[0]))

    # Create position/triangle index tensors
    pos_idx = torch.from_numpy(pos_idx.astype(np.int32)).cuda()
    col_idx = torch.from_numpy(col_idx.astype(np.int32)).cuda()
    vtx_pos = torch.from_numpy(vtxp.astype(np.float32)).cuda()
    vtx_col = torch.from_numpy(vtxc.astype(np.float32)).cuda()

    glctx = dr.RasterizeGLContext()

    # Repeats.
    for rep in range(repeats):

        ang = 0.0
        gl_avg = []

        vtx_pos_rand = np.random.uniform(-0.5, 0.5, size=vtxp.shape) + vtxp
        vtx_col_rand = np.random.uniform(0.0, 1.0, size=vtxc.shape)
        vtx_pos_opt = torch.tensor(vtx_pos_rand,
                                   dtype=torch.float32,
                                   device='cuda',
                                   requires_grad=True)
        vtx_col_opt = torch.tensor(vtx_col_rand,
                                   dtype=torch.float32,
                                   device='cuda',
                                   requires_grad=True)

        # Adam optimizer for vertex position and color with a learning rate ramp.
        optimizer = torch.optim.Adam([vtx_pos_opt, vtx_col_opt], lr=1e-2)
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer, lr_lambda=lambda x: max(0.01, 10**(-x * 0.0005)))

        for it in range(max_iter + 1):
            # Random rotation/translation matrix for optimization.
            r_rot = util.random_rotation_translation(0.25)

            # Smooth rotation for display.
            a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

            # Modelview and modelview + projection matrices.
            proj = util.projection(x=0.4)
            r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
            r_mvp = np.matmul(proj, r_mv).astype(np.float32)
            a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
            a_mvp = np.matmul(proj, a_mv).astype(np.float32)

            # Compute geometric error for logging.
            with torch.no_grad():
                geom_loss = torch.mean(
                    torch.sum((torch.abs(vtx_pos_opt) - .5)**2, dim=1)**0.5)
                gl_avg.append(float(geom_loss))

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                gl_val = np.mean(np.asarray(gl_avg))
                gl_avg = []
                s = ("rep=%d," % rep) if repeats > 1 else ""
                s += "iter=%d,err=%f" % (it, gl_val)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            color = render(glctx, r_mvp, vtx_pos, pos_idx, vtx_col, col_idx,
                           resolution)
            color_opt = render(glctx, r_mvp, vtx_pos_opt, pos_idx, vtx_col_opt,
                               col_idx, resolution)

            # Compute loss and train.
            loss = torch.mean((color - color_opt)**2)  # L2 pixel loss.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            scheduler.step()

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

            if display_image or save_mp4:
                ang = ang + 0.01

                img_b = color[0].cpu().numpy()
                img_o = color_opt[0].detach().cpu().numpy()
                img_d = render(glctx, a_mvp, vtx_pos_opt, pos_idx, vtx_col_opt,
                               col_idx, display_res)[0]
                img_r = render(glctx, a_mvp, vtx_pos, pos_idx, vtx_col,
                               col_idx, display_res)[0]

                scl = display_res // img_o.shape[0]
                img_b = np.repeat(np.repeat(img_b, scl, axis=0), scl, axis=1)
                img_o = np.repeat(np.repeat(img_o, scl, axis=0), scl, axis=1)
                result_image = make_grid(
                    np.stack([
                        img_o, img_b,
                        img_d.detach().cpu().numpy(),
                        img_r.cpu().numpy()
                    ]))

                if display_image:
                    util.display_image(result_image,
                                       size=display_res,
                                       title='%d / %d' % (it, max_iter))
                if save_mp4:
                    writer.append_data(
                        np.clip(np.rint(result_image * 255.0), 0,
                                255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
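
The PyTorch fit_pose and fit_cube variants call a vertex-color render() helper that is not shown. A minimal sketch consistent with those call sites; the clip-space transform is an assumption:

def render(glctx, mtx, pos, pos_idx, vtx_col, col_idx, resolution):
    # Hedged sketch: transform to clip space, rasterize, interpolate colors, antialias.
    mtx = torch.as_tensor(mtx, dtype=torch.float32, device='cuda')
    posw = torch.cat([pos, torch.ones([pos.shape[0], 1], device='cuda')], dim=1)
    pos_clip = torch.matmul(posw, mtx.t())[None, ...]
    rast_out, _ = dr.rasterize(glctx, pos_clip, pos_idx,
                               resolution=[resolution, resolution])
    color, _ = dr.interpolate(vtx_col[None, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)
    return color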
def fit_earth(max_iter=20000,
              log_interval=10,
              display_interval=None,
              display_res=1024,
              enable_mip=True,
              res=512,
              ref_res=4096,
              lr_base=1e-2,
              lr_ramp=0.1,
              out_dir='.',
              log_fn=None,
              texsave_interval=None,
              texsave_fn=None,
              imgsave_interval=None,
              imgsave_fn=None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Mesh and texture adapted from "3D Earth Photorealistic 2K" model at
    # https://www.turbosquid.com/3d-models/3d-realistic-earth-photorealistic-2k-1279125
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/earth.npz') as f:
        pos_idx, pos, uv_idx, uv, tex = f.values()
    tex = tex.astype(np.float32) / 255.0
    max_mip_level = 9  # Texture is a 4x3 atlas of 512x512 maps.
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Learned texture.
    tex_var = tf.get_variable('tex',
                              initializer=tf.constant_initializer(0.2),
                              shape=tex.shape)

    # Setup TF graph for reference rendering in high resolution.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [ref_res, ref_res])
    texc, texd = dr.interpolate(uv[tf.newaxis, ...],
                                rast_out,
                                uv_idx,
                                rast_db=rast_out_db,
                                diff_attrs='all')
    color = dr.texture(tex[np.newaxis],
                       texc,
                       texd,
                       filter_mode='linear-mipmap-linear',
                       max_mip_level=max_mip_level)
    color = color * tf.clip_by_value(rast_out[..., -1:], 0,
                                     1)  # Mask out background.

    # Reduce the reference to correct size.
    while color.shape[1] > res:
        color = util.bilinear_downsample(color)

    # TF Graph for rendered candidate.
    if enable_mip:
        # With mipmaps.
        rast_out_opt, rast_out_db_opt = dr.rasterize(pos_clip, pos_idx,
                                                     [res, res])
        texc_opt, texd_opt = dr.interpolate(uv[tf.newaxis, ...],
                                            rast_out_opt,
                                            uv_idx,
                                            rast_db=rast_out_db_opt,
                                            diff_attrs='all')
        color_opt = dr.texture(tex_var[np.newaxis],
                               texc_opt,
                               texd_opt,
                               filter_mode='linear-mipmap-linear',
                               max_mip_level=max_mip_level)
    else:
        # No mipmaps: no image-space derivatives anywhere.
        rast_out_opt, _ = dr.rasterize(pos_clip,
                                       pos_idx, [res, res],
                                       output_db=False)
        texc_opt, _ = dr.interpolate(uv[tf.newaxis, ...], rast_out_opt, uv_idx)
        color_opt = dr.texture(tex_var[np.newaxis],
                               texc_opt,
                               filter_mode='linear')
    color_opt = color_opt * tf.clip_by_value(rast_out_opt[..., -1:], 0,
                                             1)  # Mask out background.

    # Measure only relevant portions of texture when calculating texture PSNR.
    loss = tf.reduce_mean((color - color_opt)**2)
    texmask = np.zeros_like(tex)
    tr = tex.shape[1] // 4
    texmask[tr + 13:2 * tr - 13, 25:-25, :] += 1.0
    texmask[25:-25, tr + 13:2 * tr - 13, :] += 1.0
    texloss = (tf.reduce_sum(texmask * (tex - tex_var)**2) /
               np.sum(texmask))**0.5  # RMSE within masked area.

    # Training driven by image-space loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9,
                                      0.99).minimize(loss, var_list=[tex_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    texloss_avg = []
    for it in range(max_iter + 1):
        lr = lr_base * lr_ramp**(float(it) / float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))
        dist = np.random.uniform(0.0, 48.5)

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -1.5 - dist), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Run training and measure texture-space RMSE loss.
        texloss_val, _ = util.run([texloss, train_op], {
            mtx_in: r_mvp,
            lr_in: lr
        })
        texloss_avg.append(texloss_val)

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            texloss_val, texloss_avg = np.mean(np.asarray(texloss_avg)), []
            psnr = -10.0 * np.log10(texloss_val**2)  # PSNR based on average RMSE.
            s = "iter=%d,loss=%f,psnr=%f" % (it, texloss_val, psnr)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result images/textures.
        display_image = display_interval and (it % display_interval) == 0
        save_image = imgsave_interval and (it % imgsave_interval) == 0
        save_texture = texsave_interval and (it % texsave_interval) == 0

        if display_image or save_image:
            result_image = util.run(color_opt, {mtx_in: a_mvp})[0]
        if display_image:
            util.display_image(result_image,
                               size=display_res,
                               title='%d / %d' % (it, max_iter))
        if save_image:
            util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)
        if save_texture:
            util.save_image(out_dir + '/' + (texsave_fn % it),
                            util.run(tex_var)[::-1])

    # Done.
    if log_file:
        log_file.close()
Example #13
def fit_env_phong(max_iter          = 1000,
                  log_interval      = 10,
                  display_interval  = None,
                  display_res       = 1024,
                  res               = 1024,
                  lr_base           = 1e-2,
                  lr_ramp           = 1.0,
                  out_dir           = None,
                  log_fn            = None,
                  mp4save_interval  = None,
                  mp4save_fn        = None):

    log_file = None
    writer = None
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
        if log_fn:
            log_file = open(out_dir + '/' + log_fn, 'wt')
        if mp4save_interval and mp4save_fn:
            writer = imageio.get_writer(f'{out_dir}/{mp4save_fn}', mode='I', fps=30, codec='libx264', bitrate='16M')
        else:
            mp4save_interval = None  # No filename given: disable mp4 saving.
    else:
        mp4save_interval = None

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32)/255.0
    env = np.stack(env)[:, ::-1].copy()
    print("Mesh has %d triangles and %d vertices." % (pos_idx.shape[0], pos.shape[0]))

    # Move all the stuff to GPU.
    pos_idx = torch.as_tensor(pos_idx, dtype=torch.int32, device='cuda')
    pos = torch.as_tensor(pos, dtype=torch.float32, device='cuda')
    normals = torch.as_tensor(normals, dtype=torch.float32, device='cuda')
    env = torch.as_tensor(env, dtype=torch.float32, device='cuda')

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0
    phong_rgb_t = torch.as_tensor(phong_rgb, dtype=torch.float32, device='cuda')

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = torch.ones_like(env) * .5
    env_var.requires_grad_()
    phong_var_raw = torch.as_tensor(np.random.uniform(size=[4]), dtype=torch.float32, device='cuda')
    phong_var_raw.requires_grad_()
    phong_var_mul = torch.as_tensor([1.0, 1.0, 1.0, 10.0], dtype=torch.float32, device='cuda')

    # Render.
    ang = 0.0
    imgloss_avg, phong_avg = [], []
    glctx = dr.RasterizeGLContext()
    zero_tensor = torch.as_tensor(0.0, dtype=torch.float32, device='cuda')
    one_tensor = torch.as_tensor(1.0, dtype=torch.float32, device='cuda')

    # Adam optimizer for environment map and phong with a learning rate ramp.
    optimizer = torch.optim.Adam([env_var, phong_var_raw], lr=lr_base)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: lr_ramp**(float(x)/float(max_iter)))

    for it in range(max_iter + 1):
        phong_var = phong_var_raw * phong_var_mul

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj  = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv  = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv  = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)
        a_mvc = a_mvp
        r_mvp = torch.as_tensor(r_mvp, dtype=torch.float32, device='cuda')
        a_mvp = torch.as_tensor(a_mvp, dtype=torch.float32, device='cuda')

        # Solve camera positions.
        a_campos = torch.as_tensor(np.linalg.inv(a_mv)[:3, 3], dtype=torch.float32, device='cuda')
        r_campos = torch.as_tensor(np.linalg.inv(r_mv)[:3, 3], dtype=torch.float32, device='cuda')

        # Random light direction.        
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8
        lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')

        def render_refl(ldir, cpos, mvp):
            # Transform and rasterize.
            viewvec = pos[..., :3] - cpos[np.newaxis, np.newaxis, :] # View vectors at vertices.
            reflvec = viewvec - 2.0 * normals[np.newaxis, ...] * torch.sum(normals[np.newaxis, ...] * viewvec, -1, keepdim=True) # Reflection vectors at vertices.
            reflvec = reflvec / torch.sum(reflvec**2, -1, keepdim=True)**0.5 # Normalize.
            pos_clip = torch.matmul(pos, mvp.t())[np.newaxis, ...]
            rast_out, rast_out_db = dr.rasterize(glctx, pos_clip, pos_idx, [res, res])
            refl, refld = dr.interpolate(reflvec, rast_out, pos_idx, rast_db=rast_out_db, diff_attrs='all') # Interpolated reflection vectors.

            # Phong light.
            refl = refl / (torch.sum(refl**2, -1, keepdim=True) + 1e-8)**0.5  # Normalize.
            ldotr = torch.sum(-ldir * refl, -1, keepdim=True) # L dot R.

            # Return
            return refl, refld, ldotr, (rast_out[..., -1:] == 0)

        # Render the reflections.
        refl, refld, ldotr, mask = render_refl(lightdir, r_campos, r_mvp)

        # Reference color. No need for AA because we are not learning geometry.
        color = dr.texture(env[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color = color + phong_rgb_t * torch.max(zero_tensor, ldotr) ** phong_exp # Phong.
        color = torch.where(mask, one_tensor, color) # White background.

        # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
        color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
        color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3] # Phong.
        color_opt = torch.where(mask, one_tensor, color_opt) # White background.

        # Compute loss and train.
        loss = torch.mean((color - color_opt)**2) # L2 pixel loss.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Collect losses.
        imgloss_avg.append(loss.detach().cpu().numpy())
        phong_avg.append(phong_var.detach().cpu().numpy())

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32), axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp)/phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.        
        display_image = display_interval and (it % display_interval == 0)
        save_mp4 = mp4save_interval and (it % mp4save_interval == 0)

        if display_image or save_mp4:
            lightdir = np.asarray([.8, -1., .5, 0.0])
            lightdir = np.matmul(a_mvc, lightdir)[:3]
            lightdir /= np.linalg.norm(lightdir)
            lightdir = torch.as_tensor(lightdir, dtype=torch.float32, device='cuda')
            refl, refld, ldotr, mask = render_refl(lightdir, a_campos, a_mvp)
            color_opt = dr.texture(env_var[np.newaxis, ...], refl, uv_da=refld, filter_mode='linear-mipmap-linear', boundary_mode='cube')
            color_opt = color_opt + phong_var[:3] * torch.max(zero_tensor, ldotr) ** phong_var[3]
            color_opt = torch.where(mask, one_tensor, color_opt)
            result_image = color_opt.detach()[0].cpu().numpy()
            if display_image:
                util.display_image(result_image, size=display_res, title='%d / %d' % (it, max_iter))
            if save_mp4:
                writer.append_data(np.clip(np.rint(result_image*255.0), 0, 255).astype(np.uint8))

    # Done.
    if writer is not None:
        writer.close()
    if log_file:
        log_file.close()
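
A usage sketch for the PyTorch fit_env_phong above; the output directory, filenames, and intervals are placeholders:

# Hedged example call; all paths and intervals are illustrative.
fit_env_phong(max_iter=1000, log_interval=10,
              out_dir='out/env_phong', log_fn='log.txt',
              mp4save_interval=100, mp4save_fn='progress.mp4')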
def fit_pose(max_iter=10000,
             repeats=1,
             log_interval=10,
             display_interval=None,
             display_res=512,
             lr_base=0.01,
             lr_falloff=1.0,
             nr_base=1.0,
             nr_falloff=1e-4,
             grad_phase_start=0.5,
             resolution=256,
             out_dir='.',
             log_fn=None,
             imgsave_interval=None,
             imgsave_fn=None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/cube_p.npz') as f:
        pos_idx, pos, col_idx, col = f.values()
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Transformation matrix input to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])

    # Pose matrix input to TF graph.
    pose_in = tf.placeholder(tf.float32, [4])  # Quaternion.
    noise_in = tf.placeholder(tf.float32, [4])  # Mollification noise.

    # Setup TF graph for reference.
    mtx_total = tf.matmul(mtx_in, q_to_mtx_tf(pose_in))
    pos_clip = tf.matmul(pos, mtx_total, transpose_b=True)[tf.newaxis, ...]
    rast_out, _ = dr.rasterize(pos_clip,
                               pos_idx,
                               resolution=[resolution, resolution],
                               output_db=False)
    color, _ = dr.interpolate(col[tf.newaxis, ...], rast_out, col_idx)
    color = dr.antialias(color, rast_out, pos_clip, pos_idx)

    # Setup TF graph for optimization candidate.
    pose_var = tf.get_variable('pose',
                               initializer=tf.zeros_initializer(),
                               shape=[4])
    pose_var_in = tf.placeholder(tf.float32, [4])
    pose_set = tf.assign(pose_var, pose_var_in)
    pose_norm_op = tf.assign(
        pose_var,
        pose_var / tf.reduce_sum(pose_var**2)**0.5)  # Normalization operation.
    pose_total = q_mul_tf(pose_var, noise_in)
    mtx_total_opt = tf.matmul(mtx_in, q_to_mtx_tf(pose_total))
    pos_clip_opt = tf.matmul(pos, mtx_total_opt, transpose_b=True)[tf.newaxis,
                                                                   ...]
    rast_out_opt, _ = dr.rasterize(pos_clip_opt,
                                   pos_idx,
                                   resolution=[resolution, resolution],
                                   output_db=False)
    color_opt, _ = dr.interpolate(col[tf.newaxis, ...], rast_out_opt, col_idx)
    color_opt = dr.antialias(color_opt, rast_out_opt, pos_clip_opt, pos_idx)

    # Image-space loss.
    diff = (color_opt - color)**2  # L2 norm.
    diff = tf.tanh(5.0 *
                   tf.reduce_max(diff, axis=-1))  # Add some oomph to the loss.
    loss = tf.reduce_mean(diff)
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9,
                                      0.999).minimize(loss,
                                                      var_list=[pose_var])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Repeats.
    for rep in range(repeats):

        # Optimize.
        util.init_uninitialized_vars()
        loss_best = np.inf
        pose_best = None
        for it in range(max_iter + 1):
            # Modelview + projection matrix.
            mvp = np.matmul(util.projection(x=0.4),
                            util.translate(0, 0, -3.5)).astype(np.float32)

            # Learning and noise rate scheduling.
            itf = 1.0 * it / max_iter
            lr = lr_base * lr_falloff**itf
            nr = nr_base * nr_falloff**itf

            # Noise input.
            if itf >= grad_phase_start:
                noise = q_unit()
            else:
                noise = q_scale(q_rnd(), nr)
                noise = q_mul(noise, q_rnd_S4())  # Orientation noise.

            # Initialize optimization.
            if it == 0:
                pose_target = q_rnd()
                util.run(pose_set, {pose_var_in: q_rnd()})
                util.run(pose_norm_op)
                util.run(loss, {
                    mtx_in: mvp,
                    pose_in: pose_target,
                    noise_in: noise
                })  # Pipecleaning pass.

            # Run gradient training step.
            if itf >= grad_phase_start:
                util.run(train_op, {
                    mtx_in: mvp,
                    pose_in: pose_target,
                    noise_in: noise,
                    lr_in: lr
                })
                util.run(pose_norm_op)

            # Measure image-space loss and update best found pose.
            loss_val = util.run(loss, {
                mtx_in: mvp,
                pose_in: pose_target,
                noise_in: noise,
                lr_in: lr
            })
            if loss_val < loss_best:
                pose_best = util.run(pose_total, {noise_in: noise})
                if loss_val > 0.0:
                    loss_best = loss_val
            else:
                # Return to best pose in the greedy phase.
                if itf < grad_phase_start:
                    util.run(pose_set, {pose_var_in: pose_best})

            # Print/save log.
            if log_interval and (it % log_interval == 0):
                err = q_angle_deg(util.run(pose_var), pose_target)
                ebest = q_angle_deg(pose_best, pose_target)
                s = "rep=%d,iter=%d,err=%f,err_best=%f,loss=%f,loss_best=%f,lr=%f,nr=%f" % (
                    rep, it, err, ebest, loss_val, loss_best, lr, nr)
                print(s)
                if log_file:
                    log_file.write(s + "\n")

            # Show/save image.
            display_image = display_interval and (it % display_interval == 0)
            save_image = imgsave_interval and (it % imgsave_interval == 0)

            if display_image or save_image:
                img_ref, img_opt = util.run([color, color_opt], {
                    mtx_in: mvp,
                    pose_in: pose_target,
                    noise_in: noise
                })
                img_best, = util.run([color_opt], {
                    mtx_in: mvp,
                    pose_in: pose_best,
                    noise_in: q_unit()
                })
                img_ref = img_ref[0]
                img_opt = img_opt[0]
                img_best = img_best[0]
                result_image = np.concatenate([img_ref, img_best, img_opt],
                                              axis=1)

            if display_image:
                util.display_image(result_image,
                                   size=display_res,
                                   title='(%d) %d / %d' % (rep, it, max_iter))
            if save_image:
                util.save_image(out_dir + '/' + (imgsave_fn % (rep, it)),
                                result_image)

    # All repeats done.
    if log_file:
        log_file.close()
Example #15
    def tick(self):
        # Read from the camera.
        self.camera_frame = self.camera.get()
        if self.live_segment:
            if self.live_segment_ready:
                self.live_segment_ready = False
                self.model_process.submit(COMMAND_SEGMENT,
                                          (EVENT_CAMERA_SEGMENT, self.camera_frame))
        else:
            self.camera_segmented = None

        # Display the current camera frame.
        camera_combined = self.camera_frame
        if self.camera_segmented is not None:
            camera_combined = self.camera_frame // 3 + self.camera_segmented
        util.display_image(CAM_WINDOW, camera_combined)

        # Capture a pressed key.
        self.key = cv2.waitKey(1) & 0xff

        # Toggle live segmenting if the live segmenting key is pressed.
        if self.key_pressed(KEY_LIVE):
            self.live_segment = not self.live_segment

        # Capture a frame if the capture key is pressed.
        if self.key_pressed(KEY_CAPTURE):
            self.capture(self.camera_frame)

        # Open a file if the open key is pressed.
        if self.key_pressed(KEY_OPEN):
            path = input('path> ')
            try:
                image = cv2.imread(path)
                image = util.convert(image)
                image = cv2.resize(image, (CAM_WIDTH, CAM_HEIGHT))
                self.capture(image)
                print('Image loaded')
            except Exception:
                print('Invalid path')

        # Export the segmented image if the export key is pressed.
        if self.key_pressed(KEY_EXPORT):
            if self.canvas is not None:
                path = input('path> ')
                try:
                    cv2.imwrite(path, util.convert(self.canvas.get_combined()))
                    print('Image saved')
                except Exception:
                    print('Invalid path')

        # Fill the canvas if the fill key is pressed.
        if self.key_pressed(KEY_FILL):
            if self.canvas is not None:
                self.canvas.fill()

        # Process the segment map if the process key is pressed.
        if self.key_pressed(KEY_PROCESS):
            if self.canvas is not None:
                self.process(self.canvas.get_map())

        # Save the result if the save key is pressed.
        if self.key_pressed(KEY_SAVE):
            if self.im_processed is not None:
                path = input('path> ')
                try:
                    cv2.imwrite(path, util.convert(self.im_processed))
                    print('Image saved')
                except Exception:
                    print('Invalid path')

        # Segment the result if the test key is pressed.
        if self.key_pressed(KEY_TEST):
            if self.im_processed is not None:
                self.capture(self.im_processed)

        # Quit if the quit key is pressed.
        if self.key_pressed(KEY_QUIT):
            self.camera.stop()
            self.model_process.stop()
            return False

        # Tick the model process.
        self.model_process.tick()

        return True
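
tick() is meant to be pumped from a main loop. A minimal sketch of the assumed driver; the App name is hypothetical:

app = App()  # Hypothetical controller owning camera, model_process, canvas, etc.
while app.tick():
    pass  # tick() returns False once KEY_QUIT stops the camera and model process.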
def fit_env_phong(max_iter=1000,
                  log_interval=10,
                  display_interval=None,
                  display_res=1024,
                  res=1024,
                  lr_base=1e-2,
                  lr_ramp=1.0,
                  out_dir='.',
                  log_fn=None,
                  imgsave_interval=None,
                  imgsave_fn=None):

    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Texture adapted from https://github.com/WaveEngine/Samples/tree/master/Materials/EnvironmentMap/Content/Assets/CubeMap.cubemap
    datadir = f'{pathlib.Path(__file__).absolute().parents[1]}/data'
    with np.load(f'{datadir}/envphong.npz') as f:
        pos_idx, pos, normals, env = f.values()
    env = env.astype(np.float32) / 255.0
    print("Mesh has %d triangles and %d vertices." %
          (pos_idx.shape[0], pos.shape[0]))

    # Target Phong parameters.
    phong_rgb = np.asarray([1.0, 0.8, 0.6], np.float32)
    phong_exp = 25.0

    # Inputs to TF graph.
    mtx_in = tf.placeholder(tf.float32, [4, 4])
    invmtx_in = tf.placeholder(tf.float32, [4, 4])  # Inverse.
    campos_in = tf.placeholder(tf.float32,
                               [3])  # Camera position in world space.
    lightdir_in = tf.placeholder(tf.float32, [3])  # Light direction.

    # Learned variables: environment maps, phong color, phong exponent.
    env_var = tf.get_variable('env_var',
                              initializer=tf.constant_initializer(0.5),
                              shape=env.shape)
    phong_var_raw = tf.get_variable('phong_var',
                                    initializer=tf.random_uniform_initializer(
                                        0.0, 1.0),
                                    shape=[4])  # R, G, B, exp.
    phong_var = phong_var_raw * [1.0, 1.0, 1.0, 10.0]  # Faster learning rate for the exponent.

    # Transform and rasterize.
    viewvec = pos[..., :3] - campos_in[
        np.newaxis, np.newaxis, :]  # View vectors at vertices.
    reflvec = viewvec - 2.0 * normals[tf.newaxis, ...] * tf.reduce_sum(
        normals[tf.newaxis, ...] * viewvec, axis=-1,
        keepdims=True)  # Reflection vectors at vertices.
    reflvec = reflvec / tf.reduce_sum(reflvec**2, axis=-1,
                                      keepdims=True)**0.5  # Normalize.
    pos_clip = tf.matmul(pos, mtx_in, transpose_b=True)[tf.newaxis, ...]
    rast_out, rast_out_db = dr.rasterize(pos_clip, pos_idx, [res, res])
    refl, refld = dr.interpolate(
        reflvec, rast_out, pos_idx, rast_db=rast_out_db,
        diff_attrs='all')  # Interpolated reflection vectors.

    # Phong light.
    refl = refl / tf.reduce_sum(refl**2, axis=-1,
                                keepdims=True)**0.5  # Normalize.
    ldotr = tf.reduce_sum(-lightdir_in * refl, axis=-1,
                          keepdims=True)  # L dot R.

    # Reference color. No need for AA because we are not learning geometry.
    env = np.stack(env)[:, ::-1]
    color = dr.texture(env[np.newaxis, ...],
                       refl,
                       refld,
                       filter_mode='linear-mipmap-linear',
                       boundary_mode='cube')
    color = tf.reduce_sum(tf.stack(color), axis=0)
    color = color + phong_rgb * tf.maximum(0.0, ldotr)**phong_exp  # Phong.
    color = tf.maximum(
        color,
        1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1))  # White background.

    # Candidate rendering same up to this point, but uses learned texture and Phong parameters instead.
    color_opt = dr.texture(env_var[tf.newaxis, ...],
                           refl,
                           uv_da=refld,
                           filter_mode='linear-mipmap-linear',
                           boundary_mode='cube')
    color_opt = tf.reduce_sum(tf.stack(color_opt), axis=0)
    color_opt = color_opt + phong_var[:3] * tf.maximum(
        0.0, ldotr)**phong_var[3]  # Phong.
    color_opt = tf.maximum(
        color_opt,
        1.0 - tf.clip_by_value(rast_out[..., -1:], 0, 1))  # White background.

    # Training.
    loss = tf.reduce_mean((color - color_opt)**2)  # L2 pixel loss.
    lr_in = tf.placeholder(tf.float32, [])
    train_op = tf.train.AdamOptimizer(lr_in, 0.9, 0.99).minimize(
        loss, var_list=[env_var, phong_var_raw])

    # Open log file.
    log_file = open(out_dir + '/' + log_fn, 'wt') if log_fn else None

    # Render.
    ang = 0.0
    util.init_uninitialized_vars()
    imgloss_avg, phong_avg = [], []
    for it in range(max_iter + 1):
        lr = lr_base * lr_ramp**(float(it) / float(max_iter))

        # Random rotation/translation matrix for optimization.
        r_rot = util.random_rotation_translation(0.25)

        # Smooth rotation for display.
        ang = ang + 0.01
        a_rot = np.matmul(util.rotate_x(-0.4), util.rotate_y(ang))

        # Modelview and modelview + projection matrices.
        proj = util.projection(x=0.4, n=1.0, f=200.0)
        r_mv = np.matmul(util.translate(0, 0, -3.5), r_rot)
        r_mvp = np.matmul(proj, r_mv).astype(np.float32)
        a_mv = np.matmul(util.translate(0, 0, -3.5), a_rot)
        a_mvp = np.matmul(proj, a_mv).astype(np.float32)

        # Solve camera positions.
        a_campos = np.linalg.inv(a_mv)[:3, 3]
        r_campos = np.linalg.inv(r_mv)[:3, 3]

        # Random light direction.
        lightdir = np.random.normal(size=[3])
        lightdir /= np.linalg.norm(lightdir) + 1e-8

        # Run training and measure image-space RMSE loss.
        imgloss_val, phong_val, _ = util.run(
            [loss, phong_var, train_op], {
                mtx_in: r_mvp,
                invmtx_in: np.linalg.inv(r_mvp),
                campos_in: r_campos,
                lightdir_in: lightdir,
                lr_in: lr
            })
        imgloss_avg.append(imgloss_val**0.5)
        phong_avg.append(phong_val)

        # Print/save log.
        if log_interval and (it % log_interval == 0):
            imgloss_val, imgloss_avg = np.mean(
                np.asarray(imgloss_avg, np.float32)), []
            phong_val, phong_avg = np.mean(np.asarray(phong_avg, np.float32),
                                           axis=0), []
            phong_rgb_rmse = np.mean((phong_val[:3] - phong_rgb)**2)**0.5
            phong_exp_rel_err = np.abs(phong_val[3] - phong_exp) / phong_exp
            s = "iter=%d,phong_rgb_rmse=%f,phong_exp_rel_err=%f,img_rmse=%f" % (
                it, phong_rgb_rmse, phong_exp_rel_err, imgloss_val)
            print(s)
            if log_file:
                log_file.write(s + '\n')

        # Show/save result image.
        display_image = display_interval and (it % display_interval == 0)
        save_image = imgsave_interval and (it % imgsave_interval == 0)

        if display_image or save_image:
            result_image = util.run(
                color_opt, {
                    mtx_in: a_mvp,
                    invmtx_in: np.linalg.inv(a_mvp),
                    campos_in: a_campos,
                    lightdir_in: lightdir
                })[0]
        if display_image:
            util.display_image(result_image,
                               size=display_res,
                               title='%d / %d' % (it, max_iter))
        if save_image:
            util.save_image(out_dir + '/' + (imgsave_fn % it), result_image)

    # Done.
    if log_file:
        log_file.close()