def forward(self, source_image, kp_driving, kp_source):
    """
    Predict a dense deformation grid from the source image and keypoints.

    The source image is optionally rescaled, embedded together with the
    keypoints, passed through the group blocks and the hourglass, and the
    resulting prediction is turned into per-pixel offsets that are added
    to the identity coordinate grid. A zero z-coordinate is appended last.

    :param source_image: tensor handed to the embedding modules; rescaled
        first when ``self.scale_factor != 1``
    :param kp_driving: driving keypoints, forwarded to the embedding modules
    :param kp_source: source keypoints, forwarded to the embedding modules
    :return: tensor of shape (bs, d, h, w, 3) where bs, d, h, w are taken
        from the hourglass output; the last axis holds two grid coordinates
        followed by a zero z-coordinate
    """
    if self.scale_factor != 1:
        source_image = F.interpolate(
            source_image,
            scale_factor=(1, self.scale_factor, self.scale_factor))

    prediction = self.mask_embedding(source_image, kp_driving, kp_source)
    # NOTE(review): the activation is applied after every block; confirm
    # against the upstream layout if this file was re-indented.
    for block in self.group_blocks:
        prediction = block(prediction)
        prediction = F.leaky_relu(prediction, 0.2)
    prediction = self.hourglass(prediction)

    bs, _, d, h, w = prediction.shape

    if self.use_mask:
        # First num_kp + 1 channels are softmax-normalised into per-keypoint
        # weights used to blend the difference embedding.
        mask = F.softmax(prediction[:, :(self.num_kp + 1)], dim=1)
        mask = mask.unsqueeze(2)
        difference_embedding = self.difference_embedding(
            source_image, kp_driving, kp_source)
        difference_embedding = difference_embedding.view(
            bs, self.num_kp + 1, 2, d, h, w)
        deformations_relative = (difference_embedding * mask).sum(dim=1)
    else:
        # A real zero tensor (not the int 0) so that the permute below also
        # works when use_correction is disabled as well.
        deformations_relative = torch.zeros(bs, 2, d, h, w).type(
            prediction.type())

    if self.use_correction:
        # The last two channels are an additive correction to the offsets.
        deformations_relative = deformations_relative + prediction[:, -2:]

    # Move the 2 offset channels to the last axis: (bs, d, h, w, 2).
    deformations_relative = deformations_relative.permute(0, 2, 3, 4, 1)

    coordinate_grid = make_coordinate_grid(
        (h, w), type=deformations_relative.type())
    coordinate_grid = coordinate_grid.view(1, 1, h, w, 2)
    deformation = deformations_relative + coordinate_grid

    z_coordinate = torch.zeros(deformation.shape[:-1] + (1,)).type(
        deformation.type())
    return torch.cat([deformation, z_coordinate], dim=-1)
def __init__(self, bs, **kwargs):
    """
    Draw a random affine transform (and optional TPS parameters) per sample.

    :param bs: number of transforms to draw
    :param kwargs: must contain ``sigma_affine``; when both ``sigma_tps``
        and ``points_tps`` are present, thin-plate-spline control points
        and parameters are created as well
    """
    affine_sigma = kwargs['sigma_affine']
    # The sample is drawn even in TEST_MODE (where it is then replaced).
    affine_noise = paddle.distribution.Normal(
        loc=[0], scale=[affine_sigma]).sample([bs, 2, 3])
    affine_noise = affine_noise.reshape((bs, 2, 3))
    if TEST_MODE:
        affine_noise = paddle.to_tensor(
            np.ones((bs, 2, 3)).astype(np.float32))

    identity = paddle.tensor.eye(2, 3, dtype='float32').reshape((1, 2, 3))
    self.theta = affine_noise + identity
    self.bs = bs

    if not (('sigma_tps' in kwargs) and ('points_tps' in kwargs)):
        self.tps = False
        return

    self.tps = True
    points = kwargs['points_tps']
    params_shape = (bs, 1, points**2)
    self.control_points = make_coordinate_grid((points, points)).unsqueeze(0)
    if TEST_MODE:
        self.control_params = paddle.to_tensor(
            np.ones(params_shape).astype(np.float32))
    else:
        tps_noise = paddle.distribution.Normal(
            loc=[0], scale=[kwargs['sigma_tps']]).sample(list(params_shape))
        self.control_params = tps_noise.reshape(params_shape)
def transform_frame(self, frame):
    """
    Resample ``frame`` through this transform's warped coordinate grid.

    A coordinate grid over the trailing two dimensions of ``frame`` is
    flattened, passed through ``self.warp_coordinates`` and reshaped to
    (self.bs, H, W, 2) before being used as the sampling grid.

    :param frame: tensor whose dims 2 and 3 give the grid size
    :return: result of ``F.grid_sample`` with reflection padding
    """
    height, width = frame.shape[2], frame.shape[3]

    identity = make_coordinate_grid(frame.shape[2:], type=frame.type())
    flat_points = identity.unsqueeze(0).view(1, height * width, 2)

    warped = self.warp_coordinates(flat_points)
    sampling_grid = warped.view(self.bs, height, width, 2)

    return F.grid_sample(frame, sampling_grid, padding_mode="reflection")
def create_sparse_motions(self, source_image, kp_driving, kp_source):
    """
    Eq 4. in the paper T_{s<-d}(z)

    Builds one motion grid per keypoint plus an identity (background) grid.

    :param source_image: 4-D tensor; only its shape (bs, _, h, w) is read
    :param kp_driving: dict with 'value' and optionally 'jacobian'
    :param kp_source: dict with 'value' (and 'jacobian' when the driving
        keypoints carry one)
    :return: identity grid concatenated with the per-keypoint grids on axis 1
    """
    bs, _, h, w = source_image.shape
    kp_shape = (bs, self.num_kp, 1, 1, 2)

    identity_grid = make_coordinate_grid((h, w)).reshape((1, 1, h, w, 2))
    motion = identity_grid - kp_driving['value'].reshape(kp_shape)

    if 'jacobian' in kp_driving:
        jacobian = paddle.matmul(kp_source['jacobian'],
                                 paddle.inverse(kp_driving['jacobian']))
        # Fold the two leading axes into one for the matmul, then restore
        # them afterwards.
        lead_0, lead_1, *matrix_dims = jacobian.shape
        jacobian = jacobian.reshape((-1, *matrix_dims))
        jacobian = jacobian.unsqueeze(-3).unsqueeze(-3)
        jacobian = jacobian.tile((1, h, w, 1, 1))

        _, _, *grid_dims = motion.shape
        flat_motion = motion.reshape((-1, *grid_dims))
        flat_motion = paddle.matmul(jacobian, flat_motion.unsqueeze(-1))
        flat_motion = flat_motion.squeeze(-1)
        motion = flat_motion.reshape(
            (lead_0, lead_1, *(flat_motion.shape[1:])))

    driving_to_source = motion + kp_source['value'].reshape(kp_shape)

    # adding background feature
    background = identity_grid.tile((bs, 1, 1, 1, 1))
    return paddle.concat([background, driving_to_source], axis=1)
def kp2gaussian(kp, spatial_size, kp_variance='matrix'):
    """
    Transform a keypoint into gaussian like representation

    :param kp: dict with 'mean' and, for the 'matrix' and 'single' modes,
        'var'
    :param spatial_size: size passed to ``make_coordinate_grid``
    :param kp_variance: 'matrix' (use the inverse of kp['var'] as a
        quadratic form), 'single' (divide by kp['var']) or any other value,
        which is used directly as the divisor
    :return: tensor of exp(-0.5 * distance) values over the grid
    """
    mean = kp['mean']
    lead_rank = len(mean.shape) - 1
    lead_shape = mean.shape[:lead_rank]

    # Grid gets singleton leading axes, then is tiled to mean's leading shape.
    grid = make_coordinate_grid(spatial_size, mean.type())
    grid = grid.view(*((1, ) * lead_rank + grid.shape))
    grid = grid.repeat(*(lead_shape + (1, 1, 1)))

    # Preprocess kp shape
    mean = mean.view(*(lead_shape + (1, 1, 2)))
    mean_sub = (grid - mean)

    if kp_variance == 'matrix':
        inv_var = matrix_inverse(kp['var'])
        inv_var = inv_var.view(*(inv_var.shape[:lead_rank] + (1, 1, 2, 2)))
        row = mean_sub.unsqueeze(-2)
        col = mean_sub.unsqueeze(-1)
        under_exp = torch.matmul(torch.matmul(row, inv_var), col)
        under_exp = under_exp.squeeze(-1).squeeze(-1)
        return torch.exp(-0.5 * under_exp)

    if kp_variance == 'single':
        return torch.exp(-0.5 * (mean_sub**2).sum(-1) / kp['var'])

    return torch.exp(-0.5 * (mean_sub**2).sum(-1) / kp_variance)
def transform_frame(self, frame):
    """
    Resample ``frame`` through this transform's warped coordinate grid.

    In TEST_MODE the sampler is still invoked, but a constant tensor of
    twos with the sampler's output shape is returned instead.

    :param frame: variable whose dims 2 and 3 give the grid size
    :return: the sampled frame (or the constant stand-in in TEST_MODE)
    """
    height, width = frame.shape[2], frame.shape[3]

    identity = make_coordinate_grid(frame.shape[2:], 'float32')
    identity = fluid.layers.unsqueeze(identity, [0])
    flat_points = fluid.layers.reshape(identity, (1, height * width, 2))

    warped = self.warp_coordinates(flat_points)
    sampling_grid = fluid.layers.reshape(warped,
                                         (self.bs, height, width, 2))

    if TEST_MODE:
        sampled = fluid.layers.grid_sampler(frame, sampling_grid)
        logging.warning(
            'TEST MODE Output of fluid.layers.grid_sampler == 2. model:L152')
        twos = np.ones(sampled.shape).astype(np.float32) * 2
        return fluid.dygraph.to_variable(twos)

    # Branch 0.0.0c is awaiting an update.
    if PP_v2:
        return fluid.layers.grid_sampler(frame,
                                         sampling_grid,
                                         mode='bilinear',
                                         padding_mode='reflect',
                                         align_corners=False)

    return fluid.layers.grid_sampler(frame, sampling_grid)
def create_sparse_motions(self, source_image, kp_driving, kp_source):
    """
    Eq 4. in the paper T_{s<-d}(z)

    Builds one motion grid per keypoint plus an identity (background) grid.

    :param source_image: 4-D tensor; only its shape (bs, _, h, w) is read
    :param kp_driving: dict with 'value' and optionally 'jacobian'
    :param kp_source: dict with 'value' (and 'jacobian' when the driving
        keypoints carry one)
    :return: tensor with the identity grid followed by the per-keypoint
        grids along dim 1
    """
    bs, _, h, w = source_image.shape
    source_value = kp_source['value']

    identity_grid = make_coordinate_grid((h, w), type=source_value.type())
    identity_grid = identity_grid.view(1, 1, h, w, 2)

    # Grid coordinates relative to each driving keypoint.
    motion = identity_grid - kp_driving['value'].view(bs, self.num_kp, 1, 1, 2)

    if 'jacobian' in kp_driving:
        jacobian = torch.matmul(kp_source['jacobian'],
                                torch.inverse(kp_driving['jacobian']))
        # Broadcast the per-keypoint 2x2 matrix over every grid location.
        jacobian = jacobian.unsqueeze(-3).unsqueeze(-3)
        jacobian = jacobian.repeat(1, 1, h, w, 1, 1)
        motion = torch.matmul(jacobian, motion.unsqueeze(-1)).squeeze(-1)

    driving_to_source = motion + source_value.view(bs, self.num_kp, 1, 1, 2)

    #adding background feature
    background = identity_grid.repeat(bs, 1, 1, 1, 1)
    return torch.cat([background, driving_to_source], dim=1)
def forward(self, appearance_frame, kp_video, kp_appearance):
    """
    Return an identity deformation grid with a zero z-coordinate.

    :param appearance_frame: 5-D tensor; supplies bs, h, w and the tensor type
    :param kp_video: dict whose 'mean' entry is 4-D; its dim 1 gives ``d``
    :param kp_appearance: unused
    :return: tensor of shape (bs, d, h, w, 3)
    """
    bs, _, _, h, w = appearance_frame.shape
    _, d, _, _ = kp_video['mean'].shape

    grid = make_coordinate_grid((h, w), type=appearance_frame.type())
    grid = grid.view(1, 1, h, w, 2).repeat(bs, d, 1, 1, 1)

    zeros_shape = grid.shape[:-1] + (1,)
    z_coordinate = torch.zeros(zeros_shape).type(grid.type())

    return torch.cat([grid, z_coordinate], dim=-1)
def gaussian2kp(self, heatmap):
    """
    Extract keypoint coordinates from a heatmap.

    Each grid coordinate is weighted by the heatmap and summed over dims 2
    and 3. Presumably the heatmap is normalised over those dims so the
    result is an expected position - confirm against the caller.

    :param heatmap: tensor whose dims 2 and 3 index the coordinate grid
    :return: dict with the weighted coordinates under 'value'
    """
    grid_size = heatmap.shape[2:]
    weights = heatmap.unsqueeze(-1)

    grid = make_coordinate_grid(grid_size, weights.type())
    grid = grid.unsqueeze(0).unsqueeze(0)

    return {'value': (weights * grid).sum(dim=(2, 3))}
def transform_frame(self, frame):
    """
    Resample ``frame`` through this transform's warped coordinate grid.

    :param frame: tensor whose dims 2 and 3 give the grid size
    :return: result of bilinear ``F.grid_sample`` with reflection padding
        and ``align_corners=True``
    """
    height, width = frame.shape[2], frame.shape[3]

    identity = make_coordinate_grid(frame.shape[2:], 'float32').unsqueeze(0)
    flat_points = identity.reshape((1, height * width, 2))

    warped = self.warp_coordinates(flat_points)
    sampling_grid = warped.reshape((self.bs, height, width, 2))

    return F.grid_sample(frame,
                         sampling_grid,
                         mode='bilinear',
                         padding_mode='reflection',
                         align_corners=True)
def gaussian2kp(self, heatmap):
    """
    Extract the mean from a heatmap.

    Only the heatmap-weighted sum of grid coordinates is computed here;
    no variance is returned.

    :param heatmap: tensor whose dims 2 and 3 index the coordinate grid
    :return: tensor of weighted coordinates, summed over dims 2 and 3
    """
    spatial = heatmap.shape[2:]
    weights = heatmap.unsqueeze(-1)

    grid = make_coordinate_grid(spatial, weights.type())
    grid = grid.unsqueeze(0).unsqueeze(0)

    weighted = weights * grid
    return weighted.sum(dim=(2, 3))
def gaussian2kp(self, heatmap):
    """
    Extract the mean from a heatmap.

    :param heatmap: tensor whose dims 2 and 3 index the coordinate grid
    :return: tensor holding the heatmap-weighted sum of grid coordinates
        over dims 2 and 3
    """
    spatial_size = heatmap.shape[2:]
    expanded = heatmap.unsqueeze(-1)

    # Two leading singleton axes so the grid broadcasts against the heatmap.
    coords = make_coordinate_grid(spatial_size, expanded.type())
    coords = coords.unsqueeze(0).unsqueeze(0)

    return (expanded * coords).sum(dim=(2, 3))
def __init__(self, bs, **kwargs):
    """
    Draw a random affine transform (and optional TPS parameters) per sample.

    :param bs: number of transforms to draw
    :param kwargs: must contain ``sigma_affine``; when both ``sigma_tps``
        and ``points_tps`` are present, thin-plate-spline control points
        and parameters are created as well
    """
    affine_std = kwargs['sigma_affine'] * torch.ones([bs, 2, 3])
    noise = torch.normal(mean=0, std=affine_std)

    # Random perturbation around the 2x3 identity affine matrix.
    self.theta = noise + torch.eye(2, 3).view(1, 2, 3)
    self.bs = bs

    if not (('sigma_tps' in kwargs) and ('points_tps' in kwargs)):
        self.tps = False
        return

    self.tps = True
    points = kwargs['points_tps']
    control_points = make_coordinate_grid((points, points), type=noise.type())
    self.control_points = control_points.unsqueeze(0)

    tps_std = kwargs['sigma_tps'] * torch.ones([bs, 1, points ** 2])
    self.control_params = torch.normal(mean=0, std=tps_std)
def gaussian2kp(self, heatmap):
    """
    Extract the mean from a heatmap.

    Shape comments below use the example sizes noted by the original
    author (10 keypoints on a 58x58 map); other sizes are not excluded
    by the code.

    :param heatmap: tensor whose dims 2 and 3 index the coordinate grid
    :return: dict with the weighted coordinates under 'value'
    """
    spatial_size = heatmap.shape[2:]  # 58, 58
    weights = heatmap.unsqueeze(-1)  # B, 10, 58, 58, 1

    grid = make_coordinate_grid(spatial_size, weights.type())
    grid = grid.unsqueeze(0).unsqueeze(0)  # 1, 1, 58, 58, 2

    weighted_sum = (weights * grid).sum(dim=(2, 3))  # B, 10, 2
    return {'value': weighted_sum}
def region2affine(self, region):
    """
    Compute the shift (and optionally the covariance) of each region map.

    :param region: tensor whose dims 2 and 3 index the coordinate grid;
        used as per-location weights
    :return: dict with 'shift' (weighted sum of grid coordinates) and,
        when ``self.pca_based`` is set, 'covar' (weighted sum of outer
        products of the centred coordinates)
    """
    spatial_size = region.shape[2:]
    weights = region.unsqueeze(-1)

    grid = make_coordinate_grid(spatial_size, weights.type())
    grid = grid.unsqueeze(0).unsqueeze(0)

    mean = (weights * grid).sum(dim=(2, 3))
    region_params = {'shift': mean}

    if self.pca_based:
        centred = grid - mean.unsqueeze(-2).unsqueeze(-2)
        # Outer product of each centred coordinate with itself.
        outer = torch.matmul(centred.unsqueeze(-1), centred.unsqueeze(-2))
        weighted = outer * weights.unsqueeze(-1)
        region_params['covar'] = weighted.sum(dim=(2, 3))

    return region_params
def create_sparse_motions(self, source_image, kp_driving_value, kp_driving_jacobian, kp_source_value, kp_source_jacobian):
    """
    Eq 4. in the paper T_{s<-d}(z)

    Builds one motion grid per keypoint plus an identity (background) grid.

    :param source_image: 4-D tensor; only its shape (bs, _, h, w) is read
    :param kp_driving_value: driving keypoint positions, viewed as
        (bs, num_kp, 1, 1, 2)
    :param kp_driving_jacobian: driving keypoint jacobians, or None to skip
        the jacobian term
    :param kp_source_value: source keypoint positions, viewed as
        (bs, num_kp, 1, 1, 2)
    :param kp_source_jacobian: source keypoint jacobians; read only when
        ``kp_driving_jacobian`` is not None
    :return: tensor with the identity grid followed by the per-keypoint
        grids along dim 1
    """
    bs, _, h, w = source_image.shape
    identity_grid = make_coordinate_grid((h, w), type=kp_source_value.type())
    identity_grid = identity_grid.view(1, 1, h, w, 2)
    coordinate_grid = identity_grid - kp_driving_value.view(bs, self.num_kp, 1, 1, 2)

    # Guard on the jacobian itself: the previous check tested
    # kp_driving_value, which is always set by this point, so a missing
    # jacobian crashed inside torch.inverse.
    if kp_driving_jacobian is not None:
        # TODO: Replace torch.inverse not implemented in coreml
        jacobian = torch.matmul(kp_source_jacobian, torch.inverse(kp_driving_jacobian))
        # Broadcast the per-keypoint 2x2 matrix over every grid location.
        jacobian = jacobian.unsqueeze(-3).unsqueeze(-3)
        jacobian = jacobian.repeat(1, 1, h, w, 1, 1)
        coordinate_grid = torch.matmul(jacobian, coordinate_grid.unsqueeze(-1))
        coordinate_grid = coordinate_grid.squeeze(-1)

    driving_to_source = coordinate_grid + kp_source_value.view(bs, self.num_kp, 1, 1, 2)

    # adding background feature
    identity_grid = identity_grid.repeat(bs, 1, 1, 1, 1)
    sparse_motions = torch.cat([identity_grid, driving_to_source], dim=1)
    return sparse_motions
def kp2gaussian(kp, spatial_size, kp_variance):
    """
    Transform a keypoint into gaussian like representation

    :param kp: dict with the keypoint positions under 'value'
    :param spatial_size: size passed to ``make_coordinate_grid``
    :param kp_variance: divisor applied to the squared distance
    :return: tensor of exp(-0.5 * squared_distance / kp_variance) values
    """
    mean = kp['value']
    lead_rank = len(mean.shape) - 1
    lead_shape = mean.shape[:lead_rank]

    # Grid gets singleton leading axes, then is tiled to mean's leading shape.
    grid = make_coordinate_grid(spatial_size, mean.type())
    grid = grid.view(*((1, ) * lead_rank + grid.shape))
    grid = grid.repeat(*(lead_shape + (1, 1, 1)))

    # Preprocess kp shape
    mean = mean.view(*(lead_shape + (1, 1, 2)))

    squared_distance = ((grid - mean)**2).sum(-1)
    return torch.exp(-0.5 * squared_distance / kp_variance)
def create_sparse_motions(self, source_image, driving_region_params, source_region_params, bg_params=None):
    """
    Build one motion grid per region plus a background grid.

    :param source_image: 4-D tensor; only its shape (bs, _, h, w) is read
    :param driving_region_params: dict with 'shift' and optionally 'affine'
    :param source_region_params: dict with 'shift' (and 'affine' when the
        driving params carry one)
    :param bg_params: optional tensor viewed as (bs, 1, 1, 1, 3, 3) and
        applied to the background grid in homogeneous coordinates; when
        None the background is the identity grid
    :return: tensor with the background grid followed by the per-region
        grids along dim 1
    """
    bs, _, h, w = source_image.shape
    source_shift = source_region_params['shift']

    identity_grid = make_coordinate_grid((h, w), type=source_shift.type())
    identity_grid = identity_grid.view(1, 1, h, w, 2)

    motion = identity_grid - driving_region_params['shift'].view(
        bs, self.num_regions, 1, 1, 2)

    if 'affine' in driving_region_params:
        affine = torch.matmul(source_region_params['affine'],
                              torch.inverse(driving_region_params['affine']))
        if self.revert_axis_swap:
            # Multiply by the sign of the top-left entry of each matrix.
            affine = affine * torch.sign(affine[:, :, 0:1, 0:1])
        affine = affine.unsqueeze(-3).unsqueeze(-3)
        affine = affine.repeat(1, 1, h, w, 1, 1)
        motion = torch.matmul(affine, motion.unsqueeze(-1)).squeeze(-1)

    driving_to_source = motion + source_shift.view(
        bs, self.num_regions, 1, 1, 2)

    # adding background feature
    bg_grid = identity_grid.repeat(bs, 1, 1, 1, 1)
    if bg_params is not None:
        bg_grid = to_homogeneous(bg_grid)
        bg_matrix = bg_params.view(bs, 1, 1, 1, 3, 3)
        bg_grid = torch.matmul(bg_matrix, bg_grid.unsqueeze(-1)).squeeze(-1)
        bg_grid = from_homogeneous(bg_grid)

    return torch.cat([bg_grid, driving_to_source], dim=1)
def segment_motion(self, seg_target, seg_source):
    """
    Build one motion grid per segment plus an identity (background) grid.

    :param seg_target: dict with 'segmentation' (4-D; supplies bs, h, w),
        'shift' and optionally 'affine'
    :param seg_source: dict with 'shift' (and 'affine' when the target
        carries one)
    :return: tensor with the identity grid followed by the per-segment
        grids along dim 1
    """
    bs, _, h, w = seg_target['segmentation'].shape
    source_shift = seg_source['shift']

    identity_grid = make_coordinate_grid((h, w), type=source_shift.type())
    identity_grid = identity_grid.view(1, 1, h, w, 2)

    motion = identity_grid - seg_target['shift'].view(
        bs, self.num_segments, 1, 1, 2)

    if 'affine' in seg_target:
        affine = torch.matmul(seg_source['affine'],
                              torch.inverse(seg_target['affine']))
        # Broadcast the per-segment 2x2 matrix over every grid location.
        affine = affine.unsqueeze(-3).unsqueeze(-3)
        affine = affine.repeat(1, 1, h, w, 1, 1)
        motion = torch.matmul(affine, motion.unsqueeze(-1)).squeeze(-1)

    target_to_source = motion + source_shift.view(
        bs, self.num_segments, 1, 1, 2)

    # adding background feature
    background = identity_grid.repeat(bs, 1, 1, 1, 1)
    return torch.cat([background, target_to_source], dim=1)
def gaussian2kp(heatmap, kp_variance='matrix', clip_variance=None):
    """
    Extract the mean and the variance from a heatmap

    :param heatmap: tensor whose dims 3 and 4 index the coordinate grid
    :param kp_variance: 'matrix' for a full covariance per keypoint,
        'single' for one averaged variance per keypoint; any other value
        yields the mean only
    :param clip_variance: optional lower bound applied through the
        smallest singular value (only used in 'matrix' mode)
    :return: dict with 'mean' and, for the two variance modes, 'var'
    """
    spatial_size = heatmap.shape[3:]

    #adding small eps to avoid 'nan' in variance
    weights = heatmap.unsqueeze(-1) + 1e-7

    grid = make_coordinate_grid(spatial_size, weights.type())
    grid = grid.unsqueeze(0).unsqueeze(0).unsqueeze(0)

    mean = (weights * grid).sum(dim=(3, 4))
    kp = {'mean': mean.permute(0, 2, 1, 3)}

    if kp_variance == 'matrix':
        centred = grid - mean.unsqueeze(-2).unsqueeze(-2)
        # Outer product of each centred coordinate with itself.
        outer = torch.matmul(centred.unsqueeze(-1), centred.unsqueeze(-2))
        var = (outer * weights.unsqueeze(-1)).sum(dim=(3, 4))
        var = var.permute(0, 2, 1, 3, 4)
        if clip_variance:
            min_norm = torch.tensor(clip_variance).type(var.type())
            sg = smallest_singular(var).unsqueeze(-1)
            # Rescale so the smallest singular value is at least min_norm.
            var = torch.max(min_norm, sg) * var / sg
        kp['var'] = var
    elif kp_variance == 'single':
        centred = grid - mean.unsqueeze(-2).unsqueeze(-2)
        var = (centred**2 * weights).sum(dim=(3, 4))
        # Average over the coordinate axis, then add a trailing axis.
        var = var.mean(dim=-1, keepdim=True).unsqueeze(-1)
        kp['var'] = var.permute(0, 2, 1, 3, 4)

    return kp
def __init__(self, bs, **kwargs):
    """
    Draw a random affine transform (and optional TPS parameters) per sample.

    In TEST_MODE the random draws are replaced by all-ones float32 arrays
    and a warning is logged for each replacement.

    :param bs: number of transforms to draw
    :param kwargs: must contain ``sigma_affine``; when both ``sigma_tps``
        and ``points_tps`` are present, thin-plate-spline control points
        and parameters are created as well
    """
    # The sample is drawn even in TEST_MODE (where it is then replaced).
    noise = fluid.layers.Normal(loc=[0],
                                scale=[kwargs['sigma_affine']]).sample(
                                    [bs, 2, 3])
    noise = fluid.layers.reshape(noise, (bs, 2, 3))
    if TEST_MODE:
        logging.warning(
            'TEST MODE: Transform.noise == np.ones model.py:L135')
        noise = dygraph.to_variable(np.ones((bs, 2, 3)).astype(np.float32))

    # Random perturbation around the 2x3 identity affine matrix.
    self.theta = noise + fluid.layers.reshape(fluid.layers.eye(2, 3),
                                              (1, 2, 3))
    self.bs = bs

    if ('sigma_tps' in kwargs) and ('points_tps' in kwargs):
        self.tps = True
        points = kwargs['points_tps']
        self.control_points = make_coordinate_grid((points, points),
                                                   'float32')
        self.control_points = fluid.layers.unsqueeze(
            self.control_points, [0])
        if TEST_MODE:
            logging.warning(
                'TEST MODE: Transform.control_params == np.ones model.py:L144'
            )
            # Cast to float32 like the noise above; np.ones alone yields
            # float64, which does not match the float32 control points.
            self.control_params = dygraph.to_variable(
                np.ones((bs, 1, points**2)).astype(np.float32))
        else:
            buf = fluid.layers.Normal(loc=[0],
                                      scale=[kwargs['sigma_tps']]).sample(
                                          [bs, 1, points**2])
            self.control_params = fluid.layers.reshape(
                buf, (bs, 1, points**2))
    else:
        self.tps = False
def forward(self, source_image, kp_driving, kp_source):
    """
    Assemble the movement encoding from the enabled feature groups.

    Depending on the module flags, the encoding concatenates (along dim 3)
    keypoint heatmaps, the tiled keypoint differences and copies of the
    source image shifted by those differences.

    :param source_image: 5-D tensor; rescaled first when
        ``self.scale_factor != 1``
    :param kp_driving: dict whose 'mean' entry is 4-D (_, d, num_kp, _)
    :param kp_source: dict with a 'mean' entry matching ``kp_driving``
    :return: tensor of shape (bs, channels, d, h, w)
    """
    if self.scale_factor != 1:
        scale = (1, self.scale_factor, self.scale_factor)
        source_image = F.interpolate(source_image, scale_factor=scale)

    spatial_size = source_image.shape[3:]
    bs, _, _, h, w = source_image.shape
    _, d, num_kp, _ = kp_driving['mean'].shape

    features = []

    if self.use_heatmap:
        heatmap = self.normalize_heatmap(
            kp2gaussian(kp_driving,
                        spatial_size=spatial_size,
                        kp_variance=self.kp_variance))
        if self.heatmap_type == 'difference':
            source_heatmap = self.normalize_heatmap(
                kp2gaussian(kp_source,
                            spatial_size=spatial_size,
                            kp_variance=self.kp_variance))
            heatmap = heatmap - source_heatmap
        if self.add_bg_feature_map:
            # Extra all-zero map in front, standing in for the background.
            bg_map = torch.zeros(bs, d, 1, h, w).type(heatmap.type())
            heatmap = torch.cat([bg_map, heatmap], dim=2)
        features.append(heatmap.unsqueeze(3))

    # From here on the background (if enabled) counts as one more keypoint.
    num_kp = num_kp + self.add_bg_feature_map

    if self.use_difference or self.use_deformed_source_image:
        kp_video_diff = kp_source['mean'] - kp_driving['mean']
        if self.add_bg_feature_map:
            bg_diff = torch.zeros(bs, d, 1, 2).type(kp_video_diff.type())
            kp_video_diff = torch.cat([bg_diff, kp_video_diff], dim=2)
        # Tile each 2-D difference over the whole h x w plane.
        kp_video_diff = kp_video_diff.view((bs, d, num_kp, 2, 1, 1))
        kp_video_diff = kp_video_diff.repeat(1, 1, 1, 1, h, w)

    if self.use_difference:
        features.append(kp_video_diff)

    if self.use_deformed_source_image:
        flat_batch = bs * d * num_kp

        tiled_source = source_image.unsqueeze(1).unsqueeze(1)
        tiled_source = tiled_source.repeat(1, d, num_kp, 1, 1, 1, 1)
        tiled_source = tiled_source.view(flat_batch, -1, h, w)

        shift = kp_video_diff.view((flat_batch, -1, h, w))
        shift = shift.permute(0, 2, 3, 1)
        identity = make_coordinate_grid((h, w), type=shift.type())
        sampling_grid = identity.view(1, h, w, 2) + shift

        deformed = F.grid_sample(tiled_source, sampling_grid)
        features.append(deformed.view((bs, d, num_kp, -1, h, w)))

    movement_encoding = torch.cat(features, dim=3)
    movement_encoding = movement_encoding.view(bs, d, -1, h, w)
    return movement_encoding.permute(0, 2, 1, 3, 4)