def gen_loaders(self, n_samples=500, test_size=0.1, batch_size=50, pool=8, latent=True):
    # Load the raw TSV splits and merge them into a single frame.
    train_data = pd.read_csv(TRAIN_DATA, delimiter='\t', header=None)
    cv_data = pd.read_csv(CV_DATA, delimiter='\t', header=None)
    df = pd.concat([train_data, cv_data], axis=0, ignore_index=True)
    df = df.sample(n=n_samples)
    # Column 0 holds the class label, which the unsupervised VAE does not need.
    df.drop(0, axis=1, inplace=True)
    df_train, df_cv = train_test_split(df, test_size=test_size, random_state=42)
    train_data = df_train.values.astype(np.float32)
    cv_data = df_cv.values.astype(np.float32)
    # Optionally downsample each series by average pooling along the time axis.
    if pool:
        train_data = pooling(train_data, (1, pool))
        cv_data = pooling(cv_data, (1, pool))
    train_set = VAEDataset(train_data)
    cv_set = VAEDataset(cv_data)
    if latent:
        train_loader = DataLoader(train_set, batch_size=batch_size,
                                  collate_fn=self.collate_ae, shuffle=False)
        cv_loader = DataLoader(cv_set, batch_size=batch_size,
                               collate_fn=self.collate_ae, shuffle=False)
    else:
        train_loader = DataLoader(train_set, batch_size=batch_size)
        cv_loader = DataLoader(cv_set, batch_size=batch_size)
    torch.save(train_loader, 'vae_train_loader.pt')
    torch.save(cv_loader, 'vae_cv_loader.pt')
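# gen_loaders wraps the pooled arrays in a VAEDataset before building the
# DataLoaders. That class is defined elsewhere in the repository; the sketch
# below (name suffixed to mark it as hypothetical) shows the minimal shape it
# is assumed to have: unsupervised, so each item is just one time series.
from torch.utils.data import Dataset

class VAEDatasetSketch(Dataset):
    def __init__(self, data):
        # data: np.ndarray of shape (n_samples, n_timesteps), float32.
        self.data = torch.from_numpy(data)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]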
def gen_loaders(self, n_samples=2000, test_size=0.2, batch_size=200, pool=8, latent=True):
    train_data = pd.read_csv(TRAIN_DATA, delimiter='\t', header=None)
    cv_data = pd.read_csv(CV_DATA, delimiter='\t', header=None)
    df = pd.concat([train_data, cv_data], axis=0, ignore_index=True)
    # Column 0 holds the class label; draw a balanced sample per class.
    df.rename({0: 'y'}, inplace=True, axis=1)
    d = dict(df['y'].value_counts())
    class_cnt = n_samples // 3
    df1 = df.loc[df['y'] == 1].sample(n=min(class_cnt, d[1]))
    df2 = df.loc[df['y'] == 2].sample(n=min(class_cnt, d[2]))
    df3 = df.loc[df['y'] == 3].sample(n=min(class_cnt, d[3]))
    df = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
    # Shift labels from {1, 2, 3} to {0, 1, 2}.
    df['y'] = df['y'] - 1
    df_train, df_cv = train_test_split(df, test_size=test_size,
                                       stratify=df['y'], random_state=42)
    print(df_train['y'].value_counts(), df_cv['y'].value_counts())
    train_y = df_train['y'].values
    cv_y = df_cv['y'].values
    df_train.drop('y', axis=1, inplace=True)
    df_cv.drop('y', axis=1, inplace=True)
    train_data = df_train.values.astype(np.float32)
    cv_data = df_cv.values.astype(np.float32)
    if pool:
        train_data = pooling(train_data, (1, pool))
        cv_data = pooling(cv_data, (1, pool))
    train_set = TSDataset(train_data, train_y)
    cv_set = TSDataset(cv_data, cv_y)
    if latent:
        train_loader = DataLoader(train_set, batch_size=batch_size, drop_last=True,
                                  collate_fn=self.collate_ae, shuffle=False)
        cv_loader = DataLoader(cv_set, batch_size=batch_size, drop_last=True,
                               collate_fn=self.collate_ae, shuffle=False)
        torch.save(train_loader, 'ucr_train.pt')
        torch.save(cv_loader, 'ucr_cv.pt')
    else:
        train_loader = DataLoader(train_set, batch_size=batch_size, drop_last=True)
        cv_loader = DataLoader(cv_set, batch_size=batch_size, drop_last=True)
        torch.save(train_loader, 'train_loader.pt')
        torch.save(cv_loader, 'cv_loader.pt')
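# Both loaders call a free-standing pooling(array, window) helper that is not
# shown here. Given the (1, pool) window used above, it is assumed to do
# non-overlapping average pooling over a 2-D (samples x timesteps) array. The
# sketch below (suffixed name, hypothetical) illustrates that assumed behavior;
# it is not the repository's implementation.
import numpy as np

def pooling_sketch(data, window):
    """Average-pool a 2-D float array with a non-overlapping window.

    data:   np.ndarray of shape (n_samples, n_timesteps)
    window: (rows, cols) pool size, e.g. (1, 8) to shrink time by 8x.
    """
    rows, cols = window
    n, t = data.shape
    # Trim so both axes divide evenly by the window.
    data = data[:n - n % rows, :t - t % cols]
    out = data.reshape(n // rows, rows, t // cols, cols).mean(axis=(1, 3))
    return out.astype(np.float32)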
def connvolution_process(img):
    '''----------------------Reading the image-------------------------------'''
    # Convert the 3-D RGB image to a 2-D grayscale array.
    img = color.rgb2gray(img)
    # io.imshow(img)
    # plt.show()

    '''----------------------Preparing Filter-------------------------------'''
    l1_filter = numpy.zeros((2, 3, 3))
    # Vertical edge detector filter
    l1_filter[0, :, :] = numpy.array([[[-1, 0, 1],
                                       [-1, 0, 1],
                                       [-1, 0, 1]]])
    # Horizontal edge detector filter
    l1_filter[1, :, :] = numpy.array([[[1, 1, 1],
                                       [0, 0, 0],
                                       [-1, -1, -1]]])

    '''---------------------- Convolutional Layer 1 ---------------------------'''
    l1_feature_map = conv(img, l1_filter)
    l1_feature_map_relu = relu(l1_feature_map)
    l1_feature_map_relu_pool = pooling(l1_feature_map_relu, 2, 2)

    '''---------------------- Convolutional Layer 2 ---------------------------'''
    l2_filter = numpy.random.rand(3, 5, 5, l1_feature_map_relu_pool.shape[-1])
    l2_feature_map = conv(l1_feature_map_relu_pool, l2_filter)
    l2_feature_map_relu = relu(l2_feature_map)
    l2_feature_map_relu_pool = pooling(l2_feature_map_relu, 2, 2)

    '''---------------------- Convolutional Layer 3 ---------------------------'''
    l3_filter = numpy.random.rand(1, 7, 7, l2_feature_map_relu_pool.shape[-1])
    l3_feature_map = conv(l2_feature_map_relu_pool, l3_filter)
    l3_feature_map_relu = relu(l3_feature_map)
    l3_feature_map_relu_pool = pooling(l3_feature_map_relu, 2, 2)

    '''---------------------- Graphing results of convolution ---------------------------'''
    draw_layer(img, l1_feature_map, l1_feature_map_relu, l1_feature_map_relu_pool,
               l2_feature_map, l2_feature_map_relu, l2_feature_map_relu_pool,
               l3_feature_map, l3_feature_map_relu, l3_feature_map_relu_pool)

    '''---------------------- Fully Connected layer ---------------------------'''
    # Flatten the final pooled feature maps into a 1-D vector for the FC layer.
    fc = l3_feature_map_relu_pool.reshape(-1)
    return fc
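# connvolution_process relies on free-standing conv, relu, and pooling NumPy
# helpers defined elsewhere in the repository. Below are illustrative
# stand-ins (suffixed names, hypothetical) whose signatures are inferred from
# the calls above: filters stacked on the first axis, valid (no-padding)
# convolution. They are sketches, not the original helpers.
import numpy

def relu_sketch(feature_map):
    # Element-wise max(0, x).
    return numpy.maximum(feature_map, 0)

def conv_sketch(img, conv_filter):
    """Valid 2-D convolution of img with a bank of square filters.

    conv_filter: (n_filters, size, size) for a 2-D img, or
    (n_filters, size, size, n_channels) when img has a channel axis.
    """
    n_filters, size = conv_filter.shape[0], conv_filter.shape[1]
    out_h = img.shape[0] - size + 1
    out_w = img.shape[1] - size + 1
    feature_maps = numpy.zeros((out_h, out_w, n_filters))
    for f in range(n_filters):
        for r in range(out_h):
            for c in range(out_w):
                region = img[r:r + size, c:c + size]
                feature_maps[r, c, f] = numpy.sum(region * conv_filter[f])
    return feature_maps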
def build_cnn_model(self):
    self.imgs = tf.placeholder('float32', [self.batch_size, self.input_dims])
    self.img_reshape = tf.reshape(self.imgs,
                                  [self.batch_size, self.w, self.h, self.channel])
    if self.synthetic:
        # With synthetic=True, each layer also returns a synthetic-gradient
        # estimate for its output; these are trained against the true
        # gradients below.
        self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'], self.synthetic_grad['l1'] = conv2d(
            self.img_reshape, 128, [5, 5], [1, 1],
            self.weight_initializer, self.bias_initializer,
            synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l1_con2d')
        self.layer_out['l1_pool'] = pooling(self.layer_out['l1'],
                                            kernel_size=[3, 3], stride=[1, 1], type='max')
        self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'], self.synthetic_grad['l2'] = conv2d(
            self.layer_out['l1_pool'], 128, [5, 5], [1, 1],
            self.weight_initializer, self.bias_initializer,
            synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l2_con2d')
        self.layer_out['l2_pool'] = pooling(self.layer_out['l2'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'], self.synthetic_grad['l3'] = conv2d(
            self.layer_out['l2_pool'], 128, [5, 5], [1, 1],
            self.weight_initializer, self.bias_initializer,
            synthetic=True, batch_norm=True, activation_fn=tf.nn.relu, name='l3_con2d')
        self.layer_out['l3_pool'] = pooling(self.layer_out['l3'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3_reshape'] = tf.reshape(self.layer_out['l3_pool'],
                                                  [self.batch_size, -1])
        self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'], self.synthetic_grad['l4'] = linear(
            self.layer_out['l3_reshape'], self.output_size,
            self.weight_initializer, self.bias_initializer,
            synthetic=True, activation_fn=tf.nn.relu, name='l4_linear')
    else:
        self.layer_out['l1'], self.var['l1_w'], self.var['l1_b'] = conv2d(
            self.img_reshape, 128, [5, 5], [1, 1],
            self.weight_initializer, self.bias_initializer,
            batch_norm=True, activation_fn=tf.nn.relu, name='l1_con2d')
        self.layer_out['l1_pool'] = pooling(self.layer_out['l1'],
                                            kernel_size=[3, 3], stride=[1, 1], type='max')
        self.layer_out['l2'], self.var['l2_w'], self.var['l2_b'] = conv2d(
            self.layer_out['l1_pool'], 128, [5, 5], [1, 1],
            self.weight_initializer, self.bias_initializer,
            batch_norm=True, activation_fn=tf.nn.relu, name='l2_con2d')
        self.layer_out['l2_pool'] = pooling(self.layer_out['l2'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3'], self.var['l3_w'], self.var['l3_b'] = conv2d(
            self.layer_out['l2_pool'], 128, [5, 5], [1, 1],
            self.weight_initializer, self.bias_initializer,
            batch_norm=True, activation_fn=tf.nn.relu, name='l3_con2d')
        self.layer_out['l3_pool'] = pooling(self.layer_out['l3'],
                                            kernel_size=[3, 3], stride=[1, 1], type='average')
        self.layer_out['l3_reshape'] = tf.reshape(self.layer_out['l3_pool'],
                                                  [self.batch_size, -1])
        self.layer_out['l4'], self.var['l4_w'], self.var['l4_b'] = linear(
            self.layer_out['l3_reshape'], self.output_size,
            self.weight_initializer, self.bias_initializer,
            activation_fn=tf.nn.relu, name='l4_linear')

    # Softmax over the final layer gives class probabilities.
    self.out_logit = tf.nn.softmax(self.layer_out['l4'])
    self.out_argmax = tf.argmax(self.out_logit, 1)
    self.labels = tf.placeholder('int32', [self.batch_size])
    # Keyword arguments are required by TF >= 1.0.
    self.loss_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=self.layer_out['l4'], labels=self.labels)
    self.loss = tf.reduce_sum(self.loss_entropy) / self.batch_size
    if self.synthetic:
        # tf.gradients returns a list; keep the single tensor per layer.
        self.grad_output['l1'] = tf.gradients(self.loss, self.layer_out['l1'])[0]
        self.grad_output['l2'] = tf.gradients(self.loss, self.layer_out['l2'])[0]
        self.grad_output['l3'] = tf.gradients(self.loss, self.layer_out['l3'])[0]
        self.grad_output['l4'] = tf.gradients(self.loss, self.layer_out['l4'])[0]
        # Train each synthetic-gradient module to match the true gradient.
        for k in self.grad_output.keys():
            self.grad_loss.append(
                tf.reduce_sum(tf.square(self.synthetic_grad[k] - self.grad_output[k])))
        self.grad_total_loss = sum(self.grad_loss)
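# build_cnn_model assumes conv2d/linear helpers that, when synthetic=True,
# additionally return a predicted (synthetic) gradient of the loss w.r.t. that
# layer's output, in the spirit of decoupled neural interfaces. The real
# helpers live elsewhere in the repo; the sketch below (hypothetical name and
# design) shows one plausible predictor: a zero-initialized 1x1 conv over the
# layer's own activations, so early estimates are near zero.
def synthetic_gradient_head_sketch(layer_out, name):
    """Predict d(loss)/d(layer_out) from the layer's activations (sketch)."""
    n_channels = layer_out.get_shape().as_list()[-1]
    w = tf.get_variable(name + '_sg_w', [1, 1, n_channels, n_channels],
                        initializer=tf.zeros_initializer())
    b = tf.get_variable(name + '_sg_b', [n_channels],
                        initializer=tf.zeros_initializer())
    return tf.nn.conv2d(layer_out, w, strides=[1, 1, 1, 1], padding='SAME') + b
# grad_total_loss above would then be minimized alongside the task loss so the
# heads learn to imitate the true backpropagated gradients.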
def validate(self):
    [e.eval() for e in encoders], [m.eval() for m in mesh_updates]
    with torch.no_grad():
        num_batches = 0
        loss_epoch = 0.
        loss_f = 0
        # Validation loop
        for i, data in enumerate(tqdm(dataloader_val), 0):
            optimizer.zero_grad()
            # Data creation
            tgt_points = data['points'].to(args.device)
            inp_images = data['imgs'].to(args.device)
            cam_mat = data['cam_mat'].to(args.device)
            cam_pos = data['cam_pos'].to(args.device)
            # Skip incomplete batches.
            if (tgt_points.shape[0] != args.batch_size) or (inp_images.shape[0] != args.batch_size) \
                    or (cam_mat.shape[0] != args.batch_size) or (cam_pos.shape[0] != args.batch_size):
                continue
            surf_loss = 0

            # Inference
            img_features = [e(inp_images) for e in encoders]
            for bn in range(args.batch_size):
                reset_meshes(meshes)
                # Layer 1
                pool_indices = get_pooling_index(meshes['init'][0].vertices,
                                                 cam_mat[bn], cam_pos[bn], encoding_dims)
                projected_image_features = pooling(img_features[0], pool_indices, bn)
                full_vert_features = torch.cat(
                    (meshes['init'][0].vertices, projected_image_features), dim=1)
                delta, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
                meshes['update'][0].vertices = meshes['init'][0].vertices + delta.clone()
                future_features = split_meshes(meshes, future_features, 0)
                # Layer 2
                pool_indices = get_pooling_index(meshes['init'][1].vertices,
                                                 cam_mat[bn], cam_pos[bn], encoding_dims)
                projected_image_features = pooling(img_features[1], pool_indices, bn)
                full_vert_features = torch.cat(
                    (meshes['init'][1].vertices, projected_image_features, future_features), dim=1)
                delta, future_features = mesh_updates[1](full_vert_features, meshes['adjs'][1])
                meshes['update'][1].vertices = meshes['init'][1].vertices + delta.clone()
                future_features = split_meshes(meshes, future_features, 1)
                # Layer 3
                pool_indices = get_pooling_index(meshes['init'][2].vertices,
                                                 cam_mat[bn], cam_pos[bn], encoding_dims)
                projected_image_features = pooling(img_features[2], pool_indices, bn)
                full_vert_features = torch.cat(
                    (meshes['init'][2].vertices, projected_image_features, future_features), dim=1)
                delta, future_features = mesh_updates[2](full_vert_features, meshes['adjs'][2])
                meshes['update'][2].vertices = meshes['init'][2].vertices + delta.clone()
                pred_points, _ = meshes['update'][2].sample(10000)

                # Losses
                surf_loss = 3000 * nvl.metrics.point.chamfer_distance(pred_points, tgt_points[bn])
                loss_f += (nvl.metrics.point.f_score(
                    .57 * meshes['update'][2].sample(2466)[0], .57 * tgt_points[bn],
                    extend=False).item() / float(args.batch_size))
                loss_epoch += surf_loss.item() / float(args.batch_size)

            # Logging
            num_batches += 1
            if i % args.print_every == 0:
                out_loss = loss_epoch / float(num_batches)
                out_f_loss = loss_f / float(num_batches)
                tqdm.write(f'[VAL] Epoch {self.cur_epoch:03d}, Batch {i:03d}: '
                           f'loss: {out_loss:3.3f}, F: {out_f_loss:3.3f}')
        out_f_loss = loss_f / float(num_batches)
        out_loss = loss_epoch / float(num_batches)
        tqdm.write(f'[VAL Total] Epoch {self.cur_epoch:03d}, Batch {i:03d}: '
                   f'loss: {out_loss:3.3f}, F: {out_f_loss:3.3f}')
        self.val_loss.append(out_f_loss)
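# The validation loss above is a scaled chamfer distance between points
# sampled from the predicted surface and the target cloud. The snippet uses
# nvl.metrics.point.chamfer_distance; the sketch below is only an illustrative
# restatement in plain PyTorch (bidirectional mean of nearest-neighbor squared
# distances), not the library's implementation.
import torch

def chamfer_distance_sketch(a, b):
    """a: (N, 3) points, b: (M, 3) points; returns a scalar tensor."""
    # Pairwise squared distances, shape (N, M).
    d2 = torch.cdist(a, b, p=2) ** 2
    return d2.min(dim=1).values.mean() + d2.min(dim=0).values.mean()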
def train(self):
    loss_epoch = 0.
    num_batches = 0
    [e.train() for e in encoders], [m.train() for m in mesh_updates]
    # Train loop
    for i, data in enumerate(tqdm(dataloader_train), 0):
        optimizer.zero_grad()
        # Data creation
        tgt_points = data['points'].to(args.device)
        inp_images = data['imgs'].to(args.device)
        cam_mat = data['cam_mat'].to(args.device)
        cam_pos = data['cam_pos'].to(args.device)
        # Skip incomplete batches.
        if (tgt_points.shape[0] != args.batch_size) or (inp_images.shape[0] != args.batch_size) \
                or (cam_mat.shape[0] != args.batch_size) or (cam_pos.shape[0] != args.batch_size):
            continue
        surf_loss, edge_loss, lap_loss, loss, f_loss = 0, 0, 0, 0, 0

        # Inference
        img_features = [e(inp_images) for e in encoders]
        for bn in range(args.batch_size):
            reset_meshes(meshes)
            # Layer 1
            pool_indices = get_pooling_index(meshes['init'][0].vertices,
                                             cam_mat[bn], cam_pos[bn], encoding_dims)
            projected_image_features = pooling(img_features[0], pool_indices, bn)
            full_vert_features = torch.cat(
                (meshes['init'][0].vertices, projected_image_features), dim=1)
            delta, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
            meshes['update'][0].vertices = meshes['init'][0].vertices + delta.clone()
            future_features = split_meshes(meshes, future_features, 0)
            # Layer 2
            pool_indices = get_pooling_index(meshes['init'][1].vertices,
                                             cam_mat[bn], cam_pos[bn], encoding_dims)
            projected_image_features = pooling(img_features[1], pool_indices, bn)
            full_vert_features = torch.cat(
                (meshes['init'][1].vertices, projected_image_features, future_features), dim=1)
            delta, future_features = mesh_updates[1](full_vert_features, meshes['adjs'][1])
            meshes['update'][1].vertices = meshes['init'][1].vertices + delta.clone()
            future_features = split_meshes(meshes, future_features, 1)
            # Layer 3
            pool_indices = get_pooling_index(meshes['init'][2].vertices,
                                             cam_mat[bn], cam_pos[bn], encoding_dims)
            projected_image_features = pooling(img_features[2], pool_indices, bn)
            full_vert_features = torch.cat(
                (meshes['init'][2].vertices, projected_image_features, future_features), dim=1)
            delta, future_features = mesh_updates[2](full_vert_features, meshes['adjs'][2])
            meshes['update'][2].vertices = meshes['init'][2].vertices + delta.clone()

            if args.latent_loss:
                # Compare latent codes of the predicted and ground-truth meshes.
                inds = data['adj_indices'][bn]
                vals = data['adj_values'][bn]
                gt_verts = data['verts'][bn].to(args.device)
                vert_len = gt_verts.shape[0]
                gt_adj = torch.sparse.FloatTensor(
                    inds, vals, torch.Size([vert_len, vert_len])).to(args.device)
                predicted_latent = mesh_encoder(meshes['update'][2].vertices, meshes['adjs'][2])
                gt_latent = mesh_encoder(gt_verts, gt_adj)
                latent_loss = torch.mean(torch.abs(predicted_latent - gt_latent)) * .2

            # Losses
            surf_loss += 6000 * loss_surf(meshes, tgt_points[bn]) / float(args.batch_size)
            edge_loss += 300 * .6 * loss_edge(meshes) / float(args.batch_size)
            lap_loss += 1500 * loss_lap(meshes) / float(args.batch_size)
            f_loss += nvl.metrics.point.f_score(
                .57 * meshes['update'][2].sample(2466)[0], .57 * tgt_points[bn],
                extend=False) / float(args.batch_size)

        loss = surf_loss + edge_loss + lap_loss
        if args.latent_loss:
            loss += latent_loss
        loss.backward()
        loss_epoch += float(surf_loss.item())

        # Logging
        num_batches += 1
        if i % args.print_every == 0:
            message = f'[TRAIN] Epoch {self.cur_epoch:03d}, Batch {i:03d}: Loss: {surf_loss.item():4.3f}, '
            message += f'Lap: {lap_loss.item():3.3f}, Edge: {edge_loss.item():3.3f}'
            message += f' F: {f_loss.item():3.3f}'
            if args.latent_loss:
                message += f', Lat: {latent_loss.item():3.3f}'
            tqdm.write(message)
        optimizer.step()
    loss_epoch = loss_epoch / num_batches
    self.train_loss.append(loss_epoch)
    self.cur_epoch += 1
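# get_pooling_index / pooling above implement perceptual feature pooling in
# the style of Pixel2Mesh: each 3-D vertex is projected through the camera
# into the image plane, and the encoder feature map is bilinearly sampled at
# that location, so every vertex receives an image-conditioned feature vector.
# Those helpers live elsewhere in the repo; this sketch (hypothetical name,
# assumed conventions: 3x3 cam_mat, feature map layout (B, C, H, W), uv
# already normalized to [-1, 1]) only illustrates the idea.
import torch
import torch.nn.functional as F

def project_and_pool_sketch(vertices, cam_mat, cam_pos, feat_map, bn):
    """vertices: (V, 3); feat_map: (B, C, H, W). Returns (V, C) features."""
    # Move points into the camera frame and apply a pinhole projection.
    pts = (vertices - cam_pos) @ cam_mat.t()
    uv = pts[:, :2] / pts[:, 2:3].clamp(min=1e-6)
    # grid_sample interpolates the feature map at each projected location.
    grid = uv.clamp(-1, 1).view(1, 1, -1, 2)
    sampled = F.grid_sample(feat_map[bn:bn + 1], grid, align_corners=True)
    return sampled.view(feat_map.shape[1], -1).t()  # (V, C)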
def validate(self):
    [e.eval() for e in encoders], [m.eval() for m in mesh_updates]
    with torch.no_grad():
        num_batches = 0
        loss_epoch = 0.
        f_score = 0
        # Validation loop
        for i, sample in enumerate(tqdm(dataloader_val), 0):
            data = sample['data']
            optimizer.zero_grad()
            # Data Creation
            tgt_points = data['points'].to(args.device)
            inp_images = data['images'].to(args.device)
            cam_mat = data['params']['cam_mat'].to(args.device)
            cam_pos = data['params']['cam_pos'].to(args.device)
            # Skip incomplete batches.
            if (tgt_points.shape[0] != args.batch_size) or (inp_images.shape[0] != args.batch_size) \
                    or (cam_mat.shape[0] != args.batch_size) or (cam_pos.shape[0] != args.batch_size):
                continue
            surf_loss = 0

            # Inference
            img_features = [e(inp_images) for e in encoders]
            for bn in range(args.batch_size):
                reset_meshes(meshes)
                # Layer_1
                pool_indices = get_pooling_index(meshes['init'][0].vertices,
                                                 cam_mat[bn], cam_pos[bn], encoding_dims)
                projected_image_features = pooling(img_features[0], pool_indices, bn)
                full_vert_features = torch.cat(
                    (meshes['init'][0].vertices, projected_image_features), dim=1)
                delta, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
                meshes['update'][0].vertices = meshes['init'][0].vertices + delta.clone()
                future_features = split_meshes(meshes, future_features, 0, angle=ANGLE_THRESHOLD)
                # Layer_2
                pool_indices = get_pooling_index(meshes['init'][1].vertices,
                                                 cam_mat[bn], cam_pos[bn], encoding_dims)
                projected_image_features = pooling(img_features[1], pool_indices, bn)
                full_vert_features = torch.cat(
                    (meshes['init'][1].vertices, projected_image_features, future_features), dim=1)
                delta, future_features = mesh_updates[1](full_vert_features, meshes['adjs'][1])
                meshes['update'][1].vertices = meshes['init'][1].vertices + delta.clone()
                future_features = split_meshes(meshes, future_features, 1, angle=ANGLE_THRESHOLD)
                # Layer_3
                pool_indices = get_pooling_index(meshes['init'][2].vertices,
                                                 cam_mat[bn], cam_pos[bn], encoding_dims)
                projected_image_features = pooling(img_features[2], pool_indices, bn)
                full_vert_features = torch.cat(
                    (meshes['init'][2].vertices, projected_image_features, future_features), dim=1)
                delta, future_features = mesh_updates[2](full_vert_features, meshes['adjs'][2])
                meshes['update'][2].vertices = meshes['init'][2].vertices + delta.clone()
                pred_points, _ = meshes['update'][2].sample(10000)

                # Losses
                surf_loss = weights['surface'] * kal.metrics.point.chamfer_distance(
                    pred_points, tgt_points[bn])
                # F-Score
                f_score += (kal.metrics.point.f_score(
                    .57 * meshes['update'][2].sample(2466)[0], .57 * tgt_points[bn],
                    extend=False).item() / args.batch_size)
                loss_epoch += surf_loss.item() / args.batch_size

            # Logging
            num_batches += 1
            if i % args.print_every == 0:
                out_loss = loss_epoch / num_batches
                out_f_score = f_score / num_batches
                tqdm.write(f'[VAL]\tEpoch {self.cur_epoch:03d}, Batch {i:03d}: '
                           f'F-Score: {out_f_score:3.3f}')
        out_loss = loss_epoch / num_batches
        out_f_score = f_score / num_batches
        tqdm.write(f'[VAL Total] Epoch {self.cur_epoch:03d}, Batch {i:03d}: '
                   f'F-Score: {out_f_score:3.3f}')
        self.val_score[self.cur_epoch] = out_f_score
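# kal.metrics.point.f_score reports reconstruction quality as the harmonic
# mean of precision and recall at a distance threshold. The library call is
# what the code above uses; the sketch below is only an illustrative
# restatement in PyTorch (the threshold value is an assumption).
import torch

def f_score_sketch(pred, gt, radius=0.01):
    """pred: (N, 3), gt: (M, 3) point clouds; returns a scalar tensor."""
    d = torch.cdist(pred, gt)
    # Fraction of predicted points near the target, and vice versa.
    precision = (d.min(dim=1).values < radius).float().mean()
    recall = (d.min(dim=0).values < radius).float().mean()
    return 2 * precision * recall / (precision + recall + 1e-8)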
def train(self):
    loss_epoch = 0.
    num_batches = 0
    [e.train() for e in encoders], [m.train() for m in mesh_updates]
    # Train loop
    for i, sample in enumerate(tqdm(dataloader_train), 0):
        data = sample['data']
        optimizer.zero_grad()
        # Data Creation
        tgt_points = data['points'].to(args.device)
        inp_images = data['images'].to(args.device)
        cam_mat = data['params']['cam_mat'].to(args.device)
        cam_pos = data['params']['cam_pos'].to(args.device)
        # Skip incomplete batches.
        if (tgt_points.shape[0] != args.batch_size) or (inp_images.shape[0] != args.batch_size) \
                or (cam_mat.shape[0] != args.batch_size) or (cam_pos.shape[0] != args.batch_size):
            continue
        surf_loss, edge_loss, lap_loss, latent_loss, loss, f_score = 0, 0, 0, 0, 0, 0

        # Inference
        img_features = [e(inp_images) for e in encoders]
        for bn in range(args.batch_size):
            reset_meshes(meshes)
            # Layer_1
            pool_indices = get_pooling_index(meshes['init'][0].vertices,
                                             cam_mat[bn], cam_pos[bn], encoding_dims)
            projected_image_features = pooling(img_features[0], pool_indices, bn)
            full_vert_features = torch.cat(
                (meshes['init'][0].vertices, projected_image_features), dim=1)
            delta, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
            meshes['update'][0].vertices = meshes['init'][0].vertices + delta.clone()
            future_features = split_meshes(meshes, future_features, 0, angle=ANGLE_THRESHOLD)
            # Layer_2
            pool_indices = get_pooling_index(meshes['init'][1].vertices,
                                             cam_mat[bn], cam_pos[bn], encoding_dims)
            projected_image_features = pooling(img_features[1], pool_indices, bn)
            full_vert_features = torch.cat(
                (meshes['init'][1].vertices, projected_image_features, future_features), dim=1)
            delta, future_features = mesh_updates[1](full_vert_features, meshes['adjs'][1])
            meshes['update'][1].vertices = meshes['init'][1].vertices + delta.clone()
            future_features = split_meshes(meshes, future_features, 1, angle=ANGLE_THRESHOLD)
            # Layer_3
            pool_indices = get_pooling_index(meshes['init'][2].vertices,
                                             cam_mat[bn], cam_pos[bn], encoding_dims)
            projected_image_features = pooling(img_features[2], pool_indices, bn)
            full_vert_features = torch.cat(
                (meshes['init'][2].vertices, projected_image_features, future_features), dim=1)
            delta, future_features = mesh_updates[2](full_vert_features, meshes['adjs'][2])
            meshes['update'][2].vertices = meshes['init'][2].vertices + delta.clone()

            if args.latent_loss:
                # Compare latent codes of the predicted and ground-truth meshes.
                inds = data['adj']['indices'][bn]
                vals = data['adj']['values'][bn]
                gt_verts = data['vertices'][bn].to(args.device)
                vert_len = gt_verts.shape[0]
                gt_adj = torch.sparse.FloatTensor(
                    inds, vals, torch.Size([vert_len, vert_len])).to(args.device)
                predicted_latent = mesh_encoder(meshes['update'][2].vertices, meshes['adjs'][2])
                gt_latent = mesh_encoder(gt_verts, gt_adj)
                latent_loss += weights['latent'] * torch.mean(
                    torch.abs(predicted_latent - gt_latent)) / args.batch_size

            # Losses
            surf_loss += weights['surface'] * loss_surf(meshes, tgt_points[bn]) / args.batch_size
            edge_loss += weights['edge'] * loss_edge(meshes) / args.batch_size
            lap_loss += weights['laplace'] * loss_lap(meshes) / args.batch_size
            # F-Score
            f_score += kal.metrics.point.f_score(
                .57 * tgt_points[bn], .57 * meshes['update'][2].sample(2466)[0],
                extend=False) / args.batch_size

        loss = surf_loss + edge_loss + lap_loss
        if args.latent_loss:
            loss += latent_loss
        loss.backward()
        loss_epoch += float(loss.item())

        # Logging
        num_batches += 1
        if i % args.print_every == 0:
            message = f'[TRAIN]\tEpoch {self.cur_epoch:03d}, Batch {i:03d} | Total Loss: {loss.item():4.3f} '
            message += f'Surf: {surf_loss.item():3.3f}, Lap: {lap_loss.item():3.3f}, '
            message += f'Edge: {edge_loss.item():3.3f}'
            if args.latent_loss:
                message += f', Latent: {latent_loss.item():3.3f}'
            message += f', F-score: {f_score.item():3.3f}'
            tqdm.write(message)
        optimizer.step()
    loss_epoch = loss_epoch / num_batches
    self.train_loss[self.cur_epoch] = loss_epoch
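# The kal-based train/validate pair above reads its loss scales from a shared
# `weights` dict and a split threshold ANGLE_THRESHOLD, neither of which is
# defined in this file. A plausible configuration, inferred from the
# hard-coded constants in the earlier nvl version (6000, 300 * .6, 1500, .2)
# and labeled as an assumption:
weights = {
    'surface': 6000.,  # chamfer-distance term
    'edge': 180.,      # edge-length regularizer (300 * .6)
    'laplace': 1500.,  # laplacian smoothing term
    'latent': .2,      # latent-consistency term
}
ANGLE_THRESHOLD = 70   # degrees; assumed face-splitting threshold for split_meshes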
cam_mat = data['cam_mat'].to(args.device)
cam_pos = data['cam_pos'].to(args.device)
tgt_verts = data['verts'].to(args.device)
tgt_faces = data['faces'].to(args.device)

# Inference
img_features = [e(inp_images) for e in encoders]
reset_meshes(meshes)
# Layer 1
pool_indices = get_pooling_index(meshes['init'][0].vertices, cam_mat, cam_pos, encoding_dims)
projected_image_features = pooling(img_features[0], pool_indices, 0)
full_vert_features = torch.cat((meshes['init'][0].vertices, projected_image_features), dim=1)
delta, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
meshes['update'][0].vertices = meshes['init'][0].vertices + delta.clone()
future_features = split_meshes(meshes, future_features, 0)
# Layer 2
pool_indices = get_pooling_index(meshes['init'][1].vertices, cam_mat, cam_pos, encoding_dims)
projected_image_features = pooling(img_features[1], pool_indices, 0)
full_vert_features = torch.cat((meshes['init'][1].vertices, projected_image_features, future_features), dim=1)
delta, future_features = mesh_updates[1](full_vert_features, meshes['adjs'][1])
meshes['update'][1].vertices = meshes['init'][1].vertices + delta.clone()
def validate(self):
    encoder.eval(), [m.eval() for m in mesh_updates]
    with torch.no_grad():
        num_batches = 0
        loss_epoch = 0.
        f_loss = 0.
        # Validation loop
        for i, data in enumerate(tqdm(dataloader_val), 0):
            optimizer.zero_grad()
            # Data creation
            tgt_points = data['points'].to(args.device)[0]
            inp_images = data['imgs'].to(args.device)
            cam_mat = data['cam_mat'].to(args.device)[0]
            cam_pos = data['cam_pos'].to(args.device)[0]

            # Inference
            img_features = encoder(inp_images)
            # Layer 1
            pool_indices = get_pooling_index(meshes['init'][0].vertices,
                                             cam_mat, cam_pos, encoding_dims)
            projected_image_features = pooling(img_features, pool_indices)
            full_vert_features = torch.cat(
                (meshes['init'][0].vertices, projected_image_features), dim=1)
            pred_verts, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
            meshes['update'][0].vertices = pred_verts.clone()
            # Layer 2
            future_features = split(meshes, future_features, 0)
            pool_indices = get_pooling_index(meshes['init'][1].vertices,
                                             cam_mat, cam_pos, encoding_dims)
            projected_image_features = pooling(img_features, pool_indices)
            full_vert_features = torch.cat(
                (meshes['init'][1].vertices, projected_image_features, future_features), dim=1)
            pred_verts, future_features = mesh_updates[1](full_vert_features, meshes['adjs'][1])
            meshes['update'][1].vertices = pred_verts.clone()
            # Layer 3
            future_features = split(meshes, future_features, 1)
            pool_indices = get_pooling_index(meshes['init'][2].vertices,
                                             cam_mat, cam_pos, encoding_dims)
            projected_image_features = pooling(img_features, pool_indices)
            full_vert_features = torch.cat(
                (meshes['init'][2].vertices, projected_image_features, future_features), dim=1)
            pred_verts, future_features = mesh_updates[2](full_vert_features, meshes['adjs'][2])
            meshes['update'][2].vertices = pred_verts.clone()
            f_loss += kal.metrics.point.f_score(
                meshes['update'][2].sample(2466)[0], tgt_points, extend=False)

            # Losses
            surf_loss = 3000 * kal.metrics.point.chamfer_distance(pred_verts.clone(), tgt_points)
            loss_epoch += surf_loss.item()

            # Logging
            num_batches += 1
            if i % args.print_every == 0:
                out_loss = loss_epoch / float(num_batches)
                f_out_loss = f_loss / float(num_batches)
                tqdm.write(f'[VAL] Epoch {self.cur_epoch:03d}, Batch {i:03d}: '
                           f'loss: {out_loss:3.3f}, F: {f_out_loss.item():3.3f}')
        out_loss = loss_epoch / float(num_batches)
        f_out_loss = f_loss / float(num_batches)
        tqdm.write(f'[VAL Total] Epoch {self.cur_epoch:03d}, Batch {i:03d}: '
                   f'loss: {out_loss:3.3f}, F: {f_out_loss.item():3.3f}')
        self.val_loss.append(out_loss)
def train(self):
    loss_epoch = 0.
    num_batches = 0
    encoder.train(), [m.train() for m in mesh_updates]
    # Train loop
    for i, data in enumerate(tqdm(dataloader_train), 0):
        optimizer.zero_grad()
        # Data creation
        tgt_points = data['points'].to(args.device)[0]
        tgt_norms = data['normals'].to(args.device)[0]
        inp_images = data['imgs'].to(args.device)
        cam_mat = data['cam_mat'].to(args.device)[0]
        cam_pos = data['cam_pos'].to(args.device)[0]

        # Inference
        img_features = encoder(inp_images)
        # Layer 1
        pool_indices = get_pooling_index(meshes['init'][0].vertices,
                                         cam_mat, cam_pos, encoding_dims)
        projected_image_features = pooling(img_features, pool_indices)
        full_vert_features = torch.cat(
            (meshes['init'][0].vertices, projected_image_features), dim=1)
        pred_verts, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
        meshes['update'][0].vertices = pred_verts.clone()
        # Layer 2
        future_features = split(meshes, future_features, 0)
        pool_indices = get_pooling_index(meshes['init'][1].vertices,
                                         cam_mat, cam_pos, encoding_dims)
        projected_image_features = pooling(img_features, pool_indices)
        full_vert_features = torch.cat(
            (meshes['init'][1].vertices, projected_image_features, future_features), dim=1)
        pred_verts, future_features = mesh_updates[1](full_vert_features, meshes['adjs'][1])
        meshes['update'][1].vertices = pred_verts.clone()
        # Layer 3
        future_features = split(meshes, future_features, 1)
        pool_indices = get_pooling_index(meshes['init'][2].vertices,
                                         cam_mat, cam_pos, encoding_dims)
        projected_image_features = pooling(img_features, pool_indices)
        full_vert_features = torch.cat(
            (meshes['init'][2].vertices, projected_image_features, future_features), dim=1)
        pred_verts, future_features = mesh_updates[2](full_vert_features, meshes['adjs'][2])
        meshes['update'][2].vertices = pred_verts.clone()

        # Losses
        surf_loss = 3000 * loss_surf(meshes, tgt_points)
        edge_loss = 300 * loss_edge(meshes)
        lap_loss = 1500 * loss_lap(meshes)
        norm_loss = .5 * loss_norm(meshes, tgt_points, tgt_norms)
        loss = surf_loss + edge_loss + lap_loss + norm_loss
        loss.backward()
        loss_epoch += float(surf_loss.item())

        # Logging
        num_batches += 1
        if i % args.print_every == 0:
            f_loss = kal.metrics.point.f_score(
                meshes['update'][2].sample(2466)[0], tgt_points, extend=False)
            message = f'[TRAIN] Epoch {self.cur_epoch:03d}, Batch {i:03d}: Loss: {surf_loss.item():4.3f}, '
            message += f'Lap: {lap_loss.item():3.3f}, Edge: {edge_loss.item():3.3f}, Norm: {norm_loss.item():3.3f}'
            message += f' F: {f_loss.item():3.3f}'
            tqdm.write(message)
        optimizer.step()
    loss_epoch = loss_epoch / num_batches
    self.train_loss.append(loss_epoch)
    self.cur_epoch += 1
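# loss_surf, loss_edge, loss_lap, and loss_norm are Pixel2Mesh-style
# regularizers defined elsewhere in the repo. As one illustration, the sketch
# below (hypothetical name and mesh API: vertices (V, 3), edges (E, 2) long
# tensor) shows a minimal edge-length regularizer; the real loss_edge may
# differ.
def edge_length_loss_sketch(vertices, edges):
    # Penalize long edges so the predicted surface stays locally uniform.
    v0 = vertices[edges[:, 0]]
    v1 = vertices[edges[:, 1]]
    return ((v0 - v1) ** 2).sum(dim=1).mean()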
for data in tqdm(valid_set):
    # Data creation
    tgt_points = data['points'].to(args.device)
    inp_images = data['imgs'].to(args.device).unsqueeze(0)
    cam_mat = data['cam_mat'].to(args.device)
    cam_pos = data['cam_pos'].to(args.device)

    # Inference
    img_features = encoder(inp_images)
    # Layer 1
    pool_indices = get_pooling_index(meshes['init'][0].vertices,
                                     cam_mat, cam_pos, encoding_dims)
    projected_image_features = pooling(img_features, pool_indices)
    full_vert_features = torch.cat(
        (meshes['init'][0].vertices, projected_image_features), dim=1)
    pred_verts, future_features = mesh_updates[0](full_vert_features, meshes['adjs'][0])
    meshes['update'][0].vertices = pred_verts.clone()
    # Layer 2
    future_features = split(meshes, future_features, 0)
    pool_indices = get_pooling_index(meshes['init'][1].vertices,
                                     cam_mat, cam_pos, encoding_dims)
    projected_image_features = pooling(img_features, pool_indices)
    full_vert_features = torch.cat(
        (meshes['init'][1].vertices, projected_image_features, future_features),
def _build_fcn(self, input_op, reuse=False, is_training=True):
    row, col = self.input_shape[0], self.input_shape[1]
    row_p1, col_p1 = int(row / 2), int(col / 2)
    row_p2, col_p2 = int(row_p1 / 2), int(col_p1 / 2)
    with tf.variable_scope('FCNN', reuse=reuse):
        # Encoder
        conv1_1 = conv2d_relu(input_op, n_out=64, name='conv1_1', is_training=is_training)
        conv1_2 = conv2d_relu(conv1_1, n_out=64, name='conv1_2', is_training=is_training)
        pool_1 = pooling(conv1_2, name='pool_1')
        conv2_1 = conv2d_relu(pool_1, n_out=128, name='conv2_1', is_training=is_training)
        conv2_2 = conv2d_relu(conv2_1, n_out=128, name='conv2_2', is_training=is_training)
        pool_2 = pooling(conv2_2, name='pool_2')
        conv3_1 = dilated_block(pool_2, n_out=256, is_training=is_training, name='conv3_1')
        conv3_2 = dilated_block(conv3_1, n_out=256, is_training=is_training, name='conv3_2')
        conv3_3 = dilated_block(conv3_2, n_out=256, is_training=is_training, name='conv3_3')
        pool_3 = pooling(conv3_3, name='pool_3')
        conv4_1 = dilated_block(pool_3, n_out=512, is_training=is_training, name='conv4_1')
        conv4_2 = dilated_block(conv4_1, n_out=512, is_training=is_training, name='conv4_2')
        conv4_3 = dilated_block(conv4_2, n_out=512, is_training=is_training, name='conv4_3')
        # Decoder with skip connections back to the encoder stages
        deconv_1 = deconv2d(conv4_3, output_shape=[self.batch_size, row_p2, col_p2, 256],
                            name='deconv_1')
        concat_1 = tf.concat([conv3_3, deconv_1], axis=3, name='concat_1')
        conv5_1 = dilated_block(concat_1, n_out=256, is_training=is_training, name='conv5_1')
        conv5_2 = dilated_block(conv5_1, n_out=256, is_training=is_training, name='conv5_2')
        conv5_3 = dilated_block(conv5_2, n_out=256, is_training=is_training, name='conv5_3')
        deconv_2 = deconv2d(conv5_3, output_shape=[self.batch_size, row_p1, col_p1, 128],
                            name='deconv_2')
        concat_2 = tf.concat([conv2_2, deconv_2], axis=3, name='concat_2')
        conv6_1 = conv2d_relu(concat_2, n_out=151, name='conv6_1', is_training=is_training)
        conv6_2 = conv2d_relu(conv6_1, n_out=151, name='conv6_2', is_training=is_training)
        deconv_3 = deconv2d(conv6_2, output_shape=[self.batch_size, row, col, 64],
                            name='deconv_3')
        concat_3 = tf.concat([conv1_2, deconv_3], axis=3, name='concat_3')
        conv7_1 = conv2d_relu(concat_3, n_out=151, name='conv7_1', is_training=is_training)
        conv7_2 = conv2d(conv7_1, n_out=151, name='conv7_2')
        # Return per-pixel class probabilities and the raw logits.
        return tf.nn.softmax(conv7_2, axis=3), conv7_2
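# A minimal sketch of wiring _build_fcn into a segmentation loss; the 151
# output channels suggest ADE20K-style semantic segmentation. TF 1.x graph
# mode is assumed, and the placeholder names (model, images_ph, labels_ph)
# are hypothetical, introduced only for this illustration.
probs, logits = model._build_fcn(images_ph, reuse=False, is_training=True)
labels_ph = tf.placeholder('int32', [None, None, None])  # per-pixel class ids
pixel_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    logits=logits, labels=labels_ph)
seg_loss = tf.reduce_mean(pixel_loss)
train_op = tf.train.AdamOptimizer(1e-4).minimize(seg_loss)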