def train_one_bezier_transformer(model, dataset, batch_size, num_epochs, optimizer,
                                 num_experiment, lr=1e-4, cuda=True, debug=True):
    print("\n\nTHE TRAINING BEGINS")
    print("Experiment #{} ---> batch_size={} num_epochs={} learning_rate={}".format(
        num_experiment, batch_size, num_epochs, lr))

    # basedir = "/data1slow/users/asuso/trans_bezier"
    basedir = "/home/asuso/PycharmProjects/trans_bezier"

    # Initialize the probabilistic map generator and the covariance matrices
    probabilistic_map_generator = ProbabilisticMap((model.image_size, model.image_size, 50))
    cp_covariances = torch.tensor([[[3, 0], [0, 3]] for i in range(model.num_cp)], dtype=torch.float32)
    # cp_covariances = torch.empty((model.num_cp, batch_size, 2, 2))
    # for i in range(batch_size):
    #     cp_covariances[:, i, :, :] = cp_covariance
    if cuda:
        probabilistic_map_generator = probabilistic_map_generator.cuda()
        cp_covariances = cp_covariances.cuda()

    # Track the best loss obtained on validation so far
    best_loss = float('inf')

    # Initialize the tensorboard writer and the variables used for data collection
    cummulative_loss = 0
    if debug:
        # Tensorboard writer
        writer = SummaryWriter(basedir + "/graphics/DeterministicBezierEncoder/OneBezierModels/FixedCP/" +
                               str(model.num_cp) + "CP_exp" + str(num_experiment))
        counter = 0

    # Split the dataset into images and sequences
    images, sequences = dataset
    # Send the data and the model to the GPU
    if cuda:
        images = images.cuda()
        sequences = sequences.cuda()
        model = model.cuda()

    # Split the dataset into training and validation
    # images.shape=(N, 1, 64, 64)
    # sequences.shape=(100, N, 2)
    im_training = images[:40000]
    im_validation = images[40000:]
    seq_training = sequences[:, :40000]
    seq_validation = sequences[:, 40000:]

    # Define the optimizer and the learning rate scheduler
    optimizer = optimizer(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=10**(-0.5), patience=8, min_lr=1e-8)

    for epoch in range(num_epochs):
        print("Beginning epoch number", epoch + 1)
        for i in range(0, len(im_training) - batch_size + 1, batch_size):
            # Get the batch
            im = im_training[i:i + batch_size]
            seq = seq_training[:, i:i + batch_size]

            # Run the model on the batch
            probabilities = model(im, seq)
            # probabilities.shape = (tgt_seq_len, batch_size, num_probabilities)
            # seq.shape = (tgt_seq_len, batch_size, 1)

            # Compute the loss: the control-point sequence and its reverse describe
            # the same curve, so we keep the smaller of the two cross-entropies
            loss = 0
            for k in range(batch_size):
                loss_1 = F.cross_entropy(probabilities[:, k], seq[:, k])
                seq_inver = torch.empty_like(seq[:, k])
                seq_inver[:-1] = seq[:-1, k].flip(0)
                seq_inver[-1] = seq[-1, k]
                loss_2 = F.cross_entropy(probabilities[:, k], seq_inver)
                if loss_1 < loss_2:
                    loss += loss_1
                else:
                    loss += loss_2
            loss = loss / batch_size
            if debug:
                cummulative_loss += loss

            # Backpropagation and one gradient descent step
            loss.backward()
            optimizer.step()
            model.zero_grad()

            # Collect data for tensorboard
            k = int(40000 / (batch_size * 5))
            if debug and i % k == k - 1:
                writer.add_scalar("Training/loss", cummulative_loss / k, counter)
                counter += 1
                cummulative_loss = 0

        """
        After each epoch we test the model on the validation set. Specifically:
        - We compute the model loss on the validation set.
        - We run 500 predictions on validation images, render an image from the
          predicted Bezier-curve parametrization, and compute the IoU, chamfer_distance
          and differentiable_chamfer_distance (probabilistic_map) metrics for these
          predictions against the ground truth.
        """
        model.eval()
        with torch.no_grad():
            cummulative_loss = 0
            num_vueltas = 0
            for j in range(0, len(im_validation) - batch_size + 1, batch_size):
                im = im_validation[j:j + batch_size]
                seq = seq_validation[:, j:j + batch_size]

                # Run the model on the batch
                probabilities = model(im, seq)
                # probabilities.shape = (tgt_seq_len, batch_size, num_probabilities)
                # seq.shape = (tgt_seq_len, batch_size, 1)

                loss = 0
                for k in range(batch_size):
                    loss_1 = F.cross_entropy(probabilities[:, k], seq[:, k])
                    seq_inver = torch.empty_like(seq[:, k])
                    seq_inver[:-1] = seq[:-1, k].flip(0)
                    seq_inver[-1] = seq[-1, k]
                    loss_2 = F.cross_entropy(probabilities[:, k], seq_inver)
                    if loss_1 < loss_2:
                        loss += loss_1
                    else:
                        loss += loss_2
                        num_vueltas += 1
                loss = loss / batch_size
                cummulative_loss += loss
            print("Fraction of validation samples where the reversed sequence scored lower:", num_vueltas / 10000)

            # Apply the learning rate scheduler
            scheduler.step(cummulative_loss)

            # Collect data for tensorboard
            if debug:
                writer.add_scalar("Validation/loss", cummulative_loss / (j / batch_size + 1), counter)

            # If this is the lowest loss so far, save the model weights
            if cummulative_loss < best_loss:
                print("The model has improved!! New loss={}".format(cummulative_loss / (j / batch_size + 1)))
                best_loss = cummulative_loss
                torch.save(model.state_dict(),
                           basedir + "/state_dicts/DeterministicBezierEncoder/OneBezierModels/FixedCP/" +
                           str(model.num_cp) + "CP_exp" + str(num_experiment))
            cummulative_loss = 0

            # Start the evaluation of the "prediction" mode
            iou_value = 0
            chamfer_value = 0
            probabilistic_similarity = 0

            # First, predict 10 images that will be stored in tensorboard
            target_images = torch.empty((10, 1, 64, 64))
            predicted_images = torch.empty((10, 1, 64, 64))
            for idx in range(0, 200, 20):
                tgt_im = im_validation[idx].unsqueeze(0)
                pred_im = torch.zeros_like(tgt_im)
                control_points = model.predict(tgt_im)
                resolution = 150
                output = bezier(control_points.unsqueeze(1),
                                torch.tensor([len(control_points)], dtype=torch.long, device=control_points.device),
                                torch.linspace(0, 1, resolution, device=control_points.device).unsqueeze(0),
                                device=control_points.device)
                # Round the rasterized curve coordinates to integer pixel indices
                output = torch.round(output).long()
                pred_im[0, 0, output[0, :, 0], output[0, :, 1]] = 1

                iou_value += intersection_over_union(pred_im, tgt_im)
                chamfer_value += chamfer_distance(pred_im[0].cpu().numpy(), tgt_im[0].cpu().numpy())
                if control_points.shape[0] > 0:
                    probability_map = probabilistic_map_generator(
                        control_points.unsqueeze(1),
                        len(control_points) * torch.ones(1, dtype=torch.long, device=control_points.device),
                        cp_covariances[:len(control_points)].unsqueeze(1))
                    reduced_map, _ = torch.max(probability_map, dim=3)
                    probabilistic_similarity += torch.sum(reduced_map * tgt_im) / torch.sum(tgt_im)

                target_images[idx // 20] = tgt_im.unsqueeze(0)
                predicted_images[idx // 20] = pred_im.unsqueeze(0)

            # Store these first 10 images in tensorboard
            img_grid = torchvision.utils.make_grid(target_images)
            writer.add_image('target_images', img_grid)
            img_grid = torchvision.utils.make_grid(predicted_images)
            writer.add_image('predicted_images', img_grid)

            # Predict 490 more images, this time accumulating only the error
            for idx in range(200, 10000, 20):
                tgt_im = im_validation[idx].unsqueeze(0)
                pred_im = torch.zeros_like(tgt_im)
                control_points = model.predict(tgt_im)
                resolution = 150
                output = bezier(control_points.unsqueeze(1),
                                torch.tensor([len(control_points)], dtype=torch.long, device=control_points.device),
                                torch.linspace(0, 1, resolution, device=control_points.device).unsqueeze(0),
                                device=control_points.device)
                # Round the rasterized curve coordinates to integer pixel indices
                output = torch.round(output).long()
                pred_im[0, 0, output[0, :, 0], output[0, :, 1]] = 1

                iou_value += intersection_over_union(pred_im, tgt_im)
                chamfer_value += chamfer_distance(pred_im[0].cpu().numpy(), tgt_im[0].cpu().numpy())
                if control_points.shape[0] > 0:
                    probability_map = probabilistic_map_generator(
                        control_points.unsqueeze(1),
                        len(control_points) * torch.ones(1, dtype=torch.long, device=control_points.device),
                        cp_covariances[:len(control_points)].unsqueeze(1))
                    reduced_map, _ = torch.max(probability_map, dim=3)
                    probabilistic_similarity += torch.sum(reduced_map * tgt_im) / torch.sum(tgt_im)

            # Store the prediction error in tensorboard
            writer.add_scalar("Prediction/IoU", iou_value / 500, counter)
            writer.add_scalar("Prediction/Chamfer_distance", chamfer_value / 500, counter)
            print("The chamfer distance obtained is", chamfer_value / 500)
            writer.add_scalar("Prediction/Probabilistic_similarity", probabilistic_similarity / 500, counter)

        # Back to train mode for the next epoch
        model.train()
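# --- Illustrative sketch (not part of the trainer above). The loss used above is made
# invariant to the direction in which the control-point sequence is traversed: a Bezier
# curve and its reversed control-point sequence describe the same curve, so for each
# sample the smaller of the two cross-entropies is kept, with the final (stop) token
# left in place. Toy shapes only; the tensors below are stand-ins, not repo data.
import torch
import torch.nn.functional as F

tgt_seq_len, num_classes = 5, 10
logits = torch.randn(tgt_seq_len, num_classes)        # stands in for probabilities[:, k]
seq = torch.randint(0, num_classes, (tgt_seq_len,))   # stands in for seq[:, k]

seq_inver = torch.empty_like(seq)
seq_inver[:-1] = seq[:-1].flip(0)   # reverse the control-point tokens
seq_inver[-1] = seq[-1]             # keep the final (stop) token in place
loss = torch.min(F.cross_entropy(logits, seq),
                 F.cross_entropy(logits, seq_inver))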
output = torch.round(output).long()
pred_im[0, 0, output[0], output[1]] = 1

# Build RGB copies of both images and mark the control points in red
target = torch.empty((3, 64, 64))
predicted = torch.empty((3, 64, 64))
target[:] = tgt_im
predicted[:] = pred_im
for cp_tgt in tgt_control_points:
    target[:, int(cp_tgt[0]), int(cp_tgt[1])] = 0
    target[0, int(cp_tgt[0]), int(cp_tgt[1])] = 1
for cp_pred in control_points.cpu():
    predicted[:, int(cp_pred[0]), int(cp_pred[1])] = 0
    predicted[0, int(cp_pred[0]), int(cp_pred[1])] = 1

print("The chamfer distance is", chamfer_distance(target.numpy(), predicted.numpy()))

plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(target.transpose(0, 1).transpose(1, 2))
plt.title("Target\n{}".format(tgt_control_points))
plt.subplot(1, 2, 2)
plt.imshow(predicted.transpose(0, 1).transpose(1, 2))
plt.title("Predicted\n{}".format(control_points.cpu()))
plt.show()

model.train()
def train_one_bezier_transformer(model, dataset, batch_size, num_epochs, optimizer, num_experiment,
                                 cp_variance, var_drop, epochs_drop, min_variance, penalization_coef,
                                 lr=1e-4, cuda=True, debug=True):
    # torch.autograd.set_detect_anomaly(True)
    print("\n\nTHE TRAINING BEGINS")
    print("Experiment #{} ---> batch_size={} num_epochs={} learning_rate={} cp_variance={} "
          "var_drop={} epochs_drop={} min_variance={} pen_coef={}".format(
              num_experiment, batch_size, num_epochs, lr, cp_variance, var_drop,
              epochs_drop, min_variance, penalization_coef))

    # basedir = "/data1slow/users/asuso/trans_bezier"
    basedir = "/home/asuso/PycharmProjects/trans_bezier"

    # Initialize the probabilistic map generator and the covariance matrices
    probabilistic_map_generator = ProbabilisticMap((model.image_size, model.image_size, 50))
    cp_covariance = torch.tensor([[[1, 0], [0, 1]] for i in range(model.max_cp)], dtype=torch.float32)
    cp_covariances = torch.empty((model.max_cp, batch_size, 2, 2))
    for i in range(batch_size):
        cp_covariances[:, i, :, :] = cp_covariance
    if cuda:
        probabilistic_map_generator = probabilistic_map_generator.cuda()
        cp_covariances = cp_covariances.cuda()

    # Track the best loss obtained on validation so far
    best_loss = float('inf')

    # Initialize the tensorboard writer and the variables used for data collection
    cummulative_loss = 0
    if debug:
        # Tensorboard writer
        writer = SummaryWriter(basedir + "/graphics/ProbabilisticBezierEncoder/OneBezierModels/MultiCP/newProbabilitiesApproach")
        counter = 0

    # Get the dataset images
    images = dataset
    # Build the images used by the loss (ground truth with the penalization term)
    loss_images = generate_loss_images(images, weight=penalization_coef)

    # Send the data and the model to the GPU
    if cuda:
        images = images.cuda()
        loss_images = loss_images.cuda()
        model = model.cuda()

    # Split the dataset into training and validation
    # images.shape=(N, 1, 64, 64)
    im_training = images[:40000]
    im_validation = images[40000:]
    loss_im_training = loss_images[:40000]
    loss_im_validation = loss_images[40000:]

    # Define the optimizer and the learning rate scheduler
    optimizer = optimizer(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=10**(-0.5), patience=8, min_lr=1e-8)

    for epoch in range(num_epochs):
        t0 = time.time()
        print("Beginning epoch number", epoch + 1)
        actual_covariances = cp_covariances * step_decay(cp_variance, epoch, var_drop, epochs_drop,
                                                         min_variance).to(cp_covariances.device)

        for i in range(0, len(im_training) - batch_size + 1, batch_size):
            # Get the batch
            im = im_training[i:i + batch_size]
            loss_im = loss_im_training[i:i + batch_size]

            # Run the model on the batch
            control_points, ncp_probabilities = model(im)
            # assert torch.abs(torch.sum(ncp_probabilities)-ncp_probabilities.shape[1]) < 0.1

            # Compute the loss
            loss = 0
            for n, cps in enumerate(control_points):
                # Probability map of the probabilistic Bezier curve defined by the n+2 control points "cps"
                probability_map = probabilistic_map_generator(
                    cps,
                    (n + 2) * torch.ones((batch_size,), dtype=torch.long, device=cps.device),
                    actual_covariances)
                # Update the loss
                loss += probability_map + ncp_probabilities[n].view(-1, 1, 1) * probability_map.detach()
                # loss += -ncp_probabilities[n]*torch.sum(reduced_map * im[:, 0] / torch.sum(im[:, 0], dim=(1, 2)))
            loss = -torch.sum(loss * loss_im[:, 0] / torch.sum(im[:, 0], dim=(1, 2)).view(-1, 1, 1))
            if debug:
                cummulative_loss += loss

            # Backpropagation and one gradient descent step
            loss.backward()
            optimizer.step()
            model.zero_grad()

            # Collect data for tensorboard
            k = int(40000 / (batch_size * 5))
            if debug and i % k == k - 1:
                writer.add_scalar("Training/loss", cummulative_loss / k, counter)
                counter += 1
                cummulative_loss = 0

        """
        After each epoch we test the model on the validation set. Specifically:
        - We compute the model loss on the validation set.
        - We run 500 predictions on validation images, render an image from the
          predicted Bezier-curve parametrization, and compute the IoU, chamfer_distance
          and differentiable_chamfer_distance (probabilistic_map) metrics for these
          predictions against the ground truth.
        """
        model.eval()
        with torch.no_grad():
            cummulative_loss = 0
            for j in range(0, len(im_validation) - batch_size + 1, batch_size):
                im = im_validation[j:j + batch_size]
                loss_im = loss_im_validation[j:j + batch_size]

                # Run the model on the batch
                control_points, ncp_probabilities = model(im)
                # assert torch.abs(torch.sum(ncp_probabilities) - ncp_probabilities.shape[1]) < 0.1

                # Compute the loss
                loss = 0
                for n, cps in enumerate(control_points):
                    # Probability map of the probabilistic Bezier curve defined by the n+2 control points "cps"
                    probability_map = probabilistic_map_generator(
                        cps,
                        (n + 2) * torch.ones((batch_size,), dtype=torch.long, device=cps.device),
                        actual_covariances)
                    # Update the loss
                    loss += probability_map + ncp_probabilities[n].view(-1, 1, 1) * probability_map.detach()
                    # loss += -ncp_probabilities[n]*torch.sum(reduced_map * im[:, 0] / torch.sum(im[:, 0], dim=(1, 2)))
                loss = -torch.sum(loss * loss_im[:, 0] / torch.sum(im[:, 0], dim=(1, 2)).view(-1, 1, 1))
                cummulative_loss += loss

            # Apply the learning rate scheduler
            scheduler.step(cummulative_loss)

            # Collect data for tensorboard
            if debug:
                writer.add_scalar("Validation/loss", cummulative_loss / (j / batch_size + 1), counter)

            # If this is the lowest loss so far, save the model weights
            if cummulative_loss < best_loss:
                print("The model has improved!! New loss={}".format(cummulative_loss / (j / batch_size + 1)))
                best_loss = cummulative_loss
                torch.save(model.state_dict(),
                           basedir + "/state_dicts/ProbabilisticBezierEncoder/OneBezierModels/MultiCP/newProbabilitiesApproach")
            cummulative_loss = 0

            # Start the evaluation of the "prediction" mode
            iou_value = 0
            chamfer_value = 0
            prob_num_cps = torch.zeros(model.max_cp - 1, device=im_validation.device)

            # First, predict 10 images that will be stored in tensorboard
            target_images = im_validation[0:200:20]
            predicted_images = torch.zeros_like(target_images)
            # Get the most probable control points
            all_control_points, ncp_probabilities = model(target_images)
            num_cps = torch.argmax(ncp_probabilities, dim=0)
            # Accumulate the probability of each number of control points
            for i in range(ncp_probabilities.shape[1]):
                prob_num_cps += ncp_probabilities[:, i]
            control_points = torch.empty_like(all_control_points[0])
            for sample in range(10):
                control_points[:, sample, :] = all_control_points[num_cps[sample], :, sample, :]
            # Render the predicted images
            im_seq = bezier(control_points, num_cps + 2,
                            torch.linspace(0, 1, 150, device=control_points.device).unsqueeze(0),
                            device='cuda')
            im_seq = torch.round(im_seq).long()
            for i in range(10):
                predicted_images[i, 0, im_seq[i, :, 0], im_seq[i, :, 1]] = 1

            # Store these first 10 images in tensorboard
            img_grid = torchvision.utils.make_grid(target_images)
            writer.add_image('target_images', img_grid)
            img_grid = torchvision.utils.make_grid(predicted_images)
            writer.add_image('predicted_images', img_grid)

            # Compute metrics
            iou_value += intersection_over_union(predicted_images, target_images)
            chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                     target_images[:, 0].cpu().numpy()))

            # Finally, predict 490 more images to compute IoU and chamfer_distance
            target_images = im_validation[200:10000:20]
            predicted_images = torch.zeros_like(target_images)
            # Get the most probable control points
            all_control_points, ncp_probabilities = model(target_images)
            num_cps = torch.argmax(ncp_probabilities, dim=0)
            # Accumulate the probability of each number of control points
            for i in range(ncp_probabilities.shape[1]):
                prob_num_cps += ncp_probabilities[:, i]
            control_points = torch.empty_like(all_control_points[0])
            for sample in range(490):
                control_points[:, sample, :] = all_control_points[num_cps[sample], :, sample, :]
            # Render the predicted images
            im_seq = bezier(control_points, num_cps + 2,
                            torch.linspace(0, 1, 150, device=control_points.device).unsqueeze(0),
                            device='cuda')
            im_seq = torch.round(im_seq).long()
            for i in range(490):
                predicted_images[i, 0, im_seq[i, :, 0], im_seq[i, :, 1]] = 1

            # Compute metrics
            iou_value += intersection_over_union(predicted_images, target_images)
            chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                     target_images[:, 0].cpu().numpy()))

            # Store the results in tensorboard
            writer.add_scalar("Prediction/IoU", iou_value / 500, counter)
            writer.add_scalar("Prediction/Chamfer_distance", chamfer_value / 500, counter)
            prob_num_cps = prob_num_cps.cpu()
            probabilities = {str(2 + i) + "_cp": prob_num_cps[i] / 500 for i in range(model.max_cp - 1)}
            writer.add_scalars('num_cp probabilities', probabilities, counter)

        # Back to train mode for the next epoch
        model.train()
        print("Time per epoch:", time.time() - t0)
def train_one_bezier_transformer(model, dataset, batch_size, num_epochs, optimizer, num_experiment,
                                 lr=1e-4, cuda=True, debug=True):
    # torch.autograd.set_detect_anomaly(True)
    print("\n\nTHE TRAINING BEGINS")
    print("MultiBezier Experiment #{} ---> num_cp={} max_beziers={} batch_size={} num_epochs={} learning_rate={}".format(
        num_experiment, model.num_cp, model.max_beziers, batch_size, num_epochs, lr))

    # basedir = "/data1slow/users/asuso/trans_bezier"
    basedir = "/home/asuso/PycharmProjects/trans_bezier"

    # Track the best loss obtained on validation so far
    best_loss = float('inf')

    # Initialize the tensorboard writer and the variables used for data collection
    cummulative_loss = 0
    if debug:
        # Tensorboard writer
        writer = SummaryWriter(basedir + "/graphics/ProbabilisticBezierEncoder/MultiBezierModels/SegmentationVersion/" +
                               str(model.num_cp) + "CP_maxBeziers" + str(model.max_beziers))
        counter = 0

    # Get the dataset images
    images = dataset

    # Perform the segmentation: one image per connected component
    # segmented_images = torch.zeros((0, 1, images.shape[-2], images.shape[-1]), dtype=images.dtype, device=images.device)
    connected_components_per_image = 4
    segmented_images = torch.zeros((connected_components_per_image * images.shape[0], 1, images.shape[-2], images.shape[-1]),
                                   dtype=images.dtype, device=images.device)
    num_images = 0
    for n, im in enumerate(images):
        num_labels, labels_im = cv2.connectedComponents(im[0].numpy().astype(np.uint8))
        new_segmented_images = torch.zeros((num_labels - 1, 1, images.shape[-2], images.shape[-1]),
                                           dtype=images.dtype, device=images.device)
        for i in range(1, num_labels):
            new_segmented_images[i - 1, 0][labels_im == i] = 1
        # segmented_images = torch.cat((segmented_images, new_segmented_images), dim=0)
        segmented_images[num_images:num_images + num_labels - 1] = new_segmented_images
        num_images += num_labels - 1
    segmented_images = segmented_images[:num_images]

    # Initialize the probabilistic map generator and the covariance matrices
    probabilistic_map_generator = ProbabilisticMap((model.image_size, model.image_size, 50))
    cp_covariance = torch.tensor([[[1, 0], [0, 1]] for i in range(model.num_cp)], dtype=torch.float32)
    covariances = torch.empty((model.num_cp, batch_size, 2, 2))
    for i in range(batch_size):
        covariances[:, i, :, :] = cp_covariance

    # Build the coordinate grid
    grid = torch.empty((1, 1, images.shape[2], images.shape[2], 2), dtype=torch.float32)
    for i in range(images.shape[2]):
        grid[0, 0, i, :, 0] = i
        grid[0, 0, :, i, 1] = i

    # Build the distance images
    distance_images = generate_distance_images(segmented_images)

    if cuda:
        segmented_images = segmented_images.cuda()
        distance_images = distance_images.cuda()
        probabilistic_map_generator = probabilistic_map_generator.cuda()
        covariances = covariances.cuda()
        grid = grid.cuda()
        model = model.cuda()

    # Split the dataset into training and validation
    # images.shape=(N, 1, 64, 64)
    dataset_len = segmented_images.shape[0]
    train_size = int(dataset_len * 0.8)
    im_training = segmented_images[:train_size]
    im_validation = segmented_images[train_size:]
    distance_im_training = distance_images[:train_size]
    distance_im_validation = distance_images[train_size:]
    orig_im_validation = images[int(images.shape[0] * 0.9):]

    # Define the optimizer and the learning rate scheduler
    optimizer = optimizer(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=10**(-0.5), patience=8, min_lr=1e-8)

    for epoch in range(num_epochs):
        t0 = time.time()
        print("Beginning epoch number", epoch + 1)
        for i in range(0, len(im_training) - batch_size + 1, batch_size):
            # Get the batch
            im = im_training[i:i + batch_size]  # .cuda()
            distance_im = distance_im_training[i:i + batch_size]  # .cuda()

            # Run the model on the batch
            control_points = model(im)

            # Compute the loss
            loss = loss_function(control_points, im, distance_im, covariances,
                                 probabilistic_map_generator, grid)

            # Backpropagation and one gradient descent step
            loss.backward()
            optimizer.step()
            model.zero_grad()

            # Collect data for tensorboard
            k = int(int(im_training.shape[0] / (batch_size * 5)) * batch_size + 1)
            if debug:
                cummulative_loss += loss.detach()
                if i % k == k - 1:
                    writer.add_scalar("Training/loss", cummulative_loss / k, counter)
                    counter += 1
                    cummulative_loss = 0

        """
        After each epoch we test the model on the validation set. Specifically:
        - We compute the model loss on the validation set.
        - We run 500 predictions on validation images, render an image from the
          predicted Bezier-curve parametrization, and compute the IoU, chamfer_distance
          and differentiable_chamfer_distance (probabilistic_map) metrics for these
          predictions against the ground truth.
        """
        model.eval()
        with torch.no_grad():
            cummulative_loss = 0
            for j in range(0, len(im_validation) - batch_size + 1, batch_size):
                # Get the batch
                im = im_validation[j:j + batch_size]  # .cuda()
                distance_im = distance_im_validation[j:j + batch_size]  # .cuda()

                # Run the model on the batch
                control_points = model(im)

                # Compute the loss
                loss = loss_function(control_points, im, distance_im, covariances,
                                     probabilistic_map_generator, grid)
                cummulative_loss += loss.detach()

            # Apply the learning rate scheduler
            scheduler.step(cummulative_loss)

            # Collect data for tensorboard
            if debug:
                writer.add_scalar("Validation/loss", cummulative_loss / (j / batch_size + 1), counter)

            # If this is the lowest loss so far, save the model weights
            if cummulative_loss < best_loss:
                print("The model has improved!! New loss={}".format(cummulative_loss / (j / batch_size + 1)))
                best_loss = cummulative_loss
                torch.save(model.state_dict(),
                           basedir + "/state_dicts/ProbabilisticBezierEncoder/MultiBezierModels/SegmentationVersion/" +
                           str(model.num_cp) + "CP_maxBeziers" + str(model.max_beziers) + "_repCoef")
            cummulative_loss = 0

            # Start the evaluation of the "prediction" mode
            iou_value = 0
            chamfer_value = 0

            # First, predict 10 images that will be stored in tensorboard
            target_images = orig_im_validation[0:100:10]
            predicted_images = model.predict(target_images)

            # Store these first 10 images in tensorboard
            img_grid = torchvision.utils.make_grid(target_images)
            writer.add_image('target_images', img_grid)
            img_grid = torchvision.utils.make_grid(predicted_images)
            writer.add_image('predicted_images', img_grid)

            # Compute metrics
            iou_value += intersection_over_union(predicted_images, target_images)
            chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                     target_images[:, 0].cpu().numpy()))

            # Finally, predict 490 more images to compute IoU and chamfer_distance
            idxs = [100, 800, 1500, 2200, 2900, 3600, 4300, 5000]
            for i in range(7):
                target_images = orig_im_validation[idxs[i]:idxs[i + 1]:10]
                predicted_images = model.predict(target_images)

                # Compute metrics
                iou_value += intersection_over_union(predicted_images, target_images)
                chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                         target_images[:, 0].cpu().numpy()))

            # Store the results in tensorboard
            writer.add_scalar("Prediction/IoU", iou_value / 500, counter)
            writer.add_scalar("Prediction/Chamfer_distance", chamfer_value / 500, counter)

        # Back to train mode for the next epoch
        model.train()
        print("Time per epoch:", time.time() - t0)
def train_one_bezier_transformer(model, dataset, batch_size, num_epochs, optimizer, loss_mode, num_experiment,
                                 cp_variance, var_drop, epochs_drop, min_variance, penalization_coef,
                                 lr=1e-4, cuda=True, debug=True):
    # torch.autograd.set_detect_anomaly(True)
    print("\n\nTHE TRAINING BEGINS")
    print("MultiBezier Experiment #{} ---> loss={} distance_loss={} num_cp={} max_beziers={} "
          "batch_size={} num_epochs={} learning_rate={} pen_coef={}".format(
              num_experiment, loss_mode[0], loss_mode[1], model.num_cp, model.max_beziers,
              batch_size, num_epochs, lr, penalization_coef))

    # basedir = "/data1slow/users/asuso/trans_bezier"
    basedir = "/home/asuso/PycharmProjects/trans_bezier"

    # Track the best loss obtained on validation so far
    best_loss = float('inf')

    # Initialize the tensorboard writer and the variables used for data collection
    cummulative_loss = 0
    if debug:
        # Tensorboard writer
        writer = SummaryWriter(basedir + "/graphics/ProbabilisticBezierEncoder/MultiBezierModels/FixedCP/" +
                               str(model.num_cp) + "CP_maxBeziers" + str(model.max_beziers) + "loss" + str(loss_mode[0]))
        counter = 0

    # Get the dataset images
    images = dataset

    assert loss_mode[0] in ['pmap', 'dmap', 'chamfer']
    assert loss_mode[1] in ['l2', 'quadratic', 'exp']

    # If the selected loss is the probabilistic-map loss, do the required setup
    if loss_mode[0] == 'pmap':
        # Initialize the probabilistic map generator and the covariance matrices
        probabilistic_map_generator = ProbabilisticMap((model.image_size, model.image_size, 50))
        cp_covariance = torch.tensor([[[1, 0], [0, 1]] for i in range(model.num_cp)], dtype=torch.float32)
        cp_covariances = torch.empty((model.num_cp, batch_size, 2, 2))
        for i in range(batch_size):
            cp_covariances[:, i, :, :] = cp_covariance
        # Ground-truth images with the penalization factor added
        loss_images = generate_loss_images(images, weight=penalization_coef, distance=loss_mode[1])
        grid = None
        distance_im = None
        if cuda:
            images = images.cuda()
            loss_images = loss_images.cuda()
            probabilistic_map_generator = probabilistic_map_generator.cuda()
            cp_covariances = cp_covariances.cuda()
            model = model.cuda()

    # If the selected loss is the distance-map loss, do the required setup
    if loss_mode[0] == 'dmap':
        grid = torch.empty((1, 1, images.shape[2], images.shape[3], 2), dtype=torch.float32)
        for i in range(images.shape[2]):
            grid[0, 0, i, :, 0] = i
            grid[0, 0, :, i, 1] = i
        loss_im = None
        distance_im = None
        actual_covariances = None
        probabilistic_map_generator = None
        if cuda:
            images = images.cuda()
            grid = grid.cuda()
            model = model.cuda()

    # If the selected loss is the chamfer loss, do the required setup
    if loss_mode[0] == 'chamfer':
        # Initialize the probabilistic map generator and the covariance matrices
        probabilistic_map_generator = ProbabilisticMap((model.image_size, model.image_size, 50))
        cp_covariance = torch.tensor([[[1, 0], [0, 1]] for i in range(model.num_cp)], dtype=torch.float32)
        actual_covariances = torch.empty((model.num_cp, batch_size, 2, 2))
        for i in range(batch_size):
            actual_covariances[:, i, :, :] = cp_covariance
        # Build the coordinate grid
        grid = torch.empty((1, 1, images.shape[2], images.shape[2], 2), dtype=torch.float32)
        for i in range(images.shape[2]):
            grid[0, 0, i, :, 0] = i
            grid[0, 0, :, i, 1] = i
        # Build the distance images
        distance_images = generate_distance_images(images)
        loss_im = None
        if cuda:
            images = images.cuda()
            distance_images = distance_images.cuda()
            probabilistic_map_generator = probabilistic_map_generator.cuda()
            actual_covariances = actual_covariances.cuda()
            grid = grid.cuda()
            model = model.cuda()

    # Split the dataset into training and validation
    # images.shape=(N, 1, 64, 64)
    im_training = images[:40000]
    im_validation = images[40000:]
    if loss_mode[0] == 'pmap':
        loss_im_training = loss_images[:40000]
        loss_im_validation = loss_images[40000:]
    if loss_mode[0] == 'chamfer':
        distance_im_training = distance_images[:40000]
        distance_im_validation = distance_images[40000:]

    # Define the optimizer and the learning rate scheduler
    optimizer = optimizer(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=10**(-0.5), patience=8, min_lr=1e-8)

    for epoch in range(num_epochs):
        t0 = time.time()
        print("Beginning epoch number", epoch + 1)
        if loss_mode[0] == 'pmap':
            actual_covariances = cp_covariances * step_decay(cp_variance, epoch, var_drop, epochs_drop,
                                                             min_variance).to(cp_covariances.device)
        for i in range(0, len(im_training) - batch_size + 1, batch_size):
            # Get the batch
            im = im_training[i:i + batch_size]  # .cuda()
            if loss_mode[0] == 'pmap':
                loss_im = loss_im_training[i:i + batch_size]  # .cuda()
            if loss_mode[0] == 'chamfer':
                distance_im = distance_im_training[i:i + batch_size]  # .cuda()

            # Run the model on the batch
            control_points, probabilities = model(im)

            # Compute the loss
            loss = loss_function(epoch, control_points,
                                 model.max_beziers + torch.zeros(batch_size, dtype=torch.long, device=control_points.device),
                                 probabilities, model.num_cp, im, distance_im, loss_im, grid,
                                 actual_covariances, probabilistic_map_generator,
                                 loss_type=loss_mode[0], distance='l2', gamma=0.9)

            # Backpropagation and one gradient descent step
            loss.backward()
            optimizer.step()
            model.zero_grad()

            # Collect data for tensorboard
            k = int(int(40000 / (batch_size * 5)) * batch_size + 1)
            if debug:
                cummulative_loss += loss.detach()
                if i % k == k - 1:
                    writer.add_scalar("Training/loss", cummulative_loss / k, counter)
                    counter += 1
                    cummulative_loss = 0

        """
        After each epoch we test the model on the validation set. Specifically:
        - We compute the model loss on the validation set.
        - We run 500 predictions on validation images, render an image from the
          predicted Bezier-curve parametrization, and compute the IoU, chamfer_distance
          and differentiable_chamfer_distance (probabilistic_map) metrics for these
          predictions against the ground truth.
        """
        model.eval()
        with torch.no_grad():
            cummulative_loss = 0
            for j in range(0, len(im_validation) - batch_size + 1, batch_size):
                # Get the batch
                im = im_validation[j:j + batch_size]  # .cuda()
                if loss_mode[0] == 'pmap':
                    loss_im = loss_im_validation[j:j + batch_size]  # .cuda()
                if loss_mode[0] == 'chamfer':
                    distance_im = distance_im_validation[j:j + batch_size]  # .cuda()

                # Run the model on the batch
                control_points, probabilities = model(im)

                # Compute the loss
                loss = loss_function(epoch, control_points,
                                     model.max_beziers + torch.zeros(batch_size, dtype=torch.long, device=control_points.device),
                                     probabilities, model.num_cp, im, distance_im, loss_im, grid,
                                     actual_covariances, probabilistic_map_generator,
                                     loss_type=loss_mode[0], distance='l2', gamma=0.9)
                cummulative_loss += loss.detach()

            # Apply the learning rate scheduler
            scheduler.step(cummulative_loss)

            # Collect data for tensorboard
            if debug:
                writer.add_scalar("Validation/loss", cummulative_loss / (j / batch_size + 1), counter)

            # If this is the lowest loss so far, save the model weights
            if cummulative_loss < best_loss:
                print("The model has improved!! New loss={}".format(cummulative_loss / (j / batch_size + 1)))
                best_loss = cummulative_loss
                torch.save(model.state_dict(),
                           basedir + "/state_dicts/ProbabilisticBezierEncoder/MultiBezierModels/FixedCP/" +
                           str(model.num_cp) + "CP_maxBeziers" + str(model.max_beziers) + "loss" + str(loss_mode[0]))
            cummulative_loss = 0

            # Graphical representation of the forward mode
            target_images = im_validation[0:200:20].cuda()
            forwarded_images = torch.zeros_like(target_images)
            forwarded_cp, _ = model(target_images)
            # Render the forwarded images
            for i in range(model.max_beziers):
                num_cps = model.num_cp * torch.ones(10, dtype=torch.long, device=forwarded_cp.device)
                im_seq = bezier(forwarded_cp[model.num_cp * i: model.num_cp * (i + 1)], num_cps,
                                torch.linspace(0, 1, 150, device=control_points.device).unsqueeze(0),
                                device='cuda')
                im_seq = torch.round(im_seq).long()
                for j in range(10):
                    forwarded_images[j, 0, im_seq[j, :, 0], im_seq[j, :, 1]] = 1
            img_grid = torchvision.utils.make_grid(forwarded_images)
            writer.add_image('forwarded_images', img_grid)

            # Start the evaluation of the "prediction" mode
            if epoch > 60:
                iou_value = 0
                chamfer_value = 0

                # First, predict 10 images that will be stored in tensorboard
                target_images = im_validation[0:200:20].cuda()
                predicted_images = torch.zeros_like(target_images)
                control_points, num_beziers = model.predict(target_images)
                # Render the predicted images
                i = 0
                not_finished = num_beziers > i
                to_end = torch.sum(not_finished)
                while to_end:
                    num_cps = model.num_cp * torch.ones_like(num_beziers[not_finished])
                    im_seq = bezier(control_points[model.num_cp * i: model.num_cp * (i + 1), not_finished], num_cps,
                                    torch.linspace(0, 1, 150, device=control_points.device).unsqueeze(0),
                                    device='cuda')
                    im_seq = torch.round(im_seq).long()
                    k = 0
                    for j in range(10):
                        if not_finished[j]:
                            predicted_images[j, 0, im_seq[k, :, 0], im_seq[k, :, 1]] = 1
                            k += 1
                    i += 1
                    not_finished = num_beziers > i
                    to_end = torch.sum(not_finished)

                # Store these first 10 images in tensorboard
                img_grid = torchvision.utils.make_grid(target_images)
                writer.add_image('target_images', img_grid)
                img_grid = torchvision.utils.make_grid(predicted_images)
                writer.add_image('predicted_images', img_grid)

                # Compute metrics
                iou_value += intersection_over_union(predicted_images, target_images)
                chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                         target_images[:, 0].cpu().numpy()))

                # Finally, predict 490 more images to compute IoU and chamfer_distance
                idxs = [200, 1600, 3000, 4400, 5800, 7200, 8600, 10000]
                for i in range(7):
                    target_images = im_validation[idxs[i]:idxs[i + 1]:20].cuda()
                    predicted_images = torch.zeros_like(target_images)
                    control_points, num_beziers = model.predict(target_images)
                    # Render the predicted images
                    i = 0
                    not_finished = num_beziers > i
                    to_end = torch.sum(not_finished)
                    while to_end:
                        num_cps = model.num_cp * torch.ones_like(num_beziers[not_finished])
                        im_seq = bezier(control_points[model.num_cp * i: model.num_cp * (i + 1), not_finished], num_cps,
                                        torch.linspace(0, 1, 150, device=control_points.device).unsqueeze(0),
                                        device='cuda')
                        im_seq = torch.round(im_seq).long()
                        k = 0
                        for j in range(70):
                            if not_finished[j]:
                                predicted_images[j, 0, im_seq[k, :, 0], im_seq[k, :, 1]] = 1
                                k += 1
                        i += 1
                        not_finished = num_beziers > i
                        to_end = torch.sum(not_finished)

                    # Compute metrics
                    iou_value += intersection_over_union(predicted_images, target_images)
                    chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                             target_images[:, 0].cpu().numpy()))

                # Store the results in tensorboard
                writer.add_scalar("Prediction/IoU", iou_value / 500, counter)
                writer.add_scalar("Prediction/Chamfer_distance", chamfer_value / 500, counter)

        # Back to train mode for the next epoch
        model.train()
        print("Time per epoch:", time.time() - t0)
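# --- Side note (sketch): the coordinate grid filled row by row and column by column in
# the trainers above can also be built with torch.meshgrid. This is only an equivalent
# formulation shown for clarity, not the code used in the repository.
import torch

def make_grid_sketch(size):
    ys, xs = torch.meshgrid(torch.arange(size, dtype=torch.float32),
                            torch.arange(size, dtype=torch.float32), indexing='ij')
    return torch.stack((ys, xs), dim=-1).view(1, 1, size, size, 2)

# grid[0, 0, y, x] == (y, x), matching the loops above
assert torch.equal(make_grid_sketch(4)[0, 0, 2, 3], torch.tensor([2., 3.]))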
def train_one_bezier_transformer(model, dataset, batch_size, num_epochs, optimizer, num_experiment,
                                 lr=1e-4, loss_type='probabilistic', dataset_name="MNIST",
                                 cuda=True, debug=True):  # rep_coef=0.1, dist_thresh=4.5, second_term=True
    # torch.autograd.set_detect_anomaly(True)
    print("\n\nTHE TRAINING BEGINS")
    print("MultiBezier Experiment #{} ---> num_cp={} max_beziers={} batch_size={} num_epochs={} learning_rate={}".format(
        num_experiment, model.num_cp, model.max_beziers, batch_size, num_epochs, lr))

    # basedir = "/data1slow/users/asuso/trans_bezier"
    basedir = "/home/asuso/PycharmProjects/trans_bezier"

    # Track the best loss obtained on validation so far
    best_loss = float('inf')

    # Initialize the tensorboard writer and the variables used for data collection
    if debug:
        # Tensorboard writer
        writer = SummaryWriter(basedir + "/graphics/ProbabilisticBezierEncoder/MultiBezierModels/ParallelVersion/" +
                               str(dataset_name) + "_" + str(loss_type) + "_" + str(model.num_cp) +
                               "CP_maxBeziers" + str(model.max_beziers))
        counter = 0

    # Get the dataset images
    images = dataset

    if loss_type == "probabilistic":
        # Initialize the probabilistic map generator and the covariance matrices
        probabilistic_map_generator = ProbabilisticMap((model.image_size, model.image_size, 50))
        cp_covariance = torch.tensor([[[1, 0], [0, 1]] for i in range(model.num_cp)], dtype=torch.float32)
        covariances = torch.empty((model.num_cp, batch_size, 2, 2))
        for i in range(batch_size):
            covariances[:, i, :, :] = cp_covariance
        # Build the distance images
        distance_images = generate_distance_images(images)
        if cuda:
            distance_images = distance_images.cuda()
            probabilistic_map_generator = probabilistic_map_generator.cuda()
            covariances = covariances.cuda()
        distance_im_training = distance_images[:int(0.83 * images.shape[0])]
        distance_im_validation = distance_images[int(0.83 * images.shape[0]):]
    else:
        # Build the groundtruth sequence of active pixel coordinates, shape (bs, max_N, 2)
        im_size = images.shape[-1]
        groundtruth_sequences = torch.empty((images.shape[0], im_size * im_size, 2), dtype=torch.long)
        max_coords = 0
        for i, im in enumerate(images):
            new_coords = torch.nonzero(im[0])
            groundtruth_sequences[i, :new_coords.shape[0]] = new_coords
            if new_coords.shape[0] > max_coords:
                max_coords = new_coords.shape[0]
        groundtruth_sequences = groundtruth_sequences[:, :max_coords]
        if cuda:
            groundtruth_sequences = groundtruth_sequences.cuda()
        groundtruth_seq_training = groundtruth_sequences[:int(0.83 * images.shape[0])]
        groundtruth_seq_validation = groundtruth_sequences[int(0.83 * images.shape[0]):]

    # Build the coordinate grid
    grid = torch.empty((1, 1, images.shape[2], images.shape[2], 2), dtype=torch.float32)
    for i in range(images.shape[2]):
        grid[0, 0, i, :, 0] = i
        grid[0, 0, :, i, 1] = i

    if cuda:
        images = images.cuda()
        grid = grid.cuda()
        model = model.cuda()

    # Split the dataset into training and validation
    # images.shape=(N, 1, 64, 64)
    im_training = images[:int(0.83 * images.shape[0])]
    im_validation = images[int(0.83 * images.shape[0]):]

    # Define the optimizer and the learning rate scheduler
    optimizer = optimizer(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=10**(-0.5), patience=8, min_lr=1e-8)

    for epoch in range(num_epochs):
        t0 = time.time()
        print("Beginning epoch number", epoch + 1)
        cummulative_loss = 0
        for i in range(0, len(im_training) - batch_size + 1, batch_size):
            # Get the batch
            im = im_training[i:i + batch_size]  # .cuda()

            # Run the model on the batch
            control_points = model(im)

            if loss_type == "probabilistic":
                distance_im = distance_im_training[i:i + batch_size]  # .cuda()
                # Compute the loss
                loss = loss_function(control_points, im, distance_im, covariances,
                                     probabilistic_map_generator, grid)  # repulsion_coef=rep_coef, dist_thresh=dist_thresh, second_term=second_term
            else:
                groundtruth_seq = groundtruth_seq_training[i:i + batch_size]
                loss = new_loss(control_points, im, groundtruth_seq, grid)

            # Backpropagation and one gradient descent step
            loss.backward()
            optimizer.step()
            model.zero_grad()

            if debug:
                cummulative_loss += loss.detach()

        if debug:
            writer.add_scalar("Training/loss", cummulative_loss / (i / batch_size), counter)

        """
        After each epoch we test the model on the validation set. Specifically:
        - We compute the model loss on the validation set.
        - We run 500 predictions on validation images, render an image from the
          predicted Bezier-curve parametrization, and compute the IoU, chamfer_distance
          and differentiable_chamfer_distance (probabilistic_map) metrics for these
          predictions against the ground truth.
        """
        model.eval()
        with torch.no_grad():
            cummulative_loss = 0
            for j in range(0, len(im_validation) - batch_size + 1, batch_size):
                # Get the batch
                im = im_validation[j:j + batch_size]  # .cuda()

                # Run the model on the batch
                control_points = model(im)

                if loss_type == "probabilistic":
                    distance_im = distance_im_validation[j:j + batch_size]  # .cuda()
                    # Compute the loss
                    loss = loss_function(control_points, im, distance_im, covariances,
                                         probabilistic_map_generator, grid)  # repulsion_coef=rep_coef, dist_thresh=dist_thresh, second_term=second_term
                else:
                    groundtruth_seq = groundtruth_seq_validation[j:j + batch_size]
                    loss = new_loss(control_points, im, groundtruth_seq, grid)
                cummulative_loss += loss.detach()

            # Apply the learning rate scheduler
            scheduler.step(cummulative_loss)

            # Collect data for tensorboard
            if debug:
                writer.add_scalar("Validation/loss", cummulative_loss / j, counter)

            # If this is the lowest loss so far, save the model weights
            if cummulative_loss < best_loss:
                print("The model has improved!! New loss={}".format(cummulative_loss / j))
                best_loss = cummulative_loss
                torch.save(model.state_dict(),
                           basedir + "/state_dicts/ProbabilisticBezierEncoder/MultiBezierModels/ParallelVersion/" +
                           str(dataset_name) + "_" + str(loss_type) + "_" + str(model.num_cp) +
                           "CP_maxBeziers" + str(model.max_beziers))

            # Start the evaluation of the "prediction" mode
            iou_value = 0
            chamfer_value = 0

            # First, predict 10 images that will be stored in tensorboard
            target_images = im_validation[0:200:20]  # .cuda()
            predicted_images = torch.zeros_like(target_images)
            control_points = model(target_images)
            # Render the predicted images
            for bezier_cp in control_points:
                # Compute the point sequence of this curve
                num_cps = model.num_cp * torch.ones(10, dtype=torch.long, device=bezier_cp.device)
                im_seq = bezier(bezier_cp, num_cps,
                                torch.linspace(0, 1, 150, device=num_cps.device).unsqueeze(0),
                                device=num_cps.device)
                im_seq = torch.round(im_seq).long()
                for j in range(10):
                    predicted_images[j, 0, im_seq[j, :, 0], im_seq[j, :, 1]] = 1

            # Store these first 10 images in tensorboard
            img_grid = torchvision.utils.make_grid(target_images)
            writer.add_image('target_images', img_grid)
            img_grid = torchvision.utils.make_grid(predicted_images)
            writer.add_image('predicted_images', img_grid)

            # Compute metrics
            iou_value += intersection_over_union(predicted_images, target_images)
            chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                     target_images[:, 0].cpu().numpy()))

            # Finally, predict 490 more images to compute IoU and chamfer_distance
            idxs = [200, 1600, 3000, 4400, 5800, 7200, 8600, 10000]
            for i in range(7):
                target_images = im_validation[idxs[i]:idxs[i + 1]:20]  # .cuda()
                predicted_images = torch.zeros_like(target_images)
                control_points = model(target_images)
                # Render the predicted images
                for bezier_cp in control_points:
                    # Compute the point sequence of this curve
                    num_cps = model.num_cp * torch.ones(70, dtype=torch.long, device=bezier_cp.device)
                    im_seq = bezier(bezier_cp, num_cps,
                                    torch.linspace(0, 1, 150, device=num_cps.device).unsqueeze(0),
                                    device=num_cps.device)
                    im_seq = torch.round(im_seq).long()
                    for j in range(70):
                        predicted_images[j, 0, im_seq[j, :, 0], im_seq[j, :, 1]] = 1

                # Compute metrics
                iou_value += intersection_over_union(predicted_images, target_images)
                chamfer_value += np.sum(chamfer_distance(predicted_images[:, 0].cpu().numpy(),
                                                         target_images[:, 0].cpu().numpy()))

            # Store the results in tensorboard
            writer.add_scalar("Prediction/IoU", iou_value / 500, counter)
            writer.add_scalar("Prediction/Chamfer_distance", chamfer_value / 500, counter)

        # Back to train mode for the next epoch
        model.train()
        print("Time per epoch:", time.time() - t0)
control_points = model(im)
num_cps = model.num_cp + torch.zeros(1, dtype=torch.long, device=control_points.device)
num_beziers = 1 if modelo == "OneBezier" else max_beziers
predicted_im = torch.zeros_like(im)
for j in range(num_beziers):
    im_seq = bezier_rend(control_points[3 * j:3 * (j + 1)], num_cps,
                         torch.linspace(0, 1, 150, device=device).unsqueeze(0),
                         device=device)
    im_seq = torch.round(im_seq).long()
    predicted_im[0, 0, im_seq[0, :, 0], im_seq[0, :, 1]] = 1

chamfer_dist = chamfer_distance(predicted_im[0].cpu().numpy(), im[0].cpu().numpy())
# scheduler.step(chamfer_dist)

if chamfer_dist < best_chamfer:
    # print("Epoch {} --> chamfer_distance={}".format(i+1, chamfer_dist))
    best_chamfer = chamfer_dist
    best_chamfer_epoch = i + 1
    best_image = predicted_im

plt.figure()
plt.subplot(1, 2, 1)
plt.imshow(im[0, 0].cpu(), cmap='gray')
plt.title("Original")
plt.subplot(1, 2, 2)
plt.imshow(predicted_im[0, 0].cpu(), cmap='gray')
plt.title("Predicted epoch {}".format(i + 1))
plt.show()
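# --- Illustrative sketch (not the repository's `bezier`/`bezier_rend` renderer): rasterize
# a single quadratic Bezier curve from 3 control points into a 64x64 binary image using the
# Bernstein form, mirroring the round-and-index pattern used in the snippets above.
import torch

def render_quadratic_bezier_sketch(cp, size=64, resolution=150):
    # cp: tensor of shape (3, 2) holding (row, col) control points
    t = torch.linspace(0, 1, resolution).unsqueeze(1)                      # (resolution, 1)
    pts = (1 - t) ** 2 * cp[0] + 2 * (1 - t) * t * cp[1] + t ** 2 * cp[2]  # (resolution, 2)
    pts = torch.round(pts).long().clamp(0, size - 1)
    im = torch.zeros(1, 1, size, size)
    im[0, 0, pts[:, 0], pts[:, 1]] = 1
    return im

example_im = render_quadratic_bezier_sketch(torch.tensor([[5., 5.], [32., 60.], [60., 10.]]))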