def main():
    """Train the contact-matrix autoencoder for 15 epochs with SGD.

    Loads 21x21 contact matrices from args.data_dir, optimizes a
    BCE-with-logits reconstruction loss, times every batch with an
    AverageMeter, and saves the collected timings at the end.
    """
    use_cuda = args.use_cuda
    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441
    img_height = 21
    img_width = 21

    vae = AutoEncoder(code_size=20, imgsize=input_size, height=img_height,
                      width=img_width)
    criterion = nn.BCEWithLogitsLoss()

    if use_cuda:
        vae = vae.cuda()
        criterion = criterion.cuda()

    optimizer = optim.SGD(vae.parameters(), lr=0.01)
    clock = AverageMeter(name='clock32single', rank=0)
    epoch_loss = 0  # running sum of batch losses (never reset across epochs)
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            # BUGFIX: was `inputs.resize_(args.batch_size, 1, 21, 21)`, which
            # pads the final (possibly smaller) batch with uninitialized
            # memory. view(-1, ...) reshapes exactly the samples present.
            inputs = inputs.view(-1, 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()
            inputs = Variable(inputs)

            optimizer.zero_grad()
            output, code = vae(inputs)
            loss = criterion(output, inputs)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.data[0]

            # Wall-clock time for this batch.
            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                # BUGFIX: progress used `len(data)`, the number of keys in
                # the batch dict, not the number of samples in the batch.
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0), len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.data[0]))

    clock.save(path='/home/ygx/libraries/mds/molecules/molecules/conv_autoencoder/runtimes')
def generate(model_path, model_name, generate_path, generate_name, piece):
    """Reconstruct a wav file through the full autoencoder and save it.

    Args:
        model_path: Directory containing the saved model.
        model_name: File name of the saved model.
        generate_path: Output directory (created if missing).
        generate_name: Base name for the generated '.wav' file.
        piece: Path of the input wav file to reconstruct.
    """
    # Make sure the output directory exists.
    if not os.path.exists(generate_path):
        os.makedirs(generate_path)

    # Restore the trained autoencoder and move it to the GPU when available.
    net = load_model(AutoEncoder(), model_path, model_name)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        net = net.cuda()
    net.eval()

    # Turn the input audio into a batched float spectrogram: (1, time, freq).
    input_audio = audio.load_wav(piece)
    spec = audio.spectrogram(input_audio).astype(np.float32)
    spec = torch.FloatTensor(torch.from_numpy(spec.T))
    spec = torch.unsqueeze(spec, 0)
    spec = Variable(spec, volatile=True).contiguous()
    if use_gpu:
        spec = spec.cuda()

    # Full forward pass, then invert the spectrogram back to a waveform.
    generated_spec = np.squeeze(net(spec).data.cpu().numpy())
    waveform = audio.inv_spectrogram(generated_spec.T)
    wav_name = generate_path + generate_name + '.wav'
    audio.save_wav(waveform, wav_name)
def decode(model_name, encoding, decoder_name):
    """Decode a saved latent encoding back into a spectrogram and save it.

    Args:
        model_name: File name of the saved model under './restore/'.
        encoding: Path of the .npy file holding the encoding array.
        decoder_name: Output file name written under './decoding/'.
    """
    decoder_path = './decoding/'
    model_path = './restore/'

    # Make sure the output directory exists.
    if not os.path.exists(decoder_path):
        os.makedirs(decoder_path)

    # Restore the trained autoencoder and move it to the GPU when available.
    net = load_model(AutoEncoder(), model_path, model_name)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        net = net.cuda()
    net.eval()

    # Load the encoding and run only the decoder half of the network.
    encoding_ndarray = np.load(encoding)
    latent = Variable(torch.from_numpy(encoding_ndarray).float(), volatile=True)
    generated_spec = np.squeeze(net.decoder(latent).data.cpu().numpy())

    dec_name = decoder_path + decoder_name
    np.save(dec_name, generated_spec)
def encode(model_name, piece, encoding_name):
    """Encode a wav file with the autoencoder and save the latent array.

    Args:
        model_name: File name of the saved model under './restore/'.
        piece: Path of the input wav file.
        encoding_name: Base name of the '.npy' file written to './encoding/'.
    """
    model_path = './restore/'
    encoding_path = './encoding/'

    # Make sure the output directory exists.
    if not os.path.exists(encoding_path):
        os.makedirs(encoding_path)

    # Restore the trained autoencoder and move it to the GPU when available.
    net = load_model(AutoEncoder(), model_path, model_name)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        net = net.cuda()
    net.eval()

    # Build a batched float spectrogram from the input audio: (1, time, freq).
    input_audio = audio.load_wav(piece)
    spec = audio.spectrogram(input_audio).astype(np.float32)
    spec = torch.unsqueeze(torch.FloatTensor(torch.from_numpy(spec.T)), 0)
    spec = Variable(spec, volatile=True).contiguous()
    if use_gpu:
        spec = spec.cuda()

    # Encoder half only; the batch dimension is kept in the saved array.
    latent = net.encoder(spec).data.cpu().numpy()
    encoding_ndarray = encoding_path + encoding_name + '.npy'
    np.save(encoding_ndarray, latent)
def encode(spec):
    """Encode a spectrogram array with the restored autoencoder.

    Args:
        spec: 2-D numpy spectrogram array.

    Returns:
        The squeezed latent representation as a numpy array.
    """
    model_path = './restore/'
    model_name = 'Autoencoder1.model'

    # Restore the trained autoencoder and move it to the GPU when available.
    net = load_model(AutoEncoder(), model_path, model_name)
    use_gpu = torch.cuda.is_available()
    if use_gpu:
        net = net.cuda()
    net.eval()

    # Add a leading batch dimension and wrap for inference.
    batch = torch.unsqueeze(torch.FloatTensor(torch.from_numpy(spec)), 0)
    batch = Variable(batch, volatile=True).contiguous()
    if use_gpu:
        batch = batch.cuda()

    # Encoder half only.
    latent = net.encoder(batch)
    return np.squeeze(latent.data.cpu().numpy())
def main(args):
    """NVAE-style training driver.

    Seeds RNGs, builds data loaders, model, optimizer, LR scheduler and AMP
    grad scaler; optionally resumes from a checkpoint; then runs the
    per-epoch train/eval/sample/checkpoint loop and a final validation pass.
    """
    # ensures that weight initializations are all the same
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Rank-aware logger and summary writer.
    logging = utils.Logger(args.global_rank, args.save)
    writer = utils.Writer(args.global_rank, args.save)

    # Get data loaders.
    train_queue, valid_queue, num_classes, _ = datasets.get_loaders(args)
    args.num_total_iter = len(train_queue) * args.epochs
    warmup_iters = len(train_queue) * args.warmup_epochs
    swa_start = len(train_queue) * (args.epochs - 1)  # NOTE(review): computed but unused below

    arch_instance = utils.get_arch_cells(args.arch_instance)

    model = AutoEncoder(args, writer, arch_instance)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))
    logging.info('groups per scale: %s, total_groups: %d',
                 model.groups_per_scale, sum(model.groups_per_scale))

    if args.fast_adamax:
        # Fast adamax has the same functionality as torch.optim.Adamax, except it is faster.
        cnn_optimizer = Adamax(model.parameters(), args.learning_rate,
                               weight_decay=args.weight_decay, eps=1e-3)
    else:
        cnn_optimizer = torch.optim.Adamax(model.parameters(), args.learning_rate,
                                           weight_decay=args.weight_decay, eps=1e-3)

    # Cosine decay over the post-warmup epochs (stepped below only once
    # epoch > args.warmup_epochs).
    cnn_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        cnn_optimizer, float(args.epochs - args.warmup_epochs - 1),
        eta_min=args.learning_rate_min)
    # Mixed-precision loss scaler with a fixed initial scale of 2**10.
    grad_scalar = GradScaler(2**10)

    # Conversion factor from nats to bits-per-dimension.
    num_output = utils.num_output(args.dataset, args)
    bpd_coeff = 1. / np.log(2.) / num_output

    # if load
    checkpoint_file = os.path.join(args.save, 'checkpoint.pt')
    if args.cont_training:
        # Resume model, optimizer, scaler, scheduler and step counters.
        logging.info('loading the model.')
        checkpoint = torch.load(checkpoint_file, map_location='cpu')
        init_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        model = model.cuda()
        cnn_optimizer.load_state_dict(checkpoint['optimizer'])
        grad_scalar.load_state_dict(checkpoint['grad_scalar'])
        cnn_scheduler.load_state_dict(checkpoint['scheduler'])
        global_step = checkpoint['global_step']
    else:
        global_step, init_epoch = 0, 0

    for epoch in range(init_epoch, args.epochs):
        # update lrs.
        if args.distributed:
            # Re-seed the distributed samplers so shuffling differs per epoch.
            train_queue.sampler.set_epoch(global_step + args.seed)
            valid_queue.sampler.set_epoch(0)

        if epoch > args.warmup_epochs:
            cnn_scheduler.step()

        # Logging.
        logging.info('epoch %d', epoch)

        # Training.
        train_nelbo, global_step = train(train_queue, model, cnn_optimizer,
                                         grad_scalar, global_step, warmup_iters,
                                         writer, logging)
        logging.info('train_nelbo %f', train_nelbo)
        writer.add_scalar('train/nelbo', train_nelbo, global_step)

        model.eval()
        # generate samples less frequently
        eval_freq = 1 if args.epochs <= 50 else 20
        if epoch % eval_freq == 0 or epoch == (args.epochs - 1):
            with torch.no_grad():
                num_samples = 16
                n = int(np.floor(np.sqrt(num_samples)))
                # Sample at several temperatures and log tiled image grids.
                for t in [0.7, 0.8, 0.9, 1.0]:
                    logits = model.sample(num_samples, t)
                    output = model.decoder_output(logits)
                    output_img = output.mean if isinstance(
                        output, torch.distributions.bernoulli.Bernoulli
                    ) else output.sample(t)
                    output_tiled = utils.tile_image(output_img, n)
                    writer.add_image('generated_%0.1f' % t, output_tiled,
                                     global_step)

            # Mid-training validation with a small number of IW samples.
            valid_neg_log_p, valid_nelbo = test(valid_queue, model,
                                                num_samples=10, args=args,
                                                logging=logging)
            logging.info('valid_nelbo %f', valid_nelbo)
            logging.info('valid neg log p %f', valid_neg_log_p)
            logging.info('valid bpd elbo %f', valid_nelbo * bpd_coeff)
            logging.info('valid bpd log p %f', valid_neg_log_p * bpd_coeff)
            writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch)
            writer.add_scalar('val/nelbo', valid_nelbo, epoch)
            writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff, epoch)
            writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch)

        save_freq = int(np.ceil(args.epochs / 100))
        if epoch % save_freq == 0 or epoch == (args.epochs - 1):
            # Only rank 0 writes the checkpoint.
            if args.global_rank == 0:
                logging.info('saving the model.')
                torch.save(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': cnn_optimizer.state_dict(),
                        'global_step': global_step,
                        'args': args,
                        'arch_instance': arch_instance,
                        'scheduler': cnn_scheduler.state_dict(),
                        'grad_scalar': grad_scalar.state_dict()
                    }, checkpoint_file)

    # Final validation
    valid_neg_log_p, valid_nelbo = test(valid_queue, model, num_samples=1000,
                                        args=args, logging=logging)
    logging.info('final valid nelbo %f', valid_nelbo)
    logging.info('final valid neg log p %f', valid_neg_log_p)
    writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch + 1)
    writer.add_scalar('val/nelbo', valid_nelbo, epoch + 1)
    writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff, epoch + 1)
    writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch + 1)
    writer.close()
def train_autoencoder(train_matrix, test_set):
    """Train the attention autoencoder recommender, then evaluate it.

    Trains on implicit-feedback data with log-surplus confidence weights,
    then produces top-k recommendations per user and reports
    precision/recall/MAP at several cutoffs.
    """
    num_users, num_items = train_matrix.shape
    # Confidence weights from raw counts; the interaction matrix itself is
    # then binarized to 0/1 targets.
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        './data/Foursquare/place_correlation_gamma60.npz')
    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)

    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items, args.inner_layers, num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()

    # Per-element loss (reduce=False) so each entry can be confidence-weighted
    # before summing. NOTE(review): size_average/reduce are the pre-1.0
    # PyTorch spelling of reduction='none'.
    criterion = torch.nn.MSELoss(size_average=False, reduce=False)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    batch_size = args.batch_size
    user_indexes = np.arange(num_users)

    model.train()
    for t in range(args.epoch):
        print("epoch:{}".format(t))
        # Fresh user order each epoch.
        np.random.shuffle(user_indexes)
        avg_cost = 0.
        for batchID in range(int(num_users / batch_size)):
            start = batchID * batch_size
            end = start + batch_size
            batch_user_index = user_indexes[start:end]

            batch_x, batch_x_weight, batch_item_index = get_mini_batch(
                train_matrix, weight_matrix, batch_user_index)
            # Shift weights so unobserved entries still contribute weight 1.
            batch_x_weight += 1
            batch_x = Variable(torch.from_numpy(batch_x).type(T.FloatTensor),
                               requires_grad=False)

            y_pred = model(batch_item_index, place_correlation)

            # Compute and print loss
            batch_x_weight = Variable(torch.from_numpy(batch_x_weight).type(
                T.FloatTensor), requires_grad=False)
            loss = (batch_x_weight * criterion(y_pred, batch_x)).sum() / batch_size
            print(batchID, loss.data)

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_cost += loss / num_users * batch_size

        print("Avg loss:{}".format(avg_cost))

        # print the prediction score for the user 0
        print(
            model([train_matrix.getrow(0).indices], place_correlation)
            [:, T.LongTensor(train_matrix.getrow(0).indices.astype(np.int32))])
        print(model([train_matrix.getrow(0).indices], place_correlation))

    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]
        # Mask items the user already interacted with so they are not
        # re-recommended.
        pred_rating_vector[user_rating_vector > 0] = 0

        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score

        sorted_item = heapq.nlargest(topk, item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)

        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" %
              (user_id,
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:5], 5),
               eval_metrics.precision_at_k_per_sample(
                   test_set[user_id], sorted_item[:topk], topk)))

    # Aggregate ranking metrics over all users at several cutoffs.
    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))

    print(precision)
    print(recall)
    print(MAP)
def main():
    """Top-level training driver for the audio autoencoder.

    Builds (or restores) the network, prepares data loaders, loss,
    optimizer and log files, then trains/evaluates for the configured
    number of epochs, checkpointing the best model by validation loss.
    """
    cuda_available = torch.cuda.is_available()
    train_params, dataset_params = get_arguments()

    net = AutoEncoder()
    epoch_trained = 0
    if train_params['restore_model']:
        net = load_model(net, train_params['restore_dir'],
                         train_params['restore_model'])
        if net is None:
            # Restore failed: fall back to a freshly initialized network.
            print("Initialize network and train from scratch.")
            net = AutoEncoder()
        else:
            # NOTE(review): restored models restart epoch numbering at 0 —
            # confirm this is intended.
            epoch_trained = 0

    train_loader, validation = audio_data_loader(**dataset_params)

    if cuda_available is False:
        warnings.warn(
            "Cuda is not avalable, can not train model using multi-gpu.")
    if cuda_available:
        # Remove train_params["device_ids"] for single GPU
        if train_params["device_ids"]:
            batch_size = dataset_params["batch_size"]
            num_gpu = len(train_params["device_ids"])
            # Batches must split evenly across the DataParallel GPUs.
            assert batch_size % num_gpu == 0
            net = nn.DataParallel(net, device_ids=train_params['device_ids'])
        torch.backends.cudnn.benchmark = True
        net = net.cuda()

    criterion = nn.MSELoss()
    optimizer = get_optimizer(net, train_params['optimizer'],
                              train_params['learning_rate'],
                              train_params['momentum'])
    if cuda_available:
        criterion = criterion.cuda()

    if not os.path.exists(train_params['log_dir']):
        os.makedirs(train_params['log_dir'])
    if not os.path.exists(train_params['restore_dir']):
        os.makedirs(train_params['restore_dir'])

    train_loss_log_file = open(train_params['log_dir'] + 'train_loss_log.log',
                               'a')
    test_loss_log_file = open(train_params['log_dir'] + 'test_loss_log.log',
                              'a')

    # Add print for start of training time.
    # BUGFIX: the local was named `time`, shadowing the `time` module.
    timestamp = str(datetime.now())
    line = 'Training Started at' + timestamp + ' !!! \n'
    train_loss_log_file.writelines(line)
    train_loss_log_file.flush()

    # Keep track of losses
    train_losses = []
    eval_losses = []
    best_eval = float('inf')

    # Begin!
    for epoch in range(train_params['num_epochs']):
        train(net, criterion, optimizer, train_losses, train_params,
              train_loss_log_file, train_loader, cuda_available)
        eval_loss = evaluate(net, criterion, epoch, eval_losses, validation,
                             test_loss_log_file, cuda_available)
        if eval_loss < best_eval:
            # New best model: checkpoint via save_model and as a raw state_dict.
            save_model(net, 1, train_params['restore_dir'])
            torch.save(net.state_dict(),
                       train_params['restore_dir'] + 'bestmodel.pth')
            best_eval = eval_loss

        save_model(net, epoch_trained + epoch + 1, train_params['restore_dir'])
        torch.save([train_losses, eval_losses, epoch],
                   train_params['restore_dir'] + 'data_params')

    # Add print for end of training time
    timestamp = str(datetime.now())
    line = 'Training Ended at' + timestamp + ' !!! \n'
    train_loss_log_file.writelines(line)
    train_loss_log_file.flush()
    train_loss_log_file.close()
    test_loss_log_file.close()
def test(self, config):
    """Testing routine.

    Loads the best checkpoint from config.save_dir and reports the mean
    reconstruction loss (and a pixel-accuracy style metric) over the
    ImageFolder test split under config.data_dir.
    """
    # Initialize Dataset for testing.
    test_data = torchvision.datasets.ImageFolder(
        root=os.path.join(config.data_dir, "test"),
        transform=torchvision.transforms.ToTensor())

    # Create data loader for the test dataset with the configured number of
    # workers and no shuffling.
    te_data_loader = torch.utils.data.DataLoader(
        dataset=test_data,
        batch_size=config.batch_size,
        num_workers=config.numWorker,
        shuffle=False)

    # Create model
    model = AutoEncoder()

    # Move to GPU if you have one.
    if torch.cuda.is_available():
        model = model.cuda()

    # Create loss objects
    data_loss = nn.MSELoss()

    # Fix gpu -> cpu bug
    compute_device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load our best model and set model for testing
    load_res = torch.load(os.path.join(config.save_dir, "best_model.pth"),
                          map_location=compute_device)
    model.load_state_dict(load_res["model"])
    model.eval()

    # Implement The Test loop
    prefix = "Testing: "
    te_loss = []
    te_acc = []
    for data in tqdm(te_data_loader, desc=prefix):
        # Split the data
        x, y = data

        # Send data to GPU if we have one
        if torch.cuda.is_available():
            x = x.cuda()
            y = y.cuda()

        # Don't invoke gradient computation
        with torch.no_grad():
            # Compute logits
            logits = model.forward(x)
            # Compute loss and store as numpy
            loss = data_loss(logits, x.float())
            te_loss += [loss.cpu().numpy()]

            # Compute accuracy and store as numpy.
            # BUGFIX: was `pred.vewi(x.size())` — a typo that raises
            # AttributeError; `view` reshapes the argmax output back to the
            # input's shape before comparing.
            # NOTE(review): this reshape assumes single-channel inputs so the
            # (B, H, W) argmax can be viewed as (B, C, H, W) — confirm.
            pred = torch.argmax(logits, dim=1)
            acc = torch.mean(torch.eq(pred.view(x.size()), x).float()) * 100.0
            te_acc += [acc.cpu().numpy()]

    # Report Test loss and accuracy
    print("Test Loss = {}".format(np.mean(te_loss)))  # TODO proper logging
    print("Test Accuracy = {}%".format(np.mean(te_acc)))  # TODO proper logging
def train_autoencoder(train_matrix, test_set):
    """Evaluation-only variant of the autoencoder recommender pipeline.

    Builds the model, (nominally) loads a saved checkpoint, then produces
    top-k recommendations per user and reports precision/recall/MAP.
    """
    num_users, num_items = train_matrix.shape
    # Confidence weights from raw counts; the interaction matrix itself is
    # then binarized to 0/1 targets.
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        'Foursquare/place_correlation_gamma60.npz')
    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)

    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items, args.inner_layers, num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()

    # NOTE(review): criterion, optimizer, batch_size and user_indexes are set
    # up but never used below — there is no training loop in this variant.
    criterion = torch.nn.MSELoss(size_average=False, reduce=False)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    batch_size = args.batch_size
    user_indexes = np.arange(num_users)

    model.train()
    # NOTE(review): the return value of torch.load is discarded, so the
    # checkpoint is never applied to `model` — this likely should be
    # `model.load_state_dict(torch.load('model.pkl'))` or
    # `model = torch.load('model.pkl')` depending on how it was saved; verify.
    torch.load('model.pkl')

    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]
        # Mask items the user already interacted with so they are not
        # re-recommended.
        pred_rating_vector[user_rating_vector > 0] = 0

        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score

        sorted_item = heapq.nlargest(topk, item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)

        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" %
              (user_id,
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:5], 5),
               eval_metrics.precision_at_k_per_sample(
                   test_set[user_id], sorted_item[:topk], topk)))

    # Aggregate ranking metrics over all users at several cutoffs.
    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))

    print(precision)
    print(recall)
    print(MAP)
def main(eval_args):
    """Evaluate a trained NVAE checkpoint, or sample from it.

    In 'evaluate' mode, runs the test routine on the validation (or
    training) split. Otherwise samples repeated batches of images at the
    given temperature and writes each image to disk individually.
    """
    # ensures that weight initializations are all the same
    logging = utils.Logger(eval_args.local_rank, eval_args.save)

    # load a checkpoint
    logging.info('loading the model at:')
    logging.info(eval_args.checkpoint)
    checkpoint = torch.load(eval_args.checkpoint, map_location='cpu')
    args = checkpoint['args']

    # Backfill attributes missing from checkpoints written by older code
    # versions so the model can still be constructed.
    if not hasattr(args, 'ada_groups'):
        logging.info('old model, no ada groups was found.')
        args.ada_groups = False
    if not hasattr(args, 'min_groups_per_scale'):
        logging.info('old model, no min_groups_per_scale was found.')
        args.min_groups_per_scale = 1
    if not hasattr(args, 'num_mixture_dec'):
        logging.info('old model, no num_mixture_dec was found.')
        args.num_mixture_dec = 10

    logging.info('loaded the model at epoch %d', checkpoint['epoch'])
    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, None, arch_instance)
    # Loading is not strict because of self.weight_normalized in Conv2D class in neural_operations. This variable
    # is only used for computing the spectral normalization and it is safe not to load it. Some of our earlier models
    # did not have this variable.
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('num conv layers: %d', len(model.all_conv_layers))
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))

    if eval_args.eval_mode == 'evaluate':
        # load train valid queue
        args.data = eval_args.data
        train_queue, valid_queue, num_classes = datasets.get_loaders(args)

        if eval_args.eval_on_train:
            logging.info('Using the training data for eval.')
            valid_queue = train_queue

        # get number of bits
        num_output = utils.num_output(args.dataset)
        bpd_coeff = 1. / np.log(2.) / num_output

        valid_neg_log_p, valid_nelbo = test(
            valid_queue, model, num_samples=eval_args.num_iw_samples,
            args=args, logging=logging)
        logging.info('final valid nelbo %f', valid_nelbo)
        logging.info('final valid neg log p %f', valid_neg_log_p)
        logging.info('final valid nelbo in bpd %f', valid_nelbo * bpd_coeff)
        logging.info('final valid neg log p in bpd %f',
                     valid_neg_log_p * bpd_coeff)
    else:
        bn_eval_mode = not eval_args.readjust_bn
        num_samples = 16
        with torch.no_grad():
            n = int(np.floor(np.sqrt(num_samples)))
            # Optionally re-estimate batch-norm statistics at the sampling
            # temperature before drawing samples.
            set_bn(model, bn_eval_mode, num_samples=36, t=eval_args.temp,
                   iter=500)
            for ind in range(10):  # sampling is repeated.
                torch.cuda.synchronize()
                start = time()
                with autocast():
                    logits = model.sample(num_samples, eval_args.temp)
                    output = model.decoder_output(logits)
                    output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) \
                        else output.sample()
                torch.cuda.synchronize()
                end = time()

                # save images to 'results/eval-x/images/epochn' where x is exp id and n is epoch number
                # try saving the images one by one
                path_to_images = '/content/gdrive/MyDrive/pipeline_results/NVAE/results/eval-1/images'
                if not os.path.exists(path_to_images):
                    os.makedirs(path_to_images)
                for i in range(output_img.size(0)):
                    vutils.save_image(
                        output_img[i, :, :, :],
                        '%s/sample_batch%03d_img%03d.png' %
                        (path_to_images, ind + 1, i + 1),
                        normalize=True)
type=int, default=144, help='batch size') parser.add_argument('--epoch', type=int, default=1, help='epoch size') opt = parser.parse_args() # 超参数 LR = opt.lr BATCH_SIZE = opt.batch_size EPOCHES = opt.epoch LOG_INTERVAL = 5 # 获取gpu是不是可用 cuda_available = torch.cuda.is_available() # 实例化网络 auto = AutoEncoder() if cuda_available: auto.cuda() # 定义优化器和损失函数 optimizer = torch.optim.Adam(auto.parameters(), lr=LR) # 数据准备 root_dir = "./celeba_select" image_files = os.listdir(root_dir) train_dataset = CelebaDataset(root_dir, image_files, (64, 64), transforms.Compose([ToTensor()])) train_loader = DataLoader(train_dataset, batch_size=32, num_workers=1, shuffle=True) for i in range(EPOCHES): # 打乱数据 auto.train() train_loss = 0
def main(eval_args):
    """Evaluate a trained NVAE checkpoint, or sample from it.

    In 'evaluate' mode, runs the test routine on the validation, training
    or test split. Otherwise samples repeated batches at the given
    temperature, pickles the raw sample tensors, and saves a tiled image
    of each batch.
    """
    # ensures that weight initializations are all the same
    logging = utils.Logger(eval_args.local_rank, eval_args.save)

    # load a checkpoint
    logging.info('loading the model at:')
    logging.info(eval_args.checkpoint)
    checkpoint = torch.load(eval_args.checkpoint, map_location='cpu')
    args = checkpoint['args']
    logging.info('loaded the model at epoch %d', checkpoint['epoch'])
    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, None, arch_instance)
    model.load_state_dict(checkpoint['state_dict'])
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('num conv layers: %d', len(model.all_conv_layers))
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))

    if eval_args.eval_mode == 'evaluate':
        # load train valid queue
        args.data = eval_args.data
        train_queue, valid_queue, num_classes, test_queue = datasets.get_loaders(args)

        if eval_args.eval_on_train:
            logging.info('Using the training data for eval.')
            valid_queue = train_queue
        if eval_args.eval_on_test:
            logging.info('Using the test data for eval.')
            valid_queue = test_queue

        # get number of bits
        num_output = utils.num_output(args.dataset, args)
        bpd_coeff = 1. / np.log(2.) / num_output

        valid_neg_log_p, valid_nelbo = test(
            valid_queue, model, num_samples=eval_args.num_iw_samples,
            args=args, logging=logging)
        logging.info('final valid nelbo %f', valid_nelbo)
        logging.info('final valid neg log p %f', valid_neg_log_p)
        logging.info('final valid nelbo in bpd %f', valid_nelbo * bpd_coeff)
        logging.info('final valid neg log p in bpd %f',
                     valid_neg_log_p * bpd_coeff)
    else:
        bn_eval_mode = not eval_args.readjust_bn
        num_samples = 16
        with torch.no_grad():
            n = int(np.floor(np.sqrt(num_samples)))
            # Optionally re-estimate batch-norm statistics at the sampling
            # temperature before drawing samples.
            set_bn(model, bn_eval_mode, num_samples=36, t=eval_args.temp,
                   iter=500)
            for ind in range(eval_args.repetition):  # sampling is repeated.
                torch.cuda.synchronize()
                start = time()
                with autocast():
                    logits = model.sample(num_samples, eval_args.temp)
                    output = model.decoder_output(logits)
                    output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) \
                        else output.sample()
                torch.cuda.synchronize()
                end = time()

                # save to file
                total_name = "{}/data_to_save_{}_{}.pickle".format(
                    eval_args.save, eval_args.name_to_save, ind)
                with open(total_name, 'wb') as handle:
                    # BUGFIX: was `output_img.deatach().numpy()` — a typo
                    # (AttributeError); additionally the CUDA tensor must be
                    # moved to the CPU before .numpy().
                    pickle.dump(output_img.detach().cpu().numpy(), handle,
                                protocol=pickle.HIGHEST_PROTOCOL)

                output_tiled = utils.tile_image(output_img, n).cpu().numpy().transpose(1, 2, 0)
                logging.info('sampling time per batch: %0.3f sec', (end - start))
                output_tiled = np.asarray(output_tiled * 255, dtype=np.uint8)
                output_tiled = np.squeeze(output_tiled)
                plt.imshow(output_tiled)
                plt.savefig("{}/generation_{}_{}".format(
                    eval_args.save, eval_args.name_to_save, ind))
def main(eval_args):
    """Evaluate a trained NVAE checkpoint, or sample from it.

    In 'evaluate' mode, runs the test routine on the validation (or
    training) split. Otherwise samples repeated batches at the given
    temperature and displays a tiled image of each batch.
    """
    # ensures that weight initializations are all the same
    logging = utils.Logger(eval_args.local_rank, eval_args.save)

    # load a checkpoint
    logging.info('loading the model at:')
    logging.info(eval_args.checkpoint)
    checkpoint = torch.load(eval_args.checkpoint, map_location='cpu')
    args = checkpoint['args']

    # Backfill attributes missing from checkpoints written by older code
    # versions so the model can still be constructed.
    if not hasattr(args, 'ada_groups'):
        logging.info('old model, no ada groups was found.')
        args.ada_groups = False
    if not hasattr(args, 'min_groups_per_scale'):
        logging.info('old model, no min_groups_per_scale was found.')
        args.min_groups_per_scale = 1
    if not hasattr(args, 'num_mixture_dec'):
        logging.info('old model, no num_mixture_dec was found.')
        args.num_mixture_dec = 10

    logging.info('loaded the model at epoch %d', checkpoint['epoch'])
    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, None, arch_instance)
    # Loading is not strict because of self.weight_normalized in Conv2D class in neural_operations. This variable
    # is only used for computing the spectral normalization and it is safe not to load it. Some of our earlier models
    # did not have this variable.
    model.load_state_dict(checkpoint['state_dict'], strict=False)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('num conv layers: %d', len(model.all_conv_layers))
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))

    if eval_args.eval_mode == 'evaluate':
        # load train valid queue
        args.data = eval_args.data
        train_queue, valid_queue, num_classes = datasets.get_loaders(args)

        if eval_args.eval_on_train:
            logging.info('Using the training data for eval.')
            valid_queue = train_queue

        # get number of bits
        num_output = utils.num_output(args.dataset)
        bpd_coeff = 1. / np.log(2.) / num_output

        valid_neg_log_p, valid_nelbo = test(
            valid_queue, model, num_samples=eval_args.num_iw_samples,
            args=args, logging=logging)
        logging.info('final valid nelbo %f', valid_nelbo)
        logging.info('final valid neg log p %f', valid_neg_log_p)
        logging.info('final valid nelbo in bpd %f', valid_nelbo * bpd_coeff)
        logging.info('final valid neg log p in bpd %f',
                     valid_neg_log_p * bpd_coeff)
    else:
        bn_eval_mode = not eval_args.readjust_bn
        num_samples = 16
        with torch.no_grad():
            n = int(np.floor(np.sqrt(num_samples)))
            # Optionally re-estimate batch-norm statistics at the sampling
            # temperature before drawing samples.
            set_bn(model, bn_eval_mode, num_samples=36, t=eval_args.temp,
                   iter=500)
            for ind in range(10):  # sampling is repeated.
                torch.cuda.synchronize()
                start = time()
                with autocast():
                    logits = model.sample(num_samples, eval_args.temp)
                    output = model.decoder_output(logits)
                    output_img = output.mean if isinstance(output, torch.distributions.bernoulli.Bernoulli) \
                        else output.sample()
                torch.cuda.synchronize()
                end = time()

                # Tile the batch into one HxWxC image and display it.
                output_tiled = utils.tile_image(output_img, n).cpu().numpy().transpose(1, 2, 0)
                logging.info('sampling time per batch: %0.3f sec', (end - start))
                output_tiled = np.asarray(output_tiled * 255, dtype=np.uint8)
                output_tiled = np.squeeze(output_tiled)
                plt.imshow(output_tiled)
                plt.show()
train=True, transform=transforms.ToTensor(), download=True) test_set = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True) train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=batch_size, shuffle=True) test_loader = torch.utils.data.DataLoader(dataset=test_set, batch_size=batch_size, shuffle=False) model = AutoEncoder(input_size=input_size) model.cuda() optimizer = optim.Adam( model.parameters(), lr=LR, betas=(0.9, 0.999), eps=1e-08, weight_decay=WD, amsgrad=False) # write so many arguments to get a clear view criterion = nn.MSELoss() losses_adam = [] for i in range(EP): loss_ep = 0 idx = 0 for batch_idx, (inputs, targets) in enumerate( train_loader): # Notice that we will not use target here.
if __name__ == '__main__':
    opts = get_args()

    # Make sure the output directory exists before anything is written.
    if not os.path.exists(opts.outDir):
        os.makedirs(opts.outDir)

    # MNIST digits dataset, served in fixed order (no shuffling).
    mnist = torchvision.datasets.MNIST(root=opts.root,
                                       train=True,
                                       transform=torchvision.transforms.ToTensor(),
                                       download=True)
    loader = DataLoader(dataset=mnist, batch_size=opts.bs, shuffle=False)

    # Build the autoencoder; move it to the GPU when one is available
    # (link pytorch to CUDA).
    model = AutoEncoder(opts.nz)
    if torch.cuda.is_available():
        model.cuda()

    print("test the data")
    test_dataset(traindata=mnist, trainset=loader)

    print("begin training ...")
    train(autoencoder=model, outDir=opts.outDir, trainset=loader,
          traindata=mnist)

    print("visualize latent space representation")
    visualize(autoencoder=model, outDir=opts.outDir, trainset=loader,
              traindata=mnist)
def train(self, config):
    """Training routine.

    Builds ImageFolder datasets and loaders from ``config.data_dir``,
    trains an ``AutoEncoder`` with an MSE reconstruction loss, logs and
    checkpoints every ``config.rep_intv`` iterations, validates every
    ``config.val_intv`` iterations, and saves the best-scoring model.

    Args:
        config: options object; fields read here are ``data_dir``,
            ``batch_size``, ``numWorker``, ``learn_rate``, ``log_dir``,
            ``save_dir``, ``resume``, ``num_epoch``, ``rep_intv``,
            ``val_intv``.
    """
    # Initialize datasets for both training and validation.
    train_data = torchvision.datasets.ImageFolder(
        root=os.path.join(config.data_dir, "train"),
        transform=torchvision.transforms.ToTensor())
    valid_data = torchvision.datasets.ImageFolder(
        root=os.path.join(config.data_dir, "valid"),
        transform=torchvision.transforms.ToTensor())

    # Create data loaders; only the training set is shuffled.
    tr_data_loader = torch.utils.data.DataLoader(
        dataset=train_data,
        batch_size=config.batch_size,
        num_workers=config.numWorker,
        shuffle=True)
    va_data_loader = torch.utils.data.DataLoader(
        dataset=valid_data,
        batch_size=config.batch_size,
        num_workers=config.numWorker,
        shuffle=False)

    # Create model instance.
    #model = Model()
    model = AutoEncoder()

    # Move model to gpu if cuda is available.
    if torch.cuda.is_available():
        model = model.cuda()
    # Make sure that the model is set for training.
    model.train()

    # Reconstruction loss: mean-squared error of output vs. input.
    data_loss = nn.MSELoss()

    # Create optimizer. No need to move the optimizer (as of PyTorch 1.0),
    # it lies in the same space as the model.
    optimizer = optim.Adam(model.parameters(), lr=config.learn_rate)

    # Create summary writers for training and validation curves.
    tr_writer = SummaryWriter(
        log_dir=os.path.join(config.log_dir, "train"))
    va_writer = SummaryWriter(
        log_dir=os.path.join(config.log_dir, "valid"))

    # Create log directory and save directory if they do not exist.
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    if not os.path.exists(config.save_dir):
        os.makedirs(config.save_dir)

    # Initialize training state.
    iter_idx = -1  # make counter start at zero
    best_va_acc = 0  # best validation accuracy seen so far

    # Prepare checkpoint file and model file to save and load from.
    checkpoint_file = os.path.join(config.save_dir, "checkpoint.pth")
    bestmodel_file = os.path.join(config.save_dir, "best_model.pth")

    # Check for existing training results. If a checkpoint exists and
    # `config.resume` is True, resume from the previous run; otherwise the
    # stale checkpoint is deleted and training starts fresh.
    if os.path.exists(checkpoint_file):
        if config.resume:
            # Use `torch.load` to load the checkpoint file and then restore
            # everything required to continue training. Model and optimizer
            # state are restored via `load_state_dict`.
            print("Checkpoint found! Resuming")  # TODO proper logging
            # Fix gpu -> cpu bug: map storages onto whatever device is
            # available now, so a GPU-written checkpoint loads on CPU too.
            compute_device = 'cuda' if torch.cuda.is_available() else 'cpu'
            load_res = torch.load(
                checkpoint_file, map_location=compute_device)
            # Resume iteration counter.
            iter_idx = load_res["iter_idx"]
            # Resume best validation result.
            best_va_acc = load_res["best_va_acc"]
            # Resume model.
            model.load_state_dict(load_res["model"])
            # Resume optimizer.
            optimizer.load_state_dict(load_res["optimizer"])
            # Note that we do not resume the epoch, since we will never be
            # able to properly recover the shuffling, unless we remember
            # the random seed, for example. For simplicity, we will simply
            # ignore this, and run `config.num_epoch` epochs regardless of
            # resuming.
        else:
            os.remove(checkpoint_file)

    # Training loop.
    for epoch in range(config.num_epoch):
        # For each iteration.
        prefix = "Training Epoch {:3d}: ".format(epoch)
        for data in tqdm(tr_data_loader, desc=prefix):
            # Counter.
            iter_idx += 1

            # Split the data: x is the image batch, y is the (unused) label.
            x, y = data
            #print(x)

            # Send data to GPU if we have one.
            if torch.cuda.is_available():
                x = x.cuda()
                y = y.cuda()

            # Apply the model to obtain scores (forward pass); for an
            # autoencoder this is the reconstruction of x.
            logits = model.forward(x)

            # Compute the loss against the input itself (reconstruction).
            loss = data_loss(logits, x.float())

            # Compute gradients.
            loss.backward()

            # Update parameters.
            optimizer.step()

            # Zero the parameter gradients in the optimizer.
            optimizer.zero_grad()

            # Monitor results every report interval.
            if iter_idx % config.rep_intv == 0:
                # Compute accuracy without tracking gradients.
                with torch.no_grad():
                    # NOTE(review): argmax over dim=1 treats the channel
                    # dimension of a reconstruction as class scores, and
                    # pred.view(x.size()) only matches element counts when
                    # that dimension has size 1. This looks like a leftover
                    # from a classifier — confirm the intended metric.
                    pred = torch.argmax(logits, dim=1)
                    acc = torch.mean(
                        torch.eq(pred.view(x.size()), x).float()) * 100.0
                # Write loss and accuracy to tensorboard, using keywords
                # `loss` and `accuracy`.
                tr_writer.add_scalar("loss", loss, global_step=iter_idx)
                tr_writer.add_scalar("accuracy", acc, global_step=iter_idx)

                # Save a full training checkpoint (resume payload).
                torch.save(
                    {
                        "iter_idx": iter_idx,
                        "best_va_acc": best_va_acc,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict(),
                        "loss": loss,
                        "epoch": epoch,
                        "acc": acc
                    }, checkpoint_file)

            # Validate results every validation interval.
            if iter_idx % config.val_intv == 0:
                # Lists to contain per-batch losses and accuracies over the
                # whole validation set.
                va_loss = []
                va_acc = []
                # Set model for evaluation.
                model = model.eval()
                for data in va_data_loader:
                    # Split the data.
                    x, y = data
                    # Send data to GPU if we have one.
                    if torch.cuda.is_available():
                        x = x.cuda()
                        y = y.cuda()
                    # Apply forward pass to compute the losses and
                    # accuracies for each of the validation batches.
                    with torch.no_grad():
                        # Compute logits.
                        logits = model.forward(x)
                        # Compute loss and store as numpy.
                        loss = data_loss(logits, x.float())
                        va_loss += [loss.cpu().numpy()]
                        # Compute accuracy and store as numpy (same
                        # questionable metric as in the report branch).
                        pred = torch.argmax(logits, dim=1)
                        acc = torch.mean(
                            torch.eq(pred.view(x.size()), x).float()) * 100.0
                        va_acc += [acc.cpu().numpy()]
                # Set model back for training.
                model = model.train()
                # Take average over validation batches.
                va_loss = np.mean(va_loss)
                va_acc = np.mean(va_acc)
                # Write to tensorboard using `va_writer`.
                va_writer.add_scalar("loss", va_loss, global_step=iter_idx)
                va_writer.add_scalar("accuracy", va_acc, global_step=iter_idx)
                # Check if best accuracy; if so, save best model at
                # `bestmodel_file` (similar payload, minus the epoch).
                if va_acc > best_va_acc:
                    best_va_acc = va_acc
                    # NOTE(review): `loss` and `acc` below are from the
                    # last validation batch only, not the averages —
                    # confirm that is what should be persisted.
                    torch.save(
                        {
                            "iter_idx": iter_idx,
                            "best_va_acc": best_va_acc,
                            "model": model.state_dict(),
                            "optimizer": optimizer.state_dict(),
                            "loss": loss,
                            "acc": acc
                        }, bestmodel_file)