def train(dataloader, parameters, device):
    """Train an AutoEncoder on `dataloader` and return the trained model.

    Parameters
    ----------
    dataloader : iterable yielding 1-tuples of input tensors.
    parameters : dict with optional keys 'nlayers', 'lr', 'weight_decay',
        'epochs' (defaults 5, 1e-5, 0., 1000).
    device : torch device (or device string) for the model and batches.

    Returns
    -------
    The trained AutoEncoder.
    """
    model = AutoEncoder(input_dim=1900,
                        nlayers=parameters.get('nlayers', 5),
                        latent=100)
    model = model.to(device)
    model.train()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=parameters.get('lr', 1e-5),
                                 weight_decay=parameters.get(
                                     'weight_decay', 0.))
    loss_func = torch.nn.MSELoss()
    for epoch in range(parameters.get('epochs', 1000)):
        for index, (data, ) in enumerate(dataloader, 1):
            # Fix: batches must live on the same device as the model; the
            # original never transferred them, which fails when device='cuda'.
            data = data.to(device)
            optimizer.zero_grad()
            output = model(data)
            loss = loss_func(output, data)
            loss.backward()
            optimizer.step()
    # (removed the original `train_loss` accumulator: it was summed across
    # every epoch but never read or returned)
    return model
def main(opt):
    """End-to-end training entry point.

    Builds datasets/model/optimizer from `opt`, runs train/val for
    opt.epoch epochs, checkpoints the best-validation model to
    weights/AutoEncoder_try1.pth, and plots losses to Result.jpg.
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Dataset
    print('Dataset....')
    transform = transforms.Compose([
        transforms.Resize((600, 600)),
        transforms.Grayscale(3),
        transforms.ToTensor()
    ])
    train_set = myDataset(image_path=opt.train_path, transform=transform)
    val_set = myDataset(image_path=opt.val_path, transform=transform)
    train_loader = DataLoader(train_set, batch_size=opt.train_batch_size)
    val_loader = DataLoader(val_set, batch_size=opt.val_batch_size)

    # Model
    print('Model....')
    model = AutoEncoder()
    model.to(device)

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=opt.lr)
    loss_func = nn.MSELoss()

    # Train
    print('Training....')
    train_epoch_loss = []
    val_epoch_loss = []
    train_iter_losses = []
    val_iter_losses = []
    # Fix: `best` must persist across epochs. The original re-initialized
    # it to 10000 inside the loop, so the "best" check was meaningless and
    # the model was overwritten every single epoch.
    best = 10000
    for e in range(opt.epoch):
        train_iter_loss = train(opt, model, train_loader, optimizer,
                                loss_func, device, e)
        train_iter_losses += train_iter_loss
        train_epoch_loss.append(sum(train_iter_loss))
        val_iter_loss = val(opt, model, val_loader, loss_func, device, e)
        val_iter_losses += val_iter_loss
        val_epoch_loss.append(sum(val_iter_loss))
        # save model only when validation loss improves
        if val_epoch_loss[-1] < best:
            print('Saving Model....')
            torch.save(model, 'weights/AutoEncoder_try1.pth')
            best = val_epoch_loss[-1]

    print('Saving Result')
    plt.figure(figsize=(10, 10))
    plt.plot(train_iter_losses)
    plt.plot(val_iter_losses)
    plt.legend(['Train_loss', 'Val_loss'])
    plt.savefig('Result.jpg')
def train(output_filename, model_type, hidden_size, loss_type, norm_type,
          sigma_noise):
    """Train an autoencoder variant on MNIST and save its state dict.

    Parameters
    ----------
    output_filename : checkpoint file name under ./saved_models/.
    model_type : 'AE' | 'LTAE' | 'VAE'.
    hidden_size : latent dimensionality passed to the model.
    loss_type : 'l2' | 'cross_entropy' reconstruction loss.
    norm_type, sigma_noise : forwarded to LatentAutoEncoder only.

    Raises
    ------
    ValueError
        On an unknown loss_type or model_type. (The original fell through
        silently and crashed later with NameError on `loss_func`/`model`.)
    """
    train_data = torchvision.datasets.MNIST(
        root='datasets/mnist/',
        train=True,
        transform=torchvision.transforms.ToTensor(),
        download=False,
    )
    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=BATCH_SIZE,
                                   shuffle=True)
    if loss_type == 'l2':
        loss_func = nn.MSELoss()
    elif loss_type == 'cross_entropy':
        loss_func = F.binary_cross_entropy
    else:
        raise ValueError('unknown loss_type: %r' % (loss_type, ))
    if model_type == 'AE':
        model = AutoEncoder(hidden_size).cuda()
    elif model_type == 'LTAE':
        model = LatentAutoEncoder(hidden_size, norm_type,
                                  sigma=sigma_noise).cuda()
        model.set_device()
    elif model_type == 'VAE':
        model = VariationalAE(hidden_size).cuda()
    else:
        raise ValueError('unknown model_type: %r' % (model_type, ))
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    for epoch in range(EPOCH):
        for step, (x, _) in enumerate(train_loader):
            optimizer.zero_grad()
            # flatten 28x28 images; input and reconstruction target coincide
            x_batch = x.view(-1, 28 * 28).cuda()
            y_batch = x.view(-1, 28 * 28).cuda()
            if model_type == 'AE':
                _, decoded = model(x_batch)
                loss = loss_func(decoded, y_batch)
            elif model_type == 'LTAE':
                _, latent, transformed, decoded = model(x_batch)
                loss = loss_func(decoded, y_batch)
                # extra penalty ties the transformed code to the latent code
                loss += torch.nn.functional.mse_loss(transformed, latent)
            elif model_type == 'VAE':
                decoded, mu, logvar = model(x_batch)
                loss = loss_func_vae(decoded, x_batch, mu, logvar, loss_type)
            loss.backward()
            optimizer.step()
        # log the last batch loss every 10 epochs
        if epoch % 10 == 0:
            print('Epoch: ', epoch,
                  '| train loss: %.4f' % loss.detach().cpu())
    torch.save({'state_dict': model.state_dict()},
               f'./saved_models/{output_filename}')
def main():
    """Train a convolutional autoencoder on unlabeled 21x21 contact matrices.

    Uses BCE-with-logits reconstruction loss, logs progress every
    args.log_interval batches, and saves per-batch timing via AverageMeter.
    """
    use_cuda = args.use_cuda
    train_data = UnlabeledContact(data=args.data_dir)
    print('Number of samples: {}'.format(len(train_data)))
    trainloader = DataLoader(train_data, batch_size=args.batch_size)

    # Contact matrices are 21x21
    input_size = 441
    img_height = 21
    img_width = 21

    vae = AutoEncoder(code_size=20,
                      imgsize=input_size,
                      height=img_height,
                      width=img_width)
    criterion = nn.BCEWithLogitsLoss()
    if use_cuda:
        #vae = nn.DataParallel(vae)
        vae = vae.cuda()  #.half()
        criterion = criterion.cuda()
    optimizer = optim.SGD(vae.parameters(), lr=0.01)

    clock = AverageMeter(name='clock32single', rank=0)
    epoch_loss = 0
    total_loss = 0
    end = time.time()
    for epoch in range(15):
        for batch_idx, data in enumerate(trainloader):
            inputs = data['cont_matrix']
            # Fix: derive the batch dimension from the tensor itself.
            # resize_(args.batch_size, ...) silently corrupted the final,
            # smaller batch of every epoch.
            inputs = inputs.view(inputs.size(0), 1, 21, 21)
            inputs = inputs.float()
            if use_cuda:
                inputs = inputs.cuda()  #.half()
            optimizer.zero_grad()
            output, code = vae(inputs)
            loss = criterion(output, inputs)
            loss.backward()
            optimizer.step()
            # Fix: loss.data[0] was removed in PyTorch >= 0.5; .item() is the
            # supported scalar accessor (Variable wrapping is a no-op too).
            epoch_loss += loss.item()

            clock.update(time.time() - end)
            end = time.time()

            if batch_idx % args.log_interval == 0:
                # Fix: progress used len(data) (number of dict keys); count
                # processed samples with the actual batch size instead.
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * inputs.size(0),
                    len(trainloader.dataset),
                    100. * batch_idx / len(trainloader), loss.item()))

    clock.save(
        path=
        '/home/ygx/libraries/mds/molecules/molecules/conv_autoencoder/runtimes'
    )
def main(args):
    """Train AutoEncoder(input_dim=1900, latent=100), optionally under
    DistributedDataParallel, checkpointing whenever the test loss is the
    best seen so far.

    Expects on `args`: cpu, env_size, env_rank, tmpname, nlayers, modelfile,
    lr, weight_decay, epochs. `load_dataset`, `train` and `test` are
    module-level helpers defined elsewhere in this file.
    """
    device = torch.device(
        'cuda' if torch.cuda.is_available() and not args.cpu else 'cpu')
    print('Using %s device.' % device)
    # World size / rank are read from environment variables whose *names*
    # are configurable (e.g. set by the job scheduler or launcher).
    world_size = int(
        os.environ[args.env_size]) if args.env_size in os.environ else 1
    local_rank = int(
        os.environ[args.env_rank]) if args.env_rank in os.environ else 0
    if local_rank == 0:
        print(vars(args))
    if world_size > 1:
        print('rank: {}/{}'.format(local_rank + 1, world_size))
        # File-based rendezvous: every rank must see the same tmpname path.
        torch.distributed.init_process_group(backend='gloo',
                                             init_method='file://%s' %
                                             args.tmpname,
                                             rank=local_rank,
                                             world_size=world_size)
    train_dataloader, test_dataloader = load_dataset(args, device, world_size)
    net = AutoEncoder(input_dim=1900, nlayers=args.nlayers,
                      latent=100).to(device)
    if world_size > 1:
        net = torch.nn.parallel.DistributedDataParallel(net)
    # NOTE(review): when world_size > 1 this loads into the DDP wrapper, so
    # the checkpoint keys presumably carry the 'module.' prefix -- confirm
    # against how the checkpoints below are produced.
    if args.modelfile:
        net.load_state_dict(torch.load(args.modelfile))
    # define our optimizer and loss function
    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)
    loss_func = nn.MSELoss(reduction='mean')
    test_losses = []
    for epoch in range(args.epochs):
        epoch_start = timeit.default_timer()
        train(train_dataloader, net, optimizer, loss_func, epoch)
        test_loss = test(test_dataloader, net, loss_func)
        print(' %5.2f sec' % (timeit.default_timer() - epoch_start))
        test_losses.append(test_loss)
        # test_loss is already in test_losses, so `<= min(...)` means
        # "this epoch ties or beats the best so far" -> checkpoint it.
        if test_loss <= min(test_losses):
            torch.save(net.state_dict(), 'model/%5.3f.pth' % min(test_losses))
# NOTE(review): fragment -- the opening of the train_set = datasets.MNIST(...)
# call sits above this chunk; `download=True)` closes it.
download=True)
test_set = datasets.MNIST(root='./data',
                          train=False,
                          transform=transforms.ToTensor(),
                          download=True)
train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                           batch_size=batch_size,
                                           shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_set,
                                          batch_size=batch_size,
                                          shuffle=False)
model = AutoEncoder(input_size=input_size)
model.cuda()
optimizer = optim.Adam(
    model.parameters(),
    lr=LR,
    betas=(0.9, 0.999),
    eps=1e-08,
    weight_decay=WD,
    amsgrad=False)  # write so many arguments to get a clear view
criterion = nn.MSELoss()
losses_adam = []  # per-epoch losses for the Adam run
for i in range(EP):
    loss_ep = 0
    idx = 0
    for batch_idx, (inputs, targets) in enumerate(
            train_loader):  # Notice that we will not use target here.
        # flatten 28x28 MNIST images into 784-vectors on the GPU
        # (chunk appears truncated here; the training step follows below)
        inputs = inputs.view([-1, 28 * 28]).cuda()
def train_autoencoder(train_matrix, test_set):
    """Load a pre-trained AutoEncoder and evaluate top-k recommendation.

    Despite its name, this variant does no training: it restores weights
    from 'model.pkl' and runs the precision/recall/MAP evaluation at
    k in {5, 10, 15, 20}.

    Parameters
    ----------
    train_matrix : scipy.sparse user-item matrix (num_users x num_items);
        binarized in place (all positive entries set to 1.0).
    test_set : per-user ground-truth item lists used by eval_metrics.
    """
    num_users, num_items = train_matrix.shape
    # NOTE(review): the confidence weights are unused in this evaluation-only
    # path; the call is kept in case it is relied on elsewhere -- confirm
    # before removing.
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        'Foursquare/place_correlation_gamma60.npz')
    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)
    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items,
                        args.inner_layers,
                        num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()
    # Fix: the original called torch.load('model.pkl') and DISCARDED the
    # result, so a freshly initialized (untrained) model was evaluated.
    checkpoint = torch.load('model.pkl')
    if isinstance(checkpoint, dict):
        model.load_state_dict(checkpoint)
    else:
        # the checkpoint was saved as a whole pickled module
        model = checkpoint

    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]
        # never recommend items the user already interacted with
        pred_rating_vector[user_rating_vector > 0] = 0

        # rank all items by predicted score and keep the top-k
        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score
        sorted_item = heapq.nlargest(topk,
                                     item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)
        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" %
              (user_id,
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:5], 5),
               eval_metrics.precision_at_k_per_sample(
                   test_set[user_id], sorted_item[:topk], topk)))

    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))
    print(precision)
    print(recall)
    print(MAP)
# Script chunk: train a plain AutoEncoder on MNIST with an elementwise
# squared-error loss (chunk appears truncated at the end).
from model import AutoEncoder
from visualize import *

# NOTE(review): `device` is computed but neither the model nor the batches
# are ever moved to it, so this always runs on CPU -- confirm intent.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
algo_name = 'AE'
max_episodes = 100
batch_size = 128
lr = 1e-3  # play with this value

# load dataset from MNIST, save to ../datasets
dataset = MNIST('../datasets', transform=transforms.ToTensor(), download=True)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

DeCiPhEr = AutoEncoder()
optimizer = optim.Adam(DeCiPhEr.parameters(), lr=lr)

for episode in range(max_episodes):
    for data in dataloader:
        img, _ = data
        img = img.view(img.size(0), -1)  # squash to 1D
        output = DeCiPhEr(img)
        # calculate loss and update acc.
        loss = torch.pow(img - output, 2).mean()  # mean squared error
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    if episode % 10 == 0:
        # reshape the last batch back into image form; the chunk is cut here,
        # presumably the reshaped batch is saved/visualized just below
        img = img.view(output.size(0), 1, 28, 28)
def train(args):
    """Train a denoising autoencoder on face image crops.

    Shuffles args.data_dir/face_images/*/*.png, splits 80/20 into
    train/test, trains with MSE between the reconstruction of a noised
    input and the clean input, and saves per-epoch train/test loss arrays
    as .npy files under args.model_dir.
    """
    print('Start')
    if torch.cuda.is_available():
        device = 'cuda'
        # default newly created tensors to CUDA so dataset batches land on
        # the GPU without explicit .to(device) calls
        torch.set_default_tensor_type('torch.cuda.FloatTensor')
    else:
        device = 'cpu'
    # hyper-parameters from the CLI
    train_epoch = args.train_epoch
    lr = args.lr
    beta1 = args.beta1
    beta2 = args.beta2
    batch_size = args.batch_size
    noise_var = args.noise_var
    h_dim = args.h_dim
    # shuffle paths, then split 80/20 into train/test
    images_path = glob.glob(args.data_dir + '/face_images/*/*.png')
    random.shuffle(images_path)
    split_num = int(len(images_path) * 0.8)
    train_path = images_path[:split_num]
    test_path = images_path[split_num:]
    # NOTE(review): the "result" set is the last 15 shuffled paths, i.e. a
    # subset of the test split -- confirm that overlap is intended.
    result_path = images_path[-15:]
    train_dataset = MyDataset(train_path)
    train_dataloader = torch.utils.data.DataLoader(train_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    test_dataset = MyDataset(test_path)
    test_dataloader = torch.utils.data.DataLoader(test_dataset,
                                                  batch_size=batch_size,
                                                  shuffle=True)
    result_dataset = MyDataset(result_path)
    result_dataloader = torch.utils.data.DataLoader(
        result_dataset,
        batch_size=result_dataset.__len__(),
        shuffle=False)
    # one fixed batch holding every "result" image
    result_images = next(iter(result_dataloader))
    model = AutoEncoder(h_dim=h_dim).to(device)
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr, (beta1, beta2))
    out_path = args.model_dir
    train_loss_list = []
    test_loss_list = []
    for epoch in range(train_epoch):
        model.to(device)  # redundant after the first epoch, but harmless
        loss_train = 0
        for x in train_dataloader:
            # denoising objective: reconstruct the clean x from a noised copy
            noised_x = add_noise(x, noise_var)
            recon_x = model(noised_x)
            loss = criterion(recon_x, x)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_train += loss.item()
        loss_train /= train_dataloader.__len__()
        train_loss_list.append(loss_train)
        if epoch % 1 == 0:  # i.e. evaluate every epoch
            with torch.no_grad():
                model.eval()
                loss_test = 0
                # NOTE(review): test inputs are NOT noised, so this measures
                # plain reconstruction loss -- confirm this is intended.
                for x_test in test_dataloader:
                    recon_x_test = model(x_test)
                    loss_test += criterion(recon_x_test, x_test).item()
                loss_test /= test_dataloader.__len__()
                test_loss_list.append(loss_test)
                np.save(os.path.join(out_path, 'train_loss.npy'),
                        np.array(train_loss_list))
                np.save(os.path.join(out_path, 'test_loss.npy'),
                        np.array(test_loss_list))
            model.train()
def main(args):
    """Hierarchical-autoencoder training entry point.

    Seeds all RNGs, builds data loaders and the AutoEncoder, trains with
    (fast) Adamax + cosine LR + AMP grad scaling, periodically samples
    images at several temperatures and validates, checkpoints to
    <args.save>/checkpoint.pt, and runs a final heavy validation.
    """
    # ensures that weight initializations are all the same
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    logging = utils.Logger(args.global_rank, args.save)
    writer = utils.Writer(args.global_rank, args.save)

    # Get data loaders.
    train_queue, valid_queue, num_classes, _ = datasets.get_loaders(args)
    args.num_total_iter = len(train_queue) * args.epochs
    warmup_iters = len(train_queue) * args.warmup_epochs
    # NOTE(review): swa_start is computed but never used in this chunk.
    swa_start = len(train_queue) * (args.epochs - 1)

    arch_instance = utils.get_arch_cells(args.arch_instance)
    model = AutoEncoder(args, writer, arch_instance)
    model = model.cuda()

    logging.info('args = %s', args)
    logging.info('param size = %fM ', utils.count_parameters_in_M(model))
    logging.info('groups per scale: %s, total_groups: %d',
                 model.groups_per_scale, sum(model.groups_per_scale))

    if args.fast_adamax:
        # Fast adamax has the same functionality as torch.optim.Adamax,
        # except it is faster.
        cnn_optimizer = Adamax(model.parameters(),
                               args.learning_rate,
                               weight_decay=args.weight_decay,
                               eps=1e-3)
    else:
        cnn_optimizer = torch.optim.Adamax(model.parameters(),
                                           args.learning_rate,
                                           weight_decay=args.weight_decay,
                                           eps=1e-3)
    cnn_scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        cnn_optimizer,
        float(args.epochs - args.warmup_epochs - 1),
        eta_min=args.learning_rate_min)
    grad_scalar = GradScaler(2**10)

    num_output = utils.num_output(args.dataset, args)
    # nats -> bits conversion, normalized by the number of output dimensions
    bpd_coeff = 1. / np.log(2.) / num_output

    # if load
    checkpoint_file = os.path.join(args.save, 'checkpoint.pt')
    if args.cont_training:
        logging.info('loading the model.')
        checkpoint = torch.load(checkpoint_file, map_location='cpu')
        init_epoch = checkpoint['epoch']
        model.load_state_dict(checkpoint['state_dict'])
        model = model.cuda()
        cnn_optimizer.load_state_dict(checkpoint['optimizer'])
        grad_scalar.load_state_dict(checkpoint['grad_scalar'])
        cnn_scheduler.load_state_dict(checkpoint['scheduler'])
        global_step = checkpoint['global_step']
    else:
        global_step, init_epoch = 0, 0

    for epoch in range(init_epoch, args.epochs):
        # update lrs.
        if args.distributed:
            train_queue.sampler.set_epoch(global_step + args.seed)
            valid_queue.sampler.set_epoch(0)

        # cosine schedule only kicks in after the warmup epochs
        if epoch > args.warmup_epochs:
            cnn_scheduler.step()

        # Logging.
        logging.info('epoch %d', epoch)

        # Training.
        train_nelbo, global_step = train(train_queue, model, cnn_optimizer,
                                         grad_scalar, global_step,
                                         warmup_iters, writer, logging)
        logging.info('train_nelbo %f', train_nelbo)
        writer.add_scalar('train/nelbo', train_nelbo, global_step)

        model.eval()
        # generate samples less frequently
        eval_freq = 1 if args.epochs <= 50 else 20
        if epoch % eval_freq == 0 or epoch == (args.epochs - 1):
            with torch.no_grad():
                num_samples = 16
                n = int(np.floor(np.sqrt(num_samples)))
                # sample a tiled grid at several temperatures for inspection
                for t in [0.7, 0.8, 0.9, 1.0]:
                    logits = model.sample(num_samples, t)
                    output = model.decoder_output(logits)
                    output_img = output.mean if isinstance(
                        output, torch.distributions.bernoulli.Bernoulli
                    ) else output.sample(t)
                    output_tiled = utils.tile_image(output_img, n)
                    writer.add_image('generated_%0.1f' % t, output_tiled,
                                     global_step)

            valid_neg_log_p, valid_nelbo = test(valid_queue,
                                                model,
                                                num_samples=10,
                                                args=args,
                                                logging=logging)
            logging.info('valid_nelbo %f', valid_nelbo)
            logging.info('valid neg log p %f', valid_neg_log_p)
            logging.info('valid bpd elbo %f', valid_nelbo * bpd_coeff)
            logging.info('valid bpd log p %f', valid_neg_log_p * bpd_coeff)
            writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch)
            writer.add_scalar('val/nelbo', valid_nelbo, epoch)
            writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff,
                              epoch)
            writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch)

        # checkpoint roughly 100 times over the run (rank 0 only)
        save_freq = int(np.ceil(args.epochs / 100))
        if epoch % save_freq == 0 or epoch == (args.epochs - 1):
            if args.global_rank == 0:
                logging.info('saving the model.')
                torch.save(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'optimizer': cnn_optimizer.state_dict(),
                        'global_step': global_step,
                        'args': args,
                        'arch_instance': arch_instance,
                        'scheduler': cnn_scheduler.state_dict(),
                        'grad_scalar': grad_scalar.state_dict()
                    }, checkpoint_file)

    # Final validation
    valid_neg_log_p, valid_nelbo = test(valid_queue,
                                        model,
                                        num_samples=1000,
                                        args=args,
                                        logging=logging)
    logging.info('final valid nelbo %f', valid_nelbo)
    logging.info('final valid neg log p %f', valid_neg_log_p)
    writer.add_scalar('val/neg_log_p', valid_neg_log_p, epoch + 1)
    writer.add_scalar('val/nelbo', valid_nelbo, epoch + 1)
    writer.add_scalar('val/bpd_log_p', valid_neg_log_p * bpd_coeff, epoch + 1)
    writer.add_scalar('val/bpd_elbo', valid_nelbo * bpd_coeff, epoch + 1)
    writer.close()
# Fragment: DataLoader construction onward; the `video` dataset is defined
# above this chunk, and the chunk is cut inside the logging branch.
trainloader = torch.utils.data.DataLoader(video,
                                          batch_size=32,
                                          shuffle=True,
                                          pin_memory=True)
print("数据集准备完毕")
from model import AutoEncoder, BAN_decoder, Sine_decoder

autoencoder = AutoEncoder().cuda()
# NOTE(review): sin_decoder is constructed but never attached or used here.
sin_decoder = Sine_decoder().cuda()
ban_decoder = BAN_decoder().cuda()
# swap the default decoder for the BAN decoder before building the optimizer
autoencoder.decoder = ban_decoder
criterion = nn.MSELoss().cuda()
optimizer = optim.Adam(autoencoder.parameters(), lr=0.001)

print("开始训练 training GeneralConv...")
stime = time.time()
for epoch in range(20):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        optimizer.zero_grad()
        Tensordata = data.float().cuda()
        # the model returns (code, reconstruction); only the reconstruction
        # feeds the loss
        _, output = autoencoder(Tensordata)
        loss = criterion(output, Tensordata)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # log every 50 batches (chunk truncated inside this branch)
        if i % 50 == 49:
class Trainer(object):
    """Trains a denoising AutoEncoder: noisy images in, clean images out.

    BCE loss drives the gradient (MSE is computed for logging only);
    reconstructions of a fixed noisy batch are saved periodically, and
    weights are checkpointed every `ckpt_interval` epochs.
    """

    def __init__(self, train_loader, test_loader, config):
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.config = config
        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.num_epochs = config.num_epochs
        self.lr = config.lr
        self.in_channel = config.in_channel
        self.image_size = config.image_size
        self.hidden_dim = config.hidden_dim
        self.output_dim = config.output_dim
        self.log_interval = config.log_interval
        self.sample_interval = config.sample_interval
        self.ckpt_interval = config.ckpt_interval
        self.sample_folder = config.sample_folder
        self.ckpt_folder = config.ckpt_folder
        self.build_net()
        self.vis = Visualizer()

    def build_net(self):
        """Instantiate the network and optionally restore saved weights."""
        self.net = AutoEncoder(self.in_channel, self.image_size,
                               self.hidden_dim, self.output_dim)
        if self.config.mode == 'test' and self.config.training_path == '':
            print("[*] Enter model path!")
            exit()
        # if a trained model exists, load it (CPU map so it works anywhere)
        if self.config.training_path != '':
            self.net.load_state_dict(
                torch.load(self.config.training_path,
                           map_location=lambda storage, loc: storage))
            print("[*] Load weight from {}!".format(self.config.training_path))
        self.net.to(self.device)

    def add_noise(self, imgs):
        """Return `imgs` plus Gaussian noise (sigma = 0.4)."""
        # Fix: randn_like creates the noise on the same device/dtype as
        # `imgs`; torch.randn(imgs.size()) was CPU-only and raised a device
        # mismatch once the images had been moved to CUDA.
        noise = torch.randn_like(imgs) * 0.4
        noisy_imgs = noise + imgs
        return noisy_imgs

    def train(self):
        """Run the training loop with periodic logging/sampling/checkpoints."""
        # define loss functions (BCE is optimized, MSE is logged)
        bce_criterion = nn.BCELoss().to(self.device)
        mse_criterion = nn.MSELoss().to(self.device)

        # define optimizer
        optimizer = Adam(self.net.parameters(), self.lr)

        step = 0
        print("[*] Learning started!")

        # grab one fixed batch for qualitative progress tracking
        temp_iter = iter(self.train_loader)
        fixed_imgs, _ = next(temp_iter)
        fixed_imgs = fixed_imgs.to(self.device)

        # save fixed sample image
        x_path = os.path.join(self.sample_folder, 'fixed_input.png')
        save_image(fixed_imgs, x_path, normalize=True)
        print("[*] Save fixed input image!")

        # make fixed noisy sample and save it
        fixed_noisy_imgs = self.add_noise(fixed_imgs)
        noisy_x_path = os.path.join(self.sample_folder,
                                    'fixed_noisy_input.png')
        save_image(fixed_noisy_imgs, noisy_x_path, normalize=True)
        print("[*] Save fixed noisy input image!")

        # flatten data tensors (the network expects flat vectors)
        fixed_imgs = fixed_imgs.view(fixed_imgs.size(0), -1)
        fixed_noisy_imgs = fixed_noisy_imgs.view(fixed_imgs.size(0), -1)

        for epoch in range(self.num_epochs):
            for i, (imgs, _) in enumerate(self.train_loader):
                self.net.train()
                # Fix: the clean targets must be on the device too; the
                # original left `imgs` on the CPU, so both loss calls raised
                # a device mismatch whenever CUDA was used.
                imgs = imgs.view(imgs.size(0), -1).to(self.device)
                noisy_imgs = self.add_noise(imgs)  # corrupt the input
                noisy_imgs = noisy_imgs.to(self.device)

                # forwarding: reconstruct the clean image from the noisy one
                outputs = self.net(noisy_imgs)
                bce_loss = bce_criterion(outputs, imgs)
                mse_loss = mse_criterion(outputs, imgs)

                # backwarding: only the BCE loss is optimized
                optimizer.zero_grad()
                bce_loss.backward()
                optimizer.step()

                # do logging
                if (step + 1) % self.log_interval == 0:
                    print("[{}/{}] [{}/{}] BCE loss: {:3f}, MSE loss:{:3f}".
                          format(epoch + 1, self.num_epochs, i + 1,
                                 len(self.train_loader),
                                 bce_loss.item() / len(imgs),
                                 mse_loss.item() / len(imgs)))
                    self.vis.plot("BCE Loss plot",
                                  bce_loss.item() / len(imgs))
                    self.vis.plot("MSE Loss plot",
                                  mse_loss.item() / len(imgs))

                # do sampling on the fixed noisy batch
                if (step + 1) % self.sample_interval == 0:
                    outputs = self.net(fixed_noisy_imgs)
                    x_hat = outputs.cpu().data.view(outputs.size(0), -1,
                                                    self.image_size,
                                                    self.image_size)
                    x_hat_path = os.path.join(
                        self.sample_folder,
                        'output_epoch{}.png'.format(epoch + 1))
                    save_image(x_hat, x_hat_path, normalize=True)
                    print("[*] Save sample images!")

                step += 1

            if (epoch + 1) % self.ckpt_interval == 0:
                ckpt_path = os.path.join(self.ckpt_folder,
                                         'ckpt_epoch{}.pth'.format(epoch + 1))
                torch.save(self.net.state_dict(), ckpt_path)
                print("[*] Checkpoint saved!")

        print("[*] Learning finished!")
        ckpt_path = os.path.join(self.ckpt_folder, 'final_model.pth')
        torch.save(self.net.state_dict(), ckpt_path)
        print("[*] Final weight saved!")
# Audio autoencoder training script (fragment: the chunk ends inside the
# first training loop).
from tqdm import tqdm
from torch import nn
import torch
import random
from hdf5_data import HDF5DatasetGenerator
from model import AutoEncoder

batch_size = 128
learning_rate = 0.01

train_gen = HDF5DatasetGenerator(db_path="train.hdf5", batch_size=batch_size)
test_gen = HDF5DatasetGenerator(db_path="test.hdf5", batch_size=batch_size)
network = AutoEncoder().cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(network.parameters(),
                             lr=learning_rate,
                             weight_decay=1e-5)
for epoch in range(500):
    network.train()
    # per-epoch running statistics
    min_audios = 0
    max_audios = 0
    min_labels = 0
    max_labels = 0
    total_loss = 0
    total = 0
    with tqdm(total=train_gen.get_total_samples() / batch_size) as pbar:
        for audios, labels in train_gen.generator():
            # shuffle samples within the batch
            # NOTE(review): `np` is not imported in this chunk -- presumably
            # `import numpy as np` lives elsewhere in the file; confirm.
            indexs = np.random.permutation(audios.shape[0])
            audios = audios[indexs]
# Fragment: tail of the argparse setup plus the start of the training loop
# (the chunk is cut inside the batch loop).
                    help='batch size')
parser.add_argument('--epoch', type=int, default=1, help='epoch size')
opt = parser.parse_args()

# hyper-parameters
LR = opt.lr
BATCH_SIZE = opt.batch_size
EPOCHES = opt.epoch
LOG_INTERVAL = 5
# check whether a GPU is available
cuda_available = torch.cuda.is_available()
# instantiate the network
auto = AutoEncoder()
if cuda_available:
    auto.cuda()
# define the optimizer and loss function
optimizer = torch.optim.Adam(auto.parameters(), lr=LR)
# data preparation
root_dir = "./celeba_select"
image_files = os.listdir(root_dir)
train_dataset = CelebaDataset(root_dir, image_files, (64, 64),
                              transforms.Compose([ToTensor()]))
# NOTE(review): batch size is hard-coded to 32 here instead of BATCH_SIZE
# from the CLI -- likely unintentional; confirm.
train_loader = DataLoader(train_dataset,
                          batch_size=32,
                          num_workers=1,
                          shuffle=True)
for i in range(EPOCHES):
    # shuffle data (handled by the DataLoader); set train mode each epoch
    auto.train()
    train_loss = 0
    for batch_idx, data in enumerate(train_loader):
        data = Variable(data.type(torch.FloatTensor))
# Fragment: tail of a learning-rate-finder loop (the enclosing find_lr
# definition starts above this chunk) followed by the script that runs it
# and plots log10(lr) against the smoothed loss.
        if smoothed_loss < best_loss or batch_num == 1:
            best_loss = smoothed_loss
        losses.append(smoothed_loss)
        log_lrs.append(math.log10(lr))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # exponentially increase the learning rate each batch
        lr *= mult
        optimizer.param_groups[0]['lr'] = lr
        sys.stdout.write('\r[ %d/%d] LR: %f' % (i, len(data_loader), lr))
    return log_lrs, losses


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoEncoder().to(device)
optimizer = Adam(model.parameters(), lr=0.01, weight_decay=0.0001)
# optimizer = AdamW(model.parameters(), lr=0.01, weight_decay=0.0001)
# optimizer = SGD(model.parameters(), lr=0.05)
# loss_fn = nn.CrossEntropyLoss()
loss_fn = nn.MSELoss()
data_loader_train = load_data('./data', 128)
logs, losses = find_lr(model=model,
                       data_loader=data_loader_train,
                       device=device)
# drop the noisy first/last points before plotting
plt.plot(logs[10:-5], losses[10:-5])
plt.show()
# Fragment: tail of the train DataLoader call plus the start of the training
# loop for a point-cloud completion AutoEncoder (cut inside the batch loop).
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=args.num_workers)
val_dataloader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.num_workers)

network = AutoEncoder()
if args.model is not None:
    # resume from a saved state dict
    print('Loaded trained model from {}.'.format(args.model))
    network.load_state_dict(torch.load(args.model))
else:
    print('Begin training new model.')
network.to(DEVICE)

optimizer = optim.Adam(network.parameters(),
                       lr=args.lr,
                       weight_decay=args.weight_decay)
# decay the learning rate by 0.7 every 20 epochs
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.7)
# rounded number of iterations per epoch
max_iter = int(len(train_dataset) / args.batch_size + 0.5)

minimum_loss = 1e4
best_epoch = 0
for epoch in range(1, args.epochs + 1):
    # training
    network.train()
    total_loss, iter_count = 0, 0
    for i, data in enumerate(train_dataloader, 1):
        # each sample carries the partial cloud plus coarse/dense ground truth
        partial_input, coarse_gt, dense_gt = data
        partial_input = partial_input.to(DEVICE)
# Script: train an AutoEncoder on pre-recorded agent states, logging to
# TensorBoard (the training loop follows below this chunk).
from torch.utils.data import TensorDataset, DataLoader
from model import AutoEncoder
import torch.nn as nn
import torch
import numpy as np
from tqdm import tqdm
from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter("./runs/")
model = AutoEncoder().cuda()
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

with open("../agent_code/rule_based_agent_auto_encode/states.npy", "rb") as f:
    data = np.load(f, allow_pickle=False)
# cap the dataset at 2.5M states
data = data[:2_500_000]
data_t = torch.Tensor(data)
# NOTE(review): the whole dataset is moved to the GPU up front -- fine only
# if it fits in device memory; confirm.
data_t = data_t.cuda()
dataset = TensorDataset(data_t)
# 70/15/15 train/valid/test split with a fixed seed for reproducibility
train_len = int(0.7 * len(dataset))
valid_len = (len(dataset) - train_len) // 2
test_len = len(dataset) - train_len - valid_len
train_dataset, valid_dataset, test_dataset = torch.utils.data.random_split(
    dataset, [train_len, valid_len, test_len],
    generator=torch.Generator().manual_seed(42))
batch_size = 32
def main():
    """Train the AvatarNet AutoEncoder on content/style image pairs.

    Parses CLI arguments, prepares the datasets, runs the training loop,
    periodically snapshots stylized test images and model state, and
    finally writes the loss curve and a loss log under --save_dir.
    """
    parser = argparse.ArgumentParser(description='AvatarNet by Pytorch')
    parser.add_argument('--batch_size', '-b', type=int, default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=2,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--patch_size', '-p', type=int, default=5,
                        help='Size of extracted patches from style features')
    parser.add_argument('--alpha', '-a', type=float, default=0.8,
                        help='alpha control the fusion degree')
    parser.add_argument('--lam1', type=float, default=0.01,
                        help='lambda1 for perceptual loss')
    parser.add_argument('--lam2', type=float, default=0.01,
                        help='lambda2 for tv loss')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID(nagative value indicate CPU)')
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-4,
                        help='learning rate for Adam')
    parser.add_argument('--snapshot_interval', type=int, default=10,
                        help='Interval of snapshot to generate image')
    parser.add_argument('--train_content_dir', type=str,
                        default='/data/chen/content',
                        help='content images directory for train')
    parser.add_argument('--train_style_dir', type=str,
                        default='/data/chen/style',
                        help='style images directory for train')
    parser.add_argument('--test_content_dir', type=str,
                        default='/data/chen/content',
                        help='content images directory for test')
    parser.add_argument('--test_style_dir', type=str,
                        default='/data/chen/style',
                        help='style images directory for test')
    parser.add_argument('--save_dir', type=str, default='result',
                        help='save directory for result and loss')
    parser.add_argument('--reuse', default=None,
                        help='model state path to load for reuse')
    args = parser.parse_args()

    # create directories to save results; exist_ok makes reruns safe (the
    # original guarded only loss_dir, so a partially existing save_dir
    # crashed on the unguarded os.mkdir calls)
    os.makedirs(args.save_dir, exist_ok=True)
    loss_dir = f'{args.save_dir}/loss'
    model_state_dir = f'{args.save_dir}/model_state'
    image_dir = f'{args.save_dir}/image'
    os.makedirs(loss_dir, exist_ok=True)
    os.makedirs(model_state_dir, exist_ok=True)
    os.makedirs(image_dir, exist_ok=True)

    # set device on GPU if available, else CPU
    if torch.cuda.is_available() and args.gpu >= 0:
        device = torch.device(f'cuda:{args.gpu}')
        print(f'# CUDA available: {torch.cuda.get_device_name(0)}')
    else:
        device = 'cpu'

    print(f'# Minibatch-size: {args.batch_size}')
    print(f'# epoch: {args.epoch}')
    print('')

    # prepare dataset and dataLoader
    train_dataset = PreprocessDataset(args.train_content_dir,
                                      args.train_style_dir)
    test_dataset = PreprocessDataset(args.test_content_dir,
                                     args.test_style_dir)
    iters = len(train_dataset)
    print(f'Length of train image pairs: {iters}')
    train_loader = DataLoader(train_dataset, batch_size=args.batch_size,
                              shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=args.batch_size,
                             shuffle=False)
    test_iter = iter(test_loader)

    # set model and optimizer
    model = AutoEncoder().to(device)
    if args.reuse is not None:
        model.load_state_dict(torch.load(args.reuse))
    optimizer = Adam(model.parameters(), lr=args.learning_rate)

    # start training
    loss_list = []
    for e in range(1, args.epoch + 1):
        print(f'Start {e} epoch')
        for i, (content, style) in tqdm(enumerate(train_loader, 1)):
            content = content.to(device)
            style = style.to(device)
            loss = model(content, style, args.patch_size, args.alpha,
                         args.lam1, args.lam2)
            loss_list.append(loss.item())

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print(f'[{e}/total {args.epoch} epoch],[{i} /'
                  f'total {round(iters/args.batch_size)} iteration]: '
                  f'{loss.item()}')

            if i % args.snapshot_interval == 0:
                # Fix: re-create the iterator once the test loader is
                # exhausted; the original bare next(test_iter) raised
                # StopIteration after one pass over the test set.
                try:
                    content, style = next(test_iter)
                except StopIteration:
                    test_iter = iter(test_loader)
                    content, style = next(test_iter)
                content = content.to(device)
                style = style.to(device)
                with torch.no_grad():
                    out = model.generate(content, style, args.patch_size,
                                         args.alpha)
                content = denorm(content, device)
                style = denorm(style, device)
                out = denorm(out, device)
                res = torch.cat([content, style, out], dim=0)
                res = res.to('cpu')
                save_image(res,
                           f'{image_dir}/{e}_epoch_{i}_iteration.png',
                           nrow=args.batch_size)
        torch.save(model.state_dict(), f'{model_state_dir}/{e}_epoch.pth')

    plt.plot(range(len(loss_list)), loss_list)
    plt.xlabel('iteration')
    plt.ylabel('loss')
    plt.title('train loss')
    plt.savefig(f'{loss_dir}/train_loss.png')
    with open(f'{loss_dir}/loss_log.txt', 'w') as f:
        for l in loss_list:
            f.write(f'{l}\n')
    print(f'Loss saved in {loss_dir}')
def train_autoencoder(train_matrix, test_set):
    """Train the attention-based AutoEncoder on implicit feedback and
    evaluate top-k recommendation quality on ``test_set``.

    Args:
        train_matrix: scipy sparse user-item interaction matrix of shape
            (num_users, num_items). NOTE: mutated in place — nonzero
            entries are binarized to 1.0 (confidence moves into the
            weight matrix).
        test_set: per-user collection of held-out items used by the
            precision/recall/MAP metrics.
    """
    num_users, num_items = train_matrix.shape
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    # Binarize interactions: confidence is carried by weight_matrix instead.
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        './data/Foursquare/place_correlation_gamma60.npz')
    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)

    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items, args.inner_layers, num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()

    # FIX: `size_average=False, reduce=False` is deprecated; the modern
    # equivalent that keeps per-element losses (so they can be weighted
    # below) is reduction='none'.
    criterion = torch.nn.MSELoss(reduction='none')
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    batch_size = args.batch_size
    user_indexes = np.arange(num_users)

    model.train()
    for t in range(args.epoch):
        print("epoch:{}".format(t))
        np.random.shuffle(user_indexes)
        avg_cost = 0.
        for batchID in range(int(num_users / batch_size)):
            start = batchID * batch_size
            end = start + batch_size
            batch_user_index = user_indexes[start:end]

            batch_x, batch_x_weight, batch_item_index = get_mini_batch(
                train_matrix, weight_matrix, batch_user_index)
            batch_x_weight += 1
            # FIX: torch.autograd.Variable is deprecated (a no-op since
            # PyTorch 0.4); plain tensors participate in autograd directly.
            batch_x = torch.from_numpy(batch_x).type(T.FloatTensor)

            y_pred = model(batch_item_index, place_correlation)

            # Weighted per-element MSE, averaged over the batch.
            batch_x_weight = torch.from_numpy(batch_x_weight).type(
                T.FloatTensor)
            loss = (batch_x_weight *
                    criterion(y_pred, batch_x)).sum() / batch_size
            print(batchID, loss.item())

            # Zero gradients, perform a backward pass, and update weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # FIX: use .item() so the running cost is a Python float —
            # accumulating the tensor kept every iteration's computation
            # graph alive (memory leak).
            avg_cost += loss.item() / num_users * batch_size

        print("Avg loss:{}".format(avg_cost))

    # print the prediction score for the user 0
    print(model([train_matrix.getrow(0).indices], place_correlation)
          [:, T.LongTensor(train_matrix.getrow(0).indices.astype(np.int32))])
    print(model([train_matrix.getrow(0).indices], place_correlation))

    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]
        # Mask items the user already visited so they are not re-recommended.
        pred_rating_vector[user_rating_vector > 0] = 0

        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score

        sorted_item = heapq.nlargest(topk, item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)

        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" % (
            user_id,
            eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                   sorted_item[:5], 5),
            eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                   sorted_item[:topk], topk)))

    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))

    print(precision)
    print(recall)
    print(MAP)
help='upper epoch limit') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() print(json.dumps(args.__dict__, sort_keys=True, indent=4) + '\n') args.device = torch.device("cuda" if args.cuda else "cpu") # Set random seed torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed_all(args.seed) # Load pre-trained model model = AutoEncoder().to(args.device) optimizer = Adam(model.parameters(), lr=0.001, weight_decay=0.0001) # optimizer = AdamW(model.parameters(), lr=0.001, weight_decay=0.0001) # optimizer = SGD(model.parameters(), lr=0.01) # exp_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.25) exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2, verbose=True, min_lr=0.001) loss_fn = nn.MSELoss() # loss_fn = nn.BCELoss() torch.backends.cudnn.benchmark = True print('n parameters: %d' % sum([m.numel() for m in model.parameters()]))
# Reproducibility vs. speed: deterministic cuDNN kernels while debugging,
# autotuned (benchmark) kernels otherwise.
torch.backends.cudnn.deterministic = DEBUG
torch.backends.cudnn.benchmark = not DEBUG
print(f'Using device {DEVICE}')

# Data loaders
train_ds = PPFDataset(**TRAIN_DS_ARGS)
train_dl = DataLoader(train_ds, **TRAIN_DL_ARGS)

val_ds = PPFDataset(**VAL_DS_ARGS)
val_dl = DataLoader(val_ds, **VAL_DL_ARGS)

# FIX: use len() instead of calling the __len__ dunder directly.
print('Training set: {} Validation set: {}\n'.format(
    len(train_ds), len(val_ds)
))

# Model
model = AutoEncoder(NUM_PTS_PER_PATCH)
model.apply(init_weights).to(DEVICE)

loss_func = ChamferLoss()
optimizer = Adam(model.parameters(), LR)
# One cycle over the whole run: epochs * batches-per-epoch total steps.
scheduler = OneCycleLR(
    optimizer,
    MAX_LR,
    total_steps=len(train_dl) * TRAINER_ARGS.num_epochs
)

# Make sure the checkpoint directory exists before training writes to it.
Path(TRAINER_ARGS.checkpoint_path).parent.mkdir(parents=True, exist_ok=True)

# Training
train(model, loss_func, optimizer, scheduler, (train_dl, val_dl),
      TRAINER_ARGS)
def train(self, config):
    """Training routine.

    Builds ImageFolder datasets from ``config.data_dir``/{train,valid},
    trains the AutoEncoder to reconstruct its input under MSE loss,
    logs loss/accuracy to TensorBoard, checkpoints every
    ``config.rep_intv`` iterations, validates every ``config.val_intv``
    iterations, and keeps the best model by validation accuracy.

    Args:
        config: options namespace with data_dir, batch_size, numWorker,
            learn_rate, log_dir, save_dir, resume, num_epoch, rep_intv
            and val_intv attributes.
    """
    # Initialize datasets for both training and validation
    train_data = torchvision.datasets.ImageFolder(
        root=os.path.join(config.data_dir, "train"),
        transform=torchvision.transforms.ToTensor())
    valid_data = torchvision.datasets.ImageFolder(
        root=os.path.join(config.data_dir, "valid"),
        transform=torchvision.transforms.ToTensor())

    # Create data loader for training and validation.
    tr_data_loader = torch.utils.data.DataLoader(
        dataset=train_data,
        batch_size=config.batch_size,
        num_workers=config.numWorker,
        shuffle=True)
    va_data_loader = torch.utils.data.DataLoader(
        dataset=valid_data,
        batch_size=config.batch_size,
        num_workers=config.numWorker,
        shuffle=False)

    # Create model instance.
    #model = Model()
    model = AutoEncoder()
    # Move model to gpu if cuda is available
    if torch.cuda.is_available():
        model = model.cuda()
    # Make sure that the model is set for training
    model.train()

    # Create loss objects. The model reconstructs its input, so the loss
    # target below is x itself; the ImageFolder labels y are unused.
    data_loss = nn.MSELoss()

    # Create optimizier
    optimizer = optim.Adam(model.parameters(), lr=config.learn_rate)
    # No need to move the optimizer (as of PyTorch 1.0), it lies in the
    # same space as the model

    # Create summary writer
    tr_writer = SummaryWriter(
        log_dir=os.path.join(config.log_dir, "train"))
    va_writer = SummaryWriter(
        log_dir=os.path.join(config.log_dir, "valid"))

    # Create log directory and save directory if it does not exist
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)
    if not os.path.exists(config.save_dir):
        os.makedirs(config.save_dir)

    # Initialize training
    iter_idx = -1  # make counter start at zero
    best_va_acc = 0  # to check if best validation accuracy

    # Prepare checkpoint file and model file to save and load from
    checkpoint_file = os.path.join(config.save_dir, "checkpoint.pth")
    bestmodel_file = os.path.join(config.save_dir, "best_model.pth")

    # Check for existing training results. If they exist and the
    # configuration is set to resume (`config.resume == True`), resume
    # from previous training. If not, delete the existing checkpoint.
    if os.path.exists(checkpoint_file):
        if config.resume:
            # Load the checkpoint and restore everything required to
            # continue training (model/optimizer via `load_state_dict`).
            print("Checkpoint found! Resuming")  # TODO proper logging
            # Read checkpoint file. map_location fixes loading a
            # GPU-saved checkpoint on a CPU-only machine (gpu -> cpu bug).
            compute_device = 'cuda' if torch.cuda.is_available() else 'cpu'
            load_res = torch.load(checkpoint_file,
                                  map_location=compute_device)
            # Resume iterations
            iter_idx = load_res["iter_idx"]
            # Resume best va result
            best_va_acc = load_res["best_va_acc"]
            # Resume model
            model.load_state_dict(load_res["model"])
            # Resume optimizer
            optimizer.load_state_dict(load_res["optimizer"])
            # Note that we do not resume the epoch, since we will never be
            # able to properly recover the shuffling, unless we remember
            # the random seed, for example. For simplicity, we will simply
            # ignore this, and run `config.num_epoch` epochs regardless of
            # resuming.
        else:
            os.remove(checkpoint_file)

    # Training loop
    for epoch in range(config.num_epoch):
        # For each iteration
        prefix = "Training Epoch {:3d}: ".format(epoch)
        for data in tqdm(tr_data_loader, desc=prefix):
            # Counter
            iter_idx += 1

            # Split the data: x is img, y is label (y is unused — the
            # autoencoder reconstructs x).
            x, y = data
            #print(x)

            # Send data to GPU if we have one
            if torch.cuda.is_available():
                x = x.cuda()
                y = y.cuda()

            # Apply the model to obtain scores (forward pass)
            logits = model.forward(x)
            # Compute the reconstruction loss against the input itself
            loss = data_loss(logits, x.float())
            # Compute gradients
            loss.backward()
            # Update parameters
            optimizer.step()
            # Zero the parameter gradients in the optimizer
            optimizer.zero_grad()

            # Monitor results every report interval
            if iter_idx % config.rep_intv == 0:
                # Compute accuracy (No gradients required). We'll wrap this
                # part so that we prevent torch from computing gradients.
                # NOTE(review): argmax over dim=1 removes that dimension,
                # so `pred.view(x.size())` assumes the logits/x shapes make
                # this reshape valid — confirm against the model's output
                # shape.
                with torch.no_grad():
                    pred = torch.argmax(logits, dim=1)
                    acc = torch.mean(
                        torch.eq(pred.view(x.size()), x).float()) * 100.0

                # Write loss and accuracy to tensorboard, using keywords
                # `loss` and `accuracy`.
                tr_writer.add_scalar("loss", loss, global_step=iter_idx)
                tr_writer.add_scalar("accuracy", acc, global_step=iter_idx)

                # Save a resumable checkpoint (counters + model/optimizer
                # state dicts).
                torch.save(
                    {
                        "iter_idx": iter_idx,
                        "best_va_acc": best_va_acc,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict(),
                        "loss": loss,
                        "epoch": epoch,
                        "acc": acc
                    }, checkpoint_file)

            # Validate results every validation interval
            if iter_idx % config.val_intv == 0:
                # List to contain all losses and accuracies for all the
                # validation batches
                va_loss = []
                va_acc = []

                # Set model for evaluation
                model = model.eval()
                for data in va_data_loader:
                    # Split the data
                    x, y = data

                    # Send data to GPU if we have one
                    if torch.cuda.is_available():
                        x = x.cuda()
                        y = y.cuda()

                    # Apply forward pass to compute the losses
                    # and accuracies for each of the validation batches
                    with torch.no_grad():
                        # Compute logits
                        logits = model.forward(x)
                        # Compute loss and store as numpy
                        loss = data_loss(logits, x.float())
                        va_loss += [loss.cpu().numpy()]
                        # Compute accuracy and store as numpy
                        pred = torch.argmax(logits, dim=1)
                        acc = torch.mean(
                            torch.eq(pred.view(x.size()), x).float()) * 100.0
                        va_acc += [acc.cpu().numpy()]

                # Set model back for training
                model = model.train()

                # Take average
                va_loss = np.mean(va_loss)
                va_acc = np.mean(va_acc)

                # Write to tensorboard using `va_writer`
                va_writer.add_scalar("loss", va_loss, global_step=iter_idx)
                va_writer.add_scalar("accuracy", va_acc,
                                     global_step=iter_idx)

                # Check if best accuracy
                if va_acc > best_va_acc:
                    best_va_acc = va_acc
                    # Save best model using torch.save. Similar to previous
                    # save but at location defined by `bestmodel_file`
                    torch.save(
                        {
                            "iter_idx": iter_idx,
                            "best_va_acc": best_va_acc,
                            "model": model.state_dict(),
                            "optimizer": optimizer.state_dict(),
                            "loss": loss,
                            "acc": acc
                        }, bestmodel_file)