def solve(self):
    """
    Run `self.epochs` solver iterations.

    Every iteration:

    * clears all information sets when online updates are disabled,
    * calls `self.cfr` once per player on a fresh history,
    * calls `self.update` when online updates are disabled,
    * records per-action statistics of every information set,
    * saves tracked values every `self.track_frequency` iterations and a
      checkpoint every `self.save_frequency` iterations.

    After the last iteration the information sets are passed to `logger.inspect`.
    """
    for epoch in monit.loop(self.epochs):
        # Reset the information sets before traversing when updates are not online
        if not self.is_online_update:
            for info_set in self.info_sets.values():
                info_set.clear()

        # One traversal per player; the last argument is a new list of ones,
        # one entry per player, built for every call
        for player in range(self.n_players):
            self.cfr(self.create_new_history(),
                     cast(Player, player),
                     [1] * self.n_players)

        # Apply the accumulated update when updates are not online
        if not self.is_online_update:
            self.update()

        # Record the statistics of every action of every information set
        with monit.section("Track"):
            for info_set in self.info_sets.values():
                for action in info_set.actions():
                    stats = {
                        f'strategy.{info_set.key}.{action}': info_set.strategy[action],
                        f'average_strategy.{info_set.key}.{action}': info_set.average_strategy[action],
                        f'regret.{info_set.key}.{action}': info_set.regret[action],
                        f'current_regret.{info_set.key}.{action}': info_set.current_regret[action]
                    }
                    tracker.add(stats)

        # Periodically flush the tracked values and write a log line
        if epoch % self.track_frequency == 0:
            tracker.save()
            logger.log()

        # Periodically save a checkpoint
        if (epoch + 1) % self.save_frequency == 0:
            experiment.save_checkpoint()

    logger.inspect(self.info_sets)
def run_training_loop(self):
    """
    ### Run training loop

    Resumes from the tracker's current global step and runs the remaining
    `self.c.updates - offset` updates. Each update samples with the current
    policy, trains on the samples and saves the tracked values.

    Every 2nd update the optimizer, game and weight parameters are refreshed
    from the callables on `self.c`; every 25th update a log line is written;
    every 200th update a checkpoint is saved.
    """
    offset = tracker.get_global_step()
    if offset > 100:
        # If resumed, sample several iterations first to reduce sampling bias
        for _ in range(16):
            self.sample(False)

    for _ in monit.loop(self.c.updates - offset):
        # The global step is read from the tracker rather than from the loop
        # counter, so it already includes `offset` on a resumed run
        update = tracker.get_global_step()

        # Sample with current policy
        samples = self.sample()
        # Train the model
        self.train(samples)
        # Write summary info to the writer
        tracker.save()

        # Refresh the hyper-parameters from the configuration callables
        if (update + 1) % 2 == 0:
            self.set_optim(self.c.lr(), self.c.reg_l2())
            self.set_game_param(self.c.right_gain(), self.c.fix_prob(),
                                self.c.neg_mul(), self.c.step_reward())
            self.set_weight_param(self.c.entropy_weight(), self.c.prob_reg_weight(),
                                  self.c.target_prob_weight(), self.c.gamma(),
                                  self.c.lamda())
        # Log to the screen
        if (update + 1) % 25 == 0:
            logger.log()
        # Save a checkpoint
        if (update + 1) % 200 == 0:
            experiment.save_checkpoint()
def run_training_loop(self):
    """
    ### Run training loop

    Runs `self.updates` updates. The learning rate and the clip range are
    scaled by `1 - update / self.updates`, so both decrease linearly as
    training progresses.
    """
    # Last 100 episode information
    for name in ('reward', 'length'):
        tracker.set_queue(name, 100, True)

    for update in monit.loop(self.updates):
        # Fraction of the training that has been completed
        progress = update / self.updates

        # Decreasing `learning_rate` and `clip_range` $\epsilon$
        learning_rate = 2.5e-4 * (1 - progress)
        clip_range = 0.1 * (1 - progress)

        # Sample with the current policy and train on those samples
        self.train(self.sample(), learning_rate, clip_range)

        # Write summary info to the writer
        tracker.save()

        # Log to the screen once every 1,000 updates
        if (update + 1) % 1_000 != 0:
            continue
        logger.log()
def run(self):
    """
    ### Training loop

    Trains on the full batch: every epoch the model is evaluated on all nodes
    and the loss is taken over the training subset only. Sampling nodes per
    step would also require sampling the edges spanning the selected nodes.

    The nodes are split with a random permutation: the first
    `self.training_samples` indexes are used for training and the rest for
    validation.
    """
    # Move features, labels and the adjacency matrix to the device
    node_features = self.dataset.features.to(self.device)
    node_labels = self.dataset.labels.to(self.device)
    adjacency = self.dataset.adj_mat.to(self.device)
    # Add an empty last dimension (for the heads)
    adjacency = adjacency.unsqueeze(-1)

    # Random train / validation split of the node indexes
    shuffled = torch.randperm(len(node_labels))
    train_idx = shuffled[:self.training_samples]
    valid_idx = shuffled[self.training_samples:]

    for _ in monit.loop(self.epochs):
        # --- Training step ---
        self.model.train()
        self.optimizer.zero_grad()
        predictions = self.model(node_features, adjacency)
        train_loss = self.loss_func(predictions[train_idx], node_labels[train_idx])
        train_loss.backward()
        self.optimizer.step()

        # Log training loss and accuracy
        tracker.add('loss.train', train_loss)
        tracker.add('accuracy.train',
                    accuracy(predictions[train_idx], node_labels[train_idx]))

        # --- Validation ---
        self.model.eval()
        # Gradients are not needed for validation
        with torch.no_grad():
            predictions = self.model(node_features, adjacency)
            valid_loss = self.loss_func(predictions[valid_idx], node_labels[valid_idx])
            # Log validation loss and accuracy
            tracker.add('loss.valid', valid_loss)
            tracker.add('accuracy.valid',
                        accuracy(predictions[valid_idx], node_labels[valid_idx]))

        # Save logs
        tracker.save()
def main():
    """
    Create the `test_schedule` experiment, register a `DynamicSchedule` as the
    `lr` configuration and save its current value once per second for 100 steps.
    """
    experiment.create(name='test_schedule', writers={'screen', 'web_api'})

    schedule = DynamicSchedule(0.01, (0, 1))
    experiment.configs({'lr': schedule})

    with experiment.start():
        for _ in monit.loop(100):
            # The schedule is called on every step and the result is saved
            current = schedule()
            tracker.save('hp.lr', current)
            time.sleep(1)
def main():
    """
    Demonstrate `monit.mix`: for 10 outer iterations, interleave a `train`
    range of 50 values with a `valid` range of 10 values, saving each value
    under the name of the range it came from.
    """
    import time

    sources = (('train', range(50)), ('valid', range(10)))

    for _ in monit.loop(10):
        for name, value in monit.mix(5, *sources):
            # Small delay per item
            time.sleep(0.05)
            tracker.save({name: value})
        # New line after every outer iteration
        tracker.new_line()
def setup_and_add():
    """
    Register ten scalar indicators `loss1.0` … `loss1.9` up front (only the
    first one is printed), start the experiment, then add the loop index to
    every indicator and save on each of 1000 iterations.
    """
    names = [f"loss1.{t}" for t in range(10)]

    # Only the indicator with index 0 is printed
    for index, name in enumerate(names):
        tracker.set_scalar(name, is_print=index == 0)

    experiment.start()

    for step in monit.loop(1000):
        for name in names:
            tracker.add({name: step})
        tracker.save()
def __iter__(self):
    """
    Start iterating.

    Builds a monitored loop over the steps from the tracker's current global
    step up to `__loop_count` in increments of `__loop_step`, starts it, and
    installs `__handler` for `SIGINT`, remembering the previous handler in
    `old_handler`. Returns `self`.
    """
    first_step = tracker.get_global_step()
    steps = range(first_step, self.__loop_count, self.__loop_step)
    self.__loop = monit.loop(steps)
    iter(self.__loop)

    try:
        previous = signal.signal(signal.SIGINT, self.__handler)
    except ValueError:
        # `signal.signal` raises `ValueError` when the handler cannot be
        # installed (e.g. outside the main thread); in that case iteration
        # continues without the handler and `old_handler` is left untouched
        pass
    else:
        self.old_handler = previous

    return self
def main():
    """
    Train and validate the MNIST model for `configs['epochs']` epochs,
    monitored as the `mnist_labml_monit` experiment, then save a checkpoint.

    The random seed is set before the model is created so that the weight
    initialisation is covered by the seed as well as the data shuffling.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Seed first: seeding after `Net()` is constructed would leave the
    # parameter initialisation unseeded
    torch.manual_seed(configs['seed'])

    # Use the first GPU when requested and available, otherwise the CPU
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    train_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_train'),
        batch_size=configs['train_batch_size'],
        shuffle=True,
        num_workers=4)
    valid_loader = torch.utils.data.DataLoader(
        RemoteDataset('mnist_valid'),
        batch_size=configs['valid_batch_size'],
        shuffle=False,
        num_workers=4)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')
    # ✨ Save configurations
    experiment.configs(configs)
    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device, configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # Save the model
    experiment.save_checkpoint()
def main():
    """
    Create the `test_dynamic_hp` experiment, register a `Configs` instance as
    its configuration and save the current value of `conf.lr` once per second
    for 100 steps.
    """
    experiment.create(name='test_dynamic_hp', writers={'screen', 'web_api'})

    # NOTE(review): this stand-alone hyper-parameter is not used again in this
    # function; the value tracked below is the one taken from `conf`
    lr = FloatDynamicHyperParam(0.01, (0, 1))

    conf = Configs()
    experiment.configs(conf)
    lr = conf.lr

    with experiment.start():
        for _ in monit.loop(100):
            current = lr()
            tracker.save('hp.lr', current)
            time.sleep(1)
def train(self):
    """
    ## Train model

    Calls `self.step` for each of `self.training_steps` steps and starts a
    new console line once every `self.log_generated_interval` steps.
    """
    for step_idx in monit.loop(self.training_steps):
        # Take a training step
        self.step(step_idx)

        # Nothing more to do unless this step completes an interval
        if (step_idx + 1) % self.log_generated_interval != 0:
            continue
        tracker.new_line()
def __iter__(self):
    """
    Start iterating.

    Creates a `TrainingLoopIterator` from the tracker's current global step,
    `__loop_count` and `__loop_step`, wraps it in a monitored loop and starts
    that loop. Then installs `__handler` for `SIGINT`, remembering the
    previous handler in `old_handler`. Returns `self`.
    """
    start = tracker.get_global_step()
    self._iter = TrainingLoopIterator(start, self.__loop_count, self.__loop_step)

    # The cast only informs the type checker; the iterator is passed as is
    monitored = monit.loop(typing.cast(Collection, self._iter))
    self.__loop = monitored
    iter(self.__loop)

    try:
        previous = signal.signal(signal.SIGINT, self.__handler)
    except ValueError:
        # `signal.signal` raises `ValueError` when the handler cannot be
        # installed (e.g. outside the main thread); iteration then continues
        # without the handler and `old_handler` is left untouched
        pass
    else:
        self.old_handler = previous

    return self
def run(self):
    """
    Track text artifacts for `self.epochs` iterations.

    Registers one text indicator (`text_artifact`) and two indexed-text
    indicators (`ti` and `other`), all printed. Each iteration adds one
    sample text plus five indexed entries to each indexed indicator, then
    saves and writes a log line.
    """
    tracker.set_text('text_artifact', is_print=True)
    for indexed_name in ('ti', 'other'):
        tracker.set_indexed_text(indexed_name, is_print=True)

    for epoch in monit.loop(self.epochs):
        tracker.add('text_artifact', f'sample {epoch}')

        # Indexed entries are `(index, text)` tuples with the index as a string
        for item in range(5):
            index = f'{item}'
            tracker.add('ti', (index, 'text' * 5 + f'text {epoch} {item}'))
            tracker.add('other', (index, f'other {item}'))

        tracker.save()
        logger.log()
def train(self):
    """
    ### Train the model

    Runs `self.epochs` passes over `self.dataloader`. For every mini-batch
    the global step is advanced by the product of the first two dimensions
    of the input, the loss is back-propagated with gradient-norm clipping and
    the optimizer takes a step. Every 100 batches the model is logged and a
    sample is generated; every 10 batches the tracked metrics are saved.
    A checkpoint is saved at the end of each epoch.
    """
    for _ in monit.loop(self.epochs):
        for batch_idx, batch in monit.enum('Train', self.dataloader):
            # Move inputs and targets to the device
            inputs = batch[0].to(self.device)
            targets = batch[1].to(self.device)

            # Advance the tracker step by the number of elements in the batch
            tracker.add_global_step(inputs.shape[0] * inputs.shape[1])

            # Forward pass in training mode
            self.model.train()
            logits = self.model(inputs)

            # Flatten all but the last dimension of the output before the loss
            loss = self.loss_func(logits.view(-1, logits.shape[-1]),
                                  targets.view(-1))
            tracker.add("loss.train", loss)

            # Backward pass, clip gradients, update parameters
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           max_norm=self.grad_norm_clip)
            self.optimizer.step()

            # Log the model every 100 batches, before the gradients are cleared
            if (batch_idx + 1) % 100 == 0:
                tracker.add('model', self.model)

            self.optimizer.zero_grad()

            # Generate a sample every 100 batches
            if (batch_idx + 1) % 100 == 0:
                self.model.eval()
                with torch.no_grad():
                    self.sample()

            # Save the tracked metrics every 10 batches
            if (batch_idx + 1) % 10 == 0:
                tracker.save()

        # Save the model after every epoch
        experiment.save_checkpoint()
def add_save():
    """
    Run `N` iterations that each do ten in-place additions on a 1000x1000
    tensor, add the iteration index to the ten `loss1.*` indicators and save.

    The indicators are registered lazily, on the first iteration only; only
    `loss1.0` is printed.
    """
    work = torch.zeros((1000, 1000))
    experiment.start()

    for step in monit.loop(N):
        # Ten in-place increments of the tensor
        for _ in range(10):
            work += 1

        # Register the scalar indicators during the first iteration
        if step == 0:
            for t in range(10):
                tracker.set_scalar(f"loss1.{t}", is_print=t == 0)

        for t in range(10):
            tracker.add({f'loss1.{t}': step})

        tracker.save()
def run(self):
    """
    ### Training loop

    For each of `self.epochs` epochs: train, sample, start a new console
    line and save a checkpoint, in that order.
    """
    for _epoch in monit.loop(self.epochs):
        # One epoch of training
        self.train()
        # Generate samples with the model as it is after this epoch
        self.sample()
        # Move the console output to a new line
        tracker.new_line()
        # Checkpoint the model
        experiment.save_checkpoint()
def run_training_loop(self):
    """
    ### Run training loop

    Resumes from the tracker's current global step and runs the remaining
    `self.c.updates - offset` updates. Each update samples with the current
    policy, trains on the samples, saves the tracked values and writes a log
    line. A checkpoint is saved every 500th update.
    """
    offset = tracker.get_global_step()

    for _ in monit.loop(self.c.updates - offset):
        # The global step is read from the tracker rather than from the loop
        # counter, so it already includes `offset` on a resumed run
        update = tracker.get_global_step()

        # Sample with current policy
        samples = self.sample()
        # Train the model
        self.train(samples)

        # Write summary info to the writer, and log to the screen
        tracker.save()
        logger.log()

        # Save a checkpoint
        if (update + 1) % 500 == 0:
            experiment.save_checkpoint()
def loop(self):
    """
    Run `__epochs` epochs of training and testing.

    Each epoch trains, tests, logs the model parameters and saves the tracked
    values. A log line is written every `__log_new_line_interval` epochs and,
    when `__is_save_models` is set, a checkpoint is saved every epoch.
    """
    for epoch in monit.loop(range(self.__epochs)):
        self._train()
        self._test()
        self.__log_model_params()

        # Flush the tracked values
        tracker.save()

        # Move the console output to the next line at the configured interval
        completed = epoch + 1
        if completed % self.__log_new_line_interval == 0:
            logger.log()

        # Checkpoint only when model saving is enabled
        if not self.__is_save_models:
            continue
        experiment.save_checkpoint()
def run_training_loop(self):
    """
    ### Run training loop

    Runs `self.updates` updates; each one samples with the current policy,
    trains on the samples and saves the tracked indicators.
    """
    # Last 100 episode information
    for name in ('reward', 'length'):
        tracker.set_queue(name, 100, True)

    for update in monit.loop(self.updates):
        # Sample with the current policy and train on the result
        batch = self.sample()
        self.train(batch)

        # Save tracked indicators
        tracker.save()

        # Add a new line to the screen once every 1,000 updates
        if (update + 1) % 1_000 != 0:
            continue
        logger.log()
def train(self):
    """
    Train for `self.epochs` epochs over `self.dataloader`.

    Per mini-batch: advance the global step, compute and log the loss,
    back-propagate, clip the gradient norm and take an optimizer step.
    The model is logged and a sample is generated on every 100th batch;
    tracked metrics are saved on every 10th batch. A checkpoint is saved
    after each epoch.
    """
    for _ in monit.loop(self.epochs):
        for step, batch in monit.enum('Train', self.dataloader):
            # Move data to the device
            x = batch[0].to(self.device)
            y = batch[1].to(self.device)

            # Count the elements of this batch towards the global step
            n_elements = x.shape[0] * x.shape[1]
            tracker.add_global_step(n_elements)

            self.model.train()
            out = self.model(x)

            # Calculate and log the loss over the flattened output and target
            flat_out = out.view(-1, out.shape[-1])
            flat_target = y.view(-1)
            loss = self.loss_func(flat_out, flat_target)
            tracker.add("loss.train", loss)

            # Gradients, clipping and the optimizer step
            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           max_norm=self.grad_norm_clip)
            self.optimizer.step()

            # Log the model on every 100th batch, while the gradients
            # are still populated
            if (step + 1) % 100 == 0:
                tracker.add('model', self.model)

            # Clear the gradients
            self.optimizer.zero_grad()

            # Generate a sample on every 100th batch, without gradients
            if (step + 1) % 100 == 0:
                self.model.eval()
                with torch.no_grad():
                    self.sample()

            # Save the tracked metrics on every 10th batch
            if (step + 1) % 10 == 0:
                tracker.save()

        experiment.save_checkpoint()
def start_training(self, model):
    """
    Run the training and validation epochs for the given model.

    After every epoch the validation loss is compared with the lowest value
    seen so far; on an improvement the new value is remembered and, when
    `self.is_save_model` is set, the model is saved.

    :param model: Instance of the NewsClassifier class
    """
    lowest_loss = float('inf')

    for _ in monit.loop(self.epochs):
        # Training pass, tracked under the `train` namespace
        with tracker.namespace('train'):
            self.train_epoch(model, self.train_data_loader, 'train')

        # Validation pass; only the second returned value (the loss) is used
        with tracker.namespace('valid'):
            _, val_loss = self.train_epoch(model, self.val_data_loader, 'valid')

        # Keep the best validation loss and optionally save the model
        if val_loss < lowest_loss:
            lowest_loss = val_loss
            if self.is_save_model:
                self.save_model(model)

        tracker.new_line()
def run_training_loop(self):
    """
    ### Run training loop

    Runs `self.updates` updates. Each update samples with the current
    exploration coefficient, trains once the replay buffer is full, and
    copies the model weights to the target model every
    `self.update_target_model` updates.
    """
    # Last 100 episode information
    for name in ('reward', 'length'):
        tracker.set_queue(name, 100, True)

    # Copy to target network initially
    self.target_model.load_state_dict(self.model.state_dict())

    for update in monit.loop(self.updates):
        # $\epsilon$, exploration fraction
        epsilon = self.exploration_coefficient(update)
        tracker.add('exploration', epsilon)

        # $\beta$ for prioritized replay
        replay_beta = self.prioritized_replay_beta(update)
        tracker.add('beta', replay_beta)

        # Sample with current policy
        self.sample(epsilon)

        # Training starts only after the buffer is full
        if self.replay_buffer.is_full():
            self.train(replay_beta)

        # Periodically update the target network
        if update % self.update_target_model == 0:
            self.target_model.load_state_dict(self.model.state_dict())

        # Save tracked indicators
        tracker.save()

        # Add a new line to the screen once every 1,000 updates
        if (update + 1) % 1_000 != 0:
            continue
        logger.log()
def run(self):
    r"""
    ## Training

    We aim to solve:
    $$G^{*}, F^{*} = \arg \min_{G,F} \max_{D_X, D_Y} \mathcal{L}(G, F, D_X, D_Y)$$

    where,
    $G$ translates images from $X \rightarrow Y$,
    $F$ translates images from $Y \rightarrow X$,
    $D_X$ tests if images are from $X$ space,
    $D_Y$ tests if images are from $Y$ space, and

    \begin{align}
    \mathcal{L}(G, F, D_X, D_Y)
        &= \mathcal{L}_{GAN}(G, D_Y, X, Y) \\
        &+ \mathcal{L}_{GAN}(F, D_X, Y, X) \\
        &+ \lambda_1 \mathcal{L}_{cyc}(G, F) \\
        &+ \lambda_2 \mathcal{L}_{identity}(G, F) \\
    \\
    \mathcal{L}_{GAN}(G, F, D_Y, X, Y)
        &= \mathbb{E}_{y \sim p_{data}(y)} \Big[log D_Y(y)\Big] \\
        &+ \mathbb{E}_{x \sim p_{data}(x)} \bigg[log\Big(1 - D_Y(G(x))\Big)\bigg] \\
        &+ \mathbb{E}_{x \sim p_{data}(x)} \Big[log D_X(x)\Big] \\
        &+ \mathbb{E}_{y \sim p_{data}(y)} \bigg[log\Big(1 - D_X(F(y))\Big)\bigg] \\
    \\
    \mathcal{L}_{cyc}(G, F)
        &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(G(x)) - x \rVert_1\Big] \\
        &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(F(y)) - y \rVert_1\Big] \\
    \\
    \mathcal{L}_{identity}(G, F)
        &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(x) - x \rVert_1\Big] \\
        &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(y) - y \rVert_1\Big] \\
    \end{align}

    $\mathcal{L}_{GAN}$ is the generative adversarial loss from the original
    GAN paper.

    $\mathcal{L}_{cyc}$ is the cyclic loss, where we try to get $F(G(x))$ to be similar to $x$,
    and $G(F(y))$ to be similar to $y$.
    Basically if the two generators (transformations) are applied in series it should give back the
    original image.
    This is the main contribution of this paper.
    It trains the generators to generate an image of the other distribution that is similar to
    the original image.
    Without this loss $G(x)$ could generate anything that's from the distribution of $Y$.
    Now it needs to generate something from the distribution of $Y$ but still has properties of $x$,
    so that $F(G(x))$ can re-generate something like $x$.

    $\mathcal{L}_{identity}$ is the identity loss.
    This was used to encourage the mapping to preserve color composition between
    the input and the output.

    To solve $G^{\*}, F^{\*}$,
    discriminators $D_X$ and $D_Y$ should **ascend** on the gradient,

    \begin{align}
    \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
    &\Bigg[
    \log D_Y\Big(y^{(i)}\Big) \\
    &+ \log \Big(1 - D_Y\Big(G\Big(x^{(i)}\Big)\Big)\Big) \\
    &+ \log D_X\Big(x^{(i)}\Big) \\
    & +\log\Big(1 - D_X\Big(F\Big(y^{(i)}\Big)\Big)\Big)
    \Bigg]
    \end{align}

    That is descend on *negative* log-likelihood loss.

    In order to stabilize the training the negative log-likelihood objective
    was replaced by a least-squared loss -
    the least-squared error of discriminator, labelling real images with 1,
    and generated images with 0.
    So we want to descend on the gradient,

    \begin{align}
    \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
    &\Bigg[
        \bigg(D_Y\Big(y^{(i)}\Big) - 1\bigg)^2 \\
        &+ D_Y\Big(G\Big(x^{(i)}\Big)\Big)^2 \\
        &+ \bigg(D_X\Big(x^{(i)}\Big) - 1\bigg)^2 \\
        &+ D_X\Big(F\Big(y^{(i)}\Big)\Big)^2
    \Bigg]
    \end{align}

    We use least-squares for generators also.
    The generators should *descend* on the gradient,

    \begin{align}
    \nabla_{\theta_{F, G}} \frac{1}{m} \sum_{i=1}^m
    &\Bigg[
        \bigg(D_Y\Big(G\Big(x^{(i)}\Big)\Big) - 1\bigg)^2 \\
        &+ \bigg(D_X\Big(F\Big(y^{(i)}\Big)\Big) - 1\bigg)^2 \\
        &+ \mathcal{L}_{cyc}(G, F)
        + \mathcal{L}_{identity}(G, F)
    \Bigg]
    \end{align}

    We use `generator_xy` for $G$ and `generator_yx` for $F$.
    We use `discriminator_x` for $D_X$ and `discriminator_y` for $D_Y$.
    """
    # NOTE: the docstring is a raw string because it is full of LaTeX
    # backslashes; in a normal string sequences such as `\a`, `\b`, `\f`,
    # `\n`, `\r` and `\t` would be turned into control characters.

    # Replay buffers to keep generated samples
    gen_x_buffer = ReplayBuffer()
    gen_y_buffer = ReplayBuffer()

    # Loop through epochs
    for epoch in monit.loop(self.epochs):
        # Loop through the dataset
        for i, batch in monit.enum('Train', self.dataloader):
            # Move images to the device
            data_x, data_y = batch['x'].to(self.device), batch['y'].to(self.device)

            # True labels equal to $1$, one per discriminator output element
            true_labels = torch.ones(data_x.size(0), *self.discriminator_x.output_shape,
                                     device=self.device, requires_grad=False)
            # False labels equal to $0$, same shape as the true labels
            false_labels = torch.zeros(data_x.size(0), *self.discriminator_x.output_shape,
                                       device=self.device, requires_grad=False)

            # Train the generators.
            # This returns the generated images.
            gen_x, gen_y = self.optimize_generators(data_x, data_y, true_labels)

            # Train discriminators; the generated images are passed through
            # the replay buffers first
            self.optimize_discriminator(data_x, data_y,
                                        gen_x_buffer.push_and_pop(gen_x),
                                        gen_y_buffer.push_and_pop(gen_y),
                                        true_labels, false_labels)

            # Save training statistics and increment the global step counter
            tracker.save()
            tracker.add_global_step(max(len(data_x), len(data_y)))

            # Number of batches processed before this one, across all epochs
            batches_done = epoch * len(self.dataloader) + i

            # Save images at intervals
            if batches_done % self.sample_interval == 0:
                # Save models when sampling images
                experiment.save_checkpoint()
                # Sample images
                self.sample_images(batches_done)

        # Update learning rates once per epoch
        self.generator_lr_scheduler.step()
        self.discriminator_lr_scheduler.step()
        # New line
        tracker.new_line()
def main_train():
    """
    Train a `SimpleLstmModel` on the loaded source files for 100 epochs.

    Each epoch is divided into steps; every step trains and validates on the
    next slice of the training and validation batches, so both progress
    through their data at the same relative rate. A checkpoint is saved at
    the end of every epoch, and also when the run is interrupted from the
    keyboard, in which case the function returns.
    """
    lstm_size = 1024
    lstm_layers = 3
    batch_size = 32
    seq_len = 32

    with monit.section("Loading data"):
        # Load all python files and split them into training and validation sets
        files = parser.load.load_files()
        train_files, valid_files = parser.load.split_train_valid(files,
                                                                 is_shuffle=False)

    with monit.section("Create model"):
        model = SimpleLstmModel(encoding_size=tokenizer.VOCAB_SIZE,
                                embedding_size=tokenizer.VOCAB_SIZE,
                                lstm_size=lstm_size,
                                lstm_layers=lstm_layers)
        # Move model to `device`
        model.to(device)

        # Loss function and optimizer
        loss_func = torch.nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters())

    # The initial hidden and cell states are zero
    state_shape = (lstm_layers, batch_size, lstm_size)
    h0 = torch.zeros(state_shape, device=device)
    c0 = torch.zeros(state_shape, device=device)

    # Setup logger indicators
    for indicator in ("train.loss", "valid.loss"):
        tracker.set_queue(indicator, queue_size=500, is_print=True)

    # Specify the model in [lab](https://github.com/vpj/lab) for saving and loading
    experiment.add_pytorch_models({'base': model})

    # Start training scratch (step '0')
    experiment.start()

    # Number of batches per epoch
    total_length = sum(len(f[1]) + 1 for f in train_files)
    batches = math.ceil(total_length / (batch_size * seq_len))

    # Number of steps per epoch. We train and validate on each step.
    steps_per_epoch = 200

    # Arguments shared by the trainer and the validator
    shared_args = dict(model=model,
                       loss_func=loss_func,
                       optimizer=optimizer,
                       batch_size=batch_size,
                       seq_len=seq_len,
                       h0=h0,
                       c0=c0,
                       eof=0)

    # Train for 100 epochs
    for epoch in monit.loop(range(100)):
        # A new trainer and validator are created for every epoch
        trainer = Trainer(files=train_files, is_train=True, **shared_args)
        validator = Trainer(files=valid_files, is_train=False, **shared_args)

        # Next batch to train and validate
        train_batch = 0
        valid_batch = 0

        for i in range(1, steps_per_epoch):
            try:
                with DelayedKeyboardInterrupt():
                    # Set global step
                    done_in_epoch = min(batches, (batches * i) // steps_per_epoch)
                    tracker.set_global_step(epoch * batches + done_in_epoch)

                    # Fraction of the epoch covered once this step finishes
                    fraction = min(1., (i + 1) / steps_per_epoch)
                    # Last batch to train and validate in this step
                    train_batch_limit = trainer.x.shape[0] * fraction
                    valid_batch_limit = validator.x.shape[0] * fraction

                    with monit.section("train", total_steps=trainer.x.shape[0],
                                       is_partial=True):
                        model.train()
                        while train_batch < train_batch_limit:
                            trainer.run(train_batch)
                            monit.progress(train_batch + 1)
                            train_batch += 1

                    with monit.section("valid", total_steps=validator.x.shape[0],
                                       is_partial=True):
                        model.eval()
                        while valid_batch < valid_batch_limit:
                            validator.run(valid_batch)
                            monit.progress(valid_batch + 1)
                            valid_batch += 1

                    # Output results
                    tracker.save()

                    # 10 lines of logs per epoch
                    if (i + 1) % (steps_per_epoch // 10) == 0:
                        logger.log()
            except KeyboardInterrupt:
                # Save what has been trained so far and stop
                experiment.save_checkpoint()
                return

        experiment.save_checkpoint()
def main():
    """
    Train and validate the MNIST model for `configs['epochs']` epochs,
    monitored as the `mnist_labml_monit` experiment, then save a checkpoint.

    The random seed is set before the model is created so that the weight
    initialisation is covered by the seed as well as the data shuffling.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Seed first: seeding after `Net()` is constructed would leave the
    # parameter initialisation unseeded
    torch.manual_seed(configs['seed'])

    # Use the first GPU when requested and available, otherwise the CPU
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    # Convert images to tensors and normalize them
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')
    # ✨ Save configurations
    experiment.configs(configs)
    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            train(model, optimizer, train_loader, device, configs['train_log_interval'])
            validate(model, valid_loader, device)
            logger.log()

    # Save the model
    experiment.save_checkpoint()
def train():
    """
    ## Create and train a small model

    Builds the `retro_small` experiment on the Tiny Shakespeare text, trains
    for 32 epochs and, after every epoch, logs a sample generated from a
    fixed prompt and saves a checkpoint.
    """
    experiment.create(name='retro_small')

    # GPU device
    device = torch.device('cuda:0')

    # Tiny Shakespeare dataset
    text_path = lab.get_data_path() / 'tiny_shakespeare.txt'
    tds = TextFileDataset(
        text_path,
        list,
        url='https://raw.githubusercontent.com/karpathy/char-rnn/master/data/tinyshakespeare/input.txt')

    # [Retro dataset](dataset.html) and its dataloader (sampling with replacement)
    train_dataset = Dataset(lab.get_data_path() / 'retro_train_dataset.json', tds)
    train_dl = DataLoader(train_dataset,
                          batch_size=4,
                          sampler=RandomSampler(train_dataset, replacement=True))

    # Hyper-parameters
    chunk_len = 16
    d_model = 128
    d_ff = 512
    n_heads = 16
    d_k = 16

    # Nearest neighbor encoder
    nearest_neighbor_encoder = NearestNeighborEncoder(chunk_len, 6, {3}, d_model,
                                                      n_heads, d_k, d_ff)
    # Model, moved to the device
    model = RetroModel(tds.n_tokens, d_model, 6, {3, 5}, chunk_len,
                       n_heads, d_k, d_ff,
                       encoder=nearest_neighbor_encoder)
    model = model.to(device)

    # Optimizer
    optimizer = Noam(model.parameters(), lr=1., d_model=d_model, warmup=2_000)
    # `Trainer` and `Sampler`
    trainer = Trainer(device, model, train_dl, optimizer)
    sampler = Sampler(device, model, tds, chunk_len)

    # Prompt to sample from after every epoch
    prompt = '''Second Citizen:\nOne word, good citizens.\n\nFirst Citizen:'''

    # Set models for saving and loading
    experiment.add_pytorch_models(model=model)

    with experiment.start():
        # Train for `32` epochs
        for _ in monit.loop(32):
            trainer()
            tracker.new_line()

            # Log the prompt followed by 128 sampled tokens; line breaks are
            # shown as a literal `\n` followed by a real line break
            shown_prompt = prompt.replace('\n', '\\n\n')
            shown_sample = sampler.sample(prompt, 128).replace('\n', '\\n\n')
            logger.log([(shown_prompt, Text.subtle),
                        (shown_sample, Text.none)])

            # Save models
            experiment.save_checkpoint()
def main():
    """
    Train and validate the MNIST model for `configs['epochs']` epochs with
    training and validation batches interleaved by `monit.mix`, monitored as
    the `mnist_labml_monit` experiment, then save a checkpoint.

    The random seed is set before the model is created so that the weight
    initialisation is covered by the seed as well as the data shuffling.
    """
    # Configurations
    configs = {
        'epochs': 10,
        'train_batch_size': 64,
        'valid_batch_size': 100,
        'use_cuda': True,
        'seed': 5,
        'train_log_interval': 10,
        'learning_rate': 0.01,
    }

    # Seed first: seeding after `Net()` is constructed would leave the
    # parameter initialisation unseeded
    torch.manual_seed(configs['seed'])

    # Use the first GPU when requested and available, otherwise the CPU
    is_cuda = configs['use_cuda'] and torch.cuda.is_available()
    device = torch.device("cuda:0" if is_cuda else "cpu")

    # Convert images to tensors and normalize them
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=True,
                       download=True,
                       transform=data_transform),
        batch_size=configs['train_batch_size'],
        shuffle=True)
    valid_loader = torch.utils.data.DataLoader(
        datasets.MNIST(str(lab.get_data_path()),
                       train=False,
                       download=True,
                       transform=data_transform),
        batch_size=configs['valid_batch_size'],
        shuffle=False)

    model = Net().to(device)
    optimizer = optim.Adam(model.parameters(), lr=configs['learning_rate'])

    # ✨ Create the experiment
    experiment.create(name='mnist_labml_monit')
    # ✨ Save configurations
    experiment.configs(configs)
    # ✨ Set PyTorch models for checkpoint saving and loading
    experiment.add_pytorch_models(dict(model=model))

    # ✨ Start and monitor the experiment
    with experiment.start():
        for _ in monit.loop(range(1, configs['epochs'] + 1)):
            # Interleave training and validation batches within the epoch
            for mode, batch in monit.mix(10, ('train', train_loader), ('valid', valid_loader)):
                # Metrics are tracked under the namespace of the current mode
                with tracker.namespace(mode):
                    # Gradients are only needed for training batches
                    with torch.set_grad_enabled(mode == 'train'):
                        data, target = batch[0].to(device), batch[1].to(device)
                        output = model(data)
                        loss = F.cross_entropy(output, target)
                        # Predicted class is the index of the largest output
                        pred = output.argmax(dim=1, keepdim=True)

                        if mode == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                            # Only training samples advance the global step
                            tracker.add_global_step(data.shape[0])

                        tracker.save({
                            'loss.': loss,
                            'accuracy.': pred.eq(target.view_as(pred)).sum() / pred.shape[0]
                        })
            tracker.new_line()

    # Save the model
    experiment.save_checkpoint()