def _train(self):
    for i, (images, _) in monit.enum("train", self.train_loader):
        # Smoothed labels: real targets in [0.8, 1.0], fake targets in [0.0, 0.2]
        targets_real = torch.empty(images.size(0), 1, device=self.device).uniform_(0.8, 1.0)
        targets_fake = torch.empty(images.size(0), 1, device=self.device).uniform_(0.0, 0.2)

        images = images.to(self.device)

        # Train the discriminator.
        # Use `images.size(0)` for the noise batch so the final (possibly smaller)
        # batch still matches the target tensors created above.
        self.optimizer_D.zero_grad()
        logits_real = self.discriminator(images)
        fake_images = self.generator(
            noise(self.device, images.size(0), self.noise_dim)).detach()
        logits_fake = self.discriminator(fake_images)
        discriminator_loss = DLoss(logits_real, logits_fake,
                                   targets_real, targets_fake)
        discriminator_loss.backward()
        self.optimizer_D.step()

        # Train the generator
        self.optimizer_G.zero_grad()
        fake_images = self.generator(
            noise(self.device, images.size(0), self.noise_dim))
        logits_fake = self.discriminator(fake_images)
        generator_loss = GLoss(logits_fake, targets_real)
        generator_loss.backward()
        self.optimizer_G.step()

        # Log losses and a few generated samples
        tracker.add(G_Loss=generator_loss.item())
        tracker.add(D_Loss=discriminator_loss.item())
        tracker.add_global_step()
        # Guard against batches with fewer than 10 images
        for j in range(1, min(10, fake_images.size(0))):
            img = fake_images[j].squeeze()
            tracker.add('generated', img)
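# The loop above assumes `noise`, `DLoss`, and `GLoss` helpers that are not
# defined here. Below is a minimal sketch of what they might look like; these
# are assumptions (standard-normal latents and binary cross-entropy against the
# smoothed targets built in `_train`), not the original definitions.
import torch
import torch.nn.functional as F


def noise(device, batch_size, noise_dim):
    # Sample standard-normal latent vectors for the generator
    return torch.randn(batch_size, noise_dim, device=device)


def DLoss(logits_real, logits_fake, targets_real, targets_fake):
    # Discriminator loss: push real logits toward ~1 and fake logits toward ~0
    return (F.binary_cross_entropy_with_logits(logits_real, targets_real) +
            F.binary_cross_entropy_with_logits(logits_fake, targets_fake))


def GLoss(logits_fake, targets_real):
    # Generator loss: make the discriminator score fakes as real
    return F.binary_cross_entropy_with_logits(logits_fake, targets_real)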
def step(self, batch: any, batch_idx: BatchIndex):
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    if self.mode.is_train:
        tracker.add_global_step(target.shape[0] * target.shape[1])

    with self.mode.update(is_log_activations=batch_idx.is_last):
        # Get the current recurrent state, run the model, and store the new state
        state = self.state.get()
        output, new_state = self.model(data, state)
        state = self.state_updater(state, new_state)
        self.state.set(state)

    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    self.accuracy(output, target)
    self.accuracy.track()

    if self.mode.is_train:
        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
        self.optimizer.step()
        if batch_idx.is_last:
            tracker.add('model', self.model)
        self.optimizer.zero_grad()

    tracker.save()
def step(self, batch: any, batch_idx: BatchIndex):
    # Get the batch
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Add global step if we are in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Run the model and specify whether to log the activations
    with self.mode.update(is_log_activations=batch_idx.is_last):
        output = self.model(data)

    # Calculate the loss
    loss = self.loss_func(output, target)
    # Calculate the accuracy
    self.accuracy_func(output, target)
    # Log the loss
    tracker.add("loss.", loss)

    # Optimize if we are in training mode
    if self.mode.is_train:
        # Calculate the gradients
        loss.backward()

        # Take optimizer step
        self.optimizer.step()
        # Log the parameter and gradient L2 norms once per epoch
        if batch_idx.is_last:
            tracker.add('model', self.model)
            tracker.add('optimizer', (self.optimizer, {'model': self.model}))
        # Clear the gradients
        self.optimizer.zero_grad()

    # Save logs
    tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    # Set the model to training/evaluation mode
    self.model.train(self.mode.is_train)
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Log activations only on configured batches
    is_log_activations = batch_idx.is_interval(self.log_activations_batches)
    with monit.section("model"):
        with self.mode.update(is_log_activations=is_log_activations):
            output = self.model(data)

    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    if self.mode.is_train:
        with monit.section('backward'):
            loss.backward()

        # Take an optimizer step only every `update_batches` (gradient accumulation)
        if batch_idx.is_interval(self.update_batches):
            with monit.section('optimize'):
                self.optimizer.step()
            if batch_idx.is_interval(self.log_params_updates):
                tracker.add('model', self.model)
            self.optimizer.zero_grad()

        if batch_idx.is_interval(self.log_save_batches):
            tracker.save()
def iterate(self):
    device = get_device(self.model)
    correct_sum = 0
    total_samples = 0

    for i, (data, target) in monit.enum(self.name, self.data_loader):
        data, target = data.to(device), target.to(device)

        if self.optimizer is not None:
            self.optimizer.zero_grad()

        output = self.model(data)
        loss = self.loss_func(output, target)
        # `accuracy_func` returns the number of correct predictions
        correct_sum += self.accuracy_func(output, target)
        total_samples += len(target)
        tracker.add(".loss", loss)

        if self.optimizer is not None:
            loss.backward()
            self.optimizer.step()

        if self.is_increment_global_step:
            tracker.add_global_step(len(target))

        if self.log_interval is not None and (i + 1) % self.log_interval == 0:
            tracker.save()

    tracker.add(".accuracy", correct_sum / total_samples)
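# `iterate` above divides the running `correct_sum` by the sample count, so the
# assumed `accuracy_func` must return the *number* of correct predictions, not
# a ratio. A minimal sketch under that assumption:
def accuracy_func(output: torch.Tensor, target: torch.Tensor) -> int:
    # Count predictions whose arg-max class matches the target
    pred = output.argmax(dim=-1)
    return pred.eq(target).sum().item()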
def __call__(self):
    """
    ### Train the model for an epoch
    """
    # Iterate through training data
    for i, (src, tgt, neighbors) in monit.enum('Train', self.dataloader):
        # Move data to the device
        src, tgt, neighbors = src.to(self.device), tgt.to(self.device), neighbors.to(self.device)
        # Forward pass
        res = self.model(src, neighbors)
        # Calculate loss
        loss = self.loss_func(res.view(-1, res.shape[-1]), tgt.view(-1))

        # Clear the gradients
        self.optimizer.zero_grad()
        # Backward pass
        loss.backward()
        # Optimize the model
        self.optimizer.step()

        # Save training statistics and increment the global step counter
        tracker.save({'loss.train': loss})
        tracker.add_global_step(len(src))
def step(self, batch: Any, batch_idx: BatchIndex):
    """
    This method is called for each batch
    """
    self.model.train(self.mode.is_train)

    # Get data and target labels
    data, target = batch[0].to(self.model.device), batch[1].to(self.model.device)

    if self.mode.is_train:
        tracker.add_global_step(data.shape[0] * data.shape[1])

    # Run the model
    output = self.model(data)

    # Calculate loss
    loss = self.loss_func(output, target)
    # Calculate accuracy
    self.accuracy(output, target)

    # Log the loss
    tracker.add("loss.", loss)

    # If we are in training mode, calculate the gradients
    if self.mode.is_train:
        loss.backward()
        self.optimizer.step()
        if batch_idx.is_last:
            tracker.add('model', self.model)
        self.optimizer.zero_grad()

    tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    self.model.train(self.mode.is_train)
    data, target = batch['data'].to(self.device), batch['target'].to(self.device)

    # Normalize the targets with the stored statistics
    target = (target - self.model.y_mean) / self.model.y_std

    if self.mode.is_train:
        tracker.add_global_step(len(data))

    output = self.model(data)
    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    if self.mode.is_train:
        loss.backward()
        if batch_idx.is_last:
            tracker.add('model', self.model)
        self.optimizer.step()
        self.optimizer.zero_grad()

    if not self.mode.is_train:
        # De-normalize the predictions before collecting them
        self.output_collector(output * self.model.y_std + self.model.y_mean)

    tracker.save()
def train(model, optimizer, train_loader, device, train_log_interval):
    """This is the training code"""
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = F.cross_entropy(output, target)
        optimizer.zero_grad()
        loss.backward()
        if batch_idx == 0:
            tracker.add('model', model)
        optimizer.step()

        # **✨ Increment the global step**
        tracker.add_global_step()
        # **✨ Store stats in the tracker**
        tracker.save({'loss.train': loss})

        #
        if batch_idx % train_log_interval == 0:
            # **✨ Save added stats**
            tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    self.encoder.train(self.mode.is_train)
    self.decoder.train(self.mode.is_train)

    # Move `data` and `mask` to device and swap the sequence and batch dimensions.
    # `data` will have shape `[seq_len, batch_size, 5]` and
    # `mask` will have shape `[seq_len, batch_size]`.
    data = batch[0].to(self.device).transpose(0, 1)
    mask = batch[1].to(self.device).transpose(0, 1)

    # Increment step in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Encode the sequence of strokes
    with monit.section("encoder"):
        # Get $z$, $\mu$, and $\hat{\sigma}$
        z, mu, sigma_hat = self.encoder(data)

    # Decode the mixture of distributions and $\hat{q}$
    with monit.section("decoder"):
        # Concatenate $[(\Delta x, \Delta y, p_1, p_2, p_3); z]$
        z_stack = z.unsqueeze(0).expand(data.shape[0] - 1, -1, -1)
        inputs = torch.cat([data[:-1], z_stack], 2)
        # Get mixture of distributions and $\hat{q}$
        dist, q_logits, _ = self.decoder(inputs, z, None)

    # Compute the loss
    with monit.section('loss'):
        # $L_{KL}$
        kl_loss = self.kl_div_loss(sigma_hat, mu)
        # $L_R$
        reconstruction_loss = self.reconstruction_loss(mask, data[1:], dist, q_logits)
        # $Loss = L_R + w_{KL} L_{KL}$
        loss = reconstruction_loss + self.kl_div_loss_weight * kl_loss

        # Track losses
        tracker.add("loss.kl.", kl_loss)
        tracker.add("loss.reconstruction.", reconstruction_loss)
        tracker.add("loss.total.", loss)

    # Only if we are in training state
    if self.mode.is_train:
        # Run optimizer
        with monit.section('optimize'):
            # Set `grad` to zero
            self.optimizer.zero_grad()
            # Compute gradients
            loss.backward()
            # Log model parameters and gradients
            if batch_idx.is_last:
                tracker.add(encoder=self.encoder, decoder=self.decoder)
            # Clip gradients
            nn.utils.clip_grad_norm_(self.encoder.parameters(), self.grad_clip)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), self.grad_clip)
            # Optimize
            self.optimizer.step()

    tracker.save()
def main():
    experiment.create(name='Test')

    with experiment.start():
        for i in range(1, 401):
            tracker.add_global_step()
            time.sleep(1)
            tracker.save(loss=1.)
def log_metrics(self, metrics: Dict[str, Union[torch.Tensor, float]],
                step: Optional[int] = None) -> None:
    if step is None:
        # No explicit step given: advance the global step, then save
        tracker.add_global_step()
        tracker.save(metrics)
    else:
        # Save at the caller-provided step
        tracker.save(step, metrics)
def on_train_batch_end(self, batch, logs=None):
    if logs is None:
        logs = {}

    tracker.add_global_step()

    # Drop Keras bookkeeping entries before logging the metrics
    if 'size' in logs:
        del logs['size']
    if 'batch' in logs:
        del logs['batch']
    tracker.add(logs)

    if batch % self.save_batch_frequency == 0:
        tracker.save()
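# `on_train_batch_end` above matches the Keras callback hook, so the
# surrounding class presumably subclasses `tf.keras.callbacks.Callback`. A
# hedged usage sketch; the class name `TrackerCallback` is hypothetical.
import tensorflow as tf


class TrackerCallback(tf.keras.callbacks.Callback):
    def __init__(self, save_batch_frequency: int = 1):
        super().__init__()
        self.save_batch_frequency = save_batch_frequency

    def on_train_batch_end(self, batch, logs=None):
        # Same body as above: track `logs` and save periodically
        tracker.add_global_step()
        tracker.add(logs or {})
        if batch % self.save_batch_frequency == 0:
            tracker.save()

# Usage: model.fit(x_train, y_train, callbacks=[TrackerCallback()])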
def step(self, batch: Any, batch_idx: BatchIndex):
    self.generator.train(self.mode.is_train)
    self.discriminator.train(self.mode.is_train)

    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Increment step in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Train the discriminator
    with monit.section("discriminator"):
        for _ in range(self.discriminator_k):
            latent = torch.randn(data.shape[0], 100, device=self.device)
            logits_true = self.discriminator(data)
            logits_false = self.discriminator(self.generator(latent).detach())
            loss_true, loss_false = self.discriminator_loss(logits_true, logits_false)
            loss = loss_true + loss_false

            # Log stuff
            tracker.add("loss.discriminator.true.", loss_true)
            tracker.add("loss.discriminator.false.", loss_false)
            tracker.add("loss.discriminator.", loss)

            # Train
            if self.mode.is_train:
                self.discriminator_optimizer.zero_grad()
                loss.backward()
                if batch_idx.is_last:
                    tracker.add('discriminator', self.discriminator)
                self.discriminator_optimizer.step()

    # Train the generator
    with monit.section("generator"):
        latent = torch.randn(data.shape[0], 100, device=self.device)
        generated_images = self.generator(latent)
        logits = self.discriminator(generated_images)
        loss = self.generator_loss(logits)

        # Log stuff
        tracker.add('generated', generated_images[0:5])
        tracker.add("loss.generator.", loss)

        # Train
        if self.mode.is_train:
            self.generator_optimizer.zero_grad()
            loss.backward()
            if batch_idx.is_last:
                tracker.add('generator', self.generator)
            self.generator_optimizer.step()

    tracker.save()
def train(self):
    """
    ### Train the model
    """
    # Loop for the given number of epochs
    for _ in monit.loop(self.epochs):
        # Iterate over the minibatches
        for i, batch in monit.enum('Train', self.dataloader):
            # Move data to the device
            data, target = batch[0].to(self.device), batch[1].to(self.device)

            # Set tracker step, as the number of characters trained on
            tracker.add_global_step(data.shape[0] * data.shape[1])

            # Set model state to training
            self.model.train()
            # Evaluate the model
            output = self.model(data)

            # Calculate loss
            loss = self.loss_func(output.view(-1, output.shape[-1]), target.view(-1))
            # Log the loss
            tracker.add("loss.train", loss)

            # Calculate gradients
            loss.backward()
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
            # Take optimizer step
            self.optimizer.step()
            # Log the model parameters and gradients
            if (i + 1) % 100 == 0:
                tracker.add('model', self.model)
            # Clear the gradients
            self.optimizer.zero_grad()

            # Generate a sample
            if (i + 1) % 100 == 0:
                self.model.eval()
                with torch.no_grad():
                    self.sample()

            # Save the tracked metrics
            if (i + 1) % 10 == 0:
                tracker.save()

        # Save the model
        experiment.save_checkpoint()
def main():
    # Reset global step because we incremented in previous loop
    tracker.set_global_step(0)

    for i in range(1, 401):
        tracker.add_global_step()
        loss = train()
        tracker.add(loss=loss)
        if i % 10 == 0:
            tracker.save()
        if i % 100 == 0:
            logger.log()
        time.sleep(0.02)
def step(self, batch: any, batch_idx: BatchIndex):
    """
    ### Training or validation step
    """
    # Set training/eval mode
    self.model.train(self.mode.is_train)

    # Move data to the device
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Update global step (number of tokens processed) when in training mode
    if self.mode.is_train:
        tracker.add_global_step(data.shape[0] * data.shape[1])

    # Whether to capture model outputs
    with self.mode.update(is_log_activations=batch_idx.is_last and self.is_log_model_activations):
        # Get model outputs.
        # It's returning a tuple for states when using RNNs.
        # This is not implemented yet. 😜
        output, *_ = self.model(data)

    # Calculate and log loss
    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    # Calculate and log accuracy
    self.accuracy(output, target)
    self.accuracy.track()

    self.other_metrics(output, target)

    # Train the model
    if self.mode.is_train:
        # Calculate gradients
        loss.backward()
        # Clip gradients
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
        # Take optimizer step
        self.optimizer.step()
        # Log the model parameters and gradients on last batch of every epoch
        if batch_idx.is_last and self.is_log_model_params_grads:
            tracker.add('model', self.model)
        # Clear the gradients
        self.optimizer.zero_grad()

    # Save the tracked metrics
    tracker.save()
def after_batch(self):
    tracker.add_global_step()

    # Log training or validation loss depending on the phase
    if self.training:
        metrics = {'loss.train': self.learn.loss}
    else:
        metrics = {'loss.valid': self.learn.loss}

    # Collect any computed metrics; skip them if they are unavailable
    try:
        for m in self.learn.metrics:
            if m.value is not None:
                metrics[m.name] = m.value
    except Exception:
        pass

    tracker.save(metrics)
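# `after_batch` above is a fastai v2 callback hook, so the surrounding class
# presumably subclasses `fastai.callback.core.Callback`. A hedged usage sketch;
# the class name `TrackerFastAICallback` is hypothetical.
from fastai.callback.core import Callback


class TrackerFastAICallback(Callback):
    # `after_batch` as defined above goes here
    ...

# Usage: learn = Learner(dls, model, cbs=[TrackerFastAICallback()])
#        learn.fit(n_epoch=1)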
def repeat_values():
    conf = {'batch_size': 20}

    with experiment.record(name='sample', exp_conf=conf, writers={'web_api', 'screen'}):
        for i in range(10):
            tracker.add_global_step(1)
            tracker.save('loss', 1)
            tracker.save('loss', 5)
            # tracker.save()
            if i % 1000 == 0:
                tracker.new_line()
def iterate(self):
    stats = self.batch_step.init_stats()

    for i, batch in monit.enum(self.name, self.data_loader):
        # Process one batch and fold its results into the running stats
        update = self.batch_step.process(batch)
        self.batch_step.update_stats(stats, update)

        if self.is_increment_global_step:
            tracker.add_global_step(update['samples'])

        if self.log_interval is not None and (i + 1) % self.log_interval == 0:
            tracker.save()

    self.batch_step.log_stats(stats)
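# `iterate` above only uses `self.batch_step` through four methods, so the
# assumed interface is roughly the following sketch (names inferred from the
# call sites; the real class may differ):
class BatchStepProtocol:
    def init_stats(self) -> dict:
        # Create an empty accumulator for epoch-level statistics
        raise NotImplementedError

    def process(self, batch) -> dict:
        # Run forward (and backward) for one batch; the returned dict must
        # include a 'samples' count, used to advance the global step
        raise NotImplementedError

    def update_stats(self, stats: dict, update: dict):
        # Fold one batch's results into the accumulator
        raise NotImplementedError

    def log_stats(self, stats: dict):
        # Send the accumulated statistics to the tracker
        raise NotImplementedError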
def step(self, batch: any, batch_idx: BatchIndex):
    """
    ### Training/validation step
    """
    # Move data to the device
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Update global step (number of tokens processed) when in training mode
    if self.mode.is_train:
        tracker.add_global_step(data.shape[0] * data.shape[1])

    # Whether to capture model outputs
    with self.mode.update(is_log_activations=batch_idx.is_last):
        # Get memories
        mem = self.memory.get()
        # Run the model
        output, new_mem = self.model(data, mem)
        # Merge memory
        mem = self.merge_memory(mem, new_mem)
        # Update memories
        self.memory.set(mem)

    # Calculate and log cross entropy loss
    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    # Calculate and log accuracy
    self.accuracy(output, target)
    self.accuracy.track()

    # Train the model
    if self.mode.is_train:
        # Calculate gradients
        loss.backward()
        # Clip gradients
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
        # Take optimizer step
        self.optimizer.step()
        # Log the model parameters and gradients on last batch of every epoch
        if batch_idx.is_last:
            tracker.add('model', self.model)
        # Clear the gradients
        self.optimizer.zero_grad()

    # Save the tracked metrics
    tracker.save()
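# The step above treats memories as opaque through `self.merge_memory`. For a
# Transformer-XL style cache, merging typically means concatenating the new
# per-layer memories after the old ones along the sequence dimension and
# keeping only the most recent `mem_len` steps. A minimal sketch under that
# assumption (`mem_len` is an assumed attribute, not from the original):
def merge_memory(self, old_mem, new_mem):
    # First batch: nothing to merge
    if not old_mem:
        merged = new_mem
    else:
        # Concatenate per-layer memories along the sequence (first) dimension
        merged = [torch.cat((o, n), dim=0) for o, n in zip(old_mem, new_mem)]
    # Trim each layer's memory to the configured length
    return [m[-self.mem_len:] for m in merged]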
def train(self): self.model.train() for i, (data, target) in monit.enum("Train", self.train_loader): data, target = data.to(self.device), target.to(self.device) self.optimizer.zero_grad() output = self.model(data) loss = F.cross_entropy(output, target) loss.backward() self.optimizer.step() tracker.add({'train.loss': loss}) tracker.add_global_step() if i % self.train_log_interval == 0: tracker.save()
def train_epoch(self, model: nn.Module, data_loader: DataLoader, name: str):
    """
    Train/Validate for an epoch
    """
    model.train(name == 'train')

    correct_predictions = 0
    total = 0
    total_loss = 0

    with torch.set_grad_enabled(name == 'train'):
        for i, data in monit.enum(name, data_loader):
            input_ids = data["input_ids"].to(self.device)
            attention_mask = data["attention_mask"].to(self.device)
            targets = data["targets"].to(self.device)

            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            _, preds = torch.max(outputs, dim=1)
            loss = self.loss_fn(outputs, targets)

            total_loss += loss.item() * len(preds)
            correct_predictions += torch.sum(preds == targets).item()
            total += len(preds)
            tracker.add('loss.', loss)

            if name == 'train':
                tracker.add_global_step(len(preds))
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                self.optimizer.step()
                self.optimizer.zero_grad()

            if (i + 1) % 10 == 0:
                tracker.save()

    tracker.save('accuracy.', correct_predictions / total)
    mlflow.log_metric(f"{name}_acc", float(correct_predictions / total), step=tracker.get_global_step())
    mlflow.log_metric(f"{name}_loss", float(total_loss / total), step=tracker.get_global_step())

    return correct_predictions / total, total_loss / total
def step(self, batch: Any, batch_idx: BatchIndex):
    """
    This method gets called by the trainer for each batch
    """
    # Set the model mode
    self.model.train(self.mode.is_train)

    # Get the input and labels and move them to the model's device
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Increment step in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Run the model
    p, y_hat, p_sampled, y_hat_sampled = self.model(data)

    # Calculate the reconstruction loss
    loss_rec = self.loss_rec(p, y_hat, target.to(torch.float))
    tracker.add("loss.", loss_rec)

    # Calculate the regularization loss
    loss_reg = self.loss_reg(p)
    tracker.add("loss_reg.", loss_reg)

    # $L = L_{Rec} + \beta L_{Reg}$
    loss = loss_rec + self.beta * loss_reg

    # Calculate the expected number of steps taken
    steps = torch.arange(1, p.shape[0] + 1, device=p.device)
    expected_steps = (p * steps[:, None]).sum(dim=0)
    tracker.add("steps.", expected_steps)

    # Call accuracy metric
    self.accuracy(y_hat_sampled > 0, target)

    if self.mode.is_train:
        # Compute gradients
        loss.backward()
        # Clip gradients
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
        # Optimizer
        self.optimizer.step()
        # Clear gradients
        self.optimizer.zero_grad()

    #
    tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    """
    Take a training step
    """
    # Set model states
    self.generator.train(self.mode.is_train)
    self.discriminator.train(self.mode.is_train)

    # Get MNIST images
    data = batch[0].to(self.device)

    # Increment step in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Train the discriminator
    with monit.section("discriminator"):
        # Get discriminator loss
        loss = self.calc_discriminator_loss(data)

        # Train
        if self.mode.is_train:
            self.discriminator_optimizer.zero_grad()
            loss.backward()
            if batch_idx.is_last:
                tracker.add('discriminator', self.discriminator)
            self.discriminator_optimizer.step()

    # Train the generator once in every `discriminator_k`
    if batch_idx.is_interval(self.discriminator_k):
        with monit.section("generator"):
            loss = self.calc_generator_loss(data.shape[0])

            # Train
            if self.mode.is_train:
                self.generator_optimizer.zero_grad()
                loss.backward()
                if batch_idx.is_last:
                    tracker.add('generator', self.generator)
                self.generator_optimizer.step()

    tracker.save()
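# A hedged sketch of `calc_discriminator_loss`, following the same pattern as
# the inline discriminator update in the earlier GAN `step` above; the latent
# size of 100 is an assumption carried over from that snippet.
def calc_discriminator_loss(self, data):
    latent = torch.randn(data.shape[0], 100, device=self.device)
    logits_true = self.discriminator(data)
    logits_false = self.discriminator(self.generator(latent).detach())
    loss_true, loss_false = self.discriminator_loss(logits_true, logits_false)
    # Log the two components separately
    tracker.add("loss.discriminator.true.", loss_true)
    tracker.add("loss.discriminator.false.", loss_false)
    return loss_true + loss_false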
def _train(self):
    self.model.train()
    for i, (data, target) in monit.enum("Train", self.train_loader):
        data, target = data.to(self.device), target.to(self.device)

        self.optimizer.zero_grad()
        output = self.model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        self.optimizer.step()

        # Add training loss to the logger.
        # The logger will queue the values and output the mean
        tracker.add({'train.loss': loss})
        tracker.add_global_step()

        # Print output to the console
        if i % self.log_interval == 0:
            # Output the indicators
            tracker.save()
def step(self, batch: any, batch_idx: BatchIndex):
    """
    ### Training or validation step
    """
    # Training/Evaluation mode
    self.model.train(self.mode.is_train)

    # Move data to the device
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Update global step (number of samples processed) when in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Whether to capture model outputs
    with self.mode.update(is_log_activations=batch_idx.is_last):
        # Get model outputs.
        output = self.model(data)

    # Calculate and log loss
    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    # Calculate and log accuracy
    self.accuracy(output, target)
    self.accuracy.track()

    # Train the model
    if self.mode.is_train:
        # Calculate gradients
        loss.backward()
        # Take optimizer step
        self.optimizer.step()
        # Log the model parameters and gradients on last batch of every epoch
        if batch_idx.is_last:
            tracker.add('model', self.model)
        # Clear the gradients
        self.optimizer.zero_grad()

    # Save the tracked metrics
    tracker.save()
def train(self):
    self.model.train()
    for batch_idx, (data, target) in enumerate(self.train_loader):
        data, target = data.to(self.device), target.to(self.device)

        self.optimizer.zero_grad()
        output = self.model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        self.optimizer.step()

        # **✨ Increment the global step**
        tracker.add_global_step()
        # **✨ Store stats in the tracker**
        tracker.save({'loss.train': loss})

        #
        if batch_idx % self.train_log_interval == 0:
            # **✨ Save added stats**
            tracker.save()
def train(model, optimizer, train_loader, device, train_log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        tracker.add({'train_loss': loss})  # overloading methods
        # tracker.add(train_loss=loss)
        # tracker.add('train_loss', loss)
        tracker.add_global_step()

        if batch_idx % train_log_interval == 0:
            tracker.save()
def _train(self):
    for i, (input_tensor, target_tensor) in monit.enum("train", self.train_loader):
        # Initialize the recurrent state for each batch
        encoder_hidden = self.encoder.init_hidden(self.device).double().to(self.device)

        input_tensor = input_tensor.to(self.device).unsqueeze(1)
        target_tensor = target_tensor.to(self.device).double()

        self.optimizer.zero_grad()
        encoder_output, encoder_hidden = self.encoder(input_tensor, encoder_hidden)

        train_loss = self.loss(encoder_output, target_tensor)
        train_loss.backward()
        self.optimizer.step()

        tracker.add(loss=train_loss.item())
        tracker.add_global_step()
        tracker.save()
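# The training loop above assumes an encoder exposing `init_hidden`. A minimal
# single-layer GRU sketch under that assumption; the layer sizes and the
# `[seq_len, batch=1, features]` input layout implied by `unsqueeze(1)` are
# guesses, not the original definition.
import torch.nn as nn


class Encoder(nn.Module):
    def __init__(self, input_size: int, hidden_size: int):
        super().__init__()
        self.hidden_size = hidden_size
        self.gru = nn.GRU(input_size, hidden_size)

    def init_hidden(self, device):
        # Initial hidden state: `[num_layers, batch_size=1, hidden_size]`,
        # matching the batch dimension of 1 added in `_train`
        return torch.zeros(1, 1, self.hidden_size, device=device)

    def forward(self, x, hidden):
        return self.gru(x, hidden)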