def step(self, batch: Any, batch_idx: BatchIndex):
    # Move data to the device
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Update the global step (number of tokens processed) when in training mode
    if self.mode.is_train:
        tracker.add_global_step(target.shape[0] * target.shape[1])

    # Whether to capture model activations
    with self.mode.update(is_log_activations=batch_idx.is_last):
        # Get the recurrent state
        state = self.state.get()
        # Run the model
        output, new_state = self.model(data, state)
        # Update the state
        state = self.state_updater(state, new_state)
        self.state.set(state)

    # Calculate and log the loss
    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    # Calculate and log the accuracy
    self.accuracy(output, target)
    self.accuracy.track()

    if self.mode.is_train:
        # Calculate gradients
        loss.backward()
        # Clip gradients
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
        # Take optimizer step
        self.optimizer.step()
        # Log the model parameters and gradients on the last batch of the epoch
        if batch_idx.is_last:
            tracker.add('model', self.model)
        # Clear the gradients
        self.optimizer.zero_grad()

    # Save the tracked metrics
    tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    # Get the batch
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Add global step if we are in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Run the model and specify whether to log the activations
    with self.mode.update(is_log_activations=batch_idx.is_last):
        output = self.model(data)

    # Calculate the loss
    loss = self.loss_func(output, target)
    # Calculate the accuracy
    self.accuracy_func(output, target)
    # Log the loss
    tracker.add("loss.", loss)

    # Optimize if we are in training mode
    if self.mode.is_train:
        # Calculate the gradients
        loss.backward()
        # Take optimizer step
        self.optimizer.step()
        # Log the parameter and gradient L2 norms once per epoch
        if batch_idx.is_last:
            tracker.add('model', self.model)
            tracker.add('optimizer', (self.optimizer, {'model': self.model}))
        # Clear the gradients
        self.optimizer.zero_grad()

    # Save logs
    tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    """
    This method is called for each batch
    """
    self.model.train(self.mode.is_train)

    # Get data and target labels
    data, target = batch[0].to(self.model.device), batch[1].to(self.model.device)

    if self.mode.is_train:
        tracker.add_global_step(data.shape[0] * data.shape[1])

    # Run the model
    output = self.model(data)

    # Calculate loss
    loss = self.loss_func(output, target)
    # Calculate accuracy
    self.accuracy(output, target)

    # Log the loss
    tracker.add("loss.", loss)

    # If we are in training mode, calculate the gradients
    if self.mode.is_train:
        loss.backward()
        self.optimizer.step()
        if batch_idx.is_last:
            tracker.add('model', self.model)
        self.optimizer.zero_grad()

    tracker.save()
def __next__(self):
    if self.__signal_received is not None:
        logger.log('\nKilling Loop.', Text.danger)
        monit.finish_loop()
        self.__finish()
        raise StopIteration("SIGINT")

    try:
        global_step = next(self.__loop)
    except StopIteration as e:
        self.__finish()
        raise e

    tracker.set_global_step(global_step)

    if global_step - self.__last_write_step >= self.__log_write_interval:
        tracker.save()
        self.__last_write_step = global_step
    if global_step - self.__last_new_line_step >= self.__log_new_line_interval:
        tracker.new_line()
        self.__last_new_line_step = global_step
    # if self.is_interval(self.__log_write_interval, global_step):
    #     tracker.save()
    # if self.is_interval(self.__log_new_line_interval, global_step):
    #     logger.log()
    # if (self.__is_save_models and
    #         self.is_interval(self.__save_models_interval, global_step)):
    #     experiment.save_checkpoint()
    if (self.__is_save_models and
            global_step - self.__last_save_step >= self.__save_models_interval):
        experiment.save_checkpoint()
        self.__last_save_step = global_step

    return global_step
def run_training_loop(self):
    """
    ### Run training loop
    """
    # Last 100 episode information
    tracker.set_queue('reward', 100, True)
    tracker.set_queue('length', 100, True)

    for update in monit.loop(self.updates):
        progress = update / self.updates

        # Decreasing `learning_rate` and `clip_range` $\epsilon$
        learning_rate = 2.5e-4 * (1 - progress)
        clip_range = 0.1 * (1 - progress)

        # Sample with current policy
        samples = self.sample()

        # Train the model
        self.train(samples, learning_rate, clip_range)

        # Write summary info to the writer, and log to the screen
        tracker.save()
        if (update + 1) % 1_000 == 0:
            logger.log()
def run_training_loop(self):
    """### Run training loop"""
    offset = tracker.get_global_step()
    if offset > 100:
        # If resumed, sample several iterations first to reduce sampling bias
        for i in range(16):
            self.sample(False)

    for _ in monit.loop(self.c.updates - offset):
        update = tracker.get_global_step()
        progress = update / self.c.updates

        # Sample with current policy
        samples = self.sample()
        # Train the model
        self.train(samples)

        # Write summary info to the writer, and log to the screen
        tracker.save()

        # Refresh dynamic hyper-parameters and game parameters periodically
        if (update + 1) % 2 == 0:
            self.set_optim(self.c.lr(), self.c.reg_l2())
            self.set_game_param(self.c.right_gain(), self.c.fix_prob(), self.c.neg_mul(), self.c.step_reward())
            self.set_weight_param(self.c.entropy_weight(), self.c.prob_reg_weight(),
                                  self.c.target_prob_weight(), self.c.gamma(), self.c.lamda())
        if (update + 1) % 25 == 0:
            logger.log()
        if (update + 1) % 200 == 0:
            experiment.save_checkpoint()
def __call__(self):
    """
    ### Train the model for an epoch
    """
    # Iterate through training data
    for i, (src, tgt, neighbors) in monit.enum('Train', self.dataloader):
        # Move data to the device
        src, tgt, neighbors = src.to(self.device), tgt.to(self.device), neighbors.to(self.device)

        # Forward pass
        res = self.model(src, neighbors)
        # Calculate loss
        loss = self.loss_func(res.view(-1, res.shape[-1]), tgt.view(-1))

        # Clear the gradients
        self.optimizer.zero_grad()
        # Backward pass
        loss.backward()
        # Optimize the model
        self.optimizer.step()

        # Save training statistics and increment the global step counter
        tracker.save({'loss.train': loss})
        tracker.add_global_step(len(src))
def iterate(self):
    device = get_device(self.model)
    correct_sum = 0
    total_samples = 0

    for i, (data, target) in monit.enum(self.name, self.data_loader):
        # Move data to the device
        data, target = data.to(device), target.to(device)

        # Clear gradients if we have an optimizer (training mode)
        if self.optimizer is not None:
            self.optimizer.zero_grad()

        # Run the model and calculate the loss
        output = self.model(data)
        loss = self.loss_func(output, target)
        correct_sum += self.accuracy_func(output, target)
        total_samples += len(target)

        tracker.add(".loss", loss)

        # Optimize if we have an optimizer
        if self.optimizer is not None:
            loss.backward()
            self.optimizer.step()

        if self.is_increment_global_step:
            tracker.add_global_step(len(target))

        if self.log_interval is not None and (i + 1) % self.log_interval == 0:
            tracker.save()

        # Track the running accuracy
        tracker.add(".accuracy", correct_sum / total_samples)
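A minimal sketch of the `get_device` helper used in the snippet above, assuming it simply reads the device of the model's first parameter; the actual helper in the source may differ.

import torch

def get_device(module: torch.nn.Module) -> torch.device:
    # Infer the device from the first parameter of the module
    return next(module.parameters()).device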
def run(self):
    """
    ### Training loop

    We do full batch training since the dataset is small.
    If we were to sample and train, we would have to sample a set of nodes for each
    training step along with the edges that span across those selected nodes.
    """
    # Move the feature vectors to the device
    features = self.dataset.features.to(self.device)
    # Move the labels to the device
    labels = self.dataset.labels.to(self.device)
    # Move the adjacency matrix to the device
    edges_adj = self.dataset.adj_mat.to(self.device)
    # Add an empty third dimension for the heads
    edges_adj = edges_adj.unsqueeze(-1)

    # Random indexes
    idx_rand = torch.randperm(len(labels))
    # Nodes for training
    idx_train = idx_rand[:self.training_samples]
    # Nodes for validation
    idx_valid = idx_rand[self.training_samples:]

    # Training loop
    for epoch in monit.loop(self.epochs):
        # Set the model to training mode
        self.model.train()
        # Make all the gradients zero
        self.optimizer.zero_grad()
        # Evaluate the model
        output = self.model(features, edges_adj)
        # Get the loss for training nodes
        loss = self.loss_func(output[idx_train], labels[idx_train])
        # Calculate gradients
        loss.backward()
        # Take optimization step
        self.optimizer.step()
        # Log the loss
        tracker.add('loss.train', loss)
        # Log the accuracy
        tracker.add('accuracy.train', accuracy(output[idx_train], labels[idx_train]))

        # Set mode to evaluation mode for validation
        self.model.eval()
        # No need to compute gradients
        with torch.no_grad():
            # Evaluate the model again
            output = self.model(features, edges_adj)
            # Calculate the loss for validation nodes
            loss = self.loss_func(output[idx_valid], labels[idx_valid])
            # Log the loss
            tracker.add('loss.valid', loss)
            # Log the accuracy
            tracker.add('accuracy.valid', accuracy(output[idx_valid], labels[idx_valid]))

        # Save logs
        tracker.save()
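A minimal sketch of the `accuracy` helper assumed by the training loop above: the fraction of nodes whose arg-max prediction matches the label. The actual helper in the source may be defined differently.

import torch

def accuracy(output: torch.Tensor, labels: torch.Tensor) -> float:
    # Fraction of predictions whose arg-max class matches the label
    return (output.argmax(dim=-1) == labels).float().mean().item()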
def run(self):
    pytorch_utils.add_model_indicators(self.policy)

    for epoch, (game, arrange) in enumerate(self.games):
        board = Board(arrange)
        # TODO change this
        state = board.get_current_board()

        for iteration in count():
            logger.log('epoch : {}, iteration : {}'.format(epoch, iteration), Color.cyan)

            action = self.get_action(state)
            next_state, reward, done = self.step(board, action.item())
            if done:
                next_state = None

            self.memory.push(state, action, next_state, reward)
            state = next_state

            self.train()

            if done:
                tracker.add(iterations=iteration)
                tracker.save()
                break

        if epoch % self.target_update == 0:
            self.target.load_state_dict(self.policy.state_dict())

        if self.is_log_parameters:
            pytorch_utils.store_model_indicators(self.policy)
def train(model, optimizer, train_loader, device, train_log_interval):
    """This is the training code"""

    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        output = model(data)
        loss = F.cross_entropy(output, target)
        optimizer.zero_grad()
        loss.backward()
        if batch_idx == 0:
            tracker.add('model', model)
        optimizer.step()

        # **✨ Increment the global step**
        tracker.add_global_step()
        # **✨ Store stats in the tracker**
        tracker.save({'loss.train': loss})

        if batch_idx % train_log_interval == 0:
            # **✨ Save added stats**
            tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    # Set the model mode
    self.model.train(self.mode.is_train)

    # Move data to the device
    data, target = batch[0].to(self.device), batch[1].to(self.device)

    # Increment the global step in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Whether to log activations on this batch
    is_log_activations = batch_idx.is_interval(self.log_activations_batches)
    with monit.section("model"):
        with self.mode.update(is_log_activations=is_log_activations):
            output = self.model(data)

    # Calculate and log the loss
    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    if self.mode.is_train:
        with monit.section('backward'):
            loss.backward()

        # Take an optimizer step only at the configured interval
        if batch_idx.is_interval(self.update_batches):
            with monit.section('optimize'):
                self.optimizer.step()
            # Log model parameters and gradients
            if batch_idx.is_interval(self.log_params_updates):
                tracker.add('model', self.model)
            # Clear the gradients
            self.optimizer.zero_grad()

        # Save tracked metrics at the configured interval
        if batch_idx.is_interval(self.log_save_batches):
            tracker.save()
def step(self, batch: Any, batch_idx: BatchIndex):
    # Set the model mode
    self.model.train(self.mode.is_train)

    # Get the data and target, and normalize the target
    data, target = batch['data'].to(self.device), batch['target'].to(self.device)
    target = (target - self.model.y_mean) / self.model.y_std

    # Increment the global step in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Run the model and calculate the loss
    output = self.model(data)
    loss = self.loss_func(output, target)
    tracker.add("loss.", loss)

    if self.mode.is_train:
        loss.backward()
        # Log model parameters and gradients on the last batch of the epoch
        if batch_idx.is_last:
            tracker.add('model', self.model)
        self.optimizer.step()
        self.optimizer.zero_grad()

    # Collect de-normalized outputs during evaluation
    if not self.mode.is_train:
        self.output_collector(output * self.model.y_std + self.model.y_mean)

    tracker.save()
def save_artifacts(self):
    tracker.save({
        'target.': self.y,
        'ref.': self.reference,
        'strike_low.': self.strike_low,
        'strike_high.': self.strike_high
    })
def main():
    conf = {'batch_size': 20}

    with experiment.record(name='sample', exp_conf=conf, writers={'web_api', 'screen'}):
        for i in range(10_000):
            values = {'loss': random()}
            # if i > 1000:
            #     raise RuntimeError('Testing error')
            # for j in range(0, 100):
            #     values[f'grad.fc.{j}.l1'] = random()
            #     values[f'grad.fc.{j}.l2'] = random()
            #     values[f'grad.fc.{j}.mean'] = random()
            #
            #     # values[f'param.fc.{j}.l1'] = random()
            #     # values[f'param.fc.{j}.l2'] = random()
            #     # values[f'param.fc.{j}.mean'] = random()
            #     #
            #     # values[f'module.fc.{j}.l1'] = random()
            #     # values[f'module.fc.{j}.l2'] = random()
            #     # values[f'module.fc.{j}.mean'] = random()
            #     #
            #     # values[f'time.fc.{j}.l1'] = random()
            #     # values[f'time.fc.{j}.l2'] = random()
            #     # values[f'time.fc.{j}.mean'] = random()
            tracker.save(i, values)
            if i % 1000 == 0:
                tracker.new_line()
def _test(self):
    self.encoder.eval()

    with torch.no_grad():
        macro_f1s = []
        test_losses = []

        for input_tensor, target_tensor in monit.iterate("test", self.test_loader):
            encoder_hidden = self.encoder.init_hidden(self.device).double().to(self.device)

            input_tensor = input_tensor.to(self.device).unsqueeze(1)
            target_tensor = target_tensor.to(self.device).double()

            encoder_output, encoder_hidden = self.encoder(input_tensor, encoder_hidden)

            test_loss = self.loss(encoder_output, target_tensor)
            macro_f1 = f1_score(y_true=target_tensor.cpu().detach().numpy().ravel(),
                                y_pred=encoder_output.cpu().detach().to(torch.int32).numpy().ravel(),
                                average='macro')

            test_losses.append(test_loss)
            macro_f1s.append(macro_f1)

        tracker.save(test_loss=np.mean(test_losses), accuracy=np.mean(macro_f1s))
def solve(self):
    for t in monit.loop(self.epochs):
        # Clear current regrets when not doing online updates
        if not self.is_online_update:
            for I in self.info_sets.values():
                I.clear()

        # Walk the game tree once for each player
        for i in range(self.n_players):
            self.cfr(self.create_new_history(), cast(Player, i),
                     [1 for _ in range(self.n_players)])

        # Update the strategies when not doing online updates
        if not self.is_online_update:
            self.update()

        # Track the strategies and regrets of all information sets
        with monit.section("Track"):
            for I in self.info_sets.values():
                for a in I.actions():
                    tracker.add({
                        f'strategy.{I.key}.{a}': I.strategy[a],
                        f'average_strategy.{I.key}.{a}': I.average_strategy[a],
                        f'regret.{I.key}.{a}': I.regret[a],
                        f'current_regret.{I.key}.{a}': I.current_regret[a]
                    })

        # Save the tracked metrics periodically
        if t % self.track_frequency == 0:
            tracker.save()
            logger.log()

        # Save checkpoints periodically
        if (t + 1) % self.save_frequency == 0:
            experiment.save_checkpoint()

    # Print the information sets
    logger.inspect(self.info_sets)
def step(self, batch: Any, batch_idx: BatchIndex):
    self.encoder.train(self.mode.is_train)
    self.decoder.train(self.mode.is_train)

    # Move `data` and `mask` to device and swap the sequence and batch dimensions.
    # `data` will have shape `[seq_len, batch_size, 5]` and
    # `mask` will have shape `[seq_len, batch_size]`.
    data = batch[0].to(self.device).transpose(0, 1)
    mask = batch[1].to(self.device).transpose(0, 1)

    # Increment step in training mode
    if self.mode.is_train:
        tracker.add_global_step(len(data))

    # Encode the sequence of strokes
    with monit.section("encoder"):
        # Get $z$, $\mu$, and $\hat{\sigma}$
        z, mu, sigma_hat = self.encoder(data)

    # Decode the mixture of distributions and $\hat{q}$
    with monit.section("decoder"):
        # Concatenate $[(\Delta x, \Delta y, p_1, p_2, p_3); z]$
        z_stack = z.unsqueeze(0).expand(data.shape[0] - 1, -1, -1)
        inputs = torch.cat([data[:-1], z_stack], 2)
        # Get mixture of distributions and $\hat{q}$
        dist, q_logits, _ = self.decoder(inputs, z, None)

    # Compute the loss
    with monit.section('loss'):
        # $L_{KL}$
        kl_loss = self.kl_div_loss(sigma_hat, mu)
        # $L_R$
        reconstruction_loss = self.reconstruction_loss(mask, data[1:], dist, q_logits)
        # $Loss = L_R + w_{KL} L_{KL}$
        loss = reconstruction_loss + self.kl_div_loss_weight * kl_loss

        # Track losses
        tracker.add("loss.kl.", kl_loss)
        tracker.add("loss.reconstruction.", reconstruction_loss)
        tracker.add("loss.total.", loss)

    # Only if we are in training state
    if self.mode.is_train:
        # Run optimizer
        with monit.section('optimize'):
            # Set `grad` to zero
            self.optimizer.zero_grad()
            # Compute gradients
            loss.backward()
            # Log model parameters and gradients
            if batch_idx.is_last:
                tracker.add(encoder=self.encoder, decoder=self.decoder)
            # Clip gradients
            nn.utils.clip_grad_norm_(self.encoder.parameters(), self.grad_clip)
            nn.utils.clip_grad_norm_(self.decoder.parameters(), self.grad_clip)
            # Optimize
            self.optimizer.step()

    tracker.save()
def main():
    experiment.create(name='test_schedule', writers={'screen', 'web_api'})
    lr = DynamicSchedule(0.01, (0, 1))
    experiment.configs({'lr': lr})

    with experiment.start():
        for epoch in monit.loop(100):
            tracker.save('hp.lr', lr())
            time.sleep(1)
def log_metrics(self, metrics: Dict[str, Union[torch.Tensor, float]],
                step: Optional[int] = None) -> None:
    if step is None:
        tracker.add_global_step()
        tracker.save(metrics)
    else:
        tracker.save(step, metrics)
def main():
    experiment.create(name='Test')

    with experiment.start():
        for i in range(1, 401):
            tracker.add_global_step()
            time.sleep(1)
            tracker.save(loss=1.)
def main():
    import time

    for _ in monit.loop(10):
        for n, v in monit.mix(5, ('train', range(50)), ('valid', range(10))):
            time.sleep(0.05)
            # print(n, v)
            tracker.save({n: v})
        tracker.new_line()
def __finish(self):
    try:
        signal.signal(signal.SIGINT, self.old_handler)
    except ValueError:
        pass

    tracker.save()
    tracker.new_line()

    if self.__is_save_models:
        logger.log("Saving model...")
        experiment.save_checkpoint()
def setup_and_add():
    for t in range(10):
        tracker.set_scalar(f"loss1.{t}", is_print=t == 0)

    experiment.start()

    for i in monit.loop(1000):
        for t in range(10):
            tracker.add({f'loss1.{t}': i})
        tracker.save()
def main():
    conf = {'batch_size': 20}

    for i in range(2):
        with experiment.record(name=f'sample_{i}', exp_conf=conf, writers={'screen'}):
            for epoch in range(100):
                tracker.save(i, loss=random())
            tracker.new_line()
def main():
    experiment.create(name='test_dynamic_hp', writers={'screen', 'web_api'})
    lr = FloatDynamicHyperParam(0.01, (0, 1))
    # experiment.configs({'lr': lr})

    conf = Configs()
    experiment.configs(conf)
    lr = conf.lr

    with experiment.start():
        for epoch in monit.loop(100):
            tracker.save('hp.lr', lr())
            time.sleep(1)
def after_epoch(self):
    metrics = {}
    try:
        for m in self.learn.metrics:
            if m.value is not None:
                metrics[m.name] = m.value
    except Exception:
        pass

    tracker.save(metrics)
    tracker.new_line()
def run_step(self):
    for i in range(self.inner_iterations):
        with tracker.namespace('sample'):
            self.sample()
        with self.mode.update(is_train=True):
            with tracker.namespace('train'):
                self.trainer()
        if self.validator:
            with tracker.namespace('valid'):
                self.validator()
        tracker.save()
def save_artifacts(self):
    tracker.set_tensor(".ref", is_once=True)
    tracker.set_tensor(".target", is_once=True)
    tracker.set_tensor(".strike_low", is_once=True)
    tracker.set_tensor(".strike_high", is_once=True)

    tracker.save({
        '.target': self.y,
        '.ref': self.reference,
        '.strike_low': self.strike_low,
        '.strike_high': self.strike_high
    })
def on_train_batch_end(self, batch, logs=None):
    if logs is None:
        logs = {}

    tracker.add_global_step()

    if 'size' in logs:
        del logs['size']
    if 'batch' in logs:
        del logs['batch']

    tracker.add(logs)

    if batch % self.save_batch_frequency == 0:
        tracker.save()