Example #1
    def _train(self):
        for i, (images, _) in monit.enum("train", self.train_loader):
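            # Noisy/smoothed target labels: ~1 for real images, ~0 for fake images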
            targets_real = torch.empty(images.size(0), 1,
                                       device=self.device).uniform_(0.8, 1.0)
            targets_fake = torch.empty(images.size(0), 1,
                                       device=self.device).uniform_(0.0, 0.2)

            images = images.to(self.device)

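            # Discriminator step: real images vs. detached generated images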
            self.optimizer_D.zero_grad()
            logits_real = self.discriminator(images)
            fake_images = self.generator(
                noise(self.device, self.batch_size, self.noise_dim)).detach()
            logits_fake = self.discriminator(fake_images)
            discriminator_loss = DLoss(logits_real, logits_fake, targets_real,
                                       targets_fake)
            discriminator_loss.backward()
            self.optimizer_D.step()

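            # Generator step: push generated images towards the 'real' targets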
            self.optimizer_G.zero_grad()
            fake_images = self.generator(
                noise(self.device, self.batch_size, self.noise_dim))
            logits_fake = self.discriminator(fake_images)
            generator_loss = GLoss(logits_fake, targets_real)
            generator_loss.backward()
            self.optimizer_G.step()

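            # Log the losses and increment the global step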
            tracker.add(G_Loss=generator_loss.item())
            tracker.add(D_Loss=discriminator_loss.item())
            tracker.add_global_step()

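        # Log a few generated images from the last batch of the epoch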
        for j in range(1, 10):
            img = fake_images[j].squeeze()
            tracker.add('generated', img)
Example #2
    def step(self, batch: any, batch_idx: BatchIndex):
        data, target = batch[0].to(self.device), batch[1].to(self.device)

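        # Increment the global step by the number of tokens processed when training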
        if self.mode.is_train:
            tracker.add_global_step(target.shape[0] * target.shape[1])

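        # Run the model, carrying the recurrent state across batches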
        with self.mode.update(is_log_activations=batch_idx.is_last):
            state = self.state.get()
            output, new_state = self.model(data, state)
            state = self.state_updater(state, new_state)
            self.state.set(state)

        loss = self.loss_func(output, target)
        tracker.add("loss.", loss)

        self.accuracy(output, target)
        self.accuracy.track()

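        # Backward pass, gradient clipping, and optimizer step in training mode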
        if self.mode.is_train:
            loss.backward()

            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           max_norm=self.grad_norm_clip)
            self.optimizer.step()
            if batch_idx.is_last:
                tracker.add('model', self.model)
            self.optimizer.zero_grad()

        tracker.save()
Example #3
    def step(self, batch: any, batch_idx: BatchIndex):
        # Get the batch
        data, target = batch[0].to(self.device), batch[1].to(self.device)

        # Add global step if we are in training mode
        if self.mode.is_train:
            tracker.add_global_step(len(data))

        # Run the model and specify whether to log the activations
        with self.mode.update(is_log_activations=batch_idx.is_last):
            output = self.model(data)

        # Calculate the loss
        loss = self.loss_func(output, target)
        # Calculate the accuracy
        self.accuracy_func(output, target)
        # Log the loss
        tracker.add("loss.", loss)

        # Optimize if we are in training mode
        if self.mode.is_train:
            # Calculate the gradients
            loss.backward()

            # Take optimizer step
            self.optimizer.step()
            # Log the parameter and gradient L2 norms once per epoch
            if batch_idx.is_last:
                tracker.add('model', self.model)
                tracker.add('optimizer', (self.optimizer, {'model': self.model}))
            # Clear the gradients
            self.optimizer.zero_grad()

        # Save logs
        tracker.save()
Example #4
    def step(self, batch: Any, batch_idx: BatchIndex):
        self.model.train(self.mode.is_train)
        data, target = batch[0].to(self.device), batch[1].to(self.device)

        if self.mode.is_train:
            tracker.add_global_step(len(data))

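        # Log activations only on batches at the `log_activations_batches` interval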
        is_log_activations = batch_idx.is_interval(
            self.log_activations_batches)
        with monit.section("model"):
            with self.mode.update(is_log_activations=is_log_activations):
                output = self.model(data)

        loss = self.loss_func(output, target)
        tracker.add("loss.", loss)

        if self.mode.is_train:
            with monit.section('backward'):
                loss.backward()

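            # Accumulate gradients and take an optimizer step every `update_batches` batches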
            if batch_idx.is_interval(self.update_batches):
                with monit.section('optimize'):
                    self.optimizer.step()
                if batch_idx.is_interval(self.log_params_updates):
                    tracker.add('model', self.model)
                self.optimizer.zero_grad()

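            # Flush the tracked metrics every `log_save_batches` batches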
            if batch_idx.is_interval(self.log_save_batches):
                tracker.save()
Example #5
    def iterate(self):
        device = get_device(self.model)
        correct_sum = 0
        total_samples = 0

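        # The same loop serves training and evaluation; `optimizer` may be `None`, in which case no updates are made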
        for i, (data, target) in monit.enum(self.name, self.data_loader):
            data, target = data.to(device), target.to(device)

            if self.optimizer is not None:
                self.optimizer.zero_grad()

            output = self.model(data)
            loss = self.loss_func(output, target)
            correct_sum += self.accuracy_func(output, target)
            total_samples += len(target)

            tracker.add(".loss", loss)

            if self.optimizer is not None:
                loss.backward()
                self.optimizer.step()

            if self.is_increment_global_step:
                tracker.add_global_step(len(target))

            if self.log_interval is not None and (i +
                                                  1) % self.log_interval == 0:
                tracker.save()

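        # Log the accuracy over the whole epoch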
        tracker.add(".accuracy", correct_sum / total_samples)
Example #6
File: train.py Project: weihaoxie/nn
    def __call__(self):
        """
        ### Train the model for an epoch
        """

        # Iterate through training data
        for i, (src, tgt, neighbors) in monit.enum('Train', self.dataloader):
            # Move data to the device
            src, tgt, neighbors = src.to(self.device), tgt.to(
                self.device), neighbors.to(self.device)

            # Forward pass
            res = self.model(src, neighbors)
            # Calculate loss
            loss = self.loss_func(res.view(-1, res.shape[-1]), tgt.view(-1))

            # Clear the gradients
            self.optimizer.zero_grad()
            # Backward pass
            loss.backward()
            # Optimize the model
            self.optimizer.step()

            # Save training statistics and increment the global step counter
            tracker.save({'loss.train': loss})
            tracker.add_global_step(len(src))
Example #7
    def step(self, batch: Any, batch_idx: BatchIndex):
        """
        This method is called for each batch
        """
        self.model.train(self.mode.is_train)

        # Get data and target labels
        data, target = batch[0].to(self.model.device), batch[1].to(self.model.device)

        if self.mode.is_train:
            tracker.add_global_step(data.shape[0] * data.shape[1])

        # Run the model
        output = self.model(data)

        # Calculate loss
        loss = self.loss_func(output, target)
        # Calculate accuracy
        self.accuracy(output, target)

        # Log the loss
        tracker.add("loss.", loss)

        # If we are in training mode, calculate gradients and take an optimizer step
        if self.mode.is_train:
            loss.backward()
            self.optimizer.step()
            if batch_idx.is_last:
                tracker.add('model', self.model)
            self.optimizer.zero_grad()

        tracker.save()
Example #8
    def step(self, batch: Any, batch_idx: BatchIndex):
        self.model.train(self.mode.is_train)
        data, target = batch['data'].to(self.device), batch['target'].to(
            self.device)
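        # Normalize the targets with the model's output statistics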
        target = (target - self.model.y_mean) / self.model.y_std

        if self.mode.is_train:
            tracker.add_global_step(len(data))

        output = self.model(data)
        loss = self.loss_func(output, target)
        tracker.add("loss.", loss)

        if self.mode.is_train:
            loss.backward()
            if batch_idx.is_last:
                tracker.add('model', self.model)
            self.optimizer.step()
            self.optimizer.zero_grad()

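        # Collect de-normalized predictions during evaluation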
        if not self.mode.is_train:
            self.output_collector(output * self.model.y_std +
                                  self.model.y_mean)

        tracker.save()
Example #9
def train(model, optimizer, train_loader, device, train_log_interval):
    """This is the training code"""

    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        output = model(data)
        loss = F.cross_entropy(output, target)

        optimizer.zero_grad()
        loss.backward()
        if batch_idx == 0:
            tracker.add('model', model)
        optimizer.step()

        # **✨ Increment the global step**
        tracker.add_global_step()
        # **✨ Store stats in the tracker**
        tracker.save({'loss.train': loss})

        #
        if batch_idx % train_log_interval == 0:
            # **✨ Save added stats**
            tracker.save()
Example #10
File: __init__.py Project: wx-b/nn
    def step(self, batch: Any, batch_idx: BatchIndex):
        self.encoder.train(self.mode.is_train)
        self.decoder.train(self.mode.is_train)

        # Move `data` and `mask` to device and swap the sequence and batch dimensions.
        # `data` will have shape `[seq_len, batch_size, 5]` and
        # `mask` will have shape `[seq_len, batch_size]`.
        data = batch[0].to(self.device).transpose(0, 1)
        mask = batch[1].to(self.device).transpose(0, 1)

        # Increment step in training mode
        if self.mode.is_train:
            tracker.add_global_step(len(data))

        # Encode the sequence of strokes
        with monit.section("encoder"):
            # Get $z$, $\mu$, and $\hat{\sigma}$
            z, mu, sigma_hat = self.encoder(data)

        # Decode the mixture of distributions and $\hat{q}$
        with monit.section("decoder"):
            # Concatenate $[(\Delta x, \Delta y, p_1, p_2, p_3); z]$
            z_stack = z.unsqueeze(0).expand(data.shape[0] - 1, -1, -1)
            inputs = torch.cat([data[:-1], z_stack], 2)
            # Get mixture of distributions and $\hat{q}$
            dist, q_logits, _ = self.decoder(inputs, z, None)

        # Compute the loss
        with monit.section('loss'):
            # $L_{KL}$
            kl_loss = self.kl_div_loss(sigma_hat, mu)
            # $L_R$
            reconstruction_loss = self.reconstruction_loss(mask, data[1:], dist, q_logits)
            # $Loss = L_R + w_{KL} L_{KL}$
            loss = reconstruction_loss + self.kl_div_loss_weight * kl_loss

            # Track losses
            tracker.add("loss.kl.", kl_loss)
            tracker.add("loss.reconstruction.", reconstruction_loss)
            tracker.add("loss.total.", loss)

        # Only if we are in training state
        if self.mode.is_train:
            # Run optimizer
            with monit.section('optimize'):
                # Set `grad` to zero
                self.optimizer.zero_grad()
                # Compute gradients
                loss.backward()
                # Log model parameters and gradients
                if batch_idx.is_last:
                    tracker.add(encoder=self.encoder, decoder=self.decoder)
                # Clip gradients
                nn.utils.clip_grad_norm_(self.encoder.parameters(), self.grad_clip)
                nn.utils.clip_grad_norm_(self.decoder.parameters(), self.grad_clip)
                # Optimize
                self.optimizer.step()

        tracker.save()
Example #11
def main():
    experiment.create(name='Test')

    with experiment.start():
        for i in range(1, 401):
            tracker.add_global_step()
            time.sleep(1)
            tracker.save(loss=1.)
Example #12
    def log_metrics(self,
                    metrics: Dict[str, Union[torch.Tensor, float]],
                    step: Optional[int] = None) -> None:
        if step is None:
            tracker.add_global_step()
            tracker.save(metrics)
        else:
            tracker.save(step, metrics)
Example #13
    def on_train_batch_end(self, batch, logs=None):
        if logs is None:
            logs = {}
        tracker.add_global_step()
        if 'size' in logs:
            del logs['size']
        if 'batch' in logs:
            del logs['batch']
        tracker.add(logs)
        if batch % self.save_batch_frequency == 0:
            tracker.save()
Example #14
    def step(self, batch: Any, batch_idx: BatchIndex):
        self.generator.train(self.mode.is_train)
        self.discriminator.train(self.mode.is_train)

        data, target = batch[0].to(self.device), batch[1].to(self.device)

        # Increment step in training mode
        if self.mode.is_train:
            tracker.add_global_step(len(data))

        # Train the discriminator
        with monit.section("discriminator"):
            for _ in range(self.discriminator_k):
                latent = torch.randn(data.shape[0], 100, device=self.device)
                logits_true = self.discriminator(data)
                logits_false = self.discriminator(self.generator(latent).detach())
                loss_true, loss_false = self.discriminator_loss(logits_true, logits_false)
                loss = loss_true + loss_false

                # Log stuff
                tracker.add("loss.discriminator.true.", loss_true)
                tracker.add("loss.discriminator.false.", loss_false)
                tracker.add("loss.discriminator.", loss)

                # Train
                if self.mode.is_train:
                    self.discriminator_optimizer.zero_grad()
                    loss.backward()
                    if batch_idx.is_last:
                        tracker.add('discriminator', self.discriminator)
                    self.discriminator_optimizer.step()

        # Train the generator
        with monit.section("generator"):
            latent = torch.randn(data.shape[0], 100, device=self.device)
            generated_images = self.generator(latent)
            logits = self.discriminator(generated_images)
            loss = self.generator_loss(logits)

            # Log stuff
            tracker.add('generated', generated_images[0:5])
            tracker.add("loss.generator.", loss)

            # Train
            if self.mode.is_train:
                self.generator_optimizer.zero_grad()
                loss.backward()
                if batch_idx.is_last:
                    tracker.add('generator', self.generator)
                self.generator_optimizer.step()

        tracker.save()
Example #15
    def train(self):
        """
        ### Train the model
        """

        # Loop for the given number of epochs
        for _ in monit.loop(self.epochs):
            # Iterate over the minibatches
            for i, batch in monit.enum('Train', self.dataloader):
                # Move data to the device
                data, target = batch[0].to(self.device), batch[1].to(
                    self.device)

                # Set tracker step, as the number of characters trained on
                tracker.add_global_step(data.shape[0] * data.shape[1])

                # Set model state to training
                self.model.train()
                # Evaluate the model
                output = self.model(data)

                # Calculate loss
                loss = self.loss_func(output.view(-1, output.shape[-1]),
                                      target.view(-1))
                # Log the loss
                tracker.add("loss.train", loss)

                # Calculate gradients
                loss.backward()
                # Clip gradients
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=self.grad_norm_clip)
                # Take optimizer step
                self.optimizer.step()
                # Log the model parameters and gradients
                if (i + 1) % 100 == 0:
                    tracker.add('model', self.model)
                # Clear the gradients
                self.optimizer.zero_grad()

                # Generate a sample
                if (i + 1) % 100 == 0:
                    self.model.eval()
                    with torch.no_grad():
                        self.sample()

                # Save the tracked metrics
                if (i + 1) % 10 == 0:
                    tracker.save()

            # Save the model
            experiment.save_checkpoint()
Example #16
def main():
    # Reset global step because we incremented in previous loop
    tracker.set_global_step(0)

    for i in range(1, 401):
        tracker.add_global_step()
        loss = train()
        tracker.add(loss=loss)
        if i % 10 == 0:
            tracker.save()
        if i % 100 == 0:
            logger.log()
        time.sleep(0.02)
Example #17
    def step(self, batch: any, batch_idx: BatchIndex):
        """
        ### Training or validation step
        """

        # Set training/eval mode
        self.model.train(self.mode.is_train)

        # Move data to the device
        data, target = batch[0].to(self.device), batch[1].to(self.device)

        # Update global step (number of tokens processed) when in training mode
        if self.mode.is_train:
            tracker.add_global_step(data.shape[0] * data.shape[1])

        # Whether to capture model outputs
        with self.mode.update(is_log_activations=batch_idx.is_last
                              and self.is_log_model_activations):
            # Get model outputs.
            # It's returning a tuple for states when using RNNs.
            # This is not implemented yet. 😜
            output, *_ = self.model(data)

        # Calculate and log loss
        loss = self.loss_func(output, target)
        tracker.add("loss.", loss)

        # Calculate and log accuracy
        self.accuracy(output, target)
        self.accuracy.track()

        self.other_metrics(output, target)

        # Train the model
        if self.mode.is_train:
            # Calculate gradients
            loss.backward()
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           max_norm=self.grad_norm_clip)
            # Take optimizer step
            self.optimizer.step()
            # Log the model parameters and gradients on last batch of every epoch
            if batch_idx.is_last and self.is_log_model_params_grads:
                tracker.add('model', self.model)
            # Clear the gradients
            self.optimizer.zero_grad()

        # Save the tracked metrics
        tracker.save()
Example #18
File: fastai.py Project: skiedra/labml
    def after_batch(self):
        tracker.add_global_step()
        if self.training:
            metrics = {'loss.train': self.learn.loss}
        else:
            metrics = {'loss.valid': self.learn.loss}
        try:
            for m in self.learn.metrics:
                if m.value is not None:
                    metrics[m.name] = m.value
        except:
            pass

        tracker.save(metrics)
Example #19
def repeat_values():
    conf = {'batch_size': 20}

    with experiment.record(name='sample',
                           exp_conf=conf,
                           writers={'web_api', 'screen'}):
        for i in range(10):
            tracker.add_global_step(1)
            tracker.save('loss', 1)
            tracker.save('loss', 5)
            # tracker.save()

            if i % 1000 == 0:
                tracker.new_line()
Example #20
    def iterate(self):
        stats = self.batch_step.init_stats()

        for i, batch in monit.enum(self.name, self.data_loader):
            update = self.batch_step.process(batch)
            self.batch_step.update_stats(stats, update)

            if self.is_increment_global_step:
                tracker.add_global_step(update['samples'])

            if self.log_interval is not None and (i +
                                                  1) % self.log_interval == 0:
                tracker.save()

        self.batch_step.log_stats(stats)
Example #21
    def step(self, batch: any, batch_idx: BatchIndex):
        """
        ### Training/validation step
        """

        # Move data to the device
        data, target = batch[0].to(self.device), batch[1].to(self.device)

        # Update global step (number of tokens processed) when in training mode
        if self.mode.is_train:
            tracker.add_global_step(data.shape[0] * data.shape[1])

        # Whether to capture model outputs
        with self.mode.update(is_log_activations=batch_idx.is_last):
            # Get memories
            mem = self.memory.get()
            # Run the model
            output, new_mem = self.model(data, mem)
            # Merge memory
            mem = self.merge_memory(mem, new_mem)
            # Update memories
            self.memory.set(mem)

        # Calculate and log cross entropy loss
        loss = self.loss_func(output, target)
        tracker.add("loss.", loss)

        # Calculate and log accuracy
        self.accuracy(output, target)
        self.accuracy.track()

        # Train the model
        if self.mode.is_train:
            # Calculate gradients
            loss.backward()
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                           max_norm=self.grad_norm_clip)
            # Take optimizer step
            self.optimizer.step()
            # Log the model parameters and gradients on last batch of every epoch
            if batch_idx.is_last:
                tracker.add('model', self.model)
            # Clear the gradients
            self.optimizer.zero_grad()

        # Save the tracked metrics
        tracker.save()
Example #22
    def train(self):
        self.model.train()
        for i, (data, target) in monit.enum("Train", self.train_loader):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            self.optimizer.step()

            tracker.add({'train.loss': loss})
            tracker.add_global_step()

            if i % self.train_log_interval == 0:
                tracker.save()
Example #23
    def train_epoch(self, model: nn.Module, data_loader: DataLoader,
                    name: str):
        """
        Train/Validate for an epoch
        """

        model.train(name == 'train')
        correct_predictions = 0
        total = 0
        total_loss = 0

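        # Enable gradient computation only when training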
        with torch.set_grad_enabled(name == 'train'):
            for i, data in monit.enum(name, data_loader):
                input_ids = data["input_ids"].to(self.device)
                attention_mask = data["attention_mask"].to(self.device)
                targets = data["targets"].to(self.device)

                outputs = model(input_ids=input_ids,
                                attention_mask=attention_mask)
                _, preds = torch.max(outputs, dim=1)

                loss = self.loss_fn(outputs, targets)
                total_loss += loss.item() * len(preds)

                correct_predictions += torch.sum(preds == targets).item()
                total += len(preds)
                tracker.add('loss.', loss)
                if name == 'train':
                    tracker.add_global_step(len(preds))

                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    self.optimizer.step()
                    self.optimizer.zero_grad()

                if (i + 1) % 10 == 0:
                    tracker.save()

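        # Log epoch-level accuracy and mirror the metrics to MLflow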
        tracker.save('accuracy.', correct_predictions / total)
        mlflow.log_metric(f"{name}_acc",
                          float(correct_predictions / total),
                          step=tracker.get_global_step())
        mlflow.log_metric(f"{name}_loss",
                          float(total_loss / total),
                          step=tracker.get_global_step())

        return correct_predictions / total, total_loss / total
Example #24
File: experiment.py Project: wx-b/nn
    def step(self, batch: Any, batch_idx: BatchIndex):
        """
        This method gets called by the trainer for each batch
        """
        # Set the model mode
        self.model.train(self.mode.is_train)

        # Get the input and labels and move them to the model's device
        data, target = batch[0].to(self.device), batch[1].to(self.device)

        # Increment step in training mode
        if self.mode.is_train:
            tracker.add_global_step(len(data))

        # Run the model
        p, y_hat, p_sampled, y_hat_sampled = self.model(data)

        # Calculate the reconstruction loss
        loss_rec = self.loss_rec(p, y_hat, target.to(torch.float))
        tracker.add("loss.", loss_rec)

        # Calculate the regularization loss
        loss_reg = self.loss_reg(p)
        tracker.add("loss_reg.", loss_reg)

        # $L = L_{Rec} + \beta L_{Reg}$
        loss = loss_rec + self.beta * loss_reg

        # Calculate the expected number of steps taken
        steps = torch.arange(1, p.shape[0] + 1, device=p.device)
        expected_steps = (p * steps[:, None]).sum(dim=0)
        tracker.add("steps.", expected_steps)

        # Call accuracy metric
        self.accuracy(y_hat_sampled > 0, target)

        if self.mode.is_train:
            # Compute gradients
            loss.backward()
            # Clip gradients
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=self.grad_norm_clip)
            # Optimizer
            self.optimizer.step()
            # Clear gradients
            self.optimizer.zero_grad()
            #
            tracker.save()
Example #25
File: experiment.py Project: wx-b/nn
    def step(self, batch: Any, batch_idx: BatchIndex):
        """
        Take a training step
        """

        # Set model states
        self.generator.train(self.mode.is_train)
        self.discriminator.train(self.mode.is_train)

        # Get MNIST images
        data = batch[0].to(self.device)

        # Increment step in training mode
        if self.mode.is_train:
            tracker.add_global_step(len(data))

        # Train the discriminator
        with monit.section("discriminator"):
            # Get discriminator loss
            loss = self.calc_discriminator_loss(data)

            # Train
            if self.mode.is_train:
                self.discriminator_optimizer.zero_grad()
                loss.backward()
                if batch_idx.is_last:
                    tracker.add('discriminator', self.discriminator)
                self.discriminator_optimizer.step()

        # Train the generator once in every `discriminator_k`
        if batch_idx.is_interval(self.discriminator_k):
            with monit.section("generator"):
                loss = self.calc_generator_loss(data.shape[0])

                # Train
                if self.mode.is_train:
                    self.generator_optimizer.zero_grad()
                    loss.backward()
                    if batch_idx.is_last:
                        tracker.add('generator', self.generator)
                    self.generator_optimizer.step()

        tracker.save()
Example #26
    def _train(self):
        self.model.train()
        for i, (data, target) in monit.enum("Train", self.train_loader):
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.nll_loss(output, target)
            loss.backward()
            self.optimizer.step()

            # Add training loss to the logger.
            # The logger will queue the values and output the mean
            tracker.add({'train.loss': loss})
            tracker.add_global_step()

            # Print output to the console
            if i % self.log_interval == 0:
                # Output the indicators
                tracker.save()
Example #27
    def step(self, batch: any, batch_idx: BatchIndex):
        """
        ### Training or validation step
        """

        # Training/Evaluation mode
        self.model.train(self.mode.is_train)

        # Move data to the device
        data, target = batch[0].to(self.device), batch[1].to(self.device)

        # Update global step (number of samples processed) when in training mode
        if self.mode.is_train:
            tracker.add_global_step(len(data))

        # Whether to capture model outputs
        with self.mode.update(is_log_activations=batch_idx.is_last):
            # Get model outputs.
            output = self.model(data)

        # Calculate and log loss
        loss = self.loss_func(output, target)
        tracker.add("loss.", loss)

        # Calculate and log accuracy
        self.accuracy(output, target)
        self.accuracy.track()

        # Train the model
        if self.mode.is_train:
            # Calculate gradients
            loss.backward()
            # Take optimizer step
            self.optimizer.step()
            # Log the model parameters and gradients on last batch of every epoch
            if batch_idx.is_last:
                tracker.add('model', self.model)
            # Clear the gradients
            self.optimizer.zero_grad()

        # Save the tracked metrics
        tracker.save()
Example #28
    def train(self):
        self.model.train()
        for batch_idx, (data, target) in enumerate(self.train_loader):
            data, target = data.to(self.device), target.to(self.device)

            self.optimizer.zero_grad()
            output = self.model(data)
            loss = F.cross_entropy(output, target)
            loss.backward()
            self.optimizer.step()

            # **✨ Increment the global step**
            tracker.add_global_step()
            # **✨ Store stats in the tracker**
            tracker.save({'loss.train': loss})

            #
            if batch_idx % self.train_log_interval == 0:
                # **✨ Save added stats**
                tracker.save()
Example #29
def train(model, optimizer, train_loader, device, train_log_interval):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)

        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()

        # tracker.add() is overloaded; these calls are equivalent:
        #   tracker.add(train_loss=loss)
        #   tracker.add('train_loss', loss)
        tracker.add({'train_loss': loss})

        tracker.add_global_step()

        if batch_idx % train_log_interval == 0:
            tracker.save()
Example #30
File: rnn.py Project: whiskey1/samples
    def _train(self):
        for i, (input_tensor,
                target_tensor) in monit.enum("train", self.train_loader):
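            # Initialize the encoder hidden state for each sequence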
            encoder_hidden = self.encoder.init_hidden(self.device).double().to(
                self.device)

            input_tensor = input_tensor.to(self.device).unsqueeze(1)
            target_tensor = target_tensor.to(self.device).double()

            self.optimizer.zero_grad()
            encoder_output, encoder_hidden = self.encoder(
                input_tensor, encoder_hidden)

            train_loss = self.loss(encoder_output, target_tensor)

            train_loss.backward()
            self.optimizer.step()

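            # Log the loss, increment the global step, and save the tracked metrics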
            tracker.add(loss=train_loss.item())
            tracker.add_global_step()
            tracker.save()