Exemplo n.º 1
0
    def __call__(self):
        """
        ### Run one training epoch

        Goes through `self.dataloader` once. For each batch it runs the model,
        computes the loss, takes an optimizer step, and logs the loss.
        """

        for _, batch in monit.enum('Train', self.dataloader):
            # Every batch unpacks into three items; all of them go to the training device
            src, tgt, neighbors = (item.to(self.device) for item in batch)

            # Forward pass; the output is flattened along all but its last dimension
            # and the targets are flattened completely before computing the loss
            res = self.model(src, neighbors)
            last_dim = res.shape[-1]
            loss = self.loss_func(res.view(-1, last_dim), tgt.view(-1))

            # Reset gradients, back-propagate, then update the parameters
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            # Log the loss and advance the global step by `len(src)`
            tracker.save({'loss.train': loss})
            tracker.add_global_step(len(src))
Exemplo n.º 2
0
    def iterate(self):
        """
        Run one pass over `self.data_loader`, training when an optimizer is set.

        For every batch the model output, the loss and the accuracy count are
        computed, and the loss is added to the tracker under `.loss`.
        When `self.optimizer` is not `None`, gradients are cleared before the
        forward pass and an optimizer step follows the backward pass.
        After the pass, `correct_sum / total_samples` is added to the tracker
        under `.accuracy`; this is skipped when no samples were seen.
        """
        device = get_device(self.model)
        correct_sum = 0
        total_samples = 0

        for i, (data, target) in monit.enum(self.name, self.data_loader):
            data, target = data.to(device), target.to(device)

            if self.optimizer is not None:
                self.optimizer.zero_grad()

            output = self.model(data)
            loss = self.loss_func(output, target)
            correct_sum += self.accuracy_func(output, target)
            total_samples += len(target)

            tracker.add(".loss", loss)

            if self.optimizer is not None:
                loss.backward()
                self.optimizer.step()

            if self.is_increment_global_step:
                tracker.add_global_step(len(target))

            # Write the queued values every `log_interval` batches, if an interval is set
            if self.log_interval is not None and (i +
                                                  1) % self.log_interval == 0:
                tracker.save()

        # An empty data loader leaves `total_samples` at zero; dividing would
        # raise `ZeroDivisionError`, so there is no accuracy to report then.
        if total_samples > 0:
            tracker.add(".accuracy", correct_sum / total_samples)
Exemplo n.º 3
0
    def _train(self):
        """
        Train the discriminator and the generator for one pass over `self.train_loader`.

        For each batch the discriminator is updated first, on real images and
        on detached generator output, and then the generator is updated
        through the discriminator. Both losses are added to the tracker.
        After the pass, up to nine images from the last generated batch
        (indices 1 to 9) are added to the tracker under `generated`.
        """
        # Stays `None` if the loader yields no batches, so the sampling step can be skipped
        fake_images = None

        for i, (images, _) in monit.enum("train", self.train_loader):
            # Size everything by the actual batch: a batch can be smaller than
            # `self.batch_size`, and the fake logits must line up with the
            # targets that are built from this size.
            n_batch = images.size(0)

            # Targets are drawn from `[0.8, 1.0)` for real and `[0.0, 0.2)` for fake
            targets_real = torch.empty(n_batch, 1,
                                       device=self.device).uniform_(0.8, 1.0)
            targets_fake = torch.empty(n_batch, 1,
                                       device=self.device).uniform_(0.0, 0.2)

            images = images.to(self.device)

            # Discriminator step; generator output is detached so that only
            # the discriminator receives gradients here
            self.optimizer_D.zero_grad()
            logits_real = self.discriminator(images)
            fake_images = self.generator(
                noise(self.device, n_batch, self.noise_dim)).detach()
            logits_fake = self.discriminator(fake_images)
            discriminator_loss = DLoss(logits_real, logits_fake, targets_real,
                                       targets_fake)
            discriminator_loss.backward()
            self.optimizer_D.step()

            # Generator step, scored against the "real" targets
            self.optimizer_G.zero_grad()
            fake_images = self.generator(
                noise(self.device, n_batch, self.noise_dim))
            logits_fake = self.discriminator(fake_images)
            generator_loss = GLoss(logits_fake, targets_real)
            generator_loss.backward()
            self.optimizer_G.step()

            tracker.add(G_Loss=generator_loss.item())
            tracker.add(D_Loss=discriminator_loss.item())
            tracker.add_global_step()

        # Nothing was generated when the loader was empty
        if fake_images is None:
            return

        # Log samples 1..9 of the last generated batch, bounded by how many exist
        for j in range(1, min(10, len(fake_images))):
            img = fake_images[j].squeeze()
            tracker.add('generated', img)
Exemplo n.º 4
0
def concat_and_save(path: PurePath, source_files: List[PythonFile]):
    """
    Write all `source_files` into the single file at `path`.

    Each source is preceded by a `# PROJECT: ... FILE: ...` header line and
    followed by a newline.
    """
    with open(str(path), 'w') as out:
        for _, src in monit.enum(f"Write {path.name}", source_files):
            header = f"# PROJECT: {src.project} FILE: {str(src.relative_path)}\n"
            out.write(header)

            content = read_file(src.path)
            out.write(content + "\n")
Exemplo n.º 5
0
    def collect_pairs(self):
        """
        Register every pair of adjacent positions of every entry in `self.word_codes`.

        For an entry `v` at enumeration index `w`, `self.add_pair(w, i, i + 1)`
        is called for each `i` in `range(len(v) - 1)`. Afterwards
        `self.heap_add_all()` is called once.
        """
        # `w` is the first item yielded by `monit.enum` and `v` is the entry itself
        # NOTE(review): other loops here use that first item as an index — confirm
        for w, v in monit.enum('Collect pairs', self.word_codes):
            # Entries with fewer than two elements contribute no pairs
            for i in range(len(v) - 1):
                self.add_pair(w, i, i + 1)

        self.heap_add_all()
Exemplo n.º 6
0
def download():
    """
    Download every repository listed in `pytorch_awesome.md`.

    Makes sure the `download` directory under the lab data path exists,
    fetches the list with `get_awesome_pytorch()`, parses it with
    `get_repos`, and calls `download_repo` for each entry.
    """
    path = Path(lab.get_data_path() / 'download')
    # `exist_ok=True` avoids the race between checking for the directory and creating it
    path.mkdir(parents=True, exist_ok=True)

    get_awesome_pytorch()
    repos = get_repos('pytorch_awesome.md')

    # The first two fields of each entry and its index are passed to `download_repo`
    for i, r in monit.enum("Download", repos):
        download_repo(r[0], r[1], i)
Exemplo n.º 7
0
def progressive(overwrite: bool = False):
    """
    Download, extract and filter the repositories one at a time.

    `overwrite` is forwarded to `extract_zip`.
    """
    # Fetch the readme and parse the repository list from it
    get_awesome_pytorch_readme()
    repos = get_repos_from_readme('pytorch_awesome.md')

    title = f"Download {len(repos)} repos"
    for idx, repo in monit.enum(title, repos):
        # Download the archive, unpack it, then run `remove_files` on the result
        archive = download_repo(repo[0], repo[1], idx)
        extracted_path = extract_zip(archive, overwrite)
        remove_files(extracted_path, {'.py'})
Exemplo n.º 8
0
def main():
    """
    Serialize every Python source file into `data/all.py` under the current directory.

    For each file two lines are written: its path, then the space-joined
    string forms of the items returned by `_read_file`.
    """
    source_files = _GetPythonFiles().files
    logger.inspect(source_files)

    out_path = Path(os.getcwd()) / 'data' / 'all.py'
    with open(str(out_path), 'w') as out:
        for _, src in monit.enum("Parse", source_files):
            tokens = " ".join(str(t) for t in _read_file(src.path))
            out.write(f"{str(src.path)}\n")
            out.write(tokens + "\n")
Exemplo n.º 9
0
    def train(self):
        """
        ### Train the model

        Runs `self.epochs` epochs over `self.dataloader` and saves a checkpoint
        after each epoch.
        """

        for _ in monit.loop(self.epochs):
            for i, batch in monit.enum('Train', self.dataloader):
                # One-based batch counter used by the interval checks below
                batch_no = i + 1

                # Inputs and targets go to the training device
                data = batch[0].to(self.device)
                target = batch[1].to(self.device)

                # Advance the global step by the product of the first two dimensions of `data`
                tracker.add_global_step(data.shape[0] * data.shape[1])

                # Sampling below switches to eval mode, so training mode is set on every batch
                self.model.train()
                output = self.model(data)

                # Loss over the flattened output and targets
                flat_output = output.view(-1, output.shape[-1])
                loss = self.loss_func(flat_output, target.view(-1))
                tracker.add("loss.train", loss)

                # Back-propagate, clip the gradient norm, and update
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=self.grad_norm_clip)
                self.optimizer.step()

                is_sample_step = batch_no % 100 == 0
                # The model is logged before the gradients are cleared
                if is_sample_step:
                    tracker.add('model', self.model)
                self.optimizer.zero_grad()

                # Generate a sample without tracking gradients
                if is_sample_step:
                    self.model.eval()
                    with torch.no_grad():
                        self.sample()

                # Write the tracked metrics every 10 batches
                if batch_no % 10 == 0:
                    tracker.save()

            # Checkpoint at the end of each epoch
            experiment.save_checkpoint()
Exemplo n.º 10
0
    def iterate(self):
        """
        Run `self.batch_step` over every batch of `self.data_loader`.

        Statistics are initialised once, updated after each batch, and logged
        at the end through `self.batch_step`.
        """
        stats = self.batch_step.init_stats()

        for idx, batch in monit.enum(self.name, self.data_loader):
            update = self.batch_step.process(batch)
            self.batch_step.update_stats(stats, update)

            # The global step advances by the `samples` entry of the batch result
            if self.is_increment_global_step:
                tracker.add_global_step(update['samples'])

            # No periodic saving when no interval is configured
            if self.log_interval is None:
                continue
            if (idx + 1) % self.log_interval == 0:
                tracker.save()

        self.batch_step.log_stats(stats)
Exemplo n.º 11
0
def validation_loss(knn_weights: List[float], last_n: Optional[int],
                    conf: Configs, index: faiss.IndexFlatL2,
                    keys_store: np.ndarray, vals_store: np.ndarray):
    """
    ## Calculate validation loss

    The validation loss is computed on a weighted combination of the
    $k$-NN prediction and the transformer prediction.
    `knn_weights` lists the weights given to the $k$-NN prediction;
    a loss is computed for each of them.

    Returns `(losses, n_samples)`: `losses[j]` holds, per batch, the loss for
    `knn_weights[j]` multiplied by that batch's sample count, and
    `n_samples` holds the per-batch sample counts.
    """

    # One list of per-batch losses for each weight
    losses = [[] for _ in knn_weights]
    # Per-batch sample counts
    n_samples = []

    with torch.no_grad():
        for _, batch in monit.enum("Validation",
                                   conf.validator.data_loader,
                                   is_children_silent=True):
            # Inputs and targets on the model's device
            data = batch[0].to(conf.device)
            target = batch[1].to(conf.device)

            # Transformer predictions
            res = conf.model(data)
            # $k$-NN predictions, looked up on the CPU and moved back to the device
            res_knn = knn(conf.model.ff_input.cpu(), index, keys_store,
                          vals_store, conf.n_tokens).to(conf.device)

            # Optionally keep only the last `last_n` entries along the first
            # dimension; the earliest predictions of the transformer have
            # very few past tokens to look at.
            if last_n:
                res, res_knn, target = (res[-last_n:], res_knn[-last_n:],
                                        target[-last_n:])

            # Number of samples in this batch
            n_s = res.shape[0] * data.shape[1]
            n_samples.append(n_s)

            # Loss of the mixed prediction for every weight
            for weight_losses, weight in zip(losses, knn_weights):
                loss = conf.loss_func(res_knn * weight + (1 - weight) * res,
                                      target)
                weight_losses.append(loss * n_s)

    return losses, n_samples
Exemplo n.º 12
0
    def train(self):
        """
        Train for one pass over `self.train_loader` with cross-entropy loss.

        The loss is added to the tracker on every batch and written out
        whenever the batch index is a multiple of `self.train_log_interval`.
        """
        self.model.train()

        for step, (data, target) in monit.enum("Train", self.train_loader):
            data = data.to(self.device)
            target = target.to(self.device)

            # Standard update: clear gradients, forward, backward, step
            self.optimizer.zero_grad()
            loss = F.cross_entropy(self.model(data), target)
            loss.backward()
            self.optimizer.step()

            tracker.add({'train.loss': loss})
            tracker.add_global_step()

            if step % self.train_log_interval != 0:
                continue
            tracker.save()
Exemplo n.º 13
0
def gather_keys(conf: Configs):
    r"""
    ## Gather $\big(f(c_i), w_i\big)$ and save them in numpy arrays

    *Note that these numpy arrays will take up a lot of space (even few hundred gigabytes)
    depending on the size of your dataset*.

    The keys are written to `keys.npy` and the values to `vals.npy`,
    both memory-mapped files under the lab data path.
    """

    # Dimensions of $f(c_i)$
    d_model = conf.transformer.d_model
    # Training data loader
    data_loader = conf.trainer.data_loader
    # Number of contexts; i.e. number of tokens in the training data minus one.
    # $\big(f(c_i), w_i\big)$ for $i \in [2, T]$
    n_keys = data_loader.data.shape[0] * data_loader.data.shape[1] - 1
    # Numpy array for $f(c_i)$
    keys_store = np.memmap(str(lab.get_data_path() / 'keys.npy'),
                           dtype=np.float32,
                           mode='w+',
                           shape=(n_keys, d_model))
    # Numpy array for $w_i$.
    # `np.int` was only an alias of the builtin `int` and has been removed from
    # NumPy; using `int` directly selects the same dtype on every NumPy version.
    vals_store = np.memmap(str(lab.get_data_path() / 'vals.npy'),
                           dtype=int,
                           mode='w+',
                           shape=(n_keys, 1))

    # Number of keys $f(c_i)$ collected
    added = 0
    with torch.no_grad():
        # Loop through data
        for i, batch in monit.enum("Collect data",
                                   data_loader,
                                   is_children_silent=True):
            # $w_i$ the target labels
            vals = batch[1].view(-1, 1)
            # Input data moved to the device of the model
            data = batch[0].to(conf.device)
            # Run the model; only the `ff_input` it leaves on the model is used
            _ = conf.model(data)
            # Get $f(c_i)$
            keys = conf.model.ff_input.view(-1, d_model)
            # Save keys, $f(c_i)$ in the memory mapped numpy array
            keys_store[added:added + keys.shape[0]] = keys.cpu()
            # Save values, $w_i$ in the memory mapped numpy array
            vals_store[added:added + keys.shape[0]] = vals
            # Increment the number of collected keys
            added += keys.shape[0]
Exemplo n.º 14
0
    def train_epoch(self, model: nn.Module, data_loader: DataLoader,
                    name: str):
        """
        Train or validate `model` for one pass over `data_loader`.

        The pass trains (gradients enabled, optimizer stepped) only when
        `name` is `'train'`; otherwise the model is only evaluated.
        Returns `(accuracy, mean_loss)` over all samples seen, and logs both
        to the tracker and to mlflow.
        """

        is_train = name == 'train'
        model.train(is_train)

        correct_predictions = 0
        total = 0
        total_loss = 0

        with torch.set_grad_enabled(is_train):
            for i, data in monit.enum(name, data_loader):
                input_ids = data["input_ids"].to(self.device)
                attention_mask = data["attention_mask"].to(self.device)
                targets = data["targets"].to(self.device)

                outputs = model(input_ids=input_ids,
                                attention_mask=attention_mask)
                # Predicted class is the index of the maximum along dimension 1
                preds = torch.max(outputs, dim=1)[1]
                n_batch = len(preds)

                # The batch loss is weighted by the batch size for the running total
                loss = self.loss_fn(outputs, targets)
                total_loss += loss.item() * n_batch

                correct_predictions += torch.sum(preds == targets).item()
                total += n_batch
                tracker.add('loss.', loss)

                if is_train:
                    tracker.add_global_step(n_batch)

                    # Back-propagate, clip the gradient norm to 1.0, update, then clear gradients
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                    self.optimizer.step()
                    self.optimizer.zero_grad()

                # Write the tracked metrics every 10 batches
                if (i + 1) % 10 == 0:
                    tracker.save()

        accuracy = correct_predictions / total
        mean_loss = total_loss / total

        tracker.save('accuracy.', accuracy)
        mlflow.log_metric(f"{name}_acc",
                          float(accuracy),
                          step=tracker.get_global_step())
        mlflow.log_metric(f"{name}_loss",
                          float(mean_loss),
                          step=tracker.get_global_step())

        return accuracy, mean_loss
Exemplo n.º 15
0
    def collect_words(self, data: str):
        """
        Split `data` into runs and pass each run to `self.add_word`.

        A run ends wherever membership in `ID_CHARS` changes from one character
        to the next. The remaining tail of `data` is always added at the end.
        """
        start = 0
        in_id = False

        for pos, ch in monit.enum('Collect words', data):
            ch_is_id = ch in ID_CHARS
            # Still inside the same kind of run
            if ch_is_id == in_id:
                continue

            # The kind of character changed: close the run that ended just before `pos`
            self.add_word(data[start:pos])
            start = pos
            in_id = ch_is_id

        self.add_word(data[start:])
Exemplo n.º 16
0
def batch(overwrite: bool = False):
    """
    Download all repositories, then extract them all, then filter the sources.

    `overwrite` is forwarded to `extract_zip`.
    """
    with monit.section('Get pytorch_awesome'):
        get_awesome_pytorch_readme()
        repos = get_repos_from_readme('pytorch_awesome.md')

    # Download every repository in the list
    for idx, repo in monit.enum(f"Download {len(repos)} repos", repos):
        download_repo(repo[0], repo[1], idx)

    # Extract every entry of the `download` directory
    with monit.section('Extract zips'):
        download_dir = Path(lab.get_data_path() / 'download')
        for archive in download_dir.iterdir():
            extract_zip(archive, overwrite)

    with monit.section('Remove non python files'):
        source_dir = lab.get_data_path() / 'source'
        remove_files(source_dir, {'.py'})
Exemplo n.º 17
0
    def _train(self):
        """
        Train for one pass over `self.train_loader` with negative log-likelihood loss.

        The loss is added to the tracker on every batch and written out
        whenever the batch index is a multiple of `self.log_interval`.
        """
        self.model.train()

        for step, (data, target) in monit.enum("Train", self.train_loader):
            data = data.to(self.device)
            target = target.to(self.device)

            # Clear gradients, forward, backward, update
            self.optimizer.zero_grad()
            loss = F.nll_loss(self.model(data), target)
            loss.backward()
            self.optimizer.step()

            # The loss is only added here; it is written out by `tracker.save()` below
            tracker.add({'train.loss': loss})
            tracker.add_global_step()

            if step % self.log_interval != 0:
                continue
            # Output the tracked indicators
            tracker.save()
Exemplo n.º 18
0
    def _train(self):
        """
        Train the encoder for one pass over `self.train_loader`.

        A fresh hidden state is created for every batch, and the loss is
        logged and saved after every batch.
        """
        for _, (input_tensor,
                target_tensor) in monit.enum("train", self.train_loader):
            # New hidden state per batch, cast to double and moved to the device
            hidden = self.encoder.init_hidden(self.device)
            hidden = hidden.double().to(self.device)

            # Inputs get an extra dimension at position 1; targets are cast to double
            input_tensor = input_tensor.to(self.device).unsqueeze(1)
            target_tensor = target_tensor.to(self.device).double()

            self.optimizer.zero_grad()
            # The hidden state returned by the encoder is not used afterwards
            encoder_output, hidden = self.encoder(input_tensor, hidden)

            train_loss = self.loss(encoder_output, target_tensor)
            train_loss.backward()
            self.optimizer.step()

            tracker.add(loss=train_loss.item())
            tracker.add_global_step()
            tracker.save()
Exemplo n.º 19
0
    def train(self):
        """
        Train the model for `self.epochs` epochs over `self.dataloader`.

        A checkpoint is saved at the end of every epoch.
        """
        for _ in monit.loop(self.epochs):
            for i, batch in monit.enum('Train', self.dataloader):
                # Inputs and targets on the training device
                data = batch[0].to(self.device)
                target = batch[1].to(self.device)

                tracker.add_global_step(data.shape[0] * data.shape[1])

                # Sampling below leaves the model in eval mode, so switch back each batch
                self.model.train()
                output = self.model(data)

                # Loss over the flattened output and targets, logged on every batch
                loss = self.loss_func(output.view(-1, output.shape[-1]),
                                      target.view(-1))
                tracker.add("loss.train", loss)

                # Gradients, clipping, and the parameter update
                loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.parameters(),
                                               max_norm=self.grad_norm_clip)
                self.optimizer.step()

                every_hundredth = (i + 1) % 100 == 0
                every_tenth = (i + 1) % 10 == 0

                # Log the model every 100th batch, before the gradients are cleared
                if every_hundredth:
                    tracker.add('model', self.model)
                self.optimizer.zero_grad()

                # Generate a sample every 100th batch, without tracking gradients
                if every_hundredth:
                    self.model.eval()
                    with torch.no_grad():
                        self.sample()

                # Write the tracked metrics every 10th batch
                if every_tenth:
                    tracker.save()

            experiment.save_checkpoint()
Exemplo n.º 20
0
def train(model, optimizer, train_loader, device, train_log_interval):
    """Train `model` for one pass over `train_loader` and log the loss with the tracker."""

    model.train()
    for batch_idx, (data, target) in monit.enum("Train", train_loader):
        data = data.to(device)
        target = target.to(device)

        # Clear gradients, forward, backward, update
        optimizer.zero_grad()
        loss = F.cross_entropy(model(data), target)
        loss.backward()
        optimizer.step()

        # Advance the global step by one per batch
        tracker.add_global_step()
        # NOTE(review): the loss is passed to `tracker.save` on every batch, so
        # the periodic argument-less `tracker.save()` below may be redundant;
        # confirm whether `tracker.add` was intended here.
        tracker.save({'loss.train': loss})

        # Periodic save, every `train_log_interval` batches
        if batch_idx % train_log_interval == 0:
            tracker.save()
Exemplo n.º 21
0
    def tokenize(self, data: str, *, is_silent: bool = False) -> List[str]:
        """
        Split `data` into runs and return them as a list.

        A run ends wherever membership in `ID_CHARS` changes from one character
        to the next. Empty runs are never emitted. `is_silent` is forwarded
        to `monit.enum`.
        """
        start = 0
        in_id = False
        tokens = []

        for pos, ch in monit.enum('Collect words', data, is_silent=is_silent):
            ch_is_id = ch in ID_CHARS
            # Still inside the same kind of run
            if ch_is_id == in_id:
                continue

            # The kind of character changed: emit the finished run if it is non-empty
            if start < pos:
                tokens.append(data[start:pos])
            start = pos
            in_id = ch_is_id

        # Whatever remains after the last boundary
        if start < len(data):
            tokens.append(data[start:])

        return tokens
Exemplo n.º 22
0
    def run(self):
        r"""
        ## Training

        We aim to solve:
        $$G^{*}, F^{*} = \arg \min_{G,F} \max_{D_X, D_Y} \mathcal{L}(G, F, D_X, D_Y)$$

        where,
        $G$ translates images from $X \rightarrow Y$,
        $F$ translates images from $Y \rightarrow X$,
        $D_X$ tests if images are from $X$ space,
        $D_Y$ tests if images are from $Y$ space, and
        \begin{align}
        \mathcal{L}(G, F, D_X, D_Y)
            &= \mathcal{L}_{GAN}(G, D_Y, X, Y) \\
            &+ \mathcal{L}_{GAN}(F, D_X, Y, X) \\
            &+ \lambda_1 \mathcal{L}_{cyc}(G, F) \\
            &+ \lambda_2 \mathcal{L}_{identity}(G, F) \\
        \\
        \mathcal{L}_{GAN}(G, F, D_Y, X, Y)
            &= \mathbb{E}_{y \sim p_{data}(y)} \Big[log D_Y(y)\Big] \\
            &+ \mathbb{E}_{x \sim p_{data}(x)} \bigg[log\Big(1 - D_Y(G(x))\Big)\bigg] \\
            &+ \mathbb{E}_{x \sim p_{data}(x)} \Big[log D_X(x)\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \bigg[log\Big(1 - D_X(F(y))\Big)\bigg] \\
        \\
        \mathcal{L}_{cyc}(G, F)
            &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(G(x)) - x \rVert_1\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(F(y)) - y \rVert_1\Big] \\
        \\
        \mathcal{L}_{identity}(G, F)
            &= \mathbb{E}_{x \sim p_{data}(x)} \Big[\lVert F(x) - x \rVert_1\Big] \\
            &+ \mathbb{E}_{y \sim p_{data}(y)} \Big[\lVert G(y) - y \rVert_1\Big] \\
        \end{align}

        $\mathcal{L}_{GAN}$ is the generative adversarial loss from the original
        GAN paper.

        $\mathcal{L}_{cyc}$ is the cyclic loss, where we try to get $F(G(x))$ to be similar to $x$,
        and $G(F(y))$ to be similar to $y$.
        Basically if the two generators (transformations) are applied in series it should give back the
        original image.
        This is the main contribution of this paper.
        It trains the generators to generate an image of the other distribution that is similar to
        the original image.
        Without this loss $G(x)$ could generate anything that's from the distribution of $Y$.
        Now it needs to generate something from the distribution of $Y$ but still has properties of $x$,
        so that $F(G(x))$ can re-generate something like $x$.

        $\mathcal{L}_{identity}$ is the identity loss.
        This was used to encourage the mapping to preserve color composition between
        the input and the output.

        To solve $G^{\*}, F^{\*}$,
        discriminators $D_X$ and $D_Y$ should **ascend** on the gradient,
        \begin{align}
        \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
        \log D_Y\Big(y^{(i)}\Big) \\
        &+ \log \Big(1 - D_Y\Big(G\Big(x^{(i)}\Big)\Big)\Big) \\
        &+ \log D_X\Big(x^{(i)}\Big) \\
        & +\log\Big(1 - D_X\Big(F\Big(y^{(i)}\Big)\Big)\Big)
        \Bigg]
        \end{align}
        That is, descend on the *negative* log-likelihood loss.

        In order to stabilize the training the negative log-likelihood objective
        was replaced by a least-squared loss -
        the least-squared error of discriminator, labelling real images with 1,
        and generated images with 0.
        So we want to descend on the gradient,
        \begin{align}
        \nabla_{\theta_{D_X, D_Y}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
            \bigg(D_Y\Big(y^{(i)}\Big) - 1\bigg)^2 \\
            &+ D_Y\Big(G\Big(x^{(i)}\Big)\Big)^2 \\
            &+ \bigg(D_X\Big(x^{(i)}\Big) - 1\bigg)^2 \\
            &+ D_X\Big(F\Big(y^{(i)}\Big)\Big)^2
        \Bigg]
        \end{align}

        We use least-squares for generators also.
        The generators should *descend* on the gradient,
        \begin{align}
        \nabla_{\theta_{F, G}} \frac{1}{m} \sum_{i=1}^m
        &\Bigg[
            \bigg(D_Y\Big(G\Big(x^{(i)}\Big)\Big) - 1\bigg)^2 \\
            &+ \bigg(D_X\Big(F\Big(y^{(i)}\Big)\Big) - 1\bigg)^2 \\
            &+ \mathcal{L}_{cyc}(G, F)
            + \mathcal{L}_{identity}(G, F)
        \Bigg]
        \end{align}

        We use `generator_xy` for $G$ and `generator_yx` for $F$.
        We use `discriminator_x` for $D_X$ and `discriminator_y` for $D_Y$.
        """

        # Replay buffers to keep generated samples
        gen_x_buffer = ReplayBuffer()
        gen_y_buffer = ReplayBuffer()

        # Loop through epochs
        for epoch in monit.loop(self.epochs):
            # Loop through the dataset
            for i, batch in monit.enum('Train', self.dataloader):
                # Move images to the device
                data_x, data_y = batch['x'].to(self.device), batch['y'].to(
                    self.device)

                # true labels equal to $1$
                true_labels = torch.ones(data_x.size(0),
                                         *self.discriminator_x.output_shape,
                                         device=self.device,
                                         requires_grad=False)
                # false labels equal to $0$
                false_labels = torch.zeros(data_x.size(0),
                                           *self.discriminator_x.output_shape,
                                           device=self.device,
                                           requires_grad=False)

                # Train the generators.
                # This returns the generated images.
                gen_x, gen_y = self.optimize_generators(
                    data_x, data_y, true_labels)

                # Train discriminators on the real images and on generated
                # images that pass through the replay buffers
                self.optimize_discriminator(data_x, data_y,
                                            gen_x_buffer.push_and_pop(gen_x),
                                            gen_y_buffer.push_and_pop(gen_y),
                                            true_labels, false_labels)

                # Save training statistics and increment the global step counter
                tracker.save()
                tracker.add_global_step(max(len(data_x), len(data_y)))

                # Save images at intervals
                batches_done = epoch * len(self.dataloader) + i
                if batches_done % self.sample_interval == 0:
                    # Save models when sampling images
                    experiment.save_checkpoint()
                    # Sample images
                    self.sample_images(batches_done)

            # Update learning rates once per epoch
            self.generator_lr_scheduler.step()
            self.discriminator_lr_scheduler.step()
            # New line
            tracker.new_line()