Example #1
    def _train_model(self, images):
        """

        :param images: List of images paths
        """
        data_source = self._transform_images(images)
        self.model = autoencoder().cpu()
        criterion = nn.MSELoss()
        optimizer = torch.optim.Adam(self.model.parameters(),
                                     lr=learning_rate,
                                     weight_decay=1e-5)
        for epoch in range(1, num_epochs + 1):
            for i, data in enumerate(data_source):
                img = data
                img = img.view(img.size(0), -1)
                img = Variable(img).cpu()
                # ===================forward=====================
                with LightwoodAutocast():
                    output = self.model(img)
                    loss = criterion(output, img)
                # ===================backward====================
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # ===================log========================
                print('epoch [{}/{}], loss:{:.4f}'.format(
                    epoch, num_epochs, loss.item()))
                if epoch % num_epochs == 0:
                    pic = to_img(output.cpu().data[0:-2])
                    if not os.path.exists('./mlp_img'):
                        os.mkdir('./mlp_img')
                    save_image(pic, './mlp_img/image_{}.png'.format(i))
        shutil.rmtree('./mlp_img')
        torch.save(self.model.state_dict(), './sim_autoencoder.pth')
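Note that every example in this listing wraps only the forward pass and the loss computation in LightwoodAutocast, while backward() and the optimizer steps stay outside the context, which matches PyTorch's usual automatic-mixed-precision conventions. As a point of reference, here is a minimal sketch of a context manager with that behavior, assuming LightwoodAutocast is essentially a thin wrapper around torch.cuda.amp.autocast that stays inactive on CPU; the actual Lightwood implementation may differ.

import contextlib

import torch


class AutocastSketch:
    """Illustrative stand-in for LightwoodAutocast, not the real implementation."""

    def __init__(self, enabled=True):
        # Only enable mixed precision when a CUDA device is actually available.
        self.enabled = enabled and torch.cuda.is_available()

    def __enter__(self):
        self._ctx = torch.cuda.amp.autocast() if self.enabled else contextlib.nullcontext()
        return self._ctx.__enter__()

    def __exit__(self, *exc_info):
        return self._ctx.__exit__(*exc_info)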
Example #2
 def forward(self, input, hidden):
     with LightwoodAutocast():
         output = self.in_activation(input.float())
         output, hidden = self.gru(output, hidden)
         output = self.dropout(output)
         output = self.out(output)
     return output, hidden
Example #3
 def forward(self, input, hidden):
     with LightwoodAutocast():
         output = self.embedding(input).view(1, 1, -1)
         output = F.relu(output)
         output, hidden = self.gru(output, hidden)
         output = self.softmax(self.out(output[0]))
     return output, hidden
Example #4
 def forward(self, input):
     """
     Forward pass of the function.
     The goal is to generate values such as if they weights of the linear operation are sampled
     from a normal distribution
     """
     with LightwoodAutocast():
         return F.linear(input, self.w_sampler(), self.bias)
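The docstring above describes generating values as if the linear weights were sampled from a normal distribution. Purely for illustration, a hypothetical w_sampler in that reparameterization style could look like the sketch below; the class and attribute names are assumptions and not taken from the source.

import torch
import torch.nn as nn


class GaussianWeightSampler(nn.Module):
    """Hypothetical sampler: returns W ~ N(mu, sigma^2) via the reparameterization trick."""

    def __init__(self, out_features, in_features):
        super().__init__()
        self.mu = nn.Parameter(torch.zeros(out_features, in_features))
        self.log_sigma = nn.Parameter(torch.full((out_features, in_features), -3.0))

    def forward(self):
        # Fresh Gaussian noise on every call, so each forward pass sees a new weight sample.
        eps = torch.randn_like(self.mu)
        return self.mu + torch.exp(self.log_sigma) * eps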
Example #5
 def forward(self, true_input, main_net_output):
     """
     :param true_input: tensor with data point features
     :param main_net_output: tensor with main NN prediction for true_input
     :return: predicted loss value over the tensor samples
     """
     with LightwoodAutocast():
         aware_in = torch.cat((true_input, main_net_output), 1)
         output = self.net(aware_in)
         return output
Example #6
    def forward(self, input):
        """
        In this particular model, we just need to forward the network defined in setup, with our input
        :param input: a pytorch tensor with the input data of a batch
        :return: output of the network
        """
        with LightwoodAutocast():
            output = self._foward_net(input)

        return output
Example #7
 def forward(self, src, lengths, device):
     with LightwoodAutocast():
         if self.src_mask is None or self.src_mask.size(0) != src.size(0):
             # Attention mask to avoid attending to upcoming parts of the sequence
             self.src_mask = self._generate_square_subsequent_mask(
                 src.size(0)).to(device)
         src = self.src_linear(src)
         # src = self.pos_encoder(src) # not sure if this is helpful in time series
         # The lengths_mask has to be of size [batch, lengths]
         lengths_mask = len_to_mask(lengths, zeros=True).to(device)
         hidden = self.transformer_encoder(
             src, mask=self.src_mask, src_key_padding_mask=lengths_mask)
         output = self.src_decoder(hidden)
         return output, hidden
Example #8
    def categorical_train_function(model, data, gym, test=False):
        input, real = data
        input = input.to(gym.device)
        labels = torch.tensor([torch.argmax(x) for x in real]).to(gym.device)

        with LightwoodAutocast():
            outputs = gym.model(input, labels=labels)
            loss, logits = outputs[:2]

        if not test:
            loss.backward()
            gym.optimizer.step()
            gym.scheduler.step()
            gym.optimizer.zero_grad()
        return loss
Example #9
    def forward(self, input, hidden, encoder_outputs):
        with LightwoodAutocast():
            embedded = self.embedding(input).view(1, 1, -1)
            embedded = self.dropout(embedded)

            attn_weights = F.softmax(
                self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
            attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                     encoder_outputs.unsqueeze(0))

            output = torch.cat((embedded[0], attn_applied[0]), 1)
            output = self.attn_combine(output).unsqueeze(0)

            output = F.relu(output)
            output, hidden = self.gru(output, hidden)

            output = F.log_softmax(self.out(output[0]), dim=1)
        return output, hidden, attn_weights
Example #10
    def numerical_train_function(model, data, gym, backbone, test=False):
        input, real = data

        backbone = backbone.eval()

        with torch.no_grad():
            input = input.to(gym.device)
            real = real.to(gym.device)

            embeddings = backbone(input)[0][:, 0, :]

        with LightwoodAutocast():
            outputs = gym.model(embeddings)
            loss = gym.loss_criterion(outputs, real)

        if not test:
            loss.backward()
            gym.optimizer.step()
            gym.scheduler.step()
            gym.optimizer.zero_grad()

        return loss
Example #11
def train(input_tensor,
          target_tensor,
          encoder,
          decoder,
          encoder_optimizer,
          decoder_optimizer,
          criterion,
          max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length,
                                  encoder.hidden_size,
                                  device=device)

    loss = 0

    with LightwoodAutocast():
        for ei in range(min(input_length, len(encoder_outputs))):
            encoder_output, encoder_hidden = encoder(input_tensor[ei],
                                                     encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)

        decoder_hidden = encoder_hidden

        use_teacher_forcing = random.random() < teacher_forcing_ratio

        if use_teacher_forcing:
            # Teacher forcing: Feed the target as the next input
            for di in range(target_length):
                if isinstance(decoder, AttnDecoderRNN):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden)
                loss += criterion(decoder_output, target_tensor[di])
                decoder_input = target_tensor[di]  # Teacher forcing

        else:
            # Without teacher forcing: use its own predictions as the next input
            for di in range(target_length):
                if isinstance(decoder, AttnDecoderRNN):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden = decoder(
                        decoder_input, decoder_hidden)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()  # detach from history as input

                loss += criterion(decoder_output, target_tensor[di])
                if decoder_input.item() == EOS_token:
                    break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
Example #12
 def forward(self, x):
     with LightwoodAutocast():
         x = x + self.pe[:x.size(0), :]
         return self.dropout(x)
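The pe buffer sliced here is presumably a precomputed positional-encoding table, as in the standard PyTorch transformer tutorial. A sinusoidal construction that would produce a compatible buffer of shape [max_len, 1, d_model] is sketched below under that assumption (it also assumes an even d_model); the module in the source may build pe differently.

import math

import torch


def build_sinusoidal_pe(d_model, max_len=5000):
    # Standard sinusoidal table: even feature indexes get sin, odd ones get cos.
    position = torch.arange(max_len, dtype=torch.float).unsqueeze(1)
    div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
    pe = torch.zeros(max_len, d_model)
    pe[:, 0::2] = torch.sin(position * div_term)
    pe[:, 1::2] = torch.cos(position * div_term)
    return pe.unsqueeze(1)  # shape [max_len, 1, d_model], so pe[:x.size(0), :] broadcasts over the batch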
Example #13
    def fit(self, train_data_loader, test_data_loader, desired_error, max_time, callback,
            eval_every_x_epochs=1, max_unimproving_models=10, custom_train_func=None, custom_test_func=None):
        started = time.time()
        epoch = 0
        lowest_test_error = None
        last_test_error = None
        test_error_delta_buff = []

        keep_training = True

        while keep_training:
            epoch += 1
            running_loss = 0.0
            error = 0
            self.model = self.model.train()
            for i, data in enumerate(train_data_loader, 0):
                if custom_train_func is None:
                    input, real = data

                    with LightwoodAutocast():
                        if self.input_encoder is not None:
                            input = self.input_encoder(input)
                        if self.output_encoder is not None:
                            real = self.output_encoder(real)

                        input = input.to(self.device)
                        real = real.to(self.device)

                        predicted = self.model(input)
                        loss = self.loss_criterion(predicted, real)
                    loss.backward()
                    self.optimizer.step()

                    if self.scheduler is not None:
                        self.scheduler.step()

                    self.optimizer.zero_grad()
                else:
                    loss = custom_train_func(self.model, data, self)

                running_loss += loss.item()
                error = running_loss / (i + 1)

            if epoch % eval_every_x_epochs == 0:
                if test_data_loader is not None:
                    test_running_loss = 0.0
                    test_error = 0
                    self.model = self.model.eval()
                    real_buff = []
                    predicted_buff = []

                    for i, data in enumerate(test_data_loader, 0):
                        if custom_test_func is None:
                            input, real = data

                            if self.input_encoder is not None:
                                input = self.input_encoder(input)
                            if self.output_encoder is not None:
                                real = self.output_encoder(real)

                            input = input.to(self.device)
                            real = real.to(self.device)

                            with torch.no_grad():
                                predicted = self.model(input)

                            real_buff.append(real.tolist())
                            predicted_buff.append(predicted.tolist())

                            loss = self.loss_criterion(predicted, real)
                        else:
                            with torch.no_grad():
                                loss = custom_test_func(self.model, data, self)

                        test_running_loss += loss.item()
                        test_error = test_running_loss / (i + 1)
                else:
                    test_error = error
                    real_buff = None
                    predicted_buff = None

                if lowest_test_error is None or test_error < lowest_test_error:
                    lowest_test_error = test_error
                    self.best_model = copy.deepcopy(self.model).to('cpu')

                if last_test_error is None:
                    test_error_delta_buff.append(0)
                else:
                    test_error_delta_buff.append(last_test_error - test_error)

                last_test_error = test_error

                if (time.time() - started) > max_time:
                    keep_training = False

                if lowest_test_error < desired_error:
                    keep_training = False

                if len(test_error_delta_buff) >= max_unimproving_models:
                    delta_mean = np.mean(test_error_delta_buff[-max_unimproving_models:])
                    if delta_mean <= 0:
                        keep_training = False

                callback(test_error, real_buff, predicted_buff)

        return self.best_model, lowest_test_error, int(time.time() - started)
Example #14
 def forward(self, input, hidden):
     with LightwoodAutocast():
         output, hidden = self.gru(input, hidden)
         output = self.dropout(output)
         output = self.out(output)
     return output, hidden
Example #15
 def forward(self, preds, target):
     with LightwoodAutocast():
         cat_labels = target.max(1).indices
         return self.cross_entropy_loss(preds, cat_labels)
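This loss wrapper converts one-hot (or probability) targets into class indices before applying cross entropy. A quick self-contained check of that conversion, using made-up tensors:

import torch
import torch.nn as nn

target = torch.tensor([[0., 1., 0.],
                       [1., 0., 0.]])   # one-hot targets
cat_labels = target.max(1).indices      # tensor([1, 0])
preds = torch.randn(2, 3)               # raw logits for 2 samples, 3 classes
loss = nn.CrossEntropyLoss()(preds, cat_labels)
print(cat_labels, loss.item())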
Example #16
    def _iter_fit(self, ds, subset_id=None, max_epochs=120000):
        if self._nonpersistent['sampler'] is None:
            data_loader = DataLoader(ds,
                                     batch_size=self.batch_size,
                                     shuffle=True,
                                     num_workers=0)
        else:
            data_loader = DataLoader(ds,
                                     batch_size=self.batch_size,
                                     num_workers=0,
                                     sampler=self._nonpersistent['sampler'])

        for epoch in range(max_epochs):  # loop over the dataset multiple times
            running_loss = 0.0
            error = 0
            for i, data in enumerate(data_loader, 0):
                if self.start_selfaware_training and not self.is_selfaware:
                    log.info(
                        'Starting to train selfaware network for better confidence determination!'
                    )
                    self.is_selfaware = True

                if self.stop_selfaware_training and self.is_selfaware:
                    log.info(
                        'Cannot train selfaware network, will fall back to using simpler confidence models!'
                    )
                    self.is_selfaware = False

                self.total_iterations += 1
                # get the inputs; data is a list of [inputs, labels]
                inputs, labels = data

                labels = labels.to(self.net.device)
                inputs = inputs.to(self.net.device)

                # zero the parameter gradients
                self.optimizer.zero_grad()
                self.selfaware_optimizer.zero_grad()

                # forward + backward + optimize
                with LightwoodAutocast():
                    outputs = self.net(inputs)
                if self.is_selfaware:
                    with LightwoodAutocast():
                        awareness = self.selfaware_net(inputs.detach(),
                                                       outputs.detach())

                loss = None
                for k, criterion in enumerate(self.criterion_arr):
                    with LightwoodAutocast():
                        target_loss = criterion(
                            outputs[:,
                                    ds.out_indexes[k][0]:ds.out_indexes[k][1]],
                            labels[:,
                                   ds.out_indexes[k][0]:ds.out_indexes[k][1]])

                    if loss is None:
                        loss = target_loss
                    else:
                        loss += target_loss

                awareness_loss = None
                if self.is_selfaware:
                    unreduced_losses = []
                    for k, criterion in enumerate(
                            self.unreduced_criterion_arr):
                        target_loss = criterion(
                            outputs[:,
                                    ds.out_indexes[k][0]:ds.out_indexes[k][1]],
                            labels[:,
                                   ds.out_indexes[k][0]:ds.out_indexes[k][1]])

                        target_loss = target_loss.tolist()
                        if isinstance(target_loss[0], list):
                            target_loss = [np.mean(x) for x in target_loss]
                        # use a separate index so the batch counter `i` is not shadowed
                        for sample_idx, value in enumerate(target_loss):
                            if len(unreduced_losses) <= sample_idx:
                                unreduced_losses.append([])
                            unreduced_losses[sample_idx].append(value)

                    unreduced_losses = torch.Tensor(unreduced_losses).to(
                        self.net.device)

                    awareness_loss = self.awareness_criterion(
                        awareness,
                        unreduced_losses) * self.awareness_scale_factor

                    if CONFIG.MONITORING['batch_loss']:
                        self.monitor.plot_loss(awareness_loss.item(),
                                               self.total_iterations,
                                               'Awareness Batch Loss')

                if CONFIG.MONITORING['batch_loss']:
                    self.monitor.plot_loss(loss.item(), self.total_iterations,
                                           'Targets Batch Loss')

                if awareness_loss is not None:
                    awareness_loss.backward(retain_graph=True)

                running_loss += loss.item()
                loss.backward()

                # @NOTE: Decrease 900 if you want to plot gradients more often, I find it's too expensive to do so
                if CONFIG.MONITORING['network_heatmap'] and random.randint(
                        0, 1000) > 900:
                    weights = []
                    gradients = []
                    layer_name = []
                    for index, layer in enumerate(self.net.net):
                        if 'Linear' in str(type(layer)):
                            weights.append(
                                list(layer.weight.cpu().detach().numpy().ravel(
                                )))
                            gradients.append(
                                list(layer.weight.grad.cpu().detach().numpy().
                                     ravel()))
                            layer_name.append(f'Layer {index}-{index+1}')
                    self.monitor.weight_map(layer_name, weights,
                                            'Predictive network weights')
                    self.monitor.weight_map(layer_name, gradients,
                                            'Predictive network gradients')

                    if self.is_selfaware:
                        weights = []
                        gradients = []
                        layer_name = []
                        for index, layer in enumerate(self.selfaware_net.net):
                            if 'Linear' in str(type(layer)):
                                weights.append(
                                    list(layer.weight.cpu().detach().numpy().
                                         ravel()))
                                gradients.append(
                                    list(layer.weight.grad.cpu().detach().
                                         numpy().ravel()))
                                layer_name.append(f'Layer {index}-{index+1}')
                        self.monitor.weight_map(layer_name, weights,
                                                'Awareness network weights')
                        self.monitor.weight_map(layer_name, gradients,
                                                'Awareness network gradients')

                # now that we have run backward in both losses, optimize()
                # (review: we may need to optimize for each step)
                self.optimizer.step()

                if self.is_selfaware and self.start_selfaware_training:
                    self.selfaware_optimizer.step()

                error = running_loss / (i + 1)

                if CONFIG.MONITORING['batch_loss']:
                    #self.monitor.plot_loss(total_loss.item(), self.total_iterations, 'Total Batch Loss')
                    self.monitor.plot_loss(error, self.total_iterations,
                                           'Mean Total Running Loss')

            if CONFIG.MONITORING['epoch_loss']:
                self.monitor.plot_loss(error, self.total_iterations,
                                       'Train Epoch Error')
                self.monitor.plot_loss(
                    error, self.total_iterations,
                    f'Train Epoch Error - Subset {subset_id}')
            yield error
Example #17
 def forward(self, x):
     with LightwoodAutocast():
         x = self.encoder(x)
         x = self.decoder(x)
     return x
Example #18
    def prepare(self, priming_data, previous_target_data=None, feedback_hoop_function=None, batch_size=256):
        """
        The usual, run this on the initial training data for the encoder
        :param priming_data: a list of (self._n_dims)-dimensional time series [[dim1_data], ...]
        :param previous_target_data: tensor with encoded previous target values for autoregressive tasks
        :param feedback_hoop_function: [if you want to get feedback on the training process]
        :param batch_size
        :return:
        """
        if self._prepared:
            raise Exception('You can only call "prepare" once for a given encoder.')
        else:
            self.setup_nn(previous_target_data)

        # Convert to array and determine max length
        priming_data, lengths_data = self._prepare_raw_data(priming_data)
        self._max_ts_length = int(lengths_data.max())

        if self._normalizer:
            self._normalizer.prepare(priming_data)
            priming_data = torch.stack([self._normalizer.encode(d) for d in priming_data]).to(self.device)
        else:
            priming_data = torch.stack([d for d in priming_data]).unsqueeze(-1).to(self.device)

        # merge all normalized data into a training batch
        if previous_target_data is not None and len(previous_target_data) > 0:
            normalized_tensors = []
            for target_dict in previous_target_data:
                normalizer = target_dict['normalizer']
                self._target_ar_normalizers.append(normalizer)
                data = torch.Tensor(normalizer.encode(target_dict['data'])).to(self.device)
                data[torch.isnan(data)] = 0.0
                if len(data.shape) < 3:
                    data = data.unsqueeze(-1)  # add feature dimension
                normalized_tensors.append(data)

            normalized_data = torch.cat(normalized_tensors, dim=-1)
            priming_data = torch.cat([priming_data, normalized_data], dim=-1)

        self._encoder.train()
        for i in range(self._epochs):
            average_loss = 0

            for batch_idx in range(0, len(priming_data), batch_size):
                # setup loss and optimizer
                self._optimizer.zero_grad()
                loss = 0

                # shape: (batch_size, timesteps, n_dims)
                batch = self._get_batch(priming_data, batch_idx, min(batch_idx + batch_size, len(priming_data)))

                # encode and decode through time
                with LightwoodAutocast():
                    if self.encoder_class == TransformerEncoder:
                        # pack batch length info tensor
                        len_batch = self._get_batch(lengths_data, batch_idx, min(batch_idx + batch_size, len(priming_data)))
                        batch = batch, len_batch

                        next_tensor, hidden_state, dec_loss = self._encoder.bptt(batch, self._enc_criterion, self.device)
                        loss += dec_loss

                    else:
                        next_tensor, hidden_state, enc_loss = self._encoder.bptt(batch, self._enc_criterion, self.device)
                        loss += enc_loss

                        next_tensor, hidden_state, dec_loss = self._decoder.decode(batch, next_tensor, self._dec_criterion,
                                                                                   self.device,
                                                                                   hidden_state=hidden_state)
                        loss += dec_loss

                loss.backward()

                self._optimizer.step()
                average_loss += loss.item()

            average_loss = average_loss / len(priming_data)
            batch_idx += batch_size

            if average_loss < self._stop_on_error:
                break
            if feedback_hoop_function is not None:
                feedback_hoop_function("epoch [{epoch_n}/{total}] average_loss = {average_loss}".format(
                    epoch_n=i + 1,
                    total=self._epochs,
                    average_loss=average_loss))

        self._prepared = True
Example #19
 def forward(self, input, hidden):
     with LightwoodAutocast():
         embedded = self.embedding(input).view(1, 1, -1)
         output = embedded
         output, hidden = self.gru(output, hidden)
     return output, hidden