def _train_model(self, images):
    """
    :param images: list of image paths
    """
    # relies on module-level helpers/config: autoencoder, to_img, learning_rate, num_epochs
    data_source = self._transform_images(images)

    self.model = autoencoder().cpu()
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(self.model.parameters(), lr=learning_rate, weight_decay=1e-5)

    for epoch in range(1, num_epochs + 1):
        for i, data in enumerate(data_source):
            img = data
            img = img.view(img.size(0), -1)
            img = Variable(img).cpu()
            # ===================forward=====================
            with LightwoodAutocast():
                output = self.model(img)
                loss = criterion(output, img)
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # ===================log========================
        print('epoch [{}/{}], loss:{:.4f}'.format(epoch, num_epochs, loss.data))
        if epoch % num_epochs == 0:
            pic = to_img(output.cpu().data[0:-2])
            if not os.path.exists('./mlp_img'):
                os.mkdir('./mlp_img')
            save_image(pic, './mlp_img/image_{}.png'.format(i))
            shutil.rmtree('./mlp_img')

    torch.save(self.model.state_dict(), './sim_autoencoder.pth')
def forward(self, input, hidden):
    with LightwoodAutocast():
        output = self.in_activation(input.float())
        output, hidden = self.gru(output, hidden)
        output = self.dropout(output)
        output = self.out(output)
    return output, hidden
def forward(self, input, hidden):
    with LightwoodAutocast():
        output = self.embedding(input).view(1, 1, -1)
        output = F.relu(output)
        output, hidden = self.gru(output, hidden)
        output = self.softmax(self.out(output[0]))
    return output, hidden
def forward(self, input):
    """
    Forward pass of the layer. The goal is to generate values as if the
    weights of the linear operation were sampled from a normal distribution.
    """
    with LightwoodAutocast():
        return F.linear(input, self.w_sampler(), self.bias)
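# Illustrative sketch only: the original code does not show how `self.w_sampler`
# is implemented. One common way to get "weights sampled from a normal
# distribution" is the reparameterization trick below; the class and parameter
# names (GaussianWeightSampler, weight_mu, weight_rho) are hypothetical.
import torch
import torch.nn as nn
import torch.nn.functional as F


class GaussianWeightSampler(nn.Module):
    def __init__(self, out_features, in_features):
        super().__init__()
        self.weight_mu = nn.Parameter(torch.zeros(out_features, in_features))
        self.weight_rho = nn.Parameter(torch.full((out_features, in_features), -3.0))

    def forward(self):
        sigma = F.softplus(self.weight_rho)  # keep the standard deviation positive
        eps = torch.randn_like(sigma)        # eps ~ N(0, 1)
        return self.weight_mu + sigma * eps  # one sampled weight matrix per call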
def forward(self, true_input, main_net_output):
    """
    :param true_input: tensor with data point features
    :param main_net_output: tensor with main NN prediction for true_input
    :return: predicted loss value over the tensor samples
    """
    with LightwoodAutocast():
        aware_in = torch.cat((true_input, main_net_output), 1)
        output = self.net(aware_in)
    return output
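# Hedged usage sketch for the loss-predicting ("self-aware") network above.
# `DummySelfAware` is a hypothetical stand-in with the same forward signature,
# shown only to illustrate the concatenation of features and predictions.
import torch
import torch.nn as nn


class DummySelfAware(nn.Module):
    def __init__(self, n_features, n_outputs, n_targets=1):
        super().__init__()
        self.net = nn.Linear(n_features + n_outputs, n_targets)

    def forward(self, true_input, main_net_output):
        aware_in = torch.cat((true_input, main_net_output), 1)
        return self.net(aware_in)


# predicted per-sample loss for a batch of 8 samples with 10 features and 3 outputs
predicted_loss = DummySelfAware(10, 3)(torch.randn(8, 10), torch.randn(8, 3))  # -> [8, 1]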
def forward(self, input):
    """
    In this particular model we just need to pass the input through the network defined in setup.

    :param input: a pytorch tensor with the input data of a batch
    :return: output of the network
    """
    with LightwoodAutocast():
        output = self._foward_net(input)
    return output
def forward(self, src, lengths, device):
    with LightwoodAutocast():
        if self.src_mask is None or self.src_mask.size(0) != src.size(0):
            # Attention mask to avoid attending to upcoming parts of the sequence
            self.src_mask = self._generate_square_subsequent_mask(src.size(0)).to(device)

        src = self.src_linear(src)
        # src = self.pos_encoder(src)  # not sure if this is helpful in time series

        # The lengths_mask has to be of size [batch, lengths]
        lengths_mask = len_to_mask(lengths, zeros=True).to(device)
        hidden = self.transformer_encoder(src, mask=self.src_mask, src_key_padding_mask=lengths_mask)
        output = self.src_decoder(hidden)

    return output, hidden
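# Hedged sketches of the two mask helpers used above; neither is taken from the
# original module, they only follow the standard PyTorch transformer conventions
# the forward pass appears to rely on.
import torch


def _generate_square_subsequent_mask(sz):
    # additive causal mask: -inf above the diagonal, 0.0 on and below it
    mask = torch.triu(torch.ones(sz, sz), diagonal=1)
    return mask.masked_fill(mask == 1, float('-inf'))


def len_to_mask(lengths, zeros=True):
    # boolean padding mask of shape [batch, max_len]; with zeros=True, padded
    # positions are True so the result can be used as src_key_padding_mask
    max_len = int(lengths.max())
    valid = torch.arange(max_len).unsqueeze(0) < lengths.unsqueeze(1)
    return ~valid if zeros else valid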
def categorical_train_function(model, data, gym, test=False):
    input, real = data
    input = input.to(gym.device)
    labels = torch.tensor([torch.argmax(x) for x in real]).to(gym.device)

    with LightwoodAutocast():
        outputs = gym.model(input, labels=labels)
        loss, logits = outputs[:2]

    if not test:
        loss.backward()
        gym.optimizer.step()
        gym.scheduler.step()
        gym.optimizer.zero_grad()

    return loss
def forward(self, input, hidden, encoder_outputs):
    with LightwoodAutocast():
        embedded = self.embedding(input).view(1, 1, -1)
        embedded = self.dropout(embedded)

        attn_weights = F.softmax(
            self.attn(torch.cat((embedded[0], hidden[0]), 1)), dim=1)
        attn_applied = torch.bmm(attn_weights.unsqueeze(0),
                                 encoder_outputs.unsqueeze(0))

        output = torch.cat((embedded[0], attn_applied[0]), 1)
        output = self.attn_combine(output).unsqueeze(0)

        output = F.relu(output)
        output, hidden = self.gru(output, hidden)

        output = F.log_softmax(self.out(output[0]), dim=1)

    return output, hidden, attn_weights
def numerical_train_function(model, data, gym, backbone, test=False):
    input, real = data

    backbone = backbone.eval()
    with torch.no_grad():
        input = input.to(gym.device)
        real = real.to(gym.device)
        embeddings = backbone(input)[0][:, 0, :]

    with LightwoodAutocast():
        outputs = gym.model(embeddings)
        loss = gym.loss_criterion(outputs, real)

    if not test:
        loss.backward()
        gym.optimizer.step()
        gym.scheduler.step()
        gym.optimizer.zero_grad()

    return loss
def train(input_tensor, target_tensor, encoder, decoder, encoder_optimizer,
          decoder_optimizer, criterion, max_length=MAX_LENGTH):
    encoder_hidden = encoder.initHidden()

    encoder_optimizer.zero_grad()
    decoder_optimizer.zero_grad()

    input_length = input_tensor.size(0)
    target_length = target_tensor.size(0)

    encoder_outputs = torch.zeros(max_length, encoder.hidden_size, device=device)

    loss = 0

    with LightwoodAutocast():
        for ei in range(min(input_length, len(encoder_outputs))):
            encoder_output, encoder_hidden = encoder(input_tensor[ei], encoder_hidden)
            encoder_outputs[ei] = encoder_output[0, 0]

        decoder_input = torch.tensor([[SOS_token]], device=device)
        decoder_hidden = encoder_hidden

        use_teacher_forcing = random.random() < teacher_forcing_ratio

        if use_teacher_forcing:
            # Teacher forcing: feed the target as the next input
            for di in range(target_length):
                if isinstance(decoder, AttnDecoderRNN):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                loss += criterion(decoder_output, target_tensor[di])
                decoder_input = target_tensor[di]  # teacher forcing
        else:
            # Without teacher forcing: use the decoder's own predictions as the next input
            for di in range(target_length):
                if isinstance(decoder, AttnDecoderRNN):
                    decoder_output, decoder_hidden, decoder_attention = decoder(
                        decoder_input, decoder_hidden, encoder_outputs)
                else:
                    decoder_output, decoder_hidden = decoder(decoder_input, decoder_hidden)
                topv, topi = decoder_output.topk(1)
                decoder_input = topi.squeeze().detach()  # detach from history as input
                loss += criterion(decoder_output, target_tensor[di])
                if decoder_input.item() == EOS_token:
                    break

    loss.backward()

    encoder_optimizer.step()
    decoder_optimizer.step()

    return loss.item() / target_length
def forward(self, x):
    with LightwoodAutocast():
        x = x + self.pe[:x.size(0), :]
    return self.dropout(x)
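# Hedged sketch of how the `pe` buffer used above is commonly built (standard
# sinusoidal positional encoding as in the PyTorch transformer tutorial); the
# original module's constructor is not shown, so this is an assumption.
import math
import torch
import torch.nn as nn


class PositionalEncodingSketch(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super().__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(1)  # [max_len, 1, d_model], indexed by sequence position in forward()
        self.register_buffer('pe', pe)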
def fit(self, train_data_loader, test_data_loader, desired_error, max_time, callback,
        eval_every_x_epochs=1, max_unimproving_models=10,
        custom_train_func=None, custom_test_func=None):
    started = time.time()
    epoch = 0

    lowest_test_error = None
    last_test_error = None
    test_error_delta_buff = []

    keep_training = True
    while keep_training:
        epoch += 1
        running_loss = 0.0
        error = 0
        self.model = self.model.train()

        for i, data in enumerate(train_data_loader, 0):
            if custom_train_func is None:
                input, real = data
                with LightwoodAutocast():
                    if self.input_encoder is not None:
                        input = self.input_encoder(input)
                    if self.output_encoder is not None:
                        real = self.output_encoder(real)

                    input = input.to(self.device)
                    real = real.to(self.device)

                    predicted = self.model(input)
                    loss = self.loss_criterion(predicted, real)

                loss.backward()
                self.optimizer.step()
                if self.scheduler is not None:
                    self.scheduler.step()
                self.optimizer.zero_grad()
            else:
                loss = custom_train_func(self.model, data, self)

            running_loss += loss.item()
            error = running_loss / (i + 1)

        if epoch % eval_every_x_epochs == 0:
            if test_data_loader is not None:
                test_running_loss = 0.0
                test_error = 0
                self.model = self.model.eval()

                real_buff = []
                predicted_buff = []

                for i, data in enumerate(test_data_loader, 0):
                    if custom_test_func is None:
                        input, real = data

                        if self.input_encoder is not None:
                            input = self.input_encoder(input)
                        if self.output_encoder is not None:
                            real = self.output_encoder(real)

                        input = input.to(self.device)
                        real = real.to(self.device)

                        with torch.no_grad():
                            predicted = self.model(input)

                        real_buff.append(real.tolist())
                        predicted_buff.append(predicted.tolist())

                        loss = self.loss_criterion(predicted, real)
                    else:
                        with torch.no_grad():
                            loss = custom_test_func(self.model, data, self)

                    test_running_loss += loss.item()
                    test_error = test_running_loss / (i + 1)
            else:
                test_error = error
                real_buff = None
                predicted_buff = None

            if lowest_test_error is None or test_error < lowest_test_error:
                lowest_test_error = test_error
                self.best_model = copy.deepcopy(self.model).to('cpu')

            if last_test_error is None:
                test_error_delta_buff.append(0)
            else:
                test_error_delta_buff.append(last_test_error - test_error)

            last_test_error = test_error

            if (time.time() - started) > max_time:
                keep_training = False

            if lowest_test_error < desired_error:
                keep_training = False

            if len(test_error_delta_buff) >= max_unimproving_models:
                delta_mean = np.mean(test_error_delta_buff[-max_unimproving_models:])
                if delta_mean <= 0:
                    keep_training = False

            callback(test_error, real_buff, predicted_buff)

    return self.best_model, lowest_test_error, int(time.time() - started)
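# Hedged usage sketch for fit() above. `print_progress` is a hypothetical callback
# matching the (test_error, real_buff, predicted_buff) signature the loop calls;
# the commented call assumes `gym` is an instance of the class defining fit().
def print_progress(test_error, real_buff, predicted_buff):
    print('current test error: {:.4f}'.format(test_error))

# best_model, best_error, elapsed = gym.fit(train_loader, test_loader,
#                                           desired_error=0.01, max_time=3600,
#                                           callback=print_progress,
#                                           eval_every_x_epochs=2)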
def forward(self, input, hidden):
    with LightwoodAutocast():
        output, hidden = self.gru(input, hidden)
        output = self.dropout(output)
        output = self.out(output)
    return output, hidden
def forward(self, preds, target):
    with LightwoodAutocast():
        cat_labels = target.max(1).indices
        return self.cross_entropy_loss(preds, cat_labels)
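# Small hedged example of the target conversion above: PyTorch's cross-entropy
# expects class indices, so one-hot (or probability) targets are reduced with
# max(1).indices before being handed to the underlying loss.
import torch

target = torch.tensor([[0., 1., 0.],
                       [1., 0., 0.]])
cat_labels = target.max(1).indices  # tensor([1, 0])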
def _iter_fit(self, ds, subset_id=None, max_epochs=120000):
    if self._nonpersistent['sampler'] is None:
        data_loader = DataLoader(ds, batch_size=self.batch_size, shuffle=True, num_workers=0)
    else:
        data_loader = DataLoader(ds, batch_size=self.batch_size, num_workers=0,
                                 sampler=self._nonpersistent['sampler'])

    for epoch in range(max_epochs):  # loop over the dataset multiple times
        running_loss = 0.0
        error = 0
        for i, data in enumerate(data_loader, 0):
            if self.start_selfaware_training and not self.is_selfaware:
                log.info('Starting to train selfaware network for better confidence determination !')
                self.is_selfaware = True

            if self.stop_selfaware_training and self.is_selfaware:
                log.info('Cannot train selfaware network, will fallback to using simpler confidence models !')
                self.is_selfaware = False

            self.total_iterations += 1

            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            labels = labels.to(self.net.device)
            inputs = inputs.to(self.net.device)

            # zero the parameter gradients
            self.optimizer.zero_grad()
            self.selfaware_optimizer.zero_grad()

            # forward + backward + optimize
            with LightwoodAutocast():
                outputs = self.net(inputs)

            if self.is_selfaware:
                with LightwoodAutocast():
                    awareness = self.selfaware_net(inputs.detach(), outputs.detach())

            loss = None
            for k, criterion in enumerate(self.criterion_arr):
                with LightwoodAutocast():
                    target_loss = criterion(
                        outputs[:, ds.out_indexes[k][0]:ds.out_indexes[k][1]],
                        labels[:, ds.out_indexes[k][0]:ds.out_indexes[k][1]])
                if loss is None:
                    loss = target_loss
                else:
                    loss += target_loss

            awareness_loss = None
            if self.is_selfaware:
                unreduced_losses = []
                for k, criterion in enumerate(self.unreduced_criterion_arr):
                    target_loss = criterion(
                        outputs[:, ds.out_indexes[k][0]:ds.out_indexes[k][1]],
                        labels[:, ds.out_indexes[k][0]:ds.out_indexes[k][1]])

                    target_loss = target_loss.tolist()
                    if type(target_loss[0]) == type([]):
                        target_loss = [np.mean(x) for x in target_loss]

                    for idx, value in enumerate(target_loss):
                        if len(unreduced_losses) <= idx:
                            unreduced_losses.append([])
                        unreduced_losses[idx].append(value)

                unreduced_losses = torch.Tensor(unreduced_losses).to(self.net.device)

                awareness_loss = self.awareness_criterion(awareness, unreduced_losses) * self.awareness_scale_factor

                if CONFIG.MONITORING['batch_loss']:
                    self.monitor.plot_loss(awareness_loss.item(), self.total_iterations, 'Awareness Batch Loss')

            if CONFIG.MONITORING['batch_loss']:
                self.monitor.plot_loss(loss.item(), self.total_iterations, 'Targets Batch Loss')

            if awareness_loss is not None:
                awareness_loss.backward(retain_graph=True)

            running_loss += loss.item()
            loss.backward()

            # @NOTE: Decrease 900 if you want to plot gradients more often, I find it's too expensive to do so
            if CONFIG.MONITORING['network_heatmap'] and random.randint(0, 1000) > 900:
                weights = []
                gradients = []
                layer_name = []
                for index, layer in enumerate(self.net.net):
                    if 'Linear' in str(type(layer)):
                        weights.append(list(layer.weight.cpu().detach().numpy().ravel()))
                        gradients.append(list(layer.weight.grad.cpu().detach().numpy().ravel()))
                        layer_name.append(f'Layer {index}-{index + 1}')

                self.monitor.weight_map(layer_name, weights, 'Predictive network weights')
                self.monitor.weight_map(layer_name, gradients, 'Predictive network gradients')

                if self.is_selfaware:
                    weights = []
                    gradients = []
                    layer_name = []
                    for index, layer in enumerate(self.selfaware_net.net):
                        if 'Linear' in str(type(layer)):
                            weights.append(list(layer.weight.cpu().detach().numpy().ravel()))
                            gradients.append(list(layer.weight.grad.cpu().detach().numpy().ravel()))
                            layer_name.append(f'Layer {index}-{index + 1}')

                    self.monitor.weight_map(layer_name, weights, 'Awareness network weights')
                    self.monitor.weight_map(layer_name, gradients, 'Awareness network gradients')

            # now that we have run backward on both losses, optimize
            # (review: we may need to optimize for each step)
            self.optimizer.step()

            if self.is_selfaware and self.start_selfaware_training:
                self.selfaware_optimizer.step()

            error = running_loss / (i + 1)

            if CONFIG.MONITORING['batch_loss']:
                # self.monitor.plot_loss(total_loss.item(), self.total_iterations, 'Total Batch Loss')
                self.monitor.plot_loss(error, self.total_iterations, 'Mean Total Running Loss')

        if CONFIG.MONITORING['epoch_loss']:
            self.monitor.plot_loss(error, self.total_iterations, 'Train Epoch Error')
            self.monitor.plot_loss(error, self.total_iterations, f'Train Epoch Error - Subset {subset_id}')

        yield error
def forward(self, x):
    with LightwoodAutocast():
        x = self.encoder(x)
        x = self.decoder(x)
    return x
def prepare(self, priming_data, previous_target_data=None, feedback_hoop_function=None, batch_size=256):
    """
    The usual, run this on the initial training data for the encoder.

    :param priming_data: a list of (self._n_dims)-dimensional time series [[dim1_data], ...]
    :param previous_target_data: tensor with encoded previous target values for autoregressive tasks
    :param feedback_hoop_function: [if you want to get feedback on the training process]
    :param batch_size: training batch size
    :return:
    """
    if self._prepared:
        raise Exception('You can only call "prepare" once for a given encoder.')
    else:
        self.setup_nn(previous_target_data)

    # Convert to array and determine max length
    priming_data, lengths_data = self._prepare_raw_data(priming_data)
    self._max_ts_length = int(lengths_data.max())

    if self._normalizer:
        self._normalizer.prepare(priming_data)
        priming_data = torch.stack([self._normalizer.encode(d) for d in priming_data]).to(self.device)
    else:
        priming_data = torch.stack([d for d in priming_data]).unsqueeze(-1).to(self.device)

    # merge all normalized data into a training batch
    if previous_target_data is not None and len(previous_target_data) > 0:
        normalized_tensors = []
        for target_dict in previous_target_data:
            normalizer = target_dict['normalizer']
            self._target_ar_normalizers.append(normalizer)
            data = torch.Tensor(normalizer.encode(target_dict['data'])).to(self.device)
            data[torch.isnan(data)] = 0.0
            if len(data.shape) < 3:
                data = data.unsqueeze(-1)  # add feature dimension
            normalized_tensors.append(data)

        normalized_data = torch.cat(normalized_tensors, dim=-1)
        priming_data = torch.cat([priming_data, normalized_data], dim=-1)

    self._encoder.train()
    for i in range(self._epochs):
        average_loss = 0

        for batch_idx in range(0, len(priming_data), batch_size):
            # setup loss and optimizer
            self._optimizer.zero_grad()
            loss = 0

            # shape: (batch_size, timesteps, n_dims)
            batch = self._get_batch(priming_data, batch_idx, min(batch_idx + batch_size, len(priming_data)))

            # encode and decode through time
            with LightwoodAutocast():
                if self.encoder_class == TransformerEncoder:
                    # pack batch length info tensor
                    len_batch = self._get_batch(lengths_data, batch_idx, min(batch_idx + batch_size, len(priming_data)))
                    batch = batch, len_batch

                    next_tensor, hidden_state, dec_loss = self._encoder.bptt(batch, self._enc_criterion, self.device)
                    loss += dec_loss
                else:
                    next_tensor, hidden_state, enc_loss = self._encoder.bptt(batch, self._enc_criterion, self.device)
                    loss += enc_loss

                    next_tensor, hidden_state, dec_loss = self._decoder.decode(batch, next_tensor, self._dec_criterion,
                                                                               self.device, hidden_state=hidden_state)
                    loss += dec_loss

            loss.backward()
            self._optimizer.step()

            average_loss += loss.item()

        average_loss = average_loss / len(priming_data)

        if average_loss < self._stop_on_error:
            break
        if feedback_hoop_function is not None:
            feedback_hoop_function("epoch [{epoch_n}/{total}] average_loss = {average_loss}".format(
                epoch_n=i + 1, total=self._epochs, average_loss=average_loss))

    self._prepared = True
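# Hedged example of the priming_data layout the docstring above describes
# (illustrative values only): one entry per series, each entry holding one list
# of observations per time-series dimension.
priming_data = [
    [[1.0, 2.0, 3.0], [0.1, 0.2, 0.3]],  # 2-dimensional series, 3 timesteps
    [[4.0, 5.0], [0.4, 0.5]],            # series may have different lengths
]
# encoder.prepare(priming_data)  # `encoder` being an instance of this encoder class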
def forward(self, input, hidden):
    with LightwoodAutocast():
        embedded = self.embedding(input).view(1, 1, -1)
        output = embedded
        output, hidden = self.gru(output, hidden)
    return output, hidden