def Train():
    data_train = DataTXT_All('data/trump_full_speech.txt', sequence_length)
    data = DataLoader(data_train, batch_size=batch_size, shuffle=True, drop_last=True)
    model = RNN(len(id2lettre), latent_size, len(id2lettre))
    loss = nn.CrossEntropyLoss()
    optim = torch.optim.Adam(model.parameters(), lr=5e-3)
    iterations = 15

    # move the model and loss to the GPU (if available)
    model.to(device)
    loss.to(device)

    writer = SummaryWriter("runs/exo4/runs_sequence_length" + str(sequence_length)
                           + "seq_pred_len" + str(sequence_pred_length)
                           + datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
    savepath = Path("seq_gen" + "seq_len" + str(sequence_length)
                    + "seq_pred_len" + str(sequence_pred_length)
                    + str(latent_size) + ".pch")

    # checkpoint: resume from a saved State if one exists
    if savepath.is_file():
        with savepath.open("rb") as fp:
            state = torch.load(fp)
    else:
        state = State(model, optim)
    # override the learning rate; assigning optim.lr directly has no effect,
    # the param_groups must be updated instead
    for group in state.optim.param_groups:
        group['lr'] = 5e-2

    for i in range(iterations):
        train_loss = 0
        nt = 0
        for x in data:
            if nt % 1000 == 0:
                print(nt, end=', ')
            nt += 1
            y = x.to(device)                                  # integer targets (batch, seq)
            x = one_hot(x, len(id2lettre)).to(device)         # one-hot inputs (batch, seq, vocab)
            h = torch.zeros(batch_size, latent_size).to(device)
            x = x.permute(1, 0, 2)                            # (seq, batch, vocab) for the RNN
            h = state.model(x, h)                             # hidden states for every time step
            yhat = state.model.decoder(h).transpose(0, 1)     # logits (batch, seq, vocab)
            # the logit at position t is trained to predict the character
            # sequence_pred_length steps later
            l = loss(yhat[:, :-sequence_pred_length].flatten(0, 1),
                     y[:, sequence_pred_length:].flatten())
            state.optim.zero_grad()
            l.backward()
            state.optim.step()
            train_loss += l.item()
        train_loss = train_loss / nt
        writer.add_scalar('Loss/Train', train_loss, i)
        print()
        print('Epoch: ', i + 1, '\tError train: ', train_loss)
        Generate(state)
        with savepath.open("wb") as fp:
            state.epoch = i + 1
            torch.save(state, fp)
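# Minimal, self-contained sketch (not part of the original code) of the target
# alignment used in Train above: the logit at position t is compared with the
# character sequence_pred_length steps later. All names and sizes below are
# illustrative assumptions, not the values used in the actual experiment.
import torch
import torch.nn as nn

_batch, _seq_len, _vocab, _pred_len = 4, 10, 97, 1
_yhat = torch.randn(_batch, _seq_len, _vocab)        # decoder logits (batch, seq, vocab)
_y = torch.randint(0, _vocab, (_batch, _seq_len))    # integer character ids (batch, seq)
# drop the last _pred_len predictions and the first _pred_len targets, then flatten
_l = nn.CrossEntropyLoss()(_yhat[:, :-_pred_len].flatten(0, 1),
                           _y[:, _pred_len:].flatten())
print(_l.item())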
def descente_gradient(self, X_train, Y_train, seq_len_train, X_test, Y_test, seq_len_test):
    """
    X_train : dim (len_sequence, batch_size, nb_feature); all batches are
        concatenated along dim 0.
    Y_train : 1D tensor; the first 32 labels belong to the first batch,
        labels 32 to 64 to the second batch, and so on.
    seq_len_train : 1D tensor of per-batch sequence lengths, e.g.
        seq_len_train[0] = 25  ----> 1st batch sequence length = 25
        seq_len_train[1] = 5   ----> 2nd batch sequence length = 5
    """
    # parameters to optimize
    optim = torch.optim.SGD(self.model.parameters(), lr=self.eps)
    # cross-entropy loss
    loss_func = nn.CrossEntropyLoss()
    # checkpoint: resume from a saved State if one exists
    if savepath.is_file():
        with savepath.open("rb") as fp:
            state = torch.load(fp)
    else:
        state = State(self.model, optim)

    rec_loss_train = [None] * self.epoch   # record train loss
    rec_loss_test = [None] * self.epoch
    rec_cost = [None] * self.epoch

    for n_iter in range(state.epoch, self.epoch):
        cumul_loss = 0
        count_batch = 0
        ind = 0
        for i, len_seq in enumerate(seq_len_train):
            # reset the gradients
            state.optim.zero_grad()
            X_batch = X_train[ind:ind + len_seq]
            ind += len_seq
            h0 = torch.zeros(self.batch_size, self.dim_latent)
            H = state.model.forward(X_batch.to(device), h0.to(device))
            # classify from the last hidden state only
            Y_hat = state.model.decode(torch.unsqueeze(H[-1], 0))
            Y_batch = Y_train[i * self.batch_size:(i + 1) * self.batch_size]
            loss = loss_func(Y_hat[0], Y_batch.long().to(device))
            loss.backward()
            # update the model parameters
            state.optim.step()
            state.iteration += 1
            cumul_loss += loss.item()
            count_batch += 1
            # drop references so the GPU tensors can be freed
            del h0, H, X_batch, Y_hat, Y_batch

        with savepath.open("wb") as fp:
            state.epoch = state.epoch + 1
            torch.save(state, fp)

        # training curves can be visualized with: tensorboard --logdir runs/
        writer.add_scalar('Loss/train', cumul_loss / count_batch, n_iter)
        print(f"Iteration {n_iter}: loss {cumul_loss / count_batch}")
        rec_loss_train[n_iter] = cumul_loss / count_batch

        # evaluate the loss and the 0-1 cost on the test set
        with torch.no_grad():
            cost_0_1 = 0
            cumul_loss = 0
            count_batch = 0
            ind = 0
            for i, len_seq in enumerate(seq_len_test):
                X_batch = X_test[ind:ind + len_seq]
                ind += len_seq
                h0 = torch.zeros(self.batch_size, self.dim_latent)
                H = state.model.forward(X_batch.to(device), h0.to(device))
                Y_hat_test = state.model.decode(torch.unsqueeze(H[-1], 0))
                Y_batch = Y_test[i * self.batch_size:(i + 1) * self.batch_size]
                loss_test = loss_func(Y_hat_test[0], Y_batch.long().to(device))
                # 0-1 cost: fraction of misclassified examples in the batch
                Y_hat_test = Y_hat_test.cpu()
                Y_batch = Y_batch.cpu()
                soft_m = nn.functional.softmax(Y_hat_test[0], dim=1)
                label = torch.argmax(soft_m, dim=1)
                cost_0_1 += (label != Y_batch.long()).sum().item() / self.batch_size
                cumul_loss += loss_test.item()
                count_batch += 1
                # drop references so the GPU tensors can be freed
                del h0, H, X_batch

            writer.add_scalar('Loss/test', cumul_loss / count_batch, n_iter)
            rec_loss_test[n_iter] = cumul_loss / count_batch
            writer.add_scalar('Classification error rate', cost_0_1 / count_batch, n_iter)
            rec_cost[n_iter] = cost_0_1 / count_batch

    return rec_loss_train, rec_loss_test, rec_cost
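# Minimal, self-contained sketch (not part of the original code) of the packed
# layout described in the docstring above: all batches are concatenated along
# dim 0 of X_train and seq_len_train[i] gives the sequence length of batch i.
# The names and sizes below are illustrative assumptions.
import torch

_batch_size, _n_feat = 32, 3
_seq_len_train = torch.tensor([25, 5, 12])                 # one length per batch
_X_train = torch.randn(int(_seq_len_train.sum()), _batch_size, _n_feat)
_ind = 0
for _i, _len_seq in enumerate(_seq_len_train):
    _X_batch = _X_train[_ind:_ind + _len_seq]              # (len_seq, batch_size, n_feat)
    _ind += _len_seq
    print(_i, tuple(_X_batch.shape))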
def descente_gradient(self, X_train, X_test):
    # parameters to optimize
    optim = torch.optim.Adam(self.model.parameters(), lr=self.eps)
    # cross-entropy loss, summed over all positions
    loss_func = nn.CrossEntropyLoss(reduction='sum')
    # checkpoint: resume from a saved State if one exists
    if savepath.is_file():
        with savepath.open("rb") as fp:
            state = torch.load(fp)
    else:
        state = State(self.model, optim)

    rec_loss_train = [None] * self.epoch   # record train loss
    rec_loss_test = [None] * self.epoch

    for n_iter in range(state.epoch, self.epoch):
        cumul_loss = 0
        count_batch = 0
        for batch in X_train:
            # reset the gradients
            state.optim.zero_grad()
            # cut the sequence into windows of 100 steps stacked along dim 1;
            # if the last window is shorter, torch.stack fails and it is dropped
            try:
                batch = torch.stack(batch.split(100), dim=1)
                h0 = torch.zeros(self.batch_size, self.dim_latent)
            except RuntimeError:
                batch = torch.stack(batch.split(100)[:-1], dim=1)
                h0 = torch.zeros(batch.shape[1], self.dim_latent)
            H = state.model.forward(batch.to(device), h0.to(device))
            pred = state.model.decode(H)
            pred = pred.permute(0, 2, 1)            # (seq, vocab, n_windows) for CrossEntropyLoss
            real = batch[1:].argmax(dim=2)          # next-character targets from the one-hot input
            loss = loss_func(pred.narrow(0, 0, len(pred) - 1), real.to(device))
            loss.backward()
            # update the model parameters
            state.optim.step()
            state.iteration += 1
            cumul_loss += loss.item()
            count_batch += 1
            # drop references so the GPU tensors can be freed
            del h0, H, batch, pred, real

        with savepath.open("wb") as fp:
            state.epoch = state.epoch + 1
            torch.save(state, fp)

        # training curves can be visualized with: tensorboard --logdir runs/
        writer.add_scalar('Loss/train', cumul_loss, n_iter)
        print(f"Iteration {n_iter}: loss {cumul_loss}")
        rec_loss_train[n_iter] = cumul_loss

        # evaluate the loss on the test set
        with torch.no_grad():
            cumul_loss = 0
            count_batch = 0
            for batch in X_test:
                try:
                    batch = torch.stack(batch.split(100), dim=1)
                    h0 = torch.zeros(self.batch_size, self.dim_latent)
                except RuntimeError:
                    batch = torch.stack(batch.split(100)[:-1], dim=1)
                    h0 = torch.zeros(batch.shape[1], self.dim_latent)
                # predict the last character of each window from all the previous ones
                H = state.model.forward(batch.narrow(0, 0, len(batch) - 1).to(device),
                                        h0.to(device))
                pred = state.model.decode(torch.unsqueeze(H[-1], 0))
                real = batch.argmax(dim=2)
                real = real[-1]
                loss = loss_func(pred[0], real.to(device))
                cumul_loss += loss.item()
                count_batch += 1
                # drop references so the GPU tensors can be freed
                del h0, H, batch, pred, real

            writer.add_scalar('Loss/test', cumul_loss, n_iter)
            rec_loss_test[n_iter] = cumul_loss

    return rec_loss_train, rec_loss_test
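# Minimal, self-contained sketch (not part of the original code) of the windowing
# trick used above: a long one-hot sequence is split into chunks of 100 steps
# stacked along dim 1, and the incomplete final chunk is dropped when torch.stack
# raises RuntimeError. Sizes below are illustrative assumptions.
import torch

_total_len, _n_feat = 350, 97
_flat = torch.randn(_total_len, _n_feat)
try:
    _windows = torch.stack(_flat.split(100), dim=1)
except RuntimeError:
    _windows = torch.stack(_flat.split(100)[:-1], dim=1)
print(tuple(_windows.shape))   # (100, 3, 97): the 50-step remainder is dropped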