def _process(self, index):
    if type(index) is list:
        # Sort by length (descending) so the spectrograms can be packed.
        dict_list = sorted(
            [self._transform(s, t) for s, t in self.dataset[index]],
            key=lambda x: x["num_frames"],
            reverse=True)
        spectrogram = pack_sequence([d["spectrogram"] for d in dict_list])
        target_attr = pad_sequence(
            [d["target_attr"] for d in dict_list], batch_first=True)
        silent_mask = pad_sequence(
            [d["silent_mask"] for d in dict_list], batch_first=True)
        return spectrogram, target_attr, silent_mask
    elif type(index) is int:
        s, t = self.dataset[index]
        data_dict = self._transform(s, t)
        return (data_dict["spectrogram"],
                data_dict["target_attr"],
                data_dict["silent_mask"])
    else:
        raise ValueError("Unsupported index type({})".format(type(index)))
def pad_collate(batch):
    xx, y = zip(*batch)
    dev = y[0].device
    pack = pack_sequence(xx, enforce_sorted=False)
    # torch.stack keeps this working for non-scalar label tensors, where
    # torch.tensor(tuple_of_tensors) would raise.
    y = torch.stack(y).to(dev)
    return pack, y
def evaluate_base_lstm(hyp_params, model_path, dev_path, log_path):
    """
    Evaluate LSTM model by calculating the accuracy over a test set.

    Parameters
    ----------
    hyp_params : tuple(int)
        5-tuple containing the number of words, the embedding size, the
        hidden size, the batch size, and the maximum sentence length of
        the model.
    model_path : str
        Path of file containing the weights of the model.
    dev_path : str
        Path of file containing the set on which to evaluate the model.
    log_path : str
        Path to write the results to.
    """
    num_words, emb_size, hidden_size, batch_size, max_len = hyp_params
    model = EmbAttLSTM(num_words, emb_size, hidden_size, batch_size, max_len)
    model.load_state_dict(torch.load(model_path))
    model.eval()
    with open(dev_path, "rb") as f:
        caps, objs, labels = pickle.load(f)
    corr = 0
    total = 0
    true_pos = 0
    false_pos = 0
    true_neg = 0
    false_neg = 0
    count = 0
    for (cap_batch, obj_batch), label_batch in zip(zip(caps, objs), labels):
        cap_batch.sort(key=len, reverse=True)
        cap_batch = rnn.pack_sequence(cap_batch)
        cap_batch, _ = rnn.pad_packed_sequence(cap_batch,
                                               padding_value=PAD_INDEX)
        cap_batch = cap_batch.unsqueeze(2)
        obj_batch.sort(key=len, reverse=True)
        obj_batch = rnn.pack_sequence(obj_batch)
        obj_batch, _ = rnn.pad_packed_sequence(obj_batch,
                                               padding_value=PAD_INDEX)
        obj_batch = obj_batch.unsqueeze(2)
        preds = model(cap_batch, obj_batch)
        for pred, label in zip(preds, label_batch):
            count += 1
            if count % 20000 == 0 or count == 100:
                print("processed", count, "examples ...")
            pred = torch.argmax(pred)
            label = torch.argmax(label)
            total += 1
            if pred == label:
                corr += 1
            if label == 1 and pred == 1:
                true_pos += 1
            if label == 0 and pred == 1:
                false_pos += 1
            if label == 0 and pred == 0:
                true_neg += 1
            if label == 1 and pred == 0:
                false_neg += 1
    acc = corr / total
    prec = true_pos / (true_pos + false_pos)
    rec = true_pos / (true_pos + false_neg)
    # Precision/recall for the negative class: TN / (TN + FN) and
    # TN / (TN + FP). The original divided by the wrong counts.
    neg_prec = true_neg / (true_neg + false_neg)
    neg_rec = true_neg / (true_neg + false_pos)
    with open(log_path, "w") as log_file:
        log_file.write("EmbAttModel\n")
        log_file.write(str(hyp_params) + "\n")
        log_file.write("parameter file: " + model_path + "\n")
        log_file.write("evaluated on: " + dev_path + "\n")
        log_file.write("accuracy: " + str(acc) + "\n")
        log_file.write("precision: " + str(prec) + "\n")
        log_file.write("recall: " + str(rec) + "\n")
        log_file.write("neg precision: " + str(neg_prec) + "\n")
        log_file.write("neg recall: " + str(neg_rec) + "\n")
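# A hypothetical invocation of the evaluator above; the hyperparameter
# values and file paths below are placeholders, not taken from the
# original project. The 5-tuple follows the documented order
# (num_words, emb_size, hidden_size, batch_size, max_len).
evaluate_base_lstm(
    hyp_params=(20000, 128, 256, 32, 50),
    model_path="emb_att_lstm.pt",
    dev_path="dev_set.pkl",
    log_path="eval_log.txt",
)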
num_batches = len(caps)
print("loaded data")
model = EmbAttLSTM(NUM_WORDS, EMB_SIZE, HIDDEN_SIZE, BATCH_SIZE, MAX_LEN)
# size_average=False is deprecated; reduction="sum" is the equivalent.
loss_fn = torch.nn.MSELoss(reduction="sum")
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
for epoch in range(NUM_EPOCHS):
    i = 0
    for (cap_batch, obj_batch), label_batch in zip(zip(caps, objs), labels):
        cap_batch.sort(key=len, reverse=True)
        cap_batch = rnn.pack_sequence(cap_batch)
        cap_batch, _ = rnn.pad_packed_sequence(cap_batch,
                                               padding_value=PAD_INDEX)
        cap_batch = cap_batch.unsqueeze(2)
        obj_batch.sort(key=len, reverse=True)
        obj_batch = rnn.pack_sequence(obj_batch)
        obj_batch, _ = rnn.pad_packed_sequence(obj_batch,
                                               padding_value=PAD_INDEX)
        obj_batch = obj_batch.unsqueeze(2)
        label_batch = torch.stack(label_batch)
        model.zero_grad()
        model.reset_state()
def forward(self, packed_srcs, packed_srcs_positions, packed_tgts,
            packed_tgts_positions, packed_paths, packed_paths_positions,
            focus_num_of_paths, before_ctx_num_of_paths,
            after_ctx_num_of_paths):
    """
    :param packed_srcs: PackedSequence of shape (src_length, batch_1)
    :param packed_srcs_positions: PackedSequence of shape (src_length, batch_1)
    :param packed_tgts: PackedSequence of shape (tgt_length, batch_1)
    :param packed_tgts_positions: PackedSequence of shape (tgt_length, batch_1)
    :param packed_paths: PackedSequence of shape (path_length, batch_1)
    :param packed_paths_positions: PackedSequence of shape (path_length, batch_1)
    :param focus_num_of_paths: list of per-example path counts for the focus
    :param before_ctx_num_of_paths: list of path counts for the preceding context
    :param after_ctx_num_of_paths: list of path counts for the following context
    :return: (packed_mixed_encoded_path, packed_encoded_ctx, h), where h is
        of shape (num_layers, batch, h_dim)
    """
    batch_size = len(focus_num_of_paths)

    # (num_all_paths, h_dim)
    encoded_path = self.path_encoder(packed_srcs, packed_srcs_positions,
                                     packed_tgts, packed_tgts_positions,
                                     packed_paths, packed_paths_positions)
    # List concatenation: focus paths first, then before/after context paths.
    num_of_paths = (focus_num_of_paths + before_ctx_num_of_paths +
                    after_ctx_num_of_paths)
    encoded_path_list = torch.split(encoded_path, num_of_paths, dim=0)
    encoded_focus_path_list = encoded_path_list[:batch_size]
    before_ctx_encoded_path_list = encoded_path_list[batch_size:2 * batch_size]
    after_ctx_encoded_path_list = encoded_path_list[-batch_size:]

    # h_list = list(map(lambda t: torch.mean(torch.cat(t, dim=0), dim=0).unsqueeze(dim=0).unsqueeze(dim=0),
    #                   zip(before_ctx_encoded_path_list, encoded_focus_path_list, after_ctx_encoded_path_list)))
    #
    # # (num_layers, batch_size, h_dim)
    # h = torch.cat(h_list, dim=1).repeat(self.num_layers, 1, 1)

    packed_encoded_path = pack_sequence(encoded_focus_path_list,
                                        enforce_sorted=False)

    # TODO: Consider mixing the paths with Transformer before the operation_mix
    packed_mixed_encoded_path = self.operation_mix(packed_encoded_path)

    split_tokens = [self.split_token] * batch_size
    # real_batch_size * ((before_ctx_num_of_paths, h_dim), (1, h_dim),
    #                    (after_ctx_num_of_paths, h_dim))
    ctx_encoded_path_list = list(
        map(lambda t: torch.cat(t, dim=0),
            zip(before_ctx_encoded_path_list, split_tokens,
                after_ctx_encoded_path_list)))
    ctx_encoded_path_packed = pack_sequence(ctx_encoded_path_list,
                                            enforce_sorted=False)
    packed_encoded_ctx = ctx_encoded_path_packed
    if self.ctx_encoder is not None:
        # (real_batch_size, num_of_paths, num_directions * h_dim)
        packed_encoded_ctx = self.ctx_encoder(ctx_encoded_path_packed)

    padded_encoded_path, encoded_lengths = pad_packed_sequence(
        packed_encoded_path, batch_first=True)
    padded_encoded_ctx_path, encoded_ctx_lengths = pad_packed_sequence(
        packed_encoded_ctx, batch_first=True)
    lengths = encoded_lengths + encoded_ctx_lengths
    h = (padded_encoded_path.sum(dim=1) +
         padded_encoded_ctx_path.sum(dim=1)) / lengths.to(
             self.device).view(-1, 1)
    h = h.unsqueeze(dim=0).repeat(self.num_layers, 1, 1)
    return packed_mixed_encoded_path, packed_encoded_ctx, h
def input_packing(_list):
    tensor_list = [tensor_from_sentence(sentence) for sentence in _list]
    return pack_sequence(tensor_list)
def load_sequence_data_generator(self, total_batch, tr_f, batch_size, idx_pe):
    with open(os.path.join(self.data_dir, 'profile2id.pkl'), 'rb') as f:
        profile2id = pickle.load(f)
    per1_users = self.tp.loc[self.tp.user_per == 1].userId.map(
        lambda x: profile2id[x]).unique()
    per2_users = self.tp.loc[self.tp.user_per == 2].userId.map(
        lambda x: profile2id[x]).unique()

    if tr_f == 'train':
        with open('./data/ml-1m/tr_users.pkl', 'rb') as f:
            users = pickle.load(f)
    elif tr_f == 'valid':
        with open('./data/ml-1m/vd_users.pkl', 'rb') as f:
            users = pickle.load(f)
        with open('./data/ml-1m/vd_users_uid_fix_2.pkl', 'rb') as f:
            vd_uid = pickle.load(f)
    else:
        with open('./data/ml-1m/te_users.pkl', 'rb') as f:
            users = pickle.load(f)

    uid = []
    if tr_f == 'valid':
        for us in users:
            uss = profile2id[us]
            if uss in vd_uid:
                uid.append(uss)
    else:
        for us in users:
            uid.append(profile2id[us])
    uid.sort()
    uid = np.array(uid)
    uid = uid[idx_pe]

    for i in range(total_batch):
        sequence_data_list = []
        per_list = []
        user_ba = uid[i * batch_size:(i + 1) * batch_size]
        for u in user_ba:
            if u in per1_users:
                per = 0
            elif u in per2_users:
                per = 1
            else:
                per = 2
            with open(
                    self.data_dir +
                    f'/item_genomes_v2/period_{per + 1}/user_{u}.pkl',
                    'rb') as f:
                sequence_data = pickle.load(f)
            sequence_data_list.append(sequence_data)
            per_list.append(per)
        # (translated comment: the ordering here was completely wrong)
        # Sort sequences by length, longest first, as pack_sequence expects.
        order = np.argsort([len(item) for item in sequence_data_list])
        item_sorted = [
            torch.LongTensor(sequence_data_list[i]) for i in order[::-1]
        ]
        label_sorted = [per_list[i] for i in order[::-1]]
        item = pack_sequence(item_sorted)
        yield (order[::-1], item,
               torch.tensor(label_sorted, dtype=torch.long))
def post_step(self, new_observations, rewards, dones, mean_reward,
              mean_success):
    # saving metrics
    self.mean_reward = mean_reward
    self.mean_success = mean_success
    # collecting first action in chain
    if self.chain_step == 0:
        self.action_batch = self.last_actions
    # collecting rewards and dones from all states in chain
    self.reward_batch.extend(rewards)
    self.done_batch.extend(dones)
    # add observation in environment
    for i in range(self.num_envs):
        # add observation only to sequence if episode has not already ended
        if self.chain_step == 0 or self.done_batch[
                (self.chain_step - 1) * self.num_envs + i] == 0:
            self.state_sequences[i].append(new_observations[i])
            self.episode_step[i] += 1
    # update counter
    self.chain_step += 1
    self.step_count += 1
    ret = 0
    # check if chain completed
    if self.chain_step >= REWARD_STEPS:
        # get sequences from start of episode till first state in chain
        # get sequences from first state in chain till end
        first_state_sequences_v = []
        last_state_sequences_v = []
        for i in range(self.num_envs):
            first_state_sequences_v.append(
                torch.FloatTensor(
                    self.state_sequences[i][:(self.first_state_idx[i] +
                                              1)]).to(self.device))
            last_state_sequences_v.append(
                torch.FloatTensor(
                    self.state_sequences[i][(self.first_state_idx[i] +
                                             1):]).to(self.device))
        first_packed_sequence = pack_sequence(first_state_sequences_v,
                                              enforce_sorted=False)
        last_packed_sequence = pack_sequence(last_state_sequences_v,
                                             enforce_sorted=False)
        # clear gradients
        self.optimizer.zero_grad()
        # forward first sequences
        policy_v, value_v, hidden_states = self.net(
            first_packed_sequence,
            self.net.get_initial_hidden(self.device, self.num_envs))
        # get expected value from last state
        _, last_values_v, _ = self.net(last_packed_sequence, hidden_states)
        # calculate total value from all steps in chain
        total_values = []
        for e in range(self.num_envs):
            total_reward = 0.0
            step_idx = None
            for i in range(REWARD_STEPS):
                step_idx = (REWARD_STEPS - i - 1) * self.num_envs + e
                total_reward *= GAMMA
                total_reward += self.reward_batch[step_idx]
                if self.done_batch[step_idx] == 1:  # stop if episode is done
                    self.episode_count += 1
                    break
            if self.done_batch[step_idx] == 0:
                # add estimated value for final state if episode is not done
                total_reward += self.final_gamma * last_values_v[e].data.cpu()
            total_values.append(total_reward)
        total_values_v = torch.FloatTensor(total_values).to(self.device)
        # calculate value loss
        loss_value_v = nn.functional.mse_loss(value_v.squeeze(-1),
                                              total_values_v)
        # calculate policy loss
        log_prob_v = nn.functional.log_softmax(policy_v, dim=1)
        advantage_v = total_values_v - value_v.detach()
        actions_v = torch.LongTensor(self.action_batch).to(self.device)
        log_prob_actions_v = advantage_v * log_prob_v[range(self.num_envs),
                                                      actions_v]
        loss_policy_v = -log_prob_actions_v.mean()
        # apply softmax and calculate entropy loss
        prob_v = nn.functional.softmax(policy_v, dim=1)
        loss_entropy_v = ENTROPY_BETA * (prob_v * log_prob_v).sum(dim=1).mean()
        # calculate policy gradients
        loss_policy_v.backward(retain_graph=True)
        grads = numpy.concatenate([
            p.grad.data.cpu().numpy().flatten()
            for p in self.net.parameters() if p.grad is not None
        ])
        # calculate entropy and value gradients
        loss_v = loss_entropy_v + loss_value_v
        loss_v.backward()
        nn.utils.clip_grad_norm_(self.net.parameters(), CLIP_GRAD)
        self.optimizer.step()
        # add policy loss to get total loss
        loss_v += loss_policy_v
        # save stats
        self.last_stats = [
            ("advantage", self.tensorToFloat(advantage_v)),
            ("values", self.tensorToFloat(value_v)),
            ("batch rewards", float(numpy.mean(total_values))),
            ("loss entropy", self.tensorToFloat(loss_entropy_v)),
            ("loss policy", self.tensorToFloat(loss_policy_v)),
            ("loss value", self.tensorToFloat(loss_value_v)),
            ("loss total", self.tensorToFloat(loss_v)),
            ("grad l2", float(numpy.sqrt(numpy.mean(numpy.square(grads))))),
            ("grad max", float(numpy.max(numpy.abs(grads)))),
            ("grad var", float(numpy.var(grads))),
        ]
        # check best mean reward
        if self.mean_reward > self.best_reward and self.episode_count >= 100:
            self.save_model_weights(AGENT_NAME + "_best.dat")
            self.best_reward = self.mean_reward
        # clear batches
        self.action_batch.clear()
        self.reward_batch.clear()
        self.done_batch.clear()
        self.chain_step = 0
        # get new hidden states
        new_state_sequences = []
        for i in range(self.num_envs):
            new_state_sequences.append(
                torch.FloatTensor(self.state_sequences[i][:-1]).to(
                    self.device))
        new_packed_sequence = pack_sequence(new_state_sequences,
                                            enforce_sorted=False)
        self.last_policy, _, (self.last_h, self.last_c) = self.net(
            new_packed_sequence,
            self.net.get_initial_hidden(self.device, self.num_envs))
        # reset episode step counter if episode done
        for i in range(self.num_envs):
            if dones[i] == 1:
                self.episode_step[i] = 0
                self.state_sequences[i].clear()
                # environment will be reset -> reset hidden states
                self.last_h[0, i, :] = 0
                self.last_c[0, i, :] = 0
        ret = 0  # chain complete -> reset environments
    else:
        # chain still incomplete
        ret = -1
    return ret
def LSTMCollate(samples):
    sequences, labels = zip(*samples)
    sequences = pack_sequence([torch.from_numpy(s) for s in sequences],
                              enforce_sorted=False)
    labels = torch.tensor(labels, dtype=torch.float32)
    return sequences, labels
def forward(self, inputs):
    # Passing the input through the embedding model in order to retrieve
    # the embeddings.
    # Setting output formatting
    output = {
        "embeddings1": None,      # Context-free representations
        "embeddings2": None,      # 1-lvl context representations
        "embeddings3": None,      # 2-lvl context representations
        "embeddings3_fwd": None,  # going to be used for LM
        "embeddings3_rev": None,  # going to be used for LM
        "embeddings4": None,      # POS-refined word embeddings
        "length": None            # batch length
    }
    # The output will be computed for all datasets:
    #   "dataset_1": None,  # output for dataset 1
    #   "dataset_2": None,  # output for dataset 2
    #   ...
    #   "dataset_n": None   # output for dataset n
    output.update({dataset: None for dataset in self.dataset2id})

    embeddings1, lens = self.charBILSTM(inputs)  # Char BILSTM
    output["embeddings1"] = embeddings1.clone()  # Saving output
    embeddings2, lens, _ = self.wordBILSTM1(
        (embeddings1, lens))  # 1-Word BILSTM
    output["embeddings2"] = embeddings2.clone()  # Saving output
    embeddings3, lens, (rev_embeddings3, fwd_embeddings3) = self.wordBILSTM2(
        (embeddings2, lens))
    output["embeddings3"] = embeddings3.clone()  # Saving output
    output["embeddings3_rev"] = rev_embeddings3  # Saving output
    output["embeddings3_fwd"] = fwd_embeddings3  # Saving output
    output["length"] = max(lens)                 # Saving output

    # Sequence packing
    embeddings3 = rnn.pack_sequence(embeddings3, enforce_sorted=False)

    # Passing the embeddings through the bilstm layer(s)
    refined_embeddings, _ = self.tag_bilstm(embeddings3)
    refined_embeddings, _ = rnn.pad_packed_sequence(refined_embeddings,
                                                    batch_first=True)
    output["embeddings4"] = refined_embeddings.clone()

    # Applying dropout
    refined_embeddings = self.dropout(refined_embeddings)

    # Updating view: B x L x I (batch_size x length x input_size)
    refined_embeddings = refined_embeddings.contiguous().view(
        -1, output["length"], self.n_tag_bilstm_hidden * 2)

    # Passing through the final layer for each dataset and saving outputs
    output.update({
        name: self.classifiers[idx](refined_embeddings)
        for idx, name in enumerate(self.dataset2id)
    })
    return output
def _apply_encoder(self, batch: List[List[int]], encoder: Encoder):
    dev = self.emb.weight.device
    batch_t = [self.emb(torch.tensor(sample).to(dev)) for sample in batch]
    batch_seq = rnn_utils.pack_sequence(batch_t, enforce_sorted=False)
    return encoder(batch_seq)
from torch.nn.utils.rnn import pack_sequence
import torch

a = torch.tensor([1, 2, 3])
b = torch.tensor([4, 5])
c = torch.tensor([6])
pack_sequence([a, b, c])
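# For reference, the call above interleaves the sequences step by step and
# records how many sequences are still active at each time step:
#
#   PackedSequence(data=tensor([1, 4, 6, 2, 5, 3]),
#                  batch_sizes=tensor([3, 2, 1]),
#                  sorted_indices=None, unsorted_indices=None)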
def forward(self, features):
    # features: n, t(variable), f
    n = len(features)
    f = len(features[0][0])

    # PACKING
    features = rnnUtils.pack_sequence(features)

    # Pyramidal BLSTM: after each layer, halve the time dimension by
    # concatenating every two consecutive frames.
    h0, _ = self.rnn_0(features)
    h0, lengths0 = rnnUtils.pad_packed_sequence(h0, batch_first=False,
                                                padding_value=0.0)
    lengths0 = lengths0 // 2
    h0 = h0[0:lengths0[0] * 2].transpose(0, 1)
    h0_size = h0.size()
    h0 = h0.contiguous().view(h0_size[0], int(h0_size[1] / 2),
                              h0_size[2] * 2)
    h0 = h0.transpose(0, 1)
    h0 = rnnUtils.pack_padded_sequence(h0, lengths0, batch_first=False)

    h1, _ = self.rnn_1(h0)
    h1, lengths1 = rnnUtils.pad_packed_sequence(h1, batch_first=False,
                                                padding_value=0.0)
    lengths1 = lengths1 // 2
    h1 = h1[0:lengths1[0] * 2].transpose(0, 1)
    h1_size = h1.size()
    h1 = h1.contiguous().view(h1_size[0], int(h1_size[1] / 2),
                              h1_size[2] * 2)
    h1 = h1.transpose(0, 1)
    h1 = rnnUtils.pack_padded_sequence(h1, lengths1, batch_first=False)

    h2, _ = self.rnn_2(h1)
    h2, lengths2 = rnnUtils.pad_packed_sequence(h2, batch_first=False,
                                                padding_value=0.0)
    lengths2 = lengths2 // 2
    h2 = h2[0:lengths2[0] * 2].transpose(0, 1)
    h2_size = h2.size()
    h2 = h2.contiguous().view(h2_size[0], int(h2_size[1] / 2),
                              h2_size[2] * 2)
    h2 = h2.transpose(0, 1)
    h2 = rnnUtils.pack_padded_sequence(h2, lengths2, batch_first=False)

    h3, _ = self.rnn_3(h2)
    h3, lengths3 = rnnUtils.pad_packed_sequence(h3, batch_first=False,
                                                padding_value=0.0)

    # Mask out padded positions: mask[i, t] = 1 for valid frames.
    if torch.cuda.is_available():
        mask = torch.zeros((h3.size()[1], h3.size()[0])).cuda()
    else:
        mask = torch.zeros((h3.size()[1], h3.size()[0]))
    for i in range(h3.size()[1]):
        mask[i][0:lengths3[i]] = torch.ones(lengths3[i])

    key = self.key_layer(h3)
    value = self.val_layer(h3)
    return key, value, mask
# Hyperparams
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.8)
criterion = nn.MSELoss()

# just for sorting
def getLen(item):
    return len(item)

# PREPARE DATA FOR TRAINING
sorted_data = sorted(training_data, key=getLen, reverse=True)
prepared_data = [[seq[1:4] for seq in x] for x in sorted_data]
packed_data = pack_sequence([torch.tensor(x) for x in prepared_data])
prepared_targets = [[seq[4:5] for seq in x] for x in sorted_data]
max_size = len(sorted_data[0])
# pad_packed_sequence returns the padded batch and the sequence lengths.
batches, lengths = pad_packed_sequence(packed_data)
epochs = 30

# TRAINING OR TEST
if training:
    for k in range(epochs):
        for i in range(len(sorted_data)):
            # INPUT DATA FOR NEURONS
            # (Variable is deprecated; plain tensors carry gradients now.)
            batch = batches[:, i:i + 1]
            input_seq = batch.float()
            target = torch.tensor([prepared_targets[i][0]]).float()
print('lengths:', lengths)

# Make a Tensor of shape (Batch x Maximum_Sequence_Length)
padded_sequence = pad_sequence(X, batch_first=True)  # X is now a padded sequence
print(padded_sequence)
print(padded_sequence.shape)

# Sort by descending lengths
sorted_idx = sorted(range(len(lengths)), key=lengths.__getitem__,
                    reverse=True)
sorted_X = [X[idx] for idx in sorted_idx]

# Check converted result
for sequence in sorted_X:
    print(sequence)

packed_sequence = pack_sequence(sorted_X)
print(packed_sequence)

# one-hot embedding using PaddedSequence
eye = torch.eye(len(char_set))  # Identity matrix of shape (len(char_set), len(char_set))
embedded_tensor = eye[padded_sequence]
print(embedded_tensor.shape)  # shape: (Batch_size, max_sequence_length, number_of_input_tokens)

# one-hot embedding using PackedSequence
embedded_packed_seq = pack_sequence([eye[X[idx]] for idx in sorted_idx])
print(embedded_packed_seq.data.shape)

# declare RNN
rnn = torch.nn.RNN(input_size=len(char_set), hidden_size=30,
                   batch_first=True)
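# A minimal continuation (not in the original snippet): feed the packed
# one-hot batch through the RNN declared above, then unpack it back to a
# padded tensor. Assumes pad_packed_sequence is imported from
# torch.nn.utils.rnn like pack_sequence above.
packed_out, hidden = rnn(embedded_packed_seq)
padded_out, out_lengths = pad_packed_sequence(packed_out, batch_first=True)
print(padded_out.shape)  # (batch_size, max_sequence_length, 30)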
def train(self):
    print('Started Training.\n')
    tr_iter = 0
    val_iter = 0
    best_prec = 0.0
    for epoch in range(self._num_epochs):
        if (epoch + 1) // 3 == 0:
            self.adjust_learning_rate(epoch + 1)
        num_batches = len(self._train_dataset_loader)

        for batch_id, (imgT, quesT, gT) in enumerate(
                self._train_dataset_loader):
            self._model.train()  # Set the model to train mode
            current_step = epoch * num_batches + batch_id

            # ============
            # TODO: Run the model and get the ground truth answers that
            # you'll pass to your optimizer. This logic should be generic;
            # not specific to either the Simple Baseline or CoAttention.
            if self.method != 'simple':
                quesT = rnn.pack_sequence(quesT)
                imgT = imgT.to(self.DEVICE)
                imgT = self.img_enc(imgT)
                imgT = imgT.view(imgT.size(0), imgT.size(1), -1)
            else:
                imgT = imgT.to(self.DEVICE)
            quesT, gT = quesT.to(self.DEVICE), gT.to(self.DEVICE)
            predicted_answer = self._model(imgT, quesT)  # TODO
            ground_truth_answer = torch.squeeze(gT)  # TODO
            # ============

            # Optimize the model according to the predictions
            loss = self._optimize(predicted_answer, ground_truth_answer)

            if (current_step + 1) % self._log_freq == 0:
                print("Epoch: {}, Batch {}/{} has loss {}".format(
                    epoch, batch_id, num_batches, loss))
                # TODO: you probably want to plot something here
                self.writer.add_scalar('train/loss', loss.item(), tr_iter)
                tr_iter = tr_iter + 1

            # if (current_step + 1) % self._test_freq == 0:
            #     self._model.eval()
            #     val_accuracy = self.validate()
            #     print("Epoch: {} has val accuracy {}".format(epoch, val_accuracy))
            #
            #     # TODO: you probably want to plot something here
            #     self.writer.add_scalar('valid/accuracy', val_accuracy, val_iter)
            #     val_iter = val_iter + 1

        if (epoch + 1) % self._save_freq == 0 or epoch == self._num_epochs - 1:
            val_accuracy = self.validate()
            print("Epoch: {} has val accuracy {}".format(epoch, val_accuracy))
            self.writer.add_scalar('valid/accuracy', val_accuracy, val_iter)
            val_iter = val_iter + 1

            # remember best val_accuracy and save checkpoint
            is_best = val_accuracy > best_prec
            best_prec = max(val_accuracy, best_prec)
            self.save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'state_dict': self._model.state_dict(),
                    'best_prec': best_prec
                },  # 'optimizer': optimizer.state_dict()},
                is_best,
                self.chk_dir + 'checkpoint_' + str(epoch + 1) + '.pth.tar')

    # Closing tensorboard logger
    logdir = os.path.join('./tb_',
                          datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    self.writer.export_scalars_to_json(logdir + 'tb_summary.json')
    self.writer.close()
if __name__ == "__main__":
    import random
    import numpy as np
    import torch

    # Test Encoder
    indim, encdim = 10, 5
    batchsize = 8
    net = SeqEncoder(indim)
    print(net)
    xlst = [np.zeros((int((batchsize - ii) * 7), indim)) + ii
            for ii in range(batchsize)]
    x = pack_sequence(
        sorted([torch.from_numpy(x).to(dtype=torch.float32) for x in xlst],
               reverse=True,
               key=len))
    y = net(x)
    print(y.data.shape)
    loss = y.data.mean()
    loss.backward()

    # Test seq2seq
    vocabsize = 10
    model = ListenAttendSpell(indim, encdim, vocabsize)
    print(model)
    feats = x
    chars = [torch.from_numpy(
                 np.array([random.randrange(vocabsize)
                           for _ in range(2 * (b + 1))])).long()
             for b in range(batchsize)]
    y = model(feats, chars)
    print([len(p) for p in y])
def fit(self, X, y):
    # =====DATA-PREPARATION=================================================
    # y numpy array values into torch tensors
    self.train()
    if not isinstance(y, torch.Tensor):
        y = torch.from_numpy(y.astype("float32"))
    y = y.to(self.device)
    # split into mini batches
    y_batches = torch.split(y, split_size_or_sections=self.training_batch_size)

    # Since each tensor has a different length, we put them in a list
    # (which does not complain about elements of different sizes).
    if not isinstance(X, torch.Tensor):
        lista_X = [
            torch.from_numpy(i.astype("float32")).view(
                -1, self.input_size).to(self.device) for i in X
        ]
    else:
        lista_X = [i.view(-1, self.input_size) for i in X]
    X_batches = split_into_chunks(lista_X, self.training_batch_size)

    # pytorch only accepts tensors of different sizes inside packed
    # sequences, so we need to convert the batches.
    aux_list = []
    for i in X_batches:
        aux_list.append(pack_sequence(i, enforce_sorted=False))
    X_batches = aux_list
    # =====end-DATA-PREPARATION=============================================

    epochs = self.epochs
    best_validation_loss = 999999
    if self.loss_function is None:
        self.loss_function = nn.MSELoss()
    if self.optimizer is None:
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.0001)

    f = open("loss_log.csv", "w")
    w = csv.writer(f)
    w.writerow(["epoch", "training_loss", "val_loss"])

    for i in tqdm(range(epochs)):
        training_loss = 0
        validation_loss = 0
        for j, (X, y) in enumerate(
                zip(X_batches[:int(len(X_batches) *
                                   (1.0 - self.validation_percent))],
                    y_batches[:int(len(y_batches) *
                                   (1.0 - self.validation_percent))])):
            self.optimizer.zero_grad()
            # We need to reset the LSTM hidden state every batch, or
            # backward() raises an error. The batch size for the cell is
            # simply the batch size of y or X (either works).
            self.hidden_cell = (
                torch.zeros(self.num_directions * self.n_lstm_units,
                            y.shape[0],
                            self.hidden_layer_size).to(self.device),
                torch.zeros(self.num_directions * self.n_lstm_units,
                            y.shape[0],
                            self.hidden_layer_size).to(self.device))
            y_pred = self(X)
            single_loss = self.loss_function(y_pred, y)
            single_loss.backward()
            self.optimizer.step()
            # detach() so the accumulator does not keep the whole graph.
            training_loss += single_loss.detach()
        # Average the losses.
        training_loss = training_loss / (j + 1)

        for j, (X, y) in enumerate(
                zip(X_batches[int(len(X_batches) *
                                  (1.0 - self.validation_percent)):],
                    y_batches[int(len(y_batches) *
                                  (1.0 - self.validation_percent)):])):
            self.hidden_cell = (
                torch.zeros(self.num_directions * self.n_lstm_units,
                            y.shape[0],
                            self.hidden_layer_size).to(self.device),
                torch.zeros(self.num_directions * self.n_lstm_units,
                            y.shape[0],
                            self.hidden_layer_size).to(self.device))
            with torch.no_grad():
                y_pred = self(X)
                single_loss = self.loss_function(y_pred, y)
            validation_loss += single_loss
        # Average the losses.
        validation_loss = validation_loss / (j + 1)

        # Checkpoint the best models found.
        if best_validation_loss > validation_loss:
            # Update the new best loss.
            best_validation_loss = validation_loss
            # torch.save(self, "{:.15f}".format(best_validation_loss) + "_checkpoint.pth")
            torch.save(self, "best_model.pth")

        print(f'\nepoch: {i:1} train_loss: {training_loss.item():10.10f}',
              f'val_loss: {validation_loss.item():10.10f}')
        w.writerow([i, training_loss.item(), validation_loss.item()])
        f.flush()
    f.close()

    # At the end of training, save the final model.
    torch.save(self, "last_training_model.pth")
    self.eval()
    # Returns the best model found so far.
    return torch.load("best_model.pth")
def collateFunction(batch):
    # batch contains a list of tuples of structure (sequence, target)
    data = [item[0] for item in batch]
    data = rnn_utils.pack_sequence(data, enforce_sorted=False)
    targets = torch.LongTensor([item[1] for item in batch])
    return [data, targets]
import torch
import torch.nn.utils.rnn as rnn_utils
import torch.nn as nn

a = torch.Tensor([[1], [2], [3]])
b = torch.Tensor([[4], [5]])
c = torch.Tensor([[6]])
packed = rnn_utils.pack_sequence([a, b, c])

lstm = nn.LSTM(1, 3)
packed_output, (h, c) = lstm(packed)
y = rnn_utils.pad_packed_sequence(packed_output)
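# pad_packed_sequence returns a (padded_output, lengths) pair. With the
# three sequences above (lengths 3, 2, 1) and hidden size 3:
padded, lengths = y
print(padded.shape)  # torch.Size([3, 3, 3]) -> (max_len, batch, hidden)
print(lengths)       # tensor([3, 2, 1])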
def my_collate(batch):
    # batch contains a list of tuples of structure (sequence, target)
    data = [item[0] for item in batch]
    data = pack_sequence(data, enforce_sorted=False)
    targets = [item[1] for item in batch]
    return [data, targets]
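# A minimal sketch of wiring such a collate function into a DataLoader;
# "my_dataset" is a hypothetical dataset yielding (sequence, target) pairs.
from torch.utils.data import DataLoader

loader = DataLoader(my_dataset, batch_size=32, collate_fn=my_collate)
for packed_batch, targets in loader:
    ...  # each packed_batch is already a PackedSequence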
def target_packing(_list):
    # Shift each target sequence right by one step, inserting a zero at the
    # start (teacher-forcing style).
    tensor_list = [
        tensor_from_sentence(
            torch.cat((torch.zeros((1,), device=device),
                       _list[i][:-1, 0]), 0))
        for i in range(len(_list))
    ]
    packed_list = pack_sequence(tensor_list)
    return packed_list
def compute_and_embed_previous_dirs(self, dirs, unpack_results: bool = True,
                                    point_idx=None):
    """
    Runs the self.prev_dirs_embedding layer, if instantiated, and returns
    the model's output. Else, returns the data as is.

    Params
    ------
    dirs: Union[List, torch.tensor]
        Batch of all streamline directions. If it is a tensor, it should be
        of size [nb_points, 3]. If it is a list, the length of the list is
        the number of streamlines in the batch, and each tensor is as
        described above. The batch will be packed and the embedding will be
        run on the resulting tensor.
    unpack_results: bool
        If data was a list, unpack the model's outputs before returning.
        Default: True. Hint: skipping unpacking can be useful if you want
        to concatenate this embedding to your input's packed sequence's
        embedding.
    point_idx: int
        Point of the streamline for which to compute the previous dirs.
    """
    if self.nb_previous_dirs == 0:
        return None

    # Formatting the n previous dirs for all points.
    n_prev_dirs = self.format_previous_dirs(dirs, point_idx=point_idx)

    if point_idx is None:  # "if not point_idx" would wrongly match index 0
        # Not keeping the last point: only useful to get the last
        # direction (ex, last target), but won't be used as an input.
        n_prev_dirs = [s[:-1] for s in n_prev_dirs]

    if self.prev_dirs_embedding is None:
        return n_prev_dirs

    is_list = isinstance(n_prev_dirs, list)
    if is_list:
        # Using PackedSequence's tensor.
        n_prev_dirs_packed = pack_sequence(n_prev_dirs,
                                           enforce_sorted=False)
        n_prev_dirs = n_prev_dirs_packed.data

    # .to() is not in-place; keep the returned tensor.
    n_prev_dirs = n_prev_dirs.to(self.device)

    n_prev_dirs_embedded = self.prev_dirs_embedding(n_prev_dirs)

    if is_list and unpack_results:
        # Packing back to unpack correctly
        batch_sizes = n_prev_dirs_packed.batch_sizes
        sorted_indices = n_prev_dirs_packed.sorted_indices
        unsorted_indices = n_prev_dirs_packed.unsorted_indices
        n_prev_dirs_embedded_packed = PackedSequence(n_prev_dirs_embedded,
                                                     batch_sizes,
                                                     sorted_indices,
                                                     unsorted_indices)
        n_prev_dirs_embedded = unpack_sequence(n_prev_dirs_embedded_packed)

    return n_prev_dirs_embedded
def forward(self, mode, features, uttname_list, source_attr, source_wav,
            target_attr, target_wav_list, feat_length, wav_length, records,
            **kwargs):
    """
    Args:
        mode: string
            'train', 'dev' or 'test' for this forward step
        features: list of unpadded features [feat1, feat2, ...]
            each feat is in torch.FloatTensor and already put in the device
            assigned by command-line args
        uttname_list: list of utterance names
        source_attr: dict containing the STFT information for the mixture.
            source_attr['magnitude'] stores the STFT magnitude,
            source_attr['phase'] stores the STFT phase and
            source_attr['stft'] stores the raw STFT feature.
            The shape is [bs, max_length, feat_dim]
        source_wav: raw waveform for the mixture,
            with shape [bs, max_wav_length]
        target_attr: similar to source_attr; contains the STFT information
            for individual sources. It only has two keys
            ('magnitude' and 'phase');
            target_attr['magnitude'] is a list of length n_srcs, and
            target_attr['magnitude'][i] has the shape
            [bs, max_length, feat_dim]
        target_wav_list: raw waveforms for the individual sources;
            a list of length n_srcs. target_wav_list[0] has the shape
            [bs, max_wav_length]
        feat_length: length of STFT features
        wav_length: length of raw waveform
        records: defaultdict(list); by appending contents into records,
            these contents can be averaged and logged on Tensorboard later
            by self.log_records every log_step
    Return:
        loss: the loss to be optimized, should not be detached
    """
    # match the feature length to STFT feature length
    features = match_length(features, feat_length)
    features = pack_sequence(features)
    mask = self.model(features)

    # evaluate the separation quality of predicted sources
    if mode in ('dev', 'test'):
        predict_stfts = [
            torch.squeeze(m * source_attr['stft'].to(device)) for m in mask
        ]
        predict_stfts_np = [
            np.transpose(s.data.cpu().numpy()) for s in predict_stfts
        ]
        assert len(wav_length) == 1
        # reconstruct the signal using iSTFT
        predict_srcs_np = [
            librosa.istft(stft_mat,
                          hop_length=self.upstream_rate,
                          win_length=self.datarc['win_length'],
                          window=self.datarc['window'],
                          center=self.datarc['center'],
                          length=wav_length[0])
            for stft_mat in predict_stfts_np
        ]
        predict_srcs_np = np.stack(predict_srcs_np, 0)
        gt_srcs_np = torch.cat(target_wav_list, 0).data.cpu().numpy()
        mix_np = source_wav.data.cpu().numpy()

        utt_metrics = get_metrics(
            mix_np,
            gt_srcs_np,
            predict_srcs_np,
            sample_rate=self.datarc['rate'],
            metrics_list=COMPUTE_METRICS,
            compute_permutation=True,
        )
        for metric in COMPUTE_METRICS:
            input_metric = "input_" + metric
            assert metric in utt_metrics and input_metric in utt_metrics
            imp = utt_metrics[metric] - utt_metrics[input_metric]
            if metric not in records:
                records[metric] = []
            records[metric].append(imp)

        assert 'batch_id' in kwargs
        if kwargs['batch_id'] % 1000 == 0:
            # Save the prediction every 1000 examples
            records['mix'].append(mix_np)
            records['hypo'].append(predict_srcs_np)
            records['ref'].append(gt_srcs_np)
            records['uttname'].append(uttname_list[0])

    if self.loss_type == "MSE":  # mean square loss
        loss = self.objective.compute_loss(mask, feat_length, source_attr,
                                           target_attr)
    elif self.loss_type == "SISDR":  # end-to-end SI-SNR loss
        loss = self.objective.compute_loss(mask, feat_length, source_attr,
                                           wav_length, target_wav_list)
    else:
        raise ValueError("Loss type not defined.")

    records["loss"].append(loss.item())
    return loss
def __getitem__(self, indices):
    values = [self.values[i] for i in indices]
    # pack_sequence (with the default enforce_sorted=True) requires the
    # sequences sorted by decreasing length.
    return pack_sequence(sorted(values, key=lambda x: -len(x)))
def pack_words(self, ws):
    return pack_sequence(ws)
def collocate(self, batch):
    """
    batch: B * [text_data: [T], mel_data: [T, C]]
    -----
    return: text_data, text_pos, text_len, text_mask,
            mel_data, mel_pos, mel_len, mel_mask, gate, text_pad
        text_data: [B, T], text_len: [B], text_mask: [B, 1, T]
        mel_data: [B, T, C], mel_len: [B], mel_mask: [B, T, T]
        gate: [B, T, 1]
    """
    # sort on text size
    batch = sorted(batch, key=lambda x: x[0].size(0), reverse=True)
    text_data, mel_data = zip(*batch)

    text_max_len = text_data[0].size(0)
    mel_max_len = max([mel.size(0) for mel in mel_data])

    text_pad = torch.zeros(len(text_data), text_max_len,
                           dtype=text_data[0].dtype)
    for idx, text in enumerate(text_data):
        # Fill the padding with a repeated slice of the text's tail.
        size = int(torch.randint(5, 10, [1]))
        pad = text[-size:]
        repeats = text_max_len // size + 1  # integer count for repeat()
        text_pad[idx] = pad.repeat(repeats)[:text_max_len]
        text_pad[idx, :text.size(0)] = text

    text_data = rnn.pack_sequence(text_data)
    text_data, text_len = rnn.pad_packed_sequence(text_data,
                                                  batch_first=True,
                                                  padding_value=0)
    text_pos = torch.arange(0, text_max_len).view(1, -1) + 1

    mel_data = rnn.pack_sequence(mel_data, enforce_sorted=False)
    mel_data, mel_len = rnn.pad_packed_sequence(mel_data,
                                                batch_first=True,
                                                padding_value=0,
                                                total_length=mel_max_len)
    mel_pos = torch.arange(0, mel_max_len).view(1, -1) + 1
    # -----
    text_mask = (text_pos > text_len.unsqueeze(1)).unsqueeze(1)
    mel_mask = (mel_pos > mel_len.unsqueeze(1)).unsqueeze(1)

    # mask_noise = torch.arange(mel_mask.size(-1)).unsqueeze(0) - (mel_len - 1).unsqueeze(-1)
    # mask_noise = torch.sigmoid(mask_noise.to(torch.float)) * 0.0999 + 0.0001
    # mask_noise = torch.randn_like(mask_noise) * mask_noise
    # mel_mask = mel_mask + mask_noise.unsqueeze(1)
    # mel_mask = mel_mask.clamp(0, 1)

    gate = torch.arange(text_len[0]).unsqueeze(0) >= (text_len -
                                                      1).unsqueeze(-1)
    gate = gate.unsqueeze(-1)

    text_att_mask = torch.triu(
        torch.ones(text_mask.size(2), text_mask.size(2), dtype=torch.bool),
        1)
    text_mask = text_att_mask.unsqueeze(0)

    text_mask = text_mask.to(torch.float)
    mel_mask = mel_mask.to(torch.float)
    gate = gate.to(torch.float)
    return (text_data, text_pos, text_len, text_mask, mel_data, mel_pos,
            mel_len, mel_mask, gate, text_pad)
def __init__(self, dataset, sizes, gts, device):
    self.pack = pack_sequence(dataset, enforce_sorted=False)
    self.pack = self.pack.to(device)
    self.gts = gts.to(device)
def get_packed_sequence(self, sentences: list):
    seqs = [
        torch.LongTensor([self.word2idx[word] for word in s])
        for s in sentences
    ]
    return pack_sequence(seqs).to(self.device)
def eval(b):
    with torch.no_grad():
        out_pack, (ht, ct) = lstm(tokens_emb_pack)
        out_pad, out_pad_len = pad_packed_sequence(out_pack,
                                                   batch_first=True)
        y_scores = torch.sum(torch.mul(out_pad.unsqueeze(2), cand_pads), -1)


if not os.path.exists(BERT_EMBS):
    print('Creating BERT Embeddings.')
    start = time.time()
    bert_embs = []
    for b in batch(data):
        tokens, cand_tokens, labels = b
        tokens_pack = pack_sequence([t.to(device) for t in tokens],
                                    enforce_sorted=False)
        cand_packs = [
            pack_sequence([c.to(device) for c in ct], enforce_sorted=False)
            for ct in cand_tokens
        ]
        # only works if you pad with 0
        tokens_mask = (tokens_pack.data > pad_idx).int()
        cand_masks = [(cp.data > pad_idx).int() for cp in cand_packs]
        with torch.no_grad():
            tokens_emb = embed(tokens_pack.data, tokens_mask)
            cand_embs = [
                embed(cp.data, mask)
                for cp, mask in zip(cand_packs, cand_masks)
            ]
        tokens_emb_pack = PackedSequence(tokens_emb,
                                         tokens_pack.batch_sizes,
                                         tokens_pack.sorted_indices,
                                         tokens_pack.unsorted_indices)
        cand_emb_packs = [
            PackedSequence(cand_embs[i], cp.batch_sizes,
                           cp.sorted_indices, cp.unsorted_indices)
            for i, cp in enumerate(cand_packs)
        ]
        cand_pads = pad_sequence(
            [pad_packed_sequence(ce, batch_first=True)[0]
             for ce in cand_emb_packs],
            batch_first=True,
            padding_value=pad_idx)
        bert_embs.append(
            (tokens_emb_pack.to(cpu), cand_pads.to(cpu), labels))
def forward(self, x, is_train=True):
    # (reconstructed method header; the original snippet began mid-method)
    if not is_train:
        x = torch.unsqueeze(x, 0)
    # B x T x F -> B x T x hidden
    x, _ = self.blstm(x)
    if is_train:
        x, _ = pad_packed_sequence(x, batch_first=True)
    x = self.dropout(x)
    # B x T x hidden -> B x T x FD
    x = self.linear(x)
    x = self.activation(x)
    B = x.shape[0]
    if is_train:
        # B x TF x D
        x = x.view(B, -1, self.D)
    else:
        # B x TF x D -> TF x D
        x = x.view(-1, self.D)
    return x


if __name__ == "__main__":
    device = torch.device('cuda:0')
    a = torch.randn((11, 129))
    # b = torch.randn((22, 129))
    # c = torch.randn((33, 129))
    train = pack_sequence([a]).to(device)
    net = DPCL().to(device)
    x = net(train)