def predict_one(self, model, datum, vocab):
    """
    :param model: a trained seq2seq model
    :param datum: a (lemma, feature dict) pair
    :param vocab: the vocabulary used for char/feature id lookup
    :return: the predicted inflected form as a string
    """
    lemma, feats = datum
    x_enc_inputs = BaseMorphData.lemma_chars_to_ids(lemma, vocab)
    x_feature_ids = BaseMorphData.feat_d_to_ids(feats, vocab)

    # TODO: this works only with batch=1; consider batching and transposition instead of viewing
    x_enc_inputs_v = cuda_if_gpu(
        Variable(torch.LongTensor(x_enc_inputs)).view(-1, 1))
    x_feature_ids_v = cuda_if_gpu(
        Variable(torch.LongTensor(x_feature_ids)).view(-1, 1))

    pred_form_char_ids = model.predict_one_instance(
        (x_enc_inputs_v, x_feature_ids_v))
    pred_form_chars = BaseMorphData.unvectorize_seq(pred_form_char_ids, vocab)
    pred_form = ''.join(
        [ch for ch in pred_form_chars if ch not in MORPH_OUTPUT_CLASSES_START])

    return pred_form
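# `cuda_if_gpu` is used throughout this module but defined elsewhere; a minimal
# sketch of what it is assumed to do (an assumption, not the repo's actual
# helper):
import torch

def cuda_if_gpu(t):
    # Move a tensor/Variable to the GPU when CUDA is available, else no-op.
    return t.cuda() if torch.cuda.is_available() else t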
def predict(self, input_var):
    enc_inputs_v, feature_ids_v = input_var
    dec_len = self.max_tgt_len
    batch_size = enc_inputs_v.size()[1]

    # Embedding lookup - chars
    char_embeddings = self.embedding_lookup(enc_inputs_v)  # SL x B x E

    # Encode
    encoder_outputs, encoder_hidden = self.encoder(char_embeddings,
                                                   batch_size=batch_size)

    if self.encoder.rnn.bidirectional:
        # squeeze the outputs of a BiRNN encoder
        # outputs: SL x B x enc_dim
        # hidden: 1 x B x enc_dim
        encoder_outputs = sum_bidirectional_outputs(encoder_outputs,
                                                    self.encoder.rnn)
        encoder_hidden = sum_bidirectional_state(encoder_hidden)

    # Embedding lookup - features
    # B x self.num_embedding_feats * self.embedding_dim
    feat_embeddings = self.embedding_lookup(feature_ids_v).view(batch_size, -1)
    feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

    # Initialize decoder
    dec_hidden = encoder_hidden[:1]  # 1 x B x enc_dim

    # Decode greedily
    prev_y_id = self.BOS_ID
    prev_y_var = cuda_if_gpu(
        Variable(torch.LongTensor([prev_y_id] * batch_size)))
    dec_ids = torch.zeros(dec_len, batch_size)

    for di in range(dec_len):
        prev_y_embeddings = self.embedding_lookup(prev_y_var)

        # B x dec_input_dim
        dec_input = torch.cat((prev_y_embeddings, feat_embeddings), 1)
        dec_output, dec_hidden, attn_weights = self.decoder(
            dec_input, dec_hidden, encoder_outputs)

        unnormalized_scores = self.output_layer(dec_output[0])
        logits = self.softmax(unnormalized_scores)
        topval, topids = logits.data.topk(1)
        prev_y_ids = topids.squeeze(1)  # B

        # bookkeeping
        dec_ids[di] = prev_y_ids
        prev_y_var = cuda_if_gpu(Variable(prev_y_ids))

    return dec_ids
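# `sum_bidirectional_outputs` and `sum_bidirectional_state` are not shown here.
# A minimal sketch of the assumed behavior: merge the forward and backward
# halves of a single-layer BiRNN by summation (the slicing details are
# assumptions, not the repo's actual helpers):
import torch

def sum_bidirectional_outputs(outputs, rnn):
    # outputs: SL x B x 2*H  ->  SL x B x H
    h = rnn.hidden_size
    return outputs[:, :, :h] + outputs[:, :, h:]

def sum_bidirectional_state(hidden):
    # hidden: (num_layers * 2) x B x H  ->  1 x B x H,
    # summing the two directions of the last layer
    return (hidden[-2] + hidden[-1]).unsqueeze(0)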
def predict_one_instance(self, input_var):
    enc_inputs_v, feature_ids_v = input_var
    src_len = enc_inputs_v.size()[0]

    # Embedding lookup - chars
    char_embeddings = self.embedding_lookup(enc_inputs_v)  # SL x B x E

    # Encode
    # SL x B x 2*H, num_layers * num_directions x B x H
    enc_outputs, enc_hidden, enc_cell = self.encoder(
        rnn_input=char_embeddings, batch_size=1)

    # Embedding lookup - features
    # 1 x self.num_embedding_feats * self.embedding_dim
    feat_embeddings = self.embedding_lookup(feature_ids_v).view(1, -1)
    feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

    # Initialize decoder
    # 1 x B x enc_dim
    dec_hidden = sum_bidirectional_state(enc_hidden)
    dec_cell = sum_bidirectional_state(enc_cell)

    # Decode greedily; allow up to 2 * max_tgt_len steps, since STEP symbols
    # are interleaved with character outputs
    dec_ids = []
    prev_y_id = BOS_ID
    prev_y_var = cuda_if_gpu(Variable(torch.LongTensor([prev_y_id])))
    total_decoded_len = 0
    attn_idx = 0

    while (prev_y_id != EOS_ID
           and total_decoded_len < self.max_tgt_len * 2):
        prev_y_embeddings = self.embedding_mat(prev_y_var)
        attended_encoder_output = enc_outputs[attn_idx]

        # 1 x B x dec_input_dim
        dec_input = torch.cat(
            (prev_y_embeddings, feat_embeddings, attended_encoder_output),
            1).unsqueeze(0)
        dec_output, dec_hidden, dec_cell = self.decoder(
            dec_input, dec_hidden, dec_cell)

        unnormalized_scores = self.output_layer(dec_output[0])
        logits = self.softmax(unnormalized_scores)
        topval, topidx = logits.data.topk(1)
        prev_y_id = topidx.cpu().numpy()[0][0]
        total_decoded_len += 1

        # store result
        dec_ids.append(prev_y_id)
        prev_y_var = cuda_if_gpu(Variable(torch.LongTensor([prev_y_id])))

        # if a STEP symbol was emitted, advance the hard-attention index
        if prev_y_id == STEP_ID and attn_idx < src_len - 1:
            attn_idx += 1

    return dec_ids
def predict_one_instance(self, input_var):
    enc_inputs_v, feature_ids_v = input_var

    # Embedding lookup - chars
    char_embeddings = self.embedding_lookup(enc_inputs_v)  # SL x B x E

    # Encode
    # SL x B x 2*H, num_layers * num_directions x B x H
    encoder_outputs, encoder_hidden = self.encoder(char_embeddings,
                                                   batch_size=1)

    if self.encoder.rnn.bidirectional:
        # squeeze the outputs of a BiRNN encoder
        # outputs: SL x B x enc_dim
        # hidden: 1 x B x enc_dim
        encoder_outputs = sum_bidirectional_outputs(encoder_outputs,
                                                    self.encoder.rnn)
        encoder_hidden = sum_bidirectional_state(encoder_hidden)

    # Embedding lookup - features
    # 1 x self.num_embedding_feats * self.embedding_dim
    feat_embeddings = self.embedding_lookup(feature_ids_v).view(1, -1)
    feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

    # Initialize decoder
    dec_hidden = encoder_hidden[:1]  # 1 x B x enc_dim

    # Decode greedily until EOS or the length limit
    dec_ids = []
    prev_y_id = self.BOS_ID
    prev_y_var = cuda_if_gpu(Variable(torch.LongTensor([prev_y_id])))
    total_decoded_len = 0

    while (prev_y_id != self.EOS_ID
           and total_decoded_len < self.max_tgt_len):
        prev_y_embeddings = self.embedding_mat(prev_y_var)

        # B x dec_input_dim
        dec_input = torch.cat((prev_y_embeddings, feat_embeddings), 1)
        dec_output, dec_hidden, attn_weights = self.decoder(
            dec_input, dec_hidden, encoder_outputs)

        unnormalized_scores = self.output_layer(dec_output[0])
        logits = self.softmax(unnormalized_scores)
        topval, topidx = logits.data.topk(1)
        prev_y_id = topidx[0][0]
        total_decoded_len += 1

        # store result
        dec_ids.append(prev_y_id)
        prev_y_var = cuda_if_gpu(Variable(torch.LongTensor([prev_y_id])))

    return dec_ids
def train_step(self, model, np_xy_mat_pair):
    x_var = cuda_if_gpu(Variable(torch.from_numpy(np_xy_mat_pair[0])))
    # targets have to be cast to float for the loss computation
    y_var = cuda_if_gpu(
        Variable(torch.from_numpy(np_xy_mat_pair[1]).type(torch.FloatTensor)))

    logits = model(x_var)
    loss_var = self.calc_loss(logits, y_var)

    return loss_var
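# `calc_loss` is defined elsewhere; given the float-typed targets above, one
# plausible sketch of the method is binary cross-entropy on the raw scores
# (an assumption, not necessarily the repo's actual definition):
import torch.nn.functional as F

def calc_loss(self, logits, y_var):
    # logits: raw model scores; y_var: float targets of the same shape
    return F.binary_cross_entropy_with_logits(logits, y_var)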
def decode_teacher(self, encoder_hidden, encoder_outputs, batch_x_feat_var,
                   batch_y_var):
    """
    Decoding policy 1: teacher forcing, feeding the ground truth label as
    the next decoder input.

    :param encoder_hidden: the last hidden state of the Encoder RNN;
        (num_layers * num_directions) x B x enc_dim
    :param encoder_outputs: SL x B x enc_dim
    :param batch_x_feat_var: feature ids, B x num_feats
    :param batch_y_var: ground truth labels, TL x B
    :return: predicted scores for each decoding step, TL x B x output_size
    """
    dec_len = batch_y_var.size()[0]
    batch_size = batch_y_var.size()[1]

    # B x self.num_embedding_feats * self.embedding_dim
    feat_embeddings = self.embedding_lookup(batch_x_feat_var).view(
        batch_size, -1)
    feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

    # keep only the first layer/direction of the encoder state
    dec_hidden = encoder_hidden[:1]  # 1 x B x enc_dim

    # 1 x B
    prev_y = cuda_if_gpu(
        Variable(torch.LongTensor([self.BOS_ID] * batch_size)))

    # TL x B x output_size
    predicted_logits = cuda_if_gpu(
        Variable(torch.zeros(dec_len, batch_size, self.output_size)))

    # Teacher forcing: feed the target as the next input
    for di in range(dec_len):
        # embedding lookup of a vector of length = B; result: B x E
        prev_y_embeddings = self.embedding_mat(prev_y)

        # B x (embed_dim + num_feats * embed_dim)
        dec_input = torch.cat((prev_y_embeddings, feat_embeddings), 1)

        # 1 x B x dec_output_dim, 1 x B x dec_output_dim, B x SL
        dec_output, dec_hidden, attn_weights = self.decoder(
            dec_input, dec_hidden, encoder_outputs)

        unnormalized_scores = self.output_layer(dec_output[0])
        logits = self.softmax(unnormalized_scores)
        predicted_logits[di] = logits  # store this step's output scores
        prev_y = batch_y_var[di]  # next input

    return predicted_logits
def init_rnn_hidden(rnn, num_directions, batch_size):
    """Return a zero-initialized hidden state for the given RNN."""
    hidden_state = Variable(
        torch.zeros(rnn.num_layers * num_directions, batch_size,
                    rnn.hidden_size))
    return cuda_if_gpu(hidden_state)
def init_gru_state(num_directions, enc_or_dec, batch_size):
    """Return a zero-initialized state for a GRU encoder or decoder."""
    hidden = Variable(
        torch.zeros(enc_or_dec.num_layers * num_directions, batch_size,
                    enc_or_dec.hidden_size))
    return cuda_if_gpu(hidden)
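# A small, self-contained usage sketch for the initializers above; the layer
# sizes are arbitrary example values, not the repo's configuration:
import torch
import torch.nn as nn
from torch.autograd import Variable

rnn = nn.GRU(input_size=16, hidden_size=32, num_layers=1, bidirectional=True)
h0 = init_rnn_hidden(rnn, num_directions=2, batch_size=4)  # 2 x 4 x 32
outputs, h_n = rnn(Variable(torch.zeros(7, 4, 16)), h0)    # outputs: 7 x 4 x 64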
def decode_teacher(self, encoder_hidden, encoder_cell, encoder_outputs,
                   attn_ids_v, feat_ids, dec_targets):
    """
    Decoding policy 1: teacher forcing, feeding the ground truth label as
    the next decoder input.
    """
    dec_len = dec_targets.size()[0]
    batch_size = dec_targets.size()[1]
    batch_indices = cuda_if_gpu(torch.arange(0, batch_size).long())

    # B x self.num_embedding_feats * self.embedding_dim
    feat_embeddings = self.embedding_lookup(feat_ids).view(batch_size, -1)
    feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

    # 1 x B x enc_dim
    dec_hidden = sum_bidirectional_state(encoder_hidden)
    dec_cell = sum_bidirectional_state(encoder_cell)

    # 1 x B
    prev_y = cuda_if_gpu(Variable(torch.LongTensor([BOS_ID] * batch_size)))

    # TL x B x output_size
    predicted_logits = cuda_if_gpu(
        Variable(torch.zeros(dec_len, batch_size, self.output_size)))

    # Teacher forcing: feed the target as the next input
    for di in range(dec_len):
        # embedding lookup of a vector of length = B; result: B x E
        prev_y_embeddings = self.embedding_mat(prev_y)

        # the encoder state each batch element currently attends to: B x enc_dim
        attended_encoder_output = encoder_outputs[attn_ids_v[di],
                                                  batch_indices]

        # 1 x B x dec_input_dim
        dec_input = torch.cat(
            (prev_y_embeddings, feat_embeddings, attended_encoder_output),
            1).unsqueeze(0)
        dec_output, dec_hidden, dec_cell = self.decoder(
            dec_input, dec_hidden, dec_cell)

        unnormalized_scores = self.output_layer(dec_output[0])
        logits = self.softmax(unnormalized_scores)
        predicted_logits[di] = logits  # store this step's output scores
        prev_y = dec_targets[di]  # next input

    return predicted_logits
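# What the fancy indexing `encoder_outputs[attn_ids_v[di], batch_indices]`
# above selects, shown on a toy tensor (shapes and values are made-up
# example data):
import torch

encoder_outputs = torch.arange(0, 24).view(4, 3, 2)  # SL=4, B=3, H=2
attn_positions = torch.LongTensor([0, 2, 1])         # one position per batch element
batch_indices = torch.arange(0, 3).long()
attended = encoder_outputs[attn_positions, batch_indices]  # B x H
# row b of `attended` is encoder_outputs[attn_positions[b], b]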
def make_binary_prediction(self, x, model):
    logit = model(cuda_if_gpu(Variable(torch.from_numpy(x))))
    logit_val = logit.data.cpu().numpy()[0]
    decision = RIGHT if logit_val >= 0.5 else LEFT
    return decision
def batchify_training_data(self, xy_ids, batch_size, is_dev_data):
    logger.debug('Batchifying data')

    x_data_ids, y_data_ids, forms = xy_ids
    data_size = len(x_data_ids)
    # ceiling division, so a final, smaller batch keeps the remainder
    # without ever producing an empty batch
    num_batches = (data_size + batch_size - 1) // batch_size
    data_indices = self.index_data(data_size, mode='no_shuffling')

    batch_pairs = []
    for bi in range(num_batches):
        batch_x = []
        batch_y = []
        curr_batch_indices = data_indices[bi * batch_size:(bi + 1) *
                                          batch_size]
        for idx in curr_batch_indices:
            x_ids = x_data_ids[idx]
            y_ids = y_data_ids[idx]

            x_enc_ids_copy = copy.deepcopy(x_ids)
            batch_x.append(x_enc_ids_copy)

            y_ids_copy = copy.deepcopy(y_ids)
            batch_y.append(y_ids_copy)

        batch_enc_x_var = cuda_if_gpu(Variable(torch.LongTensor(batch_x)))
        batch_dec_y_var = cuda_if_gpu(Variable(torch.LongTensor(batch_y)))
        batch_pairs.append((batch_enc_x_var, batch_dec_y_var))

    if is_dev_data:
        self.dev_references = forms
        logger.info('Saving dev (training) references to --> %s',
                    self.fnames.dev_ref_fn)
        save_txt(self.dev_references, self.fnames.dev_ref_fn)
        assert data_size == len(forms)

    return batch_pairs
def make_one_batch(self, sorted_data, data_indices, bi, batch_size):
    batch_x_enc = []
    batch_x_feat = []
    batch_y = []
    batch_forms = []

    curr_batch_indices = data_indices[bi * batch_size:(bi + 1) * batch_size]
    for idx in curr_batch_indices:
        x_enc_ids, x_feature_ids, y_ids, forms = sorted_data[idx]

        x_enc_ids_copy = copy.deepcopy(x_enc_ids)
        batch_x_enc.append(x_enc_ids_copy)

        x_feature_ids_copy = copy.deepcopy(x_feature_ids)
        batch_x_feat.append(x_feature_ids_copy)

        y_ids_copy = copy.deepcopy(y_ids)
        batch_y.append(y_ids_copy)

        batch_forms.append(forms)

    # features are not padded: every instance has the same, fixed number of them
    batch_x_enc_lens = [len(s) for s in batch_x_enc]
    batch_y_lens = [len(s) for s in batch_y]
    max_enc_seq_len = max(batch_x_enc_lens)
    max_dec_seq_len = max(batch_y_lens)

    batch_enc_x_padded = [
        pad_seq(x, max_enc_seq_len, pad_id=self.vocab.PAD_ID)
        for x in batch_x_enc
    ]
    batch_dec_y_padded = [
        pad_seq(y, max_dec_seq_len, pad_id=self.vocab.PAD_ID)
        for y in batch_y
    ]

    # SL x B and TL x B after transposition
    batch_enc_x_var = cuda_if_gpu(
        Variable(torch.LongTensor(batch_enc_x_padded)).transpose(0, 1))
    batch_dec_y_var = cuda_if_gpu(
        Variable(torch.LongTensor(batch_dec_y_padded)).transpose(0, 1))
    batch_x_feat_var = cuda_if_gpu(Variable(torch.LongTensor(batch_x_feat)))

    return (batch_enc_x_var, batch_x_feat_var, batch_dec_y_var), batch_forms
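# `pad_seq` is assumed to right-pad an id sequence to a fixed length; a minimal
# sketch (the repo's actual helper may differ):
def pad_seq(seq, max_len, pad_id):
    # e.g. pad_seq([5, 8], 4, pad_id=0) -> [5, 8, 0, 0]
    return list(seq) + [pad_id] * (max_len - len(seq))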
def predict_one(self, model, datum, vocab):
    """
    :param model: a trained MLP classifier
    :param datum: a (lemma, feature dict) pair
    :param vocab: the vocabulary used for token/feature id lookup
    :return: the predicted inflected form as a string
    """
    lemma, feats = datum
    x_ids = MorphMLPData.feat_d_to_ids(feats, vocab)
    # the first slot of the input id vector holds the lemma token id
    x_ids[0] = vocab.src_vocab.lookup_tok(lemma)
    x_ids_v = cuda_if_gpu(Variable(torch.LongTensor(x_ids)).view(1, -1))
    pred_form_id = model.predict(x_ids_v)[0]
    pred_form = vocab.tgt_vocab.lookup_id(pred_form_id)
    return pred_form
def predict(self, input_var):
    enc_inputs_v, feature_ids_v = input_var
    src_len = enc_inputs_v.size()[0]
    attn_clamp_value = src_len - 1
    # decode for up to 2 * max_tgt_len steps, since STEP symbols are
    # interleaved with character outputs
    dec_len = self.max_tgt_len * 2
    batch_size = enc_inputs_v.size()[1]
    batch_indices = cuda_if_gpu(torch.arange(0, batch_size).long())

    # Embedding lookup - chars
    char_embeddings = self.embedding_lookup(enc_inputs_v)  # SL x B x E

    # Encode
    # SL x B x 2*H, num_layers * num_directions x B x H
    enc_outputs, enc_hidden, enc_cell = self.encoder(
        rnn_input=char_embeddings, batch_size=batch_size)

    # Embedding lookup - features
    # B x self.num_embedding_feats * self.embedding_dim
    feat_embeddings = self.embedding_lookup(feature_ids_v).view(
        batch_size, -1)
    feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

    # Initialize decoder
    # 1 x B x enc_dim
    dec_hidden = sum_bidirectional_state(enc_hidden)
    dec_cell = sum_bidirectional_state(enc_cell)

    # Decode greedily
    prev_y_id = BOS_ID
    prev_y_var = cuda_if_gpu(
        Variable(torch.LongTensor([prev_y_id] * batch_size)))
    dec_ids = torch.zeros(dec_len, batch_size)
    attn_ids_v = cuda_if_gpu(torch.zeros(batch_size).long())

    for di in range(dec_len):
        prev_y_embeddings = self.embedding_mat(prev_y_var)  # B x E

        # B x enc_dim * enc.num_directions
        attended_encoder_output = enc_outputs[attn_ids_v, batch_indices]

        # 1 x B x dec_input_dim
        dec_input = torch.cat(
            (prev_y_embeddings, feat_embeddings, attended_encoder_output),
            1).unsqueeze(0)
        dec_output, dec_hidden, dec_cell = self.decoder(
            dec_input, dec_hidden, dec_cell)

        unnormalized_scores = self.output_layer(dec_output[0])
        logits = self.softmax(unnormalized_scores)  # B x TV
        topval, topids = logits.data.topk(1)
        prev_y_ids = topids.squeeze(1)  # B

        # store result
        dec_ids[di] = prev_y_ids
        prev_y_var = cuda_if_gpu(Variable(prev_y_ids))

        # where a STEP symbol was emitted, advance the hard-attention index,
        # never moving past the last encoder position
        attn_mask = prev_y_ids.eq(STEP_ID).long()
        attn_ids_v += attn_mask
        attn_ids_v = torch.clamp(attn_ids_v, min=0, max=attn_clamp_value)

    return dec_ids
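# A toy illustration of the hard-attention pointer update at the end of
# `predict` above (STEP_ID = 3 and the batch values are made-up examples):
import torch

STEP_ID = 3
prev_y_ids = torch.LongTensor([3, 7, 3])  # predictions for a batch of 3
attn_ids_v = torch.LongTensor([0, 2, 5])  # current attention positions
attn_ids_v += prev_y_ids.eq(STEP_ID).long()          # advance only where STEP was emitted
attn_ids_v = torch.clamp(attn_ids_v, min=0, max=5)   # never run past src_len - 1
# attn_ids_v is now [1, 2, 5]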