Пример #1
0
    def predict_one(self, model, datum, vocab):
        """
        Predict the output form for a single (lemma, features) instance.

        :param model: object exposing ``predict_one_instance``; it is called
            with a pair of N x 1 id tensors (encoder char ids, feature ids)
        :param datum: a ``(lemma, feats)`` pair
        :param vocab: vocabulary handed to the ``BaseMorphData`` helpers
        :return: the decoded symbols joined into one string, with every symbol
            found in ``MORPH_OUTPUT_CLASSES_START`` left out
        """

        def as_column(ids):
            # TODO: this works only with batch=1; consider batching and
            # transposition instead of viewing
            return cuda_if_gpu(Variable(torch.LongTensor(ids)).view(-1, 1))

        lemma, feats = datum

        # vectorize both inputs first, then wrap them as column tensors
        char_ids = BaseMorphData.lemma_chars_to_ids(lemma, vocab)
        feat_ids = BaseMorphData.feat_d_to_ids(feats, vocab)
        char_ids_v = as_column(char_ids)
        feat_ids_v = as_column(feat_ids)

        decoded_ids = model.predict_one_instance((char_ids_v, feat_ids_v))
        decoded_symbols = BaseMorphData.unvectorize_seq(decoded_ids, vocab)

        return ''.join(
            sym for sym in decoded_symbols
            if sym not in MORPH_OUTPUT_CLASSES_START)
Пример #2
0
    def predict(self, input_var):
        """
        Greedily decode a batch of inputs for a fixed number of steps.

        Every batch element is decoded for exactly ``self.max_tgt_len`` steps;
        there is no early stopping on an end-of-sequence symbol.

        :param input_var: pair ``(enc_inputs_v, feature_ids_v)``; the batch
            size is read from dimension 1 of ``enc_inputs_v``
        :return: tensor of size ``self.max_tgt_len`` x B created with
            ``torch.zeros`` and filled, step by step, with the top-scoring id
            of every batch element
        """

        enc_inputs_v, feature_ids_v = input_var
        dec_len = self.max_tgt_len
        batch_size = enc_inputs_v.size()[1]

        # Embedding lookup - chars
        char_embeddings = self.embedding_lookup(enc_inputs_v)  # SL x B x E

        # Encode
        encoder_outputs, encoder_hidden = self.encoder(char_embeddings,
                                                       batch_size=batch_size)
        if self.encoder.rnn.bidirectional:
            # squeeze the outputs of a BiRNN encoder
            # outputs:  # SL x B x enc_dim
            # hidden: # 1 x B x enc_dim
            encoder_outputs = sum_bidirectional_outputs(
                encoder_outputs, self.encoder.rnn)
            encoder_hidden = sum_bidirectional_state(encoder_hidden)

        # Embedding lookup - features, flattened to one row per batch element
        feat_embeddings = self.embedding_lookup(feature_ids_v).view(
            batch_size, -1)
        feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

        # Initialize decoder with the first slice of the encoder state
        dec_hidden = encoder_hidden[:1]  # 1 x B x enc_dim

        # Decode, starting from BOS for every batch element
        prev_y_var = cuda_if_gpu(
            Variable(torch.LongTensor([self.BOS_ID] * batch_size)))
        dec_ids = torch.zeros(dec_len, batch_size)

        for di in range(dec_len):
            prev_y_embeddings = self.embedding_lookup(prev_y_var)

            # previous symbol and projected features, concatenated along dim 1
            dec_input = torch.cat((prev_y_embeddings, feat_embeddings), 1)
            dec_output, dec_hidden, attn_weights = self.decoder(
                dec_input, dec_hidden, encoder_outputs)
            unnormalized_scores = self.output_layer(dec_output[0])
            logits = self.softmax(unnormalized_scores)

            # greedy choice: only the indices of the top score are needed
            _, topids = logits.data.topk(1)
            prev_y_ids = topids.squeeze(1)  # B

            # bookkeeping: store this step and feed it back as next input
            dec_ids[di] = prev_y_ids
            prev_y_var = cuda_if_gpu(Variable(prev_y_ids))

        return dec_ids
    def predict_one_instance(self, input_var):
        """
        Greedily decode one instance while moving a pointer over the encoder
        outputs.

        At every step the decoder input is the concatenation of the previous
        symbol embedding, the projected feature embeddings and the encoder
        output at the current pointer position. The pointer moves one position
        forward whenever ``STEP_ID`` is predicted and it is not yet at the last
        source position. Decoding ends when ``EOS_ID`` is predicted or after
        ``self.max_tgt_len * 2`` steps.

        :param input_var: pair ``(enc_inputs_v, feature_ids_v)`` for a single
            instance (the encoder is called with ``batch_size=1``)
        :return: list with the predicted id of every decoding step
        """

        enc_inputs_v, feature_ids_v = input_var
        src_len = enc_inputs_v.size()[0]

        # encode the embedded source characters
        enc_outputs, enc_hidden, enc_cell = self.encoder(
            rnn_input=self.embedding_lookup(enc_inputs_v), batch_size=1)

        # feature embeddings flattened into a single row, then projected
        feat_repr = F.relu(
            self.feat_proj_layer(
                self.embedding_lookup(feature_ids_v).view(1, -1)))

        # initial decoder state derived from the encoder state
        dec_hidden = sum_bidirectional_state(enc_hidden)
        dec_cell = sum_bidirectional_state(enc_cell)

        dec_ids = []
        pointer = 0
        last_id = BOS_ID
        last_id_v = cuda_if_gpu(Variable(torch.LongTensor([last_id])))

        while last_id != EOS_ID and len(dec_ids) < self.max_tgt_len * 2:
            last_embedding = self.embedding_mat(last_id_v)

            step_input = torch.cat(
                (last_embedding, feat_repr, enc_outputs[pointer]),
                1).unsqueeze(0)
            dec_output, dec_hidden, dec_cell = self.decoder(
                step_input, dec_hidden, dec_cell)

            scores = self.softmax(self.output_layer(dec_output[0]))
            _, best = scores.data.topk(1)
            last_id = best.data.cpu().numpy()[0][0]

            # remember the prediction and feed it back in the next step
            dec_ids.append(last_id)
            last_id_v = cuda_if_gpu(Variable(torch.LongTensor([last_id])))

            # a predicted step symbol advances the pointer (bounded by source)
            if last_id == STEP_ID and pointer < src_len - 1:
                pointer += 1

        return dec_ids
Пример #4
0
    def predict_one_instance(self, input_var):
        """
        Greedily decode a single instance with the attention decoder.

        Decoding stops once ``self.EOS_ID`` has been predicted or after
        ``self.max_tgt_len`` steps, whichever comes first.

        :param input_var: pair ``(enc_inputs_v, feature_ids_v)`` for a single
            instance (the encoder is called with ``batch_size=1``)
        :return: list of predicted ids as plain Python ints, in decoding
            order; a predicted ``self.EOS_ID`` is part of the list
        """

        enc_inputs_v, feature_ids_v = input_var

        # Embedding lookup - chars
        char_embeddings = self.embedding_lookup(enc_inputs_v)  # SL x B x E

        # Encode
        encoder_outputs, encoder_hidden = self.encoder(char_embeddings,
                                                       batch_size=1)
        if self.encoder.rnn.bidirectional:
            # squeeze the outputs of a BiRNN encoder
            # outputs:  # SL x B x enc_dim
            # hidden: # 1 x B x enc_dim
            encoder_outputs = sum_bidirectional_outputs(
                encoder_outputs, self.encoder.rnn)
            encoder_hidden = sum_bidirectional_state(encoder_hidden)

        # Embedding lookup - features, flattened into a single row
        feat_embeddings = self.embedding_lookup(feature_ids_v).view(1, -1)
        feat_embeddings = F.relu(self.feat_proj_layer(feat_embeddings))

        # Initialize decoder with the first slice of the encoder state
        dec_hidden = encoder_hidden[:1]  # 1 x B x enc_dim

        # Decode
        dec_ids = []
        prev_y_id = self.BOS_ID
        prev_y_var = cuda_if_gpu(Variable(torch.LongTensor([prev_y_id])))

        while (prev_y_id != self.EOS_ID
               and len(dec_ids) < self.max_tgt_len):

            prev_y_embeddings = self.embedding_mat(prev_y_var)

            # previous symbol and projected features, concatenated along dim 1
            dec_input = torch.cat((prev_y_embeddings, feat_embeddings), 1)
            dec_output, dec_hidden, attn_weights = self.decoder(
                dec_input, dec_hidden, encoder_outputs)
            unnormalized_scores = self.output_layer(dec_output[0])
            logits = self.softmax(unnormalized_scores)
            _, topidx = logits.data.topk(1)

            # Indexing yields a 0-dim tensor on newer PyTorch versions;
            # int() makes sure a plain Python int is stored and compared
            # (it is a no-op where indexing already returns an int).
            prev_y_id = int(topidx[0][0])

            # store result and feed it back as the next decoder input
            dec_ids.append(prev_y_id)
            prev_y_var = cuda_if_gpu(Variable(torch.LongTensor([prev_y_id])))

        return dec_ids
    def train_step(self, model, np_xy_mat_pair):
        """
        Run one forward pass and compute the loss for a batch.

        :param model: object whose ``forward`` is called on the input batch
        :param np_xy_mat_pair: indexable holding the numpy input matrix at
            position 0 and the numpy target matrix at position 1
        :return: whatever ``self.calc_loss`` returns for the model output and
            the targets
        """
        inputs_np = np_xy_mat_pair[0]
        targets_np = np_xy_mat_pair[1]

        x_var = cuda_if_gpu(Variable(torch.from_numpy(inputs_np)))

        # targets have to be cast to float before the loss is computed
        targets_float = torch.from_numpy(targets_np).type(torch.FloatTensor)
        y_var = cuda_if_gpu(Variable(targets_float))

        return self.calc_loss(model.forward(x_var), y_var)
Пример #6
0
    def decode_teacher(self, encoder_hidden, encoder_outputs, batch_x_feat_var,
                       batch_y_var):
        """
        Decoding policy 1: teacher forcing. The ground truth symbol of a step
        is fed to the decoder as the input of the following step; the first
        input is ``self.BOS_ID`` for every batch element.

        :param encoder_hidden: encoder hidden state; only its first slice along
            dimension 0 is used to initialize the decoder
        :param encoder_outputs: encoder outputs, passed to the decoder at
            every step
        :param batch_x_feat_var: feature ids; their embeddings are flattened
            to one row per batch element and projected
        :param batch_y_var: ground truth ids, indexed as TL x B
        :return: Variable of size TL x B x ``self.output_size`` holding the
            ``self.softmax`` output of every decoding step
        """

        target_size = batch_y_var.size()
        dec_len, batch_size = target_size[0], target_size[1]

        # projected feature embeddings, one row per batch element
        flat_feats = self.embedding_lookup(batch_x_feat_var).view(
            batch_size, -1)
        feat_repr = F.relu(self.feat_proj_layer(flat_feats))

        dec_hidden = encoder_hidden[:1]

        # every sequence starts with BOS
        bos_ids = torch.LongTensor([self.BOS_ID] * batch_size)
        prev_y = cuda_if_gpu(Variable(bos_ids))

        # TL x B x Output_size
        predicted_logits = cuda_if_gpu(
            Variable(torch.zeros(dec_len, batch_size, self.output_size)))

        for step in range(dec_len):
            step_input = torch.cat((self.embedding_mat(prev_y), feat_repr), 1)

            dec_output, dec_hidden, _ = self.decoder(
                step_input, dec_hidden, encoder_outputs)

            # store this step's output
            predicted_logits[step] = self.softmax(
                self.output_layer(dec_output[0]))

            # teacher forcing: the gold symbol is the next input
            prev_y = batch_y_var[step]

        return predicted_logits
Пример #7
0
def init_rnn_hidden(rnn, num_directions, batch_size):
    """
    Build an all-zero hidden state for ``rnn``.

    :param rnn: object providing ``num_layers`` and ``hidden_size``
    :param num_directions: multiplier applied to ``rnn.num_layers``
    :param batch_size: size of the second dimension
    :return: zero Variable of size
        (num_layers * num_directions) x batch_size x hidden_size,
        passed through ``cuda_if_gpu``
    """
    state_shape = (rnn.num_layers * num_directions, batch_size,
                   rnn.hidden_size)
    return cuda_if_gpu(Variable(torch.zeros(*state_shape)))
Пример #8
0
def init_gru_state(num_directions, enc_or_dec, batch_size):
    """
    Build an all-zero hidden state for an encoder or decoder module.

    :param num_directions: multiplier applied to ``enc_or_dec.num_layers``
    :param enc_or_dec: object providing ``num_layers`` and ``hidden_size``
    :param batch_size: size of the second dimension
    :return: zero Variable of size
        (num_layers * num_directions) x batch_size x hidden_size,
        passed through ``cuda_if_gpu``
    """
    first_dim = enc_or_dec.num_layers * num_directions
    zero_state = torch.zeros(first_dim, batch_size, enc_or_dec.hidden_size)
    return cuda_if_gpu(Variable(zero_state))
    def decode_teacher(self, encoder_hidden, encoder_cell, encoder_outputs,
                       attn_ids_v, feat_ids, dec_targets):
        """
        Decoding policy 1: teacher forcing with given attention positions.

        At every step the decoder input is the concatenation of the previous
        symbol embedding, the projected feature embeddings and, for each batch
        element, the encoder output selected by ``attn_ids_v`` for that step.
        The first input symbol is ``BOS_ID``; afterwards the ground truth of
        the current step is fed as the next input.

        :param encoder_hidden: encoder hidden state, reduced with
            ``sum_bidirectional_state`` to initialize the decoder
        :param encoder_cell: encoder cell state, reduced the same way
        :param encoder_outputs: encoder outputs, indexed by
            (source position, batch index)
        :param attn_ids_v: per-step source positions; ``attn_ids_v[di]`` is
            used to index ``encoder_outputs`` at step ``di``
        :param feat_ids: feature ids
        :param dec_targets: ground truth ids, indexed as TL x B
        :return: Variable of size TL x B x ``self.output_size`` holding the
            ``self.softmax`` output of every decoding step
        """

        target_size = dec_targets.size()
        dec_len, batch_size = target_size[0], target_size[1]
        batch_indices = cuda_if_gpu(torch.arange(0, batch_size).long())

        # projected feature embeddings, one row per batch element
        flat_feats = self.embedding_lookup(feat_ids).view(batch_size, -1)
        feat_repr = F.relu(self.feat_proj_layer(flat_feats))

        # initial decoder state derived from the encoder state
        dec_hidden = sum_bidirectional_state(encoder_hidden)
        dec_cell = sum_bidirectional_state(encoder_cell)

        # every sequence starts with BOS
        prev_y = cuda_if_gpu(Variable(torch.LongTensor([BOS_ID] * batch_size)))

        # TL x B x Output_size
        predicted_logits = cuda_if_gpu(
            Variable(torch.zeros(dec_len, batch_size, self.output_size)))

        for step in range(dec_len):
            prev_embedding = self.embedding_mat(prev_y)

            # one encoder output per batch element, picked by attention id
            attended = encoder_outputs[attn_ids_v[step], batch_indices]

            step_input = torch.cat(
                (prev_embedding, feat_repr, attended), 1).unsqueeze(0)
            dec_output, dec_hidden, dec_cell = self.decoder(
                step_input, dec_hidden, dec_cell)

            # store this step's output
            predicted_logits[step] = self.softmax(
                self.output_layer(dec_output[0]))

            # teacher forcing: the gold symbol is the next input
            prev_y = dec_targets[step]

        return predicted_logits
Пример #10
0
    def make_binary_prediction(self, x, model):
        """
        Turn the model output for ``x`` into a binary decision.

        :param x: numpy array, converted with ``torch.from_numpy``
        :param model: callable applied to the input Variable
        :return: ``RIGHT`` when the first element of the model output is
            at least 0.5, ``LEFT`` otherwise
        """
        x_var = cuda_if_gpu(Variable(torch.from_numpy(x)))
        score = model(x_var).cpu().data[0].numpy()

        return RIGHT if score >= 0.5 else LEFT
    def batchify_training_data(self, xy_ids, batch_size, is_dev_data):
        """
        Split vectorized data into consecutive batches of LongTensor pairs.

        Data is visited in the order returned by
        ``self.index_data(data_size, mode='no_shuffling')``. A final partial
        batch is kept; no empty batch is produced when the data size is an
        exact multiple of ``batch_size``.

        :param xy_ids: triple ``(x_data_ids, y_data_ids, forms)``
        :param batch_size: number of instances per batch
        :param is_dev_data: when true, ``forms`` is stored in
            ``self.dev_references`` and written to ``self.fnames.dev_ref_fn``
        :return: list of ``(batch_x, batch_y)`` pairs, each a LongTensor
            Variable built from the id sequences of one batch (no padding is
            done here, so sequences are presumably already of equal length)
        :raises AssertionError: if ``forms`` and ``x_data_ids`` differ in length
        """

        logger.debug('Batchifying data')

        x_data_ids, y_data_ids, forms = xy_ids
        data_size = len(x_data_ids)

        # Ceiling division: counts a trailing partial batch, but does not add
        # an extra (empty) batch when batch_size divides data_size evenly.
        num_batches = (data_size + batch_size - 1) // batch_size
        data_indices = self.index_data(data_size, mode='no_shuffling')

        batch_pairs = []

        for bi in range(num_batches):
            curr_batch_indices = data_indices[bi * batch_size:(bi + 1) *
                                              batch_size]

            # copies keep the tensors independent of the caller's sequences
            batch_x = [copy.deepcopy(x_data_ids[idx])
                       for idx in curr_batch_indices]
            batch_y = [copy.deepcopy(y_data_ids[idx])
                       for idx in curr_batch_indices]

            batch_enc_x_var = cuda_if_gpu(Variable(torch.LongTensor(batch_x)))
            batch_dec_y_var = cuda_if_gpu(Variable(torch.LongTensor(batch_y)))

            batch_pairs.append((batch_enc_x_var, batch_dec_y_var))

        if is_dev_data:
            self.dev_references = forms
            logger.info('Saving dev (training) references to --> %s',
                        self.fnames.dev_ref_fn)
            save_txt(self.dev_references, self.fnames.dev_ref_fn)

        assert data_size == len(forms)

        return batch_pairs
    def make_one_batch(self, sorted_data, data_indices, bi, batch_size):
        """
        Assemble the ``bi``-th batch: padded encoder inputs, feature ids and
        padded decoder targets, plus the untouched forms.

        :param sorted_data: indexable of
            ``(x_enc_ids, x_feature_ids, y_ids, forms)`` entries
        :param data_indices: indices into ``sorted_data``; the slice
            ``[bi * batch_size:(bi + 1) * batch_size]`` defines the batch
        :param bi: batch number
        :param batch_size: number of instances per batch
        :return: ``((enc_x, x_feat, dec_y), forms)`` where ``enc_x`` and
            ``dec_y`` are padded to the longest sequence of the batch and
            transposed (dimensions 0 and 1 swapped), ``x_feat`` is built from
            the feature ids as they are, and ``forms`` is a list with the
            forms entry of every instance
        """

        start, stop = bi * batch_size, (bi + 1) * batch_size
        rows = [sorted_data[idx] for idx in data_indices[start:stop]]

        # work on copies so the stored data is never modified
        enc_seqs = [copy.deepcopy(enc) for enc, _, _, _ in rows]
        feat_seqs = [copy.deepcopy(feat) for _, feat, _, _ in rows]
        tgt_seqs = [copy.deepcopy(tgt) for _, _, tgt, _ in rows]
        batch_forms = [forms for _, _, _, forms in rows]

        # features are not padded, since their number does not change
        longest_enc = max([len(seq) for seq in enc_seqs])
        longest_tgt = max([len(seq) for seq in tgt_seqs])

        enc_padded = [
            pad_seq(seq, longest_enc, pad_id=self.vocab.PAD_ID)
            for seq in enc_seqs
        ]
        tgt_padded = [
            pad_seq(seq, longest_tgt, pad_id=self.vocab.PAD_ID)
            for seq in tgt_seqs
        ]

        batch_enc_x_var = cuda_if_gpu(
            Variable(torch.LongTensor(enc_padded)).transpose(0, 1))
        batch_dec_y_var = cuda_if_gpu(
            Variable(torch.LongTensor(tgt_padded)).transpose(0, 1))
        batch_x_feat_var = cuda_if_gpu(Variable(torch.LongTensor(feat_seqs)))

        batch_vars = (batch_enc_x_var, batch_x_feat_var, batch_dec_y_var)
        return batch_vars, batch_forms
Пример #13
0
    def predict_one(self, model, datum, vocab):
        """
        Predict the output form for a single (lemma, features) instance.

        The input vector is the list of feature ids with its first position
        overwritten by the source-vocabulary id of the lemma.

        :param model: object exposing ``predict``; it is called with a 1 x N
            id tensor and the first element of its result is used
        :param datum: a ``(lemma, feats)`` pair
        :param vocab: vocabulary providing ``src_vocab.lookup_tok`` and
            ``tgt_vocab.lookup_id``
        :return: the target-vocabulary entry looked up for the predicted id
        """

        lemma, feats = datum

        input_ids = MorphMLPData.feat_d_to_ids(feats, vocab)
        input_ids[0] = vocab.src_vocab.lookup_tok(lemma)

        # single-row batch
        input_row = Variable(torch.LongTensor(input_ids)).view(1, -1)
        predictions = model.predict(cuda_if_gpu(input_row))

        return vocab.tgt_vocab.lookup_id(predictions[0])
    def predict(self, input_var):
        """
        Greedily decode a batch while moving one pointer per batch element
        over the encoder outputs.

        Decoding runs for exactly ``self.max_tgt_len * 2`` steps (twice the
        maximum target length, since step symbols are part of the output).
        After every step the pointer of each batch element that predicted
        ``STEP_ID`` is advanced by one and clamped to the last source position.

        :param input_var: pair ``(enc_inputs_v, feature_ids_v)``; source length
            and batch size are read from dimensions 0 and 1 of
            ``enc_inputs_v``
        :return: tensor of size (``self.max_tgt_len * 2``) x B created with
            ``torch.zeros`` and filled, step by step, with the top-scoring id
            of every batch element
        """

        enc_inputs_v, feature_ids_v = input_var
        input_size = enc_inputs_v.size()
        last_src_pos = input_size[0] - 1
        batch_size = input_size[1]
        max_steps = self.max_tgt_len * 2
        batch_indices = cuda_if_gpu(torch.arange(0, batch_size).long())

        # encode the embedded source characters
        enc_outputs, enc_hidden, enc_cell = self.encoder(
            rnn_input=self.embedding_lookup(enc_inputs_v),
            batch_size=batch_size)

        # projected feature embeddings, one row per batch element
        flat_feats = self.embedding_lookup(feature_ids_v).view(batch_size, -1)
        feat_repr = F.relu(self.feat_proj_layer(flat_feats))

        # initial decoder state derived from the encoder state
        dec_hidden = sum_bidirectional_state(enc_hidden)
        dec_cell = sum_bidirectional_state(enc_cell)

        # every sequence starts with BOS and points at source position 0
        prev_y_var = cuda_if_gpu(
            Variable(torch.LongTensor([BOS_ID] * batch_size)))
        dec_ids = torch.zeros(max_steps, batch_size)
        pointers = cuda_if_gpu(torch.zeros(batch_size).long())

        for step in range(max_steps):
            prev_embedding = self.embedding_mat(prev_y_var)  # B x E

            # one encoder output per batch element, picked by its pointer
            attended = enc_outputs[pointers, batch_indices]

            step_input = torch.cat(
                (prev_embedding, feat_repr, attended), 1).unsqueeze(0)
            dec_output, dec_hidden, dec_cell = self.decoder(
                step_input, dec_hidden, dec_cell)

            scores = self.softmax(self.output_layer(dec_output[0]))  # B x TV
            _, best = scores.data.topk(1)
            best_ids = best.squeeze(1)  # B

            # store result and feed it back as the next decoder input
            dec_ids[step] = best_ids
            prev_y_var = cuda_if_gpu(Variable(best_ids))

            # a predicted step symbol advances the pointer (bounded by source)
            advance = best_ids.eq(STEP_ID).long()
            pointers = torch.clamp(pointers + advance, min=0, max=last_src_pos)

        return dec_ids