def forward(self, input_x, input_char, input_y):

        word_seq_tensor, word_seq_lengths, word_seq_recover, char_seq_tensor, char_seq_lengths, char_seq_recover, label_seq_tensor, mask = padding_word_char(
            self.use_cuda, input_x, input_char, input_y)
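        # Presumably padding_word_char pads every batch to its longest sentence,
        # sorts sentences by descending length (needed later for
        # pack_padded_sequence), and returns the *_recover indices to restore
        # the original order; the length tensors appear to be kept on the CPU.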

        input_x = word_seq_tensor
        input_y = label_seq_tensor

        embed_input_x = self.embedding(
            input_x)  # embed_input_x: (b_s, m_l, em_s)

        batch_size = word_seq_tensor.size(0)
        sent_len = word_seq_tensor.size(1)

        if self.use_char:
            if self.use_cuda:
                char_features = self.char_feature.get_last_hiddens(
                    char_seq_tensor, char_seq_lengths.numpy())
            else:
                char_features = self.char_feature.get_last_hiddens(
                    char_seq_tensor,
                    char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            embed_input_x = torch.cat([embed_input_x, char_features], 2)
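            # char_features now holds one vector per word, restored to the
            # original word order; concatenating along dim 2 widens each word
            # embedding to (b_s, m_l, em_s + char_feature_size).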

        embed_input_x = self.dropout(embed_input_x)

        encoder_outputs = torch.zeros(len(input_y),
                                      self.input_size)  # holds the averaged sentence representations

        if self.use_cuda:
            encoder_outputs = Variable(encoder_outputs).cuda()
        else:
            encoder_outputs = Variable(encoder_outputs)
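        # Wrapping in Variable (and moving to the GPU when use_cuda is set)
        # reflects the pre-0.4 PyTorch API this code targets; from PyTorch 0.4
        # on, the plain tensor would suffice.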

        for index, batch in enumerate(embed_input_x):
            true_batch = batch[
                0:word_seq_lengths[index]]  # keep only the real (non-padding) tokens of this sentence
            encoder_outputs[index] = torch.mean(true_batch, 0)  # average them into one vector
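        # Each sentence is thus encoded as the mean of its non-padding token
        # embeddings, i.e. a simple bag-of-embeddings representation.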

        predict = self.linear(encoder_outputs)
        predict = self.softmax(predict)
        loss = self.NLLoss(predict, input_y)

        if self.training:  # training mode: return the loss
            return loss
        else:
            value, index = torch.max(predict, 1)
            return index  # evaluation mode: return predicted label indices for computing accuracy
    def forward(self, input_x, input_char, input_y):
        """
        intput_x: b_s instances, 没有进行padding和Variable
        :param input:
        :return:
        """

        word_seq_tensor, word_seq_lengths, word_seq_recover, char_seq_tensor, char_seq_lengths, char_seq_recover, label_seq_tensor, mask = padding_word_char(
            self.use_cuda, input_x, input_char, input_y)

        input_x = word_seq_tensor
        input_y = label_seq_tensor

        embed_input_x = self.embedding(
            input_x)  # embed_input_x: (b_s, m_l, em_s)

        batch_size = word_seq_tensor.size(0)
        sent_len = word_seq_tensor.size(1)

        if self.use_char:
            if self.use_cuda:
                char_features = self.char_feature.get_last_hiddens(
                    char_seq_tensor, char_seq_lengths.numpy())
            else:
                char_features = self.char_feature.get_last_hiddens(
                    char_seq_tensor,
                    char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            embed_input_x = torch.cat([embed_input_x, char_features], 2)

        embed_input_x = self.dropout(embed_input_x)

        if self.use_cuda:
            embed_input_x_packed = pack_padded_sequence(
                embed_input_x,
                word_seq_lengths.cpu().numpy(),
                batch_first=True)
        else:
            embed_input_x_packed = pack_padded_sequence(
                embed_input_x, word_seq_lengths.numpy(), batch_first=True)
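        # pack_padded_sequence requires the batch to be sorted by decreasing
        # length (strictly enforced in older PyTorch versions), which
        # padding_word_char is assumed to guarantee here.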

        encoder_outputs_packed, (h_last,
                                 c_last) = self.lstm(embed_input_x_packed)
        encoder_outputs, _ = pad_packed_sequence(encoder_outputs_packed,
                                                 batch_first=True)
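        # h_last has shape (num_layers * num_directions, b_s, hidden_size); with
        # a single-layer, unidirectional LSTM (as the squeeze(0) below assumes)
        # this is (1, b_s, hidden_size) and serves as the sentence representation.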

        predict = self.linear(h_last)  # predict: [1, b_s, o_s]
        predict = self.softmax(
            predict.squeeze(0))  # predict.squeeze(0) [b_s, o_s]

        loss = self.NLLoss(predict, input_y)

        if self.training:  # training mode: return the loss
            return loss
        else:
            value, index = torch.max(predict, 1)
            return index  # evaluation mode: return predicted label indices for computing accuracy
    def forward(self, input_x, input_char, input_y):
        word_seq_tensor, word_seq_lengths, word_seq_recover, char_seq_tensor, char_seq_lengths, char_seq_recover, label_seq_tensor, mask = padding_word_char(
            self.use_cuda, input_x, input_char, input_y)

        input_x = word_seq_tensor
        input_y = label_seq_tensor
        batch_size = word_seq_tensor.size(0)
        sent_len = word_seq_tensor.size(1)

        self.poolings = nn.ModuleList([
            nn.MaxPool1d(sent_len - size + 1, 1) for size in self.kernel_size
        ])  # each pooling window spans the whole feature map, so every filter yields a single value
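        # The pooling window depends on sent_len, which varies per batch, so the
        # MaxPool1d modules are rebuilt inside forward rather than in __init__;
        # they have no learnable parameters, so this does not affect the optimizer.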

        input = input_x.squeeze(1)
        embed_input_x = self.embedding(
            input)  # embed_input_x: (b_s, m_l, em_s)

        if self.use_char:
            if self.use_cuda:
                char_features = self.char_feature.get_last_hiddens(
                    char_seq_tensor, char_seq_lengths.numpy())
            else:
                char_features = self.char_feature.get_last_hiddens(
                    char_seq_tensor,
                    char_seq_lengths.cpu().numpy())
            char_features = char_features[char_seq_recover]
            char_features = char_features.view(batch_size, sent_len, -1)
            embed_input_x = torch.cat([embed_input_x, char_features], 2)

        embed_input_x = self.dropout(embed_input_x)
        embed_input_x = embed_input_x.view(embed_input_x.size(0), 1, -1,
                                           embed_input_x.size(2))

        parts = []  # e.g. kernel sizes [3, 4, 5] with 100 filters each -> pooled feature dim 100 + 100 + 100 = 300
        for (conv, pooling) in zip(self.convs, self.poolings):
            conved_data = conv(embed_input_x).squeeze()
            if len(conved_data.size()) == 2:
                conved_data = conved_data.view(1, conved_data.size(0),
                                               conved_data.size(1))
            if len(conved_data.size()) == 1:
                conved_data = conved_data.view(1, conved_data.size(0), 1)
            pooled_data = pooling(conved_data).view(input_x.size(0), -1)
            parts.append(pooled_data)
        x = F.relu(torch.cat(parts, 1))
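        # x concatenates the max-pooled outputs of every convolution branch, so
        # its width equals the total number of filters across kernel sizes
        # (e.g. 3 * 100 = 300 in the example above).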

        # max-norm constraint: keep the L2 norm of the classifier weight at most self.l2
        w = torch.mul(self.linear.weight, self.linear.weight).sum().data[0]  # squared L2 norm of the weight
        if w > self.l2 * self.l2:
            self.linear.weight.data.mul_((self.l2 * self.l2 / w) ** 0.5)  # rescale the weight in place

        predict = self.linear(x)  # predict: [b_s, o_s]
        predict = self.softmax(predict)

        loss = self.NLLoss(predict, input_y)

        if self.training:  # training mode: return the loss
            return loss
        else:
            value, index = torch.max(predict, 1)
            return index  # evaluation mode: return predicted label indices for computing accuracy