Example #1
    def __init__(self, data, opt):
        super(WordSequence, self).__init__()

        self.gpu = opt.gpu

        self.droplstm = nn.Dropout(opt.dropout)

        self.wordrep = WordRep(data, opt)
        self.input_size = data.word_emb_dim

        self.input_size += opt.char_hidden_dim

        if data.feat_config is not None:
            for idx in range(len(data.feature_emb_dims)):
                self.input_size += data.feature_emb_dims[idx]

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        lstm_hidden = opt.hidden_dim // 2

        self.lstm = nn.LSTM(self.input_size,
                            lstm_hidden,
                            num_layers=1,
                            batch_first=True,
                            bidirectional=True)

        # The linear layer that maps from hidden state space to tag space
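        # (label_alphabet.size() + 2 presumably reserves two extra slots for CRF start/stop transitions)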
        self.hidden2tag = nn.Linear(opt.hidden_dim,
                                    data.label_alphabet.size() + 2)

        if opt.gpu >= 0 and torch.cuda.is_available():
            self.droplstm = self.droplstm.cuda(self.gpu)
            self.hidden2tag = self.hidden2tag.cuda(self.gpu)
            self.lstm = self.lstm.cuda(self.gpu)
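
For orientation, here is a minimal sketch of how these modules are typically chained in a forward pass; the method signature, the WordRep call, and the packed-sequence handling are assumptions in the NCRF++ style, not taken from the example above.

    # requires: from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
    def forward(self, word_inputs, feature_inputs, word_seq_lengths,
                char_inputs, char_seq_lengths, char_seq_recover):
        # (batch, seq_len, input_size): concatenated word/char/feature embeddings
        word_represent = self.wordrep(word_inputs, feature_inputs, word_seq_lengths,
                                      char_inputs, char_seq_lengths, char_seq_recover)
        packed_words = pack_padded_sequence(word_represent, word_seq_lengths.cpu(),
                                            batch_first=True)
        lstm_out, _ = self.lstm(packed_words)
        lstm_out, _ = pad_packed_sequence(lstm_out, batch_first=True)
        # dropout on the BiLSTM outputs, then project to tag space
        outputs = self.hidden2tag(self.droplstm(lstm_out))
        return outputs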
Example #2
    def __init__(self, data):
        super(WordSequence, self).__init__()
        print("build word sequence feature extractor: %s..."%(data.word_feature_extractor))
        self.gpu = data.HP_gpu
        self.use_char = data.use_char
        # self.batch_size = data.HP_batch_size
        # self.hidden_dim = data.HP_hidden_dim
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.wordrep = WordRep(data)

        self.input_size = data.word_emb_dim
        if self.use_char:
            self.input_size += data.HP_char_hidden_dim
            if data.char_seq_feature == "ALL":
                self.input_size += data.HP_char_hidden_dim
        for idx in range(data.feature_num):
            self.input_size += data.feature_emb_dims[idx]
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim

        self.word_feature_extractor = data.word_feature_extractor
        if self.word_feature_extractor == "GRU":
            self.lstm = nn.GRU(self.input_size, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)
        elif self.word_feature_extractor == "LSTM":
            self.lstm = nn.LSTM(self.input_size, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)
        elif self.word_feature_extractor == "CNN":
            # cnn_hidden = data.HP_hidden_dim
            self.word2cnn = nn.Linear(self.input_size, data.HP_hidden_dim)
            self.cnn_layer = data.HP_cnn_layer
            print "CNN layer: ", self.cnn_layer
            self.cnn_list = nn.ModuleList()
            self.cnn_drop_list = nn.ModuleList()
            self.cnn_batchnorm_list = nn.ModuleList()
            kernel = 3
            pad_size = (kernel - 1) // 2  # integer division so Conv1d gets an int padding
            for idx in range(self.cnn_layer):
                self.cnn_list.append(nn.Conv1d(data.HP_hidden_dim, data.HP_hidden_dim, kernel_size=kernel, padding=pad_size))
                self.cnn_drop_list.append(nn.Dropout(data.HP_dropout))
                self.cnn_batchnorm_list.append(nn.BatchNorm1d(data.HP_hidden_dim))
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.HP_hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.droplstm = self.droplstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            if self.word_feature_extractor == "CNN":
                self.word2cnn = self.word2cnn.cuda()
                for idx in range(self.cnn_layer):
                    self.cnn_list[idx] = self.cnn_list[idx].cuda()
                    self.cnn_drop_list[idx] = self.cnn_drop_list[idx].cuda()
                    self.cnn_batchnorm_list[idx] = self.cnn_batchnorm_list[idx].cuda()
            else:
                self.lstm = self.lstm.cuda()
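
The CNN branch above feeds Conv1d and BatchNorm1d layers, which expect channels-first tensors; below is a hedged sketch of the matching forward-pass step, assuming a word_represent tensor of shape (batch, seq_len, input_size) and the usual torch / torch.nn.functional-as-F imports (variable names are illustrative, not taken from the example).

        # project to hidden_dim, then swap to (batch, hidden_dim, seq_len) for Conv1d
        cnn_feature = torch.tanh(self.word2cnn(word_represent)).transpose(2, 1).contiguous()
        for idx in range(self.cnn_layer):
            cnn_feature = F.relu(self.cnn_list[idx](cnn_feature))
            cnn_feature = self.cnn_drop_list[idx](cnn_feature)
            cnn_feature = self.cnn_batchnorm_list[idx](cnn_feature)
        # back to (batch, seq_len, hidden_dim) before self.hidden2tag
        feature_out = cnn_feature.transpose(2, 1).contiguous()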
Example #3
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordSequence, self).__init__()

        self.gpu = data.HP_gpu
        self.use_char = use_char
        # self.batch_size = data.HP_batch_size
        # self.hidden_dim = data.HP_hidden_dim
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.bilstm_flag = True
        self.lstm_layer = 1
        self.wordrep = WordRep(data, use_position, use_cap, use_postag, use_char)
        self.tune_wordemb = data.tune_wordemb

        self.input_size = data.word_emb_dim
        if self.use_char:
            self.input_size += data.HP_char_hidden_dim


        if use_cap:
            self.input_size += data.feature_emb_dims[data.feature_name2id['[Cap]']]
        if use_postag:
            self.input_size += data.feature_emb_dims[data.feature_name2id['[POS]']]

        self.use_position = use_position
        if self.use_position:
            self.input_size += 2*data.re_feature_emb_dims[data.re_feature_name2id['[POSITION]']]

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim


        self.lstm = nn.LSTM(self.input_size, lstm_hidden, num_layers=self.lstm_layer, batch_first=True, bidirectional=self.bilstm_flag)


        if torch.cuda.is_available():
            self.droplstm = self.droplstm.cuda(self.gpu)

            self.lstm = self.lstm.cuda(self.gpu)
Example #4
    def __init__(self, data, use_position, use_cap, use_postag, use_char):
        super(WordSequence, self).__init__()
        print("build word sequence feature extractor: %s..." %
              (data.word_feature_extractor))
        self.gpu = data.HP_gpu
        self.use_char = use_char
        # self.batch_size = data.HP_batch_size
        # self.hidden_dim = data.HP_hidden_dim
        self.droplstm = nn.Dropout(data.HP_dropout)
        self.bilstm_flag = data.HP_bilstm
        self.lstm_layer = data.HP_lstm_layer
        self.wordrep = WordRep(data, use_position, use_cap, use_postag,
                               use_char)
        self.tune_wordemb = data.tune_wordemb

        self.input_size = data.word_emb_dim
        if self.use_char:
            self.input_size += data.HP_char_hidden_dim
            if data.char_feature_extractor == "ALL":
                self.input_size += data.HP_char_hidden_dim

        if use_cap:
            self.input_size += data.feature_emb_dims[
                data.feature_name2id['[Cap]']]
        if use_postag:
            self.input_size += data.feature_emb_dims[
                data.feature_name2id['[POS]']]

        self.use_position = use_position
        if self.use_position:
            self.input_size += 2 * data.re_feature_emb_dims[
                data.re_feature_name2id['[POSITION]']]

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.HP_hidden_dim // 2
        else:
            lstm_hidden = data.HP_hidden_dim

        self.word_feature_extractor = data.word_feature_extractor
        if self.word_feature_extractor == "GRU":
            self.lstm = nn.GRU(self.input_size,
                               lstm_hidden,
                               num_layers=self.lstm_layer,
                               batch_first=True,
                               bidirectional=self.bilstm_flag)
        elif self.word_feature_extractor == "LSTM":
            self.lstm = nn.LSTM(self.input_size,
                                lstm_hidden,
                                num_layers=self.lstm_layer,
                                batch_first=True,
                                bidirectional=self.bilstm_flag)
        elif self.word_feature_extractor == "CNN":
            # cnn_hidden = data.HP_hidden_dim
            self.word2cnn = nn.Linear(self.input_size, data.HP_hidden_dim)
            self.cnn_layer = data.HP_cnn_layer
            print "CNN layer: ", self.cnn_layer
            self.cnn_list = nn.ModuleList()
            self.cnn_drop_list = nn.ModuleList()
            self.cnn_batchnorm_list = nn.ModuleList()
            kernel = 3
            pad_size = (kernel - 1) // 2  # integer division so Conv1d gets an int padding
            for idx in range(self.cnn_layer):
                self.cnn_list.append(
                    nn.Conv1d(data.HP_hidden_dim,
                              data.HP_hidden_dim,
                              kernel_size=kernel,
                              padding=pad_size))
                self.cnn_drop_list.append(nn.Dropout(data.HP_dropout))
                self.cnn_batchnorm_list.append(
                    nn.BatchNorm1d(data.HP_hidden_dim))

        if torch.cuda.is_available():
            self.droplstm = self.droplstm.cuda(self.gpu)
            if self.word_feature_extractor == "CNN":
                self.word2cnn = self.word2cnn.cuda(self.gpu)
                for idx in range(self.cnn_layer):
                    self.cnn_list[idx] = self.cnn_list[idx].cuda(self.gpu)
                    self.cnn_drop_list[idx] = self.cnn_drop_list[idx].cuda(
                        self.gpu)
                    self.cnn_batchnorm_list[idx] = self.cnn_batchnorm_list[
                        idx].cuda(self.gpu)
            else:
                self.lstm = self.lstm.cuda(self.gpu)

        self.frozen = False
Example #5
    def __init__(self, data, circul_time, deepth):
        super(WordSequence_circulationBiLSTM, self).__init__()
        print("Build word sequence feature extractor: %s..." %
              data.word_feature_extractor)
        self.circul_time = circul_time
        self.deepth = deepth
        self.hidden_dim = data.hidden_dim
        self.gather_output_mode = data.circul_gather_output_mode
        # self.gather_output_mode = 'add'

        self.gpu = data.gpu
        self.use_char = data.use_char
        self.use_trans = data.use_trans
        # self.batch_size = data.batch_size
        self.droplstm = nn.Dropout(data.dropout)
        self.bilstm_flag = data.bilstm
        self.lstm_layer = data.lstm_layer
        self.wordrep = WordRep(data)

        self.input_size = data.word_emb_dim

        if self.use_char:
            self.input_size += data.char_hidden_dim
            if data.char_seq_feature == "ALL":
                self.input_size += data.char_hidden_dim
        for idx in range(data.feature_num):
            self.input_size += data.feature_emb_dims[idx]

        if self.use_trans:
            self.input_size += data.trans_hidden_dim

        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        if self.bilstm_flag:
            lstm_hidden = data.hidden_dim // 2
        else:
            lstm_hidden = data.hidden_dim

        self.word_feature_extractor = data.word_feature_extractor
        if self.word_feature_extractor == "GRU":
            self.lstm = nn.GRU(self.input_size,
                               lstm_hidden,
                               num_layers=self.lstm_layer,
                               batch_first=True,
                               bidirectional=self.bilstm_flag)
        elif self.word_feature_extractor == "LSTM":
            if self.gather_output_mode == 'concat':
                lstm_hidden //= circul_time  # integer division keeps the hidden size an int
            elif self.gather_output_mode == 'add':
                pass
            else:
                print('no such mode for gather output: {}'.format(
                    self.gather_output_mode))

            # use nn.ModuleList (not a plain Python list) so the stacked LSTMs are
            # registered as submodules and their parameters appear in .parameters()
            self.lstm_stack = nn.ModuleList()
            for i in range(deepth):
                lstm_layer = nn.ModuleList()
                for j in range(circul_time):
                    if i == 0:
                        lstm_layer.append(
                            nn.LSTM(self.input_size,
                                    lstm_hidden,
                                    num_layers=self.lstm_layer,
                                    batch_first=True,
                                    bidirectional=self.bilstm_flag))
                    else:
                        lstm_layer.append(
                            nn.LSTM(self.hidden_dim,
                                    lstm_hidden,
                                    num_layers=self.lstm_layer,
                                    batch_first=True,
                                    bidirectional=self.bilstm_flag))
                self.lstm_stack.append(lstm_layer)

        elif self.word_feature_extractor == "CNN":
            # cnn_hidden = data.hidden_dim
            self.word2cnn = nn.Linear(self.input_size, data.hidden_dim)
            self.cnn_layer = data.cnn_layer
            print "CNN layer: ", self.cnn_layer
            self.cnn_list = nn.ModuleList()
            self.cnn_drop_list = nn.ModuleList()
            self.cnn_batchnorm_list = nn.ModuleList()
            kernel = 3
            pad_size = (kernel - 1) // 2  # integer division so Conv1d gets an int padding
            for idx in range(self.cnn_layer):
                self.cnn_list.append(
                    nn.Conv1d(data.hidden_dim,
                              data.hidden_dim,
                              kernel_size=kernel,
                              padding=pad_size))
                self.cnn_drop_list.append(nn.Dropout(data.dropout))
                self.cnn_batchnorm_list.append(nn.BatchNorm1d(data.hidden_dim))
        # The linear layer that maps from hidden state space to tag space
        self.hidden2tag = nn.Linear(data.hidden_dim, data.label_alphabet_size)

        if self.gpu:
            self.droplstm = self.droplstm.cuda()
            self.hidden2tag = self.hidden2tag.cuda()
            if self.word_feature_extractor == "CNN":
                self.word2cnn = self.word2cnn.cuda()
                for idx in range(self.cnn_layer):
                    self.cnn_list[idx] = self.cnn_list[idx].cuda()
                    self.cnn_drop_list[idx] = self.cnn_drop_list[idx].cuda()
                    self.cnn_batchnorm_list[idx] = self.cnn_batchnorm_list[
                        idx].cuda()
            else:
                for i in range(deepth):
                    for j in range(circul_time):
                        self.lstm_stack[i][j] = self.lstm_stack[i][j].cuda()
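
The constructor above only builds the LSTM stack; how the circul_time outputs are merged is not shown here. The fragment below is a purely hypothetical illustration of the gather step suggested by gather_output_mode ('concat' concatenates circul_time narrower outputs back to hidden_dim, 'add' sums full-width outputs); the control flow and variable names are assumptions.

        # hypothetical forward-pass fragment, one pass over the depth levels
        layer_input = word_represent
        for level in self.lstm_stack:
            outs = [lstm(layer_input)[0] for lstm in level]
            if self.gather_output_mode == 'concat':
                # circul_time pieces of width hidden_dim // circul_time -> hidden_dim
                layer_input = torch.cat(outs, dim=-1)
            else:  # 'add': outputs keep the full width and are summed
                layer_input = torch.stack(outs, dim=0).sum(dim=0)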