def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
              freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1, useElmo=False, weight_file='', options_file=''):
     super(TaggerBiRNNCRF, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size, useElmo, weight_file, options_file)
     self.tag_seq_indexer = tag_seq_indexer
     self.class_num = class_num
     self.rnn_hidden_dim = rnn_hidden_dim
     self.freeze_embeddings = freeze_word_embeddings
     self.dropout_ratio = dropout_ratio
     self.rnn_type = rnn_type
     self.gpu = gpu
     self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
     self.dropout = torch.nn.Dropout(p=dropout_ratio)
     
     print ("init targer BiRNNcfr")
         
     if rnn_type == 'GRU':
         self.birnn_layer = LayerBiGRU(input_dim=self.word_embeddings_layer.output_dim,
                                       hidden_dim=rnn_hidden_dim,
                                       gpu=gpu)
     elif rnn_type == 'LSTM':
         self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim,
                                        hidden_dim=rnn_hidden_dim,
                                        gpu=gpu)
     elif rnn_type == 'Vanilla':
         self.birnn_layer = LayerBiVanilla(input_dim=self.word_embeddings_layer.output_dim,
                                           hidden_dim=rnn_hidden_dim,
                                           gpu=gpu)
     else:
         raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
     self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 2)
     self.crf_layer = LayerCRF(gpu, states_num=class_num + 2, pad_idx=tag_seq_indexer.pad_idx, sos_idx=class_num + 1,
                               tag_seq_indexer=tag_seq_indexer)
     if gpu >= 0:
         self.cuda(device=self.gpu)
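
The CRF layer above is built with class_num + 2 states because two extra states are reserved on top of the real tags: the padding tag (tag_seq_indexer.pad_idx) and a start-of-sequence state (sos_idx = class_num + 1). Below is a minimal, self-contained sketch of a transition matrix sized this way; it is illustrative only and is not the project's LayerCRF.

import torch
import torch.nn as nn

class TransitionSketch(nn.Module):
    """Illustrative stand-in: a CRF transition matrix over class_num + 2 states
    (the real tags plus a padding state and a start-of-sequence state)."""
    def __init__(self, class_num, pad_idx=0):
        super().__init__()
        states_num = class_num + 2                 # tags + pad + SOS
        self.pad_idx = pad_idx
        self.sos_idx = class_num + 1
        # transition[i, j] = score of moving from state j to state i
        self.transition = nn.Parameter(torch.randn(states_num, states_num) * 0.1)

    def score_step(self, prev_states, next_states):
        # Transition score for each (prev -> next) pair in a batch.
        return self.transition[next_states, prev_states]

sketch = TransitionSketch(class_num=5)
prev = torch.tensor([sketch.sos_idx, 1])           # start-of-sequence, then tag 1
nxt = torch.tensor([1, 2])
print(sketch.score_step(prev, nxt))                # two transition scores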
Example #2
 def __init__(self,
              word_seq_indexer,
              tag_seq_indexer,
              class_num,
              batch_size=1,
              rnn_hidden_dim=100,
              freeze_word_embeddings=False,
              dropout_ratio=0.5,
              rnn_type='GRU',
              gpu=-1):
     super(TaggerBiRNN, self).__init__(word_seq_indexer, tag_seq_indexer,
                                       gpu, batch_size)
     self.tag_seq_indexer = tag_seq_indexer
     self.class_num = class_num
     self.rnn_hidden_dim = rnn_hidden_dim
     self.freeze_embeddings = freeze_word_embeddings
     self.dropout_ratio = dropout_ratio
     self.rnn_type = rnn_type
     self.gpu = gpu
     if not word_seq_indexer.bert and not word_seq_indexer.elmo:
         self.word_embeddings_layer = LayerWordEmbeddings(
             word_seq_indexer, gpu, freeze_word_embeddings)
     elif word_seq_indexer.bert:
         print("word_seq_indexer.bert gpu", gpu)
         self.word_embeddings_layer = LayerContextWordEmbeddingsBert(
             word_seq_indexer, gpu, freeze_word_embeddings)
     else:
         self.word_embeddings_layer = LayerContextWordEmbeddings(
             word_seq_indexer, gpu, freeze_word_embeddings)
     self.dropout = torch.nn.Dropout(p=dropout_ratio)
     if rnn_type == 'GRU':
         self.birnn_layer = LayerBiGRU(
             input_dim=self.word_embeddings_layer.output_dim,
             hidden_dim=rnn_hidden_dim,
             gpu=gpu)
     elif rnn_type == 'LSTM':
         self.birnn_layer = LayerBiLSTM(
             input_dim=self.word_embeddings_layer.output_dim,
             hidden_dim=rnn_hidden_dim,
             gpu=gpu)
     elif rnn_type == 'Vanilla':
         self.birnn_layer = LayerBiVanilla(
             input_dim=self.word_embeddings_layer.output_dim,
             hidden_dim=rnn_hidden_dim,
             gpu=gpu)
     else:
         raise ValueError(
             'Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
     # We add an additional class that corresponds to the zero-padded values not to be included in the loss function
     self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim,
                                out_features=class_num + 1)
     self.log_softmax_layer = nn.LogSoftmax(dim=1)
     if gpu >= 0:
         self.cuda(device=self.gpu)
     self.nll_loss = nn.NLLLoss(
         ignore_index=0
     )  # "0" target values are the zero-padded parts of the sequences
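
The extra output class (class_num + 1) and nn.NLLLoss(ignore_index=0) work together so that zero-padded token positions never contribute to the loss. A small self-contained illustration of that masking, with made-up shapes:

import torch
import torch.nn as nn

class_num = 3                               # real tags are 1..3, 0 marks padding
log_softmax = nn.LogSoftmax(dim=1)
nll_loss = nn.NLLLoss(ignore_index=0)       # targets equal to 0 are skipped

logits = torch.randn(5, class_num + 1)      # 5 token positions, class_num + 1 outputs
targets = torch.tensor([1, 2, 0, 3, 0])     # zeros are the padded positions

loss = nll_loss(log_softmax(logits), targets)
print(loss)                                 # averaged over the 3 non-padded tokens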
Example #3
    def __init__(self, word_seq_indexer, tag_seq_indexer, class_num, batch_size=1, rnn_hidden_dim=100,
                 freeze_word_embeddings=False, dropout_ratio=0.5, rnn_type='GRU', gpu=-1,
                 freeze_char_embeddings=False, char_embeddings_dim=25, word_len=20, char_cnn_filter_num=30,
                 char_window_size=3, emb_type='word'):
        super(TaggerBiRNNCNNCRF, self).__init__(word_seq_indexer, tag_seq_indexer, gpu, batch_size)
        self.tag_seq_indexer = tag_seq_indexer
        self.class_num = class_num
        self.rnn_hidden_dim = rnn_hidden_dim
        self.freeze_embeddings = freeze_word_embeddings
        self.dropout_ratio = dropout_ratio
        self.rnn_type = rnn_type
        self.gpu = gpu
        #self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
        self.freeze_char_embeddings = freeze_char_embeddings
        self.char_embeddings_dim = char_embeddings_dim
        self.word_len = word_len
        self.char_cnn_filter_num = char_cnn_filter_num
        self.char_window_size = char_window_size
        self.word_embeddings_layer = EmbeddingsFactory.create(emb_type, word_seq_indexer, gpu, freeze_word_embeddings)
        self.char_embeddings_layer = LayerCharEmbeddings(gpu, char_embeddings_dim, freeze_char_embeddings,
                                                         word_len, word_seq_indexer.get_unique_characters_list())
        self.char_cnn_layer = LayerCharCNN(gpu, char_embeddings_dim, char_cnn_filter_num, char_window_size,
                                           word_len)
        self.dropout = torch.nn.Dropout(p=dropout_ratio)

        if rnn_type == 'GRU':
            self.birnn_layer = LayerBiGRU(input_dim=self.word_embeddings_layer.output_dim+self.char_cnn_layer.output_dim,
                                          hidden_dim=rnn_hidden_dim,
                                          gpu=gpu)
        elif rnn_type == 'LSTM':
            self.birnn_layer = LayerBiLSTM(input_dim=self.word_embeddings_layer.output_dim+self.char_cnn_layer.output_dim,
                                           hidden_dim=rnn_hidden_dim,
                                           gpu=gpu)
        elif rnn_type == 'Vanilla':
            self.birnn_layer = LayerBiVanilla(input_dim=self.word_embeddings_layer.output_dim+self.char_cnn_layer.output_dim,
                                              hidden_dim=rnn_hidden_dim,
                                              gpu=gpu)
        else:
            raise ValueError('Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
        self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim, out_features=class_num + 2)
        self.crf_layer = LayerCRF(gpu, states_num=class_num + 2, pad_idx=tag_seq_indexer.pad_idx, sos_idx=class_num + 1,
                                  tag_seq_indexer=tag_seq_indexer)
        self.softmax = nn.Softmax(dim=2)
        if gpu >= 0:
            self.cuda(device=self.gpu)
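
In this variant the BiRNN input is the per-token concatenation of word-level and character-level features, which is why input_dim is word_embeddings_layer.output_dim + char_cnn_layer.output_dim. A hedged sketch of that concatenation with stand-in tensors and assumed dimensions:

import torch

batch, seq_len = 2, 7
word_dim, char_cnn_dim = 100, 30            # e.g. 100-dim word vectors + 30 CNN filters

word_feats = torch.randn(batch, seq_len, word_dim)
char_feats = torch.randn(batch, seq_len, char_cnn_dim)

# The BiRNN consumes both feature blocks concatenated along the feature axis.
rnn_input = torch.cat([word_feats, char_feats], dim=-1)
print(rnn_input.shape)                      # torch.Size([2, 7, 130])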
Example #4
 def __init__(self,
              word_seq_indexer,
              tag_seq_indexer,
              class_num,
              batch_size=1,
              rnn_hidden_dim=100,
              freeze_word_embeddings=False,
              dropout_ratio=0.5,
              rnn_type='GRU',
              gpu=-1,
              freeze_char_embeddings=False,
              char_embeddings_dim=25,
              word_len=20,
              char_cnn_filter_num=30,
              char_window_size=3):
     super(TaggerBiRNNCNN, self).__init__(word_seq_indexer, tag_seq_indexer,
                                          gpu, batch_size)
     self.tag_seq_indexer = tag_seq_indexer
     self.class_num = class_num
     self.rnn_hidden_dim = rnn_hidden_dim
     self.freeze_embeddings = freeze_word_embeddings
     self.dropout_ratio = dropout_ratio
     self.rnn_type = rnn_type
     self.gpu = gpu
     self.freeze_char_embeddings = freeze_char_embeddings
     self.char_embeddings_dim = char_embeddings_dim
     self.word_len = word_len
     self.char_cnn_filter_num = char_cnn_filter_num
     self.char_window_size = char_window_size
     self.word_embeddings_layer = LayerWordEmbeddings(
         word_seq_indexer, gpu, freeze_word_embeddings)
     self.char_embeddings_layer = LayerCharEmbeddings(
         gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
         word_seq_indexer.get_unique_characters_list())
     self.char_cnn_layer = LayerCharCNN(gpu, char_embeddings_dim,
                                        char_cnn_filter_num,
                                        char_window_size, word_len)
     self.dropout = torch.nn.Dropout(p=dropout_ratio)
     if rnn_type == 'GRU':
         self.birnn_layer = LayerBiGRU(
             input_dim=self.word_embeddings_layer.output_dim +
             self.char_cnn_layer.output_dim,
             hidden_dim=rnn_hidden_dim,
             gpu=gpu)
     elif rnn_type == 'LSTM':
         self.birnn_layer = LayerBiLSTM(
             input_dim=self.word_embeddings_layer.output_dim +
             self.char_cnn_layer.output_dim,
             hidden_dim=rnn_hidden_dim,
             gpu=gpu)
     elif rnn_type == 'Vanilla':
         self.birnn_layer = LayerBiVanilla(
             input_dim=self.word_embeddings_layer.output_dim +
             self.char_cnn_layer.output_dim,
             hidden_dim=rnn_hidden_dim,
             gpu=gpu)
     else:
         raise ValueError(
             'Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
     # We add an additional class that corresponds to the zero-padded values not to be included in the loss function
     self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim,
                                out_features=class_num + 1)
     self.log_softmax_layer = nn.LogSoftmax(dim=1)
     if gpu >= 0:
         self.cuda(device=self.gpu)
     self.nll_loss = nn.NLLLoss(
         ignore_index=0
     )  # "0" target values are the zero-padded parts of the sequences
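
LayerCharCNN (not shown in this snippet) is the usual character-level CNN: each word, padded to word_len characters, is convolved and max-pooled into char_cnn_filter_num features. A minimal stand-in under that assumption, using a plain Conv1d with max-over-time pooling:

import torch
import torch.nn as nn

char_embeddings_dim, char_cnn_filter_num = 25, 30
char_window_size, word_len = 3, 20

conv = nn.Conv1d(in_channels=char_embeddings_dim,
                 out_channels=char_cnn_filter_num,
                 kernel_size=char_window_size,
                 padding=char_window_size // 2)

# A batch of 4 words, each padded to word_len characters.
char_embs = torch.randn(4, char_embeddings_dim, word_len)
conv_out = torch.relu(conv(char_embs))      # (4, char_cnn_filter_num, word_len)
word_level = conv_out.max(dim=2).values     # max over characters -> (4, 30)
print(word_level.shape)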
Example #5
    def __init__(self,
                 args,
                 word_seq_indexer,
                 tag_seq_indexer,
                 class_num,
                 batch_size=1,
                 rnn_hidden_dim=100,
                 freeze_word_embeddings=False,
                 dropout_ratio=0.5,
                 rnn_type='GRU',
                 gpu=-1,
                 freeze_char_embeddings=False,
                 char_embeddings_dim=25,
                 word_len=20,
                 char_cnn_filter_num=30,
                 char_window_size=3):
        super(TaggerBiRNNCNN, self).__init__(word_seq_indexer, tag_seq_indexer,
                                             gpu, batch_size)
        self.tag_seq_indexer = tag_seq_indexer
        self.class_num = class_num
        self.rnn_hidden_dim = rnn_hidden_dim
        self.freeze_embeddings = True
        self.dropout_ratio = dropout_ratio
        self.rnn_type = rnn_type
        self.gpu = gpu
        self.freeze_char_embeddings = freeze_char_embeddings
        self.char_embeddings_dim = char_embeddings_dim
        self.word_len = word_len
        self.char_cnn_filter_num = char_cnn_filter_num
        self.char_window_size = char_window_size
        self.if_elmo = args.if_elmo
        self.if_bert = args.if_bert
        self.if_flair = args.if_flair
        self.if_word = bool(args.if_glove or args.if_wordEmbRand or args.if_twitter_emb)
        self.if_char = bool(args.if_char_cnn or args.if_char_lstm)
        self.elmo_embeddings_dim = args.elmo_embeddings_dim
        self.bert_embeddings_dim = args.bert_embeddings_dim
        self.bert_mode = 'mean'

        self.options_file = args.options_file
        self.weight_file = args.weight_file
        emb_models_dim = []

        if args.if_wordEmbRand:
            self.word_embeddings_layer = LayerWordEmbeddings_Rand(
                word_seq_indexer, gpu, self.freeze_embeddings)
            emb_models_dim.append(self.word_embeddings_layer.output_dim)
        if args.if_glove:
            self.word_embeddings_layer = LayerWordEmbeddings(
                word_seq_indexer, gpu, self.freeze_embeddings)
            emb_models_dim.append(self.word_embeddings_layer.output_dim)

        if args.if_char_lstm:
            self.char_embeddings_layer = LayerCharEmbeddings(
                gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
                word_seq_indexer.get_unique_characters_list())
            self.char_layer = LayerCharLSTM(gpu, char_embeddings_dim,
                                            self.char_lstm_hidden_dim,
                                            word_len)
            emb_models_dim.append(self.char_layer.output_dim)
        if args.if_char_cnn:
            self.char_embeddings_layer = LayerCharEmbeddings(
                gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
                word_seq_indexer.get_unique_characters_list())
            self.char_layer = LayerCharCNN(gpu, char_embeddings_dim,
                                           char_cnn_filter_num,
                                           char_window_size, word_len)
            emb_models_dim.append(self.char_layer.output_dim)
        if args.if_elmo:
            self.elmo_embeddings_layer = LayerElmoEmbeddings(
                args, gpu, self.elmo_embeddings_dim, self.options_file,
                self.weight_file, freeze_char_embeddings, word_len)
            emb_models_dim.append(self.elmo_embeddings_layer.output_dim)
        if args.if_bert:
            self.bert_embeddings_layer = LayerBertEmbeddings(
                gpu, self.bert_embeddings_dim, self.bert_mode)
            emb_models_dim.append(self.bert_embeddings_layer.output_dim)
            print('BERT embedding layer initialized successfully')
        if args.if_flair:
            self.flair_embeddings_layer = LayerFlairEmbeddings(gpu)
            emb_models_dim.append(self.flair_embeddings_layer.output_dim)

        self.input_dim = sum(emb_models_dim)

        #
        # self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
        self.char_embeddings_layer = LayerCharEmbeddings(
            gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
            word_seq_indexer.get_unique_characters_list())
        self.char_cnn_layer = LayerCharCNN(gpu, char_embeddings_dim,
                                           char_cnn_filter_num,
                                           char_window_size, word_len)
        self.dropout = torch.nn.Dropout(p=dropout_ratio)
        if rnn_type == 'GRU':
            self.birnn_layer = LayerBiGRU(args=args,
                                          input_dim=self.input_dim,
                                          hidden_dim=rnn_hidden_dim,
                                          gpu=gpu)
        elif rnn_type == 'LSTM':
            self.birnn_layer = LayerBiLSTM(args=args,
                                           input_dim=self.input_dim,
                                           hidden_dim=rnn_hidden_dim,
                                           gpu=gpu)
        elif rnn_type == 'Vanilla':
            self.birnn_layer = LayerBiVanilla(args=args,
                                              input_dim=self.input_dim,
                                              hidden_dim=rnn_hidden_dim,
                                              gpu=gpu)
        else:
            raise ValueError(
                'Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)
        # We add an additional class that corresponds to the zero-padded values not to be included in the loss function
        self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim,
                                   out_features=class_num + 1)
        self.log_softmax_layer = nn.LogSoftmax(dim=1)
        if gpu >= 0:
            self.cuda(device=self.gpu)
        self.nll_loss = nn.NLLLoss(
            ignore_index=0
        )  # "0" target values are the zero-padded parts of the sequences
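
Whatever mix of embeddings is enabled, each variant ends the same way: a bidirectional recurrent layer followed by a Linear map to class_num + 1 per-token scores. The stand-in below (a plain nn.LSTM, not the project's LayerBiGRU/LayerBiLSTM) shows why the linear layer's in_features must equal the BiRNN's output width, i.e. twice the hidden size; all sizes are assumed:

import torch
import torch.nn as nn

input_dim, rnn_hidden_dim, class_num = 1154, 100, 9    # assumed sizes

birnn = nn.LSTM(input_size=input_dim, hidden_size=rnn_hidden_dim,
                batch_first=True, bidirectional=True)
lin_layer = nn.Linear(in_features=2 * rnn_hidden_dim,  # forward + backward states
                      out_features=class_num + 1)      # +1 for the padding class

tokens = torch.randn(2, 5, input_dim)                  # (batch, seq_len, features)
rnn_out, _ = birnn(tokens)                             # (2, 5, 2 * rnn_hidden_dim)
scores = lin_layer(rnn_out)                            # (2, 5, class_num + 1)
print(scores.shape)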
Example #6
    def __init__(self,
                 args,
                 word_seq_indexer,
                 tag_seq_indexer,
                 class_num,
                 batch_size=1,
                 rnn_hidden_dim=100,
                 freeze_word_embeddings=False,
                 dropout_ratio=0.5,
                 rnn_type='GRU',
                 gpu=-1,
                 freeze_char_embeddings=False,
                 char_embeddings_dim=25,
                 word_len=20,
                 char_cnn_filter_num=30,
                 char_window_size=3):
        super(TaggerBiRNNCNN, self).__init__(word_seq_indexer, tag_seq_indexer,
                                             gpu, batch_size)
        self.tag_seq_indexer = tag_seq_indexer
        self.class_num = class_num
        self.rnn_hidden_dim = rnn_hidden_dim
        self.freeze_embeddings = freeze_word_embeddings
        self.dropout_ratio = dropout_ratio
        self.rnn_type = rnn_type
        self.gpu = gpu
        self.freeze_char_embeddings = freeze_char_embeddings
        self.char_embeddings_dim = char_embeddings_dim
        self.word_len = word_len
        self.char_cnn_filter_num = char_cnn_filter_num
        self.char_window_size = char_window_size

        self.args = args
        self.if_bert = args.if_bert
        self.if_flair = args.if_flair

        self.dropout = torch.nn.Dropout(p=dropout_ratio)

        emb_models_dim = []
        print('loading embeddings...')
        if args.if_bert:
            # print('PYTORCH_PRETRAINED_BERT_CACHE',PYTORCH_PRETRAINED_BERT_CACHE)
            cache_dir = args.cache_dir if args.cache_dir else os.path.join(
                str(PYTORCH_PRETRAINED_BERT_CACHE), 'distributed_{}'.format(
                    args.local_rank))

            # cache_dir =os.path.join(str(PYTORCH_PRETRAINED_BERT_CACHE),'distributed_{}'.format(args.local_rank))
            print('cache_dir', cache_dir)
            # cache_dir = '/home/jlfu/.pytorch_pretrained_bert/distributed_-1'
            # cache_dir='emb/bert_model_cache/bert_cache.hdf5',
            self.bert_embeddings_layer = LayerBertEmbeddings.from_pretrained(
                args.bert_model, cache_dir=cache_dir, num_labels=class_num)
            # self.bert_embeddings_layer = LayerBertEmbeddings(gpu, freeze_bert_embeddings=True)

            reduce_dim = False
            if reduce_dim:
                self.W_bert = nn.Linear(args.bert_output_dim, 256)
                emb_models_dim.append(256)
            else:
                emb_models_dim.append(args.bert_output_dim)
        if args.if_flair:
            self.flair_embeddings_layer = LayerFlairEmbeddings(gpu)
            reduce_dim = True
            if reduce_dim:
                self.W_flair = nn.Linear(
                    self.flair_embeddings_layer.output_dim, 256).cuda()
                emb_models_dim.append(256)
            else:
                emb_models_dim.append(self.flair_embeddings_layer.output_dim)
        if args.if_elmo:
            self.elmo_embeddings_layer = LayerElmoEmbeddings(
                args, gpu, args.options_file, args.weight_file,
                freeze_char_embeddings, word_len)

            elmo_reduce_dim = False
            if elmo_reduce_dim:
                # self.W_elmo = nn.Linear(self.elmo_embeddings_layer.output_dim,256)
                # emb_models_dim.append(256)

                self.W_elmo = nn.Linear(self.elmo_embeddings_layer.output_dim,
                                        self.word_embeddings_layer.output_dim)
                emb_models_dim.append(self.word_embeddings_layer.output_dim)
            else:
                emb_models_dim.append(self.elmo_embeddings_layer.output_dim)

        # if args.if_glove:
        #     self.word_embeddings_layer = LayerWordEmbeddings(word_seq_indexer, gpu, freeze_word_embeddings)
        #     emb_models_dim.append(self.word_embeddings_layer.output_dim)
        self.if_word = False
        if args.if_wordEmbRand and not args.if_glove:
            self.word_embeddings_layer = LayerWordEmbeddings_Rand(
                word_seq_indexer, gpu, freeze_word_embeddings)
            emb_models_dim.append(self.word_embeddings_layer.output_dim)
            print('loaded random word embeddings')
            self.if_word = True
        elif args.if_glove and not args.if_wordEmbRand:
            self.word_embeddings_layer = LayerWordEmbeddings(
                args, word_seq_indexer, gpu, freeze_word_embeddings)
            emb_models_dim.append(self.word_embeddings_layer.output_dim)
            print('loaded GloVe word embeddings')
            self.if_word = True
        else:
            print('can only use one word embedding (random or glove)')

        self.if_char = False
        if args.if_cnnChar and not args.if_lstmChar:
            self.char_embeddings_layer = LayerCharEmbeddings(
                gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
                word_seq_indexer.get_unique_characters_list())
            self.char_layer = LayerCharCNN(gpu, char_embeddings_dim,
                                           char_cnn_filter_num,
                                           char_window_size, word_len)
            emb_models_dim.append(self.char_layer.output_dim)
            self.if_char = True

        elif args.if_lstmChar and not args.if_cnnChar:
            self.char_embeddings_layer = LayerCharEmbeddings(
                gpu, char_embeddings_dim, freeze_char_embeddings, word_len,
                word_seq_indexer.get_unique_characters_list())
            self.char_layer = LayerCharLSTM(gpu, char_embeddings_dim,
                                            self.char_lstm_hidden_dim,
                                            word_len)
            emb_models_dim.append(self.char_layer.output_dim)
            self.if_char = True
        else:
            print('can only use one char embedding (cnnChar or lstmChar)')

        self.input_dim = sum(emb_models_dim)

        if self.args.transformer:
            self.n_head = self.args.trans_head
            self.emb_dim = (self.input_dim // self.n_head) * self.n_head
            print('self.emb_dim', self.emb_dim)
            print('self.input_dim', self.input_dim)
            self.emb_linear = nn.Linear(in_features=self.input_dim,
                                        out_features=self.emb_dim)
            self.transEncodeLayer = TransformerEncoderLayer(
                d_model=self.emb_dim, nhead=self.n_head)
            self.transformer_encoder = TransformerEncoder(
                encoder_layer=self.transEncodeLayer, num_layers=6)
            self.input_dim = self.emb_dim

            self.transClassify_lin = nn.Linear(in_features=self.emb_dim,
                                               out_features=class_num + 1)

        if rnn_type == 'GRU':
            self.birnn_layer = LayerBiGRU(args=args,
                                          input_dim=self.input_dim,
                                          hidden_dim=rnn_hidden_dim,
                                          gpu=gpu)
        elif rnn_type == 'LSTM':
            self.birnn_layer = LayerBiLSTM(args=args,
                                           input_dim=self.input_dim,
                                           hidden_dim=rnn_hidden_dim,
                                           gpu=gpu)
        elif rnn_type == 'Vanilla':
            self.birnn_layer = LayerBiVanilla(args=args,
                                              input_dim=self.input_dim,
                                              hidden_dim=rnn_hidden_dim,
                                              gpu=gpu)
        elif self.rnn_type == 'SATN':
            self.birnn_layer = LayerSelfAttn(args=args,
                                             input_dim=self.input_dim,
                                             hidden_dim=rnn_hidden_dim,
                                             gpu=gpu)
        elif self.rnn_type == 'WCNN':
            self.birnn_layer = LayerWCNN(
                args=args,
                input_dim=self.input_dim,
                hidden_dim=rnn_hidden_dim,
                cnn_layer=args.wcnn_layer,
                # wcnn_hidden_dim =args.wcnn_hidden_dim,
                gpu=gpu)
        else:
            raise ValueError(
                'Unknown rnn_type = %s, must be one of "GRU", "LSTM", "Vanilla", "SATN" or "WCNN"' % rnn_type)

        self.lin_layer = nn.Linear(in_features=self.birnn_layer.output_dim,
                                   out_features=class_num + 1)
        self.log_softmax_layer = nn.LogSoftmax(dim=1)
        if gpu >= 0:
            self.cuda(device=self.gpu)
        self.nll_loss = nn.NLLLoss(
            ignore_index=0
        )  # "0" target values are the zero-padded parts of the sequences
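
When args.transformer is enabled, the concatenated embedding width is first projected down to the nearest multiple of the head count, because nn.TransformerEncoderLayer requires d_model to be divisible by nhead. A self-contained sketch of that projection trick with assumed sizes (not the project's args object):

import torch
import torch.nn as nn

input_dim, n_head = 1154, 8
emb_dim = (input_dim // n_head) * n_head    # 1152, the largest head-divisible width

emb_linear = nn.Linear(input_dim, emb_dim)  # project features to the adjusted width
encoder_layer = nn.TransformerEncoderLayer(d_model=emb_dim, nhead=n_head)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)

tokens = torch.randn(5, 2, input_dim)       # (seq_len, batch, features), the default layout
encoded = transformer_encoder(emb_linear(tokens))
print(encoded.shape)                        # torch.Size([5, 2, 1152])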
Example #7
    def __init__(self,
                 word_seq_indexer,
                 tag_seq_indexer,
                 class_num,
                 batch_size=1,
                 rnn_hidden_dim=100,
                 freeze_word_embeddings=False,
                 dropout_ratio=0.5,
                 rnn_type='LSTM',
                 gpu=-1,
                 latent_dim=None):
        super(TaggerAttentive, self).__init__(word_seq_indexer,
                                              tag_seq_indexer, gpu, batch_size)
        self.tag_seq_indexer = tag_seq_indexer
        self.class_num = class_num
        self.rnn_hidden_dim = rnn_hidden_dim
        self.freeze_embeddings = freeze_word_embeddings
        self.dropout_ratio = dropout_ratio
        self.rnn_type = rnn_type
        self.gpu = gpu
        self.word_embeddings_layer = LayerWordEmbeddings(
            word_seq_indexer, gpu, freeze_word_embeddings)
        self.dropout = torch.nn.Dropout(p=dropout_ratio)
        self.latent_dim = latent_dim
        if rnn_type == 'GRU':
            self.birnn_layer = LayerBiGRU(
                input_dim=self.word_embeddings_layer.output_dim,
                hidden_dim=rnn_hidden_dim,
                gpu=gpu)
        elif rnn_type == 'LSTM':
            self.birnn_layer = LayerBiLSTM(
                input_dim=self.word_embeddings_layer.output_dim,
                hidden_dim=rnn_hidden_dim,
                gpu=gpu)
        elif rnn_type == 'Vanilla':
            self.birnn_layer = LayerBiVanilla(
                input_dim=self.word_embeddings_layer.output_dim,
                hidden_dim=rnn_hidden_dim,
                gpu=gpu)
        else:
            raise ValueError(
                'Unknown rnn_type = %s, must be "GRU", "LSTM" or "Vanilla"' % rnn_type)

        # equal weight attention
        self.attention = LayerAttention(
            input_dim=self.birnn_layer.output_dim,
            embedding_dim=self.word_embeddings_layer.output_dim,
            output_dim=self.birnn_layer.output_dim,
            gpu=gpu)

        # dimension reduction
        if latent_dim is not None:
            self.dim_red = nn.Sequential(
                nn.Linear(in_features=self.attention.output_dim +
                          self.word_embeddings_layer.output_dim,
                          out_features=latent_dim), nn.Sigmoid())
            self.dim_red.apply(self.inititialize_random_projection)

            lin_layer_in = latent_dim
        else:
            lin_layer_in = self.attention.output_dim + self.word_embeddings_layer.output_dim

        # We add an additional class that corresponds to the zero-padded values not to be included in the loss function
        self.lin_layer = nn.Linear(in_features=lin_layer_in,
                                   out_features=class_num + 1)

        self.log_softmax_layer = nn.LogSoftmax(dim=1)
        if gpu >= 0:
            self.cuda(device=self.gpu)
        self.nll_loss = nn.NLLLoss(
            ignore_index=0
        )  # "0" target values are the zero-padded parts of the sequences
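
In the attentive tagger the classifier input is the attention output concatenated with the word embeddings, optionally squeezed to latent_dim through a Linear + Sigmoid before the final class_num + 1 projection. A hedged sketch of that dimension-reduction path with stand-in sizes (not the project's LayerAttention):

import torch
import torch.nn as nn

attn_dim, word_dim, latent_dim, class_num = 200, 100, 64, 9

dim_red = nn.Sequential(
    nn.Linear(attn_dim + word_dim, latent_dim),   # squeeze the concatenated features
    nn.Sigmoid())
lin_layer = nn.Linear(latent_dim, class_num + 1)  # +1 for the padding class

attn_out = torch.randn(2, 7, attn_dim)            # (batch, seq_len, attention features)
word_emb = torch.randn(2, 7, word_dim)
features = torch.cat([attn_out, word_emb], dim=-1)
scores = lin_layer(dim_red(features))
print(scores.shape)                               # torch.Size([2, 7, 10])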