Example #1
    def __init__(self, args, vocab, emb_matrix=None, share_hid=False):
        super().__init__()

        self.vocab = vocab
        self.args = args
        self.share_hid = share_hid
        self.unsaved_modules = []

        def add_unsaved_module(name, module):
            self.unsaved_modules += [name]
            setattr(self, name, module)

        # input layers
        input_size = 0
        if self.args['word_emb_dim'] > 0:
            # frequent word embeddings
            self.word_emb = nn.Embedding(len(vocab['word']),
                                         self.args['word_emb_dim'],
                                         padding_idx=0)
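            # NOTE: self.init_range is not assigned in this constructor; it is
            # assumed to be defined elsewhere on the class before the weight
            # initialisations below run.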
            self.word_emb.weight.data.uniform_(-self.init_range,
                                               self.init_range)
            input_size += self.args['word_emb_dim']

        if self.args['lemma_emb_dim'] > 0:
            self.lemma_emb = nn.Embedding(len(vocab['lemma']),
                                          self.args['lemma_emb_dim'],
                                          padding_idx=0)
            self.lemma_emb.weight.data.uniform_(-self.init_range,
                                                self.init_range)
            input_size += self.args['lemma_emb_dim']

        if self.args['tag_emb_dim'] > 0:
            self.upos_emb = nn.Embedding(len(vocab['upos']),
                                         self.args['tag_emb_dim'],
                                         padding_idx=0)
            self.upos_emb.weight.data.uniform_(-self.init_range,
                                               self.init_range)

            if not isinstance(vocab['xpos'], CompositeVocab):
                self.xpos_emb = nn.Embedding(len(vocab['xpos']),
                                             self.args['tag_emb_dim'],
                                             padding_idx=0)
                self.xpos_emb.weight.data.uniform_(-self.init_range,
                                                   self.init_range)
            else:
                self.xpos_emb = nn.ModuleList()

                for l in vocab['xpos'].lens():
                    self.xpos_emb.append(
                        nn.Embedding(l,
                                     self.args['tag_emb_dim'],
                                     padding_idx=0))
                    self.xpos_emb[-1].weight.data.uniform_(
                        -self.init_range, self.init_range)

            self.ufeats_emb = nn.ModuleList()

            for l in vocab['feats'].lens():
                self.ufeats_emb.append(
                    nn.Embedding(l, self.args['tag_emb_dim'], padding_idx=0))
                self.ufeats_emb[-1].weight.data.uniform_(
                    -self.init_range, self.init_range)

            input_size += self.args['tag_emb_dim'] * 2

        if self.args['char'] and self.args['char_emb_dim'] > 0:
            self.charmodel = CharacterModel(args, vocab)
            self.trans_char = nn.Linear(self.args['char_hidden_dim'],
                                        self.args['transformed_dim'],
                                        bias=False)
            input_size += self.args['transformed_dim']

        if self.args['pretrain']:
            # pretrained embeddings; by default these are not saved into the model file
            add_unsaved_module(
                'pretrained_emb',
                nn.Embedding.from_pretrained(torch.from_numpy(emb_matrix),
                                             freeze=True))
            self.trans_pretrained = nn.Linear(emb_matrix.shape[1],
                                              self.args['transformed_dim'],
                                              bias=False)
            input_size += self.args['transformed_dim']

        # recurrent layers
        rnn_params = {
            'input_size': input_size,
            'hidden_size': self.args['hidden_dim'],
            'num_layers': self.args['num_layers'],
            'batch_first': True,
            'bidirectional': False,
            'dropout': self.args['dropout'],
            # the dropout key depends on the LSTM variant: HighwayLSTM takes
            # rec_dropout, the weight-dropped MultiLayerLSTM takes weight_dropout
            ('rec_dropout' if self.args['lstm_type'] == 'hlstm'
             else 'weight_dropout'): self.args['rec_dropout'],
        }
        if args['lstm_type'] == 'hlstm':
            self.lstm_forward = HighwayLSTM(**rnn_params,
                                            highway_func=torch.tanh)
            self.lstm_backward = HighwayLSTM(**rnn_params,
                                             highway_func=torch.tanh)
        elif args['lstm_type'] == 'wdlstm':
            out_dim = (args['word_emb_dim']
                       if args['tie_softmax'] else args['hidden_dim'])
            self.lstm_forward = MultiLayerLSTM(**rnn_params,
                                               output_size=out_dim)
            self.lstm_backward = MultiLayerLSTM(**rnn_params,
                                                output_size=out_dim)
        else:
            raise ValueError('LSTM type not supported')

        self.drop_replacement = nn.Parameter(
            torch.randn(input_size) / np.sqrt(input_size))

        if args['tie_softmax']:
            if (self.args['lstm_type'] == 'hlstm'
                    and self.args['hidden_dim'] != self.args['word_emb_dim']):
                raise ValueError(
                    'If tie_softmax is true and HighwayLSTM is used, '
                    'hidden_dim and word_emb_dim must be equal')
            self.dec_forward = nn.Linear(self.args['word_emb_dim'],
                                         len(vocab['word']))
            self.dec_backward = nn.Linear(self.args['word_emb_dim'],
                                          len(vocab['word']))
            self.dec_forward.weight = self.word_emb.weight
            self.dec_backward.weight = self.word_emb.weight

        else:
            self.dec_forward = nn.Linear(self.args['hidden_dim'],
                                         len(vocab['word']))
            self.dec_backward = nn.Linear(self.args['hidden_dim'],
                                          len(vocab['word']))

        # criterion
        self.crit = nn.CrossEntropyLoss(ignore_index=-1,
                                        reduction='sum')  # ignore padding

        self.drop = nn.Dropout(args['dropout'])
        self.worddrop = WordDropout(args['word_dropout'])
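The constructor in Example #1 (a forward/backward word-level language model) is configured entirely through the args dict, plus vocab and, when args['pretrain'] is set, emb_matrix. The sketch below lists every key the code above reads; the key names come from the code, while the values are illustrative assumptions rather than defaults from the source.

    # Hypothetical configuration for the LM constructor in Example #1.
    # Key names are taken from the code above; the values are placeholders only.
    args = {
        'word_emb_dim': 100,     # > 0 enables self.word_emb
        'lemma_emb_dim': 100,    # > 0 enables self.lemma_emb
        'tag_emb_dim': 50,       # > 0 enables the upos/xpos/ufeats embeddings
        'char': True,            # with char_emb_dim > 0, enables CharacterModel
        'char_emb_dim': 100,
        'char_hidden_dim': 400,
        'transformed_dim': 125,  # projection size for char and pretrained inputs
        'pretrain': True,        # requires emb_matrix to be passed in
        'hidden_dim': 100,
        'num_layers': 2,
        'dropout': 0.5,
        'rec_dropout': 0.2,      # recurrent ('hlstm') or weight ('wdlstm') dropout
        'lstm_type': 'hlstm',    # 'hlstm' (HighwayLSTM) or 'wdlstm' (MultiLayerLSTM)
        'tie_softmax': True,     # with 'hlstm' this requires hidden_dim == word_emb_dim
        'word_dropout': 0.33,
    }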
Example #2
    def __init__(self, args, vocab, emb_matrix=None, share_hid=False):
        super().__init__()

        self.vocab = vocab
        self.args = args
        self.share_hid = share_hid
        self.unsaved_modules = []

        def add_unsaved_module(name, module):
            self.unsaved_modules += [name]
            setattr(self, name, module)

        # input layers
        input_size = 0
        if self.args['word_emb_dim'] > 0:
            # frequent word and lemma embeddings (lemma reuses word_emb_dim)
            self.word_emb = nn.Embedding(len(vocab['word']), self.args['word_emb_dim'], padding_idx=0)
            self.lemma_emb = nn.Embedding(len(vocab['lemma']), self.args['word_emb_dim'], padding_idx=0)
            input_size += self.args['word_emb_dim'] * 2

        if self.args['tag_emb_dim'] > 0:
            self.upos_emb = nn.Embedding(len(vocab['upos']), self.args['tag_emb_dim'], padding_idx=0)

            if not isinstance(vocab['xpos'], CompositeVocab):
                self.xpos_emb = nn.Embedding(len(vocab['xpos']), self.args['tag_emb_dim'], padding_idx=0)
            else:
                self.xpos_emb = nn.ModuleList()

                for l in vocab['xpos'].lens():
                    self.xpos_emb.append(nn.Embedding(l, self.args['tag_emb_dim'], padding_idx=0))

            self.ufeats_emb = nn.ModuleList()

            for l in vocab['feats'].lens():
                self.ufeats_emb.append(nn.Embedding(l, self.args['tag_emb_dim'], padding_idx=0))

            input_size += self.args['tag_emb_dim'] * 2

        if self.args['char'] and self.args['char_emb_dim'] > 0:
            self.charmodel = CharacterModel(args, vocab)
            self.trans_char = nn.Linear(self.args['char_hidden_dim'], self.args['transformed_dim'], bias=False)
            input_size += self.args['transformed_dim']

        if self.args['pretrain']:
            # pretrained embeddings; by default these are not saved into the model file
            add_unsaved_module('pretrained_emb', nn.Embedding.from_pretrained(torch.from_numpy(emb_matrix), freeze=True))
            self.trans_pretrained = nn.Linear(emb_matrix.shape[1], self.args['transformed_dim'], bias=False)
            input_size += self.args['transformed_dim']

        # recurrent layers
        self.parserlstm = HighwayLSTM(input_size, self.args['hidden_dim'], self.args['num_layers'],
                                      batch_first=True, bidirectional=True, dropout=self.args['dropout'],
                                      rec_dropout=self.args['rec_dropout'], highway_func=torch.tanh)
        self.drop_replacement = nn.Parameter(torch.randn(input_size) / np.sqrt(input_size))
        self.parserlstm_h_init = nn.Parameter(torch.zeros(2 * self.args['num_layers'], 1, self.args['hidden_dim']))
        self.parserlstm_c_init = nn.Parameter(torch.zeros(2 * self.args['num_layers'], 1, self.args['hidden_dim']))

        # classifiers
        self.unlabeled = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                            self.args['deep_biaff_hidden_dim'], 1,
                                            pairwise=True, dropout=args['dropout'])
        self.deprel = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                         self.args['deep_biaff_hidden_dim'], len(vocab['deprel']),
                                         pairwise=True, dropout=args['dropout'])
        if args['linearization']:
            self.linearization = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                                    self.args['deep_biaff_hidden_dim'], 1,
                                                    pairwise=True, dropout=args['dropout'])
        if args['distance']:
            self.distance = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                               self.args['deep_biaff_hidden_dim'], 1,
                                               pairwise=True, dropout=args['dropout'])

        # criterion
        self.crit = nn.CrossEntropyLoss(ignore_index=-1, reduction='sum') # ignore padding

        self.drop = nn.Dropout(args['dropout'])
        self.worddrop = WordDropout(args['word_dropout'])
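Example #2 builds a biaffine dependency parser head: a bidirectional HighwayLSTM followed by DeepBiaffineScorer modules for arcs and dependency relations. A similarly assumed configuration covering every args key that constructor reads:

    # Hypothetical args for the parser constructor in Example #2; values are placeholders.
    args = {
        'word_emb_dim': 75,            # also used for the lemma embedding in this constructor
        'tag_emb_dim': 50,
        'char': True,
        'char_emb_dim': 100,
        'char_hidden_dim': 400,
        'transformed_dim': 125,
        'pretrain': True,              # requires emb_matrix
        'hidden_dim': 400,
        'num_layers': 3,
        'dropout': 0.5,
        'rec_dropout': 0.2,
        'deep_biaff_hidden_dim': 400,  # hidden size of the DeepBiaffineScorer heads
        'linearization': True,         # adds the optional linearization scorer
        'distance': True,              # adds the optional distance scorer
        'word_dropout': 0.33,
    }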
Example #3
    def __init__(self, args, vocab, emb_matrix=None, share_hid=False):
        super().__init__()

        self.vocab = vocab
        self.args = args
        self.share_hid = share_hid
        self.unsaved_modules = []

        def add_unsaved_module(name, module):
            self.unsaved_modules += [name]
            setattr(self, name, module)

        # input layers
        input_size = 0
        if self.args['word_emb_dim'] > 0:
            # frequent word embeddings
            self.word_emb = nn.Embedding(len(vocab['word']),
                                         self.args['word_emb_dim'],
                                         padding_idx=0)
            input_size += self.args['word_emb_dim']

        if not share_hid:
            # upos embeddings
            self.upos_emb = nn.Embedding(len(vocab['upos']),
                                         self.args['tag_emb_dim'],
                                         padding_idx=0)

        if self.args['char'] and self.args['char_emb_dim'] > 0:
            self.charmodel = CharacterModel(args, vocab)
            self.trans_char = nn.Linear(self.args['char_hidden_dim'],
                                        self.args['transformed_dim'],
                                        bias=False)
            input_size += self.args['transformed_dim']

        if self.args['pretrain']:
            # pretrained embeddings; by default these are not saved into the model file
            add_unsaved_module(
                'pretrained_emb',
                nn.Embedding.from_pretrained(torch.from_numpy(emb_matrix),
                                             freeze=True))
            self.trans_pretrained = nn.Linear(emb_matrix.shape[1],
                                              self.args['transformed_dim'],
                                              bias=False)
            input_size += self.args['transformed_dim']

        # recurrent layers
        self.taggerlstm = HighwayLSTM(input_size,
                                      self.args['hidden_dim'],
                                      self.args['num_layers'],
                                      batch_first=True,
                                      bidirectional=True,
                                      dropout=self.args['dropout'],
                                      rec_dropout=self.args['rec_dropout'],
                                      highway_func=torch.tanh)
        self.drop_replacement = nn.Parameter(
            torch.randn(input_size) / np.sqrt(input_size))
        self.taggerlstm_h_init = nn.Parameter(
            torch.zeros(2 * self.args['num_layers'], 1,
                        self.args['hidden_dim']))
        self.taggerlstm_c_init = nn.Parameter(
            torch.zeros(2 * self.args['num_layers'], 1,
                        self.args['hidden_dim']))

        # classifiers
        self.upos_hid = nn.Linear(self.args['hidden_dim'] * 2,
                                  self.args['deep_biaff_hidden_dim'])
        self.upos_clf = nn.Linear(self.args['deep_biaff_hidden_dim'],
                                  len(vocab['upos']))
        self.upos_clf.weight.data.zero_()
        self.upos_clf.bias.data.zero_()

        if share_hid:
            clf_constructor = lambda insize, outsize: nn.Linear(insize, outsize)
        else:
            self.xpos_hid = nn.Linear(
                self.args['hidden_dim'] * 2,
                self.args['deep_biaff_hidden_dim']
                if not isinstance(vocab['xpos'], CompositeVocab)
                else self.args['composite_deep_biaff_hidden_dim'])
            self.ufeats_hid = nn.Linear(
                self.args['hidden_dim'] * 2,
                self.args['composite_deep_biaff_hidden_dim'])
            clf_constructor = lambda insize, outsize: BiaffineScorer(
                insize, self.args['tag_emb_dim'], outsize)

        if isinstance(vocab['xpos'], CompositeVocab):
            self.xpos_clf = nn.ModuleList()
            for l in vocab['xpos'].lens():
                self.xpos_clf.append(
                    clf_constructor(
                        self.args['composite_deep_biaff_hidden_dim'], l))
        else:
            self.xpos_clf = clf_constructor(self.args['deep_biaff_hidden_dim'],
                                            len(vocab['xpos']))
            if share_hid:
                self.xpos_clf.weight.data.zero_()
                self.xpos_clf.bias.data.zero_()

        self.ufeats_clf = nn.ModuleList()
        for l in vocab['feats'].lens():
            if share_hid:
                self.ufeats_clf.append(
                    clf_constructor(self.args['deep_biaff_hidden_dim'], l))
                self.ufeats_clf[-1].weight.data.zero_()
                self.ufeats_clf[-1].bias.data.zero_()
            else:
                self.ufeats_clf.append(
                    clf_constructor(
                        self.args['composite_deep_biaff_hidden_dim'], l))

        # criterion
        self.crit = nn.CrossEntropyLoss(ignore_index=0)  # ignore padding

        self.drop = nn.Dropout(args['dropout'])
        self.worddrop = WordDropout(args['word_dropout'])
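Example #3 builds a POS/morphological-feature tagger: a bidirectional HighwayLSTM with UPOS, XPOS and UFeats classifiers. The args keys it reads, again with assumed placeholder values:

    # Hypothetical args for the tagger constructor in Example #3; values are placeholders.
    args = {
        'word_emb_dim': 75,
        'tag_emb_dim': 50,                       # upos_emb and the BiaffineScorer heads
        'char': True,
        'char_emb_dim': 100,
        'char_hidden_dim': 400,
        'transformed_dim': 125,
        'pretrain': True,                        # requires emb_matrix
        'hidden_dim': 200,
        'num_layers': 2,
        'dropout': 0.5,
        'rec_dropout': 0.2,
        'deep_biaff_hidden_dim': 400,            # UPOS head (and XPOS when the vocab is not composite)
        'composite_deep_biaff_hidden_dim': 100,  # hidden size for the composite XPOS and UFeats heads
        'word_dropout': 0.33,
    }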