def __init__(self, args, vocab, emb_matrix=None, share_hid=False):
    super().__init__()
    self.vocab = vocab
    self.args = args
    self.share_hid = share_hid
    self.unsaved_modules = []

    def add_unsaved_module(name, module):
        self.unsaved_modules += [name]
        setattr(self, name, module)

    # input layers
    # NOTE: init_range is assumed to be defined elsewhere on the class (e.g., as a class attribute).
    input_size = 0
    if self.args['word_emb_dim'] > 0:
        # frequent word embeddings
        self.word_emb = nn.Embedding(len(vocab['word']), self.args['word_emb_dim'], padding_idx=0)
        self.word_emb.weight.data.uniform_(-self.init_range, self.init_range)
        input_size += self.args['word_emb_dim']

    if self.args['lemma_emb_dim'] > 0:
        self.lemma_emb = nn.Embedding(len(vocab['lemma']), self.args['lemma_emb_dim'], padding_idx=0)
        self.lemma_emb.weight.data.uniform_(-self.init_range, self.init_range)
        input_size += self.args['lemma_emb_dim']

    if self.args['tag_emb_dim'] > 0:
        self.upos_emb = nn.Embedding(len(vocab['upos']), self.args['tag_emb_dim'], padding_idx=0)
        self.upos_emb.weight.data.uniform_(-self.init_range, self.init_range)

        if not isinstance(vocab['xpos'], CompositeVocab):
            self.xpos_emb = nn.Embedding(len(vocab['xpos']), self.args['tag_emb_dim'], padding_idx=0)
            self.xpos_emb.weight.data.uniform_(-self.init_range, self.init_range)
        else:
            self.xpos_emb = nn.ModuleList()
            for l in vocab['xpos'].lens():
                self.xpos_emb.append(nn.Embedding(l, self.args['tag_emb_dim'], padding_idx=0))
                self.xpos_emb[-1].weight.data.uniform_(-self.init_range, self.init_range)

        self.ufeats_emb = nn.ModuleList()
        for l in vocab['feats'].lens():
            self.ufeats_emb.append(nn.Embedding(l, self.args['tag_emb_dim'], padding_idx=0))
            self.ufeats_emb[-1].weight.data.uniform_(-self.init_range, self.init_range)

        input_size += self.args['tag_emb_dim'] * 2

    if self.args['char'] and self.args['char_emb_dim'] > 0:
        self.charmodel = CharacterModel(args, vocab)
        self.trans_char = nn.Linear(self.args['char_hidden_dim'], self.args['transformed_dim'], bias=False)
        input_size += self.args['transformed_dim']

    if self.args['pretrain']:
        # pretrained embeddings, by default this won't be saved into model file
        add_unsaved_module('pretrained_emb', nn.Embedding.from_pretrained(torch.from_numpy(emb_matrix), freeze=True))
        self.trans_pretrained = nn.Linear(emb_matrix.shape[1], self.args['transformed_dim'], bias=False)
        input_size += self.args['transformed_dim']

    # recurrent layers
    rnn_params = {
        'input_size': input_size,
        'hidden_size': self.args['hidden_dim'],
        'num_layers': self.args['num_layers'],
        'batch_first': True,
        'bidirectional': False,
        'dropout': self.args['dropout'],
        ('rec_dropout' if self.args['lstm_type'] == 'hlstm' else 'weight_dropout'): self.args['rec_dropout'],
    }
    if args['lstm_type'] == 'hlstm':
        self.lstm_forward = HighwayLSTM(**rnn_params, highway_func=torch.tanh)
        self.lstm_backward = HighwayLSTM(**rnn_params, highway_func=torch.tanh)
    elif args['lstm_type'] == 'wdlstm':
        out_dim = args['word_emb_dim'] if args['tie_softmax'] else args['hidden_dim']
        self.lstm_forward = MultiLayerLSTM(**rnn_params, output_size=out_dim)
        self.lstm_backward = MultiLayerLSTM(**rnn_params, output_size=out_dim)
    else:
        raise ValueError('LSTM type not supported')

    self.drop_replacement = nn.Parameter(torch.randn(input_size) / np.sqrt(input_size))

    if args['tie_softmax']:
        if self.args['lstm_type'] == 'hlstm' and self.args['hidden_dim'] != self.args['word_emb_dim']:
            raise ValueError('If tie_softmax is true and HighwayLSTM is used, hidden_dim and word_emb_dim must be equal')
        self.dec_forward = nn.Linear(self.args['word_emb_dim'], len(vocab['word']))
        self.dec_backward = nn.Linear(self.args['word_emb_dim'], len(vocab['word']))
        self.dec_forward.weight = self.word_emb.weight
        self.dec_backward.weight = self.word_emb.weight
    else:
        self.dec_forward = nn.Linear(self.args['hidden_dim'], len(vocab['word']))
        self.dec_backward = nn.Linear(self.args['hidden_dim'], len(vocab['word']))

    # criterion
    self.crit = nn.CrossEntropyLoss(ignore_index=-1, reduction='sum')  # ignore padding

    self.drop = nn.Dropout(args['dropout'])
    self.worddrop = WordDropout(args['word_dropout'])
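
# Hedged illustration (added for clarity; not part of the original model code).
# The tie_softmax branch above shares one weight matrix between the input word
# embedding and the output decoders. This works because nn.Embedding stores its
# weight as (vocab_size, emb_dim) and nn.Linear(emb_dim, vocab_size) stores its
# weight as (out_features, in_features) = (vocab_size, emb_dim), so the shapes
# match. Function and argument names here are placeholders.
def _tied_softmax_sketch(vocab_size=1000, emb_dim=128):
    emb = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
    dec = nn.Linear(emb_dim, vocab_size)
    dec.weight = emb.weight  # both modules now train the same parameter tensor
    return emb, dec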
def __init__(self, args, vocab, emb_matrix=None, share_hid=False):
    super().__init__()
    self.vocab = vocab
    self.args = args
    self.share_hid = share_hid
    self.unsaved_modules = []

    def add_unsaved_module(name, module):
        self.unsaved_modules += [name]
        setattr(self, name, module)

    # input layers
    input_size = 0
    if self.args['word_emb_dim'] > 0:
        # frequent word embeddings
        self.word_emb = nn.Embedding(len(vocab['word']), self.args['word_emb_dim'], padding_idx=0)
        self.lemma_emb = nn.Embedding(len(vocab['lemma']), self.args['word_emb_dim'], padding_idx=0)
        input_size += self.args['word_emb_dim'] * 2

    if self.args['tag_emb_dim'] > 0:
        self.upos_emb = nn.Embedding(len(vocab['upos']), self.args['tag_emb_dim'], padding_idx=0)

        if not isinstance(vocab['xpos'], CompositeVocab):
            self.xpos_emb = nn.Embedding(len(vocab['xpos']), self.args['tag_emb_dim'], padding_idx=0)
        else:
            self.xpos_emb = nn.ModuleList()
            for l in vocab['xpos'].lens():
                self.xpos_emb.append(nn.Embedding(l, self.args['tag_emb_dim'], padding_idx=0))

        self.ufeats_emb = nn.ModuleList()
        for l in vocab['feats'].lens():
            self.ufeats_emb.append(nn.Embedding(l, self.args['tag_emb_dim'], padding_idx=0))

        input_size += self.args['tag_emb_dim'] * 2

    if self.args['char'] and self.args['char_emb_dim'] > 0:
        self.charmodel = CharacterModel(args, vocab)
        self.trans_char = nn.Linear(self.args['char_hidden_dim'], self.args['transformed_dim'], bias=False)
        input_size += self.args['transformed_dim']

    if self.args['pretrain']:
        # pretrained embeddings, by default this won't be saved into model file
        add_unsaved_module('pretrained_emb', nn.Embedding.from_pretrained(torch.from_numpy(emb_matrix), freeze=True))
        self.trans_pretrained = nn.Linear(emb_matrix.shape[1], self.args['transformed_dim'], bias=False)
        input_size += self.args['transformed_dim']

    # recurrent layers
    self.parserlstm = HighwayLSTM(input_size, self.args['hidden_dim'], self.args['num_layers'],
                                  batch_first=True, bidirectional=True, dropout=self.args['dropout'],
                                  rec_dropout=self.args['rec_dropout'], highway_func=torch.tanh)
    self.drop_replacement = nn.Parameter(torch.randn(input_size) / np.sqrt(input_size))
    self.parserlstm_h_init = nn.Parameter(torch.zeros(2 * self.args['num_layers'], 1, self.args['hidden_dim']))
    self.parserlstm_c_init = nn.Parameter(torch.zeros(2 * self.args['num_layers'], 1, self.args['hidden_dim']))

    # classifiers
    self.unlabeled = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                        self.args['deep_biaff_hidden_dim'], 1,
                                        pairwise=True, dropout=args['dropout'])
    self.deprel = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                     self.args['deep_biaff_hidden_dim'], len(vocab['deprel']),
                                     pairwise=True, dropout=args['dropout'])
    if args['linearization']:
        self.linearization = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                                self.args['deep_biaff_hidden_dim'], 1,
                                                pairwise=True, dropout=args['dropout'])
    if args['distance']:
        self.distance = DeepBiaffineScorer(2 * self.args['hidden_dim'], 2 * self.args['hidden_dim'],
                                           self.args['deep_biaff_hidden_dim'], 1,
                                           pairwise=True, dropout=args['dropout'])

    # criterion
    self.crit = nn.CrossEntropyLoss(ignore_index=-1, reduction='sum')  # ignore padding

    self.drop = nn.Dropout(args['dropout'])
    self.worddrop = WordDropout(args['word_dropout'])
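
# Hedged sketch (added; a simplified stand-in, not the actual DeepBiaffineScorer).
# Because the parser LSTM above is bidirectional, each word representation has
# 2 * hidden_dim features, which is why every scorer takes 2 * hidden_dim on both
# inputs. A bare pairwise biaffine arc score over head/dependent states could look
# like this; the function name and arguments are illustrative only.
def _pairwise_biaffine_sketch(head, dep, W):
    # head: (batch, n, d), dep: (batch, n, d), W: (d + 1, d + 1)
    # append a constant bias feature to both sides, then score every (dep, head) pair
    ones = head.new_ones(head.shape[0], head.shape[1], 1)
    h = torch.cat([head, ones], dim=-1)
    d = torch.cat([dep, ones], dim=-1)
    return torch.einsum('bid,de,bje->bij', d, W, h)  # (batch, n, n) arc scores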
def __init__(self, args, vocab, emb_matrix=None, share_hid=False):
    super().__init__()
    self.vocab = vocab
    self.args = args
    self.share_hid = share_hid
    self.unsaved_modules = []

    def add_unsaved_module(name, module):
        self.unsaved_modules += [name]
        setattr(self, name, module)

    # input layers
    input_size = 0
    if self.args['word_emb_dim'] > 0:
        # frequent word embeddings
        self.word_emb = nn.Embedding(len(vocab['word']), self.args['word_emb_dim'], padding_idx=0)
        input_size += self.args['word_emb_dim']

    if not share_hid:
        # upos embeddings
        self.upos_emb = nn.Embedding(len(vocab['upos']), self.args['tag_emb_dim'], padding_idx=0)

    if self.args['char'] and self.args['char_emb_dim'] > 0:
        self.charmodel = CharacterModel(args, vocab)
        self.trans_char = nn.Linear(self.args['char_hidden_dim'], self.args['transformed_dim'], bias=False)
        input_size += self.args['transformed_dim']

    if self.args['pretrain']:
        # pretrained embeddings, by default this won't be saved into model file
        add_unsaved_module('pretrained_emb', nn.Embedding.from_pretrained(torch.from_numpy(emb_matrix), freeze=True))
        self.trans_pretrained = nn.Linear(emb_matrix.shape[1], self.args['transformed_dim'], bias=False)
        input_size += self.args['transformed_dim']

    # recurrent layers
    self.taggerlstm = HighwayLSTM(input_size, self.args['hidden_dim'], self.args['num_layers'],
                                  batch_first=True, bidirectional=True, dropout=self.args['dropout'],
                                  rec_dropout=self.args['rec_dropout'], highway_func=torch.tanh)
    self.drop_replacement = nn.Parameter(torch.randn(input_size) / np.sqrt(input_size))
    self.taggerlstm_h_init = nn.Parameter(torch.zeros(2 * self.args['num_layers'], 1, self.args['hidden_dim']))
    self.taggerlstm_c_init = nn.Parameter(torch.zeros(2 * self.args['num_layers'], 1, self.args['hidden_dim']))

    # classifiers
    self.upos_hid = nn.Linear(self.args['hidden_dim'] * 2, self.args['deep_biaff_hidden_dim'])
    self.upos_clf = nn.Linear(self.args['deep_biaff_hidden_dim'], len(vocab['upos']))
    self.upos_clf.weight.data.zero_()
    self.upos_clf.bias.data.zero_()

    if share_hid:
        clf_constructor = lambda insize, outsize: nn.Linear(insize, outsize)
    else:
        self.xpos_hid = nn.Linear(self.args['hidden_dim'] * 2,
                                  self.args['deep_biaff_hidden_dim']
                                  if not isinstance(vocab['xpos'], CompositeVocab)
                                  else self.args['composite_deep_biaff_hidden_dim'])
        self.ufeats_hid = nn.Linear(self.args['hidden_dim'] * 2, self.args['composite_deep_biaff_hidden_dim'])
        clf_constructor = lambda insize, outsize: BiaffineScorer(insize, self.args['tag_emb_dim'], outsize)

    if isinstance(vocab['xpos'], CompositeVocab):
        self.xpos_clf = nn.ModuleList()
        for l in vocab['xpos'].lens():
            self.xpos_clf.append(clf_constructor(self.args['composite_deep_biaff_hidden_dim'], l))
    else:
        self.xpos_clf = clf_constructor(self.args['deep_biaff_hidden_dim'], len(vocab['xpos']))
        if share_hid:
            self.xpos_clf.weight.data.zero_()
            self.xpos_clf.bias.data.zero_()

    self.ufeats_clf = nn.ModuleList()
    for l in vocab['feats'].lens():
        if share_hid:
            self.ufeats_clf.append(clf_constructor(self.args['deep_biaff_hidden_dim'], l))
            self.ufeats_clf[-1].weight.data.zero_()
            self.ufeats_clf[-1].bias.data.zero_()
        else:
            self.ufeats_clf.append(clf_constructor(self.args['composite_deep_biaff_hidden_dim'], l))

    # criterion
    self.crit = nn.CrossEntropyLoss(ignore_index=0)  # ignore padding

    self.drop = nn.Dropout(args['dropout'])
    self.worddrop = WordDropout(args['word_dropout'])
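
# Hedged note (added): clf_constructor above switches between two classifier
# families. With share_hid=True, the XPOS/UFeats heads reuse the UPOS hidden layer
# and are plain zero-initialized nn.Linear layers; with share_hid=False, each head
# is a BiaffineScorer whose second input (of size tag_emb_dim) is the UPOS
# embedding, so XPOS/UFeats predictions can condition on the predicted UPOS tag.
# The helper name below is illustrative only.
def _clf_constructor_sketch(share_hid, insize, outsize, tag_emb_dim):
    if share_hid:
        return nn.Linear(insize, outsize)
    return BiaffineScorer(insize, tag_emb_dim, outsize)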