@pytest.mark.parametrize(
    "dropout_name", ("layer_dropout_probability", "recurrent_dropout_probability")
)
def test_stacked_bidirectional_lstm_dropout_version_is_different(self, dropout_name: str):
    stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11, num_layers=3)
    if dropout_name == 'layer_dropout_probability':
        dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                        num_layers=3,
                                                        layer_dropout_probability=0.9)
    elif dropout_name == 'recurrent_dropout_probability':
        dropped_stacked_lstm = StackedBidirectionalLstm(input_size=10, hidden_size=11,
                                                        num_layers=3,
                                                        recurrent_dropout_probability=0.9)
    else:
        raise ValueError('Do not recognise the following dropout name '
                         f'{dropout_name}')
    # Initialize all weights to be == 0.5.
    constant_init = Initializer.from_params(Params({"type": "constant", "val": 0.5}))
    initializer = InitializerApplicator([(".*", constant_init)])
    initializer(stacked_lstm)
    initializer(dropped_stacked_lstm)

    initial_state = torch.randn([3, 5, 11])
    initial_memory = torch.randn([3, 5, 11])

    tensor = torch.rand([5, 7, 10])
    sequence_lengths = torch.LongTensor([7, 7, 7, 7, 7])

    sorted_tensor, sorted_sequence, _, _ = sort_batch_by_length(tensor, sequence_lengths)
    lstm_input = pack_padded_sequence(sorted_tensor,
                                      sorted_sequence.data.tolist(),
                                      batch_first=True)

    stacked_output, stacked_state = stacked_lstm(lstm_input, (initial_state, initial_memory))
    dropped_output, dropped_state = dropped_stacked_lstm(lstm_input,
                                                         (initial_state, initial_memory))
    dropped_output_sequence, _ = pad_packed_sequence(dropped_output, batch_first=True)
    stacked_output_sequence, _ = pad_packed_sequence(stacked_output, batch_first=True)

    if dropout_name == 'layer_dropout_probability':
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(dropped_output_sequence.data.numpy(),
                                                    stacked_output_sequence.data.numpy(),
                                                    decimal=4)
    if dropout_name == 'recurrent_dropout_probability':
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(dropped_state[0].data.numpy(),
                                                    stacked_state[0].data.numpy(),
                                                    decimal=4)
        with pytest.raises(AssertionError):
            numpy.testing.assert_array_almost_equal(dropped_state[1].data.numpy(),
                                                    stacked_state[1].data.numpy(),
                                                    decimal=4)
def test_stacked_bidirectional_lstm_completes_forward_pass(self):
    input_tensor = torch.rand(4, 5, 3)
    input_tensor[1, 4:, :] = 0.
    input_tensor[2, 2:, :] = 0.
    input_tensor[3, 1:, :] = 0.
    input_tensor = pack_padded_sequence(input_tensor, [5, 4, 2, 1], batch_first=True)
    lstm = StackedBidirectionalLstm(3, 7, 3)
    output, _ = lstm(input_tensor)
    output_sequence, _ = pad_packed_sequence(output, batch_first=True)
    # Everything past each sequence's true length must stay zero-padded.
    numpy.testing.assert_array_equal(output_sequence.data[1, 4:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[2, 2:, :].numpy(), 0.0)
    numpy.testing.assert_array_equal(output_sequence.data[3, 1:, :].numpy(), 0.0)
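The two test methods above belong to a pytest test class and assume imports roughly like the following; exact module paths can shift between AllenNLP versions, so treat this as a sketch rather than the canonical test header.

import numpy
import pytest
import torch
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence

from allennlp.common.params import Params
from allennlp.modules.stacked_bidirectional_lstm import StackedBidirectionalLstm
from allennlp.nn import Initializer, InitializerApplicator
from allennlp.nn.util import sort_batch_by_length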
def __init__(
    self,
    input_size: int,
    hidden_size: int,
    num_layers: int,
    recurrent_dropout_probability: float = 0.0,
    layer_dropout_probability: float = 0.0,
    use_highway: bool = True,
) -> None:
    module = StackedBidirectionalLstm(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        recurrent_dropout_probability=recurrent_dropout_probability,
        layer_dropout_probability=layer_dropout_probability,
        use_highway=use_highway,
    )
    super().__init__(module=module)
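As a rough usage sketch (not part of the original class), the module built here is normally driven through a Seq2SeqEncoder interface: the wrapper takes care of packing, and the output dimension is twice the hidden size because the LSTM is bidirectional. The shapes below are illustrative.

import torch
from allennlp.modules.seq2seq_encoders import PytorchSeq2SeqWrapper
from allennlp.modules.stacked_bidirectional_lstm import StackedBidirectionalLstm

encoder = PytorchSeq2SeqWrapper(
    StackedBidirectionalLstm(input_size=10, hidden_size=11, num_layers=2))
inputs = torch.rand(4, 7, 10)              # (batch, timesteps, input_size)
mask = torch.ones(4, 7, dtype=torch.bool)  # every timestep is real
outputs = encoder(inputs, mask)            # (batch, timesteps, 2 * hidden_size)
assert outputs.size(-1) == encoder.get_output_dim() == 22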
def __init__(self,
             input_dim: int,
             combination: str = "x,y",
             num_width_embeddings: int = None,
             span_width_embedding_dim: int = None,
             bucket_widths: bool = False,
             use_exclusive_start_indices: bool = False) -> None:
    super().__init__()
    self._input_dim = input_dim
    self._combination = combination
    self._encoder = PytorchSeq2SeqWrapper(
        StackedBidirectionalLstm(self._input_dim, int(floor(self._input_dim / 2)), 1))
    self._span_extractor = BidirectionalEndpointSpanExtractor(
        self._input_dim, "y", "y",
        num_width_embeddings, span_width_embedding_dim, bucket_widths)
def __init__(self, vocab, pretrained_model: str = "bert-base-uncased", requires_grad: bool = True):
    super(ChatClassification, self).__init__()
    self.vocab = vocab
    self.turn_pooler = BertPooler(pretrained_model, requires_grad, dropout=0.0)
    self.chat_encoder = StackedBidirectionalLstm(hidden_size=400,
                                                 input_size=768,
                                                 num_layers=1,
                                                 recurrent_dropout_probability=0.3,
                                                 use_highway=True)
    self.classif_layer = torch.nn.Linear(in_features=self.chat_encoder.hidden_size,
                                         out_features=2)
    self.accuracy = CategoricalAccuracy()
def __init__(self,
             vocab: Vocabulary,
             span_emb_dim: int,
             tree_prop: int = 1,
             tree_dropout: float = 0.0,
             tree_children: str = 'attention',
             initializer: InitializerApplicator = InitializerApplicator(),
             regularizer: Optional[RegularizerApplicator] = None) -> None:
    super(Tree, self).__init__(vocab, regularizer)

    self._span_emb_dim = span_emb_dim
    assert span_emb_dim % 2 == 0

    self._f_network = FeedForward(input_dim=2 * span_emb_dim,
                                  num_layers=1,
                                  hidden_dims=span_emb_dim,
                                  activations=torch.nn.Sigmoid(),
                                  dropout=0)
    self._tree_prop = tree_prop

    self._tree_children = tree_children
    if self._tree_children == 'attention':
        self._global_attention = TimeDistributed(torch.nn.Linear(span_emb_dim, 1))
    elif self._tree_children == 'pooling':
        pass
    elif self._tree_children == 'conv':
        self._conv = torch.nn.Conv1d(span_emb_dim, span_emb_dim, kernel_size=3, padding=1)
    elif self._tree_children == 'rnn':
        self._encoder = PytorchSeq2SeqWrapper(
            StackedBidirectionalLstm(span_emb_dim, int(floor(span_emb_dim / 2)), 1))
    else:
        raise RuntimeError('invalid tree_children option: {}'.format(self._tree_children))

    self._dropout = torch.nn.Dropout(p=tree_dropout)

    initializer(self)
def __init__(self,
             input_size,
             hidden_size,
             num_layers,
             input_keep_prob,
             recurrent_keep_prob,
             layer_norm=False,
             first_dropout=0,
             bidirectional=True):
    super(AllenNLPLSTMLayer, self).__init__()
    from allennlp.modules.stacked_bidirectional_lstm import StackedBidirectionalLstm
    self.rnn = StackedBidirectionalLstm(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
        recurrent_dropout_probability=1 - recurrent_keep_prob,
        layer_dropout_probability=1 - input_keep_prob,
        use_highway=False)
    self.layer_norm = LayerNorm(hidden_size * 2) if layer_norm else None
    self.first_dropout = Dropout(first_dropout)
    # self.reset_parameters()
    self.output_dim = hidden_size * (2 if bidirectional else 1)
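A hypothetical instantiation of the layer above (all values illustrative): the caller passes keep-probabilities, which the constructor converts into the dropout probabilities that StackedBidirectionalLstm expects.

layer = AllenNLPLSTMLayer(
    input_size=300,
    hidden_size=200,
    num_layers=2,
    input_keep_prob=0.8,       # becomes layer_dropout_probability = 0.2
    recurrent_keep_prob=0.75,  # becomes recurrent_dropout_probability = 0.25
    layer_norm=True,
    first_dropout=0.1,
)
# layer.output_dim == 400 because the wrapped LSTM is bidirectional.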
def __init__(self, conf: Dict,
             input_batchers: Dict[str, Union[WordBatch, CharacterBatch]],
             n_class: int,
             use_cuda: bool):
    super(SeqLabelModel, self).__init__()
    self.n_class = n_class
    self.use_cuda = use_cuda
    self.input_dropout = torch.nn.Dropout2d(p=conf["dropout"])
    self.dropout = InputVariationalDropout(p=conf['dropout'])

    # Build one embedding/encoder layer per configured input field.
    input_layers = {}
    for i, c in enumerate(conf['input']):
        if c['type'] == 'embeddings':
            if 'pretrained' in c:
                embs = load_embedding_txt(c['pretrained'], c['has_header'])
                logger.info('loaded {0} embedding entries.'.format(len(embs[0])))
            else:
                embs = None
            name = c['name']
            mapping = input_batchers[name].mapping
            layer = Embeddings(c['dim'], mapping, fix_emb=c['fixed'], embs=embs,
                               normalize=c.get('normalize', False),
                               input_field_name=name)
            logger.info('embedding for field {0} '
                        'created with {1} x {2}.'.format(c['field'], layer.n_V, layer.n_d))
            input_layers[name] = layer

        elif c['type'] == 'cnn_encoder' or c['type'] == 'lstm_encoder':
            name = c['name']
            mapping = input_batchers[name].mapping
            embeddings = Embeddings(c['dim'], mapping, fix_emb=False, embs=None,
                                    normalize=False,
                                    input_field_name='{0}_ch_emb'.format(name))
            logger.info('character embedding for field {0} '
                        'created with {1} x {2}.'.format(c['field'],
                                                         embeddings.n_V, embeddings.n_d))
            if c['type'] == 'lstm_encoder':
                layer = LstmTokenEmbedder(c['dim'], embeddings, conf['dropout'], use_cuda,
                                          input_field_name=name)
            elif c['type'] == 'cnn_encoder':
                layer = ConvTokenEmbedder(c['dim'], embeddings, c['filters'],
                                          c.get('n_highway', 1), c.get('activation', 'relu'),
                                          use_cuda, input_field_name=name)
            else:
                raise ValueError('Unknown type: {}'.format(c['type']))
            input_layers[name] = layer

        elif c['type'] == 'elmo':
            name = c['name']
            layer = ContextualizedWordEmbeddings(name, c['path'], use_cuda)
            input_layers[name] = layer

        else:
            raise ValueError('{} unknown input layer'.format(c['type']))

    self.input_layers = torch.nn.ModuleDict(input_layers)

    # Combine the per-field representations into a single input vector.
    input_encoders = []
    input_dim = 0
    for i, c in enumerate(conf['input_encoder']):
        input_info = {name: self.input_layers[name].get_output_dim() for name in c['input']}

        if c['type'] == 'affine':
            input_encoder = AffineTransformInputEncoder(input_info, c['dim'], use_cuda)
        elif c['type'] == 'sum':
            input_encoder = SummationInputEncoder(input_info, use_cuda)
        elif c['type'] == 'concat':
            input_encoder = ConcatenateInputEncoder(input_info, use_cuda)
        else:
            raise ValueError('{} unknown input encoder'.format(c['type']))
        input_dim += input_encoder.get_output_dim()
        input_encoders.append(input_encoder)

    self.input_encoders = torch.nn.ModuleList(input_encoders)

    # Context encoder over the concatenated inputs.
    encoder_name = conf['encoder']['type'].lower()
    if encoder_name == 'stacked_bidirectional_lstm':
        lstm = StackedBidirectionalLstm(input_size=input_dim,
                                        hidden_size=conf['encoder']['hidden_dim'],
                                        num_layers=conf['encoder']['n_layers'],
                                        recurrent_dropout_probability=conf['dropout'],
                                        layer_dropout_probability=conf['dropout'],
                                        use_highway=conf['encoder'].get('use_highway', True))
        self.encoder = PytorchSeq2SeqWrapper(lstm, stateful=False)
        encoded_input_dim = self.encoder.get_output_dim()
    elif encoder_name == 'project':
        self.encoder = ProjectedEncoder(input_dim,
                                        conf['encoder']['hidden_dim'],
                                        dropout=conf['dropout'])
        encoded_input_dim = self.encoder.get_output_dim()
    elif encoder_name == 'dummy':
        self.encoder = DummyEncoder()
        encoded_input_dim = input_dim
    else:
        raise ValueError('Unknown input encoder: {}'.format(encoder_name))

    if conf["classifier"]["type"].lower() == 'crf':
        self.classify_layer = CRFLayer(encoded_input_dim, n_class, use_cuda)
    else:
        self.classify_layer = ClassifyLayer(encoded_input_dim, n_class, use_cuda)

    self.encode_time = 0
    self.emb_time = 0
    self.classify_time = 0
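For reference, a minimal hypothetical slice of the `conf` dictionary that would select the StackedBidirectionalLstm branch of this constructor could look like the following; the key names come from the code above, the values are placeholders.

conf = {
    "dropout": 0.3,
    "encoder": {
        "type": "stacked_bidirectional_lstm",
        "hidden_dim": 200,
        "n_layers": 2,
        "use_highway": True,
    },
    "classifier": {"type": "crf"},
    # "input" and "input_encoder" sections omitted here.
}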
def __init__(self, n_relations: int,
             conf: Dict,
             input_batchers: Dict[str, InputBatch],
             use_cuda: bool):
    super(BiaffineParser, self).__init__()
    self.n_relations = n_relations
    self.conf = conf
    self.use_cuda = use_cuda
    self.use_mst_decoding_for_validation = conf['use_mst_decoding_for_validation']

    # Build one embedding/encoder layer per configured input field.
    input_layers = {}
    for i, c in enumerate(conf['input']):
        if c['type'] == 'embeddings':
            if 'pretrained' in c:
                embs = load_embedding_txt(c['pretrained'], c['has_header'])
                logger.info('loaded {0} embedding entries.'.format(len(embs[0])))
            else:
                embs = None
            name = c['name']
            mapping = input_batchers[name].mapping
            layer = Embeddings(name, c['dim'], mapping, fix_emb=c['fixed'], embs=embs,
                               normalize=c.get('normalize', False))
            logger.info('embedding for field {0} '
                        'created with {1} x {2}.'.format(c['field'], layer.n_V, layer.n_d))
            input_layers[name] = layer

        elif c['type'] == 'cnn_encoder' or c['type'] == 'lstm_encoder':
            name = c['name']
            mapping = input_batchers[name].mapping
            embeddings = Embeddings('{0}_ch_emb', c['dim'], mapping, fix_emb=False,
                                    embs=None, normalize=False)
            logger.info('character embedding for field {0} '
                        'created with {1} x {2}.'.format(c['field'],
                                                         embeddings.n_V, embeddings.n_d))
            if c['type'] == 'lstm_encoder':
                layer = LstmTokenEmbedder(name, c['dim'], embeddings, conf['dropout'], use_cuda)
            elif c['type'] == 'cnn_encoder':
                layer = ConvTokenEmbedder(name, c['dim'], embeddings, c['filters'],
                                          c.get('n_highway', 1), c.get('activation', 'relu'),
                                          use_cuda)
            else:
                raise ValueError('Unknown type: {}'.format(c['type']))
            input_layers[name] = layer

        elif c['type'] == 'elmo':
            name = c['name']
            layer = ContextualizedWordEmbeddings(name, c['path'], use_cuda)
            input_layers[name] = layer

        else:
            raise ValueError('{} unknown input layer'.format(c['type']))

    self.input_layers = torch.nn.ModuleDict(input_layers)

    # Combine the per-field representations into a single input vector.
    input_encoders = []
    input_dim = 0
    for i, c in enumerate(conf['input_encoder']):
        input_info = {name: [entry['dim'] for entry in conf['input']
                             if entry['name'] == name][0]
                      for name in c['input']}

        if c['type'] == 'affine':
            input_encoder = AffineTransformInputEncoder(input_info, c['dim'], use_cuda)
        elif c['type'] == 'sum':
            input_encoder = SummationInputEncoder(input_info, use_cuda)
        elif c['type'] == 'concat':
            input_encoder = ConcatenateInputEncoder(input_info, use_cuda)
        else:
            raise ValueError('{} unknown input encoder'.format(c['type']))
        input_dim += input_encoder.get_output_dim()
        input_encoders.append(input_encoder)

    self.input_encoders = torch.nn.ModuleList(input_encoders)

    # Context encoder over the concatenated inputs.
    c = conf['context_encoder']
    if c['type'] == 'stacked_bidirectional_lstm_dozat':
        self.encoder = PytorchSeq2SeqWrapper(
            InputDropoutedStackedBidirectionalLstm(
                DozatLstmCell,
                num_layers=c['num_layers'],
                input_size=input_dim,
                hidden_size=c['hidden_dim'],
                recurrent_dropout_probability=c['recurrent_dropout_probability'],
                layer_dropout_probability=c['layer_dropout_probability'],
                activation=Activation.by_name("leaky_relu")()),
            stateful=False)
    elif c['type'] == 'stacked_bidirectional_lstm_ma':
        self.encoder = PytorchSeq2SeqWrapper(
            InputDropoutedStackedBidirectionalLstm(
                MaLstmCell,
                num_layers=c['num_layers'],
                input_size=input_dim,
                hidden_size=c['hidden_dim'],
                recurrent_dropout_probability=c['recurrent_dropout_probability'],
                layer_dropout_probability=c['layer_dropout_probability'],
                activation=Activation.by_name("tanh")()),
            stateful=False)
    elif c['type'] == 'stacked_bidirectional_lstm':
        self.encoder = PytorchSeq2SeqWrapper(
            StackedBidirectionalLstm(
                num_layers=c['num_layers'],
                input_size=input_dim,
                hidden_size=c['hidden_dim'],
                recurrent_dropout_probability=c['recurrent_dropout_probability'],
                layer_dropout_probability=c['layer_dropout_probability']),
            stateful=False)
    else:
        self.encoder = DummyContextEncoder()

    encoder_dim = self.encoder.get_output_dim()

    # Biaffine scorers for arcs and relation tags.
    c = conf['biaffine_parser']
    self.arc_representation_dim = arc_representation_dim = c['arc_representation_dim']
    self.tag_representation_dim = tag_representation_dim = c['tag_representation_dim']

    self.head_sentinel_ = torch.nn.Parameter(torch.randn([1, 1, encoder_dim]))

    self.head_arc_feedforward = FeedForward(encoder_dim, 1, arc_representation_dim,
                                            Activation.by_name("elu")())
    self.child_arc_feedforward = FeedForward(encoder_dim, 1, arc_representation_dim,
                                             Activation.by_name("elu")())
    self.head_tag_feedforward = FeedForward(encoder_dim, 1, tag_representation_dim,
                                            Activation.by_name("elu")())
    self.child_tag_feedforward = FeedForward(encoder_dim, 1, tag_representation_dim,
                                             Activation.by_name("elu")())

    arc_attention_version = c.get('arc_attention_version', 'v1')
    if arc_attention_version == 'v2':
        self.arc_attention = BilinearMatrixAttentionV2(arc_representation_dim,
                                                       arc_representation_dim,
                                                       use_input_biases=True)
    else:
        self.arc_attention = BilinearMatrixAttention(arc_representation_dim,
                                                     arc_representation_dim,
                                                     use_input_biases=True)

    self.tag_bilinear = BilinearWithBias(tag_representation_dim, tag_representation_dim,
                                         n_relations)

    self.input_dropout_ = torch.nn.Dropout2d(p=conf['dropout'])
    self.dropout_ = InputVariationalDropout(p=conf['dropout'])

    self.input_encoding_timer = TimeRecoder()
    self.context_encoding_timer = TimeRecoder()
    self.classification_timer = TimeRecoder()
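Similarly, a hypothetical `context_encoder` block that would hit the plain `stacked_bidirectional_lstm` branch of the parser above could look like this; the keys mirror the code and the values are placeholders.

conf["context_encoder"] = {
    "type": "stacked_bidirectional_lstm",
    "num_layers": 3,
    "hidden_dim": 400,
    "recurrent_dropout_probability": 0.33,
    "layer_dropout_probability": 0.33,
}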