# Imports assumed for these excerpts (AllenNLP 0.x-era module paths).
from typing import Callable, List, Optional, Tuple

import numpy
import pytest
import torch
from numpy.testing import assert_almost_equal
from torch import Tensor
from torch.nn import GRU, LSTM, RNN, Parameter

from allennlp.common import Params
from allennlp.common.checks import ConfigurationError
from allennlp.modules.seq2seq_encoders import IntraSentenceAttentionEncoder
from allennlp.modules.seq2vec_encoders import (BagOfEmbeddingsEncoder, CnnEncoder,
                                               CnnHighwayEncoder, PytorchSeq2VecWrapper)
from allennlp.modules.similarity_functions import MultiHeadedSimilarity
def test_forward(self):
    # pylint: disable=protected-access
    similarity = MultiHeadedSimilarity(num_heads=3, tensor_1_dim=6)
    similarity._tensor_1_projection = Parameter(torch.eye(6))
    similarity._tensor_2_projection = Parameter(torch.eye(6))
    a_vectors = torch.FloatTensor([[[[1, 1, -1, -1, 0, 1], [-2, 5, 9, -1, 3, 4]]]])
    b_vectors = torch.FloatTensor([[[[1, 1, 1, 0, 2, 5], [0, 1, -1, -7, 1, 2]]]])
    result = similarity(a_vectors, b_vectors).data.numpy()
    assert result.shape == (1, 1, 2, 3)
    assert_almost_equal(result, [[[[2, -1, 5], [5, -2, 11]]]])
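# With identity projections and num_heads=3, each head in test_forward is just a
# dot product over its own 6/3 = 2-dimensional slice of the inputs, e.g.
# [1, 1]·[1, 1] = 2 for the first head. A companion check (not in the original
# suite) verifying the expected values with plain numpy:
def test_forward_values_match_per_head_dot_products(self):
    a = numpy.array([[1, 1, -1, -1, 0, 1], [-2, 5, 9, -1, 3, 4]])
    b = numpy.array([[1, 1, 1, 0, 2, 5], [0, 1, -1, -7, 1, 2]])
    # Split each 6-dim vector into 3 heads of 2 dims, then dot per (token, head).
    per_head = numpy.einsum('thd,thd->th', a.reshape(2, 3, 2), b.reshape(2, 3, 2))
    assert_almost_equal(per_head, [[2, -1, 5], [5, -2, 11]])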
def test_weights_are_correct_sizes(self):
    # pylint: disable=protected-access
    similarity = MultiHeadedSimilarity(num_heads=3,
                                       tensor_1_dim=9,
                                       tensor_1_projected_dim=6,
                                       tensor_2_dim=6,
                                       tensor_2_projected_dim=12)
    assert list(similarity._tensor_1_projection.size()) == [9, 6]
    assert list(similarity._tensor_2_projection.size()) == [6, 12]
    with pytest.raises(ConfigurationError):
        similarity = MultiHeadedSimilarity(num_heads=3, tensor_1_dim=10)
    with pytest.raises(ConfigurationError):
        params = Params({'num_heads': 3, 'tensor_1_dim': 9, 'tensor_2_dim': 10})
        MultiHeadedSimilarity.from_params(params)
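# Both ConfigurationErrors above stem from the same constraint: each projected
# dimension must be divisible by num_heads so every head gets an equal slice,
# and 10 is not divisible by 3. A minimal sketch of that check, assuming it
# mirrors MultiHeadedSimilarity's internal validation:
def _check_divisible_by_heads(projected_dim: int, num_heads: int) -> None:
    if projected_dim % num_heads != 0:
        raise ConfigurationError(f"Projected dimension {projected_dim} is not "
                                 f"divisible by number of heads {num_heads}")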
def test_forward_works_with_multi_headed_attention(self):
    # We're not going to check the output values here, as that's complicated; we'll
    # just make sure the code runs and the shapes are correct.
    similarity = MultiHeadedSimilarity(3, 24)
    encoder = IntraSentenceAttentionEncoder(input_dim=24,
                                            projection_dim=24,
                                            similarity_function=similarity,
                                            num_attention_heads=3,
                                            combination="1+2")
    input_tensor = torch.from_numpy(numpy.random.rand(4, 6, 24)).float()
    encoder_output = encoder(input_tensor, None)
    assert list(encoder_output.size()) == [4, 6, 24]
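# The shape above stays (4, 6, 24) because combination="1+2" sums the original
# input ("1") with its attended counterpart ("2") element-wise, which is why
# projection_dim must equal input_dim there. combination="1,2" concatenates the
# two instead, growing the output dimension. A sketch of that contrast (not in
# the original suite; assumes the usual AllenNLP combination semantics):
def test_concat_combination_grows_output_dim(self):
    encoder = IntraSentenceAttentionEncoder(input_dim=24,
                                            projection_dim=8,
                                            combination="1,2")
    input_tensor = torch.from_numpy(numpy.random.rand(4, 6, 24)).float()
    # Concatenation of the 24-dim input with the 8-dim attended projection.
    assert list(encoder(input_tensor, None).size()) == [4, 6, 32]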
def test_constructor_asserts_multi_head_consistency(self):
    with pytest.raises(ConfigurationError) as exception_info:
        IntraSentenceAttentionEncoder(input_dim=5, num_attention_heads=4)
    assert 'Encoder has multiple heads' in exception_info.value.message
    similarity = MultiHeadedSimilarity(3, 6)
    with pytest.raises(ConfigurationError) as exception_info:
        IntraSentenceAttentionEncoder(input_dim=5, similarity_function=similarity)
    assert 'Similarity function has multiple heads' in exception_info.value.message
    with pytest.raises(ConfigurationError) as exception_info:
        IntraSentenceAttentionEncoder(input_dim=5,
                                      num_attention_heads=2,
                                      similarity_function=similarity)
    assert "Number of heads don't match" in exception_info.value.message
def build(self,
          name: str,
          embedding_dim: int,
          num_heads: int = 3,
          output_dim: int = 30) -> Callable[[Tensor, Optional[Tensor]], Tensor]:
    encoder = None
    if name in {'intra'}:
        encoder = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                                projection_dim=output_dim,
                                                combination="1,2")
    elif name in {'multihead'}:
        sim = MultiHeadedSimilarity(num_heads, embedding_dim)
        encoder = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                                projection_dim=embedding_dim,
                                                similarity_function=sim,
                                                num_attention_heads=num_heads,
                                                combination="1+2")
    assert encoder is not None
    return encoder
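# Usage sketch for the seq2seq builder above ('factory' is a stand-in for
# whatever class this build() method belongs to, which this excerpt doesn't
# show). Both branches return one vector per token, unlike the seq2vec builder
# below, which aggregates each sequence down to a single vector:
#
#   encoder = factory.build('multihead', embedding_dim=24)
#   encoder(torch.randn(4, 6, 24), None).size()  # -> (4, 6, 24)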
def build(self,
          name: str,
          embedding_dim: int,
          hidden_size: int = 32,
          num_filters: int = 1,
          num_heads: int = 3,
          output_dim: int = 30,
          ngram_filter_sizes: Tuple = (1, 2, 3, 4, 5),
          filters: List[List[int]] = [[1, 4], [2, 8], [3, 16], [4, 32], [5, 64]],
          num_highway: int = 2,
          projection_dim: int = 16) -> Callable[[Tensor, Optional[Tensor]], Tensor]:
    encoder = None
    if name in {'boe'}:
        encoder = BagOfEmbeddingsEncoder(embedding_dim=embedding_dim, averaged=True)
    elif name in {'cnn'}:
        encoder = CnnEncoder(embedding_dim=embedding_dim,
                             num_filters=num_filters,
                             ngram_filter_sizes=ngram_filter_sizes,
                             output_dim=output_dim)
    elif name in {'cnnh'}:
        encoder = CnnHighwayEncoder(embedding_dim=embedding_dim,
                                    filters=filters,
                                    num_highway=num_highway,
                                    projection_dim=projection_dim,
                                    projection_location="after_cnn")
    elif name in {'rnn'}:
        rnn = RNN(input_size=embedding_dim, bidirectional=True,
                  hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(rnn)
    elif name in {'lstm'}:
        lstm = LSTM(input_size=embedding_dim, bidirectional=True,
                    hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(lstm)
    elif name in {'gru'}:
        gru = GRU(input_size=embedding_dim, bidirectional=True,
                  hidden_size=hidden_size, batch_first=True)
        encoder = PytorchSeq2VecWrapper(gru)
    elif name in {'intra'}:
        intra = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                              projection_dim=output_dim,
                                              combination="1,2")
        # combination="1,2" concatenates, so the aggregating LSTM sees
        # embedding_dim + output_dim features per token.
        aggr = PytorchSeq2VecWrapper(LSTM(input_size=embedding_dim + output_dim,
                                          bidirectional=True,
                                          hidden_size=hidden_size,
                                          batch_first=True))
        encoder = lambda x, y: aggr(intra(x, y), y)
    elif name in {'multihead'}:
        sim = MultiHeadedSimilarity(num_heads, embedding_dim)
        multi = IntraSentenceAttentionEncoder(input_dim=embedding_dim,
                                              projection_dim=embedding_dim,
                                              similarity_function=sim,
                                              num_attention_heads=num_heads,
                                              combination="1+2")
        # combination="1+2" sums, so the per-token size stays embedding_dim.
        aggr = PytorchSeq2VecWrapper(LSTM(input_size=embedding_dim,
                                          bidirectional=True,
                                          hidden_size=hidden_size,
                                          batch_first=True))
        encoder = lambda x, y: aggr(multi(x, y), y)
    assert encoder is not None
    return encoder
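# Usage sketch for the seq2vec builder above (same 'factory' stand-in). A
# bidirectional LSTM with hidden_size=32 yields a final state of 2 * 32 = 64:
#
#   encoder = factory.build('lstm', embedding_dim=50)
#   output = encoder(torch.randn(4, 6, 50), None)
#   assert list(output.size()) == [4, 64]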