Example #1
    def __init__(self,
                 similarity_function: SimilarityFunction = None,
                 normalize: bool = True) -> None:
        super(Attention, self).__init__()

        self._similarity_function = similarity_function or DotProductSimilarity()
        self._normalize = normalize
Example #2
 def test_forward_does_a_dot_product(self):
     dot_product = DotProductSimilarity()
     a_vectors = torch.LongTensor([[1, 1, 1], [-1, -1, -1]])
     b_vectors = torch.LongTensor([[1, 0, 1], [1, 0, 0]])
     result = dot_product(a_vectors, b_vectors).data.numpy()
     assert result.shape == (2, )
     assert numpy.all(result == [2, -1])
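Example #2 exercises DotProductSimilarity directly on a pair of 2-D tensors. For reference, here is a minimal standalone sketch of the same call; the import path assumes the AllenNLP 0.x layout these snippets appear to come from, where DotProductSimilarity lives in allennlp.modules.similarity_functions.

import torch
from allennlp.modules.similarity_functions import DotProductSimilarity

similarity = DotProductSimilarity()
a = torch.randn(2, 3, 5)
b = torch.randn(2, 3, 5)
# The forward pass multiplies the inputs element-wise and sums over the last
# dimension, so that dimension is dropped: (2, 3, 5) -> (2, 3).
scores = similarity(a, b)
assert scores.shape == (2, 3)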
Example #3
 def __init__(self,
              input_dim: int,
              projection_dim: int = None,
              similarity_function: SimilarityFunction = DotProductSimilarity(),
              num_attention_heads: int = 1,
              combination: str = '1,2') -> None:
     super(IntraSentenceAttentionEncoder, self).__init__()
     self._input_dim = input_dim
     if projection_dim:
         self._projection = torch.nn.Linear(input_dim, projection_dim)
     else:
         self._projection = lambda x: x
         projection_dim = input_dim
     self._matrix_attention = MatrixAttention(similarity_function)
     self._num_attention_heads = num_attention_heads
     if isinstance(similarity_function, MultiHeadedSimilarity):
         if num_attention_heads == 1:
             raise ConfigurationError("Similarity function has multiple heads but encoder doesn't")
         if num_attention_heads != similarity_function.num_heads:
             raise ConfigurationError("Number of heads don't match between similarity function "
                                      "and encoder: %d, %d" % (num_attention_heads,
                                                               similarity_function.num_heads))
     elif num_attention_heads > 1:
         raise ConfigurationError("Encoder has multiple heads but similarity function doesn't")
     self._combination = combination
     self._output_dim = util.get_combined_dim(combination, [input_dim, projection_dim])
Example #4
 def test_forward_works_with_higher_order_tensors(self):
     dot_product = DotProductSimilarity()
     a_vectors = numpy.random.rand(5, 4, 3, 6, 7)
     b_vectors = numpy.random.rand(5, 4, 3, 6, 7)
     desired_result = numpy.sum(a_vectors * b_vectors, axis=-1)
     result = dot_product(torch.from_numpy(a_vectors), torch.from_numpy(b_vectors)).data.numpy()
     assert result.shape == (5, 4, 3, 6)
     # We're cutting this down here with a random partial index, so that if this test fails the
     # output isn't so huge and slow.
     assert_almost_equal(result[2, 3, 1], desired_result[2, 3, 1])
Example #5
 def __init__(self, similarity_function=None, normalize=True):
     super(LegacyAttention, self).__init__(normalize)
     self._similarity_function = similarity_function or DotProductSimilarity()
Example #6
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to arguments

    args:
        - args (TODO): object with attributes:
        - vocab (Vocab):
        - pretrained_embs (TODO): word embeddings to use

    returns
    '''
    d_word, n_layers_highway = args.d_word, args.n_layers_highway

    # Build embedding layers
    if args.glove:
        word_embs = pretrained_embs
        train_embs = bool(args.train_words)
    else:
        log.info("\tLearning embeddings from scratch!")
        word_embs = None
        train_embs = True
    word_embedder = Embedding(
        vocab.get_vocab_size('tokens'),
        d_word,
        weight=word_embs,
        trainable=train_embs,
        padding_index=vocab.get_token_index('@@PADDING@@'))
    d_inp_phrase = 0

    # Handle elmo and cove
    token_embedder = {}
    if args.elmo:
        log.info("\tUsing ELMo embeddings!")
        if args.deep_elmo:
            n_reps = 2
            log.info("\tUsing deep ELMo embeddings!")
        else:
            n_reps = 1
        if args.elmo_no_glove:
            log.info("\tNOT using GLoVe embeddings!")
        else:
            token_embedder = {"words": word_embedder}
            log.info("\tUsing GLoVe embeddings!")
            d_inp_phrase += d_word
        elmo = Elmo(options_file=ELMO_OPT_PATH,
                    weight_file=ELMO_WEIGHTS_PATH,
                    num_output_representations=n_reps)
        d_inp_phrase += 1024
    else:
        elmo = None
        token_embedder = {"words": word_embedder}
        d_inp_phrase += d_word
    text_field_embedder = BasicTextFieldEmbedder(token_embedder) if "words" in token_embedder \
                            else None
    d_hid_phrase = args.d_hid if args.pair_enc != 'bow' else d_inp_phrase

    if args.cove:
        cove_layer = cove_lstm(n_vocab=vocab.get_vocab_size('tokens'),
                               vectors=word_embedder.weight.data)
        d_inp_phrase += 600
        log.info("\tUsing CoVe embeddings!")
    else:
        cove_layer = None

    # Build encoders
    phrase_layer = s2s_e.by_name('lstm').from_params(
        Params({
            'input_size': d_inp_phrase,
            'hidden_size': d_hid_phrase,
            'num_layers': args.n_layers_enc,
            'bidirectional': True
        }))
    if args.pair_enc == 'bow':
        sent_encoder = BoWSentEncoder(
            vocab, text_field_embedder)  # maybe should take in CoVe/ELMO?
        pair_encoder = None  # model will just run sent_encoder on both inputs
    else:  # output will be 2 x d_hid_phrase (+ deep elmo)
        sent_encoder = HeadlessSentEncoder(vocab,
                                           text_field_embedder,
                                           n_layers_highway,
                                           phrase_layer,
                                           dropout=args.dropout,
                                           cove_layer=cove_layer,
                                           elmo_layer=elmo)
    d_single = 2 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
    if args.pair_enc == 'simple':  # output will be 4 x [2 x d_hid_phrase (+ deep elmo)]
        pair_encoder = HeadlessPairEncoder(vocab,
                                           text_field_embedder,
                                           n_layers_highway,
                                           phrase_layer,
                                           cove_layer=cove_layer,
                                           elmo_layer=elmo,
                                           dropout=args.dropout)
        d_pair = d_single
    elif args.pair_enc == 'attn':
        log.info("\tUsing attention!")
        d_inp_model = 4 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
        d_hid_model = d_hid_phrase  # make it as large as the original sentence encoding
        modeling_layer = s2s_e.by_name('lstm').from_params(
            Params({
                'input_size': d_inp_model,
                'hidden_size': d_hid_model,
                'num_layers': 1,
                'bidirectional': True
            }))
        pair_encoder = HeadlessPairAttnEncoder(vocab,
                                               text_field_embedder,
                                               n_layers_highway,
                                               phrase_layer,
                                               DotProductSimilarity(),
                                               modeling_layer,
                                               cove_layer=cove_layer,
                                               elmo_layer=elmo,
                                               deep_elmo=args.deep_elmo,
                                               dropout=args.dropout)
        d_pair = 2 * d_hid_phrase
        # output will be 4 x [2 x d_hid_model], where d_hid_model = 2 x d_hid_phrase
        #                = 4 x [2 x 2 x d_hid_phrase]

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, pair_encoder)
    build_classifiers(tasks, model, d_pair, d_single)
    if args.cuda >= 0:
        model = model.cuda()
    return model
Example #7
 def test_can_construct_from_params(self):
     assert DotProductSimilarity.from_params(Params({})).__class__.__name__ == u'DotProductSimilarity'
Example #8
 def test_can_construct_from_params(self):
     assert DotProductSimilarity.from_params(Params({})).__class__.__name__ == 'DotProductSimilarity'
Example #9
    def __init__(self, similarity_function: SimilarityFunction = None) -> None:
        super(MatrixAttention, self).__init__()

        self._similarity_function = similarity_function or DotProductSimilarity()
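Across the snippets above, DotProductSimilarity is mostly used as the default similarity function of an attention module: Attention (Example #1), LegacyAttention (Example #5), and MatrixAttention (Example #9) all fall back to it when no similarity_function is passed. Below is a minimal sketch of that same default pattern in a self-contained module; the class name PairwiseScorer and its forward signature are illustrative only, and the import again assumes the AllenNLP 0.x layout.

import torch
from allennlp.modules.similarity_functions import DotProductSimilarity

class PairwiseScorer(torch.nn.Module):
    def __init__(self, similarity_function=None):
        super(PairwiseScorer, self).__init__()
        # Same fallback as in the constructors above: use a plain dot product
        # unless the caller supplies another SimilarityFunction.
        self._similarity_function = similarity_function or DotProductSimilarity()

    def forward(self, a, b):
        # Delegate scoring to whichever similarity function was configured.
        return self._similarity_function(a, b)

scorer = PairwiseScorer()
scores = scorer(torch.randn(4, 7), torch.randn(4, 7))  # shape: (4,)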