def __init__(self, similarity_function: SimilarityFunction = None, normalize: bool = True) -> None:
    super(Attention, self).__init__()
    self._similarity_function = similarity_function or DotProductSimilarity()
    self._normalize = normalize
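# Hedged usage sketch for the constructor above (not part of the original source). It assumes
# this is the AllenNLP-style Attention module whose forward pass takes a (batch, dim) query
# vector and a (batch, num_rows, dim) matrix and returns (batch, num_rows) attention weights,
# softmax-normalized when normalize=True.
import torch

attention = Attention(similarity_function=DotProductSimilarity(), normalize=True)
query = torch.rand(2, 7)          # (batch, dim)
keys = torch.rand(2, 5, 7)        # (batch, num_rows, dim)
weights = attention(query, keys)  # expected shape: (2, 5); each row sums to ~1.0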
def test_forward_does_a_dot_product(self):
    dot_product = DotProductSimilarity()
    a_vectors = torch.LongTensor([[1, 1, 1], [-1, -1, -1]])
    b_vectors = torch.LongTensor([[1, 0, 1], [1, 0, 0]])
    result = dot_product(a_vectors, b_vectors).data.numpy()
    assert result.shape == (2,)
    assert numpy.all(result == [2, -1])
def __init__(self,
             input_dim: int,
             projection_dim: int = None,
             similarity_function: SimilarityFunction = DotProductSimilarity(),
             num_attention_heads: int = 1,
             combination: str = '1,2') -> None:
    super(IntraSentenceAttentionEncoder, self).__init__()
    self._input_dim = input_dim
    if projection_dim:
        self._projection = torch.nn.Linear(input_dim, projection_dim)
    else:
        self._projection = lambda x: x
        projection_dim = input_dim
    self._matrix_attention = MatrixAttention(similarity_function)
    self._num_attention_heads = num_attention_heads
    if isinstance(similarity_function, MultiHeadedSimilarity):
        if num_attention_heads == 1:
            raise ConfigurationError("Similarity function has multiple heads but encoder doesn't")
        if num_attention_heads != similarity_function.num_heads:
            raise ConfigurationError("Number of heads don't match between similarity function "
                                     "and encoder: %d, %d" % (num_attention_heads,
                                                              similarity_function.num_heads))
    elif num_attention_heads > 1:
        raise ConfigurationError("Encoder has multiple heads but similarity function doesn't")
    self._combination = combination
    self._output_dim = util.get_combined_dim(combination, [input_dim, projection_dim])
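# Hedged sketch of the dimension bookkeeping above (not from the original source): with the
# default combination '1,2', util.get_combined_dim concatenates the original input (entry 1 in
# the combination string) with the attended, projected representation (entry 2), so the
# encoder's output dimension is input_dim + projection_dim.
encoder = IntraSentenceAttentionEncoder(input_dim=300, projection_dim=50)
assert encoder._output_dim == 300 + 50  # '1,2' -> concatenation of the two listed dimensions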
def test_forward_works_with_higher_order_tensors(self):
    dot_product = DotProductSimilarity()
    a_vectors = numpy.random.rand(5, 4, 3, 6, 7)
    b_vectors = numpy.random.rand(5, 4, 3, 6, 7)
    desired_result = numpy.sum(a_vectors * b_vectors, axis=-1)
    result = dot_product(torch.from_numpy(a_vectors), torch.from_numpy(b_vectors)).data.numpy()
    assert result.shape == (5, 4, 3, 6)
    # We're cutting this down here with a random partial index, so that if this test fails the
    # output isn't so huge and slow.
    assert_almost_equal(result[2, 3, 1], desired_result[2, 3, 1])
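# Hedged illustration of what the two tests above exercise (not from the original source):
# DotProductSimilarity reduces the last dimension, so it is equivalent to an element-wise
# product followed by a sum over dim=-1, whatever the number of leading dimensions.
import numpy
import torch

a = torch.rand(5, 4, 3, 6, 7)
b = torch.rand(5, 4, 3, 6, 7)
similarity = DotProductSimilarity()(a, b)  # shape: (5, 4, 3, 6)
reference = (a * b).sum(dim=-1)
numpy.testing.assert_allclose(similarity.numpy(), reference.numpy(), rtol=1e-6)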
def __init__(self, similarity_function=None, normalize=True):
    super(LegacyAttention, self).__init__(normalize)
    self._similarity_function = similarity_function or DotProductSimilarity()
def build_model(args, vocab, pretrained_embs, tasks):
    '''Build model according to arguments.

    args:
        - args (TODO): object with attributes listed below
        - vocab (Vocab): vocabulary
        - pretrained_embs (TODO): word embeddings to use

    returns:
        - model (MultiTaskModel)
    '''
    d_word, n_layers_highway = args.d_word, args.n_layers_highway

    # Build embedding layers
    if args.glove:
        word_embs = pretrained_embs
        train_embs = bool(args.train_words)
    else:
        log.info("\tLearning embeddings from scratch!")
        word_embs = None
        train_embs = True
    word_embedder = Embedding(vocab.get_vocab_size('tokens'), d_word,
                              weight=word_embs, trainable=train_embs,
                              padding_index=vocab.get_token_index('@@PADDING@@'))
    d_inp_phrase = 0

    # Handle ELMo and CoVe
    token_embedder = {}
    if args.elmo:
        log.info("\tUsing ELMo embeddings!")
        if args.deep_elmo:
            n_reps = 2
            log.info("\tUsing deep ELMo embeddings!")
        else:
            n_reps = 1
        if args.elmo_no_glove:
            log.info("\tNOT using GLoVe embeddings!")
        else:
            token_embedder = {"words": word_embedder}
            log.info("\tUsing GLoVe embeddings!")
            d_inp_phrase += d_word
        elmo = Elmo(options_file=ELMO_OPT_PATH, weight_file=ELMO_WEIGHTS_PATH,
                    num_output_representations=n_reps)
        d_inp_phrase += 1024
    else:
        elmo = None
        token_embedder = {"words": word_embedder}
        d_inp_phrase += d_word
    text_field_embedder = BasicTextFieldEmbedder(token_embedder) if "words" in token_embedder \
        else None
    d_hid_phrase = args.d_hid if args.pair_enc != 'bow' else d_inp_phrase

    if args.cove:
        cove_layer = cove_lstm(n_vocab=vocab.get_vocab_size('tokens'),
                               vectors=word_embedder.weight.data)
        d_inp_phrase += 600
        log.info("\tUsing CoVe embeddings!")
    else:
        cove_layer = None

    # Build encoders
    phrase_layer = s2s_e.by_name('lstm').from_params(Params({'input_size': d_inp_phrase,
                                                             'hidden_size': d_hid_phrase,
                                                             'num_layers': args.n_layers_enc,
                                                             'bidirectional': True}))
    if args.pair_enc == 'bow':
        sent_encoder = BoWSentEncoder(vocab, text_field_embedder)  # maybe should take in CoVe/ELMo?
        pair_encoder = None  # model will just run sent_encoder on both inputs
    else:  # output will be 2 x d_hid_phrase (+ deep elmo)
        sent_encoder = HeadlessSentEncoder(vocab, text_field_embedder, n_layers_highway,
                                           phrase_layer, dropout=args.dropout,
                                           cove_layer=cove_layer, elmo_layer=elmo)
        d_single = 2 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
        if args.pair_enc == 'simple':  # output will be 4 x [2 x d_hid_phrase (+ deep elmo)]
            pair_encoder = HeadlessPairEncoder(vocab, text_field_embedder, n_layers_highway,
                                               phrase_layer, cove_layer=cove_layer,
                                               elmo_layer=elmo, dropout=args.dropout)
            d_pair = d_single
        elif args.pair_enc == 'attn':
            log.info("\tUsing attention!")
            d_inp_model = 4 * d_hid_phrase + (args.elmo and args.deep_elmo) * 1024
            d_hid_model = d_hid_phrase  # make it as large as the original sentence encoding
            modeling_layer = s2s_e.by_name('lstm').from_params(Params({'input_size': d_inp_model,
                                                                       'hidden_size': d_hid_model,
                                                                       'num_layers': 1,
                                                                       'bidirectional': True}))
            pair_encoder = HeadlessPairAttnEncoder(vocab, text_field_embedder, n_layers_highway,
                                                   phrase_layer, DotProductSimilarity(),
                                                   modeling_layer, cove_layer=cove_layer,
                                                   elmo_layer=elmo, deep_elmo=args.deep_elmo,
                                                   dropout=args.dropout)
            d_pair = 2 * d_hid_phrase
            # output will be 4 x [2 x d_hid_model], where d_hid_model = 2 x d_hid_phrase
            #   = 4 x [2 x 2 x d_hid_phrase]

    # Build model and classifiers
    model = MultiTaskModel(args, sent_encoder, pair_encoder)
    build_classifiers(tasks, model, d_pair, d_single)
    if args.cuda >= 0:
        model = model.cuda()
    return model
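# Hedged example of a minimal `args` object for build_model (not from the original source).
# The attribute names are exactly the ones the function reads above; the values are illustrative.
from types import SimpleNamespace

example_args = SimpleNamespace(
    d_word=300, n_layers_highway=2,    # word embedding size and highway depth
    glove=True, train_words=0,         # use pretrained GloVe, keep word embeddings frozen
    elmo=False, deep_elmo=False, elmo_no_glove=False,
    cove=False,
    d_hid=512, n_layers_enc=2, dropout=0.2,
    pair_enc='attn',                   # one of 'bow', 'simple', 'attn'
    cuda=-1,                           # CPU
)
# model = build_model(example_args, vocab, pretrained_embs, tasks)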
def test_can_construct_from_params(self):
    assert DotProductSimilarity.from_params(Params({})).__class__.__name__ == u'DotProductSimilarity'
def test_can_construct_from_params(self):
    assert DotProductSimilarity.from_params(Params({})).__class__.__name__ == 'DotProductSimilarity'
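# Hedged companion example (not from the original source): from_params with an empty Params
# dict builds the default object; passing constructor arguments by name in the dict is assumed
# to work the same way, if the class exposes such options.
similarity = DotProductSimilarity.from_params(Params({}))
assert isinstance(similarity, DotProductSimilarity)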
def __init__(self, similarity_function: SimilarityFunction = None) -> None:
    super(MatrixAttention, self).__init__()
    self._similarity_function = similarity_function or DotProductSimilarity()
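# Hedged usage sketch (not in the original source): MatrixAttention is assumed to apply the
# similarity function to every pair of rows from two (batch, rows, dim) matrices, producing a
# (batch, rows_1, rows_2) similarity matrix.
import torch

matrix_attention = MatrixAttention(DotProductSimilarity())
sentence_1 = torch.rand(2, 6, 7)   # (batch, rows_1, dim)
sentence_2 = torch.rand(2, 4, 7)   # (batch, rows_2, dim)
similarities = matrix_attention(sentence_1, sentence_2)  # expected shape: (2, 6, 4)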