Example No. 1
def test_S2SModelLoader(s2smodel_data):
    path, train, valid, test = s2smodel_data
    fields = [
        ("english",
         Field(init_token="__init__", eos_token="__eos__", lower=True)),
        ("french", Field(init_token="__init__",
                         eos_token="__eos__",
                         lower=True)),
        ("german", Field(init_token="__init__",
                         eos_token="__eos__",
                         lower=True))
    ]
    ds = TabularDatasetFromFiles(path=path / train, fields=fields)
    for name, field in fields:
        field.build_vocab(ds)
    bs = 2
    ml = S2SDataLoader(dataset=ds,
                       batch_size=bs,
                       source_names=["english", "french"],
                       target_names=["french"])
    assert len(ml) == 200
    index = 0
    for index, (*X, Y) in enumerate(ml):
        assert_dims(X, [2, None, bs])
        assert_dims(Y, [None, bs])

        assert X[1].shape[0] == Y.shape[0] + 1

    assert len(ml) == index + 1
Example No. 2
    def _greedy_forward(self, inputs):
        inputs = inputs[:1]  # inputs should be only first token initially [1,bs]
        sl, bs = inputs.size()
        finished = to_gpu(torch.zeros(bs).byte())
        iteration = 0
        self.beam_outputs = inputs.clone()
        layer_outputs = [[] for _ in range(self.nlayers)]
        raw_layer_outputs = [[] for _ in range(self.nlayers)]
        while not finished.all() and iteration < self.max_iterations:
            # output should be List[[sl, bs, layer_dim], ...] sl should be one
            raw_output, output = self.forward(inputs, 0)
            for layer_index in range(self.nlayers):
                layer_outputs[layer_index].append(output[layer_index])
                raw_layer_outputs[layer_index].append(raw_output[layer_index])

            #  inputs are the indices  dims [1,bs]
            _, inputs = output[-1].max(dim=-1)
            assert_dims(inputs, [1, bs])
            iteration += 1
            self.beam_outputs = assert_dims(
                torch.cat([self.beam_outputs, inputs], dim=0),
                [iteration + 1, bs])
            new_finished = inputs.data == self.eos_token
            finished = finished | new_finished

        self.beam_outputs = self.beam_outputs.view(-1, bs, 1)
        # ensure the outputs are a list of layers where each layer is [sl,bs,layerdim]
        raw_outputs = [torch.cat(i, dim=0) for i in raw_layer_outputs]
        outputs = [torch.cat(i, dim=0) for i in layer_outputs]
        return raw_outputs, outputs
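The greedy decoder above feeds the argmax token back in at every step and stops once every sequence in the batch has emitted the EOS token or max_iterations is reached. Below is a minimal, self-contained sketch of that control flow in plain PyTorch; greedy_decode, the step callable, and all sizes are hypothetical stand-ins for illustration, not the library's decoder.

import torch

def greedy_decode(step, first_token, bs, eos_token, max_iterations):
    # step: callable mapping [1, bs] token ids to [1, bs, ntokens] logits (stand-in decoder)
    inputs = torch.full((1, bs), first_token, dtype=torch.long)
    finished = torch.zeros(bs, dtype=torch.bool)
    outputs = [inputs]
    for _ in range(max_iterations):
        logits = step(inputs)           # [1, bs, ntokens]
        inputs = logits.argmax(dim=-1)  # [1, bs] indices of the most likely next token
        outputs.append(inputs)
        finished |= inputs[0] == eos_token
        if finished.all():
            break
    return torch.cat(outputs, dim=0)    # [sl, bs], analogous to beam_outputs above

# toy usage: random logits over a 5-token vocabulary
decoded = greedy_decode(lambda x: torch.randn(1, x.size(1), 5),
                        first_token=0, bs=2, eos_token=2, max_iterations=10)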
Example No. 3
    def _greedy_forward(self, inputs, hidden=None, constraints=None):
        inputs = inputs[:1]  # inputs should be only first token initially [1,bs]
        sl, bs = inputs.size()
        finished = to_gpu(torch.zeros(bs).byte())
        iteration = 0
        self.beam_outputs = inputs.clone()
        layer_outputs = [[] for _ in range(self.nlayers)]
        while not finished.all() and iteration < self.max_iterations:
            # output should be List[[sl, bs, layer_dim], ...] sl should be one
            output = self.forward(inputs, hidden=hidden, num_beams=0)
            for layer_index in range(self.nlayers):
                layer_outputs[layer_index].append(output[layer_index])

            # step_inputs have shape [1,bs]
            _, step_inputs = output[-1][-1:].max(dim=-1)
            iteration += 1
            self.beam_outputs = assert_dims(
                torch.cat([self.beam_outputs, step_inputs], dim=0),
                [iteration + 1, bs])
            new_finished = step_inputs.data == self.eos_token
            inputs = torch.cat([inputs, step_inputs], dim=0)
            assert_dims(inputs, [iteration + 1, bs])
            finished = finished | new_finished

        self.beam_outputs = self.beam_outputs.view(-1, bs, 1)
        outputs = [torch.cat(i, dim=0) for i in layer_outputs]
        return outputs
Example No. 4
 def forward(self, *inputs, num_beams=0):
     with torch.set_grad_enabled(self.training):
         encoder_inputs, decoder_inputs = inputs
         # reset the states for the new batch
         bs = encoder_inputs.size(1)
         self.encoder.reset(bs)
         self.decoder.reset(bs)
         outputs = self.encoder(encoder_inputs)
         # as initial state we use the initial decoder state (zeros)
         state = self.decoder.hidden
         assert_dims(
             outputs,
             [self.nlayers[0], None, bs, (self.nhid[0], self.emb_sz[0])])
         # pass the encoder outputs as keys to the attention projection_layer
         self.decoder.projection_layer.reset(keys=outputs[-1])
         if self.training:
             self.decoder.pr_force = self.pr_force
             nb = 1 if self.pr_force < 1 else 0
         else:
             nb = num_beams
         outputs_dec = self.decoder(decoder_inputs,
                                    hidden=state,
                                    num_beams=nb)
         predictions = (outputs_dec[:decoder_inputs.size(0)]
                        if num_beams == 0 else self.decoder.beam_outputs)
     return predictions, [*outputs, *outputs_dec]
Example No. 5
    def _greedy_forward(self, inputs, hidden=None, constraints=None):
        dec_inputs = inputs
        max_iterations = min(dec_inputs.size(0), self.MAX_STEPS_ALLOWED) if self.training else self.max_iterations
        inputs = V(inputs[:1].data)  # inputs should be only first token initially [1,bs]
        sl, bs = inputs.size()
        finished = to_gpu(torch.zeros(bs).byte())
        iteration = 0
        self.beam_outputs = inputs.clone()
        final_outputs = []
        while not finished.all() and iteration < max_iterations:
            # output should be List[[sl, bs, layer_dim], ...] sl should be one
            if 0 < iteration and self.training and 0. < self.random() < self.pr_force:
                inputs = dec_inputs[iteration].unsqueeze(0)
            output = self.forward(inputs, hidden=hidden, num_beams=0, constraints=constraints)
            hidden = self.decoder_layer.hidden
            final_outputs.append(output)  # dim should be [sl=1, bs, nt]
            #  inputs are the indices  dims [1,bs] # repackage the var to avoid grad backwards
            inputs = assert_dims(V(output.data.max(dim=-1)[1]), [1, bs])
            iteration += 1
            self.beam_outputs = assert_dims(torch.cat([self.beam_outputs, inputs], dim=0), [iteration + 1, bs])
            new_finished = inputs.data == self.eos_token
            finished = finished | new_finished
            # stop if the output is too big to fit in memory

        self.beam_outputs = self.beam_outputs.view(-1, bs, 1)
        # outputs should be [sl, bs, nt]
        outputs = torch.cat(final_outputs, dim=0)
        return outputs
Example No. 6
def test_layer_norm():
    sl = 10
    bs = 2
    in_features = 32
    inputs = to_gpu(V(tr.randn([sl, bs, in_features])))
    layernorm = to_gpu(LayerNorm(in_features))
    outputs = layernorm(inputs)
    assert_dims(outputs, [sl, bs, in_features])
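For comparison, the same shape check passes with PyTorch's built-in torch.nn.LayerNorm, assuming the LayerNorm used above likewise normalizes over the last (feature) dimension; the sizes mirror the test.

import torch

sl, bs, in_features = 10, 2, 32
inputs = torch.randn(sl, bs, in_features)
layernorm = torch.nn.LayerNorm(in_features)  # normalizes over the trailing feature dimension
outputs = layernorm(inputs)
assert outputs.shape == (sl, bs, in_features)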
Example No. 7
def test_transfomer_layer():
    sl = 10
    bs = 2
    in_features = 32
    inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(inputs)))
    transfomer = to_gpu(TransformerLayer(in_features=in_features, num_heads=8))
    outputs = transfomer(inputs)
    assert_dims(outputs, [sl, bs, in_features])
Example No. 8
 def forward(self, decoder_inputs, encoder_inputs):
     output_tensors = []
     sl, bs, input_size = decoder_inputs.size()
     dec_inputs = assert_dims(decoder_inputs, [sl, bs, self.input_size])
     # nlayers, sl, bs, input_size
     encoder_inputs = assert_dims(encoder_inputs, [self.nlayers, None, bs, self.input_size])
     for enc_inputs, layer in zip(encoder_inputs, self.layers):
         dec_inputs = layer(enc_inputs, dec_inputs)
         output_tensors.append(dec_inputs)
     assert_dims(output_tensors, [self.nlayers, sl, bs, self.input_size])
     return output_tensors
Example No. 9
def test_MultiHeadAttention_with_mask(self_attention_setup):
    keys, query = self_attention_setup
    slk, bs, ek = keys.size()
    slq, bs, eq = query.size()
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads, nhid=nhid, keys_dim=ek, query_dim=eq, values_dim=ek, dropout=0.3))
    mask = T(np.tril(np.ones((bs, num_heads, slq, slk)))).float()
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [slq, bs, num_heads * nhid])
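The mask in this test is a lower-triangular (causal) pattern over query and key positions, repeated across batch and heads, so each query may only attend to keys at the same or earlier positions. A quick sketch of building such a mask with plain PyTorch, using made-up sizes:

import torch

bs, num_heads, slq, slk = 2, 4, 7, 7
causal = torch.tril(torch.ones(slq, slk))      # [slq, slk], 1 = attention allowed
mask = causal.expand(bs, num_heads, slq, slk)  # broadcast over batch and heads
masked_positions = (mask == 0)                 # score positions that get filled with a large negative value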
Example No. 10
def test_MultiHeadAttention(self_attention_setup):
    keys, query = self_attention_setup
    slk, bs, ek = keys.size()
    slq, bs, eq = query.size()
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads, nhid=nhid, keys_dim=ek, query_dim=eq, values_dim=ek, dropout=0.3))

    result = attention(query=V(query), keys=V(keys), values=V(keys))
    assert_dims(result, [slq, bs, num_heads * nhid])
Example No. 11
def test_transformer_encoder():
    sl = 10
    bs = 2
    in_features = 300
    num_layers = 5
    inputs = tr.randn([sl, bs, in_features])
    inputs = to_gpu(V(T(inputs)))
    transformer = to_gpu(
        TransformerEncoderLayers(input_size=in_features,
                                 num_heads=8,
                                 nhid=512,
                                 num_layers=num_layers))
    layer_outputs = transformer(inputs)
    assert_dims(layer_outputs, [num_layers, sl, bs, in_features])
Example No. 12
 def _train_forward(self, inputs, hidden=None, constraints=None):
     sl, bs = inputs.size()
     emb = self.embedding_layer(inputs)
     final_outputs = []
     for step in emb:
         step = torch.cat(
             [step, self.projection_layer.get_attention_output(step)],
             dim=-1).unsqueeze_(0)
         step = assert_dims(step, [1, bs, self.emb_size * 2])
         outputs = self._rnn_step(step, hidden=hidden)
         rnn_out = assert_dims(outputs[-1], [1, bs, self.emb_size])
         final_outputs.append(self.projection_layer(rnn_out[0]))
     outputs = torch.cat(final_outputs, dim=0)
     return outputs
Example No. 13
def test_attention_projection(attention_projection_setup):
    encoder_outputs, decoder_output, params = attention_projection_setup
    module = to_gpu(AttentionProjection(**params))
    # When I reset the module
    module.reset(keys=encoder_outputs)
    # the attention output will be a zeros array with shape equal to the input
    assert to_np(module.get_attention_output(decoder_output)).sum() == 0
    assert module.get_attention_output(decoder_output) is not module._attention_output
    # when I pass an input for the decoder output
    results = module(decoder_output)
    assert_dims(results, [1, 2, params['n_out']])
    # the new attention_output is calculated from the attention module and is no longer zero
    assert to_np(module.get_attention_output(decoder_output)).sum() != 0
    assert module.get_attention_output(decoder_output) is module._attention_output
    assert_dims(module._attention_output, [2, params['n_in']])
Example No. 14
 def forward(self, *inputs, num_beams=0):
     encoder_inputs, decoder_inputs = inputs
     # reset the states for the new batch
     bs = encoder_inputs.size(1)
     self.encoder.reset(bs)
     self.decoder.reset(bs)
     raw_outputs, outputs = self.encoder(encoder_inputs)
     state = self.decoder.hidden
     assert_dims(outputs, [self.nlayers[0], None, bs, (self.nhid[0], self.emb_sz[0])])
     # pass the encoder outputs as keys to the attention projection_layer
     self.decoder.projection_layer.reset(keys=outputs[-1])
     raw_outputs_dec, outputs_dec = self.decoder(decoder_inputs, hidden=state, num_beams=num_beams)
     # outputs_dec[-1].shape ==  (sl, bs, num_tokens)
     predictions = outputs_dec[-1] if num_beams == 0 else self.decoder.beam_outputs
     return predictions, [*raw_outputs, *raw_outputs_dec], [*outputs, *outputs_dec]
Example No. 15
def test_MultiHeadAttention(attention_setup):
    keys, query = attention_setup
    bs = query.size(0)
    ed = keys.size(2)
    eq = query.size(1)
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads,
                           nhid=nhid,
                           keys_dim=ed,
                           query_dim=eq,
                           values_dim=eq))

    result = attention(query=V(query), keys=V(keys), values=V(keys))
    assert_dims(result, [bs, num_heads * nhid])
Example No. 16
    def forward(self, query, keys, values, mask=None):
        # query dims [slQ, bs, dimQ]
        # keys dims [sl, bs, dimK]
        # values dims [sl, bs, dimV]
        sl, bs, dimK = keys.size()
        slq = query.size(0)
        # [slQ, bs, dimH *NH] -> [bs, NH, slQ, dimH]
        query_projection = self.query_linear(query).view(
            slq, bs, self.num_heads, self.nhid).permute(1, 2, 0, 3)
        # [sl, bs, dimH *NH] -> [bs, NH, dimH, sl]
        keys_projection = self.keys_linear(keys).view(sl, bs, self.num_heads,
                                                      self.nhid).permute(
                                                          1, 2, 3, 0)
        # [sl, bs, dimH *NH] -> [bs, NH, sl, dimH]
        values_projection = self.values_linear(values).view(
            sl, bs, self.num_heads, self.nhid).permute(1, 2, 0, 3)

        # [bs, NH, slQ, dimH] x [bs, NH, dimH, sl] =  [bs, NH, slQ, sl]
        scores = query_projection @ keys_projection
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e20)
        weights = F.softmax(scores, dim=-1)
        if self.dropout is not None:
            weights = self.dropout(weights)

        #  [bs, NH, slQ, sl] x  [bs, NH, sl, dimH] =  [bs, NH, slQ, dimH] -> [slQ, bs, NH * dimH]
        attention = (weights @ values_projection).permute(
            2, 0, 1, 3).contiguous().view(slq, bs, self.num_heads * self.nhid)
        output = self.linear(attention)
        return assert_dims(output, [slq, bs, self.out_dim])
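The permutes above arrange the projections so that a single batched matrix multiply computes all heads at once. Below is a self-contained sketch of the same score, softmax, and context shape flow in plain PyTorch, starting from already-projected tensors with made-up sizes; it is an illustration, not the library's MultiHeadAttention.

import torch
import torch.nn.functional as F

bs, num_heads, slq, sl, nhid = 2, 4, 5, 7, 10
q = torch.randn(bs, num_heads, slq, nhid)  # projected queries
k = torch.randn(bs, num_heads, nhid, sl)   # projected keys, already transposed
v = torch.randn(bs, num_heads, sl, nhid)   # projected values

scores = q @ k                              # [bs, NH, slQ, sl]
weights = F.softmax(scores, dim=-1)         # normalize over key positions
context = weights @ v                       # [bs, NH, slQ, nhid]
attention = context.permute(2, 0, 1, 3).reshape(slq, bs, num_heads * nhid)
assert attention.shape == (slq, bs, num_heads * nhid)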
Example No. 17
 def forward(self, *inputs, num_beams=0):
     with torch.set_grad_enabled(self.training):
         encoder_inputs, decoder_inputs = assert_dims(
             inputs,
             [2, None, None])  # dims: [sl, bs] for encoder and decoder
         # reset the states for the new batch
         bs = encoder_inputs.size(1)
         self.encoder.reset(bs)
         self.decoder.reset(bs)
         outputs = self.encoder(encoder_inputs)
         state = concat_bidir_state(self.encoder.encoder_layer.hidden,
                                    cell_type=self.cell_type,
                                    nlayers=self.nlayers,
                                    bidir=self.bidir)
         if self.training:
             self.decoder.pr_force = self.pr_force
             nb = 1 if self.pr_force < 1 else 0
         else:
             nb = num_beams
         outputs_dec = self.decoder(decoder_inputs,
                                    hidden=state,
                                    num_beams=nb)
         predictions = (outputs_dec[:decoder_inputs.size(0)]
                        if num_beams == 0 else self.decoder.beam_outputs)
     return predictions, [*outputs, *outputs_dec]
Example No. 18
 def _train_forward(self, inputs, hidden=None, constraints=None):
     sl, bs = inputs.size()
     emb = self.embedding_layer(inputs)
     layer_outputs = [[] for _ in range(self.nlayers)]
     for step in emb:
         step = torch.cat(
             [step, self.projection_layer.get_attention_output(step)],
             dim=-1).unsqueeze_(0)
         step = assert_dims(step, [1, bs, self.emb_size * 2])
         outputs = self._rnn_step(step, hidden=hidden)
         for layer_index in range(self.nlayers):
             layer_outputs[layer_index].append(outputs[layer_index])
         rnn_out = assert_dims(outputs[-1], [1, bs, self.emb_size])
         layer_outputs[-1][-1] = self.projection_layer(rnn_out[0])
     outputs = [torch.cat(i, dim=0) for i in layer_outputs]
     return outputs
Example No. 19
 def forward(self, *inputs, num_beams=0):
     encoder_inputs, decoder_inputs = assert_dims(
         inputs, [2, None, None])  # dims: [sl, bs] for encoder and decoder
     # reset the states for the new batch
     bs = encoder_inputs.size(2)
     self.session_encoder.reset(bs)
     self.decoder.reset(bs)
     query_encoder_outputs = []
     outputs = []
     num_utterances, max_sl, *_ = encoder_inputs.size()
     for index, context in enumerate(encoder_inputs):
         self.query_encoder.reset(bs)
         outputs = self.query_encoder(context)  # context has size [sl, bs]
         # Truncated BPTT: if the dialogue is too long, repackage the first half of the
         # outputs to limit gradient backpropagation and fit the computation into memory.
         # TODO: to test before adding back
         if max_sl * num_utterances > self.BPTT_MAX_UTTERANCES and index <= num_utterances // 2:
             out = repackage_var(outputs[-1][-1])
         else:
             out = outputs[-1][-1]
         query_encoder_outputs.append(out)  # keep the last time-step output of the query_encoder
     query_encoder_outputs = torch.stack(query_encoder_outputs,
                                         dim=0)  # [cl, bs, nhid]
     session_outputs = self.session_encoder(query_encoder_outputs)
     self.decoder.projection_layer.reset(keys=session_outputs[-1])
     if self.training:
         self.decoder.pr_force = self.pr_force
         nb = 1 if self.pr_force < 1 else 0
     else:
         nb = num_beams
     state = self.decoder.hidden
     outputs_dec = self.decoder(decoder_inputs, hidden=state, num_beams=nb)
     predictions = (outputs_dec[-1][:decoder_inputs.size(0)]
                    if num_beams == 0 else self.decoder.beam_outputs)
     return predictions, [*outputs, *outputs_dec]
Example No. 20
    def forward(self, *inputs, num_beams=0):
        with torch.set_grad_enabled(self.training):
            encoder_inputs, decoder_inputs = assert_dims(
                inputs,
                [2, None, None])  # dims: [sl, bs] for encoder and decoder
            # reset the states for the new batch
            num_utterances, max_sl, bs = encoder_inputs.size()
            self.reset_encoders(bs)
            outputs, session = self.encoder(encoder_inputs)
            self.encoder.query_encoder.reset(bs)
            decoder_outputs = self.encoder.query_encoder(decoder_inputs)
            decoder_out = concat_bidir_state(
                self.encoder.query_encoder_layer.get_last_hidden_state(),
                cell_type=self.cell_type,
                nlayers=1,
                bidir=self.encoder.bidir)
            x = torch.cat([session, decoder_out], dim=-1)
            prior_log_var, prior_mu, recog_log_var, recog_mu, session = self.variational_encoding(
                session, x)
            bow_logits = (self.bow_network(session).squeeze(0)
                          if num_beams == 0 else None)

            state, constraints = self.encoder_hidden_state_projection(session)
            outputs_dec, predictions = self.decoding(decoder_inputs, num_beams,
                                                     state)
            if num_beams == 0:
                return [
                    predictions, recog_mu, recog_log_var, prior_mu,
                    prior_log_var, bow_logits
                ], [*outputs, *outputs_dec]
            else:
                return predictions, [*outputs, *outputs_dec]
Example No. 21
    def forward(self, query, keys, values, mask=None):
        # Query dim [bs, dimQ]
        # keys dim [sl, bs, dimK]
        # values dim [sl, bs, dimV]

        # [bs, dimH *NH]
        query_projection = self.query_linear(query)
        sl, bs, dimK = keys.size()
        # [sl, bs, dimH *NH]
        keys_projection = self.keys_linear(keys)
        # [sl, bs, dimH *NH]
        values_projection = self.values_linear(values)

        scores = (query_projection * keys_projection).view(
            sl, bs, self.num_heads,
            self.nhid).sum(dim=-1).contiguous() / self.scale
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e20)
        weights = F.softmax(scores, dim=0)
        if self.dropout is not None:
            weights = self.dropout(weights)
        attention = (
            weights.unsqueeze(-1) *
            values_projection.view(sl, bs, self.num_heads, self.nhid)).sum(0)
        output = self.linear(attention.view(bs, -1))
        return assert_dims(output, [bs, self.out_dim])
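Unlike the batched variant above, this forward scores a single query per batch element against every key position and normalizes the weights over the sequence dimension (dim=0). A minimal sketch of that per-head scoring with made-up sizes and already-projected tensors, for illustration only:

import torch
import torch.nn.functional as F

sl, bs, num_heads, nhid = 7, 2, 4, 10
q = torch.randn(bs, num_heads * nhid)      # projected query (one per batch element)
k = torch.randn(sl, bs, num_heads * nhid)  # projected keys
v = torch.randn(sl, bs, num_heads * nhid)  # projected values

scores = (q * k).view(sl, bs, num_heads, nhid).sum(dim=-1)  # [sl, bs, NH]
weights = F.softmax(scores, dim=0)                          # over key positions
context = (weights.unsqueeze(-1) * v.view(sl, bs, num_heads, nhid)).sum(0)
assert context.shape == (bs, num_heads, nhid)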
Example No. 22
def test_S2SModelData_from_file(generalmodel):
    assert generalmodel is not None
    # number of batches
    assert 200 == len(generalmodel.trn_dl)
    train_iter = iter(generalmodel.trn_dl)
    batch = next(train_iter)
    assert isinstance(batch, list)
    # shape should be equal to sl, bs
    # The number of elements in the batch equals the number of source_names plus target_names (4 in this case):
    # the first three are the sources (inputs to the encoder) and the last is the target (input to the decoder)
    assert_dims(batch, [4, None, 2])

    sentences = to_np(batch[0])
    batch_sentences = generalmodel.itos(sentences, "english")
    for beam_sentence in batch_sentences:
        for sentence in beam_sentence:
            assert sentence in {"goodbye", "hello", "i like to read", "i am hungry"}
Example No. 23
def test_transfomer_layer_decoder():
    sl = 10
    bs = 2
    in_features = 32
    tr.random.manual_seed(0)
    encoder_inputs = tr.randn([sl, bs, in_features])
    decoder_inputs = tr.randn([sl, bs, in_features])
    encoder_inputs = to_gpu(V(T(encoder_inputs)))
    decoder_inputs = to_gpu(V(T(decoder_inputs)))
    transformer = to_gpu(
        TransformerLayerDecoder(input_size=in_features,
                                num_heads=8,
                                nhid=64,
                                dropout=0))
    outputs = transformer(encoder_inputs, decoder_inputs)
    assert_dims(outputs, [sl, bs, in_features])
    outputs1 = transformer(encoder_inputs, decoder_inputs[:1])
    assert_dims(outputs1, [1, bs, in_features])
    assert ((outputs[0] - outputs1[0]).abs() < 1E-6).all()
Example No. 24
def test_MultiHeadAttention_with_mask(attention_setup):
    keys, query = attention_setup
    bs = query.size(0)
    ed = keys.size(2)
    sl = keys.size(0)
    eq = query.size(1)
    num_heads = 4
    nhid = 10
    attention = to_gpu(
        MultiHeadAttention(num_heads=num_heads,
                           nhid=nhid,
                           keys_dim=ed,
                           query_dim=eq,
                           values_dim=ed,
                           dropout=0.3))
    mask = V(T(np.zeros((sl, bs, num_heads))))
    mask[0] = 1
    result = attention(query=V(query), keys=V(keys), values=V(keys), mask=mask)
    assert_dims(result, [bs, num_heads * nhid])
Example No. 25
    def forward(self, *inputs, num_beams=0):

        encoder_inputs, decoder_inputs = assert_dims(
            inputs, [2, None, None])  # dims: [sl, bs] for encoder and decoder
        encoder_outputs = self.encoder(encoder_inputs)
        decoder_outputs = self.decoder(decoder_inputs,
                                       encoder_outputs,
                                       num_beams=num_beams)
        predictions = (decoder_outputs[-1][:decoder_inputs.size(0)]
                       if num_beams == 0 else self.decoder.beam_outputs)
        return predictions, decoder_outputs
Example No. 26
def test_transformer_decoder(num_beams, decoder_inputs_transformer):
    batch_size, emb_size, nlayers, sl, vin, ven = decoder_inputs_transformer
    ntokens, nhid, max_tokens = 10, 2, 20
    embedding = TransformerEmbeddings(ntokens=ntokens,
                                      emb_size=emb_size,
                                      dropout=0.0,
                                      pad_token=1)

    encoder = TransformerDecoderLayers(nlayers=nlayers,
                                       input_size=emb_size,
                                       num_heads=2,
                                       nhid=emb_size)
    projection_layer = Projection(output_size=ntokens,
                                  input_size=emb_size,
                                  tie_encoder=None,
                                  dropout=0.0)
    decoder = TransformerDecoder(decoder_layer=encoder,
                                 projection_layer=projection_layer,
                                 pad_token=1,
                                 eos_token=2,
                                 max_tokens=max_tokens,
                                 embedding_layer=embedding)
    decoder = to_gpu(decoder)
    outputs = decoder(vin, ven, num_beams=num_beams)
    if num_beams > 0:
        assert_dims(outputs,
                    [None, num_beams * batch_size, (emb_size, ntokens)])
        # actual beam outputs can be found in beam_outputs
        assert decoder.beam_outputs is not None
        assert_dims(decoder.beam_outputs, [None, batch_size, num_beams])
        # the sl can go up to max_tokens + 1 (for the extra 0 token at the end)
        assert 0 < decoder.beam_outputs.shape[0] <= max_tokens + 1
    else:
        assert_dims(outputs, [None, batch_size, (emb_size, ntokens)])
        assert decoder.beam_outputs is None
Example No. 27
    def process_minibatch(self, minibatch: List[Example]) -> Tuple[LT, LT, LT]:
        max_sl = max([max(ex.sl) for ex in minibatch])
        max_conv = max([len(ex.roles) for ex in minibatch])
        padded_examples, targets, padded_lengths, padded_roles = [], [], [], []
        for example in minibatch:
            examples, lens, roles = self.pad(example,
                                             max_sl=max_sl,
                                             max_conv=max_conv,
                                             field=self.text_field)
            padded_examples.extend(examples)
            padded_lengths.extend(lens)
            padded_roles.append(roles)
            targets.append(example.response)
        self.text_field.include_lengths = False

        data = self.text_field.numericalize(padded_examples,
                                            device=self.device,
                                            train=self.train)
        batch_size = len(minibatch)
        assert_dims(data, [max_sl, max_conv * batch_size])
        data = data.view(max_sl, batch_size, max_conv)
        data = data.transpose(2, 0).transpose(2, 1).contiguous()
        self.text_field.fix_length = None
        padded_targets = self.text_field.pad(targets)
        targets = self.text_field.numericalize(
            padded_targets, device=self.device,
            train=self.train)  # [max_sl, batch_size]
        assert_dims(data, [max_conv, max_sl, batch_size])
        assert_dims(targets, [None, batch_size])
        return data, targets, targets[1:]
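The view/transpose pair above rearranges the flat [max_sl, max_conv * batch_size] numericalized tensor into [max_conv, max_sl, batch_size], one slice per utterance of the conversation. A toy illustration of the same shape manipulation with made-up sizes:

import torch

max_sl, max_conv, batch_size = 5, 3, 2
data = torch.arange(max_sl * max_conv * batch_size).view(max_sl, max_conv * batch_size)
data = data.view(max_sl, batch_size, max_conv)                 # split the flat column dimension
data = data.transpose(2, 0).transpose(2, 1).contiguous()       # -> [max_conv, max_sl, batch_size]
assert data.shape == (max_conv, max_sl, batch_size)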
Example No. 28
def test_rnn_decoder(rnn_decoder, decoder_inputs):
    dec_ins, keys = decoder_inputs
    decoder, params = rnn_decoder
    decoder.reset(params.batch_size)
    hidden = decoder.hidden
    decoder.projection_layer.keys = keys
    outputs = decoder(dec_ins, hidden=hidden, num_beams=params.num_beams)
    assert params.nlayers == len(outputs)
    if params.num_beams > 0:
        assert_dims(outputs, [
            params.nlayers, None, params.num_beams * params.batch_size,
            (params.nhid, params.ntokens)
        ])
        # actual beam outputs can be found in beam_outputs
        assert decoder.beam_outputs is not None
        assert_dims(decoder.beam_outputs,
                    [None, params.batch_size, params.num_beams])
        # the sl can go up to max_tokens + 1 (for the extra 0 token at the end)
        assert 0 < decoder.beam_outputs.shape[0] <= params.max_tokens + 1
    else:
        assert_dims(outputs, [
            params.nlayers, None, params.batch_size,
            (params.nhid, params.ntokens)
        ])
        assert decoder.beam_outputs is None
Example No. 29
    def process_minibatch(self, minibatch: List[Example]) -> Tuple[LT, LT, LT]:
        max_sl = max([max(ex.sl) for ex in minibatch])
        max_conv = max([len(ex.roles) for ex in minibatch]) + 1  # add extra padding sentence for the target
        padded_examples, padded_targets, padded_lengths, padded_roles = [], [], [], []
        for example in minibatch:
            examples, lens, roles = self.pad(example, max_sl=max_sl, max_conv=max_conv, field=self.text_field)
            padded_examples.extend(examples)
            padded_lengths.extend(lens)
            padded_roles.append(roles)
            # if self.target_roles is not None we will pad the roles we do not want to train on
            # this allows for learning only the responses we are interested in
            targets, *_ = self.pad(example, max_sl=max_sl, max_conv=max_conv, field=self.text_field,
                                   target_roles=self.target_roles)
            padded_targets.extend(targets)

        self.text_field.include_lengths = False

        data = self.text_field.numericalize(padded_examples, device=self.device, train=self.train)
        batch_size = len(minibatch)
        assert_dims(data, [max_sl, max_conv * batch_size])
        data = data.view(max_sl, batch_size, max_conv).transpose(2, 0).transpose(2, 1).contiguous()
        source = data[:-1]  # we remove the extra padding  sentence added here
        targets = self.text_field.numericalize(padded_targets, device=self.device, train=self.train)
        targets = targets.view(max_sl, batch_size, max_conv).transpose(2, 0).transpose(2, 1).contiguous()
        # shapes will be max_conv -1 , max_sl, batch_size
        assert_dims(source, [max_conv - 1, max_sl, batch_size])
        assert_dims(targets, [max_conv, max_sl, batch_size])
        return source, targets[1:], targets[1:, 1:]
Example No. 30
 def forward(self, *inputs, num_beams=0):
     encoder_inputs, decoder_inputs = assert_dims(
         inputs, [2, None, None])  # dims: [sl, bs] for encoder and decoder
     # reset the states for the new batch
     bs = encoder_inputs.size(2)
     self.session_encoder.reset(bs)
     self.decoder.reset(bs)
     query_encoder_raw_outputs, query_encoder_outputs = [], []
     raw_outputs, outputs = [], []
     num_utterances = encoder_inputs.shape[0]
     for index, context in enumerate(encoder_inputs):
         self.query_encoder.reset(bs)
         raw_outputs, outputs = self.query_encoder(context)
         query_encoder_raw_outputs.append(raw_outputs)
         # Truncated BPTT: if the dialogue is too long, repackage the first half of the
         # outputs to limit gradient backpropagation and fit the computation into memory.
         if num_utterances > 20 and index <= num_utterances // 2:
             out = repackage_var(outputs[-1])
         else:
             out = outputs[-1]
         query_encoder_outputs.append(out)
     query_encoder_outputs = torch.cat(query_encoder_outputs, dim=0)
     raw_outputs_session, session_outputs = self.session_encoder(
         query_encoder_outputs)
     state = self.decoder.hidden
     state[0] = self.create_decoder_state(session_outputs[-1])
     raw_outputs_dec, outputs_dec = self.decoder(decoder_inputs,
                                                 hidden=state,
                                                 num_beams=num_beams)
     if num_beams == 0:
         # use output of the projection module
         predictions = assert_dims(
             outputs_dec[-1], [None, bs, self.nt])  # dims: [sl, bs, nt]
     else:
         # use argmax or beam search predictions
         predictions = assert_dims(
             self.decoder.beam_outputs,
             [None, bs, num_beams])  # dims: [sl, bs, nb]
     return predictions, [*raw_outputs,
                          *raw_outputs_dec], [*outputs, *outputs_dec]