Example #1
    def speak(self, you_token, eos_token=None):
        empty_resp_indices = th.autograd.Variable(cu(th.LongTensor([[0, 1]])))
        empty_resp_len = th.autograd.Variable(cu(th.LongTensor([2])))
        response_predict, response_score = self.dialogue(empty_resp_indices, empty_resp_len,
                                                         persist=False, eos_token=eos_token)
        del response_score['target']
        return response_predict, response_score
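
Nearly every snippet here wraps freshly built tensors in cu(...) before turning them into th.autograd.Variables. The cu helper itself is not shown in these examples; a minimal sketch, assuming it just moves a tensor (or module) to the GPU when one is available and is a no-op otherwise:

import torch as th

def cu(x):
    # Hypothetical stand-in for the cu() helper used throughout these examples:
    # move tensors/modules to the GPU when available, otherwise return unchanged.
    return x.cuda() if th.cuda.is_available() else x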
Example #2
def compute_reward(sel, other_sel, goal_indices):
    assert goal_indices.size()[1] == NUM_ITEMS * 2, goal_indices.size()
    counts = goal_indices[:, cu(th.LongTensor(range(0, NUM_ITEMS * 2, 2)))]
    values = goal_indices[:, cu(th.LongTensor(range(1, NUM_ITEMS * 2, 2)))]
    total_claimed = sel + other_sel
    # feasible = (total_claimed >= 0).prod() * (total_claimed <= counts).prod()
    feasible = (total_claimed == counts).prod().long()

    return ((values * sel).sum(1) * feasible).float()
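
A worked sketch of the reward computation with made-up numbers for three item types, assuming goal_indices interleaves (count, value) pairs as the strided indexing above implies:

import torch as th

# Counts [1, 2, 3] and values [5, 0, 1], stored interleaved as (count, value) pairs.
goal_indices = th.LongTensor([[1, 5, 2, 0, 3, 1]])
counts = goal_indices[:, 0::2]
values = goal_indices[:, 1::2]

sel = th.LongTensor([[1, 1, 0]])        # this agent's claim
other_sel = th.LongTensor([[0, 1, 3]])  # the partner's claim

# The claims cover every item exactly, so the split is feasible and the reward
# is 1*5 + 1*0 + 0*1 = 5; an infeasible split would score 0.
feasible = (sel + other_sel == counts).long().prod()
reward = ((values * sel).sum(1) * feasible).float()  # tensor([5.])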
Example #3
    def __init__(self, negotiator, partner, vectorizer, options):
        super(RLNegotiator, self).__init__()
        self.negotiator = negotiator
        self.partner = partner
        self.vectorizer = vectorizer
        self.eos = cu(th.LongTensor(self.vectorizer.resp_vec.vectorize(['<eos>'])[0])[0])
        self.you = cu(th.LongTensor(self.vectorizer.resp_vec.vectorize(['YOU:'])[0])[0])

        self.epsilon = options.rl_epsilon
        self.max_dialogue_len = options.max_dialogue_len
Example #4
    def start(self):
        self.negotiator = self.models[0].model.module
        self.vectorizer = self.models[0].model.vectorizer
        self.tokenize, self.detokenize = tokenizers.TOKENIZERS[self.models[0].options.tokenizer]
        with self.use_device():
            resp_vec = self.vectorizer.resp_vec
            self.eos = cu(th.LongTensor(resp_vec.vectorize(['<eos>'])[0])[0])
            self.you = cu(th.LongTensor(resp_vec.vectorize(['YOU:'])[0])[0])
            self.them = cu(th.LongTensor(resp_vec.vectorize(['THEM:'])[0])[0])
            self.sel_token = cu(th.LongTensor(resp_vec.vectorize(['<selection>'])[0])[0])
Example #5
    def forward(self,
                goal_indices, partner_goal_indices,
                resp_indices_, resp_len_,
                sel_indices_, feasible_sels, num_feasible_sels):
        num_feasible_sels = th.autograd.Variable(cu(th.LongTensor(
            [feasible_sels.size()[1]]
        )))

        self.negotiator.context(goal_indices)
        self.partner.context(goal_indices)

        my_turn = rng.choice([True, False])
        dialogue = []
        policy_scores = []
        for _ in range(self.max_dialogue_len):
            me = self.negotiator if my_turn else self.partner
            other = self.partner if my_turn else self.negotiator

            output_predict, output_score = me.speak(self.you, self.eos)
            (me_resp_indices, resp_len), policy_score = self.policy(output_predict, output_score)
            start_with_you = th.autograd.Variable(cu(th.LongTensor([[self.you]])))
            me_resp_indices = th.cat([start_with_you.expand(resp_len.size()[0], 1),
                                      me_resp_indices], 1)
            me.listen(me_resp_indices, resp_len + 1)

            other_resp_indices = self.transform_dialogue(me_resp_indices)
            other.listen(other_resp_indices, resp_len + 1)

            dialogue.append(((me_resp_indices if my_turn else other_resp_indices), resp_len))
            policy_scores.append(policy_score)
            if is_selection(me_resp_indices, resp_len, self.sel_token):
                break

            my_turn = not my_turn

        empty_sel_indices = th.autograd.Variable(cu(th.LongTensor([0])))
        # TODO: epsilon-greedy here too?
        selection_predict, selection_score = self.negotiator.selection(empty_sel_indices,
                                                                       feasible_sels,
                                                                       num_feasible_sels)
        sel_a = selection_predict['beam']
        sel_b = self.partner.selection(empty_sel_indices,
                                       feasible_sels, num_feasible_sels)[0]['beam']

        reward = compute_reward(sel_a, sel_b, goal_indices)
        partner_reward = compute_reward(sel_b, sel_a, partner_goal_indices)

        result = (dialogue, sel_a, sel_b, reward, partner_reward)
        return {'sample': result, 'beam': result}, (th.stack(policy_scores, 0)[:, 0],
                                                    selection_score)
Example #6
    def __init__(self, module, loss, optimizer, optimizer_params, vectorizer):
        self.get_options()
        self.module = cu(module)
        self.loss = cu(loss)
        self.optimizer_class = optimizer
        self.optimizer_params = optimizer_params
        self.build_optimizer()
        self.vectorizer = vectorizer
        summary_path = config.get_file_path('monitoring.tfevents')
        if summary_path:
            self.summary_writer = summary.SummaryWriter(summary_path)
        else:
            self.summary_writer = None
        self.step = 0
        self.last_timestamp = datetime.datetime.now()
Example #7
    def forward(self, predict, score):
        dialogue, sel_a, sel_b, reward, partner_reward = predict
        response_scores, selection_score = score

        reward_transformed = self.transform_reward(reward)
        step_rewards = []
        discount = th.autograd.Variable(cu(th.FloatTensor([1.0])))
        for i in range(len(response_scores)):
            step_rewards.append(discount * reward_transformed)
            discount = discount * self.gamma

        loss = th.autograd.Variable(cu(th.FloatTensor([0.0])))
        for score, step_reward in zip(response_scores, step_rewards):
            loss -= score * step_reward

        return loss
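
A small numeric sketch of the same computation (folding the two loops above into one), with made-up per-turn log-probabilities: each turn's score is weighted by gamma**i times the transformed reward, a REINFORCE-style objective.

import torch as th

response_scores = [th.tensor(-0.5), th.tensor(-1.2), th.tensor(-0.3)]  # per-turn log-probs
reward_transformed, gamma = th.tensor(2.0), 0.9

loss = th.tensor(0.0)
discount = th.tensor(1.0)
for score in response_scores:
    loss = loss - score * discount * reward_transformed
    discount = discount * gamma

# loss = 0.5*1.00*2 + 1.2*0.90*2 + 0.3*0.81*2 = 3.646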
Example #8
    def forward(self, src_indices, src_lengths):
        a = self.activations

        # TODO: PackedSequence?
        batch_size = src_indices.size()[0]
        max_len = src_lengths.data.max()
        a.in_embed = self.enc_embedding(src_indices[:, :max_len])
        conv_stack = [a.in_embed.transpose(1, 2)]
        for i in range(max_len - 1):
            conv_stack.append(self.conv(self.nonlinearity(conv_stack[-1])))
        a.conv_repr = (th.stack([
            conv_stack[n - 1][j, :, 0] for j, n in enumerate(src_lengths.data)
        ], 0).view(1, batch_size,
                   self.cell_size).repeat(self.num_layers, 1, 1))
        init_var = th.autograd.Variable(cu(th.FloatTensor([1.0])))
        c_init = (self.c_init(init_var).view(self.num_layers, 1,
                                             self.cell_size).repeat(
                                                 1, batch_size, 1))

        result = a.conv_repr, c_init

        if not self.monitor_activations:
            # Free up memory
            a.__dict__.clear()

        return result
Example #9
    def make_selection(self):
        empty_sel_indices = th.autograd.Variable(cu(th.LongTensor([0])))
        sel_predict, sel_score = self.negotiator.selection(empty_sel_indices,
                                                           self.feasible_sels,
                                                           self.num_feasible_sels)
        return parse_selection(' '.join(self.vectorizer.sel_vec.unvectorize(
            thutils.to_numpy(sel_predict['sample'])[0]
        )), self.game[0])
Example #10
    def forward(self, outputs, src_lengths):
        a = self.activations

        assert outputs.dim() == 3, outputs.size()
        assert outputs.size()[2] == self.repr_size, (outputs.size(),
                                                     self.repr_size)
        batch_size, max_len, repr_size = outputs.size()

        a.attn_h1 = th.nn.Tanh()(self.hidden1(outputs))
        a.attn_h2 = self.hidden2(outputs)
        assert a.attn_h2.size() == (batch_size, max_len, repr_size), \
            (a.attn_h2.size(), (batch_size, max_len, repr_size))
        init_var = th.autograd.Variable(cu(th.FloatTensor([1.0])))
        a.target = self.target(init_var)
        assert a.target.size() == (repr_size, ), (a.target.size(), repr_size)
        a.attn_scores = th.matmul(a.attn_h2, a.target)
        assert a.attn_scores.size() == (batch_size, max_len), \
            (a.attn_scores.size(), (batch_size, max_len))
        attn_mask = th.autograd.Variable(
            cu(
                th.log((lrange(max_len)[None, :] <
                        src_lengths.data[:, None]).float())))
        a.attn_weights = th.exp(
            th.nn.LogSoftmax(dim=1)(a.attn_scores + attn_mask))
        assert a.attn_weights.size() == (batch_size, max_len), \
            (a.attn_weights.size(), (batch_size, max_len))
        a.attn_out = th.matmul(a.attn_weights[:, None, :], outputs)[:, 0, :]
        assert a.attn_out.size() == (batch_size, repr_size), \
            (a.attn_out.size(), (batch_size, repr_size))

        self.dump_weights(a.attn_weights.data)

        result = a.attn_out, a.attn_weights

        if not self.monitor_activations:
            # Free up memory
            a.__dict__.clear()

        return result
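
lrange is not defined in these snippets; judging from the mask above and the advanced indexing elsewhere, it presumably builds a LongTensor [0, 1, ..., n-1]. A minimal CPU-only sketch under that assumption:

import torch as th

def lrange(n):
    # Presumed equivalent; the original likely also moves the result to the GPU
    # (e.g. via the same cu() wrapper the other snippets use).
    return th.arange(n, dtype=th.long)

# e.g. the attention mask: positions at or beyond each source length get log(0) = -inf.
src_lengths = th.LongTensor([2, 4])
attn_mask = th.log((lrange(4)[None, :] < src_lengths[:, None]).float())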
Example #11
def generate_rnn_state(encoder, h_init_mod, c_init_mod, batch_size):
    init_var = th.autograd.Variable(cu(th.FloatTensor([1.0])))

    h_init = (h_init_mod(init_var).view(
        encoder.num_layers * encoder.num_directions, 1,
        encoder.cell_size // encoder.num_directions).repeat(1, batch_size, 1))
    if encoder.use_c:
        c_init = (c_init_mod(init_var).view(
            encoder.num_layers * encoder.num_directions, 1,
            encoder.cell_size // encoder.num_directions).repeat(
                1, batch_size, 1))
        return (h_init, c_init)
    else:
        return h_init
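
A usage sketch for generate_rnn_state with a hypothetical encoder description; the attribute names mirror what the function reads, and the Linear sizes are illustrative only.

import torch as th
from types import SimpleNamespace

encoder = SimpleNamespace(num_layers=2, num_directions=1, cell_size=8, use_c=True)

# Learned initial states: a constant scalar input is mapped to one vector per
# layer/direction, then repeated across the batch inside generate_rnn_state.
h_init_mod = th.nn.Linear(1, encoder.num_layers * encoder.cell_size)
c_init_mod = th.nn.Linear(1, encoder.num_layers * encoder.cell_size)

# h0, c0 = generate_rnn_state(encoder, h_init_mod, c_init_mod, batch_size=4)
# h0.size() == c0.size() == (2, 4, 8), ready to seed an LSTM.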
Example #12
    def context(self, goal_indices):
        # "GRU_g": encode goals (values of items)
        a = self.activations

        batch_size, goal_size = goal_indices.size()
        assert goal_size == GOAL_SIZE, goal_indices.size()

        goal_len = th.autograd.Variable(cu(
            (th.ones(batch_size) * goal_size).int()
        ))
        assert goal_len.size() == (batch_size,), goal_len.size()

        a.context_repr_seq, _ = self.context_encoder(goal_indices, goal_len)
        assert a.context_repr_seq.dim() == 3, a.context_repr_seq.size()
        assert a.context_repr_seq.size()[:2] == (batch_size, goal_size), a.context_repr_seq.size()

        a.context_repr = a.context_repr_seq[:, -1, :]
        context_repr_size = a.context_repr_seq.size()[2]
        assert a.context_repr.size() == (batch_size, context_repr_size), a.context_repr.size()

        self.dec_state = seq2seq.generate_rnn_state(self.response_encoder,
                                                    self.h_init, self.c_init, batch_size)
        if not isinstance(self.dec_state, tuple):
            self.dec_state = (self.dec_state,)
Example #13
    def forward(self, enc_state, extra_inputs=None, extra_delimiter=None):
        if not isinstance(enc_state, tuple):
            enc_state = (enc_state, )
        assert len(enc_state[0].size()) == 3, enc_state[0].size()
        num_layers, batch_size, h_size = enc_state[0].size()
        state_sizes = []
        state = []
        for enc_c in enc_state:
            assert len(enc_c.size()) == 3, enc_c.size()
            assert enc_c.size()[:2] == (num_layers, batch_size), enc_c.size()
            c_size = enc_c.size()[2]
            state_sizes.append(c_size)
            state.append(enc_c[:, :, None, :].expand(num_layers, batch_size,
                                                     self.beam_size, c_size))
        if extra_inputs is None:
            extra_inputs = []
        else:
            extra_inputs = [
                inp[:, None,
                    ...].expand((inp.size()[0], self.beam_size) +
                                tuple(inp.size()[1:])).contiguous().view(
                                    (inp.size()[0] * self.beam_size, 1) +
                                    tuple(inp.size()[1:]))
                for inp in extra_inputs
            ]

        def ravel(x):
            return x.contiguous().view(
                *tuple(x.size()[:-2]) +
                (batch_size, self.beam_size, x.size()[-1]))

        def unravel(x):
            return x.contiguous().view(
                *tuple(x.size()[:-3]) +
                (batch_size * self.beam_size, x.size()[-1]))

        beam = th.autograd.Variable(
            cu(
                th.LongTensor(batch_size, self.beam_size,
                              1).fill_(self.delimiters[0])))
        beam_scores = th.autograd.Variable(
            cu(th.zeros(batch_size, self.beam_size)))
        beam_lengths = th.autograd.Variable(
            cu(th.LongTensor(batch_size, self.beam_size).zero_()))
        outputs = []
        states = []

        for length in itertools.count(1):
            last_tokens = beam[:, :, -1:]
            assert last_tokens.size() == (batch_size, self.beam_size,
                                          1), last_tokens.size()
            word_scores, (dec_out, state) = self.decode_fn(
                unravel(last_tokens),
                tuple(unravel(c) for c in state),
                extra_inputs=extra_inputs)
            word_scores = ravel(word_scores[:, 0, :])
            state = tuple(ravel(c) for c in state)
            states.append(state)
            outputs.append(dec_out)
            assert word_scores.size()[:2] == (
                batch_size, self.beam_size), word_scores.size()
            beam, beam_lengths, beam_scores = self.step(
                word_scores,
                length,
                beam,
                beam_scores,
                beam_lengths,
                extra_delimiter=extra_delimiter)
            if (beam_lengths.data != length).prod() or \
                    (self.max_len is not None and length == self.max_len):
                break

        all_states_collated = [th.stack(s, dim=3) for s in zip(*states)]
        final_indices = th.clamp(beam_lengths.data, max=self.max_len - 1)
        final_states = [
            s[:,
              lrange(batch_size)[:, None],
              lrange(self.beam_size)[None, :], final_indices, :]
            for s in all_states_collated
        ]
        all_outputs = th.stack(outputs, dim=1)
        return (beam, th.clamp(beam_lengths, max=self.max_len), beam_scores,
                (all_outputs, final_states))
Example #14
    def policy(self, output_predict, output_score):
        if rng.random_sample() <= self.epsilon:
            return output_predict['sample'], output_score['sample']
        else:
            return output_predict['beam'], th.autograd.Variable(cu(th.FloatTensor([0.0])))
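
The module-level rng used here (and for the turn-order coin flip in Example #5) is not shown; it behaves like a numpy RandomState. A sketch of the epsilon-greedy choice under that assumption, with made-up responses and scores:

import numpy as np
import torch as th

rng = np.random.RandomState(0)  # presumed equivalent of the module-level rng
epsilon = 0.1

output_predict = {'beam': 'greedy response', 'sample': 'sampled response'}
output_score = {'beam': th.tensor(-0.2), 'sample': th.tensor(-1.7)}

if rng.random_sample() <= epsilon:
    choice = output_predict['sample'], output_score['sample']  # explore: keep the sample's log-prob
else:
    choice = output_predict['beam'], th.tensor(0.0)            # exploit: greedy response, zero score
# With the loss in Example #7, only exploratory turns carry a non-zero
# log-probability and therefore contribute to the policy gradient.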
Example #15
    def vectorize_response(self, response, you_them):
        tag = th.autograd.Variable(cu(th.LongTensor([[you_them]])))
        resp_indices, resp_len = self.vectorizer.resp_vec.vectorize(self.tokenize(response))
        tagged_resp_indices = th.cat([tag.expand(1, 1),
                                      thutils.to_torch(resp_indices)[None, :]], 1)
        return (tagged_resp_indices, thutils.to_torch(resp_len + 1))
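
The thutils conversions used here and in Example #9 are not shown; minimal sketches, assuming they simply bridge numpy arrays and torch tensors:

import numpy as np
import torch as th

def to_torch(array):
    # Presumed numpy -> torch conversion (the original likely also moves to the GPU).
    return th.from_numpy(np.asarray(array))

def to_numpy(tensor):
    # Presumed torch -> numpy conversion, detaching and copying back to the CPU.
    return tensor.detach().cpu().numpy()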
Example #16
    def transform_and_predict(self, arrays):
        return self.module(*(th.autograd.Variable(cu(th.from_numpy(a))) for a in arrays))
Example #17
    def selection(self, sel_indices, feasible_sels, num_feasible_sels):
        # "GRU_o": encode dialogue for selection
        a = self.activations

        assert sel_indices.dim() == 1, sel_indices.size()
        batch_size = sel_indices.size()[0]

        a.combined_repr = self.combined_layer(th.cat([a.context_repr, a.dialogue_repr],
                                                     dim=1))
        assert a.combined_repr.dim() == 2, a.combined_repr.size()
        assert a.combined_repr.size()[0] == batch_size, (a.combined_repr.size(), batch_size)

        a.all_item_scores = log_softmax(self.selection_layer(a.combined_repr))
        assert a.all_item_scores.size() == (batch_size, self.selection_layer.out_features), \
            (a.all_item_scores.size(), (batch_size, self.selection_layer.out_features))

        a.feasible_item_scores = a.all_item_scores[
            lrange(a.all_item_scores.size()[0])[:, None, None],
            feasible_sels.data
        ]
        assert a.feasible_item_scores.size() == (batch_size, MAX_FEASIBLE + 3, NUM_ITEMS), \
            (a.feasible_item_scores.size(), batch_size)

        num_feasible_mask = th.autograd.Variable(cu(
            (lrange(a.feasible_item_scores.size()[1])[None, :, None] <=
             num_feasible_sels.data[:, None, None]).float()
        ))
        a.feasible_masked = a.feasible_item_scores + th.log(num_feasible_mask)
        a.full_selection_scores = log_softmax(a.feasible_item_scores.sum(dim=2), dim=1)
        assert a.full_selection_scores.size() == (batch_size, MAX_FEASIBLE + 3), \
            (a.full_selection_scores.size(), batch_size)

        a.selection_beam_score, selection_beam = a.full_selection_scores.max(dim=1)
        assert selection_beam.size() == (batch_size,), (selection_beam.size(), batch_size)
        selection_sample = th.multinomial(th.exp(a.full_selection_scores),
                                          1, replacement=True)[:, 0]
        a.selection_sample_score = th.exp(a.full_selection_scores)[
            lrange(a.full_selection_scores.size()[0]),
            selection_sample.data
        ]
        assert selection_sample.size() == (batch_size,), (selection_sample.size(), batch_size)
        selection_predict = {
            'beam': self.sel_indices_to_selection(feasible_sels, selection_beam),
            'sample': self.sel_indices_to_selection(feasible_sels, selection_sample),
        }
        assert selection_predict['beam'].size() == (batch_size, NUM_ITEMS), \
            (selection_predict['beam'].size(), batch_size)
        assert selection_predict['sample'].size() == (batch_size, NUM_ITEMS), \
            (selection_predict['sample'].size(), batch_size)
        a.selection_target_score = a.full_selection_scores[
            lrange(a.full_selection_scores.size()[0]),
            sel_indices.data
        ]
        assert a.selection_target_score.size() == (batch_size,), (a.selection_target_score.size(),
                                                                  batch_size)
        selection_score = {
            'target': a.selection_target_score,
            'beam': a.selection_beam_score,
            'sample': a.selection_sample_score,
        }

        return selection_predict, selection_score
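
A toy illustration of the advanced indexing that gathers a score for every (feasible split, item) pair; batch of 1, two feasible splits, three items, made-up scores:

import torch as th

all_item_scores = th.randn(1, 12)                        # flat vector of selection scores
feasible_sels = th.LongTensor([[[0, 4, 8], [1, 5, 9]]])  # indices into that vector
scores = all_item_scores[
    th.arange(1)[:, None, None],  # batch index, broadcast over splits and items
    feasible_sels
]
# scores.size() == (1, 2, 3): one score per (feasible split, item), which the
# method above then sums over items and renormalizes with log_softmax.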