Example #1
 def set_loss(self, loss_fn):
     assert loss_fn in ['multinomial', 'hinge', 'squared', 'huber']
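     # wrap the selected loss so that self.loss_fn(pred_costs, truth, state_actions) has a uniform three-argument signature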
     if loss_fn == 'hinge':
         l = nn.MultiMarginLoss(size_average=False)
         self.loss_fn = lambda p, t, _: l(p, Varng(torch.LongTensor([t])))
     elif loss_fn == 'multinomial':
         l = nn.NLLLoss(size_average=False)
         self.loss_fn = lambda p, t, _: l(
             F.log_softmax(p.unsqueeze(0), dim=1),
             Varng(torch.LongTensor([t])))
     elif loss_fn in ['squared', 'huber']:
         l = (nn.MSELoss if loss_fn == 'squared' else nn.SmoothL1Loss)(
             size_average=False)
         self.loss_fn = lambda p, t, sa: self._compute_loss(
             l, p, 1 - truth_to_vec(t, torch.zeros(self.n_actions)), sa)
Example #2
 def _forward(self, state):
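     # 4 blackjack features: a bias term, the player's hand total, the dealer's first card, and a usable-ace flag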
     view = util.zeros(self._t.weight, 1, 1, 4)
     view[0, 0, 0] = 1.
     view[0, 0, 1] = float(sum_hand(state.player))
     view[0, 0, 2] = float(state.dealer[0])
     view[0, 0, 3] = float(usable_ace(state.player))
     return Varng(view)
Example #3
 def _forward(self, state):
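     # 4 features: a bias term, cos and sin of the angle th, and the angular velocity th_dot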
     view = util.zeros(self._t.weight, 1, 1, 4)
     view[0, 0, 0] = 1.
     view[0, 0, 1] = np.cos(state.th)
     view[0, 0, 2] = np.sin(state.th)
     view[0, 0, 3] = state.th_dot
     return Varng(view)
Example #4
 def forward(self, state):
     x = self.features(state)
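     # optionally re-wrap the raw data so gradients from the value head do not flow back into the feature network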
     if self.disconnect_values:
         x = Varng(x.data)
     #x *= 0
     #x[0,0] = 1
     return self.value_fn(x)
Example #5
 def _forward(self, state):
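     # indicator features for the most recent history_length observations, 10 slots per time step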
     view = util.zeros(self._t, 1, 1, 10 * self.history_length)
     for h in range(self.history_length):
         obs = state.obs[max(0, len(state.obs) - h - 1)]
         for i in range(10):
             if (obs & i) > 0:
                 view[0, 0, h * 10 + i] = 1.
     return Varng(view)
Example #6
 def _forward_batch(self, envs):
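     # batched bag-of-words features, zero-padded to the length of the longest text in the batch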
     batch_size = len(envs)
     txts = [util.getattr_deep(env, self.input_field) for env in envs]
     txt_len = list(map(len, txts))
     max_len = max(txt_len)
     bow = util.zeros(self, batch_size, max_len, self.dim)
     for j, txt in enumerate(txts):
         self.set_bow(bow, j, txt)
     return Varng(bow), txt_len
Example #7
 def _forward_batch(self, envs):
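     # embed integer token ids for a batch of texts, zero-padded to the longest sequence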
     batch_size = len(envs)
     txts = [util.getattr_deep(env, self.input_field) for env in envs]
     txt_len = list(map(len, txts))
     max_len = max(txt_len)
     x = util.longtensor(self, batch_size, max_len).zero_()
     for n, txt in enumerate(txts):
         for i in range(txt_len[n]): # TODO could this be faster?
             x[n,i] = int(txt[i])
     return self.embed(Varng(x)).view(batch_size, max_len, self.dim), txt_len
Example #8
 def _forward(self, state, x):
     feats = x[:]
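     # append one-hot encodings of the most recent actions, if an action history is kept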
     if self.act_history_length > 0:
         f = util.zeros(self, 1, self.act_history_length * self.n_actions)
         for i in range(min(self.act_history_length,
                            len(state._trajectory))):
             a = state._trajectory[-i]
             f[0, i * self.n_actions + a] = 1
         feats.append(Varng(f))
     if self.obs_history_length > 0:
         for i in range(self.obs_history_length):
             feats.append(
                 Varng(self.obs_history[(self.obs_history_pos + i) %
                                        self.obs_history_length]))
         # update history
         self.obs_history[self.obs_history_pos] = torch.cat(x, dim=1).data
         self.obs_history_pos = (self.obs_history_pos +
                                 1) % self.obs_history_length
     return torch.cat(feats, dim=1)
Example #9
 def stochastic(self, state):
     z = self.mapping(self.features(state)).squeeze()
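     # add a large cost to actions that are not currently available so the softmax gives them near-zero probability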
     if len(state.actions) != self.n_actions:
         self.disallow.zero_()
         self.disallow += 1e10
         for a in state.actions:
             self.disallow[a] = 0.
         z += Varng(self.disallow)
     p = F.softmax(-z / self.temperature, dim=0)
     return util.sample_from_probs(p)
Example #10
 def _forward(self, state):
     view = util.zeros(self._t.weight, 1, 1, self.dim)
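     # one indicator per neighboring cell (in each of the four directions) that is not a legal position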
     if not state.is_legal((state.loc[0] - 1, state.loc[1])):
         view[0, 0, 0] = 1.
     if not state.is_legal((state.loc[0] + 1, state.loc[1])):
         view[0, 0, 1] = 1.
     if not state.is_legal((state.loc[0], state.loc[1] - 1)):
         view[0, 0, 2] = 1.
     if not state.is_legal((state.loc[0], state.loc[1] + 1)):
         view[0, 0, 3] = 1.
     return Varng(view)
Example #11
 def _forward(self, state, x):
     w = self.rnn.weight_ih
     # embed the previous action (if it exists)
     last_a = self.n_actions if len(
         state._trajectory) == 0 else state._trajectory[-1]
     if self.d_actemb is None:
         prev_a = util.zeros(w, 1, 1 + self.n_actions)
         prev_a[0, last_a] = 1
         prev_a = Varng(prev_a)
     else:
         prev_a = self.embed_a(util.onehot(w, last_a))

     # combine prev hidden state, prev action embedding, and input x
     inputs = torch.cat([prev_a] + x, 1)
     self.h = self.rnn(inputs, self.h)
     return self.hidden()
Example #12
 def get_objective(self, loss):
     if len(self.trajectory) == 0: return
     loss = float(loss)
     loss_var = Varng(self.loss_var + loss)

     total_loss = 0.0
     for p_a, value in self.trajectory:
         v = value.data[0, 0]

         # reinforcement loss
         total_loss += (loss - v) * p_a.log()

         # value fn approximator loss
         total_loss += self.value_multiplier * self.loss_fn(value, loss_var)

     self.trajectory = []
     return total_loss
Example #13
 def _forward(self, state):
     ghost_positions = set(state.ghost_pos)
     view = util.zeros(self._t, 1, 1, 10 * self.history_length)
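     # encode each grid cell as one of 8 one-hot cell types (empty, wall, food, power food, ghost, agent, etc.)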
     for y in range(self.height):
         for x in range(self.width):
             idx = (x * self.height + y) * 8
             c = 0
             pos = (x, y)
             if not state.passable(pos): c = 1
             if pos in state.food:
                 c = 3 if state.check(pos, POWER) else 2
             if pos in ghost_positions:
                 c = 5 if state.power_steps == 0 else 7
             elif pos == state.pocman:
                 c = 4 if state.power_steps == 0 else 6
             view[0, 0, idx + c] = 1
     return Varng(view)
Example #14
 def _forward(self, state):
     view = util.zeros(self._t.weight, 1, 1, self.dim)
     view[0, 0, state.loc[0] * state.example.height + state.loc[1]] = 1
     return Varng(view)
Example #15
 def _compute_loss(self, loss_fn, pred_costs, truth, state_actions):
     if len(state_actions) == self.n_actions:
         return loss_fn(pred_costs, Varng(truth))
     return sum((loss_fn(pred_costs[a], Varng(torch.zeros(1) + truth[a])) \
                 for a in state_actions))
Example #16
 def _reset(self):
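     # reset the recurrent hidden state to zeros; an LSTM also needs a cell state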
     self.h = Varng(util.zeros(self.rnn.weight_ih, 1, self.d_hid))
     if self.cell_type == 'LSTM':
         self.h = self.h, Varng(
             util.zeros(self.rnn.weight_ih, 1, self.d_hid))
Example #17
 def _forward(self, state):
     f = util.zeros(self._t.weight, 1, 1, self.n_states)
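     # with probability roughly noise_rate the observation stays all zeros; otherwise one-hot encode the current state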
     if np.random.random() > self.noise_rate:
         f[0, 0, state.s] = 1
     return Varng(f)
Example #18
 def _forward(self, env):
     txt = util.getattr_deep(env, self.input_field)
     bow = util.zeros(self, 1, len(txt), self.dim)
     self.set_bow(bow, 0, txt)
     return Varng(bow)
Example #19
 def _forward(self, state):
     view = state.state.view(1, 1, 3 * self.board_size**2)
     return Varng(view)
Example #20
 def _forward(self, env):
     txt = util.getattr_deep(env, self.input_field)
     return self.embed(Varng(util.longtensor(self, txt))).view(1, -1, self.dim)