Exemplo n.º 1
0
 def score_step(self, step_batch, train=False):
     """Compute the combined flat-policy and segmenter loss for a batch.

     Args:
         step_batch: Batch exposing ``init_obs``, ``obs`` and ``final``;
             it is also forwarded unchanged to ``self._logprob_of``.
         train: Accepted for interface compatibility; not read here.

     Returns:
         A ``(loss, stats)`` pair. ``loss`` is the sum of the policy and
         segmenter terms; ``stats`` maps ``'pol_loss'`` and ``'seg_loss'``
         to the first element of each unwrapped term.
     """
     features = self._featurizer(step_batch.init_obs, step_batch.obs)

     # Policy term: mean over whatever ``_logprob_of`` yields for the batch.
     per_step = self._logprob_of(self._flat_policy, features, step_batch)
     policy_term = per_step.mean()

     # Segmenter term: segmenter output scored against ``step_batch.final``.
     seg_out = self._segmenter(features)
     segment_term = self._segmenter_obj(seg_out, step_batch.final)

     total = policy_term + segment_term
     stats = {
         'pol_loss': unwrap(policy_term)[0],
         'seg_loss': unwrap(segment_term)[0],
     }
     return total, stats
Exemplo n.º 2
0
 def best_desc(self, seq_batch, i_task, start, end, descs):
     """Return the lowest-scoring candidate description for a span.

     Every candidate in ``descs`` is scored by summing the result of
     ``self.score_span`` for the span given by ``start`` and ``end``.

     Args:
         seq_batch: Batch forwarded to ``self.score_span``.
         i_task: Index of the task within the batch.
         start: Span start, forwarded to ``self.score_span``.
         end: Span end, forwarded to ``self.score_span``.
         descs: Iterable of candidate descriptions.

     Returns:
         A ``(score, desc)`` tuple for the candidate with the smallest
         score. When several candidates tie, the first one wins.

     Raises:
         ValueError: If ``descs`` is empty.
     """
     scored = []
     for desc in descs:
         total = self.score_span(seq_batch, i_task, start, end, desc).sum()
         scored.append((unwrap(total)[0], desc))
     # Select on the score alone. Without ``key`` a tie on the score makes
     # ``min`` compare the descriptions themselves, which is arbitrary and
     # fails for description types that do not define an ordering.
     return min(scored, key=lambda pair: pair[0])
Exemplo n.º 3
0
 def decode(self, init_state, max_len, sample=False):
     """Generate one token sequence per batch element, step by step.

     At every step the previous tokens are one-hot encoded, fed through
     the module together with the current state, and the next token is
     chosen from the softmax over the returned logits.

     Args:
         init_state: Initial state with three axes; the middle axis is
             used as the batch size. Its ``is_cuda`` flag decides whether
             the step inputs are moved to CUDA.
         max_len: Maximum number of decoding steps.
         sample: If true, draw each token with ``np.random.choice``;
             otherwise take the arg-max.

     Returns:
         A list with one token list per batch element. Every list starts
         with ``self._start_id`` and stops growing after the first
         ``self._stop_id`` (which is included).
     """
     # The unpacking doubles as a check that the state has three axes.
     _, batch_size, _ = init_state.shape
     start_tok = self._start_id
     sequences = [[start_tok] for _ in range(batch_size)]
     prev_toks = [start_tok] * batch_size
     finished = [False] * batch_size
     hidden = init_state

     for _step in range(max_len):
         # One-hot encode the tokens chosen at the previous step.
         one_hot = np.zeros((1, batch_size, len(self._vocab)))
         for row, prev in enumerate(prev_toks):
             one_hot[0, row, prev] = 1
         step_inp = Variable(torch.FloatTensor(one_hot))
         if init_state.is_cuda:
             step_inp = step_inp.cuda()

         next_hidden, logits = self(hidden, step_inp)
         probs = unwrap(self._softmax(logits.squeeze(0)))

         chosen = []
         for row, dist in enumerate(probs):
             tok = np.random.choice(dist.size, p=dist) if sample \
                 else dist.argmax()
             # Finished rows still feed their token forward as input, but
             # their output sequence is frozen.
             chosen.append(tok)
             if not finished[row]:
                 sequences[row].append(tok)
                 finished[row] = tok == self._stop_id

         hidden, prev_toks = next_hidden, chosen
         if all(finished):
             break

     return sequences
Exemplo n.º 4
0
 def act(self, step_batch, sample=True):
     """Choose an (action, action-position) pair for every batch row.

     Args:
         step_batch: Batch exposing ``init_obs`` and ``obs``; it is also
             passed to ``self._flat_policy``.
         sample: If true, draw both indices with ``np.random.choice``
             from the policy's softmax outputs; otherwise take the
             arg-max of each.

     Returns:
         A list with one ``(a, ap)`` tuple per row, after conversion by
         ``self._dataset.unravel_action``.
     """
     features = self._featurizer(step_batch.init_obs, step_batch.obs)
     policy = self._flat_policy
     (logits, pos_logits), _ = policy(features, step_batch)
     probs = unwrap(policy._act_softmax(logits))
     pos_probs = unwrap(policy._act_pos_softmax(pos_logits))

     def pick(dist):
         # Stochastic or greedy choice of an index from one distribution.
         if sample:
             return np.random.choice(dist.size, p=dist)
         return dist.argmax()

     actions = []
     for row in range(probs.shape[0]):
         # Action first, then position, so the RNG draw order is fixed.
         act_idx = pick(probs[row, :])
         pos_idx = pick(pos_probs[row, :])
         act_idx, pos_idx = self._dataset.unravel_action((act_idx, pos_idx))
         actions.append((act_idx, pos_idx))
     return actions
Exemplo n.º 5
0
 def score_seq(self, seq_batch, train=False):
     """Compute the describer loss for a batch of sequences.

     Args:
         seq_batch: Batch exposing ``init_obs()``, ``last_obs()``,
             ``desc`` and ``desc_tgt``.
         train: Accepted for interface compatibility; not read here.

     Returns:
         A ``(loss, stats)`` pair where ``stats`` maps ``'desc_loss'`` to
         the first element of the unwrapped loss.
     """
     first_obs = seq_batch.init_obs()
     last_obs = seq_batch.last_obs()
     state_feats, _ = self._featurizer(first_obs, last_obs)

     _, logits = self._describer(state_feats.unsqueeze(0), seq_batch.desc)

     # Collapse the first two axes so that the objective sees one row of
     # predictions and one target per (token, batch) position.
     n_tok, n_batch, n_pred = logits.shape
     n_rows = n_tok * n_batch
     flat_logits = logits.view(n_rows, n_pred)
     flat_targets = seq_batch.desc_tgt.view(n_rows)

     loss = self._describer_obj(flat_logits, flat_targets)
     return loss, {'desc_loss': unwrap(loss)[0]}
Exemplo n.º 6
0
    def step(self, train_loss=None, val_loss=None, hier_loss=None):
        """Apply whichever updates the supplied losses call for.

        Args:
            train_loss: If given, back-propagated and applied through
                ``self._opt``.
            val_loss: If given, the first element of its unwrapped value
                is passed to ``self._sched.step``.
            hier_loss: If given, back-propagated and applied through
                ``self._hier_opt``.
        """

        def descend(optimizer, loss):
            # One update: clear gradients, back-propagate, apply.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if train_loss is not None:
            descend(self._opt, train_loss)

        if val_loss is not None:
            scheduler = self._sched
            scheduler.step(unwrap(val_loss)[0])

        if hier_loss is not None:
            descend(self._hier_opt, hier_loss)
Exemplo n.º 7
0
    def act_hier(self, step_batch, sample=True, feats=None):
        """Act through the hierarchical policy, then the flat policy.

        The hierarchical policy yields a top-level action per row and a
        decoded description per row. Rows whose top-level action equals
        ``self._env.SAY`` adopt the decoded description; all other rows
        keep ``step_batch.desc``. The resulting descriptions replace
        ``desc_in`` on the batch, which is then handed to ``self.act``.

        Args:
            step_batch: Batch exposing ``init_obs``, ``obs``, ``desc`` and
                ``_replace``.
            sample: If true, draw the top-level action with
                ``np.random.choice``; otherwise take the arg-max. Also
                forwarded to ``self.act``.
            feats: Precomputed features; computed from ``step_batch`` when
                ``None``.

        Returns:
            Whatever ``self.act`` returns for the updated batch.
        """
        if feats is None:
            feats = self._featurizer(step_batch.init_obs, step_batch.obs)

        policy = self._hier_policy
        (logits, _), _ = policy(feats, step_batch)
        proposed = policy.decode(feats, step_batch)
        probs = unwrap(policy._act_softmax(logits))

        def pick(dist):
            # Stochastic or greedy choice of an index from one distribution.
            if sample:
                return np.random.choice(dist.size, p=dist)
            return dist.argmax()

        top_actions = [pick(probs[row, :]) for row in range(probs.shape[0])]

        chosen = [
            proposed[row] if action == self._env.SAY else step_batch.desc[row]
            for row, action in enumerate(top_actions)
        ]
        desc_in = Variable(
            data.load_desc_data(chosen, self._dataset, tokenize=False))

        # TODO: the initial observation is not modified here, and the move
        # to CUDA is unconditional; earlier notes flag both for rework.
        flat_batch = step_batch._replace(desc_in=desc_in).cuda()
        return self.act(flat_batch, sample=sample)
Exemplo n.º 8
0
    def _parse_inner(self, seq_batch, i_task, start, end, remaining_depth,
                     top_desc):
        """Recursively split a span into two sub-spans with descriptions.

        For every proposed split point, the best candidate description of
        each half is compared against the score the parent description
        ``top_desc`` obtains on that half. A half keeps its own description
        only when it scores strictly lower than the parent; otherwise it
        inherits the parent (recorded as ``None``). The split with the
        lowest combined score is kept and both halves are parsed further.

        Args:
            seq_batch: Batch exposing ``act`` and accepted by
                ``score_span``, ``propose_splits`` and ``propose_descs``.
            i_task: Index of the task within the batch.
            start: Start of the span to split.
            end: End of the span to split.
            remaining_depth: Remaining recursion budget.
            top_desc: Description currently assigned to the span.

        Returns:
            A list of ``(desc, (start, end))`` entries: the two halves of
            this split first, then the entries from parsing the left half,
            then those from the right half. ``desc`` is ``None`` where the
            parent description was kept. Empty when the depth budget is
            used up, the span is shorter than two steps, or no split was
            proposed.
        """
        if remaining_depth <= 0 or end - start < 2:
            return []

        # TODO only this segment
        root_scores = self.score_span(seq_batch, i_task, 0,
                                      len(seq_batch.act[i_task]) - 1, top_desc)

        proposed = unwrap(self.propose_splits(seq_batch, i_task, start, end))
        splits = [int(k) for k in proposed]

        # Two spans per split point: left half first, then right half.
        indices = []
        for k in splits:
            indices.append((i_task, start, k))
            indices.append((i_task, k, end))
        descs = self.propose_descs(seq_batch, indices)

        candidates = []
        for i, k in enumerate(splits):
            descs1, descs2 = descs[2 * i], descs[2 * i + 1]

            s1c, desc1 = self.best_desc(seq_batch, i_task, start, k, descs1)
            s2c, desc2 = self.best_desc(seq_batch, i_task, k, end, descs2)

            s1p, = unwrap(root_scores[start:k].sum())
            s2p, = unwrap(root_scores[k:end].sum())

            # A child description must beat the parent strictly to be kept.
            s1, pick1 = (s1c, desc1) if s1c < s1p else (s1p, None)
            s2, pick2 = (s2c, desc2) if s2c < s2p else (s2p, None)

            candidates.append((s1 + s2, k, (pick1, pick2)))

        if not candidates:
            return []

        # Order by (score, split) only: letting the comparison reach the
        # description pair would compare ``None`` with a description.
        _, split, (d1, d2) = min(candidates, key=lambda c: (c[0], c[1]))

        out = [(d1, (start, split)), (d2, (split, end))]
        if d1 is not None or d2 is not None:
            actions = [a for a, ap in seq_batch.act[i_task]]
            print(self._dataset.render_desc(top_desc), ':',
                  self._dataset.render_desc(d1) if d1 else '_', '>',
                  self._dataset.render_desc(d2) if d2 else '_')
            print(actions[start:split], actions[split:end])
            print()

        for sub_start, sub_end, sub_desc in ((start, split, d1),
                                             (split, end, d2)):
            # A half without its own description inherits the parent's.
            if sub_desc is None:
                sub_desc = top_desc
            out += self._parse_inner(seq_batch, i_task, sub_start, sub_end,
                                     remaining_depth - 1, sub_desc)

        return out
Exemplo n.º 9
0
 def score_hier(self, step_batch, train=False):
     """Compute the hierarchical-policy loss for a batch.

     Args:
         step_batch: Batch exposing ``init_obs`` and ``obs``; it is also
             forwarded unchanged to ``self._logprob_of``.
         train: Accepted for interface compatibility; not read here.

     Returns:
         A ``(loss, stats)`` pair. ``loss`` is the mean of what
         ``self._logprob_of`` returns for the hierarchical policy;
         ``stats`` maps ``'hier_loss'`` to the first element of the
         unwrapped loss.
     """
     features = self._featurizer(step_batch.init_obs, step_batch.obs)
     per_step = self._logprob_of(self._hier_policy, features, step_batch)
     loss = per_step.mean()
     stats = {'hier_loss': unwrap(loss)[0]}
     return loss, stats