Exemplo n.º 1
0
    def __init__(self, sketch_lengths, val_ratio):
        """Load jacopinpad demonstration pickles and build train/val splits.

        :param sketch_lengths: iterable of sketch lengths; each selects a
            ``jacopinpad_<len>.pkl`` file under ``DATASET_DIR``.
        :param val_ratio: fraction of each env's trajectories held out for
            validation.
        """
        data = {}
        keys = ['states', 'actions', 'gt_onsets', 'tasks']
        for sketch_len in sketch_lengths:
            pkl_name = os.path.join(DATASET_DIR,
                                    'jacopinpad_{}.pkl'.format(sketch_len))
            with open(pkl_name, 'rb') as f:
                data[sketch_len] = pickle.load(f)

        # Turn it into DictList
        for env_name in data:
            new_data = []
            trajs = data[env_name]
            # Convert the column-major pickle (dict of per-key lists) into
            # one dict per trajectory, restricted to the keys we need.
            trajs = [{k: trajs[k][i]
                      for k in keys} for i in range(len(trajs['states']))]
            for traj in trajs:
                new_traj = DictList(traj)
                # Skip over-long trajectories entirely.
                if len(new_traj) > MAX_TRAIN_LENGTH:
                    continue
                # Done flag raised only on the final timestep.
                new_traj.done = [False] * (len(new_traj) - 1) + [True]
                new_data.append(new_traj)
            data[env_name] = new_data

        # Random train/val split per environment.
        self.data = {'train': {}, 'val': {}}
        for env_name in data:
            _data = data[env_name]
            nb_data = len(_data)
            nb_val = int(val_ratio * nb_data)
            random.shuffle(_data)
            self.data['val'][env_name] = _data[:nb_val]
            self.data['train'][env_name] = _data[nb_val:]
            logging.info('{}: Train: {} Val: {}'.format(
                env_name, len(self.data['train'][env_name]),
                len(self.data['val'][env_name])))
        self.env_names = sketch_lengths

        # Accumulate normalization statistics over the training split only;
        # actions are accumulated as 9-dim, states as 39-dim vectors.
        start = time.time()
        a_stats = RunningMeanMax(dimension=9)
        s_stats = RunningMeanMax(dimension=39)
        for env_name in data:
            train_data = self.data['train'][env_name]
            for _traj in train_data:
                for state, action in zip(_traj.states, _traj.actions):
                    a_stats.accumulate(action)
                    s_stats.accumulate(state)

        self.a_mu = a_stats.mean
        # Use the running max as the action scale; guard near-zero entries
        # so later normalization never divides by ~0.
        a_std = a_stats.max
        zer = np.where(a_std < 0.000001)[0]
        a_std[zer] = 1
        self.a_std = a_std

        self.s_mu = s_stats.mean
        self.s_std = s_stats.max
        logging.info('Compute mean and var cost {} sec'.format(time.time() -
                                                               start))
Exemplo n.º 2
0
def step_batch_envs(envs, actions, actives, cuda):
    """Advance every active environment by one step.

    :param envs: full list of environments.
    :param actions: tensor of actions, one row per active env.
    :param actives: indices into ``envs`` selecting the envs to step.
    :param cuda: move the returned tensors to GPU when True.
    :return: (obss DictList, rewards tensor, dones tensor) for the
        active envs only.
    """
    assert actions.shape[0] == len(actives)
    obss = DictList()
    rewards, dones = [], []
    for act, env_idx in zip(actions, actives):
        step_obs, step_rew, step_done, _ = envs[env_idx].step(
            act.cpu().numpy())
        obss.append(step_obs)
        rewards.append(step_rew)
        dones.append(step_done)

    # Tensorize the collected transition pieces.
    obss.apply(lambda _t: torch.tensor(_t).float())
    rewards = torch.tensor(rewards).float()
    dones = torch.tensor(dones)

    if cuda:
        obss.apply(lambda _t: _t.cuda())
        rewards = rewards.cuda()
        dones = dones.cuda()

    # Update active
    return obss, rewards, dones
Exemplo n.º 3
0
 def forward(self, obs, sketchs, sketch_lengths, mems=None) -> DictList:
     """Compute the action distribution from an observation and sketch.

     ``mems`` is accepted for interface compatibility but unused here.
     """
     state_repr = self.encode_obs(obs)
     sketch_repr = self.encode_sketch(sketchs, sketch_lengths)
     actor_in = torch.cat([state_repr, sketch_repr], dim=-1)
     return DictList({'dist': get_action_dist(self.actor(actor_in))})
Exemplo n.º 4
0
def parsing_loop(bot, dataloader, batch_size, cuda):
    """Evaluate boundary-parsing quality of ``bot`` on one validation batch
    per environment.

    Each trajectory's ground-truth segment boundaries are compared against
    boundaries decoded from the model's slot probabilities ``extra_info.p``,
    both with a sweep of fixed thresholds and with the automatic
    peak-picking decoder.

    :param bot: model exposing ``teacherforcing_batch`` and ``nb_slots``.
    :param dataloader: provides ``env_names`` and ``val_iter``.
    :param batch_size: trajectories sampled per environment.
    :param cuda: move batches to GPU when True.
    :return: (dict env_name -> DictList of per-step accuracy lists,
              printable alignment lines for the last decoded trajectory)
    """
    bot.eval()
    parsing_metric = {env: DictList() for env in dataloader.env_names}
    for env_name in dataloader.env_names:
        data_iter = dataloader.val_iter(batch_size=batch_size,
                                        env_names=[env_name],
                                        shuffle=True)
        batch, batch_lens, batch_sketch_lens = next(data_iter)
        if cuda:
            batch.apply(lambda _t: _t.cuda())
            batch_lens = batch_lens.cuda()
            batch_sketch_lens = batch_sketch_lens.cuda()
        with torch.no_grad():
            _, extra_info = bot.teacherforcing_batch(batch,
                                                     batch_lens,
                                                     batch_sketch_lens,
                                                     recurrence=100)
        for batch_id, (length, sketch_length, ps) in enumerate(
                zip(batch_lens, batch_sketch_lens, extra_info.p)):
            traj = batch[batch_id]
            traj = traj[:length]
            _gt_subtask = traj.gt_onsets
            target = point_of_change(_gt_subtask)

            # Trim padding and force a boundary on the first step.
            ps = ps[:length]
            ps[0, :-1] = 0
            ps[0, -1] = 1
            for threshold in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]:
                preds = get_boundaries(ps,
                                       bot.nb_slots,
                                       threshold=threshold,
                                       nb_boundaries=len(target))
                _decoded_subtask = get_subtask_seq(length.item(),
                                                   subtask=traj.tasks.tolist(),
                                                   use_ids=np.array(preds))
                parsing_metric[env_name] += {
                    'task_acc_thres{}'.format(threshold):
                    (_gt_subtask.cpu() == _decoded_subtask.cpu()).tolist()
                }

            # Threshold-free decoding via automatic peak picking.
            preds = automatic_get_boundaries_peak(ps,
                                                  bot.nb_slots,
                                                  nb_boundaries=len(target))
            # BUGFIX: the decoded sequence was previously discarded, so
            # 'task_acc_auto' silently reused the last thresholded decoding
            # (threshold=0.7) instead of the peak-picked one.
            _decoded_subtask = get_subtask_seq(length.item(),
                                               subtask=traj.tasks.tolist(),
                                               use_ids=np.array(preds))
            parsing_metric[env_name] += {
                'task_acc_auto':
                (_gt_subtask.cpu() == _decoded_subtask.cpu()).tolist()
            }

    # print task alignment (for the last trajectory processed)
    lines = []
    lines.append('tru_ids: {}'.format(target))
    lines.append('dec_ids: {}'.format(preds))
    lines.append(idxpos2tree(pos=ps))
    return parsing_metric, lines
Exemplo n.º 5
0
def evaluate_on_envs(bot, dataloader):
    """Compute teacher-forcing validation metrics for every environment.

    Per-batch metric sums are normalized by the total number of valid
    timesteps. For 'om' architectures an additional parsing pass (see
    ``parsing_loop``) is merged into the metrics.

    :return: dict env_name -> dict of scalar metrics.
    """
    val_metrics = {}
    bot.eval()
    envs = dataloader.env_names
    for env_name in envs:
        val_iter = dataloader.val_iter(batch_size=FLAGS.il_batch_size,
                                       env_names=[env_name],
                                       shuffle=True)
        output = DictList({})
        total_lengths = 0
        for batch, batch_lens, batch_sketch_lens in val_iter:
            if FLAGS.cuda:
                batch.apply(lambda _t: _t.cuda())
                batch_lens = batch_lens.cuda()
                batch_sketch_lens = batch_sketch_lens.cuda()

            # Initialize memory
            with torch.no_grad():
                #batch_results = run_batch(batch, batch_lens, batch_sketch_lens, bot, mode='val')
                start = time.time()
                batch_results, _ = bot.teacherforcing_batch(
                    batch,
                    batch_lens,
                    batch_sketch_lens,
                    recurrence=FLAGS.il_recurrence)
                end = time.time()
                print('batch time', end - start)
            # Reduce each metric to a per-batch scalar sum.
            batch_results.apply(lambda _t: _t.sum().item())
            output.append(batch_results)
            total_lengths += batch_lens.sum().item()
            # In debug mode, a single batch per env is enough.
            if FLAGS.debug:
                break
        # Normalize the summed metrics by the total step count.
        output.apply(lambda _t: torch.tensor(_t).sum().item() / total_lengths)
        val_metrics[env_name] = {k: v for k, v in output.items()}

    # Parsing
    if 'om' in FLAGS.arch:
        with torch.no_grad():
            parsing_stats, parsing_lines = parsing_loop(
                bot,
                dataloader=dataloader,
                batch_size=FLAGS.il_batch_size,
                cuda=FLAGS.cuda)
        for env_name in parsing_stats:
            parsing_stats[env_name].apply(lambda _t: np.mean(_t))
            val_metrics[env_name].update(parsing_stats[env_name])
        logging.info('Get parsing result')
        logging.info('\n' + '\n'.join(parsing_lines))

    # evaluate on free run env
    #if not FLAGS.debug:
    #    for sketch_length in val_metrics:
    #        envs = [gym.make('jacopinpad-v0', sketch_length=sketch_length,
    #                         max_steps_per_sketch=FLAGS.max_steps_per_sketch)
    #                for _ in range(FLAGS.eval_episodes)]
    #        with torch.no_grad():
    #            free_run_metric = batch_evaluate(envs=envs, bot=bot, cuda=FLAGS.cuda)
    #        val_metrics[sketch_length].update(free_run_metric)
    return val_metrics
Exemplo n.º 6
0
def ompn_eval(bot, args):
    """Evaluate OMPN boundary parsing on held-out trajectories.

    For each sketch length, one batch of ``args.episodes`` validation
    trajectories is teacher-forced through ``bot``; the per-step slot
    probabilities ``extra_info.p`` are decoded into segment boundaries with
    several fixed thresholds and with automatic peak picking, then compared
    against the ground-truth subtask onsets.

    :return: dict sketch_len -> DictList of mean task accuracies.
    """
    parsing_metric = {}
    # val_ratio=0.99 puts almost all data in the validation split.
    dataloader = Dataloader(args.sketch_lengths, 0.99)
    bot.eval()
    for sketch_len in dataloader.env_names:
        parsing_metric[sketch_len] = DictList()
        data_iter = dataloader.val_iter(args.episodes,
                                        shuffle=True,
                                        env_names=[sketch_len])
        batch, batch_lens, batch_sketch_lens = data_iter.__next__()
        with torch.no_grad():
            _, extra_info = bot.teacherforcing_batch(batch,
                                                     batch_lens,
                                                     batch_sketch_lens,
                                                     recurrence=64)

        for batch_id, (length, sketch_length, ps) in tqdm(
                enumerate(zip(batch_lens, batch_sketch_lens, extra_info.p))):
            traj = batch[batch_id]
            traj = traj[:length]
            _gt_subtask = traj.gt_onsets
            target = point_of_change(_gt_subtask)

            # Get prediction sorted: trim padding and force a boundary on
            # the very first step.
            ps = ps[:length]
            ps[0, :-1] = 0
            ps[0, -1] = 1
            for threshold in [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]:
                preds = get_boundaries(ps,
                                       bot.nb_slots,
                                       threshold=threshold,
                                       nb_boundaries=len(target))
                #parsing_metric[sketch_len].append({'f1_tol{}_thres{}'.format(tol, threshold):
                #                                       f1(target, preds, tol) for tol in [0, 1, 2]})
                _decoded_subtask = get_subtask_seq(length.item(),
                                                   subtask=traj.tasks.tolist(),
                                                   use_ids=np.array(preds))
                parsing_metric[sketch_len] += {
                    'task_acc_thres{}'.format(threshold):
                    (_gt_subtask.cpu() == _decoded_subtask.cpu()).tolist()
                }

            # Threshold-free decoding via automatic peak picking.
            preds = automatic_get_boundaries_peak(ps,
                                                  bot.nb_slots,
                                                  nb_boundaries=len(target))
            _decoded_subtask = get_subtask_seq(length.item(),
                                               subtask=traj.tasks.tolist(),
                                               use_ids=np.array(preds))
            parsing_metric[sketch_len] += {
                'task_acc_auto':
                (_gt_subtask.cpu() == _decoded_subtask.cpu()).tolist()
            }

        # Average the boolean accuracy lists into scalar accuracies.
        parsing_metric[sketch_len].apply(lambda _t: np.mean(_t))

    return parsing_metric
Exemplo n.º 7
0
    def batch_iter(self,
                   trajs,
                   batch_size,
                   shuffle=True,
                   epochs=-1) -> DictList:
        """Yield padded minibatches of trajectories.

        :param trajs: A list of DictList trajectories (each with
            ``actions`` and ``tasks`` entries).
        :param batch_size: int, trajectories per batch.
        :param shuffle: bool, reshuffle the order at every epoch.
        :param epochs: int. If -1, then iterate forever.
        :return: yields (DictList [bsz, max_len], lengths, task_lengths)
        """
        epoch_iter = range(1, epochs + 1) if epochs > 0 else _forever()
        # BUGFIX: shuffle a shallow copy so the caller's list order is not
        # mutated as a side effect of iterating.
        trajs = list(trajs)
        for _ in epoch_iter:
            if shuffle:
                random.shuffle(trajs)

            start_idx = 0
            while start_idx < len(trajs):
                batch = DictList()
                lengths = []
                task_lengths = []
                for _traj in trajs[start_idx:start_idx + batch_size]:
                    lengths.append(len(_traj.actions))
                    task_lengths.append(len(_traj.tasks))
                    # NOTE: tensorizes the trajectory in place; later
                    # epochs re-wrap the already-created tensors.
                    _traj.apply(lambda _t: torch.tensor(_t))
                    batch.append(_traj)

                # Pad variable-length trajectories into [bsz, max_len].
                batch.apply(lambda _t: pad_sequence(_t, batch_first=True))
                yield batch, torch.tensor(lengths), torch.tensor(task_lengths)
                start_idx += batch_size
Exemplo n.º 8
0
def evaluate_loop(dataloader, model, dropout_p):
    """Validation loop for the TACO model.

    Runs teacher forcing over the full validation split (loss), then one
    decoding batch (task accuracy plus a logged boundary alignment). The
    free-run evaluation is intentionally disabled (commented out).

    :return: dict sketch_length -> DictList of mean scalar metrics.
    """
    # Testing
    val_metrics = {}
    model.eval()
    for sketch_length in dataloader.env_names:
        val_metrics[sketch_length] = DictList()

        # Teacher Forcing
        val_iter = dataloader.val_iter(batch_size=FLAGS.taco_batch_size, env_names=[sketch_length])
        for val_batch, val_lengths, val_subtask_lengths in val_iter:
            if FLAGS.cuda:
                val_batch.apply(lambda _t: _t.cuda())
                val_lengths = val_lengths.cuda()
                val_subtask_lengths = val_subtask_lengths.cuda()
            with torch.no_grad():
                batch_res = teacherforce_batch(model, trajs=val_batch, lengths=val_lengths,
                                               subtask_lengths=val_subtask_lengths,
                                               decode=False, dropout_p=dropout_p)
            val_metrics[sketch_length].append(batch_res)

        # parsing: one batch of FLAGS.eval_episodes trajectories, decoded.
        val_iter = dataloader.val_iter(batch_size=FLAGS.eval_episodes, env_names=[sketch_length])
        val_batch, val_lengths, val_subtask_lengths = val_iter.__next__()
        if FLAGS.cuda:
            val_batch.apply(lambda _t: _t.cuda())
            val_lengths = val_lengths.cuda()
            val_subtask_lengths = val_subtask_lengths.cuda()
        with torch.no_grad():
            parsing_res, parsing_info = teacherforce_batch(model, trajs=val_batch, lengths=val_lengths,
                                                           subtask_lengths=val_subtask_lengths,
                                                           dropout_p=dropout_p, decode=True)
        val_metrics[sketch_length].append(parsing_res)

        # Print parsing info
        parsing_lines = ["Parsing...",
                         'tru_boundaries' + str(point_of_change(parsing_info['tru'])),
                         'dec_boundaries' + str(point_of_change(parsing_info['dec']))]
        logging.info('\n'.join(parsing_lines))

        # Free Run
        #free_run_metrics = DictList()
        #for _ in range(FLAGS.eval_episodes):
        #    with torch.no_grad():
        #         metric = evaluate_on_env(modular_p=model, sketch_length=sketch_length,
        #                                  max_steps_per_sketch=FLAGS.max_steps_per_sketch)
        #    free_run_metrics.append(metric)
        #val_metrics[sketch_length].update(free_run_metrics)

        # Mean everything
        val_metrics[sketch_length].apply(lambda _t: torch.tensor(_t).float().mean())
    return val_metrics
Exemplo n.º 9
0
 def forward(self, inputs, dropout_rate=0.):
     """Run the actor and stop heads on ``inputs``.

     :param inputs: input features shared by both heads.
     :param dropout_rate: dropout applied on the stop branch only.
     :return: DictList with 'action_dist' and 'stop_dist'.
     """
     a_hidden = inputs
     s_hidden = inputs
     for layer_idx in range(self.depth):
         a_hidden = self.relu(self.actor_layers[layer_idx](a_hidden))
         s_hidden = self.relu(self.stop_layers[layer_idx](s_hidden))
         # Dropout only on the stop branch; active in training mode.
         s_hidden = nn.functional.dropout(s_hidden,
                                          p=dropout_rate,
                                          training=self.training)
     mean = self.actor_layers[-1](a_hidden)
     logits = self.stop_layers[-1](s_hidden)
     return DictList({
         'action_dist': get_action_dist(mean=mean),
         'stop_dist': torch.distributions.Categorical(logits=logits),
     })
Exemplo n.º 10
0
 def forward(self, obs, sketchs, sketch_lengths, mems=None) -> DictList:
     """One recurrent policy step.

     :param obs: [bsz, obs_size] observation batch
     :param sketchs: sketch tokens fed to the task embedding
     :param sketch_lengths: per-sketch lengths
     :param mems: [bsz, mem_size] flattened LSTM state
     :return: DictList with the next 'mems' and the action 'dist'
     """
     obs_repr = self.layernorm(self.encode_obs(obs))
     hidden = self._unflat_mem(mems)
     lstm_out, next_hidden = self.lstm(obs_repr.unsqueeze(1), hidden)
     flat_next_mems = self._flat_mem(next_hidden)
     lstm_out = lstm_out.squeeze(1)
     # Actor sees the recurrent output, the sketch embedding and a skip
     # connection from the normalized observation.
     actor_in = torch.cat(
         [lstm_out, self.env_emb(sketchs, sketch_lengths), obs_repr],
         dim=-1)
     return DictList({'mems': flat_next_mems,
                      'dist': get_action_dist(self.actor(actor_in))})
Exemplo n.º 11
0
def taco_eval(bot, args):
    """Decode held-out trajectories with TACO and collect parsing metrics.

    :param bot: trained TACO model.
    :param args: namespace with ``sketch_lengths`` and ``episodes``.
    :return: dict sketch_len -> DictList of scalar parsing results.
    """
    parsing_metric = {}
    # val_ratio=0.99 puts nearly everything in the validation split.
    dataloader = Dataloader(args.sketch_lengths, 0.99)
    bot.eval()
    for sketch_len in dataloader.env_names:
        parsing_metric[sketch_len] = DictList()
        batch, batch_lens, batch_sketch_lens = next(
            dataloader.val_iter(args.episodes,
                                shuffle=True,
                                env_names=[sketch_len]))
        with torch.no_grad():
            parsing_res, _ = taco_decode(bot,
                                         trajs=batch,
                                         lengths=batch_lens,
                                         subtask_lengths=batch_sketch_lens,
                                         dropout_p=0.,
                                         decode=True)
        parsing_metric[sketch_len].append(parsing_res)
        parsing_metric[sketch_len].apply(lambda _t: _t[0].item())
    return parsing_metric
Exemplo n.º 12
0
def evaluate_on_env(modular_p: ModularPolicy,
                    sketch_length,
                    max_steps_per_sketch,
                    use_sketch_id=False):
    """Free-run one jacopinpad episode with the modular policy.

    :param modular_p: trained policy; device is inferred from its params.
    :param sketch_length: sketch length for the created environment.
    :param max_steps_per_sketch: per-sketch step budget for the env.
    :param use_sketch_id: when True, feed the env-provided sketch index to
        ``get_action`` instead of letting the policy track progress itself.
    :return: dict with 'succs', 'episode_length', 'ret', 'runtime'.
    """
    start = time.time()
    env = gym.make('jacopinpad-v0',
                   sketch_length=sketch_length,
                   max_steps_per_sketch=max_steps_per_sketch)
    device = next(modular_p.parameters()).device
    modular_p.eval()
    obs = DictList(env.reset())
    modular_p.reset(subtasks=obs.sketch)
    obs.apply(lambda _t: torch.tensor(_t, device=device).float())
    done = False
    traj = DictList()
    try:
        while not done:
            if not use_sketch_id:
                action = modular_p.get_action(obs.state.unsqueeze(0))
            else:
                action = modular_p.get_action(obs.state.unsqueeze(0),
                                              sketch_idx=int(
                                                  obs.sketch_idx.item()))
            # get_action returning None is treated as the policy declaring
            # the sketch finished -> terminate the episode.
            if action is not None:
                next_obs, reward, done, _ = env.step(action.cpu().numpy()[0])
                transition = {
                    'reward': reward,
                    'action': action,
                    'features': obs.state
                }
                traj.append(transition)

                obs = DictList(next_obs)
                obs.apply(lambda _t: torch.tensor(_t, device=device).float())
            else:
                done = True
    except MujocoException:
        # Physics diverged; score whatever was collected so far.
        pass
    end = time.time()
    if 'reward' in traj:
        return {
            'succs': np.sum(traj.reward),
            'episode_length': len(traj.reward),
            'ret': sum(env.local_score),
            'runtime': end - start
        }
    else:
        # Episode ended before a single transition was recorded.
        return {
            'succs': 0,
            'episode_length': 0,
            'ret': 0,
            'runtime': end - start
        }
Exemplo n.º 13
0
def teacherforce_batch(modular_p: ModularPolicy,
                       trajs: DictList,
                       lengths,
                       subtask_lengths,
                       dropout_p,
                       decode=False):
    """Return log probs of a trajectory batch under the modular policy.

    Training mode (``decode=False``) returns ``{'loss': ...}``: the
    negative per-step log likelihood obtained with the TACO forward
    algorithm. Decode mode returns a task-accuracy metric plus the
    (ground truth, actions, decoded subtasks) of the last trajectory,
    used by callers for printing alignments.

    :param modular_p: policy with one head per task id.
    :param trajs: padded DictList with states/actions/tasks/gt_onsets.
    :param lengths: per-trajectory valid lengths.
    :param subtask_lengths: per-trajectory sketch lengths.
    :param dropout_p: dropout rate (forced to 0 when decoding).
    :param decode: switch between training loss and decoding.
    """
    dropout_p = 0. if decode else dropout_p
    # Collect the distinct task ids appearing anywhere in the batch.
    # (set.add is idempotent, so no membership check is needed.)
    unique_tasks = set()
    for subtask in trajs.tasks:
        for task_id in subtask:
            unique_tasks.add(task_id.item())
    unique_tasks = list(unique_tasks)

    # Forward for all unique task
    # task_results [bsz, length, all_tasks]
    states = trajs.states.float()
    targets = trajs.actions.float()
    all_task_results = DictList()
    for task in unique_tasks:
        all_task_results.append(
            modular_p.forward(task, states, targets, dropout_p=dropout_p))
    all_task_results.apply(lambda _t: torch.stack(_t, dim=2))

    # pad subtasks
    subtasks = trajs.tasks

    # Re-index the per-task results into each trajectory's own sketch
    # order: results [bsz, len, nb_tasks]
    results = DictList()
    for batch_id, subtask in enumerate(subtasks):
        curr_result = DictList()
        for task in subtask:
            task_id = unique_tasks.index(task)
            curr_result.append(all_task_results[batch_id, :, task_id])

        # [len, tasks]
        curr_result.apply(lambda _t: torch.stack(_t, dim=1))
        results.append(curr_result)
    results.apply(lambda _t: torch.stack(_t, dim=0))

    # Training
    if not decode:
        log_alphas = tac_forward_log(action_logprobs=results.action_logprobs,
                                     stop_logprobs=results.stop_logprobs,
                                     lengths=lengths,
                                     subtask_lengths=subtask_lengths)
        # Likelihood of finishing the whole sketch exactly at the final
        # valid timestep of each trajectory.
        seq_logprobs = log_alphas[
            torch.arange(log_alphas.shape[0], device=log_alphas.device),
            lengths - 1, subtask_lengths - 1]
        avg_logprobs = seq_logprobs.sum() / lengths.sum()
        return {'loss': -avg_logprobs}

    # Decode
    else:
        alphas, _ = tac_forward(action_logprobs=results.action_logprobs,
                                stop_logprobs=results.stop_logprobs,
                                lengths=lengths,
                                subtask_lengths=subtask_lengths)
        # Most likely active sketch slot at every timestep.
        decoded = alphas.argmax(-1)
        batch_ids = torch.arange(decoded.shape[0],
                                 device=decoded.device).unsqueeze(-1).repeat(
                                     1, decoded.shape[1])
        decoded_subtasks = subtasks[batch_ids, decoded]
        total_task_corrects = 0
        for subtask, decoded_subtask, action, length, gt in zip(
                subtasks, decoded_subtasks, trajs.actions, lengths,
                trajs.gt_onsets):
            _decoded_subtask = decoded_subtask[:length]
            _action = action[:length]
            gt = gt[:length]
            total_task_corrects += (gt == _decoded_subtask).float().sum()
        # The loop locals intentionally leak: the extras dict reports the
        # last trajectory for alignment printing.
        return {
            'task_acc': total_task_corrects / lengths.sum()
        }, {
            'tru': gt,
            'act': _action,
            'dec': _decoded_subtask
        }
Exemplo n.º 14
0
def main(training_folder):
    """Train the TACO ModularPolicy with teacher forcing.

    Evaluates every ``FLAGS.taco_eval_freq`` steps, checkpoints the model
    with the best validation loss into ``training_folder`` and logs
    train/val/test metrics to TensorBoard.
    """
    logging.info('start taco...')
    dataloader = Dataloader(FLAGS.sketch_lengths, 0.2)
    # 39-dim states, 9-dim actions; normalization stats come from the
    # dataloader's training split.
    model = ModularPolicy(nb_subtasks=10, input_dim=39,
                          n_actions=9,
                          a_mu=dataloader.a_mu,
                          a_std=dataloader.a_std,
                          s_mu=dataloader.s_mu,
                          s_std=dataloader.s_std)
    if FLAGS.cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.taco_lr)

    train_steps = 0
    writer = SummaryWriter(training_folder)
    train_iter = dataloader.train_iter(batch_size=FLAGS.taco_batch_size)
    nb_frames = 0
    curr_best = np.inf
    train_stats = DictList()

    # test dataloader: only sketch lengths never seen during training.
    test_sketch_lengths = set(FLAGS.test_sketch_lengths) - set(FLAGS.sketch_lengths)
    test_dataloader = None if len(test_sketch_lengths) == 0 else Dataloader(test_sketch_lengths, FLAGS.il_val_ratio)
    scheduler = DropoutScheduler()
    while True:
        if train_steps > FLAGS.taco_train_steps:
            logging.info('Reaching maximum steps')
            break

        if train_steps % FLAGS.taco_eval_freq == 0:
            val_metrics = evaluate_loop(dataloader, model, dropout_p=scheduler.dropout_p)
            logging_metrics(nb_frames, train_steps, val_metrics, writer, 'val')

            if test_dataloader is not None:
                test_metrics = evaluate_loop(test_dataloader, model, dropout_p=scheduler.dropout_p)
                logging_metrics(nb_frames, train_steps, test_metrics, writer, 'test')

            # Checkpoint on best mean validation loss across envs.
            avg_loss = [val_metrics[env_name].loss for env_name in val_metrics]
            avg_loss = np.mean(avg_loss)
            if avg_loss < curr_best:
                curr_best = avg_loss
                logging.info('Save Best with loss: {}'.format(avg_loss))
                # Save the checkpoint
                with open(os.path.join(training_folder, 'bot_best.pkl'), 'wb') as f:
                    torch.save(model, f)

        model.train()
        train_batch, train_lengths, train_subtask_lengths = train_iter.__next__()
        if FLAGS.cuda:
            train_batch.apply(lambda _t: _t.cuda())
            train_lengths = train_lengths.cuda()
            train_subtask_lengths = train_subtask_lengths.cuda()
        start = time.time()
        train_outputs = teacherforce_batch(modular_p=model,
                                           trajs=train_batch,
                                           lengths=train_lengths,
                                           subtask_lengths=train_subtask_lengths,
                                           decode=False,
                                           dropout_p=scheduler.dropout_p)
        optimizer.zero_grad()
        train_outputs['loss'].backward()
        optimizer.step()
        train_steps += 1
        scheduler.step()
        nb_frames += train_lengths.sum().item()
        end = time.time()
        fps = train_lengths.sum().item() / (end - start)
        train_outputs['fps'] = torch.tensor(fps)

        train_outputs = DictList(train_outputs)
        train_outputs.apply(lambda _t: _t.item())
        train_stats.append(train_outputs)

        # Flush accumulated training stats at the eval cadence.
        if train_steps % FLAGS.taco_eval_freq == 0:
            train_stats.apply(lambda _tensors: np.mean(_tensors))
            logger_str = ['[TRAIN] steps={}'.format(train_steps)]
            for k, v in train_stats.items():
                logger_str.append("{}: {:.4f}".format(k, v))
                writer.add_scalar('train/' + k, v, global_step=nb_frames)
            logging.info('\t'.join(logger_str))
            train_stats = DictList()
            writer.flush()
Exemplo n.º 15
0
def main(training_folder):
    """Train a CompILE segmentation model on jacopinpad demonstrations.

    Every ``FLAGS.compile_eval_freq`` steps: evaluates loss on the
    validation split, decodes segment boundaries (F1 at several tolerances
    plus task accuracy), checkpoints the best model by validation loss,
    and logs train/val metrics to TensorBoard under ``training_folder``.
    """
    logging.info('Start compile...')
    dataloader = Dataloader(FLAGS.sketch_lengths, 0.2)
    # 39-dim states, 9-dim actions (matches the jacopinpad dataset).
    model = compile.CompILE(vec_size=39,
                            hidden_size=FLAGS.hidden_size,
                            action_size=9,
                            env_arch=FLAGS.env_arch,
                            max_num_segments=FLAGS.compile_max_segs,
                            latent_dist=FLAGS.compile_latent,
                            beta_b=FLAGS.compile_beta_b,
                            beta_z=FLAGS.compile_beta_z,
                            prior_rate=FLAGS.compile_prior_rate,
                            dataloader=dataloader)
    if FLAGS.cuda:
        model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.compile_lr)

    train_steps = 0
    writer = SummaryWriter(training_folder)
    train_iter = dataloader.train_iter(batch_size=FLAGS.compile_batch_size)
    nb_frames = 0
    curr_best = np.inf
    train_stats = DictList()
    while True:
        if train_steps > FLAGS.compile_train_steps:
            logging.info('Reaching maximum steps')
            break

        if train_steps % FLAGS.compile_eval_freq == 0:
            # Testing
            val_metrics = {}
            model.eval()
            for env_name in FLAGS.sketch_lengths:
                val_metrics[env_name] = DictList()
                val_iter = dataloader.val_iter(
                    batch_size=FLAGS.compile_batch_size, env_names=[env_name])
                for val_batch, val_lengths, val_sketch_lens in val_iter:
                    if FLAGS.cuda:
                        val_batch.apply(lambda _t: _t.cuda())
                        val_lengths = val_lengths.cuda()
                        val_sketch_lens = val_sketch_lens.cuda()
                    with torch.no_grad():
                        val_outputs, extra_info = model.forward(
                            val_batch, val_lengths, val_sketch_lens)
                    val_metrics[env_name].append(val_outputs)

                # Parsing: decode boundaries on one batch and score them
                # against ground-truth subtask onsets.
                total_lengths = 0
                total_task_corrects = 0
                val_iter = dataloader.val_iter(batch_size=FLAGS.eval_episodes,
                                               env_names=[env_name],
                                               shuffle=True)
                val_batch, val_lengths, val_sketch_lens = val_iter.__next__()
                if FLAGS.cuda:
                    val_batch.apply(lambda _t: _t.cuda())
                    val_lengths = val_lengths.cuda()
                    val_sketch_lens = val_sketch_lens.cuda()
                with torch.no_grad():
                    val_outputs, extra_info = model.forward(
                        val_batch, val_lengths, val_sketch_lens)
                # Predicted boundary position of each segment.
                seg = torch.stack(extra_info['segment'], dim=1).argmax(-1)
                for batch_id, (length, sketch_length, _seg) in enumerate(
                        zip(val_lengths, val_sketch_lens, seg)):
                    traj = val_batch[batch_id]
                    traj = traj[:length]
                    _gt_subtask = traj.gt_onsets
                    target = point_of_change(_gt_subtask)
                    # Sort predicted boundaries in ascending order.
                    _seg = _seg[_seg.sort()[1]].cpu().tolist()

                    # Remove the last one because too trivial
                    val_metrics[env_name].append({
                        'f1_tol0': f1(target, _seg, 0),
                        'f1_tol1': f1(target, _seg, 1),
                        'f1_tol2': f1(target, _seg, 2)
                    })

                    # subtask
                    total_lengths += length.item()
                    _decoded_subtask = get_subtask_seq(
                        length.item(),
                        subtask=traj.tasks.tolist(),
                        use_ids=np.array(_seg))
                    total_task_corrects += (_gt_subtask.cpu(
                    ) == _decoded_subtask.cpu()).float().sum()

                # record task acc
                val_metrics[
                    env_name].task_acc = total_task_corrects / total_lengths

                # Print parsing result
                lines = []
                lines.append('tru_ids: {}'.format(target))
                lines.append('dec_ids: {}'.format(_seg))
                logging.info('\n'.join(lines))
                val_metrics[env_name].apply(
                    lambda _t: torch.tensor(_t).float().mean().item())

            # Logger
            for env_name, metric in val_metrics.items():
                line = ['[VALID][{}] steps={}'.format(env_name, train_steps)]
                for k, v in metric.items():
                    line.append('{}: {:.4f}'.format(k, v))
                logging.info('\t'.join(line))

            # Average metrics across environments for TensorBoard.
            mean_val_metric = DictList()
            for metric in val_metrics.values():
                mean_val_metric.append(metric)
            mean_val_metric.apply(lambda t: torch.mean(torch.tensor(t)))
            for k, v in mean_val_metric.items():
                writer.add_scalar('val/' + k, v.item(), nb_frames)
            writer.flush()

            # Checkpoint on best mean validation loss across envs.
            avg_loss = [val_metrics[env_name].loss for env_name in val_metrics]
            avg_loss = np.mean(avg_loss)
            if avg_loss < curr_best:
                curr_best = avg_loss
                logging.info('Save Best with loss: {}'.format(avg_loss))
                # Save the checkpoint
                with open(os.path.join(training_folder, 'bot_best.pkl'),
                          'wb') as f:
                    torch.save(model, f)

        model.train()
        train_batch, train_lengths, train_sketch_lens = train_iter.__next__()
        if FLAGS.cuda:
            train_batch.apply(lambda _t: _t.cuda())
            train_lengths = train_lengths.cuda()
            train_sketch_lens = train_sketch_lens.cuda()
        train_outputs, _ = model.forward(train_batch, train_lengths,
                                         train_sketch_lens)

        optimizer.zero_grad()
        train_outputs['loss'].backward()
        optimizer.step()
        train_steps += 1
        nb_frames += train_lengths.sum().item()

        train_outputs = DictList(train_outputs)
        train_outputs.apply(lambda _t: _t.item())
        train_stats.append(train_outputs)

        # Flush accumulated training stats at the eval cadence.
        if train_steps % FLAGS.compile_eval_freq == 0:
            train_stats.apply(lambda _tensors: np.mean(_tensors))
            logger_str = ['[TRAIN] steps={}'.format(train_steps)]
            for k, v in train_stats.items():
                logger_str.append("{}: {:.4f}".format(k, v))
                writer.add_scalar('train/' + k, v, global_step=nb_frames)
            logging.info('\t'.join(logger_str))
            train_stats = DictList()
            writer.flush()
Exemplo n.º 16
0
def visualize(args):
    """Save per-episode subtask-boundary visualizations for a trained bot.

    For each episode: (1) collect one demo trajectory from jacopinpad,
    (2) run the bot with teacher forcing to obtain its per-step slot
    distribution ``p``, (3) extract boundary predictions from ``p`` via
    ``automatic_get_boundaries_peak``, and (4) write to ``args.outdir``:
    one PNG per predicted boundary frame
    (``<model>_subtask_<episode>_<idx>.png``) and a plot of the averaged
    p-curve with the detection thresholds (``<model>_p_avg_<episode>.png``).

    :param args: namespace with ``outdir``, ``model_ckpt``,
        ``sketch_lengths`` (only the first element is used) and ``episodes``.
    """
    os.makedirs(args.outdir, exist_ok=True)
    # NOTE(review): torch.load unpickles arbitrary objects -- only load
    # trusted checkpoints.
    bot = torch.load(args.model_ckpt, map_location=torch.device('cpu'))
    bot.eval()
    sketch_length = int(args.sketch_lengths[0])
    # Tag output files with the checkpoint's parent-directory name.
    model_name = os.path.dirname(os.path.abspath(
        args.model_ckpt)).split('/')[-1]
    for episode_id in range(args.episodes):
        demo_traj = jacopinpad.collect_data(1,
                                            len_sketch=sketch_length,
                                            img_collect=True,
                                            permute=False,
                                            use_dart=True)

        # Teacher forcing: replay the demo through the bot to obtain its
        # internal slot distribution (`extra_info.p`).
        batch = DictList({
            k: demo_traj[k]
            for k in ['states', 'actions', 'gt_onsets', 'tasks']
        })
        batch.apply(lambda _t: torch.tensor(_t))
        batch_lengths = torch.tensor([len(batch.states[0])])
        batch_sketch_lens = torch.tensor([sketch_length])
        with torch.no_grad():
            _, extra_info = bot.teacherforcing_batch(batch,
                                                     batch_lengths,
                                                     batch_sketch_lens,
                                                     recurrence=64)

        # Get prediction sorted.  `ps` has nb_slots + 1 entries in its last
        # dim (it is multiplied with `p_vals` below); force the first step
        # to put all mass on the last slot.
        ps = extra_info.p[0]
        ps[0, :-1] = 0
        ps[0, -1] = 1
        # Expected reversed-slot index per step, rescaled by its span so the
        # curve is comparable across episodes.  Peaks mark likely boundaries.
        p_vals = torch.arange(bot.nb_slots + 1, device=ps.device).flip(0)
        avg_p = (p_vals * ps).sum(-1)
        avg_p = avg_p / (avg_p.max() - avg_p.min())
        p_avg_fig, p_avg_ax = plt.subplots()
        p_avg_ax.plot(avg_p, '--X')
        automatic_results = automatic_get_boundaries_peak(ps,
                                                          bot.nb_slots,
                                                          sketch_length,
                                                          with_details=True)
        # Horizontal lines for the three detection thresholds.
        final_thres, = p_avg_ax.plot([automatic_results['final_thres']] *
                                     len(avg_p), '--r')
        upper_thres, = p_avg_ax.plot([automatic_results['upper_thres']] *
                                     len(avg_p), '--y')
        lower_thres, = p_avg_ax.plot([automatic_results['lower_thres']] *
                                     len(avg_p), '--b')
        p_avg_ax.legend([final_thres, upper_thres, lower_thres],
                        ['final', 'upper', 'lower'],
                        fontsize=13,
                        loc='upper left')

        # Save the env frame at each predicted boundary (plus frame 0).
        preds = [0] + automatic_results['final_res']
        for idx, pred in enumerate(preds):
            img = demo_traj['images'][0][pred]
            fig, ax = plt.subplots()
            ax.imshow(img)
            # Strip both axes so only the raw frame is saved.
            ax.tick_params(axis='x',
                           which='both',
                           bottom=False,
                           top=False,
                           labelbottom=False)
            ax.tick_params(axis='y',
                           which='both',
                           left=False,
                           right=False,
                           labelleft=False)
            fig.savefig(os.path.join(
                args.outdir,
                model_name + '_subtask_{}_{}.png'.format(episode_id, idx)),
                        bbox_inches='tight')
            # Mark this boundary on the averaged p-curve.
            p_avg_ax.plot([pred], [avg_p[pred]], 'r.', markersize=15)

        p_avg_fig.savefig(os.path.join(
            args.outdir, model_name + '_p_avg_{}.png'.format(episode_id)),
                          bbox_inches='tight')
Exemplo n.º 17
0
def video_and_gif(args):
    """Render annotated rollout videos (and GIFs) for a trained bot.

    For each episode: collect a demo trajectory from jacopinpad, run the bot
    with teacher forcing to get its slot distribution, detect subtask
    boundaries, then compose per-frame images (env frame + live p-curve plot
    with a moving time cursor) into ``<model>_<episode>.mp4`` and
    ``<model>_<episode>.gif`` under ``args.outdir``.

    :param args: namespace with ``outdir``, ``model_ckpt``,
        ``sketch_lengths`` (only the first element is used) and ``episodes``.
    """
    os.makedirs(args.outdir, exist_ok=True)
    # NOTE(review): torch.load unpickles arbitrary objects -- only load
    # trusted checkpoints.
    bot = torch.load(args.model_ckpt, map_location=torch.device('cpu'))
    bot.eval()
    sketch_length = int(args.sketch_lengths[0])
    # Tag output files with the checkpoint's parent-directory name.
    model_name = os.path.dirname(os.path.abspath(
        args.model_ckpt)).split('/')[-1]
    for episode_id in range(args.episodes):
        demo_traj = jacopinpad.collect_data(1,
                                            len_sketch=sketch_length,
                                            img_collect=True,
                                            permute=False,
                                            use_dart=True)

        # Teacher forcing: replay the demo through the bot to obtain its
        # internal slot distribution (`extra_info.p`).
        batch = DictList({
            k: demo_traj[k]
            for k in ['states', 'actions', 'gt_onsets', 'tasks']
        })
        batch.apply(lambda _t: torch.tensor(_t))
        batch_lengths = torch.tensor([len(batch.states[0])])
        batch_sketch_lens = torch.tensor([sketch_length])
        with torch.no_grad():
            _, extra_info = bot.teacherforcing_batch(batch,
                                                     batch_lengths,
                                                     batch_sketch_lens,
                                                     recurrence=64)

        # Ground-truth subtask ids at each change point (used to caption the
        # "Press X" overlay).
        traj = batch[0]
        gt_subtask = traj.gt_onsets
        tasks = gt_subtask[point_of_change(gt_subtask)]

        # Get prediction sorted.  `ps` has nb_slots + 1 entries in its last
        # dim (matches `p_vals`); force the first step onto the last slot.
        ps = extra_info.p[0]
        ps[0, :-1] = 0
        ps[0, -1] = 1
        # Expected reversed-slot index per step, rescaled by its span.
        p_vals = torch.arange(bot.nb_slots + 1, device=ps.device).flip(0)
        avg_p = (p_vals * ps).sum(-1)
        avg_p = avg_p / (avg_p.max() - avg_p.min())
        automatic_results = automatic_get_boundaries_peak(ps,
                                                          bot.nb_slots,
                                                          sketch_length,
                                                          with_details=True)
        preds = automatic_results['final_res']

        def get_p_avg_img(time):
            # Render the p-curve plot (threshold, boundary markers, and a
            # vertical red cursor at `time`) into an RGB numpy image.
            p_avg_fig, p_avg_ax = plt.subplots()
            plt.tight_layout()
            width, height = p_avg_fig.get_size_inches() * p_avg_fig.get_dpi()
            p_avg_ax.plot(avg_p, '--X')
            p_avg_ax.plot([automatic_results['final_thres']] * len(avg_p),
                          'b',
                          label='final threshold')
            p_avg_ax.plot([time, time], [0, 1], 'r')
            p_avg_ax.legend(fontsize=15, loc='lower left')
            for idx, pred in enumerate(preds):
                p_avg_ax.plot([pred], [avg_p[pred]], 'rX')

            canvas = FigureCanvas(p_avg_fig)
            canvas.draw()  # draw the canvas, cache the renderer
            np_image = np.frombuffer(canvas.tostring_rgb(),
                                     dtype='uint8').reshape(
                                         int(height), int(width), 3)
            # Close the figure to avoid accumulating open figures per frame.
            plt.close(p_avg_fig)
            return np_image

        print()
        sketch_id = 0
        vid_frames = []
        for t, raw_img in tqdm(enumerate(demo_traj['images'][0])):
            img = Image.fromarray(raw_img)
            p_avg_img = Image.fromarray(get_p_avg_img(t))
            task = tasks[sketch_id]
            draw = ImageDraw.Draw(img)
            fonts_path = os.path.join(os.path.dirname(__file__), 'fonts')
            font = ImageFont.truetype(
                os.path.join(fonts_path, 'sans_serif.ttf'), 50)
            draw.text((20, 0), "Press {}".format(task), (0, 0, 0), font=font)

            # Resize the env frame to the plot's height, then concatenate
            # plot | frame side by side.
            ratio = p_avg_img.height / img.height
            new_height = p_avg_img.height
            new_width = int(img.width * ratio)
            img = img.resize((new_width, new_height))
            final = get_concat_h(p_avg_img, img)
            vid_frames.append(final)
            if t in preds:
                # Pause on a predicted boundary by duplicating this frame
                # for 2 * FPS extra frames (about 2 seconds of video).
                for _ in range(2 * FPS):
                    vid_frames.append(final)
                sketch_id += 1

        # Produce video
        print('Producing videos...')
        videodims = (vid_frames[0].width, vid_frames[0].height)
        # 0x7634706d is the 'mp4v' fourcc code.
        video = cv2.VideoWriter(
            os.path.join(args.outdir,
                         model_name + "_{}.mp4".format(episode_id)),
            0x7634706d, FPS, videodims)
        for frame in vid_frames:
            # PIL gives RGB; OpenCV expects BGR.
            video.write(cv2.cvtColor(np.array(frame), cv2.COLOR_RGB2BGR))
        video.release()

        # Produce Gif (every 5th frame to keep the file small)
        print('Producing GIF...')
        gif_path = os.path.join(args.outdir,
                                model_name + "_{}.gif".format(episode_id))
        with imageio.get_writer(gif_path, mode='I', duration=0.1) as writer:
            for frame in tqdm(vid_frames[::5]):
                writer.append_data(np.array(frame))
Exemplo n.º 18
0
    def update_parameters(self, exps):
        """Run one multi-head gradient update from a batch of experience.

        Three separate optimizer steps are taken, in order:
          1. ``sr_optimizer`` on the successor-representation loss,
          2. ``reward_optimizer`` on a reward-regression loss computed from
             random replay-memory samples (off-policy),
          3. ``feature_optimizer`` on the feature (reconstruction/curiosity)
             loss plus the actor loss.

        :param exps: experience batch; fields read here include ``obs``,
            ``action``, ``mask``, ``memory`` (if recurrent), ``successorn``,
            ``returnn``, ``SR_advantage`` and ``V_advantage``.
        :return: dict of scalar logs for this update.
        """
        # Compute starting indexes

        #with torch.autograd.set_detect_anomaly(True):

        inds = self._get_starting_indexes()

        # Initialize update values
        update_value_loss = 0
        update_entropy = 0
        update_policy_loss = 0
        update_reconstruction_loss = 0
        update_reward_loss = 0
        update_sr_loss = 0
        update_norm_loss = 0
        update_actor_loss = 0
        update_feature_loss = 0
        update_A_loss = 0

        # Initialize memory

        if self.model.recurrent:
            memory = exps.memory[inds]

        # Accumulate losses over `recurrence` consecutive timesteps.
        for i in range(self.recurrence):
            # Create a sub-batch of experience

            sb = exps[inds + i]

            # Run model
            # First pass: feature-learning predictions from (obs_t, a_t,
            # obs_{t+1}) transition pairs.
            #if self.model.feature_learn=="curiosity":
            if self.model.recurrent:
                _, _, _, predictions, _, _, _ = self.model(
                    sb[:-1].obs, sb[:-1].action, sb[1:].obs,
                    memory[:-1, :] * sb.mask[:-1])
            else:
                _, _, _, predictions, _, _ = self.model(
                    sb[:-1].obs, sb[:-1].action, sb[1:].obs)
            # else:
            #     if self.model.recurrent:
            #         _, _, _, predictions, _, _, _ = self.model(sb.obs, sb.action, sb.obs, memory * sb.mask)
            #     else:
            #         _, _, _, predictions, _, _ = self.model(sb.obs,sb.action,sb.obs)
            # Second pass: policy/value/SR/reward heads; also advances the
            # recurrent memory when applicable.
            if self.model.recurrent:
                dist, value, embedding, _, successor, reward, memory = self.model(
                    sb.obs, memory=memory * sb.mask)
            else:
                dist, value, embedding, _, successor, reward = self.model(
                    sb.obs)

            # Compute loss

            # Feature loss
            # NOTE(review): `reconstruction_loss` is only assigned in the two
            # branches below; any other `feature_learn` value would raise
            # UnboundLocalError at `feature_loss` -- confirm this is validated
            # upstream.
            if self.feature_learn == "reconstruction":
                reconstruction_loss = F.mse_loss(predictions, sb.obs.image)
            elif self.feature_learn == "curiosity":
                next_embedding, next_obs_pred, action_pred = predictions
                forward_loss = F.mse_loss(next_obs_pred, next_embedding)
                inverse_loss = F.nll_loss(
                    action_pred,
                    sb[:-1].action.long())  # mse if continuous action
                reconstruction_loss = forward_loss + inverse_loss

            # Keep embeddings near unit norm.
            norm_loss = (torch.norm(embedding, dim=1) - 1).pow(2).mean()
            feature_loss = reconstruction_loss + self.norm_loss_coef * norm_loss
            #reward_loss = F.mse_loss(reward, sb.reward )
            sr_loss = F.smooth_l1_loss(
                successor,
                sb.successorn)  #F.mse_loss(successor, sb.successorn)
            entropy = dist.entropy().mean()

            with torch.no_grad():
                # Advantage estimate from the target network's reward head
                # applied to the SR advantage.
                SR_advanage_dot_R = self.target.reward(
                    sb.SR_advantage).reshape(-1)  #modle or target
                value_loss = (value - sb.returnn).pow(
                    2).mean()  # not used for optimization, just for logs

            # Logged diagnostic: gap between SR-based and V-based advantages.
            A_diff = F.mse_loss(SR_advanage_dot_R, sb.V_advantage)
            #if self.num_updates < -1:
            #    policy_loss = -(dist.log_prob(sb.action) * sb.V_advantage).mean()
            #else:
            policy_loss = -(dist.log_prob(sb.action) *
                            SR_advanage_dot_R).mean()
            actor_loss = policy_loss - self.entropy_coef * entropy

            # Update batch values
            # Losses kept as tensors below (sr/actor/feature) are backprop'd
            # after the loop; the rest are accumulated as floats for logging.
            update_entropy += entropy.item()
            update_policy_loss += policy_loss.item()
            update_reconstruction_loss += reconstruction_loss.item()
            #update_reward_loss = update_reward_loss + reward_loss
            update_norm_loss += norm_loss.item()
            update_sr_loss = update_sr_loss + sr_loss
            update_actor_loss = update_actor_loss + actor_loss
            update_feature_loss = update_feature_loss + feature_loss
            update_value_loss += value_loss.item()
            update_A_loss += A_diff.item()

        # Update update values
        update_entropy /= self.recurrence
        update_value_loss /= self.recurrence
        update_policy_loss /= self.recurrence
        update_reconstruction_loss /= self.recurrence
        update_norm_loss /= self.recurrence
        #update_reward_loss = update_reward_loss/self.recurrence
        update_sr_loss = update_sr_loss / self.recurrence
        update_actor_loss = update_actor_loss / self.recurrence
        update_feature_loss = update_feature_loss / self.recurrence
        update_A_loss /= self.recurrence

        # Update actor-critic

        # Step 1: successor-representation head.  retain_graph=True because
        # the same forward graph is reused by the feature/actor backward below.
        self.model.zero_grad()
        update_sr_loss.backward(retain_graph=True)
        update_grad_norm_sr = sum(
            p.grad.data.norm(2)**2 for p in self.model.SR.parameters())**0.5
        torch.nn.utils.clip_grad_norm_(self.model.SR.parameters(),
                                       self.max_grad_norm)
        self.sr_optimizer.step()

        # reward leanring: not on policy so do random samples
        transitions = self.replay_memory.sample(
            np.min([self.batch_size,
                    self.replay_memory.__len__()]))
        batch_state_t, batch_reward = zip(*transitions)
        batch_state = DictList()
        batch_state.image = torch.cat(batch_state_t)
        batch_reward = torch.cat(batch_reward)
        if self.model.recurrent:
            _, _, _, _, _, reward, _ = self.model(
                batch_state)  # issue with memory here
        else:
            _, _, _, _, _, reward = self.model(batch_state)
        update_reward_loss = F.smooth_l1_loss(reward, batch_reward.squeeze())

        # Step 2: reward head.
        self.model.zero_grad()
        update_reward_loss.backward(retain_graph=True)
        update_grad_norm_reward = sum(
            p.grad.data.norm(2)**2
            for p in self.model.reward.parameters())**0.5
        torch.nn.utils.clip_grad_norm_(self.model.reward.parameters(),
                                       self.max_grad_norm)
        self.reward_optimizer.step()

        # self.model.zero_grad()
        # update_actor_loss.backward(retain_graph=True)
        # update_grad_norm_actor = sum(p.grad.data.norm(2) ** 2 for p in self.model.actor.parameters()) ** 0.5
        # torch.nn.utils.clip_grad_norm_(self.model.actor.parameters(), self.max_grad_norm)
        # self.actor_optimizer.step()

        # Step 3: feature encoder + actor on the combined loss.
        self.model.zero_grad()
        update_loss = self.recon_loss_coef * update_feature_loss + update_actor_loss
        update_loss.backward(retain_graph=False)
        torch.nn.utils.clip_grad_norm_(self.model.feature_in.parameters(),
                                       self.max_grad_norm)
        torch.nn.utils.clip_grad_norm_(self.model.feature_out.parameters(),
                                       self.max_grad_norm)
        self.feature_optimizer.step()

        # Logged grad norm is the max over the reward and SR head norms.
        update_grad_norm = np.max(
            [update_grad_norm_reward.item(),
             update_grad_norm_sr.item()])  #update_grad_norm_sr.item()

        # Log some values

        logs = {
            "reconstruction_loss": update_reconstruction_loss,
            "reward_loss": update_reward_loss.item(),
            "sr_loss": update_sr_loss.item(),
            "norm_loss": update_norm_loss,
            "entropy": update_entropy,
            "value_loss": update_value_loss,
            "policy_loss": update_policy_loss,
            "grad_norm": update_grad_norm,
            "A_mse": update_A_loss
        }

        self.num_updates += 1

        return logs
Exemplo n.º 19
0
def batch_evaluate(envs: List, bot: ModelBot, cuda, verbose=False) -> DictList:
    """Roll out `bot` in every env until all are done; return mean metrics.

    All envs are stepped in lockstep; an env is dropped from the active set
    once it reports done.  Note: this does NOT return raw trajectories -- it
    returns a DictList whose fields are the across-env means of:
      * 'ret':    sum of ``env.local_score``
      * 'succs':  total reward collected along the trajectory
      * 'length': trajectory length in steps

    :param envs: list of environments (all assumed to share the same sketch).
    :param bot: the model to evaluate.
    :param cuda: move observations/indices to GPU when truthy.
    :param verbose: print the number of active envs every step.
    """
    obs = DictList()
    for env in envs:
        obs.append(DictList(env.reset()))
    obs.apply(lambda _t: torch.tensor(_t).float())
    # Indices of envs that have not finished yet.
    actives = torch.tensor([i for i in range(len(envs))])
    if cuda:
        obs.apply(lambda _t: _t.cuda())
        actives = actives.cuda()

    trajs = [DictList() for _ in range(len(envs))]
    # Sketch taken from the first env; assumes all envs share it -- TODO
    # confirm with callers.
    sketchs = obs.sketch.long()[0]
    sketch_lengths = torch.tensor(sketchs.shape, device=sketchs.device)
    mems = bot.init_memory(
        sketchs.unsqueeze(0).repeat(len(actives), 1),
        sketch_lengths.repeat(len(actives))) if bot.is_recurrent else None

    # Continue roll out while at least one active
    steps = 0
    while len(actives) > 0:
        if verbose:
            print('active env:', len(actives))
        active_trajs = [trajs[i] for i in actives]
        with torch.no_grad():
            model_outputs = bot.get_action(
                obs.state,
                sketchs.unsqueeze(0).repeat(len(actives), 1),
                sketch_lengths.repeat(len(actives)), mems)
        actions = model_outputs.actions
        next_obs, rewards, dones = step_batch_envs(envs, actions, actives,
                                                   cuda)
        # Record the pre-step observation together with the obtained reward.
        transition = DictList({'rewards': rewards})
        transition.update(obs)

        for idx, active_traj in enumerate(active_trajs):
            active_traj.append(transition[idx])
        steps += 1

        # Memory
        next_mems = None
        if bot.is_recurrent:
            next_mems = model_outputs.mems

        # For next step: keep only the rows of still-active envs.
        un_done_ids = (~dones).nonzero().squeeze(-1)
        obs = next_obs[un_done_ids]
        actives = actives[un_done_ids]
        mems = next_mems[un_done_ids] if next_mems is not None else None

    # Aggregate per-env metrics, then average across envs.
    metric = DictList()
    for traj, env in zip(trajs, envs):
        traj.apply(lambda _tensors: torch.stack(_tensors))
        metric.append({
            'ret': sum(env.local_score),
            'succs': traj.rewards.sum().item(),
            'length': len(traj.rewards)
        })
    metric.apply(lambda _t: np.mean(_t))
    return metric
Exemplo n.º 20
0
def run_batch(batch: DictList,
              batch_lengths,
              sketch_lengths,
              bot: ModelBot,
              mode='train') \
        -> DictList:
    """Teacher-force `bot` over a padded batch and return per-step losses.

    Steps through time, computing action log-probabilities (optionally mixed
    with a termination probability via log-sum-exp when the model exposes
    ``log_end``), and masks out padded positions.

    :param batch: DictList with ``states``, ``actions`` and ``tasks``
        of shape [bsz, seqlen, ...]
    :param batch_lengths: [bsz] true sequence lengths.
    :param sketch_lengths: [bsz] sketch lengths, forwarded to the bot.
    :param bot: a ModelBot.
    :param mode: 'train' detaches recurrent memory every
        ``FLAGS.il_recurrence`` steps (truncated BPTT); 'eval' never detaches.
    :return: DictList with [bsz, seqlen] fields ``logprobs``, ``loss`` and
        (when the model emits it) ``log_end``; padded steps are zeroed.
    """
    bsz, seqlen = batch.actions.shape[0], batch.actions.shape[1]
    sketchs = batch.tasks
    final_outputs = DictList({})
    mems = None
    if bot.is_recurrent:
        mems = bot.init_memory(sketchs, sketch_lengths)

    for t in range(seqlen):
        final_output = DictList({})
        model_output = bot.forward(batch.states[:, t], sketchs, sketch_lengths,
                                   mems)
        logprobs = model_output.dist.log_prob(batch.actions[:, t].float())
        if 'log_end' in model_output:
            # p_end + (1 - pend) action_prob
            log_no_end_term = model_output.log_no_end + logprobs
            logprobs = torch.logsumexp(torch.stack(
                [model_output.log_end, log_no_end_term], dim=-1),
                                       dim=-1)
            final_output.log_end = model_output.log_end
        final_output.logprobs = logprobs
        final_outputs.append(final_output)

        # Update memory (truncated BPTT: detach every il_recurrence steps
        # during training so gradients do not flow through the whole sequence)
        next_mems = None
        if bot.is_recurrent:
            next_mems = model_output.mems
            if (t + 1) % FLAGS.il_recurrence == 0 and mode == 'train':
                next_mems = next_mems.detach()
        mems = next_mems

    # Stack on time dim
    final_outputs.apply(lambda _tensors: torch.stack(_tensors, dim=1))
    # True for valid (non-padded) positions.
    sequence_mask = torch.arange(
        batch_lengths.max().item(),
        device=batch_lengths.device)[None, :] < batch_lengths[:, None]
    final_outputs.loss = -final_outputs.logprobs
    if 'log_end' in final_outputs:
        # NOTE(review): at the last valid step the loss is set to +log_end
        # while every other step uses -logprobs; confirm the sign is intended.
        batch_ids = torch.arange(bsz, device=batch.states.device)
        final_outputs.loss[batch_ids, batch_lengths -
                           1] = final_outputs.log_end[batch_ids,
                                                      batch_lengths - 1]
    final_outputs.apply(lambda _t: _t.masked_fill(~sequence_mask, 0.))
    return final_outputs
Exemplo n.º 21
0
def main_loop(bot, dataloader, opt, training_folder, test_dataloader=None):
    """Imitation-learning training loop with periodic eval and checkpointing.

    Repeats until ``FLAGS.il_train_steps``: pull a batch from the
    dataloader's (presumably infinite) train iterator, run a teacher-forcing
    forward/backward pass, and step the optimizer.  Every
    ``FLAGS.il_save_freq`` steps a numbered checkpoint is written; every
    ``FLAGS.il_eval_freq`` steps validation (and optional test) metrics are
    logged to TensorBoard, the best-by-val-loss checkpoint is saved as
    ``bot_best.pkl``, and accumulated train stats are flushed.

    :param bot: model being trained (must expose ``teacherforcing_batch``).
    :param dataloader: supplies ``train_iter`` and the validation envs.
    :param opt: optimizer over ``bot``'s parameters.
    :param training_folder: output dir for TensorBoard logs and checkpoints.
    :param test_dataloader: optional held-out dataloader, evaluated alongside
        validation.
    """
    # Prepare
    train_steps = 0
    writer = SummaryWriter(training_folder)
    train_iter = dataloader.train_iter(batch_size=FLAGS.il_batch_size)
    nb_frames = 0
    train_stats = DictList()
    # Best validation loss so far; initialized to a large sentinel.
    curr_best = 100000
    while True:
        if train_steps > FLAGS.il_train_steps:
            logging.info('Reaching maximum steps')
            break

        if train_steps % FLAGS.il_save_freq == 0:
            with open(
                    os.path.join(training_folder,
                                 'bot{}.pkl'.format(train_steps)), 'wb') as f:
                torch.save(bot, f)

        if train_steps % FLAGS.il_eval_freq == 0:
            # testing on valid
            val_metrics = evaluate_on_envs(bot, dataloader)
            logging_metrics(nb_frames,
                            train_steps,
                            val_metrics,
                            writer,
                            prefix='val')

            # testing on test env
            if test_dataloader is not None:
                test_metrics = evaluate_on_envs(bot, test_dataloader)
                logging_metrics(nb_frames,
                                train_steps,
                                test_metrics,
                                writer,
                                prefix='test')

            # Mean validation loss across envs decides the best checkpoint.
            avg_loss = [
                val_metrics[env_name]['loss'] for env_name in val_metrics
            ]
            avg_loss = np.mean(avg_loss)

            if avg_loss < curr_best:
                curr_best = avg_loss
                logging.info('Save Best with loss: {}'.format(avg_loss))

                # Save the checkpoint
                with open(os.path.join(training_folder, 'bot_best.pkl'),
                          'wb') as f:
                    torch.save(bot, f)

        # Forward/Backward
        bot.train()
        train_batch, train_lengths, train_sketch_lengths = train_iter.__next__(
        )
        if FLAGS.cuda:
            train_batch.apply(lambda _t: _t.cuda())
            train_lengths = train_lengths.cuda()
            train_sketch_lengths = train_sketch_lengths.cuda()

        start = time.time()
        #train_batch_res = run_batch(train_batch, train_lengths, train_sketch_lengths, bot)
        train_batch_res, _ = bot.teacherforcing_batch(
            train_batch,
            train_lengths,
            train_sketch_lengths,
            recurrence=FLAGS.il_recurrence)
        # Normalize summed per-step losses by the total number of frames.
        train_batch_res.apply(lambda _t: _t.sum() / train_lengths.sum())
        batch_time = time.time() - start
        loss = train_batch_res.loss
        opt.zero_grad()
        loss.backward()
        params = [p for p in bot.parameters() if p.requires_grad]
        grad_norm = torch.nn.utils.clip_grad_norm_(parameters=params,
                                                   max_norm=FLAGS.il_clip)
        opt.step()
        train_steps += 1
        nb_frames += train_lengths.sum().item()
        # Throughput in frames per second for this batch.
        fps = train_lengths.sum().item() / batch_time

        stats = DictList()
        stats.grad_norm = grad_norm
        stats.loss = train_batch_res.loss.detach()
        stats.fps = torch.tensor(fps)
        train_stats.append(stats)

        if train_steps % FLAGS.il_eval_freq == 0:
            # Average accumulated train stats since the last flush.
            train_stats.apply(
                lambda _tensors: torch.stack(_tensors).mean().item())
            logger_str = ['[TRAIN] steps={}'.format(train_steps)]
            for k, v in train_stats.items():
                logger_str.append("{}: {:.4f}".format(k, v))
                writer.add_scalar('train/' + k, v, global_step=nb_frames)
            logging.info('\t'.join(logger_str))
            train_stats = DictList()
            writer.flush()
Exemplo n.º 22
0
 def get_action(self, inputs, mode='greedy'):
     """Sample an action and a stop decision for `inputs`.

     NOTE(review): `mode` is currently unused -- both heads are always
     sampled stochastically, even when mode='greedy'.
     """
     model_out = self.forward(inputs)
     sampled = {
         'action': model_out.action_dist.sample(),
         'stop': model_out.stop_dist.sample(),
     }
     return DictList(sampled)
Exemplo n.º 23
0
    def teacherforcing_batch(self, batch: DictList, batch_lengths,
                             sketch_lengths,
                             recurrence) -> (DictList, DictList):
        """Teacher-force the model over a padded batch.

        Steps through time computing action log-probabilities (optionally
        mixed with a termination probability via log-sum-exp when the model
        exposes ``log_end``); padded positions are zeroed out at the end.

        :param batch: DictList object with ``states``, ``actions``, ``tasks``
            of shape [bsz, seqlen, ...]
        :param batch_lengths: [bsz] true sequence lengths
        :param sketch_lengths: [bsz]
        :param recurrence: an int; recurrent memory is detached every
            `recurrence` steps (truncated BPTT)
        :return:
            stats: DictList with [bsz, seqlen] fields ``logprobs``, ``loss``
                and (when emitted) ``log_end``
            extra_info: DictList of extra per-step info (e.g. ``p``) stacked
                on the time dim
        """
        bsz, seqlen = batch.actions.shape[0], batch.actions.shape[1]
        sketchs = batch.tasks
        final_outputs = DictList({})
        extra_info = DictList({})
        mems = None
        if self.is_recurrent:
            mems = self.init_memory(sketchs, sketch_lengths)

        for t in range(seqlen):
            final_output = DictList({})
            model_output = self.forward(batch.states[:, t], sketchs,
                                        sketch_lengths, mems)
            logprobs = model_output.dist.log_prob(batch.actions[:, t].float())
            if 'log_end' in model_output:
                # p_end + (1 - pend) action_prob
                log_no_end_term = model_output.log_no_end + logprobs
                logprobs = torch.logsumexp(torch.stack(
                    [model_output.log_end, log_no_end_term], dim=-1),
                                           dim=-1)
                final_output.log_end = model_output.log_end
            final_output.logprobs = logprobs
            # Keep the raw slot distribution for downstream visualization.
            if 'p' in model_output:
                extra_info.append({'p': model_output.p})
            final_outputs.append(final_output)

            # Update memory (truncated BPTT: detach every `recurrence` steps)
            next_mems = None
            if self.is_recurrent:
                next_mems = model_output.mems
                if (t + 1) % recurrence == 0:
                    next_mems = next_mems.detach()
            mems = next_mems

        # Stack on time dim
        final_outputs.apply(lambda _tensors: torch.stack(_tensors, dim=1))
        extra_info.apply(lambda _tensors: torch.stack(_tensors, dim=1))
        # True for valid (non-padded) positions.
        sequence_mask = torch.arange(
            batch_lengths.max().item(),
            device=batch_lengths.device)[None, :] < batch_lengths[:, None]
        final_outputs.loss = -final_outputs.logprobs
        if 'log_end' in final_outputs:
            # NOTE(review): at the last valid step the loss is set to
            # +log_end while every other step uses -logprobs; confirm the
            # sign is intended (same pattern appears in run_batch).
            batch_ids = torch.arange(bsz, device=batch.states.device)
            final_outputs.loss[batch_ids, batch_lengths -
                               1] = final_outputs.log_end[batch_ids,
                                                          batch_lengths - 1]
        final_outputs.apply(lambda _t: _t.masked_fill(~sequence_mask, 0.))
        return final_outputs, extra_info
Exemplo n.º 24
0
 def encode_obs(self, obs: DictList):
     """Normalize a raw observation and project it into the input encoding."""
     normalized = self.snorm.normalize(obs.float())
     return self.inp_enc(normalized)