示例#1
0
def generate_demos_cluster():
    demos_per_job = args.episodes // args.jobs
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent')
    job_demo_names = [
        os.path.realpath(demos_path + '.shard{}'.format(i))
        for i in range(args.jobs)
    ]
    for demo_name in job_demo_names:
        job_demos_path = utils.get_demos_path(demo_name)
        if os.path.exists(job_demos_path):
            os.remove(job_demos_path)

    processes = []

    command = [args.job_script]
    command += sys.argv[1:]
    for i in range(args.jobs):
        cmd_i = list(
            map(
                str, command + ['--seed', args.seed + i] +
                ['--demos', job_demo_names[i]] +
                ['--episodes', demos_per_job] + ['--jobs', 0] +
                ['--valid-episodes', 0]))
        logger.info('LAUNCH COMMAND')
        logger.info(cmd_i)

        process = subprocess.Popen(cmd_i)
        processes += [process]

    for p in processes:
        p.wait()

    job_demos = [None] * args.jobs
    while True:
        jobs_done = 0
        for i in range(args.jobs):
            if job_demos[i] is None or len(job_demos[i]) < demos_per_job:
                try:
                    logger.info("Trying to load shard {}".format(i))
                    job_demos[i] = utils.load_demos(
                        utils.get_demos_path(job_demo_names[i]))
                    logger.info("{} demos ready in shard {}".format(
                        len(job_demos[i]), i))
                except Exception:
                    logger.exception("Failed to load the shard")
            if job_demos[i] and len(job_demos[i]) == demos_per_job:
                jobs_done += 1
        logger.info("{} out of {} shards done".format(jobs_done, args.jobs))
        if jobs_done == args.jobs:
            break
        logger.info("sleep for 60 seconds")
        time.sleep(60)

    # Training demos
    all_demos = []
    for demos in job_demos:
        all_demos.extend(demos)
    utils.save_demos(all_demos, demos_path)
示例#2
0
    def __init__(self, env):
        super().__init__()
        self.initUI()

        # By default, manual stepping only
        self.fpsLimit = 0

        self.env = env
        self.lastObs = None

        # Demonstrations
        self.demos_path = utils.get_demos_path(args.demos,
                                               args.env,
                                               origin="human",
                                               valid=False)
        self.demos = utils.load_demos(self.demos_path, raise_not_found=False)
        utils.synthesize_demos(self.demos)

        self.shift = len(self.demos) if args.shift is None else args.shift

        self.shiftEnv()

        # Pointing and naming data
        self.pointingData = []
示例#3
0
    def __init__(
        self,
        args,
    ):
        self.args = args

        utils.seed(self.args.seed)

        # args.env is a list when training on multiple environments
        if getattr(args, 'multi_env', None):
            self.env = [gym.make(item) for item in args.multi_env]

            self.train_demos = []
            for demos, episodes in zip(args.multi_demos, args.multi_episodes):
                demos_path = utils.get_demos_path(demos,
                                                  None,
                                                  None,
                                                  valid=False)
                logger.info('loading {} of {} demos'.format(episodes, demos))
                train_demos = utils.load_demos(demos_path)
                logger.info('loaded demos')
                if episodes > len(train_demos):
                    raise ValueError(
                        "there are only {} train demos in {}".format(
                            len(train_demos), demos))
                self.train_demos.extend(train_demos[:episodes])
                logger.info('So far, {} demos loaded'.format(
                    len(self.train_demos)))

            self.val_demos = []
            for demos, episodes in zip(args.multi_demos, [args.val_episodes] *
                                       len(args.multi_demos)):
                demos_path_valid = utils.get_demos_path(demos,
                                                        None,
                                                        None,
                                                        valid=True)
                logger.info('loading {} of {} valid demos'.format(
                    episodes, demos))
                valid_demos = utils.load_demos(demos_path_valid)
                logger.info('loaded demos')
                if episodes > len(valid_demos):
                    logger.info(
                        'Using all the available {} demos to evaluate valid. accuracy'
                        .format(len(valid_demos)))
                self.val_demos.extend(valid_demos[:episodes])
                logger.info('So far, {} valid demos loaded'.format(
                    len(self.val_demos)))

            logger.info('Loaded all demos')

            observation_space = self.env[0].observation_space
            action_space = self.env[0].action_space

        else:
            self.env = gym.make(self.args.env)

            demos_path = utils.get_demos_path(args.demos,
                                              args.env,
                                              args.demos_origin,
                                              valid=False)
            demos_path_valid = utils.get_demos_path(args.demos,
                                                    args.env,
                                                    args.demos_origin,
                                                    valid=True)
            print("else")
            logger.info('loading demos')
            self.train_demos = utils.load_demos(demos_path)
            print(len(self.train_demos))
            print(self.train_demos[0])
            logger.info('loaded demos')
            if args.episodes:
                if args.episodes > len(self.train_demos):
                    raise ValueError("there are only {} train demos".format(
                        len(self.train_demos)))
                self.train_demos = self.train_demos[:args.episodes]

            self.val_demos = utils.load_demos(demos_path_valid)
            if args.val_episodes > len(self.val_demos):
                logger.info(
                    'Using all the available {} demos to evaluate valid. accuracy'
                    .format(len(self.val_demos)))
            self.val_demos = self.val_demos[:self.args.val_episodes]

            observation_space = self.env.observation_space
            action_space = self.env.action_space

            print("else")
        print(args.model)
        self.obss_preprocessor = utils.ObssPreprocessor(
            args.model, observation_space,
            getattr(self.args, 'pretrained_model', None))

        # Define actor-critic model
        self.acmodel = utils.load_model(args.model, raise_not_found=False)
        if self.acmodel is None:
            if getattr(self.args, 'pretrained_model', None):
                self.acmodel = utils.load_model(args.pretrained_model,
                                                raise_not_found=True)
            else:
                self.acmodel = ACModel(self.obss_preprocessor.obs_space,
                                       action_space, args.image_dim,
                                       args.memory_dim, args.instr_dim,
                                       not self.args.no_instr,
                                       self.args.instr_arch,
                                       not self.args.no_mem, self.args.arch)
        self.obss_preprocessor.vocab.save()
        utils.save_model(self.acmodel, args.model)

        self.acmodel.train()
        if torch.cuda.is_available():
            self.acmodel.cuda()

        self.optimizer = torch.optim.Adam(self.acmodel.parameters(),
                                          self.args.lr,
                                          eps=self.args.optim_eps)
        self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                         step_size=100,
                                                         gamma=0.9)

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
def main(args):
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    il_learn = ImitationLearning(args)

    # Define logger and Tensorboard writer
    header = ([
        "update", "frames", "FPS", "duration", "entropy", "policy_loss",
        "train_accuracy"
    ] + [
        "validation_accuracy", "validation_return", "validation_success_rate"
    ])
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # we don't buffer data going in the csv log, cause we assume
    # that one update will take much longer that one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)

    # Seed at which demo evaluation/generation will begin
    eval_seed = args.seed + len(il_learn.train_demos)

    # Phase at which we start
    cur_phase = 0

    # Try to load the status (if resuming)
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
            eval_seed = status.get('eval_seed', eval_seed)
            cur_phase = status.get('cur_phase', cur_phase)

    model_name = args.model

    for phase_no in range(cur_phase, args.phases):
        logger.info("Starting phase {} with {} demos, eval_seed={}".format(
            phase_no, len(il_learn.train_demos), eval_seed))

        # Each phase trains a different model from scratch
        args.model = model_name + ('_phase_%d' % phase_no)
        il_learn = ImitationLearning(args)

        # Train the imitation learning agent
        if len(il_learn.train_demos) > 0:
            train_status_path = os.path.join(utils.get_log_dir(args.model),
                                             'status.json')
            il_learn.train(il_learn.train_demos, writer, csv_writer,
                           train_status_path, header)

        # Stopping criterion
        valid_log = il_learn.validate(args.val_episodes)
        success_rate = np.mean(
            [1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info(
                "Reached target success rate with {} demos, stopping".format(
                    len(il_learn.train_demos)))
            break

        eval_seed = grow_training_set(il_learn, il_learn.train_demos,
                                      eval_seed, args.demo_grow_factor,
                                      args.num_eval_demos)

        # Save the current demo generation seed
        with open(status_path, 'w') as dst:
            status = {'eval_seed': eval_seed, 'cur_phase': phase_no + 1}
            json.dump(status, dst)

        # Save the demos
        demos_path = utils.get_demos_path(args.demos,
                                          args.env,
                                          args.demos_origin,
                                          valid=False)
        print('saving demos to:', demos_path)
        utils.save_demos(il_learn.train_demos, demos_path)
示例#5
0
def generate_demos(n_episodes, valid, seed, shift=0):
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    env.seed(seed)
    for i in range(shift):
        env.reset()

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()

    while True:
        # Run the expert for one episode

        done = False
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

                obs = new_obs
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))

            if len(demos) >= n_episodes:
                break
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed")
                logger.info("mission failed")
        except Exception:
            if args.on_exception == 'crash':
                raise
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos), demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("Demos saved")
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("Demos saved")
    print_demo_lengths(demos[-100:])
示例#6
0
    def __init__(self, args):
        """

        :param args:
        """
        super(MetaLearner, self).__init__()

        self.update_lr = args.update_lr
        self.meta_lr = args.meta_lr
        self.task_num = args.task_num
        self.args = args

        utils.seed(self.args.seed)

        self.env = gym.make(self.args.env)

        demos_path = utils.get_demos_path(args.demos,
                                          args.env,
                                          args.demos_origin,
                                          valid=False)
        demos_path_valid = utils.get_demos_path(args.demos,
                                                args.env,
                                                args.demos_origin,
                                                valid=True)

        logger.info('loading demos')
        self.train_demos = utils.load_demos(demos_path)
        logger.info('loaded demos')
        # if args.episodes:
        #     if args.episodes > len(self.train_demos):
        #         raise ValueError("there are only {} train demos".format(len(self.train_demos)))
        # self.train_demos = self.train_demos[:args.episodes]

        self.val_demos = utils.load_demos(demos_path_valid)
        # if args.val_episodes > len(self.val_demos):
        #     logger.info('Using all the available {} demos to evaluate valid. accuracy'.format(len(self.val_demos)))
        self.val_demos = self.val_demos[:self.args.val_episodes]

        observation_space = self.env.observation_space
        action_space = self.env.action_space

        print(args.model)
        self.obss_preprocessor = utils.ObssPreprocessor(
            args.model, observation_space,
            getattr(self.args, 'pretrained_model', None))

        # Define actor-critic model
        # self.net = utils.load_model(args.model, raise_not_found=False)
        # if self.net is None:
        #     if getattr(self.args, 'pretrained_model', None):
        #         self.net = utils.load_model(args.pretrained_model, raise_not_found=True)
        #     else:
        self.net = ACModel(self.obss_preprocessor.obs_space, action_space,
                           args.image_dim, args.memory_dim, args.instr_dim,
                           not self.args.no_instr, self.args.instr_arch,
                           not self.args.no_mem, self.args.arch)
        self.obss_preprocessor.vocab.save()
        # utils.save_model(self.net, args.model)
        self.fast_net = copy.deepcopy(self.net)
        self.net.train()
        self.fast_net.train()

        if torch.cuda.is_available():
            self.net.cuda()
            self.fast_net.cuda()

        self.optimizer = torch.optim.SGD(self.fast_net.parameters(),
                                         lr=self.args.update_lr)
        # self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer, step_size=100, gamma=0.9)

        self.device = torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")
        self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)
示例#7
0
def generate_demos(n_episodes, valid, seed, shift=0):
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    use_pixels = args.pixels
    if use_pixels:
        env = RGBImgPartialObsWrapper(env)

    agent = utils.load_agent(env, args.model, args.demos, 'agent', args.argmax,
                             args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()

    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break

        done = False
        if just_crashed:
            logger.info(
                "reset the environment to find a mission that the bot can solve"
            )
            env.reset()
        else:
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                if use_pixels:
                    directions.append(None)
                else:
                    directions.append(obs['direction'])

                obs = new_obs
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                just_crashed = False

            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception(
                        "mission failed, the seed is {}".format(seed +
                                                                len(demos)))
                just_crashed = True
                logger.info("mission failed")
        except (Exception, AssertionError):
            if args.on_exception == 'crash':
                raise
            just_crashed = True
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations

        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])
示例#8
0
logging.basicConfig(level='INFO',
                    format="%(asctime)s: %(levelname)s: %(message)s")
logger.info(args)
if args.jobs == 0:
    logger.info(args)
    generate_demos(args.episodes, False, args.seed, args.shift)
    if args.valid_episodes:
        generate_demos(args.valid_episodes, True, 0)
else:
    demos_per_job = args.episodes // args.jobs
    job_demo_names = [
        args.demos + '_shard{}'.format(i) for i in range(args.jobs)
    ]
    for demo_name in job_demo_names:
        job_demos_path = utils.get_demos_path(demo_name)
        if os.path.exists(job_demos_path):
            os.remove(job_demos_path)

    command = ['sbatch', '--mem=8g']
    command += args.sbatch_args.split(' ') if args.sbatch_args else []
    # babyai.sh should be in #PATH and should contain the following lines:
    #
    ##!/usr/bin/env bash
    #source activate babyai
    #"$@"
    #
    command += ['babyai.sh', 'python']
    command += sys.argv
    for i in range(args.jobs):
        cmd_i = list(