def generate_demos_cluster():
    """Generate demonstrations in parallel by launching one worker job per shard,
    then wait for all shards, merge them, and save the combined demo file.

    Reads configuration from the module-level ``args`` (episodes, jobs,
    job_script, demos, env, seed). Relies on ``utils`` helpers for paths
    and (de)serialization of demos.
    """
    demos_per_job = args.episodes // args.jobs
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent')
    # One shard file per job; realpath so workers and this process agree on location
    job_demo_names = [
        os.path.realpath(demos_path + '.shard{}'.format(i))
        for i in range(args.jobs)
    ]
    # Remove stale shard files left over from a previous run
    for demo_name in job_demo_names:
        job_demos_path = utils.get_demos_path(demo_name)
        if os.path.exists(job_demos_path):
            os.remove(job_demos_path)

    processes = []
    # Re-launch this same command line via args.job_script, overriding the
    # per-worker options (seed, shard path, episode count; --jobs 0 makes the
    # child generate demos directly instead of spawning more jobs).
    command = [args.job_script]
    command += sys.argv[1:]
    for i in range(args.jobs):
        cmd_i = list(
            map(
                str, command + ['--seed', args.seed + i] +
                ['--demos', job_demo_names[i]] +
                ['--episodes', demos_per_job] + ['--jobs', 0] +
                ['--valid-episodes', 0]))
        logger.info('LAUNCH COMMAND')
        logger.info(cmd_i)
        process = subprocess.Popen(cmd_i)
        processes += [process]
    for p in processes:
        p.wait()

    # Poll each shard until it contains the expected number of demos.
    # Loading can fail while a worker is still writing, hence the retry loop.
    job_demos = [None] * args.jobs
    while True:
        jobs_done = 0
        for i in range(args.jobs):
            if job_demos[i] is None or len(job_demos[i]) < demos_per_job:
                try:
                    logger.info("Trying to load shard {}".format(i))
                    job_demos[i] = utils.load_demos(
                        utils.get_demos_path(job_demo_names[i]))
                    logger.info("{} demos ready in shard {}".format(
                        len(job_demos[i]), i))
                except Exception:
                    logger.exception("Failed to load the shard")
            if job_demos[i] and len(job_demos[i]) == demos_per_job:
                jobs_done += 1
        logger.info("{} out of {} shards done".format(jobs_done, args.jobs))
        if jobs_done == args.jobs:
            break
        logger.info("sleep for 60 seconds")
        time.sleep(60)

    # Training demos: concatenate all shards and save under the final path
    all_demos = []
    for demos in job_demos:
        all_demos.extend(demos)
    utils.save_demos(all_demos, demos_path)
def __init__(self, env):
    """Set up the UI, attach the environment, and load any existing human demos."""
    super().__init__()
    self.initUI()

    # fpsLimit == 0 means no automatic stepping: the user steps manually
    self.fpsLimit = 0

    self.env = env
    self.lastObs = None

    # Load previously recorded human demonstrations, if any exist
    self.demos_path = utils.get_demos_path(args.demos, args.env,
                                           origin="human", valid=False)
    self.demos = utils.load_demos(self.demos_path, raise_not_found=False)
    utils.synthesize_demos(self.demos)

    # Start after the already-recorded demos unless an explicit shift is given
    self.shift = args.shift if args.shift is not None else len(self.demos)
    self.shiftEnv()

    # Storage for pointing-and-naming annotations
    self.pointingData = []
def __init__(
        self,
        args,
):
    """Load training/validation demos and build (or restore) the actor-critic model.

    :param args: parsed command-line namespace; ``args.multi_env`` selects the
        multi-environment code path, otherwise a single ``args.env`` is used.
    :raises ValueError: if more training episodes are requested than demos exist.
    """
    self.args = args

    utils.seed(self.args.seed)

    # args.env is a list when training on multiple environments
    if getattr(args, 'multi_env', None):
        self.env = [gym.make(item) for item in args.multi_env]

        self.train_demos = []
        for demos, episodes in zip(args.multi_demos, args.multi_episodes):
            demos_path = utils.get_demos_path(demos, None, None, valid=False)
            logger.info('loading {} of {} demos'.format(episodes, demos))
            train_demos = utils.load_demos(demos_path)
            logger.info('loaded demos')
            if episodes > len(train_demos):
                raise ValueError(
                    "there are only {} train demos in {}".format(
                        len(train_demos), demos))
            self.train_demos.extend(train_demos[:episodes])
            logger.info('So far, {} demos loaded'.format(
                len(self.train_demos)))

        self.val_demos = []
        for demos, episodes in zip(args.multi_demos,
                                   [args.val_episodes] * len(args.multi_demos)):
            demos_path_valid = utils.get_demos_path(demos, None, None,
                                                    valid=True)
            logger.info('loading {} of {} valid demos'.format(
                episodes, demos))
            valid_demos = utils.load_demos(demos_path_valid)
            logger.info('loaded demos')
            if episodes > len(valid_demos):
                logger.info(
                    'Using all the available {} demos to evaluate valid. accuracy'
                    .format(len(valid_demos)))
            self.val_demos.extend(valid_demos[:episodes])
            logger.info('So far, {} valid demos loaded'.format(
                len(self.val_demos)))

        logger.info('Loaded all demos')

        observation_space = self.env[0].observation_space
        action_space = self.env[0].action_space

    else:
        self.env = gym.make(self.args.env)

        demos_path = utils.get_demos_path(args.demos, args.env,
                                          args.demos_origin, valid=False)
        demos_path_valid = utils.get_demos_path(args.demos, args.env,
                                                args.demos_origin, valid=True)

        logger.info('loading demos')
        self.train_demos = utils.load_demos(demos_path)
        logger.info('loaded demos')
        if args.episodes:
            if args.episodes > len(self.train_demos):
                raise ValueError("there are only {} train demos".format(
                    len(self.train_demos)))
            self.train_demos = self.train_demos[:args.episodes]

        self.val_demos = utils.load_demos(demos_path_valid)
        if args.val_episodes > len(self.val_demos):
            logger.info(
                'Using all the available {} demos to evaluate valid. accuracy'
                .format(len(self.val_demos)))
        self.val_demos = self.val_demos[:self.args.val_episodes]

        observation_space = self.env.observation_space
        action_space = self.env.action_space

    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space,
        getattr(self.args, 'pretrained_model', None))

    # Define actor-critic model: reuse a saved model if one exists, otherwise
    # start from a pretrained model or build a fresh one.
    self.acmodel = utils.load_model(args.model, raise_not_found=False)
    if self.acmodel is None:
        if getattr(self.args, 'pretrained_model', None):
            self.acmodel = utils.load_model(args.pretrained_model,
                                            raise_not_found=True)
        else:
            self.acmodel = ACModel(self.obss_preprocessor.obs_space,
                                   action_space, args.image_dim,
                                   args.memory_dim, args.instr_dim,
                                   not self.args.no_instr,
                                   self.args.instr_arch,
                                   not self.args.no_mem, self.args.arch)
    self.obss_preprocessor.vocab.save()
    utils.save_model(self.acmodel, args.model)

    self.acmodel.train()
    if torch.cuda.is_available():
        self.acmodel.cuda()

    self.optimizer = torch.optim.Adam(self.acmodel.parameters(),
                                      self.args.lr,
                                      eps=self.args.optim_eps)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100,
                                                     gamma=0.9)
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
def main(args):
    """Run phased imitation learning: train, validate, and grow the demo set
    each phase until the target success rate is reached or phases run out.

    :param args: parsed command-line namespace (model, demos, phases, seed, ...).
    """
    args.model = args.model or ImitationLearning.default_model_name(args)
    utils.configure_logging(args.model)
    il_learn = ImitationLearning(args)

    # Define logger and Tensorboard writer
    header = ([
        "update", "frames", "FPS", "duration", "entropy", "policy_loss",
        "train_accuracy"
    ] + [
        "validation_accuracy", "validation_return", "validation_success_rate"
    ])
    writer = None
    if args.tb:
        from tensorboardX import SummaryWriter
        writer = SummaryWriter(utils.get_log_dir(args.model))

    # Define csv writer
    csv_path = os.path.join(utils.get_log_dir(args.model), 'log.csv')
    first_created = not os.path.exists(csv_path)
    # we don't buffer data going in the csv log, cause we assume
    # that one update will take much longer that one write to the log
    csv_writer = csv.writer(open(csv_path, 'a', 1))
    if first_created:
        csv_writer.writerow(header)

    # Log command, availability of CUDA, and model
    logger.info(args)
    logger.info("CUDA available: {}".format(torch.cuda.is_available()))
    logger.info(il_learn.acmodel)

    # Seed at which demo evaluation/generation will begin
    eval_seed = args.seed + len(il_learn.train_demos)

    # Phase at which we start
    cur_phase = 0

    # Try to load the status (if resuming)
    status_path = os.path.join(utils.get_log_dir(args.model), 'status.json')
    if os.path.exists(status_path):
        with open(status_path, 'r') as src:
            status = json.load(src)
        eval_seed = status.get('eval_seed', eval_seed)
        cur_phase = status.get('cur_phase', cur_phase)

    model_name = args.model

    for phase_no in range(cur_phase, args.phases):
        logger.info("Starting phase {} with {} demos, eval_seed={}".format(
            phase_no, len(il_learn.train_demos), eval_seed))

        # Each phase trains a different model from scratch
        args.model = model_name + ('_phase_%d' % phase_no)
        il_learn = ImitationLearning(args)

        # Train the imitation learning agent
        if len(il_learn.train_demos) > 0:
            train_status_path = os.path.join(utils.get_log_dir(args.model),
                                             'status.json')
            il_learn.train(il_learn.train_demos, writer, csv_writer,
                           train_status_path, header)

        # Stopping criterion: fraction of validation episodes with positive return
        valid_log = il_learn.validate(args.val_episodes)
        success_rate = np.mean(
            [1 if r > 0 else 0 for r in valid_log[0]['return_per_episode']])

        if success_rate >= 0.99:
            logger.info(
                "Reached target success rate with {} demos, stopping".format(
                    len(il_learn.train_demos)))
            break

        # Not good enough yet: enlarge the training set for the next phase
        eval_seed = grow_training_set(il_learn, il_learn.train_demos,
                                      eval_seed, args.demo_grow_factor,
                                      args.num_eval_demos)

        # Save the current demo generation seed
        with open(status_path, 'w') as dst:
            status = {'eval_seed': eval_seed, 'cur_phase': phase_no + 1}
            json.dump(status, dst)

        # Save the demos
        demos_path = utils.get_demos_path(args.demos, args.env,
                                          args.demos_origin, valid=False)
        print('saving demos to:', demos_path)
        utils.save_demos(il_learn.train_demos, demos_path)
def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the loaded agent and collect ``n_episodes`` successful demos,
    periodically saving them to disk.

    :param n_episodes: number of successful demonstrations to collect
    :param valid: whether to save under the validation demos path
    :param seed: seed for utils and for the environment
    :param shift: number of initial missions to skip via env.reset()
    """
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    env.seed(seed)
    # Skip the first `shift` missions
    for i in range(shift):
        env.reset()

    agent = utils.load_agent(env, args.model, args.demos, 'agent',
                             args.argmax, args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()

    while True:
        # Run the expert for one episode
        done = False
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                directions.append(obs['direction'])

                obs = new_obs
            # Keep only successful episodes, optionally filtered by length;
            # images are compressed with blosc to keep demo files small
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                if len(demos) >= n_episodes:
                    break
            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception("mission failed")
                logger.info("mission failed")
        except Exception:
            # Best-effort mode: log the failure and try another episode
            if args.on_exception == 'crash':
                raise
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos), demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations at regular intervals (final save happens below)
        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("Demos saved")
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("Demos saved")
    print_demo_lengths(demos[-100:])
def __init__(self, args):
    """Set up the meta-learner: load demos, build the actor-critic network and
    its per-task clone, and create inner- and outer-loop optimizers.

    :param args: parsed command-line namespace (update_lr, meta_lr, task_num,
        env, demos, model, architecture options, ...)
    """
    super(MetaLearner, self).__init__()

    self.update_lr = args.update_lr  # inner-loop (per-task) learning rate
    self.meta_lr = args.meta_lr      # outer-loop (meta) learning rate
    self.task_num = args.task_num    # number of tasks per meta-update
    self.args = args

    utils.seed(self.args.seed)
    self.env = gym.make(self.args.env)

    demos_path = utils.get_demos_path(args.demos, args.env,
                                      args.demos_origin, valid=False)
    demos_path_valid = utils.get_demos_path(args.demos, args.env,
                                            args.demos_origin, valid=True)

    logger.info('loading demos')
    self.train_demos = utils.load_demos(demos_path)
    logger.info('loaded demos')

    self.val_demos = utils.load_demos(demos_path_valid)
    self.val_demos = self.val_demos[:self.args.val_episodes]

    observation_space = self.env.observation_space
    action_space = self.env.action_space

    logger.info(args.model)

    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space,
        getattr(self.args, 'pretrained_model', None))

    # Define actor-critic model; always built from scratch here (no loading
    # of a saved/pretrained model in the meta-learning setting)
    self.net = ACModel(self.obss_preprocessor.obs_space, action_space,
                       args.image_dim, args.memory_dim, args.instr_dim,
                       not self.args.no_instr, self.args.instr_arch,
                       not self.args.no_mem, self.args.arch)
    self.obss_preprocessor.vocab.save()

    # fast_net is the clone updated in the inner loop; net holds the
    # meta-parameters updated by the outer loop
    self.fast_net = copy.deepcopy(self.net)
    self.net.train()
    self.fast_net.train()

    if torch.cuda.is_available():
        self.net.cuda()
        self.fast_net.cuda()

    self.optimizer = torch.optim.SGD(self.fast_net.parameters(),
                                     lr=self.args.update_lr)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100,
                                                     gamma=0.9)
    self.device = torch.device(
        "cuda" if torch.cuda.is_available() else "cpu")
    self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)
def generate_demos(n_episodes, valid, seed, shift=0):
    """Roll out the loaded agent (optionally on RGB pixel observations) and
    collect ``n_episodes`` successful demos, saving them periodically.

    :param n_episodes: number of successful demonstrations to collect
    :param valid: whether to save under the validation demos path
    :param seed: base seed; each episode is seeded with seed + len(demos)
    :param shift: unused in this variant — TODO confirm it can be dropped
    """
    utils.seed(seed)

    # Generate environment
    env = gym.make(args.env)
    use_pixels = args.pixels
    if use_pixels:
        # Wrap the env so observations are partial RGB images
        env = RGBImgPartialObsWrapper(env)

    agent = utils.load_agent(env, args.model, args.demos, 'agent',
                             args.argmax, args.env)
    demos_path = utils.get_demos_path(args.demos, args.env, 'agent', valid)
    demos = []

    checkpoint_time = time.time()

    just_crashed = False
    while True:
        if len(demos) == n_episodes:
            break

        done = False
        if just_crashed:
            # After a failure, take a fresh (unseeded) mission the bot can solve
            logger.info(
                "reset the environment to find a mission that the bot can solve"
            )
            env.reset()
        else:
            # Deterministic mission per demo index
            env.seed(seed + len(demos))
        obs = env.reset()
        agent.on_reset()

        actions = []
        mission = obs["mission"]
        images = []
        directions = []

        try:
            while not done:
                action = agent.act(obs)['action']
                if isinstance(action, torch.Tensor):
                    action = action.item()
                new_obs, reward, done, _ = env.step(action)
                agent.analyze_feedback(reward, done)

                actions.append(action)
                images.append(obs['image'])
                if use_pixels:
                    # Pixel observations carry no 'direction' field
                    directions.append(None)
                else:
                    directions.append(obs['direction'])

                obs = new_obs
            # Keep only successful episodes, optionally filtered by length
            if reward > 0 and (args.filter_steps == 0
                               or len(images) <= args.filter_steps):
                demos.append((mission, blosc.pack_array(np.array(images)),
                              directions, actions))
                just_crashed = False

            if reward == 0:
                if args.on_exception == 'crash':
                    raise Exception(
                        "mission failed, the seed is {}".format(seed +
                                                                len(demos)))
                just_crashed = True
                logger.info("mission failed")
        # NOTE(review): AssertionError is a subclass of Exception, so listing
        # it here is redundant
        except (Exception, AssertionError):
            if args.on_exception == 'crash':
                raise
            just_crashed = True
            logger.exception("error while generating demo #{}".format(
                len(demos)))
            continue

        if len(demos) and len(demos) % args.log_interval == 0:
            now = time.time()
            demos_per_second = args.log_interval / (now - checkpoint_time)
            to_go = (n_episodes - len(demos)) / demos_per_second
            # NOTE(review): logs len(demos) - 1 while the other variant logs
            # len(demos) — confirm which index is intended
            logger.info(
                "demo #{}, {:.3f} demos per second, {:.3f} seconds to go".
                format(len(demos) - 1, demos_per_second, to_go))
            checkpoint_time = now

        # Save demonstrations at regular intervals (final save happens below)
        if args.save_interval > 0 and len(
                demos) < n_episodes and len(demos) % args.save_interval == 0:
            logger.info("Saving demos...")
            utils.save_demos(demos, demos_path)
            logger.info("{} demos saved".format(len(demos)))
            # print statistics for the last 100 demonstrations
            print_demo_lengths(demos[-100:])

    # Save demonstrations
    logger.info("Saving demos...")
    utils.save_demos(demos, demos_path)
    logger.info("{} demos saved".format(len(demos)))
    print_demo_lengths(demos[-100:])
logging.basicConfig(level='INFO', format="%(asctime)s: %(levelname)s: %(message)s") logger.info(args) if args.jobs == 0: logger.info(args) generate_demos(args.episodes, False, args.seed, args.shift) if args.valid_episodes: generate_demos(args.valid_episodes, True, 0) else: demos_per_job = args.episodes // args.jobs job_demo_names = [ args.demos + '_shard{}'.format(i) for i in range(args.jobs) ] for demo_name in job_demo_names: job_demos_path = utils.get_demos_path(demo_name) if os.path.exists(job_demos_path): os.remove(job_demos_path) command = ['sbatch', '--mem=8g'] command += args.sbatch_args.split(' ') if args.sbatch_args else [] # babyai.sh should be in #PATH and should contain the following lines: # ##!/usr/bin/env bash #source activate babyai #"$@" # command += ['babyai.sh', 'python'] command += sys.argv for i in range(args.jobs): cmd_i = list(