def generate_demos_cluster():
    """Shard demo generation across ``args.jobs`` worker processes and merge.

    Launches ``args.jobs`` copies of ``args.job_script`` (each writing its own
    demo shard with a distinct seed), polls until every shard holds its
    expected number of demos, then concatenates all shards into the final
    demo file at the standard demos path.

    Side effects: deletes stale shard files, spawns subprocesses, and writes
    the merged demo file via ``utils.save_demos``.
    """
    # Distribute episodes so none are silently dropped when args.episodes is
    # not divisible by args.jobs (plain `episodes // jobs` lost the
    # remainder): the first `extra` shards each take one extra episode.
    base, extra = divmod(args.episodes, args.jobs)
    episodes_per_job = [base + (1 if i < extra else 0) for i in range(args.jobs)]

    demos_path = utils.get_demos_path(args.demos, args.env, 'agent')
    job_demo_names = [
        os.path.realpath(demos_path + '.shard{}'.format(i))
        for i in range(args.jobs)
    ]
    # Remove stale shards from a previous run so the wait loop below
    # cannot pick up old data.
    for demo_name in job_demo_names:
        job_demos_path = utils.get_demos_path(demo_name)
        if os.path.exists(job_demos_path):
            os.remove(job_demos_path)

    processes = []
    command = [args.job_script]
    command += sys.argv[1:]
    for i in range(args.jobs):
        cmd_i = list(map(str, command
                         + ['--seed', args.seed + i]        # distinct seed per worker
                         + ['--demos', job_demo_names[i]]
                         + ['--episodes', episodes_per_job[i]]
                         + ['--jobs', 0]                    # workers must not re-shard
                         + ['--valid-episodes', 0]))
        logger.info('LAUNCH COMMAND')
        logger.info(cmd_i)
        process = subprocess.Popen(cmd_i)
        processes += [process]
    for p in processes:
        p.wait()

    # Poll the shard files until each contains its expected demo count.
    job_demos = [None] * args.jobs
    while True:
        jobs_done = 0
        for i in range(args.jobs):
            if job_demos[i] is None or len(job_demos[i]) < episodes_per_job[i]:
                try:
                    logger.info("Trying to load shard {}".format(i))
                    job_demos[i] = utils.load_demos(
                        utils.get_demos_path(job_demo_names[i]))
                    logger.info("{} demos ready in shard {}".format(
                        len(job_demos[i]), i))
                except Exception:
                    # The shard file may not exist yet; keep polling.
                    logger.exception("Failed to load the shard")
            # Compare against None (not truthiness): an empty shard that owes
            # zero episodes is legitimately done, and an empty list is falsy.
            if job_demos[i] is not None and len(job_demos[i]) == episodes_per_job[i]:
                jobs_done += 1
        logger.info("{} out of {} shards done".format(jobs_done, args.jobs))
        if jobs_done == args.jobs:
            break
        logger.info("sleep for 60 seconds")
        time.sleep(60)

    # Training demos: concatenate all shards into the final demo file.
    all_demos = []
    for demos in job_demos:
        all_demos.extend(demos)
    utils.save_demos(all_demos, demos_path)
def __init__(self, env):
    """Set up the UI, load human demos, and shift the env to the right episode."""
    super().__init__()
    self.initUI()

    # An fpsLimit of 0 means the environment only advances on manual steps.
    self.fpsLimit = 0

    self.env = env
    self.lastObs = None

    # Human-origin demonstrations recorded for this environment.
    self.demos = utils.load_demos(args.env, "human")
    utils.synthesize_demos(self.demos)
    self.current_demo = []

    if args.shift is None:
        # Default: continue right after the demos collected so far.
        self.shift = len(self.demos)
    else:
        self.shift = args.shift
    self.shiftEnv()

    # Accumulates pointing-and-naming annotations.
    self.pointingData = []
def __init__(self, env):
    """Set up the UI, load human demos from disk, and shift the env accordingly."""
    super().__init__()
    self.initUI()

    # An fpsLimit of 0 means the environment only advances on manual steps.
    self.fpsLimit = 0

    self.env = env
    self.lastObs = None

    # Resolve the human-demo file and load it; a missing file yields an
    # empty demo set instead of raising (raise_not_found=False).
    self.demos_path = utils.get_demos_path(args.demos, args.env,
                                           origin="human", valid=False)
    self.demos = utils.load_demos(self.demos_path, raise_not_found=False)
    utils.synthesize_demos(self.demos)

    if args.shift is None:
        # Default: continue right after the demos collected so far.
        self.shift = len(self.demos)
    else:
        self.shift = args.shift
    self.shiftEnv()

    # Accumulates pointing-and-naming annotations.
    self.pointingData = []
def __init__(self, args):
    """Load train/validation demos and build (or resume) the actor-critic model.

    :param args: parsed command-line arguments. Either ``args.multi_env``
        (plus ``multi_demos``/``multi_episodes``) for multi-environment
        training, or ``args.env``/``args.demos``/``args.demos_origin`` for a
        single environment. Also supplies model hyper-parameters.
    :raises ValueError: if more train episodes are requested than demos exist.
    """
    self.args = args

    utils.seed(self.args.seed)

    # args.multi_env is set (a list of env ids) when training on multiple
    # environments at once.
    if getattr(args, 'multi_env', None):
        self.env = [gym.make(item) for item in args.multi_env]

        # Concatenate the requested number of train demos from each source.
        self.train_demos = []
        for demos, episodes in zip(args.multi_demos, args.multi_episodes):
            demos_path = utils.get_demos_path(demos, None, None, valid=False)
            logger.info('loading {} of {} demos'.format(episodes, demos))
            train_demos = utils.load_demos(demos_path)
            logger.info('loaded demos')
            if episodes > len(train_demos):
                raise ValueError("there are only {} train demos in {}".format(
                    len(train_demos), demos))
            self.train_demos.extend(train_demos[:episodes])
            logger.info('So far, {} demos loaded'.format(len(self.train_demos)))

        # Validation: take up to args.val_episodes demos from each source.
        self.val_demos = []
        for demos, episodes in zip(args.multi_demos,
                                   [args.val_episodes] * len(args.multi_demos)):
            demos_path_valid = utils.get_demos_path(demos, None, None, valid=True)
            logger.info('loading {} of {} valid demos'.format(episodes, demos))
            valid_demos = utils.load_demos(demos_path_valid)
            logger.info('loaded demos')
            if episodes > len(valid_demos):
                # Not fatal: fall back to everything that is available.
                logger.info('Using all the available {} demos to evaluate valid. accuracy'
                            .format(len(valid_demos)))
            self.val_demos.extend(valid_demos[:episodes])
            logger.info('So far, {} valid demos loaded'.format(len(self.val_demos)))

        logger.info('Loaded all demos')

        observation_space = self.env[0].observation_space
        action_space = self.env[0].action_space
    else:
        self.env = gym.make(self.args.env)

        demos_path = utils.get_demos_path(args.demos, args.env,
                                          args.demos_origin, valid=False)
        demos_path_valid = utils.get_demos_path(args.demos, args.env,
                                                args.demos_origin, valid=True)

        logger.info('loading demos')
        self.train_demos = utils.load_demos(demos_path)
        logger.info('loaded demos')
        if args.episodes:
            if args.episodes > len(self.train_demos):
                raise ValueError("there are only {} train demos".format(
                    len(self.train_demos)))
            self.train_demos = self.train_demos[:args.episodes]

        self.val_demos = utils.load_demos(demos_path_valid)
        if args.val_episodes > len(self.val_demos):
            # Not fatal: fall back to everything that is available.
            logger.info('Using all the available {} demos to evaluate valid. accuracy'
                        .format(len(self.val_demos)))
        self.val_demos = self.val_demos[:self.args.val_episodes]

        observation_space = self.env.observation_space
        action_space = self.env.action_space

    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space,
        getattr(self.args, 'pretrained_model', None))

    # Define actor-critic model: resume a saved model under args.model if one
    # exists; otherwise start from the pretrained model, or from scratch.
    self.acmodel = utils.load_model(args.model, raise_not_found=False)
    if self.acmodel is None:
        if getattr(self.args, 'pretrained_model', None):
            self.acmodel = utils.load_model(args.pretrained_model,
                                            raise_not_found=True)
        else:
            self.acmodel = ACModel(self.obss_preprocessor.obs_space, action_space,
                                   args.image_dim, args.memory_dim, args.instr_dim,
                                   not self.args.no_instr, self.args.instr_arch,
                                   not self.args.no_mem, self.args.arch)
    self.obss_preprocessor.vocab.save()
    utils.save_model(self.acmodel, args.model)

    self.acmodel.train()
    if torch.cuda.is_available():
        self.acmodel.cuda()

    self.optimizer = torch.optim.Adam(self.acmodel.parameters(), self.args.lr,
                                      eps=self.args.optim_eps)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100, gamma=0.9)

    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def __init__(self, args):
    """Set up MAML-style meta-learning: env, demos, base net and its fast copy.

    :param args: parsed command-line arguments; must provide ``update_lr``,
        ``meta_lr``, ``task_num``, ``seed``, ``env``, demo options and the
        ACModel hyper-parameters.
    """
    super(MetaLearner, self).__init__()

    # Inner-loop (fast adaptation) and outer-loop (meta) learning rates.
    self.update_lr = args.update_lr
    self.meta_lr = args.meta_lr
    self.task_num = args.task_num

    self.args = args

    utils.seed(self.args.seed)
    self.env = gym.make(self.args.env)

    demos_path = utils.get_demos_path(args.demos, args.env,
                                      args.demos_origin, valid=False)
    demos_path_valid = utils.get_demos_path(args.demos, args.env,
                                            args.demos_origin, valid=True)

    logger.info('loading demos')
    self.train_demos = utils.load_demos(demos_path)
    logger.info('loaded demos')
    # NOTE(review): unlike ImitationLearning, truncating train demos to
    # args.episodes was deliberately disabled here.

    self.val_demos = utils.load_demos(demos_path_valid)
    self.val_demos = self.val_demos[:self.args.val_episodes]

    observation_space = self.env.observation_space
    action_space = self.env.action_space

    self.obss_preprocessor = utils.ObssPreprocessor(
        args.model, observation_space,
        getattr(self.args, 'pretrained_model', None))

    # Define actor-critic model. NOTE(review): resuming from a saved or
    # pretrained model (and re-saving it) was deliberately disabled here —
    # the meta network is always freshly initialized.
    self.net = ACModel(self.obss_preprocessor.obs_space, action_space,
                       args.image_dim, args.memory_dim, args.instr_dim,
                       not self.args.no_instr, self.args.instr_arch,
                       not self.args.no_mem, self.args.arch)
    self.obss_preprocessor.vocab.save()

    # fast_net is the per-task clone adapted in the inner loop; net keeps
    # the meta parameters updated by the outer loop.
    self.fast_net = copy.deepcopy(self.net)

    self.net.train()
    self.fast_net.train()
    if torch.cuda.is_available():
        self.net.cuda()
        self.fast_net.cuda()

    # Inner-loop optimizer acts on the fast copy only.
    self.optimizer = torch.optim.SGD(self.fast_net.parameters(),
                                     lr=self.args.update_lr)
    self.scheduler = torch.optim.lr_scheduler.StepLR(self.optimizer,
                                                     step_size=100, gamma=0.9)
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Outer-loop (meta) optimizer updates the base network's parameters.
    self.meta_optim = optim.Adam(self.net.parameters(), lr=self.meta_lr)
# Set seed for all randomness sources utils.seed(args.seed) # Generate environment env = gym.make(args.env) env.seed(args.seed) # Define agent agent = utils.load_agent(args, env) # Load demonstrations demos = utils.load_demos(args.env, "agent") utils.synthesize_demos(demos) for i in range(1, args.episodes+1): # Run the expert for one episode done = False obs = env.reset() demo = [] while not(done): action = agent.get_action(obs) new_obs, reward, done, _ = env.step(action) agent.analyze_feedback(reward, done) demo.append((obs, action, reward, done))
['--episodes', demos_per_job] + ['--jobs', 0] + ['--valid-episodes', 0] + ['--sbatch-args='])) logger.info('SBATCH COMMAND') logger.info(cmd_i) output = subprocess.check_output(cmd_i) logger.info('SBATCH OUTPUT') logger.info(output.decode('utf-8')) job_demos = [None] * args.jobs while True: jobs_done = 0 for i in range(args.jobs): if job_demos[i] is None or len(job_demos[i]) < demos_per_job: try: logger.info("Trying to load shard {}".format(i)) job_demos[i] = utils.load_demos( utils.get_demos_path(job_demo_names[i])) logger.info("{} demos ready in shard {}".format( len(job_demos[i]), i)) except Exception: logger.exception("Failed to load the shard") if job_demos[i] and len(job_demos[i]) == demos_per_job: jobs_done += 1 logger.info("{} out of {} shards done".format(jobs_done, args.jobs)) if jobs_done == args.jobs: break logger.info("sleep for 60 seconds") time.sleep(60) # Training demos all_demos = [] for demos in job_demos: