def __init__(self, params):
    """Set up logging, RNG seeding, the gym environment, and the agent.

    Args:
        params: dict of run configuration. Mutated in place: 'ep_len'
            and the 'agent_params' sub-dict ('discrete', 'ac_dim',
            'ob_dim') are filled in here.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params
    self.logger = Logger(self.params['logdir'])

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(
        use_gpu=not self.params['no_gpu'],
        gpu_id=self.params['which_gpu']
    )

    # Bookkeeping for the training loop / logging cadence.
    self.total_env_steps = 0
    self.start_time = None
    self.log_video = False
    self.log_metrics = False
    self.initial_return = None

    #############
    ## ENV
    #############

    # Make the gym environment
    self.env = gym.make(self.params['env_name'])
    self.env.seed(seed)

    # Maximum length for episodes: explicit param wins over the env spec.
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
    # Fix: the original assigned MAX_VIDEO_LEN to a local variable that was
    # discarded when __init__ returned; declare it global so the
    # module-level video-length cap is actually updated.
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params['ep_len']

    # Is this env continuous, or discrete?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # Simulation time step, will be used for video saving.
    if 'model' in dir(self.env):
        # MuJoCo-style env: derive fps from the physics timestep.
        self.fps = 1 / self.env.model.opt.timestep
    else:
        self.fps = self.env.env.metadata['video.frames_per_second']

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.env, self.params['agent_params'])
def mp_worker(result_queue, env, serialized_policy, max_path_length, render, render_mode):
    """Worker loop: sample trajectories forever and push them onto a queue.

    Intended as the target of a multiprocessing.Process. The policy
    arrives in serialized form so it can cross the process boundary; it
    is reconstructed locally via its class's ``deserialize``.
    """
    # Keep workers on CPU so the GPU stays available to the main process.
    ptu.init_gpu(use_gpu=False)

    policy_cls = serialized_policy["__class__"]
    local_policy = policy_cls.deserialize(serialized_policy)

    # Runs until the parent process terminates this worker.
    while True:
        trajectory = sample_trajectory(
            env, local_policy, max_path_length, render, render_mode
        )
        result_queue.put(trajectory)
def __init__(self, params):
    """Seed RNGs, create the env, and build a GraphicalModelNet.

    Uses a fixed 3-vertex graph with `params['bins']` bins per vertex and
    small random initial cost tables; optionally restores a pre-trained
    Hopper checkpoint.

    Args:
        params: dict of run configuration; 'ep_len' is filled in here.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu()

    #############
    ## ENV
    #############

    # Make the gym environment
    self.env = gym.make(self.params['env_name'])
    self.env.seed(seed)

    # Maximum length for episodes
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps

    #############
    ## AGENT
    #############

    # n vertices, m bins per vertex; costs start as small random values.
    n = 3
    m = self.params['bins']
    self.input_features = 11
    self.hidden_features = 3
    self.costs_vertices = np.random.randn(n, m) * 0.0005
    self.costs_edges = np.random.randn(n, n, m, m) * 0.0005
    # Fully connected graph over the n vertices.
    self.edges = list(itertools.combinations(range(n), r=2))

    # initiate the graphical model neural network
    self.gm_net = GraphicalModelNet(self.params['eta'], self.params['emp_epoch'],
                                    self.input_features, self.hidden_features,
                                    self.costs_vertices, self.costs_edges,
                                    self.edges, self.params['bins'], ptu.device)

    # load pre-trained dicts if available
    if self.params['load_dict']:
        print("Loading stored dict...")
        # Fix: the checkpoint prefix was repeated three times (DRY), and
        # the state-dict load lacked map_location while w1/w2 had it —
        # without it, a GPU-saved checkpoint fails to load on CPU-only
        # machines.
        ckpt = 'cs285/data/emp_dagger_buckets_Hopper-v2_20_epoch7_eta40'
        self.gm_net.load_state_dict(
            torch.load(ckpt + '_state_dict.pt', map_location=ptu.device))
        self.gm_net.w1 = torch.load(ckpt + '_w1.pt', map_location=ptu.device)
        self.gm_net.w2 = torch.load(ckpt + '_w2.pt', map_location=ptu.device)
def __init__(self, params):
    """Set up logging, RNG seeding, the gym environment, and the agent.

    Humanoid envs cap the recorded video length at 1000 steps regardless
    of the configured ep_len.

    Args:
        params: dict of run configuration; mutated in place ('ep_len' and
            'agent_params' entries are filled in here).
    """
    #############
    ## INIT
    #############

    # Get params, create logger, create TF session
    self.params = params
    self.logger = Logger(self.params["logdir"])

    # Set random seeds
    seed = self.params["seed"]
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(use_gpu=not self.params["no_gpu"], gpu_id=self.params["which_gpu"])

    #############
    ## ENV
    #############

    # Make the gym environment
    self.env = gym.make(self.params["env_name"])
    self.env.seed(seed)

    # Maximum length for episodes
    self.params["ep_len"] = self.params["ep_len"] or self.env.spec.max_episode_steps
    # Fix: the original wrote MAX_VIDEO_LEN to a local that was discarded
    # when __init__ returned; declare it global so the module-level cap on
    # recorded video length is actually updated.
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = (self.params["ep_len"]
                     if not "Humanoid" in self.params["env_name"] else 1000)

    # Is this env continuous, or discrete?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params["agent_params"]["discrete"] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
    self.params["agent_params"]["ac_dim"] = ac_dim
    self.params["agent_params"]["ob_dim"] = ob_dim

    # simulation timestep, will be used for video saving
    if "model" in dir(self.env):
        # MuJoCo-style env: derive fps from the physics timestep.
        self.fps = 1 / self.env.model.opt.timestep
    else:
        self.fps = self.env.env.metadata["video.frames_per_second"]

    #############
    ## AGENT
    #############

    agent_class = self.params["agent_class"]
    self.agent = agent_class(self.env, self.params["agent_params"])
def __init__(self, params):
    """Configure logging, seeding, the environment, and the agent.

    Fills in params['ep_len'] and the 'agent_params' sub-dict before
    constructing the agent; also updates the module-level MAX_VIDEO_LEN.
    """
    #############
    ## INIT
    #############

    self.params = params
    self.logger = Logger(self.params['logdir'])

    # Seed every RNG we rely on and pick the compute device.
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(
        use_gpu=not self.params['no_gpu'],
        gpu_id=self.params['which_gpu'],
    )

    #############
    ## ENV
    #############

    self.env = gym.make(self.params['env_name'])
    self.env.seed(seed)

    # Non-interactive matplotlib backend for every env except obstacles,
    # which handles its own (local) plotting.
    if self.params['env_name'] != 'obstacles-cs285-v0':
        import matplotlib
        matplotlib.use('Agg')

    # Episode length: an explicit setting wins, otherwise the env's spec.
    if not self.params['ep_len']:
        self.params['ep_len'] = self.env.spec.max_episode_steps
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params['ep_len']

    # Action-space kind and observation layout.
    is_discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    has_image_obs = len(self.env.observation_space.shape) > 2
    self.params['agent_params']['discrete'] = is_discrete

    # Observation and action sizes for the agent.
    if has_image_obs:
        obs_dim = self.env.observation_space.shape
    else:
        obs_dim = self.env.observation_space.shape[0]
    if is_discrete:
        act_dim = self.env.action_space.n
    else:
        act_dim = self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = act_dim
    self.params['agent_params']['ob_dim'] = obs_dim

    # Frames per second for saved videos.
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    elif 'env_wrappers' in self.params:
        self.fps = 30  # not actually used when the Monitor wrapper records
    elif 'video.frames_per_second' in self.env.env.metadata.keys():
        self.fps = self.env.env.metadata['video.frames_per_second']
    else:
        self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.env, self.params['agent_params'])
def __init__(self, params):
    """Set up logging, seeding, paired exploration/eval envs, and the agent.

    Two env copies are created: `self.env` for exploration rollouts and
    `self.eval_env` for evaluation, each logging under its own prefix and
    optionally wrapped in a gym Monitor.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params
    self.logger = Logger(self.params['logdir'])

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(
        use_gpu=not self.params['no_gpu'],
        gpu_id=self.params['which_gpu']
    )

    #############
    ## ENV
    #############

    # Make the gym environment
    register_custom_envs()
    self.env = gym.make(self.params['env_name'])
    self.eval_env = gym.make(self.params['env_name'])
    # Non-interactive plotting backend for every env except pointmass.
    if not ('pointmass' in self.params['env_name']):
        import matplotlib
        matplotlib.use('Agg')
    # NOTE(review): set_logdir looks like a custom-env hook (separate log
    # prefixes for exploration vs eval) — confirm all supported envs
    # implement it, since a plain gym env would raise AttributeError here.
    self.env.set_logdir(self.params['logdir'] + '/expl_')
    self.eval_env.set_logdir(self.params['logdir'] + '/eval_')

    if 'env_wrappers' in self.params:
        # These operations are currently only for Atari envs
        # (Monitor must wrap before the custom env_wrappers are applied).
        self.env = wrappers.Monitor(self.env,
                                    os.path.join(self.params['logdir'], "gym"),
                                    force=True)
        self.eval_env = wrappers.Monitor(self.eval_env,
                                         os.path.join(self.params['logdir'], "gym"),
                                         force=True)
        self.env = params['env_wrappers'](self.env)
        self.eval_env = params['env_wrappers'](self.eval_env)
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    if 'non_atari_colab_env' in self.params and self.params[
            'video_log_freq'] > 0:
        # Colab path for non-Atari envs: record videos via Monitor.
        self.env = wrappers.Monitor(self.env,
                                    os.path.join(self.params['logdir'], "gym"),
                                    write_upon_reset=True)  #, force=True)
        self.eval_env = wrappers.Monitor(self.eval_env,
                                         os.path.join(self.params['logdir'], "gym"),
                                         write_upon_reset=True)
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    self.env.seed(seed)
    self.eval_env.seed(seed)

    # Maximum length for episodes
    self.params['ep_len'] = self.params[
        'ep_len'] or self.env.spec.max_episode_steps
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params['ep_len']

    # Is this env continuous, or self.discrete?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    # Are the observations images?
    img = len(self.env.observation_space.shape) > 2
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[
        0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[
        0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # simulation timestep, will be used for video saving
    if 'model' in dir(self.env):
        # MuJoCo-style env: derive fps from the physics timestep.
        self.fps = 1 / self.env.model.opt.timestep
    elif 'env_wrappers' in self.params:
        self.fps = 30  # This is not actually used when using the Monitor wrapper
    elif 'video.frames_per_second' in self.env.env.metadata.keys():
        self.fps = self.env.env.metadata['video.frames_per_second']
    else:
        self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.env, self.params['agent_params'])
def __init__(self, params):
    """Create logger + TF session, seed RNGs, build the env and the agent.

    NOTE(review): this variant mixes TensorFlow (session creation, TF
    seeding, session passed to the agent) with torch seeding and ptu GPU
    init — presumably mid-migration between frameworks; confirm which
    backend the agent actually uses before editing.
    """
    #############
    ## INIT
    #############

    # Get params, create logger, create TF session
    self.params = params
    self.logger = Logger(self.params['logdir'])
    self.sess = create_tf_session(self.params['use_gpu'],
                                  which_gpu=self.params['which_gpu'])

    # Set random seeds (both TF and torch/numpy are seeded — see NOTE above)
    seed = self.params['seed']
    tf.set_random_seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                 gpu_id=self.params['which_gpu'])

    #############
    ## ENV
    #############

    # Make the gym environment
    self.env = gym.make(self.params['env_name'])
    self.env.seed(seed)

    # Maximum length for episodes
    self.params['ep_len'] = self.params[
        'ep_len'] or self.env.spec.max_episode_steps

    # Is this env continuous, or self.discrete?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[
        0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # simulation timestep, will be used for video saving
    if 'model' in dir(self.env):
        # MuJoCo-style env: derive fps from the physics timestep.
        self.fps = 1 / self.env.model.opt.timestep
    else:
        self.fps = self.env.env.metadata['video.frames_per_second']

    #############
    ## AGENT
    #############

    # The agent receives the TF session as its first argument.
    agent_class = self.params['agent_class']
    self.agent = agent_class(self.sess, self.env, self.params['agent_params'])

    #############
    ## INIT VARS
    #############

    ## TODO initialize all of the TF variables (that were created by agent, etc.)
    ## HINT: use global_variables_initializer TODO
def __init__(self, params):
    """Seed RNGs and build a custom City delivery environment plus agent.

    Unlike the gym-based trainer variants, the env here is a City
    simulator sized by width/height with n_drivers and n_restaurants.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params
    self.logger = Logger(self.params['logdir'])

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                 gpu_id=self.params['which_gpu'])

    #############
    ## ENV
    #############

    # Make the gym environment
    #register_custom_envs()
    self.env = City((self.params['width'], self.params['height']),
                    self.params['n_drivers'], self.params['n_restaurants'])
    # Disabled Atari-style wrapper logic, kept for reference as a no-op
    # string expression:
    """
    if 'env_wrappers' in self.params:
        # These operations are currently only for Atari envs
        self.env = wrappers.Monitor(self.env, os.path.join(self.params['logdir'], "gym"), force=True)
        self.env = params['env_wrappers'](self.env)
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
        self.env = wrappers.Monitor(self.env, os.path.join(self.params['logdir'], "gym"), force=True)
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    """
    self.env.seed(seed)

    # import plotting (locally if 'obstacles' env)
    if not (self.params['env_name'] == 'obstacles-cs285-v0'):
        import matplotlib
        matplotlib.use('Agg')

    # Maximum length for episodes
    # NOTE(review): City may not define `.spec` — this relies on
    # params['ep_len'] being truthy so the `or` short-circuits; confirm.
    self.params['ep_len'] = self.params[
        'ep_len'] or self.env.spec.max_episode_steps
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params['ep_len']

    # Is this env multi binary, or self.discrete?
    #multi_bi = isinstance(self.env.action_space, gym.spaces.MultiBinary)
    is_city = True
    # Are the observations images?
    img = False
    self.params['agent_params']['is_city'] = is_city

    # Observation and action sizes
    #ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
    #ac_dim = self.env.action_space.n if multi_bi else self.env.action_space.shape[0]
    #ob_dim = self.env.observation_space.shape[0]
    #ac_dim = self.env.action_space.shape[0]
    self.params['agent_params']['n_drivers'] = self.params['n_drivers']
    # One action per driver.
    self.params['agent_params']['ac_dim'] = self.params['n_drivers']
    # Per-driver observation width uses the module-level MAX_CAP and
    # MAX_CAND_NUM constants; presumably 3 base features + 2 per capacity
    # slot + 5 + 5 per candidate — TODO confirm against the City env.
    self.params['agent_params']['ob_dim'] = (self.params['n_drivers'],
                                             (3 + 2 * MAX_CAP + 5 + 5 * MAX_CAND_NUM))
    self.params['agent_params']['shared_exp'] = self.params['shared_exp']
    self.params['agent_params']['shared_exp_lambda'] = self.params[
        'shared_exp_lambda']
    self.params['agent_params']['size_ac'] = self.params['size_ac']
    self.params['agent_params']['size_cr'] = self.params['size_cr']

    # simulation timestep, will be used for video saving
    #if 'model' in dir(self.env):
    #    self.fps = 1/self.env.model.opt.timestep
    #elif 'env_wrappers' in self.params:
    #    self.fps = 30 # This is not actually used when using the Monitor wrapper
    #elif 'video.frames_per_second' in self.env.env.metadata.keys():
    #    self.fps = self.env.env.metadata['video.frames_per_second']
    #else:
    #    self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    self.agent = agent_class(self.env, self.params['agent_params'])
def __init__(self, params):
    """Initialize logging, seeding, the (possibly wrapped) env, and the agent.

    Fills in params["ep_len"] and the "agent_params" sub-dict, wraps the
    env in a gym Monitor for Atari / colab video recording, and updates
    the module-level MAX_VIDEO_LEN.
    """
    #############
    ## INIT
    #############

    self.params = params
    self.logger = Logger(self.params["logdir"])

    # Seed numpy/torch and pick the compute device.
    seed = self.params["seed"]
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(use_gpu=not self.params["no_gpu"],
                 gpu_id=self.params["which_gpu"])

    #############
    ## ENV
    #############

    register_custom_envs()
    self.env = gym.make(self.params["env_name"])

    # Record videos only when video logging is enabled (Monitor records
    # on its default schedule when video_callable is None, never if False).
    record_videos = None if self.params["video_log_freq"] > 0 else False
    monitor_dir = os.path.join(self.params["logdir"], "gym")

    if "env_wrappers" in self.params:
        # These operations are currently only for Atari envs
        self.env = wrappers.Monitor(
            self.env,
            monitor_dir,
            force=True,
            video_callable=record_videos,
        )
        self.env = params["env_wrappers"](self.env)
        self.mean_episode_reward = -float("nan")
        self.best_mean_episode_reward = -float("inf")
    if "non_atari_colab_env" in self.params and self.params["video_log_freq"] > 0:
        self.env = wrappers.Monitor(
            self.env,
            monitor_dir,
            force=True,
            video_callable=record_videos,
        )
        self.mean_episode_reward = -float("nan")
        self.best_mean_episode_reward = -float("inf")
    self.env.seed(seed)

    # Non-interactive plotting backend everywhere except the obstacles env.
    if self.params["env_name"] != "obstacles-cs285-v0":
        import matplotlib
        matplotlib.use("Agg")

    # Maximum episode length: explicit param wins over the env spec.
    if not self.params["ep_len"]:
        self.params["ep_len"] = self.env.spec.max_episode_steps
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params["ep_len"]

    # Action-space kind and observation layout.
    is_discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    image_obs = len(self.env.observation_space.shape) > 2
    self.params["agent_params"]["discrete"] = is_discrete

    # Observation and action sizes for the agent.
    obs_shape = self.env.observation_space.shape
    self.params["agent_params"]["ob_dim"] = obs_shape if image_obs else obs_shape[0]
    if is_discrete:
        self.params["agent_params"]["ac_dim"] = self.env.action_space.n
    else:
        self.params["agent_params"]["ac_dim"] = self.env.action_space.shape[0]

    # Frames per second used when saving videos.
    if "model" in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    elif "env_wrappers" in self.params:
        self.fps = 30  # not actually used when the Monitor wrapper records
    elif "video.frames_per_second" in self.env.env.metadata.keys():
        self.fps = self.env.env.metadata["video.frames_per_second"]
    else:
        self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params["agent_class"]
    self.agent = agent_class(self.env, self.params["agent_params"])
def __init__(self, params):
    """Seed RNGs, build the env, and construct a GraphicalModelNet.

    The graph has params['vertices'] * params['bins'] nodes with m=2
    states each; vertex/edge cost tables are biased per-row before being
    handed to the network. Optionally restores a checkpoint named by
    params['load_path'].

    Args:
        params: dict of run configuration; 'ep_len' is filled in here.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu()

    #############
    ## ENV
    #############

    # Make the gym environment
    self.env = gym.make(self.params['env_name'])
    self.env.seed(seed)

    # Maximum length for episodes
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps

    #############
    ## AGENT
    #############

    n = self.params['vertices'] * self.params['bins']
    m = 2
    self.input_features = self.params['obs_space']
    self.hidden_features = 10
    self.costs_vertices = np.ones((n, m)) * 0.005
    self.costs_edges = np.ones((n, n, m, m)) * 0.005

    # Couple vertices belonging to the same row of bins.
    # NOTE(review): the row count is hard-coded to 3 — presumably it should
    # track self.params['vertices']; confirm before using other counts.
    bins = self.params['bins']
    for row in range(3):
        row_block = range(row * bins, row * bins + bins)
        for i in row_block:
            for j in row_block:
                if i != j:
                    self.costs_edges[i, j] = [[.3, .03], [.03, -.3]]
                    self.costs_edges[j, i] = [[.3, .03], [.03, -.3]]
    for i in range(n):
        self.costs_vertices[i] = [.3, .03]

    # Fully connected graph over the n vertices.
    self.edges = list(itertools.combinations(range(n), r=2))

    # initiate the graphical model neural network
    self.gm_net = GraphicalModelNet(
        self.params['eta'], self.params['emp_epoch'], self.input_features,
        self.hidden_features, self.costs_vertices, self.costs_edges,
        self.edges, self.params['bins'], ptu.device)

    # load pre-trained dicts if available
    if self.params['load_dict']:
        print("Loading stored dict...")
        # Fix: the checkpoint prefix was repeated three times (DRY), and
        # the state-dict load lacked map_location while w1/w2 had it —
        # without it, a GPU-saved checkpoint fails to load on CPU-only
        # machines.
        ckpt = 'cs285/data/' + self.params['load_path']
        self.gm_net.load_state_dict(
            torch.load(ckpt + '_state_dict.pt', map_location=ptu.device))
        self.gm_net.w1 = torch.load(ckpt + '_w1.pt', map_location=ptu.device)
        self.gm_net.w2 = torch.load(ckpt + '_w2.pt', map_location=ptu.device)
def __init__(self, params):
    """Set up logging, seeding, the (optionally Monitor-wrapped) env, and
    the agent.

    For experiments whose name contains 'hparam', the optimizer spec is
    overridden with one built from the swept learning rate before the
    agent is constructed.
    """
    #############
    ## INIT
    #############

    # Get params, create logger
    self.params = params
    self.logger = Logger(self.params['logdir'])

    # Set random seeds
    seed = self.params['seed']
    np.random.seed(seed)
    torch.manual_seed(seed)
    ptu.init_gpu(
        use_gpu=not self.params['no_gpu'],
        gpu_id=self.params['which_gpu']
    )

    #############
    ## ENV
    #############

    # Make the gym environment
    register_custom_envs()
    self.env = gym.make(self.params['env_name'])
    if 'env_wrappers' in self.params:
        # These operations are currently only for Atari envs.
        # (Fix: removed leftover merge-conflict code that had been kept
        # here as comments.)
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params['logdir'], "gym"),
            force=True,
            video_callable=(None if self.params['video_log_freq'] > 0 else False),
        )
        self.env = params['env_wrappers'](self.env)
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    if 'non_atari_colab_env' in self.params and self.params['video_log_freq'] > 0:
        self.env = wrappers.Monitor(
            self.env,
            os.path.join(self.params['logdir'], "gym"),
            force=True,
            video_callable=(None if self.params['video_log_freq'] > 0 else False),
        )
        self.mean_episode_reward = -float('nan')
        self.best_mean_episode_reward = -float('inf')
    self.env.seed(seed)

    # import plotting (locally if 'obstacles' env)
    if not (self.params['env_name'] == 'obstacles-cs285-v0'):
        import matplotlib
        matplotlib.use('Agg')

    # Maximum length for episodes
    self.params['ep_len'] = self.params['ep_len'] or self.env.spec.max_episode_steps
    global MAX_VIDEO_LEN
    MAX_VIDEO_LEN = self.params['ep_len']

    # Is this env continuous, or discrete?
    discrete = isinstance(self.env.action_space, gym.spaces.Discrete)
    # Are the observations images?
    img = len(self.env.observation_space.shape) > 2
    self.params['agent_params']['discrete'] = discrete

    # Observation and action sizes
    ob_dim = self.env.observation_space.shape if img else self.env.observation_space.shape[0]
    ac_dim = self.env.action_space.n if discrete else self.env.action_space.shape[0]
    self.params['agent_params']['ac_dim'] = ac_dim
    self.params['agent_params']['ob_dim'] = ob_dim

    # simulation timestep, will be used for video saving
    if 'model' in dir(self.env):
        self.fps = 1 / self.env.model.opt.timestep
    elif 'env_wrappers' in self.params:
        self.fps = 30  # not actually used when the Monitor wrapper records
    elif 'video.frames_per_second' in self.env.env.metadata.keys():
        self.fps = self.env.env.metadata['video.frames_per_second']
    else:
        self.fps = 10

    #############
    ## AGENT
    #############

    agent_class = self.params['agent_class']
    if 'hparam' in self.params['exp_name']:
        # Hyperparameter sweep: swap in an optimizer built from the swept lr.
        print('changing optimizer')
        self.params['agent_params']['optimizer_spec'] = lander_optimizer(self.params['lr'])
        print('using lr = ', self.params['lr'])
    # Fix: agent construction was duplicated identically in both branches
    # of the if/else above; construct it once here instead.
    self.agent = agent_class(self.env, self.params['agent_params'])
def __init__(self, params):
    """Set up seeding, a multi-agent env, per-group agents, and a
    centralized critic mixer.

    Builds one GACAgent per entry in params['groups'] and combines their
    critics with a BootstrappedSumCriticMixer.
    """
    self.params = params
    self.params['agent_params']['gamma'] = self.params['gamma']

    # Set random seeds
    seed = self.params['seed']
    torch.manual_seed(seed)

    # Setup GPU
    ptu.init_gpu(use_gpu=not self.params['no_gpu'],
                 gpu_id=self.params['which_gpu'])

    self.logger = Logger(self.params['logdir'])

    #############
    ## ENV
    #############

    groups = self.params['groups']
    self.env_name = self.params['env']
    if self.env_name == 'StarCraft2Env':
        # SMAC StarCraft II micromanagement env, wrapped for group play.
        from smac.env import StarCraft2Env
        from cs285.infrastructure.wrappers import SC2Wrapper
        self.env = SC2Wrapper(StarCraft2Env(map_name=self.params['env_map'],seed = seed),\
            groups = groups)
    elif self.env_name == "Paticles":
        # NOTE(review): "Paticles" (sic, probably "Particles") — this is a
        # runtime comparison string that callers must match, so renaming
        # it would break them; fix caller and branch together if at all.
        from multiagent.environment import MultiAgentEnv
        import multiagent.scenarios as scenarios
        from cs285.infrastructure.wrappers import ParticlesWrapper
        # NOTE(review): `scenario_name` is not defined anywhere in this
        # method — taking this branch raises NameError. It presumably
        # should come from params (e.g. params['env_map']); confirm and fix.
        scenario = scenarios.load(scenario_name + ".py").Scenario()
        world = scenario.make_world()
        self.env = ParticlesWrapper(\
            MultiAgentEnv(world, scenario.reset_world, scenario.reward, scenario.observation),\
            groups = groups)
    elif self.env_name == "Test":
        # Toy test env with either a random or a configured initial state.
        if self.params['random_init_state']:
            init_state = [
                random.randint(2, 9),
                random.randint(2, 9),
                random.randint(2, 9)
            ]
        else:
            init_state = self.params['init_state']
        from cs285.infrastructure.wrappers import Test
        self.env = Test(groups,
                        init_state=init_state,
                        goal_state=self.params['goal_state'])

    #############
    ## AGENT
    #############

    self.agents = []
    agent_critics = []
    for g_idx in range(len(groups)):
        # Per-group observation/action dimensions from the wrapped env.
        ob_dim = len(self.env.observation_space[g_idx]['obs'])
        ac_dim = len(self.env.action_space[g_idx])
        avail_ac_dim = sum([ac.n for ac in self.env.action_space[g_idx]])
        # NOTE(review): agent_params is shared and re-written each loop
        # iteration — each GACAgent must copy what it needs at
        # construction time; verify GACAgent does not keep a live
        # reference to this dict.
        self.params['agent_params']['n_agents'] = groups[g_idx]
        self.params['agent_params']['actor']['avail_ac_dim'] = avail_ac_dim
        self.params['agent_params']['actor']['ac_dim'] = ac_dim
        self.params['agent_params']['actor']['ob_dim'] = ob_dim
        self.params['agent_params']['critic']['ob_dim'] = ob_dim
        self.params['agent_params']['critic']['gamma'] = self.params[
            'agent_params']['gamma']
        agent = GACAgent(self.env, self.params['agent_params'], groups[g_idx])
        self.agents.append(agent)
        agent_critics.append(agent.critic)
    # Centralized mixer that combines the per-group critics.
    self.centralized_mixer = BootstrappedSumCriticMixer(
        self.params['agent_params']['critic'], agent_critics)