Example #1
	def __init__(self,args):
		self.args = args
		self.device = torch.device('cuda') if args.cuda else torch.device('cpu')
		# A throwaway env is created only to read the size of the action space.
		dummy_env = gym.make(self.args.env_name)
		self.actor = ACNet(dummy_env.action_space.n, args.feedforward)
		del dummy_env
		if args.load_dir is not None:
			# Map weights to CPU first; they are moved to GPU below if requested.
			actorState = torch.load(args.load_dir, map_location=lambda storage, loc: storage)
			if args.continue_training:
				self.actor.load_state_dict(actorState)
				print("Loaded pretrained model successfully")
			if args.transfer:
				self.actor.load_autoturn_model(actorState)
		if args.cuda:
			self.actor.cuda()
		self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.args.lr)
		# One environment thunk per worker; run them in subprocesses when more
		# than one process is requested.
		self.env_list = [make_env(self.args.env_name, self.args.seed, i) for i in range(self.args.num_processes)]
		if self.args.num_processes > 1:
			self.envs = gym_vecenv.SubprocVecEnv(self.env_list)
		else:
			self.envs = gym_vecenv.DummyVecEnv(self.env_list)
		# Normalize observations only for low-dimensional (non-image) spaces.
		if len(self.envs.observation_space.shape) == 1:
			self.envs = gym_vecenv.VecNormalize(self.envs)
		
		# Frames are stacked along the channel dimension, so the first entry of
		# the observation shape grows by a factor of num_stack.
		self.obs_shape = self.envs.observation_space.shape
		self.obs_shape = (self.obs_shape[0] * args.num_stack, *self.obs_shape[1:])
		self.state_shape = 1 if args.feedforward else 256  # recurrent hidden size
		self.rollouts = RolloutStorage(self.args.num_fwd_steps, self.args.num_processes, self.obs_shape, self.envs.action_space, self.state_shape)
		# Total updates = frames / (forward steps per update * parallel envs).
		self.num_updates = int(args.num_frames) // args.num_fwd_steps // args.num_processes
		self.current_obs = torch.zeros(self.args.num_processes,*self.obs_shape)
		self.writer = SummaryWriter(log_dir=self.args.save_dir)
		self.fortress_threshold = 650
		self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
			self.actor_optimizer, mode='max', factor=0.2, patience=15,
			verbose=True, threshold=1e-3, threshold_mode='rel')
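
make_env itself is not shown in this example. Since SubprocVecEnv and
DummyVecEnv expect a list of zero-argument callables, here is a minimal
sketch of the thunk it presumably returns (the per-rank seeding scheme is
an assumption, not the original code):

import gym

def make_env(env_name, seed, rank):
    # Hypothetical sketch: return a thunk so each worker process builds
    # its own environment, seeded differently per rank.
    def _thunk():
        env = gym.make(env_name)
        env.seed(seed + rank)
        return env
    return _thunk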
Example #2
def make_parallel_envs(args):
    # One environment thunk per process, each seeded with a distinct rank.
    envs = [make_env(args.env_name, args.seed, i, args.num_agents,
                     args.dist_threshold, args.arena_size, args.identity_size)
            for i in range(args.num_processes)]
    if args.num_processes > 1:
        envs = gym_vecenv.SubprocVecEnv(envs)
    else:
        envs = gym_vecenv.DummyVecEnv(envs)

    # Normalize returns only; ob=False leaves observations untouched.
    envs = gym_vecenv.MultiAgentVecNormalize(envs, ob=False, ret=True)
    return envs
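
A hypothetical call site, assuming the multi-agent particle environments
this helper appears to target; the field names below mirror the attributes
the function reads, but the values (and the scenario name) are assumptions:

from argparse import Namespace

args = Namespace(env_name='simple_spread', seed=1, num_agents=3,
                 dist_threshold=0.1, arena_size=1.0, identity_size=0,
                 num_processes=4)
envs = make_parallel_envs(args)
obs = envs.reset()  # one observation per parallel environment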
Example #3
        #parser.error("No env_name provided.")
        env_name = "CartPole-v0"
    save_dir = args.save_dir if args.save_dir else "model/"
    load_dir = args.load_dir
    num_runs = int(args.num_runs) if args.num_runs else 1

    size = 5000
    epochs = 200
    learning_rate = 1e-2
    opt = tf.optimizers.Adam(learning_rate)
    γ = .99   # discount factor
    λ = 0.97  # GAE(λ) parameter
    num_env = 5

    # A single vectorized env suffices; it exposes the spaces directly.
    env = gym_vecenv.DummyVecEnv([lambda: gym.make(env_name)] * num_env)

    obs_spc = env.observation_space
    act_spc = env.action_space

    for x in range(num_runs):
        wandb.init(project='ppo', entity='rlexp', reinit=True)
        wandb.config.env = env_name
        wandb.config.epochs = epochs
        wandb.config.size = size
        wandb.config.lam = λ
        wandb.config.gamma = γ

        # policy/actor model
        model = tf.keras.models.Sequential([
            tf.keras.layers.Dense(64,
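
The Sequential definition is truncated at this point in the source. A
minimal sketch of how such a categorical policy head is commonly finished;
the tanh activation and single hidden layer are assumptions, not the
original code:

import tensorflow as tf

# Hypothetical completion: 64-unit tanh hidden layer, one logit per action.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(64, activation='tanh', input_shape=obs_spc.shape),
    tf.keras.layers.Dense(act_spc.n),
])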
Example #4
                    action='store_true',
                    default=False,
                    help='deterministic flag')
parser.add_argument('--feedforward',
                    action='store_true',
                    default=False,
                    help='use feedforward instead of recurrent architecture')
args = parser.parse_args()

torch.set_num_threads(1)
if args.env_name == 'autoturn':
    args.env = 'SpaceFortress-testautoturn-image-v0'
elif args.env_name == 'youturn':
    args.env = 'SpaceFortress-testyouturn-image-v0'
env = make_env(args.env, args.seed, 0)  # make_env returns an env thunk
env = gym_vecenv.DummyVecEnv([env])
action_dim = env.action_space.n

# Prefix and suffix used to match saved actor checkpoints in load_dir.
s1 = 'SpaceFortress-' + args.env_name + '-image-v0_'
s2 = '_ppo_actor.pth.tar'
save_files = os.listdir(args.load_dir)
actorNet = ACNet(action_dim, args.feedforward)
result = {}

render_func = env.envs[0].render
# Stack num_stack frames along the channel dimension, as during training.
obs_shape = env.observation_space.shape
obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])
current_obs = torch.zeros(1, *obs_shape)
masks = torch.zeros(1, 1)  # zeroed at episode boundaries
state_size = 1 if args.feedforward else 256
states = torch.zeros(1, state_size)  # recurrent hidden state
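
The snippet stops after allocating these buffers. A minimal sketch of the
frame-stacking update they are typically paired with; the helper name
update_current_obs and its exact logic are assumptions:

import torch

def update_current_obs(obs, current_obs, obs_shape, num_stack):
    # Hypothetical helper: shift older frames toward the front of the
    # channel dimension and write the newest frame into the last slot.
    shape_dim0 = obs_shape[0] // num_stack
    obs = torch.from_numpy(obs).float()
    if num_stack > 1:
        current_obs[:, :-shape_dim0] = current_obs[:, shape_dim0:]
    current_obs[:, -shape_dim0:] = obs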