def __init__(self):
    """Set up the Walker2d PPO trainer: seeded env, policy/value net, optimizer,
    and CSV log paths tagged with this run's hyper-parameters."""
    self.envs = gym.make('Walker2d-v2')
    self.envs.seed(args.seed)
    # Rollout horizon and Walker2d-v2 observation/action dimensions.
    self.envpoch = 2048
    self.inputsize = 17
    self.actionsize = 6
    self.net = PPOnet(self.inputsize, self.actionsize).double().to(device)
    self.optimizer = optim.Adam(self.net.parameters(), lr=args.lr, eps=args.eps)
    # PPO hyper-parameters (clip ratio fixed; the rest from the CLI args).
    self.clip_param = 0.2
    self.PPOepoch = args.ppoepoch
    self.gamma = args.gamma
    self.lam = args.gaelambda
    self.out_record = None
    self.trajectories = []
    # Per-run CSV log files: one tag string shared by all three paths.
    run_tag = f'lr{args.lr}_ppoepoch{args.ppoepoch}_gamma{args.gamma}_gaelambda{args.gaelambda}'
    self.path_lsa = f'./csvfiles/lossa_{run_tag}.csv'
    self.path_lsv = f'./csvfiles/lossv_{run_tag}.csv'
    self.path_ep = f'./csvfiles/episode_{run_tag}.csv'
    # Online observation scaler sized to the 17-dim state.
    self.scaler = utils.Scaler(17)
def __init__(self):
    """Set up the Walker2d actor-critic trainer: env, actor/critic networks
    (each with a separate *_target copy), RMSprop optimizers, and fixed CSV
    log paths."""
    self.env = gym.make('Walker2d-v2')
    # self.env.seed(args.seed)  # seeding disabled in this variant
    self.envpoch = 2048
    self.inputsize = 17
    self.actionsize = 6
    # Actor network plus a target copy; separate learning rates per network.
    self.actor = Actor().double().to(device)
    self.actor_target = Actor().double().to(device)
    self.actor_optimizer = torch.optim.RMSprop(self.actor.parameters(), lr=1e-4)
    # Critic network plus a target copy.
    self.critic = Critic().double().to(device)
    self.critic_target = Critic().double().to(device)
    self.critic_optimizer = torch.optim.RMSprop(self.critic.parameters(), lr=1e-3)
    # self.random_noise = random_process.OrnsteinUhlenbeckActionNoise(self.actionsize)  # exploration noise disabled
    # PPO-style hyper-parameters taken from the CLI args.
    self.clip_param = 0.2
    self.PPOepoch = args.ppoepoch
    self.gamma = args.gamma
    self.lam = args.gaelambda
    self.out_record = None
    self.trainstep = 0
    self.trajectories = []
    # Fixed CSV log paths (not tagged with hyper-parameter values).
    self.path_lsa = './csvfiles/lossa_lr.csv'
    self.path_lsv = './csvfiles/lossv_lr.csv'
    self.path_ep = './csvfiles/episode_lr.csv'
    # Online observation scaler sized to the 17-dim state.
    self.scaler = utils.Scaler(17)
def __init__(self):
    """Set up the Walker2d 'doublenet' trainer: separate value and policy
    networks with their own Adam optimizers, a replay/trajectory store, and
    doublenet-specific checkpoint/log file paths."""
    self.env = gym.make('Walker2d-v2')
    self.envpoch = 1000
    # Two independent networks instead of one shared PPO net.
    self.valuenet = valuenet().double().to(device)
    self.policynet = policynet().double().to(device)
    self.optimizer_value = optim.Adam(self.valuenet.parameters(), lr=7e-5, eps=1e-7)
    self.optimizer_policy = optim.Adam(self.policynet.parameters(), lr=7e-5, eps=1e-7)
    # Hard-coded hyper-parameters for this variant (no CLI args used here).
    self.clip_param = 0.2
    self.memory = store()
    self.gamma = 0.99
    self.lam = 0.95
    self.out_record = None
    self.trajectories = []
    # Checkpoint and CSV log paths for the doublenet variant.
    self.path_t7 = 'model_doublenet.t7'
    self.path_lsa = "loss_doublenet_a.csv"
    self.path_lsv = "loss_doublenet_v.csv"
    self.path_ep = "episode_doublenet.csv"
    # Scaler sized to 18 here (other variants use 17) — presumably a
    # different state representation; verify against the rollout code.
    self.scaler = utils.Scaler(18)
    # NOTE(review): `self.net` is never assigned in this __init__ — only
    # `self.valuenet` and `self.policynet` exist — so this load raises
    # AttributeError whenever the checkpoint file is present. Confirm which
    # network(s) the .t7 checkpoint was saved from before fixing.
    if os.path.isfile(self.path_t7):
        self.net.load_state_dict(
            torch.load(self.path_t7, map_location='cpu'))
def __init__(self):
    """Set up the Walker2d PPO trainer variant with fixed gamma/lambda, a
    `store()` memory, and log paths tagged by learning rate and PPO epochs."""
    self.env = gym.make('Walker2d-v2')
    self.envpoch = 1000
    self.net = PPOnet().double().to(device)
    self.optimizer = optim.Adam(self.net.parameters(), lr=args.lr, eps=args.eps)
    # PPO hyper-parameters; discount and GAE lambda are hard-coded here.
    self.clip_param = 0.2
    self.PPOepoch = args.ppoepoch
    self.memory = store()
    self.gamma = 0.99
    self.lam = 0.95
    self.out_record = None
    self.trajectories = []
    # CSV log files share one suffix built from the tunable hyper-parameters.
    suffix = f'lr{args.lr}_ppoepoch{args.ppoepoch}.csv'
    self.path_lsa = './csvfiles/lossa_' + suffix
    self.path_lsv = './csvfiles/lossv_' + suffix
    self.path_ep = './csvfiles/episode_' + suffix
    # Online observation scaler sized to an 18-dim state in this variant.
    self.scaler = utils.Scaler(18)