def __init__(self, env_name, params):
    # initialize environment and save experiment parameters for the training loop
    self.env = envs.make(env_name)
    self.params = params
    self.action_bound = self.env.action_bound[1]
    self.iterations = params["iterations"]
    self.mem_len = params["mem_len"]
    self.seed = params["seed"]
    self.render = params["render"]
    self.log_interval = params["log_interval"]
    self.warmup = params["warmup"]
    self.batch_size = params["batch_size"]
    self.save = params["save"]

    # build actor/critic networks and their targets from the config file
    hidden_dim = params["hidden_dim"]
    state_dim = self.env.observation_space
    action_dim = self.env.action_space
    cuda = params["cuda"]
    network_settings = params["network_settings"]
    actor = utils.Actor(state_dim, hidden_dim, action_dim)
    target_actor = utils.Actor(state_dim, hidden_dim, action_dim)
    critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
    target_critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
    self.agent = sw.Sleepwalk(actor, critic, target_actor, target_critic, network_settings, GPU=cuda)

    # exploration noise, replay buffer (sized from the config), and optimizers
    self.noise = utils.OUNoise(action_dim)
    self.noise.set_seed(self.seed)
    self.memory = utils.ReplayMemory(self.mem_len)
    self.pol_opt = torch.optim.Adam(actor.parameters())
    self.crit_opt = torch.optim.Adam(critic.parameters())

    if cuda:
        self.Tensor = torch.cuda.FloatTensor
    else:
        self.Tensor = torch.Tensor

    if self.render:
        self.env.init_rendering()

    self.best = None

    # initialize experiment logging
    self.logging = params["logging"]
    if self.logging:
        self.directory = os.getcwd()
        filename = self.directory + "/data/qprop.csv"
        with open(filename, "w") as csvfile:
            self.writer = csv.writer(csvfile)
            self.writer.writerow(["episode", "reward"])
            self.train()
    else:
        self.train()
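# The trainer above leans on a uniform-sampling replay buffer: transitions
# accumulate during warmup and minibatches of batch_size are drawn for each
# update. A minimal, self-contained sketch of that kind of buffer follows;
# it is illustrative only, and the repo's utils.ReplayMemory may differ in API.
import random
from collections import deque

class SimpleReplayMemory:
    def __init__(self, capacity):
        # bounded deque: once full, the oldest transitions are discarded
        self.buffer = deque(maxlen=capacity)

    def push(self, transition):
        self.buffer.append(transition)

    def sample(self, batch_size):
        # uniform random minibatch without replacement
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)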
def __init__(self, env_name, params):
    self.env = envs.make(env_name)
    self.params = params
    self.action_bound = self.env.action_bound[1]
    self.iterations = params["iterations"]
    self.seed = params["seed"]
    self.render = params["render"]
    self.log_interval = params["log_interval"]
    self.save = params["save"]
    self.cuda = params["cuda"]

    state_dim = self.env.observation_space
    action_dim = self.env.action_space
    hidden_dim = params["hidden_dim"]
    network_settings = params["network_settings"]

    pi = utils.Actor(state_dim, hidden_dim, action_dim)
    beta = utils.Actor(state_dim, hidden_dim, action_dim)
    critic = utils.Critic(state_dim, hidden_dim, 1)
    self.agent = fmis.FMIS(pi, beta, critic, self.env, network_settings, GPU=self.cuda)
    self.pi_optim = torch.optim.Adam(self.agent.parameters())
    self.memory = fmis.ReplayMemory(1000000)

    if self.cuda:
        self.Tensor = torch.cuda.FloatTensor
    else:
        self.Tensor = torch.Tensor

    if self.render:
        self.env.init_rendering()

    self.best = None

    # use OU noise to explore and learn the model for n warmup episodes
    self.noise = utils.OUNoise(action_dim, mu=10)
    self.warmup = 5

    # initialize experiment logging
    self.logging = params["logging"]
    if self.logging:
        self.directory = os.getcwd()
        filename = self.directory + "/data/fmis.csv"
        with open(filename, "w") as csvfile:
            self.writer = csv.writer(csvfile)
            self.writer.writerow(["episode", "reward"])
            self.train()
    else:
        self.train()
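# The warmup above draws exploratory actions from an Ornstein-Uhlenbeck
# process with a high mean (mu=10), which biases actions toward a fixed
# operating point while the model is being learned. A self-contained sketch
# of one OU step follows; theta and sigma are illustrative assumptions, and
# the repo's utils.OUNoise may parameterize the process differently.
import numpy as np

def ou_step(x, mu=10.0, theta=0.15, sigma=0.2):
    # mean-reverting drift toward mu plus a Gaussian perturbation
    return x + theta * (mu - x) + sigma * np.random.randn(*np.shape(x))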
def __init__(self, env_name, params):
    self.env = envs.make(env_name)
    self.params = params
    self.iterations = params["iterations"]
    self.seed = params["seed"]
    self.render = params["render"]
    self.log_interval = params["log_interval"]
    self.save = params["save"]
    self.action_bound = self.env.action_bound[1]

    state_dim = self.env.observation_space
    action_dim = self.env.action_space
    hidden_dim = params["hidden_dim"]
    cuda = params["cuda"]
    network_settings = params["network_settings"]

    pi = utils.Actor(state_dim, hidden_dim, action_dim)
    beta = utils.Actor(state_dim, hidden_dim, action_dim)
    critic = utils.Critic(state_dim, hidden_dim, 1)
    self.agent = offpac.OFFPAC(pi, beta, critic, network_settings, GPU=cuda)
    self.optim = torch.optim.Adam(self.agent.parameters())

    if cuda:
        self.Tensor = torch.cuda.FloatTensor
        self.agent = self.agent.cuda()
    else:
        self.Tensor = torch.Tensor

    if self.render:
        self.env.init_rendering()

    self.best = None

    # initialize experiment logging
    self.logging = params["logging"]
    if self.logging:
        self.directory = os.getcwd()
        filename = self.directory + "/data/offpac.csv"
        with open(filename, "w") as csvfile:
            self.writer = csv.writer(csvfile)
            self.writer.writerow(["episode", "reward"])
            self.train()
    else:
        self.train()
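# OFFPAC builds two actors because it learns off-policy: beta generates the
# behavior, pi is the target policy being improved, and updates are corrected
# by the likelihood ratio pi(a|s) / beta(a|s). A minimal torch sketch of that
# correction follows; it is an illustration of the idea, not the repo's
# actual update rule.
import torch

def importance_weight(pi_dist, beta_dist, action):
    # rho = pi(a|s) / beta(a|s), computed in log space for numerical stability
    return torch.exp(pi_dist.log_prob(action) - beta_dist.log_prob(action))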
def __init__(self, env_name, params):
    # initialize environment
    self.env = envs.make(env_name)
    self.env_name = env_name

    # save important experiment parameters for the training loop
    self.iterations = params["iterations"]
    self.mem_len = params["mem_len"]
    self.seed = params["seed"]
    self.render = params["render"]
    self.log_interval = params["log_interval"]
    self.warmup = params["warmup"]
    self.batch_size = params["batch_size"]
    self.save = params["save"]

    # initialize DDPG agent using experiment parameters from config file
    self.action_bound = self.env.action_bound[1]
    state_dim = self.env.observation_space
    action_dim = self.env.action_space
    hidden_dim = params["hidden_dim"]
    cuda = params["cuda"]
    network_settings = params["network_settings"]
    actor = ddpg.Actor(state_dim, hidden_dim, action_dim)
    target_actor = ddpg.Actor(state_dim, hidden_dim, action_dim)
    critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
    target_critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
    self.agent = ddpg.DDPG(actor, target_actor, critic, target_critic, network_settings, GPU=cuda)

    # initialize Ornstein-Uhlenbeck noise for random action exploration
    ou_scale = params["ou_scale"]
    ou_mu = params["ou_mu"]
    ou_sigma = params["ou_sigma"]
    self.noise = utils.OUNoise(action_dim, scale=ou_scale, mu=ou_mu, sigma=ou_sigma)
    self.noise.set_seed(self.seed)
    self.memory = utils.ReplayMemory(self.mem_len)
    self.pol_opt = torch.optim.Adam(actor.parameters())
    self.crit_opt = torch.optim.Adam(critic.parameters())

    # we want to save the best policy
    self.best = None

    # send to GPU if flagged in experiment config file
    if cuda:
        self.Tensor = torch.cuda.FloatTensor
        self.agent = self.agent.cuda()
    else:
        self.Tensor = torch.Tensor

    if self.render:
        self.env.init_rendering()

    # initialize experiment logging. This wipes any previous file with the same name.
    self.logging = params["logging"]
    if self.logging:
        self.directory = os.getcwd()
        filename = self.directory + "/data/ddpg.csv"
        with open(filename, "w") as csvfile:
            self.writer = csv.writer(csvfile)
            self.writer.writerow(["episode", "reward"])
            self.train()
    else:
        self.train()
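# For reference, a sketch of the config dict this constructor expects. The
# keys are read directly by the code above; every value (and the contents of
# network_settings) is an illustrative assumption, not the repo's defaults.
example_ddpg_params = {
    "iterations": 1000, "mem_len": 1000000, "seed": 0,
    "render": False, "log_interval": 10, "warmup": 100,
    "batch_size": 64, "save": True, "cuda": False,
    "hidden_dim": 256, "logging": False,
    "ou_scale": 1.0, "ou_mu": 0.0, "ou_sigma": 0.2,
    "network_settings": {"gamma": 0.99, "tau": 0.001},  # assumed keys
}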
def __init__(self, env_name, params):
    # initialize environment
    self.__env = gym.make(env_name)
    self.__env_name = env_name

    # save important experiment parameters for the training loop
    self.__iterations = params["iterations"]
    self.__mem_len = params["mem_len"]
    self.__seed = params["seed"]
    self.__render = params["render"]
    self.__log_interval = params["log_interval"]
    self.__warmup = params["warmup"]
    self.__batch_size = params["batch_size"]
    self.__learning_updates = params["learning_updates"]
    self.__save = params["save"]

    # initialize DDPG agent using experiment parameters from config file
    state_dim = self.__env.observation_space.shape[0]
    action_dim = self.__env.action_space.shape[0]
    hidden_dim = params["hidden_dim"]
    cuda = params["cuda"]
    network_settings = params["network_settings"]
    actor = Actor(state_dim, hidden_dim, action_dim)
    target_actor = Actor(state_dim, hidden_dim, action_dim)
    critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
    target_critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
    self.__agent = DDPG(actor, target_actor, critic, target_critic, network_settings, GPU=cuda)

    # initialize Ornstein-Uhlenbeck noise for random action exploration
    ou_scale = params["ou_scale"]
    ou_mu = params["ou_mu"]
    ou_sigma = params["ou_sigma"]
    self.__noise = utils.OUNoise(action_dim, scale=ou_scale, mu=ou_mu, sigma=ou_sigma)
    self.__noise.set_seed(self.__seed)
    self.__memory = ReplayMemory(self.__mem_len)
    self.__pol_opt = torch.optim.Adam(actor.parameters(), params["actor_lr"])
    self.__crit_opt = torch.optim.Adam(critic.parameters(), params["critic_lr"])

    # we want to save the best policy
    self.__best = None

    # send to GPU if flagged in experiment config file
    if cuda:
        self.__Tensor = torch.cuda.FloatTensor
        self.__agent = self.__agent.cuda()
    else:
        self.__Tensor = torch.Tensor

    # initialize experiment logging. This wipes any previous file with the same name.
    self.__logging = params["logging"]
    self.__directory = os.getcwd()
    if self.__logging:
        filename = self.__directory + "/data/ddpg-" + self.__env_name + ".csv"
        with open(filename, "w") as csvfile:
            self.__writer = csv.writer(csvfile)
            self.__writer.writerow(["episode", "reward"])
            self._run_algo()
    else:
        self._run_algo()
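# Unlike the custom envs used by the trainers above, gym exposes Box spaces,
# so state and action dimensions come from .shape rather than plain ints.
# Pendulum-v0 is used below purely for illustration; it is not necessarily
# the task this trainer was run on.
import gym

env = gym.make("Pendulum-v0")
state_dim = env.observation_space.shape[0]   # 3: (cos(theta), sin(theta), theta_dot)
action_dim = env.action_space.shape[0]       # 1: applied torque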