    def __init__(self, env_name, params):
        self.env = envs.make(env_name)
        self.params = params
        self.action_bound = self.env.action_bound[1]
        
        self.iterations = params["iterations"]
        self.mem_len = params["mem_len"]
        self.seed = params["seed"]
        self.render = params["render"]
        self.log_interval = params["log_interval"]
        self.warmup = params["warmup"]
        self.batch_size = params["batch_size"]
        self.save = params["save"]

        hidden_dim = params["hidden_dim"]
        state_dim = self.env.observation_space
        action_dim = self.env.action_space
        cuda = params["cuda"]
        network_settings = params["network_settings"]

        actor = utils.Actor(state_dim, hidden_dim, action_dim)
        target_actor = utils.Actor(state_dim, hidden_dim, action_dim)
        critic = utils.Critic(state_dim+action_dim, hidden_dim, 1)
        target_critic = utils.Critic(state_dim+action_dim, hidden_dim, 1)
        self.agent = sw.Sleepwalk(actor, 
                                critic,
                                target_actor, 
                                target_critic,
                                network_settings,
                                GPU=cuda)

        self.noise = utils.OUNoise(action_dim)
        self.noise.set_seed(self.seed)
        self.memory = utils.ReplayMemory(self.mem_len)

        self.pol_opt = torch.optim.Adam(actor.parameters())
        self.crit_opt = torch.optim.Adam(critic.parameters())

        if cuda:
            self.Tensor = torch.cuda.FloatTensor
        else:
            self.Tensor = torch.Tensor
        
        if self.render:
            self.env.init_rendering()
        
        self.best = None

        # initialize experiment logging
        self.logging = params["logging"]
        if self.logging:
            self.directory = os.getcwd()
            filename = self.directory + "/data/qprop.csv"
            with open(filename, "w") as csvfile:
                self.writer = csv.writer(csvfile)
                self.writer.writerow(["episode", "reward"])
                self.train()
        else:
            self.train()
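For reference, a constructor like the one above is driven by a params dict whose keys appear in the snippet. Below is a minimal, hypothetical usage sketch: the class name Trainer, the environment name, and every value in the dict are illustrative assumptions, not the repository's defaults.

# Hypothetical usage sketch -- key names match the constructor above,
# but the class name, env name, and all values are assumed placeholders.
params = {
    "iterations": 1000,
    "mem_len": 100000,
    "seed": 343,
    "render": False,
    "log_interval": 10,
    "warmup": 50,
    "batch_size": 64,
    "save": False,
    "hidden_dim": 128,
    "cuda": False,
    "network_settings": {},   # agent-specific hyperparameters
    "logging": False,
}
# Note: the constructor itself starts training via self.train().
trainer = Trainer("SomeEnv-v0", params)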
Example #2
    def __init__(self, env_name, params):
        self.env = envs.make(env_name)
        self.params = params
        self.action_bound = self.env.action_bound[1]

        self.iterations = params["iterations"]
        self.seed = params["seed"]
        self.render = params["render"]
        self.log_interval = params["log_interval"]
        self.save = params["save"]

        self.cuda = params["cuda"]
        state_dim = self.env.observation_space
        action_dim = self.env.action_space
        hidden_dim = params["hidden_dim"]
        network_settings = params["network_settings"]

        pi = utils.Actor(state_dim, hidden_dim, action_dim)
        beta = utils.Actor(state_dim, hidden_dim, action_dim)
        critic = utils.Critic(state_dim, hidden_dim, 1)
        self.agent = fmis.FMIS(pi,
                               beta,
                               critic,
                               self.env,
                               network_settings,
                               GPU=self.cuda)

        self.pi_optim = torch.optim.Adam(self.agent.parameters())

        self.memory = fmis.ReplayMemory(1000000)

        if self.cuda:
            self.Tensor = torch.cuda.FloatTensor
        else:
            self.Tensor = torch.Tensor

        if self.render:
            self.env.init_rendering()

        self.best = None

        # use OU noise to explore and learn the model for n warmup episodes
        self.noise = utils.OUNoise(action_dim, mu=10)
        self.warmup = 5

        # initialize experiment logging
        self.logging = params["logging"]
        if self.logging:
            self.directory = os.getcwd()
            filename = self.directory + "/data/fmis.csv"
            with open(filename, "w") as csvfile:
                self.writer = csv.writer(csvfile)
                self.writer.writerow(["episode", "reward"])
                self.train()
        else:
            self.train()
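utils.OUNoise is used above for warmup exploration but its implementation is not shown in these snippets. Below is a minimal sketch of a standard Ornstein-Uhlenbeck process with a similar constructor shape; it is an assumption about what the class roughly does, not the repository's code.

import numpy as np

class OUNoiseSketch:
    """Minimal Ornstein-Uhlenbeck noise sketch (assumed interface, not the repo's utils.OUNoise)."""
    def __init__(self, action_dim, scale=0.1, mu=0.0, theta=0.15, sigma=0.2):
        self.action_dim = action_dim
        self.scale = scale
        self.mu = mu
        self.theta = theta
        self.sigma = sigma
        self.state = np.ones(action_dim) * mu

    def set_seed(self, seed):
        np.random.seed(seed)

    def reset(self):
        self.state = np.ones(self.action_dim) * self.mu

    def noise(self):
        # dx = theta * (mu - x) + sigma * N(0, 1): mean-reverting, temporally correlated noise
        dx = self.theta * (self.mu - self.state) + self.sigma * np.random.randn(self.action_dim)
        self.state = self.state + dx
        return self.state * self.scale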
    def __init__(self, env_name, params):
        self.env = envs.make(env_name)
        self.params = params

        self.iterations = params["iterations"]
        self.seed = params["seed"]
        self.render = params["render"]
        self.log_interval = params["log_interval"]
        self.save = params["save"]

        self.action_bound = self.env.action_bound[1]
        state_dim = self.env.observation_space
        action_dim = self.env.action_space
        hidden_dim = params["hidden_dim"]
        cuda = params["cuda"]
        network_settings = params["network_settings"]
        pi = utils.Actor(state_dim, hidden_dim, action_dim)
        beta = utils.Actor(state_dim, hidden_dim, action_dim)
        critic = utils.Critic(state_dim, hidden_dim, 1)
        self.agent = offpac.OFFPAC(pi,
                                   beta,
                                   critic,
                                   network_settings,
                                   GPU=cuda)
        self.optim = torch.optim.Adam(self.agent.parameters())

        if cuda:
            self.Tensor = torch.cuda.FloatTensor
            self.agent = self.agent.cuda()
        else:
            self.Tensor = torch.Tensor

        if self.render:
            self.env.init_rendering()

        self.best = None

        # initialize experiment logging
        self.logging = params["logging"]
        if self.logging:
            self.directory = os.getcwd()
            filename = self.directory + "/data/offpac.csv"
            with open(filename, "w") as csvfile:
                self.writer = csv.writer(csvfile)
                self.writer.writerow(["episode", "reward"])
                self.train()
        else:
            self.train()
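The OFFPAC agent above takes a target policy pi and a behaviour policy beta; the defining step of off-policy actor-critic is weighting updates by the per-action importance ratio between them. A minimal illustration follows, assuming both policies expose log-probabilities for the sampled actions (the actual offpac.OFFPAC internals are not shown here).

import torch

def importance_weight(pi_log_prob, beta_log_prob, clip=None):
    """rho = pi(a|s) / beta(a|s), computed from log-probabilities for numerical stability."""
    rho = torch.exp(pi_log_prob - beta_log_prob)
    if clip is not None:
        # optional truncation of the ratio, as in Retrace/V-trace-style variants
        rho = torch.clamp(rho, max=clip)
    return rho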
    def __init__(self, env_name, params):
        # initialize environment
        self.env = envs.make(env_name)
        self.env_name = env_name

        # save important experiment parameters for the training loop
        self.iterations = params["iterations"]
        self.mem_len = params["mem_len"]
        self.seed = params["seed"]
        self.render = params["render"]
        self.log_interval = params["log_interval"]
        self.warmup = params["warmup"]
        self.batch_size = params["batch_size"]
        self.save = params["save"]

        # initialize DDPG agent using experiment parameters from config file
        self.action_bound = self.env.action_bound[1]
        state_dim = self.env.observation_space
        action_dim = self.env.action_space
        hidden_dim = params["hidden_dim"]
        cuda = params["cuda"]
        network_settings = params["network_settings"]
        actor = ddpg.Actor(state_dim, hidden_dim, action_dim)
        target_actor = ddpg.Actor(state_dim, hidden_dim, action_dim)
        critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
        target_critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
        self.agent = ddpg.DDPG(actor,
                               target_actor,
                               critic,
                               target_critic,
                               network_settings,
                               GPU=cuda)

        # initialize Ornstein-Uhlenbeck noise for random action exploration
        ou_scale = params["ou_scale"]
        ou_mu = params["ou_mu"]
        ou_sigma = params["ou_sigma"]
        self.noise = utils.OUNoise(action_dim,
                                   scale=ou_scale,
                                   mu=ou_mu,
                                   sigma=ou_sigma)
        self.noise.set_seed(self.seed)
        self.memory = utils.ReplayMemory(self.mem_len)

        self.pol_opt = torch.optim.Adam(actor.parameters())
        self.crit_opt = torch.optim.Adam(critic.parameters())

        # want to save the best policy
        self.best = None

        # send to GPU if flagged in experiment config file
        if cuda:
            self.Tensor = torch.cuda.FloatTensor
            self.agent = self.agent.cuda()
        else:
            self.Tensor = torch.Tensor

        if self.render:
            self.env.init_rendering()

        # initialize experiment logging. This wipes any previous file with the same name
        self.logging = params["logging"]
        if self.logging:
            self.directory = os.getcwd()
            filename = self.directory + "/data/ddpg.csv"
            with open(filename, "w") as csvfile:
                self.writer = csv.writer(csvfile)
                self.writer.writerow(["episode", "reward"])
                self.train()
        else:
            self.train()
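The DDPG agent above maintains target copies of the actor and critic; these are typically synchronised with a Polyak (soft) update after each learning step, using a tau hyperparameter that would live in network_settings. The sketch below shows that standard rule as an assumption; the actual ddpg.DDPG implementation is not included in these snippets.

import torch

def soft_update(target_net, source_net, tau=0.005):
    """target <- tau * source + (1 - tau) * target, applied parameter-wise."""
    with torch.no_grad():
        for t_param, s_param in zip(target_net.parameters(), source_net.parameters()):
            t_param.mul_(1.0 - tau).add_(tau * s_param)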
Example #5
    def __init__(self, env_name, params):
        # initialize environment
        self.__env = gym.make(env_name)
        self.__env_name = env_name

        # save important experiment parameters for the training loop
        self.__iterations = params["iterations"]
        self.__mem_len = params["mem_len"]
        self.__seed = params["seed"]
        self.__render = params["render"]
        self.__log_interval = params["log_interval"]
        self.__warmup = params["warmup"]
        self.__batch_size = params["batch_size"]
        self.__learning_updates = params["learning_updates"]
        self.__save = params["save"]

        # initialize DDPG agent using experiment parameters from config file
        state_dim = self.__env.observation_space.shape[0]
        action_dim = self.__env.action_space.shape[0]
        hidden_dim = params["hidden_dim"]
        cuda = params["cuda"]
        network_settings = params["network_settings"]
        actor = Actor(state_dim, hidden_dim, action_dim)
        target_actor = Actor(state_dim, hidden_dim, action_dim)
        critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
        target_critic = utils.Critic(state_dim + action_dim, hidden_dim, 1)
        self.__agent = DDPG(actor,
                            target_actor,
                            critic,
                            target_critic,
                            network_settings,
                            GPU=cuda)

        # initialize Ornstein-Uhlenbeck noise for random action exploration
        ou_scale = params["ou_scale"]
        ou_mu = params["ou_mu"]
        ou_sigma = params["ou_sigma"]
        self.__noise = utils.OUNoise(action_dim,
                                     scale=ou_scale,
                                     mu=ou_mu,
                                     sigma=ou_sigma)
        self.__noise.set_seed(self.__seed)
        self.__memory = ReplayMemory(self.__mem_len)
        self.__pol_opt = torch.optim.Adam(actor.parameters(),
                                          params["actor_lr"])
        self.__crit_opt = torch.optim.Adam(critic.parameters(),
                                           params["critic_lr"])

        # want to save the best policy
        self.__best = None

        # send to GPU if flagged in experiment config file
        if cuda:
            self.__Tensor = torch.cuda.FloatTensor
            self.__agent = self.__agent.cuda()
        else:
            self.__Tensor = torch.Tensor

        # initialize experiment logging. This wipes any previous file with the same name
        self.__logging = params["logging"]
        self.__directory = os.getcwd()
        if self.__logging:
            filename = self.__directory + "/data/ddpg-" + self.__env_name + ".csv"
            with open(filename, "w") as csvfile:
                self.__writer = csv.writer(csvfile)
                self.__writer.writerow(["episode", "reward"])
                self._run_algo()
        else:
            self._run_algo()
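ReplayMemory is constructed with a capacity (self.__mem_len) and later sampled in batches of self.__batch_size during learning updates, but its implementation is not included here. Below is a minimal ring-buffer sketch with an assumed push/sample interface, not the repository's class.

import random
from collections import deque

class ReplayMemorySketch:
    """Fixed-capacity experience buffer (assumed interface: push + sample)."""
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)   # oldest transitions are evicted first

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        # uniform sampling without replacement from the stored transitions
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)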