Example #1
    def __init__(self,
                 sess,
                 dis=0.99,
                 REPLAY_MEMORY=100000,
                 batch_size=64,
                 max_steps=10000000,
                 max_episodes=500000,
                 layer_size_Q1=300,
                 layer_size_Q2=400,
                 learning_rate_Q=0.0001,
                 learning_rate_A=0.01,
                 training_step=1,
                 copy_step=1,
                 action_copy_step=1,
                 repu_num=1,
                 beta_max_step=1500000,
                 action_res=None,
                 random_action=True,
                 weighted_train=False,
                 ending_cond_epis=100,
                 ending_cond_reward=195,
                 alpha=0.6,
                 beta_init=0.4,
                 eps=0.01,
                 eps_div=256,
                 s_scale=1,
                 size_action_batch=500,
                 Double=True,
                 Prioritized=True,
                 Exp='softmax',
                 DDPG=False,
                 action_train=False,
                 seed_n=0,
                 Game='CartPole-v0',
                 file_name='steps',
                 save_epi=100,
                 save_network=False):

        env = gym.make(Game)
        tf.set_random_seed(seed_n)
        rng = np.random.RandomState(seed_n)

        input_size = env.observation_space.shape[0]
        # Reads env.action_space.shape[0], so a Box (continuous) action space is
        # assumed even though the default Game is CartPole-v0.
        self.action_dim = env.action_space.shape[0]
        output_size = self.action_dim

        action_map = []
        action_scale = env.action_space.high[0]
        """
        for o in range(self.action_dim):
            print(env.action_space.low[o])
            print(env.action_space.high[o])
        """
        # When DDPG is enabled, build the Action_Network
        if DDPG:
            self.Action_Network = Action_Network(sess,
                                                 input_size,
                                                 output_size,
                                                 seed_n,
                                                 layer_size_Q1,
                                                 layer_size_Q2,
                                                 learning_rate_A,
                                                 action_scale=action_scale)
        # When actions are not random, use get_action_information to discretize the action range uniformly
        if not random_action:
            size_action_batch, conti_action_flag, action_map = self.get_action_information(
                env, Game, action_res=action_res)

        replay_memory = replaymemory.ReplayMemory(rng=rng,
                                                  memory_size=REPLAY_MEMORY,
                                                  per_alpha=alpha,
                                                  per_beta0=beta_init)

        ############### copy parameters ################
        self.sess = sess
        self.dis = dis
        self.REPLAY_MEMORY = REPLAY_MEMORY
        self.replay_memory_size = REPLAY_MEMORY
        self.replay_memory = replay_memory
        self.batch_size = batch_size
        self.size_action_batch = size_action_batch

        self.alpha = alpha
        self.beta_init = beta_init
        self.beta_max_step = beta_max_step
        self.eps = eps
        self.eps_div = eps_div
        self.s_scale = s_scale
        self.action_scale = action_scale

        self.layer_size_Q1 = layer_size_Q1
        self.layer_size_Q2 = layer_size_Q2
        self.learning_rate_Q = learning_rate_Q
        self.learning_rate_A = learning_rate_A

        self.training_step = training_step
        self.copy_step = copy_step
        self.action_copy_step = action_copy_step
        self.action_train = action_train
        self.weighted_train = weighted_train
        self.repu_num = repu_num

        self.Double = Double
        self.Prioritized = Prioritized
        self.Exp = Exp
        self.DDPG = DDPG

        self.seed_n = seed_n
        self.Game = Game
        self.save_epi = save_epi
        self.max_episodes = max_episodes
        self.max_steps = max_steps
        self.env = env
        self.file_name = file_name
        self.save_network = save_network
        self.random_action = random_action

        self.input_size = input_size
        self.output_size = output_size
        self.ending_cond_epis = ending_cond_epis
        self.ending_cond_reward = ending_cond_reward
        #################################################

        self.Q_Network = Q_Network(sess, input_size, output_size,
                                   size_action_batch, batch_size, seed_n,
                                   layer_size_Q1, layer_size_Q2,
                                   learning_rate_Q, learning_rate_A,
                                   action_map, random_action, action_scale)

        # Run run_DQN
        self.run_DQN(seed_n=seed_n,
                     Exp=Exp,
                     Double=Double,
                     Prioritized=Prioritized)
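
For orientation, here is a minimal usage sketch for this constructor. The enclosing class name (`Agent`) and the choice of Pendulum-v0 are assumptions for illustration, not taken from the source; the environment must expose a Box action space because the constructor reads `env.action_space.shape[0]` and `env.action_space.high[0]`.

import tensorflow as tf

# Hypothetical usage (class name and env choice are illustrative assumptions).
with tf.Session() as sess:
    agent = Agent(sess,
                  Game='Pendulum-v0',     # Box action space, as the constructor expects
                  DDPG=True,              # also builds the Action_Network branch
                  random_action=True,     # skip get_action_information discretization
                  size_action_batch=500,
                  seed_n=0)
    # Note: the constructor calls run_DQN itself, so training starts on instantiation.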
Example #2
    def __init__(self,
                 sess,
                 dis=0.99,
                 REPLAY_MEMORY=10000,
                 batch_size=64,
                 max_episodes=500,
                 layer_size_Q1=64,
                 layer_size_Q2=64,
                 learning_rate_Q=0.0001,
                 training_step=1,
                 copy_step=1,
                 repu_num=1,
                 action_res=None,
                 ending_cond_epis=100,
                 ending_cond_reward=195,
                 alpha=0.6,
                 beta_init=0.4,
                 eps=0.01,
                 eps_div=256,
                 s_scale=1,
                 Double=True,
                 Dueling=True,
                 Prioritized=True,
                 Exp='softmax',
                 seed_n=0,
                 Game='CartPole-v0',
                 file_name='steps',
                 case_n=0,
                 save_epi=100):

        env = gym.make(Game)
        tf.set_random_seed(seed_n)
        rng = np.random.RandomState(seed_n)

        input_size = env.observation_space.shape[0]
        # Assumes a Box (continuous) action space; for Discrete envs such as the
        # default CartPole-v0, the commented-out fallback below was presumably used.
        self.action_dim = env.action_space.shape[0]
        #self.action_dim = 1
        output_size, conti_action_flag, action_map = self.get_action_information(
            env, Game, action_res=action_res)

        replay_memory = replaymemory.ReplayMemory(rng=rng,
                                                  memory_size=REPLAY_MEMORY,
                                                  per_alpha=alpha,
                                                  per_beta0=beta_init)

        self.sess = sess
        self.dis = dis
        self.REPLAY_MEMORY = REPLAY_MEMORY
        self.replay_memory_size = REPLAY_MEMORY
        self.replay_memory = replay_memory
        self.batch_size = batch_size

        self.alpha = alpha
        self.beta_init = beta_init
        self.eps = eps
        self.eps_div = eps_div
        self.s_scale = s_scale

        self.layer_size_Q1 = layer_size_Q1
        self.layer_size_Q2 = layer_size_Q2
        self.learning_rate_Q = learning_rate_Q

        self.training_step = training_step
        self.copy_step = copy_step
        self.repu_num = repu_num

        self.Double = Double
        self.Dueling = Dueling
        self.Prioritized = Prioritized
        self.Exp = Exp

        self.seed_n = seed_n
        self.Game = Game
        self.save_epi = save_epi
        self.max_episodes = max_episodes
        self.env = env
        self.file_name = file_name

        self.input_size = input_size
        self.output_size = output_size

        self.conti_action_flag = conti_action_flag
        self.action_map = action_map

        if Dueling:
            self.Q_Network = Q_Network_D(sess, input_size, output_size, seed_n,
                                         layer_size_Q1, layer_size_Q2,
                                         learning_rate_Q)
        else:
            self.Q_Network = Q_Network(sess, input_size, output_size, seed_n,
                                       layer_size_Q1, layer_size_Q2,
                                       learning_rate_Q)

        self.ending_cond_epis = ending_cond_epis
        self.ending_cond_reward = ending_cond_reward

        self.run_DQN(case_n=case_n,
                     seed_n=seed_n,
                     Exp=Exp,
                     Double=Double,
                     Dueling=Dueling,
                     Prioritized=Prioritized)
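
This constructor depends on a `get_action_information` helper that is not shown here. The sketch below illustrates what a uniform discretization of the action space could look like under the same return convention (output_size, conti_action_flag, action_map); the function name and logic are assumptions, not the repository's implementation.

import itertools
import gym
import numpy as np

def get_action_information_sketch(env, action_res=5):
    # Discrete envs (e.g. CartPole-v0) need no discretization: one action per index.
    space = env.action_space
    if isinstance(space, gym.spaces.Discrete):
        return space.n, False, list(range(space.n))
    # Continuous (Box) case: action_res evenly spaced values per dimension,
    # combined via the Cartesian product across dimensions.
    axes = [np.linspace(space.low[d], space.high[d], action_res)
            for d in range(space.shape[0])]
    action_map = [np.array(a) for a in itertools.product(*axes)]
    return len(action_map), True, action_map

With CartPole-v0 this would yield output_size == 2; with Pendulum-v0 and action_res=5 it would yield five evenly spaced torques in [-2, 2].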
Example #3
    def __init__(self,
                 dis=0.99,
                 REPLAY_MEMORY=100000,
                 batch_size=64,
                 max_steps=10000000,
                 max_episodes=500000,
                 layer_size_Q1=300,
                 layer_size_Q2=400,
                 learning_rate_Q=0.0001,
                 learning_rate_A=0.01,
                 training_step=1,
                 copy_step=1,
                 action_copy_step=1,
                 repu_num=1,
                 beta_max_step=1500000,
                 ending_cond_epis=100,
                 ending_cond_reward=195,
                 min_distance=0.1,
                 alpha=0.6,
                 beta_init=0.4,
                 eps=0.01,
                 scale=1,
                 size_action_batch=500,
                 seed_n=0,
                 Game='CartPole-v0',
                 file_name='steps',
                 save_epi=100,
                 save_network=False):

        env = gym.make(Game)
        rng = np.random.RandomState(seed_n)

        input_size = env.observation_space.shape[0]
        self.action_dim = env.action_space.shape[0]
        output_size = self.action_dim

        action_map = []
        action_scale = env.action_space.high[0]
        """
        for o in range(self.action_dim):
            print(env.action_space.low[o])
            print(env.action_space.high[o])
        """
        replay_memory = replaymemory.ReplayMemory(rng=rng,
                                                  memory_size=REPLAY_MEMORY,
                                                  per_alpha=alpha,
                                                  per_beta0=beta_init)

        ############### copy parameters ################
        self.dis = dis
        self.REPLAY_MEMORY = REPLAY_MEMORY
        self.replay_memory_size = REPLAY_MEMORY
        self.replay_memory = replay_memory
        self.batch_size = batch_size
        self.size_action_batch = size_action_batch

        self.alpha = alpha
        self.beta_init = beta_init
        self.beta_max_step = beta_max_step
        self.min_distance = min_distance
        self.eps = eps
        self.scale = scale
        self.action_scale = action_scale

        self.layer_size_Q1 = layer_size_Q1
        self.layer_size_Q2 = layer_size_Q2
        self.learning_rate_Q = learning_rate_Q
        self.learning_rate_A = learning_rate_A

        self.training_step = training_step
        self.copy_step = copy_step
        self.action_copy_step = action_copy_step
        self.repu_num = repu_num

        self.seed_n = seed_n
        self.Game = Game
        self.save_epi = save_epi
        self.max_episodes = max_episodes
        self.max_steps = max_steps
        self.env = env
        self.file_name = file_name
        self.save_network = save_network

        self.input_size = input_size
        self.output_size = output_size
        self.ending_cond_epis = ending_cond_epis
        self.ending_cond_reward = ending_cond_reward
        #################################################

        self.Q_Network = Q_Network(seed_n, input_size, output_size,
                                   size_action_batch, action_scale, batch_size,
                                   layer_size_Q1, layer_size_Q2,
                                   learning_rate_Q, learning_rate_A)

        # Run run_DQN
        self.run_DQN(seed_n=seed_n)
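
All three constructors build the replay buffer as replaymemory.ReplayMemory(rng, memory_size, per_alpha, per_beta0), i.e. proportional prioritized experience replay. The repository's implementation is not shown; the sketch below is a simplified stand-in in plain NumPy (list storage instead of a sum-tree), with method names chosen only for illustration.

import numpy as np

class ReplayMemorySketch:
    """Simplified proportional PER buffer; an illustrative stand-in, not the repo's class."""

    def __init__(self, rng, memory_size=100000, per_alpha=0.6, per_beta0=0.4):
        self.rng = rng
        self.memory_size = memory_size
        self.per_alpha = per_alpha
        self.per_beta0 = per_beta0
        self.data = []          # (s, a, r, s_next, done) transitions
        self.priorities = []    # one priority per stored transition

    def add(self, transition):
        # New samples get the current max priority so they are replayed at least once.
        p = max(self.priorities, default=1.0)
        if len(self.data) >= self.memory_size:
            self.data.pop(0)
            self.priorities.pop(0)
        self.data.append(transition)
        self.priorities.append(p)

    def sample(self, batch_size, beta):
        # P(i) is proportional to p_i**alpha; importance weights
        # w_i = (N * P(i))**(-beta), normalized by the maximum weight.
        scaled = np.asarray(self.priorities) ** self.per_alpha
        probs = scaled / scaled.sum()
        idx = self.rng.choice(len(self.data), size=batch_size, p=probs)
        weights = (len(self.data) * probs[idx]) ** (-beta)
        weights /= weights.max()
        return [self.data[i] for i in idx], idx, weights

    def update_priorities(self, idx, td_errors, eps=0.01):
        # Priorities track |TD error| plus a small epsilon (cf. the eps argument above).
        for i, e in zip(idx, td_errors):
            self.priorities[i] = abs(e) + eps

The alpha and beta_init arguments in the constructors correspond to the PER exponents: alpha controls how strongly priorities skew sampling, and beta is presumably annealed from beta_init toward 1 over beta_max_step steps, following standard PER practice, to correct the sampling bias.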