Example #1
    def __init__(self, env, is_batch_norm=False, is_grad_inverter=True):
        super().__init__(env)
        assert isinstance(env.action_space,
                          Box), "action space must be continuous"
        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.observation_space_size,
                                           self.action_space_size)
            self.actor_net = ActorNet_bn(self.observation_space_size,
                                         self.action_space_size)

        else:
            self.critic_net = CriticNet(self.observation_space_size,
                                        self.action_space_size)
            self.actor_net = ActorNet(self.observation_space_size,
                                      self.action_space_size)

        self.is_grad_inverter = is_grad_inverter
        self.replay_memory = deque()

        self.time_step = 0

        action_max = np.array(self.high).tolist()
        action_min = np.array(self.low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
Example #2
 def __init__(self,env):
     self.env = env 
     self.num_states = env.observation_space.shape[0]
     self.num_actions = env.action_space.shape[0]
     
     #Initialize actor and critic networks:
     action_bound = env.action_space.high
     self.critic_net = CriticNet(self.num_states, self.num_actions)
     self.actor_net = ActorNet(self.num_states, self.num_actions, action_bound)
     
     #Initialize replay buffer:
     self.replay_memory = deque()
     
     #Initialize time step:
     self.time_step = 0
     
     #invert gradients (soft thresholding)
     action_bounds = [[3], [-3]] #upper and lower bounds of the action space
     #action_bounds structure for higher-dimensional actions:
     #[[max_of_action_dim_0, max_of_action_dim_1, ..., max_of_action_dim_N],
     # [min_of_action_dim_0, min_of_action_dim_1, ..., min_of_action_dim_N]]
     
     
     self.grad_inv = grad_inverter(action_bounds)
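
The comment in Example #2 describes the layout `grad_inverter` expects: a list of per-dimension maxima followed by a list of per-dimension minima. For orientation, here is a minimal NumPy sketch of the inverting-gradients rule such a class typically implements (following Hausknecht and Stone's bounded-action DDPG); the function name and signature below are illustrative assumptions, not the actual `grad_inverter` class used in these examples.

import numpy as np

def invert_gradients(grads, actions, action_max, action_min):
    # Rescale dQ/da so the actor is pushed back toward the feasible range:
    # components that would increase an action are scaled by the remaining
    # headroom to the upper bound, components that would decrease it by the
    # distance to the lower bound (assumes a gradient-ascent convention).
    action_max = np.asarray(action_max, dtype=float)
    action_min = np.asarray(action_min, dtype=float)
    width = action_max - action_min
    up = (action_max - actions) / width
    down = (actions - action_min) / width
    return np.where(grads >= 0, grads * up, grads * down)

# With the 1-D bounds from Example #2, action_bounds = [[3], [-3]]:
grads = invert_gradients(np.array([[0.5], [-0.2]]),
                         np.array([[2.9], [-2.8]]),
                         action_max=[3], action_min=[-3])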
Example #3
 def __init__(self,env, is_batch_norm):
     self.env = env 
     self.num_states = 59
     self.num_actions = 3
     
     
     if is_batch_norm:
         self.critic_net = CriticNet_bn(self.num_states, self.num_actions) 
         self.actor_net = ActorNet_bn(self.num_states, self.num_actions)
         
     else:
         self.critic_net = CriticNet(self.num_states, self.num_actions) 
         self.actor_net = ActorNet(self.num_states, self.num_actions)
     
     #Initialize replay buffer:
     self.replay_memory = deque()
     
     #Initialize time step:
     self.time_step = 0
     self.counter = 0
     
     action_max = [1.0, 1.0, 1.0]
     action_min = [-1.0, -1.0, -1.0]      
     action_bounds = [action_max,action_min] 
     self.grad_inv = grad_inverter(action_bounds)
Example #4
    def restore(self, path):
        print("restoring the agent")

        file = os.path.join(
            path, "agent_data.pkl")  # TODO -- come up with a not stupid name

        with open(file, "rb") as f:
            dump = pickle.load(f)

            i_vars = vars(dump)
            keys = i_vars.keys()
            for key in keys:
                tmp = getattr(dump, key)
                setattr(self, key, tmp)

        action_max = np.array(self.high).tolist()
        action_min = np.array(self.low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)

        # Now replace the networks
        # IGNORE THE "IS BATCH " CONDITION FOR NOW
        saved_critic_net = CriticNet(self.observation_space_size,
                                     self.action_space_size)
        saved_actor_net = ActorNet(self.observation_space_size,
                                   self.action_space_size)

        # Load in the saved graphs
        critic_file = os.path.join(path, "critic_net.ckpt")
        saved_critic_net.restore(critic_file)
        actor_file = os.path.join(path, "actor_net.ckpt")
        saved_actor_net.restore(actor_file)

        self.critic_net = saved_critic_net
        self.actor_net = saved_actor_net
Example #5
 def __init__(self,env, is_batch_norm=False):
     self.env = env 
     self.num_states = env.observation_space.shape[0]
     self.num_actions = env.action_space.shape[0]
     
     
     if is_batch_norm:
         self.critic_net = CriticNet_bn(self.num_states, self.num_actions) 
         self.actor_net = ActorNet_bn(self.num_states, self.num_actions)
         
     else:
         self.critic_net = CriticNet(self.num_states, self.num_actions) 
         self.actor_net = ActorNet(self.num_states, self.num_actions)
     
     #Initialize replay buffer:
     self.replay_memory = deque()
     
     #Initialize time step:
     self.time_step = 0
     self.counter = 0
     
     action_max = np.array(env.action_space.high).tolist()
     action_min = np.array(env.action_space.low).tolist()        
     action_bounds = [action_max,action_min] 
     self.grad_inv = grad_inverter(action_bounds)
Example #6
    def __init__(self, num_states, num_actions, is_batch_norm):
        self.num_states = num_states
        self.num_actions = num_actions

        if is_batch_norm:
            self.critic_net = CriticNet_bn(self.num_states, self.num_actions)
            self.actor_net = ActorNet_bn(self.num_states, self.num_actions)

        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        #Initialize replay buffer:
        self.replay_memory = deque()

        #Initialize time step:
        self.time_step = 0
        self.counter = 0

        action_max = (5 * np.ones(num_actions)).tolist()
        action_min = np.zeros(num_actions).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
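
Every constructor above stores the resulting object on `self.grad_inv`, but none of these snippets show where it is consumed. As a rough, hedged sketch of the typical actor-update step (the `compute_delQ_a`, `invert`, and `train_actor` names are assumptions based on the reference DDPG implementation these examples resemble, not taken from the code shown here):

# Hypothetical actor-update fragment; state_batch / action_batch are
# minibatch arrays sampled from self.replay_memory.
del_q_a = self.critic_net.compute_delQ_a(state_batch, action_batch)  # dQ/da from the critic
del_q_a = self.grad_inv.invert(del_q_a, action_batch)                # keep the update within the action bounds
self.actor_net.train_actor(state_batch, del_q_a)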
Example #7
 def __init__(self,env, is_batch_norm):
     self.env = env 
     self.num_states = env.observation_space.shape[0]
     self.num_actions = env.action_space.shape[0]
     
     
     if is_batch_norm:
         self.critic_net = CriticNet_bn(self.num_states, self.num_actions) 
         self.actor_net = ActorNet_bn(self.num_states, self.num_actions)
         
     else:
         self.critic_net = CriticNet(self.num_states, self.num_actions) 
         self.actor_net = ActorNet(self.num_states, self.num_actions)
     
     #Initialize replay buffer:
     self.replay_memory = deque()
     
     #Initialize time step:
     self.time_step = 0
     self.counter = 0
     
     action_max = np.array(env.action_space.high).tolist()
     action_min = np.array(env.action_space.low).tolist()        
     action_bounds = [action_max,action_min] 
     self.grad_inv = grad_inverter(action_bounds)
Example #8
File: ddpg.py Project: zxsted/rnnDPG
    def __init__(self, env, is_batch_norm):
        self.env = env
        self.num_states = env.observation_space.shape[0] - 1
        self.num_actions = env.action_space.shape[0]
        self.num_hidden_states = 5

        if is_batch_norm:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)
        else:
            self.critic_net = CriticNet(self.num_states, self.num_actions)
            self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Because the action space is continuous, first determine how large the actions are and what range they cover.
        action_max = np.array(env.action_space.high).tolist()
        action_min = np.array(env.action_space.low).tolist()
        action_bounds = [action_max, action_min]

        # Initialize the gradient inverter, used when computing the gradient of Q
        self.grad_inv = grad_inverter(action_bounds)
        self.replay_memory = deque()
Example #9
File: agent.py Project: yejunhong1/DRAG
    def __init__( self, hisar_size, ar_size, action_size, TAU = 0.001, is_batch_norm = 0, write_sum = 0, net_size_scale=1, max_load=1, beta0=beta):
        self.hisar_size  = hisar_size
        self.load_size   = action_size + 1
        self.ar_size     = ar_size
        self.state_size  = action_size * 2
        self.action_size = action_size
        self.ar_action_size = ar_size + action_size

        #print("net_size_scale: "+str(net_size_scale))
        if is_batch_norm:
            if len(CN_N_HIDDENS)==2:
                self.critic_net   = CriticNet_bn(  self.state_size, self.action_size, TAU, write_sum, net_size_scale  )
            else:
                self.critic_net   = CriticNet_bn_3(  self.state_size, self.action_size, TAU, write_sum, net_size_scale  )
            self.actor_net    = ActorNet_bn(   self.state_size, self.action_size, TAU, write_sum, net_size_scale  )
            self.ar_pred_net  = ARPredNet_bn(  self.hisar_size, self.ar_size,     write_sum, net_size_scale )           # arrival rate prediction network
            self.load_map_net = LoadMapNet_bn( self.ar_size,    self.action_size, self.load_size, write_sum, net_size_scale )           # load mapping network
        else:
            self.critic_net   = CriticNet(  self.state_size, self.action_size, TAU, write_sum, net_size_scale )
            self.actor_net    = ActorNet(   self.state_size, self.action_size, TAU, write_sum, net_size_scale )
            self.ar_pred_net  = ARPredNet(  self.hisar_size, self.ar_size,     write_sum, net_size_scale )           # arrival rate prediction network
            self.load_map_net = LoadMapNet( self.ar_size,    self.action_size, self.load_size, write_sum, net_size_scale )           # load mapping network

        self.env = ENV( action_size, max_load=max_load, beta0=beta0 )

        #self.k_nearest_neighbors = int(max_actions * k_ratio )
        #Initialize replay buffers:
        self.replay_memory_ac  = deque()
        self.replay_memory_arp = deque()
        self.replay_memory_lm  = deque()

        #Initialize time step:
        self.time_step = 0
        self.counter   = 0
        
        action_max    = np.ones(  ( self.action_size ) ).tolist()
        action_min    = np.zeros( ( self.action_size ) ).tolist()
        action_bounds = [action_max, action_min] 
        self.grad_inv = grad_inverter( action_bounds )
Example #10
    def __init__(self, num_states, num_actions, action_space_high,
                 action_space_low, is_batch_norm):

        self.num_states = num_states
        self.num_actions = num_actions
        self.action_space_high = action_space_high
        self.action_space_low = action_space_low

        # Batch normalisation disabled.
        self.critic_net = CriticNet(self.num_states, self.num_actions)
        self.actor_net = ActorNet(self.num_states, self.num_actions)

        # Initialize replay memory
        self.replay_memory = deque()

        # Initialize time step
        self.time_step = 0
        self.counter = 0

        action_max = np.array(action_space_high).tolist()
        action_min = np.array(action_space_low).tolist()
        action_bounds = [action_max, action_min]
        self.grad_inv = grad_inverter(action_bounds)
Example #11
    def run_DQN(self, seed_n, Exp, Double, Prioritized):
        ############## copy parameters ##############
        sess = self.sess
        dis = self.dis
        REPLAY_MEMORY = self.REPLAY_MEMORY
        replay_memory = self.replay_memory
        batch_size = self.batch_size
        size_action_batch = self.size_action_batch

        Game = self.Game
        save_epi = self.save_epi
        save_network = self.save_network
        max_episodes = self.max_episodes
        max_steps = self.max_steps
        env = self.env
        random_action = self.random_action

        input_size = self.input_size
        output_size = self.output_size

        alpha = self.alpha
        beta_init = self.beta_init
        beta_max_step = self.beta_max_step
        eps = self.eps
        eps_div = self.eps_div
        s_scale = self.s_scale

        training_step = self.training_step
        copy_step = self.copy_step
        action_copy_step = self.action_copy_step
        action_train = self.action_train
        weighted_train = self.weighted_train
        repu_num = self.repu_num

        DDPG = self.DDPG

        ending_cond_epis = self.ending_cond_epis
        ending_cond_reward = self.ending_cond_reward

        env.seed(seed_n)
        np.random.seed(seed_n)
        tf.set_random_seed(seed_n)
        random.seed(seed_n)
        #############################################

        Q_Network = self.Q_Network
        A_batch = Q_Network.get_action_batch()
        if DDPG:
            Action_Network = self.Action_Network

        # Set up grad_inv, used when training the DDPG action network
        action_max = np.array(env.action_space.high).tolist()
        action_min = np.array(env.action_space.low).tolist()
        action_bounds = [action_max, action_min]
        grad_inv = grad_inverter(sess, action_bounds)

        case_n = seed_n + 1
        end_episode = 0
        step_count_total = 0
        global_step = 0
        loss = 0
        e = 1.

        replay_buffer = deque()
        Q_list = []
        TD_buffer = deque()
        steps_list = []
        step_avg_list = []
        global_step_list = []

        average_distance = []
        rate_of_adjacent = []

        print("")
        print("CASE {}".format(case_n))
        print("  STATE DIM : {}, ACTION DIM : {}".format(
            input_size, self.action_dim))
        print("  Exp : {}".format(Exp))
        if DDPG:
            print("  Strategy : Double : {},  Prioritized : {},  DDPG : {}".
                  format(Double, Prioritized, DDPG))
        elif random_action:
            if action_train:
                print(
                    "  Strategy : Double : {},  Prioritized : {},  ACTION : RANDOM,  ACTION TRAIN 'ON'"
                    .format(Double, Prioritized))
            else:
                print(
                    "  Strategy : Double : {},  Prioritized : {},  ACTION : RANDOM"
                    .format(Double, Prioritized))
        else:
            if action_train:
                print(
                    "  Strategy : Double : {},  Prioritized : {},  ACTION : DISCRETIZATION,  ACTION TRAIN 'ON'"
                    .format(Double, Prioritized))
            else:
                print(
                    "  Strategy : Double : {},  Prioritized : {},  ACTION : DISCRETIZATION"
                    .format(Double, Prioritized))
        print("")

        for episode in range(1, max_episodes + 1):

            done = False
            step_count = 0
            current_step = 0
            cost = 0
            state = env.reset()

            while not done:
                # Adjust epsilon; once it falls below 0.001 it stops shrinking.
                if e > 0.001:
                    #e = 1. / ((float(episode - 1) / eps_div) + 1)
                    e = 1. / ((float(global_step) / eps_div) + 1)

                t4 = time.time()
                if DDPG:  # when DDPG is true, the action network chooses the action
                    action = Action_Network.evaluate_actor(
                        np.reshape(state, [1, input_size]))[0]
                else:  # when DDPG is false, get per-action Q values for the state via get_q_batch, then pick an action with the exploration scheme
                    action0 = Exploration.choice_action(Exp, e, s_scale,\
                                                 np.reshape(Q_Network.get_q_batch(np.reshape(state,[1,-1])),[1,-1])[0])
                    action = A_batch[action0]

                next_state, reward, done, _ = env.step(action)
                step_count += reward
                global_step += 1
                current_step += 1

                # If Prioritized, save to the tree (replay_memory); otherwise append to replay_buffer for random sampling
                if Prioritized:
                    replay_memory.save_experience(state, action, reward,
                                                  next_state, done)
                else:
                    replay_buffer.append(
                        (state, next_state, action, reward, done))
                    if len(replay_buffer) > REPLAY_MEMORY:
                        replay_buffer.popleft()

                state = next_state

                if global_step <= beta_max_step:
                    replay_memory.anneal_per_importance_sampling(
                        global_step, beta_max_step)

                # Run training every training_step global steps
                if global_step > batch_size and global_step % training_step == 0:
                    for re in range(
                            repu_num):  # repeat training repu_num times; almost always 1
                        if Prioritized:
                            # Sample a batch from replay_memory
                            idx, priorities, w_batch, experience = replay_memory.retrieve_experience(
                                batch_size)
                            minibatch = self.format_experience(experience)
                            if DDPG:
                                # when DDPG is true, train both the Q network and the action network
                                errors, cost = Train.train_prioritized_DDPG(
                                    Q_Network, Action_Network, minibatch,
                                    w_batch, output_size, grad_inv)
                                replay_memory.update_experience_weight(
                                    idx, errors)

                            else:
                                # when DDPG is false, train only the Q network
                                errors, cost, state_t_batch = Train.train_prioritized(
                                    Q_Network, minibatch, w_batch, Exp,
                                    s_scale, input_size, output_size,
                                    size_action_batch)
                                replay_memory.update_experience_weight(
                                    idx, errors)

                                # Train the action set every action_copy_step steps; with action_train false this is the RAS algorithm
                                if action_train and global_step % action_copy_step == 0:
                                    action_weight = []

                                    if weighted_train:  # WARAS algorithm
                                        # compute the weights
                                        for k in range(batch_size):
                                            state_t = np.reshape(
                                                state_t_batch[k], [1, -1])

                                            q_batch = Q_Network.get_q_batch(
                                                state_t)
                                            q_batch = np.reshape(
                                                q_batch, [1, -1])[0]
                                            q_batch = q_batch * 10.
                                            max_q = np.max(q_batch)
                                            q_batch = np.exp(q_batch - max_q)
                                            action_weight.append(q_batch)

                                    else:  # ARAS algorithm
                                        # set every weight to 1
                                        action_weight = np.ones(
                                            [batch_size, size_action_batch])
                                    # Train the Q network using these weights
                                    Q_Network.train_weighted_actor(
                                        state_t_batch, action_weight)

                                    # Update the target action set
                                    Q_Network.update_action_target_critic()
                                    A_batch = Q_Network.get_action_batch()
                                    t_A_batch = Q_Network.get_target_action_batch(
                                    )
                                    """
                                    # Find pairs of actions that are close together and resample
                                    A_batch, t_A_batch = self.realign_action_batch(A_batch, t_A_batch)
                                    Q_Network.realign_action_batch(A_batch, t_A_batch)
                                    A_batch = Q_Network.get_action_batch()
                                    t_A_batch = Q_Network.get_target_action_batch()
                                    """
                        else:  # if not Prioritized, build a random minibatch and train
                            minibatch = random.sample(replay_buffer,
                                                      batch_size)
                            if DDPG:
                                cost = Train.train_DDPG(
                                    Q_Network, Action_Network, minibatch,
                                    output_size, grad_inv)

                            else:
                                cost, state_t_batch = Train.train(
                                    Q_Network, minibatch, Exp, s_scale,
                                    input_size, output_size, size_action_batch)

                # Update the target networks every copy_step steps
                if global_step % copy_step == 0:
                    if DDPG:
                        # Update target Critic and actor network
                        Q_Network.update_target_critic()
                        Q_Network.update_action_target_critic()
                        Action_Network.update_target_actor()

                    else:
                        Q_Network.update_target_critic()
                        Q_Network.update_action_target_critic()

            steps_list.append(step_count)
            global_step_list.append(global_step)

            # Print the running average of results
            if episode < ending_cond_epis:
                step_count_total += steps_list[episode - 1]
                step_avg_list.append(step_count_total / episode)

            if episode == ending_cond_epis:
                step_count_total += steps_list[episode - 1]
                step_avg_list.append(step_count_total / ending_cond_epis)

            if episode > ending_cond_epis:
                step_count_total += steps_list[episode - 1]
                step_count_total -= steps_list[episode - 1 - ending_cond_epis]
                step_avg_list.append(step_count_total / ending_cond_epis)

            print("{}           {}".format(
                episode, round(step_avg_list[episode - 1], 3)))
            if DDPG:
                print ("                   ( Result : {},  Loss : {},  Steps : {},  Global Steps : {} )"
                                   #.format(round(step_count, 3), round(cost, 5), current_step, global_step))
                                   .format(round(step_count, 3), 0, current_step, global_step))
            elif Exp == 'epsilon' or Exp == 'sparsemax':
                print ("                   ( Result : {},  Loss : {},  Epsilon : {},  Steps : {},  Global Steps : {} )"
                                   #.format(round(step_count, 3), round(cost, 5), round(e, 4), current_step, global_step))
                                   .format(round(step_count, 3), 0, round(e, 5), current_step, global_step))
            else:
                print ("                   ( Result : {},  Loss : {},  Steps : {},  Global Steps : {} )"
                                   #.format(round(step_count, 3), round(cost, 5), current_step, global_step))
                                   .format(round(step_count, 3), 0, current_step, global_step))

            distance, per_of_sim, per_of_sim2 = self.get_action_variance(
                A_batch)
            print(
                "                   ( Action Batch  ::::  Distance : {},  Percent : {}%({}%) )"
                .format(distance, per_of_sim, per_of_sim2))
            average_distance.append(distance)
            rate_of_adjacent.append(per_of_sim)

            # Save the networks
            if episode % save_epi == 0:
                file_case = str(case_n)
                if save_network:
                    Q_Network.save_network(game_name=self.file_name + '_seed' +
                                           file_case,
                                           episode=episode,
                                           save_epi=save_epi)

                with open(
                        '/home/minjae/Desktop/JOLP/' + self.file_name +
                        '_seed' + file_case, 'wb') as fout:
                    pickle.dump(step_avg_list, fout)
                with open(
                        '/home/minjae/Desktop/JOLP/' + self.file_name +
                        '_global_' + '_seed' + file_case, 'wb') as fout2:
                    pickle.dump(global_step_list, fout2)

                x_values = list(range(1, episode + 1))
                y_values = step_avg_list[:]
                plt.plot(x_values, y_values, c='green')
                plt.title(self.file_name)
                plt.grid(True)
                plt.show()

                with open(
                        '/home/minjae/Desktop/JOLP/' +
                        'Average_of_Distance_(' + self.file_name + '_seed' +
                        file_case + ')', 'wb') as fout:
                    pickle.dump(average_distance, fout)
                with open(
                        '/home/minjae/Desktop/JOLP/' + 'Rate_of_Adjacent_(' +
                        self.file_name + '_global_' + '_seed' + file_case +
                        ')', 'wb') as fout2:
                    pickle.dump(rate_of_adjacent, fout2)

                p_values = list(range(1, episode + 1))
                q_values = average_distance[:]
                r_values = rate_of_adjacent[:]
                plt.plot(p_values, q_values, c='r')
                plt.title('Average of Distance between Actions')
                plt.grid(True)
                plt.show()
                plt.plot(p_values, r_values, c='b')
                plt.title('Rate of Adjacent Actions')
                plt.grid(True)
                plt.show()

            end_episode += 1

            # Stop training once the result reaches the target
            if step_avg_list[episode - 1] > ending_cond_reward:
                break
            # Stop training after max_steps global steps
            if global_step > max_steps:
                break

        print("--------------------------------------------------")
        print("--------------------------------------------------")

        # If training stopped early because the target was reached, run out the remaining episodes
        for episode in range(end_episode + 1, max_episodes + 1):

            if global_step > max_steps:
                break

            state = env.reset()
            reward_sum = 0
            done = False
            while not done:
                # Choose the action with the largest Q value
                action = np.argmax(
                    Q_Network.evaluate_critic(
                        np.reshape(state, [1, input_size])))

                if conti_action_flag:
                    action = [action_map[action]]
                else:
                    action = action

                state, reward, done, _ = env.step(action)
                reward_sum += reward
                global_step += 1

                if done:
                    steps_list.append(reward_sum)
                    global_step_list.append(global_step)
                    step_count_total += steps_list[episode - 1]
                    step_count_total -= steps_list[episode - 1 -
                                                   ending_cond_epis]
                    step_avg_list.append(step_count_total / ending_cond_epis)
                    print("{}           {}".format(
                        episode, round(step_avg_list[episode - 1], 3)))
                    print("                   ( Result : {} )".format(
                        reward_sum))

            if episode % save_epi == 0:
                file_case = str(case_n)
                if save_network:
                    Q_Network.save_network(game_name=self.file_name + '_seed' +
                                           file_case,
                                           episode=episode,
                                           save_epi=save_epi)
                with open(
                        '/home/minjae/Desktop/JOLP/' + self.file_name +
                        '_seed' + file_case, 'wb') as fout:
                    pickle.dump(step_avg_list, fout)
                with open(
                        '/home/minjae/Desktop/JOLP/' + self.file_name +
                        '_global_' + '_seed' + file_case, 'wb') as fout2:
                    pickle.dump(global_step_list, fout2)

                x_values = list(range(1, episode + 1))
                y_values = step_avg_list[:]
                plt.plot(x_values, y_values, c='green')
                plt.title(self.file_name)
                plt.grid(True)
                plt.show()

        # Save parameters
        file_case = str(case_n)
        with open(
                '/home/minjae/Desktop/JOLP/' + self.file_name + '_seed' +
                file_case, 'wb') as fout:
            pickle.dump(step_avg_list, fout)
        with open(
                '/home/minjae/Desktop/JOLP/' + self.file_name + '_global_' +
                '_seed' + file_case, 'wb') as fout2:
            pickle.dump(global_step_list, fout2)

        # Plot the results
        x_values = list(range(1, len(step_avg_list) + 1))
        y_values = step_avg_list[:]
        plt.plot(x_values, y_values, c='green')
        plt.title(self.file_name)
        plt.grid(True)
        plt.show()

        with open(
                '/home/minjae/Desktop/JOLP/' + 'Average_of_Distance_(' +
                self.file_name + '_seed' + file_case + ')', 'wb') as fout:
            pickle.dump(average_distance, fout)
        with open(
                '/home/minjae/Desktop/JOLP/' + 'Rate_of_Adjacent_(' +
                self.file_name + '_global_' + '_seed' + file_case + ')',
                'wb') as fout2:
            pickle.dump(rate_of_adjacent, fout2)

        p_values = list(range(1, episode + 1))
        q_values = average_distance[:]
        r_values = rate_of_adjacent[:]
        plt.plot(p_values, q_values, c='r')
        plt.title('Average of Distance between Actions')
        plt.grid(True)
        plt.show()
        plt.plot(p_values, r_values, c='b')
        plt.title('Rate of Adjacent Actions')
        plt.grid(True)
        plt.show()