Example #1
    def work(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP  # GLOBAL_RUNNING_R is the reward of all workers, GLOBAL_EP is the total iterations of all workers
        total_step = 1  # iterations of this worker
        # Execute one step first
        self.clientsExecResult = self.net.updateClientVideo()
        allClientSNR = utils1.get_snr(self.clientsExecResult)
        buffer_s, buffer1_s, buffer2_s, buffer3_s, buffer4_s,\
        buffer_CR_a, buffer1_CR_a, buffer2_CR_a, buffer3_CR_a, buffer4_CR_a,\
        buffer_CR1_r, buffer_CR2_r, buffer_CR3_r, buffer_CR4_r, buffer_CR_r = [], [], [], [], [], [], [], [], [], [], [], [], [], [], []
        windowInfo = []
        rewardCRList = [[] for _ in range(options.HostNum)]

        # while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
        ep_r_CR = 0  # the total reward of this episode (declared but not accumulated in this snippet)
        while GLOBAL_EP < MAX_GLOBAL_EP:

            while True:
                allClientsAction = {}
                c1_action = {}
                c2_action = {}
                c3_action = {}
                c4_action = {}

                # get the env info
                env, *s_env = utils1.env_state8(self.clientsExecResult)

                feed_dict1 = {self.AC.s1_CR: np.array(env[0]).reshape((-1, ENV_DIMS_new))}
                feed_dict2 = {self.AC.s2_CR: np.array(env[1]).reshape((-1, ENV_DIMS_new))}
                feed_dict3 = {self.AC.s3_CR: np.array(env[2]).reshape((-1, ENV_DIMS_new))}
                feed_dict4 = {self.AC.s4_CR: np.array(env[3]).reshape((-1, ENV_DIMS_new))}

                CR1_prob = SESS.run(self.AC.CR1_prob, feed_dict1)
                CR2_prob = SESS.run(self.AC.CR2_prob, feed_dict2)
                CR3_prob = SESS.run(self.AC.CR3_prob, feed_dict3)
                CR4_prob = SESS.run(self.AC.CR4_prob, feed_dict4)
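                # The four probability fetches above could also be batched into a
                # single SESS.run call (one graph round-trip); a minimal sketch,
                # assuming the four placeholders are independent:
                # CR1_prob, CR2_prob, CR3_prob, CR4_prob = SESS.run(
                #     [self.AC.CR1_prob, self.AC.CR2_prob, self.AC.CR3_prob, self.AC.CR4_prob],
                #     feed_dict={**feed_dict1, **feed_dict2, **feed_dict3, **feed_dict4})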

                c1_CRList, c1_CRList_d = self.AC.choose_CR_p(CR1_prob)
                c2_CRList, c2_CRList_d = self.AC.choose_CR_p(CR2_prob)
                c3_CRList, c3_CRList_d = self.AC.choose_CR_p(CR3_prob)
                c4_CRList, c4_CRList_d = self.AC.choose_CR_p(CR4_prob)

                # CC allocated by the neural network:
                c1_CC = lib.CR_mapping[c1_CRList_d][0] * options.serverCC
                c2_CC = lib.CR_mapping[c2_CRList_d][0] * options.serverCC
                c3_CC = lib.CR_mapping[c3_CRList_d][0] * options.serverCC
                c4_CC = lib.CR_mapping[c4_CRList_d][0] * options.serverCC

                print("神经网络分配的CC:", "c1:", c1_CC, "\tc2:", c2_CC, "\tc3:", c3_CC, "\tc4:", c4_CC)

                # add buffer info
                capa1_prob = lib.CR_mapping[c1_CRList_d][0]
                env[0][-1] = capa1_prob

                capa2_prob = lib.CR_mapping[c2_CRList_d][0]
                env[1][-1] = capa2_prob

                capa3_prob = lib.CR_mapping[c3_CRList_d][0]
                env[2][-1] = capa3_prob

                capa4_prob = lib.CR_mapping[c4_CRList_d][0]
                env[3][-1] = capa4_prob

                allenv = np.concatenate([env[0], env[1], env[2], env[3]], axis=0)
                buffer_s.append(np.array(allenv))
                buffer1_s.append(np.array(env[0]))
                buffer2_s.append(np.array(env[1]))
                buffer3_s.append(np.array(env[2]))
                buffer4_s.append(np.array(env[3]))

                buffer1_CR_a.append(c1_CRList_d)
                buffer2_CR_a.append(c2_CRList_d)
                buffer3_CR_a.append(c3_CRList_d)
                buffer4_CR_a.append(c4_CRList_d)
                # buffer_CR_a.append(all_CRList_d)

                # Map the CC allocated by the neural network to real transmission rates according to the router rules; CC_real type: list
                disCC = [c1_CC, c2_CC, c3_CC, c4_CC]
                CC_real = utils1.adjust_CC(disCC, allClientSNR)

                c1_action["CC"] = CC_real[0]
                c2_action["CC"] = CC_real[1]
                c3_action["CC"] = CC_real[2]
                c4_action["CC"] = CC_real[3]

                c1_action["RR"] = c1_CRList[1]
                c2_action["RR"] = c2_CRList[1]
                c3_action["RR"] = c3_CRList[1]
                c4_action["RR"] = c4_CRList[1]

                allClientsAction['c1'] = c1_action
                allClientsAction['c2'] = c2_action
                allClientsAction['c3'] = c3_action
                allClientsAction['c4'] = c4_action
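                # Assumed per-client action schema, inferred from this snippet:
                #   {"CC": adjusted transmission rate, "RR": chosen resolution}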

                # update env_state according to the real_CC and bitrate choices
                self.clientsExecResult = self.net.updateClientVideo(allClientsAction)
                # Fetch snr_dict9 for the next time step
                allClientSNR = utils1.get_snr(self.clientsExecResult)
                # Use window to record the info
                windowInfo.append(copy.deepcopy(self.clientsExecResult))
                if len(windowInfo) > 5:
                    del windowInfo[0]
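                # A more idiomatic sliding window would be collections.deque with
                # maxlen (a sketch, not the author's code; needs
                # "from collections import deque" at the top of the module):
                # windowInfo = deque(maxlen=5)  # old entries are evicted automatically
                # windowInfo.append(copy.deepcopy(self.clientsExecResult))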

                qoe_list, reward_list = utils1.reward_joint2(self.clientsExecResult)

                buffer_CR1_r.append(reward_list[0])
                buffer_CR2_r.append(reward_list[1])
                buffer_CR3_r.append(reward_list[2])
                buffer_CR4_r.append(reward_list[3])
                buffer_CR_r.append(reward_list[-1])

                rewardCRList[0].append(copy.deepcopy(qoe_list[0]))  # reward of client 1
                rewardCRList[1].append(copy.deepcopy(qoe_list[1]))
                rewardCRList[2].append(copy.deepcopy(qoe_list[2]))
                rewardCRList[3].append(copy.deepcopy(qoe_list[3]))  # reward of client 4

                # print the env info
                if self.isPrint:
                    self.printMidInfo(qoe_list, reward_list)

                total_step += 1

                if total_step % UPDATE_GLOBAL_ITER == 0:  # update global and assign to local net

                    env, *s_env = utils1.env_state8(self.clientsExecResult)

                    feed_dict = {self.AC.s_CR: np.array(env).reshape((-1, 4 * ENV_DIMS_new))}
                    CR_v_ = SESS.run(self.AC.CR_v, feed_dict)
                    v_.append(CR_v_[0][0])  # v_ is assumed to be a module-level list used for logging the critic value
                    CR_v_target = []

                    running_v = CR_v_[0][0]  # bootstrap from the critic's value of the next state
                    for r in buffer_CR_r[::-1]:    # back-propagate with discounting to get each step's target value
                        running_v = r + GAMMA * running_v
                        CR_v_target.append(running_v)  # append each step's target value to the buffer
                    CR_v_target.reverse()
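                    # This is the standard n-step discounted target,
                    #   v_target[t] = r[t] + GAMMA * v_target[t+1],
                    # bootstrapped from the critic's estimate of the final state.
                    # The same computation as a standalone sketch (hypothetical helper):
                    # def discounted_targets(rewards, bootstrap, gamma):
                    #     targets, running = [], bootstrap
                    #     for r in reversed(rewards):
                    #         running = r + gamma * running
                    #         targets.append(running)
                    #     targets.reverse()
                    #     return targets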
                    # print("CR_v_target: ", CR_v_target)
                    v_target.append(CR_v_target[0])  # v_target is assumed to be a module-level list used for logging
                    # *****************************************************************************************
                    ENV1 = buffer1_s
                    ENV2 = buffer2_s
                    ENV3 = buffer3_s
                    ENV4 = buffer4_s
                    ALLENV = buffer_s

                    allCR1 = buffer1_CR_a
                    allCR2 = buffer2_CR_a
                    allCR3 = buffer3_CR_a
                    allCR4 = buffer4_CR_a
                    # *****************************************************************************************
                    feed_dict_A1 = {
                        self.AC.s_CR: ALLENV,    # (?, 32)
                        self.AC.s1_CR: ENV1,       # (?, 8)
                        self.AC.cr1_a: allCR1,   # (?, )  # used to compute the actor loss
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 1)),   # (?, 1)
                        self.AC.C2C_var: np.reshape(np.var([CC_real[0], c1_CC]), [-1, 1])
                    }

                    feed_dict_A2 = {
                        self.AC.s_CR: ALLENV,
                        self.AC.s2_CR: ENV2,
                        self.AC.cr2_a: allCR2,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 1)),
                        self.AC.C2C_var: np.reshape(np.var([CC_real[1], c2_CC]), [-1, 1])
                    }

                    feed_dict_A3 = {
                        self.AC.s_CR: ALLENV,
                        self.AC.s3_CR: ENV3,
                        self.AC.cr3_a: allCR3,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 1)),
                        self.AC.C2C_var: np.reshape(np.var([CC_real[2], c3_CC]), [-1, 1])
                    }

                    feed_dict_A4 = {
                        self.AC.s_CR: ALLENV,
                        self.AC.s4_CR: ENV4,
                        self.AC.cr4_a: allCR4,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 1)),
                        self.AC.C2C_var: np.reshape(np.var([CC_real[3], c4_CC]), [-1, 1])
                    }

                    feed_dict_C = {
                        self.AC.s_CR: ALLENV,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 1))
                    }

                    # *********************************** Debug ******************************************************
                    # CR1_A_loss = SESS.run(self.AC.CR1_A_loss, feed_dict_A1)
                    # print("-" * 30)
                    # # print("CR1_A_loss:", CR1_A_loss)
                    #
                    # CR2_A_loss = SESS.run(self.AC.CR2_A_loss, feed_dict_A2)
                    # print("-" * 30)
                    # # print("CR2_A_loss:", CR2_A_loss)
                    #
                    # CR3_A_loss = SESS.run(self.AC.CR3_A_loss, feed_dict_A3)
                    # print("-" * 30)
                    # # print("CR3_A_loss:", CR3_A_loss)
                    #
                    # CR4_A_loss = SESS.run(self.AC.CR4_A_loss, feed_dict_A4)
                    # print("-" * 30)
                    # # print("CR4_A_loss:", CR4_A_loss)
                    #
                    # CR_C_loss = SESS.run(self.AC.CR_C_loss, feed_dict_C)
                    # # print("CR_C_loss", CR_C_loss)
                    #
                    # critic_loss.append(CR_C_loss[0])


                    # *************************************************** Train **********************************************************
                    self.AC.update_A1(feed_dict_A1)
                    self.AC.update_A2(feed_dict_A2)
                    self.AC.update_A3(feed_dict_A3)
                    self.AC.update_A4(feed_dict_A4)
                    self.AC.update_C(feed_dict_C)

                    self.AC.pull_CR()

                    rewardCRList = [[] for _ in range(options.HostNum)]
                    buffer_s, buffer1_s, buffer2_s, buffer3_s, buffer4_s, \
                    buffer_CR_a, buffer1_CR_a, buffer2_CR_a, buffer3_CR_a, buffer4_CR_a, \
                    buffer_CR1_r, buffer_CR2_r, buffer_CR3_r, buffer_CR4_r, buffer_CR_r = [], [], [], [], [], [], [], [], [], [], [], [], [], [], []

                    if len(GLOBAL_RUNNING_R) == 0:  # record running episode reward
                        GLOBAL_RUNNING_R.append(reward_list[-1])
                    else:
                        GLOBAL_RUNNING_R.append(0.99 * GLOBAL_RUNNING_R[-1] + 0.01 * reward_list[-1])
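                    # GLOBAL_RUNNING_R keeps an exponential moving average of the
                    # episode reward, R_k = 0.99 * R_{k-1} + 0.01 * r_k, which
                    # smooths the value printed below.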
                    print(
                        self.name,
                        "Ep:", GLOBAL_EP,
                        "| Ep_CR_r: %i" % GLOBAL_RUNNING_R[-1],
                    )
                    GLOBAL_EP += 1
                    break
Example #2
    def work(self):
        global GLOBAL_RUNNING_R, GLOBAL_EP  # GLOBAL_RUNNING_R is the reward of all workers, GLOBAL_EP is the total iterations of all workers
        total_step = 1  # iterations of this worker

        # Store the train_data and train_label
        allSNR = [[] for _ in range(options.HostNum)]
        # Start to simulate the video-downloading
        self.clientsExecResult = self.net.updateClientVideo()
        allClientSNR = utils1.unitEnv_uni(self.clientsExecResult)
        for h_index in range(options.HostNum):
            allSNR[h_index] += allClientSNR[h_index].tolist()

        buffer_s, buffer1_s, buffer2_s, buffer3_s, buffer4_s,\
        buffer_CR_a, buffer1_CR_a, buffer2_CR_a, buffer3_CR_a, buffer4_CR_a,\
        buffer_CR1_r, buffer_CR2_r, buffer_CR3_r, buffer_CR4_r, buffer_CR_r = [], [], [], [], [], [], [], [], [], [], [], [], [], [], []
        windowInfo = []
        rewardCRList = [[] for _ in range(options.HostNum)]

        # while not COORD.should_stop() and GLOBAL_EP < MAX_GLOBAL_EP:
        while GLOBAL_EP < MAX_GLOBAL_EP:
            # ep_r = 0  # the total reward of this episode

            while True:
                allClientsAction = {}
                c1_action = {}
                c2_action = {}
                c3_action = {}
                c4_action = {}

                # get the env info
                env, *s_env = utils1.env_state8(self.clientsExecResult)

                feed_dict1 = {
                    self.AC.s1_CR: np.array(env[0]).reshape((-1, ENV_DIMS_new))
                }
                feed_dict2 = {
                    self.AC.s2_CR: np.array(env[1]).reshape((-1, ENV_DIMS_new))
                }
                feed_dict3 = {
                    self.AC.s3_CR: np.array(env[2]).reshape((-1, ENV_DIMS_new))
                }
                feed_dict4 = {
                    self.AC.s4_CR: np.array(env[3]).reshape((-1, ENV_DIMS_new))
                }

                CR1_prob = SESS.run(self.AC.CR1_prob, feed_dict1)
                CR2_prob = SESS.run(self.AC.CR2_prob, feed_dict2)
                CR3_prob = SESS.run(self.AC.CR3_prob, feed_dict3)
                CR4_prob = SESS.run(self.AC.CR4_prob, feed_dict4)

                c1_CRList, c1_CRList_d = self.AC.choose_CR_p(CR1_prob)
                c2_CRList, c2_CRList_d = self.AC.choose_CR_p(CR2_prob)
                c3_CRList, c3_CRList_d = self.AC.choose_CR_p(CR3_prob)
                c4_CRList, c4_CRList_d = self.AC.choose_CR_p(CR4_prob)

                capa2_all = options.serverCC - lib.CR_mapping[c1_CRList_d][0] * options.serverCC
                capa3_all = capa2_all - lib.CR_mapping[c2_CRList_d][0] * capa2_all
                capa4_all = capa3_all - lib.CR_mapping[c3_CRList_d][0] * capa3_all
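                # Capacity is allocated sequentially: each client takes a fraction
                # of whatever the previous clients left over. The same pattern as a
                # standalone sketch (hypothetical helper):
                # def sequential_capacity(total, fractions):
                #     remaining, alloc = total, []
                #     for f in fractions:
                #         alloc.append(f * remaining)
                #         remaining -= f * remaining
                #     return alloc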

                # add buffer info
                capa1_prob = lib.CR_mapping[c1_CRList_d][0]
                env[0][-1] = capa1_prob

                capa2_prob = (lib.CR_mapping[c2_CRList_d][0] * capa2_all) / options.serverCC
                env[1][-1] = capa2_prob

                capa3_prob = (lib.CR_mapping[c3_CRList_d][0] * capa3_all) / options.serverCC
                env[2][-1] = capa3_prob

                capa4_prob = (lib.CR_mapping[c4_CRList_d][0] * capa4_all) / options.serverCC
                env[3][-1] = capa4_prob

                allenv = np.concatenate([env[0], env[1], env[2], env[3]],
                                        axis=0)
                buffer_s.append(np.array(allenv))
                buffer1_s.append(np.array(env[0]))
                buffer2_s.append(np.array(env[1]))
                buffer3_s.append(np.array(env[2]))
                buffer4_s.append(np.array(env[3]))

                # all_CRList_d = np.concatenate([c1_CRList_d, c2_CRList_d, c3_CRList_d, c4_CRList_d], 0)
                buffer1_CR_a.append(c1_CRList_d)
                buffer2_CR_a.append(c2_CRList_d)
                buffer3_CR_a.append(c3_CRList_d)
                buffer4_CR_a.append(c4_CRList_d)
                # buffer_CR_a.append(all_CRList_d)   # todo

                c1_action["CC"] = lib.CR_mapping[c1_CRList_d][0] * options.serverCC
                c2_action["CC"] = lib.CR_mapping[c2_CRList_d][0] * capa2_all
                c3_action["CC"] = lib.CR_mapping[c3_CRList_d][0] * capa3_all
                c4_action["CC"] = lib.CR_mapping[c4_CRList_d][0] * capa4_all

                c1_action["RR"] = c1_CRList[1]
                c2_action["RR"] = c2_CRList[1]
                c3_action["RR"] = c3_CRList[1]
                c4_action["RR"] = c4_CRList[1]

                allClientsAction['c1'] = c1_action
                allClientsAction['c2'] = c2_action
                allClientsAction['c3'] = c3_action
                allClientsAction['c4'] = c4_action
                print("allAction:", allClientsAction)
                # update env_state according to the CC and resolution choices
                self.clientsExecResult = self.net.updateClientVideo(
                    allClientsAction)

                # Use window to record the info
                windowInfo.append(copy.deepcopy(self.clientsExecResult))
                if len(windowInfo) > 5:
                    del windowInfo[0]

                # compute reward
                # r = utils.reward_window(windowInfo)
                ep_r_CR1, ep_r_CR2, ep_r_CR3, ep_r_CR4 = utils1.reward_joint2(
                    self.clientsExecResult)  # todo:reward_joint3
                ep_r_CR = ep_r_CR1 + ep_r_CR2 + ep_r_CR3 + ep_r_CR4
                buffer_CR1_r.append(ep_r_CR1)
                buffer_CR2_r.append(ep_r_CR2)
                buffer_CR3_r.append(ep_r_CR3)
                buffer_CR4_r.append(ep_r_CR4)
                buffer_CR_r.append(ep_r_CR)

                rewardCRList[0].append(copy.deepcopy(ep_r_CR1))
                rewardCRList[1].append(copy.deepcopy(ep_r_CR2))
                rewardCRList[2].append(copy.deepcopy(ep_r_CR3))
                rewardCRList[3].append(copy.deepcopy(ep_r_CR4))

                capa2_all = options.serverCC - c1_CRList[0] * options.serverCC  # recompute the residual capacities used in the prints below
                capa3_all = capa2_all - c2_CRList[0] * capa2_all
                capa4_all = capa3_all - c3_CRList[0] * capa3_all

                # print the env info
                if self.isPrint:
                    self.printMidInfo()

                if total_step % 1 == 0:  # print interval of 1 step, so this is always true
                    print("CC_client1: ", lib.CR_mapping[c1_CRList_d][0],
                          lib.CR_mapping[c1_CRList_d][0] * options.serverCC)
                    print("CC_client2: ", lib.CR_mapping[c2_CRList_d][0],
                          lib.CR_mapping[c2_CRList_d][0] * capa2_all)
                    print("CC_client3: ", lib.CR_mapping[c3_CRList_d][0],
                          lib.CR_mapping[c3_CRList_d][0] * capa3_all)
                    print("CC_client4: ", lib.CR_mapping[c4_CRList_d][0],
                          lib.CR_mapping[c4_CRList_d][0] * capa4_all)
                    print("-" * 30)  #
                    print("Reso_client1: ", c1_CRList[1])
                    print("Reso_client2: ", c2_CRList[1])
                    print("Reso_client3: ", c3_CRList[1])
                    print("Reso_client4: ", c4_CRList[1])

                total_step += 1

                if total_step % UPDATE_GLOBAL_ITER == 0:  # update global and assign to local net
                    print("GLOBAL_EP:", GLOBAL_EP)
                    if self.isPrint:
                        self.printMidInfo()

                    env, *s_env = utils1.env_state8(self.clientsExecResult)

                    feed_dict = {
                        self.AC.s_CR: np.array(env).reshape(
                            (-1, 4 * ENV_DIMS_new))
                    }
                    CR_v_ = SESS.run(self.AC.CR_v, feed_dict)
                    print("CR_v_: ", CR_v_)
                    print("buffer_CR1_r:", buffer_CR1_r)
                    print("buffer_CR2_r:", buffer_CR2_r)
                    print("buffer_CR3_r:", buffer_CR3_r)
                    print("buffer_CR4_r:", buffer_CR4_r)

                    CR_v_target = [[] for _ in range(options.HostNum)]

                    for h_index in range(options.HostNum):
                        reward_CR_client = rewardCRList[h_index][::-1]
                        value = CR_v_[0, h_index]
                        for r in reward_CR_client:
                            value = r + GAMMA * value
                            CR_v_target[h_index].append(value)
                        CR_v_target[h_index].reverse()
                    CR_v_target = np.array(CR_v_target).T
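                    # Per host, v_target[h][t] = r[h][t] + GAMMA * v_target[h][t+1],
                    # bootstrapped from CR_v_[0, h]; the transpose turns the
                    # (HostNum, T) array into (T, HostNum) so it matches the
                    # (-1, 4) placeholder shape used in the feeds below.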

                    # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
                    # batch_num = len(buffer_s)
                    # # allENV = np.array(buffer_s).reshape((batch_num, 4 * ENV_DIMS_new))
                    # # allCR = np.array(buffer_CR_a).reshape((batch_num, 4))
                    #
                    #
                    # ENV1 = np.array(buffer_s[:0:]).reshape((batch_num, ENV_DIMS_new))
                    # ENV2 = np.array(buffer_s[1]).reshape((batch_num, ENV_DIMS_new))
                    # ENV3 = np.array(buffer_s[2]).reshape((batch_num, ENV_DIMS_new))
                    # ENV4 = np.array(buffer_s[3]).reshape((batch_num, ENV_DIMS_new))

                    # *****************************************************************************************
                    ENV1 = buffer1_s
                    ENV2 = buffer2_s
                    ENV3 = buffer3_s
                    ENV4 = buffer4_s
                    ALLENV = buffer_s

                    allCR1 = buffer1_CR_a
                    allCR2 = buffer2_CR_a
                    allCR3 = buffer3_CR_a
                    allCR4 = buffer4_CR_a
                    # *****************************************************************************************
                    feed_dict_A1 = {
                        self.AC.s_CR: ALLENV,  # (?, 32)
                        self.AC.s1_CR: ENV1,  # (?, 8)
                        self.AC.cr1_a: allCR1,  # (?, )
                        self.AC.CR_v_target: np.reshape(CR_v_target,
                                                        (-1, 4))  # (?,4)
                    }

                    feed_dict_A2 = {
                        self.AC.s_CR: ALLENV,
                        self.AC.s2_CR: ENV2,
                        self.AC.cr2_a: allCR2,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 4))
                    }

                    feed_dict_A3 = {
                        self.AC.s_CR: ALLENV,
                        self.AC.s3_CR: ENV3,
                        self.AC.cr3_a: allCR3,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 4))
                    }

                    feed_dict_A4 = {
                        self.AC.s_CR: ALLENV,
                        self.AC.s4_CR: ENV4,
                        self.AC.cr4_a: allCR4,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 4))
                    }

                    feed_dict_C = {
                        self.AC.s_CR: ALLENV,
                        self.AC.CR_v_target: np.reshape(CR_v_target, (-1, 4))
                    }

                    # *********************************** Debug ******************************************************
                    CR1_A_loss = SESS.run(self.AC.CR1_A_loss, feed_dict_A1)
                    print("-" * 30)
                    print("CR1_A_loss:", CR1_A_loss)

                    CR2_A_loss = SESS.run(self.AC.CR2_A_loss, feed_dict_A2)
                    print("-" * 30)
                    print("CR2_A_loss:", CR2_A_loss)

                    CR3_A_loss = SESS.run(self.AC.CR3_A_loss, feed_dict_A3)
                    print("-" * 30)
                    print("CR3_A_loss:", CR3_A_loss)

                    CR4_A_loss = SESS.run(self.AC.CR4_A_loss, feed_dict_A4)
                    print("-" * 30)
                    print("CR4_A_loss:", CR4_A_loss)

                    CR_C_loss = SESS.run(self.AC.CR_C_loss, feed_dict_C)
                    critic_loss.append(CR_C_loss)  # critic_loss is assumed to be a module-level list used for logging
                    print("CR_C_loss", CR_C_loss)

                    # ************************************ Train *****************************************************
                    train_iters = 3  # todo: number of actor update passes per global step
                    for _ in range(train_iters):
                        self.AC.train_CR1(feed_dict_A1)
                        self.AC.train_CR2(feed_dict_A2)
                        self.AC.train_CR3(feed_dict_A3)
                        self.AC.train_CR4(feed_dict_A4)
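                    # Note: only the four actors are trained in this variant;
                    # feed_dict_C is used above just to evaluate CR_C_loss. If the
                    # critic should also be updated here, a call analogous to
                    # Example #1's self.AC.update_C(feed_dict_C) would be needed
                    # (an assumption based on Example #1, not this snippet).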

                    rewardCRList = [[] for _ in range(options.HostNum)]
                    buffer_s, buffer1_s, buffer2_s, buffer3_s, buffer4_s, \
                    buffer_CR_a, buffer1_CR_a, buffer2_CR_a, buffer3_CR_a, buffer4_CR_a, \
                    buffer_CR1_r, buffer_CR2_r, buffer_CR3_r, buffer_CR4_r, buffer_CR_r = [], [], [], [], [], [], [], [], [], [], [], [], [], [], []

                    GLOBAL_EP += 1
                    if len(GLOBAL_RUNNING_R) == 0:  # record running episode reward
                        GLOBAL_RUNNING_R.append(ep_r_CR)
                    else:
                        GLOBAL_RUNNING_R.append(0.99 * GLOBAL_RUNNING_R[-1] + 0.01 * ep_r_CR)
                    print(
                        self.name,
                        "Ep:",
                        GLOBAL_EP,
                        "| Ep_CR_r: %i" % GLOBAL_RUNNING_R[-1],
                    )

                    break
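
The two examples differ mainly in how server capacity is shared. Example #1 lets each actor choose an absolute fraction of options.serverCC and then maps the result to real transmission rates with utils1.adjust_CC based on the clients' SNR; Example #2 allocates capacity sequentially, each client taking a fraction of whatever the previous clients left unused, and trains each actor several times per update without the SNR adjustment step.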