def simulation():

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.8, doe=0, eval_period=1)

    """
    Example of user-defined states and actions.  Users do not need to do this; it is only for users who want to
    define their own states and actions.  RL will automatically populate the states and actions if the user does
    not supply their own.
    """
    states = []
    rl.x1 = np.linspace(1.5, 3.5, 16)
    rl.x2 = np.linspace(3.5, 5.5, 16)
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = []
    rl.u1 = np.linspace(-0.4, 0.4, 5)
    rl.u2 = np.linspace(-0.4, 0.4, 5)
    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    sim_length = 90
    kp_ki = np.zeros([sim_length, 2])
    kp_ki[0, :] = np.array([1.4, 3.3])
    actions = np.zeros([sim_length, 2])

    for t in range(1, sim_length):

        """
        RL Evaluate
        """
        state, action = rl.ucb_action_selection(kp_ki[t - 1, :])
        action, action_index = rl.action_selection(state, action, actions[t - 1, :], no_decay=25,
                                                   ep_greedy=False, time=t, min_eps_rate=0.4)
        actions[t, :] = action
        kp_ki[t, :], reward, x_trajectory = sim(kp_ki[t - 1], action)

        """
        Feedback evaluation
        """
        rl.matrix_update(action_index, reward, state, kp_ki[t, :], 5)

        # Bound the gains: if either value drifts outside [0, 8], reset to a safe default
        if (kp_ki[t, :] > np.array([8, 8])).any() or (kp_ki[t, :] < np.array([0, 0])).any():
            kp_ki[t, :] = np.array([1.5, 3.5])

        rl.autosave(t, 250)

    return kp_ki, actions, rl
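
# Example driver sketch: assumes numpy, ReinforceLearning and sim() are imported at
# module level as above, matplotlib is installed, and the pre-trained *_Matrix.txt
# files are in the working directory.
if __name__ == "__main__":
    kp_ki, actions, rl = simulation()

    import matplotlib.pyplot as plt

    # The two columns of kp_ki are treated here as the proportional and integral
    # gains, as the variable name suggests.
    plt.plot(kp_ki[:, 0], label="Kp")
    plt.plot(kp_ki[:, 1], label="Ki")
    plt.xlabel("Evaluation step")
    plt.ylabel("Gain value")
    plt.legend()
    plt.show()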
def simulation():

    # Model Initiation
    model = LinearSystem(nsim=100, model_type='MIMO', x0=np.array([1.333, 4]), u0=np.array([3, 6]),
                         xs=np.array([3.555, 4.666]), us=np.array([5, 7]), step_size=0.2)

    # Reinforcement Learning Initiation
    rl = ReinforceLearning(discount_factor=0.95, states_start=300, states_stop=340, states_interval=0.5,
                           actions_start=-15, actions_stop=15, actions_interval=2.5, learning_rate=0.5,
                           epsilon=0.2, doe=1.2, eval_period=1)

    """
    Example of user-defined states and actions.  Users do not need to do this; it is only for users who want to
    define their own states and actions.  RL will automatically populate the states and actions if the user does
    not supply their own.
    """
    states = []
    rl.x1 = np.linspace(0, 6, 25)
    rl.x2 = np.linspace(2, 6, 17)
    for i in rl.x1:
        for j in rl.x2:
            states.append([i, j])
    rl.user_states(list(states))

    actions = []
    rl.u1 = np.linspace(1, 7, 19)
    rl.u2 = np.linspace(4, 9, 16)
    for i in rl.u1:
        for j in rl.u2:
            actions.append([i, j])
    rl.user_actions(list(actions))

    """
    Load pre-trained Q, T and NT matrices
    """
    q = np.loadtxt("Q_Matrix.txt")
    t = np.loadtxt("T_Matrix.txt")
    nt = np.loadtxt("NT_Matrix.txt")
    rl.user_matrices(q, t, nt)

    """
    Simulation portion
    """
    rlist = []

    for episode in range(1):

        # Reset the model at the start of each episode
        model.reset(random_init=False)
        tot_reward = 0
        state = 0
        action_index = 0

        for t in range(1, model.Nsim + 1):

            """
            Disturbance
            """
            # if t % 10 == 0:
            #     model.x[t - 1, :] += np.random.uniform(-2.1, 2.1, size=2)

            """
            RL Evaluate
            """
            if t % rl.eval_period == 0:
                state, action = rl.ucb_action_selection(model.x[t - 1, :])
                action, action_index = rl.action_selection(state, action, model.u[t - 1, :], no_decay=25,
                                                           ep_greedy=True, time=t, min_eps_rate=0.5)
            else:
                action = model.u[t - 1, :][0]

            next_state, reward, done, info = model.step([action], t, obj_function="MPC")

            """
            Feedback evaluation
            """
            if t == rl.eval_feedback:
                rl.matrix_update(action_index, reward, state, model.x[t, :], 5)

            tot_reward = tot_reward + reward

        rlist.append(tot_reward)
        rl.autosave(episode, 250)

        if episode % 100 == 0:
            print(model.cost_function(transient_period=200))

    return model, rl, rlist
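
# Example driver sketch: assumes numpy, LinearSystem and ReinforceLearning are
# imported at module level as above, matplotlib is installed, and the pre-trained
# *_Matrix.txt files are in the working directory. model.x is read as the state
# trajectory in the loop above.
if __name__ == "__main__":
    model, rl, rlist = simulation()

    print("Total reward per episode:", rlist)

    import matplotlib.pyplot as plt

    # Plot the closed-loop state trajectory from the single-episode run above
    plt.plot(model.x[:, 0], label="x1")
    plt.plot(model.x[:, 1], label="x2")
    plt.xlabel("Time step")
    plt.ylabel("State value")
    plt.legend()
    plt.show()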