Example #1
def play_with_ACpred(u_init, noisy=True):
    """
    Function that plays a certain number of iterations of the game (until it finishes).
    This function can also be used to construct the rpm.
    The loop on the episodes is outside of the function. 
    
    Arguments :
    -----------
    u_init : To set the initialisation
    """
    episode_memory = []

    epsilon = 0
    total_rew = 0
    a_t = np.zeros([1, X.size])

    # For exploration of the action space
    #    noise = noiselevel
    #    noiselevel = noise * 0.999
    #    noise_t = np.zeros([1, action_size])

    # Draw a random integer q between 1 and max_steps.
    # Solve with Roe up to the q-th iteration and store the result as s_t,
    # then apply the same procedure as used so far.
    # Repeat this process until the replay buffer is full.

    global graph
    while deque_obj.size() < cst_REIL["replay_memory_size"]:

        int_line = range(0, cst_simu["max_steps"])
        curr_step = np.random.choice(int_line)

        temp_state = u_init

        for j in range(0, curr_step + 1):
            temp_state1 = ACactions.action_with_burger(temp_state,
                                                       cst_simu["r"], f,
                                                       fprime)

            if j != curr_step:
                temp_state = temp_state1

        s_t = temp_state

        with graph.as_default():
            a_t_original = act.model.predict(np.array([s_t]))

            OU_noise = np.zeros_like(a_t_original)

            if noisy:
                epsilon = decays.create_decay_fn(
                    "linear",
                    curr_step=j,
                    initial_value=cst_REIL['EpsForNoise_init'],
                    final_value=cst_REIL['EpsForNoise_fina'],
                    max_step=cst_simu["max_steps"])

                args = {
                    "rp_type": "ornstein-uhlenbeck",
                    "n_action": 1,
                    "rp_theta": 0.1,
                    "rp_mu": 0.,
                    "rp_sigma": 0.2,
                    "rp_sigma_min": 0.05
                }

                coeffOU_noise = noise.create_random_process(args).sample()
                OU_noise = coeffOU_noise * (np.random.rand(X.size) - 0.5)

            a_t = a_t_original + epsilon * OU_noise  # scale the noise by the decayed epsilon
            a_t = a_t.ravel()

            s_t1 = ACactions.action_with_delta_Un(s_t, a_t)

            r_t = reward(s_t1, s_t)

            #        print ("state :\n{}".format(s_t))
            #        print ("action :\n{}".format(a_t))
            #        print ("next state :\n{}".format(s_t1))
            #
            #        time.sleep(5)

            if 1 < r_t < 1000:
                done = True  # Game over
                rew = r_t

            elif r_t > 1000:
                done = True  # Game over
                rew = r_t * 10  # Heavy penalty

            else:
                done = False  # Keep going while things are still good
                rew = r_t

            print("reward :\n{}".format(rew))
            deque_obj.append((s_t, a_t, rew, s_t1, done))
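
The docstring notes that the episode loop lives outside the function. A minimal driver sketch for filling the replay buffer, where N_EPISODES and make_initial_condition() are hypothetical placeholders for however episodes and initial fields are produced in the surrounding script:

# Hypothetical driver; N_EPISODES and make_initial_condition() are
# placeholder names, not part of the original code.
N_EPISODES = 50

for ep in range(N_EPISODES):
    u0 = make_initial_condition()      # e.g. a sampled initial field on X
    play_with_ACpred(u0, noisy=True)   # appends transitions to deque_obj
    if deque_obj.size() >= cst_REIL["replay_memory_size"]:
        break                          # replay memory is full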
Example #2
def play_with_ACpred(u_init, noisy=True):
    """
    Function that plays a certain number of iterations of the game (until it finishes).
    This function can also be used to construct the rpm.
    The loop on the episodes is outside of the function. 
    
    Arguments :
    -----------
    u_init : To set the initialisation
    """
    episode_memory = []
    s_t = u_init

    epsilon = 0
    total_rew = 0
    a_t = np.zeros([1, s_t.size])

    # For exploration of the action space
    #    noise = noiselevel
    #    noiselevel = noise * 0.999
    #    noise_t = np.zeros([1, action_size])

    for j in range(cst_simu["max_steps"]):
        global graph
        while deque_obj.size() < cst_REIL["replay_memory_size"]:
            with graph.as_default():
                a_t_original = act.model.predict(np.array([s_t]))

            OU_noise = np.zeros_like(a_t_original)

            # Parameters of the Ornstein-Uhlenbeck process; defined outside
            # the noisy branch because the state perturbation below needs it.
            args = {
                "rp_type": "ornstein-uhlenbeck",
                "n_action": 1,
                "rp_theta": 0.1,
                "rp_mu": 0.,
                "rp_sigma": 0.2,
                "rp_sigma_min": 0.05
            }

            if noisy:
                epsilon = decays.create_decay_fn(
                    "linear",
                    curr_step=j,
                    initial_value=cst_REIL['EpsForNoise_init'],
                    final_value=cst_REIL['EpsForNoise_fina'],
                    max_step=cst_simu["max_steps"])

                OU_noise = noise.create_random_process(args).sample()

            a_t = a_t_original + epsilon * OU_noise
            a_t = a_t.ravel()

            s_t1 = ACactions.action_with_delta_Un(s_t, a_t)

            r_t = reward(s_t1, s_t)

            #        print ("state :\n{}".format(s_t))
            #        print ("action :\n{}".format(a_t))
            #        print ("next state :\n{}".format(s_t1))
            #
            #        time.sleep(5)

            if 1 < r_t < 1000:
                done = True  # Game over
                rew = r_t

            elif r_t > 1000:
                done = True  # Game over
                rew = -r_t  # Heavy penalty

            else:
                done = False  # Keep going while things are still good
                rew = r_t

            print("reward :\n{}".format(rew))
            deque_obj.append((s_t, a_t, rew, s_t1, done))

            s_t = s_t + noise.create_random_process(args).sample()  # randomly perturb the state for the next draw
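
Every example anneals epsilon through decays.create_decay_fn("linear", ...). A sketch of the linear interpolation those arguments suggest, assuming this behaviour rather than quoting the library's actual implementation:

def linear_decay(curr_step, initial_value, final_value, max_step):
    # Interpolate from initial_value at step 0 to final_value at max_step,
    # clamping the fraction so steps past the end stay at final_value.
    frac = min(max(curr_step / float(max_step), 0.0), 1.0)
    return initial_value + frac * (final_value - initial_value)

# linear_decay(0, 1.0, 0.05, 1000)    -> 1.0
# linear_decay(500, 1.0, 0.05, 1000)  -> 0.525
# linear_decay(1000, 1.0, 0.05, 1000) -> 0.05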
Example #3
def play(u_init):
    """
    Function that plays a certain number of iterations of the game (until it finishes).
    This function can also be used to construct the rpm.
    The loop on the episodes is outside of the function. 
    
    Arguments :
    -----------
    u_init : To set the initialisation
    """
    episode_memory = []
    s_t = u_init

    total_rew = 0
    a_t = np.zeros([1, s_t.size])

    # For exploration of the action space
    #    noise = noiselevel
    #    noiselevel = noise * 0.999
    #    noise_t = np.zeros([1, action_size])

    for j in range(max_steps):
        global graph
        with graph.as_default():
            a_t_original = actor.model.predict(np.array([s_t]))

        epsilon = decays.create_decay_fn("linear",
                                         curr_step=j,
                                         initial_value=EpsForNoise_init,
                                         final_value=EpsForNoise_fina,
                                         max_step=max_steps)

        if j % 150 == 0:
            print("steps = %d\t eps = %0.8f" % (j, epsilon))

        args = {
            "rp_type": "ornstein-uhlenbeck",
            "n_action": 1,
            "rp_theta": 0.1,
            "rp_mu": 0.,
            "rp_sigma": 0.2,
            "rp_sigma_min": 0.05
        }

        a_t = a_t_original + epsilon * noise.create_random_process(
            args).sample()
        a_t = a_t.ravel()

        a_t = np.clip(a_t, -1., 1.)  # clamp each action component to [-1, 1]
        s_t1 = action_with_delta_Un(s_t, a_t)

        #        return s_t, a_t, st_1

        r_t = reward(s_t1, s_t)

        #        print ("state :\n{}".format(s_t))
        #        print ("action :\n{}".format(a_t))
        #        print ("reward :\n{}".format(r_t))
        #        print ("next state :\n{}".format(s_t1))

        goon = r_t >= 0.001  # whether to go on: stop once the reward drops below 0.001

        if len(replay_memory) < replay_memory_size:
            replay_memory.append((s_t, a_t, r_t, s_t1, goon))

        else:
            if abs(np.random.randn()) > 0.5:
                replay_memory.popleft()  # Pop the leftmost element
            else:
                replay_memory.pop()  # Pop the rightmost element

        s_t = s_t1
        #        print ("next_state :\n{}".format(s_t1))

        total_rew += r_t

        if len(replay_memory) % 150 == 0:
            print("Memory size = %d" % len(replay_memory))
Example #4
def play_with_burger(u_init):
    """
    Function that plays a certain number of iterations of the game (until it finishes).
    This function can also be used to construct the rpm.
    The loop on the episodes is outside of the function. 
    
    we use timestep_roe to provide next steps
    
    Arguments :
    -----------
    u_init : To set the initialisation
    """
    episode_memory = []
    s_t = u_init

    total_rew = 0
    a_t = np.zeros([1, s_t.size])

    for j in range(max_steps):
        s_t1 = action_with_burger(s_t)
        a_t_original = np.asarray(s_t1) - np.asarray(s_t)  # the Burgers step itself acts as the action

        epsilon = decays.create_decay_fn("linear",
                                         curr_step=j,
                                         initial_value=EpsForNoise_init,
                                         final_value=EpsForNoise_fina,
                                         max_step=max_steps)

        #        if j % 200 == 0 :
        #            print ("steps = %d\t eps = %0.8f" %(j, epsilon))

        args = {
            "rp_type": "ornstein-uhlenbeck",
            "n_action": 1,
            "rp_theta": 0.1,
            "rp_mu": 0.,
            "rp_sigma": 0.2,
            "rp_sigma_min": 0.05
        }

        a_t = a_t_original + epsilon * noise.create_random_process(
            args).sample()
        a_t = a_t.ravel()

        #        a_tt = np.copy(a_t)
        #        for a in range(len(a_t)) :
        #            if a_tt[a] > 1. :
        #                a_tt[a] = 1.
        #            elif a_tt[a] < -1. :
        #                a_tt[a] = -1.
        #            else :
        #                pass
        #        a_t = np.array([a for a in a_tt])
        s_t1 = action_with_delta_Un(s_t, a_t)

        #        return s_t, a_t, st_1

        r_t = reward(s_t1, s_t)

        #        print ("state :\n{}".format(s_t))
        #        print ("action :\n{}".format(a_t))
        #        print ("reward :\n{}".format(r_t))
        #        print ("next state :\n{}".format(s_t1))

        goon = abs(r_t) >= 10  # whether to go on: stop once |reward| drops below 10

        if len(replay_memory) < replay_memory_size:
            replay_memory.append((s_t, a_t, r_t, s_t1, goon))

        else:
            if abs(np.random.randn()) > 0.5:
                replay_memory.popleft()  # Pop the leftmost element
            else:
                replay_memory.pop()  # Pop the rightmost element

        s_t = s_t1
        #        print ("next_state :\n{}".format(s_t1))

        total_rew += r_t
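
Both play() and play_with_burger() share the same eviction policy: once the buffer is full, the new transition is discarded and one stored element is popped from a randomly chosen end. A self-contained sketch of that policy with collections.deque, reproducing the behaviour as written above:

from collections import deque
import numpy as np

replay_memory_size = 10000
replay_memory = deque()

def store(transition):
    # Append while there is room; otherwise drop the incoming transition
    # and evict one element from either end of the deque.
    if len(replay_memory) < replay_memory_size:
        replay_memory.append(transition)
    elif abs(np.random.randn()) > 0.5:   # P(|N(0,1)| > 0.5) is about 0.62
        replay_memory.popleft()          # evict the oldest element
    else:
        replay_memory.pop()              # evict the newest element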