Example #1
        ep = 10000
        tau = 0.0001
        gamma = 0.99
        min_batch = 32
        actor_lr = 0.00001
        critic_lr = 0.0001
        buffer_size = 1000000
        layers = [1024, 512]

        # Goal-conditioned state: the observation and the desired goal are concatenated.
        state_dim = (env.observation_space["observation"].shape[0]
                     + env.observation_space["desired_goal"].shape[0])
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high

        actor_noise = OUNoise(mu=np.zeros(action_dim))
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound, layers,
                             actor_lr, tau, min_batch)
        critic = CriticNetwork(sess, state_dim, action_dim, layers, critic_lr,
                               tau, gamma, actor.get_num_trainable_vars())
        # Dump the graph so it can be inspected with TensorBoard.
        tf.compat.v1.summary.FileWriter("logdir/graphpend",
                                        graph=tf.compat.v1.get_default_graph())

        print("\033[0;1;32m")
        print("===================")
        print("LE DEBUT")
        print("===================")

        if "--demo" in sys.argv:
            if "--load" in sys.argv:
                print("loading weights")
                loader = tf.compat.v1.train.Saver()
                arg_index = sys.argv.index("--load")
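
The fragment above is cut off in the middle of the "--load" handling. Example #3 below finishes the same pattern, so a plausible continuation (the "savedir/" path is taken from that example, not from this fragment) is:

                save_name = sys.argv[arg_index + 1]
                loader.restore(sess, "savedir/" + save_name + "/save")
                print("weights loaded")
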
Example #2
def main():
    with tf.compat.v1.Session() as sess:

        tic = time.time()

        env = customEnv()

        if "--mstep" in sys.argv:
            arg_index = sys.argv.index("--mstep")
            micro_stepping = int(sys.argv[arg_index + 1])
        else:
            micro_stepping = 1

        if "--ep" in sys.argv:
            arg_index = sys.argv.index("--ep")
            ep = int(sys.argv[arg_index + 1])
        else:
            ep = 10000

        tau = 0.001
        gamma = 0.99
        min_batch = 64
        actor_lr = 0.0001
        critic_lr = 0.001
        buffer_size = 1000000
        layers = [300]

        state_dim = (env.observation_space["observation"].shape[0]
                     + env.observation_space["desired_goal"].shape[0]) * micro_stepping
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high

        actor = ActorNetwork(sess, state_dim, action_dim, action_bound, layers, actor_lr, tau, min_batch)
        critic = CriticNetwork(sess, state_dim, action_dim, layers, critic_lr, tau, gamma, actor.get_num_trainable_vars())

        # Supervised pre-training targets: the actor is regressed onto the recorded
        # actions and the critic onto the recorded rewards.
        action_wanted = tf.compat.v1.placeholder(tf.float32, (None, action_dim))
        reward_wanted = tf.compat.v1.placeholder(tf.float32, (None, 1))

        actor_target = tf.reduce_mean(tf.square(actor.out - action_wanted))
        critic_target = tf.reduce_mean(tf.square(critic.out - reward_wanted))

        actor_train = tf.compat.v1.train.AdamOptimizer(actor_lr).minimize(actor_target)
        critic_train = tf.compat.v1.train.AdamOptimizer(critic_lr).minimize(critic_target)


        # Hard-copy the trained weights into the target networks once pre-training is done.
        update_target_network_actor = [
            actor.target_network_params[i].assign(actor.network_params[i])
            for i in range(len(actor.target_network_params))
        ]
        update_target_network_critic = [
            critic.target_network_params[i].assign(critic.network_params[i])
            for i in range(len(critic.target_network_params))
        ]

        print("\033[0;1;32m")
        print("===================")
        print("LE DEBUT")
        print("===================")

        print("loading buffer")
        arg_index = sys.argv.index("--loadBuff")
        buffPath = sys.argv[arg_index + 1]
        buffer = compress_json.local_load("preTrain/"+buffPath+".json.gz")
        print("buffer loaded")

        sess.run(tf.compat.v1.global_variables_initializer())

        saver = tf.compat.v1.train.Saver()

        # Supervised pre-training loop: each iteration fits the actor and the
        # critic on one mini-batch sampled from the recorded buffer.
        for i in range(1, ep + 1):
            states, actions, rewards = sample(buffer, min_batch)

            sess.run(actor_train, {actor.inputs: states, action_wanted: actions})
            sess.run(critic_train, {critic.inputs: states, critic.action: actions,
                                    reward_wanted: np.reshape(rewards, (min_batch, 1))})

            print("\033[0;1;4;97m", end='')
            print("miniBatch {} / {}".format(i, ep), end='')
            print("\033[0;m     ", end='')
            tac = time.time()
            print("\033[3;91m", end='')
            print("{} secondes".format(int(tac - tic)), end='')
            print("\033[0;m                  \r", end='')


        # Copy the pre-trained weights into the target networks, then save the session.
        sess.run(update_target_network_actor)
        sess.run(update_target_network_critic)

        # "--save" is also required: the checkpoint is written under savedir/<name>/save.
        arg_index = sys.argv.index("--save")
        save_name = sys.argv[arg_index + 1]
        saver.save(sess, "savedir/" + save_name + "/save")
        print("\033[0;1;32m")
        print("session saved at : " + save_name)


    return 0
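
Example #2 relies on a sample(buffer, min_batch) helper that is not shown in the listing. Assuming the buffer is simply a list of (state, action, reward) transitions, a minimal sketch of such a helper could look like this (the names and the buffer layout are assumptions, not taken from the repository):

import random

import numpy as np


def sample(buffer, batch_size):
    # Draw batch_size transitions uniformly at random and split them into arrays.
    batch = random.sample(buffer, batch_size)
    states = np.array([transition[0] for transition in batch])
    actions = np.array([transition[1] for transition in batch])
    rewards = np.array([transition[2] for transition in batch])
    return states, actions, rewards

With the flags parsed above, a typical invocation might be "python preTrain.py --loadBuff myBuffer --ep 20000 --save pretrainedRun" (the script name is hypothetical; the flags are the ones read by the script).
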
Example #3
        ep = 10000
        tau = 0.001
        gamma = 0.99
        min_batch = 64
        actor_lr = 0.0001
        critic_lr = 0.001
        buffer_size = 1000000
        layers = [400,300]

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        action_bound = env.action_space.high

        actor_noise = OUNoise(mu=np.zeros(action_dim))
        actor = ActorNetwork(sess, state_dim, action_dim, action_bound, layers, actor_lr, tau, min_batch)
        critic = CriticNetwork(sess, state_dim, action_dim, layers, critic_lr, tau, gamma, actor.get_num_trainable_vars())
        tf.compat.v1.summary.FileWriter("logdir/graphpend", graph=tf.compat.v1.get_default_graph())

        print("\033[0;1;32m")
        print("===================")
        print("LE DEBUT")
        print("===================")
        if "--demo" in sys.argv:
            if "--load" in sys.argv:
                print("loading weights")
                loader = tf.compat.v1.train.Saver()
                arg_index = sys.argv.index("--load")
                save_name = sys.argv[arg_index + 1]
                loader.restore(sess, "savedir/" + save_name + "/save")
                print("weights loaded")