Example #1
def main():
    # create an environment
    env = gym.make("MountainCarContinuous-v0")

    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round((1 / (1 - weighted_avg.beta)))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.centroid)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=5)

    # create a mutation probability schedule
    mut_sch = sch.ExponentialDecaySchedule(initial_prob=.1, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    rand_proc = OrnsteinUhlenbeckProcess(theta=0.01)
    alg = Algorithm(registry=reg, random_process=rand_proc)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
        print("Num. of loaded individuals =", len(pop))
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_accessor = MountainCarObs()

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:

        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT

            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_accessor.current_observation = observation

            # run through the time steps of the simulation
            for t in range(MAX_TIME_STEPS):

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                actions_dict, input_vec_dict = alg.executebfc(obs_accessor, agent_id, add_noise=True)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=actions_dict.keys())

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(list(actions_dict.values()))
                reward = reward_shaping(pos=next_state[0], r=reward)

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(agent_id=agent_id, action=list(actions_dict.values()),
                                                   dec_reward_dict=reward_dict,
                                                   input_vec_dict=input_vec_dict, output_dict=actions_dict,
                                                   next_state_dict=None)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_accessor.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end it
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            # cache.compute_states_value(gamma=.9)
            cache.save_csv(path="data/")
            print(
                "Episode: {t}/{T} | score: {r}".format(t=ind_count, T=(NUM_OF_GENS * POP_SIZE),
                                                       r=total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward,)

            # save qualified individual
            if SAVE_BEST and total_reward >= SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])
        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop, ga_alg=ga, mut_sch=mut_sch, epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)
        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
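
Note on the reward_shaping call in the time-step loop above: the helper is not shown in this snippet. Below is a minimal sketch of what a position-based shaping function for MountainCarContinuous might look like; the goal_pos and scale parameters and the shaping rule are illustrative assumptions, not the project's actual implementation.

def reward_shaping(pos, r, goal_pos=0.45, scale=10.0):
    # hypothetical shaping: add a bonus proportional to how far the car
    # has climbed from its starting region (around pos = -0.5) toward the goal
    progress = max(0.0, pos + 0.5) / (goal_pos + 0.5)
    return r + scale * progress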
Example #2
import fuzzrl.core.plot.analysis as ana
import gym
import matplotlib.pyplot as plt
import seaborn as sb
from fuzzrl.core.fuzzy.runner import *
from fuzzrl.core.io.simdata import Document, Text, Line
from fuzzrl.core.conf import Defuzz as dfz

sb.set()

SAVE_BEST = True
SCORE_THRESHOLD = 450
QLFD_IND_FILE = "data/qualified.txt"

# chart series
weighted_avg = ana.WeightedAvg(beta=0.9)
all_ind_series = ana.Series(name="Individuals Performance")
avg_series = ana.Series(name="Average (window = {})".format(round((1 / (1 - weighted_avg.beta)))))
gen_series = ana.Series(name="Generation Performance")
mut_prob_series = ana.Series(name="Mutation probability")


def episode_finished(ind, ind_i, total_eps, total_r):
    print("Episode: {}/{} | score: {}".format(ind_i, total_eps, total_r))

    # save qualified individual
    if SAVE_BEST and total_r > SCORE_THRESHOLD:
        document = Document(name=QLFD_IND_FILE)
        document.addline(line=Line().add(text=Text(str(ind))))
        document.save(append=True)
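
The average series in these examples is labelled "window = 1/(1 - beta)", which is the effective window of an exponentially weighted moving average (10 for beta = 0.9). A minimal sketch of such an accumulator is shown below, assuming ana.WeightedAvg exposes a beta attribute, a value attribute, and an update() method that returns the running value, as the calls in the examples suggest; the actual library class may differ.

class WeightedAvg:
    """Exponentially weighted moving average; beta = 0.9 averages over
    roughly 1 / (1 - 0.9) = 10 recent scores."""

    def __init__(self, beta=0.9):
        self.beta = beta
        self.value = 0.0

    def update(self, x):
        # blend the new score into the running average
        self.value = self.beta * self.value + (1 - self.beta) * x
        return self.value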
Example #3
def main():
    # create an environment
    env = gym.make("CartPole-v1")

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(str(env.observation_space.high),
                                                                   str(env.observation_space.low)))
    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Episode Performance")
    avg_series = ana.Series(name="Average (window = {})".format(round((1 / (1 - weighted_avg.beta)))))

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(), registry=reg, defuzz_method=dfz.max_of_maximum)

    # Load pretrained NN model weights
    params = [10, 50, 30, 2]
    model = neural_net(num_inputs=4, params=params, lr=0.1, load=model_path, loss=neg_log_likelihood)
    reg.nn_models_dict["CartPoleMovement"] = model

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.nn_models_dict.keys())

    # create an object for retrieving input values
    obs_cartpole = CartPoleObs()

    # replay buffer
    cart_move_exp_rep = ReplayBuffer(max_size=1000)

    ts_elapsed = 0

    for i_episode in range(MAX_NUM_EPISODES):
        # get initial state
        state = env.reset()

        # initialize reward accumulator for the episode
        total_reward = 0

        # set the current state for retrieving specific inputs
        obs_cartpole.current_observation = state

        while True:
            # show the environment
            env.render()

            # since only one agent applies to this case study, set a dummy agent ID
            agent_id = 0

            # get an action
            code, action, input_vec_dict, probs_dict = alg.executenntree(obs_cartpole, agent_id,
                                                                         action_selection_func=greedy_strategy,
                                                                         func_args=None)

            # apply the selected action to the environment and observe feedback
            next_state, reward, done, _ = env.step(code)

            # set the received observation as the current array for retrieving input values
            obs_cartpole.current_observation = next_state

            # mark the models that executed for the agent in this time step
            cache.mark(output_dict_keys=probs_dict.keys())

            # decompose the received reward
            reward_dict = cache.decomposeReward(reward)

            # create experiences for the agent with respect to each model that executed for the agent
            state_dict = {"CartPoleMovement": np.array([obs_cartpole.getCartPosition(agent_id),
                                                        obs_cartpole.getCartVelocity(agent_id),
                                                        obs_cartpole.getPoleAngle(agent_id),
                                                        obs_cartpole.getPoleVelocity(agent_id)])}
            exp_dict = cache.createExperiences(agent_id=agent_id, action=code, dec_reward_dict=reward_dict,
                                               input_vec_dict=input_vec_dict, output_dict=probs_dict,
                                               next_state_dict=state_dict)

            # accumulate the rewards of all time steps
            total_reward += reward

            # add the experiences of an agent to their corresponding replay buffers
            for key, exp in exp_dict.items():
                if key == "CartPoleMovement":
                    cart_move_exp_rep.add(exp)

            # increment time steps played
            ts_elapsed += 1

            if ts_elapsed >= TIME_STEPS_BEFORE_TRAIN:
                # print("train the model")
                pass

            # if the episode is over, end it
            if done:
                break

        print("Episode: {}/{} | score: {}".format(i_episode + 1, MAX_NUM_EPISODES, total_reward))

        avg_series.addrecord(i_episode, weighted_avg.update(total_reward))

    plt.figure(0)
    plt.title("Cartpole with simple NN")
    plt.plot(avg_series.data()['x'], avg_series.data()['y'])
    plt.xlabel("episode")
    plt.ylabel("score")
    plt.show()
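
The ReplayBuffer used above belongs to the example's project, and only add() and max_size appear in the snippet. Below is a minimal sketch of a fixed-capacity buffer with uniform sampling; the sample() and __len__ methods are illustrative additions of the kind the stubbed-out training step would typically need, not the library's confirmed interface.

import random
from collections import deque


class ReplayBuffer:
    def __init__(self, max_size=1000):
        # oldest experiences are evicted automatically once the deque is full
        self._buffer = deque(maxlen=max_size)

    def add(self, experience):
        self._buffer.append(experience)

    def sample(self, batch_size):
        # uniform random mini-batch (no prioritisation)
        return random.sample(list(self._buffer), min(batch_size, len(self._buffer)))

    def __len__(self):
        return len(self._buffer)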
Example #4
def main():
    # create an environment
    env = gym.make(rlmarsenvs.carmunk_id)

    # print observation space ranges
    print("observation space ranges\nhigh = {}\nlow = {}\n".format(
        str(env.observation_space.high), str(env.observation_space.low)))
    # chart series
    weighted_avg = ana.WeightedAvg(beta=0.9)
    all_ind_series = ana.Series(name="Individuals Performance")
    avg_series = ana.Series(name="Average (window = {})".format(
        round((1 / (1 - weighted_avg.beta)))))
    gen_series = ana.Series(name="Generation Performance")
    mut_prob_series = ana.Series(name="Mutation probability")

    # create linguistic variables in a registry
    reg = xmlToLinvars(open(LIN_VARS_FILE).read())

    # create GFT with linguistic variables in the registry
    reg = xmlToGFT(open(GFT_FILE).read(),
                   registry=reg,
                   defuzz_method=dfz.max_of_maximum)

    # create GA instance with the registry object
    ga = GeneticAlgorithm(registry=reg, seed=123)

    # create a mutation probability schedule
    # mut_sch = sch.TimeBasedSchedule(decay_factor=1e-4)
    mut_sch = sch.LinearDecaySchedule(initial_prob=1.025, decay_factor=1e-2)

    # create GFT algorithm object with the registry
    alg = Algorithm(registry=reg)

    # create a cache for managing simulation data
    cache = Cache(reg.gft_dict.keys())

    # get initial population
    if LOAD_INIT_POP:
        pop = ga.load_initial_population(QLFD_IND_FILE, POP_SIZE)
        pop = pop[::-1]
    else:
        pop = ga.generate_initial_population(POP_SIZE)

    # initialize epoch or generation counter
    epoch = 0

    # initialize individual counter
    ind_count = 0

    # create an object for retrieving input values
    obs_carmunk = CarmunkObs()

    # Tau for Boltzmann exploration strategy
    tau_sch = sch.LinearDecaySchedule(initial_prob=20, decay_factor=0.02)

    # perform the simulation for a specified number of generations
    while epoch < NUM_OF_GENS:

        # Run the simulation with the current population
        for ind in pop:
            ind_count += 1

            # initialize reward accumulator for the individual
            total_reward = 0

            # configure the GFT with the current individual
            alg.configuregft(chromosome=ind)

            # control the environment with the configured GFT
            # for i_episode in range(NUM_EPISODES_PER_IND):

            # reset the environment
            observation = env.reset()

            # set the received observation as the current array for retrieving input values
            obs_carmunk.current_observation = observation

            # run through the time steps of the simulation
            t = 0
            while True:
                t += 1

                # show the environment
                env.render()

                # since only one agent applies to this case study, set a dummy agent ID
                agent_id = 0

                # get an action
                code, action, input_vec_dict, probs_dict = alg.executegft(
                    obs_carmunk, agent_id)

                # apply the selected action to the environment and observe feedback
                next_state, reward, done, _ = env.step(code)

                # mark the GFSs that executed for the agent in this time step
                cache.mark(output_dict_keys=probs_dict.keys())

                # decompose the received reward
                reward_dict = cache.decomposeReward(reward)

                # create experiences for the agent with respect to each GFS that executed for the agent
                exp_dict = cache.createExperiences(
                    agent_id=agent_id,
                    action=code,
                    dec_reward_dict=reward_dict,
                    input_vec_dict=input_vec_dict,
                    output_dict=probs_dict)

                # add the experiences of the agent to the cache
                cache.addExperiences(time_step=t, exp_dict=exp_dict)

                # set the received observation as the current array for retrieving input values
                obs_carmunk.current_observation = next_state

                # accumulate the rewards of all time steps
                total_reward += reward

                # if the episode is over, end it
                if done:
                    break

            # save contents of the cache and clear it for the next episode
            cache.save_csv()

            # if total_reward < 50:
            #     total_reward = - 50
            print("Episode finished after {} time steps".format(t + 1))
            print("Episode: {}/{} | score: {}".format(ind_count,
                                                      (NUM_OF_GENS * POP_SIZE),
                                                      total_reward))

            # set the return from the environment as the fitness value of the current individual
            ind.fitness.values = (total_reward, )

            # save qualified individual
            if SAVE_BEST and total_reward > SCORE_THRESHOLD:
                document = Document(name=QLFD_IND_FILE)
                document.addline(line=Line().add(text=Text(str(ind))))
                document.save(append=True)

            # store the performance of this individual in the corresponding series
            all_ind_series.addrecord(ind_count, total_reward)
            weighted_avg.update(total_reward)
            avg_series.addrecord(ind_count, weighted_avg.value)

        # Logging and other I/O operations
        print("Epoch {} completed".format(epoch))
        record = ga.stats.compile(pop)
        print("Statistics for epoch {} = {}".format(epoch, record))
        ga.logbook.record(epoch=epoch, **record)

        # store max return
        gen_series.addrecord(epoch, record["max"])
        if APPLY_EVO:
            # perform evolution
            offspring = applyEvolution(population=pop,
                                       ga_alg=ga,
                                       mut_sch=mut_sch,
                                       epoch=epoch)

            # set offspring as current population
            pop = offspring

        # update mutation probability series
        mut_prob_series.addrecord(epoch, mut_sch.prob)
        # increment epoch
        epoch += 1

    # print logbook
    ga.logbook.header = "epoch", "avg", "std", "min", "max"
    print(ga.logbook)

    # plotting
    plot_charts(avg_series, mut_prob_series)

    # terminate the environment
    env.close()
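
Both GFT examples decay the mutation probability over time (ExponentialDecaySchedule in Example #1, LinearDecaySchedule here) and read the current value from mut_sch.prob after each epoch. The snippets never show how a schedule advances, so the step() method and the exact decay laws below are assumptions; the sketch only illustrates the two decay shapes the constructor arguments imply.

import math


class LinearDecaySchedule:
    def __init__(self, initial_prob, decay_factor, min_prob=0.0):
        self.prob = initial_prob
        self.decay_factor = decay_factor
        self.min_prob = min_prob

    def step(self):
        # subtract a fixed amount per update, never dropping below the floor
        self.prob = max(self.min_prob, self.prob - self.decay_factor)
        return self.prob


class ExponentialDecaySchedule:
    def __init__(self, initial_prob, decay_factor):
        self.initial_prob = initial_prob
        self.decay_factor = decay_factor
        self.prob = initial_prob
        self._t = 0

    def step(self):
        # multiplicative decay: prob = initial_prob * exp(-decay_factor * t)
        self._t += 1
        self.prob = self.initial_prob * math.exp(-self.decay_factor * self._t)
        return self.prob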