Example #1
                        action='store_true',
                        help='whether train DQN')
    parser.add_argument('--test_dqn',
                        action='store_true',
                        help='whether test DQN')
    try:
        from argument import add_arguments
        parser = add_arguments(parser)
    except ImportError:
        pass
    args = parser.parse_args()
    return args


args = parse()
env = Environment('BreakoutNoFrameskip-v4', "", atari_wrapper=True, test=False)
n = env.action_space
state = env.reset()
device = torch.device("cpu")
input = torch.tensor(state, device=device)
agent = Agent_DQN(env, args)
dqn = DQN()
torch.save(dqn.state_dict(), "checkpoint.pth")
state_dict = torch.load("checkpoint.pth")
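# The saved weights are typically loaded back into the network before use;
# a minimal completion sketch (the original snippet stops after torch.load):
dqn.load_state_dict(state_dict)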

agent.train()
# Experience = namedtuple(
#             'Experience',
#             ('state','action','next_state','reward')
#         )
# e = Experience(state,action,next_state,reward)
    #def minibatch(self, size):
    #   random_array = np.random.choice(len(self.buffer),size,replace=False)
    #    minibatch = np.zeros(size)
    #    for i in random_array:
    #        minibatch[i] = self.buffer[i]
    #    #minibatch = np.array([self.buffer[i] for i in random_array])
    #    return minibatch
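    # A corrected sketch of the sampler attempted above, assuming self.buffer
    # is a list of transition tuples (illustrative, not the original code):
    #
    # def minibatch(self, size):
    #     indices = np.random.choice(len(self.buffer), size, replace=False)
    #     return [self.buffer[i] for i in indices]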


# Main entry point
if __name__ == "__main__":

    # Create an environment.
    # If display is True, then the environment will be displayed after every agent step. This can be set to False to speed up training time. The evaluation in part 2 of the coursework will be done based on the time with display=False.
    # Magnification determines how big the window will be when displaying the environment on your monitor. For desktop monitors, a value of 1000 should be about right. For laptops, a value of 500 should be about right. Note that this value does not affect the underlying state space or the learning, just the visualisation of the environment.
    environment = Environment(display=False, magnification=500)
    # Create an agent
    agent = Agent(environment)
    # Create a DQN (Deep Q-Network)
    dqn = DQN()
    my_buffer = ReplayBuffer()

    losses = []
    iterations = []
    episode_length = 20

    #fig, ax = plt.subplots()
    #ax.set(xlabel='Iteration', ylabel='Loss', title='Loss Curve')
    plt.ion()
    training_iteration = 0
    # Loop over episodes
    def process(self, sess, global_t, summary_writer, summary_op,
                summary_placeholders):

        if self.env is None:
            # lazy evaluation
            time.sleep(self.thread_index * 1.0)
            self.env = Environment({
                'scene_name': self.scene_scope,
                'terminal_state_id': int(self.task_scope)
            })

        start_local_t = self.local_t

        # Initialization
        states = []
        actions = []
        rewards = []
        values = []
        targets = []
        terminal_end = False

        # Reset accumulated gradient variables
        sess.run(self.reset_gradients)
        # Obtain shared parameters from global
        sess.run(self.sync)

        # t_max times loop
        for i in range(LOCAL_T_MAX):
            pi_, value_ = self.local_network.run_policy_and_value(
                sess, self.env.s_t, self.env.target)

            pi_ = np.array(pi_) / np.sum(pi_)
            action = self.choose_action(pi_)

            states.append(self.env.s_t)
            actions.append(action)
            values.append(value_)
            targets.append(self.env.target)

            if VERBOSE and (self.thread_index
                            == 0) and (self.local_t % 1000) == 0:
                sys.stdout.write("%s:" % self.scene_scope)
                sys.stdout.write("Pi = {0} V = {1}\n".format(pi_, value_))

            # process game
            self.env.step(action)

            # receive game result
            reward = self.env.reward
            terminal = self.env.terminal

            # ad-hoc reward for navigation
            # reward = 10.0 if terminal else -0.01
            if self.episode_length > 5e3: terminal = True

            self.episode_reward += reward
            self.episode_length += 1
            self.episode_max_q = max(self.episode_max_q, np.max(value_))

            # clip reward
            rewards.append(np.clip(reward, -1, 1))

            self.local_t += 1

            if terminal:
                terminal_end = True
                sys.stdout.write(
                    "#Thread: %d \n time %d | thread #%d | scene %s | target #%s\n%s %s episode reward = %.3f\n%s %s episode length = %d\n%s %s episode max Q  = %.3f\n"
                    % (self.thread_index, global_t, self.thread_index,
                       self.scene_scope, self.task_scope, self.scene_scope,
                       self.task_scope, self.episode_reward, self.scene_scope,
                       self.task_scope, self.episode_length, self.scene_scope,
                       self.task_scope, self.episode_max_q))

                summary_values = {
                    "episode_reward_input": self.episode_reward,
                    "episode_length_input": float(self.episode_length),
                    "episode_max_q_input": self.episode_max_q,
                    "learning_rate_input": self._anneal_learning_rate(global_t)
                }

                self._record_score(sess, summary_writer, summary_op,
                                   summary_placeholders, summary_values,
                                   global_t)
                self.episode_reward = 0
                self.episode_length = 0
                self.episode_max_q = -np.inf
                self.env.reset()

                break

        R = 0.0
        if not terminal_end:
            R = self.local_network.run_value(sess, self.env.s_t,
                                             self.env.target)

        actions.reverse()
        states.reverse()
        rewards.reverse()
        values.reverse()

        batch_si = []
        batch_a = []
        batch_td = []
        batch_R = []
        batch_t = []

        # compute and accumulate gradients
        for (ai, ri, si, Vi, ti) in zip(actions, rewards, states, values,
                                        targets):
            R = ri + GAMMA * R
            td = R - Vi
            a = np.zeros([ACTION_SIZE])
            a[ai] = 1

            batch_si.append(si)
            batch_a.append(a)
            batch_td.append(td)
            batch_R.append(R)
            batch_t.append(ti)

        sess.run(self.accum_gradients,
                 feed_dict={
                     self.local_network.s: batch_si,
                     self.local_network.a: batch_a,
                     self.local_network.t: batch_t,
                     self.local_network.td: batch_td,
                     self.local_network.r: batch_R
                 })

        cur_learning_rate = self._anneal_learning_rate(global_t)

        sess.run(self.apply_gradients,
                 feed_dict={self.learning_rate_input: cur_learning_rate})

        if VERBOSE and (self.thread_index == 0) and (self.local_t % 100) == 0:
            sys.stdout.write(
                "#Thread-%d-%s-Local timestep-%d\n" %
                (self.thread_index, self.scene_scope, self.local_t))

        # return advanced local step size
        diff_local_t = self.local_t - start_local_t
        return diff_local_t
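For reference, the backward pass above accumulates n-step discounted returns with the recursion R = r_i + GAMMA * R. A self-contained sketch of that computation (illustrative names, not part of the original code):

def discounted_returns(rewards, bootstrap_value, gamma):
    """n-step returns R_t = r_t + gamma * R_{t+1}, seeded with a bootstrap value."""
    R = bootstrap_value
    returns = []
    for r in reversed(rewards):
        R = r + gamma * R
        returns.append(R)
    return list(reversed(returns))

# Example: discounted_returns([0.0, 0.0, 1.0], 0.5, 0.99) -> [1.46525, 1.48005, 1.495]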
Example #4
# INITIALIZATION
#
    environment_directory = str(args[1])
    identifier = str(args[2])
    log_directory = str(args[3])
    measurement_directory = str(args[4])

    # Configure logging parameters so we get output while the program runs
    logging.basicConfig(format='%(asctime)s %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        filename=log_directory + identifier + ".log",
                        level=logging.INFO)
    logging.info('START logging for run: %s',
                 environment_directory + identifier + ".xml")

    environment = Environment()
    environment.initialize(environment_directory, identifier)
    runner = Runner()
    measurement = Measurement()

    #
    # UPDATE STEP
    #
    for i in range(environment.parameters.numSimulations):
        logging.info('  STARTED with run %s', str(i))
        environment.initialize(environment_directory, identifier)
        # check that environment file has been read correctly
        #environment.write_environment_file(identifier)
        runner.initialize(environment)
        measurement.initialize()  # clear the previous measurement
Example #5
    def train(self):
        self.model.load_weights(self.weight)
        self.model.compile(loss='mse', optimizer=Adam(lr=self.alpha))
        self.epsilon = 0.1
        for trial in range(self.ntrials):
            s = self.env.reset(test=trial + 1)
            s = np.reshape(s, [1, self.nstates])
            treward = 0
            while True:
                a = self.epsilon_greedy(s)
                s2, r, done = self.env.step(a)
                s = np.reshape(s2, [1, self.nstates])
                treward += r
                if done:
                    rospy.loginfo('test: ' + str(trial + 1) + ' reward: ' +
                                  str(treward))
                    break


if __name__ == '__main__':
    if len(sys.argv) < 3:
        assert False, 'missing model and/or weight'
    rospy.init_node('ddqn_test')
    rospy.loginfo('start testing')
    env = Environment()
    agent = Agent(env, str(sys.argv[1]), str(sys.argv[2]))
    agent.train()
    env.reset()
    rospy.loginfo('COMPLETE TESTING')
    rospy.spin()
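The agent's epsilon_greedy method is not part of this excerpt; a typical implementation looks like the following sketch (every name here, including n_actions, is illustrative rather than the author's):

import numpy as np

def epsilon_greedy(model, state, epsilon, n_actions):
    """Random action with probability epsilon, otherwise the greedy Q action."""
    if np.random.rand() < epsilon:
        return np.random.randint(n_actions)
    q_values = model.predict(state)  # Keras model returning one row of Q-values
    return int(np.argmax(q_values[0]))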
Example #6
    def __init__(self, model_code, gamma=0.975, field='champions', crop_style=0, gray=False, fps=8,
                 history_length=2700, train_interval=150, num_epochs=1, keep_training=False, td=1,
                 batch_size=32, epsilon_decay=0.9, epsilon_floor=1/16, decay_interval=10, initial_epsilon=None):
        # reward_model = reward_models.get_model_F()
        # reward_func = reward_models.create_reward_func(self.reward_model)

        self.model_code = model_code
        self.gray = gray

        self.fps = fps
        self.field = field
        self.crop_style = crop_style

        self.gamma = gamma

        self.env = Environment(frame_time=1/fps, gray=gray, field=field, crop_style=crop_style)

        self.history_length = history_length

        self.td = td  # Determine how much td/mc to use
        self.batch_size = batch_size

        self.epsilon_decay = epsilon_decay
        self.epsilon_floor = epsilon_floor
        self.decay_interval = decay_interval

        self.trial_count = 0
        
        self.model_path = 'model_data/{}/q_net/{}'.format(field, model_code)
        for designator in (crop_style, 'G' if gray else 'C', fps, int(gamma*1000)):
            self.model_path += '_{}'.format(designator)
        self.model_path += '/'

        if not os.path.exists(self.model_path):
            os.makedirs(self.model_path)
        print('Model path: {}'.format(self.model_path))
        print()
        print('Field: {}'.format(self.field))
        print('Model code: {}'.format(self.model_code))
        print('Crop style: {}'.format(self.crop_style))
        print('Gray: {}'.format(self.gray))
        print('FPS: {}'.format(self.fps))
        print('Gamma: {}'.format(self.gamma))
        try:
            self.acting_model = load_model(self.model_path + 'latest.hdf5')
            self.training_model = load_model(self.model_path + 'latest.hdf5')
            print('Loaded model parameters from disk')
        except OSError:
            get_model = getattr(models, 'get_model_{}'.format(self.model_code))
            self.acting_model = get_model(self.env.ball_obs_dims, self.env.car_obs_dims)
            self.training_model = get_model(self.env.ball_obs_dims, self.env.car_obs_dims)
            self.training_model.set_weights(self.acting_model.get_weights())
            print('Generated new parameters')

        self.trials = []

        if os.path.exists(self.model_path + 'history.pkl'):
            ball_history = np.load(self.model_path + 'ball_history.npy')
            car_history = np.load(self.model_path + 'car_history.npy')
            with open(self.model_path + 'history.pkl', 'rb') as history:
                action_history, reward_history, discounted_reward_history, history_index = pickle.load(history)

            old_len = len(reward_history)
            next_idx = history_index % old_len

            if old_len == self.history_length:
                self.ball_history = ball_history
                self.car_history = car_history
                self.action_history = action_history
                self.reward_history = reward_history
                self.discounted_reward_history = discounted_reward_history
                self.history_index = history_index
            else:
                self.ball_history = np.zeros((self.history_length,) + self.env.ball_obs_dims)
                self.car_history = np.zeros((self.history_length,) + self.env.car_obs_dims)

                self.action_history = np.zeros(self.history_length, dtype=np.int8)
                self.reward_history = np.zeros(self.history_length)
                self.discounted_reward_history = np.zeros(self.history_length)

                if history_index > old_len:
                    # Looped
                    # TODO Copy end of array first
                    if old_len - next_idx > self.history_length:
                        # new history length is shorter than first section of old history
                        self.ball_history[:] = ball_history[next_idx:next_idx+self.history_length]
                        self.car_history[:] = car_history[next_idx:next_idx+self.history_length]
                        self.action_history[:] = action_history[next_idx:next_idx+self.history_length]
                        self.reward_history[:] = reward_history[next_idx:next_idx+self.history_length]
                        self.discounted_reward_history[:] =\
                            discounted_reward_history[next_idx:next_idx+self.history_length]

                        self.history_index = history_index
                    else:
                        # new history length is longer than first section of old history
                        self.ball_history[:old_len - next_idx] = ball_history[next_idx:]
                        self.car_history[:old_len - next_idx] = car_history[next_idx:]
                        self.action_history[:old_len - next_idx] = action_history[next_idx:]
                        self.reward_history[:old_len - next_idx] = reward_history[next_idx:]
                        self.discounted_reward_history[:old_len - next_idx] =\
                            discounted_reward_history[next_idx:]

                        # copy second section, when new history is full or we run out of old history
                        stop = min(self.history_length - (old_len - next_idx), next_idx)

                        self.ball_history[old_len - next_idx:old_len] = ball_history[:stop]
                        self.car_history[old_len - next_idx:old_len] = car_history[:stop]
                        self.action_history[old_len - next_idx:old_len] = action_history[:stop]
                        self.reward_history[old_len - next_idx:old_len] = reward_history[:stop]
                        self.discounted_reward_history[old_len - next_idx:old_len] =\
                            discounted_reward_history[:stop]

                        self.history_index = old_len
                else:
                    # Not looped
                    stop = min(self.history_length, history_index)

                    self.ball_history[:stop] = ball_history[:stop]
                    self.car_history[:stop] = car_history[:stop]
                    self.action_history[:stop] = action_history[:stop]
                    self.reward_history[:stop] = reward_history[:stop]
                    self.discounted_reward_history[:stop] = discounted_reward_history[:stop]

                    self.history_index = stop
            if initial_epsilon is None:
                initial_epsilon = max(self.epsilon_floor, self.epsilon_decay ** (self.history_index/self.decay_interval))
        else:
            self.ball_history = np.zeros((self.history_length,) + self.env.ball_obs_dims)
            self.car_history = np.zeros((self.history_length,) + self.env.car_obs_dims)

            self.action_history = np.zeros(self.history_length, dtype=np.int8)
            self.reward_history = np.zeros(self.history_length)
            self.discounted_reward_history = np.zeros(self.history_length)
            self.history_index = 0

            if initial_epsilon is None:
                initial_epsilon = 1.0

        print("epsilon = {}".format(initial_epsilon))

        self.train_interval = train_interval
        self.num_epochs = num_epochs
        self.keep_training = keep_training
        self.trainer = Trainer(self, num_epochs=self.num_epochs)
        self.trainer.finished = True

        self.agent = Agent(self.acting_model, initial_epsilon, self.env.ball_obs_dims, self.env.car_obs_dims)

        self.frames_in_buffer = 0

        # Initializes reward models
        self.env.reset(read_only=True)

        self.playing = False

        print("Ready")
# imports other libs
import time
import numpy as np
from math import fabs, sqrt
import glob, os

experiment_name = 'individual_demo'
if not os.path.exists(experiment_name):
    os.makedirs(experiment_name)

# initializes simulation in individual evolution mode, for single static enemy.
env = Environment(experiment_name=experiment_name,
                  enemies=[2],
                  playermode="ai",
                  player_controller=player_controller(),
                  enemymode="static",
                  level=2,
                  speed="fastest")

# default environment fitness is assumed for experiment

env.state_to_log()  # checks environment state

####   Optimization for controller solution (best genotype-weights for phenotype-network): Genetic Algorithm    ###

ini = time.time()  # sets time marker

# genetic algorithm params

run_mode = 'train'  # train or test
Example #8
def run(args):
    if args.test_dqn:
        env = Environment('BreakoutNoFrameskip-v4', args, atari_wrapper=True, test=True)
        from agent_dqn import Agent_DQN
        agent = Agent_DQN(env, args)
        test(agent, env, total_episodes=100)
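The test helper called above is not shown; a minimal evaluation loop in the same spirit might look like this sketch (agent.make_action and the Gym-style step signature are assumptions):

def test(agent, env, total_episodes=100):
    rewards = []
    for _ in range(total_episodes):
        state = env.reset()
        done, episode_reward = False, 0.0
        while not done:
            action = agent.make_action(state, test=True)  # assumed agent API
            state, reward, done, _ = env.step(action)     # Gym-style step
            episode_reward += reward
        rewards.append(episode_reward)
    print('Average reward over {} episodes: {:.2f}'.format(
        total_episodes, sum(rewards) / len(rewards)))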
Example #9
    def train(self):
        """
        Learn your (final) policy.

        Use the evolution strategy algorithm CMA-ES: https://pypi.org/project/cma/

        Possible action: [0, 1, 2]
        Range observation (tuple):
            - position: [-1.2, 0.6]
            - velocity: [-0.07, 0.07]
        """
        # 1- Define state features
        # 2- Define search space (to define a policy)
        # 3- Define objective function (for policy evaluation)
        # 4- Use CMA-ES to optimize the objective function
        # 5- Save optimal policy

        generations = 10000
        for i in range(generations):
            solutions = self.es.ask()
            print("iteration:", i, " ;")
            result = []
            for solution in solutions:
                env = Environment()
                n_w1 = len(self.w1_flat)
                self.w1_flat = np.array(solution[0:len(self.w1_flat)])
                self.b1_flat = np.array(
                    solution[len(self.w1_flat):len(self.w1_flat) +
                             len(self.b1_flat)])
                self.w2_flat = np.array(
                    solution[len(self.w1_flat) +
                             len(self.b1_flat):len(self.w1_flat) +
                             len(self.b1_flat) + len(self.w2_flat)])
                self.b2_flat = np.array(
                    solution[len(self.w1_flat) + len(self.b1_flat) +
                             len(self.w2_flat):len(self.w1_flat) +
                             len(self.b1_flat) + len(self.w2_flat) +
                             len(self.b2_flat)])
                done = False
                accumulated_reward = 0
                while not done:
                    observation = env.observe()
                    reward, done = env.act(self.act(observation))
                    accumulated_reward += reward
                result.append(-accumulated_reward)
            self.es.tell(solutions, result)
            # result averages 200 when the goal was not reached; lower is better.
            if np.mean(result) < 100:
                print("Good generation found")
                break

        index = np.argmin(result)
        weight = solutions[index]
        np.save("weights.npy", weight)
        self.w1_flat = np.array(weight[0:len(self.w1_flat)])
        self.b1_flat = np.array(weight[len(self.w1_flat):len(self.w1_flat) +
                                       len(self.b1_flat)])
        self.w2_flat = np.array(
            weight[len(self.w1_flat) + len(self.b1_flat):len(self.w1_flat) +
                   len(self.b1_flat) + len(self.w2_flat)])
        self.b2_flat = np.array(
            weight[len(self.w1_flat) + len(self.b1_flat) +
                   len(self.w2_flat):len(self.w1_flat) + len(self.b1_flat) +
                   len(self.w2_flat) + len(self.b2_flat)])
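The repeated slicing of the flat CMA-ES solution vector above can also be expressed with np.split; a self-contained sketch (the sizes are illustrative):

import numpy as np

def unpack_flat(vector, sizes):
    """Split a flat parameter vector into consecutive pieces of the given sizes."""
    boundaries = np.cumsum(sizes)[:-1]
    return np.split(np.asarray(vector), boundaries)

# Example: w1, b1, w2, b2 = unpack_flat(np.arange(10), [4, 2, 3, 1])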
Example #10
    def __init__(self):
        self.__environment = Environment()
        self.__board = Board()
        self.__drone = Drone(self.__environment)
import gym
import math
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from environment import Environment

USE_CUDA = torch.cuda.is_available()
def Variable(*args, **kwargs):
    var = autograd.Variable(*args, **kwargs)
    return var.cuda() if USE_CUDA else var

env_name = 'BreakoutNoFrameskip-v4'
env = Environment(env_name, {}, atari_wrapper=True)

epsilon_start = 1.0
epsilon_final = 0.01
epsilon_decay = 500

epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

from collections import deque


class ReplayBuffer(object):
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)
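    # The rest of the class is not shown in this excerpt; a typical deque-based
    # completion looks like this sketch (not the original author's code):
    def push(self, state, action, reward, next_state, done):
        self.buffer.append((state, action, reward, next_state, done))

    def sample(self, batch_size):
        indices = np.random.choice(len(self.buffer), batch_size, replace=False)
        return [self.buffer[i] for i in indices]

    def __len__(self):
        return len(self.buffer)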
Example #12
                return
            if action is None:
                action = 0
            obs_, reward, done, _ = env.step(action)
            print('action:{}, reward:{}, done:{}'.format(action, reward, done))
            env.render()
            if save_or_not:
                s_list.append(obs)
                a_list.append(action)
                r_list.append(reward)
            time.sleep(0.1)


if __name__ == '__main__':
    game_name = 'Breakout-v0'
    env = Environment(game_name, atari_wrapper=True, test=0)

    print('We are playing {}'.format(game_name))
    print('-------- game information --------')
    print('observation space: ', end='')
    print(env.observation_space)
    print('action space: ', end='')
    print(env.action_space)

    try:
        print('action meanings: ', end='')
        print(env.unwrapped.get_action_meanings())
    except Exception:
        print('don\'t know the action meanings')

    check_input_range(env)
Example #13
from pando.website import Website

env = Environment(
    ASPEN_CHANGES_RELOAD=is_yesish,
    ASPEN_PROJECT_ROOT=str,
    ASPEN_SHOW_TRACEBACKS=is_yesish,
    ASPEN_WWW_ROOT=str,
    AWS_ACCESS_KEY_ID=str,
    AWS_SECRET_ACCESS_KEY=str,
    DATABASE_URL=str,
    DATABASE_MAXCONN=int,
    CANONICAL_HOST=str,
    CANONICAL_SCHEME=str,
    COMPRESS_ASSETS=is_yesish,
    CSP_EXTRA=str,
    SENTRY_DSN=str,
    SENTRY_RERAISE=is_yesish,
    LOG_DIR=str,
    KEEP_PAYDAY_LOGS=is_yesish,
    LOGGING_LEVEL=str,
    CACHE_STATIC=is_yesish,
    CLEAN_ASSETS=is_yesish,
    RUN_CRON_JOBS=is_yesish,
    OVERRIDE_PAYDAY_CHECKS=is_yesish,
    OVERRIDE_QUERY_CACHE=is_yesish,
    GRATIPAY_BASE_URL=str,
    SECRET_FOR_GRATIPAY=str,
    INSTANCE_TYPE=str,
)

logging.basicConfig(level=getattr(logging, env.logging_level.upper()))
Example #14
def run(args):
    if args.test:
        env = Environment('Pong-v0', args, test=True)
        from agent_dir.agent_pg import Agent_PG
        agent = Agent_PG(env, args)
        test(agent, env)
Example #15
def main1(game, enemy, algorithm):
    # Setting up the game
    experiment_name = 'adrian-testing' + "-algorithm-" + str(algorithm)
    print(experiment_name + " game: " + str(game) + " " + "enemy: " +
          str(enemy))
    if not os.path.exists(experiment_name):
        os.makedirs(experiment_name)

    # Initialize the number of hidden neurons
    n_hidden_neurons = 10

    # initializes simulation in individual evolution mode, for single static enemy.
    env = Environment(
        experiment_name=experiment_name,
        enemies=[enemy],
        playermode="ai",
        player_controller=player_controller(n_hidden_neurons),
        enemymode="static",
        level=2,  # default environment fitness is assumed for experiment
        speed="fastest")
    # default environment fitness is assumed for experiment
    env.state_to_log()  # checks environment state

    # Optimization for controller solution (best genotype-weights for phenotype-network): Genetic Algorithm  ###
    ini = time.time()  # sets time marker

    # genetic algorithm params

    run_mode = 'train'  # train or test

    # number of weights for multilayer with 10 hidden neurons
    n_vars = (env.get_num_sensors() +
              1) * n_hidden_neurons + (n_hidden_neurons + 1) * 5

    #------------  Setting up the GA  ---------------
    # There are two main areas where change is possible, a) and b). (New things could also be added, e.g. Doomsday...)
    # a) GA Constants and parameters
    genome_lenght = n_vars  # length of the genome to be optimized (n_vars floats; was 100 for the bit-string version)
    pop_size = 50
    p_crossover = 0.8
    p_mutation = 0.2
    mutation_scaler = genome_lenght  # The flip-bit mutation iterates over every value in an individual's genome and, with probability indpb, decides whether to flip it. This is independent of the mutation probability, which decides IF a given individual in the population is selected for mutation at all.

    max_generations = 15  # stopping condition
    tournament_size = 5  # tournament size
    seed = 50  #random.randint(1, 126)
    random.seed(seed)

    # For optimizing continuous functions
    bound_low = -1
    bound_up = 1
    crowding_factor = 20

    # Defining a tool to create a single gene
    toolbox = base.Toolbox()
    # toolbox.register("ZeroOrOne", random.randint, -1, 1)
    toolbox.register("ZeroOrOne", random.uniform, -1,
                     1)  # Each gene is a float between -1 and 1

    # Defining the fitness
    # creator.create("FitnessMin", base.Fitness, weights=(-30.0, -30.0))
    # creator.create("FitnessMin", base.Fitness, weights=(-100,))
    creator.create("FitnessMin", base.Fitness, weights=(100, ))

    # Defining an individual creator
    creator.create(
        "Individual", list, fitness=creator.FitnessMin
    )  # An individual will be stored in a list format with fitness evaluated at "FitnessMin"
    toolbox.register(
        "individualCreator", tools.initRepeat, creator.Individual,
        toolbox.ZeroOrOne, genome_lenght
    )  # An individual consist of a list of n_var attributes (genes) populated by zeroorone

    # Defining the population creator
    toolbox.register("populationCreator", tools.initRepeat, list,
                     toolbox.individualCreator)

    # Defining the fitness function
    def evaluate(x):
        return np.array(list(map(lambda y: simulation(env, y), x))),

    toolbox.register("evaluate", evaluate)

    #------------- b) Registering the EA operators --------------
    # 1. Standard operators
    toolbox.register("select", tools.selTournament, tournsize=tournament_size)
    # # toolbox.register("mate", tools.cxSimulatedBinaryBounded, low=bound_low, up=bound_up,eta=crowding_factor)
    # # toolbox.register("mutate", tools.mutPolynomialBounded, low=bound_low, up=bound_up,eta=crowding_factor,indpb=1.0/mutation_scaler)
    # toolbox.register("mate", tools.cxTwoPoint)
    # toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    # 2. Multiple operators
    # 2.1 All operators
    # 2.1.1 Crossover
    # Two-point: toolbox.register("mate", tools.cxTwoPoint)
    # Partially matched: toolbox.register("mate", tools.cxPartialyMatched)
    # Uniform: toolbox.register("mate", tools.cxUniform, indpb=0.05)
    # 2.1.2 Mutation
    # flip: toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    # shuffle: toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
    # uniformint: toolbox.register("mutate", tools.mutUniformInt, low=-1, up=1, indpb=0.05)

    if algorithm == 1:
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    elif algorithm == 2:
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
    elif algorithm == 3:
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate",
                         tools.mutUniformInt,
                         low=-1,
                         up=1,
                         indpb=0.05)
    elif algorithm == 4:
        toolbox.register("mate", tools.cxBlend, alpha=0.05)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    elif algorithm == 5:
        toolbox.register("mate", tools.cxBlend, alpha=0.05)
        toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
    elif algorithm == 6:
        toolbox.register("mate", tools.cxBlend, alpha=0.05)
        toolbox.register("mutate",
                         tools.mutUniformInt,
                         low=-1,
                         up=1,
                         indpb=0.05)
    elif algorithm == 7:
        toolbox.register("mate", tools.cxUniform, indpb=0.05)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.05)
    elif algorithm == 8:
        toolbox.register("mate", tools.cxUniform, indpb=0.05)
        toolbox.register("mutate", tools.mutShuffleIndexes, indpb=0.05)
    elif algorithm == 9:
        toolbox.register("mate", tools.cxUniform, indpb=0.05)
        toolbox.register("mutate",
                         tools.mutUniformInt,
                         low=-1,
                         up=1,
                         indpb=0.05)
    elif algorithm == 10:
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.01)
    elif algorithm == 11:
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.15)
    elif algorithm == 12:
        toolbox.register("mate", tools.cxTwoPoint)
        toolbox.register("mutate", tools.mutFlipBit, indpb=0.3)
    else:
        print("why?")

#---------------- Setting the game environment ----------------

    def simulation(env, x):
        f, p, e, t = env.play(pcont=x)
        return f, p

    # Plotting
    maxFitnessValues = []
    meanFitnessValues = []

    # Running the simulation
    def main(game, enemy):
        file_aux=open(experiment_name+'/results_enemy' + \
                          str(enemy) + str(algorithm) + '.txt', 'a')
        file_aux.write(f'\ngame {game} \n')
        file_aux.write('gen, best, mean, std, median, q1, q3, life')
        file_aux.close()

        #Creating the population
        pop = toolbox.populationCreator(
            n=pop_size)  # Population is created as a list object
        pop_array = np.array(pop)
        generationCounter = 0
        print("Start of evolution")

        # Evaluating all the population
        # fitnessValues=list(map(toolbox.evaluate, pop_array)) -> Won't work. Used Kamiel's
        fitnessValue = evaluate(pop_array)
        fitnessValue = fitnessValue[0].tolist()
        fitnesses = []
        lifes = []
        for value in fitnessValue:
            fitnesses.append(value[0])
            lifes.append(value[1])
        for count, individual in enumerate(fitnesses):
            # Rewrites the fitness value in a way the DEAP algorithm can understand
            fitnesses[count] = (-individual, )

        # Assigning the fitness value to each individual
        for individual, fitnessValue in zip(pop, fitnesses):
            individual.fitness.values = fitnessValue

        # Extract each fitness value
        fitnessValues = [individual.fitness.values[0] for individual in pop]

        # Saves first generation
        fits = fitnessValues
        g = generationCounter
        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - abs(mean)**2)**0.5
        q1 = np.percentile(fits, 25)
        median = np.percentile(fits, 50)
        q3 = np.percentile(fits, 75)
        max_life = max(lifes)
        file_aux = open(
            experiment_name + '/results_enemy' + str(enemy) + 'Tournement.txt',
            'a')
        file_aux.write(
            f'\n{str(g)}, {str(round(max(fits)))}, {str(round(mean,6))}, {str(round(std,6))}, {str(round(median,6))}, {str(round(q1,6))}, {str(round(q3,6))}, {str(round(max_life,6))}'
        )
        file_aux.close()

        # Begin the genetic loop
        # First, we start with the stopping condition
        while max(fitnessValues) < 100 and generationCounter < max_generations:
            begin_time = datetime.datetime.now()
            print("Being evolution time:", begin_time, "!!!")
            # Update generation counter
            generationCounter = generationCounter + 1
            print("-- Generation %i --" % generationCounter)

            # Begin genetic operators
            # 1. Selection: since we already defined the tournament before
            # we only need to pass the population and its length
            # Selected individuals now will be in a list
            print("selection...")
            offspring = toolbox.select(pop, len(pop))
            for i in offspring:
                print(i.fitness.values[0])

            # Cloning the selected indv so we can apply the next genetic operators without affecting the original pop
            offspring = list(map(toolbox.clone, offspring))
            print("done")
            # 2. Crossover. Note that the mate function takes two individuals as arguments and
            # modifies them in place, meaning they don't need to be reassigned
            print("Crossover...")
            for child1, child2 in zip(offspring[::2], offspring[1::2]):
                if random.random() < p_crossover:
                    toolbox.mate(child1, child2)
                    del child1.fitness.values
                    del child2.fitness.values
            print("done")
            # 3. Mutation
            print("Mutation...")
            for mutant in offspring:
                # if random.random() < p_mutation:
                if random.random() < (1 -
                                      (generationCounter / max_generations)):
                    toolbox.mutate(mutant)
                    del mutant.fitness.values
            # Individuals that weren't mutated remain intact; their fitness values don't need to be recalculated.
            # The modified individuals have had their fitness invalidated (left empty).
            # We now find those individuals and calculate their new fitness.
            print("...re-evaluating fitness...")
            freshIndividuals = [
                ind for ind in offspring if not ind.fitness.valid
            ]
            # toolbox.evaluate via map didn't work here; Kamiel's approach is used instead
            # freshFitnessValues=list(map(toolbox.evaluate, freshIndividuals))
            # for individual, fitnessValue in zip(freshIndividuals, freshFitnessValues):
            #     individual.fitness.values=fitnessValue
            pop_array = np.array(freshIndividuals)
            values = evaluate(pop_array)
            values = values[0].tolist()
            fitnesses = []
            for value in values:
                fitnesses.append(value[0])
                lifes.append(value[1])

            for count, individual in enumerate(fitnesses):
                fitnesses[count] = (individual, )

            for ind, fit in zip(freshIndividuals, fitnesses):
                ind.fitness.values = fit
            print("done")
            # Replaces the worst individuals of the offspring with the best individuals of the old population (elitism)
            amount_swithed_individuals = int(len(pop) / 10)
            worst_offspring = deap.tools.selWorst(offspring,
                                                  amount_swithed_individuals,
                                                  fit_attr='fitness')
            best_gen = deap.tools.selBest(pop,
                                          amount_swithed_individuals,
                                          fit_attr='fitness')
            for count, individual in enumerate(worst_offspring):
                index = offspring.index(individual)
                offspring[index] = best_gen[count]

            # End of the process -> replace the old population with the new one
            pop[:] = offspring
            print(f"There are {len(pop)} individuals in the population ")

            # Gather all the fitnesses in one list and print the stats
            fits = [ind.fitness.values[0] for ind in pop]

            length = len(pop)
            mean = sum(fits) / length
            sum2 = sum(x * x for x in fits)
            std = abs(sum2 / length - abs(mean)**2)**0.5
            q1 = np.percentile(fits, 25)
            median = np.percentile(fits, 50)
            q3 = np.percentile(fits, 75)
            max_life = max(lifes)

            # For plotting
            maxFitness = max(fits)
            meanFitness = sum(fits) / len(pop)
            maxFitnessValues.append(maxFitness)
            meanFitnessValues.append(meanFitness)

            print("  Min %s" % min(fits))
            print("  Max %s" % max(fits))
            print("  Avg %s" % mean)
            print("  Std %s" % std)
            # Plot
            plt.plot(maxFitnessValues)
            plt.plot(meanFitnessValues)
            plt.ylabel("Values")
            plt.xlabel("Generations")
            plt.title(experiment_name + " game: " + str(game) + " " +
                      "enemy: " + str(enemy))
            plt.savefig("adrian-testing" + "-algorithm-" + str(algorithm) +
                        "-enemy-" + str(enemy) + ".png")
            plt.show()
            # DataFrame
            df.loc[int(algorithm)] = [min(fits), max(fits), mean, std]
            df.to_csv("adrian-testing" + "-algorithm-" + str(algorithm) +
                      "-enemy-" + str(enemy) + ".csv",
                      index=False)
            print("Saving...")
            # saves results for this generation
            file_aux=open(experiment_name+'/results_enemy' + \
                        str(enemy)+'Tournement.txt', 'a')
            file_aux.write(
                f'\n{str(g)}, {str(round(max(fits),6))}, {str(round(mean,6))}, {str(round(std,6))}, {str(round(median,6))}, {str(round(q1,6))}, {str(round(q3,6))}, {str(round(max_life,6))}'
            )
            file_aux.close()
            print("Evolution ended in:", datetime.datetime.now() - begin_time)
            print("-- End of (successful) evolution --")
            best_ind = tools.selBest(pop, 1)[0]
            print("Best individual is %s, %s" %
                  (best_ind, best_ind.fitness.values))
            np.savetxt(experiment_name+'/best_game_'+str(game) + \
                            ',enemy_'+str(enemy)+'Tournement.txt', best_ind)
            print("Done. New generation...")

    main(game, enemy)
    plt.show()
    print("Run ended in:", datetime.datetime.now() - begin_game)
Example #16
    def bind(self, instance):
        environment = Environment(self.closure)
        environment.define('this', instance)

        return LoxFunction(self.declaration, environment, self.is_initializer)
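For context, bind is normally invoked when a method is looked up on an instance, so that 'this' resolves to that instance inside the method body. A sketch of a typical call site (not necessarily this repository's exact code):

# method = klass.find_method(name)
# if method is not None:
#     return method.bind(instance)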
Example #17
            # # print(real_speed)
            # if position < self.goal < (position + real_speed):
            #     self.final_action = np.abs(self.goal - position) / real_speed
            # elif position > self.goal > (position + self.speed):
            #     self.final_action = np.abs(self.goal - position) / real_speed
            # else:
            #     print("Stable position")
            #     self.position = position
            # print(f"Position: {self.position}, Goal: {self.goal}, delta: {self.delta}, speed: {real_speed}, direction: {direction}")

        else:
            self.position = position

        if self.final_action is None:
            self.env.step(direction, False)
        else:
            print("FINAL")
            self.env.step(self.final_action, True)
            self.finished = True


if __name__ == "__main__":
    solver = Solver(Environment(4))
    solver.env.reset()
    for i in range(100):
        solver.zero(time.time(), solver.env.state[0])
        if solver.finished:
            break

    print(solver.env.state)
Example #18
def main():
    trial_len = 1030

    env = Environment(100000, 1, trial_len, stock1, stock2)
    trials = 100

    action_info = {
        's1_buys_per_trial': [],
        's1_sells_per_trial': [],
        's2_buys_per_trial': [],
        's2_sells_per_trial': [],
        'holds_per_trial': [],
        'illegal_action_trial': [],
        'profits_per_trial': [],
        'ranges_per_trial': [],
        'good_profits_and_range': []
    }

    dqn_agent = DQNAgent(env, stock1.name, stock2.name)
    menu_option = input(
        "Press 1 to load a model from filepath. Press any other button to start a new model "
    )
    if menu_option == "1":
        dqn_agent.load_model()
    steps = []
    for trial in range(trials):
        print('Trial ', trial)
        cur_state = env.state
        step_count = 0
        start_funds = env.get_funds()
        action = ''

        stock1_buys = 0
        stock1_sells = 0
        stock2_buys = 0
        stock2_sells = 0
        holds = 0
        illegal_action = False
        returns = []

        for step in range(trial_len):
            action_num = dqn_agent.act(cur_state)
            action, stock = None, None

            # Get action from Deep Q Net output
            if action_num == 0:
                action, stock = 'BUY', stock1.name
                stock1_buys += 1
            elif action_num == 1:
                action, stock = 'SELL', stock1.name
                stock1_sells += 1
            elif action_num == 2:
                action, stock = 'BUY', stock2.name
                stock2_buys += 1
            elif action_num == 3:
                action, stock = 'SELL', stock2.name
                stock2_sells += 1
            elif action_num == 4:
                action, stock = 'HOLD', ''
                holds += 1
            else:
                action, stock = None, None

            prev_funds = env.get_funds()
            print('Step {}:'.format(step))
            print('  Action: ', action)
            print('  Stock:  ', stock)
            new_state, reward, illegal_action = env.step(action, stock, 1)
            reward = reward if not illegal_action else -10000
            new_funds = env.get_funds()
            returns.append(new_funds - prev_funds)
            print('  Reward: ', reward)
            dqn_agent.remember(cur_state, action_num, reward, new_state,
                               illegal_action)

            dqn_agent.replay()
            dqn_agent.target_train()
            cur_state = new_state
            step_count += 1
            if illegal_action:
                print('Illegal action taken, starting new trial')
                break

        profit = start_funds - env.get_funds()
        df_range = (env.init_day_index, env.init_day_index + trial_len)
        print('Profit: ', start_funds - env.get_funds())

        if profit >= 5000.00:
            action_info['good_profits_and_range'].append((df_range, returns))
            print(action_info['good_profits_and_range'])

        action_info['profits_per_trial'].append(profit)

        action_info['s1_buys_per_trial'].append(stock1_buys)
        action_info['s1_sells_per_trial'].append(stock1_sells)
        action_info['s2_buys_per_trial'].append(stock2_buys)
        action_info['s2_sells_per_trial'].append(stock2_sells)
        action_info['holds_per_trial'].append(holds)
        action_info['illegal_action_trial'].append(illegal_action)
        action_info['ranges_per_trial'].append(
            (env.init_day_index, env.init_day_index + trial_len))

        n = random.randint(0, len(stock1) - trial_len)
        env = Environment(100000, 1, trial_len, stock1, stock2)

    print(
        "Average Profit: ",
        sum(action_info['profits_per_trial']) /
        len(action_info['profits_per_trial']))
    data_file_name = input(
        'Please type the name of the file you would like to save the action info to: '
    )
    menu_option2 = input(
        "Press 0 to quit, press 1 to save the model to a location ")
    if menu_option2 == "1":
        fp = input("Enter the filepath to save this model to ")
        dqn_agent.custom_save_model(fp)

    action_info_df = pd.DataFrame(action_info)
    action_info_df.to_csv(data_file_name)
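The if/elif chain that decodes action_num above can also be written as a lookup table; a sketch (it omits the per-action counters, so it is not a drop-in replacement):

ACTION_TABLE = {
    0: ('BUY', stock1.name),
    1: ('SELL', stock1.name),
    2: ('BUY', stock2.name),
    3: ('SELL', stock2.name),
    4: ('HOLD', ''),
}
action, stock = ACTION_TABLE.get(action_num, (None, None))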
Example #19
def deap_generalist_twopoint(experiment_name, enemies_in_group,
                             iteration_number):
    if not os.path.exists(experiment_name):
        os.makedirs(experiment_name)

    if os.path.exists(experiment_name + '/results.csv'):
        os.remove(experiment_name + '/results.csv')

    if os.path.exists(experiment_name + '/best.txt'):
        os.remove(experiment_name + '/best.txt')

    n_hidden_neurons = 10

    # initializes simulation in individual evolution mode, for single static enemy.
    env = Environment(
        experiment_name=experiment_name,
        enemies=enemies_in_group,
        multiplemode="yes",
        playermode="ai",
        player_controller=player_controller(n_hidden_neurons),
        enemymode="static",
        level=2,
        speed="fastest",
    )

    # GLOBAL VARIABLES
    POP_SIZE = 50  # Population size
    GENS = 10  # Amount of generations
    MUTPB = 0.2  # MUTPB is the probability for mutating an individual
    toolbox = base.Toolbox()
    n_vars = (env.get_num_sensors() +
              1) * n_hidden_neurons + (n_hidden_neurons + 1) * 5

    #DATA
    genlist = []
    bestlist = []
    meanlist = []
    stdlist = []

    def evaluate(pop):
        """
        This function will start a game with one individual from the population

        Args:
            individual (np.ndarray of Floats between -1 and 1): One individual from the population

        Returns:
            Float: Fitness
        """
        for ind in pop:
            f, p, e, t = env.play(
                pcont=ind
            )  # return fitness, self.player.life, self.enemy.life, self.time
            fitness = p - e
            ind.fitness.values = [fitness]

    def setup_DEAP():
        """
        This function sets up the DEAP environment to our liking.

        creator.create is used to create a class under a certain name
        toolbox.register is used to register a function under a certain name which can be called

        For more information about which examples are used and the DEAP documentation:
            # https://deap.readthedocs.io/en/master/
            # https://deap.readthedocs.io/en/master/examples/ga_onemax.html
        """
        # this tells DEAP that the fitness should be as high as possible. (therefore Max)
        creator.create("FitnessMax", base.Fitness, weights=(1.0, ))

        # an individual is a np.ndarray filled with random floats which are the inputs of the game
        creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)

        toolbox.register("attr_float", random.uniform, -1, 1)

        # registers function to create an individual
        # n_vars is the amount of floats in the individual
        toolbox.register("individual", tools.initRepeat, creator.Individual,
                         toolbox.attr_float, n_vars)

        # registers function to create the population of individuals
        toolbox.register("population", tools.initRepeat, list,
                         toolbox.individual)

        # registers function which links to our evaluate function
        toolbox.register("evaluate", evaluate)

        # registers crossover function: We use Two Point Crossover
        toolbox.register("mate", tools.cxTwoPoint)

        # registers mutation function: We use shuffle index
        toolbox.register("mutate", tools.mutShuffleIndexes,
                         indpb=0.1)  # Because it already changes 2 values

        # registers selection function: We select using tournament selection of 2.
        toolbox.register("select", tools.selTournament, tournsize=2)

        # registers survival selection function
        toolbox.register("survive", tools.selRandom)

    def mutation(offspring):
        """
        'Mutation is applied to the offspring delivered by crossover.'

        Args:
            offspring (List of individuals): Selected offspring from the population
        """
        for mutant in offspring:
            if random.random() < MUTPB:
                toolbox.mutate(mutant)

    def crossover_and_mutation(offspring):
        """
        'In evolutionary computing, the combination of features from two individuals
        in offspring is often called crossover (or recombination).'
        We currently use two point crossover.

        Args:
            offspring (List of individuals): Selected offspring from the population
        """
        children = []
        for parent1, parent2 in zip(offspring[::2], offspring[1::2]):
            # NOT USED. if random.random() < CXPB:
            child1 = toolbox.clone(parent1)
            child2 = toolbox.clone(parent2)
            toolbox.mate(child1, child2)
            del child1.fitness.values
            del child2.fitness.values
            children.extend((child1, child2))

        # apply mutation to children
        mutation(children)

        # add children to population
        offspring.extend((child for child in children))

    def configure_results(pop, generation, ultimate_best):
        fits = [ind.fitness.values[0] for ind in pop]

        length = len(pop)
        mean = sum(fits) / length
        sum2 = sum(x * x for x in fits)
        std = abs(sum2 / length - mean**2)**0.5
        max_fitness = max(fits)

        print("  Min %s" % min(fits))
        print("  Max %s" % max_fitness)
        print("  Avg %s" % mean)
        print("  Std %s" % std)

        # 7.
        best = fits.index(max_fitness)
        if max_fitness > ultimate_best:
            print("ultimate best")
            ultimate_best = best
            np.savetxt(experiment_name + "/best.txt", pop[best])
        if max_fitness > winner["fitness"]:
            print("WINNER")
            winner["solution"] = pop[best]
            winner["fitness"] = max_fitness

        genlist.append(generation)
        bestlist.append(round(max_fitness, 6))
        meanlist.append(round(mean, 6))
        stdlist.append(round(std, 6))

        # save result of each generation
        # file_aux  = open(experiment_name+'/results.txt','a')
        # file_aux.write('\n\ngen best mean std')
        # file_aux.write('\n'+str(generation)+' '+str(round(max_fitness,6))+' '+str(round(mean,6))+' '+str(round(std,6))   )
        # file_aux.close()

        return fits, ultimate_best

    def evolution(pop, ultimate_best):
        """
        Evolution Steps:
        1. Select next generation of individuals from population
        2. Clone is used (I think) to let the DEAP algorithm know it is a new generation
        3. Apply Crossover on the offspring
        4. Apply Mutation on the offspring
        5. Evaluate individuals that have been changed due to crossover or mutation
        6. Apply survivor selection by picking the best of a group
        7. Show statistics of the fitness levels of the population and save best individual of that run
        8. Update environment solutions

        Args:
            pop (list): A list containing individuals
        """
        current_g = 0
        while current_g < GENS:
            current_g = current_g + 1
            print("-- Generation %i --" % current_g)

            # 1.
            selected = toolbox.select(pop, len(pop))

            # 2.
            offspring = list(map(toolbox.clone, selected))
            shuffle(offspring)
            #3. #4.
            crossover_and_mutation(offspring)

            #5.
            changed_individuals = [
                ind for ind in offspring if not ind.fitness.valid
            ]
            toolbox.evaluate(changed_individuals)
            #6
            survivors = toolbox.survive(offspring, POP_SIZE)

            # Replace old population by offspring
            pop[:] = survivors

            # 7.
            fits, ultimate_best = configure_results(pop, current_g,
                                                    ultimate_best)
            print(fits)
            # 8.
            solutions = [pop, fits]
            env.update_solutions(solutions)
            env.save_state()

    def main(iteration_number):
        """
        This is the start of the program.
        Program  Steps:
        1. Setup Deap Environment
        2. Initialize Population of individuals
        3. Evaluate population by playing the game and assigning fitness levels
        4. Show and save results for that population
        5. Start Evolution
        """

        # 1.
        setup_DEAP()

        # 2.
        print("-- Form Population --")
        random.seed(2)  #starts with the same population
        pop = toolbox.population(n=POP_SIZE)
        random.seed(iteration_number)
        print(iteration_number)
        # 3.
        toolbox.evaluate(pop)
        ultimate_best = -200
        # 4.
        fits, ultimate_best = configure_results(pop, 0, ultimate_best)

        # 5.
        evolution(pop, ultimate_best)

        # Print results to csv
        print("PRINT TO CSV")
        with open(experiment_name + '/results.csv', 'w+',
                  newline='') as csvfile:
            filewriter = csv.writer(csvfile, delimiter=',')
            filewriter.writerow(["generation", "best", "mean"])  #, "std"])
            for i in range(len(bestlist)):
                filewriter.writerow(
                    [genlist[i], bestlist[i], meanlist[i], stdlist[i]])

    main(iteration_number)
Example #20
from environment import Environment
from robot import searchAStar
import random
import queue as q

Env1 = Environment((10, 10), .5, 1)
print("World Map:\n", Env1.envMatrix)
Env1.robots[0].updateMap(Env1.robotsLocation[0], Env1.envMatrix)
# print("Robot World Location: ", Env1.robotsLocation[0])
# print("Robot Relative Location: ", Env1.robots[0].location)
# print("Robot Local Map:\n", Env1.robots[0].localMap)
stuck = False
while not stuck:
    Env1.robots[0].updateMap(Env1.robotsLocation[0], Env1.envMatrix)
    if Env1.robots[0].currentPath.empty():
        # input("IN NEW PATH SEARCH...")
        solution = Env1.robots[0].getClosestUnknown()
        if solution is None:
            stuck = True
        else:
            for i in solution:
                Env1.robots[0].currentPath.put(i)

    # iBump = 0
    # jBump = 0
    while (Env1.robots[0].localMap[Env1.robots[0].goal[0]][
            Env1.robots[0].goal[1]]
           == 3) and (not Env1.robots[0].currentPath.empty()) and not stuck:
        print("Robot World Location: ", Env1.robotsLocation[0])
        print("Robot Relative Location: ", Env1.robots[0].location)
        print("Robot Local Map:\n", Env1.robots[0].localMap)
Example #21
def run():
    """ Driving function for running the simulation. 
        Press ESC to close the simulation, or [SPACE] to pause the simulation. """
    # constant = 0.9957
    # alpha = 0.2
    tolerance = 0.01

    for constant in [
            0.0078, 0.0052, 0.0039, 0.0031, 0.0026, 0.0022, 0.0019, 0.0017
    ]:
        for alpha in [0.2, 0.5, 0.8]:
            good_counter = 0
            for n in range(20):
                ##############
                # Create the environment
                # Flags:
                #   verbose     - set to True to display additional output from the simulation
                #   num_dummies - discrete number of dummy agents in the environment, default is 100
                #   grid_size   - discrete number of intersections (columns, rows), default is (8, 6)
                env = Environment(verbose=True)
                ##############
                # Create the driving agent
                # Flags:
                #   learning   - set to True to force the driving agent to use Q-learning
                #    * epsilon - continuous value for the exploration factor, default is 1
                #    * alpha   - continuous value for the learning rate, default is 0.5
                agent = env.create_agent(
                    LearningAgent,
                    learning=True,
                    alpha=alpha,
                    constant=constant)
                ##############
                # Follow the driving agent
                # Flags:
                #   enforce_deadline - set to True to enforce a deadline metric
                env.set_primary_agent(agent, enforce_deadline=True)
                ##############
                # Create the simulation
                # Flags:
                #   update_delay - continuous time (in seconds) between actions, default is 2.0 seconds
                #   display      - set to False to disable the GUI if PyGame is enabled
                #   log_metrics  - set to True to log trial and simulation results to /logs
                #   optimized    - set to True to change the default log file name
                sim = Simulator(
                    env,
                    update_delay=0,
                    log_metrics=True,
                    display=False,
                    optimized=True)
                ##############
                # Run the simulator
                # Flags:
                #   tolerance  - epsilon tolerance before beginning testing, default is 0.05
                #   n_test     - discrete number of testing trials to perform, default is 0
                sim.run(n_test=100, tolerance=tolerance)

                safety_rating, reliability_rating = plot_trials(
                    'sim_improved-learning.csv')

                if (safety_rating in ['A+', 'A']
                        and reliability_rating in ['A', 'A+']):
                    good_counter += 1
                else:
                    break

            with open('result.txt', 'a+') as f:
                f.write('{}, {}, {}, {}\n'.format(constant, alpha, agent.counter,
                                                  good_counter))
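The snippet does not show how LearningAgent turns constant into an exploration schedule; one plausible (assumed) choice is a linear decay, which also explains why smaller constants require more training trials before epsilon reaches the 0.01 tolerance:

def decayed_epsilon(trial, constant, start=1.0):
    """Hypothetical schedule: epsilon falls linearly by `constant` per training trial."""
    return max(0.0, start - constant * trial)

# With this schedule, constant=0.0078 drops epsilon below the 0.01 tolerance after
# roughly (1 - 0.01) / 0.0078 ~ 127 trials, while constant=0.0017 needs ~582 trials.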
Example #22
def main():
    """Main function."""

    log_levels = {
        u"NOTSET": logging.NOTSET,
        u"DEBUG": logging.DEBUG,
        u"INFO": logging.INFO,
        u"WARNING": logging.WARNING,
        u"ERROR": logging.ERROR,
        u"CRITICAL": logging.CRITICAL
    }

    args = parse_args()
    logging.basicConfig(format=u"%(asctime)s: %(levelname)s: %(message)s",
                        datefmt=u"%Y/%m/%d %H:%M:%S",
                        level=log_levels[args.logging])

    logging.info(u"Application started.")

    try:
        spec = Specification(args.specification)
        spec.read_specification()
    except PresentationError as err:
        logging.critical(u"Finished with error.")
        logging.critical(repr(err))
        return 1

    if spec.output[u"output"] not in OUTPUTS:
        logging.critical(
            f"The output {spec.output[u'output']} is not supported.")
        return 1

    return_code = 1
    try:
        env = Environment(spec.environment, args.force)
        env.set_environment()

        prepare_static_content(spec)

        data = InputData(spec, spec.output[u"output"])
        if args.input_file:
            data.process_local_file(args.input_file)
        elif args.input_directory:
            data.process_local_directory(args.input_directory)
        else:
            data.download_and_parse_data(repeat=1)

        if args.print_all_oper_data:
            data.print_all_oper_data()

        generate_tables(spec, data)
        generate_plots(spec, data)
        generate_files(spec, data)

        if spec.output[u"output"] == u"report":
            generate_report(args.release, spec, args.week)
        elif spec.output[u"output"] == u"trending":
            sys.stdout.write(generate_cpta(spec, data))
            try:
                alert = Alerting(spec)
                alert.generate_alerts()
            except AlertingError as err:
                logging.warning(repr(err))
        elif spec.output[u"output"] == u"convert-xml-to-json":
            convert_xml_to_json(spec, data)
        else:
            logging.info("No output will be generated.")

        logging.info(u"Successfully finished.")
        return_code = 0

    except AlertingError as err:
        logging.critical(f"Finished with an alerting error.\n{repr(err)}")
    except PresentationError as err:
        logging.critical(f"Finished with a PAL error.\n{str(err)}")
    except (KeyError, ValueError) as err:
        logging.critical(f"Finished with an error.\n{repr(err)}")
    finally:
        if spec is not None:
            clean_environment(spec.environment)
    return return_code
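parse_args() is defined elsewhere; a minimal sketch can be inferred from the attributes main() reads, keeping in mind that the argument names, defaults and help texts below are assumptions rather than the tool's real CLI:

import argparse

def parse_args():
    parser = argparse.ArgumentParser(description=u"Presentation and analytics layer.")
    parser.add_argument(u"--specification", required=True,
                        help=u"Path to the specification file(s).")
    parser.add_argument(u"--logging", default=u"ERROR",
                        choices=[u"NOTSET", u"DEBUG", u"INFO", u"WARNING",
                                 u"ERROR", u"CRITICAL"],
                        help=u"Logging level.")
    parser.add_argument(u"--force", action=u"store_true",
                        help=u"Force recreation of the working environment.")
    parser.add_argument(u"--input-file", help=u"Process a single local input file.")
    parser.add_argument(u"--input-directory", help=u"Process a local input directory.")
    parser.add_argument(u"--print-all-oper-data", action=u"store_true",
                        help=u"Print all operational data.")
    parser.add_argument(u"--release", default=u"master", help=u"Release string.")
    parser.add_argument(u"--week", default=u"1", help=u"Week of the report.")
    return parser.parse_args()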
Example #23
import os
import pickle
import re

import numpy as np


def run(restore, q_manual_init=False, LfD=False):
    env = Environment()
    agt = env.create_agent(LearningAgent, test=True)  # create agent
    env.set_agent(agt, enforce_deadline=False)  # specify agent to track
    # NOTE: You can set enforce_deadline=False while debugging to allow longer trials

    n_trials = 10000000
    quit = False
    parent_path = os.path.dirname(os.path.realpath(__file__))
    data_path = os.path.join(parent_path, 'q_table')
    lfd_path = os.path.join(parent_path, 'LfD')

    if not os.path.exists(data_path):
        os.makedirs(data_path)
    files_lst = os.listdir(data_path)
    max_index = 0
    filepath = ''
    for filename in files_lst:
        fileindex_list = re.findall(r'\d+', filename)
        if not fileindex_list:
            continue
        fileindex = int(fileindex_list[0])
        if fileindex >= max_index:
            max_index = fileindex
            filepath = os.path.join(data_path, filename)

    if restore:
        if os.path.exists(filepath):
            print('restoring Q_values from {} ...'.format(filepath))
            agt.set_q_tables(filepath)
            print('restoring done...')

    if LfD:
        print('initializing Q_values from LfD (Learning from Demonstration)...')
        agt.q_table_LfD(lfd_path)

    for trial in range(max_index + 1, n_trials):
        print("Simulator.run(): Trial {}".format(trial))  # [debug]
        if not agt.test:
            if trial > 10000 and trial < 30000:
                agt.epsilon = 0.3
            elif trial > 30000 and trial < 50000:
                agt.epsilon = 0.2
            elif trial > 50000 and trial < 70000:
                agt.epsilon = 0.1
            elif trial > 70000:
                agt.epsilon = 0.05
        env.reset()
        print('epsilon:', agt.epsilon)
        while True:
            try:
                env.step()
            except KeyboardInterrupt:
                quit = True
            finally:
                if quit or env.done:
                    break

        env.set_agent_velocity(np.zeros(2))

        if not agt.test:
            if trial % 50 == 0:
                print("Trial {} done, saving Q table...".format(trial))
                q_table_file = os.path.join(data_path, 'trial{:07d}.cpickle'.format(trial))
                with open(q_table_file, 'wb') as f:
                    pickle.dump(agt.Q_values, f, protocol=pickle.HIGHEST_PROTOCOL)

        if quit:
            break

    print('successful trials:', env.succ_times)
    print('number of trials that hit the hard time limit:', env.num_hit_time_limit)
    print('number of trials that ran out of time:', env.num_out_of_time)
    print('number of trials that hit cars:', env.hit_car_times)
    print('number of trials that hit walls:', env.hit_wall_times)
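The training loop above pickles agt.Q_values every 50 trials; the matching LearningAgent.set_q_tables() used for restoring is not shown. A hypothetical sketch of such a restore method:

import pickle

class LearningAgentSketch:
    """Hypothetical fragment: only the restore method matching the save format above."""

    def set_q_tables(self, filepath):
        # Load Q values written by pickle.dump(agt.Q_values, ...) in the training loop.
        with open(filepath, 'rb') as f:
            self.Q_values = pickle.load(f)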
Example #24
from pybrain3.rl.agents.learning import LearningAgent
from pybrain3.rl.learners.valuebased import ActionValueNetwork  # assumed path, mirrors pybrain's layout
from pybrain3.rl.learners.valuebased.nfq import NFQ
import pandas as pd

path = '/Users/arammoghaddassi/Google Drive/Projects/RL-Automated-Trading/data/'
aapl = pd.read_csv(path + 'AAPL.csv')
amzn = pd.read_csv(path + 'AMZN.csv')

# Model switches
n_episodes = 10
episode_length = 30

controller = ActionValueNetwork(dimState=1, numActions=3)  # maps states to action values
learner = NFQ()  # does the actual learning, updates values in the action-value network

env = Environment(aapl)  # custom environment from this project, wrapping the AAPL data
agent = LearningAgent(controller, learner=learner)

agent.reset()
env.reset()

for ep in range(n_episodes):
    agent.newEpisode()
    for i in range(episode_length):
        state = env.state()
        agent.integrateObservation(state)
        action = agent.getAction() #Causing an error right now.
        state, reward = env.step(action)
        agent.giveReward(reward)
        print("Episode: {}, Trial: {}, Balance: {}".format(ep, i, env.account_value()))
        agent.learn() #When/how should I actually call this method?
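Environment here is a custom class of that project (not a gym environment); a hypothetical minimal version exposing the interface this loop relies on — state(), step(), reset() and account_value() — could look like this, assuming a single 'Close' price column in the CSV data:

class PriceEnvironment:
    """Hypothetical stand-in for the project's Environment class."""
    ACTIONS = (-1, 0, 1)  # sell, hold, buy

    def __init__(self, prices, cash=10000.0):
        self.prices = list(prices['Close'])
        self.start_cash = cash
        self.reset()

    def reset(self):
        self.t, self.cash, self.shares = 0, self.start_cash, 0

    def state(self):
        return [self.prices[self.t]]

    def account_value(self):
        return self.cash + self.shares * self.prices[self.t]

    def step(self, action):
        before = self.account_value()
        side = self.ACTIONS[int(action)]
        price = self.prices[self.t]
        if side == 1 and self.cash >= price:      # buy one share
            self.cash, self.shares = self.cash - price, self.shares + 1
        elif side == -1 and self.shares > 0:      # sell one share
            self.cash, self.shares = self.cash + price, self.shares - 1
        self.t = min(self.t + 1, len(self.prices) - 1)
        return self.state(), self.account_value() - before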

Example #25
# Deep Convolutional Neural Network - CNN
# Reinforcement Learning - trial and error

from environment import Environment
from train import Trainer
from dqn import DQN

# initialize gym environment and dqn
env = Environment(args)
agent = DQN(env, args)

# train agent
Trainer(agent).run()

# play the game
env.gym.monitor.start(args.out, force=True)
agent.play()

env.gym.monitor.close()
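args comes from elsewhere in that repository and would have to exist before Environment(args) is called; a hypothetical stand-in with the one field visible in this snippet (args.out) plus an assumed environment name:

import argparse

# Hypothetical: the real project parses these from the command line.
args = argparse.Namespace(env_name='Breakout-v0', out='gym-results/')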
Example #26
        if args.decay_method == "adaptive":
            if iteration % 10 == 0:
                if recent_total_reward < last_reward:
                    print "Policy is not improving. Decrease KL and increase steps."
                    if args.max_kl > 0.001:
                        args.max_kl -= args.kl_adapt
                else:
                    print "Policy is improving. Increase KL and decrease steps."
                    if args.max_kl < 0.01:
                        args.max_kl += args.kl_adapt
                last_reward = recent_total_reward
                recent_total_reward = 0

        if args.decay_method == "linear":
            if args.max_kl > 0.001:
                args.max_kl -= args.kl_adapt

        if args.decay_method == "exponential":
            if args.max_kl > 0.001:
                args.max_kl *= args.kl_adapt
        rollouts.set_policy_weights(theta)
else:
    from agent.agent_continous import TRPOAgent
    from environment import Environment
    env = Environment(gym.make(pms.environment_name))
    agent = TRPOAgent(env)
    agent.test(pms.checkpoint_file)


rollouts.end()
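For clarity, the three decay modes above can be restated as one standalone helper; the 0.001 / 0.01 bounds and the kl_adapt step size are taken from the snippet, while the reward bookkeeping of the adaptive branch is left out:

def decay_max_kl(max_kl, kl_adapt, method, improving):
    if method == "adaptive":
        # shrink the trust region when the policy stalls, widen it when it improves
        if not improving and max_kl > 0.001:
            return max_kl - kl_adapt
        if improving and max_kl < 0.01:
            return max_kl + kl_adapt
    elif method == "linear" and max_kl > 0.001:
        return max_kl - kl_adapt
    elif method == "exponential" and max_kl > 0.001:
        return max_kl * kl_adapt
    return max_kl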
Example #27
import q_learning as q
import value_iteration as vi

# generate sample environment
from common_utils import *
from environment import Environment

environment = Environment(n_states=5, n_actions=2, n_episodes=10000)

print('Q-learning: ')
policy, value = q.q_learning(environment)

print(value)
print(policy)

print('VI: ')
policy, value = vi.value_iteration(environment)

print(value)
print(policy)
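For reference, this is the kind of tabular Q-learning update that q.q_learning() presumably implements; the environment interface assumed here (reset() and step() returning (next_state, reward, done)) is an illustration, not necessarily this repository's actual API:

import numpy as np

def tabular_q_learning(env, n_states, n_actions, n_episodes,
                       alpha=0.1, gamma=0.95, epsilon=0.1):
    Q = np.zeros((n_states, n_actions))
    for _ in range(n_episodes):
        s, done = env.reset(), False
        while not done:
            # epsilon-greedy action selection
            if np.random.rand() < epsilon:
                a = np.random.randint(n_actions)
            else:
                a = int(np.argmax(Q[s]))
            s_next, r, done = env.step(a)
            # one-step temporal-difference update
            Q[s, a] += alpha * (r + gamma * np.max(Q[s_next]) - Q[s, a])
            s = s_next
    return Q.argmax(axis=1), Q.max(axis=1)  # greedy policy and state values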
Example #28
# The environment below is configured for single-enemy mode (multiplemode='no', enemies=[3]);
# in the multi-enemy setup the same neural network is trained to defeat all 8 enemy types.
#
# enemies - the list of enemies to be fought; there are 8 different enemy types, and each
# phenotype (neural network) is evaluated once against each enemy in the list
#
# playermode - the control mode for the player; "ai" means the player is driven by an
# artificial intelligence
#
# player_controller - this class is passed in to tell the environment how the player
# will be controlled
#
# speed - the game speed; 'fastest' runs the game without a frames-per-second cap,
# which speeds up training; it can also be set to 'normal'
#
# randomini - configures the enemy's spawn position on the map; here it is set so the
# enemy spawns at random spots in the scenarios
env = Environment(multiplemode='no', enemies=[3], playermode="ai",
                  player_controller=player_controller(), speed='fastest',
                  randomini='yes')

# Runs the simulation and returns the fitness value of the phenotype (neural network) being evaluated
def simula(env,x):
    # f = fitness result
    # p = player life result
    # e = enemy life result
    # t = time result
    f,p,e,t = env.play(pcont=x)
    return f

def eval_genomes(genomes, config):
    for genome_id, genome in genomes:
        net = neat.nn.RecurrentNetwork.create(genome, config)
        genome.fitness = simula(env, net)
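eval_genomes() follows the standard neat-python fitness-function signature; a typical driver for it would look like the sketch below, where the config file path and generation count are placeholders:

import neat

config = neat.Config(neat.DefaultGenome, neat.DefaultReproduction,
                     neat.DefaultSpeciesSet, neat.DefaultStagnation,
                     'neat_config.txt')          # hypothetical config file
population = neat.Population(config)
population.add_reporter(neat.StdOutReporter(True))
winner = population.run(eval_genomes, 30)        # evolve for 30 generations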
Example #29
	start_epsilon = 1
	end_epsilon = 0.05
	discount = 0.99
	learning_rate = 0.0001
	BATCH_SIZE = 256
	TARGET_UPDATE = 100

	Transition = namedtuple('Transition', ('state', 'next_state', 'action', 'reward', 'done'))

	resize = T.Compose([
		T.ToPILImage(),
		T.Resize((120, 160), interpolation=Image.BICUBIC),  # Image.CUBIC was an alias of BICUBIC, removed in newer Pillow
		T.ToTensor()
	])

	env = Environment(map_name='loop_empty').create_env()
	env = DiscreteWrapper(env)
	env = DtRewardWrapper(env)
	env.reset()

	init_screen = get_screen()
	_, screen_height, screen_width = init_screen.shape
	n_actions = env.action_space.n

	policy_net = DQN(screen_height, screen_width, n_actions).to(device)
	policy_net.apply(weights_init)
	target_net = DQN(screen_height, screen_width, n_actions).to(device)
	target_net.apply(weights_init)
	target_net.load_state_dict(policy_net.state_dict())
	target_net.eval()
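get_screen() is referenced above but not shown; a plausible version in the style of the PyTorch DQN tutorial is sketched below, assuming env.render(mode='rgb_array') returns an HxWx3 uint8 frame and that numpy, torch and device are available from the surrounding file:

def get_screen():
	frame = env.render(mode='rgb_array').transpose((2, 0, 1))      # HWC -> CHW
	frame = np.ascontiguousarray(frame, dtype=np.float32) / 255.0  # scale to [0, 1]
	return resize(torch.from_numpy(frame)).to(device)              # (3, 120, 160) tensor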
Example #30
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.insert(0, 'evoman')
from environment import Environment
from demo_controller import player_controller, enemy_controller
from random import sample

MUTATION_RATE = 0.3

ENV = Environment(experiment_name="test",
                  enemies=[7],
                  playermode="ai",
                  player_controller=player_controller(),
                  enemy_controller=enemy_controller(),
                  level=2,
                  speed="fastest",
                  contacthurt='player',
                  logs='off')


class Individual:
    dom_u = 1
    dom_l = -1
    n_hidden = 10
    n_vars = (ENV.get_num_sensors() +
              1) * n_hidden + (n_hidden + 1) * 5  # one hidden layer with n_hidden neurons, 5 outputs

    def __init__(self):
        self.age = 0
        self.weights = list()