Example #1
import random

import numpy as np
from tqdm import tqdm

from Agent import Agent

qcopa_perf = np.zeros(10)
optimum_perf = np.zeros(10)
greedy_perf = np.zeros(10)
simultaneous_perf = np.zeros(10)

# Main loop
cnt = 0
for bb in tqdm(np.linspace(0,1,10)):

    beta = bb
    actions_1 = np.linspace(Pmin, Pmax_1, Npower)
    actions_2 = np.linspace(Pmin, Pmax_2, Npower)
    states = np.array([0])

    agents = []
    PA_1 = Agent(actions_1.size, actions_2.size)
    PA_2 = Agent(actions_1.size, actions_2.size)
    agents.append(PA_1)
    agents.append(PA_2)

    # Q-learning
    Iterations = 30 * (actions_1.size * actions_2.size)

    for episode in np.arange(Iterations):

        if (episode / Iterations * 100) < 80:
            rnd = random.randint(1, 100)
            if rnd < epsilon:
                idx = random.randint(0, Npower - 1)
                PA_1.set_power(actions_1[idx])
                PA_1.p_index = idx
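
The snippet above breaks off inside the ε-greedy branch for PA_1. As a rough, self-contained sketch of the same explore/exploit pattern (the Q-table row argument and the argmax exploitation step are assumptions; the percent-style epsilon mirrors the randint(1, 100) draw in the loop):

import random

import numpy as np


def epsilon_greedy_index(q_row, epsilon_pct, n_actions):
    """Pick an action index: explore with probability epsilon_pct/100,
    otherwise exploit the given Q-table row (assumed layout)."""
    if random.randint(1, 100) < epsilon_pct:
        return random.randint(0, n_actions - 1)  # explore
    return int(np.argmax(q_row[:n_actions]))     # exploit


# Hypothetical usage mirroring the truncated loop:
# idx = epsilon_greedy_index(PA_1.Q[0], epsilon, Npower)
# PA_1.set_power(actions_1[idx]); PA_1.p_index = idx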
Example #2
from Agent import Agent
from Environment import Environment
import numpy as np

TOTAL_LINES = 4
TOTAL_COLS = 4
TOTAL_ACTIONS = 4
IMM_REWARD = -1
ITERATION = 100
DISCOUNT_FACTOR = 1

agent = Agent(TOTAL_LINES, TOTAL_COLS, TOTAL_ACTIONS, IMM_REWARD,
              DISCOUNT_FACTOR)
environment = Environment(TOTAL_LINES, TOTAL_COLS, TOTAL_ACTIONS, IMM_REWARD,
                          DISCOUNT_FACTOR)

value_net = np.zeros((TOTAL_LINES, TOTAL_COLS))

for it in range(ITERATION):

    value_net = environment.update_value_net(value_net)
    print(value_net)
    policy_net = agent.update_policy_net(value_net)
    print(policy_net)
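
Example #2 delegates the actual Bellman update to Environment.update_value_net, which is not shown here. A minimal standalone sketch of what one synchronous sweep with reward -1 and discount 1 might look like on such a grid (the terminal-corner layout is an assumption borrowed from the classic 4x4 gridworld exercise):

import numpy as np


def value_iteration_sweep(value_net, imm_reward=-1.0, discount=1.0):
    """One synchronous Bellman sweep; moves that would leave the grid keep
    the agent in place, and the assumed terminal corners keep value 0."""
    lines, cols = value_net.shape
    terminals = {(0, 0), (lines - 1, cols - 1)}  # assumption
    new_v = np.zeros_like(value_net)
    for i in range(lines):
        for j in range(cols):
            if (i, j) in terminals:
                continue
            best = -np.inf
            for di, dj in ((-1, 0), (1, 0), (0, -1), (0, 1)):
                ni = min(max(i + di, 0), lines - 1)
                nj = min(max(j + dj, 0), cols - 1)
                best = max(best, imm_reward + discount * value_net[ni, nj])
            new_v[i, j] = best
    return new_v


# value_net = value_iteration_sweep(value_net)  # roughly what each iteration above does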
Example #3
import torch


def get_Action(Humid, Temp):
    agent = Agent()
    agent.brain.Q.load_state_dict(torch.load(PATH))
    agent.brain.Q.eval()
    # `state` is assumed to be the (humidity, temperature) pair; adjust to the
    # agent's real state format.
    state = (Humid, Temp)
    action = agent.action_process(state)
    return action
Example #4
import sys
sys.path.append('../')
from ObstaclePotentialField import ObstaclePotentialField
from Agent import Agent
from Obstacle import Obstacle

Drones = []
Drone1 = Agent(0, (1, 2, 0), 1)
Drones.append(Drone1)
Drone2 = Agent(1, (9, 9, 0), 1)
Drones.append(Drone2)

Obstacles = []
Obstacle1 = Obstacle((5, 5, 0))
Obstacles.append(Obstacle1)

OPF = ObstaclePotentialField(0.5, 1, 20)
OPF2 = ObstaclePotentialField(0.5, 1, 100)


def test_calculate_obstacle_force():
    assert OPF.calculate_obstacle_force(0, 0, Drones, Obstacles) == (1, 1, 1)


def test_calculate_obstacle_forces():
    assert OPF.calculate_obstacle_forces(Drones, Obstacle1) == []
Example #5
beta = 0.4


def beta_val(beta_number):
    beta_number = beta_number + 0.002 if beta_number < 1 else 1
    return beta_number


env = env_manager()
input_dim = env.observation_space
n_actions = env.action_space_n
agent = Agent(n_actions,
              eps_start,
              eps_end,
              eps_decay,
              lr,
              gamma,
              memory_size,
              name=file_path,
              input_dims=input_dim)
#agent.target_net.load_state_dict(file_path +'/3_'
#episode, agent.epsilon = agent.policy_net.load()
#_, _ = agent.target_net.load()
t_reward = []
avg = 0
for episode in range(num_episodes):
    beta = beta_val(beta)
    state = env.reset()
    done = False
    total_rewards = 0
    while not done:
Example #6
from Agent import Agent
from Obstacle import Obstacle

import matplotlib.pyplot as plt
import matplotlib.patches as patches

from math import exp

from Exit import Exit

fig1 = plt.figure()
ax1 = fig1.add_subplot(111, aspect='equal')
ax1.set_xlim([1, 10])
ax1.set_ylim([1, 10])

agent1 = Agent((5, 5), 1)
agent2 = Agent((3, 7), 1)
agent3 = Agent((0, 2), 1)
agent4 = Agent((2, 2), 1)

agents = []
agents.append(agent1)
agents.append(agent2)
agents.append(agent3)
agents.append(agent4)

agent1.speed = (-2, 2)
agent2.speed = (0.4, -0.4)
agents = []
agents.append(agent1)
agents.append(agent2)
Example #7
from Agent import Agent
from ProblemSet import ProblemSet
import logging
import problem_utils

logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.DEBUG)

agent = Agent()
n_correct = 0
n_total = len(ProblemSet("Basic Problems B").problems)

for p in ProblemSet("Basic Problems B").problems:
    LOGGER.info('=================================')
    LOGGER.info('Solving problem {}'.format(p.name))
    if problem_utils.is_problem2x2(p):
        source = p.figures['A']
        destination = p.figures['B']
        guess = agent.Solve(p)
        answer = p.checkAnswer(guess)
        if guess == answer:
            LOGGER.info('{}++++++++++++Correct+++++++++++++'.format(p.name))
            n_correct += 1
        else:
            LOGGER.error('Wrong')
    else:
        print('Not 2x2 problem')

print('Total correct answers {} out of {}'.format(n_correct, n_total))
Example #8
# window.title("hello world")
# # label =Label(window, text="helloo", font=('Arial Bold', 50))
# # label.grid(column=0,row=0)
# window.geometry('800x400')
# quit = Button(window, text="Quit!", command=window.destroy)
# quit.grid(column=0, row=1)
# playerName = Entry(window, width=10)
# playerName.grid(column=1, row=1)
# window.mainloop()


#Game is running window
players = []

try: 
    agent = Agent(input("Enter your name: ") or "Master Player", "Human")
except ValueError:
    agent = Agent("Human", "Human")
players.append(agent)

try:
    numAgents = int(input("Enter amount of CPU players: ") or 2)
except ValueError:
    print("Not a valid amount of players, default is 2")
    numAgents = 2

iter = 0
while iter < numAgents:
    try:
        agent = Agent(input("Enter the CPU name: ") or "Johnny Q" + str(iter),
                      input("Enter the AI type: ") or "Random")
    except ValueError:
        agent = Agent("Agent" + str(iter), "Random")
    players.append(agent)
    iter += 1
Example #9
# #### (i) Defining some variables

# In[45]:

env = gym.make('SpaceInvaders-v0')
num_actions = 6  # 0 no action, 1 fire, 2 move right, 3 move left, 4 move right fire, 5 move left fire
scores = []
episodes = 500
batch_size = 32

# #### (ii) Making an object of agent class and initialising Experience Replay memory with random transitions

# In[ ]:

agent = Agent(num_actions)

# In[46]:

while agent.memCntr < agent.memSize:
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        if done and info['ale.lives'] == 0:
            # Give a large negative reward when the agent loses its last life
            reward = -50
        agent.storeTransition(agent.process_state(state), action, reward,
                              agent.process_state(next_state))
        state = next_state
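
Example #9 stops after pre-filling the replay memory with random transitions. A hedged sketch of the training loop that would typically follow; chooseAction and learn are assumed method names (only storeTransition, process_state, memCntr and memSize appear above):

# Hypothetical continuation, assuming agent.chooseAction / agent.learn exist:
for episode in range(episodes):
    state = env.reset()
    done = False
    score = 0
    while not done:
        action = agent.chooseAction(agent.process_state(state))  # assumed API
        next_state, reward, done, info = env.step(action)
        if done and info['ale.lives'] == 0:
            reward = -50  # same terminal penalty as during the memory fill
        agent.storeTransition(agent.process_state(state), action, reward,
                              agent.process_state(next_state))
        agent.learn(batch_size)  # assumed API
        score += reward
        state = next_state
    scores.append(score)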
Example #10
def generateoffspring(self):
    parent = self.selection()
    offspring = Agent(self.K, self.T, self.r, self.p)
    offspring.setexpression(parent.expression)
    return offspring
Example #11
from Agent import Agent
import gym
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

if __name__ == "__main__":
    env = gym.make("LunarLander-v2")
    n_games = 400
    agent = Agent(lr=0.001, n_actions=4, gamma=0.99, epsilon=1, epsilon_dec=2e-4,
                  input_dims=[8], batch_size=32, lstm=True, replace=10,
                  normalize=True)
    scores, eps_history = [], []
    
    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()   
        while not done:
            env.render()
            action = agent.choose_action(observation)
            obs_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward, obs_, done)
            observation = obs_
            agent.learn()
        eps_history.append(agent.epsilon)
        scores.append(score)
        avg_score = np.mean(scores[-100:])
        
Example #12
def initialize(self):
    self.agents = []
    for i in range(self.s):
        self.agents.append(Agent(self.K, self.T, self.r, self.p))
        self.agents[i].initialize()
    self.calcallfitness()
Example #13
from Extractor import Extractor
from Granulator import Granulator
from Agent import Agent
from Metric import Metric
from Representative import Representative
from Clustering_MBSAS import Clustering_MBSAS
from Clustering_K_Means import Clustering_K_Means

extractor1 = Extractor()

obj_clustering_MBSAS = Clustering_MBSAS(3, 0.2, 0.1, 1.1)  # Lambda, theta_start, theta_step, theta_stop
agent1 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_MBSAS)
agent1.execute(3.1, 0.5)  # S_T, eta

obj_clustering_K_Means = Clustering_K_Means(1, 3)  # k, k_max
agent2 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_K_Means)
agent2.execute(3.1, 0.5)  # S_T, eta
Example #14
def EADQN_main(table, num, weights_dir):  #actionDBs, num):
    import argparse
    import sys
    import time
    import tensorflow as tf

    from Environment import Environment
    from ReplayMemory import ReplayMemory
    from EADQN import DeepQLearner
    from Agent import Agent
    parser = argparse.ArgumentParser()

    envarg = parser.add_argument_group('Environment')
    envarg.add_argument("--model_dir",
                        default="/home/fengwf/Documents/",
                        help="")
    envarg.add_argument("--vec_model", default='mymodel5-5-50', help="")
    envarg.add_argument("--vec_length", type=int, default=50, help="")
    envarg.add_argument("--actionDB", default='tag_actions', help="")
    envarg.add_argument("--max_text_num", default='64', help="")
    envarg.add_argument("--reward_assign",
                        default='2.0 1.0 -1.0 -2.0',
                        help="")
    envarg.add_argument("--action_rate", type=float, default=0.15, help="")
    envarg.add_argument("--penal_radix", type=float, default=5.0, help="")
    envarg.add_argument("--action_label", type=int, default=2, help="")
    envarg.add_argument("--non_action_label", type=int, default=1, help="")
    envarg.add_argument("--long_text_flag", type=int, default=1, help="")

    memarg = parser.add_argument_group('Replay memory')
    memarg.add_argument("--replay_size", type=int, default=100000, help="")
    memarg.add_argument("--channel", type=int, default=1, help="")
    memarg.add_argument("--positive_rate", type=float, default=0.75, help="")
    memarg.add_argument("--priority", default=1, help="")
    memarg.add_argument("--reward_bound", type=float, default=0, help="")

    netarg = parser.add_argument_group('Deep Q-learning network')
    netarg.add_argument("--num_actions", type=int, default=1000, help="")
    netarg.add_argument("--words_num", type=int, default=500, help="")
    netarg.add_argument("--wordvec", type=int, default=100, help="")
    netarg.add_argument("--learning_rate", type=float, default=0.0025, help="")
    netarg.add_argument("--momentum", type=float, default=0.1, help="")
    netarg.add_argument("--epsilon", type=float, default=1e-6, help="")
    netarg.add_argument("--decay_rate", type=float, default=0.88, help="")
    netarg.add_argument("--discount_rate", type=float, default=0.9, help="")
    netarg.add_argument("--batch_size", type=int, default=8, help="")
    netarg.add_argument("--target_output", type=int, default=2, help="")

    antarg = parser.add_argument_group('Agent')
    antarg.add_argument("--exploration_rate_start",
                        type=float,
                        default=1,
                        help="")
    antarg.add_argument("--exploration_rate_end",
                        type=float,
                        default=0.1,
                        help="")
    antarg.add_argument("--exploration_decay_steps",
                        type=int,
                        default=1000,
                        help="")
    antarg.add_argument("--exploration_rate_test",
                        type=float,
                        default=0.0,
                        help="")
    antarg.add_argument("--train_frequency", type=int, default=1, help="")
    antarg.add_argument("--train_repeat", type=int, default=1, help="")
    antarg.add_argument("--target_steps", type=int, default=5, help="")
    antarg.add_argument("--random_play", default=0, help="")

    mainarg = parser.add_argument_group('Main loop')
    mainarg.add_argument("--result_dir", default="test_result", help="")
    mainarg.add_argument("--train_steps", type=int, default=0, help="")
    mainarg.add_argument("--test_one", type=int, default=1, help="")
    mainarg.add_argument("--text_dir", default='', help="")
    mainarg.add_argument("--test", type=int, default=1, help="")
    mainarg.add_argument("--test_text_num", type=int, default=8, help="")
    mainarg.add_argument("--epochs", type=int, default=2, help="")
    mainarg.add_argument("--start_epoch", type=int, default=0, help="")
    mainarg.add_argument("--home_dir", default="./", help="")
    mainarg.add_argument("--load_weights", default="", help="")
    mainarg.add_argument("--save_weights_prefix", default="", help="")
    mainarg.add_argument("--computer_id", type=int, default=1, help="")
    mainarg.add_argument("--gpu_rate", type=float, default=0.2, help="")
    mainarg.add_argument("--cnn_format", default='NCHW', help="")

    args = parser.parse_args()
    tables_num = len(args.actionDB.split())
    args.load_weights = weights_dir
    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_rate)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        net = DeepQLearner(args, sess)
        env = Environment(args)
        mem = ReplayMemory(args.replay_size, args)
        agent = Agent(env, mem, net, args)
        words = []
        states = []

        if args.load_weights:
            print('Loading weights from %s...' % args.load_weights)
            net.load_weights(args.home_dir +
                             args.load_weights)  # load last trained weights

        if args.test_one and args.load_weights:
            '''
            for i,ad in enumerate(actionDBs):
                tmp_w = []
                tmp_s = []
                for j in range(num[i]):
                    print 'table = %s,  text_num = %d'%(actionDBs[i],j)
                    ws, act_seq, st = agent.test_one_db(actionDBs[i], j)
                    tmp_w.append(ws)
                    tmp_s.append(st)
                    #print '\nStates: %s\n'%str(st)
                    #print '\nWords: %s\n'%str(ws)
                    #print '\n\nAction_squence: %s\n'%str(act_seq)
                words.append(tmp_w)
                states.append(tmp_s)
            '''
            tmp_w = []
            tmp_s = []
            for j in range(num):
                #print 'table = %s,  text_num = %d'%(table,j)
                ws, act_seq, st = agent.test_one_db(table, j)
                tmp_w.append(ws)
                tmp_s.append(st)
            words = tmp_w
            states = tmp_s
            print('len(words) = %d,  len(states) = %d' % (len(words),
                                                          len(states)))
        return words, states
Example #15
# Exponential epsilon-decay schedule (reconstructed; the start of this line was truncated)
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_1 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_1.load_state_dict(torch.load("./actor5000_1.pth"))
model_1.eval()

model_2 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_2.load_state_dict(torch.load("./actor4850_1.pth"))
model_2.eval()

Individual = Individualtanh(state_size=4, action_size=1, seed=0,
                            fc1_units=50).to(device)

agent = Agent(state_size=4, action_size=2, random_seed=0)

ppo = PPO(4, 2, method='penalty')
ppo.load_model(5499, 1)


def mkdir(path):
    folder = os.path.exists(path)
    if not folder:
        os.makedirs(path)


def update_target(current_model, target_model):
    target_model.load_state_dict(current_model.state_dict())

Example #16

import tensorflow as tf

from Agent import Agent

from Displayer import DISPLAYER

import parameters

if __name__ == '__main__':

    tf.reset_default_graph()

    with tf.Session() as sess:

        agent = Agent(sess)

        print("Beginning of the run")

        try:
            agent.run()
        except KeyboardInterrupt:
            agent.save("NetworkParam/FinalParam")
        print("End of the run")
        DISPLAYER.disp()

        agent.play(5)

    agent.close()
Example #17
    # with Sess(options, meta, config=config) as sess:
    #                                                                              #
    ################################################################################

    with tf.Session() as sess:

        saver = Saver.Saver(sess)
        displayer = Displayer.Displayer()
        buffer = ExperienceBuffer()

        gui = GUI.Interface(['ep_reward', 'plot', 'render', 'gif', 'save'])
        gui_thread = threading.Thread(target=gui.run)

        threads = []
        for i in range(Settings.NB_ACTORS):
            agent = Agent(sess, i, gui, displayer, buffer)
            threads.append(threading.Thread(target=agent.run))

        # with tf.device('/device:GPU:0'):
        learner = QNetwork(sess, gui, saver, buffer)
        threads.append(threading.Thread(target=learner.run))

        if not saver.load():
            sess.run(tf.global_variables_initializer())

        gui_thread.start()
        for t in threads:
            t.start()

        print("Running...")
Example #18
import os

import numpy as np

from Building import Building
from Agent import Agent

#====================================================================================


#====================================================================================

lift_num = 1
building_height = 5
max_people_in_floor = 30
# Create a building with 1 elevator, 5 floors, and at most 30 people per floor
building = Building(lift_num, building_height, max_people_in_floor)
# building.generate_people()

agent = Agent(building_height, lift_num, 4)

# The goal is to bring all of the people in the building down to the ground floor
max_steps = 500
agent.reload()

building.generate_people(0.8)
for step in range(max_steps):
    ave_reward = 0

    os.system('clear')
    state = building.get_state()
    state_input = np.array(state).reshape(1, -1)
    action = agent.get_action(state_input)
    building.perform_action(action)
Example #19
from settings import Settings

if __name__ == '__main__':

    tf.reset_default_graph()

    with tf.Session() as sess:

        saver = Saver.Saver(sess)
        displayer = Displayer.Displayer()

        gui = GUI.Interface(['ep_reward', 'plot', 'render', 'gif', 'save'])
        gui_thread = threading.Thread(target=gui.run)

        agent = Agent(sess, gui, displayer, saver)

        if not saver.load():
            sess.run(tf.global_variables_initializer())

        gui_thread.start()
        try:
            agent.run()
        except KeyboardInterrupt:
            pass
        print("End of the run")

        saver.save(agent.total_steps)
        displayer.disp()

        gui_thread.join()
Example #20
def main():
    sets = []  # The variable 'sets' stores multiple problem sets.
    # Each problem set comes from a different folder in /Problems/
    # Additional sets of problems will be used when grading projects.
    # You may also write your own problems.

    # ProblemSetList.txt lists the sets to solve, in the order they appear in
    # the file. You may modify ProblemSetList.txt for design and debugging;
    # a fresh copy of all problem sets (plus some not given in advance) will
    # be used when grading.
    r = open("Problems" + os.sep + "ProblemSetList.txt")
    line = getNextLine(r)
    while not line == "":
        sets.append(ProblemSet(line))
        line = getNextLine(r)

    # Initializing the problem-solving agent from Agent.java.
    # Your agent will be initialized with its default constructor;
    # you may modify the default constructor in Agent.java.
    agent = Agent()

    # Running the agent against each problem set.
    # Results are written to ProblemResults.csv; each run overwrites the
    # previous results, and nothing else should be written to
    # ProblemResults.csv during execution. Set-level summaries are written
    # to SetResults.csv.
    results = open("ProblemResults.csv", "w")
    setResults = open("SetResults.csv", "w")
    results.write("Problem,Correct Confidence\n")
    setResults.write("Set,Sum Correct Confidence\n")
    for set in sets:
        sum_correct_confidence = 0
        for problem in set.problems:
            # Each problem is passed to the agent as a RavensProblem object
            # via Solve, and the agent returns its answer at the end of Solve.
            # If the agent uses RavensProblem.check to check its answer, the
            # answer passed to check() is used; the agent cannot change its
            # answer once it has checked it.
            try:
                problem.setAnswerReceived(agent.Solve(problem))

                correct_confidence = 0
                if type(problem.givenAnswer) is list:
                    answer = problem.givenAnswer
                    if len(answer) >= problem.correctAnswer:
                        if sum(answer) > 1:
                            sum_answer = float(sum(answer))
                            answer = [i / sum_answer for i in answer]
                        correct_confidence = answer[problem.correctAnswer - 1]
                sum_correct_confidence += correct_confidence
                result = problem.name + "," + str(correct_confidence)

                results.write("%s\n" % result)
            except Exception:
                print("Error encountered in " + problem.name + ":")
                #print(sys.exc_info()[0])
                print(traceback.format_exc())
                result = problem.name + "," + str(
                    problem.givenAnswer) + ",Error,"
                results.write("%s\n" % result)
        setResult = set.name + "," + str(sum_correct_confidence)
        setResults.write("%s\n" % setResult)
    results.close()
    setResults.close()
Example #21
from Agent import Agent
from Displayer import DISPLAYER
from Saver import SAVER

if __name__ == '__main__':

    tf.reset_default_graph()

    with tf.Session() as sess:

        with tf.device("/cpu:0"):

            # Create the global network
            render = parameters.DISPLAY
            master_agent = Agent(0, sess, render=render, master=True)

            # Create all the workers
            workers = []
            for i in range(parameters.THREADS):
                workers.append(Agent(i + 1, sess, render=False))

        coord = tf.train.Coordinator()
        SAVER.set_sess(sess)

        SAVER.load()

        # Run threads that each contains one worker
        worker_threads = []
        for i, worker in enumerate(workers):
            print("Threading worker", i + 1)
Example #22
from Agent import Agent
from Obstacle import Obstacle
from ObstaclePotentialField import ObstaclePotentialField

Drones = []
Drone1 = Agent(0, (0, 0, 0), 1)
Drones.append(Drone1)

Drone2 = Agent(1, (0, 0, 9), 1)
Drones.append(Drone2)

Obstacles = []
Coral = Obstacle((9, 9, 9))
Obstacles.append(Coral)

OPF = ObstaclePotentialField(1, 1, 20)

for i in range(0, 20):
    print('------- ITERATION ', i, '-----------')
    obstacle1_forces = OPF.calculate_obstacle_forces(Drones, Coral)
    Drone1.ObstaclePotentialForce = obstacle1_forces[Drone1.index]
    Drone2.ObstaclePotentialForce = obstacle1_forces[Drone2.index]

    print('Drone1 OPF = ', Drone1.ObstaclePotentialForce, '\n')
    print(Drone1.calculateVelocity(Drone1.ObstaclePotentialForce))
    print('Drone1 Velocity', Drone1.velocity)
    Drone1.move()
    print('Drone1 Position', Drone1.position)

    print('\n')
Example #23
from Buffer import Buffer
from Agent import Agent

# TODO: add parametrization for constructing models
# nn_params = {
#     'actor': ((512, 'relu'), (512, 'relu')),
#     'critic': {
#         'state': ((16, 'relu'), (32, 'relu')),
#         'action': ((32, 'relu')),
#         'connected': ((512, 'relu'), (512, 'relu'))
#     }
# }

if __name__ == '__main__':
    env_helper = EnvHelper()
    env, n_states, n_actions = env_helper.make_environment('BipedalWalker-v3')
    buffer = Buffer(n_states=n_states,
                    n_actions=n_actions,
                    capacity=75000,
                    batch_size=512)
    agent = Agent(gamma=0.99,
                  buffer=buffer,
                  env=env_helper,
                  alpha=0.005,
                  name='WalkerTest',
                  compile_nn=True)
    agent.run(iterations=500, render=False, verbose=True, train=True)
    agent.plot(agent.learning_rewards)
    agent.run(iterations=20, render=True, verbose=True, train=False)
    agent.plot(agent.testing_reward)
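
The TODO at the top of this example sketches an nn_params layout for parametrizing the actor and critic networks. As a rough sketch of how such a spec could be consumed (build_mlp and its usage are assumptions, not part of the Agent API shown above):

import tensorflow.keras as keras


def build_mlp(input_dim, layer_spec, output_dim, output_activation=None):
    """Build a plain MLP from a spec such as ((512, 'relu'), (512, 'relu'))."""
    inputs = keras.Input(shape=(input_dim,))
    x = inputs
    for units, activation in layer_spec:
        x = keras.layers.Dense(units, activation=activation)(x)
    outputs = keras.layers.Dense(output_dim, activation=output_activation)(x)
    return keras.Model(inputs, outputs)


# Hypothetical usage with the layout from the TODO:
# actor = build_mlp(n_states, nn_params['actor'], n_actions, output_activation='tanh')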
Example #24
import random

import pygame
display_width, display_height = 200, 200

pygame.init()
pygame.display.set_caption('Grid environment')
gameDisplay = pygame.display.set_mode((display_width, display_height))
clock = pygame.time.Clock()

#initialising the game matrix
game_matrix = Game_Matrix()
#creating the environment
env = Grid_Env(gameDisplay, clock, game_matrix)

#initialising agent
agent = Agent(env=env, alpha=0.5)

#uncomment
#getting agent's policy
#directory = 'policy_default_env.pickle'
#agent.set_policy(directory)

#training the agent via interaction
agent.interact(num_episodes=2000)

#testing the agent
pygame.display.set_caption('Test Phase')
for i in range(10):  #running for 10 times
    state = env.reset()
    total_reward = 0
    while True:
Example #25
df['ma_22'] = df.close.rolling(22).mean()

df = df[['close', 'ma_5', 'ma_22']]
df['position'] = 0
df.dropna(inplace=True)

n_features = len(df.columns)
seq_size = 22

env = MarketEnv(df, seq_size)

if test_holdout:
    env_test = MarketEnv(df, seq_size, foreignScaler=env.scaler)

sess = tf.Session()
agent = Agent(sess, seq_size, n_features, hidden_size=16, a_size=3)
sess.run(tf.global_variables_initializer())

i = 0
reward_history = []
print('Sequence size: ' + str(seq_size))

while True:
    running_reward = 0
    s = env.reset()

    while True:
        a = agent.act(s)
        s_, r, done = env.step(a)

        td_error = agent.critic_learn(np.array([s]), r, np.array([s_]))
Example #26
def train(n_games=1500,
          env_size_min=(10, 10),
          env_size_max=(30, 30),
          n_agents=10,
          resume=True,
          view_reduced=True,
          view_size=(2, 2, 2, 2),
          max_reward=200000,
          save_viz=False):
    dt = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    print(
        f"------------------------------------------------------------------------------------------------"
    )
    print(f"Starting training for {n_games} with {n_agents} agents...")
    print(f"Time: {dt}")
    print(f"Settings:")
    print(f"Reduced view:\t{view_reduced}\nView size:\t{view_size}")
    print(
        f"------------------------------------------------------------------------------------------------"
    )

    score_saver = []
    avg_score_saver = []
    ddqn_scores = []
    eps_history = []
    visualisations = []
    prec = 40
    reached = np.zeros(n_agents, dtype=np.int32)
    reached_last_100 = np.zeros(n_agents, dtype=np.int32)

    if view_reduced:
        input_size = (view_size[0] + 1 + view_size[1]) * (view_size[2] + 1 +
                                                          view_size[3]) + 4
    else:
        input_size = env_size_max[0] * env_size_max[1]
    agents = []

    # Create the agents
    for agent_id in range(n_agents):
        agent = Agent(f"agent_{agent_id}",
                      gamma=0.99,
                      epsilon=1.0,
                      lr=1 * 5e-3,
                      n_actions=4,
                      input_dims=[input_size],
                      mem_size=100000,
                      batch_size=64,
                      eps_min=0.01,
                      eps_dec=5 * 1e-5,
                      replace=100)
        if resume:
            agent.load_models()
        agents.append(agent)

    # Main training loop
    for i_game in tqdm(range(n_games)):
        scores = np.zeros(n_agents)
        avg_scores = np.zeros(n_agents)
        agent_in_final_state = np.full(n_agents, False)

        # Define size of map randomly in given range
        env_size = [
            mi if mi == ma else np.random.randint(mi, ma)
            for mi, ma in zip(env_size_min, env_size_max)
        ]

        # Define a time limit based on the perimeter of the environment
        timeout = np.sum(env_size * 2)

        # Randomly create obstacles covering roughly 6-15 % of the environment area
        num_obs = int(
            np.max([
                np.round(
                    np.random.uniform(0.06, 0.15) * np.multiply(*env_size)) -
                2 * n_agents, 0
            ]))
        obstacles = []
        for i in range(num_obs):
            obstacles.append(
                Point(np.random.randint(1, env_size[0]),
                      np.random.randint(1, env_size[1])))

        env = Game(obstacles,
                   None,
                   env_size,
                   max_reward,
                   view_reduced=view_reduced,
                   view_size=view_size)
        for i in range(n_agents):
            env.add_player()

        observations = env.reset()
        game_sav = [observations]
        time_step = 0
        # Play the game: Run until all agents reached a final state
        while not np.all(agent_in_final_state):
            time_step += 1
            # Obtain actions for each agent
            actions = []
            # Get actions from all agents that are not in a final state
            for agent_id, agent in enumerate(agents):
                if not agent_in_final_state[agent_id]:
                    actions.append(agent.choose_action(observations[agent_id]))
                else:
                    actions.append(None)
            # Execute actions on board
            next_observations, rewards, agent_in_final_state = env.step(
                actions)
            # Save history for each agent and optimize
            for agent, observation, action, reward, next_observation, is_in_final_state in \
                    zip(agents, observations, actions, rewards, next_observations, agent_in_final_state):
                # Only store and optimize if the agent did something
                if action is not None:
                    agent.store_transition(observation, action, reward,
                                           next_observation,
                                           int(is_in_final_state))
                    agent.learn()

            # For statistics count agents that reached their aim with the action in this iteration
            for agent_id, action in enumerate(actions):
                if action is not None and rewards[agent_id] == max_reward:
                    reached[agent_id] += 1
                    # Special statistic counter for the last 100 games
                    if i_game > (n_games - 100):
                        reached_last_100[agent_id] += 1

            scores += rewards
            observations = next_observations
            game_sav.append(next_observations)
            eps_history.append([agent.epsilon for agent in agents])
            ddqn_scores.append(scores)

            # if we reach a timeout for the game just set all agents to being in a final state
            if time_step == timeout:
                agent_in_final_state = np.full(n_agents, True)

            # Save a checkpoint every 10 games
            if i_game > 0 and i_game % 10 == 0:
                for agent in agents:
                    agent.save_models()

            if all(agent_in_final_state) and i_game > 20:
                avg_scores = np.mean(ddqn_scores[:-10], axis=0)
        score_saver.append(scores)
        if i_game > 20:
            avg_score_saver.append(avg_scores)
            epsilons = {agent.id: agent.epsilon for agent in agents}
            if i_game % int(n_games / prec) == int(n_games / prec) - 1:
                print(
                    f"episode: {i_game} score: {np.round(scores.tolist(), 3)}, average score {avg_scores.tolist()} "
                    f"epsilon {epsilons} reached: {reached.tolist()}")

        # Save game for visualization purposes
        viz = Visualisation(game_sav,
                            env_size,
                            n_agents,
                            view_padding=view_size,
                            view_reduced=view_reduced,
                            truth_obstacles=np.array(
                                [o.to_numpy() for o in obstacles]),
                            dt=dt,
                            i_game=i_game,
                            scores=scores,
                            reached=reached)
        if save_viz:
            viz.save()
        visualisations.append(viz)

    print(
        f"\n{n_games} runs - {reached.tolist()} times aim reached - quota: {(reached / n_games).tolist()}"
    )
    print("Quota of the last 100 runs " +
          str((reached_last_100 / 100).tolist()))

    # Visualize 10 played games in equal distances between first and last run and in addition the best five games
    plot_game_i_list = np.arange(n_games - 1, 0, -int(max(n_games * 0.1, 1)))
    plot_game_i_list = np.concatenate(
        [[0], plot_game_i_list,
         np.argsort(-1 * np.max(score_saver, axis=1))[:5]])
    plot_game_i_list = np.unique(plot_game_i_list)
    plot_game_i_list = np.flip(plot_game_i_list)
    print()
    print('Visualize these games: {}'.format(plot_game_i_list))

    for i_game in plot_game_i_list:
        print(f'Generate visual output for game {i_game}...', end='\r')
        visualisations[i_game].plot_overview(time_step=-1,
                                             plot_info=False,
                                             save=True)

    plt.plot(score_saver)
    plt.show()
    plt.plot(avg_score_saver)
    plt.show()

    print()
    print()
    print('Done.')
    print(
        'IMPORTANT: A crash of Python at the end of the code is a known issue.'
    )
    print(
        'It comes from closing a lot of matplotlib figures in a short time (see visualisation.py, line 611 and 655).'
    )
Example #27
from Node import Node
from Maze import Maze
from Agent import Agent

node1 = Node(False, False, False, False, (0, 0), False)
node2 = Node(False, False, False, False, (0, -1), False)
node3 = Node(False, False, False, False, (0, -2), False)
node4 = Node(False, False, False, False, (0, -3), False)
node5 = Node(False, False, False, False, (1, -3), False)
node1.set_down(node2)
node2.set_down(node3)
node3.set_down(node4)
node4.set_right(node5)
node2.set_up(node1)
node3.set_up(node2)
node4.set_up(node3)
node5.set_left(node4)

nodes = [node1, node2, node3, node4, node5]

foo = Maze(nodes)
agent = Agent((0, 0), foo)
agent.simple_discovery()
print(agent.current_pos)
Example #28
node6 = Node(False, False, False, False, (0, 0), False)
node1.set_down(node2)
node2.set_up(node1)
node2.set_down(node3)
node3.set_up(node2)
node3.set_down(node4)
node4.set_up(node3)
node4.set_down(node5)
node5.set_up(node4)
node5.set_down(node6)
node6.set_up(node5)

nodes = [node1, node2, node3, node4, node5, node6]

foo = Maze(nodes)
agent1 = Agent(node4, foo)
agent2 = Agent(node3, foo)
agents = [agent1, agent2]

print("Begin maze")
print("Agent 1 location: ", agent1.current_pos)
print("Agent 2 location: ", agent2.current_pos)

while not check_win_condition(agents):
    input(">>> Press enter to continue")
    PPSOCycle(agents)
    PrintAgent(agents[0], 1)
    PrintAgent(agents[1], 2)

print("Maze fully Discovered!")
Example #29
states = np.array([0])

# In[12]:

alpha = 0.5
gamma = 0.9
# QSize = actions.size * states.size
# half_size = (int) (0.5*QSize)
epsilon = 0.1 * 100  # exploration probability in percent, compared against randint(1, 100)

# In[13]:

agents = []
PA_1 = Agent(states.size, actions_1.size)
PA_2 = Agent(states.size, actions_2.size)
agents.append(PA_1)
agents.append(PA_2)

# In[14]:

#Channel conditions
g1 = 2.5
g2 = 1.5
Gamma = 3.532
sigma2 = 1
beta = 0.1
optimal = np.log2(1 + ((Pmax_1 * g1) / (
    (Pmax_2 * g1 * beta + 1) * Gamma))) + np.log2(1 + (Pmax_2 * g2) / (
        (Pmax_1 * g2 * beta + 1) * Gamma))
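
For readability, the quantity stored in optimal above is the two-user sum-rate benchmark, with symbols matching the variable names in the code:

R_{\text{opt}} = \log_2\left(1 + \frac{P_{\max,1}\, g_1}{(P_{\max,2}\, g_1\, \beta + 1)\,\Gamma}\right) + \log_2\left(1 + \frac{P_{\max,2}\, g_2}{(P_{\max,1}\, g_2\, \beta + 1)\,\Gamma}\right)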
Example #30
canvas.pack()


def callback():
    time.sleep(1)
    for i in range(len(labyrinth.fields)):
        for j in range(len(labyrinth.fields[i])):
            val = labyrinth.fields[i][j]
            fill = "#ffffff"
            if val == 1:
                fill = "#34ebc9"
            if val == 2:
                fill = "#edea39"
            if val == 3:
                fill = "#d92f23"
            if val == 4:
                fill = "#5334eb"
            canvas.create_rectangle(j * 15,
                                    i * 15,
                                    j * 15 + 8,
                                    i * 15 + 8,
                                    fill=fill)
    top.update()


callback()

initialAgent = Agent(labyrinth.startx, labyrinth.starty, labyrinth, callback)

top.mainloop()