import random

import numpy as np
from tqdm import tqdm

# Pmin, Pmax_1, Pmax_2, Npower, epsilon and the Agent class are defined
# earlier in the script.
qcopa_perf = np.zeros(10)
optimum_perf = np.zeros(10)
greedy_perf = np.zeros(10)
simultaneous_perf = np.zeros(10)

# Main loop: sweep the interference factor beta from 0 to 1
cnt = 0
for bb in tqdm(np.linspace(0, 1, 10)):
    beta = bb
    actions_1 = np.linspace(Pmin, Pmax_1, Npower)
    actions_2 = np.linspace(Pmin, Pmax_2, Npower)
    states = np.array([0])
    agents = []
    PA_1 = Agent(actions_1.size, actions_2.size)
    PA_2 = Agent(actions_1.size, actions_2.size)
    agents.append(PA_1)
    agents.append(PA_2)

    # Q-learning
    Iterations = 30 * (actions_1.size * actions_2.size)
    for episode in np.arange(Iterations):
        # Explore during the first 80% of episodes
        if (episode / Iterations * 100) < 80:
            rnd = random.randint(1, 100)
            if rnd < epsilon:
                # Epsilon-greedy exploration: pick a random power level
                idx = random.randint(0, Npower - 1)
                PA_1.set_power(actions_1[idx])
                PA_1.p_index = idx
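# The excerpt above ends inside the exploration branch. A minimal sketch of the
# matching greedy branch, assuming Agent keeps a Q-table indexed by
# (state, action); the attribute name `Q` and the zero-state index are
# assumptions, not the original author's code:
#
#             else:
#                 # Exploit: pick the power level with the highest Q-value
#                 idx = int(np.argmax(PA_1.Q[0, :]))
#                 PA_1.set_power(actions_1[idx])
#                 PA_1.p_index = idx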
from Agent import Agent
from Environment import Environment
import numpy as np

TOTAL_LINES = 4
TOTAL_COLS = 4
TOTAL_ACTIONS = 4
IMM_REWARD = -1
ITERATION = 100
DISCOUNT_FACTOR = 1

agent = Agent(TOTAL_LINES, TOTAL_COLS, TOTAL_ACTIONS, IMM_REWARD, DISCOUNT_FACTOR)
environment = Environment(TOTAL_LINES, TOTAL_COLS, TOTAL_ACTIONS, IMM_REWARD, DISCOUNT_FACTOR)

value_net = np.zeros((TOTAL_LINES, TOTAL_COLS))
for it in range(ITERATION):
    value_net = environment.update_value_net(value_net)

print(value_net)
policy_net = agent.update_policy_net(value_net)
print(policy_net)
import numpy as np
import torch

def get_Action(Humid, Temp):
    # Load the trained Q-network and switch it to evaluation mode.
    # PATH is assumed to be defined elsewhere in the module.
    agent = Agent()
    agent.brain.Q.load_state_dict(torch.load(PATH))
    agent.brain.Q.eval()
    # The original excerpt referenced an undefined `state`; building it from
    # the two inputs is an assumption.
    state = np.array([Humid, Temp], dtype=np.float32)
    action = agent.action_process(state)
    return action
import sys
sys.path.append('../')

from ObstaclePotentialField import ObstaclePotentialField
from Agent import Agent
from Obstacle import Obstacle

Drones = []
Drone1 = Agent(0, (1, 2, 0), 1)
Drones.append(Drone1)
Drone2 = Agent(1, (9, 9, 0), 1)
Drones.append(Drone2)

Obstacles = []
Obstacle1 = Obstacle((5, 5, 0))
Obstacles.append(Obstacle1)

OPF = ObstaclePotentialField(0.5, 1, 20)
OPF2 = ObstaclePotentialField(0.5, 1, 100)

def test_calculate_obstacle_force():
    assert OPF.calculate_obstacle_force(0, 0, Drones, Obstacles) == (1, 1, 1)

def test_calculate_obstacle_forces():
    assert OPF.calculate_obstacle_forces(Drones, Obstacle1) == []
beta = 0.4

def beta_val(beta_number):
    # Anneal beta towards 1 in steps of 0.002
    beta_number = beta_number + 0.002 if beta_number < 1 else 1
    return beta_number

env = env_manager()
input_dim = env.observation_space
n_actions = env.action_space_n
agent = Agent(n_actions, eps_start, eps_end, eps_decay, lr, gamma,
              memory_size, name=file_path, input_dims=input_dim)
# agent.target_net.load_state_dict(file_path + '/3_')
# episode, agent.epsilon = agent.policy_net.load()
# _, _ = agent.target_net.load()

t_reward = []
avg = 0
for episode in range(num_episodes):
    beta = beta_val(beta)
    state = env.reset()
    done = False
    total_rewards = 0
    while not done:
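        # The excerpt stops at the top of the interaction loop. What follows
        # is a minimal sketch of a typical DQN step under the interfaces used
        # above; choose_action, store_transition and learn are assumed names,
        # not taken from the source:
        action = agent.choose_action(state)
        next_state, reward, done, info = env.step(action)
        agent.store_transition(state, action, reward, next_state, done)
        agent.learn()
        state = next_state
        total_rewards += reward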
from Agent import Agent
from Obstacle import Obstacle
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from math import exp
from Exit import Exit

fig1 = plt.figure()
ax1 = fig1.add_subplot(111, aspect='equal')
ax1.set_xlim([1, 10])
ax1.set_ylim([1, 10])

agent1 = Agent((5, 5), 1)
agent2 = Agent((3, 7), 1)
agent3 = Agent((0, 2), 1)
agent4 = Agent((2, 2), 1)

agents = []
agents.append(agent1)
agents.append(agent2)
agents.append(agent3)
agents.append(agent4)

agent1.speed = (-2, 2)
agent2.speed = (0.4, -0.4)

# The list is rebuilt so that only the two moving agents take part in the run
agents = []
agents.append(agent1)
agents.append(agent2)
from Agent import Agent
from ProblemSet import ProblemSet
import logging
import problem_utils

logging.basicConfig()
LOGGER = logging.getLogger(__name__)
LOGGER.setLevel(logging.DEBUG)

agent = Agent()
n_correct = 0
problem_set = ProblemSet("Basic Problems B")
n_total = len(problem_set.problems)
for p in problem_set.problems:
    LOGGER.info('=================================')
    LOGGER.info('Solving problem {}'.format(p.name))
    if problem_utils.is_problem2x2(p):
        source = p.figures['A']
        destination = p.figures['B']
        guess = agent.Solve(p)
        answer = p.checkAnswer(guess)
        if guess == answer:
            LOGGER.info('{}++++++++++++Correct+++++++++++++'.format(p.name))
            n_correct += 1
        else:
            LOGGER.error('Wrong')
    else:
        print('Not 2x2 problem')

print('Total correct answers {} out of {}'.format(n_correct, n_total))
# window.title("hello world")
# label = Label(window, text="helloo", font=('Arial Bold', 50))
# label.grid(column=0, row=0)
# window.geometry('800x400')
# quit = Button(window, text="Quit!", command=window.destroy)
# quit.grid(column=0, row=1)
# playerName = Entry(window, width=10)
# playerName.grid(column=1, row=1)
# window.mainloop()

# Game is running window
players = []
try:
    agent = Agent(input("Enter your name: ") or "Master Player", "Human")
except ValueError:
    agent = Agent("Human", "Human")
players.append(agent)

try:
    numAgents = int(input("Enter amount of CPU players: ") or 2)
except ValueError:
    print("Not a valid amount of players, default is 2")
    numAgents = 2  # the original omitted this assignment, which would raise a NameError below

iter = 0
while iter < numAgents:
    try:
        agent = Agent(input("Enter the CPU name: ") or "Johnny Q" + str(iter),
                      input("Enter the AI type: ") or "Random")
    except ValueError:
        agent = Agent("Agent" + str(iter), "Random")
    players.append(agent)
    iter += 1  # the excerpt ends before the counter update; without it the loop never terminates
# #### (i) Defining some variables

# In[45]:

env = gym.make('SpaceInvaders-v0')
num_actions = 6  # 0 no action, 1 fire, 2 move right, 3 move left, 4 move right + fire, 5 move left + fire
scores = []
episodes = 500
batch_size = 32

# #### (ii) Making an object of the agent class and initialising the experience replay memory with random transitions

# In[ ]:

agent = Agent(num_actions)

# In[46]:

while agent.memCntr < agent.memSize:
    state = env.reset()
    done = False
    while not done:
        action = env.action_space.sample()
        next_state, reward, done, info = env.step(action)
        if done and info['ale.lives'] == 0:
            # Give a large penalty when the agent loses its last life
            reward = -50
        agent.storeTransition(agent.process_state(state), action, reward,
                              agent.process_state(next_state))
        state = next_state
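# In[ ]:

# A minimal sketch of the training loop that would typically follow the
# warm-up above; `chooseAction` and `learn` are assumed method names, not
# taken from the source notebook:
#
# for episode in range(episodes):
#     state = env.reset()
#     done = False
#     score = 0
#     while not done:
#         action = agent.chooseAction(agent.process_state(state))
#         next_state, reward, done, info = env.step(action)
#         agent.storeTransition(agent.process_state(state), action, reward,
#                               agent.process_state(next_state))
#         agent.learn(batch_size)
#         score += reward
#         state = next_state
#     scores.append(score)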
def generateoffspring(self):
    # Pick a parent via selection and clone its expression into a new agent
    parent = self.selection()
    offspring = Agent(self.K, self.T, self.r, self.p)
    offspring.setexpression(parent.expression)
    return offspring
from Agent import Agent
import gym
import numpy as np
import tensorflow as tf
import tensorflow.keras as keras
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

if __name__ == "__main__":
    env = gym.make("LunarLander-v2")
    n_games = 400
    agent = Agent(lr=0.001, n_actions=4, gamma=0.99, epsilon=1, epsilon_dec=2e-4,
                  input_dims=[8], batch_size=32, lstm=True, replace=10, normalize=True)
    scores, eps_history = [], []
    for i in range(n_games):
        done = False
        score = 0
        observation = env.reset()
        while not done:
            env.render()
            action = agent.choose_action(observation)
            obs_, reward, done, info = env.step(action)
            score += reward
            agent.store_transition(observation, action, reward, obs_, done)
            observation = obs_
            agent.learn()
        eps_history.append(agent.epsilon)
        scores.append(score)
        avg_score = np.mean(scores[-100:])
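        # The excerpt ends after computing the running average. A typical
        # per-episode progress line, as a hedged sketch (the format string is
        # not from the original source):
        print('episode %d  score %.1f  avg(100) %.1f  epsilon %.3f'
              % (i, score, avg_score, agent.epsilon))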
def initialize(self):
    self.agents = []
    for i in range(self.s):
        self.agents.append(Agent(self.K, self.T, self.r, self.p))
        self.agents[i].initialize()
    self.calcallfitness()
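# A minimal sketch of how these two methods might combine into one
# generational step; full replacement of the population is an assumption,
# not the author's scheme:
#
#     def step(self):
#         self.agents = [self.generateoffspring() for _ in range(self.s)]
#         self.calcallfitness()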
from Extractor import Extractor
from Granulator import Granulator
from Agent import Agent
from Metric import Metric
from Representative import Representative
from Clustering_MBSAS import Clustering_MBSAS
from Clustering_K_Means import Clustering_K_Means

extractor1 = Extractor()

obj_clustering_MBSAS = Clustering_MBSAS(3, 0.2, 0.1, 1.1)  # lambda, theta_start, theta_step, theta_stop
agent1 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_MBSAS)
agent1.execute(3.1, 0.5)  # S_T, eta

obj_clustering_K_Means = Clustering_K_Means(1, 3)  # k, k_max
agent2 = Agent(Granulator, Metric, extractor1, Representative, obj_clustering_K_Means)
agent2.execute(3.1, 0.5)  # S_T, eta
def EADQN_main(table, num, weights_dir):  # actionDBs, num
    import argparse
    import sys
    import time
    import tensorflow as tf
    from Environment import Environment
    from ReplayMemory import ReplayMemory
    from EADQN import DeepQLearner
    from Agent import Agent

    parser = argparse.ArgumentParser()
    envarg = parser.add_argument_group('Environment')
    envarg.add_argument("--model_dir", default="/home/fengwf/Documents/", help="")
    envarg.add_argument("--vec_model", default='mymodel5-5-50', help="")
    envarg.add_argument("--vec_length", type=int, default=50, help="")
    envarg.add_argument("--actionDB", default='tag_actions', help="")
    envarg.add_argument("--max_text_num", default='64', help="")
    envarg.add_argument("--reward_assign", default='2.0 1.0 -1.0 -2.0', help="")
    envarg.add_argument("--action_rate", type=float, default=0.15, help="")
    envarg.add_argument("--penal_radix", type=float, default=5.0, help="")
    envarg.add_argument("--action_label", type=int, default=2, help="")
    envarg.add_argument("--non_action_label", type=int, default=1, help="")
    envarg.add_argument("--long_text_flag", type=int, default=1, help="")

    memarg = parser.add_argument_group('Replay memory')
    memarg.add_argument("--replay_size", type=int, default=100000, help="")
    memarg.add_argument("--channel", type=int, default=1, help="")
    memarg.add_argument("--positive_rate", type=float, default=0.75, help="")
    memarg.add_argument("--priority", default=1, help="")
    memarg.add_argument("--reward_bound", type=float, default=0, help="")

    netarg = parser.add_argument_group('Deep Q-learning network')
    netarg.add_argument("--num_actions", type=int, default=1000, help="")
    netarg.add_argument("--words_num", type=int, default=500, help="")
    netarg.add_argument("--wordvec", type=int, default=100, help="")
    netarg.add_argument("--learning_rate", type=float, default=0.0025, help="")
    netarg.add_argument("--momentum", type=float, default=0.1, help="")
    netarg.add_argument("--epsilon", type=float, default=1e-6, help="")
    netarg.add_argument("--decay_rate", type=float, default=0.88, help="")
    netarg.add_argument("--discount_rate", type=float, default=0.9, help="")
    netarg.add_argument("--batch_size", type=int, default=8, help="")
    netarg.add_argument("--target_output", type=int, default=2, help="")

    antarg = parser.add_argument_group('Agent')
    antarg.add_argument("--exploration_rate_start", type=float, default=1, help="")
    antarg.add_argument("--exploration_rate_end", type=float, default=0.1, help="")
    antarg.add_argument("--exploration_decay_steps", type=int, default=1000, help="")
    antarg.add_argument("--exploration_rate_test", type=float, default=0.0, help="")
    antarg.add_argument("--train_frequency", type=int, default=1, help="")
    antarg.add_argument("--train_repeat", type=int, default=1, help="")
    antarg.add_argument("--target_steps", type=int, default=5, help="")
    antarg.add_argument("--random_play", default=0, help="")

    mainarg = parser.add_argument_group('Main loop')
    mainarg.add_argument("--result_dir", default="test_result", help="")
    mainarg.add_argument("--train_steps", type=int, default=0, help="")
    mainarg.add_argument("--test_one", type=int, default=1, help="")
    mainarg.add_argument("--text_dir", default='', help="")
    mainarg.add_argument("--test", type=int, default=1, help="")
    mainarg.add_argument("--test_text_num", type=int, default=8, help="")
    mainarg.add_argument("--epochs", type=int, default=2, help="")
    mainarg.add_argument("--start_epoch", type=int, default=0, help="")
    mainarg.add_argument("--home_dir", default="./", help="")
    mainarg.add_argument("--load_weights", default="", help="")
    mainarg.add_argument("--save_weights_prefix", default="", help="")
    mainarg.add_argument("--computer_id", type=int, default=1, help="")
    mainarg.add_argument("--gpu_rate", type=float, default=0.2, help="")
    mainarg.add_argument("--cnn_format", default='NCHW', help="")

    args = parser.parse_args()
    tables_num = len(args.actionDB.split())
    args.load_weights = weights_dir

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_rate)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        net = DeepQLearner(args, sess)
        env = Environment(args)
        mem = ReplayMemory(args.replay_size, args)
        agent = Agent(env, mem, net, args)
        words = []
        states = []

        if args.load_weights:
            print('Loading weights from %s...' % args.load_weights)
            net.load_weights(args.home_dir + args.load_weights)  # load last trained weights

        if args.test_one and args.load_weights:
            '''
            for i,ad in enumerate(actionDBs):
                tmp_w = []
                tmp_s = []
                for j in range(num[i]):
                    print 'table = %s, text_num = %d'%(actionDBs[i],j)
                    ws, act_seq, st = agent.test_one_db(actionDBs[i], j)
                    tmp_w.append(ws)
                    tmp_s.append(st)
                    #print '\nStates: %s\n'%str(st)
                    #print '\nWords: %s\n'%str(ws)
                    #print '\n\nAction_squence: %s\n'%str(act_seq)
                words.append(tmp_w)
                states.append(tmp_s)
            '''
            tmp_w = []
            tmp_s = []
            for j in range(num):
                #print('table = %s, text_num = %d' % (table, j))
                ws, act_seq, st = agent.test_one_db(table, j)
                tmp_w.append(ws)
                tmp_s.append(st)
            words = tmp_w
            states = tmp_s
            print('len(words) = %d, len(states) = %d' % (len(words), len(states)))
            return words, states
import math
import os

import torch

# Standard exponential epsilon-decay schedule; the first half of this
# expression was cut off in the excerpt and is reconstructed here (the name
# `epsilon_by_frame` is an assumption):
epsilon_by_frame = lambda frame_idx: epsilon_final + (
    epsilon_start - epsilon_final) * math.exp(-1. * frame_idx / epsilon_decay)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model_1 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_1.load_state_dict(torch.load("./actor5000_1.pth"))
model_1.eval()

model_2 = Actor(state_size=4, action_size=1, seed=0).to(device)
model_2.load_state_dict(torch.load("./actor4850_1.pth"))
model_2.eval()

Individual = Individualtanh(state_size=4, action_size=1, seed=0, fc1_units=50).to(device)
agent = Agent(state_size=4, action_size=2, random_seed=0)

ppo = PPO(4, 2, method='penalty')
ppo.load_model(5499, 1)

def mkdir(path):
    folder = os.path.exists(path)
    if not folder:
        os.makedirs(path)

def update_target(current_model, target_model):
    # Hard update: copy all weights from the online network to the target network
    target_model.load_state_dict(current_model.state_dict())
import tensorflow as tf

from Agent import Agent
from Displayer import DISPLAYER
import parameters

if __name__ == '__main__':
    tf.reset_default_graph()

    with tf.Session() as sess:
        agent = Agent(sess)

        print("Beginning of the run")
        try:
            agent.run()
        except KeyboardInterrupt:
            agent.save("NetworkParam/FinalParam")
        print("End of the run")

        DISPLAYER.disp()
        agent.play(5)
        agent.close()
# with Sess(options, meta, config=config) as sess:
#
################################################################################

with tf.Session() as sess:
    saver = Saver.Saver(sess)
    displayer = Displayer.Displayer()
    buffer = ExperienceBuffer()

    gui = GUI.Interface(['ep_reward', 'plot', 'render', 'gif', 'save'])
    gui_thread = threading.Thread(target=gui.run)

    threads = []
    for i in range(Settings.NB_ACTORS):
        agent = Agent(sess, i, gui, displayer, buffer)
        threads.append(threading.Thread(target=agent.run))

    # with tf.device('/device:GPU:0'):
    learner = QNetwork(sess, gui, saver, buffer)
    threads.append(threading.Thread(target=learner.run))

    if not saver.load():
        sess.run(tf.global_variables_initializer())

    gui_thread.start()
    for t in threads:
        t.start()

    print("Running...")
import os

import numpy as np

from Building import Building
from Agent import Agent

#====================================================================================
#====================================================================================

lift_num = 1
building_height = 5
max_people_in_floor = 30

# Create a building with 1 elevator, height 5, and at most 30 people per floor
building = Building(lift_num, building_height, max_people_in_floor)
# building.generate_people()
agent = Agent(building_height, lift_num, 4)

# The goal is to bring all the people in the building down to the ground floor
max_steps = 500
agent.reload()
building.generate_people(0.8)
for step in range(max_steps):
    ave_reward = 0
    os.system('clear')
    state = building.get_state()
    state_input = np.array(state).reshape(1, -1)
    action = agent.get_action(state_input)
    building.perform_action(action)
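    # The excerpt ends after the action is applied. A hedged sketch of how the
    # rest of a step might look; `get_reward` and `print_building` are assumed
    # method names, not taken from the source:
    #
    #     reward = building.get_reward()
    #     ave_reward += reward
    #     building.print_building(step)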
from settings import Settings

if __name__ == '__main__':
    tf.reset_default_graph()

    with tf.Session() as sess:
        saver = Saver.Saver(sess)
        displayer = Displayer.Displayer()

        gui = GUI.Interface(['ep_reward', 'plot', 'render', 'gif', 'save'])
        gui_thread = threading.Thread(target=gui.run)

        agent = Agent(sess, gui, displayer, saver)

        if not saver.load():
            sess.run(tf.global_variables_initializer())

        gui_thread.start()
        try:
            agent.run()
        except KeyboardInterrupt:
            pass
        print("End of the run")
        saver.save(agent.total_steps)
        displayer.disp()

        gui_thread.join()
def main():
    # The variable 'sets' stores multiple problem sets.
    # Each problem set comes from a different folder in /Problems/
    # Additional sets of problems will be used when grading projects.
    # You may also write your own problems.
    sets = []

    # ProblemSetList.txt lists the sets to solve.
    # Sets will be solved in the order they appear in the file.
    # You may modify ProblemSetList.txt for design and debugging.
    # We will use a fresh copy of all problem sets when grading.
    # We will also use some problem sets not given in advance.
    r = open("Problems" + os.sep + "ProblemSetList.txt")
    line = getNextLine(r)
    while not line == "":
        sets.append(ProblemSet(line))
        line = getNextLine(r)

    # Initializing the problem-solving agent from Agent.java.
    # Your agent will be initialized with its default constructor.
    # You may modify the default constructor in Agent.java
    agent = Agent()

    # Running the agent against each problem set.
    # Results will be written to ProblemResults.csv.
    # Note that each run of the program will overwrite the previous results.
    # Do not write anything else to ProblemResults.csv during execution of the program.
    results = open("ProblemResults.csv", "w")
    # Set-level summaries will be written to SetResults.csv.
    setResults = open("SetResults.csv", "w")

    results.write("Problem,Correct Confidence\n")
    setResults.write("Set,Sum Correct Confidence\n")
    for set in sets:
        sum_correct_confidence = 0
        for problem in set.problems:
            # Your agent will solve one problem at a time.
            try:
                # The problem will be passed to your agent as a RavensProblem object
                # as a parameter to the Solve method.
                # Your agent should return its answer at the conclusion of the execution of Solve.
                # Note that if your agent makes use of RavensProblem.check to check its answer,
                # the answer passed to check() will be used.
                # Your agent cannot change its answer once it has checked its answer.
                problem.setAnswerReceived(agent.Solve(problem))
                correct_confidence = 0
                if type(problem.givenAnswer) is list:
                    answer = problem.givenAnswer
                    if len(answer) >= problem.correctAnswer:
                        if sum(answer) > 1:
                            # Normalize the confidence vector so it sums to 1
                            sum_answer = float(sum(answer))
                            answer = [i / sum_answer for i in answer]
                        correct_confidence = answer[problem.correctAnswer - 1]
                sum_correct_confidence += correct_confidence
                result = problem.name + "," + str(correct_confidence)
                results.write("%s\n" % result)
            except:
                print("Error encountered in " + problem.name + ":")
                #print(sys.exc_info()[0])
                print(traceback.format_exc())
                result = problem.name + "," + str(problem.givenAnswer) + ",Error,"
                results.write("%s\n" % result)
        setResult = set.name + "," + str(sum_correct_confidence)
        setResults.write("%s\n" % setResult)
    results.close()
    setResults.close()
from Agent import Agent
from Displayer import DISPLAYER
from Saver import SAVER

if __name__ == '__main__':
    tf.reset_default_graph()

    with tf.Session() as sess:
        with tf.device("/cpu:0"):
            # Create the global network
            render = parameters.DISPLAY
            master_agent = Agent(0, sess, render=render, master=True)

            # Create all the workers
            workers = []
            for i in range(parameters.THREADS):
                workers.append(Agent(i + 1, sess, render=False))

        coord = tf.train.Coordinator()

        SAVER.set_sess(sess)
        SAVER.load()

        # Run threads that each contain one worker
        worker_threads = []
        for i, worker in enumerate(workers):
            print("Threading worker", i + 1)
from Agent import Agent
from Obstacle import Obstacle
from ObstaclePotentialField import ObstaclePotentialField

Drones = []
Drone1 = Agent(0, (0, 0, 0), 1)
Drones.append(Drone1)
Drone2 = Agent(1, (0, 0, 9), 1)
Drones.append(Drone2)

Obstacles = []
Coral = Obstacle((9, 9, 9))
Obstacles.append(Coral)

OPF = ObstaclePotentialField(1, 1, 20)

for i in range(0, 20):
    print('------- ITERATION ', i, '-----------')
    obstacle1_forces = OPF.calculate_obstacle_forces(Drones, Coral)
    Drone1.ObstaclePotentialForce = obstacle1_forces[Drone1.index]
    Drone2.ObstaclePotentialForce = obstacle1_forces[Drone2.index]
    print('Drone1 OPF = ', Drone1.ObstaclePotentialForce, '\n')
    print(Drone1.calculateVelocity(Drone1.ObstaclePotentialForce))
    print('Drone1 Velocity', Drone1.velocity)
    Drone1.move()
    print('Drone1 Position', Drone1.position)
    print('\n')
from Buffer import Buffer
from Agent import Agent

# TODO: add parametrization for constructing models
# nn_params = {
#     'actor': ((512, 'relu'), (512, 'relu')),
#     'critic': {
#         'state': ((16, 'relu'), (32, 'relu')),
#         'action': ((32, 'relu')),
#         'connected': ((512, 'relu'), (512, 'relu'))
#     }
# }

if __name__ == '__main__':
    env_helper = EnvHelper()
    env, n_states, n_actions = env_helper.make_environment('BipedalWalker-v3')
    buffer = Buffer(n_states=n_states, n_actions=n_actions, capacity=75000, batch_size=512)
    agent = Agent(gamma=0.99, buffer=buffer, env=env_helper, alpha=0.005,
                  name='WalkerTest', compile_nn=True)

    agent.run(iterations=500, render=False, verbose=True, train=True)
    agent.plot(agent.learning_rewards)
    agent.run(iterations=20, render=True, verbose=True, train=False)
    agent.plot(agent.testing_reward)
import random

import pygame

display_width, display_height = 200, 200
pygame.init()
pygame.display.set_caption('Grid environment')
gameDisplay = pygame.display.set_mode((display_width, display_height))
clock = pygame.time.Clock()

# Initialise the game matrix
game_matrix = Game_Matrix()

# Create the environment
env = Grid_Env(gameDisplay, clock, game_matrix)

# Initialise the agent
agent = Agent(env=env, alpha=0.5)

# Uncomment to load a previously learnt policy
# directory = 'policy_default_env.pickle'
# agent.set_policy(directory)

# Train the agent via interaction
agent.interact(num_episodes=2000)

# Test the agent
pygame.display.set_caption('Test Phase')
for i in range(10):  # run 10 test episodes
    state = env.reset()
    total_reward = 0
    while True:
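        # The excerpt stops at the top of the test loop. A hedged sketch of a
        # typical greedy rollout; the `get_action` signature and the env.step
        # return values are assumptions, not the original interface:
        #
        #         action = agent.get_action(state, greedy=True)
        #         state, reward, done = env.step(action)
        #         total_reward += reward
        #         if done:
        #             print('Test episode {}: total reward {}'.format(i, total_reward))
        #             break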
df['ma_22'] = df.close.rolling(22).mean()
df = df[['close', 'ma_5', 'ma_22']]
df['position'] = 0
df.dropna(inplace=True)
n_features = len(df.columns)
seq_size = 22

env = MarketEnv(df, seq_size)
if test_holdout:
    env_test = MarketEnv(df, seq_size, foreignScaler=env.scaler)

sess = tf.Session()
agent = Agent(sess, seq_size, n_features, hidden_size=16, a_size=3)
sess.run(tf.global_variables_initializer())

i = 0
reward_history = []
print('Sequence size: ' + str(seq_size))
while True:
    running_reward = 0
    s = env.reset()
    while True:
        a = agent.act(s)
        s_, r, done = env.step(a)
        td_error = agent.critic_learn(np.array([s]), r, np.array([s_]))
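        # The excerpt ends after the critic update. In a standard actor-critic
        # step the actor is then updated with the TD error as the advantage;
        # a hedged sketch (`actor_learn` is an assumed method name):
        #
        #         agent.actor_learn(np.array([s]), a, td_error)
        #         running_reward += r
        #         s = s_
        #         if done:
        #             break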
def train(n_games=1500, env_size_min=(10, 10), env_size_max=(30, 30), n_agents=10,
          resume=True, view_reduced=True, view_size=(2, 2, 2, 2), max_reward=200000,
          save_viz=False):
    dt = datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    print("------------------------------------------------------------------------------------------------")
    print(f"Starting training for {n_games} games with {n_agents} agents...")
    print(f"Time: {dt}")
    print("Settings:")
    print(f"Reduced view:\t{view_reduced}\nView size:\t{view_size}")
    print("------------------------------------------------------------------------------------------------")

    score_saver = []
    avg_score_saver = []
    ddqn_scores = []
    eps_history = []
    visualisations = []
    prec = 40
    reached = np.zeros(n_agents, dtype=np.int32)
    reached_last_100 = np.zeros(n_agents, dtype=np.int32)

    if view_reduced:
        input_size = (view_size[0] + 1 + view_size[1]) * (view_size[2] + 1 + view_size[3]) + 4
    else:
        input_size = env_size_max[0] * env_size_max[1]

    # Create the agents
    agents = []
    for agent_id in range(n_agents):
        agent = Agent(f"agent_{agent_id}", gamma=0.99, epsilon=1.0, lr=1 * 5e-3,
                      n_actions=4, input_dims=[input_size], mem_size=100000,
                      batch_size=64, eps_min=0.01, eps_dec=5 * 1e-5, replace=100)
        if resume:
            agent.load_models()
        agents.append(agent)

    # Main training loop
    for i_game in tqdm(range(n_games)):
        scores = np.zeros(n_agents)
        avg_scores = np.zeros(n_agents)
        agent_in_final_state = np.full(n_agents, False)

        # Define size of map randomly in given range
        env_size = [mi if mi == ma else np.random.randint(mi, ma)
                    for mi, ma in zip(env_size_min, env_size_max)]

        # Define a time limit based on the perimeter of the environment
        timeout = np.sum(env_size * 2)

        # Create obstacles randomly covering 6 - 15 % of the env size
        num_obs = int(np.max([np.round(np.random.uniform(0.06, 0.15) * np.multiply(*env_size))
                              - 2 * n_agents, 0]))
        obstacles = []
        for i in range(num_obs):
            obstacles.append(Point(np.random.randint(1, env_size[0]),
                                   np.random.randint(1, env_size[1])))

        env = Game(obstacles, None, env_size, max_reward,
                   view_reduced=view_reduced, view_size=view_size)
        for i in range(n_agents):
            env.add_player()

        observations = env.reset()
        game_sav = [observations]
        time_step = 0

        # Play the game: run until all agents have reached a final state
        while not np.all(agent_in_final_state):
            time_step += 1

            # Obtain actions for each agent
            actions = []
            # Get actions from all agents that are not in a final state
            for agent_id, agent in enumerate(agents):
                if not agent_in_final_state[agent_id]:
                    actions.append(agent.choose_action(observations[agent_id]))
                else:
                    actions.append(None)

            # Execute actions on the board
            next_observations, rewards, agent_in_final_state = env.step(actions)

            # Save history for each agent and optimize
            for agent, observation, action, reward, next_observation, is_in_final_state in \
                    zip(agents, observations, actions, rewards, next_observations, agent_in_final_state):
                # Only store and optimize if the agent did something
                if action is not None:
                    agent.store_transition(observation, action, reward,
                                           next_observation, int(is_in_final_state))
                    agent.learn()

            # For statistics, count agents that reached their aim with the action in this iteration
            for agent_id, action in enumerate(actions):
                if action is not None and rewards[agent_id] == max_reward:
                    reached[agent_id] += 1
                    # Special statistic counter for the last 100 games
                    if i_game > (n_games - 100):
                        reached_last_100[agent_id] += 1

            scores += rewards
            observations = next_observations
            game_sav.append(next_observations)

            # If we reach the timeout for the game, just set all agents to being in a final state
            if time_step == timeout:
                agent_in_final_state = np.full(n_agents, True)

        eps_history.append([agent.epsilon for agent in agents])
        ddqn_scores.append(scores)

        # Save a checkpoint every 10 games
        if i_game > 0 and i_game % 10 == 0:
            for agent in agents:
                agent.save_models()

        if all(agent_in_final_state) and i_game > 20:
            avg_scores = np.mean(ddqn_scores[:-10], axis=0)
        score_saver.append(scores)
        if i_game > 20:
            avg_score_saver.append(avg_scores)

        epsilons = {agent.id: agent.epsilon for agent in agents}
        if i_game % int(n_games / prec) == int(n_games / prec) - 1:
            print(f"episode: {i_game} score: {np.round(scores.tolist(), 3)}, "
                  f"average score {avg_scores.tolist()} "
                  f"epsilon {epsilons} reached: {reached.tolist()}")

        # Save the game for visualization purposes
        viz = Visualisation(game_sav, env_size, n_agents, view_padding=view_size,
                            view_reduced=view_reduced,
                            truth_obstacles=np.array([o.to_numpy() for o in obstacles]),
                            dt=dt, i_game=i_game, scores=scores, reached=reached)
        if save_viz:
            viz.save()
        visualisations.append(viz)

    print(f"\n{n_games} runs - {reached.tolist()} times aim reached - "
          f"quota: {(reached / n_games).tolist()}")
    print("Quota of the last 100 runs " + str((reached_last_100 / 100).tolist()))

    # Visualize 10 played games at equal distances between the first and last
    # run, plus the best five games
    plot_game_i_list = np.arange(n_games - 1, 0, -int(max(n_games * 0.1, 1)))
    plot_game_i_list = np.concatenate(
        [[0], plot_game_i_list, np.argsort(-1 * np.max(score_saver, axis=1))[:5]])
    plot_game_i_list = np.unique(plot_game_i_list)
    plot_game_i_list = np.flip(plot_game_i_list)

    print()
    print('Visualize these games: {}'.format(plot_game_i_list))
    for i_game in plot_game_i_list:
        print(f'Generate visual output for game {i_game}...', end='\r')
        visualisations[i_game].plot_overview(time_step=-1, plot_info=False, save=True)

    plt.plot(score_saver)
    plt.show()
    plt.plot(avg_score_saver)
    plt.show()

    print()
    print()
    print('Done.')
    print('IMPORTANT: A crash of Python at the end of the code is a known issue.')
    print('It comes from closing a lot of matplotlib figures in a short time '
          '(see visualisation.py, line 611 and 655).')
from Node import Node
from Maze import Maze
from Agent import Agent

node1 = Node(False, False, False, False, (0, 0), False)
node2 = Node(False, False, False, False, (0, -1), False)
node3 = Node(False, False, False, False, (0, -2), False)
node4 = Node(False, False, False, False, (0, -3), False)
node5 = Node(False, False, False, False, (1, -3), False)

node1.set_down(node2)
node2.set_down(node3)
node3.set_down(node4)
node4.set_right(node5)
node2.set_up(node1)
node3.set_up(node2)
node4.set_up(node3)
node5.set_left(node4)

nodes = [node1, node2, node3, node4, node5]
foo = Maze(nodes)
agent = Agent((0, 0), foo)
agent.simple_discovery()
print(agent.current_pos)
node6 = Node(False, False, False, False, (0, 0), False)

node1.set_down(node2)
node2.set_up(node1)
node2.set_down(node3)
node3.set_up(node2)
node3.set_down(node4)
node4.set_up(node3)
node4.set_down(node5)
node5.set_up(node4)
node5.set_down(node6)
node6.set_up(node5)

nodes = [node1, node2, node3, node4, node5, node6]
foo = Maze(nodes)
agent1 = Agent(node4, foo)
agent2 = Agent(node3, foo)
agents = [agent1, agent2]

print("Begin maze")
print("Agent 1 location: ", agent1.current_pos)
print("Agent 2 location: ", agent2.current_pos)

while not check_win_condition(agents):
    input(">>> Press enter to continue")
    PPSOCycle(agents)
    PrintAgent(agents[0], 1)
    PrintAgent(agents[1], 2)

print("Maze fully Discovered!")
states = np.array([0])

# In[12]:

alpha = 0.5
gamma = 0.9
epsilon = 0.1
# QSize = actions.size * states.size
# half_size = (int) (0.5*QSize)
epsilon = 0.1 * 100  # epsilon expressed as a percentage (overrides the value above)

# In[13]:

agents = []
PA_1 = Agent(states.size, actions_1.size)
PA_2 = Agent(states.size, actions_2.size)
agents.append(PA_1)
agents.append(PA_2)

# In[14]:

# Channel conditions
g1 = 2.5
g2 = 1.5
Gamma = 3.532
sigma2 = 1
beta = 0.1

# Sum rate at maximum power for both users, each treating a beta-weighted
# share of the other's signal as interference
optimal = (np.log2(1 + (Pmax_1 * g1) / ((Pmax_2 * g1 * beta + 1) * Gamma))
           + np.log2(1 + (Pmax_2 * g2) / ((Pmax_1 * g2 * beta + 1) * Gamma)))
canvas.pack()

def callback():
    # Redraw the labyrinth: one coloured rectangle per field value
    time.sleep(1)
    for i in range(len(labyrinth.fields)):
        for j in range(len(labyrinth.fields[i])):
            val = labyrinth.fields[i][j]
            fill = "#ffffff"
            if val == 1:
                fill = "#34ebc9"
            if val == 2:
                fill = "#edea39"
            if val == 3:
                fill = "#d92f23"
            if val == 4:
                fill = "#5334eb"
            canvas.create_rectangle(j * 15, i * 15, j * 15 + 8, i * 15 + 8, fill=fill)
    top.update()

callback()
initialAgent = Agent(labyrinth.startx, labyrinth.starty, labyrinth, callback)
top.mainloop()