def main():
    """Connect a right-side goalie and print its body angle every step.

    Command-line flags select the server port, the HFO feature set and the
    number of trial episodes.  Requires a running HFO server.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', default=6000, type=int)
    parser.add_argument('--feature_set', default=hfo.LOW_LEVEL_FEATURE_SET,
                        type=int)
    parser.add_argument('--trials', default=10000, type=int)
    args = parser.parse_args()
    env = hfo.HFOEnvironment()
    env.connectToServer(
        args.feature_set,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        args.port, 'localhost', 'base_right', True)
    for episode in range(args.trials):
        status = hfo.IN_GAME
        # BUG FIX: was `while status != hfo.SERVER_DOWN`, which never
        # returned to the episode loop until the server died; loop only
        # while the current episode is still in progress.
        while status == hfo.IN_GAME:
            features = env.getState()
            # BUG FIX: was a Python 2 `print` statement, which is a syntax
            # error under Python 3 (the rest of this function already uses
            # the print() function).  feature 51 is read as sin(body angle).
            print(math.degrees(math.asin(features[51])))
            status = env.step()
        print(('Episode %d ended with %s' %
               (episode, env.statusToString(status))))
        # Quit if the server goes down
        if status == hfo.SERVER_DOWN:
            env.act(hfo.QUIT)
            exit()
def __init__(self, agent_id=0, port=6000, server_addr='localhost',
             team_name=None, num_opponents=0, num_teammates=0):
    """Prepare the HFO client interface and the bookkeeping attributes.

    No connection is made here; this only records the server parameters
    and resets the per-episode counters.
    """
    # Game interface object (connected later):
    self.hfo = hfo.HFOEnvironment()
    # Server connection parameters:
    self.feature_set = hfo.HIGH_LEVEL_FEATURE_SET
    self.config_dir = settings.CONFIG_DIR
    self.port = port
    self.server_addr = server_addr
    # "base" (or no name) plays as the stock base_left team; any other
    # name is upper-cased and suffixed with the side.
    use_default_team = not team_name or team_name == "base"
    self.team_name = ("base_left" if use_default_team
                      else f'{team_name.upper()}_left')
    self.play_goalie = False
    # Roster description:
    self.num_teammates = num_teammates
    self.num_opponents = num_opponents
    self.agent_id = agent_id
    # Episode/step counters and status flags:
    self._check_flag = 0
    self.episode = 0
    self.num_steps = 0
    self.status = hfo.IN_GAME
    # Identifier of the last player that touched the ball:
    self.last_player_to_touch_ball = 0
def test_basic():
    """Every action id and game-status id must map to a non-empty string."""
    env = hfo.HFOEnvironment()
    for action_id in range(hfo.NUM_HFO_ACTIONS):
        assert len(env.actionToString(action_id))
    for status_id in range(hfo.NUM_GAME_STATUS_STATES):
        assert len(env.statusToString(status_id))
def main():
    """Train a DDPG agent against the HFO server for 130 episodes.

    Relies on externally defined ``DDPG``, ``env_step``, ``state_violated``
    and ``TEST``; every ~100th late episode runs an evaluation pass and
    checkpoints the model.
    """
    hfo_env = hfo.HFOEnvironment()
    hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET)
    agent = DDPG()
    # model_file=tf.train.latest_checkpoint('ckpt/')
    # agent.saver.restore(agent.sess,model_file)
    for episode in range(130):
        status = hfo.IN_GAME
        # Once the state is "violated" we stop feeding transitions to the
        # replay buffer for the rest of the episode.
        stop_perceive = False
        # NOTE(review): debug artifact — appends to a hard-coded home path
        # every episode; presumably leftover instrumentation.
        with open('/home/ruizhao/Desktop/a.txt', 'a') as f:
            print('Hello World!', file=f)
        while True:
            state = hfo_env.getState()
            # print(state)
            # Exploration action (policy output plus noise):
            action = agent.noise_action(state)
            print(action)
            next_state, reward, done, status = env_step(agent, hfo_env, action)
            # print(reward)
            if state_violated(next_state):
                # print("hhhhhhhhhhhhhh")
                stop_perceive = True
            if not stop_perceive:
                # print(state, next_state,done)
                # print(stop_perceive)
                agent.perceive(state, action, reward, next_state, done)
            if status != hfo.IN_GAME:
                break
        if status == hfo.SERVER_DOWN:
            hfo_env.act(hfo.QUIT)
            exit()
        # print(episode)
        # print(episode % 100 == 0 and episode > 100)
        # Periodic evaluation (noise-free actions) and checkpointing:
        if episode % 100 == 0 and episode > 100:
            # if True:
            total_reward = 0
            for i in range(TEST):
                # state = env.reset()
                while True:
                    state = hfo_env.getState()
                    action = agent.action(state)
                    next_state, reward, done, status = env_step(
                        agent, hfo_env, action)
                    total_reward += reward
                    if done:
                        break
            ave_reward = total_reward / TEST
            agent.saver.save(agent.sess, 'ckpt/mnist.ckpt',
                             global_step=episode)
            print(' episode: ', episode, 'Evaluation Average Reward:',
                  ave_reward)
def __init__(self, envir=None, action_set="low_level", seed=123):
    """Random low-level agent playing goalie for the right base team.

    Args:
        envir: HFO environment to use; a fresh ``hfo.HFOEnvironment()`` is
            created when omitted.  BUG FIX: the default used to be
            ``hfo.HFOEnvironment()`` evaluated once at import time, so
            every agent built with the default silently shared one
            environment object (mutable-default-argument bug).
        action_set: kept for interface compatibility (not used here).
        seed: randomization seed stored on the agent.
    """
    if envir is None:
        envir = hfo.HFOEnvironment()
    Agent.__init__(self, env=envir,
                   agent_type="low_level_random_agent",
                   action_space=LowLevelActionSpace(),
                   state_space=NeuralStateSpace(),
                   feature_set=hfo.LOW_LEVEL_FEATURE_SET,
                   port=6000, base="base_right", goalie=True)
    self.seed = seed
def run(num_episodes):
    """Play ``num_episodes`` episodes of a goalie that constantly dashes.

    BUG FIX: the original used the Python 2-only ``xrange`` builtin and a
    Python 2 ``print`` statement; both fail under Python 3.
    """
    env = hfo.HFOEnvironment()
    env.connectToServer(
        hfo.LOW_LEVEL_FEATURE_SET,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        6000, 'localhost', 'base_right', True)
    for episode in range(num_episodes):
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            features = env.getState()
            # Dash at full power, 90 degrees to the left, every step.
            env.act(hfo.DASH, 100.0, -90.0)
            status = env.step()
        print('Episode', episode, 'ended')
def connect_server(self, agentIndex):
    """Connects the client subprocess in the hfo server

    The learning process should be all executed in here because of
    strange errors in the HFO server when executing more than one client
    at the same time
    """
    # Formations file shipped with the base team:
    formations = self.serverPath + 'teams/base/config/formations-dt'
    env = hfo.HFOEnvironment()
    self.hfoObj[agentIndex] = env
    # Open the connection and report what the server answered.
    feedback = env.connectToServer(feature_set=hfo.HIGH_LEVEL_FEATURE_SET,
                                   config_dir=formations,
                                   server_port=self.serverPort,
                                   server_addr='localhost',
                                   team_name='base_left',
                                   play_goalie=False)
    print("%%%% Server connection FeedBack: " + str(feedback))
def __init__(self, taskParam, limitFrames=200, agentsControl=1): """Initiates the HFO environment""" #Returns a port that is not being used self.serverPort = portmanager.get_free_port() #self.serverPort = 2000 self.numberFriends = taskParam[0] self.numberOpponents = taskParam[1] self.applyAction = None #[None]*agentsControl self.actionParameter = None #[None]*agentsControl #self.agentsControl = agentsControl self.lastAction = [None] #*agentsControl self.hfoObj = [] #for i in range(agentsControl): #self.hfoObj.append(hfo.HFOEnvironment()) self.hfoObj = hfo.HFOEnvironment() self.stepRequest = False #[False]*agentsControl self.clearServer = False #[False]*agentsControl #self.init_server(taskParam,limitFrames) #Initiates a new thread only to avoid an error when loading the strategy.cpp file self.terminateThread = False t = Thread(target=init_server, args=(self, taskParam, limitFrames, agentsControl)) t.start() t.join() time.sleep(2) #Initiates one thread for each agent controlled by learning algorithms #for i in range(self.agentsControl): #t = Thread(target=connect_server, args=(self, i)) #t.start() #time.sleep(2) t = Thread(target=connect_server, args=(self, )) t.start() #The connection with the server is OK after here. time.sleep(3) self.totalEpisodes = 0 self.goals = 0 self.stateSpaceManager = HFOStateManager(self.numberFriends, self.numberOpponents)
def run(num_episodes):
    """Play ``num_episodes`` episodes of a left-side kicker.

    Each episode picks one random kick angle in [-8, 8] degrees and kicks
    at full power every step.

    BUG FIX: the original used the Python 2-only ``xrange`` builtin and a
    Python 2 ``print`` statement; both fail under Python 3.  The unused
    local ``r`` was also dropped.
    """
    env = hfo.HFOEnvironment()
    env.connectToServer(
        hfo.LOW_LEVEL_FEATURE_SET,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        6000, 'localhost', 'base_left', False)
    for episode in range(num_episodes):
        kick_angle = random.randint(-8, 8)
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            features = env.getState()
            env.act(hfo.KICK, 100.0, kick_angle)
            status = env.step()
        print('Episode', episode, 'ended')
def main():
    """Spawn a sequence of HFO servers and connect one agent to each.

    NOTE(review): indentation was reconstructed from a whitespace-mangled
    source; everything after the Popen call is placed inside the loop
    because ``serverProcess`` and the per-iteration ``args.port`` bump are
    used there — confirm against the original.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=12345,
                        help="Server port")
    parser.add_argument(
        '--seed', type=int, default=None,
        help="Python randomization seed; uses python default if 0 or not given"
    )
    parser.add_argument('--record', action='store_true',
                        help="Doing HFO --record")
    parser.add_argument('--rdir', type=str, default='log/',
                        help="Set directory to use if doing HFO --record")
    args = parser.parse_args()
    # NOTE(review): the commands hard-code ports 12345..12360 while
    # args.port is incremented by 5 each iteration — verify they agree.
    serverCommands = [
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12345 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log",
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12350 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log",
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12355 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log",
        "/home/leno/gitProjects/Curriculum_HFO/HFO/bin/HFO --offense-team helios --fullstate --offense-on-ball 12 --no-logging --headless --port 12360 --frames-per-trial 200 --offense-agents 1 --offense-npcs 2 --defense-npcs 3 --offense-team base --defense-team helios --ball-x-min 0.6 --ball-x-max 0.7999999999999999 --seed 123 --verbose >> tt.log"
    ]
    for serverCommand in serverCommands:
        # Launch the server in the background via the shell (the command
        # string contains redirections).
        serverProcess = subprocess.Popen(serverCommand, shell=True)
        print("NEW Server")
        time.sleep(1)
        if args.seed:
            random.seed(args.seed)
        # Create the HFO Environment
        hfo_env = hfo.HFOEnvironment()
        # Connect to the server with the specified
        # feature set. See feature sets in hfo.py/hfo.hpp.
        t = Thread(target=init_connect, args=(hfo_env, args))
        t.start()
        t.join()
        # Kill the whole process group of the server before moving on.
        subprocess.call("kill -9 -" + str(serverProcess.pid), shell=True)
        args.port = args.port + 5
def main():
    """Run a GO_TO_BALL agent forever, scoring each step with the low-level
    reward function.

    BUG FIX: the end-of-episode message was ``print("Episode %d ended
    with")`` — a format placeholder with no arguments, so it printed the
    literal template; it now reports the episode number and final status.
    """
    env = hfo.HFOEnvironment()
    env.connectToServer(LOW_LEVEL_FEATURE_SET, config_dir='./',
                        server_port=1234)
    for episode in itertools.count():
        status = env.step()
        s1 = env.getState()
        already_close_to_ball = False
        while status == IN_GAME:
            time.sleep(3)
            env.act(GO_TO_BALL)
            status = env.step()
            s2 = env.getState()
            # Reward computed for its side effects / logging only; the
            # return value is not used here.
            low_level_reward_function(s2, s1, already_close_to_ball, status)
            s1 = s2
        print("Episode %d ended with %s" %
              (episode, env.statusToString(status)))
        if status == SERVER_DOWN:
            env.act(QUIT)
            break
def __init__(self, port):
    """Launch a 1v1 HFO game on ``port`` and connect this agent to it.

    Starts the server as a background shell command, waits for it to come
    up, connects as the left-side offense agent, and records the discrete
    action lookup table.
    """
    game_cmd = "{}/bin/HFO --offense-agents=1" \
        " --defense-npcs=1 --port={} --trials 200 --headless &".format(
            hfo_root, port)
    # Fire-and-forget: the trailing '&' backgrounds the server process.
    os.system(game_cmd)
    # Give the server time to start accepting connections.
    time.sleep(2)
    self.env = hfo.HFOEnvironment()
    self.env.connectToServer(
        HIGH_LEVEL_FEATURE_SET,
        '{}/bin/teams/base/config/formations-dt'.format(hfo_root), port,
        'localhost', 'base_left', False)
    # Discrete index -> HFO high-level action:
    self.avaliable_actions = {
        0: MOVE,
        1: SHOOT,
        2: DRIBBLE,
        3: GO_TO_BALL,
        4: NOOP
    }
    self.action_space = len(self.avaliable_actions)
    self.state_space = self.env.getStateSize()
def __init__(self, agent_id=0, port=6000, server_addr='localhost',
             num_opponents=0, num_teammates=0):
    """Record the server settings and reset the per-episode counters.

    The connection itself is established elsewhere; this only stores the
    configuration used for it.
    """
    # Interface to the HFO server (not yet connected):
    self.hfo = hfo.HFOEnvironment()
    # Fixed server configuration for this agent:
    self.feature_set = hfo.HIGH_LEVEL_FEATURE_SET
    self.config_dir = settings.CONFIG_DIR
    self.port = port
    self.server_addr = server_addr
    self.team_name = 'base_left'
    self.play_goalie = False
    # Roster information:
    self.num_opponents = num_opponents
    self.num_teammates = num_teammates
    self.agent_id = agent_id
    # Progress tracking:
    self.episode = 0
    self.num_steps = 0
    self.status = hfo.IN_GAME
def playGame(train_indicator=0):  # 1 means Train, 0 means simply Run
    """DDPG training loop for a low-level HFO agent (Keras/TF v1).

    Builds actor/critic networks, connects to an HFO server on port 1111,
    and runs up to ``episode_count`` episodes of noisy exploration with
    replay-buffer batch updates.  Relies on externally defined
    ``ActorNetwork``, ``CriticNetwork``, ``ReplayBuffer``, ``OU``,
    ``Locker``, ``invert_grads`` and ``low_level_reward_function``.
    """
    BUFFER_SIZE = 100000.
    BATCH_SIZE = 32
    GAMMA = 0.99
    TAU = 0.001  # Target Network HyperParameters
    LRA = 0.0005  # Learning rate for Actor
    LRC = 0.001  # Lerning rate for Critic
    action_dim = 10  # 4 actions and their 6 continuous parameters
    state_dim = 58  # of sensors input
    np.random.seed(1337)
    EXPLORE = 100000
    episode_count = 20000
    max_steps = 1000
    # NOTE(review): `reward` and `indicator` are assigned but never used.
    reward = 0
    step = 0
    epsilon = 1
    indicator = 0
    # Tensorflow GPU optimization
    config = tf.ConfigProto()
    # config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    from keras import backend as K
    K.set_session(sess)
    actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA)
    critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC)
    buff = ReplayBuffer(BUFFER_SIZE)  #Create replay buffer
    # Generate a HFO environment
    env = hfo.HFOEnvironment()
    env.connectToServer(hfo.LOW_LEVEL_FEATURE_SET, config_dir='./conf',
                        server_port=1111)
    #Now load the weight
    print("Now we load the weight")
    # NOTE(review): bare except silently swallows any failure here, not
    # just missing weight files.
    try:
        actor.model.load_weights("actormodel.h5")
        critic.model.load_weights("criticmodel.h5")
        actor.target_model.load_weights("actormodel.h5")
        critic.target_model.load_weights("criticmodel.h5")
        print("Weight load successfully")
    except:
        print("Cannot find the weight")
    print("Soccer Experiment Start.")
    for episode in range(episode_count):
        print("Episode : " + str(episode) + " Replay Buffer " +
              str(buff.count()))
        isBall = Locker()
        s_t = np.hstack(env.getState())
        status = env.step()
        total_reward = 0.
        # Accumulates the critic's target Q-values over the episode; starts
        # as int 0 and becomes an array after the first batch update.
        total_target_q_values = 0
        for j in range(max_steps):
            # time.sleep(.1)
            loss = 0
            epsilon -= 1.0 / EXPLORE
            a_t = np.zeros([1, action_dim])
            noise_t = np.zeros([1, action_dim])
            a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0]))
            # Ornstein-Uhlenbeck exploration noise on each action head,
            # annealed by epsilon and disabled when not training.
            noise_t[0][0] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][0], 0.60, 0.15, 0.20)
            noise_t[0][1] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][1], 0.25, 0.15, 0.20)
            noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][2], 0.20, 0.15, 0.20)
            noise_t[0][3] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][3], 0.40, 0.15, 0.20)
            noise_t[0][4] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][4], 0.0, 0.15, 0.20)
            noise_t[0][5] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][5], 0.0, 0.15, 0.20)
            noise_t[0][6] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][6], 0.0, 0.15, 0.20)
            noise_t[0][7] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][7], 0.0, 0.15, 0.20)
            noise_t[0][8] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][8], 0.0, 0.15, 0.20)
            noise_t[0][9] = train_indicator * max(epsilon, 0) * OU.function(
                a_t_original[0][9], 0.0, 0.15, 0.20)
            a_t[0][0] = a_t_original[0][0] + noise_t[0][0]
            a_t[0][4] = a_t_original[0][4] + noise_t[0][4]
            a_t[0][5] = a_t_original[0][5] + noise_t[0][5]
            a_t[0][1] = a_t_original[0][1] + noise_t[0][1]
            a_t[0][6] = a_t_original[0][6] + noise_t[0][6]
            a_t[0][2] = a_t_original[0][2] + noise_t[0][2]
            a_t[0][7] = a_t_original[0][7] + noise_t[0][7]
            a_t[0][3] = a_t_original[0][3] + noise_t[0][3]
            a_t[0][8] = a_t_original[0][8] + noise_t[0][8]
            a_t[0][9] = a_t_original[0][9] + noise_t[0][9]
            # Indices 0-3 are per-action "softmax" scores; 4-9 are the
            # continuous parameters, rescaled to power/angle ranges below.
            dash_tuple = namedtuple('Dash', ['SOFTMAX', 'PWR', 'ANGLE'])
            turn_tuple = namedtuple('Turn', ['SOFTMAX', 'ANGLE'])
            tackle_tuple = namedtuple('Tackle', ['SOFTMAX', 'ANGLE'])
            kick_tuple = namedtuple('Kick', ['SOFTMAX', 'PWR', 'ANGLE'])
            dash = dash_tuple(a_t[0][0], 100 * a_t[0][4], 180 * a_t[0][5])
            turn = turn_tuple(a_t[0][1], 180 * a_t[0][6])
            tackle = tackle_tuple(a_t[0][2], 180 * a_t[0][7])
            kick = kick_tuple(a_t[0][3], 100 * a_t[0][8], 180 * a_t[0][9])
            print("Actions:\n--{}\n--{}\n--{}\n--{}".format(
                dash, turn, tackle, kick))
            # if 0 <= episode <= 200:
            # r = .6
            # elif 201 < episode <=500:
            # r = 0.7
            # elif 501 <= episode < 1000:
            # r = 0.75
            # elif 1001 <- episode < 2000:
            # r = .8
            # else:
            # r = .9
            # Execute whichever action head scored highest:
            actions = sorted([dash, turn, tackle, kick],
                             key=lambda x: x.SOFTMAX, reverse=True)
            # action = actions[0 if random.random() < r else random.randint(0,3)]
            action = actions[0]
            print(action)
            if type(action) == type(dash):
                env.act(hfo.DASH, dash.PWR, dash.ANGLE)
            elif type(action) == type(turn):
                env.act(hfo.TURN, turn.ANGLE)
            elif type(action) == type(tackle):
                env.act(hfo.TACKLE, tackle.ANGLE)
            elif type(action) == type(kick):
                env.act(hfo.KICK, kick.PWR, kick.ANGLE)
            else:
                print('I am not acting')
            player = env.playerOnBall()
            status = env.step()
            s_t1 = np.array(env.getState())
            r_t = low_level_reward_function(s_t1, s_t, isBall, status)
            buff.add(s_t, a_t[0], r_t, s_t1, status)
            #Do the batch update
            batch = buff.getBatch(BATCH_SIZE)
            states = np.asarray([e[0] for e in batch])
            actions = np.asarray([e[1] for e in batch])
            rewards = np.asarray([e[2] for e in batch])
            new_states = np.asarray([e[3] for e in batch])
            dones = np.asarray([e[4] for e in batch])
            # y_t is only used as a pre-shaped buffer; every entry is
            # overwritten with the TD target below.
            y_t = np.asarray([e[1] for e in batch])
            predicted_actions = actor.target_model.predict(new_states)
            target_q_values = critic.target_model.predict(
                [new_states, predicted_actions])
            for k in range(len(batch)):
                if dones[k]:
                    y_t[k] = rewards[k]
                else:
                    y_t[k] = rewards[k] + GAMMA * target_q_values[k]
            for k in range(len(batch)):
                total_target_q_values = total_target_q_values + target_q_values[
                    k]
            if (train_indicator):
                loss += critic.model.train_on_batch([states, actions], y_t)
                a_for_grad = actor.model.predict(states)
                grads = critic.gradients(states, a_for_grad)
                inverted_grads = invert_grads(
                    grads, a_for_grad
                )  # Invert the gradients if they exceed the parameter max and min values
                actor.train(states, inverted_grads)
                actor.target_train()
                critic.target_train()
            total_reward += r_t
            s_t = s_t1
            step += 1
            if status != hfo.IN_GAME:
                break
        if status == hfo.SERVER_DOWN:
            env.act(hfo.QUIT)
            break
        # Append per-episode metrics to the CSV logs:
        with open('rewards.csv', 'a') as f:
            f.writelines("{},{}\n".format(episode, total_reward))
        with open('q_values.csv', 'a') as g:
            g.write("{},{}\n".format(episode, sum(total_target_q_values)))
        # Checkpoint every third episode while training:
        if np.mod(episode, 3) == 0:
            if (train_indicator):
                print("Now we save model")
                actor.model.save_weights("actormodel.h5", overwrite=True)
                with open("actormodel.json", "w") as outfile:
                    json.dump(actor.model.to_json(), outfile)
                critic.model.save_weights("criticmodel.h5", overwrite=True)
                with open("criticmodel.json", "w") as outfile:
                    json.dump(critic.model.to_json(), outfile)
        print("TOTAL REWARD @ " + str(episode) + "-th Episode : Reward " +
              str(total_reward))
        print("Total Step: " + str(step))
        print("")
    print("Finish.")
"""A few tests using a server""" from __future__ import print_function import os import subprocess import sys import time import hfo hfo_env = hfo.HFOEnvironment() def try_step(): # if a game ends within ~20 frames, something is wrong... status = hfo_env.step() assert (status == hfo.IN_GAME), ( "Status is {!s} ({!r}), not IN_GAME".format( hfo_env.statusToString(status), status)) return hfo_env.getState() def test_with_server(): test_dir = os.path.dirname(os.path.abspath(os.path.realpath(__file__))) binary_dir = os.path.normpath(test_dir + "/../bin") conf_dir = os.path.join(binary_dir, 'teams/base/config/formations-dt') bin_HFO = os.path.join(binary_dir, "HFO") popen_list = [ sys.executable, "-x", bin_HFO, "--offense-agents=1",
def player(mark): print('--I am player', mark, ctime()) # Create the HFO Environment hfo_env = hfo.HFOEnvironment() hfo_env.connectToServer( hfo.LOW_LEVEL_FEATURE_SET, 'C:/Users/Administrator/HFO/bin/teams/base/config/formations-dt', args.port, 'localhost', 'base_right', False) total_step = 0 ep_rewards = [] ep_steps = [] ep_goals = [] for episode in itertools.count(): status = hfo.IN_GAME episode_step = 0 #FIXME 0061 isBall = Locker total_reward = 0. while status == hfo.IN_GAME: total_step += 1 episode_step += 1 # Get the vector of state features for the current state st = np.hstack(hfo_env.getState()) action, c_action = ma_pdqn.act(state=st, index=mark) if action == 0: hfo_env.act(hfo.DASH, c_action[0], c_action[1]) elif action == 1: hfo_env.act(hfo.TURN, c_action) elif action == 2: hfo_env.act(hfo.TACKLE, c_action) elif action == 3: hfo_env.act(hfo.KICK, c_action[0], c_action[1]) else: print('I am not acting', mark) # Advance the environment and get the game status #player = env.playerOnBall() status = hfo_env.step() st_ = np.array(hfo_env.getState()) r_t = low_level_reward_function(st_, st, isBall, status) #FIXME st[0:58] if mark == 0: ma_pdqn.storeTransition1(st, st_, action, c_action, r_t, st[0:58], st_[0:58]) else: ma_pdqn.storeTransition2(st, st_, action, c_action, r_t, st[0:58], st_[0:58]) total_reward += r_t ma_pdqn.train() #FIXME 0065 ma_pdqn.episode_done(index=mark) ep_steps.append(episode_step) #FIXME rewards.append ep_rewards.append(total_reward) if status == hfo.GOAL: ep_goals.append(1) else: ep_goals.append(0) ep_steps = ep_steps[-100:] ep_rewards = ep_rewards[-100:] ep_goals = ep_goals[-100:] if (episode + 1) % print_interval == 0 and mark == 0: print("================================================") print("--Agent:", mark) print("--Episode: ", episode) print("----Avg_steps: ", sum(ep_steps[-100:]) / 100.0) print("----Avg_reward: ", sum(ep_rewards[-100:]) / 100.0) print("----Goal_rate: ", sum(ep_goals[-100:]) / 100.0) 
print("------------------------------------------------") # Check the outcome of the episode # end_status = hfo_env.statusToString(status) # print("Episode {0:n} ended with {1:s}".format(episode, end_status)) # Quit if the server goes down if status == hfo.SERVER_DOWN: hfo_env.act(hfo.QUIT) exit()
def main():
    """Scripted high-level offense agent with optional epsilon-randomness.

    When holding the ball it either takes a random SHOOT/DRIBBLE (with
    probability ``--epsilon``) or delegates to the externally defined
    ``get_action``; otherwise it MOVEs.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=6000,
                        help="Server port")
    parser.add_argument(
        '--seed', type=int, default=None,
        help="Python randomization seed; uses python default if 0 or not given"
    )
    parser.add_argument(
        '--rand-pass', action="store_true",
        help="Randomize order of checking teammates for a possible pass")
    parser.add_argument(
        '--epsilon', type=float, default=0,
        help=
        "Probability of a random action if has the ball, to adjust difficulty")
    parser.add_argument('--record', action='store_true',
                        help="If doing HFO --record")
    parser.add_argument('--rdir', type=str, default='log/',
                        help="Set directory to use if doing --record")
    args = parser.parse_args()
    if args.seed:
        random.seed(args.seed)
    hfo_env = hfo.HFOEnvironment()
    if args.record:
        hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                                'bin/teams/base/config/formations-dt',
                                args.port, 'localhost', 'base_left', False,
                                record_dir=args.rdir)
    else:
        hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                                'bin/teams/base/config/formations-dt',
                                args.port, 'localhost', 'base_left', False)
    num_teammates = hfo_env.getNumTeammates()
    #num_opponents = hfo_env.getNumOpponents()
    # The seed is only consequential when some randomness is enabled:
    if args.seed:
        if (args.rand_pass and (num_teammates > 1)) or (args.epsilon > 0):
            print("Python randomization seed: {0:d}".format(args.seed))
        else:
            print(
                "Python randomization seed useless without --rand-pass w/2+ teammates or --epsilon >0"
            )
    if args.rand_pass and (num_teammates > 1):
        print("Randomizing order of checking for a pass")
    if args.epsilon > 0:
        print("Using epsilon {0:n}".format(args.epsilon))
    for episode in itertools.count():
        # Per-episode counters: random actions, ball-possession actions,
        # and plain MOVE actions.
        num_eps = 0
        num_had_ball = 0
        num_move = 0
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            state = hfo_env.getState()
            #print(state)
            if int(state[5]) == 1:  # state[5] is 1 when player has the ball
                if (args.epsilon > 0) and (random.random() < args.epsilon):
                    # Random action branch: shoot or dribble with equal odds.
                    if random.random() < 0.5:
                        hfo_env.act(hfo.SHOOT)
                    else:
                        hfo_env.act(hfo.DRIBBLE)
                    num_eps += 1
                else:
                    get_action(state, hfo_env, num_teammates, args.rand_pass)
                    num_had_ball += 1
            else:
                hfo_env.act(hfo.MOVE)
                num_move += 1
            status = hfo_env.step()
            #print(status)
            # Quit if the server goes down
            if status == hfo.SERVER_DOWN:
                hfo_env.act(hfo.QUIT)
                exit()
        # Check the outcome of the episode
        print("Episode {0:d} ended with {1:s}".format(
            episode, hfo_env.statusToString(status)))
        if args.epsilon > 0:
            print("\tNum move: {0:d}; Random action: {1:d}; Nonrandom: {2:d}".
                  format(num_move, num_eps, (num_had_ball - num_eps)))
def main():
    """Q-learning goalie with a one-layer linear network (TF v1 API).

    The ball position (features 3:5) is binned into one of
    ``feature_space_n`` discrete cells via ``helper.bin_ball_position``;
    a linear layer maps the one-hot cell to Q-values over the 8-entry
    low-level action set, trained toward a standard TD target.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', default=6000, type=int)
    parser.add_argument('--feature_set', default=hfo.HIGH_LEVEL_FEATURE_SET,
                        type=int)
    parser.add_argument('--trials', default=10000, type=int)
    args = parser.parse_args()
    # Starting the TensorFlow network
    tf.reset_default_graph()
    # Create the HFO Environment
    env = hfo.HFOEnvironment()
    feature_space_n = 100   # number of discrete ball-position bins
    action_space_n = 8      # size of the low-level action set
    # Connect to the server with the specified
    # feature set. See feature sets in hfo.py/hfo.hpp.
    env.connectToServer(
        args.feature_set,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        args.port, 'localhost', 'base_right', True)
    # Feed-forward part of the network: one-hot state in, Q-values out.
    inputs1 = tf.placeholder(shape=[1, feature_space_n], dtype=tf.float32)
    W = tf.Variable(
        tf.random_uniform([feature_space_n, action_space_n], 0, 0.01))
    Qout = tf.matmul(inputs1, W)
    predict = tf.argmax(Qout, 1)
    nextQ = tf.placeholder(shape=[1, action_space_n], dtype=tf.float32)
    loss = tf.reduce_sum(tf.square(nextQ - Qout))
    trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
    updateModel = trainer.minimize(loss)
    init = tf.initialize_all_variables()
    action_set = LowLevelActionSet()
    gamma = 0.99
    e = 0.1  # epsilon-greedy exploration rate, annealed on episode end
    x_bounds = [-1, 1]
    y_bounds = [-0.3, 0.3]
    with tf.Session() as sess:
        sess.run(init)
        for episode in range(args.trials):
            s = helper.bin_ball_position(env.getState()[3:5], x_bounds,
                                         y_bounds)
            status = hfo.IN_GAME
            while status == hfo.IN_GAME:
                a, allQ = sess.run(
                    [predict, Qout],
                    feed_dict={inputs1: np.identity(feature_space_n)[s:s + 1]})
                if np.random.rand(1) < e:
                    # BUG FIX: was random.randint(0, 8) — randint's upper
                    # bound is inclusive, so this could emit action index 8
                    # for an 8-action space (valid indices 0..7) and crash
                    # at targetQ[0, a[0]].
                    a[0] = random.randint(0, action_space_n - 1)
                # Get new state and reward from environment
                env.act(*action_set[a[0]])
                status = env.step()
                s1 = helper.bin_ball_position(env.getState()[3:5], x_bounds,
                                              y_bounds)
                # Reward: closeness of agent (0:2) to ball (3:5), scaled.
                reward = (1 - np.linalg.norm(
                    env.getState()[3:5] - env.getState()[0:2])) * 100
                if status == hfo.GOAL:
                    # Goalie perspective: conceding a goal is penalized.
                    reward = -500
                    e = 1. / ((episode / 50) + 10)
                    break
                elif status == hfo.CAPTURED_BY_DEFENSE:
                    e = 1. / ((episode / 50) + 10)
                    reward = 500
                    break
                # Obtain the Q' values by feeding the new state through
                # our network.
                Q1 = sess.run(
                    Qout,
                    feed_dict={
                        inputs1: np.identity(feature_space_n)[s1:s1 + 1]
                    })
                # Obtain maxQ' and set our target value for chosen action.
                maxQ1 = np.max(Q1)
                targetQ = allQ
                targetQ[0, a[0]] = reward + gamma * maxQ1
                # Train our network using target and predicted Q values.
                _, W1 = sess.run(
                    [updateModel, W],
                    feed_dict={
                        inputs1: np.identity(feature_space_n)[s:s + 1],
                        nextQ: targetQ
                    })
                s = s1
            print(('Episode %d ended with %s' %
                   (episode, env.statusToString(status))))
            # Quit if the server goes down
            if status == hfo.SERVER_DOWN:
                env.act(hfo.QUIT)
                exit()
from pathlib import Path

import hfo
from hfo.hfo import *

base_dir = Path('/home/goncalo/HFO')
config_dir = base_dir / 'bin/teams/base/config/formations-dt'

# BUG FIX: the environment used to be bound to the name `hfo`, shadowing
# the imported module on the line above; use a distinct name so
# `hfo.<anything>` keeps resolving to the module.
env = hfo.HFOEnvironment()
env.connectToServer(HIGH_LEVEL_FEATURE_SET, config_dir=str(config_dir))

# Dash gently forward for five episodes, printing the agent position.
for episode in range(5):
    status = IN_GAME
    while status == IN_GAME:
        features = env.getState()
        print(features[:2])
        # dist = features[33]
        env.act(DASH, 20.0, 0.0)
        status = env.step()
    print('episode', episode)
def main():
    """Tabular Q-learning goalie over binned ball positions.

    State: the ball position (features 3:5) binned into 100 cells.
    Reward: proximity-based each step, overridden at terminal states.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', default=6000, type=int)
    parser.add_argument('--feature_set', default=hfo.HIGH_LEVEL_FEATURE_SET,
                        type=int)
    parser.add_argument('--trials', default=10000, type=int)
    args = parser.parse_args()
    state_space = 100  # amount of bins for ball to go in
    alpha = 0.8   # learning rate
    gamma = 0.95  # discount factor
    #stats_path = '/home/student/Desktop/HFO-master_ruben/example/test_keepers/stats.bin'
    # Counts of how each episode terminated:
    stats = {
        hfo.GOAL: 0,
        hfo.CAPTURED_BY_DEFENSE: 0,
        hfo.OUT_OF_BOUNDS: 0,
        hfo.OUT_OF_TIME: 0,
        hfo.SERVER_DOWN: 0
    }
    #advanced_stats = array('b')
    # Create the HFO Environment
    env = hfo.HFOEnvironment()
    actions = ActionSet("high_level")
    action_space = len(actions)
    Q = np.zeros((state_space, action_space))
    # Connect to the server with the specified
    # feature set. See feature sets in hfo.py/hfo.hpp.
    # saving first state
    env.connectToServer(
        args.feature_set,
        '/home/student/Desktop/HFO-master_ruben/bin/teams/base/config/formations-dt',
        args.port, 'localhost', 'base_right', True)
    #num_features = hfo.getStateSize()
    x_bounds = [-1, 1]
    y_bounds = [-0.3, 0.3]
    for episode in range(args.trials):
        s = helper.bin_ball_position(env.getState()[3:5], x_bounds, y_bounds)
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            # Greedy action with decaying random tie-breaking noise:
            a = np.argmax(Q[s, :] + np.random.randn(1, action_space) *
                          (1. / (episode + 1)))
            env.act(*actions[a])
            status = env.step()
            features = env.getState()
            reward = (1 - features[9]) * 100
            # NOTE(review): both breaks below skip the Q update, so the
            # terminal -500/+500 rewards never reach the table — confirm
            # whether that is intended.
            if status == hfo.GOAL:
                reward = -500
                break
            elif status == hfo.CAPTURED_BY_DEFENSE:
                reward = 500
                break
            s1 = helper.bin_ball_position(features[3:5], x_bounds, y_bounds)
            Q[s, a] = Q[s, a] + alpha * (reward + gamma * np.max(Q[s1, :]) -
                                         Q[s, a])
            s = s1
        stats[status] += 1
        # Grab the state features from the environment
        #advanced_stats.append(stats[status])
        print(('Episode %d ended with %s' %
               (episode, env.statusToString(status))))
        # Quit if the server goes down
        if status == hfo.SERVER_DOWN:
            env.act(hfo.QUIT)
            exit()
def __init__(self):
    """Build the discretized state/action spaces and join the HFO server."""
    # 5x5 grid discretization of the field:
    self.state_space = StateSpace(5, 5)
    self.action_space = ActionSpace()
    # Client interface, connected immediately below:
    self.env = hfo.HFOEnvironment()
    self._connect_to_server()
def __init__(self, envir=None, action_set="high_level", seed=123):
    """High-level agent playing goalie for the right base team.

    Args:
        envir: HFO environment to use; a fresh ``hfo.HFOEnvironment()`` is
            created when omitted.  BUG FIX: the default used to be
            ``hfo.HFOEnvironment()`` evaluated once at import time, so
            every agent built with the default silently shared one
            environment object (mutable-default-argument bug).
        action_set: kept for interface compatibility (not used here).
        seed: randomization seed stored on the agent.
    """
    if envir is None:
        envir = hfo.HFOEnvironment()
    Agent.__init__(self, env=envir,
                   agent_type="high_level_agent",
                   action_set=HighLevelActionSpace(),
                   state_space=StateSpace(500),
                   feature_set=hfo.HIGH_LEVEL_FEATURE_SET,
                   port=6000, base="base_right", goalie=True)
    self.seed = seed
def main():
    """Scripted defense agent with per-action usage statistics.

    Delegates decisions to the externally defined ``do_defense_action`` /
    ``do_random_defense_action`` and prints action-usage counters when the
    server shuts down.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--port', type=int, default=6000,
                        help="Server port")
    parser.add_argument(
        '--seed', type=int, default=None,
        help="Python randomization seed; uses python default if 0 or not given"
    )
    parser.add_argument(
        '--epsilon', type=float, default=0,
        help="Probability of a random action, to adjust difficulty")
    parser.add_argument('--record', action='store_true',
                        help="If doing HFO --record")
    parser.add_argument('--rdir', type=str, default='log/',
                        help="Set directory to use if doing --record")
    args = parser.parse_args()
    if args.seed:
        random.seed(args.seed)
    hfo_env = hfo.HFOEnvironment()
    if args.record:
        hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                                settings.CONFIG_DIR, args.port, 'localhost',
                                'base_right', play_goalie=False,
                                record_dir=args.rdir)
    else:
        hfo_env.connectToServer(hfo.HIGH_LEVEL_FEATURE_SET,
                                settings.CONFIG_DIR, args.port, 'localhost',
                                'base_right', play_goalie=False)
    numTeammates = hfo_env.getNumTeammates()
    numOpponents = hfo_env.getNumOpponents()
    # The seed only matters when epsilon-randomness is enabled:
    if args.seed:
        if args.epsilon > 0:
            print("Python randomization seed: {0:d}".format(args.seed))
        else:
            print("Python randomization seed useless without --epsilon >0")
    if args.epsilon > 0:
        print("Using epsilon {0:n}".format(args.epsilon))
    # Uniform number must belong to a field player (2..11):
    my_unum = hfo_env.getUnum()
    assert ((my_unum > 1) and
            (my_unum <= 11)), "Bad unum {!r}".format(my_unum)
    print("My unum is {0:d}".format(my_unum))
    # Per-action counters, overall and while the ball was kickable:
    num_times_overall = {}
    num_times_kickable = {}
    for action in range(hfo.NUM_HFO_ACTIONS):
        num_times_overall[action] = 0
        num_times_kickable[action] = 0
    misc_tracked = {'max_kickable_dist': 0}
    for episode in itertools.count():
        # Previous-frame ball position (features 3/4), used by the
        # defense policy to estimate ball motion:
        old_ball_pos_x = -1
        old_ball_pos_y = 0
        episode_start = True
        status = hfo.IN_GAME
        while status == hfo.IN_GAME:
            state = hfo_env.getState()
            if episode_start:
                # Seed the "old" ball position only with in-bounds values.
                if (state[3] >= -1) and (state[3] <= 1):
                    old_ball_pos_x = state[3]
                if (state[4] >= -1) and (state[4] <= 1):
                    old_ball_pos_y = state[4]
                episode_start = False
            if (args.epsilon > 0) and (random.random() < args.epsilon):
                do_random_defense_action(state, hfo_env)
            else:
                do_defense_action(state_vec=state, hfo_env=hfo_env,
                                  num_opponents=numOpponents,
                                  num_teammates=numTeammates,
                                  old_ball_pos_x=old_ball_pos_x,
                                  old_ball_pos_y=old_ball_pos_y,
                                  num_times_overall=num_times_overall,
                                  num_times_kickable=num_times_kickable,
                                  misc_tracked=misc_tracked)
            old_ball_pos_x = state[3]
            old_ball_pos_y = state[4]
            status = hfo_env.step()
            #print(status)
            # Quit if the server goes down
            if status == hfo.SERVER_DOWN:
                # Dump the accumulated action-usage statistics first.
                for action in range(hfo.NUM_HFO_ACTIONS):
                    if num_times_overall[action]:
                        print("Overall times {0!s}: {1:d}".format(
                            hfo_env.actionToString(action),
                            num_times_overall[action]))
                for action in range(hfo.NUM_HFO_ACTIONS):
                    if num_times_kickable[action]:
                        print("Kickable times {0!s}: {1:d}".format(
                            hfo_env.actionToString(action),
                            num_times_kickable[action]))
                print("Max kickable dist: {0:n}".format(
                    misc_tracked['max_kickable_dist']))
                hfo_env.act(hfo.QUIT)
                exit()
# Connection parameters for a left-side low-level agent.
feature_set = hfo.LOW_LEVEL_FEATURE_SET
config = '/Users/codeMan/Documents/hfo/HFO/bin/teams/base/config/formations-dt'
port = 6000
host = 'localhost'
side = 'base_left'
# ACTION_LOOKUP = {
# 0: hfo.DASH,
# 1: hfo.TURN,
# 2: hfo.KICK,
# 3: hfo.TACKLE,  # Used on defense to slide tackle the ball
# 4: hfo.CATCH,  # Used only by goalie to catch the ball
# }
server = hfo.HFOEnvironment()
server.connectToServer(feature_set, config, port, host, side, False)
env = SoccerEnv(env=server)
# Sample random action parameters each iteration.
# NOTE(review): this chunk appears truncated — the sampled values are never
# used in the visible text; the loop body presumably continues past here.
for i in range(1000):
    rand_action_index = np.random.randint(0, 3)
    rand_dash_pow = np.random.uniform(0, 100)
    rand_dash_dic = np.random.uniform(-180, 180)
    rand_turn_dic = np.random.uniform(-180, 180)
    rand_kick_pow = np.random.uniform(0, 100)
    rand_kick_dic = np.random.uniform(-180, 180)