import numpy as np

# RLAgent is assumed to be provided elsewhere in this project.


class RLController:
    def __init__(self, gamma=0.9, alpha=0.01, epsilon=0.1):
        # Initialize the underlying learning agent
        self.agent = RLAgent(gamma, alpha, epsilon)
        self.reset_episode()

    def reset_episode(self):
        self.last_timestamp = 0.0
        self.last_position = 0.0

    def set_params(self, gamma, alpha, epsilon):
        self.agent.set_params(gamma, alpha, epsilon)

    def set_target(self, target):
        print("RLController.set_target({})".format(target))  # [debug]
        self.target = target

    def update(self, timestamp, position, done):
        # Compute delta_time
        delta_time = timestamp - self.last_timestamp
        self.last_timestamp = timestamp

        # Compute delta_position
        delta_position = position - self.last_position
        self.last_position = position

        # Special case t = 0 or dt <= 0: send action = 0
        if timestamp == 0.0 or delta_time <= 0.0:
            return 0.0

        # Prepare state vector
        velocity = delta_position / delta_time
        state = np.array([position, velocity, self.target]).reshape(1, -1)

        # Compute reward / penalty (note: current action's effect may be delayed)
        reward = -min(abs(self.target - position), 20.0)

        # Take one RL step, passing in current state and reward, and return action
        action = self.agent.step(state, reward, done)
        if done:
            self.reset_episode()
        return np.clip(action, -50.0, 50.0)  # clamp final action
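
# --- Usage sketch (not from the original source): how update() might be driven
# by a simulation loop. The unit-mass dynamics, time step, target, and episode
# length below are assumptions chosen purely for illustration.
if __name__ == "__main__":
    controller = RLController()
    controller.set_target(10.0)

    position, velocity, dt = 0.0, 0.0, 0.05
    num_steps = 1000
    for step in range(num_steps):
        done = step == num_steps - 1
        action = controller.update(step * dt, position, done)
        # Integrate trivial dynamics: the action acts as a force on a unit mass.
        velocity += action * dt
        position += velocity * dt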
import pygame

# Gomoku, GameRender, BoardState, and RLAgent are assumed to come from this
# project's own modules.


def play(hparams):
    gomoku = Gomoku(hparams)
    render = GameRender(gomoku, hparams)
    # Change the AI here; the bigger the search depth, the stronger the AI.
    ai = RLAgent(gomoku, BoardState.BLACK)
    result = BoardState.EMPTY

    # AI plays first
    ai.first_step()
    result = gomoku.get_chess_result()
    render.change_state()

    while True:
        if hparams['enable_second_ai']:
            result = gomoku.get_chess_result()
            if result != BoardState.EMPTY:
                print(result, "wins")
                break
            if hparams['enable_ai']:
                ai.one_step()
                result = gomoku.get_chess_result()
                if result != BoardState.EMPTY:
                    print(result, "wins")
                    break
            else:
                render.change_state()

        # pygame event loop: player vs. AI section
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if render.one_step():
                    result = gomoku.get_chess_result()
                else:
                    continue
                if result != BoardState.EMPTY:
                    break
                if hparams['enable_ai']:
                    ai.one_step()
                    result = gomoku.get_chess_result()
                else:
                    render.change_state()
            else:
                continue

        render.draw_chess()
        render.draw_mouse()
        if result != BoardState.EMPTY:
            render.draw_result(result)
        pygame.display.update()
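
# --- Usage sketch (not from the original source): a minimal hparams dict for
# play(). Only 'enable_ai' and 'enable_second_ai' are read by the code above;
# Gomoku and GameRender may expect additional keys, which are unknown here.
if __name__ == '__main__':
    hparams = {
        'enable_ai': True,         # AI answers the human player's moves
        'enable_second_ai': False  # set True for AI-vs-AI self-play
    }
    play(hparams)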
print("2. we train the agent on that trajectory.") print( "3. we test that trained agent on new data from the same DGP: we show that its performance does NOT generalize well (as predicted)." ) out_samplePnLs = [] # 1. generate a non-stationary "historical" trajectory x, y = tvdgp.generateDGP(N) spread = y - x plt.plot(spread) plt.show() print("Training the agent...") # 2. train the agent on that trajectory, show that it learned some optimum agent = RLAgent(2, 3) training_pnls = [] DELTA = 20 for j in range(NUM_TRAINING_ITERATIONS): training_pnl = run_simulation(x, y, agent, True) training_pnls.append(training_pnl) if j % DELTA == 0: agent.replay() pct_progress = (float(j) / float(NUM_TRAINING_ITERATIONS)) * 100.0 if j == 0: print(f"pct_progress = {pct_progress} %") else: print( f"pct_progress = {pct_progress} % (current average P&L is {np.mean(training_pnls[-DELTA:])})" )
#!/usr/bin/env python3
import sys
from collections import deque

from game import Game
from rl_agent import RLAgent
from random_agent import Agent
import weight_logger
import weight_plotter

if __name__ == '__main__':
    agent = RLAgent()
    opponent = Agent(1)  # random agent
    nb_games = 0
    history_length = 10000  # number of wins/illegals considered "recent"
    wins = deque()
    illegals = deque()
    nb_curves = 15
    log_filename = 'rl_trainer.log'
    log = weight_logger.WeightLogger(log_filename, overwrite=True)
    try:
        while True:
            game = Game(5, 1, agent, opponent)
            while not game.over:
                game.turn()

            # count wins
            won = game.get_winning_team() == game.teams[0]
            wins.append(int(won))
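            # --- Sketch (not from the original source): the excerpt ends here;
            # one plausible continuation caps the deques at history_length so
            # `wins`/`illegals` only track recent games. Constructing them as
            # deque(maxlen=history_length) would be the idiomatic alternative.
            while len(wins) > history_length:
                wins.popleft()
            while len(illegals) > history_length:
                illegals.popleft()

            nb_games += 1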