Example #1
import numpy as np  # required by np.array / np.clip below; RLAgent comes from the surrounding project


class RLController:
    def __init__(self, gamma=0.9, alpha=0.01, epsilon=0.1):
        # Initialize agent
        self.agent = RLAgent(gamma, alpha, epsilon)
        self.reset_episode()

    def reset_episode(self):
        #print("RLController.reset_episode()")  # [debug]
        self.last_timestamp = 0.0
        self.last_position = 0.0

    def set_params(self, gamma, alpha, epsilon):
        self.agent.set_params(gamma, alpha, epsilon)

    def set_target(self, target):
        print("RLController.set_target({})".format(target))  # [debug]
        self.target = target

    def update(self, timestamp, position, done):
        # Compute delta_time
        delta_time = timestamp - self.last_timestamp
        self.last_timestamp = timestamp

        # Compute delta_position
        delta_position = position - self.last_position
        self.last_position = position

        # Check for t = 0 or dt = 0 (special case: send action = 0)
        if timestamp == 0.0 or delta_time <= 0.0:
            return 0.0

        # Prepare state vector
        velocity = delta_position / delta_time
        state = np.array([position, velocity, self.target]).reshape(1, -1)

        # Compute reward / penalty (note: current action's effect may be delayed)
        reward = -min(abs(self.target - position), 20.0)

        # Take one RL step, passing in current state and reward, and return action
        action = self.agent.step(state, reward, done)
        if done:
            self.reset_episode()
        return np.clip(action, -50.0, 50.0)  # clamp final action
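
A minimal driving-loop sketch for the controller above; the plant model and the step count are hypothetical, only the RLController calls are taken from the example:

controller = RLController(gamma=0.9, alpha=0.01, epsilon=0.1)
controller.set_target(10.0)

position, dt = 0.0, 0.02
for step in range(500):
    timestamp = step * dt
    done = step == 499
    action = controller.update(timestamp, position, done)
    position += action * dt  # hypothetical plant: treat the action as a velocity command
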
Example #2
import pygame  # required by the event loop below; Gomoku, GameRender, BoardState, and RLAgent come from the surrounding project


def play(hparams):
    gomoku = Gomoku(hparams)
    render = GameRender(gomoku, hparams)

    # change the AI here; the bigger the search depth, the stronger the AI
    ai = RLAgent(gomoku, BoardState.BLACK)

    result = BoardState.EMPTY

    # AI plays first
    ai.first_step()
    result = gomoku.get_chess_result()
    render.change_state()

    while True:
        if hparams['enable_second_ai']:
            result = gomoku.get_chess_result()
            if result != BoardState.EMPTY:
                print(result, "wins")
                break
            if hparams['enable_ai']:
                ai.one_step()
                result = gomoku.get_chess_result()
                if result != BoardState.EMPTY:
                    print(result, "wins")
                    break
            else:
                render.change_state()
        # pygame event loop: player vs. AI section
        for event in pygame.event.get():
            if event.type == pygame.QUIT:
                exit()
            elif event.type == pygame.MOUSEBUTTONDOWN:
                if render.one_step():
                    result = gomoku.get_chess_result()
                else:
                    continue
                if result != BoardState.EMPTY:
                    break
                if hparams['enable_ai']:
                    ai.one_step()
                    result = gomoku.get_chess_result()
                else:
                    render.change_state()
            else:
                continue
        render.draw_chess()
        render.draw_mouse()

        if result != BoardState.EMPTY:
            render.draw_result(result)

        pygame.display.update()
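
A hedged usage sketch for play(); 'enable_ai' and 'enable_second_ai' are the only keys the loop above reads, and the Gomoku/GameRender constructors may expect additional keys not shown here:

hparams = {
    'enable_ai': True,          # let the RLAgent answer the human player's moves
    'enable_second_ai': False,  # True enables the AI-vs-AI branch
}
play(hparams)
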
print("2. we train the agent on that trajectory.")
print(
    "3. we test that trained agent on new data from the same DGP: we show that its performance does NOT generalize well (as predicted)."
)

out_samplePnLs = []

# 1. generate a non-stationary "historical" trajectory
x, y = tvdgp.generateDGP(N)
spread = y - x
plt.plot(spread)
plt.show()

print("Training the agent...")
# 2. train the agent on that trajectory, show that it learned some optimum
agent = RLAgent(2, 3)
training_pnls = []
DELTA = 20
for j in range(NUM_TRAINING_ITERATIONS):
    training_pnl = run_simulation(x, y, agent, True)
    training_pnls.append(training_pnl)
    if j % DELTA == 0:
        agent.replay()

        pct_progress = (float(j) / float(NUM_TRAINING_ITERATIONS)) * 100.0
        if j == 0:
            print(f"pct_progress = {pct_progress} %")
        else:
            print(
                f"pct_progress = {pct_progress} % (current average P&L is {np.mean(training_pnls[-DELTA:])})"
            )
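
Step 3 announced in the prints (testing on new data from the same DGP) is not shown above; a minimal sketch, assuming tvdgp.generateDGP and run_simulation keep the signatures used in the training block and that passing False as the last argument disables learning:

# 3. test the trained agent on fresh trajectories from the same DGP
print("Testing the agent out of sample...")
NUM_TEST_ITERATIONS = 100  # hypothetical constant, not part of the original script
for _ in range(NUM_TEST_ITERATIONS):
    x_test, y_test = tvdgp.generateDGP(N)
    out_samplePnLs.append(run_simulation(x_test, y_test, agent, False))
print(f"average out-of-sample P&L = {np.mean(out_samplePnLs)}")
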
Example #4
#!/usr/bin/env python3

from game import Game
from rl_agent import RLAgent
from random_agent import Agent
import weight_logger, weight_plotter

import sys
from collections import deque

if __name__ == '__main__':
    agent = RLAgent()
    opponent = Agent(1)  # random agent
    nb_games = 0
    history_length = 10000  # number of wins/illegals considered "recent"
    wins = deque()
    illegals = deque()

    nb_curves = 15
    log_filename = 'rl_trainer.log'
    log = weight_logger.WeightLogger(log_filename, overwrite=True)

    try:
        while True:
            game = Game(5, 1, agent, opponent)
            while not game.over:
                game.turn()

            # count wins
            won = game.get_winning_team() == game.teams[0]
            wins.append(int(won))