Example No. 1
class makersim:
    def __init__(self):
        self.env = Environment()

    def start_simulation(self):
        while True:
            self.env.update()
Example No. 2
    def test_save_state(self):
        env = Environment()
        agent = DQNAgent(env.actions)

        zeros = np.zeros((agent.q.SIZE, agent.q.SIZE), np.float32)
        pre_state = None
        for ep, s, r in env.play(agent, episode=1):
            state = agent.get_state()
            self.assertEqual(agent.q.n_history, len(state))
            last_state = np.maximum(agent._observations[0],
                                    agent._observations[-1])

            if s == 0:
                # after first action
                self.assertEqual(0, np.sum(zeros != agent._observations[-1]))
                self.assertEqual(1, len(agent._state))

            if s < agent.q.n_history:
                # until n_history
                self.assertEqual(0, np.sum(last_state != state[s]))
                if pre_state is not None:
                    self.assertEqual(0, np.sum(pre_state != state[s - 1]))
            else:
                # over n_history
                self.assertEqual(0, np.sum(last_state != state[-1]))
                if pre_state is not None:
                    self.assertEqual(0, np.sum(pre_state != state[-2]))

            pre_state = last_state.copy()
Example No. 3
class makersim:

    def __init__(self):
        self.env = Environment()

    def start_simulation(self):
        while True:
            self.env.update()
Example No. 4
    def test_format_image(self):
        agent = FormatAgent(self.IMG_PATH)
        env = Environment()
        for ep, s, r in env.play(agent, episode=1):
            pass
        img = Image.open(os.path.join(self.IMG_PATH, "image_0.png"))
        self.assertTrue(img)
        arr = np.asarray(img)
        self.assertEqual(arr.shape, (Q.SIZE, Q.SIZE))
Example No. 5
def main(options):
    if len(options) == 0:
        usage()

    # Setup environment
    environment = Environment()
    environment.load()

    # Start GUI
    startGui(environment)
Example No. 6
def train(render, gpu):
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=1, model_path=PATH, on_gpu=gpu)
    trainer = DQNTrainer(agent)

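    # run the training loop for 10**5 episodes; env.play yields (episode, step, reward) as it drives the trainer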
    for ep, s, r in env.play(trainer,
                             episode=10**5,
                             render=render,
                             report_interval=10,
                             action_interval=4):
        pass
Example No. 7
def play_game(p1: Agent, p2: Agent, env: Environment, draw=False):
    print("play game!")
    current_player = None
    
    while not env.game_over():
        # alternate between players
        if current_player == p1:
            current_player = p2
        else:
            current_player = p1
            
        # draw the board before the user who wants to see it makes a move
        if draw:
            if draw == 1 and current_player == p1:
                env.draw_board()
            if draw == 2 and current_player == p2:
                env.draw_board()            

            
        # make an action
        current_player.take_action(env)
        
        # update state history
        state = env.get_state()
        p1.update_state_history(state)
        p2.update_state_history(state)
    
    if draw:
        env.draw_board()
        
    # do the value function update
    p1.update(env)
    p2.update(env)
Example No. 8
    def auralize_from_environment(self, environment: Environment,
                                  wav_len: float):
        # read all data
        content = environment.toString()

        # send
        return self.auralize_from_content3(content, wav_len)
Example No. 9
def get_state_hash_and_winner(env: Environment, i=0, j=0):
    results = []
    
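    # try empty, x, and o in cell (i, j), recursing over the remaining cells to enumerate every board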
    for v in (0, env.x, env.o):
        env.board[i, j] = v  # if the board is empty, it should already be 0
        if j == 2:
            # j goes back to 0, increase i; unless i == 2, then we are done
            if i == 2:
                state = env.get_state()
                ended = env.game_over(force_recalculate=True)
                winner = env.winner
                results.append((state, winner, ended))
            else:
                results += get_state_hash_and_winner(env, i + 1, 0)
        else:
            results += get_state_hash_and_winner(env, i, j + 1)
            
    return results
Example No. 10
    def take_action(self, env: Environment):
        # choose an action based on epsilon-greedy strategy
        r = np.random.rand()
        best_state = None
        if r < self.eps:
            # take a random action
            if self.verbose:
                print("Taking a random action")

            possible_moves = []
            for i in range(LENGTH):
                for j in range(LENGTH):
                    if env.is_empty(i, j):
                        possible_moves.append((i, j))
            idx = np.random.choice(len(possible_moves))
            next_move = possible_moves[idx]
        else:
            # choose the best action based on current values of states
            # loop through all possible moves, get their values
            # keep track of the best value
            pos2value = {}  # for debugging
            next_move = None
            best_value = -1
            for i in range(LENGTH):
                for j in range(LENGTH):
                    if env.is_empty(i, j):
                        # what is the state if we made this move?
                        env.board[i, j] = self.sym
                        state = env.get_state()
                        env.board[i, j] = 0  # don't forget to change it back!
                        pos2value[(i, j)] = self.V[state]
                        if self.V[state] > best_value:
                            best_value = self.V[state]
                            best_state = state
                            print(best_state)
                            next_move = (i, j)

            # if verbose, draw the board w/ the values
            if self.verbose:
                print("Taking a greedy action")
                for i in range(LENGTH):
                    print("------------------")
                    for j in range(LENGTH):
                        if env.is_empty(i, j):
                            # print the value
                            print(" %.2f|" % pos2value[(i, j)], end="")
                        else:
                            print("  ", end="")
                            if env.board[i, j] == env.x:
                                print("x  |", end="")
                            elif env.board[i, j] == env.o:
                                print("o  |", end="")
                            else:
                                print("   |", end="")
                    print("")
                print("------------------")

        # make the move
        env.board[next_move[0], next_move[1]] = self.sym
Example No. 11
def run(submit_key, gpu):
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=0.01, model_path=PATH, on_gpu=gpu)
    path = ""
    episode = 5
    if submit_key:
        print("make directory to submit result")
        path = os.path.join(os.path.dirname(__file__), "submit")
        episode = 100

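    # replay the trained agent with rendering; episodes are recorded to `path` when a submit key is provided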
    for ep, s, r in env.play(agent,
                             episode=episode,
                             render=True,
                             action_interval=4,
                             record_path=path):
        pass

    if submit_key:
        gym.upload(path, api_key=submit_key)
Example No. 12
    def test_trainer(self):
        env = Environment()
        agent = DQNAgent(env.actions, epsilon=1, model_path=self.MODEL_PATH)
        trainer = DQNTrainer(agent,
                             memory_size=100,
                             replay_size=10,
                             initial_exploration=2000,
                             target_update_freq=100,
                             epsilon_decay=0.1)

        global_step = -1  # the trainer's "step" counts training iterations, so the very first step is not counted
        last_state = []
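        # epsilon should stay at 1 during initial exploration and drop below 1 once training begins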
        for ep, s, r in env.play(trainer, episode=2, report_interval=1):
            if global_step < trainer.initial_exploration:
                self.assertEqual(1, trainer.agent.epsilon)
            else:
                self.assertTrue(trainer.agent.epsilon < 1)

            global_step += 1
            last_state = agent.get_state()
Example No. 13
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)

    with tf.Session() as sess:
        processor = Processor(config, logger)
        env = Environment(logger, config, processor.price_blocks,
                          processor.timestamp_blocks)
        agent = Agent(sess, logger, config, env)

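        # no episodes are run in this snippet; the agent's summary writer is simply closed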
        agent.summary_writer.close()
Example No. 14
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)

    with tf.Session() as sess:
        preprocessor = Preprocessor(config, logger)
        env = Environment(logger, config, preprocessor.price_blocks)
        agent = Agent(sess, logger, config, env)

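        # export the session graph so it can be inspected in TensorBoard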
        summary_writer = tf.summary.FileWriter(config[TENSORBOARD_LOG_DIR])
        summary_writer.add_graph(sess.graph)
        summary_writer.close()
Example No. 15
    def update(self, env: Environment):
        # we want to BACKTRACK over the states, so that:
        # V(prev_state) = V(prev_state) + alpha*(V(next_state) - V(prev_state))
        # where V(next_state) = reward if it's the most current state
        #
        # NOTE: we ONLY do this at the end of an episode,
        # which is not the case for all the algorithms we will study
        reward = env.reward(self.sym)
        target = reward
        for prev in reversed(self.state_history):
            value = self.V[prev] + self.alpha * (target - self.V[prev])
            self.V[prev] = value
            target = value
        self.reset_history()
Example No. 16
from model.agent import Agent
from model.environment import Environment
from model.human import Human
from state_util import initialV_x, initialV_o, play_game, get_state_hash_and_winner

if __name__ == '__main__':
    # train the agent
    p1 = Agent()
    p2 = Agent()

    # set initial V for p1 and p2
    env = Environment()
    state_winner_triples = get_state_hash_and_winner(env)


    Vx = initialV_x(env, state_winner_triples)
    p1.setV(Vx)
    Vo = initialV_o(env, state_winner_triples)
    p2.setV(Vo)

    # give each player their symbol
    p1.set_symbol(env.x)
    p2.set_symbol(env.o)

    T = 1000000
    for t in range(T):
        if t % 1000 == 0:
            print(t)
        play_game(p1, p2, Environment())
    
    # play human vs. agent
Example No. 17
    def test_run_environment(self):
        env = Environment()
        agent = RandomAgent(env.actions)
        # a random agent should be able to finish one episode without errors
        for episode, step, reward in env.play(agent, episode=1):
            pass
Example No. 18
    def test_cycle_agent(self):
        env = Environment()
        agent = CycleAgent(env.actions, keep_length=200)
        # a cycling agent should be able to play three episodes without errors
        for episode, step, reward in env.play(agent, episode=3):
            pass
Example No. 19
    def __init__(self):
        self.env = Environment()
Example No. 20
    def __init__(self):
        self.env = Environment()
Example No. 21
    def test_funfun_defence(self):
        env = Environment(env_name="Pong-v0")
        agent = CycleAgent((2, 3), keep_length=20)

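        # the agent simply cycles through actions 2 and 3 for a single Pong episode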
        for episode, step, reward in env.play(agent, episode=1):
            pass
Example No. 22
    def setup_class(self):
        self.env = Environment()
        # preset a fixed SDR/ETH feed value for the tests to use
        self.env.feeds["SDR/ETH"] = 0.67