class makersim:
    def __init__(self):
        self.env = Environment()

    def start_simulation(self):
        while True:
            self.env.update()
def test_save_state(self):
    env = Environment()
    agent = DQNAgent(env.actions)
    zeros = np.zeros((agent.q.SIZE, agent.q.SIZE), np.float32)
    pre_state = None
    for ep, s, r in env.play(agent, episode=1):
        state = agent.get_state()
        self.assertEqual(agent.q.n_history, len(state))
        last_state = np.maximum(agent._observations[0], agent._observations[-1])
        if s == 0:  # after the first action
            self.assertEqual(0, np.sum(zeros != agent._observations[-1]))
            self.assertEqual(1, len(agent._state))
        if s < agent.q.n_history:  # until n_history is reached
            self.assertEqual(0, np.sum(last_state != state[s]))
            if pre_state is not None:
                self.assertEqual(0, np.sum(pre_state != state[s - 1]))
        else:  # over n_history
            self.assertEqual(0, np.sum(last_state != state[-1]))
            if pre_state is not None:
                self.assertEqual(0, np.sum(pre_state != state[-2]))
        pre_state = last_state.copy()
def test_format_image(self):
    agent = FormatAgent(self.IMG_PATH)
    env = Environment()
    for ep, s, r in env.play(agent, episode=1):
        pass

    img = Image.open(os.path.join(self.IMG_PATH, "image_0.png"))
    self.assertTrue(img)
    arr = np.asarray(img)
    # assertEqual, not assertTrue: assertTrue would treat the second
    # argument as a failure message and never compare the shapes
    self.assertEqual(arr.shape, (Q.SIZE, Q.SIZE))
def main(options):
    if len(options) == 0:
        usage()

    # Set up the environment
    environment = Environment()
    environment.load()

    # Start the GUI
    startGui(environment)
def train(render, gpu):
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=1, model_path=PATH, on_gpu=gpu)
    trainer = DQNTrainer(agent)
    for ep, s, r in env.play(trainer, episode=10**5, render=render,
                             report_interval=10, action_interval=4):
        pass
def play_game(p1: Agent, p2: Agent, env: Environment, draw=False):
    print("play game!")
    current_player = None
    while not env.game_over():
        # alternate between players; starting from None, p1 always moves first
        if current_player == p1:
            current_player = p2
        else:
            current_player = p1

        # draw the board before the user who wants to see it makes a move
        if draw:
            if draw == 1 and current_player == p1:
                env.draw_board()
            if draw == 2 and current_player == p2:
                env.draw_board()

        # make a move
        current_player.take_action(env)

        # update the state histories
        state = env.get_state()
        p1.update_state_history(state)
        p2.update_state_history(state)

    if draw:
        env.draw_board()

    # do the value function update
    p1.update(env)
    p2.update(env)
def auralize_from_environment(self, environment: Environment, wav_len: float):
    # read all data from the environment
    content = environment.toString()
    # send it on for auralization
    return self.auralize_from_content3(content, wav_len)
def get_state_hash_and_winner(env: Environment, i=0, j=0):
    results = []
    for v in (0, env.x, env.o):
        env.board[i, j] = v  # if the board is empty, it should already be 0
        if j == 2:
            # j wraps back to 0 and i increases, unless i == 2, in which case we are done
            if i == 2:
                # the board is full; collect this configuration's result
                state = env.get_state()
                ended = env.game_over(force_recalculate=True)
                winner = env.winner
                results.append((state, winner, ended))
            else:
                results += get_state_hash_and_winner(env, i + 1, 0)
        else:
            results += get_state_hash_and_winner(env, i, j + 1)
    return results
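# Usage sketch (not from the source): starting from an empty board, the
# recursion above assigns each of {0, x, o} to all nine cells, so it
# returns one (state, winner, ended) triple per configuration: 3**9 = 19683.
env = Environment()
state_winner_triples = get_state_hash_and_winner(env)
assert len(state_winner_triples) == 3 ** 9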
def take_action(self, env: Environment):
    # choose an action based on the epsilon-greedy strategy
    r = np.random.rand()
    best_state = None
    if r < self.eps:
        # take a random action
        if self.verbose:
            print("Taking a random action")

        possible_moves = []
        for i in range(LENGTH):
            for j in range(LENGTH):
                if env.is_empty(i, j):
                    possible_moves.append((i, j))
        idx = np.random.choice(len(possible_moves))
        next_move = possible_moves[idx]
    else:
        # choose the best action based on the current values of states:
        # loop through all possible moves, get their values,
        # and keep track of the best one
        pos2value = {}  # for debugging
        next_move = None
        best_value = -1
        for i in range(LENGTH):
            for j in range(LENGTH):
                if env.is_empty(i, j):
                    # what is the state if we made this move?
                    env.board[i, j] = self.sym
                    state = env.get_state()
                    env.board[i, j] = 0  # don't forget to change it back!
                    pos2value[(i, j)] = self.V[state]
                    if self.V[state] > best_value:
                        best_value = self.V[state]
                        best_state = state
                        print(best_state)  # debug output
                        next_move = (i, j)

        # if verbose, draw the board with the values
        if self.verbose:
            print("Taking a greedy action")
            for i in range(LENGTH):
                print("------------------")
                for j in range(LENGTH):
                    if env.is_empty(i, j):
                        # print the value
                        print(" %.2f|" % pos2value[(i, j)], end="")
                    else:
                        print("  ", end="")
                        if env.board[i, j] == env.x:
                            print("x  |", end="")
                        elif env.board[i, j] == env.o:
                            print("o  |", end="")
                        else:
                            print("   |", end="")
                print("")
            print("------------------")

    # make the move
    env.board[next_move[0], next_move[1]] = self.sym
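# Standalone sketch (not from the source) of the exploration/exploitation
# split implemented above: over many decisions, a fraction eps of them take
# the random branch, the rest are greedy with respect to V.
import numpy as np

eps = 0.1
decisions = np.random.rand(100000) < eps
print(decisions.mean())  # approximately 0.1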
def run(submit_key, gpu):
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=0.01, model_path=PATH, on_gpu=gpu)
    path = ""
    episode = 5
    if submit_key:
        print("make directory to submit result")
        path = os.path.join(os.path.dirname(__file__), "submit")
        episode = 100

    for ep, s, r in env.play(agent, episode=episode, render=True,
                             action_interval=4, record_path=path):
        pass

    if submit_key:
        gym.upload(path, api_key=submit_key)
def test_trainer(self):
    env = Environment()
    agent = DQNAgent(env.actions, epsilon=1, model_path=self.MODEL_PATH)
    trainer = DQNTrainer(agent, memory_size=100, replay_size=10,
                         initial_exploration=2000, target_update_freq=100,
                         epsilon_decay=0.1)
    # start at -1 because the trainer's "step" counts training updates,
    # so the very first step is not counted
    global_step = -1
    last_state = []
    for ep, s, r in env.play(trainer, episode=2, report_interval=1):
        if global_step < trainer.initial_exploration:
            self.assertEqual(1, trainer.agent.epsilon)
        else:
            self.assertTrue(trainer.agent.epsilon < 1)
        global_step += 1
        last_state = agent.get_state()
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)

    with tf.Session() as sess:
        processor = Processor(config, logger)
        env = Environment(logger, config, processor.price_blocks,
                          processor.timestamp_blocks)
        agent = Agent(sess, logger, config, env)
        agent.summary_writer.close()
def main(config_file_path):
    config_parser = get_config_parser(config_file_path)
    config = get_config(config_parser)
    logger = get_logger(config)

    with tf.Session() as sess:
        preprocessor = Preprocessor(config, logger)
        env = Environment(logger, config, preprocessor.price_blocks)
        agent = Agent(sess, logger, config, env)

        summary_writer = tf.summary.FileWriter(config[TENSORBOARD_LOG_DIR])
        summary_writer.add_graph(sess.graph)
        summary_writer.close()
def update(self, env: Environment):
    # we want to BACKTRACK over the states, so that:
    #   V(prev_state) = V(prev_state) + alpha * (V(next_state) - V(prev_state))
    # where V(next_state) = reward if it's the most current state
    #
    # NOTE: we ONLY do this at the end of an episode;
    # that is not the case for all the algorithms we will study
    reward = env.reward(self.sym)
    target = reward
    for prev in reversed(self.state_history):
        value = self.V[prev] + self.alpha * (target - self.V[prev])
        self.V[prev] = value
        target = value
    self.reset_history()
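# Toy illustration (not from the source) of the backward update above,
# using hypothetical values: with alpha = 0.5 and a won episode (reward 1),
# the reward propagates backwards with a geometrically shrinking effect.
alpha = 0.5
V = {"s0": 0.5, "s1": 0.5, "s2": 0.5}  # hypothetical value table
history = ["s0", "s1", "s2"]           # states visited this episode, in order
target = 1.0                           # terminal reward: this player won

for prev in reversed(history):
    V[prev] += alpha * (target - V[prev])
    target = V[prev]

# V is now {"s0": 0.5625, "s1": 0.625, "s2": 0.75}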
from model.agent import Agent
from model.environment import Environment
from model.human import Human
from state_util import initialV_x, initialV_o, play_game, get_state_hash_and_winner

if __name__ == '__main__':
    # train the agents
    p1 = Agent()
    p2 = Agent()

    # set the initial V for p1 and p2
    env = Environment()
    state_winner_triples = get_state_hash_and_winner(env)
    Vx = initialV_x(env, state_winner_triples)
    p1.setV(Vx)
    Vo = initialV_o(env, state_winner_triples)
    p2.setV(Vo)

    # give each player their symbol
    p1.set_symbol(env.x)
    p2.set_symbol(env.o)

    T = 1000000
    for t in range(T):
        if t % 1000 == 0:
            print(t)
        play_game(p1, p2, Environment())

    # play human vs. agent
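    # Hypothetical continuation of the "play human vs. agent" step (not from
    # the source): assumes Human implements the same take_action /
    # update_state_history / update interface as Agent and plays as o.
    human = Human()
    human.set_symbol(env.o)
    while True:
        p1.verbose = True  # take_action above checks this flag to print values
        play_game(p1, human, Environment(), draw=2)  # draw=2: show the board before the human moves
        answer = input("Play again? [Y/n]: ")
        if answer and answer.lower()[0] == 'n':
            break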
def test_run_environment(self):
    env = Environment()
    agent = RandomAgent(env.actions)
    for episode, step, reward in env.play(agent, episode=1):
        pass
def test_cycle_agent(self):
    env = Environment()
    agent = CycleAgent(env.actions, keep_length=200)
    for episode, step, reward in env.play(agent, episode=3):
        pass
def __init__(self):
    self.env = Environment()
def test_funfun_defence(self):
    env = Environment(env_name="Pong-v0")
    agent = CycleAgent((2, 3), keep_length=20)
    for episode, step, reward in env.play(agent, episode=1):
        pass
def setup_class(self):
    self.env = Environment()
    self.env.feeds["SDR/ETH"] = 0.67