# Example no. 1
# 0
def main(_):
    """Play Pong for the configured number of epochs, recording transitions.

    Each epoch resets the game, issues an initial "stay" action to obtain the
    first frame, and then keeps stepping the game until it reports
    ``game_over``. Every transition is appended to a bounded replay buffer.

    Model loading/building and training are still placeholders in this
    function, so actions are sampled uniformly at random and several of the
    hyperparameters below are declared but not used yet.

    Args:
        _: Ignored.
    """
    import random  # local import: only needed for the placeholder policy

    # initialize parameters
    DATA_DIR = ""
    NUM_ACTIONS = 3  # number of valid actions (left, stay, right)
    GAMMA = 0.99  # decay rate of past observations
    INITIAL_EPSILON = 0.1  # starting value of epsilon
    FINAL_EPSILON = 0.0001  # final value of epsilon
    MEMORY_SIZE = 50000  # number of previous transitions to remember
    NUM_EPOCHS_OBSERVE = 1
    NUM_EPOCHS_TRAIN = 1000

    BATCH_SIZE = 1
    NUM_EPOCHS = NUM_EPOCHS_OBSERVE + NUM_EPOCHS_TRAIN

    model_file = "pong_net"

    if os.path.exists(model_file):
        # load the model
        pass
    else:
        # build the model
        pass

    pong_game = PongGame()
    # Bounded buffer: once MEMORY_SIZE entries are stored, the oldest
    # transitions are dropped automatically.
    experience = collections.deque(maxlen=MEMORY_SIZE)

    num_games, num_wins = 0, 0
    epsilon = INITIAL_EPSILON

    for e in range(NUM_EPOCHS):
        loss = 0.0
        pong_game.reset_game()

        # get first state
        a_0 = 1  # (0 = left, 1 = stay, 2 = right)
        x_t, r_0, game_over = pong_game.step(a_0)
        s_t = preprocess_frames(x_t)

        while not game_over:
            # The game must be advanced inside the loop; otherwise
            # ``game_over`` never changes and the loop spins forever.
            s_tm1 = s_t

            # No model is built yet, so pick an action uniformly at random.
            a_t = random.randrange(NUM_ACTIONS)
            x_t, r_t, game_over = pong_game.step(a_t)
            s_t = preprocess_frames(x_t)

            # store the transition (previous state, action, reward,
            # next state, terminal flag)
            experience.append((s_tm1, a_t, r_t, s_t, game_over))

        num_games += 1
# Example no. 2
# 0
import numpy as np
from pong import PongGame
if __name__ == '__main__':
    # Drive a non-competitive PongGame with random actions, rendering each
    # frame. The loop never exits on its own; stop it by interrupting the
    # process.
    env = PongGame(competitive=False)
    obs = env.reset()
    while True:
        env.render(mode='human')
        # Two independent uniform draws over [-1, 1), sampled in order.
        action = [np.random.uniform(-1, 1) for _ in range(2)]
        obs, reward, dones, win = env.step(action)