Example #1
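Before the training loop, some boilerplate is needed. A minimal sketch, assuming the classic OpenAI Gym API (reset() returns a bare observation, step() returns four values) and the MLP class presumably defined earlier in the article; the values of games and exploration_rate and the body of normalize() are illustrative guesses, not the article's exact ones:

import random
import numpy as np
import gym

env = gym.make('CartPole-v1')   # 4-dimensional state, 2 actions
games = 1000                    # number of training episodes (illustrative)
exploration_rate = 0.1          # probability of a random action (illustrative)
render = False                  # set True to watch the agent play

def normalize(x):
    # Illustrative stand-in: squash each state component into (-1, 1)
    return np.tanh(x)
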
# long_mem = []

mlp = MLP([4, 16, 16, 2])  # 4 state inputs, two hidden layers of 16, 2 action outputs

# TRAINING
for i in range(games):
    state = env.reset()
    done = False
    score = 0
    short_mem = []
    # Play a game
    while not done:
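        # Epsilon-greedy: explore with a random action, otherwise act on the network's prediction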
        if random.random() < exploration_rate:
            action = random.randint(0, 1)
        else:
            actions = mlp.frontprop(normalize(np.array(state)))
            action = np.argmax(actions)
            # action = np.argmin(actions)
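        # Remember the (state, action) pair from this episode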
        short_mem.append((state, action))
        state, reward, done, _ = env.step(action)
        if render: env.render()
        score += 1  # one point for every step the pole stays up
    # If we won (a full 500-step episode), learn from its memory
    if score == 500:
        for state, action in short_mem:
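            # Train toward a one-hot target for the action actually taken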
            mlp.backprop(normalize(np.array(state)),
                         np.array([1, 0] if action == 0 else [0, 1]))
            mlp.fit()  # presumably applies the update computed by backprop
            # long_mem.append((state, action))
    # If we lost, don't
    else: