Пример #1
0
            # Response of one encoder cell: exp(-delta^2 * encoderSharpness).
            # It is exactly 1.0 when v == center and (for a positive
            # encoderSharpness) falls off as v moves away from center.
            delta = center - v
            intensity = np.exp(-delta * delta * encoderSharpness)

            inputArr.append(intensity)

    # Alternative reward definitions, currently disabled:
    #reward = dir * paddleX * 0.01

    #reward = np.abs(paddleX - ballPosition[0]) < 0.1

    # Advance the agent by one step. It receives the current reward and the
    # encoded input wrapped in np.matrix and transposed (a column vector,
    # assuming inputArr is a flat list of scalars). The remaining positional
    # numbers are tuning constants whose meaning is defined by simStep's
    # signature, which is not visible in this excerpt.
    a.simStep(reward, 0.002, 0.98, 0.2, 1.0, np.matrix([inputArr]).T, 0.005, 0.005, 0.0005, 0.95, 0.01)

    # Debug output: prints the agent's private _prevValue attribute each pass.
    print(a._prevValue)

    # Keep this iteration's reward (presumably read on the next pass; the
    # consumer is outside this excerpt).
    prevReward = reward

    # Count both timers down by 1.0 per iteration while they are positive.
    if rewardTimer > 0.0:
        rewardTimer -= 1.0
    if punishmentTimer > 0.0:
        punishmentTimer -= 1.0

    # Shift the paddle by 0.15 * sum(actions) * 0.25 and clamp it to [0, 1].
    paddleX = np.minimum(1.0, np.maximum(0.0, paddleX + 0.15 * np.sum(a.getActions()) * 0.25))

    # Render
    # Clear the frame to white before drawing.
    display.fill((255,255,255))

    # paddleX and ballPosition are scaled by the display size to get pixels;
    # the ball's y is flipped via (1.0 - y), so y == 1.0 lands at the top.
    # The -64 / -32 / -16 offsets presumably centre the sprites on their
    # logical position (sprite sizes are not visible here; TODO confirm).
    display.blit(paddleImage, (displayWidth * paddleX - 64.0, displayHeight - 32.0))
    display.blit(ballImage, (displayWidth * ballPosition[0] - 16.0, displayHeight * (1.0 - ballPosition[1]) - 16.0))

    # Present the finished frame, then pace the loop (clock is presumably a
    # pygame.time.Clock, in which case tick(60) caps it at 60 iterations/s).
    pygame.display.flip()
    clock.tick(60)
Пример #2
0
            # Binary encoder cell: True when |delta| is below
            # 0.5 / encoderSize, False otherwise (delta is computed above
            # this excerpt).
            intensity = np.absolute(delta) < 0.5 / encoderSize

            inputArr.append(intensity)

    # Alternative reward definitions, currently disabled:
    # reward = dir * paddleX * 0.01

    # reward = np.abs(paddleX - ballPosition[0]) < 0.1

    # Advance the agent by one step. It receives the current reward and the
    # encoded input wrapped in np.matrix and transposed (a column vector,
    # assuming inputArr is a flat list of scalars). The remaining positional
    # numbers are tuning constants whose meaning is defined by simStep's
    # signature, which is not visible in this excerpt.
    a.simStep(reward, 0.001, 0.95, 0.05, np.matrix([inputArr]).T, 0.001, 0.001, 0.01, 0.01, 0.92)

    # Debug output: prints the agent's private _prevValue attribute each pass.
    print(a._prevValue)

    # Keep this iteration's reward (presumably read on the next pass; the
    # consumer is outside this excerpt).
    prevReward = reward

    # Count both timers down by 1.0 per iteration while they are positive.
    if rewardTimer > 0.0:
        rewardTimer -= 1.0
    if punishmentTimer > 0.0:
        punishmentTimer -= 1.0

    # Shift the paddle by 0.2 * sum(actions) / numActions and clamp it to
    # [0, 1].
    paddleX = np.minimum(1.0, np.maximum(0.0, paddleX + 0.2 * np.sum(a.getActions()) / numActions))

    # Render
    # Clear the frame to white before drawing.
    display.fill((255, 255, 255))

    # paddleX and ballPosition are scaled by the display size to get pixels;
    # the ball's y is flipped via (1.0 - y), so y == 1.0 lands at the top.
    # The -64 / -32 / -16 offsets presumably centre the sprites on their
    # logical position (sprite sizes are not visible here; TODO confirm).
    display.blit(paddleImage, (displayWidth * paddleX - 64.0, displayHeight - 32.0))
    display.blit(ballImage, (displayWidth * ballPosition[0] - 16.0, displayHeight * (1.0 - ballPosition[1]) - 16.0))

    # Present the finished frame, then pace the loop (clock is presumably a
    # pygame.time.Clock, in which case tick(60) caps it at 60 iterations/s).
    pygame.display.flip()
    clock.tick(60)
Пример #3
0
    # Alternative reward definition, currently disabled:
    #reward = np.abs(paddleX - ballPosition[0]) < 0.1

    # Advance the agent by one step. It receives the current reward and the
    # encoded input wrapped in np.matrix and transposed (a column vector,
    # assuming inputArr is a flat list of scalars). The remaining positional
    # numbers are tuning constants whose meaning is defined by simStep's
    # signature, which is not visible in this excerpt.
    a.simStep(reward, 0.001, 0.95, 0.05,
              np.matrix([inputArr]).T, 0.001, 0.001, 0.01, 0.01, 0.92)

    # Debug output: prints the agent's private _prevValue attribute each pass.
    print(a._prevValue)

    # Keep this iteration's reward (presumably read on the next pass; the
    # consumer is outside this excerpt).
    prevReward = reward

    # Count both timers down by 1.0 per iteration while they are positive.
    if rewardTimer > 0.0:
        rewardTimer -= 1.0
    if punishmentTimer > 0.0:
        punishmentTimer -= 1.0

    # Shift the paddle by 0.2 * sum(actions) / numActions and clamp it to
    # [0, 1].
    paddleX = np.minimum(
        1.0,
        np.maximum(0.0, paddleX + 0.2 * np.sum(a.getActions()) / numActions))

    # Render
    # Clear the frame to white before drawing.
    display.fill((255, 255, 255))

    # paddleX and ballPosition are scaled by the display size to get pixels;
    # the ball's y is flipped via (1.0 - y), so y == 1.0 lands at the top.
    # The -64 / -32 / -16 offsets presumably centre the sprites on their
    # logical position (sprite sizes are not visible here; TODO confirm).
    display.blit(paddleImage,
                 (displayWidth * paddleX - 64.0, displayHeight - 32.0))
    display.blit(ballImage,
                 (displayWidth * ballPosition[0] - 16.0, displayHeight *
                  (1.0 - ballPosition[1]) - 16.0))

    # Present the finished frame, then pace the loop (clock is presumably a
    # pygame.time.Clock, in which case tick(60) caps it at 60 iterations/s).
    pygame.display.flip()
    clock.tick(60)