Example #1
    encoderSharpness = 30.0  # width parameter of the Gaussian tuning curves (larger = narrower)

    # Encode each scalar input as a population of Gaussian tuning curves
    for v in inputs:
        for i in range(encoderSize):
            center = i / encoderSize * 2.0 - 1.0  # bin centers span [-1, 1)
            delta = center - v
            intensity = np.exp(-delta * delta * encoderSharpness)

            inputArr.append(intensity)

    # reward = dir * paddleX * 0.01

    # reward = np.abs(paddleX - ballPosition[0]) < 0.1

    # One agent step: the reward, a list of learning-rate/decay hyperparameters,
    # and the encoded input as a column vector (the argument names depend on the
    # agent implementation and are not shown in this excerpt)
    a.simStep(reward, 0.002, 0.98, 0.2, 1.0, np.matrix([inputArr]).T, 0.005, 0.005, 0.0005, 0.95, 0.01)

    print(a._prevValue)  # debug print of an internal agent value

    prevReward = reward

    if rewardTimer > 0.0:
        rewardTimer -= 1.0
    if punishmentTimer > 0.0:
        punishmentTimer -= 1.0

    # Move the paddle by the summed action output, clamped to [0, 1]
    paddleX = np.clip(paddleX + 0.15 * np.sum(a.getActions()) * 0.25, 0.0, 1.0)

    # Render
    display.fill((255, 255, 255))
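
For reference, a minimal self-contained sketch of the Gaussian population encoder this example uses. Only encoderSharpness = 30.0 comes from the example; encoderSize, the test inputs, and the function name encode() are assumptions for illustration:

import numpy as np

encoderSize = 16         # number of tuning-curve bins per scalar (assumed)
encoderSharpness = 30.0  # larger values give narrower Gaussians

def encode(inputs):
    # Map each scalar in [-1, 1] to encoderSize Gaussian activations
    inputArr = []
    for v in inputs:
        for i in range(encoderSize):
            center = i / encoderSize * 2.0 - 1.0  # bin centers span [-1, 1)
            delta = center - v
            inputArr.append(np.exp(-delta * delta * encoderSharpness))
    return np.matrix([inputArr]).T  # column vector, as simStep expects

print(encode([0.25, -0.5]).shape)  # (32, 1): two scalars x 16 bins

Each scalar lights up a smooth bump of activity across its bins, so nearby input values produce overlapping codes, which tends to help the agent generalize.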
Example #2
    # Encode each scalar input with hard binary bins instead of Gaussians
    for v in inputs:
        for i in range(encoderSize):
            center = i / encoderSize * 2.0 - 1.0  # bin centers span [-1, 1)
            delta = center - v
            # intensity = np.exp(-delta * delta * encoderSharpness)

            # 1 only when v lies within 0.5 / encoderSize of the bin center
            intensity = np.absolute(delta) < 0.5 / encoderSize

            inputArr.append(intensity)

    # reward = dir * paddleX * 0.01

    # reward = np.abs(paddleX - ballPosition[0]) < 0.1

    # One agent step, as in Example #1; this call takes one fewer positional
    # argument, so the two examples likely target different versions of the agent
    a.simStep(reward, 0.001, 0.95, 0.05, np.matrix([inputArr]).T, 0.001, 0.001, 0.01, 0.01, 0.92)

    print(a._prevValue)  # debug print of an internal agent value

    prevReward = reward

    if rewardTimer > 0.0:
        rewardTimer -= 1.0
    if punishmentTimer > 0.0:
        punishmentTimer -= 1.0

    # Move the paddle by the mean action output, clamped to [0, 1]
    paddleX = np.clip(paddleX + 0.2 * np.sum(a.getActions()) / numActions, 0.0, 1.0)

    # Render
    display.fill((255, 255, 255))
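
Example #2 swaps the Gaussian curve for hard binary bins. A sketch under the same assumptions (encoderSize is not shown in the excerpt): each bin fires only when the value lies within 0.5 / encoderSize of its center, while centers sit 2.0 / encoderSize apart, so a value can land between windows and activate no bin at all:

import numpy as np

encoderSize = 16  # assumed

def encodeBinary(inputs):
    inputArr = []
    for v in inputs:
        for i in range(encoderSize):
            center = i / encoderSize * 2.0 - 1.0
            # True (1) only within half a bin-width of the center
            inputArr.append(np.absolute(center - v) < 0.5 / encoderSize)
    return np.matrix([inputArr]).T

print(int(encodeBinary([0.0]).sum()))   # 1: value 0.0 hits the bin at center 0.0
print(int(encodeBinary([0.06]).sum()))  # 0: value falls between bin windows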