# Sharpness factor of the Gaussian bumps used to encode each input value.
encoderSharpness = 30.0

# Centers of the encoder units: encoderSize evenly spaced values that start at
# -1.0 and advance by 2.0 / encoderSize, so the last center stays below 1.0.
# NOTE(review): this relies on true division. Under Python 2 without
# `from __future__ import division`, an integer encoderSize would collapse
# every center to -1.0 -- confirm the interpreter version.
unitCenters = [i / encoderSize * 2.0 - 1.0 for i in range(0, encoderSize)]

# Append, for every input value, one Gaussian activation per encoder unit.
for v in inputs:
    for center in unitCenters:
        delta = center - v
        inputArr.append(np.exp(-delta * delta * encoderSharpness))

# Earlier reward formulations, kept for reference:
#   reward = dir * paddleX * 0.01
#   reward = np.abs(paddleX - ballPosition[0]) < 0.1

# Hand the agent the current reward and the encoded inputs as a column matrix.
# The remaining positional arguments are numeric settings whose meaning is
# defined by simStep, which is not visible in this fragment.
encodedColumn = np.matrix([inputArr]).T
a.simStep(reward, 0.002, 0.98, 0.2, 1.0, encodedColumn,
          0.005, 0.005, 0.0005, 0.95, 0.01)

print(a._prevValue)

prevReward = reward

# Count both timers down by one per step while they are still positive.
if rewardTimer > 0.0:
    rewardTimer -= 1.0

if punishmentTimer > 0.0:
    punishmentTimer -= 1.0

# Shift the paddle by the scaled sum of the agent's actions and keep the
# resulting position inside [0.0, 1.0].
paddleShift = 0.15 * np.sum(a.getActions()) * 0.25
paddleX = np.minimum(1.0, np.maximum(0.0, paddleX + paddleShift))

# Render
display.fill((255, 255, 255))
# Centers of the encoder units: encoderSize evenly spaced values that start at
# -1.0 and advance by 2.0 / encoderSize.
unitCenters = [i / encoderSize * 2.0 - 1.0 for i in range(0, encoderSize)]

# Half-width of the window inside which a unit counts as active.
# NOTE(review): centers are 2.0 / encoderSize apart while each window is only
# 1.0 / encoderSize wide, so an input that falls between two windows activates
# no unit at all -- confirm this sparsity is intended.
activeHalfWidth = 0.5 / encoderSize

# Append, for every input value, one boolean activation per encoder unit.
# A Gaussian alternative was tried before:
#   intensity = np.exp(-delta * delta * encoderSharpness)
for v in inputs:
    for center in unitCenters:
        delta = center - v
        inputArr.append(np.absolute(delta) < activeHalfWidth)

# Earlier reward formulations, kept for reference:
#   reward = dir * paddleX * 0.01
#   reward = np.abs(paddleX - ballPosition[0]) < 0.1

# Hand the agent the current reward and the encoded inputs as a column matrix.
# The remaining positional arguments are numeric settings whose meaning is
# defined by simStep, which is not visible in this fragment.
encodedColumn = np.matrix([inputArr]).T
a.simStep(reward, 0.001, 0.95, 0.05, encodedColumn,
          0.001, 0.001, 0.01, 0.01, 0.92)

print(a._prevValue)

prevReward = reward

# Count both timers down by one per step while they are still positive.
if rewardTimer > 0.0:
    rewardTimer -= 1.0

if punishmentTimer > 0.0:
    punishmentTimer -= 1.0

# Shift the paddle by the sum of the agent's actions, scaled by 0.2 and divided
# by numActions, and keep the resulting position inside [0.0, 1.0].
paddleShift = 0.2 * np.sum(a.getActions()) / numActions
paddleX = np.minimum(1.0, np.maximum(0.0, paddleX + paddleShift))

# Render
display.fill((255, 255, 255))
# Encoder unit centers: encoderSize evenly spaced values beginning at -1.0
# with a step of 2.0 / encoderSize.
unitCenters = [i / encoderSize * 2.0 - 1.0 for i in range(0, encoderSize)]

# A unit is active when the input lies strictly within this distance of its
# center.
# NOTE(review): the step between centers (2.0 / encoderSize) is twice the
# window width (1.0 / encoderSize), so some inputs activate no unit --
# confirm this is intended.
activeHalfWidth = 0.5 / encoderSize

# Encode every input value as one boolean per encoder unit and append the
# results to inputArr. A Gaussian alternative was tried before:
#   intensity = np.exp(-delta * delta * encoderSharpness)
for v in inputs:
    for center in unitCenters:
        delta = center - v
        inputArr.append(np.absolute(delta) < activeHalfWidth)

# Earlier reward formulations, kept for reference:
#   reward = dir * paddleX * 0.01
#   reward = np.abs(paddleX - ballPosition[0]) < 0.1

# Step the agent with the current reward and the encoded inputs as a column
# matrix. The other positional arguments are numeric settings interpreted by
# simStep, whose definition is not visible in this fragment.
encodedColumn = np.matrix([inputArr]).T
a.simStep(reward, 0.001, 0.95, 0.05, encodedColumn,
          0.001, 0.001, 0.01, 0.01, 0.92)

print(a._prevValue)

prevReward = reward

# Decrement each timer by one while it is still positive.
if rewardTimer > 0.0:
    rewardTimer -= 1.0

if punishmentTimer > 0.0:
    punishmentTimer -= 1.0

# Move the paddle by the summed actions (scaled by 0.2 and divided by
# numActions), then clamp the position to [0.0, 1.0].
paddleShift = 0.2 * np.sum(a.getActions()) / numActions
paddleX = np.minimum(1.0, np.maximum(0.0, paddleX + paddleShift))

# Render