# Tail of one simulation/render iteration: finish encoding the current input
# cell, step the agent, move the paddle and redraw the frame.
# NOTE(review): the enclosing loop headers are outside this chunk. The first
# three statements presumably sit inside an inner per-cell encoding loop and
# the rest inside the main loop -- confirm nesting when restoring indentation.

# Bell-shaped response: equals 1 when v == center and decays with the squared
# offset, scaled by encoderSharpness.
delta = center - v
squaredOffset = delta * delta
intensity = np.exp(-squaredOffset * encoderSharpness)
inputArr.append(intensity)

# Alternative reward definitions, left disabled:
# reward = dir * paddleX * 0.01
# reward = np.abs(paddleX - ballPosition[0]) < 0.1

# Feed the encoded input to the agent as a column matrix. The numeric
# arguments are passed positionally; their meaning is defined by simStep's
# signature, which is not visible here.
inputColumn = np.matrix([inputArr]).T
a.simStep(
    reward,
    0.002,
    0.98,
    0.2,
    1.0,
    inputColumn,
    0.005,
    0.005,
    0.0005,
    0.95,
    0.01,
)

print(a._prevValue)

prevReward = reward

# Count both timers down by one per iteration, never starting from <= 0.
if rewardTimer > 0.0:
    rewardTimer = rewardTimer - 1.0

if punishmentTimer > 0.0:
    punishmentTimer = punishmentTimer - 1.0

# Shift the paddle by the scaled sum of the agent's actions and keep it
# inside [0, 1].
paddleShift = 0.15 * np.sum(a.getActions()) * 0.25
paddleX = np.clip(paddleX + paddleShift, 0.0, 1.0)

# Render
display.fill((255, 255, 255))

# The fixed pixel offsets presumably centre the sprites (half their size) --
# TODO confirm against the image dimensions.
paddleScreenPos = (displayWidth * paddleX - 64.0, displayHeight - 32.0)
display.blit(paddleImage, paddleScreenPos)

# ballPosition[1] is flipped (1.0 - y) before scaling to screen height.
ballScreenPos = (
    displayWidth * ballPosition[0] - 16.0,
    displayHeight * (1.0 - ballPosition[1]) - 16.0,
)
display.blit(ballImage, ballScreenPos)

pygame.display.flip()

clock.tick(60)
# Tail of one simulation/render iteration: finish encoding the current input
# cell, step the agent, move the paddle and redraw the frame.
# NOTE(review): the enclosing loop headers (and the assignment of `delta`) are
# outside this chunk. The first two statements presumably sit inside an inner
# per-cell encoding loop -- confirm nesting when restoring indentation.

# Binary response: True when |delta| is below 0.5 / encoderSize.
activationThreshold = 0.5 / encoderSize
intensity = np.absolute(delta) < activationThreshold
inputArr.append(intensity)

# Alternative reward definitions, left disabled:
# reward = dir * paddleX * 0.01
# reward = np.abs(paddleX - ballPosition[0]) < 0.1

# Feed the encoded input to the agent as a column matrix. The numeric
# arguments are passed positionally; their meaning is defined by simStep's
# signature, which is not visible here.
inputColumn = np.matrix([inputArr]).T
a.simStep(
    reward,
    0.001,
    0.95,
    0.05,
    inputColumn,
    0.001,
    0.001,
    0.01,
    0.01,
    0.92,
)

print(a._prevValue)

prevReward = reward

# Count both timers down by one per iteration, never starting from <= 0.
if rewardTimer > 0.0:
    rewardTimer = rewardTimer - 1.0

if punishmentTimer > 0.0:
    punishmentTimer = punishmentTimer - 1.0

# Shift the paddle by the summed actions, scaled by 0.2 and divided by
# numActions, then keep it inside [0, 1].
paddleShift = 0.2 * np.sum(a.getActions()) / numActions
paddleX = np.clip(paddleX + paddleShift, 0.0, 1.0)

# Render
display.fill((255, 255, 255))

# The fixed pixel offsets presumably centre the sprites (half their size) --
# TODO confirm against the image dimensions.
paddleScreenPos = (displayWidth * paddleX - 64.0, displayHeight - 32.0)
display.blit(paddleImage, paddleScreenPos)

# ballPosition[1] is flipped (1.0 - y) before scaling to screen height.
ballScreenPos = (
    displayWidth * ballPosition[0] - 16.0,
    displayHeight * (1.0 - ballPosition[1]) - 16.0,
)
display.blit(ballImage, ballScreenPos)

pygame.display.flip()

clock.tick(60)
# Tail of one simulation/render iteration: step the agent, move the paddle and
# redraw the frame.
# NOTE(review): the enclosing loop header is outside this chunk; these
# statements presumably all sit at main-loop level -- confirm when restoring
# indentation.

# Alternative reward definition, left disabled:
# reward = np.abs(paddleX - ballPosition[0]) < 0.1

# Feed the encoded input to the agent as a column matrix. The numeric
# arguments are passed positionally; their meaning is defined by simStep's
# signature, which is not visible here.
encodedInput = np.matrix([inputArr]).T
a.simStep(
    reward, 0.001, 0.95, 0.05,
    encodedInput,
    0.001, 0.001, 0.01, 0.01, 0.92,
)

print(a._prevValue)

prevReward = reward

# Each timer drops by one per iteration while it is still positive.
if rewardTimer > 0.0:
    rewardTimer = rewardTimer - 1.0

if punishmentTimer > 0.0:
    punishmentTimer = punishmentTimer - 1.0

# Move the paddle by the summed actions (scaled by 0.2, divided by
# numActions), then clamp: first from below at 0.0, then from above at 1.0.
movedPaddleX = paddleX + 0.2 * np.sum(a.getActions()) / numActions
flooredPaddleX = np.maximum(0.0, movedPaddleX)
paddleX = np.minimum(1.0, flooredPaddleX)

# Render
display.fill((255, 255, 255))

# The fixed pixel offsets presumably centre the sprites (half their size) --
# TODO confirm against the image dimensions.
paddleLeft = displayWidth * paddleX - 64.0
paddleTop = displayHeight - 32.0
display.blit(paddleImage, (paddleLeft, paddleTop))

# ballPosition[1] is flipped (1.0 - y) before scaling to screen height.
ballLeft = displayWidth * ballPosition[0] - 16.0
ballTop = displayHeight * (1.0 - ballPosition[1]) - 16.0
display.blit(ballImage, (ballLeft, ballTop))

pygame.display.flip()

clock.tick(60)