def computeRewards(states, reward_halflife=2.0):
    """Compute discounted rewards for a sequence of game states.

    Per-step score is (kill - death) + damage / 100, where a "kill" is the
    onset of ``isDying`` for ``players[0]``, a "death" is the onset of
    ``isDying`` for ``players[1]``, and damage is any positive increase in
    ``players[0].percent`` between consecutive states.

    Args:
        states: Indexable sequence of states; each exposes ``players[0]``
            and ``players[1]``. ``states[-1]`` is read, so it must be
            non-empty.
        reward_halflife: Measured in seconds. At the fixed 60 fps used here,
            a reward's weight halves after this many seconds of frames.

    Returns:
        The output of ``util.scanr`` over the per-step scores, seeded with
        the best ``scoreActions`` value of the last state, with its final
        element dropped.
    """
    fps = 60.0
    # Chosen so that discount ** (fps * reward_halflife) == 0.5.
    discount = 0.5 ** (1.0 / (fps * reward_halflife))

    p0_dying = [isDying(state.players[0]) for state in states]
    p1_dying = [isDying(state.players[1]) for state in states]
    kills = processDeaths(p0_dying)
    deaths = processDeaths(p1_dying)

    # Only increases in percent count as damage; decreases clamp to 0.
    p0_percents = [state.players[0].percent for state in states]
    damage_dealt = [
        max(later - earlier, 0)
        for earlier, later in zip(p0_percents, p0_percents[1:])
    ]

    def net_score(kill, death):
        return kill - death

    def add_damage(score, damage):
        return score + damage / 100

    # The first kill/death entry is skipped so the lists line up with
    # damage_dealt, which has one entry per consecutive pair of states.
    scores = util.zipWith(net_score, kills[1:], deaths[1:])
    final_scores = util.zipWith(add_damage, scores, damage_dealt)

    # use last action taken?
    lastQ = max(scoreActions(states[-1]))

    def accumulate(r1, r2):
        return r1 + discount * r2

    discounted_rewards = util.scanr(accumulate, lastQ, final_scores)
    return discounted_rewards[:-1]
def computeRewards(states, reward_halflife=2.0):
    """Turn a run of game states into a series of discounted rewards.

    Each step is scored as ``kill - death + damage / 100``:
      * kill   -- ``players[0]`` starts dying (per ``isDying``),
      * death  -- ``players[1]`` starts dying,
      * damage -- positive change in ``players[0].percent`` since the
                  previous state.

    Args:
        states: Non-empty indexable sequence of states (``states[-1]`` is
            accessed).
        reward_halflife: Half-life of a reward, in seconds, assuming the
            hard-coded 60 frames per second.

    Returns:
        ``util.scanr`` applied to the step scores with the maximum of
        ``scoreActions(states[-1])`` as the seed, minus its last element.
    """
    # reward_halflife is measured in seconds
    fps = 60.0
    frames_per_halflife = fps * reward_halflife
    discount = 0.5 ** (1.0 / frames_per_halflife)

    kill_flags = [isDying(state.players[0]) for state in states]
    death_flags = [isDying(state.players[1]) for state in states]
    kills = processDeaths(kill_flags)
    deaths = processDeaths(death_flags)

    # Percent of players[0] at every state, then clamped forward differences.
    percents = [state.players[0].percent for state in states]
    damage_dealt = [
        max(current - previous, 0)
        for previous, current in zip(percents, percents[1:])
    ]

    # Drop the first kill/death entry to align with the pairwise damage list.
    scores = util.zipWith(
        lambda kill, death: kill - death,
        kills[1:],
        deaths[1:],
    )
    final_scores = util.zipWith(
        lambda score, damage: score + damage / 100,
        scores,
        damage_dealt,
    )

    # use last action taken?
    lastQ = max(scoreActions(states[-1]))

    scanned = util.scanr(
        lambda reward, future: reward + discount * future,
        lastQ,
        final_scores,
    )
    discounted_rewards = scanned[:-1]
    return discounted_rewards
def processDeaths(deaths):
    """Mark the positions where a dying flag switches on.

    Each entry of ``deaths`` is paired with the entry before it (the first
    entry is paired with ``False``), and the pair is mapped to
    ``(not previous) and current``.

    Args:
        deaths: List of per-state dying flags (list concatenation is used,
            so this must be a list).

    Returns:
        Whatever ``util.zipWith`` returns for the paired flags -- presumably
        one entry per element of ``deaths``; confirm against ``util``.
    """
    # Shift right by one so every flag lines up with its predecessor.
    previous_flags = [False] + deaths[:-1]

    def just_started(was_dying, is_dying):
        return (not was_dying) and is_dying

    return util.zipWith(just_started, previous_flags, deaths)
def processDeaths(deaths):
    """Return a float array marking where a dying flag switches on.

    Consecutive entries of ``deaths`` are paired (``deaths[i]`` with
    ``deaths[i + 1]``) and each pair maps to ``1.0`` when the first is falsy
    and the second truthy, otherwise ``0.0``.

    Args:
        deaths: Sliceable sequence of per-state dying flags.

    Returns:
        ``np.array`` built from the ``util.zipWith`` result -- presumably
        one element shorter than ``deaths``, since it is paired with its own
        tail; confirm against ``util.zipWith``.
    """
    following = deaths[1:]

    def onset(was_dying, is_dying):
        return float((not was_dying) and is_dying)

    onsets = util.zipWith(onset, deaths, following)
    return np.array(onsets)
def processDamages(percents):
    """Return the non-negative step-to-step increases in ``percents``.

    Consecutive entries are paired (``percents[i]`` with
    ``percents[i + 1]``) and each pair maps to ``max(later - earlier, 0)``,
    so decreases are clamped to zero.

    Args:
        percents: Sliceable sequence of percent values.

    Returns:
        ``np.array`` built from the ``util.zipWith`` result -- presumably
        one element shorter than ``percents``; confirm against
        ``util.zipWith``.
    """
    later_percents = percents[1:]

    def gained(earlier, later):
        return max(later - earlier, 0)

    increases = util.zipWith(gained, percents, later_percents)
    return np.array(increases)
def processDeaths(deaths):
    """Flag the entries at which dying begins.

    An entry is mapped to ``(not before) and now``, where ``before`` is the
    preceding entry of ``deaths`` (``False`` for the very first entry) and
    ``now`` is the entry itself.

    Args:
        deaths: List of per-state dying flags; must support ``list + list``
            concatenation.

    Returns:
        The result of ``util.zipWith`` over the (before, now) pairs --
        presumably aligned one-to-one with ``deaths``; verify in ``util``.
    """
    # Prepending False treats the sequence as starting in a "not dying" state.
    shifted = [False]
    shifted = shifted + deaths[:-1]
    return util.zipWith(
        lambda before, now: (not before) and now,
        shifted,
        deaths,
    )