def cal_target_ppo_2(prev_state, cur_state, next_state, hero_name, rival_hero_name, line_idx): LineModel_PPO1.assert_tower_in_input(cur_state, hero_name, rival_hero_name) # 只计算当前帧的得失,得失为金币获取情况 + 敌方血量变化 # 获得小兵死亡情况, 根据小兵属性计算他们的金币情况 cur_rival_hero = cur_state.get_hero(rival_hero_name) rival_team = cur_rival_hero.team cur_hero = cur_state.get_hero(hero_name) cur_rival_hero = cur_state.get_hero(rival_hero_name) next_hero = next_state.get_hero(hero_name) next_rival_hero = next_state.get_hero(rival_hero_name) # 找到英雄附近死亡的敌方小兵 dead_units = StateUtil.get_dead_units_in_line( next_state, rival_team, line_idx, cur_hero, StateUtil.GOLD_GAIN_RADIUS) dead_golds = sum([ StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units ]) # 如果英雄有小额金币变化,则忽略 gold_delta = next_hero.gold - cur_hero.gold if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int( dead_golds / 2) + 3: gold_delta -= 3 # 很难判断英雄的最后一击,所以我们计算金币变化,超过死亡单位一半的金币作为英雄获得金币 if gold_delta > 0: gold_delta = gold_delta * 2 - dead_golds if gold_delta < 0: print('获得击杀金币不应该小于零', cur_state.tick, 'dead_golds', dead_golds, 'gold_delta', (next_hero.gold - cur_hero.gold)) gold_delta = 0 # if dead_golds > 0: # print('dead_gold', dead_golds, 'delta_gold', gold_delta, "hero", hero_name, "tick", cur_state.tick) reward = float(gold_delta) / 100 # 将所有奖励缩小 final_reward = reward / 100 final_reward = min(max(final_reward, -1), 1) # 特殊奖励,放在最后面 # 英雄击杀最后一击,直接最大奖励(因为gamma的存在,扩大这个惩罚) if cur_rival_hero.hp > 0 and next_rival_hero.hp <= 0: # print('对线英雄%s死亡' % rival_hero_name) dmg_hit_rival = next_state.get_hero_total_dmg( hero_name, rival_hero_name) if dmg_hit_rival > 0: # print('英雄%s对对方造成了最后一击' % hero_name) final_reward = 1 if cur_hero.hp > 0 and next_hero.hp <= 0: final_reward = 0 elif cur_hero.hp > 0 and next_hero.hp <= 0: print('英雄死亡') final_reward = -1 return final_reward
def cal_target_ppo(prev_state, cur_state, next_state, hero_name, rival_hero_name, line_idx):
    """Return the single-step PPO reward from gold gain, damage trade and kill/death events.

    The base reward combines (a) gold earned beyond half the value of rival
    units that died nearby and (b) damage dealt to the rival minus HP lost,
    both weighted more heavily at low HP. Small bonuses are added for hitting
    a unit or a tower. The sum is scaled down and clamped to [-1, 1], then
    overridden by kill/death events.

    NOTE(review): reconstructed from whitespace-collapsed source; the nesting
    of the "hero also died" check inside the last-hit branch is the most
    plausible reading and should be confirmed against version control.

    Args:
        prev_state: Not used by this function.
        cur_state: State in which the action was taken (``get_hero``, ``tick``).
        next_state: Following state; queried for heroes, damage and hit info.
        hero_name: Name of the hero being scored.
        rival_hero_name: Name of the opposing lane hero.
        line_idx: Lane index forwarded to ``StateUtil.get_dead_units_in_line``.

    Returns:
        1 if the rival died this step and the hero dealt damage to it
        (0 if the hero died in the same step), -5 if only the hero died
        (deliberately outside the clamp range), otherwise the clamped reward.
    """
    cur_hero = cur_state.get_hero(hero_name)
    cur_rival_hero = cur_state.get_hero(rival_hero_name)
    next_hero = next_state.get_hero(hero_name)
    next_rival_hero = next_state.get_hero(rival_hero_name)
    rival_team = cur_rival_hero.team

    # Rival units that died near the hero, and the gold they were worth.
    dead_units = StateUtil.get_dead_units_in_line(
        next_state, rival_team, line_idx, cur_hero, StateUtil.GOLD_GAIN_RADIUS)
    dead_golds = sum(
        StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units)
    dead_unit_str = ','.join(u.unit_name for u in dead_units)

    # Ignore small gold changes of the hero.
    # NOTE(review): the "3" looks like a periodic passive income tick - confirm.
    raw_gold_gain = next_hero.gold - cur_hero.gold
    gold_delta = raw_gold_gain
    if gold_delta % 10 in (3, 8) or gold_delta == int(dead_golds / 2) + 3:
        gold_delta -= 3

    # Last hits are hard to detect directly: gold beyond half of the dead
    # units' value is treated as gold earned by the hero.
    gold_delta = gold_delta * 2 - dead_golds
    if gold_delta < 0:
        print('获得击杀金币不应该小于零', cur_state.tick, 'dead_units', dead_unit_str,
              'gold_gain', raw_gold_gain)
        gold_delta = 0

    # Damage dealt to the rival as a fraction of its max HP. Damage/hit info
    # can lag; only what is reported in the next frame is counted here.
    # The weight grows from 1.5x (rival at full HP) to 3x (rival at 0 HP).
    dmg = next_state.get_hero_total_dmg(
        hero_name, rival_hero_name) / float(cur_rival_hero.maxhp)
    dmg *= 3 * cur_rival_hero.maxhp / float(
        cur_rival_hero.hp + cur_rival_hero.maxhp)

    # HP lost by the hero in the next frame only (delayed hits such as tower
    # projectiles are not considered), halved, then weighted the same way.
    # The original condition was `cur.hp >= next.hp >= next.hp`; the second
    # half was a tautology, so it is written as the single comparison it was.
    if cur_hero.hp >= next_hero.hp:
        self_hp_loss = (cur_hero.hp - next_hero.hp) / float(cur_hero.maxhp) / 2
    else:
        self_hp_loss = 0
    self_hp_loss *= 3 * cur_hero.maxhp / float(cur_hero.hp + cur_hero.maxhp)

    dmg_delta = int((dmg - self_hp_loss) * LineModel.REWARD_RIVAL_DMG)
    reward = float(gold_delta + dmg_delta) / 100

    # Encourage attacking rival units and towers.
    if next_state.if_hero_hit_any_unit(hero_name, rival_hero_name) is not None:
        reward += 0.01
    if next_state.if_hero_hit_tower(hero_name) is not None:
        reward += 0.01

    # Scale all rewards down, then clamp.
    final_reward = min(max(reward / 10, -1), 1)

    # Special rewards come last so they override the base reward.
    if cur_rival_hero.hp > 0 and next_rival_hero.hp <= 0:
        dmg_hit_rival = next_state.get_hero_total_dmg(hero_name, rival_hero_name)
        if dmg_hit_rival > 0:
            final_reward = 1
            # Both heroes died in the same step: neutral outcome.
            if cur_hero.hp > 0 and next_hero.hp <= 0:
                final_reward = 0
    elif cur_hero.hp > 0 and next_hero.hp <= 0:
        print('英雄死亡')
        # Intentionally beyond the clamp range: the original note says the
        # death penalty is enlarged because of the discount factor gamma.
        final_reward = -5
    return final_reward
def cal_target_v3(state_infos, state_idx, hero_name, rival_hero_name, line_idx):
    """Return a reward in [-1, 1] for the action taken at ``state_idx`` (normalized-score variant).

    The hero's score (gold gain plus damage trade) is normalized against the
    best and worst achievable scores for the frame, then overridden by a
    series of rule-based penalties (retreat, missed ultimate, tower hits,
    death, leaving the lane, returning to town) and finally by a last-hit
    bonus.

    NOTE(review): a later definition with the same name appears further down
    in this file; if both live in the same scope, this one is shadowed.
    NOTE(review): reconstructed from whitespace-collapsed source.

    Args:
        state_infos: Sequence of states; indices ``state_idx`` through
            ``state_idx + 2`` must exist.
        state_idx: Index of the state in which the action was taken.
        hero_name: Name of the hero being scored.
        rival_hero_name: Name of the opposing lane hero.
        line_idx: Lane index forwarded to the ``StateUtil``/``RewardUtil`` helpers.

    Returns:
        The reward clamped to [-1, 1].
    """
    cur_state = state_infos[state_idx]
    next_state = state_infos[state_idx + 1]
    next_next_state = state_infos[state_idx + 2]

    cur_hero = cur_state.get_hero(hero_name)
    cur_rival_hero = cur_state.get_hero(rival_hero_name)
    next_hero = next_state.get_hero(hero_name)
    rival_team = cur_rival_hero.team

    # Rival units that died in the lane, and the gold they were worth.
    dead_units = StateUtil.get_dead_units_in_line(next_state, rival_team, line_idx)
    dead_golds = sum(
        StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units)
    dead_unit_str = ','.join(u.unit_name for u in dead_units)

    # Ignore small gold changes of the hero.
    # NOTE(review): the "3" looks like a periodic passive income tick - confirm.
    gold_delta = next_hero.gold - cur_hero.gold
    if gold_delta % 10 in (3, 8) or gold_delta == int(dead_golds / 2) + 3:
        gold_delta -= 3

    # Strip the bounty for the rival hero's death; a last hit is rewarded
    # separately below. TODO: the bounty varies; 80 is its minimum.
    # Guard the look-back: at state_idx == 0 the index -1 would silently wrap
    # around to the LAST state of the sequence.
    rival_just_died = False
    if state_idx > 0:
        prev_rival = state_infos[state_idx - 1].get_hero(rival_hero_name)
        rival_just_died = prev_rival.hp > 0 and cur_rival_hero.hp <= 0
    if rival_just_died and gold_delta >= 80 > dead_golds:
        print("敌方英雄死亡奖励,扣减")
        gold_delta = int(dead_golds / 2)

    # Damage dealt to the rival as a fraction of its max HP. Damage and hit
    # info is delayed, so it is read two frames ahead. Damage against a
    # low-HP rival (<= 30%) is weighted three times.
    # TODO: define how defensive/support spells should be scored.
    dmg = next_next_state.get_hero_total_dmg(
        hero_name, rival_hero_name) / float(cur_rival_hero.maxhp)
    if float(cur_rival_hero.hp) / cur_rival_hero.maxhp <= 0.3:
        dmg *= 3
    if cur_hero.hp > next_hero.hp:
        self_hp_loss = (cur_hero.hp - next_hero.hp) / float(cur_hero.maxhp)
    else:
        self_hp_loss = 0
    dmg_delta = int((dmg - self_hp_loss) * LineModel.REWARD_RIVAL_DMG)

    print(
        'reward debug info, hero: %s, max_gold: %s, gold_gain: %s, dmg: %s, hp_loss: %s, dmg_delta: %s, dead_units: %s'
        % (hero_name, dead_golds, gold_delta, dmg, self_hp_loss, dmg_delta,
           dead_unit_str))

    # Best case: all unit gold plus a share of the rival's HP reward.
    # Worst case: the same share lost. Zero point: half of the unit gold.
    max_score = dead_golds + LineModel.REWARD_RIVAL_DMG / 6
    min_score = -LineModel.REWARD_RIVAL_DMG / 6
    mid_score = int(dead_golds / 2)
    hero_score = gold_delta + dmg_delta
    reward = 0
    if hero_score > mid_score:
        reward = (hero_score - mid_score) / float(max_score - mid_score)
    elif hero_score < mid_score:
        reward = -(mid_score - hero_score) / float(mid_score - min_score)

    # --- Rule-based overrides -------------------------------------------
    # output_index 48 is handled as the retreat action (see log messages):
    # base penalty -0.2, full penalty when retreating above 70% HP.
    hero_action = cur_state.get_hero_action(hero_name)
    if hero_action.output_index == 48:
        if float(cur_hero.hp) / cur_hero.maxhp > 0.7:
            print('高血量撤退')
            reward = -1
        else:
            print('撤退基础惩罚')
            reward = -0.2

    # An ultimate (skill 3) must hit the rival hero.
    if RewardUtil.if_cast_skill(state_infos, state_idx, hero_name, 3):
        skill_hit_rival = RewardUtil.if_skill_hit_hero(
            state_infos, state_idx, hero_name, 3, rival_hero_name)
        if not skill_hit_rival:
            print('特定英雄的大招必须要打到英雄才行')
            reward = -1

    # Being hit by a tower is only acceptable when the rival dies.
    hit_by_tower = RewardUtil.if_hit_by_tower(state_infos, state_idx, 3, hero_name)
    if_rival_dead = RewardUtil.if_hero_dead(state_infos, state_idx, 3,
                                            rival_hero_name)
    if hit_by_tower and not if_rival_dead:
        print('被塔攻击情况下,只有杀死对方才不会有惩罚')
        reward = -1

    # Hero death: full penalty.
    if RewardUtil.if_hero_dead(state_infos, state_idx, 6, hero_name):
        print('英雄死亡')
        reward = -1

    # Wandering too far from the lane.
    if RewardUtil.if_hero_leave_line(state_infos, state_idx, hero_name, line_idx):
        print('离线太远')
        reward = -1

    # Leaving the model's range is tolerated only when retreating, so the
    # hero can still back off when in danger.
    if RewardUtil.if_leave_linemodel_range(state_infos, state_idx, hero_name,
                                           line_idx):
        if hero_action.output_index != 48:
            print('离开模型范围,又不是撤退')
            reward = -1

    # Returning to town with high HP.
    if RewardUtil.if_return_town_high_hp(state_infos, state_idx, hero_name, 0.3):
        print('高血量回城')
        reward = -1

    # Return-to-town channel got interrupted.
    if RewardUtil.if_return_town_break(state_infos, state_idx, hero_name):
        print('回城被打断')
        reward = -1

    # --- Special reward, applied last -------------------------------------
    # Dealing damage to the rival in the step it dies earns the maximum.
    next_rival = next_state.get_hero(rival_hero_name)
    if cur_rival_hero.hp > 0 and next_rival.hp <= 0:
        print('对线英雄%s死亡' % rival_hero_name)
        dmg_hit_rival = next_next_state.get_hero_total_dmg(
            hero_name, rival_hero_name)
        if dmg_hit_rival > 0:
            print('英雄%s对对方造成了最后一击' % hero_name)
            reward = 1
    return min(max(reward, -1), 1)
def cal_target_v2(state_infos, state_idx, hero_name, rival_hero_name, line_idx):
    """Return a reward in [-1, 1] for the action at ``state_idx`` using a 9-frame lookahead.

    For each of the next nine frame transitions the hero's gold gain and
    damage trade are scored; the per-frame scores are combined with
    ``LineModel.cal_score`` and normalized against the best/worst achievable
    totals. Rule-based penalties and a last-hit bonus then override the
    result.

    NOTE(review): reconstructed from whitespace-collapsed source.

    Args:
        state_infos: Sequence of states; indices ``state_idx`` through
            ``state_idx + 10`` must exist.
        state_idx: Index of the state in which the action was taken.
        hero_name: Name of the hero being scored.
        rival_hero_name: Name of the opposing lane hero.
        line_idx: Lane index forwarded to the ``StateUtil``/``RewardUtil`` helpers.

    Returns:
        The reward clamped to [-1, 1].
    """
    action_state = state_infos[state_idx]
    # Hero as it was when the action was chosen. The retreat rule below must
    # use this snapshot, not the one left over from the lookahead loop.
    action_hero = action_state.get_hero(hero_name)
    rival_team = action_state.get_hero(rival_hero_name).team

    state_max_golds = []
    state_gold_gains = []
    state_dmg_deltas = []
    state_score = []
    dead_unit_list = []

    # Score each of the next nine frame transitions.
    cur_state = action_state
    for i in range(1, 10):
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_state = state_infos[state_idx + i]
        next_hero = next_state.get_hero(hero_name)
        next_next_state = state_infos[state_idx + i + 1]

        # Rival units that died in the lane, and the gold they were worth.
        dead_units = StateUtil.get_dead_units_in_line(
            next_state, rival_team, line_idx)
        dead_golds = sum(
            StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units)
        state_max_golds.append(dead_golds)
        dead_unit_list.append(','.join(u.unit_name for u in dead_units))

        # Ignore small gold changes of the hero.
        # NOTE(review): the "3" looks like a passive income tick - confirm.
        gold_delta = next_hero.gold - cur_hero.gold
        if gold_delta % 10 in (3, 8) or gold_delta == int(dead_golds / 2) + 3:
            gold_delta -= 3

        # Strip hero-kill bounties; a last hit is rewarded separately below.
        # TODO: the bounty varies; the threshold used is its minimum.
        if gold_delta >= 200 > dead_golds:
            gold_delta = int(dead_golds / 2)
        state_gold_gains.append(gold_delta)

        # Damage and hit info is delayed, so it is read two frames ahead.
        # Damage against a low-HP rival (<= 30%) is weighted three times;
        # damage taken by the hero is weighted 1.5 times.
        dmg = next_next_state.get_hero_total_dmg(hero_name, rival_hero_name)
        if float(cur_rival_hero.hp) / cur_rival_hero.maxhp <= 0.3:
            dmg *= 3
        self_dmg = cur_hero.hp - next_hero.hp if cur_hero.hp > next_hero.hp else 0
        self_dmg *= 1.5
        dmg_delta = int(
            float(dmg - self_dmg) / cur_rival_hero.maxhp *
            LineModel.REWARD_RIVAL_DMG)
        dmg_delta *= 6
        state_dmg_deltas.append(dmg_delta)

        state_score.append(gold_delta + dmg_delta)
        cur_state = next_state

    print(
        'reward debug info, hero: %s, max_gold: %s, gold_gain: %s, dmg_delta: %s, dead_units: %s'
        % (hero_name, ','.join([str(s) for s in state_max_golds]),
           ','.join([str(s) for s in state_gold_gains]),
           ','.join([str(s) for s in state_dmg_deltas]),
           ','.join(dead_unit_list)))

    # Best case: all unit gold plus a full rival-HP reward.
    # Worst case: a full rival-HP reward lost. Zero point: half the unit gold.
    max_gold_score = LineModel.cal_score(state_max_golds, LineModel.REWARD_GAMMA)
    max_score = max_gold_score + LineModel.REWARD_RIVAL_DMG
    min_score = -LineModel.REWARD_RIVAL_DMG
    mid_score = LineModel.cal_score(state_max_golds, LineModel.REWARD_GAMMA) / 2
    hero_score = LineModel.cal_score(state_score, LineModel.REWARD_GAMMA)
    reward = 0
    if hero_score > mid_score:
        reward = (hero_score - mid_score) / (max_score - mid_score)
    elif hero_score < mid_score:
        reward = -(mid_score - hero_score) / (mid_score - min_score)

    # --- Rule-based overrides -------------------------------------------
    # output_index 48 is handled as the retreat action (see log messages):
    # base penalty -0.2, full penalty when retreating above 50% HP.
    # Fixed: the HP ratio previously came from the hero snapshot left behind
    # by the final loop iteration instead of the action frame.
    hero_action = action_state.get_hero_action(hero_name)
    if hero_action.output_index == 48:
        if float(action_hero.hp) / action_hero.maxhp > 0.5:
            print('高血量撤退')
            reward = -1
        else:
            print('撤退基础惩罚')
            reward = -0.2

    # An ultimate (skill 3) must hit the rival hero.
    if RewardUtil.if_cast_skill(state_infos, state_idx, hero_name, 3):
        skill_hit_rival = RewardUtil.if_skill_hit_hero(
            state_infos, state_idx, hero_name, 3, rival_hero_name)
        if not skill_hit_rival:
            print('特定英雄的大招必须要打到英雄才行')
            reward = -1

    # (The "hit by tower without killing the rival" penalty is disabled in
    # this variant.)

    # Hero death: full penalty.
    if RewardUtil.if_hero_dead(state_infos, state_idx, 6, hero_name):
        print('英雄死亡')
        reward = -1

    # Wandering too far from the lane.
    if RewardUtil.if_hero_leave_line(state_infos, state_idx, hero_name, line_idx):
        print('离线太远')
        reward = -1

    # Leaving the model's range is tolerated only when retreating, so the
    # hero can still back off when in danger.
    if RewardUtil.if_leave_linemodel_range(state_infos, state_idx, hero_name,
                                           line_idx):
        if hero_action.output_index != 48:
            print('离开模型范围,又不是撤退')
            reward = -1

    # Returning to town with high HP.
    if RewardUtil.if_return_town_high_hp(state_infos, state_idx, hero_name, 0.3):
        print('高血量回城')
        reward = -1

    # Return-to-town channel got interrupted.
    if RewardUtil.if_return_town_break(state_infos, state_idx, hero_name):
        print('回城被打断')
        reward = -1

    # --- Special reward, applied last -------------------------------------
    # Dealing damage to the rival in the step it dies earns the maximum.
    action_rival = action_state.get_hero(rival_hero_name)
    next_rival = state_infos[state_idx + 1].get_hero(rival_hero_name)
    if action_rival.hp > 0 and next_rival.hp <= 0:
        print('对线英雄%s死亡' % rival_hero_name)
        dmg_hit_rival = state_infos[state_idx + 2].get_hero_total_dmg(
            hero_name, rival_hero_name)
        if dmg_hit_rival > 0:
            print('英雄%s对对方造成了最后一击' % hero_name)
            reward = 1
    return min(max(reward, -1), 1)
def cal_target_v3(state_infos, state_idx, hero_name, rival_hero_name, line_idx):
    """Return a reward in [-1, 1] for the action at ``state_idx`` (gold + damage + tower variant).

    The reward sums the hero's gold gain and damage trade (scaled by 1/100)
    with the damage ratio dealt to the rival tower, adds a small bonus for
    hitting a unit, and is then overridden by a last-hit bonus and a death
    penalty.

    NOTE(review): an earlier definition with the same name appears above in
    this file; if both live in the same scope, this later one is the
    definition bound to the name.
    NOTE(review): reconstructed from whitespace-collapsed source; the final
    death check is read as a top-level statement (it overrides the last-hit
    bonus). Confirm against version control.

    Args:
        state_infos: Sequence of states; indices ``state_idx`` through
            ``state_idx + 2`` must exist.
        state_idx: Index of the state in which the action was taken.
        hero_name: Name of the hero being scored.
        rival_hero_name: Name of the opposing lane hero.
        line_idx: Lane index forwarded to ``StateUtil.get_dead_units_in_line``.

    Returns:
        The reward clamped to [-1, 1].
    """
    cur_state = state_infos[state_idx]
    next_state = state_infos[state_idx + 1]
    next_next_state = state_infos[state_idx + 2]

    cur_hero = cur_state.get_hero(hero_name)
    cur_rival_hero = cur_state.get_hero(rival_hero_name)
    next_hero = next_state.get_hero(hero_name)
    next_next_hero = next_next_state.get_hero(hero_name)
    rival_team = cur_rival_hero.team

    # Rival units that died in the lane, and the gold they were worth.
    dead_units = StateUtil.get_dead_units_in_line(next_state, rival_team, line_idx)
    dead_golds = sum(
        StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units)
    dead_unit_str = ','.join(u.unit_name for u in dead_units)

    # Ignore small gold changes of the hero. Gold that arrives late because
    # of a delayed skill hit is a known, unsolved inaccuracy.
    # NOTE(review): the "3" looks like a periodic passive income tick - confirm.
    gold_delta = next_hero.gold - cur_hero.gold
    if gold_delta % 10 in (3, 8) or gold_delta == int(dead_golds / 2) + 3:
        gold_delta -= 3

    # Strip the bounty for the rival hero's death; a last hit is rewarded
    # separately below. TODO: the bounty varies; 80 is its minimum.
    # Guard the look-back: at state_idx == 0 the index -1 would silently wrap
    # around to the LAST state of the sequence.
    rival_just_died = False
    if state_idx > 0:
        prev_rival = state_infos[state_idx - 1].get_hero(rival_hero_name)
        rival_just_died = prev_rival.hp > 0 and cur_rival_hero.hp <= 0
    if rival_just_died and gold_delta >= 80 > dead_golds:
        print("敌方英雄死亡奖励,扣减")
        gold_delta = int(dead_golds / 2)

    # Damage dealt to the rival as a fraction of its max HP. Damage and hit
    # info can be delayed by up to two frames, hence the three-state helper.
    # The weight grows from 1.5x (rival at full HP) to 3x (rival at 0 HP).
    # TODO: define support/defensive spells; careless casts should be penalized.
    dmg = StateUtil.get_attack_cast_dmg(
        cur_state, next_state, next_next_state, hero_name,
        rival_hero_name) / float(cur_rival_hero.maxhp)
    dmg *= 3 * cur_rival_hero.maxhp / float(
        cur_rival_hero.hp + cur_rival_hero.maxhp)

    # HP lost over the following two frames, averaged (hence the / 2), since
    # unit and tower attacks land late due to projectile travel and attack
    # speed. Only counted when HP did not increase in either frame.
    if cur_hero.hp >= next_hero.hp >= next_next_hero.hp:
        self_hp_loss = (cur_hero.hp - next_next_hero.hp) / float(
            cur_hero.maxhp) / 2
    else:
        self_hp_loss = 0
    self_hp_loss *= 3 * cur_hero.maxhp / float(cur_hero.hp + cur_hero.maxhp)
    dmg_delta = int((dmg - self_hp_loss) * LineModel.REWARD_RIVAL_DMG)

    hit_rival_tower_dmg_ratio = StateUtil.get_hit_rival_tower_dmg_ratio(
        cur_state, next_state, next_next_state, hero_name)

    print(
        'reward debug info, hero: %s, max_gold: %s, gold_gain: %s, dmg: %s, hp_loss: %s, dmg_delta: %s, '
        'dead_units: %s, rival_tower: %s' %
        (hero_name, dead_golds, gold_delta, dmg, self_hp_loss, dmg_delta,
         dead_unit_str, hit_rival_tower_dmg_ratio))

    reward = float(gold_delta + dmg_delta) / 100 + hit_rival_tower_dmg_ratio

    # Encourage attacking rival units.
    if_hit_unit = next_next_state.if_hero_hit_any_unit(hero_name, rival_hero_name)
    if if_hit_unit is not None:
        print("物理攻击到了小兵", if_hit_unit)
        reward += 0.01

    # The retreat, missed-ultimate, leave-lane and leave-model-range
    # penalties, and the tower HP change computation, were commented out in
    # this variant and are therefore not applied.

    # --- Special rewards, applied last ------------------------------------
    # Dealing damage to the rival in the step it dies earns the maximum.
    next_rival = next_state.get_hero(rival_hero_name)
    if cur_rival_hero.hp > 0 and next_rival.hp <= 0:
        print('对线英雄%s死亡' % rival_hero_name)
        dmg_hit_rival = next_next_state.get_hero_total_dmg(
            hero_name, rival_hero_name)
        if dmg_hit_rival > 0:
            print('英雄%s对对方造成了最后一击' % hero_name)
            reward = 1

    # Hero death: full penalty.
    if cur_hero.hp > 0 and next_hero.hp <= 0:
        reward = -1
    return min(max(reward, -1), 1)