예제 #1
    def cal_target_ppo_2(prev_state, cur_state, next_state, hero_name,
                         rival_hero_name, line_idx):
        # Compute the reward for `hero_name` over the transition from
        # `cur_state` to `next_state`: a gold-based score that is scaled and
        # clamped to [-1, 1], then overridden by kill/death outcomes.
        # Returns the final reward value.
        # `prev_state` is not referenced anywhere in the visible body.
        # NOTE(review): several statements in this listing are cut off (the
        # assert_tower_in_input call, the get_dead_units_in_line call and the
        # dead_golds sum lack their trailing arguments / closing brackets),
        # so the block does not parse as shown -- restore from the original.
        LineModel_PPO1.assert_tower_in_input(cur_state, hero_name,

        # Only the gain/loss of the current frame is computed: gold obtained
        # plus the change in the rival's HP.
        # NOTE(review): the visible code below only scores gold; no HP term.
        # Collect the rival minions that died and derive their gold value
        # from the unit attributes.
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        rival_team = cur_rival_hero.team
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_hero = next_state.get_hero(hero_name)
        next_rival_hero = next_state.get_hero(rival_hero_name)
        # Find the rival minions that died near the hero.
        dead_units = StateUtil.get_dead_units_in_line(
            next_state, rival_team, line_idx, cur_hero,
        dead_golds = sum([
            StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units

        # Ignore small gold changes of the hero: a delta ending in 3 or 8, or
        # equal to half the dead units' gold plus 3, has 3 subtracted.
        # (presumably a periodic +3 passive income tick -- TODO confirm)
        gold_delta = next_hero.gold - cur_hero.gold
        if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int(
                dead_golds / 2) + 3:
            gold_delta -= 3

        # It is hard to tell whether the hero landed the last hit, so the
        # gold change is used instead: gold in excess of half the dead units'
        # value is treated as gold earned by the hero.
        if gold_delta > 0:
            gold_delta = gold_delta * 2 - dead_golds
            if gold_delta < 0:
                print('获得击杀金币不应该小于零', cur_state.tick, 'dead_golds', dead_golds,
                      'gold_delta', (next_hero.gold - cur_hero.gold))
                gold_delta = 0

        # if dead_golds > 0:
        #     print('dead_gold', dead_golds, 'delta_gold', gold_delta, "hero", hero_name, "tick", cur_state.tick)

        reward = float(gold_delta) / 100

        # Scale down all rewards, then clamp to [-1, 1].
        # NOTE(review): `reward` was already divided by 100 above, so the
        # effective scale here is gold_delta / 10000 -- confirm intended.
        final_reward = reward / 100
        final_reward = min(max(final_reward, -1), 1)

        # Special rewards, applied last so they override the score above.
        # The hero lands the killing blow: give the maximum reward directly
        # (because of gamma, this penalty is amplified).
        # NOTE(review): the `elif` is attached to the outer `if`, so when the
        # rival dies without damage from this hero and the hero also dies in
        # the same frame, the -1 death penalty is not applied.
        if cur_rival_hero.hp > 0 and next_rival_hero.hp <= 0:
            # print('lane rival hero %s died' % rival_hero_name)
            dmg_hit_rival = next_state.get_hero_total_dmg(
                hero_name, rival_hero_name)
            if dmg_hit_rival > 0:
                # print('hero %s dealt the last hit to the rival' % hero_name)
                final_reward = 1
                # Both heroes died in the same frame: the outcomes cancel out.
                if cur_hero.hp > 0 and next_hero.hp <= 0:
                    final_reward = 0
        elif cur_hero.hp > 0 and next_hero.hp <= 0:
            final_reward = -1
        return final_reward
예제 #2
    def cal_target_ppo(prev_state, cur_state, next_state, hero_name,
                       rival_hero_name, line_idx):
        # Compute the reward for `hero_name` over the transition from
        # `cur_state` to `next_state`, combining gold gained, damage dealt to
        # the rival hero and HP lost, plus small bonuses for hitting units or
        # towers; the result is scaled, clamped to [-1, 1] and finally
        # overridden by kill/death outcomes. Returns the final reward value.
        # `prev_state` is not referenced anywhere in the visible body.
        # NOTE(review): several statements in this listing are cut off
        # (get_dead_units_in_line, the dead_golds sum, both `*= 3 * ...`
        # scalings and if_hero_hit_any_unit lack their trailing arguments /
        # closing brackets), so the block does not parse as shown.

        # Only the gain/loss of the current frame is computed: gold obtained
        # plus the change in the rival's HP.
        # Collect the rival minions that died and derive their gold value
        # from the unit attributes.
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        rival_team = cur_rival_hero.team
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_hero = next_state.get_hero(hero_name)
        next_rival_hero = next_state.get_hero(rival_hero_name)
        # Find the rival minions that died near the hero.
        dead_units = StateUtil.get_dead_units_in_line(
            next_state, rival_team, line_idx, cur_hero,
        dead_golds = sum([
            StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units
        dead_unit_str = (','.join([u.unit_name for u in dead_units]))

        # Ignore small gold changes of the hero: a delta ending in 3 or 8, or
        # equal to half the dead units' gold plus 3, has 3 subtracted.
        # (presumably a periodic +3 passive income tick -- TODO confirm)
        gold_delta = next_hero.gold - cur_hero.gold
        if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int(
                dead_golds / 2) + 3:
            gold_delta -= 3

        # It is hard to tell whether the hero landed the last hit, so the
        # gold change is used instead: gold in excess of half the dead units'
        # value is treated as gold earned by the hero.
        gold_delta = gold_delta * 2 - dead_golds
        if gold_delta < 0:
            print('获得击杀金币不应该小于零', cur_state.tick, 'dead_units', dead_unit_str,
                  'gold_gain', (next_hero.gold - cur_hero.gold))
            gold_delta = 0

        # if dead_golds > 0:
        # print('dead_gold', dead_golds, 'delta_gold', gold_delta, "hero", hero_name, "tick", cur_state.tick)

        # Compute the damage dealt to the given rival hero and the damage
        # received.
        # Damage and hit information are both delayed, arriving up to two
        # frames later (usually in the same message, occasionally in the
        # second one).
        # Here only the damage the hero deals to the rival in the next frame
        # is counted.
        # Amplify the penalty for damage taken.
        # Amplify the reward for damaging the rival while it is at low HP.
        # Increase the weight of attack damage.
        # TODO define defensive/support spells; support spells must not be
        # cast carelessly, otherwise penalize.
        dmg = next_state.get_hero_total_dmg(
            hero_name, rival_hero_name) / float(cur_rival_hero.maxhp)
        dmg *= 3 * cur_rival_hero.maxhp / float(cur_rival_hero.hp +

        # When estimating the damage the player receives, only the change in
        # the next frame is considered; delayed cases such as tower
        # projectiles still in flight are not handled here.
        # NOTE(review): `next_hero.hp >= next_hero.hp` is always true, so the
        # condition reduces to `cur_hero.hp >= next_hero.hp`.
        self_hp_loss = (cur_hero.hp -
                        next_hero.hp) / float(cur_hero.maxhp) / 2 if (
                            cur_hero.hp >= next_hero.hp >= next_hero.hp) else 0
        self_hp_loss *= 3 * cur_hero.maxhp / float(cur_hero.hp +
        dmg_delta = int((dmg - self_hp_loss) * LineModel.REWARD_RIVAL_DMG)

        # Collect statistics and update variables.
        # print('reward debug info, hero: %s, max_gold: %s, gold_gain: %s, dmg: %s, hp_loss: %s, dmg_delta: %s, '
        #       'dead_units: %s'
        #       % (
        #       hero_name, str(dead_golds), str(gold_delta), str(dmg), str(self_hp_loss), str(dmg_delta), dead_unit_str))

        # The maximum reward is the gold for killing minions and towers plus
        # the reward for one full life of the rival's HP.
        # The maximum penalty is taking one full life of damage from the rival.
        # A score of zero corresponds to having obtained all death rewards.
        reward = float(gold_delta + dmg_delta) / 100

        # Special-case handling.
        # Encourage attacking rival minions and towers.
        if_hit_unit = next_state.if_hero_hit_any_unit(hero_name,
        if if_hit_unit is not None:
            # print("physical attack hit a minion", if_hit_unit)
            reward += 0.01
        if_hit_tower = next_state.if_hero_hit_tower(hero_name)
        if if_hit_tower is not None:
            # print("physical attack hit a tower", if_hit_tower)
            reward += 0.01

        # Scale down all rewards, then clamp to [-1, 1].
        final_reward = reward / 10
        final_reward = min(max(final_reward, -1), 1)

        # Special rewards, applied last so they override the score above.
        # The hero lands the killing blow: give the maximum reward directly
        # (because of gamma, this penalty is amplified).
        # NOTE(review): the `elif` is attached to the outer `if`, so when the
        # rival dies without damage from this hero and the hero also dies in
        # the same frame, the death penalty is not applied.
        if cur_rival_hero.hp > 0 and next_rival_hero.hp <= 0:
            # print('lane rival hero %s died' % rival_hero_name)
            dmg_hit_rival = next_state.get_hero_total_dmg(
                hero_name, rival_hero_name)
            if dmg_hit_rival > 0:
                # print('hero %s dealt the last hit to the rival' % hero_name)
                final_reward = 1
                # Both heroes died in the same frame: the outcomes cancel out.
                if cur_hero.hp > 0 and next_hero.hp <= 0:
                    final_reward = 0
        elif cur_hero.hp > 0 and next_hero.hp <= 0:
            # NOTE(review): -5 lies outside the [-1, 1] clamp applied above;
            # looks deliberate (amplified death penalty) -- confirm.
            final_reward = -5
        return final_reward
예제 #3
    def cal_target_v3(state_infos, state_idx, hero_name, rival_hero_name,
        # Compute the reward for `hero_name` at frame `state_idx` of
        # `state_infos`: a normalized score from gold gained, damage dealt
        # and HP lost, overridden by a series of rule-based penalties and a
        # final last-hit bonus. Returns the reward clamped to [-1, 1].
        # Reads frames state_idx - 1, state_idx, state_idx + 1 and
        # state_idx + 2, so those indices must be valid in `state_infos`.
        # NOTE(review): the signature and several calls in this listing are
        # cut off (missing trailing parameters/arguments and closing
        # brackets; `line_idx` is used below but not visible in the
        # signature), so the block does not parse as shown.

        # Only the gain/loss of the current frame is computed: gold obtained
        # and the HP changes of both sides.
        # Collect the rival minions that died and derive their gold value
        # from the unit attributes.
        cur_state = state_infos[state_idx]
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        rival_team = cur_rival_hero.team
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_state = state_infos[state_idx + 1]
        next_hero = next_state.get_hero(hero_name)
        next_next_state = state_infos[state_idx + 2]
        dead_units = StateUtil.get_dead_units_in_line(next_state, rival_team,
        dead_golds = sum([
            StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units
        dead_unit_str = (','.join([u.unit_name for u in dead_units]))

        # Ignore small gold changes of the hero: a delta ending in 3 or 8, or
        # equal to half the dead units' gold plus 3, has 3 subtracted.
        # (presumably a periodic +3 passive income tick -- TODO confirm)
        gold_delta = next_hero.gold - cur_hero.gold
        if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int(
                dead_golds / 2) + 3:
            gold_delta -= 3

        # Ignore the bounty for a hero death; that gold is accounted for
        # elsewhere.
        # For now the hero's gold gain is reset here, because if the hero did
        # well (last hit) a bonus is applied later on.
        # TODO this bounty should be a variable amount; currently the minimum
        # value is used.
        # The chained comparison means: gold_delta >= 80 and 80 > dead_golds.
        prev_state_rival = state_infos[state_idx - 1].get_hero(rival_hero_name)
        if prev_state_rival.hp > 0 and cur_rival_hero.hp <= 0 and gold_delta >= 80 > dead_golds:
            gold_delta = int(dead_golds / 2)

        # Compute the damage dealt to the given rival hero and the damage
        # received.
        # Damage and hit information are both delayed by two frames.
        # Amplify the penalty for damage taken.
        # Amplify the reward for damaging the rival while it is at low HP.
        # Increase the weight of attack damage.
        # TODO define defensive/support spells.
        dmg = next_next_state.get_hero_total_dmg(
            hero_name, rival_hero_name) / float(cur_rival_hero.maxhp)
        # Triple the damage score when the rival is at or below 30% HP.
        if float(cur_rival_hero.hp) / cur_rival_hero.maxhp <= 0.3:
            dmg *= 3
        self_hp_loss = (cur_hero.hp - next_hero.hp) / float(
            cur_hero.maxhp) if cur_hero.hp > next_hero.hp else 0
        # self_hp_loss *= 1.5
        dmg_delta = int((dmg - self_hp_loss) * LineModel.REWARD_RIVAL_DMG)

        # Collect statistics and update variables.
        # NOTE(review): the opening of this debug statement (likely a
        # `print(` line) is missing from the listing.
            'reward debug info, hero: %s, max_gold: %s, gold_gain: %s, dmg: %s, hp_loss: %s, dmg_delta: %s, dead_units: %s'
            % (hero_name, str(dead_golds), str(gold_delta), str(dmg),
               str(self_hp_loss), str(dmg_delta), dead_unit_str))

        # The maximum reward is the gold for killing minions and towers plus
        # the reward for one full life of the rival's HP.
        # The maximum penalty is taking one full life of damage from the rival.
        # A score of zero corresponds to having obtained all death rewards.
        max_score = dead_golds + LineModel.REWARD_RIVAL_DMG / 6
        min_score = -LineModel.REWARD_RIVAL_DMG / 6
        mid_score = int(dead_golds / 2)

        # Normalize the score linearly around mid_score: above it towards
        # max_score (positive), below it towards min_score (negative).
        hero_score = gold_delta + dmg_delta
        reward = 0
        if hero_score > mid_score:
            reward = (hero_score - mid_score) / float(max_score - mid_score)
        elif hero_score < mid_score:
            reward = -(mid_score - hero_score) / float(mid_score - min_score)

        # Special-case handling.

        # For a retreat, start by setting the penalty to -0.2.
        # (output_index 48 is presumably the retreat action -- TODO confirm)
        # NOTE(review): `reward = -1` is immediately overwritten by
        # `reward = -0.2`; an `else:` line appears to be missing here.
        cur_state = state_infos[state_idx]
        hero_action = cur_state.get_hero_action(hero_name)
        if hero_action.output_index == 48:
            if float(cur_hero.hp) / cur_hero.maxhp > 0.7:
                reward = -1
                reward = -0.2

        # The ultimate skill of specific heroes must hit the rival hero,
        # otherwise the maximum penalty applies.
        if_cast_ultimate_skill = RewardUtil.if_cast_skill(
            state_infos, state_idx, hero_name, 3)
        if if_cast_ultimate_skill:
            if_skill_hit_rival = RewardUtil.if_skill_hit_hero(
                state_infos, state_idx, hero_name, 3, rival_hero_name)
            if not if_skill_hit_rival:
                reward = -1

        # When hit by a tower, there is no penalty only if the rival is
        # killed; otherwise the maximum penalty applies. Only the current
        # frame is considered.
        hit_by_tower = RewardUtil.if_hit_by_tower(state_infos, state_idx, 3,
        if_rival_dead = RewardUtil.if_hero_dead(state_infos, state_idx, 3,
        if hit_by_tower and not if_rival_dead:
            reward = -1

        # Hero death yields -1 directly.
        if_hero_dead = RewardUtil.if_hero_dead(state_infos, state_idx, 6,
        if if_hero_dead:
            reward = -1

        # Whether the hero strayed too far from the lane.
        cur_state = state_infos[state_idx]
        leave_line = RewardUtil.if_hero_leave_line(state_infos, state_idx,
                                                   hero_name, line_idx)
        if leave_line:
            reward = -1

        # For now, ignore the case where the model chooses to leave the
        # selection range immediately (i.e. a retreat), so the hero can
        # create some distance when in danger.
        if RewardUtil.if_leave_linemodel_range(state_infos, state_idx,
                                               hero_name, line_idx):
            if hero_action.output_index != 48:
                reward = -1

        # Whether the hero returned to town with high HP.
        go_town_high_hp = RewardUtil.if_return_town_high_hp(
            state_infos, state_idx, hero_name, 0.3)
        if go_town_high_hp:
            reward = -1

        # Whether the return-to-town was interrupted.
        go_town_break = RewardUtil.if_return_town_break(
            state_infos, state_idx, hero_name)
        if go_town_break:
            reward = -1

        # Special rewards, applied last so they override everything above.
        # The hero lands the killing blow: give the maximum reward directly.
        cur_state = state_infos[state_idx]
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_state = state_infos[state_idx + 1]
        next_rival = next_state.get_hero(rival_hero_name)
        if cur_rival_hero.hp > 0 and next_rival.hp <= 0:
            print('对线英雄%s死亡' % rival_hero_name)
            next_next_state = state_infos[state_idx + 2]
            dmg_hit_rival = next_next_state.get_hero_total_dmg(
                hero_name, rival_hero_name)
            if dmg_hit_rival > 0:
                print('英雄%s对对方造成了最后一击' % hero_name)
                reward = 1
        # Clamp the result to [-1, 1].
        return min(max(reward, -1), 1)
예제 #4
    def cal_target_v2(state_infos, state_idx, hero_name, rival_hero_name,
        # Compute the reward for `hero_name` at frame `state_idx` of
        # `state_infos` by scoring the following frames (loop over i in
        # 1..9), combining the per-frame scores via LineModel.cal_score with
        # LineModel.REWARD_GAMMA, normalizing, and then applying rule-based
        # penalties and a final last-hit bonus. Returns the reward clamped
        # to [-1, 1].
        # Reads frames up to state_idx + 10, so those indices must be valid.
        # NOTE(review): the signature and several statements in this listing
        # are cut off (missing trailing parameters/arguments and closing
        # brackets; `line_idx` is used below but not visible in the
        # signature), so the block does not parse as shown.
        # NOTE(review): state_max_golds, state_gold_gains and
        # state_dmg_deltas are never appended to in the visible code; the
        # lines that filled them may have been dropped from the listing.
        state_max_golds = []
        state_gold_gains = []
        state_dmg_deltas = []
        state_score = []
        dead_unit_list = []

        # First compute what each hero gained.
        cur_state = state_infos[state_idx]

        # NOTE(review): looks like a leftover debugging hook (a place to set
        # a breakpoint at a specific tick); `db` is not used in visible code.
        if cur_state.tick >= 592548:
            db = True

        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        rival_team = cur_rival_hero.team
        for i in range(1, 10):
            # Collect the rival minions that died and derive their gold value
            # from the unit attributes.
            cur_hero = cur_state.get_hero(hero_name)
            cur_rival_hero = cur_state.get_hero(rival_hero_name)
            next_state = state_infos[state_idx + i]
            next_hero = next_state.get_hero(hero_name)
            next_next_state = state_infos[state_idx + i + 1]
            dead_units = StateUtil.get_dead_units_in_line(
                next_state, rival_team, line_idx)
            dead_golds = sum([
                StateUtil.get_unit_value(u.unit_name, u.cfg_id)
                for u in dead_units
            dead_unit_list.append(','.join([u.unit_name for u in dead_units]))

            # Ignore small gold changes of the hero: a delta ending in 3 or
            # 8, or equal to half the dead units' gold plus 3, has 3
            # subtracted.
            # (presumably a periodic +3 passive income tick -- TODO confirm)
            gold_delta = next_hero.gold - cur_hero.gold
            if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int(
                    dead_golds / 2) + 3:
                gold_delta -= 3

            # Ignore the bounty for a hero death; that gold is accounted for
            # elsewhere.
            # For now the hero's gold gain is reset here, because if the hero
            # did well (last hit) a bonus is applied later on.
            # TODO this bounty should be a variable amount; currently the
            # minimum value is used.
            # The chained comparison means: gold_delta >= 200 and
            # 200 > dead_golds.
            if gold_delta >= 200 > dead_golds:
                gold_delta = int(dead_golds / 2)


            # Compute the damage dealt to the given rival hero and the damage
            # received.
            # Damage and hit information are both delayed by two frames.
            # Amplify the penalty for damage taken.
            # Amplify the reward for damaging the rival while it is at low HP.
            # Increase the weight of attack damage.
            dmg = next_next_state.get_hero_total_dmg(hero_name,
            # Triple the damage score when the rival is at or below 30% HP.
            if float(cur_rival_hero.hp) / cur_rival_hero.maxhp <= 0.3:
                dmg *= 3
            self_dmg = cur_hero.hp - next_hero.hp if cur_hero.hp > next_hero.hp else 0
            self_dmg *= 1.5
            dmg_delta = int(
                float(dmg - self_dmg) / cur_rival_hero.maxhp *
            dmg_delta *= 6

            # Collect statistics and update variables; the next iteration
            # compares against the frame just processed.
            state_score.append(gold_delta + dmg_delta)
            cur_state = next_state

            # NOTE(review): the opening of this debug statement (likely a
            # `print(` line) and part of its argument list are missing from
            # the listing.
            'reward debug info, hero: %s, max_gold: %s, gold_gain: %s, dmg_delta: %s, dead_units: %s'
            % (hero_name, ','.join([str(s) for s in state_max_golds]),
               ','.join([str(s) for s in state_gold_gains]), ','.join(
                    for s in state_dmg_deltas]), ','.join(dead_unit_list)))

        # The maximum reward is the gold for killing minions and towers plus
        # the reward for one full life of the rival's HP.
        # The maximum penalty is taking one full life of damage from the rival.
        # A score of zero corresponds to having obtained all death rewards.
        # NOTE(review): the first argument of the cal_score call for
        # max_score is missing from the listing.
        max_score = LineModel.cal_score(
            LineModel.REWARD_GAMMA) + LineModel.REWARD_RIVAL_DMG
        min_score = -LineModel.REWARD_RIVAL_DMG
        mid_score = LineModel.cal_score(state_max_golds,
                                        LineModel.REWARD_GAMMA) / 2

        # Normalize the score linearly around mid_score: above it towards
        # max_score (positive), below it towards min_score (negative).
        hero_score = LineModel.cal_score(state_score, LineModel.REWARD_GAMMA)
        reward = 0
        if hero_score > mid_score:
            reward = (hero_score - mid_score) / (max_score - mid_score)
        elif hero_score < mid_score:
            reward = -(mid_score - hero_score) / (mid_score - min_score)

        # Special-case handling.

        # For a retreat, start by setting the penalty to 0.2.
        # (output_index 48 is presumably the retreat action -- TODO confirm)
        # NOTE(review): `reward = -1` is immediately overwritten by
        # `reward = -0.2`; an `else:` line appears to be missing here.
        # NOTE(review): `cur_hero` here is the value left over from the last
        # loop iteration, not the hero at `state_idx` -- confirm intended.
        cur_state = state_infos[state_idx]
        hero_action = cur_state.get_hero_action(hero_name)
        if hero_action.output_index == 48:
            if float(cur_hero.hp) / cur_hero.maxhp > 0.5:
                reward = -1
                reward = -0.2

        # The ultimate skill of specific heroes must hit the rival hero,
        # otherwise the maximum penalty applies.
        if_cast_ultimate_skill = RewardUtil.if_cast_skill(
            state_infos, state_idx, hero_name, 3)
        if if_cast_ultimate_skill:
            if_skill_hit_rival = RewardUtil.if_skill_hit_hero(
                state_infos, state_idx, hero_name, 3, rival_hero_name)
            if not if_skill_hit_rival:
                reward = -1

        # When hit by a tower, there is no penalty only if the rival is
        # killed; otherwise the maximum penalty applies. Only the current
        # frame is considered. (disabled)
        # hit_by_tower = RewardUtil.if_hit_by_tower(state_infos, state_idx, 3, hero_name)
        # if_rival_dead = RewardUtil.if_hero_dead(state_infos, state_idx, 3, rival_hero_name)
        # if hit_by_tower and not if_rival_dead:
        #     print('when hit by a tower, only killing the rival avoids the penalty')
        #     reward = -1

        # Hero death yields -1 directly.
        if_hero_dead = RewardUtil.if_hero_dead(state_infos, state_idx, 6,
        if if_hero_dead:
            reward = -1

        # Whether the hero strayed too far from the lane.
        cur_state = state_infos[state_idx]
        leave_line = RewardUtil.if_hero_leave_line(state_infos, state_idx,
                                                   hero_name, line_idx)
        if leave_line:
            reward = -1

        # For now, ignore the case where the model chooses to leave the
        # selection range immediately (i.e. a retreat), so the hero can
        # create some distance when in danger.
        if RewardUtil.if_leave_linemodel_range(state_infos, state_idx,
                                               hero_name, line_idx):
            if hero_action.output_index != 48:
                reward = -1

        # Whether the hero returned to town with high HP.
        go_town_high_hp = RewardUtil.if_return_town_high_hp(
            state_infos, state_idx, hero_name, 0.3)
        if go_town_high_hp:
            reward = -1

        # Whether the return-to-town was interrupted.
        go_town_break = RewardUtil.if_return_town_break(
            state_infos, state_idx, hero_name)
        if go_town_break:
            reward = -1

        # Special rewards, applied last so they override everything above.
        # The hero lands the killing blow: give the maximum reward directly.
        cur_state = state_infos[state_idx]
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_state = state_infos[state_idx + 1]
        next_rival = next_state.get_hero(rival_hero_name)
        if cur_rival_hero.hp > 0 and next_rival.hp <= 0:
            print('对线英雄%s死亡' % rival_hero_name)
            next_next_state = state_infos[state_idx + 2]
            dmg_hit_rival = next_next_state.get_hero_total_dmg(
                hero_name, rival_hero_name)
            if dmg_hit_rival > 0:
                print('英雄%s对对方造成了最后一击' % hero_name)
                reward = 1
        # Clamp the result to [-1, 1].
        return min(max(reward, -1), 1)
예제 #5
    def cal_target_v3(state_infos, state_idx, hero_name, rival_hero_name,
        # Compute the reward for `hero_name` at frame `state_idx` of
        # `state_infos` from gold gained, damage dealt to the rival hero, HP
        # lost over the next two frames and the damage ratio dealt to the
        # rival tower, then override it for a last hit (+1) or the hero's own
        # death (-1). Returns the reward clamped to [-1, 1].
        # Reads frames state_idx - 1, state_idx, state_idx + 1 and
        # state_idx + 2, so those indices must be valid in `state_infos`.
        # NOTE(review): the signature and several statements in this listing
        # are cut off (missing trailing parameters/arguments and closing
        # brackets), so the block does not parse as shown.

        # Only the gain/loss of the current frame is computed: gold obtained
        # plus the change in the rival's HP.
        # Collect the rival minions that died and derive their gold value
        # from the unit attributes.
        cur_state = state_infos[state_idx]
        cur_hero = cur_state.get_hero(hero_name)
        # NOTE(review): `act_info` is not used in the visible code.
        act_info = cur_state.get_hero_action(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        rival_team = cur_rival_hero.team
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_state = state_infos[state_idx + 1]
        next_hero = next_state.get_hero(hero_name)
        next_next_state = state_infos[state_idx + 2]
        next_next_hero = next_next_state.get_hero(hero_name)
        dead_units = StateUtil.get_dead_units_in_line(next_state, rival_team,
        dead_golds = sum([
            StateUtil.get_unit_value(u.unit_name, u.cfg_id) for u in dead_units
        dead_unit_str = (','.join([u.unit_name for u in dead_units]))

        # Ignore small gold changes of the hero: a delta ending in 3 or 8, or
        # equal to half the dead units' gold plus 3, has 3 subtracted.
        # (presumably a periodic +3 passive income tick -- TODO confirm)
        gold_delta = next_hero.gold - cur_hero.gold
        if gold_delta % 10 == 3 or gold_delta % 10 == 8 or gold_delta == int(
                dead_golds / 2) + 3:
            gold_delta -= 3

        # Not solved yet: gold that arrives with a delay after casting a
        # skill.

        # Ignore the bounty for a hero death; that gold is accounted for
        # elsewhere.
        # For now the hero's gold gain is reset here, because if the hero did
        # well (last hit) a bonus is applied later on.
        # TODO this bounty should be a variable amount; currently the minimum
        # value is used.
        # The chained comparison means: gold_delta >= 80 and 80 > dead_golds.
        prev_state_rival = state_infos[state_idx - 1].get_hero(rival_hero_name)
        if prev_state_rival.hp > 0 and cur_rival_hero.hp <= 0 and gold_delta >= 80 > dead_golds:
            gold_delta = int(dead_golds / 2)

        # Compute the damage dealt to the given rival hero and the damage
        # received.
        # Damage and hit information are both delayed, arriving up to two
        # frames later (usually in the same message, occasionally in the
        # second one).
        # Amplify the penalty for damage taken.
        # Amplify the reward for damaging the rival while it is at low HP.
        # Increase the weight of attack damage.
        # TODO define defensive/support spells; support spells must not be
        # cast carelessly, otherwise penalize.
        dmg = StateUtil.get_attack_cast_dmg(
            cur_state, next_state, next_next_state, hero_name,
            rival_hero_name) / float(cur_rival_hero.maxhp)
        dmg *= 3 * cur_rival_hero.maxhp / float(cur_rival_hero.hp +

        # When estimating the damage the player receives, the average over
        # the next two frames is used, because some damage is delayed: for
        # minion and building attacks the HP change lags behind due to
        # projectile travel and attack speed.
        # The loss only counts when HP is non-increasing across both frames.
        self_hp_loss = (cur_hero.hp - next_next_hero.hp) / float(
            cur_hero.maxhp) / 2 if (
                cur_hero.hp >= next_hero.hp >= next_next_hero.hp) else 0
        self_hp_loss *= 3 * cur_hero.maxhp / float(cur_hero.hp +
        dmg_delta = int((dmg - self_hp_loss) * LineModel.REWARD_RIVAL_DMG)

        hit_rival_tower_dmg_ratio = StateUtil.get_hit_rival_tower_dmg_ratio(
            cur_state, next_state, next_next_state, hero_name)

        # # Compute how much the towers were attacked.
        # self_tower_hp_change, destroyed = StateUtil.get_tower_hp_change(cur_state, next_state, hero_name, line_idx, self_tower=True)
        # rival_tower_hp_change, _ = StateUtil.get_tower_hp_change(cur_state, next_state, hero_name, line_idx, self_tower=False)

        # Collect statistics and update variables.
        # NOTE(review): the opening of this debug statement (likely a
        # `print(` line) and the end of its argument tuple are missing from
        # the listing.
            'reward debug info, hero: %s, max_gold: %s, gold_gain: %s, dmg: %s, hp_loss: %s, dmg_delta: %s, '
            'dead_units: %s, rival_tower: %s' %
            (hero_name, str(dead_golds), str(gold_delta), str(dmg),
             str(self_hp_loss), str(dmg_delta), dead_unit_str,

        # The maximum reward is the gold for killing minions and towers plus
        # the reward for one full life of the rival's HP.
        # The maximum penalty is taking one full life of damage from the rival.
        # A score of zero corresponds to having obtained all death rewards.
        reward = float(gold_delta +
                       dmg_delta) / 100 + hit_rival_tower_dmg_ratio

        # Special-case handling.
        # Encourage attacking rival minions.
        if_hit_unit = next_next_state.if_hero_hit_any_unit(
            hero_name, rival_hero_name)
        if if_hit_unit is not None:
            print("物理攻击到了小兵", if_hit_unit)
            reward += 0.01

        # For a retreat, start by setting the penalty to -0.2. (disabled)
        # cur_state = state_infos[state_idx]
        # hero_action = cur_state.get_hero_action(hero_name)
        # if hero_action.output_index == 48:
        #     if float(cur_hero.hp) / cur_hero.maxhp > 0.7:
        #         print('retreat with high HP')
        #         reward = -1
        #     else:
        #         print('base retreat penalty')
        #         reward = -0.2

        # # The ultimate skill of specific heroes must hit the rival hero.
        # if_cast_ultimate_skill = RewardUtil.if_cast_skill(state_infos, state_idx, hero_name, 3)
        # if if_cast_ultimate_skill:
        #     if_skill_hit_rival = RewardUtil.if_skill_hit_hero(state_infos, state_idx, hero_name, 3, rival_hero_name)
        #     if not if_skill_hit_rival:
        #         print('the ultimate skill of specific heroes must hit the rival hero')
        #         reward = -1
        # # Whether the hero strayed too far from the lane.
        # cur_state = state_infos[state_idx]
        # leave_line = RewardUtil.if_hero_leave_line(state_infos, state_idx, hero_name, line_idx)
        # if leave_line:
        #     print('too far from the lane')
        #     reward = -1
        # # For now, ignore the case where the model chooses to leave the selection range immediately, so the hero can create some distance when in danger.
        # if RewardUtil.if_leave_linemodel_range(state_infos, state_idx, hero_name, line_idx):
        #     if hero_action.output_index != 48:
        #         print('left the model range without retreating')
        #         reward = -1

        # Special rewards, applied last so they override the score above.
        # The hero lands the killing blow: give the maximum reward directly.
        cur_state = state_infos[state_idx]
        cur_hero = cur_state.get_hero(hero_name)
        cur_rival_hero = cur_state.get_hero(rival_hero_name)
        next_state = state_infos[state_idx + 1]
        next_hero = next_state.get_hero(hero_name)
        next_rival = next_state.get_hero(rival_hero_name)
        if cur_rival_hero.hp > 0 and next_rival.hp <= 0:
            print('对线英雄%s死亡' % rival_hero_name)
            next_next_state = state_infos[state_idx + 2]
            dmg_hit_rival = next_next_state.get_hero_total_dmg(
                hero_name, rival_hero_name)
            if dmg_hit_rival > 0:
                print('英雄%s对对方造成了最后一击' % hero_name)
                reward = 1

        # The hero's own death in this frame takes precedence over any
        # reward set above, including the last-hit bonus.
        if cur_hero.hp > 0 and next_hero.hp <= 0:
            reward = -1
        # Clamp the result to [-1, 1].
        return min(max(reward, -1), 1)