Python reward示例

编程语言: Python

命名空间/包名称: main

方法/功能: reward

hotexamples.com的示例: 9

Python reward - 共找到 9 个示例。以下是从开源项目中提取的、评价最高的 main.reward 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

示例#1

显示文件

 def test_reward_lose(self):
     # Calling m.reward() with no argument is expected to reset the combo
     # to 0 while leaving combo_max and points at the values set here.
     m.I.combo, m.I.combo_max, m.I.points = 5, 99, 5
     m.reward()
     for attr, expected in (("combo", 0), ("combo_max", 99), ("points", 5)):
         self.assertEqual(getattr(m.I, attr), expected)

示例#2

显示文件

def play(model):
    """Run an interactive game between `model` and moves typed on stdin.

    The board is an N x N int8 array (`N` is a module-level name). Stones
    placed by the model are stored as 1 and stones placed from stdin as -1.
    The model makes the first move; afterwards each loop iteration reads one
    typed move and then lets the model answer. The board is displayed with
    ``main.dispBoard`` after every stone.

    Args:
        model: Passed through unchanged to ``main.getMove``.

    Returns:
        None. The function returns as soon as ``main.winning(-board)`` is
        truthy after a typed move, or ``main.reward`` is nonzero for the
        model's move.

    Note:
        Typed input is expected as two integers separated by a single space
        (row, then column). It is not validated: malformed input raises, and
        an occupied cell is silently overwritten.
    """
    board = np.zeros((N, N), dtype=np.int8)
    # False only for the first iteration's skip: the model opens the game.
    turn = True

    while True:
        if not turn:
            # Move read from stdin; any tokens after the first two are ignored.
            coords = map(int, input().split(' '))
            row = next(coords)
            col = next(coords)
            # Equivalent to negating the board, writing 1 and negating back.
            board[row, col] = -1

            main.dispBoard(board)
            # The negated board presents the typed stones as 1 to the check.
            if main.winning(-board):
                return
            turn = True

        # Columns of `actions` are the (row, col) coordinates of empty cells.
        actions = np.array(np.where(board == 0))

        # main.getMove returns an index into the columns of `actions`.
        move_index = main.getMove(board, model, True, 2)
        action = actions[:, move_index]

        # The move is evaluated before the stone is actually placed.
        outcome = main.reward(board, action)
        board[action[0], action[1]] = 1

        main.dispBoard(board)

        # A nonzero reward ends the game; its exact meaning (win, full board)
        # is defined by main.reward -- TODO confirm.
        if outcome != 0:
            return

        # Hand the next iteration's first move to stdin.
        turn = False

示例#3

显示文件

def com_turn(image, board, points):
    """Place the computer's stone on `board` and draw it onto `image`.

    Args:
        image: Drawing target handed to ``cv2.circle``; modified in place.
        board: 2-D array in which 0 marks an empty cell; modified in place.
        points: Not used by this function.

    Returns:
        A ``(board, reward)`` tuple, where ``reward`` is the value that
        ``main.reward`` gave for the move before the stone was placed.
    """
    # Columns of `empty_cells` are the (row, col) coordinates of empty cells.
    empty_cells = np.array(np.where(board == 0))

    # `model` is not a parameter; it is resolved from the enclosing scope.
    chosen = main.getMove(board, model, True, depth=2)
    action = empty_cells[:, chosen]

    # Evaluate the move first, then put the stone down.
    move_reward = main.reward(board, action)
    board[action[0], action[1]] = 1

    # Map (row, col) to pixel coordinates and swap the order for drawing.
    # Presumably 70 is the cell size in pixels and 35 its half, and the swap
    # yields the (x, y) order the drawing call expects -- TODO confirm.
    center = tuple((action * 70 + 35).tolist()[::-1])
    cv2.circle(image, center=center, radius=20, color=255, thickness=-1)

    return board, move_reward

示例#4

显示文件

 def test_reward_gain_vlv6(self):
     # With m.I.lvl set to 6, m.reward(1) is expected to leave points at 15
     # and time_left at 15. Starting values come from fixture code that is
     # not part of this method.
     m.I.lvl = 6
     m.reward(1)
     for attr, expected in (("points", 15), ("time_left", 15)):
         self.assertEqual(getattr(m.I, attr), expected)

示例#5

显示文件

 def test_reward_gain_vlv5(self):
     # With m.I.lvl set to 5, m.reward(1) is expected to leave points at 10
     # and time_left at 13. Starting values come from fixture code that is
     # not part of this method.
     m.I.lvl = 5
     m.reward(1)
     for attr, expected in (("points", 10), ("time_left", 13)):
         self.assertEqual(getattr(m.I, attr), expected)

示例#6

显示文件

 def test_reward_gain_vlv4(self):
     # With m.I.lvl set to 4, m.reward(1) is expected to leave points at 5
     # and time_left at 12. Starting values come from fixture code that is
     # not part of this method.
     m.I.lvl = 4
     m.reward(1)
     for attr, expected in (("points", 5), ("time_left", 12)):
         self.assertEqual(getattr(m.I, attr), expected)

示例#7

显示文件

 def test_reward_gain_vlv0(self):
     # m.I.lvl is left untouched here, so the level is whatever the fixture
     # code provides (presumably 0, going by the test name -- TODO confirm).
     # m.reward(1) is then expected to leave points at 1 and time_left at 11.
     m.reward(1)
     for attr, expected in (("points", 1), ("time_left", 11)):
         self.assertEqual(getattr(m.I, attr), expected)

示例#8

显示文件

 def test_reward_gain_combo_max(self):
     # Starting from a combo of 40, m.reward(1) is expected to leave the
     # combo at 40 and the level at 8 (presumably 40 is the combo cap --
     # TODO confirm against m.reward).
     m.I.combo = 40
     m.reward(1)
     for attr, expected in (("combo", 40), ("lvl", 8)):
         self.assertEqual(getattr(m.I, attr), expected)

示例#9

显示文件

 def test_reward_gain_vlv8(self):
     # With m.I.lvl set to 8, m.reward(1) is expected to leave points at 30
     # and time_left at 18. Starting values come from fixture code that is
     # not part of this method.
     m.I.lvl = 8
     m.reward(1)
     for attr, expected in (("points", 30), ("time_left", 18)):
         self.assertEqual(getattr(m.I, attr), expected)