Example #1
0
def testAlgo(init=0):
    """Run one greedy evaluation episode of the trained Q-network.

    Args:
        init: which grid initializer to use — 0: initGrid(), 1: initGridPlayer(),
              anything else: initGridRand().

    Returns:
        The terminal reward of the episode, or -10 if the episode is cut off
        after more than 10 moves.
    """
    arrow = ["^", "v", "<", ">"]
    i = 0
    if init == 0:
        state = initGrid()
    elif init == 1:
        state = initGridPlayer()
    else:
        state = initGridRand()

    drawgridworld.draw_state(state, 0)
    time.sleep(0.5)
    status = 1
    # while game still in progress
    while (status == 1):
        qval = model.predict(state.reshape(1, 64), batch_size=1)
        action = (np.argmax(qval))  # take action with highest Q-value
        print('Move #: %s; Taking action: %s' % (i, arrow[action]))
        state = makeMove(state, action)
        reward = getReward(state)
        if reward != -1:  # non-step reward means the episode just ended
            status = 0
            print("Reward: %s" % (reward, ))
        i += 1
        drawgridworld.draw_state(state, i - 10)
        time.sleep(0.5)
        # If we're taking more than 10 actions, just stop — we probably can't
        # win this game.  Bug fix: only declare a timeout loss while the game
        # is still in progress; previously a terminal reward earned on the
        # 11th move was silently overwritten with -10.
        if status == 1 and i > 10:
            print("Game lost; too many moves.")
            reward = -10
            break
    return reward
Example #2
0
def testAlgo(init=0):
    """Run one greedy evaluation episode, logging moves to a buffered string.

    Args:
        init: which grid initializer to use — 0: initGrid(), 1: initGridPlayer(),
              anything else: initGridRand().

    Returns:
        The terminal reward of the episode, or -10 if no terminal reward was
        reached within 10 moves.  The accumulated log is printed only for a
        -10 terminal reward or a timeout.
    """
    i = 0
    if init == 0:
        state = initGrid()
    elif init == 1:
        state = initGridPlayer()
    else:
        # Bug fix: was `elif init == 2:`, which left `state` unbound (and
        # raised UnboundLocalError) for any other init value.  The sibling
        # versions of this function all use a plain `else` here.
        state = initGridRand()

    str_to_print = 'Initial State:'
    str_to_print += '\n%s' % dispGrid(state)

    # while game still in progress (hard cap of 10 moves)
    for i in range(10):
        qval = model.predict(state.reshape(1, 64), batch_size=1)
        str_to_print += '\n%s' % str(qval)
        action = np.argmax(qval)  # take action with highest Q-value

        new_state = makeMove(state, action)
        reward = getReward(new_state)
        state = new_state

        # (flattened the original chained %-formatting; output is identical)
        str_to_print += '\nMove #: %s; Taking action: %s' % (i, action)
        str_to_print += '\n%s' % dispGrid(state)

        # -1 and -5 are per-step rewards; anything else is terminal.
        if reward != -1 and reward != -5:
            if reward == -10:
                print(str_to_print + '\nReward: %s' % (reward, ))
            return reward
    print(str_to_print + '\nGame lost; too many moves.')
    return -10
def testAlgo(init=0):
    """Run one greedy evaluation episode of the trained Q-network.

    Args:
        init: which grid initializer to use — 0: initGrid(), 1: initGridPlayer(),
              anything else: initGridRand().

    Returns:
        The terminal reward of the episode, or -10 if the episode is cut off
        after more than 10 moves.
    """
    arrow = ["^", "v", "<", ">"]
    i = 0
    if init == 0:
        state = initGrid()
    elif init == 1:
        state = initGridPlayer()
    else:
        state = initGridRand()

    drawgridworld.draw_state(state, 0)
    time.sleep(0.5)
    status = 1
    # while game still in progress
    while (status == 1):
        qval = model.predict(state.reshape(1, 64), batch_size=1)
        action = (np.argmax(qval))  # take action with highest Q-value
        print('Move #: %s; Taking action: %s' % (i, arrow[action]))
        state = makeMove(state, action)
        reward = getReward(state)
        if reward != -1:  # non-step reward means the episode just ended
            status = 0
            print("Reward: %s" % (reward,))
        i += 1
        drawgridworld.draw_state(state, i - 10)
        time.sleep(0.5)
        # If we're taking more than 10 actions, just stop — we probably can't
        # win this game.  Bug fix: only declare a timeout loss while the game
        # is still in progress; previously a terminal reward earned on the
        # 11th move was silently overwritten with -10.
        if status == 1 and i > 10:
            print("Game lost; too many moves.")
            reward = -10
            break
    return reward
Example #4
0
def testAlgo(init=0):
    """Interactive match on one grid: the Q-network agent moves first each
    turn, then the human enters a move; the first terminal reward (+/-10)
    ends the game.  Returns None."""
    state = initGrid()
    print("Initial State:")
    dispGrid(state)
    status = 1
    turn = 0
    while status == 1:
        # --- agent's half-turn: greedy action from the Q-network ---
        # NOTE(review): state is presumably an 80-element grid encoding; the
        # trailing makeMove/getReward arguments (4 vs 3) select which piece
        # moves — confirm against their definitions.
        q_values = model.predict(state.reshape(1, 80), batch_size=1)
        agent_action = np.argmax(q_values)
        print('Move #: %s; Taking action: %s' % (turn, agent_action))
        state = makeMove(state, agent_action, 4)
        dispGrid(state)
        outcome = getReward(state)
        if outcome == -10:
            print("The agent steped on the pit.. You won!")
            state = 0
            break
        if outcome == 10:
            print("The agent won!")
            state = 0
            break

        # --- human's half-turn: read a move from stdin and apply it ---
        print("Enter your move (0,1,2,3) for (up,down,left,right)")
        player_action = int(input())
        state = makeMove(state, player_action, 3)
        outcome = getReward(state, 3)
        dispGrid(state)
        if outcome == -10:
            print("You Lost!")
            state = 0
            break
        if outcome == 10:
            print("You won!")
            state = 0
            break
        turn += 1
Example #5
0
#model.add(Dropout(0.2))

# Output layer: one Q-value per action (4 actions).
# NOTE(review): `init=` is the pre-Keras-2 spelling of `kernel_initializer=`
# — this snippet targets an old Keras; confirm before upgrading.
model.add(Dense(4, init='lecun_uniform'))
model.add(Activation(
    'linear'))  #linear output so we can have range of real-valued outputs

# Mean-squared error on predicted vs. target Q-values, optimized with RMSprop.
rms = RMSprop()
model.compile(loss='mse', optimizer=rms)

epochs = 1000
gamma = 0.9  #since it may take several moves to goal, making gamma high
epsilon = 1  # starting exploration rate for the epsilon-greedy policy below

for i in range(epochs):

    state = initGrid()
    status = 1
    #while game still in progress
    while (status == 1):
        #We are in state S
        #Let's run our Q function on S to get Q values for all possible actions
        qval = model.predict(state.reshape(1, 80), batch_size=1)
        if (random.random() < epsilon):  #choose random action
            action = np.random.randint(0, 4)
        else:  #choose best action from Q(s,a) values
            action = (np.argmax(qval))
        #Take action, observe new state S'
        new_state = makeMove(state, action)
        new_state = makeMove(new_state, np.random.randint(0, 4), 3)
        #Observe reward
        reward = getReward(new_state)