Example #1
File: lfa.py  Project: zhan0903/easy21
def expand_Q(w):
    # Expand the linear weights w into a full tabular Q(s, a) for plotting and MSE.
    Q = np.zeros((10, 21, 2))

    for dealer in DEALER_RANGE:
        for player in PLAYER_RANGE:
            for action in ACTIONS:
                state = State()
                state.dealercard = dealer
                state.playersum = player
                feats = phi(state, action)
                Q[dealer - 1, player - 1, action] = np.sum(feats * w)

    return Q
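
expand_Q evaluates the linear model on every (dealer, player, action) cell, so it relies on the feature map phi and the weight shape FEATS_SHAPE, which are defined elsewhere in lfa.py and not shown in this excerpt. A minimal sketch, assuming the overlapping coarse coding commonly used for Easy21 (3 dealer intervals x 6 player intervals x 2 actions = 36 binary features) and actions encoded as 0 and 1; the project's actual binning may differ:

import numpy as np

# Hypothetical coarse coding; interval bounds are inclusive and overlap on purpose.
DEALER_INTERVALS = [(1, 4), (4, 7), (7, 10)]
PLAYER_INTERVALS = [(1, 6), (4, 9), (7, 12), (10, 15), (13, 18), (16, 21)]
FEATS_SHAPE = (len(DEALER_INTERVALS), len(PLAYER_INTERVALS), 2)

def phi(state, action):
    # Binary feature tensor: 1 wherever (dealercard, playersum) falls inside an interval pair.
    feats = np.zeros(FEATS_SHAPE)
    for i, (d_lo, d_hi) in enumerate(DEALER_INTERVALS):
        for j, (p_lo, p_hi) in enumerate(PLAYER_INTERVALS):
            if d_lo <= state.dealercard <= d_hi and p_lo <= state.playersum <= p_hi:
                feats[i, j, action] = 1.0
    return feats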
Example #2
def Lfa():
    lmbd = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    learning_curves = {}
    num_episodes = 2000

    # Reference Q (dumped after 1,000,000 episodes) used as the MSE target.
    with open("./Q_dump_episodes_1000000.pkl", "rb") as f:
        opt_value = pickle.load(f)

    for item in lmbd:
        Q_value, error_history = lfa_learn(item, opt_value, num_episodes)
        learning_curves[item] = error_history

    plot_file = "./outcome/lfa_error_{}_episodes_time_{}.pdf".format(num_episodes, time.time())
    plot_learning_curve(learning_curves, save=plot_file)
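
The opt_value loaded above is the target that lfa_learn and Sarsa_lamda_Control report their error against; the mse helper itself is not shown in these excerpts. A one-line sketch, assuming it is a plain mean squared error over all state-action entries:

import numpy as np

def mse(Q, opt_value):
    # Mean squared error between the learned Q and the reference Q.
    return np.mean((Q - opt_value) ** 2)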
Example #3
File: sarsa.py  Project: zhan0903/easy21
def Sarsa_lamda_Control(lmbd, opt_value, num_episodes):
    # initialize tabular Q, visit counts, and the learning curve
    value = np.zeros((10, 21, 2))
    counter = np.zeros((10, 21, 2))
    error_history = []

    for episode in range(1, num_episodes + 1):
        # initialize env
        state1 = State()
        state1.dealercard = random.randint(1, 10)
        state1.playersum = random.randint(1, 10)

        # eligibility traces, reset at the start of every episode
        E = np.zeros((10, 21, 2))
        while state1 != "terminal":
            action1 = Epsilon_greedy_policy(value, counter, state1)
            state2, reward = Step(state1, action1)
            idx1 = (state1.dealercard - 1, state1.playersum - 1, action1)
            Q1 = value[idx1]

            if state2 == "terminal":
                Q2 = 0.0
            else:
                # on-policy: choose the next action with the same epsilon-greedy policy
                action2 = Epsilon_greedy_policy(value, counter, state2)
                idx2 = (state2.dealercard - 1, state2.playersum - 1, action2)
                Q2 = value[idx2]

            counter[idx1] += 1
            E[idx1] += 1  # accumulating trace

            alpha = 1.0 / counter[idx1]          # count-based step size
            delta = reward + GAMMA * Q2 - Q1     # TD error

            value += alpha * delta * E
            E *= GAMMA * lmbd                    # decay all traces

            state1 = state2

        error_history.append((episode, mse(value, opt_value)))

    return value, error_history
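
Epsilon_greedy_policy is assumed above but not shown. A sketch of a count-based epsilon-greedy choice, assuming the schedule epsilon = N0 / (N0 + N(s)) with N0 = 100 as in the original Easy21 exercise, and actions encoded as 0 and 1 (the project's actual function and constant may differ):

import random
import numpy as np

N0 = 100  # assumed exploration constant

def Epsilon_greedy_policy(value, counter, state):
    idx = (state.dealercard - 1, state.playersum - 1)
    n_state = np.sum(counter[idx])          # visits to this state, summed over both actions
    epsilon = N0 / (N0 + n_state)
    if random.random() < epsilon:
        return random.randint(0, 1)         # explore: random action
    return int(np.argmax(value[idx]))       # exploit: greedy action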
Example #4
def Sarsa():
    lmbd = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    learning_curves = {}
    num_episodes = 20000

    # Reference Q (dumped after 1,000,000 episodes) used as the MSE target.
    with open("./Q_dump_episodes_1000000.pkl", "rb") as f:
        opt_value = pickle.load(f)

    for item in lmbd:
        Q_value, error_history = Sarsa_lamda_Control(item, opt_value, num_episodes)
        learning_curves[item] = error_history

    plot_file = "./outcome/Sarsa_error_{}_episodes_time_{}.pdf".format(num_episodes, time.time())
    plot_learning_curve(learning_curves, save=plot_file)
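
Both control loops depend on the environment transition Step and the discount GAMMA, which are defined elsewhere in the project. A compact sketch of the Easy21 rules (cards valued 1-10, black with probability 2/3, red with 1/3; the dealer sticks at 17 or above; win +1, lose -1, draw 0), under the assumed action encoding 1 = hit, 0 = stick:

import random

GAMMA = 1.0  # undiscounted episodic task (assumed)

def draw_card():
    # Card value 1-10 uniformly; black cards add, red cards (probability 1/3) subtract.
    value = random.randint(1, 10)
    return value if random.random() < 2.0 / 3.0 else -value

def Step(state, action):
    if action == 1:                              # hit: player draws a card
        state.playersum += draw_card()
        if state.playersum < 1 or state.playersum > 21:
            return "terminal", -1                # player goes bust
        return state, 0
    dealer = state.dealercard                    # stick: dealer plays out its hand
    while 1 <= dealer < 17:
        dealer += draw_card()
    if dealer < 1 or dealer > 21:
        return "terminal", 1                     # dealer goes bust
    if dealer > state.playersum:
        return "terminal", -1
    if dealer < state.playersum:
        return "terminal", 1
    return "terminal", 0                         # draw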
Example #5
File: lfa.py  Project: zhan0903/easy21
def lfa_learn(lmbd, opt_value, num_episodes):
    # initialize
    Q = np.zeros((10, 21, 2))
    error_history = []
    # small random initial weights for the linear approximator
    w = (np.random.rand(*FEATS_SHAPE) - 0.5) * 0.001

    for episode in range(1, num_episodes + 1):
        # initialize env
        state1 = State()
        state1.dealercard = random.randint(1, 10)
        state1.playersum = random.randint(1, 10)
        # eligibility traces over the weight vector
        E = np.zeros_like(w)

        while state1 != "terminal":
            Qhat1, action1 = policy(state1, w)
            state2, reward = Step(state1, action1)

            # bootstrap target: Qhat(S', A') is zero at the terminal state
            if state2 == "terminal":
                Qhat2 = 0.0
            else:
                Qhat2, action2 = policy(state2, w)

            # for a linear approximator, grad_w Qhat(s, a) = phi(s, a)
            feats1 = phi(state1, action1)
            grad_w_Qhat1 = feats1

            delta = reward + GAMMA * Qhat2 - Qhat1   # TD error
            E = GAMMA * lmbd * E + grad_w_Qhat1      # accumulate eligibility traces
            dw = ALPHA * delta * E

            w += dw
            state1 = state2

            # expand w to a full tabular Q and record the MSE after every step
            Q = expand_Q(w)
            error_history.append((episode, mse(Q, opt_value)))

    return Q, error_history
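
lfa_learn also relies on policy(state, w), ALPHA, and GAMMA from lfa.py, none of which appear in this excerpt. A sketch of a constant-epsilon greedy policy over the approximated values, assuming epsilon = 0.05 and step size ALPHA = 0.01 as in the original Easy21 exercise, and reusing the hypothetical phi sketched after Example #1:

import random
import numpy as np

EPSILON = 0.05  # assumed constant exploration rate
ALPHA = 0.01    # assumed constant step size

def policy(state, w):
    # Epsilon-greedy over Qhat(s, a) = sum(phi(s, a) * w); returns the chosen value and action.
    qhat = np.array([np.sum(phi(state, a) * w) for a in (0, 1)])
    if random.random() < EPSILON:
        action = random.randint(0, 1)
    else:
        action = int(np.argmax(qhat))
    return qhat[action], action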