Exemplos de str_key em Python, exemplos de utils.str_key em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: mdp.py Projeto: MonaHe123/coding-exercise

def update_V_without_pi(MDP, V):
    '''
    Run one sweep over all states, refreshing each value without a policy.

    The sweep writes into a copy of V and returns that copy. Each state's
    entry, keyed by str_key(state), is replaced by the result of
    compute_v_from_max_q.
    '''
    states, _, _, _, _ = MDP  # MDP must unpack into exactly five parts
    refreshed = V.copy()
    for state in states:
        # The working copy (not V) is handed to the helper, so values already
        # refreshed earlier in this sweep are visible to later states. The
        # original author notes this should converge faster; passing V
        # instead would give a plain round-by-round update, which also works.
        new_value = compute_v_from_max_q(MDP, refreshed, state)
        refreshed[str_key(state)] = new_value
    return refreshed

Exemplo n.º 2

0

Exibir arquivo

def update_V_without_pi(MDP, V):
    '''
    Refresh every state's value from its successor states, with no policy.

    Returns a copy of V in which each state's entry (keyed by
    str_key(state)) has been overwritten with compute_v_from_max_q's result.
    '''
    all_states, _, _, _, _ = MDP  # only the first of the five parts is used
    next_V = V.copy()
    for state in all_states:
        # The helper reads next_V, so it sees updates made earlier in the loop.
        backed_up = compute_v_from_max_q(MDP, next_V, state)
        next_V[str_key(state)] = backed_up
    return next_V

Exemplo n.º 3

0

Exibir arquivo

def update_V(MDP, V, Pi):
    '''
    Perform one sweep of value updates for the given MDP under strategy Pi.

    A copy of V is filled in state by state and returned. Each entry is
    keyed by str_key(state) and holds the result of compute_v.
    '''
    state_space, _, _, _, _ = MDP  # only the first of the five parts is used
    result = V.copy()
    for state in state_space:
        # compute_v is given the working copy, so it observes the values
        # written for states visited earlier in this same sweep.
        value_under_pi = compute_v(MDP, result, Pi, state)
        result[str_key(state)] = value_under_pi
    return result

Exemplo n.º 4

0

Exibir arquivo

Arquivo: MDP.py Projeto: yangqinjiang/reinforcement-learning

def update_V_without_pi(MDP, V):
    '''
    Update state values directly from the values of successor states,
    without depending on a policy.

    Returns a copy of V whose entries (keyed by str_key(state)) have been
    replaced by the results of compute_v_from_max_q.
    '''
    states, _, _, _, _ = MDP  # only the first of the five parts is used
    V_next = V.copy()
    for current in states:
        # V_next itself is passed in, so later states see earlier updates.
        best_value = compute_v_from_max_q(MDP, V_next, current)
        V_next[str_key(current)] = best_value
    return V_next

Exemplo n.º 5

0

Exibir arquivo

Arquivo: test.py Projeto: 15779235038/algorithm

def update_V(MDP, V, Pi):
    '''
    Given an MDP and a policy Pi, update the value function V under it.

    Prints a fixed progress message, then returns a copy of V in which each
    state's entry (keyed by str_key(state)) holds the result of compute_v.
    '''
    states, _, _, _, _ = MDP  # only the first of the five parts is used
    new_V = V.copy()
    # NOTE(review): the message text hard-codes a count of five states; it is
    # not derived from the actual number of states in the MDP.
    print('对所有的状态，计算其V，需要计算状态，有5个状态')
    for state in states:
        # The working copy is passed to compute_v, so values written earlier
        # in this loop are used for the states that follow.
        state_value = compute_v(MDP, new_V, Pi, state)
        new_V[str_key(state)] = state_value
    return new_V

Exemplo n.º 6

0

Exibir arquivo

Arquivo: mdp.py Projeto: MonaHe123/coding-exercise

def update_v(MDP, V, Pi):
    '''
    Sweep once over all states, updating each value under policy Pi.

    Returns a copy of V whose entries (keyed by str_key(state)) have been
    replaced by the results of compute_v.
    '''
    states, _, _, _, _ = MDP  # only the first of the five parts is used
    # This dynamic-programming step computes new values from new values:
    # the working copy is both read by compute_v and written to.
    working = V.copy()
    for state in states:
        # The most basic variant would instead update round by round,
        # keeping the values being read separate from those being written.
        fresh = compute_v(MDP, working, Pi, state)
        working[str_key(state)] = fresh
    return working

Exemplo n.º 7

0

Exibir arquivo

Arquivo: MDP.py Projeto: yangqinjiang/reinforcement-learning

def update_V(MDP, V, Pi):
    '''
    Update state values by backing up under the current policy (the original
    author notes this is not required for the chapter).

    Given an MDP and a policy Pi, returns a copy of V in which each state's
    entry (keyed by str_key(state)) holds the result of compute_v.
    '''
    state_list, _, _, _, _ = MDP  # only the first of the five parts is used
    V_updated = V.copy()
    for state in state_list:
        # compute_v reads V_updated, so it sees values already rewritten
        # for states earlier in the iteration order.
        backup = compute_v(MDP, V_updated, Pi, state)
        V_updated[str_key(state)] = backup
    return V_updated

Exemplo n.º 8

0

Exibir arquivo

 def get_state_name(self, dealer):
     '''Return str_key applied to the state from self.get_state(dealer).'''
     current_state = self.get_state(dealer)
     return str_key(current_state)

Exemplo n.º 9

0

Exibir arquivo

 def get_state_name(self, dealer):
     '''Return the key of the state that self.get_state(dealer) yields.'''
     observed = self.get_state(dealer)
     state_key = str_key(observed)
     return state_key