Exemplos de getOptimalAction em Python

Linguagem de programação: Python

Namespace / nome do pacote: mcar_sarsa_semigrad_TileSutton

Método / Função: getOptimalAction

Exemplos em hotexamples.com: 2

getOptimalAction em Python - 2 exemplos encontrados. Esses são os exemplos do mundo real mais bem avaliados de mcar_sarsa_semigrad_TileSutton.getOptimalAction em Python extraídos de projetos de código aberto. Você pode avaliar os exemplos para nos ajudar a melhorar a qualidade deles.

Exemplo n.º 1

0

Exibir arquivo

def get_feature_half_planes(state_feature_map, env, start_state, valueFunction, horizon, min_margin = 0, discount = 1.0, threshold = 0.0001):
    """Collect normalized feature-count differences between the optimal first action and every other one.

    A rollout is run from ``start_state`` for the action chosen by
    ``getOptimalAction`` and for each remaining action in
    ``range(env.action_space.n)``. For every non-optimal action the
    difference ``opt_fcounts - fcounts`` is thresholded, L2-normalized and
    stored as a tuple.

    Args:
        state_feature_map: Forwarded unchanged to ``compute_feature_counts``.
        env: Environment exposing ``action_space.n``; forwarded to
            ``run_rollout``.
        start_state: Indexable state; elements 0 and 1 are passed to
            ``getOptimalAction``.
        valueFunction: Forwarded to ``getOptimalAction`` and ``run_rollout``.
        horizon: Accepted but not used by this function.
        min_margin: Minimum required value of the largest sup-norm seen
            among the raw difference vectors.
        discount: Forwarded to ``compute_feature_counts``.
        threshold: Components whose absolute value does not exceed this are
            set to 0.0 before normalization.

    Returns:
        A set of tuples, one per distinct non-zero normalized difference
        vector. An empty set is returned if any other action's rollout
        reward exceeds the optimal action's reward, or if the largest
        sup-norm margin is below ``min_margin``.
    """
    # NOTE(review): assumes compute_feature_counts returns a 1-D numpy array
    # and run_rollout returns (states_visited, reward) -- confirm against
    # their definitions.
    opt_action = getOptimalAction(start_state[0], start_state[1], valueFunction)
    constraints = set()

    # Feature counts along the rollout that starts with the optimal action.
    opt_states, opt_reward = run_rollout(env, start_state, opt_action, valueFunction, render=False)
    opt_fcounts = compute_feature_counts(state_feature_map, opt_states, discount)

    largest_margin = 0
    for candidate in range(env.action_space.n):
        if candidate == opt_action:
            continue

        other_states, other_reward = run_rollout(env, start_state, candidate, valueFunction, render=False)
        # Bail out entirely when the supposedly optimal action is beaten.
        if opt_reward - other_reward < 0:
            return set()

        other_fcounts = compute_feature_counts(state_feature_map, other_states, discount)
        diff = opt_fcounts - other_fcounts

        # The margin is measured on the raw difference, before truncation.
        sup_norm = np.linalg.norm(diff, np.inf)
        if sup_norm > largest_margin:
            largest_margin = sup_norm

        # Zero every component that is not strictly above the threshold
        # (this includes NaN entries, matching an element-wise `>` test).
        significant = np.abs(diff) > threshold
        diff[~significant] = 0.0

        if significant.any():
            # Store the unit-length normal as a hashable tuple.
            constraints.add(tuple(diff / np.linalg.norm(diff)))

    return set() if largest_margin < min_margin else constraints

Exemplo n.º 2

0

Exibir arquivo

def get_feature_half_planes(state_feature_map, env, start_state, valueFunction, horizon, discount=1.0):
    """Print the feature counts of a rendered rollout for every initial action.

    For each action in ``range(env.action_space.n)`` this prints the action,
    the result of ``getOptimalAction`` for ``start_state``, and the feature
    counts computed from the states visited by ``run_rollout``.

    Args:
        state_feature_map: Forwarded unchanged to ``compute_feature_counts``.
        env: Environment exposing ``action_space.n``; forwarded to
            ``run_rollout``.
        start_state: Indexable state; elements 0 and 1 are passed to
            ``getOptimalAction``.
        valueFunction: Forwarded to ``getOptimalAction`` and ``run_rollout``.
        horizon: Accepted but not used by this function.
        discount: Forwarded to ``compute_feature_counts``.

    Returns:
        None. Output is written with ``print`` only.
    """
    num_actions = env.action_space.n
    for first_action in range(num_actions):
        print("init action", first_action)

        # Queried on every iteration, after the action banner is printed.
        best_action = getOptimalAction(start_state[0], start_state[1], valueFunction)
        print("opt", best_action)

        trajectory = run_rollout(env, start_state, first_action, valueFunction, render=True)
        counts = compute_feature_counts(state_feature_map, trajectory, discount)
        print(counts)