Example #1
trans_cost = 0  # The cost of a single transaction (expressed in pips)
batch_size = 30  # The number of (state, action, reward, next_state) tuples to collect before experience replay
stop_loss_value = -50  # The maximum loss we can tolerate (expressed in pips)
performance_file_path = "performance/train_performance.txt"  # Path of the training performance log file
log = "performance/train_log.txt"  # Path of the training log file
models_path = "models/"  # Path where the models are stored
n_prev_iterations = len(next(os.walk(models_path))[2])  # Number of models already saved in models_path
setup(seed_value=7)
# ********************************* Creating the Agent Model and the Environment Model *********************************
env = Environment(ds_path=ds_path,
                  window_size=window_size,
                  pip_pos=pip_pos,
                  stop_loss=stop_loss_value,
                  trans_cost=trans_cost)
actions = env.get_actions()  # Getting the available actions of the environment
agent = Agent(env.get_state_size(), env.get_actions_n())

if os.path.exists(performance_file_path):  # Checking if a previous training performance file exists
    os.remove(performance_file_path)  # Deleting the old training performance file
if os.path.exists(log):  # Checking if a previous training log file exists
    os.remove(log)  # Deleting the old training log file

print(dt.now())
print("stop loss:", stop_loss_value)
print("pc: BH")
# ********************************************* Looping over all Episodes **********************************************
for ep in range(n_episodes - n_prev_iterations):
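The body of the episode loop is not included in this example. As a rough idea only, the sketch below shows what one training episode might look like, assuming the same Agent/Environment API used in Example #2 (env.step, env.done, agent.act, agent.memory, agent.exp_replay); it is not the author's original loop body.

# Hypothetical sketch of a single training episode, reusing names defined above
# (env, agent, actions, batch_size, performance_file_path); not the original code.
state, reward = env.step("Hold")  # Neutral first action to obtain the initial state
total_revenue = 0

while not env.done:
    action = agent.act(state)  # Choose an action for the current state
    next_state, reward = env.step(actions[action])  # Apply it and observe the outcome
    agent.memory.append((state, action, reward, next_state))  # Store the experience
    total_revenue += reward
    state = next_state

    if len(agent.memory) > batch_size:  # Learn from a batch of stored experiences
        agent.exp_replay(batch_size)

with open(performance_file_path, "a+") as file:
    file.write(str(round(total_revenue, 1)) + "\n")  # Log the episode performance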
Example #2
def evaluate(model_name):
    time_start = dt.now()

    model = load_model(model_name)  # Load the NN-agent model
    state_size = model.layers[0].input.shape.as_list()[1]  # Load the state size from the model
    window_size = int(state_size / 2)
    env = Environment(ds_path=ds_path,
                      window_size=window_size,
                      pip_pos=pip_pos,
                      stop_loss=stop_loss_value,
                      trans_cost=trans_cost)
    actions = env.get_actions()  # Getting the available actions of the environment
    actions_size = env.get_actions_n()  # Getting the number of actions available in the environment

    agent = Agent(state_size=state_size,
                  action_size=actions_size,
                  is_eval=True,
                  model_name=model_name)

    state, reward = env.step("Hold")  # Taking a first neutral action to get the initial state
    total_revenue = 0

    while not env.done:  # Loop until we finish all the instances

        action = agent.act(state)  # The agent chooses an action based on the current state
        next_state, reward = env.step(actions[action])  # Getting the next state and reward for the chosen action
        #with open(log, "a+") as file:
        #file.write(str(actions[action]) + "\n")  # Saving the performance on a file
        #if env.stop_loss_triggered:
        #file.write("Stop Loss Triggered!" + "\n")  # Saving the stop loss taken on a file
        #file.write(str(reward) + "\n")  # Saving the performance on a file
        '''print(colored("Observation:", 'blue'), state)
		print(colored("Action:", 'yellow'), actions[action])
		if env.stop_loss_triggered:  # Alert when we got a stop loss from the environment
			print(colored('Stop loss triggered!', 'red'))
		print(colored("Next Observation:", 'blue'), next_state)
		print(colored("Reward:", 'cyan'), reward)'''

        total_revenue += reward

        #agent.memory.append((state, action, reward, next_state))  # Saving the experience
        state = next_state

        #if len(agent.memory) > batch_size:  # Making an analysis based on our experience
        #	agent.exp_replay(batch_size)

    # ***************************** Showing and Saving the Results over a Single Episode *******************************
    #print("-----------------------------------------------------------------------------------------------------------")
    if total_revenue > 0:
        print(colored("Total Profit: ", 'blue'),
              colored(str(round(total_revenue, 1)), 'cyan'), "pips")
    else:
        print(colored("Total Profit: ", 'blue'),
              colored(str(round(total_revenue, 1)), 'red'), "pips")
    with open(performance_file_path, "a+") as file:
        file.write(str(round(total_revenue, 1)) + "\n")  # Saving the performance to a file
    time_stop = dt.now()
    print(colored("Execution time for this episode:", 'yellow'),
          round((time_stop - time_start).total_seconds(), 0), "seconds")
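The example stops at the function definition. As a usage illustration only, the snippet below shows one hypothetical way to call evaluate for every model stored in models_path, assuming the os import and directory layout from Example #1 and that load_model accepts a file path.

# Hypothetical usage sketch; the model file layout is an assumption, not part of the original example.
for model_file in sorted(next(os.walk(models_path))[2]):
    print("Evaluating model:", model_file)
    evaluate(models_path + model_file)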
Example #3
def main():
    """
    Sets the parameters for the Environment, Critic, and Actor according to the imported config file.
    Creates an environment where a predefined number of episodes can be performed.
    Instantiates an actor to keep track of the policy, and a critic to keep track of the value at each state
    Runs a predefined number of episodes creating a new board for each episode.
    For each episode, the actor and the critic are updated according to the Actor-Critic model.
    Finally, epsilon is set to zero, and the environment plays a game with the updated policy.
    """

    env = Environment(env_cfg)
    granularity = env_cfg["granularity"]
    critic = Critic(critic_cfg, granularity)
    actor = Actor(actor_cfg)

    episodes = training_cfg["number_of_episodes"]
    visualize_episodes = training_cfg["visualize_episodes"]
    steps_per_episode = []

    for episode in tqdm(range(episodes),
                        desc=f"Playing {episodes} episodes",
                        colour='#39ff14'):
        env.new_simulation()
        path = []
        positions = []
        critic.reset_eli_dict()
        actor.reset_eli_dict()
        while not env.reached_top() and not env.reached_max_steps():
            env.update_steps()
            current_state = copy(env.get_state())
            legal_actions = env.get_actions()
            action = actor.get_action(state=current_state,
                                      legal_actions=legal_actions)
            path.append((str(current_state), str(action)))
            reward = env.perform_action(action=action)

            td_err = critic.compute_td_err(current_state=current_state,
                                           next_state=env.get_state(),
                                           reward=reward)

            # Previous states on the path are updated as well during the call to train() by eligibility traces
            critic.train(state=current_state, td_error=td_err)
            critic.update_eligs()

            # Update actor beliefs on SAPs for all pairs seen thus far in the episode
            for i, sap in enumerate(reversed(path)):
                actor.update_eli_dict(state=str(sap[0]),
                                      action=str(sap[1]),
                                      i=i)
                actor.update_policy_dict(state=str(sap[0]),
                                         action=str(sap[1]),
                                         td_err=td_err)

            positions.append(env.get_position())

        print("steps used in this episode", env.steps)
        if episode in visualize_episodes:
            env.visualize_landscape(positions)
        steps_per_episode.append(env.steps)

    plot_learning(steps_per_episode)

    # Enable history tracking to visualize final simulation
    env.new_simulation()

    print(f"Actor final epsilon: {actor.epsilon}")
    actor.epsilon = 0  # Set exploration to 0
    print("Attempting final simulation to show you how smart I am now")
    while not env.reached_top() and not env.reached_max_steps():
        current_state = env.get_state()
        legal_actions = env.get_actions()
        action = actor.get_action(current_state, legal_actions)
        env.perform_action(action)
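The docstring of main() describes the Actor-Critic update at a high level, but the TD error that drives both critic.train and actor.update_policy_dict is computed inside Critic.compute_td_err, which is not shown here. As a reference only, a minimal sketch of the standard TD(0) error for a table-based critic follows; the value table value_fn and discount factor gamma are assumed names, not identifiers from this code.

# Hypothetical sketch of the TD(0) error: delta = r + gamma * V(s') - V(s).
def compute_td_err(value_fn, current_state, next_state, reward, gamma=0.9):
    v_next = value_fn.get(str(next_state), 0.0)     # V(s'), defaulting to 0.0 for unseen states
    v_curr = value_fn.get(str(current_state), 0.0)  # V(s)
    return reward + gamma * v_next - v_curr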