Example #1
trans_cost = 0  # The cost of a single transaction (expressed in pips)
batch_size = 30  # The number of (state, action, reward, next_state) tuples to collect before experience replay
stop_loss_value = -50  # The maximum loss we can tolerate (expressed in pips)
performance_file_path = "performance/train_performance.txt"  # Path of the training performance log file
log = "performance/train_log.txt"  # Path of the training log file
models_path = "models/"  # Path where the models are stored
n_prev_iterations = len(next(os.walk(models_path))[2])  # Number of models already saved in models_path
setup(seed_value=7)
# ********************************* Creating the Agent Model and the Environment Model *********************************
env = Environment(ds_path=ds_path,
                  window_size=window_size,
                  pip_pos=pip_pos,
                  stop_loss=stop_loss_value,
                  trans_cost=trans_cost)
actions = env.get_actions()  # Getting the available actions of the environment
agent = Agent(env.get_state_size(), env.get_actions_n())

if os.path.exists(performance_file_path):  # Checking if a previous training performance file exists
    os.remove(performance_file_path)  # Deleting the old training performance file
if os.path.exists(log):  # Checking if a previous training log file exists
    os.remove(log)  # Deleting the old training log file

print(dt.now())
print("stop loss:", stop_loss_value)
print("pc: BH")
# ********************************************* Looping over all Episodes **********************************************
for ep in range(n_episodes - n_prev_iterations):
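The body of the episode loop is not included in this example. As a rough idea only, the sketch below shows what one training episode might look like, assuming the same Agent/Environment API used in Example #2 (env.step, env.done, agent.act, agent.memory, agent.exp_replay); it is not the author's original loop body.

# Hypothetical sketch of a single training episode, reusing names defined above
# (env, agent, actions, batch_size, performance_file_path); not the original code.
state, reward = env.step("Hold")  # Neutral first action to obtain the initial state
total_revenue = 0

while not env.done:
    action = agent.act(state)  # Choose an action for the current state
    next_state, reward = env.step(actions[action])  # Apply it and observe the outcome
    agent.memory.append((state, action, reward, next_state))  # Store the experience
    total_revenue += reward
    state = next_state

    if len(agent.memory) > batch_size:  # Learn from a batch of stored experiences
        agent.exp_replay(batch_size)

with open(performance_file_path, "a+") as file:
    file.write(str(round(total_revenue, 1)) + "\n")  # Log the episode performance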
Example #2
def evaluate(model_name):
    time_start = dt.now()

    model = load_model(model_name)  # Load the NN-agent model
    state_size = model.layers[0].input.shape.as_list()[1]  # Load the state size from the model
    window_size = int(state_size / 2)
    env = Environment(ds_path=ds_path,
                      window_size=window_size,
                      pip_pos=pip_pos,
                      stop_loss=stop_loss_value,
                      trans_cost=trans_cost)
    actions = env.get_actions()  # Getting the available actions of the environment
    actions_size = env.get_actions_n()  # Getting the number of actions available in the environment

    agent = Agent(state_size=state_size,
                  action_size=actions_size,
                  is_eval=True,
                  model_name=model_name)

    state, reward = env.step("Hold")  # Taking a first neutral action to get the initial state
    total_revenue = 0

    while not env.done:  # Loop until we finish all the instances

        action = agent.act(state)  # The agent chooses an action based on the current state
        next_state, reward = env.step(actions[action])  # Getting the next state and reward for the chosen action
        #with open(log, "a+") as file:
        #file.write(str(actions[action]) + "\n")  # Saving the performance on a file
        #if env.stop_loss_triggered:
        #file.write("Stop Loss Triggered!" + "\n")  # Saving the stop loss taken on a file
        #file.write(str(reward) + "\n")  # Saving the performance on a file
        '''print(colored("Observation:", 'blue'), state)
		print(colored("Action:", 'yellow'), actions[action])
		if env.stop_loss_triggered:  # Alert when we got a stop loss from the environment
			print(colored('Stop loss triggered!', 'red'))
		print(colored("Next Observation:", 'blue'), next_state)
		print(colored("Reward:", 'cyan'), reward)'''

        total_revenue += reward

        #agent.memory.append((state, action, reward, next_state))  # Saving the experience
        state = next_state

        #if len(agent.memory) > batch_size:  # Making an analysis based on our experience
        #	agent.exp_replay(batch_size)

    # ***************************** Showing and Saving the Results over a Single Episode *******************************
    #print("-----------------------------------------------------------------------------------------------------------")
    if total_revenue > 0:
        print(colored("Total Profit: ", 'blue'),
              colored(str(round(total_revenue, 1)), 'cyan'), "pips")
    else:
        print(colored("Total Profit: ", 'blue'),
              colored(str(round(total_revenue, 1)), 'red'), "pips")
    with open(performance_file_path, "a+") as file:
        file.write(str(round(total_revenue, 1)) + "\n")  # Saving the performance to a file
    time_stop = dt.now()
    print(colored("Execution time for this episode:", 'yellow'),
          round((time_stop - time_start).total_seconds(), 0), "seconds")
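The example stops at the function definition. As a usage illustration only, the snippet below shows one hypothetical way to call evaluate for every model stored in models_path, assuming the os import and directory layout from Example #1 and that load_model accepts a file path.

# Hypothetical usage sketch; the model file layout is an assumption, not part of the original example.
for model_file in sorted(next(os.walk(models_path))[2]):
    print("Evaluating model:", model_file)
    evaluate(models_path + model_file)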
Example #3
def main():
    """
    Sets the parameters for the Environment, Critic, and Actor according to the imported config file.
    Creates an environment where a predefined number of episodes can be performed.
    Instantiates an actor to keep track of the policy, and a critic to keep track of the value at each state
    Runs a predefined number of episodes creating a new board for each episode.
    For each episode, the actor and the critic are updated according to the Actor-Critic model.
    Finally, epsilon is set to zero, and the environment plays a game with the updated policy.
    """

    env = Environment(env_cfg)
    granularity = env_cfg["granularity"]
    critic = Critic(critic_cfg, granularity)
    actor = Actor(actor_cfg)

    episodes = training_cfg["number_of_episodes"]
    visualize_episodes = training_cfg["visualize_episodes"]
    steps_per_episode = []

    for episode in tqdm(range(episodes),
                        desc=f"Playing {episodes} episodes",
                        colour='#39ff14'):
        env.new_simulation()
        path = []
        positions = []
        critic.reset_eli_dict()
        actor.reset_eli_dict()
        while not env.reached_top() and not env.reached_max_steps():
            env.update_steps()
            current_state = copy(env.get_state())
            legal_actions = env.get_actions()
            action = actor.get_action(state=current_state,
                                      legal_actions=legal_actions)
            path.append((str(current_state), str(action)))
            reward = env.perform_action(action=action)

            td_err = critic.compute_td_err(current_state=current_state,
                                           next_state=env.get_state(),
                                           reward=reward)

            # Previous states on the path are updated as well during the call to train() by eligibility traces
            critic.train(state=current_state, td_error=td_err)
            critic.update_eligs()

            # Update actor beliefs on SAPs for all pairs seen thus far in the episode
            for i, sap in enumerate(reversed(path)):
                actor.update_eli_dict(state=str(sap[0]),
                                      action=str(sap[1]),
                                      i=i)
                actor.update_policy_dict(state=str(sap[0]),
                                         action=str(sap[1]),
                                         td_err=td_err)

            positions.append(env.get_position())

        print("steps used in this episode", env.steps)
        if episode in visualize_episodes:
            env.visualize_landscape(positions)
        steps_per_episode.append(env.steps)

    plot_learning(steps_per_episode)

    # Enable history tracking to visualize final simulation
    env.new_simulation()

    print(f"Actor final epsilon: {actor.epsilon}")
    actor.epsilon = 0  # Set exploration to 0
    print("Attempting final simulation to show you how smart I am now")
    while not env.reached_top() and not env.reached_max_steps():
        current_state = env.get_state()
        legal_actions = env.get_actions()
        action = actor.get_action(current_state, legal_actions)
        env.perform_action(action)
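The docstring of main() describes the Actor-Critic update at a high level, but the TD error that drives both critic.train and actor.update_policy_dict is computed inside Critic.compute_td_err, which is not shown here. As a reference only, a minimal sketch of the standard TD(0) error for a table-based critic follows; the value table value_fn and discount factor gamma are assumed names, not identifiers from this code.

# Hypothetical sketch of the TD(0) error: delta = r + gamma * V(s') - V(s).
def compute_td_err(value_fn, current_state, next_state, reward, gamma=0.9):
    v_next = value_fn.get(str(next_state), 0.0)     # V(s'), defaulting to 0.0 for unseen states
    v_curr = value_fn.get(str(current_state), 0.0)  # V(s)
    return reward + gamma * v_next - v_curr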