import os

import torch
from torch.distributions import Categorical

# Build the model from the test environment's dimensions and wrap it in a PPO agent.
model = MODEL(c_in=test_env.observation_space.shape[0],
              c_out=test_env.action_space.n,
              seq_len=test_env.observation_space.shape[1])
model = model.to(device)
agent = PPO(model=model, memory=memory, config=config, device=device)

# Resume from a saved checkpoint if one exists.
if os.path.exists('./save/model.m5'):
    agent.model.load_state_dict(torch.load('./save/model.m5'))

avg_t = 0
avg_r = 0
for epi in range(1, n_episodes + 1):
    print("episode {} start!".format(epi))
    obs = test_env.reset()
    done = False
    while not done:
        t = 0
        action_list = []
        while t < T_horizon:
            # Sample an action from the current policy distribution.
            prob = agent.model.pi(torch.FloatTensor(obs).unsqueeze(0).to(device))
            action = Categorical(prob).sample().item()
            obs_prime, reward, done, _ = test_env.step(action)
            if reward is None:
                continue
            action_list.append(action)
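# A minimal sketch, not the source's PPO class: the clipped surrogate objective
# that PPO optimizes from rollouts like the one above. `eps_clip` and the
# log-probability/advantage inputs are illustrative assumptions.
def ppo_clip_loss(new_log_probs, old_log_probs, advantages, eps_clip=0.2):
    # Probability ratio between the current policy and the rollout-time policy.
    ratio = torch.exp(new_log_probs - old_log_probs)
    surr1 = ratio * advantages
    surr2 = torch.clamp(ratio, 1 - eps_clip, 1 + eps_clip) * advantages
    # Clipped objective: take the pessimistic (minimum) surrogate and negate it
    # so a gradient-descent optimizer maximizes expected advantage.
    return -torch.min(surr1, surr2).mean()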
# device = torch.device('cpu')
print(f"Device used: {device}")

policy_net = PolicyNetwork(input_dim, output_dim1, output_dim2, hidden_dim,
                           n_layers=lstm_layers)

# Loading the best model
model_name = 'model/state_dict3.pt'
policy_net.load_state_dict(torch.load(model_name))
policy_net.to(device)

max_episode_num = 1_000
all_rewards = [0]
avg_rewards = [0]

# train() tells the model how to treat dropout (train: uses dropout, eval: does not).
policy_net.train()

for episode in range(max_episode_num):
    state = train_env.reset()
    log_probs = []
    rewards = []
    profits = []
    hold_profits = []
    for steps in range(train_env.steps_left):
        if steps % 500 == 0:
            train_env.render()
        # Sample an action and keep its log-probability for the policy-gradient update.
        action, log_prob = policy_net.get_action(state, device)
        new_state, reward, done, _ = train_env.step(action)
        log_probs.append(log_prob)
        rewards.append(reward)
        profits.append(train_env._get_profit())
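# A hedged sketch, not shown in the source, of the REINFORCE-style update such a
# loop typically ends with: discounted returns are computed from `rewards`, then
# each stored log-probability is weighted by its return. `optimizer` and `gamma`
# are assumptions for illustration.
def update_policy(log_probs, rewards, optimizer, gamma=0.99):
    # Discounted return G_t for every step, accumulated backwards through the episode.
    returns = []
    G = 0.0
    for r in reversed(rewards):
        G = r + gamma * G
        returns.insert(0, G)
    returns = torch.tensor(returns)
    # Normalizing returns keeps the gradient scale comparable across episodes.
    returns = (returns - returns.mean()) / (returns.std() + 1e-9)
    # Policy-gradient loss: maximizing expected return = minimizing -log_prob * G_t.
    loss = torch.stack([-lp * G for lp, G in zip(log_probs, returns)]).sum()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()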
print(f"Device used: {device}") policy_net = PolicyNetwork(input_dim, output_dim1, output_dim2, hidden_dim, n_layers=lstm_layers) # Loading the best model model_name = 'model/state_dict2.pt' policy_net.load_state_dict(torch.load(model_name)) policy_net.to(device) policy_net.eval( ) # to tell the model how to treat dropout (train: uses dropout, eval: do not use dropout) state = test_env.reset() log_probs = [] rewards = [] profits = [] hold_profits = [] for steps in range(test_env.steps_left): if steps % 500 == 0: test_env.render() with torch.no_grad(): action, log_prob = policy_net.get_action(state, device) new_state, reward, done, _ = test_env.step(action) log_probs.append(log_prob) rewards.append(reward)