Python Agent.remember示例

编程语言: Python

命名空间/包名称: ai_agent

类/类型: Agent

方法/功能: remember

hotexamples.com的示例: 2

Python Agent.remember - 共找到 2 个示例。以下是从开源项目中提取的、评分最高的 ai_agent.Agent.remember 真实 Python 代码示例。您可以对示例进行评分，以帮助我们提高示例质量。

常用方法

显示/隐藏

Agent(6)

choose_best_action(4)

experience_replay(2)

remember(2)

open_orders(1)

示例#1

显示文件

    def learn(self,
              data,
              episodes,
              num_features,
              batch_size,
              use_existing_model,
              random_action_min=0.1,
              random_action_decay=0.99995,
              num_neurons=64,
              future_reward_importance=0.95):
        """Train a new ``Agent`` on ``data`` for ``episodes`` episodes and save its model.

        Every episode walks ``t`` from ``num_features`` to ``len(data) - 2``.
        At each step the agent chooses an action, ``self.execute_action``
        applies it, and the resulting transition is stored with
        ``agent.remember``. Whenever the agent's memory holds more than
        ``batch_size`` entries, ``agent.experience_replay(batch_size)`` runs.

        Side effects: ``self.open_orders`` is reset to ``[data[0]]`` at the
        start of every episode, progress is printed for every step and every
        episode, and the model is saved to
        ``"files/output/model_ep<episodes>"``.

        If ``len(data) - 1 <= num_features`` the step loop never runs, so the
        returned per-episode lists are empty (the model is still saved).

        Args:
            data: Indexable sequence supporting ``len()``; presumably a price
                series -- confirm against the caller.
            episodes: Number of training episodes to run.
            num_features: Forwarded to ``Agent`` and ``self.get_state``; also
                the first step index of every episode.
            batch_size: Replay threshold and the size passed to
                ``agent.experience_replay``.
            use_existing_model: Forwarded unchanged to ``Agent``.
            random_action_min: Forwarded unchanged to ``Agent``.
            random_action_decay: Forwarded unchanged to ``Agent``.
            num_neurons: Forwarded unchanged to ``Agent``.
            future_reward_importance: Forwarded unchanged to ``Agent``.

        Returns:
            A tuple ``(profit_vs_episode, trades_vs_episode,
            epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon)``
            where the first three are lists with one entry per completed
            episode.
        """
        agent = Agent(num_features, use_existing_model, '', random_action_min,
                      random_action_decay, num_neurons, future_reward_importance)
        last_index = len(data) - 1
        final_step = last_index - 1  # last value of t the step loop produces
        profit_vs_episode = []
        trades_vs_episode = []
        epsilon_vs_episode = []

        for episode in range(1, episodes + 1):
            state = self.get_state(data, num_features, num_features)
            total_profits = 0
            total_holds = 0
            # Starts at 1, presumably counting the order seeded into
            # open_orders below -- confirm against execute_action.
            total_buys = 1
            total_sells = 0
            self.open_orders = [data[0]]

            for t in range(num_features, last_index):
                # The agent trades off its model's prediction against a random action.
                action = agent.choose_best_action(state)
                reward, total_profits, total_holds, total_buys, total_sells = self.execute_action(
                    action, data[t], t, total_profits, total_holds, total_buys, total_sells)

                done = t == final_step
                next_state = self.get_state(data, t + 1, num_features)

                print(f'row #{t} {agent.actions[action]} @{data[t]}, state1={state}, state2={next_state}, reward={reward}')
                # Store the transition so experience_replay can learn from it later.
                agent.remember(state, action, reward, next_state, done)
                state = next_state

                if done:
                    # Read epsilon before this step's replay so the reported
                    # value is the one in effect during the episode's last action.
                    eps = np.round(agent.epsilon, 3)
                    print(f'Episode {episode}/{episodes} Total Profit: {formatPrice(total_profits)} , Total trades: {total_buys}, probability of random action: {eps}')
                    print("---------------------------------------")
                    profit_vs_episode.append(np.round(total_profits, 4))
                    trades_vs_episode.append(total_buys)
                    epsilon_vs_episode.append(eps)

                # Fit only once strictly more than one batch of transitions is stored.
                if len(agent.memory) > batch_size:
                    agent.experience_replay(batch_size)

        model_name = "files/output/model_ep" + str(episodes)
        agent.model.save(model_name)
        print(f'{model_name} saved')
        return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon

示例#2

显示文件

文件： dqn.py 项目： MaayanLucyYaari/py-ML-rl-trade

    def learn(self,
              data,
              n_episodes,
              n_features,
              batch_size,
              use_existing_model,
              random_action_min=0.1,
              random_action_decay=0.99995,
              n_neurons=64,
              future_reward_importance=0.95):
        """Train a new ``Agent`` on ``data`` for ``n_episodes`` episodes and save its model.

        Every episode walks ``t`` from ``n_features`` to ``len(data) - 2``.
        At each step the agent chooses an action and ``self.execute_action``
        applies it to update the running profit and trade counters. The reward
        stored with ``agent.remember`` is NOT the one returned by
        ``execute_action``: it is ``next_state[0][-1]`` for ``'buy'``, its
        negation for ``'sell'``, and ``0`` for anything else. Whenever the
        agent's memory holds at least ``batch_size`` entries,
        ``agent.experience_replay(batch_size)`` runs.

        On the last step of an episode, action ``2`` is executed at
        ``data[t + 1]`` before the episode statistics are recorded.

        Side effects: ``self.open_orders`` is reset to ``[data[0]]`` at the
        start of every episode, a summary is printed per episode, and the
        model is saved to ``"files/output/model_ep<n_episodes>"``.

        If ``len(data) - 1 <= n_features`` the step loop never runs, so the
        returned per-episode lists are empty (the model is still saved).

        Args:
            data: Indexable sequence supporting ``len()``; presumably a price
                or return series -- confirm against the caller.
            n_episodes: Number of training episodes to run.
            n_features: Forwarded to ``Agent`` and ``self.get_state``; also
                the first step index of every episode.
            batch_size: Replay threshold and the size passed to
                ``agent.experience_replay``.
            use_existing_model: Forwarded unchanged to ``Agent``.
            random_action_min: Forwarded unchanged to ``Agent``.
            random_action_decay: Forwarded unchanged to ``Agent``.
            n_neurons: Forwarded unchanged to ``Agent``.
            future_reward_importance: Forwarded unchanged to ``Agent``.

        Returns:
            A tuple ``(profit_vs_episode, trades_vs_episode,
            epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon)``
            where the first three are lists with one entry per completed
            episode.
        """
        agent = Agent(n_features, use_existing_model, '', random_action_min,
                      random_action_decay, n_neurons, future_reward_importance)
        last_index = len(data) - 1
        final_step = last_index - 1  # last value of t the step loop produces
        profit_vs_episode = []
        trades_vs_episode = []
        epsilon_vs_episode = []

        for episode in range(1, n_episodes + 1):
            state = self.get_state(data, n_features, n_features)
            total_profits = 0
            total_holds = 0
            # Starts at 1, presumably counting the order seeded into
            # open_orders below -- confirm against execute_action.
            total_buys = 1
            total_sells = 0
            total_notvalid = 0  # add-on buys or sells without previous buy
            self.open_orders = [data[0]]

            for t in range(n_features, last_index):
                # The agent trades off its model's prediction against a random action.
                action = agent.choose_best_action(state)

                # execute_action is called for its bookkeeping only; the reward
                # it returns is deliberately discarded in favour of the
                # next-state-based reward computed below.
                _, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                    self.execute_action(action, data[t], t, total_profits, total_holds,
                                        total_buys, total_sells, total_notvalid)

                done = t == final_step
                next_state = self.get_state(data, t + 1, n_features)

                # Reward is the last entry of the next state's first row, signed
                # by the direction of the action (presumably the next return --
                # confirm against get_state).
                action_name = agent.actions[action]
                if action_name == 'buy':
                    reward = next_state[0][-1]
                elif action_name == 'sell':
                    reward = -next_state[0][-1]
                else:
                    reward = 0

                # Store the transition so experience_replay can learn from it later.
                agent.remember(state, action, reward, next_state, done)
                state = next_state

                if done:
                    # Sell the position at the end of the episode. Assumes action
                    # index 2 means 'sell' in agent.actions -- TODO confirm.
                    # The reward of this forced trade is not used.
                    _, total_profits, total_holds, total_buys, total_sells, total_notvalid = \
                        self.execute_action(2, data[t + 1], t + 1, total_profits, total_holds,
                                            total_buys, total_sells, total_notvalid)
                    # Read epsilon before this step's replay so the reported
                    # value is the one in effect during the episode's last action.
                    eps = np.round(agent.epsilon, 3)
                    print(
                        f'Episode {episode}/{n_episodes} Total Profit: {formatPrice(total_profits * 100)},'
                        f' Total hold/buy/sell/notvalid trades: {total_holds} / {total_buys} / {total_sells} / {total_notvalid},'
                        f' probability of random action: {eps}')
                    print("---------------------------------------")
                    profit_vs_episode.append(np.round(total_profits, 4))
                    trades_vs_episode.append(total_buys)
                    epsilon_vs_episode.append(eps)

                # Fit as soon as at least one full batch of transitions is stored.
                if len(agent.memory) >= batch_size:
                    agent.experience_replay(batch_size)

        model_name = "files/output/model_ep" + str(n_episodes)
        agent.model.save(model_name)
        print(f'{model_name} saved')
        return profit_vs_episode, trades_vs_episode, epsilon_vs_episode, model_name, agent.num_trains, agent.epsilon