Example #1
# Imports assumed by the snippets on this page (DQN is the user-defined agent class, not shown):
import numpy as np
from gym import spaces
from btgym import BTgymEnv


def main(filename):
    env = BTgymEnv(filename=filename,
                   state_shape={'raw': spaces.Box(low=-100, high=100, shape=(30, 4))},
                   skip_frame=5,
                   start_cash=100000,
                   broker_commission=0.02,
                   fixed_stake=100,
                   connect_timeout=180,
                   drawdown_call=90,
                   render_state_as_image=True,
                   render_ylabel='Price Lines',
                   render_size_episode=(12, 8),
                   render_size_human=(8, 3.5),
                   render_size_state=(10, 3.5),
                   render_dpi=75,
                   multiprocessing=1,
                   port=5000,
                   data_port=4999,
                   verbose=0, )

    env.reset()  # initial reset: prompts the data master to start the data server process

    gamma = 0.9
    epsilon = .95

    trials = 10
    trial_len = 1000

    # updateTargetNetwork = 1000
    dqn_agent = DQN(env=env)
    steps = []
    for trial in range(trials):
        # dqn_agent.model = load_model("./model.model")
        # env.reset() returns a dict of named states; take the single 'raw' entry
        cur_state = np.array(list(env.reset().items())[0][1])
        cur_state = np.reshape(cur_state, (30, 4, 1))
        for step in range(trial_len):
            action = dqn_agent.act(cur_state)
            new_state, reward, done, _ = env.step(action)
            reward = reward * 10 if not done else -10
            new_state = list(new_state.items())[0][1]
            new_state = np.reshape(new_state, (30, 4, 1))
            dqn_agent.target_train()  # update the target network weights

            cur_state = new_state
            if done:
                break

        print("Completed trial #{} ".format(trial))
        dqn_agent.render_all_modes(env)
        dqn_agent.save_model("model-{}.model".format(trial))
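The DQN class itself is not shown in any of these snippets. For orientation, here is a minimal skeleton exposing the interface the loop above relies on; it is a sketch assuming TensorFlow/Keras, and the layer sizes, the tau-blended target update, and the render helper body are illustrative assumptions, not the original author's code.

import numpy as np
from tensorflow import keras
from tensorflow.keras import layers


class DQN:
    """Hypothetical agent skeleton matching the interface used above."""

    def __init__(self, env, gamma=0.9, epsilon=0.95, tau=0.125):
        self.env = env
        self.gamma = gamma      # discount factor (unused in this skeleton)
        self.epsilon = epsilon  # exploration rate
        self.tau = tau          # target-network blend rate
        self.model = self._build_model()
        self.target_model = self._build_model()

    def _build_model(self):
        # Input shape (30, 4, 1) matches the reshape in the training loop above.
        model = keras.Sequential([
            layers.Flatten(input_shape=(30, 4, 1)),
            layers.Dense(64, activation='relu'),
            layers.Dense(self.env.action_space.n, activation='linear'),
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def act(self, state):
        # Epsilon-greedy action selection over the online network.
        if np.random.random() < self.epsilon:
            return self.env.action_space.sample()
        q_values = self.model.predict(state[None, ...], verbose=0)
        return int(np.argmax(q_values[0]))

    def target_train(self):
        # Blend online weights into the target network.
        weights = self.model.get_weights()
        target_weights = self.target_model.get_weights()
        self.target_model.set_weights(
            [self.tau * w + (1.0 - self.tau) * tw
             for w, tw in zip(weights, target_weights)])

    def render_all_modes(self, env):
        # Pull a rendering for each mode assumed enabled in the env config.
        for mode in ('human', 'episode'):
            env.render(mode)

    def save_model(self, path):
        self.model.save(path)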
Example #2
def main():
    env = BTgymEnv(
        filename='./data/DAT_ASCII_EURUSD_M1_2016.csv',
        state_shape={
            'raw_state': spaces.Box(low=-100, high=100, shape=(4, 4))
        },
        skip_frame=5,
        start_cash=100000,
        broker_commission=0.02,
        fixed_stake=100,
        drawdown_call=90,
        render_ylabel='Price Lines',
        render_size_episode=(12, 8),
        render_size_human=(8, 3.5),
        render_size_state=(10, 3.5),
        render_dpi=75,
        verbose=0,
    )
    gamma = 0.9
    epsilon = .95

    trials = 100
    trial_len = 1000

    # updateTargetNetwork = 1000
    dqn_agent = DQN(env=env)
    steps = []
    for trial in range(trials):
        # dqn_agent.model = load_model("./model.model")
        cur_state = np.array(list(env.reset().items())[0][1])
        cur_state = np.reshape(cur_state, (4, 4, 1))
        for step in range(trial_len):
            action = dqn_agent.act(cur_state)
            new_state, reward, done, _ = env.step(action)
            reward = reward * 10 if not done else -10
            new_state = list(new_state.items())[0][1]
            new_state = np.reshape(new_state, (4, 4, 1))
            dqn_agent.target_train()  # update the target network weights

            cur_state = new_state
            if done:
                break
        print("Completed trial #{} ".format(trial))
        dqn_agent.render_all_modes(env)
        dqn_agent.save_model("model-{}.model".format(trial))
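A side note on the list(env.reset().items())[0][1] idiom used in both loops above: the observation is a dict keyed by the names given in state_shape, so indexing by key reads more clearly. A minimal equivalent for this example's single 'raw_state' key:

obs = env.reset()  # dict keyed by the names in state_shape
cur_state = np.reshape(obs['raw_state'], (4, 4, 1))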
Example #3
    def testInitial(self):
        with open("log.txt", "a") as myfile:
            env = BTgymEnv(
                filename='./btgym/examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
                start_cash=100,
                broker_commission=0.0001,
                leverage=10.0,
                fixed_stake=10)

            done = False

            o = env.reset()

            while not done:
                action = env.action_space.sample()
                obs, reward, done, info = env.step(action)
                myfile.write('action: {},reward: {},info: {}\n'.format(
                    action, reward, info))

        env.close()
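Because BTgymEnv launches server subprocesses, it is worth guaranteeing env.close() runs even if the rollout raises. A standalone sketch of the same random-agent loop with try/finally (same parameters as above):

env = BTgymEnv(
    filename='./btgym/examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
    start_cash=100,
    broker_commission=0.0001,
    leverage=10.0,
    fixed_stake=10)
try:
    done = False
    env.reset()
    while not done:
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
finally:
    env.close()  # shuts down the environment's server processes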
Example #4
File: ksp2.py  Project: aaron8tang/btgym
# Tail of a commented-out alternative configuration (truncated in this snippet):
'''
    render_size_episode=(12, 8),
    render_size_human=(8, 3.5),
    render_size_state=(10, 3.5),
    render_dpi=75,
    verbose=0,
)
'''

env = BTgymEnv(
    filename='./data/sh601318.csv',
    start_weekdays={0, 1, 2, 3, 4},
    episode_duration={
        'days': 90,
        'hours': 0,
        'minutes': 0
    },
    # Want to start every episode at the beginning of the day:
    start_00=True,
    time_gap={'days': 1},
    start_cash=100,
    render_ylabel='Price Lines',
    render_size_episode=(12, 8),
    render_size_human=(8, 3.5),
    render_size_state=(10, 3.5),
    render_dpi=75,
    verbose=1,
)

o = env.reset()
take_some_steps(env, 10000)
render_all_modes(env)
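take_some_steps and render_all_modes are not defined in this snippet. Minimal stand-ins consistent with the calls above; the bodies are assumptions, and the render modes are those used elsewhere on this page:

def take_some_steps(env, n_steps):
    # Drive the environment with random actions, resetting when an episode ends.
    for _ in range(n_steps):
        obs, reward, done, info = env.step(env.action_space.sample())
        if done:
            env.reset()


def render_all_modes(env):
    # Pull a rendering for each mode assumed enabled in the env config.
    for mode in ('human', 'episode'):
        rgb_array = env.render(mode)
        print('{} render shape: {}'.format(mode, rgb_array.shape))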
Example #5
class wrapper_btgym_env(gym.Env):
    def __init__(self):

        self.MyCerebro = bt.Cerebro()

        self.MyCerebro.addstrategy(
            DevStrat_4_11,
            start_cash=2000,  # initial broker cash
            commission=0.0001,  # commission to imitate spread
            leverage=10.0,
            order_size=2000,  # fixed stake, mind leverage
            drawdown_call=10,  # max % to lose, in percent of initial cash
            target_call=10,  # max % to win, same
            skip_frame=10,
            gamma=0.99,
            reward_scale=7,  # gradient's nitrox, touch with care!
            state_ext_scale=np.linspace(3e3, 1e3, num=5))

        self.MyCerebro.addobserver(Reward)
        self.MyCerebro.addobserver(Position)
        self.MyCerebro.addobserver(NormPnL)

        self.MyDataset = BTgymDataset(
            filename='/Users/bluecharles/Desktop/data/DAT_ASCII_EURUSD_M1_2016.csv',
            start_weekdays=[0, 1, 2, 3, 4],
            start_00=True,
            episode_duration={
                'days': 0,
                'hours': 23,
                'minutes': 55
            },
            time_gap={'hours': 5},
        )

        self._env = BTgymEnv(
            dataset=self.MyDataset,
            engine=self.MyCerebro,
            port=5555,
            render_enabled=False,
            verbose=0,
        )
        time_dim = 30
        avg_period = 20

        self.observation_space = spaces.Dict({
            'external': spaces.Box(
                low=-100, high=100, shape=(time_dim, 1, 5), dtype=np.float32),
            'internal': spaces.Box(
                low=-2, high=2, shape=(avg_period, 1, 6), dtype=np.float32),
            'metadata': spaces.Dict({
                'type': spaces.Box(
                    shape=(), low=0, high=1, dtype=np.uint32),
                'trial_num': spaces.Box(
                    shape=(), low=0, high=10**10, dtype=np.uint32),
                'trial_type': spaces.Box(
                    shape=(), low=0, high=1, dtype=np.uint32),
                'sample_num': spaces.Box(
                    shape=(), low=0, high=10**10, dtype=np.uint32),
                'first_row': spaces.Box(
                    shape=(), low=0, high=10**10, dtype=np.uint32),
                'timestamp': spaces.Box(
                    shape=(), low=0, high=np.finfo(np.float64).max,
                    dtype=np.float64),
            }),
        })

        self.action_space = spaces.Discrete(4)

    def reset(self):
        # Delegate to the wrapped BTgymEnv; the observation matches the Dict space above.
        self.obs = self._env.reset()
        return self.obs

    def step(self, action):
        # Returns the wrapped env's (observation, reward, done, info) tuple.
        self.a = self._env.step(action)
        return self.a
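A hypothetical driver for the wrapper above (not from the original project); it assumes the data file path and ports set in __init__ are available:

env = wrapper_btgym_env()
obs = env.reset()  # dict with 'external', 'internal' and 'metadata' entries
done = False
while not done:
    action = env.action_space.sample()  # Discrete(4): btgym's default hold/buy/sell/close
    obs, reward, done, info = env.step(action)
env._env.close()  # the wrapper defines no close(), so shut down the inner env directly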
Example #6
class Forex:
    # Agent, Monitor and MyStrategy are user-defined helpers not shown in this snippet.
    def __init__(self, dir):
        self.dir = dir
        self.config = json.load(open('{}/config.json'.format(dir)))
        self.sample_batch_size = 32
        self.episodes = 10000
        self.time_steps = 30
        self.features = 8  # raw (4) + indicator (4) columns after concatenation
        self.input_shape = (30, 4)
        self.monitor = Monitor(dir)
        self.env = BTgymEnv(
            filename=self.config['data'],
            episode_duration={
                'days': 1,
                'hours': 0,
                'minutes': 0
            },
            strategy=MyStrategy,
            start_00=True,
            start_cash=self.config['capital'],
            broker_commission=self.config['commission'],
            fixed_stake=self.config['stake'],
            #drawdown_call=50,
            state_shape={
                'raw_state':
                spaces.Box(low=1, high=2, shape=self.input_shape),
                'indicator_states':
                spaces.Box(low=-1, high=10, shape=self.input_shape)
            },
            port=5006,
            data_port=4804,
            verbose=0,
        )
        self.state_size = (
            self.env.observation_space.shape['raw_state'][0] +
            self.env.observation_space.shape['indicator_states'][0])
        self.action_size = self.env.action_space.n
        self.agent = Agent(self.state_size, self.action_size, dir)
        print("Engine:{}".format(self.env.engine.broker.getcash()))

    def run(self):
        #path = dict([(0, 2),(54, 3),(72, 2),(83, 3),(84, 1),(125, 3),(126, 2),(156, 3),(157, 1),(171, 3),(179, 1),(188, 3),(189, 2),(204, 3),(205, 1),(295, 3),(307, 2),(316, 3),(363, 2),(390, 3),(391, 1),(476, 3),(477, 2),(484, 3),(485, 1),(574, 3)])
        try:
            episodes = 0
            for index_episode in range(self.config['episodes'] -
                                       self.config['trained_episodes']):
                state = self.env.reset()
                # state = np.reshape(state['raw_state'], [4, self.state_size])
                state = self.getFullState(state)
                #print("State:{}".format(state))
                state = np.reshape(state, (1, self.time_steps, 8))
                #print("Re State:{}".format(state))
                # np.concatenate((state['raw_state'],state['indicator_states']),axis=1)
                done = False
                index = 0
                final_cash = 0
                message = ""

                count = 0
                negativeReward = 0
                positiveReward = 0
                mapper = {0: 0, 1: 0, 2: 0, 3: 0}  # per-action step counts
                while not done:
                    # self.env.render()
                    action = self.agent.act(state)
                    next_state, reward, done, info = self.env.step(action)
                    mapper[action] += 1
                    if reward > 0:
                        positiveReward += reward
                    else:
                        negativeReward += reward
                    # next_state = np.reshape(next_state, [1, self.state_size])
                    # next_state = np.array(next_state['raw_state'])
                    # print("Shape1 :{} Shape2 :{}".format(next_state['raw_state'].shape,next_state['indicator_states'].shape))

                    final_cash = info[0]['broker_cash']
                    message = info[0]['broker_message']
                    next_state = self.getFullState(next_state)
                    next_state = np.reshape(next_state,
                                            (1, self.time_steps, 8))
                    #print("Action:{} Reward:{} Done:{}".format(action, reward,done))
                    self.agent.remember(state, action, reward, next_state,
                                        done)
                    state = next_state
                    index += 1
                    self.config['steps'] += info[0]['step']
                    self.monitor.logstep({
                        "reward": reward,
                        "drawdown": info[0]['drawdown'],
                        "broker_value": info[0]['broker_value'],
                        "steps": self.config['steps'],
                    })
                    if self.config['steps'] % 100 == 0:
                        self.monitor.logimage(
                            feed_dict={
                                'human': self.env.render('human')[None, :],
                            },
                            global_step=self.config['steps'],
                        )
                    # print('action: {},reward: {},info: {}\n'.format(action, reward, info))
                episodes += 1
                episode_stat = self.env.get_stat()
                self.monitor.logepisode({
                    "reward": reward,
                    "cpu_time_sec": episode_stat['runtime'].total_seconds(),
                    "global_step": self.config['trained_episodes'] + episodes,
                    "broker_value": info[0]['broker_value'],
                    "episode": self.env.render('episode')[None, :],
                })
                if "CLOSE, END OF DATA" == message:
                    print("\x1b[6;30;42m{}\t{} {} \t{} {}\t\t{}\x1b[0m".format(
                        time.strftime("%H:%M:%S"), index + 1, positiveReward,
                        int(final_cash), mapper, message))
                else:
                    if positiveReward > 0:
                        print("{}\t{} {} \t{} {}\t\t{}".format(
                            time.strftime("%H:%M:%S"), index + 1,
                            positiveReward, int(final_cash), mapper, message))
                    #print("{} {} \t{} {}\t\t{}".format(index + 1,positiveReward,int(final_cash),mapper,message))
                self.agent.replay(self.sample_batch_size)
        finally:
            self.config['trained_episodes'] += episodes
            self.agent.save_model()
            self.monitor.close()
            with open('{}/config.json'.format(self.dir), 'w') as outfile:
                json.dump(self.config, outfile)

    def getFullState(self, state):
        # Concatenate raw and indicator blocks column-wise: (30, 4) + (30, 4) -> (30, 8).
        return np.concatenate(
            (state['raw_state'], state['indicator_states']), axis=1)

    def testInitial(self):
        with open("log.txt", "a") as myfile:
            env = BTgymEnv(
                filename='./btgym/examples/data/DAT_ASCII_EURUSD_M1_2016.csv',
                start_cash=100,
                broker_commission=0.0001,
                leverage=10.0,
                fixed_stake=10)

            done = False

            o = env.reset()

            while not done:
                action = env.action_space.sample()
                obs, reward, done, info = env.step(action)
                myfile.write('action: {},reward: {},info: {}\n'.format(
                    action, reward, info))

        env.close()

    def test(self):
        state = self.env.reset()
        # state = np.reshape(state['raw_state'], [4, self.state_size])
        state = self.getFullState(state)
        state = np.reshape(state, (1, self.time_steps, 8))
        done = False
        index = 0
        final_cash = 0
        message = ""
        while not done:
            #self.env.render()
            action = self.agent.act(state)
            next_state, reward, done, info = self.env.step(action)
            final_cash = info[0]['broker_cash']
            message = info[0]['broker_message']
            # next_state = np.reshape(next_state, [1, self.state_size])
            # next_state = np.array(next_state['raw_state'])
            # print("Shape1 :{} Shape2 :{}".format(next_state['raw_state'].shape,next_state['indicator_states'].shape))
            next_state = self.getFullState(next_state)
            next_state = np.reshape(next_state, (1, self.time_steps, 8))
            state = next_state
            index += 1
        print("\x1b[6;30;42m{}\t{} \t{} \t\t{}\x1b[0m".format(
            time.strftime("%H:%M:%S"), index + 1, final_cash, message))
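To see why getFullState feeds the agent a (1, 30, 8) tensor: the two (30, 4) state blocks concatenate along axis=1 into (30, 8), then gain a leading batch axis. A quick standalone check with dummy arrays:

import numpy as np

state = {
    'raw_state': np.zeros((30, 4)),
    'indicator_states': np.zeros((30, 4)),
}
full = np.concatenate((state['raw_state'], state['indicator_states']), axis=1)
print(full.shape)  # (30, 8)
print(np.reshape(full, (1, 30, 8)).shape)  # (1, 30, 8): batch, time_steps, features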