def use_dataset(self, mode, no_kill=False):
    """Fetch, transform, and store the portion of data you'll be working with
    (ie, 80% train data, 20% test data, or the live database). Make sure to
    call this before reset()!

    :param mode: a ``Mode`` enum member (TRAIN / TEST / LIVE / TEST_LIVE)
        selecting which slice of the database to load.
    :param no_kill: stored on the instance; consumed elsewhere — TODO confirm
        its consumer, it is not read in this method.
    """
    self.mode = mode
    self.no_kill = no_kill
    if mode in (Mode.LIVE, Mode.TEST_LIVE):
        self.conn = data.engine_live.connect()
        # Work with 6000 timesteps up until the present (play w/ diff numbers, depends on LSTM)
        # Offset=0: data.py currently pulls recent-to-oldest, then reverses
        rampup = int(3e4)  # 6000  # FIXME temporarily using big number to build up Scaler (since it's not saved)
        limit, offset = (rampup, 0)  # if not self.conv2d else (self.hypers.step_window + 1, 0)
        df, self.last_timestamp = data.db_to_dataframe(
            self.conn, limit=limit, offset=offset,
            arbitrage=self.hypers.arbitrage, last_timestamp=True)
        # Save away for now so we can keep transforming it as we add new data
        # (find a more efficient way).
        self.df = df
    else:
        self.row_ct = data.count_rows(self.conn, arbitrage=self.hypers.arbitrage)
        split = .9  # Using 90% training data.
        n_train, n_test = int(self.row_ct * split), int(self.row_ct * (1 - split))
        # BUGFIX: was `mode == mode.TEST` — accessing one enum member through
        # another was deprecated in Python 3.11 and removed in 3.12; compare
        # against the enum class itself.
        limit, offset = (n_test, n_train) if mode == Mode.TEST else (n_train, 0)
        df = data.db_to_dataframe(self.conn, limit=limit, offset=offset,
                                  arbitrage=self.hypers.arbitrage)
    # NOTE(review): removed dead timing locals (`before_time`/`after_time`)
    # that were computed but never used.
    self.observations, self.prices = self._xform_data(df)
    self.prices_diff = self._diff(self.prices, percent=True)
def use_dataset(self, mode, full_set=False):
    """Fetch, transform, and store the portion of data you'll be working with
    (ie, 80% train data, 20% test data, or the live database). Make sure to
    call this before reset()!

    :param mode: a ``Mode`` enum member (TRAIN / TEST / LIVE / TEST_LIVE)
        selecting which slice of the pre-loaded price arrays to window.
    :param full_set: in TEST mode, use a (larger) fixed window instead of the
        small default one.
    """
    self.mode = mode
    if mode in (Mode.LIVE, Mode.TEST_LIVE):
        self.conn = data.engine_live.connect()
        # Work with 6000 timesteps up until the present (play w/ diff numbers, depends on LSTM)
        # Offset=0: data.py currently pulls recent-to-oldest, then reverses
        rampup = int(1e5)  # 6000  # FIXME temporarily using big number to build up Scaler (since it's not saved)
        limit, offset = (rampup, 0)  # if not self.conv2d else (self.hypers.step_window + 1, 0)
        df, self.last_timestamp = data.db_to_dataframe(
            self.conn, limit=limit, offset=offset,
            arbitrage=self.hypers.arbitrage, last_timestamp=True)
        # Save away for now so we can keep transforming it as we add new data
        # (find a more efficient way).
        self.df = df
    else:
        row_ct = data.count_rows(self.conn, arbitrage=self.hypers.arbitrage)
        split = .9  # Using 90% training data.
        n_train, n_test = int(row_ct * split), int(row_ct * (1 - split))
        # BUGFIX: was `mode == mode.TEST` — accessing one enum member through
        # another was deprecated in Python 3.11 and removed in 3.12; compare
        # against the enum class itself.
        if mode == Mode.TEST:
            offset = n_train
            limit = 40000 if full_set else 10000  # should be `n_test` in full_set, getting idx errors
        else:
            # Grab a random window from the 90% training data. The random bit is important so the agent
            # sees a variety of data. The window-size bit is a hack: as long as the agent doesn't die (doesn't cause
            # `terminal=True`), PPO's MemoryModel can keep filling up until it crashes TensorFlow. This ensures
            # there's a stopping point (limit). I'd rather see how far he can get w/o dying, figure out a solution.
            limit = self.EPISODE_LEN
            offset_start = 0 if not self.conv2d else self.hypers.step_window + 1
            offset = random.randint(offset_start, n_train - self.EPISODE_LEN)
    self.offset, self.limit = offset, limit
    self.prices = self.all_prices[offset:offset + limit]
    self.prices_diff = self.all_prices_diff[offset:offset + limit]
def __init__(self, hypers, cli_args=None):
    """Initialize hyperparameters and pre-load the full dataset.

    :param hypers: dict of hyperparameters, wrapped in a ``Box`` for attribute
        access (e.g. ``h.arbitrage``, ``h['net.type']``).
    :param cli_args: optional dict of command-line overrides; defaults to an
        empty dict. (BUGFIX: was a mutable default argument ``cli_args={}``,
        which is shared across all calls — replaced with a ``None`` sentinel.)
    """
    self.hypers = h = Box(hypers)
    self.conv2d = self.hypers['net.type'] == 'conv2d'
    self.cli_args = {} if cli_args is None else cli_args

    # cash/val start @ about $3.5k each. You should increase/decrease depending on how much you'll put into your
    # exchange accounts to trade with. Presumably the agent will learn to work with what you've got (cash/value
    # are state inputs); but starting capital does effect the learning process.
    self.start_cash, self.start_value = .4, .4

    # We have these "accumulator" objects, which collect values over steps, over episodes, etc. Easier to keep
    # same-named variables separate this way.
    self.acc = Box(
        episode=dict(
            i=0,
            total_steps=0,
            sharpes=[],
            returns=[],
            uniques=[],
        ),
        step=dict(),  # setup in reset()
        tests=dict(i=0, n_tests=0))
    self.mode = Mode.TRAIN
    self.conn = data.engine.connect()

    # gdax min order size = .01btc; kraken = .002btc
    self.min_trade = {Exchange.GDAX: .01, Exchange.KRAKEN: .002}[EXCHANGE]
    self.update_btc_price()

    # Our data is too high-dimensional for the way MemoryModel handles batched episodes. Reduce it (don't like this)
    all_data = data.db_to_dataframe(self.conn, arbitrage=h.arbitrage)
    self.all_observations, self.all_prices = self.xform_data(all_data)
    self.all_prices_diff = self.diff(self.all_prices, True)

    # Action space
    if h.action_type == 'single':
        # In single_action we discard any vals b/w [-min_trade, +min_trade] and call it "hold" (in execute())
        self.actions_ = dict(type='float', shape=(), min_value=-1., max_value=1.)
    elif h.action_type == 'multi':
        # In multi-modal, hold is an actual action (in which case we discard "amount")
        self.actions_ = dict(
            action=dict(type='int', shape=(), num_actions=3),
            amount=dict(type='float', shape=(), min_value=0., max_value=1.))

    # Observation space
    stationary_ct = 2
    self.cols_ = self.all_observations.shape[1]
    self.states_ = dict(
        series=dict(type='float', shape=self.cols_),  # all state values that are time-ish
        stationary=dict(type='float', shape=stationary_ct)  # everything that doesn't care about time
    )

    if self.conv2d:
        # width = step-window (150 time-steps)
        # height = nothing (1)
        # channels = features/inputs (price actions, OHCLV, etc).
        self.states_['series']['shape'] = (h.step_window, 1, self.cols_)