def render(self, mode='live', **kwargs):
    """Render the current environment state.

    Args:
        mode: ``'file'`` appends a text summary via ``_render_to_file``;
            ``'live'`` draws on a ``StockTradingGraph``; any other value
            is a no-op.
        **kwargs: ``filename`` (used in ``'file'`` mode, defaults to
            ``'render.txt'``).
    """
    if mode == 'file':
        self._render_to_file(kwargs.get('filename', 'render.txt'))
    elif mode == 'live':
        # Build the graph lazily so nothing is created unless live
        # rendering is actually requested.
        if self.visualization is None:
            self.visualization = StockTradingGraph(self.df)

        # NOTE: this version draws on every call; the
        # ``current_step > LOOKBACK_WINDOW_SIZE`` warm-up guard is
        # intentionally left disabled, as in the original.
        self.visualization.render(self.current_step,
                                  self.net_worth,
                                  self.trades,
                                  window_size=LOOKBACK_WINDOW_SIZE)
def render(self, mode='live', **kwargs):
    """Render the current environment state.

    Args:
        mode: ``'file'`` appends a text summary via ``_render_to_file``;
            ``'live'`` draws on a ``StockTradingGraph``; any other value
            is a no-op.
        **kwargs: ``filename`` (used in ``'file'`` mode, defaults to
            ``'render.txt'``) and ``title`` (passed to the graph when it
            is first created, defaults to ``None``).
    """
    if mode == 'file':
        self._render_to_file(kwargs.get('filename', 'render.txt'))
    elif mode == 'live':
        # Build the graph lazily; the title is only honoured on the call
        # that creates it.
        if self.visualization is None:
            self.visualization = StockTradingGraph(
                self.df, self.training_set_size, kwargs.get('title', None))

        # Skip drawing until more than ``lookback_days`` steps have elapsed.
        if self.current_step > self.lookback_days:
            self.visualization.render(self.current_step,
                                      self.net_worth,
                                      self.trades,
                                      window_size=self.lookback_days)
class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym.

    The agent observes a window of scaled OHLCV rows plus five scaled
    account figures, and acts with a two-element vector
    ``[action_type, amount]``: ``action_type < 1`` buys, ``1 <= action_type
    < 2`` sells, anything else holds; ``amount`` is the fraction of balance
    (buy) or of held shares (sell) to trade.
    """
    metadata = {'render.modes': ['live', 'file', 'none']}
    visualization = None

    # Columns that make up the rows of the market part of an observation.
    _MARKET_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']

    def __init__(self, df):
        """Create the environment.

        Args:
            df: DataFrame with ``Open``, ``High``, ``Low``, ``Close``,
                ``Adjusted_Close`` and ``Volume`` columns. It is adjusted
                in place by ``_adjust_prices``.
        """
        super(StockTradingEnv, self).__init__()

        self.df = self._adjust_prices(df)
        self.reward_range = (0, MAX_ACCOUNT_BALANCE)

        # Actions of the format Buy x%, Sell x%, Hold, etc.
        self.action_space = spaces.Box(low=np.array([0, 0]),
                                       high=np.array([3, 1]),
                                       dtype=np.float16)

        # Five rows (OHLCV); LOOKBACK_WINDOW_SIZE + 1 market columns plus
        # one trailing column of account figures.
        self.observation_space = spaces.Box(low=0,
                                            high=1,
                                            shape=(5,
                                                   LOOKBACK_WINDOW_SIZE + 2),
                                            dtype=np.float16)

    def _adjust_prices(self, df):
        """Rescale OHLC by ``Adjusted_Close / Close`` and return ``df``.

        The frame is modified in place; the same object is returned.
        """
        adjust_ratio = df['Adjusted_Close'] / df['Close']

        df['Open'] = df['Open'] * adjust_ratio
        df['High'] = df['High'] * adjust_ratio
        df['Low'] = df['Low'] * adjust_ratio
        df['Close'] = df['Close'] * adjust_ratio

        return df

    def _next_observation(self):
        """Build the ``(5, LOOKBACK_WINDOW_SIZE + 2)`` observation.

        Rows are Open/High/Low/Close/Volume for the index labels
        ``current_step .. current_step + LOOKBACK_WINDOW_SIZE`` (inclusive),
        prices divided by ``MAX_SHARE_PRICE`` and volume by
        ``MAX_NUM_SHARES``. Columns for which no data row exists (end of the
        frame) stay zero. A final column holds the scaled account figures.
        """
        frame = np.zeros((5, LOOKBACK_WINDOW_SIZE + 1))

        # The previous np.put(frame, [0, 4], ...) call only wrote two
        # scalars into the flattened frame; fill it row by row instead.
        window = np.asarray(
            self.df.loc[self.current_step:self.current_step +
                        LOOKBACK_WINDOW_SIZE, self._MARKET_COLUMNS],
            dtype=float).T
        scales = np.array([MAX_SHARE_PRICE] * 4 + [MAX_NUM_SHARES],
                          dtype=float)
        frame[:, :window.shape[1]] = window / scales[:, None]

        # Append additional data and scale each value by its maximum.
        obs = np.append(frame, [
            [self.balance / MAX_ACCOUNT_BALANCE],
            [self.max_net_worth / MAX_ACCOUNT_BALANCE],
            [self.shares_held / MAX_NUM_SHARES],
            [self.cost_basis / MAX_SHARE_PRICE],
            [self.total_sales_value / (MAX_NUM_SHARES * MAX_SHARE_PRICE)],
        ],
                        axis=1)

        return obs

    def _take_action(self, action):
        """Apply ``action`` at a random price between this step's open and close.

        Args:
            action: Sequence ``[action_type, amount]``; see the class
                docstring for the encoding.
        """
        current_price = random.uniform(self.df.loc[self.current_step, "Open"],
                                       self.df.loc[self.current_step, "Close"])

        action_type = action[0]
        amount = action[1]

        if action_type < 1:
            # Buy amount % of balance in shares
            total_possible = int(self.balance / current_price)
            shares_bought = int(total_possible * amount)

            # Only touch the account when something is actually bought:
            # with zero shares held and zero bought the cost-basis average
            # would divide by zero.
            if shares_bought > 0:
                prev_cost = self.cost_basis * self.shares_held
                additional_cost = shares_bought * current_price

                self.balance -= additional_cost
                self.cost_basis = (prev_cost + additional_cost) / (
                    self.shares_held + shares_bought)
                self.shares_held += shares_bought

                self.trades.append({
                    'step': self.current_step,
                    'shares': shares_bought,
                    'total': additional_cost,
                    'type': "buy"
                })

        elif action_type < 2:
            # Sell amount % of shares held
            shares_sold = int(self.shares_held * amount)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_sold += shares_sold
            self.total_sales_value += shares_sold * current_price

            if shares_sold > 0:
                self.trades.append({
                    'step': self.current_step,
                    'shares': shares_sold,
                    'total': shares_sold * current_price,
                    'type': "sell"
                })

        self.net_worth = self.balance + self.shares_held * current_price

        if self.net_worth > self.max_net_worth:
            self.max_net_worth = self.net_worth

        if self.shares_held == 0:
            self.cost_basis = 0

    def step(self, action):
        """Execute one time step within the environment.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is
            ``balance * (current_step / MAX_STEPS) + current_step`` and
            ``done`` is set when net worth is non-positive or the step
            counter has reached the number of rows in the frame.
        """
        self._take_action(action)

        self.current_step += 1

        delay_modifier = (self.current_step / MAX_STEPS)

        reward = self.balance * delay_modifier + self.current_step
        done = self.net_worth <= 0 or self.current_step >= len(
            self.df.loc[:, 'Open'].values)

        obs = self._next_observation()

        return obs, reward, done, {}

    def reset(self):
        """Reset the account and step counter; return the first observation."""
        self.balance = INITIAL_ACCOUNT_BALANCE
        self.net_worth = INITIAL_ACCOUNT_BALANCE
        self.max_net_worth = INITIAL_ACCOUNT_BALANCE
        self.shares_held = 0
        self.cost_basis = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0
        self.current_step = 0
        self.trades = []

        return self._next_observation()

    def _render_to_file(self, filename='render.txt'):
        """Append a text summary of the current account state to ``filename``."""
        profit = self.net_worth - INITIAL_ACCOUNT_BALANCE

        # ``with`` guarantees the handle is closed even if a write fails.
        with open(filename, 'a+') as file:
            file.write(f'Step: {self.current_step}\n')
            file.write(f'Balance: {self.balance}\n')
            file.write(
                f'Shares held: {self.shares_held} (Total sold: {self.total_shares_sold})\n'
            )
            file.write(
                f'Avg cost for held shares: {self.cost_basis} (Total sales value: {self.total_sales_value})\n'
            )
            file.write(
                f'Net worth: {self.net_worth} (Max net worth: {self.max_net_worth})\n'
            )
            file.write(f'Profit: {profit}\n\n')

    def render(self, mode='live', **kwargs):
        """Render the environment.

        Args:
            mode: ``'file'`` appends to a text file, ``'live'`` draws on a
                ``StockTradingGraph``; other values do nothing.
            **kwargs: ``filename`` for file mode (default ``'render.txt'``)
                and ``title`` for the graph (default ``None``).
        """
        if mode == 'file':
            self._render_to_file(kwargs.get('filename', 'render.txt'))

        elif mode == 'live':
            if self.visualization is None:
                self.visualization = StockTradingGraph(
                    self.df, kwargs.get('title', None))

            # Skip drawing until more than LOOKBACK_WINDOW_SIZE steps
            # have elapsed.
            if self.current_step > LOOKBACK_WINDOW_SIZE:
                self.visualization.render(self.current_step,
                                          self.net_worth,
                                          self.trades,
                                          window_size=LOOKBACK_WINDOW_SIZE)

    def close(self):
        """Close the live graph, if one was created."""
        if self.visualization is not None:
            self.visualization.close()
            self.visualization = None
class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym.

    Observations are 18 values: 12 market/indicator columns read at the
    current step followed by six account figures. Actions are
    ``[action_type, amount]``: ``action_type < 1`` buys, ``1 <= action_type
    < 2`` sells, anything else holds.
    """
    metadata = {'render.modes': ['live', 'file', 'none']}
    visualization = None

    # Data-frame columns that form the first 12 observation entries, in order.
    _MARKET_COLUMNS = [
        'open', 'high', 'low', 'close', 'volumefrom', 'MOM', 'RSI',
        'HT_DCPERIOD', 'EMA', 'WILLR', 'BBANDS_upper', 'PPO'
    ]

    def __init__(self, config):
        """Create the environment.

        Args:
            config: Mapping with a ``'df'`` entry holding the price /
                indicator DataFrame.
        """
        self.df = config['df']
        self.lookback_window_size = 40
        self.initial_balance = 10000
        self.commission = 0.00075
        self.serial = False

        # Actions of the format Buy x%, Sell x%, Hold, etc.
        self.action_space = spaces.Box(low=np.array([0, 0]),
                                       high=np.array([3, 1]),
                                       dtype=np.float16)

        # 12 market features + 6 account features.
        self.observation_space = spaces.Box(low=-np.finfo(np.float32).max,
                                            high=np.finfo(np.float32).max,
                                            shape=(18, ),
                                            dtype=np.float16)

    def _next_observation(self):
        """Return the 18-element observation for ``current_step``.

        The first 12 entries are the ``_MARKET_COLUMNS`` values at
        ``current_step`` (zeros once the step counter has run past the last
        row); the remaining six are balance, btc_bought, btc_sold, cost,
        sales and net_worth.
        """
        frame = np.zeros(12)

        # The previous np.put call used a float index (8.9, a typo for
        # "8, 9") and two-row slices per feature, so the features never
        # landed in their own slots. Read the row directly instead.
        if self.current_step < len(self.df):
            frame[:] = np.asarray(
                self.df.loc[self.current_step, self._MARKET_COLUMNS],
                dtype=float)

        obs = np.append(frame,
                        [[self.balance], [self.btc_bought], [self.btc_sold],
                         [self.cost], [self.sales], [self.net_worth]])
        return obs

    def _take_action(self, action):
        """Apply ``action`` at a random price between this step's open and close.

        Args:
            action: Sequence ``[action_type, amount]``; ``amount`` is the
                fraction of balance to spend (buy) or of holdings to sell.
        """
        current_price = random.uniform(self.df.loc[self.current_step, "open"],
                                       self.df.loc[self.current_step, "close"])
        # NOTE(review): multiplies cash by price; looks like a placeholder
        # for a buy-and-hold benchmark. Nothing in this class reads it.
        self.buy_and_hold = self.initial_balance * current_price

        action_type = action[0]
        amount = action[1]

        # Per-step trade figures are reset before every action.
        self.btc_bought = 0
        self.btc_sold = 0
        self.cost = 0
        self.sales = 0

        if action_type < 1:
            # Units bought = cash spent / price (previously multiplied by
            # price, which inflated the purchase by price squared).
            self.btc_bought = self.balance * amount / current_price
            self.cost = self.btc_bought * current_price * (1 + self.commission)
            self.btc_held += self.btc_bought
            self.balance -= self.cost

        elif action_type < 2:
            # Sell amount % of holdings, net of commission.
            self.btc_sold = self.btc_held * amount
            self.sales = self.btc_sold * current_price * (1 - self.commission)
            self.btc_held -= self.btc_sold
            self.balance += self.sales

        if self.btc_sold > 0 or self.btc_bought > 0:
            self.trades.append({
                'step': self.current_step,
                'amount': self.btc_sold if self.btc_sold > 0 else self.btc_bought,
                'total': self.sales if self.btc_sold > 0 else self.cost,
                'type': "sell" if self.btc_sold > 0 else "buy"
            })

        self.net_worth = self.balance + self.btc_held * current_price

    def step(self, action):
        """Execute one time step within the environment.

        Returns:
            ``(obs, reward, done, info)`` where ``reward`` is
            ``net_worth - INITIAL_ACCOUNT_BALANCE`` and ``done`` is set when
            net worth is non-positive or the step counter has reached the
            number of rows in the frame.
        """
        self._take_action(action)

        self.current_step += 1

        reward = self.net_worth - INITIAL_ACCOUNT_BALANCE
        done = self.net_worth <= 0 or self.current_step >= len(
            self.df.loc[:, 'open'].values)

        obs = self._next_observation()

        return obs, reward, done, {}

    def reset(self):
        """Reset the account and step counter; return the first observation."""
        self.balance = INITIAL_ACCOUNT_BALANCE
        self.net_worth = INITIAL_ACCOUNT_BALANCE
        self.max_net_worth = INITIAL_ACCOUNT_BALANCE
        self.shares_held = 0
        self.btc_held = 0
        self.btc_bought = 0
        self.btc_sold = 0
        self.cost = 0
        self.sales = 0
        self.cost_basis = 0
        self.total_shares_sold = 0
        self.total_sales_value = 0
        self.current_step = 0
        self.trades = []

        return self._next_observation()

    def _render_to_file(self, filename='render.txt'):
        """Append a text summary of the current account state to ``filename``.

        NOTE(review): shares_held, total_shares_sold, cost_basis,
        total_sales_value and max_net_worth are only assigned in ``reset``
        in this class, so those report fields do not change during an
        episode.
        """
        profit = self.net_worth - INITIAL_ACCOUNT_BALANCE

        # ``with`` guarantees the handle is closed even if a write fails.
        with open(filename, 'a+') as file:
            file.write('Step: {}\n'.format(self.current_step))
            file.write('Balance: {}\n'.format(self.balance))
            file.write('Shares held: {} (Total sold: {})\n'.format(
                self.shares_held, self.total_shares_sold))
            file.write(
                'Avg cost for held shares: {} (Total sales value: {})\n'.format(
                    self.cost_basis, self.total_sales_value))
            file.write('Net worth: {} (Max net worth: {})\n'.format(
                self.net_worth, self.max_net_worth))
            file.write('Profit: {}\n\n'.format(profit))

    def render(self, mode='live', **kwargs):
        """Render the environment.

        Args:
            mode: ``'file'`` appends to a text file, ``'live'`` draws on a
                ``StockTradingGraph``; other values do nothing.
            **kwargs: ``filename`` for file mode (default ``'render.txt'``).
        """
        if mode == 'file':
            self._render_to_file(kwargs.get('filename', 'render.txt'))

        elif mode == 'live':
            if self.visualization is None:
                self.visualization = StockTradingGraph(self.df)

            # Skip drawing until more than LOOKBACK_WINDOW_SIZE steps
            # have elapsed.
            if self.current_step > LOOKBACK_WINDOW_SIZE:
                self.visualization.render(self.current_step,
                                          self.net_worth,
                                          self.trades,
                                          window_size=LOOKBACK_WINDOW_SIZE)

    def close(self):
        """Close the live graph, if one was created."""
        if self.visualization is not None:
            self.visualization.close()
            self.visualization = None
class StockTradingEnv(gym.Env):
    """A stock trading environment for OpenAI gym.

    Observations are 18 values: 12 market/indicator columns read at the
    current step followed by six account figures. Actions are
    ``[action_type, amount]``: ``action_type < 1`` buys, ``1 <= action_type
    < 2`` sells, anything else holds. The reward compares net worth against
    a buy-and-hold benchmark.
    """
    metadata = {'render.modes': ['live', 'file', 'none']}
    visualization = None

    # Data-frame columns that form the first 12 observation entries, in order.
    _MARKET_COLUMNS = [
        'open', 'high', 'low', 'close', 'volumefrom', 'MOM', 'RSI',
        'HT_DCPERIOD', 'EMA', 'WILLR', 'BBANDS_upper', 'PPO'
    ]

    def __init__(self, config):
        """Create the environment.

        Args:
            config: Mapping with ``'df'`` (price / indicator DataFrame) and
                ``'render_title'`` (passed to the live graph).
        """
        self.df = config['df']
        self.render_title = config['render_title']
        self.lookback_window_size = LOOKBACK_WINDOW_SIZE
        self.initial_balance = INITIAL_ACCOUNT_BALANCE
        self.commission = 0.00075
        self.serial = False

        self.action_space = spaces.Box(low=np.array([0, 0]),
                                       high=np.array([3, 1]),
                                       dtype=np.float16)

        # 12 market features + 6 account features.
        self.observation_space = spaces.Box(low=-np.finfo(np.float32).max,
                                            high=np.finfo(np.float32).max,
                                            shape=(18, ),
                                            dtype=np.float16)

    def _next_observation(self):
        """Return the 18-element observation for ``current_step``.

        The first 12 entries are the ``_MARKET_COLUMNS`` values at
        ``current_step`` (zeros once the step counter has run past the last
        row); the remaining six are balance, btc_bought, btc_sold, cost,
        sales and net_worth.
        """
        frame = np.zeros(12)

        # The previous np.put call used a float index (8.9, a typo for
        # "8, 9") and two-row slices per feature, so the features never
        # landed in their own slots. Read the row directly instead.
        if self.current_step < len(self.df):
            frame[:] = np.asarray(
                self.df.loc[self.current_step, self._MARKET_COLUMNS],
                dtype=float)

        obs = np.append(frame,
                        [[self.balance], [self.btc_bought], [self.btc_sold],
                         [self.cost], [self.sales], [self.net_worth]])
        return obs

    def _take_action(self, action):
        """Apply ``action`` at a random price between this step's open and close.

        Also refreshes ``net_worth`` and the ``buy_and_hold`` benchmark at
        the sampled price.

        Args:
            action: Sequence ``[action_type, amount]``; ``amount`` is the
                fraction of balance to spend (buy) or of holdings to sell.
        """
        current_price = random.uniform(self.df.loc[self.current_step, "open"],
                                       self.df.loc[self.current_step, "close"])

        action_type = action[0]
        amount = action[1]

        # Per-step trade figures are reset before every action.
        self.btc_bought = 0
        self.btc_sold = 0
        self.cost = 0
        self.sales = 0

        if action_type < 1:
            # Commission is charged on top of the purchase value.
            self.btc_bought = self.balance * amount / current_price
            self.cost = self.btc_bought * current_price * (1 + self.commission)
            self.shares_held += self.btc_bought
            self.balance -= self.cost

        elif action_type < 2:
            # Commission is deducted from the sale proceeds.
            self.btc_sold = self.shares_held * amount
            self.sales = self.btc_sold * current_price * (1 - self.commission)
            self.shares_held -= self.btc_sold
            self.balance += self.sales

        if self.btc_sold > 0 or self.btc_bought > 0:
            self.trades.append({
                'step': self.current_step,
                'amount': self.btc_sold if self.btc_sold > 0 else self.btc_bought,
                'total': self.sales if self.btc_sold > 0 else self.cost,
                'type': "sell" if self.btc_sold > 0 else "buy"
            })

        self.net_worth = self.balance + self.shares_held * current_price
        self.buy_and_hold = self.initial_bought * current_price

    def step(self, action):
        """Execute one time step within the environment.

        Returns:
            ``(obs, reward, done, info)``. ``reward`` is the difference
            between net worth and the buy-and-hold value divided by their
            mean. ``done`` is set when net worth or balance is non-positive
            or the step counter has reached the number of rows in the frame.
        """
        self._take_action(action)

        self.current_step += 1

        net_worth_and_buyhold_mean = (self.net_worth + self.buy_and_hold) / 2
        reward = (self.net_worth -
                  self.buy_and_hold) / net_worth_and_buyhold_mean

        done = self.net_worth <= 0 or self.balance <= 0 or self.current_step >= len(
            self.df.loc[:, 'open'].values)

        obs = self._next_observation()

        return obs, reward, done, {}

    def reset(self):
        """Reset the account, benchmark and step counter; return the first observation."""
        self.balance = self.initial_balance
        self.net_worth = self.initial_balance
        self.shares_held = 0
        self.btc_bought = 0
        self.btc_sold = 0
        self.cost = 0
        self.sales = 0
        self.current_step = 0
        self.first_price = self.df.loc[0, "close"]
        self.initial_bought = self.initial_balance / self.first_price
        # Seed the benchmark so render() / _render_to_file() work before
        # the first step; _take_action refreshes it on every step.
        self.buy_and_hold = self.initial_bought * self.first_price
        self.trades = []

        return self._next_observation()

    def _render_to_file(self, filename='render.txt'):
        """Append a text summary of the current account state to ``filename``."""
        profit = self.net_worth - self.initial_balance

        # ``with`` guarantees the handle is closed even if a write fails.
        with open(filename, 'a+') as file:
            file.write('Step: {}\n'.format(self.current_step))
            file.write('Balance: {}\n'.format(self.balance))
            file.write('Shares held: {}\n'.format(self.shares_held))
            file.write('Avg cost for held shares: {}\n'.format(self.cost))
            file.write('Net worth: {}\n'.format(self.net_worth))
            file.write('Buy and hold strategy: {}\n'.format(self.buy_and_hold))
            file.write('Profit: {}\n\n'.format(profit))

    def render(self, mode='live', **kwargs):
        """Render the environment.

        Args:
            mode: ``'file'`` appends to a text file, ``'live'`` draws on a
                ``StockTradingGraph``; other values do nothing.
            **kwargs: ``filename`` for file mode (default ``'render.txt'``).
        """
        if mode == 'file':
            self._render_to_file(kwargs.get('filename', 'render.txt'))

        elif mode == 'live':
            if self.visualization is None:
                self.visualization = StockTradingGraph(self.df,
                                                       self.render_title)

            # Skip drawing until more than LOOKBACK_WINDOW_SIZE steps
            # have elapsed.
            if self.current_step > LOOKBACK_WINDOW_SIZE:
                self.visualization.render(self.current_step,
                                          self.net_worth,
                                          self.buy_and_hold,
                                          self.trades,
                                          window_size=LOOKBACK_WINDOW_SIZE)

    def close(self):
        """Close the live graph, if one was created."""
        if self.visualization is not None:
            self.visualization.close()
            self.visualization = None