class Environment(Common): configuration = None max_states_ = 0 data_ = None current_state_ = 0 t = 0 portfolio_ = None price_ = 0. forecast_ = 0. max_actions_ = 0 done_ = False reward_ = 0 new_state_: int = 0 stop_loss_alert: bool = False def __init__(self, configuration): np.random.seed(1) self.configuration = configuration self.display = self.configuration.display self.states = SCombiner(self.configuration.states_list) self.read_market_data(self.configuration._data_path) self.init_environment(creation_time=True) def init_environment(self, creation_time): """ Initialize the portfolio by updating market price according to the current timeslot 't', creating a new object, and updating the internal state of the environment accordingly. :return: The initial state. """ self.update_market_price() self.portfolio_ = Portfolio(self.configuration, self.price_, self.forecast_) if creation_time is not True: self.display.report(self.portfolio_, t=0, disp_header=True) return self.update_state() def reset(self): """ Reset all internal states :return: """ self.done_ = False self.t = 0 del self.portfolio_ self.configuration.results.drop(self.configuration.results.index, inplace=True) return self.init_environment(creation_time=False) def read_market_data(self, path): """ Reads the simulation data. :param path: :return: """ self.data_ = pd.read_csv(path) self.max_states_ = self.data_.shape[0] def update_market_price(self): """ Set the price to the current time slot, reading column 0 from DF """ assert self.data_ is not None, 'Price series data has not been read yet' self.price_ = self.data_.iloc[self.t, 0] self.forecast_ = self.data_.iloc[self.t, 1] @staticmethod def decide_next_action(state, strategy): return strategy[state] def update_state(self): """ Determine the state of my portfolio value :return: New state """ # Iterate through the list of states defined in the parameters file # and call the update_state() static method in them. new_substates = [] for module_param_name in self.configuration._state.keys(): # The extended classes are defined in the params file and must # start with the 'state_' string. # The '[1:]' serves to remove the leading underscore. module_name = 'state_' + module_param_name[1:] module = importlib.import_module(module_name) state_class = getattr(module, module_name) new_substate = state_class.update_state(self.portfolio_) new_substates.append(new_substate) # Get the ID resulting from the combination of the sub-states self.current_state_ = self.states.get_id(*new_substates) return self.current_state_ def step(self, action): """ Send an action to my Environment. :param action: the action. :return: state, reward, done and iter count. """ assert action < self.configuration._num_actions, \ 'Action ID must be between 0 and {}'.format( self.configuration._num_actions) # Call to the proper portfolio method, based on the action number # passed to this argument. self.reward_ = getattr(self.portfolio_, self.configuration._action_name[action])() # If I'm in stop loss situation, rewards gets a different value self.reward_ = self.fix_reward(self.configuration._action_name[action]) self.display.report_reward(self.reward_, self.states.name(self.current_state_)) self.t += 1 if self.t >= self.max_states_: self.done_ = True self.display.report(self.portfolio_, self.t - 1, disp_footer=True) self.portfolio_.reset_history() return self.new_state_, self.reward_, self.done_, self.t self.update_market_price() self.portfolio_.update(self.price_, self.forecast_) self.new_state_ = self.update_state() self.display.report(self.portfolio_, self.t) self.portfolio_.append_to_history(self) return self.new_state_, self.reward_, self.done_, self.t def fix_reward(self, action_name: str) -> int: """ Reward cannot be the same under stop loss alarm. :param action_name: the name of the action determined. :return: the new reward value, given that we might be under stop loss """ if self.stop_loss is not True: return self.reward_ # Fix the reward if I try to buy and it is not a failed attempt cause # I've no money to buy. if action_name == 'buy' and \ self.portfolio_.latest_price > self.portfolio_.budget: return self.configuration._environment._reward_stoploss_buy # Fix the reward if I'm trying to sell and I DO have shares to sell elif action_name == 'sell' and self.portfolio_.shares > 0.: return self.configuration._environment._reward_stoploss_sell else: return self.configuration._environment._reward_stoploss_donothing @property def stop_loss(self) -> bool: """ Determine if we're under stop loss alarm condition. It is based on the net value of my investment at current moment in time. The parameter can be expressed as a percentage or actual value. :return: True or False """ net_value = self.portfolio_.portfolio_value - self.portfolio_.investment stop_loss = self.portfolio_.configuration._environment._stop_loss if net_value == 0.: return False if stop_loss < 1.0: # percentage of initial budget if (net_value / self.portfolio_.initial_budget) < 0.0 and \ fabs( net_value / self.portfolio_.initial_budget) >= stop_loss: value = True else: value = False else: # actual value if net_value < stop_loss: value = True else: value = False return value