def __init__(self,
             expert_a: IExpert,
             expert_b: IExpert,
             load_trained_model: bool = True,
             train_while_trading: bool = False,
             color: str = 'black',
             name: str = 'dql_trader',
             ):
    """
    Constructor

    Args:
        expert_a: Expert for stock A
        expert_b: Expert for stock B
        load_trained_model: Flag to trigger loading an already trained neural network
        train_while_trading: Flag to trigger on-the-fly training while trading
        color: Color used when plotting this trader
        name: Name of this trader; also determines the file name of the saved model
    """
    # Save experts, training mode and name
    super().__init__(color, name)
    assert expert_a is not None and expert_b is not None
    self.expert_a = expert_a
    self.expert_b = expert_b
    self.train_while_trading = train_while_trading

    # Parameters for neural network
    self.state_size = 2
    self.action_size = 4
    self.hidden_size = 50

    # Parameters for deep Q-learning
    self.learning_rate = 0.001
    self.epsilon = 1.0
    self.epsilon_decay = 0.90
    self.epsilon_min = 0.01
    self.batch_size = 64
    self.min_size_of_memory_before_training = 1000  # should be way bigger than batch_size, but smaller than memory
    self.memory = deque(maxlen=2000)

    # Attributes necessary to remember our last actions and fill our memory with experiences
    self.last_state = None
    self.last_action_a = None
    self.last_action_b = None
    self.last_portfolio_value = None
    self.last_price_a = None
    self.last_price_b = None

    # Create main model, either as trained model (from file) or as untrained model (from scratch)
    self.model = None
    if load_trained_model:
        self.model = load_keras_sequential(self.RELATIVE_DATA_DIRECTORY, self.get_name())
        # BUG FIX: only log a successful load when a model actually came back;
        # the original logged "Loaded trained model" even when loading failed
        # (load_keras_sequential returned None) and a fresh model was built below.
        if self.model is not None:
            logger.info("DQL Trader: Loaded trained model")
    if self.model is None:  # loading failed or we didn't want to use a trained model
        self.model = Sequential()
        self.model.add(Dense(self.hidden_size * 2, input_dim=self.state_size, activation='relu'))
        self.model.add(Dense(self.hidden_size, activation='relu'))
        self.model.add(Dense(self.action_size, activation='linear'))
        logger.info("DQL Trader: Created new untrained model")
    assert self.model is not None
    self.model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))

    # Map expert votes to the integer encoding used in the network's input state,
    # plus the inverse mapping to decode network output back into votes
    self.vote_map = {
        Vote.SELL: 0,
        Vote.HOLD: 1,
        Vote.BUY: 2,
    }
    self.vote_map_inverse = {v: k for k, v in self.vote_map.items()}
def __init__(self,
             expert_a: IExpert,
             expert_b: IExpert,
             load_trained_model: bool = True,
             train_while_trading: bool = False,
             color: str = 'black',
             name: str = 'dql_trader',
             other_data_directory: typing.Optional[str] = None,
             plot_name: typing.Optional[str] = None):
    """
    Constructor

    Args:
        expert_a: Expert for stock A
        expert_b: Expert for stock B
        load_trained_model: Flag to trigger loading an already trained neural network
        train_while_trading: Flag to trigger on-the-fly training while trading
        other_data_directory: relative directory path from project root to .json and .h5 files of model
        plot_name: name to use in a plot for this trader, overriding parameter "name"
            (which also determines path where the model gets saved)
    """
    # Save experts, training mode and name
    super().__init__(color, name)
    assert expert_a is not None and expert_b is not None
    self.experts = [expert_a, expert_b]
    self.train_while_trading = train_while_trading
    self.other_data_directory = other_data_directory
    self.plot_name = plot_name

    # Parameters for neural network
    self.state_size = State.get_number_of_input_neurons()
    self.action_size = Action.get_number_of_output_neurons()
    self.hidden_size = 30

    # Parameters for deep Q-learning
    self.learning_rate = 0.001  # default for Adam: 0.001
    self.epsilon = 0.9995  # determines how quickly epsilon decreases to epsilon_min
    self.random_action_min_probability = 0.01  # minimum probability of choosing a random action
    self.train_each_n_days = 128  # how many trading days pass between each training
    self.batch_size = 128  # how many experience samples from memory to train on each training occasion
    self.min_size_of_memory_before_training = 1000  # should be way bigger than batch_size, but smaller than memory
    self.memory: typing.Deque[Experience] = deque(maxlen=2000)

    # discount factor: how quickly we expect an action to pay off
    # (0 -> only evaluate immediate effect on portfolio value on next day;
    # near 1 -> also include development of portfolio value on future days, where each day has a weight of
    # discount_factor ^ #days_ahead; higher values allow to factor in payoffs at later time while also
    # making it harder to attribute the effects to the actions from a concrete day)
    self.discount_factor = 0
    assert 0 <= self.discount_factor < 1.0
    # min_horizon: number of more recent experiences required until discount_factor ^ #days_ahead drops to 1%
    if 0 < self.discount_factor < 1:
        self.min_horizon = min(100, math.ceil(math.log(0.01, self.discount_factor)))
    else:
        self.min_horizon = 1
    self.q_value_cap = 1.0  # limit Q-value to this magnitude; 0.0 to deactivate

    self.is_evolved_model = False  # set to True when loading model from file
    self.days_passed = 0
    self.training_occasions = 0

    # Create main model, either as trained model (from file) or as untrained model (from scratch)
    self.model = None
    if load_trained_model:
        data_dir = self.RELATIVE_DATA_DIRECTORY if self.other_data_directory is None else self.other_data_directory
        self.model = load_keras_sequential(data_dir, 'dql_trader')
        # BUG FIX: only mark the model as evolved when loading actually succeeded;
        # the original set is_evolved_model = True even when load_keras_sequential
        # returned None, in which case a fresh untrained model is built below and
        # would have been misreported as an evolved/loaded one.
        if self.model is not None:
            logger.info("DQL Trader: Loaded trained model")
            self.is_evolved_model = True
    if self.model is None:  # loading failed or we didn't want to use a trained model
        self.model = Sequential()
        # relu -> elu: avoid "dead nodes" problem of relu
        # lecun_normal: initialization from gaussian accounting for number of nodes as well
        # initialize bias with zeros
        self.model.add(Dense(self.hidden_size * 2, input_dim=self.state_size, activation='elu',
                             kernel_initializer='lecun_normal', bias_initializer='zeros'))
        self.model.add(Dense(self.hidden_size, activation='elu',
                             kernel_initializer='lecun_normal', bias_initializer='zeros'))
        self.model.add(Dense(self.action_size, activation='linear',
                             kernel_initializer='lecun_normal', bias_initializer='zeros'))
        logger.info("DQL Trader: Created new untrained model")
    assert self.model is not None
    self.model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))