def execute(self):
    # Assumes the usual module-level imports for this file: numpy as np, time,
    # keras.optimizers.RMSprop, and the project classes DataStore (imported as
    # ds), Trading, Models and ExperienceReplay.

    # parameters
    epsilon = .5           # exploration rate
    epsilon_decay = 0.95
    epsilon_min = 0.1
    epoch = 4000           # number of training cycles
    max_memory = 2000      # needs to hold at least one full trading day!
    batch_size = 50
    sequence_length = 250  # 500
    discount = 0.95
    training_days = 1
    testing_days = 1

    features_list = list(range(1, 33))  ## FULL
    features_list = list(range(1, 6))   ## SHORT!!

    training_store = ds.DataStore(training_days=training_days,
                                  features_list=features_list,
                                  sequence_length=sequence_length)
    features_length = training_store.get_features_length()

    env = Trading(data_store=training_store,
                  sequence_length=sequence_length,
                  features_length=features_length)
    num_actions = env.get_action_count()  # [sell, buy, flat] -- taken from Trading

    mo = Models()
    rms = RMSprop(lr=0.0001, rho=0.9, epsilon=1e-06)

    use_ufcnn = True
    if use_ufcnn:
        model = mo.model_ufcnn_concat(sequence_length=sequence_length,
                                      features=features_length,
                                      nb_filter=15,
                                      filter_length=5,
                                      output_dim=num_actions,
                                      optimizer=rms,
                                      loss='mse',
                                      batch_size=batch_size,
                                      init="normal")
        base_model_name = "ufcnn"
    else:
        model = mo.atari_conv_model(output_dim=num_actions,
                                    features=features_length,
                                    loss='mse',
                                    sequence_length=sequence_length,
                                    optimizer=rms,
                                    batch_size=batch_size,
                                    init="normal")
        base_model_name = "atari"

    # Normalize the test data with the mean/std of the training data.
    testing_store = ds.DataStore(training_days=training_days,
                                 testing_days=testing_days,
                                 features_list=features_list,
                                 sequence_length=sequence_length,
                                 mean=training_store.mean,
                                 std=training_store.std)
    test_env = Trading(data_store=testing_store,
                       sequence_length=sequence_length,
                       features_length=features_length)

    # To continue training from a previously saved model, uncomment the line below:
    #mo.load_model("ufcnn_rl_training")

    # Initialize the experience replay object
    start_time = time.time()
    best_pnl = -99999.
    best_rndless_pnl = -99999.

    exp_replay = ExperienceReplay(max_memory=max_memory,
                                  env=env,
                                  sequence_dim=(sequence_length, features_length),
                                  discount=discount)
    lineindex = 0

    # Train
    for e in range(epoch):
        loss = 0.
        game_over = False
        total_reward = 0
        win_cnt = 0
        loss_cnt = 0
        random_cnt = 0
        no_random_cnt = 0

        # loop over the trading days
        for i in range(training_days):
            input_t = env.reset()
            j = 0

            while not game_over:  # game_over marks the end of a trading day
                input_tm1 = input_t

                # epsilon-greedy action selection
                if np.random.rand() <= epsilon:
                    action = np.random.randint(0, num_actions, size=1)[0]
                    random_cnt += 1
                else:
                    q = model.predict(exp_replay.resize_input(input_tm1))
                    action = np.argmax(q[0])
                    no_random_cnt += 1

                # apply the action, get the reward and the new state
                input_t, reward, game_over, idays, lineindex = env.act(action)

                if reward > 0:
                    win_cnt += 1
                if reward < 0:
                    loss_cnt += 1
                total_reward += reward

                # clip the reward to [-1, 1] before storing it
                if reward > 1.:
                    reward = 1.
                if reward < -1.:
                    reward = -1.

                # store the experience
                exp_replay.remember([action, reward, idays, lineindex - 1], game_over)

                # adapt the model, but only once the replay store holds enough samples
                if j > batch_size:
                    inputs, targets = exp_replay.get_batch(model, batch_size=batch_size)
                    curr_loss = model.train_on_batch(exp_replay.resize_input(inputs), targets)
                    loss += curr_loss
                j += 1

        # exploration-free PnL on the test set
        rndless_pnl = self.get_randomless_pnl(test_env=test_env,
                                              model=model,
                                              testing_days=testing_days)

        secs = time.time() - start_time
        print("Epoch {:05d}/{} | Time {:7.1f} | Loss {:11.4f} | Win trades {:5d} | "
              "Loss trades {:5d} | Total PnL {:8.2f} | Rndless PnL {:8.2f} | "
              "Eps {:.4f} | Rnd: {:5d}| No Rnd: {:5d}"
              .format(e, epoch, secs, loss, win_cnt, loss_cnt, total_reward,
                      rndless_pnl, epsilon, random_cnt, no_random_cnt),
              flush=True)

        if epsilon > epsilon_min:
            epsilon *= epsilon_decay

        # Save the trained model weights and architecture; these files are
        # used by the visualization code.
        if total_reward > best_pnl:
            mo.save_model(model, base_model_name + "_rl_best")
            best_pnl = total_reward
        else:
            mo.save_model(model, base_model_name + "_rl_training")

        if rndless_pnl > best_rndless_pnl:
            mo.save_model(model, base_model_name + "_rl_rndless_best")
            best_rndless_pnl = rndless_pnl
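# How exp_replay.get_batch() builds its training targets is not shown in this
# section. Below is a minimal sketch of the standard Q-learning target it
# presumably constructs -- an assumption for illustration, not the repository's
# actual ExperienceReplay code. For each stored transition (s, a, r, s') the
# target vector keeps the model's current Q-values and overwrites only the
# taken action with the Bellman value, which model.train_on_batch() then
# regresses against under the 'mse' loss:
#
#     target[a] = r                               if the day ended here
#     target[a] = r + discount * max_a' Q(s', a') otherwise
#
def q_targets_sketch(model, states, next_states, actions, rewards, terminal, discount):
    # hypothetical helper; all arguments are numpy arrays covering one batch
    targets = model.predict(states)                  # start from current Q-values
    next_q = model.predict(next_states).max(axis=1)  # max_a' Q(s', a')
    for k in range(len(actions)):
        bellman = rewards[k] if terminal[k] else rewards[k] + discount * next_q[k]
        targets[k, actions[k]] = bellman             # only the taken action is updated
    return targets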
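# get_randomless_pnl() is called in the training loop above but not defined in
# this section. A minimal sketch of what it plausibly does follows; the body is
# an assumption, not the repository's actual implementation: replay the test
# environment with purely greedy (epsilon = 0) action selection and sum the
# raw, unclipped rewards, yielding the exploration-free "Rndless PnL" reported
# by the log line above.
def get_randomless_pnl(self, test_env, model, testing_days):
    total_pnl = 0.
    for day in range(testing_days):
        input_t = test_env.reset()
        game_over = False
        while not game_over:
            # no exploration: always take the highest-Q action;
            # the reshape stands in for exp_replay.resize_input() used in training
            q = model.predict(input_t.reshape(1, *input_t.shape))
            action = np.argmax(q[0])
            input_t, reward, game_over, _idays, _lineindex = test_env.act(action)
            total_pnl += reward
    return total_pnl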