def main():
    env = MarketEnv()
    env.set_period('2001-01', '2015-12')
    session_name = 'model3'
    weights_predictor = WeightsPredictor(env)
    price_predictor = PricePredictor(env, lookback_window=30)
    # train_price_predictor(env, 250, session_name, price_predictor)
    # train_weights_predictor(env, 50, session_name, price_predictor, weights_predictor)
    backtest(env, session_name, price_predictor, weights_predictor)

def get_test_performance(epoch, modelFilename='model.h5', model=None):
    import codecs

    codeListFilename = 'input_code.csv'
    codeMap = {}
    f = codecs.open(codeListFilename, "r", "utf-8")
    for line in f:
        if line.strip() != "":
            tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
            codeMap[tokens[0]] = tokens[1]
    f.close()

    env = MarketEnv(dir_path="./If_index/", target_codes=list(codeMap.keys()),
                    start_date="2015-05-29", end_date="2016-08-25", sudden_death=-1.0)
    target_close = env.get_close()

    if model is None and modelFilename == 'model_dqn.h5':
        model = MarketModelBuilder(modelFilename).getModel()
    elif model is None and modelFilename == 'model_pg.h5':
        model = MarketPolicyGradientModelBuilder(modelFilename).getModel()

    game_over = False
    # get initial input
    input_t = env.reset()
    cumReward = 0
    cum_profit = {}
    pre_action = {}
    while not game_over:
        input_tm1 = input_t
        q = model.predict(input_tm1)
        action = np.argmax(q[0])
        input_t, reward, game_over, info = env.step(action)
        cumReward += reward
        cum_profit[info["dt"]] = cumReward

        if env.actions[action] == "LONG" or env.actions[action] == "SHORT":
            pre_action[info['dt']] = env.actions[action]
            color = bcolors.FAIL if env.actions[action] == "LONG" else bcolors.OKBLUE
            print("%s:\t%s\t%d\t%.2f\t%.2f\t" % (info["dt"], color + env.actions[action] + bcolors.ENDC,
                                                 info['correct_action'], cumReward, info["cum"])
                  + "\t".join("%s:%.2f" % (l, i) for l, i in zip(env.actions, q[0].tolist())))

    print(len(cum_profit), len(target_close))
    plot_profit(cum_profit, target_close, pre_action, "test_" + str(epoch))
    return cum_profit, target_close

def make_env(wrap=True):
    env = MarketEnv(dir_path=args.market_data, target_codes=list(codeMap.keys()), input_codes=[],
                    start_date='2010-08-25', end_date='2015-08-25', sudden_death=-1.0)
    return env

def train_price_predictor(env: MarketEnv, epochs, name, predictor):
    print('Training price predictor:\n'
          '  session name: {}\n'
          '  training data period: {} - {}\n'
          '  number of epochs: {}'.format(name, env.period_start, env.period_end, epochs))

    if predictor.try_load(name):
        print('Successfully loaded session data')
    else:
        print('Could not load session data')

    env.reset()
    steps = []
    while env.should_continue():
        lookback_prices = env.get_returns_from_n_days(predictor.lookback_window)
        if not env.in_last_month():
            steps.append((lookback_prices, env.next_mtm_returns()))
        env.step()

    predictor.train(steps, epochs)
    predictor.save(name)
    print('Successfully saved session data')

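# The function above only assumes a small predictor interface: try_load()/
# save(), train(steps, epochs), predict(), and a lookback_window attribute.
# The sketch below is a hypothetical stand-in that satisfies that interface
# (ordinary least squares from a lookback window of returns to the next
# mark-to-market returns); it is NOT the repo's actual PricePredictor.
import os
import numpy as np

class LeastSquaresPricePredictor:
    def __init__(self, lookback_window=30):
        self.lookback_window = lookback_window
        self.coef = None

    def try_load(self, name):
        # Hypothetical on-disk format: a single numpy coefficient matrix.
        path = '{}_coef.npy'.format(name)
        if os.path.exists(path):
            self.coef = np.load(path)
            return True
        return False

    def save(self, name):
        np.save('{}_coef.npy'.format(name), self.coef)

    def train(self, steps, epochs):
        # steps is a list of (lookback_prices, next_returns) pairs, exactly
        # as assembled in train_price_predictor() above; epochs is unused
        # for a closed-form least-squares fit.
        X = np.stack([np.asarray(s[0]).ravel() for s in steps])
        y = np.stack([np.asarray(s[1]).ravel() for s in steps])
        self.coef, *_ = np.linalg.lstsq(X, y, rcond=None)

    def predict(self, lookback_prices):
        return np.asarray(lookback_prices).ravel() @ self.coef
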
def pg_train():
    import sys
    import codecs

    codeListFilename = sys.argv[1]
    modelFilename = sys.argv[2] if len(sys.argv) > 2 else None
    historyFilename = sys.argv[3] if len(sys.argv) > 3 else None

    codeMap = {}
    f = codecs.open(codeListFilename, "r", "utf-8")
    for line in f:
        if line.strip() != "":
            tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
            codeMap[tokens[0]] = tokens[1]
    f.close()

    env = MarketEnv(dir_path="./If_index/", target_codes=list(codeMap.keys()),
                    start_date="2013-05-26", end_date="2015-08-25", sudden_death=-1.0)
    pg = PolicyGradient(env, discount=0.9, model_filename=modelFilename,
                        history_filename=historyFilename)
    pg.train(verbose=2)

def init_env1():
    dir_path = os.path.join(local_path, 'tmp', 'market_env')
    util.mkdirp(dir_path)
    df = pd.DataFrame(data={
        'date': [10000 + i for i in range(1, 21)],
        'open': [1.0 + 0.1 * i for i in range(1, 21)],
        'high': [1.0 + 0.1 * i for i in range(1, 21)],
        'low': [1.0 + 0.1 * i for i in range(1, 21)],
        'close': [1.0 + 0.1 * i for i in range(1, 21)],
        'volume': [1000 + 10 * i for i in range(1, 21)],
    })
    df = df[['date', 'open', 'high', 'low', 'close', 'volume']]
    df.to_csv(os.path.join(dir_path, 'SYM1.csv'), index=False)
    env = MarketEnv(dir_path=dir_path, codes=['SYM1'],
                    target_date_start='10015', target_date_end='10020', scope=4)
    return env

from os import walk

def exploreFolder(folder):
    # Collect the symbol names (CSV file names without extension) in the
    # top level of the given folder.
    files = []
    for (dirpath, dirnames, filenames) in walk(folder):
        for f in filenames:
            files.append(f.replace(".csv", ""))
        break
    return files


if __name__ == "__main__":
    targetCodes = exploreFolder('sample_data')
    for t in targetCodes:
        print(t)

    env = MarketEnv(dir_path="./sample_data/", target_codes=targetCodes,
                    start_date="2015-03-13", end_date="2017-08-11",
                    scope=60, sudden_death_rate=0.7)
    pg = DeepQ(env, current_discount=0.66, future_discount=0.80,
               model_file_name="./model/600197.model")
    pg.train(verbose=1)

# import codecs
# keras.layers.Convolution2D()

historyFilename = None
modelFilename = None  # 'model_1.h5'

name = 'SP500_'
codeList = [name + str(i) for i in range(1, 10)]
# codeList = ['SP500']
# codeList = ['DJI','SP500','NASDAQ','005380','005930','005935','012330','015760','028260','032830','035420']

env = MarketEnv(dir_path="/home/mercy/notebook/sample_data/", target_codes=codeList,
                input_codes=[], start_date="2009-01-03", end_date="2016-01-03",
                sudden_death=-1)
pg = PolicyGradient(env, discount=0.9, model_filename=modelFilename,
                    history_filename=historyFilename)
pg.train(max_episode=50, threshold=0.5, verbose=0)

#%%
import numpy as np
import pandas as pd
import seaborn as sn
from matplotlib import pyplot as plt

import sys
import codecs

codeListFilename = sys.argv[1]
modelFilename = sys.argv[2] if len(sys.argv) > 2 else None
historyFilename = sys.argv[3] if len(sys.argv) > 3 else None

codeMap = {}
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

env = MarketEnv(dir_path="./data/", target_codes=list(codeMap.keys()), input_codes=[],
                start_date="2010-08-25", end_date="2015-08-25", sudden_death=-1.0)
pg = PolicyGradient(env, discount=0.9, model_filename=modelFilename,
                    history_filename=historyFilename)
pg.train(verbose=1)

def train():
    import sys
    import codecs

    codeListFilename = sys.argv[1]
    modelFilename = sys.argv[2] if len(sys.argv) > 2 else None

    codeMap = {}
    f = codecs.open(codeListFilename, "r", "utf-8")
    for line in f:
        if line.strip() != "":
            tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
            codeMap[tokens[0]] = tokens[1]
    f.close()

    env = MarketEnv(dir_path="./If_index/", target_codes=list(codeMap.keys()),
                    start_date="2013-05-26", end_date="2015-08-25", sudden_death=-1.0)
    target_close = env.get_close()

    # parameters
    epsilon = .45  # exploration rate
    min_epsilon = 0.1
    epoch = 1000
    max_memory = 5000
    batch_size = 256
    discount = 0.9
    update_target_weight_step = 10

    model = MarketModelBuilder(modelFilename).getModel()
    target_model = MarketModelBuilder(modelFilename).getModel()
    model.compile(loss='mse', optimizer='rmsprop')
    target_model.compile(loss='mse', optimizer='rmsprop')

    # Initialize experience replay object
    exp_replay = ExperienceReplay(max_memory=max_memory, discount=discount)

    # Train
    win_cnt = 0
    for e in range(epoch):
        loss = 0.
        game_over = False
        # get initial input
        input_t = env.reset()
        cumReward = 0
        cum_profit = {}
        pre_action = {}
        iter_cnt = 0

        while not game_over:
            input_tm1 = input_t
            isRandom = False

            # get next action (epsilon-greedy)
            if np.random.rand() <= epsilon:
                action = np.random.randint(0, env.action_space.n, size=1)[0]
                isRandom = True
            else:
                q = model.predict(input_tm1)
                action = np.argmax(q[0])
                if np.isnan(q).any():
                    print("OCCUR NaN!!!")
                    exit()

            # apply action, get rewards and new state
            input_t, reward, game_over, info = env.step(action)
            cumReward += reward
            cum_profit[info["dt"]] = cumReward

            if env.actions[action] == "LONG" or env.actions[action] == "SHORT":
                color = bcolors.FAIL if env.actions[action] == "LONG" else bcolors.OKBLUE
                pre_action[info['dt']] = env.actions[action]
                if isRandom:
                    color = bcolors.WARNING if env.actions[action] == "LONG" else bcolors.OKGREEN
                print("%s:\t%s\t%d\t%.5f\t%.2f\t" % (info["dt"], color + env.actions[action] + bcolors.ENDC,
                                                     info['correct_action'], cumReward, info["cum"])
                      + ("\t".join("%s:%.2f" % (l, i) for l, i in zip(env.actions, q[0].tolist()))
                         if not isRandom else ""))

            # store experience
            exp_replay.remember([input_tm1, action, reward, input_t], game_over)

            # adapt model
            if len(exp_replay.memory) >= batch_size:
                inputs, targets = exp_replay.get_batch(target_model, batch_size=batch_size)
                loss += model.train_on_batch(inputs, targets)
                if iter_cnt % update_target_weight_step == 0:
                    # sync the target estimator every update_target_weight_step steps
                    print("update target model weights")
                    target_model = copy_model_weight(model, target_model)
            iter_cnt += 1

        if cumReward > 0 and game_over:
            win_cnt += 1

        test_util.plot_profit(cum_profit, target_close, pre_action, "dqn_train_" + str(e))
        test_util.get_test_performance(e, 'model_dqn.h5', model)
        print("Epoch {:03d}/{} | Loss {:.4f} | Win count {} | Epsilon {:.4f}".format(
            e, epoch, loss, win_cnt, epsilon))

        # Save trained model weights and architecture every five epochs;
        # these will be used by the visualization code.
        if e % 5 == 0 and e != 0:
            model.save_weights("model.h5" if modelFilename is None else modelFilename,
                               overwrite=True)
        epsilon = max(min_epsilon, epsilon * 0.99)

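# copy_model_weight() and ExperienceReplay are used by the script above but
# defined elsewhere. Plausible minimal versions, for reference: the weight
# sync is the standard Keras get_weights/set_weights copy, and the replay
# buffer follows the classic single-array-observation DQN pattern (the real
# MarketEnv observations may be multi-input, in which case the stacking in
# get_batch would need to change).
import numpy as np

def copy_model_weight(src_model, dst_model):
    # Sync the target network with the online network's current weights.
    dst_model.set_weights(src_model.get_weights())
    return dst_model

class ExperienceReplay:
    def __init__(self, max_memory=5000, discount=0.9):
        self.max_memory = max_memory
        self.discount = discount
        self.memory = []

    def remember(self, experience, game_over):
        # experience = [state_t, action, reward, state_t1]
        self.memory.append([experience, game_over])
        if len(self.memory) > self.max_memory:
            del self.memory[0]

    def get_batch(self, model, batch_size=32):
        indices = np.random.randint(0, len(self.memory), size=batch_size)
        states, targets = [], []
        for i in indices:
            (state_t, action, reward, state_t1), game_over = self.memory[i]
            # Q-learning target: only the taken action's value is updated.
            target = model.predict(state_t)[0]
            if game_over:
                target[action] = reward
            else:
                target[action] = reward + self.discount * np.max(model.predict(state_t1)[0])
            states.append(state_t[0])
            targets.append(target)
        return np.array(states), np.array(targets)
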
# Create the environment
codeMap = {}
codeListFilename = "./work/kospi_10.csv"
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

env = MarketEnv(dir_path="./work/sample_data/", target_codes=list(codeMap.keys()),
                input_codes=[], start_date="2013-08-26", end_date="2015-08-25",
                sudden_death=-1.0)

obs_dim = 60 * 2
act_dim = 2
print(env.actions)
logger.info('obs_dim {}, act_dim {}'.format(obs_dim, act_dim))

model = Model(act_dim=act_dim)
alg = PolicyGradient(model, lr=LEARNING_RATE)
agent = Agent(alg, obs_dim=obs_dim, act_dim=act_dim)

# Load a saved model, if available
# if os.path.exists('./model.ckpt'):
#     agent.restore('./model.ckpt')

argi += 1

codeMap = {}
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

env = MarketEnv(dir_path="./data/", target_codes=list(codeMap.keys()), input_codes=[],
                start_date="2002-12-25", end_date="2007-12-25", sudden_death=-1.0)
env_test = MarketEnv(dir_path="./data/", target_codes=list(codeMap.keys()), input_codes=[],
                     start_date="2007-12-26", end_date="2008-12-25", sudden_death=-1.0)

pg = PolicyGradient(env, env_test, discount=0.9, model_filename=modelFilename,
                    history_filename=historyFilename)

if mode == 'train':
    pg.train(verbose=0)

def backtest(env: MarketEnv, name, price_predictor, weights_predictor: WeightsPredictor = None):
    env.reset()
    if env.current_ind == 0:
        raise ValueError('Backtesting should be offset by at least one month from data start')

    print('Backtesting model:\n'
          '  session name: {}\n'
          '  period: {} - {}'.format(name, env.period_start, env.period_end))

    if price_predictor.try_load(name):
        print('Successfully loaded session data')
    else:
        print('Could not load session data, aborting')
        return

    if weights_predictor is not None:
        if weights_predictor.try_load(name):
            print('Successfully loaded weights predictor session data')
        else:
            print('Could not load weights predictor session data, aborting')
            return

    weights = []
    returns = []
    total_reward = 1
    prev_weights = None
    while env.should_continue():
        lookback_prices = env.get_returns_from_n_days(price_predictor.lookback_window, month_offset=-1)
        prices_pred = price_predictor.predict(lookback_prices)
        log_prices_pred = np.log(1 + prices_pred)

        if prev_weights is None:
            curr_weights = np.clip(0.2 + log_prices_pred, 0, 1)
        elif weights_predictor is not None:
            curr_weights = weights_predictor.predict(prices_pred, prev_weights)
            weights_min = curr_weights.min()
            if weights_min < 0:
                curr_weights += -weights_min
        else:
            curr_weights = prev_weights + np.clip(log_prices_pred, -0.2, 0.2)
            weights_min = curr_weights.min()
            if weights_min < 0:
                curr_weights += -weights_min
        curr_weights /= curr_weights.sum()

        mtm_return = env.current_mtm_returns()
        r = np.dot(curr_weights, mtm_return)
        if prev_weights is not None:
            r -= env.transaction_cost(prev_weights, curr_weights)
        total_reward *= (1 + r)

        weights.append((env.current_month_timestamp(), *list(curr_weights)))
        returns.append((env.current_month_timestamp(end=True), r))
        prev_weights = curr_weights
        env.step()

    print(total_reward)

    start = weights[0][0]
    end = weights[-1][0]
    date_str = '{}_{}_{}_{}'.format(start.month, start.year, end.month, end.year)

    weights_df = pd.DataFrame(weights, columns=('Date', *env.data.columns))
    weights_df.set_index('Date', inplace=True)
    weights_df.to_csv(os.path.join(results_output_path(name), 'weights_{}.csv'.format(date_str)))

    returns_df = pd.DataFrame(returns, columns=('Date', 'Return'))
    returns_df.set_index('Date', inplace=True)
    returns_df.to_csv(os.path.join(results_output_path(name), 'returns_{}.csv'.format(date_str)))

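# The shift-and-renormalise step that backtest() applies to candidate
# weights can be factored out. A minimal illustration (normalize_weights
# is a hypothetical helper, not part of the repo):
import numpy as np

def normalize_weights(w):
    # Shift so the smallest weight is zero when any weight is negative,
    # then rescale so the weights sum to 1.
    w = np.asarray(w, dtype=float)
    if w.min() < 0:
        w = w - w.min()
    return w / w.sum()

# Example: [0.5, -0.1, 0.2] -> shift -> [0.6, 0.0, 0.3] -> [0.667, 0.0, 0.333]
print(normalize_weights([0.5, -0.1, 0.2]))
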
import codecs

argi = 1
codeListFilename = sys.argv[argi]; argi += 1
train_start = sys.argv[argi]; argi += 1
train_end = sys.argv[argi]; argi += 1
test_code = sys.argv[argi]; argi += 1
test_start = sys.argv[argi]; argi += 1
test_end = sys.argv[argi]; argi += 1
prefix = sys.argv[argi]; argi += 1

model_filename = "%s-%s-%s.pg.model.h5" % (codeListFilename, train_start, train_end)
print("model file name : %s" % model_filename)
history_filename = "%s-%s-%s.pg.history.h5" % (codeListFilename, train_start, train_end)
print("history file name : %s" % history_filename)

codeMap = {}
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

env = MarketEnv(dir_path="./data/", target_codes=list(codeMap.keys()), input_codes=[],
                start_date=train_start, end_date=train_end, sudden_death=-1.0)
env_test = MarketEnv(dir_path="./data/", target_codes=[test_code], input_codes=[],
                     start_date=test_start, end_date=test_end, sudden_death=-1.0)

pg = PolicyGradient(env, env_test, discount=0.9, model_filename=model_filename,
                    history_filename=history_filename)
pg.paper(test_code, filename="paper-%s-%s-%s-%s.csv" % (prefix, test_code, test_start, test_end))

import sys
import codecs

codeListFilename = sys.argv[1]
modelFilename = sys.argv[2] if len(sys.argv) > 2 else None
historyFilename = sys.argv[3] if len(sys.argv) > 3 else None

codeMap = {}
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

# 1 step = 1 data point; start/end dates are Unix timestamps
env = MarketEnv(dir_path="../../dataset/", target_codes=list(codeMap.keys()), input_codes=[],
                start_date="1514764800", end_date="1560828600", sudden_death=-1.0,
                cumulative_reward=True)

# pg = PolicyGradient_run(env, discount=0.9, model_filename=modelFilename,
#                         history_filename=historyFilename, max_memory=200)
# pg.train(verbose=1)
main(env)

def train_weights_predictor(env: MarketEnv, epochs, name, price_predictor,
                            weights_predictor: WeightsPredictor):
    print('Training weights predictor:\n'
          '  session name: {}\n'
          '  training data period: {} - {}\n'
          '  number of epochs: {}'.format(name, env.period_start, env.period_end, epochs))

    if price_predictor.try_load(name):
        print('Successfully loaded price predictor session data')
    else:
        print('Could not load price predictor session data, aborting')
        return

    if weights_predictor.try_load(name):
        print('Successfully loaded weights predictor session data')
    else:
        print('Could not load weights predictor session data')

    for i in range(epochs):
        print('Epoch # {}'.format(i))
        training_progress = i / epochs
        env.reset()
        prev_weights = None
        total_reward = 1
        steps = []
        while env.should_continue():
            lookback_prices = env.get_returns_from_n_days(price_predictor.lookback_window, month_offset=-1)
            pred_prices = price_predictor.predict(lookback_prices)
            pred_prices = np.clip(pred_prices, a_min=-0.99, a_max=None)
            log_pred_prices = np.log(1 + pred_prices)

            if prev_weights is None:
                curr_weights = np.clip(0.2 + log_pred_prices, 0, 1)
            else:
                calculated_weights = prev_weights + np.clip(log_pred_prices, -0.2, 0.2)
                weights_min = calculated_weights.min()
                if weights_min < 0:
                    calculated_weights += -weights_min
                calculated_weights /= calculated_weights.sum()

                pred_weights = weights_predictor.predict(pred_prices, prev_weights)
                weights_min = pred_weights.min()
                if weights_min < 0:
                    pred_weights += -weights_min
                pred_weights /= pred_weights.sum()

                curr_weights = ((0.8 - 0.3 * training_progress) * calculated_weights +
                                (0.2 + 0.3 * training_progress) * pred_weights)
            curr_weights /= curr_weights.sum()

            mtm_return = env.current_mtm_returns()
            r = np.dot(curr_weights, mtm_return)
            if prev_weights is not None:
                r -= env.transaction_cost(prev_weights, curr_weights)
            total_reward *= (1 + r)

            if prev_weights is not None:
                steps.append((pred_prices, prev_weights, curr_weights))
            prev_weights = curr_weights
            env.step()

        print(total_reward)
        print(curr_weights)
        weights_predictor.train(steps, epochs=3, verbose=0)

    weights_predictor.save(name)
    print('Successfully saved session data')

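# Curriculum blend used above, for reference: as training_progress moves
# from 0 to 1, the mix of rule-based vs. learned weights shifts from
# 80/20 to 50/50.
for p in (0.0, 0.5, 1.0):
    print(p, 0.8 - 0.3 * p, 0.2 + 0.3 * p)
# -> 0.0 0.8 0.2 | 0.5 0.65 0.35 | 1.0 0.5 0.5
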
history_filename = "%s-%s-%s.pg.history.h5" % (codeListFilename, train_start, train_end)
print("history file name : %s" % history_filename)

codeMap = {}
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

env = MarketEnv(dir_path="./data/", codes=list(codeMap.keys()),
                target_date_start=train_start, target_date_end=train_end, sudden_death=-1.0)
env_test = MarketEnv(dir_path="./data/", codes=list(codeMap.keys()),
                     target_date_start=test_start, target_date_end=test_end, sudden_death=-1.0)

pg = PolicyGradient(env, env_test, discount=0.9, model_filename=model_filename,
                    history_filename=history_filename)
pg.train(verbose=0, max_episode=max_episode)

def setUp(self):
    self.env = MarketEnv(csv_name="../data/BTCETH60.csv", window_size=10, seed=42)
    self.calc = self.env._calculate_step

modelFilename = sys.argv[2] if len(sys.argv) > 2 else None

codeMap = {}
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

env = MarketEnv(dir_path="./data/", target_codes=list(codeMap.keys()), input_codes=[],
                start_date="2013-08-26", end_date="2015-08-25", sudden_death=-1.0)

# parameters
epsilon = .5  # exploration rate
min_epsilon = 0.1
epoch = 100000
max_memory = 5000
batch_size = 128
discount = 0.8

model = MarketModelBuilder(modelFilename).getModel()
model.compile(loss='mse', optimizer='rmsprop')

codeMap = {}
f = codecs.open(codeListFilename, "r", "utf-8")
for line in f:
    if line.strip() != "":
        tokens = line.strip().split(",") if "\t" not in line else line.strip().split("\t")
        codeMap[tokens[0]] = tokens[1]
f.close()

# env = MarketEnv(dir_path="./data/", target_codes=list(codeMap.keys()), input_codes=[],
#                 start_date="2013-08-26", end_date="2015-08-25", sudden_death=-1.0)
env = MarketEnv(dir_path="../../dataset/", target_codes=list(codeMap.keys()), input_codes=[],
                start_date="1546300800", end_date="1558673100", sudden_death=-1.0)

# parameters
epsilon = .5  # exploration rate
min_epsilon = 0.1
epoch = 10
max_memory = 5000
batch_size = 1
discount = 0.8

model = MarketModelBuilder(modelFilename).getModel()
model.compile(loss='mse', optimizer='rmsprop')