def learn_agent(use_last_run=False):
    """Train a Q-learning agent on BTC-EUR adjusted-close data.

    When *use_last_run* is True, every Q-table stored under 'q_tables/' is
    replayed first and the best final rate-of-return is handed to the new
    learner as its starting benchmark.
    """
    start_date = '2017-10-01'
    end_date = '2018-06-27'
    prefix = 'btc_'
    ticket = 'BTC-EUR'

    frame = load_all_data_from_file2(prefix + 'etf_data_adj_close.csv',
                                     start_date, end_date)
    frame = frame[[ticket]].reset_index(drop=True)
    frame.fillna(method="bfill", inplace=True)
    print(frame.head(2))
    print(frame.tail(2))

    states = load_states(frame, 30)

    best_ror = None
    if use_last_run:
        best_qtable = None
        # Replay each stored Q-table to find the best historical run.
        for name in os.listdir('q_tables'):
            stored = pd.read_csv('q_tables/' + name).set_index('Unnamed: 0')
            simulator = CryptoQLearnedAgentSimulator(ticket, q_table=stored)
            simulator.invest(frame, states)
            final_ror = simulator.ror_history[-1]
            if best_ror is None or best_ror < final_ror:
                best_ror = final_ror
                best_qtable = simulator.q_table.copy()

    learner = CryptoQLearningAgent(ticket)
    if use_last_run:
        learner.run_learning(frame, states=states, last_ror=best_ror)
    else:
        learner.run_learning(frame, states=states)
def rank_by_score(dir_models):
    """Score every model in *dir_models* over 100 random date windows.

    Aggregates per-model mean score and mean rate-of-return, prints the
    winner of each ranking, and returns a list containing the best model
    name by score followed by the best model name by ror.

    Bug fixed: the original built ``result_list`` and then discarded it
    (implicit ``return None``); it is now returned.
    """
    import pandas as pd

    ranked_by_score = {}
    ranked_by_ror = {}
    ticket = 'BTC-EUR'

    start_date = '2011-08-07'
    end_date = '2018-06-27'
    df_adj_close = load_all_data_from_file2('btc_etf_data_adj_close.csv',
                                            start_date, end_date)

    for it in range(100):
        print('-' * 80)
        print(it)
        print('-' * 80)
        # Re-draw random windows until one has at least 30 rows.
        data, start, end = get_data_random_dates(df_adj_close, 2011, 2018)
        while len(data) < 30:
            data, start, end = get_data_random_dates(df_adj_close, 2011, 2018)
        print(start, ' - ', end)
        result_score, result_ror = score_models(data, ticket=ticket,
                                                dir_models=dir_models)
        for key in result_score.keys():
            # setdefault replaces the original explicit membership test.
            ranked_by_score.setdefault(key, []).append(result_score[key])
            ranked_by_ror.setdefault(key, []).append(result_ror[key])

    # NOTE(review): despite the original 'median_*' names these are means.
    # Built as a row list to avoid the deprecated DataFrame.append.
    rows = [{'model': key,
             'score': np.mean(np.array(ranked_by_score[key])),
             'ror': np.mean(np.array(ranked_by_ror[key]))}
            for key in ranked_by_score.keys()]
    df = pd.DataFrame(rows, columns=['model', 'score', 'ror'])

    best_by_score = df.sort_values(by=['score'], ascending=False).head(1)
    print(best_by_score)
    best_by_ror = df.sort_values(by=['ror'], ascending=False).head(1)
    print(best_by_ror)

    result_list = best_by_score['model'].tolist()
    result_list.extend(best_by_ror['model'].tolist())
    return result_list
def get_data_random_dates(min_year, max_year):
    """Load BTC adjusted-close data for a random interval in [min_year, max_year].

    Returns a tuple ``(dataframe, start_date, end_date)`` with start <= end.
    """
    rand_start = gen_random_date(min_year, max_year)
    rand_end = gen_random_date(min_year, max_year)
    # Ensure chronological order with a tuple swap instead of a temp var.
    if rand_start > rand_end:
        rand_start, rand_end = rand_end, rand_start
    frame = load_all_data_from_file2('btc_etf_data_adj_close.csv',
                                     str(rand_start), str(rand_end))
    return frame, rand_start, rand_end
def find_best_models(start_date, end_date, dir_models='models_btc_eur/', ticket='BTC-EUR', debug=False):
    """Evaluate every model under *dir_models* (one sub-directory level deep)
    on the given date window with CryptoTraderAgent.

    Returns ``(data, best_agent_by_score, best_agent_by_ror)``.

    Fixes: local ``max`` shadowed the builtin; the side-effect list
    comprehension ``[models.remove(d) ...]`` is replaced by a filter.
    """
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date)
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index is a no-op;
    # fillna(method='bfill') was probably intended — kept as-is for parity.
    data = data.reindex(method='bfill')
    print(data[[ticket]].head(2))
    print(data[[ticket]].tail(2))

    entries = sorted(os.listdir(dir_models))
    print(entries)
    # Flatten one level of sub-directories into 'subdir/file' entries.
    dirs = [d for d in entries if os.path.isdir(dir_models + d)]
    models = [m for m in entries if m not in dirs]
    for d in dirs:
        for f in os.listdir(dir_models + d):
            models.append(d + '/' + f)

    best_score = -99999999.  # renamed from 'max' (shadowed the builtin)
    max_ror = -99999999.
    max_agent = None
    best_ror_agent = None
    for model in models:
        agent = CryptoTraderAgent(ticket, model=dir_models + str(model), coef=.5)
        agent.invest(data[ticket], window=30, debug=debug)
        print('testing:', model, ' => score:', agent.score, '=> ror:',
              agent.ror_history[-1], ' mean ror => ', np.mean(agent.ror_history))
        if best_score < agent.score:
            best_score = agent.score
            max_agent = agent
        if max_ror < agent.ror_history[-1]:
            max_ror = agent.ror_history[-1]
            best_ror_agent = agent
    return data, max_agent, best_ror_agent
def test_agent(show_graph=False):
    """Replay every stored Q-table in 'qtables_short/' on the BTC-EUR window,
    keep the five best by final rate-of-return and delete the rest from disk.
    """
    start_date = '2017-10-01'
    end_date = '2018-06-27'
    prefix = 'btc_'
    ticket = 'BTC-EUR'

    frame = load_all_data_from_file2(prefix + 'etf_data_adj_close.csv',
                                     start_date, end_date)
    frame = frame[[ticket]].reset_index(drop=True)
    frame.fillna(method="bfill", inplace=True)
    print(frame.head(2))
    print(frame.tail(2))

    states = load_states(frame, 30)

    legend = []
    ror_to_table = {}
    for name in os.listdir('qtables_short'):
        stored = pd.read_csv('qtables_short/' + name).set_index('Unnamed: 0')
        simulator = CryptoQLearnedAgentSimulator(ticket, stored)
        simulator.invest(frame, states=states)
        final_ror = simulator.ror_history[-1]
        print(name, '->', final_ror)
        ror_to_table[final_ror[0]] = name
        if show_graph:
            plt.plot(simulator.ror_history)
            legend.append(name)

    if show_graph:
        plt.legend(legend, loc="upper left")
        plt.show()

    # Keep only the five highest final rors; prune the rest from disk.
    top5 = sorted(ror_to_table.keys(), reverse=True)[:5]
    for ror in ror_to_table.keys():
        if ror not in top5:
            print('removed: ', ror_to_table[ror], '->', ror)
            os.remove('qtables_short/' + ror_to_table[ror])
        else:
            print('TOP5: ', ror_to_table[ror], '->', ror)
def find_best_models(start_date, end_date, dir_models='models_btc_eur/', ticket='BTC-EUR', debug=False):
    """Evaluate every model file under *dir_models* on the given date window
    with CryptoRegressionAgent.

    Returns ``(data, best_agent_by_score, best_agent_by_ror)``.

    Fix: local ``max`` shadowed the builtin — renamed to ``best_score``.
    """
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date)
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index is a no-op;
    # fillna(method='bfill') was probably intended — kept as-is for parity.
    data = data.reindex(method='bfill')
    print(data[[ticket]].head(2))
    print(data[[ticket]].tail(2))

    models = [dir_models + f for f in sorted(os.listdir(dir_models))]
    print(models)

    best_score = -99999999.  # renamed from 'max' (shadowed the builtin)
    max_ror = -99999999.
    max_agent = None
    best_ror_agent = None
    for model in models:
        agent = CryptoRegressionAgent(ticket, dir_models=model, coef=1.)
        agent.invest(data[ticket], window=30, debug=debug)
        print('testing:', model, ' => score:', agent.score, '=> ror:',
              agent.ror_history[-1], ' mean ror => ', np.mean(agent.ror_history))
        if best_score < agent.score:
            best_score = agent.score
            max_agent = agent
        if max_ror < agent.ror_history[-1]:
            max_ror = agent.ror_history[-1]
            best_ror_agent = agent
    return data, max_agent, best_ror_agent
# Script preamble: load the BTC-EUR adjusted-close series and plot it.
sys.path.insert(0, '../../../etf_data')
from etf_data_loader import load_all_data_from_file2

import numpy as np
import matplotlib.pyplot as plt

dir_data = 'data_btc_eur/'
dir_models = 'models_btc_eur/'
ticket = 'BTC-EUR'
start_date = '2017-01-01'
end_date = '2018-06-15'
prefix = 'btc_'

df_adj_close = load_all_data_from_file2(prefix + 'etf_data_adj_close.csv',
                                        start_date, end_date)
np.warnings.filterwarnings('ignore')

try:
    df_ticket_data = df_adj_close[['date', ticket]]
except KeyError:
    # Fix: the bare 'except:' also swallowed SystemExit/KeyboardInterrupt;
    # a missing column raises KeyError.
    print('failed to find ticket: ' + ticket)
    exit(1)

df_ticket_data = df_ticket_data[df_ticket_data[ticket] > 0.]
# NOTE(review): reindex(method='bfill') without a new index is a no-op;
# fillna(method='bfill') was probably intended — kept for parity.
df_ticket_data = df_ticket_data.reindex(method='bfill')
print(df_ticket_data.head())
print(df_ticket_data.tail())
plt.plot(df_ticket_data[[ticket]])
        # NOTE(review): tail of a sell/trade method whose definition lies
        # outside this chunk — decrements the held share count and signals
        # success with 1.
        self.shares = self.shares - to_sell
        return 1


if __name__ == '__main__':
    # Back-test one pre-trained model on a fixed mid-2018 window and plot
    # its rate-of-return against a buy-and-hold benchmark.
    start_date = '2018-07-01'
    end_date = '2018-09-14'
    ticket = 'BTC-EUR'
    # model = 'models_ga_periodic/BTC_EUR_random_forrest_0.pkl'
    # model = '/home/martin/model/BTC_EUR_mlp_5.pkl'
    # model = '/home/martin/model/BTC_EUR_random_forrest_4.pkl'
    # model = '/home/martin/models_eu/BTC_EUR_random_forrest_49.pkl'
    model = '/home/martin/models_eu/BTC_EUR_random_forrest_558.pkl'
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date)
    agent = CryptoTraderAgent(ticket, model=model, invested=100., coef=.5)
    window = 30
    agent.invest(data[ticket], window=window, debug=True)
    print('testing:', model, ' => score:', agent.score, '=> ror:', agent.ror_history[-1], ' mean ror => ', np.mean(agent.ror_history))
    # Buy-and-hold benchmark: return of holding from the first close onward.
    # NOTE(review): as_matrix() was removed in pandas 1.0 — confirm the
    # pinned pandas version still provides it.
    ror_bah = data[ticket].apply(
        lambda x: (x / data[ticket].iloc[0]) - 1.).as_matrix()[window:]
    plt.plot(ror_bah, color='black')
    plt.plot(agent.ror_history, color='red')
    plt.legend(['bench', agent.model], loc='upper left')
    plt.show()
    print('best(score):', agent.model)
def copy_best_score():
    """Rank every 'models_btc_eur*' directory over 100 random date windows
    and copy the best model (by mean score and by mean ror) into
    'best_models_btc_eur'.

    Fixes: loop variable ``iter`` shadowed the builtin; deprecated
    ``DataFrame.append`` replaced with a row list.
    """
    import pandas as pd
    from shutil import copyfile

    rank_by_score = {}
    rank_by_ror = {}
    ticket = 'BTC-EUR'

    best_dir = 'best_models_btc_eur'
    if not os.path.isdir(best_dir):
        os.makedirs(best_dir)
    best_models_dir = os.listdir(best_dir)

    models = [d + '/' for d in os.listdir('.') if d.startswith('models_btc_eur')]
    print(models)

    start_date = '2011-08-07'
    end_date = '2018-06-27'
    df_adj_close = load_all_data_from_file2('btc_etf_data_adj_close.csv',
                                            start_date, end_date)

    for it in range(100):  # renamed from 'iter' (shadowed the builtin)
        print('-' * 80)
        print(it)
        print('-' * 80)
        # Re-draw random windows until one has at least 30 rows.
        data, start, end = get_data_random_dates(df_adj_close, 2011, 2018)
        while len(data) < 30:
            data, start, end = get_data_random_dates(df_adj_close, 2011, 2018)
        print(start, ' - ', end)
        for m in models:
            result_score, result_ror = score_models(data, ticket=ticket,
                                                    dir_models=m)
            for key in result_score.keys():
                rank_by_score.setdefault(key, []).append(result_score[key])
                rank_by_ror.setdefault(key, []).append(result_ror[key])

    # NOTE(review): despite the original 'median_*' names these are means.
    rows = [{'model': key,
             'score': np.mean(np.array(rank_by_score[key])),
             'ror': np.mean(np.array(rank_by_ror[key]))}
            for key in rank_by_score.keys()]
    df = pd.DataFrame(rows, columns=['model', 'score', 'ror'])

    best_by_score = df.sort_values(by=['score'], ascending=False).head(1)
    print(best_by_score)
    best_by_ror = df.sort_values(by=['ror'], ascending=False).head(1)
    print(best_by_ror)

    result_list = best_by_score['model'].tolist()
    result_list.extend(best_by_ror['model'].tolist())

    for m in result_list:
        split = m.split('/')
        _dir = split[0]
        fname = split[1]  # renamed from 'file' (shadowed a legacy builtin)
        if fname not in best_models_dir:
            # NOTE(review): destination joins _dir and fname with no
            # separator ('models_btc_eurXmodel.pkl') — looks intentional as
            # a disambiguating prefix, but verify.
            copyfile(m, best_dir + '/' + _dir + fname)
def run_agent():
    """REINFORCE-style policy-gradient training loop on BTC-EUR closes.

    Samples one action per timestep from a dense policy network, scores the
    action sequence with policy_evaluator, and fits the network toward the
    standardized discounted rewards.  Best-so-far weights are checkpointed
    to 'weights_temp.h5'; final weights are written to 'weights.h5'.
    """
    start_date = '2011-01-01'
    end_date = '2018-09-14'
    ticket = 'BTC-EUR'
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date)
    # Restrict to the window, drop non-positive prices, keep only the close column.
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index is a no-op;
    # fillna(method='bfill') may have been intended.
    data = data.reindex(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]
    print(data)
    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)
    learning_rate = 0.1
    # model = policy.create_lstm_model(learning_rate)
    model = policy.create_dense_model(learning_rate)
    x = states.get_whole_state()[window:]
    # x = np.reshape(x, (x.shape[0], 1, x.shape[1]))
    rors = []
    losses = []
    discos = []
    print()
    max_ror = None
    for it in range(1000):
        # Sample an action (0/1/2) per timestep from the policy probabilities.
        predicted_action_proba = model.predict(x)
        actions = np.empty(predicted_action_proba.shape[0], dtype=np.int32)
        for i in range(predicted_action_proba.shape[0]):
            actions[i] = np.random.choice(3, 1, p=predicted_action_proba[i])
        agent_evaluator = policy_evaluator.Agent(actions)
        agent_evaluator.run(data, states)
        rors.append(agent_evaluator.ror_history[-1])
        # Checkpoint whenever the final rate-of-return improves.
        if max_ror is None or max_ror < rors[it]:
            max_ror = rors[it]
            print('saving at ror:', rors[it])
            model.save_weights('weights_temp.h5', overwrite=True)
        # Standardize the discounted rewards (zero mean, unit variance).
        disco = agent_evaluator.disco_rewards - np.mean(
            agent_evaluator.disco_rewards)
        disco = disco / np.std(agent_evaluator.disco_rewards)
        # disco = np.reshape(disco, (disco.shape[0], 1))
        discos.append(disco[-1])
        # y = predicted_action_proba + learning_rate * disco
        # Target: the sampled action's slot carries its standardized
        # discounted reward; all other action slots are zero.
        y = np.zeros_like(predicted_action_proba)
        for i in range(predicted_action_proba.shape[0]):
            y[i][actions[i]] = disco[i]
        loss = model.fit(x, y, nb_epoch=10, verbose=0, shuffle=True,
                         validation_split=0.3)
        losses.append(loss.history['loss'])
        print('\r[%d] %f | %f | %f | %f' % (it, rors[it], losses[it][-1],
                                            agent_evaluator.rewards[-1],
                                            disco[-1]),
              end='')
        # if loss.history['loss'][-1] <= 0. or np.isnan(loss.history['loss'][-1]):
        #     break
        # Recover from a NaN loss by restoring the last good checkpoint.
        if np.isnan(loss.history['loss'][-1]):
            print('loading model...')
            model.load_weights('weights_temp.h5')
    model.save_weights('weights.h5', overwrite=True)
    # Diagnostic plots: ror per iteration, training loss, last disco value.
    _, ax = plt.subplots(3, 1)
    ax[0].plot(rors)
    ax[0].set_title('rors')
    ax[1].plot(losses)
    ax[1].set_title('loss')
    ax[2].plot(discos)
    ax[2].set_title('disco')
    plt.show()
def run_agent():
    """Episodic policy-gradient training of an LSTM policy on BTC-EUR closes.

    For each of 10 episodes it walks the series timestep by timestep,
    growing the LSTM input window, sampling an action per predicted row,
    and fitting toward probabilities scaled by the discounted rewards.
    Weights are saved to 'weights.h5' and diagnostics are plotted.
    """
    # start_date = '2011-01-01'
    start_date = '2018-01-01'
    end_date = '2018-09-14'
    ticket = 'BTC-EUR'
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date)
    # Restrict to the window, drop non-positive prices, keep only the close column.
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index is a no-op;
    # fillna(method='bfill') may have been intended.
    data = data.reindex(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]
    window = 30
    learning_rate = 0.001
    timesteps = 7
    model = policy.create_lstm_model(learning_rate, timesteps)
    # model = policy.create_dense_model(learning_rate)
    # x = np.reshape(x, (x.shape[0], 1, x.shape[1]))
    all_rors = []
    all_losses = []
    all_discos = []
    print()
    print(
        '[episode][it/max it] ror | loss | reward | expected_reward | action')
    actions = {0: 'hold', 1: 'sell', 2: 'buy'}
    states = State(window, data)
    for episode in range(10):
        input = []
        labels = []
        losses = []
        discos = []
        rors = []
        for t in range(window + timesteps, len(data)):
            agent_evaluator = cont_policy_evaluator.RecordingAgent(
                data, states)
            # x = states.get_state(t)
            x = states.get_partial_state(t, timesteps)
            # lstm
            # Accumulate the growing history of (timesteps, 5) windows.
            x = x.reshape((1, timesteps, 5))
            input.append(x)
            x = np.array(input).reshape((len(input), timesteps, 5))
            # dense input
            # x = x.reshape((1, 5))
            predicted_action_proba = model.predict(x)
            runs = predicted_action_proba.shape[0] - 1
            # Replay a sampled action for every row predicted so far.
            for run in range(predicted_action_proba.shape[0]):
                action = np.random.choice(3, 1,
                                          p=predicted_action_proba[run])[0]
                agent_evaluator.run(action, t - runs + run)
                # print(run, '|', action, '|', agent_evaluator.rewards[t - window-runs+run])
            index = t - window
            rors.append(agent_evaluator.ror_history[index])
            discos.append(agent_evaluator.disco_rewards[-1])
            # y = predicted_action_proba + learning_rate * agent_evaluator.disco_rewards
            # Scale predicted probabilities by the discounted rewards.
            y = predicted_action_proba * agent_evaluator.disco_rewards
            # print(y.shape)
            # labels.append(y.reshape((3,)))
            # y = np.array(labels)
            loss = model.fit(x, y, nb_epoch=1, verbose=0, shuffle=True,
                             validation_split=0.3)
            # Keras may omit 'loss' when the split leaves no training rows.
            if 'loss' in loss.history.keys():
                losses.append(loss.history['loss'])
                print('\r[%d][%d/%d] %f | %f | %f | %f | %s' %
                      (episode, t, len(data), rors[-1], losses[-1][-1],
                       np.mean(agent_evaluator.rewards),
                       agent_evaluator.disco_rewards[-1],
                       actions[action]),
                      end='')
        all_losses.append(losses)
        all_discos.append(discos)
        all_rors.append(rors)
    model.save_weights('weights.h5', overwrite=True)
    # Diagnostic plots: one line per episode.
    _, ax = plt.subplots(3, 1)
    for ii in range(len(all_rors)):
        ax[0].plot(all_rors[ii], label=str(ii))
    ax[0].set_title('rors')
    for ii in range(len(all_losses)):
        ax[1].plot(all_losses[ii], label=str(ii))
    ax[1].set_title('loss')
    for ii in range(len(all_discos)):
        ax[2].plot(all_discos[ii], label=str(ii))
    ax[2].set_title('expected_reward')
    for axis in ax:
        axis.legend()
    plt.show()
def run_agent():
    """Replay a trained LSTM policy ('weights.h5') on a fixed BTC-EUR window.

    Builds the growing LSTM input exactly as in training, samples an action
    per predicted row, and plots the resulting rate-of-return plus reward
    diagnostics.
    """
    start_date = '2018-04-01'
    end_date = '2018-09-14'
    # start_date = '2011-01-01'
    # end_date = '2018-04-01'
    ticket = 'BTC-EUR'
    data = load_all_data_from_file2('btc_etf_data_adj_close.csv', start_date, end_date)
    # Restrict to the window, drop non-positive prices, keep only the close column.
    data = data[data['date'] > str(start_date)]
    data = data[data['date'] < str(end_date)]
    print(start_date, " - ", end_date, " ,len = ", len(data))
    data = data[data[ticket] > 0.]
    # NOTE(review): reindex(method='bfill') without a new index is a no-op;
    # fillna(method='bfill') may have been intended.
    data = data.reindex(method='bfill')
    data.reset_index(inplace=True)
    data = data[ticket]
    print(data)
    window = 30
    states = State(window, data)
    print(states.bench[0])
    print(states.bench[1])
    print(states.bench[-2])
    print(states.bench[-1])
    print(len(data) - window)
    # model = policy.create_dense_model(0.001)
    timesteps = 7
    model = policy.create_lstm_model(0.001, timesteps)
    model.load_weights('weights.h5')
    # x = states.get_whole_state()[window:]
    # x = np.reshape(x, (x.shape[0], timesteps, x.shape[1]))
    # Stack every (timesteps, 5) partial state into one batch.
    input = []
    for t in range(window + timesteps, len(data)):
        x = states.get_partial_state(t, timesteps)
        x = x.reshape((1, timesteps, 5))
        input.append(x)
    x = np.array(input).reshape((len(input), timesteps, 5))
    predicted_action_proba = model.predict(x)
    print(predicted_action_proba.shape)
    print(x.shape)
    # actions = np.empty(predicted_action_proba.shape[0], dtype=np.int32)
    agent_evaluator = cont_policy_evaluator.RecordingAgent(data, states)
    # for i in range(predicted_action_proba.shape[0]):
    #     actions[i] = np.random.choice(3, 1, p=predicted_action_proba[i])
    # Sample one action per row and feed it to the recording evaluator.
    for run in range(predicted_action_proba.shape[0]):
        action = np.random.choice(3, 1, p=predicted_action_proba[run])[0]
        agent_evaluator.run(action, window + run)
    # agent_evaluator = policy_evaluator.Agent(actions)
    # agent_evaluator.run(data, states)
    print(agent_evaluator.ror_history[-1])
    plt.plot(agent_evaluator.ror_history)
    plt.title('ror')
    plt.show()
    _, ax = plt.subplots(2, 1)
    ax[0].plot(agent_evaluator.rewards)
    ax[1].plot(agent_evaluator.disco_rewards)
    plt.show()