# disable GPU
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# specify what to test
delta_action_test = False
bartlett_action_test = False

# specify weights file to load
tag = "49"

# set init_ttm, spread, and other parameters to match the env the model was trained on
env_test = TradingEnv(continuous_action_flag=True, sabr_flag=True, dg_random_seed=2,
                      spread=0.01, num_contract=1, init_ttm=20, trade_freq=1,
                      num_sim=100001)
ddpg_test = DDPG(env_test)

print("\n\n***")
if delta_action_test:
    print("Testing delta actions.")
else:
    print("Testing agent actions.")
if tag == "":
    print("Testing the model saved at the end of training.")
else:
    print("Testing the model saved at " + tag + "K episodes.")

ddpg_test.load(tag=tag)
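
# A minimal evaluation-loop sketch, for orientation only -- it is NOT the
# repo's test routine. Assumptions: env_test follows the Gym reset()/step()
# interface, the DDPG class exposes an act() method, and `policy` is any
# callable mapping a state to a hedging action (e.g. ddpg_test.act for the
# agent, or an analytic delta-hedge function for the delta baseline).
import numpy as np

def evaluate(env, policy, num_episodes=1000):
    """Roll out episodes and summarize total reward (hedging P&L) per path."""
    episode_rewards = []
    for _ in range(num_episodes):
        state = env.reset()
        total_reward, done = 0.0, False
        while not done:
            state, reward, done, _ = env.step(policy(state))
            total_reward += reward
        episode_rewards.append(total_reward)
    return np.mean(episode_rewards), np.std(episode_rewards)

# e.g. mean_pnl, std_pnl = evaluate(env_test, ddpg_test.act)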
                    help='either "train" or "test"')
parser.add_argument('-w', '--weights', type=str,
                    help='a trained model weights file')
args = parser.parse_args()

maybe_make_dir('weights')
maybe_make_dir('portfolio_val')

timestamp = time.strftime('%Y%m%d%H%M')

data = np.around(get_data())
data_size = data.shape[1]
data_cut_point = int(0.75 * data_size)
train_data = data[:, :data_cut_point]
test_data = data[:, data_cut_point:]

env = TradingEnv(train_data, args.initial_invest)
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
scaler = get_scaler(env)
portfolio_value = []

if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(test_data, args.initial_invest)
    # load trained weights
    agent.load(args.weights)
    # at test time, reuse the timestamp from when the weights were trained
    timestamp = re.findall(r'\d{12}', args.weights)[0]
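
# A plausible get_scaler() implementation, sketched for reference -- the
# real one is imported from utils. Assumptions: the env follows the Gym
# interface and its states are 1-D arrays, so a StandardScaler can be fit
# on the states visited during one random episode.
import numpy as np
from sklearn.preprocessing import StandardScaler

def get_scaler_sketch(env):
    """Play one random episode and fit a scaler on the states observed."""
    states = [env.reset()]
    done = False
    while not done:
        action = env.action_space.sample()
        state, reward, done, info = env.step(action)
        states.append(state)
    scaler = StandardScaler()
    scaler.fit(np.array(states))
    return scaler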
data_size = data[0].shape[0]
end_row_train = int(data_size * (args.ratio / 100))
end_row_validate = (data_size - end_row_train) // 2 + end_row_train
train_data = np.array([d[:end_row_train, :] for d in data])
validation_data = np.array(
    [d[end_row_train:end_row_validate, :] for d in data])
test_data = np.array([d[end_row_validate:, :] for d in data])
'''
print("There are {} rows".format(data_size))
print("The training data spans from 0 to {}".format(end_row_train - 1))
print("The validation data spans from {} to {}".format(end_row_train,
                                                       end_row_validate - 1))
print("The test data spans from {} to {}".format(end_row_validate,
                                                 data_size - 1))
'''

env = TradingEnv(train_data, args.initial_invest)
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size, N_HIDDEN_LAYERS)
scaler = get_scaler(env)
portfolio_value = []
# Append initial account value
portfolio_value.append(args.initial_invest)

if args.mode != 'train':
    # remake the env with validation data
    env = TradingEnv(
        validation_data if args.mode == 'validate' else test_data,
        args.initial_invest)
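
# A quick sanity check of the split arithmetic above, with made-up
# numbers (1000 rows, --ratio 70):
n_rows, ratio = 1000, 70
row_train = int(n_rows * (ratio / 100))                # 700
row_validate = (n_rows - row_train) // 2 + row_train   # 850
# train: rows 0..699, validation: rows 700..849, test: rows 850..999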
from flask import Flask, render_template, request, jsonify
import pickle
import time
import numpy as np
import argparse
import re
import itertools

from envs import TradingEnv
from agent import DQNAgent
from utils import get_data, get_scaler, maybe_make_dir

app = Flask(__name__)

data, pred, pred5 = get_data()
env = TradingEnv(data, pred, pred5, 20000)
state_size = env.observation_space.shape
action_size = env.action_space.n
# every {sell, hold, buy} combination over three positions (3^3 = 27 discrete actions)
action_combo = list(map(list, itertools.product([0, 1, 2], repeat=3)))
action_map = {0: "sell", 1: "hold", 2: "buy"}


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/', methods=['POST'])
def advise():
    n1 = float(request.form['n1'])
    n2 = float(request.form['n2'])
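
# A hypothetical completion of advise(), pieced together from the names
# defined above -- the original handler is truncated here, so treat this
# purely as a sketch. Assumptions: a trained DQNAgent `agent` and a fitted
# `scaler` exist (as in the CLI scripts), the form inputs make up the state
# vector, and agent.act() returns an index into action_combo.
@app.route('/advise', methods=['POST'])
def advise_sketch():
    n1 = float(request.form['n1'])
    n2 = float(request.form['n2'])
    state = scaler.transform([[n1, n2]])    # shape must match training states
    action = agent.act(state)               # index into the 27-entry combo table
    advice = [action_map[a] for a in action_combo[action]]
    return jsonify({'advice': advice})      # e.g. ["buy", "hold", "sell"]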
def train():
    # Will hold the list of all profits as we go through training
    profits_list = []

    # Previously took command line input as below:
    # if len(sys.argv) != 4:
    #     print("Usage: python train.py [stock] [window] [episodes]")
    #     exit()
    with open(os.path.join(os.path.dirname(__file__), 'config.yml'), 'r') as stream:
        config = yaml.safe_load(stream)

    # Unpack parameters from the config (formerly from the terminal):
    # stock_name, window_size, episode_count = sys.argv[1], int(sys.argv[2]), int(sys.argv[3])
    stock_name, window_size, episode_count = (
        config['stock_name'], config['window_size'], config['num_epochs'])
    num_tech_indicators = config['num_tech_indicators']

    agent = Agent(window_size + num_tech_indicators, config)
    data = getStockDataVec(stock_name)
    env = TradingEnv(data, window_size)
    l = len(data) - 1

    for e in range(episode_count + 1):
        print("Episode " + str(e) + "/" + str(episode_count))
        state = env.get_state(0)
        env.reset_holdings()

        for t in range(l):
            action = agent.act(state)  # 0 = sit, 1 = buy, 2 = sell
            next_state = env.get_state(t + 1)
            reward = 0

            if action == 1:  # buy
                # remember the price bought at time t
                env.buy(t)
                # print("Buy: " + formatPrice(data[t]))
            elif action == 2:  # sell
                reward, profit = env.sell(t)
                # print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(profit))

            done = t == l - 1

            # Push all values to memory
            agent.memory.push(state, action, next_state, reward)
            state = next_state
            total_profit = env.net_profit(t)
            max_staked = env.max_spent

            if done:
                percent_return = total_profit / max_staked * 100
                print("--------------------------------")
                print("Total Profit: " + formatPrice(total_profit))
                print("Max staked: " + formatPrice(max_staked))
                print("Percent return: " + "{0:.2f}%".format(percent_return))
                print("--------------------------------")
                profits_list.append((total_profit, percent_return))
                # print(profits_list)

            agent.optimize()

        if e % config['save_freq'] == 0:
            agent.target_net.load_state_dict(agent.policy_net.state_dict())
            torch.save(agent.policy_net, config['policy_model'])
            torch.save(agent.target_net, config['target_model'])
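
# A minimal sketch of what agent.optimize() typically does in DQN -- not
# necessarily this repo's exact implementation. Assumptions: the memory's
# sample() returns batched (state, action, next_state, reward) tensors
# matching the push() call above (note: no done flag is stored, so the
# target is not masked at episode end), and the Agent holds policy_net,
# target_net, optimizer, and a discount factor gamma.
import torch
import torch.nn.functional as F

def optimize_sketch(agent, batch_size=32, gamma=0.99):
    if len(agent.memory) < batch_size:
        return
    states, actions, next_states, rewards = agent.memory.sample(batch_size)
    # Q(s, a) for the actions actually taken
    q_taken = agent.policy_net(states).gather(1, actions.unsqueeze(1)).squeeze(1)
    # bootstrapped target from the frozen target network
    with torch.no_grad():
        next_q = agent.target_net(next_states).max(1)[0]
    loss = F.smooth_l1_loss(q_taken, rewards + gamma * next_q)
    agent.optimizer.zero_grad()
    loss.backward()
    agent.optimizer.step()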
def DqnProgram(args, setResult, training_result):
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--episode', type=int, default=2000,
                        help='number of episodes to run')
    parser.add_argument('-b', '--batch_size', type=int, default=32,
                        help='batch size for experience replay')
    parser.add_argument('-i', '--initial_invest', type=int, default=20000,
                        help='initial investment amount')
    parser.add_argument('-m', '--mode', type=str, required=True,
                        help='either "train" or "test"')
    parser.add_argument('-w', '--weights', type=str,
                        help='a trained model weights file')
    args = parser.parse_args(args)

    maybe_make_dir('weights')
    maybe_make_dir('portfolio_val')

    import time
    timestamp = time.strftime('%Y%m%d%H%M')

    data = get_data(mode=args.mode)  # TODO: hook this up to the ticker selected in the UI
    data = np.array([c['종가'] for c in data])  # '종가' = closing price

    env = TradingEnv(data, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.n  # was env.action_space.shape; DQNAgent expects the number of discrete actions
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)
    portfolio_value = []

    if args.weights is not None:
        agent.load(args.weights)
        timestamp = re.findall(r'\d{12}', args.weights)[0]

    for e in range(args.episode):
        state = env.reset()
        state = scaler.transform([state])
        for t in range(env.n_step):  # renamed from `time`, which shadowed the time module
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            next_state = scaler.transform([next_state])
            if args.mode == 'train':
                agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                msg = "episode: {}/{}, episode end value: {}".format(
                    e + 1, args.episode, info['cur_val'])
                print(msg)
                setResult(msg=msg)
                training_result.append(info['cur_val'])
                # append episode-end portfolio value
                portfolio_value.append(info['cur_val'])
                break
            if args.mode == 'train' and len(agent.memory) > args.batch_size:
                agent.replay(args.batch_size)
        if args.mode == 'train' and (e + 1) % 10 == 0:
            # checkpoint weights
            agent.save('weights/{}-dqn.h5'.format(timestamp))

    # save portfolio value history to disk
    with open('portfolio_val/{}-{}.p'.format(timestamp, args.mode), 'wb') as fp:
        pickle.dump(portfolio_value, fp)
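
# A sketch of the classic experience-replay step that agent.replay() is
# expected to perform -- not necessarily this project's exact code.
# Assumptions: agent.memory holds (state, action, reward, next_state, done)
# tuples as stored by remember() above, and agent.model is a compiled
# Keras network, with gamma / epsilon attributes on the agent.
import random
import numpy as np

def replay_sketch(agent, batch_size=32):
    minibatch = random.sample(agent.memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            # bootstrap with the best Q-value of the next state
            target = reward + agent.gamma * np.amax(agent.model.predict(next_state)[0])
        target_f = agent.model.predict(state)     # current Q-values
        target_f[0][action] = target              # update only the action taken
        agent.model.fit(state, target_f, epochs=1, verbose=0)
    if agent.epsilon > agent.epsilon_min:
        agent.epsilon *= agent.epsilon_decay      # decay exploration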
timestamp = time.strftime('%Y%m%d%H%M')

stocks = Stocks()
portfolio = Portfolio()

data = np.around(get_data(ticker=args.ticker))
dataSplit = data.shape[1] - args.testingDays
train_data = data[:, :dataSplit]
test_data = data[:, dataSplit:]

dates = get_dates(ticker=args.ticker)
datesSplit = dates.shape[1] - args.testingDays  # fixed: was misspelled `dataesSplit` and went unused
train_dates = dates[:, :datesSplit]
test_dates = dates[:, datesSplit:]

env = TradingEnv(stocks, train_data, train_dates, args.initial_invest)
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
scaler = get_scaler(env)
portfolio_value = []

if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(stocks, test_data, test_dates, args.initial_invest)
    # load trained weights
    agent.load(args.weights)
    # at test time, reuse the timestamp from when the weights were trained
    timestamp = re.findall(r'\d{12}', args.weights)[0]
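
# How the 12-digit training timestamp is recovered from the weights
# filename, with a made-up example path:
import re
weights_path = 'weights/202401151030-dqn.h5'   # hypothetical file
print(re.findall(r'\d{12}', weights_path)[0])  # -> '202401151030'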