def run_knn(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Any:
    """Find optimal parameters for a KNN classifier, evaluate the model and
    return the fitted model object."""
    LOGGER.info("Finding best knn..")
    search_space = {
        "type": "knn",
        "n_neighbors": hp.uniformint("n_neighbors", 2, 15),
        "weights": hp.choice("weights", ["uniform", "distance"]),
    }
    best_params = hyperopt_search(X_train, y_train, search_space, config)
    model = make_pipeline(get_scaler(config), KNeighborsClassifier(**best_params))
    mean_cross_val_score = cross_validate_model(model, X_train, y_train)
    LOGGER.info(f"KNN cross validation score: {mean_cross_val_score}")
    model.fit(X_train, y_train)  # fit on the full training set before predicting
    if config["test"]:
        # classification_report expects (y_true, y_pred) in that order
        print(classification_report(y_test, model.predict(X_test)))
    return model
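
# The `hyperopt_search` helper used above is defined elsewhere in the repo.
# A minimal sketch of what it is assumed to do, built on hyperopt's standard
# fmin/TPE API (the "max_evals" config key and the objective's model dispatch
# are assumptions):
from hyperopt import fmin, tpe, space_eval, Trials, STATUS_OK
from sklearn.model_selection import cross_val_score


def hyperopt_search(X_train, y_train, search_space, config):
    def objective(params):
        params = dict(params)
        model_type = params.pop("type")
        if model_type == "knn":
            model = make_pipeline(get_scaler(config), KNeighborsClassifier(**params))
        else:  # "svm"
            model = make_pipeline(get_scaler(config), SVC(**params, class_weight="balanced"))
        score = cross_val_score(model, X_train, y_train, cv=5).mean()
        return {"loss": -score, "status": STATUS_OK}  # fmin minimizes

    trials = Trials()
    best = fmin(objective, search_space, algo=tpe.suggest,
                max_evals=config.get("max_evals", 50), trials=trials)
    best_params = space_eval(search_space, best)  # map hp.choice indices back to values
    best_params.pop("type")
    return best_params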
def run_svm_vote(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Any:
    """Find optimal parameters for an SVM classifier, evaluate the model and
    return the fitted model object."""
    LOGGER.info("Finding best svm..")
    search_space = {
        "type": "svm",
        "C": hp.lognormal("C", 0, 100.0),
        "gamma": hp.lognormal("gamma", 0, 1.0),
        "kernel": hp.choice("kernel", ["rbf"]),
    }
    best_params = hyperopt_search(X_train, y_train, search_space, config)
    model = make_pipeline(
        get_scaler(config),
        SVC(**best_params, class_weight="balanced", probability=True),
    )
    mean_cross_val_score = cross_validate_model(model, X_train, y_train)
    LOGGER.info(f"SVM cross validation score: {mean_cross_val_score}")
    model.fit(X_train, y_train)  # fit on the full training set before predicting
    if config["test"]:
        # classification_report expects (y_true, y_pred) in that order
        print(classification_report(y_test, model.predict(X_test)))
    return model
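
# `cross_validate_model`, shared by all the runners here, is also defined
# elsewhere; a plausible minimal implementation using sklearn (the cv=5 fold
# count is an assumption):
from sklearn.model_selection import cross_val_score


def cross_validate_model(model, X_train, y_train, cv=5):
    """Return the mean cross-validation accuracy of `model` on the training set."""
    return cross_val_score(model, X_train, y_train, cv=cv).mean()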
def advise():
    """Flask view: read current holdings and cash from the form, ask the
    trained DQN agent for an action per asset and render the advice."""
    n1 = float(request.form['n1'])
    n2 = float(request.form['n2'])
    n3 = float(request.form['n3'])
    cash = float(request.form['cash'])

    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)
    agent.load("202005011635-dqn.h5")

    # Build the state vector from the user's inputs, then scale it.
    state = env.reset()
    state[0] = n1
    state[1] = n2
    state[2] = n3
    state[-1] = cash
    state = scaler.transform([state])
    action = agent.act(state)
    # action_combo = list(map(list, itertools.product([0, 1, 2], repeat=3)))
    action_vec = action_combo[action]
    # action_map = {0: "sell", 1: "hold", 2: "buy"}

    # Sanitize the advice: we cannot sell an asset we do not own, and cannot
    # buy anything without cash -- fall back to "hold" (1) in both cases.
    ans = []
    for holding, act in zip((n1, n2, n3), action_vec):
        tmp = 1 if act == 0 and holding == 0 else act
        if cash == 0 and tmp == 2:
            tmp = 1
        ans.append(action_map[tmp])

    return render_template('index.html', ans=ans, n1=n1, n2=n2, n3=n3, cash=cash)
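
# `action_combo` and `action_map` are looked up at module level; mirroring the
# commented-out lines inside advise(), they are assumed to be defined as:
import itertools

# every {sell, hold, buy} combination over the three assets
action_combo = list(map(list, itertools.product([0, 1, 2], repeat=3)))
action_map = {0: "sell", 1: "hold", 2: "buy"}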
def run_gaussian(
    X_train: pd.DataFrame,
    X_test: pd.DataFrame,
    y_train: pd.DataFrame,
    y_test: pd.DataFrame,
    config: Dict[str, Any],
) -> Any:
    """Fit a Gaussian process classifier with an RBF kernel, evaluate it and
    return the fitted model object."""
    LOGGER.info("Finding best gaussian model..")
    kernel = 1.0 * RBF(1.0)
    model = make_pipeline(get_scaler(config), GaussianProcessClassifier(kernel=kernel))
    mean_cross_val_score = cross_validate_model(model, X_train, y_train)
    LOGGER.info(
        f"Gaussian classifier cross validation score: {mean_cross_val_score}")
    model.fit(X_train, y_train)  # fit on the full training set before predicting
    if config["test"]:
        # classification_report expects (y_true, y_pred) in that order
        print(classification_report(y_test, model.predict(X_test)))
    return model
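
# `get_scaler(config)` in these pipelines is defined elsewhere; a minimal
# sketch of what it is assumed to return (the "scaler" config key is an
# assumption):
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def get_scaler(config):
    """Return the feature scaler selected by the config."""
    if config.get("scaler") == "minmax":
        return MinMaxScaler()
    return StandardScaler()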
def main():
    env = TradingEnv(train_data)
    state_size = env.observation_space.shape
    action_size = env.action_space.n
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)
    portfolio_value = []
    runs = defaultdict(list)  # per-episode portfolio value history; from collections import defaultdict
    mode = "train"
    if mode == "test":
        env = TradingEnv(test_data)
        agent.load(weights)

    for e in range(episode):
        state = env.reset()
        state = scaler.transform([state])
        for t in range(env.n_step):
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            next_state = scaler.transform([next_state])
            if mode == "train":
                agent.remember(state, action, reward, next_state, done)
            state = next_state
            runs[e].append(info["cur_val"])
            if done:
                print("Episode: {}/{}, episode end value: {}".format(
                    e + 1, episode, info["cur_val"]))
                portfolio_value.append(info["cur_val"])
                break
            if mode == "train" and len(agent.memory) > batch_size:
                agent.replay(batch_size)

    # Plot every fifth run and report its Sharpe ratio.
    for k, v in runs.items():
        if k % 5 == 0:
            plt.plot(v, label=str(k))
            print("Sharpe for " + str(k), sharpe(v))
    plt.legend()
    plt.show()
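
# `sharpe` is assumed to be a small helper along these lines: a plain
# (non-annualized) Sharpe ratio computed from a portfolio-value series.
import numpy as np


def sharpe(values, risk_free=0.0):
    values = np.asarray(values, dtype=float)
    returns = np.diff(values) / values[:-1]  # per-step simple returns
    excess = returns - risk_free
    return excess.mean() / (excess.std() + 1e-9)  # epsilon guards zero variance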
maybe_make_dir('weights')
maybe_make_dir('portfolio_val')

timestamp = time.strftime('%Y%m%d%H%M')

# Round prices to integers to shrink the state space, then split 75/25
# into train and test along the time axis.
data = np.around(get_data())
data_size = data.shape[1]
data_cut_point = int(0.75 * data_size)
train_data = data[:, :data_cut_point]
test_data = data[:, data_cut_point:]

env = TradingEnv(train_data, args.initial_invest)
state_size = env.observation_space.shape
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)
scaler = get_scaler(env)
portfolio_value = []

if args.mode == 'test':
    # remake the env with test data
    env = TradingEnv(test_data, args.initial_invest)
    # load trained weights
    agent.load(args.weights)
    # when testing, reuse the timestamp from when the weights were trained
    timestamp = re.findall(r'\d{12}', args.weights)[0]

for e in range(args.episode):
    state = env.reset()
    state = scaler.transform([state])
    for t in range(env.n_step):  # renamed from `time` to avoid shadowing the time module
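
# `get_scaler(env)` here (distinct from the sklearn-config variant earlier) is
# assumed to follow the usual pattern for this kind of trading DQN: play random
# actions through one episode to sample states and fit a StandardScaler.
import numpy as np
from sklearn.preprocessing import StandardScaler


def get_scaler(env):
    states = [env.reset()]
    for _ in range(env.n_step):
        action = np.random.choice(env.action_space.n)
        state, reward, done, info = env.step(action)
        states.append(state)
        if done:
            break
    scaler = StandardScaler()
    scaler.fit(states)
    return scaler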
def setup(self, dbars):
    print("Setup")
    # data
    # train_data = np.around(get_data(dbars))  # round to integers to reduce state space
    train_data = get_data(dbars)
    self.stock_price_history = train_data
    self.n_stock, self.n_step = self.stock_price_history.shape
    print(self.n_stock, self.n_step)

    # instance attributes
    self.init_invest = START_MONEY
    self.cur_step = None
    self.stock_owned = None
    self.stock_price = None
    self.cash_in_hand = None

    # action space: every {sell, hold, buy} combination over n_stock assets
    self.action_space = spaces.Discrete(3 ** self.n_stock)
    self.action_combo = [
        *map(list, itertools.product([0, 1, 2], repeat=self.n_stock))
    ]

    # observation space: give estimates in order to sample and build scaler
    stock_max_price = self.stock_price_history.max(axis=1)
    # stock_range = [[0, self.init_invest * 2 // mx] for mx in stock_max_price]
    stock_range = [[0, 1000] for _ in range(self.n_stock)]
    price_range = [[0, mx * 100] for mx in stock_max_price]
    cash_in_hand_range = [[0, self.init_invest * 2]]
    print(stock_range + price_range + cash_in_hand_range)
    self.observation_space = spaces.MultiDiscrete(
        stock_range + price_range + cash_in_hand_range)

    # seed and start
    self.seed()
    self.reset()

    state_size = self.observation_space.shape
    action_size = self.action_space.n
    self.agent = QAgent(state_size, action_size)
    self.scaler = get_scaler(self.stock_price_history, self.init_invest,
                             self.n_stock)

    # parameters
    self.batch_size = 500

    # A 'train' flag could switch between training here and loading from a
    # weight file; for now we always train.
    state = self.reset()
    state = self.scaler.transform([state])
    for t in range(self.n_step):
        print("time:", t, "/", self.n_step)
        action = self.agent.act(state)
        next_state, reward, done = self.train_step(action)
        next_state = self.scaler.transform([next_state])
        self.agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            break
        if len(self.agent.memory) > self.batch_size:
            # experience replay speeds up learning once the buffer is full enough
            self.agent.replay(self.batch_size)

    self.agent.save('./weights/dqn')
    self.last_state = self.reset()
    self.last_state = self.scaler.transform([self.last_state])
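
# The three-argument `get_scaler` called in setup() is not shown; a sketch of
# what it is assumed to do, sampling synthetic states from the same value
# ranges the observation space above declares (n_samples is an assumption):
import numpy as np
from sklearn.preprocessing import StandardScaler


def get_scaler(stock_price_history, init_invest, n_stock, n_samples=1000):
    max_price = stock_price_history.max(axis=1)
    samples = np.column_stack([
        np.random.randint(0, 1000, size=(n_samples, n_stock)),            # shares owned
        np.random.uniform(0, max_price * 100, size=(n_samples, n_stock)),  # prices
        np.random.uniform(0, init_invest * 2, size=(n_samples, 1)),        # cash in hand
    ])
    scaler = StandardScaler()
    scaler.fit(samples)
    return scaler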
def DqnProgram(args, setResult, training_result):
    parser = argparse.ArgumentParser()
    parser.add_argument('-e', '--episode', type=int, default=2000,
                        help='number of episodes to run')
    parser.add_argument('-b', '--batch_size', type=int, default=32,
                        help='batch size for experience replay')
    parser.add_argument('-i', '--initial_invest', type=int, default=20000,
                        help='initial investment amount')
    parser.add_argument('-m', '--mode', type=str, required=True,
                        help='either "train" or "test"')
    parser.add_argument('-w', '--weights', type=str,
                        help='a trained model weights file')
    args = parser.parse_args(args)

    maybe_make_dir('weights')
    maybe_make_dir('portfolio_val')

    import time
    timestamp = time.strftime('%Y%m%d%H%M')

    data = get_data(mode=args.mode)  # TODO: hook this up to the stock selected in the UI
    data = np.array([c['종가'] for c in data])  # '종가' is the closing price
    env = TradingEnv(data, args.initial_invest)
    state_size = env.observation_space.shape
    action_size = env.action_space.n  # Discrete spaces expose .n, not .shape
    agent = DQNAgent(state_size, action_size)
    scaler = get_scaler(env)
    portfolio_value = []

    if args.weights is not None:
        agent.load(args.weights)
        # reuse the timestamp from when the weights were trained
        timestamp = re.findall(r'\d{12}', args.weights)[0]

    for e in range(args.episode):
        state = env.reset()
        state = scaler.transform([state])
        for t in range(env.n_step):  # renamed from `time` to avoid shadowing the time module
            action = agent.act(state)
            next_state, reward, done, info = env.step(action)
            next_state = scaler.transform([next_state])
            if args.mode == 'train':
                agent.remember(state, action, reward, next_state, done)
            state = next_state
            if done:
                msg = "episode: {}/{}, episode end value: {}".format(
                    e + 1, args.episode, info['cur_val'])
                print(msg)
                setResult(msg=msg)
                training_result.append(info['cur_val'])
                portfolio_value.append(info['cur_val'])  # episode end portfolio value
                break
            if args.mode == 'train' and len(agent.memory) > args.batch_size:
                agent.replay(args.batch_size)
        if args.mode == 'train' and (e + 1) % 10 == 0:
            # checkpoint weights every 10 episodes
            agent.save('weights/{}-dqn.h5'.format(timestamp))

    # save portfolio value history to disk
    with open('portfolio_val/{}-{}.p'.format(timestamp, args.mode), 'wb') as fp:
        pickle.dump(portfolio_value, fp)
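
# `maybe_make_dir`, used by both training scripts above, is assumed to be the
# usual tiny utility:
import os


def maybe_make_dir(directory):
    """Create `directory` if it does not already exist."""
    if not os.path.exists(directory):
        os.makedirs(directory)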
if not os.path.exists(file_results):
    os.makedirs(file_results)

checkpointer = ModelCheckpoint(filepath=file_results + 'phonemes_weights.hdf5',
                               verbose=1, save_best_only=True)

# perc = test_labels(file_feat_test)
# print("perc_classes=", perc)

# Load cached feature normalization statistics, or compute and cache them.
if os.path.exists(file_results + "mu.npy"):
    mu = np.load(file_results + "mu.npy")
    std = np.load(file_results + "std.npy")
else:
    mu, std = get_scaler(file_feat_train)
    np.save(file_results + "mu.npy", mu)
    np.save(file_results + "std.npy", std)

phonemes = Phon.get_list_phonemes()

# model and training hyperparameters
input_size = (40, 34)
GRU_size = 128
hidden = 128
num_labels = len(phonemes)
Learning_rate = 0.0005
recurrent_droput_prob = 0.0
epochs = 1000
batch_size = 64
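
# The model-building code is not shown in this excerpt; a sketch consistent
# with the hyperparameters above (the layer layout and the x_train/y_train
# names are assumptions):
from tensorflow.keras.layers import GRU, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

model = Sequential([
    GRU(GRU_size, input_shape=input_size,
        recurrent_dropout=recurrent_droput_prob),
    Dense(hidden, activation="relu"),
    Dense(num_labels, activation="softmax"),
])
model.compile(optimizer=Adam(learning_rate=Learning_rate),
              loss="categorical_crossentropy", metrics=["accuracy"])
# model.fit(x_train, y_train, epochs=epochs, batch_size=batch_size,
#           validation_split=0.1, callbacks=[checkpointer])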