def opt_func_base(stock_name, start_day_index, end_day_index, X_list):
    # GPyOpt passes candidates as a 2-D array; unwrap the single row.
    if len(X_list.shape) == 2:
        X_list = X_list[0]

    optimize_result = OptimizeResult()
    cache_filename = get_cache_filename(stock_name, start_day_index, end_day_index)
    if os.path.isfile(cache_filename):
        optimize_result.load(cache_filename)
        #print("stock worm cache loaded, size={}".format(optimize_result.get_size()))
    else:
        print("cannot find file cache:{}, will create new cache.".format(cache_filename))

    print("Checking: {}".format(X_list))
    hmm_model = HmmModel(stock_name)
    total_profit, profit_daily_avg = hmm_model.train(X_list, start_day_index, end_day_index)
    if total_profit == -1:
        print("Training failed.")
        return total_profit

    print("Finished, total_profit:{}".format(total_profit))
    strategy_features = hmm_model.get_strategy_features()
    optimize_result.insert_result(X_list,
                                  strategy_features + [total_profit, profit_daily_avg])
    optimize_result.save(cache_filename)
    return profit_daily_avg

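# Hypothetical usage sketch (not from the original sources): opt_func_base takes the
# stock and day-range arguments first so they can be bound with functools.partial,
# leaving X_list as the only free argument for GPyOpt to optimize. The domain below is
# an assumed placeholder; its parameter names follow the HMM result columns listed in
# the view script further down, but the value ranges are illustrative only.
from functools import partial
import GPyOpt

hmm_mixed_domain = [
    {'name': 'n_components', 'type': 'discrete', 'domain': (2, 3, 4, 5)},   # assumed values
    {'name': 'ema', 'type': 'discrete', 'domain': (1, 10, 20)},
    {'name': 'beta', 'type': 'discrete', 'domain': (99, 98)},
    {'name': 'use_volume', 'type': 'discrete', 'domain': (0, 1)},
    {'name': 'ref_stock_id', 'type': 'discrete', 'domain': (-1, 992)},
    {'name': 'time_format', 'type': 'discrete', 'domain': (0, 1, 2)},
]

opt_func = partial(opt_func_base, "HM-B", 0, 60)   # stock name and day range are illustrative
opt_handler = GPyOpt.methods.BayesianOptimization(
    f=opt_func,
    domain=hmm_mixed_domain,
    initial_design_numdata=30,
    acquisition_type='EI',
    exact_feval=True,
    maximize=True)
opt_handler.run_optimization(100, eps=0)
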
def create_from_file(self, filename, n_number):
    optimize_result = OptimizeResult(result_column_index=-1)
    optimize_result.load(filename)
    data = optimize_result.get_best_results(n_number)
    trade_strategy_list = []
    for i in range(n_number):
        X_list = data[i, :4]
        trade_strategy_list.append(
            TradeStrategy(X_list, self.n_max_trades_per_day, self.slippage,
                          self.courtage))
    return trade_strategy_list

class StockWormManager:
    # Search space for the Bayesian optimization of worm hyper-parameters.
    mixed_domain = [
        {'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20, 160, 20))},
        {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001, 0.002, 0.003, 0.004)},
        {'name': 'num_layers', 'type': 'discrete', 'domain': (1, 2, 3, 4, 5, 6, 7, 8)},
        {'name': 'rnn_type', 'type': 'discrete', 'domain': (0, 1, 2)},
        {'name': 'learning_period', 'type': 'discrete', 'domain': (20, 30, 40, 50)},
        {'name': 'prediction_period', 'type': 'discrete', 'domain': (5, 10, 20)},
        {'name': 'n_repeats', 'type': 'discrete', 'domain': (1, 3, 5, 10, 20, 30, 40)},
        {'name': 'beta', 'type': 'discrete', 'domain': (99, 98)},
        {'name': 'ema', 'type': 'discrete', 'domain': (1, 10, 20)},
        {'name': 'time_format', 'type': 'discrete', 'domain': (0, 1, 2)},  # 1 for stepofday, 2 for stepofweek
        {'name': 'volume_input', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'is_stateful', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'ref_stock_id', 'type': 'discrete', 'domain': (-1, 992, 3524, 139301, 160271)},
    ]

    # Reduced search space for quick test runs.
    mixed_domain_test = [
        {'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20, 160, 20))},
        {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001, 0.002, 0.003, 0.004)},
        {'name': 'num_layers', 'type': 'discrete', 'domain': (1, 2, 3, 4)},
        {'name': 'rnn_type', 'type': 'discrete', 'domain': (0, 1, 2)},
        {'name': 'learning_period', 'type': 'discrete', 'domain': (20,)},
        {'name': 'prediction_period', 'type': 'discrete', 'domain': (10,)},
        {'name': 'n_repeats', 'type': 'discrete', 'domain': (1,)},
        {'name': 'beta', 'type': 'discrete', 'domain': (99,)},
        {'name': 'ema', 'type': 'discrete', 'domain': (20,)},
        {'name': 'time_format', 'type': 'discrete', 'domain': (0, 1, 2)},  # 1 for stepofday, 2 for stepofweek
        {'name': 'volume_input', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'is_stateful', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'ref_stock_id', 'type': 'discrete', 'domain': (-1, 992, 3524, 139301, 160271)},
    ]

    # Search space used when trading futures.
    mixed_domain_future = [
        {'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20, 160, 20))},
        {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001, 0.002, 0.003, 0.004)},
        {'name': 'num_layers', 'type': 'discrete', 'domain': (1, 2, 3, 4, 5, 6, 7, 8)},
        {'name': 'rnn_type', 'type': 'discrete', 'domain': (0, 1, 2)},
        {'name': 'learning_period', 'type': 'discrete', 'domain': (20, 30, 40, 50)},
        {'name': 'prediction_period', 'type': 'discrete', 'domain': (5, 10, 20)},
        {'name': 'n_repeats', 'type': 'discrete', 'domain': (1, 3, 5, 10, 20, 30)},
        {'name': 'beta', 'type': 'discrete', 'domain': (99, 98)},
        {'name': 'ema', 'type': 'discrete', 'domain': (1, 10, 20)},
        {'name': 'time_format', 'type': 'discrete', 'domain': (0, 1, 2)},  # 1 for stepofday, 2 for stepofweek
        {'name': 'volume_input', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (1,)},
        {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0,)},
        {'name': 'is_stateful', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'ref_stock_id', 'type': 'discrete', 'domain': (-1, 800005)},
    ]

    def __init__(self, stock_name, stock_data_path, npy_files_path,
                 is_future=False, slippage=0):
        self.stock_name = stock_name
        self.stock_id = get_stock_id_by_name(stock_name)
        if self.stock_id is None:
            print("cannot find stock id with name: {}".format(stock_name))
            sys.exit()
        self.stock_data_path = stock_data_path
        self.npy_files_path = npy_files_path
        self.worm_list = []
        self.is_future = is_future
        self.slippage = slippage

    def search_worms(self, start_day_index, end_day_index, max_iter=300, is_test=False):
        if is_test:
            mixed_domain = self.mixed_domain_test
        else:
            mixed_domain = self.mixed_domain
        if self.is_future:
            mixed_domain = self.mixed_domain_future

        self.optimize_result = OptimizeResult()
        stock_worm_cache_file = self.get_stockworm_cache_file(start_day_index, end_day_index)
        create_if_not_exist(os.path.dirname(stock_worm_cache_file))
        if os.path.isfile(stock_worm_cache_file):
            self.optimize_result.load(stock_worm_cache_file)
            print("stock worm cache loaded, size={}".format(self.optimize_result.get_size()))
        else:
            print("cannot find file cache:{}, will create new cache.".format(stock_worm_cache_file))
        self.stock_worm_cache_file = stock_worm_cache_file

        opt_func = partial(self.opt_func, start_day_index, end_day_index)
        opt_handler = GPyOpt.methods.BayesianOptimization(
            f=opt_func,                  # Objective function
            domain=mixed_domain,         # Box-constraints of the problem
            initial_design_numdata=30,   # Number data initial design
            acquisition_type='EI',       # Expected Improvement
            exact_feval=True,            # True evaluations, no sample noise
            maximize=True)
        opt_handler.run_optimization(max_iter, eps=0)

    def opt_func(self, start_day, end_day, X_list):
        assert len(X_list) == 1
        features = X_list[0]

        # A worm must not use itself as the reference stock.
        ref_stock_id = features[14]
        if ref_stock_id == self.stock_id:
            ref_stock_id = -1
        features[14] = ref_stock_id

        print("starting test: {}".format(self.get_parameter_str(features)))
        cached_result, index = self.optimize_result.find_result(features)
        if cached_result is not None:
            print("find from cache. skip...")
        else:
            stock_worm = StockWorm(self.stock_name, self.stock_id,
                                   self.npy_files_path,
                                   is_future=self.is_future,
                                   slippage=self.slippage)
            if not stock_worm.validate(features, start_day, end_day):
                print("validate failed for worm: {}".format(self.get_parameter_str(features)))
                return np.array(-1).reshape((1, 1))

            total_profit, profit_daily, errors_daily = stock_worm.init(features, start_day, end_day)
            n_days = len(profit_daily)
            profit_mean = np.mean(profit_daily)
            error_mean = np.mean(errors_daily)

            # insert the strategy features along with the results into the cache.
            strategy_features = list(stock_worm.get_strategy_features())
            self.optimize_result.insert_result(
                features,
                strategy_features + [total_profit, n_days, error_mean, profit_mean])
            print("result saved to: {}".format(self.stock_worm_cache_file))
            self.optimize_result.save(self.stock_worm_cache_file)
            print("total_profit:{} in {} days, profit_mean:{} error:{} parameters:{}".format(
                total_profit, n_days, profit_mean, error_mean,
                self.get_parameter_str(features)))
        return np.array(profit_mean).reshape((1, 1))

    def get_swarm_path(self, start_day_index, end_day_index):
        return os.path.join(self.stock_data_path,
                            "{}_{}".format(self.stock_name, self.stock_id),
                            "{}-{}".format(start_day_index, end_day_index))

    def get_stockworm_cache_file(self, start_day_index, end_day_index):
        swarm_path = self.get_swarm_path(start_day_index, end_day_index)
        return os.path.join(swarm_path, "stockworm_cache.txt")

    def update_worms_from_cache(self, n_number, start_day_index, end_day_index):
        optimize_result = OptimizeResult()
        stockworm_cache_file = self.get_stockworm_cache_file(start_day_index, end_day_index)
        optimize_result.load(stockworm_cache_file)
        top_worms = optimize_result.get_best_results(n_number)
        assert len(top_worms) == n_number
        for i in range(n_number):
            # Cache layout: 15 model features, 6 strategy features, results, md5.
            features = top_worms[i, :15]
            strategy_features = top_worms[i, 15:21]
            md5 = top_worms[i, -1]
            model_save_path = self.get_model_save_path(start_day_index, end_day_index, md5)
            new_worm = StockWorm(self.stock_name, self.stock_id,
                                 self.npy_files_path, model_save_path,
                                 is_future=self.is_future, slippage=self.slippage)
            if os.path.isdir(model_save_path) and new_worm.load():
                pass
            else:
                total_profit, profit_daily, errors_daily = new_worm.init(
                    features, start_day_index, end_day_index,
                    strategy_features=strategy_features)
                new_worm.save()
                print("training finished for model {}, total_profit:{}".format(i, total_profit))
                new_worm.report()

            testing_total_profit, testing_profit_daily, n_data_appended = new_worm.test()
            if n_data_appended > 0:
                print("testing finished for model {}, total_profit:{} in {} days, "
                      "new data for {} days appended".format(
                          i, testing_total_profit, len(testing_profit_daily),
                          n_data_appended))
                new_worm.save()
                new_worm.report()
            self.worm_list.append(new_worm)

    def report(self):
        assert self.worm_list is not None
        for i in range(len(self.worm_list)):
            print("Report for Worm No.{}".format(i + 1))
            self.worm_list[i].report()

    def get_model_save_path(self, start_day_index, end_day_index, md5=None):
        swarm_path = self.get_swarm_path(start_day_index, end_day_index)
        if md5 is None:
            model_save_path = os.path.join(swarm_path, "models")
        else:
            model_save_path = os.path.join(swarm_path, "models", md5)
        return model_save_path

    def load(self):
        model_save_path = self.get_model_save_path(0, 60)
        directories = [
            os.path.join(model_save_path, f) for f in os.listdir(model_save_path)
            if os.path.isdir(os.path.join(model_save_path, f))
        ]
        self.worm_list = []
        for d in directories:
            new_worm = StockWorm(self.stock_id, self.npy_files_path, d)
            new_worm.load()
            self.worm_list.append(new_worm)

    def get_worm(self, n):
        assert len(self.worm_list) > n
        return self.worm_list[n]

    def test(self):
        profit_avg_list = []
        profit_list = []
        for worm in self.worm_list:
            training_total_profit, training_daily_profit, \
                testing_total_profit, testing_daily_profit = worm.get_historic_metrics()
            testing_len = len(testing_daily_profit)
            training_len = len(training_daily_profit)
            all_profit = np.concatenate((training_daily_profit, testing_daily_profit), axis=0)
            # Running average of daily profit over the full (training + testing) history.
            all_profit_cumsum = np.cumsum(all_profit)
            profit_avg = all_profit_cumsum / (np.arange(len(all_profit_cumsum)) + 1)
            profit_avg_list.append(profit_avg[training_len - 1:])
            profit_list.append(testing_daily_profit)
            assert len(profit_avg[training_len - 1:]) == len(testing_daily_profit) + 1

        profit_avg_arr = np.array(profit_avg_list)
        profit_arr = np.array(profit_list)
        # Each day, follow the worm with the best running-average profit so far.
        argmax_arr = np.argmax(profit_avg_arr, axis=0)
        overall_profit = profit_arr[argmax_arr[:-1], np.arange(len(profit_arr[0]))]
        print(argmax_arr[:-1])
        print(overall_profit)
        print("OVERALL RESULTS:")
        print(np.prod(overall_profit + 1) - 1)

    def plot(self):
        pass

    def get_parameter_str(self, X):
        parameter_str = ""
        for i in range(len(self.mixed_domain)):
            parameter_str += self.mixed_domain[i]["name"]
            parameter_str += ':'
            parameter_str += str(X[i])
            parameter_str += ','
        return parameter_str

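# Hypothetical driver sketch (illustrative only): how this StockWormManager variant
# might be used end to end. The stock name, paths and day indices are assumptions,
# not values from the original project.
manager = StockWormManager("HM-B", "stock-data", "npy_files", is_future=False, slippage=0)
manager.search_worms(0, 60, max_iter=100)      # Bayesian search over mixed_domain
manager.update_worms_from_cache(10, 0, 60)     # build or load the top-10 worms
manager.report()                               # per-worm training/testing report
manager.test()                                 # daily "best running average" ensemble result
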
import sys
import os.path
sys.path.append("../")
from util import *
from optimizeresult import OptimizeResult
from pathlib import Path

# Root directory that holds the per-stock cache files.
stock_data_path = get_stock_data_dir()

for filename in Path(stock_data_path).glob('**/stockworm_cache.txt'):
    optimize_result = OptimizeResult(result_column_index=-2)
    optimize_result.load(filename)
    print(optimize_result.get_n_columns())
    if optimize_result.get_n_columns() == 24:
        print("{} has already been migrated.".format(filename))
        continue

    print('migrating {}'.format(filename))
    assert optimize_result.get_n_columns() == 23
    # Presumably inserts the new ref_stock_id column (index 14) with default value -1.
    optimize_result.add_column(14, -1)
    assert optimize_result.get_n_columns() == 24
    optimize_result.save(filename)

    sys.exit()

stock_name = sys.argv[1]
stock_id = get_stock_id_by_name(stock_name)
training_start_day_index = int(sys.argv[2])
training_end_day_index = int(sys.argv[3])

number = 10
if len(sys.argv) == 5:
    number = int(sys.argv[4])

swarm_dir = get_swarm_dir(stock_name, stock_id, training_start_day_index,
                          training_end_day_index)
strategy_file = os.path.join(swarm_dir, 'stockworm_cache.txt')
worm_results = OptimizeResult(-1)
worm_results.load(strategy_file)
print("Top 10 Worms in {} results for {}: swarm: {}-{}".format(
    worm_results.get_size(), stock_name, training_start_day_index,
    training_end_day_index))
worm_results.get_best_results(number)

columns = [
    'n_neurons', 'learning_rate', 'num_layers', 'rnn_type', 'learning_period',
    'prediction_period', 'n_repeats', 'beta', 'ema', 'time_format',
    'volume_input', 'use_centralized_bid', 'split_daily_data', 'is_stateful',
    'ref_stock_id', 'buy_threshold', 'sell_threshold', 'stop_loss',
    'stop_gain', 'skip_at_beginning', 'value_ma', 'total_profit', 'days',
    'avg_error', 'avg_profit'
]
print("Columns:")
for i in range(len(columns)):
    print("{}: {}".format(i, columns[i]))

if len(sys.argv) < 5:
    print(
        "usage: python3 search-worms.py stock_name, stock_index training_start_day_index, training_end_day_index"
    )
    sys.exit()

stock_name = sys.argv[1]
stock_index = int(sys.argv[2])
training_start_day_index = int(sys.argv[3])
training_end_day_index = int(sys.argv[4])

swarm_dir = get_swarm_dir(stock_name, stock_index, training_start_day_index,
                          training_end_day_index)

strategy_file = os.path.join(swarm_dir, 'strategy_cache.txt')
result_strategies = OptimizeResult(-1)
result_strategies.load(strategy_file)
print("Top 10 Strategies in {} results for {}: swarm: {}-{}".format(
    result_strategies.get_size(), stock_name, training_start_day_index,
    training_end_day_index))
result_strategies.get_best_results(10)

strategy_file = os.path.join(swarm_dir, 'stockworm_cache.txt')
worm_results = OptimizeResult(-2)
worm_results.load(strategy_file)
print("Top 10 Worms in {} results for {}: swarm: {}-{}".format(
    worm_results.get_size(), stock_name, training_start_day_index,
    training_end_day_index))
worm_results.get_best_results(10)

class TradeStrategyFactory:
    # Search space for the trade-strategy parameters.
    mixed_domain = [
        {'name': 'buy_threshold', 'type': 'continuous', 'domain': (0, 0.005)},
        {'name': 'sell_threshold', 'type': 'continuous', 'domain': (-0.005, 0)},
        {'name': 'stop_loss', 'type': 'continuous', 'domain': (-0.02, -0.003)},
        {'name': 'stop_gain', 'type': 'continuous', 'domain': (0.003, 0.02)},
        {'name': 'skip_at_beginning', 'type': 'discrete', 'domain': (0, 5, 10, 20)},
        {'name': 'value_ma', 'type': 'discrete', 'domain': (1, 3, 5, 10, 20)},
    ]

    def __init__(self, cache_file=None, n_max_trades_per_day=4, slippage=0,
                 courtage=0, is_future=False):
        self.n_max_trades_per_day = n_max_trades_per_day
        self.slippage = slippage
        self.courtage = courtage
        self.is_future = is_future
        self.trade_strategy = None
        # load the initial data file
        self.optimize_result = OptimizeResult()
        self.cache_file = cache_file
        if cache_file is not None:
            self.optimize_result.load(cache_file)
        return

    def create_strategy(self, features):
        print("is_future")
        print(self.is_future)
        if self.is_future:
            classTradeStrategy = TradeStrategyFuture
        else:
            classTradeStrategy = TradeStrategy
        return classTradeStrategy(features, self.n_max_trades_per_day,
                                  self.slippage, self.courtage)

    def create_from_file(self, filename, n_number):
        optimize_result = OptimizeResult(result_column_index=-1)
        optimize_result.load(filename)
        data = optimize_result.get_best_results(n_number)
        trade_strategy_list = []
        if self.is_future:
            classTradeStrategy = TradeStrategyFuture
        else:
            classTradeStrategy = TradeStrategy
        for i in range(n_number):
            X_list = data[i, :4]
            trade_strategy_list.append(
                classTradeStrategy(X_list, self.n_max_trades_per_day,
                                   self.slippage, self.courtage))
        return trade_strategy_list

    def create_trade_strategies(self, data, iter, max_iter=100):
        #assert(data.shape[1]==504)
        self.data = data
        init_numdata = int(max_iter / 4)
        trade_strategy_list = []
        self.max_profit = -1
        self.trade_strategy = None
        for i in range(iter):
            print("Searching Strategies, Run: {}".format(i))
            self.n_iter = 0
            myBopt = GPyOpt.methods.BayesianOptimization(
                self.get_profit,                      # Objective function
                domain=self.mixed_domain,             # Box-constraints of the problem
                initial_design_numdata=init_numdata,  # Number data initial design
                acquisition_type='EI',                # Expected Improvement
                exact_feval=True,                     # True evaluations, no sample noise
                maximize=True)
            myBopt.run_optimization(max_iter, eps=0)
        return self.trade_strategy

    def get_profit(self, X_list):
        assert len(X_list) == 1
        if self.is_future:
            classTradeStrategy = TradeStrategyFuture
        else:
            classTradeStrategy = TradeStrategy

        X_list = X_list[0]
        self.n_iter += 1
        cached_result, index = self.optimize_result.find_result(X_list)
        trade_strategy = classTradeStrategy(
            X_list,
            n_max_trades_per_day=self.n_max_trades_per_day,
            slippage=self.slippage,
            courtage=self.courtage)
        if cached_result is not None:
            print("find cached result: {} for {}".format(
                cached_result, trade_strategy.get_parameter_str()))
            avg_daily_profit = cached_result[0]
        else:
            total_profit, daily_profit_list, _ = trade_strategy.get_profit(self.data)
            avg_daily_profit = np.mean(daily_profit_list)
            self.optimize_result.insert_result(X_list, avg_daily_profit)

        if avg_daily_profit > self.max_profit:
            print("find new record: {}, {}".format(
                avg_daily_profit, trade_strategy.get_parameter_str()))
            self.max_profit = avg_daily_profit
            self.trade_strategy = trade_strategy

        if self.n_iter % 10 == 0:
            print("iteration: {}, cachesize={}, avg_daily_profit:{}".format(
                self.n_iter, self.optimize_result.get_size(), avg_daily_profit))
            if self.cache_file is not None:
                self.optimize_result.save(self.cache_file)

        return avg_daily_profit.reshape((1, 1))

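# Hypothetical usage sketch (illustrative only): searching for a trade strategy on
# value/price data loaded the same way StockWormManager.search_worms does below. The
# stock index, day range and cache path are assumptions; create_trade_strategies
# returns the single best strategy found across the runs.
factory = TradeStrategyFactory(cache_file="strategy_cache.txt", slippage=0, courtage=0)
data = load_strategy_input_data(992, 0, 60)           # timestamp, value and price series
best_strategy = factory.create_trade_strategies(data, 5, max_iter=100)
print(best_strategy.get_parameter_str())
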
class StockWormManager:
    # Search space for the Bayesian optimization of worm hyper-parameters.
    mixed_domain = [
        {'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20, 160, 20))},
        {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001, 0.002, 0.003, 0.004)},
        {'name': 'num_layers', 'type': 'discrete', 'domain': (1, 2, 3, 4, 5, 6, 7, 8)},
        {'name': 'rnn_type', 'type': 'discrete', 'domain': (0, 1, 2)},
        {'name': 'learning_period', 'type': 'discrete', 'domain': (20, 30, 40)},
        {'name': 'prediction_period', 'type': 'discrete', 'domain': (2, 5, 10, 20)},
        {'name': 'n_repeats', 'type': 'discrete', 'domain': (1, 3, 5, 10, 20, 30, 40)},
        {'name': 'beta', 'type': 'discrete', 'domain': (99,)},
        {'name': 'ema', 'type': 'discrete', 'domain': (20,)},
        {'name': 'time_format', 'type': 'discrete', 'domain': (0, 1, 2)},  # 1 for stepofday, 2 for stepofweek
        {'name': 'volume_input', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'split_daily_data', 'type': 'discrete', 'domain': (1,)},
    ]

    # Reduced search space for quick test runs.
    mixed_domain_test = [
        {'name': 'n_neurons', 'type': 'discrete', 'domain': tuple(range(20, 160, 20))},
        {'name': 'learning_rate', 'type': 'discrete', 'domain': (0.001, 0.002, 0.003, 0.004)},
        {'name': 'num_layers', 'type': 'discrete', 'domain': (1, 2, 3, 4)},
        {'name': 'rnn_type', 'type': 'discrete', 'domain': (0, 1, 2)},
        {'name': 'learning_period', 'type': 'discrete', 'domain': (20,)},
        {'name': 'prediction_period', 'type': 'discrete', 'domain': (10,)},
        {'name': 'n_repeats', 'type': 'discrete', 'domain': (1,)},
        {'name': 'beta', 'type': 'discrete', 'domain': (99,)},
        {'name': 'ema', 'type': 'discrete', 'domain': (20,)},
        {'name': 'time_format', 'type': 'discrete', 'domain': (0, 1, 2)},  # 1 for stepofday, 2 for stepofweek
        {'name': 'volume_input', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'use_centralized_bid', 'type': 'discrete', 'domain': (0, 1)},
        {'name': 'split_daily_data', 'type': 'discrete', 'domain': (0, 1)},
    ]

    def __init__(self, stock_name, stock_index, model_save_path, npy_files_path):
        self.stock_name = stock_name
        self.stock_index = stock_index
        self.model_save_path = model_save_path
        self.npy_files_path = npy_files_path
        self.worm_list = []

    def search_worms(self, start_day_index, end_day_index, max_iter=300,
                     is_test=False, search_strategy=False):
        if is_test:
            mixed_domain = self.mixed_domain_test
        else:
            mixed_domain = self.mixed_domain

        self.optimize_result = OptimizeResult(result_column_index=15)
        stock_worm_cache_file = self.get_stockworm_cache_file(start_day_index, end_day_index)
        if os.path.isfile(stock_worm_cache_file):
            self.optimize_result.load(stock_worm_cache_file)
            print("stock worm cache loaded, size={}".format(self.optimize_result.get_size()))
        else:
            print("cannot find file cache:{}, will create new cache.".format(stock_worm_cache_file))
        self.stock_worm_cache_file = stock_worm_cache_file

        strategy_cache_file = self.get_strategy_cache_file(start_day_index, end_day_index)
        trade_strategy_factory = TradeStrategyFactory(cache_file=strategy_cache_file)
        if os.path.isfile(strategy_cache_file) and not search_strategy:
            print("find strategy_cache:{}, loading...".format(strategy_cache_file))
            strategy_list = trade_strategy_factory.create_from_file(
                strategy_cache_file, NUM_STRATEGIES)
        else:
            if search_strategy:
                print("search_strategy is True, searching strategies again...")
            else:
                print("cannot find strategy cache:{}, generating...".format(strategy_cache_file))
            # the input data: timestamp, value, and price.
            data = load_strategy_input_data(self.stock_index, start_day_index, end_day_index)
            strategy_list = trade_strategy_factory.create_trade_strategies(data, 5)

        opt_func = partial(self.opt_func, strategy_list, start_day_index, end_day_index)
        opt_handler = GPyOpt.methods.BayesianOptimization(
            f=opt_func,                  # Objective function
            domain=mixed_domain,         # Box-constraints of the problem
            initial_design_numdata=30,   # Number data initial design
            acquisition_type='EI',       # Expected Improvement
            exact_feval=True,            # True evaluations, no sample noise
            maximize=True)
        opt_handler.run_optimization(max_iter, eps=0)

    def opt_func(self, strategy_list, start_day, end_day, X_list):
        assert len(X_list) == 1
        features = X_list[0]
        print("starting test: {}".format(self.get_parameter_str(features)))

        cached_result, index = self.optimize_result.find_result(features)
        if cached_result is not None:
            total_profit = cached_result[0]
            n_days = cached_result[1]
            profit_mean = cached_result[2]
            error_mean = cached_result[3]
            print("find from cache. skip...")
        else:
            save_path = self.get_save_path(features)
            stock_worm = StockWorm(self.stock_index, self.npy_files_path, save_path)
            total_profit, profit_daily, errors_daily = stock_worm.init(
                features, strategy_list, start_day, end_day)
            n_days = len(profit_daily)
            profit_mean = np.mean(profit_daily)
            error_mean = np.mean(errors_daily)
            self.optimize_result.insert_result(
                features, [total_profit, n_days, profit_mean, error_mean])
            print("result saved to: {}".format(self.stock_worm_cache_file))
            self.optimize_result.save(self.stock_worm_cache_file)

        print("total_profit:{} in {} days, profit_mean:{} error:{} parameters:{}".format(
            total_profit, n_days, profit_mean, error_mean,
            self.get_parameter_str(features)))
        return np.array(profit_mean).reshape((1, 1))

    def get_swarm_path(self, start_day_index, end_day_index):
        return os.path.join(self.model_save_path,
                            "{}_{}".format(self.stock_name, self.stock_index),
                            "{}-{}".format(start_day_index, end_day_index))

    def get_model_path(self, start_day_index, end_day_index):
        swarm_path = self.get_swarm_path(start_day_index, end_day_index)
        return os.path.join(swarm_path, "models")

    def get_stockworm_cache_file(self, start_day_index, end_day_index):
        swarm_path = self.get_swarm_path(start_day_index, end_day_index)
        return os.path.join(swarm_path, "stockworm_cache.txt")

    def get_strategy_cache_file(self, start_day_index, end_day_index):
        swarm_path = self.get_swarm_path(start_day_index, end_day_index)
        return os.path.join(swarm_path, "strategy_cache.txt")

    def update_worms_from_cache(self, n_number, start_day_index, end_day_index):
        optimize_result = OptimizeResult(result_column_index=15)
        stockworm_cache_file = self.get_stockworm_cache_file(start_day_index, end_day_index)
        optimize_result.load(stockworm_cache_file)
        top_worms = optimize_result.get_best_results(n_number)

        trade_strategy_factory = TradeStrategyFactory()
        strategy_cache_file = self.get_strategy_cache_file(start_day_index, end_day_index)
        strategy_list = trade_strategy_factory.create_from_file(
            strategy_cache_file, NUM_STRATEGIES)

        assert len(top_worms) == n_number
        swarm_path = self.get_swarm_path(start_day_index, end_day_index)
        for i in range(n_number):
            # Cache layout: 13 model features followed by the result columns.
            features = top_worms[i, :13]
            features_str = self.get_parameter_str(features)
            model_save_path = os.path.join(swarm_path, "models", md5(features_str))
            new_worm = StockWorm(self.stock_index, self.npy_files_path, model_save_path)
            if os.path.isdir(model_save_path) and new_worm.load():
                pass
            else:
                total_profit, profit_daily, errors_daily = new_worm.init(
                    features, strategy_list, start_day_index, end_day_index)
                new_worm.save()
                print("training finished for model {}, total_profit:{}".format(i, total_profit))

            testing_total_profit, testing_profit_daily, n_data_appended = new_worm.test()
            if n_data_appended > 0:
                print("testing finished for model {}, total_profit:{} in {} days, "
                      "new data for {} days appended".format(
                          i, testing_total_profit, len(testing_profit_daily),
                          n_data_appended))
                new_worm.save()
            self.worm_list.append(new_worm)

    def report(self):
        assert self.worm_list is not None
        for i in range(len(self.worm_list)):
            print("Report for Worm No.{}".format(i + 1))
            self.worm_list[i].report()

    def get_save_path(self, X):
        params_str = self.get_parameter_str(X)
        return os.path.join(self.model_save_path, md5(params_str))

    def get_parameter_str(self, X):
        parameter_str = ""
        for i in range(len(self.mixed_domain)):
            parameter_str += self.mixed_domain[i]["name"]
            parameter_str += ':'
            parameter_str += str(X[i])
            parameter_str += ','
        return parameter_str

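# Illustrative sketch (assumed values): how a feature vector maps to its model directory
# in this older variant. get_parameter_str serializes one value per mixed_domain entry,
# and the md5 helper from util (as used above) is assumed to turn that string into the
# directory name used by get_save_path and update_worms_from_cache.
manager = StockWormManager("HM-B", 992, "models", "npy_files")   # name/id/paths are assumptions
features = [60, 0.002, 2, 1, 20, 10, 1, 99, 20, 0, 1, 1, 1]      # one value per mixed_domain entry
print(manager.get_parameter_str(features))
# e.g. "n_neurons:60,learning_rate:0.002,num_layers:2,..."
print(manager.get_save_path(features))                           # models/<md5 of the parameter string>
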
                                                     end_day_index, overnight=True)
profit, mean = get_stock_change_rate(stock_name, dev_start_day_index,
                                     end_day_index, overnight=False)
print("Stock profit w/o overnight: {}, mean: {}".format(profit, mean))
print("Stock profit w overnight: {}, mean: {}".format(profit_overnight,
                                                      mean_overnight))

cache_file = get_cache_filename(stock_name, start_day_index, end_day_index)
if not os.path.isfile(cache_file):
    print("{} does not exist.".format(cache_file))
    sys.exit(0)

worm_results = OptimizeResult(-1)
worm_results.load(cache_file)
print("Top 10 Worms in {} results for {}: swarm: {}-{}".format(
    worm_results.get_size(), stock_name, start_day_index, end_day_index))
worm_results.get_best_results(number)

columns = [
    'n_components', 'ema', 'beta', 'use_volume', 'ref_stock_id', 'time_format',
    'buy_threshold', 'sell_threshold', 'stop_loss', 'stop_gain',
    'skip_at_beginning', 'value_ma', 'total_profit'
]
print("Columns:")
for i in range(len(columns)):
    print("{}: {}".format(i, columns[i]))

"usage: python3 update-models.py stock_name, start_day_index end_day_index [number=1]" ) sys.exit() stock_name = sys.argv[1] start_day_index = int(sys.argv[2]) end_day_index = int(sys.argv[3]) if len(sys.argv) == 5: number = int(sys.argv[4]) else: number = 1 cache_file = get_cache_filename(stock_name, start_day_index, end_day_index) optimize_result = OptimizeResult() optimize_result.load(cache_file) top_worms = optimize_result.get_best_results(number) for i in range(len(top_worms)): features = top_worms[i, :6] strategy_features = top_worms[i, 6:12] md5 = top_worms[i, -1] model_path = get_model_path(stock_name, start_day_index, end_day_index) save_path = os.path.join(model_path, md5) hmm_model_filename = os.path.join(save_path, "hmm_model.pkl") hmm_model = None if os.path.isfile(hmm_model_filename): print("file: {} exists, load it.".format(hmm_model_filename)) with open(hmm_model_filename, "rb") as file: hmm_model = pickle.load(file) else: