def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05):
    if target_symbol is not None:
        return [
            StockDataSet(
                target_symbol,
                input_size=input_size,
                num_steps=num_steps,
                test_ratio=test_ratio)
        ]

    # Load metadata of S&P 500 stocks
    info = pd.read_csv("data/constituents-financials.csv")
    info = info.rename(columns={col: col.lower().replace(' ', '_') for col in info.columns})
    info['file_exists'] = info['symbol'].map(lambda x: os.path.exists("data/{}.csv".format(x)))
    print(info['file_exists'].value_counts().to_dict())
    info = info[info['file_exists'] == True].reset_index(drop=True)
    # DataFrame.sort() was removed from pandas; sort_values() is the current API.
    info = info.sort_values('market_cap', ascending=False).reset_index(drop=True)

    if k is not None:
        info = info.head(k)

    print("Head of S&P 500 info:\n", info.head())

    # Generate embedding meta file
    info[['symbol', 'sector']].to_csv(os.path.join("logs/metadata.tsv"), sep='\t', index=False)

    return [
        StockDataSet(row['symbol'],
                     input_size=input_size,
                     num_steps=num_steps,
                     # Use the passed-in ratio instead of hard-coding 0.05.
                     test_ratio=test_ratio)
        for _, row in info.iterrows()]
def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05,
               fwd_ret=5, pca=True, wave=True):
    if target_symbol is not None:
        return [
            StockDataSet(
                target_symbol,
                input_size=input_size,
                num_steps=num_steps,
                test_ratio=test_ratio,
                close_price_only=False,
                fwd_ret=fwd_ret,
                pca=pca,
                wave=wave)
        ]

    # Load metadata of S&P 500 stocks
    data_dir = "D:\\Users\\ftran_zim\\data_cna\\"
    info = pd.read_csv(data_dir + "constituents-financials.csv", converters={'symbol': str})
    info = info.rename(columns={col: col.lower().replace(' ', '_') for col in info.columns})
    info['file_exists'] = info['symbol'].map(lambda x: os.path.exists(data_dir + "{}.csv".format(x)))
    print(info['file_exists'].value_counts().to_dict())
    info = info[info['file_exists'] == True].reset_index(drop=True)
    info = info.sort_values(by=['market_cap'], ascending=False).reset_index(drop=True)

    # Filter out bad ones.
    info = info[~info.symbol.isin(['XOM', 'JNJ', 'JPM'])]

    if k is not None:
        info = info.head(k)

    # Generate embedding meta file
    info[['symbol', 'sector']].to_csv(os.path.join("logs/metadata.tsv"), sep='\t', index=False)

    return [
        StockDataSet(row['symbol'],
                     input_size=input_size,
                     num_steps=num_steps,
                     test_ratio=test_ratio,
                     close_price_only=False,
                     fwd_ret=fwd_ret,
                     pca=pca,
                     # Pass the flag through instead of hard-coding wave=True.
                     wave=wave)
        for _, row in info.iterrows()]
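# A minimal usage sketch for the loaders above. It assumes StockDataSet (as in
# lilianweng/stock-rnn's data_wrapper.py) exposes stock_sym, train_X, and
# test_X; the parameter values are illustrative, not the authors' settings.
if __name__ == "__main__":
    # Load the 10 largest S&P 500 stocks by market cap, 30-step windows.
    datasets = load_sp500(input_size=1, num_steps=30, k=10)
    # Or load a single ticker:
    # datasets = load_sp500(input_size=1, num_steps=30, target_symbol="GOOG")
    for d in datasets:
        print(d.stock_sym, len(d.train_X), len(d.test_X))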
def load_stock_market_data(self):
    info = pd.read_csv("data/companylist.csv")
    info = info.rename(columns={
        col: col.lower().replace(' ', '_') for col in info.columns
    })
    info['file_exists'] = info['symbol'].map(
        lambda x: os.path.exists("data/{}.csv".format(x)))
    # print(info['file_exists'].value_counts().to_dict())
    info = info[info['file_exists'] == True].reset_index(drop=True)
    info = info.sort_values('marketcap', ascending=False).reset_index(drop=True)

    if self.stock_count is not None:
        info = info.head(self.stock_count)

    self.stock_market_data = [
        StockDataSet(row['symbol'],
                     input_size=self.input_size,
                     num_steps=self.num_steps,
                     test_ratio=self.test_ratio)
        for _, row in info.iterrows()
    ]
def load_data(stock_name, input_size, num_steps):
    stock_dataset = StockDataSet(stock_name,
                                 input_size=input_size,
                                 num_steps=num_steps,
                                 test_ratio=0.1,
                                 close_price_only=True)
    print("Train data size:", len(stock_dataset.train_X))
    print("Test data size:", len(stock_dataset.test_X))
    return stock_dataset
def load_data(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.20):
    if target_symbol is not None:
        return [
            StockDataSet(target_symbol,
                         input_size=input_size,
                         num_steps=num_steps,
                         test_ratio=test_ratio)
        ]

    info = pd.read_csv('index_list.csv')
    if k is not None:
        info = info.head(k)

    return [
        StockDataSet(row['symbol'],
                     input_size=input_size,
                     num_steps=num_steps,
                     test_ratio=test_ratio)
        for _, row in info.iterrows()
    ]
def load_stock(input_size, num_steps, k=None, target_symbol=None,
               TushareFlag=False, test_ratio=0.05):
    if target_symbol is not None:
        print("-------get symbol -------------:")
        if TushareFlag:
            return [
                StockDataSetFromTushare(target_symbol,
                                        input_size=input_size,
                                        num_steps=num_steps,
                                        test_ratio=test_ratio)
            ]
        else:
            return [
                StockDataSet(target_symbol,
                             input_size=input_size,
                             num_steps=num_steps,
                             test_ratio=test_ratio)
            ]
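# Hypothetical usage sketch for load_stock. "600848" is an example
# Tushare-style A-share code, and StockDataSetFromTushare is assumed to
# mirror StockDataSet's constructor; neither value comes from the source.
cn_data = load_stock(input_size=1, num_steps=30, target_symbol="600848", TushareFlag=True)
us_data = load_stock(input_size=1, num_steps=30, target_symbol="AAPL")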
def main():
    tf.reset_default_graph()
    logs_path = './tensorboard_output/'
    config = RNNConfig()

    stock_data = StockDataSet(config.company,
                              num_features=config.num_features,
                              num_classes=config.num_classes,
                              num_steps=config.num_steps,
                              test_ratio=0.2,
                              include_stopwords=config.include_stopwords,
                              stay_percent=config.stay_percent)
    train_X, train_y, test_X, test_y = stock_data.get_data()
    print('train data shape: {}'.format(train_X.shape))
    print('train target shape: {}'.format(train_y.shape))

    _, num_steps, num_features = train_X.shape
    num_classes = train_y.shape[1]

    with tf.name_scope('input'):
        # (batch_size, time_steps, features)
        data = tf.placeholder(tf.float32, [None, num_steps, num_features])
        # (batch_size, num_classes)
        target = tf.placeholder(tf.float32, [None, num_classes])
    with tf.name_scope('dropout'):
        dropout = tf.placeholder(tf.float32)

    if config.num_classes == 1:
        model = SequenceRegression(data, target, dropout, config.stay_percent,
                                   num_hidden=config.num_hidden,
                                   num_layers=config.num_layers)
    else:
        model = SequenceClassification(data, target, dropout,
                                       num_hidden=config.num_hidden,
                                       num_layers=config.num_layers)

    # Create a summary for the cost and error.
    tf.summary.scalar("cost", model.cost)
    tf.summary.scalar("error", model.error)
    # Merge all summaries into a single op that can be executed in a session.
    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Create the log writer object.
        writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

        batch_count = 0
        for epoch in range(config.num_epoch):
            for batch_X, batch_y in stock_data.generate_one_epoch(config.batch_size):
                batch_count += 1
                _, summary = sess.run([model.optimize, summary_op], {
                    data: batch_X, target: batch_y, dropout: config.dropout})
                writer.add_summary(summary, batch_count)

            # Calculate train and test error.
            # NOTE: the training dropout value is fed at evaluation time too;
            # evaluation is usually run with dropout disabled instead.
            train_cost = sess.run(model.cost, {
                data: train_X, target: train_y, dropout: config.dropout})
            train_error = sess.run(model.error, {
                data: train_X, target: train_y, dropout: config.dropout})
            test_error = sess.run(model.error, {
                data: test_X, target: test_y, dropout: config.dropout})
            print('Epoch {:2d} cost: {:4.2} train error: {:4.2f}% test error: {:4.2f}%'.format(
                epoch + 1, 100 * train_cost, 100 * train_error, 100 * test_error))

        prediction = sess.run(model.prediction, {
            data: test_X, target: test_y, dropout: config.dropout})

        if config.num_classes == 3:
            prediction = [pred.index(max(pred)) for pred in prediction.tolist()]
            expected = [y.index(max(y)) for y in test_y.tolist()]
            print(list(zip(prediction, expected)))
            result_count = [0, 0, 0]
            for pred in prediction:
                result_count[pred] += 1
            print(result_count)

        if config.num_classes == 1:
            prediction_percent = [pred[0] * 100 for pred in prediction.tolist()]
            expected_percent = [y[0] * 100 for y in test_y.tolist()]
            print(list(zip(prediction_percent, expected_percent)))
            prediction = [price_to_tag(pred[0], config.stay_percent) for pred in prediction.tolist()]
            expected = [price_to_tag(y[0], config.stay_percent) for y in test_y.tolist()]
            print(list(zip(prediction, expected)))
            # NOTE: in Python 3 this requires `from functools import reduce`.
            mistakes = reduce(lambda x, item: x + 1 if item[0] != item[1] else x,
                              list(zip(prediction, expected)), 0)
            print(mistakes / len(prediction) * 100)

        # With dropout still active at evaluation, repeated runs of the same
        # error op can produce different numbers.
        print(sess.run(model.error, {
            data: test_X, target: test_y, dropout: config.dropout}) * 100)
        print(sess.run(model.error, {
            data: test_X, target: test_y, dropout: config.dropout}) * 100)
        print(sess.run(model.error, {
            data: test_X, target: test_y, dropout: config.dropout}) * 100)
optimizer = tf.train.RMSPropOptimizer(learning_rate)
minimize = optimizer.minimize(loss)

# Training loop
with tf.Session(graph=graph) as sess:
    # merged_summary = tf.summary.merge_all()
    # writer = tf.summary.FileWriter('log', sess.graph)
    # writer.add_graph(sess.graph)

    # Initialize all variables.
    tf.global_variables_initializer().run(session=sess)

    # The learning rate stays at init_learning_rate for the first init_epoch
    # epochs, then decays geometrically.
    learning_rates_to_use = [
        config.init_learning_rate * (
            config.learning_rate_decay ** max(float(i + 1 - config.init_epoch), 0.0)
        ) for i in range(config.max_epoch)
    ]

    # Run one epoch at a time.
    stock_dataset = StockDataSet(stock_sym='IBM')
    for epoch_step in range(config.max_epoch):
        current_lr = learning_rates_to_use[epoch_step]

        # Check https://github.com/lilianweng/stock-rnn/blob/master/data_wrapper.py
        # if you are curious to know what StockDataSet is and how generate_one_epoch()
        # is implemented.
        for batch_X, batch_y in stock_dataset.generate_one_epoch(config.batch_size):
            train_data_feed = {
                inputs: batch_X,
                targets: batch_y,
                learning_rate: current_lr,
            }
            train_loss, _ = sess.run([loss, minimize], train_data_feed)

    # Save the model.
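# A self-contained sketch of the decay schedule above, with assumed
# (hypothetical) config values, to show how the rate stays flat for the
# first init_epoch epochs and then decays geometrically:
init_learning_rate = 0.001
learning_rate_decay = 0.99
init_epoch = 5
max_epoch = 10

lrs = [init_learning_rate * (learning_rate_decay ** max(float(i + 1 - init_epoch), 0.0))
       for i in range(max_epoch)]
# The first 5 entries are 0.001 (exponent clamped to 0), then
# 0.001 * 0.99, 0.001 * 0.99**2, ...
print(lrs)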