Example #1
import os
import pandas as pd

def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05):
    if target_symbol is not None:
        return [
            StockDataSet(
                target_symbol,
                input_size=input_size,
                num_steps=num_steps,
                test_ratio=test_ratio)
        ]

    # Load metadata of S&P 500 stocks
    info = pd.read_csv("data/constituents-financials.csv")
    info = info.rename(columns={col: col.lower().replace(' ', '_') for col in info.columns})
    info['file_exists'] = info['symbol'].map(lambda x: os.path.exists("data/{}.csv".format(x)))
    print(info['file_exists'].value_counts().to_dict())

    info = info[info['file_exists']].reset_index(drop=True)
    info = info.sort_values('market_cap', ascending=False).reset_index(drop=True)

    if k is not None:
        info = info.head(k)

    print "Head of S&P 500 info:\n", info.head()

    # Generate embedding meta file
    info[['symbol', 'sector']].to_csv(os.path.join("logs", "metadata.tsv"), sep='\t', index=False)

    return [
        StockDataSet(row['symbol'],
                     input_size=input_size,
                     num_steps=num_steps,
                     test_ratio=test_ratio)
        for _, row in info.iterrows()]
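A minimal usage sketch (the symbol, sizes, and k below are illustrative; StockDataSet must be importable and the per-symbol CSVs must exist under data/):

datasets = load_sp500(input_size=1, num_steps=30, k=10)  # top 10 by market cap
apple_only = load_sp500(input_size=1, num_steps=30, target_symbol="AAPL")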
Example #2
import os
import pandas as pd

def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05,
               fwd_ret=5, pca=True, wave=True):
    if target_symbol is not None:
        return [
            StockDataSet(
                target_symbol,
                input_size=input_size,
                num_steps=num_steps,
                test_ratio=test_ratio, close_price_only=False,
                fwd_ret=fwd_ret, pca=pca, wave=wave)
        ]

    # Load metadata of S&P 500 stocks
    data_dir = "D:\\Users\\ftran_zim\\data_cna\\"
    info = pd.read_csv(data_dir + "constituents-financials.csv", converters={'symbol': str})
    info = info.rename(columns={col: col.lower().replace(' ', '_') for col in info.columns})
    info['file_exists'] = info['symbol'].map(lambda x: os.path.exists(data_dir + "{}.csv".format(x)))
    print(info['file_exists'].value_counts().to_dict())

    info = info[info['file_exists']].reset_index(drop=True)
    info = info.sort_values(by=['market_cap'], ascending=False).reset_index(drop=True)

    # Filter out a few symbols with bad data.
    info = info[~info.symbol.isin(['XOM','JNJ','JPM'])]
    if k is not None:
        info = info.head(k)

    # Generate embedding meta file
    info[['symbol', 'sector']].to_csv(os.path.join("logs", "metadata.tsv"), sep='\t', index=False)

    return [
        StockDataSet(row['symbol'],
                     input_size=input_size,
                     num_steps=num_steps,
                     test_ratio=test_ratio, close_price_only=False,
                     fwd_ret=fwd_ret, pca=pca, wave=wave)
        for _, row in info.iterrows()]
Example #3
    def load_stock_market_data(self):

        info = pd.read_csv("data/companylist.csv")
        info = info.rename(columns={
            col: col.lower().replace(' ', '_')
            for col in info.columns
        })
        info['file_exists'] = info['symbol'].map(
            lambda x: os.path.exists("data/{}.csv".format(x)))
        #print(info['file_exists'].value_counts().to_dict())

        info = info[info['file_exists']].reset_index(drop=True)
        info = info.sort_values('marketcap',
                                ascending=False).reset_index(drop=True)

        if self.stock_count is not None:
            info = info.head(self.stock_count)

        self.stock_market_data = [
            StockDataSet(row['symbol'],
                         input_size=self.input_size,
                         num_steps=self.num_steps,
                         test_ratio=self.test_ratio)
            for _, row in info.iterrows()
        ]
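A sketch of how this method might be called; the owning class is not shown in the example, so the class name and constructor below are hypothetical:

loader = MarketDataLoader(stock_count=5, input_size=1, num_steps=30, test_ratio=0.05)  # hypothetical class
loader.load_stock_market_data()
print(len(loader.stock_market_data))  # up to 5 StockDataSet objects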
Example #4
def load_sp500(input_size, num_steps, k=None, target_symbol=None, test_ratio=0.05):
    if target_symbol is not None:
        return [
            StockDataSet(
                target_symbol,
                input_size=input_size,
                num_steps=num_steps,
                test_ratio=test_ratio)
        ]
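    # Note: the branch for target_symbol=None is not shown in this example,
    # so the function implicitly returns None in that case.
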
def load_data(stock_name, input_size, num_steps):
    stock_dataset = StockDataSet(stock_name,
                                 input_size=input_size,
                                 num_steps=num_steps,
                                 test_ratio=0.1,
                                 close_price_only=True)
    print("Train data size:", len(stock_dataset.train_X))
    print("Test data size:", len(stock_dataset.test_X))
    return stock_dataset
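A short usage sketch for load_data (the symbol is illustrative; StockDataSet must be importable):

stock_dataset = load_data("AAPL", input_size=1, num_steps=30)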
Example #6
import pandas as pd

def load_data(input_size,
              num_steps,
              k=None,
              target_symbol=None,
              test_ratio=0.20):
    if target_symbol is not None:
        return [
            StockDataSet(target_symbol,
                         input_size=input_size,
                         num_steps=num_steps,
                         test_ratio=test_ratio)
        ]

    info = pd.read_csv('index_list.csv')

    if k is not None:
        info = info.head(k)

    return [
        StockDataSet(row['symbol'],
                     input_size=input_size,
                     num_steps=num_steps,
                     test_ratio=test_ratio) for _, row in info.iterrows()
    ]
Example #7
def load_stock(input_size,
               num_steps,
               k=None,
               target_symbol=None,
               TushareFlag=False,
               test_ratio=0.05):
    if target_symbol is not None:
        print("-------get symbol -------------:")
        if TushareFlag:
            return [
                StockDataSetFromTushare(target_symbol,
                                        input_size=input_size,
                                        num_steps=num_steps,
                                        test_ratio=test_ratio)
            ]
        else:
            return [
                StockDataSet(target_symbol,
                             input_size=input_size,
                             num_steps=num_steps,
                             test_ratio=test_ratio)
            ]
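Usage sketch: the flag simply switches between the two dataset wrappers, and the branch for target_symbol=None is not shown (symbols are illustrative):

us_data = load_stock(input_size=1, num_steps=30, target_symbol="AAPL")
cn_data = load_stock(input_size=1, num_steps=30, target_symbol="600519", TushareFlag=True)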
Example #8
from functools import reduce

import tensorflow as tf

def main():
    tf.reset_default_graph()
    logs_path = './tensorboard_output/'
    config = RNNConfig()
    stock_data = StockDataSet(config.company, num_features=config.num_features,
                                 num_classes=config.num_classes,
                                 num_steps=config.num_steps,
                                 test_ratio=0.2,
                                 include_stopwords=config.include_stopwords,
                                 stay_percent=config.stay_percent)
    train_X, train_y, test_X, test_y = stock_data.get_data()
    print('train data shape: {}'.format(train_X.shape))
    print('train target shape: {}'.format(train_y.shape))
    _, num_steps, num_features = train_X.shape
    num_classes = train_y.shape[1]
    with tf.name_scope('input'):
        # (batch_size, time_steps, features)
        data = tf.placeholder(tf.float32, [None, num_steps, num_features])
        # (batch_size, num_classes)
        target = tf.placeholder(tf.float32, [None, num_classes])
    with tf.name_scope('dropout'):
        dropout = tf.placeholder(tf.float32)
    if config.num_classes == 1:
        model = SequenceRegression(data, target, dropout, config.stay_percent,
                                    num_hidden=config.num_hidden,
                                    num_layers=config.num_layers)
    else:
        model = SequenceClassification(data, target, dropout, 
                                    num_hidden=config.num_hidden, 
                                    num_layers=config.num_layers)
    # create a summary for our cost and error
    tf.summary.scalar("cost", model.cost)
    tf.summary.scalar("error", model.error)
    # merge all summaries into a single "operation" which we can execute in a session
    summary_op = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # create log writer object
        writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())

        batch_count = 0
        for epoch in range(config.num_epoch):
            for batch_X, batch_y in stock_data.generate_one_epoch(config.batch_size):
                batch_count += 1
                _, summary = sess.run([model.optimize, summary_op], {
                    data: batch_X, target: batch_y, dropout: config.dropout})
                writer.add_summary(summary, batch_count)
            # Calculate train and test error (note: the training dropout value
            # is fed at evaluation time too, so these metrics include dropout).
            train_cost = sess.run(model.cost, {
                data: train_X, target: train_y, dropout: config.dropout})
            train_error = sess.run(model.error, {
                data: train_X, target: train_y, dropout: config.dropout})
            test_error = sess.run(model.error, {
                data: test_X, target: test_y, dropout: config.dropout})
            print('Epoch {:2d} cost: {:6.4f} train error: {:4.2f}% test error: {:4.2f}%'.format(
                epoch + 1, train_cost, 100 * train_error, 100 * test_error))
        prediction = sess.run(model.prediction, 
                        {data: test_X, target: test_y, dropout: config.dropout})
        if config.num_classes == 3:
            prediction = [pred.index(max(pred)) for pred in prediction.tolist()]
            expected = [y.index(max(y)) for y in test_y.tolist()]
            print(list(zip(prediction, expected)))
            result_count = [0, 0, 0]
            for pred in prediction:
                result_count[pred] += 1
            print(result_count)
        if config.num_classes == 1:
            prediction_percent = [pred[0] * 100 for pred in prediction.tolist()]
            expected_percent = [y[0] * 100 for y in test_y.tolist()]
            print(list(zip(prediction_percent, expected_percent)))
            prediction = [price_to_tag(pred[0], config.stay_percent) for pred in prediction.tolist()]
            expected = [price_to_tag(y[0], config.stay_percent) for y in test_y.tolist()]
            print(list(zip(prediction, expected)))
            mistakes = reduce(lambda x, item: x + 1 if item[0] != item[1] else x, 
                                list(zip(prediction, expected)), 0)
            print(mistakes / len(prediction) * 100)
            print(sess.run(model.error, {
                data: test_X, target: test_y, dropout: config.dropout}) * 100)
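price_to_tag is used above but not defined in this example; a plausible sketch, assuming three classes (0 = down, 1 = stay, 2 = up) split symmetrically by stay_percent:

def price_to_tag(pct_change, stay_percent):
    # Hypothetical helper: moves within +/-stay_percent count as "stay".
    if pct_change > stay_percent:
        return 2  # up
    elif pct_change < -stay_percent:
        return 0  # down
    else:
        return 1  # stay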
Example #9
        optimizer = tf.train.RMSPropOptimizer(learning_rate)
        minimize = optimizer.minimize(loss)
    # Training loop
    with tf.Session(graph=graph) as sess:
        # merged_summary = tf.summary.merge_all()
        # writer = tf.summary.FileWriter('log', sess.graph)
        # writer.add_graph(sess.graph)
        # Initialize all variables
        tf.global_variables_initializer().run(session=sess)
        learning_rates_to_use = [
            config.init_learning_rate * (config.learning_rate_decay**max(
                float(i + 1 - config.init_epoch), 0.0))
            for i in range(config.max_epoch)
        ]
        # Run the training epochs
        stock_dataset = StockDataSet(stock_sym='IBM')
        for epoch_step in range(config.max_epoch):
            current_lr = learning_rates_to_use[epoch_step]

            # Check https://github.com/lilianweng/stock-rnn/blob/master/data_wrapper.py
            # if you are curious to know what is StockDataSet and how generate_one_epoch()
            # is implemented.
            for batch_X, batch_y in stock_dataset.generate_one_epoch(
                    config.batch_size):
                train_data_feed = {
                    inputs: batch_X,
                    targets: batch_y,
                    learning_rate: current_lr
                }
                train_loss, _ = sess.run([loss, minimize], train_data_feed)
        # Save the model
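This fragment assumes graph, inputs, targets, loss, and learning_rate were defined earlier; a minimal sketch of that setup in the same TF 1.x style (shapes, the config fields, and the MSE loss are assumptions):

graph = tf.Graph()
with graph.as_default():
    inputs = tf.placeholder(tf.float32, [None, config.num_steps, config.input_size])
    targets = tf.placeholder(tf.float32, [None, config.input_size])
    learning_rate = tf.placeholder(tf.float32, None)

    # One LSTM layer; only the last time step feeds the output projection.
    cell = tf.contrib.rnn.LSTMCell(config.lstm_size, state_is_tuple=True)
    outputs, _ = tf.nn.dynamic_rnn(cell, inputs, dtype=tf.float32)
    prediction = tf.layers.dense(outputs[:, -1, :], config.input_size)

    loss = tf.reduce_mean(tf.square(prediction - targets))  # MSE, assumed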