# Example 1
def train_and_test_by_ticker(test_epochs, test_display_step, buy_threshold,
                             sell_threshold, use_random_data):
    """Train and test one RNN model per ticker, then merge predictions.

    Splits the full ML data set at ``test_data_date`` into training and
    test frames, trains a model for every ticker that does not already
    have a model directory (existing directories are skipped and their
    latest prediction CSV is reused), and finally merges all per-ticker
    prediction CSVs into the aggregate ``prediction_file``.

    Args:
        test_epochs: Number of epochs to run during the test pass.
        test_display_step: How often test progress is reported.
        buy_threshold: Prediction threshold for buy signals.
        sell_threshold: Prediction threshold for sell signals.
        use_random_data: Passed through to ``train_rnn``.
    """
    # GET DATA
    data_df = dml.get_all_ml_data()
    # sorted(set(...)) instead of list(set-comprehension): de-duplicates the
    # same way but gives a deterministic training order across runs.
    tickers = sorted(set(data_df['ticker']))
    training_df = data_df[data_df.date < test_data_date].copy()
    test_df = data_df[data_df.date >= test_data_date].copy()
    del data_df  # release the full frame before the per-ticker loop
    prediction_files = []
    print("BULK TRAINING for {} tickers.".format(len(tickers)))

    for ticker in tickers:
        try:
            print(" ----- Begin Training for {} ----- ".format(ticker))
            # DATA
            training_data_class = td.TrainingDataTicker(
                training_df, feature_series_count, feature_count, label_count,
                ticker)
            testing_data_class = td.TrainingDataTicker(test_df,
                                                       feature_series_count,
                                                       feature_count,
                                                       label_count, ticker)
            # TRAIN
            ticker_path = _model_path + ticker + '/'
            if os.path.exists(ticker_path):
                print("Found ticker directory, skipping {}".format(ticker))
                # Reuse the prediction file from the earlier run, if any.
                existing_csv = _latest_prediction_csv(ticker_path)
                if existing_csv is not None:
                    prediction_files.append(existing_csv)
            else:
                os.makedirs(ticker_path)
                saved_model_file = train_rnn(training_data_class, ticker_path,
                                             use_random_data)
                saved_model_file = saved_model_file + '.meta'
                prediction_files.append(saved_model_file + '.csv')

                # TEST
                test_rnn(testing_data_class, test_epochs, test_display_step,
                         buy_threshold, sell_threshold, [saved_model_file])
        except ValueError as ve:
            # One bad ticker must not abort the whole bulk run.
            print(ve)
    # GENERATE PREDICTION AGGREGATE FILE
    merge_csv_files(prediction_files, prediction_file)


def _latest_prediction_csv(ticker_path):
    """Return the newest prediction CSV in *ticker_path*, or ``None``.

    Mirrors the original inline logic: the most recently modified file in
    the directory must itself end in ``.csv`` to count; otherwise a
    diagnostic is printed and ``None`` is returned. An empty directory is
    silently skipped.
    """
    file_list = [
        ticker_path + a_file for a_file in os.listdir(ticker_path)
    ]
    if not file_list:
        return None
    latest_file = max(file_list, key=os.path.getmtime)
    if latest_file.endswith('.csv'):
        print("adding prediction file: {}".format(latest_file))
        return latest_file
    print("couldn't find csv as latest file in {}".format(ticker_path))
    return None
# Example 2
def get_data_and_test_rnn(test_epochs,
                          test_display_step,
                          buy_threshold,
                          sell_threshold,
                          specific_file=None):
    """Load the test slice of the ML data and run the RNN test pass.

    Args:
        test_epochs: Number of epochs for the test run.
        test_display_step: Reporting interval during testing.
        buy_threshold: Prediction threshold for buy signals.
        sell_threshold: Prediction threshold for sell signals.
        specific_file: Optional saved-model file to restrict testing to.
    """
    # Keep only rows on/after the test cutoff date; drop the full frame early.
    full_df = dml.get_all_ml_data()
    test_df = full_df[full_df.date >= test_data_date].copy()
    del full_df

    # Wrap the frame in the project's training-data adapter and test.
    data_wrapper = td.TrainingData(test_df, feature_series_count,
                                   feature_count, label_count)
    test_rnn(data_wrapper, test_epochs, test_display_step, buy_threshold,
             sell_threshold, specific_file)
# Example 3
def get_data_and_test_rnn_by_ticker(test_epochs, test_display_step,
                                    buy_threshold, sell_threshold,
                                    specific_file):
    """Test a saved per-ticker model against the test slice of the data.

    The ticker symbol is recovered from *specific_file*'s path: the model
    lives in a directory named after the ticker, so the second-to-last
    path component (prefixed with "WIKI/") identifies it.
    """
    # Get ticker
    normalized_path = specific_file.replace('\\', "/")
    print(normalized_path)
    ticker = "WIKI/" + normalized_path.split("/")[-2]
    print("Using ticker - {}".format(ticker))
    # GET DATA
    frame = dml.get_all_ml_data()
    test_frame = frame[frame.date >= test_data_date].copy()
    del frame
    # TEST
    data_wrapper = td.TrainingDataTicker(test_frame, feature_series_count,
                                         feature_count, label_count, ticker)
    test_rnn(data_wrapper, test_epochs, test_display_step, buy_threshold,
             sell_threshold, [specific_file])
# Example 4
def get_data_train_and_test_rnn(test_epochs, test_display_step, buy_threshold,
                                sell_threshold, use_random_data):
    """End-to-end run: load data, train the RNN, test it, merge predictions."""
    # GET DATA — split on the test cutoff date, then release the full frame.
    all_data = dml.get_all_ml_data()
    train_frame = all_data[all_data.date < test_data_date].copy()
    test_frame = all_data[all_data.date >= test_data_date].copy()
    del all_data

    # TRAIN
    train_wrapper = td.TrainingData(train_frame, feature_series_count,
                                    feature_count, label_count)
    # TODO: switch rnn to use batch data, testing below
    # fff, lll, ddd = train_wrapper.get_batch(3)
    train_rnn(train_wrapper, _model_path, use_random_data)

    # TEST
    test_wrapper = td.TrainingData(test_frame, feature_series_count,
                                   feature_count, label_count)
    test_rnn(test_wrapper, test_epochs, test_display_step, buy_threshold,
             sell_threshold)

    # Aggregate the per-model prediction CSVs into one file.
    merge_csv_files(_get_meta_prediction_files(), prediction_file)