def run_xgboost_classification(root_path, need_training,
                               need_plot_training_diagram, need_predict):
    df = getStocksList_CHN(root_path)
    df.index = df.index.astype(str).str.zfill(6)
    df = df.sort_index(ascending=True)
    predict_symbols = df.index.values.tolist()

    paras = SP_Paras('xgboost', root_path, predict_symbols, predict_symbols)
    paras.save = True
    paras.load = False
    paras.run_hyperopt = True
    paras.plot = need_plot_training_diagram

    # A_B_C format:
    # A: require window split or not -> 0 for not, 1 for yes
    # B: normalization method -> 0: none 1: standard 2: minmax 3: zscore
    # C: normalization index, same normalization requires different index
    paras.features = {
        '0_0_0': ['week_day'],
        '1_0_1': ['c_2_o', 'h_2_o', 'l_2_o', 'c_2_h', 'h_2_l', 'vol_p'],
        '1_1_0': ['buy_amount', 'sell_amount', 'even_amount'],
        '1_1_1': ['buy_volume', 'sell_volume', 'even_volume'],
        '1_1_2': [
            'buy_max', 'buy_min', 'buy_average', 'sell_max', 'sell_min',
            'sell_average', 'even_max', 'even_min', 'even_average'
        ]
    }

    paras.window_len = [3]
    paras.pred_len = 1
    paras.valid_len = 20
    paras.start_date = '2016-11-01'
    paras.end_date = datetime.datetime.now().strftime("%Y-%m-%d")
    paras.verbose = 1
    paras.batch_size = 64
    paras.epoch = 5000
    paras.out_class_type = 'classification'
    paras.n_out_class = 7  # ignored for regression

    from hyperopt import hp
    paras.hyper_opt = {
        "max_depth": hp.randint("max_depth", 10),
        "n_estimators": hp.randint("n_estimators", 20),  # [0,1,2,3,4,5] -> [50,]
        "gamma": hp.randint("gamma", 4),  # 0-0.4
        "learning_rate": hp.randint("learning_rate", 6),  # [0,1,2,3,4,5] -> 0.05,0.06
        "subsample": hp.randint("subsample", 4),  # [0,1,2,3] -> [0.7,0.8,0.9,1.0]
        "min_child_weight": hp.randint("min_child_weight", 5),
    }
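    # Note: hp.randint("x", n) samples an integer in [0, n); the training code
    # is expected to map that index to the actual hyperparameter value, as the
    # inline comments above suggest.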

    # run
    xgboost_cla = xgboost_classification(paras)
    xgboost_cla.run(need_training, need_predict)
    return paras
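A minimal driver sketch for the function above; the root path below is hypothetical and should point at the project root that holds config.ini and the stock data store:

if __name__ == "__main__":
    paras = run_xgboost_classification('/path/to/project',
                                       need_training=True,
                                       need_plot_training_diagram=False,
                                       need_predict=True)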
Example 2
def updateStockData_CHN_Daily(root_path, force_check=False):
    config = configparser.ConfigParser()
    config.read(root_path + "/" + "config.ini")
    storeType = int(config.get('Setting', 'StoreType'))

    symbols = getStocksList_CHN(root_path).index.values.tolist()

    pbar = tqdm(total=len(symbols))

    if storeType == 2:
        for symbol in symbols:
            startTime, message = updateSingleStockData(root_path, symbol,
                                                       force_check)
            outMessage = '%-*s fetched in:  %.4s seconds' % (6, symbol,
                                                             (time.time() -
                                                              startTime))
            pbar.set_description(outMessage)
            pbar.update(1)

    if storeType == 1:
        log_errors = []
        log_update = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
            # Start the update operations and mark each future with its stock symbol
            future_to_stock = {
                executor.submit(updateSingleStockData, root_path, symbol,
                                force_check): symbol
                for symbol in symbols
            }
            for future in concurrent.futures.as_completed(future_to_stock):
                stock = future_to_stock[future]
                try:
                    startTime, message = future.result()
                except Exception as exc:
                    startTime = time.time()
                    log_errors.append('%r generated an exception: %s' %
                                      (stock, exc))
                else:
                    if len(message) > 0: log_update.append(message)
                outMessage = '%-*s fetched in:  %.4s seconds' % (6, stock,
                                                                 (time.time() -
                                                                  startTime))
                pbar.set_description(outMessage)
                pbar.update(1)
        if len(log_errors) > 0: print(log_errors)
        # if len(log_update) > 0: print(log_update)

    pbar.close()
    return symbols
Example 3
def process_all_stocks_data(root_path, window=1):
    df = getStocksList_CHN(root_path)
    df.index = df.index.astype(str).str.zfill(6)
    symbols = df.index.values.tolist()

    pbar = tqdm(total=len(symbols))

    day_selection = []
    week_selection = []
    month_selection = []

    # for index in range(0, window):
    #     day_window = []
    #     day_selection.append(day_window)
    #     week_window = []
    #     week_selection.append(week_window)
    #     month_window = []
    #     month_selection.append(month_window)

    startTime_1 = time.time()
    for symbol in symbols:
        startTime = processing_stock_data(root_path, symbol, window,
                                          day_selection, week_selection,
                                          month_selection)
        outMessage = '%-*s processed in:  %.4s seconds' % (6, symbol,
                                                           (time.time() -
                                                            startTime))
        pbar.set_description(outMessage)
        pbar.update(1)

    print('total processing in:  %.4s seconds' % ((time.time() - startTime_1)))

    # with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
    #     # Start the load operations and mark each future with its URL
    #     future_to_stock = {executor.submit(processing_stock_data, root_path, symbol, window, day_selection, week_selection, month_selection): symbol for symbol in symbols}
    #     for future in concurrent.futures.as_completed(future_to_stock):
    #         stock = future_to_stock[future]
    #         try:
    #             startTime = future.result()
    #         except Exception as exc:
    #             startTime = time.time()
    #             print('%r generated an exception: %s' % (stock, exc))
    #         outMessage = '%-*s processed in:  %.4s seconds' % (6, stock, (time.time() - startTime))
    #         pbar.set_description(outMessage)
    #         pbar.update(1)

    # day_week_selection = []
    # week_month_selection = []
    # day_month_selection = []
    # all_selection = []

    #count = []

    day_week_selection = list(set(day_selection) & set(week_selection))
    week_month_selection = list(set(week_selection) & set(month_selection))
    day_month_selection = list(set(day_selection) & set(month_selection))
    all_selection = list(set(day_week_selection) & set(week_month_selection))

    print("all_selection", len(all_selection), sorted(all_selection))
    print("day_week_selection", len(day_week_selection),
          sorted(day_week_selection))
    print("week_month_selection", len(week_month_selection),
          sorted(week_month_selection))
    print("day_month_selection", len(day_month_selection),
          sorted(day_month_selection))
    print("/n ------------------------ /n")
    print("day_selection", len(day_selection), sorted(day_selection))
    print("week_selection", len(week_selection), sorted(week_selection))
    print("month_selection", len(month_selection), sorted(month_selection))
Example 5
    sector_count.to_csv("cashflow_sector.csv")


if __name__ == "__main__":
    if len(sys.argv) != 2:
        print("Input parameter error")
        exit()

    pd.set_option('precision', 3)
    pd.set_option('display.width', 1000)
    warnings.filterwarnings('ignore',
                            category=pd.io.pytables.PerformanceWarning)

    update = str(sys.argv[1])

    df = getStocksList_CHN(root_path)
    df.index = df.index.astype(str).str.zfill(6)
    df = df.sort_index(ascending=True)
    symbols = df.index.values.tolist()

    sh = ts.get_k_data("sh")

    months = 12
    start_date = (datetime.datetime.now() -
                  datetime.timedelta(days=months * 30)).strftime("%Y-%m-%d")
    #start_date = sh['date'][0]

    if update == '1':
        print("Updating cashflow data...")
        update_all_stocks_data(root_path, symbols, start_date)
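Judging from the argv check above, the script expects exactly one argument; a hypothetical invocation (the script's file name is not shown in this excerpt):

python cashflow_update.py 1    # '1' refreshes cashflow data for roughly the last 12 months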
Example 6
def run_lstm_classification(root_path, need_training,
                            need_plot_training_diagram, need_predict):
    df = getStocksList_CHN(root_path)
    df.index = df.index.astype(str).str.zfill(6)
    df = df.sort_index(ascending=True)
    predict_symbols = df.index.values.tolist()

    paras = SP_Paras('lstm', root_path, predict_symbols, predict_symbols)
    paras.save = True
    paras.load = False
    paras.run_hyperopt = True
    paras.plot = need_plot_training_diagram

    # A_B_C format:
    # A: require window split or not -> 0 for not, 1 for yes
    # B: normalization method -> 0: none 1: standard 2: minmax 3: zscore
    # C: normalization index, same normalization requires different index
    paras.features = {
        '1_0_0': ['week_day'],  # 1
        '1_0_1': ['c_2_o', 'h_2_o', 'l_2_o', 'c_2_h', 'h_2_l', 'vol_p'],  # 5
        '1_1_0': ['buy_amount', 'sell_amount', 'even_amount'],  # 3
        '1_1_1': ['buy_volume', 'sell_volume', 'even_volume'],  # 3
        '1_1_2': [
            'buy_max', 'buy_min', 'buy_average', 'sell_max', 'sell_min',
            'sell_average', 'even_max', 'even_min', 'even_average'
        ]  # 9
    }

    paras.pred_len = 1
    paras.valid_len = 20
    paras.start_date = '2016-11-01'
    paras.end_date = datetime.datetime.now().strftime("%Y-%m-%d")
    paras.verbose = 1
    
    paras.out_class_type = 'classification'
    paras.n_out_class = 7  # ignored for regression
    paras.epoch = 1000

    paras.window_len = [5]
    paras.batch_size = 64
    paras.model['hidden_layers'] = [210, 140, 210, 140, 210, 140, 70]
    paras.model['dropout'] = 0.5
    paras.model['activation'] = 'relu'
    paras.model['optimizer'] = 'adam'
    paras.model['learning_rate'] = 0.01

    paras.model['out_activation'] = 'softmax'
    paras.model['out_layer'] = paras.n_out_class
    paras.model['loss'] = 'categorical_crossentropy'

    from hyperopt import hp
    paras.hyper_opt = {"batch_size_opt"   :[32, 64, 128],
                       "activation_opt"   :['relu', 'tanh', 'sigmoid'],
                       "optimizer_opt"    :['sgd', 'rmsprop', 'adagrad', 'adam'],
    }

    paras.hyper_opt.update({
                       "batch_size"       :hp.choice ("batch_size"   , paras.hyper_opt['batch_size_opt'] ), 
                       "dropout"          :hp.uniform("dropout"      , 0.3, 0.7     ), 
                       "learning_rate"    :hp.uniform("learning_rate", 0.005, 0.02  ),  
                       "activation"       :hp.choice ("activation"   , paras.hyper_opt['activation_opt']), 
                       "optimizer"        :hp.choice ("optimizer"    , paras.hyper_opt['optimizer_opt']), 
    })

   
    lstm_cla = rnn_lstm_classification(paras)
    lstm_cla.run(need_training, need_predict)
    return paras
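The LSTM search space above mixes plain option lists (the *_opt entries) with hyperopt expressions. A minimal, self-contained sketch of how such a space can be searched with hyperopt's fmin/TPE; the objective below is a stand-in, not the repository's real training loss:

from hyperopt import fmin, tpe, hp, Trials

space = {
    "batch_size": hp.choice("batch_size", [32, 64, 128]),
    "dropout": hp.uniform("dropout", 0.3, 0.7),
    "learning_rate": hp.uniform("learning_rate", 0.005, 0.02),
    "activation": hp.choice("activation", ['relu', 'tanh', 'sigmoid']),
    "optimizer": hp.choice("optimizer", ['sgd', 'rmsprop', 'adagrad', 'adam']),
}

def objective(params):
    # Stand-in objective: the real code would train the LSTM with `params`
    # and return its validation loss.
    return params["dropout"] * params["learning_rate"]

trials = Trials()
best = fmin(fn=objective, space=space, algo=tpe.suggest,
            max_evals=20, trials=trials)
print(best)  # indices for hp.choice entries, raw values for hp.uniform entries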