def get_features(oanda_data):
    """Compute a hand-picked set of technical indicators and dummy features.

    Args:
        oanda_data: iterable of OANDA candle records. Each record is indexed
            with ``'time'`` and that value is parsed with the format
            ``'%Y-%m-%dT%H:%M:%S.000000Z'``. The same object is handed to
            ``extract_timeseries_from_oanda_data`` and
            ``prep_data_for_feature_gen``.

    Returns:
        tuple: ``(indicators, dummies)``, both transposed so that, for equally
        long 1-D series, rows are data points and columns are features.
        ``indicators`` stacks the 25 continuous series; ``dummies`` stacks the
        trend-mode series and the four market-session flags.
    """
    # --- price and volume: relative change versus the previous sample -------
    price, volume = extract_timeseries_from_oanda_data(
        oanda_data, ['closeMid', 'volume'])
    price_steps = np.array(
        [float(cur) / float(prev) - 1 for cur, prev in zip(price[1:], price)])
    volume_steps = np.array(
        [float(cur) / float(prev) - 1 for cur, prev in zip(volume[1:], volume)])
    # The first sample has no predecessor, so it is padded with NaN.
    price_change = np.concatenate([[np.nan], price_steps], axis=0)
    volume_change = np.concatenate([[np.nan], volume_steps], axis=0)

    inputs = prep_data_for_feature_gen(oanda_data)

    # --- overlap studies ----------------------------------------------------
    par_sar = SAREXT(inputs)
    mama_line, fama_line = MAMA(inputs, optInFastLimit=12, optInSlowLimit=24)
    band_up, band_mid, band_low = BBANDS(inputs,
                                         optInTimePeriod=12,
                                         optInNbDevUp=2,
                                         optInNbDevDn=2,
                                         optinMAType='EMA')
    # Express the bands as distances from the current price.
    flat_price = price.ravel()
    upper = band_up - flat_price
    middle = band_mid - flat_price
    lower = flat_price - band_low

    # --- momentum -----------------------------------------------------------
    bop = BOP(inputs)
    cci = CCI(inputs)
    adx = ADX(inputs, optInTimePeriod=24)
    cmo = CMO(inputs, optInTimePeriod=6)
    # NOTE: the WILLR and STOCH results are computed but are not part of the
    # returned feature set below.
    will = WILLR(inputs, optInTimePeriod=16)
    slowk, slowd = STOCH(inputs,
                         optInFastK_Period=5,
                         optInSlowK_Period=3,
                         optInSlowK_MAType=0,
                         optInSlowD_Period=3,
                         optInSlowD_MAType=0)
    macd_a, macd_b, macd_c = MACD(inputs,
                                  optInFastPeriod=12,
                                  optInSlowPeriod=6,
                                  optInSignalPeriod=3)
    stochf_a, stochf_b = STOCHF(inputs,
                                optInFastK_Period=12,
                                optInFastD_Period=6,
                                optInFastD_MAType='EXP')
    stochrsi_a, stochrsi_b = STOCHRSI(inputs,
                                      optInTimePeriod=24,
                                      optInFastK_Period=12,
                                      optInFastD_Period=24,
                                      optInFastD_MAType='EXP')

    # --- volume indicators --------------------------------------------------
    ados = ADOSC(inputs, optInFastPeriod=24, optInSlowPeriod=12)

    # --- cycle indicators ---------------------------------------------------
    sine, lead_sine = HT_SINE(inputs)
    ht_phase = HT_DCPHASE(inputs)
    ht_trend = HT_TRENDMODE(inputs)

    # --- price transform ----------------------------------------------------
    wcp = WCLPRICE(inputs)

    # --- volatility ---------------------------------------------------------
    avg_range = NATR(inputs, optInTimePeriod=6)

    # --- market-session dummies ---------------------------------------------
    # Only the hour of each candle timestamp is needed; datetime hours are
    # always within 0..23, so the wrap-around sessions reduce to two bounds.
    hours = [
        datetime.strptime(candle['time'], '%Y-%m-%dT%H:%M:%S.000000Z').hour
        for candle in oanda_data
    ]
    mrkt_london = [3 <= hour <= 11 for hour in hours]
    mrkt_ny = [8 <= hour <= 16 for hour in hours]
    mrkt_sydney = [hour >= 17 or hour <= 1 for hour in hours]
    mrkt_tokyo = [hour >= 19 or hour <= 3 for hour in hours]

    # --- assemble -----------------------------------------------------------
    indicator_rows = [
        price_change, volume_change, par_sar, mama_line, fama_line,
        upper, middle, lower, bop, cci, adx, cmo,
        macd_a, macd_b, macd_c, stochf_a, stochf_b,
        stochrsi_a, stochrsi_b, ados, sine, lead_sine,
        ht_phase, wcp, avg_range,
    ]
    dummy_rows = [ht_trend, mrkt_london, mrkt_ny, mrkt_sydney, mrkt_tokyo]

    all_indicators = np.array(indicator_rows)
    all_dummies = np.array(dummy_rows)
    # Transpose to get (data_points, features).
    return all_indicators.T, all_dummies.T
np.random.seed(0) tf.set_random_seed(0) # hyper-params batch_size = 1024 learning_rate = 0.002 drop_keep_prob = 1 value_moving_average = 50 split = (0.5, 0.3, 0.2) plotting = False saving = False # load data oanda_data = np.load('data\\EUR_USD_H1.npy')[-50000:] output_data_raw = price_to_binary_target(oanda_data, delta=0.0001) price_data_raw = extract_timeseries_from_oanda_data(oanda_data, ['closeMid']) input_data_raw, input_data_dummy_raw = get_features(oanda_data) price_data_raw = np.concatenate([[[0]], (price_data_raw[1:] - price_data_raw[:-1]) / (price_data_raw[1:] + 1e-10)], axis=0) # prepare data input_data, output_data, input_data_dummy, price_data = \ remove_nan_rows([input_data_raw, output_data_raw, input_data_dummy_raw, price_data_raw]) input_data_scaled_no_dummies = ( input_data - min_max_scaling[1, :]) / (min_max_scaling[0, :] - min_max_scaling[1, :]) input_data_scaled = np.concatenate( [input_data_scaled_no_dummies, input_data_dummy], axis=1) # split to train, test and cross validation input_train, input_test, input_cv, output_train, output_test, output_cv, price_train, price_test, price_cv = \
def get_features_v2(oanda_data, time_periods, return_numpy):
    """Return (mostly) all TA-Lib indicators for the given time periods.

    Every function of the selected TA-Lib groups (except ``MAVP``) is run via
    ``talib.abstract``. Functions exposing a ``timeperiod`` parameter are run
    once per entry of ``time_periods``; all others are run once with their
    default parameters. A fixed set of columns is then post-processed
    (percentage change, "above own rolling mean" flags, distance from price)
    and four market-session dummy columns are appended.

    Column naming:
        * ``<FUNC>_tp_<period>`` for a single series with a time period,
        * ``<FUNC>_<i>_tp_<period>`` for multiple series with a time period,
        * ``<FUNC>`` / ``<FUNC><i>`` for functions without a time period.

    Args:
        oanda_data: iterable of OANDA candle records. Each record is indexed
            with ``'time'`` and that value is parsed with the format
            ``'%Y-%m-%dT%H:%M:%S.000000Z'``.
        time_periods: re-iterable collection (e.g. a list) of values passed as
            ``timeperiod``. The post-processing step addresses columns with
            the ``_tp_10`` suffix, so ``10`` has to be one of the periods.
        return_numpy: when truthy return the values as a NumPy array,
            otherwise return the ``pandas.DataFrame`` itself.

    Returns:
        numpy.ndarray or pandas.DataFrame: one row per data point, one column
        per feature.

    Raises:
        KeyError: if a column required by the post-processing step is missing
            (for instance when ``10`` is not in ``time_periods``).
    """
    # load primary data
    inputs = prep_data_for_feature_gen(oanda_data)

    # collect the names of all functions in the groups of interest
    function_groups = [
        'Cycle Indicators', 'Momentum Indicators', 'Overlap Studies',
        'Volume Indicators', 'Volatility Indicators', 'Statistic Functions'
    ]
    available = talib.get_function_groups()
    function_list = [
        name for group in function_groups for name in available[group]
    ]
    function_list.remove('MAVP')

    # price and volume: relative change versus the previous sample,
    # with 0 used for the first sample (it has no predecessor)
    price, volume = extract_timeseries_from_oanda_data(
        oanda_data, ['closeMid', 'volume'])
    price_change = np.array(
        [float(i) / float(j) - 1 for i, j in zip(price[1:], price)])
    volume_change = np.array(
        [float(i) / float(j) - 1 for i, j in zip(volume[1:], volume)])
    price_change = np.concatenate([[0], price_change], axis=0)
    volume_change = np.concatenate([[0], volume_change], axis=0)

    # get all indicators
    df_indicators = pd.DataFrame()
    df_indicators['price'] = price.ravel()
    df_indicators['price_delta'] = price_change
    df_indicators['volume_change'] = volume_change

    for func in function_list:
        indicator_fn = getattr(talib.abstract, func)
        if 'timeperiod' in indicator_fn.info['parameters']:
            for time_period in time_periods:
                indicator = indicator_fn(inputs, timeperiod=time_period)
                if any(isinstance(item, np.ndarray) for item in indicator):
                    # indicator returns more than one time-series
                    for indicator_id, series in enumerate(indicator):
                        column = '{}_{}_tp_{}'.format(func, indicator_id,
                                                      time_period)
                        df_indicators[column] = series
                else:
                    # indicator returns a single time-series
                    column = '{}_tp_{}'.format(func, time_period)
                    df_indicators[column] = indicator
        else:
            indicator = indicator_fn(inputs)
            if any(isinstance(item, np.ndarray) for item in indicator):
                for indicator_id, series in enumerate(indicator):
                    df_indicators['{}{}'.format(func, indicator_id)] = series
            else:
                df_indicators[func] = indicator

    # manual handling of features
    for column in ('AD', 'OBV'):
        df_indicators[column] = df_indicators[column].pct_change()

    # 1.0 where the series is above its own rolling mean, else 0.0.
    # Series.rolling(window).mean() replaces pd.rolling_mean, which no longer
    # exists in current pandas; both use min_periods == window by default.
    rolling_windows = (
        ('HT_DCPERIOD', 50),
        ('HT_DCPHASE', 10),
        ('ADX_tp_10', 10),
        ('MINUS_DI_tp_10', 20),
        ('RSI_tp_10', 15),
        ('ULTOSC', 15),
    )
    for column, window in rolling_windows:
        series = df_indicators[column]
        df_indicators[column] = (
            series > series.rolling(window).mean()).astype(float)

    df_indicators['MACD0'] = df_indicators['MACD0'] - df_indicators['MACD1']

    # price-level indicators are expressed as a distance from the price
    price_relative_columns = (
        'BBANDS_0_tp_10', 'BBANDS_1_tp_10', 'BBANDS_2_tp_10',
        'DEMA_tp_10', 'EMA_tp_10', 'HT_TRENDLINE', 'KAMA_tp_10',
        'MAMA0', 'MAMA1', 'MIDPOINT_tp_10', 'MIDPRICE_tp_10',
        'SMA_tp_10', 'T3_tp_10', 'TEMA_tp_10', 'TRIMA_tp_10', 'WMA_tp_10',
        'SAR', 'LINEARREG_tp_10', 'LINEARREG_INTERCEPT_tp_10', 'TSF_tp_10',
    )
    for column in price_relative_columns:
        df_indicators[column] = df_indicators[column] - df_indicators['price']

    # markets dummies, derived from the hour of each candle timestamp
    timestamps = np.array([
        datetime.strptime(x['time'], '%Y-%m-%dT%H:%M:%S.000000Z')
        for x in oanda_data
    ])
    df_indicators['mrkt_london'] = np.array(
        [3 <= x.hour <= 11 for x in timestamps]).astype(int)
    df_indicators['mrkt_ny'] = np.array(
        [8 <= x.hour <= 16 for x in timestamps]).astype(int)
    df_indicators['mrkt_sydney'] = np.array(
        [17 <= x.hour <= 24 or 0 <= x.hour <= 1
         for x in timestamps]).astype(int)
    df_indicators['mrkt_tokyo'] = np.array(
        [19 <= x.hour <= 24 or 0 <= x.hour <= 3
         for x in timestamps]).astype(int)

    print('Features shape: {}'.format(df_indicators.shape))

    # DataFrame.values replaces the removed DataFrame.as_matrix()
    return df_indicators.values if return_numpy else df_indicators