def analysis():
    """A simple API endpoint to compare data from two sensors.

    Example:
        http://127.0.0.1:5000/api/stats/compare?a=sensoraname&b=sensorbname

    Query parameters:
        a, b  -- names of the two sensors to compare
        hours -- number of hours of history to fetch (integer)

    Returns a JSON Response with hourly means and 5-sample rolling
    statistics per sensor, plus rolling correlation/covariance between
    the two.  As before, falls through (returns None) when the caller is
    not authenticated or a parameter is missing/invalid.
    """
    if 'wotkit_token' in session:
        a = request.args.get('a')
        b = request.args.get('b')
        # type=int yields None on a missing/non-numeric parameter instead
        # of the old int(None) crash.
        hours = request.args.get('hours', type=int)
        if a and b and hours:
            msph = 3600000  # milliseconds per hour
            result = defaultdict(dict)
            sensoraDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(a, msph * hours))
            sensorbDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(b, msph * hours))

            # Labels object.  str(i) replaces the Python-2-only backtick
            # repr.  NOTE(review): range(1, hours) yields hours-1 labels;
            # confirm this matches the number of hourly samples.
            result['labels'] = [str(i) + "h" for i in range(1, hours)]

            # Sensor A object (resample(how=...) was removed from pandas;
            # .resample('H').mean() is the equivalent call).
            sensoraDailyMeans = sensoraDataSeries.resample('H').mean()
            result['a']['mean'] = SeriesToList(sensoraDailyMeans)
            result['a']['rolling_mean'] = SeriesToList(
                sensoraDailyMeans.rolling(5).mean())
            result['a']['rolling_stdev'] = SeriesToList(
                sensoraDailyMeans.rolling(5).std())
            result['a']['rolling_skewness'] = SeriesToList(
                sensoraDailyMeans.rolling(5).skew())
            result['a']['rolling_kurtosis'] = SeriesToList(
                sensoraDailyMeans.rolling(5).kurt())

            # Sensor B object
            sensorbDailyMeans = sensorbDataSeries.resample('H').mean()
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList(
                sensorbDailyMeans.rolling(5).mean())
            result['b']['rolling_stdev'] = SeriesToList(
                sensorbDailyMeans.rolling(5).std())
            result['b']['rolling_skewness'] = SeriesToList(
                sensorbDailyMeans.rolling(5).skew())
            result['b']['rolling_kurtosis'] = SeriesToList(
                sensorbDailyMeans.rolling(5).kurt())

            # Comparison object
            result['comparison']['correlation'] = SeriesToList(
                sensoraDailyMeans.rolling(5).corr(sensorbDailyMeans))
            result['comparison']['covariance'] = SeriesToList(
                sensoraDailyMeans.rolling(5).cov(sensorbDailyMeans))

            json_response = json.dumps(result)
            return Response(json_response, content_type='application/json')
def plot_rolling_functions(series, window_size=128):
    """Plot several rolling-window statistics of *series* on one figure.

    Parameters
    ----------
    series : pandas.Series
        Input time series.
    window_size : int, optional, default 128
        Size of the rolling window for every statistic.

    The pd.rolling_* module functions were removed in pandas 0.23;
    Series.rolling(...) provides the same computations.
    """
    roller = series.rolling(window_size)
    # Same statistics, same plot order, as the original code.
    for stat in ('median', 'mean', 'std', 'skew', 'kurt', 'min', 'max'):
        getattr(roller, stat)().plot(label=stat)
    plt.title('Various rolling window functions, window size %s' % (window_size))
    plt.legend()
    plt.show()
def visualize_sequential_relationships(training_data, plot_size, smooth=None, window=1):
    """
    Generates line plots to visualize sequential data.  Assumes the data
    frame index is time series.

    Fixes: integer (floor) division for plot counts and axis indices —
    the original "/" produces floats on Python 3 and breaks range() and
    ax[...] indexing; pd.rolling_* calls replaced by the Series.rolling
    API (the old functions were removed in pandas 0.23).
    """
    training_data.index.name = None
    num_features = plot_size if plot_size < len(training_data.columns) else len(training_data.columns)
    # Ceiling division by 16 (one figure per 4x4 grid of subplots).
    num_plots = num_features // 16 if num_features % 16 == 0 else num_features // 16 + 1
    for i in range(num_plots):
        fig, ax = plt.subplots(4, 4, sharex=True, figsize=(20, 10))
        for j in range(16):
            index = (i * 16) + j
            if index < num_features:
                if index != 3:  # this column is all 0s in the bike set
                    col = training_data.iloc[:, index]
                    if smooth == 'mean':
                        training_data.iloc[:, index] = col.rolling(window).mean()
                    elif smooth == 'var':
                        training_data.iloc[:, index] = col.rolling(window).var()
                    elif smooth == 'skew':
                        training_data.iloc[:, index] = col.rolling(window).skew()
                    elif smooth == 'kurt':
                        training_data.iloc[:, index] = col.rolling(window).kurt()
                training_data.iloc[:, index].plot(ax=ax[j // 4, j % 4], kind='line',
                                                  legend=False,
                                                  title=training_data.columns[index])
        fig.tight_layout()
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API: every statistic must agree between the
    # pandas original and the dask implementation.
    for stat in ('count', 'sum', 'mean', 'median', 'min', 'max', 'std', 'var'):
        pd_fn = getattr(pd, 'rolling_' + stat)
        dd_fn = getattr(dd, 'rolling_' + stat)
        assert_eq(pd_fn(p, 3), dd_fn(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Edge-case window sizes.
    for width in (0, 1):
        assert_eq(pd.rolling_sum(p, width), dd.rolling_sum(d, width))
    # Keyword arguments are forwarded too.
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
def Calc(df):
    """Compute the 250-day rolling skewness of daily log returns.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a "price_adj" column of adjusted prices.

    Returns
    -------
    pandas.DataFrame
        Single column "Skewness250d"; the first 250 rows are NaN (one
        lost to differencing plus the 250-sample warm-up window).
    """
    # Daily log return: ln(p_t) - ln(p_{t-1}); first row is NaN.
    ret = np.log(df["price_adj"]) - np.log(df["price_adj"].shift(1))
    # pd.rolling_skew was removed in pandas 0.23; Series.rolling is the
    # equivalent modern API.
    res = ret.rolling(250).skew().to_frame("Skewness250d")
    return res
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API — pandas result vs. dask result, pairwise.
    basic_stats = ['count', 'sum', 'mean', 'median', 'min', 'max', 'std', 'var']
    for name in basic_stats:
        assert_eq(getattr(pd, 'rolling_' + name)(p, 3),
                  getattr(dd, 'rolling_' + name)(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    assert_eq(pd.rolling_window(p, 3, win_type='boxcar'),
              dd.rolling_window(d, 3, win_type='boxcar'))
    # Degenerate window sizes.
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Keyword-argument forwarding.
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
def analysis():
    """A simple API endpoint to compare data from two sensors.

    Example:
        http://127.0.0.1:5000/api/stats/compare?a=sensoraname&b=sensorbname

    Expects query parameters ``a`` and ``b`` (sensor names) and ``hours``
    (history length).  Produces a JSON Response of hourly means with
    5-sample rolling statistics per sensor and rolling correlation and
    covariance between the pair.  Returns None (unchanged behaviour)
    when not authenticated or when parameters are missing.
    """
    if 'wotkit_token' in session:
        a = request.args.get('a')
        b = request.args.get('b')
        # Parse 'hours' safely: type=int returns None on missing/invalid
        # input, where the original int(...) raised on None.
        hours = request.args.get('hours', type=int)
        if a and b and hours:
            msph = 3600000  # milliseconds per hour
            result = defaultdict(dict)
            sensoraDataSeries = WotKitDataToSeries(WoTKitgetSensorData(a, msph * hours))
            sensorbDataSeries = WotKitDataToSeries(WoTKitgetSensorData(b, msph * hours))

            # Labels object — str(i) replaces Python-2-only backtick repr.
            # NOTE(review): range(1, hours) gives hours-1 labels; confirm.
            result['labels'] = [str(i) + "h" for i in range(1, hours)]

            # Sensor A object; .resample('H').mean() replaces the removed
            # resample(how='mean') form, .rolling(5) the removed pd.rolling_*.
            sensoraDailyMeans = sensoraDataSeries.resample('H').mean()
            result['a']['mean'] = SeriesToList(sensoraDailyMeans)
            result['a']['rolling_mean'] = SeriesToList(sensoraDailyMeans.rolling(5).mean())
            result['a']['rolling_stdev'] = SeriesToList(sensoraDailyMeans.rolling(5).std())
            result['a']['rolling_skewness'] = SeriesToList(sensoraDailyMeans.rolling(5).skew())
            result['a']['rolling_kurtosis'] = SeriesToList(sensoraDailyMeans.rolling(5).kurt())

            # Sensor B object
            sensorbDailyMeans = sensorbDataSeries.resample('H').mean()
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList(sensorbDailyMeans.rolling(5).mean())
            result['b']['rolling_stdev'] = SeriesToList(sensorbDailyMeans.rolling(5).std())
            result['b']['rolling_skewness'] = SeriesToList(sensorbDailyMeans.rolling(5).skew())
            result['b']['rolling_kurtosis'] = SeriesToList(sensorbDailyMeans.rolling(5).kurt())

            # Comparison object
            result['comparison']['correlation'] = SeriesToList(
                sensoraDailyMeans.rolling(5).corr(sensorbDailyMeans))
            result['comparison']['covariance'] = SeriesToList(
                sensoraDailyMeans.rolling(5).cov(sensorbDailyMeans))

            json_response = json.dumps(result)
            return Response(json_response, content_type='application/json')
def get_estimator(ticker, start, end, window=30, clean=True):
    """Rolling skewness of daily log returns for *ticker*.

    Parameters
    ----------
    ticker : str
        Symbol passed through to data.get_data.
    start, end :
        Date range forwarded to the data source.
    window : int, optional, default 30
        Rolling window length in observations.
    clean : bool, optional, default True
        Drop NaN warm-up values when True.
    """
    prices = data.get_data(ticker, start, end)
    # Log return from the price ratio; first element is NaN.
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)
    # pandas.rolling_skew was removed in pandas 0.23; Series.rolling is
    # the modern equivalent.
    result = log_return.rolling(window=window).skew()
    # First window-1 points lack a full window (kept for parity with the
    # original, although rolling() already leaves them NaN).
    result[:window - 1] = np.nan
    if clean:
        return result.dropna()
    else:
        return result
def test_ts_skew(self):
    # Register the operator, allowing window sizes 3 and 5 only.
    self.env.add_operator('ts_skew', {
        'operator': OperatorTSSkew,
        'arg1': {'value': [3, 5]},
    })
    # A window outside the allowed set must fail validation.
    bad_gene = self.env.parse_string('ts_skew(2, open1)')
    self.assertFalse(bad_gene.validate())
    # A permitted window validates and is dimensionless.
    good_gene = self.env.parse_string('ts_skew(3, open1)')
    self.assertTrue(good_gene.validate())
    self.assertEqual(good_gene.dimension, '')
    # Evaluating from the raw start date lacks the needed history.
    self.assertRaises(IndexError, good_gene.eval, self.env, self.date1,
                      self.date2)
    # Shift by window-1 days and compare against pandas directly.
    shifted = self.env.shift_date(self.date1, 2)
    expected = pd.rolling_skew(self.env.get_data_value('open1'), 3).iloc[2:]
    self.assertTrue(
        frame_equal(good_gene.eval(self.env, shifted, self.date2), expected))
def rolling_tests(p, d):
    # Compare each dask rolling_* function with its pandas counterpart.
    for stat in ('count', 'sum', 'mean', 'median', 'min', 'max',
                 'std', 'var', 'skew', 'kurt'):
        eq(getattr(pd, 'rolling_' + stat)(p, 3),
           getattr(dd, 'rolling_' + stat)(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))

    # Mean absolute deviation, used via rolling_apply.
    def mad(x):
        return np.fabs(x - x.mean()).mean()

    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Edge-case window sizes.
    for width in (0, 1):
        eq(pd.rolling_sum(p, width), dd.rolling_sum(d, width))
    # Keyword arguments.
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def test_ts_skew(self):
    # Only windows 3 and 5 are legal for this operator.
    self.env.add_operator('ts_skew', {
        'operator': OperatorTSSkew,
        'arg1': {'value': [3, 5]},
    })
    # Window 2 is rejected by validation.
    gene_invalid = self.env.parse_string('ts_skew(2, open1)')
    self.assertFalse(gene_invalid.validate())
    # Window 3 validates; the result carries no dimension.
    gene_valid = self.env.parse_string('ts_skew(3, open1)')
    self.assertTrue(gene_valid.validate())
    self.assertEqual(gene_valid.dimension, '')
    # Without shifting the start date there is not enough history.
    self.assertRaises(IndexError, gene_valid.eval, self.env,
                      self.date1, self.date2)
    # Shift by window-1 and verify against pandas' rolling skew.
    start_date = self.env.shift_date(self.date1, 2)
    reference = pd.rolling_skew(self.env.get_data_value('open1'), 3).iloc[2:]
    actual = gene_valid.eval(self.env, start_date, self.date2)
    self.assertTrue(frame_equal(actual, reference))
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API: pandas and dask must agree statistic by
    # statistic.
    for stat in ('count', 'sum', 'mean', 'median', 'min', 'max',
                 'std', 'var', 'skew', 'kurt'):
        eq(getattr(pd, 'rolling_' + stat)(p, 3),
           getattr(dd, 'rolling_' + stat)(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Degenerate window sizes.
    for width in (0, 1):
        eq(pd.rolling_sum(p, width), dd.rolling_sum(d, width))
    # Keyword arguments are forwarded.
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API — each statistic checked pandas vs. dask.
    simple = ['count', 'sum', 'mean', 'median', 'min', 'max',
              'std', 'var', 'skew', 'kurt']
    for name in simple:
        pandas_result = getattr(pd, 'rolling_' + name)(p, 3)
        dask_result = getattr(dd, 'rolling_' + name)(d, 3)
        eq(pandas_result, dask_result)
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Edge-case window sizes.
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Keyword arguments.
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_tests(p, d):
    # Every rolling statistic must match between pandas and dask.
    for name in ('count', 'sum', 'mean', 'median', 'min', 'max',
                 'std', 'var', 'skew', 'kurt'):
        eq(getattr(pd, 'rolling_' + name)(p, 3),
           getattr(dd, 'rolling_' + name)(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))

    # Mean absolute deviation for the rolling_apply check.
    def mad(x):
        return np.fabs(x - x.mean()).mean()

    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Edge-case window sizes.
    for width in (0, 1):
        eq(pd.rolling_sum(p, width), dd.rolling_sum(d, width))
    # Keyword arguments.
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def visualize_sequential_relationships(training_data, plot_size, smooth=None, window=1):
    """
    Generates line plots to visualize sequential data.  Assumes the data
    frame index is time series.

    Fixes relative to the original: floor division (//) where the result
    feeds range() and subplot indexing — plain "/" yields floats on
    Python 3 and raises; the removed pd.rolling_* functions are replaced
    by the Series.rolling API.
    """
    training_data.index.name = None
    num_features = plot_size if plot_size < len(
        training_data.columns) else len(training_data.columns)
    # Ceiling division: one 4x4 figure per 16 features.
    num_plots = num_features // 16 if num_features % 16 == 0 else num_features // 16 + 1
    for i in range(num_plots):
        fig, ax = plt.subplots(4, 4, sharex=True, figsize=(20, 10))
        for j in range(16):
            index = (i * 16) + j
            if index < num_features:
                if index != 3:  # this column is all 0s in the bike set
                    series = training_data.iloc[:, index]
                    if smooth == 'mean':
                        training_data.iloc[:, index] = series.rolling(window).mean()
                    elif smooth == 'var':
                        training_data.iloc[:, index] = series.rolling(window).var()
                    elif smooth == 'skew':
                        training_data.iloc[:, index] = series.rolling(window).skew()
                    elif smooth == 'kurt':
                        training_data.iloc[:, index] = series.rolling(window).kurt()
                training_data.iloc[:, index].plot(
                    ax=ax[j // 4, j % 4], kind='line', legend=False,
                    title=training_data.columns[index])
        fig.tight_layout()
def ts_skew(self, x, n):
    """Rolling skewness of *x* over an *n*-period window.

    Parameters
    ----------
    x : pandas.Series or DataFrame
    n : int
        Window length; the first n-1 results are NaN.
    """
    # pd.rolling_skew was removed in pandas 0.23; .rolling(n).skew() is
    # the equivalent modern call.
    return x.rolling(n).skew()
def rolling_skew(self, *args, **kwargs):
    """Rolling skewness of the wrapped series, returned as a MySeries.

    Arguments are forwarded to pandas' rolling machinery (window as the
    first positional argument, plus options such as min_periods/center).
    """
    # pd.rolling_skew(series, window, ...) was removed in pandas 0.23;
    # Series.rolling accepts the same window/min_periods/center options.
    return MySeries(self.x.rolling(*args, **kwargs).skew())
def ts_skewFn(arr, min_periods, max_periods):
    """Rolling skewness of *arr*.

    Parameters
    ----------
    arr : pandas.Series
    min_periods : int
        Minimum observations in a window required for a value.
    max_periods : int or None
        Window length; falsy values mean "use the whole series length".
    """
    if not max_periods:
        max_periods = len(arr)
    # pd.rolling_skew was removed in pandas 0.23; use Series.rolling.
    return arr.rolling(window=max_periods, min_periods=min_periods).skew()
def ts_operation(df, n):
    """Rolling *n*-period skewness of *df* (Series or DataFrame).

    The first n-1 entries are NaN (incomplete windows).
    """
    # pd.rolling_skew was removed in pandas 0.23; this is the equivalent.
    return df.rolling(n).skew()
def calculate_features(data: pd.DataFrame, normalization=False, train_data: list = None, start=None, end=None):
    """Compute technical-analysis features on an OHLCV frame in place and
    run feature_analysis over the selected training columns.

    Parameters
    ----------
    data : pd.DataFrame
        Must contain 'Open', 'High', 'Low', 'Close', 'Volume' columns.
    normalization : bool, optional, default False
        When True, normalize every derived feature column (raw OHLCV and
        return columns excluded).
    train_data : list, optional
        Feature names handed to feature_analysis; defaults to
        ['oc', 'vol_ratio', 'hl', 'ret'].
    start, end : optional
        Forwarded to normalize() and feature_analysis().

    Returns the frame produced by feature_analysis.  The removed
    pd.rolling_* functions are replaced by the .rolling() API; all other
    computations are unchanged.
    """
    Open = data['Open'].values
    High = data['High'].values
    Low = data['Low'].values
    Close = data['Close'].values
    Volume = data['Volume'].values
    # Percentage returns for the current and four preceding bars.
    data['ret'] = data['Close'].pct_change() * 100.0
    data['ret_2'] = data['Close'].pct_change().shift() * 100.0
    data['ret_3'] = data['Close'].pct_change().shift(2) * 100.0
    data['ret_4'] = data['Close'].pct_change().shift(3) * 100.0
    data['ret_5'] = data['Close'].pct_change().shift(4) * 100.0
    data['ret_ratio'] = (data['ret'] / data['ret_5'] - 1) * 100.0
    data['log_ret'] = (np.log(data['Close'])).diff() * 100.0
    # Overnight gap (open vs. previous close), current and lagged.
    data['gap'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0)
    data['gap2'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift()
    data['gap3'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift(2)
    data['gap4'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift(3)
    data['gap5'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift(4)
    # High-low range relative to open, current and lagged.
    data['hl'] = ((data['High'] - data['Low']) / data['Open'] * 100.0)
    data['hl2'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift()
    data['hl3'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift(2)
    data['hl4'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift(3)
    data['hl5'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift(4)
    # Open-to-close move, current and lagged.
    data['oc'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0)
    data['oc2'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift()
    data['oc3'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift(2)
    data['oc4'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift(3)
    data['oc5'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift(4)
    # Moving-average ratios.
    data['MA_short'] = talib.EMA(data['Close'].values, 10)
    data['MA_long'] = talib.EMA(data['Close'].values, 120)
    data['MA_ratio'] = (data['MA_short'] / data['MA_long'] - 1) * 100.0
    data['MA2_short'] = talib.EMA(data['Close'].values, 10)
    data['MA2_long'] = talib.EMA(data['Close'].values, 60)
    data['MA2_ratio'] = (data['MA2_short'] / data['MA2_long'] - 1) * 100.0
    # Volatility: pd.rolling_std was removed in pandas 0.23.
    data['vol_long'] = data['Close'].rolling(30).std()
    data['vol_short'] = data['Close'].rolling(15).std()
    data['vol_ratio'] = (data['vol_short'] / data['vol_long'] - 1) * 100.0
    data['EMA'] = (Close / talib.EMA(Close, 5) - 1) * 100.0
    data['EMA_long'] = (Close / talib.EMA(Close, 60) - 1) * 100.0
    data['RSI'] = talib.RSI(data['Close'].values) / 100.0
    data['MOM'] = talib.MOM(data['Close'].values, timeperiod=14) / 100.0
    data['MACD_vfast'], data['MACD_signal_vfast'], data['MACD_hist'] = \
        talib.MACD(data['Close'].values, fastperiod=4, slowperiod=9, signalperiod=3)
    data['MACD_fast'], data['MACD_signal_fast'], _ = \
        talib.MACD(data['Close'].values, fastperiod=12, slowperiod=26, signalperiod=9)
    data['MACD_slow'], _, _ = talib.MACD(data['Close'].values, fastperiod=25, slowperiod=50)
    # NOTE(review): this re-assigns MACD_hist computed above — confirm
    # that overwriting the very-fast histogram is intended.
    data['MACD'], data['MACD_signal'], data['MACD_hist'] = talib.MACD(
        data['Close'].values, fastperiod=30, slowperiod=65, signalperiod=22)
    data['ATR'] = talib.ATR(High, Low, Close, timeperiod=28)
    data['ADX_vlong'] = talib.ADX(High, Low, Close, timeperiod=120)
    data['ADX_long'] = talib.ADX(High, Low, Close, timeperiod=28)
    data['ADX_short'] = talib.ADX(High, Low, Close, timeperiod=14)
    data['TSF_short'] = talib.TSF(data['Close'].values, timeperiod=14)
    data['TSF_long'] = talib.TSF(data['Close'].values, timeperiod=28)
    data['TSF_ratio'] = (data['TSF_short'] / data['TSF_long'] - 1) * 100.0
    data['BBand_up'], data['BBand_mid'], data['BBand_low'] = talib.BBANDS(
        data['Close'].values, timeperiod=20)
    data['BBand_width'] = (data['BBand_up'] / data['BBand_low'] - 1) * 100.0
    data['HMA_short'] = HMA(data['Close'].values, timeperiod=9)
    data['HMA_long'] = HMA(data['Close'].values, timeperiod=60)
    data['HMA_ratio'] = (data['HMA_short'] / data['HMA_long'] - 1) * 100.0
    data['HMA_ret'] = HMA(data['Close'].values, 100)
    data['OBV'] = talib.OBV(Close, Volume)
    # Rolling moments of returns (pd.rolling_* removed in pandas 0.23).
    data['mean'] = data['ret'].rolling(10).mean()
    data['std'] = data['ret'].rolling(10).std()
    data['skewness'] = data['ret'].rolling(10).skew()
    # NOTE(review): pandas' rolling kurt already reports excess kurtosis;
    # the extra "- 3" is kept to preserve the original behaviour.
    data['kurtosis'] = (data['ret'].rolling(10).kurt() - 3)
    data['STOCHk'], data['STOCHd'] = talib.STOCH(High, Low, Close, fastk_period=28,
                                                 slowk_period=3, slowd_period=3)
    data['STOCHRSId'], data['STOCHRSIk'] = talib.STOCHRSI(Close)
    data['Chaikin_vol'] = Chaikin_vol(High, Low)
    data['Chaikin_oscillator'] = Chaikin_oscillator(High, Low, Close, Volume)
    data['PDI'] = talib.PLUS_DI(High, Low, Close, timeperiod=14)
    data['MDI'] = talib.MINUS_DI(High, Low, Close, timeperiod=14)
    data['DI'] = data['ADX_short'] - data['PDI'] + data['MDI']
    # Warm-up NaNs from the indicators are zero-filled before modelling.
    data.replace(np.nan, 0, inplace=True)
    if normalization is True:
        for feature in data.columns:
            if feature not in [
                    'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
                    'Product', 'log_ret', 'ret', 'ret_2', 'ret_3', 'ret_4',
                    'ret_5', 'Date'
            ]:
                data[feature] = normalize(data[feature], start=start, end=end)
    if train_data is None:
        train_data = ['oc', 'vol_ratio', 'hl', 'ret']
    data = feature_analysis(data, feature=train_data,
                            pca_components=len(train_data), start=start, end=end)
    return data
def add_features(stock): stock.loc[:, "stock_index"] = stock[["Symbol", "Date"]].apply(lambda x: '_'.join(x), axis=1) stock.loc[:, "Date"] = stock["Date"].apply(lambda x: pd.to_datetime(x, format="%Y-%m-%d")) stock = stock.set_index(['stock_index']) stock = stock.sort_index() print stock.iloc[-3:, :] stock["Adj_Close"] = stock["Adj_Close"].astype(float) stock["Volume"] = stock["Volume"].astype(float) stock["Adj_Close_Volume"] = stock["Adj_Close"] * stock["Volume"] stock["High"] = stock["High"].astype(float) stock["Low"] = stock["Low"].astype(float) close = np.array(stock.Adj_Close) volume = np.array(stock.Volume) high = np.array(stock.High) low = np.array(stock.Low) stock.loc[:, "macd1"], stock.loc[:, "macd2"], stock.loc[:, "macd"] = talib.MACD(close, fastperiod=12, slowperiod=26, signalperiod=9) stock.loc[:, "rsi"] = talib.RSI(close, 12) for i in xrange(10, 60, 10): ema_var_name = "ema_" + str(i) v_ema_var_name = "v_ema_" + str(i) stock.loc[:, ema_var_name] = talib.EMA(close, i) stock.loc[:, v_ema_var_name] = talib.EMA(volume, i) for i in xrange(100, 200, 100): ema_var_name = "ema_" + str(i) v_ema_var_name = "v_ema_" + str(i) stock.loc[:, ema_var_name] = talib.EMA(close, i) stock.loc[:, v_ema_var_name] = talib.EMA(volume, i) stock.loc[:, "slowk"], stock.loc[:, "slowd"] = talib.STOCH(high=high, low=low, close=close, fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0) stock.loc[:,"rolling_min_7d"] = pd.rolling_min(stock["Adj_Close"], window=7) stock.loc[:, "rolling_max_7d"] = pd.rolling_max(stock["Adj_Close"], window=7) stock.loc[:, "rolling_std_7d"] = pd.rolling_std(stock["Adj_Close"], window=7) stock.loc[:, "rolling_skew_7d"] = pd.rolling_skew(stock["Adj_Close"], window=7) stock.loc[:,"rolling_min_1m"] = pd.rolling_min(stock["Adj_Close"], window=30) stock.loc[:, "rolling_max_1m"] = pd.rolling_max(stock["Adj_Close"], window=30) stock.loc[:, "rolling_std_1m"] = pd.rolling_std(stock["Adj_Close"], window=30) stock.loc[:, "rolling_skew_1m"] 
= pd.rolling_skew(stock["Adj_Close"], window=30) stock.loc[:,"rolling_min_1y"] = pd.rolling_min(stock["Adj_Close"], window=252) stock.loc[:, "rolling_max_1y"] = pd.rolling_max(stock["Adj_Close"], window=252) stock.loc[:, "rolling_std_1y"] = pd.rolling_std(stock["Adj_Close"], window=252) stock.loc[:, "rolling_skew_1y"] = pd.rolling_skew(stock["Adj_Close"], window=252) stock.loc[:, "cv_rolling_min_7d"] = pd.rolling_min(stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_max_7d"] = pd.rolling_max(stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_std_7d"] = pd.rolling_std(stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_skew_7d"] = pd.rolling_skew(stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_min_1m"] = pd.rolling_min(stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_max_1m"] = pd.rolling_max(stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_std_1m"] = pd.rolling_std(stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_skew_1m"] = pd.rolling_skew(stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_min_1y"] = pd.rolling_min(stock["Adj_Close_Volume"], window=252) stock.loc[:, "cv_rolling_max_1y"] = pd.rolling_max(stock["Adj_Close_Volume"], window=252) stock.loc[:, "cv_rolling_std_1y"] = pd.rolling_std(stock["Adj_Close_Volume"], window=252) stock.loc[:, "cv_rolling_skew_1y"] = pd.rolling_skew(stock["Adj_Close_Volume"], window=252) #Shift the historical data to today variables = [x for x in stock.columns if x not in ["Date", "Symbol"]] lag_data_1 = buildLaggedFeatures(stock[variables], lag=1, dropna=False) lag_data_7 = buildLaggedFeatures(stock[variables], lag=7, dropna=False) lag_data_15 = buildLaggedFeatures(stock[variables], lag=15, dropna=False) lag_data_30 = buildLaggedFeatures(stock[variables], lag=30, dropna=False) lag_data_60 = buildLaggedFeatures(stock[variables], lag=60, dropna=False) stock_with_lag_data = pd.merge(stock, lag_data_1, left_index=True, 
right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_7, left_index=True, right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_15, left_index=True, right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_30, left_index=True, right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_60, left_index=True, right_index=True) stock_with_lag_data.dropna(inplace=True) stock_with_lag_data["increase_from_last_day"] \ = (stock_with_lag_data["Adj_Close"] - stock_with_lag_data["Adj_Close_lag1"]) / stock_with_lag_data[ "Adj_Close_lag1"] * 100 stock_with_lag_data["increase_from_last_week"] \ = (stock_with_lag_data["Adj_Close"] - stock_with_lag_data["Adj_Close_lag7"]) / stock_with_lag_data[ "Adj_Close_lag7"] * 100 return stock_with_lag_data
def add_features(stock): stock.loc[:, "stock_index"] = stock[["Symbol", "Date"]].apply(lambda x: '_'.join(x), axis=1) stock.loc[:, "Date"] = stock["Date"].apply( lambda x: pd.to_datetime(x, format="%Y-%m-%d")) stock = stock.set_index(['stock_index']) stock = stock.sort_index() print stock.iloc[-3:, :] stock["Adj_Close"] = stock["Adj_Close"].astype(float) stock["Volume"] = stock["Volume"].astype(float) stock["Adj_Close_Volume"] = stock["Adj_Close"] * stock["Volume"] stock["High"] = stock["High"].astype(float) stock["Low"] = stock["Low"].astype(float) close = np.array(stock.Adj_Close) volume = np.array(stock.Volume) high = np.array(stock.High) low = np.array(stock.Low) stock.loc[:, "macd1"], stock.loc[:, "macd2"], stock.loc[:, "macd"] = talib.MACD( close, fastperiod=12, slowperiod=26, signalperiod=9) stock.loc[:, "rsi"] = talib.RSI(close, 12) for i in xrange(10, 60, 10): ema_var_name = "ema_" + str(i) v_ema_var_name = "v_ema_" + str(i) stock.loc[:, ema_var_name] = talib.EMA(close, i) stock.loc[:, v_ema_var_name] = talib.EMA(volume, i) for i in xrange(100, 200, 100): ema_var_name = "ema_" + str(i) v_ema_var_name = "v_ema_" + str(i) stock.loc[:, ema_var_name] = talib.EMA(close, i) stock.loc[:, v_ema_var_name] = talib.EMA(volume, i) stock.loc[:, "slowk"], stock.loc[:, "slowd"] = talib.STOCH(high=high, low=low, close=close, fastk_period=14, slowk_period=3, slowk_matype=0, slowd_period=3, slowd_matype=0) stock.loc[:, "rolling_min_7d"] = pd.rolling_min(stock["Adj_Close"], window=7) stock.loc[:, "rolling_max_7d"] = pd.rolling_max(stock["Adj_Close"], window=7) stock.loc[:, "rolling_std_7d"] = pd.rolling_std(stock["Adj_Close"], window=7) stock.loc[:, "rolling_skew_7d"] = pd.rolling_skew(stock["Adj_Close"], window=7) stock.loc[:, "rolling_min_1m"] = pd.rolling_min(stock["Adj_Close"], window=30) stock.loc[:, "rolling_max_1m"] = pd.rolling_max(stock["Adj_Close"], window=30) stock.loc[:, "rolling_std_1m"] = pd.rolling_std(stock["Adj_Close"], window=30) stock.loc[:, 
"rolling_skew_1m"] = pd.rolling_skew(stock["Adj_Close"], window=30) stock.loc[:, "rolling_min_1y"] = pd.rolling_min(stock["Adj_Close"], window=252) stock.loc[:, "rolling_max_1y"] = pd.rolling_max(stock["Adj_Close"], window=252) stock.loc[:, "rolling_std_1y"] = pd.rolling_std(stock["Adj_Close"], window=252) stock.loc[:, "rolling_skew_1y"] = pd.rolling_skew(stock["Adj_Close"], window=252) stock.loc[:, "cv_rolling_min_7d"] = pd.rolling_min(stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_max_7d"] = pd.rolling_max(stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_std_7d"] = pd.rolling_std(stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_skew_7d"] = pd.rolling_skew( stock["Adj_Close_Volume"], window=7) stock.loc[:, "cv_rolling_min_1m"] = pd.rolling_min(stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_max_1m"] = pd.rolling_max(stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_std_1m"] = pd.rolling_std(stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_skew_1m"] = pd.rolling_skew( stock["Adj_Close_Volume"], window=30) stock.loc[:, "cv_rolling_min_1y"] = pd.rolling_min(stock["Adj_Close_Volume"], window=252) stock.loc[:, "cv_rolling_max_1y"] = pd.rolling_max(stock["Adj_Close_Volume"], window=252) stock.loc[:, "cv_rolling_std_1y"] = pd.rolling_std(stock["Adj_Close_Volume"], window=252) stock.loc[:, "cv_rolling_skew_1y"] = pd.rolling_skew( stock["Adj_Close_Volume"], window=252) #Shift the historical data to today variables = [x for x in stock.columns if x not in ["Date", "Symbol"]] lag_data_1 = buildLaggedFeatures(stock[variables], lag=1, dropna=False) lag_data_7 = buildLaggedFeatures(stock[variables], lag=7, dropna=False) lag_data_15 = buildLaggedFeatures(stock[variables], lag=15, dropna=False) lag_data_30 = buildLaggedFeatures(stock[variables], lag=30, dropna=False) lag_data_60 = buildLaggedFeatures(stock[variables], lag=60, dropna=False) stock_with_lag_data = pd.merge(stock, lag_data_1, 
left_index=True, right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_7, left_index=True, right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_15, left_index=True, right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_30, left_index=True, right_index=True) stock_with_lag_data = pd.merge(stock_with_lag_data, lag_data_60, left_index=True, right_index=True) stock_with_lag_data.dropna(inplace=True) stock_with_lag_data["increase_from_last_day"] \ = (stock_with_lag_data["Adj_Close"] - stock_with_lag_data["Adj_Close_lag1"]) / stock_with_lag_data[ "Adj_Close_lag1"] * 100 stock_with_lag_data["increase_from_last_week"] \ = (stock_with_lag_data["Adj_Close"] - stock_with_lag_data["Adj_Close_lag7"]) / stock_with_lag_data[ "Adj_Close_lag7"] * 100 return stock_with_lag_data
def sequential_relationships(self, time='index', smooth_method=None, window=1, grid_size=4):
    """ Generates line plots to visualize sequential data.

    Parameters
    ----------
    time : string, optional, default 'index'
        Datetime input column to use for visualization.

    smooth_method : {'mean', 'var', 'skew', 'kurt', None}, optional, default None
        Apply a function to the time series to smooth out variations.

    window : int, optional, default 1
        Size of the moving window used to calculate the smoothing function.

    grid_size : int, optional, default 4
        Number of vertical/horizontal plots to display in a single window.
    """
    self.print_message('Generating sequential relationship plots...')

    if smooth_method not in ['mean', 'var', 'skew', 'kurt', None]:
        raise Exception('Invalid value for smooth_method.')

    data = self.data.fillna(0)

    # Equality (not identity) comparison: "is not" against a string
    # literal is fragile and a SyntaxWarning on Python 3.8+.
    if time != 'index':
        data = data.reset_index()
        data = data.set_index(time)

    data.index.name = None
    n_features = len(data.columns)
    plot_size = grid_size ** 2
    n_plots = n_features // plot_size if n_features % plot_size == 0 else n_features // plot_size + 1

    for i in range(n_plots):
        fig, ax = plt.subplots(grid_size, grid_size, sharex=True,
                               figsize=(self.fig_size, self.fig_size / 2))
        for j in range(plot_size):
            index = (i * plot_size) + j
            if index < n_features:
                # Skip string-valued columns, which cannot be line-plotted.
                if type(data.iloc[0, index]) is not str:
                    # pd.rolling_* functions were removed in pandas 0.23;
                    # Series.rolling is the modern equivalent.
                    col = data.iloc[:, index]
                    if smooth_method == 'mean':
                        data.iloc[:, index] = col.rolling(window).mean()
                    elif smooth_method == 'var':
                        data.iloc[:, index] = col.rolling(window).var()
                    elif smooth_method == 'skew':
                        data.iloc[:, index] = col.rolling(window).skew()
                    elif smooth_method == 'kurt':
                        data.iloc[:, index] = col.rolling(window).kurt()
                    data.iloc[:, index].plot(ax=ax[j // grid_size, j % grid_size],
                                             kind='line', legend=False,
                                             title=data.columns[index])
        fig.tight_layout()

    self.print_message('Plot generation complete.')
def visualize_sequential_relationships(data, time='index', smooth_method=None, window=1,
                                       grid_size=4, fig_size=20):
    """Generate line plots to visualize sequential data.

    Assumes the data frame index is time series.

    Parameters
    ----------
    data : array-like
        Pandas data frame containing the entire data set.
    time : string, optional, default 'index'
        Datetime input column to use for visualization.
    smooth_method : {'mean', 'var', 'skew', 'kurt'}, optional, default None
        Apply a function to the time series to smooth out variations.
    window : int, optional, default 1
        Size of the moving window used to calculate the smoothing function.
    grid_size : int, optional, default 4
        Number of vertical/horizontal plots to display in a single window.
    fig_size : int, optional, default 20
        Size of the plot.
    """
    # replace NaN values with 0 to prevent exceptions in the lower level API calls
    data = data.fillna(0)

    # BUG FIX: original used `time is not 'index'` — an identity comparison
    # against a string literal; use value equality instead.
    if time != 'index':
        data = data.reset_index()
        data = data.set_index(time)
    data.index.name = None

    n_features = len(data.columns)
    plot_size = grid_size ** 2
    # BUG FIX: `/` produces a float on Python 3, which breaks range() below;
    # use floor division.
    n_plots = n_features // plot_size if n_features % plot_size == 0 else n_features // plot_size + 1

    for i in range(n_plots):
        fig, ax = plt.subplots(grid_size, grid_size, sharex=True,
                               figsize=(fig_size, fig_size / 2))
        for j in range(plot_size):
            index = (i * plot_size) + j
            if index < n_features:
                # rolling statistics only apply to numeric columns
                if type(data.iloc[0, index]) is not str:
                    if smooth_method in ('mean', 'var', 'skew', 'kurt'):
                        # pd.rolling_mean/var/skew/kurt were removed in pandas
                        # 0.23; use the Series.rolling(...) API instead.
                        roller = data.iloc[:, index].rolling(window)
                        data.iloc[:, index] = getattr(roller, smooth_method)()
                    # BUG FIX: `j / grid_size` is a float on Python 3 and is
                    # not a valid axes index; use floor division.
                    data.iloc[:, index].plot(ax=ax[j // grid_size, j % grid_size],
                                             kind='line', legend=False,
                                             title=data.columns[index])
        fig.tight_layout()
def rolling_smoother(self, data, stype='rolling_mean', win_size=10, win_type='boxcar',
                     center=False, std=0.1, beta=0.1, power=1, width=1):
    """Perform a rolling (moving-window) smoothing on the data.

    For complete help refer to
    http://pandas.pydata.org/pandas-docs/dev/computation.html

    :param data: pandas Series/DataFrame to smooth
    :param stype: smoothing type; one of 'count', 'sum', 'mean', 'median',
        'min', 'max', 'std', 'var', 'skew', 'kurt', 'window'
        (the 'rolling_' prefix, as in the default 'rolling_mean', is accepted
        and stripped)
    :param win_size: size of the moving window
    :param win_type: window shape when ``stype == 'window'``: boxcar, triang,
        blackman, hamming, bartlett, parzen, bohman, blackmanharris, nuttall,
        barthann, kaiser (needs beta), gaussian (needs std),
        general_gaussian (needs power, width), slepian (needs width)
    :param center: set labels at the center of the window (win_type smoothing)
    :param std: std parameter for the gaussian window
    :param beta: beta parameter for the kaiser window
    :param power: power parameter for the general_gaussian window
    :param width: width parameter for the general_gaussian / slepian windows
    :return: the smoothed series
    :raises ValueError: if ``stype`` is not a recognized smoothing type
    """
    # BUG FIX: the documented default 'rolling_mean' matched none of the
    # original comparisons ('mean', 'sum', ...), so calling with the default
    # raised NameError. Accept both spellings by stripping the prefix.
    if stype.startswith('rolling_'):
        stype = stype[len('rolling_'):]

    # The pd.rolling_* module-level functions were removed in pandas 0.23;
    # use the Rolling object API instead.
    if stype == 'window':
        roller = data.rolling(win_size, win_type=win_type, center=center)
        # BUG FIX: in the original, the final `else` attached only to the
        # general_gaussian check, so for kaiser/gaussian windows the computed
        # result was immediately overwritten by a plain windowed mean. Use a
        # proper elif chain. Window-shape parameters are passed to .mean().
        if win_type == 'kaiser':
            newy = roller.mean(beta=beta)
        elif win_type == 'gaussian':
            newy = roller.mean(std=std)
        elif win_type == 'general_gaussian':
            newy = roller.mean(power=power, width=width)
        else:
            newy = roller.mean()
    else:
        simple = {'count', 'sum', 'mean', 'median', 'min', 'max',
                  'std', 'var', 'skew', 'kurt'}
        if stype not in simple:
            # Original fell through to a NameError on `newy`; fail explicitly.
            raise ValueError('Unknown smoothing type: %r' % stype)
        newy = getattr(data.rolling(win_size), stype)()
    return newy
def evaluate(self, table):
    """Evaluate the wrapped expression against *table* and return the
    rolling (unbiased) skewness of the result over ``self.window``
    observations.

    NOTE(review): if ``self.expr`` is None, ``val`` stays None and the
    rolling call raises — the original ``pd.rolling_skew(None, ...)`` also
    failed in that case; confirm whether a None expr is ever expected here.
    """
    expr = self.expr
    val = None
    if expr is not None:
        val = expr.evaluate(table)
    # pd.rolling_skew was removed in pandas 0.23; use the .rolling() API.
    return val.rolling(self.window).skew()
def get_rolling_skew(values, window):
    """Return rolling skew of given values, using specified window size.

    The series is shifted by one position first, so each point's skew is
    computed only from the *preceding* ``window`` observations (no
    look-ahead into the current value).
    """
    # pd.rolling_skew was removed in pandas 0.23; use the .rolling() API.
    return values.shift(1).rolling(window=window).skew()