def _reg_rolling_APM(reg_: pd.DataFrame, x1: str, y1: str, x2: str, y2: str,
                     has_const: bool = False, use_const: bool = True,
                     window: int = 20) -> pd.Series:
    """Rolling APM-style t-statistic from the gap between two sets of OLS residuals.

    Fits two rolling regressions (e.g. AM and PM sessions) and returns, per
    window, mean(residual gap) / std(residual gap) * sqrt(window).
    Returns an all-NaN series when there are not enough rows for one window.
    """
    if len(reg_) <= window:
        return pd.Series(index=reg_.index)
    ols_am = PandasRollingOLS(x=reg_[x1], y=reg_[y1], has_const=has_const,
                              use_const=use_const, window=window)
    ols_pm = PandasRollingOLS(x=reg_[x2], y=reg_[y2], has_const=has_const,
                              use_const=use_const, window=window)
    resid_gap = ols_am._resids - ols_pm._resids
    # t-statistic of the residual gap within each rolling window.
    t_stat = (np.nanmean(resid_gap, axis=1)
              / np.nanstd(resid_gap, axis=1, ddof=1)
              * np.sqrt(window))
    # Align to the end date of each complete window.
    return pd.Series(t_stat, index=ols_am.index[window - 1:])
def velocity(df):
    """Attach absolute 7-point rolling-OLS slopes of lws_xz/lws_xy against td.

    The first 6 rows (window - 1) have no estimate and are padded with NaN.
    """
    pad = [np.nan] * 6
    for src_col, dst_col in (('lws_xz', 'vel_xz'), ('lws_xy', 'vel_xy')):
        fit = PandasRollingOLS(y=df[src_col], x=df.td, window=7)
        df[dst_col] = pad + list(abs(fit.beta.values))
    return df
def node_inst_vel(df, roll_window_numpts, start):
    """Instantaneous xz/xy velocities: rolling-OLS slopes vs td, from `start` on."""
    beta_xz = PandasRollingOLS(y=df.xz, x=df.td, window=roll_window_numpts).beta
    beta_xy = PandasRollingOLS(y=df.xy, x=df.td, window=roll_window_numpts).beta
    # Restrict the frame and the betas to the same date range before assigning.
    df = df.loc[df.index >= start]
    df.loc[:, 'vel_xz'] = np.round(
        beta_xz[beta_xz.index >= start]['feature1'].values, 4)
    df.loc[:, 'vel_xy'] = np.round(
        beta_xy[beta_xy.index >= start]['feature1'].values, 4)
    return df
def Spread_RollingRegression(df, pair1, pair2):
    """Spread of pair1 over pair2 using a 20-bar rolling-OLS hedge ratio."""
    hedge_ratio = PandasRollingOLS(df[pair1], df[pair2], window=20).beta['feature1']
    spread = pd.DataFrame()
    spread['{}_{}'.format(pair1, pair2)] = df[pair1] - hedge_ratio * df[pair2]
    # Drop the warm-up rows with no hedge-ratio estimate.
    spread.dropna(inplace=True)
    return spread
def slope_intercept(df, roll_window_numpts, start):
    """Attach rolling-OLS slope and intercept of magnitude vs td, from `start` on.

    Fix: the original fit the identical model twice (once for ``.beta``, once
    for ``.alpha``); a single fit now provides both, with identical results.

    NOTE(review): the window is hard-coded to 7 and the ``roll_window_numpts``
    parameter is ignored — confirm whether callers rely on that before wiring
    it through.
    """
    model = PandasRollingOLS(y=df.magnitude, x=df.td, window=7)
    slope = model.beta
    inter = pd.DataFrame(model.alpha, columns=['intercept'])
    df = df.loc[df.index >= start]
    m = slope[slope.index >= start]['feature1'].values
    df.loc[:, 'slope'] = np.round(m, 4)
    b = inter[inter.index >= start]['intercept'].values
    df.loc[:, 'intercept'] = np.round(b, 4)
    return df
def RegBeta(factor, x, y, num, name):
    """Per-stock rolling regression beta stored in ``factor[name]``.

    The first ``num - 1`` rows of each ts_code group (incomplete windows)
    are set to NaN.
    """
    temp = copy.deepcopy(factor[['trade_date', 'ts_code', x, y]])
    temp.sort_values(by=['ts_code', 'trade_date'], inplace=True)
    factor[name] = PandasRollingOLS(temp[x], temp[y], num).beta
    # Alignment is by index label, so the sorted order of `temp` is harmless.
    head_idx = factor.groupby('ts_code').head(num - 1).index
    factor.loc[head_idx, name] = np.nan
    return factor
def acceleration(vel_, td):
    """7-point rolling-OLS slope of velocity against the matching tail of td."""
    # td is longer than vel_; align td to the last len(vel_) samples.
    offset = len(td) - len(vel_)
    fit = PandasRollingOLS(y=pd.Series(vel_), x=pd.Series(td[offset:]), window=7)
    return fit.beta.values
def get_spread(data):
    """Spread of the second column over the first via a 50-bar rolling hedge ratio."""
    x_name, y_name = list(data)[0], list(data)[1]
    fit = PandasRollingOLS(y=data[y_name], x=data[x_name], window=50)
    spread = pd.DataFrame()
    spread[y_name + '-' + x_name] = data[y_name] - fit.beta['feature1'] * data[x_name]
    # Drop warm-up rows with no beta estimate.
    spread.dropna(inplace=True)
    return spread
def beta_calculation(self):
    """Load data and append rolling-OLS betas of y_ticker on x_ticker.

    Drops the first ``lookback - 1`` warm-up rows so rows align with betas.
    """
    self.data = self.import_data()
    rolling_fit = PandasRollingOLS(y=self.data[self.y_ticker],
                                   x=self.data[self.x_ticker],
                                   window=self.lookback)
    self.data = self.data[self.lookback - 1:]
    self.data['betas'] = rolling_fit.beta
    return self.data
def fit(self, x_train, y_train, standardize=False):
    """Fit a rolling OLS on the training data, optionally standardizing x first.

    Stores the raw inputs on the instance; the regressor sees the (possibly
    standardized) x.  NOTE(review): the scaler's transform may return a bare
    ndarray rather than a DataFrame — assumes the regressor accepts it; confirm.
    """
    self.x_train = x_train
    self.y_train = y_train
    self.standardize = standardize
    if standardize:
        self.standardizescaler.fit(x_train)
        x_train = self.standardizescaler.transform(x_train)
    self.regressor = PandasRollingOLS(y=y_train, x=x_train,
                                      window=self.window_size,
                                      has_const=self.has_const,
                                      use_const=self.use_const)
def accel(df, roll_window_numpts, start):
    """Rolling-OLS (window=144) slope of velocity vs td, reported from start+75.5h.

    NOTE(review): ``roll_window_numpts`` is ignored; the window is hard-coded
    to 144 — confirm that is intentional.
    """
    beta = PandasRollingOLS(y=df.velocity, x=df.td, window=144).beta
    cutoff = start + timedelta(hours=75.5)
    df = df.loc[df.index >= cutoff]
    df.loc[:, 'acceleration'] = np.round(
        beta[beta.index >= cutoff]['feature1'].values, 4)
    return df
def node_inst_vel(df, roll_window_numpts, start):
    """7-point rolling-OLS slope of magnitude vs td, reported from start+3.5h.

    NOTE(review): ``roll_window_numpts`` is ignored; the window is hard-coded
    to 7 — confirm that is intentional.
    """
    beta = PandasRollingOLS(y=df.magnitude, x=df.td, window=7).beta
    cutoff = start + timedelta(hours=3.5)
    df = df.loc[df.index >= cutoff]
    df.loc[:, 'velocity'] = np.round(
        beta[beta.index >= cutoff]['feature1'].values, 4)
    return df
def node_inst_vel(filled_smoothened, roll_window_numpts, start):
    """Fit a rolling OLS of xz on td; return the model, or None on failure.

    Fixes two defects in the original: a bare ``except:`` that swallowed
    everything (including KeyboardInterrupt), and ``return lr_xz`` raising
    ``NameError`` when the fit failed because ``lr_xz`` was never bound.
    ``start`` is unused (kept for interface compatibility).
    """
    print(filled_smoothened)
    try:
        return PandasRollingOLS(y=filled_smoothened.xz,
                                x=filled_smoothened.td,
                                window=roll_window_numpts)
    except Exception as exc:
        # Best-effort behavior preserved: report and continue with None.
        print('Error:', exc)
        return None
def _reg_beta(data_x, data_y, window):
    """Rolling regression beta between two flattened arrays, NaNs replaced by 0.

    ``window`` arrives as a 1-element container; if its content is not an int
    the function degrades to an all-zeros array of the same length as data_x.

    Idiom fixes: identity type check instead of ``!=``, and a direct
    ``to_numpy()`` instead of the ``tolist()`` / ``np.array`` round trip.
    """
    window = window[0]
    if type(window) is not int:
        return np.zeros(len(data_x))
    data = pd.DataFrame({'x': data_x.flatten(), 'y': data_y.flatten()})
    res = PandasRollingOLS(data['x'], data['y'], window)
    # Assign through the frame so the beta aligns on the shared RangeIndex.
    data['beta'] = res.beta
    return np.nan_to_num(data['beta'].to_numpy())
def get_spread(self, close_df):
    """Spread of the first column over the second via a 48-bar rolling hedge ratio."""
    pair1, pair2 = list(close_df)[0], list(close_df)[1]
    hedge = PandasRollingOLS(y=close_df[pair1], x=close_df[pair2],
                             window=48).beta['feature1']
    spread = pd.DataFrame()
    spread[pair1 + '-' + pair2] = close_df[pair1] - hedge * close_df[pair2]
    # Drop warm-up rows with no hedge-ratio estimate.
    spread.dropna(inplace=True)
    return spread
def _ts_beta(df1, df2, window):
    """Historical rolling regression coefficient, NaN-padded per stock, zeros for NaN."""
    window = window[0]
    if type(window) != int:
        return np.zeros(len(df1))
    frame = pd.DataFrame({'0': df1, '1': df2})
    # trade_date / stock_code are module-level globals aligned with df1/df2.
    frame['time'] = trade_date
    frame['code'] = stock_code
    frame['res'] = PandasRollingOLS(frame['0'], frame['1'], window).beta
    # First window-1 rows per code are incomplete windows -> NaN.
    head_idx = frame.groupby('code').head(window - 1).index
    frame.loc[head_idx, 'res'] = np.nan
    return np.nan_to_num(frame['res'].values)
def test_const_false():
    """Regression test for Issue #6: use_const=False together with has_const=False."""
    X = pd.DataFrame(np.arange(5), columns=['X'])
    Y = pd.DataFrame(np.arange(0, 10, 2) + 1, columns=['Y'])
    reg_df = pd.concat([Y, X], axis=1)
    window = 2
    rr = PandasRollingOLS(y=reg_df.iloc[:, 0],   # Series
                          x=reg_df.iloc[:, 1:],  # DataFrame
                          window=window,
                          has_const=False,
                          use_const=False)
    expected = np.array([[3.], [2.6], [2.38461538], [2.28]])
    assert np.allclose(rr.beta.values, expected)
def _reg_rolling(reg_: pd.DataFrame, x_name: str, y_name: str,
                 has_const: bool = False, use_const: bool = True,
                 window: int = 20) -> pd.Series:
    """Rolling-OLS intercept (alpha) of y_name on x_name.

    Returns an all-NaN series when there are not enough rows for one window.
    """
    if len(reg_) <= window:
        return pd.Series(index=reg_.index)
    return PandasRollingOLS(x=reg_[x_name], y=reg_[y_name],
                            has_const=has_const, use_const=use_const,
                            window=window).alpha
def acceleration(vel, td, thresh):
    """7-point rolling-OLS slope of velocity against time.

    ``thresh`` is currently unused (kept for interface compatibility); a large
    block of commented-out thresholding experiments was removed as dead code.
    """
    fit = PandasRollingOLS(y=pd.Series(vel), x=pd.Series(td), window=7)
    return fit.beta.values
def test_datareader_frame():
    """Multi-regressor rolling OLS on the bundled pdr.csv fixture."""
    import os.path
    csv_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "pdr.csv")
    data = pd.read_csv(csv_path)
    y = data["usd"]
    x = data.drop("usd", axis=1)
    window = 12  # months
    model = PandasRollingOLS(y=y, x=x, window=window)
    assert isinstance(model.beta, pd.DataFrame)
    assert model.beta.shape == (219, 2)
    expected = np.array([
        [3.28409826e-05, -5.42606172e-02],
        [2.77474638e-04, -1.88556396e-01],
        [2.43179753e-03, -2.94865331e-01],
        [2.79584924e-03, -3.34879522e-01],
        [2.44759386e-03, -2.41902450e-01],
    ])
    assert np.allclose(model.beta.head().values, expected)
def _reg_rolling(reg_: pd.DataFrame, x_name: str, y_name: str, win: int,
                 has_cons: bool = False):
    """Most-recent rolling-OLS residual per window, aligned to window end dates.

    NOTE: ``has_cons`` is forwarded as ``use_const``; ``has_const`` is fixed
    False.  Returns an all-NaN series when rows are insufficient or the fit
    fails (failure is printed, best-effort).
    """
    if len(reg_) <= win:
        return pd.Series(index=reg_.index)
    try:
        model = PandasRollingOLS(x=reg_[x_name], y=reg_[y_name],
                                 has_const=False, use_const=has_cons,
                                 window=win)
        # Last residual of each window, indexed by the window's final date.
        return pd.Series(model._resids[:, -1], index=reg_.index[win - 1:])
    except Exception as e:
        print(e)
        return pd.Series(index=reg_.index)
def hr_signal(df, stock1, stock2):
    """Rolling-hedge spread z-score entry/exit signals for a stock pair.

    Long when z <= -2, short when z >= 2; close thresholds at 0.  The z-score
    uses the full-sample mean/std of the spread (not a rolling estimate).
    """
    window, entry, exit = 20, 2, 0
    temp = pd.DataFrame()
    temp[stock1] = df[stock1]
    temp[stock2] = df[stock2]
    temp['beta'] = PandasRollingOLS(y=temp[stock1], x=temp[stock2],
                                    window=window).beta
    temp = temp.dropna()
    temp['spread'] = temp[stock1] - temp.beta * temp[stock2]
    temp['zscore'] = (temp['spread'] - np.mean(temp['spread'])) / np.std(temp['spread'])
    temp['longsignal'] = (temp['zscore'] <= -entry) & (temp['zscore'] < -exit)
    temp['shortsignal'] = (temp['zscore'] >= entry) & (temp['zscore'] > exit)
    temp['closelong'] = temp['zscore'] >= -exit
    temp['closeshort'] = temp['zscore'] <= exit
    return temp
def rolling_ndays_ffmodels(ndays=90):
    """Plot rolling Fama-French factor betas for a fixed list of sector ETFs.

    For each ticker: downloads prices via the project ``ETF`` class, computes
    daily returns, merges with the module-level Fama-French data ``ff.data``
    on a YYYYMMDD date key, fits an ``ndays``-window rolling OLS on
    Mkt_RF/SMB/HML, and shows one matplotlib figure per ticker.

    :param ndays: rolling regression window in trading days.
    :return: None (side effects only: network fetches and plots).
    """
    register_matplotlib_converters()
    code_list = [
        'SPY', 'XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY'
    ]
    for code in code_list:
        etf = ETF(code, '2010-01-01', '2019-09-14')
        etf.price_acquire()
        # Simple daily return from close-to-close.
        etf.data['ETF_Daily_return'] = (
            etf.data['Close'] / etf.data['Close'].shift(1) - 1)
        # Normalize dates to YYYYMMDD strings so they match ff.data's key.
        etf.data['Date'] = etf.data['Date'].apply(
            lambda x: x.strftime("%Y%m%d"))
        data = pd.DataFrame.merge(etf.data, ff.data, how='left', on='Date')
        data = data.dropna(axis=0, how='any')
        model = PandasRollingOLS(y=data.ETF_Daily_return,
                                 x=data[['Mkt_RF', 'SMB', 'HML']],
                                 window=ndays)
        # X-axis: dates from the first complete window onward, parsed back
        # to datetimes for plotting.
        X = (data.loc[ndays:, 'Date']).reset_index(drop=True)
        X = X.apply(lambda x: datetime.datetime.strptime(x, "%Y%m%d"))
        plt.plot(X, model.beta)
        plt.title('Beta to the Fama-French factors of ETF:' + code + '')
        plt.show()
    return
def getSignal(index):
    """Build a table of technical factors from an OHLCV index DataFrame.

    Computes MACD, RSI, ATR/ADX, a demand/supply ratio (dmkTD), a KDJ-gated
    turning indicator, rolling-OLS channel errors, illiquidity measures and a
    volume-surge factor, and returns the selected factor columns.

    NOTE(review): in the KDJ loop below, the else-branch reads ``K[-1]`` /
    ``D[-1]``; it would raise IndexError if the very first RSV were non-null
    (in practice the 9-bar rolling warm-up makes the head null — confirm).
    """
    # Define the factors here.
    df = copy.deepcopy(index)
    df['pre_close'] = df['close'].shift(1)
    # Overnight gap relative to previous close.
    df['HnLOpen'] = (df['open']-df['pre_close'])/df['pre_close']
    df['period'] = df['close'].rolling(window=10).mean()
    # Deviation of close from its 10-bar mean.
    df['MA'] = (df['close']-df['period'])/df['period']
    # MACD computed manually via EWMs (12/26/9).
    df['DIF'] = df['close'].ewm(min_periods=12,adjust=False,alpha=2/(1+12)).mean()-df['close'].ewm(min_periods=26,adjust=False,alpha=2/(1+26)).mean()
    df['DEA'] = df['DIF'].ewm(min_periods=9,adjust=False,alpha=2/(1+9)).mean()
    df['macd'] = 2*(df['DIF']-df['DEA'])
    #df['macd'] = ta.MACD(df['close'],12,26,9)[2]
    # RSI via Wilder-style EWMs of up-moves over absolute moves.
    df['max'] = df.apply(lambda x: max(x['close']-x['pre_close'],0),axis=1)
    df['abs'] = df.apply(lambda x: abs(x['close']-x['pre_close']),axis=1)
    df['RSI'] = 100*df['max'].ewm(min_periods=14,adjust=False,alpha=2/(1+14)).mean()/df['abs'].ewm(min_periods=14,adjust=False,alpha=2/(1+14)).mean()
    #df['RSI'] = ta.RSI(df['close'],14)
    # ATR normalized by the 10-bar mean price; ADX from TA-Lib.
    df['ATR'] = (ta.ATR(df['high'],df['low'],df['close'],10))/df['period']
    df['ADX'] = ta.ADX(df['high'],df['low'],df['close'],14)
    # Buying vs selling pressure proxies, summed over 9 bars.
    df['buy'] = df.apply(lambda x: max(x['high']-x['pre_close'],0),axis=1)+df['close']-df['low']
    df['sell'] = df['buy']+df.apply(lambda x: max(x['pre_close']-x['low'],0),axis=1)+df['high']-df['close']
    df['dmkTD'] = df['buy'].rolling(window=9).sum()/df['sell'].rolling(window=9).sum()
    # Low-price momentum smoothing used by the `turn` indicator.
    df['pre_low'] = df['low'].shift(1)
    df['l_max'] = df.apply(lambda x: max(x['low']-x['pre_low'],0),axis=1)
    df['l_abs'] = df.apply(lambda x: abs(x['low']-x['pre_low']),axis=1)
    # NOTE(review): l_abs/l_max ordering looks inverted vs the usual SMA-style
    # ratio — confirm against the factor's specification.
    df['sma'] = df['l_abs'].rolling(window=3).mean()/df['l_max'].rolling(window=3).mean()*1000
    df['ema'] = df['sma'].ewm(min_periods=3,adjust=False,alpha=2/(1+3)).mean()
    df['ll'] = df['low'].rolling(window=13).min()
    df['hh'] = df['ema'].rolling(window=13).max()
    # Fire the turn signal only when price makes a new 13-bar low.
    df['turn'] = df.apply(lambda x: (x['ema']+x['hh']*2)/2 if x['low']<=x['ll'] else 0,axis=1)
    df['turn'] = df['turn'].ewm(min_periods=3,adjust=False,alpha=2/(1+3)).mean()/618
    df['turn'] = df['turn'].apply(lambda x: 500 if x>500 else x)  # cap at 500
    # KDJ: 9-bar stochastic with 2/3-1/3 recursive smoothing, seeded at 50.
    df['low_n'] = df['low'].rolling(window=9).min()
    df['high_n'] = df['high'].rolling(window=9).max()
    df['RSV'] = (df['close']-df['low_n'])/(df['high_n']-df['low_n'])*100
    n = len(df)
    K,D,J = [],[],[]
    for i in range(n):
        if df['RSV'].isnull().iloc[i]:
            K.append(50)
            D.append(50)
            J.append(np.nan)
        else:
            K.append(K[-1]*2/3+df['RSV'].iloc[i]/3)
            D.append(D[-1]*2/3+K[-1]/3)
            J.append(3*K[-1]-2*D[-1])
    df['K'] = K
    df['D'] = D
    df['KDJ'] = J
    df['KDJ_1'] = df['KDJ'].shift(1)
    df['KDJ_2'] = df['KDJ'].shift(2)
    # Keep `turn` only on a local KDJ trough (yesterday below both neighbors).
    df['turn'] = df.apply(lambda x: x['turn'] if ((x['KDJ']>x['KDJ_1']) and (x['KDJ_2']>x['KDJ_1'])) else 0, axis=1)
    # Rolling 15-bar OLS channels of high/low/close on the bar index.
    df['DAY'] = df.index
    up,dw,mid = PandasRollingOLS(df['high'],df['DAY'],15),PandasRollingOLS(df['low'],df['DAY'],15),PandasRollingOLS(df['close'],df['DAY'],15)
    df['upA'],df['upB'],df['dwA'],df['dwB'],df['midA'],df['midB'] = up.alpha,up.beta,dw.alpha,dw.beta,mid.alpha,mid.beta
    # Relative deviation of the high from each fitted channel line.
    df['err_up'] = (df['high']-df['DAY']*df['upB']-df['upA'])/df['high']
    df['err_dw'] = (df['high']-df['DAY']*df['dwB']-df['dwA'])/df['high']
    df['err_mid'] = (df['high']-df['DAY']*df['midB']-df['midA'])/df['high']
    df['meanHigh'] = (df['close']-df['high'].rolling(window=15).mean())/df['close']
    df['meanLow'] = (df['close']-df['low'].rolling(window=15).mean())/df['close']
    # Downside-only Amihud-style illiquidity over 20 bars.
    df['rtn'] = (df['close']-df['pre_close'])/df['pre_close']
    df['dwrtn'] = df['rtn'].apply(lambda x: 0 if x>0 else x)
    df['negILLIQ'] = df['dwrtn']/df['volume']
    df['negILLIQ'] = 10**5*df['negILLIQ'].rolling(window=20).sum()/(df['dwrtn']<0).rolling(window=20).sum()
    # Range-based illiquidity level and its coefficient of variation.
    df['rng'] = (df['high']-df['low'])/df['pre_close']
    df['ILLIQ'] = df['rng'].rolling(window=10).mean()
    df['cvILLIQ'] = df['rng'].rolling(window=20).std()/df['rng'].rolling(window=20).mean()
    # Volume surge above the rolling 75th percentile of the last 40 bars.
    df['vol_quantile'] = df['volume'].rolling(window=40).quantile(.75, interpolation='lower')
    df['vol_max'] = df.apply(lambda x: x['volume'] if x['volume']>x['vol_quantile'] else x['vol_quantile'],axis=1)
    df['vol_increase'] = (df['volume']-df['vol_quantile'])/df['vol_max']
    return df[['trade_date','pre_close','HnLOpen','MA','macd','RSI','ATR','ADX','dmkTD','turn',\
'err_up','err_dw','err_mid','meanHigh','meanLow','negILLIQ','cvILLIQ','ILLIQ','vol_increase']]
# Top-level pipeline fragment: windowed LOWESS smoothing of xz, then a
# rolling-OLS "velocity" estimate.  Relies on `node`, `xz` and `fitting`
# defined earlier in the file (outside this view).
timestamp = np.array(node.ts.values)
df_td = np.array(node.td.values)
# Build 17-sample sliding windows over the signal and its time axes.
rol_xz = fitting.rolling_window(xz, window=17)
timestamp = fitting.rolling_window(timestamp, window=17)
df_td = fitting.rolling_window(df_td, window=17)
lws_xz, lst_val, ts, n_td = fitting.rolling_lws(rol_xz, timestamp, df_td)
lst_val = np.array(lst_val)
# # lws = low_ess(xz,np.arange(len(xz)), fraction = 0.1)
# lws = lws[:,1]
# # lws = pd.Series(lws)
# NOTE(review): y and x are both lst_val here — the signal is regressed on
# itself.  x was presumably meant to be a time series (ts / n_td); confirm.
vel = PandasRollingOLS(y=pd.Series(lst_val), x=pd.Series(lst_val), window=7)
vel = abs(vel.beta.values)
# Truncate both series to the first 2000 samples for downstream use.
vel = vel[0:2000]
td = df_td[0:2000]
# vel = PandasRollingOLS(y=lws, x=node.td,window=7)
# vel = abs(vel.beta.values)
# # vel = vel[0:20000]
# accel = acceleration(vel,td,t)
# pred = current_pred(vel,t)
# # start_index = len(pred) - len(act)
# # actual.append(act)
# predicted.append(pred[start_index:])
def roll_slope(data0, data1, per):
    """Rolling-OLS slope coefficients of data1 regressed on data0 over `per` bars."""
    model = PandasRollingOLS(y=data1, x=data0, window=per)
    return model.beta
def velocity(df):
    """Absolute 7-point rolling-OLS slope of df.lws vs df.td, NaN-padded head.

    Returns a plain list whose first 6 entries (window - 1) are NaN.
    """
    fit = PandasRollingOLS(y=df.lws, x=df.td, window=7)
    return [np.nan] * 6 + list(abs(fit.beta.values))
'time': time }) return img_data if __name__ == '__main__': dir_path = 'D:\\Users\\yonat\\Desktop\\HUJI\\HUJI Homework\\Advanced Physics Lab A\\Water Heating - Experiment B\\Camera Pics\\8.6.2020\\main exp pics\\' results_path = 'D:\\Users\\yonat\\Desktop\\HUJI\\HUJI Homework\\Advanced Physics Lab A\\Water Heating - Experiment B\\Results\\8.6.2020\\bubble_pixels_ex1.csv' experiment_path = 'D:\\Users\\yonat\\Desktop\\HUJI\\HUJI Homework\\Advanced Physics Lab A\\Water Heating - Experiment B\\Results\\8.6.2020\\main_run_data.csv' exp_data = read_experiment_data(experiment_path, cooling_measurement=True) # bubbles_data = analyze_all_images_in_dir(dir_path) bubbles_data = pd.read_csv(results_path) window_size = 20 exp_data['temp_change_rate'] = PandasRollingOLS(x=exp_data.time, y=exp_data.temp, window=window_size).beta \ .shift(-int(window_size / 2)) fig, axs = plt.subplots(2, sharex=True) exp_data.plot(x='time', y='temp_change_rate', linestyle='None', marker='.', grid=True, ax=axs[0], label='Temperature rate of change', c='k') # exp_data.plot(x='time', y='temp', linestyle='None', marker='.', grid=True, ax=ax) scale_const = 2e-7 secondary_axis = axs[0].secondary_yaxis( 'right',
# Top-level script: EWA/EWC pair — static OLS fit, rolling hedge ratio, and a
# cointegration (ADF-on-residuals) helper.  Relies on module-level `config`,
# `smf` (statsmodels) and `ts` (statsmodels.tsa.stattools) imports.
data = pd.read_csv(config.input_data_path + '/' + 'EWA EWC' + '.csv',
                   index_col='Date')
x = data['EWA']
y = data['EWC']
x_ticker = 'EWA'
y_ticker = 'EWC'
# Static linear fit of EWC on EWA, evaluated on a dense grid (for plotting).
k = np.polyfit(x, y, 1)
xx = np.linspace(min(x), max(x), 1000)
yy = np.polyval(k, xx)
lookback = 100
# Rolling hedge ratio over a 100-bar window; drop the warm-up rows so the
# remaining rows align with the betas.
modelo2 = PandasRollingOLS(y=y, x=x, window=lookback)
data = data[lookback - 1:]
betas = modelo2.beta
data['beta'] = betas
# Units of the spread per row.  NOTE: the lambda's `x` (a row) shadows the
# outer `x` Series defined above.
data['numunits'] = data.apply(lambda x: x[x_ticker] - x['beta'] * x[y_ticker],
                              axis=1)
model = smf.OLS(y, x)
results = model.fit()
def cointegration_test(y, x):
    """ADF test on the residuals of an OLS of y on x (Engle-Granger step 2)."""
    ols_result = smf.OLS(y, x).fit()
    return ts.adfuller(ols_result.resid, maxlag=1)
def alpha2_genetic_TFZZ( cls, data: pd.DataFrame, high_name: str = PVN.HIGH.value, close_name: str = PVN.CLOSE.value, amount_name: str = PVN.AMOUNT.value, volume_name: str = PVN.VOLUME.value, adj_factor_name: str = PVN.ADJ_FACTOR.value) -> pd.Series: """ alpha2因子来自: <<20200220-天风证券-基于基因表达式规划的价量因子挖掘>> alpha2计算公式: 𝐴𝑙𝑝ℎ𝑎2: 𝑡𝑠_𝑟𝑒𝑔𝑏𝑒𝑡𝑎(𝑛𝑒𝑔(𝑠_𝑙𝑜𝑔(𝑠𝑢𝑏(𝑑𝑖𝑣(𝑉𝑊𝐴𝑃,𝑃𝑅𝐸𝐶𝐿𝑂𝑆𝐸),1))), 𝑚𝑖𝑛(𝑠𝑢𝑏(𝑑𝑖𝑣(𝐻𝐼𝐺𝐻,𝑃𝑅𝐸𝐶𝐿𝑂𝑆𝐸),1),𝐴𝑀𝑂𝑈𝑁𝑇),20) VWAP = Amount / Volume: 计算VWAP后需要用复权因子进行调整,否则VWAP与PRECLOSE计算出来的收益率存在跳空现象 价格序列需要进行复权因子调整:因为后续需要进行滚动回归,不进行复权因子调整会出现价格不连续 标准化过程可能会出现最大值等于最小值情况,分母为零,出现无限大,将值换为空值 进行大小比较时,若存在空值则为空 :param data: :param high_name: 最高价 :param close_name: 收盘价 :param amount_name: 成交额 :param volume_name: 成交量 :param adj_factor_name: 复权因子 :return: """ # 设置双重索引并且排序 data.set_index([KN.TRADE_DATE.value, KN.STOCK_ID.value], inplace=True) data.sort_index(inplace=True) data['VWAP'] = data[amount_name] / data[volume_name] * data[ adj_factor_name] # 生成Y cal_sub1 = data[[close_name, 'VWAP' ]].groupby(KN.STOCK_ID.value, group_keys=False).apply( lambda x: x['VWAP'] / x[close_name].shift(1) - 1) cal_sub1 = cal_sub1.droplevel(0) data['reg_y'] = -np.sign(cal_sub1) * np.log(abs(cal_sub1)) # 生成X cal_sub2 = data[[high_name, 'VWAP']].groupby(KN.STOCK_ID.value).apply( lambda x: x[high_name] / x[close_name].shift(1) - 1) data['return_sta'] = cal_sub2.groupby(KN.TRADE_DATE.value).apply( lambda x: (x - x.min()) / (x.max() - x.min())) # 处理无限大值 data[np.isinf(data['return_sta'])] = np.nan data['volume_sta'] = data[amount_name].groupby( KN.TRADE_DATE.value).apply(lambda x: (x - x.min()) / (x.max() - x.min())) # 处理无限大值 data[np.isinf(data['volume_sta'])] = np.nan data['reg_x'] = data[['return_sta', 'volume_sta']].min(axis=1, skipna=False) # 滚动回归 result = data[['reg_x', 'reg_y']].groupby( KN.TRADE_DATE.value, group_keys=False).apply(lambda x: pd.Series( index=x.index) if len(x) < 20 else PandasRollingOLS( x=x['reg_x'], y=x['reg_y'], window=20).beta['feature1']) result.name = sys._getframe().f_code.co_name return result