Python PandasRollingOLS示例，pyfinance.ols.PandasRollingOLS Python示例

示例#1

0

显示文件

    def _reg_rolling_APM(reg_: pd.DataFrame,
                         x1: str,
                         y1: str,
                         x2: str,
                         y2: str,
                         has_const: bool = False,
                         use_const: bool = True,
                         window: int = 20) -> pd.Series:
        # print(reg_.index[0])
        if len(reg_) <= window:
            res = pd.Series(index=reg_.index)
        else:
            reg_object_am = PandasRollingOLS(x=reg_[x1],
                                             y=reg_[y1],
                                             has_const=has_const,
                                             use_const=use_const,
                                             window=window)

            reg_object_pm = PandasRollingOLS(x=reg_[x2],
                                             y=reg_[y2],
                                             has_const=has_const,
                                             use_const=use_const,
                                             window=window)

            diff_resids = reg_object_am._resids - reg_object_pm._resids
            stat = np.nanmean(diff_resids, axis=1) / np.nanstd(
                diff_resids, axis=1, ddof=1) * np.sqrt(window)
            res = pd.Series(stat, index=reg_object_am.index[window - 1:])
        return res

示例#2

0

显示文件

文件： protocol_counting_alerts.py 项目： vmgmarv/Different-fitting-algorithms-and-ROC-for-deep-seated-landslide-sensors

def velocity(df):
    """Add absolute rolling-OLS betas of ``lws_xz``/``lws_xy`` against ``td``.

    Each regression uses a 7-row window; the result is left-padded with
    ``window - 1`` NaNs and stored in ``vel_xz`` / ``vel_xy``.  ``df`` is
    modified in place and returned.
    """
    window = 7
    nan_pad = [np.nan] * (window - 1)

    fit_xz = PandasRollingOLS(y=df.lws_xz, x=df.td, window=window)
    df['vel_xz'] = nan_pad + list(abs(fit_xz.beta.values))

    fit_xy = PandasRollingOLS(y=df.lws_xy, x=df.td, window=window)
    df['vel_xy'] = nan_pad + list(abs(fit_xy.beta.values))

    return df

示例#3

0

显示文件

文件： proc.py 项目： jgeliberte/cbews_iloilo

def node_inst_vel(df, roll_window_numpts, start):
    """Attach rounded rolling-OLS betas of ``xz`` and ``xy`` against ``td``.

    Both regressions run over the full frame; afterwards only rows whose
    index is ``>= start`` are kept and receive ``vel_xz`` / ``vel_xy``
    (the ``'feature1'`` beta column, rounded to 4 decimals).
    """
    beta_xz = PandasRollingOLS(y=df.xz, x=df.td,
                               window=roll_window_numpts).beta
    beta_xy = PandasRollingOLS(y=df.xy, x=df.td,
                               window=roll_window_numpts).beta

    df = df.loc[df.index >= start]

    recent_xz = beta_xz[beta_xz.index >= start]
    recent_xy = beta_xy[beta_xy.index >= start]
    slope_xz = recent_xz['feature1'].values
    slope_xy = recent_xy['feature1'].values

    df.loc[:, 'vel_xz'] = np.round(slope_xz, 4)
    df.loc[:, 'vel_xy'] = np.round(slope_xy, 4)
    return df

示例#4

0

显示文件

文件： PairsTrading.py 项目： Sdoof/sav1

def Spread_RollingRegression(df,pair1,pair2):
    """Return the spread ``df[pair1] - beta * df[pair2]`` as a one-column frame.

    ``beta`` is the ``'feature1'`` coefficient of a 20-row
    ``PandasRollingOLS`` built from ``df[pair1]`` and ``df[pair2]``
    (passed positionally, in that order).  Rows containing NaN are dropped.
    """
    column_name = '{}_{}'.format(pair1, pair2)
    # Hedge ratio from the rolling regression.
    hedge_ratio = PandasRollingOLS(df[pair1], df[pair2],
                                   window=20).beta['feature1']

    spread = pd.DataFrame()
    spread[column_name] = df[pair1] - hedge_ratio * df[pair2]
    spread.dropna(inplace=True)
    return spread

示例#5

0

显示文件

def slope_intercept(df, roll_window_numpts, start):
    """Attach rounded rolling-OLS slope and intercept of ``magnitude`` vs ``td``.

    The regression window is fixed at 7 rows (``roll_window_numpts`` is not
    used).  Only rows with index ``>= start`` are kept; they receive the
    columns ``slope`` and ``intercept``, both rounded to 4 decimals.
    """
    slope_frame = PandasRollingOLS(y=df.magnitude, x=df.td, window=7).beta
    alpha_values = PandasRollingOLS(y=df.magnitude, x=df.td, window=7).alpha
    intercept_frame = pd.DataFrame(alpha_values, columns=['intercept'])

    df = df.loc[df.index >= start]

    recent_slope = slope_frame[slope_frame.index >= start]
    df.loc[:, 'slope'] = np.round(recent_slope['feature1'].values, 4)

    recent_intercept = intercept_frame[intercept_frame.index >= start]
    df.loc[:, 'intercept'] = np.round(
        recent_intercept['intercept'].values, 4)

    return df

示例#6

0

显示文件

文件： Factor_Calculator.py 项目： yuba316/FactorBackTest

def RegBeta(factor, x, y, num, name):
    """Store a rolling-OLS beta in ``factor[name]`` and blank warm-up rows.

    A deep copy of the ``trade_date``/``ts_code``/``x``/``y`` columns is
    sorted by code then date and passed to ``PandasRollingOLS`` (``x``
    column first, ``y`` column second, window ``num``).  The first
    ``num - 1`` rows of every ``ts_code`` group are then set to NaN.
    ``factor`` is modified in place and returned.
    """
    needed = ['trade_date', 'ts_code', x, y]
    ordered = copy.deepcopy(factor[needed])
    ordered.sort_values(by=['ts_code', 'trade_date'], inplace=True)

    factor[name] = PandasRollingOLS(ordered[x], ordered[y], num).beta

    warmup_rows = factor.groupby('ts_code').head(num - 1).index
    factor.loc[warmup_rows, name] = np.nan
    return factor

示例#7

0

显示文件

文件： lowess_protocol.py 项目： vmgmarv/Different-fitting-algorithms-and-ROC-for-deep-seated-landslide-sensors

def acceleration(vel_,td):
    """Return rolling-OLS beta values of ``vel_`` against the tail of ``td``.

    ``td`` is trimmed from the front so that it has as many entries as
    ``vel_``; the regression window is 7.
    """
    offset = len(td) - len(vel_)
    response = pd.Series(vel_)
    regressor = pd.Series(td[offset:])
    model = PandasRollingOLS(y=response, x=regressor, window=7)
    return model.beta.values

示例#8

0

显示文件

def get_spread(data):
    """Build the rolling-beta spread between the first two columns of ``data``.

    The second column is regressed on the first with a 50-row window; the
    returned one-column frame (named ``"<second>-<first>"``) holds
    ``second - beta * first`` with NaN rows dropped.
    """
    column_names = list(data)
    x_name = column_names[0]
    y_name = column_names[1]
    label = y_name + '-' + x_name

    rolling_fit = PandasRollingOLS(y=data[y_name], x=data[x_name], window=50)
    hedge_ratio = rolling_fit.beta['feature1']

    result = pd.DataFrame()
    result[label] = data[y_name] - hedge_ratio * data[x_name]
    result.dropna(inplace=True)
    return result

示例#9

0

显示文件

文件： Bollinger_Band.py 项目： superidylle/modelling_project

    def beta_calculation(self):
        """Load data, fit a rolling OLS and store its beta in ``self.data``.

        ``self.data`` is replaced by ``self.import_data()``, then trimmed to
        start at row ``self.lookback - 1`` and given a ``betas`` column.
        Returns the updated ``self.data``.
        """
        self.data = self.import_data()

        response = self.data[self.y_ticker]
        regressor = self.data[self.x_ticker]
        rolling_fit = PandasRollingOLS(y=response,
                                       x=regressor,
                                       window=self.lookback)

        first_kept_row = self.lookback - 1
        self.data = self.data[first_kept_row:]
        self.data['betas'] = rolling_fit.beta
        return self.data

示例#10

0

显示文件

    def fit(self, x_train, y_train, standardize=False):
        """Remember the training data and build the rolling regressor.

        When ``standardize`` is truthy, ``self.standardizescaler`` is fitted
        on ``x_train`` and the transformed features are used for the
        regression; the stored ``self.x_train`` keeps the raw input.
        """
        self.x_train = x_train
        self.y_train = y_train
        self.standardize = standardize

        features = x_train
        if self.standardize:
            self.standardizescaler.fit(features)
            features = self.standardizescaler.transform(features)

        self.regressor = PandasRollingOLS(
            y=y_train,
            x=features,
            window=self.window_size,
            has_const=self.has_const,
            use_const=self.use_const)

示例#11

0

显示文件

def accel(df, roll_window_numpts, start):
    """Attach the rounded rolling-OLS beta of ``velocity`` vs ``td``.

    The window is fixed at 144 rows (``roll_window_numpts`` is not used).
    Only rows at or after ``start + 75.5 hours`` are kept; they receive an
    ``acceleration`` column rounded to 4 decimals.
    """
    beta_frame = PandasRollingOLS(y=df.velocity, x=df.td, window=144).beta

    df = df.loc[df.index >= start + timedelta(hours=75.5)]

    keep = beta_frame.index >= start + timedelta(hours=75.5)
    slope_values = beta_frame[keep]['feature1'].values
    df.loc[:, 'acceleration'] = np.round(slope_values, 4)

    return df

示例#12

0

显示文件

def node_inst_vel(df, roll_window_numpts, start):
    """Attach the rounded rolling-OLS beta of ``magnitude`` vs ``td``.

    The window is fixed at 7 rows (``roll_window_numpts`` is not used).
    Only rows at or after ``start + 3.5 hours`` are kept; they receive a
    ``velocity`` column rounded to 4 decimals.
    """
    beta_frame = PandasRollingOLS(y=df.magnitude, x=df.td, window=7).beta

    df = df.loc[df.index >= start + timedelta(hours=3.5)]

    keep = beta_frame.index >= start + timedelta(hours=3.5)
    slope_values = beta_frame[keep]['feature1'].values
    df.loc[:, 'velocity'] = np.round(slope_values, 4)

    return df

示例#13

0

显示文件

文件： roc_lowess.py 项目： vmgmarv/Different-fitting-algorithms-and-ROC-for-deep-seated-landslide-sensors

def node_inst_vel(filled_smoothened, roll_window_numpts, start):
    """Fit a rolling OLS of ``xz`` against ``td`` and return the model.

    The input is printed first (kept from the original for tracing).  If
    building the model fails, ``'Error'`` is printed and ``None`` is
    returned.  The original fell through to ``return lr_xz`` with the name
    unbound and raised ``UnboundLocalError`` instead.

    :param filled_smoothened: object exposing ``xz`` and ``td``
    :param roll_window_numpts: rolling window length
    :param start: unused
    :return: the ``PandasRollingOLS`` instance, or ``None`` on failure
    """
    print (filled_smoothened)
    lr_xz = None
    try:
        lr_xz = PandasRollingOLS(y=filled_smoothened.xz, x=filled_smoothened.td,
                    window=roll_window_numpts)
    except Exception:
        # Best-effort: report and hand back None, but let KeyboardInterrupt /
        # SystemExit propagate (a bare ``except`` would swallow those too).
        print('Error')
    return lr_xz

示例#14

0

显示文件

def _reg_beta(data_x, data_y, window):
    window = window[0]
    if type(window) != int:
        return np.zeros(len(data_x))
    data = pd.DataFrame({'x': data_x.flatten(), 'y': data_y.flatten()})
    res = PandasRollingOLS(data['x'], data['y'], window)
    data['beta'] = res.beta
    value = np.array(data['beta'].tolist())
    value = np.nan_to_num(value)
    return value

示例#15

0

显示文件

    def get_spread(self, close_df):
        """Build the rolling-beta spread between the first two columns.

        The first column is regressed on the second with a 48-row window;
        the returned one-column frame (named ``"<first>-<second>"``) holds
        ``first - beta * second`` with NaN rows dropped.
        """
        column_names = list(close_df)
        first = column_names[0]
        second = column_names[1]
        label = first + '-' + second

        rolling_fit = PandasRollingOLS(y=close_df[first],
                                       x=close_df[second],
                                       window=48)
        hedge_ratio = rolling_fit.beta['feature1']

        result = pd.DataFrame()
        result[label] = close_df[first] - hedge_ratio * close_df[second]
        result.dropna(inplace=True)
        return result

示例#16

0

显示文件

文件： Factor_gplearn_2.py 项目： yuba316/FactorBackTest

def _ts_beta(df1, df2, window):  # 历史滚动回归系数
    window = window[0]
    if type(window) != int:
        return np.zeros(len(df1))
    df = pd.DataFrame({'0': df1, '1': df2})
    df['time'] = trade_date
    df['code'] = stock_code
    res = PandasRollingOLS(df['0'], df['1'], window)
    df['res'] = res.beta
    index = df.groupby('code').head(window - 1).index
    df.loc[index, 'res'] = np.nan
    value = df['res']

    return np.nan_to_num(value.values)

示例#17

0

显示文件

def test_const_false():
    """Check betas when use_const=False and has_const=False (see Issue # 6)."""
    window = 2
    response = pd.DataFrame(np.arange(0, 10, 2) + 1, columns=['Y'])
    regressor = pd.DataFrame(np.arange(5), columns=['X'])
    reg_df = pd.concat([response, regressor], axis=1)

    y_series = reg_df.iloc[:, 0]   # Series
    x_frame = reg_df.iloc[:, 1:]   # DataFrame
    rr = PandasRollingOLS(y=y_series,
                          x=x_frame,
                          window=window,
                          has_const=False,
                          use_const=False)

    expected = np.array([[3.], [2.6], [2.38461538], [2.28]])
    assert np.allclose(rr.beta.values, expected)

示例#18

0

显示文件

文件： TechnicalMomentumFactor.py 项目： xiaotfeng/SecuritySelect

    def _reg_rolling(reg_: pd.DataFrame,
                     x_name: str,
                     y_name: str,
                     has_const: bool = False,
                     use_const: bool = True,
                     window: int = 20) -> pd.Series:

        if len(reg_) <= window:
            alpha = pd.Series(index=reg_.index)
        else:
            reg_object = PandasRollingOLS(x=reg_[x_name],
                                          y=reg_[y_name],
                                          has_const=has_const,
                                          use_const=use_const,
                                          window=window)
            alpha = reg_object.alpha
        return alpha

示例#19

0

显示文件

文件： roc_lowess.py 项目： vmgmarv/Different-fitting-algorithms-and-ROC-for-deep-seated-landslide-sensors

def acceleration(vel, td, thresh):
    """Return the rolling-OLS beta values of ``vel`` against ``td``.

    The regression window is 7.  ``thresh`` is accepted but not used.
    """
    response = pd.Series(vel)
    regressor = pd.Series(td)
    model = PandasRollingOLS(y=response, x=regressor, window=7)
    return model.beta.values

示例#20

0

显示文件

文件： test_ols.py 项目： vilasfe/pyfinance

def test_datareader_frame():
    """Check beta type, shape and leading values on the ``pdr.csv`` fixture."""
    import os.path

    here = os.path.dirname(os.path.abspath(__file__))
    data = pd.read_csv(os.path.join(here, "pdr.csv"))

    response = data["usd"]
    regressors = data.drop("usd", axis=1)
    window = 12  # months
    model = PandasRollingOLS(y=response, x=regressors, window=window)

    betas = model.beta
    assert isinstance(betas, pd.DataFrame)
    assert betas.shape == (219, 2)

    expected_head = np.array([
        [3.28409826e-05, -5.42606172e-02],
        [2.77474638e-04, -1.88556396e-01],
        [2.43179753e-03, -2.94865331e-01],
        [2.79584924e-03, -3.34879522e-01],
        [2.44759386e-03, -2.41902450e-01],
    ])
    assert np.allclose(model.beta.head().values, expected_head)

示例#21

0

显示文件

 def _reg_rolling(reg_: pd.DataFrame,
                  x_name: str,
                  y_name: str,
                  win: int,
                  has_cons: bool = False):
     if len(reg_) <= win:
         res = pd.Series(index=reg_.index)
     else:
         try:
             X = reg_[x_name]
             Y = reg_[y_name]
             reg_object = PandasRollingOLS(x=X,
                                           y=Y,
                                           has_const=False,
                                           use_const=has_cons,
                                           window=win)
             res = pd.Series(reg_object._resids[:, -1],
                             index=reg_.index[win - 1:])
         except Exception as e:
             print(e)
             res = pd.Series(index=reg_.index)
     return res

示例#22

0

显示文件

文件： backtester.py 项目： jaNGOB/pt_switzerland

def hr_signal(df, stock1, stock2):
    """Derive z-score entry/exit flags from a rolling-beta spread.

    ``stock1`` is regressed on ``stock2`` with a 20-row window.  After
    dropping NaN rows the spread ``stock1 - beta * stock2`` is z-scored
    over the whole remaining sample and compared with an entry level of 2
    and an exit level of 0.  Returns the working frame with the columns
    ``beta``, ``spread``, ``zscore``, ``longsignal``, ``shortsignal``,
    ``closelong`` and ``closeshort``.
    """
    window = 20
    entry_level = 2
    exit_level = 0

    temp = pd.DataFrame()
    temp[stock1] = df[stock1]
    temp[stock2] = df[stock2]

    rolling_fit = PandasRollingOLS(y=temp[stock1], x=temp[stock2],
                                   window=window)
    temp['beta'] = rolling_fit.beta
    temp = temp.dropna()

    temp['spread'] = temp[stock1] - temp.beta*temp[stock2]
    spread_mean = np.mean(temp['spread'])
    spread_std = np.std(temp['spread'])
    temp['zscore'] = (temp['spread'] - spread_mean)/spread_std

    below_entry = temp['zscore'] <= -entry_level
    below_exit = temp['zscore'] < -exit_level
    temp['longsignal'] = below_entry & below_exit

    above_entry = temp['zscore'] >= entry_level
    above_exit = temp['zscore'] > exit_level
    temp['shortsignal'] = above_entry & above_exit

    temp['closelong'] = (temp['zscore'] >= -exit_level)
    temp['closeshort'] = (temp['zscore'] <= exit_level)

    return temp

示例#23

0

显示文件

文件： a2task1_803.py 项目： dcrainsailing/Asset-Pricing-Code

def rolling_ndays_ffmodels(ndays=90):
    """Plot rolling betas of each ETF's daily return on Mkt_RF, SMB and HML.

    For every ticker in a fixed list an ``ETF`` object is loaded for
    2010-01-01 .. 2019-09-14, its daily return is merged with the
    module-level ``ff.data`` on ``Date``, a ``PandasRollingOLS`` with
    window ``ndays`` is fitted and the betas are shown with matplotlib.
    Returns ``None``.
    """
    register_matplotlib_converters()
    tickers = [
        'SPY', 'XLB', 'XLE', 'XLF', 'XLI', 'XLK', 'XLP', 'XLU', 'XLV', 'XLY'
    ]

    for ticker in tickers:
        etf = ETF(ticker, '2010-01-01', '2019-09-14')
        etf.price_acquire()

        close = etf.data['Close']
        etf.data['ETF_Daily_return'] = (close / close.shift(1) - 1)
        etf.data['Date'] = etf.data['Date'].apply(
            lambda day: day.strftime("%Y%m%d"))

        merged = pd.DataFrame.merge(etf.data, ff.data, how='left', on='Date')
        merged = merged.dropna(axis=0, how='any')

        model = PandasRollingOLS(y=merged.ETF_Daily_return,
                                 x=merged[['Mkt_RF', 'SMB', 'HML']],
                                 window=ndays)

        dates = (merged.loc[ndays:, 'Date']).reset_index(drop=True)
        dates = dates.apply(
            lambda text: datetime.datetime.strptime(text, "%Y%m%d"))

        plt.plot(dates, model.beta)
        plt.title('Beta to the Fama-French factors of ETF:' + ticker)
        plt.show()

示例#24

0

显示文件

文件： ICFactorBacktest.py 项目： yuba316/yuba316

def getSignal(index): # define the factors here
    """Derive factor columns from a price/volume frame and return a subset.

    Works on a deep copy of ``index``.  Reads the columns ``open``, ``high``,
    ``low``, ``close`` and ``volume``, and returns ``trade_date`` together
    with the factor columns named in the final selection.  ``ta`` and
    ``PandasRollingOLS`` come from outside this function.
    """
    df = copy.deepcopy(index)
    
    # Previous close and the open's relative gap to it.
    df['pre_close'] = df['close'].shift(1)
    df['HnLOpen'] = (df['open']-df['pre_close'])/df['pre_close']
    
    # Relative deviation of close from its 10-row rolling mean.
    df['period'] = df['close'].rolling(window=10).mean()
    df['MA'] = (df['close']-df['period'])/df['period']
    
    # Gap between two EWMs of close (alpha 2/13 and 2/27), the EWM of that
    # gap (alpha 2/10), and twice their difference stored as 'macd'.
    df['DIF'] = df['close'].ewm(min_periods=12,adjust=False,alpha=2/(1+12)).mean()-df['close'].ewm(min_periods=26,adjust=False,alpha=2/(1+26)).mean()
    df['DEA'] = df['DIF'].ewm(min_periods=9,adjust=False,alpha=2/(1+9)).mean()
    df['macd'] = 2*(df['DIF']-df['DEA'])
    #df['macd'] = ta.MACD(df['close'],12,26,9)[2]
    
    # Ratio (x100) of the EWM of positive close changes to the EWM of
    # absolute close changes.
    df['max'] = df.apply(lambda x: max(x['close']-x['pre_close'],0),axis=1)
    df['abs'] = df.apply(lambda x: abs(x['close']-x['pre_close']),axis=1)
    df['RSI'] = 100*df['max'].ewm(min_periods=14,adjust=False,alpha=2/(1+14)).mean()/df['abs'].ewm(min_periods=14,adjust=False,alpha=2/(1+14)).mean()
    #df['RSI'] = ta.RSI(df['close'],14)
    
    # ta.ATR scaled by the 10-row mean close, and ta.ADX as returned.
    df['ATR'] = (ta.ATR(df['high'],df['low'],df['close'],10))/df['period']
    df['ADX'] = ta.ADX(df['high'],df['low'],df['close'],14)
    
    # 'sell' is built on top of 'buy'; dmkTD is the ratio of their 9-row sums.
    df['buy'] = df.apply(lambda x: max(x['high']-x['pre_close'],0),axis=1)+df['close']-df['low']
    df['sell'] = df['buy']+df.apply(lambda x: max(x['pre_close']-x['low'],0),axis=1)+df['high']-df['close']
    df['dmkTD'] = df['buy'].rolling(window=9).sum()/df['sell'].rolling(window=9).sum()
    
    # 'turn' is assigned four times (here, twice below, and once after the
    # KDJ block); the order of these statements matters.
    df['pre_low'] = df['low'].shift(1)
    df['l_max'] = df.apply(lambda x: max(x['low']-x['pre_low'],0),axis=1)
    df['l_abs'] = df.apply(lambda x: abs(x['low']-x['pre_low']),axis=1)
    df['sma'] = df['l_abs'].rolling(window=3).mean()/df['l_max'].rolling(window=3).mean()*1000
    df['ema'] = df['sma'].ewm(min_periods=3,adjust=False,alpha=2/(1+3)).mean()
    df['ll'] = df['low'].rolling(window=13).min()
    df['hh'] = df['ema'].rolling(window=13).max()
    df['turn'] = df.apply(lambda x: (x['ema']+x['hh']*2)/2 if x['low']<=x['ll'] else 0,axis=1)
    df['turn'] = df['turn'].ewm(min_periods=3,adjust=False,alpha=2/(1+3)).mean()/618
    # Cap 'turn' at 500.
    df['turn'] = df['turn'].apply(lambda x: 500 if x>500 else x)
    
    # RSV: position of close inside the 9-row low/high range, in percent.
    df['low_n'] = df['low'].rolling(window=9).min()
    df['high_n'] = df['high'].rolling(window=9).max()
    df['RSV'] = (df['close']-df['low_n'])/(df['high_n']-df['low_n'])*100
    n = len(df)
    K,D,J = [],[],[]
    # While RSV is NaN, K and D are seeded with 50 and J with NaN; afterwards
    # K and D are 2/3 of their previous value plus 1/3 of the new input.
    # The K[-1]/D[-1] lookups rely on at least one leading NaN RSV row.
    for i in range(n):
        if df['RSV'].isnull().iloc[i]:
            K.append(50)
            D.append(50)
            J.append(np.nan)
        else:
            K.append(K[-1]*2/3+df['RSV'].iloc[i]/3)
            D.append(D[-1]*2/3+K[-1]/3)
            J.append(3*K[-1]-2*D[-1])
    df['K'] = K
    df['D'] = D
    df['KDJ'] = J
    df['KDJ_1'] = df['KDJ'].shift(1)
    df['KDJ_2'] = df['KDJ'].shift(2)
    # Keep 'turn' only where the previous KDJ is below both its neighbours.
    df['turn'] = df.apply(lambda x: x['turn'] if ((x['KDJ']>x['KDJ_1']) and (x['KDJ_2']>x['KDJ_1'])) else 0, axis=1)
    
    # Three 15-row rolling regressions; each price series is the first
    # positional argument and DAY (a copy of df.index) the second.
    df['DAY'] = df.index
    up,dw,mid = PandasRollingOLS(df['high'],df['DAY'],15),PandasRollingOLS(df['low'],df['DAY'],15),PandasRollingOLS(df['close'],df['DAY'],15)
    df['upA'],df['upB'],df['dwA'],df['dwB'],df['midA'],df['midB'] = up.alpha,up.beta,dw.alpha,dw.beta,mid.alpha,mid.beta
    # NOTE(review): all three errors measure 'high' against the fitted line,
    # including the 'low' and 'close' fits - confirm this is intended.
    df['err_up'] = (df['high']-df['DAY']*df['upB']-df['upA'])/df['high']
    df['err_dw'] = (df['high']-df['DAY']*df['dwB']-df['dwA'])/df['high']
    df['err_mid'] = (df['high']-df['DAY']*df['midB']-df['midA'])/df['high']
    
    # Relative distance of close from the 15-row mean high / mean low.
    df['meanHigh'] = (df['close']-df['high'].rolling(window=15).mean())/df['close']
    df['meanLow'] = (df['close']-df['low'].rolling(window=15).mean())/df['close']
    
    # Non-positive returns per unit volume: 20-row sum divided by the number
    # of strictly negative returns in the window, scaled by 10**5.
    df['rtn'] = (df['close']-df['pre_close'])/df['pre_close']
    df['dwrtn'] = df['rtn'].apply(lambda x: 0 if x>0 else x)
    df['negILLIQ'] = df['dwrtn']/df['volume']
    df['negILLIQ'] = 10**5*df['negILLIQ'].rolling(window=20).sum()/(df['dwrtn']<0).rolling(window=20).sum()
    
    # High-low range relative to the previous close: 10-row mean and the
    # 20-row std/mean ratio.
    df['rng'] = (df['high']-df['low'])/df['pre_close']
    df['ILLIQ'] = df['rng'].rolling(window=10).mean()
    df['cvILLIQ'] = df['rng'].rolling(window=20).std()/df['rng'].rolling(window=20).mean()
    
    # Volume relative to its 40-row 75% quantile, divided by the larger of the two.
    df['vol_quantile'] = df['volume'].rolling(window=40).quantile(.75, interpolation='lower')
    df['vol_max'] = df.apply(lambda x: x['volume'] if x['volume']>x['vol_quantile'] else x['vol_quantile'],axis=1)
    df['vol_increase'] = (df['volume']-df['vol_quantile'])/df['vol_max']
    
    return df[['trade_date','pre_close','HnLOpen','MA','macd','RSI','ATR','ADX','dmkTD','turn',\
               'err_up','err_dw','err_mid','meanHigh','meanLow','negILLIQ','cvILLIQ','ILLIQ','vol_increase']]

示例#25

0

显示文件

文件： roc_lowess.py 项目： vmgmarv/Different-fitting-algorithms-and-ROC-for-deep-seated-landslide-sensors

                timestamp = np.array(node.ts.values)
                df_td = np.array(node.td.values)
                
                rol_xz = fitting.rolling_window(xz, window=17)
                timestamp = fitting.rolling_window(timestamp, window=17)
                df_td = fitting.rolling_window(df_td, window=17)
                
                lws_xz, lst_val, ts, n_td = fitting.rolling_lws(rol_xz, timestamp, df_td)
                lst_val = np.array(lst_val)
#                
#                lws = low_ess(xz,np.arange(len(xz)), fraction = 0.1)
#                lws = lws[:,1]
#                
#                lws = pd.Series(lws)
                
                vel = PandasRollingOLS(y=pd.Series(lst_val),x=pd.Series(lst_val),window=7)
                vel = abs(vel.beta.values)
                vel = vel[0:2000]
                td = df_td[0:2000]
#                vel = PandasRollingOLS(y=lws, x=node.td,window=7)
#                vel = abs(vel.beta.values)
#                
#                vel = vel[0:20000]
#                
                accel = acceleration(vel,td,t)
#                pred = current_pred(vel,t)
#                
#                start_index = len(pred) - len(act)
#            
#                actual.append(act)
#                predicted.append(pred[start_index:])

示例#26

0

显示文件

文件： index.py 项目： tonylibing/blueblood

def roll_slope(data0, data1, per):
    """Return the rolling-OLS ``beta`` of ``data1`` (y) on ``data0`` (x)."""
    rolling_fit = PandasRollingOLS(y=data1, x=data0, window=per)
    return rolling_fit.beta

示例#27

0

显示文件

文件： lowess_protocol.py 项目： vmgmarv/Different-fitting-algorithms-and-ROC-for-deep-seated-landslide-sensors

def velocity(df):
    """Return absolute rolling-OLS betas of ``lws`` vs ``td`` as a padded list.

    The regression window is 7; the list starts with ``window - 1`` NaNs.
    """
    window = 7
    rolling_fit = PandasRollingOLS(y=df.lws, x=df.td, window=window)
    nan_pad = [np.nan] * (window - 1)
    return nan_pad + list(abs(rolling_fit.beta.values))

示例#28

0

显示文件

文件： bubbles_picture_analysis.py 项目： Alonabeep/yalab

        'time': time
    })
    return img_data


if __name__ == '__main__':
    dir_path = 'D:\\Users\\yonat\\Desktop\\HUJI\\HUJI Homework\\Advanced Physics Lab A\\Water Heating - Experiment B\\Camera Pics\\8.6.2020\\main exp pics\\'
    results_path = 'D:\\Users\\yonat\\Desktop\\HUJI\\HUJI Homework\\Advanced Physics Lab A\\Water Heating - Experiment B\\Results\\8.6.2020\\bubble_pixels_ex1.csv'
    experiment_path = 'D:\\Users\\yonat\\Desktop\\HUJI\\HUJI Homework\\Advanced Physics Lab A\\Water Heating - Experiment B\\Results\\8.6.2020\\main_run_data.csv'

    exp_data = read_experiment_data(experiment_path, cooling_measurement=True)
    # bubbles_data = analyze_all_images_in_dir(dir_path)
    bubbles_data = pd.read_csv(results_path)

    window_size = 20
    exp_data['temp_change_rate'] = PandasRollingOLS(x=exp_data.time, y=exp_data.temp, window=window_size).beta \
        .shift(-int(window_size / 2))

    fig, axs = plt.subplots(2, sharex=True)
    exp_data.plot(x='time',
                  y='temp_change_rate',
                  linestyle='None',
                  marker='.',
                  grid=True,
                  ax=axs[0],
                  label='Temperature rate of change',
                  c='k')
    # exp_data.plot(x='time', y='temp', linestyle='None', marker='.', grid=True, ax=ax)

    scale_const = 2e-7
    secondary_axis = axs[0].secondary_yaxis(
        'right',

示例#29

0

显示文件

# Load the 'EWA EWC' price file from the configured input directory,
# indexed by its 'Date' column.
data = pd.read_csv(config.input_data_path + '/' + 'EWA EWC' + '.csv',
                   index_col='Date')

x = data['EWA']
y = data['EWC']

x_ticker = 'EWA'
y_ticker = 'EWC'

# Degree-1 polynomial fit of y on x, evaluated on 1000 evenly spaced x values.
k = np.polyfit(x, y, 1)
xx = np.linspace(min(x), max(x), 1000)
yy = np.polyval(k, xx)

# Rolling regression of y on x; afterwards the first lookback-1 rows of
# `data` are dropped before the betas are attached.
lookback = 100
modelo2 = PandasRollingOLS(y=y, x=x, window=lookback)
data = data[lookback - 1:]
betas = modelo2.beta

data['beta'] = betas

# NOTE(review): the rolling beta comes from y=EWC on x=EWA, yet this computes
# EWA - beta * EWC - confirm the intended direction.
data['numunits'] = data.apply(lambda x: x[x_ticker] - x['beta'] * x[y_ticker],
                              axis=1)

# Full-sample OLS of y on x; `x` and `y` still hold the untrimmed columns
# captured before `data` was sliced.
model = smf.OLS(y, x)
results = model.fit()


def cointegration_test(y, x):
    """Fit ``smf.OLS(y, x)`` and return ``ts.adfuller`` of its residuals.

    ``adfuller`` is called with ``maxlag=1``; its result is returned as is.
    """
    residuals = smf.OLS(y, x).fit().resid
    return ts.adfuller(residuals, maxlag=1)

示例#30

0

显示文件

    def alpha2_genetic_TFZZ(
            cls,
            data: pd.DataFrame,
            high_name: str = PVN.HIGH.value,
            close_name: str = PVN.CLOSE.value,
            amount_name: str = PVN.AMOUNT.value,
            volume_name: str = PVN.VOLUME.value,
            adj_factor_name: str = PVN.ADJ_FACTOR.value) -> pd.Series:
        """
        alpha2 factor, taken from: <<20200220 - TF Securities - Price/volume
        factor mining based on gene expression programming>>
        alpha2 formula: Alpha2: ts_regbeta(neg(s_log(sub(div(VWAP,PRECLOSE),1))),
                                min(sub(div(HIGH,PRECLOSE),1),AMOUNT),20)

        VWAP = Amount / Volume: after computing VWAP it must be adjusted with
        the adjustment factor, otherwise the return computed from VWAP and
        PRECLOSE shows gaps.

        The price series must be adjusted with the adjustment factor: a rolling
        regression follows, and unadjusted prices would be discontinuous.
        During standardisation the maximum may equal the minimum, giving a zero
        denominator and infinite values; those are replaced with NaN.
        When taking the minimum, the result is NaN if either input is NaN.

        Note: ``data`` is re-indexed and sorted in place, and gains the columns
        VWAP, reg_y, return_sta, volume_sta and reg_x.

        :param data: input frame containing the trade-date and stock-id columns
        :param high_name: high price
        :param close_name: close price
        :param amount_name: traded amount
        :param volume_name: traded volume
        :param adj_factor_name: adjustment factor
        :return: Series named after this function
        """
        # Set the two-level index (trade date, stock id) and sort it.
        data.set_index([KN.TRADE_DATE.value, KN.STOCK_ID.value], inplace=True)
        data.sort_index(inplace=True)

        data['VWAP'] = data[amount_name] / data[volume_name] * data[
            adj_factor_name]

        # Build Y
        cal_sub1 = data[[close_name, 'VWAP'
                         ]].groupby(KN.STOCK_ID.value, group_keys=False).apply(
                             lambda x: x['VWAP'] / x[close_name].shift(1) - 1)
        cal_sub1 = cal_sub1.droplevel(0)

        data['reg_y'] = -np.sign(cal_sub1) * np.log(abs(cal_sub1))

        # Build X
        # NOTE(review): the lambda reads x[close_name], but only high_name and
        # 'VWAP' are selected here - looks like a KeyError; confirm.
        cal_sub2 = data[[high_name, 'VWAP']].groupby(KN.STOCK_ID.value).apply(
            lambda x: x[high_name] / x[close_name].shift(1) - 1)

        # Min-max scaling within each trade date.
        data['return_sta'] = cal_sub2.groupby(KN.TRADE_DATE.value).apply(
            lambda x: (x - x.min()) / (x.max() - x.min()))

        # Handle infinite values
        # NOTE(review): a boolean row mask on the frame assigns NaN to every
        # column of the matching rows, not only 'return_sta' - confirm.
        data[np.isinf(data['return_sta'])] = np.nan

        data['volume_sta'] = data[amount_name].groupby(
            KN.TRADE_DATE.value).apply(lambda x: (x - x.min()) /
                                       (x.max() - x.min()))

        # Handle infinite values
        data[np.isinf(data['volume_sta'])] = np.nan

        # Row-wise minimum; skipna=False keeps NaN when either input is NaN.
        data['reg_x'] = data[['return_sta', 'volume_sta']].min(axis=1,
                                                               skipna=False)

        # Rolling regression
        # NOTE(review): groups are formed by trade date, so each 20-row window
        # runs across the rows of one date - confirm whether grouping by
        # stock id was intended.
        result = data[['reg_x', 'reg_y']].groupby(
            KN.TRADE_DATE.value, group_keys=False).apply(lambda x: pd.Series(
                index=x.index) if len(x) < 20 else PandasRollingOLS(
                    x=x['reg_x'], y=x['reg_y'], window=20).beta['feature1'])

        # Name the result after this function.
        result.name = sys._getframe().f_code.co_name
        return result