def cal_factor_return(self, sf_ids):
    """Estimate per-date factor returns with a cross-sectional OLS.

    For every pair of consecutive dates after 2000-01-01, the stock returns
    observed on the later date are regressed on the factor exposures read
    on the earlier date. The exposure matrix is passed to OLS as-is (no
    constant column is added here).

    Args:
        sf_ids: iterable of factor ids; each must be a key of
            ``StockFactor.stock_factors``.

    Returns:
        A ``(df_ret, df_sret)`` tuple. ``df_ret`` has one row per
        regression date and one column per factor id (the fitted
        coefficients). ``df_sret`` has one row per regression date and one
        column per stock (return minus exposure-weighted factor returns).
    """
    sfs = [
        StockFactor.subclass(sf_id, StockFactor.stock_factors[sf_id])
        for sf_id in sf_ids
    ]

    close = StockAsset.all_stock_nav()
    ret = close.pct_change()

    # The stock universe does not change inside this function, so look it
    # up once instead of on every loop iteration.
    stock_ids = StockAsset.all_stock_info().index
    ret = ret[stock_ids]

    dates = ret.index
    dates = dates[dates > '2000-01-01']

    df_ret = pd.DataFrame(columns=sf_ids)
    df_sret = pd.DataFrame(columns=stock_ids)

    for date, next_date in zip(dates[:-1], dates[1:]):
        # Single-argument form prints the same text on Python 2 and 3.
        print('cal_factor_return: %s' % (date,))

        tmp_ret = ret.loc[next_date].values

        # presumably sf.exposure is indexed by date with one column per
        # stock -- TODO confirm against StockFactor.
        tmp_exposure = {}
        for sf in sfs:
            tmp_exposure[sf.factor_id] = sf.exposure.loc[date]

        tmp_exposure_df = pd.DataFrame(tmp_exposure)
        tmp_exposure_df = tmp_exposure_df[sf_ids].fillna(0.0)
        # Align exposure rows with the column order of ``ret``.
        tmp_exposure_df = tmp_exposure_df.loc[stock_ids]

        # missing='drop' makes OLS skip observations containing NaN.
        mod = sm.OLS(tmp_ret, tmp_exposure_df.values, missing='drop').fit()

        df_ret.loc[next_date] = mod.params
        df_sret.loc[next_date] = tmp_ret - np.dot(tmp_exposure_df.values,
                                                  mod.params)

    return df_ret, df_sret
def cal_factor_return(self, sf_ids):
    """Estimate factor returns over a 21-row horizon with cross-sectional OLS.

    Returns are computed as ``pct_change(21)`` of the price table. For each
    date on or after 2005-01-01 that has a date 21 rows later, the return
    ending on the later date is regressed on the factor exposures read on
    the earlier date. The exposure matrix is passed to OLS as-is (no
    constant column is added here).

    Args:
        sf_ids: iterable of factor ids; each must be a key of
            ``StockFactor.stock_factors``.

    Returns:
        A ``(df_ret, df_sret)`` tuple. ``df_ret`` has one row per
        regression date and one column per factor id (the fitted
        coefficients). ``df_sret`` has one row per regression date and one
        column per stock (return minus exposure-weighted factor returns).
    """
    # Return horizon in rows of the price table (presumably about one
    # trading month -- TODO confirm).
    period = 21

    sfs = [
        StockFactor.subclass(sf_id, StockFactor.stock_factors[sf_id])
        for sf_id in sf_ids
    ]

    close = StockAsset.all_stock_nav()
    ret = close.pct_change(period).iloc[period:]

    # The stock universe does not change inside this function, so look it
    # up once instead of on every loop iteration.
    stock_ids = StockAsset.all_stock_info().index
    ret = ret[stock_ids]

    dates = ret.index
    dates = dates[dates >= '2005-01-01']

    df_ret = pd.DataFrame(columns=sf_ids)
    df_sret = pd.DataFrame(columns=stock_ids)

    # One worker per factor. The mapped results replace ``sfs``
    # (presumably the same factor objects with their exposure loaded).
    pool = Pool(len(sfs))
    try:
        sfs = pool.map(multiprocess_load_factor_exposure, sfs)
    finally:
        # Always release the worker processes, even if loading fails.
        pool.close()
        pool.join()

    for date, next_date in zip(dates[:-period], dates[period:]):
        tmp_ret = ret.loc[next_date].values

        tmp_exposure = {}
        for sf in sfs:
            tmp_exposure[sf.factor_id] = sf.exposure.loc[date]

        tmp_exposure_df = pd.DataFrame(tmp_exposure)
        tmp_exposure_df = tmp_exposure_df[sf_ids].fillna(0.0)
        # Align exposure rows with the column order of ``ret``.
        tmp_exposure_df = tmp_exposure_df.loc[stock_ids]

        # missing='drop' makes OLS skip observations containing NaN.
        mod = sm.OLS(tmp_ret, tmp_exposure_df.values, missing='drop').fit()

        df_ret.loc[next_date] = mod.params
        df_sret.loc[next_date] = tmp_ret - np.dot(tmp_exposure_df.values,
                                                  mod.params)

    return df_ret, df_sret
def cal_indexposure(self, stock_id):
    """Return a constant 0/1 industry-membership exposure for one stock.

    The result is indexed like the stock's quote table and holds 1 on every
    row when the stock's ``sk_swlevel1code`` equals ``self.sf_ind``,
    otherwise 0.

    Args:
        stock_id: id used both for ``StockAsset.get_stock`` and as a row
            label of ``StockAsset.all_stock_info()``.

    Returns:
        The ``exposure`` column as a pandas Series.
    """
    quote_index = StockAsset.get_stock(stock_id).quote.index
    industry_code = StockAsset.all_stock_info().loc[stock_id].sk_swlevel1code

    membership = 1 if industry_code == self.sf_ind else 0

    frame = pd.DataFrame(index=quote_index)
    frame['exposure'] = membership
    return frame.exposure
def cal_factor_exposure(self):
    """Compute this factor's exposure as the mean of its descriptors.

    Each callable in ``self.desc_methods`` is evaluated for every stock in
    ``StockAsset.all_stock_info()``. The per-descriptor table is passed
    through ``StockFactor.stock_factor_filter`` and then
    ``StockFactor.normalized``. The resulting tables are added element-wise
    and divided by the number of descriptors.

    Returns:
        The averaged exposure table with columns ordered like the stock
        info index. The same object is also stored on ``self.exposure``.
    """
    stock_ids = StockAsset.all_stock_info().index

    def _descriptor_table(method):
        # One column per stock, then filter and normalize the whole table.
        raw = pd.DataFrame({sid: method(sid) for sid in stock_ids})
        return StockFactor.normalized(StockFactor.stock_factor_filter(raw))

    tables = [_descriptor_table(method) for method in self.desc_methods]

    total = reduce(lambda acc, table: acc + table, tables)
    averaged = total / len(tables)
    averaged = averaged[stock_ids]

    self.exposure = averaged
    return averaged
def valid_stock_table():
    """Build the table of valid stock closing prices and store it.

    Daily close and turnover ("amount") are read from the 'caihui'
    database for every stock in ``StockAsset.all_stock_info()``. A close
    is set to NaN when any of the following holds:

    * fewer than 25 non-missing closes in the trailing 60 rows
      (zero closes are treated as missing);
    * the trailing 252-row mean turnover (at least 100 observations) is
      missing or falls in the bottom 20% of that date's cross-section;
    * the date lies inside one of the stock's ranges from
      ``StockAsset.stock_st()``;
    * the date is on or before the listing date plus 365 days.

    The columns are then renamed from ``sk_secode`` to ``globalid`` and the
    table is passed to ``asset_stock_factor.update_valid_stock_table``.
    Nothing is returned.
    """
    all_stocks = StockAsset.all_stock_info()
    all_stocks = all_stocks.reset_index()
    all_stocks = all_stocks.set_index(['sk_secode'])

    st_stocks = StockAsset.stock_st()

    # Push the listing date out by 365 days so the "listed less than one
    # year" mask below can slice up to it directly.
    all_stocks.sk_listdate = all_stocks.sk_listdate + timedelta(365)

    engine = database.connection('caihui')
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        price = asset_stock.tq_qt_skdailyprice
        sql = session.query(
            price.tradedate,
            price.secode,
            price.tclose,
            price.amount).filter(
                price.secode.in_(all_stocks.index)).statement
        quotation_amount = pd.read_sql(sql,
                                       session.bind,
                                       index_col=['tradedate', 'secode'],
                                       parse_dates=['tradedate'])
        session.commit()
    finally:
        # Release the session even when the query fails.
        session.close()

    # Filter suspended stocks: a zero close is treated as "no quote".
    quotation = quotation_amount[['tclose']]
    quotation = quotation.replace(0.0, np.nan)
    quotation = quotation.unstack()
    quotation.columns = quotation.columns.droplevel(0)

    # Require at least 25 traded (non-suspended) days within 60 trading days.
    quotation_count = quotation.rolling(60).count()
    quotation[quotation_count < 25] = np.nan

    # Filter out stocks ranked in the bottom 20% by average daily turnover
    # over the past year.
    amount = quotation_amount[['amount']]
    amount = amount.unstack()
    amount.columns = amount.columns.droplevel(0)
    year_amount = amount.rolling(252, min_periods=100).mean()

    # The threshold must ignore NaN: np.percentile yields NaN for any row
    # that contains a NaN, which would make every "<= threshold" comparison
    # False and disable the filter. DataFrame.quantile skips NaN and uses
    # the same linear interpolation.
    threshold = year_amount.quantile(0.2, axis=1)
    year_amount = year_amount.where(year_amount.gt(threshold, axis=0))
    quotation[year_amount.isnull()] = np.nan

    # Columns are not added or removed below, so build the lookup set once.
    quoted_secodes = set(quotation.columns)

    # Filter ST stocks.
    for secode, record in st_stocks.iterrows():
        if secode in quoted_secodes:
            quotation.loc[record.selecteddate:record.outdate,
                          secode] = np.nan

    # Filter stocks that have been listed for less than one year.
    for secode in all_stocks.index:
        if secode in quoted_secodes:
            quotation.loc[:all_stocks.loc[secode, 'sk_listdate'],
                          secode] = np.nan

    quotation = quotation.rename(
        columns=dict(zip(all_stocks.index, all_stocks.globalid)))

    asset_stock_factor.update_valid_stock_table(quotation)