def cal_halpha(self, stock_id):
    """Compute a rolling 60-month regression intercept (alpha) for a stock.

    Period-over-period returns of the stock's ``tcloseaf`` series and of the
    NAV series '120000016' are summed into calendar-month buckets; the last
    bucket of each is dropped.  For every window of 60 consecutive common
    months the stock's monthly figure is regressed on the reference series'
    monthly figure and the fitted intercept is stored at the window's last
    month.

    Args:
        stock_id: identifier forwarded to ``StockAsset.get_stock``.

    Returns:
        pandas.Series of intercepts, resampled to daily frequency and
        forward-filled up to ``get_today()``.  An empty Series is returned
        when fewer than 60 common months are available.
    """
    window = 60

    quote = StockAsset.get_stock(stock_id).quote
    # Zero closes are replaced by the previous value before computing returns.
    close = quote.tcloseaf.replace(0.0, method='pad')
    stock_ret = close.pct_change()
    ref_ret = Asset.load_nav_series('120000016').pct_change()

    # Monthly sums; the trailing bucket is discarded.
    stock_ret = stock_ret.resample('m').sum().iloc[:-1]
    ref_ret = ref_ret.resample('m').sum().iloc[:-1]

    common_index = stock_ret.index.intersection(ref_ret.index)
    stock_ret = stock_ret.loc[common_index]
    ref_ret = ref_ret.loc[common_index]

    ser = pd.Series()
    if len(common_index) < window:
        return ser

    # Start at window - 1 so that the very first complete window
    # (positions 0..59) is estimated too; starting at ``window`` would
    # silently skip it and produce nothing for exactly 60 months of data.
    for last in range(window - 1, len(common_index)):
        tmp_dates = common_index[last - window + 1:last + 1]
        y = stock_ret.loc[tmp_dates].values
        x = ref_ret.loc[tmp_dates].values.reshape(-1, 1)
        mod = LinearRegression().fit(x, y)
        ser.loc[tmp_dates[-1]] = mod.intercept_

    # Append a placeholder for today so the daily forward-fill extends to it.
    today = get_today()
    ser.loc[today] = np.nan
    ser = ser.resample('d').last().fillna(method='pad')

    return ser
def load_corr(factor_ids, start_date, end_date):
    """Return the pairwise correlation matrix of the factors' NAV series.

    Each factor's NAV series is loaded reindexed onto the trade dates between
    ``start_date`` and ``end_date``.  The correlation is taken on the NAV
    levels themselves (not on their percentage changes).

    Args:
        factor_ids: iterable of asset ids understood by ``Asset.load_nav_series``.
        start_date: start of the window, forwarded to ``ATradeDate.trade_date``.
        end_date: end of the window, forwarded to ``ATradeDate.trade_date``.

    Returns:
        pandas.DataFrame: result of ``DataFrame.corr()`` over the NAV columns.
    """
    trade_dates = ATradeDate.trade_date(start_date, end_date)

    nav_by_factor = dict(
        (factor_id, Asset.load_nav_series(factor_id, reindex=trade_dates))
        for factor_id in factor_ids
    )

    return pd.DataFrame(nav_by_factor).corr()
def fund_update_monetary_pool(pool, adjust_points, optlimit, optcalc):
    """Recalculate the fund selection of a single monetary fund pool.

    For every adjustment date the funds returned by ``pool_by_scale_return``
    are looked up through ``base_ra_fund.load`` and collected as
    (pool, date, fund id, fund code) rows.  The rows are then synchronised
    into the ``ra_pool_fund`` table of the 'asset' database via
    ``database.batch``.

    Args:
        pool: pool record; ``ra_lookback`` and ``id`` are read from it.
        adjust_points: sized iterable of adjustment dates.
        optlimit: limit forwarded to ``pool_by_scale_return``.
        optcalc: when falsy, nothing is computed or written.
    """
    lookback = pool.ra_lookback
    limit = optlimit

    # The database sync below depends entirely on the rows built in this
    # routine, so the whole routine is skipped when calculation is off.
    if not optcalc:
        return

    #
    # Compute the latest allocation at every adjustment point
    #
    data = []
    with click.progressbar(length=len(adjust_points), label='calc pool %s' % (pool.id)) as bar:
        for day in adjust_points:
            bar.update(1)

            # NOTE(review): the filter and the NAV frame do not depend on
            # ``day``; they look hoistable out of the loop if
            # ``pool_by_scale_return`` does not mutate them -- confirm.
            mnf = MonetaryFundFilter()
            mnf.handle()
            asset_ids = MonetaryAllocate.all_monetary_fund_globalid()
            trade_dates = ATradeDate.week_trade_date()
            dict_nav = {}
            for asset_id in asset_ids:
                dict_nav[asset_id] = Asset.load_nav_series(asset_id, reindex=trade_dates)
            df_nav = pd.DataFrame(dict_nav).fillna(method='pad')
            df_inc = df_nav.pct_change().fillna(0.0)

            codes = pool_by_scale_return(pool, day, lookback, limit, mnf, df_inc)

            # Check for None before taking len(); calling len(None) in the
            # progress print would raise TypeError and defeat the guard.
            n_codes = 0 if codes is None else len(codes)
            print(day, n_codes, codes)
            if n_codes == 0:
                continue

            ra_fund = base_ra_fund.load(globalids=codes)
            ra_fund = ra_fund.set_index(['ra_code'])
            ra_pool = pool['id']
            for code in ra_fund.index:
                ra_fund_id = ra_fund.loc[code, 'globalid']
                data.append([ra_pool, day, ra_fund_id, code])

    fund_df = pd.DataFrame(data, columns=['ra_pool', 'ra_date', 'ra_fund_id', 'ra_fund_code'])
    fund_df = fund_df.set_index(['ra_pool', 'ra_date', 'ra_fund_id'])

    df_new = fund_df
    columns = [literal_column(c) for c in (df_new.index.names + list(df_new.columns))]
    s = select(columns)
    db = database.connection('asset')
    ra_pool_fund_t = Table('ra_pool_fund', MetaData(bind=db), autoload=True)
    # Only the existing rows of the pools present in the new data are loaded
    # for comparison.
    s = s.where(ra_pool_fund_t.c.ra_pool.in_(df_new.index.get_level_values(0).tolist()))
    df_old = pd.read_sql(s, db, index_col=df_new.index.names)
    database.batch(db, ra_pool_fund_t, df_new, df_old)
def fc_update_nav(ctx, optid):
    """Cluster the factor assets and store one composite NAV per cluster.

    Factors '120000001'..'120000039' (minus the excluded numbers) are
    clustered with ``clusterKMeansBase`` on the matrix returned by
    ``load_ind`` for the 365 days before the latest monthly trade date.  For
    each cluster the mean of the members' percentage changes is compounded
    into a NAV and written to ``ra_composite_asset_nav`` under the id
    ``FC.000001.<cluster + 1>``.

    Args:
        ctx: not used in the body.
        optid: not used in the body.
    """
    lookback_days = 365
    excluded = [24, 32, 40]
    factor_ids = []
    for num in range(1, 40):
        if num in excluded:
            continue
        factor_ids.append('1200000%02d' % num)

    last_date = ATradeDate.month_trade_date(begin_date='2018-01-01')[-1]
    window_start = last_date - datetime.timedelta(lookback_days)
    start_date = window_start.strftime('%Y-%m-%d')
    end_date = last_date.strftime('%Y-%m-%d')

    corr0 = load_ind(factor_ids, start_date, end_date)
    asset_cluster = clusterKMeansBase(corr0, maxNumClusters=10, n_init=100)[1]
    # Pair the sorted cluster keys with the sorted member lists.
    cluster_keys = sorted(asset_cluster)
    cluster_members = sorted(asset_cluster.values())
    asset_cluster = dict(zip(cluster_keys, cluster_members))

    # Print the index rows of every cluster's members.
    factor_name = base_ra_index.load()
    for members in asset_cluster.values():
        print(factor_name.loc[np.array(members).astype('int')])

    df_assets = pd.DataFrame(dict(
        (factor_id, Asset.load_nav_series(factor_id)) for factor_id in factor_ids
    ))

    db = database.connection('asset')
    t = Table('ra_composite_asset_nav', MetaData(bind=db), autoload=True)

    for layer, layer_assets in asset_cluster.items():
        layer_id = 'FC.000001.%d' % (layer + 1)

        # Rows with a missing change for any member are dropped before the
        # cross-sectional mean is taken.
        member_inc = df_assets.loc[:, layer_assets].pct_change().dropna()
        layer_ret = member_inc.mean(1).reset_index()
        layer_ret.columns = ['ra_date', 'ra_inc']
        layer_ret['ra_nav'] = (1 + layer_ret['ra_inc']).cumprod()
        layer_ret['ra_asset_id'] = layer_id

        df_new = layer_ret.set_index(['ra_asset_id', 'ra_date'])
        df_old = asset_ra_composite_asset_nav.load_nav(layer_id)
        df_new = df_new.reindex(columns=['ra_nav', 'ra_inc'])
        database.batch(db, t, df_new, df_old, timestamp=False)
def load_mv(factor_ids, start_date, end_date):
    """Return pairwise distances between the factors' mean/std ratios.

    For each factor the NAV series is loaded on the trade dates between
    ``start_date`` and ``end_date``; the ratio of the mean to the standard
    deviation of its percentage changes is computed, and the distance
    between every pair of ratios is returned.

    Args:
        factor_ids: iterable of asset ids understood by ``Asset.load_nav_series``.
        start_date: start of the window, forwarded to ``ATradeDate.trade_date``.
        end_date: end of the window, forwarded to ``ATradeDate.trade_date``.

    Returns:
        pandas.DataFrame: square matrix from ``distance_matrix``, labelled on
        both axes with the NAV frame's columns.
    """
    trade_dates = ATradeDate.trade_date(start_date, end_date)

    nav_by_factor = dict(
        (factor_id, Asset.load_nav_series(factor_id, reindex=trade_dates))
        for factor_id in factor_ids
    )
    df_asset_navs = pd.DataFrame(nav_by_factor)

    # Rows containing any missing change are discarded.
    incs = df_asset_navs.pct_change().dropna()
    ratio_frame = (incs.mean() / incs.std()).to_frame('mv')

    labels = df_asset_navs.columns
    return pd.DataFrame(
        data=distance_matrix(ratio_frame, ratio_frame),
        columns=labels,
        index=labels,
    )
def cal_btsg(self, stock_id):
    """Compute an expanding-window ``sqrt(beta * sigma)`` series for a stock.

    Period-over-period returns of the stock's ``tcloseaf`` series and of the
    NAV series '120000016' are summed into calendar-month buckets; the last
    bucket of each is dropped.  Starting once 60 common months are
    available, an OLS regression (with constant) of the stock's monthly
    figure on the reference series' monthly figure is fitted on all months
    up to and including the current one.  ``beta`` is the slope, ``sigma``
    the standard deviation of the residuals, and the stored value is
    ``(beta * sigma) ** 0.5``.

    Args:
        stock_id: identifier forwarded to ``StockAsset.get_stock``.

    Returns:
        pandas.Series of values, resampled to daily frequency and
        forward-filled up to ``get_today()``.  An empty Series is returned
        when fewer than 60 common months are available.
    """
    min_months = 60

    quote = StockAsset.get_stock(stock_id).quote
    # Zero closes are replaced by the previous value before computing returns.
    close = quote.tcloseaf.replace(0.0, method='pad')
    stock_ret = close.pct_change()
    ref_ret = Asset.load_nav_series('120000016').pct_change()

    # Monthly sums; the trailing bucket is discarded.
    stock_ret = stock_ret.resample('m').sum().iloc[:-1]
    ref_ret = ref_ret.resample('m').sum().iloc[:-1]

    common_index = stock_ret.index.intersection(ref_ret.index)
    stock_ret = stock_ret.loc[common_index]
    ref_ret = ref_ret.loc[common_index]

    ser = pd.Series()
    if len(common_index) < min_months:
        return ser

    # Start at min_months - 1 so the first estimate uses exactly the minimum
    # number of months accepted by the guard above; starting one later would
    # yield nothing when exactly 60 months are available.
    for last in range(min_months - 1, len(common_index)):
        tmp_dates = common_index[:last + 1]
        y = stock_ret.loc[tmp_dates].values
        x = sm.add_constant(ref_ret.loc[tmp_dates].values.reshape(-1, 1))
        mod = sm.OLS(y, x).fit()
        beta = mod.params[1]  # params[0] belongs to the prepended constant
        sigma = mod.resid.std()
        # NOTE(review): a negative beta makes the square root undefined --
        # confirm the desired handling.
        btsg = pow(beta * sigma, 0.5)
        ser.loc[tmp_dates[-1]] = btsg

    # Append a placeholder for today so the daily forward-fill extends to it.
    today = get_today()
    ser.loc[today] = np.nan
    ser = ser.resample('d').last().fillna(method='pad')

    return ser
def nav_update(alloc, enddate):
    """Recompute and save the NAV series of one high/low allocation.

    The positions of the allocation are loaded, the NAV of every asset in
    the position columns is fetched for the covered date range, and the
    portfolio NAV produced by ``DFUtil.portfolio_nav`` is saved through
    ``asset_mz_highlow_nav.save``.

    Args:
        alloc: allocation record; its ``'globalid'`` entry is read.
        enddate: last date to include (must provide ``.date()``), or ``None``
            to use yesterday.
    """
    alloc_id = alloc['globalid']

    # Load the position data
    df_pos = asset_mz_highlow_pos.load(alloc_id)

    # Determine the date range for the asset NAVs
    min_date = df_pos.index.min().date()
    if enddate is not None:
        max_date = enddate.date()
    else:
        # Yesterday, as a plain date so both branches (and min_date) use the
        # same type rather than a datetime carrying the current clock time.
        max_date = (datetime.now() - timedelta(days=1)).date()

    # Load the asset NAVs and turn them into period returns
    data = {}
    for asset_id in df_pos.columns:
        data[asset_id] = Asset.load_nav_series(asset_id, begin_date=min_date, end_date=max_date)
    df_nav = pd.DataFrame(data).fillna(method='pad')
    df_inc = df_nav.pct_change().fillna(0.0)

    # Compute the composite asset NAV
    df_nav_portfolio = DFUtil.portfolio_nav(df_inc, df_pos, result_col='portfolio')

    df_result = df_nav_portfolio[['portfolio']].rename(columns={'portfolio': 'mz_nav'}).copy()
    df_result.index.name = 'mz_date'
    df_result['mz_inc'] = df_result['mz_nav'].pct_change().fillna(0.0)
    df_result['mz_highlow_id'] = alloc['globalid']
    df_result = df_result.reset_index().set_index(['mz_highlow_id', 'mz_date'])

    asset_mz_highlow_nav.save(alloc_id, df_result)
def load_ret(factor_ids, start_date, end_date):
    """Return a matrix of pairwise ratios between the factors' mean changes.

    For each factor the NAV series is loaded on the trade dates between
    ``start_date`` and ``end_date`` and the mean of its percentage changes is
    taken.  Entry ``(i, j)`` is ``mean_i / mean_j``; whenever that ratio
    exceeds 1 its reciprocal is stored instead.

    Args:
        factor_ids: iterable of asset ids understood by ``Asset.load_nav_series``.
        start_date: start of the window, forwarded to ``ATradeDate.trade_date``.
        end_date: end of the window, forwarded to ``ATradeDate.trade_date``.

    Returns:
        pandas.DataFrame: square matrix labelled on both axes with the NAV
        frame's columns.
    """
    trade_dates = ATradeDate.trade_date(start_date, end_date)
    asset_navs = {}
    for factor_id in factor_ids:
        asset_navs[factor_id] = Asset.load_nav_series(factor_id, reindex=trade_dates)

    df_asset_navs = pd.DataFrame(asset_navs)
    # Rows containing any missing change are discarded.
    df_asset_incs = df_asset_navs.pct_change().dropna()
    mean_inc = df_asset_incs.mean().values

    # Broadcast a column against a row: values[i, j] = mean_i / mean_j.
    values = mean_inc[:, np.newaxis] / mean_inc[np.newaxis, :]
    # Invert only the entries above 1 (ratios <= 1, negative ones included,
    # are kept unchanged).
    above_one = values > 1
    values[above_one] = 1 / values[above_one]

    df_dist = pd.DataFrame(data=values, columns=df_asset_navs.columns, index=df_asset_navs.columns)

    return df_dist
def fc_rolling(ctx, optid):
    """Run the factor clustering on every monthly trade date and print it.

    For each monthly trade date from 2017-01-01 on, ``FactorCluster`` is
    fitted with 7 to 11 clusters on the 365 days before the date; the model
    with the highest ``silhouette_samples_value`` is kept and the index rows
    of the members of each of its clusters are printed.  The assignments are
    collected in a local frame only; nothing is added to the database
    session before it is committed.

    Args:
        ctx: not used in the body.
        optid: not used in the body.
    """
    engine = database.connection('asset')
    Session = sessionmaker(bind=engine)
    session = Session()

    # Close the session even when loading or clustering raises.
    try:
        blacklist = [24, 40]
        asset_ids = ['1200000%02d' % i for i in range(1, 40) if i not in blacklist]
        assets = {}
        for asset_id in asset_ids:
            assets[asset_id] = Asset.load_nav_series(asset_id)

        lookback_days = 365
        forecast_days = 90
        df_result = pd.DataFrame(columns=['date', 'factor_id', 'layer'])

        start_date = '2017-01-01'
        trade_dates = ATradeDate.month_trade_date(begin_date=start_date)
        for date in trade_dates:
            print(date)
            sdate = (date - datetime.timedelta(lookback_days)).strftime('%Y-%m-%d')
            edate = date.strftime('%Y-%m-%d')
            fdate = (date + datetime.timedelta(forecast_days)).strftime('%Y-%m-%d')

            method = 'beta'
            # Both maps are keyed by score, so a later model with an equal
            # score replaces the earlier one.
            scores = {}
            models = {}
            for i in range(7, 12):
                fc = FactorCluster(assets, i, sdate, edate, fdate, method=method, bf_ids=None)
                fc.handle()
                score = fc.silhouette_samples_value
                print('%s %s %s' % (i, 'silhouette_samples_value:', score))
                scores[score] = i
                models[score] = fc

            best_score = max(scores)
            fc = models[best_score]

            print('%s %s' % ('best cluster num:', fc.n_clusters))
            factor_name = base_ra_index.load()
            for k, v in fc.asset_cluster.iteritems():
                v = np.array(v).astype('int')
                print(factor_name.loc[v])
                for vv in v:
                    df_result.loc[len(df_result)] = [date, vv, k]
            print('')

        session.commit()
    finally:
        session.close()