def initialize_engine(self, start, end, benchmark, price_field):
    bm_dict = {50: '000016.SH', 300: '000300.SH', 500: '000905.SH'}
    self.benchmark_code = bm_dict[benchmark]
    self.influx = influxdbData()
    self.DB = 'DailyMarket_Gus'
    self.start = start
    self.end = end
    # load data
    measure = 'market'
    self.market = self.influx.getDataMultiprocess(
        self.DB, measure, self.start, self.end,
        ['code', 'status', 'preclose', 'high', 'low', 'close', price_field])
    self.market.index.names = ['date']
    measure = 'exright'
    self.exright = self.influx.getDataMultiprocess(self.DB, measure, self.start, self.end)
    self.exright.index.names = ['date']
    self.exright = self.exright.fillna(0)
    measure = 'swap'
    self.swap = self.influx.getDataMultiprocess(self.DB, measure, self.start, self.end)
    self.swap.index.names = ['date']
    self.swap['str_date'] = self.swap.index.strftime('%Y%m%d')
    self.swap = self.swap.loc[self.swap['swap_date'] == self.swap['str_date'], :]
    # close quote series of the benchmark index
    self.benchmark_quote = self.market.loc[self.market['code'] == self.benchmark_code, 'close'].copy()
    print('All Data Needed is ready...')
def JOB_factors(dates, factor_dict, db, measure):
    DFQr = dfq_risk()
    influx = influxdbData()
    table = 'factorexposure'
    field = 'fvjson'
    uni = '000000'
    save_res = []
    for date in dates:
        query = "SELECT {0} FROM dfrisk.{1} WHERE tradingdate ='{2}' and universe='{3}'" \
            .format(field, table, date, uni)
        DFQr.cur.execute(query)
        try:
            day_df = pd.read_json(DFQr.cur.fetchone()[0], orient='split', convert_axes=False)
            day_df['date'] = pd.to_datetime(date)
            day_df.index.names = ['code']
            day_df = day_df.reset_index().set_index('date')
            day_df['code'] = \
                np.where(day_df['code'].str[0] == '6', day_df['code'] + '.SH', day_df['code'] + '.SZ')
            day_df = day_df.rename(columns=factor_dict)
            day_df = day_df.where(pd.notnull(day_df), None)
            # save
            print('date: %s' % date)
            r = influx.saveData(day_df, db, measure)
            if r == 'No error occurred...':
                pass
            else:
                save_res.append('%s Error: %s' % ('RiskExposure', r))
        except TypeError:
            save_res.append('%s Error from DB! Date: %s' % ('RiskExposure', date))
    return save_res
def JOB_TTM_growth(codes, df, factor, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        code_df['{0}_growthQ'.format(factor)] = \
            code_df.apply(lambda row: FactorBase.cal_growth(
                row['{0}_last1Q'.format(factor)], row['{0}'.format(factor)]), axis=1)
        code_df['{0}_growthY'.format(factor)] = \
            code_df.apply(lambda row: FactorBase.cal_growth(
                row['{0}_last4Q'.format(factor)], row['{0}'.format(factor)]), axis=1)
        cols = ['{0}_growthQ'.format(factor), '{0}_growthY'.format(factor)]
        for i in range(1, 8):
            code_df['{0}_growthQ_last{1}Q'.format(factor, i)] = \
                code_df.apply(lambda row: FactorBase.cal_growth(
                    row['{0}_last{1}Q'.format(factor, i + 1)],
                    row['{0}_last{1}Q'.format(factor, i)]), axis=1)
            cols.append('{0}_growthQ_last{1}Q'.format(factor, i))
        code_df = code_df.loc[:, ['code', 'report_period'] + cols]
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.loc[np.any(pd.notnull(code_df[cols]), axis=1), :]
        if code_df.empty:
            continue
        code_df = code_df.where(pd.notnull(code_df), None)
        print('code: %s' % code)
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s_growth Error: %s' % (factor, r))
    return save_res
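# A minimal sketch of the growth helper used above. FactorBase.cal_growth is defined elsewhere
# in the repo; here it is assumed to measure growth of the later value against the absolute
# value of the earlier (base) value and to return NaN when the base is missing or zero. The
# real helper may differ. Relies on the module-level numpy import.
def _sketch_cal_growth(former, later):
    if former is None or later is None or np.isnan(former) or np.isnan(later) or former == 0:
        return np.nan
    return (later - former) / abs(former)
# _sketch_cal_growth(100.0, 120.0) -> 0.2
# _sketch_cal_growth(-50.0, 25.0)  -> 1.5 (base taken in absolute value)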
def JOB_factor(codes, df, factor_field, n_Qs, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        cols = []
        for i in range(n_Qs):
            code_df['{0}_deltaQ_last{1}Q'.format(factor_field, i)] = \
                code_df['{0}_last{1}Q'.format(factor_field, i)] - \
                code_df['{0}_last{1}Q'.format(factor_field, i + 1)]
            cols.append('{0}_deltaQ_last{1}Q'.format(factor_field, i))
        for i in range(n_Qs - 3):
            code_df['{0}_deltaY_last{1}Q'.format(factor_field, i)] = \
                code_df['{0}_last{1}Q'.format(factor_field, i)] - \
                code_df['{0}_last{1}Q'.format(factor_field, i + 4)]
            cols.append('{0}_deltaY_last{1}Q'.format(factor_field, i))
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.loc[np.any(pd.notnull(code_df[cols]), axis=1), ['code', 'report_period'] + cols]
        if code_df.empty:
            continue
        code_df.rename(columns={
            '{0}_deltaQ_last0Q'.format(factor_field): '{0}_deltaQ'.format(factor_field),
            '{0}_deltaY_last0Q'.format(factor_field): '{0}_deltaY'.format(factor_field)}, inplace=True)
        code_df = code_df.where(pd.notnull(code_df), None)
        print('code: %s' % code)
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (measure, r))
    return save_res
def JOB_factor(codes, df, EBIT_field, rev_field, result_field, n_Qs, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        cols = []
        code_df = df.loc[df['code'] == code, :].copy()
        for i in range(n_Qs):
            cols.append('{0}_last{1}Q'.format(result_field, i))
            # Because of earnings pre-announcements and express reports, oper_rev can lead EBIT,
            # so align the two series on report period before dividing.
            conditions = [code_df['EBIT_last{0}Q_rp'.format(i)].values == code_df['rev_last{0}Q_rp'.format(i)].values,
                          code_df['EBIT_last{0}Q_rp'.format(i)].values == code_df['rev_last{0}Q_rp'.format(i + 1)].values,
                          code_df['EBIT_last{0}Q_rp'.format(i)].values == code_df['rev_last{0}Q_rp'.format(i + 2)].values]
            choices = [code_df['{0}_last{1}Q'.format(EBIT_field, i)].values /
                       code_df['{0}_last{1}Q'.format(rev_field, i)].values,
                       code_df['{0}_last{1}Q'.format(EBIT_field, i)].values /
                       code_df['{0}_last{1}Q'.format(rev_field, i + 1)].values,
                       code_df['{0}_last{1}Q'.format(EBIT_field, i)].values /
                       code_df['{0}_last{1}Q'.format(rev_field, i + 2)].values]
            code_df['{0}_last{1}Q'.format(result_field, i)] = np.select(conditions, choices, default=np.nan)
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.loc[np.any(pd.notnull(code_df[cols]), axis=1), ['code', 'report_period'] + cols]
        code_df.rename(columns={'{0}_last0Q'.format(result_field): result_field}, inplace=True)
        code_df = code_df.where(pd.notnull(code_df), None)
        print('code: %s' % code)
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (result_field, r))
    return save_res
def JOB_factors(dates, df, db, measure):
    influx = influxdbData()
    save_res = []
    for date in dates:
        day_df = df.loc[df['date'] == date, :].copy()
        if day_df.shape[0] < 50:
            continue
        indus = day_df['improved_lv1'].unique()
        dfs = []
        for indu in indus:
            day_indu_df = day_df.loc[day_df['improved_lv1'] == indu, :].copy()
            # industries with too few stocks (e.g. insurance) cannot be cut into terciles
            if day_indu_df.shape[0] < 3:
                day_indu_df['net_profit_trend'] = 5
            else:
                day_indu_df['growth_group'] = \
                    pd.qcut(day_indu_df['net_profit_Q_growthY'], 3, labels=[1, 2, 3]).astype('int')
                day_indu_df['acc_group'] = \
                    pd.qcut(day_indu_df['net_profit_Q_acc'], 3, labels=[1, 2, 3]).astype('int')
                day_indu_df['net_profit_trend'] = \
                    3 * (day_indu_df['growth_group'] - 1) + day_indu_df['acc_group']
            dfs.append(day_indu_df)
        day_df = pd.concat(dfs)
        day_df.set_index('date', inplace=True)
        day_df = day_df.loc[:, ['code', 'report_period', 'net_profit_trend']]
        print('date: %s' % date)
        r = influx.saveData(day_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (measure, r))
    return save_res
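# Illustration of the 1-9 trend bucket built above: the within-industry growth tercile and
# acceleration tercile are combined as 3 * (growth_group - 1) + acc_group, so 1 means low
# growth with low acceleration and 9 means high growth with high acceleration. Numbers below
# are made up; relies on the module-level pandas import.
def _demo_net_profit_trend():
    demo = pd.DataFrame({'growthY': [0.05, 0.30, -0.10, 0.80, 0.12, 0.45],
                         'acc': [1.0, -0.5, 0.2, 0.9, -0.1, 0.4]})
    demo['growth_group'] = pd.qcut(demo['growthY'], 3, labels=[1, 2, 3]).astype('int')
    demo['acc_group'] = pd.qcut(demo['acc'], 3, labels=[1, 2, 3]).astype('int')
    demo['net_profit_trend'] = 3 * (demo['growth_group'] - 1) + demo['acc_group']
    return demo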
def JOB_factors(dates, db, measure):
    DFQr = dfq_risk()
    influx = influxdbData()
    table = 'specificriskhuber'
    field = 'sriskjson'
    uni = '000000'
    save_res = []
    for date in dates:
        query = "SELECT {0} FROM dfrisk.{1} WHERE tradingdate ='{2}' and universe='{3}'" \
            .format(field, table, date, uni)
        DFQr.cur.execute(query)
        try:
            day_df = pd.read_json(DFQr.cur.fetchone()[0], orient='split', convert_axes=False, typ='series')
            day_df.name = 'specific_risk'
            day_df = pd.DataFrame(day_df)
            day_df['date'] = pd.to_datetime(date)
            day_df.index.names = ['code']
            day_df = day_df.reset_index().set_index('date')
            day_df['code'] = \
                np.where(day_df['code'].str[0] == '6', day_df['code'] + '.SH', day_df['code'] + '.SZ')
            day_df = day_df.dropna(subset=['specific_risk'])
            # save
            print('date: %s' % date)
            r = influx.saveData(day_df, db, measure)
            if r == 'No error occurred...':
                pass
            else:
                save_res.append('%s Error: %s' % ('SpecificRisk', r))
        except TypeError:
            save_res.append('%s Error from DB! Date: %s' % ('SpecificRisk', date))
    return save_res
def JOB_cur_GPOA_Q(codes, df, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        code_df['tot_assets'] = code_df['tot_assets'].fillna(method='ffill')
        code_df[['tot_assets', 'tot_assets_last1Q']] = \
            code_df[['tot_assets', 'tot_assets_last1Q']].fillna(method='ffill', axis=1)
        code_df = code_df.drop_duplicates(['date'], 'last')
        code_df['GPOA_Q'] = \
            code_df['tot_profit_Q'] / (code_df['tot_assets'] + code_df['tot_assets_last1Q']) * 2
        code_df.set_index('date', inplace=True)
        code_df = code_df.loc[:, ['code', 'GPOA_Q', 'report_period']]
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.dropna(subset=['GPOA_Q'])
        print('code: %s' % code)
        if code_df.empty:
            continue
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('cur GPOA_Q Error: %s' % r)
    return save_res
def JOB_cur_ROE_TTM(codes, df, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        code_df['later_equity'] = code_df['net_equity'].fillna(method='ffill')
        conditions = [code_df['profit_last4Q_rp'].values == code_df['equity_last4Q_rp'].values,
                      code_df['profit_last4Q_rp'].values == code_df['equity_last3Q_rp'].values]
        choices = [code_df['net_equity_last4Q'].values,
                   code_df['net_equity_last3Q'].values]
        code_df['former_equity'] = np.select(conditions, choices, default=np.nan)
        code_df[['later_equity', 'former_equity']] = \
            code_df[['later_equity', 'former_equity']].fillna(method='ffill', axis=1)
        code_df[['later_equity', 'former_equity']] = \
            code_df[['later_equity', 'former_equity']].fillna(method='bfill', axis=1)
        code_df['ROE'] = code_df['net_profit_TTM'] / (code_df['later_equity'] + code_df['former_equity']) * 2
        code_df.set_index('date', inplace=True)
        code_df = code_df.loc[:, ['code', 'ROE', 'report_period']]
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.dropna()
        print('code: %s' % code)
        if code_df.empty:
            continue
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('cur ROE TTM Error: %s' % r)
    return save_res
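# The '* 2' in the ROE formula above implements an average-equity denominator:
# ROE_TTM = net_profit_TTM / ((equity_now + equity_year_ago) / 2). Worked example with
# made-up figures:
def _demo_roe_ttm():
    net_profit_ttm = 1.2e9
    equity_now, equity_year_ago = 1.05e10, 0.95e10
    return net_profit_ttm / (equity_now + equity_year_ago) * 2   # 0.12, i.e. a 12% trailing ROE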
def JOB_factors(codes, df, months_list, start, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        code_df = code_df.sort_index()
        res_dict = {}
        for idx, row in code_df.iterrows():
            r = Rtn_WgtRtn_ExpWgtRtn.cal_Rtn_series(code_df, idx, months_list)
            if not r:
                continue
            else:
                res_dict[idx] = r
        if not res_dict:
            continue
        else:
            res_df = pd.DataFrame(res_dict).T
            res_df['code'] = code
            res_df = res_df.loc[str(start):, :]
            res_df = res_df.replace(np.inf, np.nan)
            res_df = res_df.where(pd.notnull(res_df), None)
            print('code: %s' % code)
            r = influx.saveData(res_df, db, measure)
            if r == 'No error occurred...':
                pass
            else:
                save_res.append('%s Error: %s' % (measure, r))
    return save_res
def JOB_factors(df, field, codes, calendar, start):
    columns = df.columns
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        # pad the panel with every missing calendar date, then forward-fill
        insert_dates = calendar - set(code_df.index)
        content = [[np.nan] * len(columns)] * len(insert_dates)
        insert_df = pd.DataFrame(content, columns=columns, index=list(insert_dates))
        code_df = code_df.append(insert_df, ignore_index=False).sort_index()
        code_df = code_df.fillna(method='ffill')
        code_df = code_df.dropna(subset=['code'])
        # the latest report period is the right-most non-NaN report-period column
        code_df['report_period'] = code_df.apply(lambda row: row.dropna().index[-1], axis=1)
        # current period plus the previous 12 report periods (and the year-ago value)
        code_df[field] = code_df.apply(lambda row: CashFlowUpdate.get_former_data(row, 0), axis=1)
        hist_flds = []
        for n in range(1, 13):
            if n == 4:
                # '_lastY' uses the same offset as '_last4Q'; both columns are kept
                code_df[field + '_lastY'] = \
                    code_df.apply(lambda row: CashFlowUpdate.get_former_data(row, 4), axis=1)
                hist_flds.append(field + '_lastY')
            code_df[field + '_last{0}Q'.format(n)] = \
                code_df.apply(lambda row: CashFlowUpdate.get_former_data(row, n), axis=1)
            hist_flds.append(field + '_last{0}Q'.format(n))
        code_df = code_df.loc[str(start):, ['code', 'report_period', field] + hist_flds]
        code_df['report_period'] = code_df['report_period'].apply(lambda x: x.strftime('%Y%m%d'))
        code_df = code_df.where(pd.notnull(code_df), None)
        print('code: %s' % code)
        r = influx.saveData(code_df, 'FinancialReport_Gus', field)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('CashFlow Field: %s Error: %s' % (field, r))
    return save_res
def JOB_factors(df, field, codes, calendar, start, save_db):
    columns = df.columns
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        insert_dates = calendar - set(code_df.index)
        content = [[np.nan] * len(columns)] * len(insert_dates)
        insert_df = pd.DataFrame(content, columns=columns, index=list(insert_dates))
        code_df = code_df.append(insert_df, ignore_index=False).sort_index()
        code_df = code_df.fillna(method='ffill')
        code_df = code_df.dropna(subset=['code'])
        code_df = code_df.loc[str(start):, ]
        # every column except the first ('code') is a report period; order newest first
        rps = np.flipud(code_df.columns[1:]).astype('datetime64[ns]')
        rp_keys = np.flipud(code_df.columns[1:])
        # pick the latest available report period on each date
        code_df['report_period'] = code_df.apply(lambda row: row.dropna().index[-1], axis=1)
        choices = []
        for rp in rp_keys:
            choices.append(code_df[rp].values)
        # current period and the same period one year earlier
        code_df['process_rp'] = code_df['report_period'].apply(lambda x: FactorBase.get_former_report_period(x, 0))
        conditions = []
        for rp in rps:
            conditions.append(code_df['process_rp'].values == rp)
        code_df[field] = np.select(conditions, choices, default=np.nan)
        code_df['process_rp'] = code_df['report_period'].apply(lambda x: FactorBase.get_former_report_period(x, 4))
        conditions = []
        for rp in rps:
            conditions.append(code_df['process_rp'].values == rp)
        code_df[field + '_lastY'] = np.select(conditions, choices, default=np.nan)
        # each of the past 12 quarters
        res_flds = []
        for i in range(1, 13):
            res_field = field + '_last{0}Q'.format(str(i))
            res_flds.append(res_field)
            code_df['process_rp'] = code_df['report_period'].apply(
                lambda x: FactorBase.get_former_report_period(x, i))
            conditions = []
            for rp in rps:
                conditions.append(code_df['process_rp'].values == rp)
            code_df[res_field] = np.select(conditions, choices, default=np.nan)
        # arrange and save
        code_df = code_df.loc[:, ['code', 'report_period', field, field + '_lastY'] + res_flds]
        code_df['report_period'] = code_df['report_period'].apply(lambda x: x.strftime('%Y%m%d'))
        code_df = code_df.where(pd.notnull(code_df), None)
        print('code: %s' % code)
        r = influx.saveData(code_df, save_db, field)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('WindIndicator Field: %s Error: %s' % (field, r))
    return save_res
def JOB_factors(df, field, codes):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        drop_dupl_df = code_df.drop_duplicates().copy()
        # single-quarter (Q) values
        drop_dupl_df[field + '_Q'] = drop_dupl_df.apply(lambda row: QnTTMUpdate.JOB_calQ(
            row[field], row[field + '_last1Q'], row['report_period'], 0), axis=1)
        drop_dupl_df[field + '_Q_lastY'] = drop_dupl_df.apply(lambda row: QnTTMUpdate.JOB_calQ(
            row[field + '_lastY'], row[field + '_last5Q'], row['report_period'], 4), axis=1)
        Q_cols = [field + '_Q', field + '_Q_lastY']
        for n in range(1, 12):
            curr_col = field + '_last{0}Q'.format(str(n))
            prev_col = field + '_last{0}Q'.format(str(n + 1))
            res_col = field + '_Q_last{0}Q'.format(str(n))
            Q_cols.append(res_col)
            drop_dupl_df[res_col] = drop_dupl_df.apply(lambda row: QnTTMUpdate.JOB_calQ(
                row[curr_col], row[prev_col], row['report_period'], n), axis=1)
        # trailing-twelve-month (TTM) values: sum of the four most recent single quarters
        drop_dupl_df[field + '_TTM'] = drop_dupl_df[field + '_Q'] + drop_dupl_df[field + '_Q_last1Q'] + \
            drop_dupl_df[field + '_Q_last2Q'] + drop_dupl_df[field + '_Q_last3Q']
        TTM_cols = [field + '_TTM']
        for n in range(1, 9):
            curr_col = field + '_Q_last{0}Q'.format(str(n))
            prev1_col = field + '_Q_last{0}Q'.format(str(n + 1))
            prev2_col = field + '_Q_last{0}Q'.format(str(n + 2))
            prev3_col = field + '_Q_last{0}Q'.format(str(n + 3))
            res_col = field + '_TTM_last{0}Q'.format(str(n))
            TTM_cols.append(res_col)
            drop_dupl_df[res_col] = drop_dupl_df[curr_col] + drop_dupl_df[prev1_col] + \
                drop_dupl_df[prev2_col] + drop_dupl_df[prev3_col]
        Q_TTM_cols = Q_cols + TTM_cols
        drop_dupl_df = drop_dupl_df[Q_TTM_cols]
        code_df = pd.merge(code_df.loc[:, ['code', 'report_period']], drop_dupl_df,
                           how='left', left_index=True, right_index=True)
        code_df = code_df.fillna(method='ffill')
        code_df = code_df.where(pd.notnull(code_df), None)
        Q_df = code_df.loc[:, ['code', 'report_period'] + Q_cols]
        TTM_df = code_df.loc[:, ['code', 'report_period'] + TTM_cols]
        print('code: %s field: %s' % (code, field + '_Q'))
        r = influx.saveData(Q_df, 'FinancialReport_Gus', field + '_Q')
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('QnTTM Field: %s Error: %s' % (field + '_Q', r))
        print('code: %s field: %s' % (code, field + '_TTM'))
        r = influx.saveData(TTM_df, 'FinancialReport_Gus', field + '_TTM')
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('QnTTM Field: %s Error: %s' % (field + '_TTM', r))
    return save_res
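# A minimal sketch of the YTD-to-single-quarter conversion that QnTTMUpdate.JOB_calQ (defined
# elsewhere) is assumed to perform: financial statements report cumulative year-to-date values,
# so the single-quarter value is the cumulative value minus the previous cumulative value,
# except when the (offset-shifted) report period is a Q1 report. The real helper may differ.
# Relies on the module-level numpy/pandas imports.
def _sketch_cal_single_quarter(cum, prev_cum, report_period, offset):
    rp = pd.to_datetime(report_period) - pd.DateOffset(months=3 * offset)
    if rp.month == 3:                 # a Q1 report: the cumulative value is already one quarter
        return cum
    if pd.isnull(cum) or pd.isnull(prev_cum):
        return np.nan
    return cum - prev_cum
# _sketch_cal_single_quarter(300.0, 180.0, '20190630', 0) -> 120.0
# _sketch_cal_single_quarter(90.0, np.nan, '20190331', 0) -> 90.0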
def JOB_hist_ROE_TTM(codes, df, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        dfs = []
        for i in range(1, 9):
            code_df = df.loc[df['code'] == code,
                             ['date', 'code', 'net_profit_TTM_last{0}Q'.format(i),
                              'profit_last{0}Q_rp'.format(i), 'profit_last{0}Q_rp'.format(i + 4),
                              'net_equity_last{0}Q'.format(i - 1), 'net_equity_last{0}Q'.format(i),
                              'net_equity_last{0}Q'.format(i + 3), 'net_equity_last{0}Q'.format(i + 4),
                              'equity_last{0}Q_rp'.format(i - 1), 'equity_last{0}Q_rp'.format(i),
                              'equity_last{0}Q_rp'.format(i + 3), 'equity_last{0}Q_rp'.format(i + 4)]].copy()
            conditions = [code_df['profit_last{0}Q_rp'.format(i)].values ==
                          code_df['equity_last{0}Q_rp'.format(i)].values,
                          code_df['profit_last{0}Q_rp'.format(i)].values ==
                          code_df['equity_last{0}Q_rp'.format(i - 1)].values]
            choices = [code_df['net_equity_last{0}Q'.format(i)].values,
                       code_df['net_equity_last{0}Q'.format(i - 1)].values]
            code_df['later_equity'] = np.select(conditions, choices, default=np.nan)
            conditions = [code_df['profit_last{0}Q_rp'.format(i + 4)].values ==
                          code_df['equity_last{0}Q_rp'.format(i + 4)].values,
                          code_df['profit_last{0}Q_rp'.format(i + 4)].values ==
                          code_df['equity_last{0}Q_rp'.format(i + 3)].values]
            choices = [code_df['net_equity_last{0}Q'.format(i + 4)].values,
                       code_df['net_equity_last{0}Q'.format(i + 3)].values]
            code_df['former_equity'] = np.select(conditions, choices, default=np.nan)
            code_df[['later_equity', 'former_equity']] = \
                code_df[['later_equity', 'former_equity']].fillna(method='ffill', axis=1)
            code_df[['later_equity', 'former_equity']] = \
                code_df[['later_equity', 'former_equity']].fillna(method='bfill', axis=1)
            code_df['ROE_last{0}Q'.format(i)] = code_df['net_profit_TTM_last{0}Q'.format(i)] / \
                (code_df['later_equity'] + code_df['former_equity']) * 2
            code_df.set_index(['date', 'code'], inplace=True)
            code_df = code_df.replace(np.inf, np.nan)
            code_df = code_df.replace(-np.inf, np.nan)
            code_df = code_df.loc[:, ['ROE_last{0}Q'.format(i)]].dropna()
            dfs.append(code_df)
        if not dfs:
            continue
        code_df = pd.concat(dfs, axis=1)
        code_df = code_df.reset_index().set_index('date')
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.where(pd.notnull(code_df), None)
        print('code: %s' % code)
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('ROE_Q Error: %s' % r)
    return save_res
def JOB_factors(codes, df, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        conditions = [code_df['profit_last4Q_rp'].values == code_df['equity_last4Q_rp'].values,
                      code_df['profit_last5Q_rp'].values == code_df['equity_last4Q_rp'].values]
        choices = [code_df['net_profit_TTM_last4Q'].values,
                   code_df['net_profit_TTM_last5Q'].values]
        code_df['former_profit'] = np.select(conditions, choices, default=np.nan)
        conditions = [code_df['profit_last0Q_rp'].values == code_df['report_period'].values,
                      code_df['profit_last1Q_rp'].values == code_df['report_period'].values]
        choices = [code_df['net_profit_TTM'].values,
                   code_df['net_profit_TTM_last1Q'].values]
        code_df['later_profit'] = np.select(conditions, choices, default=np.nan)
        code_df['delta_equity'] = code_df['later_equity'] - code_df['former_equity']
        code_df['delta_profit'] = code_df['later_profit'] - code_df['former_profit']
        code_df['marginal_ROE'] = code_df['delta_profit'] / code_df['delta_equity']
        code_df.set_index('date', inplace=True)
        code_df = code_df.loc[:, ['code', 'report_period', 'marginal_ROE']]
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.dropna()
        print('code: %s' % code)
        if code_df.empty:
            continue
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('marginal ROE Error: %s' % r)
    return save_res
def __init__(self, strategy_name):
    self.influx = influxdbData()
    self.rdf = rdf_data()
    self.strategy_name = strategy_name
    self.mkt_db = global_constant.MARKET_DB
    self.mkt_measure = 'market'
    self.idx_wgt_measure = 'index_weight'
    self.st_measure = 'isST'
    self.industry_measure = 'industry'
    self.info_measure = 'stk_info'
    self.factor_db = global_constant.FACTOR_DB
    self.risk_exp_measure = 'RiskExposure'
    self.risk_cov_measure = 'RiskCov'
    self.spec_risk_measure = 'SpecificRisk'
def JOB_cur_ROE_TTM(codes, df, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        conditions = [code_df['FY0_rp'].values == code_df['equity_last0Q_rp'].values,
                      code_df['FY0_rp'].values == code_df['equity_last1Q_rp'].values,
                      code_df['FY0_rp'].values == code_df['equity_last2Q_rp'].values,
                      code_df['FY0_rp'].values == code_df['equity_last3Q_rp'].values,
                      code_df['FY0_rp'].values == code_df['equity_last4Q_rp'].values,
                      code_df['FY0_rp'].values == code_df['equity_last5Q_rp'].values,
                      code_df['FY0_rp'].values == code_df['equity_last6Q_rp'].values]
        choices = [code_df['net_equity'].values,
                   code_df['net_equity_last1Q'].values,
                   code_df['net_equity_last2Q'].values,
                   code_df['net_equity_last3Q'].values,
                   code_df['net_equity_last4Q'].values,
                   code_df['net_equity_last5Q'].values,
                   code_df['net_equity_last6Q'].values]
        code_df['ROE_equity'] = np.select(conditions, choices, default=np.nan)
        # fill ROE_equity with the most recent non-NaN equity value
        code_df[['net_equity_last6Q', 'net_equity_last5Q', 'net_equity_last4Q', 'net_equity_last3Q',
                 'net_equity_last2Q', 'net_equity_last1Q', 'net_equity', 'ROE_equity']] = \
            code_df[['net_equity_last6Q', 'net_equity_last5Q', 'net_equity_last4Q', 'net_equity_last3Q',
                     'net_equity_last2Q', 'net_equity_last1Q', 'net_equity', 'ROE_equity']].fillna(
                method='ffill', axis=1)
        # compute ROE_FY1
        code_df['ROE_FY1'] = code_df['net_profit_FY1'] / code_df['ROE_equity']
        code_df.set_index('date', inplace=True)
        code_df = code_df.loc[:, ['code', 'ROE_FY1', 'report_period']]
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.dropna()
        print('code: %s' % code)
        if code_df.empty:
            continue
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('ROE_FY1 Error: %s' % r)
    return save_res
def JOB_factors(mkt_data, codes, start, period, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_mkt = mkt_data.loc[mkt_data['code'] == code, ['code', 'amihud']].copy()
        code_mkt['amihud_{0}'.format(period)] = \
            code_mkt['amihud'].rolling(period, min_periods=round(period * 0.6)).mean()
        code_mkt = code_mkt.dropna(subset=['amihud_{0}'.format(period)])
        code_mkt = code_mkt.loc[str(start):, ['code', 'amihud_{0}'.format(period)]]
        if code_mkt.empty:
            continue
        print('code: %s' % code)
        r = influx.saveData(code_mkt, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (measure, r))
    return save_res
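# The 'amihud' input column is computed upstream; it is assumed to follow the conventional
# Amihud illiquidity definition, |daily return| / daily traded amount, which the rolling mean
# above then smooths. Made-up numbers; relies on the module-level pandas import.
def _demo_amihud():
    daily = pd.DataFrame({'ret': [0.012, -0.008, 0.021], 'amount': [3.2e8, 2.9e8, 4.1e8]})
    daily['amihud'] = daily['ret'].abs() / daily['amount']
    return daily['amihud'].rolling(3, min_periods=2).mean()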
def JOB_factors(codes, df, factor, db, measure):
    influx = influxdbData()
    save_res = []
    cols = []
    periods = []
    for i in range(1, 9):
        cols.append('{0}_last{1}Q'.format(factor, i - 1))
        periods.append(-i + 8)
    quadratic_featurizer = PolynomialFeatures(degree=2)
    regression_model = LinearRegression()
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        code_df = code_df.loc[np.all(pd.notnull(code_df[cols]), axis=1), ['code', 'report_period'] + cols]
        if code_df.empty:
            continue
        x = np.array(periods)[:, np.newaxis]
        x = quadratic_featurizer.fit_transform(x)
        drop_dup_df = code_df.drop_duplicates(cols, 'first')
        drop_dup_values = drop_dup_df[cols].values
        acc = []
        for i in range(drop_dup_values.shape[0]):
            y = drop_dup_values[i, :]
            y = y[:, np.newaxis]
            regression_model.fit(x, y)
            acc.append(regression_model.coef_[0, 2])
        acc_dict = dict(zip(drop_dup_df.index, acc))
        code_df['oper_rev_Q_acc'] = code_df.index
        code_df['oper_rev_Q_acc'] = code_df['oper_rev_Q_acc'].map(acc_dict)
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df['oper_rev_Q_acc'] = \
            code_df.groupby(['code', 'report_period'])['oper_rev_Q_acc'].fillna(method='ffill')
        code_df = code_df.loc[:, ['code', 'report_period', 'oper_rev_Q_acc']].dropna()
        print('code: %s' % code)
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (measure, r))
    return save_res
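# The 'acceleration' saved above is the coefficient on the squared term of a degree-2
# polynomial fitted through the last eight quarters (oldest quarter at x = 0, newest at x = 7).
# Self-contained check with a synthetic series whose curvature is known; relies on the
# module-level numpy/sklearn imports.
def _demo_quadratic_acc():
    t = np.arange(8)[:, np.newaxis]
    y = 0.5 * t ** 2 + 3 * t + 10                             # known quadratic coefficient 0.5
    x_poly = PolynomialFeatures(degree=2).fit_transform(t)    # columns: [1, t, t^2]
    model = LinearRegression().fit(x_poly, y)
    return model.coef_[0, 2]                                  # ~0.5, the recovered 'acceleration'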
def JOB_factors(turnover, codes, start, period, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_to = turnover.loc[turnover['code'] == code, :].copy()
        code_to['turnover_{0}'.format(period)] = \
            code_to['float_turnover'].rolling(period, min_periods=round(period * 0.6)).mean()
        code_to['ln_turnover_{0}'.format(period)] = np.log(code_to['turnover_{0}'.format(period)].values)
        code_to = code_to.loc[str(start):, ['code', 'ln_turnover_{0}'.format(period)]].dropna()
        if code_to.empty:
            continue
        print('code: %s' % code)
        r = influx.saveData(code_to, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (measure, r))
    return save_res
def JOB_factors(df, codes, save_db, save_msr):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        code_df = code_df.sort_values(['date', 'report_period'])
        code_df['CA_ratio'] = code_df['CA_ratio'].fillna(method='ffill')
        code_df['CA_ratio_last1Q'] = code_df['CA_ratio_last1Q'].fillna(method='ffill')
        code_df = code_df.drop_duplicates(['date'], 'last')
        code_df['NPL_leverage'] = code_df['NPL'] / code_df['CA_ratio']
        code_df['NPL_leverage_last1Q'] = code_df['NPL_last1Q'] / code_df['CA_ratio_last1Q']
        code_df = code_df.loc[:, ['date', 'code', 'report_period', 'NPL_leverage', 'NPL_leverage_last1Q']]
        code_df.set_index('date', inplace=True)
        code_df = code_df.where(pd.notnull(code_df), None)
        print('code: %s' % code)
        r = influx.saveData(code_df, save_db, save_msr)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('NPL_leverage Error: %s' % r)
    return save_res
def JOB_anlst_cover(ranges, df, db, measure):
    influx = influxdbData()
    save_res = []
    for range_start, range_end in ranges:
        range_df = df.loc[range_start:range_end, :].copy()
        range_df = range_df.drop_duplicates(subset=['code', 'organ_name'])
        if range_df.empty:
            continue
        cov = range_df.groupby('code')['organ_name'].count()
        cov.name = 'anlst_cov'
        cov = pd.DataFrame(cov)
        cov['date'] = dtparser.parse(range_end)
        cov['sqrt_anlst_cov'] = np.sqrt(cov['anlst_cov'])
        cov = cov.reset_index().set_index('date')
        print('Time range: %s - %s' % (range_start, range_end))
        r = influx.saveData(cov, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('FACTOR: %s \n range: %s - %s \n Error: %s' %
                            ('anlst_cover', range_start, range_end, r))
    return save_res
def JOB_factors(df, codes, calendar, save_db):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        insert_dates = calendar - set(code_df.index)
        content = [[np.nan] * 3] * len(insert_dates)
        insert_df = pd.DataFrame(content, columns=['code', 'report_period', 'audit_opinion'],
                                 index=list(insert_dates))
        code_df = code_df.append(insert_df, ignore_index=False).sort_index()
        code_df = code_df.fillna(method='ffill')
        code_df = code_df.dropna()
        print('code: %s' % code)
        r = influx.saveData(code_df, save_db, 'Audit')
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('Audit Error: %s' % r)
    return save_res
def JOB_factors(codes, df, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        conditions = \
            [code_df['FY0_rp'].values == code_df['last0Q_rp'].values,
             code_df['FY0_rp'].values == code_df['last1Q_rp'].values,
             code_df['FY0_rp'].values == code_df['last2Q_rp'].values,
             code_df['FY0_rp'].values == code_df['last3Q_rp'].values,
             code_df['FY0_rp'].values == code_df['last4Q_rp'].values,
             code_df['FY0_rp'].values == code_df['last5Q_rp'].values,
             code_df['FY0_rp'].values == code_df['last6Q_rp'].values]
        choices = \
            [code_df['net_profit_TTM'].values,
             code_df['net_profit_TTM_last1Q'].values,
             code_df['net_profit_TTM_last2Q'].values,
             code_df['net_profit_TTM_last3Q'].values,
             code_df['net_profit_TTM_last4Q'].values,
             code_df['net_profit_TTM_last5Q'].values,
             code_df['net_profit_TTM_last6Q'].values]
        code_df['net_profit_FY0'] = np.select(conditions, choices, default=np.nan)
        code_df['net_profit_FY1_growthY'] = code_df['net_profit_FY1'] / code_df['net_profit_FY0'] - 1
        code_df = code_df.loc[:, ['code', 'net_profit_FY1_growthY', 'report_period']]
        code_df = code_df.replace(np.inf, np.nan)
        code_df = code_df.replace(-np.inf, np.nan)
        code_df = code_df.where(pd.notnull(code_df), None)
        if code_df.empty:
            continue
        print('code: %s' % code)
        r = influx.saveData(code_df, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('NetProfitFY1_growth Error: %s' % r)
    return save_res
def JOB_factors(mkt_data, codes, start, period, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_mkt = mkt_data.loc[mkt_data['code'] == code, :].copy()
        # shares bought yesterday have no unturn data yet today
        code_mkt['to_1'] = code_mkt['float_turnover'].shift(1)
        code_mkt['price_1'] = code_mkt['fq_vwap'].shift(1) * code_mkt['to_1']
        to_cols = ['to_1']
        hp_cols = ['price_1']
        for p in range(2, period + 1):
            turnover = code_mkt['float_turnover'].shift(p)
            prod_unturn = \
                code_mkt['unturn'].rolling(p - 1, min_periods=p - 1).apply(lambda x: np.prod(x)).shift(1)
            code_mkt['to_{0}'.format(p)] = turnover * prod_unturn
            code_mkt['price_{0}'.format(p)] = code_mkt['fq_vwap'].shift(p) * code_mkt['to_{0}'.format(p)]
            to_cols.append('to_{0}'.format(p))
            hp_cols.append('price_{0}'.format(p))
        code_mkt = code_mkt.dropna()
        code_mkt['multi'] = 1 / code_mkt[to_cols].sum(axis=1)
        code_mkt['price'] = code_mkt[hp_cols].sum(axis=1) * code_mkt['multi']
        code_mkt['CGO_{0}'.format(period)] = \
            (code_mkt['fq_vwap'] - code_mkt['price']) / code_mkt['price']
        code_mkt = code_mkt.loc[str(start):, ['code', 'CGO_{0}'.format(period)]]
        code_mkt = code_mkt.dropna()
        if code_mkt.empty:
            continue
        print('code: %s' % code)
        r = influx.saveData(code_mkt, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (measure, r))
    return save_res
def JOB_factors(mkt_data, codes, start, period, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_mkt = mkt_data.loc[mkt_data['code'] == code, ['code', 'fq_close', 'fq_preclose']].copy()
        code_mkt['period_preclose'] = code_mkt['fq_preclose'].shift(period)
        code_mkt['ret_{0}'.format(period)] = code_mkt['fq_close'] / code_mkt['period_preclose'] - 1
        code_mkt = code_mkt.loc[str(start):, ['code', 'ret_{0}'.format(period)]]
        code_mkt = code_mkt.dropna()
        if code_mkt.empty:
            continue
        print('code: %s' % code)
        r = influx.saveData(code_mkt, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('%s Error: %s' % (measure, r))
    return save_res
def JOB_factors(df, codes, calendar, start, end):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        dp_dict = dict(zip(code_df['DP_year'].values, code_df['DP_LYR'].values))
        blank_df = pd.DataFrame({'code': [code] * calendar.shape[0], 'date': calendar})
        code_df = pd.merge(code_df, blank_df, on=['date', 'code'], how='outer')
        code_df = code_df.set_index('date').sort_index()
        code_df['cur_month'] = code_df.index.strftime('%m').astype('int')
        code_df['withhold_year'] = code_df.index.strftime('%Y').astype('int') - 2
        # most dividends are paid before August; from August onward, roll withhold_year forward one year
        code_df['withhold_year'] = np.where(code_df['cur_month'].values <= 7,
                                            code_df['withhold_year'].values,
                                            code_df['withhold_year'].values + 1)
        code_df['DP_year'] = code_df['DP_year'].fillna(method='ffill')
        code_df['DP_year'] = code_df[['DP_year', 'withhold_year']].max(axis=1)
        code_df['DP_LYR'] = code_df['DP_year'].map(dp_dict)
        code_df['DP_LYR'] = code_df['DP_LYR'].fillna(0)
        code_df = code_df.loc[str(start):str(end), ['code', 'DP_LYR']]
        print('code: %s' % code)
        r = influx.saveData(code_df, 'DailyFactors_Gus', 'DP_LYR')
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('DP_LYR Error: %s' % r)
    return save_res
def JOB_net_profit_divergence(ranges, df, db, measure):
    influx = influxdbData()
    save_res = []
    for range_start, range_end in ranges:
        range_df = df.loc[range_start:range_end, :].copy()
        # only keep forecasts for the year the window ends in
        range_df = range_df.loc[range_df['year'] == int(range_end[:4]), :]
        # if a broker publishes several reports on a code within the 6-month window, keep the last one
        range_df = range_df.groupby(['code', 'organ_name'])['net_profit'].last().reset_index()
        # only compute divergence for codes covered by at least 5 brokers
        organ_count = range_df.groupby('code')['organ_name'].count()
        organ_count = organ_count[organ_count >= 5]
        range_df = range_df.loc[range_df['code'].isin(organ_count.index), :]
        if range_df.empty:
            continue
        mean = range_df.groupby('code')['net_profit'].mean()
        std = range_df.groupby('code')['net_profit'].std()
        divergence = std / abs(mean)
        divergence.name = 'net_profit_divergence'
        divergence = pd.DataFrame(divergence)
        divergence['date'] = dtparser.parse(range_end)
        divergence = divergence.reset_index().set_index('date')
        divergence = divergence.replace(np.inf, np.nan)
        divergence = divergence.dropna()
        if divergence.empty:
            continue
        print('Time range: %s - %s' % (range_start, range_end))
        r = influx.saveData(divergence, db, measure)
        if r == 'No error occurred...':
            pass
        else:
            save_res.append('FACTOR: %s \n range: %s - %s \n Error: %s' %
                            ('net_profit_div', range_start, range_end, r))
    return save_res
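# The divergence saved above is a coefficient-of-variation style dispersion of broker
# forecasts: std / |mean| per code, computed only for codes covered by at least 5 brokers.
# Made-up forecasts; relies on the module-level pandas import.
def _demo_net_profit_divergence():
    fc = pd.DataFrame({'code': ['A'] * 5 + ['B'] * 5,
                       'net_profit': [10.0, 12.0, 11.0, 9.5, 10.5, 5.0, 7.0, 6.0, 9.0, 4.0]})
    mean = fc.groupby('code')['net_profit'].mean()
    std = fc.groupby('code')['net_profit'].std()
    return std / mean.abs()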
def __init__(self):
    self.rdf = rdf_data()
    self.influx = influxdbData()
    self.db = 'DailyMarket_Gus'
    self.measure = 'market'
def JOB_factors(codes, df, start, db, measure):
    influx = influxdbData()
    save_res = []
    for code in codes:
        code_df = df.loc[df['code'] == code, :].copy()
        code_df = code_df.sort_index()
        first_dt = code_df.index[0]
        res_dict = {}
        for idx, row in code_df.iterrows():
            str_idx = idx.strftime('%Y%m%d')
            m1_before_idx = (idx - relativedelta(months=1)).strftime('%Y%m%d')
            m3_before_idx = (idx - relativedelta(months=3)).strftime('%Y%m%d')
            y2_before_idx = (idx - relativedelta(years=2)).strftime('%Y%m%d')
            # 1-month window ----------------------------------------------------------------
            if (idx - first_dt).days < 30:
                continue
            period_m1_df = code_df.loc[m1_before_idx:str_idx, :].copy()
            if not period_m1_df.shape[0] < 10:
                res_dict[idx] = {}
                res_dict[idx]['turn_1m'] = period_m1_df['turnover'].mean()
                res_dict[idx]['float_turn_1m'] = period_m1_df['float_turnover'].mean()
                res_dict[idx]['free_turn_1m'] = period_m1_df['free_turnover'].mean()
                res_dict[idx]['std_turn_1m'] = period_m1_df['turnover'].std()
                res_dict[idx]['std_float_turn_1m'] = period_m1_df['float_turnover'].std()
                res_dict[idx]['std_free_turn_1m'] = period_m1_df['free_turnover'].std()
            else:
                continue
            # 3-month window ----------------------------------------------------------------
            if (idx - first_dt).days < 90:
                continue
            period_m3_df = code_df.loc[m3_before_idx:str_idx, :].copy()
            if not period_m3_df.shape[0] < 30:
                res_dict[idx]['turn_3m'] = period_m3_df['turnover'].mean()
                res_dict[idx]['float_turn_3m'] = period_m3_df['float_turnover'].mean()
                res_dict[idx]['free_turn_3m'] = period_m3_df['free_turnover'].mean()
                res_dict[idx]['std_turn_3m'] = period_m3_df['turnover'].std()
                res_dict[idx]['std_float_turn_3m'] = period_m3_df['float_turnover'].std()
                res_dict[idx]['std_free_turn_3m'] = period_m3_df['free_turnover'].std()
            else:
                continue
            # 2-year window -----------------------------------------------------------------
            if (idx - first_dt).days < 700:
                continue
            period_y2_df = code_df.loc[y2_before_idx:str_idx, :].copy()
            # skip stocks suspended for a long time, e.g. 000029.SZ
            if period_y2_df.shape[0] < 200:
                continue
            else:
                turn_2y = period_y2_df['turnover'].mean()
                if turn_2y == 0:
                    continue
                else:
                    float_turn_2y = period_y2_df['float_turnover'].mean()
                    free_turn_2y = period_y2_df['free_turnover'].mean()
                    std_turn_2y = period_y2_df['turnover'].std()
                    std_float_turn_2y = period_y2_df['float_turnover'].std()
                    std_free_turn_2y = period_y2_df['free_turnover'].std()
                    # bias of the short windows against the 2-year baseline -------------------
                    res_dict[idx]['bias_turn_1m'] = \
                        res_dict[idx]['turn_1m'] / turn_2y - 1
                    res_dict[idx]['bias_float_turn_1m'] = \
                        res_dict[idx]['float_turn_1m'] / float_turn_2y - 1
                    res_dict[idx]['bias_free_turn_1m'] = \
                        res_dict[idx]['free_turn_1m'] / free_turn_2y - 1
                    res_dict[idx]['bias_std_turn_1m'] = \
                        res_dict[idx]['std_turn_1m'] / std_turn_2y - 1
                    res_dict[idx]['bias_std_float_turn_1m'] = \
                        res_dict[idx]['std_float_turn_1m'] / std_float_turn_2y - 1
                    res_dict[idx]['bias_std_free_turn_1m'] = \
                        res_dict[idx]['std_free_turn_1m'] / std_free_turn_2y - 1
                    res_dict[idx]['bias_turn_3m'] = \
                        res_dict[idx]['turn_3m'] / turn_2y - 1
                    res_dict[idx]['bias_float_turn_3m'] = \
                        res_dict[idx]['float_turn_3m'] / float_turn_2y - 1
                    res_dict[idx]['bias_free_turn_3m'] = \
                        res_dict[idx]['free_turn_3m'] / free_turn_2y - 1
                    res_dict[idx]['bias_std_turn_3m'] = \
                        res_dict[idx]['std_turn_3m'] / std_turn_2y - 1
                    res_dict[idx]['bias_std_float_turn_3m'] = \
                        res_dict[idx]['std_float_turn_3m'] / std_float_turn_2y - 1
                    res_dict[idx]['bias_std_free_turn_3m'] = \
                        res_dict[idx]['std_free_turn_3m'] / std_free_turn_2y - 1
        if not res_dict:
            continue
        else:
            res_df = pd.DataFrame(res_dict).T
            res_df = res_df.loc[str(start):, ]
            res_df = res_df.replace(np.inf, np.nan)
            res_df = res_df.where(pd.notnull(res_df), None)
            res_df['code'] = code
            # save
            print('code: %s' % code)
            r = influx.saveData(res_df, db, measure)
            if r == 'No error occurred...':
                pass
            else:
                save_res.append('%s Error: %s' % ('Turnover', r))
    return save_res