def base_executive_regulation(self):
    """Fetch executive shareholding changes filed within the date range.

    Queries ``ada.base_executive_regulation`` on ``self.host_200`` for rows
    whose filing date ``rd`` lies in ``[self.start_date, self.end_date]``,
    relabels the columns, keeps only codes whose first character is
    ``0``, ``3`` or ``6``, and tags every row with a direction label.

    Returns:
        A 3-tuple ``(detail, summary, latest)``:

        * ``detail``  -- the relabelled table plus the direction column;
        * ``summary`` -- a column subset with the filing date renamed ``dt``;
        * ``latest``  -- the ``summary`` rows whose ``dt`` equals
          ``self.end_date``, without the ``dt`` column.
    """
    column_labels = {
        'name.szh': '股份变动人姓名',
        'cd': '变动日期',
        'rd': '填报日期',
        'cirrat': '占流通股本比例',
        'change': '变动股数',
        'after': '变动后持股',
        'cause': '变动原因',
    }
    filing_window = {'$gte': self.start_date, '$lte': self.end_date}

    def has_wanted_prefix(code):
        return code[0] in ['0', '3', '6']

    def direction_of(shares):
        # Only a strictly positive change counts as an increase; zero and
        # negative values are both labelled as a decrease.
        if float(shares) > 0:
            return '高管增持'
        return '高管减持'

    detail = general_form_mongo_to_df(
        self.host_200,
        'ada',
        'base_executive_regulation',
        pos=[
            'secu', 'name.szh', 'cd', 'rd', 'cirrat', 'change', 'after',
            'cause'
        ],
        filters={'rd': filing_window})
    detail = detail.rename(columns=column_labels)
    detail = detail[detail.secu.map(has_wanted_prefix)]
    detail['增减持方向'] = detail['变动股数'].map(direction_of)

    summary = detail[['secu', '填报日期', '变动股数', '增减持方向', '占流通股本比例']]
    summary = summary.rename(columns={'填报日期': 'dt'})

    filed_on_end_date = summary['dt'] == self.end_date
    latest = summary[filed_on_end_date][['secu', '增减持方向', '变动股数', '占流通股本比例']]
    return detail, summary, latest
def off_cap(_dt=None):
    """Join listed-stock base info with the "摘帽" flag from the trade table.

    Args:
        _dt: Reference date as a ``'%Y-%m-%d'`` string. Defaults to today.

    Returns:
        The outer merge (on ``code``) of

        * the ``ada.base_stock`` rows whose ``mkt.code`` is one of
          1001/1002/1003/1012 and whose ``ls.edt`` is null, and
        * the ``hq_stock_trade`` rows with ``zqjb = 'P'`` dated within one
          year before the reference date, carrying a constant ``摘帽``
          column and the trade date renamed ``cap_dt``.

    Raises:
        ValueError: if ``_dt`` is not a ``'%Y-%m-%d'`` date string.
    """
    dt = _dt if _dt is not None else datetime.date.today().strftime('%Y-%m-%d')

    # Basic security information from base_stock.
    df_base = general_form_mongo_to_df(
        '122.144.134.4',
        'ada',
        'base_stock',
        pos=['code', 'abbr.szh', 'ls.code', 'ls.dt', 'ls.edt'],
        filters={
            'mkt.code': {
                '$in': ['1001', '1002', '1003', '1012']
            },
            'ls.edt': None
        })

    # Look-back window: one year before the reference date.  The value is
    # produced by strptime + slicing, so it is always a well-formed date
    # and safe to interpolate into the SQL text below.
    temp_dt = str(
        datetime.datetime.strptime(dt, '%Y-%m-%d') -
        relativedelta(years=1))[:10]

    # NOTE(review): credentials are hard-coded here, and the host portion
    # ("[email protected]") looks like an obfuscation artefact rather than
    # a real host -- confirm the DSN and move it to configuration.
    engine = create_engine(
        'mysql+mysqlconnector://pd_team:pd_team321@[email protected]/ada-fd'
    )
    sql_cap = """select dt,tick from hq_stock_trade where zqjb = 'P' and dt >= '%s' """ % (
        temp_dt)
    df_cap = pd.read_sql(sql_cap, engine)

    # Keep only tickers starting with 0/3/6; copy so the column assignments
    # below operate on an independent frame instead of a slice.
    df_cap = df_cap[df_cap.tick.map(lambda x: x[0] in ['0', '3', '6'])].copy()
    # Fixed: the original compared with ``is '6'`` (object identity), which
    # is not guaranteed to be true for equal strings.
    df_cap.tick = df_cap.tick.map(
        lambda x: x + '_SH_EQ' if x[0] == '6' else x + '_SZ_EQ')
    df_cap['摘帽'] = '摘帽'
    df_cap = df_cap.rename(columns={'dt': 'cap_dt', 'tick': 'code'})

    df = pd.merge(df_base, df_cap, on='code', how='outer')
    return df
def pre_disclosure_time(self):
    """Load scheduled annual-report disclosure dates (pre_disclosure_time).

    Queries ``ada.pre_disclosure_time`` on ``self.host_95`` for rows whose
    ``order`` date lies in ``[self.start_date, self.end_date]``, folds the
    ``change1``..``change3`` columns into ``order``, and keeps only codes
    whose first character is ``0``, ``3`` or ``6``.

    Returns:
        A 3-tuple ``(schedule, events, latest)``:

        * ``schedule`` -- ``secu`` plus the relabelled report period and
          scheduled date;
        * ``events``   -- a copy of ``schedule`` where the date is moved to
          ``dt`` and the scheduled-date column holds a constant label;
        * ``latest``   -- ``secu`` and the date (renamed) for the rows whose
          report period equals the year-end computed from ``self.end_date``.
    """
    revision_columns = ['change1', 'change2', 'change3']
    order_window = {'$gte': self.start_date, '$lte': self.end_date}

    schedule = general_form_mongo_to_df(
        self.host_95,
        'ada',
        'pre_disclosure_time',
        pos=['secu', 'y', 'order'] + revision_columns,
        filters={'order': order_window})

    # DataFrame.update overwrites ``order`` with the non-NA values of each
    # change column in turn, so the last non-NA revision wins.  The helper
    # frame has a fresh 0..n-1 index, so this presumably relies on
    # ``schedule`` having a default index as well -- TODO confirm.
    for revision in revision_columns:
        replacement = pd.DataFrame(schedule[revision].values, columns=['order'])
        schedule.update(replacement)

    wanted = schedule.secu.map(lambda code: code[0] in ['0', '3', '6'])
    schedule = schedule[wanted]
    schedule = schedule.drop(revision_columns, axis=1)
    schedule = schedule.rename(columns={'order': '年报预约披露', 'y': '披露年报报告期'})

    events = schedule.copy()
    events['dt'] = events['年报预约披露']
    events['年报预约披露'] = '预约披露年报'

    # Report period used for the third frame: the year-end of the end
    # date's own year when its month is after April, otherwise the year-end
    # of the previous year.
    end_year = self.end_date[:4]
    end_month = int(self.end_date[5:7])
    if end_month > 4:
        rpt = end_year + '-12-31'
    else:
        rpt = str(int(end_year) - 1) + '-12-31'

    latest = events[events['披露年报报告期'] == rpt][['secu', 'dt']]
    latest = latest.rename(columns={'dt': '年报披露日期'})
    return schedule, events, latest
def cmb_report_score_adjust(self):
    """Load the report rating-adjustment table (data still to be verified).

    Queries ``forecast.cmb_report_score_adjust`` on ``self.host_4`` for rows
    whose ``ccd`` lies in ``[self.start_date, self.end_date]``, keeps
    six-character stock codes starting with 0/3/6, truncates ``ccd`` to its
    first ten characters, maps the numeric ``saf`` flag to a text label and
    appends an exchange suffix to the code.

    Returns:
        A 3-tuple ``(detail, flags, latest)``:

        * ``detail`` -- the relabelled table;
        * ``flags``  -- ``dt``, ``secu`` and the adjustment flag;
        * ``latest`` -- ``secu`` and the flag for rows with
          ``dt == self.end_date``.

        When the query returns no rows, the empty frame is returned three
        times unchanged.

    Raises:
        KeyError: if ``saf`` contains a value other than 1, 2, 3 or 4.
    """
    df1 = general_form_mongo_to_df(
        self.host_4,
        'forecast',
        'cmb_report_score_adjust',
        pos=['stockcode', 'ccd', 'pcd', 'csi', 'psi', 'saf'],
        filters={'ccd': {
            '$gte': self.start_date,
            '$lte': self.end_date
        }})
    if len(df1) == 0:
        return df1, df1, df1

    # Copy after filtering so the column assignments below work on an
    # independent frame rather than on a slice of the query result.
    df1 = df1[df1['stockcode'].map(
        lambda x: len(x) == 6 and x[0] in ['0', '3', '6'])].copy()
    df1.ccd = df1.ccd.map(lambda x: x[:10])

    temp_dic = {1: '评级未调', 2: '评级上调', 3: '评级下调', 4: '评级未知'}
    df1.saf = df1.saf.map(lambda x: temp_dic[x])

    df1 = df1.rename(
        columns={
            'ccd': 'dt',
            'stockcode': 'secu',
            'saf': '评级调整标志',
            'pcd': '上次预测日期',
            'psi': '上次评级',
            'csi': '本次评级'
        })

    # The original re-applied the 0/3/6 prefix filter here; the filter above
    # already guarantees it, so the second pass has been dropped.
    # Fixed: the original compared with ``is '6'`` (object identity), which
    # is not guaranteed to be true for equal strings.
    df1.secu = df1.secu.map(
        lambda x: x + '_SH_EQ' if x[0] == '6' else x + '_SZ_EQ')

    df2 = df1[['dt', 'secu', '评级调整标志']]
    df3 = df2[df2['dt'] == self.end_date][['secu', '评级调整标志']]
    return df1, df2, df3
def earlier_annual_report(self):
    """Return the earliest ~10% of type-100101 announcements per report year.

    Queries ``news.announcement`` for ``typ == '100101'`` entries published
    in ``[self.start_date, self.end_date]``, drops titles containing
    '已取消' (cancelled), extracts the first four-digit run of each title
    as the report year, and keeps, per report year, the first
    ``int(n * 0.1)`` rows ordered by publication time.

    Returns:
        A DataFrame indexed by ``year`` (the calendar year of ``dt``) with
        the title, one security code per row (``secu``) and the date ``dt``.
        Rows with any missing value are dropped.
    """
    dt1 = self.start_date
    dt2 = self.end_date
    df = general_form_mongo_to_df(
        host='122.144.134.95',
        db_name='news',
        tb_name='announcement',
        pos=['title', 'secu.cd', 'pdt'],
        filters={
            'typ': '100101',
            'secu.cd': {
                '$regex': '_S'
            },
            'pdt': {
                "$gte": datetime.datetime.strptime(dt1, '%Y-%m-%d'),
                "$lte": datetime.datetime.strptime(dt2, '%Y-%m-%d')
            }
        })

    # Drop cancelled reports; copy so the new column is added to an
    # independent frame rather than to a slice.
    df = df[df.title.map(lambda x: '已取消' not in x)].copy()

    # Raw string: '\d' inside a normal literal is an invalid escape sequence.
    df['rpt_year'] = df.title.str.extract(r'(\d{4})', expand=False)

    # Per report year, keep the earliest tenth (rounded down) by publish time.
    df = df.set_index('rpt_year').groupby(level=0).apply(
        lambda x: x.sort_values(by='pdt').head(int(len(x) * 0.1))
    ).reset_index(drop=True)

    # Publications at or after 15:00 keep their own calendar date; earlier
    # ones are mapped through get_nearest_trade_day.
    df.pdt = df.pdt.map(
        lambda x: str(x.date()) if x.hour >= 15 else get_nearest_trade_day(x))
    df = df.rename(columns={'secu.cd': 'secu', 'pdt': 'dt'})
    df['year'] = df['dt'].map(
        lambda x: datetime.datetime.strptime(x, '%Y-%m-%d').year)

    # ``secu`` holds a list of codes per row: keep the first code that starts
    # with 0/3/6 and ends with _SH_EQ/_SZ_EQ, or NaN when none qualifies.
    df['secu'] = df['secu'].map(
        lambda x: [
            secu for secu in x
            if secu[0] in ['0', '3', '6'] and secu[-6:] in ['_SH_EQ', '_SZ_EQ']
        ]).map(lambda x: x[0] if len(x) >= 1 else np.nan)

    df = df.dropna(how='any')
    df = df.set_index('year')
    return df
def all_announcements(self):
    """Return all announcements for the date range, cached on disk.

    The raw query result is stored in ``all_announcements.hd5`` under
    ``self.pth``.  When that file already exists it is read back instead of
    querying, whatever ``self.start_date`` / ``self.end_date`` currently are.

    Returns:
        A DataFrame with ``title``, ``secu`` and ``dt`` where ``secu`` is the
        first code in the row's code list that starts with 0, 3 or 6; rows
        without such a code are dropped.
    """
    cache_file = os.path.join(self.pth, 'all_announcements.hd5')

    if not os.path.exists(cache_file):
        published_window = {
            "$gte": datetime.datetime.strptime(self.start_date, '%Y-%m-%d'),
            "$lte": datetime.datetime.strptime(self.end_date, '%Y-%m-%d'),
        }
        frame = general_form_mongo_to_df(
            host='122.144.134.95',
            db_name='news',
            tb_name='announcement',
            pos=['title', 'secu.cd', 'pdt'],
            filters={
                'secu.cd': {'$regex': '_S'},
                'pdt': published_window,
            })
        # The cache holds the raw (un-renamed) query result.
        frame.to_hdf(cache_file, key='df')
    else:
        frame = pd.read_hdf(cache_file)

    frame = frame.rename(columns={'secu.cd': 'secu', 'pdt': 'dt'})

    def wanted_codes(codes):
        return [code for code in codes if code[0] in ['0', '3', '6']]

    frame.secu = frame.secu.map(wanted_codes)
    has_code = frame.secu.map(lambda codes: len(codes) >= 1)
    frame = frame[has_code]
    frame.secu = frame.secu.map(lambda codes: codes[0])
    return frame
def cal_expected(self):
    """Compare pre-announced net profit with the consensus forecast.

    Sources named by the original docstring: net profit attributable to the
    parent (is_tpl_30) and its consensus forecast (con_forecast_stk, ``c4``).

    Steps:
        1. Take the earnings pre-announcements for the date range, keep the
           ones whose report period has month ``12`` and, per security and
           period, only the latest by ``dt``.
        2. Parse the numbers in the announcement text, average the first two
           matches, and negate the average when the text mentions '亏损'.
        3. Look up the consensus forecast ``c4`` for each security on the
           day returned by ``get_next_n_trade_day(dt)``.
        4. Merge both and compute
           ``excess_profit = (avg_profit - c4) / abs(c4)``.

    Returns:
        The merged DataFrame, repeated three times as a 3-tuple.
    """
    # 1. Net-profit estimates disclosed in the earnings pre-announcements.
    ins = World_Event(self.start_date, self.end_date)
    df_actual = ins.base_earnings_preannouncement()[0]
    df_actual = df_actual[df_actual['业绩预告报告期'].map(
        lambda x: x.split('-')[1]) == '12']
    df_actual = df_actual.sort_values(
        by='dt', ascending=True).drop_duplicates(['业绩预告报告期', 'secu'],
                                                 keep='last')

    # Sign: -1 when the text mentions a loss ('亏损'), +1 otherwise.
    df_actual['sign'] = df_actual['业绩预告内容'].str.extract("(亏损)", expand=False)
    df_actual.sign = df_actual.sign.map(lambda x: -1.0 if x == '亏损' else 1.0)

    # One column per regex match, one row per announcement.
    # Fixed: ``np.float`` was removed in NumPy 1.24; the builtin ``float`` is
    # the exact type it aliased.  The pattern is now a raw string.
    # NOTE(review): the class ``[万|%]`` also matches a literal '|', and
    # ``\d+[.]?\d+`` needs at least two digits -- confirm this is intended.
    temp = (
        df_actual['业绩预告内容']
        .str.extractall(r"(\d+[.]?\d+)[万|%]")
        .astype(float)
        .rename(columns={0: 'value'})
        .reset_index()
        .pivot(index='level_0', columns='match', values='value')
    )
    temp['avg_profit'] = temp.iloc[:, :2].mean(axis=1)

    df_combined = pd.merge(
        df_actual.reset_index(),
        temp.reset_index()[['level_0', 'avg_profit']],
        left_on='index',
        right_on='level_0',
        how='outer')
    df_combined = df_combined.drop(['level_0', 'index'], axis=1)
    df_combined['avg_profit'] = df_combined.avg_profit * df_combined.sign
    # Map the announcement date to a trading day.
    df_combined['bef'] = df_combined.dt.map(
        lambda x: get_next_n_trade_day(x))
    print('1')

    # 2. Consensus net profit on the mapped day: one $or clause per row.
    filters2 = {
        '$or': [{
            'stockcode': row.secu[:6],
            'tdate': int(row.bef.replace('-', '')),
            'rptdate': int(row['业绩预告报告期'][:4])
        } for _, row in df_combined.iterrows()]
    }
    fore_profit = general_form_mongo_to_df(
        host='122.144.134.4',
        db_name='forecast',
        tb_name='con_forecast_stk',
        pos=['stockcode', 'rptdate', 'c4'],
        filters=filters2)
    fore_profit = fore_profit.rename(columns={
        'stockcode': 'secu',
        'rptdate': '业绩预告报告期'
    })
    # Bring the forecast keys into the same format as the announcement keys.
    fore_profit['业绩预告报告期'] = fore_profit['业绩预告报告期'].map(
        lambda x: str(x) + '-12-31')
    fore_profit.secu = fore_profit.secu.map(
        lambda x: x + '_SH_EQ' if x[0] in ['6'] else x + '_SZ_EQ')
    print('2')

    # 3. Combine the pre-announced figures with the consensus forecast.
    df = pd.merge(df_combined, fore_profit, on=['secu', '业绩预告报告期'])
    df['excess_profit'] = (df['avg_profit'] - df['c4']) / df['c4'].abs()
    return df, df, df
def announcements(self):
    """Query announcements of type ``self.typ`` published in the date range.

    Returns:
        The DataFrame produced by ``general_form_mongo_to_df`` for
        ``news.announcement`` with the fields ``title``, ``secu.cd`` and
        ``pdt``, restricted to rows whose ``secu.cd`` matches the regex
        ``_S``, whose ``typ`` equals ``self.typ`` and whose ``pdt`` lies in
        ``[self.start_date, self.end_date]``.
    """
    date_format = '%Y-%m-%d'
    published_from = datetime.datetime.strptime(self.start_date, date_format)
    published_to = datetime.datetime.strptime(self.end_date, date_format)

    query = {
        'secu.cd': {'$regex': '_S'},
        "typ": self.typ,
        'pdt': {"$gte": published_from, "$lte": published_to},
    }
    return general_form_mongo_to_df(
        host='122.144.134.95',
        db_name='news',
        tb_name='announcement',
        pos=['title', 'secu.cd', 'pdt'],
        filters=query)
def base_earnings_preannouncement(self):
    """Load earnings pre-announcements (base_earnings_preannouncement).

    Queries ``ada.base_earnings_preannouncement`` on ``self.host_95`` for
    rows whose ``y`` lies in ``[self.start_date, self.end_date]`` and
    reduces ``content`` to the text between the first and second occurrence
    of '净利润' (or to the end when it occurs only once).

    Returns:
        A 3-tuple ``(table, table, latest)``: the relabelled table twice,
        followed by ``secu``, type and content for the rows whose ``dt``
        equals ``self.end_date``.

    Raises:
        IndexError: if a row's content does not contain '净利润'.
    """
    announce_window = {'$gte': self.start_date, '$lte': self.end_date}
    table = general_form_mongo_to_df(
        self.host_95,
        'ada',
        'base_earnings_preannouncement',
        pos=['secu', 'y', 'rpt', 'content', 'typ', 'reason'],
        filters={'y': announce_window})

    def text_after_keyword(raw):
        return str(raw).split('净利润')[1]

    table['content'] = table['content'].map(text_after_keyword)

    labels = {
        'y': 'dt',
        'typ': '业绩预告类型',
        'content': '业绩预告内容',
        'rpt': '业绩预告报告期',
        'reason': '原因',
    }
    table = table.rename(columns=labels)

    ordered_columns = ['dt', 'secu', '业绩预告类型', '业绩预告报告期', '业绩预告内容', '原因']
    table = table.loc[:, ordered_columns]

    announced_on_end_date = table['dt'] == self.end_date
    latest = table[announced_on_end_date][['secu', '业绩预告类型', '业绩预告内容']]
    return table, table, latest