def _get_report(only_A, table, columns=None, col='截止日期'):
    """Load financial-report rows from the dataBrowse database.

    Announcement dates taken from the income statement are joined in and
    used as the ``timestamp`` column.

    Parameters
    ----------
    only_A : bool
        Keep A-share rows only (drop codes starting with '2' or '9').
    table : str
        Source table name.
    columns : list[str] | None
        Optional column subset passed to ``read_sql_table``.
    col : str
        Name of the period-end column in the source table; anything other
        than the default '截止日期' is treated as an industry-ranking table.
    """
    eng = get_engine('dataBrowse')
    df = pd.read_sql_table(table, eng, columns=columns)
    if only_A:
        non_a = df.证券代码.str.startswith('2') | df.证券代码.str.startswith('9')
        df = df[~non_a]
    asof_dates = _financial_report_announcement_date()
    keys = ['证券代码', '截止日期']
    if col != '截止日期':
        # Industry-ranking tables: preserve the original period column as
        # '报告年度', then rename it to the standard '截止日期' join key.
        df['报告年度'] = df[col]
        df.rename(columns={col: '截止日期'}, inplace=True)
    df = df.join(asof_dates.set_index(keys), on=keys)
    df.rename(
        columns={"证券代码": "sid",
                 "截止日期": "asof_date",
                 "公告日期": "timestamp"},
        inplace=True,
    )
    # Repair missing asof_date / timestamp values in place.
    _fill_ad_and_ts(df)
    # Normalize column names.
    df.columns = df.columns.map(_normalized_col_name)
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
def _to_sql(self, api, level, df, if_exists):
    """Write ``df`` into the table mapped to ``level`` and log the row count."""
    model = self.get_level_class(level)
    table = model.__tablename__
    df.to_sql(
        table,
        con=get_engine(self.db_name),
        if_exists=if_exists,
        index=False,
    )
    verb = '添加' if if_exists == 'append' else '更新'
    api.logger.notice(f"{verb} {self.db_name} {table} {len(df)} 行")
def wy_to_db(codes, date):
    """Fetch Netease (wy) trade-detail rows for each code on `date` and append
    them to the CJMX table, retrying failed codes for up to 10 passes.
    """
    engine = get_engine(db_dir_name)
    # code -> True once finished (stored, or nothing to fetch),
    #         False after a failed fetch (retried on the next pass).
    status = {}
    for i in range(10):
        for code in codes:
            ok = status.get(code)
            if ok:
                # Announce the skip only on the first pass to avoid log spam.
                if i == 0:
                    logger.info(f'股票:{code} 已经刷新,跳过')
                continue
            if has_traded(code, date) and not has_cjmx(code, date):
                date_str = date.strftime(DATE_FMT)
                try:
                    df = wy_fetch_cjmx(code, date_str)
                    status[code] = True
                except Exception as e:
                    # Best-effort: record the failure and retry on a later pass.
                    logger.info(f'股票:{code} {date_str} {e!r}')
                    status[code] = False
                    continue
                df = _wy_fix_data(df)
                # Keep 2 decimal places (truncates toward zero, not rounding).
                df['价格变动'] = (df['价格变动'] * 100).astype('int') / 100.0
                df['成交额'] = (df['成交额'] * 100).astype('int') / 100.0
                df.to_sql(CJMX.__tablename__, engine, if_exists='append', index=False)
                logger.info(f'股票:{code} {date_str} 共{len(df):>3}行')
                # Throttle requests to the remote server.
                time.sleep(0.3)
            else:
                # Code did not trade that day, or data already present: done.
                status[code] = True
def update_classify_bom():
    """Refresh the classification BOM table (full replace)."""
    tab = ClassificationBom.__tablename__
    eng = get_engine('dataBrowse')
    with DataBrowse(True) as api:
        frame = api.classify_bom
        frame.to_sql(tab, eng, if_exists='replace')
        api.logger.info(f'表:{tab} 更新 {len(frame):>4}行')
def update_stock_classify(n):
    """Append the classification-tree rows for group ``n``."""
    tab = Classification.__tablename__
    with DataBrowse(True) as api:
        tree = api.get_classify_tree(n)
        # Nothing to write for an empty tree.
        if tree.empty:
            return
        tree.to_sql(tab, get_engine('dataBrowse'),
                    if_exists='append', index=False)
        api.logger.info(f'表:{tab} 添加 第{n}层分类 {len(tree):>4}行')
def part1():
    """Collect pre-open call-auction quotes (runs only between 9:00 and 9:30)."""
    now = pd.Timestamp('today')
    if now.hour != 9 or now.minute >= 30:
        return
    quotes = asyncio.run(fetch_all())
    # Keep only rows stamped today.
    quotes = quotes.loc[quotes['时间'] >= now.normalize(), :]
    if not len(quotes):
        return
    quotes.to_sql('live_quotes', get_engine(db_dir_name),
                  index=False, if_exists='append')
    logger.info('添加{}行'.format(quotes.shape[0]))
def update_gn_time():
    """Refresh the THS stock-concept overview list.

    Best effort: a failure is logged rather than raised, and the firefox
    driver is always cleaned up in ``finally``.
    """
    engine = get_engine(db_dir_name)
    try:
        with THS() as api:
            df = api.gn_times
            df.to_sql('thsgn_times', engine, index=False, if_exists='replace')
    except Exception as e:
        # Was `pass`: a bare swallow hid every failure (network, driver,
        # DB write). Keep the best-effort behavior but leave a trace.
        logger.error(f'更新股票概念概述列表失败 {e!r}')
    finally:
        kill_firefox()
def refresh():
    """Rebuild the Tencent stock-concept list (delete everything, re-append)."""
    # Clear existing rows before writing.
    delete_all()
    frame = fetch_concept_stocks()
    frame.rename(
        columns={
            'item_id': '概念id',
            'item_name': '概念简称',
            'code': '股票代码',
        },
        inplace=True,
    )
    tab = TCTGN.__tablename__
    frame.to_sql(tab, get_engine(db_dir_name), if_exists='append', index=False)
    logger.notice(f"表{tab} 更新{len(frame)}行")
def refresh_daily():
    """Refresh daily stock bars via a worker pool and append them to the DB."""
    logger.info('刷新股票日线数据......')
    start = time.time()
    codes = get_valid_codes()
    with Pool(max_worker) as p:
        dfs = p.map(get_data, codes)
    engine = get_engine(db_dir_name)
    table_name = 'stock_dailies'
    # `pd.concat` raises ValueError on an empty (or all-None) sequence —
    # e.g. when there are no valid codes or every fetch came back empty —
    # so filter first and skip the write when nothing was fetched.
    dfs = [d for d in dfs if d is not None and len(d)]
    if dfs:
        to_add = pd.concat(dfs)
        to_add.to_sql(table_name, engine, if_exists='append')
        logger.info(f'添加{to_add.shape[0]:>4}行')
    logger.info(f"总用时:{(time.time() - start):>.4f}秒")
def refresh():
    """Rebuild SSE stock / delisting / suspend tables (overwrite-style update)."""
    # Remove existing rows before re-appending.
    delete_all()
    engine = get_engine(db_dir_name)
    jobs = (
        (stock_list, 'stocks'),
        (delisting, 'delistings'),
        (suspend, 'suspends'),
    )
    with SSEPage() as sse_api:
        for fetch, tab in jobs:
            try:
                data = fetch(sse_api)
                data.to_sql(tab, engine, if_exists='append', index=False)
                logger.notice(f"表{tab} 更新{len(data)}行")
            except Exception as e:
                logger.error(f'{tab}数据刷新失败 {e!r}')
            # Throttle between requests.
            time.sleep(0.5)
def refresh_live_quote():
    """Refresh live quotes.

    The pre-open call auction is handled by the scheduler (`part1`), which
    starts at 9:15 and runs for 15 minutes; this function covers the two
    continuous sessions (2h from 9:31, 2h from 12:01).
    """
    part1()
    today = pd.Timestamp('today')
    # Do nothing on non-trading days.
    if not need_refresh(today):
        return
    df = asyncio.run(fetch_all())
    # Filter to today's rows BEFORE the emptiness check (matching `part1`):
    # the original checked first, so an all-stale frame still triggered a
    # write of an empty frame and an "添加0行" log line.
    df = df.loc[df['时间'] >= today.normalize(), :]
    if len(df) > 0:
        engine = get_engine(db_dir_name)
        table_name = 'live_quotes'
        df.to_sql(table_name, engine, index=False, if_exists='append')
        logger.info('添加{}行'.format(df.shape[0]))
def get_investment_rating_data(only_A=True):
    """Investment ratings keyed by (sid, asof_date).

    Parameters
    ----------
    only_A : bool
        Keep A-share rows only (drop codes starting with '2' or '9').
    """
    excluded = {'序号', '证券简称', 'last_refresh_time', '备注'}
    cols = [c.name
            for c in InvestmentRating.__table__.columns
            if c.name not in excluded]
    df = pd.read_sql_table(InvestmentRating.__tablename__,
                           get_engine('dataBrowse'),
                           columns=cols)
    if only_A:
        non_a = df.证券代码.str.startswith('2') | df.证券代码.str.startswith('9')
        df = df[~non_a]
    df.rename(columns={"证券代码": "sid", "发布日期": "asof_date"}, inplace=True)
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
def create_tables(db_dir_name=DB_DIR_NAME, rewrite=False):
    """Initialize the tables for ``db_dir_name``.

    When ``rewrite`` is true the underlying database file is deleted first
    (missing file is fine). Raises ``ValueError`` for an unsupported name.
    """
    path = db_path(db_dir_name)
    if rewrite:
        try:
            os.remove(path)
        except FileNotFoundError:
            pass
    engine = get_engine(db_dir_name, echo=True)
    # Dispatch on the database-name prefix, checked in declaration order.
    dispatch = (
        ('dataBrowse', SZXBase),
        ('info', InfoBase),
        ('szsh', szshBase),
        ('thematicStatistics', TSBase),
    )
    for prefix, base in dispatch:
        if db_dir_name.startswith(prefix):
            base.metadata.create_all(engine)
            return
    raise ValueError(f'不支持{db_dir_name}')
def get_performance_forecaste_data(only_A=True):
    """Listed-company performance forecasts keyed by (sid, asof_date).

    Parameters
    ----------
    only_A : bool
        Keep A-share rows only (drop codes starting with '2' or '9').
    """
    excluded = {'证券简称', '业绩类型编码', '业绩类型', 'last_refresh_time', '备注'}
    cols = [c.name
            for c in PerformanceForecaste.__table__.columns
            if c.name not in excluded]
    df = pd.read_sql_table(PerformanceForecaste.__tablename__,
                           get_engine('dataBrowse'),
                           columns=cols)
    if only_A:
        non_a = df.证券代码.str.startswith('2') | df.证券代码.str.startswith('9')
        df = df[~non_a]
    df.rename(columns={"证券代码": "sid", "公告日期": "asof_date"}, inplace=True)
    # Repair asof_date / timestamp using the '报告年度' column.
    _fill_ad_and_ts(df, '报告年度')
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
def _periodly_report(only_A, table):
    """Load a periodic financial-report table in normalized long format.

    As a rule the period end ('截止日期') matches the report year
    ('报告年度'), but corrections and restatements can make them differ.
    """
    drop_cols = ['证券简称', '机构名称', '合并类型编码', '合并类型',
                 '报表来源编码', '报表来源', 'last_refresh_time', '备注']
    df = pd.read_sql_table(table, get_engine('dataBrowse'))
    if only_A:
        non_a = df.证券代码.str.startswith('2') | df.证券代码.str.startswith('9')
        df = df[~non_a]
    df.drop(drop_cols, axis=1, inplace=True, errors='ignore')
    df.rename(
        columns={"证券代码": "sid",
                 "截止日期": "asof_date",
                 "公告日期": "timestamp"},
        inplace=True,
    )
    # Repair missing asof_date / timestamp values in place.
    _fill_ad_and_ts(df)
    # Normalize column names.
    df.columns = df.columns.map(_normalized_col_name)
    df.sort_values(['sid', 'asof_date'], inplace=True)
    return df
def _get_engine(db_name): if db_name == 'dataBrowse': return get_engine('dataBrowse') elif db_name == 'thematicStatistics': return get_engine('thematicStatistics')