async def init_disclosure():
    """Initialize historical disclosure announcements.

    Walks every day from 2010-01-01 through today, fetching that day's
    announcements and storing them in the ``info`` database.  Each day is
    attempted up to 3 times before moving on.
    """
    df_session = get_session(db_dir_name='info')
    sdate = pd.Timestamp('2010-01-01')
    edate = pd.Timestamp('today')
    date_rng = pd.date_range(sdate, edate)

    async def is_completed(web_session, d, times):
        """Fetch and store one day's data; True means done (even when empty)."""
        try:
            df = await fetch_one_day(web_session, d)
            logger.info(f"提取网络数据 {d.strftime(r'%Y-%m-%d')} 共{len(df)}行")
            _refresh(df, df_session)
            return True
        except ValueError:
            # ValueError signals "no data for this day" — count as completed.
            # FIX: logger.warn is a deprecated alias; use warning().
            logger.warning(f"{d.strftime(r'%Y-%m-%d')} 无数据")
            return True
        except Exception as e:
            logger.warning(f"第{times}次尝试失败。 {d.strftime(r'%Y-%m-%d')} {e!r}")
            return False

    try:
        async with aiohttp.ClientSession() as web_session:
            for d in date_rng:
                # Retry up to 3 times, pausing between failed attempts.
                for i in range(1, 4):
                    if await is_completed(web_session, d, i):
                        break
                    await asyncio.sleep(4)
                # Throttle between days to avoid hammering the server.
                await asyncio.sleep(4)
    finally:
        # FIX: release the DB session even when fetching fails part-way.
        df_session.close()
def delete_all():
    """Delete all Tencent stock-concept (TCTGN) rows from the local database."""
    session = get_session(db_dir_name)
    try:
        # FIX: guarantee the session is closed even if the delete/commit fails.
        num = session.query(TCTGN).delete(False)
        logger.notice(f"删除 表:{TCTGN.__tablename__} {num}行")
        session.commit()
    finally:
        session.close()
def is_trading_day(one_day):
    """Return whether *one_day* is a trading day per the local calendar table."""
    sess = get_session('szsh')
    query = sess.query(TradingCalendar.交易日)
    query = query.filter(func.date(TradingCalendar.日期) == one_day)
    result = query.scalar()
    sess.close()
    return result
def init_stock_daily_data():
    """Initialize daily quote data for every stock (delisted ones included)."""
    # Multiprocessing tends to deadlock the database, and a single process
    # is already fast enough — keep it simple.
    sess = get_session(db_dir_name)
    for stock_code in get_valid_codes(False):
        _refresh_data(stock_code, sess)
    sess.close()
def flush_index_daily():
    """Refresh index daily-line data up to the most recent trading day.

    Raises
    ------
    NotImplementedError
        When the trading-calendar table has not been initialized yet.
    """
    sess = get_session(db_dir_name)
    last = sess.query(func.max(TradingCalendar.日期)).filter(
        TradingCalendar.交易日 == True).scalar()
    sess.close()
    # FIX: check for None *before* calling .date() — the original called
    # scalar().date() first, so an empty calendar raised AttributeError
    # instead of the intended NotImplementedError.
    if last is None:
        raise NotImplementedError('尚未初始化交易日历数据!')
    end = last.date()
    codes = get_main_index().index
    flush(codes, end)
def delete_all():
    """Delete all stored stock basic-information data.

    Clears the ``Stock``, ``Delisting`` and ``Suspend`` tables in a single
    commit.
    """
    session = get_session(db_dir_name)
    try:
        # The delete/log pattern was copy-pasted three times — loop instead.
        for model in (Stock, Delisting, Suspend):
            num = session.query(model).delete(False)
            logger.notice(f"删除 表:{model.__tablename__} {num}行")
        session.commit()
    finally:
        # FIX: close the session even when a delete fails.
        session.close()
def get_precomputed_shanghai_holidays():
    """Return all non-weekend market holidays since the exchange opened."""
    sess = get_session('szsh')
    rows = sess.query(func.date(TradingCalendar.日期)).filter(
        TradingCalendar.交易日 == 0,
    ).all()
    sess.close()
    weekend = ('Saturday', 'Sunday')
    holidays = []
    for (day,) in rows:
        # Weekends are implied non-trading days; only keep true holidays.
        if pd.Timestamp(day).day_name() not in weekend:
            holidays.append(day)
    return holidays
def get_data(code):
    """Fetch daily-line data for one stock, starting after its last stored date."""
    sess = get_session(db_dir_name)
    last = last_date(sess, StockDaily, code)
    sess.close()
    # Without stored history, start at the IPO date; otherwise the next day.
    start = get_ipo_date(code) if last is None else last + pd.Timedelta(days=1)
    if start > pd.Timestamp('today').normalize():
        # Already current — nothing to fetch.
        return pd.DataFrame()
    return _fix_data(fetch_history(code, start))
def _update_gn_list(urls):
    """Populate the concept list from *urls*, retrying leftovers up to 20 times.

    Parameters
    ----------
    urls : iterable of (url, name) pairs; the concept code is embedded in
        the url tail (``url[-7:-1]``).
    """
    sess = get_session(db_dir_name)
    api = THS()
    try:
        codes = [x[0][-7:-1] for x in urls]
        d = {x[0][-7:-1]: x[1] for x in urls}
        for i in range(20):
            log.info('第{}次尝试,剩余{}个概念'.format(i + 1, len(codes)))
            # _add_gn_page returns the codes that still failed this round.
            codes = _add_gn_page(api, sess, codes, d)
            if len(codes) == 0:
                break
            time.sleep(1)
    finally:
        # FIX: always release the browser and the DB session; the original
        # leaked both when _add_gn_page raised.
        api.browser.quit()
        sess.close()
def _add_or_update(d, status):
    """Insert or update the trading-calendar row for date *d* with *status*."""
    sess = get_session(db_dir_name)
    existing = sess.query(TradingCalendar).filter(
        func.date(TradingCalendar.日期) == d.date()).one_or_none()
    if existing is None:
        sess.add(TradingCalendar(日期=d.date(), 交易日=status))
    else:
        existing.交易日 = status
    info = '交易日' if status else '*非*交易日'
    logger.info('添加或者刷新{}:{}'.format(info, d.date()))
    sess.commit()
    sess.close()
def _delete(self, api, level, start):
    """Delete all rows of *level*'s table dated on or after *start*.

    Recent margin-trading rows can be inconsistent, so stale data from
    *start* onward is purged before refreshing.
    """
    model = self.get_level_class(level)
    date_column = getattr(model, self.get_date_field(level))
    session = get_session(self.db_name)
    num = session.query(model).filter(date_column >= start).delete(False)
    st = start.strftime(r'%Y-%m-%d')
    msg = f"删除 {self.db_name} {model.__tablename__} {st} 开始 {num}行"
    api.logger.notice(msg)
    session.commit()
    session.close()
def get_valid_codes(is_trading=False):
    """Return sorted valid A-share codes.

    When *is_trading* is False, delisted and suspended codes are included
    as well.
    """
    session = get_session('szsh')
    codes = session.query(Stock.A股代码).filter(Stock.A股代码.isnot(None)).all()
    d_codes = session.query(Delisting.证券代码).all()
    s_codes = session.query(Suspend.证券代码).all()
    session.close()
    pool = [row[0] for row in codes]
    if not is_trading:
        pool += [row[0] for row in d_codes]
        pool += [row[0] for row in s_codes]
    # Keep only codes whose first digit marks the main A-share boards.
    valid = {c for c in pool if c[0] in ('0', '3', '6')}
    return sorted(valid)
def refresh_treasury():
    """Refresh treasury-bond rate data."""
    # Download the latest period's file first.
    download_last_year()
    sess = get_session(db_dir_name)
    try:
        start = get_start(sess)
        if start is None:
            # No local data yet: fetch the entire history.
            df = fetch_treasury_data_from()
        elif start > pd.Timestamp('today').date():
            # Already up to date.  FIX: the original returned here without
            # closing the session.
            return
        else:
            # Read data from the stored start date onward.
            df = fetch_treasury_data_from(start)
        insert(sess, df)
    finally:
        sess.close()
def get_start_date(self, level):
    """Return the start date for refreshing *level*'s project data."""
    session = get_session(self.db_name)
    model = self.get_level_class(level)
    date_expr = getattr(model, self.get_date_field(level))
    default_date = self.get_default_start_date(level)
    # The table's last two distinct dates, newest first.
    end_dates = session.query(date_expr).order_by(date_expr.desc()).distinct()[:2]
    session.close()
    if end_dates:
        return self._compute_start(end_dates, level)
    # Empty table: fall back to the configured default start.
    return pd.Timestamp(default_date).normalize()
async def refresh_disclosure():
    """Refresh company disclosure announcements up to (and including) tomorrow."""
    session = get_session(db_dir_name='info')
    today = pd.Timestamp('today')
    end_date = today + pd.Timedelta(days=1)
    start_date = last_date(session)
    if start_date is None:
        start_date = pd.Timestamp('2010-01-01')
    else:
        start_date = start_date + pd.Timedelta(days=1)
    # Tomorrow's announcements can already be fetched.
    if start_date > end_date + pd.Timedelta(days=1):
        # FIX: the original leaked the session on this early return.
        session.close()
        return
    date_rng = pd.date_range(start_date, end_date)
    try:
        async with aiohttp.ClientSession() as web_session:
            for d in date_rng:
                df = await fetch_one_day(web_session, d)
                _refresh(df, session)
                del df
                # FIX: time.sleep() blocked the event loop inside this
                # coroutine; throttle with the asyncio-aware sleep instead.
                await asyncio.sleep(1)
    finally:
        session.close()
def flush(codes, end):
    """Refresh index daily-line data for every code in *codes* up to *end*.

    Parameters
    ----------
    codes : iterable of index codes.
    end : last date (inclusive) to fetch.
    """
    # FIX: the original opened a fresh session on every loop iteration but
    # closed only the very last one (leaking the rest, and raising NameError
    # on ``sess.close()`` when *codes* was empty).  Open one session and
    # always release it.
    sess = get_session(db_dir_name)
    try:
        for code in codes:
            start = _get_start_date(sess, code)
            if start is not None and start > end:
                logger.info('代码:{} 无需刷新'.format(code))
                continue
            try:
                df = fetch_history(code=code, start=start, end=end, is_index=True)
                # Sort by date, ascending.
                df.sort_index(inplace=True)
            except ValueError:
                # Raised when the start date is after the end date.
                logger.info('无法获取网页数据。代码:{},开始日期:{}, 结束日期:{}'.format(
                    code, start, end))
                continue
            objs = _gen(df)
            sess.add_all(objs)
            sess.commit()
            logger.info('代码:{}, 新增{}行'.format(code, len(objs)))
    finally:
        sess.close()
def update_gn_list():
    """Update the stock-concept list.

    Most reliable when run after market close — intraday price changes can
    produce duplicate rows.
    """
    if is_trading_time():
        warnings.warn('建议非交易时段更新股票概念。交易时段内涨跌幅经常变动,容易产生重复值!!!')
    sess = get_session(db_dir_name)
    # Drop the existing concept list before rebuilding it.
    sess.query(THSGN).delete()
    sess.commit()
    sess.close()
    try:
        api = THS()
        urls = api.gn_urls
        api.browser.quit()
        _update_gn_list(urls)
    except Exception as e:
        # Best-effort scrape: keep going, but FIX: surface the failure
        # instead of swallowing it silently with a bare ``pass``.
        warnings.warn(f'更新股票概念失败:{e!r}')
    finally:
        # Make sure no orphaned browser processes survive.
        kill_firefox()
def _append_historical_news(tag, times):
    """Append historical news items for topic *tag*; *times* controls paging depth."""
    session = get_session(db_dir_name)
    count = 0
    with Sina247News() as api:
        for news in api._get_topic_news(tag, times):
            record = EconomicNews()
            record.序号 = news[0]
            record.时间 = pd.Timestamp('{} {}'.format(news[1], news[2]))
            record.概要 = news[3]
            record.分类 = news[4]
            session.add(record)
            try:
                session.commit()
                count += 1
                logger.info(f"栏目:{TOPIC_MAPS[tag]:>4} 已添加,序号:{news[0]}")
            except IntegrityError:
                # Duplicate sequence number — undo this add and keep going.
                logger.notice(f"序号:{news[0]}已经存在,自动回滚撤销")
                session.rollback()
    logger.info(f"栏目:{TOPIC_MAPS[tag]:>4} 累计添加{count:>4}条")
    session.close()
def refresh_news():
    """Refresh the latest financial news feed."""
    session = get_session(db_dir_name)
    count = 0
    with Sina247News() as api:
        # Process newest first (descending sequence number).
        for news in sorted(api.live_data, key=lambda item: item[0], reverse=True):
            record = EconomicNews()
            record.序号 = news[0]
            record.时间 = pd.Timestamp('{} {}'.format(news[1], news[2]))
            record.概要 = news[3]
            record.分类 = news[4]
            session.add(record)
            try:
                session.commit()
                count += 1
            except IntegrityError:
                # Item already stored — roll back and continue.
                session.rollback()
    logger.info(f'添加{count:4} 行')
    session.close()
def before_update_stock_classify():
    """Delete locally stored classification data before updating stock classes."""
    # NOTE(review): the session is not closed here — presumably
    # delete_data_of() takes ownership of it; confirm against its definition.
    delete_data_of(Classification, get_session('dataBrowse'))
def _get_session(db_name):
    """Return a session for *db_name*; None for any unrecognized name."""
    if db_name in ('dataBrowse', 'thematicStatistics'):
        return get_session(db_name)
    # Unknown database name: fall through to None, like the original if/elif.
    return None
def has_data():
    """Return True when the trading-calendar table contains at least one row."""
    sess = get_session(db_dir_name)
    exists_clause = sess.query(TradingCalendar).exists()
    result = sess.query(exists_clause).scalar()
    sess.close()
    return result
def get_ipo_date(code):
    """Return the A-share listing (IPO) date for stock *code*."""
    session = get_session('szsh')
    ipo = session.query(Stock.A股上市日期).filter(Stock.A股代码 == code).scalar()
    session.close()
    return ipo