Пример #1
0
async def init_disclosure():
    """Initialise the historical company-announcement data.

    Walks every calendar day from 2010-01-01 up to today, fetches that day's
    announcements with ``fetch_one_day`` and stores them with ``_refresh``.
    Each day is attempted at most three times, sleeping 4 seconds after a
    failed attempt and another 4 seconds before moving on to the next day.

    The database session is closed even when the run is interrupted by an
    exception or cancellation.
    """
    df_session = get_session(db_dir_name='info')
    date_rng = pd.date_range(pd.Timestamp('2010-01-01'), pd.Timestamp('today'))

    async def is_completed(web_session, d, times):
        """Return True when day ``d`` needs no further attempt."""
        day = d.strftime(r'%Y-%m-%d')
        try:
            df = await fetch_one_day(web_session, d)
            logger.info(f"提取网络数据 {day} 共{len(df)}行")
            _refresh(df, df_session)
            return True
        except ValueError:
            # A ValueError is treated as "no data for this day": not retried.
            logger.warning(f"{day} 无数据")
            return True
        except Exception as e:
            # Any other failure is reported and retried by the caller.
            logger.warning(f"第{times}次尝试失败。 {day} {e!r}")
            return False

    try:
        async with aiohttp.ClientSession() as web_session:
            for d in date_rng:
                # Up to three attempts per day.
                for attempt in range(1, 4):
                    if await is_completed(web_session, d, attempt):
                        break
                    await asyncio.sleep(4)
                await asyncio.sleep(4)
    finally:
        # Release the database session on every exit path.
        df_session.close()
Пример #2
0
def delete_all():
    """Delete every row of the Tencent stock-concept list table (``TCTGN``).

    The number of deleted rows is logged and the deletion is committed.
    The session is closed even if the delete or the commit raises.
    """
    session = get_session(db_dir_name)
    try:
        # The positional ``False`` is passed through unchanged as the first
        # argument of ``Query.delete`` (``synchronize_session``).
        num = session.query(TCTGN).delete(False)
        logger.notice(f"删除 表:{TCTGN.__tablename__} {num}行")
        session.commit()
    finally:
        session.close()
Пример #3
0
def is_trading_day(one_day):
    """Look up whether ``one_day`` is flagged as a trading day.

    Queries the ``szsh`` database for the ``交易日`` value of the calendar row
    whose date equals ``one_day`` and returns whatever ``scalar()`` yields.
    """
    session = get_session('szsh')
    date_matches = func.date(TradingCalendar.日期) == one_day
    query = session.query(TradingCalendar.交易日).filter(date_matches)
    flag = query.scalar()
    session.close()
    return flag
Пример #4
0
def init_stock_daily_data():
    """Initialise daily data for all stocks (including delisted ones).

    Every code returned by ``get_valid_codes(False)`` is refreshed through
    ``_refresh_data`` using one shared session, which is closed afterwards
    even if a refresh fails.
    """
    # Multiprocessing easily causes database deadlocks, and a single process
    # is already very fast, so this is deliberately kept sequential.
    codes = get_valid_codes(False)
    sess = get_session(db_dir_name)
    try:
        for code in codes:
            _refresh_data(code, sess)
    finally:
        sess.close()
Пример #5
0
def flush_index_daily():
    """Refresh the daily data of the main indexes.

    The end date is the latest calendar date flagged as a trading day.

    Raises:
        NotImplementedError: if the trading calendar holds no trading day,
            i.e. it has not been initialised yet.
    """
    sess = get_session(db_dir_name)
    try:
        # ``== True`` builds a SQL expression; it must not become ``is True``.
        last_day = sess.query(func.max(TradingCalendar.日期)).filter(
            TradingCalendar.交易日 == True).scalar()  # noqa: E712
        # Check for "no rows" BEFORE calling ``.date()``; otherwise an
        # AttributeError would hide the intended error below.
        if last_day is None:
            raise NotImplementedError('尚未初始化交易日历数据!')
        end = last_day.date()
        codes = get_main_index().index
    finally:
        sess.close()
    flush(codes, end)
Пример #6
0
def delete_all():
    """Delete the stored basic stock information.

    Empties the ``Stock``, ``Delisting`` and ``Suspend`` tables in that
    order, logging the number of rows removed from each, then commits once.
    The session is closed even if a delete or the commit raises.
    """
    session = get_session(db_dir_name)
    try:
        for table in (Stock, Delisting, Suspend):
            # The positional ``False`` is passed through unchanged as the
            # first argument of ``Query.delete`` (``synchronize_session``).
            num = session.query(table).delete(False)
            logger.notice(f"删除 表:{table.__tablename__} {num}行")
        session.commit()
    finally:
        session.close()
Пример #7
0
def get_precomputed_shanghai_holidays():
    """Return the non-weekend holidays stored in the trading calendar.

    Reads every calendar date whose ``交易日`` flag equals 0 from the ``szsh``
    database and drops those that fall on a Saturday or Sunday.
    """
    weekend = ('Saturday', 'Sunday')
    session = get_session('szsh')
    rows = session.query(func.date(TradingCalendar.日期)).filter(
        TradingCalendar.交易日 == 0, ).all()
    session.close()

    holidays = []
    for row in rows:
        day = row[0]
        if pd.Timestamp(day).day_name() in weekend:
            continue
        holidays.append(day)
    return holidays
Пример #8
0
def get_data(code):
    """Fetch the daily data of a single stock that is still to be loaded.

    The start date is the day after the value returned by ``last_date`` for
    ``StockDaily``; when that value is ``None`` the result of
    ``get_ipo_date(code)`` is used instead. An empty ``DataFrame`` is
    returned when the start date lies after today.
    """
    session = get_session(db_dir_name)
    latest = last_date(session, StockDaily, code)
    session.close()

    start = get_ipo_date(code) if latest is None else latest + pd.Timedelta(days=1)
    today = pd.Timestamp('today').normalize()
    if start > today:
        return pd.DataFrame()
    return _fix_data(fetch_history(code, start))
Пример #9
0
def _update_gn_list(urls):
    """Add the concept pages referenced by ``urls``, retrying up to 20 times.

    Each item of ``urls`` is indexed as ``x[0]`` (a string whose characters
    ``[-7:-1]`` are used as the concept code) and ``x[1]`` (the value mapped
    to that code). ``_add_gn_page`` returns the codes still outstanding; the
    loop stops as soon as none remain, pausing one second between rounds.

    The browser and the database session are released even when a round
    raises.
    """
    codes = [x[0][-7:-1] for x in urls]
    d = {x[0][-7:-1]: x[1] for x in urls}
    sess = get_session(db_dir_name)
    try:
        api = THS()
        try:
            for i in range(20):
                log.info('第{}次尝试,剩余{}个概念'.format(i + 1, len(codes)))
                codes = _add_gn_page(api, sess, codes, d)
                if len(codes) == 0:
                    break
                time.sleep(1)
        finally:
            # Always shut the browser down so no driver process is left over.
            api.browser.quit()
    finally:
        sess.close()
Пример #10
0
def _add_or_update(d, status):
    """Insert or update the trading-calendar row for the date of ``d``.

    Args:
        d: an object exposing ``.date()`` (e.g. a timestamp).
        status: value stored in the ``交易日`` column; its truthiness selects
            the wording of the log message.

    The session is closed even if the query or the commit raises.
    """
    day = d.date()
    sess = get_session(db_dir_name)
    try:
        old = sess.query(TradingCalendar).filter(
            func.date(TradingCalendar.日期) == day).one_or_none()
        # Compare against None explicitly instead of relying on the
        # truthiness of a mapped object.
        if old is not None:
            old.交易日 = status
        else:
            sess.add(TradingCalendar(日期=day, 交易日=status))
        info = '交易日' if status else '*非*交易日'
        logger.info('添加或者刷新{}:{}'.format(info, day))
        sess.commit()
    finally:
        sess.close()
Пример #11
0
 def _delete(self, api, level, start):
     """Delete all rows of the level's table dated on or after ``start``.

     Args:
         api: object whose ``logger.notice`` receives the summary message.
         level: key passed to ``get_level_class`` / ``get_date_field``.
         start: first date to delete; must support ``strftime``.

     The session is closed even if the delete or the commit raises.
     """
     # Recent daily data is removed because margin-trading data can leave
     # the stored rows inconsistent, so the old rows have to be cleared.
     class_ = self.get_level_class(level)
     table_name = class_.__tablename__
     expr = getattr(class_, self.get_date_field(level))
     session = get_session(self.db_name)
     try:
         num = session.query(class_).filter(expr >= start).delete(False)
         st = start.strftime(r'%Y-%m-%d')
         msg = f"删除 {self.db_name} {table_name} {st} 开始 {num}行"
         api.logger.notice(msg)
         session.commit()
     finally:
         session.close()
Пример #12
0
def get_valid_codes(is_trading=False):
    """Return the sorted, de-duplicated valid A-share stock codes.

    Args:
        is_trading: when true, only codes from the ``Stock`` table are used;
            otherwise codes from ``Delisting`` and ``Suspend`` are included
            as well.

    Only codes starting with ``'0'``, ``'3'`` or ``'6'`` are kept. Missing or
    empty codes are skipped instead of raising.
    """
    session = get_session('szsh')
    try:
        rows = session.query(Stock.A股代码).filter(
            Stock.A股代码.isnot(None)).all()
        # The delisted / suspended tables are only read when they are needed.
        if not is_trading:
            rows = rows + session.query(Delisting.证券代码).all()
            rows = rows + session.query(Suspend.证券代码).all()
    finally:
        session.close()
    prefixes = ('0', '3', '6')
    codes = {
        row[0] for row in rows
        if row[0] and row[0].startswith(prefixes)
    }
    return sorted(codes)
Пример #13
0
def refresh_treasury():
    """Refresh the treasury interest-rate data.

    Downloads the latest period first, then inserts the data fetched from
    the start date reported by ``get_start``. When no start date exists
    everything is fetched; when the start date lies after today nothing is
    done. The session is closed on every exit path, including the early
    return.
    """
    # Download the most recent period of data first.
    download_last_year()
    sess = get_session(db_dir_name)
    try:
        start = get_start(sess)
        if start is None:
            df = fetch_treasury_data_from()
        elif start > pd.Timestamp('today').date():
            return
        else:
            # Read the data beginning at the start date.
            df = fetch_treasury_data_from(start)
        insert(sess, df)
    finally:
        sess.close()
Пример #14
0
 def get_start_date(self, level):
     """Return the date from which the data of ``level`` should be refreshed.

     Takes the first two distinct values of the level's date column in
     descending order. When there are none, the normalized default start
     date is returned; otherwise ``_compute_start`` derives the result.
     """
     db = get_session(self.db_name)
     model = self.get_level_class(level)
     field_name = self.get_date_field(level)
     fallback = self.get_default_start_date(level)
     column = getattr(model, field_name)
     # Descending order, keeping only the two latest distinct dates.
     latest_two = db.query(column).order_by(column.desc()).distinct()[:2]
     db.close()
     if latest_two:
         return self._compute_start(latest_two, level)
     # Nothing stored yet: fall back to the default.
     return pd.Timestamp(fallback).normalize()
Пример #15
0
async def refresh_disclosure():
    """Refresh the company announcements.

    Fetches one day at a time from the day after ``last_date(session)``
    (or 2010-01-01 when that is ``None``) through tomorrow, storing each
    day with ``_refresh`` and pausing one second between days.

    The pause uses ``asyncio.sleep`` so the event loop is not blocked, and
    the database session is closed on every exit path.
    """
    # Local import keeps this block self-contained.
    import asyncio

    session = get_session(db_dir_name='info')
    try:
        today = pd.Timestamp('today')
        end_date = today + pd.Timedelta(days=1)
        start_date = last_date(session)
        if start_date is None:
            start_date = pd.Timestamp('2010-01-01')
        else:
            start_date = start_date + pd.Timedelta(days=1)
        # Tomorrow's announcements can already be fetched.
        if start_date > end_date + pd.Timedelta(days=1):
            return
        date_rng = pd.date_range(start_date, end_date)
        async with aiohttp.ClientSession() as web_session:
            for d in date_rng:
                df = await fetch_one_day(web_session, d)
                _refresh(df, session)
                # Drop the frame before pausing so it is not kept alive.
                del df
                await asyncio.sleep(1)
    finally:
        session.close()
Пример #16
0
def flush(codes, end):
    """Append missing daily index data for every code in ``codes``.

    Args:
        codes: iterable of index codes.
        end: last date to fetch (compared against each code's start date).

    A code is skipped when its start date is after ``end`` or when
    ``fetch_history`` raises ``ValueError``. Each iteration uses its own
    session, which is closed on every path, including the skips.
    """
    for code in codes:
        sess = get_session(db_dir_name)
        try:
            start = _get_start_date(sess, code)
            if start is not None and start > end:
                logger.info('代码:{} 无需刷新'.format(code))
                continue
            try:
                df = fetch_history(
                    code=code, start=start, end=end, is_index=True)
                # Sort by date, ascending.
                df.sort_index(inplace=True)
            except ValueError:
                # A start date later than the end date triggers ValueError.
                logger.info('无法获取网页数据。代码:{},开始日期:{}, 结束日期:{}'.format(
                    code, start, end))
                continue
            objs = _gen(df)
            sess.add_all(objs)
            sess.commit()
            logger.info('代码:{}, 新增{}行'.format(
                code, len(objs)))
        finally:
            sess.close()
Пример #17
0
def update_gn_list():
    """Update the stock-concept list.

    Intended to be run after the market closes; a warning is issued when
    called during trading hours. The existing ``THSGN`` rows are deleted
    first, then the concept URLs are collected and handed to
    ``_update_gn_list``.

    The update stays best-effort: a failure is reported as a warning rather
    than raised, and ``kill_firefox`` always runs at the end.
    """
    if is_trading_time():
        warnings.warn('建议非交易时段更新股票概念。交易时段内涨跌幅经常变动,容易产生重复值!!!')
    sess = get_session(db_dir_name)
    try:
        # Remove the previously stored data first.
        sess.query(THSGN).delete()
        sess.commit()
    finally:
        sess.close()
    try:
        api = THS()
        try:
            urls = api.gn_urls
        finally:
            # Quit the browser even if reading the URLs fails.
            api.browser.quit()
        _update_gn_list(urls)
    except Exception as e:
        # Report the failure instead of swallowing it silently.
        warnings.warn(f'更新股票概念列表失败:{e!r}')
    finally:
        kill_firefox()
Пример #18
0
def _append_historical_news(tag, times):
    """Append historical news of topic ``tag`` to the database.

    Args:
        tag: topic key, used for the API call and as a ``TOPIC_MAPS`` key.
        times: forwarded unchanged to ``Sina247News._get_topic_news``.

    Every item is committed on its own, so an item rejected with
    ``IntegrityError`` is rolled back without affecting the others.
    The session is closed even if an unexpected error occurs.
    """
    session = get_session(db_dir_name)
    try:
        count = 0
        with Sina247News() as api:
            data = api._get_topic_news(tag, times)
        for news in data:
            obj = EconomicNews()
            obj.序号 = news[0]
            obj.时间 = pd.Timestamp('{} {}'.format(news[1], news[2]))
            obj.概要 = news[3]
            obj.分类 = news[4]
            session.add(obj)
            try:
                session.commit()
                count += 1
                logger.info(f"栏目:{TOPIC_MAPS[tag]:>4} 已添加,序号:{news[0]}")
            except IntegrityError:
                logger.notice(f"序号:{news[0]}已经存在,自动回滚撤销")
                session.rollback()
        logger.info(f"栏目:{TOPIC_MAPS[tag]:>4} 累计添加{count:>4}条")
    finally:
        session.close()
Пример #19
0
def refresh_news():
    """Refresh the latest financial news.

    Reads ``Sina247News.live_data``, sorts it by its first field in
    descending order and commits each item on its own; items rejected with
    ``IntegrityError`` are rolled back and skipped. The number of added rows
    is logged, and the session is closed even on unexpected errors.
    """
    session = get_session(db_dir_name)
    try:
        count = 0
        with Sina247News() as api:
            data = api.live_data
        data = sorted(data, key=lambda item: item[0], reverse=True)
        for news in data:
            obj = EconomicNews()
            obj.序号 = news[0]
            obj.时间 = pd.Timestamp('{} {}'.format(news[1], news[2]))
            obj.概要 = news[3]
            obj.分类 = news[4]
            session.add(obj)
            try:
                session.commit()
                count += 1
            except IntegrityError:
                # NOTE: an early ``break`` on the first duplicate was
                # disabled in the original; duplicates are skipped one by one.
                session.rollback()
        logger.info(f'添加{count:4} 行')
    finally:
        session.close()
Пример #20
0
def before_update_stock_classify():
    """Clear the stored classification data before it is updated.

    Hands ``Classification`` and a ``dataBrowse`` session to
    ``delete_data_of``.
    """
    # NOTE(review): the session is not closed here; presumably
    # ``delete_data_of`` takes ownership of it — confirm.
    delete_data_of(Classification, get_session('dataBrowse'))
Пример #21
0
def _get_session(db_name):
    if db_name == 'dataBrowse':
        return get_session('dataBrowse')
    elif db_name == 'thematicStatistics':
        return get_session('thematicStatistics')
Пример #22
0
def has_data():
    """Report whether the trading-calendar table contains any row."""
    session = get_session(db_dir_name)
    any_row = session.query(TradingCalendar).exists()
    found = session.query(any_row).scalar()
    session.close()
    return found
Пример #23
0
def get_ipo_date(code):
    """Return the A-share listing date stored for stock ``code``.

    The value comes from the ``szsh`` database and is whatever ``scalar()``
    yields for the matching ``Stock`` row.
    """
    session = get_session('szsh')
    query = session.query(Stock.A股上市日期).filter(Stock.A股代码 == code)
    listed_on = query.scalar()
    session.close()
    return listed_on