Example #1
def get_tradeable(universe, start_time, end_time):
    '''
    Get the trading status of stocks
    Notes
    -----
    A volume of 0, or a high price equal to the low price, is treated as untradeable. A return value of 1 means normal trading, 0 means untradeable, and NA means the stock was not yet listed and therefore could not trade.
    '''
    sql = '''
    SELECT S.TradingDay, S.TurnoverVolume, S.HighPrice, S.LowPrice, M.Secucode
    FROM QT_DailyQuote S, SecuMain M
    WHERE
        S.InnerCode = M.InnerCode AND
        M.SecuMarket in (83, 90) AND
        S.TradingDay <= CAST(\'{end_time}\' as datetime) AND
        S.TradingDay >= CAST(\'{start_time}\' as datetime) AND
        M.SecuCategory = 1
    ORDER BY S.TradingDay ASC, M.Secucode ASC
    '''
    data = fdgetter.get_db_data(sql, cols=('time', 'vol', 'high', 'low', 'code'),
                                start_time=start_time, end_time=end_time, add_stockcode=False)
    # pdb.set_trace()
    data['code'] = data.code.apply(datatoolkits.add_suffix)
    data.loc[data.vol > 0, 'vol'] = 1
    data.loc[(data.vol > 0) & (data.high == data.low), 'vol'] = 0
    data = data.pivot_table('vol', index='time', columns='code')
    data = data.loc[:, sorted(universe)]
    assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
    return data
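A minimal usage sketch for the function above, assuming the fdgetter database connection and the helper modules are already configured; the stock codes, suffix format, and dates are hypothetical:

universe = ['000001.SZ', '600000.SH']                   # hypothetical codes and suffixes
tradeable = get_tradeable(universe, '2017-01-03', '2017-06-30')
print(tradeable.head())   # 1 = traded normally, 0 = untradeable, NaN = not yet listed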
Example #2
 def _inner(universe, start_time, end_time):
     offset = 100 * n + 250
     new_start = pd.to_datetime(start_time) - pd.Timedelta(
         '%d day' % offset)
     data = fdgetter.get_db_data(sql,
                                 start_time=new_start,
                                 end_time=end_time,
                                 cols=('update_time', 'rpt_date', 'code',
                                       'data'),
                                 add_stockcode=False)
     data['code'] = data.code.apply(datatoolkits.add_suffix)
     # This part could be factored out into a separate function
     by_code = data.groupby('code')
     data = by_code.apply(
         fdmutils.get_observabel_data).reset_index(drop=True)
     by_cno = data.groupby(['code', 'obs_time'])
     data = by_cno.apply(fdmutils.cal_season, col_name='data', offset=n)
     data = data.reset_index().rename(columns={'obs_time': 'time'})
     tds = dateshandle.get_tds(start_time, end_time)
     data = data.groupby('code').apply(
         datatoolkits.map_data,
         days=tds,
         fillna={'code': lambda x: x.code.iloc[0]})
     data = data.reset_index(drop=True)
     data = data.pivot_table('data', index='time', columns='code')
     data = data.loc[:, sorted(universe)]
     assert check_indexorder(data), 'Error, data order is mixed!'
     return data
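The fdmutils helpers are not shown in these snippets; the following is only a rough, hedged illustration of the point-in-time idea suggested by the column names (update_time, rpt_date, obs_time): at any observation date, only reports already published by that date are visible.

import pandas as pd

# Toy report table: update_time is the publication date, rpt_date the fiscal period (made up).
reports = pd.DataFrame({'update_time': pd.to_datetime(['2017-04-25', '2017-08-20']),
                        'rpt_date': pd.to_datetime(['2017-03-31', '2017-06-30']),
                        'data': [1.0, 2.0]})
obs_time = pd.Timestamp('2017-07-01')
observable = reports[reports.update_time <= obs_time]   # only the Q1 report is visible here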
Example #3
def get_adjfactor(universe, start_time, end_time):
    '''
    Stock price adjustment factors
    '''
    sql = '''
        SELECT A.ExDiviDate, A.RatioAdjustingFactor, M.SecuCode
        FROM QT_AdjustingFactor A, SecuMain M
        WHERE
            A.InnerCode = M.InnerCode AND
            M.SecuMarket in (83, 90) AND
            M.SecuCategory = 1
        ORDER BY M.SecuCode ASC, A.ExDiviDate ASC
        '''
    data = fdgetter.get_db_data(sql, cols=('time', 'data', 'code'), add_stockcode=False)
    data['code'] = data.code.apply(datatoolkits.add_suffix)
    by_code = data.groupby('code')
    tds = dateshandle.get_tds(start_time, end_time)
    data = by_code.apply(datatoolkits.map_data, days=tds, fromNowOn=True,
                         fillna={'code': lambda x: x.code.iloc[0], 'data': lambda x: 1})
    data = data.reset_index(drop=True)
    data = data.pivot_table('data', index='time', columns='code')
    data = data.loc[:, sorted(universe)].fillna(1)    # because newly listed stocks usually have no dividend record
    assert check_indexorder(data), 'Error, data order is mixed!'
    assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
    return data
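A toy illustration, in plain pandas rather than datatoolkits.map_data, of the fromNowOn forward-fill step above: sparse ex-dividend-date factors are carried forward onto every trading day, and stocks with no record yet default to 1 (all dates and values are made up).

import pandas as pd

factors = pd.Series([1.0, 1.2, 1.5],
                    index=pd.to_datetime(['2017-01-05', '2017-03-10', '2017-06-01']))
trading_days = pd.date_range('2017-01-03', '2017-06-30', freq='B')   # stand-in for get_tds
daily = factors.reindex(trading_days, method='ffill').fillna(1)      # no history yet -> 1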
Example #4
def get_ctargetprice(universe, start_time, end_time):
    '''
    Get consensus target price data
    '''
    sql = '''
    SELECT TARGET_PRICE, CON_DATE, STOCK_CODE
    FROM CON_FORECAST_SCHEDULE
    WHERE
        CON_DATE >= CAST(\'{start_time}\' AS datetime) AND
        CON_DATE <= CAST(\'{end_time}\' AS datetime)
    '''
    data = fdgetter.get_db_data(sql,
                                start_time=start_time,
                                end_time=end_time,
                                cols=('data', 'time', 'code'),
                                db=fdgetter.zyyx,
                                add_stockcode=False)
    # pdb.set_trace()
    data['code'] = data.code.apply(datatoolkits.add_suffix)
    data = data.pivot_table('data', index='time', columns='code')
    data = data.loc[:, sorted(universe)]
    assert check_indexorder(data), 'Error, data order is mixed!'
    assert checkdata_completeness(data, start_time,
                                  end_time), "Error, data missed!"
    return data
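The pivot_table call used throughout these snippets turns the long (time, code, value) result set into a time-indexed panel with one column per stock; a toy illustration with made-up codes and values:

import pandas as pd

long_data = pd.DataFrame({'time': pd.to_datetime(['2017-01-03', '2017-01-03', '2017-01-04']),
                          'code': ['000001.SZ', '600000.SH', '000001.SZ'],
                          'data': [10.5, 8.2, 10.7]})
wide = long_data.pivot_table('data', index='time', columns='code')   # rows: dates, columns: codes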
Example #5
 def _inner(universe, start_time, end_time):
     data = fdgetter.get_db_data(transed_sql, cols=cols, start_time=start_time,
                                 end_time=end_time, add_stockcode=False)
     data['code'] = data.code.apply(datatoolkits.add_suffix)
     if len(data.columns) == 4:
         data.loc[data.data == 0, 'data'] = data['prevclose']
         data.drop('prevclose', inplace=True, axis=1)
     data = data.pivot_table('data', index='time', columns='code')
     data = data.loc[:, sorted(universe)]
     assert check_indexorder(data), 'Error, data order is mixed!'
     assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
     return data
Example #6
 def _inner(universe, start_time, end_time):
     '''
     SSE Composite Index
     '''
     sql = sql_tmplate.replace('code', index_code).replace('price_type', price_type)
     data = fdgetter.get_db_data(sql, cols=('close', 'time'), start_time=start_time,
                                 end_time=end_time, add_stockcode=False)
     # pdb.set_trace()
     data = pd.DataFrame(np.repeat([data.close.values], len(universe), axis=0).T,
                         index=data.time, columns=sorted(universe))
     checkdata_completeness(data, start_time, end_time)
     check_indexorder(data)
     return data
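A toy version of the np.repeat step above, which replicates a single index close series into one identical column per stock so that it aligns with the stock-level panels (codes and prices are made up):

import numpy as np
import pandas as pd

close = pd.Series([3100.0, 3120.0, 3090.0],
                  index=pd.to_datetime(['2017-01-03', '2017-01-04', '2017-01-05']))
universe = ['000001.SZ', '600000.SH']
panel = pd.DataFrame(np.repeat([close.values], len(universe), axis=0).T,
                     index=close.index, columns=sorted(universe))   # same series in every column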
Example #7
 def _inner(universe, start_time, end_time):
     nstart_time = pd.to_datetime(start_time) - pd.Timedelta('60 days')
     data = fdgetter.get_db_data(fdgetter.BASIC_SQLs['INDEX_CONSTITUENTS'], code=index_code,
                                 cols=('code', 'time'), add_stockcode=False,
                                 start_time=nstart_time, end_time=end_time)
     data = data.assign(is_constituent=1)
     data['code'] = data.code.apply(datatoolkits.add_suffix)
     data = data.pivot_table('is_constituent', index='time', columns='code')
     tds = dateshandle.get_tds(nstart_time, end_time)
     data = data.loc[:, sorted(universe)].reset_index()
     data = datatoolkits.map_data(data, days=tds, fromNowOn=True)
     data = data.set_index('time').dropna(axis=0, how='all')
     data = data.loc[(data.index >= start_time) & (data.index <= end_time)]
     return data
Example #8
 def inner(universe, start_time, end_time):
     new_start = tds_shift(start_time, 130)
     data = fdgetter.get_db_data(sql, code=index_code, cols=('code', 'weight', 'time'),
                                 add_stockcode=False, start_time=new_start, end_time=end_time)
     data.code = data.code.apply(datatoolkits.add_suffix)
     # pdb.set_trace()
     data = data.pivot_table('weight', index='time', columns='code')
     tds = dateshandle.get_tds(new_start, end_time)
     data = datatoolkits.map_data(data.reset_index(), days=tds, fromNowOn=True)
     data = data.set_index('time')
     data_tds = dateshandle.get_tds(start_time, end_time)
     data = data.reindex(data_tds)
     data = data.loc[:, sorted(universe)] / 100
     assert check_indexorder(data), "Mixed index order"
     return data
Example #9
 def _inner(universe, start_time, end_time):
     data = fdgetter.get_db_data(transed_sql, cols=('data', 'time', 'code'),
                                 add_stockcode=False)
     data['code'] = data.code.apply(datatoolkits.add_suffix)
     data = data.drop_duplicates().sort_values(['code', 'time'])  # assumes identical timestamps imply identical share counts
     by_code = data.groupby('code')
     tds = dateshandle.get_tds(start_time, end_time)
     data = by_code.apply(datatoolkits.map_data, days=tds, fromNowOn=True,
                          fillna={'code': lambda x: x.code.iloc[0]})
     data = data.reset_index(drop=True)
     data = data.pivot_table('data', index='time', columns='code')
     data = data.loc[:, sorted(universe)]
     assert check_indexorder(data), 'Error, data order is mixed!'
     assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
     return data
Example #10
 def _inner(universe, start_time, end_time):
     offset = 366 * n + 180
     # pdb.set_trace()
     new_start = pd.to_datetime(start_time) - pd.Timedelta('%d day' % offset)
     data = fdgetter.get_db_data(sql, start_time=new_start, end_time=end_time,
                                 cols=('update_time', 'rpt_date', 'code', 'data'),
                                 add_stockcode=False)
     data['code'] = data.code.apply(datatoolkits.add_suffix)
     data = _handle_dbdata(data, start_time, end_time, fdmutils.cal_yr, col_name='data',
                           offset=n)
     # pdb.set_trace()
     data = data.loc[:, sorted(universe)]
     assert check_indexorder(data), 'Error, data order is mixed!'
     assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
     return data
Example #11
def get_zxind(universe, start_time, end_time):
    '''
    Get CITIC industry classification data and map it onto each trading day
    '''
    ind_data = fdgetter.get_db_data(fdgetter.BASIC_SQLs['ZX_IND'], cols=('ind', 'time', 'code'),
                                    add_stockcode=False)
    ind_data['ind'] = ind_data.ind.map(ZXIND_TRANS_DICT)
    ind_data['code'] = ind_data.code.apply(datatoolkits.add_suffix)
    # pdb.set_trace()
    tds = dateshandle.get_tds(start_time, end_time)
    by_code = ind_data.groupby('code')
    ind_data = by_code.apply(datatoolkits.map_data, days=tds,
                             fillna={'ind': lambda x: NaS, 'code': lambda x: x.code.iloc[0]})
    ind_data = ind_data.reset_index(drop=True).set_index(['time', 'code'])
    ind_data = ind_data.loc[:, 'ind'].unstack()
    ind_data = ind_data.loc[:, sorted(universe)].dropna(axis=0, how='all').fillna(NaS)
    return ind_data
Example #12
def update_universe(path=UNIVERSE_FILE_PATH):
    '''
    Fetch the latest universe and compare it with the universe previously saved to file;
    if it has been updated, print the relevant information.
    The latest universe is then stored in the given file as a tuple (universe, update_time).

    Parameter
    ---------
    path: str, default UNIVERSE_FILE_PATH
        File in which the universe data is stored

    Return
    ------
    universe: list
        The current (latest) universe

    Notes
    -----
    Do not call this function directly to obtain the universe: the universe it returns may be
    inconsistent with the universe of the factor data. To get the current universe, use
    fmanger.factors.utils.get_universe.
    '''
    logger = logging.getLogger(__name__.split('.')[0])    # logger of the top-level package
    new_universe = fdgetter.get_db_data(fdgetter.BASIC_SQLs['A_UNIVERSE'],
                                        cols=('code', ),
                                        add_stockcode=False)
    new_universe['code'] = new_universe.code.apply(datatoolkits.add_suffix)
    new_universe = new_universe.code.tolist()
    try:
        universe_save = datatoolkits.load_pickle(path)
        universe, _ = universe_save
        nu_set = set(new_universe)
        ou_set = set(universe)
        if nu_set != ou_set:
            add_diff = list(nu_set.difference(ou_set))
            minus_diff = list(ou_set.difference(nu_set))
            msg = 'Warning: universe UPDATED, {drop} are DROPPED, {add} are ADDED'.\
                format(drop=minus_diff, add=add_diff)
            logger.info(msg)
            print(msg)
    except FileNotFoundError:
        pass
    data = (new_universe, dt.datetime.now())
    datatoolkits.dump_pickle(data, path)
    return new_universe
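The update check above boils down to two set differences; a toy illustration with made-up codes:

old_universe = {'000001.SZ', '000002.SZ'}
new_universe = {'000001.SZ', '600000.SH'}
added = new_universe - old_universe     # {'600000.SH'}
dropped = old_universe - new_universe   # {'000002.SZ'}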
Example #13
def get_st(universe, start_time, end_time):
    '''
    Get the special treatment (ST) status of stocks
    '''
    st_data = fdgetter.get_db_data(fdgetter.BASIC_SQLs['ST_TAG'],
                                   cols=('time', 'abbr', 'ms', 'code'),
                                   add_stockcode=False)

    def _assign_st(row):
        map_dict = {'ST': 1, 'PT': 5, '撤销ST': 0, '*ST': 2, '撤消*ST并实行ST': 1,
                    '从ST变为*ST': 2, '撤销*ST': 0, '退市整理期': 3, '高风险警示': 4}
        if row.ms in map_dict:
            return map_dict[row.ms]
        else:
            assert row.ms == '撤销PT', "Error, cannot handle tag '{tag}'".format(tag=row.ms)
            # check '*ST' before 'ST', since 'ST' is a substring of '*ST'
            if '*ST' in row.abbr:
                return 2
            elif 'ST' in row.abbr:
                return 1
            else:
                return 0
    st_data = st_data.assign(tag=lambda x: x.apply(_assign_st, axis=1))
    st_data['code'] = st_data.code.apply(datatoolkits.add_suffix)
    # Drop duplicate dates: a larger number means higher risk, so keep only the largest tag
    # pdb.set_trace()
    st_data = st_data.sort_values(['code', 'time', 'tag'])
    by_codentime = st_data.groupby(['code', 'time'])
    st_data = by_codentime.apply(lambda x: x.tail(1).iloc[0])
    st_data = st_data.reset_index(drop=True)
    tds = dateshandle.get_tds(start_time, end_time)
    # pdb.set_trace()
    by_code = st_data.groupby('code')
    st_data = by_code.apply(datatoolkits.map_data, days=tds,
                            fillna={'code': lambda x: x.code.iloc[0]},
                            fromNowOn=True)
    st_data = st_data.reset_index(drop=True)
    # st_data = st_data.reset_index(drop=True).set_index(['time', 'code'])
    # st_data = st_data.loc[:, 'tag'].unstack()
    st_data = st_data.pivot_table('tag', index='time', columns='code').dropna(axis=0, how='all')
    st_data = st_data.loc[:, sorted(universe)].fillna(0)
    assert checkdata_completeness(st_data, start_time, end_time), "Error, data missed!"
    return st_data
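A toy illustration of the de-duplication step in get_st: after sorting by tag, keeping the last row of each (code, time) group retains the highest-risk tag (data values are made up):

import pandas as pd

st = pd.DataFrame({'code': ['000001.SZ', '000001.SZ'],
                   'time': pd.to_datetime(['2017-01-03', '2017-01-03']),
                   'tag': [1, 2]})
st = st.sort_values(['code', 'time', 'tag'])
deduped = st.groupby(['code', 'time']).tail(1)   # keeps the row with tag == 2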
Example #14
 def _inner(universe, start_time, end_time):
     new_start = pd.to_datetime(start_time) - pd.Timedelta('540 day')
     data = fdgetter.get_db_data(sql_template, start_time=new_start, end_time=end_time,
                                 cols=('update_time', 'rpt_date', 'code', 'data'),
                                 add_stockcode=False)
     data['code'] = data.code.apply(datatoolkits.add_suffix)
     by_code = data.groupby('code')
     data = by_code.apply(fdmutils.get_observabel_data).reset_index(drop=True)
     by_cno = data.groupby(['code', 'obs_time'])
     data = by_cno.apply(fdmutils.cal_ttm, col_name='data').reset_index()\
         .rename(columns={'obs_time': 'time'})
     tds = dateshandle.get_tds(start_time, end_time)
     # pdb.set_trace()
     data = data.groupby('code').apply(datatoolkits.map_data, days=tds,
                                       fillna={'code': lambda x: x.code.iloc[0]})
     data = data.reset_index(drop=True)
     data = data.pivot_table('data', index='time', columns='code')
     data = data.loc[:, sorted(universe)]
     assert check_indexorder(data), 'Error, data order is mixed!'
     assert checkdata_completeness(data, start_time, end_time), "Error, data missed!"
     return data
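fdmutils.cal_ttm itself is not shown; as a rough, hedged sketch of the trailing-twelve-month idea it presumably implements, TTM at a given quarter is the sum of the four most recent quarterly values (the figures below are made up):

import pandas as pd

quarterly = pd.Series([10.0, 12.0, 9.0, 11.0, 13.0],
                      index=pd.PeriodIndex(['2016Q1', '2016Q2', '2016Q3', '2016Q4', '2017Q1'],
                                           freq='Q'))
ttm = quarterly.rolling(4).sum()   # e.g. 2017Q1 TTM = 12 + 9 + 11 + 13 = 45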
Example #15
def get_liststatus(universe, start_time, end_time):
    '''
    Get the listing status of stocks
    '''
    ls_data = fdgetter.get_db_data(fdgetter.BASIC_SQLs['LIST_STATUS'],
                                   cols=('code', 'list_status', 'time'), add_stockcode=False)
    ls_map = {1: 1, 2: 2, 3: 1, 4: 4, 6: 3}   # in the source database: 1 = listed, 2 = suspension of listing, 3 = resumption of listing, 4 = delisted, 6 = delisting consolidation period
    ls_data['list_status'] = ls_data.list_status.map(ls_map)
    ls_data['code'] = ls_data.code.apply(datatoolkits.add_suffix)
    by_code = ls_data.groupby('code')
    tds = dateshandle.get_tds(start_time, end_time)
    ls_data = by_code.apply(datatoolkits.map_data, days=tds,
                            fillna={'code': lambda x: x.code.iloc[0]},
                            fromNowOn=True)
    # ls_data = ls_data.reset_index(drop=True).set_index(['time', 'code'])
    # ls_data = ls_data.loc[:, 'list_status'].unstack()
    ls_data = ls_data.reset_index(drop=True)
    ls_data = ls_data.pivot_table('list_status', index='time',
                                  columns='code').dropna(axis=0, how='all')
    ls_data = ls_data.loc[:, sorted(universe)]
    return ls_data