示例#1
0
def get_universe(start_time, end_time):
    sql = '''
    SELECT SecuCode
    FROM SecuMain
    WHERE
        SecuCategory = 1 AND
        SecuMarket in (83, 90) AND
        ListedState != 9
    '''
    if not check_jydb_update_state(end_time):
        raise ValueError('JYDB has not been updated!')
    tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time)
    data = fetch_db_data(jydb, sql, ['symbol'])
    symbols = sorted(add_stock_suffix(s) for s in data.symbol)
    out = DataFrame(1, index=tds, columns=symbols)
    return out
示例#2
0
def cshift_db(symbol, date, data_name, tab_name, offset):
    # 计算流量类报表偏移后的数据
    # symbol: 股票代码
    # date: 测试数据时间
    # data_name: 数据名称(查询聚源)
    # tab_name: 报表名称(查询聚源)
    # offset: 数据时间偏移量,以季为单位
    symbol = drop_suffix(symbol)
    date = trans_date(date)
    sql = '''
    SELECT m.SecuCode, s.{data}, s.enddate, s.infopubldate
    FROM
            (SELECT EndDate, InfoPublDate, {data}, CompanyCode, BulletinType, IfAdjusted, IfMerged, ROW_NUMBER()
            OVER(PARTITION BY COMPANYCODE, ENDDATE ORDER BY INFOPUBLDATE DESC) as rnum FROM {tab_name}
            WHERE
            BulletinType != 10 AND
            InfoPublDate < \'{date:%Y-%m-%d}\' AND
            IfAdjusted NOT IN (4, 5) AND
            IfMerged = 1) s, SecuMain M
    where
            s.rnum = 1 AND
            S.CompanyCode = M.CompanyCode AND
            M.SecuCode = \'{symbol}\' AND
            M.SecuCategory = 1 AND
            M.SecuMarket IN (83, 90) AND
            S.EndDate >= (SELECT TOP(1) S2.CHANGEDATE
                      FROM LC_ListStatus S2
                      WHERE
                          S2.INNERCODE = M.INNERCODE AND
                          S2.ChangeType = 1)
    ORDER BY S.EndDate ASC'''.format(data=data_name,
                                     symbol=symbol,
                                     date=date,
                                     tab_name=tab_name)
    data = fetch_db_data(jydb, sql,
                         ['symbol', 'data', 'rpt_date', 'update_time'],
                         {'data': 'float64'})
    # pdb.set_trace()
    if len(data) < offset or get_calendar('stock.sse').count(
            data.iloc[-1, 3], date) > 120:
        return np.nan
    raw_data = data.set_index('rpt_date').data
    seasonly_data = raw_data - raw_data.shift(1)
    seasonly_data.loc[seasonly_data.index.month == 3] = np.nan
    seasonly_data = seasonly_data.fillna(raw_data)
    return seasonly_data.iloc[-offset]
示例#3
0
def process_fundamental_data(data, cols, start_time, end_time, max_hist_num,
                             func, **kwargs):
    '''
    处理从数据库中取出的基本面数据,按照给定的函数计算基本面数据,并将基本面数据映射到交易日中

    Parameter
    ---------
    data: pandas.DataFrame
        原始数据,必须包含cols参数中的列名
    cols: iterable
        元素为列名,长度为4,依次表示的意思是[证券代码列, 数据列, 更新时间列, 报告期列]
    start_time: datetime like
        数据的开始时间
    end_time: datetime like
        数据的结束时间
    max_hist_num: int
        每个更新日期保存的历史最新数据的最大数量
    func: function
        格式为function(pandas.DataFrame)-> value,该函数接受的参数为每个股票在每个更新日期
        的最新历史数据,包含cols参数中的列
    kwargs: dict
        以字典形式传给function的参数

    Return
    ------
    out: pandas.DataFrame
        处理后生成的二维表,index为时间列,columns为股票代码列
    '''
    start_time, end_time = trans_date(start_time, end_time)
    symbol_col, data_col, ut_col, rpt_col = cols
    data = data.sort_values([symbol_col, ut_col])
    tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time)

    def handler_per_symbol(df):
        # 对单只证券进行处理
        obs_data, obs_flag = expand_data(df, ut_col, rpt_col, max_hist_num)
        res = obs_data.groupby(obs_flag).apply(func, **kwargs)
        res = map2td(res, tds).reset_index().rename(columns={
            obs_flag: 'time',
            0: 'data'
        })
        return res

    out = data.groupby(symbol_col).apply(handler_per_symbol).reset_index()
    out = out.pivot_table('data', index='time', columns=symbol_col)
    return out
示例#4
0
 def inner(start_time, end_time):
     start_time, end_time = trans_date(start_time, end_time)
     nonlocal sql
     sql = sql.format(start_time=start_time, end_time=end_time)
     data = fetch_db_data(jydb, sql, cols, dtypes=dtypes)
     data.symbol = data.symbol.apply(add_stock_suffix)
     if len(cols) == 4:  # 当前数据为需要使用前收盘填充的数据
         data.loc[data.data == 0, 'data'] = data['prevclose']
         data = data.drop('prevclose', axis=1)
     data = data.pivot_table('data', index='time', columns='symbol')
     latest_td = get_calendar('stock.sse').latest_tradingday(
         end_time, 'PAST')
     universe = sorted(
         pitcache_getter('UNIVERSE', 10).get_csdata(latest_td).index)
     data = data.reindex(columns=universe).sort_index(ascending=True)
     if not check_completeness(data.index, start_time, end_time):
         raise ValueError('Data missed!')
     return data
示例#5
0
def check_completeness(time_index, start_time, end_time):
    '''
    检查数据在时间轴上的完整性

    Parameter
    ---------
    time_index: iterable
        数据的时间轴
    start_time: datetime like
        设定的数据的开始时间
    end_time: datetime like
        设定的数据的结束时间

    Return
    ------
    result: boolean
        若数据完整,返回True
    '''
    expect = get_calendar('stock.sse').get_tradingdays(start_time, end_time)
    return sorted(time_index) == expect
示例#6
0
    def inner(start_time, end_time):
        start_time, end_time = trans_date(start_time, end_time)
        cache_size = 100
        offset = major_mul * minor_mul
        threshold = 1e-5
        start_time_shifted = get_calendar('stock.sse').\
                             shift_tradingdays(start_time, -offset - 20)
        to_data = pitcache_getter('TO_RATE', cache_size).\
                  get_tsdata(start_time_shifted, end_time)

        data = to_data.rolling(offset,
                               min_periods=offset).sum().dropna(how='all')
        data[data <= threshold] = np_nan
        data = data / major_mul
        data = np_log(data)
        data = data.loc[(data.index >= start_time) & (data.index <= end_time)]
        if start_time > trans_date(DATA_START_DATE):
            if not check_completeness(data.index, start_time, end_time):
                raise ValueError('Data missed!')
        return data
示例#7
0
def check_jydb_update_state(check_time):
    """
    检测剧院数据库是否已经更新到给定时间的最近的一个交易日(往前推)
    即给定的日期时候在数据库最新日期之前

    Parameter
    ---------
    check_time: datetime like
        需要检测的时间

    Return
    ------
    is_updated: boolean
        True表示是最新
    """
    db_update_time = get_db_update_time()
    latest_td = get_calendar('stock.sse').latest_tradingday(check_time, 'PAST')
    if latest_td.date() <= db_update_time.date():
        return True
    else:
        return False
示例#8
0
def ttm_processor(raw_data, cols, start_time, end_time):
    """
    将原始的数据处理成TTM,仅支持流量类的数据,如利润表、现金流量表

    Parameter
    ---------
    raw_data: pandas.DataFrame
        待处理的原始报表数据(即季报、半年报和年报)
    cols: iterable
        数据列名,元素依次为[证券代码, 数据, 更新时间, 报告期时间]
    start_time: datetime like
        计算结果的开始时间
    end_time: datetime like
        计算结果的结束时间

    Return
    ------
    out: pandas.DataFrame
    index为时间,columns为证券代码轴
    """
    raw_data = calc_seasonly_data(raw_data, cols)
    # pdb.set_trace()
    raw_data.symbol = raw_data.symbol.apply(add_stock_suffix)
    data = process_fundamental_data(raw_data,
                                    cols,
                                    start_time,
                                    end_time,
                                    6,
                                    calc_tnm,
                                    data_col='data',
                                    period_flag_col='rpt_date')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    data = data.reindex(columns=universe)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Data missed!')
    return data
示例#9
0
 def inner(start_time, end_time):
     if table_name in ['ISY', 'CFSY'] and freq == 2:
         raise ValueError('Incompatible parameters(table_name={tn}, freq={f}'.\
                          format(tn=table_name, f=freq))
     start_time, end_time = trans_date(start_time, end_time)
     start_time_shifted = get_calendar('stock.sse').\
                          shift_tradingdays(start_time, -(66 * (offset + 2) * freq))
     cols = ['symbol', 'data', 'update_date', 'rpt_date']
     data_fetcher = data_fetcher_factory(
         sql, cols, jydb, {
             'data': data_name_sql,
             'table_name_sql': table_map[table_name],
             'if_adjusted': if_adjusted
         }, {'data': 'float64'})
     raw_data = data_fetcher(start_time_shifted, end_time)
     raw_data = raw_data.drop_duplicates(
         ['symbol', 'update_date', 'rpt_date'])
     if table_name in ['ISY', 'CFSY']:  # 当前利润表和现金流量表计算季度偏移时仅会计算单季度数据
         is_signal_season = True
     else:
         is_signal_season = False
     return shift_processor(raw_data, cols, start_time, end_time, offset,
                            freq, is_signal_season)
示例#10
0
# -*- coding:utf-8
"""
Author:  Hao Li
Email: [email protected]
Github: https://github.com/SAmmer0
Created: 2018/3/28
"""
import numpy as np

from fmanager import query
from tdtools import get_calendar
from datautils.datacache.cachecore import DataView

test_dates = [('2015-04-16', '2015-05-07'), ('2016-06-01', '2017-01-01'),
              ('2017-11-01', '2018-03-01'), ('2014-01-01', '2014-05-01')]


def get_df(start_time, end_time):
    return query('CLOSE', (start_time, end_time))


dv_df = DataView(get_df, get_calendar('stock.sse'))

for date in test_dates:
    tmp = dv_df.get_tsdata(*date).fillna(-1000)
    data_cpr = query('CLOSE', date).fillna(-1000)
    assert np.all(np.all(np.isclose(tmp, data_cpr), axis=1))
    print(dv_df._cache_start, dv_df._cache_end, dv_df._extendable)
assert len(dv_df._data_cache) == get_calendar('stock.sse').count(
    dv_df._cache_start, dv_df._cache_end)
示例#11
0
文件: rule.py 项目: rlcjj/QRToolkits
        Parameter
        ---------
        rtime: datetime or the like
            需要判断的时间

        Return
        ------
        result: boolean
            True表示时间符合条件
        '''
        return self._time_condition(rtime)


ssetd_scheduler = RSchedule(
    get_calendar('stock.sse').is_tradingday)  # 股票交易日时间计划表
sseme_scheduler = RSchedule(lambda t: get_calendar(
    'stock.sse').is_cycle_target(t, 'MONTHLY', 'LAST'))  # 股票交易日月末计划表


# --------------------------------------------------------------------------------------------------
# Rule
class Rule(object):
    '''
    标的筛选规则类
    提供以下功能:
    on_time: 在给定的时间,对给定的标的池进行筛选,然后返回筛选结果和筛选状态(用于表明当前筛选是否启用)

    Parameter
    ---------
    datasources: pitdata.DataGetterCollection
示例#12
0
def get_st_status(start_time, end_time):
    '''
    获取股票特殊处理的情况
    '''
    sql = '''
    SELECT S.SpecialTradeTime, S.SecurityAbbr, C.MS, M.SecuCode
    FROM LC_SpecialTrade S, SecuMain M, CT_SystemConst C
    WHERE
        S.InnerCode = M.InnerCode AND
        M.SecuMarket in (83, 90) AND
        S.SpecialTradeType = C.DM AND
        C.LB = 1185 AND
        M.SecuCategory = 1
    '''
    data = fetch_db_data(jydb, sql, ['time', 'abbr', 'ms', 'symbol'])

    def _assign_st(row):
        map_dict = {
            'ST': 1.,
            'PT': 5.,
            '撤销ST': 0.,
            '*ST': 2.,
            '撤消*ST并实行ST': 1.,
            '从ST变为*ST': 2.,
            '撤销*ST': 0.,
            '退市整理期': 3.,
            '高风险警示': 4.
        }
        if row.ms in map_dict:
            return map_dict[row.ms]
        else:
            assert row.ms == '撤销PT', "Error, cannot handle tag '{tag}'".format(
                tag=row.ms)
            if 'ST' in row.abbr:
                return 1
            elif '*ST' in row.abbr:
                return 2
            else:
                return 0

    data = data.assign(tag=lambda x: x.apply(_assign_st, axis=1))
    data['symbol'] = data.symbol.apply(add_stock_suffix)
    # 剔除日期重复项,因为数字越大表示越风险越高,因而只保留数字大的
    data = data.sort_values(['symbol', 'time', 'tag'])
    by_snt = data.groupby(['symbol', 'time'])
    data = by_snt.tail(1)
    data = data.reset_index(drop=True)
    tds = get_calendar('stock.sse').get_tradingdays(start_time, end_time)
    by_symbol = data.groupby('symbol')
    data = by_symbol.apply(map2td,
                           days=tds,
                           timecol='time',
                           fillna={'symbol': lambda x: x.symbol.iloc[0]})
    data = data.pivot_table('tag', index='time',
                            columns='symbol').dropna(axis=0, how='all')
    last_td = get_calendar('stock.sse').latest_tradingday(end_time, 'PAST')
    universe = sorted(
        pitcache_getter('UNIVERSE', 10).get_csdata(last_td).index)
    data = data.reindex(columns=universe).fillna(0)
    if not check_completeness(data.index, start_time, end_time):
        raise ValueError('Error, data missed!')
    return data
示例#13
0
def plot_candle_line(axes,
                     data,
                     columns=None,
                     width=0.8,
                     color_up='red',
                     color_down='green',
                     alpha=1,
                     majortick_locator=lambda x: get_calendar('stock.sse').
                     is_cycle_target(x, 'MONTHLY', 'LAST'),
                     majortick_format='{:%Y-%m-%d}',
                     minortick_locator=None,
                     minortick_format=None,
                     ticklabel_rotation=0):
    '''
    用于绘制K线

    Parameter
    ---------
    axes: matplotlib.axes.Axes
        子图
    data: pandas.DataFrame
        价格数据,index为时间
    columns: tuple, default None
        各个价格对应的数据列名,依次为(open, close, high, low),默认为('open', 'close', 'high', 'low')
    width: float, defaut 0.8
        k线宽度
    color_up: string, default red
        上涨K线颜色
    color_down: string, default green
        下跌K线颜色
    alpha: float(0, 1),default 1
        k线透明度,0表示透明,1表示不透明
    majortick_locator: function(date)->boolean
        主刻度定位函数,默认为每个月最后一个交易日
    majortick_format: string, default {:%Y-%m-%d}
        主刻度格式,必须有.format方法
    mainortick_locator: function(date)->boolean, default None
        副刻度定位函数,默认没有,即不标识副刻度
    minortick_format: string, default None
        副刻度格式,必须有.format方法
    ticklabel_rotation: float, default 0
        刻度标签旋转度数
    '''
    if columns is None:
        columns = ('open', 'close', 'high', 'low')
    dates = data.index
    data = [data[c] for c in columns]
    majortick_pos, majortick_labels = date_ticker(dates, majortick_locator,
                                                  majortick_format)
    axes.set_xticks(majortick_pos, minor=False)
    axes.set_xticklabels(majortick_labels,
                         minor=False,
                         rotation=ticklabel_rotation)
    if minortick_locator is not None:
        if minortick_format is None:
            minortick_format = ''
        minortick_pos, minortick_labels = date_ticker(dates,
                                                      minortick_locator,
                                                      minortick_format,
                                                      add_header=False)
        # 剔除副刻度中与主刻度重复的项
        # pdb.set_trace()
        minortick_pos, minortick_labels = zip(
            *[(pos, label)
              for pos, label in zip(minortick_pos, minortick_labels)
              if pos not in majortick_pos])
        axes.set_xticks(minortick_pos, minor=True)
        if minortick_format != '':
            axes.set_xticklabels(minortick_labels,
                                 minor=True,
                                 rotation=ticklabel_rotation)

    candlestick2_ochl(axes,
                      *data,
                      width=width,
                      colorup=color_up,
                      colordown=color_down,
                      alpha=alpha)
示例#14
0
def generate_dv(factor_name):
    return DataView(generate_getter(factor_name), get_calendar('stock.sse'))