예제 #1
0
def get_samples(mode=0, *arge, **kwargs):
    """Build the test sample set of security IDs for the requested mode.

    Planned modes (only mode 0 is implemented in this function):
        0. For each size segment take the top N constituents. Indices named
           by the original author: 000300 CSI 300, 000001 SSE Composite,
           000905 CSI 500, 399005 SZSE SME Index, 399006 ChiNext Index.
        1. Leaders of each industry, mainly ranked by market cap.
        2. Liquidity: trading activity by turnover rate, plus inter-day and
           intraday volume volatility.
        3. Abnormally moving stocks, inter-day (short term) and intraday.
        4. Style: momentum, reversal.
        5. Per index: 300, 500.

    Keyword Args:
        total_num: total number of securities to sample across all indices;
            a missing or falsy value falls back to 100.

    Returns:
        For mode 0, a dict keyed by '<ticker>.<exchange code>' whose values
        are the leading slice of the ``consID`` column after sorting by
        weight in descending order. For any other mode, None.
    """
    if mode != 0:
        return None

    sample_total = kwargs.get('total_num') or 100
    window_begin, window_end = "20191201", "20191231"

    # The exchange map also covers 000300, 000001 and 000905, but only the
    # three XSHE-listed indices below are sampled.
    index_tickers = ['399005', '399006', '399001']
    exchange_by_ticker = {
        '000300': 'XSHG',
        '000001': 'XSHG',
        '000905': 'XSHG',
        '399005': 'XSHE',
        '399006': 'XSHE',
        '399001': 'XSHE'
    }

    # Split the requested total evenly across the indices (truncating).
    per_index = int(sample_total / len(index_tickers))

    samples = {}
    for index_ticker in index_tickers:
        # NOTE(review): presumably returns a pandas DataFrame of index
        # constituents and their weights -- confirm against the DataAPI docs.
        weights = DataAPI.mIdxCloseWeightGet(secID=u"",
                                             ticker=index_ticker,
                                             beginDate=window_begin,
                                             endDate=window_end,
                                             field=["consID", "weight"],
                                             pandas="1")
        ranked = weights.sort_values(by='weight', ascending=False)
        # TODO: picking the top-weight constituents makes the sample highly
        # correlated with the benchmark; switch to random sampling.
        label = '{0}.{1}'.format(index_ticker,
                                 exchange_by_ticker.get(index_ticker))
        samples[label] = ranked['consID'][:per_index]
    return samples
예제 #2
0
def get_samples(mode=0, *arge, **kwargs):
    """Build the test sample set of security IDs for the requested mode.

    Planned modes (only modes 0 and 2 are implemented in this function):
        0. For each size segment take the top N constituents by weight.
           Indices named by the original author: 000300 CSI 300,
           000001 SSE Composite, 000905 CSI 500, 399005 SZSE SME Index,
           399006 ChiNext Index.
        1. Leaders of each industry, mainly ranked by market cap.
        2. Liquidity: trading activity by turnover rate, plus inter-day and
           intraday volume volatility. Currently implemented as the mean
           turnover rate over the date window, for the configured indices.
        3. Abnormally moving stocks, inter-day (short term) and intraday.
        4. Style: momentum, reversal.
        5. Per index: 300, 500.

    Keyword Args:
        total_num: total number of securities to sample across all indices;
            a missing or falsy value falls back to 100.
        start_date: begin date string passed to the data API; defaults to
            "20191201".
        end_date: end date string passed to the data API; defaults to
            "20191231".
        mkt_tickers: list of index tickers to sample from; defaults to
            ['399005', '399006', '399001']. Tickers missing from the
            exchange map below yield a key ending in '.None'.

    Returns:
        For modes 0 and 2, a dict keyed by '<ticker>.<exchange code>' whose
        values are pandas Series of security IDs (at most
        ``total_num / len(tickers)`` per index, truncated). For any other
        mode, None.
    """
    if mode not in (0, 2):
        # Modes 1, 3, 4 and 5 are described above but not implemented yet.
        return None

    total_num = kwargs.get('total_num') or 100
    start_date = kwargs.get('start_date') or "20191201"
    end_date = kwargs.get('end_date') or "20191231"
    # SH and SZ: ['000300', '000001', '000905', '399005', '399006', '399001']
    # SZ only:   ['399005', '399006', '399001']
    tickers = kwargs.get('mkt_tickers') or ['399005', '399006', '399001']
    ticker_to_exchangecd = {
        '000300': 'XSHG',
        '000001': 'XSHG',
        '000905': 'XSHG',
        '399005': 'XSHE',
        '399006': 'XSHE',
        '399001': 'XSHE'
    }

    # Split the requested total evenly across the indices (truncating).
    _num = int(total_num / len(tickers))

    ret = {}
    for _ticker in tickers:
        # NOTE(review): presumably returns a pandas DataFrame of index
        # constituents and their weights -- confirm against the DataAPI docs.
        weights = DataAPI.mIdxCloseWeightGet(secID=u"",
                                             ticker=_ticker,
                                             beginDate=start_date,
                                             endDate=end_date,
                                             field=["consID", "weight"],
                                             pandas="1").sort_values(
                                                 by='weight', ascending=False)
        if mode == 0:
            # TODO: picking the top-weight constituents makes the sample
            # highly correlated with the benchmark; switch to random sampling.
            sec_ids = weights['consID'][:_num]
        else:
            mkt_df = DataAPI.MktEqudGet(secID=weights['consID'],
                                        beginDate=start_date,
                                        endDate=end_date,
                                        isOpen=1,
                                        pandas="1")
            # sort_values returns a new object, so the sorted result must be
            # kept; otherwise the slice below would not be the most actively
            # traded securities.
            mean_turnover = (mkt_df.groupby('secID')['turnoverRate']
                             .mean()
                             .sort_values(ascending=False))
            sec_ids = mean_turnover.reset_index()['secID'][:_num]
        ret['{0}.{1}'.format(_ticker,
                             ticker_to_exchangecd.get(_ticker))] = sec_ids
    # Returned for every implemented mode, not only mode 2.
    return ret