def get_stock_fund_univ(dateFrom, dateTo, ifFlexible):
    # 2001010201000000 偏股混合型, 2001010101000000 普通股票型, 2001010204000000 灵活配置型
    # ifFlexible = True表示否包含灵活配置型中的实际偏股型基金
    # 这个地方从ChinaMutualFundDescription还是从ChinaMutualFundSector读取作为table a都没关系
    # 结果是一样的,因为之前不涉及读取bchmk的数据所以直接从表ChinaMutualFundSector读的
    sqlStr = 'select a.F_INFO_WINDCODE, b.S_INFO_SECTORENTRYDT, b.S_INFO_SECTOREXITDT, ' \
             'b.S_INFO_SECTOR, a.F_INFO_BENCHMARK from ChinaMutualFundDescription a ' \
             'left join ChinaMutualFundSector b ' \
             'on a.F_INFO_WINDCODE = b.F_INFO_WINDCODE ' \
             'where b.S_INFO_SECTOR in ' \
             '(\'2001010201000000\', \'2001010101000000\', \'2001010204000000\') and ' \
             'b.S_INFO_SECTORENTRYDT <= {0} and ' \
             '(b.S_INFO_SECTOREXITDT >= {1} or b.S_INFO_SECTOREXITDT is null) ' \
             'order by F_INFO_WINDCODE, S_INFO_SECTORENTRYDT'.format(dateTo, dateFrom)
    # entrydt is null and exitdt is null and cur_sign =
    # 1这种都是只有个代码但还未成立的,没有其他数据就不用选出来了
    fundInfo = get_data_sql(sqlStr, 'wind')
    fundInfo.columns = [
        'Fund_Code', 'Date_In', 'Date_Out', 'Info_Sector', 'Bchmk'
    ]
    fundInfo['Date_In'] = pd.to_datetime(fundInfo['Date_In'])
    fundInfo['Date_Out'] = pd.to_datetime(fundInfo['Date_Out'])

    # 对零配型基金贴偏股或非偏股标签,把零配中的非股票先排除,再进行后面的时间对应操作
    fundInfoStk = fundInfo[fundInfo['Info_Sector'] != '2001010204000000']
    if ifFlexible:
        fundInfoMix = fundInfo[fundInfo['Info_Sector'] == '2001010204000000']
        fundInfoMix = attach_stk_label(fundInfoMix)[0]
        fundInfoMix.drop('Stk_Weight', axis=1, inplace=True)
        fundInfo = pd.concat([fundInfoStk, fundInfoMix],
                             axis=0,
                             ignore_index=True)
    else:
        fundInfo = fundInfoStk.copy()

    if len(fundInfo) > 0:
        days = get_trading_days(dateFrom, dateTo)
        days['Date'] = days.index
        repNum = fundInfo.shape[0]
        repDaysNum = days.shape[0]
        daysRep = pd.concat([days] * repNum, ignore_index=True)
        daysRep.sort_values('Date', inplace=True)
        fundPosRep = pd.concat([fundInfo] * repDaysNum, ignore_index=True)

        daysRep = daysRep.reset_index(drop=True)
        fundPosRep = fundPosRep.reset_index(drop=True)
        assert all(daysRep.index == fundPosRep.index)
        fundPosFull = pd.concat([daysRep, fundPosRep], axis=1)
        resPos = fundPosFull.loc[
            (fundPosFull['Date'] >= fundPosFull['Date_In']) &
            ((fundPosFull['Date'] <= fundPosFull['Date_Out']) |
             (pd.isnull(fundPosFull['Date_Out']))),
            ['Date', 'Fund_Code', 'Info_Sector']]
        resPos.reset_index(drop=True, inplace=True)
    else:
        resPos = pd.DataFrame([], columns=['Date', 'Fund_Code'])
    return resPos
示例#2
0
def get_component_SW(dateFrom, dateTo):
    from public.getDataSQL import get_data_sql
    from public.getTradingDays import get_trading_days
    import pandas as pd
    # 获取全集,如果需要某个单独行业再自行筛选
    exeStr = 'select s_info_windcode, S_CON_WINDCODE, S_CON_INDATE, S_CON_OUTDATE from ' \
             'SWIndexMembers where S_INFO_WINDCODE in ' \
             '(select INDUSTRIESALIAS + \'.SI\' from ASHAREINDUSTRIESCODE where ' \
             'LEVELNUM = 2 and INDUSTRIESCODE like \'61%\') and ' \
             '(S_CON_INDATE <= {} and (S_CON_OUTDATE >= {} or S_CON_OUTDATE is null))'.format(dateTo, dateFrom)
    # Note: 为什么这个地方Indate和Outdate都有等于号:纳入是当天的0点就开始,而剔除的话是截止到剔除日期的24:00结束

    swComponent = get_data_sql(exeStr, 'wind')
    swComponent.columns = ['SW_Code', 'Stock_Code', 'In_Date', 'Out_Date']
    swComponent['In_Date'] = pd.to_datetime(swComponent['In_Date'])
    swComponent['Out_Date'] = pd.to_datetime(swComponent['Out_Date'])

    if len(swComponent) > 0:
        # 调整指数数据格式为每天持仓
        # 先repmat全集出来,再根据时间筛选符合要求的
        repNum = swComponent.shape[0]
        tradingDays = get_trading_days(dateFrom, dateTo)
        tradingDays['Date'] = tradingDays.index
        repDaysNum = tradingDays.shape[0]
        tradingDays = pd.concat([tradingDays] * repNum, ignore_index=True)
        tradingDays.sort_values('Date', inplace=True)
        swComponentRep = pd.concat([swComponent] * repDaysNum,
                                   ignore_index=True)

        tradingDays = tradingDays.reset_index(drop=True)
        swComponentRep = swComponentRep.reset_index(drop=True)
        assert all(tradingDays.index == swComponentRep.index)
        swComponentFull = pd.concat([tradingDays, swComponentRep], axis=1)

        swComponentFull['Valid'] = (swComponentFull['Date'] >= swComponentFull['In_Date']) & \
                                ((swComponentFull['Date'] <= swComponentFull['Out_Date']) |
                                 (swComponentFull['Out_Date'].isnull()))  # 当天进当天出
        swComponent = swComponentFull[swComponentFull['Valid']]
        swComponent.reset_index(
            drop=True,
            inplace=True)  # 数据筛选后必须reset index 不然index还是原来indexPosFull的
        # # check NUM
        # count = indexPos.groupby(by = ['Date'])['Stock_Code'].count()
        # assert all((count <= 300) & (count >= 295))

        swComponent = swComponent[['SW_Code', 'Date', 'Stock_Code']]
    else:
        swComponent = pd.DataFrame([],
                                   columns=['SW_Code', 'Date', 'Stock_Code'])

    swComponent.set_index(['Date', 'Stock_Code'], inplace=True)

    return swComponent
def get_trading_days(dateFrom, dateTo):
    assert isinstance(dateFrom, str), 'dateFrom should be str'
    assert isinstance(dateTo, str), 'dateTo should  be str'

    exeStr = 'select TRADE_DAYS from dbo.ASHARECALENDAR where ' \
             'S_INFO_EXCHMARKET = \'SSE\' and TRADE_DAYS >= %s and TRADE_DAYS <= %s' % (dateFrom, dateTo)
    tradingDays = get_data_sql(exeStr, 'wind')
    tradingDays.columns = ['Date']
    tradingDays['Date'] = pd.to_datetime(tradingDays['Date'])
    tradingDays = tradingDays.sort_values(
        'Date')  # 等价于tradingDays.sort_value('Date', inplace=True)
    tradingDays.set_index('Date', inplace=True)

    return tradingDays
def get_index_pos(indexCode, dateFrom, dateTo):
    exeStr = 'select  S_INFO_WINDCODE, S_CON_WINDCODE, S_CON_INDATE, S_CON_OUTDATE, CUR_SIGN ' \
             'from dbo.AINDEXMEMBERS where S_INFO_WINDCODE = \'%s\' and' \
             ' (S_CON_INDATE <= %s and ( S_CON_OUTDATE > %s or S_CON_OUTDATE is null))' % (indexCode, dateTo, dateFrom)
    indexPos = get_data_sql(exeStr, 'wind')
    indexPos.columns = [
        'Index_Code', 'Stock_Code', 'DateIn', 'DateOut', 'Cur_Label'
    ]
    indexPos['DateIn'] = pd.to_datetime(indexPos['DateIn'])
    indexPos['DateOut'] = pd.to_datetime(indexPos['DateOut'])

    if len(indexPos) > 0:
        # 调整指数数据格式为每天持仓
        # 先repmat全集出来,再根据时间筛选符合要求的
        repNum = indexPos.shape[0]
        tradingDays = get_trading_days(dateFrom, dateTo)
        tradingDays['Date'] = tradingDays.index
        repDaysNum = tradingDays.shape[0]
        tradingDays = pd.concat([tradingDays] * repNum, ignore_index=True)
        tradingDays.sort_values('Date', inplace=True)
        indexPosRep = pd.concat([indexPos] * repDaysNum, ignore_index=True)

        tradingDays = tradingDays.reset_index(drop=True)
        indexPosRep = indexPosRep.reset_index(drop=True)
        assert all(tradingDays.index == indexPosRep.index)
        indexPosFull = pd.concat([tradingDays, indexPosRep], axis=1)

        indexPosFull['Valid'] = (indexPosFull['Date'] >= indexPosFull['DateIn']) & \
                                ((indexPosFull['Date'] < indexPosFull['DateOut']) |
                                 (indexPosFull['DateOut'].isnull())) # 当天进当天出
        indexPos = indexPosFull[indexPosFull['Valid']]
        indexPos.reset_index(
            drop=True,
            inplace=True)  # 数据筛选后必须reset index 不然index还是原来indexPosFull的
        # # check NUM
        # count = indexPos.groupby(by = ['Date'])['Stock_Code'].count()
        # assert all((count <= 300) & (count >= 295))

        indexPos = indexPos[['Index_Code', 'Date', 'Stock_Code']]

    else:
        indexPos = pd.DataFrame([],
                                columns=['Index_Code', 'Date', 'Stock_Code'])

    indexPos.set_index(['Date', 'Stock_Code'], inplace=True)
    # 这个地方index怎么设置是个问题,理论上index应该是唯一的

    return (indexPos)
def basic_info_SW():

    # 61开头是申万,levelnum=2表示一级

    #     exeStr = 'select INDUSTRIESCODE, INDUSTRIESNAME, INDUSTRIESALIAS from dbo.ASHAREINDUSTRIESCODE where  LEVELNUM = 2 and INDUSTRIESCODE like \'61%\' and USED = 1'
    exeStr = 'select INDUSTRIESCODE, INDUSTRIESNAME, INDUSTRIESALIAS, USED from dbo.ASHAREINDUSTRIESCODE where LEVELNUM = 2 and INDUSTRIESCODE like \'61%\''
    codePara = get_data_sql(exeStr, 'wind')
    codePara.columns = ['SW_Code', 'SW_Name', 'Wind_Code', 'Used_Label']
    # codePara['Wind_Code_New'] = pd.Series(codePara['Wind_Code'] + '.SI')
    codePara.sort_values(['Used_Label', 'Wind_Code'], inplace=True)
    codePara.reset_index(drop=True, inplace=True)
    sectorName = [np.nan] * 6 + [
        '消费', '周期上游', '周期中游', '周期中游', '周期上游', '成长', '消费', '消费', '消费', '消费',
        '消费', '周期中游', '周期中游', '大金融', '消费', '消费', '其他', '周期下游', '周期下游', '成长',
        '其他', '成长', '成长', '成长', '大金融', '大金融', '周期下游', '周期下游'
    ]
    codePara['Sector_Name'] = sectorName

    return (codePara)
示例#6
0
def attach_stk_label(inputTable):
    assert all([x in inputTable.columns for x in ['Fund_Code', 'Bchmk']])
    bchmkList = [
        x if pd.isnull(x) else x.split('+') for x in inputTable['Bchmk']
    ]

    bchmkDict = []
    for iRow in range(len(bchmkList)):
        bchmkStringI = bchmkList[iRow]
        obj = {}
        if bchmkStringI is not None:
            for item in bchmkStringI:
                if len(re.findall(r'^([^*%]*)\*?(?:([1-9]\d?)%)?$', item)) > 0:
                    x, y = re.findall(r'^([^*%]*)\*?(?:([1-9]\d?)%)?$',
                                      item)[0]
                    obj[x] = y
        bchmkDict.append(obj)

    # 读取指数标签参数表:
    sqlStr = 'select Bchmk_Name, If_STK from paraBchmkType'
    indexLabel = get_data_sql(sqlStr, 'lhtz')
    indexLabel.columns = ['Bchmk_Name', 'If_STK']
    indexLabel = indexLabel.set_index('Bchmk_Name')['If_STK'].to_dict()

    assert len(inputTable) == len(bchmkDict)
    stkWeight = []
    newIndexName = []
    sqlTable = pd.DataFrame()
    for iCode in range(len(bchmkDict)):
        bchmkI = bchmkDict[iCode]
        stkLabelI = [indexLabel.get(key) for key in bchmkI.keys()]
        weightI = [0.0 if w == '' else float(w) for w in bchmkI.values()]
        if None not in stkLabelI:
            stkWeightI = sum(np.array(stkLabelI) * np.array(weightI))
        else:
            # 把None先粗略补齐,再把新指数名称保存下来,便于print到日志检查
            newIndexName = np.array(list(
                bchmkI.keys()))[[x is None for x in stkLabelI]]
            addLabel = []
            for i in range(len(newIndexName)):
                newIndexNameI = newIndexName[i]
                addLabelI = not ('债' in newIndexNameI or '存款' in newIndexNameI
                                 or '年化' in newIndexNameI)
                addLabel.append(addLabelI)
                # write to sql
                sqlTableI = pd.DataFrame(
                    np.array([
                        newIndexName, addLabel,
                        [
                            datetime.datetime.strftime(
                                datetime.datetime.today(), '%Y-%m-%d')
                        ] * len(addLabel)
                    ]).transpose(),
                    columns=['Bchmk_Name', 'If_STK', 'Update_Time'])
                sqlTable = pd.concat([sqlTable, sqlTableI],
                                     axis=0,
                                     ignore_index=True)

            addIndex = np.where([x is None for x in stkLabelI])[0]
            newIndex = list(range(len(addLabel)))
            for i, j in zip(addIndex, newIndex):
                stkLabelI[i] = addLabel[j]
            stkWeightI = sum(np.array(stkLabelI) * np.array(weightI))

        stkWeight.append(stkWeightI)
    sqlTable.drop_duplicates(inplace=True)
    if len(sqlTable) > 0:
        engine = create_engine(
            "mssql+pymssql://lhtz:[email protected]:1433/lhtzdb")
        conn = engine.connect()
        sqlTable.to_sql('paraBchmkType', conn, index=False, if_exists='append')
        conn.close()
        print('新添加Bchmk_Name:{0} ...'.format(sqlTable))
    else:
        print('没有需要新添加参数的Bchmk_Name...')

    res = inputTable.copy()
    res.loc[:, 'Stk_Weight'] = stkWeight
    res = res[res['Stk_Weight'] >= 50]

    return res, newIndexName
def get_trading_days2(dateFrom, dateTo, dateType):

    assert isinstance(dateFrom, str), 'dateFrom should be str'
    assert isinstance(dateTo, str), 'dateTo should  be str'

    # 因为月末涉及最后一天标签问题,所以往前往后多读一些
    dateFrom1 = pd.to_datetime(dateFrom) - pd.Timedelta(365, unit='D')
    dateTo1 = pd.to_datetime(dateTo) + pd.Timedelta(365, unit='D')
    dateFrom1 = datetime.strftime(dateFrom1, '%Y%m%d')
    dateTo1 = datetime.strftime(dateTo1, '%Y%m%d')

    dateType = dateType.lower()
    if dateType not in ('"week", "month", "quarter", "halfyear", "year"'):
        raise ValueError('wrong marktype, must be('
                         '"week", "month", "quarter", "halfyear", "year")')

    exeStr = 'select TRADE_DAYS from dbo.ASHARECALENDAR where ' \
             'S_INFO_EXCHMARKET = \'SSE\' and TRADE_DAYS >= %s and TRADE_DAYS <= %s' % (dateFrom1, dateTo1)
    tradingDays = get_data_sql(exeStr, 'wind')
    tradingDays.columns = ['Date']
    tradingDays['Date'] = pd.to_datetime(tradingDays['Date'])
    tradingDays.sort_values('Date', inplace=True)
    tradingDays.reset_index(drop=True, inplace=True)

    # @2020.04.02貌似可以用x.is_year_end, x.is_quarter_end, x.is_month_end解决3个
    # 添加周末、月末、季末、年末标签
    # Week_Label需要做一个特殊处理,如果是1月份的Week标签是52,则年份需-1,12月份的Week标签是1,则年份需+1
    tradingDays['Year_Num'] = [x.year for x in tradingDays['Date']]
    tradingDays['Quarter_Num'] = [x.quarter for x in tradingDays['Date']]
    tradingDays['Month_Num'] = [x.month for x in tradingDays['Date']]
    tradingDays['Week_Num'] = [x.week for x in tradingDays['Date']]
    tradingDays.loc[tradingDays['Quarter_Num'] <= 2, 'Halfyear_Num'] = 1
    tradingDays.loc[tradingDays['Quarter_Num'] >= 3, 'Halfyear_Num'] = 2
    tradingDays['Halfyear_Num'] = tradingDays['Halfyear_Num'].astype(int)

    # 这里不用for 循环应该怎么调整:
    for i in range(len(tradingDays)):
        y = tradingDays['Year_Num'][i]
        m = tradingDays['Month_Num'][i]
        w = tradingDays['Week_Num'][i]
        if m == 1 and w == 52:
            tradingDays.loc[i, 'Year_Num'] = y - 1
        elif m == 12 and w == 1:
            tradingDays.loc[
                i,
                'Year_Num'] = y + 1  # 这里直接调整Year_Num没关系,因为后面Year_Label的时候直接用Date.year,Year_Num后来没用了
    del y, m, w

    weekLabel = tradingDays['Year_Num'] * 100 + tradingDays['Week_Num']
    tradingDays['Week_Label'] = weekLabel
    tradingDays['Month_Label'] = [
        x.year * 100 + x.month for x in tradingDays['Date']
    ]
    tradingDays['Quarter_Label'] = [
        x.year * 100 + x.quarter for x in tradingDays['Date']
    ]
    tradingDays['Year_Label'] = [x.year for x in tradingDays['Date']]
    tradingDays['Halfyear_Label'] = [
        x.year * 100 for x in tradingDays['Date']
    ] + tradingDays['Halfyear_Num']

    weekEnd = tradingDays.groupby('Week_Label')['Date'].last()
    monthEnd = tradingDays.groupby('Month_Label')['Date'].last()
    quarterEnd = tradingDays.groupby('Quarter_Label')['Date'].last()
    halfyearEnd = tradingDays.groupby('Halfyear_Label')['Date'].last()
    yearEnd = tradingDays.groupby('Year_Label')['Date'].last()
    tradingDays.loc[:, 'If_Week_End'] = [
        x in weekEnd.values for x in tradingDays['Date'].values
    ]
    tradingDays.loc[:, 'If_Month_End'] = [
        x in monthEnd.values for x in tradingDays['Date'].values
    ]
    tradingDays.loc[:, 'If_Quarter_End'] = [
        x in quarterEnd.values for x in tradingDays['Date'].values
    ]
    tradingDays.loc[:, 'If_Halfyear_End'] = [
        x in halfyearEnd.values for x in tradingDays['Date'].values
    ]
    tradingDays.loc[:, 'If_Year_End'] = [
        x in yearEnd.values for x in tradingDays['Date'].values
    ]

    # 筛选需要的标签
    def selectData(i):
        switcher = {
            'week': tradingDays[['Date', 'Week_Label', 'If_Week_End']],
            'month': tradingDays[['Date', 'Month_Label', 'If_Month_End']],
            'quarter': tradingDays[['Date', 'Quarter_Label',
                                    'If_Quarter_End']],
            'halfyear':
            tradingDays[['Date', 'Halfyear_Label', 'If_Halfyear_End']],
            'year': tradingDays[['Date', 'Year_Label', 'If_Year_End']]
        }
        return switcher.get(dateType, "Invalid day of week")

    res = selectData(dateType)
    res = res[(res['Date'] >= pd.to_datetime(dateFrom))
              & (res['Date'] <= pd.to_datetime(dateTo))]
    res.set_index('Date', inplace=True)

    return res
示例#8
0
def get_new_fund_amount(dateFrom, dateTo, ifFlexible):
    # 因为要求MA,所以时间往前错一点
    dateFromTemp = datetime.datetime.strftime(
        pd.to_datetime(dateFrom) - datetime.timedelta(50), '%Y%m%d')
    dateSeq = get_trading_days(dateFromTemp, dateTo)
    dateSeq = attach_date_label(pd.DataFrame(dateSeq), 'month')
    dateSeq = attach_date_label(dateSeq, 'week')
    weekSeq = dateSeq[dateSeq['If_Week_End']]
    # 这是全部的时间,后面把新发基金的数据merge到这个时间上
    # 这个周度和月度是自然周和自然月,只在计算MA的分位数时候作为基准用
    monthSeq = dateSeq[dateSeq['If_Month_End']]

    sqlStr = 'select a.F_INFO_WINDCODE, b.S_INFO_SECTOR, F_INFO_SETUPDATE, ' \
             'a.F_ISSUE_TOTALUNIT * a.F_INFO_PARVALUE as Collection, ' \
             'F_INFO_BENCHMARK  from ChinaMutualFundDescription a ' \
             'left join ChinaMutualFundSector b ' \
             'on a.F_INFO_WINDCODE = b.F_INFO_WINDCODE ' \
             'where b.S_INFO_SECTOR in ' \
             '(\'2001010201000000\', \'2001010101000000\', \'2001010204000000\')'

    stkFund = get_data_sql(sqlStr, 'wind')
    stkFund.columns = [
        'Fund_Code', 'Info_Sector', 'Setup_Date', 'Collection', 'Bchmk'
    ]
    stkFund['Setup_Date'] = pd.to_datetime(stkFund['Setup_Date'])
    stkFund['Collection'] = stkFund['Collection'].astype(float)

    stkFundStk = stkFund[stkFund['Info_Sector'] != '2001010204000000']
    stkFundMix = stkFund[stkFund['Info_Sector'] == '2001010204000000']
    if ifFlexible:
        stkFundMix = attach_stk_label(stkFundMix)[0]
        stkFundMix.drop('Stk_Weight', axis=1, inplace=True)
        stkFund = pd.concat([stkFundStk, stkFundMix],
                            axis=0,
                            ignore_index=True)
        del stkFundStk, stkFundMix
    else:
        stkFund = stkFundStk.copy()
        del stkFundStk

    stkFund.sort_values('Setup_Date', inplace=True)
    stkFund['Date'] = stkFund['Setup_Date']
    stkFund.set_index('Date', inplace=True)
    # 07年之前的有些基金setup date是非交易日,不影响最近3年可不处理
    stkFundWeek = attach_date_label(stkFund, 'week')
    stkFundMonth = attach_date_label(stkFund, 'month')
    weekCount = stkFundWeek.groupby('Week_Label')['Collection'].sum()
    monthCount = stkFundMonth.groupby('Month_Label')['Collection'].sum()

    weekCount = weekSeq.merge(
        pd.DataFrame(weekCount), how='left', on='Week_Label').set_index(
            weekSeq.index)  # 做这一步merge因为有的周没有数据,应该取0处理,月度也需要(虽然月度还没有0的情况)
    # 这里merge完了Week_Label不知怎么就变成object了,需要调整一下,不然存hdf会报警告
    # weekCount['Week_Label'] = weekCount['Week_Label'].astype(int)
    weekCount = weekCount[['Week_Label', 'Collection']]
    monthCount = monthSeq.merge(pd.DataFrame(monthCount),
                                how='left',
                                on='Month_Label').set_index(monthSeq.index)
    # monthCount['Month_Label'] = monthCount['Month_Label'].astype(int)
    monthCount = monthCount[['Month_Label', 'Collection']]
    weekCount['Collection'] = weekCount['Collection'].fillna(0)
    monthCount['Collection'] = monthCount['Collection'].fillna(0)

    # 筛选出过去3年的部分
    dateFrom3Year = datetime.datetime.strftime(
        pd.to_datetime(dateTo) - datetime.timedelta(365 * 3), '%Y%m%d')
    weekCount = weekCount[weekCount.index >= pd.to_datetime(dateFrom3Year)]
    monthCount = monthCount[monthCount.index >= pd.to_datetime(
        dateFrom3Year)]  # 求MA所处分位数的基准序列 画图(除最后一跟MA柱以外的部分)
    # 下面求5天MA和20天MA
    stkFundSum = stkFund.groupby('Date')['Collection'].sum()
    stkFundSum = dateSeq.merge(stkFundSum, how='left', on='Date')
    stkFundSum = pd.DataFrame(stkFundSum['Collection'])
    stkFundSum['Collection'] = stkFundSum['Collection'].fillna(0)

    stkFundSum['MA_5'] = stkFundSum['Collection'].rolling(window=5,
                                                          min_periods=5).sum()
    stkFundSum['MA_20'] = stkFundSum['Collection'].rolling(
        window=20, min_periods=20).sum()

    # 用来画柱状图的序列
    if stkFundSum.index[-1] > weekCount.index[-1]:
        weekCount = pd.concat([
            weekCount[['Collection']],
            pd.DataFrame(stkFundSum.iloc[len(stkFundSum) - 1, :]).transpose()[[
                'MA_5'
            ]].rename(columns={'MA_5': 'Collection'})
        ],
                              axis=0)
    else:
        weekCount = weekCount[['Collection']]
    if stkFundSum.index[-1] > monthCount.index[-1]:
        monthCount = pd.concat([
            monthCount[['Collection']],
            pd.DataFrame(stkFundSum.iloc[len(stkFundSum) - 1, :]).transpose()[[
                'MA_20'
            ]].rename(columns={'MA_20': 'Collection'})
        ],
                               axis=0)
    else:
        monthCount = monthCount[['Collection']]

    # 输出4个数值:过去一周和一月新发规模数据(两个数值)和对应在过去三年上的历史分位数(两个数值)
    res1 = stkFundSum['MA_5'][-1]
    res2 = stkFundSum['MA_20'][-1]
    res3 = weekCount['Collection'].rank(pct=True)[-1]
    res4 = monthCount['Collection'].rank(pct=True)[-1]
    fundRes = pd.DataFrame(
        {
            '过去一周新发规模': '{:.1f}亿'.format(res1),
            '过去一月新发规模': '{:.1f}亿'.format(res2),
            '过去一周新发规模分位数': '{:.1%}'.format(res3),
            '过去一月新发规模分位数': '{:.1%}'.format(res4)
        },
        index=['value'])
    return [fundRes, weekCount, monthCount]
示例#9
0
def get_fund_pos(dateFrom, dateTo, ifFlexible, bchmkCode):
    fundUniv = get_stock_fund_univ(dateFrom, dateTo,
                                   ifFlexible)  # 普通股票型+混合偏股型基金全集
    # 构造年中和年末日期序列
    dateSeqHY = get_trading_days2(dateFrom, dateTo, 'halfyear')
    dateSeqHY = dateSeqHY[dateSeqHY['If_Halfyear_End']]
    # 筛选年中和年末的基金universe  fundUnivHalfYear
    fundUnivHY = dateSeqHY.merge(fundUniv,
                                 how='left',
                                 on='Date',
                                 validate='one_to_many')
    fundUnivHY.drop(['Halfyear_Label', 'If_Halfyear_End'],
                    axis=1,
                    inplace=True)

    # 季度日期序列
    dateSeqQ = get_trading_days2(dateFrom, dateTo, 'quarter')
    dateSeqQ = dateSeqQ[dateSeqQ['If_Quarter_End']]
    fundUnivQ = dateSeqQ.merge(fundUniv,
                               how='left',
                               on='Date',
                               validate='one_to_many')
    fundUnivQ.drop(['Quarter_Label', 'If_Quarter_End'], axis=1, inplace=True)

    # 获取基金的仓位数据,join到univ上求均值即可
    sqlStr = 'select S_INFO_WINDCODE, F_PRT_ENDDATE, F_PRT_STOCKTONAV, F_PRT_NETASSET, F_PRT_STOCKVALUE from ' \
             'ChinaMutualFundAssetPortfolio where F_PRT_ENDDATE >= {0} and F_PRT_ENDDATE <= {1}' \
             'order by S_INFO_WINDCODE, F_PRT_ENDDATE'.format(dateFrom, dateTo)
    fundPosition = get_data_sql(sqlStr, 'wind')
    fundPosition.columns = [
        'Fund_Code', 'Date', 'Stock_Pos', 'Fund_Value', 'Stk_Value'
    ]
    fundPosition['Date'] = pd.to_datetime(fundPosition['Date'])
    fundPosition['Stock_Pos'] = fundPosition['Stock_Pos'].astype(float)

    # Note: fundPosition的日期是报告日期,都是0630这种的,遇到0630不是交易日的就匹配不上 pd.merge_asof只能根据一个key,需要先split,比较麻烦
    # 按月份匹配,如果有多于1条的数据,则取日期最新的
    fundPosition['Month_Label'] = [
        x.year * 100 + x.month for x in fundPosition['Date']
    ]
    fundUnivQ['Month_Label'] = [
        x.year * 100 + x.month for x in fundUnivQ['Date']
    ]
    fundPosition.drop_duplicates(['Fund_Code', 'Month_Label'],
                                 keep='last',
                                 inplace=True)
    fundPosUnivQ = fundUnivQ.merge(fundPosition,
                                   how='left',
                                   on=['Month_Label', 'Fund_Code'],
                                   validate='one_to_one')
    #  去掉仓位低于40%的,这种一般是在建仓期
    fundPosUnivQ = fundPosUnivQ[(fundPosUnivQ['Stock_Pos'] >= 40) |
                                (np.isnan(fundPosUnivQ['Stock_Pos']))].copy()
    fundPosUnivQ.sort_values(['Date_x', 'Fund_Code'], inplace=True)
    fundPosTotalQ = fundPosUnivQ.groupby('Date_x')['Stock_Pos'].mean()
    fundPosTotalQ.index.names = ['Date']  # 这个总数据没有日期问题,只要有数据的一定是全的,不会只是前10名仓位
    # 这个fundPosTotal就是所有偏股型基金总仓位的均值,是季度的,保存下来单独做一个表
    posTableT = fundPosTotalQ.iloc[(len(fundPosTotalQ) -
                                    4):len(fundPosTotalQ)] / 100
    posTableT.index = [
        datetime.datetime.strftime(x, '%Y/%m/%d') for x in posTableT.index
    ]
    posTableT = pd.DataFrame(posTableT).transpose()

    fundPosTotalHY = dateSeqHY.merge(fundPosTotalQ, how='left', on='Date')
    fundPosTotalHY.drop(['Halfyear_Label', 'If_Halfyear_End'],
                        axis=1,
                        inplace=True)
    # 基金持仓的分行业仓位  季报公布的是证监会分类标准,所以这里用底仓自己计算
    # 分行业持仓,最新一期的有的基金公布年报,有的只公布的四季报,这种需要处理,处理时间?????????
    # 基金net asset要直接读,不能用行业仓位反除,四舍五入不准
    # 先筛选出股票型+偏股型基金的底仓,重复的只保留最新
    # 类别code和名称对应表:AShareIndustriesCode,参考用,查询数据不需要这个表
    sqlStr = 'select a.S_INFO_WINDCODE, S_INFO_STOCKWINDCODE, a.F_PRT_ENDDATE, F_PRT_STKVALUE, ' \
             'F_PRT_STKVALUETONAV, b.F_PRT_NETASSET from ' \
             'ChinaMutualFundStockPortfolio a ' \
             'left join ChinaMutualFundAssetPortfolio b ' \
             'on a.S_INFO_WINDCODE = b.S_INFO_WINDCODE and a.F_PRT_ENDDATE = b.F_PRT_ENDDATE ' \
             'where a.S_INFO_WINDCODE in (' \
             'select F_INFO_WINDCODE from ChinaMutualFundSector where S_INFO_SECTOR in ' \
             '(\'2001010201000000\', \'2001010101000000\', \'2001010204000000\')) ' \
             'and month(a.F_PRT_ENDDATE) in (6, 12) and ' \
             'a.F_PRT_ENDDATE >= {0} and a.F_PRT_ENDDATE <= {1}'.format(dateFrom, dateTo)
    # 大概筛选一下就可以,比不筛选稍快一点,也不用筛太精确,因为后面还会join到fundUniv上
    # 直接用一个sql语句不好写,因为entrydate exitdate一个基金对应多条数据,leftjoin 都给弄上了不好汇总
    # 这里面也有同一个报告期出现两次的,数据不一样,以最新日期为准,例如160512.SZ在20110623和20110630的两次只取20110630
    # 这个地方sql省不了,如果不写S_INFO_WINDCODE in (select F_INFO_WINDCODE from ChinaMutualFundSector where ' \
    #                  'S_INFO_SECTOR in (\'2001010201000000\', \'2001010101000000\', \'2001010204000000\'))只join s_info_sector的话会导致有重复数据
    # 这里选出来的数据,可能包含了一些中间符合标准,后期退出了的基金,不用担心,因为最后会再join到fundUniv上面
    fundPosDetail = get_data_sql(sqlStr, 'wind')
    fundPosDetail.columns = [
        'Fund_Code', 'Stock_Code', 'Date', 'Stk_Values', 'Stk_Pct',
        'Fund_Value'
    ]
    fundPosDetail['Date'] = pd.to_datetime(fundPosDetail['Date'])
    fundPosDetail[['Stk_Values', 'Stk_Pct', 'Fund_Value'
                   ]] = fundPosDetail[['Stk_Values', 'Stk_Pct',
                                       'Fund_Value']].astype(float)
    # 摘出半年度的时间clear_version (不能直接fundPosDetail.drop_duplicates因为本来就有很多duplicates必须摘出时间序列再剔除)
    halfYearDate = pd.DataFrame(fundPosDetail['Date'].unique(),
                                columns=['Date'])
    halfYearDate['Month_Label'] = [
        x.year * 100 + x.month for x in halfYearDate['Date']
    ]
    halfYearDate.sort_values(['Date'], inplace=True)
    halfYearDate.drop_duplicates(['Month_Label'], keep='last', inplace=True)
    # tmp = fundPosDetail[[x in set(halfYearDate['Date']) for x in fundPosDetail['Date']]].copy() # 奇怪这个地方如果不加set就都是FALSE,而且这种筛选方式太慢
    # https://stackoverflow.com/questions/48553824/checking-if-a-date-falls-in-a-dataframe-pandas
    halfYearDate['Valid'] = True
    fundPosDetail = fundPosDetail.merge(halfYearDate[['Date', 'Valid']],
                                        on='Date',
                                        how='left')
    fundPosDetail.loc[pd.isnull(fundPosDetail['Valid']), 'Valid'] = False
    fundPosDetail = fundPosDetail[fundPosDetail['Valid']].copy()
    fundPosDetail.reset_index(drop=True, inplace=True)
    fundPosDetail['Month_Label'] = [
        x.year * 100 + x.month for x in fundPosDetail['Date']
    ]
    fundUnivHY['Month_Label'] = [
        x.year * 100 + x.month for x in fundUnivHY['Date']
    ]
    fundPosDetail = fundUnivHY.merge(
        fundPosDetail,
        how='left',
        on=['Month_Label', 'Fund_Code'],
        validate='one_to_many')  # Date_x是交易日,Date_y是报告日
    fundPosDetail = fundPosDetail[[
        'Date_x', 'Fund_Code', 'Stock_Code', 'Stk_Values', 'Fund_Value'
    ]]
    fundPosDetail.columns = [
        'Date', 'Fund_Code', 'Stock_Code', 'Stk_Values', 'Fund_Value'
    ]

    #  贴行业标签计算行业权重
    swComponent = get_component_SW(dateFrom, dateTo)
    fundPosDetail = fundPosDetail.merge(swComponent,
                                        how='left',
                                        on=['Date', 'Stock_Code'])
    uniqueNav = fundPosDetail.drop_duplicates(['Date', 'Fund_Code'],
                                              keep='first')
    totalNav = pd.DataFrame(
        uniqueNav.groupby('Date')['Fund_Value'].sum())  # 每天的基金总市值
    fundPosSector = pd.DataFrame(
        fundPosDetail.groupby(['Date',
                               'SW_Code'])['Stk_Values'].sum())  # 每天分行业的总市值
    fundPosSector['SW_Code'] = fundPosSector.index.get_level_values('SW_Code')
    fundPosSector = fundPosSector.merge(totalNav, how='left', on='Date')
    fundPosSector['SW_Pct'] = fundPosSector['Stk_Values'] / \
        fundPosSector['Fund_Value']
    # 行业标签添加中文名称
    swPara = basic_info_SW()
    swPara['SW_Code'] = swPara['Wind_Code'] + '.SI'
    fundPosSector = fundPosSector.merge(swPara[['SW_Code', 'SW_Name']],
                                        how='left',
                                        on='SW_Code').set_index(
                                            fundPosSector.index)

    # @2020.04.17 添加Bchmk权重
    sqlStr = 'select S_CON_WINDCODE, TRADE_DT, I_WEIGHT from ' \
             'AIndexHS300FreeWeight where S_INFO_WINDCODE = \'{0}\' and TRADE_DT >= {1} and ' \
             'TRADE_DT <= {2}'.format(bchmkCode,
                                      datetime.datetime.strftime(fundPosSector.index.min(), '%Y%m%d'),
                                      datetime.datetime.strftime(fundPosSector.index.max(), '%Y%m%d'))
    bchmkW = get_data_sql(sqlStr, 'wind')
    bchmkW.columns = ['Stock_Code', 'Date', 'Bchmk_W']
    bchmkW['Date'] = pd.to_datetime(bchmkW['Date'])
    bchmkW['Bchmk_W'] = bchmkW['Bchmk_W'].astype(float)
    bchmkW = bchmkW.merge(swComponent, how='left', on=['Date', 'Stock_Code'])
    bchmkW = pd.DataFrame(
        bchmkW.groupby(['Date', 'SW_Code'])['Bchmk_W'].sum() / 100)

    fundPosSector = fundPosSector.merge(bchmkW,
                                        how='left',
                                        on=['Date', 'SW_Code'])
    fundPosSector['Bchmk_W'] = fundPosSector['Bchmk_W'].fillna(0)

    # 怎么判断当前的最新一期是否已经更新完数据,通过时间无法判断,只能通过结果反推,如果底仓汇总的结果和总数差异过大(>3%),就说明还没更新完
    # 汇总的时候不要用行业汇总,直接用底仓汇总(事实上就算<3%了,一段时间应该也会继续更新缩小,diff最终应该小于1%)
    diff = pd.Series(fundPosDetail.groupby(['Date'])['Stk_Values'].sum() / totalNav['Fund_Value']) * 100 -\
           fundPosTotalHY['Stock_Pos']
    if abs(diff[-1]) > 3:
        fundPosSector = fundPosSector[
            fundPosSector.index <= fundPosSector.index.unique()[-2]]
        fundPosTotal = fundPosTotalHY[
            fundPosTotalHY.index <= fundPosTotalHY.index.unique()[-2]]
    assert all(fundPosSector.index.unique() == fundPosTotal.index.unique())
    # 总仓位和各行业配置比例变动,输出表格结果
    currPeriod = fundPosSector.index.unique()[-1]
    lastPeriod = fundPosSector.index.unique()[-2]
    sectorPctTable = fundPosSector.pivot_table(index='SW_Name',
                                               columns=fundPosSector.index,
                                               values='SW_Pct',
                                               aggfunc='first')
    bchmkPctTable = fundPosSector.pivot_table(index='SW_Name',
                                              columns=fundPosSector.index,
                                              values='Bchmk_W',
                                              aggfunc='first')
    assert all(sectorPctTable.index == bchmkPctTable.index) and all(
        sectorPctTable.columns == bchmkPctTable.columns)
    excessPctTable = sectorPctTable - bchmkPctTable
    # 本期 、本期相对上期变化,最终只显示当前还在用的行业
    sectorPctTable = sectorPctTable.merge(
        swPara.loc[swPara['Used_Label'] == 1, ['SW_Name', 'Used_Label']],
        how='left',
        on='SW_Name')
    sectorPctTable.sort_values(['Used_Label', 'SW_Name'],
                               ascending=False,
                               inplace=True)  # 把已经不用的行业放在最下面
    sectorPctTable.reset_index(drop=True, inplace=True)  # 历史仓位变化图
    excessPctTable = excessPctTable.reindex(sectorPctTable['SW_Name'])

    sectorPctTableCurr = sectorPctTable[[
        'SW_Name', lastPeriod, currPeriod, 'Used_Label'
    ]].copy()
    bchmkPctTableCurr = excessPctTable[[lastPeriod, currPeriod]].copy()
    bchmkPctTableCurr.columns = ['上期超配', '本期超配']
    sectorPctTableCurr.rename(columns={
        lastPeriod: '上期仓位',
        currPeriod: '本期仓位'
    },
                              inplace=True)
    bchmkPctTableCurr.reset_index(drop=True, inplace=True)
    sectorPctTableCurr = pd.concat([sectorPctTableCurr, bchmkPctTableCurr],
                                   axis=1)
    sectorPctTableCurr = sectorPctTableCurr[sectorPctTableCurr['Used_Label'] ==
                                            1]
    del sectorPctTableCurr['Used_Label']
    # 本期和上期仓位变化这个表就不显示已经废弃的行业了
    # 总仓位就单独看,不和行业合到一个表了
    posTable = sectorPctTableCurr.copy()
    posTable['Delta'] = posTable['本期仓位'] - posTable['上期仓位']
    posTable['Delta_B'] = posTable['本期超配'] - posTable['上期超配']

    del sectorPctTable['Used_Label']
    return [posTableT, posTable, sectorPctTable]
示例#10
0
from public.getStockFundUniv import get_stock_fund_univ
from public.attachSTKLabel import attach_stk_label
from public.getSWComponent import get_component_SW
from public.basicInfoSW import basic_info_SW
from public.attachDateLabel import attach_date_label
import datetime

############################################ 融资融券数据 ######################
# 目前只需要两融余额总量、融资买入额这两个数据
dateFrom = '20100331'
dateTo = datetime.datetime.strftime(datetime.datetime.today(), '%Y%m%d')
sqlStr = 'select TRADE_DT, S_MARSUM_EXCHMARKET, S_MARSUM_TRADINGBALANCE, S_MARSUM_SECLENDINGBALANCE, ' \
         'S_MARSUM_MARGINTRADEBALANCE, S_MARSUM_PURCHWITHBORROWMONEY from ' \
         'AShareMarginTradeSum where TRADE_DT >= {0} and TRADE_DT <= {1}  ' \
         'order by TRADE_DT, S_MARSUM_EXCHMARKET'.format(dateFrom, dateTo)
marginTD = get_data_sql(sqlStr, 'wind')  # margin trading data
marginTD.columns = [
    'Date', 'Mkt', 'Buy_Balance', 'Sell_Balance', 'Margin_TD_Balance', 'Buy'
]  # Buy代表融资余额,Sell代表融券余额, Margin_TD_Balance是两融余额
marginTD['Date'] = pd.to_datetime(marginTD['Date'])
marginTD[['Buy_Balance', 'Sell_Balance', 'Margin_TD_Balance',
          'Buy']] = marginTD[[
              'Buy_Balance', 'Sell_Balance', 'Margin_TD_Balance', 'Buy'
          ]].apply(lambda x: x.astype(float))
marginTD.sort_values(['Mkt', 'Date'], inplace=True)
marginTD['Buy_MA'] = marginTD.groupby('Mkt')['Buy'].rolling(
    window=10, min_periods=10).mean().values
marginTD.to_hdf('dataForPlot/marginTDData.hdf',
                key='marginTD',
                type='w',
                format='table')