Пример #1
0
    def prepare_data(self, begin_date, end_date):
        """
        Load the balance-sheet and income-statement inputs for this factor.

        Stores two frames on the instance:

        * ``self.bs``   -- one row per report with the invested-capital
          column ``IC``; rows containing NaN are dropped.
        * ``self.inst`` -- the per-ticker ``ttmContinues`` results for the
          ``return`` column, concatenated and indexed by ``datetime``.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when no ticker produced a
            TTM frame (the list to concatenate is empty)
        """
        # Original note: reach back far enough to get data from two years ago.
        shifted_begin_date = shift_date(begin_date, 500)

        # Invested Capital = total assets (121) - current liabilities (101)
        #                    + notes payable (68) + short-term borrowings (109)
        #                    + long-term liabilities due within one year (0)
        ic_items = [121, 101, 68, 109, 0]
        bs = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
            shifted_begin_date:end_date, ['ticker'] + ic_items]
        bs['IC'] = bs[121] - bs[101] + bs[68] + bs[109] + bs[0]
        bs = bs.drop(ic_items, axis=1)
        self.bs = bs.dropna()

        # EBT = net profit attributable to the parent (40)
        #       + financial expenses (56)
        inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 40, 56]]
        # Keep only rows whose financial expense lies outside [-1, 1].
        inst = inst[(inst[56] > 1) | (inst[56] < -1)].copy()
        inst['return'] = inst[40] + inst[56]
        inst = inst.drop([40, 56], axis=1)
        inst.dropna(inplace=True)

        # Both date columns are filled from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index

        returnTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                return_df = ttmContinues(inst[inst['ticker'] == ticker], 'return')
                return_df['ticker'] = ticker
            except Exception:
                # Best effort: skip tickers whose TTM cannot be computed.
                # `Exception` (not a bare except) lets KeyboardInterrupt and
                # SystemExit propagate.
                continue
            returnTTM_ls.append(return_df)

        self.inst = pd.concat(returnTTM_ls)
        self.inst.set_index('datetime', inplace=True)
Пример #2
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare the parent-company equity and net-profit series.

        Stores on the instance:

        * ``self.equity_mean`` -- concatenated ``ttmDiscrete`` results for
          ``motherEquity`` (balance-sheet column 86), called with 5 as the
          third argument.
        * ``self.net_profit`` -- concatenated ``ttmContinues`` results for
          ``motherNetProfit`` (income-statement column 40).

        In both frames ``report_date`` is rendered as a ``YYYY-MM-DD`` string
        and rows are sorted by ``report_date`` then ``datetime``, descending.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        shifted_begin_date = shift_date(begin_date, 800)
        bs = cp.concat_fund(self.data_source, self.tickers,
                            'BS').loc[shifted_begin_date:end_date,
                                      ['ticker', 86]]
        # Both date columns are filled from the frame's index.
        bs['release_date'] = bs.index
        bs['report_date'] = bs.index
        bs['motherEquity'] = bs[86]

        # Equity attributable to the parent company.
        equity_mean = []
        for ticker in bs['ticker'].unique():
            try:
                tmp_equity = ttmDiscrete(bs[bs['ticker'] == ticker],
                                         'motherEquity', 5)
                tmp_equity['ticker'] = ticker
            except Exception:
                # Best effort: skip tickers the helper cannot process, while
                # still letting KeyboardInterrupt/SystemExit propagate.
                continue
            equity_mean.append(tmp_equity)

        equity_mean = pd.concat(equity_mean)

        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 40]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['motherNetProfit'] = inst[40]

        # Net profit attributable to the parent company.
        net_profit = []
        for ticker in inst['ticker'].unique():
            try:
                tmp_profit = ttmContinues(inst[inst['ticker'] == ticker],
                                          'motherNetProfit')
                tmp_profit['ticker'] = ticker
            except Exception:
                continue
            net_profit.append(tmp_profit)

        net_profit = pd.concat(net_profit)

        # Render report dates as strings, then sort newest first.
        equity_mean['report_date'] = equity_mean['report_date'].apply(
            lambda x: x.strftime("%Y-%m-%d"))
        net_profit['report_date'] = net_profit['report_date'].apply(
            lambda x: x.strftime("%Y-%m-%d"))

        self.equity_mean = equity_mean.sort_values(
            by=['report_date', 'datetime'], ascending=[False, False])
        self.net_profit = net_profit.sort_values(
            by=['report_date', 'datetime'], ascending=[False, False])
Пример #3
0
 def prepare_data(self, begin_date, end_date):
     """
     Load the closing-price data needed to build the factor.

     :param begin_date: first date of the factor window
     :param end_date: last date of the factor window
     :return: None; the result is stored on ``self.hq``
     """
     # The configured lag decides how far the start date is shifted.
     lag = self.factor_param['lagTradeDays']
     window_start = shift_date(begin_date, lag)

     # Keep only the code and close columns inside the date window.
     quotes = cp.concat_stock(self.data_source, self.tickers)
     quotes = quotes.loc[window_start:end_date, ['code', 'close']]

     self.hq = cp.hconcat_stock_series(quotes, self.tickers)
Пример #4
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare the inputs of the net asset turnover factor.

        Original notes on the factor:

        * net asset turnover = revenue_TTM / net_assets_TTM
        * net assets = total assets - total liabilities
        * revenue_TTM is the sum over the latest 4 quarterly reports
        * net_assets_TTM is the mean over the latest 5 quarterly reports

        Stores on the instance:

        * ``self.revenueTTM``   -- concatenated ``ttmContinues`` results for
          ``revenue`` (income-statement column 0).
        * ``self.netAssetsTTM`` -- concatenated ``ttmDiscrete`` results for
          ``netAssets`` (balance-sheet column 121 minus column 117).

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        # Fetch the financial data.
        shifted_begin_date = shift_date(begin_date, 500)

        # 117 = total liabilities, 121 = total assets
        netAssets = cp.concat_fund(self.data_source, self.tickers, 'BS').loc[
            shifted_begin_date:end_date, ['ticker', 117, 121]]
        netAssets['netAssets'] = netAssets[121] - netAssets[117]
        netAssets.drop([117, 121], axis=1, inplace=True)
        # Keep only rows with positive net assets.  The original expression
        # `netAssets[netAssets['netAssets'] :0]` was a slice with a Series as
        # its lower bound, not a row filter.
        netAssets = netAssets[netAssets['netAssets'] > 0].copy()
        netAssets['report_date'] = netAssets.index
        netAssets['release_date'] = netAssets.index

        netAssetsTTM_ls = []
        for ticker in netAssets['ticker'].unique():
            try:
                netAssets_df = ttmDiscrete(netAssets[netAssets['ticker'] == ticker], 'netAssets')
                netAssets_df['ticker'] = ticker
            except Exception:
                # Best effort: skip tickers the helper cannot process, while
                # still letting KeyboardInterrupt/SystemExit propagate.
                continue
            netAssetsTTM_ls.append(netAssets_df)

        # 0 = operating revenue
        revenue = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 0]]
        revenue['revenue'] = revenue[0]
        revenue.drop([0], axis=1, inplace=True)
        revenue['report_date'] = revenue.index
        revenue['release_date'] = revenue.index

        revenueTTM_ls = []
        for ticker in revenue['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                reven_df = ttmContinues(revenue[revenue['ticker'] == ticker], 'revenue')
                reven_df['ticker'] = ticker
            except Exception:
                continue
            revenueTTM_ls.append(reven_df)

        self.revenueTTM = pd.concat(revenueTTM_ls)
        self.netAssetsTTM = pd.concat(netAssetsTTM_ls)
Пример #5
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare the revenue and current-asset series for the factor.

        Original note: CATurnover = currentAssets (103) / revenue (0).
        NOTE(review): a turnover ratio is normally revenue / current assets;
        the ratio is not formed in this method, so confirm at the call site.

        Stores on the instance:

        * ``self.revenueTTM``       -- concatenated ``ttmContinues`` results
          for ``revenue`` (income-statement column 0).
        * ``self.currentAssetsTTM`` -- concatenated ``ttmDiscrete`` results
          for ``currentAssets`` (balance-sheet column 103).

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        # Fetch the financial data.
        shifted_begin_date = shift_date(begin_date, 500)
        bs = cp.concat_fund(self.data_source, self.tickers,
                            'BS').loc[shifted_begin_date:end_date,
                                      ['ticker', 103]]
        # Both date columns are filled from the frame's index.
        bs['release_date'] = bs.index
        bs['report_date'] = bs.index
        bs['currentAssets'] = bs[103]
        bs.drop(103, axis=1, inplace=True)

        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 0]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['revenue'] = inst[0]
        inst.drop([0], axis=1, inplace=True)

        # TTM for the flow item (revenue), one ticker at a time.
        revenueTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker],
                                        'revenue')
                reven_df['ticker'] = ticker
            except Exception:
                # Report and skip; `Exception` keeps KeyboardInterrupt and
                # SystemExit from being swallowed.
                print(ticker + ': revenue error')
                continue
            revenueTTM_ls.append(reven_df)

        # Discrete TTM (original note: average of the recent values).
        currentAssetsTTM_ls = []
        for ticker in bs['ticker'].unique():
            try:
                currentAssets_df = ttmDiscrete(bs[bs['ticker'] == ticker],
                                               'currentAssets')
                currentAssets_df['ticker'] = ticker
            except Exception:
                print(ticker + ': current asset error')
                continue
            currentAssetsTTM_ls.append(currentAssets_df)

        self.revenueTTM = pd.concat(revenueTTM_ls)
        self.currentAssetsTTM = pd.concat(currentAssetsTTM_ls)
Пример #6
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare net-profit TTM data and market-value data.

        Stores on the instance:

        * ``self.profitTTM`` -- concatenated ``ttmContinues`` results for
          ``motherNetProfit`` (income-statement column 40).
        * ``self.mkt_value`` -- the ``market_value`` frame without its
          ``price`` and ``totals`` columns.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        shifted_begin_date = shift_date(begin_date, 500)
        inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 40]]
        inst['motherNetProfit'] = inst[40]
        inst.drop(40, axis=1, inplace=True)
        # Both date columns are filled from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index

        profitTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'motherNetProfit')
                reven_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare except would.
                continue
            profitTTM_ls.append(reven_df)

        # Net profit TTM
        self.profitTTM = pd.concat(profitTTM_ls)
        # self.profitTTM.set_index('datetime', inplace=True)

        # Total market value.
        # Original note: Tushare market-value data only covers 2017 to now.
        # NOTE: the path is built with Windows-style '\\' separators.
        df = market_value(self.data_source + '\\other\\otherdata.csv', self.tickers)
        self.mkt_value = df.drop(['price', 'totals'], axis=1)
Пример #7
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare net-profit TTM data and market-value data.

        Stores on the instance:

        * ``self.profitTTM`` -- concatenated ``ttmContinues`` results for
          ``motherNetProfit`` (income-statement column 40).
        * ``self.mkt_value`` -- the ``market_value`` frame without its
          ``price`` and ``totals`` columns.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        # Original note: go back 500 trading days.
        shifted_begin_date = shift_date(begin_date, 500)

        # Take "net profit attributable to shareholders of the parent" from
        # the income statement; item names and codes are listed in FundDict.
        inst = cp.concat_fund(self.data_source, self.tickers, 'IS').loc[
            shifted_begin_date:end_date, ['ticker', 40]]
        inst['motherNetProfit'] = inst[40]
        inst.drop(40, axis=1, inplace=True)

        # The ttm algorithm needs both a release date and a report date;
        # here both are filled from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index

        # Net profit TTM
        profitTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker], 'motherNetProfit')
                reven_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare except would.
                continue
            profitTTM_ls.append(reven_df)
        self.profitTTM = pd.concat(profitTTM_ls)

        # Total market value from "OtherData".
        # Original note: Tushare market-value data only covers June 2017 to now.
        # NOTE: the path is built with Windows-style '\\' separators.
        df = market_value(self.data_source + '\\other\\otherdata.csv', self.tickers)
        self.mkt_value = df.drop(['price', 'totals'], axis=1)
Пример #8
0
 def prepare_data(self, begin_date, end_date):
     """
     Compute the current ratio for every balance-sheet row in the window.

     Current ratio = current assets (103) / current liabilities (101).
     The previous code divided 101 by 103, i.e. the inverse ratio.
     NOTE(review): the column meanings follow the FundDict codes used by
     the other balance-sheet factors -- confirm against FundDict.

     :param begin_date: first date of the factor window
     :param end_date: last date of the factor window
     :return: None; ``self.bs`` keeps ``ticker`` and ``CurrentRatio``
     """
     shifted_begin_date = shift_date(begin_date, 500)
     bs = cp.concat_fund(self.data_source, self.tickers,
                         'BS').loc[shifted_begin_date:end_date,
                                   ['ticker', 101, 103]]
     bs['CurrentRatio'] = bs[103] / bs[101]
     self.bs = bs.drop([101, 103], axis=1)
Пример #9
0
 def prepare_data(self, begin_date, end_date):
     """
     Compute the quick ratio for every balance-sheet row in the window.

     Quick assets = current assets (103) - inventory (52)
                    - prepayments (139) - deferred expenses (88);
     the ratio divides them by current liabilities (101).

     :param begin_date: first date of the factor window
     :param end_date: last date of the factor window
     :return: None; ``self.balance_sheet`` keeps ``ticker`` and ``Quick``
     """
     window_start = shift_date(begin_date, 500)
     raw_items = [101, 52, 139, 88, 103]

     sheet = cp.concat_fund(self.data_source, self.tickers, 'BS')
     sheet = sheet.loc[window_start:end_date, ['ticker'] + raw_items]

     # Same left-to-right subtraction order as before.
     quick_assets = sheet[103] - sheet[88] - sheet[52] - sheet[139]
     sheet['Quick'] = quick_assets / sheet[101]

     self.balance_sheet = sheet.drop(raw_items, axis=1)
Пример #10
0
    def prepare_data(self, begin_date, end_date):
        """
        Compute the interest-coverage column ``interscover``.

        Original notes: the factor is EBIT / interest expense, with
        EBIT = total profit (34) + net interest expense.  Net interest
        expense is interest paid minus interest income; when the
        financial-expense footnote is not disclosed, financial expenses (56)
        are used directly, which is what this method does.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None; the result is stored on ``self.inst`` sorted by
            index in ascending order
        """
        window_start = shift_date(begin_date, 500)
        statement = cp.concat_fund(self.data_source, self.tickers, 'IS')
        statement = statement.loc[window_start:end_date, ['ticker', 34, 56]]

        # Keep rows whose financial expense lies outside [-1, 1].
        fin_expense = statement[56]
        keep = (fin_expense > 1) | (fin_expense < -1)
        coverage = statement[keep].copy()

        ebit = coverage[34] + coverage[56]
        coverage['interscover'] = ebit / coverage[56]

        self.inst = coverage.sort_index(ascending=True)
Пример #11
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare the revenue and total-asset TTM series.

        Stores on the instance:

        * ``self.revenueTTM``     -- concatenated ``ttmContinues`` results
          for ``revenue`` (income-statement column 0).
        * ``self.totalAssetsTTM`` -- concatenated ``ttmDiscrete`` results
          for ``totalAssets`` (balance-sheet column 121).

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        shifted_begin_date = shift_date(begin_date, 700)
        # totalAssets 121
        bs = cp.concat_fund(self.data_source, self.tickers,
                            'BS').loc[shifted_begin_date:end_date,
                                      ['ticker', 121]]
        # Both date columns are filled from the frame's index.
        bs['release_date'] = bs.index
        bs['report_date'] = bs.index
        bs['totalAssets'] = bs[121]
        bs.drop(121, axis=1, inplace=True)

        # revenue 0, cost 4
        # NOTE: column 4 is fetched but neither renamed nor dropped here.
        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 0, 4]]
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['revenue'] = inst[0]
        inst.drop(0, axis=1, inplace=True)

        revenueTTM_ls = []
        totalAssetsTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker],
                                        'revenue')
                reven_df['ticker'] = ticker
            except Exception:
                # Report and skip; `Exception` keeps KeyboardInterrupt and
                # SystemExit from being swallowed.
                print(ticker + ': revenue error')
                continue
            revenueTTM_ls.append(reven_df)

        for ticker in bs['ticker'].unique():
            try:
                total_asset_df = ttmDiscrete(bs[bs['ticker'] == ticker],
                                             'totalAssets')
                total_asset_df['ticker'] = ticker
            except Exception:
                print(ticker + ': total asset error')
                continue
            totalAssetsTTM_ls.append(total_asset_df)

        self.revenueTTM = pd.concat(revenueTTM_ls)
        self.totalAssetsTTM = pd.concat(totalAssetsTTM_ls)
Пример #12
0
    def prepare_data(self, begin_date, end_date):
        """
        Load stock closing prices and the benchmark closing prices.

        Stores on the instance:

        * ``self.hq`` -- result of ``cp.hconcat_stock_series`` on the
          ``code``/``close`` quotes inside the window.
        * ``self.b``  -- the benchmark ``close`` column read from CSV,
          forward-filled.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        """
        # Original note: fetch some extra data to allow filling.
        shifted_begin_date = shift_date(begin_date,
                                        self.factor_param['lagTradeDays'])

        # Stock quotes.
        hq = cp.concat_stock(self.data_source,
                             self.tickers).loc[shifted_begin_date:end_date,
                                               ['code', 'close']]
        self.hq = cp.hconcat_stock_series(hq, self.tickers)

        # Benchmark index.
        # b = sp.get_index(self.benchmark).loc[shifted_begin_date:end_date,['close']]
        # NOTE: the path is built with Windows-style '\\' separators.
        b = pd.read_csv(self.data_source + '\\hq\\' + self.benchmark + '.csv',
                        index_col=0).loc[shifted_begin_date:end_date,
                                         ['close']]
        # `fillna(method='ffill')` is deprecated in recent pandas;
        # `ffill()` performs the same forward fill.
        self.b = b.ffill()
Пример #13
0
    def prepare_data(self, begin_date, end_date):
        """
        Load prices, the benchmark, and the debt-to-equity ratio.

        Stores on the instance:

        * ``self.hq`` -- result of ``cp.hconcat_stock_series`` on the
          ``code``/``close`` quotes inside the window.
        * ``self.b``  -- the benchmark ``close`` column read from CSV,
          forward-filled.
        * ``self.Dbequrt_df`` -- ``ticker``, ``Dbequrt``, ``report_date`` and
          ``release_date`` for rows whose ratio is positive.

        Original notes: the book-value weight is
        1 / (1 + total liabilities / shareholders' equity), and Dbequrt
        (debt-to-equity ratio) = total liabilities / shareholders' equity.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        """
        # Original note: fetch some extra data to allow filling.
        price_begin_date = shift_date(begin_date,
                                      self.factor_param['lagTradeDays'])

        # Stock quotes.
        hq = cp.concat_stock(self.data_source,
                             self.tickers).loc[price_begin_date:end_date,
                                               ['code', 'close']]
        self.hq = cp.hconcat_stock_series(hq, self.tickers)

        # Benchmark index.
        # b = sp.get_index(self.benchmark).loc[shifted_begin_date:end_date,['close']]
        # NOTE: the path is built with Windows-style '\\' separators.
        b = pd.read_csv(self.data_source + '\\hq\\' + self.benchmark + '.csv',
                        index_col=0).loc[price_begin_date:end_date,
                                         ['close']]
        # `fillna(method='ffill')` is deprecated in recent pandas;
        # `ffill()` performs the same forward fill.
        self.b = b.ffill()

        # Financial data uses its own, fixed look-back.
        fund_begin_date = shift_date(begin_date, 500)
        # 117 = total liabilities, 121 = total assets
        Dbequrt_df = cp.concat_fund(self.data_source, self.tickers,
                                    'BS').loc[fund_begin_date:end_date,
                                              ['ticker', 117, 121]]
        # The previous code assigned column 121 (assets) to the liabilities
        # and column 117 (liabilities) to the equity, contradicting the
        # column note above.  Equity is derived as assets minus liabilities.
        # NOTE(review): confirm the column codes against FundDict.
        Dbequrt_df['totalLiabilities'] = Dbequrt_df[117]
        Dbequrt_df['totalEquity'] = Dbequrt_df[121] - Dbequrt_df[117]
        Dbequrt_df['Dbequrt'] = Dbequrt_df['totalLiabilities'] / Dbequrt_df[
            'totalEquity']
        Dbequrt_df.drop([117, 121], axis=1, inplace=True)
        # Keep only rows with a positive ratio.  The original expression
        # `Dbequrt_df[Dbequrt_df['Dbequrt']:0]` was a slice with a Series as
        # its lower bound, not a row filter.
        Dbequrt_df = Dbequrt_df[Dbequrt_df['Dbequrt'] > 0].copy()
        Dbequrt_df['report_date'] = Dbequrt_df.index
        Dbequrt_df['release_date'] = Dbequrt_df.index
        self.Dbequrt_df = Dbequrt_df.drop(['totalLiabilities', 'totalEquity'],
                                          axis=1)
Пример #14
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare accrual TTM data and market-value data.

        Stores on the instance:

        * ``self.accrual_df``  -- cash-flow rows merged with income rows on
          ``ticker``, ``release_date`` and ``report_date``, plus
          ``accr`` = column 40 - column 133.
        * ``self.accrual_ttm`` -- concatenated ``ttmContinues`` results for
          ``accr``.
        * ``self.mkt_value``   -- the ``market_value`` frame without its
          ``price`` and ``totals`` columns.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        shifted_begin_date = shift_date(begin_date, 500)
        # motherNetProfit 40
        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 40]]
        # Both date columns are filled from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        # cash_flows_yield 133
        cf = cp.concat_fund(self.data_source, self.tickers,
                            'CF').loc[shifted_begin_date:end_date,
                                      ['ticker', 133]]
        cf['release_date'] = cf.index
        cf['report_date'] = cf.index

        self.accrual_df = cf.merge(
            inst, on=['ticker', 'release_date', 'report_date'])
        self.accrual_df['accr'] = self.accrual_df[40] - self.accrual_df[133]

        cash_flow_ls = []
        for ticker in self.accrual_df['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                reven_df = ttmContinues(
                    self.accrual_df[self.accrual_df['ticker'] == ticker],
                    'accr')
                reven_df['ticker'] = ticker
            except Exception:
                # Best effort: skip this ticker, but do not swallow
                # KeyboardInterrupt/SystemExit as a bare except would.
                continue
            cash_flow_ls.append(reven_df)

        self.accrual_ttm = pd.concat(cash_flow_ls)
        # Total market value.
        # Original note: Tushare market-value data only covers 2017 to now.
        # NOTE: the path is built with Windows-style '\\' separators.
        df = market_value(self.data_source + '\\other\\otherdata.csv',
                          self.tickers)
        self.mkt_value = df.drop(['price', 'totals'], axis=1)
Пример #15
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare annual-report net profit and market-value data.

        Stores on the instance:

        * ``self.earings_df`` -- ``ticker``, ``motherNetProfit`` (income
          statement column 40) and ``reportDate`` for rows whose report
          date string ends with ``12-31``.
        * ``self.mkt_value``  -- the ``market_value`` frame without its
          ``price`` and ``totals`` columns.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        """

        def _as_day_string(stamp):
            return stamp.strftime("%Y-%m-%d")

        window_start = shift_date(begin_date, 500)
        income = cp.concat_fund(self.data_source, self.tickers, 'IS')
        income = income.loc[window_start:end_date, ['ticker', 40]]

        income['motherNetProfit'] = income[40]
        income.drop(40, axis=1, inplace=True)

        # The report date comes from the index, rendered as YYYY-MM-DD.
        income['reportDate'] = income.index
        income['reportDate'] = income['reportDate'].apply(_as_day_string)

        # Annual reports only: the report date must end with December 31.
        is_annual = income['reportDate'].str.endswith('12-31')
        self.earings_df = income[is_annual]

        # Original note: Tushare market-value data only covers 2017 to now.
        cap = market_value(self.data_source + '\\other\\otherdata.csv',
                           self.tickers)
        self.mkt_value = cap.drop(['price', 'totals'], axis=1)
Пример #16
0
    def prepare_data(self, begin_date, end_date):
        """
        Compute the assets-to-liabilities ratio ``TA2TL``.

        TA2TL = totalAssets / totalLiabilities, where column 117 holds the
        total liabilities and column 121 the total assets.  The previous
        code divided 117 by 121, i.e. liabilities by assets, which is the
        inverse of this definition.
        NOTE(review): confirm the column codes against FundDict.

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None; ``self.df`` keeps ``ticker``, ``reportDate`` and
            ``TA2TL``
        """
        # Fetch the financial data.
        shifted_begin_date = shift_date(begin_date, 500)
        ff = cp.concat_fund(self.data_source, self.tickers,
                            'BS').loc[shifted_begin_date:end_date,
                                      ['ticker', 117, 121]]
        # The report date is assumed to be the announcement date.
        ff['reportDate'] = ff.index

        # Total assets (121) over total liabilities (117).
        ff['TA2TL'] = ff[121] / ff[117]
        ff.drop([117, 121], axis=1, inplace=True)

        self.df = ff
Пример #17
0
    def prepare_data(self, begin_date, end_date):
        """
        Prepare the combined revenue-and-cost TTM series.

        ``self.revenue_cost_TTM`` receives the concatenated ``ttmContinues``
        results; the helper is called with the single string
        ``'revenue,cost'`` (income-statement columns 0 and 4).

        :param begin_date: first date of the factor window
        :param end_date: last date of the factor window
        :return: None
        :raises ValueError: from ``pd.concat`` when every ticker was skipped
        """
        shifted_begin_date = shift_date(begin_date, 700)
        inst = cp.concat_fund(self.data_source, self.tickers,
                              'IS').loc[shifted_begin_date:end_date,
                                        ['ticker', 0, 4]]
        # Both date columns are filled from the frame's index.
        inst['release_date'] = inst.index
        inst['report_date'] = inst.index
        inst['revenue'] = inst[0]
        inst['cost'] = inst[4]
        inst.drop([0, 4], axis=1, inplace=True)

        revenueTTM_ls = []
        for ticker in inst['ticker'].unique():
            try:
                # Original note: fewer than 4 financial records raises here.
                reven_df = ttmContinues(inst[inst['ticker'] == ticker],
                                        'revenue,cost')
                reven_df['ticker'] = ticker
            except Exception:
                # Report and skip; `Exception` keeps KeyboardInterrupt and
                # SystemExit from being swallowed.
                print(ticker + ': revenue and cost error')
                continue
            revenueTTM_ls.append(reven_df)

        self.revenue_cost_TTM = pd.concat(revenueTTM_ls)
Пример #18
0
if __name__ == '__main__':
    # Script entry point: crawl balance-sheet ('BS') data for the HS300
    # constituents that are not stored yet, then print the elapsed seconds.

    start = time.time()
    import os
    from factorset.data.OtherData import code_to_symbol
    from factorset.data import CSVParser as cp
    import tushare as ts
    # allAshare = pd.read_csv(os.path.abspath('./allAShare.csv'))
    # allAshare = allAshare['0']

    # HS300 constituents, with codes converted by code_to_symbol.
    constituents = ts.get_hs300s()
    constituents['code'] = constituents['code'].apply(code_to_symbol)

    # Crawl only the HS300 symbols that have not been stored yet.
    # NOTE(review): the stored symbols are looked up for 'IS' while the
    # crawler below runs for 'BS' -- confirm this is intended.
    wanted = set(constituents['code'].tolist())
    stored = set(cp.all_fund_symbol(os.path.abspath('.'), 'IS'))
    pending = list(wanted - stored)

    # Known problem tickers, kept for reference.
    # BS tables containing duplicate dates:
    # pending = ['300671.SZ', '002886.SZ', '300696.SZ', '603055.SH', '300670.SZ', '300692.SZ',
    # '002889.SZ', '603882.SH', '603801.SH', '603938.SH', '300687.SZ', '603535.SH', '603043.SH']
    # BS with duplicate dates and differing values (prospectus vs. filing draft):
    # pending = ['002886.SZ', '300696.SZ', '603938.SH', '300692.SZ', '300670.SZ', '603882.SH']
    # IS with duplicate dates and differing values (prospectus vs. filing draft):
    # pending = ['002886.SZ', '300696.SZ', '300670.SZ', '300692.SZ', '603055.SH', '603938.SH', '603882.SH']
    # CF with duplicate dates and differing values (prospectus vs. filing draft):
    # pending = ['002386.SZ', '603882.SH', '603018.SH', '300671.SZ', '603938.SH', '300537.SZ', '300670.SZ' ,
    # '002086.SZ', '000568.SZ', '600612.SH', '300696.SZ', '600552.SH', '300687.SZ', '600983.SH', '002889.SZ',
    #  '603801.SH', '300692.SZ', '603055.SH', '002886.SZ', '002852.SZ', '603505.SH', '300365.SZ', '603535.SH',
    #  '300214.SZ', '300135.SZ', '603043.SH']
    crawler = FundCrawler('BS')
    crawler.main(pending, num=20)

    elapsed = time.time() - start
    print(elapsed)