Exemplo n.º 1
0
    def crawl_DIV_type(self, year, stocktype):
        """Crawl the MOPS dividend page and total dividends per ticker/year.

        Posts a query to the ``t05st09sub`` endpoint, splits the returned
        markup on ``<table`` / ``<tr`` / ``<td`` and, for every row with more
        than 15 ``<td`` fragments, reads the ticker, the year the dividend
        belongs to, the cash dividend and the stock dividend.  Rows that
        share the same ``(ticker, year)`` key are summed.

        Args:
            year: Year to query; it is passed through ``common.year_CE2RC``
                before being sent as the ``YEAR`` form field.
            stocktype: Value sent as the ``TYPEK`` form field.

        Returns:
            A ``pandas.DataFrame`` indexed by ``(Ticker, yr)`` with the
            columns ``CD`` (cash dividend) and ``SD`` (stock dividend).
            An empty ``DataFrame`` is returned when no row qualifies.
        """
        url = 'https://mops.twse.com.tw/server-java/t05st09sub'
        form_data = {
            'encodeURIComponent': 1,
            'step': 1,
            'TYPEK': stocktype,
            'YEAR': common.year_CE2RC(year),
            'first': '',
            'qryType': 2,
        }

        # Break the response into table fragments.
        table_array = common.crawl_data2text(url, form_data,
                                             'big5').split('<table')

        # Running [CD, SD] totals keyed by (ticker, yr).  A plain dict makes
        # the "update or insert" step reliable (writing through a row pulled
        # out of a DataFrame is not guaranteed to reach the frame) and avoids
        # DataFrame.append, which pandas 2.0 removed.
        totals = {}

        for table in table_array:
            if '公司代號' not in table:
                continue
            for tr in table.split('<tr'):
                td_array = tr.split('<td')
                if len(td_array) <= 15:
                    continue

                # Company code: the text before the first '-'.
                ticker = common.col_clear(td_array[1]).split('-')[0].strip()
                # Year the dividend belongs to: the text before '年',
                # converted with common.year_RC2CE.
                intyr, _ = common.TryParse(
                    'int',
                    common.col_clear(td_array[3]).split('年')[0])
                yr = common.year_RC2CE(intyr)
                # Cash dividend.
                CD, _ = common.TryParse('float',
                                        common.col_clear(td_array[12]))
                # Stock dividend.
                SD, _ = common.TryParse('float',
                                        common.col_clear(td_array[15]))

                key = (ticker, yr)
                if key in totals:
                    totals[key][0] += CD
                    totals[key][1] += SD
                else:
                    totals[key] = [CD, SD]

        if not totals:
            return pd.DataFrame()

        return pd.DataFrame(
            data=list(totals.values()),
            index=pd.MultiIndex.from_tuples(list(totals),
                                            names=['Ticker', 'yr']),
            columns=['CD', 'SD'])
Exemplo n.º 2
0
    def crawl_FSA_type(self, year, stocktype):
        """Crawl the MOPS financial-ratio page into one row per ticker.

        Posts a query to the ``ajax_t51sb02`` endpoint, takes the fourth
        ``<table`` fragment of the response (index 3) and reads 19
        consecutive ratio cells (``<td`` fragments 3 through 21) from every
        row that is long enough to contain them.

        Args:
            year: Year to query; it is passed through ``common.year_CE2RC``
                before being sent as the ``year`` form field.
            stocktype: Value sent as the ``TYPEK`` form field.

        Returns:
            A ``pandas.DataFrame`` indexed by ``(Ticker, yr)`` with one
            column per ratio (see ``columns`` below).  An empty
            ``DataFrame`` is returned when the response has fewer than four
            table fragments or no row qualifies.
        """
        url = 'https://mops.twse.com.tw/mops/web/ajax_t51sb02'
        form_data = {
            'encodeURIComponent': 1,
            'run': 'Y',
            'step': 1,
            'TYPEK': stocktype,
            'year': common.year_CE2RC(year),
            'isnew': '',
            'firstin': 1,
            'off': 1,
            'ifrs': 'Y',
        }

        # Output columns in page order; column k is read from td fragment
        # first_cell + k.
        columns = [
            'DR',        # debt-to-assets ratio
            'LER',       # long-term funds to property, plant and equipment
            'CR',        # current ratio
            'UR',        # quick ratio
            'IPM',       # interest coverage multiple
            'ARTR',      # receivables turnover
            'ACCD',      # average collection days
            'ITR',       # inventory turnover (times)
            'ASD',       # average days of sales
            'PETR',      # property, plant and equipment turnover (times)
            'TATR',      # total assets turnover (times)
            'ROA',       # return on assets (%)
            'ROE',       # return on equity (%)
            'NPBT2PCR',  # pre-tax income to paid-in capital (%)
            'NPR',       # net profit margin (%)
            'EPS',       # earnings per share
            'CFR',       # cash flow ratio (%)
            'CFAR',      # cash flow adequacy ratio (%)
            'CRR',       # cash reinvestment ratio (%)
        ]
        first_cell = 3
        last_cell = first_cell + len(columns) - 1  # == 21

        # Break the response into table fragments.
        table_array = common.crawl_data2text(url, form_data).split('<table')

        # table_array[3] is read below, so at least four fragments are
        # needed (the previous "< 3" check let a length of exactly 3 through
        # and then raised IndexError).
        if len(table_array) < 4:
            return pd.DataFrame()

        # NOTE(review): ``year`` is also fed to year_CE2RC for the request
        # above; confirm year_RC2CE leaves an already-CE year unchanged.
        yr = common.year_RC2CE(year)

        keys = []
        rows = []
        for tr in table_array[3].split('<tr'):
            td_array = tr.split('<td')
            # Require every cell that is read; the previous "> 15" check
            # allowed rows too short for td_array[21].
            if len(td_array) <= last_cell:
                continue

            # Company code: the text before the first '-'.
            ticker = common.col_clear(td_array[1]).split('-')[0].strip()

            values = []
            for cell in td_array[first_cell:last_cell + 1]:
                value, _ = common.TryParse('float', common.col_clear(cell))
                values.append(value)

            keys.append((ticker, yr))
            rows.append(values)

        if not rows:
            return pd.DataFrame()

        # Build the frame once instead of DataFrame.append per row (removed
        # in pandas 2.0 and quadratic in the number of rows).
        return pd.DataFrame(
            data=rows,
            index=pd.MultiIndex.from_tuples(keys, names=['Ticker', 'yr']),
            columns=columns)
Exemplo n.º 3
0
    def crawl_BS_type(self, year, season, stocktype):
        """Crawl the MOPS balance-sheet summary page into one row per ticker.

        Posts a query to the ``ajax_t163sb05`` endpoint and scans every
        ``<table`` fragment containing ``代號</th>``.  Header rows (those
        containing ``<th``) are used to locate the wanted columns by caption;
        each following data row is read at those positions.  A field whose
        caption was not found in the table keeps the value 0.

        Args:
            year: Year to query; it is passed through ``common.year_CE2RC``
                before being sent as the ``year`` form field.
            season: Value sent as the ``season`` form field and stored as
                the ``qtr`` index level.
            stocktype: Value sent as the ``TYPEK`` form field.

        Returns:
            A ``pandas.DataFrame`` indexed by ``(Ticker, yr, qtr)`` with the
            columns ``TA, TL, TE, RNper, CA, NCA, CL, NCL``.  An empty
            ``DataFrame`` is returned when no data row is found.
        """
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb05'
        form_data = {
            'encodeURIComponent': 1,
            'step': 1,
            'firstin': 1,
            'off': 1,
            'isQuery': 'Y',
            'TYPEK': stocktype,
            'year': common.year_CE2RC(year),
            'season': season
        }

        # Break the response into table fragments.
        table_array = common.crawl_data2text(url, form_data).split('<table')

        # Header captions accepted for each field.
        dtTitle = {
            'TA': ['資產總額', '資產總計'],      # total assets
            'TL': ['負債總計', '負債總額'],      # total liabilities
            'TE': ['權益總計', '權益總額'],      # total equity
            'RNP': ['每股參考淨值'],             # reference net value per share
            'CA': ['流動資產'],                  # current assets
            'NCA': ['非流動資產'],               # non-current assets
            'CL': ['流動負債'],                  # current liabilities
            'NCL': ['非流動負債']                # non-current liabilities
        }
        fields = ('TA', 'TL', 'TE', 'RNP', 'CA', 'NCA', 'CL', 'NCL')
        # The RNP field is published under the column name 'RNper'.
        columns = ['TA', 'TL', 'TE', 'RNper', 'CA', 'NCA', 'CL', 'NCL']

        keys = []
        rows = []

        for table in table_array:
            if '代號</th>' not in table:
                continue

            # Position of each field in this table; -1 means "not found".
            dtIndex = dict.fromkeys(fields, -1)

            for tr in table.split('<tr'):
                if '<th' in tr:
                    # Header row: remember where each wanted caption sits.
                    th_array = tr.split('<th')
                    for thIndex, th in enumerate(th_array[1:], start=1):
                        title = common.col_clear(th).strip()
                        for key, captions in dtTitle.items():
                            if title in captions:
                                dtIndex[key] = thIndex
                    continue

                td_array = tr.split('<td')
                if len(td_array) <= 1:
                    continue

                # Company code, year, quarter.
                ticker = common.col_clear(td_array[1])
                # NOTE(review): ``year`` is also fed to year_CE2RC for the
                # request above; confirm year_RC2CE leaves an already-CE
                # year unchanged.
                index = (ticker, common.year_RC2CE(year), season)

                dtData = dict.fromkeys(fields, 0)
                for key, position in dtIndex.items():
                    if position >= 0:
                        val, _ = common.TryParse(
                            'float', common.col_clear(td_array[position]))
                        dtData[key] = val

                keys.append(index)
                rows.append([dtData[key] for key in fields])

        if not rows:
            return pd.DataFrame()

        # Build the frame once instead of DataFrame.append per row (removed
        # in pandas 2.0 and quadratic in the number of rows).
        return pd.DataFrame(
            data=rows,
            index=pd.MultiIndex.from_tuples(keys,
                                            names=['Ticker', 'yr', 'qtr']),
            columns=columns)
Exemplo n.º 4
0
    def crawl_SCI_type(self, year, season, stocktype):
        """Crawl the MOPS comprehensive-income summary into one row per ticker.

        Posts a query to the ``ajax_t163sb04`` endpoint and scans every
        ``<table`` fragment containing ``代號</th>``.  Header rows (those
        containing ``<th``) are used to locate the wanted columns by caption;
        each following data row is read at those positions.  A field whose
        caption was not found in the table keeps the value 0.  Only the first
        row seen for a given ``(ticker, year, season)`` key is kept.

        Args:
            year: Year to query; it is passed through ``common.year_CE2RC``
                before being sent as the ``year`` form field.
            season: Value sent as the ``season`` form field and stored as
                the ``qtr`` index level.
            stocktype: Value sent as the ``TYPEK`` form field.

        Returns:
            A ``pandas.DataFrame`` indexed by ``(Ticker, yr, qtr)`` with the
            columns ``Rev, GP, OP, NPBT, NPAT, NPPC, EPS, GM`` where
            ``Rev = Rev1 + Rev2`` and ``GM = GP / Rev``.  An empty
            ``DataFrame`` is returned when no data row is found.
        """
        url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb04'
        form_data = {
            'encodeURIComponent': 1,
            'step': 1,
            'firstin': 1,
            'TYPEK': stocktype,
            'code': '',
            'year': common.year_CE2RC(year),
            'season': season
        }

        # Break the response into table fragments.
        table_array = common.crawl_data2text(url, form_data).split('<table')

        # Header captions accepted for each field.
        dtTitle = {
            'Rev1': ['利息淨收益', '營業收入', '淨收益', '收益', '收入'],
            'Rev2': ['利息以外淨損益'],
            'GP': ['營業毛利(毛損)'],
            'OP': ['營業利益(損失)', '營業利益'],
            'NPBT':
            ['繼續營業單位稅前淨利(淨損)', '稅前淨利(淨損)', '繼續營業單位稅前損益', '繼續營業單位稅前純益(純損)'],
            'NPAT': ['本期稅後淨利(淨損)', '本期淨利(淨損)'],
            'NPPC': ['淨利(損)歸屬於母公司業主', '淨利(淨損)歸屬於母公司業主'],
            'EPS': ['基本每股盈餘(元)']
        }
        fields = ('Rev1', 'Rev2', 'GP', 'OP', 'NPBT', 'NPAT', 'NPPC', 'EPS')

        keys = []
        rows = []
        # Keys already emitted; replaces the membership test against the
        # growing DataFrame's index.
        seen = set()

        for table in table_array:
            if '代號</th>' not in table:
                continue

            # Position of each field in this table; -1 means "not found".
            dtIndex = dict.fromkeys(fields, -1)

            for tr in table.split('<tr'):
                if '<th' in tr:
                    # Header row: remember where each wanted caption sits.
                    th_array = tr.split('<th')
                    for thIndex, th in enumerate(th_array[1:], start=1):
                        title = common.col_clear(th).strip()
                        for key, captions in dtTitle.items():
                            if title in captions:
                                dtIndex[key] = thIndex
                    continue

                td_array = tr.split('<td')
                if len(td_array) <= 1:
                    continue

                # Company code, year, quarter.
                ticker = common.col_clear(td_array[1])
                # NOTE(review): ``year`` is also fed to year_CE2RC for the
                # request above; confirm year_RC2CE leaves an already-CE
                # year unchanged.
                index = (ticker, common.year_RC2CE(year), season)
                if index in seen:
                    continue
                seen.add(index)

                dtData = dict.fromkeys(fields, 0)
                for key, position in dtIndex.items():
                    if position >= 0:
                        val, _ = common.TryParse(
                            'float', common.col_clear(td_array[position]))
                        dtData[key] = val

                # NOTE(review): OP falls back to NPBT whenever the parsed OP
                # is not greater than 0, which also covers a genuinely
                # negative operating profit -- confirm this is intended.
                if dtData['OP'] > 0:
                    operating_profit = dtData['OP']
                else:
                    operating_profit = dtData['NPBT']

                keys.append(index)
                rows.append([
                    dtData['Rev1'] + dtData['Rev2'], dtData['GP'],
                    operating_profit, dtData['NPBT'], dtData['NPAT'],
                    dtData['NPPC'], dtData['EPS']
                ])

        if not rows:
            return pd.DataFrame()

        # Build the frame once instead of DataFrame.append per row (removed
        # in pandas 2.0 and quadratic in the number of rows).
        dfcomprehensiveIncome = pd.DataFrame(
            data=rows,
            index=pd.MultiIndex.from_tuples(keys,
                                            names=['Ticker', 'yr', 'qtr']),
            columns=['Rev', 'GP', 'OP', 'NPBT', 'NPAT', 'NPPC', 'EPS'])
        # Gross margin for every row at once.
        dfcomprehensiveIncome['GM'] = (dfcomprehensiveIncome['GP'] /
                                       dfcomprehensiveIncome['Rev'])

        return dfcomprehensiveIncome
Exemplo n.º 5
0
 def asFloat(x):
     """Parse a comma-grouped numeric string through common.TryParse.

     Commas are removed from ``x`` before parsing.  The parsed value is
     returned when the validity flag from ``common.TryParse`` is truthy;
     otherwise the flag itself is returned.
     """
     parsed, ok = common.TryParse('float', x.replace(',', ''))
     if ok:
         return parsed
     return ok
Exemplo n.º 6
0
    def get_TWValueScore(self):
        """Compute a 16-part value score for every ticker in ``self.ComInfo``.

        For each ticker the method reads its latest year (``self.FSA``),
        latest month (``self.REV``), latest quarter (``self.SCI``) and latest
        date (``self.DQ``), evaluates sixteen sub-scores from ``self.REV``,
        ``self.SCI``, ``self.FSA``, ``self.BS``, ``self.DIV`` and ``self.DQ``
        and sums them.  Scoring is best-effort: a ticker that is absent from
        ``self.FSA`` or for which any lookup or computation raises an
        ``Exception`` is skipped.

        Returns:
            A ``pandas.DataFrame`` indexed by ``(Ticker, Date)`` with the
            columns ``thisYYMM, thisSSN, thisYYYY, Com, IC, MonthMoM,
            MonthYAYoY, CumYAYoY, GrossMarginQoQ, GrossMarginYoY,
            OperatingProfitQoQ, OperatingProfitYoY, CURR, Debt2AssetsRatio,
            PE, PB, DividendYield, TWValueScore``.  An empty ``DataFrame`` is
            returned when no ticker could be scored.
        """
        dfValueStockScore = pd.DataFrame()
        data = []
        index = []
        for code in self.ComInfo.index:
            try:
                if code not in self.FSA.index.get_level_values(0):
                    continue
                # This year, last year.
                # NOTE(review): lastYYYY, lastYYMM and lastYAYYMM below are
                # not read again in this method.
                thisYYYY = self.FSA.loc[(code,)].index.max()
                lastYYYY = thisYYYY-1
                # Current month, previous month, same month last year.
                thisYYMM = self.REV.loc[(code, )].index.max()
                thisYY = int(thisYYMM/100)
                thisMM = int(thisYYMM%100)
                strlastYYMM = arrow.get(thisYY, thisMM, 1).shift(months=-1).format('YYYYMM')
                lastYYMM, vaild = common.TryParse('int',strlastYYMM)
                lastYAYYMM = (thisYY-1)*100+thisMM
                # Current quarter, previous quarter, same quarter last year.
                arrthisSSNYYYY = self.SCI.loc[(code,)].index.max()
                thisSSNYYYY, thisSSN = arrthisSSNYYYY
                lastSSNYYYY = thisSSNYYYY if thisSSN>1 else thisSSNYYYY-1
                lastSSN = thisSSN-1 if thisSSN>1 else 4
                lastYASSNYYYY = thisSSNYYYY-1
                lastYASSNMM = thisSSN
                # Latest date available in DQ for this ticker.
                currDate = self.DQ.loc[(code,)].index.max()

                # 1. Monthly revenue change vs. previous month > 0 -> 5.
                MonthMoM = self.REV.loc[(code, thisYYMM), 'RevMcLM'].values[0]
                score1 = 5 if MonthMoM>0 else 0
                # 2. Monthly revenue change vs. same month last year > 0 -> 5.
                MonthYAYoY = self.REV.loc[(code, thisYYMM), 'RevMcLYM'].values[0]
                score2 = 5 if MonthYAYoY>0 else 0
                # 3. Cumulative revenue change vs. last year > 0 -> 10.
                CumYAYoY = self.REV.loc[(code, thisYYMM), 'RevYCml2LYCml'].values[0]
                score3 = 10 if CumYAYoY>0 else 0
                # 4. Gross margin (GM) up vs. previous quarter -> 5.
                thisSSNGM = self.SCI.loc[(code, thisSSNYYYY, thisSSN)]['GM']
                lastSSNGM = self.SCI.loc[(code, lastSSNYYYY, lastSSN)]['GM']
                GrossMarginQoQ = 0 if lastSSNGM == 0 else (thisSSNGM/lastSSNGM)-1
                score4 = 5 if GrossMarginQoQ>0 else 0
                # 5. Year-to-date gross profit (GP summed over quarters
                #    1..thisSSN) up vs. the same span last year -> 5.
                thisYASSNGP = self.SCI.loc[(code, thisSSNYYYY, list(range(1, thisSSN+1)))]['GP'].sum()
                lastYASSNGP = self.SCI.loc[(code, lastYASSNYYYY, list(range(1, lastYASSNMM+1)))]['GP'].sum()
                GrossMarginYoY =0 if lastYASSNGP == 0 else (thisYASSNGP/lastYASSNGP)-1
                score5 = 5 if GrossMarginYoY>0 else 0
                # 6. Operating profit (OP) up vs. previous quarter -> 5.
                thisSSNOP = self.SCI.loc[(code, thisSSNYYYY, thisSSN)]['OP']
                lastSSNOP = self.SCI.loc[(code, lastSSNYYYY, lastSSN)]['OP']
                OperatingProfitQoQ = 0 if lastSSNOP == 0 else (thisSSNOP/lastSSNOP)-1
                score6 = 5 if OperatingProfitQoQ>0 else 0
                # 7. Year-to-date OP up vs. the same span last year -> 5.
                thisYASSNOP = self.SCI.loc[(code, thisSSNYYYY, list(range(1, thisSSN+1)))]['OP'].sum()
                lastYASSNOP = self.SCI.loc[(code, lastYASSNYYYY, list(range(1, lastYASSNMM+1)))]['OP'].sum()
                OperatingProfitYoY = 0 if lastYASSNOP == 0 else (thisYASSNOP/lastYASSNOP)-1
                score7 = 5 if OperatingProfitYoY>0 else 0
                # 8. Operating cash flow (computed here as CFR * fourth-
                #    quarter CL) positive in each of the last 5 years -> 5.
                ls5YCASHO = self.FSA.loc[(code, list(range(thisYYYY-4, thisYYYY+1))),'CFR']*self.BS.loc[(code, list(range(thisYYYY-4, thisYYYY+1)), 4)]['CL']
                score8 = 0 if False in list(ls5YCASHO>0) else 5
                # 9. Yearly OP total positive in each of the last 5 years -> 5.
                ls5YOCF = self.SCI.loc[(code, list(range(thisYYYY-4, thisYYYY+1)), ),'OP'].groupby('yr').sum()
                score9 = 0 if False in list(ls5YOCF>0) else 5
                # 10. Yearly NPBT total positive in each of the last 5
                #     years -> 5.
                ls5NetProfit = self.SCI.loc[(code, list(range(thisYYYY-4, thisYYYY+1)), ),'NPBT'].groupby('yr').sum()
                score10 = 0 if False in list(ls5NetProfit>0) else 5
                # 11. Cash dividend (CD) positive in each of the last 5
                #     years -> 5.
                ls5EarnM = self.DIV.loc[(code, list(range(thisYYYY-4, thisYYYY+1))), 'CD']
                score11 = 0 if False in list(ls5EarnM>0) else 5
                # 12. Current ratio (CR) > 100 -> 5.
                CURR = self.FSA.loc[(code, thisYYYY),'CR']
                score12 =5 if CURR>100 else 0
                # 13. Debt ratio (DR) < 50 -> 5.
                Debt2AssetsRatio = self.FSA.loc[(code, thisYYYY),'DR']
                score13 =5 if Debt2AssetsRatio<50 else 0
                # 14. PER, lower is better: 2.5 points per threshold step,
                #     15 when PE is not above any threshold.  A PER that
                #     cannot be read or converted scores 0.
                PEScore=[40, 30, 25, 20, 15, 12, 10]
                try:
                    PE = float(self.DQ.loc[(code, currDate), 'PER'])
                    score14 = next(
                        (i*2.5 for i, limit in enumerate(PEScore) if limit<PE),
                        15)
                except Exception:
                    PE = 0
                    score14 = 0
                # 15. Price-to-book, lower is better.
                CP = float(self.DQ.loc[(code, currDate), 'CP'])
                CD = float(self.DIV.loc[(code, thisYYYY), 'CD'])
                RNper = float(self.BS.loc[(code, thisSSNYYYY, thisSSN), 'RNper'])

                # NOTE(review): a PB between 1 and 1.5 scores 12.5 while a PB
                # not above 1 scores 5 -- confirm the intended scale.
                PBScore=[8.5, 6, 4, 2.5, 1.5, 1]
                if RNper == 0:
                    PB = -1
                    score15 = 0
                else:
                    PB = CP/RNper
                    score15 = next(
                        (i*2.5 for i, limit in enumerate(PBScore) if limit<PB),
                        5)
                # 16. Cash-dividend yield, higher is better.
                # NOTE(review): the thresholds look like percentages while
                # DividendYield is the plain ratio CD/CP -- confirm units.
                DividendYieldScore = [0, 1, 2, 2.5, 3, 4, 5, 6, 8, 10]
                DividendYield = 0 if CP == 0 else CD/CP
                score16 = next(
                    ((i-1)*2.5 for i, limit in enumerate(DividendYieldScore)
                     if limit>DividendYield),
                    -1)
                # A negative value means either no threshold exceeded the
                # yield, or the very first one did (i == 0, i.e. a negative
                # yield); both cases are mapped to 10 as before.
                # NOTE(review): confirm a negative yield should score 10.
                if score16<0:
                    score16 = 10

                score = score1+score2+score3+score4+score5+score6+score7+score8+score9+score10+score11+score12+score13+score14+score15+score16

                index.append((code, currDate))
                # Month, quarter and year the score is based on, then the
                # company info and the individual metrics.
                data.append([thisYYMM, str(thisSSNYYYY)+'/'+str(thisSSN), thisYYYY,
                self.ComInfo.loc[code,'Com'],self.ComInfo.loc[code,'IC'],MonthMoM,MonthYAYoY, CumYAYoY, GrossMarginQoQ, GrossMarginYoY, OperatingProfitQoQ, OperatingProfitYoY, CURR, Debt2AssetsRatio, PE, PB, DividendYield, score])
            except Exception:
                # Best-effort: skip tickers with missing or malformed data.
                # Only Exception is caught so KeyboardInterrupt / SystemExit
                # still stop the loop.
                continue

        if len(data)>0:
            dfValueStockScore = pd.DataFrame(data = data, index = pd.MultiIndex.from_tuples(index),
            columns=['thisYYMM', 'thisSSN', 'thisYYYY', 'Com', 'IC', 'MonthMoM', 'MonthYAYoY', 'CumYAYoY', 'GrossMarginQoQ','GrossMarginYoY','OperatingProfitQoQ','OperatingProfitYoY','CURR','Debt2AssetsRatio','PE','PB', 'DividendYield','TWValueScore'])

            dfValueStockScore.index.set_names(['Ticker', 'Date'], inplace=True)

        return dfValueStockScore