def crawl_DIV_type(self, year, stocktype):
    """Crawl the MOPS dividend table for one year and one market type.

    Posts a query to the ``t05st09sub`` endpoint, splits the returned HTML on
    ``<table`` / ``<tr`` / ``<td`` markers and sums the cash and stock
    dividends found for every ``(ticker, year)`` pair.

    Args:
        year: Year to query; converted with ``common.year_CE2RC`` before it
            is sent as the ``YEAR`` form field.
        stocktype: Value sent as the ``TYPEK`` form field.

    Returns:
        A DataFrame indexed by ``(Ticker, yr)`` with the columns ``CD``
        (cash dividend) and ``SD`` (stock dividend), or an empty DataFrame
        when no data row was found.
    """
    url = 'https://mops.twse.com.tw/server-java/t05st09sub'
    form_data = {
        'encodeURIComponent': 1,
        'step': 1,
        'TYPEK': stocktype,
        'YEAR': common.year_CE2RC(year),
        'first': '',
        'qryType': 2,
    }

    # Break the response apart on the raw table markers.
    table_array = common.crawl_data2text(url, form_data, 'big5').split('<table')

    # (ticker, yr) -> [cash dividend, stock dividend]. Accumulating in a dict
    # makes the "update existing row" case an explicit in-place sum instead of
    # relying on DataFrame.loc returning a writable view, and avoids growing a
    # DataFrame row by row.
    totals = {}
    for table in table_array:
        if '公司代號' not in table:
            continue
        for tr in table.split('<tr'):
            td_array = tr.split('<td')
            # Data rows are the ones with more than 15 cells; the last cell
            # read below is td_array[15].
            if len(td_array) <= 15:
                continue

            # Company code: text before the first '-'.
            ticker = common.col_clear(td_array[1]).split('-')[0].strip()
            # Year the dividend belongs to: text before '年', converted with
            # common.year_RC2CE.
            intyr, _ = common.TryParse(
                'int', common.col_clear(td_array[3]).split('年')[0])
            yr = common.year_RC2CE(intyr)
            # Cash dividend.
            CD, _ = common.TryParse('float', common.col_clear(td_array[12]))
            # Stock dividend.
            SD, _ = common.TryParse('float', common.col_clear(td_array[15]))

            # A company can appear several times for the same year; add the
            # amounts together instead of creating a second row.
            key = (ticker, yr)
            if key in totals:
                totals[key][0] += CD
                totals[key][1] += SD
            else:
                totals[key] = [CD, SD]

    if not totals:
        return pd.DataFrame()

    return pd.DataFrame(
        data=list(totals.values()),
        index=pd.MultiIndex.from_tuples(list(totals), names=['Ticker', 'yr']),
        columns=['CD', 'SD'],
    )
def crawl_FSA_type(self, year, stocktype):
    """Crawl the MOPS financial-ratio table for one year and market type.

    Posts a query to the ``ajax_t51sb02`` endpoint and reads the fourth
    ``<table`` fragment of the response. Every row with at least 22 ``<td``
    fragments yields one record: cell 1 holds the company code and cells
    3..21 hold the nineteen ratios, in the column order listed below.

    Args:
        year: Year to query; converted with ``common.year_CE2RC`` for the
            request and with ``common.year_RC2CE`` for the result index.
        stocktype: Value sent as the ``TYPEK`` form field.

    Returns:
        A DataFrame indexed by ``(Ticker, yr)`` with one column per ratio,
        or an empty DataFrame when the expected table or its rows are
        missing.
    """
    url = 'https://mops.twse.com.tw/mops/web/ajax_t51sb02'
    form_data = {
        'encodeURIComponent': 1,
        'run': 'Y',
        'step': 1,
        'TYPEK': stocktype,
        'year': common.year_CE2RC(year),
        'isnew': '',
        'firstin': 1,
        'off': 1,
        'ifrs': 'Y',
    }

    # Output columns, in the same order as table cells 3..21.
    columns = [
        'DR',        # debt-to-assets ratio
        'LER',       # long-term funds to property, plant and equipment
        'CR',        # current ratio
        'UR',        # quick ratio
        'IPM',       # interest coverage multiple
        'ARTR',      # receivables turnover
        'ACCD',      # average collection days
        'ITR',       # inventory turnover (times)
        'ASD',       # average days of sales
        'PETR',      # property, plant and equipment turnover (times)
        'TATR',      # total asset turnover (times)
        'ROA',       # return on assets (%)
        'ROE',       # return on equity (%)
        'NPBT2PCR',  # pre-tax income to paid-in capital (%)
        'NPR',       # net profit margin (%)
        'EPS',       # earnings per share
        'CFR',       # cash flow ratio (%)
        'CFAR',      # cash flow adequacy ratio (%)
        'CRR',       # cash reinvestment ratio (%)
    ]
    first_cell = 3
    last_cell = first_cell + len(columns) - 1  # == 21

    # Break the response apart on the raw table markers.
    table_array = common.crawl_data2text(url, form_data).split('<table')
    # table_array[3] is read below, so at least four fragments are required.
    if len(table_array) < 4:
        return pd.DataFrame()

    # NOTE(review): `year` goes through year_CE2RC for the request and through
    # year_RC2CE here; confirm year_RC2CE leaves an already-CE year unchanged.
    yr = common.year_RC2CE(year)

    index = []
    rows = []
    for tr in table_array[3].split('<tr'):
        td_array = tr.split('<td')
        # Skip header/separator rows and any row too short to hold all the
        # ratio cells (the highest cell read is td_array[21]).
        if len(td_array) <= last_cell:
            continue

        # Company code: text before the first '-'.
        ticker = common.col_clear(td_array[1]).split('-')[0].strip()

        row = []
        for cell in td_array[first_cell:last_cell + 1]:
            value, _ = common.TryParse('float', common.col_clear(cell))
            row.append(value)

        index.append((ticker, yr))
        rows.append(row)

    if not rows:
        return pd.DataFrame()

    return pd.DataFrame(
        data=rows,
        index=pd.MultiIndex.from_tuples(index, names=['Ticker', 'yr']),
        columns=columns,
    )
def crawl_BS_type(self, year, season, stocktype):
    """Crawl the MOPS balance-sheet summary for one year, season and market.

    Posts a query to the ``ajax_t163sb05`` endpoint. Every ``<table``
    fragment containing ``代號</th>`` is parsed: header rows map the known
    column titles to cell positions, and each following data row produces
    one record. Items whose title is not present in a table are reported
    as 0.

    Args:
        year: Year to query; converted with ``common.year_CE2RC`` for the
            request and with ``common.year_RC2CE`` for the result index.
        season: Value sent as the ``season`` form field and stored as the
            ``qtr`` index level.
        stocktype: Value sent as the ``TYPEK`` form field.

    Returns:
        A DataFrame indexed by ``(Ticker, yr, qtr)`` with the columns
        ``TA, TL, TE, RNper, CA, NCA, CL, NCL``, or an empty DataFrame when
        no data row was found.
    """
    url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb05'
    form_data = {
        'encodeURIComponent': 1,
        'step': 1,
        'firstin': 1,
        'off': 1,
        'isQuery': 'Y',
        'TYPEK': stocktype,
        'year': common.year_CE2RC(year),
        'season': season
    }

    # Break the response apart on the raw table markers.
    table_array = common.crawl_data2text(url, form_data).split('<table')

    # Item key -> header titles that identify the item's column.
    dtTitle = {
        'TA': ['資產總額', '資產總計'],
        'TL': ['負債總計', '負債總額'],
        'TE': ['權益總計', '權益總額'],
        'RNP': ['每股參考淨值'],
        'CA': ['流動資產'],
        'NCA': ['非流動資產'],
        'CL': ['流動負債'],
        'NCL': ['非流動負債']
    }
    # Output column names, in dtTitle order ('RNP' is published as 'RNper').
    columns = ['TA', 'TL', 'TE', 'RNper', 'CA', 'NCA', 'CL', 'NCL']

    # NOTE(review): `year` goes through year_CE2RC for the request and through
    # year_RC2CE here; confirm year_RC2CE leaves an already-CE year unchanged.
    yr = common.year_RC2CE(year)

    index = []
    rows = []
    for table in table_array:
        if '代號</th>' not in table:
            continue

        # Cell position of every item in this table; -1 means "not present".
        dtIndex = dict.fromkeys(dtTitle, -1)
        for tr in table.split('<tr'):
            if '<th' in tr:
                # Header row: remember where each known title sits.
                th_array = tr.split('<th')
                for thIndex in range(1, len(th_array)):
                    title = common.col_clear(th_array[thIndex]).strip()
                    for key, aliases in dtTitle.items():
                        if title in aliases:
                            dtIndex[key] = thIndex
                continue

            td_array = tr.split('<td')
            if len(td_array) <= 1:
                continue
            # A row shorter than the mapped header columns cannot be a data
            # row; skip it rather than raise IndexError below.
            if len(td_array) <= max(dtIndex.values()):
                continue

            # Company code, year, season.
            ticker = common.col_clear(td_array[1])

            row = []
            for key in dtTitle:
                position = dtIndex[key]
                if position >= 0:
                    value, _ = common.TryParse(
                        'float', common.col_clear(td_array[position]))
                else:
                    value = 0
                row.append(value)

            index.append((ticker, yr, season))
            rows.append(row)

    if not rows:
        return pd.DataFrame()

    return pd.DataFrame(
        data=rows,
        index=pd.MultiIndex.from_tuples(index, names=['Ticker', 'yr', 'qtr']),
        columns=columns,
    )
def crawl_SCI_type(self, year, season, stocktype):
    """Crawl the MOPS comprehensive-income summary for one year/season/market.

    Posts a query to the ``ajax_t163sb04`` endpoint. Every ``<table``
    fragment containing ``代號</th>`` is parsed: header rows map the known
    column titles to cell positions, and each following data row produces
    one record. Only the first row seen for a given ``(ticker, year,
    season)`` is kept. Items whose title is not present in a table are
    treated as 0.

    Args:
        year: Year to query; converted with ``common.year_CE2RC`` for the
            request and with ``common.year_RC2CE`` for the result index.
        season: Value sent as the ``season`` form field and stored as the
            ``qtr`` index level.
        stocktype: Value sent as the ``TYPEK`` form field.

    Returns:
        A DataFrame indexed by ``(Ticker, yr, qtr)`` with the columns
        ``Rev, GP, OP, NPBT, NPAT, NPPC, EPS, GM`` (``GM`` is ``GP / Rev``),
        or an empty DataFrame when no data row was found.
    """
    url = 'https://mops.twse.com.tw/mops/web/ajax_t163sb04'
    form_data = {
        'encodeURIComponent': 1,
        'step': 1,
        'firstin': 1,
        'TYPEK': stocktype,
        'code': '',
        'year': common.year_CE2RC(year),
        'season': season
    }

    # Break the response apart on the raw table markers.
    table_array = common.crawl_data2text(url, form_data).split('<table')

    # Item key -> header titles that identify the item's column.
    dtTitle = {
        'Rev1': ['利息淨收益', '營業收入', '淨收益', '收益', '收入'],
        'Rev2': ['利息以外淨損益'],
        'GP': ['營業毛利(毛損)'],
        'OP': ['營業利益(損失)', '營業利益'],
        'NPBT': ['繼續營業單位稅前淨利(淨損)', '稅前淨利(淨損)',
                 '繼續營業單位稅前損益', '繼續營業單位稅前純益(純損)'],
        'NPAT': ['本期稅後淨利(淨損)', '本期淨利(淨損)'],
        'NPPC': ['淨利(損)歸屬於母公司業主', '淨利(淨損)歸屬於母公司業主'],
        'EPS': ['基本每股盈餘(元)']
    }
    columns = ['Rev', 'GP', 'OP', 'NPBT', 'NPAT', 'NPPC', 'EPS']

    # NOTE(review): `year` goes through year_CE2RC for the request and through
    # year_RC2CE here; confirm year_RC2CE leaves an already-CE year unchanged.
    yr = common.year_RC2CE(year)

    index = []
    rows = []
    seen = set()  # keys already recorded; later duplicates are ignored
    for table in table_array:
        if '代號</th>' not in table:
            continue

        # Cell position of every item in this table; -1 means "not present".
        dtIndex = dict.fromkeys(dtTitle, -1)
        for tr in table.split('<tr'):
            if '<th' in tr:
                # Header row: remember where each known title sits.
                th_array = tr.split('<th')
                for thIndex in range(1, len(th_array)):
                    title = common.col_clear(th_array[thIndex]).strip()
                    for key, aliases in dtTitle.items():
                        if title in aliases:
                            dtIndex[key] = thIndex
                continue

            td_array = tr.split('<td')
            if len(td_array) <= 1:
                continue
            # A row shorter than the mapped header columns cannot be a data
            # row; skip it rather than raise IndexError below.
            if len(td_array) <= max(dtIndex.values()):
                continue

            # Company code, year, season.
            ticker = common.col_clear(td_array[1])
            key = (ticker, yr, season)
            if key in seen:
                continue

            dtData = dict.fromkeys(dtTitle, 0)
            for item, position in dtIndex.items():
                if position >= 0:
                    value, _ = common.TryParse(
                        'float', common.col_clear(td_array[position]))
                    dtData[item] = value

            # NOTE(review): a non-positive OP is replaced by NPBT, which also
            # applies to a genuine operating loss and not only to tables with
            # no OP column; confirm this is intended.
            operating_profit = (
                dtData['OP'] if dtData['OP'] > 0 else dtData['NPBT'])

            seen.add(key)
            index.append(key)
            rows.append([
                dtData['Rev1'] + dtData['Rev2'],
                dtData['GP'],
                operating_profit,
                dtData['NPBT'],
                dtData['NPAT'],
                dtData['NPPC'],
                dtData['EPS'],
            ])

    if not rows:
        return pd.DataFrame()

    dfcomprehensiveIncome = pd.DataFrame(
        data=rows,
        index=pd.MultiIndex.from_tuples(index, names=['Ticker', 'yr', 'qtr']),
        columns=columns,
    )
    # Gross margin as a ratio; pandas yields NaN/inf when Rev is 0.
    dfcomprehensiveIncome['GM'] = (
        dfcomprehensiveIncome['GP'] / dfcomprehensiveIncome['Rev'])
    return dfcomprehensiveIncome
def asFloat(x):
    """Parse a number string that may contain thousands separators.

    Commas are stripped from ``x`` and the remainder is handed to
    ``common.TryParse('float', ...)``.

    Returns:
        The parsed value when TryParse reports success; otherwise the falsy
        validity flag TryParse returned.
    """
    parsed, ok = common.TryParse('float', x.replace(',', ''))
    if not ok:
        # Hand back the flag itself, exactly as reported by TryParse.
        return ok
    return parsed
def get_TWValueScore(self):
    """Score every company in ``self.ComInfo`` on a 16-item value checklist.

    For each ticker the latest year (``self.FSA``), month (``self.REV``),
    season (``self.SCI``) and quote date (``self.DQ``) are looked up and the
    following points are added together:

         1. monthly revenue change vs. previous month > 0      -> 5
         2. monthly revenue change vs. same month last year > 0 -> 5
         3. cumulative revenue change vs. last year > 0         -> 10
         4. gross margin (GM) higher than previous season       -> 5
         5. year-to-date gross profit higher than last year     -> 5
         6. operating profit (OP) higher than previous season   -> 5
         7. year-to-date OP higher than last year               -> 5
         8. CFR * CL > 0 in each of the last 5 years            -> 5
         9. yearly OP > 0 in each of the last 5 years           -> 5
        10. yearly NPBT > 0 in each of the last 5 years         -> 5
        11. cash dividend > 0 in each of the last 5 years       -> 5
        12. current ratio > 100                                 -> 5
        13. debt ratio < 50                                     -> 5
        14. PER, lower is better                                -> 0..15
        15. price / reference net value, lower is better        -> 0..5
        16. cash dividend / close price, higher is better       -> see note

    A ticker is skipped when it has no FSA rows or when any lookup for it
    fails (missing data), so one bad ticker never aborts the whole run.

    Returns:
        A DataFrame indexed by ``(Ticker, Date)`` holding the reference
        periods, the intermediate metrics and the total in ``TWValueScore``;
        an empty DataFrame when no ticker could be scored.
    """
    import logging

    log = logging.getLogger(__name__)

    # Upper-bound tables for the three graded items.
    PEScore = [40, 30, 25, 20, 15, 12, 10]
    PBScore = [8.5, 6, 4, 2.5, 1.5, 1]
    DividendYieldScore = [0, 1, 2, 2.5, 3, 4, 5, 6, 8, 10]

    data = []
    index = []
    for code in self.ComInfo.index:
        try:
            if code not in self.FSA.index.get_level_values(0):
                continue

            # Latest year with financial ratios.
            thisYYYY = self.FSA.loc[(code,)].index.max()
            # Latest month with revenue data.
            thisYYMM = self.REV.loc[(code, )].index.max()
            # Latest season, the season before it, and the same season of
            # the previous year.
            thisSSNYYYY, thisSSN = self.SCI.loc[(code,)].index.max()
            lastSSNYYYY = thisSSNYYYY if thisSSN > 1 else thisSSNYYYY - 1
            lastSSN = thisSSN - 1 if thisSSN > 1 else 4
            lastYASSNYYYY = thisSSNYYYY - 1
            lastYASSNMM = thisSSN
            # Latest quote date.
            currDate = self.DQ.loc[(code,)].index.max()

            years5 = list(range(thisYYYY - 4, thisYYYY + 1))
            thisYTD = list(range(1, thisSSN + 1))
            lastYTD = list(range(1, lastYASSNMM + 1))

            # 1. Monthly revenue vs. previous month.
            MonthMoM = self.REV.loc[(code, thisYYMM), 'RevMcLM'].values[0]
            score1 = 5 if MonthMoM > 0 else 0

            # 2. Monthly revenue vs. same month last year.
            MonthYAYoY = self.REV.loc[(code, thisYYMM), 'RevMcLYM'].values[0]
            score2 = 5 if MonthYAYoY > 0 else 0

            # 3. Cumulative revenue vs. last year's cumulative revenue.
            CumYAYoY = self.REV.loc[
                (code, thisYYMM), 'RevYCml2LYCml'].values[0]
            score3 = 10 if CumYAYoY > 0 else 0

            # 4. Gross margin vs. previous season.
            thisSSNGM = self.SCI.loc[(code, thisSSNYYYY, thisSSN)]['GM']
            lastSSNGM = self.SCI.loc[(code, lastSSNYYYY, lastSSN)]['GM']
            GrossMarginQoQ = (
                0 if lastSSNGM == 0 else (thisSSNGM / lastSSNGM) - 1)
            score4 = 5 if GrossMarginQoQ > 0 else 0

            # 5. Year-to-date gross profit vs. the same seasons last year.
            thisYASSNGP = self.SCI.loc[
                (code, thisSSNYYYY, thisYTD)]['GP'].sum()
            lastYASSNGP = self.SCI.loc[
                (code, lastYASSNYYYY, lastYTD)]['GP'].sum()
            GrossMarginYoY = (
                0 if lastYASSNGP == 0 else (thisYASSNGP / lastYASSNGP) - 1)
            score5 = 5 if GrossMarginYoY > 0 else 0

            # 6. Operating profit vs. previous season.
            thisSSNOP = self.SCI.loc[(code, thisSSNYYYY, thisSSN)]['OP']
            lastSSNOP = self.SCI.loc[(code, lastSSNYYYY, lastSSN)]['OP']
            OperatingProfitQoQ = (
                0 if lastSSNOP == 0 else (thisSSNOP / lastSSNOP) - 1)
            score6 = 5 if OperatingProfitQoQ > 0 else 0

            # 7. Year-to-date operating profit vs. the same seasons last year.
            thisYASSNOP = self.SCI.loc[
                (code, thisSSNYYYY, thisYTD)]['OP'].sum()
            lastYASSNOP = self.SCI.loc[
                (code, lastYASSNYYYY, lastYTD)]['OP'].sum()
            OperatingProfitYoY = (
                0 if lastYASSNOP == 0 else (thisYASSNOP / lastYASSNOP) - 1)
            score7 = 5 if OperatingProfitYoY > 0 else 0

            # 8. Five years of positive CFR * CL (season-4 balance sheet).
            ls5YCASHO = (
                self.FSA.loc[(code, years5), 'CFR']
                * self.BS.loc[(code, years5, 4)]['CL'])
            score8 = 5 if (ls5YCASHO > 0).all() else 0

            # 9. Five years of positive yearly operating profit.
            ls5YOCF = self.SCI.loc[
                (code, years5, ), 'OP'].groupby('yr').sum()
            score9 = 5 if (ls5YOCF > 0).all() else 0

            # 10. Five years of positive yearly pre-tax profit (NPBT).
            ls5NetProfit = self.SCI.loc[
                (code, years5, ), 'NPBT'].groupby('yr').sum()
            score10 = 5 if (ls5NetProfit > 0).all() else 0

            # 11. Five years of positive cash dividends.
            ls5EarnM = self.DIV.loc[(code, years5), 'CD']
            score11 = 5 if (ls5EarnM > 0).all() else 0

            # 12. Current ratio above 100.
            CURR = self.FSA.loc[(code, thisYYYY), 'CR']
            score12 = 5 if CURR > 100 else 0

            # 13. Debt ratio below 50.
            Debt2AssetsRatio = self.FSA.loc[(code, thisYYYY), 'DR']
            score13 = 5 if Debt2AssetsRatio < 50 else 0

            # 14. PER: 0 points above 40, +2.5 per lower band, 15 at <= 10.
            #     A PER that cannot be read scores 0 and is reported as 0.
            try:
                PE = float(self.DQ.loc[(code, currDate), 'PER'])
                score14 = 15
                for i, bound in enumerate(PEScore):
                    if bound < PE:
                        score14 = i * 2.5
                        break
            except Exception:
                PE = 0
                score14 = 0

            # 15. Price / reference net value per share: 0 points above 8.5,
            #     +1 per lower band, 5 at <= 1. The step is 1 (not the 2.5
            #     used for PER) so the item stays within its 5-point maximum
            #     and a lower ratio never scores less than a higher one.
            CP = float(self.DQ.loc[(code, currDate), 'CP'])
            CD = float(self.DIV.loc[(code, thisYYYY), 'CD'])
            RNper = float(
                self.BS.loc[(code, thisSSNYYYY, thisSSN), 'RNper'])
            if RNper == 0:
                PB = -1
                score15 = 0
            else:
                PB = CP / RNper
                score15 = 5
                for i, bound in enumerate(PBScore):
                    if bound < PB:
                        score15 = i
                        break

            # 16. Cash dividend yield.
            # NOTE(review): DividendYield is a plain ratio (CD / CP) while the
            # bounds look like percentages, and (i - 1) * 2.5 can exceed the
            # 10-point maximum; left unchanged, confirm the intended scale.
            DividendYield = 0 if CP == 0 else CD / CP
            score16 = -1
            for i, bound in enumerate(DividendYieldScore):
                if bound > DividendYield:
                    score16 = (i - 1) * 2.5
                    break
            if score16 < 0:
                score16 = 10

            score = (score1 + score2 + score3 + score4 + score5 + score6
                     + score7 + score8 + score9 + score10 + score11
                     + score12 + score13 + score14 + score15 + score16)

            index.append((code, currDate))
            # Reference month, season and year, then the metrics.
            data.append([
                thisYYMM,
                str(thisSSNYYYY) + '/' + str(thisSSN),
                thisYYYY,
                self.ComInfo.loc[code, 'Com'],
                self.ComInfo.loc[code, 'IC'],
                MonthMoM, MonthYAYoY, CumYAYoY,
                GrossMarginQoQ, GrossMarginYoY,
                OperatingProfitQoQ, OperatingProfitYoY,
                CURR, Debt2AssetsRatio, PE, PB, DividendYield, score,
            ])
        except Exception:
            # Best effort: a ticker with incomplete data is left out, but
            # KeyboardInterrupt / SystemExit are no longer swallowed.
            log.debug('Skipping %s: value score could not be computed',
                      code, exc_info=True)
            continue

    if not data:
        return pd.DataFrame()

    dfValueStockScore = pd.DataFrame(
        data=data,
        index=pd.MultiIndex.from_tuples(index),
        columns=[
            'thisYYMM', 'thisSSN', 'thisYYYY', 'Com', 'IC', 'MonthMoM',
            'MonthYAYoY', 'CumYAYoY', 'GrossMarginQoQ', 'GrossMarginYoY',
            'OperatingProfitQoQ', 'OperatingProfitYoY', 'CURR',
            'Debt2AssetsRatio', 'PE', 'PB', 'DividendYield', 'TWValueScore',
        ])
    dfValueStockScore.index.set_names(['Ticker', 'Date'], inplace=True)
    return dfValueStockScore