def crawl(self):
    """Load the page and populate the price-related attributes.

    Reads the "Volume", "52Wk" and "P/E" entries from the page source via
    ``self.extractUlLi`` and the current price via ``self.readPrice``.
    Sets ``Volume`` and ``Price`` unconditionally; ``YearHigh``/``YearLow``
    and ``PE``/``EPS`` are only set when the corresponding text contains the
    expected separator.
    """
    page = self.navigation()

    self.Volume = self.extractUlLi(page, "Volume")

    # 52-week range is expected as "low-high".
    week52 = self.extractUlLi(page, "52Wk")
    if '-' in week52:
        bounds = week52.split('-')
        self.YearHigh = tools.cast(bounds[1].strip(), float)
        self.YearLow = tools.cast(bounds[0].strip(), float)

    # P/E entry is expected as "P/E (EPS)".
    ratio_text = self.extractUlLi(page, "P/E")
    if '(' in ratio_text:
        pieces = ratio_text.split('(')
        self.PE = tools.cast(pieces[0].strip(), float)
        eps_text = pieces[1].replace(')', '').strip()
        self.EPS = tools.cast(eps_text, float)

    self.Price = self.readPrice(page)
    return
def readPagingLength(self, pageSource):
    """Store the highest page number of the pagination bar in ``PAGINGLENGTH``.

    The pagination section is looked up as a ``ul`` element with the
    ``nav-page`` class. Expected markup::

        <ul>
          <li> <button value="page number">
          ...
        </ul>

    ``li`` entries without a ``button`` are ignored. The stored value is 0
    when no button yields a number greater than 0.
    """
    nav_list = soup.Helper().elemSelector(
        "ul", {"class": "nav-page"}, pageSource)

    highest = 0
    for entry in nav_list.find_all("li"):
        button = entry.find("button")
        if button is None:
            continue
        # Values that cannot be cast to int are passed a fallback of 0.
        highest = max(highest, tools.cast(button["value"], int, 0))

    self.PAGINGLENGTH = highest
    return
def isMyFinancialReport(self, financialObj):
    """Tell whether a quarterly financial report matches this object's date.

    ``self.Date`` is cast to ``datetime``; if the result is not exactly a
    ``datetime`` a message is printed and ``False`` is returned.
    ``financialObj.Period`` must look like ``"YYYY Qn"`` (n in 1..4); any
    other period format yields ``False``.

    Rules applied per quarter (``year``/``month`` are those of the date):

    * Q1: report year == year      and ``month - 3 <= 6``
    * Q2: report year == year      and ``month - 6 <= 6``
    * Q3: report year == year - 1  and ``month - 9 <= 6``
    * Q4: report year == year - 1  and ``12 - month >= 6``

    NOTE(review): the original comment spoke of a report "maximum 3 month
    older than the price date", but the comparisons use 6, and the Q2/Q3
    month tests hold for every calendar month. Confirm the intended window.
    """
    price_date = tools.cast(self.Date, datetime)
    if type(price_date) != datetime:
        print("Date format is incorrect")
        return False

    # Pattern: four-digit year, a space, then Q1..Q4.
    parsed = re.search(r"(^\d{4}) ([Q][1-4]$)", financialObj.Period)
    if parsed is None:
        return False

    report_year = parsed.group(1)
    quarter = parsed.group(2)
    month = price_date.month

    # quarter -> (how many years before the price date, month condition)
    rules = {
        "Q1": (0, month - 3 <= 6),
        "Q2": (0, month - 6 <= 6),
        "Q3": (1, month - 9 <= 6),
        "Q4": (1, 12 - month >= 6),
    }
    years_back, month_ok = rules[quarter]
    return str(price_date.year - years_back) == report_year and month_ok
def readPrice(self, pageSource):
    """Extract the current price from the page source.

    Looks for a ``span`` element with the ``currentval`` class and casts its
    text to ``float`` through ``tools.cast``.

    Returns ``None`` when no such element is present instead of failing with
    an ``AttributeError`` on the missing element.
    """
    parsed_page = soup.Helper().convertToSoup(pageSource)
    price_elem = parsed_page.find("span", {"class": "currentval"})
    if price_elem is None:
        # Page layout changed or the price is not displayed: nothing to cast.
        return None
    return tools.cast(price_elem.text, float)
def setPPEBV(self):
    """Set ``PPEBV`` to the product of ``PBV`` and ``PriceInfo.PE``.

    Both operands go through ``tools.cast`` with ``float`` and a fallback
    argument of 0; the product is rounded with ``round()`` (no digits
    argument).
    """
    price_to_book = tools.cast(self.PBV, float, 0)
    price_to_earnings = tools.cast(self.PriceInfo.PE, float, 0)
    self.PPEBV = round(price_to_book * price_to_earnings)
def setPBV(self):
    """Set ``PBV`` to ``PriceInfo.Price / BookValue`` rounded to 2 decimals.

    ``PBV`` is left untouched when ``BookValue`` is ``None`` or when the
    book value evaluates to zero, since the ratio is undefined in that case
    (previously a zero book value raised ``ZeroDivisionError``).
    """
    if self.BookValue is None:
        return

    price = tools.cast(self.PriceInfo.Price, float, 0)
    book_value = tools.cast(self.BookValue, float, 1)
    if not book_value:
        # Zero (or otherwise empty) book value: no meaningful ratio.
        return

    self.PBV = round(price / book_value, 2)
    return