# parsePage(self, page): parse `page` with BeautifulSoup and try to read the
# text of <p class="product-name">. That text is passed through
# PageUtils.replaceString, split on single spaces, and its first piece is
# stored as result["productName"]. Any exception raised in that step is
# printed instead of propagated. The method returns False when
# self.is_checkPage(soup) is true.
# NOTE(review): the definition is collapsed onto one physical line, so the
# original nesting is lost -- it is unclear whether the is_checkPage test sits
# inside the except handler or after the try/except. Confirm against the
# original file before reformatting.
# NOTE(review): `except Exception, e` and the print statement are
# Python 2-only syntax.
# NOTE(review): `result` is filled in, but no `return result` is visible in
# this definition.
def parsePage(self, page): soup = BeautifulSoup(page) result = {} try : product_name = soup.find('p', {'class':'product-name'}).get_text() product_name = PageUtils.replaceString(product_name) product_name = product_name.split(' ') product_name = product_name[0] result["productName"] = product_name except Exception, e: print "getLoanDetailsInfo-->product_name==>", e if self.is_checkPage(soup) : return False
def getListInfos(self, page):
    """Extract one summary record per product from a listing page.

    Every ``<ul>`` whose class matches the ``icontent-ul*`` pattern is read
    as one product: its ``productid`` attribute plus the text of its first
    five ``<li>`` children.

    Args:
        page: Markup of the listing page, handed to ``BeautifulSoup``.

    Returns:
        A list of dicts with the keys ``productId``, ``annualRate``,
        ``period``, ``projectType``, ``amountAtLeast`` and ``transNumber``.
        A product that fails to parse is reported on stdout and left out,
        so one malformed entry does not discard the rest of the page.
    """
    table = []
    soup = BeautifulSoup(page)
    # NOTE(review): the trailing "*" quantifies only the final "l", so the
    # pattern accepts any class containing "icontent-u". Presumably a prefix
    # match on "icontent-ul" was intended -- confirm before tightening it.
    productTag_list = soup.find_all("ul", {"class": re.compile("icontent-ul*")})
    for productTag in productTag_list:
        try:
            productId = productTag["productid"]
            liTag_list = productTag.find_all("li")

            annualRate = liTag_list[0].find("span").get_text()
            period = PageUtils.replaceString(
                liTag_list[1].find("span", {"class": "year"}).get_text()
            )
            projectType = PageUtils.replaceString(liTag_list[2].get_text())
            # The last two / last one characters are dropped; presumably
            # they are a trailing unit suffix -- TODO confirm.
            amountAtLeast = PageUtils.replaceString(liTag_list[3].get_text())[:-2]
            transNumber = PageUtils.replaceString(liTag_list[4].get_text())[:-1]

            table.append({
                "productId": productId,
                "annualRate": annualRate,
                "period": period,
                "projectType": projectType,
                "amountAtLeast": amountAtLeast,
                "transNumber": transNumber,
            })
        except Exception as e:
            # Deliberate best effort: report the bad entry and keep going.
            # Single-argument print() emits the same text on Python 2 and 3.
            print("getListInfos--> %s" % (e,))
    # The collected rows were previously built and then dropped; hand them
    # back to the caller.
    return table
# Headerless run of statements (it uses `self` and `return`, so it presumably
# belongs to a method whose `def` line is not on this line -- TODO confirm).
# It parses `page` with BeautifulSoup and fills `result` in three separate
# try blocks:
#   productName       <- <p class="product-name"> text, cleaned with
#                        PageUtils.replaceString, first space-separated piece
#   guaranteeCompany  <- <p class="purchase-safe-info-text"> text, cleaned
#                        with PageUtils.replaceString
#   productScale_yuan <- PageUtils.clearTotalAmount(str(...)) of the second
#                        <p> inside the first
#                        <div class="product-info-detail-item">
# A failure in the first step is printed. A failure in either of the other
# two is swallowed by a bare `except:` that only sets a local variable to
# "notFound", so the corresponding key is simply absent from `result`.
# After the first step, `return False` is issued when
# self.is_checkPage(soup) is true.
# NOTE(review): the statements are collapsed onto one physical line, so the
# original nesting is lost -- whether the is_checkPage test sits inside the
# first except handler or after it cannot be told from here.
# NOTE(review): `except Exception, e` and the print statements are
# Python 2-only syntax; no `return result` is visible in this fragment.
soup = BeautifulSoup(page) result = {} try : product_name = soup.find('p', {'class':'product-name'}).get_text() product_name = PageUtils.replaceString(product_name) product_name = product_name.split(' ') product_name = product_name[0] result["productName"] = product_name except Exception, e: print "getLoanDetailsInfo-->product_name==>", e if self.is_checkPage(soup) : return False try : guaranteeCompany = soup.find('p', {'class': 'purchase-safe-info-text'}).get_text() guaranteeCompany = PageUtils.replaceString(guaranteeCompany) result["guaranteeCompany"] = guaranteeCompany except: guaranteeCompany = "notFound" # print guaranteeCompany # # total amount of product try : product_infos_list = soup.find_all('div', {'class':'product-info-detail-item'}) totalAmount = product_infos_list[0].find_all('p')[1].get_text() o = PageUtils.clearTotalAmount(str(totalAmount)) print "totalAmount-->", totalAmount, "=========", o totalAmount = o result["productScale_yuan"] = totalAmount except: totalAmount = "notFound"