import sys
from time import sleep

# PageUtils and DataBase are project-local helpers; their imports live
# elsewhere in this module.


class CrawlerHome:

    def __init__(self):
        # Python 2 hack: force the process-wide default encoding to UTF-8.
        reload(sys)
        sys.setdefaultencoding("utf8")  # @UndefinedVariable
        self.pageUtils = PageUtils()

    def updateData(self, data):
        """Upsert the scraped figures: diff against yesterday's rows, then
        insert or update today's rows in the master and slave tables."""
        _main = data.get("m")
        _slaves = data.get("s")
        main = {}
        print _main
        print _slaves
        db = DataBase()
        today = self.pageUtils.getCurrentDate()
        yesterday = self.pageUtils.delayed(today, -1)  # delayed(today, -1) yields yesterday's date

        # Fetch yesterday's master row, if any.
        sql = "select * from zcb_report_master where date(createdate)='" + str(yesterday) + "'"
        yesterday_main = db.execute(sql)
        if len(yesterday_main) > 0:
            yesterday_main = yesterday_main[0]
            print "sql==>", sql
            # Keep only the fields that grew since yesterday; yycjjebl is always kept.
            for _d in _main:
                print "-->", _main.get(_d), "==", yesterday_main.get(_d)
                if _d == "yycjjebl":
                    main[_d] = _main.get(_d)
                elif yesterday_main.get(_d) is None or _main.get(_d) > yesterday_main.get(_d):
                    main[_d] = _main.get(_d)
            # Is there already a master row for today?
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                # No row yet: insert the diffed fields.
                sql = db.parseInsert("zcb_report_master", main)
                print "insert master sql -->", sql
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", main, " id = " + str(today_main.get("id")))
                print "update master sql -->", sql
            db.execute(sql)
        else:
            # No row for yesterday: insert or update today's row with the raw data.
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                sql = db.parseInsert("zcb_report_master", _main)
                print "insert master sql -->", sql
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", _main, "id=" + str(today_main.get("id")))
                print "update master sql -->", sql
            db.execute(sql)

        # Process the slave-table rows, one per (type, tzqx) pair.
        for _slave in _slaves:
            sql = ("select * from zcb_report_slave where date(createdate)='" + str(yesterday)
                   + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'")
            print sql
            yesterday_slave = db.execute(sql)
            if len(yesterday_slave) > 0:
                yesterday_slave = yesterday_slave[0]
                slave = {"type": _slave.get("type"), "tzqx": _slave.get("tzqx")}
                # Keep only the fields that differ from yesterday's row.
                for _d in _slave:
                    print "-->", _slave.get(_d), "==", yesterday_slave.get(_d)
                    if yesterday_slave.get(_d) is None or _slave.get(_d) != yesterday_slave.get(_d):
                        print _d, "==>", _slave.get(_d), "!!!!!", yesterday_slave.get(_d)
                        slave[_d] = _slave.get(_d)
                if len(slave) <= 2:
                    # Only the type/tzqx keys are present, so nothing changed; skip.
                    # (The original tested len(slave) < 1, which could never be true.)
                    continue
                print "slave-->", slave
                sql = ("select * from zcb_report_slave where date(createdate)='" + str(today)
                       + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'")
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    sql = db.parseInsert("zcb_report_slave", slave)
                    print "insert slave sql -->", sql
                else:
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", slave, " id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                db.execute(sql)
            else:
                sql = ("select * from zcb_report_slave where date(createdate)='" + str(today)
                       + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'")
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    sql = db.parseInsert("zcb_report_slave", _slave)
                    print "insert slave sql -->", sql
                else:
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", _slave, "id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                db.execute(sql)

    def crawlerTest(self):
        url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668"
        self.pageUtils.url = url
        self.pageUtils.login("13651781949", "lufax123")
        sleep(10)
        url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528"
        self.pageUtils.browser.get(url)
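    # Hedged sketch (not in the original code): every query above is built by
    # string concatenation, which breaks on quotes in the data and is open to
    # SQL injection. Assuming DataBase wraps a DB-API connection it exposes as
    # `connection` (an assumption -- its real interface is not shown in this
    # file), a parameterized lookup could look like this:
    def _selectMasterByDate(self, db, day):
        sql = "select * from zcb_report_master where date(createdate) = %s"
        cur = db.connection.cursor()  # assumed attribute; adapt to DataBase's API
        cur.execute(sql, (str(day),))  # the driver escapes the value itself
        return cur.fetchall()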
"https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528" self.pageUtils.browser.get(url) def crawler(self): url = "https://zhaocaibao.alipay.com/pf/productList.htm" browser = self.pageUtils.startBrowser() browser.get(url) print browser.title a = self.parsePage_home(browser) url = "https://cmspromo.alipay.com/finance/fullyear.htm" browser.get(url) print browser.title b = self.parsePage_finance(browser) b.update() a.get("m").update(b) # print a.get("s") # print b self.updateData(a) # d = self.pageUtils.downloadPage(url) # browser.find_element("", "").get_attribute(name) # print "==>",browser.find_element_by_class_name("data-box").text() browser.quit() def parsePage_home(self, page): result = {} result_m = {} result["m"] = result_m # print soup.title # #平台成交金额 cjjes = page.find_elements_by_class_name("data-box") c = "" for cjje in cjjes: # cjje += cjje.get_attribute("class") c += cjje.text result_m["cjje"] = c yycjje = page.find_element_by_class_name("week-book-data") result_m["yycjje"] = self.clearNumber(yycjje.text) yycjjebl = page.find_element_by_class_name("book-rate-data") result_m["yycjjebl"] = yycjjebl.text grqyds = page.find_elements_by_css_selector('div[class="several-months fn-clear"]') i = 0 qixis = ["3", "3-6", "6-12", "12-24", "24"] result_s_list = [] result["s"] = result_s_list for grqyd in grqyds: result_s_map = {"type": "个人企业贷"} result_s_list.append(result_s_map) result_s_map["tzqx"] = qixis[i] aa = grqyd.find_element_by_css_selector('div[class="product-book fn-clear"]') zg = aa.find_element_by_class_name("content-third-type") # 总共 result_s_map["zgje"] = self.clearNumber(zg.text) yylilv = aa.find_element_by_class_name("content-second-type") # 预约利率 result_s_map["yylilv"] = yylilv.text print "yylilv-->", yylilv.text try: bb = grqyd.find_element_by_css_selector('div[class="product-buy fn-clear"]') gm = bb.find_element_by_class_name("content-third-type") # 购买 result_s_map["gmje"] = self.clearNumber(gm.text) gmlilv = bb.find_element_by_css_selector('li[class="w145 buy-product-rate"]') # 购买利率 result_s_map["gmlilv"] = gmlilv.text print "gmlilv-->", gmlilv.text except Exception, e: print e i += 1 # .find_element_by_class_name("content-third-type")#购买 # print zg,gm # print result return result