def synzcb_loan(self, startDate):
    """Insert into ``zcb_loan_state`` the loan products that stopped reporting.

    A ``productid`` from ``zcb_loan`` is inserted when all of these hold:

    * it has a ``zcb_loan_process`` row with ``createDate`` in
      ``[delayed(today, -2), delayed(today, -1))``;
    * it has no ``zcb_loan_process`` row with ``createDate`` in
      ``[delayed(today, -1), today)``;
    * its ``state`` is neither '已下架' nor '已售罄';
    * it is not already present in ``zcb_loan_state``.

    Args:
        startDate: Ignored. The date window is always derived from
            ``PageUtils.getCurrentDate()``; the parameter is kept only so
            existing callers keep working.
            NOTE(review): callers pass a date here, so it was probably meant
            to be the reference day -- confirm before relying on it.

    Returns:
        None.
    """
    today = PageUtils.getCurrentDate()
    # Presumably yesterday and the day before yesterday -- the exact meaning
    # of PageUtils.delayed is defined outside this block.
    day_minus_1 = PageUtils.delayed(today, -1)
    day_minus_2 = PageUtils.delayed(today, -2)

    DB = dataBaseOperator()
    try:
        sql = (
            " insert into zcb_loan_state (productid)"
            " select productid from zcb_loan where productid in ("
            " select productid from zcb_loan_process"
            " where createDate >= '"
            + str(day_minus_2)
            + "' and createDate < '"
            + str(day_minus_1)
            + "' and productid not in"
            " (select productid from zcb_loan_process where createDate >= '"
            + str(day_minus_1)
            + "' and createDate < '"
            + str(today)
            + "')) and state != '已下架' and state !='已售罄'"
            " and productid not in (select productid from zcb_loan_state ) "
        )
        # print() gets one pre-formatted argument so the statement parses and
        # prints the same text on both Python 2 and Python 3.
        print(sql)
        print("exe row --> %s" % (DB.execute(sql),))
        # Success logging through DB.createLog is currently disabled.
    finally:
        # Release the connection even when the statement fails.
        DB.dataBaseClose()
def processData(self, productid, data, tablename):
    """Insert or update today's row for ``productid`` in ``tablename``.

    Looks for a row whose ``productId`` matches and whose ``createDate`` lies
    in ``[today, delayed(today, 1))``. If none exists, ``data`` is inserted
    with ``createDate`` set to the current time; otherwise the first matching
    row is updated by its ``id``. ``updateDate`` is always set to the current
    time.

    Args:
        productid: Product identifier; concatenated into the SQL text, so it
            must be a string.
        data: Column/value dict to store. It is modified in place:
            ``updateDate`` is always added and ``createDate`` is added on
            insert.
        tablename: Target table (e.g. ``zcb_insu_process``); concatenated
            into the SQL text.

    Returns:
        None.
    """
    # Today's date, formatted yyyy-mm-dd.
    startData = PageUtils.getCurrentDate()
    # Upper bound of the window (presumably the following day).
    endData = PageUtils.delayed(startData, 1)
    currentTime_str = PageUtils.getCurrentTime()

    db = DataBase()
    try:
        # NOTE(review): the query is built by string concatenation, so
        # productid and tablename must come from trusted code. db.execute is
        # only seen taking a finished SQL string, so it is not parameterised.
        query_sql = (
            "select * from "
            + tablename
            + " where productId = '"
            + productid
            + "' and createDate < '"
            + endData
            + "' and createDate>='"
            + startData
            + "'"
        )
        query_result = db.execute(query_sql)

        data["updateDate"] = currentTime_str
        if len(query_result) < 1:
            # No row for today yet: insert one.
            data["createDate"] = currentTime_str
            sql = db.parseInsert(tablename, data)
        else:
            row_id = query_result[0]["id"]
            sql = db.parseUpdate(tablename, data, "id='" + str(row_id) + "'")
        db.execute(sql)
    finally:
        # Release the connection even when a query fails.
        db.dataBaseClose()
select productid from zcb_loan where productid in ( select productid from zcb_loan_process where createDate >= \''''+str(startDate2)+'''\' and createDate < \''''+str(startDate)+'''\' and productid not in (select productid from zcb_loan_process where createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(today)+'''\')) and state != '已下架' and state !='已售罄' and productid not in (select productid from zcb_loan_state ) ''' print sql print "exe row -->",DB.execute(sql) #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})# DB.dataBaseClose() return if __name__ == '__main__': data_obj = data_1Week() #data_obj.dropTables() #data_obj.synzcb("2015-09-21") print PageUtils.getCurrentDate() data_obj.synzcb_loan("2015-11-17")#PageUtils.getCurrentDate() #data_obj.createTable() ''' startDate = "" t_startDate = time.strptime(startDate, "%Y-%m-%d") d_startDate = datetime.datetime(* t_startDate[:6]) d_endDate = d_startDate + datetime.timedelta(days=7) s = str(time.strftime("%Y%m%d", t_startDate)) e=datetime.datetime.strftime((d_endDate - datetime.timedelta(days=1)), '%Y%m%d') print d_startDate,"==",d_endDate,"--",s,"***",e
class CrawlerHome:
    """Scrape the Zhaocaibao summary pages and store one snapshot per day.

    The crawl result is a dict ``{"m": master_values, "s": [slave_values]}``
    that ``updateData`` writes to ``zcb_report_master`` and
    ``zcb_report_slave``.

    ``clearNumber`` and ``parsePage_finance`` are called on ``self`` but are
    defined outside the part of the class shown here.
    """

    def __init__(self):
        # Python 2 only: reload(sys) makes setdefaultencoding available again
        # so implicit str/unicode conversions use UTF-8.
        reload(sys)
        sys.setdefaultencoding("utf8")  # @UndefinedVariable
        self.pageUtils = PageUtils()

    def updateData(self, data):
        """Write one crawl result to the master and slave report tables.

        Args:
            data: dict with key ``"m"`` (master column/value dict) and key
                ``"s"`` (iterable of slave dicts, each carrying ``"type"``
                and ``"tzqx"``).

        Returns:
            None.
        """
        _main = data.get("m")
        _slaves = data.get("s")
        # print() always gets one pre-formatted argument in this class so the
        # statements parse and print the same text on Python 2 and 3.
        print(_main)
        print(_slaves)

        db = DataBase()
        try:
            today = self.pageUtils.getCurrentDate()
            # Yesterday's date: the rows stored for it are the baseline.
            yesterday = self.pageUtils.delayed(today, -1)
            self._save_master(db, _main, today, yesterday)
            # Process the slave-table rows.
            for _slave in _slaves:
                self._save_slave(db, _slave, today, yesterday)
        finally:
            # Release the connection even when a query fails.
            db.dataBaseClose()

    def _save_master(self, db, current, today, yesterday):
        """Store today's zcb_report_master row, compared with yesterday's."""
        sql = "select * from zcb_report_master where date(createdate)='" + str(yesterday) + "'"
        previous = db.execute(sql)
        if len(previous) > 0:
            previous = previous[0]
            print("sql==> %s" % (sql,))
            values = {}
            for key in current:
                print("--> %s == %s" % (current.get(key), previous.get(key)))
                # "yycjjebl" is always stored; any other column only when
                # yesterday had no value or today's value is larger.
                if (
                    key == "yycjjebl"
                    or previous.get(key) is None
                    or current.get(key) > previous.get(key)
                ):
                    values[key] = current.get(key)
            id_clause = " id = "
        else:
            # No baseline row: store everything that was crawled.
            values = current
            id_clause = "id="

        # Does a row for today already exist?
        today_sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
        self._store_today_row(db, "zcb_report_master", today_sql, values, "master", id_clause)

    def _save_slave(self, db, current, today, yesterday):
        """Store today's zcb_report_slave row for one (type, tzqx) pair."""
        sql = self._slave_sql(yesterday, current)
        print(sql)
        previous = db.execute(sql)
        if len(previous) > 0:
            previous = previous[0]
            # The identifying columns are always written, so the dict is
            # never empty and every slave row is stored.
            values = {"type": current.get("type"), "tzqx": current.get("tzqx")}
            for key in current:
                print("--> %s == %s" % (current.get(key), previous.get(key)))
                if previous.get(key) is None or current.get(key) != previous.get(key):
                    print("%s ==> %s !!!!! %s" % (key, current.get(key), previous.get(key)))
                    values[key] = current.get(key)
            print("slave--> %s" % (values,))
            id_clause = " id = "
        else:
            # No baseline row: store everything that was crawled.
            values = current
            id_clause = "id = "

        today_sql = self._slave_sql(today, current)
        self._store_today_row(db, "zcb_report_slave", today_sql, values, "slave", id_clause)

    def _slave_sql(self, day, slave):
        """Build the zcb_report_slave lookup for one day and (type, tzqx) pair."""
        return (
            "select * from zcb_report_slave where date(createdate)='"
            + str(day)
            + "' and type='"
            + slave.get("type")
            + "' and tzqx = '"
            + slave.get("tzqx")
            + "'"
        )

    def _store_today_row(self, db, table, today_sql, values, label, id_clause):
        """Insert ``values`` into ``table``, or update the row ``today_sql`` finds.

        ``label`` only appears in the printed trace. ``id_clause`` is the
        literal text placed before the row id in the WHERE condition; each
        call site passes its historical spacing so the emitted SQL is
        unchanged.
        """
        rows = db.execute(today_sql)
        if len(rows) < 1:
            sql = db.parseInsert(table, values)
            print("insert %s sql --> %s" % (label, sql))
        else:
            sql = db.parseUpdate(table, values, id_clause + str(rows[0].get("id")))
            print("update %s sql --> %s" % (label, sql))
        db.execute(sql)

    def crawlerTest(self):
        """Log in through ``pageUtils`` and open a second product page."""
        url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668"
        self.pageUtils.url = url
        # NOTE(review): account credentials are hard-coded in source; they
        # should be moved to configuration and rotated.
        self.pageUtils.login("13651781949", "lufax123")
        sleep(10)
        url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528"
        self.pageUtils.browser.get(url)

    def crawler(self):
        """Crawl the product-list and full-year pages and store the result."""
        browser = self.pageUtils.startBrowser()
        try:
            browser.get("https://zhaocaibao.alipay.com/pf/productList.htm")
            print(browser.title)
            a = self.parsePage_home(browser)

            browser.get("https://cmspromo.alipay.com/finance/fullyear.htm")
            print(browser.title)
            b = self.parsePage_finance(browser)
            # NOTE(review): update() without arguments does nothing on a
            # plain dict; kept because parsePage_finance's return type is
            # not visible here.
            b.update()
            # Fold the finance-page values into the master record.
            a.get("m").update(b)

            self.updateData(a)
        finally:
            # Always shut the browser down, even when parsing or saving fails.
            browser.quit()

    def parsePage_home(self, page):
        """Extract master and per-term values from the product-list page.

        Args:
            page: browser/driver object exposing the ``find_element(s)_by_*``
                lookups used below.

        Returns:
            dict ``{"m": {...}, "s": [{...}, ...]}``. ``"m"`` holds ``cjje``,
            ``yycjje`` and ``yycjjebl``; each ``"s"`` entry holds ``type``,
            ``tzqx``, ``zgje``, ``yylilv`` and, when the purchase section can
            be read, ``gmje`` and ``gmlilv``.

        Raises:
            IndexError: if the page has more "several-months" sections than
                there are entries in the term list.
        """
        result_m = {}
        result_s_list = []
        result = {"m": result_m, "s": result_s_list}

        # Platform transaction amount: text of every "data-box" element.
        result_m["cjje"] = "".join(
            box.text for box in page.find_elements_by_class_name("data-box")
        )
        yycjje = page.find_element_by_class_name("week-book-data")
        result_m["yycjje"] = self.clearNumber(yycjje.text)
        yycjjebl = page.find_element_by_class_name("book-rate-data")
        result_m["yycjjebl"] = yycjjebl.text

        # One section per term; terms are assigned by position on the page.
        qixis = ["3", "3-6", "6-12", "12-24", "24"]
        grqyds = page.find_elements_by_css_selector('div[class="several-months fn-clear"]')
        for i, grqyd in enumerate(grqyds):
            result_s_map = {"type": "个人企业贷"}
            result_s_list.append(result_s_map)
            result_s_map["tzqx"] = qixis[i]

            aa = grqyd.find_element_by_css_selector('div[class="product-book fn-clear"]')
            # Total amount.
            zg = aa.find_element_by_class_name("content-third-type")
            result_s_map["zgje"] = self.clearNumber(zg.text)
            # Reservation rate.
            yylilv_text = aa.find_element_by_class_name("content-second-type").text
            result_s_map["yylilv"] = yylilv_text
            print("yylilv--> %s" % (yylilv_text,))

            # Best effort: a failure while reading the purchase section is
            # printed and the section's remaining fields are left out.
            try:
                bb = grqyd.find_element_by_css_selector('div[class="product-buy fn-clear"]')
                # Purchase amount.
                gm = bb.find_element_by_class_name("content-third-type")
                result_s_map["gmje"] = self.clearNumber(gm.text)
                # Purchase rate.
                gmlilv_text = bb.find_element_by_css_selector(
                    'li[class="w145 buy-product-rate"]'
                ).text
                result_s_map["gmlilv"] = gmlilv_text
                print("gmlilv--> %s" % (gmlilv_text,))
            except Exception as e:
                print(e)
        return result
and state != '已下架' and state !='已售罄' ''' print sql print "exe row -->",DB.execute(sql) #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})# DB.dataBaseClose() return if __name__ == '__main__': data_obj = SynLoanData() #data_obj.dropTables() #data_obj.synzcb("2015-09-21") # print PageUtils.getCurrentDate() #data_obj.synzcb_loan("2015-10-17")#PageUtils.getCurrentDate() data_obj.synzcb_loan(PageUtils.getCurrentDate()) #data_obj.createTable() ''' startDate = "" t_startDate = time.strptime(startDate, "%Y-%m-%d") d_startDate = datetime.datetime(* t_startDate[:6]) d_endDate = d_startDate + datetime.timedelta(days=7) s = str(time.strftime("%Y%m%d", t_startDate)) e=datetime.datetime.strftime((d_endDate - datetime.timedelta(days=1)), '%Y%m%d') print d_startDate,"==",d_endDate,"--",s,"***",e '''