def synzcb_loan (self,startDate): today = PageUtils.getCurrentDate() startDate = PageUtils.delayed(today,-1) startDate2 = PageUtils.delayed(today,-2) DB = dataBaseOperator() sql = ''' insert into zcb_loan_state (productid) select productid from zcb_loan where productid in ( select productid from zcb_loan_process where createDate >= \''''+str(startDate2)+'''\' and createDate < \''''+str(startDate)+'''\' and productid not in (select productid from zcb_loan_process where createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(today)+'''\')) and state != '已下架' and state !='已售罄' and productid not in (select productid from zcb_loan_state ) ''' print sql print "exe row -->",DB.execute(sql) #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})# DB.dataBaseClose() return
def processData(self, productid, data, tablename): # zcb_insu_process # 今天的日期 格式 yyyy-mm-dd startData = PageUtils.getCurrentDate() endData = PageUtils.delayed(startData, 1) currentTime_str = PageUtils.getCurrentTime() # time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) db = DataBase() query_sql = ( "select * from " + tablename + " where productId = '" + productid + "' and createDate < '" + endData + "' and createDate>='" + startData + "'" ) # print "processData query_sql-->"+query_sql query_result = db.execute(query_sql) data["updateDate"] = currentTime_str if len(query_result) < 1: # 当天没有数据,执行插入 # zcb_insu_process data["createDate"] = currentTime_str sql = db.parseInsert(tablename, data) # print "processData-->insert sql==>"+sql else: id = query_result[0]["id"] # print id sql = db.parseUpdate(tablename, data, "id='" + str(id) + "'") # print "processData-->update sql==>"+sql db.execute(sql) db.dataBaseClose()
class CrawlerHome: def __init__(self): reload(sys) sys.setdefaultencoding("utf8") # @UndefinedVariable self.pageUtils = PageUtils() def updateData(self, data): _main = data.get("m") _slaves = data.get("s") main = {} slave = {} print _main print _slaves db = DataBase() today = self.pageUtils.getCurrentDate() tomorrow = self.pageUtils.delayed(today, -1) # 取昨天的数据 sql = "select * from zcb_report_master where date(createdate)='" + str(tomorrow) + "'" tomorrow_main = db.execute(sql) if len(tomorrow_main) > 0: tomorrow_main = tomorrow_main[0] print "sql==>", sql for _d in _main: print "-->", _main.get(_d), "==", tomorrow_main.get(_d) if _d == "yycjjebl": main[_d] = _main.get(_d) else: if tomorrow_main.get(_d) == None or _main.get(_d) > tomorrow_main.get(_d): main[_d] = _main.get(_d) # 今天是否有数据 sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'" today_main = db.execute(sql) if len(today_main) < 1: # 主表数据插入 sql = db.parseInsert("zcb_report_master", main) print "insert master sql -->", sql else: today_main = today_main[0] sql = db.parseUpdate("zcb_report_master", main, " id = " + str(today_main.get("id"))) print "update master sql -->", sql db.execute(sql) else: sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'" today_main = db.execute(sql) if len(today_main) < 1: sql = db.parseInsert("zcb_report_master", _main) print "insert master sql -->", sql db.execute(sql) else: today_main = today_main[0] sql = db.parseUpdate("zcb_report_master", _main, "id=" + str(today_main.get("id"))) print "update master sql -->", sql db.execute(sql) # 处理从表数据 for _slave in _slaves: sql = ( "select * from zcb_report_slave where date(createdate)='" + str(tomorrow) + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'" ) print sql tomorrow_slave = db.execute(sql) if len(tomorrow_slave) > 0: tomorrow_slave = tomorrow_slave[0] slave = {} slave["type"] = _slave.get("type") slave["tzqx"] = _slave.get("tzqx") for _d in _slave: print "-->", _slave.get(_d), "==", tomorrow_slave.get(_d) if tomorrow_slave.get(_d) == None or _slave.get(_d) != tomorrow_slave.get(_d): print _d, "==>", _slave.get(_d), "!!!!!", tomorrow_slave.get(_d) slave[_d] = _slave.get(_d) if len(slave) < 1: continue print "slave-->", slave sql = ( "select * from zcb_report_slave where date(createdate)='" + str(today) + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'" ) today_slave = db.execute(sql) if len(today_slave) < 1: # insert sql = db.parseInsert("zcb_report_slave", slave) print "insert slave sql -->", sql else: # update today_slave = today_slave[0] sql = db.parseUpdate("zcb_report_slave", slave, " id = " + str(today_slave.get("id"))) print "update slave sql -->", sql db.execute(sql) else: sql = ( "select * from zcb_report_slave where date(createdate)='" + str(today) + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'" ) today_slave = db.execute(sql) if len(today_slave) < 1: sql = db.parseInsert("zcb_report_slave", _slave) print "insert slave sql -->", sql db.execute(sql) else: today_slave = today_slave[0] sql = db.parseUpdate("zcb_report_slave", _slave, "id = " + str(today_slave.get("id"))) print "update slave sql -->", sql db.execute(sql) def crawlerTest(self): url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668" self.pageUtils.url = url self.pageUtils.login("13651781949", "lufax123") sleep(10) url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528" self.pageUtils.browser.get(url) def crawler(self): url = "https://zhaocaibao.alipay.com/pf/productList.htm" browser = self.pageUtils.startBrowser() browser.get(url) print browser.title a = self.parsePage_home(browser) url = "https://cmspromo.alipay.com/finance/fullyear.htm" browser.get(url) print browser.title b = self.parsePage_finance(browser) b.update() a.get("m").update(b) # print a.get("s") # print b self.updateData(a) # d = self.pageUtils.downloadPage(url) # browser.find_element("", "").get_attribute(name) # print "==>",browser.find_element_by_class_name("data-box").text() browser.quit() def parsePage_home(self, page): result = {} result_m = {} result["m"] = result_m # print soup.title # #平台成交金额 cjjes = page.find_elements_by_class_name("data-box") c = "" for cjje in cjjes: # cjje += cjje.get_attribute("class") c += cjje.text result_m["cjje"] = c yycjje = page.find_element_by_class_name("week-book-data") result_m["yycjje"] = self.clearNumber(yycjje.text) yycjjebl = page.find_element_by_class_name("book-rate-data") result_m["yycjjebl"] = yycjjebl.text grqyds = page.find_elements_by_css_selector('div[class="several-months fn-clear"]') i = 0 qixis = ["3", "3-6", "6-12", "12-24", "24"] result_s_list = [] result["s"] = result_s_list for grqyd in grqyds: result_s_map = {"type": "个人企业贷"} result_s_list.append(result_s_map) result_s_map["tzqx"] = qixis[i] aa = grqyd.find_element_by_css_selector('div[class="product-book fn-clear"]') zg = aa.find_element_by_class_name("content-third-type") # 总共 result_s_map["zgje"] = self.clearNumber(zg.text) yylilv = aa.find_element_by_class_name("content-second-type") # 预约利率 result_s_map["yylilv"] = yylilv.text print "yylilv-->", yylilv.text try: bb = grqyd.find_element_by_css_selector('div[class="product-buy fn-clear"]') gm = bb.find_element_by_class_name("content-third-type") # 购买 result_s_map["gmje"] = self.clearNumber(gm.text) gmlilv = bb.find_element_by_css_selector('li[class="w145 buy-product-rate"]') # 购买利率 result_s_map["gmlilv"] = gmlilv.text print "gmlilv-->", gmlilv.text except Exception, e: print e i += 1 # .find_element_by_class_name("content-third-type")#购买 # print zg,gm # print result return result
def tjDay(self,startDate): #startDate = "2015-09-13" startDate = startDate[:10] #print "startDate-->",startDate[:10] days = 1 endDate = str(PageUtils.delayed(startDate[:10],days)) #相隔的天数 tiqianDay = -1 tiqianStartDate = str(PageUtils.delayed(startDate,tiqianDay)) tiqianendDate = str(PageUtils.delayed(endDate[:10],tiqianDay)) print startDate,":",endDate,"====",tiqianStartDate,":",tiqianendDate db = dataBaseOperator() f=file("report/loan_"+str(startDate)+".txt","w+") #main_tablename = "zcb_insu" #slave_tablename = "zcb_insu_process" main_tablename = "zcb_loan"#,"zcb_insu","zcb_others" head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长" slave_tablename = main_tablename+"_process" sql = ''' SELECT projectType, period, count(*)as total, sum( bishu_today )as bishu_today , sum( addedTransNumber ) as bishu , min( annualRate ) as min_lilv, max( annualRate ) as max_lilv , min( amountAtLeast ) as min_qitou, max( amountAtLeast ) as max_qitou, sum(jezz) as jezz,'已下架' as type FROM ( SELECT productid, CASE WHEN period <=90 THEN '03' WHEN period >90 AND period <=180 THEN '03-06' WHEN period >180 AND period <=365 THEN '06-12' WHEN period >365 AND period <=730 THEN '12-24' WHEN period >730 THEN '24+' END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( SELECT productid, case when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed) when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 else null end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( select l1.productid, l1.projectType, l1.period, l1.TransNumber addedTransNumber,l1.TransNumber bishu_today,l1.annualRate, cast( SUBSTRING_INDEX( l1.amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast, l1.productScale_yuan as jezz from zcb_loan l1 where state = '已下架' and endSellDate >= \''''+str(startDate)+'''\' and endSellDate < \''''+str(endDate)+'''\' )a )b )c GROUP BY projectType, period ''' #and createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(endDate)+'''\' 已售罄 已下架 print sql maps = db.execute(sql) for map in maps : map["startDate"] = startDate map["endDate"] = endDate sql = db.parseInsert("zcb_loan_report", map) print sql db.execute(sql) f.writelines(head) for map in maps : result = "\n"+str(map["projectType"])+","+\ str(map["period"])+"个月,0,"+\ str(map["total"])\ +","+str(map["bishu_today"])\ +","+str(map["bishu"])\ +","+str(map["min_lilv"])\ +","+str(map["max_lilv"])\ +","+str(map["min_qitou"]) +"-"+str(map["max_qitou"])\ +","+str(map["jezz"]) ''' result.append(map["projectType"]) result.append(map["period"]) result.append(0) result.append(map["total"]) result.append(map["bishu_today"]) result.append(map["bishu"]) result.append(map["min_lilv"]) result.append(map["max_lilv"]) result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))''' f.write(result) f.write("\n\n\n") f.flush() f.close()
def tjDay(self, startDate): # startDate = "2015-09-13" startDate = startDate[:10] # print "startDate-->",startDate[:10] days = 1 endDate = str(PageUtils.delayed(startDate[:10], days)) # 相隔的天数 tiqianDay = -1 tiqianStartDate = str(PageUtils.delayed(startDate, tiqianDay)) tiqianendDate = str(PageUtils.delayed(endDate[:10], tiqianDay)) print startDate, ":", endDate, "====", tiqianStartDate, ":", tiqianendDate reload(sys) sys.setdefaultencoding("utf8") # @UndefinedVariable db = dataBaseOperator() f = file("report/" + str(startDate) + ".txt", "w+") # main_tablename = "zcb_insu" # slave_tablename = "zcb_insu_process" tablename = ["zcb_loan", "zcb_insu", "zcb_others"] # "zcb_loan","zcb_insu","zcb_others" head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长" for main_tablename in tablename: slave_tablename = main_tablename + "_process" sql = ( """ SELECT projectType, period, count(*)as total, sum( bishu_today )as bishu_today , sum( addedTransNumber ) as bishu , min( annualRate ) as min_lilv, max( annualRate ) as max_lilv , min( amountAtLeast ) as min_qitou, max( amountAtLeast ) as max_qitou, sum(jezz) as jezz FROM ( SELECT productid, CASE WHEN period <=90 THEN '03' WHEN period >90 AND period <=180 THEN '03-06' WHEN period >180 AND period <=365 THEN '06-12' WHEN period >365 AND period <=730 THEN '12-24' WHEN period >730 THEN '24+' END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( SELECT productid, case when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed) when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 else null end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( select p.productid, i.projectType, i.period, p.bishu as 'addedTransNumber',p.bishu_today,i.annualRate, cast( SUBSTRING_INDEX( amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast, (i.productScale_yuan*wanchenglv/100)as jezz from ( select a1.productid , (IFNULL(a1.bishu,0)- IFNULL(a2.bishu,0))as bishu ,IFNULL(a1.bishu,0)as bishu_today,(a1.wanchenglv-a2.wanchenglv)as wanchenglv from (select productid,bishu,IFNULL(b1.wanchenglv,0)as wanchenglv from `""" + slave_tablename + """` b1 where createDate >= \'""" + startDate + """\' and createDate < \'""" + endDate + """\')a1 left join (select productid,bishu,IFNULL(b2.wanchenglv,0)as wanchenglv from `""" + slave_tablename + """` b2 where createDate >= \'""" + tiqianStartDate + """\' and createDate < \'""" + tiqianendDate + """\')a2 on a1.productid = a2.productid ) p left join """ + main_tablename + """ i on p.productid = i.productid )c )a )b GROUP BY projectType, period """ ) print sql maps = db.execute(sql) f.writelines(head) for map in maps: result = ( "\n" + str(map["projectType"]) + "," + str(map["period"]) + "个月,0," + str(map["total"]) + "," + str(map["bishu_today"]) + "," + str(map["bishu"]) + "," + str(map["min_lilv"]) + "," + str(map["max_lilv"]) + "," + str(map["min_qitou"]) + "-" + str(map["max_qitou"]) + "," + str(map["jezz"]) ) """ result.append(map["projectType"]) result.append(map["period"]) result.append(0) result.append(map["total"]) result.append(map["bishu_today"]) result.append(map["bishu"]) result.append(map["min_lilv"]) result.append(map["max_lilv"]) result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))""" f.write(result) f.write("\n\n\n") f.flush() f.close()