def login(self):
    """Log the current account in and store the authenticated session.

    The account is ``self.g_users[self.g_index]``. ``PageUtils.login`` is
    called with a dict describing the login page (XPaths of the inputs, the
    captcha settings and the landing URL) and is retried until it returns
    something other than ``False``. On success the returned browser is kept
    in ``self.browser`` and an opener built from its cookies is kept in
    ``self.opener``.

    ``self.g_waitFlag`` counts failed attempts; once it exceeds 5, every
    further retry is preceded by a 10 minute pause.
    """
    user = self.g_users[self.g_index]
    url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668"
    # Named ``login_config`` (not ``login``): the original local dict was
    # called ``login`` and the retry then tried to call that dict.
    login_config = {
        "username": user.get("username"),
        "password": user.get("password"),
        "username_input": "//input[@id='J-input-user']",
        "password_input": "//*[@id='password_input']",
        "submit_but": "//*[@id='J-login-btn']",
        "check_code": {
            "check_code_input": "//*[@id='J-input-checkcode']",
            "img_tag": "//*[@id='J-checkcode-img']",
            "img_path": "/home/lufax/img",
        },
        "url": url,
    }

    # Iterative retry: avoids unbounded recursion and makes sure the failure
    # counter is updated *before* the next attempt, so the back-off applies.
    while True:
        session = PageUtils.login(login_config)
        if session != False:  # PageUtils.login signals failure with False
            break
        if self.g_waitFlag > 5:
            sleep(10 * 60)
        self.g_waitFlag += 1

    self.browser = session
    self.opener = PageUtils.build_opener_with_chrome_cookies(self.browser.get_cookies())
def zqProductInfo(self): # 得到个人贷企业贷的有效产品ids datas = self.getProductId() print "len-->" + str(len(datas)) if len(datas) < 1: print "这个范围数据已经爬完,休眠5个小时",PageUtils.getCurrentTime() sleep(60 * 60 * 5) print "休眠结束,开始爬数据",PageUtils.getCurrentTime() index = 1 for data in datas: sleep(6) productId = data["productid"] id = data["id"] index += 1 url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=" + productId print "zqProductInfo -->" , url try: # 下载页面 page = PageUtils.downloadPage(self.opener,url) except Exception, e: print "downloadPage err -->", e continue try: p = self.parsePage(page) except Exception, e: print "parsePage err -->", e
def synzcb_loan (self,startDate): today = PageUtils.getCurrentDate() startDate = PageUtils.delayed(today,-1) startDate2 = PageUtils.delayed(today,-2) DB = dataBaseOperator() sql = ''' insert into zcb_loan_state (productid) select productid from zcb_loan where productid in ( select productid from zcb_loan_process where createDate >= \''''+str(startDate2)+'''\' and createDate < \''''+str(startDate)+'''\' and productid not in (select productid from zcb_loan_process where createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(today)+'''\')) and state != '已下架' and state !='已售罄' and productid not in (select productid from zcb_loan_state ) ''' print sql print "exe row -->",DB.execute(sql) #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})# DB.dataBaseClose() return
def parsePage(self, page): soup = BeautifulSoup(page) result = {} try : product_name = soup.find('p', {'class':'product-name'}).get_text() product_name = PageUtils.replaceString(product_name) product_name = product_name.split(' ') product_name = product_name[0] result["productName"] = product_name except Exception, e: print "getLoanDetailsInfo-->product_name==>", e if self.is_checkPage(soup) : return False
def getListInfos(self, page): table = [] soup = BeautifulSoup(page) # print soup productTag_list = soup.find_all("ul", {"class": re.compile("icontent-ul*")}) # print productTag_list for productTag in productTag_list: try: productId = productTag["productid"] liTag_list = productTag.find_all("li") annualRate = liTag_list[0].find("span").get_text() period = liTag_list[1].find("span", {"class": "year"}).get_text() period = PageUtils.replaceString(period) projectType = PageUtils.replaceString(liTag_list[2].get_text()) amountAtLeast = PageUtils.replaceString(liTag_list[3].get_text()) amountAtLeast = amountAtLeast[:-2] transNumber = PageUtils.replaceString(liTag_list[4].get_text()) transNumber = transNumber[:-1] ## Need delete # print annualRate, period, productType, amountAtLeast, transNumber mapData = { "productId": productId, "annualRate": annualRate, "period": period, "projectType": projectType, "amountAtLeast": amountAtLeast, "transNumber": transNumber, } table.append(mapData) # print List except Exception, e: print "getListInfos-->", e
def processData(self, productid, data, tablename):
    """Insert or update today's process row for one product.

    Looks in ``tablename`` for a row of ``productid`` whose ``createDate``
    falls on the current day. ``data["updateDate"]`` is always set to the
    current time. When no row exists, ``data["createDate"]`` is set as well
    and the row is inserted; otherwise the first matching row is updated by
    its ``id``.

    :param productid: product identifier, concatenated into the query.
    :param data: column/value dict; mutated in place (timestamps added).
    :param tablename: target table, e.g. ``zcb_insu_process``.

    NOTE(review): the SQL is assembled by string concatenation; the values
    must come from a trusted source.
    """
    # Today's window: [startData, endData), both formatted yyyy-mm-dd.
    startData = PageUtils.getCurrentDate()
    endData = PageUtils.delayed(startData, 1)
    currentTime_str = PageUtils.getCurrentTime()

    db = DataBase()
    try:
        query_sql = (
            "select * from "
            + tablename
            + " where productId = '"
            + productid
            + "' and createDate < '"
            + endData
            + "' and createDate>='"
            + startData
            + "'"
        )
        query_result = db.execute(query_sql)
        data["updateDate"] = currentTime_str
        if len(query_result) < 1:
            # Nothing stored for today yet: insert a new row.
            data["createDate"] = currentTime_str
            sql = db.parseInsert(tablename, data)
        else:
            row_id = query_result[0]["id"]
            sql = db.parseUpdate(tablename, data, "id='" + str(row_id) + "'")
        db.execute(sql)
    finally:
        # Close the connection even when a statement raises.
        db.dataBaseClose()
def synMainData(self, productid, tablename, data):
    """Insert the master row of a product if the table does not have it yet.

    When ``tablename`` holds no row for ``productid``, ``data`` receives
    ``createDate`` and ``updateDate`` (both the current time) and is
    inserted. Existing rows are left untouched.

    :param productid: product identifier, concatenated into the query.
    :param tablename: master table name.
    :param data: column/value dict; mutated in place when inserted.
    """
    query_sql = "select * from " + tablename + " where productId = '" + productid + "'"
    db = DataBase()
    try:
        query_result = db.execute(query_sql)
        currentTime_str = PageUtils.getCurrentTime()
        if len(query_result) < 1:
            data["createDate"] = currentTime_str
            data["updateDate"] = currentTime_str
            insert_sql = db.parseInsert(tablename, data)
            db.execute(insert_sql)
    finally:
        # Close the connection even when a statement raises.
        db.dataBaseClose()
select productid from zcb_loan where productid in ( select productid from zcb_loan_process where createDate >= \''''+str(startDate2)+'''\' and createDate < \''''+str(startDate)+'''\' and productid not in (select productid from zcb_loan_process where createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(today)+'''\')) and state != '已下架' and state !='已售罄' and productid not in (select productid from zcb_loan_state ) ''' print sql print "exe row -->",DB.execute(sql) #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})# DB.dataBaseClose() return if __name__ == '__main__': data_obj = data_1Week() #data_obj.dropTables() #data_obj.synzcb("2015-09-21") print PageUtils.getCurrentDate() data_obj.synzcb_loan("2015-11-17")#PageUtils.getCurrentDate() #data_obj.createTable() ''' startDate = "" t_startDate = time.strptime(startDate, "%Y-%m-%d") d_startDate = datetime.datetime(* t_startDate[:6]) d_endDate = d_startDate + datetime.timedelta(days=7) s = str(time.strftime("%Y%m%d", t_startDate)) e=datetime.datetime.strftime((d_endDate - datetime.timedelta(days=1)), '%Y%m%d') print d_startDate,"==",d_endDate,"--",s,"***",e
soup = BeautifulSoup(page) result = {} try : product_name = soup.find('p', {'class':'product-name'}).get_text() product_name = PageUtils.replaceString(product_name) product_name = product_name.split(' ') product_name = product_name[0] result["productName"] = product_name except Exception, e: print "getLoanDetailsInfo-->product_name==>", e if self.is_checkPage(soup) : return False try : guaranteeCompany = soup.find('p', {'class': 'purchase-safe-info-text'}).get_text() guaranteeCompany = PageUtils.replaceString(guaranteeCompany) result["guaranteeCompany"] = guaranteeCompany except: guaranteeCompany = "notFound" # print guaranteeCompany # # total amount of product try : product_infos_list = soup.find_all('div', {'class':'product-info-detail-item'}) totalAmount = product_infos_list[0].find_all('p')[1].get_text() o = PageUtils.clearTotalAmount(str(totalAmount)) print "totalAmount-->", totalAmount, "=========", o totalAmount = o result["productScale_yuan"] = totalAmount except: totalAmount = "notFound"
def __init__(self):
    """Switch the default string encoding to UTF-8 and create the page helper."""
    # Python 2 only: reloading ``sys`` makes ``setdefaultencoding`` available
    # again. It is looked up dynamically because static checkers do not see it.
    reload(sys)
    set_default_encoding = getattr(sys, "setdefaultencoding")
    set_default_encoding("utf8")
    self.pageUtils = PageUtils()
class CrawlerHome: def __init__(self): reload(sys) sys.setdefaultencoding("utf8") # @UndefinedVariable self.pageUtils = PageUtils() def updateData(self, data): _main = data.get("m") _slaves = data.get("s") main = {} slave = {} print _main print _slaves db = DataBase() today = self.pageUtils.getCurrentDate() tomorrow = self.pageUtils.delayed(today, -1) # 取昨天的数据 sql = "select * from zcb_report_master where date(createdate)='" + str(tomorrow) + "'" tomorrow_main = db.execute(sql) if len(tomorrow_main) > 0: tomorrow_main = tomorrow_main[0] print "sql==>", sql for _d in _main: print "-->", _main.get(_d), "==", tomorrow_main.get(_d) if _d == "yycjjebl": main[_d] = _main.get(_d) else: if tomorrow_main.get(_d) == None or _main.get(_d) > tomorrow_main.get(_d): main[_d] = _main.get(_d) # 今天是否有数据 sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'" today_main = db.execute(sql) if len(today_main) < 1: # 主表数据插入 sql = db.parseInsert("zcb_report_master", main) print "insert master sql -->", sql else: today_main = today_main[0] sql = db.parseUpdate("zcb_report_master", main, " id = " + str(today_main.get("id"))) print "update master sql -->", sql db.execute(sql) else: sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'" today_main = db.execute(sql) if len(today_main) < 1: sql = db.parseInsert("zcb_report_master", _main) print "insert master sql -->", sql db.execute(sql) else: today_main = today_main[0] sql = db.parseUpdate("zcb_report_master", _main, "id=" + str(today_main.get("id"))) print "update master sql -->", sql db.execute(sql) # 处理从表数据 for _slave in _slaves: sql = ( "select * from zcb_report_slave where date(createdate)='" + str(tomorrow) + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'" ) print sql tomorrow_slave = db.execute(sql) if len(tomorrow_slave) > 0: tomorrow_slave = tomorrow_slave[0] slave = {} slave["type"] = _slave.get("type") slave["tzqx"] = _slave.get("tzqx") for 
_d in _slave: print "-->", _slave.get(_d), "==", tomorrow_slave.get(_d) if tomorrow_slave.get(_d) == None or _slave.get(_d) != tomorrow_slave.get(_d): print _d, "==>", _slave.get(_d), "!!!!!", tomorrow_slave.get(_d) slave[_d] = _slave.get(_d) if len(slave) < 1: continue print "slave-->", slave sql = ( "select * from zcb_report_slave where date(createdate)='" + str(today) + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'" ) today_slave = db.execute(sql) if len(today_slave) < 1: # insert sql = db.parseInsert("zcb_report_slave", slave) print "insert slave sql -->", sql else: # update today_slave = today_slave[0] sql = db.parseUpdate("zcb_report_slave", slave, " id = " + str(today_slave.get("id"))) print "update slave sql -->", sql db.execute(sql) else: sql = ( "select * from zcb_report_slave where date(createdate)='" + str(today) + "' and type='" + _slave.get("type") + "' and tzqx = '" + _slave.get("tzqx") + "'" ) today_slave = db.execute(sql) if len(today_slave) < 1: sql = db.parseInsert("zcb_report_slave", _slave) print "insert slave sql -->", sql db.execute(sql) else: today_slave = today_slave[0] sql = db.parseUpdate("zcb_report_slave", _slave, "id = " + str(today_slave.get("id"))) print "update slave sql -->", sql db.execute(sql) def crawlerTest(self): url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668" self.pageUtils.url = url self.pageUtils.login("13651781949", "lufax123") sleep(10) url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528" self.pageUtils.browser.get(url) def crawler(self): url = "https://zhaocaibao.alipay.com/pf/productList.htm" browser = self.pageUtils.startBrowser() browser.get(url) print browser.title a = self.parsePage_home(browser) url = "https://cmspromo.alipay.com/finance/fullyear.htm" browser.get(url) print browser.title b = self.parsePage_finance(browser) b.update() a.get("m").update(b) # print a.get("s") # print b 
self.updateData(a) # d = self.pageUtils.downloadPage(url) # browser.find_element("", "").get_attribute(name) # print "==>",browser.find_element_by_class_name("data-box").text() browser.quit() def parsePage_home(self, page): result = {} result_m = {} result["m"] = result_m # print soup.title # #平台成交金额 cjjes = page.find_elements_by_class_name("data-box") c = "" for cjje in cjjes: # cjje += cjje.get_attribute("class") c += cjje.text result_m["cjje"] = c yycjje = page.find_element_by_class_name("week-book-data") result_m["yycjje"] = self.clearNumber(yycjje.text) yycjjebl = page.find_element_by_class_name("book-rate-data") result_m["yycjjebl"] = yycjjebl.text grqyds = page.find_elements_by_css_selector('div[class="several-months fn-clear"]') i = 0 qixis = ["3", "3-6", "6-12", "12-24", "24"] result_s_list = [] result["s"] = result_s_list for grqyd in grqyds: result_s_map = {"type": "个人企业贷"} result_s_list.append(result_s_map) result_s_map["tzqx"] = qixis[i] aa = grqyd.find_element_by_css_selector('div[class="product-book fn-clear"]') zg = aa.find_element_by_class_name("content-third-type") # 总共 result_s_map["zgje"] = self.clearNumber(zg.text) yylilv = aa.find_element_by_class_name("content-second-type") # 预约利率 result_s_map["yylilv"] = yylilv.text print "yylilv-->", yylilv.text try: bb = grqyd.find_element_by_css_selector('div[class="product-buy fn-clear"]') gm = bb.find_element_by_class_name("content-third-type") # 购买 result_s_map["gmje"] = self.clearNumber(gm.text) gmlilv = bb.find_element_by_css_selector('li[class="w145 buy-product-rate"]') # 购买利率 result_s_map["gmlilv"] = gmlilv.text print "gmlilv-->", gmlilv.text except Exception, e: print e i += 1 # .find_element_by_class_name("content-third-type")#购买 # print zg,gm # print result return result
and state != '已下架' and state !='已售罄' ''' print sql print "exe row -->",DB.execute(sql) #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})# DB.dataBaseClose() return if __name__ == '__main__': data_obj = SynLoanData() #data_obj.dropTables() #data_obj.synzcb("2015-09-21") # print PageUtils.getCurrentDate() #data_obj.synzcb_loan("2015-10-17")#PageUtils.getCurrentDate() data_obj.synzcb_loan(PageUtils.getCurrentDate()) #data_obj.createTable() ''' startDate = "" t_startDate = time.strptime(startDate, "%Y-%m-%d") d_startDate = datetime.datetime(* t_startDate[:6]) d_endDate = d_startDate + datetime.timedelta(days=7) s = str(time.strftime("%Y%m%d", t_startDate)) e=datetime.datetime.strftime((d_endDate - datetime.timedelta(days=1)), '%Y%m%d') print d_startDate,"==",d_endDate,"--",s,"***",e '''
def tjDay(self,startDate): #startDate = "2015-09-13" startDate = startDate[:10] #print "startDate-->",startDate[:10] days = 1 endDate = str(PageUtils.delayed(startDate[:10],days)) #相隔的天数 tiqianDay = -1 tiqianStartDate = str(PageUtils.delayed(startDate,tiqianDay)) tiqianendDate = str(PageUtils.delayed(endDate[:10],tiqianDay)) print startDate,":",endDate,"====",tiqianStartDate,":",tiqianendDate db = dataBaseOperator() f=file("report/loan_"+str(startDate)+".txt","w+") #main_tablename = "zcb_insu" #slave_tablename = "zcb_insu_process" main_tablename = "zcb_loan"#,"zcb_insu","zcb_others" head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长" slave_tablename = main_tablename+"_process" sql = ''' SELECT projectType, period, count(*)as total, sum( bishu_today )as bishu_today , sum( addedTransNumber ) as bishu , min( annualRate ) as min_lilv, max( annualRate ) as max_lilv , min( amountAtLeast ) as min_qitou, max( amountAtLeast ) as max_qitou, sum(jezz) as jezz,'已下架' as type FROM ( SELECT productid, CASE WHEN period <=90 THEN '03' WHEN period >90 AND period <=180 THEN '03-06' WHEN period >180 AND period <=365 THEN '06-12' WHEN period >365 AND period <=730 THEN '12-24' WHEN period >730 THEN '24+' END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( SELECT productid, case when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed) when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 else null end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( select l1.productid, l1.projectType, l1.period, l1.TransNumber addedTransNumber,l1.TransNumber bishu_today,l1.annualRate, cast( SUBSTRING_INDEX( l1.amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast, l1.productScale_yuan as jezz from zcb_loan l1 where state = '已下架' and endSellDate >= \''''+str(startDate)+'''\' and endSellDate < \''''+str(endDate)+'''\' )a )b )c GROUP BY projectType, 
period ''' #and createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(endDate)+'''\' 已售罄 已下架 print sql maps = db.execute(sql) for map in maps : map["startDate"] = startDate map["endDate"] = endDate sql = db.parseInsert("zcb_loan_report", map) print sql db.execute(sql) f.writelines(head) for map in maps : result = "\n"+str(map["projectType"])+","+\ str(map["period"])+"个月,0,"+\ str(map["total"])\ +","+str(map["bishu_today"])\ +","+str(map["bishu"])\ +","+str(map["min_lilv"])\ +","+str(map["max_lilv"])\ +","+str(map["min_qitou"]) +"-"+str(map["max_qitou"])\ +","+str(map["jezz"]) ''' result.append(map["projectType"]) result.append(map["period"]) result.append(0) result.append(map["total"]) result.append(map["bishu_today"]) result.append(map["bishu"]) result.append(map["min_lilv"]) result.append(map["max_lilv"]) result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))''' f.write(result) f.write("\n\n\n") f.flush() f.close()
class CrawlerCQYY: def __init__(self): reload(sys) sys.setdefaultencoding('utf8') # @UndefinedVariable self.pageUtils = PageUtils() ym = "https://zcbprod.alipay.com" def crawler (self): self.pageUtils.url = "https://zcbprod.alipay.com/appointment/lotteryHistoryActivityList.htm" #self.pageUtils.login("13651781949", "lufax123") self.pageUtils.login("*****@*****.**", "lufax123456") print self.pageUtils.browser.title try: pageNum = self.pageUtils.browser.find_element_by_css_selector('span[class="ui-paging-bold"]').text except: pageNum = '1/1' pageNum = pageNum[2:] print pageNum for i in range(int(pageNum)) : url = self.ym+"/appointment/lotteryHistoryActivityList.htm?currentPage="+str(i) print "url-->",url page = self.pageUtils.downloadPage(url) flag = self.parsePageUrl(page) if flag ==False: self.pageUtils.browser.quit() self.crawler() return urls = flag for u in urls: u = self.ym + u print u page = self.pageUtils.downloadPage(u) print self.parsePage(page) def parsePageUrl (self,page): soup = BeautifulSoup(page) print "title==>",soup.title.string if "登录中心 - 支付宝" in soup.title.string: return False hrefs = soup.find_all('td', {'class': "detail-link"})# #hrefs = soup.find_all('a', {'seed': re.compile('detailLink-linkT*')}) print "==>",hrefs result = [] for href in hrefs: result.append(href.find("a")['href']) print "-->",href,"===",href.find("a"),"---",href.find("a")['href'] return result def parsePage(self,page): result = {} soup = BeautifulSoup(page) #利率 lilv = soup.find('p',{'class':'product-param-value-num'}) result["lilv"]=lilv.getText() #投资期限 tzqx = soup.find('p',{'class':'product-param-value-num'}) result["tzqx"]=tzqx.getText() #担保机构 dbjg = soup.find('h2').find("a").getText() result["dbjg"]=dbjg.getText() #类型 type = soup.find('p',{'class':'product-param-value-num product-param-value-txt'}) result["type"] = type.getText() infos = soup.find('div',{'class':'app-apply'}) #中签人数 result["zqrs"]=infos #中签率 #result["zql"]=tzqx infos = 
soup.find('div',{'class':'product-info-detail fn-clear'}).find("p") #总金额 zje = infos[0].getText() result["zje"]=zje #已预约金额 yyje = infos[1].getText() result["yyje"]=yyje #抽签完成时间 cqwcDate = infos[2].getText() result["cqwcDate"]=cqwcDate #开放预约时间 kfyyDate = soup.find('div',{'class':'timer1'}).find("p") result["kfyyDate"]=kfyyDate.getText() #开始抽签时间 kfyyDate = soup.find('div',{'class':'timer2'}).find("p") result["kfyyDate"]=kfyyDate.getText() #下架时间 xjDate = soup.find('div',{'class':'timer4'}).find("p") result["xjDate"]=xjDate.getText() #起息日 qxDate = soup.find('div',{'class':'timer5'}).find("p") result["qxDate"]=qxDate.getText() #到期日 dqDate = soup.find('div',{'class':'timer6'}).find("p") result["dqDate"]=dqDate.getText() return result
def tjDay(self, startDate): # startDate = "2015-09-13" startDate = startDate[:10] # print "startDate-->",startDate[:10] days = 1 endDate = str(PageUtils.delayed(startDate[:10], days)) # 相隔的天数 tiqianDay = -1 tiqianStartDate = str(PageUtils.delayed(startDate, tiqianDay)) tiqianendDate = str(PageUtils.delayed(endDate[:10], tiqianDay)) print startDate, ":", endDate, "====", tiqianStartDate, ":", tiqianendDate reload(sys) sys.setdefaultencoding("utf8") # @UndefinedVariable db = dataBaseOperator() f = file("report/" + str(startDate) + ".txt", "w+") # main_tablename = "zcb_insu" # slave_tablename = "zcb_insu_process" tablename = ["zcb_loan", "zcb_insu", "zcb_others"] # "zcb_loan","zcb_insu","zcb_others" head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长" for main_tablename in tablename: slave_tablename = main_tablename + "_process" sql = ( """ SELECT projectType, period, count(*)as total, sum( bishu_today )as bishu_today , sum( addedTransNumber ) as bishu , min( annualRate ) as min_lilv, max( annualRate ) as max_lilv , min( amountAtLeast ) as min_qitou, max( amountAtLeast ) as max_qitou, sum(jezz) as jezz FROM ( SELECT productid, CASE WHEN period <=90 THEN '03' WHEN period >90 AND period <=180 THEN '03-06' WHEN period >180 AND period <=365 THEN '06-12' WHEN period >365 AND period <=730 THEN '12-24' WHEN period >730 THEN '24+' END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( SELECT productid, case when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed) when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 else null end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz FROM ( select p.productid, i.projectType, i.period, p.bishu as 'addedTransNumber',p.bishu_today,i.annualRate, cast( SUBSTRING_INDEX( amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast, (i.productScale_yuan*wanchenglv/100)as jezz from ( select a1.productid , 
(IFNULL(a1.bishu,0)- IFNULL(a2.bishu,0))as bishu ,IFNULL(a1.bishu,0)as bishu_today,(a1.wanchenglv-a2.wanchenglv)as wanchenglv from (select productid,bishu,IFNULL(b1.wanchenglv,0)as wanchenglv from `""" + slave_tablename + """` b1 where createDate >= \'""" + startDate + """\' and createDate < \'""" + endDate + """\')a1 left join (select productid,bishu,IFNULL(b2.wanchenglv,0)as wanchenglv from `""" + slave_tablename + """` b2 where createDate >= \'""" + tiqianStartDate + """\' and createDate < \'""" + tiqianendDate + """\')a2 on a1.productid = a2.productid ) p left join """ + main_tablename + """ i on p.productid = i.productid )c )a )b GROUP BY projectType, period """ ) print sql maps = db.execute(sql) f.writelines(head) for map in maps: result = ( "\n" + str(map["projectType"]) + "," + str(map["period"]) + "个月,0," + str(map["total"]) + "," + str(map["bishu_today"]) + "," + str(map["bishu"]) + "," + str(map["min_lilv"]) + "," + str(map["max_lilv"]) + "," + str(map["min_qitou"]) + "-" + str(map["max_qitou"]) + "," + str(map["jezz"]) ) """ result.append(map["projectType"]) result.append(map["period"]) result.append(0) result.append(map["total"]) result.append(map["bishu_today"]) result.append(map["bishu"]) result.append(map["min_lilv"]) result.append(map["max_lilv"]) result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))""" f.write(result) f.write("\n\n\n") f.flush() f.close()
# print "synMainData query_sql-->"+query_sql db = DataBase() query_result = db.execute(query_sql) # currentTime_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()) currentTime_str = PageUtils.getCurrentTime() if len(query_result) < 1: # insert data data["createDate"] = currentTime_str data["updateDate"] = currentTime_str insert_sql = db.parseInsert(tablename, data) db.execute(insert_sql) db.dataBaseClose() if __name__ == "__main__": crawler_obj = CrawlerAllProductList() print "start-->" + str(PageUtils.getCurrentTime()) crawler_obj.crawlWebs("OTHERS") crawler_obj.crawlWebs("INSU") crawler_obj.crawlWebs("FUND") crawler_obj.crawlWebs("LOAN") print "end-->" + str(PageUtils.getCurrentTime()) """ try: crawler_obj.crawlWebs('FUND') except: logging.warning("Exception occur about FUND") """