def zqProductInfo(self):
    """Download and parse the purchase page of every product record.

    Records come from ``self.getProductId()`` (per the original note: the
    valid product ids for personal-loan and enterprise-loan products).  Each
    record must provide the keys ``"productid"`` and ``"id"``.

    When no records are returned, the range is reported as fully crawled and
    the method sleeps for five hours; the loop below then has nothing to do.
    Otherwise, for each record, it waits six seconds, builds the purchase URL,
    downloads the page through ``self.opener`` and hands it to
    ``self.parsePage``.  A failed download skips the record; a failed parse is
    only reported.
    """
    records = self.getProductId()
    total = len(records)
    print("len-->" + str(total))

    if total < 1:
        # Nothing left in this range: back off for five hours.
        print("这个范围数据已经爬完,休眠5个小时" + " " + str(PageUtils.getCurrentTime()))
        sleep(60 * 60 * 5)
        print("休眠结束,开始爬数据" + " " + str(PageUtils.getCurrentTime()))

    url_prefix = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId="
    for record in records:
        # Throttle: one request every six seconds.
        sleep(6)
        product_id = record["productid"]
        # Not used in the visible body; the lookup is kept so that a record
        # without an "id" key still raises KeyError as before.
        record_id = record["id"]
        url = url_prefix + product_id
        print("zqProductInfo -->" + " " + url)

        # Download the page; on failure report it and move to the next record.
        try:
            page = PageUtils.downloadPage(self.opener, url)
        except Exception as err:
            print("downloadPage err -->" + " " + str(err))
            continue

        # Parse the page; the result is not used in the visible body.
        try:
            self.parsePage(page)
        except Exception as err:
            print("parsePage err -->" + " " + str(err))
def synMainData(self, productid, tablename, data):
    """Insert ``data`` into ``tablename`` unless ``productid`` is already there.

    Queries ``tablename`` for rows whose ``productId`` column equals
    ``productid``.  If none are found, ``createDate`` and ``updateDate`` are
    set on ``data`` (the caller's dict is modified in place) to the current
    time and the row is inserted.  If a row already exists nothing is written.

    The database handle is always closed, including when a query raises.

    :param productid: product identifier; concatenated into the SQL text, so
        it must be a string.
    :param tablename: table to query and insert into.
    :param data: mapping passed to ``DataBase.parseInsert`` to build the
        insert statement.
    """
    # NOTE(review): the query is assembled by string concatenation, so
    # ``productid`` and ``tablename`` must be trusted values.  Switch to bound
    # parameters if ``DataBase.execute`` supports them -- TODO confirm.
    query_sql = (
        "select * from " + tablename + " where productId = '" + productid + "'"
    )

    db = DataBase()
    try:
        query_result = db.execute(query_sql)
        currentTime_str = PageUtils.getCurrentTime()
        if len(query_result) < 1:
            # No existing row: stamp both dates and insert.
            data["createDate"] = currentTime_str
            data["updateDate"] = currentTime_str
            db.execute(db.parseInsert(tablename, data))
    finally:
        # Release the handle even if a query or the insert builder failed.
        db.dataBaseClose()
def processData(self, productid, data, tablename):
    """Insert or update today's row for ``productid`` in ``tablename``.

    Looks for a row whose ``productId`` equals ``productid`` and whose
    ``createDate`` lies in ``[startData, endData)``, where ``startData`` is
    ``PageUtils.getCurrentDate()`` (today, formatted yyyy-mm-dd per the
    original note) and ``endData`` is ``PageUtils.delayed(startData, 1)``
    (presumably the following day -- TODO confirm).

    ``data["updateDate"]`` is always set to the current time.  If no row is
    found, ``data["createDate"]`` is set as well and the row is inserted;
    otherwise the first matching row is updated by its ``id``.  The caller's
    ``data`` dict is modified in place.

    The database handle is always closed, including when a query raises.

    :param productid: product identifier; concatenated into the SQL text, so
        it must be a string.
    :param data: mapping passed to ``DataBase.parseInsert`` /
        ``DataBase.parseUpdate`` to build the statement.
    :param tablename: table to query and write to.
    """
    startData = PageUtils.getCurrentDate()
    endData = PageUtils.delayed(startData, 1)
    currentTime_str = PageUtils.getCurrentTime()

    # NOTE(review): the query is assembled by string concatenation, so
    # ``productid`` and ``tablename`` must be trusted values.  Switch to bound
    # parameters if ``DataBase.execute`` supports them -- TODO confirm.
    query_sql = (
        "select * from " + tablename
        + " where productId = '" + productid
        + "' and createDate < '" + endData
        + "' and createDate>='" + startData + "'"
    )

    db = DataBase()
    try:
        query_result = db.execute(query_sql)
        data["updateDate"] = currentTime_str
        if len(query_result) < 1:
            # No row for today yet: insert a new one.
            data["createDate"] = currentTime_str
            sql = db.parseInsert(tablename, data)
        else:
            # Today's row exists: update it in place, addressed by its id.
            row_id = query_result[0]["id"]
            sql = db.parseUpdate(tablename, data, "id='" + str(row_id) + "'")
        db.execute(sql)
    finally:
        # Release the handle even if a query or statement builder failed.
        db.dataBaseClose()
# NOTE(review): this line previously began with a header-less copy of the tail
# of synMainData's body (query, conditional insert, close).  It duplicates the
# complete definition and has no enclosing ``def`` here, so it is not
# reproduced as code.
if __name__ == "__main__":
    # Script entry point: run crawlWebs once per category code, in this order,
    # printing the current time before and after the whole run.
    crawler_obj = CrawlerAllProductList()
    print("start-->" + str(PageUtils.getCurrentTime()))
    for category in ("OTHERS", "INSU", "FUND", "LOAN"):
        crawler_obj.crawlWebs(category)
    print("end-->" + str(PageUtils.getCurrentTime()))
    # A disabled sketch used to follow here: wrap the 'FUND' crawl in
    # try/except and log "Exception occur about FUND" as a warning.