def login (self):
        """Log in with the currently selected account and keep the session.

        The credentials come from ``self.g_users[self.g_index]``. They are
        passed to ``PageUtils.login`` together with the XPath locators of the
        login form and the captcha settings. ``PageUtils.login`` is treated as
        returning ``False`` on failure and a browser object on success.

        Failed attempts are retried until one succeeds. ``self.g_waitFlag``
        counts the failures; once it exceeds 5, every further retry is
        preceded by a 10 minute pause.

        On success ``self.browser`` holds the browser and ``self.opener`` an
        opener built from the browser's cookies.
        """
        user = self.g_users[self.g_index]
        username = user.get("username")
        password = user.get("password")

        url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668"

        # Deliberately not called "login": a local with that name hid the
        # method, so the old retry ("login()") tried to call this dict.
        login_params = {"username":username,
                "password":password,
                "username_input":"//input[@id='J-input-user']",
                "password_input":"//*[@id='password_input']",
                "submit_but":"//*[@id='J-login-btn']",
                "check_code":{"check_code_input":"//*[@id='J-input-checkcode']",
                            "img_tag":"//*[@id='J-checkcode-img']",
                            "img_path":"/home/lufax/img"},
                "url":url}

        f = PageUtils.login(login_params)

        # Retry in a loop rather than by recursion, and count the failure
        # *before* the next attempt so the back-off can actually kick in.
        while f == False:
            if self.g_waitFlag > 5:
                sleep(10*60)
            self.g_waitFlag += 1
            f = PageUtils.login(login_params)

        self.browser = f
        self.opener = PageUtils.build_opener_with_chrome_cookies(self.browser.get_cookies())
 def zqProductInfo(self):
         
         # 得到个人贷企业贷的有效产品ids    
     datas = self.getProductId()
     print "len-->" + str(len(datas))
     if len(datas) < 1:
         print "这个范围数据已经爬完,休眠5个小时",PageUtils.getCurrentTime()
         sleep(60 * 60 * 5)
         print "休眠结束,开始爬数据",PageUtils.getCurrentTime()
     index = 1
        
     for data in datas:
         sleep(6)
         productId = data["productid"]
         id = data["id"]
         index += 1
         url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=" + productId
         print "zqProductInfo -->" , url
         try:
                 # 下载页面
             page = PageUtils.downloadPage(self.opener,url)
         except Exception, e:
             print "downloadPage err -->", e
             continue
         try:
             p = self.parsePage(page)
             
         except Exception, e:
             print "parsePage err -->", e
Пример #3
0
 def synzcb_loan (self,startDate):
     
     today = PageUtils.getCurrentDate()
     startDate = PageUtils.delayed(today,-1)
     startDate2 = PageUtils.delayed(today,-2)
     DB = dataBaseOperator()
     
     sql = '''
         insert into zcb_loan_state (productid)
         select productid from zcb_loan where productid in  (
             select productid from zcb_loan_process where createDate >= \''''+str(startDate2)+'''\' and createDate < \''''+str(startDate)+'''\'  and productid not in  
                 (select productid from zcb_loan_process where createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(today)+'''\')) 
         and state != '已下架' and state !='已售罄' and productid not in (select productid from zcb_loan_state )
      '''
     print sql 
     print "exe row -->",DB.execute(sql)
     #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})#
      
     DB.dataBaseClose()
     return  
 def parsePage(self, page):
     
     soup = BeautifulSoup(page)
     result = {} 
     try : 
         product_name = soup.find('p', {'class':'product-name'}).get_text()
         product_name = PageUtils.replaceString(product_name)
         product_name = product_name.split(' ')
         product_name = product_name[0]
         result["productName"] = product_name
     except Exception, e:
         print "getLoanDetailsInfo-->product_name==>", e
         if self.is_checkPage(soup) : 
             return False
    def getListInfos(self, page):
        table = []
        soup = BeautifulSoup(page)
        # print soup

        productTag_list = soup.find_all("ul", {"class": re.compile("icontent-ul*")})

        # print productTag_list
        for productTag in productTag_list:
            try:
                productId = productTag["productid"]
                liTag_list = productTag.find_all("li")
                annualRate = liTag_list[0].find("span").get_text()
                period = liTag_list[1].find("span", {"class": "year"}).get_text()
                period = PageUtils.replaceString(period)

                projectType = PageUtils.replaceString(liTag_list[2].get_text())
                amountAtLeast = PageUtils.replaceString(liTag_list[3].get_text())
                amountAtLeast = amountAtLeast[:-2]
                transNumber = PageUtils.replaceString(liTag_list[4].get_text())
                transNumber = transNumber[:-1]
                ## Need delete
                # print annualRate, period, productType, amountAtLeast, transNumber

                mapData = {
                    "productId": productId,
                    "annualRate": annualRate,
                    "period": period,
                    "projectType": projectType,
                    "amountAtLeast": amountAtLeast,
                    "transNumber": transNumber,
                }

                table.append(mapData)
                # print List
            except Exception, e:
                print "getListInfos-->", e
    def processData(self, productid, data, tablename):
        """Insert or update today's process row for one product.

        Looks in ``tablename`` for a row of ``productid`` whose
        ``createDate`` falls within today. If there is none, ``data`` is
        inserted with ``createDate`` set to now; otherwise the first
        matching row is updated by its ``id``. ``updateDate`` is always set
        to now.

        Note that ``data`` is modified in place (``updateDate`` and possibly
        ``createDate`` are added).

        :param productid: product identifier, used verbatim in the query
        :param data: column -> value mapping to store
        :param tablename: name of the process table (e.g. zcb_insu_process)
        """
        # Today's date, formatted yyyy-mm-dd.
        startData = PageUtils.getCurrentDate()
        endData = PageUtils.delayed(startData, 1)
        currentTime_str = PageUtils.getCurrentTime()
        db = DataBase()

        # Close the connection even when a statement raises.
        try:
            # NOTE(review): the SQL is assembled by string concatenation and
            # productid comes from crawled pages. Switch to a parameterized
            # query if DataBase.execute supports one.
            query_sql = (
                "select * from "
                + tablename
                + " where productId = '"
                + productid
                + "' and createDate < '"
                + endData
                + "' and createDate>='"
                + startData
                + "'"
            )
            query_result = db.execute(query_sql)

            data["updateDate"] = currentTime_str

            if len(query_result) < 1:
                # No row for today yet: insert one.
                data["createDate"] = currentTime_str
                sql = db.parseInsert(tablename, data)
            else:
                row_id = query_result[0]["id"]
                sql = db.parseUpdate(tablename, data, "id='" + str(row_id) + "'")
            db.execute(sql)
        finally:
            db.dataBaseClose()
    def synMainData(self, productid, tablename, data):
        """Insert the main-table row of a product if it does not exist yet.

        When ``tablename`` has no row with this ``productid``, ``data`` is
        inserted with ``createDate`` and ``updateDate`` both set to the
        current time (``data`` is modified in place). Existing rows are left
        untouched.

        :param productid: product identifier, used verbatim in the query
        :param tablename: name of the main table
        :param data: column -> value mapping to insert
        """
        # NOTE(review): string-built SQL with a crawled productid. Switch to
        # a parameterized query if DataBase.execute supports one.
        query_sql = "select * from " + tablename + " where productId = '" + productid + "'"
        db = DataBase()

        # Close the connection even when a statement raises.
        try:
            query_result = db.execute(query_sql)

            currentTime_str = PageUtils.getCurrentTime()
            if len(query_result) < 1:  # insert data
                data["createDate"] = currentTime_str
                data["updateDate"] = currentTime_str
                insert_sql = db.parseInsert(tablename, data)
                db.execute(insert_sql)
        finally:
            db.dataBaseClose()
Пример #8
0
            select productid from zcb_loan where productid in  (
                select productid from zcb_loan_process where createDate >= \''''+str(startDate2)+'''\' and createDate < \''''+str(startDate)+'''\'  and productid not in  
                    (select productid from zcb_loan_process where createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(today)+'''\')) 
            and state != '已下架' and state !='已售罄' and productid not in (select productid from zcb_loan_state )
         '''
        print sql 
        print "exe row -->",DB.execute(sql)
        #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})#
         
        DB.dataBaseClose()
        return  
if __name__ == '__main__':
    data_obj = data_1Week()
    #data_obj.dropTables()
    #data_obj.synzcb("2015-09-21")
    print PageUtils.getCurrentDate() 
    data_obj.synzcb_loan("2015-11-17")#PageUtils.getCurrentDate()
    #data_obj.createTable()
    
    '''
    startDate = ""
      
    t_startDate = time.strptime(startDate, "%Y-%m-%d")
    d_startDate = datetime.datetime(* t_startDate[:6])
    d_endDate = d_startDate + datetime.timedelta(days=7)
    
   
    s = str(time.strftime("%Y%m%d", t_startDate))
    e=datetime.datetime.strftime((d_endDate - datetime.timedelta(days=1)), '%Y%m%d')
    
    print d_startDate,"==",d_endDate,"--",s,"***",e
 soup = BeautifulSoup(page)
 result = {} 
 try : 
     product_name = soup.find('p', {'class':'product-name'}).get_text()
     product_name = PageUtils.replaceString(product_name)
     product_name = product_name.split(' ')
     product_name = product_name[0]
     result["productName"] = product_name
 except Exception, e:
     print "getLoanDetailsInfo-->product_name==>", e
     if self.is_checkPage(soup) : 
         return False
     
 try :
     guaranteeCompany = soup.find('p', {'class': 'purchase-safe-info-text'}).get_text()
     guaranteeCompany = PageUtils.replaceString(guaranteeCompany)
     result["guaranteeCompany"] = guaranteeCompany
 except:
     guaranteeCompany = "notFound"
 # print guaranteeCompany
 
 # # total amount of product
 try :
     product_infos_list = soup.find_all('div', {'class':'product-info-detail-item'})
     totalAmount = product_infos_list[0].find_all('p')[1].get_text()
     o = PageUtils.clearTotalAmount(str(totalAmount))
     print "totalAmount-->", totalAmount, "=========", o
     totalAmount = o
     result["productScale_yuan"] = totalAmount
 except:
     totalAmount = "notFound" 
Пример #10
0
 def __init__(self):
     """Force the process default encoding to UTF-8 and create a PageUtils helper."""
     # Python 2 only: site.py removes sys.setdefaultencoding at start-up, so
     # the module must be reloaded before the call below is available. The
     # order of these two statements therefore matters.
     reload(sys)
     sys.setdefaultencoding("utf8")  # @UndefinedVariable
     self.pageUtils = PageUtils()
Пример #11
0
class CrawlerHome:
    def __init__(self):
        """Force the process default encoding to UTF-8 and create a PageUtils helper."""
        # Python 2 only: site.py removes sys.setdefaultencoding at start-up,
        # so the module must be reloaded before the call below is available.
        # The order of these two statements therefore matters.
        reload(sys)
        sys.setdefaultencoding("utf8")  # @UndefinedVariable
        self.pageUtils = PageUtils()

    def updateData(self, data):
        _main = data.get("m")
        _slaves = data.get("s")
        main = {}
        slave = {}

        print _main
        print _slaves
        db = DataBase()

        today = self.pageUtils.getCurrentDate()
        tomorrow = self.pageUtils.delayed(today, -1)

        # 取昨天的数据
        sql = "select * from zcb_report_master where date(createdate)='" + str(tomorrow) + "'"
        tomorrow_main = db.execute(sql)
        if len(tomorrow_main) > 0:
            tomorrow_main = tomorrow_main[0]

            print "sql==>", sql

            for _d in _main:
                print "-->", _main.get(_d), "==", tomorrow_main.get(_d)
                if _d == "yycjjebl":
                    main[_d] = _main.get(_d)
                else:
                    if tomorrow_main.get(_d) == None or _main.get(_d) > tomorrow_main.get(_d):
                        main[_d] = _main.get(_d)

                # 今天是否有数据
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                # 主表数据插入
                sql = db.parseInsert("zcb_report_master", main)
                print "insert master sql -->", sql
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", main, " id = " + str(today_main.get("id")))
                print "update master sql -->", sql
            db.execute(sql)
        else:
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                sql = db.parseInsert("zcb_report_master", _main)
                print "insert master sql -->", sql
                db.execute(sql)
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", _main, "id=" + str(today_main.get("id")))
                print "update master sql -->", sql
                db.execute(sql)
            # 处理从表数据
        for _slave in _slaves:

            sql = (
                "select * from zcb_report_slave where date(createdate)='"
                + str(tomorrow)
                + "' and type='"
                + _slave.get("type")
                + "' and tzqx = '"
                + _slave.get("tzqx")
                + "'"
            )
            print sql
            tomorrow_slave = db.execute(sql)
            if len(tomorrow_slave) > 0:
                tomorrow_slave = tomorrow_slave[0]
                slave = {}
                slave["type"] = _slave.get("type")
                slave["tzqx"] = _slave.get("tzqx")
                for _d in _slave:
                    print "-->", _slave.get(_d), "==", tomorrow_slave.get(_d)
                    if tomorrow_slave.get(_d) == None or _slave.get(_d) != tomorrow_slave.get(_d):
                        print _d, "==>", _slave.get(_d), "!!!!!", tomorrow_slave.get(_d)
                        slave[_d] = _slave.get(_d)

                if len(slave) < 1:
                    continue
                print "slave-->", slave
                sql = (
                    "select * from zcb_report_slave where date(createdate)='"
                    + str(today)
                    + "' and type='"
                    + _slave.get("type")
                    + "' and tzqx = '"
                    + _slave.get("tzqx")
                    + "'"
                )
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    # insert
                    sql = db.parseInsert("zcb_report_slave", slave)
                    print "insert slave sql -->", sql
                else:
                    # update
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", slave, " id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                db.execute(sql)
            else:
                sql = (
                    "select * from zcb_report_slave where date(createdate)='"
                    + str(today)
                    + "' and type='"
                    + _slave.get("type")
                    + "' and tzqx = '"
                    + _slave.get("tzqx")
                    + "'"
                )
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    sql = db.parseInsert("zcb_report_slave", _slave)
                    print "insert slave sql -->", sql
                    db.execute(sql)
                else:
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", _slave, "id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                    db.execute(sql)

    def crawlerTest(self):
        """Manual smoke test: log in via one product page, then open another.

        NOTE(review): the account credentials are hard-coded here.
        """
        login_page = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668"
        next_page = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528"

        self.pageUtils.url = login_page
        self.pageUtils.login("13651781949", "lufax123")
        # Wait 10 seconds after the login before navigating on.
        sleep(10)
        self.pageUtils.browser.get(next_page)

    def crawler(self):

        url = "https://zhaocaibao.alipay.com/pf/productList.htm"

        browser = self.pageUtils.startBrowser()
        browser.get(url)
        print browser.title
        a = self.parsePage_home(browser)

        url = "https://cmspromo.alipay.com/finance/fullyear.htm"
        browser.get(url)
        print browser.title
        b = self.parsePage_finance(browser)

        b.update()
        a.get("m").update(b)
        # print a.get("s")
        # print b
        self.updateData(a)
        # d = self.pageUtils.downloadPage(url)

        # browser.find_element("", "").get_attribute(name)
        # print "==>",browser.find_element_by_class_name("data-box").text()

        browser.quit()

    def parsePage_home(self, page):
        result = {}

        result_m = {}
        result["m"] = result_m
        # print soup.title
        # #平台成交金额
        cjjes = page.find_elements_by_class_name("data-box")
        c = ""
        for cjje in cjjes:
            # cjje += cjje.get_attribute("class")
            c += cjje.text
        result_m["cjje"] = c

        yycjje = page.find_element_by_class_name("week-book-data")
        result_m["yycjje"] = self.clearNumber(yycjje.text)

        yycjjebl = page.find_element_by_class_name("book-rate-data")
        result_m["yycjjebl"] = yycjjebl.text

        grqyds = page.find_elements_by_css_selector('div[class="several-months fn-clear"]')

        i = 0
        qixis = ["3", "3-6", "6-12", "12-24", "24"]
        result_s_list = []
        result["s"] = result_s_list
        for grqyd in grqyds:
            result_s_map = {"type": "个人企业贷"}
            result_s_list.append(result_s_map)
            result_s_map["tzqx"] = qixis[i]
            aa = grqyd.find_element_by_css_selector('div[class="product-book fn-clear"]')
            zg = aa.find_element_by_class_name("content-third-type")  # 总共
            result_s_map["zgje"] = self.clearNumber(zg.text)
            yylilv = aa.find_element_by_class_name("content-second-type")  # 预约利率
            result_s_map["yylilv"] = yylilv.text
            print "yylilv-->", yylilv.text

            try:
                bb = grqyd.find_element_by_css_selector('div[class="product-buy fn-clear"]')
                gm = bb.find_element_by_class_name("content-third-type")  # 购买
                result_s_map["gmje"] = self.clearNumber(gm.text)

                gmlilv = bb.find_element_by_css_selector('li[class="w145 buy-product-rate"]')  # 购买利率
                result_s_map["gmlilv"] = gmlilv.text
                print "gmlilv-->", gmlilv.text
            except Exception, e:
                print e
            i += 1
            # .find_element_by_class_name("content-third-type")#购买
            # print zg,gm
        # print result
        return result
Пример #12
0
            and state != '已下架' and state !='已售罄'
         '''
        print sql 
        print "exe row -->",DB.execute(sql)
        #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})#
         
        DB.dataBaseClose()
        return  
    
if __name__ == '__main__':
    # Script entry point: run the loan state synchronisation once, passing
    # the current date.
    data_obj = SynLoanData()
    #data_obj.dropTables()
    #data_obj.synzcb("2015-09-21")
#     print PageUtils.getCurrentDate() 
    #data_obj.synzcb_loan("2015-10-17")#PageUtils.getCurrentDate()
    data_obj.synzcb_loan(PageUtils.getCurrentDate())
    #data_obj.createTable()
    
    # The string literal below is disabled scratch code (date-range
    # arithmetic); as a bare expression it has no effect at run time.
    '''
    startDate = ""
      
    t_startDate = time.strptime(startDate, "%Y-%m-%d")
    d_startDate = datetime.datetime(* t_startDate[:6])
    d_endDate = d_startDate + datetime.timedelta(days=7)
    
   
    s = str(time.strftime("%Y%m%d", t_startDate))
    e=datetime.datetime.strftime((d_endDate - datetime.timedelta(days=1)), '%Y%m%d')
    
    print d_startDate,"==",d_endDate,"--",s,"***",e
    '''
Пример #13
0
    def tjDay(self,startDate):  
        #startDate = "2015-09-13"
        startDate = startDate[:10]
        #print "startDate-->",startDate[:10]
        days = 1
        endDate = str(PageUtils.delayed(startDate[:10],days))
           #相隔的天数
        tiqianDay = -1
        tiqianStartDate = str(PageUtils.delayed(startDate,tiqianDay))
        tiqianendDate = str(PageUtils.delayed(endDate[:10],tiqianDay))
        print startDate,":",endDate,"====",tiqianStartDate,":",tiqianendDate
        
        
        
        db = dataBaseOperator()

        f=file("report/loan_"+str(startDate)+".txt","w+")
        #main_tablename = "zcb_insu" 
        #slave_tablename = "zcb_insu_process"
        main_tablename = "zcb_loan"#,"zcb_insu","zcb_others"
        head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长"
        slave_tablename = main_tablename+"_process"
        sql = '''
        
            SELECT projectType, 
            period, 
            count(*)as total, 
            sum( bishu_today )as bishu_today  ,
            sum( addedTransNumber ) as bishu , 
            min( annualRate ) as min_lilv,
            max( annualRate ) as max_lilv  ,
            min( amountAtLeast ) as min_qitou,
            max( amountAtLeast ) as max_qitou,
            sum(jezz) as jezz,'已下架' as type
            FROM (
                SELECT productid,
                CASE
                WHEN period <=90 THEN '03'
                WHEN period >90 AND period <=180 THEN '03-06'
                WHEN period >180 AND period <=365 THEN '06-12'
                WHEN period >365 AND period <=730 THEN '12-24'
                WHEN period >730 THEN '24+'
                END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                FROM (
                    SELECT productid,  
                        case 
                        when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed)  
                        when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 
                        else null
                        end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                    FROM (
                        
                        select l1.productid, l1.projectType, l1.period,
                            l1.TransNumber addedTransNumber,l1.TransNumber bishu_today,l1.annualRate,
                            cast( SUBSTRING_INDEX( l1.amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast,
                            l1.productScale_yuan as jezz
                        from  zcb_loan l1  where state = '已下架' and endSellDate >= \''''+str(startDate)+'''\' and endSellDate < \''''+str(endDate)+'''\'
                    
                   )a
               )b
           )c GROUP BY projectType, period

        '''
        #and createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(endDate)+'''\' 已售罄 已下架
        print sql
        maps = db.execute(sql)
        for map in maps :
            map["startDate"] = startDate
            map["endDate"] = endDate
            
            sql = db.parseInsert("zcb_loan_report", map)
            print sql 
            db.execute(sql)
            
            
        
        f.writelines(head)
        for map in maps :
            
            result = "\n"+str(map["projectType"])+","+\
                str(map["period"])+"个月,0,"+\
                str(map["total"])\
                +","+str(map["bishu_today"])\
                +","+str(map["bishu"])\
                +","+str(map["min_lilv"])\
                +","+str(map["max_lilv"])\
                +","+str(map["min_qitou"]) +"-"+str(map["max_qitou"])\
                +","+str(map["jezz"]) 
        
            '''
        result.append(map["projectType"])
        result.append(map["period"])
        result.append(0)
        result.append(map["total"])
        result.append(map["bishu_today"])
        result.append(map["bishu"])
        result.append(map["min_lilv"])
        result.append(map["max_lilv"])
        result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))'''
            
            f.write(result)
        f.write("\n\n\n")
        f.flush()
        f.close()
Пример #14
0
class CrawlerCQYY:
    
    def __init__(self):
        reload(sys) 
        sys.setdefaultencoding('utf8')  # @UndefinedVariable
        self.pageUtils = PageUtils()

    ym = "https://zcbprod.alipay.com"

    def crawler (self):
        
        self.pageUtils.url = "https://zcbprod.alipay.com/appointment/lotteryHistoryActivityList.htm"
        #self.pageUtils.login("13651781949", "lufax123")
        self.pageUtils.login("*****@*****.**", "lufax123456")
        print self.pageUtils.browser.title
        
        try:     
            pageNum = self.pageUtils.browser.find_element_by_css_selector('span[class="ui-paging-bold"]').text
        except:
            pageNum = '1/1'
        pageNum  = pageNum[2:] 
        print pageNum
        for i in range(int(pageNum)) :
            
            url = self.ym+"/appointment/lotteryHistoryActivityList.htm?currentPage="+str(i)
            print "url-->",url
            page = self.pageUtils.downloadPage(url)
            
            flag = self.parsePageUrl(page)
            if flag ==False:
                self.pageUtils.browser.quit()
                self.crawler()
                return
                
            urls = flag
            for u in urls:
                u = self.ym + u
                print u
                page = self.pageUtils.downloadPage(u)
                print self.parsePage(page)
        
        
    def parsePageUrl  (self,page):
           
        soup = BeautifulSoup(page)
        print "title==>",soup.title.string
        if "登录中心 - 支付宝" in soup.title.string:
            return False
        hrefs = soup.find_all('td', {'class': "detail-link"})#
        #hrefs = soup.find_all('a', {'seed': re.compile('detailLink-linkT*')})
        print "==>",hrefs
        result = []
        for href in hrefs:
            result.append(href.find("a")['href'])
            print "-->",href,"===",href.find("a"),"---",href.find("a")['href']
        return result
    
    def parsePage(self,page):
        result = {}
        soup = BeautifulSoup(page)
            #利率
        lilv = soup.find('p',{'class':'product-param-value-num'}) 
        result["lilv"]=lilv.getText()   
            #投资期限
        tzqx = soup.find('p',{'class':'product-param-value-num'}) 
        result["tzqx"]=tzqx.getText()   
            #担保机构
        dbjg = soup.find('h2').find("a").getText() 
        result["dbjg"]=dbjg.getText()     
            #类型
        type = soup.find('p',{'class':'product-param-value-num product-param-value-txt'}) 
        result["type"] = type.getText()    
        
        infos = soup.find('div',{'class':'app-apply'}) 
            #中签人数
        result["zqrs"]=infos  
            #中签率
        #result["zql"]=tzqx  
        
        
        infos = soup.find('div',{'class':'product-info-detail fn-clear'}).find("p")
            #总金额
        zje = infos[0].getText()
        result["zje"]=zje
          
            #已预约金额
        yyje = infos[1].getText()
        result["yyje"]=yyje 
            
            #抽签完成时间
        cqwcDate = infos[2].getText()
        result["cqwcDate"]=cqwcDate 
        
        
            #开放预约时间
        kfyyDate = soup.find('div',{'class':'timer1'}).find("p")
        result["kfyyDate"]=kfyyDate.getText()
            #开始抽签时间
        kfyyDate = soup.find('div',{'class':'timer2'}).find("p")
        result["kfyyDate"]=kfyyDate.getText()
            #下架时间
        xjDate = soup.find('div',{'class':'timer4'}).find("p")
        result["xjDate"]=xjDate.getText()   
            #起息日
        qxDate = soup.find('div',{'class':'timer5'}).find("p")
        result["qxDate"]=qxDate.getText()   
            #到期日
        dqDate = soup.find('div',{'class':'timer6'}).find("p")
        result["dqDate"]=dqDate.getText()
        return result
Пример #15
0
    def tjDay(self, startDate):
        # startDate = "2015-09-13"
        startDate = startDate[:10]
        # print "startDate-->",startDate[:10]
        days = 1
        endDate = str(PageUtils.delayed(startDate[:10], days))
        # 相隔的天数
        tiqianDay = -1
        tiqianStartDate = str(PageUtils.delayed(startDate, tiqianDay))
        tiqianendDate = str(PageUtils.delayed(endDate[:10], tiqianDay))
        print startDate, ":", endDate, "====", tiqianStartDate, ":", tiqianendDate

        reload(sys)
        sys.setdefaultencoding("utf8")  # @UndefinedVariable

        db = dataBaseOperator()

        f = file("report/" + str(startDate) + ".txt", "w+")
        # main_tablename = "zcb_insu"
        # slave_tablename = "zcb_insu_process"
        tablename = ["zcb_loan", "zcb_insu", "zcb_others"]  # "zcb_loan","zcb_insu","zcb_others"
        head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长"
        for main_tablename in tablename:
            slave_tablename = main_tablename + "_process"
            sql = (
                """
            
                SELECT projectType, 
                period, 
                count(*)as total, 
                sum( bishu_today )as bishu_today  ,
                sum( addedTransNumber ) as bishu , 
                min( annualRate ) as min_lilv,
                max( annualRate ) as max_lilv  ,
                min( amountAtLeast ) as min_qitou,
                max( amountAtLeast ) as max_qitou,
                sum(jezz) as jezz
                FROM (
                    SELECT productid,
                    CASE
                    WHEN period <=90 THEN '03'
                    WHEN period >90 AND period <=180 THEN '03-06'
                    WHEN period >180 AND period <=365 THEN '06-12'
                    WHEN period >365 AND period <=730 THEN '12-24'
                    WHEN period >730 THEN '24+'
                    END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                    FROM (
                        SELECT productid,  
                            case 
                            when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed)  
                            when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 
                            else null
                            end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                        FROM (
                            
                            select p.productid, i.projectType, i.period,
                        p.bishu as 'addedTransNumber',p.bishu_today,i.annualRate,
                        cast( SUBSTRING_INDEX( amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast,
                        (i.productScale_yuan*wanchenglv/100)as jezz
                    from (
                                select a1.productid , (IFNULL(a1.bishu,0)- IFNULL(a2.bishu,0))as bishu ,IFNULL(a1.bishu,0)as bishu_today,(a1.wanchenglv-a2.wanchenglv)as wanchenglv from 
                                    (select productid,bishu,IFNULL(b1.wanchenglv,0)as wanchenglv
                                    from `"""
                + slave_tablename
                + """` b1 where createDate >= \'"""
                + startDate
                + """\' and createDate < \'"""
                + endDate
                + """\')a1
                                left join 
                                    (select productid,bishu,IFNULL(b2.wanchenglv,0)as wanchenglv 
                                    from `"""
                + slave_tablename
                + """` b2 where createDate >= \'"""
                + tiqianStartDate
                + """\' and createDate < \'"""
                + tiqianendDate
                + """\')a2
                                on a1.productid = a2.productid
                            ) p left join """
                + main_tablename
                + """ i on p.productid = i.productid
                        
                        )c
                    )a
                )b
                GROUP BY projectType, period

            """
            )
            print sql
            maps = db.execute(sql)
            f.writelines(head)
            for map in maps:
                result = (
                    "\n"
                    + str(map["projectType"])
                    + ","
                    + str(map["period"])
                    + "个月,0,"
                    + str(map["total"])
                    + ","
                    + str(map["bishu_today"])
                    + ","
                    + str(map["bishu"])
                    + ","
                    + str(map["min_lilv"])
                    + ","
                    + str(map["max_lilv"])
                    + ","
                    + str(map["min_qitou"])
                    + "-"
                    + str(map["max_qitou"])
                    + ","
                    + str(map["jezz"])
                )

                """
            result.append(map["projectType"])
            result.append(map["period"])
            result.append(0)
            result.append(map["total"])
            result.append(map["bishu_today"])
            result.append(map["bishu"])
            result.append(map["min_lilv"])
            result.append(map["max_lilv"])
            result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))"""

                f.write(result)
            f.write("\n\n\n")
            f.flush()
        f.close()
Пример #16
0
        # print "synMainData query_sql-->"+query_sql
        db = DataBase()
        query_result = db.execute(query_sql)

        # currentTime_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        currentTime_str = PageUtils.getCurrentTime()
        if len(query_result) < 1:  # insert data
            data["createDate"] = currentTime_str
            data["updateDate"] = currentTime_str
            insert_sql = db.parseInsert(tablename, data)
            db.execute(insert_sql)
        db.dataBaseClose()


if __name__ == "__main__":

    # Script entry point: crawl the four product categories one after
    # another, printing the time before the first and after the last run.
    crawler_obj = CrawlerAllProductList()
    print "start-->" + str(PageUtils.getCurrentTime())
    crawler_obj.crawlWebs("OTHERS")
    crawler_obj.crawlWebs("INSU")
    crawler_obj.crawlWebs("FUND")
    crawler_obj.crawlWebs("LOAN")
    print "end-->" + str(PageUtils.getCurrentTime())

    # The string literal below is disabled code (a guarded FUND crawl); as a
    # bare expression it has no effect at run time.
    """
    try:
        crawler_obj.crawlWebs('FUND')
    except:
        logging.warning("Exception occur about FUND")
    """