Пример #1
0
 def synzcb_loan (self,startDate):
     
     today = PageUtils.getCurrentDate()
     startDate = PageUtils.delayed(today,-1)
     startDate2 = PageUtils.delayed(today,-2)
     DB = dataBaseOperator()
     
     sql = '''
         insert into zcb_loan_state (productid)
         select productid from zcb_loan where productid in  (
             select productid from zcb_loan_process where createDate >= \''''+str(startDate2)+'''\' and createDate < \''''+str(startDate)+'''\'  and productid not in  
                 (select productid from zcb_loan_process where createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(today)+'''\')) 
         and state != '已下架' and state !='已售罄' and productid not in (select productid from zcb_loan_state )
      '''
     print sql 
     print "exe row -->",DB.execute(sql)
     #DB.createLog({"type":"loan_synState","msg":"个人企业贷同步"+str(startDate)+"的数据成功"})#
      
     DB.dataBaseClose()
     return  
    def processData(self, productid, data, tablename):

        # zcb_insu_process
        # 今天的日期 格式 yyyy-mm-dd
        startData = PageUtils.getCurrentDate()
        endData = PageUtils.delayed(startData, 1)
        currentTime_str = PageUtils.getCurrentTime()  # time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
        db = DataBase()
        query_sql = (
            "select * from "
            + tablename
            + " where productId = '"
            + productid
            + "' and createDate < '"
            + endData
            + "' and createDate>='"
            + startData
            + "'"
        )
        # print "processData query_sql-->"+query_sql
        query_result = db.execute(query_sql)

        data["updateDate"] = currentTime_str

        if len(query_result) < 1:
            # 当天没有数据,执行插入
            # zcb_insu_process
            data["createDate"] = currentTime_str
            sql = db.parseInsert(tablename, data)
            # print "processData-->insert sql==>"+sql
        else:
            id = query_result[0]["id"]
            # print id
            sql = db.parseUpdate(tablename, data, "id='" + str(id) + "'")
            # print "processData-->update sql==>"+sql
        db.execute(sql)
        db.dataBaseClose()
Пример #3
0
class CrawlerHome:
    def __init__(self):
        reload(sys)
        sys.setdefaultencoding("utf8")  # @UndefinedVariable
        self.pageUtils = PageUtils()

    def updateData(self, data):
        _main = data.get("m")
        _slaves = data.get("s")
        main = {}
        slave = {}

        print _main
        print _slaves
        db = DataBase()

        today = self.pageUtils.getCurrentDate()
        tomorrow = self.pageUtils.delayed(today, -1)

        # 取昨天的数据
        sql = "select * from zcb_report_master where date(createdate)='" + str(tomorrow) + "'"
        tomorrow_main = db.execute(sql)
        if len(tomorrow_main) > 0:
            tomorrow_main = tomorrow_main[0]

            print "sql==>", sql

            for _d in _main:
                print "-->", _main.get(_d), "==", tomorrow_main.get(_d)
                if _d == "yycjjebl":
                    main[_d] = _main.get(_d)
                else:
                    if tomorrow_main.get(_d) == None or _main.get(_d) > tomorrow_main.get(_d):
                        main[_d] = _main.get(_d)

                # 今天是否有数据
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                # 主表数据插入
                sql = db.parseInsert("zcb_report_master", main)
                print "insert master sql -->", sql
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", main, " id = " + str(today_main.get("id")))
                print "update master sql -->", sql
            db.execute(sql)
        else:
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                sql = db.parseInsert("zcb_report_master", _main)
                print "insert master sql -->", sql
                db.execute(sql)
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", _main, "id=" + str(today_main.get("id")))
                print "update master sql -->", sql
                db.execute(sql)
            # 处理从表数据
        for _slave in _slaves:

            sql = (
                "select * from zcb_report_slave where date(createdate)='"
                + str(tomorrow)
                + "' and type='"
                + _slave.get("type")
                + "' and tzqx = '"
                + _slave.get("tzqx")
                + "'"
            )
            print sql
            tomorrow_slave = db.execute(sql)
            if len(tomorrow_slave) > 0:
                tomorrow_slave = tomorrow_slave[0]
                slave = {}
                slave["type"] = _slave.get("type")
                slave["tzqx"] = _slave.get("tzqx")
                for _d in _slave:
                    print "-->", _slave.get(_d), "==", tomorrow_slave.get(_d)
                    if tomorrow_slave.get(_d) == None or _slave.get(_d) != tomorrow_slave.get(_d):
                        print _d, "==>", _slave.get(_d), "!!!!!", tomorrow_slave.get(_d)
                        slave[_d] = _slave.get(_d)

                if len(slave) < 1:
                    continue
                print "slave-->", slave
                sql = (
                    "select * from zcb_report_slave where date(createdate)='"
                    + str(today)
                    + "' and type='"
                    + _slave.get("type")
                    + "' and tzqx = '"
                    + _slave.get("tzqx")
                    + "'"
                )
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    # insert
                    sql = db.parseInsert("zcb_report_slave", slave)
                    print "insert slave sql -->", sql
                else:
                    # update
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", slave, " id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                db.execute(sql)
            else:
                sql = (
                    "select * from zcb_report_slave where date(createdate)='"
                    + str(today)
                    + "' and type='"
                    + _slave.get("type")
                    + "' and tzqx = '"
                    + _slave.get("tzqx")
                    + "'"
                )
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    sql = db.parseInsert("zcb_report_slave", _slave)
                    print "insert slave sql -->", sql
                    db.execute(sql)
                else:
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", _slave, "id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                    db.execute(sql)

    def crawlerTest(self):
        url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668"
        self.pageUtils.url = url
        self.pageUtils.login("13651781949", "lufax123")
        sleep(10)
        url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528"
        self.pageUtils.browser.get(url)

    def crawler(self):

        url = "https://zhaocaibao.alipay.com/pf/productList.htm"

        browser = self.pageUtils.startBrowser()
        browser.get(url)
        print browser.title
        a = self.parsePage_home(browser)

        url = "https://cmspromo.alipay.com/finance/fullyear.htm"
        browser.get(url)
        print browser.title
        b = self.parsePage_finance(browser)

        b.update()
        a.get("m").update(b)
        # print a.get("s")
        # print b
        self.updateData(a)
        # d = self.pageUtils.downloadPage(url)

        # browser.find_element("", "").get_attribute(name)
        # print "==>",browser.find_element_by_class_name("data-box").text()

        browser.quit()

    def parsePage_home(self, page):
        result = {}

        result_m = {}
        result["m"] = result_m
        # print soup.title
        # #平台成交金额
        cjjes = page.find_elements_by_class_name("data-box")
        c = ""
        for cjje in cjjes:
            # cjje += cjje.get_attribute("class")
            c += cjje.text
        result_m["cjje"] = c

        yycjje = page.find_element_by_class_name("week-book-data")
        result_m["yycjje"] = self.clearNumber(yycjje.text)

        yycjjebl = page.find_element_by_class_name("book-rate-data")
        result_m["yycjjebl"] = yycjjebl.text

        grqyds = page.find_elements_by_css_selector('div[class="several-months fn-clear"]')

        i = 0
        qixis = ["3", "3-6", "6-12", "12-24", "24"]
        result_s_list = []
        result["s"] = result_s_list
        for grqyd in grqyds:
            result_s_map = {"type": "个人企业贷"}
            result_s_list.append(result_s_map)
            result_s_map["tzqx"] = qixis[i]
            aa = grqyd.find_element_by_css_selector('div[class="product-book fn-clear"]')
            zg = aa.find_element_by_class_name("content-third-type")  # 总共
            result_s_map["zgje"] = self.clearNumber(zg.text)
            yylilv = aa.find_element_by_class_name("content-second-type")  # 预约利率
            result_s_map["yylilv"] = yylilv.text
            print "yylilv-->", yylilv.text

            try:
                bb = grqyd.find_element_by_css_selector('div[class="product-buy fn-clear"]')
                gm = bb.find_element_by_class_name("content-third-type")  # 购买
                result_s_map["gmje"] = self.clearNumber(gm.text)

                gmlilv = bb.find_element_by_css_selector('li[class="w145 buy-product-rate"]')  # 购买利率
                result_s_map["gmlilv"] = gmlilv.text
                print "gmlilv-->", gmlilv.text
            except Exception, e:
                print e
            i += 1
            # .find_element_by_class_name("content-third-type")#购买
            # print zg,gm
        # print result
        return result
Пример #4
0
    def tjDay(self,startDate):  
        #startDate = "2015-09-13"
        startDate = startDate[:10]
        #print "startDate-->",startDate[:10]
        days = 1
        endDate = str(PageUtils.delayed(startDate[:10],days))
           #相隔的天数
        tiqianDay = -1
        tiqianStartDate = str(PageUtils.delayed(startDate,tiqianDay))
        tiqianendDate = str(PageUtils.delayed(endDate[:10],tiqianDay))
        print startDate,":",endDate,"====",tiqianStartDate,":",tiqianendDate
        
        
        
        db = dataBaseOperator()

        f=file("report/loan_"+str(startDate)+".txt","w+")
        #main_tablename = "zcb_insu" 
        #slave_tablename = "zcb_insu_process"
        main_tablename = "zcb_loan"#,"zcb_insu","zcb_others"
        head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长"
        slave_tablename = main_tablename+"_process"
        sql = '''
        
            SELECT projectType, 
            period, 
            count(*)as total, 
            sum( bishu_today )as bishu_today  ,
            sum( addedTransNumber ) as bishu , 
            min( annualRate ) as min_lilv,
            max( annualRate ) as max_lilv  ,
            min( amountAtLeast ) as min_qitou,
            max( amountAtLeast ) as max_qitou,
            sum(jezz) as jezz,'已下架' as type
            FROM (
                SELECT productid,
                CASE
                WHEN period <=90 THEN '03'
                WHEN period >90 AND period <=180 THEN '03-06'
                WHEN period >180 AND period <=365 THEN '06-12'
                WHEN period >365 AND period <=730 THEN '12-24'
                WHEN period >730 THEN '24+'
                END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                FROM (
                    SELECT productid,  
                        case 
                        when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed)  
                        when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 
                        else null
                        end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                    FROM (
                        
                        select l1.productid, l1.projectType, l1.period,
                            l1.TransNumber addedTransNumber,l1.TransNumber bishu_today,l1.annualRate,
                            cast( SUBSTRING_INDEX( l1.amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast,
                            l1.productScale_yuan as jezz
                        from  zcb_loan l1  where state = '已下架' and endSellDate >= \''''+str(startDate)+'''\' and endSellDate < \''''+str(endDate)+'''\'
                    
                   )a
               )b
           )c GROUP BY projectType, period

        '''
        #and createDate >= \''''+str(startDate)+'''\' and createDate < \''''+str(endDate)+'''\' 已售罄 已下架
        print sql
        maps = db.execute(sql)
        for map in maps :
            map["startDate"] = startDate
            map["endDate"] = endDate
            
            sql = db.parseInsert("zcb_loan_report", map)
            print sql 
            db.execute(sql)
            
            
        
        f.writelines(head)
        for map in maps :
            
            result = "\n"+str(map["projectType"])+","+\
                str(map["period"])+"个月,0,"+\
                str(map["total"])\
                +","+str(map["bishu_today"])\
                +","+str(map["bishu"])\
                +","+str(map["min_lilv"])\
                +","+str(map["max_lilv"])\
                +","+str(map["min_qitou"]) +"-"+str(map["max_qitou"])\
                +","+str(map["jezz"]) 
        
            '''
        result.append(map["projectType"])
        result.append(map["period"])
        result.append(0)
        result.append(map["total"])
        result.append(map["bishu_today"])
        result.append(map["bishu"])
        result.append(map["min_lilv"])
        result.append(map["max_lilv"])
        result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))'''
            
            f.write(result)
        f.write("\n\n\n")
        f.flush()
        f.close()
Пример #5
0
    def tjDay(self, startDate):
        # startDate = "2015-09-13"
        startDate = startDate[:10]
        # print "startDate-->",startDate[:10]
        days = 1
        endDate = str(PageUtils.delayed(startDate[:10], days))
        # 相隔的天数
        tiqianDay = -1
        tiqianStartDate = str(PageUtils.delayed(startDate, tiqianDay))
        tiqianendDate = str(PageUtils.delayed(endDate[:10], tiqianDay))
        print startDate, ":", endDate, "====", tiqianStartDate, ":", tiqianendDate

        reload(sys)
        sys.setdefaultencoding("utf8")  # @UndefinedVariable

        db = dataBaseOperator()

        f = file("report/" + str(startDate) + ".txt", "w+")
        # main_tablename = "zcb_insu"
        # slave_tablename = "zcb_insu_process"
        tablename = ["zcb_loan", "zcb_insu", "zcb_others"]  # "zcb_loan","zcb_insu","zcb_others"
        head = "产品类型,投资期限,产品数量(昨天),产品数量(今天),笔数(今天),增加笔数,最小利率,最大利率,起投金额,金额增长"
        for main_tablename in tablename:
            slave_tablename = main_tablename + "_process"
            sql = (
                """
            
                SELECT projectType, 
                period, 
                count(*)as total, 
                sum( bishu_today )as bishu_today  ,
                sum( addedTransNumber ) as bishu , 
                min( annualRate ) as min_lilv,
                max( annualRate ) as max_lilv  ,
                min( amountAtLeast ) as min_qitou,
                max( amountAtLeast ) as max_qitou,
                sum(jezz) as jezz
                FROM (
                    SELECT productid,
                    CASE
                    WHEN period <=90 THEN '03'
                    WHEN period >90 AND period <=180 THEN '03-06'
                    WHEN period >180 AND period <=365 THEN '06-12'
                    WHEN period >365 AND period <=730 THEN '12-24'
                    WHEN period >730 THEN '24+'
                    END AS period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                    FROM (
                        SELECT productid,  
                            case 
                            when LOCATE('天',period)>0 then cast(SUBSTRING_INDEX( period, '天', 1 ) as signed)  
                            when LOCATE('年',period)>0 then cast(SUBSTRING_INDEX( period, '年', 1 ) as signed)*365 
                            else null
                            end as period, projectType, addedTransNumber,amountAtLeast,annualRate,bishu_today,jezz
                        FROM (
                            
                            select p.productid, i.projectType, i.period,
                        p.bishu as 'addedTransNumber',p.bishu_today,i.annualRate,
                        cast( SUBSTRING_INDEX( amountAtLeast, '元起', 1 ) AS signed ) AS amountAtLeast,
                        (i.productScale_yuan*wanchenglv/100)as jezz
                    from (
                                select a1.productid , (IFNULL(a1.bishu,0)- IFNULL(a2.bishu,0))as bishu ,IFNULL(a1.bishu,0)as bishu_today,(a1.wanchenglv-a2.wanchenglv)as wanchenglv from 
                                    (select productid,bishu,IFNULL(b1.wanchenglv,0)as wanchenglv
                                    from `"""
                + slave_tablename
                + """` b1 where createDate >= \'"""
                + startDate
                + """\' and createDate < \'"""
                + endDate
                + """\')a1
                                left join 
                                    (select productid,bishu,IFNULL(b2.wanchenglv,0)as wanchenglv 
                                    from `"""
                + slave_tablename
                + """` b2 where createDate >= \'"""
                + tiqianStartDate
                + """\' and createDate < \'"""
                + tiqianendDate
                + """\')a2
                                on a1.productid = a2.productid
                            ) p left join """
                + main_tablename
                + """ i on p.productid = i.productid
                        
                        )c
                    )a
                )b
                GROUP BY projectType, period

            """
            )
            print sql
            maps = db.execute(sql)
            f.writelines(head)
            for map in maps:
                result = (
                    "\n"
                    + str(map["projectType"])
                    + ","
                    + str(map["period"])
                    + "个月,0,"
                    + str(map["total"])
                    + ","
                    + str(map["bishu_today"])
                    + ","
                    + str(map["bishu"])
                    + ","
                    + str(map["min_lilv"])
                    + ","
                    + str(map["max_lilv"])
                    + ","
                    + str(map["min_qitou"])
                    + "-"
                    + str(map["max_qitou"])
                    + ","
                    + str(map["jezz"])
                )

                """
            result.append(map["projectType"])
            result.append(map["period"])
            result.append(0)
            result.append(map["total"])
            result.append(map["bishu_today"])
            result.append(map["bishu"])
            result.append(map["min_lilv"])
            result.append(map["max_lilv"])
            result.append(str(map["min_qitou"]) +"-"+str(map["max_qitou"]))"""

                f.write(result)
            f.write("\n\n\n")
            f.flush()
        f.close()