Пример #1
0
class CrawlerHome:
    def __init__(self):
        """Set the process-wide default string encoding to UTF-8 and create the PageUtils helper."""
        # Python 2 removes sys.setdefaultencoding during interpreter start-up;
        # reloading the sys module makes it available again for the next line.
        reload(sys)
        sys.setdefaultencoding("utf8")  # @UndefinedVariable
        self.pageUtils = PageUtils()

    def updateData(self, data):
        _main = data.get("m")
        _slaves = data.get("s")
        main = {}
        slave = {}

        print _main
        print _slaves
        db = DataBase()

        today = self.pageUtils.getCurrentDate()
        tomorrow = self.pageUtils.delayed(today, -1)

        # 取昨天的数据
        sql = "select * from zcb_report_master where date(createdate)='" + str(tomorrow) + "'"
        tomorrow_main = db.execute(sql)
        if len(tomorrow_main) > 0:
            tomorrow_main = tomorrow_main[0]

            print "sql==>", sql

            for _d in _main:
                print "-->", _main.get(_d), "==", tomorrow_main.get(_d)
                if _d == "yycjjebl":
                    main[_d] = _main.get(_d)
                else:
                    if tomorrow_main.get(_d) == None or _main.get(_d) > tomorrow_main.get(_d):
                        main[_d] = _main.get(_d)

                # 今天是否有数据
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                # 主表数据插入
                sql = db.parseInsert("zcb_report_master", main)
                print "insert master sql -->", sql
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", main, " id = " + str(today_main.get("id")))
                print "update master sql -->", sql
            db.execute(sql)
        else:
            sql = "select * from zcb_report_master where date(createdate)='" + str(today) + "'"
            today_main = db.execute(sql)
            if len(today_main) < 1:
                sql = db.parseInsert("zcb_report_master", _main)
                print "insert master sql -->", sql
                db.execute(sql)
            else:
                today_main = today_main[0]
                sql = db.parseUpdate("zcb_report_master", _main, "id=" + str(today_main.get("id")))
                print "update master sql -->", sql
                db.execute(sql)
            # 处理从表数据
        for _slave in _slaves:

            sql = (
                "select * from zcb_report_slave where date(createdate)='"
                + str(tomorrow)
                + "' and type='"
                + _slave.get("type")
                + "' and tzqx = '"
                + _slave.get("tzqx")
                + "'"
            )
            print sql
            tomorrow_slave = db.execute(sql)
            if len(tomorrow_slave) > 0:
                tomorrow_slave = tomorrow_slave[0]
                slave = {}
                slave["type"] = _slave.get("type")
                slave["tzqx"] = _slave.get("tzqx")
                for _d in _slave:
                    print "-->", _slave.get(_d), "==", tomorrow_slave.get(_d)
                    if tomorrow_slave.get(_d) == None or _slave.get(_d) != tomorrow_slave.get(_d):
                        print _d, "==>", _slave.get(_d), "!!!!!", tomorrow_slave.get(_d)
                        slave[_d] = _slave.get(_d)

                if len(slave) < 1:
                    continue
                print "slave-->", slave
                sql = (
                    "select * from zcb_report_slave where date(createdate)='"
                    + str(today)
                    + "' and type='"
                    + _slave.get("type")
                    + "' and tzqx = '"
                    + _slave.get("tzqx")
                    + "'"
                )
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    # insert
                    sql = db.parseInsert("zcb_report_slave", slave)
                    print "insert slave sql -->", sql
                else:
                    # update
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", slave, " id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                db.execute(sql)
            else:
                sql = (
                    "select * from zcb_report_slave where date(createdate)='"
                    + str(today)
                    + "' and type='"
                    + _slave.get("type")
                    + "' and tzqx = '"
                    + _slave.get("tzqx")
                    + "'"
                )
                today_slave = db.execute(sql)
                if len(today_slave) < 1:
                    sql = db.parseInsert("zcb_report_slave", _slave)
                    print "insert slave sql -->", sql
                    db.execute(sql)
                else:
                    today_slave = today_slave[0]
                    sql = db.parseUpdate("zcb_report_slave", _slave, "id = " + str(today_slave.get("id")))
                    print "update slave sql -->", sql
                    db.execute(sql)

    def crawlerTest(self):
        """Log in through pageUtils, pause, then load a second purchase page.

        The first purchase URL is assigned to ``pageUtils.url`` before
        ``login`` is called; after ``sleep(10)`` the second purchase URL is
        opened in ``pageUtils.browser``.
        """
        first_url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20150821000230020000680048696668"
        second_url = "https://zhaocaibao.alipay.com/pf/purchase.htm?productId=20151009000230020000280058270528"

        self.pageUtils.url = first_url
        # NOTE(review): account credentials are hard-coded in source; consider
        # moving them to configuration that is not checked in.
        self.pageUtils.login("13651781949", "lufax123")
        sleep(10)
        self.pageUtils.browser.get(second_url)

    def crawler(self):

        url = "https://zhaocaibao.alipay.com/pf/productList.htm"

        browser = self.pageUtils.startBrowser()
        browser.get(url)
        print browser.title
        a = self.parsePage_home(browser)

        url = "https://cmspromo.alipay.com/finance/fullyear.htm"
        browser.get(url)
        print browser.title
        b = self.parsePage_finance(browser)

        b.update()
        a.get("m").update(b)
        # print a.get("s")
        # print b
        self.updateData(a)
        # d = self.pageUtils.downloadPage(url)

        # browser.find_element("", "").get_attribute(name)
        # print "==>",browser.find_element_by_class_name("data-box").text()

        browser.quit()

    def parsePage_home(self, page):
        result = {}

        result_m = {}
        result["m"] = result_m
        # print soup.title
        # #平台成交金额
        cjjes = page.find_elements_by_class_name("data-box")
        c = ""
        for cjje in cjjes:
            # cjje += cjje.get_attribute("class")
            c += cjje.text
        result_m["cjje"] = c

        yycjje = page.find_element_by_class_name("week-book-data")
        result_m["yycjje"] = self.clearNumber(yycjje.text)

        yycjjebl = page.find_element_by_class_name("book-rate-data")
        result_m["yycjjebl"] = yycjjebl.text

        grqyds = page.find_elements_by_css_selector('div[class="several-months fn-clear"]')

        i = 0
        qixis = ["3", "3-6", "6-12", "12-24", "24"]
        result_s_list = []
        result["s"] = result_s_list
        for grqyd in grqyds:
            result_s_map = {"type": "个人企业贷"}
            result_s_list.append(result_s_map)
            result_s_map["tzqx"] = qixis[i]
            aa = grqyd.find_element_by_css_selector('div[class="product-book fn-clear"]')
            zg = aa.find_element_by_class_name("content-third-type")  # 总共
            result_s_map["zgje"] = self.clearNumber(zg.text)
            yylilv = aa.find_element_by_class_name("content-second-type")  # 预约利率
            result_s_map["yylilv"] = yylilv.text
            print "yylilv-->", yylilv.text

            try:
                bb = grqyd.find_element_by_css_selector('div[class="product-buy fn-clear"]')
                gm = bb.find_element_by_class_name("content-third-type")  # 购买
                result_s_map["gmje"] = self.clearNumber(gm.text)

                gmlilv = bb.find_element_by_css_selector('li[class="w145 buy-product-rate"]')  # 购买利率
                result_s_map["gmlilv"] = gmlilv.text
                print "gmlilv-->", gmlilv.text
            except Exception, e:
                print e
            i += 1
            # .find_element_by_class_name("content-third-type")#购买
            # print zg,gm
        # print result
        return result