Exemplo n.º 1
0
def test_Adview():
    """Download the Adview income report and convert it to an Excel file.

    Drives Chrome through Selenium: opens the Adview login page, fills in the
    credentials returned by ``common_mysql.selectFromTb("Adview")``, pauses so
    a human can type the captcha, then opens the income page and clicks the
    "Export detail CSV" button.  The last file reported by
    ``common.scan_File`` for the ``AdView_<today>`` prefix is passed to
    ``common.turnToXls_ByPandas`` together with the date of two days ago,
    and the downloaded files matching the prefix are removed afterwards.

    The whole flow is attempted up to 5 times and stops at the first success.
    Failures are printed and written to ``comm_logging.myLogger``; nothing is
    returned and ordinary exceptions are not propagated.

    NOTE: the captcha prompt uses ``ctypes.windll``, so this only runs on
    Windows.
    """
    print("............Adview................")
    flag = False
    for try_num in range(5):
        print("Adview第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            # Adview data is taken for the day before yesterday.
            yesterday = common.getNowTime(-2)
            yyyy = yesterday[0:4]
            mm = yesterday[5:7]
            dd = yesterday[8:10]
            nowTime = common.getNowTime()
            yearNow = nowTime[0:4]
            monthNow = nowTime[5:7]
            dayNow = nowTime[8:10]
            # match_Str is the date to keep; pandas formats dates as yyyy-mm-dd.
            match_Str = yyyy + "-" + mm + "-" + dd
            # Prefix of the file name Adview gives the download.
            prefix = "AdView_" + yearNow + monthNow + dayNow
            # The directory path only uses the date part of nowTime.
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Adview")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this string is passed as a bare argument without a
            # "user-agent=" prefix, so it presumably does not set the
            # User-Agent header - confirm the intent before changing it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36')
            # Change the default download directory.
            prefs = {'profile.default_content_settings.popups': 0, 'download.default_directory': path}
            chrome_options.add_experimental_option('prefs', prefs)

            # Start the browser and open the login page.
            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "http://www.adview.cn/web/overseas/login"
            dataURL = "http://www.adview.cn/user/bid/income"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            browser.find_element_by_xpath('//span[text()="Publishers"]').click()
            username, password = common_mysql.selectFromTb("Adview")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("pwd").send_keys(password)
            # Blocking dialog telling the operator to type the captcha.
            ctypes.windll.user32.MessageBoxA(0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                                             u' 信息'.encode('gb2312'), 0)
            print("请输入验证码,等待 15秒")
            # Give the operator the full 15 seconds announced in the dialog.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)
            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@class="form-control btn btn-blue blue submitBtn"]').click()
            time.sleep(5)
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(5)
            try:
                browser.find_element_by_xpath('//a[text()="English"]').click()
            except Exception:
                # Best effort: a missing language link is logged and the
                # attempt carries on.
                errorInfo = traceback.format_exc()
                print(errorInfo)
                comm_logging.myLogger.write_logger(errorInfo)
            time.sleep(2)
            print("点击下载")
            browser.find_element_by_xpath('//input[@value="Export detail CSV"]').click()
            time.sleep(2)

            # Scan the folder for the downloaded Adview files.
            print("正在扫描下载的 csv 文件---------")
            fileList = common.scan_File(path, prefix)
            common.mkdir(path)
            # Read the downloaded file, drop unwanted data and save it again.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + fileList[-1], path + excelName, 'Adview', match_Str)
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Adview抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Log the failure and fall through to the next attempt.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemplo n.º 2
0
def test_Pubnative():
    """Download yesterday's Pubnative report and convert it to an Excel file.

    Drives Chrome through Selenium: logs in to the Pubnative dashboard with
    the credentials returned by ``common_mysql.selectFromTb("Pubnative")``,
    opens the report page, picks the date range and dimension through the
    page controls and clicks the download link.  The expected CSV name is
    built from yesterday's date
    (``Publisher App_dd.mm.yyyy-dd.mm.yyyy.csv``); that file is passed to
    ``common.turnToXls_ByPandas`` and the matching downloads are removed
    afterwards.

    The whole flow is attempted up to 3 times and stops at the first success.
    Failures are printed and written to ``comm_logging.myLogger``; nothing is
    returned and ordinary exceptions are not propagated.
    """
    print("............Pubnative................")
    flag = False
    for try_num in range(3):
        print("Pubnative第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            yesterday = common.getNowTime(-1)
            yyyy = yesterday[0:4]
            mm = yesterday[5:7]
            dd = yesterday[8:10]
            # String used to match (and later delete) the downloaded files.
            prefix = "Publisher App_" + dd + "." + mm + "." + yyyy + "-" + dd + "." + mm + "." + yyyy
            # Name of the downloaded file.
            downloadFile = prefix + ".csv"
            nowTime = common.getNowTime()
            # The directory path only uses the date part of nowTime.
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Pubnative")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this string is passed as a bare argument without a
            # "user-agent=" prefix, so it presumably does not set the
            # User-Agent header - confirm the intent before changing it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Change the default download directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)

            # Start the browser and open the login page.
            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://dashboard.pubnative.net"
            dataURL = "https://dashboard.pubnative.net/partner/#!/api"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            from public import common_mysql
            username, password = common_mysql.selectFromTb("Pubnative")
            browser.find_element_by_id("email").send_keys(username)
            browser.find_element_by_id("password").send_keys(password)
            time.sleep(2)
            browser.find_element_by_xpath('//input[@value="LOGIN"]').click()
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(15)
            print("自动选择数据维度内容---------------------------------")
            browser.find_element_by_xpath('//div[@class="col-md-7"]').click()
            browser.find_element_by_xpath(
                '//div[@class="daterange"]/ul/li[2]').click()
            s = browser.find_element_by_tag_name("select")
            Select(s).select_by_index(12)
            browser.find_element_by_xpath(
                '//button[@class="btn btn-secondary ng-star-inserted"]').click()
            time.sleep(2)
            browser.find_element_by_xpath(
                '//div[@class="reports-filters row"]/div[3]').click()
            print("准备下载...")
            time.sleep(2)

            print("点击下载")
            browser.find_element_by_xpath(
                '//div[@class="col-md order-disabled optional-buttons"]/a[2]'
            ).click()
            time.sleep(2)

            common.mkdir(path)
            print("读取 csv 另存为 excel")
            common.turnToXls_ByPandas(path + downloadFile, path + excelName,
                                      'Pubnative')
            print("删除多余的 csv")
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Pubnative抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Log the failure and fall through to the next attempt.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemplo n.º 3
0
def test_NewCM():
    """Download the NewCM report for "Yesterday" and convert it to Excel.

    Drives Chrome through Selenium: opens the login page, fills in the
    credentials returned by ``common_mysql.selectFromTb("NewCM")``, pauses so
    a human can type the captcha, then opens the report page, selects the
    "Ad Unit" dimension and the "Yesterday" range, runs the search and clicks
    the download link.  The last file reported by ``common.scan_File`` for
    the ``Brand+Details_<today>`` prefix is passed to
    ``common.turnToXls_ByPandas`` and the matching downloads are removed
    afterwards.

    The whole flow is attempted up to 5 times and stops at the first success.
    Failures are printed and written to ``comm_logging.myLogger``; nothing is
    returned and ordinary exceptions are not propagated.

    NOTE: the captcha prompt uses ``ctypes.windll``, so this only runs on
    Windows.
    """
    print("............NewCM................")
    flag = False
    for try_num in range(5):
        print("NewCM第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            nowTime = common.getNowTime()
            yearNow = nowTime[0:4]
            monthNow = nowTime[5:7]
            dayNow = nowTime[8:10]

            # Prefix of the file name NewCM gives the download.
            prefix = "Brand+Details_" + yearNow + monthNow + dayNow
            # The directory path only uses the date part of nowTime.
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "NewCM")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this string is passed as a bare argument without a
            # "user-agent=" prefix, so it presumably does not set the
            # User-Agent header - confirm the intent before changing it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Change the default download directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)

            # Start the browser and open the login page.
            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://peg.cmcm.com/login"
            dataURL = "https://peg.cmcm.com/frontReport/default"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("NewCM")
            browser.find_element_by_name("username").send_keys(username)
            browser.find_element_by_name("password").send_keys(password)
            # Blocking dialog telling the operator to type the captcha.
            ctypes.windll.user32.MessageBoxA(
                0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                u' 信息'.encode('gb2312'), 0)
            print("请输入验证码,等待 15秒")
            # Give the operator the full 15 seconds announced in the dialog.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)
            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@type="submit"]').click()
            time.sleep(5)
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(5)
            # Select the "Ad Unit" dimension.
            print("选择 AD unit")
            browser.find_element_by_xpath(
                '//button[text()="Dimension & Metric"]').click()
            browser.find_element_by_xpath('//span[text()="Ad Unit"]').click()
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Submit"]').click()
            time.sleep(2)
            # Select the date range.
            print("选择日期")
            browser.find_element_by_xpath(
                '//button[@class="btn btn-default ng-binding"]').click()
            browser.find_element_by_xpath('//span[text()="Yesterday"]').click()
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Submit"]').click()
            time.sleep(2)
            # Run the search.
            print("# 点击search")
            browser.find_element_by_xpath('//input[@value="Search"]').click()
            time.sleep(5)
            print("点击下载")
            browser.find_element_by_xpath(
                '//a[@class="btn btn-outline"]').click()
            time.sleep(5)

            # Scan the folder for the downloaded NewCM files.
            print("正在扫描下载的 csv 文件---------")
            fileList = common.scan_File(path, prefix)
            common.mkdir(path)
            # Read the downloaded file, drop unwanted data and save it again.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + fileList[-1], path + excelName,
                                      'NewCM')
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||NewCM抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Log the failure and fall through to the next attempt.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemplo n.º 4
0
def test_OpenX():
    """Download the OpenX report for two days ago and convert it to Excel.

    Drives a maximized Chrome window through Selenium.  Login is first tried
    without a captcha using the credentials returned by
    ``common_mysql.selectFromTb("OpenX")``; if any step of that fails, the
    credentials are re-entered and the flow pauses so a human can type the
    captcha before submitting.  The report page is then opened, a custom date
    range (start and end both set to two days ago, as mm/dd/yyyy) is applied
    and the Excel export is triggered.  The last file reported by
    ``common.scan_File`` for the ``Workspace 1-<mm>-<dd>-<yyyy>`` prefix of
    today is passed to ``common.turnToXls_ByPandas`` and the matching
    downloads are removed afterwards.

    The whole flow is attempted up to 2 times and stops at the first success.
    Failures are printed and written to ``comm_logging.myLogger``; nothing is
    returned and ordinary exceptions raised inside the loop are not
    propagated.

    NOTE: the captcha prompt uses ``ctypes.windll``, so this only runs on
    Windows.
    """
    print("............OpenX................")
    nowTime = common.getNowTime()
    yearNow = nowTime[0:4]
    monthNow = nowTime[5:7]
    dayNow = nowTime[8:10]
    # The directory path only uses the date part of nowTime.
    path = common.getDirPath(nowTime)
    # String used to match (and later delete) the downloaded files.
    prefix = "Workspace 1-" + monthNow + "-" + dayNow + "-" + yearNow
    excelName = common.getExcelName(nowTime, "openx")

    # Retry on error (the loop allows 2 attempts).
    flag = False
    for try_num in range(2):
        print("OpenX第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            print("打开浏览器")
            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this string is passed as a bare argument without a
            # "user-agent=" prefix, so it presumably does not set the
            # User-Agent header - confirm the intent before changing it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)
            # Start the browser.
            browser = webdriver.Chrome(chrome_options=chrome_options)
            # Without maximizing, some buttons cannot be clicked and raise.
            browser.maximize_window()
            time.sleep(2)
            logInURL = "https://sso.openx.com/login/login"
            dataURL = "http://mobimagic-ui.openx.net/app.html#/reports/pie"

            # The report date is the day before yesterday.
            yesterday = common.getNowTime(-2)
            yyyy = yesterday[0:4]
            mm = yesterday[5:7]
            dd = yesterday[8:10]
            report_date = mm + "/" + dd + "/" + yyyy

            print("打开登录网址")
            browser.get(logInURL)
            time.sleep(5)

            # The try branch is the normal login without a captcha.  If it
            # fails, the login is treated as requiring a captcha and the
            # except branch logs in the captcha way.
            username, password = common_mysql.selectFromTb("OpenX")
            try:
                browser.find_element_by_id("email").send_keys(username)
                time.sleep(3)
                browser.find_element_by_id('password').send_keys(password)
                time.sleep(2)
                browser.find_element_by_id("submit").click()
                time.sleep(5)
                browser.find_element_by_xpath(
                    '//a[text()="http:// mobimagic-ui.openx.net/"]').click()
            except Exception:
                errorInfo = traceback.format_exc()
                print(errorInfo)
                comm_logging.myLogger.write_logger(errorInfo)
                # Do not reload the page here: after a reload the captcha does
                # not appear and the login keeps failing.
                time.sleep(2)
                browser.find_element_by_id("email").clear()
                browser.find_element_by_id("email").send_keys(username)
                time.sleep(3)
                browser.find_element_by_id('password').clear()
                browser.find_element_by_id('password').send_keys(password)
                # Blocking dialog telling the operator to type the captcha.
                ctypes.windll.user32.MessageBoxA(
                    0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                    u' 信息'.encode('gb2312'), 0)
                print("请输入验证码,等待 15秒")
                # Give the operator the full 15 seconds announced in the
                # dialog.
                for remaining in range(15, 0, -1):
                    print("倒计时:", remaining)
                    time.sleep(1)
                browser.find_element_by_id("submit").click()

            print("模拟登录成功")
            print("打开数据网址")
            browser.get(dataURL)
            time.sleep(10)
            browser.find_element_by_xpath(
                '//div[@class="date-range-filter__input"]').click()
            browser.find_element_by_xpath(
                '//li[@class="date-range-filter__filter date-range-filter__filter--custom date-range-filter__filter__label"]'
            ).click()
            time.sleep(5)
            # Other approaches did not work; in this list element [0] is the
            # start date input and [1] the end date input.
            print("开始选择日期和维度")
            inputElements = browser.find_elements_by_css_selector(
                '[placeholder="Enter Date"]')
            inputElements[0].clear()
            inputElements[0].send_keys(report_date)
            inputElements[1].clear()
            inputElements[1].send_keys(report_date)
            browser.find_element_by_xpath(
                '//div[@class="date-range-filter__custom-date-range-menu__buttons"]/div/button[2]'
            ).click()
            time.sleep(5)

            # Collapse the date bar and the line chart; otherwise the download
            # button cannot be clicked.
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-data-collection-container__collapse-trigger"]'
            ).click()
            time.sleep(1)
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-collapse-toggle"]').click()
            time.sleep(1)
            # Open the export menu and choose the Excel entry.
            browser.find_element_by_xpath(
                '//div[@class="reports-pie-chart-toolkit__export"]/ox-dropdown'
            ).click()
            time.sleep(1)
            browser.find_element_by_xpath('//span[text()="Excel "]').click()
            time.sleep(10)

            common.mkdir(path)
            # Scan the folder for the downloaded OpenX files.
            print("正在扫描下载的 xlsx 文件---------")
            fileList = common.scan_File(path, prefix)
            common.turnToXls_ByPandas(path + fileList[-1], path + excelName,
                                      'openx')
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||OpenX抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Log the failure and fall through to the next attempt.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break
Exemplo n.º 5
0
def test_Solo():
    """Download the Solo report for two days ago and convert it to Excel.

    Drives Chrome through Selenium: opens the portal, fills in the
    credentials returned by ``common_mysql.selectFromTb("Solo")``, pauses so
    a human can type the captcha, then opens the report page, switches to the
    "Stats" view, applies a custom date range (start and end both set to two
    days ago, as mm/dd/yyyy), selects the slot tab and clicks export.  The
    first file reported by ``common.scan_File`` for the ``ReportTable``
    prefix is passed to ``common.turnToXls_ByPandas`` and the matching
    downloads are removed afterwards.

    The whole flow is attempted up to 3 times and stops at the first success.
    Failures are printed and written to ``comm_logging.myLogger``; nothing is
    returned and ordinary exceptions are not propagated.

    NOTE: the captcha prompt uses ``ctypes.windll``, so this only runs on
    Windows.
    """
    print("............Solo................")
    flag = False
    for try_num in range(3):
        print("Solo第" + str(try_num + 1) + "次尝试-----------")
        # Reset per attempt so the cleanup below never touches an unbound name
        # or a driver that a previous attempt already closed.
        browser = None
        try:
            dayBeforeYester = common.getNowTime(-2)
            yyyy = dayBeforeYester[0:4]
            mm = dayBeforeYester[5:7]
            dd = dayBeforeYester[8:10]
            # Date typed into both ends of the custom range picker.
            report_date = mm + "/" + dd + "/" + yyyy
            nowTime = common.getNowTime()

            # Prefix of the file name Solo gives the download.
            prefix = "ReportTable"
            # The directory path only uses the date part of nowTime.
            path = common.getDirPath(nowTime)
            excelName = common.getExcelName(nowTime, "Solo")

            chrome_options = webdriver.ChromeOptions()
            # NOTE(review): this string is passed as a bare argument without a
            # "user-agent=" prefix, so it presumably does not set the
            # User-Agent header - confirm the intent before changing it.
            chrome_options.add_argument(
                'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'
            )
            # Change the default download directory.
            prefs = {
                'profile.default_content_settings.popups': 0,
                'download.default_directory': path
            }
            chrome_options.add_experimental_option('prefs', prefs)

            # Start the browser and open the login page.
            browser = webdriver.Chrome(chrome_options=chrome_options)
            loginURL = "https://portal.newborntown.com/logout"
            dataURL = "https://portal.newborntown.com/report"
            browser.get(loginURL)
            time.sleep(5)
            print("输入用户名密码---------------------------------")
            username, password = common_mysql.selectFromTb("Solo")
            browser.find_element_by_name("email").send_keys(username)
            browser.find_element_by_name("password").send_keys(password)
            # Blocking dialog telling the operator to type the captcha.
            ctypes.windll.user32.MessageBoxA(
                0, u"点击确定后,请在15秒内输入验证码,不要点击登录!!!".encode('gb2312'),
                u' 信息'.encode('gb2312'), 0)
            print("请输入验证码,等待 15秒")
            # Give the operator the full 15 seconds announced in the dialog.
            for remaining in range(15, 0, -1):
                print("倒计时:", remaining)
                time.sleep(1)
            print("点击登录按钮------")
            browser.find_element_by_xpath('//button[@id="btn_login"]').click()
            time.sleep(5)
            print("跳转到数据页面---------------------------------")
            browser.get(dataURL)
            time.sleep(5)
            # Switch to the Stats view.
            print("选择stats页")
            browser.find_element_by_xpath('//h4[text()="Stats"]').click()
            time.sleep(5)

            # Pick the custom date range.
            print("开始选择日期")
            browser.find_element_by_id("reportrange").click()
            time.sleep(2)
            browser.find_element_by_xpath(
                '//li[text()="Custom Range"]').click()
            time.sleep(2)
            browser.find_element_by_name("daterangepicker_start").clear()
            browser.find_element_by_name("daterangepicker_start").send_keys(
                report_date)
            time.sleep(2)
            browser.find_element_by_name("daterangepicker_end").clear()
            browser.find_element_by_name("daterangepicker_end").send_keys(
                report_date)
            time.sleep(2)
            browser.find_element_by_xpath('//button[text()="Apply"]').click()
            time.sleep(5)

            # Select the slot tab.
            print("点击选择 slot 页")
            browser.find_element_by_id("slot").click()
            time.sleep(5)

            print("点击下载")
            browser.find_element_by_id("export").click()
            time.sleep(2)

            # Scan the folder for the downloaded Solo files.
            print("正在扫描下载的 csv 文件---------")
            fileList = common.scan_File(path, prefix)
            common.mkdir(path)
            # Read the downloaded file, drop unwanted data and save it again.
            print("正在剔除其他日期的数据 并另存为 excel ---------")
            common.turnToXls_ByPandas(path + fileList[0], path + excelName,
                                      'Solo')
            common.remove_File(path, prefix)
            print("  excel保存成功,路径:" + path + "-----------")
            print("||||||||||||Solo抓取完毕||||||||||||||")
            flag = True
        except Exception:
            # Log the failure and fall through to the next attempt.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            flag = False
            errorInfo = traceback.format_exc()
            print(errorInfo)
            comm_logging.myLogger.write_logger(errorInfo)
        finally:
            if browser is not None:
                browser.quit()
        if flag:
            break