Пример #1
0
def edge_driver(request: "SubRequest") -> Union[Remote, Edge]:
    """Provide a Selenium-controlled Edge driver to the requesting test class.

    A local ``Edge`` instance is started when the requesting class sets
    ``test_type`` to ``"edge-local"``; otherwise a ``Remote`` driver is
    created against ``SAUCE_HUB_URL`` using the ``SAUCE_EDGE`` capabilities.
    The driver is also stored on the class as ``request.cls.driver``.

    Yields:
        The started driver. Once the consumer is done (or setup after driver
        creation fails) the whole WebDriver session is ended.
    """
    if request.cls.test_type == "edge-local":
        driver = Edge()
    else:
        executor = RemoteConnection(SAUCE_HUB_URL, resolve_ip=False)
        driver = Remote(desired_capabilities=SAUCE_EDGE,
                        command_executor=executor)
    try:
        set_selenium_driver_timeouts(driver)
        request.cls.driver = driver
        yield driver
    finally:
        # quit() ends the session and shuts the driver down; close() would
        # only close the current window and leave the session running.
        driver.quit()
Пример #2
0
def livingly():
    """Walk the thumbnail strip of a livingly.com runway page and open each
    full-size image.

    For every link in the ``thumbnail-strip`` list the detail page is loaded;
    when it shows a visible ``slideshow-img-link`` block whose ``img`` carries
    a ``data-zoom-url`` attribute, that URL is opened in a new tab, the image
    there is right-clicked, and the tab is closed again.

    The browser session is always shut down, even when a step fails.
    """
    url = 'https://www.livingly.com/runway/Milan+Fashion+Week+Fall+2019/Aigner/Details/browse'
    driver = Edge(executable_path=PATH)
    try:
        action_chains = ActionChains(driver)
        driver.get(url)
        WebDriverWait(driver, 5000).until(
            expected_conditions.visibility_of_element_located(
                (By.CLASS_NAME, 'thumbnail-strip')))
        content = driver.find_element_by_xpath('//ul[@class="thumbnail-strip"]')
        links = content.find_elements_by_tag_name('a')

        # Materialize the hrefs NOW. A lazy generator would call
        # get_attribute() only after driver.get() has navigated away, at
        # which point the link elements are stale.
        paths = [link.get_attribute('href') for link in links]

        for path in paths:
            driver.get(path)
            WebDriverWait(driver, 3000).until(
                expected_conditions.visibility_of_element_located(
                    (By.CLASS_NAME, 'region-image')))
            try:
                slideshow = driver.find_element_by_xpath(
                    '//div[@class="slideshow-img-link"]')
            except Exception:
                # No slideshow on this page: step back and move on.
                driver.execute_script('window.history.go(-1);')
                continue

            if not slideshow.is_displayed():
                continue

            big_image_url = slideshow.find_element_by_tag_name(
                'img').get_attribute('data-zoom-url')
            if not big_image_url:
                driver.execute_script('window.history.go(-1);')
                continue

            # Pass the URL as a script argument instead of splicing it into
            # the JavaScript source, so quotes in the URL cannot break it.
            driver.execute_script(
                'window.open(arguments[0], "_blank");', big_image_url)

            # The image lives in the newly opened tab, so switch there first
            # and right-click a single element (not a list of elements).
            driver.switch_to.window(driver.window_handles[1])
            image = driver.find_element_by_tag_name('img')
            action_chains.context_click(image).perform()

            # Close the image tab and return to the original one.
            driver.close()
            driver.switch_to.window(driver.window_handles[0])
            driver.execute_script('window.history.go(-1);')
    finally:
        # quit() closes every window and ends the driver session.
        driver.quit()
Пример #3
0
def spider_hero():
    """Scrape the hero list page and save each entry's text to a file.

    Loads the pvp.qq.com hero list in Edge, collects the text of every ``li``
    matched under the ``herolist-content`` list, and writes the entries to
    ``hero_name.txt`` (UTF-8), one per line. The browser session is shut down
    even if scraping fails.
    """
    url = "https://pvp.qq.com/web201605/herolist.shtml"
    # Raw string: the backslashes in the Windows path are literal, not escapes.
    browser = Edge(
        executable_path=
        r'C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe')
    try:
        browser.get(url)
        ls = browser.find_elements_by_css_selector(
            "body > div.wrapper > div > div > div.herolist-box > div.herolist-content > ul > li"
        )
        hero_name = [item.text for item in ls]
    finally:
        # quit() ends the driver session; close() would only close the window.
        browser.quit()

    with open("hero_name.txt", 'w', encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in hero_name)

    print("写入完毕")
Пример #4
0
def spider_equipment():
    """Scrape the item list page and save each entry's text to a file.

    Loads the pvp.qq.com item page in Edge, collects the text of every ``li``
    under ``#Jlist-details``, and writes the entries to
    ``equipment_name.txt`` (UTF-8), one per line. The browser session is shut
    down even if scraping fails.
    """
    url = "https://pvp.qq.com/web201605/item.shtml"
    # Raw string: the backslashes in the Windows path are literal, not escapes.
    browser = Edge(
        executable_path=
        r'C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe')
    try:
        browser.get(url)
        ls = browser.find_elements_by_css_selector("#Jlist-details > li")
        equip_name = [item.text for item in ls]
    finally:
        # quit() ends the driver session; close() would only close the window.
        browser.quit()

    with open("equipment_name.txt", 'w', encoding="utf-8") as f:
        f.writelines(f"{name}\n" for name in equip_name)

    print("写入完毕")


# spider_hero()
# spider_equipment()
Пример #5
0
def getcomponies():
    """
    Get companies from the web and write them to an Excel file.

    For every file in the local ``imgs`` directory, the part of the file name
    before ``_EMail.jpg`` is used as the company id: the company page is
    opened to read its name, and the image is run through Tesseract OCR to
    recover the e-mail address. OCR results without an ``@`` are discarded.
    The names and e-mails are then written to the ``dosab`` worksheet of
    ``excel_file_name``.

    The link-collection and image-download steps below are disabled, so the
    ``imgs`` directory must already be populated.

    :return: None
    """
    _bases.kill_web_driver_edge()
    driver = Edge()
    componies = []

    try:
        driver.get('https://www.dosab.org.tr/Alfabetik-Firmalar-Listesi')

        # Get links
        # links = []
        # datalinks = driver.find_elements(By.XPATH, '/html/body/div[2]/div/ul/li/div/a')
        # for link in datalinks:
        #     linkobj = {
        #         'link': link.get_attribute('href'),
        #         'name': link.text
        #     }
        #     links.append(linkobj)

        # Download mail images
        # for complink in componies:
        #     parsedlink = str(complink['link']).split('/')
        #     mailimg = f'https://www.dosab.org.tr/dosyalar/emailler/{parsedlink[4]}_EMail.jpg'
        #     wget.download(mailimg, "imgs")

        # OCR image to text
        pytesseract.pytesseract.tesseract_cmd = r'C:\Users\abdul\AppData\Local\Tesseract-OCR\tesseract.exe'

        for imgfile in sorted(os.listdir('imgs')):
            compid = imgfile.split('_EMail.jpg')[0]
            driver.get(f'https://www.dosab.org.tr/Firma/{compid}')
            compname = driver.find_element(
                By.XPATH, '/html/body/div[2]/div/div[2]/h4').text
            img = cv2.imread(f'imgs/{imgfile}')
            emailtext = str(
                pytesseract.image_to_string(img, lang='eng')
            ).replace('\n\f', '')

            # Anything without an '@' is treated as a failed OCR read.
            if '@' not in emailtext:
                emailtext = ''

            componies.append({'mail': emailtext, 'name': compname})
    finally:
        # Always end the browser session, even when a page lookup or the OCR
        # step raises; quit() also shuts down the driver process.
        driver.quit()

    workbook = Workbook(excel_file_name)
    worksheet = workbook.add_worksheet('dosab')
    hformat = workbook.add_format()
    hformat.set_bold()

    # Header row.
    worksheet.write(0, 0, "Firma Adi", hformat)
    worksheet.write(0, 1, 'Mailler', hformat)

    # One row per company, starting right below the header.
    for row, comp in enumerate(componies, start=1):
        worksheet.write(row, 0, comp["name"])

        if '@' in comp['mail']:
            worksheet.write(row, 1, comp['mail'])

    workbook.close()
Пример #6
0
    #"87073644",
    #"86963282",
    # "87052262",
    # "87033492",
    #  "87073632",
    #  "87033441",
    #  "86965699",
    # "86965230")
    # s.add_artical("85566269","86847612")
    #s.add_author("35692440",slice(4))
    # s.add_author("16778114",slice(0,None))

    s.add_author("6657532", slice(12))
    cookie = driver.get_cookies()

    driver.close()

    s.start()

    with open("cookies.json", "w") as target:
        json.dump(cookie, target, indent=4)

{
    "GET": {
        "scheme": "https",
        "host": "117-27-114-202.mcdn.bilivideo.cn:480",
        "filename": "/upgcxcode/79/81/262668179/262668179-1-30080.m4s",
        "query": {
            "expires": "1607517453",
            "platform": "pc",
            "ssig": "stFfmYCY-VzQJJhIhQJUaw",
Пример #7
0
def getcomponies():
    """
    Get companies from the web and write them to an Excel file.

    Reads the sector links from the ``accordion-2`` list on the nosab.org.tr
    companies page, visits each sector, clicks through every company entry
    to read its name and details text, and writes the result to the ``nosab``
    worksheet of ``excel_file_name``: one bold row per sector followed by one
    row per company (name in column A, details in column B).

    :return: None
    """
    _bases.kill_web_driver_edge()
    driver = Edge()
    componies = []

    # XPath of the company anchors on a sector page.
    comp_list_xpath = '/html/body/div[7]/div/div[2]/div[3]/ul/li'

    try:
        driver.get('https://www.nosab.org.tr/firmalar/tr')

        # Read text and href up front; the elements go stale once the driver
        # navigates to another page. Note that 'Name' holds the link's href.
        alphabetslinks = [
            {'Sector': links.text, 'Name': links.get_attribute('href')}
            for links in driver.find_elements(
                By.XPATH, '//*[@id="accordion-2"]/li/a')
        ]

        for anchor in alphabetslinks:
            driver.get(anchor['Name'])
            companies_sector = {
                'Sector': anchor['Sector'],
                'comps': []
            }

            componies_count = len(
                driver.find_elements(By.XPATH, f'{comp_list_xpath}/a'))

            # Re-locate each entry by position (XPath indices are 1-based)
            # because going back invalidates previously found elements.
            for indx in range(1, componies_count + 1):
                comp = driver.find_element(
                    By.XPATH, f'{comp_list_xpath}[{indx}]/a')
                comp.click()
                company = {
                    'Name': driver.find_element(
                        By.XPATH,
                        '/html/body/div[7]/div/div[2]/div[1]/div').text,
                    'Data': str(driver.find_element(
                        By.XPATH,
                        '/html/body/div[7]/div/div[2]/div[4]').text)
                }

                companies_sector['comps'].append(company)
                driver.back()

            componies.append(companies_sector)
    finally:
        # Always end the browser session, even when a lookup raises;
        # quit() also shuts down the driver process.
        driver.quit()

    row = 0
    workbook = Workbook(excel_file_name)
    worksheet = workbook.add_worksheet('nosab')

    # Bold, centered (horizontally and vertically) format for header rows.
    hformat = workbook.add_format()
    hformat.set_bold()
    hformat.set_align('center')
    hformat.set_align('vcenter')

    worksheet.write(row, 0, 'Firma Adi', hformat)
    worksheet.set_column('A:A', 100)

    worksheet.write(row, 1, 'Bilgileri', hformat)
    worksheet.set_column('B:B', 120)

    row += 1

    fwarp = workbook.add_format()
    fwarp.set_text_wrap()

    fname_centralize = workbook.add_format()
    fname_centralize.set_align('center')

    for sector in componies:
        # Sector title row, then one row per company in that sector.
        worksheet.write(row, 0, sector['Sector'], hformat)
        row += 1

        for comp in sector['comps']:
            worksheet.write(row, 0, comp['Name'], fname_centralize)
            worksheet.write(row, 1, comp['Data'], fwarp)
            row += 1

    # Remove any previous output file before the workbook is closed.
    if os.path.exists(excel_file_name):
        os.remove(excel_file_name)

    time.sleep(_bases.timeout)
    workbook.close()