Пример #1
0
class EdgeTest(unittest.TestCase):
    """Run the same navigation smoke check in two separate Edge (Chromium) sessions."""

    def setUp(self):
        """Start two Edge drivers and create a 5-second explicit wait for each."""
        WebDriverFactory().setup_edgedriver()
        from msedge.selenium_tools import Edge, EdgeOptions
        options = EdgeOptions()
        options.use_chromium = True
        options.set_capability('platform', 'MAC' if OS_NAME == 'MAC' else 'WINDOWS')
        self.driver1 = Edge(options=options)
        try:
            self.driver2 = Edge(options=options)
        except Exception:
            # unittest does not call tearDown when setUp raises, so the first
            # browser has to be closed here or it would be left running.
            self.driver1.quit()
            raise
        self.driver1.maximize_window()
        self.wait1 = WebDriverWait(self.driver1, 5)
        self.wait2 = WebDriverWait(self.driver2, 5)

    def tearDown(self):
        """Quit both browsers, even if quitting the second one raises."""
        try:
            self.driver2.quit()
        finally:
            self.driver1.quit()

    def _check_navigation(self, driver, wait, link_selector, expected_url):
        """Open Google, then Naver, click ``link_selector`` and wait for ``expected_url``."""
        driver.get('https://www.google.com')
        logger.info(driver.title)
        # assertIn(member, container): the expected text must be the first argument,
        # otherwise an empty title would pass the check.
        self.assertIn('Google', driver.title)
        driver.get('https://www.naver.com')
        driver.find_element(By.CSS_SELECTOR, link_selector).click()
        logger.info(driver.current_url)
        self.assertTrue(wait.until(expected_conditions.url_contains(expected_url)))

    def test_something(self):
        """Both sessions reach the expected Naver sub-site after clicking a link."""
        self._check_navigation(self.driver1, self.wait1,
                               '.link_news', 'https://news.naver.com')
        self._check_navigation(self.driver2, self.wait2,
                               '.link_join', 'https://nid.naver.com')
Пример #2
0
def main(position, location):
    """Scrape every result page of a job search and save the collected records.

    Builds the search URL with ``get_url(position, location)``, walks the
    result pages by clicking the "Next" link until it no longer exists, and
    passes the accumulated records to ``save_data_to_file``.
    """
    scraped_jobs = []
    scraped_urls = set()

    url = get_url(position, location)

    # setup web driver
    options = EdgeOptions()
    options.use_chromium = True
    driver = Edge(options=options)
    try:
        driver.implicitly_wait(5)
        driver.get(url)

        # extract the job data
        while True:
            cards = driver.find_elements_by_class_name('jobsearch-SerpJobCard')
            get_page_records(cards, scraped_jobs, scraped_urls)
            try:
                driver.find_element_by_xpath('//a[@aria-label="Next"]').click()
            except NoSuchElementException:
                # no "Next" link: this was the last page
                break
            except ElementNotInteractableException:
                driver.find_element_by_id('popover-x').click()  # to handle job notification popup
                get_page_records(cards, scraped_jobs, scraped_urls)
                # the next iteration re-reads the same page and retries "Next"
                continue
    finally:
        # always shut the browser down, even when scraping raised
        driver.quit()

    save_data_to_file(scraped_jobs)
Пример #3
0
def Restart_Modem():
    """Log in to the modem web UI at 192.168.0.1 and trigger a reboot.

    Progress is reported with ``print``. Any error during the login/reboot
    sequence is reported as 'Problem with restarting modem'. The browser is
    always closed before returning.
    """
    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("-inprivate")
    edge_browser = Edge(options=options)
    try:
        edge_browser.get('http://192.168.0.1')
        time.sleep(2)
        print('Welcome')
        try:
            login = edge_browser.find_element_by_name('username')
            password = edge_browser.find_element_by_name('password')
            sign = edge_browser.find_element_by_class_name('styled_button_s')
            login.clear()
            password.clear()
            login.send_keys('admin')
            password.send_keys('admin')
            time.sleep(2)
            sign.click()
            print('Sign in')
            alert = Alert(edge_browser)
            time.sleep(2)
            edge_browser.get('http://192.168.0.1/saveconf.htm')
            time.sleep(2)
            system = edge_browser.find_element_by_id('three_level_menu1')
            system.click()
            time.sleep(2)
            reboot = edge_browser.find_element_by_name('reboot')
            reboot.click()
            alert.accept()
            time.sleep(70)
            print('Reboot')
        except Exception:
            # Exception rather than a bare except, so Ctrl-C during the long
            # sleep is not misreported as a modem problem.
            print('Problem with restarting modem')
    finally:
        edge_browser.quit()
 def test_legacy_driver_with_legacy_options(self):
     """Edge can be started from 'MicrosoftWebDriver.exe' with default (legacy) options."""
     options = EdgeOptions()
     try:
         driver = Edge('MicrosoftWebDriver.exe', options=options)
     except Exception:
         # self.fail keeps the original exception as context in the traceback
         self.fail('Test legacy driver with legacy options failed.')
     else:
         driver.quit()
class ElementAccesser:
    """Thin convenience wrapper around a headless Edge (Chromium) driver.

    The constructor opens ``url``; ``self.wait`` is a 10-second explicit wait
    bound to the driver.
    """

    def __init__(self, url: str):
        edge_options = EdgeOptions()
        edge_options.use_chromium = True
        edge_options.add_argument('headless')
        edge_options.add_argument('disable-gpu')
        self.driver = Edge(executable_path='msedgedriver.exe',
                           options=edge_options)
        self.driver.get(url)
        self.wait = WebDriverWait(self.driver, 10)

    def wait4Element(self, xpath: str):
        """Wait for an element to be present.

        Returns True when the element appeared within the wait timeout and
        False otherwise.
        """
        try:
            self.wait.until(EC.presence_of_element_located((By.XPATH, xpath)))
        except Exception:
            return False
        # Success used to fall through and return None, which callers could
        # not tell apart from the False failure value in a truth test.
        return True

    def wait4Element4Ever(self, xpath: str):
        """Wait for an element forever"""
        while True:
            try:
                self.wait.until(
                    EC.presence_of_element_located((By.XPATH, xpath)))
                return
            except Exception:
                # Keep retrying on any driver error, but let KeyboardInterrupt
                # and SystemExit through so the loop can be stopped.
                continue

    def findElement(self, xpath: str):
        """Find an element, if it doesn't exist, return False"""
        try:
            return self.driver.find_element_by_xpath(xpath)
        except Exception:
            return False

    def inputInElement(self, element: object, content: str):
        """Type ``content`` into ``element``."""
        element.send_keys(content)

    def clickInElement(self, element: object):
        """Click ``element``.

        Raises ValueError when ``element`` is the False/None "not found"
        marker, such as the False returned by ``findElement``.
        """
        if element is False or element is None:
            # Raising a plain string is itself a TypeError in Python 3.
            raise ValueError(
                '\nelement was not found and you tried to click it')
        element.click()

    def checkBackgroundElement(self, element: object, color: str):
        """Compares the background of an element.

        color must have the following format: rgba(r, g, b, a)"""
        return str(element.value_of_css_property('background-color')) == color

    def quitBrowser(self, quit_message: str):
        """Quit the browser and return ``quit_message`` unchanged."""
        self.driver.quit()
        return quit_message
 def test_chromium_driver_with_chromium_options(self):
     """Edge can be started from 'msedgedriver.exe' with Chromium options."""
     options = EdgeOptions()
     options.use_chromium = True
     try:
         driver = Edge('msedgedriver.exe', options=options)
     except Exception:
         # Exception, not a bare except: Ctrl-C must still abort the test run
         self.fail('Test chromium driver with chromium options failed.')
     else:
         driver.quit()
 def test_default(self):
     """Edge() with no options reports 'MicrosoftEdge' as its browser name."""
     try:
         driver = Edge()
     except Exception:
         self.fail('Test default options failed.')
     # The check runs outside the creation try-block so a failed assertion is
     # reported as itself, and the browser is closed whether it passes or not.
     try:
         cap = driver.capabilities
         self.assertEqual('MicrosoftEdge', cap['browserName'],
                          'Driver launches Edge Legacy.')
     finally:
         driver.quit()
 def test_legacy_options(self):
     """Edge with use_chromium=False reports 'MicrosoftEdge' as its browser name."""
     options = EdgeOptions()
     options.use_chromium = False
     try:
         driver = Edge(options=options)
     except Exception:
         self.fail('Test legacy options failed.')
     # Assert outside the creation try-block so the real assertion message is
     # shown, and always close the browser afterwards.
     try:
         cap = driver.capabilities
         self.assertEqual('MicrosoftEdge', cap['browserName'],
                          'Driver launches Edge Legacy.')
     finally:
         driver.quit()
Пример #9
0
def extract_element():
    """Return the outer HTML of the element at ``configs.html_image_element``.

    Opens ``configs.home_page_url`` in headless Edge (Chromium), waits four
    seconds, then reads the element's ``outerHTML`` attribute. The browser is
    closed even when loading the page or finding the element fails.
    """
    options = EdgeOptions()
    options.headless = True
    options.use_chromium = True

    driver = Edge(executable_path=configs.msedge_driver_executable_path,
                  options=options)
    try:
        driver.get(configs.home_page_url)
        time.sleep(4)

        element = driver.find_element_by_xpath(configs.html_image_element)
        return element.get_attribute('outerHTML')
    finally:
        driver.quit()
Пример #10
0
    def test_chromium_options(self):
        """Edge with use_chromium=True is 'msedge' and accepts CDP commands."""
        options = EdgeOptions()
        options.use_chromium = True
        try:
            driver = Edge(options=options)
        except Exception:
            self.fail('Test chromium options failed.')
        # Checks run outside the creation try-block so their own failure
        # messages are reported, and the browser is always closed.
        try:
            cap = driver.capabilities
            self.assertEqual('msedge', cap['browserName'],
                             'Driver launches Edge Chromium.')

            result = driver.execute_cdp_cmd('Browser.getVersion', {})
            self.assertIn('userAgent', result,
                          'Driver can send Chromium-specific commands.')
        finally:
            driver.quit()
Пример #11
0
class News163Spider(scrapy.Spider):
    """Crawl selected section pages of news.163.com and yield title/content items.

    ``self.bro`` is an Edge (Chromium) driver created at start-up and quit in
    ``closed``; it is not used inside this class (presumably a downloader
    middleware renders pages with it - verify against the project settings).
    """
    name = 'news163'
    # allowed_domains = ['news.163.com']
    start_urls = ['http://news.163.com/']
    models_urls = []

    def __init__(self, *args, **kwargs):
        # Forward arguments so Scrapy's own Spider initialisation still runs.
        super().__init__(*args, **kwargs)
        # Per-instance list: the class-level list would be shared (and keep
        # growing) across spider instances.
        self.models_urls = []
        options = EdgeOptions()
        options.use_chromium = True
        # options.add_argument("headless")
        # options.add_argument("disable-gpu")
        # Suppress useless log output; excluding enable-automation is meant to evade detection
        options.add_experimental_option("excludeSwitches", ['enable-automation', 'enable-logging'])
        self.bro = Edge(options=options)

    def parse(self, response):
        """Collect the section URLs from the home page and request each one."""
        li_list = response.xpath('//*[@id="index2016_wrap"]/div[1]/div[2]/div[2]/div[2]/div[2]/div/ul/li')
        alist = [3, 4, 6, 7]
        for index in alist:
            if index >= len(li_list):
                # page layout changed or the list is shorter than expected
                continue
            model_url = li_list[index].xpath('./a/@href').extract_first()
            if model_url:
                self.models_urls.append(model_url)
        for url in self.models_urls:
            yield scrapy.Request(url, callback=self.parse_model)

    def parse_model(self, response):
        """Yield one detail-page request per news entry on a section page."""
        div_list = response.xpath('/html/body/div/div[3]/div[4]/div[1]/div[1]/div/ul/li/div/div')
        for div in div_list:
            title = div.xpath('./div/div[1]/h3/a/text()').extract_first()
            news_detail_url = div.xpath('./div/div[1]/h3/a/@href').extract_first()
            if not news_detail_url:
                # scrapy.Request rejects a missing URL; skip entries without a link
                continue
            item = News163SpiderItem()
            item['title'] = title
            # Request the news detail page
            yield scrapy.Request(url=news_detail_url, callback=self.parse_detail, meta={'item': item})

    def parse_detail(self, response):
        """Attach the article text to the item carried in ``response.meta``."""
        content = response.xpath('//*[@id="content"]/div[2]//text()').extract()
        content = ''.join(content)
        item = response.meta['item']
        print(item)
        item['content'] = content
        yield item

    def closed(self, spider):
        """Quit the browser when the spider closes.

        NOTE(review): Scrapy passes the close reason as the argument here;
        the parameter name ``spider`` is kept for compatibility.
        """
        self.bro.quit()
Пример #12
0
def run():
    """Log in to gokano.com with the stored credentials and collect the daily points.

    Browser settings are read from ``browserdetails.txt`` (line 1: browser
    name, line 2: GPU flag). Only the 'edge' browser is handled here. Progress
    is reported with ``print``. The browser is closed exactly once on every
    path.
    """
    email = read_decrypt(efile)
    password = read_decrypt(pfile)
    cemail = str(email)
    cpassword = str(password)
    print(cemail)
    # The decrypted password is deliberately not printed: it would end up in
    # terminal scrollback and log files in clear text.
    with open("browserdetails.txt", "r") as f:
        data = [line.rstrip('\n') for line in f]
    browser = data[0].lower()
    gpu = data[1].lower()

    if browser != 'edge':
        return

    # connectivity probe before starting the browser
    try:
        requests.get("http://www.google.com")
    except requests.ConnectionError:
        print('Could not connect')
        return
    print('Connection Found')

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("--start-maximized")
    if gpu == 'no':
        options.add_argument("window-size=1920,1080")
        options.add_argument("--headless")
        options.add_argument("disable-gpu")
        options.add_argument("-inprivate")
    driver = Edge(executable_path='msedgedriver.exe', options=options)
    try:
        driver.get('https://gokano.com/')
        try:
            email_field = WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.NAME, 'email')))
            print("Page is ready!")
            email_field.send_keys(cemail)
            password_field = driver.find_element_by_name('password')
            password_field.send_keys(cpassword)
            time.sleep(3)
            button = driver.find_element_by_class_name('submit')
            button.click()
            print('Login sucessful')
        except TimeoutException:
            print("Error logining in")
            time.sleep(3)
            # Stop here: continuing would use a session that cannot log in.
            return
        time.sleep(3)
        try:
            cdp = driver.find_element_by_link_text('Collect daily points')
            cdp.click()
            write_time()
        except NoSuchElementException:
            print('Already collected')
        time.sleep(3)
        print('Automation completed')
        time.sleep(3)
    finally:
        driver.quit()
class HeaderText(unittest.TestCase):
    """Checks the text of the first <h1> on the app served at localhost:4200."""

    def setUp(self):
        """Launch Edge Dev with the bundled msedgedriver and open the app."""
        edge_opts = EdgeOptions()
        edge_opts.use_chromium = True
        edge_opts.binary_location = "C:\\Program Files (x86)\\Microsoft\\Edge Dev\\Application\\msedge.exe"
        # the driver executable lives in a folder next to this test file
        here = os.path.dirname(os.path.realpath(__file__))
        driver_exe = here + "\\edgedriver_win64\\msedgedriver.exe"
        self.driver = Edge(options=edge_opts, executable_path=driver_exe)
        browser = self.driver
        browser.implicitly_wait(30)
        browser.maximize_window()
        browser.get("http://localhost:4200")

    def test_HeaderText(self):
        """The page heading reads 'todos'."""
        heading = self.driver.find_element_by_css_selector("h1")
        self.assertEqual("todos", heading.get_attribute("innerText"))

    def tearDown(self):
        """Close the browser started in setUp."""
        self.driver.quit()
class AddAToDoText(unittest.TestCase):
    """UI test that compares an added to-do's label with 'The test is adding this todo'."""

    def setUp(self):
        """Launch Edge Dev with the msedgedriver located next to this file."""
        options = EdgeOptions()
        options.use_chromium = True
        options.binary_location = "C:\\Program Files (x86)\\Microsoft\\Edge Dev\\Application\\msedge.exe"
        dir = os.path.dirname(os.path.realpath(__file__))
        edge_driver_path = dir + "\\edgedriver_win64\\msedgedriver.exe"
        self.driver = Edge(options=options, executable_path=edge_driver_path)
        self.driver.implicitly_wait(30)
        self.driver.maximize_window()
        # NOTE(review): the next line looks truncated - the URL contains masked
        # text that runs straight into the tail of an XPath expression, and
        # `addedToDoText` used below is never assigned in the visible code.
        # Presumably the end of setUp and the start of a test method were lost;
        # restore them from the original source before relying on this test.
        self.driver.get("http://*****:*****@class='toggle']/following-sibling::label").get_attribute("innerText")
        self.assertEqual("The test is adding this todo", addedToDoText)

    def tearDown(self):
        """Close the browser started in setUp."""
        self.driver.quit()
from time import sleep
from msedge.selenium_tools import Edge, EdgeOptions

# Headless Edge browser (PhantomJS also works, but it is no longer updated)
options = EdgeOptions()
options.use_chromium = True
options.add_argument("headless")
options.add_argument("disable-gpu")
# Suppress useless log output; excluding enable-automation is meant to evade
# detection (no longer effective with the latest browser versions)
options.add_experimental_option("excludeSwitches",
                                ['enable-automation', 'enable-logging'])

# Headless Chrome alternative (worked with Chrome 88.0)
# from selenium.webdriver import Chrome
# from selenium.webdriver import ChromeOptions
# options = ChromeOptions()
# chrome_options.add_argument('--headless')
# chrome_options.add_argument('--disable-gpu')
# options.add_experimental_option("excludeSwitches", ["enable-automation",'enable-logging'])
# options.add_argument("--disable-blink-features=AutomationControlled")
# options.add_experimental_option('useAutomationExtension', False)
# wd = Chrome(options=options)

wd = Edge(options=options)
try:
    wd.get('https://www.baidu.com')

    print(wd.page_source)
    sleep(2)
finally:
    # close the browser even if loading or printing the page failed
    wd.quit()
Пример #16
0
class Chat:
    """Send messages and pictures to a messenger.com thread through Edge WebDriver.

    On construction the object tries to re-attach to the WebDriver session
    recorded in ``SessionExecutor.txt`` (session id on the first line,
    executor URL on the second); if that is not possible it starts a new Edge
    driver and records its session there. Relies on a module-level ``options``
    object for the driver options.
    """

    def __init__(self):
        self.login = None
        self.password = None
        self.message = None
        self.thread = None
        self.picture = None
        self._driver = None
        self._session = None
        self._executor_url = None

        self._base_url = 'https://www.messenger.com/'
        self._initiate()

    def _initiate(self):
        """Attach to the recorded WebDriver session, or start and record a new one."""
        try:
            with open('SessionExecutor.txt', encoding='utf-8') as f:
                data = f.readlines()
            data = [a.strip() for a in data]
            self._session = data[0]
            self._executor_url = data[1]
            self._driver = webdriver.Remote(command_executor=self._executor_url, desired_capabilities={}, options=options)
            # point the new Remote object at the previously recorded session
            self._driver.session_id = self._session
            print('same browser')
        except (FileNotFoundError, IndexError, MaxRetryError):
            # no usable record (or the old driver is gone): start fresh
            self._driver = Edge("./msedgedriver.exe", options=options)
            with open('SessionExecutor.txt', 'w+', encoding='utf-8') as f:
                f.write(self._driver.session_id)
                f.write('\n')
                f.write(self._driver.command_executor._url)
            print("new browser")

    def _log_in(self):
        """Open the base URL and submit ``self.login`` / ``self.password``."""
        self._driver.get(self._base_url)
        self._driver.find_element_by_xpath('/html/body/div[2]/div[2]/div/div/div/div/div[3]/button[2]').click()
        self._driver.find_element_by_id("email").send_keys(self.login)
        self._driver.find_element_by_id("pass").send_keys(self.password, Keys.RETURN)

    def _open_thread(self):
        """Log in when no thread page is open, then navigate to ``self.thread``."""
        if "/t" not in self._driver.current_url:
            self._log_in()
        url = self._base_url + 't/' + self.thread
        if self._driver.current_url != url:
            self._driver.get(url)

    def send_message(self, thread, message, login, password):
        """Type ``message`` into ``thread`` and press RETURN."""
        # Plain attribute assignments cannot raise ValueError, so the former
        # try/except around them was unreachable and has been removed.
        self.message = message
        self.login = login
        self.password = password
        self.thread = thread
        self._open_thread()
        WebDriverWait(self._driver, 10).until(ec.presence_of_element_located((By.CLASS_NAME, "_5rp7")))
        # The backslash continuations below are inside the string literal, so
        # the indentation of the following lines is part of the XPath text.
        self._driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/div[2]/div/div/div[1]/div[1]/ \
        div[2]/div/div/div/div/div/div[1]/div[2]/div/div/div/div[2]/div/form/div/div[3]/div[2]/div[1]/div/ \
        div/div/div/div[2]/div/div/div/div').send_keys(self.message, Keys.RETURN)

    def send_picture(self, thread, picture, login, password):
        """Upload the file path ``picture`` to ``thread`` and press RETURN."""
        # See send_message: the former ValueError handler here was unreachable.
        self.picture = picture
        self.login = login
        self.password = password
        self.thread = thread
        self._open_thread()
        WebDriverWait(self._driver, 10).until(ec.presence_of_element_located((By.XPATH, "/html/body/div[1] \
        /div/div[1]/div/div[2]/div/div/div[1]/div[1]/div[2]/div/div/div/div/div/div/div[2]/div/div/div/ \
        div[2]/div/form/div/div[3]/div[1]/input")))
        upload_picture = self._driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/div[2]/ \
        div/div/div[1]/div[1]/div[2]/div/div/div/div/div/div/div[2]/div/div/div/div[2]/div/form/div/ \
        div[3]/div[1]/input')
        upload_picture.send_keys(self.picture)
        # fixed pause after handing the file path to the upload input
        time.sleep(2)
        self._driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/div[2]/div/div/div[1]/ \
        div[1]/div[2]/div/div/div/div/div/div[1]/div[2]/div/div/div/div[2]/div/form/div/div[3]/div[2] \
        /div[1]/div/div/div/div/div[2]/div/div/div/div').send_keys(Keys.RETURN)

    def exit(self):
        """Quit the driver and force-kill any remaining msedgedriver.exe processes."""
        self._driver.quit()
        os.system('cmd /c "taskkill /IM msedgedriver.exe /F /T"')
    # Click the elements with ids Home, Orders, Portfolio, Securities,
    # Analysis and Logout in turn, pausing three seconds after each click,
    # then close the window and quit the driver.
    # NOTE(review): the enclosing definition (and where `driver` comes from)
    # is not visible in this part of the file.
    try:
        element = driver.find_element_by_id("Home")
        element.click()
        time.sleep(3)

        element = driver.find_element_by_id("Orders")
        element.click()
        time.sleep(3)

        element = driver.find_element_by_id("Portfolio")
        element.click()
        time.sleep(3)

        element = driver.find_element_by_id("Securities")
        element.click()
        time.sleep(3)
        
        element = driver.find_element_by_id("Analysis")
        element.click()
        time.sleep(3)

        element = driver.find_element_by_id("Logout")
        element.click()
        time.sleep(3)
        driver.close()
        driver.quit()
    # NOTE(review): a bare except also catches KeyboardInterrupt and
    # SystemExit; any failure above ends the process with exit status 5.
    except:
        driver.close()
        driver.quit()
        exit(5)
Пример #18
0
            ping = browser.find_element_by_css_selector(
                '#verlauf-detail > tbody > tr:nth-child(3) > td:nth-child(3)'
            ).text
            #timestamp
            timestamp = browser.find_element_by_css_selector(
                '#testresult-detail > tbody > tr:nth-child(1) > td:nth-child(2) > span'
            ).text
            print("DL: " + downspeed + " ; UL: " + upspeed + " ; ping: " +
                  ping)
            fields = [timestamp, downspeed, upspeed, ping, browser.current_url]
            with open(os.path.join(os.path.dirname(__file__), 'results.csv'),
                      'a',
                      newline="") as f:
                writer = csv.writer(f)
                writer.writerow(fields)
            # save cookies
            pickle.dump(
                browser.get_cookies(),
                open(os.path.join(os.path.dirname(__file__), 'cookies.pkl'),
                     "wb"))
        except TimeoutException:
            print("Loading took too much time!")
            print(browser.current_url)
    except TimeoutException:
        print("Didn't find accept button")
        print(browser.current_url)

finally:
    browser.close()
    browser.quit()
def main():
    """Download images for a Google image search typed on standard input.

    Reads the search text and the number of images wanted from ``input()``,
    scrolls the result page in Edge, extracts image URLs and other links from
    the page source, downloads up to the requested number of images into
    ``download_path + <search text with underscores>``, and passes the
    remaining links to ``img_scp.img_download``.
    """
    from urllib.parse import quote_plus

    searchtext = input()
    num_requested = int(input())
    number_of_scrolls = num_requested // 400 + 1
    # number_of_scrolls * 400 images will be opened in the browser

    save_dir = download_path + searchtext.replace(" ", "_")
    os.makedirs(save_dir, exist_ok=True)

    # Encode the query so spaces, '&' and '#' cannot break the URL.
    url = "https://www.google.co.in/search?q=" + quote_plus(searchtext) + "&source=lnms&tbm=isch"
    chrome_driver_path = "msedgedriver.exe"
    browser_path = "C:\\Program Files (x86)\\Microsoft\\Edge Beta\\Application\\msedge.exe"
    option = EdgeOptions()
    option.binary_location = browser_path
    driver = Edge(executable_path=chrome_driver_path, options=option)
    try:
        driver.get(url)

        headers = {}
        headers['User-Agent'] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.163 Safari/537.36"
        extensions = {"jpg", "jpeg", "png", "gif"}
        img_count = 0
        downloaded_img_count = 0

        for _ in range(number_of_scrolls):
            for __ in range(15):
                driver.execute_script("window.scrollBy(0, 1000000)")
                time.sleep(0.2)
            time.sleep(0.5)
            try:
                driver.find_element_by_xpath(
                    "//input[@value='Show more results']").click()
            except Exception as e:
                print("Less images found: {}".format(e))
                break

        # Splitting on '"' leaves tokens that contain no quote character, so
        # each token can be tested directly.
        html = driver.page_source.split('"')
        links = list({
            token for token in html
            if token.startswith('https:')
            and 'gstatic' not in token and 'google' not in token
        })
        imges = list({
            token for token in html
            if token.startswith('http')
            and ('usqp=CAU' in token.split('.')[-1]
                 or token.split('.')[-1] in extensions)
        })
        print(imges)
        links_left = Diff(links, imges)

        # removing duplicates while keeping first-seen order
        urls_new = list(dict.fromkeys(links_left))

        # one URL per line; the file is closed when the block ends
        with open("page_source.txt", "w", encoding='utf8') as file1:
            file1.writelines(link + "\n" for link in urls_new)

        print("Total images: {}\n".format(len(imges)))
        for img in imges:
            img_count += 1
            print("Downloading image {}:{}".format(img_count, img))
            try:
                req = Request(img, headers=headers)
                with urlopen(req) as response:
                    raw_img = response.read()
                target = save_dir + "/" + str(downloaded_img_count) + "." + "jpeg"
                with open(target, "wb") as f:
                    f.write(raw_img)
                downloaded_img_count += 1
            except Exception as e:
                print("Download failed: {}".format(e))
            if downloaded_img_count >= num_requested:
                break

        print("Total downloaded: {}/{}".format(downloaded_img_count, img_count))
        print("Total images: {}\n".format(len(urls_new)))

        for link in urls_new:
            img_count = img_scp.img_download(link, save_dir + "/", img_count)
    finally:
        # close the browser on success and on any failure above
        driver.quit()
Пример #20
0
def getRightStufAnimeData(memberStatus, title, bookType, currPageNum):
    """Scrape one RightStufAnime result page (and following pages) into a CSV.

    Matching rows are appended to the module-level ``dataFile`` list. When a
    next-page button is present the function calls itself for the next page.
    After that it writes all collected rows, naturally sorted, to
    'RightStufAnimeData.csv'.

    Returns the CSV file name, or None when the page lists no titles.
    """
    # Start the webdriver for Edge Chromium
    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("-inprivate")
    options.add_argument("--headless")
    driver = Edge(options=options)
    try:
        # Get the URL for the page we are going to scrape for data
        driver.get(getPageURL(bookType, currPageNum, title))

        # Need to wait so the website can finish loading
        time.sleep(5)

        # Parse the HTML to start scraping for data
        soup = BeautifulSoup(driver.page_source, "html.parser")
    finally:
        # Only the page source is needed from here on, so the browser is
        # closed now; this also covers the early return below and avoids
        # holding one browser open per recursion level.
        driver.quit()

    # Get the title, price and stock status of each volume, and the next-page button if any
    titleList = soup.find_all("span", {"itemprop" : "name"})
    priceList = soup.find_all("span", {"itemprop" : "price"})
    stockStatusList = soup.find_all("div", {"class" : "product-line-stock-container"})
    nextPageButton = soup.find("li", {"class" : "global-views-pagination-next"})

    # Check to see if the title given by the user generates a valid URL for RightStufAnime
    if not titleList:
        print("Error!!! Invalid Title, Use English Title Variant w/ Appropriate Spacing & Capitalization")
        return None

    websiteName = "RightStufAnime"
    gotAnimeDiscount = 0.05 #5% Manga discount

    # Format data into a single list, keeping only entries for the requested series
    for fullTitle, price, stockStatus in zip(titleList, priceList, stockStatusList):
        if deParseString(title) in deParseString(fullTitle.text): # compare via deParseString (capitalization fix)
            if memberStatus: # If user is a member add discount
                priceVal = float(price.text[1:])
                priceText = "$" + str(round((priceVal - (priceVal * gotAnimeDiscount)), 2)) # Add discount
            else:
                priceText = price.text

            stockCheck = stockStatus.text
            if stockCheck.find("Out of Stock") != -1:
                stockCheck = "Out of Stock"
            elif stockCheck.find("No Longer Available") != -1:
                stockCheck = "Out of Print"
            elif stockCheck.find("Pre-Order") != -1:
                stockCheck = "Pre-Order"
            else:
                stockCheck = "Available"
            dataFile.append([fullTitle.text, priceText, stockCheck])

    # Check to see if there is another page
    if nextPageButton is not None:
        currPageNum += 1
        print(title)
        getRightStufAnimeData(memberStatus, title, bookType, currPageNum)

    # Write the CSV with the appropriate headers
    csvFile = websiteName + "Data.csv"
    with open (csvFile, "w", newline = "", encoding = "utf-8") as file:
        writeToFile = csv.writer(file)
        writeToFile.writerow(["Title", "Price", "Stock Status"])
        writeToFile.writerows(natsorted(dataFile)) # Sort data by title and write to the file
    return csvFile
Пример #21
0
def _show_status(text):
    """Show ``text`` as a bold status label at the fixed canvas position."""
    label = tk.Label(top, text=text, bg='white')
    label.config(font=('helvetica', 15, "bold"))
    canvas.create_window(200, 410, window=label)


def run():
    """Log in to gokano.com, collect the daily points and report progress in the GUI.

    Browser settings are read from ``browserdetails.txt`` (line 1: browser
    name, line 2: GPU flag). Only the 'edge' browser is handled here. Each
    step is reported with ``print`` and a status label. The browser is closed
    exactly once on every path.
    """
    email = read_decrypt(efile)
    password = read_decrypt(pfile)
    cemail = str(email)
    cpassword = str(password)
    with open("browserdetails.txt", "r") as f:
        data = [line.rstrip('\n') for line in f]
    browser = data[0].lower()
    gpu = data[1].lower()

    if browser != 'edge':
        return

    # Connectivity probe. No browser exists yet at this point, so the failure
    # path must not touch `driver` (doing so raised a NameError before).
    try:
        requests.get("http://www.google.com")
    except requests.ConnectionError:
        print('Could not connect')
        _show_status(" Couldn't Connect. ")
        return
    print('Connection Established.')
    _show_status(" Connection Established. ")

    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument("--start-maximized")
    if gpu == 'no':
        options.add_argument("window-size=1920,1080")
        options.add_argument("--headless")
        options.add_argument("disable-gpu")
        options.add_argument("-inprivate")
    driver = Edge(resource_path('msedgedriver.exe'), options=options)
    try:
        driver.get('https://gokano.com/')
        try:
            email_field = WebDriverWait(driver, 3).until(
                EC.presence_of_element_located((By.NAME, 'email')))
            print("Page is ready!")
            _show_status("           Page is Ready.           ")
            email_field.send_keys(cemail)
            password_field = driver.find_element_by_name('password')
            password_field.send_keys(cpassword)
            time.sleep(3)
            button = driver.find_element_by_class_name('submit')
            button.click()
            try:
                driver.find_element_by_class_name('gokan-alert-error')
            except NoSuchElementException:
                print('Login sucessful')
                _show_status(" Login Successful. ")
            else:
                print("Invalid Credintials")
                _show_status(" Invalid Credintials. ")
                time.sleep(3)
                # Stop here instead of driving a session that is not logged in.
                return
        except TimeoutException:
            print("Login Error!")
            _show_status(" Login Error! ")
            time.sleep(3)
            return
        time.sleep(3)
        try:
            cdp = driver.find_element_by_link_text('Collect daily points')
            cdp.click()
            write_time()
        except NoSuchElementException:
            print('Already collected')
            _show_status(" Points Already Collected. ")
        time.sleep(3)
        print('Automation completed')
        _show_status(" Automation Completed. ")
        time.sleep(3)
        # NOTE(review): write_time() also runs above after a successful
        # collection; both calls are kept as in the original - confirm intent.
        write_time()
    finally:
        driver.quit()
Пример #22
0
class Sei:
    """Browser automation helper for the SEI web application.

    Wraps a Selenium driver (Chrome or Chromium-based Edge) and exposes
    operations built on the element ids used by the SEI pages: logging in,
    opening a process/document through the quick-search box, reading its
    information banner, clicking buttons of the action toolbar and reading
    document contents.

    Instances work as context managers; leaving the ``with`` block calls
    :meth:`close`.
    """

    # Unit that was selected right after login; close() switches back to it.
    __area_inicial = None
    # Number of window handles before/after the last quick-search submit.
    # A larger "after" value means the search result opened in a new window.
    __windows_before = 0
    __windows_after = 0

    # Browser switches shared by both browsers. Chrome receives them with a
    # leading "--", Edge receives them bare.
    _BROWSER_ARGS = (
        'enable-javascript',
        'window-size=1440,900',
        'disable-extensions',
        "proxy-server='direct://'",
        'proxy-bypass-list=*',
        'start-maximized',
        'disable-dev-shm-usage',
        'no-sandbox',
        'ignore-certificate-errors',
    )
    _HEADLESS_ARGS = ('headless', 'disable-gpu')

    def __init__(self, headless=False, executable_path='chromedriver'):
        """Start the browser selected by the driver binary name.

        Args:
            headless: When true, run the browser without a visible window.
            executable_path: Path of the driver binary. It must contain
                ``chromedriver`` (Chrome) or ``msedgedriver`` (Edge).

        Raises:
            ValueError: If ``executable_path`` names neither supported driver.
        """
        if 'chromedriver' in executable_path:
            chrome_options = Options()
            self._add_arguments(chrome_options, '--', headless)
            self.driver = webdriver.Chrome(executable_path=executable_path,
                                           options=chrome_options)
        elif 'msedgedriver' in executable_path:
            edge_options = EdgeOptions()
            edge_options.use_chromium = True
            self._add_arguments(edge_options, '', headless)
            self.driver = Edge(executable_path=executable_path,
                               options=edge_options)
        else:
            # Fail here instead of leaving an instance without a driver that
            # would break with AttributeError on first use.
            raise ValueError(
                "executable_path deve apontar para 'chromedriver' ou "
                "'msedgedriver': %r" % (executable_path,))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    @classmethod
    def _add_arguments(cls, options, prefix, headless):
        """Add the shared browser switches to ``options`` using ``prefix``."""
        arguments = cls._BROWSER_ARGS
        if headless:
            arguments = arguments + cls._HEADLESS_ARGS
        for argument in arguments:
            options.add_argument(prefix + argument)

    def _wait_for(self, locator, timeout=3):
        """Wait until ``locator`` is present in the DOM and return the element."""
        return WebDriverWait(self.driver, timeout).until(
            EC.presence_of_element_located(locator))

    def _enter_frame(self, frame_id):
        """Wait for the iframe with id ``frame_id`` and switch into it."""
        self.driver.switch_to.frame(self._wait_for((By.ID, frame_id)))

    def _open_target(self, numero=None):
        """Open ``numero`` via quick search, or just reset to the top document."""
        if numero:
            self.go_to(numero)
        else:
            self.driver.switch_to.default_content()

    def _read_from_document(self, documento, reader):
        """Run ``reader()`` inside the document body and return its result.

        The viewer iframes are only entered when the last search did not open
        a new window. The driver is always switched back to the top document.

        Raises:
            Exception: If the frames or the requested content are not found.
        """
        self._open_target(documento)
        try:
            if self.__windows_after == self.__windows_before:
                self._enter_frame('ifrVisualizacao')
                self._enter_frame('ifrArvoreHtml')
            return reader()
        except Exception as err:
            raise Exception('Conteúdo do documento %s não encontrado.' %
                            documento) from err
        finally:
            self.driver.switch_to.default_content()

    def start_driver(self, url, usuario=None, senha=None):
        """Open ``url`` and log in, prompting for missing credentials.

        Args:
            url: Address of the login page.
            usuario: User name; asked on stdin when ``None``.
            senha: Password; asked with ``getpass`` when ``None``.

        Raises:
            Exception: With the alert text when the site shows an alert after
                the login form is submitted (e.g. invalid user or password).
        """
        if usuario is None:
            usuario = input('Digite o usuário: ')
        if senha is None:
            senha = getpass('Digite a senha: ')

        self.driver.get(url)

        usuario_field = self._wait_for((By.ID, 'txtUsuario'))
        senha_field = self.driver.find_element(By.ID, 'pwdSenha')
        botao_acessar = self.driver.find_element(By.ID, 'sbmLogin')

        usuario_field.clear()
        usuario_field.send_keys(usuario)
        senha_field.clear()
        senha_field.send_keys(senha)
        botao_acessar.click()

        alerta = self.fechar_alerta()
        if alerta:
            raise Exception(alerta)  # invalid user or password
        # Remember the unit selected at login so close() can restore it.
        self.__area_inicial = self.get_area()

    def go_to(self, numero_sei):
        """Open a process or document through the quick-search form.

        If the previous search opened an extra window, that window is closed
        first. If this search opens a new window, the driver switches to it.
        """
        if self.__windows_after > self.__windows_before:
            self.driver.close()
            self.driver.switch_to.window(
                self.driver.window_handles[self.__windows_before - 1])
        self.driver.switch_to.default_content()

        pesquisa = self._wait_for((By.ID, 'txtPesquisaRapida'))
        pesquisa.clear()
        pesquisa.send_keys(str(numero_sei))
        form_pesquisa = self._wait_for((By.ID, 'frmProtocoloPesquisaRapida'))

        self.__windows_before = len(self.driver.window_handles)
        form_pesquisa.submit()
        self.__windows_after = len(self.driver.window_handles)
        if self.__windows_after > self.__windows_before:
            self.driver.switch_to.window(
                self.driver.window_handles[self.__windows_after - 1])

    def is_processo_aberto(self, area=None, processo=None):
        """Tell whether the process is open, based on its information banner.

        Args:
            area: Optional unit name. When given, the result is whether a
                banner line contains ``area`` followed by a character other
                than a digit or letter. ``area`` is inserted into the regular
                expression unescaped.
            processo: Optional number to open first; otherwise the currently
                loaded page is inspected.

        Returns:
            ``(aberto, mensagem)``. ``aberto`` is ``None`` and ``mensagem`` a
            fixed error text when the banner could not be read.
        """
        self._open_target(processo)
        try:
            self._enter_frame('ifrVisualizacao')
            mensagem = self._wait_for((By.ID, 'divInformacao')).text
            if area:
                regex = '(?im)^(.*)(' + area + ')[^0-9a-z](.*)$'
                aberto = search(regex, mensagem) is not None
            else:
                aberto = 'aberto' in mensagem
            self.driver.switch_to.default_content()
        except Exception:
            # Best effort: any failure is reported through the return value.
            aberto = None
            mensagem = 'Impossível abrir mensagem do processo'
        return aberto, mensagem

    def get_processo_anexador(self, processo=None):
        """Return the link text of the process this one is attached to.

        Returns ``None`` when the banner does not contain
        'Processo anexado ao processo'.
        """
        self._open_target(processo)
        self._enter_frame('ifrVisualizacao')
        informacao = self._wait_for((By.ID, 'divInformacao'))
        proc_anex = None
        if 'Processo anexado ao processo' in informacao.text:
            proc_anex = self._wait_for(
                (By.XPATH, '//*[@id="divInformacao"]/div/a')).text
        self.driver.switch_to.default_content()
        return proc_anex

    def get_area(self):
        """Return the text of the currently selected unit."""
        self.driver.switch_to.default_content()
        select = Select(self.driver.find_element(By.ID, 'selInfraUnidades'))
        return select.all_selected_options[0].text

    def seleciona_area(self, area):
        """Select the unit whose visible text equals ``area``.

        Returns:
            ``True`` if the unit is (now) selected, ``False`` if the unit
            list has no such entry.
        """
        self.driver.switch_to.default_content()
        select = Select(self.driver.find_element(By.ID, 'selInfraUnidades'))
        if any(area == option.text for option in select.all_selected_options):
            return True
        if any(area == option.text for option in select.options):
            select.select_by_visible_text(area)
            # Wait for the unit selector to be present again after the change.
            self._wait_for((By.ID, 'selInfraUnidades'))
            return True
        return False

    def clicar_botao(self, botao):
        """Click the first toolbar button whose image title contains ``botao``.

        After the click, waits up to one second for an alert. If none shows
        up, the driver is switched back to the top document.

        Returns:
            ``True`` if a matching button was clicked, ``False`` otherwise.
        """
        self.driver.switch_to.default_content()
        self._enter_frame('ifrVisualizacao')
        arvore = self._wait_for((By.ID, 'divArvoreAcoes'))
        botoes = arvore.find_elements(By.XPATH, '//*[@id="divArvoreAcoes"]/a')

        for candidato in botoes:
            img = candidato.find_element(By.XPATH, 'img')
            # A missing title attribute is treated as an empty title.
            if botao in (img.get_attribute('title') or ''):
                candidato.click()
                try:
                    WebDriverWait(self.driver, 1).until(
                        EC.alert_is_present(),
                        'Timed out waiting for PA creation ' +
                        'confirmation popup to appear.')
                except Exception:
                    # No alert appeared: leave the iframe (best effort).
                    try:
                        self.driver.switch_to.default_content()
                    except Exception:
                        pass
                return True
        return False

    def fechar_alerta(self):
        """Accept a pending alert and return its text.

        Returns ``None`` when no alert appears within three seconds.
        """
        try:
            WebDriverWait(self.driver, 3).until(
                EC.alert_is_present(), 'Timed out waiting for PA creation ' +
                'confirmation popup to appear.')
        except TimeoutException:
            return None
        alert = self.driver.switch_to.alert
        alerta = alert.text
        alert.accept()
        self.driver.switch_to.default_content()
        return alerta

    def is_sobrestado(self, area=None, processo=None):
        """Tell whether the information banner mentions 'sobrestado'.

        Returns:
            ``(sobrestado, mensagem)`` when ``area`` is falsy, otherwise
            ``(sobrestado, area_encontrada)`` where the second item is a bool
            telling whether a banner line contains ``area`` (inserted into
            the regular expression unescaped).
        """
        self._open_target(processo)
        self._enter_frame('ifrVisualizacao')
        # Read the text once, while still inside the iframe that owns the
        # element; the element must not be touched after leaving the frame.
        mensagem = self._wait_for((By.ID, 'divInformacao')).text
        sobrestado = 'sobrestado' in mensagem
        self.driver.switch_to.default_content()
        if area:
            regex = '(?im)^(.*)(' + area + ')[^0-9a-z](.*)$'
            return sobrestado, search(regex, mensagem) is not None
        return sobrestado, mensagem

    def sobrestar_processo(self, motivo, processo=None):
        """Fill and submit the 'Sobrestar Processo' form with ``motivo``.

        Returns ``False`` when the toolbar button is not available.
        """
        self._open_target(processo)
        if not self.clicar_botao('Sobrestar Processo'):
            return False
        self._enter_frame('ifrVisualizacao')
        self.driver.find_element(By.ID, 'divOptSomenteSobrestar').click()
        motivo_field = self.driver.find_element(By.ID, 'txaMotivo')
        motivo_field.clear()
        motivo_field.send_keys(motivo)
        self.driver.find_element(By.ID, 'sbmSalvar').click()
        self.driver.switch_to.default_content()
        return True

    def remover_sobrestamento(self, processo=None):
        """Click 'Remover Sobrestamento do Processo' and accept the alert.

        Returns ``False`` when the toolbar button is not available.
        """
        if processo:
            self.go_to(processo)
        if self.clicar_botao('Remover Sobrestamento do Processo'):
            self.fechar_alerta()
            return True
        return False

    def publicar(self,
                 resumo_ementa,
                 data_disponibilizacao,
                 documento=None,
                 dou=False,
                 secao=None,
                 pagina=None):
        """Fill and save the 'Agendar Publicação' form.

        Args:
            resumo_ementa: Text typed into the summary field.
            data_disponibilizacao: Date text typed into the date field(s).
            documento: Optional number to open first.
            dou: When true, also fill the DOU vehicle, section, page and date.
            secao: DOU section; defaults to ``'3'`` when falsy.
            pagina: DOU page; an empty string is typed when falsy.

        Returns:
            ``True`` if the form was submitted, ``False`` when the toolbar
            button is not available.
        """
        self._open_target(documento)
        if not self.clicar_botao('Agendar Publicação'):
            return False
        self._enter_frame('ifrVisualizacao')

        resumo_field = self.driver.find_element(By.ID, 'txaResumo')
        resumo_field.clear()
        resumo_field.send_keys(resumo_ementa)

        disponibilizacao = self.driver.find_element(By.ID,
                                                    'txtDisponibilizacao')
        disponibilizacao.clear()
        disponibilizacao.send_keys(data_disponibilizacao)

        if dou:
            # Resolve the default first: a conditional expression binds
            # looser than "+", so it must not be mixed into the concatenation.
            secao_dou = str(secao) if secao else '3'

            veiculo = Select(self.driver.find_element(By.ID, 'selVeiculoIO'))
            veiculo.select_by_visible_text('DOU')

            select_secao = Select(self._wait_for((By.ID, 'selSecaoIO')))
            # Wait for the wanted option to be present before selecting it.
            self._wait_for(
                (By.CSS_SELECTOR, "option[value='" + secao_dou + "']"))
            select_secao.select_by_visible_text(secao_dou)

            pagina_field = self.driver.find_element(By.ID, 'txtPaginaIO')
            pagina_field.clear()
            pagina_field.send_keys(pagina if pagina else '')

            data_io = self.driver.find_element(By.ID, 'txtDataIO')
            data_io.clear()
            data_io.send_keys(data_disponibilizacao)

        self.driver.find_element(By.ID, 'btnSalvar').click()
        self.driver.switch_to.default_content()
        return True

    def get_conteudo_documento(self, documento=None):
        """Return the cleaned ``innerHTML`` of the document body.

        Literal ``\\n`` sequences are removed, pairs of whitespace characters
        and non-breaking spaces are replaced by a single space, and the
        result is stripped.

        Raises:
            Exception: If the viewer frames or the body cannot be read.
        """
        self._open_target(documento)
        try:
            self._enter_frame('ifrVisualizacao')
            self._enter_frame('ifrArvoreHtml')
            conteudo = self.driver.find_element(
                By.XPATH, '/html/body').get_attribute('innerHTML')
            conteudo = sub(r'\\n', '', conteudo)
            conteudo = sub(r'\s\s+?', ' ', conteudo)
            # NOTE(review): the original pattern here was a plain space (a
            # no-op), presumably a decoded "&nbsp;" entity -- confirm.
            conteudo = sub(r'&nbsp;|\xa0', ' ', conteudo)
            return conteudo.strip()
        except Exception as err:
            raise Exception('Conteúdo do documento %s não encontrado.' %
                            documento) from err
        finally:
            self.driver.switch_to.default_content()

    def get_documento_element_by_id(self, id, documento=None):
        """Return the text of the document element with the given id."""
        return self._read_from_document(
            documento, lambda: self.driver.find_element(By.ID, id).text)

    def get_documento_elements_by_id(self, id, documento=None):
        """Return the texts of all document elements with the given id."""
        return self._read_from_document(
            documento, lambda: [
                element.text
                for element in self.driver.find_elements(By.ID, id)
            ])

    def get_documento_element_by_xpath(self, xpath, documento=None):
        """Return the text of the first document element matching ``xpath``."""
        return self._read_from_document(
            documento, lambda: self.driver.find_element(By.XPATH, xpath).text)

    def get_documento_elements_by_xpath(self, xpath, documento=None):
        """Return the texts of all document elements matching ``xpath``."""
        return self._read_from_document(
            documento, lambda: [
                element.text
                for element in self.driver.find_elements(By.XPATH, xpath)
            ])

    def close(self, voltar=True):
        """Close the current window and end the browser session.

        Args:
            voltar: When true and a login unit was recorded, switch back to
                that unit before closing.
        """
        try:
            # Nothing to restore if start_driver() never completed.
            if voltar and self.__area_inicial is not None:
                self.seleciona_area(self.__area_inicial)
            self.driver.close()
        finally:
            # Always end the session, even if restoring the unit failed.
            self.driver.quit()
Пример #23
0
    def info(self):
        """Scrape the forecast text block for "Paysandú" from accuweather.com.

        Starts a Chromium-based Edge session, dismisses the cookie banner,
        searches for the city, opens the third navigation link, closes the
        interstitial ad and reads the forecast container.

        Returns:
            list[str]: Up to 35 lines of text that follow the module title
            inside the forecast container.

        The browser is always shut down, including when a wait times out or
        an element is missing.
        """
        options = EdgeOptions()
        options.use_chromium = True
        options.add_argument('--disable-extensions')
        driver_path = 'Driver\\msedgedriver.exe'

        driver = Edge(executable_path=driver_path, options=options)

        def wait_clickable(xpath, timeout=10):
            # Block until the element at ``xpath`` is clickable and return it.
            return WebDriverWait(driver, timeout).until(
                EC.element_to_be_clickable((By.XPATH, xpath)))

        try:
            driver.get('https://www.accuweather.com/')
            departamento = "Paysandú"

            # Cookie banner
            wait_clickable('/html/body/div/div[9]/div/div').click()

            # Search box
            wait_clickable(
                '/html/body/div/div[1]/div[2]/div[1]/form/input'
            ).send_keys(departamento)

            # First city suggestion
            wait_clickable(
                '/html/body/div/div[1]/div[2]/div[2]/div[2]/div').click()

            # Navigation link (third entry)
            wait_clickable('/html/body/div/div[3]/div/div[3]/a[3]').click()

            # Interstitial ad: until() either switches into the frame or
            # raises, so the close button can be clicked unconditionally.
            WebDriverWait(driver, 20).until(
                EC.frame_to_be_available_and_switch_to_it(
                    (By.NAME,
                     "google_ads_iframe_/6581/web/sam/interstitial/weather/local_home_0")))
            wait_clickable("/html/body/div/div/div[1]/div[1]").click()

            # Wait for the page content before reading it
            wait_clickable('/html/body/div/div/div[1]/div[1]/div')

            info_clima = driver.find_element(
                By.XPATH, '/html/body/div/div[5]/div[1]/div[1]').text
            titulo = driver.find_element(By.CSS_SELECTOR,
                                         'p.module-title').text

            # Keep only the lines that follow the module title
            datos_semana = info_clima.split(titulo)[1].split('\n')[1:36]
            return datos_semana
        finally:
            driver.quit()
class ChannelScrape:
    """
    Collect the ``ytInitialData`` JSON of a YouTube channel page and extract
    livestream video ids from it.

    Constructors:
    __init__()


    Methods:

    toFile(), getUpcomingId(), getLiveId()
    """
    # Options shared by every instance; __init__ only toggles ``headless``.
    options_edge = EdgeOptions()
    options_edge.use_chromium = True
    options_edge.add_argument('--ignore-certificate-errors')
    options_edge.add_argument('--ignore-ssl-errors')
    options_edge.add_argument('--mute-audio')

    def __init__(self, channelId: str, headless=True, executable_path=None):
        """
        Load the channel page and store its ``ytInitialData`` in ``jsonData``.

        channelId: Channel id appended to https://www.youtube.com/channel/
        headless: Run the browser without a visible window
        executable_path: Path of the Edge driver. When None, the directories
                of the PATH environment variable are searched.

        Raises FileNotFoundError when no driver path is given and none is
        found on PATH.
        """
        if executable_path is None:
            executable_path = self._find_driver()
        # An explicitly supplied path is honoured instead of being ignored.
        self.path_dir = executable_path

        # Setup driver
        self.options_edge.headless = headless
        self.driver = Edge(options=self.options_edge,
                           executable_path=self.path_dir)

        # JSON collecting process; the browser is closed even on failure
        url = 'https://www.youtube.com/channel/' + channelId
        try:
            self.driver.get(url)
            self.jsonData = self.driver.execute_script('return ytInitialData')
        finally:
            self.driver.quit()

    @staticmethod
    def _find_driver():
        """Return the first msedgedriver binary found on PATH."""
        for directory in os.environ.get('PATH', '').split(os.pathsep):
            for name in ('msedgedriver.exe', 'msedgedriver'):
                candidate = os.path.join(directory, name)
                if os.path.isfile(candidate):
                    return candidate
        raise FileNotFoundError(
            'msedgedriver was not found in any directory listed in PATH')

    def toFile(self, output_file: str):
        """
        Output the collected json data to a file
        output_file: Output file name. File extension will be added automatically
        """
        with codecs.open(output_file + '.json', 'w',
                         encoding='utf-8') as jsonFile:
            json.dump(self.jsonData, jsonFile, ensure_ascii=False, indent=1)

    def getUpcomingId(self, dayDelta=14):
        """
        Returns a list of upcoming livestream(s) video ID
        dayDelta: If the upcoming livestream delta is more than the provided argument,
                the livestream Id will not be added to the return list 
        """

        dateThreshold = datetime.now() + timedelta(days=dayDelta)
        collectedContents = []
        try:
            content = self.jsonData['contents'][
                'twoColumnBrowseResultsRenderer']['tabs'][0]['tabRenderer'][
                    'content']['sectionListRenderer']['contents'][1][
                        'itemSectionRenderer']['contents'][0]['shelfRenderer'][
                            'content']
        except (KeyError, IndexError, TypeError):
            print(
                'Index out of range (Most likely channel only have horizontal grid renderer)'
            )
            return collectedContents

        # A single upcoming livestream is listed in an expanded shelf,
        # several of them in a horizontal list; only the key names differ.
        if "expandedShelfContentsRenderer" in content:
            items = content['expandedShelfContentsRenderer']['items']
            rendererKey = 'videoRenderer'
        elif "horizontalListRenderer" in content:
            items = content['horizontalListRenderer']['items']
            rendererKey = 'gridVideoRenderer'
        else:
            return collectedContents

        for item in items:
            video = item[rendererKey]
            if 'upcomingEventData' not in video:
                continue
            # startTime is converted to a naive local datetime so that it is
            # comparable with the datetime.now()-based threshold.
            liveDate = datetime.fromtimestamp(
                int(video['upcomingEventData']['startTime']))
            style = video['thumbnailOverlays'][0][
                'thumbnailOverlayTimeStatusRenderer']['style']
            if style == "UPCOMING" and liveDate < dateThreshold:
                collectedContents.append(video['videoId'])

        return collectedContents

    def getLiveId(self):
        """
        Returns a list of the current livestreams video Id, if any
        It is unlikely that there are multiple livestreams in the same channel,
        but the possibility is there, therefore it returns a list instead of a single item
        """
        content = self.jsonData['contents']['twoColumnBrowseResultsRenderer'][
            'tabs'][0]['tabRenderer']['content']['sectionListRenderer'][
                'contents'][0]['itemSectionRenderer']['contents'][0]
        if "channelFeaturedContentRenderer" not in content:
            return []

        collectedContents = []
        for videoItem in content['channelFeaturedContentRenderer']['items']:
            video = videoItem['videoRenderer']
            style = video['thumbnailOverlays'][0][
                'thumbnailOverlayTimeStatusRenderer']['style']
            if style == "LIVE":
                collectedContents.append(video['videoId'])

        return collectedContents
Пример #25
0
def download(url):
    """Save every page canvas of the document at ``url`` and convert to PDF.

    Opens the page in Chromium-based Edge, expands the document if a
    "continue" button exists, zooms in five times, then scrolls to each page
    canvas, waits for its progress element to become empty and writes the
    canvas as ``./temp/<title>/<index>.png``. Finally ``conpdf`` is called to
    build ``output/<title>.pdf`` from those images.

    The browser is always shut down, including when a step fails.
    """
    options = EdgeOptions()
    options.use_chromium = True
    options.add_argument('log-level=3')
    driver = Edge(options=options)

    title = "output"
    try:
        try:
            driver.set_page_load_timeout(15)
            driver.get(url)
            title = driver.title
        except Exception:
            # Best effort: a slow page load must not prevent the download.
            print("Timeout - start download anyway.")

        print(f'道客巴巴: 《{title}》')
        time.sleep(5)

        try:
            # Expand the whole document
            elem_cont_button = driver.find_element_by_id("continueButton")
            driver.execute_script(
                "arguments[0].scrollIntoView(true);", elem_cont_button)
            ActionChains(driver).move_to_element(elem_cont_button).perform()
            time.sleep(0.5)
            elem_cont_button.click()
        except NoSuchElementException:
            pass

        # Read the page count: last space-separated token of the label
        page_label = driver.find_element_by_id('readshop').find_element_by_class_name(
            'mainpart').find_element_by_class_name('shop3').find_element_by_class_name('text').get_attribute('innerHTML')
        num_of_pages = int(page_label.split(' ')[-1])

        for _ in range(5):
            # Zoom in
            driver.find_element_by_id('zoomInButton').click()
            time.sleep(0.5)

        # Start from an empty output directory for this title
        if os.path.exists(f'./temp/{title}'):
            shutil.rmtree(f'./temp/{title}')
        os.makedirs(f'./temp/{title}')

        for pages in trange(num_of_pages):
            time.sleep(0.5)

            canvas_id = "page_" + str(pages + 1)
            pagepb_id = "pagepb_" + str(pages + 1)

            element = driver.find_element_by_id(canvas_id)
            driver.execute_script(
                "arguments[0].scrollIntoView(true);", element)
            ActionChains(driver).move_to_element(element).perform()
            time.sleep(0.5)

            # Wait until the page's progress element has no content left
            while len(driver.find_element_by_id(pagepb_id).get_attribute('innerHTML')) != 0:
                time.sleep(1)

            js_cmd = "var canvas = document.getElementById('{}');".format(canvas_id) + \
                "return canvas.toDataURL();"
            img_data = driver.execute_script(js_cmd)

            # Drop the 22-character "data:image/png;base64," prefix
            img_data = (img_data[22:]).encode()

            with open(f"./temp/{title}/{pages}.png", "wb") as fh:
                fh.write(base64.decodebytes(img_data))
    finally:
        driver.quit()

    print('下载完毕,正在转码')
    conpdf(f'output/{title}.pdf', f'temp/{title}', '.png')