def check_ajax_page_for_msg(label, msg, url):
    """Fetch a page in a Firefox session and alert by SMS if `msg` is missing.

    The page body is saved to ``<label>.html``. An SMS is sent through
    ``send_sms_msg`` when the response status is not 200, when `msg` does not
    occur in the response text, or when any exception is raised along the way.

    Parameters
    ----------
    label: str
        Human-readable page name; used in messages and as the output file stem.
    msg: str
        Text expected to be present in the response body.
    url: str
        Address of the page to request.
    """
    driver = None
    try:
        driver = Firefox()
        driver.implicitly_wait(30)
        response = driver.request('GET', url)

        # Download the site's client library and run it inside the browser
        # before waiting for outstanding AJAX activity.
        client_lib = driver.request('GET', 'https://www.nvidia.com/etc/designs/nvidiaGDC/clientlibs_foundation.min.3a16fd19562feeb504bb63525a249962.js')
        driver.execute_script(client_lib.text)

        wait_for_ajax(driver)

        if response.status_code != 200:
            send_sms_msg('Attention needed. Got a response other than 200 OK from ' + label + ' page.')

        html = response.text
        disclaimer_msg_present = msg in html

        # Keep a copy of what was received so a change can be inspected later.
        with open(label + '.html', 'w', encoding='utf-8') as html_file:
            html_file.write(html)

        print('Value of disclaimer_msg_present for ' + label + ' page: ' + str(disclaimer_msg_present))
        if not disclaimer_msg_present:
            send_sms_msg('ATTENTION! ' + label + ' page message has changed.')

        print('\nResponse from ' + label + ' page link: ' + str(response.status_code) + '\n')
    except Exception as e:
        send_sms_msg('Exception occurred while sending request to Ironmaster website.')
        print(e)
    finally:
        # Always release the browser; otherwise every call leaves a Firefox
        # process behind.
        if driver is not None:
            driver.quit()
Пример #2
0
def atc():
    """Drive two Firefox sessions through add-to-cart and checkout in lockstep.

    Each phase (add to cart, shipping, payment, card entry, final checkout)
    runs in the first browser and then in the second. The operator confirms
    every phase at an ``input()`` prompt. Both browsers are closed at the end.

    Reads the module-level ``variants``, ``checkoutPayload`` and
    ``creditCard`` collections and calls ``getLink()`` before starting.
    """
    browsers = [Firefox(), Firefox()]

    getLink()
    link = "https://yeezysupply.com/products/womens-tubular-boot-pvc-transparent"
    atc_url = "https://yeezysupply.com/cart/add.js"
    size = variants[input("Enter size: ")]
    payload = {"quantity": "1", "id": size}
    input("Press Enter load and add to cart...")

    click_checkout = (
        "document.getElementsByClassName('K__button CA__button-checkout')[0].click();"
    )
    click_continue = (
        "document.getElementsByClassName('step__footer__continue-btn btn')[0].click();"
    )

    # -------------- Go to link and ATC---------------
    for browser in browsers:
        browser.get(link)
        browser.request('POST', atc_url, data=payload)
        browser.get("https://yeezysupply.com/cart")
        browser.execute_script(click_checkout)

    # -------------- Go to shipping --------------
    input("CONTINUE TO SHIPPING...")
    for browser in browsers:
        # Each entry supplies an element id at index 0 and the text to type at
        # index 1.
        for entry in checkoutPayload:
            browser.find_element_by_id(entry[0]).send_keys(entry[1])

        province = Select(
            browser.find_element_by_id("checkout_shipping_address_province"))
        province.select_by_value('Maryland')
        browser.execute_script(click_continue)

    # -------------- Go to payment --------------
    input("CONTINUE TO PAYMENT METHOD...")
    for browser in browsers:
        browser.execute_script(click_continue)

    # -------------- Fill card --------------
    input("FILL CREDIT CARD...")
    for browser in browsers:
        # The n-th creditCard entry is typed into the n-th matching iframe.
        for frame_index, entry in enumerate(creditCard):
            frame = browser.find_elements_by_xpath(
                '//iframe[@frameborder="0"]')[frame_index]
            browser.switch_to.frame(frame)
            field = browser.find_element_by_id(entry[0])
            # NOTE(review): every item of the entry is typed, including the
            # element id at index 0 -- confirm that is intended.
            for chunk in entry:
                field.send_keys(chunk)
            browser.switch_to.default_content()

    # -------------- FINAL STEP CHECKOUT --------------
    print_warn("CHECKOUT?")
    input("")
    for browser in browsers:
        browser.execute_script(click_continue)

    time.sleep(10)
    # Close both sessions; leaving the second one open leaks a browser.
    for browser in browsers:
        browser.quit()
Пример #3
0
class RedBubble:
    """Selenium-driven scraper that logs into Redbubble and downloads product data.

    The instance owns a Firefox driver (``self.driver``) created with the
    preferences stored in ``self.options``. ``main`` runs the whole workflow.
    """

    def __init__(self):
        """Start Firefox and visit two browser-fingerprinting pages."""
        self.options = Options()
        self.options.set_preference("intl.accept_languages", 'en-us')
        self.options.set_preference('useAutomationExtension', False)
        self.options.set_preference('dom.webdriver.enabled', False)

        self.driver = Firefox(options=self.options)
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'webdriver', {get: () => undefined})"
        )
        self.driver.get("https://amiunique.org/fp")
        time.sleep(5)
        self.driver.get("https://antcpt.com/score_detector/")
        time.sleep(5)

    def get_to(self, url):
        """Navigate to `url` unless the driver is already on that exact URL."""
        if self.driver.current_url != url:
            self.driver.get(url)

    def download_image(self, file_name, image_url):
        """Download an image, stamp a watermark on it and save it to `file_name`.

        When the URL contains ``x1000`` a doubled-size variant of the URL is
        tried first; if that succeeds the original size is not fetched.

        Returns
        -------
        bool
            True when an image was saved, False when the HTTP status was not
            in the 2xx range.
        """
        if "x1000" in image_url:
            big_image_url = image_url.replace("1000x1000", "2000x2000")
            big_image_url = big_image_url.replace("750x1000", "1500x2000")
            big_image_url = big_image_url.replace("1000", "2000")
            big_image_url = big_image_url.replace("750", "1500")
            if self.download_image(file_name, big_image_url):
                print(f"Gotten bigger image for {file_name} @ {big_image_url}")
                return True
        try:
            res = self.driver.request('GET', image_url)
        except requests.exceptions.ConnectionError:
            # Back off for a minute and retry once; a second failure propagates.
            time.sleep(60)
            res = self.driver.request('GET', image_url)

        if not 200 <= res.status_code < 300:
            print(f"Download of {file_name} (@{image_url}) FAILED.")
            return False

        im = Image.open(io.BytesIO(res.content))

        width, height = im.size

        # Add watermark; larger images get a larger font.
        if max(width, height) >= 2000:
            font = ImageFont.truetype('Arial.ttf', 72)
        else:
            font = ImageFont.truetype('Arial.ttf', 36)

        draw = ImageDraw.Draw(im)
        text = "DuckHunt.me"

        fill_color = (128, 0, 0)

        if hasattr(draw, "textsize"):
            textwidth, textheight = draw.textsize(text, font)
        else:
            # Newer Pillow releases no longer provide textsize(); measure the
            # text through its bounding box instead.
            left, top, right, bottom = draw.textbbox((0, 0), text, font=font)
            textwidth, textheight = right - left, bottom - top

        # Anchor the watermark in the bottom-right corner.
        margin = 30
        x = width - textwidth - margin
        y = height - textheight - margin
        try:
            draw.text((x, y), text, font=font, fill=fill_color)
        except TypeError:
            # Fall back to a single-integer fill when the tuple is rejected.
            draw.text((x, y), text, font=font, fill=128)

        # Write to file
        im.save(file_name)

        return True

    def clean_html(self, html_string):
        """Return `html_string` cleaned so that only ``id`` attributes remain allowed."""
        tree = html.fromstring(html_string)

        cleaner = html.clean.Cleaner()
        cleaner.safe_attrs_only = True
        cleaner.safe_attrs = frozenset(['id'])
        cleaned = cleaner.clean_html(tree)
        return html.tostring(cleaned, encoding='unicode')

    def dismiss_cookie_popup(self):
        """Dismiss the member prompt, then click the toast button if present.

        Returns True when the toast button was clicked, False when it was
        missing or the click was intercepted.
        """
        self.dismiss_member_prompt()
        try:
            self.driver.find_element_by_class_name(
                "Toastify__toast-body").find_element_by_tag_name(
                    "button").click()
            return True
        except (NoSuchElementException, ElementClickInterceptedException):
            return False

    def dismiss_member_prompt(self):
        """Click the overlay close control; return whether it was found."""
        try:
            self.driver.find_element_by_class_name(
                "sailthru-overlay-close").click()
            return True
        except NoSuchElementException:
            return False

    def login(self):
        """Submit the login form and block until the browser leaves the login URL."""
        self.driver.get("https://www.redbubble.com/en/auth/login")

        username = self.driver.find_element_by_xpath(
            '//*[@id="ReduxFormInput1"]')
        username.send_keys(USERNAME)

        password = self.driver.find_element_by_xpath(
            '//*[@id="ReduxFormInput2"]')
        password.send_keys(PASSWORD)

        connect = self.driver.find_element_by_xpath(
            '/html/body/div[1]/div[7]/div[2]/div[2]/div/form/span/button')

        prev_url = self.driver.current_url
        connect.click()
        time.sleep(5)

        # The URL stays unchanged until the operator has solved the captcha.
        while prev_url == self.driver.current_url:
            print("Please solve captcha")
            time.sleep(1)

    def change_locale(self):
        """Set the account to English / US / USD on the settings page and submit."""
        self.get_to("https://www.redbubble.com/settings/show")

        locale_dropdown = Select(
            self.driver.find_element_by_xpath('//*[@id="settings_locale"]'))
        locale_dropdown.select_by_visible_text("English")

        country_code_dropdown = Select(
            self.driver.find_element_by_xpath(
                '//*[@id="settings_country_code"]'))
        country_code_dropdown.select_by_value("US")

        currency_dropdown = Select(
            self.driver.find_element_by_xpath(
                '//*[@id="settings_currency_iso"]'))
        currency_dropdown.select_by_value("USD")

        # Two page layouts are handled; try the first and fall back to the other.
        try:
            button = self.driver.find_element_by_xpath(
                '/html/body/div/form/div[4]/input')
        except NoSuchElementException:
            button = self.driver.find_element_by_xpath(
                '/html/body/div[1]/div[6]/div[2]/form/div[4]/button')

        button.click()

    def get_works_urls(self):
        """Return the work URLs found on the manage-works page, in random order."""
        self.get_to("https://www.redbubble.com/en/portfolio/manage_works")
        self.dismiss_cookie_popup()
        works = []

        links = self.driver.find_elements_by_class_name(
            'works_work-menu-option')
        for a in links:
            if a.tag_name != 'a':
                continue

            # get_attribute() yields None for an anchor without href.
            href = a.get_attribute('href')
            if href and href.startswith(
                    "https://www.redbubble.com/people/duckhuntdiscord/works"):
                works.append(href)

        random.shuffle(works)
        return works

    def get_work_products_urls(self, work_url):
        """Return the product URLs linked from the carousel of `work_url`, shuffled."""
        self.get_to(work_url)
        products_urls = []

        links = self.driver.find_elements_by_class_name('carousel_item-link')
        for a in links:
            if a.tag_name != 'a':
                continue

            # get_attribute() yields None for an anchor without href.
            href = a.get_attribute('href')
            if href and href.startswith("https://www.redbubble.com/i/"):
                products_urls.append(href)

        random.shuffle(products_urls)
        return products_urls

    def get_work_info(self, work_url):
        """Return ``{"name", "url"}`` for a work; the URL keeps only scheme, host and path."""
        self.get_to(work_url)

        return {
            "name":
            self.driver.find_element_by_class_name(
                'work-information_title').text,
            "url":
            urljoin(work_url,
                    urlparse(work_url).path)
        }

    def _get_colorswatch(self):
        """Open the colour picker if there is one and return the swatch elements.

        Returns ``[None]`` when no swatch exists so callers still iterate once.
        """
        try:
            colors_button = self.driver.find_element_by_css_selector(
                "[class^='ColorPickerActivator__colorPickerActivator--']")
        except NoSuchElementException:
            colors_button = None

        if colors_button:
            colors_button.click()

        # find_elements_* returns an empty list when nothing matches.
        colors_swatch = self.driver.find_elements_by_css_selector(
            "[class^='DesktopColorControls__swatch--']")

        if len(colors_swatch) == 0:
            colors_swatch = [None]

        return colors_swatch

    def _get_print_locations(self):
        """Return elements named ``printLocation``, or ``[None]`` when there are none."""
        print_locations = self.driver.find_elements_by_name("printLocation")

        if not print_locations:
            print_locations = [None]

        return print_locations

    def _get_sizes(self):
        """Return elements named ``size``, or ``[None]`` when there are none."""
        sizes = self.driver.find_elements_by_name("size")

        if not sizes:
            sizes = [None]

        return sizes

    def download_product_information(self, product_url, work_info):
        """Download images and metadata for every colour / print-location variant.

        Output goes to ``DOWNLOAD_DIR / work_info["name"] / <product name>``.
        A ``download.json`` summary is written last; if that file already
        exists the product is skipped.
        """
        self.get_to(product_url)
        self.dismiss_cookie_popup()

        varients = []

        product_name = self.driver.find_element_by_tag_name('h1').text
        print(f"Downloading {product_name}...")

        download_to = DOWNLOAD_DIR / work_info["name"] / product_name
        download_to.mkdir(exist_ok=True, parents=True)

        if (download_to / "download.json").exists():
            print("Skipping : already downloaded")
            return

        colors = len(self._get_colorswatch())
        print(f"Found {colors} colors_swatch.")
        for color_n in range(colors):
            # Elements are looked up again on every iteration instead of
            # reusing references obtained before the previous click.
            color_element = self._get_colorswatch()[color_n]

            if color_element is None:
                color_product_name = None
            else:
                color_product_name = color_element.get_attribute('title')
                color_element.click()
                self.dismiss_cookie_popup()

            print_locations_count = len(self._get_print_locations())
            print(f"Found {print_locations_count} print locations.")

            for print_location_n in range(print_locations_count):
                print_location_element = self._get_print_locations(
                )[print_location_n]
                if print_location_element is None:
                    print_location_text = None
                else:
                    label = print_location_element.find_element_by_xpath('..')
                    print_location_text = label.text
                    label.click()
                    self.dismiss_cookie_popup()

                sizes = self._get_sizes()
                sizes_names = []
                for size_element in sizes:
                    if size_element is None:
                        continue
                    label = size_element.find_element_by_xpath('..')
                    sizes_names.append(label.text)

                time.sleep(1)  # Time for images to load
                images = self.driver.find_elements_by_tag_name('img')

                images_downloaded = 0
                images_files = []

                for img in images:
                    # Missing attributes come back as None; treat them as
                    # empty so such images are simply skipped.
                    src = img.get_attribute('src') or ''
                    klass = img.get_attribute('class') or ''
                    if src.startswith("https://ih1.redbubble.net/image"
                                      ) and "GalleryImage__img--" in klass:
                        # The image is "main" when its great-grandparent
                        # carries the right-column class.
                        main = "PreviewGallery__rightColumn--" in img.find_element_by_xpath(
                            '..').find_element_by_xpath(
                                '..').find_element_by_xpath(
                                    '..').get_attribute('class')

                        img_folder = download_to / f"{color_product_name} - {print_location_text}"
                        img_folder.mkdir(parents=True, exist_ok=True)
                        image_name = img_folder / f"{images_downloaded}.jpg"
                        dld = self.download_image(file_name=image_name,
                                                  image_url=src)
                        if dld:
                            images_downloaded += 1
                            images_files.append({
                                "link": str(image_name),
                                "main": main
                            })

                # The price is the text of the first nested <span> found among
                # the spans of the <h1>'s parent; it stays '' if none exists.
                price = ''
                config_div = self.driver.find_element_by_tag_name(
                    'h1').find_element_by_xpath('..')
                prices_maybe = config_div.find_elements_by_tag_name('span')

                for price_maybe in prices_maybe:
                    try:
                        price = price_maybe.find_element_by_tag_name(
                            'span').text
                        break
                    except NoSuchElementException:
                        continue

                varient = {
                    "color_product_name": color_product_name,
                    "print_location_text": print_location_text,
                    "sizes_names": sizes_names,
                    "images_count": images_downloaded,
                    "images": images_files,
                    "price": price,
                    "url": self.driver.current_url,
                }

                print(varient)

                varients.append(varient)

        # The feature list is a <ul> under the parent of the 'Features' node,
        # or under its grandparent.
        try:
            features = self.driver.find_element_by_xpath("//*[contains(text(), 'Features')]")\
                                  .find_element_by_xpath('..')\
                                  .find_element_by_tag_name('ul')\
                                  .get_attribute('outerHTML')
        except NoSuchElementException:
            features = self.driver.find_element_by_xpath("//*[contains(text(), 'Features')]") \
                .find_element_by_xpath('..') \
                .find_element_by_xpath('..') \
                .find_element_by_tag_name('ul') \
                .get_attribute('outerHTML')

        with open(download_to / "download.json", "w") as f:
            json.dump(
                {
                    "varients": varients,
                    "features_html": self.clean_html(features),
                    "work": work_info,
                },
                f,
                sort_keys=True)

    def main(self):
        """Log in, set the locale and download every product of every work.

        A product is retried for as long as it fails with a stale, missing or
        non-interactable element. The driver is quit when the run ends, also
        when it ends with an exception.
        """
        try:
            self.login()
            self.change_locale()
            works_urls = self.get_works_urls()
            print(f"Discovered {len(works_urls)} works")
            for work_url in works_urls:
                work_info = self.get_work_info(work_url)
                product_urls = self.get_work_products_urls(work_url)
                for product_url in product_urls:
                    ok = False
                    while not ok:
                        try:
                            self.download_product_information(
                                product_url, work_info)
                            ok = True
                        except (StaleElementReferenceException,
                                ElementNotInteractableException,
                                NoSuchElementException) as e:
                            print(f"That failed {e}, retrying...")
        finally:
            self.driver.quit()
Пример #4
0
class EntsoeDownloader:
    """
    A class used to automatically scrape CSV files from the ENTSOE Transparency Platform
    ...
    Attributes
    ----------
    __username : str
        a string representing username of an existing ENTSOE account
    __password : str
        a string representing password of an existing ENTSOE account
    __login_url : str
        the login page link of ENTSOE
    __download_dir : str
        the folder path where CSV files are stored ("download" under the
        working directory at construction time)
    date: datetime.date
        the day for which data is requested
    __day_ahead_price_url: None
        the download url for scraping day ahead prices
    __generation_per_product_url: None
        the download url for scraping electricity generation per products
    __actual_total_load_url: None
        the download url for scraping actual loads
    __generation_forecasted_url
        the download url for scraping the generation forecasted
    driver: None
        holds the selenium Firefox driver once setup() has been called
    Methods
    -------
    check_download(i=0)
        Waits until the download folder holds exactly i files, else raises after a timeout
    setup(headless=False, date_input=None)
        Initializes the Firefox webdriver for scraping
    set_download_url()
        Initializes the download urls
    login_and_download()
        Interacts with the firefox browser and executes a sequence of tasks for scraping
    """
    def __init__(self, last_date_fetched, username="", password=""):
        """
        Parameters
        -------------
         last_date_fetched:
            Timestamp string in the format "%Y-%m-%d %H:%M:%S"
         username, password:
            ENTSOE account credentials
        """
        last_date_fetched = datetime.strptime(last_date_fetched,
                                              "%Y-%m-%d %H:%M:%S")
        new_date = last_date_fetched.date() + timedelta(days=1)

        self.__username = username
        self.__password = password
        self.__login_url = "https://transparency.entsoe.eu/homepageLogin"
        self.__download_dir = os.path.join(os.getcwd(), "download")

        # choose the following day if the last hour of fetched data is 23:00:00
        if last_date_fetched.time().hour != 23:
            self.date = last_date_fetched.date()
        else:
            self.date = new_date

        self.__day_ahead_price_url = None
        self.__generation_per_product_url = None
        self.__actual_total_load_url = None
        self.__generation_forecasted_url = None
        self.driver = None

    @staticmethod
    def check_download(i=0):
        """
        Polls once per second until the "download" folder of the current
        working directory contains exactly i files.
        Parameters
        -------------
         i:
            File counter, the number of files expected in the folder
        Returns
        -------------
         True as soon as the folder holds i files
        Raises
        -------------
         FileNotFoundError when the count is not reached within the timeout
        """
        time_to_wait = 150
        time_counter = 0

        # Validate whether file has been downloaded
        while True:

            if time_counter > time_to_wait:
                raise FileNotFoundError(
                    "Necessary files not found in directory (150 seconds timeout reached)!"
                )
            # List all the files in the : 'current_working_dir'/download
            file_names = os.listdir(os.path.join(os.getcwd(), "download"))
            if len(file_names) == i:
                return True

            time.sleep(1)
            time_counter += 1

    def setup(self, headless=False, date_input=None):
        """
        Creates the download folder if needed and starts Firefox configured
        to save CSV-like downloads there without prompting.
        Parameters
        -------------
         headless:
            Value assigned to the Firefox options' headless flag
         date_input:
            Optional date string "%d.%m.%Y" that overrides self.date
        Returns
        -------------
         self, so calls can be chained
        """
        # Validate date variable
        if date_input is not None:
            self.date = datetime.strptime(date_input, "%d.%m.%Y").date()

        # Create directory if fetched folder is not available
        if not os.path.exists(self.__download_dir):
            os.mkdir(self.__download_dir)

        # Set headless option and firefox profile
        options = Options()
        options.headless = headless

        fp = webdriver.FirefoxProfile()
        fp.set_preference("browser.download.folderList", 2)
        fp.set_preference("browser.download.manager.showWhenStarting", False)
        fp.set_preference("browser.download.dir", self.__download_dir)
        fp.set_preference(
            "browser.helperApps.neverAsk.saveToDisk",
            "text/plain, application/vnd.ms-excel, text/csv, text/comma-separated-values, "
            "application/octet-stream")

        # Initialize Firefox() object to navigate
        self.driver = Firefox(firefox_profile=fp, options=options)

        return self

    def set_download_url(self):
        """
        Sets class url attributes if date is given
        Raises
        -------------
         ValueError when self.date is None
        """

        if self.date is None:
            raise ValueError("Attribute 'date' needs to be defined")

        # Every export URL embeds the requested day in dd.mm.yyyy form.
        day = self.date.strftime("%d.%m.%Y")

        self.__day_ahead_price_url = (
            "https://transparency.entsoe.eu/transmission-domain/r2/dayAheadPrices/export?"
            "name=&defaultValue=false&viewType=TABLE&areaType=BZN&atch=false&dateTime.dateTime="
            + day +
            "+00%3A00%7CCET%7CDAY&biddingZone.values=CTY%7C10Y1001A1001A83F!BZN%7C10Y1001A1001A82H&dateTime.timezone=CET_CEST&dateTime"
            ".timezone_input=CET+(UTC%2B1)+%2F+CEST+(UTC%2B2)&dataItem=ALL&timeRange=YEAR&exportType=CSV"
        )

        self.__generation_per_product_url = (
            "https://transparency.entsoe.eu/generation/r2/actualGenerationPerProductionType/export?"
            "name=&defaultValue=false&viewType=TABLE&areaType=BZN&atch=false&datepicker-day-offset-select-dv-date-from_input=D&dateTime.dateTime="
            + day +
            "+00%3A00%7CCET%7CDAYTIMERANGE&dateTime.endDateTime="
            + day +
            "+00%3A00%7CCET%7CDAYTIMERANGE&area.values=CTY%7C10Y1001A1001A83F!BZN%7C10Y1001A1001A82H&productionType.values=B01&productionType."
            "values=B02&productionType.values=B03&productionType.values=B04&productionType.values=B05&productionType.values=B06&productionType."
            "values=B07&productionType.values=B08&productionType.values=B09&productionType.values=B10&productionType.values=B11&productionType."
            "values=B12&productionType.values=B13&productionType.values=B14&productionType.values=B20&productionType.values=B15&productionType."
            "values=B16&productionType.values=B17&productionType.values=B18&productionType.values=B19&dateTime.timezone=CET_CEST&dateTime."
            "timezone_input=CET+(UTC%2B1)+%2F+CEST+(UTC%2B2)&dataItem=ALL&timeRange=YEAR&exportType=CSV"
        )

        self.__actual_total_load_url = (
            "https://transparency.entsoe.eu/load-domain/r2/totalLoadR2/export?name=&defaultValue=false&viewType=TABLE&areaType=BZN&atch=false&dateTime"
            ".dateTime="
            + day +
            "+00%3A00%7CCET%7CDAY&biddingZone.values=CTY%7C10Y1001A1001A83F!BZN%7C10Y1001A1001A82H&dateTime.timezone=CET_CEST&dateTime"
            ".timezone_input=CET+(UTC%2B1)+%2F+CEST+(UTC%2B2)&dataItem=ALL&timeRange=YEAR&exportType=CSV"
        )

        self.__generation_forecasted_url = (
            "https://transparency.entsoe.eu/generation/r2/dayAheadGenerationForecastWindAndSolar/export?"
            "name=&defaultValue=false&viewType=TABLE&areaType=BZN&atch=false&dateTime.dateTime="
            + day +
            "+00%3A00%7CCET%7CDAYTIMERANGE&dateTime"
            ".endDateTime="
            + day +
            "+00%3A00%7CCET%7CDAYTIMERANGE&area.values=CTY%7C10Y1001A1001A83F!BZN%7C10Y1001A1001A82H&productionType"
            ".values=B16&productionType.values=B18&productionType.values=B19&processType.values=A18&processType.values=A01&processType"
            ".values=A40&dateTime.timezone=CET_CEST&dateTime.timezone_input=CET+(UTC%2B1)+%2F+CEST+(UTC%2B2)&dataItem=ALL&timeRange=YEAR&exportType=CSV"
        )

    def login_and_download(self):
        """
        Executes a sequence of tasks to log into ENTSOE and downloads the specified files
        Raises
        -------------
         ValueError when username or password is empty
         FileNotFoundError when the expected files do not appear in time
        """

        if self.__username == "" or self.__password == "":
            raise ValueError(
                "Pleaser set the credentials for ENTSOE to download")

        # Empty the "download" directory by removing and recreating it
        if len(os.listdir(os.getcwd() + "/download")) > 0:
            shutil.rmtree(self.__download_dir)
            os.mkdir(self.__download_dir)

        # Instantiate the download urls and request the browser for the homepage login url
        self.set_download_url()
        self.driver.get(self.__login_url)

        # Find login form elements and insert ENTSOE account credentials
        self.driver.find_element_by_id("email").send_keys(self.__username)
        self.driver.find_element_by_id("password").send_keys(self.__password)

        # Trigger click event to sign in
        self.driver.find_element_by_xpath(
            "//div[@class='submit-line']/div/input").click()

        # Wait (up to 10 seconds) until the "close-button" element is present
        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located((By.ID, "close-button")))

        # Hide the loading overlay, then agree to the terms and conditions
        loader_div = self.driver.find_element_by_xpath("//*[@id='loading']")
        self.driver.execute_script(
            "arguments[0].setAttribute('style','visibility:hidden;');",
            loader_div)

        self.driver.find_element_by_id("close-button").click()

        # Open download url in new tab
        urls = [
            self.__generation_per_product_url, self.__day_ahead_price_url,
            self.__actual_total_load_url, self.__generation_forecasted_url
        ]

        # Before opening the n-th url, wait until the folder holds n files,
        # i.e. until the previous downloads have arrived.
        for index, url in enumerate(urls):
            if self.check_download(index):
                print("Opening url: ",
                      re.search("/r2/(.*)/export", url).group(1))
                self.driver.execute_script("window.open('" + url + "');")

        # Download CSV file and exit the browser
        time_counter = 0
        time_to_wait = 150  # the last CSV file which is the one holding forecast information on solar, wind and total
        # takes at least 40 seconds to download

        # Compare by value: an identity check against an int literal is not a
        # reliable equality test.
        while len(os.listdir(os.path.join(os.getcwd(), "download"))) != 4:
            if time_counter > time_to_wait:
                raise FileNotFoundError(
                    "Necessary files not found in directory"
                    " (150 seconds timeout reached)!")
            time.sleep(1)
            time_counter += 1

        self.driver.quit()
Пример #5
0
import cv2
import boto3
from cv2 import imwrite
from cv2 import addWeighted
from PIL import Image, ImageFilter
import cv2

from scipy.ndimage.filters import median_filter

# Launch a visible (non-headless) Firefox window on the status page.
options = Options()
options.headless = False
browser = Firefox(options=options)

browser.get('https://parivahan.gov.in/rcdlstatus/')
# Zoom the page out before locating the target element.
browser.execute_script("document.body.style.zoom='10%'")

# Read the element's position and dimensions as integers.
elem = browser.find_element_by_id("form_rcdl:j_idt36:j_idt41")
loc, size = elem.location, elem.size
x, y = int(loc['x']), int(loc['y'])
w, h = int(size['width']), int(size['height'])

# Screenshot the page, then cut out the element's rectangle and save it.
browser.save_screenshot("snap.png")
img = cv2.imread("snap.png")
crop_img = img[y:y + h, x:x + w]
imwrite("Image.png", crop_img)
Пример #6
0
    return results


#%% Solve an individual game currently on page
def solve_game(window):
    """Click every card of every set found on the board shown in `window`.

    The board cards are collected by class name, passed through
    ``organize_cards`` and ``evaluate_cards``, and the first item of each
    card entry in each resulting set is clicked. Pauses one second afterwards.
    """
    board = window.find_elements_by_class_name('set-board-card')
    found_sets = evaluate_cards(organize_cards(board))

    for card_set in found_sets:
        for card in card_set:
            card[0].click()
    sleep(1)


#%% Initialize Browser
browser = Firefox()

browser.get('https://www.nytimes.com/puzzles/set')
# Scroll away from banner ad
browser.execute_script("window.scrollTo(0, 400)")
sleep(1)


#%% Select each tab and solve the game.
for i in range(4):
    solve_game(browser)
    try:
        browser.find_element_by_class_name('pzm-modal__button').click()
    except Exception:
        # Best effort: carry on when the modal button is absent or cannot be
        # clicked. Unlike a bare except, Ctrl-C and SystemExit still stop
        # the script.
        pass
Пример #7
0
class Sunny(object):
    def __init__(self, login, password):
        self.start_display()

        profile = webdriver.FirefoxProfile()
        profile.set_preference('browser.download.folderList', 2) # custom location
        profile.set_preference('browser.download.manager.showWhenStarting', False)
        profile.set_preference('browser.download.dir', current_dir)
        profile.set_preference('browser.helperApps.neverAsk.saveToDisk', "text/csv,application/vnd.ms-excel")
        #profile.set_preference('browser.helperApps.neverAsk.saveToDisk', "text/plain")

        self.driver = Firefox(profile)
        self.login(login, password)
        self._login = login
        self._password = password

    def start_display(self):
        """Create a non-visible 800x600 display, keep it on self.display and start it"""
        virtual_screen = Display(visible=0, size=(800, 600))
        self.display = virtual_screen
        virtual_screen.start()

    def close(self):
        self.driver.close()
        self.display.stop()

    def login(self, login=None, password=None):
        """Login on the Sunny portal website using the credentials

        Parameters
        ----------

        login: str
            The login credential to sunnyportal

        password: str
            The password credential of sunnyportal
        """
        if not login:
            login = self._login
            password = self._password

        self.driver.get("https://www.sunnyportal.com/Templates/Start.aspx?ReturnUrl=%2f")
        self.driver.find_element_by_id("txtUserName").clear()
        self.driver.find_element_by_id("txtUserName").send_keys(login)
        self.driver.find_element_by_id("txtPassword").clear()
        self.driver.find_element_by_id("txtPassword").send_keys(password)
        self.driver.find_element_by_id("ctl00_ContentPlaceHolder1_Logincontrol1_LoginBtn").click()
        #time.sleep(0.5)

    def wait_n_get(self, element_type, value):
        """Block until the described element is present, then hand it back

        Parameters
        ----------
        element_type: By.ID | By.LINK_TEXT...
            The kind of locator used to identify the element
        value: str
            The locator value describing the element

        Returns
        -------
        el: element
            Whatever the wait returns once the presence condition holds
        """
        waiter = WebDriverWait(self.driver, TIME_DELAY)
        is_present = EC.presence_of_element_located((element_type, value))
        return waiter.until(is_present)

    def goto(self, n_house):
        """Open the page of a house by clicking its link on the plant list page

        Parameters
        ----------

        n_house: int
            The number of the house to open, used as key into ``houses``
        """
        # Wait for the link whose text is the house's entry in ``houses``
        self.wait_n_get(By.LINK_TEXT, houses[n_house]).click()


    def goto_2(self, n_house):
        """Switch to a house through the plant panel of the Dashboard page

        Parameters
        ----------

        n_house: int
            The number of the house to open, used as key into ``houses``

        """
        # Click the 'plantselect' element first, then the house's link
        selector = self.wait_n_get(By.CLASS_NAME, 'plantselect')
        selector.click()
        house_link = self.wait_n_get(By.LINK_TEXT, houses[n_house])
        house_link.click()


    def hover_over(self, id):
        """Move the mouse pointer onto the page element with the given id

        Parameter
        ---------

        id: str
            The id of the element to hover over
        """
        target = self.wait_n_get(By.ID, id)
        ActionChains(self.driver).move_to_element(target).perform()

    def click(self, id):
        """Wait for the page element with the given id and click it

        Parameter
        ---------

        id: str
            The id of the element to click on
        """
        self.wait_n_get(By.ID, id).click()

    def select_date(self, day, month, year, max_retries=20):
        """Set the date picker of the dashboard to the given date

        The date is written into the date text box through jQuery as
        month/day/year, then the "previous" and "next" buttons are clicked in
        turn.  Failures are printed and never raised.

        Parameters
        ----------

        day: int
            The day of the month to select

        month: int
            The month to select

        year: int
            The year to select

        max_retries: int
            How many times the whole sequence is tried again after an
            "Element is not clickable at point" error before giving up
        """
        id_date =    'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1__datePicker_textBox'
        id_before =  'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_prev'
        id_after =   'ctl00_ContentPlaceHolder1_UserControlShowDashboard1_UserControlShowEnergyAndPower1_btn_next'

        # A bounded loop instead of recursion: a page that never becomes
        # clickable can no longer grow the call stack without limit.
        for attempt in range(max_retries + 1):
            try:
                # Make sure the text box is present before scripting it
                self.wait_n_get(By.ID, id_date)
                self.driver.execute_script('$("#%s").val("%d/%d/%d")'%(id_date, month, day, year))
                sleep(0.2)
                self.click(id_before)
                sleep(0.2)
                self.click(id_after)
                sleep(0.2)
                return
            except Exception as e:
                # Report every failure instead of dropping it silently
                print(e)
                if "Element is not clickable at point" not in str(e):
                    # Other errors are not retried; this stays best effort
                    return
                if attempt < max_retries:
                    print('trying again!')

        print('giving up on the date selection after %d attempts' % (max_retries + 1))



    def download(self, day=None, month=None, year=None):
        """Download the CSV data behind the page currently shown by the driver

        Parameters
        ----------

        day, month, year: int | None
            The date handed to select_date before downloading; when ``day``
            is falsy the date shown on the page is left untouched

        Returns
        -------
        res: response | None
            The object returned by ``self.driver.request`` for
            ``url_data_graph``, or None when the info icon is shown instead of
            the download button

        Raises
        ------
        Exception
            When the request does not answer with status 200, or when the
            download button failed and the follow-up check hits an
            'Unable to locate element' error
        """
        # Make sure we see the "Day" panel
        tabactive = self.wait_n_get(By.CLASS_NAME, 'tabactive')
        if not tabactive.text == 'Day':
            self.click(id_day)

        # Select the right day
        if day:
            self.select_date(day, month, year)

        # Hover over the download button, then click the download entry
        try:
            self.hover_over(id_hover)
            self.click(id_click)
        except Exception as e_1:
            # Check if the data is available for that day by looking for the info bubble
            try:
                el = self.wait_n_get(By.ID, id_info)
                if 'info.png' in el.get_attribute('src'):
                    print('no data available for this day')
                    return None
                else:
                    # Not sure what just happened there.
                    # NOTE(review): this raise sits inside the inner try, so it
                    # is caught by the `except Exception as e_2` right below
                    # (with e_2 being e_1) instead of leaving the method directly.
                    raise(e_1)
            except Exception as e_2:
                if 'Unable to locate element' in str(e_2):
                    # The info icon isn't available: report it and re-raise
                    # the original hover/click error
                    print(e_2)
                    raise(e_1)
                else:
                    # Not sure what just happened there: both errors are printed
                    # and swallowed, and execution continues with the request below
                    print(e_1)
                    print(e_2)
                    #raise (e1, e2)

        # Download the data for the day
        res = self.driver.request('GET', url_data_graph)
        if res.status_code == 200:
            print('sucess')
        else:
            raise Exception('Error:', res.text)
        return res


    def download_house(self, n, day=None, month=None, year=None):
        """ Download the power data of one house into a DataFrame

        Depending on the current url the method logs in again, clicks the
        house on the plant list, or switches house from the dashboard panel,
        then calls download().  The value of the date text box is stored in
        ``self.date`` afterwards.

        Parameters
        ----------
        n: int
            The number of the house to go to

        day, month, year: int | None
            Passed on unchanged to download(); when ``day`` is set, the date
            text box is compared against month/day/year and a mismatch is
            printed

        Return
        ------
        df: pandas.DataFrame | None
            A dataframe with the columns 'power' and 'avg' parsed from the
            response text, or None if there isn't any data available

        Raises
        ------
        Exception
            The original error, when checking the page for a login block
            (or the retry that follows it) raises as well
        """

        try:
            # Check what is the starting point
            if 'Start.aspx' in self.driver.current_url:
                # We are on the login screen, we first need to login
                print('-- login in main screen')
                self.login()
                print('-- accessing house', n)
                self.goto(n)
            elif 'sunnyportal.com/Plants' in self.driver.current_url:
                # We are on the plant list, so the house link can be clicked directly
                self.goto(n)
            elif 'sunnyportal.com/FixedPages/Dashboard.aspx' in self.driver.current_url:
                # We are on a dashboard, so we should be able to click on the left hand panel to go to the new house
                self.goto_2(n)
            else:
                # No idea where we are
                raise Exception('I dont know where we are:', self.driver.current_url)
            print('-- downloading house', n, 'power data')
            res = self.download(day, month, year)
            # Remember the date shown by the page; download_all reads self.date
            self.date = self.wait_n_get(By.ID, id_date).get_attribute('value')
            if day:
                if not self.date == "%d/%d/%d"%(month, day, year):
                    print('Error the date wasnt fixed correctly: '+self.date)

            if res:
                # There seems to be a positive response, so let's put it in a pandas dataframe
                df = pd.read_csv(StringIO(res.text), sep=';', names=['power', 'avg'], skiprows=1)
                print('-- download sucessful')
                return df
            else:
                print('-- download failed')
                # No response, we return a None object
                return res

        except Exception as e_1:
            # Something went wrong
            try:
                # Check if sunny portal has banned us for some time
                text = self.wait_n_get(By.ID, 'ctl00_ContentPlaceHolder1_Logincontrol1_DivLogin').text
                if 'Login failed! Login will be blocked for' in text:
                    # It does seem like we have been banned for some time
                    print(text)
                    # The number of seconds sits between 'for' and ' seconds' in the message
                    n_sec = int(text.split('for')[1].split(' seconds')[0])
                    print('going to sleep for %d sec'%(n_sec))
                    time.sleep(n_sec)
                    print('retrying this house')
                    return self.download_house(n, day, month, year)
                # NOTE(review): when the login box is found but shows no block
                # message, nothing is raised here; e_1 is dropped and the
                # method returns None implicitly -- confirm this is intended.
            except Exception as e_2:
                # I don't know what went wrong: report both errors and
                # re-raise the original one
                print(e_1)
                print(e_2)
                raise(e_1)

    def img(self):
        """Take a screenshot of the browser, wrapped in an Image to show on the notebook"""
        screenshot = self.driver.get_screenshot_as_png()
        return Image(screenshot)

    def download_all(self, day=None, month=None, year=None):
        """Download the power data of every house and save it as a CSV file

        Parameters
        ----------
        day, month, year: int | None
            Passed on unchanged to download_house for every house

        Return
        ------
        data: pandas.DataFrame
            One 'House <n>' column per house that returned data, holding the
            'power' column of that house; also stored in ``self.data`` and
            written to 'svalin_<day>_<month>_<year>.csv'
        """
        frames = {}
        last_index = None
        for n_house in houses:
            print(n_house)
            df = self.download_house(n_house, day, month, year)
            if isinstance(df, pd.DataFrame):
                frames['House %d'%(n_house)] = df
                last_index = df.index

        # Index on the last house that actually returned data.  Reading the
        # index from the final download_house result fails when that result
        # is None, or when ``houses`` is empty and nothing was downloaded.
        self.data = pd.DataFrame(
            {name: frame.power for name, frame in frames.items()},
            index=last_index,
        )

        # Save the data into a file; self.date is set by download_house and
        # holds month/day/year
        m, d, y = self.date.split('/')
        self.data.to_csv('svalin_%s_%s_%s.csv'%(d, m, y))
        return self.data