Example #1
    def _execute(self):
        # MeterWatch immediately spawns a popup when loaded, and that popup is
        # the window we actually want. So we grab the main window handle first,
        # THEN go looking for the popup window and switch to it.
        handles_before = self._driver.window_handles
        timeline = Timeline(self.start_date, self.end_date)
        main_window = _get_main_window(self._driver)
        login_window = None

        log.info(f"Navigating to {self.url}")
        self._driver.get(self.url)
        self.screenshot("initial_url")

        log.debug("Driver title: " + self._driver.title)
        assert "Seattle MeterWatch" in self._driver.title

        login_page = LoginPage(self._driver)
        meterdata_page = MeterDataPage(self._driver, self._configuration)

        login_page.login(self.username, self.password)
        self._driver.wait().until(
            lambda driver: len(handles_before) != len(driver.window_handles),
            "Issues loading login page.",
        )

        for handle in self._driver.window_handles:
            if handle != main_window:
                login_window = handle

        # We have our popup, so let's switch to it.
        log.info("switching to new window")
        self._driver.switch_to.window(login_window)
        # resize: it opens as a tiny window
        self._driver.set_window_size(1200, 800)

        for meter_number in self._configuration.meter_numbers:
            meterdata_page.select_account(meter_number)
            self.start_date, self.end_date = meterdata_page.adjust_start_and_end_dates(
                self.start_date, self.end_date)
            # Widen the timeline if necessary, since the dates may have been adjusted above.
            timeline.extend_timeline(self.start_date, self.end_date)
            date_range = DateRange(self.start_date, self.end_date)
            interval_size = relativedelta(days=MAX_DOWNLOAD_DAYS)

            for sub_range in date_range.split_iter(delta=interval_size):
                meterdata_page.enter_dates(sub_range.start_date,
                                           sub_range.end_date)
                csv_file_path = meterdata_page.download_data(meter_number)

                log.info(
                    f"parsing kWh usage from downloaded data for {meter_number}"
                )

                self._process_csv(csv_file_path, timeline)

        return Results(readings=timeline.serialize(include_empty=False))
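
Example #1 above (and every example that follows) chops the requested window into smaller pieces with DateRange.split_iter before downloading. The DateRange class itself is not part of this listing, so the following is only a rough sketch of how such a helper might work, assuming each yielded sub-range exposes start_date and end_date attributes:

# Hypothetical sketch of the DateRange/split_iter chunking pattern; the real
# class is not shown above, so names and exact boundary handling are assumptions.
from dataclasses import dataclass
from datetime import date
from dateutil.relativedelta import relativedelta


@dataclass
class SubRange:
    start_date: date
    end_date: date


class DateRange:
    def __init__(self, start_date: date, end_date: date):
        self.start_date = start_date
        self.end_date = end_date

    def split_iter(self, delta: relativedelta):
        # Yield consecutive sub-ranges, each spanning at most `delta`,
        # that together cover [start_date, end_date].
        current = self.start_date
        while current <= self.end_date:
            chunk_end = min(current + delta, self.end_date)
            yield SubRange(current, chunk_end)
            current = chunk_end + relativedelta(days=1)
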
Example #2
    def _execute(self):
        # Direct the driver to the site URL.
        # This is currently a public URL, so no credentials are needed. It will
        # have to be refactored if we start scraping private sites.
        self._driver.get(self.site_url)

        # Create page helpers
        overview_page = OverviewPage(self._driver)
        site_analytics_page = SiteAnalyticsPage(self._driver)
        date_picker_component = DatePickerSection(self._driver)

        # Navigate to site analytics tab
        overview_page.wait_until_ready()
        self.screenshot("before clicking on site analytics tab")
        overview_page.navigate_to_site_analytics()

        # Select inverter from both dropdowns
        site_analytics_page.wait_until_ready()
        self.screenshot("before selecting inverters")
        site_analytics_page.select_inverter_from_both_dropdowns(
            self.inverter_id)
        # Click on AC Power button
        self.screenshot("before clicking on ac power button")
        site_analytics_page.click_ac_power_button()
        self.screenshot("after clicking on ac power button")
        self.install_date = self.string_to_date(
            site_analytics_page.get_install_date())

        # Adjust start and end date, depending on inverter install date
        self.adjust_start_and_end_dates()

        date_range = DateRange(self.start_date, self.end_date)
        interval_size = relativedelta(days=MAX_INTERVAL_LENGTH)

        # Loop through the desired range in MAX_INTERVAL_LENGTH-day chunks to
        # pull down the power generated
        for sub_range in date_range.split_iter(delta=interval_size):
            start = sub_range.start_date
            end = sub_range.end_date

            file_path = date_picker_component.complete_form_and_download(
                start, end)

            intermediate_readings = CSVParser(self.inverter_id,
                                              file_path).process_csv()
            self.readings.update(intermediate_readings)

            log.info("Cleaning up download.")
            clear_downloads(self._driver.download_dir)
            # Pause before moving on to the next chunk
            self._driver.sleep(5)

        return Results(readings=self.readings)
Example #3
    def energy_manager_basic_usage_action(
            self, page: sce_pages.SceEnergyManagerBasicUsagePage):
        sce_pages.detect_and_close_survey(self._driver)
        rval = page.select_service_id(self.service_id)
        log.info("Result of select service id %s: %s", self.service_id, rval)
        self.screenshot("select_service_id")
        page.configure_report()

        date_range = DateRange(self.start_date, self.end_date)
        # The website seems to time out when asked for more than this much data at once
        interval_size = relativedelta(days=7)
        timeline = Timeline(self.start_date, self.end_date)

        for idx, subrange in enumerate(
                date_range.split_iter(delta=interval_size)):
            log.info("Requesting interval data for dates: %s", subrange)
            start = subrange.start_date
            end = subrange.end_date

            page.set_time_range(start, end)
            self.screenshot("set_time_range")

            try:
                page.generate_report()
                time.sleep(5)
                WebDriverWait(self._driver, 180).until(
                    EC.invisibility_of_element_located(
                        sce_pages.GenericBusyIndicatorLocator))
                self.screenshot(f"interval{idx}")
            except Exception as e:
                raise sce_errors.EnergyManagerReportException(
                    "Failed to load data from Energy Manager") from e

            try:
                page.raise_on_report_error()
            except sce_errors.EnergyManagerDataNotFoundException:
                log.info("No data found for this time range, continuing...")
                # If a given date range has no interval data, just move on to the next one
                continue

            log.info("Downloading the interval data report.")
            self.clear_csv_downloads()

            try:
                page.download_report()
            except Exception as e:
                raise sce_errors.EnergyManagerReportException(
                    "Failed to load data from Energy Manager") from e

            try:
                # Wait two minutes for the download to finish
                wait = WebDriverWait(self._driver, 120)
                csv_file_name = wait.until(
                    file_exists_in_dir(self._driver.download_dir, r".*\.csv"))
                csv_file_path = os.path.join(self._driver.download_dir,
                                             csv_file_name)
                for reading in parse_sce_csv_file(csv_file_path,
                                                  self.service_id):
                    timeline.insert(reading.dt, reading.value)
            except TimeoutException:
                raise TimeoutException(
                    "Downloading interval data from Energy Manager failed.")

        self.interval_data_timeline = timeline
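
The wait on file_exists_in_dir above relies on WebDriverWait polling an arbitrary callable until it returns a truthy value. The helper itself is not included in this listing; below is a minimal sketch of how such a download-wait condition might look, with names and details assumed from how it is called:

# Hypothetical sketch of a custom Selenium wait condition like file_exists_in_dir.
import os
import re


def file_exists_in_dir(directory, pattern):
    # Return a predicate for WebDriverWait: it is called repeatedly with the
    # driver and should return the first matching file name once the download
    # has landed, or False to keep waiting.
    regex = re.compile(pattern)

    def predicate(_driver):
        for name in sorted(os.listdir(directory)):
            if name.endswith(".crdownload"):  # skip Chrome partial downloads
                continue
            if regex.match(name):
                return name
        return False

    return predicate
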
Example #4
    def _execute(self):
        # Direct the driver to the login page
        self._driver.get(self.login_url)

        # Create page helpers
        login_page = LoginPage(self._driver)
        navigation = Navigation(self._driver)
        meter_selection_page = MeterSelectionPage(self._driver)
        export_csv_page = ExportCsvPage(self._driver)
        download_csv_page = DownloadCsvPage(self._driver)

        # Authenticate
        login_page.wait_until_ready()
        self.screenshot("before login")
        login_page.login(self.username, self.password)

        # Configure interval data generation, in two steps...
        meter_selection_page.wait_until_ready()
        self.screenshot("before meter selection")

        # 1) Specify we are entering a custom date range
        meter_selection_page.select_date_range_option()
        self.screenshot("date range option selected")

        # 2) Locate the meter of interest and select it
        matching_meter = None
        meter_query = self.ngrid_meter_id
        log.info("Looking for a meter with ID == {0}".format(meter_query))
        for meter in meter_selection_page.iter_meters():
            log.info("Found a meter: {0}".format(meter))
            if meter.meter_id == meter_query:
                log.info("Found a matching meter.")
                matching_meter = meter
                break

        if matching_meter is None:
            log.info("No meter with ID {0} was found.".format(meter_query))
            raise InvalidMeterException(
                "Meter {0} was not found".format(meter_query))
        else:
            matching_meter.select()
            self.screenshot("meter selected")

        # Two notes on time...
        # 1) Each meter specifies the date range for which data is
        #    available. If we don't respect this, the page will throw
        #    errors. So, we restrict our start and end dates based on
        #    this information.
        if self.start_date < matching_meter.min_date:
            log.info("Adjusting start date from {0} to {1}".format(
                self.start_date, matching_meter.min_date))
            self.start_date = matching_meter.min_date
        if self.end_date > matching_meter.max_date:
            log.info("Adjusting end date from {0} to {1}".format(
                self.end_date, matching_meter.max_date))
            self.end_date = matching_meter.max_date

        # 2) Only a limited amount of data can be extracted at a time.
        #    The page enforces this by restricting the number of days
        #    for which you can download data. Therefore, we pull down
        #    data in 180-day chunks. The actual restriction is a little
        #    hard to pin down, since it varies based on some nontransparent
        #    factors. 180 days, though, is a very conservative estimate.
        date_range = DateRange(self.start_date, self.end_date)
        interval_size = relativedelta(days=180)
        readings = {}  # Maps dates to interval data, populated below
        for subrange in date_range.split_iter(delta=interval_size):
            log.info("Gathering meter data for: {0}".format(subrange))

            # First, set the date range for the selected meter
            meter_selection_page.set_date_range(subrange)

            # Navigate to the "Export" page, and request a CSV report
            navigation.goto_export()
            export_csv_page.wait_until_ready()
            export_csv_page.generate_report()

            # Wait for the report to generate, then download it
            # and extract interval data from it
            download_csv_page.wait_until_ready()
            csv_rows_iter = download_csv_page.get_csv_rows()
            header = next(csv_rows_iter)
            log.info("CSV Header row: {0}".format(header))

            for data_row in csv_rows_iter:
                result = NationalGridIntervalScraper.parse_csv_row(data_row)
                if result.units == UNITS_KWH:
                    readings[self._iso_str(result.date)] = list(
                        NationalGridIntervalScraper.kwh_to_kw(
                            result.interval_data))

            # Navigate back to the meter selection page in preparation
            # for the next iteration. Note that we do not reselect the
            # meter, since our initial selections are cached.
            navigation.goto_meter_selection()
            meter_selection_page.wait_until_ready()

        return Results(readings=readings)
Example #5
    def _execute(self):
        # Direct the driver to the login page
        self._driver.get(self.login_url)
        # Create page helpers
        download_page = DownloadPage(self._driver)
        meter_page = MeterPage(self._driver)
        search_result = MeterSearchResult(self._driver)
        available_dates = AvailableDateComponent(self._driver)
        interval_form = IntervalForm(self._driver)

        self.login_to_mvweb()

        # Navigate to Download Page
        # Pause to let the IFrame settle down
        time.sleep(5)

        download_page.wait_until_ready(selector=self.download_link_selector)
        self.screenshot("before clicking on download link")
        download_page.navigate_to_download_page(self.download_link_selector)
        time.sleep(10)

        # Enter MeterId in the search box
        meter_page.wait_until_ready(meter_page.MeterSearchInput)
        self.screenshot("before searching for meter")
        meter_page.search_by_meter_id(self.meter_id)
        time.sleep(10)

        # When the search results have settled down, click on the first meter
        # result. If the meter isn't found, raise an error.
        search_result.wait_until_text_visible(
            search_result.SearchResult,
            self.meter_id,
            error_selector=search_result.NoResultsFound,
            alt_text="No matching records found",
            error_cls=MeterNotFoundException,
            error_msg="No matching records found for Meter ID {}".format(self.meter_id),
        )
        self.screenshot("before clicking on meter result")
        search_result.click_on_meter_result()

        # Adjust start and end dates if supplied start and end are out of range
        adjusted_start, adjusted_end = available_dates.adjust_start_and_end_dates(
            self.start_date, self.end_date
        )

        date_range = DateRange(adjusted_start, adjusted_end)
        interval_size = relativedelta(days=MAX_INTERVAL_LENGTH)

        timeline = Timeline(adjusted_start, adjusted_end, self._configuration.interval)
        # Break the date range into small, manageable chunks and download a CSV
        # of demand data for each one.
        for sub_range in date_range.split_iter(delta=interval_size):
            log.info("Getting interval data for date range: {}".format(sub_range))
            start = sub_range.start_date
            end = sub_range.end_date

            # Fill out interval form and click save to download data
            interval_form.fill_out_interval_form_and_download(start, end)
            file_path = self.download_file("csv")

            # Extract intermediate info from csv
            self._process_csv(file_path, timeline)

        return Results(readings=timeline.serialize(include_empty=False))
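
Several of these scrapers accumulate readings into a Timeline and return timeline.serialize(...). The Timeline class is not reproduced in this listing, so the sketch below is only an assumption about its general shape, based on how it is called: insert() records one interval reading and serialize() groups readings by day.

# Hypothetical, heavily simplified Timeline-like accumulator; the real class
# is not shown in this listing.
from collections import defaultdict
from datetime import datetime, timedelta


class Timeline:
    def __init__(self, start_date, end_date, interval=15):
        self.start_date = start_date
        self.end_date = end_date
        self.interval = interval  # minutes per reading
        self._readings = {}  # datetime -> value

    def insert(self, dt: datetime, value):
        # Only keep readings that fall inside the requested window.
        if self.start_date <= dt.date() <= self.end_date:
            self._readings[dt] = value

    def serialize(self, include_empty=True):
        # Group readings by day: {"YYYY-MM-DD": [values in time order]}.
        by_day = defaultdict(list)
        if include_empty:
            day = self.start_date
            while day <= self.end_date:
                by_day[day.isoformat()]  # ensure every day has an entry
                day += timedelta(days=1)
        for dt in sorted(self._readings):
            by_day[dt.date().isoformat()].append(self._readings[dt])
        return dict(by_day)
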
Example #6
    def _execute_internal(self):
        # Direct the driver to the login page
        self._driver.get(self.login_url)

        # Create page helpers
        login_page = LoginPage(self._driver)
        home_page = HomePage(self._driver)
        usage_page = UsagePage(self._driver)

        # Authenticate
        log.info("Logging in.")
        login_page.wait_until_ready()
        self.screenshot("before login")
        # Login sometimes seems to fail; try twice
        try:
            login_page.login(self.username, self.password, self)
        except LoginError:
            log.info("login failed; trying login a second time in 30s")
            self._driver.get(self.login_url)
            self._driver.sleep(30)
            self.screenshot("before second login")
            login_page.login(self.username, self.password, self)
        self.screenshot("after login")

        # On the homepage, fetch the visible account information. This info
        # tells us (among other things) which account id is associated with
        # which account name.
        log.info("Waiting for home page to be ready.")
        home_page.wait_until_ready()
        self.screenshot("home page loaded")

        # Go to the 'Usage' Page
        log.info("Navigating to 'Usage' page.")
        usage_page.navigate_to_usage_page()
        usage_page.wait_until_ready()
        self.screenshot("usage_page_initial")

        if usage_page.is_enterprise():
            log.info("Enterprise: selecting account: {0}".format(
                self.account_id))
            if not usage_page.select_account_from_portfolio(self.account_id):
                error_msg = f"Unable to find account with ID={self.account_id}"
                log.info(error_msg)
                raise InvalidAccountException(error_msg)
            self.screenshot("usage_account_selected")

        else:
            log.info("Selecting account: {0}".format(self.account_id))
            if not usage_page.select_account(self.account_id):
                available_accounts = set(usage_page.get_available_accounts())
                error_msg = "Unable to find account with ID={0}. Available accounts are: {1}".format(
                    self.account_id, available_accounts)
                log.info(error_msg)
                raise InvalidAccountException(error_msg)
            self.screenshot("usage_account_selected")

        # Select the desired meter
        log.info("Selecting meter with id: {0}".format(self.service_id))
        if not usage_page.select_meter(self.service_id):
            available_meters = set(usage_page.get_available_meters())
            error_msg = (
                "Unable to find meter with ID={0}. Available meters are: {1}".
                format(self.service_id, available_meters))
            raise InvalidMeterException(error_msg)
        self.screenshot("selected meter")

        date_range = DateRange(self.start_date, self.end_date)
        timeline = Timeline(self.start_date,
                            self.end_date,
                            interval=self._configuration.interval)

        excel_download = True
        if usage_page.is_enterprise():
            usage_page.enterprise_select_usage(self._configuration.interval)
        else:
            usage_page.select_usage(self._configuration.interval)
            excel_download = usage_page.excel_download_available()

        # use the same JavaScript download for both regular and enterprise
        if excel_download:
            if self._configuration.interval == 1440:
                for subrange in date_range.split_iter(delta=relativedelta(
                        days=7)):
                    usage_page.javascript_download(
                        subrange.start_date,
                        subrange.end_date,
                        self._configuration.interval,
                    )
            else:
                dt = self.start_date
                while dt < self.end_date:
                    usage_page.javascript_download(
                        dt, dt, self._configuration.interval)
                    dt += timedelta(days=1)
            for filename in glob(f"{self._driver.download_dir}/*.xlsx"):
                parse_xlsx(timeline, filename, self.adjustment_factor)
        else:
            log.info("starting GreenButton download")
            usage_page.open_green_button()
            self.screenshot("opened green button")
            """
            This page only allows you to download a certain amount of billing data at a time. We will use a
            conservative chunk size of 90 days.
            """
            interval_size = relativedelta(days=90)
            for subrange in date_range.split_iter(delta=interval_size):
                self.get_green_button(usage_page, timeline,
                                      subrange.start_date, subrange.end_date)

        return Results(readings=timeline.serialize())
Example #7
    def reports_page_action(
            self, reports_page: saltriver_pages.SaltRiverReportsPage):
        log.info("goto_meter_profiles")
        reports_page.goto_meter_profiles()
        meter_page = saltriver_pages.MeterProfilesPage(self._driver)
        WebDriverWait(self._driver, 30).until(page_is_ready(meter_page))
        self.screenshot("meter profiles")

        log.info("get meters")
        meters = meter_page.get_meters()
        meter, channel = self.find_matching_meter_and_channel(
            meters, self.meter_id, self.channel_id)
        self.screenshot("meter and channel")

        log.info("goto reports")
        meter_page.goto_reports()
        WebDriverWait(self._driver, 30).until(page_is_ready(reports_page))
        time.sleep(10)
        log.info("looking for interval download")
        reports_page.goto_interval_download()
        interval_download_page = saltriver_pages.IntervalDownloadPage(
            self._driver)
        WebDriverWait(self._driver,
                      30).until(page_is_ready(interval_download_page))
        self.screenshot("interval download")
        log.info("interval download page is ready")
        interval_download_page.basic_configuration()
        interval_download_page.select_meter_by_id(meter.meter_id)

        start = self.start_date
        end = self.end_date

        # Snap the scraper start date to the data start date for the selected meter/channel.
        if start < channel.data_start:
            start = channel.data_start

        if start > end:
            raise InvalidDateRangeError(
                "The start date must be before the end date (start='{}', end='{}')"
                .format(start, end))

        # Pull out data 30 days at a time
        date_range = DateRange(start, end)
        interval_size = relativedelta(days=30)
        timeline = Timeline(start, end)
        for sub_range in date_range.split_iter(delta=interval_size):
            log.info("downloading %s", sub_range)
            self.clear_csv_downloads()
            interval_download_page.set_date_range(sub_range.start_date,
                                                  sub_range.end_date)
            interval_download_page.download_interval_data()
            self.screenshot("download %s" %
                            sub_range.end_date.strftime("%Y%m%d"))
            try:
                wait = WebDriverWait(self._driver, 180)
                csv_file_name = wait.until(
                    file_exists_in_dir(self._driver.download_dir, r".*\.csv"))
                csv_file_path = os.path.join(self._driver.download_dir,
                                             csv_file_name)
                for (when, reading) in parse_spatia_interval_csv(
                        csv_file_path, channel.id):
                    # The CSV file reports readings at the end of each fifteen-minute interval, so the first reading
                    # of the day occurs at 00:15 and the last at midnight. We want to report the readings at the
                    # _start_ of each interval, thus we subtract 15 minutes here.
                    when = when - timedelta(minutes=15)
                    timeline.insert(when, reading)
            except TimeoutException:
                raise TimeoutException(
                    "Downloading interval data from SPATIA failed.")

        self.interval_data_timeline = timeline
Example #8
    def _execute(self):
        if self.log_in:
            self.login()

        # Create page helpers
        config_page = ConfigurationPage(self._driver)
        navigation = Navigation(self._driver)
        export_csv_page = ExportCsvPage(self._driver)
        download_csv_page = DownloadCsvPage(self._driver)

        # Configure interval data generation, in two steps...
        config_page.wait_until_ready()
        config_page.select_all_accounts()
        self.screenshot("accounts selected")

        matching_meter = None
        meter_query = self.epo_meter_id
        _log("Looking for a meter with ID == {0}".format(meter_query))
        for meter in config_page.iter_meters():
            _log("Found a meter: {0}".format(meter))
            if meter.meter_id == meter_query:
                _log("Found a matching meter.")
                matching_meter = meter
                break

        if matching_meter is None:
            _log("No meter with ID {0} was found.".format(meter_query))
            raise InvalidMeterException(
                "Meter {0} was not found".format(meter_query))
        else:
            matching_meter.select()
            self.screenshot("meter selected")

        # Two notes on time...
        # 1) Each meter specifies the date range for which data is
        #    available. If we don't respect this, the page will throw
        #    errors. We restrict our start and end dates based on
        #    this information.
        if self.start_date < matching_meter.min_date:
            _log("Adjusting start date from {0} to {1}".format(
                self.start_date, matching_meter.min_date))
            self.start_date = matching_meter.min_date
        if self.end_date > matching_meter.max_date:
            _log("Adjusting end date from {0} to {1}".format(
                self.end_date, matching_meter.max_date))
            self.end_date = matching_meter.max_date

        # 2) Only a limited amount of data can be extracted at a time.
        #    The page enforces this by restricting the number of days
        #    for which you can download data. Therefore, we pull down
        #    data in 180-day chunks. The actual restriction is a little
        #    hard to pin down, since it varies based on some nontransparent
        #    factors. 180 days, though, is a very conservative estimate.
        date_range = DateRange(self.start_date, self.end_date)
        interval_size = relativedelta(days=180)
        readings = {}  # Maps dates to interval data, populated below
        for interval in date_range.split_iter(delta=interval_size):
            _log("Gathering meter data for: {0}".format(interval))

            # First, set the date range for the selected meter
            config_page.set_date_range(interval)
            self.screenshot("date range set {} to {}".format(
                interval.start_date.isoformat(),
                interval.end_date.isoformat()))

            # Navigate to the "Export" page, and request a CSV report
            navigation.goto_export()
            export_csv_page.wait_until_ready()
            export_csv_page.generate_report()

            # Wait for the report to generate, then download it
            # and extract interval data from it
            download_csv_page.wait_until_ready()
            csv_rows_iter = download_csv_page.get_csv_rows()
            header = next(csv_rows_iter)
            _log("CSV Header row: {0}".format(header))

            csv_kw_rows = self._get_csv_kw_rows(csv_rows_iter)
            if self._is_multichannel(csv_kw_rows) and not self.channel_id:
                raise MissingChannelIdException(
                    "Missing channel ID for multichannel meter")

            # The intervals coefficient converts per-interval kWh to average kW
            # (it equals the number of intervals per hour).
            intervals_coeff = 4  # For 15-minute intervals
            if (self._get_interval_data_length(csv_kw_rows) ==
                    self.READINGS_NUM_30_MIN_INT):
                intervals_coeff = 2  # For 30-minute intervals

            for data_row in csv_kw_rows:
                # Add usage rows with the requested channel.
                # If no channel_id was passed in, add all usage rows.
                if not self.channel_id or self.channel_id == data_row.channel_id:
                    readings[data_row.date.strftime("%Y-%m-%d")] = list(
                        self._kwh_to_kw(data_row.interval_data,
                                        intervals_coeff))

            # Navigate back to the meter selection page in preparation
            # for the next iteration. Note that we do not reselect the
            # meter, since our initial selections are cached.
            navigation.goto_meter_selection()
            config_page.wait_until_ready()

        return Results(readings=readings)
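
The _kwh_to_kw step above is simple arithmetic: the energy recorded for an interval (kWh) multiplied by the number of intervals per hour gives the average power (kW) over that interval, which is why the coefficient is 4 for 15-minute data and 2 for 30-minute data. A minimal sketch follows (the real method is not shown in this listing, so the signature is an assumption):

def _kwh_to_kw(interval_data, intervals_coeff):
    # E.g. 2.5 kWh over a 15-minute interval (4 intervals per hour) is an
    # average demand of 10 kW.
    for kwh in interval_data:
        # Preserve gaps rather than treating missing readings as zero.
        yield None if kwh is None else kwh * intervals_coeff
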
Example #9
    def _execute(self):
        date_range = DateRange(self.start_date, self.end_date)
        parser = PSEIntervalReportParser(self.start_date, self.end_date)

        login_page = LoginPage(self._driver)

        login_page.wait_until_ready()
        self.screenshot("before login")
        login_page.login(self.username, self.password)
        self.screenshot("after login")

        menu_page = MainMenuPage(self._driver)
        menu_page.select_interval_report()

        self.screenshot("interval report")

        report_page = IntervalReportPage(self._driver, self.service_id,
                                         self.site_name)
        report_page.configure_meter_target()

        self.screenshot("meter target configured")

        step = relativedelta(months=1)
        for window in date_range.split_iter(step):
            # PSE has a known failure for some meters between 2017-11-05 and 2017-12-05.
            # We skip that period for those meters for now.
            if self._configuration.service_id in self.BadMeters:
                if window.start_date <= date(2017, 11, 5):
                    window.start_date = min(window.start_date,
                                            date(2017, 11, 4))
                    window.end_date = min(window.end_date, date(2017, 11, 4))
                elif window.end_date >= date(2017, 12, 5):
                    window.start_date = max(window.start_date,
                                            date(2017, 12, 6))
                    window.end_date = max(window.end_date, date(2017, 12, 6))
                else:
                    # Window is entirely inside the bad time region, just skip.
                    continue

                if window.start_date == window.end_date:
                    continue

            log.info("Downloading data for period %s - %s." %
                     (window.start_date, window.end_date))
            report_page.select_report_window(window.start_date,
                                             window.end_date)
            try:
                report_page.download_report()
            except TimeoutException:
                msg = ("The scraper failed to download interval data for "
                       "the date range {} to {}. This may be due to an "
                       "issue with the PSE website.").format(
                           window.start_date, window.end_date)
                raise ReportGenerationError(msg)

            parser.save_report(window.start_date, window.end_date)

        results = parser.serialize()

        # Write the raw interval JSON into the scraper log for easy
        # reference.
        with open(os.path.join(config.WORKING_DIRECTORY, "interval_data.json"),
                  "w") as f:
            f.write(json.dumps(results, sort_keys=True, indent=4))

        log.info("results=%s", len(results))
        return Results(readings=results)