def _execute(self):
    """Scrape interval kWh data from Seattle MeterWatch.

    Logs in, switches to the popup window the site spawns, then for each
    configured meter downloads CSV data in MAX_DOWNLOAD_DAYS-sized chunks
    and folds it into a Timeline.

    Returns:
        Results: serialized timeline readings (empty intervals excluded).
    """
    # Meterwatch immediately spawns a popup when loaded which is the actual
    # window we want. So we have to go and grab the main window handle and
    # THEN go looking for the popup window and switch to it.
    handles_before = self._driver.window_handles
    timeline = Timeline(self.start_date, self.end_date)
    main_window = _get_main_window(self._driver)
    login_window = None
    log.info(f"Navigating to {self.url}")
    self._driver.get(self.url)
    self.screenshot("initial_url")
    log.debug("Driver title: " + self._driver.title)
    # Sanity check that we actually landed on the MeterWatch site.
    assert "Seattle MeterWatch" in self._driver.title
    login_page = LoginPage(self._driver)
    meterdata_page = MeterDataPage(self._driver, self._configuration)
    login_page.login(self.username, self.password)
    # Login succeeds when a new window handle appears (the popup).
    self._driver.wait().until(
        lambda driver: len(handles_before) != len(driver.window_handles),
        "Issues loading login page.",
    )
    # Any handle other than the main window must be the popup we want.
    for handle in self._driver.window_handles:
        if handle != main_window:
            login_window = handle
    # We have our popup, so lets do stuff with it.
    log.info("switching to new window")
    self._driver.switch_to.window(login_window)
    # resize: it opens as a tiny window
    self._driver.set_window_size(1200, 800)
    for meter_number in self._configuration.meter_numbers:
        meterdata_page.select_account(meter_number)
        # NOTE(review): dates are re-adjusted per meter, starting from the
        # values left by the previous iteration — confirm this is intended.
        self.start_date, self.end_date = meterdata_page.adjust_start_and_end_dates(
            self.start_date, self.end_date)
        # Widen timeline if necessary after dates may have been adjusted from original.
        timeline.extend_timeline(self.start_date, self.end_date)
        date_range = DateRange(self.start_date, self.end_date)
        interval_size = relativedelta(days=MAX_DOWNLOAD_DAYS)
        # Download data chunk-by-chunk; the site limits the request span.
        for sub_range in date_range.split_iter(delta=interval_size):
            meterdata_page.enter_dates(sub_range.start_date, sub_range.end_date)
            csv_file_path = meterdata_page.download_data(meter_number)
            log.info(
                f"parsing kWh usage from downloaded data for {meter_number}"
            )
            self._process_csv(csv_file_path, timeline)
    return Results(readings=timeline.serialize(include_empty=False))
def _execute(self):
    """Scrape inverter AC-power generation data from a public analytics site.

    Navigates to the site analytics tab, selects the configured inverter,
    then downloads CSV data in MAX_INTERVAL_LENGTH-day chunks and merges
    the parsed readings.

    Returns:
        Results: the accumulated readings dictionary.
    """
    # Direct driver to site url -
    # Currently a public URL, no credentials needed. Will have to be
    # refactored in the future if we start scraping private sites.
    self._driver.get(self.site_url)

    # Create page helpers
    overview_page = OverviewPage(self._driver)
    site_analytics_page = SiteAnalyticsPage(self._driver)
    date_picker_component = DatePickerSection(self._driver)

    # Navigate to site analytics tab
    overview_page.wait_until_ready()
    self.screenshot("before clicking on site analytics tab")
    overview_page.navigate_to_site_analytics()

    # Select inverter from both dropdowns
    site_analytics_page.wait_until_ready()
    self.screenshot("before selecting inverters")
    site_analytics_page.select_inverter_from_both_dropdowns(
        self.inverter_id)

    # Click on AC Power button
    self.screenshot("before clicking on ac power button")
    site_analytics_page.click_ac_power_button()
    self.screenshot("after clicking on ac power button")

    self.install_date = self.string_to_date(
        site_analytics_page.get_install_date())
    # Adjust start and end date, depending on inverter install date
    self.adjust_start_and_end_dates()

    date_range = DateRange(self.start_date, self.end_date)
    interval_size = relativedelta(days=MAX_INTERVAL_LENGTH)

    # Loop through desired interval in two day chunks to pull down
    # power generated
    for sub_range in date_range.split_iter(delta=interval_size):
        start = sub_range.start_date
        end = sub_range.end_date
        file_path = date_picker_component.complete_form_and_download(
            start, end)
        intermediate_readings = CSVParser(self.inverter_id, file_path).process_csv()
        self.readings.update(intermediate_readings)
        log.info("Cleaning up download.")
        clear_downloads(self._driver.download_dir)
        # Adding a large pause
        self._driver.sleep(5)

    return Results(readings=self.readings)
def energy_manager_basic_usage_action(
        self, page: sce_pages.SceEnergyManagerBasicUsagePage):
    """Download interval data from SCE's Energy Manager basic-usage report.

    Selects the configured service id, then requests, downloads, and parses
    the report one week at a time, inserting readings into a Timeline that
    is stored on ``self.interval_data_timeline``.

    Raises:
        sce_errors.EnergyManagerReportException: if report generation or
            download fails.
        TimeoutException: if the CSV download never appears on disk.
    """
    sce_pages.detect_and_close_survey(self._driver)
    rval = page.select_service_id(self.service_id)
    log.info("Result of select service id %s: %s", self.service_id, rval)
    self.screenshot("select_service_id")
    page.configure_report()
    date_range = DateRange(self.start_date, self.end_date)
    # the website seems to time out when trying to get more than this amount of data
    interval_size = relativedelta(days=7)
    timeline = Timeline(self.start_date, self.end_date)
    for idx, subrange in enumerate(
            date_range.split_iter(delta=interval_size)):
        log.info("Requesting interval data for dates: %s", subrange)
        start = subrange.start_date
        end = subrange.end_date
        page.set_time_range(start, end)
        self.screenshot("set_time_range")
        try:
            page.generate_report()
            time.sleep(5)
            # Wait for the page's busy indicator to clear before proceeding.
            WebDriverWait(self._driver, 180).until(
                EC.invisibility_of_element_located(
                    sce_pages.GenericBusyIndicatorLocator))
            self.screenshot(f"interval{idx}")
        except Exception as e:
            raise sce_errors.EnergyManagerReportException(
                "Failed to load data from Energy Manager") from e
        try:
            page.raise_on_report_error()
        except sce_errors.EnergyManagerDataNotFoundException:
            log.info("No data found for this time range, continuing...")
            # If a given date range has no interval data, just move on to the next one
            continue
        log.info("Downloading the interval data report.")
        # Remove stale CSVs so the download wait below finds the new file.
        self.clear_csv_downloads()
        try:
            page.download_report()
        except Exception as e:
            raise sce_errors.EnergyManagerReportException(
                "Failed to load data from Energy Manager") from e
        try:
            # Wait two minutes for the download to finish
            wait = WebDriverWait(self._driver, 120)
            csv_file_name = wait.until(
                file_exists_in_dir(self._driver.download_dir, r".*\.csv"))
            csv_file_path = os.path.join(self._driver.download_dir,
                                         csv_file_name)
            for reading in parse_sce_csv_file(csv_file_path, self.service_id):
                timeline.insert(reading.dt, reading.value)
        except TimeoutException:
            raise TimeoutException(
                "Downloading interval data from Energy Manager failed.")
    self.interval_data_timeline = timeline
def _execute(self):
    """Scrape National Grid interval data via the site's CSV export flow.

    Authenticates, selects the configured meter, clamps the requested dates
    to the meter's available range, then exports CSV reports in 180-day
    chunks, converting kWh rows to kW readings.

    Returns:
        Results: readings keyed by ISO date string.

    Raises:
        InvalidMeterException: if no meter matches ``self.ngrid_meter_id``.
    """
    # Direct the driver to the login page
    self._driver.get(self.login_url)

    # Create page helpers
    login_page = LoginPage(self._driver)
    navigation = Navigation(self._driver)
    meter_selection_page = MeterSelectionPage(self._driver)
    export_csv_page = ExportCsvPage(self._driver)
    download_csv_page = DownloadCsvPage(self._driver)

    # Authenticate
    login_page.wait_until_ready()
    self.screenshot("before login")
    login_page.login(self.username, self.password)

    # Configure interval data generation, in two steps...
    meter_selection_page.wait_until_ready()
    self.screenshot("before meter selection")

    # 1) Specify we are entering a custom date range
    meter_selection_page.select_date_range_option()
    self.screenshot("date range option selected")

    # 2) Locate the meter of interest and select it
    matching_meter = None
    meter_query = self.ngrid_meter_id
    log.info("Looking for a meter with ID == {0}".format(meter_query))
    for meter in meter_selection_page.iter_meters():
        log.info("Found a meter: {0}".format(meter))
        if meter.meter_id == meter_query:
            log.info("Found a matching meter.")
            matching_meter = meter
            break

    if matching_meter is None:
        log.info("No meter with ID {0} was found.".format(meter_query))
        raise InvalidMeterException(
            "Meter {0} was not found".format(meter_query))
    else:
        matching_meter.select()
        self.screenshot("meter selected")

    # Two notes on time...
    # 1) Each meter specifies the date range for which data is
    #    available. If we don't respect this, the page will throw
    #    errors. So, we restrict our start and end dates based on
    #    this information.
    if self.start_date < matching_meter.min_date:
        log.info("Adjusting start date from {0} to {1}".format(
            self.start_date, matching_meter.min_date))
        self.start_date = matching_meter.min_date
    if self.end_date > matching_meter.max_date:
        log.info("Adjusting end date from {0} to {1}".format(
            self.end_date, matching_meter.max_date))
        self.end_date = matching_meter.max_date

    # 2) Only a limited amount of data can be extracted at a time.
    #    The page enforces this by restricting the number of days
    #    for which you can download data. Therefore, we pull down
    #    data in 180-day chunks. The actual restriction is a little
    #    hard to pin down, since it varies based on some nontransparent
    #    factors. 180 though is a very conservative estimate.
    date_range = DateRange(self.start_date, self.end_date)
    interval_size = relativedelta(days=180)

    readings = {}  # Maps dates to interval data, populated below
    for subrange in date_range.split_iter(delta=interval_size):
        log.info("Gathering meter data for: {0}".format(subrange))

        # First, set the date range for the selected meter
        meter_selection_page.set_date_range(subrange)

        # Navigate to the "Export" page, and request a CSV report
        navigation.goto_export()
        export_csv_page.wait_until_ready()
        export_csv_page.generate_report()

        # Wait for the report to generate, then download it
        # and extract interval data from it
        download_csv_page.wait_until_ready()
        csv_rows_iter = download_csv_page.get_csv_rows()
        # First row is the header; log it for debugging.
        header = next(csv_rows_iter)
        log.info("CSV Header row: {0}".format(header))
        for data_row in csv_rows_iter:
            result = NationalGridIntervalScraper.parse_csv_row(data_row)
            # Only kWh rows are converted and recorded.
            if result.units == UNITS_KWH:
                readings[self._iso_str(result.date)] = list(
                    NationalGridIntervalScraper.kwh_to_kw(
                        result.interval_data))

        # Navigate back to the meter selection page in preparation
        # for the next iteration. Note that we do not reselect the
        # meter, since our initial selections are cached.
        navigation.goto_meter_selection()
        meter_selection_page.wait_until_ready()

    return Results(readings=readings)
def _execute(self):
    """Scrape interval demand data from an MVWeb portal.

    Logs in, navigates to the download page (with fixed sleeps to let the
    page's iframe settle), searches for the configured meter, clamps the
    date range to the available dates, then downloads CSVs chunk-by-chunk
    into a Timeline.

    Returns:
        Results: serialized timeline readings (empty intervals excluded).

    Raises:
        MeterNotFoundException: if the meter search returns no results.
    """
    # Direct the driver to the login page
    self._driver.get(self.login_url)

    # Create page helpers
    download_page = DownloadPage(self._driver)
    meter_page = MeterPage(self._driver)
    search_result = MeterSearchResult(self._driver)
    available_dates = AvailableDateComponent(self._driver)
    interval_form = IntervalForm(self._driver)

    self.login_to_mvweb()

    # Navigate to Download Page
    # Pause to let the IFrame to settle down
    time.sleep(5)
    download_page.wait_until_ready(selector=self.download_link_selector)
    self.screenshot("before clicking on download link")
    download_page.navigate_to_download_page(self.download_link_selector)
    time.sleep(10)

    # Enter MeterId in the search box
    meter_page.wait_until_ready(meter_page.MeterSearchInput)
    self.screenshot("before searching for meter")
    meter_page.search_by_meter_id(self.meter_id)
    time.sleep(10)

    # When search results have settled down, click on first meter result.
    # If meter isn't found, throw an error.
    search_result.wait_until_text_visible(
        search_result.SearchResult,
        self.meter_id,
        error_selector=search_result.NoResultsFound,
        alt_text="No matching records found",
        error_cls=MeterNotFoundException,
        error_msg="No matching records found for Meter ID {}".format(self.meter_id),
    )
    self.screenshot("before clicking on meter result")
    search_result.click_on_meter_result()

    # Adjust start and end dates if supplied start and end are out of range
    adjusted_start, adjusted_end = available_dates.adjust_start_and_end_dates(
        self.start_date, self.end_date
    )
    date_range = DateRange(adjusted_start, adjusted_end)
    interval_size = relativedelta(days=MAX_INTERVAL_LENGTH)
    timeline = Timeline(adjusted_start, adjusted_end,
                        self._configuration.interval)

    # Breaks the date range into small, manageable chunks and downloads a csv
    # of demands for each one.
    for sub_range in date_range.split_iter(delta=interval_size):
        log.info("Getting interval data for date range: {}".format(sub_range))
        start = sub_range.start_date
        end = sub_range.end_date
        # Fill out interval form and click save to download data
        interval_form.fill_out_interval_form_and_download(start, end)
        file_path = self.download_file("csv")
        # Extract intermediate info from csv
        self._process_csv(file_path, timeline)

    return Results(readings=timeline.serialize(include_empty=False))
def _execute_internal(self):
    """Scrape usage data, preferring Excel downloads with a GreenButton fallback.

    Logs in (retrying once after 30s on LoginError), selects the configured
    account and meter, then either downloads .xlsx files via the page's
    JavaScript download (weekly chunks for daily data, day-by-day otherwise)
    or falls back to GreenButton downloads in 90-day chunks.

    Returns:
        Results: serialized timeline readings.

    Raises:
        InvalidAccountException: if the account id cannot be found.
        InvalidMeterException: if the meter id cannot be found.
    """
    # Direct the driver to the login page
    self._driver.get(self.login_url)

    # Create page helpers
    login_page = LoginPage(self._driver)
    home_page = HomePage(self._driver)
    usage_page = UsagePage(self._driver)

    # Authenticate
    log.info("Logging in.")
    login_page.wait_until_ready()
    self.screenshot("before login")
    # login seems to sometimes fail; try twice
    try:
        login_page.login(self.username, self.password, self)
    except LoginError:
        log.info("login failed; trying login a second time in 30s")
        self._driver.get(self.login_url)
        self._driver.sleep(30)
        self.screenshot("before second login")
        login_page.login(self.username, self.password, self)
    self.screenshot("after login")

    # On the homepage, fetch the visible account information. This info
    # tells us (among other things) which account id is associated with
    # which account name.
    log.info("Waiting for home page to be ready.")
    home_page.wait_until_ready()
    self.screenshot("home page loaded")

    # Go to the 'Usage' Page
    log.info("Navigating to 'Usage' page.")
    usage_page.navigate_to_usage_page()
    usage_page.wait_until_ready()
    self.screenshot("usage_page_initial")

    if usage_page.is_enterprise():
        log.info("Enterprise: selecting account: {0}".format(
            self.account_id))
        if not usage_page.select_account_from_portfolio(self.account_id):
            error_msg = f"Unable to find account with ID={self.account_id}"
            log.info(error_msg)
            raise InvalidAccountException(error_msg)
        self.screenshot("usage_account_selected")
    else:
        log.info("Selecting account: {0}".format(self.account_id))
        if not usage_page.select_account(self.account_id):
            available_accounts = set(usage_page.get_available_accounts())
            error_msg = "Unable to find account with ID={0}. Available accounts are: {1}".format(
                self.account_id, available_accounts)
            log.info(error_msg)
            raise InvalidAccountException(error_msg)
        self.screenshot("usage_account_selected")

    # Select the desired meter
    log.info("Selecting meter with id: {0}".format(self.service_id))
    if not usage_page.select_meter(self.service_id):
        available_meters = set(usage_page.get_available_meters())
        error_msg = (
            "Unable to find meter with ID={0}. Available meters are: {1}".
            format(self.service_id, available_meters))
        raise InvalidMeterException(error_msg)
    self.screenshot("selected meter")

    date_range = DateRange(self.start_date, self.end_date)
    timeline = Timeline(self.start_date, self.end_date,
                        interval=self._configuration.interval)
    # Enterprise accounts always support the Excel download path; regular
    # accounts report availability via excel_download_available().
    excel_download = True
    if usage_page.is_enterprise():
        usage_page.enterprise_select_usage(self._configuration.interval)
    else:
        usage_page.select_usage(self._configuration.interval)
        excel_download = usage_page.excel_download_available()
    # use the same JavaScript download for both regular and enterprise
    if excel_download:
        if self._configuration.interval == 1440:
            # Daily data: pull a week at a time.
            for subrange in date_range.split_iter(delta=relativedelta(
                    days=7)):
                usage_page.javascript_download(
                    subrange.start_date,
                    subrange.end_date,
                    self._configuration.interval,
                )
        else:
            # Sub-daily data: one download per day.
            dt = self.start_date
            while dt < self.end_date:
                usage_page.javascript_download(
                    dt, dt, self._configuration.interval)
                dt += timedelta(days=1)
        # Parse every downloaded workbook into the timeline.
        for filename in glob(f"{self._driver.download_dir}/*.xlsx"):
            parse_xlsx(timeline, filename, self.adjustment_factor)
    else:
        log.info("starting GreenButton download")
        usage_page.open_green_button()
        self.screenshot("opened green button")
        """ This page only allows you to download a certain amount of billing data at a time. We will use a conservative chunk size of 90 days. """
        interval_size = relativedelta(days=90)
        for subrange in date_range.split_iter(delta=interval_size):
            self.get_green_button(usage_page, timeline, subrange.start_date,
                                  subrange.end_date)
    return Results(readings=timeline.serialize())
def reports_page_action(
        self, reports_page: saltriver_pages.SaltRiverReportsPage):
    """Download interval data from the Salt River (SPATIA) reports pages.

    Looks up the configured meter/channel on the meter-profiles page, then
    uses the interval-download page to pull CSVs 30 days at a time, shifting
    each reading to interval-start time and storing the result on
    ``self.interval_data_timeline``.

    Raises:
        InvalidDateRangeError: if the (possibly snapped) start date is
            after the end date.
        TimeoutException: if a CSV download never appears on disk.
    """
    log.info("goto_meter_profiles")
    reports_page.goto_meter_profiles()
    meter_page = saltriver_pages.MeterProfilesPage(self._driver)
    WebDriverWait(self._driver, 30).until(page_is_ready(meter_page))
    self.screenshot("meter profiles")
    log.info("get meters")
    meters = meter_page.get_meters()
    meter, channel = self.find_matching_meter_and_channel(
        meters, self.meter_id, self.channel_id)
    self.screenshot("meter and channel")
    log.info("goto reports")
    meter_page.goto_reports()
    WebDriverWait(self._driver, 30).until(page_is_ready(reports_page))
    time.sleep(10)
    log.info("looking for interval download")
    reports_page.goto_interval_download()
    interval_download_page = saltriver_pages.IntervalDownloadPage(
        self._driver)
    WebDriverWait(self._driver, 30).until(
        page_is_ready(interval_download_page))
    self.screenshot("interval download")
    log.info("interval download page is ready")
    interval_download_page.basic_configuration()
    interval_download_page.select_meter_by_id(meter.meter_id)
    start = self.start_date
    end = self.end_date
    # Snap the scraper start date to the data start date for the selected meter/channel.
    if start < channel.data_start:
        start = channel.data_start
    if start > end:
        raise InvalidDateRangeError(
            "The start date must be before the end date (start='{}', end='{}')"
            .format(start, end))
    # Pull out data 30 days at a time
    date_range = DateRange(start, end)
    interval_size = relativedelta(days=30)
    timeline = Timeline(start, end)
    for sub_range in date_range.split_iter(delta=interval_size):
        log.info("downloading %s", sub_range)
        # Clear old CSVs so the file-exists wait below matches the new one.
        self.clear_csv_downloads()
        interval_download_page.set_date_range(sub_range.start_date,
                                              sub_range.end_date)
        interval_download_page.download_interval_data()
        self.screenshot("download %s" % sub_range.end_date.strftime("%Y%m%d"))
        try:
            wait = WebDriverWait(self._driver, 180)
            csv_file_name = wait.until(
                file_exists_in_dir(self._driver.download_dir, r".*\.csv"))
            csv_file_path = os.path.join(self._driver.download_dir,
                                         csv_file_name)
            for (when, reading) in parse_spatia_interval_csv(
                    csv_file_path, channel.id):
                # The CSV file reports readings at the end of each fifteen minute interval. So the first reading
                # of the day occurs at 00:15. and the last at midnight. We want to report the readings at the
                # _start_ of each interval, thus we subtract 15 minutes here.
                when = when - timedelta(minutes=15)
                timeline.insert(when, reading)
        except TimeoutException:
            raise TimeoutException(
                "Downloading interval data from SPATIA failed.")
    self.interval_data_timeline = timeline
def _execute(self):
    """Scrape EPO interval data via the portal's CSV export flow.

    Optionally logs in, selects all accounts and the configured meter,
    clamps the requested dates to the meter's available range, then exports
    CSV reports in 180-day chunks, converting kWh rows to kW readings
    (filtered by channel id when one is configured).

    Returns:
        Results: readings keyed by "YYYY-MM-DD" date string.

    Raises:
        InvalidMeterException: if no meter matches ``self.epo_meter_id``.
        MissingChannelIdException: if the meter is multichannel but no
            channel id was configured.
    """
    # Create page helpers
    if self.log_in:
        self.login()
    config_page = ConfigurationPage(self._driver)
    navigation = Navigation(self._driver)
    export_csv_page = ExportCsvPage(self._driver)
    download_csv_page = DownloadCsvPage(self._driver)

    # Configure interval data generation, in two steps...
    config_page.wait_until_ready()
    config_page.select_all_accounts()
    self.screenshot("accounts selected")

    matching_meter = None
    meter_query = self.epo_meter_id
    _log("Looking for a meter with ID == {0}".format(meter_query))
    for meter in config_page.iter_meters():
        _log("Found a meter: {0}".format(meter))
        if meter.meter_id == meter_query:
            _log("Found a matching meter.")
            matching_meter = meter
            break

    if matching_meter is None:
        _log("No meter with ID {0} was found.".format(meter_query))
        raise InvalidMeterException(
            "Meter {0} was not found".format(meter_query))
    else:
        matching_meter.select()
        self.screenshot("meter selected")

    # Two notes on time...
    # 1) Each meter specifies the date range for which data is
    #    available. If we don't respect this, the page will throw
    #    errors. We restrict our start and end dates based on
    #    this information.
    if self.start_date < matching_meter.min_date:
        _log("Adjusting start date from {0} to {1}".format(
            self.start_date, matching_meter.min_date))
        self.start_date = matching_meter.min_date
    if self.end_date > matching_meter.max_date:
        _log("Adjusting end date from {0} to {1}".format(
            self.end_date, matching_meter.max_date))
        self.end_date = matching_meter.max_date

    # 2) Only a limited amount of data can be extracted at a time.
    #    The page enforces this by restricting the number of days
    #    for which you can download data. Therefore, we pull down
    #    data in 180-day chunks. The actual restriction is a little
    #    hard to pin down, since it varies based on some nontransparent
    #    factors. 180 though is a very conservative estimate.
    date_range = DateRange(self.start_date, self.end_date)
    interval_size = relativedelta(days=180)

    readings = {}  # Maps dates to interval data, populated below
    for interval in date_range.split_iter(delta=interval_size):
        _log("Gathering meter data for: {0}".format(interval))

        # First, set the date range for the selected meter
        config_page.set_date_range(interval)
        self.screenshot("date range set {} to {}".format(
            interval.start_date.isoformat(), interval.end_date.isoformat()))

        # Navigate to the "Export" page, and request a CSV report
        navigation.goto_export()
        export_csv_page.wait_until_ready()
        export_csv_page.generate_report()

        # Wait for the report to generate, then download it
        # and extract interval data from it
        download_csv_page.wait_until_ready()
        csv_rows_iter = download_csv_page.get_csv_rows()
        # First row is the header; log it for debugging.
        header = next(csv_rows_iter)
        _log("CSV Header row: {0}".format(header))
        csv_kw_rows = self._get_csv_kw_rows(csv_rows_iter)
        if self._is_multichannel(csv_kw_rows) and not self.channel_id:
            raise MissingChannelIdException(
                "Missing channel ID for multichannel meter")
        # The intervals coefficient is a multiplier for the interval data.
        intervals_coeff = 4  # For 15-minute intervals
        if (self._get_interval_data_length(csv_kw_rows) ==
                self.READINGS_NUM_30_MIN_INT):
            intervals_coeff = 2  # For 30-minute intervals
        for data_row in csv_kw_rows:
            # Add usage rows with the requested channel.
            # If no channel_id was passed in, add all usage rows.
            if not self.channel_id or self.channel_id == data_row.channel_id:
                readings[data_row.date.strftime("%Y-%m-%d")] = list(
                    self._kwh_to_kw(data_row.interval_data, intervals_coeff))

        # Navigate back to the meter selection page in preparation
        # for the next iteration. Note that we do not reselect the
        # meter, since our initial selections are cached.
        navigation.goto_meter_selection()
        config_page.wait_until_ready()

    return Results(readings=readings)
def _execute(self):
    """Scrape PSE interval reports one month at a time.

    Logs in, configures the interval report for the target meter, then
    downloads a report per monthly window — clamping or skipping windows
    that fall in a known-bad PSE date region for certain meters — and
    serializes the parsed results, also writing the raw JSON to the
    working directory for reference.

    Returns:
        Results: the parser's serialized readings.

    Raises:
        ReportGenerationError: if a report download times out.
    """
    date_range = DateRange(self.start_date, self.end_date)
    parser = PSEIntervalReportParser(self.start_date, self.end_date)
    login_page = LoginPage(self._driver)
    login_page.wait_until_ready()
    self.screenshot("before login")
    login_page.login(self.username, self.password)
    self.screenshot("after login")
    menu_page = MainMenuPage(self._driver)
    menu_page.select_interval_report()
    self.screenshot("interval report")
    report_page = IntervalReportPage(self._driver, self.service_id,
                                     self.site_name)
    report_page.configure_meter_target()
    self.screenshot("meter target configured")
    step = relativedelta(months=1)
    for window in date_range.split_iter(step):
        # PSE has a known failure for some meters between 2017-11-05 and 2017-12-05.
        # We skip this month for now.
        if self._configuration.service_id in self.BadMeters:
            if window.start_date <= date(2017, 11, 5):
                # Clamp the window to end before the bad region.
                window.start_date = min(window.start_date, date(2017, 11, 4))
                window.end_date = min(window.end_date, date(2017, 11, 4))
            elif window.end_date >= date(2017, 12, 5):
                # Clamp the window to start after the bad region.
                window.start_date = max(window.start_date, date(2017, 12, 6))
                window.end_date = max(window.end_date, date(2017, 12, 6))
            else:
                # Window is entirely inside the bad time region, just skip.
                continue
            # Clamping can collapse the window to a single day; skip it.
            # NOTE(review): indentation reconstructed — confirm this check
            # belongs inside the BadMeters branch rather than at loop level.
            if window.start_date == window.end_date:
                continue
        log.info("Downloading data for period %s - %s." %
                 (window.start_date, window.end_date))
        report_page.select_report_window(window.start_date, window.end_date)
        try:
            report_page.download_report()
        except TimeoutException:
            msg = ("The scraper failed to download interval data for "
                   "the date range {} to {}. This may be due to an "
                   "issue with the PSE website.").format(
                       window.start_date, window.end_date)
            raise ReportGenerationError(msg)
        parser.save_report(window.start_date, window.end_date)
    results = parser.serialize()
    # Write the raw interval JSON into the scraper log for easy
    # reference.
    with open(os.path.join(config.WORKING_DIRECTORY, "interval_data.json"),
              "w") as f:
        f.write(json.dumps(results, sort_keys=True, indent=4))
    log.info("results=%s", len(results))
    return Results(readings=results)