Example #1
    def _execute(self):
        # MeterWatch immediately spawns a popup when loaded, and that popup is
        # the window we actually want. So we have to grab the main window
        # handle first and THEN go looking for the popup window to switch to.
        handles_before = self._driver.window_handles
        timeline = Timeline(self.start_date, self.end_date)
        main_window = _get_main_window(self._driver)
        login_window = None

        log.info(f"Navigating to {self.url}")
        self._driver.get(self.url)
        self.screenshot("initial_url")

        log.debug("Driver title: " + self._driver.title)
        assert "Seattle MeterWatch" in self._driver.title

        login_page = LoginPage(self._driver)
        meterdata_page = MeterDataPage(self._driver, self._configuration)

        login_page.login(self.username, self.password)
        self._driver.wait().until(
            lambda driver: len(handles_before) != len(driver.window_handles),
            "Issues loading login page.",
        )

        for handle in self._driver.window_handles:
            if handle != main_window:
                login_window = handle

        # We have our popup, so let's work with it.
        log.info("switching to new window")
        self._driver.switch_to.window(login_window)
        # resize: it opens as a tiny window
        self._driver.set_window_size(1200, 800)

        for meter_number in self._configuration.meter_numbers:
            meterdata_page.select_account(meter_number)
            self.start_date, self.end_date = meterdata_page.adjust_start_and_end_dates(
                self.start_date, self.end_date)
            # Widen the timeline if necessary, since the dates may have been adjusted above.
            timeline.extend_timeline(self.start_date, self.end_date)
            date_range = DateRange(self.start_date, self.end_date)
            interval_size = relativedelta(days=MAX_DOWNLOAD_DAYS)

            for sub_range in date_range.split_iter(delta=interval_size):
                meterdata_page.enter_dates(sub_range.start_date,
                                           sub_range.end_date)
                csv_file_path = meterdata_page.download_data(meter_number)

                log.info(
                    f"parsing kWh usage from downloaded data for {meter_number}"
                )

                self._process_csv(csv_file_path, timeline)

        return Results(readings=timeline.serialize(include_empty=False))
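
# A minimal, illustrative sketch of the chunked-download pattern used above:
# split_range is a hypothetical stand-in for DateRange.split_iter, whose
# exact boundary semantics may differ.
from datetime import date, timedelta

def split_range(start: date, end: date, max_days: int):
    """Yield (sub_start, sub_end) pairs covering [start, end] in chunks."""
    sub_start = start
    while sub_start < end:
        sub_end = min(sub_start + timedelta(days=max_days), end)
        yield sub_start, sub_end
        sub_start = sub_end

for chunk in split_range(date(2020, 1, 1), date(2020, 1, 10), 4):
    print(chunk)  # three chunks: Jan 1-5, Jan 5-9, Jan 9-10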
Example #2
    def process_partial_bills(self):
        """Primary method.

        Goes through billing_data and uploads new partial bills directly to the partial bills table.
        If a new partial bill differs from an existing partial bill,
        a new partial bill is created, rather than overwriting the old one.
        """
        # Run initial validation of all the partial bills.  Failures are caught
        # and the scraper run is marked as FAILED.
        try:
            PartialBillValidator(self.billing_data).run_prevalidation()
        except (OverlappedBillingDataDateRangeError, NoFutureBillsError):
            return Status.FAILED

        # Snap the start date of the first new bill, if applicable
        self.billing_data = snap_first_start(self.billing_data, self.haves)

        for pending_partial in self.billing_data:
            found = False
            for existing_partial in self.haves:
                existing_cycle = DateRange(
                    existing_partial.initial, existing_partial.closing
                )
                pending_cycle = DateRange(pending_partial.start, pending_partial.end)

                if existing_cycle == pending_cycle:  # cycles match exactly
                    if (
                        existing_partial.differs(pending_partial)
                        and not self._bad_override_detected(
                            existing_partial, pending_partial
                        )
                        and not self._existing_is_manual(
                            existing_partial, pending_partial
                        )
                    ):
                        # Mark the old partial bill as superseded
                        # and add a new partial bill
                        self._supersede(existing_partial, pending_partial)
                    found = True
                    break
                elif existing_cycle.intersects(
                    pending_cycle
                ):  # cycle does not match exactly, but intersects.
                    if not self._existing_is_manual(existing_partial, pending_partial):
                        # We create a new partial bill and supersede the old one
                        self._supersede(existing_partial, pending_partial)
                    found = True

            if not found:
                # Pending partial bill does not already exist, so we stage a new one
                pb = PartialBill.generate(
                    self.meter.utility_service, self.bill_type, pending_partial
                )
                self.staged_partial.append(pb)

        return Status.SUCCEEDED if self.staged_partial else Status.COMPLETED
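
# An illustrative model of the cycle comparisons above; SimpleRange is a
# hypothetical stand-in for the project's DateRange.
from dataclasses import dataclass
from datetime import date

@dataclass(frozen=True)
class SimpleRange:
    start: date
    end: date

    def intersects(self, other: "SimpleRange") -> bool:
        # Closed ranges overlap when each starts on or before the other's end.
        return self.start <= other.end and other.start <= self.end

existing = SimpleRange(date(2020, 1, 1), date(2020, 1, 31))
pending = SimpleRange(date(2020, 1, 15), date(2020, 2, 14))
print(existing == pending)           # False: not an exact cycle match
print(existing.intersects(pending))  # True: overlapping cycle -> supersede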
Example #3
    def _execute(self):
        # Direct the driver to the site URL.
        # Currently a public URL, no credentials needed. This will have to be
        # refactored if we start scraping private sites.
        self._driver.get(self.site_url)

        # Create page helpers
        overview_page = OverviewPage(self._driver)
        site_analytics_page = SiteAnalyticsPage(self._driver)
        date_picker_component = DatePickerSection(self._driver)

        # Navigate to site analytics tab
        overview_page.wait_until_ready()
        self.screenshot("before clicking on site analytics tab")
        overview_page.navigate_to_site_analytics()

        # Select inverter from both dropdowns
        site_analytics_page.wait_until_ready()
        self.screenshot("before selecting inverters")
        site_analytics_page.select_inverter_from_both_dropdowns(
            self.inverter_id)
        # Click on AC Power button
        self.screenshot("before clicking on ac power button")
        site_analytics_page.click_ac_power_button()
        self.screenshot("after clicking on ac power button")
        self.install_date = self.string_to_date(
            site_analytics_page.get_install_date())

        # Adjust start and end date, depending on inverter install date
        self.adjust_start_and_end_dates()

        date_range = DateRange(self.start_date, self.end_date)
        interval_size = relativedelta(days=MAX_INTERVAL_LENGTH)

        # Loop through the desired interval in two-day chunks to pull down
        # the power generated.
        for sub_range in date_range.split_iter(delta=interval_size):
            start = sub_range.start_date
            end = sub_range.end_date

            file_path = date_picker_component.complete_form_and_download(
                start, end)

            intermediate_readings = CSVParser(self.inverter_id,
                                              file_path).process_csv()
            self.readings.update(intermediate_readings)

            log.info("Cleaning up download.")
            clear_downloads(self._driver.download_dir)
            # Add a generous pause before the next chunk.
            self._driver.sleep(5)

        return Results(readings=self.readings)
Example #4
def datafeed(
    account: SnapmeterAccount,
    meter: Meter,
    datasource: SnapmeterMeterDataSource,
    params: dict,
    task_id: Optional[str] = None,
) -> Status:
    meta = datasource.meta or {}
    configuration = Configuration(mvweb_id=meta.get("mvWebId"),
                                  interval=meter.interval)
    # reduce load on MVWeb servers: skip if meter has data from within the last 3 days and there are no gaps
    max_reading = meter.readings_range.max_date or date.today() - timedelta(
        days=365)
    interval_age = (date.today() - max_reading).days
    date_range = DateRange(
        *iso_to_dates(params.get("data_start"), params.get("data_end")))
    # freshest we can expect is 3 days old
    date_range = DateRange(
        date_range.start_date,
        min(date_range.end_date,
            date.today() - timedelta(days=3)),
    )
    expected = (date_range.end_date - date_range.start_date).days + 1
    days_with_data = (db.session.query(MeterReading).filter(
        MeterReading.meter == meter.oid,
        MeterReading.occurred >= date_range.start_date,
        MeterReading.occurred <= date_range.end_date,
    ).count())
    log.info(
        "days with data from %s - %s = %s",
        date_range.start_date,
        date_range.end_date,
        days_with_data,
    )
    if interval_age <= 3 and days_with_data == expected:
        log.info(
            "skipping MVWeb run: meter %s has recent interval data (%s) and no gaps",
            meter.oid,
            max_reading,
        )
        return Status.SKIPPED

    return run_datafeed(
        LADWPMVWebScraper,
        account,
        meter,
        datasource,
        params,
        configuration=configuration,
        task_id=task_id,
    )
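
# A worked example of the freshness/gap arithmetic above, with assumed dates
# rather than live data.
from datetime import date, timedelta

today = date(2020, 7, 1)  # pretend "today"
data_start = date(2020, 6, 1)
# Freshest we can expect is 3 days old, so cap the end date.
data_end = min(date(2020, 6, 30), today - timedelta(days=3))
expected = (data_end - data_start).days + 1  # inclusive day count
print(data_end, expected)  # 2020-06-28 28: skip only if all 28 days have data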
Example #5
def test_scraper(
    utility_account: str,
    service_id: str,
    account_group: str,
    account_number: str,
    start_date: date,
    end_date: date,
    username: str,
    password: str,
):
    """Launch a Chrome browser to test the scraper."""
    configuration = PortlandBizportalConfiguration(
        utility="utility:portland-ge",
        utility_account_id=utility_account,
        account_group=account_group,
        bizportal_account_number=account_number,
        service_id=service_id,
    )
    credentials = Credentials(username, password)
    scraper = PortlandBizportalScraper(
        credentials, DateRange(start_date, end_date), configuration
    )
    scraper.start()
    with mock.patch("datafeeds.scrapers.pge.bill_pdf.upload_bill_to_s3"):
        scraper.scrape(
            readings_handler=None,
            bills_handler=ft.partial(
                test_upload_bills, -1, service_id, None, "portland-bizportal"
            ),
            partial_bills_handler=None,
            pdfs_handler=None,
        )
    scraper.stop()
Example #6
    def energy_manager_date_range(self, min_start_date):
        if self.start_date:
            start_date = date(
                year=self.start_date.year, month=self.start_date.month, day=1
            )
        else:
            start_date = min_start_date

        if self.end_date:
            end_date = date(year=self.end_date.year, month=self.end_date.month, day=1)
        else:
            today = date.today()
            end_date = date(year=today.year, month=today.month, day=1)

        if start_date > end_date:
            msg = "The scraper start date must be before the end date (start={}, end={})".format(
                start_date, end_date
            )
            raise sce_errors.BillingDataDateRangeException(msg)
        if end_date < min_start_date:
            msg = "No billing data is available for the range {} to {}.".format(
                start_date, end_date
            )
            raise sce_errors.BillingDataDateRangeException(msg)

        if start_date < min_start_date:
            log.info("Adjusting start date to minimum start date: %s", min_start_date)
            start_date = min_start_date

        date_range = DateRange(start_date, end_date)
        return date_range
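
# A standalone sketch of the same snap-and-clamp rules; the function name and
# error messages here are illustrative only.
from datetime import date

def clamp_to_month_start(start: date, end: date, min_start: date):
    start = date(start.year, start.month, 1)  # snap to first of month
    end = date(end.year, end.month, 1)
    if start > end:
        raise ValueError(f"start {start} must precede end {end}")
    if end < min_start:
        raise ValueError(f"no billing data between {start} and {end}")
    return max(start, min_start), end

print(clamp_to_month_start(date(2019, 5, 20), date(2020, 2, 10), date(2019, 7, 1)))
# -> (date(2019, 7, 1), date(2020, 2, 1))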
Example #7
def test_scraper(
    meter_number: str,
    start_date: date,
    end_date: date,
    username: str,
    password: str,
):
    configuration = LADWPBillPdfConfiguration(
        meter_number=meter_number,
        utility_account_id=meter_number,
        commodity="False",
        account_name=None,
    )
    credentials = Credentials(username, password)
    scraper = LADWPBillPdfScraper(
        credentials, DateRange(start_date, end_date), configuration
    )
    scraper.start()
    scraper.scrape(
        bills_handler=ft.partial(
            test_upload_bills, -1, meter_number, None, "ladwp-bill-pdf"
        ),
        partial_bills_handler=None,
        readings_handler=None,
        pdfs_handler=None,
    )
    scraper.stop()
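
# These harnesses all pre-bind handler arguments with functools.partial so the
# scraper only supplies the scraped payload. A minimal illustration; `upload`
# is a hypothetical handler, not the project's test_upload_bills.
import functools as ft

def upload(meter_oid, service_id, task_id, source, payload):
    print(meter_oid, service_id, source, payload)

handler = ft.partial(upload, -1, "meter-123", None, "ladwp-bill-pdf")
handler(["bill-1", "bill-2"])  # only the payload is supplied at call time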
Example #8
def test_scraper(
    service_id: str,
    gen_service_id: str,
    utility_account_id: str,
    start_date: date,
    end_date: date,
    username: str,
    password: str,
):
    is_partial = gen_service_id is not None
    configuration = SceReactBasicBillingConfiguration(
        service_id=service_id,
        gen_service_id=gen_service_id,
        utility_account_id=utility_account_id,
        scrape_partial_bills=is_partial,
        scrape_bills=not is_partial,
    )
    credentials = Credentials(username, password)
    scraper = SceReactBasicBillingScraper(credentials,
                                          DateRange(start_date, end_date),
                                          configuration)
    fixture = setup_fixture().get(service_id)
    if fixture:
        scraper.utility_service = fixture["initial"]
        set_tariff_mock = MagicMock()
        set_tariff_mock.return_value = mock_set_tariff_from_utility_code
        scraper.utility_service.set_tariff_from_utility_code = set_tariff_mock
    scraper.start()
    scraper.scrape(
        bills_handler=ft.partial(test_upload_bills, -1, service_id, None,
                                 "sce-react-basic-billing"),
        partial_bills_handler=ft.partial(test_upload_partial_bills, None, None,
                                         None),
        readings_handler=None,
        pdfs_handler=None,
    )
    scraper.stop()
    if fixture:
        print("field\tactual\texpected\tmatch?")
        fields = [
            "service_id",
            "tariff",
            "utility_account_id",
            "gen_service_id",
            "gen_tariff",
            "gen_utility",
            "gen_utility_account_id",
            "provider_type",
        ]
        matches = []
        for field in fields:
            actual = getattr(scraper.utility_service, field)
            expected = getattr(fixture["expected"], field)
            print(f"{field}\t{actual}\t{expected}\t{actual == expected}")
            if actual == expected:
                matches.append(field)
        if matches == fields:
            print("\nOK")
        else:
            print(f"\nFAILED: mismatches = {set(fields) - set(matches)}")
Example #9
    def test_scraper(self):
        """The Synchronizer can extract partial bill data from the SMD tables."""
        self.add_customer_info("12345", "ABCDE")
        self.add_bill("ABCDE", datetime(2020, 1, 1), timedelta(days=30))
        self.add_bill("ABCDE", datetime(2020, 2, 1), timedelta(days=28))
        self.add_bill("ABCDE", datetime(2020, 3, 1), timedelta(days=30))

        config = SmdPartialBillingScraperConfiguration(self.meter)
        scraper = SmdPartialBillingScraper(
            Credentials(None, None),
            DateRange(date(2019, 12, 1), date(2020, 5, 1)),
            configuration=config,
        )
        results = scraper._execute()
        self.assertEqual(3, len(results.tnd_bills))

        # Perform a quick sanity check that we found the right bill dates.
        # Conversion from an SMD bill to a billing datum is tested elsewhere in depth.

        # Note: Dates intentionally do not line up with SMD records; this ensures the dates
        # agree with PDF bill data.
        expected = [
            (date(2020, 1, 2), date(2020, 1, 31)),
            (date(2020, 2, 2), date(2020, 2, 29)),
            (date(2020, 3, 2), date(2020, 3, 31)),
        ]

        actual = [(b.start, b.end) for b in results.tnd_bills]
        self.assertEqual(expected, actual)
Example #10
def test_scraper(
    service_id: str,
    address: str,
    start_date: date,
    end_date: date,
    username: str,
    password: str,
):
    configuration = SceReactEnergyManagerGreenButtonConfiguration(
        service_id=service_id,
        meta={"serviceAccountAddress": address},
        meter=MagicMock(),
    )
    credentials = Credentials(username, password)
    scraper = SceReactEnergyManagerGreenButtonScraper(
        credentials, DateRange(start_date, end_date), configuration)
    scraper.start()
    with mock.patch("datafeeds.scrapers.pge.bill_pdf.upload_bill_to_s3"):
        scraper.scrape(
            readings_handler=ft.partial(test_upload_readings, None),
            bills_handler=None,
            pdfs_handler=None,
            partial_bills_handler=None,
        )
    scraper.stop()
Example #11
    def setUp(self):
        super().setUp()
        config = Configuration()
        self.scraper = powertrack.PowerTrackScraper(
            credentials=None,
            date_range=DateRange(self.start_date.date(), self.end_date.date()),
            configuration=config,
        )
Example #12
    def setUp(self):
        super().setUp()
        config = Configuration()
        config.site_name = "Xilinx - San Jose"
        self.scraper = bloom_interval.BloomScraper(
            credentials=None,
            date_range=DateRange(self.start_date, self.end_date),
            configuration=config,
        )
Example #13
    def setUp(self):
        super().setUp()
        config = Configuration()
        config.account_id = "s12345"
        config.meter_id = "2a566973506457484a43554b772b71553d-1"
        self.scraper = nautilus.NautilusScraper(
            credentials=None,
            date_range=DateRange(self.start_date.date(), self.end_date.date()),
            configuration=config,
        )
Example #14
def test_scraper(account_number: str, start_date: date, end_date: date,
                 username: str, password: str):
    configuration = FPLMyAccountConfiguration(account_number=account_number)
    credentials = Credentials(username, password)
    scraper = FPLMyAccountScraper(credentials, DateRange(start_date, end_date),
                                  configuration)
    scraper.start()
    scraper.scrape(
        readings_handler=print,
        bills_handler=None,
        pdfs_handler=None,
    )
    scraper.stop()
Example #15
def test_scraper(username: str, password: str, account_id: str,
                 start_date: date, end_date: date):
    configuration = PowayWaterConfiguration(account_id)
    credentials = Credentials(username, password)
    scraper = PowayWaterScraper(credentials, DateRange(start_date, end_date),
                                configuration)
    scraper.start()
    scraper.scrape(
        bills_handler=ft.partial(test_upload_bills, -1, account_id, None),
        partial_bills_handler=None,
        readings_handler=None,
        pdfs_handler=None,
    )
    scraper.stop()
Example #16
    def test_dst_data(self):
        date_range = DateRange(date(2020, 10, 31), date(2020, 11, 6))
        timeline = Timeline(date_range.start_date, date_range.end_date, 15)
        scraper = heco.HECOScraper(
            Credentials(None, None),
            date_range,
            HECOGridConfiguration(meter_id=123, interval=15),
        )
        scraper._process_csv(
            "datafeeds/scrapers/tests/fixtures/mvweb_dst.csv", timeline
        )
        with open("datafeeds/scrapers/tests/fixtures/mvweb_dst_expected.json") as f:
            expected = json.loads(f.read())
        self.assertEqual(expected, timeline.serialize())
Example #17
def test_scraper(
    point_id: str, start_date: date, end_date: date, username: str, password: str
):
    configuration = SVPIntervalConfiguration(point_id=point_id)
    credentials = Credentials(username, password)
    scraper = SVPIntervalScraper(
        credentials, DateRange(start_date, end_date), configuration
    )
    scraper.start()
    scraper.scrape(
        readings_handler=print,
        bills_handler=None,
        pdfs_handler=None,
    )
    scraper.stop()
Example #18
def test_scraper(username: str, password: str, account_id: str,
                 start_date: date, end_date: date):
    configuration = DukeBillingConfiguration("utility:duke-carolinas-nc",
                                             account_id)
    credentials = Credentials(username, password)
    scraper = DukeBillingScraper(credentials, DateRange(start_date, end_date),
                                 configuration)
    scraper.start()
    scraper.scrape(
        bills_handler=ft.partial(test_upload_bills, -1, account_id, None,
                                 "duke-energy-billing"),
        partial_bills_handler=None,
        readings_handler=None,
        pdfs_handler=None,
    )
    scraper.stop()
Example #19
def test_scraper(
    service_id: str, start_date: date, end_date: date, username: str, password: str
):
    configuration = SVPBillingConfiguration(
        utility="utility:default", utility_account_id="12345", service_id=service_id
    )
    credentials = Credentials(username, password)
    scraper = SVPBillingScraper(
        credentials, DateRange(start_date, end_date), configuration
    )
    scraper.start()
    scraper.scrape(
        readings_handler=None,
        bills_handler=print,
        pdfs_handler=None,
    )
    scraper.stop()
Example #20
def test_scraper(utility_account: str, start_date: date, end_date: date,
                 username: str, password: str):
    configuration = PgeBillPdfConfiguration(
        utility="pge",
        utility_account=utility_account,
        gen_utility=None,
        gen_utility_account_id=None,
        datasource=MagicMock(),
    )
    credentials = Credentials(username, password)
    scraper = PgeBillPdfScraper(credentials, DateRange(start_date, end_date),
                                configuration)
    scraper.start()
    with mock.patch("datafeeds.scrapers.pge.bill_pdf.upload_bill_to_s3"):
        scraper.scrape(
            readings_handler=None,
            bills_handler=None,
            pdfs_handler=ft.partial(test_pdf_upload, None),
        )
    scraper.stop()
Example #21
    def test_fall_daylight_savings(self):
        """Test Fall DST values are not double counted"""

        date_range = DateRange(date(2020, 11, 1), date(2020, 11, 1))
        timeline = Timeline(date_range.start_date, date_range.end_date, 15)
        scraper = SCLMeterWatchScraper(
            Credentials(None, None),
            date_range,
            SCLMeterWatchConfiguration(meter_numbers=["803441"],
                                       meter=self.meter),
        )
        scraper._process_csv(
            "datafeeds/scrapers/tests/fixtures/scl_meterwatch_dst.csv",
            timeline)
        with open(
                "datafeeds/scrapers/tests/fixtures/scl_meterwatch_dst_expected.json"
        ) as f:
            expected = json.loads(f.read())

        self.assertEqual(expected, timeline.serialize())
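
# A quick check of why this fixture exists: in Seattle's zone, 2020-11-01
# repeats the fall-back hour, making the day 25 hours long (assumes
# Python 3.9+ with tz data available).
from datetime import datetime
from zoneinfo import ZoneInfo

tz = ZoneInfo("America/Los_Angeles")
day = datetime(2020, 11, 2, tzinfo=tz) - datetime(2020, 11, 1, tzinfo=tz)
print(day.total_seconds() / 3600)       # 25.0 hours
print(int(day.total_seconds() // 900))  # 100 fifteen-minute intervals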
Example #22
    def test_urjanet_data_range_for_partial_scrapers(self):
        datasource = test_util.FixtureDataSource(
            os.path.join(DATA_DIR, "simple_fixture_input.json"))
        transformer = PacificGasElectricTransformer()
        config = BaseUrjanetConfiguration(
            datasource,
            transformer,
            "pge",
            False,
            partial_type=PartialBillProviderType.GENERATION_ONLY,
        )

        date_range = DateRange(date(2020, 1, 1), date(2020, 6, 1))
        scraper = BaseUrjanetScraper(None, date_range, config)
        result = scraper._execute()
        # `expected` (the list of expected billing datum objects) is built from
        # fixture data in the full test module.
        self.assertEqual(
            expected,
            result.generation_bills,
            "partial urjanet scrapers return whatever partial bills we find, regardless of scraped range.",
        )
Example #23
def test_scraper(
    utility_account_id: str,
    start_date: date,
    end_date: date,
    username: str,
    password: str,
):
    configuration = SaltRiverBillingConfiguration(
        account_id=utility_account_id)
    credentials = Credentials(username, password)
    scraper = SaltRiverBillingScraper(credentials,
                                      DateRange(start_date, end_date),
                                      configuration)
    scraper.start()
    scraper.scrape(
        bills_handler=ft.partial(test_upload_bills, -1, utility_account_id,
                                 None, "saltriver-billing"),
        partial_bills_handler=None,
        readings_handler=None,
        pdfs_handler=None,
    )
    scraper.stop()
Example #24
def test_scraper(
    utility_account_id: str,
    service_id: str,
    start_date: date,
    end_date: date,
    username: str,
    password: str,
):
    configuration = PepcoIntervalConfiguration(
        utility_account_id=utility_account_id, service_id=service_id, interval=15
    )
    credentials = Credentials(username, password)
    scraper = PepcoIntervalScraper(
        credentials, DateRange(start_date, end_date), configuration
    )
    scraper.start()
    scraper.scrape(
        readings_handler=print,
        bills_handler=None,
        pdfs_handler=None,
        partial_bills_handler=None,
    )
    scraper.stop()
Example #25
    def _execute(self):
        self._driver.get(self.site_url)
        log.info(self._configuration.__dict__)
        log.info(self._configuration.meter_oid)
        interval = self._get_meter_interval()
        log.info("meter interval is %s", interval)

        login_page = LoginPage(self._driver)
        landing_page = LandingPage(self._driver)
        extract_page = DataExtractPage(self._driver)
        if interval == 1440:
            extract_page.IntervalRadio = 'label[for="timeInterval-daily"]'

        login_page.wait_until_ready(login_page.SigninButtonSelector)
        self.screenshot("before login")
        login_page.login(self.username, self.password)

        self.screenshot("after login")
        landing_page.go_to_data_extract()

        log.info("Filling out data extract form")

        self.screenshot("data extract page")
        # Verify dates and break into date ranges
        start_year = extract_page.get_earliest_year(extract_page)
        self.adjust_start_and_end_dates(start_year)
        date_range = DateRange(self.start_date, self.end_date)
        interval_size = relativedelta(days=MAX_DOWNLOAD_DAYS)

        readings = []

        self._export_data(extract_page, date_range, interval_size, interval=interval)

        if self.timeline:
            readings = self.timeline.serialize()

        return Results(readings=readings)
Example #26
    def energy_manager_basic_usage_action(
            self, page: sce_pages.SceEnergyManagerBasicUsagePage):
        sce_pages.detect_and_close_survey(self._driver)
        rval = page.select_service_id(self.service_id)
        log.info("Result of select service id %s: %s", self.service_id, rval)
        self.screenshot("select_service_id")
        page.configure_report()

        date_range = DateRange(self.start_date, self.end_date)
        # the website seems to time out when trying to get more than this amount of data
        interval_size = relativedelta(days=7)
        timeline = Timeline(self.start_date, self.end_date)

        for idx, subrange in enumerate(
                date_range.split_iter(delta=interval_size)):
            log.info("Requesting interval data for dates: %s", subrange)
            start = subrange.start_date
            end = subrange.end_date

            page.set_time_range(start, end)
            self.screenshot("set_time_range")

            try:
                page.generate_report()
                time.sleep(5)
                WebDriverWait(self._driver, 180).until(
                    EC.invisibility_of_element_located(
                        sce_pages.GenericBusyIndicatorLocator))
                self.screenshot(f"interval{idx}")
            except Exception as e:
                raise sce_errors.EnergyManagerReportException(
                    "Failed to load data from Energy Manager") from e

            try:
                page.raise_on_report_error()
            except sce_errors.EnergyManagerDataNotFoundException:
                log.info("No data found for this time range, continuing...")
                # If a given date range has no interval data, just move on to the next one
                continue

            log.info("Downloading the interval data report.")
            self.clear_csv_downloads()

            try:
                page.download_report()
            except Exception as e:
                raise sce_errors.EnergyManagerReportException(
                    "Failed to load data from Energy Manager") from e

            try:
                # Wait two minutes for the download to finish
                wait = WebDriverWait(self._driver, 120)
                csv_file_name = wait.until(
                    file_exists_in_dir(self._driver.download_dir, r".*\.csv"))
                csv_file_path = os.path.join(self._driver.download_dir,
                                             csv_file_name)
                for reading in parse_sce_csv_file(csv_file_path,
                                                  self.service_id):
                    timeline.insert(reading.dt, reading.value)
            except TimeoutException:
                raise TimeoutException(
                    "Downloading interval data from Energy Manager failed.")

        self.interval_data_timeline = timeline
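
# WebDriverWait accepts any callable that takes the driver and returns a
# truthy value, which is how file_exists_in_dir above can work. A sketch of
# such a helper (an assumed implementation, not the project's actual one).
import os
import re

def file_exists_in_dir(directory: str, pattern: str):
    def _check(_driver):
        for name in sorted(os.listdir(directory)):
            if re.match(pattern, name):
                return name  # truthy: the wait ends and returns the filename
        return False
    return _check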
Example #27
    def _execute(self):
        # Direct the driver to the login page
        self._driver.get(self.login_url)

        # Create page helpers
        login_page = LoginPage(self._driver)
        navigation = Navigation(self._driver)
        meter_selection_page = MeterSelectionPage(self._driver)
        export_csv_page = ExportCsvPage(self._driver)
        download_csv_page = DownloadCsvPage(self._driver)

        # Authenticate
        login_page.wait_until_ready()
        self.screenshot("before login")
        login_page.login(self.username, self.password)

        # Configure interval data generation, in two steps...
        meter_selection_page.wait_until_ready()
        self.screenshot("before meter selection")

        # 1) Specify we are entering a custom date range
        meter_selection_page.select_date_range_option()
        self.screenshot("date range option selected")

        # 2) Locate the meter of interest and select it
        matching_meter = None
        meter_query = self.ngrid_meter_id
        log.info("Looking for a meter with ID == {0}".format(meter_query))
        for meter in meter_selection_page.iter_meters():
            log.info("Found a meter: {0}".format(meter))
            if meter.meter_id == meter_query:
                log.info("Found a matching meter.")
                matching_meter = meter
                break

        if matching_meter is None:
            log.info("No meter with ID {0} was found.".format(meter_query))
            raise InvalidMeterException(
                "Meter {0} was not found".format(meter_query))
        else:
            matching_meter.select()
            self.screenshot("meter selected")

        # Two notes on time...
        # 1) Each meter specifies the date range for which data is
        #    available. If we don't respect this, the page will throw
        #    errors. So, we restrict our start and end dates based on
        #    this information.
        if self.start_date < matching_meter.min_date:
            log.info("Adjusting start date from {0} to {1}".format(
                self.start_date, matching_meter.min_date))
            self.start_date = matching_meter.min_date
        if self.end_date > matching_meter.max_date:
            log.info("Adjusting end date from {0} to {1}".format(
                self.end_date, matching_meter.max_date))
            self.end_date = matching_meter.max_date

        # 2) Only a limited amount of data can be extracted at a time.
        #    The page enforces this by restricting the number of days
        #    for which you can download data. Therefore, we pull down
        #    data in 180-day chunks. The actual restriction is a little
        #    hard to pin down, since it varies based on some nontransparent
        #    factors. 180, though, is a very conservative estimate.
        date_range = DateRange(self.start_date, self.end_date)
        interval_size = relativedelta(days=180)
        readings = {}  # Maps dates to interval data, populated below
        for subrange in date_range.split_iter(delta=interval_size):
            log.info("Gathering meter data for: {0}".format(subrange))

            # First, set the date range for the selected meter
            meter_selection_page.set_date_range(subrange)

            # Navigate to the "Export" page, and request a CSV report
            navigation.goto_export()
            export_csv_page.wait_until_ready()
            export_csv_page.generate_report()

            # Wait for the report to generate, then download it
            # and extract interval data from it
            download_csv_page.wait_until_ready()
            csv_rows_iter = download_csv_page.get_csv_rows()
            header = next(csv_rows_iter)
            log.info("CSV Header row: {0}".format(header))

            for data_row in csv_rows_iter:
                result = NationalGridIntervalScraper.parse_csv_row(data_row)
                if result.units == UNITS_KWH:
                    readings[self._iso_str(result.date)] = list(
                        NationalGridIntervalScraper.kwh_to_kw(
                            result.interval_data))

            # Navigate back to the meter selection page in preparation
            # for the next iteration. Note that we do not reselect the
            # meter, since our initial selections are cached.
            navigation.goto_meter_selection()
            meter_selection_page.wait_until_ready()

        return Results(readings=readings)
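
# The likely arithmetic behind kwh_to_kw above: energy per interval divided by
# the interval length in hours gives average demand. A sketch assuming
# 15-minute intervals; the real method's interval handling may differ.
def kwh_to_kw(interval_data, interval_minutes=15):
    hours = interval_minutes / 60.0
    return [None if v is None else v / hours for v in interval_data]

print(kwh_to_kw([1.0, 2.5, None]))  # [4.0, 10.0, None]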
Example #28
def run_datafeed(
    scraper_class,
    account: SnapmeterAccount,
    meter: Meter,
    datasource: MeterDataSource,
    params: dict,
    configuration=None,
    task_id=None,
    transforms: Optional[List[Transforms]] = None,
    disable_login_on_error: Optional[bool] = False,
    notify_on_login_error: Optional[bool] = True,
    meter_only: Optional[bool] = False,
) -> Status:
    transforms = [] if transforms is None else transforms
    bill_handler = ft.partial(
        upload_bills,
        meter.oid,
        meter.utility_service.service_id,
        task_id,
        datasource.name,
    )
    readings_handler = ft.partial(upload_readings, transforms, meter.oid,
                                  datasource.name, task_id)
    pdfs_handler = ft.partial(attach_bill_pdfs, meter.oid, task_id, meter_only)
    partial_bill_handler = ft.partial(upload_partial_bills, meter, task_id)

    date_range = DateRange(
        *iso_to_dates(params.get("data_start"), params.get("data_end")))

    parent: Optional[AccountDataSource] = None
    if datasource.account_data_source:
        parent = datasource.account_data_source
        credentials = Credentials(parent.username, parent.password)
        if not datasource.account_data_source.enabled:
            raise DataSourceConfigurationError(
                "%s scraper for %s is disabled" %
                (datasource.account_data_source.name, meter.oid))
    else:
        credentials = Credentials(None, None)

    if task_id and config.enabled("ES_INDEX_JOBS"):
        log.info("Uploading task information to Elasticsearch.")
        doc = index.starter_doc(meter.oid, datasource)
        doc["start_date"] = date_range.start_date
        doc["end_date"] = date_range.end_date
        doc["meter_data_source"] = datasource.oid
        if configuration:
            doc.update({
                "billScraper": (
                    configuration.scrape_bills or configuration.scrape_partial_bills
                ),
                "intervalScraper": configuration.scrape_readings,
            })
        index.index_etl_run(task_id, doc)

    index_doc: Dict[str, str] = {}
    # create a non-persisted copy
    utility_service = UtilityService.copy_from(meter.utility_service)
    try:
        with scraper_class(credentials, date_range, configuration) as scraper:
            scraper.utility_service = utility_service
            scraper_status = scraper.scrape(
                readings_handler=readings_handler,
                bills_handler=bill_handler,
                pdfs_handler=pdfs_handler,
                partial_bills_handler=partial_bill_handler,
            )
            if scraper_status == Status.SUCCEEDED:
                # Avoid muddying Elasticsearch results
                index_doc = {"status": "SUCCESS"}
            else:
                index_doc = {"status": scraper_status.name}
            if scraper_status in [Status.SUCCEEDED, Status.COMPLETED]:
                retval = Status.SUCCEEDED
            else:
                retval = Status.FAILED
            # sce-metascraper needs to be able to get the completed status back
            if scraper.metascraper:
                retval = scraper_status

    except Exception as exc:
        log.exception("Scraper run failed.")
        retval = Status.FAILED
        index_doc = {
            "status": "FAILED",
            "error": repr(exc),
            "exception": type(exc).__name__,
        }
        # disable the login if scraping threw a LoginError, caller requested disabling on error,
        # and meter data source has a parent account data source
        if isinstance(exc, LoginError) and disable_login_on_error and parent:
            parent.enabled = False
            db.session.add(parent)
            log.warning("disabling %s login %s", parent.source_account_type,
                        parent.oid)
            if notify_on_login_error:
                alert.disable_logins(parent)

    index_doc.update(
        update_utility_service(meter.utility_service, utility_service))
    if task_id and config.enabled("ES_INDEX_JOBS"):
        log.info("Uploading final task status to Elasticsearch.")
        index.index_etl_run(task_id, index_doc)

    return retval
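
# run_datafeed drives the scraper through a `with` block, so start/stop
# bookkeeping lives in __enter__/__exit__. A plausible shape for that base
# class (an assumption; the project's implementation may differ).
class BaseScraperSketch:
    def __enter__(self):
        self.start()  # e.g. launch the browser
        return self

    def __exit__(self, exc_type, exc, tb):
        self.stop()   # always clean up, even on error
        return False  # propagate exceptions to the caller

    def start(self): ...
    def stop(self): ...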
Example #29
    def _execute(self):
        # Direct the driver to the login page
        self._driver.get(self.login_url)
        # Create page helpers
        download_page = DownloadPage(self._driver)
        meter_page = MeterPage(self._driver)
        search_result = MeterSearchResult(self._driver)
        available_dates = AvailableDateComponent(self._driver)
        interval_form = IntervalForm(self._driver)

        self.login_to_mvweb()

        # Navigate to Download Page
        # Pause to let the iframe settle down
        time.sleep(5)

        download_page.wait_until_ready(selector=self.download_link_selector)
        self.screenshot("before clicking on download link")
        download_page.navigate_to_download_page(self.download_link_selector)
        time.sleep(10)

        # Enter MeterId in the search box
        meter_page.wait_until_ready(meter_page.MeterSearchInput)
        self.screenshot("before searching for meter")
        meter_page.search_by_meter_id(self.meter_id)
        time.sleep(10)

        # When the search results have settled, click on the first meter result.
        # If the meter isn't found, raise an error.
        search_result.wait_until_text_visible(
            search_result.SearchResult,
            self.meter_id,
            error_selector=search_result.NoResultsFound,
            alt_text="No matching records found",
            error_cls=MeterNotFoundException,
            error_msg="No matching records found for Meter ID {}".format(self.meter_id),
        )
        self.screenshot("before clicking on meter result")
        search_result.click_on_meter_result()

        # Adjust start and end dates if supplied start and end are out of range
        adjusted_start, adjusted_end = available_dates.adjust_start_and_end_dates(
            self.start_date, self.end_date
        )

        date_range = DateRange(adjusted_start, adjusted_end)
        interval_size = relativedelta(days=MAX_INTERVAL_LENGTH)

        timeline = Timeline(adjusted_start, adjusted_end, self._configuration.interval)
        # Breaks the date range into small, manageable chunks and downloads a csv
        # of demands for each one.
        for sub_range in date_range.split_iter(delta=interval_size):
            log.info("Getting interval data for date range: {}".format(sub_range))
            start = sub_range.start_date
            end = sub_range.end_date

            # Fill out interval form and click save to download data
            interval_form.fill_out_interval_form_and_download(start, end)
            file_path = self.download_file("csv")

            # Extract intermediate info from csv
            self._process_csv(file_path, timeline)

        return Results(readings=timeline.serialize(include_empty=False))
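
# A rough stand-in for the Timeline used above and in Example #1, to show the
# contract implied by insert() and serialize(include_empty=False). Assumed and
# simplified; the real class handles more edge cases.
from datetime import date, datetime, timedelta

class TimelineSketch:
    def __init__(self, start: date, end: date, interval: int = 15):
        self.index = {}
        t = datetime(start.year, start.month, start.day)
        stop = datetime(end.year, end.month, end.day) + timedelta(days=1)
        while t < stop:  # one slot per interval, end date inclusive
            self.index[t] = None
            t += timedelta(minutes=interval)

    def insert(self, dt: datetime, value):
        if dt in self.index:
            self.index[dt] = value

    def serialize(self, include_empty: bool = True):
        out = {}
        for dt, value in self.index.items():
            out.setdefault(dt.date().isoformat(), []).append(value)
        if not include_empty:
            out = {d: v for d, v in out.items() if any(x is not None for x in v)}
        return out

tl = TimelineSketch(date(2020, 1, 1), date(2020, 1, 2), interval=60)
tl.insert(datetime(2020, 1, 1, 0, 0), 1.5)
print(list(tl.serialize(include_empty=False)))  # ['2020-01-01']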
Example #30
    def _execute_internal(self):
        # Direct the driver to the login page
        self._driver.get(self.login_url)

        # Create page helpers
        login_page = LoginPage(self._driver)
        home_page = HomePage(self._driver)
        usage_page = UsagePage(self._driver)

        # Authenticate
        log.info("Logging in.")
        login_page.wait_until_ready()
        self.screenshot("before login")
        # Login sometimes fails, so retry once on LoginError.
        try:
            login_page.login(self.username, self.password, self)
        except LoginError:
            log.info("login failed; trying login a second time in 30s")
            self._driver.get(self.login_url)
            self._driver.sleep(30)
            self.screenshot("before second login")
            login_page.login(self.username, self.password, self)
        self.screenshot("after login")

        # On the homepage, fetch the visible account information. This info
        # tells us (among other things) which account id is associated with
        # which account name.
        log.info("Waiting for home page to be ready.")
        home_page.wait_until_ready()
        self.screenshot("home page loaded")

        # Go to the 'Usage' Page
        log.info("Navigating to 'Usage' page.")
        usage_page.navigate_to_usage_page()
        usage_page.wait_until_ready()
        self.screenshot("usage_page_initial")

        if usage_page.is_enterprise():
            log.info("Enterprise: selecting account: {0}".format(
                self.account_id))
            if not usage_page.select_account_from_portfolio(self.account_id):
                error_msg = f"Unable to find account with ID={self.account_id}"
                log.info(error_msg)
                raise InvalidAccountException(error_msg)
            self.screenshot("usage_account_selected")

        else:
            log.info("Selecting account: {0}".format(self.account_id))
            if not usage_page.select_account(self.account_id):
                available_accounts = set(usage_page.get_available_accounts())
                error_msg = "Unable to find account with ID={0}. Available accounts are: {1}".format(
                    self.account_id, available_accounts)
                log.info(error_msg)
                raise InvalidAccountException(error_msg)
            self.screenshot("usage_account_selected")

        # Select the desired meter
        log.info("Selecting meter with id: {0}".format(self.service_id))
        if not usage_page.select_meter(self.service_id):
            available_meters = set(usage_page.get_available_meters())
            error_msg = (
                "Unable to find meter with ID={0}. Available meters are: {1}".
                format(self.service_id, available_meters))
            raise InvalidMeterException(error_msg)
        self.screenshot("selected meter")

        date_range = DateRange(self.start_date, self.end_date)
        timeline = Timeline(self.start_date,
                            self.end_date,
                            interval=self._configuration.interval)

        excel_download = True
        if usage_page.is_enterprise():
            usage_page.enterprise_select_usage(self._configuration.interval)
        else:
            usage_page.select_usage(self._configuration.interval)
            excel_download = usage_page.excel_download_available()

        # use the same JavaScript download for both regular and enterprise
        if excel_download:
            if self._configuration.interval == 1440:
                for subrange in date_range.split_iter(delta=relativedelta(
                        days=7)):
                    usage_page.javascript_download(
                        subrange.start_date,
                        subrange.end_date,
                        self._configuration.interval,
                    )
            else:
                dt = self.start_date
                while dt < self.end_date:
                    usage_page.javascript_download(
                        dt, dt, self._configuration.interval)
                    dt += timedelta(days=1)
            for filename in glob(f"{self._driver.download_dir}/*.xlsx"):
                parse_xlsx(timeline, filename, self.adjustment_factor)
        else:
            log.info("starting GreenButton download")
            usage_page.open_green_button()
            self.screenshot("opened green button")
            """
            This page only allows you to download a certain amount of billing data at a time. We will use a
            conservative chunk size of 90 days.
            """
            interval_size = relativedelta(days=90)
            for subrange in date_range.split_iter(delta=interval_size):
                self.get_green_button(usage_page, timeline,
                                      subrange.start_date, subrange.end_date)

        return Results(readings=timeline.serialize())