def download_and_attach_pdf(
    self, bill_data: BillingDatum, billing_row: sce_pages.BillingDataRow
) -> BillingDatum:
    """Download the PDF for a billing row and attach it to the bill.

    Previously downloaded PDFs are cleared first, then the PDF for
    ``billing_row`` is downloaded and passed to
    ``bill_upload.upload_bill_to_s3`` under a key built from
    ``bill_upload.hash_bill_datum(self.service_id, bill_data)`` plus ``.pdf``.

    Args:
        bill_data: The bill the PDF belongs to; its ``start``, ``end`` and
            ``statement`` fields are read.
        billing_row: The page row handed to ``download_pdf_for_billing_row``.

    Returns:
        A copy of ``bill_data`` whose ``attachments`` holds the single
        uploaded entry, or ``bill_data`` unchanged when no PDF was downloaded
        or the upload produced no attachment entry.
    """
    self.clear_pdf_downloads()
    bill_path = self.download_pdf_for_billing_row(billing_row)
    if not bill_path:
        log.info(
            "No pdf bill was available for this period: %s to %s",
            bill_data.start,
            bill_data.end,
        )
        return bill_data

    with open(bill_path, "rb") as bill_file:
        key = bill_upload.hash_bill_datum(self.service_id, bill_data) + ".pdf"
        attachment_entry = bill_upload.upload_bill_to_s3(
            bill_file,
            key,
            statement=bill_data.statement,
            source="sce.com",
            utility=self.utility,
            utility_account_id=self.utility_account_id,
        )

    # NOTE(review): other callers of upload_bill_to_s3 treat a falsy result as
    # "nothing to attach"; do the same here rather than storing [None].
    if not attachment_entry:
        return bill_data
    return bill_data._replace(attachments=[attachment_entry])
Exemplo n.º 2
0
    def _execute(self):
        """Log in, gather the bill history and return the resulting bills.

        Each history entry is a ``(pdf, xls)`` pair. Bill data is parsed from
        the spreadsheet when one is present, otherwise from the PDF. When a PDF
        exists it is uploaded once per parsed bill and attached to that bill if
        the upload returned an entry. The combined bills go through
        ``adjust_bill_dates`` before being wrapped in ``Results``.
        """
        home_page = LoginPage(self._driver).login(self.username, self.password)
        self.screenshot("home_page")
        bill_history_page = home_page.to_bill_history()
        bill_history_page.set_dates(self.start_date, self.end_date)
        self.screenshot("bill_history")

        history = bill_history_page.gather_data()

        # Download statistics; falsy (missing or empty) documents are skipped.
        pdf_docs = [entry[0] for entry in history if entry[0]]
        xls_docs = [entry[1] for entry in history if entry[1]]
        log.info(
            "Acquired %s pdfs (%s bytes) and %s excel files (%s bytes)."
            % (
                len(pdf_docs),
                sum(len(doc) for doc in pdf_docs),
                len(xls_docs),
                sum(len(doc) for doc in xls_docs),
            )
        )

        bills = []
        for pdf, xls in history:
            # The spreadsheet takes precedence over the PDF as a data source.
            if xls is not None:
                parsed = bill_data_from_xls(xls, self.service_account)
            elif pdf is not None:
                parsed = bill_data_from_pdf(
                    pdf, self.service_account, self.meter_serial
                )
            else:
                parsed = []

            if pdf is not None and parsed:
                with_attachments = []
                for datum in parsed:
                    key = bill_upload.hash_bill_datum(self.service_account, datum)
                    # statement date is not visible in the bill PDF text; use end date
                    entry = bill_upload.upload_bill_to_s3(
                        BytesIO(pdf),
                        key,
                        source="atmosenergy.com",
                        statement=datum.end,
                        utility=self.utility,
                        utility_account_id=self.utility_account_id,
                    )
                    with_attachments.append(
                        datum._replace(attachments=[entry]) if entry else datum
                    )
                parsed = with_attachments

            if parsed:
                bills.extend(parsed)

        return Results(bills=adjust_bill_dates(bills))
Exemplo n.º 3
0
    def _execute(self):
        """Log in, download the bill PDFs and return the parsed bills.

        Every PDF that ``parse_bill_pdf`` can parse is uploaded and, when the
        upload returns an entry, attached to its bill. Each bill's start date
        is then moved forward by one day before ``adjust_bill_dates`` is
        applied, and a summary of the final bills is shown.
        """
        home_page = LoginPage(self._driver).login(self.keller_id, self.password)
        self.screenshot("home_page")

        bill_history_page = home_page.to_bill_history()
        self.screenshot("bill_history_page")

        bills = bill_history_page.gather_data(
            self.keller_id, self.start_date, self.end_date
        )

        total_bytes = sum(len(raw_pdf) for raw_pdf in bills)
        log.info("Acquired %d bills (%s bytes total)." % (len(bills), total_bytes))

        parsed_bills = []
        for raw_pdf in bills:
            datum = parse_bill_pdf(BytesIO(raw_pdf))
            if datum is None:
                # Unparseable PDFs are skipped.
                continue

            key = bill_upload.hash_bill_datum(self.account_number, datum)
            # bill doesn't have a statement date; use end date
            entry = bill_upload.upload_bill_to_s3(
                BytesIO(raw_pdf),
                key,
                statement=datum.end,
                source="cityofkeller.com",
                utility=self.utility,
                utility_account_id=self.account_number,
            )
            parsed_bills.append(
                datum._replace(attachments=[entry]) if entry else datum
            )

        # bill periods overlap; adjust start dates
        one_day = timedelta(days=1)
        shifted_bills = [
            BillingDatum(
                start=datum.start + one_day,
                end=datum.end,
                statement=datum.statement,
                cost=datum.cost,
                used=datum.used,
                peak=datum.peak,
                items=datum.items,
                attachments=datum.attachments,
                utility_code=None,
            )
            for datum in parsed_bills
        ]
        final_bills = adjust_bill_dates(shifted_bills)
        show_bill_summary(final_bills, "Final Bill Summary")
        return Results(bills=final_bills)
    def make_billing_datum(self, bill_detail: BillPeriodDetails) -> BillingDatum:
        """Build a BillingDatum from a website billing detail summary.

        The statement date is taken from a ``Date=yyyy-mm-dd`` fragment in the
        detail's download link. If the link is missing, has no such fragment,
        or the date cannot be parsed, the period end date is used instead.
        When ``download_pdf`` returns data, the PDF is uploaded and the
        resulting entry (if any) is attached to the returned datum.
        """
        link = bill_detail.download_link
        # get statement date from link: Date=yyyy-mm-dd
        found = re.search(r"Date=(\d\d\d\d-\d\d-\d\d)", link) if link else None

        statement = None
        if found:
            raw_date = found.group(1)
            try:
                statement = parse_date(raw_date).date()
            except Exception as exc:
                log.warning("error parsing date %s: %s", raw_date, exc)
        # Fall back to the end of the billing period.
        statement = statement or bill_detail.end

        datum = BillingDatum(
            start=bill_detail.start,
            end=bill_detail.end,
            statement=statement,
            cost=bill_detail.total_charges,
            used=bill_detail.total_kwh,
            peak=bill_detail.max_kw,
            items=None,
            attachments=None,
            utility_code=bill_detail.utility_code,
        )

        pdf_bytes = self.download_pdf(bill_detail)
        if not pdf_bytes:
            return datum

        key = bill_upload.hash_bill_datum(self.account_id, datum)
        entry = bill_upload.upload_bill_to_s3(
            BytesIO(pdf_bytes),
            key,
            source="smud.org",
            statement=statement,
            utility=self.utility,
            utility_account_id=self.account_id,
        )
        return datum._replace(attachments=[entry]) if entry else datum
Exemplo n.º 5
0
    def _execute(self):
        """Log in, gather bill records with their PDFs and return the bills.

        A requested window shorter than 90 days is widened by moving
        ``self.start_date`` back to 90 days before ``self.end_date``. Each
        gathered record is a ``(billing datum, pdf bytes or None)`` pair;
        records with a PDF have it uploaded and, when the upload returns an
        entry, attached. The bills are passed through ``adjust_bill_dates``.
        """
        minimum_window = timedelta(days=90)
        if self.end_date - self.start_date < minimum_window:
            self.start_date = self.end_date - minimum_window
            log.info(
                "Initial time window was too narrow for this utility. Expanding time window to: %s - %s"
                % (self.start_date, self.end_date)
            )

        home_page = LoginPage(self._driver).login(self.username, self.password)

        log.info("Login successful. Loading bill history.")
        self.screenshot("post_login")
        bill_history_page = home_page.select_account(self.account_number)

        log.info("Loaded bill history page.")
        self.screenshot("bill_history")
        results = bill_history_page.gather_data(self.start_date, self.end_date)

        pdf_count = sum(1 for _, payload in results if payload is not None)
        log.info(
            "Obtained %s bill records and %s PDFs." % (len(results), pdf_count)
        )

        bills = []
        for datum, pdf_bytes in results:
            entry = None
            if pdf_bytes is not None:
                key = bill_upload.hash_bill_datum(self.account_number, datum)
                entry = bill_upload.upload_bill_to_s3(
                    BytesIO(pdf_bytes),
                    key,
                    statement=datum.statement,
                    source="hudsonenergy.net",
                    utility=self.utility,
                    utility_account_id=self.account_number,
                )
            # Records without a PDF or without an upload entry stay unchanged.
            bills.append(datum._replace(attachments=[entry]) if entry else datum)

        return Results(bills=adjust_bill_dates(bills))
Exemplo n.º 6
0
    def get_bills(self, account_id: str, start: date,
                  end: date) -> List[BillingDatum]:
        """Get bills from the usage table.

        For each visible table row:
          - statement date comes from column 1
          - end date comes from column 2 (month/day only; the year is derived
            from the statement date)
          - start date is end date - (days in column 7 - 1)
          - rows whose period does not overlap ``start`` / ``end`` are skipped
          - peak comes from column 3, used is the sum of columns 4-6 and cost
            comes from column 8
          - the link in column 0 is clicked to download the bill PDF, then the
            method waits for the file to appear in ``self.driver.download_dir``

        Args:
            account_id: Account id used for the upload key and attachment.
            start: First day of the requested range.
            end: Last day of the requested range.

        Returns:
            One BillingDatum per overlapping row, with the uploaded PDF
            attached when S3 upload is enabled and returned an entry.

        Raises:
            Exception: When the bill PDF for a row could not be downloaded;
                the underlying error is chained as the cause.
        """

        def to_float(text: str) -> float:
            """Parse the digits and dots in ``text``; return 0 if there are none."""
            digits = "".join(ch for ch in text if ch.isdigit() or ch == ".")
            return float(digits) if digits else 0

        WebDriverWait(self.driver, 10).until(
            EC.presence_of_element_located(self.UsageTableBodyLocator))
        usage_table_rows = self.driver.find_elements(
            *self.UsageTableRowsLocator)

        bill_data: List[BillingDatum] = []
        self.driver.screenshot(BaseWebScraper.screenshot_path("bill table"))
        for row in usage_table_rows:
            # Hidden cells are dropped so the indexes below refer to visible
            # columns; a missing style attribute counts as visible.
            cols = [
                c for c in row.find_elements_by_tag_name("td")
                if "display: none" not in (c.get_attribute("style") or "")
            ]

            # Read each cell's text once; every .text access asks the driver.
            statement_text = cols[1].text
            end_text = cols[2].text
            days_text = cols[7].text

            log.debug(
                f"statement={statement_text} end={end_text} days={days_text}")
            # statement date
            statement_date = date_parser.parse(statement_text).date()

            # bill end: a read date starting with "12" on a January statement
            # belongs to the previous year
            period_year = statement_date.year
            if statement_date.month == 1 and end_text.startswith("12"):
                period_year = statement_date.year - 1
            end_str = f"{end_text}/{period_year}"
            bill_end = date_parser.parse(end_str).date()

            # bill start
            bill_start = bill_end - timedelta(
                days=int(to_float(days_text)) - 1)
            log.debug(f"start={bill_start} end={bill_end}")

            if not self._overlap(start, end, bill_start, bill_end):
                log.info(
                    f"skipping bill {bill_start} - {bill_end}: does not overlap requested range {start} - {end}"
                )
                continue

            # cost
            new_charges = to_float(cols[8].text)
            # used
            used = (to_float(cols[4].text) + to_float(cols[5].text) +
                    to_float(cols[6].text))
            # peak
            peak = to_float(cols[3].text)

            bill_datum = BillingDatum(
                start=bill_start,
                end=bill_end,
                statement=statement_date,
                cost=new_charges,
                used=used,
                peak=peak,
                items=None,
                attachments=None,
                utility_code=None,
            )

            # Bound before the try so the error message can always use it.
            bill_pdf_name = "SRPbill{}{}.pdf".format(
                statement_date.strftime("%B"), statement_date.year)
            try:
                pdf_download_link = cols[0].find_element_by_tag_name("a")
                scroll_to(self.driver, pdf_download_link)
                pdf_download_link.click()
                log.info("looking for %s in %s", bill_pdf_name,
                         self.driver.download_dir)
                self.driver.wait(60).until(
                    file_exists_in_dir(self.driver.download_dir,
                                       bill_pdf_name))
            except Exception as e:
                raise Exception(
                    f"Failed to download bill {bill_pdf_name} for statement date {statement_date}:\n {e}"
                ) from e
            log.info(
                f"Bill {bill_pdf_name} for statement date {statement_date} downloaded successfully"
            )

            attachment_entry = None
            # open downloaded PDF and upload
            if config.enabled("S3_BILL_UPLOAD"):
                key = hash_bill_datum(account_id, bill_datum)
                with open(f"{self.driver.download_dir}/{bill_pdf_name}",
                          "rb") as pdf_data:
                    attachment_entry = upload_bill_to_s3(
                        BytesIO(pdf_data.read()),
                        key,
                        source="myaccount.srpnet.com",
                        statement=bill_datum.statement,
                        utility="utility:salt-river-project",
                        utility_account_id=account_id,
                    )
            if attachment_entry:
                bill_data.append(
                    bill_datum._replace(attachments=[attachment_entry]))
            else:
                bill_data.append(bill_datum)
        return bill_data
    def _execute(self):
        """Log in, capture bill PDFs for the date range and return the bills.

        The requested window is widened to at least ``MINIMUM_BILL_DAYS`` days,
        then limited to start no earlier than ten years ago and end no later
        than today. Each captured PDF is parsed with ``parse_bill_pdf``; PDFs
        that fail to parse are logged and skipped. When S3 upload is enabled
        the PDF is uploaded and any returned entry is attached to the bill.
        """
        minimum_window = timedelta(days=MINIMUM_BILL_DAYS)
        if self.end_date - self.start_date < minimum_window:
            log.info(
                f"Expanding date range to a minimum of {MINIMUM_BILL_DAYS} days."
            )
            self.start_date = self.end_date - minimum_window

        earliest_allowed = (datetime.now() - relativedelta(years=10)).date()
        start_date = max(self.start_date, earliest_allowed)
        end_date = min(self.end_date, datetime.now().date())

        log.info("Final date range to search: %s - %s" %
                 (start_date, end_date))

        home_page = LoginPage(self._driver).login(self.username, self.password)
        self.screenshot("home_screen")
        log.info("Login successful.")

        bill_history_page = home_page.to_bill_history()
        self.screenshot("bill_history_page")
        log.info("Loaded bill history.")

        bill_history_page.select_account(self.account_number)
        self.screenshot("account_selected")
        log.info("Selected account.")

        bill_history_page.set_dates(start_date, end_date)
        self.screenshot("dates_selected")
        log.info("Selected dates.")

        raw_pdfs = bill_history_page.gather_data()

        log.info("PDF bills captured: %s" % len(raw_pdfs))
        log.info("Net bill pdf bytes captured: %s" %
                 (sum(len(raw) for raw in raw_pdfs)))

        bill_data = []
        # Positions are 1-based so log messages match the capture order.
        for position, raw in enumerate(raw_pdfs, start=1):
            datum = parse_bill_pdf(BytesIO(raw), self.meter_number)
            if datum is None:
                log.info("There was a problem parsing a bill PDF #%d." % position)
                continue

            entry = None
            if config.enabled("S3_BILL_UPLOAD"):
                key = bill_upload.hash_bill_datum(self.meter_number, datum)
                entry = bill_upload.upload_bill_to_s3(
                    BytesIO(raw),
                    key,
                    source="pacificpower.net",
                    statement=datum.statement,
                    utility=self.utility,
                    utility_account_id=self.account_number,
                )

            bill_data.append(
                datum._replace(attachments=[entry]) if entry else datum)

        final_bills = adjust_bill_dates(bill_data)
        show_bill_summary(final_bills, "Final Bill Summary")
        return Results(bills=final_bills)