def _execute(self):
    """Log in, collect the bill history for the date range and build bills.

    Each history entry is a ``(pdf, xls)`` pair. Billing data is parsed from
    the spreadsheet when one is present and from the PDF otherwise. Whenever
    a PDF is present it is also handed to ``bill_upload.upload_bill_to_s3``
    once per parsed bill, and a truthy result is stored as that bill's
    attachment.

    Returns:
        Results whose ``bills`` are the collected billing data after
        ``adjust_bill_dates``.
    """
    home = LoginPage(self._driver).login(self.username, self.password)
    self.screenshot("home_page")

    history_page = home.to_bill_history()
    history_page.set_dates(self.start_date, self.end_date)
    self.screenshot("bill_history")

    history = history_page.gather_data()

    # Summarise what was downloaded; empty/missing payloads are not counted.
    pdf_payloads = [entry[0] for entry in history if entry[0]]
    xls_payloads = [entry[1] for entry in history if entry[1]]
    log.info(
        "Acquired %s pdfs (%s bytes) and %s excel files (%s bytes)."
        % (
            len(pdf_payloads),
            sum(len(payload) for payload in pdf_payloads),
            len(xls_payloads),
            sum(len(payload) for payload in xls_payloads),
        )
    )

    bills = []
    for pdf, spreadsheet in history:
        # The spreadsheet takes precedence over the PDF as the data source.
        if spreadsheet is not None:
            parsed = bill_data_from_xls(spreadsheet, self.service_account)
        elif pdf is not None:
            parsed = bill_data_from_pdf(
                pdf, self.service_account, self.meter_serial
            )
        else:
            parsed = []

        if not parsed:
            continue

        if pdf is not None:
            with_attachments = []
            for datum in parsed:
                key = bill_upload.hash_bill_datum(self.service_account, datum)
                # statement date is not visible in the bill PDF text;
                # use end date
                entry = bill_upload.upload_bill_to_s3(
                    BytesIO(pdf),
                    key,
                    source="atmosenergy.com",
                    statement=datum.end,
                    utility=self.utility,
                    utility_account_id=self.utility_account_id,
                )
                # Keep the datum untouched when no attachment entry came back.
                with_attachments.append(
                    datum._replace(attachments=[entry]) if entry else datum
                )
            parsed = with_attachments

        bills.extend(parsed)

    return Results(bills=adjust_bill_dates(bills))
def _execute(self):
    """Build and run the state machine, then return the collected bills.

    Returns:
        Results whose ``bills`` are ``self.billing_history`` after
        ``adjust_bill_dates``.

    Raises:
        errors.BillingScraperException: if the state machine finishes in any
            state other than ``"done"``.
    """
    outcome = self.define_state_machine().run()

    # Anything but "done" means the run ended somewhere unexpected.
    if outcome != "done":
        raise errors.BillingScraperException(
            "The scraper did not reach a finished state; "
            "this will require developer attention."
        )

    return Results(bills=adjust_bill_dates(self.billing_history))
def _execute(self):
    """Log in, gather bill PDFs, parse them and return the resulting bills.

    Every PDF that ``parse_bill_pdf`` can parse is handed to
    ``bill_upload.upload_bill_to_s3``; a truthy result is stored as the
    bill's attachment. Each bill's start date is then moved forward one day
    before the list goes through ``adjust_bill_dates``.

    Returns:
        Results whose ``bills`` are the adjusted billing data.
    """
    home = LoginPage(self._driver).login(self.keller_id, self.password)
    self.screenshot("home_page")

    history_page = home.to_bill_history()
    self.screenshot("bill_history_page")

    raw_pdfs = history_page.gather_data(
        self.keller_id, self.start_date, self.end_date
    )
    total_bytes = sum(len(raw) for raw in raw_pdfs)
    log.info(
        "Acquired %d bills (%s bytes total)." % (len(raw_pdfs), total_bytes)
    )

    parsed_bills = []
    for raw in raw_pdfs:
        datum = parse_bill_pdf(BytesIO(raw))
        if datum is not None:
            key = bill_upload.hash_bill_datum(self.account_number, datum)
            # bill doesn't have a statement date; use end date
            entry = bill_upload.upload_bill_to_s3(
                BytesIO(raw),
                key,
                statement=datum.end,
                source="cityofkeller.com",
                utility=self.utility,
                utility_account_id=self.account_number,
            )
            # Keep the datum untouched when no attachment entry came back.
            parsed_bills.append(
                datum._replace(attachments=[entry]) if entry else datum
            )

    # bill periods overlap; adjust start dates
    one_day = timedelta(days=1)
    shifted_bills = [
        BillingDatum(
            start=parsed.start + one_day,
            end=parsed.end,
            statement=parsed.statement,
            cost=parsed.cost,
            used=parsed.used,
            peak=parsed.peak,
            items=parsed.items,
            attachments=parsed.attachments,
            utility_code=None,
        )
        for parsed in parsed_bills
    ]

    final_bills = adjust_bill_dates(shifted_bills)
    show_bill_summary(final_bills, "Final Bill Summary")
    return Results(bills=final_bills)
def _execute(self):
    """Log in, select the account and return its bills for the date range.

    If the requested window is shorter than 90 days, ``self.start_date`` is
    moved back so the window spans 90 days ending at ``self.end_date``.
    Records that come with PDF bytes are handed to
    ``bill_upload.upload_bill_to_s3``; a truthy result is stored as the
    bill's attachment.

    Returns:
        Results whose ``bills`` are the gathered billing data after
        ``adjust_bill_dates``.
    """
    minimum_window = timedelta(days=90)
    if self.end_date - self.start_date < minimum_window:
        self.start_date = self.end_date - minimum_window
        log.info(
            "Initial time window was too narrow for this utility. Expanding time window to: %s - %s"
            % (self.start_date, self.end_date)
        )

    home = LoginPage(self._driver).login(self.username, self.password)
    log.info("Login successful. Loading bill history.")
    self.screenshot("post_login")

    history_page = home.select_account(self.account_number)
    log.info("Loaded bill history page.")
    self.screenshot("bill_history")

    records = history_page.gather_data(self.start_date, self.end_date)
    pdf_count = len([pdf for _, pdf in records if pdf is not None])
    log.info(
        "Obtained %s bill records and %s PDFs." % (len(records), pdf_count)
    )

    bills = []
    for datum, pdf in records:
        entry = None
        if pdf is not None:
            key = bill_upload.hash_bill_datum(self.account_number, datum)
            entry = bill_upload.upload_bill_to_s3(
                BytesIO(pdf),
                key,
                statement=datum.statement,
                source="hudsonenergy.net",
                utility=self.utility,
                utility_account_id=self.account_number,
            )
        # No PDF, or no attachment entry returned: keep the datum as-is.
        bills.append(datum._replace(attachments=[entry]) if entry else datum)

    return Results(bills=adjust_bill_dates(bills))
def _execute(self):
    """Log in, download bill PDFs for the account and return parsed bills.

    The requested window is first widened to at least ``MINIMUM_BILL_DAYS``
    days (by moving ``self.start_date`` back), then clamped so it starts no
    earlier than ten years ago and ends no later than today. PDFs that
    ``parse_bill_pdf`` cannot parse are logged and skipped. When the
    ``S3_BILL_UPLOAD`` config flag is enabled, each parsed PDF is handed to
    ``bill_upload.upload_bill_to_s3`` and a truthy result is stored as the
    bill's attachment.

    Returns:
        Results whose ``bills`` are the parsed billing data after
        ``adjust_bill_dates``.
    """
    minimum_span = timedelta(days=MINIMUM_BILL_DAYS)
    if self.end_date - self.start_date < minimum_span:
        log.info(
            f"Expanding date range to a minimum of {MINIMUM_BILL_DAYS} days."
        )
        self.start_date = self.end_date - minimum_span

    # Clamp the search window to [ten years ago, today].
    earliest_allowed = (datetime.now() - relativedelta(years=10)).date()
    today = datetime.now().date()
    start_date = max(self.start_date, earliest_allowed)
    end_date = min(self.end_date, today)
    log.info("Final date range to search: %s - %s" % (start_date, end_date))

    home = LoginPage(self._driver).login(self.username, self.password)
    self.screenshot("home_screen")
    log.info("Login successful.")

    history_page = home.to_bill_history()
    self.screenshot("bill_history_page")
    log.info("Loaded bill history.")

    history_page.select_account(self.account_number)
    self.screenshot("account_selected")
    log.info("Selected account.")

    history_page.set_dates(start_date, end_date)
    self.screenshot("dates_selected")
    log.info("Selected dates.")

    raw_pdfs = history_page.gather_data()
    log.info("PDF bills captured: %s" % len(raw_pdfs))
    log.info(
        "Net bill pdf bytes captured: %s" % (sum(len(raw) for raw in raw_pdfs))
    )

    bill_data = []
    # 1-based position, used only to identify unparseable PDFs in the log.
    for position, raw in enumerate(raw_pdfs, start=1):
        datum = parse_bill_pdf(BytesIO(raw), self.meter_number)
        if datum is None:
            log.info("There was a problem parsing a bill PDF #%d." % position)
            continue

        entry = None
        if config.enabled("S3_BILL_UPLOAD"):
            key = bill_upload.hash_bill_datum(self.meter_number, datum)
            entry = bill_upload.upload_bill_to_s3(
                BytesIO(raw),
                key,
                source="pacificpower.net",
                statement=datum.statement,
                utility=self.utility,
                utility_account_id=self.account_number,
            )

        # Upload disabled, or no attachment entry returned: keep datum as-is.
        bill_data.append(
            datum._replace(attachments=[entry]) if entry else datum
        )

    final_bills = adjust_bill_dates(bill_data)
    show_bill_summary(final_bills, "Final Bill Summary")
    return Results(bills=final_bills)