def download_and_attach_pdf(
    self, bill_data: BillingDatum, billing_row: sce_pages.BillingDataRow
) -> BillingDatum:
    """Download the PDF for a billing row and attach it to the bill.

    Clears previous PDF downloads, downloads the PDF for ``billing_row``,
    uploads it through ``bill_upload.upload_bill_to_s3`` and returns a copy
    of ``bill_data`` whose ``attachments`` holds the uploaded entry.

    Args:
        bill_data: The bill the PDF belongs to; never mutated.
        billing_row: The page row used to trigger the PDF download.

    Returns:
        A copy of ``bill_data`` with one attachment, or ``bill_data``
        unchanged when no PDF was available or the upload produced no
        attachment entry.
    """
    self.clear_pdf_downloads()
    bill_path = self.download_pdf_for_billing_row(billing_row)
    if not bill_path:
        log.info(
            "No pdf bill was available for this period: %s to %s",
            bill_data.start,
            bill_data.end,
        )
        return bill_data

    with open(bill_path, "rb") as bill_file:
        key = bill_upload.hash_bill_datum(self.service_id, bill_data) + ".pdf"
        attachment_entry = bill_upload.upload_bill_to_s3(
            bill_file,
            key,
            statement=bill_data.statement,
            source="sce.com",
            utility=self.utility,
            utility_account_id=self.utility_account_id,
        )

    # Other scrapers treat a falsy upload result as "no attachment"; do the
    # same here so a bill never ends up with attachments=[None].
    if not attachment_entry:
        return bill_data
    return bill_data._replace(attachments=[attachment_entry])
def adjust_billing_datum_type(bill: BillingDatum) -> BillingDatum:
    """Return ``bill`` with datetime start/end values reduced to dates.

    Handles the case where datetimes were passed into a BillingDatum:
    ``start`` and ``end`` are replaced by their ``.date()`` when they are
    datetimes (including datetime subclasses); values that are already plain
    dates, or anything else, are left untouched.

    Args:
        bill: The bill to normalize; it is not mutated.

    Returns:
        A copy of ``bill`` produced by ``_replace`` with the adjusted
        ``start`` and ``end``.
    """
    bill_start = bill.start
    bill_end = bill.end
    # isinstance (not an exact type comparison) so datetime subclasses are
    # converted too. A plain date is never an instance of datetime, so dates
    # pass through unchanged.
    if isinstance(bill_start, datetime):
        bill_start = bill_start.date()  # type: ignore
    if isinstance(bill_end, datetime):
        bill_end = bill_end.date()  # type: ignore
    return bill._replace(start=bill_start, end=bill_end)
def make_billing_datum(self, bill_detail: BillPeriodDetails) -> BillingDatum:
    """Build a Gridium BillingDatum from a website billing detail summary.

    The statement date is taken from the ``Date=yyyy-mm-dd`` parameter of the
    bill's download link. When the link is missing, carries no such
    parameter, or the date cannot be parsed, the bill's end date is used as
    the statement date instead. If a PDF can be downloaded for the bill, it
    is uploaded and the resulting entry is attached to the returned datum.
    """
    statement = None
    link = bill_detail.download_link
    # get statement date from link: Date=yyyy-mm-dd
    date_match = re.search(r"Date=(\d\d\d\d-\d\d-\d\d)", link) if link else None
    if date_match is not None:
        raw_date = date_match.group(1)
        try:
            statement = parse_date(raw_date).date()
        except Exception as exc:
            log.warning("error parsing date %s: %s", raw_date, exc)
    # Fall back to the end of the billing period.
    statement = statement or bill_detail.end

    bill_datum = BillingDatum(
        start=bill_detail.start,
        end=bill_detail.end,
        statement=statement,
        cost=bill_detail.total_charges,
        used=bill_detail.total_kwh,
        peak=bill_detail.max_kw,
        items=None,
        attachments=None,
        utility_code=bill_detail.utility_code,
    )

    pdf_bytes = self.download_pdf(bill_detail)
    if not pdf_bytes:
        return bill_datum

    key = bill_upload.hash_bill_datum(self.account_id, bill_datum)
    attachment_entry = bill_upload.upload_bill_to_s3(
        BytesIO(pdf_bytes),
        key,
        source="smud.org",
        statement=statement,
        utility=self.utility,
        utility_account_id=self.account_id,
    )
    if attachment_entry:
        return bill_datum._replace(attachments=[attachment_entry])
    return bill_datum
def get_bills(self, utility: str, utility_account_id: str) -> List[BillingDatum]:
    """Scrape the bills in the requested date range and upload their PDFs.

    For every bill date listed in the bill table that falls within
    ``self.start_date`` .. ``self.end_date``, this opens the bill view,
    parses the service period, cost, usage (sum of KWH readings) and peak
    (max KW reading), prints the bill image to a PDF in
    ``self.download_dir``, uploads that PDF and records a BillingDatum.
    The datum's ``end`` and ``statement`` are the parsed "to" date minus
    one day.

    Args:
        utility: Utility identifier passed through to the bill upload.
        utility_account_id: Account id used for the upload and the bill hash.

    Returns:
        One BillingDatum per bill inside the date range.

    Raises:
        Exception: If the service period cannot be parsed from the bill
            page, or the printed PDF does not appear in the download
            directory.
    """
    billing_data = []

    date_links = self.driver.find_elements(
        By.CSS_SELECTOR, "table.table-alt a.bill-view-link"
    )
    available_dates = [parse_date(link.text).date() for link in date_links]
    log.info(
        "available dates: %s", [dt.strftime("%Y-%m-%d") for dt in available_dates]
    )

    xpath_locators = {
        # Third cell of the rows in the NEW CHARGES table
        "cost": "//table[contains(., 'NEW CHARGES')]/tbody/tr/td[3]",
        # KWH cells of the USAGE table
        "used": "(//table[contains(.,'USAGE')]//tr/td[contains(., 'KWH')])",
        # KW cells of the USAGE table (KWH cells excluded)
        "usage_kw": "//table[contains(.,'USAGE')]//tr/td[contains(.,'KW') and not(contains(.,'KWH'))]",
    }

    def service_date(month_day, bill_date):
        """Parse a year-less "Mon DD" service date relative to the bill date.

        The page omits the year. A service date whose month is later in the
        calendar than the bill's month must belong to the previous year
        (e.g. a December service date on a January bill).
        """
        parsed = parse_date("%s %s" % (month_day, bill_date.year))
        if parsed.month > bill_date.month:
            parsed = parse_date("%s %s" % (month_day, bill_date.year - 1))
        return parsed

    # loop through dates in table in ascending order
    for pdf_date in reversed(available_dates):
        # skip if the date isn't in the specified range
        if not (self.start_date <= pdf_date <= self.end_date):
            log.debug("skipping date outside range: %s", pdf_date)
            continue

        view_bill_link = self.driver.find_element_by_xpath(
            '//a[.="%s"]' % pdf_date.strftime("%m/%d/%Y")
        )
        scroll_to(self.driver, view_bill_link)
        self.driver.sleep(0.5)
        view_bill_link.click()

        self.driver.wait(30).until(
            EC.visibility_of_element_located((By.CSS_SELECTOR, "div.billImage"))
        )

        cost = None
        used = None
        peak = None

        dates_line_text: str = self.driver.find_element_by_xpath(
            "//td[contains(., 'Service From:')]"
        ).text
        dates_match = re.search(
            r"Service From: (?P<from>\w+ \d\d) to (?P<to>\w+ \d\d) \(\d\d Days\)",
            dates_line_text,
        )
        if not dates_match:
            # Without a service period the bill cannot be built; fail with a
            # clear message rather than a TypeError on None arithmetic below.
            raise Exception(
                "Unable to parse service dates for %s from %r"
                % (pdf_date, dates_line_text)
            )
        start_date = service_date(dates_match.group("from"), pdf_date)
        end_date = service_date(dates_match.group("to"), pdf_date)

        cost_match = self.driver.find(xpath_locators["cost"], xpath=True)
        if cost_match:
            cost = float(cost_match.text.replace("$", "").replace(",", ""))

        kwh_usages = []
        for usage_cell in self.driver.find_all(xpath_locators["used"], xpath=True):
            # include only if it has a reading values as siblings; exclude
            # credit line items
            parent = usage_cell.find_element_by_xpath("..")
            # meter number, previous reading, current reading
            readings_text = ""
            for idx, child in enumerate(parent.find_elements_by_xpath(".//td")):
                child_text = child.text.strip()
                log.debug("\t%s\t%s", idx, child_text)
                readings_text += child_text
                if idx == 2:
                    break
            if not readings_text:
                log.info("skipping non-reading line item: %s", parent.text)
                continue
            kwh_usages.append(
                float(usage_cell.text.replace("KWH", "").replace(",", "").strip())
            )
        if kwh_usages:
            used = sum(kwh_usages)

        kw_usages = [
            float(kw_cell.text.replace("KW", "").replace(",", "").strip())
            for kw_cell in self.driver.find_all(
                xpath_locators["usage_kw"], xpath=True
            )
        ]
        if kw_usages:
            peak = max(kw_usages)

        data = BillingDatum(
            start=start_date,
            end=end_date - timedelta(days=1),
            statement=end_date - timedelta(days=1),
            cost=cost,
            peak=peak,
            used=used,
            items=None,
            attachments=None,
            utility_code=None,
        )

        self.driver.find("a#billImageToPrint").click()
        self.driver.sleep(1)
        self.driver.switch_to.window(self.driver.window_handles[-1])

        # the filename of the printed pdf is f"{current page title}.pdf"
        self.driver.execute_script("window.print();")
        try:
            # file_exists_in_dir builds a wait condition; it must be polled
            # through the driver's wait to actually check for the file.
            self.driver.wait(30).until(
                file_exists_in_dir(
                    directory=self.download_dir,
                    pattern=r"^Bill View Bill Image.pdf$",
                )
            )
        except Exception as exc:
            raise Exception("Unable to download file for %s" % pdf_date) from exc

        curr_path = os.path.join(self.download_dir, "Bill View Bill Image.pdf")
        new_path = os.path.join(
            self.download_dir, f"bill_{pdf_date.strftime('%Y-%m-%d')}.pdf"
        )
        os.rename(curr_path, new_path)

        log.info("parsed bill for %s - %s", data.start, data.end)

        self.driver.find("a#close").click()
        self.driver.sleep(1)
        self.driver.switch_to.window(self.driver.window_handles[-1])
        self.driver.sleep(1)

        # upload PDF:
        key = hash_bill(
            utility_account_id,
            data.start,
            data.end,
            data.cost,
            data.peak,
            data.used,
        )
        with open(new_path, "rb") as pdf_data:
            attachment_entry = upload_bill_to_s3(
                BytesIO(pdf_data.read()),
                key,
                source="www.duke-energy.com",
                statement=data.end,
                utility=utility,
                utility_account_id=utility_account_id,
            )
        if attachment_entry:
            data = data._replace(attachments=[attachment_entry])
        billing_data.append(data)

        # Click Bill Information in breadcrumbs to go back to bills list page
        self.driver.find("a#billInformation").click()

    return billing_data
def get_bills(self, account_id: str, start: date, end: date) -> List[BillingDatum]:
    """Get bills from the usage table and download/upload their PDFs.

    For each table row (hidden cells are ignored):
      - statement date comes from column 1
      - bill end comes from column 2 (month/day); its year is the statement
        year, or the previous year for a "12..." end on a January statement
      - bill start is end - (days in column 7 - 1)
      - rows whose start/end do not overlap the requested range are skipped
      - peak comes from column 3, used is the sum of columns 4-6 and cost
        comes from column 8; blank cells count as 0
      - the link in column 0 is clicked to download the PDF, waiting for it
        to appear in ``self.driver.download_dir``
      - when ``S3_BILL_UPLOAD`` is enabled, the PDF is uploaded and attached

    Args:
        account_id: Account id used for the bill hash and upload.
        start: First day of the requested range.
        end: Last day of the requested range.

    Returns:
        One BillingDatum per overlapping row, in table order.

    Raises:
        Exception: If a bill PDF cannot be downloaded; the original error is
            chained as the cause.
    """
    WebDriverWait(self.driver, 10).until(
        EC.presence_of_element_located(self.UsageTableBodyLocator)
    )
    usage_table_rows = self.driver.find_elements(*self.UsageTableRowsLocator)

    bill_data: List[BillingDatum] = []
    self.driver.screenshot(BaseWebScraper.screenshot_path("bill table"))
    for row in usage_table_rows:
        # get_attribute may return None when the attribute is absent; treat
        # that as "no inline style" instead of failing the membership test.
        cols = [
            c
            for c in row.find_elements_by_tag_name("td")
            if "display: none" not in (c.get_attribute("style") or "")
        ]
        # Read every cell's text once rather than on each access.
        texts = [c.text for c in cols]

        def to_float(idx):
            """Return the digits and dots of cell ``idx`` as a float, or 0."""
            digits = "".join(ch for ch in texts[idx] if ch.isdigit() or ch == ".")
            return float(digits) if digits else 0

        log.debug(f"statement={texts[1]} end={texts[2]} days={texts[7]}")
        # statement date
        statement_date = date_parser.parse(texts[1]).date()

        # bill end: the cell has no year; a December end on a January
        # statement belongs to the previous year
        period_year = statement_date.year
        if statement_date.month == 1 and texts[2].startswith("12"):
            period_year = statement_date.year - 1
        end_str = f"{texts[2]}/{period_year}"
        bill_end = date_parser.parse(end_str).date()

        # bill start
        bill_start = bill_end - timedelta(days=int(to_float(7)) - 1)
        log.debug(f"start={bill_start} end={bill_end}")

        if not self._overlap(start, end, bill_start, bill_end):
            log.info(
                f"skipping bill {bill_start} - {bill_end}: does not overlap requested range {start} - {end}"
            )
            continue

        # cost
        new_charges = to_float(8)
        # used
        used = to_float(4) + to_float(5) + to_float(6)
        # peak
        peak = to_float(3)

        bill_datum = BillingDatum(
            start=bill_start,
            end=bill_end,
            statement=statement_date,
            cost=new_charges,
            used=used,
            peak=peak,
            items=None,
            attachments=None,
            utility_code=None,
        )

        # Computed before the try so the error message can always use it.
        bill_pdf_name = "SRPbill{}{}.pdf".format(
            statement_date.strftime("%B"), statement_date.year
        )
        try:
            pdf_download_link = cols[0].find_element_by_tag_name("a")
            scroll_to(self.driver, pdf_download_link)

            pdf_download_link.click()
            log.info("looking for %s in %s", bill_pdf_name, self.driver.download_dir)
            self.driver.wait(60).until(
                file_exists_in_dir(self.driver.download_dir, bill_pdf_name)
            )
        except Exception as e:
            raise Exception(
                f"Failed to download bill {bill_pdf_name} for statement date {statement_date}:\n {e}"
            ) from e
        log.info(
            f"Bill {bill_pdf_name} for statement date {statement_date} downloaded successfully"
        )

        attachment_entry = None
        # open downloaded PDF and upload
        if config.enabled("S3_BILL_UPLOAD"):
            key = hash_bill_datum(account_id, bill_datum)
            with open(f"{self.driver.download_dir}/{bill_pdf_name}", "rb") as pdf_data:
                attachment_entry = upload_bill_to_s3(
                    BytesIO(pdf_data.read()),
                    key,
                    source="myaccount.srpnet.com",
                    statement=bill_datum.statement,
                    utility="utility:salt-river-project",
                    utility_account_id=account_id,
                )
        if attachment_entry:
            bill_datum = bill_datum._replace(attachments=[attachment_entry])
        bill_data.append(bill_datum)
    return bill_data