def bill_history(self, urja_data: UrjanetData) -> DateIntervalTree:
    """Assemble the billing periods for the filtered, ordered accounts.

    Each account contributes at most one period. Its date range is read from
    the account's first meter when the account has meters, otherwise from the
    account itself. A range that overlaps a period already recorded (e.g.
    when a correction bill is issued) is logged and skipped.

    Returns the DateIntervalTree holding one billing period per kept account.
    """
    accounts = self.ordered_accounts(self.filtered_accounts(urja_data))
    history = DateIntervalTree()

    for account in accounts:
        # If no meters are associated with the account, this might be the
        # final bill; use the account's own dates in that case.
        if account.meters:
            source = account.meters[0]
        else:
            source = account

        if history.overlaps(source.IntervalStart, source.IntervalEnd):
            log.debug(
                "Skipping overlapping billing period: meter_pk={}, start={}, end={}"
                .format(source.PK, source.IntervalStart, source.IntervalEnd))
            continue

        # NVE bills are issued with overlapping date ranges:
        #   Nov 30, 2019 to Dec 31, 2019   31 days
        #   Dec 31, 2019 to Jan 31, 2020   31 days
        # Comparison with interval data shows that the data matches the
        # calendar month, so the start date is moved forward by one day.
        history.add(
            source.IntervalStart + timedelta(days=1),
            source.IntervalEnd,
            self.billing_period(account),
        )

    return history
def get_best_interval_fits(self, bill_history: DateIntervalTree, begin: date, end: date) -> List[Interval]:
    """Return the bill-history intervals that fit the range ``begin``..``end``.

    The match is deliberately fuzzy, because PG&E tends to shift start/end
    dates by one day somewhat arbitrarily. The tree is queried with the given
    range first; if nothing is found the end date is pushed forward one day,
    and if that also finds nothing the start date is pulled back one day.

    An interval found this way is kept when it either fully contains the
    requested range, or when both of its endpoints are within one day of the
    requested endpoints. An empty list is returned when nothing qualifies.
    """
    one_day = timedelta(days=1)

    found = bill_history.range_query(begin, end)
    if not found:
        # Try moving the end date forward one day.
        found = bill_history.range_query(begin, end + one_day)
    if not found:
        # Try moving the start date back one day.
        found = bill_history.range_query(begin - one_day, end)
    if not found:
        return []

    def fits(ival) -> bool:
        if ival.begin <= begin and ival.end >= end:
            return True
        begin_gap = abs((ival.begin - begin).days)
        end_gap = abs((ival.end - end).days)
        return begin_gap <= 1 and end_gap <= 1

    return [ival for ival in found if fits(ival)]
def fill_in_usage_gaps(self, meter: Meter, ival_tree: DateIntervalTree):
    """Add the meter's own date range to ``ival_tree`` when it has no usages.

    If usage data is missing from the Urja XML, the meter date range is used
    instead so charge data may still be extracted for that period. With
    billing streams, the T&D bill's usage persists to the final bill, so for
    PG&E, SMD usage will be used anyway.

    ``ival_tree`` is modified in place; nothing is returned.
    """
    if meter.usages:
        # Usage records exist, so there is no gap to fill.
        return
    ival_tree.add(meter.IntervalStart, meter.IntervalEnd)
def shift_endpoints(self, history: DateIntervalTree):
    """Return ``history`` processed by ``DateIntervalTree.shift_endpoints_end``.

    Intended to fix periods where the start and end are the same. For CCA
    charges the end date is shifted backwards (for disputes); if that would
    leave a one-day interval, the start date is shifted backward as well.
    """
    adjusted = DateIntervalTree.shift_endpoints_end(history)
    return adjusted
def merge_statement_data(self, bill_history: DateIntervalTree, urja_account: Account) -> None:
    """Fold one statement's charges and usages into the known billing periods.

    Every charge and usage on the account's meters is matched to a billing
    period by looking up its ``IntervalStart`` in ``bill_history``. Items that
    match no period, or more than one, are logged and skipped. The collected
    data is then merged into each period's existing data, and the account is
    recorded as a source statement wherever the merge reports added data.
    """
    # One scratch billing period per matched interval, created on first use.
    make_period = functools.partial(LosAngelesWaterBillingPeriod, urja_account)
    collected: Dict[Interval, LosAngelesWaterBillingPeriod] = defaultdict(make_period)

    for meter in urja_account.meters:
        for charge in meter.charges:
            matches = bill_history.point_query(charge.IntervalStart)
            if not matches:
                log.debug(
                    "Charge doesn't belong to a known billing period, skipping:"
                )
                log_charge(log, charge, indent=1)
                continue
            if len(matches) != 1:
                log.debug(
                    "Charge maps to multiple billing periods, skipping:")
                log_charge(log, charge, indent=1)
                continue
            collected[matches[0]].add_utility_charge(charge)

        for usage in meter.usages:
            matches = bill_history.point_query(usage.IntervalStart)
            if not matches:
                log.debug(
                    "Usage doesn't belong to a known billing period, skipping:"
                )
                log_usage(log, usage, indent=1)
                continue
            if len(matches) != 1:
                log.debug(
                    "Usage maps to multiple billing periods, skipping:")
                log_usage(log, usage, indent=1)
                continue
            collected[matches[0]].add_usage(usage)

    for interval, statement_period in collected.items():
        existing = interval.data
        if existing.merge(statement_period):
            existing.add_source_statement(urja_account)
def bill_history(self, urja_data: UrjanetData) -> DateIntervalTree:
    """Assemble one billing period per non-overlapping account.

    Accounts are filtered and ordered first. An account whose date range
    overlaps a period already recorded is logged and skipped. Kept periods
    are stored with the end date moved back one day, then the tree is passed
    through ``DateIntervalTree.shift_endpoints`` and logged.
    """
    accounts = self.ordered_accounts(self.filtered_accounts(urja_data))
    history = DateIntervalTree()

    for account in accounts:
        if history.overlaps(account.IntervalStart, account.IntervalEnd):
            log.debug(
                "Skipping overlapping billing period: account_pk={}, start={}, end={}"
                .format(account.PK, account.IntervalStart, account.IntervalEnd))
            continue

        log.debug(
            "Adding billing period: account_pk={}, start={}, end={}".
            format(account.PK, account.IntervalStart, account.IntervalEnd))
        # Clean Power Alliance end dates from the PDF are one day too far in
        # the future.
        history.add(
            account.IntervalStart,
            account.IntervalEnd - timedelta(days=1),
            self.billing_period(account),
        )

    # Adjust date endpoints to avoid 1-day overlaps.
    history = DateIntervalTree.shift_endpoints(history)

    # Log the billing periods we determined.
    log_generic_billing_periods(history)
    return history
def bill_history(self, urja_data: UrjanetData) -> DateIntervalTree:
    """Assemble one billing period per non-overlapping account.

    Accounts are filtered and ordered first. An account whose date range
    overlaps a period already recorded (e.g. when a correction bill is
    issued) is logged and skipped. The resulting tree is passed through
    ``DateIntervalTree.shift_endpoints`` and logged before being returned.
    """
    accounts = self.ordered_accounts(self.filtered_accounts(urja_data))
    history = DateIntervalTree()

    for account in accounts:
        if history.overlaps(account.IntervalStart, account.IntervalEnd):
            log.debug(
                "Skipping overlapping billing period: account_pk={}, start={}, end={}".format(
                    account.PK, account.IntervalStart, account.IntervalEnd
                )
            )
            continue

        log.debug(
            "Adding billing period: account_pk={}, start={}, end={}".format(
                account.PK, account.IntervalStart, account.IntervalEnd
            )
        )
        history.add(
            account.IntervalStart,
            account.IntervalEnd,
            self.billing_period(account),
        )

    # Adjust date endpoints to avoid 1-day overlaps.
    history = DateIntervalTree.shift_endpoints(history)

    # Log the billing periods we determined.
    log_generic_billing_periods(history)
    return history
def get_account_billing_periods(self, account: Account) -> DateIntervalTree:
    """Return the account's meter date ranges as a merged interval tree.

    Every meter contributes its ``IntervalStart``..``IntervalEnd`` range;
    ``merge_overlaps`` is then applied to the tree before it is returned.
    """
    periods = DateIntervalTree()
    for meter in account.meters:
        meter_start, meter_end = meter.IntervalStart, meter.IntervalEnd
        periods.add(meter_start, meter_end)
    periods.merge_overlaps()
    return periods
def test_shift_endpoints(self):
    """Default shift: the earlier period's end moves back one day."""
    tree = DateIntervalTree()
    # Default behavior
    #   from: 2019-02-01 - 2019-02-28, 2019-02-28 - 2019-03-31
    #   to:   2019-02-01 - 2019-02-27, 2019-02-28 - 2019-03-31
    tree.add(date(2019, 2, 28), date(2019, 3, 31), None)
    tree.add(date(2019, 2, 1), date(2019, 2, 28), None)

    shifted = sorted(DateIntervalTree.shift_endpoints(tree).intervals())

    expected = [
        (date(2019, 2, 1), date(2019, 2, 27)),
        (date(2019, 2, 28), date(2019, 3, 31)),
    ]
    for position, (want_begin, want_end) in enumerate(expected):
        self.assertEqual(want_begin, shifted[position].begin)
        self.assertEqual(want_end, shifted[position].end)
def get_best_interval_fits(self, bill_history: DateIntervalTree, begin: date, end: date) -> Optional[Interval]:
    """Find the best match for a given date range in the given interval tree.

    The tree is queried with ``begin``..``end`` first. If there are no
    immediate overlaps, the start date is moved back one day and the query
    repeated; if that also finds nothing, the end date is moved forward one
    day instead.

    Among the intervals returned by the successful query, the one sharing the
    largest number of days with the *queried* range is returned (the first
    one wins ties). Measuring against the queried range matters for the
    one-day fallbacks: an interval that only touches the widened range shares
    zero days with the original range and would otherwise never be selected.

    Returns None when no query finds an interval, or when no interval shares
    at least one day with the queried range.
    """
    one_day = timedelta(days=1)

    query_begin, query_end = begin, end
    overlaps = bill_history.range_query(query_begin, query_end)
    if not overlaps:
        # Try moving the start date back one day.
        query_begin, query_end = begin - one_day, end
        overlaps = bill_history.range_query(query_begin, query_end)
    if not overlaps:
        # Try moving the end date forward one day.
        query_begin, query_end = begin, end + one_day
        overlaps = bill_history.range_query(query_begin, query_end)
    if not overlaps:
        return None

    def days_overlap(interval):
        # Both endpoints count as covered days, hence the extra day.
        first_shared = max(interval.begin, query_begin)
        last_shared = min(interval.end, query_end)
        return last_shared - first_shared + one_day

    best = None
    best_overlap = timedelta(0)
    for interval in overlaps:
        shared = days_overlap(interval)
        if shared > best_overlap:
            best = interval
            best_overlap = shared
    return best
def test_shift_endpoints_start(self):
    """Start shift: the later period's start moves forward one day."""
    tree = DateIntervalTree()
    # Shift start
    #   from: 2019-02-01 - 2019-02-28, 2019-02-28 - 2019-03-31
    #   to:   2019-02-01 - 2019-02-28, 2019-03-01 - 2019-03-31
    tree.add(date(2019, 2, 28), date(2019, 3, 31), None)
    tree.add(date(2019, 2, 1), date(2019, 2, 28), None)

    shifted_tree = DateIntervalTree.shift_endpoints_start(tree)
    shifted = sorted(shifted_tree.intervals())
    print(shifted)

    expected = [
        (date(2019, 2, 1), date(2019, 2, 28)),
        (date(2019, 3, 1), date(2019, 3, 31)),
    ]
    for position, (want_begin, want_end) in enumerate(expected):
        self.assertEqual(want_begin, shifted[position].begin)
        self.assertEqual(want_end, shifted[position].end)
def urja_to_gridium(
        self, urja_data: UrjanetData) -> GridiumBillingPeriodCollection:
    """Transform urjanet data into Gridium billing periods"""

    def statement_date(acct):
        return acct.StatementDate

    # Newest statement first. The main motivation is corrections: the most
    # recent billing data is processed first, and earlier data covering the
    # same dates is ignored.
    newest_first = sorted(urja_data.accounts, key=statement_date, reverse=True)

    # Step 1: rough out the billing period dates from each account's usage
    # periods, keeping only periods that do not overlap one already recorded.
    timeline = DateIntervalTree()
    for account in newest_first:
        account_periods = self.get_account_billing_periods(account)
        for period in sorted(account_periods.intervals(), reverse=True):
            if not timeline.overlaps(period.begin, period.end):
                log.debug("Adding usage period: %s - %s", period.begin,
                          period.end)
                timeline.add(period.begin, period.end,
                             self.billing_period_class(account))
            else:
                log.debug(
                    "Skipping overlapping usage period: account_pk={}, start={}, end={}"
                    .format(account.PK, period.begin, period.end))

    # Fix periods where start/end are the same.
    timeline = self.shift_endpoints(timeline)

    # Step 2: revisit every account and insert its charge/usage information
    # into the computed billing periods.
    for account in newest_first:
        self.merge_statement_data(timeline, account)

    # Step 3: convert the billing periods into the expected "gridium" format.
    def as_gridium(ival):
        data = ival.data
        return GridiumBillingPeriod(
            start=ival.begin,
            end=ival.end,
            statement=data.statement(),
            total_charge=data.get_total_charge(),
            peak_demand=data.get_peak_demand(),
            total_usage=data.get_total_usage(),
            source_urls=data.get_source_urls(),
            line_items=(data.utility_charges + data.third_party_charges),
            tariff=data.tariff(),
            service_id=data.get_service_id(),
            utility=data.get_utility(),
            utility_account_id=data.get_utility_account_id(),
        )

    converted = [as_gridium(ival) for ival in sorted(timeline.intervals())]
    return GridiumBillingPeriodCollection(periods=converted)
def urja_to_gridium(
        self, urja_data: UrjanetData) -> GridiumBillingPeriodCollection:
    """Transform Urjanet data for water bills into Gridium billing periods"""
    accounts = self.ordered_accounts(self.filtered_accounts(urja_data))

    # One billing period per account, skipping any period that overlaps one
    # already recorded (e.g. in the case that a correction bill is issued).
    history = DateIntervalTree()
    for account in accounts:
        period_start, period_end = self.get_account_period(account)
        if history.overlaps(period_start, period_end):
            log.debug(
                "Skipping overlapping billing period: account_pk={}, start={}, end={}"
                .format(account.PK, period_start, period_end))
            continue

        log.debug(
            "Adding billing period: account_pk={}, start={}, end={}".
            format(account.PK, period_start, period_end))
        history.add(period_start, period_end, self.billing_period(account))

    # Adjust date endpoints to avoid 1-day overlaps.
    history = DateIntervalTree.shift_endpoints(history)

    # Log the billing periods we determined.
    log_generic_billing_periods(history)

    # Compute the final set of gridium billing periods.
    def as_gridium(ival):
        data = ival.data
        return GridiumBillingPeriod(
            start=ival.begin,
            end=ival.end,
            statement=data.statement(),
            total_charge=data.get_total_charge(),
            peak_demand=None,  # No peak demand for water
            total_usage=data.get_total_usage(),
            source_urls=data.get_source_urls(),
            line_items=list(data.iter_charges()),
            tariff=data.tariff(),
        )

    converted = [as_gridium(ival) for ival in sorted(history.intervals())]
    return GridiumBillingPeriodCollection(periods=converted)
def log_generic_billing_periods(bill_history: DateIntervalTree) -> None:
    """Write a debug-level dump of every billing period in ``bill_history``.

    Periods are logged in sorted order. For each one the date range, the
    individual charges, the total charge, the unique usages, the total usage
    and the source statement (link and PK) are emitted.
    """
    log.debug("Billing periods")
    for period in sorted(bill_history.intervals()):
        data = period.data
        span_days = (period.end - period.begin).days
        log.debug(
            "\t{} to {} ({} days)".format(period.begin, period.end, span_days)
        )

        log.debug("\t\tUtility Charges:")
        for charge in data.iter_charges():
            charge_line = "\t\t\tAmt=${0}\tName='{1}'\tPK={2}\t{3}\t{4}".format(
                charge.ChargeAmount,
                charge.ChargeActualName,
                charge.PK,
                charge.IntervalStart,
                charge.IntervalEnd,
            )
            log.debug(charge_line)
        log.debug("\t\tTotal Charge: ${}".format(data.get_total_charge()))

        log.debug("\t\tUsages:")
        for usage in data.iter_unique_usages():
            usage_line = "\t\t\tAmt={0}{1}\tComponent={2}\tPK={3}\t{4}\t{5}".format(
                usage.UsageAmount,
                usage.EnergyUnit,
                usage.RateComponent,
                usage.PK,
                usage.IntervalStart,
                usage.IntervalEnd,
            )
            log.debug(usage_line)
        log.debug("\t\tTotal Usage: {}".format(data.get_total_usage()))

        log.debug("\t\tStatements:")
        statement_line = "\t\t\t{0}\tPK={1}".format(
            data.account.SourceLink, data.account.PK
        )
        log.debug(statement_line)
def get_account_billing_periods(
        self, account: Account, max_duration: int = 45) -> DateIntervalTree:
    """Extract the usage periods for a given Urjanet Account object.

    Recall that the Account object in Urjanet contains data about a given
    utility account within a single statement. This function takes an Account
    and computes the "local" billing timeline according to that account,
    represented as a DateIntervalTree. The idea is that this "local" timeline
    will be merged with a "global" timeline representing the aggregated state
    of all Account objects.

    Only usages accepted by ``self.consider_usage`` are used. A zero-length
    usage falls back to its meter's date range; if that range is also
    zero-length the usage is dropped.

    Args:
        account: the statement-level account whose meters are inspected.
        max_duration: periods longer than this many days are filtered out.
            A falsy value (0 or None) disables the filter.

    Returns:
        A DateIntervalTree of the accepted periods with overlaps merged.
    """
    ival_tree = DateIntervalTree()
    for meter in account.meters:
        for usage in meter.usages:
            if not self.consider_usage(usage):
                continue

            usage_start = usage.IntervalStart
            usage_end = usage.IntervalEnd
            duration = self.get_duration(usage_start, usage_end)

            # If the total is a single day, use the Meter date range instead.
            if duration == 0:
                log.debug(
                    "using meter data range for zero-length usage %s",
                    usage.PK)
                usage_start = meter.IntervalStart
                usage_end = meter.IntervalEnd
                # Refresh the duration so the length filter below judges the
                # range that would actually be added, not the stale zero.
                duration = self.get_duration(usage_start, usage_end)
                if duration == 0:
                    log.info(
                        "Unable to use meter data range for zero-length usage %s",
                        usage.PK,
                    )
                    continue

            if max_duration and duration > max_duration:
                log.debug(
                    "Filtering long usage period: {} to {} ({} days)".
                    format(usage_start, usage_end, duration))
                # Actually drop the period, as the message and the docstring
                # promise; previously it was logged but still added.
                continue

            ival_tree.add(usage_start, usage_end)

        # A meter without usages still contributes its own date range.
        self.fill_in_usage_gaps(meter, ival_tree)

    ival_tree.merge_overlaps()
    return ival_tree
def bill_history(self, urja_data: UrjanetData) -> DateIntervalTree:
    """Build the SDGE billing-period timeline from the ordered accounts.

    For each account (in the order given by ``ordered_accounts``):

    * accounts without meters do not apply to this Gridium meter and are
      skipped;
    * the period is the account date range with the start moved forward one
      day; if that overlaps an existing period, the first meter's date range
      (start also moved forward one day) is tried instead, and the account
      is skipped if that overlaps too;
    * a period longer than 45 days is likely a correction, so one period is
      created per ``[total]`` usage, each holding a copy of the account
      limited to the matching meter's usages and charges for that range;
    * otherwise a single period is added for the account.

    Returns the DateIntervalTree of SDGEBillingPeriod objects.
    """
    filtered_accounts = self.filtered_accounts(urja_data)
    ordered_accounts = self.ordered_accounts(filtered_accounts)

    # For each account, create a billing period, taking care to detect
    # overlaps (e.g. in the case that a correction bill is issued).
    bill_history = DateIntervalTree()
    for account in ordered_accounts:
        # SDGE bills are issued with overlapping date ranges:
        #   Billing Period: 9/30/19 - 10/31/19   Total Days: 31
        #   Billing Period: 10/31/19 - 11/30/19  Total Days: 30
        # Comparing one meter with interval data:
        #   select sum(reading::text::decimal)/4
        #   from (
        #       select json_array_elements(mr.readings) reading
        #       from meter_reading mr
        #       where meter=1971049865238 and occurred > '2019-09-30'
        #           and occurred <= '2019-10-31'
        #   ) r;
        # The interval data for 2019-10-01 - 2019-10-31 closely matches the
        # bill; 2019-09-30 - 2019-10-30 does not. Add +1 to the IntervalStart
        # for the billing period start date.
        start = account.IntervalStart + timedelta(days=1)
        end = account.IntervalEnd

        # account.meters is the list of bill parts that apply to this Gridium
        # meter (Meter.MeterNumber like %meter.utility_service.service_id%).
        # If there aren't any, this bill doesn't apply; skip it.
        if not account.meters:
            log.debug(
                "Skipping billing period; no data for this meter: account_pk={}, start={}, "
                "end={}".format(account.PK, start, end))
            continue

        if bill_history.overlaps(start, end, strict=False):
            # Try getting the date range from the first Meter instead.
            start = account.meters[0].IntervalStart + timedelta(days=1)
            end = account.meters[0].IntervalEnd
            log.debug(
                "Billing period overlap: {} - {}; trying with date range from usage: {} "
                "- {}".format(
                    account.IntervalStart + timedelta(days=1),
                    account.IntervalEnd,
                    start,
                    end,
                ))

        # If it still overlaps, skip it.
        if bill_history.overlaps(start, end, strict=False):
            log.debug(
                "Skipping overlapping billing period: account_pk={}, start={}, end={}"
                .format(account.PK, start, end))
            continue

        if (end - start).days <= 45:
            log.debug(
                "Adding billing period: account_pk={}, start={}, end={}".
                format(account.PK, start, end))
            bill_history.add(
                start,
                end,
                SDGEBillingPeriod(account),
            )
            continue

        # The bill is too long, so it's likely a correction; get the billing
        # periods from the [total] usages instead.
        for meter in account.meters:
            # Snapshot the meter's data once: each period below filters from
            # the full, unmodified lists.
            all_usages = list(meter.usages)
            all_charges = list(meter.charges)
            total_usages = [
                u for u in all_usages if u.RateComponent == "[total]"
            ]
            for usage in total_usages:
                log.debug(
                    "Adding billing period from usage: account_pk={}, "
                    "usage_pk={} start={}, end={}".format(
                        account.PK,
                        usage.PK,
                        usage.IntervalStart,
                        usage.IntervalEnd,
                    ))
                usage_start = usage.IntervalStart
                usage_end = usage.IntervalEnd

                # Copy the account AND the matching meter(s); keep only the
                # relevant charges and usages. copy.copy is shallow, so
                # without copying the meter the assignments below would
                # rewrite the original Meter shared by every period.
                account_copy = copy.copy(account)
                keep_meters = []
                for candidate in account.meters:
                    if candidate.PK != meter.PK:
                        continue
                    meter_copy = copy.copy(candidate)
                    meter_copy.usages = [
                        u for u in all_usages
                        if u.IntervalStart == usage_start
                        and u.IntervalEnd == usage_end
                    ]
                    meter_copy.charges = [
                        c for c in all_charges
                        if c.IntervalStart == usage_start
                        and c.IntervalEnd == usage_end
                    ]
                    keep_meters.append(meter_copy)
                account_copy.meters = keep_meters

                bill_history.add(
                    usage_start + timedelta(days=1),  # prevent overlapping
                    usage_end,
                    SDGEBillingPeriod(account_copy),
                )

    return bill_history
def bill_history(self, urja_data: UrjanetData) -> DateIntervalTree:
    """Build the LADWP billing-period timeline from the ordered accounts.

    LADWP has two versions of where end dates fall: inclusive and exclusive.
    Currently, the end date is exclusive. The shift_endpoints will fix up the
    dates as needed within a series, but we also need to exclude the date of
    the latest bill.

    For each account: accounts without meters are skipped; if the account
    date range overlaps an existing period, the first meter's range is tried
    instead (when it is non-empty); a still-overlapping range is skipped. A
    range longer than 45 days is split into one period per distinct charge
    date range; otherwise a single period is added.

    Returns the DateIntervalTree after ``DateIntervalTree.shift_endpoints``.
    """
    filtered_accounts = self.filtered_accounts(urja_data)
    ordered_accounts = self.ordered_accounts(filtered_accounts)

    # For each account, create a billing period, taking care to detect overlaps
    # (e.g. in the case that a correction bill is issued)
    bill_history = DateIntervalTree()
    for idx, account in enumerate(ordered_accounts):
        start_date = account.IntervalStart
        # Only the first account in the ordering has its end date pulled back
        # one day. NOTE(review): presumably ordered_accounts puts the latest
        # bill first -- confirm against ordered_accounts.
        end_date = (account.IntervalEnd - timedelta(days=1)
                    if idx == 0 else account.IntervalEnd)

        # account.meters is the list of bill parts that apply to this Gridium meter
        # if there aren't any, this bill doesn't apply; skip it
        if not account.meters:
            log.debug(
                "Skipping billing period; no data for this meter: account_pk={}, start={}, "
                "end={}".format(account.PK, account.IntervalStart,
                                account.IntervalEnd))
            continue

        meter = account.meters[0]
        if (bill_history.overlaps(start_date, end_date)
                and meter.IntervalEnd > meter.IntervalStart):
            # try using date range from Meter instead
            log.debug(
                "Account date range overlaps ({} - {}); trying Meter ({} - {})"
                .format(start_date, end_date, meter.IntervalStart,
                        meter.IntervalEnd))
            start_date = meter.IntervalStart
            end_date = meter.IntervalEnd

        # Whichever range was chosen, skip the account if it still overlaps.
        if bill_history.overlaps(start_date, end_date):
            log.debug(
                "Skipping overlapping billing period: account_pk={}, start={}, end={}"
                .format(account.PK, start_date, end_date))
            continue

        # can be a correction or multiple billing periods on one statement
        # get billing periods from charges -- dates on first half of usages spans the whole statement
        # but don't create single day periods
        if (end_date - start_date).days > 45:
            log.debug(
                "Splitting long billing period: %s - %s",
                start_date,
                end_date,
            )
            for meter in account.meters:
                # Date ranges already handled for this meter.
                seen: Set[Tuple] = set()
                # add the long billing period we're trying to split to seen:
                # sometimes there's a charge with this same too-long range
                seen.add((start_date, end_date))
                for charge in meter.charges:
                    if (charge.IntervalStart, charge.IntervalEnd) in seen:
                        continue
                    # Ignore charges spanning one day or less.
                    if (charge.IntervalEnd - charge.IntervalStart).days <= 1:
                        continue
                    seen.add((charge.IntervalStart, charge.IntervalEnd))
                    log.debug(
                        "Adding billing period from charge: account={} meter={}, "
                        "charge_pk={} start={}, end={}".format(
                            account.PK,
                            meter.PK,
                            charge.PK,
                            charge.IntervalStart,
                            charge.IntervalEnd,
                        ))
                    # copy the account and set the date range on the meter
                    account_copy = self.copy_account_data(
                        account, meter, charge.IntervalStart,
                        charge.IntervalEnd)
                    bill_history.add(
                        charge.IntervalStart,
                        charge.IntervalEnd,
                        LADWPBillingPeriod(account_copy),
                    )
                # if the long range is the only one, use it
                # (seen still holds only the long range, so no charge above
                # produced a period); the first charge's dates are used.
                if {(start_date, end_date)} == seen and meter.charges:
                    charge = meter.charges[0]
                    log.debug(
                        "Adding long billing period from charges: account={} meter={}, "
                        "start={}, end={}".format(
                            account.PK,
                            meter.PK,
                            start_date,
                            end_date,
                        ))
                    # copy the account and set the date range on the meter
                    account_copy = self.copy_account_data(
                        account, meter, charge.IntervalStart,
                        charge.IntervalEnd)
                    bill_history.add(
                        charge.IntervalStart,
                        charge.IntervalEnd,
                        LADWPBillingPeriod(account_copy),
                    )
        else:
            log.debug(
                "Adding billing period: account_pk={}, start={}, end={}".
                format(account.PK, start_date, end_date))
            bill_history.add(
                start_date,
                end_date,
                self.billing_period(account),
            )

    # Adjust date endpoints to avoid 1-day overlaps
    bill_history = DateIntervalTree.shift_endpoints(bill_history)

    # Log the billing periods we determined
    log_generic_billing_periods(bill_history)
    return bill_history
def update_date_range_from_charges(account: Account) -> Account:
    """Fix date range for bills that cross the winter/summer boundary.

    When a bill crosses the winter/summer boundary (9/1), charges are
    reported in two batches: the summer portion and the winter portion. The
    account and meter IntervalStart and IntervalEnd may encompass just one of
    these date ranges; fix if needed.

    Summer/winter example: meter oid 1707479190338

    +-----------+---------------+-------------+----------+
    | accountPK | IntervalStart | IntervalEnd | meterPK  |
    +-----------+---------------+-------------+----------+
    | 5494320   | 2015-09-01    | 2015-09-11  | 19729463 |
    | 5498442   | 2015-09-11    | 2015-10-09  | 19740313 |

    PDF (https://sources.o2.urjanet.net/sourcewithhttpbasicauth?id=1e55ab22-7795-d6a4
    -a229-22000b849d83) has two sections for charges:
      - 8/13/15 - 8/31/15 (summer)
      - 9/1/15 - 9/11/15 (winter)

    Meter record has IntervalStart = 9/1/15 and IntervalEnd = 9/1/15.
    The Charge records have IntervalStart and IntervalEnd for both date
    ranges.

    For every meter whose charges cover more than one distinct date range
    (charges spanning one day or less are ignored), the meter's and the
    account's date ranges are widened to include all of those charge ranges.
    Ranges are only ever widened, never narrowed.

    Args:
        account: the account to fix; it and its meters are modified in place.

    Returns:
        The same ``account`` object.
    """
    log.debug(
        "account interval range: %s to %s",
        account.IntervalStart,
        account.IntervalEnd,
    )

    for meter in account.meters:
        log.debug("meter interval range: %s to %s", meter.IntervalStart,
                  meter.IntervalEnd)

        charge_range = DateIntervalTree()
        for charge in meter.charges:
            # don't create single day periods
            if (charge.IntervalEnd - charge.IntervalStart).days <= 1:
                continue
            log.debug(
                "charge %s interval range: %s to %s",
                charge.PK,
                charge.IntervalStart,
                charge.IntervalEnd,
            )
            charge_range.add(charge.IntervalStart, charge.IntervalEnd)

        charge_intervals = charge_range.intervals()
        if len(charge_intervals) <= 1:
            # A single charge range means there is no seasonal split to
            # stitch back together.
            continue

        min_charge_dt = min(r.begin for r in charge_intervals)
        max_charge_dt = max(r.end for r in charge_intervals)

        new_meter_start = min(meter.IntervalStart, min_charge_dt)
        new_meter_end = max(meter.IntervalEnd, max_charge_dt)
        log.debug(
            "Updating meter date range from charges to %s - %s (was %s - %s)",
            new_meter_start,
            new_meter_end,
            meter.IntervalStart,
            meter.IntervalEnd,
        )
        meter.IntervalStart = new_meter_start
        meter.IntervalEnd = new_meter_end

        new_account_start = min(account.IntervalStart, min_charge_dt)
        new_account_end = max(account.IntervalEnd, max_charge_dt)
        log.debug(
            "Updating account date range from charges to %s - %s (was %s - %s)",
            new_account_start,
            new_account_end,
            account.IntervalStart,
            account.IntervalEnd,
        )
        account.IntervalStart = new_account_start
        account.IntervalEnd = new_account_end

    return account
def shift_endpoints(self, history: DateIntervalTree):
    """Return ``history`` processed by ``DateIntervalTree.shift_endpoints_start``.

    Intended to fix periods where the start and end are the same.
    """
    adjusted = DateIntervalTree.shift_endpoints_start(history)
    return adjusted