def _dma_bounds(bounds, variable, time_step_id):
    """Return ``(start, end)`` for an aggregated time step, or None if the
    time step is not hourly, daily or monthly."""
    if time_step_id == TSTEP_HOURLY:
        prefix = 'hourly'
    elif time_step_id == TSTEP_DAILY:
        prefix = 'daily'
    elif time_step_id == TSTEP_MONTHLY:
        prefix = 'monthly'
    else:
        return None
    limits = bounds[variable]
    return limits[prefix + '_start'], limits[prefix + '_end']


def process_dma(dma, bounds):
    """Process DMA timeseries by aggregating all the contained households
    in the DMA.

    For every series of ``dma`` a zero-filled series is built between the
    bounds of its time step, the matching series (same time step and
    variable) of each household are added into it, and the result is
    written to the database and committed.

    Series whose name contains ``'capita'`` are treated as per-capita: each
    household value is divided by ``household.num_of_occupants`` and the
    final sum is divided by the number of households that contributed a
    value at that timestamp.

    :param dma: object exposing ``timeseries.all()`` and
        ``households.all()``
    :param bounds: mapping ``variable id -> dict`` holding
        ``'<step>_start'`` / ``'<step>_end'`` entries for the ``hourly``,
        ``daily`` and ``monthly`` steps
    :return: None
    """
    print("Process DMA %s" % (dma, ))
    for dma_series in dma.timeseries.all():
        print("Process series %s" % (dma_series, ))
        per_capita = 'capita' in dma_series.name
        variable = dma_series.variable.id
        step_id = dma_series.time_step.id
        if step_id == TSTEP_FIFTEEN_MINUTES:
            # Fifteen minutes process is DEACTIVATED!
            # We don't process fifteen minutes, it takes too long,
            # maybe we reactivate later after we optimize the
            # algorithm to process only new records
            continue
        limits = _dma_bounds(bounds, variable, step_id)
        if limits is None:
            # Previously an unknown time step fell through with ``start``
            # and ``end`` either undefined or left over from the previous
            # series; skip it explicitly instead.
            print("Skipping series %s: unsupported time step" % (
                dma_series, ))
            continue
        start, end = limits

        time_step = ReadTimeStep(dma_series.id, dma_series)
        tseries = TSeries(time_step=time_step, id=dma_series.id)
        # nhseries counts, per timestamp, the households that contributed
        nhseries = TSeries(time_step=time_step)
        pointer = start
        while pointer <= end:
            tseries[pointer] = 0
            nhseries[pointer] = 0
            pointer = tseries.time_step.next(pointer)

        for household in dma.households.all():
            for h_series_db in household.timeseries.filter(
                    time_step__id=step_id,
                    variable__id=variable):
                hseries = TSeries(id=h_series_db.id)
                hseries.read_from_db(db.connection)
                pointer = start
                while pointer <= end:
                    try:
                        v = hseries[pointer]
                    except KeyError:
                        # household has no record at this timestamp
                        pass
                    else:
                        if not math.isnan(v):
                            if per_capita:
                                v = v / float(household.num_of_occupants)
                            tseries[pointer] += v
                            nhseries[pointer] += 1
                    pointer = tseries.time_step.next(pointer)

        if per_capita:
            pointer = start
            while pointer <= end:
                if nhseries[pointer] > 0:
                    tseries[pointer] = tseries[pointer] / nhseries[pointer]
                pointer = tseries.time_step.next(pointer)
        tseries.write_to_db(db.connection, commit=True)
def process():
    """Estimate and store the number of occupants of every household.

    The average of each household's daily series is multiplied by 1000
    (presumably a unit conversion -- TODO confirm) and divided by
    ``AVERAGE_UNIT_WATER_CONSUMPTION``; the rounded result, never less
    than 1, is saved as ``num_of_occupants``. Households whose average is
    NaN are left untouched.
    """
    for household in Household.objects.all():
        daily_db = household.timeseries.get(time_step__id=TSTEP_DAILY)
        daily = TSeries(id=daily_db.id)
        daily.read_from_db(db.connection)

        mean_consumption = 1000.000 * daily.average()
        if math.isnan(mean_consumption):
            continue

        estimate = int(round(
            mean_consumption / AVERAGE_UNIT_WATER_CONSUMPTION))
        occupants = max(1, estimate)
        print('Household with id=%s, average daily consumption %.1f, '
              'number of occupants set to %s' % (
                  household.id, mean_consumption, occupants))
        household.num_of_occupants = occupants
        household.save()
def get_consumption_totals(household, dt, variable):
    """Return the last non-NaN cumulative value recorded up to ``dt``.

    Not needed. read_timeseries_tail_from_db does the same thing, faster.

    :param household: household whose cumulative series is read
    :param dt: timestamp limit; records later than it are ignored
    :param variable: ``"WaterCold"`` or ``"Electricity"``
    :return: the cumulative value of the latest non-NaN record with
        timestamp ``<= dt``; 0 if there is none; None for any other
        ``variable``
    """
    # The two supported variables differ only in which cumulative series
    # is read, so pick the id here and share the rest of the logic.
    if variable == "WaterCold":
        variable_id = VAR_CUMULATIVE
    elif variable == "Electricity":
        variable_id = VAR_ENERGY_CUMULATIVE
    else:
        return None

    timeseries = household.timeseries.get(variable__id=variable_id)
    raw_series = TSeries(id=timeseries.id)
    raw_series.read_from_db(db.connection)

    total = 0
    for ts in sorted(raw_series.keys()):
        val = raw_series[ts]
        if isnan(val):
            continue
        if ts > dt:
            break
        total = val
    return total
def has_burst_old(household):
    """Old burst detection. We won't be using this algorithm any more.

    Reads the household's fifteen-minute ``VAR_PERIOD`` series and splits
    its samples into the last 100 timestamps ("today") and everything
    before them. The first "today" sample greater than the 95th percentile
    of the earlier samples is reported.

    A sample is ignored when it is NaN or zero, or when the previous
    sample is NaN (it might then carry the consumption of all the
    preceding NaN periods).

    :param household: household to check; only usernames starting with
        ``'GR'`` are examined
    :return: ``(consumption, "H:M")`` for the first sample above the
        threshold, otherwise ``(0, 0)``
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0

    timeseries = household.timeseries.get(
        time_step__id=TSTEP_FIFTEEN_MINUTES, variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    today = []  # (value, "H:M") pairs of the last 100 timestamps
    _all = []  # every earlier usable value
    today_start = len(timestamps) - 100
    for i in range(1, len(timestamps)):
        ts = timestamps[i]
        val = series[ts]
        # if previous value is NaN we don't take this value into
        # consideration, because it might have all consumption of all the
        # previous NaN times
        if isnan(series[timestamps[i - 1]]):
            continue
        if isnan(val) or val == 0:
            continue
        if i < today_start:
            _all.append(val)
        else:
            tm = "%s:%s" % (ts.time().hour, ts.time().minute)
            today.append((val, tm))

    if _all and today:
        p = np.percentile(np.array(_all), 95)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
def has_burst(household):
    """Look for an unusually high recent fifteen-minute consumption.

    The last 100 timestamps of the household's fifteen-minute
    ``VAR_PERIOD`` series are compared against the 90th percentile of the
    daily maxima of all earlier samples. NaN and zero samples are ignored,
    and so is any sample whose previous sample is NaN.

    NOTE(review): the previous docstring said "We won't be using this
    algorithm any more"; that text looks copied from ``has_burst_old`` --
    confirm whether it applies here.

    :param household: household to check; only usernames starting with
        ``'GR'`` are examined
    :return: ``(consumption, "Y-M-D H:M")`` for the first recent sample
        above the threshold, otherwise ``(0, 0)``
    """
    if not household.user.username.startswith('GR'):
        return 0, 0

    series_db = household.timeseries.get(
        time_step__id=TSTEP_FIFTEEN_MINUTES, variable__id=VAR_PERIOD)
    series = TSeries(id=series_db.id)
    series.read_from_db(db.connection)
    stamps = sorted(series.keys())

    recent_start = len(stamps) - 100
    recent = []  # (value, label) for the last 100 timestamps
    peak_per_day = {}  # date -> largest earlier value seen that day
    for idx in range(1, len(stamps)):
        stamp = stamps[idx]
        day = stamp.date()
        current = series[stamp]
        previous = series[stamps[idx - 1]]
        # A value following a NaN may hold the consumption of all the
        # preceding NaN periods, so it is not trusted.
        if isnan(previous):
            continue
        if isnan(current) or current == 0:
            continue
        if idx >= recent_start:
            label = "%s-%s-%s %s:%s" % (stamp.year, stamp.month, stamp.day,
                                        stamp.time().hour,
                                        stamp.time().minute)
            recent.append((current, label))
        elif current > peak_per_day.get(day, 0):
            peak_per_day[day] = current

    if not (peak_per_day and recent):
        return 0, 0

    threshold = np.percentile(np.array(list(peak_per_day.values())), 90)
    for current, label in recent:
        if current > threshold:
            return current, label
    return 0, 0
def parse_and_save_timeseries(device_id, timeseries_id):
    """Fetch a RAW timeseries from the REST API and store it locally.

    The records returned by ``ibm_restapi.get_raw_timeseries(device_id)``
    are written to the database under ``timeseries_id`` (without
    committing). Nothing is fetched if that series already has data.

    ``device_id`` will be the ``identifier`` used in other functions,
    usually it is the customerID==deviceID.
    """
    first, last = timeseries_bounding_dates_from_db(db.connection,
                                                    timeseries_id)
    if first or last:
        print('Raw timeseries id=%s has already data, skipping...' % (
            timeseries_id, ))
        return

    raw = TSeries()
    raw.id = timeseries_id
    for timestamp, value in ibm_restapi.get_raw_timeseries(device_id):
        raw[timestamp] = value
    raw.write_to_db(db=db.connection, transaction=transaction,
                    commit=False)
def create_objects(dma, household_identifier, series, force=False):
    """Create the database objects of a fully parsed household.

    Creates the user (household owner), the household, and the database
    time series placeholders (raw and processed). The parsed values are
    then written as cumulative raw series, and finally the household
    occupancy is estimated from the cold water series.

    :param dma: zone of the household; only ``dma.id`` is used
    :param household_identifier: identifier of the household, also used as
        the owner's username
    :param series: mapping ``variable -> iterable of (timestamp, value)``
    :param force: when true, write even if the raw series already has data
    """
    print("Processing household %s, user username will be %s as well" % (
        household_identifier, household_identifier))

    # User (household owner), household and series placeholders
    owner = create_user(household_identifier)
    household = create_household(household_identifier, owner, zone=dma.id)
    raw_placeholders = create_raw_timeseries(household)
    create_processed_timeseries(household)

    written = {}
    for variable in raw_placeholders:
        if variable not in ('WaterCold', 'Electricity'):
            continue
        series_id = raw_placeholders[variable].id
        first, last = timeseries_bounding_dates_from_db(db.connection,
                                                        series_id)
        if (first or last) and not force:
            print('Raw timeseries id=%s has already data, skipping...' % (
                series_id,))
            continue

        # Store the running sum of the parsed values; NaN records stay
        # NaN and do not contribute to the sum.
        cumulative = TSeries()
        cumulative.id = series_id
        running_total = 0.0
        for timestamp, value in series[variable]:
            if math.isnan(value):
                cumulative[timestamp] = float('NaN')
                continue
            running_total += value
            cumulative[timestamp] = running_total
        written[variable] = cumulative
        cumulative.write_to_db(db=db.connection, transaction=transaction,
                               commit=False)

    if 'WaterCold' in written:
        calc_occupancy(written['WaterCold'], household)
def parse_and_save_timeseries(filename, timeseries_id):
    """Parse a comma separated file and store it as a timeseries.

    The first line is treated as a header and skipped. On every other
    line the second field is the timestamp (``%Y-%m-%d %H:%M:%S``,
    seconds are dropped) and the third field is the value; both may be
    wrapped in double quotes. Values outside ``[MIN_VALUE, MAX_VALUE)``
    are stored as NaN. The series is written to the database under
    ``timeseries_id`` without committing.

    :param filename: path of the file to read
    :param timeseries_id: id of the database timeseries to write to
    """
    timeseries = TSeries()
    timeseries.id = timeseries_id
    with open(filename) as fp:
        next(fp, None)  # skip the header line, tolerate an empty file
        for line in fp:  # stream instead of loading the whole file
            if not line.strip():
                # blank (e.g. trailing) lines used to raise IndexError
                continue
            components = line.split(',')
            # strip whitespace/newline first so the quotes are really at
            # the ends, even when the field is the last one on the line
            date_str = components[1].strip().strip('"')
            value_str = components[2].strip().strip('"')
            value = float(value_str)
            if value < MIN_VALUE or value >= MAX_VALUE:
                value = float('nan')
            tstamp = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
            timeseries[tstamp.replace(second=0)] = value
    timeseries.write_to_db(db=db.connection, transaction=transaction,
                           commit=False)
def get_values_after(household, dt, variable):
    """Return the fifteen-minute records that are later than ``dt``.

    :param household: household whose series is read
    :param dt: only records with a timestamp after this one are returned
    :param variable: ``"WaterCold"`` or ``"Electricity"``
    :return: list of ``(timestamp, value)`` tuples sorted by timestamp;
        empty for any other ``variable``
    """
    series_db = None
    if variable == "WaterCold":
        series_db = household.timeseries.get(
            time_step__id=TSTEP_FIFTEEN_MINUTES, variable__id=VAR_PERIOD)
    elif variable == "Electricity":
        series_db = household.timeseries.get(
            time_step__id=TSTEP_FIFTEEN_MINUTES,
            variable__id=VAR_ENERGY_PERIOD)
    if not series_db:
        return []

    series = TSeries(id=series_db.id)
    series.read_from_db(db.connection)
    return [(stamp, series[stamp])
            for stamp in sorted(series.keys())
            if not stamp <= dt]
def handle(self, *args, **options):
    """Export cumulative (``VAR_CUMULATIVE``) series to CSV files.

    ``args[0]`` is either an exact username or one of the prefixes
    ``GR``, ``GB``, ``PT``, ``GBA``, in which case every user whose
    username starts with it is exported. For each user the non-NaN,
    non-zero records are written as ``[timestamp, value]`` rows to
    ``data/timeseries_cumulative_<username>.csv``.

    :return: -1 when no username is given, otherwise None. Any other
        failure is reported on stdout and not re-raised.
    """
    try:
        username = args[0]
    except IndexError:
        print("I need a username!")
        return -1
    try:
        if username in ("GR", "GB", "PT", "GBA"):
            users = User.objects.filter(username__startswith=username)
        else:
            users = User.objects.filter(username=username)
        for user in users:
            print("output for {x}".format(x=username))
            household = Household.objects.get(user=user)
            timeseries = household.timeseries.get(
                variable__id=VAR_CUMULATIVE)
            series = TSeries(id=timeseries.id)
            series.read_from_db(db.connection)

            # The rows used to be mirrored into a numpy array with
            # np.append (quadratic) for a percentile that is no longer
            # computed; only the rows themselves are needed.
            out = []
            for ts in sorted(series.keys()):
                val = series[ts]
                if isnan(val) or val == 0:
                    continue
                out.append([ts, val])

            _outfile = "timeseries_cumulative_%s.csv" % user.username
            _path = "data/"
            with open(path.join(_path, _outfile), 'w') as of:
                a = csv.writer(of, delimiter=',', quotechar='"',
                               quoting=csv.QUOTE_ALL)
                a.writerows(out)
    except Exception as e:
        # top-level command boundary: report and carry on
        print("failed with %s" % repr(e))
def handle(self, *args, **options):
    """Export a user's fifteen-minute ``VAR_PERIOD`` series to CSV.

    Every non-NaN, non-zero record is written to
    ``data/timeseries_<username>.csv`` as ``[timestamp, value, perc]``,
    where ``perc`` is the 90th percentile of the values exported so far
    (the current one included).

    :return: -1 when no username is given, otherwise None. Any other
        failure is reported on stdout and not re-raised.
    """
    if not args:
        print("I need a username!")
        return -1
    username = args[0]

    try:
        if not username:
            return
        owner = User.objects.get(username=username)
        rows = []
        print("output for {x}".format(x=username))
        household = Household.objects.get(user=owner)
        series_db = household.timeseries.get(
            time_step__id=TSTEP_FIFTEEN_MINUTES, variable__id=VAR_PERIOD)
        series = TSeries(id=series_db.id)
        series.read_from_db(db.connection)

        seen = np.array([])
        for stamp in sorted(series.keys()):
            current = series[stamp]
            if isnan(current) or current == 0:
                continue
            seen = np.append(seen, current)
            rows.append([stamp, current, np.percentile(seen, 90)])

        target = path.join("data/", "timeseries_%s.csv" % username)
        with open(target, 'w') as of:
            writer = csv.writer(of, delimiter=',', quotechar='"',
                                quoting=csv.QUOTE_ALL)
            writer.writerows(rows)
    except Exception as e:
        print("failed with %s" % repr(e))
def regularize(raw_series_db, proc_series_db, rs, re):
    """Regularize the raw series and write the processed series to the
    database.

    Raw series is a continuously increasing values time series,
    aggregating the water consumption. Resulting processed timeseries
    contains water consumption for each of its interval. I.e. if the
    timeseries is of 15 minutes time step, then each record contains the
    water consumption for each record period.

    :param raw_series_db: database object of the raw (cumulative) series
    :param proc_series_db: database object of the processed series
    :param rs: start of the period to cover; snapped down to the
        processed time step
    :param re: end of the period to cover; snapped up to the processed
        time step
    :return: the processed series (already written and committed), or
        None when the last value of the raw series cannot be read
    """
    raw_series = TSeries(id=raw_series_db.id)
    raw_series.read_from_db(db.connection)
    # We keep the last value for x-checking reasons, see the commented
    # log line near the end. Failing to read it aborts the processing.
    try:
        test_value = raw_series[raw_series.bounding_dates()[1]]
    except Exception:
        return None

    time_step = ReadTimeStep(proc_series_db.id, proc_series_db)
    proc_series = TSeries(id=proc_series_db.id, time_step=time_step)
    # A next version could append only the new records by starting from
    # the end of the existing processed series; for now the whole
    # [rs, re] period is rebuilt.
    start = proc_series.time_step.down(rs)
    end = proc_series.time_step.up(re)

    # Pass 1: Initialize proc_series
    pointer = start
    while pointer <= end:
        proc_series[pointer] = float('nan')
        pointer = proc_series.time_step.next(pointer)

    # Pass 2: Transfer cumulative raw series to differences series.
    # (Debug code building ``datetime.today().replace(month=11)`` for
    # every record was removed here: it had no effect and raised
    # ValueError whenever the current day of month was 31.)
    prev_s = 0
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not isnan(value):
            # Added by Chris Pantazis, because sometimes we get a
            # negative small value by the meter
            if prev_s > value:
                prev_s = value
            raw_series[dat] = value - prev_s
            prev_s = value

    # Pass 3: Regularize step: loop over raw series records and
    # distribute floating point values to processed series
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if isnan(value):
            continue
        # find previous, next timestamp of the proc time series
        d1 = proc_series.time_step.down(dat)
        d2 = proc_series.time_step.up(dat)
        if isnan(proc_series[d1]):
            proc_series[d1] = 0
        if isnan(proc_series[d2]):
            proc_series[d2] = 0
        if d1 == d2:  # if dat on proc step then d1=d2
            proc_series[d1] += value
            continue
        dif1 = _dif_in_secs(d1, dat)
        dif2 = _dif_in_secs(dat, d2)
        # float() guards against integer division should _dif_in_secs
        # return ints -- its return type is not visible from here
        dif = float(dif1 + dif2)
        # Distribute value to d1, d2, the nearer one taking the larger
        # share
        proc_series[d1] += (dif2 / dif) * value
        proc_series[d2] += (dif1 / dif) * value

    # Uncomment the following line in order to show debug information.
    # Usually the three following sums are consistent by equality. If
    # equality is not satisfied then there is a likelihood of an
    # algorithm error
    # log.info("%s = %s = %s ?" % (raw_series.sum(),
    #                              proc_series.sum(), test_value))
    proc_series.write_to_db(db=db.connection, commit=True)
    # return the full timeseries
    return proc_series
def has_leakage(household):
    """Check the household's hourly consumption for a leakage.

    The hourly ``VAR_PERIOD`` series is grouped by date. For each date
    the ratio of the night consumption (hours 03 to 05 inclusive) to the
    total consumption is computed. The ratio of the last date is compared
    against the 90th percentile of the ratios of all previous dates.

    The last date of the series is discarded first when its last record
    is earlier than 23:00 (not a whole day).

    NOTE(review): a zero night record resets that date's night sum to 0,
    but later non-zero records of the same night are added again, so the
    "make all night 0 if one 0" intent only holds when the zero record is
    the last one of the night -- confirm the intended behaviour.

    :param household: household to check; usernames starting with
        ``'GB'`` are skipped because they send daily data
    :return: ``(ratio, "Y-M-D H:M")`` when the last date's ratio exceeds
        the threshold (the label is the last timestamp of the series),
        otherwise ``(0, 0)``
    """
    name = household.user.username
    if name.startswith('GB'):  # not UK because they send daily data
        return 0, 0

    timeseries = household.timeseries.get(time_step__id=TSTEP_HOURLY,
                                          variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())

    night_dict = {}  # date -> consumption between 03:00 and 05:59
    total_dict = {}  # date -> consumption of the whole date
    # defaults used only when the series is empty
    _t = datetime.now().time()
    _d = datetime.today().date()
    for ts in timestamps:
        _d = ts.date()
        _t = ts.time()
        val = series[ts]
        if 3 <= _t.hour <= 5:
            if val == 0:
                night_dict[_d] = 0  # make all night 0 if one 0
            else:
                night_dict[_d] = night_dict.get(_d, 0) + val
        total_dict[_d] = total_dict.get(_d, 0) + val

    # remove last day if not a whole day (_t < 24:00)
    if _t.hour < 23:
        total_dict.pop(_d, None)
        night_dict.pop(_d, None)

    def _ratio(day):
        """Night/total ratio of ``day``, or None if it is not usable."""
        total = total_dict[day]
        # there can be a case when we don't get data for the night hours,
        # so night_dict[day] might not exist; in this case let it be zero
        night = night_dict.get(day, 0)
        if total > 0 and night > 0 \
                and not isnan(total) and not isnan(night):
            return float(night) / float(total)
        return None

    dates = sorted(total_dict.keys())
    # all dates except the last one form the history
    _all = [r for r in (_ratio(day) for day in dates[:-1])
            if r is not None]
    # Only the last date is checked. The history loop always tolerated a
    # date without night records; the last date used to raise KeyError in
    # that case and is now handled the same way.
    _today = [r for r in (_ratio(day) for day in dates[-1:])
              if r is not None]

    if _all and _today:
        ts = timestamps[-1]
        tm = "%s-%s-%s %s:%s" % (ts.year, ts.month, ts.day,
                                 ts.time().hour, ts.time().minute)
        p = np.percentile(np.array(_all), 90)
        for val in _today:
            if val > p:
                return val, tm
    return 0, 0
def create_objects(data, usernames, force, z_names, z_dict):
    """
    Create or update the database objects and raw series of every
    household found in ``data``.

    For each household the user, the household and the series
    placeholders are created (or fetched), then the parsed records of the
    'WaterCold' and 'Electricity' variables are stored as cumulative raw
    series: records later than the stored ones are appended, earlier ones
    are inserted (only for usernames containing "GR" or "GBA").

    :param data: meter_id -> consumption_type -> [timestamp, volume]
    :param usernames: meter_id -> username
    :param force: True to also process series whose first record is not
        later than the data already stored
    :param z_names: zone names; the first one is the fallback zone
    :param z_dict: username -> zone name
    :return: list with the household of every meter id in ``data``
    """
    households = []
    # Create user (household owner), household, database series placeholders
    hh_ids = sorted(data.keys())
    found = False
    for hh_id in hh_ids:
        username = usernames[hh_id]
        # no-op; looks like a leftover debugger breakpoint hook
        if username == "PT94993":
            pass
        try:
            zone_name = z_dict[username]
        except KeyError:
            # users without an explicit zone go to the first zone
            zone_name = z_names[0]
        zone = DMA.objects.get(name=zone_name)
        user, created = create_user(username, hh_id)
        # presumably ``found`` tells that the household already existed
        # (see the "only for new HH" check below) -- TODO confirm
        household, found = create_household(hh_id, user, zone.id)
        households.append(household)
        db_series = create_raw_timeseries(household)
        create_processed_timeseries(household)
        # NOTE(review): nothing in this function ever stores into
        # timeseries_data, so the calc_occupancy call at the end of the
        # household loop cannot run -- confirm whether an assignment such
        # as timeseries_data[variable] = timeseries was lost.
        timeseries_data = {}
        # Now we will create timeseries.Timeseries() and we will add
        # parsed values
        for variable in db_series:
            if variable not in ('WaterCold', 'Electricity'):
                continue
            exists = False  # unused
            s, e = timeseries_bounding_dates_from_db(db.connection,
                                                     db_series[variable].id)
            # keep the end date under its own name: ``e`` is rebound by
            # the ``except ... as e`` clause below
            latest_ts = e
            ts_id = db_series[variable].id
            # checking to see if timeseries records already exist in order
            # to append
            # d = read_timeseries_tail_from_db(db.connection, ts_id)
            total = 0.0
            # if s or e:
            #     exists = True
            #     timeseries = TSeries(ts_id)
            #     timeseries.read_from_db(db.connection)
            # else:
            #     timeseries = TSeries()
            #     timeseries.id = ts_id
            _dict = data[hh_id]
            arr = _dict[variable]
            series = arr
            if not series:
                continue
            # records older than the stored end date, to be inserted later
            earlier = []
            if (not latest_ts) or (latest_ts < series[0][0]):  # append
                timeseries = TSeries()
                timeseries.id = ts_id
                try:
                    tail = read_timeseries_tail_from_db(db.connection, ts_id)
                    total = float(tail[1])  # keep up from last value
                except Exception as e:
                    # e.g. nothing stored yet: start counting from zero
                    log.debug(repr(e))
                    total = 0
                for timestamp, value in series:
                    if (not latest_ts) or (timestamp > latest_ts):
                        # store the running sum; NaN records stay NaN and
                        # do not add to it
                        if not isnan(value):
                            total += value
                            timeseries[timestamp] = total
                        else:
                            timeseries[timestamp] = float('NaN')
                    elif timestamp < latest_ts:
                        earlier.append((timestamp, value))
                timeseries.append_to_db(db=db.connection,
                                        transaction=transaction,
                                        commit=True)
            elif latest_ts >= series[0][0]:
                if not force:  # ignore
                    continue
                else:  # insert
                    for timestamp, value in series:
                        if timestamp < latest_ts:
                            earlier.append((timestamp, value))
            if earlier and ("GR" in username or "GBA" in username):
                # insert (only for athens)
                # print "appending %s items for %s" % (len(earlier), username)
                # First store the earlier per-period values directly in
                # the fifteen-minute series of the same variable
                if variable == "WaterCold":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_PERIOD)
                    series15 = TSeries(id=ts15.id)
                elif variable == "Electricity":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_ENERGY_PERIOD)
                    series15 = TSeries(id=ts15.id)
                series15.read_from_db(db.connection)
                for ts, value in earlier:
                    series15[ts] = value
                series15.write_to_db(db=db.connection,
                                     transaction=transaction,
                                     commit=True)
                # Then rebuild the cumulative raw series, starting from
                # the total recorded up to the first earlier timestamp
                raw_ts = TSeries(ts_id)  # read existing ts raw data
                raw_ts.read_from_db(db.connection)
                total = get_consumption_totals(household, earlier[0][0],
                                               variable)
                init = total
                for timestamp, value in earlier:
                    if not isnan(value):
                        total += value
                        raw_ts[timestamp] = total
                    else:
                        raw_ts[timestamp] = float('NaN')
                # correct later values, too
                diff = total - init
                all_ts = sorted(raw_ts.keys())
                for ts in all_ts:
                    # ``timestamp`` still holds the last timestamp of
                    # ``earlier`` from the loop above
                    if ts <= timestamp:
                        continue
                    curr = raw_ts[ts]
                    raw_ts[ts] = curr + diff
                raw_ts.write_to_db(db=db.connection,
                                   transaction=transaction,
                                   commit=True)
        if 'WaterCold' in timeseries_data and not found:  # only for new HH
            calc_occupancy(timeseries_data['WaterCold'], household)
    return households