def MultiTimeseriesProcessDb(method, timeseries_arg, out_timeseries_id, db,
                             read_tstep_func, transaction=None, commit=True,
                             options=None):
    """Run MultiTimeseriesProcess on timeseries read from the database.

    Reads every input timeseries named in ``timeseries_arg`` (a mapping of
    argument name -> timeseries id) from ``db``, runs ``method`` via
    MultiTimeseriesProcess, and stores the result under
    ``out_timeseries_id``.  When options['append_only'] is true, only
    records after the output's current last record are computed and the
    result is appended rather than rewritten.

    ``read_tstep_func`` maps a timeseries id to its time step.
    ``options`` defaults to an empty dict; it is deep-copied, so the
    caller's dict is never modified.
    """
    # BUGFIX: the original declared a mutable default argument
    # (options={}).  None is the safe, backward-compatible spelling.
    opts = copy.deepcopy(options) if options is not None else {}
    append_only = bool(opts.get('append_only'))
    out_timeseries = Timeseries(id=out_timeseries_id)
    if append_only:
        # Start right after the last record already stored for the output.
        bounds = timeseries_bounding_dates_from_db(db, id=out_timeseries_id)
        opts['start_date'] = bounds[1] if bounds else None
        opts['interval_exclusive'] = True
    tseries_arg = {}
    for key in timeseries_arg:
        ts = Timeseries(id=timeseries_arg[key])
        if append_only and opts['start_date'] is not None:
            # Read only the tail of the input; if the tail does not reach
            # back to start_date, fall back to reading the whole series.
            ts.read_from_db(db, bottom_only=True)
            if ts.bounding_dates()[0] > opts['start_date']:
                ts.read_from_db(db)
        else:
            ts.read_from_db(db)
        ts.time_step = read_tstep_func(ts.id)
        tseries_arg[key] = ts
    MultiTimeseriesProcess(method, tseries_arg, out_timeseries, opts)
    if append_only:
        out_timeseries.append_to_db(db=db, transaction=transaction,
                                    commit=commit)
    else:
        out_timeseries.write_to_db(db=db, transaction=transaction,
                                   commit=commit)
def process_dma(dma, bounds): """Process DMA timeseries by aggregating all the contained households in the DMA""" print "Process DMA %s" % (dma,) for dma_series in dma.timeseries.all(): print "Process series %s" % (dma_series,) per_capita = dma_series.name.find('capita') > -1 variable = dma_series.variable.id if dma_series.time_step.id == TSTEP_FIFTEEN_MINUTES: start = bounds[variable]['fifteen_start'] end = bounds[variable]['fifteen_end'] # Fifteen minutes process is DEACTIVATED! # We don't process fifteen minutes, it takes too long, # maybe we reactivate later after we optimize the # algorithm to process only new records continue elif dma_series.time_step.id == TSTEP_HOURLY: start = bounds[variable]['hourly_start'] end = bounds[variable]['hourly_end'] elif dma_series.time_step.id == TSTEP_DAILY: start = bounds[variable]['daily_start'] end = bounds[variable]['daily_end'] elif dma_series.time_step.id == TSTEP_MONTHLY: start = bounds[variable]['monthly_start'] end = bounds[variable]['monthly_end'] time_step = ReadTimeStep(dma_series.id, dma_series) tseries = TSeries(time_step = time_step, id=dma_series.id) nhseries = TSeries(time_step = time_step) pointer = start while pointer<=end: tseries[pointer] = 0 nhseries[pointer] = 0 pointer = tseries.time_step.next(pointer) for household in dma.households.all(): for h_series_db in household.timeseries.filter( time_step__id=dma_series.time_step.id, variable__id=variable): hseries = TSeries(id=h_series_db.id) hseries.read_from_db(db.connection) pointer = start while pointer<=end: try: v = hseries[pointer] if math.isnan(v): pointer = tseries.time_step.next(pointer) continue if per_capita: v = v/float(household.num_of_occupants) tseries[pointer] += v nhseries[pointer] += 1 except KeyError: v = 0 pointer = tseries.time_step.next(pointer) pointer = start while pointer<=end: if per_capita and nhseries[pointer]>0: tseries[pointer] = tseries[pointer] / nhseries[pointer] pointer = tseries.time_step.next(pointer) 
tseries.write_to_db(db.connection, commit=True)#False)
def process_dma(dma, bounds): """Process DMA timeseries by aggregating all the contained households in the DMA""" print "Process DMA %s" % (dma, ) for dma_series in dma.timeseries.all(): print "Process series %s" % (dma_series, ) per_capita = dma_series.name.find('capita') > -1 variable = dma_series.variable.id if dma_series.time_step.id == TSTEP_FIFTEEN_MINUTES: start = bounds[variable]['fifteen_start'] end = bounds[variable]['fifteen_end'] # Fifteen minutes process is DEACTIVATED! # We don't process fifteen minutes, it takes too long, # maybe we reactivate later after we optimize the # algorithm to process only new records continue elif dma_series.time_step.id == TSTEP_HOURLY: start = bounds[variable]['hourly_start'] end = bounds[variable]['hourly_end'] elif dma_series.time_step.id == TSTEP_DAILY: start = bounds[variable]['daily_start'] end = bounds[variable]['daily_end'] elif dma_series.time_step.id == TSTEP_MONTHLY: start = bounds[variable]['monthly_start'] end = bounds[variable]['monthly_end'] time_step = ReadTimeStep(dma_series.id, dma_series) tseries = TSeries(time_step=time_step, id=dma_series.id) nhseries = TSeries(time_step=time_step) pointer = start while pointer <= end: tseries[pointer] = 0 nhseries[pointer] = 0 pointer = tseries.time_step.next(pointer) for household in dma.households.all(): for h_series_db in household.timeseries.filter( time_step__id=dma_series.time_step.id, variable__id=variable): hseries = TSeries(id=h_series_db.id) hseries.read_from_db(db.connection) pointer = start while pointer <= end: try: v = hseries[pointer] if math.isnan(v): pointer = tseries.time_step.next(pointer) continue if per_capita: v = v / float(household.num_of_occupants) tseries[pointer] += v nhseries[pointer] += 1 except KeyError: v = 0 pointer = tseries.time_step.next(pointer) pointer = start while pointer <= end: if per_capita and nhseries[pointer] > 0: tseries[pointer] = tseries[pointer] / nhseries[pointer] pointer = tseries.time_step.next(pointer) 
tseries.write_to_db(db.connection, commit=True) #False)
def parse_and_save_timeseries(device_id, timeseries_id): """ Reads a RAW timeseries from REST API and saves in our local database using the timeseries_id. ``device_id`` will be the ``identifier`` used in other functions, usualy is the customerID==deviceID """ s, e = timeseries_bounding_dates_from_db(db.connection, timeseries_id) if s or e: print 'Raw timeseries id=%s has already data, skipping...' % ( timeseries_id, ) return timeseries = TSeries() timeseries.id = timeseries_id for timestamp, value in ibm_restapi.get_raw_timeseries(device_id): timeseries[timestamp] = value timeseries.write_to_db(db=db.connection, transaction=transaction, commit=False)
def create_objects(dma, household_identifier, series, force=False): """ When a household is fully parsed then this command is called to create database objects thus: user (household owner), household, database time series placeholders (for raw data and for processed data), to write actual time series data in database and finally to estimate the household occupancy. """ print "Processing household %s, user username will be %s as well"%( household_identifier, household_identifier) # Create user (household owner), household, database series placeholders user = create_user(household_identifier) household=create_household(household_identifier, user, zone=dma.id) db_series = create_raw_timeseries(household) create_processed_timeseries(household) timeseries_data = {} # Now we will create timeseries.Timeseries() and we will add # parsed values for variable in db_series: if variable not in ('WaterCold', 'Electricity'): continue s, e = timeseries_bounding_dates_from_db(db.connection, db_series[variable].id) if not force and (s or e): print 'Raw timeseries id=%s has already data, skipping...'%( db_series[variable].id,) continue timeseries = TSeries() timeseries.id = db_series[variable].id total = 0.0 for timestamp, value in series[variable]: if not math.isnan(value): total += value timeseries[timestamp] = total else: timeseries[timestamp] = float('NaN') timeseries_data[variable] = timeseries timeseries.write_to_db(db=db.connection, transaction=transaction, commit=False) if 'WaterCold' in timeseries_data: calc_occupancy(timeseries_data['WaterCold'], household)
def parse_and_save_timeseries(device_id, timeseries_id): """ Reads a RAW timeseries from REST API and saves in our local database using the timeseries_id. ``device_id`` will be the ``identifier`` used in other functions, usualy is the customerID==deviceID """ s, e = timeseries_bounding_dates_from_db(db.connection, timeseries_id) if s or e: print 'Raw timeseries id=%s has already data, skipping...'%( timeseries_id,) return timeseries = TSeries() timeseries.id = timeseries_id for timestamp, value in ibm_restapi.get_raw_timeseries(device_id): timeseries[timestamp] = value timeseries.write_to_db(db=db.connection, transaction=transaction, commit=False)
def parse_and_save_timeseries(filename, timeseries_id):
    """Parse a CSV export and store it as timeseries ``timeseries_id``.

    Each data line is expected as ``<ignored>,"<Y-m-d H:M:S>","<value>"``
    (columns 1 and 2 are used).  Values outside [MIN_VALUE, MAX_VALUE)
    are stored as NaN; timestamps are truncated to whole minutes.  The
    first line is a header and is skipped.
    """
    timeseries = TSeries()
    timeseries.id = timeseries_id
    with open(filename) as fp:
        # IMPROVED: iterate the file lazily instead of fp.readlines(),
        # which loaded the whole file into memory; the header skip via
        # next() replaces the first_line flag.
        next(fp, None)
        for line in fp:
            components = line.split(',')
            date_str = components[1].strip('"')
            value = float(components[2].strip('"'))
            # Out-of-range readings are recorded as missing.
            if value < MIN_VALUE or value >= MAX_VALUE:
                value = float('nan')
            tstamp = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
            tstamp = tstamp.replace(second=0)
            timeseries[tstamp] = value
    timeseries.write_to_db(db=db.connection, transaction=transaction,
                           commit=False)
def parse_and_save_timeseries(filename, timeseries_id):
    """Parse a CSV export and store it as timeseries ``timeseries_id``.

    Each data line is expected as ``<ignored>,"<Y-m-d H:M:S>","<value>"``
    (columns 1 and 2 are used).  Values outside [MIN_VALUE, MAX_VALUE)
    are stored as NaN; timestamps are truncated to whole minutes.  The
    first line is a header and is skipped.
    """
    timeseries = TSeries()
    timeseries.id = timeseries_id
    with open(filename) as fp:
        # IMPROVED: stream the file instead of fp.readlines() (which read
        # it all into memory); next() replaces the first_line flag.
        next(fp, None)
        for line in fp:
            components = line.split(',')
            date_str = components[1].strip('"')
            value = float(components[2].strip('"'))
            # Out-of-range readings are recorded as missing.
            if value < MIN_VALUE or value >= MAX_VALUE:
                value = float('nan')
            tstamp = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
            tstamp = tstamp.replace(second=0)
            timeseries[tstamp] = value
    timeseries.write_to_db(db=db.connection, transaction=transaction,
                           commit=False)
def regularize_raw_series(raw_series_db, proc_series_db, rs, re, ps, pe ): """ This function regularize raw_series_db object from database and writes a processed proc_series_db in database. Raw series is a continuously increasing values time series, aggregating the water consumption. Resulting processed timeseries contains water consumption for each of its interval. I.e. if the timeseries is of 15 minutes time step, then each record contains the water consumption for each record period. """ raw_series = TSeries(id=raw_series_db.id) raw_series.read_from_db(db.connection) # We keep the last value for x-checking reasons, see last print # command test_value = raw_series[raw_series.bounding_dates()[1]] time_step = ReadTimeStep(proc_series_db.id, proc_series_db) proc_series = TSeries(id=proc_series_db.id, time_step = time_step) # The following code can be used in real conditions to append only # new records to db, in a next version #if not pe: # start = proc_series.time_step.down(rs) #else: # start = proc_series.time_step.up(pe) # Instead of the above we use now: start = proc_series.time_step.down(rs) end = proc_series.time_step.up(re) pointer = start # Pass 1: Initialize proc_series while pointer<=end: proc_series[pointer] = float('nan') pointer = proc_series.time_step.next(pointer) # Pass 2: Transfer cummulative raw series to differences series: prev_s = 0 for i in xrange(len(raw_series)): dat, value = raw_series.items(pos=i) if not math.isnan(value): raw_series[dat] = value-prev_s prev_s = value # Pass 3: Regularize step: loop over raw series records and distribute # floating point values to processed series for i in xrange(len(raw_series)): dat, value = raw_series.items(pos=i) if not math.isnan(value): # find previous, next timestamp of the proc time series d1 = proc_series.time_step.down(dat) d2 = proc_series.time_step.up(dat) if math.isnan(proc_series[d1]): proc_series[d1] = 0 if math.isnan(proc_series[d2]): proc_series[d2] = 0 if d1==d2: # if dat on proc step then 
d1=d2 proc_series[d1] += value continue dif1 = _dif_in_secs(d1, dat) dif2 = _dif_in_secs(dat, d2) dif = dif1+dif2 # Distribute value to d1, d2 proc_series[d1] += (dif2/dif)*value proc_series[d2] += (dif1/dif)*value # Uncomment the following line in order to show debug information. # Usually the three following sums are consistent by equality. If # not equality is satisfied then there is a likelyhood of algorith # error print raw_series.sum(), proc_series.sum(), test_value proc_series.write_to_db(db=db.connection, commit=True) #False) #return the full timeseries return proc_series
def regularize(raw_series_db, proc_series_db, rs, re):
    """
    This function regularize raw_series_db object from database and
    writes a processed proc_series_db in database. Raw series is a
    continuously increasing values time series, aggregating the water
    consumption. Resulting processed timeseries contains water consumption
    for each of its interval. I.e. if the timeseries is of 15 minutes
    time step, then each record contains the water consumption for each
    record period.

    Returns the full processed TSeries, or None when the raw series is
    empty.
    """
    raw_series = TSeries(id=raw_series_db.id)
    raw_series.read_from_db(db.connection)
    # We keep the last value for x-checking reasons, see the commented-out
    # log command at the end.
    try:
        test_value = raw_series[raw_series.bounding_dates()[1]]
    except Exception as e:
        # Empty (or otherwise unreadable) raw series: nothing to process.
        #log.debug("Trying to get test value for raw series %s failed with %s. "
        #          "Skipping!" % (raw_series_db.id, repr(e)))
        return None
    time_step = ReadTimeStep(proc_series_db.id, proc_series_db)
    proc_series = TSeries(id=proc_series_db.id, time_step=time_step)
    # The following code can be used in real conditions to append only
    # new records to db, in a next version
    #if not pe:
    #    start = proc_series.time_step.down(rs)
    #else:
    #    start = proc_series.time_step.up(pe)
    # Instead of the above we use now:
    start = proc_series.time_step.down(rs)
    end = proc_series.time_step.up(re)
    # Pass 1: Initialize proc_series with NaN over the whole interval.
    pointer = start
    while pointer <= end:
        proc_series[pointer] = float('nan')
        pointer = proc_series.time_step.next(pointer)
    # Pass 2: Transfer cummulative raw series to differences series:
    # BUGFIX: removed dead debug code that built
    # datetime.today().replace(month=11).replace(day=5) and compared it to
    # each record's date to hit a no-op "pass"; besides doing nothing,
    # .replace(month=11) raises ValueError whenever today is the 31st of
    # a month (November has 30 days).
    prev_s = 0
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not isnan(value):
            # "if" Added by Chris Pantazis, because sometimes
            # We get a negative small value by the meter
            if prev_s > value:
                prev_s = value
            raw_series[dat] = value - prev_s
            prev_s = value
    # Pass 3: Regularize step: loop over raw series records and distribute
    # floating point values to processed series
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not isnan(value):
            # find previous, next timestamp of the proc time series
            d1 = proc_series.time_step.down(dat)
            d2 = proc_series.time_step.up(dat)
            if isnan(proc_series[d1]):
                proc_series[d1] = 0
            if isnan(proc_series[d2]):
                proc_series[d2] = 0
            if d1 == d2:
                # dat falls exactly on a proc step: assign the whole value.
                # (The original had a no-op "d1 = d2" here; removed.)
                proc_series[d1] += value
                continue
            # Distribute value to d1, d2 proportionally to proximity.
            dif1 = _dif_in_secs(d1, dat)
            dif2 = _dif_in_secs(dat, d2)
            dif = dif1 + dif2
            proc_series[d1] += (dif2 / dif) * value
            proc_series[d2] += (dif1 / dif) * value
    # Uncomment the following line in order to show debug information.
    # Usually the three following sums are consistent by equality. If
    # not equality is satisfied then there is a likelyhood of algorith
    # error
    # log.info("%s = %s = %s ?" % (raw_series.sum(),
    #                              proc_series.sum(), test_value))
    proc_series.write_to_db(db=db.connection, commit=True)
    #return the full timeseries
    return proc_series
def create_objects(data, usernames, force, z_names, z_dict):
    """
    Create users, households and time series placeholders, then import the
    parsed meter readings into the database.

    :param data: meter_id -> consumption_type -> [timestamp, volume]
    :param usernames: meter_id -> username
    :param force: True to overwrite series that already hold data
    :param z_names: fallback list of zone (DMA) names; z_names[0] is used
        when the username has no entry in z_dict
    :param z_dict: username -> zone (DMA) name
    :return: list of created/updated household objects
    """
    households = []
    # Create user (household owner), household, database series placeholders
    hh_ids = sorted(data.keys())
    found = False
    for hh_id in hh_ids:
        username = usernames[hh_id]
        # NOTE(review): leftover debugging hook for one specific meter;
        # the body is a no-op.
        if username == "PT94993":
            pass
        # Fall back to the first zone name when the username is unmapped.
        try:
            zone_name = z_dict[username]
        except KeyError:
            zone_name = z_names[0]
        zone = DMA.objects.get(name=zone_name)
        user, created = create_user(username, hh_id)
        # found is True when the household already existed in the db.
        household, found = create_household(hh_id, user, zone.id)
        households.append(household)
        db_series = create_raw_timeseries(household)
        create_processed_timeseries(household)
        # NOTE(review): timeseries_data is never populated below, so the
        # calc_occupancy call at the bottom can never fire -- confirm
        # whether that is intentional.
        timeseries_data = {}
        # Now we will create timeseries.Timeseries() and we will add
        # parsed values
        for variable in db_series:
            if variable not in ('WaterCold', 'Electricity'):
                continue
            exists = False
            s, e = timeseries_bounding_dates_from_db(db.connection,
                                                     db_series[variable].id)
            # latest_ts is the newest record already stored (or None).
            latest_ts = e
            ts_id = db_series[variable].id
            # checking to see if timeseries records already exist in order
            # to append
            # d = read_timeseries_tail_from_db(db.connection, ts_id)
            total = 0.0
            # if s or e:
            #     exists = True
            #     timeseries = TSeries(ts_id)
            #     timeseries.read_from_db(db.connection)
            # else:
            #     timeseries = TSeries()
            #     timeseries.id = ts_id
            _dict = data[hh_id]
            arr = _dict[variable]
            series = arr
            if not series:
                continue
            # earlier collects readings older than what is already stored;
            # they are back-filled further below.
            earlier = []
            if (not latest_ts) or (latest_ts < series[0][0]):
                # append
                timeseries = TSeries()
                timeseries.id = ts_id
                try:
                    tail = read_timeseries_tail_from_db(db.connection, ts_id)
                    total = float(tail[1])  # keep up from last value
                except Exception as e:
                    log.debug(repr(e))
                    total = 0
                # Raw series are cumulative: accumulate volumes into total.
                for timestamp, value in series:
                    if (not latest_ts) or (timestamp > latest_ts):
                        if not isnan(value):
                            total += value
                            timeseries[timestamp] = total
                        else:
                            timeseries[timestamp] = float('NaN')
                    elif timestamp < latest_ts:
                        earlier.append((timestamp, value))
                timeseries.append_to_db(db=db.connection,
                                        transaction=transaction, commit=True)
            elif latest_ts >= series[0][0]:
                if not force:
                    # ignore
                    continue
                else:
                    # insert
                    for timestamp, value in series:
                        if timestamp < latest_ts:
                            earlier.append((timestamp, value))
            # Back-fill of older readings; presumably "GR"/"GBA" in the
            # username marks Athens meters -- verify against callers.
            if earlier and ("GR" in username or "GBA" in username):
                # insert (only for athens)
                # print "appending %s items for %s" % (len(earlier), username)
                if variable == "WaterCold":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_PERIOD)
                    series15 = TSeries(id=ts15.id)
                elif variable == "Electricity":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_ENERGY_PERIOD)
                    series15 = TSeries(id=ts15.id)
                series15.read_from_db(db.connection)
                for ts, value in earlier:
                    series15[ts] = value
                series15.write_to_db(db=db.connection,
                                     transaction=transaction, commit=True)
                raw_ts = TSeries(ts_id)
                # read existing ts raw data
                raw_ts.read_from_db(db.connection)
                total = get_consumption_totals(household, earlier[0][0],
                                               variable)
                init = total
                for timestamp, value in earlier:
                    if not isnan(value):
                        total += value
                        raw_ts[timestamp] = total
                    else:
                        raw_ts[timestamp] = float('NaN')
                # correct later values, too
                # (timestamp here is the last value of the loop above)
                diff = total - init
                all_ts = sorted(raw_ts.keys())
                for ts in all_ts:
                    if ts <= timestamp:
                        continue
                    curr = raw_ts[ts]
                    raw_ts[ts] = curr + diff
                raw_ts.write_to_db(db=db.connection, transaction=transaction,
                                   commit=True)
        if 'WaterCold' in timeseries_data and not found:
            # only for new HH
            calc_occupancy(timeseries_data['WaterCold'], household)
    return households
def create_objects(data, usernames, force, z_names, z_dict):
    """
    Create users, households and time series placeholders, then import the
    parsed meter readings into the database.

    :param data: meter_id -> consumption_type -> [timestamp, volume]
    :param usernames: meter_id -> username
    :param force: True to overwrite series that already hold data
    :param z_names: fallback list of zone (DMA) names; z_names[0] is used
        when the username has no entry in z_dict
    :param z_dict: username -> zone (DMA) name
    :return: list of created/updated household objects
    """
    households = []
    # Create user (household owner), household, database series placeholders
    hh_ids = sorted(data.keys())
    found = False
    for hh_id in hh_ids:
        username = usernames[hh_id]
        # NOTE(review): leftover debugging hook for one specific meter;
        # the body is a no-op.
        if username == "PT94993":
            pass
        # Fall back to the first zone name when the username is unmapped.
        try:
            zone_name = z_dict[username]
        except KeyError:
            zone_name = z_names[0]
        zone = DMA.objects.get(name=zone_name)
        user, created = create_user(username, hh_id)
        # found is True when the household already existed in the db.
        household, found = create_household(hh_id, user, zone.id)
        households.append(household)
        db_series = create_raw_timeseries(household)
        create_processed_timeseries(household)
        # NOTE(review): timeseries_data is never populated below, so the
        # calc_occupancy call at the bottom can never fire -- confirm
        # whether that is intentional.
        timeseries_data = {}
        # Now we will create timeseries.Timeseries() and we will add
        # parsed values
        for variable in db_series:
            if variable not in ('WaterCold', 'Electricity'):
                continue
            exists = False
            s, e = timeseries_bounding_dates_from_db(db.connection,
                                                     db_series[variable].id)
            # latest_ts is the newest record already stored (or None).
            latest_ts = e
            ts_id = db_series[variable].id
            # checking to see if timeseries records already exist in order
            # to append
            # d = read_timeseries_tail_from_db(db.connection, ts_id)
            total = 0.0
            # if s or e:
            #     exists = True
            #     timeseries = TSeries(ts_id)
            #     timeseries.read_from_db(db.connection)
            # else:
            #     timeseries = TSeries()
            #     timeseries.id = ts_id
            _dict = data[hh_id]
            arr = _dict[variable]
            series = arr
            if not series:
                continue
            # earlier collects readings older than what is already stored;
            # they are back-filled further below.
            earlier = []
            if (not latest_ts) or (latest_ts < series[0][0]):
                # append
                timeseries = TSeries()
                timeseries.id = ts_id
                try:
                    tail = read_timeseries_tail_from_db(db.connection, ts_id)
                    total = float(tail[1])  # keep up from last value
                except Exception as e:
                    log.debug(repr(e))
                    total = 0
                # Raw series are cumulative: accumulate volumes into total.
                for timestamp, value in series:
                    if (not latest_ts) or (timestamp > latest_ts):
                        if not isnan(value):
                            total += value
                            timeseries[timestamp] = total
                        else:
                            timeseries[timestamp] = float('NaN')
                    elif timestamp < latest_ts:
                        earlier.append((timestamp, value))
                timeseries.append_to_db(db=db.connection,
                                        transaction=transaction, commit=True)
            elif latest_ts >= series[0][0]:
                if not force:
                    # ignore
                    continue
                else:
                    # insert
                    for timestamp, value in series:
                        if timestamp < latest_ts:
                            earlier.append((timestamp, value))
            # Back-fill of older readings; presumably "GR"/"GBA" in the
            # username marks Athens meters -- verify against callers.
            if earlier and ("GR" in username or "GBA" in username):
                # insert (only for athens)
                # print "appending %s items for %s" % (len(earlier), username)
                if variable == "WaterCold":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_PERIOD)
                    series15 = TSeries(id=ts15.id)
                elif variable == "Electricity":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_ENERGY_PERIOD)
                    series15 = TSeries(id=ts15.id)
                series15.read_from_db(db.connection)
                for ts, value in earlier:
                    series15[ts] = value
                series15.write_to_db(db=db.connection,
                                     transaction=transaction, commit=True)
                raw_ts = TSeries(ts_id)
                # read existing ts raw data
                raw_ts.read_from_db(db.connection)
                total = get_consumption_totals(household, earlier[0][0],
                                               variable)
                init = total
                for timestamp, value in earlier:
                    if not isnan(value):
                        total += value
                        raw_ts[timestamp] = total
                    else:
                        raw_ts[timestamp] = float('NaN')
                # correct later values, too
                # (timestamp here is the last value of the loop above)
                diff = total - init
                all_ts = sorted(raw_ts.keys())
                for ts in all_ts:
                    if ts <= timestamp:
                        continue
                    curr = raw_ts[ts]
                    raw_ts[ts] = curr + diff
                raw_ts.write_to_db(db=db.connection, transaction=transaction,
                                   commit=True)
        if 'WaterCold' in timeseries_data and not found:
            # only for new HH
            calc_occupancy(timeseries_data['WaterCold'], household)
    return households
Assuming that "dir" is the openmeteo directory, run as follows: export PYTHONPATH=dir:dir/enhydris export DJANGO_SETTINGS=settings ./oldopenmeteo2enhydris.sql """ import sys from datetime import timedelta from django.db import connection, transaction from enhydris.hcore import models from pthelma.timeseries import Timeseries transaction.enter_transaction_management() tms = models.Timeseries.objects.filter(time_step__id__in=[4,5]) for tm in tms: sys.stderr.write("Doing timeseries %d..." % (tm.id,)) t = Timeseries(id=tm.id) nt = Timeseries(id=tm.id) t.read_from_db(connection) for (d, value) in t.items(): d += timedelta(hours=1) assert(not d.minute and not d.hour and not d.second and d.day==1, "Invalid date "+str(d)) nt[d] = value nt.write_to_db(connection, transaction=transaction, commit=False) sys.stderr.write(" Done\n") transaction.commit()