Exemplo n.º 1
0
def process_dma(dma, bounds):
    """Aggregate the household time series of a DMA into the DMA's series.

    For every time series attached to ``dma`` the values of all matching
    household series (same time step and same variable) are summed per
    timestamp between the bounds given for that variable and time step,
    and the result is written to the database under the DMA series id.

    Series whose name contains ``'capita'`` are treated as per capita:
    each household value is divided by the household's number of
    occupants, and the per-timestamp sum is finally divided by the number
    of households that contributed a non-NaN value to that timestamp.

    :param dma: object exposing ``timeseries`` and ``households`` related
        managers.
    :param bounds: mapping ``variable id -> dict`` containing the keys
        ``'<step>_start'`` and ``'<step>_end'`` for the ``hourly``,
        ``daily`` and ``monthly`` steps.
    :return: None
    """
    print("Process DMA %s" % (dma, ))
    for dma_series in dma.timeseries.all():
        print("Process series %s" % (dma_series, ))
        per_capita = 'capita' in dma_series.name
        variable = dma_series.variable.id
        step_id = dma_series.time_step.id
        if step_id == TSTEP_HOURLY:
            prefix = 'hourly'
        elif step_id == TSTEP_DAILY:
            prefix = 'daily'
        elif step_id == TSTEP_MONTHLY:
            prefix = 'monthly'
        else:
            # Fifteen minutes process is DEACTIVATED!
            # We don't process fifteen minutes, it takes too long,
            # maybe we reactivate later after we optimize the
            # algorithm to process only new records.
            # Any other (unknown) time step has no bounds either, so it is
            # skipped as well instead of reusing the bounds of the
            # previously processed series (or failing on unbound names).
            continue
        start = bounds[variable][prefix + '_start']
        end = bounds[variable][prefix + '_end']
        time_step = ReadTimeStep(dma_series.id, dma_series)
        tseries = TSeries(time_step=time_step, id=dma_series.id)
        # nhseries counts, per timestamp, the households that contributed
        nhseries = TSeries(time_step=time_step)
        pointer = start
        while pointer <= end:
            tseries[pointer] = 0
            nhseries[pointer] = 0
            pointer = tseries.time_step.next(pointer)
        for household in dma.households.all():
            for h_series_db in household.timeseries.filter(
                    time_step__id=dma_series.time_step.id,
                    variable__id=variable):
                hseries = TSeries(id=h_series_db.id)
                hseries.read_from_db(db.connection)
                pointer = start
                while pointer <= end:
                    try:
                        v = hseries[pointer]
                    except KeyError:
                        # household has no record for this timestamp
                        v = float('nan')
                    if not math.isnan(v):
                        if per_capita:
                            v = v / float(household.num_of_occupants)
                        tseries[pointer] += v
                        nhseries[pointer] += 1
                    pointer = tseries.time_step.next(pointer)
        if per_capita:
            # turn the per-timestamp sums into averages over households
            pointer = start
            while pointer <= end:
                if nhseries[pointer] > 0:
                    tseries[pointer] = tseries[pointer] / nhseries[pointer]
                pointer = tseries.time_step.next(pointer)
        tseries.write_to_db(db.connection, commit=True)
Exemplo n.º 2
0
def process():
    """Estimate and store the number of occupants of every household.

    For each household the daily time series is read and its average is
    multiplied by 1000 (presumably a unit conversion -- confirm against
    the stored units). The occupancy is that figure divided by
    ``AVERAGE_UNIT_WATER_CONSUMPTION``, rounded to the nearest integer and
    never less than 1. Households whose average is NaN are left untouched.
    """
    for hh in Household.objects.all():
        daily_db = hh.timeseries.get(time_step__id=TSTEP_DAILY)
        daily = TSeries(id=daily_db.id)
        daily.read_from_db(db.connection)
        mean_consumption = 1000.000 * daily.average()
        if not math.isnan(mean_consumption):
            occupants = int(round(
                mean_consumption / AVERAGE_UNIT_WATER_CONSUMPTION))
            if occupants < 1:
                # a household always has at least one occupant
                occupants = 1
            print('Household with id=%s, average daily consumption %.1f, '
                  'number of occupants set to %s' % (
                      hh.id, mean_consumption, occupants))
            hh.num_of_occupants = occupants
            hh.save()
Exemplo n.º 3
0
def get_consumption_totals(household, dt, variable):
    """
    Return the last non-NaN cumulative reading stored at or before ``dt``.

    Original author's note: not needed, read_timeseries_tail_from_db does
    the same thing, faster.

    :param household: household whose cumulative series is read
    :param dt: timestamp up to which (inclusive) readings are considered
    :param variable: "WaterCold" or "Electricity"
    :return: the reading, 0 when there is no such reading, or None when
        ``variable`` is neither of the two supported names
    """
    if variable == "WaterCold":
        cumulative_id = VAR_CUMULATIVE
    elif variable == "Electricity":
        cumulative_id = VAR_ENERGY_CUMULATIVE
    else:
        return None
    db_row = household.timeseries.get(variable__id=cumulative_id)
    readings = TSeries(id=db_row.id)
    readings.read_from_db(db.connection)
    latest = 0
    for stamp in sorted(readings.keys()):
        reading = readings[stamp]
        if isnan(reading):
            continue
        if stamp > dt:
            # timestamps are sorted, nothing later can qualify
            break
        latest = reading
    return latest
Exemplo n.º 4
0
def has_burst_old(household):
    """
    Legacy burst detection on the fifteen-minute period series.

    We won't be using this algorithm any more.

    Only households whose username starts with 'GR' are examined. A value
    is ignored when it is NaN or zero, or when the value right before it
    is NaN. All usable values except the last 100 records form the
    history; the last 100 records are the candidates ("today"). The first
    candidate above the 95th percentile of the history is reported.

    :param household: household to examine
    :return: ``(consumption, "H:M")`` for the first candidate above the
        threshold, otherwise ``(0, 0)``
    """
    name = household.user.username
    if not name.startswith('GR'):
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    recent_start = len(timestamps) - 100  # index where "today" begins
    today = []  # all today's values
    history = []
    for i in range(1, len(timestamps)):
        ts = timestamps[i]
        # if previous value is NaN we don't take this value into consideration
        # Because it might have all consumption of all the previous NaN times
        if isnan(series[timestamps[i - 1]]):
            continue
        val = series[ts]
        if isnan(val) or val == 0:
            continue
        if i < recent_start:
            history.append(val)
        else:
            today.append((val, "%s:%s" % (ts.time().hour, ts.time().minute)))

    if history and today:
        p = np.percentile(np.array(history), 95)
        for cons, tm in today:
            if cons > p:
                return cons, tm
    return 0, 0
Exemplo n.º 5
0
def has_burst(household):
    """
    Burst detection based on the daily maxima of the fifteen-minute series.

    Only households whose username starts with 'GR' are examined. A value
    is ignored when it is NaN or zero, or when the value right before it
    is NaN. Every usable value except the last 100 records updates the
    maximum of its calendar date; the last 100 records are the candidates.
    The first candidate above the 90th percentile of the daily maxima is
    reported.

    NOTE(review): the previous docstring read "We won't be using this
    algorithm any more", identical to has_burst_old -- confirm which of
    the two functions that remark really applies to.

    :param household: household to examine
    :return: ``(consumption, "Y-M-D H:M")`` for the first candidate above
        the threshold, otherwise ``(0, 0)``
    """
    if not household.user.username.startswith('GR'):
        return 0, 0
    db_row = household.timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                      variable__id=VAR_PERIOD)
    series = TSeries(id=db_row.id)
    series.read_from_db(db.connection)
    stamps = sorted(series.keys())
    first_recent = len(stamps) - 100
    candidates = []  # (value, label) of the most recent records
    peak_per_day = {}
    for idx, (before, stamp) in enumerate(zip(stamps, stamps[1:]), 1):
        # a value following a NaN may carry the consumption of the whole
        # NaN stretch, so it is not taken into consideration
        if isnan(series[before]):
            continue
        reading = series[stamp]
        if isnan(reading) or reading == 0:
            continue
        if idx < first_recent:
            day = stamp.date()
            if reading > peak_per_day.get(day, 0):
                peak_per_day[day] = reading
        else:
            label = "%s-%s-%s %s:%s" % (stamp.year, stamp.month, stamp.day,
                                        stamp.time().hour,
                                        stamp.time().minute)
            candidates.append((reading, label))

    if not (peak_per_day and candidates):
        return 0, 0
    threshold = np.percentile(np.array(list(peak_per_day.values())), 90)
    for reading, label in candidates:
        if reading > threshold:
            return reading, label
    return 0, 0
Exemplo n.º 6
0
def parse_and_save_timeseries(device_id, timeseries_id):
    """
    Fetch a RAW time series from the REST API and store it locally.

    ``device_id`` is the identifier passed to the REST API (per the
    original notes it usually is the customerID == deviceID);
    ``timeseries_id`` is the id of the local time series that receives the
    records. Nothing is fetched when the local series already holds data.
    The write is issued with ``commit=False``.
    """
    first, last = timeseries_bounding_dates_from_db(db.connection,
                                                    timeseries_id)
    already_populated = first or last
    if already_populated:
        print('Raw timeseries id=%s has already data, skipping...' % (
            timeseries_id, ))
        return
    fetched = TSeries()
    fetched.id = timeseries_id
    for stamp, reading in ibm_restapi.get_raw_timeseries(device_id):
        fetched[stamp] = reading
    fetched.write_to_db(db=db.connection,
                        transaction=transaction,
                        commit=False)
Exemplo n.º 7
0
def create_objects(dma, household_identifier, series, force=False):
    """
    Create the database objects of a fully parsed household.

    Creates the user (household owner, username equal to
    ``household_identifier``), the household in zone ``dma.id`` and the
    database placeholders for raw and processed time series. The parsed
    'WaterCold' and 'Electricity' values in ``series`` are then accumulated
    into running totals and written as raw time series (``commit=False``).
    Finally the household occupancy is estimated from the water series.

    :param dma: object whose ``id`` is used as the household zone
    :param household_identifier: identifier of the household / username
    :param series: mapping ``variable -> iterable of (timestamp, value)``
    :param force: when False, raw series that already hold data are skipped
    """
    print("Processing household %s, user username will be %s as well" % (
        household_identifier, household_identifier))
    # Create user (household owner), household, database series placeholders
    owner = create_user(household_identifier)
    household = create_household(household_identifier, owner,
                                 zone=dma.id)
    db_series = create_raw_timeseries(household)
    create_processed_timeseries(household)
    written = {}
    for variable in db_series:
        if variable not in ('WaterCold', 'Electricity'):
            continue
        first, last = timeseries_bounding_dates_from_db(
            db.connection, db_series[variable].id)
        if (first or last) and not force:
            print('Raw timeseries id=%s has already data, skipping...' % (
                db_series[variable].id,))
            continue
        cumulative = TSeries()
        cumulative.id = db_series[variable].id
        running = 0.0
        for stamp, reading in series[variable]:
            if math.isnan(reading):
                # gaps are stored as NaN and do not advance the total
                cumulative[stamp] = float('NaN')
            else:
                running += reading
                cumulative[stamp] = running
        written[variable] = cumulative
        cumulative.write_to_db(db=db.connection,
                               transaction=transaction,
                               commit=False)
    if 'WaterCold' in written:
        calc_occupancy(written['WaterCold'], household)
Exemplo n.º 8
0
def parse_and_save_timeseries(filename, timeseries_id):
    """Parse a comma separated file and store it as a time series.

    The first line of ``filename`` is treated as a header and skipped;
    blank lines are ignored. On every other line the second column is the
    timestamp (``%Y-%m-%d %H:%M:%S``) and the third column the value; both
    may be wrapped in double quotes. Values outside
    ``[MIN_VALUE, MAX_VALUE)`` are stored as NaN and the seconds of each
    timestamp are reset to zero. The series is written with
    ``commit=False``.

    :param filename: path of the file to parse
    :param timeseries_id: id of the time series that receives the records
    """
    timeseries = TSeries()
    timeseries.id = timeseries_id
    with open(filename) as fp:
        next(fp, None)  # skip the header line (if any)
        # iterate lazily instead of loading the whole file with readlines()
        for line in fp:
            if not line.strip():
                continue
            components = line.split(',')
            # strip whitespace first: when the field is the last column the
            # trailing newline would otherwise shield the closing quote
            date_str = components[1].strip().strip('"')
            value_str = components[2].strip().strip('"')
            value = float(value_str)
            if value < MIN_VALUE or value >= MAX_VALUE:
                value = float('nan')
            tstamp = datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
            tstamp = tstamp.replace(second=0)
            timeseries[tstamp] = value
    timeseries.write_to_db(db=db.connection,
                           transaction=transaction,
                           commit=False)
Exemplo n.º 9
0
def get_values_after(household, dt, variable):
    """Return the fifteen-minute period records stored after ``dt``.

    :param household: household whose series is read
    :param dt: only records with a timestamp later than this are returned
    :param variable: "WaterCold" or "Electricity"; any other value yields
        an empty list
    :return: list of ``(timestamp, value)`` tuples in ascending
        timestamp order
    """
    if variable == "WaterCold":
        period_id = VAR_PERIOD
    elif variable == "Electricity":
        period_id = VAR_ENERGY_PERIOD
    else:
        return []
    db_row = household.timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                      variable__id=period_id)
    if not db_row:
        return []
    stored = TSeries(id=db_row.id)
    stored.read_from_db(db.connection)
    return [(stamp, stored[stamp])
            for stamp in sorted(stored.keys())
            if not stamp <= dt]
Exemplo n.º 10
0
 def handle(self, *args, **options):
     """Export the cumulative (VAR_CUMULATIVE) series of users to CSV.

     ``args[0]`` is a username. When it is one of the prefixes "GR", "GB",
     "PT" or "GBA", every user whose username starts with it is exported;
     otherwise only the user with exactly that username. For each user the
     non-NaN, non-zero records are written as ``[timestamp, value]`` rows
     to ``data/timeseries_cumulative_<username>.csv``.

     Any error raised while exporting is printed, not propagated.

     :return: -1 when no username was given, otherwise None
     """
     try:
         username = args[0]
     except IndexError:
         print("I need a username!")
         return -1
     try:
         if username not in ["GR", "GB", "PT", "GBA"]:
             users = User.objects.filter(username=username)
         else:
             users = User.objects.filter(username__startswith=username)
         for user in users:
             print("output for {x}".format(x=username))
             household = Household.objects.get(user=user)
             timeseries = household \
                 .timeseries.get(variable__id=VAR_CUMULATIVE)
             series = TSeries(id=timeseries.id)
             series.read_from_db(db.connection)
             out = []
             for ts in sorted(series.keys()):
                 val = series[ts]
                 if isnan(val) or val == 0:
                     continue
                 out.append([ts, val])
             _outfile = "timeseries_cumulative_%s.csv" % user.username
             _path = "data/"
             with open(path.join(_path, _outfile), 'w') as of:
                 a = csv.writer(of, delimiter=',',
                                quotechar='"',
                                quoting=csv.QUOTE_ALL)
                 a.writerows(out)
     except Exception as e:
         print("failed with %s" % repr(e))
Exemplo n.º 11
0
 def handle(self, *args, **options):
     """Export a user's fifteen-minute period series with a running percentile.

     ``args[0]`` is the username. Every non-NaN, non-zero record is written
     to ``data/timeseries_<username>.csv`` as
     ``[timestamp, value, percentile]`` where ``percentile`` is the 90th
     percentile of all exported values up to and including that record.

     Any error raised while exporting is printed, not propagated.

     :return: -1 when no username was given, otherwise None
     """
     if not args:
         print("I need a username!")
         return -1
     username = args[0]
     try:
         if username:
             account = User.objects.get(username=username)
             print("output for {x}".format(x=username))
             household = Household.objects.get(user=account)
             db_row = household.timeseries.get(
                 time_step__id=TSTEP_FIFTEEN_MINUTES,
                 variable__id=VAR_PERIOD)
             series = TSeries(id=db_row.id)
             series.read_from_db(db.connection)
             rows = []
             seen = np.array([])
             for stamp in sorted(series.keys()):
                 reading = series[stamp]
                 if isnan(reading) or reading == 0:
                     continue
                 seen = np.append(seen, reading)
                 rows.append([stamp, reading, np.percentile(seen, 90)])
             target = path.join("data/", "timeseries_%s.csv" % username)
             with open(target, 'w') as of:
                 writer = csv.writer(of,
                                     delimiter=',',
                                     quotechar='"',
                                     quoting=csv.QUOTE_ALL)
                 writer.writerows(rows)
     except Exception as e:
         print("failed with %s" % repr(e))
Exemplo n.º 12
0
def regularize(raw_series_db, proc_series_db, rs, re):
    """
    This function regularizes the raw_series_db object from database and
    writes a processed proc_series_db in database.
    Raw series is a continuously increasing values time series,
    aggregating the water consumption. Resulting processed timeseries
    contains water consumption for each of its interval. I.e. if the
    timeseries is of 15 minutes time step, then each record contains
    the water consumption for each record period.

    :param raw_series_db: database object of the raw (cumulative) series
    :param proc_series_db: database object of the processed series
    :param rs: start timestamp; rounded down to the processed time step
    :param re: end timestamp; rounded up to the processed time step
    :return: the processed series that was written to the database, or
        None when the last record of the raw series cannot be read
    """
    raw_series = TSeries(id=raw_series_db.id)
    raw_series.read_from_db(db.connection)
    # Probe the last raw record; when this fails (presumably because the
    # raw series is empty) there is nothing to regularize.
    try:
        raw_series[raw_series.bounding_dates()[1]]
    except Exception:
        return None
    time_step = ReadTimeStep(proc_series_db.id, proc_series_db)
    proc_series = TSeries(id=proc_series_db.id, time_step=time_step)
    # The whole [rs, re] period is always rebuilt; appending only new
    # records is left for a next version.
    start = proc_series.time_step.down(rs)
    end = proc_series.time_step.up(re)
    pointer = start
    # Pass 1: Initialize proc_series
    while pointer <= end:
        proc_series[pointer] = float('nan')
        pointer = proc_series.time_step.next(pointer)
    # Pass 2: Transfer cumulative raw series to differences series:
    prev_s = 0
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not isnan(value):
            # "if" Added by Chris Pantazis, because sometimes
            # We get a negative small value by the meter
            if prev_s > value:
                prev_s = value
            raw_series[dat] = value - prev_s
            prev_s = value
    # Pass 3: Regularize step: loop over raw series records and distribute
    # floating point values to processed series
    for i in xrange(len(raw_series)):
        dat, value = raw_series.items(pos=i)
        if not isnan(value):
            # find previous, next timestamp of the proc time series
            d1 = proc_series.time_step.down(dat)
            d2 = proc_series.time_step.up(dat)
            if isnan(proc_series[d1]):
                proc_series[d1] = 0
            if isnan(proc_series[d2]):
                proc_series[d2] = 0
            if d1 == d2:  # if dat on proc step then d1=d2
                proc_series[d1] += value
                continue
            dif1 = _dif_in_secs(d1, dat)
            dif2 = _dif_in_secs(dat, d2)
            # float() guards against integer division truncating the
            # weights to 0 should _dif_in_secs return whole seconds
            dif = float(dif1 + dif2)
            # Distribute value to d1, d2: the closer step gets the bigger
            # share
            proc_series[d1] += (dif2 / dif) * value
            proc_series[d2] += (dif1 / dif) * value
    # When debugging, raw_series.sum() and proc_series.sum() are expected
    # to agree with the last raw value; a mismatch hints at an algorithm
    # error.

    proc_series.write_to_db(db=db.connection, commit=True)
    # return the full timeseries
    return proc_series
Exemplo n.º 13
0
def has_leakage(household):
    """
    Check the hourly consumption of a household for a possible leakage.

    The hourly period series is read and, per calendar date, the total
    consumption and the night consumption (hours 3 to 5 inclusive) are
    accumulated. The last date is dropped when its last record is before
    23:00, i.e. when it is not a whole day. Of the remaining dates the
    most recent one is "today" and all others are the history. For each
    date with positive night and total consumption the ratio night/total
    is computed; a leakage is reported when today's ratio is above the
    90th percentile of the history ratios.

    Households whose username starts with 'GB' are never examined (not UK
    because they send daily data).

    :param household: household to examine
    :return: ``(ratio, "Y-M-D H:M")`` when a leakage is suspected, the
        time being that of the last record of the series; otherwise
        ``(0, 0)``
    """
    name = household.user.username
    if name.startswith('GB'):  # not UK because they send daily data
        return 0, 0
    timeseries = household \
        .timeseries.get(time_step__id=TSTEP_HOURLY,
                        variable__id=VAR_PERIOD)
    series = TSeries(id=timeseries.id)
    series.read_from_db(db.connection)
    timestamps = sorted(series.keys())
    if not timestamps:
        return 0, 0
    night_dict = {}
    total_dict = {}
    for ts in timestamps:
        day = ts.date()
        val = series[ts]
        if 3 <= ts.time().hour <= 5:
            if val == 0:
                # NOTE(review): the original comment says "make all night 0
                # if one 0", yet later non-zero night hours of the same
                # date are added on top of this 0 again -- confirm intent.
                night_dict[day] = 0
            else:
                night_dict[day] = night_dict.get(day, 0) + val
        total_dict[day] = total_dict.get(day, 0) + val

    # remove last day if not a whole day (last record before 23:00)
    last_ts = timestamps[-1]
    if last_ts.time().hour < 23:
        total_dict.pop(last_ts.date(), None)
        night_dict.pop(last_ts.date(), None)

    days = sorted(total_dict.keys())
    _all = []  # night/total ratios of all days except the last one
    _today = []  # night/total ratio of the last day
    for target, selected in ((_all, days[:-1]), (_today, days[-1:])):
        for day in selected:
            total = total_dict[day]
            # there can be a case when no data arrived for the night hours,
            # so night_dict[day] might not exist; in this case let it be
            # zero (this applies to the last day as well)
            night = night_dict.get(day, 0)
            if total > 0 and night > 0 \
                    and not isnan(total) and not isnan(night):
                target.append(float(night) / float(total))

    if _all and _today:
        tm = "%s-%s-%s %s:%s" % (last_ts.year, last_ts.month, last_ts.day,
                                 last_ts.time().hour,
                                 last_ts.time().minute)
        p = np.percentile(np.array(_all), 90)
        for val in _today:
            if val > p:
                return val, tm
    return 0, 0
Exemplo n.º 14
0
def create_objects(data, usernames, force, z_names, z_dict):
    """
    Create or update users, households and raw time series from parsed
    meter data.

    For every meter id in ``data`` the user and household are created (or
    fetched), together with the raw and processed series placeholders.
    Then, for the 'WaterCold' and 'Electricity' variables:

    * when the database holds no data, or only data older than the first
      parsed record, the parsed values are accumulated on top of the last
      stored total and appended to the raw series;
    * otherwise nothing happens unless ``force`` is set, in which case the
      parsed records older than the latest stored timestamp are collected;
    * collected older records are, for usernames containing "GR" or
      "GBA", written into the fifteen-minute period series and inserted
      into the raw cumulative series, every later cumulative value being
      shifted by the inserted consumption.

    For households that did not exist before, the occupancy is estimated
    from the appended water series.

    :param data: meter_id -> consumption_type -> [timestamp, volume]
    :param usernames: meter_id -> username
    :param force: True to overwrite
    :param z_names: zone names; the first one is the fallback zone
    :param z_dict: username -> zone name
    :return: list of the households that were processed
    """
    households = []
    found = False
    # Create user (household owner), household, database series placeholders
    for hh_id in sorted(data.keys()):
        username = usernames[hh_id]
        try:
            zone_name = z_dict[username]
        except KeyError:
            zone_name = z_names[0]
        zone = DMA.objects.get(name=zone_name)
        user, created = create_user(username, hh_id)
        household, found = create_household(hh_id, user, zone.id)
        households.append(household)
        db_series = create_raw_timeseries(household)
        create_processed_timeseries(household)
        timeseries_data = {}
        # Now we will create timeseries.Timeseries() and we will add
        # parsed values
        for variable in db_series:
            if variable not in ('WaterCold', 'Electricity'):
                continue
            ts_id = db_series[variable].id
            _first_ts, latest_ts = timeseries_bounding_dates_from_db(
                db.connection, ts_id)
            total = 0.0
            series = data[hh_id][variable]
            if not series:
                continue
            earlier = []
            if (not latest_ts) or (latest_ts < series[0][0]):  # append
                timeseries = TSeries()
                timeseries.id = ts_id
                try:
                    tail = read_timeseries_tail_from_db(db.connection, ts_id)
                    total = float(tail[1])  # keep up from last value
                except Exception as exc:
                    log.debug(repr(exc))
                    total = 0
                for timestamp, value in series:
                    if (not latest_ts) or (timestamp > latest_ts):
                        if not isnan(value):
                            total += value
                            timeseries[timestamp] = total
                        else:
                            timeseries[timestamp] = float('NaN')
                    elif timestamp < latest_ts:
                        earlier.append((timestamp, value))
                timeseries.append_to_db(db=db.connection,
                                        transaction=transaction,
                                        commit=True)
                # remember the appended series; without this the occupancy
                # estimation at the end of the household loop never runs
                timeseries_data[variable] = timeseries
            elif latest_ts >= series[0][0]:
                if not force:  # ignore
                    continue
                # insert
                for timestamp, value in series:
                    if timestamp < latest_ts:
                        earlier.append((timestamp, value))
            if earlier and ("GR" in username
                            or "GBA" in username):  # insert (only for athens)
                if variable == "WaterCold":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_PERIOD)
                    series15 = TSeries(id=ts15.id)
                elif variable == "Electricity":
                    ts15 = household \
                        .timeseries.get(time_step__id=TSTEP_FIFTEEN_MINUTES,
                                        variable__id=VAR_ENERGY_PERIOD)
                    series15 = TSeries(id=ts15.id)
                series15.read_from_db(db.connection)
                for ts, value in earlier:
                    series15[ts] = value
                series15.write_to_db(db=db.connection,
                                     transaction=transaction,
                                     commit=True)

                raw_ts = TSeries(ts_id)  # read existing ts raw data
                raw_ts.read_from_db(db.connection)
                total = get_consumption_totals(household, earlier[0][0],
                                               variable)
                init = total
                for timestamp, value in earlier:
                    if not isnan(value):
                        total += value
                        raw_ts[timestamp] = total
                    else:
                        raw_ts[timestamp] = float('NaN')

                # correct later values, too: everything after the last
                # inserted record is shifted by the inserted consumption
                diff = total - init
                last_inserted = earlier[-1][0]
                for ts in sorted(raw_ts.keys()):
                    if ts <= last_inserted:
                        continue
                    raw_ts[ts] = raw_ts[ts] + diff

                raw_ts.write_to_db(db=db.connection,
                                   transaction=transaction,
                                   commit=True)

        if 'WaterCold' in timeseries_data and not found:  # only for new HH
            calc_occupancy(timeseries_data['WaterCold'], household)
    return households