def data():
    """Load the hourly temperature series with long sensor gaps patched.

    The pickled series stored next to this module is sorted and put on an
    hourly grid. Two extended periods with failed readings are blanked out
    and filled from the Gvarv series parsed from ``_TEMP_DATA``. Anything
    still missing after that is filled by interpolation.

    Returns:
        The patched series, named "Temperature".
    """
    pickle_path = os.path.join(_PATH_TO_HERE, 'temp_data.pickle')
    temp = pd.read_pickle(pickle_path).sort_index().asfreq("H")

    # Extended periods with failed readings; these get replaced with Gvarv.
    failed_periods = (
        ('2004-11-11 14:00', '2004-11-21 23:00'),
        ('2005-02-08 08:00', '2005-02-27 23:00'),
    )
    for first, last in failed_periods:
        temp[first:last] = np.nan

    # Shorter periods with failed readings that are, for now, left for the
    # cleansing step to deal with (they are NOT blanked here):
    #   2005-09-07 08:00 -- 2005-09-08 04:00
    #   2006-02-28 05:00 -- 2006-02-28 04:00  (NOTE(review): ends before it
    #                                          starts; confirm the dates)
    #   2006-06-17 11:00 -- 2006-06-18 08:00
    #   2006-12-19 06:00 -- 2006-12-21 03:00

    # Gvarv readings from the first timestamp of temp onwards, hourly.
    gvarv = xml.parse(_TEMP_DATA)
    gvarv = gvarv[temp.index[0]:].asfreq("H")
    _, gvarv_aligned = temp.align(gvarv, join="left")

    # np.where hands back a plain ndarray (with old Numpy it returned a
    # Pandas time series), so the result is written back into temp in place.
    missing = np.isnan(temp)
    temp[:] = np.where(missing, gvarv_aligned, temp)

    patched = temp.interpolate()
    patched.name = "Temperature"

    # A couple of outliers and zero-recordings are likewise left to the
    # cleansing step rather than interpolated away here:
    #   2004-11-29 08:00
    #   2005-11-30 00:00 -- 2005-11-30 02:00
    #   2006-10-27 09:00
    return patched
def data():
    """Load the hourly temperature series with long sensor gaps patched.

    The pickled series stored next to this module is sorted and put on an
    hourly grid. Two extended periods with failed readings are blanked out
    and filled from the Gvarv series parsed from ``_TEMP_DATA``. Anything
    still missing after that is filled by interpolation.

    Returns:
        The patched series, named "Temperature".
    """
    pickle_path = os.path.join(_PATH_TO_HERE, 'temp_data.pickle')
    temp = pd.read_pickle(pickle_path).sort_index().asfreq("H")

    # Extended periods with failed readings; these get replaced with Gvarv.
    failed_periods = (
        ('2004-11-11 14:00', '2004-11-21 23:00'),
        ('2005-02-08 08:00', '2005-02-27 23:00'),
    )
    for first, last in failed_periods:
        temp[first:last] = np.nan

    # Shorter periods with failed readings that are, for now, left for the
    # cleansing step to deal with (they are NOT blanked here):
    #   2005-09-07 08:00 -- 2005-09-08 04:00
    #   2006-02-28 05:00 -- 2006-02-28 04:00  (NOTE(review): ends before it
    #                                          starts; confirm the dates)
    #   2006-06-17 11:00 -- 2006-06-18 08:00
    #   2006-12-19 06:00 -- 2006-12-21 03:00

    # Gvarv readings from the first timestamp of temp onwards, hourly.
    gvarv = xml.parse(_TEMP_DATA)
    gvarv = gvarv[temp.index[0]:].asfreq("H")
    _, gvarv_aligned = temp.align(gvarv, join="left")

    # np.where hands back a plain ndarray (with old Numpy it returned a
    # Pandas time series), so the result is written back into temp in place.
    missing = np.isnan(temp)
    temp[:] = np.where(missing, gvarv_aligned, temp)

    patched = temp.interpolate()
    patched.name = "Temperature"

    # A couple of outliers and zero-recordings are likewise left to the
    # cleansing step rather than interpolated away here:
    #   2004-11-29 08:00
    #   2005-11-30 00:00 -- 2005-11-30 02:00
    #   2006-10-27 09:00
    return patched
Пример #3
0
def _collect_and_plot(
        files,
        path='/Users/tidemann/Documents/NTNU/devel/data/eklima/Telemark/'):
    """Plot temperature readings from gs2 files against reference series.

    Every file in ``files`` is parsed with ``parse.parse_file``. The first
    and last parsed sections are skipped, and of the remaining ones those
    whose 'Plant' entry equals ``['tmp']`` are turned into hourly time
    series. If at least one series was collected, each entry in ``path`` is
    parsed with ``xml.parse`` and plotted together with the concatenation of
    the collected series.

    Args:
        files: Iterable of gs2 file names handed to ``parse.parse_file``.
        path: Directory holding the reference series files. Defaults to the
            location the original code had hard-coded.

    Returns:
        None. Results are plotted and status messages are printed.
    """
    TS = []
    location = []
    for f in files:
        temperatures = [section[1] for section in parse.parse_file(f)[1:-1]
                        if section[1]['Plant'] == ['tmp']]
        for t in temperatures:
            if t['Step'][0] != '0000-00-00.01:00:00':
                print('Not hourly readings of temperature. Abort.')
                # Only the remaining sections of this file are skipped;
                # processing carries on with the next file.
                break
            dates = ts.date_array(start_date=ts.Date('H', t['Start'][0]),
                                  length=len(t['Value']))
            # Each value looks like '<number>/<something>'; only the part
            # before the first '/' is used.
            data = [float(value.split('/')[0]) for value in t['Value']]
            TS.append(ts.TimeSeries(data=data, dates=dates))
            if location and t['Installation'][0] != location:
                print('Location changed during reading of gs2 files. '
                      'Probably some bad grouping of gs2 files.')
            location = t['Installation'][0]
    if TS:
        for name in os.listdir(path):
            try:
                series = xml.parse(os.path.join(path, name))
                sg.utils.plot_time_series([ts.concatenate((TS)), series],
                                          ['b-', 'r-'], [location, name])
            except Exception:
                # Best effort: one unreadable or unplottable reference file
                # must not stop the rest. KeyboardInterrupt and SystemExit
                # are deliberately no longer swallowed here.
                print('%s had no data.' % (name,))
    else:
        print('No temperature data.')