def correlate(chan_id, weather_variable, subplot_index, annotate_y,
              on_power_threshold=20):
    """Regress a channel's daily hours-on against a weather variable and plot it.

    Loads channel `chan_id` from `DATA_DIR`, computes the hours the channel
    was on per day, keeps only the days whose hours-on exceed
    `ON_DURATION_THRESHOLD`, aligns those days with `weather_variable`, fits
    a linear regression and draws it on one cell of a 2x2 subplot grid of
    the module-level figure `fig`.

    Previously recorded results (channel id = appliance):
        25 = lighting circuit (R^2 = 0.443)
         8 = kitchen lights (R^2 = 0.194)
         2 = boiler (versus radiation R^2 = 0.052,
                     versus mean_temp R^2 = 0.298,
                     versus max_temp  R^2 = 0.432,
                     versus min_temp  R^2 = 0.212)
         3 = solar (R^2 = 0.798)
        12 = fridge vs min_temp R^2 = 0.255 (with on_power_threshold = 20)

    Args:
        chan_id: channel identifier handed to `Channel`.
        weather_variable: weather data to correlate against. Must have a
            `name` attribute (used to build the plot title).
        subplot_index (int): position in the 2x2 grid, passed to
            `fig.add_subplot`.
        annotate_y: forwarded to `pda.stats.plot_regression_line`.
        on_power_threshold: value assigned to `channel.on_power_threshold`
            before usage is computed. Defaults to 20, the value that used
            to be hard-coded.

    Returns:
        None. Progress and the R^2 value are printed; the plot is drawn
        as a side effect.
    """
    print("Opening channel data...")
    channel = Channel(DATA_DIR, chan_id)

    print("Calculating...")
    channel.on_power_threshold = on_power_threshold
    hours_on = channel.usage_per_period('D', tz_convert='UTC').hours_on
    hours_on = hours_on[hours_on > ON_DURATION_THRESHOLD]
    hours_on.description = 'hours on'
    print("Got {} days of data from usage_per_period.".format(hours_on.size))

    print("Plotting...")
    x_aligned, y_aligned = pda.stats.align(weather_variable, hours_on)
    print(x_aligned.description)
    # Only slope, intercept and r are needed; p-value and std-err are unused.
    slope, intercept, r_value, _, _ = linregress(x_aligned.values,
                                                 y_aligned.values)

    ax = fig.add_subplot(2, 2, subplot_index)
    ax = spfl.format_axes(ax)
    ax = pda.stats.plot_regression_line(ax, x_aligned, y_aligned,
                                        slope, intercept, r_value,
                                        annotate_y=annotate_y)
    print("R^2={:.3f}".format(r_value**2))
    ax.set_title('Correlation between ' + channel.get_long_name() + ' and ' +
                 metoffice.get_long_name(weather_variable.name))
def test(data_input='random'):
    """Run changepoint inference on a test signal and plot the outcome.

    First the prior is specified (a constant hazard function), then a test
    signal is obtained, then inference is performed, then the signal and
    the resulting beliefs are plotted.

    Args:
        data_input (str): which signal to analyse.
            'random': 100 samples generated from the prior by
                `generate_test_data` (with known changepoints).
            'ones': a constant signal of 100 ones, no changepoints.
            'signature': samples 142:1647 of a Watts Up recording loaded
                from a hard-coded path; no changepoints are marked.

    Returns:
        (beliefs, maxes) as returned by `inference`, with `beliefs` cast to
        float32.

    Raises:
        ValueError: if `data_input` is not one of the options above.
    """
    def hazard_func(r):
        return constant_hazard(r, _lambda=200)

    # Default signal length. Previously only the 'random' branch set it, so
    # the 'ones' branch failed on an unbound N.
    N = 100

    if data_input == 'random':
        # generate test data
        x, changepoints = generate_test_data(N, hazard_func)
    elif data_input == 'ones':
        x = np.ones(N)
        changepoints = []
    elif data_input == 'signature':
        from pda.channel import Channel
        from os import path
        DATA_DIR = '/data/mine/domesticPowerData/BellendenRd/wattsUp'
        # Alternative recording: 'breadmaker1.csv'
        SIG_DATA_FILENAME = 'washingmachine1.csv'
        chan = Channel()
        chan.load_wattsup(path.join(DATA_DIR, SIG_DATA_FILENAME))
        x = chan.series.values[142:1647]
        N = x.size
        # No ground truth is available for a recorded signature. This was
        # previously left unbound and crashed the plotting loop below.
        changepoints = []
    else:
        raise ValueError(
            "data_input must be 'random', 'ones' or 'signature'; got {!r}"
            .format(data_input))

    # plot the signal, with a vertical line at each known changepoint
    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.plot(x)
    ylim = ax.get_ylim()
    for cp in changepoints:
        ax.plot([cp, cp], ylim, color='k')

    # do inference
    beliefs, maxes = inference(x, hazard_func)

    # plot beliefs (as negative log) with the maxes overlaid in red
    beliefs = beliefs.astype(np.float32)
    ax2 = fig.add_subplot(2, 1, 2, sharex=ax)
    ax2.imshow(-np.log(beliefs), interpolation='none', aspect='auto',
               origin='lower', cmap=plt.cm.Blues)
    ax2.plot(maxes, color='r')
    ax2.set_xlim([0, N])
    ax2.set_ylim([0, ax2.get_ylim()[1]])
    plt.draw()
    return beliefs, maxes
def test(data_input='random'):
    """Run changepoint inference on a test signal and plot the outcome.

    First the prior is specified (a constant hazard function), then a test
    signal is obtained, then inference is performed, then the signal and
    the resulting beliefs are plotted.

    Args:
        data_input (str): which signal to analyse.
            'random': 100 samples generated from the prior by
                `generate_test_data` (with known changepoints).
            'ones': a constant signal of 100 ones, no changepoints.
            'signature': samples 142:1647 of a Watts Up recording loaded
                from a hard-coded path; no changepoints are marked.

    Returns:
        (beliefs, maxes) as returned by `inference`, with `beliefs` cast to
        float32.

    Raises:
        ValueError: if `data_input` is not one of the options above.
    """
    # A named function rather than a lambda bound to a name.
    def hazard_func(r):
        return constant_hazard(r, _lambda=200)

    # Default signal length. Previously only the 'random' branch set it, so
    # the 'ones' branch failed on an unbound N.
    N = 100

    if data_input == 'random':
        # generate test data
        x, changepoints = generate_test_data(N, hazard_func)
    elif data_input == 'ones':
        x = np.ones(N)
        changepoints = []
    elif data_input == 'signature':
        from pda.channel import Channel
        from os import path
        DATA_DIR = '/data/mine/domesticPowerData/BellendenRd/wattsUp'
        # Alternative recording: 'breadmaker1.csv'
        SIG_DATA_FILENAME = 'washingmachine1.csv'
        chan = Channel()
        chan.load_wattsup(path.join(DATA_DIR, SIG_DATA_FILENAME))
        x = chan.series.values[142:1647]
        N = x.size
        # No ground truth is available for a recorded signature. This was
        # previously left unbound and crashed the plotting loop below.
        changepoints = []
    else:
        raise ValueError(
            "data_input must be 'random', 'ones' or 'signature'; got {!r}"
            .format(data_input))

    # plot the signal, with a vertical line at each known changepoint
    fig = plt.figure()
    ax = fig.add_subplot(2, 1, 1)
    ax.plot(x)
    ylim = ax.get_ylim()
    for cp in changepoints:
        ax.plot([cp, cp], ylim, color='k')

    # do inference
    beliefs, maxes = inference(x, hazard_func)

    # plot beliefs (as negative log) with the maxes overlaid in red
    beliefs = beliefs.astype(np.float32)
    ax2 = fig.add_subplot(2, 1, 2, sharex=ax)
    ax2.imshow(-np.log(beliefs), interpolation='none', aspect='auto',
               origin='lower', cmap=plt.cm.Blues)
    ax2.plot(maxes, color='r')
    ax2.set_xlim([0, N])
    ax2.set_ylim([0, ax2.get_ylim()[1]])
    plt.draw()
    return beliefs, maxes
def load_dataset(data_dir=DD, ignore_chans=None, only_load_chans=None,
                 start_date=None, end_date=None):
    """Loads an entire dataset directory.

    Every channel listed by `load_labels(data_dir)` is loaded unless it is
    filtered out by `ignore_chans` / `only_load_chans`. Channels whose
    loading raises IOError are reported and skipped.

    Args:
        data_dir (str)
        ignore_chans (list of ints or label strings): optional.
            Don't load these channels.
        only_load_chans (list of ints or label strings): optional.
            If given, load only these channels.
        start_date, end_date: optional. Passed straight through to the
            `Channel` constructor.

    Returns:
        list of Channels
    """
    if ignore_chans is not None:
        assert isinstance(ignore_chans, list)

    channels = []
    labels = load_labels(data_dir)
    print("Found", len(labels), "entries in labels.dat")

    # `.items()` rather than `.iteritems()`: the latter does not exist on
    # Python 3 dicts (nor on recent pandas objects), while `.items()` is
    # available on all of them, including Python 2 dicts.
    for chan, label in labels.items():
        if ignore_chans is not None and (chan in ignore_chans or
                                         label in ignore_chans):
            print("Ignoring chan", chan, label)
            continue

        if (only_load_chans is not None and
                chan not in only_load_chans and
                label not in only_load_chans):
            print("Ignoring chan", chan, label)
            continue

        print("Attempting to load chan", chan, label, "...", end=" ")
        sys.stdout.flush()  # show the progress message before the slow load
        try:
            c = Channel(data_dir, chan,
                        start_date=start_date, end_date=end_date)
        except IOError:
            print("FAILED!")
        else:
            channels.append(c)
            print("success.")

    return channels
# Script preamble: configure constants, load the mains voltage channel and
# load every appliance channel listed in CHAN_IDS.
from __future__ import print_function, division
from pda.channel import Channel
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import os, datetime
import setupPlottingForLaTeX as spfl

# Plot colours (matplotlib colour specs).
NORMALISED_BAR_COLOR = 'gray'
UNNORMALISED_LINE_COLOR = 'k'

# Input data directory and output location of the figure.
DATA_DIR = '/data/mine/vadeec/merged/house1'
FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing'
                                 '/papers/tetc2013/figures/')
LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'power_histograms.pdf')

# Load the 'volts' parameter from the high-frequency mains file.
# NOTE(review): presumably used further down to normalise power -- confirm.
voltage = Channel()
voltage.load_high_freq_mains(os.path.join(DATA_DIR, 'mains.dat'), 'volts')

# Channel ids to load, in this order.
CHAN_IDS = [24,5,12,22,7,9,8,11,42,14,16,4]

spfl.setup(columns=2)
# NOTE(review): TITLE_Y and MINIMUM_BIN_COUNT are not used in this part of
# the script; presumably consumed by the plotting code that follows.
TITLE_Y = 0.7
MINIMUM_BIN_COUNT = 100

chans = []  # loaded Channel objects, same order as CHAN_IDS
normalised = []  # not filled in this part of the script
for chan_id in CHAN_IDS:
    # Get channel data
    print("loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    chans.append(c)
TIMESPAN = 'W' # D (daily) or W (weekly) CHAN_IDS = [14,22] spfl.setup() GRID = False TITLE_Y = 0.75 XTICKS_ON = True LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'weekly_usage_histograms'+FIGURE_SUFFIX) else: CHAN_IDS = [] CHANS = [] for chan_id in CHAN_IDS: # Get channel data print("Loading channel", chan_id) c = Channel(DATA_DIR, chan_id) c = c.crop(START_DATE, END_DATE) CHANS.append(c) if FIGURE_PRESET == 'boiler seasons': BIN_SIZE = 'T' # D (daily) or H (hourly) or T (minutely) TIMESPAN = 'D' # D (daily) or W (weekly) spfl.setup() GRID = False TITLE_Y = 0.7 XTICKS_ON = True LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'seasonal_variation'+FIGURE_SUFFIX) print("Loading winter boiler data...") winter_boiler = Channel(DATA_DIR, 2) winter_boiler = winter_boiler.crop(datetime.datetime(year=2013, month=2, day=1),
def init_aggregate_and_appliance_dataset_figure(
        start_date, end_date, n_subplots=2, aggregate_type='one second',
        plot_both_aggregate_signals=False, data_dir=DD,
        plot_appliance_ground_truth=True, ignore_chans=None, **kwargs):
    """Initialise a basic figure with multiple subplots.

    Plot aggregate data on the first subplot. Optionally plot the appliance
    ground truth dataset on the second subplot. All subplots share the x
    axis of the first one.

    Args:
        start_date, end_date (str): Required. e.g. '2013/6/4 18:00'
        n_subplots (int): Default=2. Must be >= 1, or >= 2 when
            plot_appliance_ground_truth is True. Includes aggregate and
            appliance ground truth plots.
        aggregate_type (str): 'one second' or 'current cost'. The flavour
            of aggregate data to load, plot and return.
        plot_both_aggregate_signals (bool): Default==False. Plot both
            flavours of aggregate data? Has no effect on which flavour
            is returned.
        data_dir (str): Default=DD
        plot_appliance_ground_truth (bool): Default==True
        ignore_chans (list of strings or ints): Defaults to a standard list
            of channels to ignore.
        **kwargs: passed to ax.plot

    Returns:
        subplots (list of axes), chan (pda.Channel)

    Raises:
        ValueError: if aggregate_type is not recognised and
            plot_both_aggregate_signals is False (no aggregate data would
            be loaded, so there would be nothing to return).
    """
    if plot_appliance_ground_truth:
        assert n_subplots >= 2
    else:
        assert n_subplots >= 1

    # Fail early and clearly. Previously this case created a figure and then
    # crashed on an unbound `cc` when choosing the channel to return.
    if (not plot_both_aggregate_signals and
            aggregate_type not in ('one second', 'current cost')):
        raise ValueError(
            "aggregate_type must be 'one second' or 'current cost'; got {!r}"
            .format(aggregate_type))

    # Initialise figure and subplots
    fig = plt.figure()
    # Set the title through the figure manager: the canvas-level
    # set_window_title() has been removed from current matplotlib, and the
    # old one only delegated to the manager anyway.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(start_date + ' - ' + end_date)

    subplots = [fig.add_subplot(n_subplots, 1, 1)]
    for i in range(2, n_subplots + 1):
        subplots.append(fig.add_subplot(n_subplots, 1, i,
                                        sharex=subplots[0]))

    # Load and plot aggregate channel(s)
    if aggregate_type == 'one second' or plot_both_aggregate_signals:
        print('Loading high freq mains...')
        one_sec = Channel()
        one_sec.load_normalised(data_dir, high_freq_param='active',
                                start_date=start_date, end_date=end_date)
        one_sec.plot(subplots[0], color='k', **kwargs)

    if aggregate_type == 'current cost' or plot_both_aggregate_signals:
        print('Loading Current Cost aggregate...')
        cc = Channel(data_dir, 'aggregate',
                     start_date=start_date,
                     end_date=end_date)  # cc = Current cost
        cc.plot(subplots[0], color='r', **kwargs)

    subplots[0].set_title('Aggregate. 1s active power, normalised.')
    subplots[0].legend()

    chan = one_sec if aggregate_type == 'one second' else cc

    if plot_appliance_ground_truth:
        print('Loading appliance ground truth dataset...')
        if ignore_chans is None:
            ignore_chans = [
                'aggregate', 'amp_livingroom', 'adsl_router',
                'livingroom_s_lamp', 'gigE_&_USBhub',
                'livingroom_s_lamp2', 'iPad_charger',
                'subwoofer_livingroom', 'livingroom_lamp_tv',
                'DAB_radio_livingroom', 'kitchen_lamp2',
                'kitchen_phone&stereo', 'utilityrm_lamp',
                'samsung_charger', 'kitchen_radio',
                'bedroom_chargers', 'data_logger_pc',
                'childs_table_lamp', 'baby_monitor_tx',
                'battery_charger', 'office_lamp1', 'office_lamp2',
                'office_lamp3', 'gigE_switch',
            ]
        ds = load_dataset(data_dir, ignore_chans=ignore_chans,
                          start_date=start_date, end_date=end_date)
        print("Removing inactive channels...")
        ds = remove_inactive_channels(ds)
        print("Plotting dataset ground truth...")
        plot_each_channel_activity(subplots[1], ds)

    return subplots, chan
# Script preamble: configure constants, load the mains voltage channel and
# load every appliance channel listed in CHAN_IDS.
from __future__ import print_function, division
from pda.channel import Channel
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import os, datetime
import setupPlottingForLaTeX as spfl

# Plot colours (matplotlib colour specs).
NORMALISED_BAR_COLOR = 'gray'
UNNORMALISED_LINE_COLOR = 'k'

# Input data directory and output location of the figure.
DATA_DIR = '/data/mine/vadeec/merged/house1'
FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing'
                                 '/papers/tetc2013/figures/')
LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'power_histograms.pdf')

# Load the 'volts' parameter from the high-frequency mains file.
# NOTE(review): presumably used further down to normalise power -- confirm.
voltage = Channel()
voltage.load_high_freq_mains(os.path.join(DATA_DIR, 'mains.dat'), 'volts')

# Channel ids to load, in this order.
CHAN_IDS = [24, 5, 12, 22, 7, 9, 8, 11, 42, 14, 16, 4]

spfl.setup(columns=2)
# NOTE(review): TITLE_Y and MINIMUM_BIN_COUNT are not used in this part of
# the script; presumably consumed by the plotting code that follows.
TITLE_Y = 0.7
MINIMUM_BIN_COUNT = 100

chans = []  # loaded Channel objects, same order as CHAN_IDS
normalised = []  # not filled in this part of the script
for chan_id in CHAN_IDS:
    # Get channel data
    print("loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    chans.append(c)
# Input data directory and output location of the figure.
DATA_DIR = '/data/mine/vadeec/merged/house1'
FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing'
                                 '/papers/tetc2013/figures/')
LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'on_durations.pdf')

# Channel ids to load, in this order.
CHAN_IDS = [6, 5, 12, 22, 39, 9, 8, 11, 42, 13, 16, 4]

spfl.setup(columns=2)
TITLE_Y = 0.7

chans = []
for chan_id in CHAN_IDS:
    # Get channel data
    print("loading channel", chan_id)
    c = Channel(DATA_DIR, chan_id)
    chans.append(c)

#-------------------------------------------
fig = plt.figure()
n_subplots = len(chans)

# Per-appliance value for the `ignore_n_off_samples` argument of
# Channel.durations, keyed by channel name. Channels not listed get None.
# Built once, rather than on every loop iteration.
ignore_n_off_samples = {'breadmaker': 600,
                        'washing_machine': 10,
                        'dishwasher': 10}

# enumerate() yields the 1-based subplot position directly; the previous
# chans.index(c) + 1 re-scanned the list on every iteration and depended on
# how Channel objects compare equal.
for subplot_index, c in enumerate(chans, start=1):
    on_durations = c.durations(
        'on', ignore_n_off_samples=ignore_n_off_samples.get(c.name))
#!/bin/python from __future__ import print_function, division from pda.channel import Channel import matplotlib.pyplot as plt import matplotlib.ticker as ticker from matplotlib import animation import numpy as np import datetime BIN_SIZE = 'H' # H (hourly) or T (minutely) c = Channel('/data/mine/vadeec/jack-merged', 2) START_PERIOD = c.series.index[0].to_period('W') width = 1440 if BIN_SIZE=='T' else 24 fig = plt.figure() ax = fig.add_subplot(111) COLOR = 'b' x = np.arange(width) y = np.zeros(width) rects = ax.bar(x, y, facecolor=COLOR, edgecolor=COLOR) ax.set_xlim([0, width]) ax.set_ylim([0, 10]) ax.xaxis.set_major_locator(ticker.MultipleLocator(width / 12)) def format_time(x, pos=None): if BIN_SIZE == 'T': #minutely hours = x // 60 else: hours = x return '{:d}'.format(int(hours))
DATA_DIR = '/data/mine/vadeec/merged/house1' FIGURE_PATH = os.path.expanduser('~/Dropbox/MyWork/imperial/PhD/writing' '/papers/tetc2013/figures/') LATEX_PDF_OUTPUT_FILENAME = os.path.join(FIGURE_PATH, 'on_durations.pdf') CHAN_IDS = [6, 5, 12, 22, 39, 9, 8, 11, 42, 13, 16, 4] spfl.setup(columns=2) TITLE_Y = 0.7 chans = [] for chan_id in CHAN_IDS: # Get channel data print("loading channel", chan_id) c = Channel(DATA_DIR, chan_id) chans.append(c) #------------------------------------------- fig = plt.figure() n_subplots = len(chans) for c in chans: subplot_index = chans.index(c) + 1 ignore_n_off_samples = { 'breadmaker': 600, 'washing_machine': 10, 'dishwasher': 10 } on_durations = c.durations('on',
def init_aggregate_and_appliance_dataset_figure(
        start_date, end_date, n_subplots=2, aggregate_type='one second',
        plot_both_aggregate_signals=False, data_dir=DD,
        plot_appliance_ground_truth=True, ignore_chans=None, **kwargs):
    """Initialise a basic figure with multiple subplots.

    Plot aggregate data on the first subplot. Optionally plot the appliance
    ground truth dataset on the second subplot. All subplots share the x
    axis of the first one.

    Args:
        start_date, end_date (str): Required. e.g. '2013/6/4 18:00'
        n_subplots (int): Default=2. Must be >= 1, or >= 2 when
            plot_appliance_ground_truth is True. Includes aggregate and
            appliance ground truth plots.
        aggregate_type (str): 'one second' or 'current cost'. The flavour
            of aggregate data to load, plot and return.
        plot_both_aggregate_signals (bool): Default==False. Plot both
            flavours of aggregate data? Has no effect on which flavour
            is returned.
        data_dir (str): Default=DD
        plot_appliance_ground_truth (bool): Default==True
        ignore_chans (list of strings or ints): Defaults to a standard list
            of channels to ignore.
        **kwargs: passed to ax.plot

    Returns:
        subplots (list of axes), chan (pda.Channel)

    Raises:
        ValueError: if aggregate_type is not recognised and
            plot_both_aggregate_signals is False (no aggregate data would
            be loaded, so there would be nothing to return).
    """
    if plot_appliance_ground_truth:
        assert n_subplots >= 2
    else:
        assert n_subplots >= 1

    # Fail early and clearly. Previously this case created a figure and then
    # crashed on an unbound `cc` when choosing the channel to return.
    if (not plot_both_aggregate_signals and
            aggregate_type not in ('one second', 'current cost')):
        raise ValueError(
            "aggregate_type must be 'one second' or 'current cost'; got {!r}"
            .format(aggregate_type))

    # Initialise figure and subplots
    fig = plt.figure()
    # Set the title through the figure manager: the canvas-level
    # set_window_title() has been removed from current matplotlib, and the
    # old one only delegated to the manager anyway.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None:
        manager.set_window_title(start_date + ' - ' + end_date)

    subplots = [fig.add_subplot(n_subplots, 1, 1)]
    for i in range(2, n_subplots + 1):
        subplots.append(fig.add_subplot(n_subplots, 1, i,
                                        sharex=subplots[0]))

    # Load and plot aggregate channel(s)
    if aggregate_type == 'one second' or plot_both_aggregate_signals:
        print('Loading high freq mains...')
        one_sec = Channel()
        one_sec.load_normalised(data_dir, high_freq_param='active',
                                start_date=start_date, end_date=end_date)
        one_sec.plot(subplots[0], color='k', **kwargs)

    if aggregate_type == 'current cost' or plot_both_aggregate_signals:
        print('Loading Current Cost aggregate...')
        cc = Channel(data_dir, 'aggregate',
                     start_date=start_date,
                     end_date=end_date)  # cc = Current cost
        cc.plot(subplots[0], color='r', **kwargs)

    subplots[0].set_title('Aggregate. 1s active power, normalised.')
    subplots[0].legend()

    chan = one_sec if aggregate_type == 'one second' else cc

    if plot_appliance_ground_truth:
        print('Loading appliance ground truth dataset...')
        if ignore_chans is None:
            ignore_chans = [
                'aggregate', 'amp_livingroom', 'adsl_router',
                'livingroom_s_lamp', 'gigE_&_USBhub',
                'livingroom_s_lamp2', 'iPad_charger',
                'subwoofer_livingroom', 'livingroom_lamp_tv',
                'DAB_radio_livingroom', 'kitchen_lamp2',
                'kitchen_phone&stereo', 'utilityrm_lamp',
                'samsung_charger', 'kitchen_radio',
                'bedroom_chargers', 'data_logger_pc',
                'childs_table_lamp', 'baby_monitor_tx',
                'battery_charger', 'office_lamp1', 'office_lamp2',
                'office_lamp3', 'gigE_switch',
            ]
        ds = load_dataset(data_dir, ignore_chans=ignore_chans,
                          start_date=start_date, end_date=end_date)
        print("Removing inactive channels...")
        ds = remove_inactive_channels(ds)
        print("Plotting dataset ground truth...")
        plot_each_channel_activity(subplots[1], ds)

    return subplots, chan
TIMESPAN = 'W' # D (daily) or W (weekly) CHAN_IDS = [14, 22] spfl.setup() GRID = False TITLE_Y = 0.75 XTICKS_ON = True LATEX_PDF_OUTPUT_FILENAME = os.path.join( FIGURE_PATH, 'weekly_usage_histograms' + FIGURE_SUFFIX) else: CHAN_IDS = [] CHANS = [] for chan_id in CHAN_IDS: # Get channel data print("Loading channel", chan_id) c = Channel(DATA_DIR, chan_id) c = c.crop(START_DATE, END_DATE) CHANS.append(c) if FIGURE_PRESET == 'boiler seasons': BIN_SIZE = 'T' # D (daily) or H (hourly) or T (minutely) TIMESPAN = 'D' # D (daily) or W (weekly) spfl.setup() GRID = False TITLE_Y = 0.7 XTICKS_ON = True LATEX_PDF_OUTPUT_FILENAME = os.path.join( FIGURE_PATH, 'seasonal_variation' + FIGURE_SUFFIX) print("Loading winter boiler data...") winter_boiler = Channel(DATA_DIR, 2) winter_boiler = winter_boiler.crop(