import pickle
import numpy as np
# `dt` is the project's data-reader module (provides ReaderTS); assumed to be imported elsewhere.


def fetch_ukdale(data_path, windows, appliances, numApps, period, n_steps,
                 stride_train, stride_test, typeLoad=0, seq_per_batch=0,
                 flgAggSumScaled=0, flgFilterZeros=0, target_inclusion_prob=0.5,
                 trainPer=0.5, valPer=0.25, testPer=0.25):
    '''
    Deleted a large block that appeared to generate data from other metadata.
    '''
    reader = dt.ReaderTS(windows, appliances, n_steps, stride_train, stride_test,
                         period, flgAggSumScaled, flgFilterZeros, flgScaling=1,
                         trainPer=trainPer, valPer=valPer, testPer=testPer)
    print(windows)
    if (numApps == -1):
        truFileName = 'all_' + str(n_steps) + '_tr' + str(trainPer) + '_te' + str(testPer) + \
                      '_val' + str(valPer) + '_b' + str(len(windows)) + '_' + \
                      str(windows[1][0]) + '_' + str(windows[1][1])
    else:
        if (typeLoad == 1):
            truFileName = appliances[numApps] + '_' + str(n_steps) + \
                          '_tr' + str(windows['train'][1][0]) + \
                          '_te' + str(windows['test'][1][0]) + \
                          '_val' + str(windows['val'][1][0]) + '_' + str(windows['val'][1][1])
        else:
            truFileName = appliances[numApps] + '_' + str(n_steps) + '_' + \
                          str(windows[1][0]) + '_' + str(windows[1][1])
    try:
        split = pickle.load(open(data_path + "/pickles/" + truFileName + ".pickle", "rb"))
        return (split['X_train'], split['Y_train'], split['X_val'], split['Y_val'],
                split['X_test'], split['Y_test'], reader)
    except (OSError, IOError) as e:
        XdataSet, YdataSet = reader.load_csvdata(data_path, numApps, typeLoad,
                                                 seq_per_batch, target_inclusion_prob)
        # Shape before: batch, apps, steps
        x_train, x_val, x_test = XdataSet['train'], XdataSet['val'], XdataSet['test']
        y_train, y_val, y_test = YdataSet['train'], YdataSet['val'], YdataSet['test']
        x_train, x_val, x_test = (np.expand_dims(x_train, axis=2),
                                  np.expand_dims(x_val, axis=2),
                                  np.expand_dims(x_test, axis=2))
        if (numApps != -1):
            y_train, y_val, y_test = (np.expand_dims(y_train, axis=2),
                                      np.expand_dims(y_val, axis=2),
                                      np.expand_dims(y_test, axis=2))
        with open(data_path + "/pickles/" + truFileName + ".pickle", 'wb') as splitWrite:
            pickle.dump({'X_train': x_train, 'Y_train': y_train,
                         'X_val': x_val, 'Y_val': y_val,
                         'X_test': x_test, 'Y_test': y_test}, splitWrite)
        return x_train, y_train, x_val, y_val, x_test, y_test, reader
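# Example usage (a minimal sketch, not from the original repo): the window
# layout, date ranges, and appliance list below are assumptions made only to
# illustrate how fetch_ukdale is called with numApps=-1 (all appliances).
if __name__ == '__main__':
    example_windows = {1: ("2013-04-12", "2014-12-15")}           # house -> (start, end); assumed format
    example_appliances = ['kettle', 'fridge', 'washing machine']  # illustrative list
    X_tr, Y_tr, X_va, Y_va, X_te, Y_te, rdr = fetch_ukdale(
        data_path='data/ukdale', windows=example_windows, appliances=example_appliances,
        numApps=-1, period=6, n_steps=200, stride_train=50, stride_test=200)
    print(X_tr.shape, Y_tr.shape)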
import matplotlib.pyplot as plt
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.datasets import imdb

# Hyperparameters
lr = 0.001
# batch_size = 513  # 2565 / 5
numBatchs = 5
# batchSizTest = len(XtestSet)
n_classes = 10
n_inputs = 10       # input dimension
n_steps = 20        # time steps
n_hidden_units = 6  # neurons in hidden layer

# Barcelona dataset loading (`dt` is the project's data-reader module)
reader = dt.ReaderTS(n_inputs)
XdataSet, YdataSet = reader.load_csvdata(n_steps)
x_train, x_test, y_train, y_test = (XdataSet['train'], XdataSet['test'],
                                    YdataSet['train'], YdataSet['test'])
x_val, y_val = XdataSet['val'], YdataSet['val']

# Network building
net = tflearn.input_data([None, n_inputs, n_steps])
net = tflearn.lstm(net, n_hidden_units)  # , dropout=0.8
net = tflearn.fully_connected(net, n_classes, activation='linear')
net = tflearn.regression(net, optimizer='adam', learning_rate=0.001,
                         loss='mean_square')
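# A minimal training sketch (assumption, not part of the original script):
# wrap the regression graph above in a tflearn DNN and fit on the splits
# loaded by ReaderTS. Epoch count and batch size are illustrative.
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(x_train, y_train, validation_set=(x_val, y_val),
          n_epoch=10, batch_size=64, show_metric=True)
score = model.evaluate(x_test, y_test)
print(score)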
# Imports for the network below (assumption: the standalone Keras package)
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM, Dense
from keras import initializers, optimizers

# Hyperparameters
lr = 0.001
batch_size = 171  # 2565 / 15
numBatchs = 15
instXday = 3
# batchSizTest = len(XtestSet)
n_inputs = 22         # number of input features per time step
n_steps = 136         # time steps
n_hidden_units = 110  # neurons in hidden layer

# Loading the dataset (`dt` is the project's data-reader module)
paddType = 1
nChannels = 1
reader = dt.ReaderTS(n_steps, n_inputs, instXday, paddType, nChannels)
XdataSet, YdataSet, n_classes, _, arrLens = reader.generateDataSet()

# Shuffle the 2565 instances and hold out the last batch as the test set
indexRandom = np.random.permutation(2565)
batchDataSets = np.reshape(indexRandom, (numBatchs, batch_size))
indexAccSet = np.reshape(batchDataSets[0:numBatchs - 1], (batch_size * (numBatchs - 1)))
X_train = XdataSet[indexAccSet]
Y_train = YdataSet[indexAccSet]
X_test = XdataSet[batchDataSets[-1]]
Y_test = YdataSet[batchDataSets[-1]]

# Build LSTM network
model = Sequential()
model.add(LSTM(n_hidden_units, input_shape=(n_steps, n_inputs)))
model.add(Dense(n_classes, activation='softmax',
                init=initializers.random_normal(stddev=0.1)))
adamOpt = optimizers.Adam(lr=lr)
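# A minimal sketch of compiling and fitting the model above (assumptions: the
# labels Y_train/Y_test are one-hot encoded and the epoch count is illustrative).
model.compile(loss='categorical_crossentropy', optimizer=adamOpt, metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size, epochs=10,
          validation_data=(X_test, Y_test))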
def weight_variable(shape):
    # truncated_normal, stddev=0.1 / w['in'] = tf.random_normal
    initial = tf.random_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1)  # , shape=shape
    return tf.Variable(initial)


# DATAPORT loading (`dt` is the project's data-reader module)
reader = dt.ReaderTS(n_inputs, stride_train, stride_test)
XdataSet, YdataSet, n_classes = reader.load_csvdata(nilmkt_fileName, period, n_steps)
x_train, x_test, y_train, y_test = (np.transpose(XdataSet['train'], [0, 2, 1]),
                                    np.transpose(XdataSet['test'], [0, 2, 1]),
                                    np.transpose(YdataSet['train'], [0, 2, 1]),
                                    np.transpose(YdataSet['test'], [0, 2, 1]))
x_val, y_val = (np.transpose(XdataSet['val'], [0, 2, 1]),
                np.transpose(YdataSet['val'], [0, 2, 1]))
# print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

# tf Graph input
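# Sketch only (not from the original file): placeholders matching the
# [batch, steps, features] layout produced by the transposes above. The
# names and the exact shape of `y` are assumptions.
x = tf.placeholder(tf.float32, [None, n_steps, n_inputs])
y = tf.placeholder(tf.float32, [None, n_steps, n_classes])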
def main(args):
    lr = float(args['lr'])  # e.g. 0.0001
    nilmkt_fileName = args['nilmkt_fileName']  # Path to ukdale.h5
    n_steps = int(args['n_steps'])  # Size of input sequence (length of an instance)
    n_hidden_units = int(args['hiddenUnitsLayer1'])  # First layer number of units
    training_iters = int(args['iterations'])  # Number of iterations or epochs
    stride_train = int(args['strideTrain'])  # Separation between instances of the training set
    stride_test = n_steps  # Separation between instances of the test set
    applianceTest = int(args['applianceToTest'])  # Appliance (order number in the list) to evaluate
    period = 6  # Sampling period used by most other works

    # Setting the number of nodes in the hidden states of the 3 layers
    n_inputs_0 = n_hidden_units  # Number of cells in input vector
    n_inputs_1 = int(n_hidden_units / 2)
    n_inputs_2 = n_hidden_units
    n_inputs_3 = n_steps

    ########### FUNCTIONS TO DEFINE WEIGHTS AND BIASES ####################
    def weight_variable(shape, m=0, std=0.5):
        # truncated_normal, stddev=0.1 / w['in'] = tf.random_normal
        initial = tf.random_normal(shape, mean=m, stddev=std)
        return tf.Variable(initial)

    def bias_variable(shape, c=1.0):
        initial = tf.constant(c, shape=shape)
        return tf.Variable(initial)

    def conv1d(x, W, s):
        return tf.nn.conv1d(x, W, stride=s, padding='SAME')

    ############ LOADING THE DATA SET ############
    # `windows` and `appliances` are assumed to be defined at module level.
    reader = dt.ReaderTS(windows, appliances, n_steps, stride_train, stride_test,
                         period, flgScaling=1, trainPer=0.5, valPer=0.25, testPer=0.25)
    XdataSet, YdataSet = reader.load_csvdata(nilmkt_fileName, applianceTest)
    # Depending on applianceTest (-1 for all): x.shape = [batch, seqLen]
    #   y.shape = [batch, seqLen] or [batch, seqLen, numAppliances]
    x_train, x_test, x_val = XdataSet['train'], XdataSet['test'], XdataSet['val']
    y_train, y_test, y_val = YdataSet['train'], YdataSet['test'], YdataSet['val']

    ############## STARTING TO CONSTRUCT THE GRAPH: INPUTS AND OPERATIONS #####################
    x = tf.placeholder(tf.float32, [None, n_steps])
    y = tf.placeholder(tf.float32, [None, n_steps])
    # Size of the batch (it can change in validation and/or test runs)
    initBatch = tf.placeholder(tf.int32, shape=())

    ## Defining layers
    Win_0 = weight_variable([n_steps, n_inputs_0])
    bin_0 = bias_variable([n_inputs_0])
    h_0 = tf.sigmoid(tf.matmul(x, Win_0) + bin_0)

    Win_1 = weight_variable([n_inputs_0, n_inputs_1])
    bin_1 = bias_variable([n_inputs_1])
    h_1 = tf.sigmoid(tf.matmul(h_0, Win_1) + bin_1)

    Win_2 = weight_variable([n_inputs_1, n_inputs_2])
    bin_2 = bias_variable([n_inputs_2])
    h_2 = tf.nn.sigmoid(tf.matmul(h_1, Win_2) + bin_2)

    Win_3 = weight_variable([n_inputs_2, n_inputs_3])
    bin_3 = bias_variable([n_inputs_3])
    pred = tf.nn.relu(tf.matmul(h_2, Win_3) + bin_3)

    ## Tell TensorFlow to record the values of these parameters at each iteration
    tf.summary.histogram('Win_0', Win_0)
    tf.summary.histogram('Win_1', Win_1)
    tf.summary.histogram('Win_2', Win_2)

    ### Flatten the real Y and the prediction so the results can be measured as other models do
    yForMetric = tf.reshape(y, [-1])
    predForMetric = tf.reshape(pred, [-1])

    ### Calculating metrics (done only in validation/testing; not part of the optimization)
    # Check what the difference is between these calculations. Meanwhile, staying with upMae0.
    # tf.metrics.mean_absolute_error = tf.contrib.metrics.streaming_mean_absolute_error
    mae0, upMae0 = tf.metrics.mean_absolute_error(
        labels=yForMetric, predictions=predForMetric)  # Some problem with initialization
    upMae1 = tf.reduce_mean(tf.abs(yForMetric - predForMetric))
    sum_output = tf.reduce_sum(pred)
    sum_target = tf.reduce_sum(y)
    relativeError = (sum_output - sum_target) / tf.maximum(sum_output, sum_target)

    #### Defining the cost function and the optimizer
    cost = tf.losses.mean_squared_error(yForMetric, predForMetric)
    tf.summary.scalar('cost', cost)  # Track values of the cost
    '''
    # Evaluate adding a regularizer
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.5)  # scale=0.05, scope=None
    regularization_penalty = tf.contrib.layers.apply_regularization(l1_regularizer, [W_conv])
    '''
    train_op = tf.train.AdamOptimizer(lr, epsilon=0.1).minimize(cost)

    # Show the sizes of the weight matrices to know the total number of parameters used
    for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        print(v)

    ## Auxiliary structure to track cost and metrics.
    ## Could be done with summary.scalar but not figured out yet - TensorBoard tracking
    tracking = {'step': [], 'cost': [], 'mae': [], 'relerr': []}

    ############ START RUNNING #############
    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        # Mean absolute error needs local variables initialized
        sess.run(tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        # Write values of some variables to check them later in TensorBoard
        train_writer = tf.summary.FileWriter(
            'Logs/' + datetime.datetime.now().strftime("%y-%m-%d_%H-%M"), sess.graph)
        test_writer = tf.summary.FileWriter('Logs/LogsTestAE_SepApp/')

        # Print metrics on the validation set every (1/10) of the iterations
        step = 0
        while (step < training_iters):
            [summary, op_train] = sess.run([merged, train_op],
                                           feed_dict={
                                               x: x_train,
                                               y: y_train,
                                               initBatch: x_train.shape[0]
                                           })
            train_writer.add_summary(summary, step)
            if (step % (training_iters / 10) == 0):
                trncost, metric_upMae0, metric_upMae1, out_preds, out_re = sess.run(
                    [cost, upMae0, upMae1, pred, relativeError],
                    feed_dict={
                        x: x_val,
                        y: y_val,
                        initBatch: x_val.shape[0]
                    })
                tracking['step'].append(step)
                tracking['cost'].append(trncost)
                tracking['mae'].append(metric_upMae0)
                tracking['relerr'].append(out_re)
                print("Time {} Step {:5d} Cost {:10.2f} Metric {:6.2f}/{:6.2f} RelError {:3.2f}"
                      .format(datetime.datetime.now().strftime("%y-%m-%d_%H-%M"),
                              step, trncost, metric_upMae0, metric_upMae1, out_re))
            step += 1

        tstcost, out_preds, metric_upMae0, out_re = sess.run(
            [cost, pred, upMae0, relativeError],
            feed_dict={
                x: x_test,
                y: y_test,
                initBatch: x_test.shape[0]
            })
        print("\nTEST Cost {} Metric {} Relat err {}".format(tstcost, metric_upMae0, out_re))

        savedFolder = 'Experiments/app{}_{}'.format(
            applianceTest, datetime.datetime.now().strftime("%y-%m-%d_%H-%M"))
        os.makedirs(savedFolder)

        yFlat = y_test.flatten()
        predFlat = out_preds.flatten()
        # Concatenate the results horizontally to recover one large sequence
        newTest = np.concatenate([y_test[i] for i in range(y_test.shape[0])], 0)
        # tf.concat([y_test[i] for i in range(y_test.shape[0])], 0)
        newPred = np.concatenate([out_preds[i] for i in range(y_test.shape[0])], 0)
        # tf.concat([preds[i] for i in range(preds.shape[0])], 0)
        newInput = np.concatenate([x_test[i] for i in range(y_test.shape[0])], 0)
        assert newTest.shape == newPred.shape
        print("Test size {}".format(newTest.shape))

        fLog = open('{}/{}_output'.format(savedFolder, applianceTest), 'w')
        fLog.write("Step,Cost,MAE,Rel error\n")
        for i, item in enumerate(tracking['step']):
            a = tracking['step'][i]
            b = tracking['cost'][i]
            c = tracking['mae'][i]
            d = tracking['relerr'][i]
            fLog.write("{},{},{},{}\n".format(a, b, c, d))
        fLog.write("Test,{},{},{}\n".format(tstcost, metric_upMae0, out_re))

        f, axarr = plt.subplots(3, 1, sharex=True)
        axarr[0].plot(newTest)
        axarr[0].set_title('Y-real')
        axarr[1].plot(newPred)
        axarr[1].set_title('Y-pred')
        axarr[2].plot(newInput)
        axarr[2].set_title('X-real')
        f.subplots_adjust(top=0.92, bottom=0.05, left=0.10, right=0.95,
                          hspace=0.3, wspace=0.3)
        # self.savedFolder+'/'+ch.name+str(numfig)
        plt.savefig('{}/{}_plots'.format(savedFolder, applianceTest))
        plt.clf()
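# A minimal invocation sketch (assumption: the original repo parses these values
# from the command line; the dict below only mirrors the keys main() reads, with
# illustrative values).
if __name__ == '__main__':
    example_args = {
        'lr': '0.0001',
        'nilmkt_fileName': 'data/ukdale.h5',
        'n_steps': '200',
        'hiddenUnitsLayer1': '256',
        'iterations': '1000',
        'strideTrain': '50',
        'applianceToTest': '0',
    }
    main(example_args)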