Example #1
import pickle

import numpy as np

# `dt` is the project's data-reading module; its import is not shown in the original snippet


def fetch_ukdale(data_path, windows, appliances, numApps, period, n_steps, stride_train, stride_test,
                 typeLoad=0, seq_per_batch=0, flgAggSumScaled=0, flgFilterZeros=0,
                 target_inclusion_prob=0.5, trainPer=0.5, valPer=0.25, testPer=0.25):

     
    '''
    A large block that appeared to generate data from other metadata was deleted here.
    '''
    reader = dt.ReaderTS(windows, appliances, n_steps, stride_train, stride_test, period, flgAggSumScaled, flgFilterZeros,
                        flgScaling=1, trainPer=trainPer, valPer=valPer, testPer=testPer)
    print(windows)
    if numApps == -1:
        truFileName = ('all_' + str(n_steps) + '_tr' + str(trainPer) + '_te' + str(testPer) +
                       '_val' + str(valPer) + '_b' + str(len(windows)) +
                       '_' + str(windows[1][0]) + '_' + str(windows[1][1]))
    elif typeLoad == 1:
        truFileName = (appliances[numApps] + '_' + str(n_steps) +
                       '_tr' + str(windows['train'][1][0]) + '_te' + str(windows['test'][1][0]) +
                       '_val' + str(windows['val'][1][0]) + '_' + str(windows['val'][1][1]))
    else:
        truFileName = (appliances[numApps] + '_' + str(n_steps) +
                       '_' + str(windows[1][0]) + '_' + str(windows[1][1]))
    try:
        with open(data_path + "/pickles/" + truFileName + ".pickle", "rb") as pickleRead:
            split = pickle.load(pickleRead)
        return split['X_train'], split['Y_train'], split['X_val'], split['Y_val'], split['X_test'], split['Y_test'], reader
    except (OSError, IOError):
        XdataSet, YdataSet = reader.load_csvdata(data_path, numApps, typeLoad, seq_per_batch, target_inclusion_prob)
        # shape before: batch, apps, steps
        x_train, x_val, x_test = XdataSet['train'], XdataSet['val'], XdataSet['test']
        y_train, y_val, y_test = YdataSet['train'], YdataSet['val'], YdataSet['test']
        x_train, x_val, x_test = np.expand_dims(x_train, axis=2), np.expand_dims(x_val, axis=2), np.expand_dims(x_test, axis=2)
        if numApps != -1:
            y_train, y_val, y_test = np.expand_dims(y_train, axis=2), np.expand_dims(y_val, axis=2), np.expand_dims(y_test, axis=2)
        with open(data_path + "/pickles/" + truFileName + ".pickle", 'wb') as splitWrite:
            pickle.dump({'X_train': x_train, 'Y_train': y_train, 'X_val': x_val, 'Y_val': y_val,
                         'X_test': x_test, 'Y_test': y_test}, splitWrite)

    return x_train, y_train, x_val, y_val, x_test,y_test, reader
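
# A minimal call sketch for the loader above; the windows dict, appliance list,
# and path are placeholders (assumptions), not values from the original project:
appliances = ['kettle', 'microwave', 'washing machine']
windows = {1: ('2013-04-01', '2013-07-01')}  # assumed per-building date ranges
X_tr, Y_tr, X_v, Y_v, X_te, Y_te, reader = fetch_ukdale(
    'data/ukdale',  # assumed data_path; expects a 'pickles/' subfolder for caching
    windows, appliances,
    numApps=0,      # first appliance only (-1 = all appliances)
    period=6, n_steps=100, stride_train=50, stride_test=100)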
Example #2
import matplotlib.pyplot as plt
import tflearn
# `dt` is the project's data-reading module; its import is not shown in the original snippet

# hyperparameters
lr = 0.001
#batch_size = 513 # 2565 / 5
numBatchs = 5
#batchSizTest = len(XtestSet)
n_classes = 10
n_inputs = 10  # input channels per time step
n_steps = 20  # time steps
n_hidden_units = 6  # neurons in the hidden layer

# Barcelona Dataset loading
reader = dt.ReaderTS(n_inputs)
XdataSet, YdataSet = reader.load_csvdata(n_steps)

x_train, x_test = XdataSet['train'], XdataSet['test']
y_train, y_test = YdataSet['train'], YdataSet['test']
x_val, y_val = XdataSet['val'], YdataSet['val']

# Network building
net = tflearn.input_data([None, n_inputs, n_steps])
net = tflearn.lstm(net, n_hidden_units)  # optionally: dropout=0.8
net = tflearn.fully_connected(net, n_classes, activation='linear')
net = tflearn.regression(net,
                         optimizer='adam',
                         learning_rate=lr,
                         loss='mean_square')
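
# The snippet stops after defining the regression layer; a minimal training
# sketch using the standard tflearn API (epoch count and batch size are assumptions):
model = tflearn.DNN(net, tensorboard_verbose=0)
model.fit(x_train, y_train,
          validation_set=(x_val, y_val),
          n_epoch=10,     # assumed
          batch_size=32,  # assumed
          show_metric=True)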
Example #3
import datetime
import os

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from keras import initializers, optimizers
from keras.layers import Dense, LSTM
from keras.models import Sequential
# `dt` is the project's data-reading module; its import is not shown in the original snippet

# hyperparameters
lr = 0.001
batch_size = 171  # 2565 / 15
numBatchs = 15
instXday = 3
#batchSizTest = len(XtestSet)

n_inputs = 22  # input channels per time step
n_steps = 136  # time steps
n_hidden_units = 110  # neurons in the hidden layer

# Loading the dataset
paddType = 1
nChannels = 1
reader = dt.ReaderTS(n_steps, n_inputs, instXday, paddType, nChannels)
XdataSet, YdataSet, n_classes, _, arrLens = reader.generateDataSet()
indexRandom = np.random.permutation(2565)
batchDataSets = np.reshape(indexRandom, (numBatchs, batch_size))
indexAccSet = np.reshape(batchDataSets[0:numBatchs - 1], (batch_size * (numBatchs - 1)))
X_train = XdataSet[indexAccSet]
Y_train = YdataSet[indexAccSet]
X_test = XdataSet[batchDataSets[-1]]
Y_test = YdataSet[batchDataSets[-1]]

# Build LSTM network
model = Sequential()
model.add(LSTM(n_hidden_units, input_shape=(n_steps, n_inputs)))
model.add(Dense(n_classes, activation='softmax',
                kernel_initializer=initializers.RandomNormal(stddev=0.1)))

adamOpt = optimizers.Adam(lr=lr)
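
# The original snippet never compiles or trains this model; a minimal completion
# sketch (loss choice and epoch count are assumptions, not from the source):
model.compile(loss='categorical_crossentropy',  # assumed loss for the softmax output
              optimizer=adamOpt,
              metrics=['accuracy'])
model.fit(X_train, Y_train,
          batch_size=batch_size,
          epochs=10,  # assumed epoch count
          validation_data=(X_test, Y_test))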

def weight_variable(shape):
    # Normally-distributed initial weights (truncated_normal with stddev=0.1 is a common alternative)
    initial = tf.random_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    # Constant bias vector of the requested shape
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


# DATAPORT uploading
# stride_train, stride_test, nilmkt_fileName and period are not defined at this
# point in the original snippet; the placeholders below are assumptions
stride_train, stride_test = 10, n_steps  # assumed strides
nilmkt_fileName = 'data/ukdale.h5'  # assumed path to ukdale.h5
period = 6  # sampling period used elsewhere in this file
reader = dt.ReaderTS(n_inputs, stride_train, stride_test)
XdataSet, YdataSet, n_classes = reader.load_csvdata(nilmkt_fileName, period, n_steps)

# Reorder from [batch, channels, steps] to [batch, steps, channels]
x_train = np.transpose(XdataSet['train'], [0, 2, 1])
x_test = np.transpose(XdataSet['test'], [0, 2, 1])
y_train = np.transpose(YdataSet['train'], [0, 2, 1])
y_test = np.transpose(YdataSet['test'], [0, 2, 1])
x_val = np.transpose(XdataSet['val'], [0, 2, 1])
y_val = np.transpose(YdataSet['val'], [0, 2, 1])

# print(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

def main(args):

    lr = float(args['lr'])  # e.g. 0.0001
    nilmkt_fileName = args['nilmkt_fileName']  # Path to ukdale.h5
    n_steps = int(args['n_steps'])  # Length of each input sequence (instance)
    n_hidden_units = int(args['hiddenUnitsLayer1'])  # Number of units in the first layer
    training_iters = int(args['iterations'])  # Number of iterations (epochs)
    stride_train = int(args['strideTrain'])  # Separation between instances of the training set
    stride_test = n_steps  # Separation between instances of the test set
    applianceTest = int(args['applianceToTest'])  # Index (position in the list) of the appliance to evaluate
    period = 6  # The sampling period most related work uses

    # Number of units in the hidden layers of the encoder/decoder stack
    n_inputs_0 = n_hidden_units  # First hidden layer
    n_inputs_1 = int(n_hidden_units / 2)  # Bottleneck layer
    n_inputs_2 = n_hidden_units  # Mirror of the first layer
    n_inputs_3 = n_steps  # Output layer matches the sequence length

    ########### FUNCTIONS TO DEFINE WEIGHTS AND BIAS ####################

    def weight_variable(shape, m=0, std=0.5):
        # Normally-distributed initial weights (truncated_normal is a common alternative)
        initial = tf.random_normal(shape, mean=m, stddev=std)
        return tf.Variable(initial)

    def bias_variable(shape, c=1.0):
        initial = tf.constant(c, shape=shape)
        return tf.Variable(initial)

    def conv1d(x, W, s):
        return tf.nn.conv1d(x, W, stride=s, padding='SAME')
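
    # conv1d is defined but never used in the graph below; a hypothetical use of
    # the three helpers together (filter width and channel counts are made up):
    # W_conv = weight_variable([5, 1, 8])  # [filter_width, in_channels, out_channels]
    # b_conv = bias_variable([8])
    # h_conv = tf.nn.relu(conv1d(tf.expand_dims(x, 2), W_conv, s=1) + b_conv)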

    ############ UPLOADING DATA SET ############
    # `windows` (per-building date ranges) and `appliances` (names to disaggregate)
    # are not defined in the original snippet; they must be supplied by the caller
    reader = dt.ReaderTS(windows, appliances, n_steps, stride_train, stride_test,
                         period, flgScaling=1, trainPer=0.5, valPer=0.25, testPer=0.25)

    XdataSet, YdataSet = reader.load_csvdata(nilmkt_fileName, applianceTest)
    # Depending on applianceTest (-1 for all): x.shape = [batch, seqLen]
    #                                          y.shape = [batch, seqLen] or [batch, seqLen, numAppliances]
    x_train, x_test, x_val = XdataSet['train'], XdataSet['test'], XdataSet['val']
    y_train, y_test, y_val = YdataSet['train'], YdataSet['test'], YdataSet['val']

    ##############   STARTING TO CONSTRUCT GRAPH: INPUTS AND OPERATIONS #####################

    x = tf.placeholder(tf.float32, [None, n_steps])
    y = tf.placeholder(tf.float32, [None, n_steps])
    # Size of the batch (it could change in validation and/or test runs)
    initBatch = tf.placeholder(tf.int32, shape=())

    ## Defining layers

    Win_0 = weight_variable([n_steps, n_inputs_0])
    bin_0 = bias_variable([n_inputs_0])
    h_0 = tf.sigmoid(tf.matmul(x, Win_0) + bin_0)

    Win_1 = weight_variable([n_inputs_0, n_inputs_1])
    bin_1 = bias_variable([n_inputs_1])
    h_1 = tf.sigmoid(tf.matmul(h_0, Win_1) + bin_1)

    Win_2 = weight_variable([n_inputs_1, n_inputs_2])
    bin_2 = bias_variable([n_inputs_2])
    h_2 = tf.nn.sigmoid(tf.matmul(h_1, Win_2) + bin_2)

    Win_3 = weight_variable([n_inputs_2, n_inputs_3])
    bin_3 = bias_variable([n_inputs_3])
    pred = tf.nn.relu(tf.matmul(h_2, Win_3) + bin_3)

    ## Tell TensorFlow to record histograms of the weights at each iteration (for TensorBoard) ##
    tf.summary.histogram('Win_0', Win_0)
    tf.summary.histogram('Win_1', Win_1)
    tf.summary.histogram('Win_2', Win_2)

    ### Flatten real Y and prediction to be able to measure the results as other models do
    yForMetric = tf.reshape(y, [-1])
    predForMetric = tf.reshape(pred, [-1])

    ### Calculating metrics (only used in validation/testing; not part of the optimization)
    # TODO: check the difference between these calculations; staying with upMae0 meanwhile
    # tf.metrics.mean_absolute_error == tf.contrib.metrics.streaming_mean_absolute_error
    mae0, upMae0 = tf.metrics.mean_absolute_error(
        labels=yForMetric,
        predictions=predForMetric)  # Requires local-variable initialization (see below)
    upMae1 = tf.reduce_mean(tf.abs(yForMetric - predForMetric))

    sum_output = tf.reduce_sum(pred)
    sum_target = tf.reduce_sum(y)
    relativeError = (sum_output - sum_target) / tf.maximum(
        sum_output, sum_target)

    #### Defining the cost function and optimizer
    cost = tf.losses.mean_squared_error(yForMetric, predForMetric)
    tf.summary.scalar('cost', cost)  # Track the value of the cost
    '''
    # TODO: evaluate adding an L1 regularizer
    l1_regularizer = tf.contrib.layers.l1_regularizer(scale=0.5)  # scale=0.05, scope=None
    regularization_penalty = tf.contrib.layers.apply_regularization(l1_regularizer, [W_conv])
    '''
    train_op = tf.train.AdamOptimizer(lr, epsilon=0.1).minimize(cost)

    # Show the sizes of the weight matrices to know the total number of parameters used
    for v in tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES):
        print(v)

    ## Auxiliary structure to track cost and metrics.
    ## Could also be done with summary.scalar in TensorBoard
    tracking = {'step': [], 'cost': [], 'mae': [], 'relerr': []}

    ############   START RUNNING   #############

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        # tf.metrics.mean_absolute_error needs local variables initialized
        sess.run(tf.local_variables_initializer())
        merged = tf.summary.merge_all()
        # Write values of some variables to inspect later in TensorBoard
        train_writer = tf.summary.FileWriter(
            'Logs/' + datetime.datetime.now().strftime("%y-%m-%d_%H-%M"), sess.graph)
        test_writer = tf.summary.FileWriter('Logs/LogsTestAE_SepApp/')
        # Print metrics on validation set every (1/10) of the iterations
        step = 0
        while step < training_iters:
            summary, _ = sess.run([merged, train_op],
                                  feed_dict={x: x_train, y: y_train, initBatch: x_train.shape[0]})
            train_writer.add_summary(summary, step)
            if (step % (training_iters / 10) == 0):
                trncost, metric_upMae0, metric_upMae1, out_preds, out_re = sess.run(
                    [cost, upMae0, upMae1, pred, relativeError],
                    feed_dict={
                        x: x_val,
                        y: y_val,
                        initBatch: x_val.shape[0]
                    })
                tracking['step'].append(step)
                tracking['cost'].append(trncost)
                tracking['mae'].append(metric_upMae0)
                tracking['relerr'].append(out_re)
                print(
                    "Time {} Step {:5d} Cost {:10.2f} Metric {:6.2f}/{:6.2f} RelError {:3.2f}"
                    .format(datetime.datetime.now().strftime("%y-%m-%d_%H-%M"),
                            step, trncost, metric_upMae0, metric_upMae1,
                            out_re))
            step += 1
        tstcost, out_preds, metric_upMae0, out_re = sess.run(
            [cost, pred, upMae0, relativeError],
            feed_dict={
                x: x_test,
                y: y_test,
                initBatch: x_test.shape[0]
            })
        print("\nTEST Cost {} Metric {} Relat err {}".format(
            tstcost, metric_upMae0, out_re))

        savedFolder = 'Experiments/app{}_{}'.format(
            applianceTest,
            datetime.datetime.now().strftime("%y-%m-%d_%H-%M"))
        os.makedirs(savedFolder)
        yFlat = y_test.flatten()
        predFlat = out_preds.flatten()

        # Concatenate the instances horizontally to recover the one long original sequence
        newTest = np.concatenate([y_test[i] for i in range(y_test.shape[0])], 0)
        newPred = np.concatenate([out_preds[i] for i in range(y_test.shape[0])], 0)
        newInput = np.concatenate([x_test[i] for i in range(y_test.shape[0])], 0)

        assert newTest.shape == newPred.shape
        print("Test size {}".format(newTest.shape))

        fLog = open('{}/{}_output'.format(savedFolder, applianceTest), 'w')
        fLog.write("Step,Cost,MAE,RelError\n")
        for i in range(len(tracking['step'])):
            fLog.write("{},{},{},{}\n".format(tracking['step'][i], tracking['cost'][i],
                                              tracking['mae'][i], tracking['relerr'][i]))
        fLog.write("Test,{},{},{}\n".format(tstcost, metric_upMae0, out_re))
        fLog.close()
        f, axarr = plt.subplots(3, 1, sharex=True)

        axarr[0].plot(newTest)
        axarr[0].set_title('Y-real')

        axarr[1].plot(newPred)
        axarr[1].set_title('Y-pred')

        axarr[2].plot(newInput)
        axarr[2].set_title('X-real')

        f.subplots_adjust(top=0.92,
                          bottom=0.05,
                          left=0.10,
                          right=0.95,
                          hspace=0.3,
                          wspace=0.3)
        plt.savefig('{}/{}_plots'.format(savedFolder, applianceTest))
        plt.clf()
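

# A hypothetical invocation of main; the keys follow the arguments read at the
# top of main, and every value below is an assumption. Note that main also
# expects `windows` and `appliances` to exist (see the dataset-loading comment).
if __name__ == '__main__':
    main({'lr': '0.0001', 'nilmkt_fileName': 'data/ukdale.h5',
          'n_steps': '200', 'hiddenUnitsLayer1': '128',
          'iterations': '2000', 'strideTrain': '20',
          'applianceToTest': '0'})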