def get_pipeline(period, data_path, target_inclusion_prob, windows, appliances,
                 target_appliance, activations, seq_length, num_seq_per_batch):
    """Build a ``DataPipeline`` backed by a single ``RealAggregateSource``.

    The activations are first passed through ``filter_activations`` together
    with ``windows`` and ``appliances``. One unprocessed batch is then drawn
    from the source and its input/target standard deviations are used as the
    divisors of the pipeline's processing steps.

    Args:
        period: forwarded to the source as ``sample_period``.
        data_path: forwarded to the source as ``filename``.
        target_inclusion_prob: forwarded to the source unchanged.
        windows: forwarded to ``filter_activations`` and to the source.
        appliances: forwarded to ``filter_activations``.
        target_appliance: appliance the source is built for.
        activations: activations to be filtered before use.
        seq_length: sequence length handed to the source.
        num_seq_per_batch: batch size of the returned pipeline.

    Returns:
        The configured ``DataPipeline``.
    """
    filtered_activations = filter_activations(windows, appliances, activations)

    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=data_path,
        windows=windows,
        sample_period=period,
        target_inclusion_prob=target_inclusion_prob)

    # NOTE(review): the calibration batch is sized by ``seq_length`` (not by
    # ``num_seq_per_batch`` or a fixed count) exactly as before -- confirm
    # that this is intentional.
    # ``next(...)`` instead of ``.next()`` so this works on Python 2 and 3.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=seq_length))
    unprocessed = sample.before_processing
    input_std = unprocessed.input.flatten().std()
    target_std = unprocessed.target.flatten().std()

    return DataPipeline(
        [real_agg_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std),
                          IndependentlyCenter()],
        target_processing=[DivideBy(target_std)])
Exemplo n.º 2
0
def get_pipeline(target_appliance, activations):
    """Build a three-source ``DataPipeline`` for one supported appliance.

    The pipeline combines a synthetic aggregate source, a real aggregate
    source and a stride source. Scaling divisors are the input/target
    standard deviations of one unprocessed 1024-sequence batch drawn from the
    real aggregate source.

    Args:
        target_appliance: one of ``'kettle'``, ``'microwave'``,
            ``'washing machine'``, ``'fridge'`` or ``'dish washer'``.
        activations: activations passed to ``filter_activations``.

    Returns:
        The configured ``DataPipeline``.

    Raises:
        ValueError: if ``target_appliance`` is not one of the supported names
            (previously this surfaced later as an ``UnboundLocalError``).
    """
    num_seq_per_batch = 64

    # appliance -> (seq_length, train_buildings, unseen_buildings)
    appliance_settings = {
        'kettle': (128, [1, 2, 4], [5]),
        'microwave': (288, [1, 2], [5]),
        'washing machine': (1024, [1, 5], [2]),
        'fridge': (512, [1, 2, 4], [5]),
        'dish washer': (1024 + 512, [1, 2], [5]),
    }
    if target_appliance not in appliance_settings:
        raise ValueError(
            'Unsupported target_appliance {!r}; expected one of {}'.format(
                target_appliance, sorted(appliance_settings)))
    seq_length, train_buildings, unseen_buildings = (
        appliance_settings[target_appliance])

    filtered_windows = select_windows(train_buildings, unseen_buildings)
    filtered_activations = filter_activations(filtered_windows, activations)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        sample_period=SAMPLE_PERIOD)

    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD)

    stride_source = StrideSource(
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD,
        stride=STRIDE)

    # ``next(...)`` instead of ``.next()`` so this works on Python 2 and 3.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
    unprocessed = sample.before_processing
    input_std = unprocessed.input.flatten().std()
    target_std = unprocessed.target.flatten().std()

    return DataPipeline(
        [synthetic_agg_source, real_agg_source, stride_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std),
                          IndependentlyCenter()],
        target_processing=[DivideBy(target_std)])
Exemplo n.º 3
0
def create_data_pipeline(conf,
                         sample_period,
                         num_seq_per_batch,
                         source_probabilities=(.5, .5),
                         windows_key='windows'):
    """Build a ``RectangleDataPipeline`` from a configuration mapping.

    Args:
        conf: mapping read for the keys ``'distracting_appliances'``,
            ``'target_appliance'``, ``'data_file'``, ``'seq_length'``,
            ``'input_std'``, ``'target_std'`` and ``windows_key``. It is not
            modified by this function.
        sample_period: forwarded to the activation loader and both sources.
        num_seq_per_batch: batch size of the returned pipeline.
        source_probabilities: forwarded to the pipeline unchanged.
        windows_key: key of ``conf`` holding a mapping of
            ``window name -> {house: window selection}``; house keys are
            converted with ``int``.

    Returns:
        A tuple ``(pipeline, real_input_std, real_target_std,
        real_avg_power)``. The three statistics are measured on one
        unprocessed 1024-sequence batch from the real aggregate source. Note
        that the pipeline itself is scaled by ``conf['input_std']`` and
        ``conf['target_std']``, not by the measured values.
    """
    # Build a new list: appending to conf['distracting_appliances'] in place
    # would leak the target appliance into the caller's configuration and
    # duplicate it on every further call.
    appliances = list(conf['distracting_appliances'])
    appliances.append(conf['target_appliance'])

    if os.path.isabs(conf['data_file']):
        data_file_path = conf['data_file']
    else:
        # Relative paths are resolved against the parent of this file's
        # directory.
        data_file_path = os.path.join(
            os.path.dirname(__file__) + '/../', conf['data_file'])

    windows = {
        window_name: {
            int(house): window_selection
            for house, window_selection in window.items()
        }
        for window_name, window in conf[windows_key].items()
    }

    appliance_activations = load_nilmtk_activations(
        appliances=appliances,
        filename=data_file_path,
        sample_period=sample_period,
        windows=windows)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=appliance_activations,
        target_appliance=conf['target_appliance'],
        seq_length=conf['seq_length'],
        sample_period=sample_period)

    real_agg_source = RealAggregateSource(
        activations=appliance_activations,
        target_appliance=conf['target_appliance'],
        seq_length=conf['seq_length'],
        filename=data_file_path,
        windows=windows,
        sample_period=sample_period)

    # Size of the one-off batch used to measure the statistics below.
    num_calibration_seqs = 1024
    sample = next(
        real_agg_source.get_batch(num_seq_per_batch=num_calibration_seqs))
    unprocessed = sample.before_processing
    real_input_std = unprocessed.input.flatten().std()
    real_target_std = unprocessed.target.flatten().std()
    real_avg_power = (unprocessed.target.flatten().sum()
                      / num_calibration_seqs / conf['seq_length'])

    pipeline = RectangleDataPipeline(
        [synthetic_agg_source, real_agg_source],
        num_seq_per_batch=num_seq_per_batch,
        source_probabilities=source_probabilities,
        input_processing=[DivideBy(conf['input_std']),
                          IndependentlyCenter()],
        target_processing=[
            DivideBy(conf['target_std']), start_and_end_and_mean
        ])

    return pipeline, real_input_std, real_target_std, real_avg_power
Exemplo n.º 4
0
def get_pipeline(target_appliance, activations):
    """Build a three-source ``DataPipeline`` for the kettle.

    The pipeline combines a synthetic aggregate source, a real aggregate
    source and a stride source. Scaling divisors are the input/target
    standard deviations of one unprocessed 1024-sequence batch drawn from the
    real aggregate source.

    Args:
        target_appliance: must be ``'kettle'``; no other appliance is
            configured here.
        activations: activations passed to ``filter_activations``.

    Returns:
        The configured ``DataPipeline``.

    Raises:
        ValueError: if ``target_appliance`` is not ``'kettle'`` (previously
            this surfaced later as an ``UnboundLocalError``).
    """
    if target_appliance != 'kettle':
        raise ValueError(
            "Unsupported target_appliance {!r}; only 'kettle' is "
            "configured".format(target_appliance))

    seq_length = 128
    train_buildings = [1, 2, 4]
    unseen_buildings = [5]
    num_seq_per_batch = 64

    filtered_windows = select_windows(train_buildings, unseen_buildings)
    filtered_activations = filter_activations(filtered_windows, activations)

    synthetic_agg_source = SyntheticAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        sample_period=SAMPLE_PERIOD
    )

    real_agg_source = RealAggregateSource(
        activations=filtered_activations,
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD
    )

    stride_source = StrideSource(
        target_appliance=target_appliance,
        seq_length=seq_length,
        filename=NILMTK_FILENAME,
        windows=filtered_windows,
        sample_period=SAMPLE_PERIOD,
        stride=STRIDE
    )

    # ``next(...)`` instead of ``.next()`` so this works on Python 2 and 3.
    sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
    unprocessed = sample.before_processing
    input_std = unprocessed.input.flatten().std()
    target_std = unprocessed.target.flatten().std()

    return DataPipeline(
        [synthetic_agg_source, real_agg_source, stride_source],
        num_seq_per_batch=num_seq_per_batch,
        input_processing=[DivideBy(input_std), IndependentlyCenter()],
        target_processing=[DivideBy(target_std)]
    )
Exemplo n.º 5
0
unseen_buildings = [5]
DATA_FOLD_NAMES = ('train', 'unseen_appliances',
                   'unseen_activations_of_seen_appliances')

filtered_windows = select_windows(train_buildings, unseen_buildings)
filtered_activations = filter_activations(filtered_windows, activations)

synthetic_agg_source = SyntheticAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    sample_period=SAMPLE_PERIOD)

real_agg_source = RealAggregateSource(activations=filtered_activations,
                                      target_appliance=target_appliance,
                                      seq_length=seq_length,
                                      filename=NILMTK_FILENAME,
                                      windows=filtered_windows,
                                      sample_period=SAMPLE_PERIOD)

# ------------
# Needed to rescale the input aggregated data.
# The scaling statistics are measured on one unprocessed batch of 1024
# sequences drawn from the real aggregate source.
# ``next(...)`` instead of ``.next()`` so this works on Python 2 and 3.
sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
sample = sample.before_processing
input_std = sample.input.flatten().std()
target_std = sample.target.flatten().std()
# ------------

pipeline = DataPipeline(
    [synthetic_agg_source, real_agg_source],
    num_seq_per_batch=num_seq_per_batch,
def _sample_normalised_folds(real_agg_source, trainSize, valSize, testSize):
    """Draw one train/val/test batch from a source and normalise all three.

    The input and target standard deviations are measured on the unprocessed
    training batch only; the resulting processing steps are then applied to
    the training, validation and test batches alike.

    Returns:
        ``(totalX, totalY, input_std, target_std)`` where ``totalX`` and
        ``totalY`` are dicts keyed by ``'train'``, ``'val'`` and ``'test'``
        holding the squeezed processed inputs and targets.
    """

    def draw(size, fold, validation):
        # ``next(...)`` instead of ``.next()`` so this works on Python 2
        # and 3.
        batch = next(real_agg_source.get_batch(num_seq_per_batch=size,
                                               fold=fold,
                                               validation=validation))
        return batch.before_processing

    def apply_steps(steps, data):
        for step in steps:
            data = step(data)
        return data

    train = draw(trainSize, 'train', False)
    input_std = train.input.flatten().std()
    target_std = train.target.flatten().std()
    input_processing = [DivideBy(input_std), IndependentlyCenter()]
    target_processing = [DivideBy(target_std)]
    Xtrain = apply_steps(input_processing, train.input)
    # The target steps must run on the target; the previous code fed the
    # already processed input through them, overwriting the training target.
    Ytrain = apply_steps(target_processing, train.target)

    val = draw(valSize, 'val', True)
    Xval = apply_steps(input_processing, val.input)
    Yval = apply_steps(target_processing, val.target)

    test = draw(testSize, 'test', True)
    Xtest = apply_steps(input_processing, test.input)
    Ytest = apply_steps(target_processing, test.target)

    totalX = {
        'train': np.squeeze(np.array(Xtrain)),
        'val': np.squeeze(np.array(Xval)),
        'test': np.squeeze(np.array(Xtest))
    }
    totalY = {
        'train': np.squeeze(np.array(Ytrain)),
        'val': np.squeeze(np.array(Yval)),
        'test': np.squeeze(np.array(Ytest))
    }
    return totalX, totalY, input_std, target_std


def getNILMbatches(period, filename, target_inclusion_prob, windows,
                   appliances, pTrain, pVal, pTest, num_seq_per_batch,
                   seq_length, numApp):
    """Load activations and return normalised train/val/test batches.

    Args:
        period: sample period forwarded to the loader and the source.
        filename: data file forwarded to the loader and the source.
        target_inclusion_prob: forwarded to ``RealAggregateSource``.
        windows: forwarded to the loader, ``filter_activations`` and the
            source.
        appliances: sequence of appliance names.
        pTrain, pVal, pTest: fractions of ``num_seq_per_batch`` used as the
            train / validation / test batch sizes (truncated with ``int``).
        num_seq_per_batch: total number of sequences to split over the folds.
        seq_length: sequence length handed to the source.
        numApp: index into ``appliances`` selecting the target appliance, or
            ``-1`` to iterate over every appliance.

    Returns:
        ``(totalX, totalY, input_std, target_std)``; ``totalX`` / ``totalY``
        are dicts keyed by ``'train'``, ``'val'`` and ``'test'``.

    Raises:
        ValueError: if ``numApp == -1`` and ``appliances`` is empty.
    """
    activations = load_nilmtk_activations(appliances=appliances,
                                          filename=filename,
                                          sample_period=period,
                                          windows=windows)

    filtered_activations = filter_activations(windows, appliances, activations)

    trainSize = int(num_seq_per_batch * pTrain)
    valSize = int(num_seq_per_batch * pVal)
    testSize = int(num_seq_per_batch * pTest)

    def make_source(target_appliance):
        return RealAggregateSource(
            activations=filtered_activations,
            target_appliance=target_appliance,
            seq_length=seq_length,
            filename=filename,
            windows=windows,
            sample_period=period,
            target_inclusion_prob=target_inclusion_prob)

    if numApp == -1:
        # Merging the batches of all appliances is not implemented: every
        # iteration below replaces the previous result, so the values
        # returned are those of the last appliance in ``appliances``.
        print("not implemented")
        lenApps = len(appliances)
        if lenApps == 0:
            raise ValueError('appliances must not be empty when numApp == -1')

        # Integer division keeps the batch sizes integral on Python 3.
        trainSize = trainSize // lenApps
        valSize = valSize // lenApps
        testSize = testSize // lenApps

        for target_appliance in appliances:
            totalX, totalY, input_std, target_std = _sample_normalised_folds(
                make_source(target_appliance), trainSize, valSize, testSize)
    else:
        totalX, totalY, input_std, target_std = _sample_normalised_folds(
            make_source(appliances[numApp]), trainSize, valSize, testSize)

    return totalX, totalY, input_std, target_std
Exemplo n.º 7
0
    'train', 'unseen_appliances', 'unseen_activations_of_seen_appliances')

filtered_windows = select_windows(train_buildings, unseen_buildings)
filtered_activations = filter_activations(filtered_windows, activations)

synthetic_agg_source = SyntheticAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    sample_period=SAMPLE_PERIOD
)

real_agg_source = RealAggregateSource(
    activations=filtered_activations,
    target_appliance=target_appliance,
    seq_length=seq_length,
    filename=NILMTK_FILENAME,
    windows=filtered_windows,
    sample_period=SAMPLE_PERIOD
)


# ------------
# Needed to rescale the input aggregated data.
# The scaling statistics are measured on one unprocessed batch of 1024
# sequences drawn from the real aggregate source.
# ``next(...)`` instead of ``.next()`` so this works on Python 2 and 3.
sample = next(real_agg_source.get_batch(num_seq_per_batch=1024))
sample = sample.before_processing
input_std = sample.input.flatten().std()
target_std = sample.target.flatten().std()
# ------------

Exemplo n.º 8
0
def get_pipeline(activations):
    """Build a multi-appliance ``DataPipeline`` and its scaling parameters.

    One ``RealAggregateSource`` is created per entry of ``APPLIANCES``, each
    drawn with probability ``1.0 / NUM_APPLIANCE``. The input/target standard
    deviations used for scaling are loaded from an ``.npz`` file when one
    exists and ``OVERRIDE`` is off; otherwise they are measured on one
    unprocessed 1024-sequence batch per source, averaged, printed and saved.

    Side effect: the module-level ``seq_length`` is rebound on every loop
    iteration, so it ends up holding the value for the last appliance.

    Args:
        activations: activations passed to ``filter_activations``.

    Returns:
        ``(pipeline, multi_input_std, multi_target_std)``.
    """
    global seq_length
    agg_source = []
    prob = []
    target_inclusion_prob = 0.48 + len(APPLIANCES) * 0.1

    for task_appliance in APPLIANCES:
        seq_period = SEQ_PERIODS[task_appliance]
        seq_length = seq_period // SAMPLE_PERIOD

        # buildings
        buildings = BUILDINGS[task_appliance]
        train_buildings = buildings['train_buildings']
        unseen_buildings = buildings['unseen_buildings']

        # windows
        filtered_windows = select_windows(train_buildings, unseen_buildings,
                                          WINDOWS)
        filtered_activations = filter_activations(filtered_windows,
                                                  activations,
                                                  BUILDINGS_APPLIANCES)

        # data sources: the inclusion probability is capped at 0.82, except
        # for the fridge, whose source always includes the target.
        if task_appliance == 'fridge':
            real_source_prob = 1.0
        else:
            real_source_prob = min(0.82, target_inclusion_prob)

        # Only the real aggregate source is used; the synthetic and stride
        # sources are currently disabled.
        agg_source.append(
            RealAggregateSource(activations=filtered_activations,
                                target_appliance=task_appliance,
                                appliances=APPLIANCES,
                                target_inclusion_prob=real_source_prob,
                                seq_length=seq_length,
                                filename=NILMTK_FILENAME,
                                windows=filtered_windows,
                                sample_period=SAMPLE_PERIOD))
        prob.append(1.0 / NUM_APPLIANCE)

    # look for existing processing parameters only when OVERRIDE is not on; if
    # none, generate new ones
    print('Looking for existing processing parameters ... ')
    # NOTE(review): the trailing '%m' is the month, not the minute ('%M').
    # As written the file name changes once per hour, so saved parameters are
    # only found again within the same hour -- confirm the intended format.
    proc_params_filename = path.join(
        dirs.MODELS_DIR, 'proc_params_' + DATASET + '_[' + TARGET_APPLIANCE +
        ']_' + strftime('%Y-%m-%d_%H_%m') + '.npz')
    if not OVERRIDE and path.exists(proc_params_filename):
        print('Found; using them ...')
        # Open the archive once and close it afterwards instead of loading
        # it twice and leaking both handles.
        with np.load(proc_params_filename) as proc_params:
            multi_input_std = proc_params['multi_input_std']
            multi_target_std = proc_params['multi_target_std']
    else:
        if OVERRIDE:
            print('Overridden; generating new ones ...')
        else:
            print('Not found; generating new ones ...')
        multi_input_std = np.array([])
        multi_target_std = np.array([])

        batch_size = 1024
        for sample_source in agg_source:
            # ``next(...)`` instead of ``.next()`` so this works on Python 2
            # and 3.
            sample = next(
                sample_source.get_batch(num_seq_per_batch=batch_size))
            sample = sample.before_processing

            multi_input_std = np.append(multi_input_std,
                                        sample.input.flatten().std())
            # NOTE(review): ``target[:, idx]`` selects along the second
            # axis; confirm that this is the appliance axis of the target.
            multi_target_std = np.append(multi_target_std, [
                sample.target[:, idx].flatten().std()
                for idx in range(NUM_APPLIANCE)
            ])

        # Average over the sources: one scalar for the input and one value
        # per appliance for the target.
        multi_input_std = np.mean(multi_input_std)
        multi_target_std = multi_target_std.reshape(-1, NUM_APPLIANCE)
        multi_target_std = np.mean(multi_target_std, axis=0)

        print('=' * 10)
        print('Input std = ', multi_input_std)
        for idx, appliance in enumerate(APPLIANCES):
            print(appliance, 'std = ', multi_target_std[idx])
        print('=' * 10)

        print('Saving the processing parameters ...')
        np.savez(proc_params_filename,
                 multi_input_std=[multi_input_std],
                 multi_target_std=multi_target_std)

    # generate pipeline
    pipeline = DataPipeline(
        agg_source,
        num_seq_per_batch=NUM_SEQ_PER_BATCH,
        input_processing=[DivideBy(multi_input_std),
                          IndependentlyCenter()],
        target_processing=[DivideBy(multi_target_std)],
        source_probabilities=prob,
    )

    return pipeline, multi_input_std, multi_target_std