示例#1
0
 def test_transform(self):
     """Transforming `square` with its own fitted scale yields `normalized_square`.

     Additionally checks that the values in the first two columns of all
     transformed geometries have a standard deviation of 1 (to one decimal
     place).
     """
     fitted_scale = gs.fit(square)
     normalized = gs.transform(square, scale=fitted_scale)
     matches_expected = (normalized == normalized_square).all()
     self.assertTrue(matches_expected)

     # Gather the first two columns of every geometry into one flat list.
     flat_values = [
         value
         for geometry in normalized
         for value in geometry[:, :2].flatten()
     ]
     self.assertAlmostEqual(np.std(flat_values), 1., 1)
示例#2
0
 def test_upsized_transform(self):
     """A doubled copy of the first square still normalizes to `normalized_square`.

     The first square is multiplied by two, a few individual cells are set
     back to 1, and the result is fitted and transformed on its own. The
     values in the first two columns must again have a standard deviation
     of 1 (to one decimal place).
     """
     doubled = square[0] * 2
     # Presumably these cells are flag columns that must not be scaled along
     # with the coordinates -- TODO confirm against the geometry vector layout.
     doubled[:4, 2] = 1.
     doubled[4, 4] = 1.

     fitted_scale = gs.fit([doubled])
     normalized = gs.transform([doubled], scale=fitted_scale)
     matches_expected = (normalized == normalized_square).all()
     self.assertTrue(matches_expected)

     # Gather the first two columns of every geometry into one flat list.
     flat_values = [
         value
         for geometry in normalized
         for value in geometry[:, :2].flatten()
     ]
     self.assertAlmostEqual(np.std(flat_values), 1., 1)
示例#3
0
 def test_scaling_square(self):
     """Fitting on `square` must produce a scale of exactly 0.5."""
     expected_scale = 0.5
     self.assertEqual(gs.fit(square), expected_scale)
示例#4
0
 def test_scaling_square_dup_nodes(self):
     """Fitting on `square_duplicate_nodes` must produce a scale of exactly 0.5."""
     expected_scale = 0.5
     self.assertEqual(gs.fit(square_duplicate_nodes), expected_scale)
示例#5
0
    def __init__(self, numpy_zip_path, config, normalization=None):
        """
        Load samples and labels from a numpy zip file and set up normalization.

        Every record in the archive's 'data' array is reduced to its '*_vec'
        entries after a type sanity check, and the 'energy_performance_vec'
        of every entry in the 'labels' array is kept as the label.
        Normalization settings are either taken over from the caller or
        computed from the loaded data.

        :param numpy_zip_path: path as string to an Energy Data numpy zip file
        :param config: configuration object, stored unchanged as self.config
        :param normalization: a dictionary with normalization settings, e.g.
            the .normalization attribute of another instance. When falsy
            (the default None, or an empty dict) new settings are computed
            from the data in this file.
        :raises ValueError: if a '*_vec' value is not an ndarray, list or int

        Use:
            train_dataset = EnergyLabelData('path/training/npz', config)
            val_dataset   = EnergyLabelData('path/validation/npz', config,
                                            normalization=train_dataset.normalization)
        """
        self.config = config
        self.data = []

        # Accepted type names; built once instead of once per record.
        allowed_classes = ('ndarray', 'list', 'int')

        # NOTE(review): the records are used as dicts below (.items()), so the
        # archive presumably stores pickled objects. NumPy >= 1.16.3 refuses
        # to load those unless allow_pickle=True is passed -- confirm, and
        # only enable it for trusted files.
        #
        # The context manager closes the archive's file handle, which a bare
        # np.load() call would leave open.
        with np.load(numpy_zip_path) as npz:
            self.labels = [
                label['energy_performance_vec'] for label in npz['labels']
            ]

            print('Loading data from', numpy_zip_path)
            for record in tqdm(npz['data']):
                inputs = {}

                for (key, val) in record.items():
                    if key.endswith('_vec'):
                        # Final sanity check. Compares the exact type name,
                        # so subclasses of the allowed types are rejected.
                        if type(val).__name__ not in allowed_classes:
                            raise ValueError('Unknown data type ' +
                                             type(val).__name__ + ' in ' +
                                             str(record))
                        inputs[key] = val

                self.data.append(inputs)

        if normalization:  # re-use normalization settings from a different data loader
            self.normalization = normalization
            print(
                'Re-used normalization settings, stored in .normalization dictionary'
            )

        else:  # create new normalization settings
            self.normalization = {}

            print('Getting normalization parameters...')

            # scale geometry
            geoms = [sample['geometry_vec'] for sample in self.data]
            self.normalization['geom_scale'] = gs.fit(geoms)

            # Recorded and registration dates: mean and standard deviation
            # per column, with columns 0..3 stored under the year, month,
            # day and weekday keys respectively.
            date_sources = (
                ('rec', 'recorded_date_vec'),
                ('reg', 'registration_date_vec'),
            )
            date_parts = ('year', 'month', 'day', 'weekday')
            for prefix, sample_key in date_sources:
                dates = np.array(
                    [sample[sample_key] for sample in self.data])
                for column, part in enumerate(date_parts):
                    stat_name = prefix + '_' + part
                    self.normalization[stat_name + '_mean'] = np.mean(
                        dates[:, column])
                    self.normalization[stat_name + '_std'] = np.std(
                        dates[:, column])

            # house numbers
            house_numbers = [
                sample['house_number_vec'] for sample in self.data
            ]
            self.normalization['house_number_mean'] = np.mean(house_numbers)
            self.normalization['house_number_std'] = np.std(house_numbers)

            # year of construction
            construction_years = [
                sample['year_of_construction_vec'] for sample in self.data
            ]
            self.normalization['construction_years_mean'] = np.mean(
                construction_years)
            self.normalization['construction_years_std'] = np.std(
                construction_years)

            print('Normalization settings stored in .normalization dictionary')