def run_main():
    """Package the configured data set once for every forecast horizon from 1 to 11.

    The settings file named by ``ini_file`` is resolved against the parent
    directory of the current working directory. One ``MeterGroup`` is created
    per entry returned by ``get_data_set_files()``, each receives the meter
    group settings, and all of them are handed to a single ``DataPacker``.
    The embedding search that follows each packaging step is switched off.
    """
    # the ini file is looked up in the parent of the current working directory
    settings_path = os.path.join(os.path.dirname(os.getcwd()), ini_file)
    data_settings = DataSetSettings(settings_path)
    file_entries = data_settings.get_data_set_files()

    groups = []
    for file_entry in file_entries:
        # the first three fields of an entry are passed positionally to MeterGroup
        group = MeterGroup(file_entry[0], file_entry[1], file_entry[2])
        group.set_meter_group_settings(data_settings.meter_groups())
        groups.append(group)

    packer = DataPacker(groups)

    # manual switch for the embedding search below; currently disabled
    run_embedding_search = False

    for horizon in range(1, 12):
        # package data for machine learning with the horizon overridden
        packer_input = data_settings.packer_input()
        packer_target = data_settings.packer_target()
        package = packer.package_data(packer_input, packer_target, forecast_horizon_override=horizon)
        print('Data Packaged')

        if run_embedding_search:
            # alternative data source: package.validation_input.T
            samples = package.training_input.T
            first_meta = package.training_input_meta[0]
            embedding_search_fnn(samples, first_meta, go_to=100, threshold=10, ratio=0.001)

    print('Done!')
class TestDataPacker(unittest.TestCase):
    """Tests for DataPacker group registration plus one end-to-end ANN run."""

    def setUp(self):
        """Create a DataPacker and one target and one input MeterGroup."""
        self.data_packer1 = DataPacker()
        self.meter_group_target = MeterGroup('test target group')
        self.meter_group_input = MeterGroup('test input group')

    def test_add_target_group(self):
        # a group registered as target must be listed in target_groups
        packer = self.data_packer1
        packer.add_target_group(self.meter_group_target)

        self.assertIn(self.meter_group_target, packer.target_groups)

    def test_add_training_group(self):
        # a group registered for training must be listed in training_groups
        packer = self.data_packer1
        packer.add_training_group(self.meter_group_input)

        self.assertIn(self.meter_group_input, packer.training_groups)

    def test_can_read_from_file_and_run_ann_and_plot_and_save_results(self):
        # end-to-end run: settings -> meter groups -> packaging -> ANN -> error plot;
        # nothing is asserted, so the test only fails when one of the steps raises

        # data set settings come from an ini file
        settings = DataSetSettings(data_set_settings_ini)

        # meter groups are built from csv data files, then configured in turn
        flow_group = MeterGroup('east_allers', 'flow', allers_flow_csv)
        turbidity_group = MeterGroup('roodloft_turbidity', 'turbidity', roodloft_turbidity_csv)
        for group in (flow_group, turbidity_group):
            group.set_meter_group_settings(settings.meter_groups())

        # turbidity is the target; flow and turbidity are both training inputs
        packer = DataPacker()
        packer.add_target_group(turbidity_group)
        for group in (flow_group, turbidity_group):
            packer.add_training_group(group)

        # package data for machine learning
        package = packer.package(settings.packer_input(), settings.packer_target())

        # machine learning object that runs the ANN
        network = MachineLearning(
            package.training_input, package.training_target,
            package.validation_input, package.validation_target,
            package.get_min_max_list(), settings.ann())

        # setup options noted by the author: ml_type, package_name, network_layout, layer_func
        network.setup()
        # run options noted by the author: max_epochs, stop_function, training_goal, verbose_settings
        network.run(max_epochs=50, trains_per_validation_run=10)

        collector = DataCollector(network, package, settings)
        collector.plot_training_and_validation_errors(network.get_error_list())
        # plotting and storing of results are disabled:
        # collector.plot(settings.plot1)
        # collector.store(folder_location, settings.store1)
        print()
 def setUp(self):
     """Create a DataPacker and two named MeterGroups and store them on the instance."""
     # NOTE(review): this fragment is indented by a single space and repeats
     # TestDataPacker.setUp; presumably a paste artifact - confirm before keeping
     self.data_packer1 = DataPacker()
     self.meter_group_target = MeterGroup('test target group')
     self.meter_group_input = MeterGroup('test input group')
예제 #5
0
def run_main():
    """Package the configured data set for forecast horizon 1.

    The settings file named by ``ini_file`` is resolved against the parent
    directory of the current working directory. One ``MeterGroup`` is created
    per entry returned by ``get_data_set_files()`` and all of them are handed
    to a single ``DataPacker``.

    Two further stages are present but switched off: an embedding search, and
    a training stage that runs ten ANNs per horizon, retries each one until
    the run does not report code 1, pickles a ``DataCollector`` for the
    successful run and appends error metrics to the log and results files.
    """
    def in_parent_dir(file_name):
        # every file is resolved against the parent of the current working directory
        return os.path.join(os.path.dirname(os.getcwd()), file_name)

    data_settings = DataSetSettings(in_parent_dir(ini_file))
    file_entries = data_settings.get_data_set_files()

    groups = []
    for file_entry in file_entries:
        # the first three fields of an entry are passed positionally to MeterGroup
        group = MeterGroup(file_entry[0], file_entry[1], file_entry[2])
        group.set_meter_group_settings(data_settings.meter_groups())
        groups.append(group)

    packer = DataPacker(groups)

    # manual switches for the optional stages; both are currently disabled
    run_embedding_search = False
    train_networks = False

    for horizon in range(1, 2):
        # package data for machine learning with the horizon overridden
        packer_input = data_settings.packer_input()
        packer_target = data_settings.packer_target()
        package = packer.package_data(packer_input, packer_target, forecast_horizon_override=horizon)
        print('Data Packaged')

        if run_embedding_search:
            # alternative data source: package.validation_input.T
            samples = package.training_input.T
            first_meta = package.training_input_meta[0]
            embedding_search_fnn(samples, first_meta, go_to=15, threshold=50, ratio=0.001)

        if not train_networks:
            continue

        for ann_index in range(10):
            with open(in_parent_dir(log_file), 'a') as log:
                log.write('Forecast Horizon: {} ANN Count: {}\n'.format(horizon, ann_index))

            # keep building fresh networks until a run does not report code 1
            learned = False
            while not learned:
                network = MachineLearning('ANN', package, data_settings.ann())
                print('Machine Learning Obj Initialized')

                # run options noted by the author: ml_type, package_name, network_layout,
                #   layer_func, max_epochs, trains_per_validation_run,
                #   max_worse_validations stop_function, training_goal, verbose_settings
                # disabled variant: network.run(network_layout='{}, 1'.format(horizon))
                code, msg = network.run()

                # the message is echoed and logged whether or not the run succeeded
                print(msg)
                with open(in_parent_dir(log_file), 'a') as log:
                    log.write(msg + '\n')

                if code == 1:  # Failed to learn
                    continue

                learned = True
                print('Machine Learning Obj Finished Run')

                collector = DataCollector(network, package, data_settings)
                pickle_path = 'data\\Hillersdon\\hill-exp{}_p{}-h50-t{}-r{}.pickle'.format(
                    experiment_id, process_id, horizon, ann_index)

                with open(pickle_path, 'wb') as pickle_handle:
                    pickle.dump(collector, pickle_handle, pickle.HIGHEST_PROTOCOL)

                print('Data Collector Pickled')

                with open(in_parent_dir(log_file), 'a') as log:
                    log.write('Run Count: {}, Pickled: {}\n'.format(
                        str(len(collector.ml_obj.inbuilt_errors)), pickle_path))
                    log.write('Training Errors: {}\n'.format(get_last_errors(network.training_errors)))
                    log.write('Validation Errors: {}\n'.format(get_last_errors(network.validation_errors)))

                # error metrics of the best network on the training data
                train_predictions = collector.ml_obj.best_network[0].sim(collector.ml_obj.training_input)
                train_observations = collector.ml_obj.training_target
                train_errors_str = get_all_error_metrics(
                    train_observations, train_predictions, collector.data_package)

                # error metrics of the best network on the validation data
                val_predictions = collector.ml_obj.best_network[0].sim(collector.ml_obj.validation_input)
                val_observations = collector.ml_obj.validation_target
                val_errors_str = get_all_error_metrics(
                    val_observations, val_predictions, collector.data_package)

                with open(in_parent_dir(results_file), 'a') as results:
                    results.write('Run Count: {}, Pickled: {}\n'.format(
                        str(len(collector.ml_obj.inbuilt_errors)), pickle_path))
                    results.write('Training Errors, {}\n'.format(train_errors_str))
                    results.write('Validation Errors, {}\n'.format(val_errors_str))

                # plotting is disabled: collector.plot_all()

    print('Done!')