def plot_validation_predict_and_obs(self, show_title=True, save_loc=None, plot_bounds=None):
    """Plot the best network's predictions against observations for the validation set.

    Parameters
    ----------
    show_title : bool
        When True, compute error metrics and show them in the figure title.
    save_loc : str or None
        Base file name (without extension) to save the figure as a PNG;
        when None the figure is shown interactively instead.
    plot_bounds : (int, int) or None
        Optional (lower, upper) sample-index window to restrict the plot to;
        None plots the entire validation period.
    """
    # NOTE(review): the original body unconditionally overwrote all three
    # parameters with hard-coded debug values (show_title=False,
    # plot_bounds=(5180, 5230), save_loc='pic_ANN_set0_validation2'),
    # making the arguments dead code. The overrides are removed so callers
    # control the behavior again.
    label_font = {'size': 20}
    tick_label_size = 15
    title_label_size = 30

    # Run the trained network on the validation inputs.
    predictions = self.ml_obj.best_network[0].sim(self.ml_obj.validation_input)
    observations = self.ml_obj.validation_target

    # Undo the min-max scaling applied to the target so the plot is in real
    # units. The scaler is refit on the training target here because legacy
    # pickles do not carry self.min_max_scaler_target — TODO: switch to the
    # stored scaler once legacy data is retired.
    min_max_scaler_target = preprocessing.MinMaxScaler()
    min_max_scaler_target.fit(self.data_package.training_target)
    predictions = min_max_scaler_target.inverse_transform(predictions)
    observations = min_max_scaler_target.inverse_transform(observations)

    print(get_all_error_metrics(observations, predictions, None))

    fig_validation = plt.figure(figsize=(10, 8))
    ax = fig_validation.add_subplot(111)

    # Build the x-axis: one timestamp per sample at 15-minute resolution,
    # starting at midnight on the validation period's start date.
    dt = datetime.datetime.combine(self.data_package.validation_start_date,
                                   datetime.datetime.min.time())
    x = [dt + datetime.timedelta(minutes=15 * i)
         for i in range(len(self.ml_obj.validation_input))]

    # Clip to physically sensible turbidity values: observations at >= 0,
    # predictions within [0, 10] NTU.
    if plot_bounds is not None:
        lower, upper = plot_bounds
        ax.plot(x[lower:upper], observations.clip(min=0)[lower:upper], 'o-', label='observations')
        ax.plot(x[lower:upper], predictions.clip(min=0, max=10)[lower:upper], 'r^-', label='predictions')
    else:
        ax.plot(x, observations.clip(min=0), 'o-', label='observations')
        ax.plot(x, predictions.clip(min=0, max=10), 'r^-', label='predictions')

    if show_title:
        errors_dict = calc_all_error('RMSE, NSE, RSEnaive, R', observations, predictions, round_to=5,
                                     rolling_average_size=96, upper_lim=10, lower_lim=0.05)
        fig_validation.suptitle('Validation Data: RMSE: {} NSE: {} RSEnaive: {} R: {}'.format(
            errors_dict['RMSE'], errors_dict['NSE'], errors_dict['RSEnaive'], errors_dict['R']),
            fontsize=title_label_size)

    ax.set_ylabel('Turbidity in NTU', fontdict=label_font)
    ax.tick_params(axis='both', which='major', labelsize=tick_label_size)

    ax.legend(loc='best')
    fig_validation.tight_layout()
    if show_title:
        # Leave headroom for the suptitle added above.
        fig_validation.subplots_adjust(top=0.90)

    if save_loc is not None:
        plt.savefig(save_loc + '.png')
    else:
        plt.show()
# ----- Example 2 (was scrape residue: "예제 #2" / "0" — not valid Python) -----
def run_main():
    """Load dataset settings, package the data, and (when enabled) train and
    pickle a batch of ANNs per forecast horizon.

    Side effects: appends progress lines to `log_file` and `results_file`
    (both resolved relative to the parent of the current working directory)
    and writes pickled DataCollector objects to a hard-coded Windows-style
    path under data\\Hillersdon.
    """
    # load in data settings about a data set from a ini file, ini file must be in same directory as this file
    # NOTE(review): `ini_file`, `log_file`, `results_file`, `experiment_id`
    # and `process_id` are module-level names defined outside this view.
    data_settings = DataSetSettings(os.path.join(os.path.dirname(os.getcwd()), ini_file))
    data_set_files = data_settings.get_data_set_files()

    meter_groups = []
    for meter_file in data_set_files:
        # create meter groups from csv data
        # NOTE(review): meter_file appears to be a 3-tuple of constructor
        # arguments — confirm against DataSetSettings.get_data_set_files().
        meter_groups.append(MeterGroup(meter_file[0], meter_file[1], meter_file[2]))
        # set meter group settings
        meter_groups[-1].set_meter_group_settings(data_settings.meter_groups())

    # create data packer and add meter groups
    data_packer1 = DataPacker(meter_groups)

    # range(1, 2) runs a single horizon (1); widen the range to sweep horizons.
    for time_horizon in range(1, 2):
        # package data for machine learning
        data_package1 = data_packer1.package_data(data_settings.packer_input(), data_settings.packer_target(),
                                                  forecast_horizon_override=time_horizon)
        print('Data Packaged')

        # Disabled exploratory branch: false-nearest-neighbour embedding search.
        if False:
            data = data_package1.training_input.T
            # data = data_package1.validation_input.T
            meta = data_package1.training_input_meta[0]
            embedding_search_fnn(data, meta, go_to=15, threshold=50, ratio=0.001)

        # Disabled training branch: trains 10 ANNs per horizon, retrying each
        # until a run succeeds (code != 1), then pickles the results.
        if False:
            for ann_count in range(10):
                with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                    f.write('Forecast Horizon: {} ANN Count: {}\n'.format(time_horizon, ann_count))

                # Retry loop: re-initialize and re-run the ANN until it learns.
                while True:
                    # create machine learning obj to run ANN
                    ann1 = MachineLearning('ANN', data_package1, data_settings.ann())
                    print('Machine Learning Obj Initialized')

                    # run: ml_type, package_name, network_layout, layer_func, max_epochs, trains_per_validation_run,
                    #   max_worse_validations stop_function, training_goal, verbose_settings
                    #code, msg = ann1.run(network_layout='{}, 1'.format(time_horizon))
                    code, msg = ann1.run()

                    if code == 1:  # Failed to learn
                        # Log the failure and fall through to retry.
                        print(msg)
                        with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                            f.write(msg + '\n')
                    else:
                        print(msg)
                        with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                            f.write(msg + '\n')
                        print('Machine Learning Obj Finished Run')

                        # Bundle the trained network with its data for pickling.
                        data_collector1 = DataCollector(ann1, data_package1, data_settings)
                        # NOTE(review): Windows-only path separators; consider
                        # os.path.join / pathlib if portability is needed.
                        pickle_file = 'data\\Hillersdon\\hill-exp{}_p{}-h50-t{}-r{}.pickle'.format(
                            experiment_id, process_id, time_horizon, ann_count)

                        with open(pickle_file, 'wb') as pf:
                            pickle.dump(data_collector1, pf, pickle.HIGHEST_PROTOCOL)

                        print('Data Collector Pickled')

                        with open(os.path.join(os.path.dirname(os.getcwd()), log_file), 'a') as f:
                            f.write('Run Count: {}, Pickled: {}\n'.format(str(len(data_collector1.ml_obj.inbuilt_errors)), pickle_file))
                            f.write('Training Errors: {}\n'.format(get_last_errors(ann1.training_errors)))
                            f.write('Validation Errors: {}\n'.format(get_last_errors(ann1.validation_errors)))

                        # Evaluate the best network on both splits for the results file.
                        predictions = data_collector1.ml_obj.best_network[0].sim(data_collector1.ml_obj.training_input)
                        observations = data_collector1.ml_obj.training_target
                        train_errors_str = get_all_error_metrics(observations, predictions, data_collector1.data_package)

                        predictions = data_collector1.ml_obj.best_network[0].sim(data_collector1.ml_obj.validation_input)
                        observations = data_collector1.ml_obj.validation_target
                        val_errors_str = get_all_error_metrics(observations, predictions, data_collector1.data_package)

                        with open(os.path.join(os.path.dirname(os.getcwd()), results_file), 'a') as f:
                            f.write('Run Count: {}, Pickled: {}\n'.format(str(len(data_collector1.ml_obj.inbuilt_errors)), pickle_file))
                            f.write('Training Errors, {}\n'.format(train_errors_str))
                            f.write('Validation Errors, {}\n'.format(val_errors_str))

                        #data_collector1.plot_all()
                        break  # success — move on to the next ann_count



    print('Done!')
    def plot_training_predict_and_obs(self, show_title=True, save_loc=None, plot_bounds=None):
        """Plot the best network's predictions against observations for the training set.

        Parameters
        ----------
        show_title : bool
            When True, compute error metrics and show them in the figure title.
        save_loc : str or None
            Base file name (without extension) to save the figure as a PNG;
            when None the figure is shown interactively instead.
        plot_bounds : (int, int) or None
            Optional (lower, upper) sample-index window to restrict the plot
            to; None plots the entire training period.
        """
        # NOTE(review): removed the hard-coded debug overrides of all three
        # parameters (show_title=False, plot_bounds=(15690, 15760),
        # save_loc='pic_ANN_set0_training2') and an unreachable `if False:`
        # benchmark block — both were dead code that disabled the arguments.
        label_font = {'size': 20}
        tick_label_size = 15
        title_label_size = 30

        # Run the trained network on the training inputs.
        predictions = self.ml_obj.best_network[0].sim(self.ml_obj.training_input)
        observations = self.ml_obj.training_target

        # Undo the min-max scaling applied to the target so the plot is in
        # real units. The scaler is refit on the training target here because
        # legacy pickles do not carry self.min_max_scaler_target — TODO:
        # switch to the stored scaler once legacy data is retired.
        min_max_scaler_target = preprocessing.MinMaxScaler()
        min_max_scaler_target.fit(self.data_package.training_target)
        predictions = min_max_scaler_target.inverse_transform(predictions)
        observations = min_max_scaler_target.inverse_transform(observations)

        print(get_all_error_metrics(observations, predictions, None))

        fig_training = plt.figure(figsize=(10, 8))
        ax = fig_training.add_subplot(111)

        # Build the x-axis: one timestamp per sample at 15-minute resolution,
        # starting at midnight on the training period's start date.
        dt = datetime.datetime.combine(self.data_package.training_start_date,
                                       datetime.datetime.min.time())
        x = [dt + datetime.timedelta(minutes=15 * i)
             for i in range(len(self.ml_obj.training_input))]

        # Clip to physically sensible turbidity values: observations at >= 0,
        # predictions within [0, 10] NTU.
        if plot_bounds is not None:
            lower, upper = plot_bounds
            ax.plot(x[lower:upper], observations.clip(min=0)[lower:upper], 'o-', label='observations')
            ax.plot(x[lower:upper], predictions.clip(min=0, max=10)[lower:upper], 'r^-', label='predictions')
        else:
            ax.plot(x, observations.clip(min=0), 'o-', label='observations')
            ax.plot(x, predictions.clip(min=0, max=10), 'r^-', label='predictions')

        if show_title:
            errors_dict = calc_all_error('RMSE, NSE, RSEnaive, R', observations, predictions, round_to=5,
                                         rolling_average_size=96, upper_lim=10, lower_lim=0.05)  # TODO: make dynamic
            fig_training.suptitle('Training Data: RMSE: {} NSE: {} RSEnaive: {} R: {}'.format(
                errors_dict['RMSE'], errors_dict['NSE'], errors_dict['RSEnaive'], errors_dict['R']),
                fontsize=title_label_size)

        ax.set_ylabel('Turbidity in NTU', fontdict=label_font)
        ax.tick_params(axis='both', which='major', labelsize=tick_label_size)

        ax.legend(loc='best')
        fig_training.tight_layout()

        if show_title:
            # Leave headroom for the suptitle added above.
            fig_training.subplots_adjust(top=0.90)

        if save_loc is not None:
            plt.savefig(save_loc + '.png')
        else:
            plt.show()