Example #1
def train_and_store_cnn():
    spectra_training_df, spectra_training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET,
                                 csv_name=SPECTRA_CSV_NAME))
    spectra_validation_df, spectra_validation_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET,
                                 csv_name=SPECTRA_CSV_NAME))

    # Reshape the flat CSV rows back into the 2-D spectra layout the CNN expects.
    spectra_training_array = np.array(
        [row.reshape(SPECTRA_SHAPE) for row in spectra_training_df.to_numpy()])
    spectra_validation_array = np.array(
        [row.reshape(SPECTRA_SHAPE) for row in spectra_validation_df.to_numpy()])

    full_cnn, cnn_embedding_layers = build_multiscale_cnn(SPECTRA_SHAPE)
    full_cnn.compile(optimizer='adam', loss='mean_squared_error')
    full_cnn.fit(x=spectra_training_array,
                 y=spectra_training_labels,
                 epochs=20,
                 verbose=0,
                 validation_data=(spectra_validation_array,
                                  spectra_validation_labels))

    cnn_embedding_layers.save(CNN_PATH)
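
A minimal usage sketch, not part of the original example: the embedding sub-model saved above can be reloaded with the standard Keras loading API and applied to new spectra. The embed_spectra helper and its reshape step are hypothetical.

def embed_spectra(flat_spectra: np.ndarray) -> np.ndarray:
    from tensorflow import keras

    # Reload the embedding layers stored by train_and_store_cnn().
    embedding_model = keras.models.load_model(CNN_PATH)
    # Restore the 2-D spectra layout the CNN was trained on.
    batch = np.array([row.reshape(SPECTRA_SHAPE) for row in flat_spectra])
    return embedding_model.predict(batch)
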
def train_autoencoder():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    validation_data, validation_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET))

    _, _ = fit_autoencoder(training_data,
                           encoding_size=25,
                           validation_data=(validation_data,
                                            validation_labels))
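
A hedged sketch of what fit_autoencoder might look like internally; the real helper lives elsewhere in this repo, and the single-hidden-layer architecture and training settings below are assumptions.

def fit_autoencoder_sketch(training_data, encoding_size, validation_data=None):
    from tensorflow import keras

    x = np.asarray(training_data, dtype=np.float32)
    inputs = keras.Input(shape=(x.shape[1],))
    encoded = keras.layers.Dense(encoding_size, activation='relu')(inputs)
    decoded = keras.layers.Dense(x.shape[1], activation='linear')(encoded)
    autoencoder = keras.Model(inputs, decoded)
    encoder = keras.Model(inputs, encoded)
    autoencoder.compile(optimizer='adam', loss='mean_squared_error')
    # An autoencoder reconstructs its own input, so the data is the target.
    autoencoder.fit(x, x, epochs=20, verbose=0)
    return autoencoder, encoder
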
def train_raw_pca():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_raw_dfs_as_dict(sub_set=LEARNING_SET))

    computed_features_pca_combiner_ffnn = EmbeddingFeaturesFNNN(name="PCA learned",
                                                                embedding_method=PCAEmbedding(),
                                                                encoding_size=1280,
                                                                data_set_type=DataSetType.raw)

    computed_features_pca_combiner_ffnn.train(training_data, training_labels, validation_data=None,
                                              validation_labels=None)
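
For context, a hedged sketch of the embedding interface EmbeddingFeaturesFNNN appears to consume: a thin wrapper around scikit-learn's PCA. The fit/embed method names are assumptions.

from sklearn.decomposition import PCA


class PCAEmbeddingSketch:
    def fit(self, data, encoding_size: int):
        self._pca = PCA(n_components=encoding_size)
        self._pca.fit(data)

    def embed(self, data):
        return self._pca.transform(data)
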
Example #4
def eval_isomap():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET)
    validation_labels = pop_labels(validation_dict)

    # One Isomap-based model per candidate embedding size.
    isomap_models = [
        EmbeddingFeaturesFNNN(name="Isomap combined %d" % encoding_size,
                              embedding_method=IsomapEmbedding(),
                              encoding_size=encoding_size,
                              data_set_type=DataSetType.computed)
        for encoding_size in (5, 15, 25, 35, 45, 55)
    ]
    metrics_dict = {}
    for isomap_model in isomap_models:
        isomap_model.train(training_data,
                           training_labels,
                           validation_data=None,
                           validation_labels=None)
        metrics_dict[isomap_model.name] = isomap_model.compute_metrics(
            df_dict=validation_dict,
            labels=validation_labels,
            metrics_list=[rmse, correlation_coefficient])
    save_latex_aggregated_table(metrics_dict, None)
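
For reference, a hedged sketch of the dependency behind IsomapEmbedding: scikit-learn's Isomap supplies the fit/transform pair such a wrapper would delegate to (the wrapper shape is an assumption).

from sklearn.manifold import Isomap


class IsomapEmbeddingSketch:
    def fit(self, data, encoding_size: int):
        self._isomap = Isomap(n_components=encoding_size)
        self._isomap.fit(data)

    def embed(self, data):
        return self._isomap.transform(data)
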
def train_pca():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET, csv_name=RAW_CSV_NAME))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET, csv_name=RAW_CSV_NAME)
    validation_labels = pop_labels(validation_dict)

    computed_features_pca_combiner_ffnn = EmbeddingFeaturesFNNN(name="Isomap combined",
                                                                embedding_method=PCAEmbedding(),
                                                                encoding_size=5,
                                                                data_set_type=DataSetType.computed)

    computed_features_pca_combiner_ffnn.train(training_data, training_labels, validation_data=None,
                                              validation_labels=None)
    metrics_dict = {computed_features_pca_combiner_ffnn.name: computed_features_pca_combiner_ffnn.compute_metrics(
        df_dict=validation_dict, labels=validation_labels,
        metrics_list=[rmse, correlation_coefficient])}
    computed_features_pca_combiner_ffnn.visualize_rul(df_dict=validation_dict,
                                                      label_data=validation_labels, experiment_name=None)
    save_latex_aggregated_table(metrics_dict, None)
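
Hedged sketches of the two metric callables passed to compute_metrics throughout these examples; the repo's exact signatures may differ.

def rmse_sketch(y_true, y_pred) -> float:
    diff = np.asarray(y_true) - np.asarray(y_pred)
    return float(np.sqrt(np.mean(diff ** 2)))


def correlation_coefficient_sketch(y_true, y_pred) -> float:
    return float(np.corrcoef(np.asarray(y_true), np.asarray(y_pred))[0, 1])
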
def train_svr():
    training_data, training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    validation_dict = read_feature_dfs_as_dict(data_set_sub_set=FULL_TEST_SET)
    validation_labels = pop_labels(validation_dict)

    svr_model = ComputedFeaturesFFNN(name="SVR", feature_list=ENTROPY_FEATURES)

    svr_model.train_svr(training_data, training_labels)
    metrics_dict = {
        svr_model.name:
        svr_model.compute_metrics(df_dict=validation_dict,
                                  labels=validation_labels,
                                  metrics_list=[rmse, correlation_coefficient],
                                  use_svr=True)
    }
    svr_model.visualize_rul(df_dict=validation_dict,
                            label_data=validation_labels,
                            use_svr=True,
                            experiment_name=None)
    save_latex_aggregated_table(metrics_dict, None)
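
A hedged sketch of the train_svr step: a plausible delegation to scikit-learn's SVR (the kernel choice and default hyperparameters are assumptions).

from sklearn.svm import SVR


def train_svr_sketch(training_data, training_labels):
    svr = SVR(kernel='rbf')
    # SVR expects a 1-D target vector.
    svr.fit(training_data, np.ravel(training_labels))
    return svr
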
def do_eval(model_dict: Dict[str, Sequence[DegradationModel]],
            health_stage_classifier: Optional[HealthStageClassifier] = None,
            use_svr: bool = False,
            use_gpr: bool = False,
            use_poly_reg: bool = False):
    # At most one of the alternative regression back ends may be selected.
    assert sum([use_svr, use_gpr, use_poly_reg]) <= 1
    # Read evaluation data
    raw_metric_data = read_raw_dfs_as_dict(FULL_TEST_SET)
    feature_metric_data = read_feature_dfs_as_dict(
        data_set_sub_set=FULL_TEST_SET)
    spectra_metric_data = read_feature_dfs_as_dict(
        data_set_sub_set=FULL_TEST_SET, csv_name=SPECTRA_CSV_NAME)

    # Read Raw Data
    raw_training_data, raw_training_labels = df_dict_to_df_dataframe(
        read_raw_dfs_as_dict(LEARNING_SET))
    raw_validation_data, raw_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(raw_metric_data))

    # Read Computed Feature Data
    feature_training_data, feature_training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET))
    feature_validation_data, feature_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(feature_metric_data))

    # Read Frequency Spectra Data
    spectra_training_data, spectra_training_labels = df_dict_to_df_dataframe(
        read_feature_dfs_as_dict(data_set_sub_set=LEARNING_SET,
                                 csv_name=SPECTRA_CSV_NAME))
    spectra_validation_data, spectra_validation_labels = df_dict_to_df_dataframe(
        copy.deepcopy(spectra_metric_data))

    training_data_dict: Dict[DataSetType,
                             Tuple[Tuple[pd.DataFrame, pd.Series],
                                   Tuple[pd.DataFrame, pd.Series]]] = {
        DataSetType.raw: ((raw_training_data, raw_training_labels),
                          (raw_validation_data, raw_validation_labels)),
        DataSetType.computed: ((feature_training_data, feature_training_labels),
                               (feature_validation_data, feature_validation_labels)),
        DataSetType.spectra: ((spectra_training_data, spectra_training_labels),
                              (spectra_validation_data, spectra_validation_labels))
    }

    # Format validation data
    raw_metric_labels = pop_labels(raw_metric_data)
    feature_metric_labels = pop_labels(feature_metric_data)
    spectra_metric_labels = pop_labels(spectra_metric_data)

    validation_metric_data: Dict[DataSetType,
                                 Tuple[Dict[str, pd.DataFrame],
                                       Dict[str, pd.Series]]] = {
        DataSetType.raw: (raw_metric_data, raw_metric_labels),
        DataSetType.computed: (feature_metric_data, feature_metric_labels),
        DataSetType.spectra: (spectra_metric_data, spectra_metric_labels)
    }

    # Cut dfs according to health_stage_classifier
    if health_stage_classifier is not None:
        for key, ((training_data, training_labels),
                  (validation_data,
                   validation_labels)) in training_data_dict.items():
            training_data_dict[key] = [
                health_stage_classifier.cut_FPTs_of_dataframe(
                    training_data, training_labels, feature_training_data),
                health_stage_classifier.cut_FPTs_of_dataframe(
                    validation_data, validation_labels,
                    feature_validation_data)
            ]

        fpt_dict = {}
        for key, (data, labels) in validation_metric_data.items():
            cut_data, cut_labels, fpts = health_stage_classifier.cut_FPTs_of_dataframe_dict(
                data, labels, feature_validation_data)
            validation_metric_data[key] = (cut_data, cut_labels)
            fpt_dict[str(key)] = fpts
        fpt_path = Path("logs").joinpath("first_prediction_times")
        if not os.path.exists(fpt_path):
            Path(fpt_path).mkdir(parents=True, exist_ok=True)
        with open(fpt_path.joinpath(health_stage_classifier.name),
                  'w') as file:
            json.dump(fpt_dict, file, indent=4)

    # Evaluate Models
    for model_group, model_list in tqdm(model_dict.items(),
                                        desc="Evaluating model groups"):
        # The experiment name encodes the health-stage setting and the
        # regression back end used for this run.
        experiment_name = model_group
        if health_stage_classifier is not None:
            experiment_name += "_true"
        else:
            experiment_name += "_false"
        if use_svr:
            experiment_name += "_SVR"
        elif use_gpr:
            experiment_name += "_GPR"
        elif use_poly_reg:
            experiment_name += "_MLR"
        else:
            experiment_name += "_ANN"
        # Train Models
        for model in tqdm(model_list,
                          desc="Training models for model group %s" %
                          experiment_name):
            (training_data,
             training_labels), (validation_data,
                                validation_labels) = training_data_dict.get(
                                    model.get_data_set_type())
            if use_svr:
                model.train_svr(training_data=training_data,
                                training_labels=training_labels)
            elif use_gpr:
                model.train_gpr(training_data=training_data,
                                training_labels=training_labels)
            elif use_poly_reg:
                model.train_poly_reg(training_data=training_data,
                                     training_labels=training_labels,
                                     memory_path=MEMORY_CACHE_PATH)
            else:
                model.train(training_data=training_data,
                            training_labels=training_labels,
                            validation_data=validation_data,
                            validation_labels=validation_labels)

        metric_data = {}
        # Evaluate Models
        for model in tqdm(model_list,
                          desc="Evaluating models for model group %s" %
                          experiment_name,
                          position=0):
            model_metric_data, model_metric_labels = validation_metric_data.get(
                model.get_data_set_type())
            metric_data[model.get_name()] = model.compute_metrics(
                df_dict=model_metric_data,
                labels=model_metric_labels,
                metrics_list=[rmse, correlation_coefficient],
                use_svr=use_svr,
                use_gpr=use_gpr,
                use_poly_reg=use_poly_reg)

            model.visualize_rul(model_metric_data,
                                model_metric_labels,
                                experiment_name=experiment_name,
                                use_svr=use_svr,
                                use_gpr=use_gpr,
                                use_poly_reg=use_poly_reg)
        store_metrics_dict(dict=metric_data, experiment_name=experiment_name)
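
A hedged usage sketch for do_eval: model groups map a group name to a list of DegradationModel instances. The group name and constructor arguments below are illustrative only.

if __name__ == '__main__':
    models = {
        "pca_baseline": [
            EmbeddingFeaturesFNNN(name="PCA combined 5",
                                  embedding_method=PCAEmbedding(),
                                  encoding_size=5,
                                  data_set_type=DataSetType.computed)
        ]
    }
    do_eval(models)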