def evaluate_clusters(start_from=0, mode="sawp"):
    """
    Evaluates the DTW clusters and exports an image of the matrix obtained,
    plus the matrix itself as a semicolon-delimited text file.

    Parameters
    ----------
    start_from : int
        Skip every dataset whose index in the meta-frame is not greater than
        this value (useful for resuming an interrupted run).
    mode : str = {"direct", "sawp"}
        Select the feature representation.
    """
    import plotly.express as px
    import numpy as np

    meta = metaframe.MetaFrame(path_data=shared.DATASET_PATH, path_meta=shared.META_PATH)
    for identifier, uid in enumerate(meta.UUID):
        if identifier > start_from:
            X = clu.Cluster(uid)
            for side in X.sides:
                for sensor in X.sensors:
                    print("[{}] # ".format(time.ctime()) + "Starting DTW Matrix Computation.")
                    dtw_matrix = X.dist_matrix(side=side, sensor=sensor, mode=mode)
                    # export the matrix as a heatmap image
                    fig = px.imshow(dtw_matrix)
                    image_path = f"images/DTW_matrices/DTW_{X.train}_{X.direction}_{X.avg_speed}" \
                                 f"_{X.component}_{X.num_trip}_{X.engine_conf}_{side}_{sensor}_{uid}.png"
                    fig.write_image(image_path)
                    print("[{}] # ".format(time.ctime()) + "Image Exported.")
                    # export the raw matrix as a semicolon-delimited text file
                    matrix_path = f"private/export/DTW_matrices/DTW_{X.train}_{X.direction}_{X.avg_speed}" \
                                  f"_{X.component}_{X.num_trip}_{X.engine_conf}_{side}_{sensor}_{uid}.dat"
                    np.savetxt(matrix_path, dtw_matrix, delimiter=";")
                    print("[{}] # ".format(time.ctime()) + "File Exported.")
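
# The sketch below illustrates what a pairwise DTW distance matrix is, since
# Cluster.dist_matrix is project-internal. It assumes classic dynamic time
# warping with an absolute-difference cost; dtw_distance, pairwise_dtw_matrix
# and segments are illustrative names, not part of this codebase.
import numpy as np

def dtw_distance(a, b):
    """Classic O(len(a) * len(b)) DTW between two 1-D signals."""
    n, m = len(a), len(b)
    cost = np.full((n + 1, m + 1), np.inf)
    cost[0, 0] = 0.0
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            d = abs(a[i - 1] - b[j - 1])
            cost[i, j] = d + min(cost[i - 1, j],      # insertion
                                 cost[i, j - 1],      # deletion
                                 cost[i - 1, j - 1])  # match
    return cost[n, m]

def pairwise_dtw_matrix(segments):
    """Symmetric matrix of DTW distances between all pairs of segments."""
    k = len(segments)
    matrix = np.zeros((k, k))
    for i in range(k):
        for j in range(i + 1, k):
            matrix[i, j] = matrix[j, i] = dtw_distance(segments[i], segments[j])
    return matrix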
def export_labeled_clu():
    """
    Plot the clusters labeled by the clustering algorithm and stored in the
    "export/Clusters" folder.
    """
    meta = metaframe.MetaFrame(path_data=shared.DATASET_PATH, path_meta=shared.META_PATH)
    for uid in meta.UUID:
        X = clu.Cluster(uid)
        X.plot_labeled_clusters()
def export_clusters():
    """
    Wrapper around the export_all_clusters method of Cluster objects.
    It generates the content of the "export/Clusters" folder.
    """
    meta = metaframe.MetaFrame(path_data=shared.DATASET_PATH, path_meta=shared.META_PATH)
    for uid in meta.UUID:
        try:
            X = clu.Cluster(uid)
            X.export_all_clusters()
        except KeyError:
            # malformed dataset: skip it and keep going
            print(f"[{time.ctime()}] # Key Error: Manual Bypass of {uid}")
def generate_vibration_images():
    """
    Generate vibration images with the aligned track for every item in the
    meta-frame, drawing weldings in black.
    """
    # load the meta-frame
    meta = metaframe.MetaFrame(path_data=shared.DATASET_PATH, path_meta=shared.META_PATH)
    for uid in meta.UUID:
        # Track(uid) loads and preprocesses the acceleration data
        X = track.Track(uid)
        print("[{}] # ".format(time.ctime()) + "Data Load Completed.")
        print("[{}] # ".format(time.ctime()) + "Acceleration Preprocessing Completed.")
        X.plot_accelerations()
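
# A minimal sketch, assuming Track.plot_accelerations draws the acceleration
# signal over the aligned track position and marks welding locations in
# black; plot_acceleration_with_weldings and its arguments are illustrative
# placeholders, not project attributes.
import matplotlib.pyplot as plt

def plot_acceleration_with_weldings(positions, acceleration, welding_positions):
    fig, ax = plt.subplots(figsize=(12, 4))
    ax.plot(positions, acceleration, linewidth=0.5, label="acceleration")
    for w in welding_positions:
        ax.axvline(w, color="black", linewidth=1.0)  # weldings in black
    ax.set_xlabel("track position [m]")
    ax.set_ylabel("acceleration")
    ax.legend()
    return fig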
def test_alignment_score(
        threshold: float = 0.1,
        detection_range: float = 0.9,
        plot: bool = True,
        export: bool = True,
        epsilon: float = shared.CLUSTERING_EPS,
        min_samples_cluster: int = shared.CLUSTERING_MS,
        cluster_metrics: str = shared.CLUSTERING_METRICS,
):
    """
    Score Generator Routine API: evaluates the alignment between the
    ground-truth welding indices and the shift applied to the acceleration
    sensors. It follows this data pipeline, for each dataset:

    - take the accelerations from file,
    - shift them to align with the weldings,
    - compute the SAWP score on each sensor track,
    - extract the anomalous-SAWP index slices,
    - cluster them with DBSCAN,
    - check whether the nearest cluster to a given welding lies within
      detection_range of it,
    - export the frame data and scores to a score file.

    Parameters
    ----------
    threshold : float
        Multiplication factor for the decision boundary of the anomaly score.
    detection_range : float
        Range (in meters) used to assess whether a cluster correctly verifies
        the hypothesis of being a welding.
    plot : bool
        If True, export an image for each dataset.
    export : bool
        If True, export the score file.
    epsilon : float
        The maximum distance between two samples for one to be considered as
        in the neighborhood of the other. This is not a maximum bound on the
        distances of points within a cluster. It is the most important DBSCAN
        parameter to choose appropriately for your data set and distance
        function.
    min_samples_cluster : int
        The number of samples (or total weight) in a neighborhood for a point
        to be considered a core point, including the point itself.
    cluster_metrics : str
        The metric used for clustering, one of {'cityblock', 'cosine',
        'euclidean', 'l1', 'l2', 'manhattan'}.
    """
    score_list = []
    meta = metaframe.MetaFrame(path_data=shared.DATASET_PATH, path_meta=shared.META_PATH)
    print("[{}] # ".format(time.ctime()) + "Score Generator Helper started.")
    for identifier, uid in enumerate(meta.UUID):
        X = track.Track(uid)
        # per-side, per-sensor anomaly detection & z-score
        for side in X.sides:
            for sensor in range(4):
                X.anomaly_cluster[side][sensor]["Error_X"] = detection_range
                print("[{}] # ".format(time.ctime()) + "Sensor: {}/4 - Side: {}".format(sensor + 1, side))
                X.get_anomalies(side=side, sensor=sensor, threshold=threshold)
                print("[{}] # ".format(time.ctime()) + "Anomaly Detection completed.")
                X.get_anomaly_clusters(eps=epsilon, min_samples=min_samples_cluster, metric=cluster_metrics)
        X.evaluate_prediction(error_length=detection_range)
        # collect one score row per side/sensor; the tuple order matches the
        # `columns` header below
        for side in X.sides:
            for sensor in range(4):
                score_list.append((
                    uid, X.train, X.direction, X.avg_speed, X.num_trip, X.component,
                    side, sensor,
                    X.anomaly_cluster[side][sensor]["Avg_Index"],
                    X.anomaly_cluster[side][sensor]["performance"][0],
                    X.anomaly_cluster[side][sensor]["performance"][1],
                    X.anomaly_cluster[side][sensor]["performance"][2],
                    threshold, detection_range, epsilon, min_samples_cluster, cluster_metrics,
                ))
        if plot:
            X.plot_clusters()
            # X.plot_scores()
            print("[{}] # ".format(time.ctime()) + "Scores Plot completed.")
    if export:
        columns = ("UUID", "Train", "Direction", "Speed", "Num_Trip", "Component",
                   "Side", "Sensor", "Cluster_Centroids", "PD", "ED", "PFA",
                   "Threshold", "Detection_Range", "Epsilon",
                   "Min_Samples_Cluster", "Cluster_Metrics")
        export_df = pd.DataFrame(score_list, columns=columns)
        export_df.to_csv(
            "private/export/Scores/scores_T{}_D{}_E{}_C{}_M{}.csv".format(
                threshold, detection_range, epsilon, min_samples_cluster, cluster_metrics,
            ))
    print("[{}] # ".format(time.ctime()) + "Score Generator Helper completed.")