# Imports reconstructed from usage in this listing. The stdlib/third-party
# imports are certain; the two project-local import paths below appear
# verbatim in the __main__ block at the end of this file. Other project-local
# names used here (DillSerializer, misc, vector_similarity, light_analysis,
# get_light_signals, load_data, get_runtime, CouplingDataProvider, Client,
# StaticCouplingResult, create_random_mac, get_pattern_max_sampling_period,
# LightData, BasicFeatures, TsFreshFeatures, AtomicCounter, __location__)
# come from project modules that are not shown.
import glob
import itertools
import json
import os
import time
from collections import defaultdict

import numpy
import pandas
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import ParameterGrid, cross_val_score

from utils.nested_dict import nested_dict
from coupling.device_grouping.online.machine_learning_features import Classifier


def evaluate_similarity_runtime(len_light_patterns, path_similarity, path_runtime, rounds):
    results_runtime = nested_dict(4, list)
    results_similarity = nested_dict(4, list)
    # pair every pattern length with itself plus all cross combinations
    same = list(zip(len_light_patterns, len_light_patterns))
    combined = list(itertools.combinations(len_light_patterns, 2))
    pattern_combinations = same + combined
    for len_light_pattern1, len_light_pattern2 in pattern_combinations:
        print("from-to:", len_light_pattern1, len_light_pattern2)
        for run in range(rounds):
            print("round:", run)
            client1, client2 = get_light_signals([len_light_pattern1, len_light_pattern2])
            for equalize_method in [vector_similarity.equalize_methods.fill,
                                    vector_similarity.equalize_methods.cut,
                                    vector_similarity.equalize_methods.dtw]:
                print("equalize:", equalize_method)
                for similarity_method in vector_similarity.similarity_methods:
                    print("similarity:", similarity_method.__name__)
                    start_time = time.time()
                    similarity = similarity_method(client1.signal, client2.signal, equalize_method)
                    elapsed_time = time.time() - start_time
                    assert elapsed_time > 0
                    results_similarity[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method.__name__].append(similarity)
                    results_runtime[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method.__name__].append(elapsed_time)
    DillSerializer(path_similarity).serialize(results_similarity)
    DillSerializer(path_runtime).serialize(results_runtime)
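# nested_dict is imported from utils.nested_dict (see the __main__ block at
# the end of this listing), but its implementation is not shown. A minimal
# sketch of what it plausibly does -- a recursively nested defaultdict -- is
# given below; the real helper may differ.
def nested_dict_sketch(levels, leaf_type):
    """Return a defaultdict nested `levels` deep whose leaves default to leaf_type()."""
    if levels == 1:
        return defaultdict(leaf_type)
    return defaultdict(lambda: nested_dict_sketch(levels - 1, leaf_type))

# Usage mirroring evaluate_similarity_runtime above:
#   results = nested_dict_sketch(4, list)
#   results[2][4]["fill"]["cosine"].append(0.97)  # intermediate keys created on demand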
def data_preprocessing(testbeds, scaling):
    baseline_data = nested_dict(2, dict)
    testbed_data = nested_dict(2, dict)
    for operation in ["euclidean", "cosine", "initialisation"]:
        for testbed in testbeds:
            data_paths = glob.glob(os.path.join(__location__, "raw-result", "*" + testbed + "*.csv"))
            baseline_path = [path for path in data_paths if "lbs" in path]
            assert len(baseline_path) == 1
            data_paths.remove(baseline_path[0])
            baseline = load_data(baseline_path)
            he_libraries = numpy.sort(baseline.library.unique())
            for he_library in he_libraries:
                he_library_baseline = baseline[baseline.library == he_library]
                he_library_baseline_mean = he_library_baseline[operation].mean() / scaling["conversion"]
                he_library_baseline_std = he_library_baseline[operation].std() / scaling["conversion"]
                he_library_baseline_median = he_library_baseline[operation].median() / scaling["conversion"]
                feature_lengths = he_library_baseline.vectorLength.unique()
                assert len(feature_lengths) == 1
                baseline_data[operation][testbed][he_library] = (
                    feature_lengths[0], he_library_baseline_mean,
                    he_library_baseline_std, he_library_baseline_median)
                df = load_data(data_paths)
                he_data = df[df.library == he_library]
                feature_lengths = list()
                mean_rows, std_rows, median_rows = list(), list(), list()
                for feature_length, data in he_data.groupby("vectorLength"):
                    feature_lengths.append(feature_length)
                    # numeric_only=True preserves the old pandas behavior of
                    # silently skipping non-numeric columns
                    std_rows.append(data.std(numeric_only=True) / scaling["conversion"])
                    mean_rows.append(data.mean(numeric_only=True) / scaling["conversion"])
                    median_rows.append(data.median(numeric_only=True) / scaling["conversion"])
                # DataFrame.append was removed in pandas 2.0; build the frames
                # from the collected per-group Series in one step instead
                he_library_mean = pandas.DataFrame(mean_rows)[operation]
                he_library_std = pandas.DataFrame(std_rows)[operation]
                he_library_median = pandas.DataFrame(median_rows)[operation]
                testbed_data[operation][testbed][he_library] = (
                    feature_lengths, he_library_mean, he_library_std, he_library_median)
    return baseline_data, testbed_data
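# data_preprocessing aggregates one row per vectorLength group. Because
# pandas.DataFrame.append was removed in pandas 2.0, the function above
# collects the per-group Series in lists first; this self-contained snippet
# illustrates the idiom with made-up numbers:
def _rowwise_aggregation_demo():
    df = pandas.DataFrame({"vectorLength": [4, 4, 8, 8],
                           "euclidean": [1.0, 3.0, 2.0, 6.0]})
    rows = [group.mean() for _, group in df.groupby("vectorLength")]
    means = pandas.DataFrame(rows)  # each Series becomes one row
    assert means["euclidean"].tolist() == [2.0, 4.0]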
def __init__(self, access_point, data_period_coupling, coupling_compare_method,
             coupling_similarity_threshold, equalize_method, data_period_localization,
             num_clients, rooms, frequency_coupling, stop_reactor_callback,
             evaluate_callback):
    self.processing_clients = list()
    self.connected_clients = AtomicCounter(num_clients)
    self.evaluation_coupling = nested_dict(3, list)
    self.evaluation_runtime = defaultdict(list)
    self.rooms = rooms
    self.frequency_coupling = frequency_coupling
    self.access_point = access_point
    self.data_period_coupling = data_period_coupling
    self.coupling_compare_method = coupling_compare_method
    self.coupling_similarity_threshold = coupling_similarity_threshold
    self.equalize_method = equalize_method
    self.basic_features = BasicFeatures()
    self.tsfresh_features = TsFreshFeatures()
    self.data_period_localization = data_period_localization
    self.stop_reactor_callback = stop_reactor_callback
    self.evaluate_callback = evaluate_callback
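# AtomicCounter is project-local and not shown. A plausible minimal sketch --
# the real class may expose a different interface -- used here to track how
# many of the expected clients are still to connect:
import threading

class AtomicCounterSketch:
    def __init__(self, value):
        self._value = value
        self._lock = threading.Lock()

    def decrement(self):
        with self._lock:
            self._value -= 1
            return self._value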
def evaluate_impact_signal_distortion(len_light_patterns, distortion_rates,
                                      path_distorted_light_signals,
                                      path_distortion_similarity, rounds):
    distorted_light_signals = defaultdict(list)
    results_distortion_similarity = nested_dict(3, list)
    for run in range(rounds):
        print("round:", run)
        for len_light_pattern in len_light_patterns:
            print("len light pattern:", len_light_pattern)
            # placeholder: original and distorted signal share the same length,
            # so no equalization is actually applied
            equalize_method = "dummy"
            client = get_light_signals([len_light_pattern])[0]
            distorted_light_signals[len_light_pattern].append(client)
            for distortion_rate in distortion_rates:
                print("distortion rate:", distortion_rate)
                for similarity_method in vector_similarity.similarity_methods:
                    distorted_light_signal = client.get_distorted_light_signal(distortion_rate)
                    similarity = similarity_method(client.signal, distorted_light_signal, equalize_method)
                    if distortion_rate == 0:
                        # sanity check: zero distortion must reproduce the original signal
                        assert numpy.array_equal(client.signal, distorted_light_signal)
                        assert similarity >= 0.98
                    results_distortion_similarity[len_light_pattern][distortion_rate][similarity_method.__name__].append(similarity)
    DillSerializer(path_distortion_similarity).serialize(results_distortion_similarity)
    DillSerializer(path_distorted_light_signals).serialize(distorted_light_signals)
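# get_distorted_light_signal is provided by the client objects and not shown.
# A plausible sketch of distortion at a given rate -- replacing a random
# fraction of samples with Gaussian noise -- consistent with the rate == 0
# identity check above; the actual method may differ:
def distort_signal_sketch(signal, distortion_rate, rng=None):
    rng = numpy.random.default_rng() if rng is None else rng
    distorted = signal.copy()
    num_distort = int(round(distortion_rate * len(signal)))
    if num_distort > 0:
        idx = rng.choice(len(signal), size=num_distort, replace=False)
        distorted[idx] = rng.normal(signal.mean(), signal.std(), num_distort)
    return distorted  # distortion_rate == 0 returns an exact copy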
def process_data(evaluation_data):

    def find_best_per_params(metric_results):
        best_params = list()
        features, coupling_methods, len_light_patterns, num_users = misc.get_all_keys(metric_results)
        for feature in features:
            per_feature_results = dict()
            for coupling_method, len_light_pattern, num_user in itertools.product(
                    coupling_methods, len_light_patterns, num_users):
                result = metric_results[feature][coupling_method][len_light_pattern][num_user]
                if len(result) > 0:
                    key = coupling_method + "-" + str(len_light_pattern) + "-" + str(num_user)
                    per_feature_results[key] = numpy.mean(result)
            per_feature_selection = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
            best_param = per_feature_selection[0][0].split("-")
            coupling_method = best_param[0]
            len_light_pattern = int(best_param[1])
            num_user = int(best_param[2])
            best_params.append((feature, coupling_method, len_light_pattern, num_user))
        return best_params

    def get_metrics(result):
        accuracy = [result.accuracy_accept, result.accuracy_reject]
        precision = [result.precision_accept, result.precision_reject]
        recall = [result.recall_accept, result.recall_reject]
        f1 = [result.f1_accept, result.f1_reject]
        return (accuracy, precision, recall, f1), result.runtime

    def save_result(results, runtime_query_data, metric_results, runtime_results,
                    feature, coupling_method, len_light_pattern, num_client):
        metrics, runtime_coupling = get_metrics(results)
        metric_results[feature][coupling_method][len_light_pattern][num_client].append(metrics)
        runtime_results[feature][coupling_method][len_light_pattern][num_client].append(
            (runtime_query_data, runtime_coupling))

    num_clients, num_reject_clients, len_light_patterns, \
        sampling_period_couplings, coupling_compare_methods, \
        coupling_similarity_thresholds, equalize_methods, \
        sampling_period_localizations, sampling_period_ml_trains, \
        coupling_ml_classifiers = misc.get_all_keys(evaluation_data)

    print("############### Static simulation ###############")
    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Sampling period couplings: ", sampling_period_couplings)
    print("Coupling compare methods: ", coupling_compare_methods)
    print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
    print("Equalize methods: ", equalize_methods)
    print("Sampling period localizations: ", sampling_period_localizations)
    print("Sampling period ML trains: ", sampling_period_ml_trains)
    print("Coupling ML classifiers: ", coupling_ml_classifiers)

    similarity_metrics = nested_dict(4, list)
    machine_learning_metrics = nested_dict(4, list)
    localization_metrics = nested_dict(4, list)
    similarity_runtime = nested_dict(4, list)
    localization_runtime = nested_dict(4, list)
    machine_learning_runtime = nested_dict(4, list)

    for num_client, num_reject_client, len_light_pattern, sampling_period_coupling, \
            coupling_compare_method, coupling_similarity_threshold, equalize_method, \
            sampling_period_localization, sampling_period_ml_train, coupling_ml_classifier in itertools.product(
                num_clients, num_reject_clients, len_light_patterns, sampling_period_couplings,
                coupling_compare_methods, coupling_similarity_thresholds, equalize_methods,
                sampling_period_localizations, sampling_period_ml_trains, coupling_ml_classifiers):

        results = evaluation_data[num_client][num_reject_client][len_light_pattern] \
            [sampling_period_coupling][coupling_compare_method] \
            [coupling_similarity_threshold][equalize_method] \
            [sampling_period_localization][sampling_period_ml_train][coupling_ml_classifier]
        if len(results) > 0:
            for result in results:
                # further fields available: result.runtime_coupling, result.runtime_query_data

                # localization
                feature = "ble"
                save_result(result.localization_random_forest_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime,
                            feature, "random forest", len_light_pattern, num_client)
                save_result(result.localization_filtering_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime,
                            feature, "filtering", len_light_pattern, num_client)
                save_result(result.localization_svm_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime,
                            feature, "svm", len_light_pattern, num_client)
                feature = "wifi"
                save_result(result.localization_random_forest_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime,
                            feature, "random forest", len_light_pattern, num_client)
                save_result(result.localization_filtering_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime,
                            feature, "filtering", len_light_pattern, num_client)
                save_result(result.localization_svm_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime,
                            feature, "svm", len_light_pattern, num_client)

                # similarity metrics
                save_result(result.coupling_signal_pattern, result.runtime_query_pattern_light,
                            similarity_metrics, similarity_runtime,
                            "signal pattern", coupling_compare_method, len_light_pattern, num_client)
                save_result(result.coupling_signal_pattern_duration, result.runtime_query_pattern_light,
                            similarity_metrics, similarity_runtime,
                            "signal pattern duration", coupling_compare_method, len_light_pattern, num_client)
                save_result(result.coupling_signal_similarity, result.runtime_query_raw_light,
                            similarity_metrics, similarity_runtime,
                            "signal similarity", coupling_compare_method, len_light_pattern, num_client)

                # machine learning
                save_result(result.coupling_machine_learning_basic_all, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime,
                            "basic all", coupling_ml_classifier, len_light_pattern, num_client)
                save_result(result.coupling_machine_learning_basic_selected, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime,
                            "basic selected", coupling_ml_classifier, len_light_pattern, num_client)
                save_result(result.coupling_machine_learning_tsfresh_selected, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime,
                            "tsfresh selected", coupling_ml_classifier, len_light_pattern, num_client)

    best_ml = [(feature, coupling, len_light_pattern, num_user, machine_learning_metrics)
               for feature, coupling, len_light_pattern, num_user in find_best_per_params(machine_learning_metrics)]
    best_similarity = [(feature, coupling, len_light_pattern, num_user, similarity_metrics)
                       for feature, coupling, len_light_pattern, num_user in find_best_per_params(similarity_metrics)]
    best_localization = [(feature, coupling, len_light_pattern, num_user, localization_metrics)
                         for feature, coupling, len_light_pattern, num_user in find_best_per_params(localization_metrics)]
    return best_similarity, similarity_runtime, best_ml, machine_learning_runtime, \
        best_localization, localization_runtime, len_light_patterns, num_clients
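# misc.get_all_keys recovers the parameter axes of a nested result dict; a
# minimal sketch, assuming every non-leaf level contains only dicts (the real
# helper may differ):
def get_all_keys_sketch(nested):
    levels = []
    frontier = [nested]
    while frontier and isinstance(frontier[0], dict):
        keys = set()
        next_frontier = []
        for node in frontier:
            keys.update(node.keys())
            next_frontier.extend(node.values())
        levels.append(sorted(keys))
        frontier = next_frontier
    return levels  # one sorted key list per nesting level, unpacked by the callers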
# Dynamic-simulation counterpart of process_data above; the two functions
# share a name and presumably live in separate modules in the original project.
def process_data(evaluation_data):

    def get_results(results):
        # negative values mark unavailable metrics and are filtered out
        accuracy = [result.accuracy for result in results if result.accuracy >= 0]
        precision = [result.precision for result in results if result.precision >= 0]
        recall = [result.recall for result in results if result.recall >= 0]
        f1 = [result.f1 for result in results if result.f1 >= 0]
        runtime = [result.runtime for result in results if result.runtime > 0]
        return (accuracy, precision, recall, f1), misc.flatten_list(runtime)

    def save_result(result, metric_results, runtime_results, coupling_ident, runtime_ident,
                    feature, coupling_method, num_user, coupling_frequency, num_room):
        metrics, runtime = get_results(result.coupling[coupling_ident])
        missing_metric = 0 in [len(metric) for metric in metrics]
        if not missing_metric:  # skip empty results
            metric_results[feature][coupling_method][num_user][coupling_frequency][num_room].append(metrics)
            runtime_results[feature][coupling_method][num_user][coupling_frequency][num_room].append(
                (result.runtime[runtime_ident], runtime))

    def find_best_per_params(metric_results):
        best_params = list()
        features, coupling_methods, num_users, coupling_frequencies, num_rooms = misc.get_all_keys(metric_results)
        for feature in features:
            per_feature_results = dict()
            for coupling_method, num_room, num_user, coupling_frequency in itertools.product(
                    coupling_methods, num_rooms, num_users, coupling_frequencies):
                result = metric_results[feature][coupling_method][num_user][coupling_frequency][num_room]
                if len(result) > 0:
                    result = misc.flatten_list(misc.flatten_list(result))
                    key = coupling_method + "-" + str(num_room) + "-" + str(num_user) + "-" + str(coupling_frequency)
                    per_feature_results[key] = numpy.mean(result)
            per_feature_results = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
            # take the best parameter set whose mean metric is not a perfect 1.0
            idx = numpy.where(numpy.asarray([metric for _, metric in per_feature_results]) != 1)[0][0]
            metric_result = per_feature_results[idx][1]
            best_param = per_feature_results[idx][0].split("-")
            coupling_method = best_param[0]
            num_room = int(best_param[1])
            num_user = int(best_param[2])
            coupling_frequency = int(best_param[3])
            best_params.append((feature, coupling_method, num_room, num_user, coupling_frequency, metric_result))
        return best_params

    sampling_period_couplings, coupling_compare_methods, \
        coupling_similarity_thresholds, equalize_methods, \
        sampling_period_localizations, sampling_period_ml_trains, \
        coupling_ml_classifiers, num_users, num_rooms, \
        simulation_durations, coupling_frequencies = misc.get_all_keys(evaluation_data)

    print("############### Dynamic simulation ###############")
    print("Num users: ", num_users)
    print("Num rooms: ", num_rooms)
    print("Simulation duration: ", simulation_durations)
    print("Coupling frequency: ", coupling_frequencies)
    print("Sampling period couplings: ", sampling_period_couplings)
    print("Coupling compare methods: ", coupling_compare_methods)
    print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
    print("Equalize methods: ", equalize_methods)
    print("Sampling period localizations: ", sampling_period_localizations)
    print("Sampling period ML trains: ", sampling_period_ml_trains)
    print("Coupling ML classifiers: ", coupling_ml_classifiers)

    similarity_metrics = nested_dict(5, list)
    machine_learning_metrics = nested_dict(5, list)
    localization_metrics = nested_dict(5, list)
    similarity_runtime = nested_dict(5, list)
    machine_learning_runtime = nested_dict(5, list)
    localization_runtime = nested_dict(5, list)

    for sampling_period_coupling, coupling_compare_method, \
            coupling_similarity_threshold, equalize_method, \
            sampling_period_localization, sampling_period_ml_train, \
            coupling_ml_classifier, num_user, num_room, \
            simulation_duration, coupling_frequency in itertools.product(
                sampling_period_couplings, coupling_compare_methods,
                coupling_similarity_thresholds, equalize_methods,
                sampling_period_localizations, sampling_period_ml_trains,
                coupling_ml_classifiers, num_users, num_rooms,
                simulation_durations, coupling_frequencies):

        results = evaluation_data[sampling_period_coupling][coupling_compare_method] \
            [coupling_similarity_threshold][equalize_method] \
            [sampling_period_localization][sampling_period_ml_train] \
            [coupling_ml_classifier][num_user][num_room] \
            [simulation_duration][coupling_frequency]
        if len(results) > 0:
            for result in results:
                # localization
                feature = "ble"
                save_result(result, localization_metrics, localization_runtime,
                            "loc Random Forest BLE", "time query raw ble",
                            feature, "random forest", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc filtering BLE", "time query raw ble",
                            feature, "filtering", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc SVM BLE", "time query raw ble",
                            feature, "svm", num_user, coupling_frequency, num_room)
                feature = "wifi"
                save_result(result, localization_metrics, localization_runtime,
                            "loc Random Forest WiFi", "time query raw wifi",
                            feature, "random forest", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc filtering WiFi", "time query raw wifi",
                            feature, "filtering", num_user, coupling_frequency, num_room)
                save_result(result, localization_metrics, localization_runtime,
                            "loc SVM WiFi", "time query raw wifi",
                            feature, "svm", num_user, coupling_frequency, num_room)

                # similarity metrics
                feature = "signal pattern"
                save_result(result, similarity_metrics, similarity_runtime,
                            feature, "time query pattern light",
                            feature, coupling_compare_method, num_user, coupling_frequency, num_room)
                feature = "signal pattern duration"
                save_result(result, similarity_metrics, similarity_runtime,
                            feature, "time query pattern light",
                            feature, coupling_compare_method, num_user, coupling_frequency, num_room)
                feature = "signal similarity"
                save_result(result, similarity_metrics, similarity_runtime,
                            feature, "time query raw light",
                            feature, coupling_compare_method, num_user, coupling_frequency, num_room)

                # machine learning
                save_result(result, machine_learning_metrics, machine_learning_runtime,
                            "ml basic all features", "time query raw light",
                            "basic all", coupling_ml_classifier, num_user, coupling_frequency, num_room)
                save_result(result, machine_learning_metrics, machine_learning_runtime,
                            "ml basic selected features", "time query raw light",
                            "basic selected", coupling_ml_classifier, num_user, coupling_frequency, num_room)
                save_result(result, machine_learning_metrics, machine_learning_runtime,
                            "ml tsfresh selected features", "time query raw light",
                            "tsfresh selected", coupling_ml_classifier, num_user, coupling_frequency, num_room)

    machine_learning_params = find_best_per_params(machine_learning_metrics)
    similarity_params = find_best_per_params(similarity_metrics)
    localization_params = find_best_per_params(localization_metrics)
    best_machine_learning = [(feature, coupling_method, num_room, num_user, coupling_frequency, machine_learning_metrics)
                             for feature, coupling_method, num_room, num_user, coupling_frequency, _ in machine_learning_params]
    best_similarity = [(feature, coupling_method, num_room, num_user, coupling_frequency, similarity_metrics)
                       for feature, coupling_method, num_room, num_user, coupling_frequency, _ in similarity_params]
    best_localization = [(feature, coupling_method, num_room, num_user, coupling_frequency, localization_metrics)
                         for feature, coupling_method, num_room, num_user, coupling_frequency, _ in localization_params]
    return best_similarity, similarity_runtime, similarity_params, \
        best_machine_learning, machine_learning_runtime, machine_learning_params, \
        best_localization, localization_runtime, num_users, localization_params, \
        coupling_frequencies, num_rooms
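# misc.flatten_list is another project helper; a standard one-level flatten
# sketch (applied twice above to unnest the per-run metric tuples):
def flatten_list_sketch(nested):
    return [item for sub in nested for item in sub]

# e.g. flatten_list_sketch([[1, 2], [3]]) == [1, 2, 3]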
def offline_analysis_ml_model(path_ml_offline_evaluation):
    evaluation_data = DillSerializer(path_ml_offline_evaluation).deserialize()
    num_clients, num_reject_clients, len_light_patterns, \
        classifiers, sampling_periods = misc.get_all_keys(evaluation_data)
    analysis_result = nested_dict(2, list)
    for num_client, num_reject_client, len_light_pattern, classifier, sampling_period in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, classifiers, sampling_periods):
        results = evaluation_data[num_client][num_reject_client][len_light_pattern][classifier][sampling_period]
        if len(results) > 0:
            analysis_result[classifier][sampling_period].extend(results)
    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Classifiers: ", classifiers)
    print("Sampling periods: ", sampling_periods)
    for classifier in classifiers:
        results = analysis_result[classifier]
        sub_results = list()
        for sampling_period in sampling_periods:
            accuracy = [entry.accuracy_accept for entry in results[sampling_period]] + \
                       [entry.accuracy_reject for entry in results[sampling_period]]
            precision = [entry.precision_accept for entry in results[sampling_period]] + \
                        [entry.precision_reject for entry in results[sampling_period]]
            recall = [entry.recall_accept for entry in results[sampling_period]] + \
                     [entry.recall_reject for entry in results[sampling_period]]
            f1 = [entry.f1_accept for entry in results[sampling_period]] + \
                 [entry.f1_reject for entry in results[sampling_period]]
            entry = [numpy.mean(accuracy), numpy.mean(precision), numpy.mean(recall), numpy.mean(f1)]
            entry = [round(value, 2) for value in entry]
            sub_results.append(entry)
        # heatmap: sampling periods (rows) x metrics (columns)
        fig, ax = plt.subplots()
        ax.imshow(sub_results, cmap="Greens", aspect="auto", interpolation="nearest", vmin=0, vmax=1.4)
        ax.set_ylabel("Sampling period (ms)")
        ytickpos = numpy.arange(len(sampling_periods))
        ax.set_yticks(ytickpos)
        ax.set_yticklabels([int(sampling_period * 1e3) for sampling_period in sampling_periods])
        xticks = ["Accuracy", "Precision", "Recall", "F1-score"]
        xtickpos = range(len(xticks))
        ax.set_xticks(xtickpos)
        ax.set_xticklabels(xticks, rotation=20, ha="right")
        for i in range(len(sub_results)):
            for j in range(len(sub_results[0])):
                ax.text(j, i, sub_results[i][j], ha="center", va="center")
        # minor ticks halfway between cells draw the grid lines of the heatmap
        ticks = [start + ((end - start) / 2) for start, end in misc.pairwise(xtickpos)]
        ax.set_xticks(ticks, minor=True)
        ticks = [start + ((end - start) / 2) for start, end in misc.pairwise(ytickpos)]
        ax.set_yticks(ticks, minor=True)
        ax.grid(which="minor", color="black")
        filepath = os.path.join(__location__, "results", "machine-learning", "vm",
                                "ml-param-" + classifier.lower() + ".pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
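# misc.pairwise yields consecutive tick positions for the minor grid lines
# above; a standard sketch, equivalent to itertools.pairwise in Python >= 3.10:
def pairwise_sketch(iterable):
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)  # (s0, s1), (s1, s2), ...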
def offline_test_ml_model(path_ml_offline_evaluation):

    def filter_params(param_grid):
        # keep only parameter sets with at least two accepting clients
        filtered_params = list()
        for param in param_grid:
            if param["num clients"] - param["num reject clients"] >= 2:
                filtered_params.append(param)
        return filtered_params

    testbed = "vm"
    path_ml_train_data = os.path.join(__location__, "..", "online", "ml-train-data", testbed)
    combined_raw_feature_data = glob.glob(
        os.path.join(path_ml_train_data, "combined-*-raw-feature-data"))[0]
    combined_raw_feature_data = DillSerializer(combined_raw_feature_data).deserialize()
    tsfresh_features_to_extract_selected = os.path.join(
        __location__, "..", "online", "tsfresh-features-to-be-extracted")
    tsfresh_features_to_extract_selected = DillSerializer(
        tsfresh_features_to_extract_selected).deserialize()
    sampling_periods = sorted(combined_raw_feature_data.keys())
    max_num_clients = 10
    num_reject_clients = range(max_num_clients - 1)
    num_clients = range(2, max_num_clients + 1)
    len_light_patterns = range(2, 11, 2)
    param_grid = ParameterGrid({
        "num clients": num_clients,
        "num reject clients": num_reject_clients,
        "len light pattern": len_light_patterns
    })
    sampling_period_coupling = get_pattern_max_sampling_period()
    filtered_params = filter_params(param_grid)
    results = nested_dict(5, list)
    for i, param in enumerate(filtered_params):
        print("Param: {0}/{1}".format(i + 1, len(filtered_params)))
        clients = dict()
        groundtruth_accept_clients = list()
        groundtruth_reject_clients = list()
        light_signal, light_signal_time = light_analysis.load_light_pattern(param["len light pattern"])
        coupling_data_provider = CouplingDataProvider(light_signal, light_signal_time, None, None)
        for _ in range(param["num clients"] - param["num reject clients"]):  # accept clients
            mac = create_random_mac()
            client = Client()
            client.light_signal, _ = coupling_data_provider.get_light_data(sampling_period_coupling)
            clients[mac] = client
            groundtruth_accept_clients.append(mac)
        # reject clients receive Gaussian noise shaped like the light signal
        #light_signal_random, light_signal_random_time = light_analysis.load_random_light_signal()
        #coupling_data_provider = CouplingDataProvider(light_signal_random, light_signal_random_time, None, None)
        noise = numpy.random.normal(light_signal.mean(), light_signal.std(), len(light_signal))
        coupling_data_provider = CouplingDataProvider(noise, light_signal_time, None, None)
        for _ in range(param["num reject clients"]):  # reject clients
            mac = create_random_mac()
            client = Client()
            client.light_signal, _ = coupling_data_provider.get_light_data(sampling_period_coupling)
            clients[mac] = client
            groundtruth_reject_clients.append(mac)
        for clf in Classifier:
            for sampling_period in sampling_periods:
                print("Classifier: ", clf)
                print("Sampling period: ", sampling_period)
                tsfresh_features = TsFreshFeatures()
                X_tsfresh = combined_raw_feature_data[sampling_period][0].X_tsfresh
                y_tsfresh = combined_raw_feature_data[sampling_period][0].y_tsfresh
                print("X: ", X_tsfresh.shape)
                print("X samples: ", len(X_tsfresh.id.unique()))
                print("y: ", y_tsfresh.shape)
                print("Extract features ...")
                X_selected_features = tsfresh_features.extract_selected_features(
                    X_tsfresh, tsfresh_features_to_extract_selected)
                print("X selected: ", X_selected_features.shape)
                print("y: ", y_tsfresh.shape)
                print("Coupling simulation ...")
                ml_model = Classifier.get_clf(clf)
                print("Class 1: ", len(y_tsfresh[y_tsfresh == 1]))
                print("Class 0: ", len(y_tsfresh[y_tsfresh == 0]))
                ml_model = ml_model.fit(X_selected_features, y_tsfresh)
                accept_clients = set()
                reject_clients = set()
                for client_mac in clients.keys():
                    client_light_data = clients[client_mac].light_signal
                    feature = tsfresh_features.extract_selected_features(
                        client_light_data, tsfresh_features_to_extract_selected, True)
                    print("Feature shape: ", feature.shape)
                    result = ml_model.predict(feature)
                    if result == 1.0:
                        accept_clients.add(client_mac)
                    else:
                        reject_clients.add(client_mac)
                accept_clients = list(accept_clients)
                reject_clients = list(reject_clients)
                mac_mapping = {key: value for key, value in zip(range(len(clients)), clients.keys())}
                result = StaticCouplingResult(accept_clients, reject_clients,
                                              groundtruth_accept_clients,
                                              groundtruth_reject_clients, None, mac_mapping)
                results[param["num clients"]][param["num reject clients"]] \
                    [param["len light pattern"]][clf.name][sampling_period].append(result)
                print("accept:")
                print("result:", accept_clients)
                print("ground truth: ", groundtruth_accept_clients)
                print(result.accuracy_accept)
                print("reject:")
                print("result: ", reject_clients)
                print("ground truth: ", groundtruth_reject_clients)
                print(result.accuracy_reject)
                print("ML cross validation ...")
                ml_model = Classifier.get_clf(clf)
                scores = cross_val_score(ml_model, X_selected_features, y_tsfresh, cv=10, n_jobs=-1)
                print("Scores: ", scores)
                print("------------------------------------------------------")
    DillSerializer(path_ml_offline_evaluation).serialize(results)
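# create_random_mac is project-local; a plausible sketch that generates a
# random colon-separated MAC address string (the real helper may differ, e.g.
# by fixing the locally-administered bit):
import random

def create_random_mac_sketch():
    return ":".join("{:02x}".format(random.randint(0, 255)) for _ in range(6))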
def analysis_runtime_tsfresh_selected_features(evaluate):
    data_path = os.path.join(__location__, "raw-results", "feature-selection",
                             "tsfresh-selected-features-runtime")
    if evaluate:
        features_path = glob.glob(os.path.join(__location__, "raw-results", "feature-selection",
                                               "tsfresh-*-to-be-extracted-*"))
        # sort by the trailing feature count in the file name
        features_path = sorted(features_path,
                               key=lambda entry: int(os.path.basename(entry).split("-")[-1]))
        tsfresh_features = TsFreshFeatures()
        runtime = nested_dict(2, dict)
        for len_light_pattern in [2, 4, 6, 8, 10]:
            light_signal, light_signal_time = light_analysis.load_light_pattern(len_light_pattern)
            coupling_data_provider = CouplingDataProvider(light_signal, light_signal_time, None, None)
            sampling_period_coupling = get_pattern_max_sampling_period()
            light_signal, _ = coupling_data_provider.get_light_data(sampling_period_coupling)
            print("len light pattern: ", len_light_pattern)
            print("sampling period: ", sampling_period_coupling)
            print("len sample: ", len(light_signal))
            for feature_path in features_path:
                num_features = int(os.path.basename(feature_path).split("-")[-1])
                print("num features: ", num_features)
                features_to_extract = DillSerializer(feature_path).deserialize()
                start = time.time()
                X = tsfresh_features.extract_selected_features(light_signal, features_to_extract, True)
                end = time.time()
                print("feature shape: ", X.shape)
                assert num_features == X.shape[1]
                runtime[len_light_pattern][num_features] = end - start
                print("duration: ", end - start)
        DillSerializer(data_path).serialize(runtime)
    else:
        runtime = DillSerializer(data_path).deserialize()
        runtime_per_num_feature = defaultdict(list)
        len_light_patterns, num_features = get_all_keys(runtime)
        for len_light_pattern, num_feature in itertools.product(len_light_patterns, num_features):
            runtime_per_num_feature[num_feature].append(runtime[len_light_pattern][num_feature])
        fig, ax = plt.subplots()
        num_features = sorted(runtime_per_num_feature.keys())
        median_runtime = [numpy.median(runtime_per_num_feature[num_feature])
                          for num_feature in num_features]
        # annotate the median runtime at the marked feature count
        nth_feature = 10
        ax.text(nth_feature + 0.3, median_runtime[nth_feature] + 0.015,
                round(median_runtime[nth_feature], 3))
        ax.axvline(nth_feature, linestyle="--", color="black")
        ax.plot(num_features, median_runtime, label="Virtual Machine", marker="o", color="#1f77b4")
        ax.set_ylabel("Runtime (s)")
        ax.set_xlabel("Number of features")
        ax.set_xticks(num_features[::4] + [num_features[-1]])
        ax.grid()
        ax.set_ylim(bottom=0, top=0.3)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=1, mode="expand", borderaxespad=0.)
        filepath = os.path.join(__location__, "results", "feature-selection", "vm",
                                "tsfresh-features-selected-runtime.pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
def feature_selection(signal_pattern_combination,
                      range_len_light_pattern=range(2, 11, 2),
                      range_sampling_period=numpy.arange(0.03, 0.13, 0.01),
                      rounds=10):
    if "single" in signal_pattern_combination:
        print("single type")
        basic_features_selection = nested_dict(3, list)
        tsfresh_features_selection = nested_dict(2, list)
        runtime_tsfresh_features = nested_dict(2, list)
        raw_feature_data = nested_dict(2, list)
        tsfresh_extracted_features = nested_dict(2, list)
        for len_light_pattern in range_len_light_pattern:
            for sampling_period in range_sampling_period:
                for i in range(rounds):
                    print("round: ", i)
                    sampling_period = round(sampling_period, 2)
                    print("sampling period: ", sampling_period)
                    data = LightData(sampling_period, [len_light_pattern])
                    basic_features = BasicFeatures()
                    basic_features_extracted = basic_features.extract(data.X_basic)
                    for clf in Classifier:
                        if clf != Classifier.SVM:
                            features_relevance = basic_features.relevance(
                                clf, basic_features_extracted, data.y_basic)
                            basic_features_selection[len_light_pattern][sampling_period][clf.name].append(
                                features_relevance)
                    tsfresh_features = TsFreshFeatures()
                    tsfresh_features_extracted, relevance_features = tsfresh_features.relevance(
                        data.X_tsfresh, data.y_tsfresh)
                    selected_features = tsfresh_features.select_n_most_useful_features(relevance_features)
                    elapsed_times = tsfresh_features.performance_evaluation(
                        tsfresh_features_extracted, relevance_features, data.X_tsfresh, rounds=1)
                    runtime_tsfresh_features[len_light_pattern][sampling_period].append(elapsed_times)
                    tsfresh_features_selection[len_light_pattern][sampling_period].append(selected_features)
                    raw_feature_data[len_light_pattern][sampling_period].append(data)
                    tsfresh_extracted_features[len_light_pattern][sampling_period].append(
                        tsfresh_features_extracted)
                    print("---")
                    print("###")
    else:
        print("combined type")
        basic_features_selection = nested_dict(2, list)
        tsfresh_features_selection = nested_dict(1, list)
        runtime_tsfresh_features = nested_dict(1, list)
        raw_feature_data = nested_dict(1, list)
        tsfresh_extracted_features = nested_dict(1, list)
        for sampling_period in range_sampling_period:
            for i in range(rounds):
                print("round: ", i)
                sampling_period = round(sampling_period, 2)
                print("sampling period: ", sampling_period)
                data = LightData(sampling_period)
                basic_features = BasicFeatures()
                basic_features_extracted = basic_features.extract(data.X_basic)
                for clf in Classifier:
                    if clf != Classifier.SVM:
                        features_relevance = basic_features.relevance(
                            clf, basic_features_extracted, data.y_basic)
                        basic_features_selection[sampling_period][clf.name].append(features_relevance)
                tsfresh_features = TsFreshFeatures()
                tsfresh_features_extracted, relevance_features = tsfresh_features.relevance(
                    data.X_tsfresh, data.y_tsfresh)
                selected_features = tsfresh_features.select_n_most_useful_features(relevance_features)
                elapsed_times = tsfresh_features.performance_evaluation(
                    tsfresh_features_extracted, relevance_features, data.X_tsfresh, rounds=1)
                runtime_tsfresh_features[sampling_period].append(elapsed_times)
                tsfresh_features_selection[sampling_period].append(selected_features)
                raw_feature_data[sampling_period].append(data)
                tsfresh_extracted_features[sampling_period].append(tsfresh_features_extracted)
                print("---")
    path_feature_selection = os.path.join(__location__, "raw-results", "feature-selection")
    DillSerializer(os.path.join(path_feature_selection,
                                signal_pattern_combination + "-runtime-tsfresh")).serialize(runtime_tsfresh_features)
    DillSerializer(os.path.join(path_feature_selection,
                                signal_pattern_combination + "-basic")).serialize(basic_features_selection)
    DillSerializer(os.path.join(path_feature_selection,
                                signal_pattern_combination + "-tsfresh")).serialize(tsfresh_features_selection)
    path_ml_train_data = os.path.join(__location__, "..", "online", "ml-train-data")
    DillSerializer(os.path.join(path_ml_train_data,
                                signal_pattern_combination + "-raw-feature-data")).serialize(raw_feature_data)
    DillSerializer(os.path.join(path_ml_train_data,
                                signal_pattern_combination + "-tsfresh-features-extracted")).serialize(
        tsfresh_extracted_features)
def __init__(self, script, parameter, num_parameter):
    self.script = script
    self.parameter = parameter
    DillSerializer(path_evaluation_data).serialize(nested_dict(num_parameter, list))
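# DillSerializer is used throughout this listing for persisting nested result
# structures. A minimal sketch of the interface, assuming it wraps dill
# (suggested by the name; the real class may differ):
import dill

class DillSerializerSketch:
    def __init__(self, path):
        self.path = path

    def serialize(self, obj):
        with open(self.path, "wb") as f:
            dill.dump(obj, f)

    def deserialize(self):
        with open(self.path, "rb") as f:
            return dill.load(f)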
def client_similarity_analysis(path_client_similarity, path_runtimes, nth_best, result_path, plot_format):

    def adapt_ticklabels(labels):
        return [label.replace("_", " ").capitalize() for label in labels]

    def plot_raw_similarities(plot_data, similarity_methods, equalize_methods):
        similarities = [list(row.values()) for row in plot_data.values()]
        fig, ax = plt.subplots()
        im = ax.imshow(similarities, cmap="jet", vmin=0, vmax=1)
        ax.set_xticks(numpy.arange(len(equalize_methods)))
        ax.set_yticks(numpy.arange(len(similarity_methods)))
        ax.set_xticklabels(adapt_ticklabels(equalize_methods))
        ax.set_yticklabels(adapt_ticklabels(similarity_methods))
        for i in range(len(similarity_methods)):
            for j in range(len(equalize_methods)):
                ax.text(j, i, round(similarities[i][j], 2), ha="center", va="center")
        ax.set_ylabel("Similarity")
        ax.set_xlabel("Equalize")
        ax.figure.colorbar(im)
        filename = "raw-similarities." + plot_format
        fig.savefig(os.path.join(result_path, filename), format=plot_format, bbox_inches="tight")
        #plt.show()
        plt.close(fig)

    def find_best_similarity_equalize_threshold(total_similarity, path_runtimes, round_factor=2):
        print("Best similarity equalize threshold")
        total_similarity = sorted(total_similarity.items(), key=lambda kv: numpy.mean(kv[1]), reverse=True)
        _, _, runtime_equalize_similarity_methods = get_runtime(path_runtimes)
        runtime_equalize_similarity_methods = dict(runtime_equalize_similarity_methods)
        best_similarity = dict()
        for similarity, metrics in total_similarity[:nth_best]:
            similarity_method, equalize_method, _ = similarity.split(":")
            runtime = runtime_equalize_similarity_methods[equalize_method + ":" + similarity_method]
            # weight classification quality against runtime (assumes runtime < 1 s)
            weight = 0.8 * numpy.mean(metrics) + 0.2 * (1 - runtime)
            best_similarity[similarity] = round(weight, round_factor)
            print("Similarity / metrics / runtime (s):", similarity,
                  numpy.round(metrics, round_factor), round(runtime, 4))
        best_similarity = sorted(best_similarity.items(), key=lambda kv: kv[1], reverse=True)
        print("Weighted best results:", best_similarity)

    results = DillSerializer(path_client_similarity).deserialize()
    len_light_patterns1, len_light_patterns2, equalize_methods, similarity_methods = misc.get_all_keys(results)
    total_similarity = dict()
    plot_data = nested_dict(1, dict)
    for similarity_method in similarity_methods:
        for equalize_method in equalize_methods:
            y_true = list()
            similarities = list()
            for len_light_pattern1 in len_light_patterns1:
                for len_light_pattern2 in len_light_patterns2:
                    if len_light_pattern1 in results and len_light_pattern2 in results[len_light_pattern1]:
                        result = results[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method]
                        similarities.extend(result)
                        # identical pattern lengths count as positive pairs
                        y_true.extend(len(result) * [1 if len_light_pattern1 == len_light_pattern2 else 0])
            plot_data[similarity_method][equalize_method] = numpy.median(similarities)
            assert len(similarities) == len(y_true)
            y_true = numpy.asarray(y_true)
            similarities = numpy.asarray(similarities)
            similarity_thresholds = numpy.arange(0, 1, 0.1)
            for similarity_threshold in similarity_thresholds:
                similarity_threshold = round(similarity_threshold, 1)
                y_pred = numpy.zeros(len(y_true))
                y_pred[similarities >= similarity_threshold] = 1
                acc = accuracy_score(y_true, y_pred)
                prec = precision_score(y_true, y_pred)
                rec = recall_score(y_true, y_pred)
                f1 = f1_score(y_true, y_pred)
                key = similarity_method + ":" + equalize_method + ":" + str(similarity_threshold)
                total_similarity[key] = [acc, prec, rec, f1]

    find_best_similarity_equalize_threshold(total_similarity, path_runtimes)
    plot_raw_similarities(plot_data, similarity_methods, equalize_methods)
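# The selection in find_best_similarity_equalize_threshold weights quality
# against runtime as 0.8 * mean(metrics) + 0.2 * (1 - runtime), which assumes
# runtimes below one second. A worked example with hypothetical numbers:
def _weight_demo():
    metrics = [0.9, 0.8, 0.85, 0.82]  # accuracy, precision, recall, f1
    runtime = 0.05                    # seconds
    weight = 0.8 * numpy.mean(metrics) + 0.2 * (1 - runtime)
    assert round(weight, 3) == 0.864  # 0.8 * 0.8425 + 0.2 * 0.95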
def serialize(self, obj):
    # use context managers so the file handles are closed reliably
    with open(self.path, "w+") as f:
        json.dump(obj, f)

def deserialize(self):
    with open(self.path, "r") as f:
        return json.load(f)

if __name__ == "__main__":
    import pandas
    from utils.nested_dict import nested_dict
    from coupling.device_grouping.online.machine_learning_features import Classifier

    basic_features_selection = nested_dict(3, dict)
    for clf in Classifier:
        basic_features_selection[2][0.05][clf] = pandas.DataFrame({
            "feature": ["length", "max", "mean", "median", "min", "std", "sum", "var"],
            "relative_importance": [0.000000, 4.416329, 5.198687, 5.364500,
                                    3.102737, 3.586680, 5.439479, 2.891588]
        })

    data = list()
    data.append("a")
    data.append("b")
    data.append("c")