Example #1
def evaluate_similarity_runtime(len_light_patterns, path_similarity, path_runtime, rounds):
    results_runtime = nested_dict(4, list)
    results_similarity = nested_dict(4, list)
    same = list(zip(len_light_patterns, len_light_patterns))
    combined = list(itertools.combinations(len_light_patterns, 2))
    pattern_combination = same + combined
    for len_light_pattern1, len_light_pattern2 in pattern_combination:
        print("from-to:", len_light_pattern1, len_light_pattern2)
        for run in range(rounds):
            print("round: ", run)
            client1, client2 = get_light_signals([len_light_pattern1, len_light_pattern2])
            for equalize_method in [vector_similarity.equalize_methods.fill,
                                    vector_similarity.equalize_methods.cut,
                                    vector_similarity.equalize_methods.dtw]:
                print("equalize:", equalize_method)
                for similarity_method in vector_similarity.similarity_methods:
                    print("similarity:", similarity_method.__name__)
                    # time.perf_counter() is monotonic and high-resolution,
                    # so very short runtimes are still measured reliably.
                    start_time = time.perf_counter()
                    similarity = similarity_method(client1.signal, client2.signal, equalize_method)
                    elapsed_time = time.perf_counter() - start_time
                    assert elapsed_time >= 0
                    results_similarity[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method.__name__].append(
                        similarity)
                    results_runtime[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method.__name__].append(
                        elapsed_time)
    DillSerializer(path_similarity).serialize(results_similarity)
    DillSerializer(path_runtime).serialize(results_runtime)
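The helper nested_dict(depth, leaf_type) used throughout these examples is not shown here. A minimal sketch consistent with how it is called (an arbitrary-depth defaultdict tree whose leaves are created by leaf_type) could look like this; the real utils.nested_dict may differ:

from collections import defaultdict

def nested_dict(depth, leaf_type):
    # Base case: one remaining level is a plain defaultdict of the leaf factory.
    if depth == 1:
        return defaultdict(leaf_type)
    # Otherwise recurse, so missing keys create the next level on demand.
    return defaultdict(lambda: nested_dict(depth - 1, leaf_type))

With such a definition, results_similarity[a][b][c][d].append(x) works without any explicit key initialisation, which is exactly how the example above uses it.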
Example #2
def data_preprocessing(testbeds, scaling):
    baseline_data = nested_dict(2, dict)
    testbed_data = nested_dict(2, dict)
    for operation in ["euclidean", "cosine", "initialisation"]:
        for testbed in testbeds:
            data_paths = glob.glob(
                os.path.join(__location__, "raw-result",
                             "*" + testbed + "*.csv"))
            baseline_path = [path for path in data_paths if "lbs" in path]
            assert len(baseline_path) == 1
            data_paths.remove(baseline_path[0])
            baseline = load_data(baseline_path)
            # Load the testbed data once per testbed instead of once per library.
            df = load_data(data_paths)
            he_libraries = numpy.sort(baseline.library.unique())
            for he_library in he_libraries:
                he_library_baseline = baseline[baseline.library == he_library]
                he_library_baseline_mean = he_library_baseline[operation].mean(
                ) / scaling["conversion"]
                he_library_baseline_std = he_library_baseline[operation].std(
                ) / scaling["conversion"]
                he_library_baseline_median = he_library_baseline[
                    operation].median() / scaling["conversion"]
                feature_lengths = he_library_baseline.vectorLength.unique()
                assert len(feature_lengths) == 1
                baseline_data[operation][testbed][he_library] = (
                    feature_lengths[0], he_library_baseline_mean,
                    he_library_baseline_std, he_library_baseline_median)

                he_data = df[df.library == he_library]
                feature_lengths = list()
                mean_rows = list()
                std_rows = list()
                median_rows = list()
                for feature_length, data in he_data.groupby("vectorLength"):
                    feature_lengths.append(feature_length)
                    std_rows.append(data.std(numeric_only=True) / scaling["conversion"])
                    mean_rows.append(data.mean(numeric_only=True) / scaling["conversion"])
                    median_rows.append(data.median(numeric_only=True) / scaling["conversion"])
                # DataFrame.append was removed in pandas 2.0; build each frame
                # in one step from the collected per-group rows instead.
                he_library_mean = pandas.DataFrame(mean_rows)[operation]
                he_library_std = pandas.DataFrame(std_rows)[operation]
                he_library_median = pandas.DataFrame(median_rows)[operation]
                testbed_data[operation][testbed][he_library] = (
                    feature_lengths, he_library_mean, he_library_std,
                    he_library_median)
    return baseline_data, testbed_data
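load_data is not defined in this excerpt. Judging from its call sites (a list of CSV paths in, a single DataFrame with library, vectorLength and per-operation columns out), a plausible minimal version, assuming plain CSV files, is:

import pandas

def load_data(paths):
    # Hypothetical helper: read every CSV and stack the rows into one frame.
    return pandas.concat(
        (pandas.read_csv(path) for path in paths), ignore_index=True)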
Example #3
 def __init__(self, access_point, data_period_coupling,
              coupling_compare_method, coupling_similarity_threshold,
              equalize_method, data_period_localization, num_clients, rooms,
              frequency_coupling, stop_reactor_callback, evaluate_callback):
     self.processing_clients = list()
     self.connected_clients = AtomicCounter(num_clients)
     self.evaluation_coupling = nested_dict(3, list)
     self.evaluation_runtime = defaultdict(list)
     self.rooms = rooms
     self.frequency_coupling = frequency_coupling
     self.access_point = access_point
     self.data_period_coupling = data_period_coupling
     self.coupling_compare_method = coupling_compare_method
     self.coupling_similarity_threshold = coupling_similarity_threshold
     self.equalize_method = equalize_method
     self.basic_features = BasicFeatures()
     self.tsfresh_features = TsFreshFeatures()
     self.data_period_localization = data_period_localization
     self.stop_reactor_callback = stop_reactor_callback
     self.evaluate_callback = evaluate_callback
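AtomicCounter(num_clients) suggests a thread-safe countdown of clients still expected to connect. A possible sketch, with the decrement method being an assumption about its interface:

import threading

class AtomicCounter:
    # Hypothetical helper: counter that can be decremented from several threads.
    def __init__(self, value):
        self._value = value
        self._lock = threading.Lock()

    def decrement(self):
        with self._lock:
            self._value -= 1
            return self._value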
Example #4
def evaluate_impact_signal_distortion(
        len_light_patterns, distortion_rates, path_distorted_light_signals, path_distortion_similarity, rounds):
    
    distorted_light_signals = defaultdict(list)
    results_distortion_similarity = nested_dict(3, list)
    for run in range(rounds):
        print("round: ", run)
        for len_light_pattern in len_light_patterns:
            print("len light pattern:", len_light_pattern)            
            equalize_method = "dummy"
            client = get_light_signals([len_light_pattern])[0]
            distorted_light_signals[len_light_pattern].append(client)
            for distortion_rate in distortion_rates:
                print("distortion rate: ", distortion_rate)
                # Distort once per rate so every similarity method is scored on the same signal.
                distorted_light_signal = client.get_distorted_light_signal(distortion_rate)
                for similarity_method in vector_similarity.similarity_methods:
                    similarity = similarity_method(client.signal, distorted_light_signal, equalize_method)
                    if distortion_rate == 0:
                        assert numpy.array_equal(client.signal, distorted_light_signal)
                        assert similarity >= 0.98
                    results_distortion_similarity[len_light_pattern][distortion_rate][similarity_method.__name__].append(similarity)
    DillSerializer(path_distortion_similarity).serialize(results_distortion_similarity)
    DillSerializer(path_distorted_light_signals).serialize(distorted_light_signals)
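DillSerializer appears in nearly every example; the JSON variant in Example #13 mirrors the same two-method interface. Assuming it is a thin wrapper around the dill package, a minimal sketch is:

import dill

class DillSerializer:
    # Hypothetical minimal implementation matching the usage in these examples.
    def __init__(self, path):
        self.path = path

    def serialize(self, obj):
        with open(self.path, "wb") as f:
            dill.dump(obj, f)

    def deserialize(self):
        with open(self.path, "rb") as f:
            return dill.load(f)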
Example #5
 def process_data(evaluation_data):
     
     def find_best_per_params(metric_results):
         best_params = list()
         features, coupling_methods, len_light_patterns, num_users = misc.get_all_keys(metric_results)
         for feature in features:
             per_feature_results = dict()
             for coupling_method, len_light_pattern, num_user in itertools.product(coupling_methods, len_light_patterns, num_users):
                 result = metric_results[feature][coupling_method][len_light_pattern][num_user]
                 if len(result) > 0:
                     # Tuple keys avoid fragile string splitting ("-" may appear in names).
                     key = (coupling_method, len_light_pattern, num_user)
                     per_feature_results[key] = numpy.mean(result)
             per_feature_selection = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
             coupling_method, len_light_pattern, num_user = per_feature_selection[0][0]
             best_params.append((feature, coupling_method, len_light_pattern, num_user))
         return best_params
     
     def get_metrics(result):
         accuracy = [result.accuracy_accept, result.accuracy_reject]
         precision = [result.precision_accept, result.precision_reject]
         recall = [result.recall_accept, result.recall_reject]
         f1 = [result.f1_accept, result.f1_reject]
         return (accuracy, precision, recall, f1), result.runtime
     
     def save_result(results, runtime_query_data, metric_results, runtime_results,
                     feature, coupling_method, len_light_pattern, num_client):
         metrics, runtime_coupling = get_metrics(results)
         metric_results[feature][coupling_method][len_light_pattern][num_client].append(metrics)
         runtime_results[feature][coupling_method][len_light_pattern][num_client].append((runtime_query_data, runtime_coupling))
     
     num_clients, num_reject_clients, len_light_patterns, \
         sampling_period_couplings, coupling_compare_methods, \
         coupling_similarity_thresholds, equalize_methods, \
         sampling_period_localizations, sampling_period_ml_trains, \
         coupling_ml_classifiers = misc.get_all_keys(evaluation_data)
     
     print("############### Static simulation ###############")
     print("Num clients: ", num_clients)
     print("Num reject clients: ", num_reject_clients)
     print("Len light patterns: ", len_light_patterns)
     print("Sampling period couplings: ", sampling_period_couplings)
     print("Coupling compare methods: ", coupling_compare_methods)
     print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
     print("Equalize methods: ", equalize_methods)
     print("Sampling period localizations: ", sampling_period_localizations)
     print("Sampling period ML trains: ", sampling_period_ml_trains)
     print("Coupling ML classifiers: ", coupling_ml_classifiers)
     
     similarity_metrics = nested_dict(4, list)
     machine_learning_metrics = nested_dict(4, list)
     localization_metrics = nested_dict(4, list)
     
     similarity_runtime = nested_dict(4, list)
     localization_runtime = nested_dict(4, list)
     machine_learning_runtime = nested_dict(4, list)
     
     for num_client, num_reject_client, len_light_pattern, sampling_period_coupling, \
         coupling_compare_method, coupling_similarity_threshold, equalize_method, \
         sampling_period_localization, sampling_period_ml_train, coupling_ml_classifier in itertools.product(
         num_clients, num_reject_clients, len_light_patterns, sampling_period_couplings,
         coupling_compare_methods, coupling_similarity_thresholds, equalize_methods,
         sampling_period_localizations, sampling_period_ml_trains, coupling_ml_classifiers):
         
         results = evaluation_data[num_client][num_reject_client][len_light_pattern] \
             [sampling_period_coupling][coupling_compare_method] \
             [coupling_similarity_threshold][equalize_method] \
             [sampling_period_localization][sampling_period_ml_train][coupling_ml_classifier]
         
         if len(results) > 0:
             for result in results:
                 #result.runtime_coupling
                 #result.runtime_query_data
                 
                 # localization
                 feature = "ble"
                 save_result(result.localization_random_forest_ble, result.runtime_query_raw_ble,
                             localization_metrics, localization_runtime, feature, "random forest", len_light_pattern, num_client)
                 save_result(result.localization_filtering_ble, result.runtime_query_raw_ble,
                             localization_metrics, localization_runtime, feature, "filtering", len_light_pattern, num_client)
                 save_result(result.localization_svm_ble, result.runtime_query_raw_ble,
                             localization_metrics, localization_runtime, feature, "svm", len_light_pattern, num_client)
                 
                 feature = "wifi"
                 save_result(result.localization_random_forest_wifi, result.runtime_query_raw_wifi,
                             localization_metrics, localization_runtime, feature, "random forest", len_light_pattern, num_client)
                 save_result(result.localization_filtering_wifi, result.runtime_query_raw_wifi,
                             localization_metrics, localization_runtime, feature, "filtering", len_light_pattern, num_client)
                 save_result(result.localization_svm_wifi, result.runtime_query_raw_wifi,
                             localization_metrics, localization_runtime, feature, "svm", len_light_pattern, num_client)
                 
                 # similarity metrics
                 save_result(result.coupling_signal_pattern, result.runtime_query_pattern_light,
                             similarity_metrics, similarity_runtime, "signal pattern", coupling_compare_method, len_light_pattern, num_client)
                 save_result(result.coupling_signal_pattern_duration, result.runtime_query_pattern_light,
                             similarity_metrics, similarity_runtime, "signal pattern duration", coupling_compare_method, len_light_pattern, num_client)
                 
                 save_result(result.coupling_signal_similarity, result.runtime_query_raw_light,
                             similarity_metrics, similarity_runtime, "signal similarity", coupling_compare_method, len_light_pattern, num_client)
                 
                 save_result(result.coupling_machine_learning_basic_all, result.runtime_query_raw_light,
                             machine_learning_metrics, machine_learning_runtime, "basic all", coupling_ml_classifier, len_light_pattern, num_client)
                 save_result(result.coupling_machine_learning_basic_selected, result.runtime_query_raw_light,
                             machine_learning_metrics, machine_learning_runtime, "basic selected", coupling_ml_classifier, len_light_pattern, num_client)
                 save_result(result.coupling_machine_learning_tsfresh_selected, result.runtime_query_raw_light,
                             machine_learning_metrics, machine_learning_runtime, "tsfresh selected", coupling_ml_classifier, len_light_pattern, num_client)
     
     best_ml = [(feature, coupling, len_light_pattern, num_user, machine_learning_metrics) for feature, coupling, len_light_pattern, num_user in find_best_per_params(machine_learning_metrics)]
     best_similarity = [(feature, coupling, len_light_pattern, num_user, similarity_metrics) for feature, coupling, len_light_pattern, num_user in find_best_per_params(similarity_metrics)]
     best_localization = [(feature, coupling, len_light_pattern, num_user, localization_metrics) for feature, coupling, len_light_pattern, num_user in find_best_per_params(localization_metrics)]
     return best_similarity, similarity_runtime, best_ml, machine_learning_runtime, best_localization, localization_runtime, len_light_patterns, num_clients
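misc.get_all_keys is assumed to return the key sets of a nested_dict level by level, which the callers unpack into one name per level. A rough sketch of such a helper (the real misc module may differ):

def get_all_keys(nested):
    # Collect the union of keys at each depth, stopping at non-dict leaves.
    levels = []
    current = [nested]
    while current and isinstance(current[0], dict):
        levels.append(sorted({key for d in current for key in d}))
        current = [child for d in current for child in d.values()]
    return levels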
Example #6
 def process_data(evaluation_data):
     
     def get_results(results):
         accuracy = [result.accuracy for result in results if result.accuracy >= 0]
         precision = [result.precision for result in results if result.precision >= 0]
         recall = [result.recall for result in results if result.recall >= 0]
         f1 = [result.f1 for result in results if result.f1 >= 0]
         runtime = [result.runtime for result in results if result.runtime > 0]
         return (accuracy, precision, recall, f1), misc.flatten_list(runtime)
     
     def save_result(result, metric_results, runtime_results, coupling_ident, runtime_ident,
                     feature, coupling_method, num_user, coupling_frequency, num_room):
         metrics, runtime = get_results(result.coupling[coupling_ident])
         missing_metric = 0 in [len(metric) for metric in metrics]
         if not missing_metric: # remove empty result
             metric_results[feature][coupling_method][num_user][coupling_frequency][num_room].append(metrics)
             runtime_results[feature][coupling_method][num_user][coupling_frequency][num_room].append((result.runtime[runtime_ident], runtime))
     
     def find_best_per_params(metric_results):
         best_params = list()
         features, coupling_methods, num_users, coupling_frequencies, num_rooms = misc.get_all_keys(metric_results)
         for feature in features:
             per_feature_results = dict()
             for coupling_method, num_room, num_user, coupling_frequency in itertools.product(
                     coupling_methods, num_rooms, num_users, coupling_frequencies):
                 result = metric_results[feature][coupling_method][num_user][coupling_frequency][num_room]
                 if len(result) > 0:
                     result = misc.flatten_list(misc.flatten_list(result))
                     # Tuple keys avoid fragile string splitting ("-" may appear in names).
                     key = (coupling_method, num_room, num_user, coupling_frequency)
                     per_feature_results[key] = numpy.mean(result)
             per_feature_results = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
             # Take the best entry whose mean metric is not exactly 1.
             idx = next(i for i, (_, metric) in enumerate(per_feature_results) if metric != 1)
             metric_result = per_feature_results[idx][1]
             coupling_method, num_room, num_user, coupling_frequency = per_feature_results[idx][0]
             best_params.append((feature, coupling_method, num_room, num_user, coupling_frequency, metric_result))
         return best_params
     
     sampling_period_couplings, coupling_compare_methods, \
         coupling_similarity_thresholds, equalize_methods, \
         sampling_period_localizations, sampling_period_ml_trains, \
         coupling_ml_classifiers, num_users, num_rooms, \
         simulation_durations, coupling_frequencies = misc.get_all_keys(evaluation_data)
     
     print("############### Dynamic simulation ###############")
     print("Num users: ", num_users)
     print("Num rooms: ", num_rooms)
     print("Simulation duration: ", simulation_durations)
     print("Coupling frequency: ", coupling_frequencies)
     print("Sampling period couplings: ", sampling_period_couplings)
     print("Coupling compare methods: ", coupling_compare_methods)
     print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
     print("Equalize methods: ", equalize_methods)
     print("Sampling period localizations: ", sampling_period_localizations)
     print("Sampling period ML trains: ", sampling_period_ml_trains)
     print("Coupling ML classifiers: ", coupling_ml_classifiers)
     
     similarity_metrics = nested_dict(5, list)
     machine_learning_metrics = nested_dict(5, list)
     localization_metrics = nested_dict(5, list)
             
     similarity_runtime = nested_dict(5, list)
     machine_learning_runtime = nested_dict(5, list)
     localization_runtime = nested_dict(5, list)
     
     for sampling_period_coupling, coupling_compare_method, \
         coupling_similarity_threshold, equalize_method, \
         sampling_period_localization, sampling_period_ml_train, \
         coupling_ml_classifier, num_user, num_room, \
         simulation_duration, coupling_frequency in itertools.product(
             sampling_period_couplings, coupling_compare_methods, coupling_similarity_thresholds,
             equalize_methods, sampling_period_localizations, sampling_period_ml_trains,
             coupling_ml_classifiers, num_users, num_rooms, simulation_durations, coupling_frequencies):
         
         results = evaluation_data[sampling_period_coupling][coupling_compare_method] \
             [coupling_similarity_threshold][equalize_method] \
             [sampling_period_localization][sampling_period_ml_train] \
             [coupling_ml_classifier][num_user][num_room] \
             [simulation_duration][coupling_frequency]
         
         if len(results) > 0:
             for result in results:
                 # localization
                 feature = "ble"
                 save_result(result, localization_metrics, localization_runtime, "loc Random Forest BLE", "time query raw ble",
                             feature, "random forest", num_user, coupling_frequency, num_room)
                 
                 save_result(result, localization_metrics, localization_runtime, "loc filtering BLE", "time query raw ble",
                             feature, "filtering", num_user, coupling_frequency, num_room)
                 
                 save_result(result, localization_metrics, localization_runtime, "loc SVM BLE", "time query raw ble",
                             feature, "svm", num_user, coupling_frequency, num_room)
                 
                 feature = "wifi"
                 save_result(result, localization_metrics, localization_runtime, "loc Random Forest WiFi", "time query raw wifi",
                             feature, "random forest", num_user, coupling_frequency, num_room)
                 
                 save_result(result, localization_metrics, localization_runtime, "loc filtering WiFi", "time query raw wifi",
                             feature, "filtering", num_user, coupling_frequency, num_room)
                 
                 save_result(result, localization_metrics, localization_runtime, "loc SVM WiFi", "time query raw wifi",
                             feature, "svm", num_user, coupling_frequency, num_room)
                 
                 # similarity metrics
                 feature = "signal pattern"
                 save_result(result, similarity_metrics, similarity_runtime, feature, "time query pattern light",
                             feature, coupling_compare_method, num_user, coupling_frequency, num_room)
                 
                 feature = "signal pattern duration"
                 save_result(result, similarity_metrics, similarity_runtime, feature, "time query pattern light", 
                             feature, coupling_compare_method, num_user, coupling_frequency, num_room)
                 
                 feature = "signal similarity"
                 save_result(result, similarity_metrics, similarity_runtime, feature, "time query raw light",
                             feature, coupling_compare_method, num_user, coupling_frequency, num_room)
                 
                 # machine learning
                 save_result(result, machine_learning_metrics, machine_learning_runtime, "ml basic all features",
                             "time query raw light", "basic all", coupling_ml_classifier, num_user, coupling_frequency, num_room)
                 
                 save_result(result, machine_learning_metrics, machine_learning_runtime, "ml basic selected features",
                             "time query raw light", "basic selected", coupling_ml_classifier, num_user, coupling_frequency, num_room)
                 
                 save_result(result, machine_learning_metrics, machine_learning_runtime, "ml tsfresh selected features",
                             "time query raw light", "tsfresh selected", coupling_ml_classifier, num_user, coupling_frequency, num_room)
     
     machine_learning_params = find_best_per_params(machine_learning_metrics)
     similarity_params = find_best_per_params(similarity_metrics)
     localization_params = find_best_per_params(localization_metrics)
     best_machine_learning = [(feature, coupling_method, num_room, num_user, coupling_frequency, machine_learning_metrics) for feature, coupling_method, num_room, num_user, coupling_frequency, _ in machine_learning_params]
     best_similarity = [(feature, coupling_method, num_room, num_user, coupling_frequency, similarity_metrics) for feature, coupling_method, num_room, num_user, coupling_frequency, _ in similarity_params]
     best_localization = [(feature, coupling_method, num_room, num_user, coupling_frequency, localization_metrics) for feature, coupling_method, num_room, num_user, coupling_frequency, _ in localization_params]
     return best_similarity, similarity_runtime, similarity_params, \
         best_machine_learning, machine_learning_runtime, machine_learning_params,  \
         best_localization, localization_runtime, num_users, localization_params, \
         coupling_frequencies, num_rooms
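misc.flatten_list, used by get_results and find_best_per_params above, is assumed to flatten one level of nesting:

def flatten_list(nested):
    # Hypothetical helper: concatenate the sublists of `nested` into one list.
    return [item for sublist in nested for item in sublist]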
Example #7
def offline_analysis_ml_model(path_ml_offline_evaluation):
    evaluation_data = DillSerializer(path_ml_offline_evaluation).deserialize()
    num_clients, num_reject_clients, len_light_patterns, \
        classifiers, sampling_periods = misc.get_all_keys(evaluation_data)
    analysis_result = nested_dict(2, list)
    for num_client, num_reject_client, len_light_pattern, classifier, sampling_period in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, classifiers,
            sampling_periods):
        results = evaluation_data[num_client][num_reject_client][
            len_light_pattern][classifier][sampling_period]
        if len(results) > 0:
            analysis_result[classifier][sampling_period].extend(results)

    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Classifiers: ", classifiers)
    print("Sampling periods: ", sampling_periods)

    for classifier in classifiers:
        results = analysis_result[classifier]
        sub_results = list()
        for sampling_period in sampling_periods:
            accuracy = [entry.accuracy_accept for entry in results[sampling_period]] + \
                [entry.accuracy_reject for entry in results[sampling_period]]
            precision = [entry.precision_accept for entry in results[sampling_period]] + \
                [entry.precision_reject for entry in results[sampling_period]]
            recall = [entry.recall_accept for entry in results[sampling_period]] + \
                [entry.recall_reject for entry in results[sampling_period]]
            f1 = [entry.f1_accept for entry in results[sampling_period]] + \
                [entry.f1_reject for entry in results[sampling_period]]

            entry = [
                numpy.mean(accuracy),
                numpy.mean(precision),
                numpy.mean(recall),
                numpy.mean(f1)
            ]
            entry = [round(value, 2) for value in entry]
            sub_results.append(entry)

        fig, ax = plt.subplots()
        ax.imshow(sub_results,
                  cmap="Greens",
                  aspect="auto",
                  interpolation="nearest",
                  vmin=0,
                  vmax=1.4)
        ax.set_ylabel("Sampling period (ms)")
        ytickpos = numpy.arange(len(sampling_periods))
        ax.set_yticks(ytickpos)
        ax.set_yticklabels([
            int(sampling_period * 1e3) for sampling_period in sampling_periods
        ])
        xticks = ["Accuracy", "Precision", "Recall", "F1-score"]
        xtickpos = range(len(xticks))
        ax.set_xticks(xtickpos)
        ax.set_xticklabels(xticks, rotation=20, ha="right")
        for i in range(len(sub_results)):
            for j in range(len(sub_results[0])):
                ax.text(j, i, sub_results[i][j], ha="center", va="center")
        # Minor ticks sit halfway between major ticks; the minor grid below
        # then draws the cell borders of the heatmap.
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(xtickpos)
        ]
        ax.set_xticks(ticks, minor=True)
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(ytickpos)
        ]
        ax.set_yticks(ticks, minor=True)
        ax.grid(which='minor', color="black")
        filepath = os.path.join(__location__, "results", "machine-learning",
                                "vm",
                                "ml-param-" + classifier.lower() + ".pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
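misc.pairwise, used above to place the minor ticks, presumably yields consecutive overlapping pairs, like the classic itertools recipe (available as itertools.pairwise since Python 3.10):

import itertools

def pairwise(iterable):
    # s -> (s0, s1), (s1, s2), (s2, s3), ...
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)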
Example #8
def offline_test_ml_model(path_ml_offline_evaluation):
    def filter_params(param_grid):
        filtered_params = list()
        for param in param_grid:
            if param["num clients"] - param["num reject clients"] >= 2:
                filtered_params.append(param)
        return filtered_params

    testbed = "vm"
    path_ml_train_data = os.path.join(__location__, "..", "online",
                                      "ml-train-data", testbed)
    combined_raw_feature_data = glob.glob(
        os.path.join(path_ml_train_data, "combined-*-raw-feature-data"))[0]
    combined_raw_feature_data = DillSerializer(
        combined_raw_feature_data).deserialize()
    tsfresh_features_to_extract_selected = os.path.join(
        __location__, "..", "online", "tsfresh-features-to-be-extracted")
    tsfresh_features_to_extract_selected = DillSerializer(
        tsfresh_features_to_extract_selected).deserialize()
    sampling_periods = sorted(combined_raw_feature_data.keys())

    # Use a separate name for the maximum so the ranges below read clearly.
    max_clients = 10
    num_reject_clients = range(max_clients - 1)
    num_clients = range(2, max_clients + 1)
    len_light_patterns = range(2, 11, 2)
    param_grid = ParameterGrid({
        "num clients": num_clients,
        "num reject clients": num_reject_clients,
        "len light pattern": len_light_patterns
    })
    sampling_period_coupling = get_pattern_max_sampling_period()
    filtered_params = filter_params(param_grid)
    results = nested_dict(5, list)
    for i, param in enumerate(filtered_params):
        print("Param: {0}/{1}".format(i + 1, len(filtered_params)))
        clients = dict()
        groundtruth_accept_clients = list()
        groundtruth_reject_clients = list()
        light_signal, light_signal_time = light_analysis.load_light_pattern(
            param["len light pattern"])
        coupling_data_provider = CouplingDataProvider(light_signal,
                                                      light_signal_time, None,
                                                      None)
        for _ in range(param["num clients"] -
                       param["num reject clients"]):  # accept client
            mac = create_random_mac()
            client = Client()
            client.light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            clients[mac] = client
            groundtruth_accept_clients.append(mac)

        #light_signal_random, light_signal_random_time = light_analysis.load_random_light_signal()
        #coupling_data_provider = CouplingDataProvider(light_signal_random, light_signal_random_time, None, None)

        datalen = len(light_signal)
        mean = light_signal.mean()
        std = light_signal.std()
        noise = numpy.random.normal(mean, std, datalen)
        coupling_data_provider = CouplingDataProvider(noise, light_signal_time,
                                                      None, None)
        for _ in range(param["num reject clients"]):  # reject client
            mac = create_random_mac()
            client = Client()
            client.light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            clients[mac] = client
            groundtruth_reject_clients.append(mac)

        for clf in Classifier:
            for sampling_period in sampling_periods:
                print("Classifier: ", clf)
                print("Sampling period: ", sampling_period)
                tsfresh_features = TsFreshFeatures()
                X_tsfresh = combined_raw_feature_data[sampling_period][
                    0].X_tsfresh
                y_tsfresh = combined_raw_feature_data[sampling_period][
                    0].y_tsfresh
                print("X: ", X_tsfresh.shape)
                print("X samples: ", len(X_tsfresh.id.unique()))
                print("y: ", y_tsfresh.shape)
                print("Extract features ...")
                X_selected_features = tsfresh_features.extract_selected_features(
                    X_tsfresh, tsfresh_features_to_extract_selected)
                print("X selected: ", X_selected_features.shape)
                print("y: ", y_tsfresh.shape)

                print("Coupling simulation ...")
                ml_model = Classifier.get_clf(clf)
                print("Class 1: ", len(y_tsfresh[y_tsfresh == 1]))
                print("Class 0: ", len(y_tsfresh[y_tsfresh == 0]))
                ml_model = ml_model.fit(X_selected_features, y_tsfresh)
                accept_clients = set()
                reject_clients = set()
                for client_mac in clients.keys():
                    client_light_data = clients[client_mac].light_signal
                    feature = tsfresh_features.extract_selected_features(
                        client_light_data,
                        tsfresh_features_to_extract_selected, True)
                    print("Feature shape: ", feature.shape)
                    result = ml_model.predict(feature)
                    if result[0] == 1.0:  # predict returns an array; compare its single element
                        accept_clients.add(client_mac)
                    else:
                        reject_clients.add(client_mac)
                accept_clients = list(accept_clients)
                reject_clients = list(reject_clients)
                mac_mapping = {index: mac for index, mac in enumerate(clients)}
                result = StaticCouplingResult(accept_clients, reject_clients,
                                              groundtruth_accept_clients,
                                              groundtruth_reject_clients, None,
                                              mac_mapping)
                results[param["num clients"]][param["num reject clients"]] \
                    [param["len light pattern"]][clf.name][sampling_period].append(result)
                print("accept:")
                print("result:", accept_clients)
                print("ground truth: ", groundtruth_accept_clients)
                print(result.accuracy_accept)
                print("reject:")
                print("result: ", reject_clients)
                print("ground truth: ", groundtruth_reject_clients)
                print(result.accuracy_reject)
                print("ML cross validation ...")
                ml_model = Classifier.get_clf(clf)
                scores = cross_val_score(ml_model,
                                         X_selected_features,
                                         y_tsfresh,
                                         cv=10,
                                         n_jobs=-1)
                print("Scores: ", scores)
                print("------------------------------------------------------")
        DillSerializer(path_ml_offline_evaluation).serialize(results)
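create_random_mac is assumed to produce a random MAC-address string that identifies each simulated client; a possible sketch:

import random

def create_random_mac():
    # Hypothetical helper: six random octets formatted as a MAC address.
    return ":".join("{:02x}".format(random.randint(0, 255)) for _ in range(6))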
Example #9
def analysis_runtime_tsfresh_selected_features(evaluate):
    data_path = os.path.join(__location__, "raw-results", "feature-selection",
                             "tsfresh-selected-features-runtime")
    if evaluate:
        features_path = glob.glob(
            os.path.join(__location__, "raw-results", "feature-selection",
                         "tsfresh-*-to-be-extracted-*"))
        features_path = sorted(
            features_path,
            key=lambda entry: int(os.path.basename(entry).split("-")[-1]))
        tsfresh_features = TsFreshFeatures()
        runtime = nested_dict(2, dict)
        for len_light_pattern in [2, 4, 6, 8, 10]:
            light_signal, light_signal_time = light_analysis.load_light_pattern(
                len_light_pattern)
            coupling_data_provider = CouplingDataProvider(
                light_signal, light_signal_time, None, None)
            sampling_period_coupling = get_pattern_max_sampling_period()
            light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            print("len light pattern: ", len_light_pattern)
            print("sampling period: ", sampling_period_coupling)
            print("len sample: ", len(light_signal))
            for feature_path in features_path:
                num_features = int(
                    os.path.basename(feature_path).split("-")[-1])
                print("num features: ", num_features)
                features_to_extract = DillSerializer(
                    feature_path).deserialize()
                start = time.time()
                X = tsfresh_features.extract_selected_features(
                    light_signal, features_to_extract, True)
                end = time.time()
                print("feature shape: ", X.shape)
                assert num_features == X.shape[1]
                runtime[len_light_pattern][num_features] = end - start
                print("duration: ", end - start)
            DillSerializer(data_path).serialize(runtime)
    else:
        runtime = DillSerializer(data_path).deserialize()
        runtime_per_num_feature = defaultdict(list)
        len_light_patterns, num_features = get_all_keys(runtime)
        for len_light_pattern, num_feature in itertools.product(
                len_light_patterns, num_features):
            runtime_per_num_feature[num_feature].append(
                runtime[len_light_pattern][num_feature])
        fig, ax = plt.subplots()
        num_features = sorted(runtime_per_num_feature.keys())
        median_runtime = [
            numpy.median(runtime_per_num_feature[num_feature])
            for num_feature in num_features
        ]
        nth_feature = 10  # index into median_runtime; also used as the x-position of the marker line
        ax.text(nth_feature + 0.3, median_runtime[nth_feature] + 0.015,
                round(median_runtime[nth_feature], 3))
        ax.axvline(nth_feature, linestyle="--", color="black")
        ax.plot(num_features,
                median_runtime,
                label="Virtual Machine",
                marker="o",
                color="#1f77b4")
        ax.set_ylabel("Runtime (s)")
        ax.set_xlabel("Number of features")
        ax.set_xticks(num_features[::4] + [num_features[-1]])
        ax.grid()
        ax.set_ylim(bottom=0, top=0.3)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                  loc=3,
                  ncol=1,
                  mode="expand",
                  borderaxespad=0.)
        filepath = os.path.join(__location__, "results", "feature-selection",
                                "vm", "tsfresh-features-selected-runtime.pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
Example #10
def feature_selection(signal_pattern_combination,
                      range_len_light_pattern=range(2, 11, 2),
                      range_sampling_period=numpy.arange(0.03, 0.13, 0.01),
                      rounds=10):

    if "single" in signal_pattern_combination:
        print("single type")
        basic_features_selection = nested_dict(3, list)
        tsfresh_features_selection = nested_dict(2, list)
        runtime_tsfresh_features = nested_dict(2, list)
        raw_feature_data = nested_dict(2, list)
        tsfresh_extracted_features = nested_dict(2, list)
        for len_light_pattern in range_len_light_pattern:
            for sampling_period in range_sampling_period:
                for i in range(rounds):
                    print("round: ", i)
                    sampling_period = round(sampling_period, 2)
                    print("sampling period: ", sampling_period)
                    data = LightData(sampling_period, [len_light_pattern])
                    basic_features = BasicFeatures()
                    basic_features_extracted = basic_features.extract(
                        data.X_basic)
                    for clf in Classifier:
                        if clf != Classifier.SVM:
                            features_relevance = basic_features.relevance(
                                clf, basic_features_extracted, data.y_basic)
                            basic_features_selection[len_light_pattern][
                                sampling_period][clf.name].append(
                                    features_relevance)

                    tsfresh_features = TsFreshFeatures()
                    tsfresh_features_extracted, relevance_features = tsfresh_features.relevance(
                        data.X_tsfresh, data.y_tsfresh)
                    selected_features = tsfresh_features.select_n_most_useful_features(
                        relevance_features)
                    elapsed_times = tsfresh_features.performance_evaluation(
                        tsfresh_features_extracted,
                        relevance_features,
                        data.X_tsfresh,
                        rounds=1)
                    runtime_tsfresh_features[len_light_pattern][
                        sampling_period].append(elapsed_times)
                    tsfresh_features_selection[len_light_pattern][
                        sampling_period].append(selected_features)

                    raw_feature_data[len_light_pattern][
                        sampling_period].append(data)
                    tsfresh_extracted_features[len_light_pattern][
                        sampling_period].append(tsfresh_features_extracted)
                    print("---")
            print("###")
    else:
        print("combined type")
        basic_features_selection = nested_dict(2, list)
        tsfresh_features_selection = nested_dict(1, list)
        runtime_tsfresh_features = nested_dict(1, list)
        raw_feature_data = nested_dict(1, list)
        tsfresh_extracted_features = nested_dict(1, list)
        for sampling_period in range_sampling_period:
            for i in range(rounds):
                print("round: ", i)
                sampling_period = round(sampling_period, 2)
                print("sampling period: ", sampling_period)
                data = LightData(sampling_period)
                basic_features = BasicFeatures()
                basic_features_extracted = basic_features.extract(data.X_basic)
                for clf in Classifier:
                    if clf != Classifier.SVM:
                        features_relevance = basic_features.relevance(
                            clf, basic_features_extracted, data.y_basic)
                        basic_features_selection[sampling_period][
                            clf.name].append(features_relevance)

                tsfresh_features = TsFreshFeatures()
                tsfresh_features_extracted, relevance_features = tsfresh_features.relevance(
                    data.X_tsfresh, data.y_tsfresh)
                selected_features = tsfresh_features.select_n_most_useful_features(
                    relevance_features)
                elapsed_times = tsfresh_features.performance_evaluation(
                    tsfresh_features_extracted,
                    relevance_features,
                    data.X_tsfresh,
                    rounds=1)
                runtime_tsfresh_features[sampling_period].append(elapsed_times)
                tsfresh_features_selection[sampling_period].append(
                    selected_features)

                raw_feature_data[sampling_period].append(data)
                tsfresh_extracted_features[sampling_period].append(
                    tsfresh_features_extracted)
                print("---")

    path_feature_selection = os.path.join(__location__, "raw-results",
                                          "feature-selection")
    DillSerializer(
        os.path.join(path_feature_selection, signal_pattern_combination +
                     "-runtime-tsfresh")).serialize(runtime_tsfresh_features)
    DillSerializer(
        os.path.join(path_feature_selection, signal_pattern_combination +
                     "-basic")).serialize(basic_features_selection)
    DillSerializer(
        os.path.join(path_feature_selection, signal_pattern_combination +
                     "-tsfresh")).serialize(tsfresh_features_selection)
    path_ml_train_data = os.path.join(__location__, "..", "online",
                                      "ml-train-data")
    DillSerializer(
        os.path.join(path_ml_train_data, signal_pattern_combination +
                     "-raw-feature-data")).serialize(raw_feature_data)
    DillSerializer(
        os.path.join(
            path_ml_train_data, signal_pattern_combination +
            "-tsfresh-features-extracted")).serialize(
                tsfresh_extracted_features)
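Example #11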
 def __init__(self, script, parameter, num_parameter):
     self.script = script
     self.parameter = parameter
     # path_evaluation_data is presumably a module-level constant in the original source.
     DillSerializer(path_evaluation_data).serialize(
         nested_dict(num_parameter, list))
Example #12
def client_similarity_analysis(path_client_similarity, path_runtimes, nth_best, result_path, plot_format):
    
    def adapt_ticklabels(labels):
        return [label.replace("_", " ").capitalize() for label in labels]
    
    def plot_raw_similarities(plot_data, similarity_methods, equalize_methods):
        similarities = [list(per_equalize.values()) for per_equalize in plot_data.values()]
        fig, ax = plt.subplots()
        im = ax.imshow(similarities, cmap="jet", vmin=0, vmax=1)
        ax.set_xticks(numpy.arange(len(equalize_methods)))
        ax.set_yticks(numpy.arange(len(similarity_methods)))
        ax.set_xticklabels(adapt_ticklabels(equalize_methods))
        ax.set_yticklabels(adapt_ticklabels(similarity_methods))
        for i in range(len(similarity_methods)):
            for j in range(len(equalize_methods)):
                ax.text(j, i, round(similarities[i][j], 2), ha="center", va="center")
        ax.set_ylabel("Similarity")
        ax.set_xlabel("Equalize")
        ax.figure.colorbar(im)
        filename = "raw-similarities." + plot_format
        fig.savefig(os.path.join(result_path, filename), format=plot_format, bbox_inches="tight")
        #plt.show()
        plt.close(fig)
    
    def find_best_similarity_equalize_threshold(total_similarity, path_runtimes, round_factor=2):
        print("Best similarity equalize threshold")
        total_similarity = sorted(total_similarity.items(), key=lambda kv: numpy.mean(kv[1]), reverse=True)
        _, _, runtime_equalize_similarity_methods = get_runtime(path_runtimes)
        runtime_equalize_similarity_methods = dict(runtime_equalize_similarity_methods)
        best_similarity = dict()
        for similarity, metrics in total_similarity[:nth_best]:
            similarity_method, equalize_method, _ = similarity.split(":")
            runtime = runtime_equalize_similarity_methods[equalize_method + ":" + similarity_method]
            weight = 0.8 * numpy.mean(metrics) + 0.2 * (1-runtime)
            best_similarity[similarity] = round(weight, round_factor)
            print("Similarity / metrics / runtime (s):", similarity, numpy.round(metrics, round_factor), round(runtime, 4))
        best_similarity = sorted(best_similarity.items(), key=lambda kv: kv[1], reverse=True)
        print("Weighted best results:", best_similarity)
    
    results = DillSerializer(path_client_similarity).deserialize()
    len_light_patterns1, len_light_patterns2, equalize_methods, similarity_methods = misc.get_all_keys(results)
    total_similarity = dict()
    plot_data = nested_dict(1, dict)
    for similarity_method in similarity_methods:
        for equalize_method in equalize_methods:
            y_true = list()
            similarities = list()
            for len_light_pattern1 in len_light_patterns1:
                for len_light_pattern2 in len_light_patterns2:
                    if len_light_pattern1 in results and len_light_pattern2 in results[len_light_pattern1]:
                        result = results[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method]
                        similarities.extend(result)
                        y_true.extend(len(result) * [1 if len_light_pattern1 == len_light_pattern2 else 0])
            plot_data[similarity_method][equalize_method] = numpy.median(similarities)
            assert len(similarities) == len(y_true)
            y_true = numpy.asarray(y_true)
            similarities = numpy.asarray(similarities)
            similarity_thresholds = numpy.arange(0, 1, 0.1)
            for similarity_threshold in similarity_thresholds:
                similarity_threshold = round(similarity_threshold, 1)
                y_pred = numpy.zeros(len(y_true))
                y_pred[similarities >= similarity_threshold] = 1
                acc = accuracy_score(y_true, y_pred)
                # zero_division=0 silences undefined-metric warnings when a
                # threshold produces no positive predictions.
                prec = precision_score(y_true, y_pred, zero_division=0)
                rec = recall_score(y_true, y_pred, zero_division=0)
                f1 = f1_score(y_true, y_pred, zero_division=0)
                key = similarity_method + ":" + equalize_method + ":" + str(similarity_threshold)
                total_similarity[key] = [acc, prec, rec, f1]
    
    find_best_similarity_equalize_threshold(total_similarity, path_runtimes)
    plot_raw_similarities(plot_data, similarity_methods, equalize_methods)
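For reference, the weighting in find_best_similarity_equalize_threshold trades off quality against speed: with a mean metric of 0.9 and a runtime of 0.05 s, the score is 0.8 * 0.9 + 0.2 * (1 - 0.05) = 0.91.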
Example #13
    def serialize(self, obj):
        # Context managers guarantee the file is flushed and closed.
        with open(self.path, "w") as f:
            json.dump(obj, f)

    def deserialize(self):
        with open(self.path, "r") as f:
            return json.load(f)


if __name__ == "__main__":
    import pandas
    from utils.nested_dict import nested_dict
    from coupling.device_grouping.online.machine_learning_features import Classifier

    basic_features_selection = nested_dict(3, dict)
    for clf in Classifier:
        basic_features_selection[2][0.05][clf] = pandas.DataFrame({
            'feature':
            ["length", "max", "mean", "median", "min", "std", "sum", "var"],
            'relative_importance': [
                0.000000, 4.416329, 5.198687, 5.364500, 3.102737, 3.586680,
                5.439479, 2.891588
            ]
        })

    data = ["a", "b", "c"]