Example #1
def get_runtime(result_path):
    
    def median(runtimes):
        return {key: numpy.median(values) for key, values in runtimes.items()}
    
    def sort(runtimes):
        return sorted(runtimes.items(), key=lambda kv: kv[1])
    
    runtimes = DillSerializer(result_path).deserialize()
    runtime_equalize_methods = defaultdict(list)
    runtime_similarity_method = defaultdict(list)
    runtime_equalize_similarity_methods = defaultdict(list)
    len_light_patterns1, len_light_patterns2, equalize_methods, similarity_methods = misc.get_all_keys(runtimes)
    for len_light_pattern1 in len_light_patterns1:
        for len_light_pattern2 in len_light_patterns2:
            for equalize_method in equalize_methods:
                for similarity_method in similarity_methods:
                    runtime = runtimes[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method]
                    if len(runtime) > 0:
                        median_runtime = numpy.median(runtime)
                        runtime_equalize_methods[equalize_method].append(median_runtime)
                        runtime_similarity_method[similarity_method].append(median_runtime)
                        key = equalize_method + ":" + similarity_method
                        runtime_equalize_similarity_methods[key].append(median_runtime)
    
    runtime_equalize_methods = sort(median(runtime_equalize_methods))
    runtime_similarity_method = sort(median(runtime_similarity_method))
    runtime_equalize_similarity_methods = sort(median(runtime_equalize_similarity_methods))
    return runtime_equalize_methods, runtime_similarity_method, runtime_equalize_similarity_methods
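These examples lean on two project helpers that are not shown here: DillSerializer, which evidently wraps dill-based (de)serialization of result files, and misc.get_all_keys, which apparently collects the key set found at each nesting level of a results dict. A minimal sketch of both, purely as an assumption inferred from the call sites, not the project's actual code:

import dill

class DillSerializer:
    # Sketch (assumed behavior): persist/load a single object with dill.
    def __init__(self, path):
        self.path = path

    def serialize(self, obj):
        with open(self.path, "wb") as f:
            dill.dump(obj, f)

    def deserialize(self):
        with open(self.path, "rb") as f:
            return dill.load(f)

def get_all_keys(nested):
    # Sketch (assumed behavior): return one sorted key list per nesting
    # level of a dict-of-dicts, matching unpacking such as
    # len1, len2, equalize, similarity = misc.get_all_keys(runtimes) above.
    levels = []
    level = [nested]
    while level and isinstance(level[0], dict):
        keys, next_level = set(), []
        for d in level:
            keys.update(d.keys())
            next_level.extend(d.values())
        levels.append(sorted(keys))
        level = next_level
    return tuple(levels)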
Example #2
def runtime_total_per_operation_over_all_testbeds(testbed_data):
    print(
        "# total mean runtime per operation per he library over all testbeds")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    for he_library in he_libraries:
        for operation in operations:
            min_feature_lengths = list()
            max_feature_lengths = list()
            min_runtime = list()
            max_runtime = list()
            # average over min and max values per testbed
            for testbed in testbeds:
                testbed_feature_lengths, _, _, testbed_median = testbed_data[
                    operation][testbed][he_library]
                min_feature_lengths.append(testbed_feature_lengths[0])
                max_feature_lengths.append(testbed_feature_lengths[-1])
                min_runtime.append(testbed_median.iloc[0])
                max_runtime.append(testbed_median.iloc[-1])
            print(he_library)
            print(operation)
            print("mean min - feature length: ",
                  round(numpy.mean(min_feature_lengths), 2))
            print("mean max - feature length: ",
                  round(numpy.mean(max_feature_lengths), 2))
            print("mean min - runtime: ", round(numpy.mean(min_runtime), 2))
            print("mean max - runtime: ", round(numpy.mean(max_runtime), 2))
            print("---")
Example #3
def runtime_analysis_tvgl_tsfresh_all():
    
    def scalability_runtime(results):
        runtimes_tvgl = dict()
        runtimes_ml_tsfresh_all = dict()
        for num_clients in sorted(results.keys()):
            runtime_coupling_tvgl = [result.coupling_tvgl.runtime for result in results[num_clients]]
            runtime_ml_tsfresh_all = [result.coupling_machine_learning_tsfresh_all.runtime for result in results[num_clients]]
            runtimes_tvgl[num_clients] = numpy.median(runtime_coupling_tvgl)
            runtimes_ml_tsfresh_all[num_clients] = numpy.median(runtime_ml_tsfresh_all)
        return {"tvgl": runtimes_tvgl, "tsfresh all": runtimes_ml_tsfresh_all}
    
    print("Runtime analysis of TVGL and tsfresh all features")
    for path_evaluation_data in glob.glob(os.path.join(__location__, "raw-results", "*-coupling-simulation-tvgl")):
        scalability_runtimes = None
        evaluation_data = DillSerializer(path_evaluation_data).deserialize() 
        num_clients, num_reject_clients, len_light_patterns, \
                sampling_period_couplings, coupling_compare_methods, \
                coupling_similarity_thresholds, equalize_methods, \
                sampling_period_localizations, sampling_period_ml_trains, \
                coupling_ml_classifiers = misc.get_all_keys(evaluation_data)
        all_results = list()
        structured_results = defaultdict(list)
        for num_client, num_reject_client, len_light_pattern, sampling_period_coupling, \
            coupling_compare_method, coupling_similarity_threshold, equalize_method, \
            sampling_period_localization, sampling_period_ml_train, coupling_ml_classifier in itertools.product(
                num_clients, num_reject_clients, len_light_patterns, sampling_period_couplings,
                coupling_compare_methods, coupling_similarity_thresholds, equalize_methods,
                sampling_period_localizations, sampling_period_ml_trains, coupling_ml_classifiers):
            
            results = evaluation_data[num_client][num_reject_client][len_light_pattern] \
                [sampling_period_coupling][coupling_compare_method] \
                [coupling_similarity_threshold][equalize_method] \
                [sampling_period_localization][sampling_period_ml_train][coupling_ml_classifier]
            if len(results) > 0:
                all_results.extend(results)
                structured_results[num_client].extend(results)
        scalability_runtimes = scalability_runtime(structured_results)
        runtime_coupling = [result.runtime_coupling for result in all_results]
        for identifier, runtimes in scalability_runtimes.items():
            print(identifier)
            abs_decrease = numpy.median(abs(numpy.diff(list(runtimes.values()))))
            ratio = (abs_decrease / numpy.median(list(runtimes.values()))) * 100
            print("Scalability over num clients {0} s ({1} %)".format(round(abs_decrease,2), round(ratio,2)))
            ratio_runtime = [runtime / numpy.mean(runtime_coupling) for runtime in runtimes.values()]
            ratio_runtime = [entry for entry in ratio_runtime if entry < 1]
            print("Ratio to entire coupling runtime: {0:.2f} %".format(numpy.mean(ratio_runtime)*100))
Example #4
def find_best_per_params(metric_results):
    best_params = list()
    features, coupling_methods, len_light_patterns, num_users = misc.get_all_keys(metric_results)
    for feature in features:
        per_feature_results = dict()
        for coupling_method, len_light_pattern, num_user in itertools.product(coupling_methods, len_light_patterns, num_users):
            result = metric_results[feature][coupling_method][len_light_pattern][num_user]
            if len(result) > 0:
                key = coupling_method + "-" + str(len_light_pattern) + "-" + str(num_user)
                per_feature_results[key] = numpy.mean(result)
        per_feature_selection = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
        best_param = per_feature_selection[0][0].split("-")
        coupling_method = best_param[0]
        len_light_pattern = int(best_param[1])
        num_user = int(best_param[2])
        best_params.append((feature, coupling_method, len_light_pattern, num_user))
    return best_params
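To see the input shape find_best_per_params expects, here is a toy call with invented scores; it assumes the get_all_keys sketch from Example #1 is in scope and stands in for the project's misc module:

import itertools
import types

import numpy

misc = types.SimpleNamespace(get_all_keys=get_all_keys)  # stand-in, see sketch above

# feature -> coupling method -> pattern length -> num users -> list of scores
metric_results = {
    "signal similarity": {
        "pearson":  {4: {2: [0.81, 0.84]}, 8: {2: [0.90]}},
        "spearman": {4: {2: [0.78]},       8: {2: [0.73]}},
    }
}
print(find_best_per_params(metric_results))
# -> [('signal similarity', 'pearson', 8, 2)]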
Example #5
def comparison_baseline_time_series(baseline_data, testbed_data):
    print("# comparison baseline vs. time-series")
    testbed_comparison_idx = 1
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    for he_library in he_libraries:
        baseline_total_values = list()
        testbed_total_values = list()
        relative_baseline_runtime = list()
        relative_testbed_runtime = list()
        testbed_total_lengths = list()
        baseline_total_lengths = list()
        for operation in operations:
            for testbed in testbeds:
                baseline_feature_length, _, _, baseline_median = baseline_data[
                    operation][testbed][he_library]
                testbed_feature_lengths, _, _, testbed_median = testbed_data[
                    operation][testbed][he_library]
                assert not isinstance(baseline_median, list)
                assert len(testbed_feature_lengths) == len(testbed_median)
                baseline_total_values.append(baseline_median)
                baseline_total_lengths.append(baseline_feature_length)
                testbed_total_values.append(
                    testbed_median.iloc[testbed_comparison_idx])
                testbed_total_lengths.append(
                    testbed_feature_lengths[testbed_comparison_idx])
                # runtime normalized to a feature length of one -> performance efficiency
                relative_baseline_runtime.append(baseline_median /
                                                 baseline_feature_length)
                relative_testbed_runtime.append(
                    numpy.mean(testbed_median / testbed_feature_lengths))

        relative_testbed_runtime = numpy.median(relative_testbed_runtime)
        relative_baseline_runtime = numpy.median(relative_baseline_runtime)

        print(he_library)
        print("baseline: ", round(numpy.median(baseline_total_values), 2))
        print("testbed: ", round(numpy.median(testbed_total_values), 2))
        print("Mean testbed length: ",
              round(numpy.mean(testbed_total_lengths)))
        print("Mean baseline length: ", numpy.mean(baseline_total_lengths))
        print(
            "normalized testbed / baseline performance delta (%): ",
            round(
                100 * (1 -
                       (relative_testbed_runtime / relative_baseline_runtime)),
                2))
Example #6
def runtime_relative_per_operation(testbed_data):
    print("# relative performance he library per operation")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    for operation in operations:
        runtime = defaultdict(list)
        for testbed in testbeds:
            for he_library in he_libraries:
                feature_length, _, _, time_median = testbed_data[operation][
                    testbed][he_library]
                temp = time_median / feature_length
                runtime[he_library].append((numpy.mean(temp), numpy.std(temp)))
        # calculate mean over all test platforms
        runtime_summary = list()
        for library, values in runtime.items():
            mean_mean = numpy.mean([mean for mean, _ in values])
            mean_std = numpy.mean([std for _, std in values])
            runtime_summary.append((library, mean_mean, mean_std))
        means = [mean for _, mean, _ in runtime_summary]
        max_idx = numpy.argmax(means)
        min_idx = numpy.argmin(means)
        he_library = [lib for lib, _, _ in runtime_summary]
        runtime_mean = [round(mean, 3) for _, mean, _ in runtime_summary]
        runtime_std = [round(std, 3) for _, _, std in runtime_summary]
        print(operation)
        print(list(zip(he_library, runtime_mean, runtime_std)))
        print("faster: ", runtime_summary[min_idx][0])
        print("slower: ", runtime_summary[max_idx][0])
        print(
            "percentage ratio (% faster): ",
            round(
                100 * runtime_summary[min_idx][1] /
                runtime_summary[max_idx][1], 2))
        print(
            "percentage ratio (% saves): ",
            round(
                100 *
                (1 -
                 runtime_summary[min_idx][1] / runtime_summary[max_idx][1]),
                2))
        print(
            "multiple ratio (x faster): ",
            round(runtime_summary[max_idx][1] / runtime_summary[min_idx][1],
                  2))
Example #7
def distortion_similarity_analysis(path_distortion_similarity, result_path, plot_format):
    results = DillSerializer(path_distortion_similarity).deserialize()
    len_light_patterns, distortion_rates, similarity_methods = misc.get_all_keys(results)
    
    print("Similarity threshold by signal distortion")
    distortion_rate = 0.5
    for similarity_method in ["spearman", "pearson", "distance_correlation"]:
        similarity = list()
        for len_light_pattern in len_light_patterns:
            result = results[len_light_pattern][distortion_rate][similarity_method]
            similarity.extend(result)
        print(similarity_method, round(numpy.median(similarity), 2))
    
    fig, ax = plt.subplots()
    markers = itertools.cycle(misc.markers)
    colors = [plt.cm.tab20(i) for i in numpy.linspace(0, 1, len(vector_similarity.similarity_methods))]
    for i, similarity_method in enumerate(similarity_methods):
        distortion = list()
        similarity_mean = list()
        for distortion_rate in distortion_rates:
            mean = list()
            for len_light_pattern in len_light_patterns:
                result = results[len_light_pattern][distortion_rate][similarity_method]
                mean.append(numpy.mean(result))
            distortion.append(distortion_rate)
            similarity_mean.append(numpy.median(mean))
        label = similarity_method.replace("_", " ").capitalize().replace("Dtw", "DTW")
        ax.plot(distortion, similarity_mean, label=label, marker=next(markers), color=colors[i])
    ax.plot([0, 1], [1, 0], color="black", linestyle="--")
    ax.axvline(0.4, color="red", linestyle="--")
    ax.grid()
    ax.set_xticks(distortion_rates)
    ax.set_ylabel("Signal similarity")
    ax.set_xlabel("Distortion rate")
    ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=4, mode="expand", borderaxespad=0.)
    fig.set_figwidth(fig.get_figwidth()*2.5)
    filename = "distortion-signal-similarity." + plot_format
    fig.savefig(os.path.join(result_path, filename), format=plot_format, bbox_inches="tight")
    #plt.show()
    plt.close(fig)
Example #8
def find_best_per_params(metric_results):
    best_params = list()
    features, coupling_methods, num_users, coupling_frequencies, num_rooms = misc.get_all_keys(metric_results)
    for feature in features:
        per_feature_results = dict()
        for coupling_method, num_room, num_user, coupling_frequency in itertools.product(
                coupling_methods, num_rooms, num_users, coupling_frequencies):
            result = metric_results[feature][coupling_method][num_user][coupling_frequency][num_room]
            if len(result) > 0:
                result = misc.flatten_list(misc.flatten_list(result))
                key = coupling_method + "-" + str(num_room) + "-" + str(num_user) + "-" + str(coupling_frequency)
                per_feature_results[key] = numpy.mean(result)
        per_feature_results = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
        idx = numpy.where(numpy.asarray([metric for _, metric in per_feature_results]) != 1)[0][0]
        metric_result = per_feature_results[idx][1]
        best_param = per_feature_results[idx][0].split("-")
        coupling_method = best_param[0]
        num_room = int(best_param[1])
        num_user = int(best_param[2])
        coupling_frequency = int(best_param[3])
        best_params.append((feature, coupling_method, num_room, num_user, coupling_frequency, metric_result))
    return best_params
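misc.flatten_list is applied twice above before averaging, which suggests it strips exactly one level of nesting per call (the stored metrics are tuples of per-class lists). A plausible one-level flatten, stated as an assumption rather than the project's implementation:

import itertools

def flatten_list(nested):
    # Sketch (assumed behavior): remove one nesting level,
    # e.g. [[1, 2], [3]] -> [1, 2, 3]; applied twice for two levels.
    return list(itertools.chain.from_iterable(nested))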
Example #9
def he_library_performance_per_platform(testbed_data):
    print("# HE library performance per platform")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    runtime = defaultdict(dict)
    for he_library in he_libraries:
        for testbed in testbeds:
            runtime_per_operation = list()
            for operation in operations:
                testbed_feature_lengths, _, _, testbed_median = testbed_data[
                    operation][testbed][he_library]
                runtime_per_operation.append(
                    numpy.mean(testbed_median / testbed_feature_lengths))
            runtime[he_library][testbed] = numpy.mean(runtime_per_operation)
    for he_library in runtime.keys():
        sorted_testbeds_runtime = sorted(runtime[he_library].items(),
                                         key=lambda x: x[1])
        # use relative runtime to identify order and difference between platforms, don't use runtime values
        print(he_library)
        print("result: ",
              [platform for platform, _ in sorted_testbeds_runtime])
        for (testbed1, runtime1), (testbed2, runtime2) in itertools.combinations(
                sorted_testbeds_runtime, 2):
            temp_testbeds = [testbed1, testbed2]
            temp_runtimes = [runtime1, runtime2]
            min_runtime = numpy.argmin(temp_runtimes)
            max_runtime = numpy.argmax(temp_runtimes)
            print(temp_testbeds[min_runtime] + " vs. " +
                  temp_testbeds[max_runtime])
            #print("min: ", temp_runtimes[min_runtime], " max: ", temp_runtimes[max_runtime])
            print(
                "slower ratio: ",
                round(temp_runtimes[max_runtime] / temp_runtimes[min_runtime],
                      2))
            print(
                "faster (%): ",
                round(
                    100 * (1 - (temp_runtimes[min_runtime] /
                                temp_runtimes[max_runtime])), 2))
Example #10
def plot_distorted_light_signals(
        distortion_rates, path_light_signals, conversion_us_to_ms, result_path, plot_format, plot_round=0):
    
    print("plot distorted light signals")
    light_signals = DillSerializer(path_light_signals).deserialize()
    len_light_patterns = misc.get_all_keys(light_signals)[0]
    for len_light_pattern in len_light_patterns:
        print("len light pattern:", len_light_pattern)
        fig, axarr = plt.subplots(len(distortion_rates))
        for i, distortion_rate in enumerate(distortion_rates):
            client = light_signals[len_light_pattern][plot_round]
            light_signal = client.get_distorted_light_signal(distortion_rate)
            light_signal_time = client.signal_time
            relative_time_ms = (light_signal_time-light_signal_time[0]) / conversion_us_to_ms
            axarr[i].plot(relative_time_ms, light_signal)
            xticks = [] if i+1 < len(distortion_rates) else numpy.arange(relative_time_ms[-1], step=10)
            axarr[i].set_xticks(xticks)
            #axarr[i].set_yticks([round(numpy.mean(light_signal))])
            axarr[i].yaxis.tick_right()
            axis = "both" if i+1 < len(distortion_rates) else "y"
            axarr[i].tick_params(axis=axis, which='both', length=0)
            axarr[i].set_yticks([numpy.mean(light_signal)])
            axarr[i].set_yticklabels([distortion_rate])
        
        axarr[-1].set_xlabel("Signal time (ms)")
        ax = fig.add_subplot(111, frameon=False)
        ax.set_yticks([])
        ax.set_xticks([])
        ax.set_ylabel("Voltage signal (mV)")
        ax2 = ax.twinx()
        ax2.set_yticks([])
        ax2.set_xticks([])
        ax2.set_ylabel("Distortion rate", labelpad=50)
        
        filename = "distortion-rate-signal-len-" + str(len_light_pattern) + "." + plot_format
        filepath = os.path.join(result_path, filename)
        fig.savefig(filepath, format=plot_format, bbox_inches="tight")
        #plt.show()
        plt.close(fig)
Example #11
def process_data(evaluation_data):

    def find_best_per_params(metric_results):
        best_params = list()
        features, coupling_methods, len_light_patterns, num_users = misc.get_all_keys(metric_results)
        for feature in features:
            per_feature_results = dict()
            for coupling_method, len_light_pattern, num_user in itertools.product(coupling_methods, len_light_patterns, num_users):
                result = metric_results[feature][coupling_method][len_light_pattern][num_user]
                if len(result) > 0:
                    key = coupling_method + "-" + str(len_light_pattern) + "-" + str(num_user)
                    per_feature_results[key] = numpy.mean(result)
            per_feature_selection = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
            best_param = per_feature_selection[0][0].split("-")
            coupling_method = best_param[0]
            len_light_pattern = int(best_param[1])
            num_user = int(best_param[2])
            best_params.append((feature, coupling_method, len_light_pattern, num_user))
        return best_params

    def get_metrics(result):
        accuracy = [result.accuracy_accept, result.accuracy_reject]
        precision = [result.precision_accept, result.precision_reject]
        recall = [result.recall_accept, result.recall_reject]
        f1 = [result.f1_accept, result.f1_reject]
        return (accuracy, precision, recall, f1), result.runtime

    def save_result(results, runtime_query_data, metric_results, runtime_results,
                    feature, coupling_method, len_light_pattern, num_client):
        metrics, runtime_coupling = get_metrics(results)
        metric_results[feature][coupling_method][len_light_pattern][num_client].append(metrics)
        runtime_results[feature][coupling_method][len_light_pattern][num_client].append((runtime_query_data, runtime_coupling))

    num_clients, num_reject_clients, len_light_patterns, \
        sampling_period_couplings, coupling_compare_methods, \
        coupling_similarity_thresholds, equalize_methods, \
        sampling_period_localizations, sampling_period_ml_trains, \
        coupling_ml_classifiers = misc.get_all_keys(evaluation_data)

    print("############### Static simulation ###############")
    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Sampling period couplings: ", sampling_period_couplings)
    print("Coupling compare methods: ", coupling_compare_methods)
    print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
    print("Equalize methods: ", equalize_methods)
    print("Sampling period localizations: ", sampling_period_localizations)
    print("Sampling period ML trains: ", sampling_period_ml_trains)
    print("Coupling ML classifiers: ", coupling_ml_classifiers)

    similarity_metrics = nested_dict(4, list)
    machine_learning_metrics = nested_dict(4, list)
    localization_metrics = nested_dict(4, list)

    similarity_runtime = nested_dict(4, list)
    localization_runtime = nested_dict(4, list)
    machine_learning_runtime = nested_dict(4, list)

    for num_client, num_reject_client, len_light_pattern, sampling_period_coupling, \
        coupling_compare_method, coupling_similarity_threshold, equalize_method, \
        sampling_period_localization, sampling_period_ml_train, coupling_ml_classifier in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, sampling_period_couplings,
            coupling_compare_methods, coupling_similarity_thresholds, equalize_methods,
            sampling_period_localizations, sampling_period_ml_trains, coupling_ml_classifiers):

        results = evaluation_data[num_client][num_reject_client][len_light_pattern] \
            [sampling_period_coupling][coupling_compare_method] \
            [coupling_similarity_threshold][equalize_method] \
            [sampling_period_localization][sampling_period_ml_train][coupling_ml_classifier]

        if len(results) > 0:
            for result in results:
                #result.runtime_coupling
                #result.runtime_query_data

                # localization
                feature = "ble"
                save_result(result.localization_random_forest_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime, feature, "random forest", len_light_pattern, num_client)
                save_result(result.localization_filtering_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime, feature, "filtering", len_light_pattern, num_client)
                save_result(result.localization_svm_ble, result.runtime_query_raw_ble,
                            localization_metrics, localization_runtime, feature, "svm", len_light_pattern, num_client)

                feature = "wifi"
                save_result(result.localization_random_forest_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime, feature, "random forest", len_light_pattern, num_client)
                save_result(result.localization_filtering_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime, feature, "filtering", len_light_pattern, num_client)
                save_result(result.localization_svm_wifi, result.runtime_query_raw_wifi,
                            localization_metrics, localization_runtime, feature, "svm", len_light_pattern, num_client)

                # similarity metrics
                save_result(result.coupling_signal_pattern, result.runtime_query_pattern_light,
                            similarity_metrics, similarity_runtime, "signal pattern", coupling_compare_method, len_light_pattern, num_client)
                save_result(result.coupling_signal_pattern_duration, result.runtime_query_pattern_light,
                            similarity_metrics, similarity_runtime, "signal pattern duration", coupling_compare_method, len_light_pattern, num_client)

                save_result(result.coupling_signal_similarity, result.runtime_query_raw_light,
                            similarity_metrics, similarity_runtime, "signal similarity", coupling_compare_method, len_light_pattern, num_client)

                save_result(result.coupling_machine_learning_basic_all, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime, "basic all", coupling_ml_classifier, len_light_pattern, num_client)
                save_result(result.coupling_machine_learning_basic_selected, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime, "basic selected", coupling_ml_classifier, len_light_pattern, num_client)
                save_result(result.coupling_machine_learning_tsfresh_selected, result.runtime_query_raw_light,
                            machine_learning_metrics, machine_learning_runtime, "tsfresh selected", coupling_ml_classifier, len_light_pattern, num_client)

    best_ml = [(feature, coupling, len_light_pattern, num_user, machine_learning_metrics)
               for feature, coupling, len_light_pattern, num_user in find_best_per_params(machine_learning_metrics)]
    best_similarity = [(feature, coupling, len_light_pattern, num_user, similarity_metrics)
                       for feature, coupling, len_light_pattern, num_user in find_best_per_params(similarity_metrics)]
    best_localization = [(feature, coupling, len_light_pattern, num_user, localization_metrics)
                         for feature, coupling, len_light_pattern, num_user in find_best_per_params(localization_metrics)]
    return best_similarity, similarity_runtime, best_ml, machine_learning_runtime, \
        best_localization, localization_runtime, len_light_patterns, num_clients
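nested_dict(4, list) above builds an autovivifying mapping: four lookup levels create dicts on demand and the leaves are lists, so metric_results[feature][method][length][clients].append(...) needs no initialisation. A common recipe with that behavior, assuming this is what the project's nested_dict does:

from collections import defaultdict

def nested_dict(depth, leaf_type):
    # Sketch (assumed behavior): defaultdict nested `depth` levels deep,
    # leaves created by leaf_type, e.g. nested_dict(2, list)[a][b] -> [].
    if depth == 1:
        return defaultdict(leaf_type)
    return defaultdict(lambda: nested_dict(depth - 1, leaf_type))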
Example #12
def process_data(evaluation_data):

    def get_results(results):
        accuracy = [result.accuracy for result in results if result.accuracy >= 0]
        precision = [result.precision for result in results if result.precision >= 0]
        recall = [result.recall for result in results if result.recall >= 0]
        f1 = [result.f1 for result in results if result.f1 >= 0]
        runtime = [result.runtime for result in results if result.runtime > 0]
        return (accuracy, precision, recall, f1), misc.flatten_list(runtime)

    def save_result(result, metric_results, runtime_results, coupling_ident, runtime_ident,
                    feature, coupling_method, num_user, coupling_frequency, num_room):
        metrics, runtime = get_results(result.coupling[coupling_ident])
        missing_metric = 0 in [len(metric) for metric in metrics]
        if not missing_metric: # remove empty result
            metric_results[feature][coupling_method][num_user][coupling_frequency][num_room].append(metrics)
            runtime_results[feature][coupling_method][num_user][coupling_frequency][num_room].append((result.runtime[runtime_ident], runtime))

    def find_best_per_params(metric_results):
        best_params = list()
        features, coupling_methods, num_users, coupling_frequencies, num_rooms = misc.get_all_keys(metric_results)
        for feature in features:
            per_feature_results = dict()
            for coupling_method, num_room, num_user, coupling_frequency in itertools.product(
                    coupling_methods, num_rooms, num_users, coupling_frequencies):
                result = metric_results[feature][coupling_method][num_user][coupling_frequency][num_room]
                if len(result) > 0:
                    result = misc.flatten_list(misc.flatten_list(result))
                    key = coupling_method + "-" + str(num_room) + "-" + str(num_user) + "-" + str(coupling_frequency)
                    per_feature_results[key] = numpy.mean(result)
            per_feature_results = sorted(per_feature_results.items(), key=lambda kv: kv[1], reverse=True)
            idx = numpy.where(numpy.asarray([metric for _, metric in per_feature_results]) != 1)[0][0]
            metric_result = per_feature_results[idx][1]
            best_param = per_feature_results[idx][0].split("-")
            coupling_method = best_param[0]
            num_room = int(best_param[1])
            num_user = int(best_param[2])
            coupling_frequency = int(best_param[3])
            best_params.append((feature, coupling_method, num_room, num_user, coupling_frequency, metric_result))
        return best_params

    sampling_period_couplings, coupling_compare_methods, \
        coupling_similarity_thresholds, equalize_methods, \
        sampling_period_localizations, sampling_period_ml_trains, \
        coupling_ml_classifiers, num_users, num_rooms, \
        simulation_durations, coupling_frequencies = misc.get_all_keys(evaluation_data)

    print("############### Dynamic simulation ###############")
    print("Num users: ", num_users)
    print("Num rooms: ", num_rooms)
    print("Simulation duration: ", simulation_durations)
    print("Coupling frequency: ", coupling_frequencies)
    print("Sampling period couplings: ", sampling_period_couplings)
    print("Coupling compare methods: ", coupling_compare_methods)
    print("Coupling similarity thresholds: ", coupling_similarity_thresholds)
    print("Equalize methods: ", equalize_methods)
    print("Sampling period localizations: ", sampling_period_localizations)
    print("Sampling period ML trains: ", sampling_period_ml_trains)
    print("Coupling ML classifiers: ", coupling_ml_classifiers)

    similarity_metrics = nested_dict(5, list)
    machine_learning_metrics = nested_dict(5, list)
    localization_metrics = nested_dict(5, list)

    similarity_runtime = nested_dict(5, list)
    machine_learning_runtime = nested_dict(5, list)
    localization_runtime = nested_dict(5, list)

    for sampling_period_coupling, coupling_compare_method, \
        coupling_similarity_threshold, equalize_method, \
        sampling_period_localization, sampling_period_ml_train, \
        coupling_ml_classifier, num_user, num_room, \
        simulation_duration, coupling_frequency in itertools.product(
            sampling_period_couplings, coupling_compare_methods, coupling_similarity_thresholds,
            equalize_methods, sampling_period_localizations, sampling_period_ml_trains,
            coupling_ml_classifiers, num_users, num_rooms, simulation_durations, coupling_frequencies):

        results = evaluation_data[sampling_period_coupling][coupling_compare_method] \
            [coupling_similarity_threshold][equalize_method] \
            [sampling_period_localization][sampling_period_ml_train] \
            [coupling_ml_classifier][num_user][num_room] \
            [simulation_duration][coupling_frequency]

        if len(results) > 0:
            for result in results:
                # localization
                feature = "ble"
                save_result(result, localization_metrics, localization_runtime, "loc Random Forest BLE", "time query raw ble",
                            feature, "random forest", num_user, coupling_frequency, num_room)

                save_result(result, localization_metrics, localization_runtime, "loc filtering BLE", "time query raw ble",
                            feature, "filtering", num_user, coupling_frequency, num_room)

                save_result(result, localization_metrics, localization_runtime, "loc SVM BLE", "time query raw ble",
                            feature, "svm", num_user, coupling_frequency, num_room)

                feature = "wifi"
                save_result(result, localization_metrics, localization_runtime, "loc Random Forest WiFi", "time query raw wifi",
                            feature, "random forest", num_user, coupling_frequency, num_room)

                save_result(result, localization_metrics, localization_runtime, "loc filtering WiFi", "time query raw wifi",
                            feature, "filtering", num_user, coupling_frequency, num_room)

                save_result(result, localization_metrics, localization_runtime, "loc SVM WiFi", "time query raw wifi",
                            feature, "svm", num_user, coupling_frequency, num_room)

                # similarity metrics
                feature = "signal pattern"
                save_result(result, similarity_metrics, similarity_runtime, feature, "time query pattern light",
                            feature, coupling_compare_method, num_user, coupling_frequency, num_room)

                feature = "signal pattern duration"
                save_result(result, similarity_metrics, similarity_runtime, feature, "time query pattern light",
                            feature, coupling_compare_method, num_user, coupling_frequency, num_room)

                feature = "signal similarity"
                save_result(result, similarity_metrics, similarity_runtime, feature, "time query raw light",
                            feature, coupling_compare_method, num_user, coupling_frequency, num_room)

                # machine learning
                save_result(result, machine_learning_metrics, machine_learning_runtime, "ml basic all features",
                            "time query raw light", "basic all", coupling_ml_classifier, num_user, coupling_frequency, num_room)

                save_result(result, machine_learning_metrics, machine_learning_runtime, "ml basic selected features",
                            "time query raw light", "basic selected", coupling_ml_classifier, num_user, coupling_frequency, num_room)

                save_result(result, machine_learning_metrics, machine_learning_runtime, "ml tsfresh selected features",
                            "time query raw light", "tsfresh selected", coupling_ml_classifier, num_user, coupling_frequency, num_room)

    machine_learning_params = find_best_per_params(machine_learning_metrics)
    similarity_params = find_best_per_params(similarity_metrics)
    localization_params = find_best_per_params(localization_metrics)
    best_machine_learning = [(feature, coupling_method, num_room, num_user, coupling_frequency, machine_learning_metrics)
                             for feature, coupling_method, num_room, num_user, coupling_frequency, _ in machine_learning_params]
    best_similarity = [(feature, coupling_method, num_room, num_user, coupling_frequency, similarity_metrics)
                       for feature, coupling_method, num_room, num_user, coupling_frequency, _ in similarity_params]
    best_localization = [(feature, coupling_method, num_room, num_user, coupling_frequency, localization_metrics)
                         for feature, coupling_method, num_room, num_user, coupling_frequency, _ in localization_params]
    return best_similarity, similarity_runtime, similarity_params, \
        best_machine_learning, machine_learning_runtime, machine_learning_params, \
        best_localization, localization_runtime, num_users, localization_params, \
        coupling_frequencies, num_rooms
Example #13
def plot_runtime_per_operation(baseline_data,
                               testbed_data,
                               scaling,
                               total_feature_length=21000,
                               min_feature_length=200):
    print("# plot runtime per operation")
    plot_format = "pdf"
    colors = {"helib": "blue", "seal": "green"}
    markers = {"iot": "o", "server": "X", "nuc": "v"}
    markevery = {"iot": 5, "server": 5, "nuc": 5}
    translate = {
        "iot": "IoT",
        "nuc": "NUC",
        "server": "Server",
        "helib": "HElib",
        "seal": "SEAL"
    }
    result_path = os.path.join(__location__, "results")
    operations, testbeds, he_libraries = misc.get_all_keys(testbed_data)
    all_feature_lengths = list()
    for operation in operations:
        fig, ax = plt.subplots()
        max_feature_lengths = list()
        for testbed in testbeds:
            for he_library in he_libraries:
                _, _, _, he_library_baseline_median = baseline_data[operation][
                    testbed][he_library]
                feature_lengths, _, _, he_library_median = testbed_data[
                    operation][testbed][he_library]
                all_feature_lengths.extend(feature_lengths)
                if "helib" in he_libraries:
                    translate["nuc"] = "Server"
                    translate["server"] = "NUC"
                if feature_lengths[0] > min_feature_length:
                    fill_feature_lengths = range(min_feature_length,
                                                 feature_lengths[0], 50)
                    fill_runtimes = list()
                    for fill in fill_feature_lengths:
                        rand = random.randint(0, len(he_library_median) - 1)
                        runtime = (
                            he_library_median[rand] / feature_lengths[rand]
                        ) * fill if "initialisation" not in operation else he_library_median[
                            rand]
                        fill_runtimes.append(runtime)
                    he_library_median = fill_runtimes + he_library_median.values.tolist()
                    feature_lengths = list(fill_feature_lengths) + feature_lengths

                ax.plot(feature_lengths,
                        len(feature_lengths) * [he_library_baseline_median],
                        label=translate[testbed] + "-" +
                        translate[he_library] + " - Baseline",
                        marker=markers[testbed],
                        markevery=markevery[testbed],
                        linestyle="--",
                        color=colors[he_library])
                ax.plot(feature_lengths,
                        he_library_median,
                        label=translate[testbed] + "-" +
                        translate[he_library] + " - Time-series",
                        marker=markers[testbed],
                        markevery=markevery[testbed],
                        color=colors[he_library])

                max_feature_length = feature_lengths[-1]
                if max_feature_length not in max_feature_lengths and max_feature_length < total_feature_length:
                    value = human_format(max_feature_length)
                    xscaling = 0.5 if value == "20K" else 1.02
                    ax.text(max_feature_length * xscaling, 4.5, value)
                    ax.axvline(max_feature_length,
                               color="black",
                               linestyle=":",
                               label="Length limit of time-series")
                    max_feature_lengths.append(max_feature_length)
                    print("time-series limit: ", max_feature_length,
                          "environment: ", operation, testbed, he_library)
                #print(operation)
                #print(testbed)
                #print(he_library)
                #print(he_library_median.values)
                #print(he_library_baseline_median)
                #print("---")

        print("min feature length: ", min(all_feature_lengths))
        print("max feature length: ", max(all_feature_lengths))
        ax.set_xscale("log")
        ax.set_yscale("log")
        ax.set_ylabel("Duration (" + scaling["time unit"] + ")")
        #ax.set_xlabel("Time-series length")
        ax.set_xlabel("# Time-series values")
        ax.grid()
        #plt.show()
        filepath = os.path.join(result_path, operation + "." + plot_format)
        fig.savefig(filepath, bbox_inches="tight", format=plot_format)

        fig_legend = plt.figure()
        handles, labels = ax.get_legend_handles_labels()
        unique_labels = list(set(labels))
        unique_handles = list()
        labels = numpy.array(labels)
        for ul in unique_labels:
            label_pos = numpy.where(labels == ul)[0][0]
            unique_handles.append(handles[label_pos])
        unique_labels, unique_handles = zip(
            *sorted(zip(unique_labels, unique_handles)))
        plt.figlegend(unique_handles, unique_labels, loc="center", ncol=3)
        fig_legend.savefig(os.path.join(result_path,
                                        operation + "-legend.pdf"),
                           format=plot_format,
                           bbox_inches="tight")

        plt.close(fig)
        plt.close(fig_legend)
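human_format (used for the axis annotation above, e.g. the "20K" label) is presumably a thousands-suffix formatter along these lines; this is a guess from the printed values, not the project's code:

def human_format(num):
    # Sketch (assumed behavior): 20000 -> '20K', 1500000 -> '1.5M'.
    for suffix in ["", "K", "M", "G"]:
        if abs(num) < 1000:
            return ("%g" % num) + suffix  # %g drops trailing zeros
        num /= 1000.0
    return ("%g" % num) + "T"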
Example #14
def offline_analysis_ml_model(path_ml_offline_evaluation):
    evaluation_data = DillSerializer(path_ml_offline_evaluation).deserialize()
    num_clients, num_reject_clients, len_light_patterns, \
        classifiers, sampling_periods = misc.get_all_keys(evaluation_data)
    analysis_result = nested_dict(2, list)
    for num_client, num_reject_client, len_light_pattern, classifier, sampling_period in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, classifiers,
            sampling_periods):
        results = evaluation_data[num_client][num_reject_client][
            len_light_pattern][classifier][sampling_period]
        if len(results) > 0:
            analysis_result[classifier][sampling_period].extend(results)

    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Classifiers: ", classifiers)
    print("Sampling periods: ", sampling_periods)

    for classifier in classifiers:
        results = analysis_result[classifier]
        sub_results = list()
        for sampling_period in sampling_periods:
            accuracy = [entry.accuracy_accept for entry in results[sampling_period]] + \
                [entry.accuracy_reject for entry in results[sampling_period]]
            precision = [entry.precision_accept for entry in results[sampling_period]] + \
                [entry.precision_reject for entry in results[sampling_period]]
            recall = [entry.recall_accept for entry in results[sampling_period]] + \
                [entry.recall_reject for entry in results[sampling_period]]
            f1 = [entry.f1_accept for entry in results[sampling_period]] + \
                [entry.f1_reject for entry in results[sampling_period]]

            entry = [
                numpy.mean(accuracy),
                numpy.mean(precision),
                numpy.mean(recall),
                numpy.mean(f1)
            ]
            entry = [round(value, 2) for value in entry]
            sub_results.append(entry)

        fig, ax = plt.subplots()
        ax.imshow(sub_results,
                  cmap="Greens",
                  aspect="auto",
                  interpolation="nearest",
                  vmin=0,
                  vmax=1.4)
        ax.set_ylabel("Sampling period (ms)")
        ytickpos = numpy.arange(len(sampling_periods))
        ax.set_yticks(ytickpos)
        ax.set_yticklabels([
            int(sampling_period * 1e3) for sampling_period in sampling_periods
        ])
        xticks = ["Accuracy", "Precision", "Recall", "F1-score"]
        xtickpos = range(len(xticks))
        ax.set_xticks(xtickpos)
        ax.set_xticklabels(xticks, rotation=20, ha="right")
        for i in range(len(sub_results)):
            for j in range(len(sub_results[0])):
                ax.text(j, i, sub_results[i][j], ha="center", va="center")
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(xtickpos)
        ]
        ax.set_xticks(ticks, minor=True)
        ticks = [
            start + ((end - start) / 2)
            for start, end in misc.pairwise(ytickpos)
        ]
        ax.set_yticks(ticks, minor=True)
        ax.grid(which='minor', color="black")
        filepath = os.path.join(__location__, "results", "machine-learning",
                                "vm",
                                "ml-param-" + classifier.lower() + ".pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
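misc.pairwise, used above to place minor ticks halfway between the major ones, matches the classic itertools recipe for consecutive pairs; assuming the project follows it:

import itertools

def pairwise(iterable):
    # Sketch (itertools recipe): s -> (s0, s1), (s1, s2), (s2, s3), ...
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)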
Example #15
def analysis_runtime_tsfresh_selected_features(evaluate):
    data_path = os.path.join(__location__, "raw-results", "feature-selection",
                             "tsfresh-selected-features-runtime")
    if evaluate:
        features_path = glob.glob(
            os.path.join(__location__, "raw-results", "feature-selection",
                         "tsfresh-*-to-be-extracted-*"))
        features_path = sorted(
            features_path,
            key=lambda entry: int(os.path.basename(entry).split("-")[-1]))
        tsfresh_features = TsFreshFeatures()
        runtime = nested_dict(2, dict)
        for len_light_pattern in [2, 4, 6, 8, 10]:
            light_signal, light_signal_time = light_analysis.load_light_pattern(
                len_light_pattern)
            coupling_data_provider = CouplingDataProvider(
                light_signal, light_signal_time, None, None)
            sampling_period_coupling = get_pattern_max_sampling_period()
            light_signal, _ = coupling_data_provider.get_light_data(
                sampling_period_coupling)
            print("len light pattern: ", len_light_pattern)
            print("sampling period: ", sampling_period_coupling)
            print("len sample: ", len(light_signal))
            for feature_path in features_path:
                num_features = int(
                    os.path.basename(feature_path).split("-")[-1])
                print("num features: ", num_features)
                features_to_extract = DillSerializer(
                    feature_path).deserialize()
                start = time.time()
                X = tsfresh_features.extract_selected_features(
                    light_signal, features_to_extract, True)
                end = time.time()
                print("feature shape: ", X.shape)
                assert num_features == X.shape[1]
                runtime[len_light_pattern][num_features] = end - start
                print("duration: ", end - start)
            DillSerializer(data_path).serialize(runtime)
    else:
        runtime = DillSerializer(data_path).deserialize()
        runtime_per_num_feature = defaultdict(list)
        len_light_patterns, num_features = get_all_keys(runtime)
        for len_light_pattern, num_feature in itertools.product(
                len_light_patterns, num_features):
            runtime_per_num_feature[num_feature].append(
                runtime[len_light_pattern][num_feature])
        fig, ax = plt.subplots()
        num_features = sorted(runtime_per_num_feature.keys())
        median_runtime = [
            numpy.median(runtime_per_num_feature[num_feature])
            for num_feature in num_features
        ]
        nth_feature = 10
        ax.text(nth_feature + 0.3, median_runtime[nth_feature] + 0.015,
                round(median_runtime[nth_feature], 3))
        ax.axvline(nth_feature, linestyle="--", color="black")
        ax.plot(num_features,
                median_runtime,
                label="Virtual Machine",
                marker="o",
                color="#1f77b4")
        ax.set_ylabel("Runtime (s)")
        ax.set_xlabel("Number of features")
        ax.set_xticks(num_features[::4] + [num_features[-1]])
        ax.grid()
        ax.set_ylim(bottom=0, top=0.3)
        ax.legend(bbox_to_anchor=(0., 1.02, 1., .102),
                  loc=3,
                  ncol=1,
                  mode="expand",
                  borderaxespad=0.)
        filepath = os.path.join(__location__, "results", "feature-selection",
                                "vm", "tsfresh-features-selected-runtime.pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
Example #16
def client_similarity_analysis(path_client_similarity, path_runtimes, nth_best, result_path, plot_format):
    
    def adapt_ticklabels(labels):
        return [label.replace("_", " ").capitalize() for label in labels]
    
    def plot_raw_similarities(plot_data, similarity_methods, equalize_methods):
        similarities = [list(per_equalize.values()) for per_equalize in plot_data.values()]
        fig, ax = plt.subplots()
        im = ax.imshow(similarities, cmap="jet", vmin=0, vmax=1)
        ax.set_xticks(numpy.arange(len(equalize_methods)))
        ax.set_yticks(numpy.arange(len(similarity_methods)))
        ax.set_xticklabels(adapt_ticklabels(equalize_methods))
        ax.set_yticklabels(adapt_ticklabels(similarity_methods))
        for i in range(len(similarity_methods)):
            for j in range(len(equalize_methods)):
                ax.text(j, i, round(similarities[i][j], 2), ha="center", va="center")
        ax.set_ylabel("Similarity")
        ax.set_xlabel("Equalize")
        ax.figure.colorbar(im)
        filename = "raw-similarities." + plot_format
        fig.savefig(os.path.join(result_path, filename), format=plot_format, bbox_inches="tight")
        #plt.show()
        plt.close(fig)
    
    def find_best_similarity_equalize_threshold(total_similarity, path_runtimes, round_factor=2):
        print("Best similarity equalize threshold")
        total_similarity = sorted(total_similarity.items(), key=lambda kv: numpy.mean(kv[1]), reverse=True)
        _, _, runtime_equalize_similarity_methods = get_runtime(path_runtimes)
        runtime_equalize_similarity_methods = dict(runtime_equalize_similarity_methods)
        best_similarity = dict()
        for similarity, metrics in total_similarity[:nth_best]:
            similarity_method, equalize_method, _ = similarity.split(":")
            runtime = runtime_equalize_similarity_methods[equalize_method + ":" + similarity_method]
            weight = 0.8 * numpy.mean(metrics) + 0.2 * (1-runtime)
            best_similarity[similarity] = round(weight, round_factor)
            print("Similarity / metrics / runtime (s):", similarity, numpy.round(metrics, round_factor), round(runtime, 4))
        best_similarity = sorted(best_similarity.items(), key=lambda kv: kv[1], reverse=True)
        print("Weighted best results:", best_similarity)
    
    results = DillSerializer(path_client_similarity).deserialize()
    len_light_patterns1, len_light_patterns2, equalize_methods, similarity_methods = misc.get_all_keys(results)
    total_similarity = dict()
    plot_data = nested_dict(1, dict)
    for similarity_method in similarity_methods:
        for equalize_method in equalize_methods:
            y_true = list()
            similarities = list()
            for len_light_pattern1 in len_light_patterns1:
                for len_light_pattern2 in len_light_patterns2:
                    if len_light_pattern1 in results and len_light_pattern2 in results[len_light_pattern1]:
                        result = results[len_light_pattern1][len_light_pattern2][equalize_method][similarity_method]
                        similarities.extend(result)
                        y_true.extend(len(result) * [1 if len_light_pattern1 == len_light_pattern2 else 0])
            plot_data[similarity_method][equalize_method] = numpy.median(similarities)
            assert len(similarities) == len(y_true)
            y_true = numpy.asarray(y_true)
            similarities = numpy.asarray(similarities)
            similarity_thresholds = numpy.arange(1, step=0.1)
            for similarity_threshold in similarity_thresholds:
                similarity_threshold = round(similarity_threshold, 1)
                y_pred = numpy.zeros(len(y_true))
                y_pred[similarities >= similarity_threshold] = 1
                acc = accuracy_score(y_true, y_pred)
                prec = precision_score(y_true, y_pred)
                rec = recall_score(y_true, y_pred)
                f1 = f1_score(y_true, y_pred)
                key = similarity_method + ":" + equalize_method + ":" + str(similarity_threshold)
                total_similarity[key] = [acc, prec, rec, f1]
    
    find_best_similarity_equalize_threshold(total_similarity, path_runtimes)
    plot_raw_similarities(plot_data, similarity_methods, equalize_methods)