def print_output(results, simulation_parameter):
    results = {key: numpy.mean(result) for key, result in results.items()}
    results = sorted(results.items(), key=lambda kv: kv[1], reverse=True)
    print("Performance " + simulation_parameter + ":",
          [(key, round(value, 2)) for key, value in results])
    for (key1, result1), (key2, result2) in misc.pairwise(results):
        print(simulation_parameter + ":", key1, "better (%) than", key2,
              round(100 * ((result1 / result2) - 1), 2))
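
# The ratio prints in print_output (and in similar loops further down) rely on misc.pairwise,
# a project helper that is not shown in this section. The output only makes sense if pairwise
# yields consecutive, overlapping pairs (s0, s1), (s1, s2), ... of the sorted results, as in the
# standard itertools recipe. A minimal sketch under that assumption (illustrative only; the
# functions here keep calling misc.pairwise):
def _pairwise_sketch(iterable):
    # Duplicate the iterator, advance one copy by a single element, and zip the two copies so
    # each element is paired with its successor.
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)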

def analysis_basic_features(testbeds):

    def plot_feature_importance(importances, filename):
        # Box plot of the relative feature importances, ordered by median importance.
        feature_order = importances.median().sort_values().index.values
        importances = importances[feature_order]
        fig, ax = plt.subplots()
        importances.boxplot(ax=ax, vert=False, showfliers=False)
        ax.set_xlabel("Relative importance")
        filename = os.path.join(__location__, "results", "machine-learning", filename + ".pdf")
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)
        plt.savefig(filename, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)

    print("importance basic features")
    for feature_type in ["combined", "single"]:
        feature_importances = list()
        for testbed in testbeds:
            file_regex = "*" + feature_type + "*basic*"
            path_basic_data = glob.glob(os.path.join(
                __location__, "raw-results", "feature-selection", testbed, file_regex))
            if len(path_basic_data) == 0:
                continue
            assert len(path_basic_data) == 1
            path_basic_data = path_basic_data[0]
            light_data_type = os.path.basename(path_basic_data).split("-")[0]
            basic_features_selection = DillSerializer(path_basic_data).deserialize()
            # Collect the parameter keys; "single" data is additionally nested by light pattern length.
            if "single" in light_data_type:
                len_light_patterns = list(basic_features_selection.keys())
                sampling_periods = list(basic_features_selection[len_light_patterns[0]].keys())
                classifiers = list(basic_features_selection[len_light_patterns[0]][sampling_periods[0]].keys())
                features = get_features(light_data_type, basic_features_selection,
                                        sampling_periods, classifiers, len_light_patterns)
            elif "combined" in light_data_type:
                sampling_periods = list(basic_features_selection.keys())
                classifiers = list(basic_features_selection[sampling_periods[0]].keys())
                features = get_features(light_data_type, basic_features_selection,
                                        sampling_periods, classifiers)
            # Gather the feature importances over all parameter combinations into one DataFrame.
            row = 0
            importances = pandas.DataFrame(columns=features)
            if "single" in light_data_type:
                for len_light_pattern, sampling_period, classifier in itertools.product(
                        len_light_patterns, sampling_periods, classifiers):
                    feature_importance = basic_features_selection[len_light_pattern][sampling_period][classifier]
                    row, importances = add_data(importances, feature_importance, row)
            elif "combined" in light_data_type:
                for sampling_period, classifier in itertools.product(sampling_periods, classifiers):
                    feature_importance = basic_features_selection[sampling_period][classifier]
                    row, importances = add_data(importances, feature_importance, row)
            feature_importances.append(importances)

        df = pandas.concat(feature_importances)
        importance_median = df.median().sort_values(ascending=False)
        importance_median = list(zip(importance_median.index, importance_median.values))
        filename = "basic-features-importance-" + light_data_type
        plot_feature_importance(df, filename)
        print(light_data_type)
        print(classifiers)
        for (feature1_name, feature1_importance), (feature2_name, feature2_importance) in misc.pairwise(importance_median):
            print(feature1_name, "importance:", round(feature1_importance, 2))
            print(feature2_name, "importance:", round(feature2_importance, 2))
            print("ratio importance:", round(feature2_importance / feature1_importance, 2))
            print("---")

def offline_analysis_ml_model(path_ml_offline_evaluation):
    evaluation_data = DillSerializer(path_ml_offline_evaluation).deserialize()
    num_clients, num_reject_clients, len_light_patterns, \
        classifiers, sampling_periods = misc.get_all_keys(evaluation_data)
    analysis_result = nested_dict(2, list)
    for num_client, num_reject_client, len_light_pattern, classifier, sampling_period in itertools.product(
            num_clients, num_reject_clients, len_light_patterns, classifiers, sampling_periods):
        results = evaluation_data[num_client][num_reject_client][len_light_pattern][classifier][sampling_period]
        if len(results) > 0:
            analysis_result[classifier][sampling_period].extend(results)
    print("Num clients: ", num_clients)
    print("Num reject clients: ", num_reject_clients)
    print("Len light patterns: ", len_light_patterns)
    print("Classifiers: ", classifiers)
    print("Sampling periods: ", sampling_periods)
    for classifier in classifiers:
        results = analysis_result[classifier]
        sub_results = list()
        for sampling_period in sampling_periods:
            accuracy = [entry.accuracy_accept for entry in results[sampling_period]] + \
                [entry.accuracy_reject for entry in results[sampling_period]]
            precision = [entry.precision_accept for entry in results[sampling_period]] + \
                [entry.precision_reject for entry in results[sampling_period]]
            recall = [entry.recall_accept for entry in results[sampling_period]] + \
                [entry.recall_reject for entry in results[sampling_period]]
            f1 = [entry.f1_accept for entry in results[sampling_period]] + \
                [entry.f1_reject for entry in results[sampling_period]]
            entry = [numpy.mean(accuracy), numpy.mean(precision), numpy.mean(recall), numpy.mean(f1)]
            entry = [round(value, 2) for value in entry]
            sub_results.append(entry)
        fig, ax = plt.subplots()
        ax.imshow(sub_results, cmap="Greens", aspect="auto", interpolation="nearest", vmin=0, vmax=1.4)
        ax.set_ylabel("Sampling period (ms)")
        ytickpos = numpy.arange(len(sampling_periods))
        ax.set_yticks(ytickpos)
        ax.set_yticklabels([int(sampling_period * 1e3) for sampling_period in sampling_periods])
        xticks = ["Accuracy", "Precision", "Recall", "F1-score"]
        xtickpos = range(len(xticks))
        ax.set_xticks(xtickpos)
        ax.set_xticklabels(xticks, rotation=20, ha="right")
        for i in range(len(sub_results)):
            for j in range(len(sub_results[0])):
                ax.text(j, i, sub_results[i][j], ha="center", va="center")
        ticks = [start + ((end - start) / 2) for start, end in misc.pairwise(xtickpos)]
        ax.set_xticks(ticks, minor=True)
        ticks = [start + ((end - start) / 2) for start, end in misc.pairwise(ytickpos)]
        ax.set_yticks(ticks, minor=True)
        ax.grid(which="minor", color="black")
        filepath = os.path.join(__location__, "results", "machine-learning", "vm",
                                "ml-param-" + classifier.lower() + ".pdf")
        result_path = os.path.dirname(filepath)
        if not os.path.exists(result_path):
            os.makedirs(result_path)
        fig.savefig(filepath, format="pdf", bbox_inches="tight")
        #plt.show()
        plt.close(fig)
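
# offline_analysis_ml_model collects its results in nested_dict(2, list), i.e. a two-level
# mapping whose missing leaves default to empty lists, so
# analysis_result[classifier][sampling_period] can be extended without initialization.
# nested_dict is provided elsewhere (project helper or the nested_dict package); a minimal
# defaultdict-based sketch with the assumed depth/leaf-type semantics, illustrative only:
def _nested_dict_sketch(depth, leaf_type):
    from collections import defaultdict
    # depth == 1 is a plain defaultdict of the leaf type; deeper levels nest recursively.
    if depth == 1:
        return defaultdict(leaf_type)
    return defaultdict(lambda: _nested_dict_sketch(depth - 1, leaf_type))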

def analysis_runtime_tsfresh(testbeds):
    print("runtime tsfresh")
    for runtime_type in ["patterns-runtime", "only-runtime"]:
        for feature_type in ["combined", "single"]:
            runtimes = dict()
            plot_data = list()
            fileregex = "*" + feature_type + "*" + runtime_type + "*"
            labels = {"bbb": "IoT Board", "server": "Virtual Machine", "vm": "Server"}
            for testbed in testbeds:
                filepath = glob.glob(os.path.join(
                    __location__, "raw-results", "feature-selection", testbed, fileregex))
                if len(filepath) == 0:  # raw data not available
                    continue
                assert len(filepath) == 1
                path_runtime_tsfresh = filepath[0]
                # Get runtimes
                filename = os.path.basename(path_runtime_tsfresh)
                light_data_type = filename.split("-")[0]
                runtime_tsfresh = DillSerializer(path_runtime_tsfresh).deserialize()
                if "only" in filename:
                    if "single" in filename:
                        len_light_patterns = list(runtime_tsfresh.keys())
                        tmp_runtime_tsfresh = pandas.DataFrame(
                            columns=runtime_tsfresh[len_light_patterns[0]].columns)
                        # Merge runtime only per feature length
                        for len_light_pattern in len_light_patterns:
                            runtime_data = runtime_tsfresh[len_light_pattern]
                            tmp_runtime_tsfresh = pandas.concat(
                                [tmp_runtime_tsfresh, runtime_data], ignore_index=True)
                        runtime_tsfresh = tmp_runtime_tsfresh
                else:
                    if "combined" in filename:
                        sampling_periods = list(runtime_tsfresh.keys())
                        tmp_runtime_tsfresh = pandas.DataFrame(
                            columns=runtime_tsfresh[sampling_periods[0]][0].columns)
                        row = 0
                        for sampling_period in sampling_periods:
                            runtime_data = runtime_tsfresh[sampling_period]
                            for entry in runtime_data:
                                tmp_runtime_tsfresh.loc[row] = entry.loc[0]
                                row += 1
                        runtime_tsfresh = tmp_runtime_tsfresh
                    elif "single" in filename:
                        len_light_patterns = list(runtime_tsfresh.keys())
                        sampling_periods = list(runtime_tsfresh[len_light_patterns[0]].keys())
                        tmp_runtime_tsfresh = pandas.DataFrame(
                            columns=runtime_tsfresh[len_light_patterns[0]][sampling_periods[0]][0].columns)
                        row = 0
                        for len_light_pattern in len_light_patterns:
                            for sampling_period in sampling_periods:
                                runtime_data = runtime_tsfresh[len_light_pattern][sampling_period]
                                for entry in runtime_data:
                                    assert len(entry) == 1
                                    tmp_runtime_tsfresh.loc[row] = entry.loc[0]
                                    row += 1
                        runtime_tsfresh = tmp_runtime_tsfresh
                # Remove outliers, more than three times the std:
                #runtime_tsfresh[numpy.abs(runtime_tsfresh - runtime_tsfresh.mean()) > 3 * runtime_tsfresh.std()] = numpy.nan
                median = runtime_tsfresh.median()
                feature_len = median.index.values
                # Relative runtime: median runtime normalized by the number of features.
                relative_runtime = median.values / feature_len
                runtimes[labels[testbed]] = numpy.mean(relative_runtime)
                plot_data.append((labels[testbed], feature_len, median))

            nth_label = 5
            fig, ax = plt.subplots()
            markers = itertools.cycle(misc.markers)
            # Truncate the curves at the first large drop in median runtime,
            # otherwise at the length of the shortest curve.
            datalen = [numpy.where(numpy.diff(median) < -0.8)[0] for _, _, median in plot_data]
            datalen = [array[0] + 1 for array in datalen if len(array) > 0]
            datalen = min(datalen) if len(datalen) > 0 else min(
                [len(feature_len) for _, feature_len, _ in plot_data])
            #datalen = min([len(feature_len) for _, feature_len, _ in plot_data])
            for label, feature_len, median in plot_data:
                ax.plot(feature_len[:datalen], median[:datalen], label=label,
                        marker=next(markers), markevery=nth_label)
            ax.grid()
            ax.set_ylabel("Runtime (s)")
            ax.set_xlabel("Number of features")
            feature_len = feature_len[:datalen]
            xticks = feature_len[::nth_label]
            xticks = numpy.concatenate([xticks, [feature_len[-1]]])
            ax.set_xticks(xticks)
            ax.set_ylim(bottom=0)
            ax.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=3,
                      mode="expand", borderaxespad=0.)
            fig.set_figwidth(fig.get_figwidth() * 1.6)
            #plt.show()
            filepath = os.path.join(__location__, "results", "feature-selection")
            filename = "tsfresh-features-only-runtime-" if "only" in filename else "tsfresh-features-runtime-"
            filename = filename + light_data_type + ".pdf"
            save_path = os.path.join(filepath, filename)
            directory = os.path.dirname(save_path)
            if not os.path.exists(directory):
                os.makedirs(directory)
            plt.savefig(save_path, format="pdf", bbox_inches="tight")
            plt.close(fig)
            runtimes_ms = {key: value * 1e3 for key, value in runtimes.items()}
            runtimes_ms = sorted(runtimes_ms.items(), key=lambda kv: kv[1])
            print("runtime type:", runtime_type, "feature type:", feature_type)
            for (testbed1_name, testbed1_runtime), (testbed2_name, testbed2_runtime) in misc.pairwise(runtimes_ms):
                print(testbed1_name, "relative runtime (ms):", round(testbed1_runtime, 2))
                print(testbed2_name, "relative runtime (ms):", round(testbed2_runtime, 2))
                print("ratio faster:", round(testbed2_runtime / testbed1_runtime, 2))
                print("---")
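
# Rough sketch of how these analysis functions might be driven. The testbed folder names
# "bbb", "server" and "vm" are taken from the labels dictionary above; the path to the offline
# ML evaluation results is a hypothetical placeholder and depends on how the raw results are
# actually laid out in this repository.
if __name__ == "__main__":
    testbeds = ["bbb", "server", "vm"]
    analysis_basic_features(testbeds)
    analysis_runtime_tsfresh(testbeds)
    # Hypothetical path, adjust to the actual raw-results layout:
    path_ml_offline_evaluation = os.path.join(
        __location__, "raw-results", "machine-learning", "ml-offline-evaluation")
    if os.path.exists(path_ml_offline_evaluation):
        offline_analysis_ml_model(path_ml_offline_evaluation)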