def compare(reference_transformation, user_id):
    """Compare per-feature scores and ranks across all transformations.

    For every label in the module-level ``rows_labels`` this collects, from
    the module-level ``transformation_vectors``, the value at position 0
    (stored as the label's importance) and at position 1 (stored as the
    label's rank) of each transformation. The two module-level dictionaries
    ``labels_importance`` and ``labels_importance_rank`` are filled in and
    then rebound to ``collections.OrderedDict`` instances sorted on the
    reference transformation. The result is printed, written to a log file
    and plotted.

    Args:
        reference_transformation: key of ``transformation_vectors`` whose
            values drive the ordering (importance descending, rank
            ascending).
        user_id: identifier appended (through ``str()``) to the titles of
            the comparison plots.

    Returns:
        None. The function works through side effects only: it rebinds the
        two globals, prints, writes a file and draws four plots.

    Raises:
        KeyError: if ``reference_transformation`` (or the hard-coded
            ``"presence_percentage"`` key used by the first plot) is not a
            key of ``transformation_vectors`` while labels exist.
        IndexError: if ``transformation_vectors`` is empty, because the
            ``y=0`` baseline is sized after the first derivative series.
    """
    global labels_importance
    global labels_importance_rank

    # Snapshot the keys once so every loop below iterates the same sequence.
    transformations = list(transformation_vectors.keys())

    # The position of a label in rows_labels is also its position inside
    # each transformation's value vectors.
    for index, label in enumerate(rows_labels):
        labels_importance[label] = {}
        labels_importance_rank[label] = {}
        for transformation in transformations:
            vectors = transformation_vectors[transformation]
            labels_importance[label][transformation] = vectors[0][index]
            labels_importance_rank[label][transformation] = vectors[1][index]

    # Sort the dictionaries on the reference transformation: highest
    # importance first, lowest rank first.
    labels_importance = collections.OrderedDict(
        sorted(labels_importance.items(),
               key=lambda x: x[1][reference_transformation],
               reverse=True))
    labels_importance_rank = collections.OrderedDict(
        sorted(labels_importance_rank.items(),
               key=lambda x: x[1][reference_transformation]))

    # Serialise once and reuse the strings for both the console and the file.
    importance_json = JsonUtils.dict_as_json_str(labels_importance)
    rank_json = JsonUtils.dict_as_json_str(labels_importance_rank)
    print(importance_json)
    print(rank_json)

    # Write the dictionaries into the log file.
    # NOTE(review): file_name is neither a parameter nor a local; it is
    # presumably a module-level variable -- confirm it is set before calling.
    out = LogsFileWriter.open(file_name)
    try:
        LogsFileWriter.write(importance_json, out)
        LogsFileWriter.write(rank_json, out)
    finally:
        # Release the handle even when one of the writes fails.
        LogsFileWriter.close(out)

    # Build one series per transformation for the plots.
    importances_list = []
    importances_legends = []
    ranks_list = []
    ranks_legends = []
    importances_derivatives_list = []
    importances_derivatives_legends = []
    for transformation in transformations:
        importance_list = [importance[transformation]
                           for importance in labels_importance.values()]
        importances_list.append(importance_list)
        importances_legends.append(transformation)

        rank_list = [rank[transformation]
                     for rank in labels_importance_rank.values()]
        ranks_list.append(rank_list)
        ranks_legends.append(transformation)

        # First-order discrete difference between consecutive scores.
        importance_derivative_list = np.diff(
            np.asarray(importance_list), 1).tolist()
        importances_derivatives_list.append(importance_derivative_list)
        importances_derivatives_legends.append(transformation)

    # Flat zero series, drawn as a visual baseline for the derivatives.
    importances_derivatives_list.append(
        [0] * len(importances_derivatives_list[0]))
    importances_derivatives_legends.append("y=0")

    importance_labels = list(labels_importance.keys())
    rank_labels = list(labels_importance_rank.keys())

    PlotlibDrawer.plot_1(
        importance_labels,
        [percentage["presence_percentage"]
         for percentage in labels_importance.values()],
        "features rank", "% records",
        "presence rate of the features in the records", 10)
    PlotlibDrawer.plot_2(
        importance_labels, importances_list, importances_legends,
        "features rank", "features scores",
        "comparison of different transformation scores " + str(user_id), 11)
    PlotlibDrawer.plot_2(
        rank_labels, ranks_list, ranks_legends,
        "features initial rank", "features rank after transformation",
        "comparison of different transformation ranks " + str(user_id), 11)
    # NOTE(review): each derivative series is one element shorter than
    # importance_labels (np.diff drops one value) -- confirm that
    # PlotlibDrawer.plot_2 accepts x and y of different lengths.
    PlotlibDrawer.plot_2(
        importance_labels, importances_derivatives_list,
        importances_derivatives_legends,
        "features initial rank", "features scores derivative",
        "comparison of different transformation scores derivative "
        + str(user_id), 11)