Пример #1
0
def compare(reference_transformation, user_id):
	"""Compare per-label scores and ranks across every transformation.

	Fills the module-level ``labels_importance`` and ``labels_importance_rank``
	mappings from ``transformation_vectors`` (element ``[0]`` of each entry
	supplies the scores, element ``[1]`` the ranks, both indexed by the
	position of the label in ``rows_labels``), orders them by the reference
	transformation, prints and logs both mappings as JSON, and draws four
	comparison plots.

	Args:
		reference_transformation: key of ``transformation_vectors`` that
			drives the ordering. Scores are sorted in descending order of
			this key, ranks in ascending order.
		user_id: identifier appended (through ``str``) to the titles of the
			comparison plots.

	Raises:
		KeyError: if a label has no entry for ``reference_transformation``
			or for ``"presence_percentage"`` (the first plot reads that key
			from every label).

	Side effects:
		Rebinds the globals ``labels_importance`` and
		``labels_importance_rank`` to ``collections.OrderedDict`` instances,
		prints both as JSON, writes both to ``file_name`` through
		``LogsFileWriter`` and calls ``PlotlibDrawer``.
	"""
	global labels_importance
	global labels_importance_rank

	# Snapshot the keys once so every loop below sees the same order.
	transformations = list(transformation_vectors.keys())

	# Regroup the per-transformation vectors into one mapping per label.
	for index, label in enumerate(rows_labels):
		scores = labels_importance[label] = {}
		ranks = labels_importance_rank[label] = {}
		for transformation in transformations:
			vectors = transformation_vectors[transformation]
			scores[transformation] = vectors[0][index]
			ranks[transformation] = vectors[1][index]

	def by_reference(item):
		# item is a (label, {transformation: value}) pair.
		return item[1][reference_transformation]

	# Highest reference score first; lowest reference rank first.
	labels_importance = collections.OrderedDict(
		sorted(labels_importance.items(), key=by_reference, reverse=True))
	labels_importance_rank = collections.OrderedDict(
		sorted(labels_importance_rank.items(), key=by_reference))

	# Serialise each mapping once and reuse the text for stdout and the log.
	# Single-argument print(...) behaves identically on Python 2 and 3.
	importance_json = JsonUtils.dict_as_json_str(labels_importance)
	print(importance_json)
	rank_json = JsonUtils.dict_as_json_str(labels_importance_rank)
	print(rank_json)

	# Write both mappings to the log; always release the handle, even when
	# one of the writes fails.
	out = LogsFileWriter.open(file_name)
	try:
		LogsFileWriter.write(importance_json, out)
		LogsFileWriter.write(rank_json, out)
	finally:
		LogsFileWriter.close(out)

	# Build one series per transformation for each of the comparison plots.
	importances_list = []
	ranks_list = []
	importances_derivatives_list = []
	for transformation in transformations:
		importance_list = [
			entry[transformation] for entry in labels_importance.values()]
		importances_list.append(importance_list)
		ranks_list.append(
			[entry[transformation] for entry in labels_importance_rank.values()])
		# First-order difference between consecutive labels (sorted order).
		importances_derivatives_list.append(
			np.diff(np.asarray(importance_list), 1).tolist())

	importances_legends = list(transformations)
	ranks_legends = list(transformations)
	importances_derivatives_legends = list(transformations)

	# Add a flat y=0 baseline to the derivative plot. With no transformation
	# there is no series to size it from, so fall back to an empty baseline
	# instead of raising IndexError.
	if importances_derivatives_list:
		baseline_length = len(importances_derivatives_list[0])
	else:
		baseline_length = 0
	importances_derivatives_list.append([0] * baseline_length)
	importances_derivatives_legends.append("y=0")

	PlotlibDrawer.plot_1(
		list(labels_importance.keys()),
		[entry["presence_percentage"] for entry in labels_importance.values()],
		"features rank", "% records",
		"presence rate of the features in the records", 10)
	PlotlibDrawer.plot_2(
		list(labels_importance.keys()), importances_list, importances_legends,
		"features rank", "features scores",
		"comparison of different transformation scores "+str(user_id), 11)
	PlotlibDrawer.plot_2(
		list(labels_importance_rank.keys()), ranks_list, ranks_legends,
		"features initial rank", "features rank after transformation",
		"comparison of different transformation ranks "+str(user_id), 11)
	# NOTE(review): for scalar scores np.diff yields one fewer point than
	# there are labels on the x axis -- confirm PlotlibDrawer.plot_2
	# tolerates the length mismatch.
	PlotlibDrawer.plot_2(
		list(labels_importance.keys()), importances_derivatives_list,
		importances_derivatives_legends,
		"features initial rank", "features scores derivative",
		"comparison of different transformation scores derivative "+str(user_id), 11)