Пример #1
0
	def _apply_pretransformation(self, matrix, pretransformation_name):
		transformed_trainset = np.copy(matrix)
		
		
		if pretransformation_name == "none" or pretransformation_name == None:
			#initialize the transformation scores to an array of the size of the number of features and containing all ones. This is equivalent to not having made any transformation
			transformation_scores_by_feature = np.ones(np.shape(matrix)[LCBMFComputer.feature_dimention])
		
		elif pretransformation_name == "idf":
			do_laplace_smoothing = True
			[transformed_trainset, transformation_scores_by_feature] = Numpy.idf_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
		
		elif pretransformation_name == "ldc":
			[transformed_trainset, transformation_scores_by_feature] = Numpy.ldc_matrix_transformation(matrix, LCBMFComputer.time_dimention)
		
		elif pretransformation_name == "idc":
			do_laplace_smoothing = True
			[transformed_trainset, transformation_scores_by_feature] = Numpy.idc_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
		
		elif pretransformation_name == "idf3":
			do_laplace_smoothing = True
			[transformed_trainset, transformation_scores_by_feature] = Numpy.idf3_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)
		
		else:
			raise Exception("WRONG TRANSFORMATION EXCEPTION : the transformation "+pretransformation_name+" do not exist")
			
		return [transformed_trainset, transformation_scores_by_feature]
Пример #2
0
def transformations_comparaison_one_user(user_id):
	"""Load one user's data and register several transformations of its matrix.

	Steps performed, in order:
	  1. Reset the module-level globals listed below and set ``file_name``
	     to ``"transformations_comparaison_<user_id>"``.
	  2. Load the user's matrix, labels vector, time vector and importance
	     scores through ``MDataExtractor``.
	  3. Pass the raw matrix and each transformed variant to
	     ``add_transformation`` under a distinct name.
	  4. Call ``compare("presence_count", user_id)``.

	``time_dimention`` and ``do_laplace_smoothing`` are read here but not
	assigned in this function, so they must be defined at module level.

	Parameters:
		user_id: identifier forwarded to the ``MDataExtractor`` loaders and to
			``compare``; it is also embedded (via ``str``) in ``file_name``.
	"""
	global file_name
	global rows_labels
	global labels_importance
	global labels_importance_derivative
	global labels_importance_rank
	global transformation_vectors
	
	# Start from a clean state so nothing from a previous call is carried over.
	labels_importance = {}
	labels_importance_derivative = {}
	labels_importance_rank = {}
	transformation_vectors = {}
	rows_labels = None
	# NOTE(review): this reset is redundant, file_name is reassigned on the next line.
	file_name = None
	file_name = "transformations_comparaison_"+str(user_id)
	print "loading matrix user "+str(user_id)+"..."
	data_matrix = MDataExtractor.load_matrix(user_id)
	
	rows_labels =  MDataExtractor.load_labels_vector(user_id)
	# NOTE(review): columns_labels and importance_scores are not used by any of the
	# statements below; confirm whether these loads are still needed.
	columns_labels = MDataExtractor.load_time_vector(user_id)
	importance_scores = MDataExtractor.load_importance_scores(user_id)
	
	

	
	# Raw matrix first; presumably add_transformation records into the globals
	# reset above - verify against its definition.
	add_transformation(data_matrix, "presence_count")
	# Scale every entry by 100 / (number of elements along time_dimention);
	# the "*1.0" forces floating-point division.
	add_transformation((data_matrix*100/(np.size(data_matrix,time_dimention)*1.0)), "presence_percentage")
		
	# NOTE(review): the whole return value of each Numpy.*_matrix_transformation
	# helper is forwarded as-is; confirm it has the shape add_transformation expects.
	idf_matrix = Numpy.idf_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf_matrix, "idf_score")
	
	
	# The two triple-quoted strings below are disabled code kept as bare string
	# literals (idf2 and idflog10 variants); they have no effect at runtime.
	'''idf2_matrix = Numpy.idf2_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf2_matrix, "idf2_score")'''
	
	'''idf10_matrix = Numpy.idflog10_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf10_matrix, "idflog10_score")'''
	
	idf3_matrix = Numpy.idf3_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf3_matrix, "idf3_score")
	
	# ldc is the only transformation called without the smoothing flag.
	ldc_matrix = Numpy.ldc_matrix_transformation(data_matrix, time_dimention)
	add_transformation(ldc_matrix, "ldc_score")
		
	idc_matrix = Numpy.idc_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idc_matrix, "idc_score")
	
	
	
	
	# Run the comparison with "presence_count" as the first argument.
	compare("presence_count", user_id)