Example #1 (score: 0)
	def _apply_pretransformation(self, matrix, pretransformation_name):
		"""Apply the named pre-transformation to the training matrix.

		Parameters:
			matrix: numpy array (features x time -- assumed from the use of
				LCBMFComputer.feature_dimention / time_dimention; confirm).
			pretransformation_name: one of None, "none", "idf", "ldc",
				"idc", "idf3".

		Returns:
			[transformed_trainset, transformation_scores_by_feature] where
			transformation_scores_by_feature holds one score per feature.

		Raises:
			Exception: when pretransformation_name is not a known name.
		"""
		transformed_trainset = np.copy(matrix)

		# Fix: compare against the None singleton with `is`, not `==` (PEP 8).
		if pretransformation_name == "none" or pretransformation_name is None:
			# No transformation: all-ones scores (one per feature) are
			# equivalent to leaving the matrix untouched.
			transformation_scores_by_feature = np.ones(np.shape(matrix)[LCBMFComputer.feature_dimention])

		elif pretransformation_name == "idf":
			do_laplace_smoothing = True
			[transformed_trainset, transformation_scores_by_feature] = Numpy.idf_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)

		elif pretransformation_name == "ldc":
			[transformed_trainset, transformation_scores_by_feature] = Numpy.ldc_matrix_transformation(matrix, LCBMFComputer.time_dimention)

		elif pretransformation_name == "idc":
			do_laplace_smoothing = True
			[transformed_trainset, transformation_scores_by_feature] = Numpy.idc_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)

		elif pretransformation_name == "idf3":
			do_laplace_smoothing = True
			[transformed_trainset, transformation_scores_by_feature] = Numpy.idf3_matrix_transformation(matrix, LCBMFComputer.time_dimention, do_laplace_smoothing)

		else:
			raise Exception("WRONG TRANSFORMATION EXCEPTION : the transformation "+pretransformation_name+" do not exist")

		return [transformed_trainset, transformation_scores_by_feature]
Example #2 (score: 0)
def transformations_comparaison_one_user(user_id):
	"""Load one user's data matrix, register several weighting
	transformations of it (presence count/percentage, idf, idf3, ldc, idc)
	via add_transformation, then run compare() against the baseline
	"presence_count".

	Results are communicated through the module-level globals declared
	below rather than return values.
	"""
	global file_name
	global rows_labels
	global labels_importance
	global labels_importance_derivative
	global labels_importance_rank
	global transformation_vectors
	
	# Reset all accumulator globals so a previous user's results do not leak
	# into this run.
	labels_importance = {}
	labels_importance_derivative = {}
	labels_importance_rank = {}
	transformation_vectors = {}
	rows_labels = None
	file_name = None
	file_name = "transformations_comparaison_"+str(user_id)
	print "loading matrix user "+str(user_id)+"..."
	data_matrix = MDataExtractor.load_matrix(user_id)
	
	rows_labels =  MDataExtractor.load_labels_vector(user_id)
	columns_labels = MDataExtractor.load_time_vector(user_id)
	importance_scores = MDataExtractor.load_importance_scores(user_id)
	
	
	# Raw counts, and counts normalized to a percentage of the number of
	# entries along time_dimention (a module-level global -- TODO confirm
	# its value matches the matrix orientation).
	
	add_transformation(data_matrix, "presence_count")
	add_transformation((data_matrix*100/(np.size(data_matrix,time_dimention)*1.0)), "presence_percentage")
		
	# NOTE(review): elsewhere in this file the result of
	# idf_matrix_transformation is unpacked as [matrix, scores]; here the
	# raw return value is handed to add_transformation -- confirm this is
	# the intended payload and not a [matrix, scores] pair.
	idf_matrix = Numpy.idf_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf_matrix, "idf_score")
	
	
	'''idf2_matrix = Numpy.idf2_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf2_matrix, "idf2_score")'''
	
	'''idf10_matrix = Numpy.idflog10_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf10_matrix, "idflog10_score")'''
	
	idf3_matrix = Numpy.idf3_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idf3_matrix, "idf3_score")
	
	ldc_matrix = Numpy.ldc_matrix_transformation(data_matrix, time_dimention)
	add_transformation(ldc_matrix, "ldc_score")
		
	idc_matrix = Numpy.idc_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	add_transformation(idc_matrix, "idc_score")
	
	
	
	
	# Compare all registered transformations against the raw-count baseline.
	compare("presence_count", user_id)
def compute_svd_one_user(user_id):
	file_name = "svd_user_"+str(user_id)
	print "loading matrix user "+str(user_id)+"..."
	data_matrix = MDataExtractor.load_matrix(user_id)
	rows_labels =  MDataExtractor.load_labels_vector(user_id)
	columns_labels = MDataExtractor.load_time_vector(user_id)
	importance_scores = MDataExtractor.load_importance_scores(user_id)
	
	print "user "+str(user_id)+" has "+str(len(rows_labels))+" features (rows) and "+str(len(columns_labels))+" realization (columns)"
	
	#do the idf / or idc transformation before computing the SVD
	print "doing idf transformation for user "+str(user_id)+"..."
	document_transformed_matrix = np.copy(data_matrix)
	[document_transformed_matrix, scores] = Numpy.idf_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	#[document_transformed_matrix, scores] = Numpy.idc_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	#[document_transformed_matrix, scores] = Numpy.idf3_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
	#[document_transformed_matrix, scores] = Numpy.ldc_matrix_transformation(data_matrix, time_dimention)
	
	term_transformed_matrix = np.ones(np.shape(data_matrix))
	#term_transformed_matrix = Numpy.ti_matrix_transformation(data_matrix, importance_scores)	
	#term_transformed_matrix = Numpy.nti_matrix_transformation(data_matrix,  importance_scores)
	
	data_matrix = document_transformed_matrix * term_transformed_matrix
	
	#compute the SVD
	svd_comp = SVDComputer(data_matrix, rows_labels, columns_labels)
	print "computing SVD for user "+str(user_id)+"..."
	svd_comp.compute_svd()
	
	print "constructing interpretable output for user "+str(user_id)+"..."
	energy_captured = svd_comp.construct_rows_interpretable_output(disp_k, disp_m)
	r_output = svd_comp.rows_interpretable_output
	
	print "the energy captured with "+str(disp_k)+" concepts is "+str(energy_captured)+" %"
	
	#write the result
	print "writing SVD result for user "+str(user_id)+"..."
	JsonLogsFileWriter.write(r_output, file_name)
	
	
	
	
	
		
	
	
	
Example #4 (score: 0)
def compute_svd_one_user(user_id):
    file_name = "svd_user_" + str(user_id)
    print "loading matrix user " + str(user_id) + "..."
    data_matrix = MDataExtractor.load_matrix(user_id)
    rows_labels = MDataExtractor.load_labels_vector(user_id)
    columns_labels = MDataExtractor.load_time_vector(user_id)
    importance_scores = MDataExtractor.load_importance_scores(user_id)

    print "user " + str(user_id) + " has " + str(
        len(rows_labels)) + " features (rows) and " + str(
            len(columns_labels)) + " realization (columns)"

    #do the idf / or idc transformation before computing the SVD
    print "doing idf transformation for user " + str(user_id) + "..."
    document_transformed_matrix = np.copy(data_matrix)
    [document_transformed_matrix,
     scores] = Numpy.idf_matrix_transformation(data_matrix, time_dimention,
                                               do_laplace_smoothing)
    #[document_transformed_matrix, scores] = Numpy.idc_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    #[document_transformed_matrix, scores] = Numpy.idf3_matrix_transformation(data_matrix, time_dimention, do_laplace_smoothing)
    #[document_transformed_matrix, scores] = Numpy.ldc_matrix_transformation(data_matrix, time_dimention)

    term_transformed_matrix = np.ones(np.shape(data_matrix))
    #term_transformed_matrix = Numpy.ti_matrix_transformation(data_matrix, importance_scores)
    #term_transformed_matrix = Numpy.nti_matrix_transformation(data_matrix,  importance_scores)

    data_matrix = document_transformed_matrix * term_transformed_matrix

    #compute the SVD
    svd_comp = SVDComputer(data_matrix, rows_labels, columns_labels)
    print "computing SVD for user " + str(user_id) + "..."
    svd_comp.compute_svd()

    print "constructing interpretable output for user " + str(user_id) + "..."
    energy_captured = svd_comp.construct_rows_interpretable_output(
        disp_k, disp_m)
    r_output = svd_comp.rows_interpretable_output

    print "the energy captured with " + str(disp_k) + " concepts is " + str(
        energy_captured) + " %"

    #write the result
    print "writing SVD result for user " + str(user_id) + "..."
    JsonLogsFileWriter.write(r_output, file_name)