示例#1
0
def main_pca(images_path):
	"""Run the PCA pipeline on an image database and report its efficiency.

	The work is delegated to three project modules, called in this order:

	1. ``frame_work_v2.pre_process(images_path)`` -- its result is unpacked
	   into six values.  According to the original author's notes they are:

	   * ``train_data_set``: the training images, grouped as a 2-d list.
	   * ``entire_train_data_as_list``: the same training images as one
	     flat list.
	   * ``no_of_classes``: number of classes found in the database
	     (unpacked but not used here).
	   * ``test_data_set``: the images held back for testing.
	   * ``count_of_dots_original_path``: number of dots in the original
	     path, used when rewriting file names of a flat directory.
	   * ``flag_for_testing``: 1 when the directory structure is flat (path
	     names must be rewritten to recover class names), 0 when it is
	     hierarchical.

	2. ``train_database.traindb(train_data_set)`` -- returns the mean image,
	   the selected eigen vectors and the signatures of the training set.

	3. ``test_database.testdb(...)`` -- receives the training signatures, the
	   test set, the flat training list, the mean image, the selected eigen
	   vectors, the dot count and the flat/hierarchical flag.

	Args:
		images_path: location of the image database; passed unchanged to
			``frame_work_v2.pre_process``.

	Returns:
		The value returned by ``test_database.testdb``, which is also
		printed under the heading "efficiency is ".
	"""
	train_data_set,entire_train_data_as_list,no_of_classes,test_data_set,count_of_dots_original_path,flag_for_testing=frame_work_v2.pre_process(images_path)

	# Training phase: everything returned here is needed again for testing.
	mean_img,eigen_selected,signature_images_for_train_set=train_database.traindb(train_data_set)

	# Testing phase.
	r=test_database.testdb(signature_images_for_train_set,test_data_set,entire_train_data_as_list,mean_img,eigen_selected,count_of_dots_original_path,flag_for_testing)

	# Single-argument print calls produce identical output on Python 2
	# (parenthesised expression) and Python 3 (function call).
	print("efficiency is ")
	print(r)
	return r
示例#2
0
def pre_process(pathtoimages):
	"""Split an image database into train/test sets, train on it and test it.

	Steps performed:

	1. Collect every image path below ``pathtoimages`` with
	   ``lslR.get_files`` and sort the list in place so that images of the
	   same class sit next to each other.
	2. Derive one class name per image from the second-to-last component of
	   its split path.
	3. If that yields at most one class, treat the directory as "flat":
	   rewrite each path so that the class separator inside the file name
	   ('_' if the first path contains one, otherwise the first '.') becomes
	   ``os.sep``, then derive the class names again.
	4. Cut the sorted image list into ``no_of_classes`` consecutive groups of
	   equal size, move one randomly chosen image of every group to the test
	   set and keep the rest as the training set.
	5. Call ``train_database.traindb`` on the training set and
	   ``test_database.testdb`` on the result.

	NOTE(review): step 4 assumes every class holds the same number of
	images; with uneven classes the consecutive groups no longer line up
	with the real classes -- confirm against the datasets in use.

	Args:
		pathtoimages: root directory of the image database.

	Returns:
		Whatever ``test_database.testdb`` returns.

	Raises:
		ValueError: if no image is found below ``pathtoimages``.
	"""
	images=lslR.get_files(pathtoimages)
	images.sort()  # in place: grouping below relies on sorted order
	if not images:
		raise ValueError("no images found under %r" % (pathtoimages,))

	split_image_names,no_of_images=return_split_file_names(images)
	class_names=_pre_process_class_names(split_image_names,no_of_images)
	no_of_classes=len(set(class_names))

	# flag_for_testing stays 0 for a hierarchical layout and becomes 1 when
	# the layout is flat and the path names had to be rewritten.
	flag_for_testing=0

	if no_of_classes<=1:
		# A single class means the directory components did not separate
		# the classes, so the class name must be inside the file name.
		flag_for_testing=1
		if '_' in images[0]:
			# Separator is '_': every underscore becomes a path separator.
			images_name_modified=[images[i].replace('_',os.sep) for i in range(no_of_images)]
		else:
			# Separator is '.': only the first dot is turned into a
			# path separator.
			images_name_modified=[images[i].replace('.',os.sep,1) for i in range(no_of_images)]

		split_image_names,no_of_images=return_split_file_names(images_name_modified)
		class_names=_pre_process_class_names(split_image_names,no_of_images)
		no_of_classes=len(set(class_names))

	# Floor division keeps this an int on Python 3 as well as Python 2;
	# it is used below as a range bound and slice step.
	no_of_images_per_class=no_of_images//no_of_classes

	# Consecutive, equally sized slices of the sorted list form the classes.
	# The original (unmodified) paths are used even in the flat case.
	entire_class=[
		images[k*no_of_images_per_class:(k+1)*no_of_images_per_class]
		for k in range(no_of_classes)
	]

	# One random image per class goes to the test set, the rest is trained.
	test_data_set=[]
	train_data_set=[]
	for class_images in entire_class:
		image_no_for_test=int(random.random()*no_of_images_per_class)
		test_data_set.append(class_images.pop(image_no_for_test))
		train_data_set.append(class_images)

	# The testing phase wants the training images as one flat list.
	entire_train_data_as_list=[
		image for class_images in train_data_set for image in class_images
	]

	# Training returns the mean image, the selected eigen vectors and the
	# signatures of the training set; all three are needed for testing.
	mean_img,eigen_selected,signature_images_for_train_set=train_database.traindb(train_data_set)

	# One image per class was removed for testing.
	no_images_trained_per_class=no_of_images_per_class-1

	r=test_database.testdb(signature_images_for_train_set,test_data_set,entire_train_data_as_list,mean_img,eigen_selected,no_images_trained_per_class,flag_for_testing)

	return r


def _pre_process_class_names(split_image_names,no_of_images):
	"""Return the class name of each of the first ``no_of_images`` split paths.

	The class name is the second-to-last component of a split path.  The
	position is computed once from the first path and reused for all others.
	"""
	class_index=len(split_image_names[0])-2
	return [split_image_names[i][class_index] for i in range(no_of_images)]
示例#3
0
def train(pathtoimages):
	"""Split an image database into train/test sets, then train and test it.

	Steps performed:

	1. Collect every image path below ``pathtoimages`` with
	   ``lslR.get_files`` (the order it returns is kept as is).
	2. Split each path on ``os.sep`` and take the second-to-last component
	   as the class name of the image.
	3. Cut the image list into ``no_of_classes`` consecutive groups of equal
	   size, move one randomly chosen image of every group to the test set
	   and keep the rest as the training set.
	4. Call ``train_database.traindb`` on the training set and
	   ``test_database.testdb`` on the result.

	NOTE(review): step 3 assumes the list from ``lslR.get_files`` is already
	grouped by class and that every class holds the same number of images --
	confirm against the datasets in use.

	Args:
		pathtoimages: root directory of the image database.

	Returns:
		None.  The result of ``test_database.testdb`` is not returned.

	Raises:
		ValueError: if no image is found below ``pathtoimages``.
	"""
	images=lslR.get_files(pathtoimages)
	if not images:
		raise ValueError("no images found under %r" % (pathtoimages,))
	no_of_images=len(images)

	# Class name = second-to-last path component.  The position is computed
	# once from the first path and reused for all others.
	split_image_names=[path.split(os.sep) for path in images]
	class_index=len(split_image_names[0])-2
	class_names=[parts[class_index] for parts in split_image_names]
	no_of_classes=len(set(class_names))

	# Floor division keeps this an int on Python 3 as well as Python 2;
	# it is used below as a range bound and slice step.
	no_of_images_per_class=no_of_images//no_of_classes

	# Consecutive, equally sized slices of the image list form the classes.
	entire_class=[
		images[k*no_of_images_per_class:(k+1)*no_of_images_per_class]
		for k in range(no_of_classes)
	]

	# One random image per class goes to the test set, the rest is trained.
	test_data_set=[]
	train_data_set=[]
	for class_images in entire_class:
		image_no_for_test=int(random.random()*no_of_images_per_class)
		test_data_set.append(class_images.pop(image_no_for_test))
		train_data_set.append(class_images)

	# The testing phase wants the training images as one flat list.
	entire_train_data_as_list=[
		image for class_images in train_data_set for image in class_images
	]

	# Training returns the mean image, the selected eigen vectors and the
	# signatures of the training set; all three are needed for testing.
	mean_img,eigen_selected,signature_images_for_train_set=train_database.traindb(train_data_set)

	# One image per class was removed for testing.
	no_images_trained_per_class=no_of_images_per_class-1

	test_database.testdb(signature_images_for_train_set,test_data_set,entire_train_data_as_list,mean_img,eigen_selected,no_images_trained_per_class)