def dct_main(database):
    """Print the dct1.imagetoDct() result of every file found under *database*.

    The file list comes from get_abs_names.get_files(database) and is
    processed in the order that call returns it.  Nothing is returned; the
    collected results are written to standard output as one list.
    """
    eigen_vect = [dct1.imagetoDct(image_path)
                  for image_path in get_abs_names.get_files(database)]

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Printing eigen vectors of all the images")
    print(eigen_vect)
def return_pp(img_dir):
    """Return the 34-bucket histogram of the summed images as percentages.

    Every file returned by get_abs_names.get_files(img_dir) is converted with
    initial_processing.imageToVector() (in sorted name order), the vectors are
    added together, and the sum is histogrammed into 34 buckets.  Each bucket
    count is then expressed as a percentage of the pixel count
    (width * height) of the first image.

    Raises IndexError when the directory yields no files.
    """
    names = get_abs_names.get_files(img_dir)
    names.sort()
    image_vector = [initial_processing.imageToVector(name) for name in names]

    # The accumulation starts from the last vector and then adds the others
    # front to back; the order is kept so floating-point results do not move.
    sum_img = image_vector[-1]
    for vector in image_vector[:-1]:
        sum_img = sum_img + vector

    # numpy.histogram returns (counts, bin_edges); only the counts are used.
    hist_img = numpy.histogram(sum_img, bins=34)

    img_for_dimension = Image.open(names[0])
    width = img_for_dimension.size[0]
    height = img_for_dimension.size[1]
    pixels_per_image = float(width * height)

    percent_of_pixels = (hist_img[0] / pixels_per_image) * 100
    return percent_of_pixels
def pre_process(pathtoimages):
    """Split an image database into train/test sets, then train and test on it.

    Steps:
      1. Collect every file under *pathtoimages* with lslR.get_files() and
         sort the paths so that images of one class are adjacent.
      2. Take the last-but-one component of each split path as its class name.
      3. If that yields at most one class, treat the directory as flat and
         derive the class from the file name instead (see below).
      4. Cut the sorted list into consecutive, equally sized classes, move one
         randomly chosen image of every class to the test set and keep the
         rest for training.
      5. Call train_database.traindb() on the training set and hand its
         results to test_database.testdb().

    Every class is assumed to hold the same number of images
    (no_of_images // no_of_classes); images beyond that are ignored.

    Returns whatever test_database.testdb() returns.
    Raises IndexError when no files are found.
    """
    images = lslR.get_files(pathtoimages)
    images.sort()

    split_image_names, no_of_images = return_split_file_names(images)

    # The class name is the last-but-one path component.  Its position is
    # read from the first path and reused for every other path.
    class_position = len(split_image_names[0]) - 2
    class_names = [split_image_names[i][class_position]
                   for i in range(no_of_images)]
    no_of_classes = len(set(class_names))

    # flag_for_testing == 1 tells testdb that the directory layout is flat.
    flag_for_testing = 0
    if no_of_classes <= 1:
        flag_for_testing = 1

        # Flat layout: the class is encoded in the file name.  Turn the
        # separator into os.sep so the path-splitting logic above can be
        # reused unchanged.
        # NOTE(review): both the '_' check and the replacements look at the
        # whole path, directory names included - confirm that database paths
        # never contain '_' or '.' outside the file name.
        if images[0].find('_') >= 0:
            images_name_modified = [images[i].replace('_', os.sep)
                                    for i in range(no_of_images)]
        else:
            images_name_modified = [images[i].replace('.', os.sep, 1)
                                    for i in range(no_of_images)]

        split_image_names, no_of_images = return_split_file_names(
            images_name_modified)
        class_position = len(split_image_names[0]) - 2
        class_names = [split_image_names[i][class_position]
                       for i in range(no_of_images)]
        no_of_classes = len(set(class_names))

    # Floor division: identical to the old "/" on Python 2 ints and still an
    # int (usable by range and slicing) on Python 3.
    no_of_images_per_class = no_of_images // no_of_classes

    # Consecutive runs of the sorted (original) paths form the classes.
    entire_class = [
        images[k * no_of_images_per_class:(k + 1) * no_of_images_per_class]
        for k in range(no_of_classes)
    ]

    # One random image per class goes to the test set, the rest is training
    # data.  New lists are built so entire_class itself is left untouched.
    test_data_set = []
    train_data_set = []
    for class_images in entire_class:
        image_no_for_test = int(random.random() * no_of_images_per_class)
        test_data_set.append(class_images[image_no_for_test])
        train_data_set.append(class_images[:image_no_for_test]
                              + class_images[image_no_for_test + 1:])

    # testdb also wants the training images as one flat list.
    entire_train_data_as_list = []
    for class_train_images in train_data_set:
        entire_train_data_as_list.extend(class_train_images)

    mean_img, eigen_selected, signature_images_for_train_set = \
        train_database.traindb(train_data_set)

    # One image of every class was held back for testing.
    no_images_trained_per_class = no_of_images_per_class - 1

    return test_database.testdb(signature_images_for_train_set,
                                test_data_set,
                                entire_train_data_as_list,
                                mean_img,
                                eigen_selected,
                                no_images_trained_per_class,
                                flag_for_testing)
def decide_algo(input_str):
    """Measure one randomly chosen image of the database at *input_str*.

    The files under *input_str* are listed with lslR.get_files(), sorted, and
    one of them is picked at random.  getmetrics.return_metrics() is called
    on that image (the original comments describe the result as a list of
    16 metrics).

    The code that compares those metrics against the previously trained
    databases and picks an algorithm is currently disabled (see the string
    block at the end of the function), so nothing is returned.

    Raises IndexError when *input_str* contains no files.
    """
    wrapper_test_image_names = lslR.get_files(input_str)
    wrapper_test_image_names.sort()
    no_of_images = len(wrapper_test_image_names)

    # Truncating keeps the index inside [0, no_of_images - 1].  The earlier
    # int(round(...)) could round up to no_of_images itself and make the
    # lookup below fail with IndexError.
    randomly_selected_image = int(random.random() * no_of_images)

    # flag == 0 means "database not identified yet"; it and `metric` are
    # consumed by the disabled block below.
    flag = 0
    metric = getmetrics.return_metrics(
        wrapper_test_image_names[randomly_selected_image])

    # NOTE(review): the original source opens a triple-quoted string here and
    # never closes it inside this function.  It is read as "old code switched
    # off while being rewritten" and closed at the end of the function -
    # confirm that this matches the intent.  Before re-enabling it:
    #   * "no_of_images_in_train<randomly_selected_image" must be "<=",
    #     otherwise face_names[randomly_selected_image] can go out of range;
    #   * fp_for_trained_db is never closed;
    #   * pickle.load() must only ever see a trusted mapping file.
    """
    ##### We need to get the names of the databases which are previously trained, Rewriting this part with more efficient way
    fp_for_trained_db=open("mapping_dataset_algo","r") # Opening trained_databases in "r" mode to read the list of trained db's
    fp_for_trained_db.seek(0) # Not necessary, but still on safer hand its given so fp_for_trained_db points to beginning of the file
    trained_datasets=pickle.load(fp_for_trained_db) # loading the trained data lists from pickle to trained_datasets
    for i in range(len(trained_datasets)): # Have to be checked on all the previously trained datasets
        face_names=lslR.get_files(trained_datasets[i][0]) # Getting the absolute path names of the database
        face_names.sort() # Sorting the absolute path names
        no_of_images_in_train=len(face_names)
        if (no_of_images_in_train<randomly_selected_image):
            continue
        trained_metric=getmetrics.return_metrics(face_names[randomly_selected_image]) # Calling the return_metrics of getmetrics which returns 16 metrics as list
        if(metric.__eq__(trained_metric)): # Comparing if all the 16 metrics of the image is matching
            print "Data base identified"
            print "Identified database is"
            print trained_datasets[i][0] # printing the identified database name
            flag=1 # Setting the flag if database is identified
            identified_algo_name=trained_datasets[i][1]
            if(identified_algo_name=="PCA"):
                PCA_main.main_pca(trained_datasets[i][0])
            if(identified_algo_name=="DCT"):
                print "DCT is to be called"
            if(identified_algo_name=="LPP"):
                print "LPP is to be called"
            break # once if database is identified then we can come out of loop
    if(flag==0): # means database not identified
        ##### Need to extract the trained dataset path
        trained_data_path=trained_datasets[0][0] # Taking any database path to extract trained database path
        rindex_ossep=trained_data_path.rindex(os.sep) # Getting the path of the trained databases directory
        trained_data_path=trained_data_path[0:rindex_ossep] # Getting the path of the trained databases directory
        src_path=input_str # Getting source directory of new database
        dest_path=trained_data_path+os.sep # Creating destination directory for taking back up of new database
        dest_path=dest_path+get_db_name(input_str) # Creating destination directory for taking back up of new database
        print "Data base not identifed"
        print "Adding database to our trained database sets"
        shutil.copytree(src_path,dest_path) # creating a copy of the entire database, dynamically updating new database to trained set
        add_database.add_db(dest_path) # Adding the new database (which is presently copied to dest_path) to the previously trained list.
        print "Database added"
        print dest_path
        index_best_algo_chosen=compare_with_all.choose_best_algo(dest_path,trained_datasets)
        best_algo_chosen=algorithms[index_best_algo_chosen]
        print "Algorithm chosen is " +best_algo_chosen +" bacause it has the more efficiency then other algorithms on this database"
        new_list_to_append=[]
        new_list_to_append.append(dest_path)
        new_list_to_append.append(best_algo_chosen)
        trained_datasets.append(new_list_to_append)
        fp_to_update_trained_db=open("mapping_dataset_algo","w+") # Opening trained_databases in "r" mode to read the list of trained db's
        fp_to_update_trained_db.seek(0) # Not necessary, but still on safer hand its given so fp_for_trained_db points to beginning of the file
        pickle.dump(trained_datasets,fp_to_update_trained_db) # updating the mapping_dataset file
        fp_to_update_trained_db.close()
        if(best_algo_chosen=="PCA"):
            PCA_main.main_pca(dest_path)
        if(best_algo_chosen=="DCT"):
            print "DCT is to be called"
        if(best_algo_chosen=="LPP"):
            print "LPP is to be called"
    """
def pre_process(pathtoimages):
    """Split an image database into per-class train and test sets.

    Steps:
      1. Collect every file under *pathtoimages* with lslR.get_files() and
         sort the paths.
      2. Take the last-but-one component of each split path as its class name.
      3. If that yields at most one class, treat the directory as flat and
         derive the class from the file name instead (see below).
      4. Group the images by class (classes may differ in size), move one
         randomly chosen image of every class to the test set and keep the
         rest for training.

    Prints the total number of training images.

    Returns a 6-tuple:
      train_data_set               list of per-class lists of training paths
      entire_train_data_as_list    the same paths as one flat list
      no_of_classes                number of distinct classes
      test_data_set                one test path per class
      count_of_dots_original_path  number of '.' in *pathtoimages* for a flat
                                   layout, otherwise 0
      flag_for_testing             1 for a flat layout, otherwise 0

    Classes are ordered by sorted class name in both data sets.
    Raises IndexError when no files are found.
    """
    images = lslR.get_files(pathtoimages)
    images.sort()

    split_image_names, no_of_images = return_split_file_names(images)

    # The class name is the last-but-one path component.  Its position is
    # read from the first path and reused for every other path.
    class_position = len(split_image_names[0]) - 2
    class_names = [split_image_names[i][class_position]
                   for i in range(no_of_images)]
    no_of_classes = len(set(class_names))

    flag_for_testing = 0
    count_of_dots_original_path = 0
    if no_of_classes <= 1:
        flag_for_testing = 1

        # Flat layout: the class is encoded in the file name.  Turn the
        # separator into os.sep so the path-splitting logic above can be
        # reused unchanged.  Dots that already occur in the database path are
        # counted so that the first dot of the file name is converted too.
        # NOTE(review): the '_' check and replacement still look at the whole
        # path, directory names included - confirm that database paths never
        # contain '_'.
        count_of_dots_original_path = pathtoimages.count('.')
        if images[0].find('_') >= 0:
            images_name_modified = [images[i].replace('_', os.sep)
                                    for i in range(no_of_images)]
        else:
            images_name_modified = [
                images[i].replace('.', os.sep, count_of_dots_original_path + 1)
                for i in range(no_of_images)
            ]

        split_image_names, no_of_images = return_split_file_names(
            images_name_modified)
        class_position = len(split_image_names[0]) - 2
        class_names = [split_image_names[i][class_position]
                       for i in range(no_of_images)]
        no_of_classes = len(set(class_names))

    # Group every image under its own class name.  The previous version
    # handed out the sorted paths by per-class counts taken in sorted
    # class-name order; the two orders differ for names such as "s1_"/"s10_"
    # or "s1"/"s1-a", which put images into the wrong class.
    # Assumes return_split_file_names keeps the input order, i.e.
    # class_names[i] belongs to images[i] - TODO confirm.
    images_by_class = {}
    for i in range(no_of_images):
        images_by_class.setdefault(class_names[i], []).append(images[i])
    unique_class_names = sorted(images_by_class)
    entire_class = [images_by_class[name] for name in unique_class_names]

    # One random image per class goes to the test set, the rest is training
    # data.  New lists are built so entire_class itself is left untouched.
    test_data_set = []
    train_data_set = []
    for class_images in entire_class:
        image_no_for_test = int(random.random() * len(class_images))
        test_data_set.append(class_images[image_no_for_test])
        train_data_set.append(class_images[:image_no_for_test]
                              + class_images[image_no_for_test + 1:])

    # Callers also want the training images as one flat list.
    entire_train_data_as_list = []
    for class_train_images in train_data_set:
        entire_train_data_as_list.extend(class_train_images)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Total number of trained images = %d " % (len(entire_train_data_as_list)))

    return (train_data_set, entire_train_data_as_list, no_of_classes,
            test_data_set, count_of_dots_original_path, flag_for_testing)
import sys import Image import scipy.linalg import numpy.matlib from numpy.matlib import zeros images_abs_names=[] # variable contains all the paths to file total_img_vect=[] # variable to contain total images in vector form mean_img_vect=[] # variable for containing mean of all images sum_img_vect=[] # variable for containing sum of all images mean_for_subtraction=[] # variable contains clones of mean used for subtracting """ get_files method in the module get_abs_names is called """ src_img_dir=sys.argv[1] images=lslR.get_files(src_img_dir) # This should be generic in such a way to be given at run time using sys.argv, To be changed very soon """ we might have to initialise mean_image_vect and sum_image_vect, so we might required to know the dimension of each image, so one test image is read and then all the required values are found out """ shape_image=Image.open(images[0]) shape_image_array=numpy.asarray(shape_image) shape=shape_image_array.shape total_dimensions_per_image=shape[0]*shape[1] """ initialising all the required values such as mean_img_vect
def train(pathtoimages):
    """Split an image database into train/test sets, then train and test on it.

    The images found under ``pathtoimages`` are grouped into classes; the
    class of an image is the name of the directory that directly contains it.
    One randomly chosen image per class is held out for testing and the
    remaining images of that class form its training set.  The training sets
    are handed to ``train_database.traindb`` and its results, together with
    the held-out images, are handed to ``test_database.testdb``.

    Every class is assumed to hold the same number of images: the class size
    is computed as ``number of images // number of classes`` and images are
    assigned to classes by their position in the sorted path list.

    Args:
        pathtoimages: root directory of the image database; passed unchanged
            to ``lslR.get_files``.

    Returns:
        None.  The return value of ``test_database.testdb`` is discarded.

    Raises:
        ValueError: if ``lslR.get_files`` returns no image paths.
    """
    # Sort a copy of the path list so that images sharing a parent directory
    # (i.e. a class) are contiguous; the positional grouping below relies on
    # that ordering.
    images = sorted(lslR.get_files(pathtoimages))
    if not images:
        raise ValueError("no images found under %r" % (pathtoimages,))

    # The class of an image is the directory that directly contains it.
    # Taking it per path keeps this correct even when paths differ in depth.
    class_names = [os.path.basename(os.path.dirname(path)) for path in images]
    no_of_classes = len(set(class_names))

    # Floor division keeps the class size an integer on Python 2 and 3 alike.
    no_of_images_per_class = len(images) // no_of_classes

    # Chunk the sorted paths into consecutive, equally sized classes.  Any
    # trailing images that do not fill a whole class are left out.
    entire_class = [
        images[start:start + no_of_images_per_class]
        for start in range(0,
                           no_of_classes * no_of_images_per_class,
                           no_of_images_per_class)
    ]

    # Hold out one random image per class for testing; the rest is trained on.
    test_data_set = []
    train_data_set = []
    for class_images in entire_class:
        image_no_for_test = int(random.random() * no_of_images_per_class)
        test_data_set.append(class_images[image_no_for_test])
        # Build a new list instead of removing in place, so the per-class
        # list in entire_class is not mutated through an alias.
        train_data_set.append(class_images[:image_no_for_test] +
                              class_images[image_no_for_test + 1:])

    # test_database.testdb also wants the training images as one flat list.
    entire_train_data_as_list = [
        image for class_train in train_data_set for image in class_train
    ]

    # traindb returns three values that the testing phase needs.  According
    # to the original notes in this file they are:
    #   (1) mean_img: the mean of all the images, a 1-d array/list
    #   (2) eigen_selected: the major eigen values that were retained
    #   (3) signature_images_for_train_set: the signatures (mapped / eigen
    #       images) of the entire training data set
    mean_img, eigen_selected, signature_images_for_train_set = \
        train_database.traindb(train_data_set)

    # One image per class was held out, so one fewer was trained per class.
    no_images_trained_per_class = no_of_images_per_class - 1

    test_database.testdb(signature_images_for_train_set,
                         test_data_set,
                         entire_train_data_as_list,
                         mean_img,
                         eigen_selected,
                         no_images_trained_per_class)
import Image import scipy.linalg import numpy.matlib from numpy.matlib import zeros images_abs_names = [] # variable contains all the paths to file total_img_vect = [] # variable to contain total images in vector form mean_img_vect = [] # variable for containing mean of all images sum_img_vect = [] # variable for containing sum of all images mean_for_subtraction = [] # variable contains clones of mean used for subtracting norm_list = [] # variable to hold all the norm values during testing phase """ get_files method in the module get_abs_names is called """ src_img_dir = sys.argv[1] images = lslR.get_files(src_img_dir) images_abs_names = images """ we might have to initialise mean_image_vect and sum_image_vect, so we might required to know the dimension of each image, so one test image is read and then all the required values are found out """ shape_image = Image.open(images[0]) shape_image_array = numpy.asarray(shape_image) shape = shape_image_array.shape total_dimensions_per_image = shape[0] * shape[1] print "printing shape of the image"