def test_init_method_different_parameters(self): test_dir = File("../../character_examples").getPath() nr_of_training_examples = 90 nr_of_test_examples = 10 for size_classification_factor in drange(0.7, 6.0, 0.3): print str(size_classification_factor) + ' &', for nr_of_segs in range(4, 13): #print(nr_of_segs) test_scores = [] for test_nr in range(10): #print(test_nr) extracor = SimpleImageFeatureExtractor( nr_of_divisions=nr_of_segs, size_classification_factor=size_classification_factor) training_examples, test_examples = extracor.extract_training_and_test_examples( test_dir, nr_of_training_examples, nr_of_test_examples) classifier = CharacterClassifier( training_examples, nr_of_hmms_to_try=1, fraction_of_examples_for_test=0, train_with_examples=False, initialisation_method=SpecializedHMM.InitMethod. count_based) test_scores.append(classifier.test(test_examples)) score = sum(test_scores) / len(test_scores) print ' $' + str(score) + '$ ', if nr_of_segs == 12: print '\\\\' else: print '&',
def create_character_classification_count_matrix():
    '''
    Build a matrix counting how test examples of each character are classified.

    Steps:
    1. Extract 90 training and 10 test examples per character from the
       "character_examples" directory two levels above the working directory.
    2. Build a character classifier from the training examples.
    3. Classify every test example.

    Returns a matrix M where M[i][j] is the number of test examples of the
    character with alphabet index i that were classified as the character
    with alphabet index j.
    '''
    feature_extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=11,
        size_classification_factor=3.4)
    examples_path = File(
        File(File(".."), ".."), "character_examples").getCanonicalPath()
    training_set, test_set = feature_extractor.extract_training_and_test_examples(
        examples_path,
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    character_classifier = CharacterClassifier(
        training_set,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)

    alphabet = get_example_alphabet()
    counts = zeros(len(alphabet), len(alphabet))
    for label, examples in test_set:
        # Labels and predictions are compared in lower case.
        actual_index = alphabet.index(label.lower())
        for example in examples:
            predicted = character_classifier.classify_character_string(example)
            predicted_index = alphabet.index(predicted.lower())
            counts[actual_index][predicted_index] += 1
    return counts
def test_init_method(self,
                     nr_of_segments=7,
                     size_classification_factor=1.3,
                     only_count_based_init=False):
    '''
    Run the init-method test with 90 training and 10 test examples.

    Extracts the examples once from "../../character_examples" and hands
    two callbacks to self.test_init_method_with_classifier: one that
    supplies the training examples and one that builds a
    CharacterClassifier for a given initialisation method.

    nr_of_segments -- passed to the extractor as nr_of_divisions
    size_classification_factor -- passed unchanged to the extractor
    only_count_based_init -- forwarded to test_init_method_with_classifier
        (presumably skips the random init comparison; verify in that method)
    '''
    test_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 90
    nr_of_test_examples = 10
    extracor = SimpleImageFeatureExtractor(
        nr_of_divisions=nr_of_segments,
        size_classification_factor=size_classification_factor)
    training_examples, test_examples = extracor.extract_training_and_test_examples(
        test_dir, nr_of_training_examples, nr_of_test_examples)

    def get_examples(nr_of_examples):
        # Only the single pre-extracted training set is available.
        if nr_of_examples != nr_of_training_examples:
            # A real exception class is required: raising a plain string
            # is itself a TypeError on Python/Jython 2.6 and later.
            raise ValueError("Illegal amount of examples")
        return training_examples

    def get_character_classifier_with_init_method(traing_examples, init_method):
        return CharacterClassifier(
            traing_examples,
            nr_of_hmms_to_try=1,
            fraction_of_examples_for_test=0,
            train_with_examples=False,
            initialisation_method=init_method)

    self.test_init_method_with_classifier(
        get_examples,
        get_character_classifier_with_init_method,
        test_examples,
        [nr_of_training_examples],
        only_count_based_init=only_count_based_init)
def test_init_method_different_parameters(self): test_dir = File("../../character_examples").getPath() nr_of_training_examples = 90 nr_of_test_examples = 10 for size_classification_factor in drange(0.7, 6.0, 0.3): print str(size_classification_factor) + ' &', for nr_of_segs in range(4,13): #print(nr_of_segs) test_scores = [] for test_nr in range(10): #print(test_nr) extracor = SimpleImageFeatureExtractor(nr_of_divisions=nr_of_segs, size_classification_factor=size_classification_factor) training_examples, test_examples = extracor.extract_training_and_test_examples(test_dir, nr_of_training_examples, nr_of_test_examples) classifier = CharacterClassifier(training_examples, nr_of_hmms_to_try=1, fraction_of_examples_for_test=0, train_with_examples=False, initialisation_method=SpecializedHMM.InitMethod.count_based) test_scores.append(classifier.test(test_examples)) score = sum(test_scores) / len(test_scores) print ' $' + str(score) +'$ ', if nr_of_segs == 12: print '\\\\' else: print '&',
def test_init_method(self,
                     nr_of_segments=7,
                     size_classification_factor=1.3,
                     only_count_based_init=False):
    '''
    Run the init-method test with 90 training and 10 test examples.

    The examples are extracted once from "../../character_examples".
    self.test_init_method_with_classifier then receives a callback that
    returns the training examples and a callback that builds a
    CharacterClassifier for a requested initialisation method.

    nr_of_segments -- passed to the extractor as nr_of_divisions
    size_classification_factor -- passed unchanged to the extractor
    only_count_based_init -- forwarded to test_init_method_with_classifier
        (presumably skips the random init comparison; verify in that method)
    '''
    test_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 90
    nr_of_test_examples = 10
    extracor = SimpleImageFeatureExtractor(
        nr_of_divisions=nr_of_segments,
        size_classification_factor=size_classification_factor)
    training_examples, test_examples = extracor.extract_training_and_test_examples(
        test_dir, nr_of_training_examples, nr_of_test_examples)

    def get_examples(nr_of_examples):
        # Only the single pre-extracted training set is available.
        if nr_of_examples != nr_of_training_examples:
            # Raising a plain string fails with TypeError on Python/Jython
            # 2.6 and later, so use a real exception class.
            raise ValueError("Illegal amount of examples")
        return training_examples

    def get_character_classifier_with_init_method(traing_examples, init_method):
        return CharacterClassifier(
            traing_examples,
            nr_of_hmms_to_try=1,
            fraction_of_examples_for_test=0,
            train_with_examples=False,
            initialisation_method=init_method)

    self.test_init_method_with_classifier(
        get_examples,
        get_character_classifier_with_init_method,
        test_examples,
        [nr_of_training_examples],
        only_count_based_init=only_count_based_init)
def create_character_classification_count_matrix():
    '''
    Count, per character, what its test examples get classified as.

    A classifier is built from 90 training examples per character and then
    used on 10 test examples per character. The returned matrix M holds in
    M[i][j] the number of test examples of the character at alphabet index
    i that were classified as the character at alphabet index j.
    '''
    feature_extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=11, size_classification_factor=3.4)
    examples_root = File(File(File(".."), ".."), "character_examples")
    training_set, test_set = feature_extractor.extract_training_and_test_examples(
        examples_root.getCanonicalPath(),
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    character_classifier = CharacterClassifier(
        training_set,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)

    alphabet = get_example_alphabet()
    alphabet_size = len(alphabet)
    count_matrix = zeros(alphabet_size, alphabet_size)
    for label, examples in test_set:
        # Row of the true character; lookups are done in lower case.
        true_index = alphabet.index(label.lower())
        for example in examples:
            guess = character_classifier.classify_character_string(example).lower()
            guess_index = alphabet.index(guess)
            row = count_matrix[true_index]
            row[guess_index] = row[guess_index] + 1
    return count_matrix
def test_with_two_characters(self):
    '''
    Test classification and serialization using only the characters A and B.

    The A and B example directories are copied into a temporary "test"
    directory under "../../character_examples". A classifier is built from
    them and tested, then serialized with to_string() and rebuilt; the
    rebuilt classifier must give the same test result. The original
    classifier is then trained and tested again, and both results are
    printed.

    The temporary directory is removed even when the test fails, so that
    a failed run does not make the next run's copytree fail.

    Raises AssertionError if the rebuilt classifier scores differently.
    '''
    import os  # local import: only needed for the cleanup guard below

    base_dir = File("../../character_examples")
    test_dir = File(base_dir, "test")
    test_path = test_dir.getPath()
    try:
        shutil.copytree(File(base_dir, "A").getPath(),
                        File(test_dir, "A").getPath())
        shutil.copytree(File(base_dir, "B").getPath(),
                        File(test_dir, "B").getPath())
        extracor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                               size_classification_factor=1.3)
        # Extract features
        training_examples, test_examples = extracor.extract_training_and_test_examples(
            test_path, 90, 10)
        classifier = CharacterClassifier(training_examples,
                                         nr_of_hmms_to_try=1,
                                         fraction_of_examples_for_test=0.3,
                                         feature_extractor=extracor,
                                         train_with_examples=False)
        before = classifier.test(test_examples)
        # Test serialization: a classifier rebuilt from its string form
        # must score exactly like the original.
        classifier_string = classifier.to_string()
        reborn_classifier = CharacterClassifier(
            from_string_string=classifier_string)
        reborn_classifier_test_result = reborn_classifier.test(test_examples)
        if not (reborn_classifier_test_result == before):
            # A real exception class is required: raising a plain string
            # is itself a TypeError on Python/Jython 2.6 and later.
            raise AssertionError("Something is wrong with the test result")
        classifier.train()
        after = classifier.test(test_examples)
        print("test_with_two_characters", "before", before, "after", after)
    finally:
        # Guarded so that a failure before anything was copied is not
        # masked by an error from rmtree on a missing directory.
        if os.path.exists(test_path):
            shutil.rmtree(test_path)
def test_with_two_characters(self):
    '''
    Test classification and serialization using only the characters A and B.

    The A and B example directories are copied into a temporary "test"
    directory under "../../character_examples". A classifier is built from
    them and tested, then serialized with to_string() and rebuilt; the
    rebuilt classifier must give the same test result. The original
    classifier is then trained and tested again, and both results are
    printed.

    The temporary directory is removed even when the test fails, so that
    a failed run does not make the next run's copytree fail.

    Raises AssertionError if the rebuilt classifier scores differently.
    '''
    import os  # local import: only needed for the cleanup guard below

    base_dir = File("../../character_examples")
    test_dir = File(base_dir, "test")
    test_path = test_dir.getPath()
    try:
        shutil.copytree(File(base_dir, "A").getPath(),
                        File(test_dir, "A").getPath())
        shutil.copytree(File(base_dir, "B").getPath(),
                        File(test_dir, "B").getPath())
        extracor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                               size_classification_factor=1.3)
        # Extract features
        training_examples, test_examples = extracor.extract_training_and_test_examples(
            test_path, 90, 10)
        classifier = CharacterClassifier(training_examples,
                                         nr_of_hmms_to_try=1,
                                         fraction_of_examples_for_test=0.3,
                                         feature_extractor=extracor,
                                         train_with_examples=False)
        before = classifier.test(test_examples)
        # Test serialization: a classifier rebuilt from its string form
        # must score exactly like the original.
        classifier_string = classifier.to_string()
        reborn_classifier = CharacterClassifier(
            from_string_string=classifier_string)
        reborn_classifier_test_result = reborn_classifier.test(test_examples)
        if not (reborn_classifier_test_result == before):
            # Raising a plain string fails with TypeError on Python/Jython
            # 2.6 and later, so use a real exception class.
            raise AssertionError("Something is wrong with the test result")
        classifier.train()
        after = classifier.test(test_examples)
        print("test_with_two_characters", "before", before, "after", after)
    finally:
        # Guarded so that a failure before anything was copied is not
        # masked by an error from rmtree on a missing directory.
        if os.path.exists(test_path):
            shutil.rmtree(test_path)
def extract_test_examples_to_file(self):
    '''
    Extract character test examples and pickle them to a file.

    The feature extractor is configured from
    self.feature_extraction_number_of_segments and
    self.feature_extraction_classification_factor. Examples are read from
    the "word_examples_for_test" directory two levels above the directory
    of this source file, requesting 0 training and 10 test examples.

    The test examples are pickled to a file named
    'datatest_segments_{segments}_cf_{factor}.pkl' (relative path), where
    dots in the factor are replaced by underscores.
    '''
    extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=self.feature_extraction_number_of_segments,
        size_classification_factor=self.feature_extraction_classification_factor)
    # Resolve the examples directory relative to this source file rather
    # than the current working directory.
    source_file = File(str(inspect.getfile(inspect.currentframe())))
    examples_dir = File(
        File(File(source_file.getParent(), ".."), ".."),
        "word_examples_for_test").getCanonicalPath()
    # No training examples are requested, so the first result is ignored.
    _unused_training, character_test_examples = extractor.extract_training_and_test_examples(
        examples_dir,
        nr_of_training_examples=0,
        nr_of_test_examples=10)
    file_name = ('datatest_segments_' +
                 str(self.feature_extraction_number_of_segments) +
                 '_cf_' +
                 str(self.feature_extraction_classification_factor).replace('.', '_') +
                 '.pkl')
    output = open(file_name, 'wb')
    try:
        pickle.dump(character_test_examples, output)
    finally:
        # Close the file even if pickling fails.
        output.close()
def create_character_classifier(save_to_file_path):
    '''
    Build a character classifier and print its result on the test examples.

    Extracts 90 training and 10 test examples from
    "../../character_examples" (11 divisions, size classification factor
    4.6), builds a CharacterClassifier with count based initialisation and
    train_with_examples=True, and prints the string form of
    classifier.test(test_examples).

    save_to_file_path -- NOTE(review): not used by this function; nothing
        is written to disk here.
    '''
    examples_path = File("../../character_examples").getPath()
    training_count, test_count = 90, 10
    feature_extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=11,
        size_classification_factor=4.6)
    training_set, test_set = feature_extractor.extract_training_and_test_examples(
        examples_path, training_count, test_count)
    character_classifier = CharacterClassifier(
        training_set,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=True,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=feature_extractor)
    prediction_ratio = str(character_classifier.test(test_set))
    print('Prediction ratio:', prediction_ratio)
def create_character_classifier(save_to_file_path):
    '''
    Build a character classifier and save its string form to a file.

    Extracts 100 training and 0 test examples from
    "../../character_examples" (7 divisions, size classification factor
    1.3), builds a CharacterClassifier with count based initialisation and
    train_with_examples=False, and writes classifier.to_string() to
    save_to_file_path.

    save_to_file_path -- path of the file to write; opened in 'w' mode
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0
    extractor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                            size_classification_factor=1.3)
    # No test examples are requested, so the second result is not used.
    training_examples, _test_examples = extractor.extract_training_and_test_examples(
        example_dir, nr_of_training_examples, nr_of_test_examples)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    classifier_string = classifier.to_string()
    # Named 'output' so the Python 2 builtin 'file' is not shadowed.
    output = open(save_to_file_path, 'w')
    try:
        output.write(classifier_string)
    finally:
        # Close the file even if the write fails.
        output.close()
def extract_test_examples_to_file(self):
    '''
    Extract character test examples and pickle them to a file.

    The feature extractor is configured from
    self.feature_extraction_number_of_segments and
    self.feature_extraction_classification_factor. Examples are read from
    the "word_examples_for_test" directory two levels above the directory
    of this source file, requesting 0 training and 10 test examples.

    The test examples are pickled to a file named
    'datatest_segments_{segments}_cf_{factor}.pkl' (relative path), where
    dots in the factor are replaced by underscores.
    '''
    extractor = SimpleImageFeatureExtractor(
        nr_of_divisions=self.feature_extraction_number_of_segments,
        size_classification_factor=self.feature_extraction_classification_factor)
    # Resolve the examples directory relative to this source file rather
    # than the current working directory.
    source_file = File(str(inspect.getfile(inspect.currentframe())))
    examples_dir = File(
        File(File(source_file.getParent(), ".."), ".."),
        "word_examples_for_test").getCanonicalPath()
    # No training examples are requested, so the first result is ignored.
    _unused_training, character_test_examples = extractor.extract_training_and_test_examples(
        examples_dir,
        nr_of_training_examples=0,
        nr_of_test_examples=10)
    file_name = ('datatest_segments_' +
                 str(self.feature_extraction_number_of_segments) +
                 '_cf_' +
                 str(self.feature_extraction_classification_factor).replace('.', '_') +
                 '.pkl')
    output = open(file_name, 'wb')
    try:
        pickle.dump(character_test_examples, output)
    finally:
        # Close the file even if pickling fails.
        output.close()
def create_character_classifier(save_to_file_path):
    '''
    Build a character classifier and save its string form to a ".dat" file.

    Extracts 100 training and 0 test examples from
    "../../character_examples" (11 divisions, size classification factor
    4.6), builds a CharacterClassifier with count based initialisation and
    train_with_examples=False, and writes classifier.to_string() to
    save_to_file_path + ".dat".

    save_to_file_path -- output path without the ".dat" extension, which
        is appended here
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0
    extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                            size_classification_factor=4.6)
    # No test examples are requested, so the second result is not used.
    training_examples, _test_examples = extractor.extract_training_and_test_examples(
        example_dir, nr_of_training_examples, nr_of_test_examples)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    classifier_string = classifier.to_string()
    # Named 'output' so the Python 2 builtin 'file' is not shadowed.
    output = open(save_to_file_path + ".dat", 'w')
    try:
        output.write(classifier_string)
    finally:
        # Close the file even if the write fails.
        output.close()