def test_init_method_different_parameters(self):
     test_dir = File("../../character_examples").getPath()
     nr_of_training_examples = 90
     nr_of_test_examples = 10
     for size_classification_factor in drange(0.7, 6.0, 0.3):
         print str(size_classification_factor) + ' &',
         for nr_of_segs in range(4, 13):
             #print(nr_of_segs)
             test_scores = []
             for test_nr in range(10):
                 #print(test_nr)
                 extracor = SimpleImageFeatureExtractor(
                     nr_of_divisions=nr_of_segs,
                     size_classification_factor=size_classification_factor)
                 training_examples, test_examples = extracor.extract_training_and_test_examples(
                     test_dir, nr_of_training_examples, nr_of_test_examples)
                 classifier = CharacterClassifier(
                     training_examples,
                     nr_of_hmms_to_try=1,
                     fraction_of_examples_for_test=0,
                     train_with_examples=False,
                     initialisation_method=SpecializedHMM.InitMethod.
                     count_based)
                 test_scores.append(classifier.test(test_examples))
             score = sum(test_scores) / len(test_scores)
             print ' $' + str(score) + '$ ',
             if nr_of_segs == 12:
                 print '\\\\'
             else:
                 print '&',
def create_character_classification_count_matrix():
    '''
    Build a classification count (confusion) matrix for the characters.

    1. Creates a character classifier from the extracted training examples
       (90 training examples are requested from the extractor).
    2. Classifies every extracted test example (10 are requested). The
       counts are collected in a matrix M where M[i][j] is the number of
       test examples of the character with alphabet index i that were
       classified as the character with alphabet index j.

    Returns the matrix M.
    '''
    feature_extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                                    size_classification_factor=3.4)
    examples_path = File(File(File(".."), ".."), "character_examples").getCanonicalPath()
    training_examples, test_examples = feature_extractor.extract_training_and_test_examples(
        examples_path,
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)
    alphabet = get_example_alphabet()
    # NOTE(review): zeros() is defined elsewhere; presumably it returns a
    # len(alphabet) x len(alphabet) matrix of zeros -- confirm.
    count_matrix = zeros(len(alphabet), len(alphabet))
    for label, examples in test_examples:
        # Labels and predictions are lower-cased before the alphabet lookup.
        actual_index = alphabet.index(label.lower())
        for example in examples:
            predicted = classifier.classify_character_string(example).lower()
            predicted_index = alphabet.index(predicted)
            count_matrix[actual_index][predicted_index] += 1
    return count_matrix
    def test_init_method(self,
                         nr_of_segments=7,
                         size_classification_factor=1.3,
                         only_count_based_init=False):
        '''Run the init-method comparison on the character examples.

        Extracts 90 training and 10 test examples and delegates to
        self.test_init_method_with_classifier, passing callbacks that
        supply the training examples and build a CharacterClassifier for a
        given initialisation method. (The original note says this compares
        random init with count based init; the comparison itself happens in
        test_init_method_with_classifier, which is not shown here.)

        nr_of_segments -- passed to the extractor as nr_of_divisions
        size_classification_factor -- passed through to the extractor
        only_count_based_init -- forwarded unchanged to
            test_init_method_with_classifier
        '''
        test_dir = File("../../character_examples").getPath()
        nr_of_training_examples = 90
        nr_of_test_examples = 10
        extractor = SimpleImageFeatureExtractor(
            nr_of_divisions=nr_of_segments,
            size_classification_factor=size_classification_factor)
        training_examples, test_examples = extractor.extract_training_and_test_examples(
            test_dir, nr_of_training_examples, nr_of_test_examples)

        def get_examples(nr_of_examples):
            # Only one training set (of the size requested above) was
            # extracted, so any other size cannot be served.
            if nr_of_examples != nr_of_training_examples:
                # A real exception class: string exceptions are rejected
                # with a TypeError on Python 2.6 and later.
                raise ValueError("Illegal amount of examples")
            return training_examples

        def get_character_classifier_with_init_method(traing_examples,
                                                      init_method):
            return CharacterClassifier(traing_examples,
                                       nr_of_hmms_to_try=1,
                                       fraction_of_examples_for_test=0,
                                       train_with_examples=False,
                                       initialisation_method=init_method)

        self.test_init_method_with_classifier(
            get_examples,
            get_character_classifier_with_init_method,
            test_examples, [nr_of_training_examples],
            only_count_based_init=only_count_based_init)
 def test_init_method_different_parameters(self):
     test_dir = File("../../character_examples").getPath()
     nr_of_training_examples = 90
     nr_of_test_examples = 10
     for size_classification_factor in drange(0.7, 6.0, 0.3):
         print str(size_classification_factor) + ' &',
         for nr_of_segs in range(4,13):
             #print(nr_of_segs)
             test_scores = []
             for test_nr in range(10):
                 #print(test_nr)
                 extracor = SimpleImageFeatureExtractor(nr_of_divisions=nr_of_segs, 
                                                        size_classification_factor=size_classification_factor)
                 training_examples, test_examples = extracor.extract_training_and_test_examples(test_dir, 
                                                                                                nr_of_training_examples, 
                                                                                                nr_of_test_examples)
                 classifier = CharacterClassifier(training_examples,
                                                  nr_of_hmms_to_try=1,
                                                  fraction_of_examples_for_test=0,
                                                  train_with_examples=False,
                                                  initialisation_method=SpecializedHMM.InitMethod.count_based)
                 test_scores.append(classifier.test(test_examples))
             score = sum(test_scores) / len(test_scores)
             print ' $' + str(score) +'$ ',
             if nr_of_segs == 12:
                 print '\\\\'
             else:
                 print '&',
 def test_init_method(self,
                      nr_of_segments=7,
                      size_classification_factor=1.3,
                      only_count_based_init=False):
     '''Run the init-method comparison on the character examples.

     Extracts 90 training and 10 test examples and delegates to
     self.test_init_method_with_classifier, passing callbacks that
     supply the training examples and build a CharacterClassifier for a
     given initialisation method. (The original note says this compares
     random init with count based init; the comparison itself happens in
     test_init_method_with_classifier, which is not shown here.)

     nr_of_segments -- passed to the extractor as nr_of_divisions
     size_classification_factor -- passed through to the extractor
     only_count_based_init -- forwarded unchanged to
         test_init_method_with_classifier
     '''
     test_dir = File("../../character_examples").getPath()
     nr_of_training_examples = 90
     nr_of_test_examples = 10
     extractor = SimpleImageFeatureExtractor(
         nr_of_divisions=nr_of_segments,
         size_classification_factor=size_classification_factor)
     training_examples, test_examples = extractor.extract_training_and_test_examples(
         test_dir, nr_of_training_examples, nr_of_test_examples)

     def get_examples(nr_of_examples):
         # Only one training set (of the size requested above) was
         # extracted, so any other size cannot be served.
         if nr_of_examples != nr_of_training_examples:
             # A real exception class: string exceptions are rejected
             # with a TypeError on Python 2.6 and later.
             raise ValueError("Illegal amount of examples")
         return training_examples

     def get_character_classifier_with_init_method(traing_examples, init_method):
         return CharacterClassifier(traing_examples,
                                    nr_of_hmms_to_try=1,
                                    fraction_of_examples_for_test=0,
                                    train_with_examples=False,
                                    initialisation_method=init_method)

     self.test_init_method_with_classifier(
         get_examples,
         get_character_classifier_with_init_method,
         test_examples,
         [nr_of_training_examples],
         only_count_based_init=only_count_based_init)
# Example #6
0
def create_character_classification_count_matrix():
    '''
    Build a classification count (confusion) matrix for the characters.

    1. Creates a character classifier from the extracted training examples
       (90 training examples are requested from the extractor).
    2. Classifies every extracted test example (10 are requested). The
       counts are collected in a matrix M where M[i][j] is the number of
       test examples of the character with alphabet index i that were
       classified as the character with alphabet index j.

    Returns the matrix M.
    '''
    feature_extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                                    size_classification_factor=3.4)
    examples_path = File(File(File(".."), ".."), "character_examples").getCanonicalPath()
    training_examples, test_examples = feature_extractor.extract_training_and_test_examples(
        examples_path,
        nr_of_training_examples=90,
        nr_of_test_examples=10)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based)
    alphabet = get_example_alphabet()
    # NOTE(review): zeros() is defined elsewhere; presumably it returns a
    # len(alphabet) x len(alphabet) matrix of zeros -- confirm.
    count_matrix = zeros(len(alphabet), len(alphabet))
    for label, examples in test_examples:
        # Labels and predictions are lower-cased before the alphabet lookup.
        actual_index = alphabet.index(label.lower())
        for example in examples:
            predicted = classifier.classify_character_string(example).lower()
            predicted_index = alphabet.index(predicted)
            count_matrix[actual_index][predicted_index] += 1
    return count_matrix
 def test_with_two_characters(self):
     '''Test classification and serialization using only the letters A and B.

     The A and B example directories are copied into a temporary "test"
     directory under the character examples directory. A classifier is
     built from them, tested, round-tripped through to_string() /
     from_string_string and tested again, then trained and tested once
     more. The before/after scores are printed.

     Raises AssertionError if the deserialized classifier does not give
     the same test result as the original one. The temporary directory is
     removed even when the test fails, so a failed run does not make the
     next run's copytree fail on an already existing directory.
     '''
     import os  # local import: only needed for the cleanup check below

     base_dir = File("../../character_examples")
     test_dir = File(base_dir, "test")
     try:
         for letter in ("A", "B"):
             shutil.copytree(File(base_dir, letter).getPath(),
                             File(test_dir, letter).getPath())
         extractor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                                 size_classification_factor=1.3)
         # Extract features
         training_examples, test_examples = extractor.extract_training_and_test_examples(
             test_dir.getPath(), 90, 10)
         classifier = CharacterClassifier(training_examples,
                                          nr_of_hmms_to_try=1,
                                          fraction_of_examples_for_test=0.3,
                                          feature_extractor=extractor,
                                          train_with_examples=False)
         before = classifier.test(test_examples)
         # Test serialization: a classifier rebuilt from its string form
         # must score exactly like the original.
         classifier_string = classifier.to_string()
         reborn_classifier = CharacterClassifier(from_string_string=classifier_string)
         reborn_classifier_test_result = reborn_classifier.test(test_examples)
         if not (reborn_classifier_test_result == before):
             # A real exception class: string exceptions are rejected
             # with a TypeError on Python 2.6 and later.
             raise AssertionError("Something is wrong with the test result")
         classifier.train()
         after = classifier.test(test_examples)
         print("test_with_two_characters", "before", before, "after", after)
     finally:
         # Guarded so that a failure before the directory was created is
         # not masked by an error from rmtree.
         if os.path.isdir(test_dir.getPath()):
             shutil.rmtree(test_dir.getPath())
 def test_with_two_characters(self):
     '''Test classification and serialization using only the letters A and B.

     The A and B example directories are copied into a temporary "test"
     directory under the character examples directory. A classifier is
     built from them, tested, round-tripped through to_string() /
     from_string_string and tested again, then trained and tested once
     more. The before/after scores are printed.

     Raises AssertionError if the deserialized classifier does not give
     the same test result as the original one. The temporary directory is
     removed even when the test fails, so a failed run does not make the
     next run's copytree fail on an already existing directory.
     '''
     import os  # local import: only needed for the cleanup check below

     base_dir = File("../../character_examples")
     test_dir = File(base_dir, "test")
     try:
         for letter in ("A", "B"):
             shutil.copytree(File(base_dir, letter).getPath(),
                             File(test_dir, letter).getPath())
         extractor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                                 size_classification_factor=1.3)
         # Extract features
         training_examples, test_examples = extractor.extract_training_and_test_examples(
             test_dir.getPath(), 90, 10)
         classifier = CharacterClassifier(training_examples,
                                          nr_of_hmms_to_try=1,
                                          fraction_of_examples_for_test=0.3,
                                          feature_extractor=extractor,
                                          train_with_examples=False)
         before = classifier.test(test_examples)
         # Test serialization: a classifier rebuilt from its string form
         # must score exactly like the original.
         classifier_string = classifier.to_string()
         reborn_classifier = CharacterClassifier(
             from_string_string=classifier_string)
         reborn_classifier_test_result = reborn_classifier.test(test_examples)
         if not (reborn_classifier_test_result == before):
             # A real exception class: string exceptions are rejected
             # with a TypeError on Python 2.6 and later.
             raise AssertionError("Something is wrong with the test result")
         classifier.train()
         after = classifier.test(test_examples)
         print("test_with_two_characters", "before", before, "after", after)
     finally:
         # Guarded so that a failure before the directory was created is
         # not masked by an error from rmtree.
         if os.path.isdir(test_dir.getPath()):
             shutil.rmtree(test_dir.getPath())
 def extract_test_examples_to_file(self):
     '''Extract test examples from word_examples_for_test and pickle them.

     The examples directory is resolved as ../../word_examples_for_test
     relative to the directory of the file this code lives in. The
     extractor is configured from
     self.feature_extraction_number_of_segments and
     self.feature_extraction_classification_factor. No training examples
     are requested; 10 test examples are requested.

     The test examples are pickled to
     datatest_segments_<segments>_cf_<factor>.pkl (dots in the factor
     replaced by underscores), opened relative to the current working
     directory.
     '''
     extractor = SimpleImageFeatureExtractor(
         nr_of_divisions=self.feature_extraction_number_of_segments,
         size_classification_factor=self.feature_extraction_classification_factor)
     this_file = File(str(inspect.getfile(inspect.currentframe())))
     examples_dir = File(File(File(this_file.getParent(), ".."), ".."),
                         "word_examples_for_test").getCanonicalPath()
     # The first element (training examples) is ignored: none were requested.
     _no_training, character_test_examples = extractor.extract_training_and_test_examples(
         examples_dir,  # character_examples word_examples_for_test
         nr_of_training_examples=0,
         nr_of_test_examples=10)

     file_name = ('datatest_segments_' +
                  str(self.feature_extraction_number_of_segments) + '_cf_' +
                  str(self.feature_extraction_classification_factor).replace('.', '_') +
                  '.pkl')
     output = open(file_name, 'wb')
     try:
         pickle.dump(character_test_examples, output)
     finally:
         # Close the file even if pickling fails.
         output.close()
def create_character_classifier(save_to_file_path):
    '''Build a character classifier and print its score on the test examples.

    Requests 90 training and 10 test examples from ../../character_examples
    (extractor: 11 divisions, size classification factor 4.6), creates a
    CharacterClassifier with train_with_examples=True and count based
    initialisation, and prints the result of classifier.test().

    save_to_file_path -- not used by this variant; nothing is written to disk.
    '''
    examples_path = File("../../character_examples").getPath()
    training_count, test_count = 90, 10

    extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                            size_classification_factor=4.6)
    training_examples, test_examples = extractor.extract_training_and_test_examples(
        examples_path, training_count, test_count)

    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=True,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    prediction_ratio = str(classifier.test(test_examples))
    print('Prediction ratio:', prediction_ratio)
def create_character_classifier(save_to_file_path):
    '''Build a character classifier and save its string form to a file.

    Requests 100 training and 0 test examples from ../../character_examples
    (extractor: 7 divisions, size classification factor 1.3), creates a
    CharacterClassifier with train_with_examples=False and count based
    initialisation, and writes classifier.to_string() to save_to_file_path.

    save_to_file_path -- path of the file to write; opened in 'w' mode.
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0

    extractor = SimpleImageFeatureExtractor(nr_of_divisions=7,
                                            size_classification_factor=1.3)

    # No test examples are requested, so the second element is ignored.
    training_examples, _test_examples = extractor.extract_training_and_test_examples(
        example_dir,
        nr_of_training_examples,
        nr_of_test_examples)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    classifier_string = classifier.to_string()
    # Named out_file rather than file to avoid shadowing the builtin.
    out_file = open(save_to_file_path, 'w')
    try:
        out_file.write(classifier_string)
    finally:
        # Close the file even if the write fails.
        out_file.close()
# Example #12
0
    def extract_test_examples_to_file(self):
        '''Extract test examples from word_examples_for_test and pickle them.

        The examples directory is resolved as ../../word_examples_for_test
        relative to the directory of the file this code lives in. The
        extractor is configured from
        self.feature_extraction_number_of_segments and
        self.feature_extraction_classification_factor. No training examples
        are requested; 10 test examples are requested.

        The test examples are pickled to
        datatest_segments_<segments>_cf_<factor>.pkl (dots in the factor
        replaced by underscores), opened relative to the current working
        directory.
        '''
        extractor = SimpleImageFeatureExtractor(
            nr_of_divisions=self.feature_extraction_number_of_segments,
            size_classification_factor=self.feature_extraction_classification_factor)
        this_file = File(str(inspect.getfile(inspect.currentframe())))
        examples_dir = File(File(File(this_file.getParent(), ".."), ".."),
                            "word_examples_for_test").getCanonicalPath()
        # The first element (training examples) is ignored: none were requested.
        _no_training, character_test_examples = extractor.extract_training_and_test_examples(
            examples_dir,  # character_examples word_examples_for_test
            nr_of_training_examples=0,
            nr_of_test_examples=10)

        file_name = ('datatest_segments_' +
                     str(self.feature_extraction_number_of_segments) + '_cf_' +
                     str(self.feature_extraction_classification_factor).replace('.', '_') +
                     '.pkl')
        output = open(file_name, 'wb')
        try:
            pickle.dump(character_test_examples, output)
        finally:
            # Close the file even if pickling fails.
            output.close()
def create_character_classifier(save_to_file_path):
    '''Build a character classifier and save its string form to a .dat file.

    Requests 100 training and 0 test examples from ../../character_examples
    (extractor: 11 divisions, size classification factor 4.6), creates a
    CharacterClassifier with train_with_examples=False and count based
    initialisation, and writes classifier.to_string() to
    save_to_file_path + ".dat".

    save_to_file_path -- output path without the ".dat" suffix, which is
        appended here; the file is opened in 'w' mode.
    '''
    example_dir = File("../../character_examples").getPath()
    nr_of_training_examples = 100
    nr_of_test_examples = 0

    extractor = SimpleImageFeatureExtractor(nr_of_divisions=11,
                                            size_classification_factor=4.6)

    # No test examples are requested, so the second element is ignored.
    training_examples, _test_examples = extractor.extract_training_and_test_examples(
        example_dir, nr_of_training_examples, nr_of_test_examples)
    classifier = CharacterClassifier(
        training_examples,
        nr_of_hmms_to_try=1,
        fraction_of_examples_for_test=0,
        train_with_examples=False,
        initialisation_method=SpecializedHMM.InitMethod.count_based,
        feature_extractor=extractor)
    classifier_string = classifier.to_string()
    # Named out_file rather than file to avoid shadowing the builtin.
    out_file = open(save_to_file_path + ".dat", 'w')
    try:
        out_file.write(classifier_string)
    finally:
        # Close the file even if the write fails.
        out_file.close()