Пример #1
0
 def __init__( self, image_path, env ):
     """
         Keep a reference to the environment and try to load an image.

         image_path is handed to setImage(); a truthy result is reported
         through comment(), anything else through error().
     """
     state( 'Opening image...' )
     self.env = env

     opened = self.setImage( image_path )
     if not opened:
         error( 'Could not set image from path in ProcessImage.' )
         return

     comment( '- Image opened successfully.')
Пример #2
0
    def getFeatureFiles(self):
        """Build one record per entry of ``self._directory_list``.

        Every directory is wrapped in a ``HandleJSON`` object, and the
        object's ``file`` and ``filepath`` attributes are stored next to the
        directory itself.

        Returns:
            A list of dicts with the keys ``feature_json``, ``directory`` and
            ``filepath``, in the same order as ``self._directory_list``.
        """
        comment('Reading files.')

        records = []
        for directory in self._directory_list:
            handler = HandleJSON(directory)
            record = dict(
                feature_json=handler.file,
                directory=directory,
                filepath=handler.filepath,
            )
            records.append(record)

        return records
Пример #3
0
    def setPaths(self, subject: str):
        """Append ``subject`` as a sub-directory to every stored path.

        Each entry of ``self._directory_list`` is rewritten in place to
        ``<entry>/<subject>``.

        Args:
            subject: Name of the sub-directory to append to each path.

        Returns:
            ``TestMessage(True, ...)`` when every entry was rewritten,
            otherwise ``TestMessage(False, ...)`` carrying either the caught
            ``IOError`` or a fallback description.
        """
        try:
            comment('Setting paths for image directories.')

            # Entries are replaced one by one on the same list object, so the
            # list length never changes while it is being enumerated.
            for index, directory in enumerate(self._directory_list):
                self._directory_list[index] = f'{directory}/{subject}'

            return TestMessage(
                True, 'Complete list of paths created for feature extraction.')
        except IOError as ioe:
            return TestMessage(False, ioe)
        except Exception:
            # Deliberately not a bare ``except``: KeyboardInterrupt and
            # SystemExit must keep propagating.
            return TestMessage(False,
                               'Could not add subject type to path variables')
 def test_comment(self):
     """
         The object returned by comment() must carry a success flag equal
         to True.
     """
     result = comment('Testing a comment message')
     failure_note = 'A successful comment message should return a success status of True.'
     self.assertEqual(result.success, True, failure_note)
Пример #5
0
    def buff( self, small_image ):
        """
        Paste small_image onto a canvas of the configured extract size.

        The canvas is a uint8 image filled with the value 255 whose shape is
        [ height, width ] from the 'extract' settings. Along each of the
        first two axes the paste offset is 0 when small_image is at least as
        large as the canvas, otherwise half of the size difference.

        Returns the resulting canvas as a numpy array.
        """
        comment( '- Buffing image.' )

        extract = self.env.settings.content[ 'extract' ]
        canvas_shape = [ extract[ 'height' ], extract[ 'width' ] ]
        filled = np.full( canvas_shape, 255 )
        big_image = Image.fromarray( np.uint8( filled ) )

        small_dim = np.shape( np.array( small_image ) )
        big_dim = np.shape( np.array( big_image ) )

        # Work on the first axis first, then on the second axis.
        offsets = []
        for axis in ( 0, 1 ):
            s_ax = small_dim[ axis ]
            b_ax = big_dim[ axis ]

            if s_ax >= b_ax:
                offsets.append( 0 )
            else:
                offsets.append( int( ( b_ax/2 ) - ( s_ax/2 ) ) )

        # paste() receives the second-axis offset first, then the first-axis one.
        paste_position = ( offsets[ 1 ], offsets[ 0 ] )
        big_image.paste( Image.fromarray( small_image ), paste_position )

        return np.asarray( big_image )
Пример #6
0
 def writeFiles( self, bundle ):
     """
         Create the bundle's output directory and write its words and lines.

         The directory returned by bundle.getPath() is created and made the
         current working directory, after which bundle.words and bundle.lines
         are written through a Files object.

         If the directory already exists the user is asked whether it may be
         removed. Answering 'Y' (any case) or pressing enter removes it and
         writes the content; any other answer leaves everything untouched.

         Raises:
             OSError: for directory failures other than "already exists"
                 (for example missing permissions). These are no longer
                 reported as 'Directory already exists.'.
     """
     # Function-scope import: only this method needs shutil.
     import shutil

     target = bundle.getPath()

     try:
         os.makedirs( target )
     except FileExistsError:
         error( 'Directory already exists.' )
         rm_file = input( 'Do you want to remove the directory [Y/n]?' )
         if rm_file.upper() != 'Y' and rm_file != '':
             return

         # Remove without going through a shell, so paths containing spaces
         # or shell metacharacters are handled correctly.
         if os.path.isdir( target ) and not os.path.islink( target ):
             shutil.rmtree( target )
         else:
             os.remove( target )
         os.makedirs( target )

     os.chdir( target )
     files = Files( target, self.env )
     comment( '- Writing content for subdirectories.' )
     files.writeWords( 'words', bundle.words, bundle.folder_name )
     files.writeLines( 'lines', bundle.lines, bundle.folder_name )
Пример #7
0
 def getDirectories(self, path):
     """Collect the sub-directories of ``path`` into ``self._directory_list``.

     Args:
         path: Directory to scan.

     Returns:
         ``TestMessage(True, ..., 7)`` when at least one sub-directory was
         found, ``TestMessage(False, ..., 7)`` when none was found, and
         ``TestMessage(False, <error text>, 22)`` when scanning raised.
         ``self._directory_list`` is assigned in the first two cases.
     """
     comment('Reading directories in dataset.')
     try:
         # The context manager closes the scandir iterator even if the
         # iteration is interrupted by an exception.
         with os.scandir(path) as entries:
             samples = [entry.path for entry in entries if entry.is_dir()]

         self._directory_list = samples
         if samples:
             return TestMessage(
                 True,
                 'Successfully compiled a list of directories for the sample.',
                 7)
         return TestMessage(
             False,
             'There was no relevant sample directories in this path.',
             7)
     except Exception as e:
         return TestMessage(False, f'{e}', 22)
Пример #8
0
    def setImage( self, path ):
        """
        Load the image at path and derive its greyscale matrix.

        Sets self.image (the result of cv2.imread), self.matrix (the
        greyscale conversion as a numpy array) and self.dimensions
        ([ rows, columns ] of that matrix).

        Returns True when all three were set, False when the image could not
        be read or converted.
        """
        try:
            comment( 'Reading image.' )
            comment( '- Loading from path: {0}'.format( path ) )
            self.image = cv2.imread( path )
            if self.image is None:
                # cv2.imread reports an unreadable file by returning None
                # instead of raising, so check for it explicitly.
                return False

            comment( '- Flattening image into matrix.' )
            # cv2.imread delivers channels in BGR order, hence BGR2GRAY
            # (the same conversion getLines applies to self.image).
            grey = cv2.cvtColor( self.image, cv2.COLOR_BGR2GRAY )
            self.matrix = np.array( grey )
            rows, cols = np.shape( self.matrix )
            self.dimensions = [ rows, cols ]

            comment( '- Matrix shape: {0}'.format( str( self.dimensions ) ) )

            return True
        except Exception:
            # Not a bare except: KeyboardInterrupt/SystemExit keep propagating.
            return False
Пример #9
0
    def __init__(self, env, feature: str, path=None):
        """Set up the pipeline and, when ``path`` is given, run it.

        With a ``path`` the constructor collects the sample directories, asks
        on the console whether ``words`` or ``lines`` should be used (until
        one of the two is entered), extends the directory paths accordingly,
        and then generates and saves every available feature.

        Args:
            env: Environment object, stored as ``self._e``.
            feature: Not used by this constructor.
            path: Dataset directory; nothing beyond storing ``env`` happens
                when it is ``None``.

        Raises:
            IOError: If ``setPaths`` reports a failure.
        """
        state('Setting up the image pipeline.')
        self._e = env

        if path is None:
            return

        directories = self.getDirectories(path)
        if not directories.success:
            error(directories.payload)
            return

        # Inside each sample folder there are images for lines and words,
        # which are folders.
        self.subject = ask('Would you like to use [words] or [lines]?',
                           readInput.readline())
        while self.subject.payload not in ('words', 'lines'):
            self.subject = ask('Type either [words] or [lines]?',
                               readInput.readline())

        tmp_create_paths = self.setPaths(self.subject.payload)
        if not tmp_create_paths.success:
            error(tmp_create_paths.payload)
            raise IOError(tmp_create_paths.payload)

        comment(tmp_create_paths.payload)
        feature_files = self.getFeatureFiles()
        available_features = self.getListOfAvailableFeatures()

        comment('Adding features.')
        for feature_class in available_features:
            generated = self.generateFeatures(feature_files, feature_class)
            self.save(generated, feature_class)
Пример #10
0
 def __init__(self, path: str):
     """Announce the JSON entry for ``path`` and pass ``path`` to ``getJson``."""
     announcement = f"Handling JSON entry for {path}."
     comment(announcement)
     self.getJson(path)
Пример #11
0
    def getLines( self ):
        """
        Cut the loaded image into horizontal regions using a row projection.

        Steps:
          1. self.image is replaced by its greyscale conversion.
          2. The greyscale image is thresholded, inverted and dilated; the
             dilated mask is stored in self.matrix.
          3. For every row the mask values are summed (value / 255). A region
             opens on the first row whose total exceeds the 'buffer' setting
             and closes on the next row whose total is at or below it.
          4. A closed region is kept only when it spans more rows than the
             'tolerance' setting; its crop is resized to the 'height' setting
             while keeping the original number of columns.

        Returns:
            A list of dicts with 'begin' and 'end' (row indices), 'image'
            (resized greyscale crop) and 'mask' (resized crop of the
            undilated mask).
        """
        print()
        extract = self.env.settings.content[ 'extract' ]
        extract_buffer = extract[ 'buffer' ]
        kernel_size = extract[ 'kernel_size' ]
        comment( 'Extracting text lines.' )
        threshold = extract[ 'threshold' ]

        # Structuring element used to dilate the mask.
        comment( '- Creating structuring element with a kernel size of {0}.'.format( kernel_size ) )
        # The shape argument used to be cv2.MORPH_DILATE, which is an
        # operation constant; cv2.MORPH_CROSS is the shape constant with the
        # same numeric value, so the resulting kernel is unchanged.
        kernel = cv2.getStructuringElement( cv2.MORPH_CROSS, ( kernel_size, kernel_size ) )

        # Greyscale -> binary threshold -> invert -> dilate.
        self.image = cv2.cvtColor( self.image, cv2.COLOR_BGR2GRAY )
        _, mask = cv2.threshold( self.image, threshold, 255, cv2.THRESH_BINARY )
        comment( '- Dilating image.' )
        mask = cv2.bitwise_not( mask )
        original_mask = mask  # cv2.dilate returns a new array; this stays undilated
        mask = cv2.dilate( mask, kernel, iterations=1 )

        comment( '- Extracting text lines.' )
        self.matrix = np.array( mask )
        rows, cols = self.dimensions[ 0 ], self.dimensions[ 1 ]

        # Horizontal projection: one total per row, computed by NumPy instead
        # of a Python loop over every pixel.
        row_totals = self.matrix[ :rows, :cols ].sum( axis=1, dtype=np.float64 ) / 255

        regions = []
        in_region = False
        begin_region = 0

        for x, total in enumerate( row_totals ):
            if total > extract_buffer and not in_region:
                begin_region = x
                in_region = True
            elif total <= extract_buffer and in_region:
                # End of a region: store its boundaries and crops, but only
                # when the segment is tall enough to be relevant.
                segment = self.image[ begin_region:x, 0:cols ]
                seg_rows, seg_cols = np.shape( segment )

                if seg_rows > extract[ 'tolerance' ]:
                    # cv2.resize takes (width, height): keep the column count
                    # and force a consistent height for the neural network.
                    dim = ( seg_cols, extract[ 'height' ] )
                    resized_image = cv2.resize(
                        segment,
                        dim,
                        interpolation=cv2.INTER_AREA
                    )
                    resized_mask = cv2.resize(
                        original_mask[ begin_region:x, 0:cols ],
                        dim,
                        interpolation=cv2.INTER_AREA
                    )

                    regions.append({
                        'begin': begin_region,
                        'end': x,
                        'image': resized_image,
                        'mask': resized_mask
                    })

                in_region = False

        # NOTE(review): a region that is still open at the last row is not
        # emitted (unchanged from the previous behaviour) - confirm whether
        # text touching the bottom edge should be kept.
        return regions
Пример #12
0
    def getWords( self, line ):
        """
        Cut one extracted line into word segments using a column projection.

        For every column of line[ 'mask' ] the mask values are summed
        (value / 255). A word opens on the first column whose total exceeds
        the 'buffer' setting and closes on the next column whose total is 0.

        A closed word whose width plus the buffer fits into the 'width'
        setting is resized and stored as one entry. A wider word is cut into
        ceil( width / 'width' ) equally wide pieces (plus a remainder piece
        when pixels are left over); those pieces are stored without resizing.

        Args:
            line: dict with 2-D 'image' and 'mask' entries of the same shape.

        Returns:
            A list of dicts with 'begin' and 'end' (column indices within the
            line), 'image' and 'mask'.
        """
        ( rows, cols ) = np.shape( line[ 'image' ] )

        extract = self.env.settings.content[ 'extract' ]
        extract_buffer = extract[ 'buffer' ]

        comment( '- Extracting words from line.' )
        mask = np.array( line[ 'mask' ] )

        # Vertical projection: one total per column, computed by NumPy
        # instead of a Python loop over every pixel.
        column_totals = mask[ :rows, :cols ].sum( axis=0, dtype=np.float64 ) / 255

        words = []
        in_word = False
        begin_word = 0

        for c, total in enumerate( column_totals ):
            if total > extract_buffer and not in_word:
                in_word = True
                begin_word = c
            elif total == 0 and in_word:
                in_word = False

                word = line[ 'image' ][ 0:rows, begin_word:c ]
                word_mask = mask[ 0:rows, begin_word:c ]

                ( height, width ) = np.shape( word )
                target_width = extract[ 'width' ]

                if width + extract_buffer <= target_width:
                    # NOTE(review): cv2.resize expects dsize as
                    # (width, height); the tuple below is kept exactly as it
                    # was - confirm the intended output orientation.
                    dsize = ( height, target_width )
                    words.append({
                        'begin': begin_word,
                        'end': c,
                        'image': cv2.resize( word, dsize, interpolation=cv2.INTER_AREA ),
                        'mask': cv2.resize( word_mask, dsize, interpolation=cv2.INTER_AREA )
                    })
                else:
                    # Word is too wide: divide it into smaller segments.
                    div = ceil( width / target_width )
                    step = int( width / div )
                    count = 0
                    while count < width - ( width / div ):
                        # The mask piece is taken from word_mask so that it
                        # covers the same pixels as the image piece, and
                        # begin/end are offset by begin_word so that they are
                        # line coordinates like in the branch above.
                        words.append({
                            'begin': begin_word + count,
                            'end': begin_word + count + step,
                            'image': word[ 0:rows, count:count + step ],
                            'mask': word_mask[ 0:rows, count:count + step ]
                        })
                        count += step

                    # Keep whatever pixels are left so the whole word is extracted.
                    if count < width:
                        words.append({
                            'begin': begin_word + count,
                            'end': begin_word + width,
                            'image': word[ 0:rows, count:width ],
                            'mask': word_mask[ 0:rows, count:width ]
                        })

        # NOTE(review): a word that is still open at the last column is not
        # emitted (unchanged from the previous behaviour) - confirm whether
        # text touching the right edge should be kept.
        return words
Пример #13
0
 def save(self, file, feature_class):
     """Write ``file['feature_json']`` as indented JSON to ``file['filepath']``.

     Args:
         file: Mapping with the keys ``filepath`` (target path) and
             ``feature_json`` (JSON-serialisable content).
         feature_class: Name used in the confirmation message.
     """
     # Serialise before opening the target, so a serialisation error cannot
     # leave behind a truncated file.
     serialised = json.dumps(file['feature_json'], indent=4)

     # The context manager closes the handle even when the write fails.
     with open(file['filepath'], 'w') as outfile:
         outfile.write(serialised)

     comment(f'Successfully added {feature_class} feature.')