Example #1
0
 def _net_output_for_word_list(self, word_list, cnn, 
                               min_img_width_height=32, image_size=None, max_pixel=50000, input_layer='word_images', 
                               output_layer='sigmoid', suppress_caffe_output=False):
     """Forward each word image through the PHOCNet and stack the outputs.

     Every word in ``word_list`` is converted to a float image (0 = background,
     1 = text), size-checked via ``self._check_size``, and pushed through the
     net one image at a time; the flattened ``output_layer`` activations are
     collected and returned as a 2D array, one row per word.

     NOTE(review): ``min_img_width_height`` and ``max_pixel`` are accepted but
     never read here — presumably handled inside ``_check_size``; confirm.
     """
     total = len(word_list)
     results = []
     for pos, word in enumerate(word_list, start=1):
         # scale to correct pixel values (0 = background, 1 = text)
         img = word.get_word_image().astype(np.float32)
         img -= 255.0
         img /= -255.0

         img, _ = self._check_size(img, image_size)
         # add singleton batch and channel axes for the net input blob
         img = img.reshape((1, 1) + img.shape).astype(np.float32)

         # reshape the PHOCNet input blob to this image's shape
         cnn.blobs[input_layer].reshape(*img.shape)
         cnn.reshape()

         # forward the word image through the PHOCNet
         cnn.blobs[input_layer].data[...] = img
         if suppress_caffe_output:
             with Suppressor():
                 results.append(cnn.forward()[output_layer].flatten())
         else:
             results.append(cnn.forward()[output_layer].flatten())

         # progress log every 100 words and at the end
         if pos % 100 == 0 or pos == total:
             self.logger.info('    [ %*d / %d ]', len(str(total)), pos, total)
     return np.vstack(results)
Example #2
0
 def __solver_step(self, solver, steps):
     '''
     Advance the Caffe solver by ``steps`` iterations, muting Caffe's
     console output unless debug mode is active.
     '''
     if self.debug_mode:
         solver.step(steps)
     else:
         # hide Caffe's stdout/stderr chatter
         with Suppressor():
             solver.step(steps)
Example #3
0
    def _load_pretrained_dense_net(self, phocnet_bin_path, gpu_id, dense_net, debug_mode):
        """Instantiate the Dense PHOCNet in TEST mode from the given proto
        definition and pretrained weights, silencing Caffe's init output
        unless debugging.

        NOTE(review): ``gpu_id`` is accepted but unused in this method.
        """
        # create the Caffe Dense PHOCNet object
        self.logger.info('Creating Dense PHOCNet...')

        if not debug_mode:
            with Suppressor():
                return caffe.Net(dense_net, phocnet_bin_path, caffe.TEST)
        return caffe.Net(dense_net, phocnet_bin_path, caffe.TEST)
Example #4
0
 def __get_solver(self, solver_proto_path):
     '''
     Build a caffe.SGDSolver from the given protofile path, hiding
     Caffe's command line chatter unless debug mode is set to True.
     '''
     if self.debug_mode:
         return caffe.SGDSolver(solver_proto_path)
     # disable Caffe init chatter when not in debug
     with Suppressor():
         return caffe.SGDSolver(solver_proto_path)
Example #5
0
 def _load_pretrained_phocnet(self, phocnet_bin_path, gpu_id, debug_mode, deploy_proto_path, phoc_size):
     """Generate a deploy prototxt for a PHOCNet with ``phoc_size`` outputs,
     write it to ``deploy_proto_path``, and load the pretrained weights from
     ``phocnet_bin_path`` into a TEST-mode caffe.Net.
     """
     # create and save a deploy proto file
     self.logger.info('Saving PHOCNet deploy proto file to %s...', deploy_proto_path)
     # cuDNN engine only when a GPU id was supplied
     generator = ModelProtoGenerator(initialization='msra', use_cudnn_engine=gpu_id is not None)
     deploy_proto = generator.get_phocnet(word_image_lmdb_path=None, phoc_lmdb_path=None,
                                          phoc_size=phoc_size, generate_deploy=True)
     with open(deploy_proto_path, 'w') as proto_file:
         proto_file.write(str(deploy_proto))

     # create the Caffe PHOCNet object, muting Caffe unless debugging
     self.logger.info('Creating PHOCNet...')
     if debug_mode:
         return caffe.Net(deploy_proto_path, phocnet_bin_path, caffe.TEST)
     with Suppressor():
         return caffe.Net(deploy_proto_path, phocnet_bin_path, caffe.TEST)
Example #6
0
def main():
    """Load a (dense) PHOCNet deploy file with Caffe on the CPU and print a
    parameter breakdown: total, convolutional, fully-connected counts, and
    the feature depth at the TPP layer.

    Fixes over the original: Python-2-only ``print`` statements replaced by
    single-argument ``print(...)`` calls (identical output on Python 2, valid
    on Python 3), integer division made explicit with ``//`` so the TPP depth
    stays an integer on Python 3, and the duplicated dense/non-dense FC-layer
    sums collapsed into one expression.
    """
    parser = argparse.ArgumentParser()

    # required parameters
    parser.add_argument('--net_file', '-f', action='store', type=str,
                      help='The location of the net file.')
    parser.add_argument('--dense', '-d', action='store_true',
                        help='dense network')

    params = vars(parser.parse_args())

    caffe.set_mode_cpu()

    deploy_file = params["net_file"]

    print("Net: " + deploy_file)

    # hide Caffe's init chatter while loading the net
    with Suppressor():
        net = caffe.Net(deploy_file, caffe.TEST)

    print("Layer-wise parameters: ")
    pprint([(k, v[0].data.shape, prod(v[0].data.shape)) for k, v in net.params.items()])

    total = sum(prod(v[0].data.shape) for v in net.params.values())

    # the dense variant names its fully-connected layers with a '_d' suffix
    suffix = '_d' if params["dense"] else ''
    fc = sum(prod(net.params['fc%d%s' % (i, suffix)][0].data.shape)
             for i in (6, 7, 8))

    conv = total - fc

    print("Total number of parameters: " + str(total))
    print("Convolutional: " + str(conv))
    print("FC: " + str(fc))

    # '//' keeps the floor-division semantics the original had under Python 2
    print("Depth at Tpp: " + str(net.params['fc6%s' % suffix][0].data.shape[1] // 15))
Example #7
0
    def _net_output_for_word_list(self,
                                  word_list,
                                  cnn,
                                  min_img_width_height=26,
                                  input_layer='word_images',
                                  output_layer='sigmoid',
                                  suppress_caffe_output=False):
        """Forward each word image through the PHOCNet and stack the outputs.

        Each word image is mapped to floats (0 = background, 1 = text),
        upscaled if its smaller side is below ``min_img_width_height``, and
        forwarded through ``cnn`` one image at a time; the flattened
        ``output_layer`` activations are returned as one row per word.
        """
        total = len(word_list)
        outputs = []
        for pos, word in enumerate(word_list, start=1):
            # scale to correct pixel values (0 = background, 1 = text)
            img = word.get_word_image().astype(np.float32)
            img -= 255.0
            img /= -255.0

            # upscale images whose smaller side is below the minimum
            smaller_side = np.amin(img.shape[:2])
            if smaller_side < min_img_width_height:
                factor = float(min_img_width_height + 1) / float(smaller_side)
                target_shape = (int(factor * img.shape[0]),
                                int(factor * img.shape[1]))
                img = resize(image=img, output_shape=target_shape)
            # add singleton batch and channel axes for the input blob
            img = img.reshape((1, 1) + img.shape).astype(np.float32)

            # reshape the PHOCNet input blob to this image's shape
            cnn.blobs[input_layer].reshape(*img.shape)
            cnn.reshape()

            # forward the word image through the PHOCNet
            cnn.blobs[input_layer].data[...] = img
            if suppress_caffe_output:
                with Suppressor():
                    outputs.append(cnn.forward()[output_layer].flatten())
            else:
                outputs.append(cnn.forward()[output_layer].flatten())

            # progress log every 100 words and at the end
            if pos % 100 == 0 or pos == total:
                self.logger.info('    [ %*d / %d ]', len(str(total)),
                                 pos, total)
        return np.vstack(outputs)