Exemplo n.º 1
0
def recognize(image_path: str, weights_path: str, files_limit=4):
    """Recognise the text in a set of images and print one result per file.

    The images returned by ``load_images`` are copied into a zero-padded
    batch of fixed size, run through the CRNN restored from ``weights_path``
    and decoded with CTC beam search.  A ``"<filename>: <text>"`` line is
    printed for every loaded file.

    Args:
        image_path: Image location, forwarded to ``load_images``.
        weights_path: Checkpoint path given to ``tf.train.Saver.restore``.
        files_limit: Limit forwarded to ``load_images``.
            NOTE(review): the graph is built for 32 images, so more than 32
            loaded images will not fit the padded batch -- confirm callers
            never exceed that.
    """
    # Batch dimension the inference graph is built for.
    batch_size = 32

    decoder = TextFeatureIO().reader
    images, filenames = load_images(image_path, files_limit)
    images = np.squeeze(images)
    # Zero-pad the loaded images up to the fixed batch size.
    padded_images = np.zeros([batch_size, 32, 100, 3])
    padded_images[:images.shape[0], :, :, :] = images
    tf.reset_default_graph()

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[batch_size, 32, 100, 3],
                               name='input')

    images_sh = tf.cast(x=inputdata, dtype=tf.float32)

    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               25 * np.ones(batch_size),
                                               merge_repeated=False)

    # config tf saver
    saver = tf.train.Saver()

    # Using the session as a context manager makes it the default session
    # and guarantees it is closed, even if restoring or inference fails.
    with tf.Session() as sess:
        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)
        print("Predict...")
        start_time = time()
        predictions = sess.run(decoded, feed_dict={inputdata: padded_images})
        end_time = time()
        print("Prediction time: {}".format(end_time - start_time))
        preds_res = decoder.sparse_tensor_to_str(predictions[0])

        # Only the real files are reported; padding rows are ignored.
        for i, fname in enumerate(filenames):
            print("{}: {}".format(fname, preds_res[i]))
Exemplo n.º 2
0
 def __init__(self,
              config: GlobalConfig,
              dataset_dir: str,
              weights_path: str = None):
     """Store the training settings and prepare the output locations.

     Args:
         config: Global configuration; its training section supplies the
             display step.
         dataset_dir: Dataset directory, stored for later use.
         weights_path: Optional weights path, stored as given
             (``None`` by default).
     """
     self._log = LogFactory.get_logger()

     # Values handed in by the caller.
     self._config = config
     self._weights_path = weights_path
     self._dataset_dir = dataset_dir

     training_config = config.get_training_config()
     self._display_step = training_config.display_step
     self._decoder = TextFeatureIO().reader

     # Output locations; no saver exists yet at construction time.
     self._tboard_save_path = 'tboard'
     self._saver = None
     self._model_save_path = self._get_model_saver_path()
Exemplo n.º 3
0
 def __init__(self,
              tfrecords_path: str,
              weights_path: str,
              config: GlobalConfig):
     """Store the test inputs and the settings read from ``config``.

     Args:
         tfrecords_path: TFRecords path, stored as given.
         weights_path: Weights path, stored as given.
         config: Global configuration; its test section supplies the batch
             size and the merge-repeated flag, and its GPU section is
             stored as a whole.
     """
     self._log = LogFactory.get_logger()

     # Inputs handed in by the caller.
     self._weights_path = weights_path
     self._tfrecords_path = tfrecords_path

     # Settings pulled from the global configuration.
     self._batch_size = config.get_test_config().batch_size
     self._merge_repeated = config.get_test_config().merge_repeated_chars
     self._gpu_config = config.get_gpu_config()

     self._decoder = TextFeatureIO().reader

     # No recognition time has been recorded yet.
     self._recognition_time = None
Exemplo n.º 4
0
def recognize(image_path: str,
              weights_path: str,
              config: GlobalConfig,
              is_vis=True):
    """Recognise the text in a single image and log the prediction.

    The image is loaded with ``load_and_resize_image``, fed through the CRNN
    restored from ``weights_path`` and decoded with CTC beam search.  The
    first decoded string is written to the log; optionally the source image
    is shown with matplotlib.

    Args:
        image_path: Path of the image to recognise.
        weights_path: Checkpoint path given to ``tf.train.Saver.restore``.
        config: Global configuration; its GPU section provides the memory
            fraction and the allow-growth flag for the session.
        is_vis: When true, display the input image after prediction.
    """
    logger = LogFactory.get_logger()
    image = load_and_resize_image(image_path)

    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[1, 32, 100, 3],
                               name='input')

    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)

    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=inputdata)

    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out,
                                               sequence_length=25 * np.ones(1),
                                               merge_repeated=False)

    decoder = TextFeatureIO()

    # config tf session
    gpu_config = config.get_gpu_config()
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = \
        gpu_config.memory_fraction
    sess_config.gpu_options.allow_growth = gpu_config.is_tf_growth_allowed()

    # config tf saver
    saver = tf.train.Saver()

    # The context manager closes the session even when restoring or running
    # the graph raises, and releases it before the (blocking) figure below.
    with tf.Session(config=sess_config) as sess:
        saver.restore(sess=sess, save_path=weights_path)
        preds = sess.run(decodes, feed_dict={inputdata: image})

    preds = decoder.writer.sparse_tensor_to_str(preds[0])
    logger.info('Predict image {:s} label {:s}'.format(
        ops.split(image_path)[1], preds[0]))

    if is_vis:
        plt.figure('CRNN Model Demo')
        # Reverse the channel order of the image returned by cv2.imread
        # before handing it to matplotlib.
        plt.imshow(
            cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
        plt.show()
Exemplo n.º 5
0
class CrnnTrainer:
    """Trains the CRNN network on a TFRecords training set.

    The trainer builds a shuffled input queue, the network, a CTC loss and an
    RMSProp optimiser, then runs the training loop while writing TensorBoard
    summaries to ``tboard`` and checkpoints to ``model``.
    """

    def __init__(self,
                 config: GlobalConfig,
                 dataset_dir: str,
                 weights_path: str = None):
        """Store the training settings and prepare the output locations.

        Args:
            config: Global configuration (training and GPU sections are
                read by this class).
            dataset_dir: Directory containing ``train_feature.tfrecords``.
            weights_path: Optional checkpoint to restore before training;
                ``None`` means the variables are freshly initialised.
        """
        self._log = LogFactory.get_logger()
        self._dataset_dir = dataset_dir
        self._weights_path = weights_path
        self._config = config
        self._display_step = config.get_training_config().display_step
        self._decoder = TextFeatureIO().reader
        self._saver = None
        self._tboard_save_path = 'tboard'
        self._model_save_path = self._get_model_saver_path()

    def train(self):
        """Build the training graph and run the training loop.

        The queue-runner threads, the summary writer and the session are
        released even when a training step raises.
        """
        training_config = self._config.get_training_config()
        batch_size = training_config.batch_size
        images, labels = self._build_data_feed(batch_size)
        net_out = self._build_net_model(images)
        cost = tf.reduce_mean(
            tf.nn.ctc_loss(labels=labels,
                           inputs=net_out,
                           sequence_length=25 * np.ones(batch_size)))
        decoded, _ = tf.nn.ctc_beam_search_decoder(
            net_out,
            25 * np.ones(batch_size),
            merge_repeated=False)
        sequence_dist = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), labels))
        global_step = tf.Variable(0, name='global_step', trainable=False)
        starter_learning_rate = training_config.learning_rate
        learning_rate = tf.train.exponential_decay(
            starter_learning_rate,
            global_step,
            training_config.lr_decay_steps,
            training_config.lr_decay_rate,
            staircase=True)

        # Run the collected update ops together with every optimiser step.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            # Alternative optimiser:
            # tf.train.AdadeltaOptimizer(learning_rate=learning_rate)
            optimizer = tf.train.RMSPropOptimizer(
                learning_rate=learning_rate).minimize(loss=cost,
                                                      global_step=global_step)
        merge_summary_op = self._configure_tf_summary(cost, learning_rate,
                                                      sequence_dist)
        self._saver = tf.train.Saver(max_to_keep=20)
        sess = self._create_session()
        summary_writer = tf.summary.FileWriter(self._tboard_save_path)
        try:
            summary_writer.add_graph(sess.graph)
            # Set the training parameters
            train_epochs = training_config.epochs

            with sess.as_default():
                self._initialize_model(sess)
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    for epoch in range(train_epochs):
                        self._train_epoch(sess, summary_writer, epoch,
                                          optimizer, cost, sequence_dist,
                                          decoded, labels, merge_summary_op)
                finally:
                    # Always stop the input threads, otherwise they keep
                    # running after a failed training step.
                    coord.request_stop()
                    coord.join(threads=threads)
        finally:
            # Closing the writer flushes pending summaries to disk.
            summary_writer.close()
            sess.close()
        self._log.info('Training finished.')

    def _train_epoch(self, sess, summary_writer, epoch, optimizer, cost,
                     sequence_dist, decoded, input_labels, merge_summary_op):
        """Run one training step and record its results.

        Performs a single ``sess.run`` of the optimiser, logs the statistics,
        writes the summary and saves a checkpoint, all indexed by ``epoch``.
        """
        _, c, seq_distance, preds_r, gt_labels_r, summary = sess.run([
            optimizer, cost, sequence_dist, decoded, input_labels,
            merge_summary_op
        ])
        preds = self._decoder.sparse_tensor_to_str(preds_r[0])
        gt_labels = self._decoder.sparse_tensor_to_str(gt_labels_r)
        accuracy = get_batch_accuracy(preds, gt_labels)
        mean_accuracy = calculate_array_mean(accuracy)
        self._log_epoch_stats(c, epoch, mean_accuracy, seq_distance)
        summary_writer.add_summary(summary=summary, global_step=epoch)
        self._saver.save(sess=sess,
                         save_path=self._model_save_path,
                         global_step=epoch)

    def _log_epoch_stats(self, c, epoch, mean_accuracy, seq_distance):
        """Log cost, sequence distance and accuracy every ``display_step`` epochs."""
        if epoch % self._display_step == 0:
            self._log.info(
                'Epoch: {:d} cost= {:9f} seq distance= {:9f} train accuracy= {:9f}'
                .format(epoch + 1, c, seq_distance, mean_accuracy))

    def _create_session(self):
        """Create a session configured from the GPU section of the config."""
        gpu_config = self._config.get_gpu_config()
        sess_config = tf.ConfigProto()
        sess_config.gpu_options.per_process_gpu_memory_fraction = \
            gpu_config.memory_fraction
        sess_config.gpu_options.allow_growth = \
            gpu_config.is_tf_growth_allowed()
        return tf.Session(config=sess_config)

    def _build_data_feed(self, batch_size):
        """Build the shuffled input queue over the training TFRecords.

        Args:
            batch_size: Number of samples per dequeued batch.

        Returns:
            A ``(images, labels)`` pair of batched tensors; the images are
            cast to ``tf.float32``.
        """
        self._log.info('Build data feed...')
        images, labels, _ = self._decoder.read_features(
            ops.join(self._dataset_dir, 'train_feature.tfrecords'))
        inputdata, input_labels = tf.train.shuffle_batch(
            tensors=[images, labels],
            batch_size=batch_size,
            capacity=1000 + 2 * 32,
            min_after_dequeue=100,
            num_threads=4)
        inputdata = tf.cast(x=inputdata, dtype=tf.float32)
        return inputdata, input_labels

    def _build_net_model(self, input_data):
        """Build the CRNN in training phase under the ``shadow`` scope."""
        self._log.info('Build net model...')
        crnn = CRNN(phase='Train',
                    hidden_nums=256,
                    seq_length=25,
                    num_classes=37)
        with tf.variable_scope('shadow', reuse=False):
            net_out = crnn.build(inputdata=input_data)
        return net_out

    @classmethod
    def _get_model_saver_path(cls):
        """Return a timestamped checkpoint path inside the ``model`` directory.

        The directory is created when it does not exist yet.
        """
        model_save_dir = 'model'
        # exist_ok avoids the race between checking and creating.
        os.makedirs(model_save_dir, exist_ok=True)
        # strftime without a time tuple formats the current local time.
        train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S')
        model_name = 'crnn_{:s}.ckpt'.format(train_start_time)
        return ops.join(model_save_dir, model_name)

    def _configure_tf_summary(self, cost, learning_rate, sequence_dist):
        """Register the scalar summaries and return the merged summary op.

        Also makes sure the TensorBoard output directory exists.
        """
        self._log.info('Configure TF summary...')
        os.makedirs(self._tboard_save_path, exist_ok=True)
        tf.summary.scalar(name='Cost', tensor=cost)
        tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
        tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
        return tf.summary.merge_all()

    def _initialize_model(self, sess):
        """Initialise the variables or restore them from ``weights_path``."""
        if self._weights_path is None:
            self._log.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            self._log.info('Restore model from {:s}'.format(
                self._weights_path))
            self._saver.restore(sess=sess, save_path=self._weights_path)
Exemplo n.º 6
0
def recognize(image_path: str,
              weights_path: str,
              output_file: str,
              files_limit=32):
    """Recognise every image in a folder and append the results to a CSV file.

    The files of ``image_path`` are processed in chunks of ``BATCH_SIZE``.
    For each image one line is appended to ``output_file``::

        <file name>,<text 1>,<prob 1>,...,<text N>,<prob N>

    where ``N`` is ``NUMBER_OF_PREDICTIONS`` and the probabilities are a
    softmax over the beam-search log-probabilities of that image.

    Args:
        image_path: Folder whose regular files are treated as images.
        weights_path: Checkpoint path given to ``tf.train.Saver.restore``.
        output_file: File the result lines are appended to.
        files_limit: Limit forwarded to ``load_images`` for every chunk.
    """
    decoder = TextFeatureIO().reader
    # Read all the files in the images folder
    files = [
        join(image_path, f) for f in listdir(image_path)
        if isfile(join(image_path, f))
    ]
    tf.reset_default_graph()
    inputdata = tf.placeholder(dtype=tf.float32,
                               shape=[BATCH_SIZE, 32, 100, 3],
                               name='input')
    images_sh = tf.cast(x=inputdata, dtype=tf.float32)
    # build shadownet
    net = CRNN(phase='Test', hidden_nums=256, seq_length=25, num_classes=37)
    with tf.variable_scope('shadow'):
        net_out = net.build(inputdata=images_sh)
    # top_paths=NUMBER_OF_PREDICTIONS is the number of words to predict
    decoded, log_probabilities = tf.nn.ctc_beam_search_decoder(
        net_out,
        25 * np.ones(BATCH_SIZE),
        merge_repeated=False,
        top_paths=NUMBER_OF_PREDICTIONS)

    # config tf saver
    saver = tf.train.Saver()

    # The context manager makes the session the default one and closes it
    # when the block is left, including on errors.
    with tf.Session() as sess:
        # restore the model weights
        print("Restoring trained model")
        saver.restore(sess=sess, save_path=weights_path)
        print("Predicting {} images in chunks of {}".format(
            len(files), BATCH_SIZE))
        starting_time = time()

        # Run inference in groups of BATCH_SIZE images
        # Run it with all the files from the provided folder
        for group in chunker(files, BATCH_SIZE):
            start_time = time()
            images, filenames = load_images(group, files_limit)
            # NOTE(review): squeeze drops every length-1 axis, so a chunk
            # with a single image may lose its batch axis -- confirm against
            # the array shape load_images returns.
            images = np.squeeze(images)
            padded_images = np.zeros([BATCH_SIZE, 32, 100, 3])
            padded_images[:images.shape[0], :, :, :] = images

            predictions, probs = sess.run([decoded, log_probabilities],
                                          feed_dict={inputdata: padded_images})

            # Decode each beam path once per batch instead of once per image.
            decoded_paths = [
                decoder.sparse_tensor_to_str(predictions[x])
                for x in range(NUMBER_OF_PREDICTIONS)
            ]

            rows = []
            for i, fname in enumerate(filenames):
                # Softmax over the log-probabilities of this image; the
                # maximum is subtracted first so exp() cannot underflow
                # every term to zero (which would give 0/0 = NaN).
                shifted = np.exp(probs[i, :] - np.max(probs[i, :]))
                e_x = shifted / np.sum(shifted)

                # build the array of N predictions for each image
                fields = [basename(fname)]
                for x in range(NUMBER_OF_PREDICTIONS):
                    fields.append('{:s},{:f}'.format(decoded_paths[x][i],
                                                     e_x[x]))
                rows.append(','.join(fields) + '\n')

            # Append the whole batch with a single open/close.
            if rows:
                with open(output_file, 'a') as f:
                    f.writelines(rows)

            end_time = time()
            print("Prediction time for {} images: {}".format(
                len(filenames), end_time - start_time))

        # Measured here so it is defined even when there were no files.
        total_time = time() - starting_time

    print("Total prediction time: {}".format(total_time))
    print("Predictions saved in file {}".format(output_file))