Example #1
def recognize(image_path, weights_path, is_vis=True):
    """

    :param image_path:
    :param weights_path:
    :param is_vis:
    :return:
    """
    # Load the image and resize it to the fixed network input size (width 100, height 32)
    image = cv2.imread(image_path, cv2.IMREAD_COLOR)
    image = cv2.resize(image, (100, 32))
    image = np.expand_dims(image, axis=0).astype(np.float32)

    inputdata = tf.placeholder(dtype=tf.float32, shape=[1, 32, 100, 3], name='input')

    net = crnn_model.ShadowNet(phase='Test', hidden_nums=256, layers_nums=2, seq_length=200, num_classes=148)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=inputdata)

    # Decode the time-major network output with CTC beam search
    decodes, _ = tf.nn.ctc_beam_search_decoder(inputs=net_out, sequence_length=200 * np.ones(1), merge_repeated=False)

    decoder = data_utils.TextFeatureIO()

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    with sess.as_default():

        saver.restore(sess=sess, save_path=weights_path)

        preds = sess.run(decodes, feed_dict={inputdata: image})

        preds = decoder.writer.sparse_tensor_to_str(preds[0])

        logger.info('Predict image {:s} label {:s}'.format(ops.split(image_path)[1], preds[0]))

        if is_vis:
            plt.figure('CRNN Model Demo')
            plt.imshow(cv2.imread(image_path, cv2.IMREAD_COLOR)[:, :, (2, 1, 0)])
            plt.show()

        sess.close()

    return
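These examples all come from the same CRNN (ShadowNet) codebase and omit their imports; a plausible common header, with the project-local modules assumed to come from that codebase, would be:

# Assumed common imports for Examples #1-#4; the project-local modules
# (crnn_model, data_provider, data_utils, config) and the logger are
# part of the original CRNN codebase and are not defined here.
import math
import os
import os.path as ops
import time

import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

A minimal call sketch for this example follows; both paths are placeholders, not paths from the original source:

# Hypothetical usage; the image and checkpoint paths are placeholders.
if __name__ == '__main__':
    recognize(image_path='data/demo.jpg',
              weights_path='model/CRNN/model/shadownet/shadownet_-1000',
              is_vis=True)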
Example #2
def write_features(dataset_dir, save_dir):
    """

    :param dataset_dir:
    :param save_dir:
    :return:
    """
    if not ops.exists(save_dir):
        os.makedirs(save_dir)

    print('Initializing the dataset provider ...')
    provider = data_provider.TextDataProvider(dataset_dir=dataset_dir, annotation_name='sample.txt',
                                              validation_set=True, validation_split=0.12505, shuffle='every_epoch',
                                              normalization=None)
    print('Dataset provider initialized')

    feature_io = data_utils.TextFeatureIO()

    # write train tfrecords
    print('Start writing training tf records')

    train_images = provider.train.images
    print(len(train_images))
    # Resize every training image to 600x32 and serialize it as raw bytes
    train_images = [cv2.resize(tmp, (600, 32)) for tmp in train_images]
    train_images = [bytes(list(np.reshape(tmp, [600 * 32 * 3]))) for tmp in train_images]
    train_labels = provider.train.labels
    train_imagenames = provider.train.imagenames

    train_tfrecord_path = ops.join(save_dir, 'train_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=train_tfrecord_path, labels=train_labels, images=train_images,
                                     imagenames=train_imagenames)

    # write test tfrecords
    print('Start writing testing tf records')

    test_images = provider.test.images
    # Resize every test image to 600x32 and serialize it as raw bytes
    test_images = [cv2.resize(tmp, (600, 32)) for tmp in test_images]
    test_images = [bytes(list(np.reshape(tmp, [600 * 32 * 3]))) for tmp in test_images]
    test_labels = provider.test.labels
    test_imagenames = provider.test.imagenames

    test_tfrecord_path = ops.join(save_dir, 'test_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=test_tfrecord_path, labels=test_labels, images=test_images,
                                     imagenames=test_imagenames)

    # write val tfrecords
    print('Start writing validation tf records')

    val_images = provider.validation.images
    # Resize every validation image to 600x32 and serialize it as raw bytes
    val_images = [cv2.resize(tmp, (600, 32)) for tmp in val_images]
    val_images = [bytes(list(np.reshape(tmp, [600 * 32 * 3]))) for tmp in val_images]
    val_labels = provider.validation.labels
    val_imagenames = provider.validation.imagenames

    val_tfrecord_path = ops.join(save_dir, 'validation_feature.tfrecords')
    feature_io.writer.write_features(tfrecords_path=val_tfrecord_path, labels=val_labels, images=val_images,
                                     imagenames=val_imagenames)

    return
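A minimal call sketch, assuming the function definition above; the directory names are placeholders, and dataset_dir must contain a 'sample.txt' annotation file as required by TextDataProvider:

# Hypothetical usage; directory names are placeholders.
if __name__ == '__main__':
    write_features(dataset_dir='model/CRNN/data/raw',
                   save_dir='model/CRNN/data/tfReal')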
Example #3
def validation_shadownet(filename,
                         weights_path,
                         is_vis=False,
                         is_recursive=True):
    """

    :param dataset_dir:
    :param weights_path:
    :param is_vis:
    :param is_recursive:
    :return:
    """
    # Initialize the record decoder
    decoder = data_utils.TextFeatureIO().reader
    images_t, labels_t, imagenames_t = decoder.read_features(
        os.getcwd() + "/model/CRNN/data/tfReal/" + filename, num_epochs=None)
    if not is_recursive:
        images_sh, labels_sh, imagenames_sh = tf.train.shuffle_batch(
            tensors=[images_t, labels_t, imagenames_t],
            batch_size=32,
            capacity=1000 + 32 * 2,
            min_after_dequeue=2,
            num_threads=4)
    else:
        images_sh, labels_sh, imagenames_sh = tf.train.batch(
            tensors=[images_t, labels_t, imagenames_t],
            batch_size=32,
            capacity=1000 + 32 * 2,
            num_threads=4)

    images_sh = tf.cast(x=images_sh, dtype=tf.float32)

    # build shadownet
    net = crnn_model.ShadowNet(phase='Validate',
                               hidden_nums=256,
                               layers_nums=2,
                               seq_length=200,
                               num_classes=148)

    with tf.variable_scope('shadow'):
        net_out = net.build_shadownet(inputdata=images_sh)

    decoded, _ = tf.nn.ctc_beam_search_decoder(net_out,
                                               200 * np.ones(32),
                                               merge_repeated=False)

    # config tf session
    sess_config = tf.ConfigProto()
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    # config tf saver
    saver = tf.train.Saver()

    sess = tf.Session(config=sess_config)

    # Count the number of samples in the validation TFRecord file
    validate_sample_count = 0
    for record in tf.python_io.tf_record_iterator(os.getcwd() +
                                                  "/model/CRNN/data/tfReal/" +
                                                  filename):
        validate_sample_count += 1
    loops_nums = int(math.ceil(validate_sample_count / 32))

    with sess.as_default():

        # restore the model weights
        saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        print('Start predicting ...')
        if not is_recursive:
            predictions, images, labels, imagenames = sess.run(
                [decoded, images_sh, labels_sh, imagenames_sh])
            imagenames = np.reshape(imagenames, newshape=imagenames.shape[0])
            imagenames = [tmp.decode('utf-8') for tmp in imagenames]
            preds_res = decoder.sparse_tensor_to_str(predictions[0])
            gt_res = decoder.sparse_tensor_to_str(labels)

            accuracy = []

            # Per-character accuracy: compare each ground-truth character with
            # the prediction at the same position
            for index, gt_label in enumerate(gt_res):
                pred = preds_res[index]
                total_count = len(gt_label)
                correct_count = 0
                try:
                    for i, tmp in enumerate(gt_label):
                        if tmp == pred[i]:
                            correct_count += 1
                except IndexError:
                    continue
                finally:
                    try:
                        accuracy.append(correct_count / total_count)
                    except ZeroDivisionError:
                        if len(pred) == 0:
                            accuracy.append(1)
                        else:
                            accuracy.append(0)

            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            print('Mean validation accuracy is {:.5f}'.format(accuracy))

            for index, image in enumerate(images):
                print(
                    'Predict {:s} image with gt label: {:s} **** predict label: {:s}'
                    .format(imagenames[index], gt_res[index],
                            preds_res[index]))
                if is_vis:
                    plt.imshow(image[:, :, (2, 1, 0)])
                    plt.show()
        else:
            accuracy = []
            for epoch in range(loops_nums):
                predictions, images, labels, imagenames = sess.run(
                    [decoded, images_sh, labels_sh, imagenames_sh])
                imagenames = np.reshape(imagenames,
                                        newshape=imagenames.shape[0])
                imagenames = [tmp.decode('utf-8') for tmp in imagenames]
                preds_res = decoder.sparse_tensor_to_str(predictions[0])
                gt_res = decoder.sparse_tensor_to_str(labels)

                # Per-character accuracy, as in the non-recursive branch
                for index, gt_label in enumerate(gt_res):
                    pred = preds_res[index]
                    total_count = len(gt_label)
                    correct_count = 0
                    try:
                        for i, tmp in enumerate(gt_label):
                            if tmp == pred[i]:
                                correct_count += 1
                    except IndexError:
                        continue
                    finally:
                        try:
                            accuracy.append(correct_count / total_count)
                        except ZeroDivisionError:
                            if len(pred) == 0:
                                accuracy.append(1)
                            else:
                                accuracy.append(0)

                for index, image in enumerate(images):
                    print(
                        'Predict {:s} image with gt label: {:s} **** predict label: {:s}'
                        .format(imagenames[index], gt_res[index],
                                preds_res[index]))
                    # if is_vis:
                    #     plt.imshow(image[:, :, (2, 1, 0)])
                    #     plt.show()

            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            print('Validation accuracy is {:.5f}'.format(accuracy))

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()
    return
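A minimal call sketch; the TFRecord name matches the 'test_feature.tfrecords' file written by Example #2, and the checkpoint path is a placeholder:

# Hypothetical usage; the checkpoint path is a placeholder.
if __name__ == '__main__':
    validation_shadownet(filename='test_feature.tfrecords',
                         weights_path='model/CRNN/model/shadownet/shadownet_-1000',
                         is_vis=False,
                         is_recursive=True)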
Example #4
def train_shadownet(filename,
                    train_epochs,
                    weights_path=None,
                    steps_per_checkpoint=None):
    """
    :param dataset_dir:
    :param weights_path:
    :return:
    """
    train_epochs = int(train_epochs)
    # decode the tf records to get the training data
    decoder = data_utils.TextFeatureIO().reader
    images, labels, imagenames = decoder.read_features(
        os.getcwd() + "/model/CRNN/data/tfReal/" + filename, num_epochs=None)
    inputdata, input_labels, input_imagenames = tf.train.shuffle_batch(
        tensors=[images, labels, imagenames],
        batch_size=32,
        capacity=1000 + 2 * 32,
        min_after_dequeue=100,
        num_threads=1)

    inputdata = tf.cast(x=inputdata, dtype=tf.float32)
    current_step = 0
    shadownet = crnn_model.ShadowNet(phase='Train',
                                     hidden_nums=256,
                                     layers_nums=2,
                                     seq_length=200,
                                     num_classes=148)
    with tf.variable_scope('shadow', reuse=False):
        net_out = shadownet.build_shadownet(inputdata=inputdata)

    cost = tf.reduce_mean(
        tf.nn.ctc_loss(labels=input_labels,
                       inputs=net_out,
                       sequence_length=200 * np.ones(32),
                       ignore_longer_outputs_than_inputs=True))

    decoded, log_prob = tf.nn.ctc_beam_search_decoder(net_out,
                                                      200 * np.ones(32),
                                                      merge_repeated=False)

    sequence_dist = tf.reduce_mean(
        tf.edit_distance(tf.cast(decoded[0], tf.int32), input_labels))

    global_step = tf.Variable(0, name='global_step', trainable=False)

    starter_learning_rate = config.cfg.TRAIN.LEARNING_RATE
    learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                               global_step,
                                               config.cfg.TRAIN.LR_DECAY_STEPS,
                                               config.cfg.TRAIN.LR_DECAY_RATE,
                                               staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        optimizer = tf.train.AdadeltaOptimizer(
            learning_rate=learning_rate).minimize(loss=cost,
                                                  global_step=global_step)

    # Set tf summary
    tboard_save_path = 'model/CRNN/tboard/shadownet'
    if not ops.exists(tboard_save_path):
        os.makedirs(tboard_save_path)
    tf.summary.scalar(name='Cost', tensor=cost)
    tf.summary.scalar(name='Learning_Rate', tensor=learning_rate)
    tf.summary.scalar(name='Seq_Dist', tensor=sequence_dist)
    merge_summary_op = tf.summary.merge_all()

    # Set saver configuration
    saver = tf.train.Saver()
    model_save_dir = 'model/CRNN/model/shadownet'
    if not ops.exists(model_save_dir):
        os.makedirs(model_save_dir)
    train_start_time = time.strftime('%Y-%m-%d-%H-%M-%S',
                                     time.localtime(time.time()))
    model_name = 'shadownet_'  # timestamped naming disabled: 'shadownet_{:s}.ckpt'.format(train_start_time)
    model_save_path = ops.join(model_save_dir, model_name)

    # Set sess configuration
    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.per_process_gpu_memory_fraction = config.cfg.TRAIN.GPU_MEMORY_FRACTION
    sess_config.gpu_options.allow_growth = config.cfg.TRAIN.TF_ALLOW_GROWTH

    sess = tf.Session(config=sess_config)

    summary_writer = tf.summary.FileWriter(tboard_save_path)
    summary_writer.add_graph(sess.graph)


    with sess.as_default():
        if weights_path is None:
            logger.info('Training from scratch')
            init = tf.global_variables_initializer()
            sess.run(init)
        else:
            logger.info('Restore model from {:s}'.format(weights_path))
            saver.restore(sess=sess, save_path=weights_path)

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        for epoch in range(train_epochs):

            _, c, seq_distance, preds, gt_labels, summary = sess.run([
                optimizer, cost, sequence_dist, decoded, input_labels,
                merge_summary_op
            ])
            preds = decoder.sparse_tensor_to_str(preds[0])
            gt_labels = decoder.sparse_tensor_to_str(gt_labels)
            current_step += 1
            accuracy = []

            # Per-character accuracy: compare each ground-truth character with
            # the prediction at the same position
            for index, gt_label in enumerate(gt_labels):
                pred = preds[index]
                total_count = len(gt_label)
                correct_count = 0
                try:
                    for i, tmp in enumerate(gt_label):
                        if tmp == pred[i]:
                            correct_count += 1
                except IndexError:
                    continue
                finally:
                    try:
                        accuracy.append(correct_count / total_count)
                    except ZeroDivisionError:
                        if len(pred) == 0:
                            accuracy.append(1)
                        else:
                            accuracy.append(0)
            accuracy = np.mean(np.array(accuracy).astype(np.float32), axis=0)
            if epoch % config.cfg.TRAIN.DISPLAY_STEP == 0:
                logger.info(
                    'Epoch: {:d} cost= {:.9f} seq distance= {:.9f} train accuracy= {:.9f}'
                    .format(epoch + 1, c, seq_distance, accuracy))

            # add_summary expects a step number, not the global_step variable itself
            summary_writer.add_summary(summary=summary, global_step=epoch)
            if steps_per_checkpoint is None:
                saver.save(sess=sess,
                           save_path=model_save_path,
                           global_step=global_step)
            else:
                if current_step % int(steps_per_checkpoint) == 0:
                    saver.save(sess=sess,
                               save_path=model_save_path,
                               global_step=global_step)

        coord.request_stop()
        coord.join(threads=threads)

    sess.close()

    return
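A minimal call sketch; the TFRecord name matches the 'train_feature.tfrecords' file written by Example #2, and the epoch count and checkpoint interval are illustrative only:

# Hypothetical usage; epoch count and checkpoint interval are illustrative.
if __name__ == '__main__':
    train_shadownet(filename='train_feature.tfrecords',
                    train_epochs=2000,
                    weights_path=None,
                    steps_per_checkpoint=100)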