Exemplo n.º 1
0
def main(argv=None):
    """Detect text boxes in every input image, OCR each box and save the results.

    ``FLAGS.output_path`` is deleted and recreated first.  For every path
    returned by ``get_images()`` the image is resized with ``resize_image``,
    run through the detection graph, and each detected box is outlined on the
    image.  Each box is passed to ``pytesseract`` and the recognised text
    (transliterated with ``unidecode``) is drawn just above the box.  The
    annotated image and a ``<name>.txt`` file with one
    ``x1,y1,x2,y2,x3,y3,x4,y4,score`` line per box are written to
    ``FLAGS.output_path``.

    NOTE(review): ``config`` (the Tesseract option string) is not defined in
    this function; it is presumably a module-level name -- confirm.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # imread returns None for unreadable files, so the slice
                    # raises; the channel axis is reversed (BGR -> RGB).
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, _ = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={input_image: [img],
                                                                  input_im_info: im_info})

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
                # `np.int` was only an alias of the builtin and has been
                # removed from NumPy; `int` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # OCR crops come from an untouched copy so that outlines and
                # captions drawn for this or earlier boxes never end up in
                # the pixels handed to Tesseract.
                clean = img.copy()
                for box in boxes:
                    corners = box[:8].astype(np.int32).reshape((-1, 1, 2))
                    cv2.polylines(img, [corners], True, color=(0, 255, 0),
                                  thickness=2)

                    # The crop spans corner 0 to corner 2 of the box.
                    x0, y0 = (int(v) for v in corners[0][0])
                    x2, y2 = (int(v) for v in corners[2][0])
                    roi = clean[y0:y2, x0:x2]

                    text = pytesseract.image_to_string(roi, config=config)

                    text = unidecode.unidecode(text)
                    cv2.putText(img, text, (x0, y0 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

                # Scale the annotated image back to the source resolution.
                # The previous call passed fx=1/rh, fy=1/rw, i.e. the height
                # ratio on the x axis and the width ratio on the y axis;
                # sizing from `im` does not depend on the ratio order.
                img = cv2.resize(img, (im.shape[1], im.shape[0]), interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(os.path.join(FLAGS.output_path, os.path.basename(im_fn)), img[:, :, ::-1])

                txt_path = os.path.join(FLAGS.output_path, os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(txt_path, "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)

# if __name__ == '__main__':
    # tf.app.run()

# def delete_prev(path):
    
#     for the_file in os.listdir(path):
#         file_path = os.path.join(path, the_file)
#         try:
#             if os.path.isfile(file_path):
#                 os.unlink(file_path)
#             elif os.path.isdir(file_path): shutil.rmtree(file_path)
#         except Exception as e:
#             print(e)
#             continue

# app = Flask(__name__)
# app._static_folder = os.path.basename('static')

# UPLOAD_FOLDER = os.path.join('main', 'uploads')
# app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER

# @app.route('/')
# def hello_world():
#     return render_template('home_al.html')

# @app.route('/upload', methods=['POST', 'GET'])
# def upload_file():

#     if request.method == 'POST':
#         file = request.files['image']
#         filename = file.filename

#         # prepare directory for processing
#         delete_prev(app.config['UPLOAD_FOLDER'])
#         f = os.path.join(app.config['UPLOAD_FOLDER'], filename)

#         # add your custom code to check that the uploaded file is a valid image and not a malicious file (out-of-scope for this post)
#         file.save(f)

#         tf.app.run()

#         print('done')
#         processed_file = os.path.join('data/res', filename)

#         # return render_template('home_al.html', processed_file = processed_file)
#         return redirect(url_for('send_file', filename=filename))
#         print('redirected to', url_for('send_file', filename=filename))
#     else:

#         print('No request')
#         return render_template('home_al.html')

# # @app.route('/show/<filename>')
# # def uploaded_file(filename):
# #     filename = 'http://127.0.0.1:5000/upload/' + filename
# #     return render_template('home_al.html')

# @app.route('/uploaded/<filename>')
# def send_file(filename):
#     return send_from_directory('data/res', filename)

# app.run(debug=True)
Exemplo n.º 2
0
def ctpnParse(im):
    """Detect the text regions of an image and return them as a dictionary.

    The detection graph is built in the default TensorFlow graph, the latest
    checkpoint under ``FLAGS.checkpoint_path`` is restored, and the image is
    resized with ``resize_image`` before being run through the network.

    NOTE(review): the graph (including the ``global_step`` variable) is
    created in the default graph on every call, so a second call in the same
    process will probably fail on the already existing variable -- confirm.

    Args:
        im: Image array indexable as ``im[:, :, ::-1]``; the last axis is
            reversed before processing.

    Returns:
        dict with keys:
            ``"size"``: ``{"x": im.shape[0], "y": im.shape[1]}``,
            ``"cnt"``: number of detected boxes,
            ``"frames"``: one ``{"points": [...], "score": ...}`` entry per
            box, where ``points`` holds four ``{"x", "y"}`` dictionaries.

    Raises:
        SystemExit: with status 1 when ``im`` cannot be sliced as an image.
    """
    # im = cv2.imread(image_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='input_image')
        input_im_info = tf.placeholder(tf.float32, shape=[None, 3], name='input_im_info')

        global_step = tf.get_variable('global_step', [], initializer=tf.constant_initializer(0), trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(FLAGS.checkpoint_path, os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            # ---- read image -------------------------------------------------
            start = time.time()
            try:
                im = im[:, :, ::-1]
            except Exception:
                # Same exit status as before, but without relying on the
                # `exit` helper that only exists when `site` is loaded, and
                # without swallowing KeyboardInterrupt.
                raise SystemExit(1)

            # ---- resize -----------------------------------------------------
            img, (rh, rw) = resize_image(im)
            print("Ritu ", rh, rw)
            print("Mae: ", im.shape[0], im.shape[1])
            print("Ushiro: ", img.shape[0], img.shape[1])

            # Sample output of the three prints above (the labels appear to
            # be romanised Japanese for ratio / before / after):
            # Ritu: 0.6375 0.6333333333333333
            # Mae:  1280 960
            # Ushiro:  816 608

            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={input_image: [img],
                                                              input_im_info: im_info})

            # ---- parse ------------------------------------------------------
            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            # textdetector = TextDetector(DETECT_MODE='H')
            textdetector = TextDetector(DETECT_MODE='O')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis], img.shape[:2])
            # `np.int` was only an alias of the builtin and has been removed
            # from NumPy; `int` yields the same dtype.
            boxes = np.array(boxes, dtype=int)

            # ---- data -------------------------------------------------------
            cost_time = (time.time() - start)
            print("cost time: {:.2f}s".format(cost_time))

            # Each box stores four points as consecutive pairs at indices
            # (0, 1), (2, 3), (4, 5), (6, 7); they are divided by the resize
            # ratios to map them back onto the input image.
            # NOTE(review): per the sample output above `rh` is the axis-0
            # ratio, yet it divides the first element of each pair while the
            # "size" entry labels axis 0 as "x" -- confirm the intended axis
            # convention with the consumer before changing either.
            frames = []
            for i, box in enumerate(boxes):
                pnts = [
                    {
                        "x": int(box[k * 2] / rh),
                        "y": int(box[k * 2 + 1] / rw)
                    }
                    for k in range(4)
                ]
                frames.append({
                    "points": pnts,
                    "score": scores[i]
                })

            return {
                "size": {
                    "x": im.shape[0],
                    "y": im.shape[1]
                },
                "cnt": len(boxes),
                "frames": frames
            }

            '''
Exemplo n.º 3
0
def main(argv=None):
    """Detect text boxes in every input image and save annotated results.

    ``FLAGS.output_path`` is deleted and recreated first.  For every path
    returned by ``get_images()`` the image is resized with ``resize_image``,
    run through the detection graph, and the detected boxes are outlined in
    green.  The annotated image and a ``<name>.txt`` file with one
    ``x1,y1,x2,y2,x3,y3,x4,y4,score`` line per box are written to
    ``FLAGS.output_path``.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        # Kept even though nothing below reads it: the variable is part of
        # the graph and therefore of the set the Saver restores.
        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            # A plain Saver restores the weights.  An exponential-moving-
            # average Saver used to be built just before this one and was
            # immediately overwritten, so it never took part in the restore;
            # that dead construction has been dropped.
            saver = tf.train.Saver()
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            print("!!!!! ckpt state!!!!", ckpt_state)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                print('===============')
                print(im_fn)
                start = time.time()
                try:
                    # imread returns None for unreadable files, so the slice
                    # raises; the channel axis is reversed (BGR -> RGB).
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, _ = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` was only an alias of the builtin and has been
                # removed from NumPy; `int` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                # Scale the annotated image back to the source resolution.
                # The previous call passed fx=1/rh, fy=1/rw, i.e. the height
                # ratio on the x axis and the width ratio on the y axis;
                # sizing from `im` does not depend on the ratio order.
                img = cv2.resize(img, (im.shape[1], im.shape[0]),
                                 interpolation=cv2.INTER_LINEAR)
                cv2.imwrite(
                    os.path.join(FLAGS.output_path, os.path.basename(im_fn)),
                    img[:, :, ::-1])

                txt_path = os.path.join(
                    FLAGS.output_path,
                    os.path.splitext(os.path.basename(im_fn))[0]) + ".txt"
                with open(txt_path, "w") as f:
                    for i, box in enumerate(boxes):
                        line = ",".join(str(box[k]) for k in range(8))
                        line += "," + str(scores[i]) + "\r\n"
                        f.write(line)
def main(argv=None):
    """Rotate a fixed image, detect its text and save the enclosing crop.

    The image ``'hoadontiendien-3.png'`` is passed through ``rotate_img`` and
    saved as ``'rotated2.png'``.  Text boxes are then detected on the resized
    image using the checkpoint under ``'checkpoints_mlt/'``, and the
    rectangle running from the smallest (box[0], box[1]) to the largest
    (box[4], box[5]) over all boxes is written to ``'rotate_cuted2.png'``.

    Args:
        argv: Unused.
    """
    checkpoint_dir = 'checkpoints_mlt/'

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())
        print("init sess")
        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_dir)
            model_path = os.path.join(
                checkpoint_dir,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            print('===============')
            im = rotate_img('hoadontiendien-3.png')
            print(im.shape)

            cv2.imwrite('rotated2.png', im)
            print("write rotate img")
            start = time.time()

            # The resize ratios are not needed: only the crop of the resized
            # image is saved.
            img, _ = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={
                                                       input_image: [img],
                                                       input_im_info: im_info
                                                   })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # `np.int` was only an alias of the builtin and has been removed
            # from NumPy; `int` yields the same dtype.
            boxes = np.array(boxes, dtype=int)

            cost_time = (time.time() - start)
            print("cost time: {:.2f}s".format(cost_time))

            if len(boxes) == 0:
                # Nothing detected: keep the whole frame instead of failing
                # in min()/max() on an empty sequence.
                box_minx, box_miny, box_maxx, box_maxy = 0, 0, w, h
            else:
                # A negative start index would wrap around to the far side
                # of the image, so the lower bounds are clamped to 0.
                box_minx = max(int(boxes[:, 0].min()), 0)
                box_miny = max(int(boxes[:, 1].min()), 0)
                box_maxx = int(boxes[:, 4].max())
                box_maxy = int(boxes[:, 5].max())
            print(box_minx, box_miny)
            print(box_maxx, box_maxy)
            crop_img = img[box_miny:box_maxy, box_minx:box_maxx]
            print(crop_img.shape)

            # `crop_img` is a slice of `img`, so the outlines drawn below
            # also appear in the crop that gets saved.
            for box in boxes:
                cv2.polylines(img,
                              [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                              True,
                              color=(0, 255, 0),
                              thickness=1)

            cv2.imwrite('rotate_cuted2.png', crop_img)
Exemplo n.º 5
0
def process():
    """Detect text boxes in every input image, OCR them as digits, report progress.

    ``FLAGS.output_path`` is deleted and recreated first.  For every path
    returned by ``get_images()`` the image is resized with ``resize_image``,
    run through the detection graph, and every detected box is cropped from
    an inverse-binary thresholded copy of the image and passed to
    ``pytesseract``.  When the OCR result is a string of exactly four digits
    a line ``<folder>/<file>:<text>`` is appended to
    ``<root>/OCR_text/<folder>/whole-<id>.txt``, where ``<folder>`` is the
    three path components preceding the file name and ``<id>`` is the part of
    the file name before the first ``-``.

    NOTE(review): ``root`` is not defined in this function; it is presumably
    a module-level base directory -- confirm.

    Yields:
        dict: the *same* dictionary object once per image, updated in place
        (copy it if a snapshot is needed).  Keys:
            ``path``: path of the current image,
            ``percentage``: index of the current image divided by the number
                of images,
            ``locate_time``: seconds spent on detection,
            ``ocr_time``: seconds spent on OCR,
            ``ocr_text``: OCR strings, one per detected box,
            ``err``: True when the image could not be read (the timing and
                text entries are then not updated for that image).
    """
    output = {
        'path': None,
        'percentage': 0,
        'locate_time': 0,
        'ocr_time': 0,
        'ocr_text': [],
        'err': False
    }

    # Tesseract options: the `digits` language data, OCR engine mode 1 and
    # page segmentation mode 7 (described by the original author as treating
    # the region as a single line of text).
    config = ("-l digits --oem 1 --psm 7")
    # config = ("--oem 0 -c tessedit_char_whitelist=0123456789")

    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu
    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)
            im_fn_list = get_images()
            start_all = time.time()

            for count, im_fn in enumerate(im_fn_list):
                output["err"] = False
                output["path"] = im_fn
                output["ocr_text"].clear()
                output["percentage"] = count / len(im_fn_list)
                print('===============')
                # Example path:
                # ../four_angles/recording_2019_10_30/bbq/cam_delicacies-17760-17880/73-500_0.jpg
                print(im_fn)
                start = time.time()
                try:
                    # imread returns None for unreadable files, so the slice
                    # raises; the channel axis is reversed (BGR -> RGB).
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    output["err"] = True
                    yield output
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                # DETECT_MODE can be H / O depending on context
                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` was only an alias of the builtin and has been
                # removed from NumPy; `int` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                output["locate_time"] = cost_time
                print("cost time: {:.2f}s".format(cost_time))

                # ---- text recognition ---------------------------------------
                text_start = time.time()

                # Threshold the whole image once, before any outline is
                # drawn: this used to be recomputed for every box on an image
                # that already carried the outlines of the earlier boxes.
                _, thresh = cv2.threshold(img, 127, 255,
                                          cv2.THRESH_BINARY_INV)

                for box in boxes:
                    cv2.polylines(
                        img, [box[:8].astype(np.int32).reshape((-1, 1, 2))],
                        True,
                        color=(0, 255, 0),
                        thickness=2)

                    # Crop between the points stored at (box[0], box[1]) and
                    # (box[4], box[5]).
                    startX, startY = box[0], box[1]
                    endX, endY = box[4], box[5]
                    roi = thresh[startY:endY, startX:endX]

                    text = pytesseract.image_to_string(roi, config=config)
                    output["ocr_text"].append(text)

                    # Only results made up purely of digits are reported, and
                    # only four-digit ones are recorded on disk.
                    if text.isdigit():
                        print(text)
                        if len(text) == 4:
                            parts = im_fn.split("/")
                            fn = parts[-1]  # 73-500_0.jpg
                            # recording_2019_10_30/bbq/cam_bbq-8000-18120
                            folder = "/".join(parts[-4:-1])
                            print(folder + '/' + fn)
                            id_num = fn.split("-")[0]  # 73

                            directory = os.path.join(
                                root, 'OCR_text/' + folder + '/')
                            # exist_ok avoids the check-then-create race.
                            os.makedirs(directory, exist_ok=True)
                            with open(directory + 'whole-' + id_num + '.txt',
                                      'a') as file_whole:
                                file_whole.write(folder + '/' + fn + ':' +
                                                 text + '\n')

                output["ocr_time"] = time.time() - text_start

                # NOTE: writing the annotated image and the per-image box
                # file to FLAGS.output_path is disabled in this variant.
                yield output
            cost_time_all = (time.time() - start_all)
            print("Total cost time: {:.2f}s".format(cost_time_all))
def main(im=None):
    """Detect text boxes in one image and return them in the image's own pixels.

    The detection graph is built in the default TensorFlow graph and the
    latest checkpoint under ``'checkpoints_mlt/'`` is restored.  The image is
    resized with ``resize_image`` for the network and the detected boxes are
    scaled back to the size of ``im``.

    Args:
        im: Image array with height and width as its first two dimensions.

    Returns:
        list: one ``[box[0], box[1], box[2], box[7]]`` entry per detected
        box, i.e. the x and y of the first point, the x of the second point
        and the y of the fourth point (presumably left, top, right, bottom
        for the horizontal boxes produced by ``DETECT_MODE='H'`` -- confirm).
        The values are NumPy integers.

    Raises:
        ValueError: if ``im`` is None.
    """
    if im is None:
        # Fail before paying for graph construction and checkpoint loading.
        raise ValueError("main() requires an image array, got None")

    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    checkpoint_path = 'checkpoints_mlt/'
    with tf.compat.v1.get_default_graph().as_default():
        input_image = tf.compat.v1.placeholder(tf.float32,
                                               shape=[None, None, None, 3],
                                               name='input_image')
        input_im_info = tf.compat.v1.placeholder(tf.float32,
                                                 shape=[None, 3],
                                                 name='input_im_info')

        global_step = tf.compat.v1.get_variable(
            'global_step', [],
            initializer=tf.compat.v1.constant_initializer(0),
            trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.compat.v1.train.Saver(
            variable_averages.variables_to_restore())

        with tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(checkpoint_path)
            model_path = os.path.join(
                checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            saver.restore(sess, model_path)

            orig_h, orig_w = im.shape[:2]
            img, _ = resize_image(im)
            h, w, c = img.shape
            im_info = np.array([h, w, c]).reshape([1, 3])
            bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                   feed_dict={
                                                       input_image: [img],
                                                       input_im_info: im_info
                                                   })

            textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val, im_info)
            scores = textsegs[:, 0]
            textsegs = textsegs[:, 1:5]

            textdetector = TextDetector(DETECT_MODE='H')
            boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                        img.shape[:2])
            # `np.int` was only an alias of the builtin and has been removed
            # from NumPy; `int` yields the same dtype.
            boxes = np.array(boxes, dtype=int)

            # Map the boxes from the resized image back onto `im`.  Each box
            # stores its points as (x, y) pairs, so x values scale with the
            # width and y values with the height.  The previous code divided
            # x by `rh` and y by `rw`, pairing each coordinate with the ratio
            # of the other axis.  The scale factors are derived from the two
            # image shapes here.
            # NOTE(review): this assumes `resize_image` only rescales (no
            # padding or cropping) -- confirm.
            scale_x = orig_w / float(w)
            scale_y = orig_h / float(h)

            return_array = []
            for box in boxes:
                # Assigning into the integer row truncates toward zero, as
                # the in-place updates did before.
                box[0] = box[0] * scale_x
                box[2] = box[2] * scale_x
                box[1] = box[1] * scale_y
                box[7] = box[7] * scale_y
                return_array.append([box[0], box[1], box[2], box[7]])
            return return_array
Exemplo n.º 7
0
def main(argv=None):
    """Save, for every input image, a padded crop of its most elongated text box.

    ``FLAGS.output_path`` is deleted and recreated first.  For every path
    returned by ``get_images()`` the image is resized with ``resize_image``
    and run through the detection graph.  Among the detected boxes the one
    with the largest width / height ratio is selected; if that ratio exceeds
    3, the box -- padded by 10 pixels vertically and by half its height
    horizontally -- is cropped from the resized image and written to
    ``FLAGS.output_path`` under the input's file name.  Images whose number
    of detected boxes differs from one are reported on stdout.

    Args:
        argv: Unused.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            for im_fn in get_images():
                try:
                    # imread returns None for unreadable files, so the slice
                    # raises; the channel axis is reversed (BGR -> RGB).
                    im = cv2.imread(im_fn)[:, :, ::-1]
                except Exception:
                    print("Error reading image {}!".format(im_fn))
                    continue

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run(
                    [bbox_pred, cls_prob],
                    feed_dict={
                        input_image: [img],
                        input_im_info: im_info
                    })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # `np.int` was only an alias of the builtin and has been
                # removed from NumPy; `int` yields the same dtype.
                boxes = np.array(boxes, dtype=int)

                if len(boxes) != 1:
                    print(im_fn, len(boxes))

                # Track the box with the largest width / height ratio and
                # write it once after the loop; the file used to be rewritten
                # every time a better candidate turned up.
                best_ratio = -1
                best_crop = None
                for box in boxes:
                    pts = box[:8].astype(np.int32).reshape((-1, 2))
                    x1, y1 = (int(v) for v in pts.min(axis=0))
                    x2, y2 = (int(v) for v in pts.max(axis=0))
                    box_w = x2 - x1
                    box_h = y2 - y1
                    if box_h <= 0:
                        # Degenerate box: the ratio would divide by zero.
                        continue

                    ratio = box_w / float(box_h)
                    if ratio > best_ratio:
                        best_ratio = ratio
                        pad_w = int(box_h * 0.5)
                        # Clamp the start indices: a negative start would
                        # wrap around to the far edge of the image.
                        top = max(y1 - 10, 0)
                        left = max(x1 - pad_w, 0)
                        best_crop = img[top:y2 + 10, left:x2 + pad_w, :]

                if best_crop is not None and best_ratio > 3:
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     os.path.basename(im_fn)),
                        best_crop[:, :, ::-1])
                '''
Exemplo n.º 8
0
def main(argv):
    """Detect text boxes in every input image and save one crop per box.

    Removes and recreates ``FLAGS.output_path``, restores the checkpoint
    named by the checkpoint state found in ``FLAGS.checkpoint_path`` and, for
    each path returned by ``get_images()``, runs the network, groups the
    proposals with ``TextDetector(DETECT_MODE='H')`` and writes each detected
    box as ``<i>.png`` (``i`` being the box index) into the output directory.

    Args:
        argv: Not used by this function.
    """
    of_list = []
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            print(im_fn_list)
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread signals an unreadable file by returning None, so
                # test for that explicitly instead of relying on a bare
                # ``except`` (which would also swallow KeyboardInterrupt).
                try:
                    bgr = cv2.imread(im_fn)
                except cv2.error:
                    bgr = None
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis: BGR -> RGB

                print("printing im.shape")
                print(im.shape)
                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='H')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                # ``np.int`` was only an alias of the builtin ``int`` and no
                # longer exists in NumPy >= 1.24.
                boxes = np.array(boxes, dtype=int)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))

                # Crops must not contain the outlines drawn below, so keep an
                # unmarked copy of the resized image. This replaces calling
                # resize_image(im) again for every single box.
                clean_img = img.copy()

                for i, box in enumerate(boxes):
                    box_arr = box[:8].astype(np.int32)
                    cv2.polylines(img, [box_arr.reshape((-1, 1, 2))],
                                  True,
                                  color=(0, 255, 0),
                                  thickness=2)

                    # The first eight values are (x, y) corner pairs, as the
                    # polylines call above uses them. Images are indexed
                    # [row (y), column (x)].
                    left = box_arr[0]
                    top = box_arr[1]
                    width = box_arr[2] - box_arr[0]
                    height = box_arr[5] - box_arr[3]
                    crop = clean_img[top:top + height, left:left + width, :]

                    # NOTE(review): the file name depends only on the box
                    # index, so crops of later images overwrite those of
                    # earlier images and of_list accumulates repeated paths.
                    out_fn = os.path.join(FLAGS.output_path, str(i) + '.png')
                    # The working image is RGB while cv2.imwrite expects BGR,
                    # so flip the channel axis back before saving.
                    cv2.imwrite(out_fn, crop[:, :, ::-1])
                    of_list.append(out_fn)
Exemplo n.º 9
0
def main(argv=None):
    """Detect the text box in a slice of the input images and save the crop.

    When the module-level ``train_or_test_1800`` selects one of the
    ``no_seperate_mianzhi_*`` modes, ``FLAGS.output_path`` is removed and
    recreated first. The checkpoint named by the checkpoint state in
    ``FLAGS.checkpoint_path`` is restored, and the images
    ``get_images()[int(a):b]`` (``a`` and ``b`` are module-level values) are
    processed one by one: each is cropped to the detected box and written to
    the output directory under its original base name. If processing an
    image fails, a hard-coded placeholder image is written under the name
    ``"xzywa" + <base name>`` instead.

    Args:
        argv: Not used by this function.
    """
    if train_or_test_1800 in ('no_seperate_mianzhi_train',
                              'no_seperate_mianzhi_test'):
        if os.path.exists(FLAGS.output_path):
            shutil.rmtree(FLAGS.output_path)
        os.makedirs(FLAGS.output_path)

    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            ii = a

            for im_fn in im_fn_list[int(a):b]:  # adjust the slice bounds here
                ii += 1
                print(str(ii) + '===============' + str(ii))
                print(im_fn)
                start = time.time()

                # cv2.imread signals an unreadable file by returning None, so
                # test for that explicitly instead of relying on a bare
                # ``except`` (which would also swallow KeyboardInterrupt).
                try:
                    bgr = cv2.imread(im_fn)
                except cv2.error:
                    bgr = None
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis: BGR -> RGB

                try:
                    img, (rh, rw) = resize_image(im)
                    h, w, c = img.shape
                    im_info = np.array([h, w, c]).reshape([1, 3])
                    bbox_pred_val, cls_prob_val = sess.run(
                        [bbox_pred, cls_prob],
                        feed_dict={
                            input_image: [img],
                            input_im_info: im_info
                        })
                    textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                                 im_info)
                    scores = textsegs[:, 0]
                    # Each image yields N polys; textsegs keeps the four
                    # coordinates of each of them.
                    textsegs = textsegs[:, 1:5]

                    textdetector = TextDetector(DETECT_MODE='H')

                    # xzy: detect() was modified internally so that it only
                    # reports a single box.
                    boxes = textdetector.detect(textsegs,
                                                scores[:, np.newaxis],
                                                img.shape[:2])
                    # ``np.int`` was only an alias of the builtin ``int`` and
                    # no longer exists in NumPy >= 1.24.
                    boxes = np.array(boxes, dtype=int)

                    cost_time = (time.time() - start)
                    print("cost time: {:.2f}s".format(cost_time))

                    # xzy: crop to the detected box.
                    # NOTE(review): every iteration crops the already cropped
                    # image, which is only meaningful for at most one box.
                    for box in boxes:
                        img = img[int(box[1]):int(box[5]),
                                  int(box[0]):int(box[2])]

                    # NOTE(review): fx scales the horizontal axis; if rh is
                    # the height ratio, as its name suggests, fx/fy look
                    # swapped - confirm against resize_image.
                    img = cv2.resize(img,
                                     None,
                                     None,
                                     fx=1.0 / rh,
                                     fy=1.0 / rw,
                                     interpolation=cv2.INTER_LINEAR)
                    cv2.imwrite(
                        os.path.join(FLAGS.output_path,
                                     os.path.basename(im_fn)), img[:, :, ::-1])
                except Exception as e:
                    # xzy: seen together with "Corrupt JPEG data: premature
                    # end of data segment". Report the cause instead of
                    # dropping it silently, then fall back to a placeholder.
                    print("Processing {} failed: {!r}".format(im_fn, e))
                    # xzy: WBNGQ9R7.jpg may be a faulty file.
                    immmm = cv2.imread(
                        "../../../dataset_warm_up/train_data/13X6EGWI.jpg")
                    if immmm is None:
                        # Without this guard a missing placeholder would
                        # raise inside the handler and abort the whole run.
                        print("Placeholder image could not be read; "
                              "nothing written for {}".format(im_fn))
                    else:
                        cv2.imwrite(
                            os.path.join(
                                FLAGS.output_path,
                                "xzywa" + str(os.path.basename(im_fn))),
                            immmm[:, :, ::-1])
                    print(str(im_fn) + " is broken!!!!!!!!")
Exemplo n.º 10
0
def main(argv=None):
    """Detect text boxes in every input image and hand them to write_text.

    Removes and recreates ``FLAGS.output_path``, restores the checkpoint
    named by the checkpoint state found in ``FLAGS.checkpoint_path`` and, for
    each path returned by ``get_images()``, runs the network, groups the
    proposals with ``TextDetector(DETECT_MODE='O')``, passes the boxes through
    ``transform_boxes`` and calls ``write_text`` with the integer boxes, the
    proposal scores and the image path.

    Args:
        argv: Not used by this function.
    """
    if os.path.exists(FLAGS.output_path):
        shutil.rmtree(FLAGS.output_path)
    os.makedirs(FLAGS.output_path)
    os.environ['CUDA_VISIBLE_DEVICES'] = FLAGS.gpu

    with tf.get_default_graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     shape=[None, None, None, 3],
                                     name='input_image')
        input_im_info = tf.placeholder(tf.float32,
                                       shape=[None, 3],
                                       name='input_im_info')

        global_step = tf.get_variable('global_step', [],
                                      initializer=tf.constant_initializer(0),
                                      trainable=False)

        bbox_pred, cls_pred, cls_prob = model.model(input_image)

        variable_averages = tf.train.ExponentialMovingAverage(
            0.997, global_step)
        saver = tf.train.Saver(variable_averages.variables_to_restore())

        with tf.Session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ckpt_state = tf.train.get_checkpoint_state(FLAGS.checkpoint_path)
            model_path = os.path.join(
                FLAGS.checkpoint_path,
                os.path.basename(ckpt_state.model_checkpoint_path))
            print('Restore from {}'.format(model_path))
            saver.restore(sess, model_path)

            im_fn_list = get_images()
            for im_fn in im_fn_list:
                print('===============')
                print(im_fn)
                start = time.time()

                # cv2.imread signals an unreadable file by returning None, so
                # test for that explicitly instead of relying on a bare
                # ``except`` (which would also swallow KeyboardInterrupt).
                try:
                    bgr = cv2.imread(im_fn)
                except cv2.error:
                    bgr = None
                if bgr is None:
                    print("Error reading image {}!".format(im_fn))
                    continue
                im = bgr[:, :, ::-1]  # reverse the channel axis: BGR -> RGB

                img, (rh, rw) = resize_image(im)
                h, w, c = img.shape
                im_info = np.array([h, w, c]).reshape([1, 3])
                bbox_pred_val, cls_prob_val = sess.run([bbox_pred, cls_prob],
                                                       feed_dict={
                                                           input_image: [img],
                                                           input_im_info:
                                                           im_info
                                                       })

                textsegs, _ = proposal_layer(cls_prob_val, bbox_pred_val,
                                             im_info)
                scores = textsegs[:, 0]
                textsegs = textsegs[:, 1:5]

                textdetector = TextDetector(DETECT_MODE='O')
                boxes = textdetector.detect(textsegs, scores[:, np.newaxis],
                                            img.shape[:2])
                coords = np.array(boxes, copy=True)

                # Presumably maps the boxes from the resized image back onto
                # the original image ``im`` - confirm in transform_boxes.
                original_boxes = transform_boxes(coords, im)
                # ``np.int`` was only an alias of the builtin ``int`` and no
                # longer exists in NumPy >= 1.24.
                original_pixel_boxes = np.array(original_boxes, dtype=int)

                write_text(original_pixel_boxes, scores, im_fn)

                # Original result
                # pixel_boxes = np.array(boxes, dtype=np.int)
                # draw_squares(pixel_boxes, img, rh, rw, im_fn, scores)

                cost_time = (time.time() - start)
                print("cost time: {:.2f}s".format(cost_time))