Example No. 1
def scan(file):
    img = Image.open(file.stream)
    image = np.array(img)
    image = utils.img2gray(image)
    utils.save(image * 255, os.path.join(curr_dir, "test", "p0.png"))
    # image = utils.clearImgGray(image)
    # utils.save(image * 255, os.path.join(curr_dir,"test","p1.png"))
    split_images = utils.splitImg(image)

    ocr_texts = []

    for i, split_image in enumerate(split_images):
        inv_image = utils.img2bwinv(split_image)
        inv_image = utils.clearImg(inv_image)
        image = 255. - split_image
        image = utils.dropZeroEdges(inv_image, image)
        image = utils.resize(image, ocr.image_height)
        image = image / 255.
        ocr_inputs = np.zeros([1, ocr.image_size, ocr.image_size])
        ocr_inputs[0, :] = utils.square_img(
            image, np.zeros([ocr.image_size, ocr.image_size]))

        ocr_seq_len = np.ones(1) * (ocr.image_size * ocr.image_size) // (
            ocr.POOL_SIZE * ocr.POOL_SIZE)

        start = time.time()
        p_net_g = session.run(net_g, {inputs: ocr_inputs})
        p_net_g = np.squeeze(p_net_g, axis=3)

        debug_net_g = np.copy(p_net_g)
        for j in range(1):  # the generator batch here is a single image
            _t_img = utils.unsquare_img(p_net_g[j], ocr.image_height)
            _t_img_bin = np.copy(_t_img)
            _t_img_bin[_t_img_bin <= 0.2] = 0
            _t_img = utils.dropZeroEdges(_t_img_bin, _t_img, min_rate=0.1)
            _t_img = utils.resize(_t_img, ocr.image_height)
            if _t_img.shape[0] * _t_img.shape[
                    1] <= ocr.image_size * ocr.image_size:
                p_net_g[j] = utils.square_img(
                    _t_img, np.zeros([ocr.image_size, ocr.image_size]),
                    ocr.image_height)

        _img = np.vstack((ocr_inputs[0], debug_net_g[0], p_net_g[0]))
        utils.save(_img * 255, os.path.join(curr_dir, "test", "%s.png" % i))

        decoded_list = session.run(res_decoded[0], {
            inputs: p_net_g,
            seq_len: ocr_seq_len
        })
        seconds = round(time.time() - start, 2)
        print("filished ocr %s , paid %s seconds" % (i, seconds))
        detected_list = utils.decode_sparse_tensor(decoded_list)
        for detect_number in detected_list:
            ocr_texts.append(ocr.list_to_chars(detect_number))

    return ocr_texts
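The `square_img` / `unsquare_img` helpers used above come from the project's `utils` module and are not shown. A minimal sketch of the layout they appear to implement (packing a wide text strip into a fixed square canvas in row-height bands); the name and details are assumptions, not the repo's code:

import numpy as np

def square_img_sketch(strip, canvas, row_height):
    # hypothetical illustration: copy the strip into the square canvas,
    # one canvas-width band at a time, stacking bands top to bottom
    h, w = strip.shape
    size = canvas.shape[1]
    for n in range(0, w, size):
        band = strip[:, n:n + size]
        top = (n // size) * row_height
        canvas[top:top + h, :band.shape[1]] = band
    return canvas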
Example No. 2
def scan(file):
    img_array = np.asarray(bytearray(file.stream.read()), dtype=np.uint8)
    image = cv2.imdecode(img_array, 0)  # flag 0 = cv2.IMREAD_GRAYSCALE
    split_images = utils.splitImg(image)
    
    ocr_texts = []

    for i, split_image in enumerate(split_images):
        image = utils.img2bwinv(split_image)
        image = utils.dropZeroEdges(image)
        image = utils.resize(image, ocr.image_height)
        utils.save(image, os.path.join(curr_dir, "test", "%s.png" % i))
        maxImageWidth = image.shape[1] + 5
        image_vec = utils.img2vec(image, ocr.image_height, maxImageWidth)
        ocr_inputs = np.zeros([1, maxImageWidth, ocr.image_height])
        ocr_inputs[0, :] = np.transpose(image_vec.reshape((ocr.image_height, maxImageWidth)))
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * maxImageWidth
        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len, input_keep_prob: 1.0}
        start = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - start, 2)
        print("finished ocr %s, took %s seconds" % (i, seconds))
        detected_list = ocr.decode_sparse_tensor(decoded_list)
        for detect_number in detected_list:
            ocr_texts.append(ocr.list_to_chars(detect_number))

    return ocr_texts
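`utils.img2vec` is likewise external. Judging by the `reshape((ocr.image_height, maxImageWidth))` round-trip above, it plausibly right-pads the image to a fixed width and flattens it; a sketch under that assumption:

import numpy as np

def img2vec_sketch(img, height, max_width):
    # right-pad a (height, width) image to max_width, then flatten row-major
    padded = np.zeros((height, max_width), dtype=img.dtype)
    padded[:, :img.shape[1]] = img
    return padded.reshape(-1)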
Example No. 3
def scan(file):
    img = Image.open(file.stream)
    image = np.array(img)
    image = utils.img2gray(image)
    image = utils.clearImgGray(image)
    utils.save(image, os.path.join(curr_dir, "test", "src.png"))
    split_images = utils.splitImg(image)
    
    ocr_texts = []

    for i, split_image in enumerate(split_images):
        # image = utils.img2bwinv(split_image)
        image = utils.clearImgGray(split_image)
        # image = utils.clearBackgroundColor(image, 255)    
        image = 255. - image
        image = utils.dropZeroEdges(image)
        image = utils.resize(image, ocr.image_height)
        utils.save(image, os.path.join(curr_dir, "test", "%s.png" % i))
        image = image / 255.
        maxImageWidth = image.shape[1]
        # round the width up to a multiple of POOL_SIZE
        maxImageWidth = maxImageWidth + (ocr.POOL_SIZE - maxImageWidth % ocr.POOL_SIZE)
        image_vec = utils.img2vec(image, ocr.image_height, maxImageWidth)
        ocr_inputs = np.zeros([1, maxImageWidth, ocr.image_height])
        ocr_inputs[0, :] = np.transpose(image_vec.reshape((ocr.image_height, maxImageWidth)))
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * (maxImageWidth * ocr.image_height) // (ocr.POOL_SIZE * ocr.POOL_SIZE)
        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len, input_keep_prob: 1.0}
        start = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - start, 2)
        print("finished ocr %s, took %s seconds" % (i, seconds))
        detected_list = ocr.decode_sparse_tensor(decoded_list)
        for detect_number in detected_list:
            ocr_texts.append(ocr.list_to_chars(detect_number))

    return ocr_texts
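Note that the width round-up in this example adds a full POOL_SIZE even when the width is already a multiple of it. If exact alignment were preferred, a common idiom is:

def round_up_to_multiple(width, pool_size):
    # adds nothing when width is already a multiple, unlike the expression above
    return width + (-width % pool_size)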
Example No. 4
def get_next_batch(batch_size=128):
    inputs = np.zeros([batch_size, image_size[1] * image_size[0]])
    labels = np.zeros([batch_size, label_size], dtype=int)
    batch = random.sample(train_files, batch_size)
    for i, line in enumerate(batch):
        lines = line.split(" ")
        imageFileName = lines[0] + ".png"
        text = line[line.index(' '):].strip()
        # In SimSun at 9 pt, 'O' and '0' render identically, so all are treated as '0'
        # text = text.replace('O','0')
        # pad the text with spaces up to label_size
        text = text.ljust(label_size)
        if imageFileName in images:
            imgvec = images[imageFileName]
        else:
            # the loaded image is converted to inverted black-and-white
            image = readImgFile(os.path.join(curr_dir, DATA_DIR,
                                             imageFileName))
            image = img2bwinv(image)
            image = dropZeroEdges(image)
            image = resize(image, image_size[0])
            imgvec = img2vec(image, image_size[0], image_size[1])
            # images[imageFileName] = imgvec
        inputs[i, :] = imgvec
        label_list = []
        for c in text:
            if c in CHARS:
                label_list.append(CHARS.index(c))
            else:
                label_list.append(CHARS.index(UNKOWN_CHAR))
        labels[i, :] = label_list
    return inputs, labels
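For reference, the inverse of the label-encoding loop above can be written as a short helper (hypothetical; assumes the same CHARS index table and right-side space padding):

def labels_to_text(label_row, chars):
    # map indices back to characters and strip the space padding
    return "".join(chars[int(i)] for i in label_row).rstrip()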
Example No. 5
def getImage(text, font_name, font_length, font_size, noise=False, fontmode=None, fonthint=None):
    params = {}
    params['text'] = text
    params['fontname'] = font_name
    params['fontsize'] = font_size
    # params['fontmode'] = random.choice([0,1,2,4,8])
    if fontmode is None:
        params['fontmode'] = random.choice([0, 1, 2, 4])
    else:
        params['fontmode'] = fontmode
    if fonthint is None:
        params['fonthint'] = random.choice([0, 1, 2, 3, 4, 5])
    else:
        params['fonthint'] = fonthint
    
    r = http('http://192.168.2.113:8888/', params)
    _img = Image.open(io.BytesIO(r))
    # composite the rendered glyphs (with alpha) onto a white background
    img = Image.new("RGB", _img.size, (255, 255, 255))
    img.paste(_img, (0, 0), _img)
    img = utils.trim(img)
    
    if noise:
        w, h = img.size
        _h = random.randint(9, image_height)
        _w = round(w * _h / h)
        img = img.resize((_w, _h), Image.ANTIALIAS)
        img = np.asarray(img)
        img = 1 - utils.img2gray(img) / 255.
        img = utils.dropZeroEdges(img)

        # sparse speckle: roughly 10% of pixels get a positive bump
        noise_mask = np.maximum(np.random.random(img.shape) - 0.9, 0)
        img = img + noise_mask * 5
        # rescale back to [0, 1]
        imin, imax = img.min(), img.max()
        img = (img - imin) / (imax - imin)
    else:
        img = np.asarray(img)
        img = utils.img2gray(img) 
        img = utils.img2bwinv(img)
        img = img / 255.
        img = utils.dropZeroEdges(img)
    return img
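The noise branch here is the same additive speckle plus min-max rescale used again in Examples 6 and 8. Isolated as a standalone function for clarity (an illustrative restatement, not a new API):

import numpy as np

def add_speckle(img):
    # roughly 10% of pixels receive a positive bump, then rescale to [0, 1]
    mask = np.maximum(np.random.random(img.shape) - 0.9, 0)
    noisy = img + mask * 5
    lo, hi = noisy.min(), noisy.max()
    return (noisy - lo) / (hi - lo)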
Example No. 6
def getImage(CHARS, font_name, image_height, font_length, font_size,
             word_dict):
    text = ''
    n = random.random()
    if n < 0.1:
        # 10%: digits and common symbols
        for i in range(font_length):
            text += random.choice(
                "123456789012345678901234567890-./$,:()+-*=><")
    elif n < 0.5:
        # 40%: random characters from the charset
        for i in range(font_length):
            text += random.choice(CHARS)
    else:
        # 50%: dictionary words, filtered to characters in the charset
        while len(text) < font_length:
            word = random.choice(word_dict)
            _word = ""
            for c in word:
                if c in CHARS:
                    _word += c
            text = text + " " + _word.strip()
    text = text.strip()

    params = {}
    params['text'] = text
    params['fontname'] = font_name
    params['fontsize'] = font_size
    # params['fontmode'] = random.choice([0,1,2,4,8])
    params['fontmode'] = random.choice([0, 1, 2, 4])
    params['fonthint'] = random.choice([0, 1, 2, 3, 4, 5])

    r = http('http://192.168.2.113:8888/', params)
    _img = Image.open(io.BytesIO(r))
    img = Image.new("RGB", _img.size, (255, 255, 255))
    img.paste(_img, (0, 0), _img)
    img = utils.trim(img)
    w, h = img.size
    _h = random.randint(9, 64)
    _w = round(w * _h / h)
    img = img.resize((_w, _h), Image.ANTIALIAS)
    img = np.asarray(img)
    img = utils.clearBackgroundColor(img)
    img = 1 - utils.img2gray(img) / 255.
    img = utils.dropZeroEdges(img)

    # sparse speckle: ~10% of pixels get a positive bump, then rescale to [0, 1]
    noise_mask = np.maximum(np.random.random(img.shape) - 0.9, 0)
    img = img + noise_mask * 5
    imin, imax = img.min(), img.max()
    img = (img - imin) / (imax - imin)

    img = utils.resize(img, image_height)
    return text, img
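The `http()` helper that fetches the rendered text image is never shown in these examples. A minimal sketch, assuming it POSTs the form parameters to the font-rendering service and returns the raw image bytes:

import urllib.parse
import urllib.request

def http_sketch(url, params):
    # POST url-encoded params; the service is assumed to reply with PNG bytes
    data = urllib.parse.urlencode(params).encode("utf-8")
    with urllib.request.urlopen(url, data=data) as resp:
        return resp.read()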
Example No. 7
def scan2():
    session, inputs, seq_len, input_keep_prob, decoded, log_prob = init()
    need_ocr_images = utils.loadImage(os.path.join(curr_dir, 'test', '0.jpg'),
                                      0)
    ocr_text_groups = []
    for idx, images_group in enumerate(need_ocr_images):
        # if idx != 1: continue
        ocr_texts = []

        for i, image in enumerate(images_group):
            image = utils.dropZeroEdges(image)
            image = utils.resize(image, ocr.image_height)
            utils.save(image,
                       os.path.join(curr_dir, "test", "%s-%s.png" % (idx, i)))
            maxImageWidth = image.shape[1] + 5
            image_vec = utils.img2vec(image, ocr.image_height, maxImageWidth)
            ocr_inputs = np.zeros([1, maxImageWidth, ocr.image_height])
            ocr_inputs[0, :] = np.transpose(
                image_vec.reshape((ocr.image_height, maxImageWidth)))
            ocr_seq_len = np.ones(ocr_inputs.shape[0]) * maxImageWidth
            feed = {
                inputs: ocr_inputs,
                seq_len: ocr_seq_len,
                input_keep_prob: 1.0
            }
            print("starting ocr inputs %s:%s ..." % (idx, i))
            start = time.time()
            decoded_list = session.run(decoded[0], feed)
            seconds = round(time.time() - start, 2)
            print("filished ocr inputs %s, paid %s seconds" % (idx, seconds))
            detected_list = ocr.decode_sparse_tensor(decoded_list)
            for detect_number in detected_list:
                ocr_texts.append(ocr.list_to_chars(detect_number))

        ocr_text_groups.append(ocr_texts)
        # break
    return ocr_text_groups
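`ocr.decode_sparse_tensor` turns the SparseTensorValue produced by CTC decoding into one index list per batch sample. A sketch of the usual implementation (an assumption; the repo's version may differ):

def decode_sparse_tensor_sketch(sparse):
    # group values by the batch index (first column of sparse.indices)
    rows = {}
    for (batch_idx, _), value in zip(sparse.indices, sparse.values):
        rows.setdefault(int(batch_idx), []).append(int(value))
    return [rows.get(i, []) for i in range(int(sparse.dense_shape[0]))]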
Example No. 8
def getImage(CHARS, font_name, image_height, font_length, font_size,
             word_dict):
    text = utils_font.get_random_text(CHARS, word_dict, font_length)
    img = utils_font.get_font_image_from_url(text, font_name, font_size)
    img = utils_font.add_noise(img)
    img = utils_pil.convert_to_gray(img)

    w, h = img.size
    _h = random.randint(9, image_height)
    _w = round(w * _h / h)
    img = img.resize((_w, _h), Image.ANTIALIAS)
    img = np.asarray(img)
    #  img = utils.clearBackgroundColor(img)
    img = 1 - img / 255.
    img = utils.dropZeroEdges(img)

    # sparse speckle: ~10% of pixels get a positive bump, then rescale to [0, 1]
    noise_mask = np.maximum(np.random.random(img.shape) - 0.9, 0)
    img = img + noise_mask * 5
    imin, imax = img.min(), img.max()
    img = (img - imin) / (imax - imin)

    img = utils.resize(img, image_height)
    return text, img
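`utils.resize` is consistently called with a target height, so it presumably scales while preserving aspect ratio; a sketch with OpenCV under that assumption:

import cv2

def resize_sketch(img, height):
    # cv2.resize takes (width, height); keep the aspect ratio
    h, w = img.shape[:2]
    return cv2.resize(img, (max(1, round(w * height / h)), height))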
Example No. 9
    with open(os.path.join(curr_dir, "data", "index.txt")) as index_file:
        for i, line in enumerate(index_file.readlines()):
            if i % 10000 == 0: print("resizing image no:", i)
            lines = line.split(" ")
            image_name = lines[0] + ".png"
            dst_image_name = os.path.join(curr_dir, "data", "dataset", image_name)
            if os.path.exists(dst_image_name):
                train_files.append(line)
                continue
            if not os.path.exists(os.path.dirname(dst_image_name)):
                os.mkdir(os.path.dirname(dst_image_name))
            src_image_name = os.path.join(curr_dir, "data", image_name)
            try:
                image = readImgFile(src_image_name)
                image = img2bwinv(image)
                image = dropZeroEdges(image)
            except Exception:
                print(dst_image_name, "error")
                continue
            resized_image = resize(image, image_height)
            save(resized_image, dst_image_name)
            train_files.append(line)


def neural_networks():
    # inputs: [batch size, image width, image height], e.g. [-1, -1, 16]
    inputs = tf.placeholder(tf.float32, [None, None, image_height], name="inputs")
    # labels for ctc_loss must be a sparse tensor
    labels = tf.sparse_placeholder(tf.int32, name="labels")
    # 1-D vector of size [batch_size], equal to np.ones(batch_size) * image_width
    seq_len = tf.placeholder(tf.int32, [None], name="seq_len")
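`tf.sparse_placeholder` is fed an (indices, values, dense_shape) triple at run time. A minimal sketch of building that triple from a list of label sequences (hypothetical helper):

import numpy as np

def to_sparse(label_seqs):
    # flatten (row, col) coordinates and values for a tf.SparseTensorValue
    indices, values = [], []
    for row, seq in enumerate(label_seqs):
        for col, v in enumerate(seq):
            indices.append([row, col])
            values.append(v)
    dense_shape = [len(label_seqs), max(len(s) for s in label_seqs)]
    return np.array(indices), np.array(values), np.array(dense_shape)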
Example No. 10
def train():
    inputs, labels, global_step, \
        res_loss, res_optim, seq_len, res_acc, res_decoded, \
        net_g = neural_networks()

    curr_dir = os.path.dirname(__file__)
    model_dir = os.path.join(curr_dir, MODEL_SAVE_NAME)
    if not os.path.exists(model_dir): os.mkdir(model_dir)
    model_G_dir = os.path.join(model_dir, "TG")
    model_R_dir = os.path.join(model_dir, "R16")

    if not os.path.exists(model_R_dir): os.mkdir(model_R_dir)
    if not os.path.exists(model_G_dir): os.mkdir(model_G_dir)

    init = tf.global_variables_initializer()
    with tf.Session() as session:
        session.run(init)

        r_saver = tf.train.Saver(tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='RES'),
                                 sharded=True)
        g_saver = tf.train.Saver(tf.get_collection(
            tf.GraphKeys.TRAINABLE_VARIABLES, scope='TRIM_G'),
                                 sharded=False)

        ckpt = tf.train.get_checkpoint_state(model_G_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Restore Model G...")
            g_saver.restore(session, ckpt.model_checkpoint_path)

        ckpt = tf.train.get_checkpoint_state(model_R_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print("Restore Model R...")
            r_saver.restore(session, ckpt.model_checkpoint_path)

        AllLosts = {}
        while True:
            errA = errD1 = errD2 = 1
            batch_size = 4
            for batch in range(BATCHES):
                if len(AllLosts) > 10 and random.random() > 0.7:
                    sorted_font = sorted(AllLosts.items(),
                                         key=operator.itemgetter(1),
                                         reverse=True)
                    font_info = sorted_font[random.randint(0, 10)]
                    font_info = font_info[0].split(",")
                    train_inputs, train_labels, train_seq_len, train_info = get_next_batch_for_res(batch_size, False, \
                        font_info[0], int(font_info[1]), int(font_info[2]), int(font_info[3]))
                else:
                    # train_inputs, train_labels, train_seq_len, train_info = get_next_batch_for_res(batch_size, False, _font_size=36)
                    train_inputs, train_labels, train_seq_len, train_info = get_next_batch_for_res(
                        batch_size)
                # feed = {inputs: train_inputs, labels: train_labels, seq_len: train_seq_len}
                start = time.time()

                p_net_g = session.run(net_g, {inputs: train_inputs})

                p_net_g = np.squeeze(p_net_g, axis=3)
                for i in range(batch_size):
                    _t_img = utils.unsquare_img(p_net_g[i], image_height)
                    _t_img = utils.cvTrimImage(_t_img)
                    _t_img[_t_img < 0] = 0
                    _t_img = utils.resize(_t_img, image_height)
                    if _t_img.shape[0] * _t_img.shape[
                            1] <= image_size * image_size:
                        p_net_g[i] = utils.square_img(
                            _t_img, np.zeros([image_size, image_size]),
                            image_height)

                feed = {
                    inputs: p_net_g,
                    labels: train_labels,
                    seq_len: train_seq_len
                }

                errR, acc, _, steps = session.run(
                    [res_loss, res_acc, res_optim, global_step], feed)
                font_info = train_info[0][0] + "/" + train_info[0][
                    1] + " " + train_info[1][0] + "/" + train_info[1][1]
                print(
                    "%d time: %4.4fs, res_acc: %.4f, res_loss: %.4f, info: %s "
                    % (steps, time.time() - start, acc, errR, font_info))
                if np.isnan(errR) or np.isinf(errR):
                    print("Error: cost is nan or inf")
                    return

                # if accuracy drops below 90%, dump the images for inspection
                if acc < 0.9:
                    for i in range(batch_size):
                        _img = np.vstack(
                            (train_inputs[i] * 255, p_net_g[i] * 255))
                        cv2.imwrite(
                            os.path.join(curr_dir, "test",
                                         "E%s_%s_%s.png" % (acc, steps, i)),
                            _img)

                for info in train_info:
                    key = ",".join(info)
                    if key in AllLosts:
                        AllLosts[key] = AllLosts[key] * 0.95 + errR * 0.05
                    else:
                        AllLosts[key] = errR

                # periodic report
                if steps > 0 and steps % REPORT_STEPS == 0:
                    train_inputs, train_labels, train_seq_len, train_info = get_next_batch_for_res(
                        batch_size)
                    p_net_g = session.run(net_g, {inputs: train_inputs})
                    p_net_g = np.squeeze(p_net_g, axis=3)

                    for i in range(batch_size):
                        _t_img = utils.unsquare_img(p_net_g[i], image_height)
                        _t_img_bin = np.copy(_t_img)
                        _t_img_bin[_t_img_bin <= 0.3] = 0
                        _t_img = utils.dropZeroEdges(_t_img_bin,
                                                     _t_img,
                                                     min_rate=0.1)
                        _t_img = utils.resize(_t_img, image_height)
                        if _t_img.shape[0] * _t_img.shape[
                                1] <= image_size * image_size:
                            p_net_g[i] = utils.square_img(
                                _t_img, np.zeros([image_size, image_size]),
                                image_height)

                    decoded_list = session.run(res_decoded[0], {
                        inputs: p_net_g,
                        seq_len: train_seq_len
                    })

                    for i in range(batch_size):
                        _img = np.vstack((train_inputs[i], p_net_g[i]))
                        cv2.imwrite(
                            os.path.join(curr_dir, "test",
                                         "%s_%s.png" % (steps, i)), _img * 255)

                    original_list = utils.decode_sparse_tensor(train_labels)
                    detected_list = utils.decode_sparse_tensor(decoded_list)
                    if len(original_list) != len(detected_list):
                        print("len(original_list)", len(original_list),
                              "len(detected_list)", len(detected_list),
                              " test and detect length desn't match")
                    print("T/F: original(length) <-------> detectcted(length)")
                    acc = 0.
                    for idx in range(
                            min(len(original_list), len(detected_list))):
                        number = original_list[idx]
                        detect_number = detected_list[idx]
                        hit = (number == detect_number)
                        print("%6s" % hit, list_to_chars(number), "(",
                              len(number), ")")
                        print("%6s" % "", list_to_chars(detect_number), "(",
                              len(detect_number), ")")
                        # compute the Levenshtein ratio between truth and prediction
                        import Levenshtein
                        acc += Levenshtein.ratio(list_to_chars(number),
                                                 list_to_chars(detect_number))
                    print("Test Accuracy:", acc / len(original_list))
                    sorted_fonts = sorted(AllLosts.items(),
                                          key=operator.itemgetter(1),
                                          reverse=True)
                    for f in sorted_fonts[:20]:
                        print(f)
            print("Save Model R ...")
            r_saver.save(session,
                         os.path.join(model_R_dir, "R.ckpt"),
                         global_step=steps)
            try:
                ckpt = tf.train.get_checkpoint_state(model_G_dir)
                if ckpt and ckpt.model_checkpoint_path:
                    print("Restore Model G...")
                    g_saver.restore(session, ckpt.model_checkpoint_path)
            except Exception:
                pass
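The AllLosts bookkeeping in this example is an exponential moving average of per-font loss, used to oversample the worst-performing fonts. The update rule in isolation (hypothetical names):

def update_ema(table, key, loss, alpha=0.05):
    # new keys start at the raw loss; existing ones decay toward it
    table[key] = table.get(key, loss) * (1 - alpha) + loss * alpha
    return table[key]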