예제 #1
0
파일: web.py 프로젝트: zbn123/tensorflow
def scan(file):
    """Run OCR over an uploaded image and return the recognised strings.

    The upload's bytes are decoded with OpenCV using flag ``0`` (grayscale),
    split into sub-images by ``utils.splitImg``, and each piece is
    normalised, saved under ``<curr_dir>/test/<index>.png`` and fed through
    the decoder op of the module-level TensorFlow ``session``.

    Args:
        file: Upload object exposing a readable ``stream`` attribute.

    Returns:
        list: one ``ocr.list_to_chars`` result per decoded sequence, in the
        order the pieces were processed.
    """
    raw_bytes = np.asarray(bytearray(file.stream.read()), dtype=np.uint8)
    pieces = utils.splitImg(cv2.imdecode(raw_bytes, 0))

    ocr_texts = []
    for idx, piece in enumerate(pieces):
        # Invert/binarise, crop empty borders, then scale to the model height.
        glyphs = utils.resize(
            utils.dropZeroEdges(utils.img2bwinv(piece)), ocr.image_height)
        utils.save(glyphs, os.path.join(curr_dir, "test", "%s.png" % idx))

        # The network input is 5 columns wider than the resized piece.
        padded_width = glyphs.shape[1] + 5
        flat = utils.img2vec(glyphs, ocr.image_height, padded_width)

        # Single-sample batch laid out as [batch, width, height].
        ocr_inputs = np.zeros([1, padded_width, ocr.image_height])
        ocr_inputs[0, :] = np.transpose(
            flat.reshape((ocr.image_height, padded_width)))
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * padded_width
        feed = {
            inputs: ocr_inputs,
            seq_len: ocr_seq_len,
            input_keep_prob: 1.0,
        }

        started_at = time.time()
        decoded_list = session.run(decoded[0], feed)
        elapsed = round(time.time() - started_at, 2)
        print("filished ocr %s , paid %s seconds" % (idx, elapsed))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in ocr.decode_sparse_tensor(decoded_list))

    return ocr_texts
예제 #2
0
def get_next_batch(batch_size=128):
    """Draw a random training batch of flattened images and index labels.

    ``batch_size`` index lines are sampled from ``train_files``. Each line
    is ``"<name> <text>"``: the part before the first space names the PNG
    file, the rest is the label text.

    Args:
        batch_size: Number of samples to draw.

    Returns:
        tuple: ``(inputs, labels)`` where ``inputs`` has shape
        ``[batch_size, image_size[1] * image_size[0]]`` and ``labels`` is an
        int array of shape ``[batch_size, label_size]`` holding indices into
        ``CHARS``.
    """
    inputs = np.zeros([batch_size, image_size[1] * image_size[0]])
    labels = np.zeros([batch_size, label_size], dtype=int)

    for row, entry in enumerate(random.sample(train_files, batch_size)):
        file_name = entry.split(" ")[0] + ".png"
        label_text = entry[entry.index(' '):].strip()
        # In SimSun 9pt, 'O' and '0' render identically; mapping every 'O'
        # to '0' was considered here but is currently disabled.
        # Labels are right-padded with spaces up to label_size.
        label_text = label_text.ljust(label_size)

        if file_name not in images:
            # Loaded images are converted to inverted black-and-white.
            picture = readImgFile(
                os.path.join(curr_dir, DATA_DIR, file_name))
            picture = resize(dropZeroEdges(img2bwinv(picture)), image_size[0])
            vector = img2vec(picture, image_size[0], image_size[1])
            # The result is deliberately not written back into `images`.
        else:
            vector = images[file_name]
        inputs[row, :] = vector

        # Characters outside CHARS fall back to the UNKOWN_CHAR index.
        labels[row, :] = [
            CHARS.index(ch) if ch in CHARS else CHARS.index(UNKOWN_CHAR)
            for ch in label_text
        ]

    return inputs, labels
예제 #3
0
def get_next_batch(batch_size=128):
    """Generate a batch of (noisy, clean) rendered-text image pairs.

    For each sample a random font, size and text are chosen and rendered
    through ``utils_font.get_font_image_from_url``. The noisy, rescaled
    rendering becomes the input; the untouched rendering, binarised via
    ``utils.img2bwinv``, becomes the target.

    Args:
        batch_size: Number of image pairs to generate.

    Returns:
        tuple: ``(inputs, labels)``, both of shape
        ``[batch_size, max_width_image, image_height]``, where
        ``max_width_image`` is the widest image in the batch bumped up to
        the next multiple of ``POOL_SIZE``.
    """
    noisy_images = []
    clean_images = []
    widest = 0
    # The value is unused, but the draw is kept so the sequence of random
    # numbers consumed by this function stays unchanged.
    random.randint(10, 20)

    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        text_length = random.randint(3, 5)
        font_size = random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, text_length)
        rendered = utils_font.get_font_image_from_url(
            text, font_name, font_size,
            fontmode=font_mode, fonthint=font_hint)
        pristine = rendered.copy()

        # Input: add noise, go gray, shrink by a random rate, then bring it
        # back to the model height and scale to [0, 1] with inverted polarity.
        noisy = utils_pil.convert_to_gray(utils_font.add_noise(rendered))
        shrink_rate = random.randint(8, 17) / font_size
        noisy = np.asarray(utils_pil.resize(noisy, shrink_rate))
        noisy = utils.resize(noisy, height=image_height)
        noisy = (255. - noisy) / 255.
        noisy_images.append(noisy)

        # Target: the clean rendering, gray -> model height -> img2bwinv.
        clean = np.asarray(utils_pil.convert_to_gray(pristine))
        clean = utils.resize(clean, height=image_height)
        clean = utils.img2bwinv(clean) / 255.
        clean_images.append(clean)

        widest = max(widest, noisy.shape[1], clean.shape[1])

    # Always strictly increases the width, even when already a multiple.
    widest += POOL_SIZE - widest % POOL_SIZE

    def _pack(picture_list):
        # Stack images into a zero-padded [batch, width, height] tensor.
        tensor = np.zeros([batch_size, widest, image_height])
        for slot, picture in enumerate(picture_list):
            tensor[slot, :] = np.transpose(
                utils.img2vec(picture,
                              height=image_height,
                              width=widest,
                              flatten=False))
        return tensor

    inputs = _pack(noisy_images)
    labels = _pack(clean_images)
    return inputs, labels
예제 #4
0
def scan(file):
    """OCR an uploaded image with a two-stage network and return the texts.

    The upload is opened with PIL, converted to gray, and split into
    sub-images. Each piece is placed on a square canvas, passed through
    ``net_g``, re-cropped and re-squared, and then decoded with
    ``res_decoded``. Debug images are written under ``<curr_dir>/test``.

    Args:
        file: Upload object exposing a readable ``stream`` attribute.

    Returns:
        list: one ``ocr.list_to_chars`` result per decoded sequence.
    """
    gray = utils.img2gray(np.array(Image.open(file.stream)))
    utils.save(gray * 255, os.path.join(curr_dir, "test", "p0.png"))
    pieces = utils.splitImg(gray)

    ocr_texts = []

    for idx, piece in enumerate(pieces):
        side = ocr.image_size
        canvas_area = side * side

        # The cleaned binary mask drives the crop of the inverted gray piece.
        mask = utils.clearImg(utils.img2bwinv(piece))
        inverted = 255. - piece
        cropped = utils.dropZeroEdges(mask, inverted)
        cropped = utils.resize(cropped, ocr.image_height) / 255.

        ocr_inputs = np.zeros([1, side, side])
        ocr_inputs[0, :] = utils.square_img(cropped, np.zeros([side, side]))

        ocr_seq_len = np.ones(1) * canvas_area // (
            ocr.POOL_SIZE * ocr.POOL_SIZE)

        started_at = time.time()
        generated = session.run(net_g, {inputs: ocr_inputs})
        generated = np.squeeze(generated, axis=3)

        # Keep the raw generator output for the debug image written below.
        raw_generated = np.copy(generated)

        # Post-process the single batch entry: zero out values <= 0.2 in a
        # copy, crop the original against it, rescale and re-square.
        unsquared = utils.unsquare_img(generated[0], ocr.image_height)
        thresholded = np.copy(unsquared)
        thresholded[thresholded <= 0.2] = 0
        unsquared = utils.dropZeroEdges(thresholded, unsquared, min_rate=0.1)
        unsquared = utils.resize(unsquared, ocr.image_height)
        fits_canvas = (
            unsquared.shape[0] * unsquared.shape[1]
            <= ocr.image_size * ocr.image_size)
        if fits_canvas:
            generated[0] = utils.square_img(
                unsquared, np.zeros([ocr.image_size, ocr.image_size]),
                ocr.image_height)

        # Debug strip: network input, raw output, post-processed output.
        debug_strip = np.vstack(
            (ocr_inputs[0], raw_generated[0], generated[0]))
        utils.save(debug_strip * 255,
                   os.path.join(curr_dir, "test", "%s.png" % idx))

        decoded_list = session.run(res_decoded[0], {
            inputs: generated,
            seq_len: ocr_seq_len
        })
        elapsed = round(time.time() - started_at, 2)
        print("filished ocr %s , paid %s seconds" % (idx, elapsed))

        ocr_texts.extend(
            ocr.list_to_chars(sequence)
            for sequence in utils.decode_sparse_tensor(decoded_list))

    return ocr_texts
예제 #5
0
def getImage(text, font_name, font_length, font_size, noise=False, fontmode=None, fonthint=None):
    """Fetch a rendered text image from the font service and normalise it.

    The text is rendered remotely, composited onto a white RGB background,
    trimmed with ``utils.trim`` and converted to a float array.

    Args:
        text: String to render.
        font_name: Font name sent to the service as ``fontname``.
        font_length: Unused by this function; kept for call compatibility.
        font_size: Font size sent to the service as ``fontsize``.
        noise: When true, the image is resized to a random height between
            9 and ``image_height``, inverted, sprinkled with random noise
            and min-max normalised. Otherwise it is binarised with
            ``utils.img2bwinv`` and divided by 255.
        fontmode: Value for the ``fontmode`` request parameter; a random
            choice from ``[0, 1, 2, 4]`` when ``None``.
        fonthint: Value for the ``fonthint`` request parameter; a random
            choice from ``[0, 1, 2, 3, 4, 5]`` when ``None``.

    Returns:
        The processed image array with empty borders removed by
        ``utils.dropZeroEdges``.
    """
    # Identity checks against None; random defaults are drawn only when the
    # caller did not supply a value (fontmode first, then fonthint).
    params = {
        'text': text,
        'fontname': font_name,
        'fontsize': font_size,
        'fontmode': random.choice([0, 1, 2, 4]) if fontmode is None else fontmode,
        'fonthint': random.choice([0, 1, 2, 3, 4, 5]) if fonthint is None else fonthint,
    }

    r = http('http://192.168.2.113:8888/', params)
    _img = Image.open(io.BytesIO(r))
    # Paste using the rendering itself as the mask so it is composited
    # onto a white background.
    img = Image.new("RGB", _img.size, (255, 255, 255))
    img.paste(_img, (0, 0), _img)
    img = utils.trim(img)

    if noise:
        w, h = img.size
        _h = random.randint(9, image_height)
        _w = round(w * _h / h)
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same
        # filter under its current name. Fall back for very old Pillow.
        resample = getattr(Image, "LANCZOS", None)
        if resample is None:
            resample = Image.ANTIALIAS
        img = img.resize((_w, _h), resample)
        img = np.asarray(img)
        img = 1 - utils.img2gray(img) / 255.
        img = utils.dropZeroEdges(img)

        # Roughly 10% of pixels receive additive noise in (0, 0.5].
        noise_mask = np.maximum(np.random.random(img.shape) - 0.9, 0)
        img = img + noise_mask * 5
        # NOTE(review): a perfectly constant image makes imax == imin and
        # this division yields NaN; behaviour left unchanged.
        imin, imax = img.min(), img.max()
        img = (img - imin) / (imax - imin)
    else:
        img = np.asarray(img)
        img = utils.img2gray(img)
        img = utils.img2bwinv(img)
        img = img / 255.
        img = utils.dropZeroEdges(img)
    return img
예제 #6
0
def get_next_batch(batch_size=128):
    """Generate a training batch of noisy inputs, clean targets and labels.

    Each sample renders a random text in a random font, resizes it to
    ``image_height``, and derives two images: a degraded one (noise plus a
    random down/up-scale) used as input, and a clean binarised one used as
    target. The character indices of the text are kept as the CTC label.

    Args:
        batch_size: Number of samples to generate.

    Returns:
        tuple: ``(inputs, targets, sparse_labels, seq_len)`` where
        ``inputs`` and ``targets`` have shape
        ``[batch_size, max_width_image, image_height]``, ``sparse_labels``
        is the result of ``utils.sparse_tuple_from`` over the per-sample
        ``CHARS`` indices, and ``seq_len`` is a length-``batch_size`` array
        filled with ``max_width_image * image_height // POOL_SIZE ** 2``.
    """
    images = []
    to_images = []
    codes = []
    max_width_image = 0
    for _ in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        codes.append([CHARS.index(char) for char in text])
        image = utils_font.get_font_image_from_url(text,
                                                   font_name,
                                                   font_size,
                                                   fontmode=font_mode,
                                                   fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        to_image = image.copy()
        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        # Floor-dividing by a float divisor yields a float, and
        # random.randint rejects float bounds on Python 3.12+ (deprecated
        # since 3.10). The value is always integral, so int() is exact.
        _h = random.randint(
            9, int(image_height // random.choice([1, 1.5, 2, 2.5])))
        # Shrink to the random height and scale back up to lose detail.
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        # Clean target: gray -> model height -> img2bwinv -> [0, 1].
        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        max_width_image = max(max_width_image, image.shape[1],
                              to_image.shape[1])

    # Bump the width to the next multiple of POOL_SIZE (always strictly
    # larger, even when it is already a multiple).
    max_width_image = max_width_image + (POOL_SIZE -
                                         max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i, image in enumerate(images):
        image_vec = utils.img2vec(image,
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i, to_image in enumerate(to_images):
        image_vec = utils.img2vec(to_image,
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        targets[i, :] = np.transpose(image_vec)

    labels = [np.asarray(code) for code in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image *
                                     image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
예제 #7
0
        os.mkdir(os.path.join(curr_dir, "data" ,"dataset"))
    with open(os.path.join(curr_dir, "data", "index.txt")) as index_file:
        for i, line in enumerate(index_file.readlines()):
            if i%10000==0: print("resizing image no: ",i)
            lines = line.split(" ")
            image_name = lines[0]+".png"
            dst_image_name = os.path.join(curr_dir, "data" ,"dataset", image_name)
            if os.path.exists(dst_image_name):
                train_files.append(line)
                continue
            if not os.path.exists(os.path.dirname(dst_image_name)):
                os.mkdir(os.path.dirname(dst_image_name))        
            src_image_name = os.path.join(curr_dir,"data",image_name)
            try:
                image = readImgFile(src_image_name)
                image = img2bwinv(image)    
                image = dropZeroEdges(image)    
            except:
                print(dst_image_name,"error")
                continue
            resized_image = resize(image,image_height)
            save(resized_image,dst_image_name)
            train_files.append(line)


def neural_networks():
    # 输入:训练的数量,一张图片的宽度,一张图片的高度 [-1,-1,16]
    inputs = tf.placeholder(tf.float32, [None, None, image_height], name="inputs")
    # 定义 ctc_loss 是稀疏矩阵
    labels = tf.sparse_placeholder(tf.int32, name="labels")
    # 1维向量 size [batch_size] 等于 np.ones(batch_size)* image_width