Example No. 1
def get_next_batch(batch_size=128):
    images = []   
    to_images = []
    max_width_image = 0
    font_min_length = random.randint(10, 20)
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(font_min_length-5, font_min_length+5)
        font_size = random.randint(9, 64)    
        font_mode = random.choice([0,1,2,4])      
        text = getRedomText(CHARS, eng_world_list, font_length)          
        image = getImage(text, font_name, font_length, font_size, noise=True, fontmode=font_mode)
        image = utils.resize(image, height=image_height)
        images.append(image)

        to_image = getImage(text, font_name, font_length, image_height, noise=False, fontmode=font_mode, fonthint=0)
        to_image = utils.resize(to_image, height=image_height)
        to_images.append(to_image)

        if image.shape[1] > max_width_image: 
            max_width_image = image.shape[1]
        if to_image.shape[1] > max_width_image: 
            max_width_image = to_image.shape[1]      

    max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)
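    # The width has been rounded up to a multiple of POOL_SIZE; inputs are laid out
    # [batch, width, height], with each image transposed so width becomes the sequence axis.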
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.transpose(image_vec)

    labels = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(to_images)):
        image_vec = utils.img2vec(to_images[i], height=image_height, width=max_width_image, flatten=False)
        labels[i,:] = np.transpose(image_vec)
    return inputs, labels
Example No. 2
def get_next_batch(batch_size=128):
    images = []
    to_images = []
    max_width_image = 0
    font_min_length = random.randint(10, 20)
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        # font_length = random.randint(font_min_length-5, font_min_length+5)
        font_length = random.randint(3, 5)
        font_size = random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        image = utils_font.get_font_image_from_url(text,
                                                   font_name,
                                                   font_size,
                                                   fontmode=font_mode,
                                                   fonthint=font_hint)
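        # Keep a clean copy of the rendering as the training target before degrading the input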
        to_image = image.copy()
        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        rate = random.randint(8, 17) / font_size
        image = utils_pil.resize(image, rate)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        # to_image = utils_font.get_font_image_from_url(text, font_name ,image_height, fontmode = font_mode, fonthint = font_hint)
        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        if image.shape[1] > max_width_image:
            max_width_image = image.shape[1]
        if to_image.shape[1] > max_width_image:
            max_width_image = to_image.shape[1]

    max_width_image = max_width_image + (POOL_SIZE -
                                         max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    labels = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(to_images)):
        image_vec = utils.img2vec(to_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        labels[i, :] = np.transpose(image_vec)
    return inputs, labels
Example No. 3
def get_next_batch_for_srgan(batch_size=128):
    inputs_images  = []
    targets_images = []
    max_width_image = 0
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(4, 5)
        font_size = 36 #random.randint(image_height, 64)    
        font_mode = random.choice([0,1,2,4]) 
        font_hint = random.choice([0,1,2,3,4,5])  # 2 was removed
        text  = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        image = utils_font.get_font_image_from_url(text, font_name, font_size, font_mode, font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)
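        # Keep the clean grayscale rendering as the super-resolution target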
        targets_image = image.copy()

        _h = random.randint(9, int(image_height // random.choice([1, 1.5, 2, 2.5])))
        image = utils_pil.resize_by_height(image, _h)
        image = utils_pil.resize_by_height(image, image_height, random.random() > 0.5)

        targets_image = np.asarray(targets_image)
        # targets_image = utils.resize(targets_image, height=image_height)
        # targets_image = utils_pil.convert_to_bw(targets_image)
        targets_images.append((255. - targets_image) / 255.)

        image = utils_font.add_noise(image)   
        image = np.asarray(image)
        # image = utils.resize(image, height=image_height)
        image = image * random.uniform(0.3, 1)
        if random.random()>0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        inputs_images.append(image)        

        if image.shape[1] > max_width_image: 
            max_width_image = image.shape[1]
        if targets_image.shape[1] > max_width_image: 
            max_width_image = targets_image.shape[1]      

    # max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i in range(batch_size):
        image_vec = utils.img2vec(targets_images[i], height=image_height, width=max_width_image, flatten=False)
        targets[i,:] = np.transpose(image_vec)

    return inputs, targets
Example No. 4
def scan(file):
    img = Image.open(file.stream)
    image = np.array(img)
    image = utils.img2gray(image)
    image = utils.clearImgGray(image)    
    utils.save(image, os.path.join(curr_dir,"test","src.png"))
    split_images = utils.splitImg(image)
    
    ocr_texts = []

    for i, split_image in enumerate(split_images):
        # image = utils.img2bwinv(split_image)
        image = utils.clearImgGray(split_image)
        # image = utils.clearBackgroundColor(image, 255)    
        image = 255. - image
        image = utils.dropZeroEdges(image)  
        image = utils.resize(image, ocr.image_height)
        utils.save(image,os.path.join(curr_dir,"test","%s.png"%i))
        image = image / 255.
        maxImageWidth = image.shape[1]
        maxImageWidth = maxImageWidth + (ocr.POOL_SIZE - maxImageWidth % ocr.POOL_SIZE)
        image_vec = utils.img2vec(image,ocr.image_height,maxImageWidth)
        ocr_inputs = np.zeros([1, maxImageWidth, ocr.image_height])
        ocr_inputs[0,:] = np.transpose(image_vec.reshape((ocr.image_height,maxImageWidth)))         
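        # After the model's two pooling stages the sequence length shrinks to
        # (width * height) // (POOL_SIZE * POOL_SIZE)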
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * (maxImageWidth * ocr.image_height) // (ocr.POOL_SIZE * ocr.POOL_SIZE)
        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len,  input_keep_prob: 1.0}
        start = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - start,2)
        print("filished ocr %s , paid %s seconds" % (i,seconds))
        detected_list = ocr.decode_sparse_tensor(decoded_list)            
        for detect_number in detected_list:
            ocr_texts.append(ocr.list_to_chars(detect_number))

    return ocr_texts
Example No. 5
def crack(file):
    img = Image.open(file.stream)
    image = np.array(img)
    image = img2vec(img2gray(image))
    y_ = sess.run([prediction], feed_dict={x: [image]})
    result = "".join([str(s) for s in y_[0][0]])
    return result
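Both scan and crack above receive an uploaded file object; the file.stream attribute suggests a Werkzeug/Flask FileStorage upload. A minimal sketch of wiring crack to an HTTP endpoint under that assumption (the app object and route below are illustrative, not from the original code):

from flask import Flask, request

app = Flask(__name__)

@app.route("/crack", methods=["POST"])
def crack_endpoint():
    # request.files["file"] is a werkzeug FileStorage, matching the file.stream usage above
    return crack(request.files["file"])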
Example No. 6
def scan(file):
    img_array = np.asarray(bytearray(file.stream.read()), dtype=np.uint8)
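    # Decode the uploaded bytes straight to a grayscale image (flag 0 = cv2.IMREAD_GRAYSCALE)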
    image = cv2.imdecode(img_array,0)
    split_images = utils.splitImg(image)
    
    ocr_texts = []

    for i, split_image in enumerate(split_images):
        image = utils.img2bwinv(split_image)
        image = utils.dropZeroEdges(image)  
        image = utils.resize(image, ocr.image_height)
        utils.save(image,os.path.join(curr_dir,"test","%s.png"%i))
        maxImageWidth = image.shape[1]+5
        image_vec = utils.img2vec(image,ocr.image_height,maxImageWidth)
        ocr_inputs = np.zeros([1, maxImageWidth, ocr.image_height])
        ocr_inputs[0,:] = np.transpose(image_vec.reshape((ocr.image_height,maxImageWidth)))         
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * maxImageWidth
        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len,  input_keep_prob: 1.0}
        start = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - start,2)
        print("filished ocr %s , paid %s seconds" % (i,seconds))
        detected_list = ocr.decode_sparse_tensor(decoded_list)            
        for detect_number in detected_list:
            ocr_texts.append(ocr.list_to_chars(detect_number))

    return ocr_texts
Example No. 7
def get_next_batch(batch_size=128):
    inputs = np.zeros([batch_size, image_size[1] * image_size[0]])
    labels = np.zeros([batch_size, label_size], dtype=int)
    batch = random.sample(train_files, batch_size)
    for i, line in enumerate(batch):
        lines = line.split(" ")
        imageFileName = lines[0] + ".png"
        text = line[line.index(' '):].strip()
        # In SimSun (宋体) at 9pt, 'O' and '0' render identically, so they were all treated as '0'
        # text = text.replace('O','0')
        # The text needs to be padded with spaces up to label_size
        text = text + "".join([' ' for x in range(label_size - len(text))])
        if imageFileName in images:
            imgvec = images[imageFileName]
        else:
            # The output image is inverted black-and-white
            image = readImgFile(os.path.join(curr_dir, DATA_DIR,
                                             imageFileName))
            image = img2bwinv(image)
            image = dropZeroEdges(image)
            image = resize(image, image_size[0])
            imgvec = img2vec(image, image_size[0], image_size[1])
            # images[imageFileName] = imgvec
        inputs[i, :] = imgvec
        label_list = []
        for c in text:
            if c in CHARS:
                label_list.append(CHARS.index(c))
            else:
                label_list.append(CHARS.index(UNKOWN_CHAR))
        labels[i, :] = label_list
    return inputs, labels
Example No. 8
def get_next_batch(batch_size=128):
    batch = random.sample(train_files, batch_size)    
    codes = []
    images = []
    max_width_image = 0
    for line in batch:
        lines = line.split(" ")
        imageFileName = lines[0]+".png"
        text = line[line.index(' '):].strip()
        # The input image is inverted black-and-white
        image = readImgFile(os.path.join(curr_dir, "data" ,"dataset", imageFileName))    
        images.append(image)
        if image.shape[1] > max_width_image: 
            max_width_image = image.shape[1]
        text_list = [CHARS.index(char) for char in text]
        codes.append(text_list)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = img2vec(images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.transpose(image_vec)

    labels = [np.asarray(i) for i in codes]
    # Convert labels to a sparse tensor
    sparse_labels = sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * max_width_image
    return inputs, sparse_labels, seq_len
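These CTC-style batches rely on a sparse_tuple_from helper to turn the label lists into the (indices, values, shape) triple that tf.SparseTensor and the CTC loss expect. The helper itself is not shown here; a minimal sketch of the common implementation it appears to follow:

import numpy as np

def sparse_tuple_from(sequences, dtype=np.int32):
    # Build (indices, values, dense_shape) for a tf.SparseTensor from a list of label sequences
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    dense_shape = np.asarray([len(sequences), indices[:, 1].max() + 1], dtype=np.int64)
    return indices, values, dense_shape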
Example No. 9
def get_next_batch(batch_size=128):
    codes = []
    images = []   
    max_width_image = 0
    font_min_length = random.randint(10, 80)
    for i in range(batch_size):
        font_name = random.choice(FontNames)
        font_length = random.randint(font_min_length-5, font_min_length+5)
        font_size = random.randint(14, 64)        
        text, image= getImage(CHARS, font_name, image_height, font_length, font_size, eng_world_list)
        images.append(image)
        if image.shape[1] > max_width_image: 
            max_width_image = image.shape[1]
        text_list = [CHARS.index(char) for char in text]
        codes.append(text_list)

    # Round the width up to a multiple of 4
    max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = img2vec(images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.transpose(image_vec)

    labels = [np.asarray(i) for i in codes]
    # Convert labels to a sparse tensor
    sparse_labels = sparse_tuple_from(labels)
    # The model applies pooling twice, so seq_len must also be divided by 4
    seq_len = np.ones(batch_size) * (max_width_image * image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, sparse_labels, seq_len
Example No. 10
def get_next_batch(batch_size=128):
    codes = []
    images = []   
    max_width_image = 0
    for i in range(batch_size):
        font_name = random.choice(FontNames)
        font_length = random.randint(50, 100)
        font_size = random.randint(9, 20)        
        text, image= getImage(CHARS, font_name, image_height, font_length, font_size)
        images.append(image)
        if image.shape[1] > max_width_image: 
            max_width_image = image.shape[1]
        text_list = [CHARS.index(char) for char in text]
        codes.append(text_list)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = img2vec(images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.transpose(image_vec)

    labels = [np.asarray(i) for i in codes]
    # Convert labels to a sparse tensor
    sparse_labels = sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * max_width_image
    return inputs, sparse_labels, seq_len
Example No. 11
def get_next_batch(batch_size=128):
    images = []
    to_images = []
    max_width_image = 0
    font_min_length = random.randint(10, 20)
    for i in range(batch_size):
        font_name = random.choice(FontNames)
        font_length = random.randint(font_min_length - 5, font_min_length + 5)
        font_size = random.randint(9, 64)
        text, image = utils.getImage(CHARS, font_name, image_height,
                                     font_length, font_size, eng_world_list)
        images.append(image)
        if image.shape[1] > max_width_image:
            max_width_image = image.shape[1]
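        # Render the same text in a clean reference font (Consolas) to use as the target image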
        to_image = utils.renderNormalFontByPIL(ConsolasFont, 64, text)
        to_image = utils.trim(to_image)

        w, h = to_image.size
        _w = round(w * image_height / h)
        _h = image_height
        if _w > max_width_image:
            _w = max_width_image
            _h = round(h * max_width_image / w)

        to_image = to_image.resize((_w, _h), Image.ANTIALIAS)
        to_image = np.asarray(to_image)
        #to_image=utils.resize(to_image, height=image_height)
        to_image = utils.img2gray(to_image)
        to_image = to_image / 255
        to_images.append(to_image)

    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    labels = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(to_images)):
        image_vec = utils.img2vec(to_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        labels[i, :] = np.transpose(image_vec)
    return inputs, labels
Example No. 12
def get_batch(batch_size=128):
    batch_x = np.zeros([batch_size, image_size])
    batch_y = np.zeros([batch_size, captcha_size])
    for i in range(batch_size):
        text, image = captcha(
            char_set=char_set, captcha_size=captcha_size, width=image_w, height=image_h)
        batch_x[i, :] = img2vec(img2gray(image))
        batch_y[i, :] = list(text)  # Note: the label here must not be one-hot encoded
    return batch_x, batch_y
Example No. 13
def get_batch(batch_size=128):
    batch_x = np.zeros([batch_size, image_size])
    batch_y = np.zeros([batch_size, char_size])
    for i in range(batch_size):
        text, image = captcha(char_set=char_set,
                              captcha_size=1,
                              width=image_w,
                              height=image_h)
        batch_x[i, :] = img2vec(img2gray(image))
        batch_y[i, :] = text2vec(char_set, text)
    return batch_x, batch_y
Example No. 14
def get_batch(batch_size=128):
    batch_files = random.sample(files, batch_size)

    batch_x = np.zeros([batch_size, image_size])
    batch_y = np.zeros([batch_size, captcha_size * char_size])
    for i in range(batch_size):
        image = loadimg(os.path.join(fonts_dir, batch_files[i]))
        text = batch_files[i][:4]
        batch_x[i, :] = img2vec(img2gray(image))
        batch_y[i, :] = text2vec(char_set, text)
    return batch_x, batch_y
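The two get_batch snippets above encode the label text with a text2vec helper whose output width is captcha_size * char_size, i.e. a flattened one-hot block per character. The helper is not shown; a minimal sketch under that assumption:

import numpy as np

def text2vec(char_set, text):
    # One block of len(char_set) positions per character, flattened into a single vector
    vec = np.zeros(len(text) * len(char_set))
    for i, ch in enumerate(text):
        vec[i * len(char_set) + char_set.index(ch)] = 1
    return vec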
Example No. 15
def get_next_batch_for_res(batch_size=128):
    images = []   
    codes = []
    max_width_image = 0
    info = ""
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        if random.random()>0.5:
            font_size = random.randint(8, 49)    
        else:
            font_size = random.randint(8, 15) 
        font_mode = random.choice([0,1,2,4]) 
        font_hint = random.choice([0,1,2,3,4,5])  # 2 was removed
        text = random.sample(CHARS, 12)
        text = text+text+[" "," "]
        random.shuffle(text)
        text = "".join(text).strip()
        codes.append([CHARS.index(char) for char in text])          
        image = utils_font.get_font_image_from_url(text, font_name, font_size, font_mode, font_hint )
        image = utils_pil.resize_by_height(image, image_height, random.random()>0.5)
        image = utils_font.add_noise(image)   
        image = utils_pil.convert_to_gray(image)                   
        image = np.asarray(image)     
        image = utils.resize(image, height=image_height)
        if random.random()>0.5:
            image = (255. - image) / 255.
        else:
            image = image / 255.
        images.append(image)
        if image.shape[1] > max_width_image: 
            max_width_image = image.shape[1]
        info = info+"%s\n\r" % utils_font.get_font_url(text, font_name, font_size, font_mode, font_hint)
    max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.transpose(image_vec)

    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
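    # After two pooling stages the sequence length shrinks by a factor of POOL_SIZE * POOL_SIZE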
    seq_len = np.ones(batch_size) * (max_width_image * image_height ) // (POOL_SIZE * POOL_SIZE)                
    return inputs, sparse_labels, seq_len, info
Example No. 16
def scan():
    session, inputs, seq_len, input_keep_prob, decoded, log_prob = init()
    need_ocr_images = utils.loadImage(os.path.join(curr_dir, 'test', '1.jpg'),
                                      0)
    ocr_text_groups = []
    for idx, images_group in enumerate(need_ocr_images):
        # if idx != 1: continue
        ocr_texts = []

        # Use the largest width as the common input width for this group
        widths = [image.shape[1] for image in images_group]
        maxImageWidth = max(widths) + 5

        ocr_inputs = np.zeros(
            [len(images_group), maxImageWidth, ocr.image_height])
        for i, image in enumerate(images_group):
            # image = utils.dropZeroEdges(image)
            # utils.show(utils.dropZeroEdges(image))
            utils.save(image,
                       os.path.join(curr_dir, "test", "%s-%s.png" % (idx, i)))
            image_vec = utils.img2vec(image, ocr.image_height, maxImageWidth)
            ocr_inputs[i, :] = np.transpose(
                image_vec.reshape((ocr.image_height, maxImageWidth)))
            # utils.show(image)
            # return
        ocr_seq_len = np.ones(ocr_inputs.shape[0]) * maxImageWidth

        feed = {inputs: ocr_inputs, seq_len: ocr_seq_len, input_keep_prob: 1.0}
        print("starting ocr inputs %s ..." % idx)
        start = time.time()
        decoded_list = session.run(decoded[0], feed)
        seconds = round(time.time() - start, 2)
        print("filished ocr inputs %s, paid %s seconds" % (idx, seconds))
        detected_list = ocr.decode_sparse_tensor(decoded_list)
        for detect_number in detected_list:
            ocr_texts.append(ocr.list_to_chars(detect_number))

        ocr_text_groups.append(ocr_texts)
        # break
    return ocr_text_groups
Example No. 17
def get_next_batch_for_res(batch_size=128, has_sparse=True, has_onehot=True, \
                            max_width=4096, height=32, need_pad_width_to_max_width=False):
    inputs_images = []
    codes = []
    # Maximum image width within this batch
    max_width_image = 0
    info = []
    seq_len = np.ones(batch_size)

    for i in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("finished training on one data file, stopping")

        dataset_example.ParseFromString(serialized_example)

        font_name = str(
            dataset_example.features.feature['font_name'].bytes_list.value[0],
            encoding="utf-8")
        font_size = dataset_example.features.feature[
            'font_size'].int64_list.value[0]
        font_mode = dataset_example.features.feature[
            'font_mode'].int64_list.value[0]
        font_hint = dataset_example.features.feature[
            'font_hint'].int64_list.value[0]

        text = str(
            dataset_example.features.feature['label'].bytes_list.value[0],
            encoding="utf-8")
        size = dataset_example.features.feature['size'].int64_list.value
        image = dataset_example.features.feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        # Convert the image to grayscale
        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > height:
            image = utils_pil.resize_by_height(image, height)

        # Randomly shift the image position
        image = utils_pil.resize_by_height(image,
                                           height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, height)

        # Add noise
        image = utils_font.add_noise(image)

        # Convert to OpenCV (numpy array) format
        image = np.asarray(image)
        # Scale by height by default; if the width exceeds max_width, scale by width instead
        image = utils.resize(image, height, max_width)

        # Randomly invert the colors and normalize to [0, 1]
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        # Track the largest image width seen so far
        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]

        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([
            font_name,
            str(font_size),
            str(font_mode),
            str(font_hint),
            str(len(text))
        ])

    # Round the width up to a multiple of 4
    if max_width_image % 4 > 0:
        max_width_image = max_width_image + 4 - max_width_image % 4

    # If the image exceeds the maximum width, raise an exception rather than rescaling
    if max_width_image > max_width:
        raise Exception("image width must satisfy %s <= %s" %
                        (max_width_image, max_width))

    if need_pad_width_to_max_width:
        max_width_image = max_width

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.reshape(image_vec,
                                  (image_height, max_width_image, 1))

    labels = [np.asarray(i) for i in codes]

    sparse_labels = None
    onehot_labels = None
    if has_sparse:
        sparse_labels = utils.sparse_tuple_from(labels)
        sparse_labels = np.array(sparse_labels)
    if has_onehot:
        onehot_labels = []
        for label in labels:
            label_one_hot = np.eye(CLASSES_NUMBER)[label]
            onehot_labels.append(label_one_hot)
        onehot_labels = np.array(onehot_labels)

    return inputs, np.array(labels), sparse_labels, onehot_labels, info
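This example (and Example No. 19 below) reads training samples from a pre-generated TFRecord file through module-level dataset and dataset_example objects that are not shown. A minimal sketch of that setup, assuming the TF 1.x record-iterator API and an illustrative file name:

import tensorflow as tf

# Iterator over raw serialized records; next(dataset, None) yields bytes, or None at end of file
dataset = tf.python_io.tf_record_iterator("dataset/train.tfrecords")
# Reusable protobuf that ParseFromString fills in for each record
dataset_example = tf.train.Example()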
Example No. 18
def get_next_batch(batch_size=128):
    images = []
    to_images = []
    codes = []
    max_width_image = 0
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36  #random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        # text = random.sample(CHARS, 12)
        # text = text+text
        # random.shuffle(text)
        # text = "".join(text).strip()
        codes.append([CHARS.index(char) for char in text])
        image = utils_font.get_font_image_from_url(text,
                                                   font_name,
                                                   font_size,
                                                   fontmode=font_mode,
                                                   fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        to_image = image.copy()
        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        _h = random.randint(9, int(image_height // random.choice([1, 1.5, 2, 2.5])))
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        # to_image = utils_font.get_font_image_from_url(text, font_name ,image_height, fontmode = font_mode, fonthint = font_hint)
        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        if image.shape[1] > max_width_image:
            max_width_image = image.shape[1]
        if to_image.shape[1] > max_width_image:
            max_width_image = to_image.shape[1]

    max_width_image = max_width_image + (POOL_SIZE -
                                         max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(to_images)):
        image_vec = utils.img2vec(to_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        targets[i, :] = np.transpose(image_vec)

    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image *
                                     image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
Example No. 19
def get_next_batch_for_res(batch_size=128):
    inputs_images = []   
    codes = []
    max_width_image = 0
    info = []
    seq_len = np.ones(batch_size)

    for i in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("finished training on one data file, stopping")

        dataset_example.ParseFromString(serialized_example)

        font_name = str(dataset_example.features.feature['font_name'].bytes_list.value[0],  encoding="utf-8")
        font_size = dataset_example.features.feature['font_size'].int64_list.value[0]
        font_mode = dataset_example.features.feature['font_mode'].int64_list.value[0]
        font_hint = dataset_example.features.feature['font_hint'].int64_list.value[0]

        text = str(dataset_example.features.feature['label'].bytes_list.value[0],  encoding="utf-8")
        size = dataset_example.features.feature['size'].int64_list.value
        image = dataset_example.features.feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        image = utils_pil.convert_to_gray(image) 
        w, h = size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)  

        image = utils_pil.resize_by_height(image, image_height-random.randint(1,5))
        image, _ = utils_pil.random_space2(image, image,  image_height)
        
        image = utils_font.add_noise(image)   
        image = np.asarray(image) 

        image = utils.resize(image, image_height, MAX_IMAGE_WIDTH)

        if random.random()>0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]
          
        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])                  

        info.append([font_name, str(font_size), str(font_mode), str(font_hint), str(len(text))])
        seq_len[i]=len(text)+1

    # Round the width up to a multiple of 4
    # if max_width_image % 4 > 0:
    #     max_width_image = max_width_image + 4 - max_width_image % 4

    # Pad the batch width up to MAX_IMAGE_WIDTH if the batch is narrower
    if max_width_image < MAX_IMAGE_WIDTH:
        max_width_image = MAX_IMAGE_WIDTH
        # raise Exception("img width must %s <= %s " % (max_width_image, MAX_IMAGE_WIDTH))

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.reshape(image_vec,(image_height, max_width_image, 1))
     
    # print(inputs.shape, len(codes))
    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)

    # max_width_image = math.ceil((max_width_image-3+1.)/2.)
    # max_width_image = math.ceil((max_width_image-3+1.)/1.)
    # max_width_image = math.ceil((max_width_image-3+1.)/2.)
    # max_width_image = math.ceil((max_width_image-3+1.)/1.)
    # max_width_image = math.ceil((max_width_image-3+1.)/2.)

    seq_len = np.ones(batch_size) * SEQ_LENGTH
    # print(inputs.shape, seq_len.shape, [len(l) for l in labels])
    return inputs, sparse_labels, seq_len, info
Example No. 20
checkpoint_prefix = os.path.join(log_dir, "model.ckpt")

# Find the most recent model graph (.meta) file
metaFile = sorted(
    [
        (x, os.path.getctime(os.path.join(log_dir,x)))                  
        for x in os.listdir(log_dir) if x.endswith('.meta')  
    ],
    key=lambda i: i[1])[-1][0]

sess = tf.Session()

saver = tf.train.import_meta_graph(os.path.join(log_dir,metaFile))
ckpt = tf.train.get_checkpoint_state(log_dir)
if ckpt and ckpt.model_checkpoint_path:
    saver.restore(sess, ckpt.model_checkpoint_path)
else:
    print("error: can't load checkpoint data")
    exit()

text, image = captcha(char_set="0123456789", captcha_size=4, width=200, height=80)

image = img2vec(img2gray(image)) 

x = tf.get_default_graph().get_tensor_by_name('x:0')
prediction = tf.get_default_graph().get_tensor_by_name('prediction:0')

y_ = sess.run([prediction], feed_dict={x: [image]})
result = "".join([str(x) for x in y_[0][0]])
print("input: %s , get: %s"%(text,result))