Example #1
    def __load_data(self):
        """
            Load all the images in the folder
        """

        print('Loading data')

        examples = []

        count = 0
        skipped = 0
        for f in os.listdir(self.examples_path):
            if len(f.split('_')[0]) > self.max_char_count:
                continue
            arr, initial_len = resize_image(
                os.path.join(self.examples_path, f),
                self.max_image_width
            )
            examples.append(
                (
                    arr,
                    f.split('_')[0],
                    label_to_array(f.split('_')[0])
                )
            )
            count += 1

        return examples, len(examples)
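Every example on this page calls a resize_image helper that returns both the resized array and its pre-padding width, but the helper itself is not shown, and the examples disagree on whether it takes a file path or an array. The following is only a minimal sketch of what such a helper could look like; the OpenCV resize, the 32-pixel target height, and the zero-padding to max_width are assumptions inferred from the batch builders further down, which reshape images to (max_image_width, 32, 1).

import cv2
import numpy as np

def resize_image_sketch(image, max_width, height=32):
    """Hypothetical stand-in for resize_image: fixed height, width padded to max_width.

    Returns (padded_array, scaled_width), mirroring the (arr, initial_len)
    tuple unpacked in the loaders above.
    """
    if isinstance(image, str):  # some examples pass a path, others an array
        image = cv2.imread(image, cv2.IMREAD_GRAYSCALE)
    scale = height / image.shape[0]
    new_width = min(int(image.shape[1] * scale), max_width)
    resized = cv2.resize(image, (new_width, height))
    padded = np.zeros((height, max_width), dtype=resized.dtype)
    padded[:, :new_width] = resized
    return padded, new_width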
Example #2
    def load_data(self):
        """
        Load all the images in the folder

        return: List with tuples (img_arr, label_string, label_index_array) and list length
        """

        # TODO: Change this for a different data format.
        print("Loading data")

        examples = []

        count = 0
        skipped = 0
        for f in os.listdir(self.examples_path):
            if len(f.split("_")[0]) > self.max_char_count:
                continue
            arr, initial_len = resize_image(
                np.array(Image.open(os.path.join(
                    self.examples_path, f), mode="r")),
                self.max_image_width,
            )
            examples.append(
                (
                    arr,
                    f.split("_")[0],
                    label_to_array(f.split("_")[0], self.char_vector),
                )
            )
            count += 1
        
        if len(examples) < self.batch_size:
            raise ValueError("Error: Data less than batch size")

        return examples, len(examples)
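label_to_array appears in every loader but is never defined on this page. From its call sites (a label string plus, in most projects, a character vector, feeding into sparse_tuple_from) it is presumably a simple character-to-index lookup. A minimal sketch, with the name suffixed to mark it as an assumption; examples that call label_to_array(label) with a single argument presumably close over a module-level character set instead.

def label_to_array_sketch(label, char_vector):
    """Hypothetical stand-in for label_to_array: one index per character.

    Raises ValueError if the label contains a character outside char_vector.
    """
    return [char_vector.index(ch) for ch in label]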
Example #3
    def __load_data(self):
        """
            Load all the images in the folder
        """

        print('Loading data...')

        examples = []
        count = 0
        skipped = 0

        files = os.listdir(self.examples_path)

        for i in range(10):
            random.shuffle(files)

        for f in files:
            if len(f.split('_')[0]) > self.max_char_count:
                continue
            arr, initial_len = read_image(
                os.path.join(self.examples_path, f)
            )
            examples.append(
                (
                    arr,
                    f.split('_')[0],
                    label_to_array(f.split('_')[0], self.char_vector)
                )
            )
            #print(f.split('_')[0], label_to_array(f.split('_')[0], self.char_vector))
            count += 1

        print("Loaded!")
        return examples, len(examples)
Example #4
    def __load_data(self):
        """
            Load all the images in the folder
        """

        print('Loading data')

        examples = []

        count = 0
        skipped = 0
        for i, f in enumerate(os.listdir(self.examples_path)):
            if i > 100000:
                break
            if len(f.split('_')[0]) > self.max_char_count:
                continue
            arr, initial_len = resize_image(
                os.path.join(self.examples_path, f),
                self.max_image_width
            )
            examples.append(
                (
                    arr,
                    f.split('_')[0].lower(),
                    label_to_array(f.split('_')[0].lower()),
                    label_to_array_2(f.split('_')[0].lower())
                )
            )
            count += 1

        print(count)

        return examples, len(examples)
Example #5
    def batch_generator(self, queue):
        """Takes a queue and enqueue batches in it
        """

        generator = GeneratorFromDict(language=self.language)
        while True:
            batch = []
            while len(batch) < self.batch_size:
                img, lbl = generator.next()
                batch.append((
                    resize_image(np.array(img.convert("L")),
                                 self.max_image_width)[0],
                    lbl,
                    label_to_array(lbl, self.char_vector),
                ))

            raw_batch_x, raw_batch_y, raw_batch_la = zip(*batch)

            batch_y = np.reshape(np.array(raw_batch_y), (-1))

            batch_dt = sparse_tuple_from(
                np.reshape(np.array(raw_batch_la), (-1)))

            raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)

            batch_x = np.reshape(
                np.array(raw_batch_x),
                (len(raw_batch_x), self.max_image_width, 32, 1))
            if queue.qsize() < 20:
                queue.put((batch_y, batch_dt, batch_x))
            else:
                pass
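sparse_tuple_from packs the list of per-label index arrays into the (indices, values, dense_shape) triple expected by tf.sparse_placeholder and tf.nn.ctc_loss. It is also not shown here; the sketch below follows the conventional CTC-example implementation and is an assumption, not the code these projects actually use.

import numpy as np

def sparse_tuple_from_sketch(sequences, dtype=np.int32):
    """Hypothetical stand-in for sparse_tuple_from."""
    indices, values = [], []
    for n, seq in enumerate(sequences):
        # one (row, column) index pair per character of each label
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    max_len = indices[:, 1].max() + 1 if len(indices) else 0
    shape = np.asarray([len(sequences), max_len], dtype=np.int64)
    return indices, values, shape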
Example #6
    def load_data(self):
        """Load all the images in the folder
        """

        print("Loading data")

        examples = []

        count = 0
        skipped = 0
        for f in os.listdir(self.examples_path):
            if len(f.split("_")[0]) > self.max_char_count:
                continue
            arr, initial_len = resize_image(
                imread(os.path.join(self.examples_path, f), mode="L"),
                self.max_image_width,
            )
            examples.append((
                arr,
                f.split("_")[0],
                label_to_array(f.split("_")[0], self.char_vector),
            ))
            count += 1

        return examples, len(examples)
Example #7
    def __load_data(self):
        """
        load all the images in the folder
        :return:
        """
        print("Loading data")

        examples = []

        count = 0
        skipped = 0
        for f in os.listdir(self.examples_path):
            # Skip labels longer than the maximum character count
            if len(f.split("_")[0]) > self.max_char_count:
                continue
            arr, initial_len = resize_image(os.path.join(self.examples_path, f), self.max_image_width)

            examples.append(
                (
                    arr,
                    f.split("_")[0],
                    label_to_array(f.split("_")[0])
                )
            )
            imsave("blah.png", arr)  # ???
            count += 1

        return examples, len(examples)
Example #8
    def __iter__(self):
        examples = []
        for f in os.listdir(self.examples_path):
            label, _ = f.split('_')
            if len(f.split('_')[0]) > self.max_char_count:
                continue
            arr, _ = resize_image(os.path.join(self.examples_path, f),
                                  self.max_image_width)
            # to lower
            label_lower = label.lower()
            examples.append((arr, label_lower, label_to_array(label_lower)))

            if len(examples) == self.batch_size:
                raw_batch_x, raw_batch_y, raw_batch_la = zip(*examples)
                batch_y = np.reshape(np.array(raw_batch_y), (-1))

                batch_dt = sparse_tuple_from(np.array(raw_batch_la))

                raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)

                batch_x = np.reshape(
                    np.array(raw_batch_x),
                    (len(raw_batch_x), self.max_image_width, 32, 1))
                yield (batch_y, batch_dt, batch_x)
                examples = []
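Because example #8 yields batches from __iter__, the consuming code is just a for loop over the data-manager instance; data_manager below is a placeholder name for whatever class defines the method.

# Hypothetical usage of the iterator-style loader in example #8.
for batch_y, batch_dt, batch_x in data_manager:
    # batch_y: label strings, batch_dt: sparse (indices, values, shape) targets,
    # batch_x: images shaped (batch_size, max_image_width, 32, 1)
    ...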
Example #9
    def __load_train_data(self):
        """
        load all train data
        """
        print("loading train data")
        examples = []
        
        filename = './data/train/*.tfrecords'
        files = tf.train.match_filenames_once(filename)
        filename_queue = tf.train.string_input_producer(files,shuffle=True,num_epochs=1)
        reader = tf.TFRecordReader()
        _,serialized_example = reader.read(filename_queue)
        features = tf.parse_single_example(serialized_example,
                                           features={
                                                'label': tf.FixedLenFeature([],tf.string),
                                                'img_raw': tf.FixedLenFeature([],tf.string),
                                                'row': tf.FixedLenFeature([],tf.int64),
                                                'col': tf.FixedLenFeature([],tf.int64)
                                           })
        image = tf.decode_raw(features['img_raw'],tf.uint8)
        img_label = features['label']  # TF stores strings as bytes; decode them with bytes.decode
        row = tf.cast(features['row'],tf.int64)
        col = tf.cast(features['col'],tf.int64)
        with tf.Session() as sess:
            # Local and global variables may be defined here; initialize them all before running
            init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
            sess.run(init_op)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            images = []
            i = 0
            try:
                while True:
                    i = i + 1
                    image1,label1,height,width = sess.run([image,img_label,row,col])

                    label1 = bytes.decode(label1)  # convert from the binary representation
                    image1 = np.reshape(np.array(image1),(height,width))
                    arr, initial_len = resize_train_image(image1,self.max_image_width)
                    strs = label1.split('_')
                    label_without_ = ''.join(strs)  # label with the underscores removed
                    # print(label_without_)
                    # could be saved to TensorBoard for inspection
                    examples.append(
                        (
                            arr,
                            label_without_,
                            label_to_array(label_without_)
                        )
                    )            
                    
            except tf.errors.OutOfRangeError:
                print("done!")
            coord.request_stop()
            coord.join(threads)
        print(i)

        return examples, len(examples)
Example #10
    def __load_data(self):
        """
            Load all the images in the folder
        """

        print('Loading data')

        examples = []

        count = 0
        skipped = 0
        for f in os.listdir(self.examples_path):
            if len(f.split('_')[0]) > self.max_char_count:
                continue
            arr, initial_len = resize_image(
                os.path.join(self.examples_path, f), self.max_image_width)
            examples.append(
                (arr, f.split('_')[0], label_to_array(f.split('_')[0])))
            count += 1

        return examples, len(examples)
Example #11
    def __generate_tfRecord_batch(self):
        """
        load one batch tfRecord
        """
            
        examples = []        
        images,labels,heights,widths = self.iterator.get_next()
        
        with tf.Session() as sess:
            for i in range(self.batch_size):
                # Variable-length TensorFlow batches are cumbersome, so fetch one
                # record at a time and assemble the batch manually.
                image_batch, label_batch, height_batch, width_batch = sess.run(
                    [images, labels, heights, widths])

                image1 = image_batch[0]
                label1 = label_batch[0]
                height = height_batch[0]
                width = width_batch[0]

                #parse to data type
                label1 = bytes.decode(label1)

                #print(label1)

                image1 = np.reshape(np.array(image1),(height,width))
                arr, initial_len = resize_train_image(image1,self.max_image_width)               

                # could be saved to TensorBoard for inspection
                #r,c = np.shape(arr)
                #new_img = tf.reshape(arr,(r,c))
                # pic_num = pic_num + 1
                #pics.append(new_img)

                examples.append(
                    (
                        arr,
                        label1,
                        label_to_array(label1)
                    )
                )
        #print(len(examples))
        return examples
Example #12
 def train(self, iteration_count):
     with self.__session.as_default():
         print('Training')
         for i in range(iteration_count):
             iter_loss = 0
             for batch_y, batch_sl, batch_x in self.__data_manager.get_next_train_batch(
             ):
                 data_targets = np.asarray([
                     label_to_array(lbl, config.CHAR_VECTOR)
                     for lbl in batch_y
                 ])
                 data_targets = sparse_tuple_from(data_targets)
                 _, loss_value, decoded = self.__session.run(
                     [self.__optimizer, self.__loss, self.__decoded],
                     feed_dict={
                         self.__inputs: batch_x,
                         self.__seq_len: batch_sl,
                         self.__targets: data_targets
                     })
                 iter_loss += loss_value
             print('[{}] Iteration loss: {}'.format(i, iter_loss))
     return None
Example #13
    def train(self, iteration_count):
        with self.__session.as_default():
            print('Training')
            for i in range(self.step, iteration_count + self.step):
                iter_loss = 0
                for batch_y, batch_sl, batch_x in self.__data_manager.get_next_train_batch(
                ):
                    data_targets = np.asarray([
                        label_to_array(lbl, config.CHAR_VECTOR)
                        for lbl in batch_y
                    ])
                    data_targets = sparse_tuple_from(data_targets)
                    op, decoded, loss_value = self.__session.run(
                        [self.__optimizer, self.__decoded, self.__cost],
                        feed_dict={
                            self.__inputs:
                            batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size,
                            self.__targets:
                            data_targets
                        })

                    if i % 10 == 0:
                        for j in range(2):
                            print(batch_y[j])
                            print(ground_truth_to_word(decoded[j]))

                    iter_loss += loss_value

                self.__saver.save(self.__session,
                                  self.__save_path,
                                  global_step=self.step)

                print('[{}] Iteration loss: {}'.format(self.step, iter_loss))

                self.step += 1
        return None
Example #14
 def test(self):
     with self.__session.as_default():
         print('Testing')
         total_error = 0
         example_count = 0
         for batch_y, batch_sl, batch_x in self.__data_manager.get_next_test_batch(
         ):
             data_targets = np.asarray([
                 label_to_array(lbl, config.CHAR_VECTOR) for lbl in batch_y
             ])
             data_targets = sparse_tuple_from(data_targets)
             decoded = self.__session.run([self.__decoded],
                                          feed_dict={
                                              self.__inputs: batch_x,
                                              self.__seq_len: batch_sl
                                          })
             example_count += len(batch_y)
             total_error += np.sum(
                 levenshtein(ground_truth_to_word(batch_y),
                             ground_truth_to_word(decoded)))
         print('Error on test set: {} ({} per example)'.format(
             total_error, total_error / example_count))
     return None
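ground_truth_to_word, used in examples #13 and #14 to turn decoded index sequences back into strings, is not shown on this page either. Assuming it is simply the inverse lookup of label_to_array over the project's character vector, a minimal sketch:

def ground_truth_to_word_sketch(ground_truth, char_vector):
    """Hypothetical inverse of label_to_array: indices back to characters.

    Indices outside char_vector (for example the CTC blank) are skipped.
    """
    return ''.join(char_vector[i] for i in ground_truth if 0 <= i < len(char_vector))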
Example #15
    def __load_data(self):
        """
            Load all the images in the folder
        """

        print('Loading data')

        examples = []
        count = 0
        skipped = 0
        # for f in os.listdir(self.examples_picture_path):
        #     if len(f.split('_')[0]) > self.max_char_count:
        #         continue
        #     arr, initial_len = resize_image(
        #         os.path.join(self.examples_path, f),
        #         self.max_image_width
        #     )
        with open(self.examples_label_path,
                  'r') as f:  # Address of target_label.txt
            for line in f.readlines():
                address = line.split("__")[0]

                label = line.split("__")[1]
                if len(label) > self.max_char_count:
                    continue
                if list(label)[0] == '#':
                    continue
                img = cv2.imread(address, cv2.IMREAD_GRAYSCALE)
                arr, initial_len = resize_image(img, self.max_image_width)
                dictionary, _, dictionary_len = read_dictionary(
                    self.dictionary_path)

                examples.append((arr, label, label_to_array(label,
                                                            dictionary)))
                count += 1
                dictionary_len = dictionary_len + 1  #!
        return examples, len(examples), dictionary_len
Example #16
    def __load_data(self):
        """
            Load all the images in the folder
        """

        print('Loading data from {}'.format(self.examples_path))

        examples = []

        count = 0
        skipped = 0
        for f in os.listdir(self.examples_path):
            if "(" in f:
                os.remove(os.path.join(self.examples_path, f))
                continue
            if len(self.get_label(f)) > self.max_char_count:
                continue
            arr, initial_len = resize_image(
                os.path.join(self.examples_path, f), self.max_image_width)
            examples.append((arr, f, label_to_array(self.get_label(f))))
            imsave('blah.png', arr)
            count += 1
        shuffle(examples)
        return examples, len(examples)
Example #17
File: train.py  Project: lxhsjtu/crnn-lxh
def main(args):
    print('===========load dict===========')
    data_dict = loaddict()
    iteration_count = 1000
    batch_size = 64
    batch_image = 400000
    log_save_dir = "..//model//"
    restore = True

    # The training data
    print('==============load data=============')
    imagefiles = []
    with codecs.open("image_path.txt", 'r', encoding='utf-8') as file:
        line = file.readline()
        while line:
            imagefiles.append(line.strip())
            line = file.readline()
        file.close()
    # data= load_data(data_dir)
    # print('data size:', len(data))

    # perm=np.arange(len(data))
    # np.random.shuffle(perm)
    # data=np.asarray(data)
    # train_data=data[perm]
    # test_data = data[int(len(data) * 0.10):]
    graph = tf.Graph()
    with graph.as_default():
        inputs = tf.placeholder(tf.float32, [batch_size, 32, None, 3],
                                name='inputs')
        # The CRNN
        crnn = CRNN(inputs)
        # Our target output
        targets = tf.sparse_placeholder(tf.int32, name='targets')
        # The length of the sequence
        seq_len = tf.placeholder(tf.int32, [None], name='seq_len')
        logits = tf.reshape(crnn, [-1, 512])  #(batchsize x 37) x 512
        W = tf.Variable(tf.truncated_normal([512, config.NUM_CLASSES],
                                            stddev=0.1,
                                            dtype=tf.float32),
                        name="W")
        b = tf.Variable(tf.constant(0.,
                                    shape=[config.NUM_CLASSES],
                                    dtype=tf.float32),
                        name="b")
        print(logits.get_shape())
        logits = tf.matmul(logits, W) + b
        print(logits.get_shape())
        logits = tf.reshape(logits, [batch_size, -1, config.NUM_CLASSES
                                     ])  # batch_size x 36 x NUM_CLASSES
        print(logits.get_shape())
        # Final layer, the output of the BLSTM
        logits = tf.transpose(logits,
                              (1, 0, 2))  #36 x batch_size x NUM_CLASSES
        global_step = tf.Variable(0, trainable=False)
        # Loss and cost calculation
        loss = tf.nn.ctc_loss(targets, logits, seq_len)
        cost = tf.reduce_mean(loss)
        # learning_rate = tf.train.exponential_decay(0.1,
        #                                            global_step,
        #                                            5000,
        #                                            0.1, staircase=True)
        # Training step
        # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9).minimize(cost)
        # optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(cost,global_step=global_step)
        optimizer = tf.train.AdadeltaOptimizer(learning_rate=0.001).minimize(
            loss=cost, global_step=global_step)
        # The decoded answer
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len)
        # The error rate
        seq_dis = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))
    config_gpu = tf.ConfigProto()
    config_gpu.gpu_options.allow_growth = True
    with tf.Session(graph=graph, config=config_gpu) as sess:
        # tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables())
        # Train
        if restore:
            print('=============load model============')
            ckpt = tf.train.get_checkpoint_state("../model/")
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("load success")
            else:
                print("no such file")
                return
        print('============begin training=============')
        for it in range(0, iteration_count):
            i = 0
            for iter in range(1 + (len(imagefiles) // batch_image)):
                imagepath = imagefiles[iter * batch_image:(iter + 1) *
                                       batch_image]
                train_data = load_data_big(imagepath)
                for b in [
                        train_data[x * batch_size:x * batch_size + batch_size]
                        for x in range(0, int(len(train_data) / batch_size))
                ]:
                    start_time = time.time()
                    in_data, labels, data_seq_len = zip(*b)
                    # print(data_seq_len)
                    data_targets = np.asarray(
                        [label_to_array(lbl, data_dict) for lbl in labels])
                    data_targets = sparse_tuple_from(data_targets)
                    # print(np.shape(data_targets[0]))
                    # print(np.shape(data_targets[1]))
                    # print(np.shape(data_targets[2]))
                    # print(data_targets[0])
                    # print(data_targets[1])
                    # print(data_targets[2])
                    data_shape = np.shape(in_data)
                    in_data = np.reshape(
                        in_data,
                        (data_shape[0], data_shape[1], data_shape[2], 3))
                    costacc, _ = sess.run(
                        [cost, optimizer], {
                            inputs: in_data,
                            targets: data_targets,
                            seq_len: data_seq_len,
                        })
                    i += 1
                    print('epoch:{}/1000,cost={},iter={},time={}'.format(
                        it, costacc, i,
                        time.time() - start_time))
                del train_data
                gc.collect()
                print("completed 400000 images")
                if (it % 1 == 0):
                    checkpoint_path = os.path.join(log_save_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path)
                # iter_avg_cost += (np.sum(cost_val) / batch_size) / (int(len(train_data) / batch_size))
            print("complete one epoch")
Example #18
def main(args):
    """
        Usage: train.py [iteration_count] [batch_size] [data_dir] [log_save_dir] [graph_save_dir]
    """

    # The user-defined training parameters
    iteration_count = int(args[1])
    batch_size = int(args[2])
    data_dir = args[3]
    log_save_dir = args[4]
    graph_save_dir = args[5]

    # The training data
    data = load_data(data_dir)
    train_data = data[0:int(len(data) * 0.70)]
    test_data = data[int(len(data) * 0.70):]

    graph = tf.Graph()

    with graph.as_default():
        inputs = tf.placeholder(tf.float32, [batch_size, 32, None, 1])

        # The CRNN
        crnn = CRNN(inputs)

        # Our target output
        targets = tf.sparse_placeholder(tf.int32, name='targets')

        # The length of the sequence
        seq_len = tf.placeholder(tf.int32, [None], name='seq_len')

        logits = tf.reshape(crnn, [-1, 512])

        W = tf.Variable(tf.truncated_normal([512, config.NUM_CLASSES],
                                            stddev=0.1),
                        name="W")
        b = tf.Variable(tf.constant(0., shape=[config.NUM_CLASSES]), name="b")

        print(logits.get_shape())

        logits = tf.matmul(logits, W) + b

        print(logits.get_shape())

        logits = tf.reshape(logits, [batch_size, -1, config.NUM_CLASSES])

        print(logits.get_shape())

        # Final layer, the output of the BLSTM
        logits = tf.transpose(logits, (1, 0, 2))

        # Loss and cost calculation
        loss = tf.nn.ctc_loss(targets, logits, seq_len)

        cost = tf.reduce_mean(loss)

        # Training step
        optimizer = tf.train.MomentumOptimizer(0.01, 0.9).minimize(cost)

        # The decoded answer
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(logits, seq_len)

        # The error rate
        acc = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    with tf.Session(graph=graph) as sess:
        tf.global_variables_initializer().run()

        # Train

        for it in range(0, iteration_count):
            iter_avg_cost = 0
            start = time.time()
            for b in [
                    train_data[x * batch_size:x * batch_size + batch_size]
                    for x in range(0, int(len(train_data) / batch_size))
            ]:
                in_data, labels, data_seq_len = zip(*b)

                print(data_seq_len)

                data_targets = np.asarray([
                    label_to_array(lbl, config.CHAR_VECTOR) for lbl in labels
                ])

                data_targets = sparse_tuple_from(data_targets)

                print(np.shape(data_targets[0]))
                print(np.shape(data_targets[1]))
                print(np.shape(data_targets[2]))
                print(data_targets[0])
                print(data_targets[1])
                print(data_targets[2])

                data_shape = np.shape(in_data)

                in_data = np.reshape(
                    in_data, (data_shape[0], data_shape[1], data_shape[2], 1))

                decoded_val, cost_val = sess.run(
                    [decoded, cost], {
                        inputs: in_data,
                        targets: data_targets,
                        seq_len: data_seq_len,
                    })
                iter_avg_cost += (np.sum(cost_val) / batch_size) / (int(
                    len(train_data) / batch_size))

            print('[{}] {} : {}'.format(time.time() - start, it,
                                        iter_avg_cost))