Example #1
    def __generate_all_test_batches(self):
        test_batches = []
        while not self.current_test_offset + self.batch_size > self.data_len:
            old_offset = self.current_test_offset

            new_offset = self.current_test_offset + self.batch_size

            self.current_test_offset = new_offset

            raw_batch_x, raw_batch_y, raw_batch_la = zip(*self.data[old_offset:new_offset])

            batch_y = np.reshape(
                np.array(raw_batch_y),
                (-1)
            )

            batch_dt = sparse_tuple_from(
                np.reshape(
                    np.array(raw_batch_la),
                    (-1)
                )
            )

            batch_x = np.reshape(
                np.array(raw_batch_x),
                (-1, self.max_image_width, 32, 1)
            )

            test_batches.append((batch_y, batch_dt, batch_x))
        return test_batches
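Every example on this page calls a sparse_tuple_from helper that lives in each project's utility module and is not reproduced here. A minimal sketch of the conventional implementation, assuming the labels are plain Python/NumPy integer sequences, converts a batch of label sequences into the (indices, values, dense_shape) triple expected by tf.SparseTensor and the CTC targets placeholder:

import numpy as np

def sparse_tuple_from(sequences, dtype=np.int32):
    # Build the (indices, values, dense_shape) triple for a tf.SparseTensor
    # from a list of label sequences (one sequence per batch element).
    indices, values = [], []
    for n, seq in enumerate(sequences):
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    dense_shape = np.asarray(
        [len(sequences), max(len(seq) for seq in sequences)], dtype=np.int64)
    return indices, values, dense_shape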
Example #2
    def test_target_wav_file(self, wav_files, txt_labels):
        print('Reading audio file: ', wav_files[0])
        print('Starting speech recognition......')

        self.audio_features, self.audio_features_len, text_vector, text_vector_len = utils.get_audio_mfcc_features(
            None,
            wav_files,
            n_input,
            n_context,
            self.word_num_map,
            txt_labels)
        self.sparse_labels = utils.sparse_tuple_from(text_vector)
        d, train_ler = self.sess.run([self.decoded[0], self.label_err], feed_dict=self.get_feed_dict(dropout=1.0))
        dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
        decoded_str = utils.trans_array_to_text_ch(dense_decoded[0], self.words)
        print('Original transcript: {}'.format(txt_labels[0]))
        print('Recognized text:  {}'.format(decoded_str))

        self.sess.close()
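The utils.trans_array_to_text_ch helper above is not shown; under the assumption that self.words is the index-to-character vocabulary list and -1 is the padding value produced by sparse_tensor_to_dense, a sketch would be:

def trans_array_to_text_ch(indices, words):
    # Map an array of character indices back to a string, skipping padding (-1)
    # and anything outside the vocabulary range.
    return ''.join(words[i] for i in indices if 0 <= i < len(words))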
Example #3
    def _generate_all_train_batches(self):
        train_batches = []
        k = 0
        self.current_train_offset = 0
        while not self.current_train_offset + self.batch_size > self.test_offset:
            old_offset = self.current_train_offset

            new_offset = self.current_train_offset + self.batch_size

            self.current_train_offset = new_offset

            raw_batch_x, raw_batch_y, raw_batch_la = zip(*self.data[old_offset:new_offset])

            raw_batch_x = self.__augment_images(raw_batch_x)

            batch_y = np.reshape(
                np.array(raw_batch_y),
                (-1)
            )

            k += 1

            if self.test_augment_image and k > 30:
                break

            batch_dt = sparse_tuple_from(
                np.asarray(raw_batch_la, dtype=object)
            )

            raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)

            batch_x = np.reshape(
                np.array(raw_batch_x),
                (len(raw_batch_x), self.max_image_width, self.height, 1)
            )

            train_batches.append((batch_y, batch_dt, batch_x))
        print("Length of train batches", len(train_batches))
        random.shuffle(train_batches)
        return train_batches
Example #4
 def train(self, iteration_count):
     with self.__session.as_default():
         print('Training')
         for i in range(iteration_count):
             iter_loss = 0
             for batch_y, batch_sl, batch_x in self.__data_manager.get_next_train_batch(
             ):
                 data_targets = np.asarray([
                     label_to_array(lbl, config.CHAR_VECTOR)
                     for lbl in batch_y
                 ])
                 data_targets = sparse_tuple_from(data_targets)
                 _, loss_value, decoded = self.__session.run(
                     [self.__optimizer, self.__loss, self.__decoded],
                     feed_dict={
                         self.__inputs: batch_x,
                         self.__seq_len: batch_sl,
                         self.__targets: data_targets
                     })
                 iter_loss += loss_value
             print('[{}] Iteration loss: {}'.format(i, iter_loss))
     return None
Example #5
    def __generate_all_test_batches(self):
        test_batches = []
        while not self.current_test_offset + self.batch_size > self.data_len:
            old_offset = self.current_test_offset

            new_offset = self.current_test_offset + self.batch_size

            self.current_test_offset = new_offset

            raw_batch_x, raw_batch_y, raw_batch_la = zip(
                *self.data[old_offset:new_offset])

            batch_y = np.reshape(np.array(raw_batch_y), (-1))

            batch_dt = sparse_tuple_from(
                np.reshape(np.array(raw_batch_la), (-1)))

            batch_x = np.reshape(np.array(raw_batch_x),
                                 (-1, self.max_image_width, 32, 1))

            test_batches.append((batch_y, batch_dt, batch_x))
        return test_batches
Example #6
def input_preprocess():

    ### audio_filename = maybe_download('LDC93S1.wav', 93638)
    ### target_filename = maybe_download('LDC93S1.txt', 62)

    fs, audio = wav.read(audio_filename)

    inputs = mfcc(audio, samplerate=fs)
    # Transform into a 3D array
    train_inputs = np.asarray(inputs[np.newaxis, :])
    train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
    train_seq_len = [train_inputs.shape[1]]
    num_examples = 1

    with open(target_filename, 'r') as f:
        # Only the last line is necessary
        line = f.readlines()[-1]
        # Get only the words between [a-z] and replace period for none
        original = ' '.join(line.strip().lower().split(' ')[2:]).replace('.', '')
        targets = original.replace(' ', '  ')
        targets = targets.split(' ')

    # Adding blank label
    targets = np.hstack([Space_Token if x == '' else list(x) for x in targets])

    # Transform char into index
    targets = np.asarray([Space_Index if x == Space_Token else ord(x) - Index_Start
                      for x in targets])

    # Creating sparse representation to feed the placeholder
    train_targets = sparse_tuple_from([targets])

    # We don't have a validation dataset :(
    val_inputs, val_targets, val_seq_len = train_inputs, train_targets, \
                                       train_seq_len

    return inputs, train_inputs, train_targets, train_seq_len
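As a concrete illustration of the target encoding above (assuming the conventional values Space_Token = '<space>', Space_Index = 0 and Index_Start = ord('a') - 1, which are defined outside this snippet), a transcript like 'she had' would be processed as follows:

original = 'she had'
targets = original.replace(' ', '  ').split(' ')        # ['she', '', 'had']
targets = np.hstack(['<space>' if x == '' else list(x) for x in targets])
# -> ['s' 'h' 'e' '<space>' 'h' 'a' 'd']
targets = np.asarray([0 if x == '<space>' else ord(x) - (ord('a') - 1)
                      for x in targets])                # [19  8  5  0  8  1  4]
indices, values, shape = sparse_tuple_from([targets])
# indices: [[0 0] [0 1] ... [0 6]], values: [19 8 5 0 8 1 4], shape: [1 7]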
Example #7
    def batch_generator(self, queue):
        """Takes a queue and enqueue batches in it
        """

        generator = GeneratorFromDict(language=self.language)
        while True:
            batch = []
            while len(batch) < self.batch_size:
                img, lbl = generator.next()
                batch.append(
                    (
                        resize_image(np.array(img.convert("L")), self.max_image_width)[
                            0
                        ],
                        lbl,
                        label_to_array(lbl, self.char_vector),
                    )
                )

            raw_batch_x, raw_batch_y, raw_batch_la = zip(*batch)

            batch_y = np.reshape(np.array(raw_batch_y), (-1))

            batch_dt = sparse_tuple_from(
                np.reshape(np.array(raw_batch_la), (-1)))

            raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)

            raw_batch_x = raw_batch_x / 255.0

            batch_x = np.reshape(
                np.array(raw_batch_x), (len(raw_batch_x),
                                        self.max_image_width, 32, 1)
            )
            if queue.qsize() < 20:
                queue.put((batch_y, batch_dt, batch_x))
            else:
                pass
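The generator above only fills a queue; a hedged usage sketch (the data_manager instance name is hypothetical) runs it in a background process while the training loop consumes batches:

from multiprocessing import Process, Queue

queue = Queue()
worker = Process(target=data_manager.batch_generator, args=(queue,), daemon=True)
worker.start()
batch_y, batch_dt, batch_x = queue.get()  # blocks until a batch is ready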
Example #8
    def train(self, iteration_count):
        with self.__session.as_default():
            print('Training')
            for i in range(self.step, iteration_count + self.step):
                iter_loss = 0
                for batch_y, batch_sl, batch_x in self.__data_manager.get_next_train_batch(
                ):
                    data_targets = np.asarray([
                        label_to_array(lbl, config.CHAR_VECTOR)
                        for lbl in batch_y
                    ])
                    data_targets = sparse_tuple_from(data_targets)
                    op, decoded, loss_value = self.__session.run(
                        [self.__optimizer, self.__decoded, self.__cost],
                        feed_dict={
                            self.__inputs:
                            batch_x,
                            self.__seq_len: [self.__max_char_count] *
                            self.__data_manager.batch_size,
                            self.__targets:
                            data_targets
                        })

                    if i % 10 == 0:
                        for j in range(2):
                            print(batch_y[j])
                            print(ground_truth_to_word(decoded[j]))

                    iter_loss += loss_value

                self.__saver.save(self.__session,
                                  self.__save_path,
                                  global_step=self.step)

                print('[{}] Iteration loss: {}'.format(self.step, iter_loss))

                self.step += 1
        return None
Example #9
 def test(self):
     with self.__session.as_default():
         print('Testing')
         total_error = 0
         example_count = 0
         for batch_y, batch_sl, batch_x in self.__data_manager.get_next_test_batch(
         ):
             data_targets = np.asarray([
                 label_to_array(lbl, config.CHAR_VECTOR) for lbl in batch_y
             ])
             data_targets = sparse_tuple_from(data_targets)
             decoded = self.__session.run([self.__decoded],
                                          feed_dict={
                                              self.__inputs: batch_x,
                                              self.__seq_len: batch_sl
                                          })
             example_count += len(batch_y)
             total_error += np.sum(
                 levenshtein(ground_truth_to_word(batch_y),
                             ground_truth_to_word(decoded)))
         print('Error on test set: {} ({} per example)'.format(
             total_error, total_error / example_count))
     return None
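The test loop depends on ground_truth_to_word and levenshtein, neither of which is shown. Sketches, assuming decoded labels are index sequences over config.CHAR_VECTOR:

def ground_truth_to_word(ground_truth):
    # Map a sequence of character indices back to a word; out-of-range
    # indices (e.g. the CTC blank or -1 padding) are skipped.
    return ''.join(config.CHAR_VECTOR[i] for i in ground_truth
                   if 0 <= i < len(config.CHAR_VECTOR))

def levenshtein(a, b):
    # Classic dynamic-programming edit distance between two strings.
    if len(a) < len(b):
        a, b = b, a
    previous = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        current = [i]
        for j, cb in enumerate(b, 1):
            current.append(min(previous[j] + 1,                 # deletion
                               current[j - 1] + 1,              # insertion
                               previous[j - 1] + (ca != cb)))   # substitution
        previous = current
    return previous[-1]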
Example #10
    def init_op_test_batches(self):
        test_batches = []
        num_batch = int(np.floor(len(self.test_data) / self.batch_size))
        for index in range(num_batch):
            raw_batch_x, raw_batch_y, raw_batch_la = zip(
                *self.test_data[index * self.batch_size:(1 + index) *
                                self.batch_size])
            batch_y = np.reshape(np.array(raw_batch_y), (-1))
            batch_dt = sparse_tuple_from(np.array(raw_batch_la))
            # batch_dt = sparse_tuple_from(
            #     np.reshape(
            #         np.array(raw_batch_la),
            #         (-1)
            #     )
            # )
            raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)

            batch_x = np.reshape(
                np.array(raw_batch_x),
                (len(raw_batch_x), self.max_image_width, 32, 1))

            test_batches.append((batch_y, batch_dt, batch_x))
            self.test_batches = test_batches
Example #11
File: crnn.py  Project: lygztq/MagicOCR
 def train(self):
     with self.sess.as_default():
         # log file writer
         log_writer = tf.summary.FileWriter(self.log_path, self.sess.graph)
         for i in range(self.epoches):
             iteration_loss = 0
             batch_x, batch_y, batch_length = self.data.get_next_train_batch(
                 self.batch_size)
             data_targets, _, _ = sparse_tuple_from(batch_y)
             batch_length = np.array(batch_length)
             print(len(batch_x), data_targets.shape, batch_length.shape)
             _, loss_val, predict_str, summary = self.sess.run(
                 [self.optimizer, self.losses, self.decoded, self.summary],
                 feed_dict={
                     self.inputs: batch_x,
                     self.targets: data_targets,
                     self.seq_len: batch_length
                 })
             iteration_loss += loss_val
             log_writer.add_summary(summary, i)
             print "Iteration {} : loss: {}".format(i, iteration_loss)
     return None
Example #12
def main():
    ds = dataset(DATA_FOLDER, 1)

    global_step = tf.Variable(0, trainable=False)

    outputs, inputs, _, seq_len = get_model()
    decoded, _ = tf.nn.ctc_beam_search_decoder(outputs,
                                               seq_len,
                                               merge_repeated=False)

    with tf.Session() as sess:
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        saver.restore(sess, 'trained_model/model')

        test_input, test_label = ds.data, ds.labels
        test_targets = sparse_tuple_from(test_label)

        feed = {
            inputs: test_input,
            seq_len: [MAX_TIMESTEPS for _ in range(len(test_input))]
        }
        dd = sess.run(decoded[0], feed_dict=feed)
        report_accuracy(dd, test_targets)
Example #13
def get_next_batch_for_res_train(batch_size=128):
    images = []   
    codes = []
    max_width_image = 0
    info = ""
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36    
        font_mode = random.choice([0,1,2,4]) 
        font_hint = random.choice([0,1,2,3,4,5])     # removed 2
        text = random.sample(CHARS, 12)
        text = text+text+[" "," "]
        random.shuffle(text)
        text = "".join(text).strip()
        codes.append([CHARS.index(char) for char in text])          
        image = utils_font.get_font_image_from_url(text, font_name, font_size, font_mode, font_hint )
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)                           
        image = np.asarray(image)     
        # image = utils.resize(image, height=image_height)
        # image = utils.img2bwinv(image)
        image = utils_pil.convert_to_bw(image)        
        images.append((255. - image) / 255.)
        if image.shape[1] > max_width_image: 
            max_width_image = image.shape[1]
        info = info+"%s\n\r" % utils_font.get_font_url(text, font_name, font_size, font_mode, font_hint)
    max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.transpose(image_vec)

    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image * image_height ) // (POOL_SIZE * POOL_SIZE)                
    return inputs, sparse_labels, seq_len, info
Example #14
    def run(self, data, epoch_num, is_pingce, learning_rate=None):
        data_x, data_y = data
        # Padding input to max_time_step of this batch
        batch_train_inputs, batch_train_seq_len = pad_sequences(data_x)
        # Converting to sparse representation so as to feed SparseTensor input
        batch_train_targets = sparse_tuple_from(data_y)

        #if epoch_num%config.epcho_num_for_test == 0:
        #get pingce result
        if is_pingce and epoch_num % 5 == 0:
            self.get_pingce_result(batch_train_inputs, batch_train_targets,
                                   batch_train_seq_len, learning_rate,
                                   epoch_num)

        if self.is_training:
            #start = time.time()
            return self.sess.run(
                [
                    self.total_loss, self.total_ler, self.global_step,
                    self.train_op
                ],
                feed_dict={
                    self.x: batch_train_inputs,
                    self.y: batch_train_targets,
                    self.learning_rate: learning_rate,
                    self.seq_len: batch_train_seq_len
                })

        else:
            return self.sess.run(
                [self.global_loss_update, self.global_ler_update],
                feed_dict={
                    self.x: batch_train_inputs,
                    self.y: batch_train_targets,
                    self.learning_rate: learning_rate,
                    self.seq_len: batch_train_seq_len
                })
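pad_sequences is another helper shared by these projects but not reproduced here. A minimal sketch, assuming each input is a [time, features] array and ignoring the truncation options of the full utility:

import numpy as np

def pad_sequences(sequences, value=0.0):
    # Pad every [time, features] array to the longest time dimension in the
    # batch and return the original lengths for the seq_len placeholder.
    lengths = np.asarray([len(s) for s in sequences], dtype=np.int64)
    max_len = lengths.max()
    num_features = np.asarray(sequences[0]).shape[1]
    padded = np.full((len(sequences), max_len, num_features), value, dtype=np.float32)
    for i, seq in enumerate(sequences):
        padded[i, :len(seq), :] = seq
    return padded, lengths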
Example #15
def next_batch(bs=batch_size, train=True):
    x_batch = []
    y_batch = []
    seq_len_batch = []
    original_batch = []
    i = 0
    for k in range(bs):
        ut_length_dict = dict([(k, len(v['target']))
                               for (k, v) in audio.cache.items()])
        utterances = sorted(ut_length_dict.items(), key=operator.itemgetter(1))
        test_index = 346
        if train:
            utterances = [a[0] for a in utterances[test_index:]]
        else:
            utterances = [a[0] for a in utterances[:test_index]]
        training_element = audio.cache[utterances[i]]
        target_text = training_element['target']
        audio_buffer = training_element['audio']
        x, y, seq_len, original = convert_inputs_to_ctc_format(
            audio_buffer, sample_rate, 'whatever', num_features)
        x_batch.append(x)
        y_batch.append(y)
        seq_len_batch.append(seq_len)
        original_batch.append(original)
        i += 1

    y_batch = sparse_tuple_from(y_batch)
    seq_len_batch = np.array(seq_len_batch)[:, 0]
    for i, pad in enumerate(np.max(seq_len_batch) - seq_len_batch):
        x_batch[i] = np.pad(x_batch[i], ((0, 0), (0, pad), (0, 0)),
                            mode='constant',
                            constant_values=0)

    x_batch = np.concatenate(x_batch, axis=0)

    return x_batch, y_batch, seq_len_batch, original_batch
Example #16
 def evaluate_cost(self, X):
     NN = (X.lengths).shape[0]
     N = (X.images).shape[0]
     avg_cost = 0.0
     start = 0
     total = 0
     total_batch = int(math.ceil(1.0 * NN / self.bsize))
     for batchidx in range(total_batch):
         batch_x, labels, tmplen, mysize = X.next_batch(self.bsize, start)
         # Need to convert labels to targets
         test_targets = sparse_tuple_from(labels)
         error, A = self.sess.run(
             [self.ler, self.decoded[0]],
             feed_dict={
                 self.x: batch_x,
                 self.targets: test_targets,
                 self.mylen: tmplen,
                 self.keepprob: 1.0
             })
         print(A.values)
         print(test_targets[1])
         avg_cost += error
         start += self.bsize
     return avg_cost / NN
    for curr_epoch in range(num_epochs):
        train_cost = train_ler = 0
        start = time.time()

        for batch in range(num_batches_per_epoch):

            # Getting the index
            indexes = [i % num_examples for i in range(batch * batch_size, (batch + 1) * batch_size)]

            batch_train_inputs = train_inputs[indexes]
            # Padding input to max_time_step of this batch
            batch_train_inputs, batch_train_seq_len = pad_sequences(batch_train_inputs)

            # Converting to sparse representation so as to feed SparseTensor input
            batch_train_targets = sparse_tuple_from(train_targets[indexes])

            feed = {inputs: batch_train_inputs,
                    targets: batch_train_targets,
                    seq_len: batch_train_seq_len}

            batch_cost, _ = session.run([cost, optimizer], feed)
            train_cost += batch_cost*batch_size
            train_ler += session.run(ler, feed_dict=feed)*batch_size


        # Shuffle the data
        shuffled_indexes = np.random.permutation(num_examples)
        train_inputs = train_inputs[shuffled_indexes]
        train_targets = train_targets[shuffled_indexes]
#    tf.initialize_all_variables().run()
    tf.global_variables_initializer().run()
    
    for curr_epoch in range(num_epochs):
        train_cost = train_ler = 0
        start = time.time()
        
        for i, batch in enumerate(datagen.iterate_train(mb_size, shuffle=False, sort_by_duration=True)):
            
            train_inputs = batch['x']
            train_targets = batch['y']
            train_texts = batch['texts']
            train_seq_len = batch['input_lengths']
            #batch_train_inputs, batch_train_seq_len = pad_sequences(train_inputs)
            
            batch_train_targets = sparse_tuple_from(train_targets)
            print("Epoch {}/{}, batch number {} ".format(curr_epoch+1, num_epochs, i))

            feed = {inputs: train_inputs,
                    targets: batch_train_targets,
                    seq_len: train_seq_len}

            batch_cost, _ = session.run([cost, optimizer], feed)
            train_cost += batch_cost*batch_size
            train_ler += session.run(ler, feed_dict=feed)*batch_size

        train_cost /= num_examples
        train_ler /= num_examples

        val_inputs, val_targets, val_seq_len = train_inputs, batch_train_targets, train_seq_len
    targets = targets.split(' ')

#print("{}".format(targets))
# Adding blank label
targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])
#print("{}".format(targets))
# Transform char into index
targets = np.asarray([
    SPACE_INDEX if x == SPACE_TOKEN else
    COLLON_INDEX if x == COLLON_TOKEN else ord(x) - FIRST_INDEX
    for x in targets
])

#print("{}".format(targets))
# Creating sparse representation to feed the placeholder
train_targets = sparse_tuple_from([targets])
#print("{}".format(train_targets))
# We don't have a validation dataset :(
val_inputs, val_targets, val_seq_len = train_inputs, train_targets, \
                                       train_seq_len

# THE MAIN CODE!

graph = tf.Graph()
with graph.as_default():
    # e.g: log filter bank or MFCC features
    # Has size [batch_size, max_stepsize, num_features], but the
    # batch_size and max_stepsize can vary along each step
    inputs = tf.placeholder(tf.float32, [None, None, num_features])

    # Here we use sparse_placeholder that will generate a
def train_model(ENV, in_file, op_file):

    graph = tf.Graph()
    with graph.as_default():
        stacked_layers = {}

        # e.g: log filter bank or MFCC features
        # Has size [batch_size, max_stepsize, num_features], but the
        # batch_size and max_stepsize can vary along each step
        inputs = tf.placeholder(tf.float32, [None, None, num_features])

        targets = tf.sparse_placeholder(tf.int32)
        # 1d array of size [batch_size]
        seq_len = tf.placeholder(tf.int32, [None])

        # Weights & biases
        weight_classes = tf.Variable(
            tf.truncated_normal([num_hidden, num_classes],
                                mean=0,
                                stddev=0.1,
                                dtype=tf.float32))
        bias_classes = tf.Variable(tf.zeros([num_classes]), dtype=tf.float32)

        #_activation = tf.nn.relu#this was causing the model to diverge
        _activation = None

        layers = {'forward': [], 'backward': []}
        for key in layers.keys():
            for i in range(num_layers):
                cell = tf.nn.rnn_cell.LSTMCell(num_hidden,
                                               use_peepholes=True,
                                               activation=_activation,
                                               state_is_tuple=True,
                                               cell_clip=clip_thresh)
                #
                #cell = RWACell(num_units=num_hidden)
                layers[key].append(cell)
            stacked_layers[key] = tf.nn.rnn_cell.MultiRNNCell(
                layers[key], state_is_tuple=True)

        outputs, bilstm_vars = tf.nn.bidirectional_dynamic_rnn(
            stacked_layers['forward'],
            stacked_layers['backward'],
            inputs,
            sequence_length=seq_len,
            time_major=False,  # [batch_size, max_time, num_hidden]
            dtype=tf.float32)
        """
        outputs_concate = tf.concat_v2(outputs, 2)
        outputs_concate = tf.reshape(outputs_concate, [-1, 2*num_hidden])
        # logits = tf.matmul(outputs_concate, weight_classes) + bias_classes
        """
        fw_output = tf.reshape(outputs[0], [-1, num_hidden])
        bw_output = tf.reshape(outputs[1], [-1, num_hidden])
        logits = tf.add(
            tf.add(tf.matmul(fw_output, weight_classes),
                   tf.matmul(bw_output, weight_classes)), bias_classes)

        logits = tf.reshape(logits, [batch_size, -1, num_classes])
        loss = tf.nn.ctc_loss(targets, logits, seq_len, time_major=False)
        error = tf.reduce_mean(loss)
        optimizer = tf.train.MomentumOptimizer(learning_rate,
                                               momentum).minimize(error)

        # Evaluating
        # decoded, log_prob = ctc_ops.ctc_greedy_decoder(tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        label_error_rate = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)

    data, labels = load_ipad_data(in_file)
    bound = ((3 * len(data) // batch_size) // 4) * batch_size
    train_inputs = data[0:bound]
    train_labels = labels[0:bound]
    test_data = data[bound:]
    test_labels = labels[bound:]
    num_examples = len(train_inputs)
    num_batches_per_epoch = num_examples // batch_size

    with tf.Session(graph=graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as session:
        # Initializate the weights and biases
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)

        ckpt = tf.train.get_checkpoint_state(op_file)
        if ckpt:
            logging.info('load %s', ckpt.model_checkpoint_path)
            saver.restore(session, ckpt.model_checkpoint_path)
        else:
            logging.info("no previous session to load")

        for curr_epoch in range(num_epochs):
            train_cost = train_ler = 0
            start = time.time()

            for batch in range(num_batches_per_epoch):
                # Getting the index
                indices = [
                    i % num_examples
                    for i in range(batch * batch_size, (batch + 1) *
                                   batch_size)
                ]

                batch_train_inputs = train_inputs[indices]
                # Padding input to max_time_step of this batch
                batch_train_inputs, batch_train_seq_len = pad_sequences(
                    batch_train_inputs)

                # Converting to sparse representation so as to feed SparseTensor input
                batch_train_targets = sparse_tuple_from(train_labels[indices])

                feed = {
                    inputs: batch_train_inputs,
                    targets: batch_train_targets,
                    seq_len: batch_train_seq_len
                }
                batch_cost, _ = session.run([error, optimizer], feed)
                train_cost += batch_cost * batch_size
                train_ler += session.run(label_error_rate,
                                         feed_dict=feed) * batch_size
                log = "Epoch {}/{}, iter {}, batch_cost {}"
                logging.info(
                    log.format(curr_epoch + 1, num_epochs, batch, batch_cost))

            saver.save(session,
                       os.path.join(ENV.output, 'best.ckpt'),
                       global_step=curr_epoch)

            # Shuffle the data
            shuffled_indexes = np.random.permutation(num_examples)
            train_inputs = train_inputs[shuffled_indexes]
            train_labels = train_labels[shuffled_indexes]

            # Metrics mean
            train_cost /= num_examples
            train_ler /= num_examples

            log = "Epoch {}/{}, train_cost = {:.3f}, train_ler = {:.3f}, time = {:.3f}"
            logging.info(
                log.format(curr_epoch + 1, num_epochs, train_cost, train_ler,
                           time.time() - start))

            #run the test data through
            indices = [
                i % len(test_data)
                for i in range(batch * batch_size, (batch + 1) * batch_size)
            ]
            test_inputs = test_data[indices]
            test_inputs, test_seq_len = pad_sequences(test_inputs)
            test_targets = sparse_tuple_from(test_labels[indices])
            feed_test = {
                inputs: test_inputs,
                targets: test_targets,
                seq_len: test_seq_len
            }
            test_cost, test_ler = session.run([error, label_error_rate],
                                              feed_dict=feed_test)
            log = "Epoch {}/{}, test_cost {}, test_ler {}"
            logging.info(
                log.format(curr_epoch + 1, num_epochs, test_cost, test_ler))

        input_features = [('strokeData', datatypes.Array(num_features))]
        output_features = [('labels', datatypes.Array(num_classes))]

        vars = tf.trainable_variables()
        weights = {'forward': {}, 'backward': {}}
        for _var in vars:
            name = _var.name.encode('utf-8')
            if name.startswith('bidirectional_rnn/fw'):
                key = name.replace('bidirectional_rnn/fw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['forward'][key] = _var.eval()
            else:
                key = name.replace('bidirectional_rnn/bw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['backward'][key] = _var.eval()

    builder = NeuralNetworkBuilder(input_features, output_features, mode=None)

    fw_biases = [
        weights['forward']['bias'][0 * num_hidden:1 * num_hidden],
        weights['forward']['bias'][1 * num_hidden:2 * num_hidden],
        weights['forward']['bias'][2 * num_hidden:3 * num_hidden],
        weights['forward']['bias'][3 * num_hidden:4 * num_hidden]
    ]

    bw_biases = [
        weights['backward']['bias'][0 * num_hidden:1 * num_hidden],
        weights['backward']['bias'][1 * num_hidden:2 * num_hidden],
        weights['backward']['bias'][2 * num_hidden:3 * num_hidden],
        weights['backward']['bias'][3 * num_hidden:4 * num_hidden]
    ]

    num_LSTM_gates = 5

    input_weights = {
        'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features)),
        'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features))
    }

    recurrent_weights = {
        'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden)),
        'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden))
    }

    builder.add_bidirlstm(
        name='bidirectional_1',
        W_h=recurrent_weights['forward'],
        W_x=input_weights['forward'],
        b=fw_biases,
        W_h_back=recurrent_weights['backward'],
        W_x_back=input_weights['backward'],
        b_back=bw_biases,
        hidden_size=num_hidden,
        input_size=num_features,
        input_names=[
            'strokeData', 'bidirectional_1_h_in', 'bidirectional_1_c_in',
            'bidirectional_1_h_in_rev', 'bidirectional_1_c_in_rev'
        ],
        output_names=[
            'y', 'bidirectional_1_h_out', 'bidirectional_1_c_out',
            'bidirectional_1_h_out_rev', 'bidirectional_1_c_out_rev'
        ],
        peep=[
            weights['forward']['w_i_diag'], weights['forward']['w_f_diag'],
            weights['forward']['w_o_diag']
        ],
        peep_back=[
            weights['backward']['w_i_diag'], weights['backward']['w_f_diag'],
            weights['backward']['w_o_diag']
        ],
        cell_clip_threshold=clip_thresh)

    builder.add_softmax(name='softmax', input_name='y', output_name='labels')

    optional_inputs = [('bidirectional_1_h_in', num_hidden),
                       ('bidirectional_1_c_in', num_hidden),
                       ('bidirectional_1_h_in_rev', num_hidden),
                       ('bidirectional_1_c_in_rev', num_hidden)]
    optional_outputs = [('bidirectional_1_h_out', num_hidden),
                        ('bidirectional_1_c_out', num_hidden),
                        ('bidirectional_1_h_out_rev', num_hidden),
                        ('bidirectional_1_c_out_rev', num_hidden)]

    # not really sure what this line below does, just copied it from the Keras converter in coremltools,
    # and it seemed to make things work
    builder.add_optionals(optional_inputs, optional_outputs)

    model = MLModel(builder.spec)

    model.short_description = 'Model for recognizing a symbols and diagrams drawn on ipad screen with apple pencil'

    model.input_description[
        'strokeData'] = 'A collection of strokes to classify'
    model.output_description[
        'labels'] = 'The "probability" of each label, in a dense array'

    outfile = 'bilstm.mlmodel'
    model.save(outfile)

    print('Saved to file: %s' % outfile)
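After the export above, a quick smoke test (requires coremltools; the call below only inspects the saved spec, it does not run a prediction) might look like:

import coremltools

mlmodel = coremltools.models.MLModel('bilstm.mlmodel')
print(mlmodel.get_spec().description)  # inputs/outputs declared for the model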
Example #21
def get_next_batch(batch_size=128):
    images = []
    to_images = []
    codes = []
    max_width_image = 0
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36  #random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        # text = random.sample(CHARS, 12)
        # text = text+text
        # random.shuffle(text)
        # text = "".join(text).strip()
        codes.append([CHARS.index(char) for char in text])
        image = utils_font.get_font_image_from_url(text,
                                                   font_name,
                                                   font_size,
                                                   fontmode=font_mode,
                                                   fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        to_image = image.copy()
        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        _h = random.randint(9, image_height // random.choice([1, 1.5, 2, 2.5]))
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height,
                                           random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)

        # to_image = utils_font.get_font_image_from_url(text, font_name ,image_height, fontmode = font_mode, fonthint = font_hint)
        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)

        if image.shape[1] > max_width_image:
            max_width_image = image.shape[1]
        if to_image.shape[1] > max_width_image:
            max_width_image = to_image.shape[1]

    max_width_image = max_width_image + (POOL_SIZE -
                                         max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.transpose(image_vec)

    targets = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(to_images)):
        image_vec = utils.img2vec(to_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        targets[i, :] = np.transpose(image_vec)

    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image *
                                     image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
Example #22
        targets[i] = targets[i].split(' ')
        #np.append(Targets,targets)

        # Adding blank label
        targets[i] = np.hstack(
            [SPACE_TOKEN if x == '' else list(x) for x in targets[i]])

        # Transform char into index
        targets[i] = np.asarray([
            SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX
            for x in targets[i]
        ])

        # Creating sparse representation to feed the placeholder

        train_targets[i] = sparse_tuple_from([targets[i]])

#train_inputs = np.concatenate(tuple(train_inputs.values()),axis=1)
#print (len(train_inputs))

#Targets=np.zeros((0,2))
# Readings targets

#targets_list = []
#np.asarray(train_targets[np.newaxis, :])
#targets_list.append(train_targets)
#targets_list.append(train_targets2)
#print (targets_list)

#train_targets2 = np.asarray(train_targets2[np.newaxis, :])
#train_targets = np.concatenate((train_targets,train_targets2))
Example #23
    if continue_training else True
]
# model.build(input_shape=(2, 32, 200, 1))
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=5)
loss_hist = []
# [print(i.name, i.shape) for i in model.trainable_variables]

# training
# dataset: https://www.robots.ox.ac.uk/~vgg/data/text/#sec-synth
# please check the data_generator in utils for path to the dataset
# the training set containts 7224612 images / 32 = 225769 batches
for x_batch, y_batch in data_generator(batches=112884,
                                       batch_size=64,
                                       epochs=10):

    indices, values, dense_shape = sparse_tuple_from(y_batch)
    y_batch_sparse = tf.sparse.SparseTensor(indices=indices,
                                            values=values,
                                            dense_shape=dense_shape)

    with tf.GradientTape() as tape:
        logits, raw_pred, rnn_out = model(x_batch)
        loss = tf.reduce_mean(
            tf.nn.ctc_loss(labels=y_batch_sparse,
                           logits=rnn_out,
                           label_length=[len(i) for i in y_batch],
                           logit_length=[47] * len(y_batch),
                           blank_index=62))

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
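For debugging, one could add a greedy decode at the end of the training loop body above. This is a sketch: it assumes rnn_out is time-major with 47 frames per example, as implied by the ctc_loss call, and that the blank index 62 is the last class (the decoder's default).

    # Greedy-decode the current batch and densify the sparse result so the
    # predicted label indices can be inspected or compared with y_batch.
    decoded, _ = tf.nn.ctc_greedy_decoder(rnn_out, sequence_length=[47] * len(y_batch))
    predictions = tf.sparse.to_dense(decoded[0], default_value=-1).numpy()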
Example #24
def get_next_batch_for_res(batch_size=128, has_sparse=True, has_onehot=True, \
                            max_width=4096, height=32, need_pad_width_to_max_width=False):
    inputs_images = []
    codes = []
    # Maximum image width in the current batch
    max_width_image = 0
    info = []
    seq_len = np.ones(batch_size)

    for i in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example == None:
            raise Exception("has finished train one data file, stop")

        dataset_example.ParseFromString(serialized_example)

        font_name = str(
            dataset_example.features.feature['font_name'].bytes_list.value[0],
            encoding="utf-8")
        font_size = dataset_example.features.feature[
            'font_size'].int64_list.value[0]
        font_mode = dataset_example.features.feature[
            'font_mode'].int64_list.value[0]
        font_hint = dataset_example.features.feature[
            'font_hint'].int64_list.value[0]

        text = str(
            dataset_example.features.feature['label'].bytes_list.value[0],
            encoding="utf-8")
        size = dataset_example.features.feature['size'].int64_list.value
        image = dataset_example.features.feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        # Convert the image to grayscale
        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > height:
            image = utils_pil.resize_by_height(image, height)

        # Randomly shift the image position
        image = utils_pil.resize_by_height(image,
                                           height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, height)

        # Add noise
        image = utils_font.add_noise(image)

        # Convert to an OpenCV-style numpy array
        image = np.asarray(image)
        # Scale by height by default; if the width exceeds the maximum width, scale by width
        image = utils.resize(image, height, max_width)

        # Randomly invert colors and normalize
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        # Track the current maximum image width
        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]

        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([
            font_name,
            str(font_size),
            str(font_mode),
            str(font_hint),
            str(len(text))
        ])

    # Round up to a multiple of 4
    if max_width_image % 4 > 0:
        max_width_image = max_width_image + 4 - max_width_image % 4

    # If the image exceeds the maximum width, raise an exception instead of rescaling
    if max_width_image > max_width:
        raise Exception("img width must %s <= %s " %
                        (max_width_image, max_width))

    if need_pad_width_to_max_width:
        max_width_image = max_width

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i],
                                  height=image_height,
                                  width=max_width_image,
                                  flatten=False)
        inputs[i, :] = np.reshape(image_vec,
                                  (image_height, max_width_image, 1))

    labels = [np.asarray(i) for i in codes]

    sparse_labels = None
    onehot_labels = None
    if has_sparse:
        sparse_labels = utils.sparse_tuple_from(labels)
        sparse_labels = np.array(sparse_labels)
    if has_onehot:
        onehot_labels = []
        for label in labels:
            label_one_hot = np.eye(CLASSES_NUMBER)[label]
            onehot_labels.append(label_one_hot)
        onehot_labels = np.array(onehot_labels)

    return inputs, np.array(labels), sparse_labels, onehot_labels, info
Example #25
    line = f.readlines()[-1]

    # Get only the words between [a-z] and replace period for none
    original = ' '.join(line.strip().lower().split(' ')[2:]).replace('.', '')
    targets = original.replace(' ', '  ')
    targets = targets.split(' ')

# Adding blank label
targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])

# Transform char into index
targets = np.asarray([SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX
                      for x in targets])

# Creating sparse representation to feed the placeholder
train_targets = sparse_tuple_from([targets])

# We don't have a validation dataset :(
val_inputs, val_targets, val_seq_len = train_inputs, train_targets, \
                                       train_seq_len


# THE MAIN CODE!

graph = tf.Graph()
with graph.as_default():
    # e.g: log filter bank or MFCC features
    # Has size [batch_size, max_stepsize, num_features], but the
    # batch_size and max_stepsize can vary along each step
    inputs = tf.placeholder(tf.float32, [None, None, num_features])
Example #26
            for batch in range(num_batches_per_epoch):

                # Getting the index
                indexes = [
                    i % num_examples
                    for i in range(batch * batch_size, (batch + 1) *
                                   batch_size)
                ]

                batch_train_inputs = train_inputs[indexes]
                # Padding input to max_time_step of this batch
                batch_train_inputs, batch_train_seq_len = utils.pad_sequences(
                    batch_train_inputs)

                # Converting to sparse representation so as to feed SparseTensor input
                batch_train_targets = utils.sparse_tuple_from(
                    train_targets[indexes])

                feed = {
                    inputs: batch_train_inputs,
                    targets: batch_train_targets,
                    seq_len: batch_train_seq_len
                }

                batch_cost, _ = session.run([cost, optimizer], feed)
                train_cost += batch_cost * batch_size
                train_ler += session.run(ler, feed_dict=feed) * batch_size

            # Shuffle the data
            shuffled_indexes = np.random.permutation(num_examples)
            train_inputs = train_inputs[shuffled_indexes]
            train_targets = train_targets[shuffled_indexes]
Example #27
def main():
    ds = dataset(DATA_FOLDER, BATCH_SIZE)

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                                global_step,
                                                DECAY_STEPS,
                                                LEARNING_RATE_DECAY_FACTOR,
                                                staircase=True)

    outputs, inputs, targets, seq_len = get_model()

    loss = tf.nn.ctc_loss(labels=targets, inputs=outputs, sequence_length=seq_len)
    cost = tf.reduce_mean(loss)
    # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=MOMENTUM).minimize(cost, global_step=global_step)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(loss,global_step=global_step)

    decoded, _ = tf.nn.ctc_beam_search_decoder(outputs, seq_len, merge_repeated=False)
    e_dis = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    init = tf.global_variables_initializer()

    def do_report():
        test_inputs, test_labels, _ = ds.next_batch()
        test_targets = sparse_tuple_from(test_labels)
        test_feed = {inputs: test_inputs, targets: test_targets, seq_len: [MAX_TIMESTEPS for _ in range(len(test_inputs))]}
        dd = session.run(decoded[0], test_feed)
        report_accuracy(dd, test_targets)

    with tf.Session() as session:
        session.run(init)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        for curr_epoch in range(NUM_EPOCHES):
            print("Epoch.......", curr_epoch)
            train_cost = 0
            new_epoch = False
            train_size = 0
            while not new_epoch:
                train_inputs, train_labels, new_epoch = ds.next_batch()
                train_targets = sparse_tuple_from(train_labels)
                feed = {inputs: train_inputs, targets: train_targets, seq_len: [MAX_TIMESTEPS for _ in range(len(train_inputs))]}
                c, steps, _ = session.run([cost, global_step, optimizer], feed)

                train_cost += c * BATCH_SIZE
                print("Step: %d, Loss: %.5f" % (steps, c))

                train_size += BATCH_SIZE

            if (curr_epoch + 1) % REPORT_EPOCHES == 0:
                do_report()
                save_path = saver.save(session, "saved_models/model", global_step=steps)
                print('save model on %s' % save_path)

            train_cost /= train_size

            train_inputs, train_labels, _ = ds.next_batch()
            train_targets = sparse_tuple_from(train_labels)
            val_feed = {inputs: train_inputs, targets: train_targets, seq_len: [MAX_TIMESTEPS for _ in range(len(train_inputs))]}
            val_cost, val_edit_dis, lr, steps = session.run([cost, e_dis, learning_rate, global_step], feed_dict=val_feed)

            log = "Epoch {}/{}, steps = {}, train_cost = {:.3f}, val_cost = {:.3f}, val_edit_dis = {:.3f}, learning_rate = {}"
            print(log.format(curr_epoch + 1, NUM_EPOCHES, steps, train_cost, val_cost, val_edit_dis, lr))
Example #28
 def do_report():
     test_inputs, test_labels, _ = ds.next_batch()
     test_targets = sparse_tuple_from(test_labels)
     test_feed = {inputs: test_inputs, targets: test_targets, seq_len: [MAX_TIMESTEPS for _ in range(len(test_inputs))]}
     dd = session.run(decoded[0], test_feed)
     report_accuracy(dd, test_targets)
Example #29
    def train(self, session):

        inputs = tf.placeholder(
            tf.float32,
            [self.batch_size, self.num_features, None, self.dataset.im_depth])

        targets = tf.sparse_placeholder(tf.int32)

        seq_len = tf.placeholder(tf.int32, [None])

        logits = self.model(inputs, seq_len)

        loss = tf.nn.ctc_loss(targets, logits, seq_len)

        cost = tf.reduce_mean(loss)

        global_step = tf.Variable(0, trainable=False)

        learning_rate = tf.train.exponential_decay(self.initial_learning_rate,
                                                   global_step,
                                                   8000,
                                                   0.98,
                                                   staircase=True)

        optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(
            cost, global_step=global_step)

        # Option 2: tf.nn.ctc_beam_search_decoder
        # (it's slower but you'll get better results)
        decoded, log_prob = tf.nn.ctc_greedy_decoder(logits, seq_len)

        # Inaccuracy: label error rate
        ler = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

        tf.global_variables_initializer().run(session=session)

        saver = tf.train.Saver(tf.global_variables())

        if not os.path.exists(self.checkpoint_path):
            os.mkdir(self.checkpoint_path)

        ckpt = tf.train.get_checkpoint_state(self.checkpoint_path)

        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(session, ckpt.model_checkpoint_path)
            print("Model restored.")

        else:
            print("No checkpoint found, start training from beginning.")

        for curr_epoch in range(self.num_epochs):
            train_cost = train_ler = 0
            start = time.time()

            X, Y = self.dataset.get_batch()

            for batch in range(self.num_batches_per_epoch):

                train_seq_len = [x.shape[1] for x in X]

                print("EPOCH", curr_epoch, "PROGRESS",
                      self.dataset.index_in_epoch, self.dataset.total_examples)

                train_targets = sparse_tuple_from(Y)

                feed = {
                    inputs: X,
                    targets: train_targets,
                    seq_len: train_seq_len
                }

                batch_cost, _ = session.run([cost, optimizer], feed)

                train_cost += batch_cost * self.batch_size
                train_ler += session.run(ler, feed_dict=feed) * self.batch_size

                #VERBOSE
                if batch % 2 == 0:
                    decod = session.run(decoded, feed)

                    for j in range(self.batch_size):
                        # print("Y:", j, iam_train.id_to_char(Y[j]))
                        print("DECODED BATCH OUTPUT:",
                              self.dataset.id_to_char(decod[0][1]))
Example #30
audio_filename = 'wav/2_001002.wav'  #maybe_download('LDC93S1.wav', 93638)
target_filename = 'wav/001002.txt'  #maybe_download('LDC93S1.txt', 62)

inputs = utils.wav_mfcc(audio_filename)

# Transform into a 3D array
train_inputs = np.asarray(inputs[np.newaxis, :])
train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
train_seq_len = [train_inputs.shape[1]]

# Readings targets
targets, original = utils.encode_target_file(target_filename)

# Creating sparse representation to feed the placeholder
train_targets = utils.sparse_tuple_from([targets])

# We don't have a validation dataset :(
val_inputs, val_targets, val_seq_len = train_inputs, train_targets, train_seq_len

# THE MAIN CODE!

graph = tf.Graph()
with graph.as_default():
    # e.g: log filter bank or MFCC features
    # Has size [batch_size, max_stepsize, num_features], but the
    # batch_size and max_stepsize can vary along each step
    inputs = tf.placeholder(tf.float32, [None, None, num_features])

    # Here we use sparse_placeholder that will generate a
    # SparseTensor required by ctc_loss op.
Example #31
    for curr_epoch in range(num_epochs):
        train_cost = train_ler = 0
        start = time.time()

        for batch in range(num_batches_per_epoch):

            # Getting the index
            indexes = [i % num_examples for i in range(batch * batch_size, (batch + 1) * batch_size)]

            batch_train_inputs = train_inputs[indexes]
            # Padding input to max_time_step of this batch
            batch_train_inputs, batch_train_seq_len = pad_sequences(batch_train_inputs)

            # Converting to sparse representation so as to feed SparseTensor input
            batch_train_targets = sparse_tuple_from(train_targets[indexes])

            feed = {inputs: batch_train_inputs,
                    targets: batch_train_targets,
                    seq_len: batch_train_seq_len}

            batch_cost, _ = session.run([cost, optimizer], feed)
            train_cost += batch_cost*batch_size
            train_ler += session.run(ler, feed_dict=feed)*batch_size


        # Shuffle the data
        shuffled_indexes = np.random.permutation(num_examples)
        train_inputs = train_inputs[shuffled_indexes]
        train_targets = train_targets[shuffled_indexes]
Example #32
        start = time.time()
        badcase = 0
        for batch in range(num_batches_per_epoch):

            #Getting the index
            indexes = [
                i % num_examples
                for i in range(batch * batch_size, (batch + 1) * batch_size)
            ]
            #print "indexes",indexes
            batch_train_inputs = train_inputs[indexes]
            # Padding input to max_time_step of this batch
            batch_train_inputs, batch_train_seq_len = pad_sequences(
                batch_train_inputs)
            # Converting to sparse representation so as to feed SparseTensor input
            batch_train_targets = sparse_tuple_from(train_targets[indexes])

            feed = {
                inputs: batch_train_inputs,
                targets: batch_train_targets,
                seq_len: batch_train_seq_len,
                keep_prob: 0.5,
                istrain: True
            }

            batch_cost, _ = session.run([total_loss, train_op], feed)
            train_cost += batch_cost * batch_size
            #train_ler += session.run(ler, feed_dict=feed)*batch_size

        #for test
        for batch in range(num_batches_per_epoch_for_test):
Example #33
def get_next_batch_for_res(batch_size=128):
    inputs_images = []   
    codes = []
    max_width_image = 0
    info = []
    seq_len = np.ones(batch_size)

    for i in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example==None:
            raise Exception("has finished train one data file, stop")

        dataset_example.ParseFromString(serialized_example)

        font_name = str(dataset_example.features.feature['font_name'].bytes_list.value[0],  encoding="utf-8")
        font_size = dataset_example.features.feature['font_size'].int64_list.value[0]
        font_mode = dataset_example.features.feature['font_mode'].int64_list.value[0]
        font_hint = dataset_example.features.feature['font_hint'].int64_list.value[0]

        text = str(dataset_example.features.feature['label'].bytes_list.value[0],  encoding="utf-8")
        size = dataset_example.features.feature['size'].int64_list.value
        image = dataset_example.features.feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        image = utils_pil.convert_to_gray(image) 
        w, h = size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)  

        image = utils_pil.resize_by_height(image, image_height-random.randint(1,5))
        image, _ = utils_pil.random_space2(image, image,  image_height)
        
        image = utils_font.add_noise(image)   
        image = np.asarray(image) 

        image = utils.resize(image, image_height, MAX_IMAGE_WIDTH)

        if random.random()>0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]
          
        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])                  

        info.append([font_name, str(font_size), str(font_mode), str(font_hint), str(len(text))])
        seq_len[i]=len(text)+1

    # Round the width up to a multiple of 4
    # if max_width_image % 4 > 0:
    #     max_width_image = max_width_image + 4 - max_width_image % 4

    # Pad the batch width up to MAX_IMAGE_WIDTH (originally: raise if the image exceeded it)
    if max_width_image < MAX_IMAGE_WIDTH:
        max_width_image = MAX_IMAGE_WIDTH
        # raise Exception("img width must %s <= %s " % (max_width_image, MAX_IMAGE_WIDTH))

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i], height=image_height, width=max_width_image, flatten=False)
        inputs[i,:] = np.reshape(image_vec,(image_height, max_width_image, 1))
     
    # print(inputs.shape, len(codes))
    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)

    # max_width_image = math.ceil((max_width_image-3+1.)/2.)
    # max_width_image = math.ceil((max_width_image-3+1.)/1.)
    # max_width_image = math.ceil((max_width_image-3+1.)/2.)
    # max_width_image = math.ceil((max_width_image-3+1.)/1.)
    # max_width_image = math.ceil((max_width_image-3+1.)/2.)

    seq_len = np.ones(batch_size) * SEQ_LENGTH  # overrides the per-example lengths set above with a fixed length
    # print(inputs.shape, seq_len.shape, [len(l) for l in labels])
    return inputs, sparse_labels, seq_len, info
示例#34
0
                            format(file))
                        label_name = file.split('-')[0]
                        # Loading the transcription .npy file for the training example
                        label = np.load(
                            'data/speech_commands_processed_reduced/transcriptions/{}.npy'
                            .format(label_name))
                        filenames.append(file)
                        # Appending the audio and transcription to the batch arrays
                        batch_train_audio.append(audio)
                        batch_train_labels.append(label)

                # Padding sequences so they all have equal length --> new shape (max_data, max_length, n_features)
                batch_train_audio = np.asarray(utils.pad_sequences(
                    batch_train_audio, hparams.input_max_len),
                                               dtype=np.float32)
                batch_train_labels = utils.sparse_tuple_from(
                    np.asarray(batch_train_labels))

                # Run the training method from the model class. Returns the cost value and the summary.
                cost, _, summary = train_model.train(batch_train_audio,
                                                     batch_train_labels,
                                                     train_sess)

                # Updating the global step
                global_step += batch_size

                # Adding summary to the training logs
                training_logger.add_summary(summary, global_step=global_step)

                # Calculating time for the console output
                tot = time.time() - start_time
                h = int(tot / 3600)
示例#35
0
def run_model(x_train, y_train, x_val, y_val, num_features, num_train_examples,
              num_val_examples, num_epochs, batch_size, num_batches_per_epoch,
              learning_rate, momentum, num_layers, num_hidden, num_classes):

    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, [None, None, num_features],
                           name=vocab.x)
        y = tf.sparse_placeholder(tf.int32, name=vocab.y)
        seq_len = tf.placeholder(tf.int32, [None], name=vocab.seq_len)

        W = tf.Variable(tf.truncated_normal([num_hidden, num_classes],
                                            stddev=0.1),
                        name=vocab.W)
        b = tf.Variable(tf.constant(0., shape=[num_classes]), name=vocab.b)

        stack = model(num_layers, num_hidden)

        logits = inference(x, seq_len, W, b, stack, num_hidden, num_classes)

        loss_ = loss(y, logits, seq_len)

        cost_ = cost(loss_)

        optimizer = optimize(learning_rate, momentum, cost_)

        decoded, log_prob = decode(logits, seq_len)

        ler = label_error_rate(decoded=decoded[0], y=y)

        saver_early_stopping = tf.train.Saver(max_to_keep=0)

        # Summaries: cost and LER for the training set
        tf.summary.scalar("training_cost", cost_)
        tf.summary.scalar("training_label_error_rate", ler)
        summary_ops_train = tf.summary.merge_all()

        # Cost and LER for the validation set (merge_all() collects every summary
        # defined so far, so this op also re-emits the training scalars)
        tf.summary.scalar("validation_cost", cost_)
        tf.summary.scalar("validation_label_error_rate", ler)
        summary_ops_validation = tf.summary.merge_all()

    with tf.Session(graph=graph) as session:
        init = tf.global_variables_initializer()
        session.run(init)

        now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        path_model_hyperparams = "model%s-num_layers=%d-num_hidden=%d-num_epochs=%d-batch_size=%d-learning_rate=%s" \
                                % (str(now), num_layers, num_hidden, num_epochs, batch_size, str(learning_rate))

        writer_train = tf.summary.FileWriter(
            './tensorboard_graphs/' + path_model_hyperparams + '/train',
            session.graph)
        writer_validation = tf.summary.FileWriter('./tensorboard_graphs/' +
                                                  path_model_hyperparams +
                                                  '/validation')

        shuffled_indexes = np.random.permutation(num_train_examples)
        x_train = x_train[shuffled_indexes]
        y_train = y_train[shuffled_indexes]

        best_validation_ler = float('inf')  # LER is an error rate, so lower is better
        last_improvement = 0
        require_improvement = 100
        total_epochs = 0

        for curr_epoch in range(num_epochs):
            train_cost = train_ler = 0
            start = time.time()
            total_epochs += 1

            for batch in range(num_batches_per_epoch):

                indexes = [
                    i % num_train_examples
                    for i in range(batch * batch_size, (batch + 1) *
                                   batch_size)
                ]

                batch_x_train = x_train[indexes]
                batch_x_train, batch_x_train_seq_len = utils.pad_sequences(
                    batch_x_train)

                batch_y_train = utils.sparse_tuple_from(y_train[indexes])

                feed = {
                    x: batch_x_train,
                    y: batch_y_train,
                    seq_len: batch_x_train_seq_len
                }

                batch_cost, _ = session.run([cost_, optimizer], feed)
                train_cost += batch_cost * batch_size
                train_ler += session.run(ler, feed_dict=feed) * batch_size
                summary_train = session.run(summary_ops_train, feed_dict=feed)

            train_cost /= num_train_examples
            train_ler /= num_train_examples
            #train_cost_all = train_cost
            #train_ler_all = train_ler

            writer_train.add_summary(summary_train, global_step=curr_epoch)

            val_indexes = list(range(num_val_examples))
            x_validation, x_val_seq_len = utils.pad_sequences(
                x_val[val_indexes])
            y_validation = utils.sparse_tuple_from(y_val[val_indexes])

            val_feed = {
                x: x_validation,
                y: y_validation,
                seq_len: x_val_seq_len
            }

            val_cost, val_ler = session.run([cost_, ler], feed_dict=val_feed)
            summary_validation = session.run(summary_ops_validation,
                                             feed_dict=val_feed)
            writer_validation.add_summary(summary_validation,
                                          global_step=curr_epoch)

            if (total_epochs % 10 == 0) or (curr_epoch == (num_epochs - 1)):
                if val_ler < best_validation_ler:
                    best_validation_ler = val_ler
                    print(best_validation_ler)
                    last_improvement = total_epochs
                    saver_early_stopping.save(sess=session,
                                              save_path='./checkpoints/' +
                                              path_model_hyperparams +
                                              '/best_checkpoints')
                    improved_str = '*'
                else:
                    improved_str = ''

                log = "Epoch: {0:>6}, Train-Epoch Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%} {3}"
                print(
                    log.format(curr_epoch + 1, train_ler, val_ler,
                               improved_str))

            log = "Epoch {}/{}, train_cost = {:.3f}, train_ler = {:.3f}, val_cost = {:.3f}, val_ler = {:.3f}, time = {:.3f}"
            print(
                log.format(curr_epoch + 1, num_epochs, train_cost, train_ler,
                           val_cost, val_ler,
                           time.time() - start))

            if total_epochs - last_improvement > require_improvement:
                print("No improvement found in a while, stopping training.")
                break