def __generate_all_test_batches(self):
    test_batches = []
    while not self.current_test_offset + self.batch_size > self.data_len:
        old_offset = self.current_test_offset
        new_offset = self.current_test_offset + self.batch_size
        self.current_test_offset = new_offset
        raw_batch_x, raw_batch_y, raw_batch_la = zip(*self.data[old_offset:new_offset])
        batch_y = np.reshape(np.array(raw_batch_y), (-1))
        batch_dt = sparse_tuple_from(np.reshape(np.array(raw_batch_la), (-1)))
        batch_x = np.reshape(np.array(raw_batch_x),
                             (-1, self.max_image_width, 32, 1))
        test_batches.append((batch_y, batch_dt, batch_x))
    return test_batches
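# ---------------------------------------------------------------------------
# Every snippet in this section feeds CTC targets through a sparse_tuple_from
# helper. As a reference, here is a minimal sketch of such a helper, assuming
# it returns the (indices, values, dense_shape) triple that
# tf.sparse_placeholder / tf.SparseTensor expect; the exact signature and
# return type vary between the repos these snippets come from.
# ---------------------------------------------------------------------------
import numpy as np

def sparse_tuple_from(sequences, dtype=np.int32):
    """Builds the (indices, values, dense_shape) triple of a sparse tensor
    from a list of variable-length label sequences."""
    indices = []
    values = []
    for n, seq in enumerate(sequences):
        # One (batch_index, time_index) pair per label in the sequence
        indices.extend(zip([n] * len(seq), range(len(seq))))
        values.extend(seq)
    indices = np.asarray(indices, dtype=np.int64)
    values = np.asarray(values, dtype=dtype)
    dense_shape = np.asarray(
        [len(sequences), indices[:, 1].max() + 1 if len(indices) else 0],
        dtype=np.int64)
    return indices, values, dense_shape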
def test_target_wav_file(self, wav_files, txt_labels):
    print('Reading audio file: ', wav_files[0])
    print('Starting speech recognition......')
    self.audio_features, self.audio_features_len, text_vector, text_vector_len = \
        utils.get_audio_mfcc_features(None, wav_files, n_input, n_context,
                                      self.word_num_map, txt_labels)
    self.sparse_labels = utils.sparse_tuple_from(text_vector)
    d, train_ler = self.sess.run([self.decoded[0], self.label_err],
                                 feed_dict=self.get_feed_dict(dropout=1.0))
    dense_decoded = tf.sparse_tensor_to_dense(d, default_value=-1).eval(session=self.sess)
    decoded_str = utils.trans_array_to_text_ch(dense_decoded[0], self.words)
    print('Original transcript: {}'.format(txt_labels[0]))
    print('Recognized text: {}'.format(decoded_str))
    self.sess.close()
def _generate_all_train_batches(self):
    train_batches = []
    k = 0
    self.current_train_offset = 0
    while not self.current_train_offset + self.batch_size > self.test_offset:
        old_offset = self.current_train_offset
        new_offset = self.current_train_offset + self.batch_size
        self.current_train_offset = new_offset
        raw_batch_x, raw_batch_y, raw_batch_la = zip(*self.data[old_offset:new_offset])
        raw_batch_x = self.__augment_images(raw_batch_x)
        batch_y = np.reshape(np.array(raw_batch_y), (-1))
        k += 1
        if self.test_augment_image and k > 30:
            break
        batch_dt = sparse_tuple_from(np.asarray(raw_batch_la, dtype=object))
        raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)
        batch_x = np.reshape(np.array(raw_batch_x),
                             (len(raw_batch_x), self.max_image_width, self.height, 1))
        train_batches.append((batch_y, batch_dt, batch_x))
    print("Length of train batches", len(train_batches))
    random.shuffle(train_batches)
    return train_batches
def train(self, iteration_count):
    with self.__session.as_default():
        print('Training')
        for i in range(iteration_count):
            iter_loss = 0
            for batch_y, batch_sl, batch_x in self.__data_manager.get_next_train_batch():
                data_targets = np.asarray([
                    label_to_array(lbl, config.CHAR_VECTOR) for lbl in batch_y
                ])
                data_targets = sparse_tuple_from(data_targets)
                _, loss_value, decoded = self.__session.run(
                    [self.__optimizer, self.__loss, self.__decoded],
                    feed_dict={
                        self.__inputs: batch_x,
                        self.__seq_len: batch_sl,
                        self.__targets: data_targets
                    })
                iter_loss += loss_value
            print('[{}] Iteration loss: {}'.format(i, iter_loss))
    return None
def input_preprocess():
    # audio_filename = maybe_download('LDC93S1.wav', 93638)
    # target_filename = maybe_download('LDC93S1.txt', 62)
    fs, audio = wav.read(audio_filename)
    inputs = mfcc(audio, samplerate=fs)

    # Transform into a 3D array
    train_inputs = np.asarray(inputs[np.newaxis, :])
    train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
    train_seq_len = [train_inputs.shape[1]]
    num_examples = 1

    with open(target_filename, 'r') as f:
        # Only the last line is necessary
        line = f.readlines()[-1]

    # Get only the words between [a-z] and strip periods
    original = ' '.join(line.strip().lower().split(' ')[2:]).replace('.', '')
    targets = original.replace(' ', '  ')
    targets = targets.split(' ')

    # Adding blank label
    targets = np.hstack([Space_Token if x == '' else list(x) for x in targets])

    # Transform each char into its index
    targets = np.asarray([Space_Index if x == Space_Token else ord(x) - Index_Start
                          for x in targets])

    # Creating sparse representation to feed the placeholder
    train_targets = sparse_tuple_from([targets])

    # We don't have a validation dataset :(
    val_inputs, val_targets, val_seq_len = train_inputs, train_targets, train_seq_len

    return inputs, train_inputs, train_targets, train_seq_len
def batch_generator(self, queue):
    """Takes a queue and enqueues batches in it."""
    generator = GeneratorFromDict(language=self.language)
    while True:
        batch = []
        while len(batch) < self.batch_size:
            img, lbl = generator.next()
            batch.append((
                resize_image(np.array(img.convert("L")), self.max_image_width)[0],
                lbl,
                label_to_array(lbl, self.char_vector),
            ))
        raw_batch_x, raw_batch_y, raw_batch_la = zip(*batch)
        batch_y = np.reshape(np.array(raw_batch_y), (-1))
        batch_dt = sparse_tuple_from(np.reshape(np.array(raw_batch_la), (-1)))
        raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)
        raw_batch_x = raw_batch_x / 255.0
        batch_x = np.reshape(np.array(raw_batch_x),
                             (len(raw_batch_x), self.max_image_width, 32, 1))
        if queue.qsize() < 20:
            queue.put((batch_y, batch_dt, batch_x))
def train(self, iteration_count):
    with self.__session.as_default():
        print('Training')
        for i in range(self.step, iteration_count + self.step):
            iter_loss = 0
            for batch_y, batch_sl, batch_x in self.__data_manager.get_next_train_batch():
                data_targets = np.asarray([
                    label_to_array(lbl, config.CHAR_VECTOR) for lbl in batch_y
                ])
                data_targets = sparse_tuple_from(data_targets)
                op, decoded, loss_value = self.__session.run(
                    [self.__optimizer, self.__decoded, self.__cost],
                    feed_dict={
                        self.__inputs: batch_x,
                        self.__seq_len: [self.__max_char_count] * self.__data_manager.batch_size,
                        self.__targets: data_targets
                    })
                if i % 10 == 0:
                    for j in range(2):
                        print(batch_y[j])
                        print(ground_truth_to_word(decoded[j]))
                iter_loss += loss_value
            self.__saver.save(self.__session, self.__save_path, global_step=self.step)
            print('[{}] Iteration loss: {}'.format(self.step, iter_loss))
            self.step += 1
    return None
def test(self):
    with self.__session.as_default():
        print('Testing')
        total_error = 0
        example_count = 0
        for batch_y, batch_sl, batch_x in self.__data_manager.get_next_test_batch():
            data_targets = np.asarray([
                label_to_array(lbl, config.CHAR_VECTOR) for lbl in batch_y
            ])
            data_targets = sparse_tuple_from(data_targets)
            decoded = self.__session.run(
                [self.__decoded],
                feed_dict={
                    self.__inputs: batch_x,
                    self.__seq_len: batch_sl
                })
            example_count += len(batch_y)
            total_error += np.sum(
                levenshtein(ground_truth_to_word(batch_y),
                            ground_truth_to_word(decoded)))
        print('Error on test set: {} (average per example: {})'.format(
            total_error, total_error / example_count))
    return None
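# ---------------------------------------------------------------------------
# The test loop above relies on a levenshtein helper. A minimal edit-distance
# sketch, assuming it is called with two sequences (strings or token lists);
# the repo's own helper may differ in signature.
# ---------------------------------------------------------------------------
def levenshtein(a, b):
    """Classic dynamic-programming edit distance between sequences a and b."""
    if len(a) < len(b):
        a, b = b, a
    previous = list(range(len(b) + 1))
    for i, ca in enumerate(a, start=1):
        current = [i]
        for j, cb in enumerate(b, start=1):
            current.append(min(previous[j] + 1,                   # deletion
                               current[j - 1] + 1,                # insertion
                               previous[j - 1] + (ca != cb)))     # substitution
        previous = current
    return previous[len(b)]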
def init_op_test_batches(self):
    test_batches = []
    num_batch = int(np.floor(len(self.test_data) / self.batch_size))
    for index in range(num_batch):
        raw_batch_x, raw_batch_y, raw_batch_la = zip(
            *self.test_data[index * self.batch_size:(1 + index) * self.batch_size])
        batch_y = np.reshape(np.array(raw_batch_y), (-1))
        batch_dt = sparse_tuple_from(np.array(raw_batch_la))
        # batch_dt = sparse_tuple_from(
        #     np.reshape(np.array(raw_batch_la), (-1)))
        raw_batch_x = np.swapaxes(raw_batch_x, 1, 2)
        batch_x = np.reshape(np.array(raw_batch_x),
                             (len(raw_batch_x), self.max_image_width, 32, 1))
        test_batches.append((batch_y, batch_dt, batch_x))
    self.test_batches = test_batches
def train(self):
    with self.sess.as_default():
        # log file writer
        log_writer = tf.summary.FileWriter(self.log_path, self.sess.graph)
        for i in range(self.epoches):
            iteration_loss = 0
            batch_x, batch_y, batch_length = self.data.get_next_train_batch(self.batch_size)
            data_targets, _, _ = sparse_tuple_from(batch_y)
            batch_length = np.array(batch_length)
            print(len(batch_x), data_targets.shape, batch_length.shape)
            _, loss_val, predict_str, summary = self.sess.run(
                [self.optimizer, self.losses, self.decoded, self.summary],
                feed_dict={
                    self.inputs: batch_x,
                    self.targets: data_targets,
                    self.seq_len: batch_length
                })
            iteration_loss += loss_val
            log_writer.add_summary(summary, i)
            print("Iteration {} : loss: {}".format(i, iteration_loss))
    return None
def main():
    ds = dataset(DATA_FOLDER, 1)
    global_step = tf.Variable(0, trainable=False)
    outputs, inputs, _, seq_len = get_model()
    decoded, _ = tf.nn.ctc_beam_search_decoder(outputs, seq_len, merge_repeated=False)
    with tf.Session() as sess:
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        saver.restore(sess, 'trained_model/model')
        test_input, test_label = ds.data, ds.labels
        test_targets = sparse_tuple_from(test_label)
        feed = {
            inputs: test_input,
            seq_len: [MAX_TIMESTEPS for _ in range(len(test_input))]
        }
        dd = sess.run(decoded[0], feed_dict=feed)
        report_accuracy(dd, test_targets)
def get_next_batch_for_res_train(batch_size=128):
    images = []
    codes = []
    max_width_image = 0
    info = ""
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])  # 2 was removed
        text = random.sample(CHARS, 12)
        text = text + text + [" ", " "]
        random.shuffle(text)
        text = "".join(text).strip()
        codes.append([CHARS.index(char) for char in text])
        image = utils_font.get_font_image_from_url(text, font_name, font_size,
                                                   font_mode, font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.convert_to_gray(image)
        image = np.asarray(image)
        # image = utils.resize(image, height=image_height)
        # image = utils.img2bwinv(image)
        image = utils_pil.convert_to_bw(image)
        images.append((255. - image) / 255.)
        if image.shape[1] > max_width_image:
            max_width_image = image.shape[1]
        info = info + "%s\n\r" % utils_font.get_font_url(text, font_name, font_size,
                                                         font_mode, font_hint)
    max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i], height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.transpose(image_vec)
    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image * image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, sparse_labels, seq_len, info
def run(self, data, epoch_num, is_pingce, learning_rate=None):
    data_x, data_y = data
    # Padding input to max_time_step of this batch
    batch_train_inputs, batch_train_seq_len = pad_sequences(data_x)
    # Converting to sparse representation so as to feed the SparseTensor input
    batch_train_targets = sparse_tuple_from(data_y)
    # if epoch_num % config.epcho_num_for_test == 0:
    # get pingce (evaluation) result
    if is_pingce and epoch_num % 5 == 0:
        self.get_pingce_result(batch_train_inputs, batch_train_targets,
                               batch_train_seq_len, learning_rate, epoch_num)
    if self.is_training:
        # start = time.time()
        return self.sess.run(
            [self.total_loss, self.total_ler, self.global_step, self.train_op],
            feed_dict={
                self.x: batch_train_inputs,
                self.y: batch_train_targets,
                self.learning_rate: learning_rate,
                self.seq_len: batch_train_seq_len
            })
    else:
        return self.sess.run(
            [self.global_loss_update, self.global_ler_update],
            feed_dict={
                self.x: batch_train_inputs,
                self.y: batch_train_targets,
                self.learning_rate: learning_rate,
                self.seq_len: batch_train_seq_len
            })
def next_batch(bs=batch_size, train=True):
    x_batch = []
    y_batch = []
    seq_len_batch = []
    original_batch = []
    i = 0
    for k in range(bs):
        ut_length_dict = dict([(k, len(v['target'])) for (k, v) in audio.cache.items()])
        utterances = sorted(ut_length_dict.items(), key=operator.itemgetter(1))
        test_index = 346
        if train:
            utterances = [a[0] for a in utterances[test_index:]]
        else:
            utterances = [a[0] for a in utterances[:test_index]]
        training_element = audio.cache[utterances[i]]
        target_text = training_element['target']
        audio_buffer = training_element['audio']
        x, y, seq_len, original = convert_inputs_to_ctc_format(
            audio_buffer, sample_rate, 'whatever', num_features)
        x_batch.append(x)
        y_batch.append(y)
        seq_len_batch.append(seq_len)
        original_batch.append(original)
        i += 1
    y_batch = sparse_tuple_from(y_batch)
    seq_len_batch = np.array(seq_len_batch)[:, 0]
    for i, pad in enumerate(np.max(seq_len_batch) - seq_len_batch):
        x_batch[i] = np.pad(x_batch[i], ((0, 0), (0, pad), (0, 0)),
                            mode='constant', constant_values=0)
    x_batch = np.concatenate(x_batch, axis=0)
    return x_batch, y_batch, seq_len_batch, original_batch
def evaluate_cost(self, X):
    NN = (X.lengths).shape[0]
    N = (X.images).shape[0]
    avg_cost = 0.0
    start = 0
    total = 0
    total_batch = int(math.ceil(1.0 * NN / self.bsize))
    for batchidx in range(total_batch):
        batch_x, labels, tmplen, mysize = X.next_batch(self.bsize, start)
        # Need to convert labels to targets
        test_targets = sparse_tuple_from(labels)
        error, A = self.sess.run(
            [self.ler, self.decoded[0]],
            feed_dict={
                self.x: batch_x,
                self.targets: test_targets,
                self.mylen: tmplen,
                self.keepprob: 1.0
            })
        print(A.values)
        print(test_targets[1])
        avg_cost += error
        start += self.bsize
    return avg_cost / NN
for curr_epoch in range(num_epochs):
    train_cost = train_ler = 0
    start = time.time()

    for batch in range(num_batches_per_epoch):
        # Getting the index
        indexes = [i % num_examples
                   for i in range(batch * batch_size, (batch + 1) * batch_size)]
        batch_train_inputs = train_inputs[indexes]

        # Padding input to max_time_step of this batch
        batch_train_inputs, batch_train_seq_len = pad_sequences(batch_train_inputs)

        # Converting to sparse representation so as to feed the SparseTensor input
        batch_train_targets = sparse_tuple_from(train_targets[indexes])

        feed = {inputs: batch_train_inputs,
                targets: batch_train_targets,
                seq_len: batch_train_seq_len}

        batch_cost, _ = session.run([cost, optimizer], feed)
        train_cost += batch_cost * batch_size
        train_ler += session.run(ler, feed_dict=feed) * batch_size

    # Shuffle the data
    shuffled_indexes = np.random.permutation(num_examples)
    train_inputs = train_inputs[shuffled_indexes]
    train_targets = train_targets[shuffled_indexes]
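# ---------------------------------------------------------------------------
# Several of these loops call pad_sequences right before building the sparse
# targets. A plausible sketch, assuming the conventional contract of the
# classic TensorFlow CTC example: a list of [time, num_features] arrays goes
# in, a zero-padded batch plus the original lengths come out. The helpers in
# the individual repos may take extra arguments (e.g. maxlen).
# ---------------------------------------------------------------------------
def pad_sequences(sequences, value=0.0):
    """Pads a list of [time, num_features] arrays to the longest time length.
    Returns the padded batch and the original sequence lengths."""
    lengths = np.asarray([len(s) for s in sequences], dtype=np.int32)
    max_len = lengths.max()
    num_features = np.asarray(sequences[0]).shape[-1]
    padded = np.full((len(sequences), max_len, num_features), value, dtype=np.float32)
    for i, s in enumerate(sequences):
        padded[i, :len(s), :] = s
    return padded, lengths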
# tf.initialize_all_variables().run()
tf.global_variables_initializer().run()

for curr_epoch in range(num_epochs):
    train_cost = train_ler = 0
    start = time.time()
    for i, batch in enumerate(datagen.iterate_train(mb_size, shuffle=False,
                                                    sort_by_duration=True)):
        train_inputs = batch['x']
        train_targets = batch['y']
        train_texts = batch['texts']
        train_seq_len = batch['input_lengths']
        # batch_train_inputs, batch_train_seq_len = pad_sequences(train_inputs)
        batch_train_targets = sparse_tuple_from(train_targets)
        print("Epoch {}/{}, batch number {} ".format(curr_epoch + 1, num_epochs, i))
        feed = {inputs: train_inputs,
                targets: batch_train_targets,
                seq_len: train_seq_len}
        batch_cost, _ = session.run([cost, optimizer], feed)
        train_cost += batch_cost * batch_size
        train_ler += session.run(ler, feed_dict=feed) * batch_size
    train_cost /= num_examples
    train_ler /= num_examples
    val_inputs, val_targets, val_seq_len = train_inputs, batch_train_targets, train_seq_len
targets = targets.split(' ')
# print("{}".format(targets))

# Adding blank label
targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])
# print("{}".format(targets))

# Transform each char into its index
targets = np.asarray([
    SPACE_INDEX if x == SPACE_TOKEN
    else COLLON_INDEX if x == COLLON_TOKEN
    else ord(x) - FIRST_INDEX
    for x in targets
])
# print("{}".format(targets))

# Creating sparse representation to feed the placeholder
train_targets = sparse_tuple_from([targets])
# print("{}".format(train_targets))

# We don't have a validation dataset :(
val_inputs, val_targets, val_seq_len = train_inputs, train_targets, train_seq_len

# THE MAIN CODE!
graph = tf.Graph()
with graph.as_default():
    # e.g: log filter bank or MFCC features
    # Has size [batch_size, max_stepsize, num_features], but the
    # batch_size and max_stepsize can vary along each step
    inputs = tf.placeholder(tf.float32, [None, None, num_features])

    # Here we use sparse_placeholder that will generate a
    # SparseTensor required by ctc_loss op.
def train_model(ENV, in_file, op_file):
    graph = tf.Graph()
    with graph.as_default():
        stacked_layers = {}

        # e.g: log filter bank or MFCC features
        # Has size [batch_size, max_stepsize, num_features], but the
        # batch_size and max_stepsize can vary along each step
        inputs = tf.placeholder(tf.float32, [None, None, num_features])
        targets = tf.sparse_placeholder(tf.int32)
        # 1d array of size [batch_size]
        seq_len = tf.placeholder(tf.int32, [None])

        # Weights & biases
        weight_classes = tf.Variable(
            tf.truncated_normal([num_hidden, num_classes],
                                mean=0, stddev=0.1, dtype=tf.float32))
        bias_classes = tf.Variable(tf.zeros([num_classes]), dtype=tf.float32)

        # _activation = tf.nn.relu  # this was causing the model to diverge
        _activation = None

        layers = {'forward': [], 'backward': []}
        for key in layers.keys():
            for i in range(num_layers):
                cell = tf.nn.rnn_cell.LSTMCell(num_hidden,
                                               use_peepholes=True,
                                               activation=_activation,
                                               state_is_tuple=True,
                                               cell_clip=clip_thresh)
                # cell = RWACell(num_units=num_hidden)
                layers[key].append(cell)
            stacked_layers[key] = tf.nn.rnn_cell.MultiRNNCell(layers[key],
                                                              state_is_tuple=True)

        outputs, bilstm_vars = tf.nn.bidirectional_dynamic_rnn(
            stacked_layers['forward'],
            stacked_layers['backward'],
            inputs,
            sequence_length=seq_len,
            time_major=False,  # [batch_size, max_time, num_hidden]
            dtype=tf.float32)
        # outputs_concate = tf.concat_v2(outputs, 2)
        # outputs_concate = tf.reshape(outputs_concate, [-1, 2 * num_hidden])
        # logits = tf.matmul(outputs_concate, weight_classes) + bias_classes
        fw_output = tf.reshape(outputs[0], [-1, num_hidden])
        bw_output = tf.reshape(outputs[1], [-1, num_hidden])
        logits = tf.add(
            tf.add(tf.matmul(fw_output, weight_classes),
                   tf.matmul(bw_output, weight_classes)),
            bias_classes)
        logits = tf.reshape(logits, [batch_size, -1, num_classes])

        loss = tf.nn.ctc_loss(targets, logits, seq_len, time_major=False)
        error = tf.reduce_mean(loss)
        optimizer = tf.train.MomentumOptimizer(learning_rate, momentum).minimize(error)

        # Evaluating
        # decoded, log_prob = ctc_ops.ctc_greedy_decoder(tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        decoded, log_prob = tf.nn.ctc_beam_search_decoder(
            tf.transpose(logits, perm=[1, 0, 2]), seq_len)
        label_error_rate = tf.reduce_mean(
            tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5)

    data, labels = load_ipad_data(in_file)
    bound = ((3 * len(data) // batch_size) // 4) * batch_size
    train_inputs = data[0:bound]
    train_labels = labels[0:bound]
    test_data = data[bound:]
    test_labels = labels[bound:]
    num_examples = len(train_inputs)
    num_batches_per_epoch = num_examples // batch_size

    with tf.Session(graph=graph,
                    config=tf.ConfigProto(gpu_options=gpu_options)) as session:
        # Initialize the weights and biases
        tf.global_variables_initializer().run()
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=0)
        ckpt = tf.train.get_checkpoint_state(op_file)
        if ckpt:
            logging.info('load %s', ckpt.model_checkpoint_path)
            saver.restore(session, ckpt.model_checkpoint_path)
        else:
            logging.info("no previous session to load")

        for curr_epoch in range(num_epochs):
            train_cost = train_ler = 0
            start = time.time()
            for batch in range(num_batches_per_epoch):
                # Getting the index
                indices = [i % num_examples
                           for i in range(batch * batch_size, (batch + 1) * batch_size)]
                batch_train_inputs = train_inputs[indices]
                # Padding input to max_time_step of this batch
                batch_train_inputs, batch_train_seq_len = pad_sequences(batch_train_inputs)
                # Converting to sparse representation so as to feed the SparseTensor input
                batch_train_targets = sparse_tuple_from(train_labels[indices])
                feed = {
                    inputs: batch_train_inputs,
                    targets: batch_train_targets,
                    seq_len: batch_train_seq_len
                }
                batch_cost, _ = session.run([error, optimizer], feed)
                train_cost += batch_cost * batch_size
                train_ler += session.run(label_error_rate, feed_dict=feed) * batch_size
                log = "Epoch {}/{}, iter {}, batch_cost {}"
                logging.info(log.format(curr_epoch + 1, num_epochs, batch, batch_cost))
                saver.save(session, os.path.join(ENV.output, 'best.ckpt'),
                           global_step=curr_epoch)

            # Shuffle the data
            shuffled_indexes = np.random.permutation(num_examples)
            train_inputs = train_inputs[shuffled_indexes]
            train_labels = train_labels[shuffled_indexes]

            # Metrics mean
            train_cost /= num_examples
            train_ler /= num_examples
            log = "Epoch {}/{}, train_cost = {:.3f}, train_ler = {:.3f}, time = {:.3f}"
            logging.info(log.format(curr_epoch + 1, num_epochs, train_cost,
                                    train_ler, time.time() - start))

            # Run the test data through
            indices = [i % len(test_data)
                       for i in range(batch * batch_size, (batch + 1) * batch_size)]
            test_inputs = test_data[indices]
            test_inputs, test_seq_len = pad_sequences(test_inputs)
            test_targets = sparse_tuple_from(test_labels[indices])
            feed_test = {
                inputs: test_inputs,
                targets: test_targets,
                seq_len: test_seq_len
            }
            test_cost, test_ler = session.run([error, label_error_rate],
                                              feed_dict=feed_test)
            log = "Epoch {}/{}, test_cost {}, test_ler {}"
            logging.info(log.format(curr_epoch + 1, num_epochs, test_cost, test_ler))

        # Export the trained weights as a Core ML model
        input_features = [('strokeData', datatypes.Array(num_features))]
        output_features = [('labels', datatypes.Array(num_classes))]
        vars = tf.trainable_variables()
        weights = {'forward': {}, 'backward': {}}
        for _var in vars:
            name = _var.name
            if name.startswith('bidirectional_rnn/fw'):
                key = name.replace('bidirectional_rnn/fw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['forward'][key] = _var.eval()
            else:
                key = name.replace('bidirectional_rnn/bw/', '')
                key = key.replace('multi_rnn_cell/cell_0/lstm_cell/', '')
                key = key.replace(':0', '')
                weights['backward'][key] = _var.eval()

        builder = NeuralNetworkBuilder(input_features, output_features, mode=None)
        fw_biases = [weights['forward']['bias'][0 * num_hidden:1 * num_hidden],
                     weights['forward']['bias'][1 * num_hidden:2 * num_hidden],
                     weights['forward']['bias'][2 * num_hidden:3 * num_hidden],
                     weights['forward']['bias'][3 * num_hidden:4 * num_hidden]]
        bw_biases = [weights['backward']['bias'][0 * num_hidden:1 * num_hidden],
                     weights['backward']['bias'][1 * num_hidden:2 * num_hidden],
                     weights['backward']['bias'][2 * num_hidden:3 * num_hidden],
                     weights['backward']['bias'][3 * num_hidden:4 * num_hidden]]

        num_LSTM_gates = 5
        input_weights = {
            'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features)),
            'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_features))
        }
        recurrent_weights = {
            'forward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden)),
            'backward': np.zeros((num_LSTM_gates - 1, num_hidden, num_hidden))
        }

        builder.add_bidirlstm(
            name='bidirectional_1',
            W_h=recurrent_weights['forward'],
            W_x=input_weights['forward'],
            b=fw_biases,
            W_h_back=recurrent_weights['backward'],
            W_x_back=input_weights['backward'],
            b_back=bw_biases,
            hidden_size=num_hidden,
            input_size=num_features,
            input_names=['strokeData',
                         'bidirectional_1_h_in', 'bidirectional_1_c_in',
                         'bidirectional_1_h_in_rev', 'bidirectional_1_c_in_rev'],
            output_names=['y',
                          'bidirectional_1_h_out', 'bidirectional_1_c_out',
                          'bidirectional_1_h_out_rev', 'bidirectional_1_c_out_rev'],
            peep=[weights['forward']['w_i_diag'],
                  weights['forward']['w_f_diag'],
                  weights['forward']['w_o_diag']],
            peep_back=[weights['backward']['w_i_diag'],
                       weights['backward']['w_f_diag'],
                       weights['backward']['w_o_diag']],
            cell_clip_threshold=clip_thresh)

        builder.add_softmax(name='softmax', input_name='y', output_name='labels')

        optional_inputs = [('bidirectional_1_h_in', num_hidden),
                           ('bidirectional_1_c_in', num_hidden),
                           ('bidirectional_1_h_in_rev', num_hidden),
                           ('bidirectional_1_c_in_rev', num_hidden)]
        optional_outputs = [('bidirectional_1_h_out', num_hidden),
                            ('bidirectional_1_c_out', num_hidden),
                            ('bidirectional_1_h_out_rev', num_hidden),
                            ('bidirectional_1_c_out_rev', num_hidden)]
        # Not really sure what the line below does; it was copied from the Keras
        # converter in coremltools, and it seemed to make things work.
        builder.add_optionals(optional_inputs, optional_outputs)

        model = MLModel(builder.spec)
        model.short_description = ('Model for recognizing symbols and diagrams '
                                   'drawn on an iPad screen with an Apple Pencil')
        model.input_description['strokeData'] = 'A collection of strokes to classify'
        model.output_description['labels'] = 'The "probability" of each label, in a dense array'
        outfile = 'bilstm.mlmodel'
        model.save(outfile)
        print('Saved to file: %s' % outfile)
def get_next_batch(batch_size=128):
    images = []
    to_images = []
    codes = []
    max_width_image = 0
    for i in range(batch_size):
        font_name = random.choice(AllFontNames)
        font_length = random.randint(25, 30)
        font_size = 36  # random.randint(image_height, 64)
        font_mode = random.choice([0, 1, 2, 4])
        font_hint = random.choice([0, 1, 2, 3, 4, 5])
        text = utils_font.get_random_text(CHARS, eng_world_list, font_length)
        # text = random.sample(CHARS, 12)
        # text = text + text
        # random.shuffle(text)
        # text = "".join(text).strip()
        codes.append([CHARS.index(char) for char in text])
        image = utils_font.get_font_image_from_url(text, font_name, font_size,
                                                   fontmode=font_mode, fonthint=font_hint)
        image = utils_pil.resize_by_height(image, image_height)
        to_image = image.copy()
        image = utils_font.add_noise(image)
        image = utils_pil.convert_to_gray(image)
        _h = random.randint(9, int(image_height // random.choice([1, 1.5, 2, 2.5])))
        image = utils_pil.resize_by_height(image, _h, random.random() > 0.5)
        image = utils_pil.resize_by_height(image, image_height, random.random() > 0.5)
        image = np.asarray(image)
        image = utils.resize(image, height=image_height)
        image = (255. - image) / 255.
        images.append(image)
        # to_image = utils_font.get_font_image_from_url(text, font_name, image_height,
        #                                               fontmode=font_mode, fonthint=font_hint)
        to_image = utils_pil.convert_to_gray(to_image)
        to_image = np.asarray(to_image)
        to_image = utils.resize(to_image, height=image_height)
        to_image = utils.img2bwinv(to_image)
        to_image = to_image / 255.
        to_images.append(to_image)
        if image.shape[1] > max_width_image:
            max_width_image = image.shape[1]
        if to_image.shape[1] > max_width_image:
            max_width_image = to_image.shape[1]
    max_width_image = max_width_image + (POOL_SIZE - max_width_image % POOL_SIZE)
    inputs = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(images)):
        image_vec = utils.img2vec(images[i], height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.transpose(image_vec)
    targets = np.zeros([batch_size, max_width_image, image_height])
    for i in range(len(to_images)):
        image_vec = utils.img2vec(to_images[i], height=image_height,
                                  width=max_width_image, flatten=False)
        targets[i, :] = np.transpose(image_vec)
    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    seq_len = np.ones(batch_size) * (max_width_image * image_height) // (POOL_SIZE * POOL_SIZE)
    return inputs, targets, sparse_labels, seq_len
targets[i] = targets[i].split(' ')
# np.append(Targets, targets)

# Adding blank label
targets[i] = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets[i]])

# Transform each char into its index
targets[i] = np.asarray([
    SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX
    for x in targets[i]
])

# Creating sparse representation to feed the placeholder
train_targets[i] = sparse_tuple_from([targets[i]])

# train_inputs = np.concatenate(tuple(train_inputs.values()), axis=1)
# print(len(train_inputs))
# Targets = np.zeros((0, 2))
# Reading targets
# targets_list = []
# np.asarray(train_targets[np.newaxis, :])
# targets_list.append(train_targets)
# targets_list.append(train_targets2)
# print(targets_list)
# train_targets2 = np.asarray(train_targets2[np.newaxis, :])
# train_targets = np.concatenate((train_targets, train_targets2))
    if continue_training else True
]

# model.build(input_shape=(2, 32, 200, 1))
optimizer = tf.keras.optimizers.Adam(learning_rate=0.0001, clipnorm=5)
loss_hist = []
# [print(i.name, i.shape) for i in model.trainable_variables]

# training
# dataset: https://www.robots.ox.ac.uk/~vgg/data/text/#sec-synth
# please check the data_generator in utils for the path to the dataset
# the training set contains 7224612 images / 32 = 225769 batches
for x_batch, y_batch in data_generator(batches=112884, batch_size=64, epochs=10):
    indices, values, dense_shape = sparse_tuple_from(y_batch)
    y_batch_sparse = tf.sparse.SparseTensor(indices=indices,
                                            values=values,
                                            dense_shape=dense_shape)
    with tf.GradientTape() as tape:
        logits, raw_pred, rnn_out = model(x_batch)
        loss = tf.reduce_mean(
            tf.nn.ctc_loss(labels=y_batch_sparse,
                           logits=rnn_out,
                           label_length=[len(i) for i in y_batch],
                           logit_length=[47] * len(y_batch),
                           blank_index=62))
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
def get_next_batch_for_res(batch_size=128, has_sparse=True, has_onehot=True,
                           max_width=4096, height=32, need_pad_width_to_max_width=False):
    inputs_images = []
    codes = []
    # Maximum width among the images in this batch
    max_width_image = 0
    info = []
    seq_len = np.ones(batch_size)
    for i in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished training one data file, stop")
        dataset_example.ParseFromString(serialized_example)
        font_name = str(dataset_example.features.feature['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = dataset_example.features.feature['font_size'].int64_list.value[0]
        font_mode = dataset_example.features.feature['font_mode'].int64_list.value[0]
        font_hint = dataset_example.features.feature['font_hint'].int64_list.value[0]
        text = str(dataset_example.features.feature['label'].bytes_list.value[0],
                   encoding="utf-8")
        size = dataset_example.features.feature['size'].int64_list.value
        image = dataset_example.features.feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)

        # Convert the image to grayscale
        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > height:
            image = utils_pil.resize_by_height(image, height)

        # Randomly shift the image position
        image = utils_pil.resize_by_height(image, height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, height)

        # Add noise
        image = utils_font.add_noise(image)

        # Convert to OpenCV (ndarray) format
        image = np.asarray(image)

        # Scale by height by default; if the width exceeds the maximum, scale by width
        image = utils.resize(image, height, max_width)

        # Randomly invert colors, then normalize
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.

        # Record the largest image width seen so far
        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]

        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([font_name, str(font_size), str(font_mode),
                     str(font_hint), str(len(text))])

    # Round up to a multiple of 4
    if max_width_image % 4 > 0:
        max_width_image = max_width_image + 4 - max_width_image % 4

    # If an image exceeds the maximum width, raise instead of rescaling
    if max_width_image > max_width:
        raise Exception("img width must %s <= %s" % (max_width_image, max_width))

    if need_pad_width_to_max_width:
        max_width_image = max_width

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i], height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.reshape(image_vec, (image_height, max_width_image, 1))

    labels = [np.asarray(i) for i in codes]
    sparse_labels = None
    onehot_labels = None
    if has_sparse:
        sparse_labels = utils.sparse_tuple_from(labels)
        sparse_labels = np.array(sparse_labels)
    if has_onehot:
        onehot_labels = []
        for label in labels:
            label_one_hot = np.eye(CLASSES_NUMBER)[label]
            onehot_labels.append(label_one_hot)
        onehot_labels = np.array(onehot_labels)
    return inputs, np.array(labels), sparse_labels, onehot_labels, info
    line = f.readlines()[-1]

# Get only the words between [a-z] and strip periods
original = ' '.join(line.strip().lower().split(' ')[2:]).replace('.', '')
targets = original.replace(' ', '  ')
targets = targets.split(' ')

# Adding blank label
targets = np.hstack([SPACE_TOKEN if x == '' else list(x) for x in targets])

# Transform each char into its index
targets = np.asarray([SPACE_INDEX if x == SPACE_TOKEN else ord(x) - FIRST_INDEX
                      for x in targets])

# Creating sparse representation to feed the placeholder
train_targets = sparse_tuple_from([targets])

# We don't have a validation dataset :(
val_inputs, val_targets, val_seq_len = train_inputs, train_targets, train_seq_len

# THE MAIN CODE!
graph = tf.Graph()
with graph.as_default():
    # e.g: log filter bank or MFCC features
    # Has size [batch_size, max_stepsize, num_features], but the
    # batch_size and max_stepsize can vary along each step
    inputs = tf.placeholder(tf.float32, [None, None, num_features])
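# ---------------------------------------------------------------------------
# Several fragments above index characters with SPACE_TOKEN / SPACE_INDEX /
# FIRST_INDEX without defining them. Typical definitions, following the
# classic TensorFlow CTC example these fragments appear to derive from
# (treat the exact values as an assumption):
# ---------------------------------------------------------------------------
SPACE_TOKEN = '<space>'
SPACE_INDEX = 0
FIRST_INDEX = ord('a') - 1  # index 0 is reserved for the space label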
for batch in range(num_batches_per_epoch):
    # Getting the index
    indexes = [i % num_examples
               for i in range(batch * batch_size, (batch + 1) * batch_size)]
    batch_train_inputs = train_inputs[indexes]

    # Padding input to max_time_step of this batch
    batch_train_inputs, batch_train_seq_len = utils.pad_sequences(batch_train_inputs)

    # Converting to sparse representation so as to feed the SparseTensor input
    batch_train_targets = utils.sparse_tuple_from(train_targets[indexes])

    feed = {
        inputs: batch_train_inputs,
        targets: batch_train_targets,
        seq_len: batch_train_seq_len
    }
    batch_cost, _ = session.run([cost, optimizer], feed)
    train_cost += batch_cost * batch_size
    train_ler += session.run(ler, feed_dict=feed) * batch_size

# Shuffle the data
shuffled_indexes = np.random.permutation(num_examples)
train_inputs = train_inputs[shuffled_indexes]
train_targets = train_targets[shuffled_indexes]
def main():
    ds = dataset(DATA_FOLDER, BATCH_SIZE)
    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(INITIAL_LEARNING_RATE,
                                               global_step,
                                               DECAY_STEPS,
                                               LEARNING_RATE_DECAY_FACTOR,
                                               staircase=True)
    outputs, inputs, targets, seq_len = get_model()
    loss = tf.nn.ctc_loss(labels=targets, inputs=outputs, sequence_length=seq_len)
    cost = tf.reduce_mean(loss)
    # optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
    #                                        momentum=MOMENTUM).minimize(cost, global_step=global_step)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(
        loss, global_step=global_step)
    decoded, _ = tf.nn.ctc_beam_search_decoder(outputs, seq_len, merge_repeated=False)
    e_dis = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))
    init = tf.global_variables_initializer()

    def do_report():
        test_inputs, test_labels, _ = ds.next_batch()
        test_targets = sparse_tuple_from(test_labels)
        test_feed = {inputs: test_inputs,
                     targets: test_targets,
                     seq_len: [MAX_TIMESTEPS for _ in range(len(test_inputs))]}
        dd = session.run(decoded[0], test_feed)
        report_accuracy(dd, test_targets)

    with tf.Session() as session:
        session.run(init)
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)
        for curr_epoch in range(NUM_EPOCHES):
            print("Epoch.......", curr_epoch)
            train_cost = 0
            new_epoch = False
            train_size = 0
            while not new_epoch:
                train_inputs, train_labels, new_epoch = ds.next_batch()
                train_targets = sparse_tuple_from(train_labels)
                feed = {inputs: train_inputs,
                        targets: train_targets,
                        seq_len: [MAX_TIMESTEPS for _ in range(len(train_inputs))]}
                c, steps, _ = session.run([cost, global_step, optimizer], feed)
                train_cost += c * BATCH_SIZE
                print("Step: %d, Loss: %.5f" % (steps, c))
                train_size += BATCH_SIZE
            if (curr_epoch + 1) % REPORT_EPOCHES == 0:
                do_report()
                save_path = saver.save(session, "saved_models/model", global_step=steps)
                print('save model on %s' % save_path)
            train_cost /= train_size
            train_inputs, train_labels, _ = ds.next_batch()
            train_targets = sparse_tuple_from(train_labels)
            val_feed = {inputs: train_inputs,
                        targets: train_targets,
                        seq_len: [MAX_TIMESTEPS for _ in range(len(train_inputs))]}
            val_cost, val_edit_dis, lr, steps = session.run(
                [cost, e_dis, learning_rate, global_step], feed_dict=val_feed)
            log = ("Epoch {}/{}, steps = {}, train_cost = {:.3f}, val_cost = {:.3f}, "
                   "val_edit_dis = {:.3f}, learning_rate = {}")
            print(log.format(curr_epoch + 1, NUM_EPOCHES, steps, train_cost,
                             val_cost, val_edit_dis, lr))
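# ---------------------------------------------------------------------------
# do_report above hands the decoded SparseTensorValue and the target tuple to
# report_accuracy, which is repo-specific. For reference, a hedged sketch of
# the densification step such a comparison typically needs; sparse_to_lists
# below is an illustrative helper, not the repo's own function. It works on
# both the (indices, values, dense_shape) tuple and a SparseTensorValue's
# fields.
# ---------------------------------------------------------------------------
def sparse_to_lists(indices, values, dense_shape):
    """Converts a sparse (indices, values, dense_shape) triple back into
    one label list per batch element."""
    result = [[] for _ in range(int(dense_shape[0]))]
    for (row, _col), v in zip(indices, values):
        result[int(row)].append(v)
    return result

# Example usage against the decoded output dd and the fed targets:
#   decoded_seqs = sparse_to_lists(dd.indices, dd.values, dd.dense_shape)
#   target_seqs = sparse_to_lists(*test_targets)
#   accuracy = np.mean([list(d) == list(t)
#                       for d, t in zip(decoded_seqs, target_seqs)])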
def train(self, session):
    inputs = tf.placeholder(
        tf.float32,
        [self.batch_size, self.num_features, None, self.dataset.im_depth])
    targets = tf.sparse_placeholder(tf.int32)
    seq_len = tf.placeholder(tf.int32, [None])

    logits = self.model(inputs, seq_len)
    loss = tf.nn.ctc_loss(targets, logits, seq_len)
    cost = tf.reduce_mean(loss)

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(self.initial_learning_rate,
                                               global_step, 8000, 0.98,
                                               staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(
        cost, global_step=global_step)

    # Option 2: tf.nn.ctc_beam_search_decoder
    # (it's slower but you'll get better results)
    decoded, log_prob = tf.nn.ctc_greedy_decoder(logits, seq_len)

    # Inaccuracy: label error rate
    ler = tf.reduce_mean(tf.edit_distance(tf.cast(decoded[0], tf.int32), targets))

    tf.global_variables_initializer().run(session=session)
    saver = tf.train.Saver(tf.global_variables())
    if not os.path.exists(self.checkpoint_path):
        os.mkdir(self.checkpoint_path)
    ckpt = tf.train.get_checkpoint_state(self.checkpoint_path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(session, ckpt.model_checkpoint_path)
        print("Model restored.")
    else:
        print("No checkpoint found, start training from beginning.")

    for curr_epoch in range(self.num_epochs):
        train_cost = train_ler = 0
        start = time.time()
        X, Y = self.dataset.get_batch()
        for batch in range(self.num_batches_per_epoch):
            train_seq_len = [x.shape[1] for x in X]
            print("EPOCH", curr_epoch, "PROGRESS",
                  self.dataset.index_in_epoch, self.dataset.total_examples)
            train_targets = sparse_tuple_from(Y)
            feed = {
                inputs: X,
                targets: train_targets,
                seq_len: train_seq_len
            }
            batch_cost, _ = session.run([cost, optimizer], feed)
            train_cost += batch_cost * self.batch_size
            train_ler += session.run(ler, feed_dict=feed) * self.batch_size

            # VERBOSE
            if batch % 2 == 0:
                decod = session.run(decoded, feed)
                for j in range(self.batch_size):
                    # print("Y:", j, iam_train.id_to_char(Y[j]))
                    print("DECODED BATCH OUTPUT:",
                          self.dataset.id_to_char(decod[0][1]))
audio_filename = 'wav/2_001002.wav'  # maybe_download('LDC93S1.wav', 93638)
target_filename = 'wav/001002.txt'   # maybe_download('LDC93S1.txt', 62)

inputs = utils.wav_mfcc(audio_filename)

# Transform into a 3D array
train_inputs = np.asarray(inputs[np.newaxis, :])
train_inputs = (train_inputs - np.mean(train_inputs)) / np.std(train_inputs)
train_seq_len = [train_inputs.shape[1]]

# Reading targets
targets, original = utils.encode_target_file(target_filename)

# Creating sparse representation to feed the placeholder
train_targets = utils.sparse_tuple_from([targets])

# We don't have a validation dataset :(
val_inputs, val_targets, val_seq_len = train_inputs, train_targets, train_seq_len

# THE MAIN CODE!
graph = tf.Graph()
with graph.as_default():
    # e.g: log filter bank or MFCC features
    # Has size [batch_size, max_stepsize, num_features], but the
    # batch_size and max_stepsize can vary along each step
    inputs = tf.placeholder(tf.float32, [None, None, num_features])

    # Here we use sparse_placeholder that will generate a
    # SparseTensor required by ctc_loss op.
start = time.time()
badcase = 0
for batch in range(num_batches_per_epoch):
    # Getting the index
    indexes = [i % num_examples
               for i in range(batch * batch_size, (batch + 1) * batch_size)]
    # print("indexes", indexes)
    batch_train_inputs = train_inputs[indexes]

    # Padding input to max_time_step of this batch
    batch_train_inputs, batch_train_seq_len = pad_sequences(batch_train_inputs)

    # Converting to sparse representation so as to feed the SparseTensor input
    batch_train_targets = sparse_tuple_from(train_targets[indexes])

    feed = {
        inputs: batch_train_inputs,
        targets: batch_train_targets,
        seq_len: batch_train_seq_len,
        keep_prob: 0.5,
        istrain: True
    }
    batch_cost, _ = session.run([total_loss, train_op], feed)
    train_cost += batch_cost * batch_size
    # train_ler += session.run(ler, feed_dict=feed) * batch_size

# For test
for batch in range(num_batches_per_epoch_for_test):
def get_next_batch_for_res(batch_size=128):
    inputs_images = []
    codes = []
    max_width_image = 0
    info = []
    seq_len = np.ones(batch_size)
    for i in range(batch_size):
        serialized_example = next(dataset, None)
        if serialized_example is None:
            raise Exception("has finished training one data file, stop")
        dataset_example.ParseFromString(serialized_example)
        font_name = str(dataset_example.features.feature['font_name'].bytes_list.value[0],
                        encoding="utf-8")
        font_size = dataset_example.features.feature['font_size'].int64_list.value[0]
        font_mode = dataset_example.features.feature['font_mode'].int64_list.value[0]
        font_hint = dataset_example.features.feature['font_hint'].int64_list.value[0]
        text = str(dataset_example.features.feature['label'].bytes_list.value[0],
                   encoding="utf-8")
        size = dataset_example.features.feature['size'].int64_list.value
        image = dataset_example.features.feature['image'].bytes_list.value[0]
        image = utils_pil.frombytes(tuple(size), image)
        image = utils_pil.convert_to_gray(image)
        w, h = size
        if h > image_height:
            image = utils_pil.resize_by_height(image, image_height)
        image = utils_pil.resize_by_height(image, image_height - random.randint(1, 5))
        image, _ = utils_pil.random_space2(image, image, image_height)
        image = utils_font.add_noise(image)
        image = np.asarray(image)
        image = utils.resize(image, image_height, MAX_IMAGE_WIDTH)
        if random.random() > 0.5:
            image = image / 255.
        else:
            image = (255. - image) / 255.
        if max_width_image < image.shape[1]:
            max_width_image = image.shape[1]
        inputs_images.append(image)
        codes.append([CHARS.index(char) for char in text])
        info.append([font_name, str(font_size), str(font_mode),
                     str(font_hint), str(len(text))])
        seq_len[i] = len(text) + 1

    # Round up to a multiple of 4
    # if max_width_image % 4 > 0:
    #     max_width_image = max_width_image + 4 - max_width_image % 4

    # If an image is below the maximum width, pad up to it
    if max_width_image < MAX_IMAGE_WIDTH:
        max_width_image = MAX_IMAGE_WIDTH
    # raise Exception("img width must %s <= %s " % (max_width_image, MAX_IMAGE_WIDTH))

    inputs = np.zeros([batch_size, image_height, max_width_image, 1])
    for i in range(batch_size):
        image_vec = utils.img2vec(inputs_images[i], height=image_height,
                                  width=max_width_image, flatten=False)
        inputs[i, :] = np.reshape(image_vec, (image_height, max_width_image, 1))
    # print(inputs.shape, len(codes))
    labels = [np.asarray(i) for i in codes]
    sparse_labels = utils.sparse_tuple_from(labels)
    # max_width_image = math.ceil((max_width_image - 3 + 1.) / 2.)
    # max_width_image = math.ceil((max_width_image - 3 + 1.) / 1.)
    # max_width_image = math.ceil((max_width_image - 3 + 1.) / 2.)
    # max_width_image = math.ceil((max_width_image - 3 + 1.) / 1.)
    # max_width_image = math.ceil((max_width_image - 3 + 1.) / 2.)
    seq_len = np.ones(batch_size) * SEQ_LENGTH
    # print(inputs.shape, seq_len.shape, [len(l) for l in labels])
    return inputs, sparse_labels, seq_len, info
        format(file))
    label_name = file.split('-')[0]

    # Loading the transcription .npy file for the training example
    label = np.load('data/speech_commands_processed_reduced/transcriptions/{}.npy'
                    .format(label_name))
    filenames.append(file)

    # Appending the audio and transcription to the batch arrays
    batch_train_audio.append(audio)
    batch_train_labels.append(label)

# Padding sequences so they are all of equal length --> new shape (max_data, max_length, n_features)
batch_train_audio = np.asarray(utils.pad_sequences(batch_train_audio,
                                                   hparams.input_max_len),
                               dtype=np.float32)
batch_train_labels = utils.sparse_tuple_from(np.asarray(batch_train_labels))

# Run the training method from the model class. Returns the cost value and the summary.
cost, _, summary = train_model.train(batch_train_audio, batch_train_labels, train_sess)

# Updating the global step
global_step += batch_size

# Adding the summary to the training logs
training_logger.add_summary(summary, global_step=global_step)

# Calculating time for the console output
tot = time.time() - start_time
h = int(tot / 3600)
def run_model(x_train, y_train, x_val, y_val, num_features, num_train_examples,
              num_val_examples, num_epochs, batch_size, num_batches_per_epoch,
              learning_rate, momentum, num_layers, num_hidden, num_classes):
    graph = tf.Graph()
    with graph.as_default():
        x = tf.placeholder(tf.float32, [None, None, num_features], name=vocab.x)
        y = tf.sparse_placeholder(tf.int32, name=vocab.y)
        seq_len = tf.placeholder(tf.int32, [None], name=vocab.seq_len)
        W = tf.Variable(tf.truncated_normal([num_hidden, num_classes], stddev=0.1),
                        name=vocab.W)
        b = tf.Variable(tf.constant(0., shape=[num_classes]), name=vocab.b)

        stack = model(num_layers, num_hidden)
        logits = inference(x, seq_len, W, b, stack, num_hidden, num_classes)
        loss_ = loss(y, logits, seq_len)
        cost_ = cost(loss_)
        optimizer = optimize(learning_rate, momentum, cost_)
        decoded, log_prob = decode(logits, seq_len)
        ler = label_error_rate(decoded=decoded[0], y=y)
        saver_early_stopping = tf.train.Saver(max_to_keep=0)

        # Summaries: cost and ler for the train set
        tf.summary.scalar("training_cost", cost_)
        tf.summary.scalar("training_label_error_rate", ler)
        summary_ops_train = tf.summary.merge_all()

        # Cost and ler for the validation set
        tf.summary.scalar("validation_cost", cost_)
        tf.summary.scalar("validation_label_error_rate", ler)
        summary_ops_validation = tf.summary.merge_all()

    with tf.Session(graph=graph) as session:
        init = tf.global_variables_initializer()
        session.run(init)

        now = datetime.utcnow().strftime("%Y%m%d%H%M%S")
        path_model_hyperparams = "model%s-num_layers=%d-num_hidden=%d-num_epochs=%d-batch_size=%d-learning_rate=%s" \
            % (str(now), num_layers, num_hidden, num_epochs, batch_size, str(learning_rate))
        writer_train = tf.summary.FileWriter(
            './tensorboard_graphs/' + path_model_hyperparams + '/train', session.graph)
        writer_validation = tf.summary.FileWriter(
            './tensorboard_graphs/' + path_model_hyperparams + '/validation')

        shuffled_indexes = np.random.permutation(num_train_examples)
        x_train = x_train[shuffled_indexes]
        y_train = y_train[shuffled_indexes]

        best_validation_accuracy = 0.0
        last_improvement = 0
        require_improvement = 100
        total_epochs = 0

        for curr_epoch in range(num_epochs):
            train_cost = train_ler = 0
            start = time.time()
            total_epochs += 1

            for batch in range(num_batches_per_epoch):
                indexes = [i % num_train_examples
                           for i in range(batch * batch_size, (batch + 1) * batch_size)]
                batch_x_train = x_train[indexes]
                batch_x_train, batch_x_train_seq_len = utils.pad_sequences(batch_x_train)
                batch_y_train = utils.sparse_tuple_from(y_train[indexes])
                feed = {
                    x: batch_x_train,
                    y: batch_y_train,
                    seq_len: batch_x_train_seq_len
                }
                batch_cost, _ = session.run([cost_, optimizer], feed)
                train_cost += batch_cost * batch_size
                train_ler += session.run(ler, feed_dict=feed) * batch_size
                summary_train = session.run(summary_ops_train, feed_dict=feed)

            train_cost /= num_train_examples
            train_ler /= num_train_examples
            # train_cost_all = train_cost
            # train_ler_all = train_ler
            writer_train.add_summary(summary_train, global_step=curr_epoch)

            val_indexes = [i for i in range(num_val_examples)]
            x_validation, x_val_seq_len = utils.pad_sequences(x_val[val_indexes])
            y_validation = utils.sparse_tuple_from(y_val[val_indexes])
            val_feed = {
                x: x_validation,
                y: y_validation,
                seq_len: x_val_seq_len
            }
            val_cost, val_ler = session.run([cost_, ler], feed_dict=val_feed)
            summary_validation = session.run(summary_ops_validation, feed_dict=val_feed)
            writer_validation.add_summary(summary_validation, global_step=curr_epoch)

            if (total_epochs % 10 == 0) or (curr_epoch == (num_epochs - 1)):
                if val_ler > best_validation_accuracy:
                    best_validation_accuracy = val_ler
                    print(best_validation_accuracy)
                    last_improvement = total_epochs
                    saver_early_stopping.save(
                        sess=session,
                        save_path='./checkpoints/' + path_model_hyperparams + '/best_checkpoints')
                    improved_str = '*'
                else:
                    improved_str = ''
                log = "Epoch: {0:>6}, Train-Epoch Accuracy: {1:>6.1%}, Validation Accuracy: {2:>6.1%} {3}"
                print(log.format(curr_epoch + 1, train_ler, val_ler, improved_str))

            log = "Epoch {}/{}, train_cost = {:.3f}, train_ler = {:.3f}, val_cost = {:.3f}, val_ler = {:.3f}, time = {:.3f}"
            print(log.format(curr_epoch + 1, num_epochs, train_cost, train_ler,
                             val_cost, val_ler, time.time() - start))

            if total_epochs - last_improvement > require_improvement:
                print("No improvement found in a while, stopping training.")
                break