def generate_batch_cw(file_name, batch_size, num_skips, skip_windows,
                      dict_reverse_word_index, words_stroke_filename):
    """Endlessly yield fixed-size ``(batch, labels)`` training arrays.

    NOTE(review): a second function with this same name is defined later in
    this file; if both remain, the later definition replaces this one at
    import time.

    Args:
        file_name: forwarded to ``generate_one``.
        batch_size: number of rows per yielded batch; must be a multiple of
            ``num_skips``.
        num_skips: number of (word, label) pairs read from every item that
            ``generate_one`` produces; must not exceed ``2 * skip_windows``.
        skip_windows: forwarded to ``generate_one``.
        dict_reverse_word_index: forwarded to ``get_dict_word_stroke_index``.
        words_stroke_filename: forwarded to ``get_dict_word_stroke_index``.

    Yields:
        ``(batch, labels)`` where ``batch`` is an int32 array of shape
        ``(batch_size,)`` and ``labels`` is an int32 array of shape
        ``(batch_size, 1)``.

    Raises:
        AssertionError: if the size constraints above are violated.
        KeyError: if a word is missing from the stroke index.
    """
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_windows

    generate_word = generate_one(file_name, num_skips, skip_windows)
    dict_word_stroke_index = get_dict_word_stroke_index(
        words_stroke_filename, dict_reverse_word_index)

    while True:
        # Fresh arrays for every batch; each slot is written before yielding.
        batch = np.ndarray(shape=(batch_size,), dtype=np.int32)
        labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)

        for i in range(batch_size // num_skips):
            # Built-in next() instead of the Python 2-only ``.next()`` method
            # so this also runs on Python 3.
            tuple_word = next(generate_word)
            # Assumes generate_one yields a falsy value once its data is
            # used up -- TODO confirm. When that happens, start over.
            while not tuple_word:
                generate_word = generate_one(
                    file_name, num_skips, skip_windows)
                tuple_word = next(generate_word)

            for j in range(num_skips):
                row = i * num_skips + j
                batch[row] = dict_word_stroke_index[tuple_word[j][0]]
                labels[row, 0] = tuple_word[j][1]

        yield batch, labels
def generate_batch_cw(file_name, batch_size, num_skips, skip_windows,
                      dict_reverse_word_index, words_stroke_filename):
    """Endlessly yield ``(batch, labels)`` arrays built from stroke indices.

    For each of the first ``num_skips`` (word, label) pairs of every item
    produced by ``generate_one``, the word is looked up in the stroke index.
    Words missing from the index are skipped. Every stroke value of a found
    word becomes one batch row, paired with that word's label. A batch is
    yielded as soon as ``batch_size`` rows have been filled, so one word's
    strokes may be split across two consecutive batches.

    Args:
        file_name: forwarded to ``generate_one``.
        batch_size: number of rows per yielded batch.
        num_skips: number of (word, label) pairs read from every item that
            ``generate_one`` produces.
        skip_windows: forwarded to ``generate_one``.
        dict_reverse_word_index: forwarded to ``get_dict_word_stroke_index``.
        words_stroke_filename: forwarded to ``get_dict_word_stroke_index``.

    Yields:
        ``(batch, labels)`` where ``batch`` is an int32 array of shape
        ``(batch_size,)`` and ``labels`` is an int32 array of shape
        ``(batch_size, 1)``.
    """
    # stroke_index
    dict_word_stroke_index = get_dict_word_stroke_index(
        words_stroke_filename, dict_reverse_word_index)

    # data
    generate_word = generate_one(file_name, num_skips, skip_windows)
    # Built-in next() instead of the Python 2-only ``.next()`` method so this
    # also runs on Python 3.
    tuple_words = next(generate_word)

    # return_data
    batch = np.ndarray(shape=(batch_size,), dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    batch_index = 0

    while True:
        for i in range(num_skips):
            word = tuple_words[i][0]
            label = tuple_words[i][1]
            if word not in dict_word_stroke_index:
                continue

            for stroke in dict_word_stroke_index[word]:
                batch[batch_index] = stroke
                labels[batch_index] = label
                batch_index += 1

                if batch_index == batch_size:
                    yield batch, labels
                    # Allocate new arrays so the batch just handed to the
                    # consumer is not overwritten by later rows.
                    batch = np.ndarray(shape=(batch_size,), dtype=np.int32)
                    labels = np.ndarray(
                        shape=(batch_size, 1), dtype=np.int32)
                    batch_index = 0

        tuple_words = next(generate_word)
        # Assumes generate_one yields a falsy value once its data is used
        # up -- TODO confirm. When that happens, start over.
        while not tuple_words:
            generate_word = generate_one(file_name, num_skips, skip_windows)
            tuple_words = next(generate_word)
def generate_batch_character_level(file_name, batch_size, num_skips,
                                   skip_windows, dict_reverse_word_index,
                                   words_image_filename):
    """Endlessly yield ``(batch, labels)`` arrays built from word images.

    For each of the first ``num_skips`` (word, label) pairs of every item
    produced by ``generate_one``, the word's image is looked up in the data
    returned by ``get_all_image_data``. Words without an image are skipped.
    A batch is yielded as soon as ``batch_size`` rows have been filled.

    Args:
        file_name: forwarded to ``generate_one``.
        batch_size: number of rows per yielded batch.
        num_skips: number of (word, label) pairs read from every item that
            ``generate_one`` produces.
        skip_windows: forwarded to ``generate_one``.
        dict_reverse_word_index: forwarded to ``get_all_image_data``.
        words_image_filename: forwarded to ``get_all_image_data``.

    Yields:
        ``(batch, labels)`` where ``batch`` is an int32 array of shape
        ``(batch_size, H, W)`` -- ``(H, W)`` being the first two dimensions
        of one stored image -- and ``labels`` is an int32 array of shape
        ``(batch_size, 1)``.

    Raises:
        IndexError: if the image data is empty.
    """
    all_image_data = get_all_image_data(words_image_filename,
                                        dict_reverse_word_index)
    # list(...) keeps ``[0]`` working on Python 3, where dict.values() is a
    # non-indexable view, and still raises IndexError when there is no data.
    # Presumably every image has the same shape -- verify against the loader.
    image_shape = list(all_image_data.values())[0].shape
    batch_shape = (batch_size, image_shape[0], image_shape[1])

    # data
    generate_word = generate_one(file_name, num_skips, skip_windows)
    # Built-in next() instead of the Python 2-only ``.next()`` method so this
    # also runs on Python 3.
    tuple_words = next(generate_word)

    # return_data
    batch = np.ndarray(shape=batch_shape, dtype=np.int32)
    labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
    batch_index = 0

    while True:
        for i in range(num_skips):
            word = tuple_words[i][0]
            label = tuple_words[i][1]
            if word not in all_image_data:
                continue

            batch[batch_index] = all_image_data[word]
            labels[batch_index] = label
            batch_index += 1

            if batch_index == batch_size:
                yield batch, labels
                # Allocate new arrays so the batch just handed to the
                # consumer is not overwritten by later rows.
                batch = np.ndarray(shape=batch_shape, dtype=np.int32)
                labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)
                batch_index = 0

        tuple_words = next(generate_word)
        # Assumes generate_one yields a falsy value once its data is used
        # up -- TODO confirm. When that happens, start over.
        while not tuple_words:
            generate_word = generate_one(file_name, num_skips, skip_windows)
            tuple_words = next(generate_word)
def generate_batch_image_character_level(file_name, batch_size, num_skips,
                                         skip_windows,
                                         dict_reverse_word_index,
                                         words_image_filename):
    """Endlessly yield fixed-size ``(batch, labels)`` arrays of word images.

    Each item produced by ``generate_one`` contributes exactly ``num_skips``
    rows. An item is used only if all of its first ``num_skips`` words have
    an image in the data returned by ``get_all_image_data``; otherwise the
    whole item is discarded and the next one is tried.

    Args:
        file_name: forwarded to ``generate_one``.
        batch_size: number of rows per yielded batch; must be a multiple of
            ``num_skips``.
        num_skips: number of (word, label) pairs read from every item that
            ``generate_one`` produces; must not exceed ``2 * skip_windows``.
        skip_windows: forwarded to ``generate_one``.
        dict_reverse_word_index: forwarded to ``get_all_image_data``.
        words_image_filename: forwarded to ``get_all_image_data``.

    Yields:
        ``(batch, labels)`` where ``batch`` is an int32 array of shape
        ``(batch_size, H, W)`` -- ``(H, W)`` being the first two dimensions
        of one stored image -- and ``labels`` is an int32 array of shape
        ``(batch_size, 1)``.

    Raises:
        AssertionError: if the size constraints above are violated.
        IndexError: if the image data is empty.
    """
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_windows

    generate_word = generate_one(file_name, num_skips, skip_windows)
    all_image_data = get_all_image_data(words_image_filename,
                                        dict_reverse_word_index)
    # list(...) keeps ``[0]`` working on Python 3, where dict.values() is a
    # non-indexable view, and still raises IndexError when there is no data.
    # Presumably every image has the same shape -- verify against the loader.
    image_shape = list(all_image_data.values())[0].shape
    batch_shape = (batch_size, image_shape[0], image_shape[1])

    while True:
        # Fresh arrays for every batch; each slot is written before yielding.
        batch = np.ndarray(shape=batch_shape, dtype=np.int32)
        labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)

        for i in range(batch_size // num_skips):
            # Keep drawing items until one has an image for every word.
            while True:
                # Built-in next() instead of the Python 2-only ``.next()``
                # method so this also runs on Python 3.
                tuple_word = next(generate_word)
                # Assumes generate_one yields a falsy value once its data is
                # used up -- TODO confirm. When that happens, start over.
                while not tuple_word:
                    generate_word = generate_one(
                        file_name, num_skips, skip_windows)
                    tuple_word = next(generate_word)

                if all(tuple_word[j][0] in all_image_data
                       for j in range(num_skips)):
                    break

            for j in range(num_skips):
                row = i * num_skips + j
                batch[row] = all_image_data[tuple_word[j][0]]
                labels[row, 0] = tuple_word[j][1]

        yield batch, labels