示例#1
0
def generate_batch_cw(file_name, batch_size, num_skips, skip_windows,
                      dict_reverse_word_index, words_stroke_filename):
    """Yield an endless stream of ``(batch, labels)`` int32 array pairs.

    Every item pulled from ``generate_one(file_name, num_skips,
    skip_windows)`` is read as ``item[j][0]`` / ``item[j][1]`` for each
    ``j < num_skips``.  The first element is mapped through the table
    returned by ``get_dict_word_stroke_index`` and stored in ``batch``; the
    second element is stored in ``labels`` as-is.

    Args:
        file_name: Forwarded to ``generate_one``.
        batch_size: Rows per yielded batch; must be a multiple of
            ``num_skips``.
        num_skips: Number of pairs consumed from each item; must not exceed
            ``2 * skip_windows``.
        skip_windows: Forwarded to ``generate_one``.
        dict_reverse_word_index: Forwarded to ``get_dict_word_stroke_index``.
        words_stroke_filename: Forwarded to ``get_dict_word_stroke_index``.

    Yields:
        tuple: ``(batch, labels)`` with ``batch`` of shape ``(batch_size,)``
        and ``labels`` of shape ``(batch_size, 1)``, both ``np.int32``.  Fresh
        arrays are allocated for every batch.

    Raises:
        AssertionError: If either size constraint is violated (raised when the
            generator is first advanced, not when it is created).

    A failed lookup in the stroke table is not handled here and propagates to
    the caller.
    """
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_windows
    generate_word = generate_one(file_name, num_skips, skip_windows)
    dict_word_stroke_index = get_dict_word_stroke_index(
        words_stroke_filename, dict_reverse_word_index)

    while True:
        batch = np.empty(batch_size, dtype=np.int32)
        labels = np.empty((batch_size, 1), dtype=np.int32)
        for i in range(batch_size // num_skips):
            # Builtin next() works on Python 2.6+ and 3; the .next() method
            # exists only on Python 2 generators.
            tuple_word = next(generate_word)
            # A falsy item restarts the source from the beginning.
            # NOTE(review): presumably generate_one yields a falsy value at
            # end of data -- confirm against its definition.
            while not tuple_word:
                generate_word = generate_one(file_name, num_skips,
                                             skip_windows)
                tuple_word = next(generate_word)

            base = i * num_skips
            for j in range(num_skips):
                batch[base + j] = dict_word_stroke_index[tuple_word[j][0]]
                labels[base + j, 0] = tuple_word[j][1]
        yield batch, labels
示例#2
0
def generate_batch_cw(file_name, batch_size, num_skips, skip_windows,
                      dict_reverse_word_index, words_stroke_filename):
    """Yield an endless stream of stroke-level ``(batch, labels)`` pairs.

    Every item pulled from ``generate_one(file_name, num_skips,
    skip_windows)`` is read as ``item[i][0]`` / ``item[i][1]`` for each
    ``i < num_skips``.  The first element is looked up in the table returned
    by ``get_dict_word_stroke_index``; each value in the resulting iterable
    becomes one ``batch`` row, paired with the item's second element as its
    label.  Elements missing from the table are skipped.

    Because one element may contribute several rows, a batch can end in the
    middle of an element; the remaining rows go into the next batch.

    Args:
        file_name: Forwarded to ``generate_one``.
        batch_size: Rows per yielded batch.
        num_skips: Number of pairs consumed from each item.
        skip_windows: Forwarded to ``generate_one``.
        dict_reverse_word_index: Forwarded to ``get_dict_word_stroke_index``.
        words_stroke_filename: Forwarded to ``get_dict_word_stroke_index``.

    Yields:
        tuple: ``(batch, labels)`` with ``batch`` of shape ``(batch_size,)``
        and ``labels`` of shape ``(batch_size, 1)``, both ``np.int32``.  Fresh
        arrays are allocated after every yield.
    """
    # stroke_index
    dict_word_stroke_index = get_dict_word_stroke_index(
        words_stroke_filename, dict_reverse_word_index)
    # data
    generate_word = generate_one(file_name, num_skips, skip_windows)
    # return_data
    batch = np.empty(batch_size, dtype=np.int32)
    labels = np.empty((batch_size, 1), dtype=np.int32)
    batch_index = 0

    while True:
        # Fetch at the top of the loop so the very first item goes through
        # the same falsy check as every later one.  Builtin next() works on
        # Python 2.6+ and 3; .next() is Python 2 only.
        tuple_words = next(generate_word)
        # A falsy item restarts the source from the beginning.
        # NOTE(review): presumably generate_one yields a falsy value at end
        # of data -- confirm against its definition.
        while not tuple_words:
            generate_word = generate_one(file_name, num_skips, skip_windows)
            tuple_words = next(generate_word)

        for i in range(num_skips):
            tuple_word_batch = tuple_words[i][0]
            tuple_word_label = tuple_words[i][1]

            if tuple_word_batch not in dict_word_stroke_index:
                continue
            for stroke in dict_word_stroke_index[tuple_word_batch]:
                batch[batch_index] = stroke
                labels[batch_index] = tuple_word_label
                batch_index += 1
                if batch_index == batch_size:
                    yield batch, labels
                    # Allocate new buffers so the arrays already handed to
                    # the consumer are never overwritten.
                    batch = np.empty(batch_size, dtype=np.int32)
                    labels = np.empty((batch_size, 1), dtype=np.int32)
                    batch_index = 0
def generate_batch_character_level(file_name, batch_size, num_skips,
                                   skip_windows, dict_reverse_word_index,
                                   words_image_filename):
    """Yield an endless stream of image ``(batch, labels)`` pairs.

    Every item pulled from ``generate_one(file_name, num_skips,
    skip_windows)`` is read as ``item[i][0]`` / ``item[i][1]`` for each
    ``i < num_skips``.  The first element is looked up in the table returned
    by ``get_all_image_data``; the image found there becomes one ``batch``
    row, paired with the item's second element as its label.  Elements
    missing from the table are skipped, so a batch may span a varying number
    of items.

    Args:
        file_name: Forwarded to ``generate_one``.
        batch_size: Rows per yielded batch.
        num_skips: Number of pairs consumed from each item.
        skip_windows: Forwarded to ``generate_one``.
        dict_reverse_word_index: Forwarded to ``get_all_image_data``.
        words_image_filename: Forwarded to ``get_all_image_data``.

    Yields:
        tuple: ``(batch, labels)`` where ``batch`` has shape
        ``(batch_size, image_shape[0], image_shape[1])`` and ``labels`` has
        shape ``(batch_size, 1)``, both ``np.int32``.  Fresh arrays are
        allocated after every yield.

    Raises:
        IndexError: If the image table is empty.
    """
    all_image_data = get_all_image_data(words_image_filename,
                                        dict_reverse_word_index)
    # The first stored image fixes the batch dimensions.  list() keeps this
    # working on Python 3, where dict views cannot be indexed.
    image_shape = list(all_image_data.values())[0].shape
    batch_shape = (batch_size, image_shape[0], image_shape[1])
    # data
    generate_word = generate_one(file_name, num_skips, skip_windows)
    # return_data
    batch = np.empty(batch_shape, dtype=np.int32)
    labels = np.empty((batch_size, 1), dtype=np.int32)
    batch_index = 0

    while True:
        # Fetch at the top of the loop so the very first item goes through
        # the same falsy check as every later one.  Builtin next() works on
        # Python 2.6+ and 3; .next() is Python 2 only.
        tuple_words = next(generate_word)
        # A falsy item restarts the source from the beginning.
        # NOTE(review): presumably generate_one yields a falsy value at end
        # of data -- confirm against its definition.
        while not tuple_words:
            generate_word = generate_one(file_name, num_skips, skip_windows)
            tuple_words = next(generate_word)

        for i in range(num_skips):
            tuple_word_batch = tuple_words[i][0]
            tuple_word_label = tuple_words[i][1]
            if tuple_word_batch not in all_image_data:
                continue
            batch[batch_index] = all_image_data[tuple_word_batch]
            labels[batch_index] = tuple_word_label
            batch_index += 1
            if batch_index == batch_size:
                yield batch, labels
                # Allocate new buffers so the arrays already handed to the
                # consumer are never overwritten.
                batch = np.empty(batch_shape, dtype=np.int32)
                labels = np.empty((batch_size, 1), dtype=np.int32)
                batch_index = 0
def generate_batch_image_character_level(file_name, batch_size, num_skips,
                                         skip_windows, dict_reverse_word_index,
                                         words_image_filename):
    """Yield an endless stream of image ``(batch, labels)`` pairs.

    Every item pulled from ``generate_one(file_name, num_skips,
    skip_windows)`` is read as ``item[j][0]`` / ``item[j][1]`` for each
    ``j < num_skips``.  An item is used only when all of its first elements
    are present in the table returned by ``get_all_image_data``; otherwise
    the whole item is discarded and the next one is fetched.  Each accepted
    item fills ``num_skips`` consecutive rows of the batch.

    Args:
        file_name: Forwarded to ``generate_one``.
        batch_size: Rows per yielded batch; must be a multiple of
            ``num_skips``.
        num_skips: Number of pairs consumed from each item; must not exceed
            ``2 * skip_windows``.
        skip_windows: Forwarded to ``generate_one``.
        dict_reverse_word_index: Forwarded to ``get_all_image_data``.
        words_image_filename: Forwarded to ``get_all_image_data``.

    Yields:
        tuple: ``(batch, labels)`` where ``batch`` has shape
        ``(batch_size, image_shape[0], image_shape[1])`` and ``labels`` has
        shape ``(batch_size, 1)``, both ``np.int32``.  Fresh arrays are
        allocated for every batch.

    Raises:
        AssertionError: If either size constraint is violated (raised when the
            generator is first advanced, not when it is created).
        IndexError: If the image table is empty.
    """
    assert batch_size % num_skips == 0
    assert num_skips <= 2 * skip_windows
    generate_word = generate_one(file_name, num_skips, skip_windows)
    all_image_data = get_all_image_data(words_image_filename,
                                        dict_reverse_word_index)
    # The first stored image fixes the batch dimensions.  list() keeps this
    # working on Python 3, where dict views cannot be indexed.
    image_shape = list(all_image_data.values())[0].shape
    batch_shape = (batch_size, image_shape[0], image_shape[1])

    while True:
        batch = np.empty(batch_shape, dtype=np.int32)
        labels = np.empty((batch_size, 1), dtype=np.int32)
        for i in range(batch_size // num_skips):
            # Keep fetching until an item is found whose elements all have
            # image data.
            while True:
                # Builtin next() works on Python 2.6+ and 3; .next() is
                # Python 2 only.
                tuple_word = next(generate_word)
                # A falsy item restarts the source from the beginning.
                # NOTE(review): presumably generate_one yields a falsy value
                # at end of data -- confirm against its definition.
                while not tuple_word:
                    generate_word = generate_one(file_name, num_skips,
                                                 skip_windows)
                    tuple_word = next(generate_word)
                if all(tuple_word[j][0] in all_image_data
                       for j in range(num_skips)):
                    break

            base = i * num_skips
            for j in range(num_skips):
                batch[base + j] = all_image_data[tuple_word[j][0]]
                labels[base + j, 0] = tuple_word[j][1]
        yield batch, labels