Example #1
def train():
    # load training dataset (6K)
    filename = 'Flickr_8k.trainImages.txt'
    train = util.load_ids(filename)
    print('Dataset: %d' % len(train))
    train_captions = util.load_clean_captions('descriptions.txt', train)
    print('Captions: train number=%d' % len(train_captions))
    # photo features
    train_features = util.load_photo_features('features.pkl', train)
    print('Photos: train=%d' % len(train_features))
    # prepare tokenizer
    tokenizer = load(open('tokenizer.pkl', 'rb'))
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)
    # train the model, run epochs manually and save after each epoch
    epochs = 20
    steps = len(train_captions)
    for i in range(epochs):
        # create the data generator
        generator = data_generator(train_captions, train_features, tokenizer, max_len)
        # fit for one epoch
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
        # save model
        model.save('model_' + str(i) + '.h5')
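Note that util.get_max_length itself is not shown on this page. For the captioning examples it presumably returns the word count of the longest caption, which is then used as the padding length for the input sequences. A minimal sketch under that assumption (the name and data layout are illustrative only):

def get_max_length_sketch(captions):
    # Assumption: captions maps an image id to a list of caption strings,
    # as produced by util.load_clean_captions in the example above.
    all_captions = [c for caption_list in captions.values() for c in caption_list]
    return max(len(c.split()) for c in all_captions)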
Example #2
def train():
    # load training dataset (6K)
    filename = 'Flickr_8k.trainImages.txt'
    train = util.load_ids(filename)
    print('Dataset: %d' % len(train))
    train_captions = util.load_clean_captions('descriptions.txt', train)
    print('Captions: train number=%d' % len(train_captions))
    # photo features
    train_features = util.load_photo_features('features.pkl', train)
    print('Photos: train=%d' % len(train_features))
    # prepare tokenizer
    tokenizer = load(open('tokenizer.pkl', 'rb'))
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)

    # define the model
    model = caption_model(vocab_size, max_len)
    # train the model, run epochs manually and save after each epoch
    epochs = 5
    steps = len(train_captions)
    for i in range(epochs):
        # create the data generator
        generator = data_generator(train_captions, train_features, tokenizer,
                                   max_len)
        # fit for one epoch

        # The generator yields two items per step: X and y. X itself holds two
        # arrays - the photo feature vector and the caption prefix (the words
        # generated so far) - while y is the next word to predict.
        # At first it looks like the generator returns three values (feature,
        # prefix, next word) even though the model only has two inputs; in fact
        # the first two form X and are fed into the model, and the third is y,
        # the target variable.

        # There is no need to worry about the generator running forever: the
        # hyperparameters epochs and steps_per_epoch bound the number of times
        # it is invoked, namely epochs * steps_per_epoch.

        # Generators are magical!
        model.fit_generator(generator,
                            epochs=1,
                            steps_per_epoch=steps,
                            verbose=1)

        # save model
        model.save('model' + os.sep + 'model_my' + str(i) + '.h5')
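The comments above describe the shape of each batch but not the generator itself, whose source is not shown here. As a rough illustration only, a minimal generator matching that description could look like the sketch below, assuming the Keras-era pad_sequences/to_categorical helpers that fit_generator implies, and assuming captions maps an image id to its caption strings while features maps it to a feature array:

import numpy as np
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

def data_generator_sketch(captions, features, tokenizer, max_len):
    vocab_size = len(tokenizer.word_index) + 1
    # Loop forever; fit_generator stops pulling batches after
    # epochs * steps_per_epoch calls, so this never runs unbounded.
    while True:
        for image_id, caption_list in captions.items():
            photo = features[image_id][0]
            X1, X2, y = [], [], []
            for caption in caption_list:
                seq = tokenizer.texts_to_sequences([caption])[0]
                for i in range(1, len(seq)):
                    # X = [photo feature, word sequence so far], y = next word
                    X1.append(photo)
                    X2.append(pad_sequences([seq[:i]], maxlen=max_len)[0])
                    y.append(to_categorical([seq[i]], num_classes=vocab_size)[0])
            yield [np.array(X1), np.array(X2)], np.array(y)

One yield per image keeps steps_per_epoch = len(train_captions) consistent with the loop above.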
Example #3
def prepare_data(pronunciations, letter_ids):
    logging.info('preparing the data...')

    max_length = util.get_max_length(pronunciations)

    # X: one-hot encoding of each word's letters, padded out to max_length
    X = np.zeros((len(pronunciations), max_length, len(letter_ids)),
                 dtype=bool)
    # y: number of phonemes in each word's pronunciation
    y = np.zeros(len(pronunciations), dtype=int)

    for n, word in enumerate(pronunciations):
        for i, letter in enumerate(word):
            letter_id = letter_ids[letter]
            X[n, i, letter_id] = True

        y[n] = len(pronunciations[word])

    return X, y
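prepare_data turns each spelled word into a padded one-hot matrix of its letters and uses the phoneme count of its pronunciation as the target. A hypothetical usage sketch (the toy data and variable names are made up for illustration):

# Hypothetical toy data: spelled word -> phoneme sequence.
pronunciations = {'cat': ['K', 'AE', 'T'], 'go': ['G', 'OW']}

# Map every distinct letter to a column index of the one-hot encoding.
letters = sorted({letter for word in pronunciations for letter in word})
letter_ids = {letter: i for i, letter in enumerate(letters)}

X, y = prepare_data(pronunciations, letter_ids)
# X has shape (2, max_length, len(letter_ids)) with one True per used letter slot;
# y == [3, 2], the number of phonemes in each pronunciation.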
Example #4
def __init__(self, model: Model, pronunciations, letter_ids):
    self.model = model
    self.pronunciations = pronunciations
    self.letter_ids = letter_ids
    max_len = util.get_max_length(pronunciations.values())
    self.max_len = max_len
Example #5
    # 'startseq Two women laying on grass with a dog . endseq',
    # 'startseq Two women on a grassy hill lit by the sun ; one looks at a dachshund , the other looks to the side . endseq']

    # photo features
    feature_path = r"E:/AI资源计算机视觉/JM07 - TXXY - CV2期/02.资料/homework-master-7fc833414b95225130c323c278230bc388af5c6b/homework1/features.pkl"
    train_features = util.load_photo_features(feature_path, train)
    # print('Photos: train=%d' % len(train_features))
    # print(len(train_features["3585117340_73e96b6173"][0]))  # 4096

    # prepare tokenizer
    tokenizer = load(open('tokenizer.pkl', 'rb'))
    vocab_size = len(tokenizer.word_index) + 1  # vocabulary size
    print('Vocabulary Size: %d' % vocab_size)  # 7378

    # determine the maximum sequence length
    max_len = util.get_max_length(train_captions)
    print('Description Length: %d' % max_len)  # 40

    # define the model
    model = caption_model(vocab_size, max_len)
    print(model)
    # train the model, run epochs manually and save after each epoch
    epochs = 20
    steps = len(train_captions)  # 6000
    for i in range(epochs):
        # create the data generator
        generator = data_generator(train_captions, train_features, tokenizer,
                                   max_len)
        # print(generator)
        # fit for one epoch
        model.fit_generator(generator,
                            epochs=1,
                            steps_per_epoch=steps,
                            verbose=1)
        # save the model after each epoch
        model.save('model_' + str(i) + '.h5')
Example #6
# ---------- Data Load ----------

x_paths = {}
t_paths = {}
dataset = {}
loader = {}

for type_ in types:
    print('Loading %s dataset ... ' % (type_), end='')
    x_paths[type_] = glob(join(data_dir, 'x_' + type_, '*.bin'))
    t_paths[type_] = glob(join(data_dir, 't_' + type_, '*.bin'))

    x_dim = config.get_feature_config().get_linguistic_dim(type_)
    t_dim = config.get_feature_config().get_parm_dim(type_)
    max_len = get_max_length(x_paths[type_], x_dim)
    pad_value = config.get_feature_config().pad_value

    dataset[type_] = SpeechDataset(x_paths[type_],
                                   t_paths[type_],
                                   x_dim=x_dim,
                                   t_dim=t_dim,
                                   max_len=max_len,
                                   pad_value=pad_value)
    batch_size = config.get_train_config().batch_size
    loader[type_] = DataLoader(dataset[type_],
                               batch_size=batch_size,
                               shuffle=True)
    print('done!')
    print('\tDataset Size\t%d' % (len(x_paths[type_])))
    print('\tInput Dim\t%d' % (x_dim))
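Here get_max_length takes the list of raw .bin feature files together with the per-frame dimension; its definition is not included above, but it presumably returns the largest frame count so that SpeechDataset can pad every utterance to a common length. A rough sketch, under the unverified assumption that each file is a flat float32 array with x_dim values per frame:

import os
import numpy as np

def get_max_length_sketch(paths, dim):
    # Assumption: every .bin file stores float32 frames of `dim` values, so
    # the frame count is the file size divided by (dim values * 4 bytes each).
    bytes_per_frame = dim * np.dtype(np.float32).itemsize
    return max(os.path.getsize(path) // bytes_per_frame for path in paths)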