def train_model(weight=None, epochs=10):
    # relies on module-level imports: dump from pickle, ModelCheckpoint from
    # keras.callbacks, and the local helper modules ld (dataset loading) and
    # gen (tokenizer / model / generator utilities)
    # load dataset
    data = ld.prepare_dataset('train')
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]
    # prepare tokenizer
    tokenizer = gen.create_tokenizer(train_descriptions)
    # save the tokenizer
    dump(tokenizer, open('models/tokenizer.pkl', 'wb'))
    # index_word dict
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    # save dict
    dump(index_word, open('models/index_word.pkl', 'wb'))
    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)
    # determine the maximum sequence length
    max_length = gen.max_length(train_descriptions)
    print('Description Length: %d' % max_length)
    # generate model
    model = gen.define_model(vocab_size, max_length)
    # use pre-trained weights if provided
    if weight is not None:
        model.load_weights(weight)
    # define checkpoint callback
    filepath = 'models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    # create the data generators
    train_generator = gen.data_generator(train_descriptions, train_features, tokenizer, max_length)
    val_generator = gen.data_generator(test_descriptions, test_features, tokenizer, max_length)
    # fit model
    model.fit_generator(train_generator, epochs=epochs, steps_per_epoch=steps, verbose=1,
                        callbacks=[checkpoint], validation_data=val_generator,
                        validation_steps=val_steps)
    try:
        model.save('models/wholeModel.h5', overwrite=True)
        model.save_weights('models/weights.h5', overwrite=True)
    except Exception:
        print("Error in saving model.")
    print("Training complete...\n")
def model_training(weight=None, epochs=10):
    # load train/test splits of features and descriptions
    train_d = ld.prepare_dataset('train')
    train_features, train_descriptions = train_d[0]
    test_features, test_descriptions = train_d[1]
    # prepare and save the tokenizer
    split_sentences = gen.create_tokenizer(train_descriptions)
    dump(split_sentences, open('models/tokenizer.pkl', 'wb'))
    # build and save the index -> word lookup
    index_word = {value: key for key, value in split_sentences.word_index.items()}
    dump(index_word, open('models/index_word.pkl', 'wb'))
    vocab_size = len(split_sentences.word_index) + 1
    print('Size of the Vocabulary: %d' % vocab_size)
    max_length = gen.max_length(train_descriptions)
    print('Length of the Descriptions: %d' % max_length)
    # define the model and optionally load pre-trained weights
    model = gen.define_model(vocab_size, max_length)
    if weight is not None:
        model.load_weights(weight)
    # checkpoint callback keeps the best model by validation loss
    filepath = 'models/model.h5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    # training and validation data generators
    tg = gen.data_generator(train_descriptions, train_features, split_sentences, max_length)
    vg = gen.data_generator(test_descriptions, test_features, split_sentences, max_length)
    model.fit_generator(tg, epochs=epochs, steps_per_epoch=steps, verbose=1,
                        callbacks=[checkpoint], validation_data=vg,
                        validation_steps=val_steps)
    try:
        model.save('models/wholeModel.h5', overwrite=True)
        model.save_weights('models/weights.h5', overwrite=True)
    except Exception:
        print("Error in saving model.")
    print("Training has been completed...\n")
def train_model(weight=None, epochs=10):
    # load train/test splits of features and descriptions
    data = ld.prepare_dataset('train')
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]
    # prepare and save the tokenizer and the index -> word lookup
    tokenizer = gen.create_tokenizer(train_descriptions)
    dump(tokenizer, open('../models/tokenizer.pkl', 'wb'))
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    dump(index_word, open('../models/index_word.pkl', 'wb'))
    vocab_size = len(tokenizer.word_index) + 1
    max_length = gen.max_length(train_descriptions)
    # define the model and optionally load pre-trained weights
    model = gen.define_model(vocab_size, max_length)
    if weight is not None:
        model.load_weights(weight)
    # checkpoint callback keeps the best model by validation loss
    filepath = '../models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    # training and validation data generators
    train_generator = gen.data_generator(train_descriptions, train_features, tokenizer, max_length)
    val_generator = gen.data_generator(test_descriptions, test_features, tokenizer, max_length)
    model.fit_generator(train_generator, epochs=epochs, steps_per_epoch=steps, verbose=1,
                        callbacks=[checkpoint], validation_data=val_generator,
                        validation_steps=val_steps)
    try:
        model.save('../models/wholeModel.h5', overwrite=True)
        model.save_weights('../models/weights.h5', overwrite=True)
    except Exception:
        print("Error in saving model.")
    print("Training complete...\n")
def train_model(weight=None, epochs=NUM_EPOCHS):
    data = ld.prepare_dataset("train")
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]
    # prepare tokenizer and save it along with the index -> word lookup
    tokenizer = gen.create_tokenizer(train_descriptions)
    dump(tokenizer, open('models/tokenizer.pkl', 'wb'))
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    dump(index_word, open('models/index_word.pkl', 'wb'))
    VOCAB_SIZE = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % VOCAB_SIZE)
    MAX_CAP_LEN = MODEL.max_cap_len(train_descriptions)
    print('Description Length: %d' % MAX_CAP_LEN)
    # build the GloVe embedding matrix and the CNN-LSTM model
    embedding_matrix = Glove_word2vectorizer()
    model = MODEL.cnn_lstm_model_creater(img_pixel_len=IMG_PIXEL,
                                         img_data_size=IMG_SAMPLE_SIZE,
                                         img_embedding_dim=IMG_EMBEDDING_DIM,
                                         cap_pixel_len=MAX_CAP_LEN,
                                         cap_vocab_size=VOCAB_SIZE,
                                         cap_embedding_dim=CAP_EMBEDDING_DIM)
    if weight is not None:
        model.load_weights(weight)
    # define checkpoint callback
    filepath = 'models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    # training and validation data generators, padded to the computed caption length
    train_generator = MODEL.data_generator(train_descriptions, train_features, tokenizer, MAX_CAP_LEN)
    val_generator = MODEL.data_generator(test_descriptions, test_features, tokenizer, MAX_CAP_LEN)
    model.fit_generator(train_generator, epochs=epochs, steps_per_epoch=steps, verbose=1,
                        callbacks=[checkpoint], validation_data=val_generator,
                        validation_steps=val_steps)
    try:
        model.save('models/wholeModel.h5', overwrite=True)
        model.save_weights('models/weights.h5', overwrite=True)
    except Exception:
        print("Error in saving model.")
    print("Training complete...\n")
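# The four training variants above share the same flow: load features and
# descriptions, fit and save a tokenizer, build the captioning model, and
# train it with a checkpoint on validation loss. A minimal sketch of an
# entry point for invoking them is below; the __main__ guard and the flag
# names are illustrative assumptions, not part of the original scripts.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train the image captioning model')
    parser.add_argument('--weight', default=None,
                        help='optional path to pre-trained weights to resume from')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of training epochs')
    cli_args = parser.parse_args()
    # resume from a saved checkpoint when --weight is given, otherwise train from scratch
    train_model(weight=cli_args.weight, epochs=cli_args.epochs)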
# load the tokenizer
tokenizer = load(open('models/tokenizer.pkl', 'rb'))
index_word = load(open('models/index_word.pkl', 'rb'))
# pre-define the max sequence length (from training)
max_length = 34
# load the model
if args.model:
    filename = args.model
else:
    filename = 'models/model_weight.h5'
model = load_model(filename)
if args.image:
    # load and prepare the photograph
    photo = extract_features(args.image)
    # generate description
    captions = generate_desc(model, tokenizer, photo, index_word, max_length)
    for cap in captions:
        # remove start and end tokens
        seq = cap[0].split()[1:-1]
        desc = ' '.join(seq)
        print('{} [log prob: {:1.2f}]'.format(desc, cap[1]))
else:
    # load test set
    test_features, test_descriptions = ld.prepare_dataset('test')[1]
    # evaluate model
    evaluate_model(model, test_descriptions, test_features, tokenizer, index_word, max_length)
# load the tokenizer and the index -> word lookup saved during training
tokenizer = load(open('models/tokenizer.pkl', 'rb'))
index_word = load(open('models/index_word.pkl', 'rb'))
# pre-defined maximum sequence length (from training)
max_length = 34
# load the model, falling back to the default weights file
if args.model:
    my_file = args.model
else:
    my_file = 'models/model_weight.h5'
model = load_model(my_file)
if args.my_image:
    # extract features from the photograph and generate candidate captions
    extracted_features_pic = ext_features(args.my_image)
    caps = gen_description(model, tokenizer, extracted_features_pic, index_word, max_length)
    for cap in caps:
        # strip start/end tokens and print only the top caption
        seq = cap[0].split()[1:-1]
        desc = ' '.join(seq)
        print('{} '.format(desc))
        break
else:
    # load test set and evaluate the model
    t_feat, t_desc = ld.prepare_dataset('test')[1]
    eval_my_model(model, t_desc, t_feat, tokenizer, index_word, max_length)
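# The two inference snippets above read args.model plus args.image /
# args.my_image but never show where `args` comes from. A minimal
# argument-parser sketch that would supply those attributes is below;
# the defaults and help strings are assumptions for illustration only.
import argparse

parser = argparse.ArgumentParser(description='Caption an image or evaluate the trained model')
parser.add_argument('--model', default=None,
                    help='path to a trained model file (falls back to models/model_weight.h5)')
parser.add_argument('--image', default=None,
                    help='image to caption; omit to evaluate on the test set')
# the second snippet reads args.my_image instead of args.image
parser.add_argument('--my_image', default=None,
                    help='image to caption (name used by the second snippet)')
args = parser.parse_args()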