Example #1
# imports needed by this snippet (ld and gen are project-specific helper modules
# for dataset loading and model/tokenizer utilities)
from pickle import dump
from keras.callbacks import ModelCheckpoint

def train_model(weight=None, epochs=10):
    # load dataset
    data = ld.prepare_dataset('train')
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]

    # prepare tokenizer
    tokenizer = gen.create_tokenizer(train_descriptions)
    # save the tokenizer
    dump(tokenizer, open('models/tokenizer.pkl', 'wb'))
    # index_word dict
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    # save dict
    dump(index_word, open('models/index_word.pkl', 'wb'))

    vocab_size = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % vocab_size)

    # determine the maximum sequence length
    max_length = gen.max_length(train_descriptions)
    print('Description Length: %d' % max_length)

    # generate model
    model = gen.define_model(vocab_size, max_length)

    # Check if pre-trained weights to be used
    if weight is not None:
        model.load_weights(weight)

    # define checkpoint callback
    filepath = 'models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')

    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    # create the data generator
    train_generator = gen.data_generator(train_descriptions, train_features,
                                         tokenizer, max_length)
    val_generator = gen.data_generator(test_descriptions, test_features,
                                       tokenizer, max_length)

    # fit model
    model.fit_generator(train_generator,
                        epochs=epochs,
                        steps_per_epoch=steps,
                        verbose=1,
                        callbacks=[checkpoint],
                        validation_data=val_generator,
                        validation_steps=val_steps)

    try:
        model.save('models/wholeModel.h5', overwrite=True)
        model.save_weights('models/weights.h5', overwrite=True)
    except Exception as e:
        print("Error in saving model: %s" % e)
    print("Training complete...\n")
Example #2
def model_training(weight=None, epochs=10):
  
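  # load dataset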
  train_d = ld.prepare_dataset('train')
  train_features, train_descriptions = train_d[0]
  test_features, test_descriptions = train_d[1]

  
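  # prepare tokenizer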
  split_sentences = gen.create_tokenizer(train_descriptions)
  
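  # save the tokenizer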
  dump(split_sentences, open('models/tokenizer.pkl', 'wb'))
  
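  # index_word dict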
  index_word = {value: key for key, value in split_sentences.word_index.items()}
  
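  # save dict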
  dump(index_word, open('models/index_word.pkl', 'wb'))

  vocab_size = len(split_sentences.word_index) + 1
  print('Size of the Vocabulary: %d' % vocab_size)

  
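  # determine the maximum sequence length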
  max_length = gen.max_length(train_descriptions)
  print('Length of the Descriptions: %d' % max_length)

  
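  # generate model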
  model = gen.define_model(vocab_size, max_length)

  
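  # check if pre-trained weights are to be used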
  if weight is not None:
    model.load_weights(weight)

  
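  # define checkpoint callback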
  filepath = 'models/model.h5'
  checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                save_best_only=True, mode='min')

  steps = len(train_descriptions)
  val_steps = len(test_descriptions)
  
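  # create the data generators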
  tg = gen.data_generator(train_descriptions, train_features, split_sentences, max_length)
  vg = gen.data_generator(test_descriptions, test_features, split_sentences, max_length)

  
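  # fit model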
  model.fit_generator(tg, epochs=epochs, steps_per_epoch=steps, verbose=1,
        callbacks=[checkpoint], validation_data=vg, validation_steps=val_steps)

  try:
      model.save('models/wholeModel.h5', overwrite=True)
      model.save_weights('models/weights.h5', overwrite=True)
  except Exception as e:
      print("Error in saving model: %s" % e)
  print("Training complete...\n")
Example #3
def train_model(weight=None, epochs=10):
    data = ld.prepare_dataset('train')
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]

    tokenizer = gen.create_tokenizer(train_descriptions)
    dump(tokenizer, open('../models/tokenizer.pkl', 'wb'))
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    dump(index_word, open('../models/index_word.pkl', 'wb'))

    vocab_size = len(tokenizer.word_index) + 1

    max_length = gen.max_length(train_descriptions)

    model = gen.define_model(vocab_size, max_length)

    if weight is not None:
        model.load_weights(weight)

    filepath = '../models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min')

    steps = len(train_descriptions)
    val_steps = len(test_descriptions)
    train_generator = gen.data_generator(train_descriptions, train_features,
                                         tokenizer, max_length)
    val_generator = gen.data_generator(test_descriptions, test_features,
                                       tokenizer, max_length)

    model.fit_generator(train_generator,
                        epochs=epochs,
                        steps_per_epoch=steps,
                        verbose=1,
                        callbacks=[checkpoint],
                        validation_data=val_generator,
                        validation_steps=val_steps)

    try:
        model.save('../models/wholeModel.h5', overwrite=True)
        model.save_weights('../models/weights.h5', overwrite=True)
    except Exception as e:
        print("Error in saving model: %s" % e)
    print("Training complete...\n")
Example #4
def train_model(weight=None, epochs=NUM_EPOCHS):

    data = ld.prepare_dataset("train")
    train_features, train_descriptions = data[0]
    test_features, test_descriptions = data[1]
    # prepare tokenizer
    tokenizer = gen.create_tokenizer(train_descriptions)
    dump(tokenizer, open('models/tokenizer.pkl', 'wb'))
    # index_word dict
    index_word = {v: k for k, v in tokenizer.word_index.items()}
    dump(index_word, open('models/index_word.pkl', 'wb'))

    VOCAB_SIZE = len(tokenizer.word_index) + 1
    print('Vocabulary Size: %d' % VOCAB_SIZE)

    MAX_CAP_LEN = MODEL.max_cap_len(train_descriptions)
    print('Description Length: %d' % MAX_CAP_LEN)

    embedding_matrix = Glove_word2vectorizer()
    model = MODEL.cnn_lstm_model_creater(img_pixel_len=IMG_PIXEL, img_data_size=IMG_SAMPLE_SIZE, img_embedding_dim=IMG_EMBEDDING_DIM,
                                         cap_pixel_len=MAX_CAP_LEN, cap_vocab_size=VOCAB_SIZE, cap_embedding_dim=CAP_EMBEDDING_DIM,)

    if weight is not None:
        model.load_weights(weight)

    # define checkpoint callback
    filepath = 'models/model-ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5'
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                  save_best_only=True, mode='min')

    steps = len(train_descriptions)
    val_steps = len(test_descriptions)

    train_generator = MODEL.data_generator(train_descriptions, train_features, tokenizer, MAX_CAP_LEN)
    val_generator = MODEL.data_generator(test_descriptions, test_features, tokenizer, MAX_CAP_LEN)

    model.fit_generator(train_generator, epochs=epochs, steps_per_epoch=steps, verbose=1,
          callbacks=[checkpoint], validation_data=val_generator, validation_steps=val_steps)

    try:
        model.save('models/wholeModel.h5', overwrite=True)
        model.save_weights('models/weights.h5', overwrite=True)
    except Exception as e:
        print("Error in saving model: %s" % e)
    print("Training complete...\n")
Example #5
  # load the tokenizer
  tokenizer = load(open('models/tokenizer.pkl', 'rb'))
  index_word = load(open('models/index_word.pkl', 'rb'))
  # pre-define the max sequence length (from training)
  max_length = 34

  # load the model
  if args.model:
    filename = args.model
  else:
    filename = 'models/model_weight.h5'
  model = load_model(filename)

  if args.image:
    # load and prepare the photograph
    photo = extract_features(args.image)
    # generate description
    captions = generate_desc(model, tokenizer, photo, index_word, max_length)
    for cap in captions:
      # remove start and end tokens
      seq = cap[0].split()[1:-1]
      desc = ' '.join(seq)
      print('{} [log prob: {:1.2f}]'.format(desc, cap[1]))
  else:
    # load test set
    test_features, test_descriptions = ld.prepare_dataset('test')[1]

    # evaluate model
    evaluate_model(model, test_descriptions, test_features, tokenizer, index_word, max_length)
Example #6
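  # load the tokenizer and index_word mapping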
  tokenizer = load(open('models/tokenizer.pkl', 'rb'))
  index_word = load(open('models/index_word.pkl', 'rb'))

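  # pre-define the max sequence length (from training)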
  max_length = 34


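  # load the model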
  if args.model:
    my_file = args.model
  else:
    my_file = 'models/model_weight.h5'
  model = load_model(my_file)

  if args.my_image:
   
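    # load and prepare the photograph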
    extracted_features_pic = ext_features(args.my_image)
   
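    # generate description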
    caps = gen_description(model, tokenizer, extracted_features_pic, index_word, max_length)
    for cap in caps:
      
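      # remove start and end tokens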
      seq = cap[0].split()[1:-1]
      desc = ' '.join(seq)
      print('{} '.format(desc))
      break
  else:
    
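    # load test set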
    t_feat, t_desc = ld.prepare_dataset('test')[1]

   
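    # evaluate model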
    eval_my_model(model, t_desc, t_feat, tokenizer, index_word, max_length)