Example #1
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout)

    solver = CaptioningSolver(model,
                              n_epochs=FLAGS.num_epochs,
                              batch_size=FLAGS.batch_size,
                              update_rule=FLAGS.optimizer,
                              learning_rate=FLAGS.learning_rate,
                              metric=FLAGS.metric,
                              print_every=FLAGS.snapshot_steps,
                              eval_every=FLAGS.eval_steps,
                              pretrained_model=FLAGS.pretrained_model,
                              start_from=FLAGS.start_from,
                              checkpoint_dir=FLAGS.checkpoint_dir,
                              log_path=FLAGS.log_path)

    solver.train(data, val_data, beam_size=FLAGS.beam_size)
Example #2
def main():
    # load train dataset
    data = load_coco_data(data_path='/data1/junjiaot/data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='/data1/junjiaot/data', split='val')
    #print(data['file_names'].shape,data['captions'].shape,data['image_idxs'].shape)
    #print(max(data['image_idxs']))
    #model/adaptive_attention_REINFORCE/
    #'model/adaptive_attention/3_26_2018/model-13
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[49, 2048],
                             dim_embed=512,
                             dim_hidden=512,
                             n_time_step=16,
                             alpha_c=1.0,
                             dropout=True)

    solver = CaptioningSolver(
        model,
        data,
        val_data,
        n_epochs=50,
        batch_size=56,
        update_rule='adam',
        learning_rate=5e-4,
        print_bleu_every=1000,
        save_every=1000,
        image_path='./image/',
        model_path='model/Resnet_Pretrain_adaptive_attribute9_new/',
        test_model='model/lstm/model-5',
        print_bleu=True,
        log_path='log/')

    solver.test()
def main(use_inception):
    # load train dataset
    print "Loading COCO training data..."
    data = load_coco_data(data_path='./data', split='train')
    print "Done!"
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    if use_inception:
        L = 64
        D = 2048
    else:
        L = 196
        D = 512

    from core.solver import CaptioningSolver
    from core.model import CaptionGenerator
    model = CaptionGenerator(word_to_idx, dim_feature=[L, D], dim_embed=512,
                             dim_hidden=1800, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=5.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=100, batch_size=256, update_rule='adam',
                              learning_rate=0.0005, print_every=1000, summary_every=10000, save_every=1,
                              image_path='./image/', pretrained_model=None, model_path='model/lstm/',
                              test_model='model/lstm/model-10', print_bleu=True, log_path='log/')

    solver.train()
Example #4
def main():
    # create vocab
    word_to_idx = build_vocab(train_image_dir=args.train_image_dir, val_image_dir=args.val_image_dir,
                              coco_dataset_files=[args.coco_dataset_train_dir, args.coco_dataset_val_dir],
                              senticap_dataset_files=[args.senticap_dataset_dir], max_length=args.max_length)
    with open(os.path.join(args.output_dir, 'word_to_idx.pkl'), 'wb') as f:
        pickle.dump(word_to_idx, f)
    print("*" * 16, "Vocabulary built", "*" * 16)

    # load senticap dataset
    train_senticap_data, val_senticap_data, test_senticap_data = load_senticap_data(vocab=word_to_idx, train_image_dir=args.train_image_dir,
                                                              val_image_dir=args.val_image_dir,
                                                              caption_file=args.senticap_dataset_dir, splits=args.senticap_dataset_portions,
                                                              max_length=args.max_length)
    with open(os.path.join(args.output_dir, 'train_senticap_data.pkl'), 'wb') as f:
        pickle.dump(train_senticap_data, f)
    with open(os.path.join(args.output_dir, 'val_senticap_data.pkl'), 'wb') as f:
        pickle.dump(val_senticap_data, f)
    with open(os.path.join(args.output_dir, 'test_senticap_data.pkl'), 'wb') as f:
        pickle.dump(test_senticap_data, f)

    # load coco dataset
    train_coco_data = load_coco_data(vocab=word_to_idx, image_dir=args.train_image_dir,
                                     caption_file=args.coco_dataset_train_dir, splits=[args.coco_dataset_portions[0]],
                                     max_length=args.max_length)
    val_coco_data, test_coco_data = load_coco_data(vocab=word_to_idx, image_dir=args.val_image_dir,
                                                   caption_file=args.coco_dataset_val_dir, splits=args.coco_dataset_portions[1:],
                                                   max_length=args.max_length)
    with open(os.path.join(args.output_dir, 'train_coco_data.pkl'), 'wb') as f:
        pickle.dump(train_coco_data, f)
    with open(os.path.join(args.output_dir, 'val_coco_data.pkl'), 'wb') as f:
        pickle.dump(val_coco_data, f)
    with open(os.path.join(args.output_dir, 'test_coco_data.pkl'), 'wb') as f:
        pickle.dump(test_coco_data, f)
    print("*" * 16, "Dataset loaded", "*" * 16)
def main():
    # load train dataset
    data = load_coco_data(data_path='./data/coco_data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data/coco_data', split='val')

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=20,
                              batch_size=128,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=10,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/preview_model',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/preview_model_log/')

    solver.train()
Example #6
def main():

    data = load_coco_data(data_path='./data', split='val', if_train=True)
    word_to_idx = data['word_to_idx']
    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    solver = CaptioningSolver(model, data, data, n_epochs=10, batch_size=100, update_rule='adam',
                              learning_rate=0.001, print_every=1000, save_every=5, image_path='./image/',
                              pretrained_model=None, model_path='model/lstm_hard/', test_model='model/lstm_hard/model-40',
                              print_bleu=True, log_path='log/')

    test_data = load_coco_data(
        data_path='./data', split='test', if_train=False)
    solver.test(test_data, split='test')
Example #7
def main():

    val_data = load_coco_data(data_path='./data', split='val')

    with open(os.path.join('data/train/word_to_idx.pkl'), 'rb') as f:
        word_to_idx = pickle.load(f)
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=21,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(
        model,
        val_data,
        n_epochs=100,
        batch_size=128,
        update_rule='adam',
        learning_rate=0.0012,
        print_every=100,
        save_every=5,
        image_path='./image/',
        pretrained_model='train_batch/model0.001/model.ckpt-30',
        model_path='train_batch/model0.002/',
        test_model=None,
        print_bleu=True,
        log_path='train_batch/log/')

    solver.train()
Example #8
def main():
    data = load_coco_data(data_path='./data', split='val')
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
        model = CaptionGenerator(word_to_idx,
                                 dim_feature=[196, 512],
                                 dim_embed=512,
                                 dim_hidden=1024,
                                 n_time_step=16,
                                 prev2out=True,
                                 ctx2out=True,
                                 alpha_c=1.0,
                                 selector=True,
                                 dropout=True)
        solver = CaptioningSolver(model,
                                  data,
                                  data,
                                  n_epochs=20,
                                  batch_size=128,
                                  update_rule='adam',
                                  learning_rate=0.001,
                                  print_every=1000,
                                  save_every=1,
                                  image_path='./image/val2014_resized',
                                  pretrained_model=None,
                                  model_path='model/lstmval/',
                                  test_model='model/lstm/model-10',
                                  print_bleu=True,
                                  log_path='log/')

        #solver.test(data, split='val')
        solver.test(data, split='test')
Example #9
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=1024, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=20, batch_size=128, update_rule='adam',
                              learning_rate=0.001, print_every=1000, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='model/lstm/', test_model='model/lstm/model-10',
                              print_bleu=True, log_path='log/')

    solver.train()
Example #10
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(word_to_idx, dim_feature=[121, 1536], dim_embed=512,
                             dim_hidden=1024, n_time_step=26, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

    solver = CaptioningSolver(model, data, val_data, n_epochs=50000, batch_size=64, update_rule='adam',
                              learning_rate=0.00005, print_every=500, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='./model/rl_att_ciderD/',
                              test_model='./model/att/model-10', n_batches=10000,
                              print_bleu=True, log_path='./log/')

    solver.train()
Example #11
def main():
    # load train dataset
    # data = load_coco_data(data_path='./our_data', split='train')
    # word_to_idx = data['word_to_idx']
    # # load val dataset to print out bleu scores every epoch
    # test_data = load_coco_data(data_path='./our_data', split='test')
    #our train:
    data = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    our_test = load_coco_data(
        data_path=r'.\image_data_to_be_labeled\Object_feature\our_data',
        split='train')
    word_to_idx = data['word_to_idx']
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[216, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=26,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=False)

    solver = CaptioningSolver(model,
                              data,
                              our_test,
                              n_epochs=5000,
                              batch_size=64,
                              update_rule='adam',
                              learning_rate=1e-4,
                              print_every=1000,
                              save_every=100,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/our_train0414/',
                              test_model='model/our_train0414/model-2000',
                              print_bleu=False,
                              log_path='log/')

    # solver.train()
    solver.test(our_test)
Example #12
File: test.py  Project: leviswind/img_cap
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='debug')
    word_to_idx = data['word_to_idx']

    # load val dataset to print out bleu scores every epoch
    # val_data = load_coco_data(data_path='./data', split='val')
    # val_data = second_process(10,16, val_data)

    model = CaptionGenerator(word_to_idx, dim_feature=[121, 1536], dim_embed=512,
                             dim_hidden=1024, n_time_step=26, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
Example #13
def main(params):
    batch_size = params['batch_size']
    n_epochs = params['epoch']
    n_time_step = params['n_time_step']
    learning_rate = params['lr']
    model_path = params['model_path']
    log_path = params['log_path']

    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    val_data = load_coco_data(data_path='./data', split='val')

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=256,
                             dim_hidden=1024,
                             n_time_step=n_time_step,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=n_epochs,
                              batch_size=batch_size,
                              update_rule='adam',
                              learning_rate=learning_rate,
                              print_every=3000,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path=model_path,
                              test_model='./model/lstm/model-10',
                              print_bleu=True,
                              log_path=log_path)

    solver.train()
Example #14
def main():
    # load train dataset
    print "start loading data"
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./data', split='val')
    print "data loaded"
    # Elapsed time: 15.95

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[196, 512],
        dim_embed=512,
        dim_hidden=1024,
        n_time_step=16,  # n_time_step is max length + 1
        prev2out=True,
        ctx2out=True,
        alpha_c=1.0,
        selector=True,
        dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=20,
                              batch_size=128,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              test_model='model/lstm/model-10',
                              print_bleu=True,
                              log_path='log/')
    # change model path accordingly

    solver.train()
Example #15
def main():
    # load train dataset
    data = load_coco_data(data_path='./new_data', split='train')
    word_to_idx = data['word_to_idx']
    # load val dataset to print out bleu scores every epoch
    val_data = load_coco_data(data_path='./new_data', split='val')

    model = CaptionGenerator(word_to_idx,
                             dim_att=[4, 512],
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    idx_to_word = {v: k for k, v in word_to_idx.iteritems()}

    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              idx_to_word,
                              n_epochs=15,
                              batch_size=64,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=50,
                              save_every=5,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm2/',
                              test_model='model/lstm2/model-15',
                              print_bleu=True,
                              log_path='log/')

    solver.train()
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='train')
    word_to_idx = data['word_to_idx']

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    discrim = Discriminator(word_to_idx,
                            dim_feature=[196, 512],
                            dim_embed=512,
                            dim_hidden=1024,
                            n_time_step=16,
                            prev2out=True,
                            ctx2out=True,
                            alpha_c=0.0,
                            selector=True,
                            dropout=True,
                            learning_rate=0.01)

    solver = CaptioningSolver(model,
                              discrim,
                              data,
                              data,
                              n_epochs=20,
                              batch_size=128,
                              gpu_list="1,2,3",
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='model/lstm/',
                              train_new='./model/lstm/model-20',
                              test_model='model/lstm/model-21',
                              print_bleu=True,
                              log_path='log/')

    solver.train()
Example #17
def test(toy=None):
    if toy == True:
        toy = "toy_"
    else:
        toy =""
    data_path = os.path.join('./data', 'train')
    with open(os.path.join(data_path, '%sword_to_idx.pkl' % toy), 'rb') as f:
        word_to_idx = pickle.load(f)

    val_data = load_coco_data(data_path='./data', split='val', toy=toy)
    model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
                             dim_hidden=512, n_time_step=16, prev2out=True,
                             ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    solver = CaptioningSolver(model, None, val_data, n_epochs=20, batch_size=128, update_rule='adam',
                              learning_rate=0.001, print_every=1000, save_every=1, image_path='./image/',
                              pretrained_model=None, model_path='model/HighwayLSTM01_lstm/', test_model='model/HighwayLSTM01_lstm/model-20',
                              print_bleu=True, log_path='log/')

    solver.test(val_data)
Example #18
def main():
    # load dataset and vocab
    data = load_coco_data(data_path='./data', split=FLAGS.split)
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)

    model = CaptionGenerator(
        word_to_idx,
        dim_feature=[FLAGS.image_feature_size, FLAGS.image_feature_depth],
        dim_embed=FLAGS.embed_dim,
        dim_hidden=FLAGS.lstm_hidden_size,
        n_time_step=FLAGS.time_steps,
        prev2out=FLAGS.prev2out,
        ctx2out=FLAGS.ctx2out,
        alpha_c=1.0,
        enable_selector=FLAGS.enable_selector,
        dropout=FLAGS.dropout)

    solver = CaptioningSolver(model,
                              batch_size=FLAGS.batch_size,
                              test_checkpoint=FLAGS.test_checkpoint)

    solver.test(data, beam_size=3, attention_visualization=FLAGS.att_vis)
def main():
	# load train dataset
	data = load_coco_data(data_path='./data', split='train')
	word_to_idx = data['word_to_idx']

	model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
							dim_hidden=1024, n_time_step=16, prev2out=True,
							ctx2out=True, alpha_c=1.0, selector=True, dropout=True)

	dis_filter_sizes = [1, 2, 3, 4, 5, 6, 7, 8, 12, 16]
	dis_num_filters = [100, 200, 200, 200, 200, 100, 100, 100, 100, 100]
	dis_l2_reg_lambda = 0.2

	discrim = Discriminator(sequence_length=16, num_classes=2, vocab_size=len(word_to_idx),
            embedding_size=128, filter_sizes=dis_filter_sizes, num_filters=dis_num_filters, l2_reg_lambda=dis_l2_reg_lambda)

	solver = CaptioningSolver(model, discrim, data, data, n_epochs=20, batch_size=64, gpu_list="0,1,2", update_rule='adam',
								learning_rate=0.0025, print_every=20, save_every=1, image_path='./image/',
								pretrained_model=None, model_path='model/lstm/', train_new=None,
								test_model='model/lstm/model-42',
								print_bleu=False, log_path='log/', num_rollout=10)

	solver.train_adversarial()
Example #20
def main():
    # load train dataset
    data = load_coco_data(data_path='./data', split='debug')
    word_to_idx = data['word_to_idx']

    # load val dataset to print out bleu scores every epoch
    # val_data = load_coco_data(data_path='./data', split='val')
    # val_data = second_process(10,16, val_data)

    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=256,
                             dim_hidden=256,
                             n_time_step=10,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)

    solver = CaptioningSolver(model,
                              data,
                              data,
                              n_epochs=50,
                              batch_size=128,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=500,
                              save_every=1,
                              image_path='./image/',
                              pretrained_model=None,
                              model_path='./model/lstm/',
                              test_model='./model/lstm/model-5',
                              print_bleu=True,
                              log_path='./log/')

    solver.test(data, split='val')
Example #21
# assumed imports from the notebook's earlier (omitted) cells: pickle and matplotlib are used below
import cPickle as pickle
import matplotlib.pyplot as plt

from core.solver import CaptioningSolver
from core.model import CaptionGenerator
from core.utils import load_coco_data
from core.bleu import evaluate

get_ipython().magic(u'matplotlib inline')
plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

get_ipython().magic(u'load_ext autoreload')
get_ipython().magic(u'autoreload 2')

# In[2]:

data = load_coco_data(data_path='./data', split='val')
with open('./data/train/word_to_idx.pkl', 'rb') as f:
    word_to_idx = pickle.load(f)

# In[3]:

model = CaptionGenerator(word_to_idx,
                         dim_feature=[196, 512],
                         dim_embed=512,
                         dim_hidden=1500,
                         n_time_step=16,
                         prev2out=True,
                         ctx2out=True,
                         alpha_c=1.0,
                         selector=True,
                         dropout=True)
Example #22
def main():
    batch_size = 32
    val_batch_size = 12
    save_every = 1
    #pretrained_model = None
    with open('./data/train/word_to_idx.pkl', 'rb') as f:
        word_to_idx = pickle.load(f)
    model_path = 'model_residue_cascade_attention_detect_10/'
    # load val dataset to print out bleu scores every epoch
    #
    #word_to_idx =1
    sess = tf.Session()
    model = CaptionGenerator(sess,
                             word_to_idx,
                             dim_feature=[49, 2048],
                             dim_embed=512,
                             dim_hidden=512,
                             n_time_step=21,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)
    n_examples = 117208
    val_data = load_coco_data(data_path='./data', split='val')
    n_iters_per_epoch = int(np.ceil(float(n_examples) / batch_size))
    with open('./data/train/train.captions.pkl', 'rb') as f:
        captions = pickle.load(f)
    with open('./data/train/train.image.idxs.pkl', 'rb') as f:
        image_idxs = pickle.load(f)
    print image_idxs
    val_features = val_data['features']
    print val_features.shape[0]
    n_iters_val = int(np.ceil(float(val_features.shape[0]) / val_batch_size))
    model.build()
    saver = tf.train.Saver()
    #variables = slim.get_variables_to_restore()
    #variables_to_restore = [v for v in variables if string.find(v.name, 'discriminator') == -1]
    #saver = tf.train.Saver(variables_to_restore)
    #if pretrained_model is not None:
    #saver = tf.train.import_meta_graph('./model_residue/model-10.meta')
    #   saver.restore(sess, pretrained_model)
    print 'start pre-training'
    for epoch in xrange(1, 10 + 1):
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]
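        # NOTE: step starts at 1, so the slices below skip the first mini-batch each epoch;
        # partial final batches are dropped by the shape check before pre_train_batch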
        for step in xrange(1, n_iters_per_epoch + 1):
            captions_batch = captions[step * batch_size:(step + 1) *
                                      batch_size]
            image_idxs_batch = image_idxs[step * batch_size:(step + 1) *
                                          batch_size]
            features_batch = np.empty((batch_size, 49, 2048))
            j = 0
            for i in image_idxs_batch:
                features_single = hickle.load('./data_residue_single/train/' +
                                              'train_' + str(i) +
                                              '.features.hkl')
                features_batch[j, :] = features_single
                j = j + 1

            features_detect_batch = np.empty((batch_size, 10, 4096))
            j = 0
            for i in image_idxs_batch:
                features_detect_single = hickle.load(
                    './data_residue_detect/train/' + 'train_' + str(i) +
                    '.features.hkl')
                features_detect_single = features_detect_single[-10:, :]
                features_detect_batch[j, :] = features_detect_single
                j = j + 1

            if captions_batch.shape[0] == batch_size:
                model.pre_train_batch(features_batch, features_detect_batch,
                                      captions_batch)
            if step % 10 == 0:
                print 'epoch', epoch
                print 'step', step
            if step % 512 == 0:
                all_gen_cap = np.ndarray((val_features.shape[0], 30))
                for i in range(n_iters_val):
                    features_batch = val_features[i * val_batch_size:(i + 1) *
                                                  val_batch_size]
                    val_detect_batch = np.empty(
                        (len(features_batch), 10, 4096))
                    m = 0
                    for j in range(i * val_batch_size,
                                   (i + 1) * val_batch_size):
                        val_detect_single = hickle.load(
                            './data_residue_detect/val/' + 'val_' + str(j) +
                            '.features.hkl')
                        val_detect_single = val_detect_single[-10:, :]
                        val_detect_batch[m, :] = val_detect_single
                        m = m + 1
                    _, _, _, _, gen_cap = model.generate(
                        features_batch, val_detect_batch)
                    all_gen_cap[i * val_batch_size:(i + 1) *
                                val_batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data',
                                  split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)
                print "generative captions:%s\n" % all_decoded[0]

        if epoch % save_every == 0:
            saver.save(sess,
                       os.path.join(model_path, 'model'),
                       global_step=epoch)
            print "model-%s saved." % (epoch)
    print 'start reinforcement learning!'

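    # NOTE: xrange(1, 0 + 1) is empty, so this reinforcement-learning loop does not execute as written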
    for epoch in xrange(1, 0 + 1):
        rand_idxs = np.random.permutation(n_examples)
        captions = captions[rand_idxs]
        image_idxs = image_idxs[rand_idxs]
        for step in xrange(1, n_iters_per_epoch + 1):
            captions_batch = captions[step * batch_size:(step + 1) *
                                      batch_size]
            image_idxs_batch = image_idxs[step * batch_size:(step + 1) *
                                          batch_size]
            features_batch = features[image_idxs_batch]
            if captions_batch.shape[0] == batch_size:
                #gen_cap = model.generate(features_batch)
                #decoded_cap = decode_captions(gen_cap, model.idx_to_word)
                #decoded_reference = decode_captions(captions_batch, model.idx_to_word)
                #scores = evaluate_part(candidate = decoded_cap, split = 'train', idx = image_idxs_batch, get_scores=True)
                #reward = (0.5*scores['Bleu_1']  + 0.5*scores['Bleu_2'] + scores['Bleu_3'] + scores['Bleu_4'])/3
                #print reward
                #reward = 1
                t = model.train_batch(features_batch, captions_batch)
            if step % 10 == 0:
                print 'epoch', epoch
                print 'step', step
                print 'time', t
            if step % 1024 == 0:
                ground_truths = captions[image_idxs == image_idxs_batch[0]]
                decoded = decode_captions(ground_truths, model.idx_to_word)
                for j, gt in enumerate(decoded):
                    print "Ground truth %d: %s" % (j + 1, gt)
                gen_caps = model.generate(features_batch)
                decoded = decode_captions(gen_caps, model.idx_to_word)
                print "Generated caption: %s\n" % decoded[0]

            if step % 1024 == 0:
                all_gen_cap = np.ndarray((val_features.shape[0], 30))
                for i in range(n_iters_val):
                    features_batch = val_features[i * batch_size:(i + 1) *
                                                  batch_size]
                    feed_dict = features_batch
                    gen_cap = model.generate(feed_dict)
                    all_gen_cap[i * batch_size:(i + 1) * batch_size] = gen_cap
                all_decoded = decode_captions(all_gen_cap, model.idx_to_word)
                save_pickle(all_decoded,
                            "./data/val/val.candidate.captions.pkl")
                scores = evaluate(data_path='./data',
                                  split='val',
                                  get_scores=True)
                write_bleu(scores=scores, path=model_path, epoch=epoch)
                #print "generative captions:%s\n"%all_decoded[0]
        if epoch % save_every == 0:
            saver.save(sess,
                       os.path.join(model_path, 'reinforcemodel'),
                       global_step=epoch)
            print "model-%s saved." % (epoch)
def main():

    # load train dataset
    # data = load_coco_data(data_path='./data', split='train0', if_train=True)
    # word_to_idx = data['word_to_idx']
    # # load val dataset to print out bleu scores every epoch
    # val_data = load_coco_data(data_path='./data', split='val', if_train=False)
    # model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
    #                          dim_hidden=1024, n_time_step=16, prev2out=True,
    #                          ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    # solver = CaptioningSolver(model, data, val_data, n_epochs=10, batch_size=100, update_rule='adam',
    #                           learning_rate=0.001, print_every=1000, save_every=5, image_path='./image/',
    #                           pretrained_model=None, model_path='model/lstm_hard/', test_model='model/lstm_hard/model-10',
    #                           print_bleu=True, log_path='log_hard/')
    # solver.train(chunk=0)

    # data = load_coco_data(data_path='./data', split='train1', if_train=True)
    # word_to_idx = data['word_to_idx']
    # val_data = load_coco_data(data_path='./data', split='val', if_train=False)
    # model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
    #                          dim_hidden=1024, n_time_step=16, prev2out=True,
    #                          ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    # solver = CaptioningSolver(model, data, val_data, n_epochs=10, batch_size=100, update_rule='adam',
    #                           learning_rate=0.001, print_every=1000, save_every=5, image_path='./image/',
    #                           pretrained_model='model/lstm_hard/model-10', model_path='model/lstm_hard/', test_model='model/lstm_hard/model-20',
    #                           print_bleu=True, log_path='log_hard/')
    # solver.train(chunk=1)

    # data = load_coco_data(data_path='./data', split='train2', if_train=True)
    # word_to_idx = data['word_to_idx']
    # val_data = load_coco_data(data_path='./data', split='val', if_train=False)
    # model = CaptionGenerator(word_to_idx, dim_feature=[196, 512], dim_embed=512,
    #                          dim_hidden=1024, n_time_step=16, prev2out=True,
    #                          ctx2out=True, alpha_c=1.0, selector=True, dropout=True)
    # solver = CaptioningSolver(model, data, val_data, n_epochs=10, batch_size=100, update_rule='adam',
    #                           learning_rate=0.001, print_every=1000, save_every=5, image_path='./image/',
    #                           pretrained_model='model/lstm_hard/model-20', model_path='model/lstm_hard/', test_model='model/lstm_hard/model-30',
    #                           print_bleu=True, log_path='log_hard/')
    # solver.train(chunk=2)

    data = load_coco_data(data_path='./data', split='train3', if_train=True)
    word_to_idx = data['word_to_idx']
    val_data = load_coco_data(data_path='./data', split='val', if_train=False)
    model = CaptionGenerator(word_to_idx,
                             dim_feature=[196, 512],
                             dim_embed=512,
                             dim_hidden=1024,
                             n_time_step=16,
                             prev2out=True,
                             ctx2out=True,
                             alpha_c=1.0,
                             selector=True,
                             dropout=True)
    solver = CaptioningSolver(model,
                              data,
                              val_data,
                              n_epochs=10,
                              batch_size=100,
                              update_rule='adam',
                              learning_rate=0.001,
                              print_every=1000,
                              save_every=5,
                              image_path='./image/',
                              pretrained_model='model/lstm_hard/model-30',
                              model_path='model/lstm_hard/',
                              test_model='model/lstm_hard/model-40',
                              print_bleu=True,
                              log_path='log_hard/')
    solver.train(chunk=3)
"""
@author: xz
"""

import matplotlib.pyplot as plt
import cPickle as pickle
import tensorflow as tf
from core.solver import CaptioningSolver
from core.model import CaptionGenerator
from core.utils import load_coco_data
from core.bleu import evaluate

plt.rcParams['figure.figsize'] = (8.0, 6.0)  # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

data = load_coco_data(data_path='./data/coco_data/', split='test')
with open('./data/coco_data/train/word_to_idx.pkl', 'rb') as f:
    word_to_idx = pickle.load(f)

#print '~~~~~~~~~~~~~~~~~~~~~~~'
#
#for i in range(data['features'].shape[0]):
#
#    if data['file_names'][i] =='image/train2014_resized/COCO_train2014_000000013140.jpg':
#        print i
#        print data['file_names'][i]
#print data['file_names'][1813]

model = CaptionGenerator(word_to_idx,
                         dim_feature=[196, 512],
                         dim_embed=512,
Example #25
    def train(self):
        # train/val dataset
        #        n_examples = self.data['captions'].shape[0]
        #        n_iters_per_epoch = int(np.ceil(float(n_examples)/self.batch_size))
        #        features = self.data['features']
        #        captions = self.data['captions']
        #        image_idxs = self.data['image_idxs']
        #        val_features = self.val_data['features']

        val_features = self.val_data['features']
        n_iters_val = int(
            np.ceil(float(val_features.shape[0]) / self.batch_size))
        # build graphs for training model and sampling captions
        loss = self.model.build_model()
        #        tf.get_variable_scope().reuse_variables()
        #        _, _, generated_captions = self.model.build_sampler(max_len=20)
        #
        #        # train op
        #        with tf.name_scope('optimizer'):
        #            optimizer = self.optimizer(learning_rate=self.learning_rate)
        #            grads = tf.gradients(loss, tf.trainable_variables())
        #            grads_and_vars = list(zip(grads, tf.trainable_variables()))
        #            train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

        with tf.variable_scope(tf.get_variable_scope()) as scope:
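            # reuse the variables created by build_model() so the sampler shares weights with the training graph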
            with tf.name_scope('optimizer'):
                tf.get_variable_scope().reuse_variables()
                _, _, generated_captions = self.model.build_sampler(max_len=20)
                optimizer = self.optimizer(learning_rate=self.learning_rate)
                grads = tf.gradients(loss, tf.trainable_variables())
                grads_and_vars = list(zip(grads, tf.trainable_variables()))
        train_op = optimizer.apply_gradients(grads_and_vars=grads_and_vars)

        # summary op
        tf.summary.scalar('batch_loss', loss)
        for var in tf.trainable_variables():
            tf.summary.histogram(var.op.name, var)
        for grad, var in grads_and_vars:
            tf.summary.histogram(var.op.name + '/gradient', grad)

        summary_op = tf.summary.merge_all()

        print "The number of epoch: %d" % self.n_epochs
        #print "Data size: %d" %n_examples
        print "Batch size: %d" % self.batch_size
        #print "Iterations per epoch: %d" %n_iters_per_epoch

        config = tf.ConfigProto(allow_soft_placement=True)
        #os.environ["CUDA_VISIBLE_DEVICES"] = '1'
        #config.gpu_options.per_process_gpu_memory_fraction=0.9
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            summary_writer = tf.summary.FileWriter(
                self.log_path, graph=tf.get_default_graph())
            saver = tf.train.Saver(max_to_keep=20)

            if self.pretrained_model is not None:
                print "Start training with pretrained Model.."
                saver.restore(sess, self.pretrained_model)

            prev_loss = -1
            curr_loss = 0
            start_t = time.time()

            for e in range(self.n_epochs):
                for part_num in range(5):
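                    # the training split is stored in 5 chunks; load one chunk at a time and free it after use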
                    data = load_coco_data(data_path='./data',
                                          split='train',
                                          batch=part_num)
                    n_examples = data['captions'].shape[0]
                    n_iters_per_epoch = int(
                        np.ceil(float(n_examples) / self.batch_size))
                    features = data['features']
                    captions = data['captions']
                    image_idxs = data['image_idxs']

                    rand_idxs = np.random.permutation(n_examples)
                    captions = captions[rand_idxs]
                    image_idxs = image_idxs[rand_idxs]

                    for i in range(n_iters_per_epoch):
                        captions_batch = captions[i * self.batch_size:(i + 1) *
                                                  self.batch_size]
                        image_idxs_batch = image_idxs[i *
                                                      self.batch_size:(i + 1) *
                                                      self.batch_size]
                        features_batch = features[image_idxs_batch]
                        feed_dict = {
                            self.model.features: features_batch,
                            self.model.captions: captions_batch
                        }
                        _, l = sess.run([train_op, loss], feed_dict)
                        curr_loss += l

                        # write summary for tensorboard visualization
                        if i % 10 == 0:
                            summary = sess.run(summary_op, feed_dict)
                            summary_writer.add_summary(
                                summary, e * n_iters_per_epoch + i)

                        if (i + 1) % self.print_every == 0:
                            print "\nTrain loss at epoch %d & part %d & iteration %d (mini-batch): %.5f" % (
                                e + 1, part_num + 1, i + 1, l)
                            ground_truths = captions[image_idxs ==
                                                     image_idxs_batch[0]]
                            decoded = decode_captions(ground_truths,
                                                      self.model.idx_to_word)
                            for j, gt in enumerate(decoded):
                                print "Ground truth %d: %s" % (j + 1, gt)
                            gen_caps = sess.run(generated_captions, feed_dict)
                            decoded = decode_captions(gen_caps,
                                                      self.model.idx_to_word)
                            print "Generated caption: %s\n" % decoded[0]

                    del data

                print "Previous epoch loss: ", prev_loss
                print "Current epoch loss: ", curr_loss
                print "Elapsed time: ", time.time() - start_t
                prev_loss = curr_loss
                curr_loss = 0

                # print out BLEU scores and file write
                if self.print_bleu:
                    all_gen_cap = np.ndarray((val_features.shape[0], 20))
                    for i in range(n_iters_val):
                        features_batch = val_features[i *
                                                      self.batch_size:(i + 1) *
                                                      self.batch_size]
                        feed_dict = {self.model.features: features_batch}
                        gen_cap = sess.run(generated_captions,
                                           feed_dict=feed_dict)
                        all_gen_cap[i * self.batch_size:(i + 1) *
                                    self.batch_size] = gen_cap

                    all_decoded = decode_captions(all_gen_cap,
                                                  self.model.idx_to_word)
                    captions2json.captions2json(
                        all_decoded, self.val_data['file_names'],
                        './data/val/val_cadidate_captions_json.json')
                    compute_m1(
                        json_predictions_file=
                        './data/val/val_cadidate_captions_json.json',
                        reference_file='./data/val/val_references_json.json')

                # save model's parameters
                if (e + 1) % self.save_every == 0:

                    saver.save(sess,
                               os.path.join(self.model_path, 'model.ckpt'),
                               global_step=e + 1)
                    print "model-%s saved." % (e + 1)