Python COCOScorer.score примеры использования

Язык программирования: Python

Пространство имен/Пакет: cocoeval

Класс/Тип: COCOScorer

Метод/Функция: score

Примеров на hotexamples.com: 14

Python COCOScorer.score - 14 примеров найдено. Это лучшие примеры Python кода для cocoeval.COCOScorer.score, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

COCOScorer(20)

score(7)

Пример #1

Показать файл

Файл: Att.py Проект: KuoHaoZeng/VH

def test(model_path='models/model-900', video_feat_path=video_feat_path):
    """Score a saved caption model on the validation split with COCOScorer.

    The checkpoint at ``model_path`` is restored into a freshly built
    generator graph, every fourth entry of the generator's ``lstm3`` variable
    list is multiplied by ``1 - 0.5``, and captions produced by
    ``testing_all`` are scored against the ground truth.

    Args:
        model_path: checkpoint path handed to ``saver.restore``.
        video_feat_path: accepted for interface compatibility; not read here.

    Returns:
        Whatever ``COCOScorer.score`` returns for the generated captions.
    """
    _, _, val_data, _ = get_video_data_jukin(
        video_data_path_train, video_data_path_val, video_data_path_test)
    # Evaluation deliberately runs on the validation split.
    test_data = val_data
    vocab_file = './data'+str(gpu_id)+'/ixtoword.npy'
    ixtoword = pd.Series(np.load(vocab_file).tolist())

    model = Video_Caption_Generator(
        dim_image=dim_image,
        n_words=len(ixtoword),
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        n_lstm_steps=n_frame_step,
        drop_out_rate=0,
        bias_init_vector=None)

    video_tf, video_mask_tf, caption_tf, lstm3_variables_tf = model.build_generator()
    session_config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.InteractiveSession(config=session_config)

    with tf.device("/cpu:0"):
        saver = tf.train.Saver()
        saver.restore(sess, model_path)

    # Presumably compensates for the 0.5 dropout used during training
    # (TODO confirm): only entries at positions 0, 4, 8, ... are rescaled.
    scale = 1-0.5
    for position, variable in enumerate(lstm3_variables_tf):
        if position % 4 != 0:
            continue
        sess.run(variable.assign(tf.mul(variable, scale)))

    pred_sent, gt_sent = testing_all(
        sess, test_data, ixtoword, video_tf, video_mask_tf, caption_tf)
    sample_ids = range(len(pred_sent))
    return COCOScorer().score(gt_sent, pred_sent, sample_ids)

Пример #2

Показать файл

Файл: HS.py Проект: KuoHaoZeng/VH

def test(model_path='models/model-900', video_feat_path=video_feat_path):
    """Evaluate a saved model on the test split and return its COCO scores.

    Restores the checkpoint, multiplies every fourth variable of both
    generator LSTM variable lists by ``1 - 0.5``, runs ``testing_all``, stores
    the raw results under ``HS_result/`` and prints the mean of the ``mp``
    values before scoring the captions.

    Args:
        model_path: checkpoint path; its second '/'-separated component names
            the result file and the printed line.
        video_feat_path: accepted for interface compatibility; not read here.

    Returns:
        Whatever ``COCOScorer.score`` returns for the generated captions.
    """
    meta_data, train_data, test_data = get_video_data_jukin(
        video_data_path_train, video_data_path_test)
    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())

    model = Video_Caption_Generator(
        dim_image=dim_image,
        n_words=len(ixtoword),
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        n_lstm_steps=n_frame_step,
        drop_out_rate=0,
        bias_init_vector=None)

    (video_tf, video_mask_tf, video_len_tf, HLness_tf, caption_tf,
     HLness_att_mask_tf, lstmRNN_variables_tf,
     lstm3_variables_tf) = model.build_generator()
    sess = tf.InteractiveSession()

    saver = tf.train.Saver()
    saver.restore(sess, model_path)

    # Same rescaling for both variable lists, RNN list first: only entries at
    # positions 0, 4, 8, ... are touched.
    for variables in (lstmRNN_variables_tf, lstm3_variables_tf):
        for position, variable in enumerate(variables):
            if position % 4 != 0:
                continue
            sess.run(variable.assign(tf.mul(variable, 1-0.5)))

    mp_values, pred_sent, gt_sent, HLness = testing_all(
        sess, test_data, ixtoword, video_tf, video_mask_tf, video_len_tf,
        HLness_tf, caption_tf, HLness_att_mask_tf)
    np.savez('HS_result/'+model_path.split('/')[1],
             gt=gt_sent, pred=pred_sent, mp=mp_values, HLness=HLness)
    mean_mp = np.mean(mp_values)
    print(model_path.split('/')[1]+' mAP: ' + str(mean_mp))

    sample_ids = range(len(pred_sent))
    return COCOScorer().score(gt_sent, pred_sent, sample_ids)

Пример #3

Показать файл

Файл: metrics.py Проект: MichaelXin/arctic-capgen-vid

def score_with_cocoeval(samples_valid, samples_test, engine):
    """Score validation and test samples against the engine's ground truth.

    Args:
        samples_valid: generated samples for the validation ids; a falsy
            value skips validation scoring.
        samples_test: generated samples for the test ids; a falsy value skips
            test scoring.
        engine: object exposing ``valid_ids``, ``test_ids`` and a ``CAP``
            mapping from video id to ground-truth captions.

    Returns:
        ``(valid_score, test_score)``; each entry is the ``COCOScorer.score``
        result, or ``None`` when the matching samples were falsy.
    """
    scorer = COCOScorer()

    valid_score = None
    if samples_valid:
        # Ground truth keeps the order of the engine's id list.
        gts_valid = OrderedDict(
            (vid, engine.CAP[vid]) for vid in engine.valid_ids)
        valid_score = scorer.score(gts_valid, samples_valid, engine.valid_ids)

    test_score = None
    if samples_test:
        gts_test = OrderedDict(
            (vid, engine.CAP[vid]) for vid in engine.test_ids)
        test_score = scorer.score(gts_test, samples_test, engine.test_ids)

    return valid_score, test_score

Пример #4

Показать файл

Файл: Att.py Проект: KuoHaoZeng/VH

def train():
    meta_data, train_data, val_data, test_data = get_video_data_jukin(video_data_path_train, video_data_path_val, video_data_path_test)
    captions = meta_data['Description'].values
    captions = map(lambda x: x.replace('.', ''), captions)
    captions = map(lambda x: x.replace(',', ''), captions)
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(captions, word_count_threshold=1)

    np.save('./data'+str(gpu_id)+'/ixtoword', ixtoword)

    model = Video_Caption_Generator(
            dim_image=dim_image,
            n_words=len(wordtoix),
            dim_hidden=dim_hidden,
            batch_size=batch_size,
            n_lstm_steps=n_frame_step,
	    drop_out_rate = 0.5,
            bias_init_vector=None)

    tf_loss, tf_video, tf_video_mask, tf_caption, tf_caption_mask= model.build_model()
    sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))

    with tf.device("/cpu:0"):
    	saver = tf.train.Saver(max_to_keep=100)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.initialize_all_variables().run()
    saver.restore(sess, 'models_Att_update_new/model-30')

    tStart_total = time.time()
    for epoch in range(n_epochs):
        index = np.arange(len(train_data))
        np.random.shuffle(index)
        train_data = train_data[index]

	tStart_epoch = time.time()
	loss_epoch = np.zeros(len(train_data))
        for current_batch_file_idx in xrange(len(train_data)):

	    tStart = time.time()
	    current_batch = h5py.File(train_data[current_batch_file_idx])
            current_feats = np.zeros((batch_size, n_frame_step, dim_image))
            current_video_masks = np.zeros((batch_size, n_frame_step))
	    current_video_len = np.zeros(batch_size)
	    for ind in xrange(batch_size):
		current_feats[ind,:,:] = current_batch['data'][:,ind,:]
		idx = np.where(current_batch['label'][:,ind] != -1)[0]
		if len(idx) == 0:
			continue
		current_video_masks[ind,:idx[-1]+1] = 1

            current_captions = current_batch['title']
            current_caption_ind = map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ') if word in wordtoix], current_captions)

            current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=16-1)
            current_caption_matrix = np.hstack( [current_caption_matrix, np.zeros( [len(current_caption_matrix),1]) ] ).astype(int)
            current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            nonzeros = np.array( map(lambda x: (x != 0).sum()+1, current_caption_matrix ))

            for ind, row in enumerate(current_caption_masks):
                row[:nonzeros[ind]] = 1

            _, loss_val = sess.run(
                    [train_op, tf_loss],
                    feed_dict={
                        tf_video: current_feats,
                        tf_video_mask : current_video_masks,
                        tf_caption: current_caption_matrix,
                        tf_caption_mask: current_caption_masks
                        })
	    loss_epoch[current_batch_file_idx] = loss_val
	    tStop = time.time()
            #print "Epoch:", epoch, " Batch:", current_batch_file_idx, " Loss:", loss_val
	    #print "Time Cost:", round(tStop - tStart,2), "s"

	print "Epoch:", epoch, " done. Loss:", np.mean(loss_epoch)
	tStop_epoch = time.time()
	print "Epoch Time Cost:", round(tStop_epoch - tStart_epoch,2), "s"

        if np.mod(epoch, 10) == 0 or epoch == n_epochs - 1:
            print "Epoch ", epoch, " is done. Saving the model ..."
    	    with tf.device("/cpu:0"):
            	saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)

	    current_batch = h5py.File(val_data[np.random.randint(0,len(val_data))])
    	    video_tf, video_mask_tf, caption_tf, lstm3_variables_tf = model.build_generator()
    	    ixtoword = pd.Series(np.load('./data'+str(gpu_id)+'/ixtoword.npy').tolist())
	    [pred_sent, gt_sent] = testing_all(sess, train_data[-2:], ixtoword, video_tf, video_mask_tf, caption_tf)
	    for idx in range(len(pred_sent)):
		print "GT:  " + gt_sent[idx][0]['caption']
		print "PD:  " + pred_sent[idx][0]['caption']
		print '-------'
    	    [pred_sent, gt_sent] = testing_all(sess, val_data, ixtoword,video_tf, video_mask_tf, caption_tf)
	    scorer = COCOScorer()
	    total_score = scorer.score(gt_sent, pred_sent, range(len(pred_sent)))
	sys.stdout.flush()

    print "Finally, saving the model ..."
    with tf.device("/cpu:0"):
    	saver.save(sess, os.path.join(model_path, 'model'), global_step=n_epochs)
    tStop_total = time.time()
    print "Total Time Cost:", round(tStop_total - tStart_total,2), "s"

Пример #5

Показать файл

Файл: pool_vae_scale_frame_s2s.py Проект: rjchand07/V2S-tensorflow

def train():
    assert os.path.isfile(video_data_path_train)
    assert os.path.isfile(video_data_path_val)
    assert os.path.isdir(model_path)
    assert os.path.isfile(wordtoix_file)
    assert os.path.isfile(ixtoword_file)
    assert drop_strategy in ['block_video', 'block_sent', 'random', 'keep']
    wordtoix = np.load(wordtoix_file).tolist()
    ixtoword = pd.Series(np.load(ixtoword_file).tolist())
    print 'build model and session...'
    # shared parameters on the GPU
    with tf.device("/gpu:0"):
        model = Video_Caption_Generator(dim_image=dim_image,
                                        n_words=len(wordtoix),
                                        dim_hidden=dim_hidden,
                                        batch_size=batch_size,
                                        n_caption_steps=n_caption_steps,
                                        n_video_steps=n_video_steps,
                                        drop_out_rate=0.5,
                                        bias_init_vector=None)
    tStart_total = time.time()
    n_epoch_steps = int(n_train_samples / batch_size)
    n_steps = n_epochs * n_epoch_steps
    # preprocess on the CPU
    with tf.device('/cpu:0'):
        train_data, train_encode_data, _, _, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, \
            _, _, _, _, train_frame_data = read_and_decode_with_frame(video_data_path_train)
        val_data, val_encode_data, val_fname, val_title, val_video_label, val_caption_label, val_caption_id, val_caption_id_1, \
            _, _, _, _, val_frame_data = read_and_decode_with_frame(video_data_path_val)
        # random batches
        train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, train_frame_data = \
            tf.train.shuffle_batch([train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, train_frame_data],
                batch_size=batch_size, num_threads=num_threads, capacity=prefetch, min_after_dequeue=min_queue_examples)
        val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1, val_frame_data = \
            tf.train.batch([val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1, val_frame_data],
                batch_size=batch_size, num_threads=1, capacity=2* batch_size)
    # graph on the GPU
    with tf.device("/gpu:0"):
        tf_loss, tf_loss_cap, tf_loss_lat, tf_loss_vid, tf_z, tf_v_h, tf_s_h, tf_drop_type \
            = model.build_model(train_data, train_frame_data, train_video_label, train_caption_id, train_caption_id_1, train_caption_label)
        val_v2s_tf, _ = model.build_v2s_generator(val_data)
        val_s2s_tf, _, _ = model.build_s2s_generator(val_caption_id_1)
        val_s2v_tf, _, _ = model.build_s2v_generator(val_caption_id_1,
                                                     val_frame_data)
        val_v2v_tf, _ = model.build_v2v_generator(val_data, val_frame_data)

    sess = tf.InteractiveSession(config=tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False))
    # check for model file
    with tf.device(cpu_device):
        saver = tf.train.Saver(max_to_keep=100)
    ckpt = tf.train.get_checkpoint_state(model_path)
    global_step = 0
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        #        print_tensors_in_checkpoint_file(ckpt.model_checkpoint_path, "", True)
        global_step = get_model_step(ckpt.model_checkpoint_path)
        print 'global_step:', global_step
    else:
        print("Created model with fresh parameters.")
        sess.run(tf.global_variables_initializer())
    temp = set(tf.global_variables())
    # train on the GPU
    with tf.device("/gpu:0"):
        ## 1. weight decay
        for var in tf.trainable_variables():
            decay_loss = tf.multiply(tf.nn.l2_loss(var),
                                     0.0004,
                                     name='weight_loss')
            tf.add_to_collection('losses', decay_loss)
        tf.add_to_collection('losses', tf_loss)
        tf_total_loss = tf.add_n(tf.get_collection('losses'),
                                 name='total_loss')
        ## 2. gradient clip
        optimizer = tf.train.AdamOptimizer(learning_rate)
        gvs = optimizer.compute_gradients(tf_total_loss)
        # when variable is not related to the loss, grad returned as None
        clip_gvs = [(tf.clip_by_norm(grad, clip_norm), var)
                    for grad, var in gvs if grad is not None]
        for grad, var in gvs:
            if grad is not None:
                tf.summary.histogram(var.name + '/grad', grad)
                tf.summary.histogram(var.name + '/data', var)
        train_op = optimizer.apply_gradients(clip_gvs)

    ## initialize variables added for optimizer
    sess.run(tf.variables_initializer(set(tf.global_variables()) - temp))
    # initialize epoch variable in queue reader
    sess.run(tf.local_variables_initializer())
    loss_epoch = 0
    loss_epoch_cap = 0
    loss_epoch_vid = 0
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    ##### add summaries ######
    tf.summary.histogram('video_h', tf_v_h)
    tf.summary.histogram('sent_h', tf_s_h)
    tf.summary.scalar('loss_vid', tf_loss_vid)
    tf.summary.scalar('loss_lat', tf_loss_lat)
    tf.summary.scalar('loss_caption', tf_loss_cap)
    #    for var in tf.trainable_variables():
    #        summaries.append(tf.histogram_summary(var.op.name, var))
    summary_op = tf.summary.merge_all()
    # write graph architecture to file
    summary_writer = tf.summary.FileWriter(model_path + 'summary', sess.graph)
    epoch = global_step
    video_label = sess.run(train_video_label)
    for step in xrange(1, n_steps + 1):
        tStart = time.time()
        if drop_strategy == 'keep':
            drop_type = 0
        elif drop_strategy == 'block_sentence':
            drop_type = 1
        elif drop_strategy == 'block_video':
            drop_type = 2
        else:
            drop_type = random.randint(0, 3)

        _, loss_val, loss_cap, loss_lat, loss_vid = sess.run(
            [train_op, tf_loss, tf_loss_cap, tf_loss_lat, tf_loss_vid],
            feed_dict={tf_drop_type: drop_type})
        tStop = time.time()
        print "step:", step, " Loss:", loss_val, "loss_cap:", loss_cap * caption_weight, "loss_latent:", loss_lat * latent_weight, "loss_vid:", loss_vid * video_weight
        print "Time Cost:", round(tStop - tStart, 2), "s"
        loss_epoch += loss_val
        loss_epoch_cap += loss_cap
        loss_epoch_vid += loss_vid

        if step % n_epoch_steps == 0:
            #        if step % 3 == 0:
            epoch += 1
            loss_epoch /= n_epoch_steps
            loss_epoch_cap /= n_epoch_steps
            loss_epoch_vid /= n_epoch_steps
            with tf.device(cpu_device):
                saver.save(sess,
                           os.path.join(model_path, 'model'),
                           global_step=epoch)


#            print 'z:', z[0, :10]
            print 'epoch:', epoch, 'loss:', loss_epoch, "loss_cap:", loss_epoch_cap, "loss_lat:", loss_lat, "loss_vid:", loss_epoch_vid
            loss_epoch = 0
            loss_epoch_cap = 0
            loss_epoch_vid = 0
            ######### test sentence generation ##########
            n_val_steps = int(n_val_samples / batch_size)
            #            n_val_steps = 3
            ### TODO: sometimes COCO test show exceptions in the beginning of training ####
            if test_v2s:
                [pred_sent, gt_sent, id_list, gt_dict, pred_dict,
                 flist] = testing_all(sess, 1, ixtoword, val_v2s_tf, val_fname)
                for i, key in enumerate(pred_dict.keys()):
                    print 'video:', flist[i]
                    for ele in gt_dict[key]:
                        print "GT:  " + ele['caption']
                    print "PD:  " + pred_dict[key][0]['caption']
                    print '-------'
                print '############## video to sentence result #################'
                print 'epoch:', epoch
                [pred_sent, gt_sent, id_list, gt_dict, pred_dict,
                 _] = testing_all(sess, n_val_steps, ixtoword, val_v2s_tf,
                                  val_fname)
                scorer = COCOScorer()
                total_score = scorer.score(gt_dict, pred_dict, id_list)
                print '############## video to sentence result #################'

            if test_s2s:
                [pred_sent, gt_sent, id_list, gt_dict, pred_dict,
                 flist] = testing_all(sess, 1, ixtoword, val_s2s_tf, val_fname)
                for i, key in enumerate(pred_dict.keys()):
                    print 'video:', flist[i]
                    for ele in gt_dict[key]:
                        print "GT:  " + ele['caption']
                    print "PD:  " + pred_dict[key][0]['caption']
                    print '-------'
                print '############## sentence to sentence result #################'
                print 'epoch:', epoch
                [pred_sent, gt_sent, id_list, gt_dict, pred_dict,
                 _] = testing_all(sess, n_val_steps, ixtoword, val_s2s_tf,
                                  val_fname)
                scorer = COCOScorer()
                total_score = scorer.score(gt_dict, pred_dict, id_list)
                print '############## sentence to sentence result #################'

            ######### test video generation #############
            if test_v2v:
                mse_v2v = test_all_videos(sess, n_val_steps, val_frame_data,
                                          val_v2v_tf, val_video_label,
                                          pixel_scale_factor)
                print 'epoch', epoch, 'video2video mse:', mse_v2v
            if test_s2v:
                mse_s2v = test_all_videos(sess, n_val_steps, val_frame_data,
                                          val_s2v_tf, val_video_label,
                                          pixel_scale_factor)
                print 'epoch', epoch, 'caption2video mse:', mse_s2v
            sys.stdout.flush()

            ###### summary ######
            if epoch % 2 == 0:
                summary = sess.run(summary_op)
                summary_writer.add_summary(summary, epoch)

        sys.stdout.flush()

    coord.request_stop()
    coord.join(threads)
    print "Finally, saving the model ..."
    with tf.device(cpu_device):
        saver.save(sess,
                   os.path.join(model_path, 'model'),
                   global_step=n_epochs)
    tStop_total = time.time()
    print "Total Time Cost:", round(tStop_total - tStart_total, 2), "s"
    sess.close()

Пример #6

Показать файл

def test(model_path=None,
         video_data_path_test=video_data_path_val,
         n_test_samples=n_val_samples,
         video_name=None):
    #    test_data = val_data   # to evaluate on testing data or validation data
    wordtoix = np.load(wordtoix_file).tolist()
    ixtoword = pd.Series(np.load(ixtoword_file).tolist())
    with tf.device("/gpu:0"):
        model = Video_Caption_Generator(dim_image=dim_image,
                                        n_words=len(wordtoix),
                                        dim_hidden=dim_hidden,
                                        batch_size=batch_size,
                                        n_caption_steps=n_caption_steps,
                                        n_video_steps=n_video_steps,
                                        drop_out_rate=0.5,
                                        bias_init_vector=None)

    # preprocess on the CPU
    with tf.device('/cpu:0'):
        train_data, train_encode_data, _, _, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, \
            _, _, _, _ = read_and_decode(video_data_path_train)
        val_data, val_encode_data, val_fname, val_title, val_video_label, val_caption_label, val_caption_id, val_caption_id_1, \
            _, _, _, _ = read_and_decode(video_data_path_test)
        train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1 = \
            tf.train.shuffle_batch([train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1],
                batch_size=batch_size, num_threads=num_threads, capacity=prefetch, min_after_dequeue=min_queue_examples)
        val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1 = \
            tf.train.batch([val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1],
                batch_size=batch_size, num_threads=1, capacity=2* batch_size)
    # graph on the GPU
    with tf.device("/gpu:0"):
        tf_loss = model.build_model(train_caption_id, train_caption_id_1,
                                    train_caption_label)
        val_s2s_tf, s2s_lstm3_vars_tf = model.build_s2s_generator(
            val_caption_id_1)
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        allow_soft_placement=True))

    with tf.device(cpu_device):
        saver = tf.train.Saver()
        saver.restore(sess, model_path)
        print 'load parameters from:', model_path

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    ######### test sentence generation ##########
    print 'testing...'
    n_test_steps = int(n_test_samples / batch_size)
    print 'n_test_steps:', n_test_steps
    tstart = time.time()
    ### TODO: sometimes COCO test show exceptions in the beginning of training ####
    if test_s2s:
        #        [pred_sent, gt_sent, id_list, gt_dict, pred_dict, flist] = testing_all(sess, 1, ixtoword, val_s2s_tf, val_fname)
        #        for i, key in enumerate(pred_dict.keys()):
        #            print 'video:', flist[i]
        #            for ele in gt_dict[key]:
        #                print "GT:  " + ele['caption']
        #            print "PD:  " + pred_dict[key][0]['caption']
        #            print '-------'
        print '############## sentence to sentence result #################'
        [pred_sent, gt_sent, id_list, gt_dict, pred_dict,
         flist] = testing_all(sess, n_test_steps, ixtoword, val_s2s_tf,
                              val_fname)
        if os.path.isfile('demo_s2s.txt.videos'):
            video_name = pickle.load(open('demo_s2s.txt.videos', "rb"))
        if video_name:
            for i, key in enumerate(pred_dict.keys()):
                if flist[i] in video_name:
                    print flist[i]
                    for ele in gt_dict[key]:
                        print "GT:  " + ele['caption']
                    print "PD:  " + pred_dict[key][0]['caption']
                    print '-----------'
        scorer = COCOScorer()
        total_score_2 = scorer.score(gt_dict, pred_dict, id_list)
        print '############## sentence to sentence result #################'

    if save_demo_sent_s2s:
        get_demo_sentence(sess,
                          n_test_steps,
                          ixtoword,
                          val_s2s_tf,
                          val_fname,
                          result_file='demo_s2s.txt')

    sys.stdout.flush()
    coord.request_stop()
    coord.join(threads)
    tstop = time.time()
    print "Total Time Cost:", round(tstop - tstart, 2), "s"
    sess.close()

Пример #7

Показать файл

def train():
    assert os.path.isdir(home_folder)
    assert os.path.isfile(video_data_path_train)
    assert os.path.isfile(video_data_path_val)
    assert os.path.isdir(model_path)
    print 'load meta data...'
    wordtoix = np.load(home_folder + 'data0/msvd_wordtoix.npy').tolist()
    ixtoword = pd.Series(
        np.load(home_folder + 'data0/msvd_ixtoword.npy').tolist())
    print 'build model and session...'
    # shared parameters on the GPU
    with tf.device("/gpu:0"):
        model = Video_Caption_Generator(dim_image=dim_image,
                                        n_words=len(wordtoix),
                                        dim_hidden=dim_hidden,
                                        batch_size=batch_size,
                                        n_caption_steps=n_caption_steps,
                                        n_video_steps=n_video_steps,
                                        drop_out_rate=0.5,
                                        bias_init_vector=None)
    tStart_total = time.time()
    n_epoch_steps = int(n_train_samples / batch_size)
    n_steps = n_epochs * n_epoch_steps
    # preprocess on the CPU
    with tf.device('/cpu:0'):
        train_data, train_encode_data, _, _, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, \
            _, _, _, _ = read_and_decode(video_data_path_train)
        val_data, val_encode_data, val_fname, val_title, val_video_label, val_caption_label, val_caption_id, val_caption_id_1, \
            _, _, _, _ = read_and_decode(video_data_path_val)
        # random batches
        train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1 = \
            tf.train.shuffle_batch([train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1],
                batch_size=batch_size, num_threads=num_threads, capacity=prefetch, min_after_dequeue=min_queue_examples)
        val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1 = \
            tf.train.batch([val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1],
                batch_size=batch_size, num_threads=1, capacity=2* batch_size)
    # graph on the GPU
    with tf.device("/gpu:0"):
        tf_loss = model.build_model(train_caption_id, train_caption_id_1,
                                    train_caption_label)
        val_caption_tf, val_lstm3_variables_tf = model.build_sent_generator(
            val_caption_id_1)

    sess = tf.InteractiveSession(config=tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False))
    # check for model file
    with tf.device(cpu_device):
        saver = tf.train.Saver(max_to_keep=100)
    ckpt = tf.train.get_checkpoint_state(model_path)
    global_step = 0
    if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
        print("Reading model parameters from %s" % ckpt.model_checkpoint_path)
        saver.restore(sess, ckpt.model_checkpoint_path)
        #        print_tensors_in_checkpoint_file(ckpt.model_checkpoint_path, "", True)
        global_step = get_model_step(ckpt.model_checkpoint_path)
        print 'global_step:', global_step
    else:
        print("Created model with fresh parameters.")
        sess.run(tf.global_variables_initializer())
    temp = set(tf.global_variables())
    # train on the GPU
    with tf.device("/gpu:0"):
        #        train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
        ## initialize variables added for optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate)
        gvs = optimizer.compute_gradients(tf_loss)
        # when variable is not related to the loss, grad returned as None
        clip_gvs = [(tf.clip_by_norm(grad, clip_norm), var)
                    for grad, var in gvs if grad is not None]
        train_op = optimizer.apply_gradients(gvs)

    sess.run(tf.variables_initializer(set(tf.global_variables()) - temp))
    # initialize epoch variable in queue reader
    sess.run(tf.local_variables_initializer())
    loss_epoch = 0
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    # write graph architecture to file
    summary_writer = tf.summary.FileWriter(model_path + 'summary', sess.graph)
    loss_summary = tf.summary.scalar('loss', tf_loss)
    epoch = global_step
    for step in xrange(1, n_steps + 1):
        tStart = time.time()
        _, loss_val = sess.run([train_op, tf_loss])
        tStop = time.time()
        print "step:", step, " Loss:", loss_val
        print "Time Cost:", round(tStop - tStart, 2), "s"
        loss_epoch += loss_val

        if step % n_epoch_steps == 0:
            epoch += 1
            loss_epoch /= n_epoch_steps
            with tf.device(cpu_device):
                saver.save(sess,
                           os.path.join(model_path, 'model'),
                           global_step=epoch)


#            print 'z:', z[0, :10]
            print 'epoch:', epoch, 'loss:', loss_epoch
            loss_epoch = 0
            n_val_steps = int(n_val_samples / batch_size)
            ######### test sentence generation ##########
            [pred_sent, gt_sent, id_list, gt_dict, pred_dict,
             _] = testing_all(sess, 1, ixtoword, val_caption_tf, val_fname)
            for key in pred_dict.keys():
                for ele in gt_dict[key]:
                    print "GT:  " + ele['caption']
                print "PD:  " + pred_dict[key][0]['caption']
                print '-------'
            [pred_sent, gt_sent, id_list, gt_dict, pred_dict,
             _] = testing_all(sess, n_val_steps, ixtoword, val_caption_tf,
                              val_fname)
            scorer = COCOScorer()
            total_score = scorer.score(gt_dict, pred_dict, id_list)

            #### summary #####
            summary = sess.run(loss_summary)
            summary_writer.add_summary(summary, epoch)
            sys.stdout.flush()

        sys.stdout.flush()

    coord.request_stop()
    coord.join(threads)
    print "Finally, saving the model ..."
    with tf.device(cpu_device):
        saver.save(sess,
                   os.path.join(model_path, 'model'),
                   global_step=n_epochs)
    tStop_total = time.time()
    print "Total Time Cost:", round(tStop_total - tStart_total, 2), "s"
    sess.close()

Пример #8

Показать файл

Файл: SS_queue.py Проект: tsenghungchen/SA-tensorflow

def train():
    meta_data, train_data, val_data, test_data = get_video_data_jukin(video_data_path_train, video_data_path_val, video_data_path_test)
    captions = meta_data['Description'].values
    captions = map(lambda x: x.replace('.', ''), captions)
    captions = map(lambda x: x.replace(',', ''), captions)
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(captions, word_count_threshold=1)

#    np.save('./data'+str(gpu_id)+'/ixtoword', ixtoword)
#    np.save('./data'+str(gpu_id)+'/wordtoix', wordtoix)
#    sys.exit()
    ixtoword=pd.Series(np.load('./data_all/ixtoword.npy').tolist())
    wordtoix=pd.Series(np.load('./data_all/wordtoix.npy').tolist())

    model = Video_Caption_Generator(
            dim_image=dim_image,
	    dim_tracker=dim_tracker,
            n_words=len(wordtoix),
            dim_hidden=dim_hidden,
            batch_size=batch_size,
            n_lstm_steps=n_frame_step,
	    tracker_cnt=tracker_cnt,
            drop_out_rate = 0.5,
            bias_init_vector=None)

    tf_loss, tf_video, tf_video_mask, tf_tracker, tf_tracker_mask, tf_caption, tf_caption_mask= model.build_model()
    #loss_summary = tf.scalar_summary("Loss",tf_loss)
    sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))
    #merged = tf.merge_all_summaries()
    #writer = tf.train.SummaryWriter('/tmp/tf_log', sess.graph_def)

    with tf.device("/cpu:0"):
    	saver = tf.train.Saver(max_to_keep=100)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.initialize_all_variables().run()
    saver.restore(sess, 'models/model-0')

    tStart_total = time.time()
    nr_prefetch = int(3)
    for epoch in range(n_epochs):
        index = np.arange(len(train_data))
        np.random.shuffle(index)
        train_data = train_data[index]
        
        tStart_epoch = time.time()
        loss_epoch = np.zeros(len(train_data))
        ## init queue
        data_queue = mp.Queue(nr_prefetch)
#        tracker_queue = mp.Queue(nr_prefetch)
        title_queue = mp.Queue(nr_prefetch)
        t1 = Thread(target=load_data_into_queue, args=(train_data, data_queue, 'data'))
#        t2 = Thread(target=load_data_into_queue, args=(train_data, tracker_queue, 'tracker'))
        t3 = Thread(target=load_data_into_queue, args=(train_data, title_queue, 'title'))
        t1.start()
#        t2.start()
        t3.start()
        for current_batch_file_idx in range(len(train_data)):
            tStart = time.time()
            current_batch = h5py.File(train_data[current_batch_file_idx])
            current_feats = np.zeros((batch_size, n_frame_step, dim_image))
            current_video_masks = np.zeros((batch_size, n_frame_step))
            current_video_len = np.zeros(batch_size)
            
            if 'tracker' in current_batch.keys():
                current_tracker = np.array(current_batch['tracker'])
            else:
                current_tracker = np.zeros((batch_size, tracker_cnt, dim_tracker))
            
            if 'tracker_mask' in current_batch.keys():
                current_tracker_mask = np.array(current_batch['tracker_mask'])
            else:
                current_tracker_mask = np.zeros((batch_size, tracker_cnt))

#            current_tracker = tracker_queue.get()
            current_batch_data = data_queue.get()
            current_batch_title = title_queue.get()
            for ind in xrange(batch_size):
                current_feats[ind,:,:] = current_batch_data[:,ind,:]
                idx = np.where(current_batch['label'][:,ind] != -1)[0]
                if len(idx) == 0:
                        continue
                current_video_masks[ind,idx[-1]] = 1

            current_captions = current_batch_title
            current_caption_ind = map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ') if word in wordtoix], current_captions)

            current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=35-1)
            current_caption_matrix = np.hstack( [current_caption_matrix, np.zeros( [len(current_caption_matrix),1]) ] ).astype(int)
            current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            nonzeros = np.array( map(lambda x: (x != 0).sum()+1, current_caption_matrix ))

            for ind, row in enumerate(current_caption_masks):
                row[:nonzeros[ind]] = 1

            current_batch.close()


            _, loss_val= sess.run(
                [train_op, tf_loss],
                feed_dict={
                tf_video: current_feats,
                tf_video_mask : current_video_masks,
                tf_tracker : current_tracker,
                tf_tracker_mask : current_tracker_mask,
                tf_caption: current_caption_matrix,
                tf_caption_mask: current_caption_masks
                })
            #writer.add_summary(summary_str, epoch)
            loss_epoch[current_batch_file_idx] = loss_val
            tStop = time.time()
            #print "Epoch:", epoch, " Batch:", current_batch_file_idx, " Loss:", loss_val
            #print "Time Cost:", round(tStop - tStart,2), "s"

        t1.join()
#       t2.join()
        t3.join()
        print "Epoch:", epoch, " done. Loss:", np.mean(loss_epoch)
        tStop_epoch = time.time()
        print "Epoch Time Cost:", round(tStop_epoch - tStart_epoch,2), "s"
	sys.stdout.flush()

        if np.mod(epoch, 2) == 0:
            print "Epoch ", epoch, " is done. Saving the model ..."
    	    with tf.device('/cpu:0'):
            	saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
        if np.mod(epoch, 10) == 0:
            current_batch = h5py.File(val_data[np.random.randint(0,len(val_data))])
            video_tf, video_mask_tf, tracker_tf, tracker_mask_tf, caption_tf, lstm1_variables_tf, lstm2_variables_tf = model.build_generator()
            ixtoword = pd.Series(np.load('./data_all/ixtoword.npy').tolist())
#            [pred_sent, gt_sent, id_list, gt_dict, pred_dict, fnamelist] = testing_all_multi_gt(sess, train_data[-2:], ixtoword,video_tf, video_mask_tf, tracker_tf, tracker_mask_tf, caption_tf)
#            for key in pred_dict.keys():
#                for ele in gt_dict[key]:
#                    print "GT:  " + ele['caption']
#                print "PD:  " + pred_dict[key][0]['caption']
#                print '-------'

            [pred_sent, gt_sent, id_list, gt_dict, pred_dict, fnamelist] = testing_all_multi_gt(sess, val_data, ixtoword,video_tf, video_mask_tf, tracker_tf, tracker_mask_tf, caption_tf)

            scorer = COCOScorer()
            total_score = scorer.score(gt_dict, pred_dict, id_list)

    print "Finally, saving the model ..."
    with tf.device('/cpu:0'):
	    saver.save(sess, os.path.join(model_path, 'model'), global_step=n_epochs)
    tStop_total = time.time()
    print "Total Time Cost:", round(tStop_total - tStart_total,2), "s"

Пример #9

Показать файл

Файл: pool_vae_scale_frame_tf.py Проект: rjchand07/V2S-tensorflow

def test(model_path=None,
    video_data_path_test='/home/shenxu/data/msvd_feat_vgg_c3d_frame/test.tfrecords',
    n_test_samples=27020):
#    test_data = val_data   # to evaluate on testing data or validation data
    wordtoix = np.load(wordtoix_file).tolist()
    ixtoword = pd.Series(np.load(ixtoword_file).tolist())
    with tf.device("/gpu:0"):
        model = Video_Caption_Generator(
                dim_image=dim_image,
                n_words=len(wordtoix),
                dim_hidden=dim_hidden,
                batch_size=batch_size,
                n_caption_steps=n_caption_steps,
                n_video_steps=n_video_steps,
                drop_out_rate = 0.5,
                bias_init_vector=None)

    # preprocess on the CPU
    with tf.device('/cpu:0'):
        train_data, train_encode_data, _, _, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, \
            _, _, _, _, train_frame_data = read_and_decode_with_frame(video_data_path_train)
        val_data, val_encode_data, val_fname, val_title, val_video_label, val_caption_label, val_caption_id, val_caption_id_1, \
            _, _, _, _, val_frame_data = read_and_decode_with_frame(video_data_path_test)
        train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, train_frame_data = \
            tf.train.shuffle_batch([train_data, train_encode_data, train_video_label, train_caption_label, train_caption_id, train_caption_id_1, train_frame_data],
                batch_size=batch_size, num_threads=num_threads, capacity=prefetch, min_after_dequeue=min_queue_examples)
        val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1, val_frame_data = \
            tf.train.batch([val_data, val_video_label, val_fname, val_caption_label, val_caption_id_1, val_frame_data],
                batch_size=batch_size, num_threads=1, capacity=2* batch_size)
    # graph on the GPU
    with tf.device("/gpu:0"):
        tf_loss, tf_loss_cap, tf_loss_lat, tf_loss_vid, tf_z, tf_v_h, tf_s_h, tf_drop_type \
            = model.build_model(train_data, train_frame_data, train_video_label, train_caption_id, train_caption_id_1, train_caption_label)
        val_v2s_tf,v2s_lstm3_vars_tf = model.build_v2s_generator(val_data)
        val_s2s_tf, s2s_lstm3_vars_tf = model.build_s2s_generator(val_caption_id_1)
        val_s2v_tf, s2v_lstm4_vars_tf = model.build_s2v_generator(val_caption_id_1, val_frame_data)
        val_v2v_tf, v2v_lstm4_vars_tf = model.build_v2v_generator(val_data, val_frame_data)
    sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))

    with tf.device(cpu_device):
        saver = tf.train.Saver()
        saver.restore(sess, model_path)
        print 'load parameters from:', model_path

#    print 'halve the dropout weights..'
#    for ind, row in enumerate(v2s_lstm3_vars_tf):
#        if ind % 4 == 0:
#                assign_op = row.assign(tf.multiply(row,1-0.5))
#                sess.run(assign_op)
#    for ind, row in enumerate(s2s_lstm2_vars_tf):
#        if ind % 4 == 0:
#                assign_op = row.assign(tf.multiply(row,1-0.5))
#                sess.run(assign_op)
#    for ind, row in enumerate(s2v_lstm4_vars_tf):
#        if ind % 4 == 0:
#                assign_op = row.assign(tf.multiply(row,1-0.5))
#                sess.run(assign_op)

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    ######### test sentence generation ##########
    print 'testing...'
    n_test_steps = int(n_test_samples / batch_size)
    print 'n_test_steps:', n_test_steps
    tstart = time.time()
    ### TODO: sometimes COCO test show exceptions in the beginning of training ####
    if test_v2s:
        try:
            [pred_sent, gt_sent, id_list, gt_dict, pred_dict, flist] = testing_all(sess, 1, ixtoword, val_v2s_tf, val_fname)
            for i, key in enumerate(pred_dict.keys()):
                print 'video:', flist[i]
                for ele in gt_dict[key]:
                    print "GT:  " + ele['caption']
                print "PD:  " + pred_dict[key][0]['caption']
                print '-------'
            print '############## video to sentence result #################'
            [pred_sent, gt_sent, id_list, gt_dict, pred_dict, _] = testing_all(sess, n_test_steps, ixtoword, val_v2s_tf, val_fname)
            scorer  = COCOScorer()
            total_score_1 = scorer.score(gt_dict, pred_dict, id_list)
            print '############## video to sentence result #################'
        except Exception, e:
            print 'v2s bleu test exception'

Пример #10

Показать файл

Файл: pool_vae_scale_frame_tf.py Проект: rjchand07/V2S-tensorflow

        except Exception, e:
            print 'v2s bleu test exception'

    if test_s2s:
        try:
            [pred_sent, gt_sent, id_list, gt_dict, pred_dict, flist] = testing_all(sess, 1, ixtoword, val_s2s_tf, val_fname)
            for i, key in enumerate(pred_dict.keys()):
                print 'video:', flist[i]
                for ele in gt_dict[key]:
                    print "GT:  " + ele['caption']
                print "PD:  " + pred_dict[key][0]['caption']
                print '-------'
            print '############## sentence to sentence result #################'
            [pred_sent, gt_sent, id_list, gt_dict, pred_dict, _] = testing_all(sess, n_test_steps, ixtoword, val_s2s_tf, val_fname)
            scorer = COCOScorer()
            total_score_2 = scorer.score(gt_dict, pred_dict, id_list)
            print '############## sentence to sentence result #################'
        except Exception, e:
            print 'v2s bleu test exception'

    ######### test video generation #############
    if test_v2v:
        mse_v2v = test_all_videos(sess, n_test_steps, val_data, val_v2v_tf, val_video_label, pixel_scale_factor)
        print 'video2video mse:', mse_v2v
    if test_s2v:
        mse_s2v = test_all_videos(sess, n_test_steps, val_data, val_s2v_tf, val_video_label, pixel_scale_factor)
        print 'caption2video mse:', mse_s2v
    if save_demo_sent_v2s:
        get_demo_sentence(sess, n_test_steps, ixtoword, val_v2s_tf, val_fname, result_file='demo_v2s.txt')
    if save_demo_sent_s2s:
        get_demo_sentence(sess, n_test_steps, ixtoword, val_s2s_tf, val_fname, result_file='demo_s2s.txt')

Пример #11

Показать файл

Файл: HS.py Проект: KuoHaoZeng/VH

def train():
    meta_data, train_data, test_data = get_video_data_jukin(video_data_path_train, video_data_path_test)
    captions = meta_data['Description'].values
    captions = map(lambda x: x.replace('.', ''), captions)
    captions = map(lambda x: x.replace(',', ''), captions)
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(captions, word_count_threshold=1)

    np.save('./data/ixtoword', ixtoword)

    model = Video_Caption_Generator(
            dim_image=dim_image,
            n_words=len(wordtoix),
            dim_hidden=dim_hidden,
            batch_size=batch_size,
            n_lstm_steps=n_frame_step,
	    drop_out_rate = 0.5,
            bias_init_vector=None)

    tf_loss, tf_video, tf_video_mask, tf_video_len, tf_caption, tf_caption_mask, tf_HLness, tf_HLness_mask, tf_HLness_att_mask= model.build_model()
    loss_summary = tf.scalar_summary("Loss",tf_loss)
    sess = tf.InteractiveSession()
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter('/tmp/tf_log', sess.graph_def)

    saver = tf.train.Saver(max_to_keep=100)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.initialize_all_variables().run()

    tStart_total = time.time()
    for epoch in range(n_epochs):
        index = np.arange(len(train_data))
        np.random.shuffle(index)
        train_data = train_data[index]

	tStart_epoch = time.time()
	loss_epoch = np.zeros(len(train_data))
        for current_batch_file_idx in xrange(len(train_data)):

	    tStart = time.time()
	    current_batch = h5py.File(train_data[current_batch_file_idx])
            current_feats = np.zeros((batch_size, n_frame_step, dim_image))
	    current_HLness = np.zeros((batch_size, n_frame_step))
	    current_HLness_masks = np.zeros((batch_size, n_frame_step))
	    current_HLness_att_masks = np.zeros((batch_size, n_frame_step))
            current_video_masks = np.zeros((batch_size, n_frame_step))
	    current_video_len = np.zeros(batch_size)
	    for ind in xrange(batch_size):
		current_feats[ind,:,:] = current_batch['data'][:,ind,:]
		idx = np.where(current_batch['label'][:,ind] != -1)[0]
		if len(idx) == 0:
			continue
		idy = np.where(current_batch['label'][:,ind] == 1)[0]
		if len(idy) == 0:
			continue
		current_HLness[ind,idx] = current_batch['label'][idx,ind]
		current_HLness_masks[ind,idx] = 1
		current_video_masks[ind,idy[-1]] = 1
		current_video_len[ind] = idx[-1] + 1
		current_HLness_att_masks[ind,idy] = 1
		if(idy[0] > 4):
			current_HLness_att_masks[ind,idy[0]-5:idy[0]] = 1
		else:
			current_HLness_att_masks[ind,0:idy[0]] = 1

            current_captions = current_batch['title']
            current_caption_ind = map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ') if word in wordtoix], current_captions)

            current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=15-1)
            current_caption_matrix = np.hstack( [current_caption_matrix, np.zeros( [len(current_caption_matrix),1]) ] ).astype(int)
            current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            nonzeros = np.array( map(lambda x: (x != 0).sum()+1, current_caption_matrix ))

            for ind, row in enumerate(current_caption_masks):
                row[:nonzeros[ind]] = 1

            _, loss_val, summary_str= sess.run(
                    [train_op, tf_loss, merged],
                    feed_dict={
                        tf_video: current_feats,
                        tf_video_mask : current_video_masks,
                        tf_caption: current_caption_matrix,
                        tf_caption_mask: current_caption_masks,
			tf_HLness: current_HLness,
			tf_HLness_mask: current_HLness_masks,
			tf_HLness_att_mask: current_HLness_att_masks
                        })
	    writer.add_summary(summary_str, epoch)
	    loss_epoch[current_batch_file_idx] = loss_val
	    tStop = time.time()
            #print "Epoch:", epoch, " Batch:", current_batch_file_idx, " Loss:", loss_val
	    #print "Time Cost:", round(tStop - tStart,2), "s"

	print "Epoch:", epoch, " done. Loss:", np.mean(loss_epoch)
	tStop_epoch = time.time()
	print "Epoch Time Cost:", round(tStop_epoch - tStart_epoch,2), "s"

        if np.mod(epoch, 20) == 0:
            print "Epoch ", epoch, " is done. Saving the model ..."
            saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)

	    current_batch = h5py.File(test_data[np.random.randint(0,len(test_data))])
    	    video_tf, video_mask_tf, video_len_tf, HLness_tf, caption_tf, HLness_att_mask_tf, lstmRNN_variables_tf, lstm3_variables_tf = model.build_generator()
    	    ixtoword = pd.Series(np.load('./data/ixtoword.npy').tolist())
	    #[mp, pred_sent, gt_sent, HLness] = testing_one(sess, current_batch, ixtoword,video_tf, video_len_tf, HLness_tf, caption_tf, HLness_att_mask_tf)
    	    [mp, pred_sent, gt_sent, HLness] = testing_all(sess, test_data, ixtoword,video_tf, video_mask_tf, video_len_tf, HLness_tf, caption_tf, HLness_att_mask_tf)
	    #for xxx in xrange(current_batch['label'].shape[1]):
	    #	print gt_sent[xxx]
	    #	print pred_sent[xxx]
	    total_score = np.mean(mp)
	    print total_score
	    scorer = COCOScorer()
	    total_score = scorer.score(gt_sent, pred_sent, range(len(pred_sent)))

    print "Finally, saving the model ..."
    saver.save(sess, os.path.join(model_path, 'model'), global_step=n_epochs)
    tStop_total = time.time()
    print "Total Time Cost:", round(tStop_total - tStart_total,2), "s"

Пример #12

Показать файл

def train():
    """Continue training the tracker-aware caption model and score it on validation data.

    The model is restored from the ``models/model-0`` checkpoint and then
    trained for ``n_epochs`` epochs over the HDF5 batch files listed in
    ``train_data``.  A checkpoint is written after every 2nd epoch and once
    more after the last epoch; after every 10th epoch the validation files
    are captioned with ``testing_all_multi_gt`` and scored with
    ``COCOScorer``.

    Sequences handed to NumPy and ``pad_sequences`` are built as real lists
    so the function behaves the same whether ``map`` is eager or lazy.

    Hyper-parameters and paths are read from module-level names; the function
    takes no arguments and returns nothing.
    """
    meta_data, train_data, val_data, test_data = get_video_data_jukin(video_data_path_train, video_data_path_val, video_data_path_test)
    captions = meta_data['Description'].values
    captions = [x.replace('.', '').replace(',', '') for x in captions]
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(captions, word_count_threshold=1)

    # The vocabulary built above is immediately replaced by the mappings
    # stored under ./data_all.
    ixtoword = pd.Series(np.load('./data_all/ixtoword.npy').tolist())
    wordtoix = pd.Series(np.load('./data_all/wordtoix.npy').tolist())

    model = Video_Caption_Generator(
        dim_image=dim_image,
        dim_tracker=dim_tracker,
        n_words=len(wordtoix),
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        n_lstm_steps=n_frame_step,
        tracker_cnt=tracker_cnt,
        drop_out_rate=0.5,
        bias_init_vector=None)

    tf_loss, tf_video, tf_video_mask, tf_tracker, tf_tracker_mask, tf_caption, tf_caption_mask = model.build_model()
    sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))

    with tf.device("/cpu:0"):
        saver = tf.train.Saver(max_to_keep=100)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    # Initialise every variable first, then overwrite from the checkpoint.
    tf.initialize_all_variables().run()
    saver.restore(sess, 'models/model-0')

    tStart_total = time.time()
    nr_prefetch = 3
    for epoch in range(n_epochs):
        # Visit the batch files in a fresh random order each epoch.
        index = np.arange(len(train_data))
        np.random.shuffle(index)
        train_data = train_data[index]

        tStart_epoch = time.time()
        loss_epoch = np.zeros(len(train_data))

        # Two loader threads prefetch the 'data' and 'title' fields into
        # bounded queues.
        # NOTE(review): assumes load_data_into_queue pushes exactly one item
        # per entry of train_data, in order -- confirm against its definition.
        data_queue = mp.Queue(nr_prefetch)
        title_queue = mp.Queue(nr_prefetch)
        t1 = Thread(target=load_data_into_queue, args=(train_data, data_queue, 'data'))
        t3 = Thread(target=load_data_into_queue, args=(train_data, title_queue, 'title'))
        t1.start()
        t3.start()

        for current_batch_file_idx in range(len(train_data)):
            current_feats = np.zeros((batch_size, n_frame_step, dim_image))
            current_video_masks = np.zeros((batch_size, n_frame_step))

            # The context manager closes the HDF5 file even if building the
            # batch raises.
            with h5py.File(train_data[current_batch_file_idx]) as current_batch:
                # Tracker inputs are optional in a batch file; fall back to zeros.
                if 'tracker' in current_batch.keys():
                    current_tracker = np.array(current_batch['tracker'])
                else:
                    current_tracker = np.zeros((batch_size, tracker_cnt, dim_tracker))

                if 'tracker_mask' in current_batch.keys():
                    current_tracker_mask = np.array(current_batch['tracker_mask'])
                else:
                    current_tracker_mask = np.zeros((batch_size, tracker_cnt))

                current_batch_data = data_queue.get()
                current_batch_title = title_queue.get()
                for ind in range(batch_size):
                    current_feats[ind, :, :] = current_batch_data[:, ind, :]
                    idx = np.where(current_batch['label'][:, ind] != -1)[0]
                    if len(idx) == 0:
                        continue
                    # Only the last step whose label is not -1 is flagged.
                    current_video_masks[ind, idx[-1]] = 1

            # Words missing from the vocabulary are silently dropped.
            current_captions = current_batch_title
            current_caption_ind = [
                [wordtoix[word] for word in cap.lower().split(' ') if word in wordtoix]
                for cap in current_captions
            ]

            # Pad/truncate to 34 tokens, then append one zero column (35 total).
            current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=35-1)
            current_caption_matrix = np.hstack([current_caption_matrix, np.zeros([len(current_caption_matrix), 1])]).astype(int)
            current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            # The mask covers every non-zero token plus one extra position.
            nonzeros = np.array([(x != 0).sum()+1 for x in current_caption_matrix])

            for ind, row in enumerate(current_caption_masks):
                row[:nonzeros[ind]] = 1

            _, loss_val = sess.run(
                [train_op, tf_loss],
                feed_dict={
                    tf_video: current_feats,
                    tf_video_mask: current_video_masks,
                    tf_tracker: current_tracker,
                    tf_tracker_mask: current_tracker_mask,
                    tf_caption: current_caption_matrix,
                    tf_caption_mask: current_caption_masks
                })
            loss_epoch[current_batch_file_idx] = loss_val

        t1.join()
        t3.join()
        print("Epoch:", epoch, " done. Loss:", np.mean(loss_epoch))
        tStop_epoch = time.time()
        print("Epoch Time Cost:", round(tStop_epoch - tStart_epoch,2), "s")
        sys.stdout.flush()

        if np.mod(epoch, 2) == 0:
            print("Epoch ", epoch, " is done. Saving the model ...")
            with tf.device('/cpu:0'):
                saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
        if np.mod(epoch, 10) == 0:
            # NOTE(review): build_generator() runs on every evaluation pass;
            # if it adds new ops each time the graph keeps growing -- confirm
            # and consider building the generator once before the epoch loop.
            video_tf, video_mask_tf, tracker_tf, tracker_mask_tf, caption_tf, lstm1_variables_tf, lstm2_variables_tf = model.build_generator()
            ixtoword = pd.Series(np.load('./data_all/ixtoword.npy').tolist())

            [pred_sent, gt_sent, id_list, gt_dict, pred_dict, fnamelist] = testing_all_multi_gt(sess, val_data, ixtoword,video_tf, video_mask_tf, tracker_tf, tracker_mask_tf, caption_tf)

            scorer = COCOScorer()
            total_score = scorer.score(gt_dict, pred_dict, id_list)

    print("Finally, saving the model ...")
    with tf.device('/cpu:0'):
        saver.save(sess, os.path.join(model_path, 'model'), global_step=n_epochs)
    tStop_total = time.time()
    print("Total Time Cost:", round(tStop_total - tStart_total,2), "s")

Пример #13

Показать файл

Файл: Att.py Проект: BUPT402/Video_Captioning

def train():
    meta_data, train_data, val_data, test_data = get_video_data_jukin(
        video_data_path_train, video_data_path_val, video_data_path_test)
    captions = meta_data['Description'].values
    captions = map(lambda x: x.replace('.', ''), captions)
    captions = map(lambda x: x.replace(',', ''), captions)
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(
        captions, word_count_threshold=1)

    np.save('./data0/ixtoword', ixtoword)

    model = Video_Caption_Generator(dim_image=dim_image,
                                    n_words=len(wordtoix),
                                    dim_hidden=dim_hidden,
                                    batch_size=batch_size,
                                    n_lstm_steps=n_frame_step,
                                    drop_out_rate=0.5,
                                    bias_init_vector=None)

    tf_loss, tf_video, tf_video_mask, tf_caption, tf_caption_mask = model.build_model(
    )
    sess = tf.InteractiveSession(config=tf.ConfigProto(
        allow_soft_placement=True))

    with tf.device("/cpu:0"):
        saver = tf.train.Saver(max_to_keep=100)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.initialize_all_variables().run()

    tStart_total = time.time()
    for epoch in range(n_epochs):
        index = np.arange(len(train_data))
        np.random.shuffle(index)
        train_data = train_data[index]

        tStart_epoch = time.time()
        loss_epoch = np.zeros(len(train_data))
        for current_batch_file_idx in xrange(len(train_data)):

            tStart = time.time()
            current_batch = h5py.File(train_data[current_batch_file_idx])
            current_feats = np.zeros((batch_size, n_frame_step, dim_image))
            current_video_masks = np.zeros((batch_size, n_frame_step))
            current_video_len = np.zeros(batch_size)
            for ind in xrange(batch_size):
                current_feats[ind, :, :] = current_batch['data'][:n_frame_step,
                                                                 ind, :]
                idx = np.where(current_batch['label'][:, ind] != -1)[0]
                if len(idx) == 0:
                    continue
                current_video_masks[ind, :idx[-1] + 1] = 1

            current_captions = current_batch['title']
            current_caption_ind = map(
                lambda cap: [
                    wordtoix[word] for word in cap.lower().split(' ')
                    if word in wordtoix
                ], current_captions)

            current_caption_matrix = sequence.pad_sequences(
                current_caption_ind, padding='post', maxlen=n_caption_step - 1)
            current_caption_matrix = np.hstack([
                current_caption_matrix,
                np.zeros([len(current_caption_matrix), 1])
            ]).astype(int)
            current_caption_masks = np.zeros((current_caption_matrix.shape[0],
                                              current_caption_matrix.shape[1]))
            nonzeros = np.array(
                map(lambda x: (x != 0).sum() + 1, current_caption_matrix))

            for ind, row in enumerate(current_caption_masks):
                row[:nonzeros[ind]] = 1

            _, loss_val = sess.run(
                [train_op, tf_loss],
                feed_dict={
                    tf_video: current_feats,
                    tf_video_mask: current_video_masks,
                    tf_caption: current_caption_matrix,
                    tf_caption_mask: current_caption_masks
                })
            loss_epoch[current_batch_file_idx] = loss_val
            tStop = time.time()
            #print "Epoch:", epoch, " Batch:", current_batch_file_idx, " Loss:", loss_val
        #print "Time Cost:", round(tStop - tStart,2), "s"

        print "Epoch:", epoch, " done. Loss:", np.mean(loss_epoch)
        tStop_epoch = time.time()
        print "Epoch Time Cost:", round(tStop_epoch - tStart_epoch, 2), "s"

        if np.mod(epoch, 10) == 0 or epoch == n_epochs - 1:
            print "Epoch ", epoch, " is done. Saving the model ..."
            with tf.device("/cpu:0"):
                saver.save(sess,
                           os.path.join(model_path, 'model'),
                           global_step=epoch)

            current_batch = h5py.File(val_data[np.random.randint(
                0, len(val_data))])
            video_tf, video_mask_tf, caption_tf, lstm3_variables_tf = model.build_generator(
            )
            ixtoword = pd.Series(np.load('./data0/ixtoword.npy').tolist())
            [pred_sent, gt_sent, id_list, gt_dict,
             pred_dict] = testing_all(sess, train_data[-2:], ixtoword,
                                      video_tf, video_mask_tf, caption_tf)
            for key in pred_dict.keys():
                for ele in gt_dict[key]:
                    print "GT:  " + ele['caption']
                print "PD:  " + pred_dict[key][0]['caption']
                print '-------'
            [pred_sent, gt_sent, id_list, gt_dict,
             pred_dict] = testing_all(sess, val_data, ixtoword, video_tf,
                                      video_mask_tf, caption_tf)
            scorer = COCOScorer()
            total_score = scorer.score(gt_dict, pred_dict, id_list)
        sys.stdout.flush()

    print "Finally, saving the model ..."
    with tf.device("/cpu:0"):
        saver.save(sess,
                   os.path.join(model_path, 'model'),
                   global_step=n_epochs)
    tStop_total = time.time()
    print "Total Time Cost:", round(tStop_total - tStart_total, 2), "s"

Пример #14

Показать файл

Файл: SS_youtube_notest_dummy.py Проект: KuoHaoZeng/VH

def train():
    meta_data, train_data, val_data, test_data = get_video_data_jukin(
        video_data_path_train, video_data_path_val, video_data_path_test
    )
    captions = meta_data["Description"].values
    captions = map(lambda x: x.replace(".", ""), captions)
    captions = map(lambda x: x.replace(",", ""), captions)
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(captions, word_count_threshold=1)

    np.save("./data" + str(gpu_id) + "/ixtoword", ixtoword)

    model = Video_Caption_Generator(
        dim_image=dim_image,
        n_words=len(wordtoix),
        dim_hidden=dim_hidden,
        batch_size=batch_size,
        n_lstm_steps=n_frame_step,
        drop_out_rate=0.5,
        bias_init_vector=None,
    )

    tf_loss, tf_video, tf_video_mask, tf_caption, tf_caption_mask = model.build_model()
    loss_summary = tf.scalar_summary("Loss", tf_loss)
    sess = tf.InteractiveSession(config=tf.ConfigProto(allow_soft_placement=True))
    merged = tf.merge_all_summaries()
    writer = tf.train.SummaryWriter("/tmp/tf_log", sess.graph_def)

    saver = tf.train.Saver(max_to_keep=100)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.initialize_all_variables().run()
    saver.restore(sess, "models_SS_youtube_notest_dummy/model-20")

    tStart_total = time.time()
    for epoch in range(n_epochs):
        index = np.arange(len(train_data))
        np.random.shuffle(index)
        train_data = train_data[index]

        tStart_epoch = time.time()
        loss_epoch = np.zeros(len(train_data))
        for current_batch_file_idx in xrange(len(train_data)):

            tStart = time.time()
            current_batch = h5py.File(train_data[current_batch_file_idx])
            current_feats = np.zeros((batch_size, n_frame_step, dim_image))
            current_video_masks = np.zeros((batch_size, n_frame_step))
            current_video_len = np.zeros(batch_size)
            for ind in xrange(batch_size):
                current_feats[ind, :, :] = current_batch["data"][:, ind, :]
                idx = np.where(current_batch["label"][:, ind] != -1)[0]
                if len(idx) == 0:
                    continue
                current_video_masks[ind, idx[-1]] = 1

            current_captions = current_batch["title"]
            current_caption_ind = map(
                lambda cap: [wordtoix[word] for word in cap.lower().split(" ") if word in wordtoix], current_captions
            )

            current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding="post", maxlen=16 - 1)
            current_caption_matrix = np.hstack(
                [current_caption_matrix, np.zeros([len(current_caption_matrix), 1])]
            ).astype(int)
            current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            nonzeros = np.array(map(lambda x: (x != 0).sum() + 1, current_caption_matrix))

            for ind, row in enumerate(current_caption_masks):
                row[: nonzeros[ind]] = 1

            _, loss_val, summary_str = sess.run(
                [train_op, tf_loss, merged],
                feed_dict={
                    tf_video: current_feats,
                    tf_video_mask: current_video_masks,
                    tf_caption: current_caption_matrix,
                    tf_caption_mask: current_caption_masks,
                },
            )
            writer.add_summary(summary_str, epoch)
            loss_epoch[current_batch_file_idx] = loss_val
            tStop = time.time()
            # print "Epoch:", epoch, " Batch:", current_batch_file_idx, " Loss:", loss_val
            # print "Time Cost:", round(tStop - tStart,2), "s"

        print "Epoch:", epoch, " done. Loss:", np.mean(loss_epoch)
        tStop_epoch = time.time()
        print "Epoch Time Cost:", round(tStop_epoch - tStart_epoch, 2), "s"
        sys.stdout.flush()

        if np.mod(epoch, 10) == 0:
            print "Epoch ", epoch, " is done. Saving the model ..."
            saver.save(sess, os.path.join(model_path, "model"), global_step=epoch)

            current_batch = h5py.File(val_data[np.random.randint(0, len(val_data))])
            video_tf, video_mask_tf, caption_tf, lstm1_variables_tf, lstm2_variables_tf = model.build_generator()
            ixtoword = pd.Series(np.load("./data" + str(gpu_id) + "/ixtoword.npy").tolist())
            [pred_sent, gt_sent] = testing_all(sess, train_data[-2:], ixtoword, video_tf, video_mask_tf, caption_tf)
            for idx in range(len(pred_sent)):
                print "GT:  " + gt_sent[idx][0]["caption"]
                print "PD:  " + pred_sent[idx][0]["caption"]
                print "-------"
            [pred_sent, gt_sent] = testing_all(sess, val_data, ixtoword, video_tf, video_mask_tf, caption_tf)
            scorer = COCOScorer()
            total_score = scorer.score(gt_sent, pred_sent, range(len(pred_sent)))

    print "Finally, saving the model ..."
    saver.save(sess, os.path.join(model_path, "model"), global_step=n_epochs)
    tStop_total = time.time()
    print "Total Time Cost:", round(tStop_total - tStart_total, 2), "s"