Python MsrDataUtil.getBatchVideoAudioInfo 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: utils

클래스/타입: MsrDataUtil

메소드/함수: getBatchVideoAudioInfo

hotexamples.com에서의 예제들: 2

Python MsrDataUtil.getBatchVideoAudioInfo - 2개의 예제를 찾았습니다. 아래는 오픈소스 프로젝트에서 추출한 Python utils.MsrDataUtil.getBatchVideoAudioInfo의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가해 주시면 예제 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

getBatchVideoFeature(10)

convertCaptionI2V(6)

getBatchVideoCategoriesInfo(5)

getBatchC3DVideoFeature(4)

getBatchTestCaptionWithSparseLabel(4)

getBatchTrainCaptionWithSparseLabel(4)

create_vocabulary_word2vec(3)

getBatchStepVideoFeature(2)

getBatchTestCaption(2)

getBatchVideoAudioInfo(2)

getBatchTrainCaption(1)

getCategoriesInfo(1)

getNewBatchTrainCaption(1)

예제 #1

파일 보기

파일: msrvtt_caption_s2s_attention_mGRU_audio_beamsearch.py 프로젝트: youjiangxu/VideoCaptioning

def beam_search_exe_test(sess,
                         data,
                         audio_info,
                         batch_size,
                         v2i,
                         i2v,
                         hf,
                         feature_shape,
                         predict_words,
                         input_video,
                         input_captions,
                         input_audio,
                         y,
                         finished_beam,
                         logprobs_finished_beams,
                         past_logprobs,
                         capl=16):
    """Decode captions for every item in ``data`` and collect the results.

    ``data`` is walked in consecutive slices of at most ``batch_size`` items.
    For each slice the video features, test captions/labels and audio info
    are built through ``MsrDataUtil`` and fed to ``sess.run``; the fetched
    ``finished_beam`` value is converted to sentences with
    ``MsrDataUtil.convertCaptionI2V``. Each sentence is printed and recorded.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)`` (presumably a
            TensorFlow session -- confirm against the caller).
        data: Sliceable sequence of dict-like items; the first key of each
            item is used as its ``image_id``.
        audio_info: Passed through to ``MsrDataUtil.getBatchVideoAudioInfo``.
        batch_size: Maximum number of items per batch.
        v2i: Passed to ``getBatchTestCaptionWithSparseLabel`` (presumably a
            vocabulary-to-index mapping).
        i2v: Passed to ``convertCaptionI2V`` (presumably index-to-vocabulary).
        hf: Passed to ``MsrDataUtil.getBatchVideoFeature``.
        feature_shape: Passed to ``MsrDataUtil.getBatchVideoFeature``.
        predict_words, finished_beam, logprobs_finished_beams, past_logprobs:
            Fetches handed to ``sess.run``; only the ``finished_beam`` result
            is used afterwards.
        input_video, input_captions, input_audio, y: Keys of the feed dict.
        capl: Forwarded as ``capl`` to the caption helper.

    Returns:
        dict: ``{'val_predictions': [{'image_id': ..., 'caption': ...}, ...]}``
    """
    caption_output = []
    total_data = len(data)
    # Ceiling division: round() silently skipped a trailing partial batch
    # whenever it held fewer than half of batch_size items.
    num_batch = (total_data + batch_size - 1) // batch_size

    for batch_idx in range(num_batch):
        start = batch_idx * batch_size
        stop = min(start + batch_size, total_data)
        batch_caption = data[start:stop]

        data_v = MsrDataUtil.getBatchVideoFeature(batch_caption, hf,
                                                  feature_shape)
        data_c, data_y = MsrDataUtil.getBatchTestCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)
        data_audio = MsrDataUtil.getBatchVideoAudioInfo(
            batch_caption, audio_info)

        # All four tensors are still fetched, as before; only the finished
        # beams are needed to build the sentences.
        beam_words, _, _, _ = sess.run(
            [
                finished_beam, predict_words, logprobs_finished_beams,
                past_logprobs
            ],
            feed_dict={
                input_video: data_v,
                input_captions: data_c,
                input_audio: data_audio,
                y: data_y
            })

        generated_captions = MsrDataUtil.convertCaptionI2V(
            batch_caption, beam_words, i2v)

        for idx, sen in enumerate(generated_captions):
            # next(iter(d)) yields the same first key as Python 2's
            # d.keys()[0] but also works where keys() is a view.
            image_id = next(iter(batch_caption[idx]))
            print('%s : %s' % (image_id, sen))

            caption_output.append({
                'image_id': image_id,
                'caption': sen
            })

    return {'val_predictions': caption_output}

예제 #2

파일 보기

def exe_train(sess,
              data,
              audio_info,
              cate_info,
              batch_size,
              v2i,
              hf,
              feature_shape,
              train,
              loss,
              input_video,
              input_captions,
              input_categories,
              input_audio,
              y,
              capl=16):
    """Run one pass over ``data`` and return the mean per-batch loss.

    ``data`` is shuffled in place with ``np.random.shuffle`` and then walked
    in consecutive slices of at most ``batch_size`` items. For each slice the
    video features, training captions/labels, category info and audio info
    are built through ``MsrDataUtil`` and fed to ``sess.run([train, loss])``.
    The loss of every batch is printed and accumulated.

    Args:
        sess: Object exposing ``run(fetches, feed_dict=...)`` (presumably a
            TensorFlow session -- confirm against the caller).
        data: Mutable, sliceable sequence of samples; shuffled in place.
        audio_info: Passed to ``MsrDataUtil.getBatchVideoAudioInfo``.
        cate_info: Passed to ``MsrDataUtil.getBatchVideoCategoriesInfo``.
        batch_size: Maximum number of items per batch.
        v2i: Passed to ``getBatchTrainCaptionWithSparseLabel`` (presumably a
            vocabulary-to-index mapping).
        hf: Passed to ``MsrDataUtil.getBatchVideoFeature``.
        feature_shape: Passed to the video-feature, category and audio
            helpers.
        train: Fetch run for its side effect (presumably the optimizer op).
        loss: Fetch whose value is accumulated.
        input_video, input_captions, input_categories, input_audio, y: Keys
            of the feed dict.
        capl: Forwarded as ``capl`` to the caption helper.

    Returns:
        float: Sum of the batch losses divided by the number of batches, or
        ``0.0`` when ``data`` is empty.
    """
    np.random.shuffle(data)

    total_data = len(data)
    # Ceiling division: round() silently skipped a trailing partial batch
    # whenever it held fewer than half of batch_size items.
    num_batch = (total_data + batch_size - 1) // batch_size

    total_loss = 0.0
    if num_batch == 0:
        # Nothing to train on; avoid dividing by zero below.
        return total_loss

    for batch_idx in range(num_batch):
        start = batch_idx * batch_size
        stop = min(start + batch_size, total_data)
        batch_caption = data[start:stop]

        data_v = MsrDataUtil.getBatchVideoFeature(batch_caption, hf,
                                                  feature_shape)
        data_c, data_y = MsrDataUtil.getBatchTrainCaptionWithSparseLabel(
            batch_caption, v2i, capl=capl)
        data_cate = MsrDataUtil.getBatchVideoCategoriesInfo(
            batch_caption, cate_info, feature_shape)
        data_audio = MsrDataUtil.getBatchVideoAudioInfo(
            batch_caption, audio_info, feature_shape)

        _, batch_loss = sess.run(
            [train, loss],
            feed_dict={
                input_video: data_v,
                input_captions: data_c,
                input_categories: data_cate,
                input_audio: data_audio,
                y: data_y
            })
        total_loss += batch_loss
        print('    batch_idx:%d/%d, loss:%.5f' %
              (batch_idx + 1, num_batch, batch_loss))

    return total_loss / num_batch