Пример #1
0
    def __init__(self, dict_args):
        """Assemble the CSAL captioning model from a configuration dict.

        dict_args supplies every hyperparameter; the values read here are
        kept as attributes and reused to build the sub-layers below.
        """
        super(CSAL, self).__init__()

        # Vision feature dimensionality-reduction settings.
        self.pretrained_feature_size = dict_args["pretrained_feature_size"]

        # Pretrained word-embedding settings.
        self.word_embeddings = dict_args["word_embeddings"]
        self.pretrained_embdim = dict_args["word_embdim"]
        self.vocabulary_size = dict_args["vocabulary_size"]

        # Frame-encoder settings.
        self.encoder_configuration = dict_args["encoder_configuration"]

        # Sentence-decoder settings.
        self.decoder_rnn_input_dim = dict_args["decoder_rnn_input_dim"]
        self.decoder_rnn_hidden_dim = dict_args["decoder_rnn_hidden_dim"]
        self.decoder_tie_weights = dict_args["decoder_tie_weights"]
        self.decoder_rnn_type = dict_args["decoder_rnn_type"]
        self.every_step = dict_args["every_step"]
        self.decoder_dropout_rate = dict_args["decoder_dropout_rate"]

        # Project pretrained vision features down to the decoder hidden size.
        self.vision_feature_dimred_layer = nn.Linear(
            self.pretrained_feature_size, self.decoder_rnn_hidden_dim)

        # Word-embedding lookup built from the same configuration dict.
        self.pretrained_words_layer = PretrainedEmbeddings(dict_args)

        # Encoder over the per-frame feature sequence.
        self.frame_encoder_layer = SequenceEncoder(dict_args)

        # Caption decoder; it receives the embedding weight matrix so the
        # output projection can optionally be tied to it.
        self.sentence_decoder_layer = SequenceDecoder({
            'input_dim': self.decoder_rnn_input_dim,
            'rnn_hdim': self.decoder_rnn_hidden_dim,
            'rnn_type': self.decoder_rnn_type,
            'vocabulary_size': self.vocabulary_size,
            'tie_weights': self.decoder_tie_weights,
            'word_embeddings': self.pretrained_words_layer.embeddings.weight,
            'every_step': self.every_step,
            'dropout_rate': self.decoder_dropout_rate,
        })
Пример #2
0
    def __init__(self, dict_args):
        """Assemble the STAL video-captioning model from a configuration dict.

        All hyperparameters come from dict_args; they are stored as
        attributes and then used to construct the sub-layers.
        """
        super(STAL, self).__init__()

        # Pretrained word-embedding settings.
        self.word_embeddings = dict_args["word_embeddings"]
        self.pretrained_embdim = dict_args["word_embdim"]
        self.vocabulary_size = dict_args["vocabulary_size"]

        # Frame-encoder settings.
        self.encoder_configuration = dict_args["encoder_configuration"]

        # Sentence-decoder settings.
        self.decoder_rnn_word_dim = dict_args["decoder_rnn_word_dim"]
        self.decoder_rnn_input_dim = dict_args["decoder_rnn_input_dim"]
        self.decoder_rnn_hidden_dim = dict_args["decoder_rnn_hidden_dim"]
        self.decoder_rnn_type = dict_args["decoder_rnn_type"]
        self.every_step = dict_args["every_step"]
        self.decoder_top_dropout_rate = dict_args["decoder_top_dropout_rate"]
        self.decoder_bottom_dropout_rate = dict_args[
            "decoder_bottom_dropout_rate"]
        self.decoder_residual_connection = dict_args["residual_connection"]

        # Word-embedding lookup built from the same configuration dict.
        self.pretrained_words_layer = PretrainedEmbeddings(dict_args)

        # Encoder over the video-frame feature sequence.
        self.frame_encoder_layer = VideoFrameEncoder(dict_args)

        # Caption decoder configured from the values read above.
        decoder_cfg = {
            'word_dim': self.decoder_rnn_word_dim,
            'input_dim': self.decoder_rnn_input_dim,
            'rnn_hdim': self.decoder_rnn_hidden_dim,
            'rnn_type': self.decoder_rnn_type,
            'vocabulary_size': self.vocabulary_size,
            'every_step': self.every_step,
            'top_dropout_rate': self.decoder_top_dropout_rate,
            'bottom_dropout_rate': self.decoder_bottom_dropout_rate,
            'residual_connection': self.decoder_residual_connection,
        }
        self.sentence_decoder_layer = VideoCaptionDecoder(decoder_cfg)
Пример #3
0
	def __init__(self, dict_args):
		"""Assemble the CSAL model from a configuration dict.

		Builds: a pretrained ResNet feature extractor, a linear layer that
		projects its features to the word-embedding dimension, a pretrained
		word-embedding lookup, and the sentence decoder.
		"""
		super(CSAL, self).__init__()

		#VisionFeatureDimRedLayer
		self.pretrained_feature_size = dict_args["pretrained_feature_size"]

		#PretrainedWordsLayer
		self.word_embeddings = dict_args["word_embeddings"]
		self.pretrained_embdim = dict_args["pretrained_embdim"]
		self.vocabulary_size = dict_args["vocabulary_size"]

		#SentenceDecoderLayer
		self.decoder_rnn_hidden_dim = dict_args["decoder_rnn_hidden_dim"]
		self.decoder_tie_weights = dict_args["decoder_tie_weights"]
		self.decoder_rnn_type = dict_args["decoder_rnn_type"]

		#PretrainedVisionLayer
		self.pretrained_vision_layer = PreTrainedResnet(dict_args)

		#VisionFeatureDimRedLayer: project CNN features to embedding size so
		#they can feed the decoder alongside word vectors.
		self.vision_feature_dimred_layer = nn.Linear(
			self.pretrained_feature_size, self.pretrained_embdim)

		#PretrainedWordsLayer
		self.pretrained_words_layer = PretrainedEmbeddings(dict_args)

		#SentenceDecoderLayer
		sentence_decoder_layer_args = {
			'input_dim': self.pretrained_embdim,
			'rnn_hdim': self.decoder_rnn_hidden_dim,
			'rnn_type': self.decoder_rnn_type,
			'vocabulary_size': self.vocabulary_size,
			#BUG FIX: was hard-coded to True, silently ignoring the
			#"decoder_tie_weights" value read from dict_args above.
			'tie_weights': self.decoder_tie_weights,
			'word_embeddings': self.pretrained_words_layer.embeddings.weight
		}
		self.sentence_decoder_layer = SentenceDecoder(sentence_decoder_layer_args)
Пример #4
0
        """Get hypotheses."""
        hyp = []
        for j in range(len(self.prevKs) - 1, -1, -1):
            hyp.append(self.nextYs[j + 1][k])
            k = self.prevKs[j][k]

        return hyp[::-1]


if __name__ == "__main__":

    pretrainedEmbeddings = PretrainedEmbeddings({
        "word_embeddings":
        torch.randn(10, 3),
        "pretrained_embdim":
        3,
        "vocabulary_size":
        10,
        "embeddings_requires_grad":
        False
    })

    dict_args = {
        'input_dim': 3,  #pretrainedEmbeddings.pretrained_embdim
        'rnn_hdim': 3,
        'rnn_type': 'LSTM',
        'vocabulary_size': pretrainedEmbeddings.vocabulary_size,
        'tie_weights': True,
        'word_embeddings': pretrainedEmbeddings.embeddings.weight,
        'vocabulary_bosindex': 1,
        'vocabulary_eosindex': 0,
        'pretrained_words_layer': pretrainedEmbeddings
Пример #5
0
        encoderlayer.clear()
        osequence = beamsearch.get_output(index=0)
        return osequence


if __name__ == '__main__':

    # Smoke test: build a tiny 10-word / 3-dim embedding layer and run the
    # sequence decoder on random inputs.
    embeddings_args = {
        "use_pretrained_emb": False,
        "backprop_embeddings": False,
        "word_embeddings": torch.randn(10, 3),
        "word_embdim": 3,
        "vocabulary_size": 10
    }
    pretrainedEmbeddings = PretrainedEmbeddings(embeddings_args)

    decoder_args = {
        'input_dim': 3,  #pretrainedEmbeddings.pretrained_embdim
        'rnn_hdim': 3,
        'rnn_type': 'LSTM',
        'vocabulary_size': pretrainedEmbeddings.vocabulary_size,
        'tie_weights': True,
        'word_embeddings': pretrainedEmbeddings.embeddings.weight,
        'pretrained_words_layer': pretrainedEmbeddings
    }
    sentenceDecoder = SequenceDecoder(decoder_args)

    # FIX: torch.autograd.Variable has been a deprecated no-op wrapper since
    # PyTorch 0.4 -- plain tensors carry autograd state themselves.
    osequence = sentenceDecoder(
        torch.randn(2, 3, 3), torch.randn(2, 3),
        torch.LongTensor([[1, 1, 1], [1, 0, 0]]))
Пример #6
0
                                        100,
                                        self.vocab_bosindex,
                                        self.vocab_eosindex,
                                        cuda=USE_CUDA,
                                        max_words=50,
                                        alpha=0.7)

            return generator.generate_caption()


if __name__ == '__main__':

    # Tiny 10-word / 3-dim embedding layer for the smoke test below.
    embedding_config = {
        "word_embeddings": torch.randn(10, 3),
        "pretrained_embdim": 3,
        "vocabulary_size": 10
    }
    pretrainedEmbeddings = PretrainedEmbeddings(embedding_config)

    # Decoder configuration; input_dim matches pretrained_embdim above.
    dict_args = {
        'input_dim': 3,
        'rnn_hdim': 3,
        'rnn_type': 'LSTM',
        'vocabulary_size': pretrainedEmbeddings.vocabulary_size,
        'tie_weights': True,
        'word_embeddings': pretrainedEmbeddings.embeddings.weight
    }

    sentenceDecoder = SentenceDecoder(dict_args)
    osequence = sentenceDecoder(