def __init__(
    self,
    glove_path: str,
    model_params: ModelParams,
    hidden_dimension: int,
):
    super().__init__()
    self.model_params = model_params
    self.word_embedding_net = WordEmbedding(
        vocabulary_size=model_params.vocabulary_size,
        pretrained_vectors_file=glove_path,
        embedding_dimension=model_params.word_embedding_dimension,
        dropout=0.25,
    )
    self.question_embedding_net = QuestionEmbedding(
        input_dimension=model_params.word_embedding_dimension,
        number_hidden_units=hidden_dimension,
        number_of_layers=1,
    )
    self.question_projection_net = MultiLayerNet(
        dimensions=[hidden_dimension, hidden_dimension], dropout=0.5)
    self.image_projection_net = MultiLayerNet(
        dimensions=[
            model_params.object_embedding_dimension,
            hidden_dimension,
        ],
        dropout=0.5,
    )
    if self.model_params.add_self_attention:
        self.question_self_attention_net = SelfAttention(hidden_dimension, dropout=0.3)
        self.visual_self_attention_net = SelfAttention(hidden_dimension, dropout=0.3)
    self.question_attention_net = Attention(model_params.number_of_objects, dropout=0.3)
    self.visual_attention_net = Attention(model_params.question_sequence_length, dropout=0.3)

    if model_params.fusion_method == FusionMethod.CONCAT:
        # The classifier input grows with the number of concatenated feature vectors.
        factor = 3 if self.model_params.add_self_attention else 2
        self.classifier = Classifier(
            input_dimension=hidden_dimension * factor,
            hidden_dimension=hidden_dimension * 4,
            output_dimension=model_params.num_ans_candidates,
            dropout=0.5,
        )
    elif model_params.fusion_method == FusionMethod.HADAMARD:
        self.classifier = Classifier(
            input_dimension=hidden_dimension,
            hidden_dimension=hidden_dimension * 2,
            output_dimension=model_params.num_ans_candidates,
            dropout=0.5,
        )
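# A tiny, self-contained illustration (all values invented) of the two fusion methods selected
# above: CONCAT stacks the attended question and image features, which is why the classifier
# input scales with `factor`, while HADAMARD multiplies them element-wise and keeps the input
# at hidden_dimension. This is a sketch, not code from the repository above.
import torch

_hidden_dimension = 8
_q = torch.randn(4, _hidden_dimension)   # attended question features (batch, hidden)
_v = torch.randn(4, _hidden_dimension)   # attended image features (batch, hidden)

_concat_fused = torch.cat([_q, _v], dim=1)   # (batch, hidden * 2) -> classifier input grows with factor
_hadamard_fused = _q * _v                    # (batch, hidden)     -> classifier input stays at hidden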
def __init__(self, args, device, collect_da_predictions=True):
    super(MTL_Model3, self).__init__()
    self.input_size = args.embedding_dim
    self.hidden_size_u = args.lstm_sent_size
    self.hidden_size_d = args.lstm_utt_size
    self.num_layers = args.lstm_layers
    self.num_dialogacts = args.num_classes
    self.device = device
    self.emb = GloveEmbedding(args)
    self.only_da = (args.loss == 'da')

    self.bilstm_u = nn.LSTM(self.input_size, self.hidden_size_u, self.num_layers,
                            bidirectional=True, batch_first=True)
    for param in self.bilstm_u.parameters():
        if len(param.shape) >= 2:
            init.orthogonal_(param.data)
        else:
            init.normal_(param.data)

    self.bilstm_d = nn.LSTM(2 * self.hidden_size_u, self.hidden_size_d, self.num_layers,
                            bidirectional=True, batch_first=True)
    for param in self.bilstm_d.parameters():
        if len(param.shape) >= 2:
            init.orthogonal_(param.data)
        else:
            init.normal_(param.data)

    self.attn_u = Attention(2 * self.hidden_size_u)
    self.attn_d = Attention(2 * self.hidden_size_d)
    self.ff_u = nn.Linear(2 * self.hidden_size_u, self.num_dialogacts)
    self.ff_d = nn.Linear(2 * self.hidden_size_d, 1)
    nn.init.normal_(self.ff_d.weight, mean=0, std=1)
    nn.init.normal_(self.ff_u.weight, mean=0, std=1)
    self.dropout_u = nn.Dropout(args.dropout_prob)

    self.collect_da_predictions = collect_da_predictions
    self.da_predictions = []

    # Add weights to the loss function to account for the distribution of dialog acts in DailyDialog.
    # nll_class_weights = torch.tensor([0.0, 2.1861911569232313, 3.4904300472491396, 6.120629125122877, 10.787031308006435]).to(device)
    if args.num_classes == 5:
        nll_class_weights = torch.tensor([0.0, 1.0, 1.0, 1.0, 1.0]).to(device)
        # self.nll = nn.NLLLoss(weight=nll_class_weights, reduction='none')
        self.nll = nn.CrossEntropyLoss(weight=nll_class_weights, reduction='mean')
    else:
        self.nll = nn.CrossEntropyLoss(reduction='mean')
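# A minimal, self-contained sketch (values are illustrative, not from this repository) of how the
# class-weighted CrossEntropyLoss configured above behaves: a weight of 0.0 on class 0 makes that
# class contribute nothing to the averaged loss.
import torch
import torch.nn as nn

_weights = torch.tensor([0.0, 1.0, 1.0, 1.0, 1.0])
_criterion = nn.CrossEntropyLoss(weight=_weights, reduction='mean')
_logits = torch.randn(8, 5)              # (batch, num_classes)
_targets = torch.randint(1, 5, (8,))     # labels 1..4; class 0 is effectively ignored
_loss = _criterion(_logits, _targets)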
def __init__(self, query_hidden_size, memory_bank_size, num_classes, attn_mode,
             dropout=0.0, ordinal=False, hr_enc=False):
    super(WordMultiHopAttnClassifier, self).__init__()
    self.memory_bank_size = memory_bank_size
    self.query_hidden_size = query_hidden_size
    self.num_classes = num_classes
    self.hr_enc = hr_enc
    self._query_vector = nn.Parameter(torch.zeros(1, query_hidden_size))
    init.uniform_(self._query_vector, -0.1, 0.1)
    self.attention_layer = Attention(memory_bank_size, memory_bank_size,
                                     coverage_attn=False, attn_mode=attn_mode)
    self.glimpse_layer = Attention(query_hidden_size, memory_bank_size,
                                   coverage_attn=False, attn_mode=attn_mode)
    if self.hr_enc:
        self._sent_query_vector = nn.Parameter(torch.zeros(1, query_hidden_size))
        init.uniform_(self._sent_query_vector, -0.1, 0.1)
        self.sent_attention_layer = Attention(memory_bank_size, memory_bank_size,
                                              coverage_attn=False, attn_mode=attn_mode)
        self.sent_glimpse_layer = Attention(query_hidden_size, memory_bank_size,
                                            coverage_attn=False, attn_mode=attn_mode)
    self.ordinal = ordinal
    self.expanded_memory_size = memory_bank_size if not hr_enc else 2 * memory_bank_size
    if ordinal:
        self.classifier = nn.Sequential(
            nn.Linear(self.expanded_memory_size, self.expanded_memory_size),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(self.expanded_memory_size, num_classes),
            nn.Sigmoid())
    else:
        self.classifier = nn.Sequential(
            nn.Linear(self.expanded_memory_size, self.expanded_memory_size),
            nn.Dropout(p=dropout),
            nn.ReLU(),
            nn.Linear(self.expanded_memory_size, num_classes),
            nn.LogSoftmax(dim=1))
def create_model(vocab_size, tag_size, max_len, rnn_units, embedding_dims, emb_matrix=None):
    inputs = Input(shape=(max_len,))
    if emb_matrix is None:
        embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dims,
                                    input_length=max_len)
    else:
        embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dims,
                                    weights=[emb_matrix], input_length=max_len,
                                    trainable=False)
    bi_rnn = Bidirectional(LSTM(units=rnn_units, return_sequences=True))
    attention = Attention(max_len)
    classifier = Dense(1, activation="sigmoid")

    embedding = embedding_layer(inputs)
    x = bi_rnn(embedding)
    x = attention(x)
    output = classifier(x)

    model = Model(inputs, output)
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
    return model
def build_baseline(embeddings, num_ans_candidates):
    vision_features = config.output_features
    visual_glimpses = config.visual_glimpses
    question_features = hidden_features = config.hid_dim

    w_emb = WordEmbedding(embeddings, dropout=0.0)
    q_emb = QuestionEmbedding(w_dim=300, hid_dim=question_features, nlayers=1,
                              bidirect=False, dropout=0.0)
    v_att = Attention(
        v_dim=vision_features,
        q_dim=question_features * q_emb.ndirections,
        hid_dim=hidden_features,
        glimpses=visual_glimpses,
    )
    classifier = SimpleClassifier(
        in_dim=(question_features * q_emb.ndirections, vision_features),
        hid_dim=(hidden_features, hidden_features * 2),
        out_dim=num_ans_candidates,
        dropout=0.5)
    return BaseModel(w_emb, q_emb, v_att, classifier)
def __init__(
        self,
        h_enc: int,
        h_dec: int,
        label_to_id: Dict,
        dropout: float = 0.,
        teacher_force: float = 0.,
        attention: Dict = None
):
    """Convert a label into a sequence of sublabels and use an LSTM cell to predict the next one.

    :param h_enc: encoder hidden state size, corresponds to the hidden state of the LSTM cell
    :param h_dec: size of the LSTM cell input/output
    :param label_to_id: dict for converting labels to ids
    :param dropout: probability of dropout
    :param teacher_force: probability of teacher forcing; 0 means always use the previously predicted value
    :param attention: if passed, init attention with the given args
    """
    super().__init__(h_enc, h_dec, label_to_id)
    self.teacher_force = teacher_force
    self.embedding = nn.Embedding(self.out_size, self.h_dec, padding_idx=self.pad_index)
    self.linear = nn.Linear(self.h_enc, self.out_size)
    self.dropout = nn.Dropout(dropout)

    self.use_attention = attention is not None
    if self.use_attention:
        self.attention = Attention(self.h_enc, self.h_dec, **attention)

    lstm_input_size = self.h_enc + self.h_dec if self.use_attention else self.h_dec
    self.lstm = nn.LSTM(input_size=lstm_input_size, hidden_size=self.h_enc)
def __init__(self, args):
    super(Decoder, self).__init__()
    self.vocab_size = args.vocab_size
    self.embed_size = args.embed_size
    self.hidden_size = args.hidden_size
    self.layer_size = args.layer_size
    self.batch_size = 0
    self.device = args.device
    self.beam_width = args.beam_width
    self.teacher_rate = args.teacher_rate
    # self.word_embed = nn.Embedding(self.vocab_size, self.embed_size, padding_idx=constants.PAD)
    self.word_embed = nn.Embedding(self.vocab_size, self.embed_size,
                                   padding_idx=constants.PAD)
    # self.hidden_exchange = nn.Linear(self.hidden_size * 2, self.hidden_size)
    self.gru = nn.GRU(self.embed_size, self.hidden_size, num_layers=args.layer_size,
                      bidirectional=False, dropout=args.dropout,
                      batch_first=True)  # the decoder cannot be bidirectional
    self.attention = Attention(args)
    self.attention_wight = nn.Linear(self.hidden_size * 3, self.hidden_size * 3)
    self.out = nn.Linear(self.hidden_size * 3, self.vocab_size)
    self.dropout = nn.Dropout(args.dropout)
    self.activate = nn.Tanh()
def baseline(args, dataset, pretrained=False):
    # initialise model
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, args.num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, args.num_hid)
    q_net = FCNet([args.num_hid, args.num_hid])
    v_net = FCNet([dataset.v_dim, args.num_hid])
    classifier = SimpleClassifier(args.num_hid, 2 * args.num_hid,
                                  dataset.num_ans_candidates, 0.5)
    model = BaseModel(args, w_emb, q_emb, v_att, q_net, v_net, classifier)

    # load model on device if available
    map_location = None
    if not model.cuda_available:
        map_location = torch.device('cpu')

    # download and load pretrained model
    if pretrained:
        key = 'baseline-vqa'
        url = pretrained_urls[key]
        model.load_state_dict(download_model(key, url, map_location=map_location)['model'],
                              strict=False)
    else:
        key = 'untrained'

    # set model name
    model.name = key
    return model
def model(self):
    # (Bi-GRU) layers
    rnn_outputs, _ = bi_rnn(GRUCell(self.hidden_size), GRUCell(self.hidden_size),
                            inputs=self.batch_embedded, dtype=tf.float32)
    tf.summary.histogram('RNN_outputs', rnn_outputs)

    # Attention layer
    with tf.name_scope('Attention_layer'):
        attention_ = Attention(rnn_outputs, self.attention_size,
                               time_major=False, return_alphas=True)
        self.attention_output, alphas = attention_.attentionModel()
        tf.summary.histogram('alphas', alphas)
        print('attention_output.shape:', self.attention_output.shape)
def __init__(self, train_dset, word_embedding_size=300, id_embedding_size=32,
             hidden_size=100, num_layers=1, bidirectional=False, da=100, r=10,
             attention_size=32, num_latent=32, dropout=0.5, soa_size=50):
    super(HANCI, self).__init__()
    review_hidden_size = hidden_size * (1 + int(bidirectional))
    self.user_word_emb = WordEmbedding(train_dset.dictionary.ntoken, word_embedding_size)
    self.item_word_emb = WordEmbedding(train_dset.dictionary.ntoken, word_embedding_size)
    self.user_review_emb = ReviewLSTMSA(embedding_size=word_embedding_size, dropout=dropout,
                                        hidden_size=hidden_size, num_layers=num_layers,
                                        bidirectional=bidirectional, da=da, r=r)
    self.item_review_emb = ReviewLSTMSA(embedding_size=word_embedding_size, dropout=dropout,
                                        hidden_size=hidden_size, num_layers=num_layers,
                                        bidirectional=bidirectional, da=da, r=r)
    self.user_id_emb = WordEmbedding(train_dset.user_num, id_embedding_size, dropout=0.)
    self.item_id_emb = WordEmbedding(train_dset.item_num, id_embedding_size, dropout=0.)
    self.user_rid_emb = WordEmbedding(train_dset.item_num, id_embedding_size, dropout=0.)
    self.item_rid_emb = WordEmbedding(train_dset.user_num, id_embedding_size, dropout=0.)
    self.user_reviews_att = Attention(review_hidden_size, id_embedding_size, attention_size)
    self.item_reviews_att = Attention(review_hidden_size, id_embedding_size, attention_size)
    self.user_soft_attention = SoftAttention(review_hidden_size, id_embedding_size, soa_size)
    self.item_soft_attention = SoftAttention(review_hidden_size, id_embedding_size, soa_size)
    self.relu = nn.ReLU()
    self.user_latent = nn.Linear(review_hidden_size, num_latent)
    self.item_latent = nn.Linear(review_hidden_size, num_latent)
    self.classify = nn.Linear(num_latent, 1, bias=False)
    self.dropout = nn.Dropout(dropout)
    # Separate bias terms for users and items.
    self.user_bias = WordEmbedding(ntoken=train_dset.user_num, emb_dim=1, dropout=0.)
    self.item_bias = WordEmbedding(ntoken=train_dset.item_num, emb_dim=1, dropout=0.)
    self.global_bias = nn.Parameter(torch.Tensor([0.1]))
    # Initialisation.
    self.user_word_emb.init_embedding()
    self.item_word_emb.init_embedding()
    self.user_bias.init_embedding_with_one(0.1)
    self.item_bias.init_embedding_with_one(0.1)
def build_baseline0(dataset, num_hid):
    w_emb = WordEmbedding(dataset.dictionary.ntoken, 300, 0.0)
    q_emb = QuestionEmbedding(300, num_hid, 1, False, 0.0)
    v_att = Attention(dataset.v_dim, q_emb.num_hid, num_hid)
    q_net = FCNet([num_hid, num_hid])
    v_net = FCNet([dataset.v_dim, num_hid])
    classifier = SimpleClassifier(num_hid, 2 * num_hid, dataset.num_ans_candidates, 0.5)
    return BaseModel(w_emb, q_emb, v_att, q_net, v_net, classifier)
def get_model(embedding_matrix, name):
    context_embedding_layer = Embedding(input_dim=embedding_matrix.shape[0],
                                        output_dim=embedding_matrix.shape[1],
                                        weights=[embedding_matrix],
                                        input_length=max_context_seq_length,
                                        trainable=False,
                                        name='c_emb' + name)
    question_embedding_layer = Embedding(input_dim=embedding_matrix.shape[0],
                                         output_dim=embedding_matrix.shape[1],
                                         weights=[embedding_matrix],
                                         input_length=max_question_seq_length,
                                         trainable=False,
                                         name='q_emb' + name)

    # The 2 inputs to our model
    c_seq_input = Input(shape=(max_context_seq_length,), dtype='int32')   # [batch_size, n]
    q_seq_input = Input(shape=(max_question_seq_length,), dtype='int32')  # [batch_size, m]

    # Embed both question and context
    c_embedded = context_embedding_layer(c_seq_input)   # [batch_size, n, d]
    q_embedded = question_embedding_layer(q_seq_input)  # [batch_size, m, d]

    # Bidirectional LSTMs used as encoders
    c_encoder_out = Bidirectional(LSTM(units, return_sequences=True))(c_embedded)  # [batch_size, n, 2l]
    q_encoder_out = Bidirectional(LSTM(units, return_sequences=True))(q_embedded)  # [batch_size, m, 2l]

    # Interaction/attention layer, output shape:
    G = Attention()([c_encoder_out, q_encoder_out])  # [batch_size, n, 4l]

    # Modeling layer
    m_1 = Bidirectional(LSTM(units, return_sequences=True))(G)  # [batch_size, n, 2l]
    # m_2 = Bidirectional(LSTM(units, return_sequences=True, dropout=dropout))(m_1)  # [batch_size, n, 2l]
    # m_3 = Bidirectional(LSTM(units, return_sequences=True, dropout=dropout))(m_2)

    concat1_out = Concatenate(axis=-1)([G, m_1])
    ps_start_ = TimeDistributed(Dense(1))(concat1_out)  # [batch_size, n, 1]
    ps_start_flatten = Flatten()(ps_start_)             # [batch_size, n]
    ps_start = Activation('softmax')(ps_start_flatten)

    # concat2_out = Concatenate(axis=-1)([G, m_3])
    ps_end_ = TimeDistributed(Dense(1))(concat1_out)  # [batch_size, n, 1]
    ps_end_flatten = Flatten()(ps_end_)               # [batch_size, n]
    ps_end = Activation('softmax')(ps_end_flatten)

    model = Model(inputs=[c_seq_input, q_seq_input], outputs=[ps_start, ps_end])
    return model
def __init__(self, char_embedding, input_dim, hidden_size_decoder, shared_context,
             word_recurrent_cell=None, drop_out_word_cell=0, timing=False,
             drop_out_char_embedding_decoder=0, char_src_attention=False,
             unrolling_word=False, init_context_decoder=True,
             hidden_size_src_word_encoder=None, generator=None,
             stable_decoding_state=False, verbose=0):
    super(CharDecoder, self).__init__()
    self.verbose = verbose
    self.generator = generator
    self.timing = timing
    self.char_embedding_decoder = char_embedding
    self.shared_context = shared_context
    self.unrolling_word = unrolling_word
    self.stable_decoding_state = stable_decoding_state
    self.init_context_decoder = init_context_decoder
    printing("WARNING : stable_decoding_state is {}", var=[stable_decoding_state], verbose_level=0, verbose=verbose)
    printing("WARNING : init_context_decoder is {}", var=[init_context_decoder], verbose_level=0, verbose=verbose)
    printing("WARNING : DECODER unrolling_word is {}", var=[unrolling_word], verbose_level=0, verbose=verbose)
    printing("WARNING : DECODER char_src_attention is {}", var=[char_src_attention], verbose_level=0, verbose=verbose)
    self.drop_out_char_embedding_decoder = nn.Dropout(drop_out_char_embedding_decoder)

    if word_recurrent_cell is not None:
        assert word_recurrent_cell in SUPPORED_WORD_ENCODER, \
            "ERROR : word_recurrent_cell should be in {} ".format(SUPPORED_WORD_ENCODER)
    if word_recurrent_cell is None:
        word_recurrent_cell = nn.GRU
    else:
        word_recurrent_cell = eval("nn." + word_recurrent_cell)
    if word_recurrent_cell is nn.LSTM:
        printing("WARNING : in the case of LSTM : initial states defined as "
                 "h_0, c_0 = (zero tensor, source_conditioning) so far (cf. row 70 decoder.py) ",
                 verbose=self.verbose, verbose_level=0)
        printing("WARNING : LSTM only using h_0 for prediction not the cell",
                 verbose=self.verbose, verbose_level=0)

    if char_src_attention:
        assert hidden_size_src_word_encoder is not None, "ERROR : need hidden_size_src_word_encoder for attention "
        # We need to add a dimension because of the context vector, which is the projected encoder hidden size.
        # input_dim += hidden_size_src_word_encoder :
        # no need anymore, as we project the whole context to the same size as the current input
        printing("WARNING : DECODER word_recurrent_cell hidden dim will be {} "
                 "(we added hidden_size_decoder) because of attention",
                 verbose=verbose, verbose_level=0)

    # If stable_decoding_state : we add a projection of the attention context vector + the stable one.
    # TODO : try to project the concatenation of those three vectors (char, attention context, stable context)
    self.context_proj = nn.Linear(
        hidden_size_decoder * int(stable_decoding_state) + hidden_size_src_word_encoder * int(char_src_attention),
        char_embedding.embedding_dim) if stable_decoding_state or char_src_attention else None
    # Doubled because we concatenate the input with the projected context.
    input_dim = 2 * input_dim if stable_decoding_state or char_src_attention else input_dim
    self.seq_decoder = word_recurrent_cell(input_size=input_dim, hidden_size=hidden_size_decoder,
                                           num_layers=1, dropout=drop_out_word_cell,
                                           bias=True, batch_first=True, bidirectional=False)
    printing("MODEL Decoder : word_recurrent_cell has been set to {} ".format(str(word_recurrent_cell)),
             verbose=verbose, verbose_level=1)
    # self.attn_param = nn.Linear(hidden_size_decoder*1) if char_src_attention else None
    self.attn_layer = Attention(hidden_size_word_decoder=hidden_size_decoder,
                                char_embedding_dim=self.char_embedding_decoder.embedding_dim,
                                time=self.timing,
                                hidden_size_src_word_encoder=hidden_size_src_word_encoder) if char_src_attention else None
def __init__(self):
    super(Decoder, self).__init__()
    self.n_frames_per_step = hps.n_frames_per_step
    self.n_mel_channels = hps.n_mel_channels
    self.encoder_embedding_dim = hps.encoder_embedding_dim
    self.attention_rnn_dim = hps.attention_rnn_dim
    self.decoder_rnn_dim = hps.decoder_rnn_dim

    self.prenet = Prenet()
    self.attention_rnn = nn.LSTMCell(256 + 512, 1024)
    self.attention_layer = Attention()
    # decoder rnn input : 256 + 512 = 768
    # decoder rnn output : 1024
    self.decoder_rnn = nn.LSTMCell(256 + 512, 1024, 1)
    self.linear_projection = LinearNorm(1024, 80 * 3)
def __init__(self, config, entity_embedding, context_embedding):
    super(DKN, self).__init__()
    self.config = config
    self.kcnn = KCNN(config, entity_embedding, context_embedding)
    if self.config.use_attention:
        self.attention = Attention(config)
    # TODO parameters
    self.dnn = nn.Sequential(
        nn.Linear(len(self.config.window_sizes) * 2 * self.config.num_filters, 16),
        nn.Linear(16, 1))
def __init__(
        self,
        embedding_cfg={
            "input_dim": -1,
            "output_dim": 64,
            "embeddings_initializer": "uniform",
            "embeddings_regularizer": "l2",
            "activity_regularizer": "l2",
            "mask_zero": True,
            "input_length": 40
        },
        LSTM_cfg={
            "units": 64,
            "activation": "tanh",
            "recurrent_activation": "sigmoid",
            "use_bias": True,
            "kernel_initializer": "glorot_uniform",
            "bias_initializer": "zeros",
            "unit_forget_bias": True,
            "kernel_regularizer": "l2",
            "recurrent_regularizer": "l2",
            "bias_regularizer": "l2",
            "activity_regularizer": "l2",
            "dropout": 0.1,
            "recurrent_dropout": 0.1,
            "implementation": 2,
            "return_sequences": True,
            "return_state": True,
            "go_backwards": False,
            "stateful": False,
            "unroll": False
        },
        num_lstm_layer=1,
        max_length=40,
        fully_connected_cfg={
            "units": -1,
            "activation": None,
            "use_bias": True
        },
        attention_cfg={
            "hidden_size": 64,
            "num_heads": 1
        }):
    super(Decoder, self).__init__()
    self.max_length = max_length
    self.embedding = tf.keras.layers.Embedding(**embedding_cfg)
    self.lstm_layers = []
    for _ in range(num_lstm_layer):
        self.lstm_layers.append(tf.keras.layers.LSTM(**LSTM_cfg))
    self.attention = Attention(**attention_cfg)
    self.fully_connected = tf.keras.layers.Dense(**fully_connected_cfg)
def BidLstm(maxlen, max_features, embed_size, embedding_matrix):
    inp = Input(shape=(maxlen,))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(inp)
    x = Bidirectional(LSTM(300, return_sequences=True, dropout=0.25,
                           recurrent_dropout=0.25))(x)
    x = Attention(maxlen)(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.25)(x)
    x = Dense(256, activation="relu")(x)
    x = Dropout(0.25)(x)
    x = Dense(6, activation="sigmoid")(x)
    model = Model(inputs=inp, outputs=x)
    return model
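# The Attention(maxlen) layer used above (and in the other Keras snippets) is defined elsewhere
# in the respective repositories. Purely as a point of reference, here is a minimal,
# self-contained attention-pooling layer of the kind commonly used in this setting; it is an
# illustrative sketch, not the authors' implementation: it scores each timestep with a learned
# vector, softmax-normalises the scores, and returns the weighted sum over timesteps.
import tensorflow as tf


class SimpleAttentionPooling(tf.keras.layers.Layer):
    def build(self, input_shape):
        # One score weight per feature dimension.
        self.w = self.add_weight(name="att_w", shape=(int(input_shape[-1]), 1),
                                 initializer="glorot_uniform", trainable=True)
        super().build(input_shape)

    def call(self, x):
        # x: (batch, timesteps, features)
        scores = tf.squeeze(tf.matmul(x, self.w), axis=-1)        # (batch, timesteps)
        alphas = tf.nn.softmax(scores, axis=-1)                   # attention weights
        return tf.reduce_sum(x * tf.expand_dims(alphas, -1), 1)   # (batch, features)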
def init_model_from_ckpt():
    _, _, _, train_data, valid_data, test_data = get_dataloaders_and_data()
    SRC_PAD_IDX = DOC.vocab.stoi[DOC.pad_token]
    TRG_PAD_IDX = DOC.vocab.stoi[DOC.pad_token]
    INPUT_DIM = len(DOC.vocab)
    OUTPUT_DIM = len(DOC.vocab)

    attn = Attention(ENC_HID_DIM, DEC_HID_DIM)
    enc = Encoder(INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, ENC_DROPOUT)
    dec = Decoder(OUTPUT_DIM, DEC_EMB_DIM, ENC_HID_DIM, DEC_HID_DIM, DEC_DROPOUT, attn)
    model = Seq2Seq(enc, dec, SRC_PAD_IDX, device).to(device)

    most_recent_ckpt = get_most_recent_ckpt('ckpts')
    model.load_state_dict(torch.load(most_recent_ckpt))
    return model, train_data, valid_data, test_data
def __init__(self, hidden_size, output_size, n_layers=1, dropout=0.1, embedding=None):
    super(Decoder, self).__init__()
    self.output_size = output_size
    self.n_layers = n_layers
    self.dropout = dropout
    if isinstance(embedding, nn.Embedding):
        self.embedding = embedding
    else:
        self.embedding = nn.Embedding(output_size, hidden_size)
    if config.use_attn:
        self.attention = Attention(hidden_size)
    self.rnn = nn.LSTM(hidden_size, hidden_size, num_layers=n_layers, dropout=dropout)
    self.out = nn.Linear(hidden_size, output_size)
def __init__(self, embed_size, hidden_size, output_size, n_layers=1, dropout=0.2):
    super(Decoder, self).__init__()
    self.embed_size = embed_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.n_layers = n_layers

    self.embed = nn.Embedding(output_size, embed_size)
    self.dropout = nn.Dropout(dropout, inplace=True)
    self.attention = Attention(hidden_size)
    self.gru = nn.GRU(hidden_size + embed_size, hidden_size, n_layers, dropout=dropout)
    self.out = nn.Linear(hidden_size * 2, output_size)
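# Hedged, self-contained sketch (not the repository's forward pass) of a single decoding step,
# to show why the GRU above takes hidden_size + embed_size inputs and why self.out projects from
# hidden_size * 2: the attention context is concatenated with the token embedding before the GRU,
# and with the GRU output before the vocabulary projection. All names and values are invented,
# and a plain dot-product attention stands in for the Attention module used above.
import torch
import torch.nn as nn
import torch.nn.functional as F

B, T, embed_size, hidden_size, vocab = 4, 7, 32, 64, 100
embed = nn.Embedding(vocab, embed_size)
gru = nn.GRU(hidden_size + embed_size, hidden_size)
out = nn.Linear(hidden_size * 2, vocab)

encoder_outputs = torch.randn(T, B, hidden_size)   # (T, B, H)
last_hidden = torch.zeros(1, B, hidden_size)       # (num_layers, B, H)
token = torch.randint(0, vocab, (1, B))            # previous output token

embedded = embed(token)                                           # (1, B, E)
scores = (encoder_outputs * last_hidden[-1]).sum(dim=2)           # (T, B) dot-product scores
alphas = F.softmax(scores, dim=0).unsqueeze(2)                    # (T, B, 1) attention weights
context = (alphas * encoder_outputs).sum(dim=0, keepdim=True)     # (1, B, H) attention context

rnn_input = torch.cat([embedded, context], dim=2)                 # (1, B, E + H)
output, hidden = gru(rnn_input, last_hidden)                      # output: (1, B, H)
logits = out(torch.cat([output.squeeze(0), context.squeeze(0)], dim=1))  # (B, vocab)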
def Lstm(maxlen, max_features, embed_size, embedding_matrix):
    inp = Input(shape=(maxlen,))
    x = Embedding(max_features, embed_size, weights=[embedding_matrix], trainable=False)(inp)
    x = LSTM(300, return_sequences=True, dropout=0.25, recurrent_dropout=0.25)(x)
    x = LSTM(300, return_sequences=True, dropout=0.25, recurrent_dropout=0.25)(x)
    x = Attention(maxlen)(x)
    x = Dense(256, activation="relu")(x)

    inp_convai = Input(shape=(3,))
    x = concatenate([x, inp_convai])
    x = Dropout(0.25)(x)
    x = Dense(6, activation="sigmoid")(x)
    model = Model(inputs=[inp, inp_convai], outputs=x)
    return model
def __init__(self, drug_length, protein_length, n_drug, n_protein, embd_dim=20,
             filter_width=5, layer_size=1):
    """
    Args:
        drug_length: drug compound input size, i.e. number of atoms
        protein_length: protein input size, i.e. number of amino acids
        n_drug: length of the drug embedding dictionary
        n_protein: length of the protein embedding dictionary
            (if we only use amino acids as "words" this is only 20)
        embd_dim: embedding vector length
        filter_width: convolutional filter width
        layer_size: number of ABCNN-2 layers
    """
    super(Model, self).__init__()
    self.layer_size = layer_size

    # Embedding layers
    self.embd_drug = nn.Embedding(n_drug, embd_dim)
    self.embd_protein = nn.Embedding(n_protein, embd_dim)

    # ABCNN layers
    self.conv = nn.ModuleList([
        Convolution(embd_dim, embd_dim, filter_width, 1) for _ in range(layer_size)
    ])
    self.attn = nn.ModuleList([
        Attention(drug_length, protein_length, filter_width) for _ in range(layer_size)
    ])

    # all-ap average pooling layers
    self.ap = AllAP(embd_dim)

    # final classifier
    self.fc = nn.Linear(embd_dim * 2, 2)
def __init__(self, config, no_words, no_answers, resnet_model, lstm_size, emb_size,
             use_pretrained=True):
    super(Net, self).__init__()
    self.use_pretrained = use_pretrained  # whether to use pretrained ResNet
    self.word_cnt = no_words              # total count of words
    self.ans_cnt = no_answers             # total count of valid answers
    self.lstm_size = lstm_size            # lstm emb size to be passed to CBN layer
    self.emb_size = emb_size              # hidden layer size of MLP used to predict delta beta and gamma parameters
    self.config = config                  # config file containing the values of parameters
    self.cbn = config['model']['image']['cbn']['use_cbn']

    self.embedding = nn.Embedding(self.word_cnt, self.emb_size)
    self.lstm = VariableLengthLSTM(self.config['model']).cuda()
    self.net = create_resnet(resnet_model, self.lstm_size, self.emb_size,
                             self.use_pretrained, self.cbn)
    self.attention = Attention(self.config).cuda()

    self.que_mlp = nn.Sequential(
        nn.Linear(config['model']['no_hidden_LSTM'], config['model']['no_question_mlp']),
        nn.Tanh(),
    )
    self.img_mlp = nn.Sequential(
        nn.Linear(2048, config['model']['no_image_mlp']),
        nn.Tanh(),
    )
    self.dropout = nn.Dropout(config['model']['dropout_keep_prob'])
    self.final_mlp = nn.Linear(config['model']['no_hidden_final_mlp'], self.ans_cnt)
def __init__(self, items_total: int, embed_dim: int, num_attn_queries,
             time_encoding='none', transformer_num_heads=8, transformer_num_layers=2,
             temperature: float = 1.0, dropout=0.1, set_embed_dropout=0.1):
    """
    Args:
        num_attn_queries: number of attention-pool queries; -1 falls back to mean pooling
    """
    super(SSM, self).__init__()
    self.items_total = items_total
    self.embed_dim = embed_dim
    self.item_embed = nn.Embedding(num_embeddings=items_total, embedding_dim=embed_dim)

    self.num_attn_queries = num_attn_queries
    if num_attn_queries == -1:
        self.attention_pool = Pool(pool='mean')
    else:
        self.attention_pool = AttentionPool(embed_dim, num_attn_queries,
                                            dropout=set_embed_dropout)

    self.time_encoding_method = time_encoding
    if time_encoding == 'none':
        self.time_encoding = None
    elif time_encoding == 'positional':
        self.time_encoding = PositionalEncoding(embed_dim=embed_dim)
    elif time_encoding == 'timestamp':
        self.time_encoding = TimestampEncoding(embed_dim=embed_dim)
    else:
        raise NotImplementedError()

    self.transformer_encoder = nn.TransformerEncoder(
        encoder_layer=nn.TransformerEncoderLayer(d_model=embed_dim,
                                                 nhead=transformer_num_heads,
                                                 dropout=dropout),
        num_layers=transformer_num_layers)
    self.attn = Attention(embed_dim=embed_dim, temperature=temperature)
    self.items_bias = nn.Parameter(torch.zeros(items_total))
def __init__(self, items_total: int, embed_dim: int = 256, time_encoding='none',
             transformer_num_heads: int = 8, transformer_num_layers: int = 2,
             temperature: float = 1.0, dropout: float = 0.1):
    """
    Args:
        items_total:
        embed_dim:
        time_encoding: 'none'/'positional'/'timestamp'
        transformer_num_heads:
        transformer_num_layers:
        dropout:
    """
    super(ISM, self).__init__()
    self.items_total = items_total
    self.embed_dim = embed_dim
    self.item_embed = nn.Embedding(num_embeddings=items_total, embedding_dim=embed_dim)

    self.time_encoding_method = time_encoding
    if time_encoding == 'none':
        self.time_encoding = None
    elif time_encoding == 'positional':
        self.time_encoding = PositionalEncoding(embed_dim=embed_dim)
    elif time_encoding == 'timestamp':
        self.time_encoding = TimestampEncoding(embed_dim=embed_dim)
    else:
        raise NotImplementedError()

    self.transformer_encoder = nn.TransformerEncoder(
        encoder_layer=nn.TransformerEncoderLayer(d_model=embed_dim,
                                                 nhead=transformer_num_heads,
                                                 dropout=dropout),
        num_layers=transformer_num_layers)
    self.attn = Attention(embed_dim=embed_dim, temperature=temperature)
    self.items_bias = nn.Parameter(torch.zeros(items_total))
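# Brief, self-contained shape check (illustrative values only) for the nn.TransformerEncoder
# stack used in the two modules above: with the default layout, the encoder expects
# sequence-first input of shape (seq_len, batch, embed_dim) and returns the same shape, which
# the downstream Attention layer can then pool over.
import torch
import torch.nn as nn

_embed_dim, _heads, _layers = 256, 8, 2
_item_embed = nn.Embedding(1000, _embed_dim)
_encoder = nn.TransformerEncoder(
    nn.TransformerEncoderLayer(d_model=_embed_dim, nhead=_heads, dropout=0.1),
    num_layers=_layers)

_items = torch.randint(0, 1000, (12, 4))      # (seq_len, batch) of item ids
_hidden = _encoder(_item_embed(_items))       # (seq_len, batch, embed_dim)
assert _hidden.shape == (12, 4, _embed_dim)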
def __init__(self, embedding, cfg):
    super(DecoderRNN_Attn, self).__init__()
    self.b = cfg.par.beam_size
    self.n = cfg.par.n_best
    ### embedding layer
    self.embedding = embedding  # [voc_length x emb_size] contains nn.Embedding()
    self.V = self.embedding.num_embeddings  # vocabulary size
    self.E = self.embedding.embedding_dim   # embedding size
    self.L = cfg.num_layers
    self.D = 2 if cfg.bidirectional else 1  # number of directions
    self.H = cfg.hidden_size
    self.cuda = cfg.cuda
    self.pointer = cfg.pointer
    self.coverage = cfg.coverage
    self.tt = torch.cuda if self.cuda else torch
    ### dropout layer to apply on top of the embedding layer
    self.dropout = nn.Dropout(cfg.par.dropout)
    ### set up the RNN
    # The dropout option adds dropout after all but the last recurrent layer,
    # so non-zero dropout expects num_layers greater than 1.
    dropout = cfg.par.dropout if self.L > 1 else 0.0
    if cfg.cell == "lstm":
        # input is embedding + hidden (to allow feed-input)
        self.rnn = nn.LSTM(self.E + self.H, self.H, self.L, dropout=dropout)
    elif cfg.cell == "gru":
        self.rnn = nn.GRU(self.E + self.H, self.H, self.L, dropout=dropout)
    else:
        sys.exit("error: bad -cell {} option. Use: lstm OR gru\n".format(cfg.cell))
    ### Attention mechanism
    self.attn = Attention(self.H, cfg.attention, cfg.coverage, cfg.cuda)
    ### pgen layer
    if self.pointer:
        self.pgen = nn.Linear(self.H * 2 + self.E, 1)
    ### concat layer
    self.concat = nn.Linear(self.H * 2, self.H)
    ### output layer
    self.output = nn.Linear(self.H, self.V)
def __init__(self, items_total: int, embed_dim: int, time_encoding: str,
             set_embed_method: str, num_set_embeds: int, num_transformer_heads: int,
             num_transformer_layers: int, itm_temperature: float = 1.0,
             stm_temperature: float = 1.0, dropout: float = 0.1,
             set_embed_dropout: float = 0.1, attn_output: bool = True):
    """
    Args:
        items_total:
        embed_dim:
        time_encoding:
        set_embed_method:
        num_transformer_heads:
        num_set_embeds:
        dropout:
    """
    super(DSNTSP, self).__init__()
    self.items_total = items_total
    self.embed_dim = embed_dim
    self.item_embed = nn.Embedding(num_embeddings=items_total, embedding_dim=embed_dim)

    # time encoding
    self.time_encoding_method = time_encoding
    if time_encoding == 'none':
        self.time_encoding = None
    elif time_encoding == 'positional':
        self.time_encoding = PositionalEncoding(embed_dim=embed_dim)
    elif time_encoding == 'timestamp':
        self.time_encoding = TimestampEncoding(embed_dim=embed_dim)
    else:
        raise NotImplementedError()

    # set embedding
    self.set_embed_method = set_embed_method
    if set_embed_method == 'attn_pool':
        self.set_embed = AttentionPool(embed_dim, num_queries=num_set_embeds,
                                       dropout=set_embed_dropout)
    else:
        raise NotImplementedError()

    # Dual Transformer
    # self.dual_transformer = DualTransformer(layers=[
    #     PDTE(embed_dim, num_transformer_heads, dropout=dropout),
    #     CDTE(embed_dim, num_transformer_heads, dropout=dropout),
    #     PDTE(embed_dim, num_transformer_heads, dropout=dropout),
    #     CDTE(embed_dim, num_transformer_heads, dropout=dropout),
    #     PDTE(embed_dim, num_transformer_heads, dropout=dropout),
    #     CDTE(embed_dim, num_transformer_heads, dropout=dropout),
    #     PDTE(embed_dim, num_transformer_heads, dropout=dropout),
    #     CDTE(embed_dim, num_transformer_heads, dropout=dropout)
    # ])

    # co-transformer
    self.co_transformer = CoTransformer(
        layer=CoTransformerLayer(embed_dim, num_transformer_heads, dropout=dropout),
        num_layers=num_transformer_layers)

    # attention-based prediction
    self.item_attn = Attention(embed_dim=embed_dim, temperature=itm_temperature)
    self.set_attn = Attention(embed_dim=embed_dim, temperature=stm_temperature)
    self.items_bias = nn.Parameter(torch.zeros(items_total))

    # gate network
    if attn_output:
        self.gate_net = nn.Sequential(nn.Linear(embed_dim * 3, embed_dim), nn.Sigmoid())
    else:
        self.gate_net = nn.Sequential(nn.Linear(embed_dim * 2, embed_dim), nn.Sigmoid())
    self.attn_output = attn_output
embedding_matrix = None
if args.gpt2 == False:
    # load the word_dict
    if not os.path.exists(args.data_path) or not os.path.exists(args.word_dict):
        write_data(args)
    with open(args.word_dict) as f:
        for line in f:
            word_dict = json.loads(line)
    p_np, m_np, r_np = get_pmr(args, word_dict)  # (32, 1)
    args.vocab_size = len(word_dict)

# model
attention = Attention(args.hidden_size)
encoder = Encoder(args, device, embedding_matrix, args.vocab_size, word_dict,
                  args.embedding_dim, args.hidden_size, dropout=args.dropout)
decoder = Decoder(args, attention, encoder, word_dict, args.vocab_size,
                  args.embedding_dim, args.char_num, args.hidden_size, device,
                  dropout=args.dropout)
model = PreStory(args, word_dict, device, encoder, decoder, args.embedding_dim,
                 args.pmr_size, args.hidden_size)

if args.use_cuda and args.gpu_para:
    model = nn.DataParallel(model, device_ids=[int(i) for i in args.device.split(',')])  # multi-GPU
    torch.backends.cudnn.benchmark = True
model = model.to(device)

# track the number of model parameters
num_parameters = 0
def __init__(self, train_dset, word_embedding_size=300, id_embedding_size=32,
             filter_sizes=[3], num_filters=100, attention_size=32, num_latent=32,
             dropout=0.5, soa_size=50):
    super(NARRE_SOA, self).__init__()
    self.review_num_filters = num_filters * len(filter_sizes)
    self.user_word_emb = WordEmbedding(train_dset.dictionary.ntoken, word_embedding_size)
    self.item_word_emb = WordEmbedding(train_dset.dictionary.ntoken, word_embedding_size)
    self.user_review_emb = ReviewCNN(train_dset.user_review_len,
                                     embedding_size=word_embedding_size,
                                     filter_sizes=filter_sizes, num_filters=num_filters)
    self.item_review_emb = ReviewCNN(train_dset.item_review_len,
                                     embedding_size=word_embedding_size,
                                     filter_sizes=filter_sizes, num_filters=num_filters)
    self.user_id_emb = WordEmbedding(train_dset.user_num, id_embedding_size, dropout=0.)
    self.item_id_emb = WordEmbedding(train_dset.item_num, id_embedding_size, dropout=0.)
    self.user_rid_emb = WordEmbedding(train_dset.item_num, id_embedding_size, dropout=0.)
    self.item_rid_emb = WordEmbedding(train_dset.user_num, id_embedding_size, dropout=0.)
    self.user_reviews_att = Attention(self.review_num_filters, id_embedding_size, attention_size)
    self.item_reviews_att = Attention(self.review_num_filters, id_embedding_size, attention_size)
    self.user_soft_attention = SoftAttention(self.review_num_filters, id_embedding_size, soa_size)
    self.item_soft_attention = SoftAttention(self.review_num_filters, id_embedding_size, soa_size)
    self.user_latent = nn.Linear(self.review_num_filters, num_latent)
    self.item_latent = nn.Linear(self.review_num_filters, num_latent)
    self.relu = nn.ReLU()
    self.classify = nn.Linear(num_latent, 1, bias=False)
    self.dropout = nn.Dropout(dropout)
    # Separate bias terms for users and items.
    self.user_bias = WordEmbedding(ntoken=train_dset.user_num, emb_dim=1, dropout=0.)
    self.item_bias = WordEmbedding(ntoken=train_dset.item_num, emb_dim=1, dropout=0.)
    self.global_bias = nn.Parameter(torch.Tensor([0.1]))
    # Initialisation.
    self.user_word_emb.init_embedding()
    self.item_word_emb.init_embedding()
    self.user_bias.init_embedding_with_one(0.1)
    self.item_bias.init_embedding_with_one(0.1)
class Decoder(nn.Module):
    def __init__(self):
        super(Decoder, self).__init__()
        self.n_frames_per_step = hps.n_frames_per_step
        self.n_mel_channels = hps.n_mel_channels
        self.encoder_embedding_dim = hps.encoder_embedding_dim
        self.attention_rnn_dim = hps.attention_rnn_dim
        self.decoder_rnn_dim = hps.decoder_rnn_dim

        self.prenet = Prenet()
        self.attention_rnn = nn.LSTMCell(256 + 512, 1024)
        self.attention_layer = Attention()
        # decoder rnn input : 256 + 512 = 768
        # decoder rnn output : 1024
        self.decoder_rnn = nn.LSTMCell(256 + 512, 1024, 1)
        self.linear_projection = LinearNorm(1024, 80 * 3)

    def get_go_frame(self, memory):
        batch_size = memory.size(0)
        go_frames = Variable(memory.data.new(
            batch_size, self.n_frames_per_step * self.n_mel_channels).zero_())
        return go_frames

    def parse_decoder_inputs(self, decoder_inputs):
        batch_size = decoder_inputs.size(0)
        frame_size = decoder_inputs.size(2)
        decoder_inputs = decoder_inputs.transpose(1, 2).contiguous()
        # print('decoder input transpose : ', decoder_inputs.size())
        decoder_inputs = decoder_inputs.view(
            batch_size, int(frame_size / self.n_frames_per_step), -1)
        # print('decoder input view : ', decoder_inputs.size())
        decoder_inputs = decoder_inputs.transpose(0, 1)
        # print('decoder input transpose : ', decoder_inputs.size())
        return decoder_inputs

    def parse_decoder_outputs(self, mel_outputs):
        # List[(B, 240), ...] -> (len(List) : 278, B, 240)
        mel_outputs = torch.stack(mel_outputs)
        # print(mel_outputs.size())
        mel_outputs = mel_outputs.transpose(0, 1).contiguous()
        # print(mel_outputs.size())
        batch_size = mel_outputs.size(0)
        mel_outputs = mel_outputs.view(batch_size, -1, 80)
        # print(mel_outputs.size())
        mel_outputs = mel_outputs.transpose(1, 2)
        # print(mel_outputs.size())
        return mel_outputs

    def initailze_decoder_states(self, memory, mask):
        batch_size = memory.size(0)
        max_time = memory.size(1)

        self.attention_hidden = Variable(
            memory.data.new(batch_size, self.attention_rnn_dim).zero_())
        self.attention_cell = Variable(
            memory.data.new(batch_size, self.attention_rnn_dim).zero_())

        self.decoder_hidden = Variable(
            memory.data.new(batch_size, self.decoder_rnn_dim).zero_())
        self.decoder_cell = Variable(
            memory.data.new(batch_size, self.decoder_rnn_dim).zero_())

        self.attention_weights = Variable(
            memory.data.new(batch_size, max_time).zero_())
        self.attention_weights_cum = Variable(
            memory.data.new(batch_size, max_time).zero_())
        self.attention_context = Variable(
            memory.data.new(batch_size, self.encoder_embedding_dim).zero_())

        # (B, Seq_len, 512)
        self.memory = memory
        # (B, Seq_len, 128)
        self.processed_memory = self.attention_layer.memory_layer(memory)
        self.mask = mask

    def decode(self, decoder_input):
        # decoder input : (B, 256)
        # attention context : (B, 512)
        # attention cell input : (B, 256+512)
        attention_cell_input = torch.cat((decoder_input, self.attention_context), dim=-1)
        print('attention cell input : ', attention_cell_input.size())

        # attention_hidden : (B, 1024)
        # attention_cell : (B, 1024)
        self.attention_hidden, self.attention_cell = self.attention_rnn(
            attention_cell_input, (self.attention_hidden, self.attention_cell))
        print('attention hidden, cell : ', self.attention_hidden.size(), self.attention_cell.size())
        self.attention_hidden = F.dropout(self.attention_hidden, 0.1, self.training)

        attention_weights_cat = torch.cat(
            (self.attention_weights.unsqueeze(1), self.attention_weights_cum.unsqueeze(1)), dim=1)
        print('attention_weights_cat : ', attention_weights_cat.size())
        print('attention_weights : ', self.attention_weights.size())
        print('attention_weights_cum : ', self.attention_weights_cum.size())

        self.attention_context, self.attention_weights = self.attention_layer(
            self.attention_hidden, self.memory, self.processed_memory,
            attention_weights_cat, self.mask)
        return None, None

    def forward(self, memory, decoder_inputs, memory_lengths):
        # memory : (B, Seq_len, 512) --> encoder outputs
        # decoder_inputs : (B, Mel_Channels : 80, frames)
        # memory lengths : (B)
        decoder_input = self.get_go_frame(memory).unsqueeze(0)
        # print('go frames : ', decoder_input.size())
        # print('decoder inputs : ', decoder_inputs.size())
        decoder_inputs = self.parse_decoder_inputs(decoder_inputs)
        decoder_inputs = torch.cat((decoder_input, decoder_inputs), dim=0)
        # print('decoder inputs : ', decoder_inputs.size())
        decoder_inputs = self.prenet(decoder_inputs)

        self.initailze_decoder_states(memory, mask=~get_mask_from_lengths(memory_lengths))

        mel_outputs, alignments = [], []
        while len(mel_outputs) < decoder_inputs.size(0) - 1:
            decoder_input = decoder_inputs[len(mel_outputs)]
            mel_output, attention_weights = self.decode(decoder_input)
            # mel_output : (1, B, 240)
            mel_outputs.append(mel_output)
            break
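# Note: decode() above stops after the attention step and returns (None, None), and forward()
# breaks after the first frame, so this class is clearly a work in progress. Purely as a hedged
# sketch (not the author's code), the remaining step that would be consistent with the declared
# layer sizes is: accumulate the attention weights, feed [prenet output (256) ; context (512)]
# to decoder_rnn (an LSTMCell(768, 1024)), and project decoder_hidden (1024) to a mel frame (240):
#
#     self.attention_weights_cum += self.attention_weights
#     decoder_rnn_input = torch.cat((decoder_input, self.attention_context), dim=-1)  # (B, 768)
#     self.decoder_hidden, self.decoder_cell = self.decoder_rnn(
#         decoder_rnn_input, (self.decoder_hidden, self.decoder_cell))
#     mel_output = self.linear_projection(self.decoder_hidden)                        # (B, 240)
#     return mel_output, self.attention_weights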