def __init__(self, args, Y, dicts, K=7, attn='bandanau'):
    super(AttDense, self).__init__()
    self.word_rep = WordRep(args, Y, dicts)
    self.att1 = Attention('', 100)
    filters = [100]
    dc = 200
    self.attn = attn
    # both branches build the same dense blocks; they differ only in the
    # per-level U module: attention (the 'bandanau' key, presumably for
    # Bahdanau) vs. a plain linear layer
    if attn == 'bandanau':
        for i in range(2, K + 1):
            filters += [dc]
            print(filters, sum(filters[:-1]))
            self.add_module(f"block{i - 2}",
                            DenseBlock(sum(filters[:-1]), filters[i - 1], 3))
            self.add_module(f"U{i - 2}", Attention('bmm', dc))
    else:
        for i in range(2, K + 1):
            filters += [dc]
            print(filters, sum(filters[:-1]))
            self.add_module(f"block{i - 2}",
                            DenseBlock(sum(filters[:-1]), filters[i - 1], 3))
            self.add_module(f"U{i - 2}", nn.Linear(dc, Y))
    # self.att = Attn('bmm', 200)
    # self.output_layer = OutputLayer(args, Y, dicts, dc)
    self.output_layer = nn.Linear(dc, Y)
    self.loss_function = nn.BCEWithLogitsLoss()
def __init__(self, output_size, embedding_size, hidden_size, key_size,
             value_size, num_layers, max_len):
    super(DecoderRNN, self).__init__()
    self.value_size = value_size
    self.num_layers = num_layers
    self.hidden_size = hidden_size
    self.embedding = nn.Embedding(output_size, embedding_size)
    self.max_len = max_len
    # learnable initial hidden/cell states, one per LSTM layer
    self.params_h0 = nn.ParameterList([
        nn.Parameter(torch.zeros(1, self.hidden_size)).float()
        for i in range(self.num_layers)
    ])
    self.params_c0 = nn.ParameterList([
        nn.Parameter(torch.zeros(1, self.hidden_size)).float()
        for i in range(self.num_layers)
    ])
    self.lstmCells = nn.ModuleList([
        nn.LSTMCell(input_size=value_size + embedding_size,
                    hidden_size=self.hidden_size),
        nn.LSTMCell(input_size=self.hidden_size,
                    hidden_size=self.hidden_size),
        nn.LSTMCell(input_size=self.hidden_size,
                    hidden_size=self.hidden_size)
    ])
    self.attention = Attention(self.hidden_size, key_size, value_size,
                               output_size)
    # weight tying: share the attention projection with the embedding matrix
    self.attention.projection.weight = self.embedding.weight
def test_attention_forward():
    with torch.no_grad():
        kq_dim = 2
        v_dim = 2
        hidden_dim = 16
        att = Attention(hidden_dim, key_and_query_dim=kq_dim, value_dim=v_dim)
        batch_size = 4
        q_seq_len = 2
        kv_seq_len = 7
        q_attention_input = torch.rand((batch_size, q_seq_len, hidden_dim))
        kv_attention_input = torch.rand((batch_size, kv_seq_len, hidden_dim))
        attention_output = att.forward(q_hidden_inputs=q_attention_input,
                                       k_hidden_inputs=kv_attention_input,
                                       v_hidden_inputs=kv_attention_input,
                                       mask=None)
        assert attention_output.size() == torch.Size(
            (batch_size, q_seq_len, v_dim))
        assert attention_output.sum().item() != 0
        # masking out the last key position must zero its attention weights
        mask = torch.ones((batch_size, q_seq_len, kv_seq_len))
        mask[:, :, -1] = 0
        attention_output = att.forward(q_hidden_inputs=q_attention_input,
                                       k_hidden_inputs=kv_attention_input,
                                       v_hidden_inputs=kv_attention_input,
                                       mask=mask,
                                       save_attention=True)
        assert attention_output.size() == torch.Size(
            (batch_size, q_seq_len, v_dim))
        assert att.attention[:, :, -1].sum().item() == 0
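
# A minimal sketch of an Attention module that satisfies the interface the
# test above exercises (constructor args, forward kwargs, and the saved
# `attention` weights). This is an assumption for illustration, not the
# project's actual implementation.
import torch
import torch.nn as nn
import torch.nn.functional as F


class Attention(nn.Module):
    def __init__(self, hidden_dim, key_and_query_dim, value_dim):
        super().__init__()
        self.scale = key_and_query_dim ** -0.5
        self.q_proj = nn.Linear(hidden_dim, key_and_query_dim)
        self.k_proj = nn.Linear(hidden_dim, key_and_query_dim)
        self.v_proj = nn.Linear(hidden_dim, value_dim)
        self.attention = None  # filled in when save_attention=True

    def forward(self, q_hidden_inputs, k_hidden_inputs, v_hidden_inputs,
                mask=None, save_attention=False):
        q = self.q_proj(q_hidden_inputs)                        # (B, Lq, Dk)
        k = self.k_proj(k_hidden_inputs)                        # (B, Lk, Dk)
        v = self.v_proj(v_hidden_inputs)                        # (B, Lk, Dv)
        scores = torch.bmm(q, k.transpose(1, 2)) * self.scale   # (B, Lq, Lk)
        if mask is not None:
            # exp(-inf) == 0, so masked keys get exactly zero weight
            scores = scores.masked_fill(mask == 0, float('-inf'))
        weights = F.softmax(scores, dim=-1)                     # (B, Lq, Lk)
        if save_attention:
            self.attention = weights
        return torch.bmm(weights, v)                            # (B, Lq, Dv)
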
def get_predictions(self, frames, scope):
    frames = self._reshape_to_conv(frames)
    cnn = CNN()
    if self.operation == 'training':
        cnn_output = cnn.create_model(frames,
                                      cnn.conv_filters,
                                      keep_prob=self.keep_prob)
    else:
        # no dropout at evaluation time
        cnn_output = cnn.create_model(frames,
                                      cnn.conv_filters,
                                      keep_prob=1.0)
    cnn_output = self._reshape_to_rnn(cnn_output)
    rnn = RNN()
    rnn_output = rnn.create_model(cnn_output, scope + '_rnn')
    if self.is_attention:
        attention = Attention(self.batch_size)
        attention_output = attention.create_model(rnn_output,
                                                  scope + '_attention')
        fc = FC(self.num_classes)
        outputs = fc.create_model(attention_output, scope + '_fc')
    else:
        # without attention, use only the last RNN timestep
        rnn_output = rnn_output[:, -1, :]
        fc = FC(self.num_classes)
        outputs = fc.create_model(rnn_output, scope + '_fc')
    return outputs
def __init__(self, vocab_size, embedding_dim, dropout, device):
    super(AttentionLSTMModel, self).__init__()
    self.save_name = "AttentionLSTMModel.pt"
    self.device = device
    self.embeddings = nn.Embedding(vocab_size, embedding_dim)
    self.dropout = nn.Dropout(dropout)
    self.lstm = nn.LSTM(embedding_dim, 500, batch_first=True)
    self.attention = Attention(500)
    self.fc0 = nn.Linear(1000, 300)
    self.fc1 = nn.Linear(300, vocab_size)
def __init__(self, args, data):
    super(Model, self).__init__()
    self.window = args.window
    self.variables = data.m
    self.hw = args.highway_window
    self.activate1 = F.relu
    self.hidR = args.hidRNN
    self.rnn1 = nn.LSTM(self.variables,
                        self.hidR,
                        num_layers=args.rnn_layers,
                        bidirectional=False)
    self.linear1 = nn.Linear(self.hidR, self.variables)
    # self.linear1 = nn.Linear(1280, 100)
    # self.out = nn.Linear(100, self.variables)
    if self.hw > 0:
        self.highway = nn.Linear(self.hw, 1)
    print(self.hidR)
    print(self.window)
    # self.attention = Attention(hidden_emb=self.hidR, seq_len=self.window)
    # attention module (note the hard-coded sequence length of 128)
    self.attention = Attention(hidden_emb=self.hidR, seq_len=128)
    self.dropout = nn.Dropout(p=args.dropout)
    self.output = None
    if args.output_fun == 'sigmoid':
        self.output = F.sigmoid
    if args.output_fun == 'tanh':
        self.output = F.tanh
def __init__(self, vocab_size, max_len, hidden_size, embedding_size, sos_id,
             eos_id, input_dropout_p=0, dropout_p=0, n_layers=1,
             bidirectional=False, rnn_cell='lstm', use_attention=True):
    super(DecoderRNN, self).__init__(vocab_size, max_len, hidden_size,
                                     input_dropout_p, dropout_p, n_layers,
                                     rnn_cell)
    self.bidirectional_encoder = bidirectional
    self.output_size = vocab_size
    self.max_length = max_len
    self.use_attention = use_attention
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.init_input = None
    self.embedding = nn.Embedding(self.output_size, embedding_size)
    self.part_embedding = nn.Embedding(4900, 50)
    # the token embedding is concatenated with the 50-dim part embedding
    self.rnn = self.rnn_cell(embedding_size + 50,
                             hidden_size,
                             n_layers,
                             batch_first=True,
                             dropout=dropout_p)
    if use_attention:
        self.attention = Attention(self.hidden_size)
    self.out = nn.Linear(self.hidden_size, self.output_size)
def __init__(
        self,
        input_feauters,
        rnn_units,
        # max_seq_len,
        pool_method,
        encoder_type,
        hidden_middle_val):
    super(DocumentEncoderRNN, self).__init__()
    # self.max_seq_len = max_seq_len
    self.emb_dim = input_feauters
    self.rnn_units = rnn_units
    self.pool_method = pool_method
    self.hidden_middle_val = hidden_middle_val * 2
    self.encoder_type = encoder_type
    self.encoder = encoder_type(self.emb_dim,
                                self.rnn_units,
                                bidirectional=True,
                                batch_first=True)
    # self.hidden = self.init_hidden()
    # the encoder is bidirectional, so pooled features have 2 * rnn_units dims
    if self.pool_method == 'attention':
        self.attention = Attention(self.rnn_units * 2)
    elif self.pool_method == 'relative_attention':
        self.attention = RelativeAttention(self.rnn_units * 2)
def __init__(self, opt):
    super().__init__()
    self.opt = opt
    with open(
            os.path.join(
                'data',
                f'debug{opt.debug}.{opt.dataset}.spacy.wv.{opt.embedding_dim}.pkl'
            ), 'rb') as f:
        emb_m = pickle.load(f)
    self.emb = nn.Embedding.from_pretrained(torch.Tensor(emb_m), freeze=False)
    self.embedding_drop = nn.Dropout(opt.embedding_drop)
    self.rnn = nn.LSTM(opt.embedding_dim,
                       opt.lstm_hidden_dim,
                       batch_first=True,
                       bidirectional=True)
    self.embedding_drop_2 = nn.Dropout(opt.embedding_drop)
    self.rnn_2 = nn.LSTM(opt.embedding_dim,
                         opt.lstm_hidden_dim,
                         batch_first=True,
                         bidirectional=True)
    self.att = Attention(2 * opt.lstm_hidden_dim, opt.embedding_drop)
    self.fc = nn.Linear(3 * 2 * opt.lstm_hidden_dim, 2 * opt.lstm_hidden_dim)
    self.relu = nn.ReLU()
    self.fc_drop = nn.Dropout(opt.fc_drop)
    self.classifier = nn.Linear(opt.lstm_hidden_dim * 2, opt.num_classes)
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=-1)(inputs)
    x = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                      merge_mode='concat')(inputs)
    # activation_1 = Activation('tanh')(lstm_1)
    x = SpatialDropout1D(0.1)(x)
    x = Attention(8, 16)([x, x, x])  # self-attention: the same tensor is Q, K and V
    x1 = GlobalMaxPool1D()(x)
    x2 = GlobalAvgPool1D()(x)
    x = Concatenate(axis=-1)([x1, x2])
    x = Dense(units=128, activation='elu')(x)
    x = Dense(units=64, activation='elu')(x)
    x = Dropout(rate=0.4)(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)
    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
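
# A hedged sketch of a multi-head self-attention layer matching the
# Attention(n_heads, head_size)([q, k, v]) call pattern used in the Keras
# models in this file (output width = n_heads * head_size, e.g. 8 * 16 = 128).
# The class name and internals here are assumptions, not the project's code.
import tensorflow as tf
from tensorflow.keras.layers import Layer


class MultiHeadSelfAttention(Layer):  # hypothetical stand-in for Attention(8, 16)
    def __init__(self, n_heads, head_size, **kwargs):
        super().__init__(**kwargs)
        self.n_heads = n_heads
        self.head_size = head_size
        self.out_dim = n_heads * head_size

    def build(self, input_shape):
        q_dim = int(input_shape[0][-1])
        k_dim = int(input_shape[1][-1])
        v_dim = int(input_shape[2][-1])
        self.wq = self.add_weight(name='wq', shape=(q_dim, self.out_dim),
                                  initializer='glorot_uniform')
        self.wk = self.add_weight(name='wk', shape=(k_dim, self.out_dim),
                                  initializer='glorot_uniform')
        self.wv = self.add_weight(name='wv', shape=(v_dim, self.out_dim),
                                  initializer='glorot_uniform')
        super().build(input_shape)

    def _split_heads(self, x):
        # (B, T, n_heads * head_size) -> (B, n_heads, T, head_size)
        b, t = tf.shape(x)[0], tf.shape(x)[1]
        x = tf.reshape(x, (b, t, self.n_heads, self.head_size))
        return tf.transpose(x, (0, 2, 1, 3))

    def call(self, inputs):
        q, k, v = inputs
        q = self._split_heads(tf.tensordot(q, self.wq, axes=1))
        k = self._split_heads(tf.tensordot(k, self.wk, axes=1))
        v = self._split_heads(tf.tensordot(v, self.wv, axes=1))
        # scaled dot-product attention per head
        scores = tf.matmul(q, k, transpose_b=True) / (self.head_size ** 0.5)
        weights = tf.nn.softmax(scores, axis=-1)
        out = tf.matmul(weights, v)                   # (B, n_heads, T, head_size)
        out = tf.transpose(out, (0, 2, 1, 3))         # (B, T, n_heads, head_size)
        b, t = tf.shape(out)[0], tf.shape(out)[1]
        return tf.reshape(out, (b, t, self.out_dim))  # (B, T, n_heads * head_size)

    def compute_output_shape(self, input_shape):
        return (input_shape[0][0], input_shape[0][1], self.out_dim)
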
def __init__(self, vocab_size, embed_dim, hidden_dim, max_len, trg_soi,
             nlayers=2, dropout_rate=0.2, attention=False, cuda=True):
    super(RNNHighwayDecoder, self).__init__()
    self.hidden_dim = hidden_dim
    self.max_len = max_len
    self.vocab_size = vocab_size
    self.trg_soi = trg_soi
    self.att = attention
    # caution: this overwrites nn.Module.cuda() with a bool flag
    self.cuda = cuda
    self.trainable = True
    self.embed = nn.Embedding(vocab_size, embed_dim)
    self.attention = Attention(self.hidden_dim)
    # DecoderCell(embed_dim, hidden_dim)
    self.decodercell = RHNContextCell(embed_dim,
                                      h=hidden_dim,
                                      depth=nlayers,
                                      gateDrop=dropout_rate)
    self.dec2word = nn.Linear(hidden_dim, vocab_size)
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    lstm_1 = Bidirectional(CuDNNLSTM(64, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)
    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def __init__(self, config, hidden_size=512, n_layers=8, bidirectional=False,
             attention=False):
    super(_LSTMModel, self).__init__()
    self.attention = attention
    # lstm layers
    self.lstm = LSTM(64,
                     hidden_size,
                     n_layers,
                     dropout=config.lstm_dropout,
                     bidirectional=bidirectional)
    n_layers *= 2 if bidirectional else 1
    hidden_size *= 2 if bidirectional else 1
    if attention:
        self.att_layer = Attention(hidden_size, (256, hidden_size),
                                   batch_first=True)
    self.avg_pooling = AdaptiveAvgPool2d((1, hidden_size))
    # fully connected output layers
    self.gender_out = Sequential(Dropout(config.fc_dropout),
                                 Linear(hidden_size, 3))
    self.accent_out = Sequential(Dropout(config.fc_dropout),
                                 Linear(hidden_size, 16))
    # initialise the network's weights
    self.init_weights()
def __init__(self, attention_dim, embed_dim, decoder_dim, vocab_size,
             encoder_dim=2048, dropout=0.5):
    super(PureAttention, self).__init__()
    self.encoder_dim = encoder_dim
    self.attention_dim = attention_dim
    self.embed_dim = embed_dim
    self.decoder_dim = decoder_dim
    self.vocab_size = vocab_size
    self.dropout = dropout
    self.attention = Attention(encoder_dim, decoder_dim,
                               attention_dim)  # attention network
    self.embedding = nn.Embedding(vocab_size, embed_dim)  # embedding layer
    self.dropout = nn.Dropout(p=self.dropout)
    self.decode_step = nn.LSTMCell(embed_dim + encoder_dim,
                                   decoder_dim,
                                   bias=True)  # decoding LSTMCell
    # linear layer to find initial hidden state of LSTMCell
    self.init_h = nn.Linear(encoder_dim, decoder_dim)
    # linear layer to find initial cell state of LSTMCell
    self.init_c = nn.Linear(encoder_dim, decoder_dim)
    # linear layer to create a sigmoid-activated gate
    self.f_beta = nn.Linear(decoder_dim, encoder_dim)
    self.sigmoid = nn.Sigmoid()
    # linear layer to find scores over vocabulary
    self.fc = nn.Linear(decoder_dim, vocab_size)
    # initialize some layers with the uniform distribution
    self.init_weights()
def __init__(self, embed_size, vocab_size, attention_dim, encoder_dim,
             decoder_dim, drop_prob=0.3):
    super().__init__()
    self.vocab_size = vocab_size
    self.attention_dim = attention_dim
    self.decoder_dim = decoder_dim
    self.embedding = nn.Embedding(vocab_size, embed_size)
    self.attention = Attention(encoder_dim, decoder_dim, attention_dim)
    self.init_h = nn.Linear(encoder_dim, decoder_dim)
    self.init_c = nn.Linear(encoder_dim, decoder_dim)
    self.lstm_cell = nn.LSTMCell(embed_size + encoder_dim,
                                 decoder_dim,
                                 bias=True)
    self.f_beta = nn.Linear(decoder_dim, encoder_dim)
    self.fcn = nn.Linear(decoder_dim, vocab_size)
    self.drop = nn.Dropout(drop_prob)
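
# A hedged sketch of the additive (Bahdanau-style) attention module that the
# Attention(encoder_dim, decoder_dim, attention_dim) calls in the two
# image-captioning decoders above appear to expect; the project's real module
# may differ in naming and return values.
import torch
import torch.nn as nn


class AdditiveAttention(nn.Module):  # hypothetical stand-in for Attention
    def __init__(self, encoder_dim, decoder_dim, attention_dim):
        super().__init__()
        self.encoder_att = nn.Linear(encoder_dim, attention_dim)
        self.decoder_att = nn.Linear(decoder_dim, attention_dim)
        self.full_att = nn.Linear(attention_dim, 1)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, encoder_out, decoder_hidden):
        # encoder_out: (B, num_regions, encoder_dim); decoder_hidden: (B, decoder_dim)
        att1 = self.encoder_att(encoder_out)                 # (B, num_regions, attention_dim)
        att2 = self.decoder_att(decoder_hidden)              # (B, attention_dim)
        scores = self.full_att(
            self.relu(att1 + att2.unsqueeze(1))).squeeze(2)  # (B, num_regions)
        alpha = self.softmax(scores)                         # (B, num_regions)
        context = (encoder_out * alpha.unsqueeze(2)).sum(dim=1)  # (B, encoder_dim)
        return context, alpha
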
def multi_get_attention(self, frames):
    frames = self._reshape_to_conv(frames)
    cnn = CNN()
    cnn_output = cnn.create_model(frames, cnn.conv_filters)
    cnn_output = self._reshape_to_rnn(cnn_output)
    rnn = RNN()
    rnn_output = rnn.create_model(cnn_output)
    if self.is_attention:
        attention = Attention(self.batch_size)
        attention_output = attention.attention_analysis(rnn_output)
        return attention_output
    else:
        rnn_output = rnn_output[:, -1, :]
        fc = FC(self.num_classes)
        outputs = fc.create_model(rnn_output)
        return outputs
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    sequence_len = input_shape[0]
    # pick the LSTM width closest to the sequence length
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]
    lstm_1 = CuDNNLSTM(lstm_units, return_sequences=True)(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    if num_classes >= 20:
        if num_classes < 30:
            dropout1 = SpatialDropout1D(0.5)(activation_1)
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        else:
            attention_1 = Attention(
                8, 16)([activation_1, activation_1, activation_1])
        # k-max pooling over time; the reshape assumes an attention output
        # width of 128 (presumably 8 heads * 16 dims)
        k_num = 10
        kmaxpool_l = Lambda(lambda x: tf.reshape(
            tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
            shape=[-1, k_num, 128]))(attention_1)
        flatten = Flatten()(kmaxpool_l)
        dropout2 = Dropout(rate=0.5)(flatten)
    else:
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_l = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_l)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    # dense_1 = Dense(units=256, activation='softplus',
    #                 kernel_regularizer=regularizers.l2(0.01),
    #                 activity_regularizer=regularizers.l1(0.01))(dropout2)
    # dense_1 = DropConnect(Dense(units=256, activation='softplus'), prob=0.5)(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)
    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(
        optimizer=optimizer,
        loss=loss_fun,
        # loss="sparse_categorical_crossentropy",
        metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def __init__(self, vocab_size, max_len, hidden_size, embedding_size, sos_id,
             eos_id, input_dropout_p, dropout_p, position_embedding,
             pos_embedding, n_layers, bidirectional, rnn_cell, use_attention,
             attn_layers, hard_attn, pos_add, use_memory, memory_dim):
    super(DecoderRNN, self).__init__(vocab_size, max_len, hidden_size,
                                     input_dropout_p, dropout_p, n_layers,
                                     rnn_cell)
    self.bidirectional_encoder = bidirectional
    self.output_size = vocab_size
    self.attn_layers = attn_layers
    self.max_length = max_len
    self.use_attention = use_attention
    self.hard_attn = hard_attn
    self.eos_id = eos_id
    self.sos_id = sos_id
    self.s_rnn = rnn_cell
    self.init_input = None
    self.embedding_size = embedding_size
    self.embedding = nn.Embedding(self.output_size, embedding_size)
    self.pos_embedding = pos_embedding
    self.position_embedding = position_embedding
    self.pos_add = pos_add
    # 'cat' concatenates the positional embedding, doubling the RNN input size
    if pos_add == 'cat':
        rnn_input_size = embedding_size * 2
    else:
        rnn_input_size = embedding_size
    self.rnn = self.rnn_cell(rnn_input_size,
                             hidden_size,
                             n_layers,
                             batch_first=True,
                             dropout=dropout_p)
    if use_attention:
        if hard_attn:
            # with hard attention, the output layer consumes both contexts
            # (hence hidden_size * 2)
            self.attention = Attention(self.hidden_size)
            self.hard_attention = HardAttention(self.hidden_size)
            self.out = nn.Linear(self.hidden_size * 2, self.output_size)
        else:
            self.attention1 = Attention(int(self.hidden_size / attn_layers))
            self.out = nn.Linear(self.hidden_size, self.output_size)
    else:
        self.out = nn.Linear(self.hidden_size, self.output_size)
    self.use_memory = use_memory
    if use_memory is not None:
        self.init_memory_augmented(max_len, memory_dim)
def __init__(self, input_size, output_size):
    super(GRU, self).__init__()
    # integer division: hidden sizes must be ints (the bidirectional outputs
    # are concatenated back up to output_size // 2 and output_size)
    self.encoder = nn.GRU(input_size,
                          output_size // 4,
                          num_layers=1,
                          batch_first=True,
                          dropout=0.1,
                          bidirectional=True)
    self.attention = Attention(output_size // 2)
    self.linear_filter = nn.Linear(output_size, output_size)
    self.sigmoid = nn.Sigmoid()
    self.post_attention = nn.GRU(output_size,
                                 output_size // 2,
                                 num_layers=1,
                                 batch_first=True,
                                 dropout=0.1,
                                 bidirectional=True)
    self.final_attention = Attention(output_size)
def load_model(model_name, model_config, embedding_matrix):
    if model_name == 'deep_cnn':
        model = cnn.DPCnn(model_config)
    elif model_name == 'cnn':
        model = cnn.Cnn(model_config)
    elif model_name == 'attention':
        model = Attention(model_config)
    elif model_name == 'rcnn':
        model = rcnn.RCnn(model_config)
    elif model_name == 'capsule':
        model = capsule.CapsuleRnn(model_config)
    elif model_name == 'hybrid':
        model = hybridnn.HybridNN(model_config)
    else:
        return None
    model.compile(embedding_matrix)
    logging.info('==== {} model loaded ===='.format(model_name))
    return model
def __init__(self, filed=80):
    super(IAN_LSTM, self).__init__()
    self.filed = filed
    self.cnn_l = CNN(filed=self.filed)
    self.rnn_l = nn.LSTM(input_size=55,
                         hidden_size=64,
                         num_layers=4,
                         batch_first=True)
    # interactive attention between aspect and context representations
    self.attention_aspect = Attention(64, score_function='bi_linear')
    self.attention_context = Attention(64, score_function='bi_linear')
    self.linear = nn.Sequential(
        nn.Linear(128, 64),
        nn.Linear(64, 2),
    )
def get_multi_predictions(self, frames):
    frames = self._reshape_to_conv(frames)
    cnn = CNN()
    if self.operation == 'training':
        cnn_output = cnn.create_model(frames,
                                      cnn.conv_filters,
                                      keep_prob=self.keep_prob)
    else:
        cnn_output = cnn.create_model(frames,
                                      cnn.conv_filters,
                                      keep_prob=1.0)
    cnn_output = self._reshape_to_rnn(cnn_output)
    rnn = RNN()
    # one RNN branch per predicted dimension
    arousal_rnn_output = rnn.create_model(cnn_output, 'arousal_rnn')
    valence_rnn_output = rnn.create_model(cnn_output, 'valence_rnn')
    dominance_rnn_output = rnn.create_model(cnn_output, 'dominance_rnn')
    if self.is_attention:
        attention = Attention(self.batch_size)
        arousal_attention_output = attention.create_model(
            arousal_rnn_output, 'arousal_attention')
        valence_attention_output = attention.create_model(
            valence_rnn_output, 'valence_attention')
        dominance_attention_output = attention.create_model(
            dominance_rnn_output, 'dominance_attention')
        fc = FC(self.num_classes)
        arousal_fc_outputs = fc.create_model(arousal_attention_output,
                                             'arousal_fc')
        valence_fc_outputs = fc.create_model(valence_attention_output,
                                             'valence_fc')
        dominance_fc_outputs = fc.create_model(dominance_attention_output,
                                               'dominance_fc')
    else:
        arousal_rnn_output = arousal_rnn_output[:, -1, :]
        valence_rnn_output = valence_rnn_output[:, -1, :]
        dominance_rnn_output = dominance_rnn_output[:, -1, :]
        fc = FC(self.num_classes)
        arousal_fc_outputs = fc.create_model(arousal_rnn_output, 'arousal_fc')
        valence_fc_outputs = fc.create_model(valence_rnn_output, 'valence_fc')
        dominance_fc_outputs = fc.create_model(dominance_rnn_output,
                                               'dominance_fc')
    return arousal_fc_outputs, valence_fc_outputs, dominance_fc_outputs
def transformer_encoder(emb_dim, MAX_NB_WORDS, MAX_SEQUENCE_LENGTH,
                        embedding_matrix, optimizer):
    context_input = Input(shape=(None, ), dtype='int32')
    response_input = Input(shape=(None, ), dtype='int32')
    # context_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    # response_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
    embedding_layer = Embedding(
        output_dim=emb_dim,
        input_dim=MAX_NB_WORDS,
        input_length=MAX_SEQUENCE_LENGTH,
        weights=[embedding_matrix],
        # mask_zero=True,
        trainable=True)
    embedded_sequences_c = embedding_layer(context_input)
    embedded_dropout_c = Dropout(0.2)(embedded_sequences_c)
    # add positional embeddings, as used with self-attention
    embeddings_final_c = Position_Embedding()(embedded_dropout_c)
    embedded_sequences_r = embedding_layer(response_input)
    embedded_dropout_r = Dropout(0.2)(embedded_sequences_r)
    embeddings_final_r = Position_Embedding()(embedded_dropout_r)
    print("Now building encoder model with self attention...")
    # the three inputs are the K, V and Q needed for self-attention
    c_seq = Attention(8, 16)(
        [embeddings_final_c, embeddings_final_c, embeddings_final_c])
    c_seq = GlobalAveragePooling1D()(c_seq)
    c_seq = Dropout(0.2)(c_seq)
    r_seq = Attention(8, 16)(
        [embeddings_final_r, embeddings_final_r, embeddings_final_r])
    r_seq = GlobalAveragePooling1D()(r_seq)
    r_seq = Dropout(0.2)(r_seq)
    concatenated = Multiply()([c_seq, r_seq])
    out = Dense(1, activation="sigmoid")(concatenated)
    model = Model([context_input, response_input], out)
    model.compile(loss='binary_crossentropy', optimizer=optimizer)
    # print(encoder.summary())
    print(model.summary())
    return model
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    sequence_len = input_shape[0]
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]
    lstm_1 = Bidirectional(CuDNNLSTM(lstm_units, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    # note: both branches currently build the same attention layer
    if lstm_units <= 128:
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    else:
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    # dense_1 = Dense(units=256, activation='relu',
    #                 kernel_regularizer=regularizers.l2(0.01),
    #                 activity_regularizer=regularizers.l1(0.01))(dropout2)
    # dense_1 = DropConnect(Dense(units=256, activation='relu'), prob=0.5)(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)
    model = TFModel(inputs=inputs, outputs=outputs)
    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(
        optimizer=optimizer,
        loss=loss_fun,
        # loss="sparse_categorical_crossentropy",
        metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def __init__(self, doc_len, text_len, vocab_size, embed_dim, word_hidden_size,
             sent_hidden_size, title_vocab_size, title_hidden_size,
             linear_out_size_2, linear_out_size_1, num_classes, dropout):
    super(hanLSTM, self).__init__()
    self.doc_len = doc_len
    self.text_len = text_len
    self.word_hidden_size = word_hidden_size
    self.embed_size = embed_dim
    self.sent_hidden_size = sent_hidden_size
    self.title_hidden_size = title_hidden_size
    self.vocab_size = vocab_size
    self.title_vocab_size = title_vocab_size
    self.num_classes = num_classes
    self.linear_out_size_1 = linear_out_size_1
    self.linear_out_size_2 = linear_out_size_2
    self.embedding = nn.Embedding(self.vocab_size, self.embed_size)
    self.title_embedding = nn.Embedding(self.title_vocab_size, self.embed_size)
    self.sent_wise_lstms = nn.ModuleList()
    self.sent_wise_attlstms = nn.ModuleList()
    self.dropout = dropout
    # one word-level LSTM + attention per sentence position in the document
    for i in range(self.doc_len):
        self.sent_wise_lstms.append(
            nn.Sequential(lstm_cell(self.embed_size, self.word_hidden_size),
                          nn.Dropout(p=self.dropout)))
        self.sent_wise_attlstms.append(Attention(self.word_hidden_size))
    # sentence-level encoder and attention over sentence representations
    self.doc_lstm = nn.Sequential(
        lstm_cell(self.word_hidden_size, self.sent_hidden_size),
        nn.Dropout(p=self.dropout))
    self.doc_attention = Attention(self.sent_hidden_size)
    self.title_lstm = nn.Sequential(
        lstm_cell(self.embed_size, self.title_hidden_size),
        nn.Dropout(p=self.dropout))
    self.title_attention = Attention(self.title_hidden_size)
    self.linear_stack = nn.Sequential(
        nn.Linear(self.sent_hidden_size + self.title_hidden_size,
                  self.linear_out_size_2), nn.ReLU(),
        nn.Dropout(p=self.dropout),
        nn.Linear(self.linear_out_size_2, self.linear_out_size_1), nn.ReLU(),
        nn.Linear(self.linear_out_size_1, self.num_classes))
def build(self):
    input = Input(shape=(self.max_sequence_len, ))
    embedding_layer = self.embedding_layer(input)
    bi = Bidirectional(GRU(128, return_sequences=True))(embedding_layer)
    att = Attention()(bi)
    output = Dense(self.class_len, activation='sigmoid')(att)
    model = Model(inputs=input, outputs=output)
    return model
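
# A hedged sketch of what the no-argument Attention() pooling layer used in
# build() above might look like: it is assumed to collapse the
# (batch, time, features) GRU output into a (batch, features) vector via a
# learned softmax over timesteps. The layer name and weights are hypothetical.
import tensorflow as tf
from tensorflow.keras.layers import Layer


class AttentionPooling(Layer):  # hypothetical stand-in for Attention()
    def build(self, input_shape):
        self.w = self.add_weight(name='att_weight',
                                 shape=(int(input_shape[-1]), 1),
                                 initializer='glorot_uniform')
        super().build(input_shape)

    def call(self, inputs):
        # score each timestep, softmax over time, then take the weighted sum
        scores = tf.squeeze(tf.tanh(tf.tensordot(inputs, self.w, axes=1)), -1)  # (B, T)
        weights = tf.nn.softmax(scores, axis=-1)                                # (B, T)
        return tf.reduce_sum(inputs * tf.expand_dims(weights, -1), axis=1)      # (B, F)

    def compute_output_shape(self, input_shape):
        return (input_shape[0], input_shape[-1])
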
def build_model(self):
    vocab_size = int(self.config.model.vocab_size)
    embedding_size = int(self.config.model.embedding_size)
    lstm_units = int(self.config.model.lstm_units)
    output_size = int(self.config.model.output_size)
    batch_size = int(self.config.trainer.batch_size)
    use_elmo = bool(self.config.model.use_elmo)
    # input layer
    input_dtype = 'string' if use_elmo else None
    _input = tf.keras.layers.Input(shape=(None, ),
                                   batch_size=batch_size,
                                   dtype=input_dtype)
    # embeddings layer
    if use_elmo:
        embeddings = ElmoEmbeddingLayer()(_input)
        embedding_size = 1024  # hard coded in elmo
    else:
        embeddings = tf.keras.layers.Embedding(vocab_size,
                                               embedding_size,
                                               mask_zero=True)(_input)
    bilstm, forward_h, _, backward_h, _ = tf.keras.layers.Bidirectional(
        tf.keras.layers.LSTM(lstm_units,
                             return_sequences=True,
                             return_state=True,
                             dropout=0.2,
                             recurrent_dropout=0.2,
                             input_shape=(batch_size, None, embedding_size)),
        merge_mode='sum')(embeddings)
    state_h = tf.keras.layers.Concatenate()([forward_h, backward_h])
    ctx, attn = Attention(lstm_units)([bilstm, state_h])
    conc = tf.keras.layers.Concatenate()([bilstm, ctx])
    logits = tf.keras.layers.TimeDistributed(
        tf.keras.layers.Dense(output_size))(conc)
    # additive mask applied to the logits before the softmax
    mask = tf.keras.layers.Input(shape=(None, output_size),
                                 batch_size=batch_size)
    masked_logits = tf.keras.layers.Add()([logits, mask])
    output = tf.keras.layers.Softmax()(masked_logits)
    self.model = tf.keras.Model(inputs=[_input, mask],
                                outputs=output,
                                name='attention')
    self.model.compile(loss='sparse_categorical_crossentropy',
                       optimizer=tf.keras.optimizers.Adam(),
                       metrics=['acc'])
def __init__(self, output_size, device):
    super(AttentionDecoder, self).__init__()
    self.hidden_size = 128
    self.device = device
    self.dropout = nn.Dropout(0.5)
    self.embedding = nn.Embedding(output_size, self.hidden_size)
    self.attention = Attention(self.hidden_size)
    # GRU input is hidden_size * 2, presumably the embedding concatenated
    # with the attention context
    self.gru = nn.GRU(self.hidden_size * 2,
                      self.hidden_size,
                      batch_first=True)
    self.out = nn.Linear(self.hidden_size, output_size)
def __init__(self, args, data):
    super(Model, self).__init__()
    # number of variables (columns) in the data, "D" in Table 1 of the paper
    self.variables = data.m
    # number of features in the model's hidden state, set via args.hidRNN
    self.hidR = args.hidRNN
    # number of recurrent layers; e.g. 2 means two stacked RNNs, set via args.rnn_layers
    self.layers = args.rnn_layers
    # window size, i.e. how long a history to train on; here 7 * 24 = 168
    # (one week), set via args.window
    self.window = args.window
    # attention module
    self.attention = Attention(seq_len=self.window, hidden_emb=self.hidR)
    # GRU model (see https://pytorch.apachecn.org/docs/1.2/nn.html)
    #
    # Parameters:
    #   input_size – the number of expected features in the input x
    #   hidden_size – the number of features in the hidden state h
    #   num_layers – number of recurrent layers; e.g. num_layers=2 stacks two
    #       GRUs, the second taking the outputs of the first. Default: 1
    #   bias – if False, the layer does not use bias weights b_ih and b_hh. Default: True
    #   batch_first – if True, input and output tensors are (batch, seq, feature). Default: False
    #   dropout – if non-zero, adds a Dropout layer on the outputs of each layer
    #       except the last, with the given probability. Default: 0
    #   bidirectional – if True, becomes a bidirectional GRU. Default: False
    #
    # Inputs: input, h_0
    #   input: tensor of shape (seq_len, batch, input_size)
    #   h_0: (num_layers * num_directions, batch, hidden_size), the initial
    #       hidden state; defaults to zeros if not provided
    # Outputs: output, h_n
    #   output: tensor of shape (seq_len, batch, num_directions * hidden_size)
    #   h_n: (num_layers * num_directions, batch, hidden_size), the hidden
    #       state for the last step
    self.gru = nn.GRU(input_size=self.variables,
                      hidden_size=self.hidR,
                      num_layers=self.layers,
                      bidirectional=False)
    # fully connected layer
    #   input: the RNN hidden state
    #   output: one predicted time step across all variables ("D" in Table 1)
    #   the "* 2" is because attention is added
    self.linear = nn.Linear(self.hidR * 2, self.variables)
    # dropout module, drop probability set via args.dropout
    self.dropout = nn.Dropout(p=args.dropout)
    # output activation chosen via args.output_fun
    self.output = None
    if args.output_fun == 'sigmoid':
        self.output = torch.sigmoid
    if args.output_fun == 'tanh':
        self.output = torch.tanh
def __init__(self, filed=80):
    super(MemNet, self).__init__()
    self.filed = filed
    self.cnn_l = CNN(filed=self.filed)
    self.attention = Attention(40, score_function='mlp')
    self.x_linear = nn.Sequential(nn.Linear(40, 40), )
    self.linear = nn.Sequential(
        nn.Linear(40, 64),
        nn.Linear(64, 2),
    )