from keras.models import Sequential
from keras.layers import Embedding, SimpleRNN, Dropout, Dense
from keras import optimizers


def vanilla_rnn(num_words, state, lra, dropout, num_outputs=2, emb_dim=50, input_length=500):
    """Build a single-layer SimpleRNN classifier on top of frozen pretrained embeddings."""
    model = Sequential()
    # Frozen embedding layer initialised from the pretrained matrix `embed_matrix`
    # (expected to be defined in the enclosing scope).
    model.add(Embedding(input_dim=num_words + 1,
                        output_dim=emb_dim,
                        input_length=input_length,
                        trainable=False,
                        weights=[embed_matrix]))
    # The recurrent layer infers its input shape from the embedding layer,
    # so no explicit input_shape is needed here.
    model.add(SimpleRNN(units=state, return_sequences=False))
    model.add(Dropout(dropout))
    model.add(Dense(num_outputs, activation='sigmoid'))
    RMS = optimizers.RMSprop(lr=lra)
    model.compile(loss='binary_crossentropy', optimizer=RMS, metrics=['accuracy'])
    return model
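# A minimal usage sketch for vanilla_rnn (not part of the original code): the
# vocabulary size, the random stand-in for the pretrained `embed_matrix`, and the
# training-data names are illustrative assumptions only.
import numpy as np

num_words = 10000                                   # assumed vocabulary size
embed_matrix = np.random.rand(num_words + 1, 50)    # stand-in for a pretrained embedding matrix

model = vanilla_rnn(num_words=num_words, state=64, lra=0.001, dropout=0.5)
model.summary()
# model.fit(x_train, y_train, batch_size=32, epochs=5, validation_split=0.1)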
import tensorflow as tf
from tensorflow.keras.layers import Embedding, GRU, Dense


class ReinforceAgent(tf.keras.Model):
    # (the tf.keras.Model base class is inferred from the Keras layers and optimizer used)

    def __init__(self, num_actions):
        super(ReinforceAgent, self).__init__()
        self.num_actions = num_actions

        # Layer sizes.
        self.start_embedding_size = 50
        self.gru_embedding_size = 100
        self.hidden_dense_size = 100

        # Embeddings for the state ids (63) and the hand ids (252).
        self.state_embedding = Embedding(63, self.start_embedding_size)
        self.hand_embedding = Embedding(252, self.start_embedding_size)

        # Recurrent encoder over the state sequence.
        self.gru = GRU(self.gru_embedding_size, return_sequences=True, return_state=True)

        # Dense layers that map the combined representation to a softmax over 61 actions.
        self.hand_dense_layer = Dense(self.hidden_dense_size)
        self.concatted_dense_1 = Dense(self.hidden_dense_size)
        self.final_dense = Dense(61, activation='softmax')

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
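# A hypothetical forward pass for ReinforceAgent (not in the original source): a
# sketch of how the layers above might be wired, assuming the agent receives a
# batch of state-id sequences and one hand id per example.
def agent_forward(agent, states, hands):
    state_emb = agent.state_embedding(states)        # (batch, time, 50)
    seq_out, gru_state = agent.gru(state_emb)        # final state: (batch, 100)
    hand_emb = agent.hand_embedding(hands)           # (batch, 50)
    hand_out = agent.hand_dense_layer(hand_emb)      # (batch, 100)
    combined = tf.concat([gru_state, hand_out], axis=-1)
    hidden = agent.concatted_dense_1(combined)
    return agent.final_dense(hidden)                 # (batch, 61) action distribution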
from keras.layers import Embedding, Dense, Dropout, Lambda, Multiply
from keras import backend as K


def target_word_hidden(inputs, target_role, n_word_vocab, n_role_vocab, emb_init,
                       n_factors_cls=512, n_hidden=256,
                       using_dropout=False, dropout_rate=0.3):
    """Hidden layer of the non-incremental role-filler model that predicts the
    target word given the context (input words, input roles, target role).

    # Args:
        inputs: output of the context embedding from the previous layer, shape is (batch_size, input_length)
        target_role: placeholder for the target role, shape is (batch_size, 1)
        n_word_vocab: size of the word vocabulary
        n_role_vocab: size of the role vocabulary
        emb_init: initializer of the embedding
        n_factors_cls: dimensionality of the classification factorization, default: 512
        n_hidden: number of hidden units
        using_dropout: bool, whether to use a dropout layer
        dropout_rate: rate of the dropout layer
    # Return:
        hidden tensor of shape (batch_size, n_factors_cls)
    """
    # target role embedding; shape is (batch_size, 1, n_factors_cls)
    target_role_embedding = Embedding(n_role_vocab, n_factors_cls,
                                      embeddings_initializer=emb_init,
                                      name='target_role_embedding')(target_role)

    if using_dropout:
        # dropout layer after the embedding
        target_role_embedding = Dropout(dropout_rate)(target_role_embedding)

    # reduce the tensor from 3 to 2 dimensions; shape becomes (batch_size, n_factors_cls)
    target_role_embedding = Lambda(lambda x: K.sum(x, axis=1),
                                   output_shape=(n_factors_cls,))(target_role_embedding)

    # context embedding after a linear projection
    # QUESTION: what's the point of the linear transformation? (team1-change)
    weighted_context_embedding = Dense(n_factors_cls,
                                       activation='linear',
                                       use_bias=False,
                                       input_shape=(n_hidden, ))(inputs)

    # if using_dropout:
    #     # dropout layer after the fully connected layer
    #     weighted_context_embedding = Dropout(0.5)(weighted_context_embedding)

    # hidden units after combining the two embeddings; shape matches the embeddings
    hidden = Multiply()([weighted_context_embedding, target_role_embedding])

    return hidden
def target_role_hidden(inputs, target_word, n_word_vocab, n_role_vocab, emb_init,
                       n_factors_cls=512, n_hidden=256,
                       using_dropout=False, dropout_rate=0.3):
    """Hidden layer of the multi-task non-incremental role-filler model that
    predicts the target role given the context (input words, input roles, target word).

    # Args:
        inputs: output of the context embedding from the previous layer, shape is (batch_size, input_length)
        target_word: placeholder for the target word, shape is (batch_size, 1)
        n_word_vocab: size of the word vocabulary
        n_role_vocab: size of the role vocabulary
        emb_init: initializer of the embedding
        n_factors_cls: dimensionality of the classification factorization, default: 512
        n_hidden: number of hidden units
        using_dropout: bool, whether to use a dropout layer
        dropout_rate: rate of the dropout layer
    # Return:
        hidden tensor of shape (batch_size, n_factors_cls)
    """
    # target word embedding; shape is (batch_size, 1, n_factors_cls)
    target_word_embedding = Embedding(n_word_vocab, n_factors_cls,
                                      embeddings_initializer=emb_init,
                                      name='target_word_embedding')(target_word)

    if using_dropout:
        target_word_embedding = Dropout(dropout_rate)(target_word_embedding)

    # reduce the tensor from 3 to 2 dimensions; shape becomes (batch_size, n_factors_cls)
    target_word_embedding = Lambda(lambda x: K.sum(x, axis=1),
                                   output_shape=(n_factors_cls,))(target_word_embedding)

    # context embedding after a linear projection
    weighted_context_embedding = Dense(n_factors_cls,
                                       activation='linear',
                                       use_bias=False,
                                       input_shape=(n_hidden, ))(inputs)

    # if using_dropout:
    #     weighted_context_embedding = Dropout(0.5)(weighted_context_embedding)

    # hidden units after combining the two embeddings; shape matches the embeddings
    hidden = Multiply()([weighted_context_embedding, target_word_embedding])

    return hidden
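# A usage sketch (not from the original source) showing how the two hidden layers
# above could feed classification heads. The context input, initializer, and
# vocabulary sizes are illustrative assumptions.
from keras.layers import Input
from keras.initializers import glorot_uniform

n_word_vocab, n_role_vocab = 50000, 7
context_embedding = Input(shape=(256,))          # stands in for the encoded context
target_role = Input(shape=(1,), dtype='int32')
target_word = Input(shape=(1,), dtype='int32')

word_hidden = target_word_hidden(context_embedding, target_role,
                                 n_word_vocab, n_role_vocab, glorot_uniform())
role_hidden = target_role_hidden(context_embedding, target_word,
                                 n_word_vocab, n_role_vocab, glorot_uniform())

word_output = Dense(n_word_vocab, activation='softmax', name='word_output')(word_hidden)
role_output = Dense(n_role_vocab, activation='softmax', name='role_output')(role_hidden)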
import numpy as np
import tensorflow as tf
from tensorflow.keras import preprocessing, utils
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense
from tensorflow.keras.models import Model

# (padded_answers, tokenized_answers, maxlen_answers and vocab_size are assumed
# to come from the preceding preprocessing steps)
decoder_input_data = np.array(padded_answers)
print((decoder_input_data.shape, maxlen_answers))

# decoder_output_data: drop the leading start token so the targets are shifted by one step
for i in range(len(tokenized_answers)):
    tokenized_answers[i] = tokenized_answers[i][1:]
padded_answers = preprocessing.sequence.pad_sequences(tokenized_answers,
                                                      maxlen=maxlen_answers,
                                                      padding='post')
onehot_answers = utils.to_categorical(padded_answers, vocab_size)
decoder_output_data = np.array(onehot_answers)
print(decoder_output_data.shape)

# Define the encoder-decoder model
encoder_inputs = Input(shape=(None, ))
# Reference: Embedding layer <https://keras.io/zh/layers/embeddings/#embedding>
encoder_embedding = Embedding(vocab_size, 200, mask_zero=True)(encoder_inputs)
# Reference: https://keras.io/zh/layers/recurrent/#lstm
encoder_outputs, state_h, state_c = tf.keras.layers.LSTM(200, return_state=True)(encoder_embedding)
encoder_states = [state_h, state_c]

decoder_inputs = Input(shape=(None, ))
decoder_embedding = Embedding(vocab_size, 200, mask_zero=True)(decoder_inputs)
decoder_lstm = LSTM(200, return_state=True, return_sequences=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=encoder_states)
decoder_dense = Dense(vocab_size, activation=tf.keras.activations.softmax)
output = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], output)
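# A minimal training sketch (not part of the original snippet): the optimizer and
# hyper-parameters are illustrative, and `encoder_input_data` is assumed to be the
# padded question array produced by the same preprocessing pipeline.
model.compile(optimizer=tf.keras.optimizers.RMSprop(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.summary()
model.fit([encoder_input_data, decoder_input_data], decoder_output_data,
          batch_size=64, epochs=100)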
import numpy as np
from keras.layers import Embedding, Dense, Dropout, Multiply


def factored_embedding(input_words, input_roles, n_word_vocab, n_role_vocab,
                       emb_init, missing_word_id, input_length,
                       n_factors_emb=256, n_hidden=256, mask_zero=True,
                       using_dropout=False, dropout_rate=0.3, using_bias=False):
    """Role-based word embedding that combines a word embedding and a role embedding.

    # Arguments:
        input_words: placeholder for input words, shape is (batch_size, input_length)
        input_roles: placeholder for input roles, shape is (batch_size, input_length)
        n_word_vocab: size of the word vocabulary
        n_role_vocab: size of the role vocabulary
        emb_init: initializer of the embeddings
        missing_word_id: the id used as a placeholder for a role without a word
        input_length: length of the input sequences
        n_factors_emb: tensor factorization dimensionality, default: 256
        n_hidden: number of hidden units, default: 256
        mask_zero: bool, zero out the weights of the missing word
        using_dropout: bool, whether to use dropout layers
        dropout_rate: rate of the dropout layers
        using_bias: bool, whether the final projection uses a bias
    """
    # word embedding; shape is (batch_size, input_length, n_factors_emb)
    word_embedding = Embedding(n_word_vocab, n_factors_emb,
                               embeddings_initializer=emb_init,
                               name='org_word_embedding')(input_words)

    if mask_zero:
        # a hack that zeros out the embedding of the missing-word id
        weights = np.ones((n_word_vocab, n_factors_emb))
        weights[missing_word_id] = 0
        mask = Embedding(n_word_vocab, n_factors_emb,
                         weights=[weights],
                         trainable=False,
                         name='mask_missing')(input_words)

        # masked word embedding
        word_embedding = Multiply(name='word_embedding')([word_embedding, mask])

        # Alternative implementation, requires missing_word_id == 0:
        # word_embedding = Masking(mask_value=0.,
        #     input_shape=(input_length, n_factors_emb))(word_embedding)

    # role embedding; shape is (batch_size, input_length, n_factors_emb)
    role_embedding = Embedding(n_role_vocab, n_factors_emb,
                               embeddings_initializer=emb_init,
                               name='role_embedding')(input_roles)

    if using_dropout:
        # dropout layers after the embeddings
        word_embedding = Dropout(dropout_rate)(word_embedding)
        role_embedding = Dropout(dropout_rate)(role_embedding)

    # hidden units after combining the two embeddings; shape matches the embeddings
    # QUESTION: why multiply instead of concatenating? (team1-change)
    hidden = Multiply(name='multiply_composition')([word_embedding, role_embedding])

    # fully connected layer, output shape is (batch_size, input_length, n_hidden)
    embedding = Dense(n_hidden,
                      activation='linear',
                      use_bias=using_bias,
                      input_shape=(n_factors_emb, ),
                      name='role_based_word_embedding')(hidden)

    return embedding
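# A usage sketch (not from the original source): wiring factored_embedding into a
# functional-API graph. The vocabulary sizes, sequence length, initializer, and
# missing-word id below are illustrative assumptions.
from keras.layers import Input
from keras.initializers import glorot_uniform

n_word_vocab, n_role_vocab, input_length = 50000, 7, 6
input_words = Input(shape=(input_length,), dtype='int32')
input_roles = Input(shape=(input_length,), dtype='int32')

embedding = factored_embedding(input_words, input_roles,
                               n_word_vocab, n_role_vocab,
                               emb_init=glorot_uniform(),
                               missing_word_id=0,
                               input_length=input_length)
# `embedding` has shape (batch_size, input_length, n_hidden) and can be pooled or
# fed into the hidden layers defined above.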