from __future__ import absolute_import, division, print_function, unicode_literals

import io

import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# The Embedding layer takes at least two arguments:
# the number of possible words in the vocabulary, here 1000 (1 + maximum word index),
# and the dimensionality of the embeddings, here 32.
embedding_layer = layers.Embedding(1000, 32)

vocab_size = 10000
imdb = keras.datasets.imdb
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=vocab_size)

# A dictionary mapping words to an integer index
word_index = imdb.get_word_index()

# The first indices are reserved
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
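# With reverse_word_index in place, a small helper can turn an encoded review
# back into readable text. This is a minimal sketch; decode_review is an
# illustrative name, not part of the snippet above.
def decode_review(encoded_review):
    # Unknown indices fall back to "?".
    return " ".join(reverse_word_index.get(i, "?") for i in encoded_review)

print(decode_review(train_data[0]))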
def __init__(self):
    super(LR, self).__init__(name='LR')
    self.embed = layers.Embedding(input_dim=np.sum(Config.feat_sizes),
                                  output_dim=1,
                                  input_length=Config.num_fields)
# Prepare embedding matrix
hits = 0
misses = 0
embedding_matrix = np.zeros((num_tokens, embedding_dim))
for word, i in word_index.items():
    embedding_vector = embedding_index.get(word)
    if embedding_vector is not None:
        # Words not found in the embedding index will be all zeros.
        # This includes the representation for "padding" and "OOV".
        embedding_matrix[i] = embedding_vector
        hits += 1
    else:
        misses += 1
print("Converted %d words (%d misses)" % (hits, misses))

embedding_layer = layers.Embedding(
    num_tokens,
    embedding_dim,
    embeddings_initializer=keras.initializers.Constant(embedding_matrix),
    trainable=False,
)

# The model:
int_sequences_input = keras.Input(shape=(None,), dtype="int64")
embedded_sequences = embedding_layer(int_sequences_input)
x = layers.Conv1D(128, 5, activation="relu")(embedded_sequences)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(128, 5, activation="relu")(x)
x = layers.MaxPooling1D(5)(x)
x = layers.Conv1D(128, 5, activation="relu")(x)
x = layers.GlobalMaxPooling1D()(x)
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.5)(x)
preds = layers.Dense(len(class_names), activation="softmax")(x)
#%%
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

#%%
x = tf.range(10)
x = tf.random.shuffle(x)
# Create an Embedding layer for a vocabulary of 10 words, each word represented by a vector of length 4
net = layers.Embedding(10, 4)
out = net(x)
out

#%%
net.embeddings
net.embeddings.trainable
net.trainable = False

#%%
# Load the word-vector table from a pretrained model
embed_glove = load_embed('glove.6B.50d.txt')
# Initialize the Embedding layer directly with the pretrained word vectors
net.set_weights([embed_glove])

#%%
cell = layers.SimpleRNNCell(3)
cell.build(input_shape=(None, 4))
cell.trainable_variables

#%%
# Initialize the state vector
dataset_val = dataset_val.batch(32)

# Vectorize the data.
train_ds = dataset_tr.map(vectorize_text)
val_ds = dataset_val.map(vectorize_text)

# Do async prefetching / buffering of the data for best performance on GPU.
train_ds = train_ds.cache().prefetch(buffer_size=10)
val_ds = val_ds.cache().prefetch(buffer_size=10)

# An integer input for vocab indices.
inputs = tf.keras.Input(shape=(None,), dtype="int64")

# Add a layer to map those vocab indices into a space of dimensionality 'embedding_dim'.
x = layers.Embedding(max_features, embedding_dim)(inputs)
x = layers.Dropout(0.5)(x)

# Conv1D + global max pooling
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.Conv1D(128, 7, padding="valid", activation="relu", strides=3)(x)
x = layers.GlobalMaxPooling1D()(x)

# Vanilla hidden layer:
x = layers.Dense(128, activation="relu")(x)
x = layers.Dropout(0.5)(x)

# Project onto a single-unit output layer, and squash it with a sigmoid:
predictions = layers.Dense(1, activation="sigmoid", name="predictions")(x)

model = tf.keras.Model(inputs, predictions)
def __init__(self, maxlen, embed_dim):
    super(AddPositionEmbedding, self).__init__()
    self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
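# Only the constructor is shown above. A typical call() for such a layer looks
# up one position embedding per timestep and adds it to the already-embedded
# input; the body below is an assumed sketch, not part of the original class.
def call(self, x):
    # x: (batch, seq_len, embed_dim); self.pos_emb(positions): (seq_len, embed_dim)
    positions = tf.range(start=0, limit=tf.shape(x)[1], delta=1)
    return x + self.pos_emb(positions)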
def __init__(self, embeddings, vocab, **kwargs):
    super(AlphaTextWorldNet, self).__init__(**kwargs)

    self.vocab = vocab
    self.word2id = {w: i for i, w in enumerate(vocab)}
    self.id2word = {i: w for w, i in self.word2id.items()}
    self.verbs = [
        "take", "cook", "go", "open", "drop", "eat", "prepare", "examine",
        "chop", "dice"
    ]
    self.adverbs = ["with", "from"]
    self.unnecessary_words = ['a', 'an', 'the']

    embedding_dim, vocab_size = embeddings.shape
    self.embeddings = layers.Embedding(
        input_dim=vocab_size,
        input_length=None,
        output_dim=embedding_dim,
        embeddings_initializer=initializers.Constant(embeddings),
        trainable=False)

    self.lfe_memory = LocalFeaturesExtractor(filters=self.HIDDEN_UNITS,
                                             kernel_size=self.KSIZE,
                                             num_blocks=5,
                                             l2=self.REG_PENALTY)
    self.lfe_cmdlist = LocalFeaturesExtractor(filters=self.HIDDEN_UNITS,
                                              kernel_size=self.KSIZE,
                                              num_blocks=2,
                                              l2=self.REG_PENALTY)

    self.att_memory_loc_time = AttentionEncoder(units=self.HIDDEN_UNITS,
                                                num_heads=self.ATT_HEADS,
                                                num_blocks=2,
                                                l2=self.REG_PENALTY)
    self.att_memory_loc_turn = PairedAttentionEncoder(
        units=self.HIDDEN_UNITS,
        num_heads=self.ATT_HEADS,
        num_blocks=2,
        l2=self.REG_PENALTY)
    self.att_memory_cmdlist_time = AttentionEncoder(
        units=self.HIDDEN_UNITS,
        num_heads=self.ATT_HEADS,
        num_blocks=2,
        l2=self.REG_PENALTY)
    self.att_memory_cmdlist_turn = PairedAttentionEncoder(
        units=self.HIDDEN_UNITS,
        num_heads=self.ATT_HEADS,
        num_blocks=2,
        l2=self.REG_PENALTY)

    self.value_head = DenseHead(hidden_units=self.HIDDEN_UNITS,
                                l2=self.REG_PENALTY)
    self.policy_head = DenseHead(hidden_units=self.HIDDEN_UNITS,
                                 l2=self.REG_PENALTY)
    self.cmd_gen_head = DenseHead(hidden_units=self.HIDDEN_UNITS,
                                  l2=self.REG_PENALTY)

    self.att_cmd_gen_mem = AttentionEncoder(units=self.HIDDEN_UNITS,
                                            num_heads=self.ATT_HEADS,
                                            num_blocks=2,
                                            l2=self.REG_PENALTY)
    self.att_cmd_gen_prev = AttentionEncoder(units=self.HIDDEN_UNITS,
                                             num_heads=self.ATT_HEADS,
                                             num_blocks=2,
                                             l2=self.REG_PENALTY)
# In[2]:


# In[3]:


# Input for variable-length sequences of integers
inputs = keras.Input(shape=(None,), dtype="int32")
# Embed each integer in a 128-dimensional vector
x = layers.Embedding(max_features, 128)(inputs)
# Add 2 bidirectional LSTMs
x = layers.Bidirectional(layers.LSTM(64, return_sequences=True))(x)
x = layers.Bidirectional(layers.LSTM(64))(x)
# Add a classifier
outputs = layers.Dense(1, activation="sigmoid")(x)
model = keras.Model(inputs, outputs)
model.summary()


# In[4]:


model.compile("adam", "binary_crossentropy", metrics=["accuracy"])
train_ds = raw_train_ds.map(vectorize_text)
val_ds = raw_val_ds.map(vectorize_text)
test_ds = raw_test_ds.map(vectorize_text)

# Memory management
AUTOTUNE = tf.data.experimental.AUTOTUNE
train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Create the model
embedding_dim = 128
model = tf.keras.Sequential([
    layers.Embedding(max_features + 1, embedding_dim),
    layers.Dropout(0.2),
    layers.GlobalAveragePooling1D(),
    layers.Dropout(0.2),
    layers.Dense(4)
])
model.summary()

model.compile(loss=losses.SparseCategoricalCrossentropy(from_logits=True),
              optimizer='adam',
              metrics=['accuracy'])

epochs = 25
history = model.fit(train_ds, validation_data=val_ds, epochs=epochs)
def __init__(self, **kwargs):
    super(MyLayer, self).__init__(**kwargs)
    self.embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)
    self.lstm = layers.LSTM(32)
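# A minimal call() to pair with this constructor (a sketch of the usual pattern,
# not shown in the snippet): because mask_zero=True, the Embedding layer creates
# a mask, and Keras propagates it to the LSTM automatically.
def call(self, inputs):
    x = self.embedding(inputs)
    # Padded (zero) timesteps are skipped by the LSTM thanks to the mask.
    return self.lstm(x)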
    return encoded_text, label


all_encoded_data = all_labeled_data.map(encode_map_fn)

train_data = all_encoded_data.shuffle(BUFFER_SIZE)
train_data = train_data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))

test_data = all_encoded_data.take(TAKE_SIZE)
test_data = test_data.padded_batch(BATCH_SIZE, padded_shapes=([None], []))

# Account for the padding index 0.
vocab_size += 1

model = tf.keras.Sequential([
    layers.Embedding(vocab_size, EMBEDDING_DIM),
    layers.GlobalAveragePooling1D(),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)
])
model.summary()

model.compile(optimizer='adam',
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit(train_data,
                    epochs=EPOCHS,
                    validation_data=test_data,
                    validation_steps=20)
- Add a `keras.layers.Masking` layer.
- Configure a `keras.layers.Embedding` layer with `mask_zero=True`.
- Pass a `mask` argument manually when calling layers that support this argument
  (e.g. RNN layers).
"""

"""
## Mask-generating layers: `Embedding` and `Masking`

Under the hood, these layers will create a mask tensor (2D tensor with shape
`(batch, sequence_length)`), and attach it to the tensor output returned by the
`Masking` or `Embedding` layer.
"""

embedding = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)
masked_output = embedding(padded_inputs)
print(masked_output._keras_mask)

masking_layer = layers.Masking()
# Simulate the embedding lookup by expanding the 2D input to 3D,
# with embedding dimension of 10.
unmasked_embedding = tf.cast(
    tf.tile(tf.expand_dims(padded_inputs, axis=-1), [1, 1, 10]), tf.float32
)
masked_embedding = masking_layer(unmasked_embedding)
print(masked_embedding._keras_mask)

"""
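The masks created by `Embedding` and `Masking` are consumed automatically by
downstream layers. As a small illustration (a sketch, not part of the original
guide text), an LSTM stacked on a mask-generating `Embedding` skips the padded
timesteps without any extra wiring:
"""

inputs = keras.Input(shape=(None,), dtype="int32")
x = layers.Embedding(input_dim=5000, output_dim=16, mask_zero=True)(inputs)
# The LSTM receives the mask attached by the Embedding layer and ignores the
# zero-padded timesteps when computing its final state.
outputs = layers.LSTM(32)(x)
model = keras.Model(inputs, outputs)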
x_train = pad_sequences(sequences_train, maxlen=maxlen)
x_valid = pad_sequences(sequences_valid, maxlen=maxlen)

y_train = df_train.Class.values
y_valid = df_val.Class.values

encoder = LabelEncoder()
encoder.fit(y_train)
encoded_y_train = encoder.transform(y_train)
encoded_y_valid = encoder.transform(y_valid)
dummy_y_train = to_categorical(encoded_y_train)
dummy_y_valid = to_categorical(encoded_y_valid)

model = Sequential()
model.add(layers.Embedding(max_words, output_dim=32))
model.add(layers.SimpleRNN(32))
model.add(layers.Dense(num_classes, activation="softmax"))
model.summary()

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history = model.fit(x_train,
                    dummy_y_train,
                    batch_size=128,
                    epochs=100,
                    validation_data=(x_valid, dummy_y_valid),
                    callbacks=[keras.callbacks.EarlyStopping(patience=5)])
def train(
    self,
    tr_x: pd.DataFrame,
    tr_y: pd.DataFrame,
    va_x: pd.DataFrame = None,
    va_y: pd.DataFrame = None,
    te_x: pd.DataFrame = None,
) -> None:
    # Set up and scale the data
    numerical_features = [
        c for c in tr_x.columns if c not in self.categorical_features
    ]
    validation = va_x is not None

    # Hyperparameters
    dropout = self.params["dropout"]
    nb_epoch = self.params["nb_epoch"]
    patience = self.params["patience"]

    # Build the model
    inp_cats = []
    embs = []
    data = pd.concat([tr_x, va_x, te_x]).reset_index(drop=True)
    for c in self.categorical_features:
        inp_cat = layers.Input(shape=[1], name=c)
        inp_cats.append(inp_cat)
        embs.append(layers.Embedding(data[c].max() + 1, 4)(inp_cat))
    cats = layers.Flatten()(layers.concatenate(embs))
    cats = layers.Dense(4, activation="linear")(cats)

    inp_numerical = layers.Input(shape=[len(numerical_features)], name="numerical")
    nums = layers.Dense(500, activation="relu")(inp_numerical)
    nums = layers.BatchNormalization()(nums)
    nums = layers.Dropout(dropout)(nums)

    x = layers.concatenate([nums, cats])
    x = layers.Dense(40, activation="relu")(x)
    out = layers.Dense(31, activation="linear", name="out1")(x)

    # The categorical inputs feed the embedding branch, so they must be model inputs too.
    model = kerasModel(inputs=inp_cats + [inp_numerical], outputs=out)
    model.compile(optimizer="adam",
                  loss="mse",
                  metrics=[keras.metrics.RootMeanSquaredError()])

    batch_size = 256
    tr_x = get_keras_data(tr_x, numerical_features, self.categorical_features)
    va_x = get_keras_data(va_x, numerical_features, self.categorical_features)

    if validation:
        early_stopping = keras.callbacks.EarlyStopping(
            patience=patience,
            min_delta=0.001,
            restore_best_weights=True,
        )
        model.fit(
            tr_x,
            tr_y,
            validation_data=(va_x, va_y),
            batch_size=batch_size,
            epochs=nb_epoch,
            callbacks=[early_stopping],
        )
    else:
        model.fit(tr_x, tr_y, batch_size=batch_size, epochs=nb_epoch)
    model.load_weights(f"../output/model/model_{self.run_fold_name}.hdf5")

    # Keep the fitted model (and scaler)
    self.model = model
print("na Padding:") print(review_train[0, :]) # Accuracy prediction classifier = LogisticRegression() classifier.fit(review_train, y_train) score = classifier.score(review_test, y_test) print("Accuracy:", score) embedding_dim = 50 # Opstellen van het model model = keras.Sequential() model.add( layers.Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen)) model.add(layers.Conv1D(128, 5, activation='relu')) model.add(layers.GlobalMaxPool1D()) model.add(layers.Dense(1000, activation='relu')) model.add(layers.Dense(500, activation='relu')) model.add(layers.Dense(10, activation='relu')) model.add(layers.Dense(1, activation='sigmoid')) model.summary() model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy']) history = model.fit(review_train,
print(y_data)

# Create a stacked RNN for "many to one" classification with dropout
num_classes = 2
hidden_dims = [10, 10]

input_dim = len(char2idx)
output_dim = len(char2idx)
one_hot = np.eye(len(char2idx))

model = Sequential()
model.add(
    layers.Embedding(
        input_dim=input_dim,
        output_dim=output_dim,
        trainable=False,
        mask_zero=True,
        input_length=max_sequence,
        embeddings_initializer=keras.initializers.Constant(one_hot)))
model.add(layers.SimpleRNN(units=hidden_dims[0], return_sequences=True))
model.add(layers.TimeDistributed(layers.Dropout(rate=.2)))
model.add(layers.SimpleRNN(units=hidden_dims[1]))
model.add(layers.Dropout(rate=.2))
model.add(layers.Dense(units=num_classes))

# Create the loss function
def loss_fn(model, x, y, training):
    return tf.reduce_mean(
        tf.keras.losses.sparse_categorical_crossentropy(y_true=y,
                                                        y_pred=model(
3. `keras.layers.LSTM`, first proposed in
[Hochreiter & Schmidhuber, 1997](https://www.bioinf.jku.at/publications/older/2604.pdf).

In early 2015, Keras had the first reusable open-source Python implementations of LSTM
and GRU.

Here is a simple example of a `Sequential` model that processes sequences of integers,
embeds each integer into a 64-dimensional vector, then processes the sequence of
vectors using a `LSTM` layer.
"""

model = keras.Sequential()
# Add an Embedding layer expecting input vocab of size 1000, and
# output embedding dimension of size 64.
model.add(layers.Embedding(input_dim=1000, output_dim=64))

# Add a LSTM layer with 128 internal units.
model.add(layers.LSTM(128))

# Add a Dense layer with 10 units.
model.add(layers.Dense(10))

model.summary()

"""
Built-in RNNs support a number of useful features:

- Recurrent dropout, via the `dropout` and `recurrent_dropout` arguments
- Ability to process an input sequence in reverse, via the `go_backwards` argument
- Loop unrolling (which can lead to a large speedup when processing short sequences on
CPU), via the `unroll` argument
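
Each of these features maps onto a constructor argument. For instance (an
illustrative sketch, not taken from the guide):
"""

# An LSTM that applies input and recurrent dropout, reads its input sequence in
# reverse, and unrolls the recurrent loop (helpful for short sequences on CPU).
lstm_layer = layers.LSTM(
    128,
    dropout=0.2,            # dropout on the layer inputs
    recurrent_dropout=0.2,  # dropout on the recurrent state
    go_backwards=True,      # process the sequence in reverse order
    unroll=True,            # unroll the loop instead of using a symbolic loop
)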
# Prepare a TextVectorization layer.
vectorizer = TextVectorization(output_mode="int")
vectorizer.adapt(samples)

# Asynchronous preprocessing: the text vectorization is part of the tf.data pipeline.
# First, create a dataset
dataset = tf.data.Dataset.from_tensor_slices((samples, labels)).batch(2)
# Apply text vectorization to the samples
dataset = dataset.map(lambda x, y: (vectorizer(x), y))
# Prefetch with a buffer size of 2 batches
dataset = dataset.prefetch(2)

# Our model should expect sequences of integers as inputs
inputs = keras.Input(shape=(None,), dtype="int64")
x = layers.Embedding(input_dim=10, output_dim=32)(inputs)
outputs = layers.Dense(1)(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer="adam", loss="mse", run_eagerly=True)
model.fit(dataset)

"""
Compare this to doing text vectorization as part of the model:
"""

# Our dataset will yield samples that are strings
dataset = tf.data.Dataset.from_tensor_slices((samples, labels)).batch(2)

# Our model should expect strings as inputs
inputs = keras.Input(shape=(1,), dtype="string")
x = vectorizer(inputs)
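
# The in-model variant is cut off above. A plausible completion (an assumption,
# mirroring the asynchronous version) continues with the same Embedding/Dense
# stack on top of the in-model vectorizer and fits on the raw-string dataset.
x = layers.Embedding(input_dim=10, output_dim=32)(x)
outputs = layers.Dense(1)(x)

model = keras.Model(inputs, outputs)
model.compile(optimizer="adam", loss="mse", run_eagerly=True)
model.fit(dataset)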
def retain(ARGS):
    """
    Helper function to create a DAG of Keras layers via the functional API.
    The layer design mimics the RETAIN architecture.

    :param ARGS: Arguments object containing user-specified parameters
    :type ARGS: :class:`argparse.Namespace`
    :return: Keras model
    :rtype: :class:`tensorflow.keras.Model`
    """

    # Define the constant for model saving
    reshape_size = ARGS.emb_size + ARGS.numeric_size

    if ARGS.allow_negative:
        embeddings_constraint = FreezePadding()
        beta_activation = "tanh"
        output_constraint = None
    else:
        embeddings_constraint = FreezePadding_Non_Negative()
        beta_activation = "sigmoid"
        output_constraint = non_neg()

    def reshape(data):
        """Reshape the context vectors to a 3D tensor."""
        return K.reshape(x=data, shape=(K.shape(data)[0], 1, reshape_size))

    # Code input
    codes = L.Input((None, None), name="codes_input")
    inputs_list = [codes]

    # Calculate the embedding for each code and sum them to a visit level
    codes_embs_total = L.Embedding(ARGS.num_codes + 1, ARGS.emb_size,
                                   name="embedding")(codes)
    codes_embs = L.Lambda(lambda x: K.sum(x, axis=2))(codes_embs_total)

    # Numeric input if needed
    if ARGS.numeric_size:
        numerics = L.Input((None, ARGS.numeric_size), name="numeric_input")
        inputs_list.append(numerics)
        full_embs = L.concatenate([codes_embs, numerics], name="catInp")
    else:
        full_embs = codes_embs

    # Apply dropout on the inputs
    full_embs = L.Dropout(ARGS.dropout_input)(full_embs)

    # Time input if needed
    if ARGS.use_time:
        time = L.Input((None, 1), name="time_input")
        inputs_list.append(time)
        time_embs = L.concatenate([full_embs, time], name="catInp2")
    else:
        time_embs = full_embs

    # Set up the layers.
    # This implementation uses a Bidirectional LSTM instead of reverse order
    # (see https://github.com/mp2893/retain/issues/3 for more details)
    alpha = L.Bidirectional(
        L.LSTM(ARGS.recurrent_size, return_sequences=True, implementation=2),
        name="alpha",
    )
    beta = L.Bidirectional(
        L.LSTM(ARGS.recurrent_size, return_sequences=True, implementation=2),
        name="beta",
    )

    alpha_dense = L.Dense(1, kernel_regularizer=l2(ARGS.l2))
    beta_dense = L.Dense(
        ARGS.emb_size + ARGS.numeric_size,
        activation=beta_activation,
        kernel_regularizer=l2(ARGS.l2),
    )

    # Compute alpha, the visit attention
    alpha_out = alpha(time_embs)
    alpha_out = L.TimeDistributed(alpha_dense, name="alpha_dense_0")(alpha_out)
    alpha_out = L.Softmax(name="softmax_1", axis=1)(alpha_out)

    # Compute beta, the codes attention
    beta_out = beta(time_embs)
    beta_out = L.TimeDistributed(beta_dense, name="beta_dense_0")(beta_out)

    # Compute the context vector based on the attentions and embeddings
    c_t = L.Multiply()([alpha_out, beta_out, full_embs])
    c_t = L.Lambda(lambda x: K.sum(x, axis=1))(c_t)

    # Reshape to a 3D tensor for consistency between Many-to-Many and Many-to-One implementations
    contexts = L.Lambda(reshape)(c_t)

    # Make a prediction
    contexts = L.Dropout(ARGS.dropout_context)(contexts)
    output_layer = L.Dense(
        1,
        activation="sigmoid",
        name="dOut",
        kernel_regularizer=l2(ARGS.l2),
        kernel_constraint=output_constraint,
    )

    # TimeDistributed is used for consistency
    # between Many-to-Many and Many-to-One implementations
    output = L.TimeDistributed(output_layer, name="time_distributed_out")(contexts)

    # Define the model with the appropriate inputs
    model = Model(inputs=inputs_list, outputs=[output])

    return model
def _build_naml(self):
    """The main function to create NAML's logic. The core of NAML
    is a user encoder and a news encoder.

    Returns:
        obj: a model used to train.
        obj: a model used to evaluate and predict.
    """
    hparams = self.hparams

    his_input_title = keras.Input(shape=(hparams.his_size, hparams.title_size),
                                  dtype="int32")
    his_input_body = keras.Input(shape=(hparams.his_size, hparams.body_size),
                                 dtype="int32")
    his_input_vert = keras.Input(shape=(hparams.his_size, 1), dtype="int32")
    his_input_subvert = keras.Input(shape=(hparams.his_size, 1), dtype="int32")

    pred_input_title = keras.Input(shape=(hparams.npratio + 1, hparams.title_size),
                                   dtype="int32")
    pred_input_body = keras.Input(shape=(hparams.npratio + 1, hparams.body_size),
                                  dtype="int32")
    pred_input_vert = keras.Input(shape=(hparams.npratio + 1, 1), dtype="int32")
    pred_input_subvert = keras.Input(shape=(hparams.npratio + 1, 1), dtype="int32")

    pred_input_title_one = keras.Input(shape=(1, hparams.title_size,), dtype="int32")
    pred_input_body_one = keras.Input(shape=(1, hparams.body_size,), dtype="int32")
    pred_input_vert_one = keras.Input(shape=(1, 1), dtype="int32")
    pred_input_subvert_one = keras.Input(shape=(1, 1), dtype="int32")

    his_title_body_verts = layers.Concatenate(axis=-1)([
        his_input_title, his_input_body, his_input_vert, his_input_subvert
    ])

    pred_title_body_verts = layers.Concatenate(axis=-1)([
        pred_input_title, pred_input_body, pred_input_vert, pred_input_subvert
    ])

    pred_title_body_verts_one = layers.Concatenate(axis=-1)([
        pred_input_title_one,
        pred_input_body_one,
        pred_input_vert_one,
        pred_input_subvert_one,
    ])
    pred_title_body_verts_one = layers.Reshape((-1,))(pred_title_body_verts_one)

    embedding_layer = layers.Embedding(
        self.word2vec_embedding.shape[0],
        hparams.word_emb_dim,
        weights=[self.word2vec_embedding],
        trainable=True,
    )

    self.newsencoder = self._build_newsencoder(embedding_layer)
    self.userencoder = self._build_userencoder(self.newsencoder)

    user_present = self.userencoder(his_title_body_verts)
    news_present = layers.TimeDistributed(self.newsencoder)(pred_title_body_verts)
    news_present_one = self.newsencoder(pred_title_body_verts_one)

    preds = layers.Dot(axes=-1)([news_present, user_present])
    preds = layers.Activation(activation="softmax")(preds)

    pred_one = layers.Dot(axes=-1)([news_present_one, user_present])
    pred_one = layers.Activation(activation="sigmoid")(pred_one)

    model = keras.Model(
        [
            his_input_title,
            his_input_body,
            his_input_vert,
            his_input_subvert,
            pred_input_title,
            pred_input_body,
            pred_input_vert,
            pred_input_subvert,
        ],
        preds,
    )

    scorer = keras.Model(
        [
            his_input_title,
            his_input_body,
            his_input_vert,
            his_input_subvert,
            pred_input_title_one,
            pred_input_body_one,
            pred_input_vert_one,
            pred_input_subvert_one,
        ],
        pred_one,
    )

    return model, scorer
def __init__(self, num_patches, projection_dim):
    super(PatchEncoder, self).__init__()
    self.num_patches = num_patches
    self.projection = layers.Dense(units=projection_dim)
    self.position_embedding = layers.Embedding(input_dim=num_patches,
                                               output_dim=projection_dim)
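# Only the constructor is shown for PatchEncoder. A typical call() (a sketch of
# the usual pattern, not part of the snippet) projects each patch linearly and
# adds its learned position embedding:
def call(self, patch):
    # One learned position vector per patch index.
    positions = tf.range(start=0, limit=self.num_patches, delta=1)
    return self.projection(patch) + self.position_embedding(positions)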
embedding_matrix = tf.keras.initializers.Constant(embedding_matrix)

# Build the model
max_question_len = load_data.max_question_len
max_answer_len = load_data.max_answer_len

inputs_question = keras.Input(shape=(max_question_len,), name="input_question")
inputs_answer = keras.Input(shape=(max_answer_len,), name="input_answer")
inputs_x_feat = keras.Input(shape=(4,), name="input_x_feat")

embedding_dim = 300
embedding_layer = layers.Embedding(vocab_size,
                                   embedding_dim,
                                   embeddings_initializer=embedding_matrix,
                                   trainable=False,
                                   name="embedding_question")
embedding_layer_question = embedding_layer(inputs_question)
embedding_layer_answer = embedding_layer(inputs_answer)

conv_layer_question = layers.Conv1D(
    100,
    5,
    activation='relu',
    name="filter_question",
    padding="same",
    kernel_regularizer=regularizers.l2(1e-5))(embedding_layer_question)
conv_layer_answer = layers.Conv1D(
    100,
    5,
# Or just use a matrix representation
# Let's get started
tokenizer = Tokenizer(num_words=vocab_size, oov_token=oov_tok)
tokenizer.fit_on_texts(train_sentences)
mat = tokenizer.texts_to_matrix(train_sentences, mode='binary')
test_mat = tokenizer.texts_to_matrix(test_sentences, mode='binary')


# In[197]:


# Embedding
from tensorflow.keras import Sequential
from tensorflow.keras import layers

model = Sequential()
# input_length can be omitted when there is no Flatten layer afterwards
model.add(layers.Embedding(vocab_size, embedding_dim, input_length=max_length))
model.add(layers.Flatten())  # flatten to 1D
#model.add(layers.GlobalAveragePooling1D())
model.add(layers.Dense(6, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))
model.summary()


# In[198]:


model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
epoch = 10
model.fit(padded, train_labels, epochs=epoch,
          validation_data=(test_padded, test_labels))
def __init__(self,
             task,
             hidden_size=64,
             latent_size=256,
             activation='relu',
             kernel_size=3,
             num_blocks=4):
    super(SequentialVAE, self).__init__()

    input_shape = task.input_shape
    shape_before_flat = [input_shape[0] // (2**(num_blocks - 1)), hidden_size]

    """ DEFINE AN ENCODER MODEL THAT DOWNSAMPLES """

    # the input layer of a keras model
    x = input_layer = keras.Input(shape=input_shape)

    # build a model with an input layer and optional embedding
    x = tfkl.Embedding(task.num_classes, hidden_size)(x)

    # the exponent of a positional embedding
    inverse_frequency = 1.0 / (10000.0**(tf.range(0.0, hidden_size, 2.0) /
                                         hidden_size))[tf.newaxis]

    # calculate a positional embedding to break symmetry
    pos = tf.range(0.0, tf.shape(x)[1], 1.0)[:, tf.newaxis]
    positional_embedding = tf.concat([
        tf.math.sin(pos * inverse_frequency),
        tf.math.cos(pos * inverse_frequency)
    ], axis=1)[tf.newaxis]

    # add the positional encoding
    x = tfkl.Add()([x, positional_embedding])
    x = tfkl.LayerNormalization()(x)

    # add several residual blocks to the model
    for i in range(num_blocks):

        if i > 0:
            # downsample the input sequence by 2
            x = tf.keras.layers.AveragePooling1D(pool_size=2, padding='same')(x)

        # first convolution layer in a residual block
        h = tfkl.Conv1D(hidden_size, kernel_size, padding='same', activation=None)(x)
        h = tfkl.LayerNormalization()(h)
        h = tfkl.Activation(activation)(h)

        # second convolution layer in a residual block
        h = tfkl.Conv1D(hidden_size, kernel_size, padding='same', activation=None)(h)
        h = tfkl.LayerNormalization()(h)
        h = tfkl.Activation(activation)(h)

        # add a residual connection to the model
        x = tfkl.Add()([x, h])

    # flatten the result and predict the params of a gaussian
    flattened_x = tfkl.Flatten()(x)
    latent_mean = tfkl.Dense(latent_size)(flattened_x)
    latent_standard_dev = tfkl.Dense(latent_size, activation=tf.exp)(flattened_x)

    # save the encoder as a keras model
    self.encoder_cnn = keras.Model(
        inputs=input_layer, outputs=[latent_mean, latent_standard_dev])

    """ DEFINE A DECODER THAT UPSAMPLES """

    # the input layer of a keras model
    x = input_layer = keras.Input(shape=[latent_size])
    x = tfkl.Dense(np.prod(shape_before_flat))(x)
    x = tfkl.Reshape(shape_before_flat)(x)

    # add several residual blocks to the model
    for i in reversed(range(num_blocks)):

        if i > 0:
            # up-sample the sequence and handle odd lengths with symmetric padding
            x = tf.pad(tf.repeat(x, 2, axis=1),
                       [[0, 0], [0, (input_shape[0] // (2**(i - 1))) % 2], [0, 0]],
                       mode="SYMMETRIC")

        # the exponent of a positional embedding
        inverse_frequency = 1.0 / (10000.0**(tf.range(0.0, hidden_size, 2.0) /
                                             hidden_size))[tf.newaxis]

        # calculate a positional embedding to break symmetry
        pos = tf.range(0.0, tf.shape(x)[1], 1.0)[:, tf.newaxis]
        positional_embedding = tf.concat([
            tf.math.sin(pos * inverse_frequency),
            tf.math.cos(pos * inverse_frequency)
        ], axis=1)[tf.newaxis]

        # add the positional encoding
        h = tfkl.Add()([x, positional_embedding])
        h = tfkl.LayerNormalization()(h)

        # first convolution layer in a residual block
        h = tfkl.Conv1D(hidden_size, kernel_size, padding='same', activation=None)(h)
        h = tfkl.LayerNormalization()(h)
        h = tfkl.Activation(activation)(h)

        # second convolution layer in a residual block
        h = tfkl.Conv1D(hidden_size, kernel_size, padding='same', activation=None)(h)
        h = tfkl.LayerNormalization()(h)
        h = tfkl.Activation(activation)(h)

        # add a residual connection to the model
        x = tfkl.Add()([x, h])

    # predict the token logits at each position
    logits = tfkl.Dense(task.num_classes)(x)

    # save the decoder as a keras model
    self.decoder_cnn = keras.Model(inputs=input_layer, outputs=logits)
# .cache() keeps data in memory after it's loaded off disk.
# .prefetch() overlaps data preprocessing and model execution while training.
AUTOTUNE = tf.data.experimental.AUTOTUNE

train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)
test_ds = test_ds.cache().prefetch(buffer_size=AUTOTUNE)

# Create the model
embedding_dim = 16
model = tf.keras.Sequential([
    # Map the integer-encoded text to embedding vectors;
    # output dimensions are (batch, sequence, embedding).
    layers.Embedding(max_features + 1, embedding_dim),
    layers.Dropout(0.2),              # regularization
    layers.GlobalAveragePooling1D(),  # average pooling
    layers.Dropout(0.2),              # regularization
    layers.Dense(1)                   # a single output node
])
print(model.summary())

model.compile(loss=losses.BinaryCrossentropy(from_logits=True),
              optimizer='adam',
              metrics=tf.metrics.BinaryAccuracy(threshold=0.0))

# Train the model
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=word2id['<PAD>'],
                                                        padding='post',
                                                        maxlen=maxl)
test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=word2id['<PAD>'],
                                                       padding='post',
                                                       maxlen=maxl)
# print('len: ', len(train_data[0]), len(test_data[1]))

# Build the model
vocab_size = 10000
# model = keras.Input(shape=())
input = layers.Input(shape=(maxl,))
em = layers.Embedding(vocab_size + 1, 300, input_length=maxl)(input)
cnn1 = layers.Conv1D(256,
                     kernel_size=3,
                     padding='same',
                     strides=1,
                     activation='relu',
                     activity_regularizer='l2')(em)
# cnn1 = layers.MaxPooling1D(2, strides=2)(cnn1)
# cnn1 = layers.MaxPooling1D(2)(cnn1)
# drop1 = layers.Dropout(0.25)(cnn1)
cnn2 = layers.Conv1D(filters=256,
                     kernel_size=4,
                     padding='same',
                     strides=1,
                     activation='relu',
                     activity_regularizer='l2')(em)
# geektutu.com
tokenizer = info.features['text'].encoder
print('Vocabulary size:', tokenizer.vocab_size)

sample_str = 'welcome to geektutu.com'
tokenized_str = tokenizer.encode(sample_str)
print('Vectorized text:', tokenized_str)
for ts in tokenized_str:
    print(ts, '-->', tokenizer.decode([ts]))

# Build the RNN model
# geektutu.com
model = Sequential([
    layers.Embedding(tokenizer.vocab_size, 64),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

history1 = model.fit(train_ds, epochs=3, validation_data=test_ds)
loss, acc = model.evaluate(test_ds)
print('Accuracy:', acc)  # 0.81039

# geektutu.com
# Configure matplotlib fonts so Chinese characters render correctly
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False
plt.rcParams['font.size'] = 20
def buildNetwork(n_block, n_self):

    print("Building network ...")

    # product
    l_in = layers.Input(shape=(None,))
    l_mask = layers.Input(shape=(None,))

    # reagents
    l_dec = layers.Input(shape=(None,))
    l_dmask = layers.Input(shape=(None,))

    # positional encodings for product and reagents, respectively
    l_pos = PositionLayer(EMBEDDING_SIZE)(l_mask)
    l_dpos = PositionLayer(EMBEDDING_SIZE)(l_dmask)

    l_emask = MaskLayerRight()([l_dmask, l_mask])
    l_right_mask = MaskLayerTriangular()(l_dmask)
    l_left_mask = MaskLayerLeft()(l_mask)

    # encoder
    l_voc = layers.Embedding(input_dim=vocab_size,
                             output_dim=EMBEDDING_SIZE,
                             input_length=None)

    l_embed = layers.Add()([l_voc(l_in), l_pos])
    l_embed = layers.Dropout(rate=0.1)(l_embed)

    for layer in range(n_block):

        # self attention
        l_o = [SelfLayer(EMBEDDING_SIZE, KEY_SIZE)([l_embed, l_embed, l_embed, l_left_mask])
               for i in range(n_self)]

        l_con = layers.Concatenate()(l_o)
        l_dense = layers.TimeDistributed(layers.Dense(EMBEDDING_SIZE))(l_con)
        l_drop = layers.Dropout(rate=0.1)(l_dense)
        l_add = layers.Add()([l_drop, l_embed])
        l_att = LayerNormalization()(l_add)

        # position-wise
        l_c1 = layers.Conv1D(N_HIDDEN, 1, activation='relu')(l_att)
        l_c2 = layers.Conv1D(EMBEDDING_SIZE, 1)(l_c1)
        l_drop = layers.Dropout(rate=0.1)(l_c2)
        l_ff = layers.Add()([l_att, l_drop])
        l_embed = LayerNormalization()(l_ff)

    # bottleneck
    l_encoder = l_embed

    l_embed = layers.Add()([l_voc(l_dec), l_dpos])
    l_embed = layers.Dropout(rate=0.1)(l_embed)

    for layer in range(n_block):

        # self attention
        l_o = [SelfLayer(EMBEDDING_SIZE, KEY_SIZE)([l_embed, l_embed, l_embed, l_right_mask])
               for i in range(n_self)]

        l_con = layers.Concatenate()(l_o)
        l_dense = layers.TimeDistributed(layers.Dense(EMBEDDING_SIZE))(l_con)
        l_drop = layers.Dropout(rate=0.1)(l_dense)
        l_add = layers.Add()([l_drop, l_embed])
        l_att = LayerNormalization()(l_add)

        # attention to the encoder
        l_o = [SelfLayer(EMBEDDING_SIZE, KEY_SIZE)([l_att, l_encoder, l_encoder, l_emask])
               for i in range(n_self)]

        l_con = layers.Concatenate()(l_o)
        l_dense = layers.TimeDistributed(layers.Dense(EMBEDDING_SIZE))(l_con)
        l_drop = layers.Dropout(rate=0.1)(l_dense)
        l_add = layers.Add()([l_drop, l_att])
        l_att = LayerNormalization()(l_add)

        # position-wise
        l_c1 = layers.Conv1D(N_HIDDEN, 1, activation='relu')(l_att)
        l_c2 = layers.Conv1D(EMBEDDING_SIZE, 1)(l_c1)
        l_drop = layers.Dropout(rate=0.1)(l_c2)
        l_ff = layers.Add()([l_att, l_drop])
        l_embed = LayerNormalization()(l_ff)

    l_out = layers.TimeDistributed(layers.Dense(vocab_size, use_bias=False))(l_embed)

    mdl = tf.keras.Model([l_in, l_mask, l_dec, l_dmask], l_out)

    def masked_loss(y_true, y_pred):
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_true, logits=y_pred)
        mask = tf.cast(tf.not_equal(tf.reduce_sum(y_true, -1), 0), 'float32')
        loss = tf.reduce_sum(loss * mask, -1) / tf.reduce_sum(mask, -1)
        loss = K.mean(loss)
        return loss

    def masked_acc(y_true, y_pred):
        mask = tf.cast(tf.not_equal(tf.reduce_sum(y_true, -1), 0), 'float32')
        eq = K.cast(K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)), 'float32')
        eq = tf.reduce_sum(eq * mask, -1) / tf.reduce_sum(mask, -1)
        eq = K.mean(eq)
        return eq

    mdl.compile(optimizer='adam', loss=masked_loss, metrics=['accuracy', masked_acc])
    # mdl.summary()

    # Divide the graph for faster execution. First, we calculate the encoder's values.
    # Then we use the encoder's values and the product mask as additional decoder inputs.
    def mdl_encoder(product):
        v = gen_left([product])
        enc = l_encoder.eval(feed_dict={l_in: v[0], l_mask: v[1], l_pos: v[2]})
        return enc, v[1]

    # And the decoder
    def mdl_decoder(res, product_encoded, product_mask, T=1.0):
        v = gen_right([res])
        d = l_out.eval(feed_dict={l_encoder: product_encoded, l_dec: v[0],
                                  l_dmask: v[1], l_mask: product_mask, l_dpos: v[2]})
        prob = d[0, len(res), :] / T
        prob = np.exp(prob) / np.sum(np.exp(prob))
        return prob

    return mdl, mdl_encoder, mdl_decoder
def __init__(self, maxlen, vocab_size, embed_dim):
    super(TokenAndPositionEmbedding, self).__init__()
    self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
    self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)
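# As with the other position-embedding snippets, only the constructor appears
# here. A typical call() (a sketch of the common pattern) sums the token and
# position lookups, which share the same embed_dim:
def call(self, x):
    # x holds token ids of shape (batch, maxlen).
    maxlen = tf.shape(x)[-1]
    positions = tf.range(start=0, limit=maxlen, delta=1)
    return self.token_emb(x) + self.pos_emb(positions)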
word_index = tokenizer.word_index

training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences,
                                maxlen=MAX_LENGTH,
                                padding=PADDING_TYPE,
                                truncating=TRUNC_TYPE)

testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences,
                               maxlen=MAX_LENGTH,
                               padding=PADDING_TYPE,
                               truncating=TRUNC_TYPE)

model = tf.keras.Sequential([
    layers.Embedding(VOCAB_SIZE, EMBEDDING_DIM, input_length=MAX_LENGTH),
    layers.GlobalAveragePooling1D(),
    layers.Dense(24, activation='relu'),
    layers.Dense(1, activation='sigmoid')
])
print(model.summary())

model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])

history = model.fit(training_padded,
                    training_labels,
                    epochs=30,
                    validation_data=(testing_padded, testing_labels))