# Shared imports for the snippets below. Several snippets also reference
# project-level globals (FLAGS, globe, num_actions, embedding_matrix, ...)
# that their original repositories define elsewhere.
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import backend, layers, models, regularizers
from tensorflow.keras.layers import Activation
from tensorflow.keras.models import Sequential

K = backend  # a few snippets use the K alias


def restricted_attention(x, k):
    """Local self-attention: each position attends to a +/-k window."""
    dim = x.shape[2]
    Wq = layers.Dense(dim)
    Wk = layers.Dense(dim)
    wk = Wk(x)
    # Pad the sequence axis so every position has a full 2k+1 window.
    paddings = tf.constant([[0, 0], [k, k], [0, 0]])
    pk = tf.pad(wk, paddings)
    pv = tf.pad(x, paddings)
    keys = []
    vals = []
    for i in range(-k, k + 1):
        keys.append(tf.roll(pk, i, 1))
        vals.append(tf.roll(pv, i, 1))
    keys = tf.stack(keys, 2)        # (batch, seq + 2k, 2k + 1, dim)
    keys = keys[:, k:-k, :, :]      # trim back to (batch, seq, 2k + 1, dim)
    vals = tf.stack(vals, 2)
    vals = vals[:, k:-k, :, :]
    # -- missing code --
    query = Wq(x)[..., None]        # (batch, seq, dim, 1)
    dot_product = tf.matmul(keys, query) / np.sqrt(dim)
    atten_weights = layers.Softmax(name='atten_weights', axis=-2)(dot_product)
    val_out = tf.matmul(atten_weights, vals, transpose_a=True)
    val_out = tf.squeeze(val_out, axis=2)
    return x + val_out              # residual connection
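# A minimal smoke test for restricted_attention (a sketch: batch size, sequence
# length, and embedding dim below are illustrative, not from the source).
_x = tf.random.normal((2, 10, 16))       # (batch, seq_len, dim)
_y = restricted_attention(_x, k=2)       # local attention over a 5-token window
assert _y.shape == (2, 10, 16)           # residual output keeps the input shape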
def call(self, inputs, **kwargs):
    # NOTE: add_weight inside call() creates fresh, untrained weights on every
    # invocation; these projections belong in build() (see the variant further
    # below that reads self.W_Query / self.W_key / self.W_Value instead).
    m = inputs.shape[-1]
    W_Query = self.add_weight(
        shape=[m, self.att_embedding_size * self.heads],
        initializer=tf.keras.initializers.RandomNormal(seed=self.seed))
    W_key = self.add_weight(
        shape=[m, self.att_embedding_size * self.heads],
        initializer=tf.keras.initializers.RandomNormal(seed=self.seed))
    W_Value = self.add_weight(
        shape=[m, self.att_embedding_size * self.heads],
        initializer=tf.keras.initializers.RandomNormal(seed=self.seed))
    queries = tf.matmul(inputs, W_Query)
    keys = tf.matmul(inputs, W_key)
    values = tf.matmul(inputs, W_Value)
    # Split into heads: (heads, batch, fields, att_embedding_size).
    queries = tf.stack(tf.split(queries, self.heads, axis=2))
    keys = tf.stack(tf.split(keys, self.heads, axis=2))
    values = tf.stack(tf.split(values, self.heads, axis=2))
    att_score = tf.matmul(queries, keys, transpose_b=True)
    att_score = layers.Softmax(axis=-1)(att_score)
    result = tf.matmul(att_score, values)
    # Merge the heads back: (batch, fields, att_embedding_size * heads).
    result = tf.concat(tf.split(result, self.heads), axis=-1)
    result = tf.squeeze(result, axis=0)
    if self.use_res:
        W_Res = self.add_weight(
            shape=[m, self.att_embedding_size * self.heads],
            initializer=tf.keras.initializers.RandomNormal(seed=self.seed))
        result = result + tf.matmul(inputs, W_Res)
    result = tf.keras.activations.relu(result)
    return result
def multihead_attention_model(inputs):
    # Plain dot-product self-attention (a single head, no learned projections).
    # query_key[b, i, j] = <inputs[b, i], inputs[b, j]>
    query_key = layers.Dot(axes=2)([inputs, inputs])
    attentions = layers.Softmax(axis=-1)(query_key)
    # Contract the normalized scores (axis 2) against the value rows (axis 1);
    # the original used Dot(axes=1), which applied the transposed weights.
    qkv = layers.Dot(axes=[2, 1])([attentions, inputs])
    return qkv
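# Hedged usage sketch: despite its name, the function above implements a single
# attention head; the sequence length and feature size here are illustrative.
seq = keras.Input(shape=(20, 64))                              # (steps, features)
toy_model = keras.Model(seq, multihead_attention_model(seq))   # (None, 20, 64)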
def call(self, inputs, **kwargs):
    x = self.dense_1(inputs)
    x = self.bn_1(x)
    x = self.lrelu(x)
    logits = self.dense_2(x)
    # layers.Softmax is a layer: instantiate it, then call it on the logits
    # (the original passed the tensor to the constructor).
    out = layers.Softmax()(logits)
    return out, logits
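# Usage note (a sketch, not from the source): returning the raw logits next to
# the probabilities lets the loss consume logits directly, which is more
# numerically stable than crossentropy on softmax outputs:
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)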
def __init__(self, scope):
    self.scope = scope
    super(DeepxorModel, self).__init__()
    self.l_0 = layers.Dense(
        4096, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.l_1 = layers.Dense(
        2048, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.l_2 = layers.Dense(
        512, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.l_3 = layers.Dense(
        512, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.logits = layers.Dense(
        num_actions,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.policy = layers.Softmax()
    self.values = layers.Dense(
        1, activation=tf.tanh,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
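# Porting note (a sketch, not from the source): tf.contrib was removed in TF2.
# Near drop-in replacements for the pair used above (and in the recurrent
# model below) are:
#   kernel_regularizer=tf.keras.regularizers.l2(FLAGS.l2)    # contrib's version
#       goes through tf.nn.l2_loss, i.e. carries an extra factor of 1/2
#   kernel_initializer=tf.keras.initializers.GlorotUniform() # Xavier (uniform)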
def build_layer7(self, inp):
    layer7 = Sequential([
        layers.Conv2D(256, 2),
        Activation('relu'),
        layers.Conv2D(128, 2),
        Activation('relu'),
        layers.Conv2D(2, 1),
        layers.Softmax(),
    ])(inp)
    print('layer 7 ', layer7.shape)
    return keras.Model(inp, layer7)
def RSoftmax(x, filters, radix, groups, name):
    bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1
    c = filters // radix // groups
    # Regroup the channel vector as (groups, radix, c); the original computed
    # the same tuple in both branches of a ternary, so it is written once here.
    shape = (groups, radix, c)
    x = layers.Reshape(shape, name=name + '_0_attn_reshape')(x)
    # Move radix into the softmax axis: (batch, radix, groups, c).
    x = layers.Lambda(lambda x: tf.transpose(x, (0, 2, 1, 3)),
                      name=name + '_attn_transpose')(x)
    # Normalize across the radix splits.
    x = layers.Softmax(axis=1, name=name + '_attn_softmax')(x)
    shape = (1, 1, filters) if bn_axis == 3 else (filters, 1, 1)
    x = layers.Reshape(shape, name=name + '_1_attn_reshape')(x)
    return x
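# Hedged usage sketch for RSoftmax (the radix softmax used by split-attention
# blocks); filters/radix/groups are illustrative and must satisfy
# filters % (radix * groups) == 0.
attn_in = keras.Input(shape=(64,))              # pooled per-channel logits
attn = RSoftmax(attn_in, filters=64, radix=2, groups=1,
                name='stage1')                  # -> (None, 1, 1, 64)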
def __init__(self):
    super(KerasModel, self).__init__()
    weight_decay = 1e-4
    self.conv1 = layers.Conv2D(32, (3, 3), padding='same',
                               input_shape=(32, 32, 3),
                               kernel_regularizer=regularizers.l2(weight_decay))
    self.elu1 = layers.ELU()
    self.bn1 = layers.BatchNormalization()
    self.conv2 = layers.Conv2D(32, (3, 3),
                               kernel_regularizer=regularizers.l2(weight_decay))
    self.elu2 = layers.ELU()
    self.bn2 = layers.BatchNormalization()
    self.pool1 = layers.MaxPool2D(pool_size=(2, 2))
    self.dropout1 = layers.Dropout(rate=0.2)
    self.conv3 = layers.Conv2D(64, (3, 3), padding='same',
                               kernel_regularizer=regularizers.l2(weight_decay))
    self.elu3 = layers.ELU()
    self.bn3 = layers.BatchNormalization()
    self.conv4 = layers.Conv2D(64, (3, 3),
                               kernel_regularizer=regularizers.l2(weight_decay))
    self.elu4 = layers.ELU()
    self.bn4 = layers.BatchNormalization()
    self.pool2 = layers.MaxPool2D(pool_size=(2, 2))
    self.dropout2 = layers.Dropout(rate=0.3)
    self.conv5 = layers.Conv2D(128, (3, 3), padding='same',
                               kernel_regularizer=regularizers.l2(weight_decay))
    self.elu5 = layers.ELU()
    self.bn5 = layers.BatchNormalization()
    self.conv6 = layers.Conv2D(128, (3, 3),
                               kernel_regularizer=regularizers.l2(weight_decay))
    self.elu6 = layers.ELU()
    self.bn6 = layers.BatchNormalization()
    self.pool3 = layers.MaxPool2D(pool_size=(2, 2))
    self.dropout3 = layers.Dropout(rate=0.4)
    self.flatten1 = layers.Flatten()
    self.dense1 = layers.Dense(512)
    self.elu7 = layers.ELU()
    self.dropout4 = layers.Dropout(rate=0.5)
    self.dense2 = layers.Dense(10)
    self.softmax = layers.Softmax()
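# A hedged sketch of the forward pass the attribute names above imply (the
# class's actual call() is not shown in the source; the conv -> ELU -> BN
# ordering simply follows the attribute numbering):
def call(self, x, training=False):
    x = self.bn1(self.elu1(self.conv1(x)), training=training)
    x = self.bn2(self.elu2(self.conv2(x)), training=training)
    x = self.dropout1(self.pool1(x), training=training)
    x = self.bn3(self.elu3(self.conv3(x)), training=training)
    x = self.bn4(self.elu4(self.conv4(x)), training=training)
    x = self.dropout2(self.pool2(x), training=training)
    x = self.bn5(self.elu5(self.conv5(x)), training=training)
    x = self.bn6(self.elu6(self.conv6(x)), training=training)
    x = self.dropout3(self.pool3(x), training=training)
    x = self.dropout4(self.elu7(self.dense1(self.flatten1(x))),
                      training=training)
    return self.softmax(self.dense2(x))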
def call(self, inputs, **kwargs):
    # Element-wise product of every pair of field embeddings.
    interactions = list()
    for i in range(len(inputs) - 1):
        for j in range(i + 1, len(inputs)):
            interactions.append(tf.multiply(inputs[i], inputs[j]))
    interactions = tf.stack(interactions, axis=1)
    # Score each interaction, then softmax over the interaction axis.
    att_weight = self.att_layer(interactions)
    att_weight = self.att_proj_layer(att_weight)
    att_weight = layers.Softmax(axis=1)(att_weight)
    # Attention-weighted sum pools the interactions back to one vector.
    output = tf.reduce_sum(interactions * att_weight, axis=1)
    return output
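# Hedged sketch of the enclosing layer's constructor, which the source does
# not show. Dense(4) followed by Dense(1) is consistent with the shapes
# printed by the debug variant further below ((None, 276, 4), then a softmax
# over the 276 interactions); att_units and the use_bias choice are
# assumptions.
class AttentionBasedPoolingLayer(tf.keras.layers.Layer):
    def __init__(self, att_units=4, **kwargs):
        super().__init__(**kwargs)
        self.att_layer = layers.Dense(att_units, activation='relu')
        self.att_proj_layer = layers.Dense(1, use_bias=False)

    # call(self, inputs, **kwargs) as defined above: with 24 field embeddings
    # of dim 16, C(24, 2) = 276 interactions pool to a (batch, 16) vector.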
def __init__(self, num_actions):
    super(RecurrentModel, self).__init__()
    self.l_0 = layers.Dense(
        FLAGS.input_layer, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.l_1 = layers.Dense(
        FLAGS.hidden_layer, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.l_2 = layers.Dense(
        FLAGS.policy_layer, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.l_3 = layers.Dense(
        FLAGS.value_layer, activation=tf.nn.elu,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.logits = layers.Dense(
        num_actions,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.policy = layers.Softmax()
    self.values = layers.Dense(
        1, activation=tf.tanh,
        kernel_regularizer=tf.contrib.layers.l2_regularizer(scale=FLAGS.l2),
        kernel_initializer=tf.contrib.layers.xavier_initializer())
    self.seqlen = tf.placeholder(tf.int32, [None])
def call(self, inputs, **kwargs):
    queries = tf.matmul(inputs, self.W_Query)
    keys = tf.matmul(inputs, self.W_key)
    values = tf.matmul(inputs, self.W_Value)
    queries = tf.stack(tf.split(queries, self.heads, axis=2))
    keys = tf.stack(tf.split(keys, self.heads, axis=2))
    values = tf.stack(tf.split(values, self.heads, axis=2))
    att_score = tf.matmul(queries, keys, transpose_b=True)
    att_score = layers.Softmax(axis=-1)(att_score)
    result = tf.matmul(att_score, values)
    result = tf.concat(tf.split(result, self.heads), axis=-1)
    result = tf.squeeze(result, axis=0)
    if self.use_res:
        result = result + tf.matmul(inputs, self.W_Res)
    result = tf.keras.activations.relu(result)
    return result
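# A hedged sketch of the build() this variant presupposes (not shown in the
# source): unlike the earlier call()-time add_weight variant, the projections
# are created once and reused across calls. Shapes and the initializer mirror
# that variant; the attribute names match the ones read above.
def build(self, input_shape):
    m = int(input_shape[-1])
    init = tf.keras.initializers.RandomNormal(seed=self.seed)
    shape = [m, self.att_embedding_size * self.heads]
    self.W_Query = self.add_weight(name='W_Query', shape=shape, initializer=init)
    self.W_key = self.add_weight(name='W_key', shape=shape, initializer=init)
    self.W_Value = self.add_weight(name='W_Value', shape=shape, initializer=init)
    if self.use_res:
        self.W_Res = self.add_weight(name='W_Res', shape=shape, initializer=init)
    self.built = True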
def call(self, inputs, **kwargs):
    # Debug variant of the pooling call above, with the shape prints left in.
    interactions = list()
    for i in range(len(inputs) - 1):
        for j in range(i + 1, len(inputs)):
            interactions.append(tf.multiply(inputs[i], inputs[j]))
    interactions = tf.stack(interactions, axis=1)
    print("interactions:", interactions)
    att_weight = self.att_layer(interactions)
    print("att_weight:", att_weight)
    # att_weight: Tensor("attention_based_pooling_layer/dense/Identity:0",
    #             shape=(None, 276, 4), dtype=float32)
    att_weight = self.att_proj_layer(att_weight)
    print("att_weight:", att_weight)
    att_weight = layers.Softmax(axis=1)(att_weight)
    print("att_weight:", att_weight)
    output = tf.reduce_sum(interactions * att_weight, axis=1)
    print("output:", output)
    return output
def build(self):
    inputs = layers.Input(self.input_size)
    # Encoder: five context modules with progressive downsampling.
    output0 = self._context_module(16, inputs, strides=(1, 1))
    output1 = self._context_module(32, output0, strides=(2, 2))
    output2 = self._context_module(64, output1, strides=(2, 2))
    output3 = self._context_module(128, output2, strides=(2, 2))
    output4 = self._context_module(256, output3, strides=(2, 2))
    # Decoder with skip connections from the encoder.
    decoder0 = self._decoder_block(128, [output3, output4])
    decoder1 = self._decoder_block(64, [output2, decoder0])
    decoder2 = self._decoder_block(32, [output1, decoder1])
    decoder3 = self._decoder_block_last(16, [output0, decoder2])
    # Deep supervision: 1x1 class maps from three decoder depths are
    # upsampled and summed before the final softmax.
    output0 = layers.Conv2D(self.num_class, (1, 1))(decoder3)
    output1 = layers.Conv2D(self.num_class, (1, 1))(decoder2)
    output2_up = layers.UpSampling2D(size=(2, 2))(
        layers.Conv2D(self.num_class, (1, 1))(decoder1))
    output_sum = layers.Add()([output2_up, output1])
    output_sum = layers.UpSampling2D(size=(2, 2))(output_sum)
    output_sum = layers.Add()([output_sum, output0])
    output = layers.Softmax()(output_sum)
    return models.Model(inputs=[inputs], outputs=[output])
def __init__(self):
    # weight initializer
    initializer = tf.keras.initializers.RandomNormal(mean=0.0, stddev=1.0)
    # head side
    head_input = keras.Input(shape=(4,), name='input_head')
    l2_head = layers.Dense(32, activation='relu', name='l2_head',
                           kernel_initializer=initializer)(head_input)
    l3_head = layers.Dense(4, activation='relu', name='l3_head',
                           kernel_initializer=initializer)(l2_head)
    l4_head = layers.BatchNormalization(name='head_norm')(l3_head)
    # grid side
    block_input = keras.Input(shape=(globe.GRID_X, globe.GRID_Y, 1),
                              name='input_game_state')
    l1 = layers.Conv2D(16, 3, padding='same', activation='relu', name='l1',
                       kernel_initializer=initializer)(block_input)
    l2 = layers.Conv2D(16, 3, padding='same', activation='relu', name='l2',
                       kernel_initializer=initializer)(l1)
    l3 = layers.Conv2D(4, 1, padding='same', activation='relu', name='l4',
                       kernel_initializer=initializer)(l2)
    l4 = layers.GlobalAveragePooling2D(name='pool')(l3)
    l5 = layers.BatchNormalization(name='norm')(l4)
    # combine
    l5 = layers.add([l5, l4_head], name='add')
    l6 = layers.Dense(4, activation='relu', name='last_fully_connected',
                      kernel_initializer=initializer)(l5)
    l7 = layers.Softmax(name='policy')(l6)
    l8 = layers.Multiply(name='mult')([l7, head_input])
    self.model = keras.Model(inputs=[block_input, head_input], outputs=l8)
    self.compile()
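# Hedged usage sketch: GRID_X/GRID_Y come from the external `globe` module
# (assumed 10 here), and `net` is a stand-in for an instance of the enclosing
# class, which is not shown. The final Multiply suggests the head input acts
# as a mask over the 4-way softmax policy, and it is treated that way here.
state = np.zeros((1, 10, 10, 1), dtype=np.float32)           # one board state
legal = np.array([[1.0, 1.0, 0.0, 1.0]], dtype=np.float32)   # third move masked
masked_policy = net.model.predict([state, legal])            # shape (1, 4)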
def build_model(model_type, n_units=64):
    print(model_type)
    sequences = layers.Input(shape=(MAX_LENGTH,))
    # Embed the words into 100-dim vectors (frozen pretrained weights).
    embedding_layer = layers.Embedding(MAX_FEATURES, 100,
                                       weights=[embedding_matrix],
                                       input_length=MAX_LENGTH,
                                       trainable=False)
    x = embedding_layer(sequences)
    if model_type not in {'RNN', 'GRU'}:
        # Non-recurrent networks.
        if model_type in {'ATTN_WEIGHTED', 'ATTN_SUM'}:
            # Attention layer.
            x = restricted_attention(x, k=5)
        # Word-wise FC layers -- make sure the sub-score step is built with
        # name="sub_score", e.g. sub_score = layers.Dense(2, name="sub_score")(x)
        # -- missing code --
        x = layers.Dense(32, activation='relu')(x)
        if model_type in {'WEIGHTED', 'ATTN_WEIGHTED'}:
            x = layers.Dense(2, name="sub_score")(x)
            x0 = layers.Lambda(lambda x: x[:, :, 0])(x)
            x1 = layers.Lambda(lambda x: x[:, :, 1])(x)
            sum_weights = layers.Softmax(name='sum_weights')(x1)
            x = tf.expand_dims(x0 * sum_weights, 2)
        else:
            x = layers.Dense(1, name="sub_score")(x)
        # Sum the per-word scores, then squash to a probability.
        x = K.sum(x, axis=1)
        predictions = tf.sigmoid(x)
    else:
        # Recurrent networks.
        if model_type == 'GRU':
            x, _ = GRU(n_units, x)
        else:
            x, _ = RNN(n_units, x)
        x = layers.Dense(32, activation='relu')(x)
        predictions = layers.Dense(1, activation='sigmoid')(x)
    model = models.Model(inputs=sequences, outputs=predictions)
    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['binary_accuracy', f1],
    )
    return model