def nnet(inputs, num_classes, keep_prob=0.2):
    """
    Deep network model for single-lead signals.

    :param inputs: keras tensor, the sliced and stacked single-lead signal.
    :param num_classes: int, number of target classes.
    :param keep_prob: float, forwarded unchanged to ``Dropout`` as its rate,
        i.e. the probability of masking a unit (despite the parameter name).
    :return: tuple ``(net, features)``: the sigmoid output of the final dense
        layer, and the flattened features that feed that layer.
    """

    def bi_gru(sequence):
        # Bidirectional GRU that keeps the full output sequence.
        return Bidirectional(CuDNNGRU(12, return_sequences=True),
                             merge_mode='concat')(sequence)

    def attend(sequence, dense_name):
        # Softmax-normalised gate computed from the sequence itself and
        # applied element-wise. 24 matches the 2 x 12 units of the
        # concatenated bidirectional GRU above.
        scores = Dense(24, name=dense_name)(sequence)
        weights = Softmax()(scores)
        return Lambda(lambda pair: pair[0] * pair[1])([weights, sequence])

    backbone_out = Net2.__backbone(inputs)
    dropped = Dropout(keep_prob)(backbone_out)

    # Two stacked GRU + attention stages.
    first_stage = attend(bi_gru(dropped), 'attention_vec')
    second_stage = attend(bi_gru(first_stage), 'attention_vec1')

    normalised = BatchNormalization()(second_stage)
    features = Flatten()(normalised)
    net = Dense(units=num_classes, activation='sigmoid')(features)
    return net, features
def build_MINI_MTL(input_shape, filters, numClasses, i):
    """Build a two-headed (edge / mask) 3D model on a cubic input.

    :param input_shape: edge length used for all three spatial dimensions.
    :param filters: channel count of the input and of the 3x3x3 convolutions.
    :param numClasses: channel count of each 1x1x1 prediction convolution.
    :param i: level index; each head is upsampled by ``pow(2, i)`` and the
        output layers are named ``out_edge_<i>`` / ``out_mask_<i>``.
    :return: ``(mtl_model, out_mtl)`` where ``mtl_model`` outputs
        ``[out_edge, out_mask]`` and ``out_mtl`` is the 1x1x1 convolution of
        the concatenated mask/edge features (not an output of the model).
    """

    def conv_bn_relu(tensor):
        # 3x3x3 convolution followed by batch norm and ReLU.
        tensor = Conv3D(filters, (3, 3, 3), padding='same')(tensor)
        tensor = BatchNormalization(axis=-1)(tensor)
        return Activation('relu')(tensor)

    def prediction_head(tensor, layer_name):
        # 1x1x1 convolution, channel softmax, then upsampling by 2**i.
        tensor = Conv3D(numClasses, (1, 1, 1), padding='same')(tensor)
        tensor = Softmax(axis=-1)(tensor)
        return UpSampling3D(pow(2, i), name=layer_name)(tensor)

    cube = Input(shape=(input_shape,) * 3 + (filters,))

    # Both branches start from RA applied to the same input.
    edge_features = RA(cube, cube, filters)
    mask_features = RA(cube, cube, filters)

    edge_features = conv_bn_relu(edge_features)
    mask_features = conv_bn_relu(mask_features)

    out_edge = prediction_head(edge_features, 'out_edge_{}'.format(i))
    out_mask = prediction_head(mask_features, 'out_mask_{}'.format(i))

    fused = Concatenate()([mask_features, edge_features])
    out_mtl = Conv3D(filters, (1, 1, 1), padding='same')(fused)

    mtl_model = Model(inputs=[cube], outputs=[out_edge, out_mask])
    return mtl_model, out_mtl
def __init__(self, config, human_vocab_size, machine_vocab_size):
    """Store hyper-parameters, create the shared layers and build the model.

    :param config: mapping that must provide 'Tx', 'Ty', 'layer1_size' and
        'layer2_size'.
    :param human_vocab_size: stored as ``x_vocab_size``.
    :param machine_vocab_size: stored as ``y_vocab_size`` and used as the
        unit count of the output dense layer.
    """
    # basic params
    self.Tx = config['Tx']
    self.Ty = config['Ty']
    self.x_vocab_size = human_vocab_size
    self.y_vocab_size = machine_vocab_size
    # net params
    self.layer1_size = config['layer1_size']
    self.layer2_size = config['layer2_size']
    # net func
    # Layers are created once here, presumably so get_model() can reuse them
    # across steps - confirm against get_model().
    self.at_repeat = RepeatVector(self.Tx)  # repeats its input Tx times
    self.at_concate = Concatenate(axis=-1)  # joins inputs along the last axis
    self.at_dense1 = Dense(8, activation='tanh')
    self.at_dense2 = Dense(1, activation='relu')
    # Softmax over axis 1 (not the default last axis).
    self.at_softmax = Activation(lambda x: Softmax(axis=1)(x), name='attention_weights')
    self.at_dot = Dot(axes=1)
    self.layer3 = Dense(machine_vocab_size, activation=lambda x: Softmax(axis=1)(x))
    # get model
    self.model = self.get_model()
def train_student(self, c=0.0, T=1.0, use_mse=False, epochs=200, lr=1e-5, batch_size=32):
    """Train the student's dense head with a distillation loss.

    :param c: forwarded to the distillation loss factory.
    :param T: forwarded to ``distillation_loss`` (ignored when ``use_mse``).
    :param use_mse: if true use ``distillation_loss_mse``, else
        ``distillation_loss``.
    :param epochs: number of training epochs (one ``fit_generator`` call and
        one evaluation per epoch).
    :param lr: learning rate of the Adam optimizer used for training.
    :param batch_size: batch size for both generators.
    :return: tuple with the second evaluation metric ("acc") of every epoch;
        an empty tuple when ``epochs`` is 0.
    """
    if use_mse:
        loss = distillation_loss_mse(c=c)
    else:
        loss = distillation_loss(c=c, T=T)

    # NOTE(security): pickle.load executes code embedded in the file; these
    # files must come from a trusted source.
    with open("train_data.p", "rb") as train_file:
        df_train = pickle.load(train_file)
    with open("test_data.p", "rb") as test_file:
        df_test = pickle.load(test_file)

    g_train = Student.distill_train_generator(df_train, batch_size=batch_size)
    g_test = Student.batch_generator(df_test, "label", batch_size=batch_size, shuffle=False)

    # Since MobileNet's convolution features for each image are stored,
    # we only train a small dense model to sit on top of them.
    top_model = keras.models.Sequential()
    top_model.add(Flatten(input_shape=(4, 4, 256)))
    top_model.add(Dense(200, activation="relu"))
    top_model.add(Dropout(0.3))
    top_model.add(Dense(nbr_classes, activation=None, name="logits"))
    top_model.add(Softmax())
    # top_model is only evaluated, never fitted, so its optimizer is unused.
    top_model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(),
                      metrics=["acc"])

    # The distillation loss functions need access to the logits before the final softmax layer.
    # Weights will be shared between logits_model and top_model,
    # so we train the logits_model, then return the top_model.
    logits_model = keras.models.Model(
        inputs=top_model.layers[0].input,
        outputs=top_model.get_layer("logits").output)
    logits_model.compile(loss=loss, optimizer=keras.optimizers.Adam(lr=lr))

    log = []
    # Honour the ``epochs`` argument (it used to be ignored in favour of a
    # hard-coded 200, which is still the default).
    for epoch in range(epochs):
        logits_model.fit_generator(g_train,
                                   steps_per_epoch=len(df_train) // batch_size,
                                   epochs=1,
                                   verbose=0)
        how_good = top_model.evaluate_generator(g_test, steps=len(df_test) // batch_size)
        print("Epoch {} validation results are {}".format(epoch, how_good))
        log.append(how_good)

    top_model.save("models/student.h5")

    if not log:
        # No epoch ran, so there is nothing to unzip or summarise.
        return ()

    _, val_accs = zip(*log)
    print("Best ={}".format(max(val_accs)))
    return val_accs
def attention_weights(input_uid, input_iid, xu, xi, user_num, item_num, embed_id_dim, random_seed,
                      attention_size, l2_reg_lambda):
    """Compute softmax weights over the user's and the item's reviews.

    User/item IDs are embedded and projected into an attention space, the
    review representations ``xu`` / ``xi`` are projected into the same space,
    and each side's reviews interact (element-wise product) with the other
    side's ID vector. A bias, ReLU and a 1-unit dense layer turn that into a
    score, which is normalised with a softmax over axis 1.

    :param input_uid: user-ID input tensor.
    :param input_iid: item-ID input tensor.
    :param xu: representation of the user's reviews.
    :param xi: representation of the item's reviews.
    :param user_num: the user embedding table has ``user_num + 2`` rows.
    :param item_num: the item embedding table has ``item_num + 2`` rows.
    :param embed_id_dim: width of the ID embeddings.
    :param random_seed: seed for the embedding initialisers and the bias.
    :param attention_size: width of the attention space.
    :param l2_reg_lambda: L2 factor for the attention projections.
    :return: ``(out_u, out_i)``, the outputs of the layers named
        'user_rev_weights' and 'item_rev_weights'.
    """

    def id_initializer():
        # A fresh initialiser per embedding; both use the same seed.
        return RandomUniform(minval=-0.1, maxval=0.1, seed=random_seed)

    def to_attention_space(tensor, layer_name):
        # Bias-free linear map into the attention space.
        return Dense(attention_size, activation=None, use_bias=False,
                     kernel_initializer='glorot_uniform',
                     kernel_regularizer=l2(l2_reg_lambda),
                     name=layer_name)(tensor)

    def biasadd_layer(x):
        # NOTE(review): Keras does not track variables created inside a
        # Lambda, so this bias is presumably never trained, and both call
        # sites use the same seed - confirm that this is intended.
        b = tf.keras.backend.random_uniform_variable(
            [attention_size], low=-0.1, high=0.1, seed=random_seed)
        return tf.keras.backend.bias_add(x, b)

    def score(tensor):
        # One scalar score per review.
        return Dense(1, activation=None, use_bias=True,
                     kernel_initializer='glorot_uniform',
                     bias_initializer='zeros')(tensor)

    uid_embedded = Embedding(user_num + 2, embed_id_dim,
                             embeddings_initializer=id_initializer(),
                             name='user_id_embed')(input_uid)
    iid_embedded = Embedding(item_num + 2, embed_id_dim,
                             embeddings_initializer=id_initializer(),
                             name='item_id_embed')(input_iid)

    # Mapping user/item ID vectors and semantics of user/item's reviews to the attention space
    uid_att = to_attention_space(uid_embedded, 'user_id_attention')
    iid_att = to_attention_space(iid_embedded, 'item_id_attention')
    user_text_att = to_attention_space(xu, 'user_text_attention')
    item_text_att = to_attention_space(xi, 'item_text_attention')

    # Interaction between the user and each item review to learn personalized review-usefulness
    out_u = Multiply(name='usertext_itemid_interaction')([user_text_att, iid_att])
    out_i = Multiply(name='itemtext_userid_interaction')([item_text_att, uid_att])

    # Earlier experiments (numpy-built bias added directly with bias_add, and
    # a Dense(1, kernel_initializer='ones', bias_initializer='random_uniform')
    # layer) were replaced by the Lambda-wrapped bias below.
    out_u, out_i = Lambda(biasadd_layer)(out_u), Lambda(biasadd_layer)(out_i)
    out_u, out_i = ReLU()(out_u), ReLU()(out_i)
    out_u, out_i = score(out_u), score(out_i)

    # Output the weight (usefulness) for each review
    out_u = Softmax(axis=1, name='user_rev_weights')(out_u)
    out_i = Softmax(axis=1, name='item_rev_weights')(out_i)
    return out_u, out_i
def call(self, answer_encoding):
    """Score each answer against the others and return a softmax over answers.

    :param answer_encoding: tensor that is batch-multiplied with its own
        (0, 2, 1) transpose, so it must be rank 3; its second dimension must
        match ``Params.max_passage_count`` for the mask to apply.
    :return: softmax (last axis) of ``self.dense_1`` applied to the
        concatenation of the encoding, its attended version and their
        product, with the last axis squeezed away.
    """
    transposed = K.permute_dimensions(answer_encoding, (0, 2, 1))
    pairwise_scores = tf.matmul(answer_encoding, transposed)

    # Mask that is 0 on the diagonal and 1 elsewhere. It zeroes each
    # answer's score with itself before the softmax.
    identity = K.eye(Params.max_passage_count)
    all_zeros = K.zeros_like(identity)
    off_diagonal = K.cast(K.equal(identity, all_zeros), dtype="float32")

    pairwise_scores = pairwise_scores * off_diagonal
    attention = Softmax(axis=-1)(pairwise_scores)

    attended = tf.matmul(attention, answer_encoding)
    combined = K.concatenate(
        [answer_encoding, attended, answer_encoding * attended])

    logits = K.squeeze(self.dense_1(combined), axis=-1)
    return Softmax(axis=-1)(logits)
def create_model(self, num_class=1000):
    """Build the backbone in ``self.net`` and return the two prediction nets.

    The backbone is extended in place with ``_add_df_layer`` blocks. After
    the 128-, 256- and 512-filter stages two shortcuts are taken from it (one
    with the default arguments, one with ``16``); they are later merged into
    the text net and the pixel-link net respectively via ``_up_sample``.

    :param num_class: unused here; it only matters for the commented-out
        classifier tail mentioned at the end of the body.
    :return: ``(text_net, pixel_net)``, each ending in a softmax layer.
    """
    self.net.add(Conv2D(filters=round(self.width_multiplier * 32),
                        kernel_size=[3, 3],
                        input_shape=self.input_shape))
    self._add_df_layer(self.net, 64)
    self._add_df_layer(self.net, 128, downsample=True)
    self._add_df_layer(self.net, 128)
    # two different net, so there are two different convolution layer
    shortcut1 = self._shortcut(self.net)
    shortcut_link1 = self._shortcut(self.net, 16)

    self._add_df_layer(self.net, 256, downsample=True)
    self._add_df_layer(self.net, 256)
    shortcut2 = self._shortcut(self.net)
    shortcut_link2 = self._shortcut(self.net, 16)

    self._add_df_layer(self.net, 512, downsample=True)
    for _ in range(5):
        self._add_df_layer(self.net, 512)
    shortcut3 = self._shortcut(self.net)
    shortcut_link3 = self._shortcut(self.net, 16)

    self._add_df_layer(self.net, 1024, downsample=True)
    self._add_df_layer(self.net, 1024)

    # the net decide text or not
    text_net = self._shortcut(self.net)
    text_net = self._up_sample(text_net, shortcut3)
    text_net = self._up_sample(text_net, shortcut2)
    text_net = self._up_sample(text_net, shortcut1)
    # Softmax's only positional argument is ``axis``; the model must not be
    # passed to the constructor. The default axis (-1) is used instead.
    text_net.add(Softmax())

    # the net decide pixel linked or not
    pixel_net = self._shortcut(self.net)
    pixel_net = self._up_sample(pixel_net, shortcut_link3)
    pixel_net = self._up_sample(pixel_net, shortcut_link2)
    pixel_net = self._up_sample(pixel_net, shortcut_link1)
    pixel_net.add(Softmax())

    # The last three layers of MobileNet (7x7 average pooling, a
    # Dense(num_class) layer and a softmax) are intentionally not added.
    return text_net, pixel_net
def __net(self, data_shape, network_shape):
    """Create the encoder / latent / decoder tensors and store them on self.

    :param data_shape: shape of both inputs; its first entry is also the
        width of the output layer.
    :param network_shape: iterable of dense-layer widths; used in order for
        the encoder and in reverse for the decoder.

    Sets ``data_in``, ``data_co``, ``latent``, ``latent_out``,
    ``latent_softmax`` and ``data_out``.
    """
    print('Building Network')

    # Placeholders: data_in is re-injected after every dense layer, data_co
    # (the desired output) is what the encoder starts from.
    self.data_in = Input(shape=data_shape)
    self.data_co = Input(shape=data_shape)

    # Encoder
    encoded = self.data_co
    for units in network_shape:
        encoded = Dense(units, activation='relu')(encoded)
        encoded = concatenate([encoded, self.data_in])

    # Latent: a reshaped softmax view for inspection and a sampled version
    # (via the Gumbel sampling method) that feeds the decoder.
    self.latent = Dense(self.latent_units)(encoded)
    reshaped_latent = Reshape(self.latent_shape)(self.latent)
    self.latent_out = Softmax(name='latent_output')(reshaped_latent)
    self.latent_softmax = Lambda(self.__gumbelSample,
                                 output_shape=(self.latent_units, ))(self.latent)

    # Decoder
    decoded = self.latent_softmax
    for index, units in enumerate(network_shape[::-1]):
        decoded = Dense(units, activation='relu',
                        name='decoder-{}'.format(index))(decoded)
        decoded = concatenate([decoded, self.data_in])

    # Output
    self.data_out = Dense(data_shape[0], activation='sigmoid')(decoded)
def build_3d_model(settings: ModelSettings, no_of_classes: int, no_of_bands: int):
    """Build and compile a small convolutional classifier.

    Despite the name, the layers are 2D convolutions applied to a
    ``settings.input_neighborhood + (no_of_bands,)`` channels-last input.

    :param settings: provides ``first_conv_kernel_size`` and
        ``input_neighborhood``.
    :param no_of_classes: filter count of the last convolution, whose
        flattened output is passed through a softmax.
    :param no_of_bands: number of input channels.
    :return: the compiled ``Sequential`` model (Adam, lr 0.001,
        categorical cross-entropy, accuracy metric).
    """
    stack = (
        Conv2D(filters=200,
               kernel_size=settings.first_conv_kernel_size,
               strides=(1, 1),
               input_shape=settings.input_neighborhood + (no_of_bands, ),
               data_format='channels_last',
               padding='valid'),
        MaxPooling2D(pool_size=(2, 2), padding='valid'),
        Conv2D(filters=200, kernel_size=(2, 2), padding='same', activation='relu'),
        Conv2D(filters=no_of_classes, kernel_size=(2, 2), padding='valid'),
        Flatten(),
        Softmax(),
    )

    model = Sequential()
    for layer in stack:
        model.add(layer)

    model.compile(optimizer=Adam(lr=0.001),
                  metrics=['accuracy'],
                  loss='categorical_crossentropy')
    return model
def CreateCNN(nUnits=(32, 64, 128, 128, 128, 256),
              inShape=(None, None, 3),
              dropProb=.2,
              nClasses=10,
              linearOut=False):
    """Build and compile a fully convolutional classifier.

    Each entry of ``nUnits`` adds a 3x3 'valid' convolution followed by batch
    normalisation, ReLU and dropout. A 1x1 convolution with ``nClasses``
    filters, global average pooling and a softmax form the head.

    :param nUnits: filter count of each convolution stage.
    :param inShape: input shape (without the batch dimension).
    :param dropProb: dropout rate used after every stage.
    :param nClasses: number of output classes.
    :param linearOut: currently not used by this function.
    :return: the compiled model (Adam lr 1e-4, categorical cross-entropy,
        accuracy metric).
    """
    inp = Input(shape=inShape)

    feature_map = inp
    for width in nUnits:
        feature_map = Conv2D(width, (3, 3), padding='valid', strides=(1, 1))(feature_map)
        feature_map = BatchNormalization()(feature_map)
        feature_map = Activation('relu')(feature_map)
        feature_map = Dropout(dropProb)(feature_map)

    class_map = Conv2D(nClasses, (1, 1))(feature_map)
    pooled = GlobalAveragePooling2D()(class_map)
    pred = Softmax()(pooled)

    mod = Model(inputs=inp, outputs=pred)
    mod.compile(optimizer=Adam(lr=.0001),
                loss='categorical_crossentropy',
                metrics=['accuracy'])
    return mod
def global_context_block(x, channels):
    """Apply a global-context block to a 4D feature map.

    A spatial softmax mask pools ``x`` into one context vector, which is
    transformed twice: once into a sigmoid gate multiplied onto ``x`` and
    once into a term added to the gated result.

    :param x: 4D tensor with static height, width and channel dimensions.
    :param channels: width passed to ``conv_block`` for both transforms.
    :return: tensor produced by the final ``Add`` with ``x``.

    Layer names are fixed ('a', 'b', 'c', 'gc-conv0' ... 'gc-conv4').
    Shape annotations below are carried over from the original comments.
    """
    _, height, width, depth = x.shape.as_list()
    positions = height * width

    # Features laid out so they can be multiplied with the mask.
    flat_features = Reshape((positions, depth))(x)                            # [N, H*W, C]
    flat_features = Permute((2, 1))(flat_features)                            # [N, C, H*W]
    flat_features = Lambda(backend_expand_dims_1, name='a')(flat_features)    # [N, 1, C, H*W]

    # Attention mask: softmax over all spatial positions.
    mask = Conv2D(1, (1, 1), name='gc-conv0')(x)
    mask = Reshape((positions, 1))(mask)                                      # [N, H*W, 1]
    mask = Softmax(axis=1)(mask)                                              # [N, H*W, 1]
    mask = Permute((2, 1))(mask)                                              # [N, 1, H*W]
    mask = Lambda(backend_expand_dims_last, name='b')(mask)                   # [N, 1, H*W, 1]

    pooled = Lambda(backend_dot, name='c')([flat_features, mask])
    pooled = Reshape((1, 1, depth))(pooled)                                   # [N, 1, 1, C]

    # Multiplicative (sigmoid-gated) fusion.
    gate = conv_block(pooled, channels, 1, strides=1, name='gc-conv1')
    gate = Conv2D(depth, (1, 1), name='gc-conv2')(gate)
    gate = Activation('sigmoid')(gate)
    gated = Multiply()([x, gate])

    # Additive fusion.
    shift = conv_block(pooled, channels, 1, strides=1, name='gc-conv3')
    shift = Conv2D(depth, (1, 1), name='gc-conv4')(shift)
    return Add()([gated, shift])
def construct_network(self, log):
    """Build and compile the network predicting this attribute from its parents.

    Every parent attribute gets its own 1-element input and a 32-wide,
    zero-initialised embedding. The embeddings are concatenated, passed
    through dropout and a dense layer, flattened and soft-maxed.

    :param log: table indexed by attribute name; ``.max()`` of a column is
        used to size the embeddings and the dense layer.
    :return: the compiled model (Nadam, categorical cross-entropy on the
        'output' layer). The model summary is printed as a side effect.
    """
    # Characters that are rewritten to turn attribute names into layer names.
    substitutions = ((" ", "_"), ("(", ""), (")", ""), (":", "_"))

    # Input + Embedding layer for every parent
    parent_inputs = []
    parent_embeddings = []
    for parent in self.parents:
        layer_name = parent.attr_name
        for old, new in substitutions:
            layer_name = layer_name.replace(old, new)

        parent_input = Input(shape=(1,), name=layer_name)
        parent_inputs.append(parent_input)

        embedded = Embedding(log[parent.attr_name].max() + 2, 32,
                             embeddings_initializer="zeros")(parent_input)
        parent_embeddings.append(embedded)

    merged = Concatenate(name="concat")(parent_embeddings)
    regularised = Dropout(0.2)(merged)
    scores = Dense(log[self.attr_name].max() + 1)(regularised)
    output = Softmax(name="output")(Flatten()(scores))

    model = Model(inputs=parent_inputs, outputs=[output])
    optimizer = Nadam(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08,
                      schedule_decay=0.004, clipvalue=3)
    model.compile(loss={'output': 'categorical_crossentropy'}, optimizer=optimizer)
    model.summary()
    return model
def self_attn_block(inp, n_c, squeeze_factor=8):
    """GAN self-attention block.

    Code borrows from https://github.com/taki0112/Self-Attention-GAN-Tensorflow

    :param inp: input feature map.
    :param n_c: number of channels; must be at least ``squeeze_factor``.
    :param squeeze_factor: divisor for the channel count of the two
        projections that form the attention map.
    :return: ``inp`` plus the scaled attention output.
    """
    msg = "Input channels must be >= {}, recieved nc={}".format(squeeze_factor, n_c)
    squeezed = n_c // squeeze_factor
    assert squeezed > 0, msg

    input_dims = inp.get_shape().as_list()

    def project(channels):
        # 1x1 convolution of the input with its own L2 regulariser.
        return Conv2D(channels, 1,
                      kernel_regularizer=regularizers.l2(GAN22_REGULARIZER))(inp)

    def flatten_spatial(tensor):
        # Collapse everything but the channel axis into one axis.
        return Reshape((-1, tensor.get_shape().as_list()[-1]))(tensor)

    conv_f = project(squeezed)
    conv_g = project(squeezed)
    conv_h = project(n_c)

    flat_f, flat_g, flat_h = [flatten_spatial(t) for t in (conv_f, conv_g, conv_h)]

    # Attention map: g times f transposed, soft-maxed over the last axis.
    similarity = Lambda(
        lambda pair: K.batch_dot(pair[0], Permute((2, 1))(pair[1])))([flat_g, flat_f])
    beta = Softmax(axis=-1)(similarity)

    attended = Lambda(lambda pair: K.batch_dot(pair[0], pair[1]))([beta, flat_h])
    attended = Reshape(input_dims[1:])(attended)
    attended = Scale()(attended)

    return add([attended, inp])
def cards(hid):
    """Map hidden features to a (36, 4) softmax output named 'cards'.

    :param hid: hidden tensor fed to a dense layer with 36 * 4 units, using
        the ``activation`` taken from the enclosing scope.
    :return: the (36, 4) tensor after a softmax over its last axis.

    No dropout or batch normalisation is used this close to the output.
    """
    flat = Dense(units=36 * 4, activation=activation)(hid)
    grid = Reshape((36, 4))(flat)
    return Softmax(name='cards')(grid)
def model_DL2(trainX, trainy, wd=0.005, lr=0.01, lr_decay=1e-4):
    """Build an attention-pooled, per-channel model with one sigmoid output.

    Every channel is processed by the same ``Inception1D`` extractor; a small
    attention network produces one softmax weight per channel; the weighted
    channel features are averaged and mapped to a single score.

    :param trainX: array whose ``shape[1]`` / ``shape[2]`` give the number of
        channels and timesteps. Only its shape is used.
    :param trainy: array; only ``shape[1]`` is read, and the value is unused.
    :param wd: L2 weight-decay factor for the attention and hidden layers.
    :param lr: not used by this function.
    :param lr_decay: not used by this function.
    :return: the (uncompiled) Keras model.
    """
    n_channels, n_timesteps, _n_outputs = trainX.shape[1], trainX.shape[2], trainy.shape[1]

    data_input = Input(shape=(n_channels, n_timesteps), dtype='float32', name='input')
    per_channel_model = Inception1D((n_timesteps, 1))

    # channel-distributed feature extractor block
    expanded = Lambda(lambda t: K.expand_dims(t, axis=-1), name='data_input_rs')(data_input)
    channel_features = TimeDistributed(per_channel_model, name='data_before_mil')(expanded)

    # attention block: one scalar per channel, soft-maxed across channels
    hidden_scorer = TimeDistributed(
        Dense(32, activation='tanh', kernel_regularizer=l2(wd), use_bias=False))
    attention = hidden_scorer(channel_features)
    scalar_scorer = TimeDistributed(
        Dense(1, activation=None, kernel_regularizer=l2(wd), use_bias=False))
    attention = scalar_scorer(attention)
    attention = Lambda(lambda t: K.squeeze(t, -1))(attention)
    attention = Softmax()(attention)
    attention = Lambda(lambda t: K.expand_dims(t))(attention)
    # Repeat the weight so it lines up with every feature of the channel.
    attention = Lambda(
        lambda t: K.repeat_elements(t, channel_features.shape[-1], -1),
        name='att_mil_weights')(attention)

    # To inspect the attention weights, build
    # Model(inputs=[data_input], outputs=[attention]) at this point.
    weighted = Multiply()([channel_features, attention])
    pooled = GlobalAveragePooling1D()(weighted)

    hidden = Dense(32, activation='relu', kernel_regularizer=l2(wd))(pooled)
    score = Dense(1, activation='sigmoid', name='out_score')(hidden)

    return Model(inputs=[data_input], outputs=[score])
def frontend(x):
    """Attention-pooling head: classification branch weighted by an attention branch.

    :param x: input tensor; rank-4 inputs are first reduced (see below).
    :return: output of ``_attention_pooling`` applied to the classification
        output and the attention weights.
    """
    # NOTE(review): the nesting of this if/else was reconstructed from a
    # whitespace-collapsed source; the `else` is assumed to pair with the
    # inner `if` - confirm against the original file.
    if len(x.shape) == 4:
        if x.shape[2] != 1:
            # Two Lambda views of the input are joined on the last axis.
            x1 = Lambda(l1)(x)
            x2 = Lambda(l2)(x)
            x = K.layers.Concatenate(axis=-1)([x1, x2])
            # x = K.layers.Reshape((x.shape[1], -1))(x)
        else:
            # presumably squeezes the singleton axis 2 - verify `sq`
            x = K.layers.Lambda(sq(2))(x)
    x = K.layers.Dropout(rate=0.5)(x)

    # Classification branch: softmax over classes_num outputs.
    cla = Dense(256, activation='relu')(x)
    cla = K.layers.Dropout(rate=0.5)(cla)
    cla = Dense(128, activation='relu')(cla)
    cla = K.layers.Dropout(rate=0.5)(cla)
    cla = Dense(classes_num, activation='softmax')(cla)

    # Attention branch: one scalar per position, soft-maxed over axis 1.
    att = Dense(256, activation='relu')(x)
    att = K.layers.Dropout(rate=0.5)(att)
    att = Dense(128, activation='relu')(att)
    att = K.layers.Dropout(rate=0.5)(att)
    att = Dense(1, activation=None)(att)
    att = Softmax(axis=1)(att)
    # att = Dense(classes_num, activation='softmax')(att)

    output_layer = Lambda(_attention_pooling)([cla, att])
    # output_layer = Lambda(l1_norm(1))(output_layer)
    return output_layer
def Bert(max_seq_length=100, vocabulary_size=100, word_embedding_size=100,
         use_universal_transformer = 0, transformer_depth=5, num_heads=10,
         transformer_dropout = 0.1, embedding_dropout = 0.6, l2_reg_penalty = 1e-4,
         num_segments=None):
    """Build a BERT-style model with word-prediction and class-prediction heads.

    :param max_seq_length: length of both input sequences.
    :param vocabulary_size: size of the word-embedding table.
    :param word_embedding_size: width of word and segment embeddings.
    :param use_universal_transformer: if truthy, one shared transformer block
        is applied ``transformer_depth`` times with adaptive computation
        time; otherwise ``transformer_depth`` separate blocks are stacked.
    :param transformer_depth: number of transformer steps / blocks.
    :param num_heads: attention heads per transformer block.
    :param transformer_dropout: residual and attention dropout rate.
    :param embedding_dropout: dropout rate of the tied output projection.
    :param l2_reg_penalty: L2 factor for embedding and output projection;
        a falsy value disables regularisation.
    :param num_segments: number of rows in the segment-embedding table.
        ``None`` keeps the historical table size (``word_embedding_size``
        rows) so previously saved weights still fit.
    :return: model with inputs ``[word_ids, segment_ids]`` and outputs
        ``[word_predictions, class_prediction]``.
    """
    word_ids = Input(shape=(max_seq_length,), dtype='int32', name='word_ids')
    segment_ids = Input(
        shape=(max_seq_length,), dtype='int32', name='segment_ids')
    l2_regularizer = (regularizers.l2(l2_reg_penalty) if l2_reg_penalty
                      else None)
    embedding_layer = ReusableEmbedding(vocabulary_size, word_embedding_size,
                                        input_length=max_seq_length,
                                        name='bpe_embeddings',
                                        embeddings_regularizer=l2_regularizer)

    # Segment embeddings are added to the word embeddings, so their width
    # must be word_embedding_size. The previous
    # Embedding(word_embedding_size, max_seq_length) only worked when the
    # two values happened to be equal.
    segment_table_size = (word_embedding_size if num_segments is None
                          else num_segments)
    segment_embedding_layer = Embedding(segment_table_size, word_embedding_size,
                                        name='segment_embeddings')
    add_segment_layer = Add(name='add_segment')
    output_layer = TiedOutputEmbedding(projection_regularizer=l2_regularizer,
                                       projection_dropout=embedding_dropout,
                                       name='word_prediction_logits')
    output_softmax_layer = Softmax(name='word_predictions')
    coordinate_embedding_layer = TransformerCoordinateEmbedding(
        transformer_depth if use_universal_transformer else 1,
        name='coordinate_embedding')

    next_step_input, embedding_matrix = embedding_layer(word_ids)
    segment_embeddings = segment_embedding_layer(segment_ids)

    if use_universal_transformer:
        # One shared block, applied repeatedly with adaptive computation time.
        act_layer = TransformerACT(name='adaptive_computation_time')
        transformer_block = TransformerBlock(
            name='transformer', num_heads=num_heads,
            residual_dropout=transformer_dropout,
            attention_dropout=transformer_dropout,
            use_masking=False)
        act_output = next_step_input
        for i in range(transformer_depth):
            next_step_input = coordinate_embedding_layer(next_step_input, step=i)
            next_step_input = add_segment_layer([next_step_input, segment_embeddings])
            next_step_input = transformer_block(next_step_input)
            next_step_input, act_output = act_layer(next_step_input)
        act_layer.finalize()
        next_step_input = act_output
    else:
        # Vanilla stack: position and segment information is added once.
        next_step_input = coordinate_embedding_layer(next_step_input, step=0)
        next_step_input = add_segment_layer([next_step_input, segment_embeddings])
        for i in range(transformer_depth):
            next_step_input = (TransformerBlock(
                name='transformer' + str(i), num_heads=num_heads,
                residual_dropout=transformer_dropout,
                attention_dropout=transformer_dropout,
                use_masking=False,
                vanilla_wiring=True)(next_step_input))

    word_predictions = output_softmax_layer(
        output_layer([next_step_input, embedding_matrix]))
    # The first sequence position feeds the binary classification head.
    cls_node_slice = (Lambda(lambda x: x[:, 0], name='cls_node_slicer')(next_step_input))
    class_prediction = (Dense(1, name='class_prediction', activation='sigmoid')(cls_node_slice))
    model = Model(inputs=[word_ids, segment_ids],
                  outputs=[word_predictions, class_prediction])
    return model
def attention_scaled_dot(activations, attention_mask):  #, length):
    """Masked scaled dot-product self-attention (https://arxiv.org/pdf/1706.03762.pdf).

    :param activations: rank-3 tensor with static second and third
        dimensions (both are converted with ``int``).
    :param attention_mask: per-position additive mask; it is repeated for
        every query row and added to the scores before the softmax.
    :return: the attention-weighted values.
    """
    feature_dim = int(activations.shape[2])
    _sequence_len = int(activations.shape[1])  # read but not used further
    drop_rate = .1

    def project():
        # Bias-free, per-position linear projection followed by dropout.
        linear = TimeDistributed(
            Dense(feature_dim, activation=None, use_bias=False))(activations)
        return Dropout(drop_rate)(linear)

    queries, keys, values = project(), project(), project()

    # normalize=False: plain dot products. With True, samples would be
    # L2-normalised along the dot axis first, giving cosine proximity.
    scores = Dot(axes=-1, normalize=False)([queries, keys])

    # Scale by the square root of the size of the last axis of the values.
    scores = Lambda(
        lambda pair: pair[0] / backend.sqrt(
            backend.cast(shape_list(pair[1])[-1], backend.floatx())))([scores, values])

    # An earlier approach built a numpy "cropping" vector of large negative
    # numbers from a `length` argument; the mask input replaces it.
    tiled_mask = RepeatVector(int(scores.shape[1]))(attention_mask)
    scores = Add()([scores, tiled_mask])

    weights = Softmax(axis=-1)(scores)
    weights = Dropout(drop_rate)(weights)

    # Transpose the values so the dot over the last axes sums over positions.
    values_t = Permute([2, 1])(values)
    return Dot(axes=-1, normalize=False)([weights, values_t])
def build_model(input_shape):
    """
    Build the binary classification model (based on the 1.4.0 version).

    Parameters:
        input_shape (tuple): The input shape of the model, in the form
            (1, ..., ...); the leading entry is dropped.

    Returns:
        Model: The built functional model.
    """

    def dual_branch(tensor, dilation):
        # A dilated 5x5 convolution and a strided 4x4 convolution run in
        # parallel; both are batch-normalised, concatenated and activated.
        dilated = Conv2D(5, kernel_size=5, dilation_rate=dilation)(tensor)
        dilated = BatchNormalization()(dilated)
        strided = Conv2D(5, kernel_size=4, strides=2)(tensor)
        strided = BatchNormalization()(strided)
        return LeakyReLU(alpha=0.1)(concatenate([dilated, strided]))

    net_input = Input(shape=input_shape[1:])

    # Conv Layer 1, Preprocessing (fixed, non-trainable filters)
    net = Conv2D(4, kernel_size=5, kernel_initializer=filters, strides=2,
                 name="Fixed_Filters", trainable=False)(net_input)
    net = BatchNormalization()(net)
    net = LeakyReLU(alpha=0.1)(net)

    # Conv Layers 2 and 3, Feature learning
    net = dual_branch(net, dilation=8)
    net = dual_branch(net, dilation=4)

    # Fully connected Layers, Binary classification
    net = Flatten()(net)
    for _ in range(2):
        net = Dropout(0.2)(net)
        net = Dense(200)(net)
        net = LeakyReLU(alpha=0.1)(net)
    net = Dropout(0.2)(net)
    net = Dense(2)(net)
    probabilities = Softmax()(net)

    return Model(inputs=net_input, outputs=probabilities)
def build_onet(self, input_shape=None):
    """Build the O-Net: four conv stages, a 256-unit dense layer, three heads.

    :param input_shape: input shape; defaults to ``(48, 48, 3)``.
    :return: model whose outputs are, in order, the 4-unit head, the 10-unit
        head and the 2-unit softmax head.
    """
    input_shape = (48, 48, 3) if input_shape is None else input_shape
    o_inp = Input(input_shape)

    # (conv filters, conv kernel, pool size, pool padding) of pooled stages.
    pooled_stages = (
        (32, (3, 3), (3, 3), "same"),
        (64, (3, 3), (3, 3), "valid"),
        (64, (3, 3), (2, 2), "same"),
    )
    net = o_inp
    for n_filters, kernel, pool, pool_padding in pooled_stages:
        net = Conv2D(n_filters, kernel_size=kernel, strides=(1, 1), padding="valid")(net)
        net = PReLU(shared_axes=[1, 2])(net)
        net = MaxPooling2D(pool_size=pool, strides=(2, 2), padding=pool_padding)(net)

    net = Conv2D(128, kernel_size=(2, 2), strides=(1, 1), padding="valid")(net)
    net = PReLU(shared_axes=[1, 2])(net)

    net = Flatten()(net)
    net = Dense(256)(net)
    net = PReLU()(net)

    softmax_head = Softmax(axis=1)(Dense(2)(net))
    head_4 = Dense(4)(net)
    head_10 = Dense(10)(net)

    return Model(o_inp, [head_4, head_10, softmax_head])
def build_rnet(self, input_shape=None):
    """Build the R-Net: three conv stages, a 128-unit dense layer, two heads.

    :param input_shape: input shape; defaults to ``(24, 24, 3)``.
    :return: model whose outputs are, in order, the 4-unit head and the
        2-unit softmax head.
    """
    input_shape = (24, 24, 3) if input_shape is None else input_shape
    r_inp = Input(input_shape)

    # (conv filters, pool padding) of the two pooled 3x3 stages.
    net = r_inp
    for n_filters, pool_padding in ((28, "same"), (48, "valid")):
        net = Conv2D(n_filters, kernel_size=(3, 3), strides=(1, 1), padding="valid")(net)
        net = PReLU(shared_axes=[1, 2])(net)
        net = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding=pool_padding)(net)

    net = Conv2D(64, kernel_size=(2, 2), strides=(1, 1), padding="valid")(net)
    net = PReLU(shared_axes=[1, 2])(net)

    net = Flatten()(net)
    net = Dense(128)(net)
    net = PReLU()(net)

    softmax_head = Softmax(axis=1)(Dense(2)(net))
    head_4 = Dense(4)(net)

    return Model(r_inp, [head_4, softmax_head])
def build_pnet(self, input_shape=None):
    """Build the fully convolutional P-Net with two 1x1-convolution heads.

    :param input_shape: input shape; defaults to ``(None, None, 3)`` so any
        spatial size is accepted.
    :return: model whose outputs are, in order, the 4-channel head and the
        2-channel head soft-maxed over axis 3.
    """
    input_shape = (None, None, 3) if input_shape is None else input_shape
    p_inp = Input(input_shape)

    net = Conv2D(10, kernel_size=(3, 3), strides=(1, 1), padding="valid")(p_inp)
    net = PReLU(shared_axes=[1, 2])(net)
    net = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding="same")(net)

    # Two further 3x3 stages without pooling.
    for n_filters in (16, 32):
        net = Conv2D(n_filters, kernel_size=(3, 3), strides=(1, 1), padding="valid")(net)
        net = PReLU(shared_axes=[1, 2])(net)

    softmax_head = Conv2D(2, kernel_size=(1, 1), strides=(1, 1))(net)
    softmax_head = Softmax(axis=3)(softmax_head)
    head_4 = Conv2D(4, kernel_size=(1, 1), strides=(1, 1))(net)

    return Model(p_inp, [head_4, softmax_head])
def create_model(cfg):
    """Build a fully connected ``Sequential`` model from a config mapping.

    Keys read from ``cfg``: 'input_dim', 'gaussian_noise',
    'num_hidden_layers', 'layer_size', 'weight_decay', 'activation',
    'batch_normalization', 'dropout', 'output_dim' and 'task'.

    ``num_hidden_layers - 1`` hidden blocks are created (dense, activation,
    optional batch norm, dropout), followed by the output dense layer, an
    optional batch norm and, for the 'classification' task, a softmax.

    :param cfg: configuration mapping as described above.
    :return: the (uncompiled) model.
    """

    def regularised_dense(units, layer_name):
        # He-normal dense layer with its own L2 regulariser instance.
        return Dense(units=units,
                     kernel_initializer='he_normal',
                     kernel_regularizer=regularizers.l2(cfg['weight_decay']),
                     name=layer_name)

    model = Sequential()

    # Input layer with additive Gaussian noise
    model.add(InputLayer(batch_input_shape=(None, cfg['input_dim']), name='input'))
    model.add(GaussianNoise(stddev=cfg['gaussian_noise']))

    # Hidden layers
    for index in range(cfg['num_hidden_layers'] - 1):
        model.add(regularised_dense(cfg['layer_size'], 'hidden{}'.format(index)))
        model.add(Activation(activation=cfg['activation']))
        if cfg['batch_normalization']:
            model.add(BatchNormalization())
        model.add(Dropout(rate=cfg['dropout']))

    # Output layer
    model.add(regularised_dense(cfg['output_dim'], 'output'))
    if cfg['batch_normalization']:
        model.add(BatchNormalization())

    # Optional softmax layer
    if cfg['task'] == 'classification':
        model.add(Softmax())

    return model
def train_delinquent(self, epochs=200, lr=1e-5, batch_size=32):
    """Train the baseline ("delinquent") model without distillation.

    Delinquent refers to a model that is trained without using distillation.
    This is used as a baseline to evaluate the benefit of distillation.

    :param epochs: number of training epochs.
    :param lr: learning rate of the Adam optimizer.
    :param batch_size: batch size for the train and test generators.
    :return: the object returned by ``fit_generator``.
    """
    # NOTE(security): pickle.load executes code embedded in the file; these
    # files must come from a trusted source.
    # Context managers make sure both files are closed again.
    with open("train_data.p", "rb") as train_file:
        df_train = pickle.load(train_file)
    with open("test_data.p", "rb") as test_file:
        df_test = pickle.load(test_file)

    g_train = Student.batch_generator(df_train, "label", batch_size=batch_size)
    g_test = Student.batch_generator(df_test, "label", batch_size=batch_size, shuffle=False)

    # Small dense head on top of stored (4, 4, 256) feature maps.
    top_model = keras.models.Sequential()
    top_model.add(Flatten(input_shape=(4, 4, 256)))
    top_model.add(Dense(200, activation="relu"))
    top_model.add(Dropout(0.3))
    top_model.add(Dense(nbr_classes, activation=None, name="logits"))
    top_model.add(Softmax())
    top_model.compile(loss=keras.losses.categorical_crossentropy,
                      optimizer=keras.optimizers.Adam(lr=lr),
                      metrics=["acc"])

    results = top_model.fit_generator(
        g_train,
        steps_per_epoch=len(df_train) // batch_size,
        validation_data=g_test,
        validation_steps=len(df_test) // batch_size,
        epochs=epochs)
    return (results)
def attention_scaled_dot(activations):
    """Scaled dot-product self-attention (https://arxiv.org/pdf/1706.03762.pdf).

    :param activations: rank-3 tensor with static second and third
        dimensions (both are converted with ``int``).
    :return: the attention-weighted values.
    """
    feature_dim = int(activations.shape[2])
    _sequence_len = int(activations.shape[1])  # read but not used further

    def project():
        # Bias-free, per-position linear projection followed by dropout.
        linear = TimeDistributed(
            Dense(feature_dim, activation=None, use_bias=False))(activations)
        return Dropout(.2)(linear)

    queries, keys, values = project(), project(), project()

    scores = Dot(axes=-1, normalize=False)([queries, keys])

    # Scale by the square root of the size of the last axis of the values.
    scores = Lambda(lambda pair: pair[0] / backend.sqrt(
        backend.cast(shape_list(pair[1])[-1], backend.floatx())))([scores, values])

    weights = Softmax(axis=-1)(scores)
    weights = Dropout(.2)(weights)

    # Transpose the values so the dot over the last axes sums over positions.
    values_t = Permute([2, 1])(values)
    return Dot(axes=-1, normalize=False)([weights, values_t])
def run():
    """Train a tiny softmax classifier on jets and save it to disk.

    Reads the 'jets' dataset from ``args.input_file``, trains a
    2-input / 3-class dense + softmax model for ``args.epochs`` epochs, and
    writes ``architecture.json`` and ``weights.h5`` to ``args.output_dir``.
    """
    args = get_args()

    # get the jets out of the input file.
    with h5py.File(args.input_file, 'r') as infile:
        jets = np.asarray(infile['jets'])

    # first, let's make the training dataset!
    input_data = preproc_inputs(jets)
    targets = make_targets(jets)

    # now make the network
    from keras.layers import Input, Dense, Softmax
    from keras.models import Model

    input_node = Input(shape=(2, ))
    dense = Dense(3)(input_node)
    pred = Softmax()(dense)
    model = Model(inputs=input_node, outputs=pred)
    model.compile(optimizer='rmsprop',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    # now fit this thing!
    model.fit(input_data, targets, epochs=args.epochs)

    # finally, save the trained network
    odir = args.output_dir
    # makedirs(exist_ok=True) also creates missing parent directories and
    # avoids the race between checking for and creating the directory.
    os.makedirs(odir, exist_ok=True)
    with open(f'{odir}/architecture.json', 'w') as arch_file:
        arch_file.write(model.to_json(indent=2))
    model.save_weights(f'{odir}/weights.h5')
def build_model(x_shape, num_classes):
    """Build a three-stage CNN classifier with a dense softmax head.

    :param x_shape: shape of the model input (without the batch dimension).
    :param num_classes: number of output classes.
    :return: the (uncompiled) functional model.
    """
    inputs = Input(shape=x_shape)

    # Stage 1. The ``input_shape`` keyword is kept from the original code;
    # the actual input shape comes from ``inputs``.
    net = Conv2D(16, kernel_size=(5, 5), input_shape=(250, 16, 1),
                 padding='same', use_bias=True)(inputs)
    net = LeakyReLU(alpha=0.2)(net)
    net = MaxPooling2D((2, 2), padding='same')(net)
    net = Dropout(0.2)(net)

    # Stages 2 and 3: (filters, dropout rate or None for no dropout).
    for n_filters, drop_rate in ((2 * 16, 0.1), (4 * 16, None)):
        net = Conv2D(n_filters, (5, 5), padding='same', use_bias=True)(net)
        net = LeakyReLU(alpha=0.2)(net)
        net = MaxPooling2D(pool_size=(2, 2), padding='same')(net)
        if drop_rate is not None:
            net = Dropout(drop_rate)(net)

    # Dense head with shrinking widths.
    net = Flatten()(net)
    for width in (64, 32, 16):
        net = Dense(width)(net)
        net = ReLU()(net)

    logits = Dense(num_classes)(net)
    predictions = Softmax()(logits)

    return Model(inputs=inputs, outputs=predictions)
def build_actor_baseline(self):
    """Build and compile the shared-trunk actor / baseline model.

    All but the last entry of ``self.options.layers`` form a shared ReLU
    trunk. The actor and the baseline each get their own final hidden layer
    of the last width.

    :return: compiled model with outputs 'actor_output' (softmax over
        ``self.env.action_space.n`` actions) and 'baseline_output' (scalar),
        trained with ``pg_loss()`` and mean squared error respectively, both
        weighted 1.0.
    """
    hidden_sizes = self.options.layers
    states = Input(shape=self.state_size)

    trunk = states
    for width in hidden_sizes[:-1]:
        trunk = Dense(width, activation='relu')(trunk)

    # actor and critic heads have a separated final fully connected layer
    actor = Dense(hidden_sizes[-1], activation='tanh')(trunk)
    actor = Dense(self.env.action_space.n, activation='tanh')(actor)
    critic = Dense(hidden_sizes[-1], activation='relu')(trunk)

    probs = Softmax(name='actor_output')(actor)
    baseline = Dense(1, activation='linear', name='baseline_output')(critic)

    model = Model(inputs=[states], outputs=[probs, baseline])

    optimizer = Adam(lr=self.options.alpha)
    loss_by_output = {
        'actor_output': pg_loss(),
        'baseline_output': losses.MeanSquaredError(),
    }
    weight_by_output = {'actor_output': 1.0, 'baseline_output': 1.0}
    model.compile(optimizer=optimizer,
                  loss=loss_by_output,
                  loss_weights=weight_by_output)
    return model
def Create_Model():
    """Build the multi-receptive-field 3D model.

    A strided 3D convolution with ELU feeds three receptive-field branches
    (small, medium, large) whose outputs are summed and soft-maxed.

    :return: the (uncompiled) model.
    """
    volume = Input(shape=(XDIM, YDIM, TIMESTEPS, 1))

    # 1st Convolution Layer
    stem = Conv3D(kernel_size=(3, 3, 5), strides=(2, 2, 4),
                  filters=16, name="Conv1")(volume)
    stem = Activation('elu')(stem)

    # Small (SRF), medium (MRF) and large (LRF) receptive fields
    field_specs = (
        (SRF_SIZE, SRF_STRIDES, 'SRF'),
        (MRF_SIZE, MRF_STRIDES, 'MRF'),
        (LRF_SIZE, LRF_STRIDES, 'LRF'),
    )
    branches = [create_receptive_field(size, strides, stem, tag)
                for size, strides, tag in field_specs]

    # Add the layers - This sums each layer
    summed = Add()(branches)
    out = Softmax()(summed)

    return Model(inputs=volume, outputs=out)
def define_model(self, model_name):
    """Build a small CNN classifier for 28x28x1 inputs with 10 outputs.

    :param model_name: name of the model; also the prefix of the input
        layer's name (``<model_name>_input``).
    :return: the (uncompiled) model.
    """

    def conv_block(tensor, filters, kernel_size, strides, name_prefix):
        # 'same' convolution, batch norm, ReLU, then 2x2 max pooling.
        conv = Conv2D(filters=filters,
                      kernel_size=kernel_size,
                      strides=strides,
                      padding='same',
                      name=f'{name_prefix}conv')
        tensor = conv(tensor)
        tensor = BatchNormalization()(tensor)
        tensor = ReLU()(tensor)
        return MaxPool2D(pool_size=2)(tensor)

    model_input = Input(shape=(28, 28, 1), name=f'{model_name}_input')  # (28, 28, 1)

    # Two blocks: (14, 14, 4) after the first, (7, 7, 8) after the second.
    features = model_input
    for block_index, n_filters in enumerate((4, 8), start=1):
        features = conv_block(features,
                              filters=n_filters,
                              kernel_size=3,
                              strides=1,
                              name_prefix=f'conv_block_{block_index}')

    features = Flatten()(features)  # (392,)
    features = Dense(64)(features)  # (64,)
    features = Dense(10)(features)  # (10,)
    probabilities = Softmax()(features)

    return Model(inputs=model_input, outputs=probabilities, name=model_name)