def get_test_model_full(): """Returns a maximally complex test model, using all supported layer types with different parameter combination. """ input_shapes = [ (26, 28, 3), (4, 4, 3), (4, 4, 3), (4, ), (2, 3), (27, 29, 1), (17, 1), (17, 4), (2, 3), ] inputs = [Input(shape=s) for s in input_shapes] outputs = [] for inp in inputs[6:8]: for padding in ['valid', 'same']: for s in range(1, 6): for out_channels in [1, 2]: for d in range(1, 4): outputs.append( Conv1D(out_channels, s, padding=padding, dilation_rate=d)(inp)) for padding_size in range(0, 5): outputs.append(ZeroPadding1D(padding_size)(inp)) for crop_left in range(0, 2): for crop_right in range(0, 2): outputs.append(Cropping1D((crop_left, crop_right))(inp)) for upsampling_factor in range(1, 5): outputs.append(UpSampling1D(upsampling_factor)(inp)) for padding in ['valid', 'same']: for pool_factor in range(1, 6): for s in range(1, 4): outputs.append( MaxPooling1D(pool_factor, strides=s, padding=padding)(inp)) outputs.append( AveragePooling1D(pool_factor, strides=s, padding=padding)(inp)) outputs.append(GlobalMaxPooling1D()(inp)) outputs.append(GlobalAveragePooling1D()(inp)) for inp in [inputs[0], inputs[5]]: for padding in ['valid', 'same']: for h in range(1, 6): for out_channels in [1, 2]: for d in range(1, 4): outputs.append( Conv2D(out_channels, (h, 1), padding=padding, dilation_rate=(d, 1))(inp)) outputs.append( SeparableConv2D(out_channels, (h, 1), padding=padding, dilation_rate=(d, 1))(inp)) for sy in range(1, 4): outputs.append( Conv2D(out_channels, (h, 1), strides=(1, sy), padding=padding)(inp)) outputs.append( SeparableConv2D(out_channels, (h, 1), strides=(sy, sy), padding=padding)(inp)) for sy in range(1, 4): outputs.append( DepthwiseConv2D((h, 1), strides=(sy, sy), padding=padding)(inp)) outputs.append( MaxPooling2D((h, 1), strides=(1, sy), padding=padding)(inp)) for w in range(1, 6): for out_channels in [1, 2]: for d in range(1, 4) if sy == 1 else [1]: outputs.append( Conv2D(out_channels, (1, w), padding=padding, dilation_rate=(1, d))(inp)) outputs.append( SeparableConv2D(out_channels, (1, w), padding=padding, dilation_rate=(1, d))(inp)) for sx in range(1, 4): outputs.append( Conv2D(out_channels, (1, w), strides=(sx, 1), padding=padding)(inp)) outputs.append( SeparableConv2D(out_channels, (1, w), strides=(sx, sx), padding=padding)(inp)) for sx in range(1, 4): outputs.append( DepthwiseConv2D((1, w), strides=(sy, sy), padding=padding)(inp)) outputs.append( MaxPooling2D((1, w), strides=(1, sx), padding=padding)(inp)) outputs.append(ZeroPadding2D(2)(inputs[0])) outputs.append(ZeroPadding2D((2, 3))(inputs[0])) outputs.append(ZeroPadding2D(((1, 2), (3, 4)))(inputs[0])) outputs.append(Cropping2D(2)(inputs[0])) outputs.append(Cropping2D((2, 3))(inputs[0])) outputs.append(Cropping2D(((1, 2), (3, 4)))(inputs[0])) for y in range(1, 3): for x in range(1, 3): outputs.append(UpSampling2D(size=(y, x))(inputs[0])) outputs.append(GlobalAveragePooling2D()(inputs[0])) outputs.append(GlobalMaxPooling2D()(inputs[0])) outputs.append(AveragePooling2D((2, 2))(inputs[0])) outputs.append(MaxPooling2D((2, 2))(inputs[0])) outputs.append(UpSampling2D((2, 2))(inputs[0])) outputs.append(keras.layers.concatenate([inputs[0], inputs[0]])) outputs.append(Dropout(0.5)(inputs[0])) outputs.append(BatchNormalization()(inputs[0])) outputs.append(BatchNormalization(center=False)(inputs[0])) outputs.append(BatchNormalization(scale=False)(inputs[0])) outputs.append(Conv2D(2, (3, 3), use_bias=True)(inputs[0])) outputs.append(Conv2D(2, (3, 3), 
use_bias=False)(inputs[0])) outputs.append(SeparableConv2D(2, (3, 3), use_bias=True)(inputs[0])) outputs.append(SeparableConv2D(2, (3, 3), use_bias=False)(inputs[0])) outputs.append(DepthwiseConv2D(2, (3, 3), use_bias=True)(inputs[0])) outputs.append(DepthwiseConv2D(2, (3, 3), use_bias=False)(inputs[0])) outputs.append(Dense(2, use_bias=True)(inputs[3])) outputs.append(Dense(2, use_bias=False)(inputs[3])) shared_conv = Conv2D(1, (1, 1), padding='valid', name='shared_conv', activation='relu') up_scale_2 = UpSampling2D((2, 2)) x1 = shared_conv(up_scale_2(inputs[1])) # (1, 8, 8) x2 = shared_conv(up_scale_2(inputs[2])) # (1, 8, 8) x3 = Conv2D(1, (1, 1), padding='valid')(up_scale_2(inputs[2])) # (1, 8, 8) x = keras.layers.concatenate([x1, x2, x3]) # (3, 8, 8) outputs.append(x) x = Conv2D(3, (1, 1), padding='same', use_bias=False)(x) # (3, 8, 8) outputs.append(x) x = Dropout(0.5)(x) outputs.append(x) x = keras.layers.concatenate( [MaxPooling2D((2, 2))(x), AveragePooling2D((2, 2))(x)]) # (6, 4, 4) outputs.append(x) x = Flatten()(x) # (1, 1, 96) x = Dense(4, use_bias=False)(x) outputs.append(x) x = Dense(3)(x) # (1, 1, 3) outputs.append(x) outputs.append(Activation(relu6)(inputs[7])) outputs.append(keras.layers.Add()([inputs[4], inputs[8], inputs[8]])) outputs.append(keras.layers.Subtract()([inputs[4], inputs[8]])) outputs.append(keras.layers.Multiply()([inputs[4], inputs[8], inputs[8]])) outputs.append(keras.layers.Average()([inputs[4], inputs[8], inputs[8]])) outputs.append(keras.layers.Maximum()([inputs[4], inputs[8], inputs[8]])) outputs.append( keras.layers.Concatenate()([inputs[4], inputs[8], inputs[8]])) intermediate_input_shape = (3, ) intermediate_in = Input(intermediate_input_shape) intermediate_x = intermediate_in intermediate_x = Dense(8)(intermediate_x) intermediate_x = Dense(5)(intermediate_x) intermediate_model = Model(inputs=[intermediate_in], outputs=[intermediate_x], name='intermediate_model') intermediate_model.compile(loss='mse', optimizer='nadam') x = intermediate_model(x) # (1, 1, 5) intermediate_model_2 = Sequential() intermediate_model_2.add(Dense(7, input_shape=(5, ))) intermediate_model_2.add(Dense(5)) intermediate_model_2.compile(optimizer='rmsprop', loss='categorical_crossentropy') x = intermediate_model_2(x) # (1, 1, 5) x = Dense(3)(x) # (1, 1, 3) shared_activation = Activation('tanh') outputs = outputs + [ Activation('tanh')(inputs[3]), Activation('hard_sigmoid')(inputs[3]), Activation('selu')(inputs[3]), Activation('sigmoid')(inputs[3]), Activation('softplus')(inputs[3]), Activation('softmax')(inputs[3]), Activation('relu')(inputs[3]), LeakyReLU()(inputs[3]), ELU()(inputs[3]), shared_activation(inputs[3]), inputs[4], inputs[1], x, shared_activation(x), ] print('Model has {} outputs.'.format(len(outputs))) model = Model(inputs=inputs, outputs=outputs, name='test_model_full') model.compile(loss='mse', optimizer='nadam') # fit to dummy data training_data_size = 1 batch_size = 1 epochs = 10 data_in = generate_input_data(training_data_size, input_shapes) data_out = generate_output_data(training_data_size, outputs) model.fit(data_in, data_out, epochs=epochs, batch_size=batch_size) return model
embeddings_sequences = embeddings_layer(inputs)
graphconv = GraphConv(filters=64,
                      neighbors_ix_mat=q_mat_layer1,
                      num_neighbors=12,
                      activation='relu')
self_attention = SeqSelfAttention(attention_activation='relu',
                                  name='self_attention')(embeddings_sequences)
output = Conv1D(filters,
                filter_size,
                padding='valid',
                activation='relu',
                strides=1)(self_attention)
dropout = Dropout(0.25)(output)
output = GlobalAveragePooling1D()(dropout)
dropout = Dropout(0.25)(output)
# output = Dense(64, activation='relu')(dropout)
print(output)
output = Dense(1, activation='sigmoid')(output)
model = Model(inputs=inputs, outputs=[output])
model.summary()
model.compile(loss='binary_crossentropy',
              optimizer=Adam(0.0001),
              metrics=['accuracy'])
checkpoint_filepath = 'E:/DeepLearning/bully_code/diyu/indrnn.h5'
checkpoint = ModelCheckpoint(checkpoint_filepath, monitor='acc',
print(max_features)
embeddingWeights = load_pkl('data/glove_embeddings/glove_weights')
#input_layer = Input(shape=(1,), dtype='int32')
# embedding = Embedding(max_features+1,
#                       embedding_dims,
#                       input_length=maxlen,
#                       weights=[embeddingWeights])
embedding = Embedding(max_features + 1, embedding_dims, input_length=maxlen)
model.add(embedding)
# we add a GlobalAveragePooling1D, which will average the embeddings
# of all words in the document
model.add(GlobalAveragePooling1D())
# We project onto a single unit output layer, and squash it with a sigmoid:
#model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dense(13, activation='sigmoid'))
adam = Adam(lr=0.01, decay=1e-3)
model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy'])
#model.load_weights('data/weights/fasttext_uni.h5')
#model.load_weights('data/glove_embeddings/best_weights_glove.h5')
#scores = model.predict(x_train)
#dump_pkl((y_train, scores), 'train_pred_fasttext')
from keras.callbacks import ModelCheckpoint, RemoteMonitor
print(max_features)
def get_pooling(x):
    avg_pool_x = GlobalAveragePooling1D()(x)
    max_pool_x = GlobalMaxPooling1D()(x)
    return avg_pool_x, max_pool_x
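# A minimal usage sketch (not from the original source): feed a Conv1D feature
# map through get_pooling and concatenate both summaries. The names seq_input,
# features and example_model are hypothetical, chosen only for illustration.
from keras.layers import Input, Conv1D, Dense, concatenate
from keras.models import Model

seq_input = Input(shape=(100, 8))
features = Conv1D(32, 3, padding='same', activation='relu')(seq_input)
avg_pool, max_pool = get_pooling(features)
pooled = concatenate([avg_pool, max_pool])  # (batch, 64): avg and max summaries side by side
example_model = Model(inputs=seq_input,
                      outputs=Dense(1, activation='sigmoid')(pooled))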
def build_model(self): # Create joint embedding layer (decay strings) decstr_embedding = Embedding( self.num_pdg_codes, 8, input_length=self.shape_dict['decay_input'][1], ) # Network to process decay string decay_input = Input(shape=self.shape_dict['decay_input'][1:], name='decay_input') decay_embed = decstr_embedding(decay_input) # Build wide CNN for decay string processing wide_layers = [] for i in range(4, 10): layer_w = self._conv1D_node( decay_embed, filters=32, kernel_size=i, ) # layer_w = self._conv1D_node( # layer_w, # filters=32, # kernel_size=i, # ) layer_w = GlobalAveragePooling1D()(layer_w) wide_layers.append(layer_w) # Put it all together, outputs 4xfilter_size = 128 # decay_l = concatenate([decay_4, decay_5, decay_6, decay_7, decay_8, decay_9], axis=-1) decay_l = concatenate(wide_layers, axis=-1) # decay_l = Add()(wide_layers) # decay_l = Dropout(0.4)(decay_l) decay_l = Dense(128)(decay_l) decay_l = LeakyReLU()(decay_l) # decay_l = Dropout(0.3)(decay_l) decay_l = Dense(8)(decay_l) decay_output = LeakyReLU()(decay_l) # Create joint embedding layer pdg_embedding = Embedding( self.num_pdg_codes, 8, input_length=self.shape_dict['pdg_input'][1], ) # Network to process individual particles particle_input = Input(shape=self.shape_dict['particle_input'][1:], name='particle_input') # Embed PDG codes pdg_input = Input(shape=self.shape_dict['pdg_input'][1:], name='pdg_input') mother_pdg_input = Input(shape=self.shape_dict['mother_pdg_input'][1:], name='mother_pdg_input') pdg_l = pdg_embedding(pdg_input) mother_pdg_l = pdg_embedding(mother_pdg_input) # Put all the particle particle_l = concatenate([particle_input, pdg_l, mother_pdg_l], axis=-1) particle_l = self._resnet_node(particle_l, kernels=1, filters=64) particle_l = self._resnet_node(particle_l, kernels=1, filters=64) particle_l = self._resnet_node(particle_l, kernels=1, filters=64) particle_l = self._resnet_node(particle_l, kernels=1, filters=64) particle_l = self._resnet_node(particle_l, kernels=3, filters=128) particle_l = self._resnet_node(particle_l, kernels=3, filters=128) particle_l = self._resnet_node(particle_l, kernels=3, filters=128) # particle_l = self._resnet_node(particle_l, kernels=3, filters=128, pool='avg') particle_l = self._resnet_node(particle_l, kernels=3, filters=128) particle_l = self._resnet_node(particle_l, kernels=3, filters=64) particle_l = self._resnet_node(particle_l, kernels=3, filters=64) particle_l = self._resnet_node(particle_l, kernels=3, filters=64) particle_l = self._resnet_node(particle_l, kernels=3, filters=64) # for i in range(4): # particle_l = self._conv1D_node( # particle_l, # filters=64, # kernel_size=1, # # dropout=0 # ) # particle_m = particle_l # particle_m = self.conv1D_avg_node( # particle_m, # filters=64, # kernel_size=4, # # pool='avg', # # dropout=0.4 # ) # # for i in range(2): # # # particle_m = self._conv1D_node( # # particle_m = self.conv1D_avg_node( # # particle_m, # # filters=64, # # kernel_size=4, # # # dropout=0.4 # # ) # for i in range(3): # particle_m = self._conv1D_node( # particle_m, # filters=64, # kernel_size=3, # # dropout=0.4 # ) # Add residual loopback to stop vanishing gradients # particle_l = Add()([particle_l, particle_m]) # Compress # particle_output = AveragePooling1D(pool_size=2)(particle_l) # Flatten to feed to dense # particle_l = Flatten()(particle_3) particle_output = GlobalAveragePooling1D()(particle_l) # particle_l = Dense(256)(particle_l) # particle_l = LeakyReLU()(particle_l) # particle_l = Dropout(0.5)(particle_l) # particle_l = Dense(128)(particle_l) 
# particle_l = LeakyReLU()(particle_l) # particle_output = Dropout(0.5)(particle_l) # Finally, combine the two networks comb_l = concatenate([decay_output, particle_output], axis=-1) comb_l = Dense(128)(comb_l) comb_l = LeakyReLU()(comb_l) comb_l = Dropout(0.3)(comb_l) comb_l = Dense(64)(comb_l) comb_l = LeakyReLU()(comb_l) # comb_l = Dropout(0.3)(comb_l) comb_l = Dense(32)(comb_l) comb_l = LeakyReLU()(comb_l) comb_output = Dense(1, activation='sigmoid', name='y_output')(comb_l) # Instantiate the cnn model model = Model( inputs=[decay_input, particle_input, pdg_input, mother_pdg_input], outputs=comb_output, name='combined-1x1-ResNet') # Finally compile the model model.compile( loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'], ) model.summary() self.model = model
print(len(x_test), 'test sequences')

print('Padding sequences...')
x_train = sequence.pad_sequences(x_train, maxlen=max_seq_len)
x_test = sequence.pad_sequences(x_test, maxlen=max_seq_len)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

from keras.models import Model
from keras.layers import Input, Embedding, GlobalAveragePooling1D, Dropout, Dense

S_inputs = Input(shape=(max_seq_len, ), dtype='int32')
emb_seq = Embedding(max_features + 1, 128)(S_inputs)
emb_seq = PositionEncoding()(emb_seq)  # adding positional encoding slightly improves accuracy
O_seq = MultiHeadAttn(8, 16)([emb_seq, emb_seq, emb_seq])
O_seq = GlobalAveragePooling1D()(O_seq)
O_seq = Dropout(0.5)(O_seq)
outputs = Dense(1, activation='sigmoid')(O_seq)

model = Model(inputs=S_inputs, outputs=outputs)
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
model.summary()

print('Train...')
model.fit(x_train, y_train, batch_size=batch_size,
def build_model(self): # Create joint embedding layer pdg_embedding = Embedding( self.num_pdg_codes, 8, input_length=self.shape_dict['pdg_input'][1], ) # Network to process individual particles particle_input = Input(shape=self.shape_dict['particle_input'][1:], name='particle_input') # Embed PDG codes pdg_input = Input(shape=self.shape_dict['pdg_input'][1:], name='pdg_input') mother_pdg_input = Input(shape=self.shape_dict['mother_pdg_input'][1:], name='mother_pdg_input') pdg_l = pdg_embedding(pdg_input) mother_pdg_l = pdg_embedding(mother_pdg_input) # Put all the particle particle_l = concatenate([particle_input, pdg_l, mother_pdg_l], axis=-1) particle_l = self.conv1D_avg_node( particle_l, filters=64, kernel_size=3, pool='avg', ) particle_l = self.conv1D_avg_node( particle_l, filters=64, kernel_size=3, # pool='avg', ) particle_l = self.conv1D_avg_node( particle_l, filters=64, kernel_size=3, # pool='avg', ) # particle_l = self.conv1D_avg_node( # particle_l, # filters=64, # kernel_size=3, # # pool='avg', # ) # Flatten (not really) particle_output = GlobalAveragePooling1D()(particle_l) # particle_l = Dense(32, kernel_initializer='uniform')(particle_l) # particle_l = BatchNormalization()(particle_l) # particle_l = LeakyReLU()(particle_l) # particle_output = Dropout(0.4)(particle_l) # Finally, combine the two networks # comb_l = concatenate([decay_output, particle_output], axis=-1) comb_l = Dense(512)(particle_output) comb_l = LeakyReLU()(comb_l) comb_l = Dropout(0.4)(comb_l) comb_l = Dense(128)(comb_l) comb_l = LeakyReLU()(comb_l) # comb_l = Dropout(0.4)(comb_l) # comb_l = Dense(256)(comb_l) # comb_l = LeakyReLU()(comb_l) comb_output = Dense(1, activation='sigmoid', name='y_output')(comb_l) # Instantiate the cnn model model = Model(inputs=[particle_input, pdg_input, mother_pdg_input], outputs=comb_output, name='particles-CNN-vanilla') # Finally compile the model model.compile( loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'], ) model.summary() self.model = model
def build_model(self): # Create joint embedding layer (decay strings) decstr_embedding = Embedding( self.num_pdg_codes, 8, input_length=self.shape_dict['decay_input'][1], ) # Network to process decay string decay_input = Input(shape=self.shape_dict['decay_input'][1:], name='decay_input') decay_embed = decstr_embedding(decay_input) # Build wide CNN for decay string processing wide_layers = [] for i in range(4, 10): layer_w = self._conv1D_node( decay_embed, filters=64, kernel_size=i, ) layer_w = GlobalAveragePooling1D()(layer_w) wide_layers.append(layer_w) # Put it all together, outputs 4xfilter_size = 128 decay_l = concatenate(wide_layers, axis=-1) # decay_l = Dropout(0.4)(decay_l) decay_l = Dense(128)(decay_l) decay_l = LeakyReLU()(decay_l) # decay_l = Dropout(0.3)(decay_l) decay_l = Dense(32)(decay_l) decay_output = LeakyReLU()(decay_l) # Create joint embedding layer pdg_embedding = Embedding( self.num_pdg_codes, 8, input_length=self.shape_dict['pdg_input'][1], ) # Network to process individual particles particle_input = Input(shape=self.shape_dict['particle_input'][1:], name='particle_input') # Embed PDG codes pdg_input = Input(shape=self.shape_dict['pdg_input'][1:], name='pdg_input') mother_pdg_input = Input(shape=self.shape_dict['mother_pdg_input'][1:], name='mother_pdg_input') pdg_l = pdg_embedding(pdg_input) mother_pdg_l = pdg_embedding(mother_pdg_input) # Put all the particle particle_l = concatenate([particle_input, pdg_l, mother_pdg_l], axis=-1) # Node 1 particle_l = self.conv1D_avg_node( particle_l, filters=64, kernel_size=3, # pool='avg', # dropout=0.3 batchnorm=False, ) # for i in range(2): # particle_l = self.conv1D_avg_node( # particle_l, # filters=64, # kernel_size=3, # # dropout=0.3 # batchnorm=False, # ) # Compress # particle_l = AveragePooling1D(pool_size=2)(particle_l) # # Node 2 # for i in range(2): # particle_l = self._conv1D_node( # particle_l, # filters=64, # kernel_size=3, # # dropout=0.3 # ) # # Compress # # particle_l = AveragePooling1D(pool_size=2)(particle_l) # # Node 3 # for i in range(2): # particle_l = self._conv1D_node( # particle_l, # filters=64, # kernel_size=3, # # dropout=0.3 # ) # # Compress # particle_l = AveragePooling1D(pool_size=2)(particle_l) # # Node 4 # for i in range(2): # particle_l = self._conv1D_node( # particle_l, # filters=256, # kernel_size=3, # # dropout=0.3 # ) # # Compress # # particle_l = AveragePooling1D(pool_size=2)(particle_l) # kernel=3, flatten particle_output = GlobalAveragePooling1D()(particle_l) # Finally, combine the two networks comb_l = concatenate([decay_output, particle_output], axis=-1) comb_l = Dense(512)(comb_l) comb_l = LeakyReLU()(comb_l) # comb_l = Dropout(0.3)(comb_l) # comb_l = Dense(512)(comb_l) # comb_l = LeakyReLU()(comb_l) # # comb_l = Dropout(0.3)(comb_l) # comb_l = Dense(512)(comb_l) # comb_l = LeakyReLU()(comb_l) comb_output = Dense(1, activation='sigmoid', name='y_output')(comb_l) # Instantiate the cnn model model = Model( inputs=[decay_input, particle_input, pdg_input, mother_pdg_input], outputs=comb_output, name='vanilla-wideCNN') # Finally compile the model model.compile( loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'], ) model.summary() self.model = model
def build_model(self): # Create joint embedding layer pdg_embedding = Embedding( self.num_pdg_codes, 8, input_length=self.shape_dict['pdg_input'][1], ) # Network to process individual particles particle_input = Input(shape=self.shape_dict['particle_input'][1:], name='particle_input') # Embed PDG codes pdg_input = Input(shape=self.shape_dict['pdg_input'][1:], name='pdg_input') mother_pdg_input = Input(shape=self.shape_dict['mother_pdg_input'][1:], name='mother_pdg_input') pdg_l = pdg_embedding(pdg_input) mother_pdg_l = pdg_embedding(mother_pdg_input) # Put all the particle particle_l = concatenate([particle_input, pdg_l, mother_pdg_l], axis=-1) particle_l = self._conv1D_node(particle_l, filters=64, kernel_size=7) # Should add maxpool here -- don't need, just reduces input size # particle_l = MaxPooling1D(pool_size=3, strides=2)(particle_l) # Block 1 particle_l = self._resnet50_node(particle_l, filters=64) particle_l = self._resnet50_node(particle_l, filters=64) particle_l = self._resnet50_node(particle_l, filters=64) # Block 2 # particle_l = self.conv1D_avg_node(particle_l, filters=128, kernel_size=3) particle_l = self._resnet50_node(particle_l, filters=128) particle_l = self._resnet50_node(particle_l, filters=128) particle_l = self._resnet50_node(particle_l, filters=128) particle_l = self._resnet50_node(particle_l, filters=128, pool='avg') # Block 3 # particle_l = self.conv1D_avg_node(particle_l, filters=256, kernel_size=3) particle_l = self._resnet50_node(particle_l, filters=256) particle_l = self._resnet50_node(particle_l, filters=256) particle_l = self._resnet50_node(particle_l, filters=256) particle_l = self._resnet50_node(particle_l, filters=256) particle_l = self._resnet50_node(particle_l, filters=256) particle_l = self._resnet50_node(particle_l, filters=256, pool='avg') # Block 4 # particle_l = self.conv1D_avg_node(particle_l, filters=512, kernel_size=3) particle_l = self._resnet50_node(particle_l, filters=512) particle_l = self._resnet50_node(particle_l, filters=512) particle_l = self._resnet50_node(particle_l, filters=512) # Flatten (not really) particle_output = GlobalAveragePooling1D()(particle_l) # Finally, combine the two networks # comb_l = concatenate([decay_output, particle_output], axis=-1) comb_l = Dense(512, activation='softmax')(particle_output) # comb_l = LeakyReLU()(comb_l) comb_l = Dropout(0.5)(comb_l) comb_l = Dense(128, activation='softmax')(comb_l) # comb_l = LeakyReLU()(comb_l) # comb_l = Dropout(0.4)(comb_l) # comb_l = Dense(256)(comb_l) # comb_l = LeakyReLU()(comb_l) comb_output = Dense(1, activation='sigmoid', name='y_output')(comb_l) # Instantiate the cnn model model = Model(inputs=[particle_input, pdg_input, mother_pdg_input], outputs=comb_output, name='particles-ResNet100') # Finally compile the model model.compile( loss='binary_crossentropy', optimizer=self.optimizer, metrics=['accuracy'], ) model.summary() self.model = model
def origin(X):
    model = Sequential()
    # 1st layer
    model.add(
        Conv1D(input_shape=(X.shape[1], 1),
               filters=1,
               kernel_size=5,
               padding='same'))
    model.add(Conv1D(8, 1, padding='same'))
    model.add(Conv1D(8, 3, strides=2, padding='same'))
    # 2nd layer
    model.add(Conv1D(8, 5, padding='same'))
    model.add(Conv1D(16, 1, padding='same'))
    model.add(Conv1D(16, 3, strides=2, padding='same'))
    # 3rd layer
    model.add(Conv1D(16, 5, padding='same'))
    model.add(Activation('tanh'))
    model.add(Conv1D(32, 1, padding='same'))
    model.add(Activation('tanh'))
    model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 4th layer
    model.add(Conv1D(32, 5, padding='same'))
    model.add(Activation('tanh'))
    model.add(Conv1D(64, 1, padding='same'))
    model.add(Activation('tanh'))
    model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 5th layer
    model.add(Conv1D(64, 5, padding='same'))
    model.add(Activation('tanh'))
    model.add(Conv1D(128, 1, padding='same'))
    model.add(Activation('tanh'))
    model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 6th layer
    model.add(Conv1D(128, 5, padding='same'))
    model.add(Activation('tanh'))
    model.add(Conv1D(256, 1, padding='same'))
    model.add(Activation('tanh'))
    model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 7th layer
    model.add(Conv1D(256, 5, padding='same'))
    model.add(Activation('tanh'))
    model.add(Conv1D(512, 1, padding='same'))
    model.add(Activation('tanh'))
    model.add(GlobalAveragePooling1D(name='pooling'))
    #model.add(Dropout(0.5))
    # 8th layer (FC & softmax)
    # model.add(Flatten())
    model.add(Dense(2, activation='softmax'))
    model.summary()
    return model
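# A minimal usage sketch (an assumption, not from the original source): the
# returned network still needs to be compiled before training. X_train is
# assumed to have shape (num_samples, num_timesteps, 1) and y_train to be
# one-hot encoded with 2 classes, to match the softmax output above.
model = origin(X_train)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, batch_size=32, epochs=10, validation_split=0.1)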
def model_1(X):
    model = Sequential()
    # 1st layer
    model.add(
        Conv1D(input_shape=(X.shape[1], 1),
               filters=1,
               kernel_size=5,
               padding='same'))
    model.add(Conv1D(8, 1, padding='same'))
    # model.add(Lambda(abs))
    model.add(ThresholdedReLU(theta=1.0))
    # model.add(Conv1D(8, 3, strides=2, padding='same'))
    # 2nd layer
    model.add(Conv1D(8, 5, padding='same'))
    model.add(Conv1D(16, 1, padding='same'))
    model.add(Activation('relu'))
    # model.add(PReLU())
    # model.add(Conv1D(16, 3, strides=2, padding='same'))
    # 3rd layer
    model.add(Conv1D(16, 5, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(Conv1D(32, 1, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(AveragePooling1D(pool_size=3, strides=2, padding='same'))
    # model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 4th layer
    model.add(Conv1D(32, 5, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(Conv1D(64, 1, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(AveragePooling1D(pool_size=3, strides=2, padding='same'))
    # model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 5th layer
    model.add(Conv1D(64, 5, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(Conv1D(128, 1, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(AveragePooling1D(pool_size=3, strides=2, padding='same'))
    # model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 6th layer
    model.add(Conv1D(128, 5, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(Conv1D(256, 1, padding='same'))
    # model.add(PReLU())
    model.add(Activation('relu'))
    model.add(AveragePooling1D(pool_size=3, strides=2, padding='same'))
    # model.add(MaxPooling1D(pool_size=3, strides=2, padding='same'))
    # 7th layer
    model.add(Conv1D(256, 5, padding='same'))
    model.add(Activation('relu'))
    model.add(Conv1D(512, 1, padding='same'))
    model.add(Activation('relu'))
    # model.add(LSTM(50, return_sequences=False))
    model.add(GlobalAveragePooling1D())
    # model.add(LSTM(50, return_sequences=False))
    # model.add(Dense(128))
    # model.add(Dropout(0.25))
    # model.add(Dense(128))
    # 8th layer (FC & softmax)
    model.add(Dense(2, activation='softmax'))
    model.summary()
    return model
def main(): #print parameters print("Using path {0}".format(args.path)) if args.mbti_index == 0: print("Using character index {0} for MBTI (E/I)".format(args.mbti_index)) if args.mbti_index == 1: print("Using character index {0} for MBTI (S/N)".format(args.mbti_index)) if args.mbti_index == 2: print("Using character index {0} for MBTI (T/F)".format(args.mbti_index)) if args.mbti_index == 3: print("Using character index {0} for MBTI (J/P)".format(args.mbti_index)) #read and split data print("\nLoading data...") #X, y = read_pickle() #enable when reading from pickle X, y = read_data() X_train, X_dev, X_test, y_train, y_dev, y_test = split_data(X, y) #convert words to indices X_train_num, X_dev_num, X_test_num, w2i, PAD = word2index(X_train, X_dev, X_test) #add paddings to X max_sentence_length = max([len(s) for s in X_train] + [len(s) for s in X_dev] + [len(s) for s in X_test]) X_train_pad = sequence.pad_sequences(X_train_num, maxlen=max_sentence_length, value=PAD) X_dev_pad = sequence.pad_sequences(X_dev_num, maxlen=max_sentence_length, value=PAD) X_test_pad = sequence.pad_sequences(X_test_num, maxlen=max_sentence_length,value=PAD) #transform y y_train_binary, y_dev_binary, y_test_binary = transform_y(y_train, y_dev, y_test) num_classes = len(np.unique(y_train_binary)) vocab_size = len(w2i) embeds_size = 64 #STATISTICS print("\nStatistics:") print("Max sentence length:", max_sentence_length) #debug print("Vocab size:", vocab_size) #debug #print("X_train_pad:\n", X_train_pad[-1]) #debug #print("X_test_pad:\n", X_test_pad[-1]) #debug #number of 0/1 labels in train, test and dev data print("# train 0:", np.count_nonzero(y_train_binary == 0)) print("# train 1:", np.count_nonzero(y_train_binary == 1)) print("# dev 0:", np.count_nonzero(y_dev_binary == 0)) print("# dev 1:", np.count_nonzero(y_dev_binary == 1)) print("# test 0:", np.count_nonzero(y_test_binary == 0)) print("# test 1:", np.count_nonzero(y_test_binary == 1)) print("Sum 0: {0} ({1}%)".format(int(np.count_nonzero(y_train_binary == 0)) + int(np.count_nonzero(y_dev_binary == 0)) + int(np.count_nonzero(y_test_binary == 0)), round((int(np.count_nonzero(y_train_binary == 0)) + int(np.count_nonzero(y_dev_binary == 0)) + int(np.count_nonzero(y_test_binary == 0)))/len(X)*100, 1))) print("Sum 1: {0} ({1}%)".format(int(np.count_nonzero(y_train_binary == 1)) + int(np.count_nonzero(y_dev_binary == 1)) + int(np.count_nonzero(y_test_binary == 1)), round((int(np.count_nonzero(y_train_binary == 1)) + int(np.count_nonzero(y_dev_binary == 1)) + int(np.count_nonzero(y_test_binary == 1)))/len(X)*100, 1))) print("X_train pad shape:", X_train_pad.shape) #debug print("y_train_binary shape:", y_train_binary.shape) #debug print("\nBuild model...") model = Sequential() model.add(Embedding(vocab_size, embeds_size, input_length=max_sentence_length)) model.add(GlobalAveragePooling1D()) #model.add(SimpleRNN(32)) model.add(Dense(128)) model.add(Activation('sigmoid')) model.add(Dropout(0.15)) model.add(Dense(100)) model.add(Activation('sigmoid')) model.add(Dropout(0.2)) model.add(Dense(1)) model.add(Activation('sigmoid')) print("Train model...") opt = Adam(lr=0.005) model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy']) model.fit(X_train_pad, y_train_binary, epochs=args.iters, batch_size=500) print("\nEvaluate model...") loss, acc = model.evaluate(X_test_pad, y_test_binary) print("\nPredict classes...") y_predicted_classes = model.predict_classes(X_test_pad) print("\n\nLoss:", loss) print("Accuracy:", acc) print() print("-----------") 
print("Other metrics:") print("Other metrics:") print("-----------") probs = model.predict(X_test_pad) #print("\nProbabilities (last 10):\n", probs[:10]) y_predicted = [seq.argmax() for seq in probs] print("y's predicted (last 20):\n", y_predicted_classes.flatten()[:20]) print("y's devset (last 20):\n", y_test_binary[:20]) #print(probs2[:20]) print("\nAccuracy_score:", accuracy_score(y_test_binary, y_predicted_classes)) print() print("Classification report:\n", classification_report(y_test_binary, y_predicted_classes)) print() print("Confusion matrix:\n", confusion_matrix(y_test_binary, y_predicted_classes))
def __trainClickPredModel(self): ## define the model # https://keras.io/layers/convolutional/ print("== define model") model = Sequential() model.add(Convolution1D(nb_filter=256, filter_length=1,#6, border_mode='same', # 'valid', #The valid means there is no padding around input or feature map, while same means there are some padding around input or feature map, making the output feature map's size same as the input's activation='relu', input_shape=(1, self.input_dim), init='lecun_uniform' # lecun_uniform for both gets AUC: 0.865961 | (good split) AUC: 0.861570 with avg pool at end # glorot_uniform for both gets AUC: 0.868817 | AUC: 0.863290 with avg pool at end # he_uniform for both gets AUC: 0.868218 | AUC: 0.873585 with avg pool at end )) #model.add(Dense(256,init='lecun_uniform',input_shape=(1,self.input_dim),activation='relu')) # model.add(MaxPooling1D(pool_length=2, stride=None, border_mode='same')) ## TODO: removed model.add(AveragePooling1D(pool_length=2, stride=None, border_mode='same')) # add a new conv1d on top # model.add(Convolution1D(256, 3, border_mode='same', init='glorot_uniform', activation='relu', )) #on the fence about effect #model.add(AveragePooling1D(pool_length=2, stride=None, border_mode='same')) #worse if added # # add a new conv1d on top AUC: 0.851369 with glorot uniform # model.add(Convolution1D(128, 3, border_mode='same',init='glorot_uniform',activation='relu',)) # # apply an atrous convolution 1d with atrous rate 2 of length 3 to a sequence with 10 timesteps, # # with 64 output filters # model = Sequential() # model.add(AtrousConvolution1D(128, 3, atrous_rate=2, border_mode='same', input_shape=(1,input_dim))) # # add a new atrous conv1d on top # model.add(AtrousConvolution1D(64, 2, atrous_rate=2, border_mode='same')) # we use max pooling: # model.add(GlobalMaxPooling1D()) model.add(GlobalAveragePooling1D()) # We add a vanilla hidden layer: model.add(Dense(128, init='glorot_uniform')) model.add(Dropout(0.1)) # 0.1 seems good, but is it overfitting? model.add(Activation('relu')) # # We project onto a single unit output layer, and squash it with a sigmoid: # model.add(Dense(1)) # model.add(Activation('sigmoid')) # model.add(Dense(output_dim, input_dim=input_dim, activation='softmax',init='glorot_uniform')) model.add(Dense(self.output_dim, activation='softmax', init='glorot_uniform')) print(model.summary()) #print(model.get_config()) # write model to file with open(self.model_config_filepath,'w') as f: json.dump(model.to_json(),f) ### Compile model print("== Compile model") # optimizer = SGD(lr = self.learning_rate, momentum = 0.9, decay = 0.0, nesterov = True) optimizer = Adam(lr=self.learning_rate) # compile the model model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy']) self.click_pred_model = model #actually run training self.trainClickPredModelRunTraining()
def build_embedding(input_shape, emb_size): # -----------------Entry flow ----------------- input_data = Input(shape=input_shape) filter_num = ['None', 32, 64, 128, 256] kernel_size = ['None', 8, 8, 8, 8] conv_stride_size = ['None', 1, 1, 1, 1] pool_stride_size = ['None', 4, 4, 4, 4] pool_size = ['None', 8, 8, 8, 8] model = Conv1D(filters=filter_num[1], kernel_size=kernel_size[1], strides=conv_stride_size[1], padding='same', name='block1_conv1')(input_data) model = Activation('elu', name='block1_act1')(model) model = Conv1D(filters=filter_num[1], kernel_size=kernel_size[1], strides=conv_stride_size[1], padding='same', name='block1_conv2')(model) model = Activation('elu', name='block1_act2')(model) model = BatchNormalization(name='block1_bn')(model) model = MaxPooling1D(pool_size=pool_size[1], strides=pool_stride_size[1], padding='same', name='block1_pool')(model) model = Dropout(0.1, name='block1_dropout')(model) model = Conv1D(filters=filter_num[2], kernel_size=kernel_size[2], strides=conv_stride_size[2], padding='same', name='block2_conv1')(model) model = Activation('elu', name='block2_act1')(model) model = Conv1D(filters=filter_num[2], kernel_size=kernel_size[2], strides=conv_stride_size[2], padding='same', name='block2_conv2')(model) model = Activation('elu', name='block2_act2')(model) model = BatchNormalization(name='block2_bn')(model) model = MaxPooling1D(pool_size=pool_size[2], strides=pool_stride_size[3], padding='same', name='block2_pool')(model) model = Dropout(0.1, name='block2_dropout')(model) model = Conv1D(filters=filter_num[3], kernel_size=kernel_size[3], strides=conv_stride_size[3], padding='same', name='block3_conv1')(model) model = Activation('elu', name='block3_act1')(model) model = Conv1D(filters=filter_num[3], kernel_size=kernel_size[3], strides=conv_stride_size[3], padding='same', name='block3_conv2')(model) model = Activation('elu', name='block3_act2')(model) model = BatchNormalization(name='block3_bn')(model) model = MaxPooling1D(pool_size=pool_size[3], strides=pool_stride_size[3], padding='same', name='block3_pool')(model) model = Dropout(0.1, name='block3_dropout')(model) model = Conv1D(filters=filter_num[4], kernel_size=kernel_size[4], strides=conv_stride_size[4], padding='same', name='block4_conv1')(model) model = Activation('elu', name='block4_act1')(model) model = Conv1D(filters=filter_num[4], kernel_size=kernel_size[4], strides=conv_stride_size[4], padding='same', name='block4_conv2')(model) model = Activation('elu', name='block4_act2')(model) model = BatchNormalization(name='block4_bn')(model) model = MaxPooling1D(pool_size=pool_size[4], strides=pool_stride_size[4], padding='same', name='block4_pool')(model) output = GlobalAveragePooling1D()(model) dense_layer = Dense(emb_size, name='FeaturesVec')(output) return input_data, dense_layer
def cnn_model(embedding_weights, cv_dat, max_len, model_w, lda, dictionary, idx2word, alpha): max_len = 1000 if max_len > 1000 else max_len #max_len = 1000 dropout = 0.8 print max_len json_file = open(model_w + 'model.json', 'r') loaded_model_json = json_file.read() json_file.close() model_lda = model_from_json(loaded_model_json) # load weights into new model #print layer_dict train_x, test_x, train_y, test_y = cv_dat test_lda = get_alpha(test_x, lda, dictionary, idx2word) print "Maximum length of sentence:" + str(max_len) print "Distribution of labels in training set:" print Counter([np.argmax(dat) for dat in train_y]) print "Distribution of labels in testing set:" print Counter([np.argmax(dat) for dat in test_y]) test_x = np.array(sequence.pad_sequences(test_x, maxlen=max_len), dtype=np.int) #print (train_x.shape) #print train_y.shape train_x, val_x, train_y, val_y = train_test_split(train_x, train_y, test_size=0.166, random_state=666, stratify=train_y) train_lda = get_alpha(train_x, lda, dictionary, idx2word) val_lda = get_alpha(val_x, lda, dictionary, idx2word) #defining the model architecture now train_x = np.array(sequence.pad_sequences(train_x, maxlen=max_len), dtype=np.int) val_x = np.array(sequence.pad_sequences(val_x, maxlen=max_len), dtype=np.int) review_text = Input(shape=(max_len, ), dtype='int64', name="body_input") embedded_layer_body = Embedding(embedding_weights.shape[0], embedding_weights.shape[1], mask_zero=False, input_length=max_len, weights=[embedding_weights], trainable=True)(review_text) lda_input = Input(shape=(30, ), dtype='float32', name="lda_inp") #load the weights from pre-trained model lrelu = LeakyReLU(alpha=0.1) conv1 = Conv1D(filters=128, kernel_size=1, padding='same', activation=lrelu, weights=layer_dict['conv1d_1'].get_weights()) conv2 = Conv1D(filters=128, kernel_size=3, padding='same', activation=lrelu, weights=layer_dict['conv1d_2'].get_weights()) conv3 = Conv1D(filters=128, kernel_size=5, padding='same', activation=lrelu, weights=layer_dict['conv1d_3'].get_weights()) #conv1 = Conv1D(filters=128, kernel_size=1, padding='same', activation='relu') #conv2 = Conv1D(filters=128, kernel_size=3, padding='same', activation='relu') #conv3 = Conv1D(filters=128, kernel_size=5, padding='same', activation='relu') conv1a = conv1(embedded_layer_body) glob1a = GlobalAveragePooling1D()(conv1a) #max1 = AveragePooling1D()(conv1a) conv2a = conv2(embedded_layer_body) glob2a = GlobalAveragePooling1D()(conv2a) #max2 = AveragePooling1D()(conv2a) conv3a = conv3(embedded_layer_body) glob3a = GlobalAveragePooling1D()(conv3a) #max3 = AveragePooling1D()(conv3a) merge_pooling = concatenate([glob1a, glob2a, glob3a]) #merge_pooling = concatenate([max1, max2, max3]) hidden_layer = Dense(1200, activation='tanh', kernel_initializer="glorot_uniform")(merge_pooling) #hidden_concat = concatenate([hidden_layer, lda_vec]) dropout_hidden = Dropout(dropout)(hidden_layer) #merge_hidden = concatenate([dropout_hidden, lda_input]) batch_norm = BatchNormalization()(dropout_hidden) #hidden_layer_2 = Dense(600, activation='tanh', kernel_initializer="glorot_uniform")(batch_norm) #dropout_hidden_2 = Dropout(0.6)(hidden_layer_2) #batch_n_2 = BatchNormalization()(dropout_hidden_2) hidden_layer_3 = Dense(600, activation='tanh', kernel_initializer="glorot_uniform")(batch_norm) dropout_hidden_3 = Dropout(0.5)(hidden_layer_3) batch_n_3 = BatchNormalization()(dropout_hidden_3) output_layer = Dense(2, activation='softmax', name='out_sent')(batch_n_3) output_lda = Dense(30, activation='softmax', 
                       name='out_lda')(batch_n_3)

    model = Model([review_text], output=[output_layer, output_lda])
    layer_dict_nu = dict([(layer.name, layer) for layer in model.layers])
    adam = Adam(lr=0.001)
    model.compile(
        loss=['categorical_crossentropy', 'kullback_leibler_divergence'],
        optimizer=adam,
        metrics=['accuracy'],
        loss_weights={
            'out_sent': (1 - alpha),
            'out_lda': alpha
        })
    #model.compile(loss=ncce, optimizer=adam, metrics=['accuracy'])
    earlystop = EarlyStopping(monitor='val_out_sent_loss',
                              min_delta=0.0001,
                              patience=9,
                              verbose=1,
                              mode='auto')
    callbacks_list = [earlystop]
    print model.summary()
    model.fit([train_x], [train_y, train_lda],
              batch_size=32 * 2,
              epochs=50,
              verbose=1,
              shuffle=True,
              callbacks=callbacks_list,
              validation_data=[[val_x], [val_y, val_lda]])
    #model.fit([train_x, train_lda], [train_y, train_lda], batch_size=64, epochs=25,
    #          verbose=1, shuffle=True)

    test_predictions = model.predict([test_x], verbose=False)
    #test_y = [np.argmax(pred) for pred in test_y]
    test_pred = [np.argmax(pred) for pred in test_predictions[0]]
    #print test_pred
    test_y = [np.argmax(label) for label in test_y]
    error_preds = [
        i for i in range(0, len(test_pred)) if (test_y[i] != test_pred[i])
    ]
    print len(error_preds)
    misclassified = [test_x[i] for i in error_preds]
    misclassified = [[get_id2word(idx, idx2word) for idx in sent if idx != 0]
                     for sent in misclassified]
    labels = [(test_y[i], test_pred[i]) for i in error_preds]
    acc = accuracy_score(test_y, test_pred)
    print acc
    return acc, misclassified, labels
# regional feature fusion
region_score_map = merge(region_score_map_list, mode='ave', name='attn')
region_score_map = BatchNormalization()(region_score_map)
region_score_map = Activation('sigmoid', name='region_attention')(region_score_map)
region_fea = merge([id_fea_map, region_score_map], mode='dot', dot_axes=(1, 1))
region_fea = Lambda(lambda x: x * (1.0 / L))(region_fea)
region_fea = BatchNormalization()(region_fea)

# attribute feature fusion
attr_scores = merge(attr_score_list, mode='concat')
attr_scores = BatchNormalization()(attr_scores)
attr_scores = Activation('sigmoid')(attr_scores)
attr_fea = merge(attr_fea_list, mode='concat')
attr_fea = Reshape((emb_dim, len(nb_attributes)))(attr_fea)
equal_attr_fea = GlobalAveragePooling1D()(attr_fea)
attr_fea = merge([attr_fea, attr_scores], mode='dot', dot_axes=(2, 1))
attr_fea = Lambda(lambda x: x * (1.0 / len(nb_attributes)))(attr_fea)
attr_fea = BatchNormalization()(attr_fea)

# loss-3: final classification
if (attr_equal):
    attr_fea = equal_attr_fea
if (region_equal):
    region_fea = id_pool
final_fea = merge([attr_fea, region_fea], mode='concat')
final_fea = Activation('relu', name='final_fea')(final_fea)
final_fea = Dropout(dropout)(final_fea)
final_prob = Dense(nb_classes)(final_fea)
final_prob = Activation(activation='softmax', name='p')(final_prob)
out_list.append(final_prob)
def pooling_blend(self, input):
    avg_pool = GlobalAveragePooling1D()(input)
    max_pool = GlobalMaxPooling1D()(input)
    conc = concatenate([avg_pool, max_pool])
    return conc
def build_model(x_train, y_train, num_time_periods, num_sensors, num_classes):
    # 1D CNN neural network
    input_shape = (num_time_periods * num_sensors)
    model_m = Sequential()
    model_m.add(
        Reshape((TIME_PERIODS, num_sensors), input_shape=(input_shape, )))
    model_m.add(
        Conv1D(128, 10, activation='relu',
               input_shape=(TIME_PERIODS, num_sensors)))
    model_m.add(Conv1D(128, 10, activation='relu'))
    model_m.add(MaxPooling1D(3))
    model_m.add(Conv1D(128, 10, activation='relu'))
    model_m.add(Conv1D(128, 10, activation='relu'))
    model_m.add(GlobalAveragePooling1D())
    model_m.add(Dropout(0.5))
    model_m.add(Dense(num_classes, activation='softmax'))

    # used to implement early stopping
    callbacks_list = [
        keras.callbacks.ModelCheckpoint(
            filepath='best_model.{epoch:02d}-{val_loss:.2f}.h5',
            monitor='val_loss',
            save_best_only=True),
        keras.callbacks.EarlyStopping(monitor='accuracy', patience=1)
    ]

    # compile the model
    model_m.compile(loss='categorical_crossentropy',
                    optimizer='adam',
                    metrics=['accuracy'])

    # Hyper-parameters
    BATCH_SIZE = 100
    EPOCHS = 10

    # Enable validation to use ModelCheckpoint and EarlyStopping callbacks.
    history = model_m.fit(x_train,
                          y_train,
                          batch_size=BATCH_SIZE,
                          epochs=EPOCHS,
                          callbacks=callbacks_list,
                          validation_split=0.2,
                          verbose=1)

    # summarize history for accuracy and loss
    plt.figure(figsize=(6, 4))
    plt.plot(history.history['accuracy'], "g--", label="Accuracy of training data")
    plt.plot(history.history['val_accuracy'], "g", label="Accuracy of validation data")
    plt.plot(history.history['loss'], "r--", label="Loss of training data")
    plt.plot(history.history['val_loss'], "r", label="Loss of validation data")
    plt.title('Model Accuracy and Loss')
    plt.ylabel('Accuracy and Loss')
    plt.xlabel('Training Epoch')
    plt.ylim(0)
    plt.legend()
    plt.show()

    return model_m
def get_model(): model = Sequential() if 'model' in locals(): model.reset_states() def identity_block(inputs, kernel_size, filters): filters1, filters2, filters3 = filters x = Conv1D(filters1, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(inputs) x = BatchNormalization(momentum=batch_decay, epsilon = eps)(x) x = Activation('relu')(x) x = Conv1D(filters2, kernel_size, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay), padding = 'same')(x) x = BatchNormalization(momentum=batch_decay, epsilon = eps)(x) x = Activation('relu')(x) x = Conv1D(filters3, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x) x = BatchNormalization(momentum=batch_decay, epsilon = eps)(x) x = add([x, inputs]) x = Activation('relu')(x) return x def conv_block(inputs, kernel_size, filters, strides = 2): filters1, filters2, filters3 = filters x = Conv1D(filters1, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(inputs) x = BatchNormalization(momentum=batch_decay, epsilon = eps)(x) x = Activation('relu')(x) x = Conv1D(filters2, kernel_size, strides = strides, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay), padding = 'same')(x) x = BatchNormalization(momentum=batch_decay, epsilon = eps)(x) x = Activation('relu')(x) x = Conv1D(filters3, 1, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(x) x = BatchNormalization(momentum=batch_decay, epsilon = eps)(x) shortcut = Conv1D(filters3, 1, strides=strides, use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(inputs) shortcut = BatchNormalization(momentum=batch_decay, epsilon = eps)(shortcut) x = add([x, shortcut]) x = Activation('relu')(x) return x inputs = Input(shape = (max_length,2)) #x = LSTM(256, dropout=dropout_rate, recurrent_dropout=dropout_rate, return_sequences=True)(inputs) #x = LSTM(128, dropout=dropout_rate, recurrent_dropout=dropout_rate, return_sequences=True)(x) #x = Activation('relu')(x) x = Conv1D(filter1, conv_kernel, strides = 2, padding = 'valid', use_bias=False, kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(inputs) x = BatchNormalization(momentum=batch_decay, epsilon=eps)(x) x = Activation('relu')(x) x = MaxPooling1D(3, strides = 2)(x) # x = conv_block(x, block_kernel, [filter1,filter1,filter1*4]) # x = identity_block(x, block_kernel, [filter1, filter1, filter1*4]) # x = conv_block(x, block_kernel, [filter2,filter2,filter2*4]) # # x = identity_block(x,3, [filter2, filter2, filter2*4]) # # x = SpatialDropout1D(rate = dropout_rate)(x) # x = Conv1D(filter2, 11, strides = 2, padding = 'valid', use_bias=False, kernel_initializer='he_normal', # kernel_regularizer=l2(weight_decay))(x) # x = Activation('relu')(x) x = GlobalAveragePooling1D()(x) ''' # dense to 100 >> final layer >> dense to num_classes x = Dense(100, kernel_regularizer = l2(weight_decay), bias_regularizer = l2(weight_decay), name ='features_hundred')(x) final_layer = Model(inputs= inputs, outputs = x) x = Dense(num_classes, kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay), name= 'features')(x) x = Activation('softmax')(x) ''' x = Dense(num_classes, kernel_regularizer=l2(weight_decay), bias_regularizer=l2(weight_decay),name='features')(x) final_layer = Model(inputs = inputs, outputs = x) x = Activation('softmax')(x) model = Model(inputs=inputs, outputs=x) # optimizer sgd = optimizers.SGD(lr=learning_rate, 
                         momentum=momentum)
    adagrad = optimizers.Adagrad()
    adam = optimizers.Adam(lr=learning_rate)

    # compiler
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    return model, final_layer
def compile_models(self, meta, gpus=1): # Compile discriminator discriminator = self.build_discriminator(meta) discriminator.trainable = True if gpus > 1: parallel_discriminator = multi_gpu_model(discriminator, gpus=4) parallel_discriminator.compile(loss='hinge', optimizer=Adam(lr=2e-4, beta_1=1e-5)) else: discriminator.compile(loss='hinge', optimizer=Adam(lr=2e-4, beta_1=1e-5)) # Compile Combined model to train generator embedder = self.build_embedder() generator = self.build_generator() intermediate_discriminator = self._build_intermediate_discriminator_model( discriminator) intermediate_vgg19 = self.build_intermediate_vgg19_model() intermediate_vggface = self.build_intermediate_vggface_model() discriminator.trainable = False intermediate_discriminator.trainable = False intermediate_vgg19.trainable = False intermediate_vggface.trainable = False input_lndmk = Input(shape=self.input_shape, name='landmarks') condition = Input(shape=(self.num_videos, ), name='condition') inputs_embedder = [ Input((self.h, self.w, self.c * 2), name='style{}'.format(i)) for i in range(self.k) ] # (BATCH_SIZE, H, W, 6) embeddings = [embedder(em_input) for em_input in inputs_embedder] # (BATCH_SIZE, 512) if self.k > 1: embeddings_expand = [ Lambda(lambda x: K.expand_dims(x, axis=1))(embedding) for embedding in embeddings ] # (BATCH_SIZE, 1, 512) embedding_k = Concatenate(axis=1)( embeddings_expand) # (BATCH_SIZE, K, 512) average_embedding = GlobalAveragePooling1D()( embedding_k) # (BATCH_SIZE, 512) else: average_embedding = embeddings[0] fake_frame = generator([input_lndmk, average_embedding]) intermediate_vgg19_fakes = intermediate_vgg19(fake_frame) intermediate_vggface_fakes = intermediate_vggface(fake_frame) intermediate_discriminator_fakes = intermediate_discriminator( [fake_frame, input_lndmk]) if meta: self._build_embedding_discriminator_model( discriminator ) # Call embedding discriminator when meta learning realicity = discriminator([fake_frame, input_lndmk, condition]) combined = Model( inputs=[input_lndmk] + inputs_embedder + [condition], outputs=intermediate_vgg19_fakes + intermediate_vggface_fakes + [realicity] + intermediate_discriminator_fakes + embeddings, name='combined') loss = ['mae'] * len(intermediate_vgg19_fakes) + ['mae'] * len( intermediate_vggface_fakes) + ['hinge'] + ['mae'] * len( intermediate_discriminator_fakes) + ['mae'] * self.k loss_weights = [1.5e-1] * len(intermediate_vgg19_fakes) + [ 2.5e-2 ] * len(intermediate_vggface_fakes) + [ 1 ] + [10] * len(intermediate_discriminator_fakes) + [10] * self.k else: embedder.trainable = False realicity = discriminator( [fake_frame, input_lndmk, average_embedding]) combined = Model(inputs=[input_lndmk] + inputs_embedder, outputs=intermediate_vgg19_fakes + intermediate_vggface_fakes + [realicity] + intermediate_discriminator_fakes, name='combined') loss = ['mae'] * len(intermediate_vgg19_fakes) + ['mae'] * len( intermediate_vggface_fakes) + [ 'hinge' ] + ['mae'] * len(intermediate_discriminator_fakes) loss_weights = [1.5e-1] * len(intermediate_vgg19_fakes) + [ 2.5e-2 ] * len(intermediate_vggface_fakes) + [ 1 ] + [10] * len(intermediate_discriminator_fakes) self.embedder = embedder self.generator = generator self.combined = combined self.discriminator = discriminator if gpus > 1: parallel_combined = multi_gpu_model(combined, gpus=gpus) parallel_combined.compile(loss=loss, loss_weights=loss_weights, optimizer=Adam(lr=5e-5, beta_1=1e-5)) self.parallel_combined = parallel_combined self.parallel_discriminator = parallel_discriminator return 
            parallel_combined, parallel_discriminator, combined, discriminator

        combined.compile(loss=loss,
                         loss_weights=loss_weights,
                         optimizer=Adam(lr=5e-5, beta_1=1e-5))
        return combined, combined, discriminator, discriminator
# X_train = scaler.fit_transform(X_train.reshape(-1, X_train.shape[-1])).reshape(X_train.shape)
# X_test = scaler.transform(X_test.reshape(-1, X_test.shape[-1])).reshape(X_test.shape)

num_sensors = 4
TIME_PERIODS = 60
BATCH_SIZE = 16
EPOCHS = 10

model_m = Sequential()
model_m.add(
    Conv1D(100, 6, activation='relu', input_shape=(TIME_PERIODS, num_sensors)))
model_m.add(Conv1D(100, 6, activation='relu'))
model_m.add(MaxPooling1D(3))
model_m.add(Conv1D(160, 6, activation='relu'))
model_m.add(Conv1D(160, 6, activation='relu'))
model_m.add(GlobalAveragePooling1D(name='G_A_P_1D'))
model_m.add(Dropout(0.5))
model_m.add(Dense(3, activation='softmax'))

model_m.compile(loss='categorical_crossentropy',
                optimizer='adam',
                metrics=['accuracy'])

history = model_m.fit(X_train,
                      y_train,
                      batch_size=BATCH_SIZE,
                      epochs=EPOCHS,
                      validation_split=0.2)

dc = model_m.predict(X_test)
pred_test = np.argmax(dc, axis=1)
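# A minimal evaluation sketch (not part of the original snippet): compare
# pred_test with the held-out labels, assuming y_test is one-hot encoded like
# y_train.
from sklearn.metrics import accuracy_score, confusion_matrix

true_test = np.argmax(y_test, axis=1)  # collapse one-hot labels to class ids
print('Test accuracy:', accuracy_score(true_test, pred_test))
print('Confusion matrix:\n', confusion_matrix(true_test, pred_test))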
def ConvNet2DModel1(x_train, y_train, x_test, y_test, DROP_OUT, INPUT_SHAPE): #define the ConvNet model = Sequential() #conv layer # 20 kernels of size 3x3 model.add( Conv1D(20, kernel_size=10, kernel_initializer=KERNEL_INITIAL, kernel_constraint=maxnorm(2), input_shape=INPUT_SHAPE)) #conv layer activation model.add(Activation("relu")) #conv layer (relu) => pool(max) model.add(GlobalAveragePooling1D()) if (DROP_OUT == 1): model.add(Dropout(0.3)) # output layer model.add(BatchNormalization()) #model.add(Flatten()) #Flattens model.add(Dense(50)) model.add(Activation('relu')) model.add(Dropout(0.3)) model.add(Reshape((50, 1))) # shape becomes (batch_size,200,1) #input_shape to RNN layer is (batch_size,timeSteps,num_features), only provide (timeSteps,num_features) model.add(BatchNormalization()) model.add( LSTM(10, activation='tanh', inner_activation='sigmoid', kernel_constraint=maxnorm(2), kernel_initializer=KERNEL_INITIAL, return_sequences=False, dropout=0.3)) model.add(Dense(1)) #a soft max classifier model.add(Activation("sigmoid")) filepath = "model1_dropout_" + str( DROP_OUT) + "_{epoch:02d}_{val_acc:.2f}.hdf5" checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1, save_best_only=True, mode='max') early_stopping_monitor = EarlyStopping(monitor='val_loss', patience=8) #callbacks_list = [checkpoint] model.compile(loss=LOSS, optimizer='Adam', metrics=METRICS) print(model.summary()) #Tuning = model.fit(x_train,y_train,batch_size=BATCH_SIZE, epochs = NB_EPOCH, verbose = VERBOSE, # validation_split = 0.2) #(x_test,y_test),callbacks=[checkpoint,early_stopping_monitor,roc_callback(training_data=(x_train, y_train),validation_data=(x_test, y_test))]) #,callbacks=callbacks_list) Tuning = model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=NB_EPOCH, verbose=VERBOSE, validation_data=(x_test, y_test), callbacks=[ checkpoint, early_stopping_monitor, roc_callback(training_data=(x_train, y_train), validation_data=(x_test, y_test)) ]) #,callbacks=callbacks_list) ## if you want early stopping #Tuning = model.fit(Train_Predictors,Train_class,batch_size=BATCH_SIZE, epochs = NB_EPOCH, verbose = VERBOSE, #validation_split = VALIDATION_SPLIT,callbacks=[early_stopping_monitor,checkpoint]) return model, Tuning
def test_TensorBoard_multi_input_output(tmpdir):
    np.random.seed(np.random.randint(1, 1e7))
    filepath = str(tmpdir / 'logs')

    (X_train, y_train), (X_test, y_test) = get_data_callbacks(
        input_shape=(input_dim, input_dim))

    y_test = np_utils.to_categorical(y_test)
    y_train = np_utils.to_categorical(y_train)

    inp1 = Input((input_dim, input_dim))
    inp2 = Input((input_dim, input_dim))
    inp_3d = add([inp1, inp2])
    inp_2d = GlobalAveragePooling1D()(inp_3d)
    # test a layer with a list of output tensors
    inp_pair = Lambda(lambda x: x)([inp_3d, inp_2d])
    hidden = dot(inp_pair, axes=-1)
    hidden = Dense(num_hidden, activation='relu')(hidden)
    hidden = Dropout(0.1)(hidden)
    output1 = Dense(num_classes, activation='softmax')(hidden)
    output2 = Dense(num_classes, activation='softmax')(hidden)
    model = Model(inputs=[inp1, inp2], outputs=[output1, output2])
    model.compile(loss='categorical_crossentropy',
                  optimizer='sgd',
                  metrics=['accuracy'])

    # we must generate new callbacks for each test, as they aren't stateless
    def callbacks_factory(histogram_freq, embeddings_freq=1):
        return [callbacks.TensorBoard(log_dir=filepath,
                                      histogram_freq=histogram_freq,
                                      write_images=True, write_grads=True,
                                      embeddings_freq=embeddings_freq,
                                      embeddings_layer_names=['dense_1'],
                                      embeddings_data=[X_test] * 2,
                                      batch_size=5)]

    # fit without validation data
    model.fit([X_train] * 2, [y_train] * 2, batch_size=batch_size,
              callbacks=callbacks_factory(histogram_freq=0, embeddings_freq=0),
              epochs=3)

    # fit with validation data and accuracy
    model.fit([X_train] * 2, [y_train] * 2, batch_size=batch_size,
              validation_data=([X_test] * 2, [y_test] * 2),
              callbacks=callbacks_factory(histogram_freq=1), epochs=2)

    train_generator = data_generator([X_train] * 2, [y_train] * 2, batch_size)

    # fit generator without validation data
    model.fit_generator(train_generator, len(X_train), epochs=2,
                        callbacks=callbacks_factory(histogram_freq=0,
                                                    embeddings_freq=0))

    # fit generator with validation data and accuracy
    model.fit_generator(train_generator, len(X_train), epochs=2,
                        validation_data=([X_test] * 2, [y_test] * 2),
                        callbacks=callbacks_factory(histogram_freq=1))

    assert os.path.isdir(filepath)
    shutil.rmtree(filepath)
    assert not tmpdir.listdir()
embedded_user = embedding_layer_user(input_user)
embedded_user2 = embedding_layer_user2(input_user)
embedded_photo = embedding_layer_photo(input_photo)
embedded_face = embedding_layer_face(input_photo)
embedded_user2_agg = embedding_layer_user2(input_user_mean)
embedded_photo_agg = embedding_layer_photo(input_photo_mean)
embedded_face_agg = embedding_layer_face(input_photo_mean)

embedded_user = Flatten()(embedded_user)
embedded_user2 = Flatten()(embedded_user2)
embedded_photo = Flatten()(embedded_photo)
embedded_face = Flatten()(embedded_face)
embedded_user2_max = GlobalMaxPooling1D()(embedded_user2_agg)
embedded_photo_mean = GlobalAveragePooling1D()(embedded_photo_agg)
embedded_face_mean = GlobalAveragePooling1D()(embedded_face_agg)

flatten_list = [
    embedded_user, embedded_user2, embedded_photo, embedded_face,
    embedded_user2_max, embedded_photo_mean, embedded_face_mean
]

act = 'relu'
merged = concatenate(flatten_list, name='match_concat')
merged = Dense(128, activation=act)(merged)
merged = BatchNormalization()(merged)
merged = Dropout(0.25)(merged)
merged_fea = concatenate([merged, input_feature], name='feature_concat')
preds = Dense(1, activation='sigmoid')(merged_fea)
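# The fragment above assumes the inputs and shared embedding layers were
# declared earlier in the script. A plausible sketch of those declarations is
# given below; every vocabulary size, embedding width and feature count is an
# illustrative guess, not the original values.
from keras.layers import Input, Embedding

input_user = Input(shape=(1, ), name='user_id')              # single id -> Flatten
input_photo = Input(shape=(1, ), name='photo_id')
input_user_mean = Input(shape=(None, ), name='user_history')     # id sequence -> pooling
input_photo_mean = Input(shape=(None, ), name='photo_history')
input_feature = Input(shape=(16, ), name='dense_features')

embedding_layer_user = Embedding(100000, 32)
embedding_layer_user2 = Embedding(100000, 32)
embedding_layer_photo = Embedding(500000, 32)
embedding_layer_face = Embedding(500000, 8)

# after the fragment runs, preds would typically be wrapped as:
# model = Model([input_user, input_photo, input_user_mean,
#                input_photo_mean, input_feature], preds)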
def __init__(self,
             C=4,
             V=40000,
             MAX_LEN=600,
             MAX_LEN_TERM=300,
             NUM_FEAT=8,
             char_embed_matrix=None,
             term_embed_matrix=None,
             use_multi_task=False,
             name='hybridmodel.h5',
             PE=False):
    # Experiment notes (validation accuracy):
    #   +bn2 0.975, +bn1 0.986
    #   +bn1, max+avg pool 0.987
    #   squeeze embedding (128) 0.985, (64+conv64) 0.983
    #   removing the sub-networks' dense layers 0.987, squeeze embedding+relu 0.985
    #   conv 64 0.987, conv 128 0.988
    self.name = name
    self.use_multi_task = use_multi_task
    input = Input(shape=(MAX_LEN, ), dtype='int32')
    # CNN layers do not support masking, i.e. mask_zero=True cannot be used
    if char_embed_matrix is None:
        x = Embedding(V, 32)(input)
    else:
        embed1 = Embedding(char_embed_matrix.shape[0],
                           char_embed_matrix.shape[1],
                           weights=[char_embed_matrix],
                           trainable=False)
        embed2 = Embedding(char_embed_matrix.shape[0],
                           char_embed_matrix.shape[1],
                           weights=[char_embed_matrix],
                           trainable=True)
        x = embed1(input)
        x2 = embed2(input)
        x = Concatenate()([x, x2])
        # x = Dense(64, activation='relu')(x)
    if PE:
        echar_input = Input(shape=(MAX_LEN, ), dtype='int32',
                            name='PE_char_in')
        ex_char = Embedding(MAX_LEN, 32, name='PEchar')(echar_input)
        x = Concatenate()([x, ex_char])
    kss = [2, 3, 4, 5]
    hs = []
    for ks in kss:
        h = Conv1D(128, ks, activation='relu', padding='same')(x)
        h1 = GlobalMaxPool1D()(h)
        h2 = GlobalAveragePooling1D()(h)
        hs.append(h1)
        hs.append(h2)
    hs = Concatenate()(hs)
    # hs = Dense(128, activation='relu')(hs)
    if self.use_multi_task:
        y1 = Dense(C, activation='softmax', name='y1')(hs)

    input_term = Input(shape=(MAX_LEN_TERM, ), dtype='int32')
    if term_embed_matrix is None:
        xterm = Embedding(V, 32)(input_term)
    else:
        embed1 = Embedding(term_embed_matrix.shape[0],
                           term_embed_matrix.shape[1],
                           weights=[term_embed_matrix],
                           trainable=False)
        embed2 = Embedding(term_embed_matrix.shape[0],
                           term_embed_matrix.shape[1],
                           weights=[term_embed_matrix],
                           trainable=True)
        xterm = embed1(input_term)
        xterm2 = embed2(input_term)
        xterm = Concatenate()([xterm, xterm2])
        # xterm = Dense(64, activation='relu')(xterm)
    if PE:
        eterm_input = Input(shape=(MAX_LEN_TERM, ), dtype='int32',
                            name='PE_term_in')
        ex_term = Embedding(MAX_LEN_TERM, 32, name='PEterm')(eterm_input)
        xterm = Concatenate()([xterm, ex_term])

    hsterm = []
    for ks in kss:
        h = Conv1D(128, ks, activation='relu', padding='same')(xterm)
        h1 = GlobalMaxPool1D()(h)
        h2 = GlobalAveragePooling1D()(h)
        hsterm.append(h1)
        hsterm.append(h2)
    hsterm = Concatenate()(hsterm)
    # hsterm = Dense(128, activation='relu')(hsterm)

    input_feat = Input(shape=(NUM_FEAT, ), dtype='float32')
    hfeat = Dense(8, activation='relu')(input_feat)

    hs = Concatenate()([hs, hsterm, hfeat])
    hs = BatchNormalization()(hs)
    z = Dense(128, activation='relu')(hs)
    # z = BatchNormalization()(z)
    z = Dense(C, activation='softmax', name='y')(z)
    if PE:
        model = Model(
            [input, input_term, input_feat, echar_input, eterm_input], z)
    else:
        model = Model([input, input_term, input_feat], z)
    opt = Adagrad(lr=0.005)
    # opt = Adam()
    model.compile(opt, 'categorical_crossentropy', metrics=['acc'])
    self.model = model
    if self.use_multi_task:
        y2 = Dense(C, activation='softmax', name='y2')(hsterm)
        y3 = Dense(C, activation='softmax', name='y3')(hfeat)
        if PE:
            self.train_model = Model(
                [input, input_term, input_feat, echar_input, eterm_input],
                [z, y1, y2, y3])
        else:
            self.train_model = Model([input, input_term, input_feat],
                                     [z, y1, y2, y3])
        self.train_model.compile(opt, 'categorical_crossentropy',
                                 metrics=['acc'])
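# Instantiation sketch for the constructor above. The enclosing class name is
# not visible in this snippet, so HybridModel below is a hypothetical stand-in,
# and the embedding matrices and argument values are illustrative only.
import numpy as np

char_embed = np.random.rand(40000, 100).astype('float32')
term_embed = np.random.rand(40000, 100).astype('float32')

clf = HybridModel(C=4, V=40000, MAX_LEN=600, MAX_LEN_TERM=300, NUM_FEAT=8,
                  char_embed_matrix=char_embed,
                  term_embed_matrix=term_embed,
                  use_multi_task=True, PE=True)
clf.model.summary()          # single-task inference model
clf.train_model.summary()    # multi-task training model with outputs z, y1, y2, y3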
Ytrain[num_pos:] = np.ones((num_neg, ), dtype=np.int8)
indice1 = np.arange(len(train_sequences))
np.random.shuffle(indice1)
Xtrain = train_sequences[indice1]
Ytrain = Ytrain[indice1]

main_input = Input(shape=(5, ))
# model.add(Embedding(35000, 50, input_length=500))
init_method = keras.initializers.normal
# embedding1 = Embedding(num_words, 500, embeddings_initializer=init_method)(main_input)
x = Embedding(num_words, 200, embeddings_initializer=init_method)(main_input)
# x = AveragePooling1D(pool_size=3, strides=1, padding='valid')(x)
# x = Activation('relu')(x)
# x = GlobalMaxPooling1D()(x)
x = GlobalAveragePooling1D()(x)
x = Activation('relu')(x)
# embedding2 = Embedding(num_words, 50, embeddings_initializer=init_method)(main_input)
# y = AveragePooling1D(pool_size=2, strides=1)(embedding2)
# y = GlobalMaxPooling1D()(y)
# embedding3 = Embedding(num_words, 50, input_length=max_len, embeddings_initializer='normal')(input)
# p = GlobalAveragePooling1D()(embedding3)
# z = keras.layers.concatenate([x, y])
# z = keras.layers.concatenate([x, y, p])
# x = Dropout(0.2)(x)
output = Dense(1, activation='sigmoid', trainable=True, use_bias=False)(x)
model = Model(inputs=main_input, outputs=output)
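# The snippet stops after building the Model. A minimal compile-and-fit sketch
# for the binary classifier it defines might look like this; the optimizer,
# batch size and epoch count are assumptions, not values from the original.
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(Xtrain, Ytrain, batch_size=64, epochs=5, validation_split=0.1)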
# tail of the loop that fills embedding_matrix from the pretrained vectors
if embedding_vector is not None:
    embedding_matrix[i] = embedding_vector

print_step('Build model...')
inp = Input(shape=(maxlen, ))
x = Embedding(max_features, embed_size, weights=[embedding_matrix],
              trainable=False)(inp)
x = SpatialDropout1D(0.2)(x)
x = Bidirectional(
    GRU(128, dropout=0.1, recurrent_dropout=0.1, return_sequences=True))(x)
x = Conv1D(64,
           kernel_size=3,
           padding='valid',
           kernel_initializer='glorot_uniform')(x)
avg_pool = GlobalAveragePooling1D()(x)
max_pool = GlobalMaxPooling1D()(x)
conc = concatenate([avg_pool, max_pool])
outp = Dense(6, activation='sigmoid')(conc)

model = Model(inputs=inp, outputs=outp)
model.compile(loss='binary_crossentropy',
              optimizer=Adam(lr=1e-3),
              metrics=['accuracy'])
# save the freshly initialised weights so each CV fold can restart from them
model.save_weights('cache/gru-conv-model-weights.h5')

print_step('Making KFold for CV')
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=2017)
i = 1
cv_scores = []
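# Sketch of how the fold loop that follows typically uses the saved weights:
# reload the initial state at the start of every fold so folds do not share
# trained parameters. Names such as train_X / train_y, the column used for
# stratification, and the training settings are assumptions about the
# surrounding script rather than the original code.
for dev_index, val_index in kf.split(train_X, train_y[:, 0]):
    print_step('Fold {}'.format(i))
    model.load_weights('cache/gru-conv-model-weights.h5')   # reset to initial weights
    model.fit(train_X[dev_index], train_y[dev_index],
              batch_size=32, epochs=2,
              validation_data=(train_X[val_index], train_y[val_index]))
    score = model.evaluate(train_X[val_index], train_y[val_index], verbose=0)
    cv_scores.append(score)
    i += 1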
def build_model():
    model = Sequential()
    # TimeDistributed Conv2D stack over channels-first 1x19 feature maps
    model.add(
        TimeDistributed(Conv2D(512, (1, 3),
                               kernel_initializer='glorot_uniform',
                               activation='relu',
                               padding='valid',
                               strides=(1, 1),
                               data_format='channels_first',
                               use_bias=True),
                        input_shape=(None, 1, 1, 19)))
    model.add(
        TimeDistributed(
            AveragePooling2D(pool_size=(1, 2),
                             strides=None,
                             padding='valid',
                             data_format='channels_first')))
    model.add(
        TimeDistributed(Conv2D(256, (1, 3),
                               kernel_initializer='glorot_uniform',
                               activation='relu',
                               padding='valid',
                               strides=(1, 1),
                               data_format='channels_first',
                               use_bias=True)))
    model.add(
        TimeDistributed(
            AveragePooling2D(pool_size=(1, 2),
                             strides=None,
                             padding='valid',
                             data_format='channels_first')))
    model.add(
        TimeDistributed(Conv2D(128, (1, 3),
                               kernel_initializer='glorot_uniform',
                               activation='relu',
                               padding='valid',
                               strides=(1, 1),
                               data_format='channels_first',
                               use_bias=True)))
    model.add(TimeDistributed(Flatten()))
    model.add(TimeDistributed(Dense(1024)))
    model.add(TimeDistributed(Activation('relu')))
    model.add(LSTM(1024, return_sequences=True, activation='relu'))
    model.add(LSTM(1024, return_sequences=True, activation='relu'))
    model.add(TimeDistributed(Dropout(0.25)))
    model.add(TimeDistributed(Activation('relu')))
    model.add(TimeDistributed(Dense(1024)))
    model.add(TimeDistributed(Activation('linear')))
    model.add(GlobalAveragePooling1D(name="global_avg"))
    model.add(Dense(6))
    model.add(Activation('linear'))
    model.summary()
    plot_model(model, to_file='model.png')

    start = time.time()
    model.compile(loss="mse", optimizer="adam")
    print("> Compilation Time : ", time.time() - start)
    return model
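# Sketch of feeding build_model(): a batch of sequences of 1x1x19
# channels-first frames with one 6-dimensional regression target per sequence.
# Shapes and training settings are illustrative assumptions; note that
# channels_first convolutions may require a GPU with the TensorFlow backend.
import numpy as np

model = build_model()
X = np.random.rand(8, 50, 1, 1, 19)   # (batch, timesteps, channels, rows, cols)
y = np.random.rand(8, 6)              # one 6-d target per sequence
model.fit(X, y, batch_size=4, epochs=1)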
from keras.layers import Dense, Input, Flatten
from keras.layers import GlobalAveragePooling1D, Embedding
from keras.models import Model

EMBEDDING_DIM = 50
N_CLASSES = 2

# input: a sequence of MAX_SEQUENCE_LENGTH integers
sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH, ), dtype='int32')

embedding_layer = Embedding(MAX_NB_WORDS,
                            EMBEDDING_DIM,
                            input_length=MAX_SEQUENCE_LENGTH,
                            trainable=True)
embedded_sequences = embedding_layer(sequence_input)

average = GlobalAveragePooling1D()(embedded_sequences)
predictions = Dense(N_CLASSES, activation='softmax')(average)

model = Model(sequence_input, predictions)
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['acc'])

# In[68]:

model.fit(x_train, y_train, validation_split=0.1, epochs=10, batch_size=128)

# In[69]:
def build_cnn_bgru_attention(input_set_size, width, height, mul_nb_classes):
    print('cnn-gru attention model building...')
    inputs = Input(shape=(height, ), dtype='int32')
    embedd = Embedding(input_set_size, width, input_length=height)(inputs)
    # 1st conv block
    conv1_1 = Convolution1D(64, 3, border_mode='same',
                            activation='relu')(embedd)
    bn1 = BatchNormalization(mode=1)(conv1_1)
    pool1 = MaxPooling1D(pool_length=2)(bn1)
    drop1 = Dropout(0.2)(pool1)
    # 2nd conv block
    conv2_1 = Convolution1D(128, 3, border_mode='same',
                            activation='relu')(drop1)
    bn2 = BatchNormalization(mode=1)(conv2_1)
    pool2 = MaxPooling1D(pool_length=2)(bn2)
    drop2 = Dropout(0.2)(pool2)
    # 3rd conv block
    conv3_1 = Convolution1D(192, 2, border_mode='same',
                            activation='relu')(drop2)
    bn3 = BatchNormalization(mode=1)(conv3_1)
    # pool3 = MaxPooling1D(pool_length=2)(bn3)
    drop3 = Dropout(0.1)(bn3)

    # b = merge([bn4, drop3], mode='concat')
    # blstm = Bidirectional(LSTM(256, return_sequences=False), merge_mode='sum')(drop3)
    gru = Bidirectional(GRU(256, return_sequences=True),
                        merge_mode='sum')(drop3)
    drop = Dropout(0.5)(gru)
    '''
    drop_3d = Reshape((256, 1))(drop)
    att = TimeDistributed(Dense(1))(drop_3d)
    #att4 = Flatten()(att4)
    att = Activation(activation="softmax")(att)
    #att4 = RepeatVector(mul_nb_classes[3])(att4)
    #att4 = Permute((2,1))(att4)
    merg = Flatten()(merge([drop_3d, att], mode='mul'))
    '''
    # attention
    mask = TimeDistributed(Dense(1))(drop)  # compute the attention mask
    mask = Flatten()(mask)
    mask = Activation('softmax')(mask)
    mask = RepeatVector(256)(mask)
    mask = Permute([2, 1])(mask)
    # apply the mask
    activations = merge([gru, mask], mode='mul')
    activations = GlobalAveragePooling1D()(activations)
    # activations = Flatten()(activations)

    # outputs: each level is conditioned on the previous level's prediction
    out1 = Dense(mul_nb_classes[0], activation='sigmoid')(activations)
    merged1 = merge([out1, activations], mode='concat')
    out2 = Dense(mul_nb_classes[1], activation='sigmoid')(merged1)
    merged2 = merge([out2, activations], mode='concat')
    out3 = Dense(mul_nb_classes[2], activation='sigmoid')(merged2)
    merged3 = merge([out3, activations], mode='concat')
    out4 = Dense(mul_nb_classes[3], activation='sigmoid')(merged3)
    '''
    drop_3d = Reshape((mul_nb_classes[3], 1))(drop)
    out4_3d = Reshape((mul_nb_classes[3], 1))(out4)
    att4_out4 = TimeDistributed(Dense(1))(out4_3d)
    att4_drop = TimeDistributed(Dense(1))(drop_3d)
    '''
    '''
    out4_3d = Reshape((mul_nb_classes[3], 1))(out4)
    att4 = TimeDistributed(Dense(1))(out4_3d)
    #att4 = Flatten()(att4)
    att4 = Activation(activation="softmax")(att4)
    #att4 = RepeatVector(mul_nb_classes[3])(att4)
    #att4 = Permute((2,1))(att4)
    merged4 = Flatten()(merge([out4_3d, att4], mode='mul'))
    '''
    out = [out1, out2, out3, out4]
    model = Model(input=[inputs], output=out)
    model.summary()

    sgd = SGD(lr=0.05, momentum=0.9, decay=1e-6, nesterov=True)
    model.compile(
        loss='binary_crossentropy',
        optimizer=sgd,
        # optimizer='adam',
        metrics=['accuracy'],
    )
    print("cnn-gru attention model has been built.")
    return model
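# Usage sketch for the builder above (it targets the Keras 1 style API, as the
# border_mode / merge / mode=1 arguments show). The vocabulary size, embedding
# width, sequence length and per-level class counts below are illustrative
# assumptions, not values from the original project.
vocab_size, embed_width, seq_len = 20000, 100, 200
label_counts = [10, 30, 80, 200]          # classes at each hierarchy level
model = build_cnn_bgru_attention(vocab_size, embed_width, seq_len,
                                 label_counts)
# x: (samples, seq_len) integer ids; y: one multi-hot array per output head
# model.fit(x, [y1, y2, y3, y4], nb_epoch=10, batch_size=64)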