def build_discriminator(self):
    """Build the label-conditioned discriminator as a two-input ``Model``.

    The image is flattened and multiplied element-wise with a flattened
    label embedding of width ``np.prod(self.img_shape)``.  The product is
    scored by a maxout MLP (240 -> 50 -> 240 units, each followed by ReLU
    and 50% dropout) that ends in a single sigmoid unit.

    Returns:
        ``Model([img, label], validity)``.
    """
    flat_dim = np.prod(self.img_shape)

    scorer = Sequential()
    scorer.add(MaxoutDense(240, nb_feature=5, input_dim=flat_dim))
    scorer.add(Activation('relu'))
    scorer.add(Dropout(0.5))
    # Remaining hidden stages as (output width, number of maxout pieces).
    for width, pieces in ((50, 5), (240, 4)):
        scorer.add(MaxoutDense(width, nb_feature=pieces))
        scorer.add(Activation('relu'))
        scorer.add(Dropout(0.5))
    scorer.add(Dense(1, activation='sigmoid'))
    scorer.summary()

    img = Input(shape=self.img_shape)
    label = Input(shape=(1, ), dtype='int32')

    # One embedding row per class, sized to match the flattened image.
    embedded_label = Embedding(self.num_classes, flat_dim)(label)
    label_embedding = Flatten()(embedded_label)
    flat_img = Flatten()(img)

    conditioned = multiply([flat_img, label_embedding])
    validity = scorer(conditioned)
    return Model([img, label], validity)
def build_vgg16_custom(learning_rate, trainable=2, weights_path=None, resume=False):
    """Build and compile a VGG16-based classifier with a maxout head.

    The convolutional base comes from ``VGG_16("./vgg16_weights.h5",
    full=False, trainable=trainable)``.  On top of it this function adds a
    frozen 2x2 max-pool, a flatten, two ``MaxoutDense(32)`` + 50% dropout
    stages and a 10-way softmax.

    Args:
        learning_rate: Learning rate for the SGD optimizer.
        trainable: Forwarded unchanged to ``VGG_16``.
        weights_path: Directory holding ``weights_VGG16_CUSTOM.hdf5``.
            Only read when ``resume`` is true.
        resume: When true, load previously saved weights before compiling.

    Returns:
        The compiled model, named ``"VGG16_CUSTOM"``.
    """
    model = VGG_16("./vgg16_weights.h5", full=False, trainable=trainable)
    model.add(MaxPooling2D((2, 2), strides=(2, 2), trainable=False))
    model.add(Flatten())
    for _ in range(2):
        model.add(MaxoutDense(32))
        model.add(Dropout(0.5))
    model.add(Dense(10, init="normal", activation='softmax'))
    model.name = "VGG16_CUSTOM"

    if resume:
        model.load_weights(
            os.path.join(weights_path, "weights_" + model.name + ".hdf5"))

    # Nesterov-momentum SGD with no learning-rate decay.
    sgd = SGD(lr=learning_rate, decay=0, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='sparse_categorical_crossentropy')

    # A single parenthesised argument prints identically on Python 2 and 3.
    print("Model: %s | Resume: %s | Learning Rate: %0.6f" % (
        model.name, resume, learning_rate))
    return model
def build_discriminator(self):
    """Build a two-branch, label-conditioned maxout discriminator.

    Image branch: flatten -> ``MaxoutDense(240, nb_feature=5)`` -> ReLU ->
    dropout.  Label branch: embedding (flattened) ->
    ``MaxoutDense(50, nb_feature=5)`` -> ReLU -> dropout.  The branches are
    concatenated, passed through ``MaxoutDense(240, nb_feature=4)`` -> ReLU
    -> dropout and scored by a single sigmoid unit.

    Every layer is applied to its input tensor, so the returned model is a
    connected graph whose output is the sigmoid validity score.

    Returns:
        ``Model([img, label], validity)``.
    """
    img = Input(shape=self.img_shape)
    label = Input(shape=(1, ), dtype='int32')

    # Image branch.
    img_emb = Flatten()(img)
    img_emb = MaxoutDense(240, nb_feature=5)(img_emb)
    img_emb = Activation('relu')(img_emb)
    img_emb = Dropout(0.5)(img_emb)

    # Label branch: one embedding row per class, flattened to a vector.
    label_embedding = Flatten()(Embedding(self.num_classes,
                                          np.prod(self.img_shape))(label))
    label_emb_d = MaxoutDense(50, nb_feature=5)(label_embedding)
    label_emb_d = Activation('relu')(label_emb_d)
    label_emb_d = Dropout(0.5)(label_emb_d)

    # Joint representation and final real/fake score.
    merged_d = Concatenate()([img_emb, label_emb_d])
    combined_d = MaxoutDense(240, nb_feature=4)(merged_d)
    combined_d = Activation('relu')(combined_d)
    combined_d = Dropout(0.5)(combined_d)
    validity = Dense(1, activation='sigmoid')(combined_d)

    return Model([img, label], validity)
def get_discriminator_model(y_size):
    """Build a two-branch conditional discriminator.

    The image branch flattens the input and applies
    ``MaxoutDense(240, nb_feature=5)`` plus dropout.  The label branch
    embeds ``y`` into ``np.prod(image_dim)`` dimensions, flattens it and
    applies ``MaxoutDense(50, nb_feature=5)`` plus dropout.  The branches
    are concatenated, run through ``MaxoutDense(240, nb_feature=4)`` plus
    dropout and scored by one sigmoid unit.

    ``image_dim`` and ``num_digits`` are read from the enclosing scope.

    Args:
        y_size: Length of the label input vector.

    Returns:
        ``Model([image, y], out)``.
    """
    # Image branch.
    image = Input(shape=image_dim)
    image_features = Flatten()(image)
    image_features = MaxoutDense(240, nb_feature=5)(image_features)
    image_features = Dropout(0.5)(image_features)

    # Label branch.
    y = Input(shape=(y_size, ))
    label_features = Embedding(input_dim=num_digits,
                               output_dim=np.prod(image_dim))(y)
    label_features = Flatten()(label_features)
    label_features = MaxoutDense(50, nb_feature=5)(label_features)
    label_features = Dropout(0.5)(label_features)

    # Joint head.
    joint = Concatenate()([image_features, label_features])
    joint = MaxoutDense(240, nb_feature=4)(joint)
    joint = Dropout(0.5)(joint)
    score = Dense(1, activation='sigmoid',
                  kernel_initializer='glorot_normal')(joint)

    return Model([image, y], score)
def maxout_model():
    """Build and compile a small maxout regression network.

    Two ``MaxoutDense(240, nb_feature=5)`` layers feed one linear output
    unit whose weights are initialised to zero.  The input width is read
    from the name ``dim`` defined outside this function.  The model is
    compiled with the ``'mape'`` loss and the ``'adam'`` optimizer.
    """
    regressor = Sequential()
    # ``input_dim`` is given on both maxout layers.
    for _ in range(2):
        regressor.add(MaxoutDense(240, nb_feature=5, input_dim=dim))
    regressor.add(Dense(1, init='zero'))
    regressor.compile(loss='mape', optimizer='adam')
    return regressor
def construct_model(classe_nums):
    """Build a 1-D convolutional feature extractor and a classifier on top.

    A ``Sequential`` trunk (two Conv1D/MaxPool1D stages, then two
    Dense + MaxoutDense stages) maps a ``(99, 40)`` input to a feature
    vector.  A softmax layer named ``"predict"`` with ``classe_nums``
    outputs is stacked on those features.  As side effects the trunk
    summary is printed and the classifier graph is written to
    ``model.png``.

    Args:
        classe_nums: Number of output classes of the softmax layer.

    Returns:
        ``(extract_feature_model, sr_model)``: both share the same input
        and trunk; the first outputs the features, the second the class
        probabilities.
    """
    trunk = Sequential([
        Conv1D(filters=256, kernel_size=3, strides=1, activation='relu',
               input_shape=(99, 40), name='block1_conv1'),
        MaxPool1D(pool_size=2, name='block1_pool1'),
        BatchNormalization(momentum=0.9, epsilon=1e-5, axis=1),
        Conv1D(filters=256, kernel_size=3, strides=1, activation='relu',
               name='block1_conv2'),
        MaxPool1D(pool_size=2, name='block1_pool2'),
        Flatten(name='block1_flat1'),
        Dropout(0.5, name='block1_drop1'),
        Dense(512, activation='relu', name='block2_dense2'),
        MaxoutDense(512, nb_feature=4, name="block2_maxout2"),
        Dropout(0.5, name='block2_drop2'),
        Dense(512, activation='relu', name='block2_dense3',
              kernel_regularizer=l2(1e-4)),
        MaxoutDense(512, nb_feature=4, name="block2_maxout3"),
    ])
    trunk.summary()

    model_input = Input(shape=(99, 40))
    features = trunk(model_input)
    extract_feature_model = Model(inputs=model_input, outputs=features)

    category_predict = Dense(classe_nums, activation='softmax',
                             name="predict")(features)
    sr_model = Model(inputs=model_input, outputs=category_predict)

    plot_model(sr_model, to_file='model.png', show_shapes=True,
               show_layer_names=False)
    return extract_feature_model, sr_model
def net():
    """Build and compile a maxout/dense network with a 2-unit sigmoid output.

    Input width is 625.  The stack is MaxoutDense(256) -> MaxoutDense(128)
    -> Dense(64, relu) -> Dense(25, relu) -> Dense(2) -> sigmoid, compiled
    with Adam, categorical cross-entropy and an accuracy metric.
    """
    classifier = Sequential()
    stack = (
        MaxoutDense(256, input_dim=625, nb_feature=5, init='he_uniform'),
        MaxoutDense(128, nb_feature=5),
        Dense(64, activation='relu'),
        Dense(25, activation='relu'),
        Dense(2),
        Activation('sigmoid'),
    )
    for layer in stack:
        classifier.add(layer)

    classifier.compile(optimizer='adam', loss='categorical_crossentropy',
                       metrics=['accuracy'])
    return classifier
def KerasNet(leakness = 0.5):
    """Build an uncompiled deep CNN with LeakyReLU activations.

    The input shape is ``(3, 512, 512)``.  A 7x7 stride-2 convolution is
    followed by four stages of 3x3 convolutions (1x32, 2x64, 4x128 and
    4x256 filters).  Every convolution is followed by LeakyReLU and every
    stage by a 3x3 stride-2 max-pool.  The head is dropout, flatten, two
    ``MaxoutDense(512, nb_feature=4)`` layers (dropout between them) and a
    5-way softmax.

    Args:
        leakness: ``alpha`` passed to every ``LeakyReLU``.

    Returns:
        The ``Model`` mapping the input tensor to the softmax output.
    """

    def conv_block(tensor, filters, size, stride):
        # Orthogonally initialised 'same' convolution followed by LeakyReLU.
        tensor = Convolution2D(filters, size, size, init='orthogonal',
                               subsample=(stride, stride),
                               border_mode='same')(tensor)
        return LeakyReLU(alpha=leakness)(tensor)

    def downsample(tensor):
        return MaxPooling2D(pool_size=(3, 3), strides=(2, 2))(tensor)

    inputs = Input(shape=(3, 512, 512))

    # Stem: one wide, strided convolution.
    x = conv_block(inputs, 32, 7, 2)
    x = downsample(x)

    # Stages as (filters, number of 3x3 convolutions before pooling).
    for filters, depth in ((32, 1), (64, 2), (128, 4), (256, 4)):
        for _ in range(depth):
            x = conv_block(x, filters, 3, 1)
        x = downsample(x)

    # Classifier head.
    x = Dropout(0.5)(x)
    x = Flatten()(x)
    x = MaxoutDense(512, nb_feature=4, init='orthogonal')(x)
    x = Dropout(0.5)(x)
    x = MaxoutDense(512, nb_feature=4, init='orthogonal')(x)
    x = Dense(5, init='orthogonal')(x)
    x = Activation('softmax')(x)

    return Model(inputs, x)
def prepareAttentionModel(embeddings, classes, max_length, unit=LSTM, cells=64, layers=1, **kwargs):
    """Build and compile a stacked-RNN text classifier with optional attention.

    Args:
        embeddings: Embedding layer instance; added as the first layer.
        classes: Number of output classes of the softmax layer.
        max_length: Not used inside this function.
        unit: Recurrent layer class forwarded to ``get_RNN``.
        cells: Recurrent layer size forwarded to ``get_RNN``.
        layers: Number of stacked recurrent layers.
        **kwargs: Optional hyper-parameters: ``bidirectional``, ``noise``,
            ``dropout_words``, ``dropout_rnn``, ``dropout_rnn_U``,
            ``dropout_attention``, ``dropout_final``, ``attention``
            (``"memory"``, ``"simple"`` or ``None``), ``final_layer``,
            ``clipnorm``, ``loss_l2`` and ``lr``.

    Returns:
        The compiled ``Sequential`` model (Adam, categorical
        cross-entropy, accuracy metric).
    """
    # parameters
    bi = kwargs.get("bidirectional", False)
    noise = kwargs.get("noise", 0.)
    dropout_words = kwargs.get("dropout_words", 0)
    dropout_rnn = kwargs.get("dropout_rnn", 0)
    dropout_rnn_U = kwargs.get("dropout_rnn_U", 0)
    dropout_attention = kwargs.get("dropout_attention", 0)
    dropout_final = kwargs.get("dropout_final", 0)
    attention = kwargs.get("attention", None)
    final_layer = kwargs.get("final_layer", False)
    clipnorm = kwargs.get("clipnorm", 1)
    loss_l2 = kwargs.get("loss_l2", 0.)
    lr = kwargs.get("lr", 0.001)

    model = Sequential()
    model.add(embeddings)

    if noise > 0:
        model.add(GaussianNoise(noise))
    if dropout_words > 0:
        model.add(Dropout(dropout_words))

    for i in range(layers):
        # Every layer but the last must emit the whole sequence; the last
        # one must as well when an attention layer consumes it.  Coerce to
        # a real boolean so the attention *string* (or None) is never
        # handed on as ``return_sequences``.
        rs = bool(i < layers - 1 or attention)
        model.add(
            get_RNN(unit, cells, bi, return_sequences=rs,
                    dropout_U=dropout_rnn_U))
        if dropout_rnn > 0:
            model.add(Dropout(dropout_rnn))

    if attention == "memory":
        model.add(AttentionWithContext())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))
    elif attention == "simple":
        model.add(Attention())
        if dropout_attention > 0:
            model.add(Dropout(dropout_attention))

    if final_layer:
        model.add(MaxoutDense(100, W_constraint=maxnorm(2)))
        if dropout_final > 0:
            model.add(Dropout(dropout_final))

    model.add(Dense(classes, activity_regularizer=l2(loss_l2)))
    model.add(Activation('softmax'))

    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def __init__(self):
    """Create the compiled classifier, an empty history and the augmenter.

    ``self.model`` is one 3x3 convolution over ``(3, 32, 32)`` inputs,
    three sigmoid dense layers (128, 256, 512 units), a 10-unit maxout
    layer and a softmax, compiled with Adadelta and categorical
    cross-entropy.  ``self.datagen`` only enables 10% horizontal and
    vertical shifts and horizontal flips.
    """
    self.model = Sequential()
    net = self.model

    net.add(Convolution2D(8, 3, 3, activation='relu', border_mode='same',
                          input_shape=(3, 32, 32)))
    net.add(Flatten())
    for width in (128, 256, 512):
        net.add(Dense(output_dim=width, activation='sigmoid'))
    net.add(MaxoutDense(output_dim=10))
    net.add(Activation('softmax'))
    net.compile(loss='categorical_crossentropy', optimizer='Adadelta',
                metrics=['accuracy'])

    self.history = []

    augmentation = dict(
        # No centering, normalisation or whitening.
        featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        # No random rotation.
        rotation_range=0,
        # Random shifts, as a fraction of the total width / height.
        width_shift_range=0.1,
        height_shift_range=0.1,
        # Random horizontal flips only.
        horizontal_flip=True,
        vertical_flip=False,
    )
    self.datagen = ImageDataGenerator(**augmentation)
def lstm_model(sequence_length, embeddings_matrix, embedding_dim, X_tfidf):
    """Build a BiLSTM-attention text model merged with a 300-d side input.

    Branch 1 embeds token ids (weights initialised from
    ``embeddings_matrix``, trainable), runs two bidirectional LSTM(150)
    layers with dropout and pools them with ``AttLayer``.  Branch 2 is a
    bare 300-wide input.  The branches are concatenated and classified by
    ``MaxoutDense(100)`` -> dropout -> 3-way softmax.

    Args:
        sequence_length: Length of the token-id input sequences.
        embeddings_matrix: Initial weights for the embedding layer.
        embedding_dim: Embedding width.
        X_tfidf: Not used inside this function.

    Returns:
        The compiled merged model (Adam, categorical cross-entropy).
    """
    model_variation = 'LSTM'
    print('Model variation is %s' % model_variation)

    model1 = Sequential()
    model1.add(
        Embedding(len(vocab) + 1,
                  embedding_dim,
                  weights=[embeddings_matrix],
                  input_length=sequence_length,
                  trainable=True))
    model1.add(Dropout(0.25))
    model1.add(Bidirectional(LSTM(150, return_sequences=True)))
    model1.add(Dropout(0.25))
    model1.add(Bidirectional(LSTM(150, return_sequences=True)))
    model1.add(Dropout(0.25))
    model1.add(AttLayer())
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(model1.summary())

    model2 = Sequential()
    model2.add(InputLayer(input_shape=(300, )))
    print(model2.summary())

    model = Sequential()
    model.add(Merge([model1, model2], mode='concat'))
    model.add(MaxoutDense(100, W_constraint=maxnorm(2)))
    model.add(Dropout(0.25))
    model.add(Dense(3, activity_regularizer=l2(0.0001)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    print(model.summary())
    return model
def buildModel():
    """Build and compile a Conv2D + BiGRU sequence regressor.

    Three 3x3 'same' convolutions (8 filters each, every one followed by
    batch normalisation and 75% dropout) run over a
    ``(seqLength, 64, 1)`` input.  The result is regrouped into
    ``seqLength`` steps of ``64 * 8`` features, projected per step to 8
    units, passed through a bidirectional GRU(8) and reduced per step to
    one value by ``MaxoutDense(1, nb_feature=8)``.  ``seqLength`` is read
    from the enclosing scope.

    Returns:
        The compiled model (MSE loss, Adam); its summary is printed.
    """
    model = Sequential()
    model.add(Conv2D(filters=8, kernel_size=(3, 3), padding='same',
                     input_shape=(seqLength, 64, 1), activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.75))
    for _ in range(2):
        model.add(Conv2D(filters=8, kernel_size=(3, 3), padding='same',
                         activation='relu'))
        model.add(BatchNormalization())
        model.add(Dropout(0.75))

    model.add(Flatten())
    # 'same' padding keeps the time axis at seqLength, so the step count
    # must follow seqLength rather than a fixed 30.
    model.add(Reshape((seqLength, 64 * 8)))

    model.add(TimeDistributed(Dense(8, activation=None)))
    model.add(Dropout(0.5))
    model.add(Bidirectional(GRU(8, return_sequences=True)))
    model.add(Dropout(0.5))
    model.add(TimeDistributed(MaxoutDense(1, nb_feature=8)))

    model.compile(loss='mse', optimizer="adam", metrics=['accuracy'])
    model.summary()
    return model
def make_custom_model_2_norm(input_rows, input_cols):
    """Build an uncompiled channels-first CNN with batch-normalised convs.

    Three convolutions (7x7 stride 2, 5x5, 3x3) are each followed by
    ``BatchNormalization(axis=1)`` and the configured hidden activation.
    The head is Dense(512) -> hidden activation ->
    ``MaxoutDense(NUMBER_OF_POSSIBLE_ACTIONS, nb_feature=4)`` -> linear.

    Args:
        input_rows: Height of each input frame.
        input_cols: Width of each input frame.

    Returns:
        The ``Sequential`` model; input shape is
        ``(WINDOW_LENGTH, input_rows, input_cols)``.
    """
    net = Sequential()

    # (filters, extra Conv2D keyword arguments) for each convolution.
    conv_stack = (
        (int(CONV_LAYER_SIZE_BASE / 2),
         dict(kernel_size=(7, 7), strides=2,
              input_shape=(WINDOW_LENGTH, input_rows, input_cols))),
        (CONV_LAYER_SIZE_BASE, dict(kernel_size=(5, 5))),
        (CONV_LAYER_SIZE_BASE, dict(kernel_size=(3, 3))),
    )
    for filters, conv_kwargs in conv_stack:
        net.add(Conv2D(filters, data_format='channels_first', **conv_kwargs))
        # axis=1 is the channel axis for channels_first feature maps.
        net.add(BatchNormalization(axis=1))
        net.add(get_hidden_layer_activation(HIDDEN_ACTIVATION))

    net.add(Flatten())
    net.add(Dense(512))
    net.add(get_hidden_layer_activation(HIDDEN_ACTIVATION))
    net.add(MaxoutDense(NUMBER_OF_POSSIBLE_ACTIONS, nb_feature=4))
    net.add(Activation('linear'))
    return net
def lstm_model(sequence_length, word_embedding_matrix, frame_embedding_matrix,embedding_dim):
    """Build a BiLSTM classifier over concatenated word and frame embeddings.

    Word embeddings (frozen, ``embedding_dim`` wide) and frame embeddings
    (trainable, 50 wide) are concatenated, passed through two
    bidirectional LSTM(150) layers with 30% dropout, flattened and
    classified by ``MaxoutDense(100)`` -> dropout -> 6-way softmax.

    Args:
        sequence_length: Length of both id sequences.
        word_embedding_matrix: Initial weights of the word embedding.
        frame_embedding_matrix: Initial weights of the frame embedding.
        embedding_dim: Width of the word embedding.

    Returns:
        The compiled model; the optimizer is the name ``adam`` taken from
        the enclosing scope.
    """
    model_variation = 'LSTM'
    print('Model variation is %s' % model_variation)

    word_branch = Sequential()
    word_branch.add(Embedding(len(vocab) + 1, embedding_dim,
                              weights=[word_embedding_matrix],
                              input_length=sequence_length,
                              trainable=False))

    frame_branch = Sequential()
    frame_branch.add(Embedding(len(frame_vocab) + 1, 50,
                               weights=[frame_embedding_matrix],
                               input_length=sequence_length,
                               trainable=True))

    classifier = Sequential()
    classifier.add(Merge([word_branch, frame_branch], mode='concat'))
    classifier.add(Dropout(0.3))
    for _ in range(2):
        classifier.add(Bidirectional(LSTM(150, return_sequences=True)))
        classifier.add(Dropout(0.3))

    # The full sequence output is flattened rather than attention-pooled.
    classifier.add(Flatten())
    classifier.add(MaxoutDense(100, W_constraint=maxnorm(2)))
    classifier.add(Dropout(0.5))
    classifier.add(Dense(6, activity_regularizer=l2(0.0001)))
    classifier.add(Activation('softmax'))

    classifier.compile(loss='categorical_crossentropy', optimizer=adam,
                       metrics=['accuracy'])
    print(classifier.summary())
    return classifier
def __create_mv_decoder(self, name: str, suffix: str, trainable: bool, input, override_activation=None):
    """Attach a maxout decoder head for output ``name`` to ``input``.

    The head is ``MaxoutDense(self.key_counts[name][-1], nb_feature=8)``
    followed by an activation.  The input counts as time-distributed when
    its shape has three entries; the output counts as time-distributed
    when ``self.key_counts[name]`` has two entries.

    Args:
        name: Key into ``self.key_counts``; also used in layer names.
        suffix: Appended to every layer name created here.
        trainable: Passed to the ``MaxoutDense`` layer.
        input: Tensor to decode.
        override_activation: Activation to use instead of
            ``self.final_activation()``.

    Returns:
        The output tensor, produced by a layer named ``name + suffix``.

    Raises:
        ValueError: If the input has no time dimension but the output
            requires one.
    """
    if override_activation is not None:
        activation = override_activation
    else:
        activation = self.final_activation()

    if len(input.get_shape()) != 3:
        # The input has no time dimension.
        if len(self.key_counts[name]) == 2:
            raise ValueError(
                "You cannot create a magic vector decoder that takes a time-invariant tensor and "
                "returns a time-sensitive tensor.")
        # Neither does the output: plain maxout followed by the activation.
        return Activation(activation, name=name + suffix)(MaxoutDense(
            self.key_counts[name][-1],
            trainable=trainable,
            name="mv_" + name + suffix,
            nb_feature=8)(input))

    # The input has a time dimension: apply the maxout at every step.
    time_layer = TimeDistributed(MaxoutDense(self.key_counts[name][-1],
                                             trainable=trainable,
                                             nb_feature=8),
                                 name="mv_" + name + suffix)(input)

    if len(self.key_counts[name]) == 2:
        # The output keeps the time dimension.
        return Activation(activation, name=name + suffix)(time_layer)

    # The output drops the time dimension: average the unmasked steps.
    def timestep_mean(x, mask):
        mask = K.cast(mask, K.floatx())
        return K.sum(x * K.expand_dims(mask), axis=1) / K.sum(
            mask, axis=1, keepdims=True)

    activated = Activation(activation)(time_layer)
    return Lambda(timestep_mean,
                  output_shape=lambda x: (x[0], x[2]),
                  mask=lambda x, mask: None,
                  name=name + suffix)(activated)
def build_discriminator(self):
    """Build an unconditional maxout discriminator.

    The image is flattened and passed through two maxout stages (512 then
    256 units, each followed by 10% dropout and ``LeakyReLU(0.2)``) before
    a single sigmoid unit.  The inner stack's summary is printed.

    Returns:
        ``Model(img, validity)``.
    """
    scorer = Sequential()
    scorer.add(Flatten(input_shape=self.img_shape))
    for width in (512, 256):
        scorer.add(MaxoutDense(width))
        scorer.add(Dropout(rate=0.1))
        scorer.add(LeakyReLU(alpha=0.2))
    scorer.add(Dense(1, activation='sigmoid'))
    scorer.summary()

    img = Input(shape=self.img_shape)
    return Model(img, scorer(img))
def __init__(self):
    """Create the generator, the discriminator and the stacked GAN model.

    * ``self.Generator`` maps (noise, 10-wide label) to a ``(28, 28)``
      tanh image.
    * ``self.Discriminator`` maps (image, 10-wide label) to one sigmoid
      score through maxout layers; it is compiled, then marked
      non-trainable.
    * ``self.Cascaded_model`` chains the two so that noise and label map
      to the discriminator's score; it is compiled with the same loss and
      optimizer settings.
    """
    self.latent_space_dim = 100
    self.batch_size = 100
    self.num_classes = 10

    # ---- Generator -------------------------------------------------
    z_in = Input(shape=(self.latent_space_dim,))
    z_hidden = Dense(200, activation='relu')(z_in)
    g_label_in = Input(shape=(10,))
    g_label_hidden = Dense(1000, activation='relu')(g_label_in)
    g_joint = concatenate([z_hidden, g_label_hidden])
    g_pixels = Dense(784, activation='tanh')(g_joint)
    g_image = Reshape((28, 28))(g_pixels)
    self.Generator = Model(inputs=[z_in, g_label_in], outputs=g_image)

    # ---- Discriminator ---------------------------------------------
    d_image_in = Input(shape=(28, 28))
    d_image_flat = Flatten()(d_image_in)
    # The second positional argument of MaxoutDense is passed as-is
    # (5, 5 and 4 for the three maxout layers).
    d_image_hidden = MaxoutDense(240, 5)(d_image_flat)
    d_label_in = Input(shape=(10,))
    d_label_hidden = MaxoutDense(50, 5)(d_label_in)
    d_joint = concatenate([d_image_hidden, d_label_hidden])
    d_joint_hidden = MaxoutDense(240, 4)(d_joint)
    d_score = Dense(1, activation='sigmoid')(d_joint_hidden)
    self.Discriminator = Model(inputs=[d_image_in, d_label_in],
                               outputs=d_score)
    self.Discriminator.compile(loss = "binary_crossentropy",
                               optimizer = Adam(0.0002, 0.5),
                               metrics = ["accuracy"])

    # ---- Stacked model ---------------------------------------------
    # The flag is flipped after the discriminator itself was compiled.
    self.Discriminator.trainable = False
    noise = Input(shape=(self.latent_space_dim,))
    label = Input(shape=(10,))
    fake_image = self.Generator([noise, label])
    fake_score = self.Discriminator([fake_image, label])
    self.Cascaded_model = Model([noise, label], fake_score)
    self.Cascaded_model.compile(loss = "binary_crossentropy",
                                optimizer = Adam(0.0002, 0.5))
def Discriminator(self):
    """Build an uncompiled discriminator over 128-wide input vectors.

    The vector is reshaped to ``(1, 4, 32)``, max-pooled with a 4x4 window
    and stride 2, flattened, and passed through
    ``MaxoutDense(240, nb_feature=5)`` and one sigmoid unit.

    Returns:
        The ``Sequential`` model.
    """
    critic = Sequential()
    stack = (
        InputLayer(batch_input_shape=(None, 128)),
        Reshape(target_shape=(1, 4, 32)),
        MaxPooling2D(pool_size=(4, 4), strides=(2, 2)),
        Flatten(),
        MaxoutDense(input_dim=128, output_dim=240, nb_feature=5),
        Dense(output_dim=1),
        Activation('sigmoid'),
    )
    for layer in stack:
        critic.add(layer)
    return critic
def getModel(embedding_matrix, max_words, max_len, embedding_dim):
    """Build an uncompiled BiLSTM-attention model with a frozen embedding.

    The stack is Embedding -> GaussianNoise(0.2) -> Dropout(0.2) ->
    BiLSTM(128) -> Attention -> Dropout(0.3) -> MaxoutDense(100) ->
    Dropout(0.2) -> Dense(1).  After assembly the embedding weights are
    set to ``embedding_matrix`` and the embedding layer is frozen.

    Args:
        embedding_matrix: Weights loaded into the embedding layer.
        max_words: Vocabulary size of the embedding layer.
        max_len: Input sequence length.
        embedding_dim: Embedding width.

    Returns:
        The ``Sequential`` model (not compiled).
    """
    recurrent = Bidirectional(
        LSTM(128, return_sequences=True, recurrent_dropout=0.2,
             implementation=1))

    net = Sequential()
    net.add(Embedding(max_words, embedding_dim, input_length=max_len))
    net.add(GaussianNoise(0.2))
    net.add(Dropout(0.2))
    net.add(recurrent)
    net.add(Attention())
    net.add(Dropout(0.3))
    net.add(MaxoutDense(100, W_constraint=maxnorm(2)))
    net.add(Dropout(0.2))
    net.add(Dense(1))

    # Load the supplied vectors into the embedding and keep them fixed.
    embedding_layer = net.layers[0]
    embedding_layer.set_weights([embedding_matrix])
    embedding_layer.trainable = False
    return net
def get_GRU_Max_model(embedding_matrix):
    """Build and compile a BiGRU + global-max-pool multi-label classifier.

    Token ids of length ``maxlen`` are embedded (weights initialised from
    ``embedding_matrix``, width ``embed_size``), encoded by a
    bidirectional GRU(50), max-pooled over time and classified by
    ``MaxoutDense(256, nb_feature=3)`` and six sigmoid units, with 25%
    dropout around the maxout layer.  ``maxlen`` and ``embed_size`` are
    read from the enclosing scope.

    Returns:
        The compiled ``Model`` (binary cross-entropy, Adam).
    """
    inp = Input(shape=(maxlen, ))

    pipeline = (
        Embedding(len(embedding_matrix), embed_size,
                  weights=[embedding_matrix]),
        Bidirectional(
            GRU(50, return_sequences=True, dropout=0.25,
                recurrent_dropout=0.25)),
        GlobalMaxPool1D(),
        Dropout(0.25),
        MaxoutDense(256, nb_feature=3),
        Dropout(0.25),
        Dense(6, activation="sigmoid"),
    )
    tensor = inp
    for layer in pipeline:
        tensor = layer(tensor)

    classifier = Model(inputs=inp, outputs=tensor)
    classifier.compile(loss='binary_crossentropy', optimizer='adam',
                       metrics=['accuracy'])
    return classifier
def make_net(self, X):
    """Build and compile a per-frame CNN + LSTM sequence classifier.

    A ``Sequential`` frame encoder (0-3 Conv2D/MaxPooling2D stages chosen
    by ``self.cnn_layer``, then flatten, maxout, dense) is applied to
    every time step with ``TimeDistributed``.  An LSTM summarises the
    sequence and a 2-way softmax classifies it.

    ``self.cnn_layer`` selects the convolutional depth: ``None`` adds
    three stages, ``"2"`` two, ``"1"`` one, and ``"none"`` skips the
    convolutions and flattens the raw frame.

    Args:
        X: Mapping whose ``"seq"`` entry exposes ``.shape``; axes 2, 3
            and 4 give the per-frame input shape.
            NOTE(review): presumably (samples, time, rows, cols,
            channels) - confirm against the caller.

    Returns:
        The compiled ``Model`` (categorical cross-entropy, Adam with
        ``self.learning_rate``, metrics ``self.metrics``).
    """
    frame_shape = (X["seq"].shape[2], X["seq"].shape[3], X["seq"].shape[4])
    input_size = (None, ) + frame_shape

    # Layer widths must be integers: floor division keeps them so on
    # Python 3 and matches Python 2's integer ``/`` for int ``states``.
    half_states = self.states // 2

    sequence_input = Input(shape=input_size, name="sequence_input")

    convs = Sequential()
    if self.cnn_layer in [None, "1", "2"]:
        convs.add(Conv2D(10, kernel_size=(3, 3), activation="relu",
                         input_shape=frame_shape))
        convs.add(MaxPooling2D((2, 2), strides=(2, 2)))
    if self.cnn_layer in [None, "2"]:
        convs.add(Conv2D(20, kernel_size=(3, 3), activation="relu"))
        convs.add(MaxPooling2D((2, 2), strides=(2, 2)))
    if self.cnn_layer in [None]:
        convs.add(Conv2D(40, kernel_size=(3, 3), activation="relu"))
        convs.add(MaxPooling2D((2, 2), strides=(2, 2)))

    if self.cnn_layer == "none":
        # No convolution: the flatten layer carries the input shape.
        convs.add(Flatten(input_shape=frame_shape))
    else:
        convs.add(Dropout(0.5))
        convs.add(Flatten())

    convs.add(MaxoutDense(output_dim=half_states, nb_feature=2,
                          input_dim=self.states))
    convs.add(Dropout(0.5))
    convs.add(Dense(half_states, activation="relu", name="features"))
    convs.add(Dropout(0.5))

    x = TimeDistributed(convs)(sequence_input)
    encoder = LSTM(half_states, dropout=0.5, recurrent_dropout=0.0)(x)
    output = Dense(2, activation="softmax", name="classification")(encoder)

    model = Model(inputs=sequence_input, outputs=output)
    adam = Adam(lr=self.learning_rate)
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=self.metrics)
    return model
def make_custom_model(input_rows, input_cols):
    """Build an uncompiled channels-first CNN with a maxout output layer.

    One convolution with ``FIRST_CONV_KERNEL_SIZE`` is followed by
    ``NUMBER_OF_INNER_CONVOLUTIONS`` 3x3 convolutions, each with the
    configured hidden activation.  The head is Dense(512) -> hidden
    activation -> ``MaxoutDense(NUMBER_OF_POSSIBLE_ACTIONS, nb_feature=4)``
    -> linear.

    Args:
        input_rows: Height of each input frame.
        input_cols: Width of each input frame.

    Returns:
        The ``Sequential`` model; input shape is
        ``(WINDOW_LENGTH, input_rows, input_cols)``.
    """
    net = Sequential()

    # Channels always come first.
    net.add(Conv2D(int(CONV_LAYER_SIZE_BASE / 2),
                   kernel_size=FIRST_CONV_KERNEL_SIZE,
                   input_shape=(WINDOW_LENGTH, input_rows, input_cols),
                   data_format='channels_first'))
    net.add(get_hidden_layer_activation(HIDDEN_ACTIVATION))

    for _ in range(NUMBER_OF_INNER_CONVOLUTIONS):
        net.add(Conv2D(CONV_LAYER_SIZE_BASE, kernel_size=(3, 3),
                       data_format='channels_first'))
        net.add(get_hidden_layer_activation(HIDDEN_ACTIVATION))

    net.add(Flatten())
    net.add(Dense(512))
    net.add(get_hidden_layer_activation(HIDDEN_ACTIVATION))
    net.add(MaxoutDense(NUMBER_OF_POSSIBLE_ACTIONS, nb_feature=4))
    net.add(Activation('linear'))
    return net
def lstm_model(sequence_length, embedding_dim):
    """Build and compile a BiLSTM-attention binary text classifier.

    Token ids are embedded (trainable, vocabulary size ``len(vocab) + 1``),
    encoded by two bidirectional LSTM(150) layers with 25% dropout, pooled
    by ``Attention`` and classified by ``MaxoutDense(100)`` -> dropout ->
    2-way softmax.  ``vocab`` is read from the enclosing scope.

    Args:
        sequence_length: Length of the input token-id sequences.
        embedding_dim: Embedding width.

    Returns:
        The compiled model (Adam, categorical cross-entropy, accuracy).
    """
    model_variation = 'LSTM'
    print('Model variation is %s' % model_variation)

    model = Sequential()
    model.add(
        Embedding(len(vocab) + 1,
                  embedding_dim,
                  input_length=sequence_length,
                  trainable=True))
    model.add(Dropout(0.25))
    model.add(Bidirectional(LSTM(150, return_sequences=True)))
    model.add(Dropout(0.25))
    model.add(Bidirectional(LSTM(150, return_sequences=True)))
    model.add(Dropout(0.25))
    model.add(Attention())
    model.add(MaxoutDense(100, W_constraint=maxnorm(2)))
    model.add(Dropout(0.25))
    model.add(Dense(2, activity_regularizer=l2(0.0001)))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(model.summary())
    return model
def target_RNN(wv, tweet_max_length, aspect_max_length, classes=2, **kwargs):
    """Build and compile a target-dependent RNN classifier with attention.

    Tweet and aspect token ids are embedded with the same vectors ``wv``
    and encoded by one shared RNN.  The aspect encoding is averaged over
    time, repeated along the tweet length and concatenated to every tweet
    step.  An attention layer pools the result, optionally followed by a
    maxout or dense layer.

    Args:
        wv: Word vectors forwarded to ``embeddings_layer``.
        tweet_max_length: Length of the tweet input.
        aspect_max_length: Length of the aspect input.
        classes: Number of classes.  For 2 the output is one sigmoid unit
            with binary cross-entropy; otherwise a ``classes``-way softmax
            with categorical cross-entropy.
        **kwargs: Optional hyper-parameters: ``noise``, ``trainable``,
            ``rnn_size``, ``rnn_type``, ``final_size``, ``final_type``,
            ``use_final``, ``drop_text_input``, ``drop_text_rnn``,
            ``drop_text_rnn_U``, ``drop_target_rnn``, ``drop_rep``,
            ``drop_final``, ``activity_l2``, ``clipnorm``, ``bi``, ``lr``
            and ``attention`` (``"context"`` selects
            ``AttentionWithContext``, anything else ``Attention``).

    Returns:
        The compiled ``Model`` whose inputs are ``[aspect, tweet]``, in
        that order.
    """
    # ---- hyper-parameters ------------------------------------------
    option = kwargs.get
    noise = option("noise", 0)
    trainable = option("trainable", False)
    rnn_size = option("rnn_size", 75)
    rnn_type = option("rnn_type", LSTM)
    final_size = option("final_size", 100)
    final_type = option("final_type", "linear")
    use_final = option("use_final", False)
    drop_text_input = option("drop_text_input", 0.)
    drop_text_rnn = option("drop_text_rnn", 0.)
    drop_text_rnn_U = option("drop_text_rnn_U", 0.)
    drop_target_rnn = option("drop_target_rnn", 0.)
    drop_rep = option("drop_rep", 0.)
    drop_final = option("drop_final", 0.)
    activity_l2 = option("activity_l2", 0.)
    clipnorm = option("clipnorm", 5)
    bi = option("bi", False)
    lr = option("lr", 0.001)
    attention = option("attention", "simple")

    def embed(tokens, length):
        # Masked embedding lookup followed by Gaussian noise.
        embedded = embeddings_layer(max_length=length, embeddings=wv,
                                    trainable=trainable,
                                    masking=True)(tokens)
        return GaussianNoise(noise)(embedded)

    shared_RNN = get_RNN(rnn_type, rnn_size, bi=bi, return_sequences=True,
                         dropout_U=drop_text_rnn_U)

    input_tweet = Input(shape=[tweet_max_length], dtype='int32')
    input_aspect = Input(shape=[aspect_max_length], dtype='int32')

    # ---- embeddings ------------------------------------------------
    tweets_emb = embed(input_tweet, tweet_max_length)
    tweets_emb = Dropout(drop_text_input)(tweets_emb)
    aspects_emb = embed(input_aspect, aspect_max_length)

    # ---- shared recurrent encoder ----------------------------------
    h_tweets = shared_RNN(tweets_emb)
    h_tweets = Dropout(drop_text_rnn)(h_tweets)

    h_aspects = shared_RNN(aspects_emb)
    h_aspects = Dropout(drop_target_rnn)(h_aspects)
    h_aspects = MeanOverTime()(h_aspects)
    h_aspects = RepeatVector(tweet_max_length)(h_aspects)

    # ---- merge aspect + tweet, then attend over the RNN outputs ----
    representation = merge([h_tweets, h_aspects], mode='concat')
    if attention == "context":
        att_layer = AttentionWithContext
    else:
        att_layer = Attention
    representation = att_layer()(representation)
    representation = Dropout(drop_rep)(representation)

    if use_final:
        if final_type == "maxout":
            representation = MaxoutDense(final_size)(representation)
        else:
            representation = Dense(final_size,
                                   activation=final_type)(representation)
        representation = Dropout(drop_final)(representation)

    # ---- probabilities ---------------------------------------------
    if classes == 2:
        out_units, out_activation = 1, "sigmoid"
        loss = "binary_crossentropy"
    else:
        out_units, out_activation = classes, "softmax"
        loss = "categorical_crossentropy"

    probabilities = Dense(out_units, activation=out_activation,
                          activity_regularizer=l2(activity_l2))(representation)

    model = Model(input=[input_aspect, input_tweet], output=probabilities)
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr), loss=loss)
    return model
infile.close() combinations = [] parameters = [['Adagrad'], [0.05], [700], [700], [800]] for element in itertools.product(*parameters): combinations.append(element) model = Sequential() model.add( MaxoutDense(element[2], nb_feature=3, init='glorot_uniform', weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, bias=True, input_shape=(len(MatrixFeaturesLargePart[0]), ))) model.add(BatchNormalization()) model.add(Dropout(0.5)) model.add( MaxoutDense(element[3], nb_feature=3, init='glorot_uniform', weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None,
def siamese_RNN(wv, sent_length, **params):
    """Build and compile a siamese sentence-pair classifier.

    Both sentences go through the same embedding, LSTM encoder, attention
    and dense projection (shared weights); noise and dropout layers are
    created separately for each side.  The two encodings are concatenated
    and classified by ``MaxoutDense`` -> dropout -> one sigmoid unit.

    Args:
        wv: Word vectors forwarded to ``embeddings_layer``.
        sent_length: Length of each sentence input.
        **params: Optional hyper-parameters: ``rnn_size``, ``rnn_drop_U``,
            ``noise_words``, ``drop_words``, ``drop_sent``,
            ``sent_dense``, ``final_size`` and ``drop_final``.

    Returns:
        The compiled ``Model`` with inputs ``[input_a, input_b]`` (Adam
        with ``clipnorm=1.``, binary cross-entropy, binary accuracy).
    """
    rnn_size = params.get("rnn_size", 100)
    rnn_drop_U = params.get("rnn_drop_U", 0.2)
    noise_words = params.get("noise_words", 0.3)
    drop_words = params.get("drop_words", 0.2)
    drop_sent = params.get("drop_sent", 0.3)
    sent_dense_size = params.get("sent_dense", 50)
    final_size = params.get("final_size", 100)
    drop_final = params.get("drop_final", 0.5)

    # ---- layers shared by both sides -------------------------------
    embedding = embeddings_layer(max_length=sent_length, embeddings=wv,
                                 masking=True)
    encoder = get_RNN(LSTM, rnn_size, bi=False, return_sequences=True,
                      dropout_U=rnn_drop_U)
    attention = Attention()
    sent_dense = Dense(sent_dense_size, activation="relu")

    def encode(tokens):
        # Embed, perturb, encode, pool and project one sentence.
        hidden = embedding(tokens)
        hidden = GaussianNoise(noise_words)(hidden)
        hidden = Dropout(drop_words)(hidden)
        hidden = encoder(hidden)
        hidden = Dropout(drop_sent)(hidden)
        hidden = attention(hidden)
        hidden = sent_dense(hidden)
        return Dropout(drop_sent)(hidden)

    # ---- sentence A, then sentence B -------------------------------
    input_a = Input(shape=[sent_length], dtype='int32')
    enc_a = encode(input_a)

    input_b = Input(shape=[sent_length], dtype='int32')
    enc_b = encode(input_b)

    # ---- comparison ------------------------------------------------
    comparison = merge([enc_a, enc_b], mode='concat')
    comparison = MaxoutDense(final_size)(comparison)
    comparison = Dropout(drop_final)(comparison)
    probabilities = Dense(1, activation='sigmoid')(comparison)

    model = Model(input=[input_a, input_b], output=probabilities)
    model.compile(optimizer=Adam(clipnorm=1., lr=0.001),
                  loss='binary_crossentropy',
                  metrics=["binary_accuracy"])
    return model
def aspect_RNN(wv, text_length, target_length, loss, activation, **kwargs):
    """Build and compile an aspect-dependent RNN model with attention.

    Text and target token ids are embedded with the same vectors ``wv``
    and encoded by one shared RNN.  The target encoding is averaged over
    time, repeated along the text length and concatenated to every text
    step.  An attention layer pools the result, optionally followed by a
    maxout or dense layer, and a single output unit produces the score.

    Args:
        wv: Word vectors forwarded to ``embeddings_layer``.
        text_length: Length of the text input.
        target_length: Length of the target input.
        loss: Loss passed to ``compile``.
        activation: Activation of the single output unit.
        **kwargs: Optional hyper-parameters: ``noise``, ``trainable``,
            ``rnn_size``, ``rnn_type``, ``final_size``, ``final_type``,
            ``use_final``, ``drop_text_input``, ``drop_text_rnn``,
            ``drop_text_rnn_U``, ``drop_target_rnn``, ``drop_rep``,
            ``drop_final``, ``activity_l2``, ``clipnorm``, ``bi``, ``lr``
            and ``attention`` (``"context"`` selects
            ``AttentionWithContext``, anything else ``Attention``).

    Returns:
        The compiled ``Model`` whose inputs are ``[target, text]``, in
        that order.
    """
    # ---- hyper-parameters ------------------------------------------
    option = kwargs.get
    noise = option("noise", 0)
    trainable = option("trainable", False)
    rnn_size = option("rnn_size", 75)
    rnn_type = option("rnn_type", LSTM)
    final_size = option("final_size", 100)
    final_type = option("final_type", "linear")
    use_final = option("use_final", False)
    drop_text_input = option("drop_text_input", 0.)
    drop_text_rnn = option("drop_text_rnn", 0.)
    drop_text_rnn_U = option("drop_text_rnn_U", 0.)
    drop_target_rnn = option("drop_target_rnn", 0.)
    drop_rep = option("drop_rep", 0.)
    drop_final = option("drop_final", 0.)
    activity_l2 = option("activity_l2", 0.)
    clipnorm = option("clipnorm", 5)
    bi = option("bi", False)
    lr = option("lr", 0.001)
    attention = option("attention", "simple")

    def embed(tokens, length):
        # Masked embedding lookup followed by Gaussian noise.
        embedded = embeddings_layer(max_length=length, embeddings=wv,
                                    trainable=trainable,
                                    masking=True)(tokens)
        return GaussianNoise(noise)(embedded)

    shared_RNN = get_RNN(rnn_type, rnn_size, bi=bi, return_sequences=True,
                         dropout_U=drop_text_rnn_U)

    input_text = Input(shape=[text_length], dtype='int32')
    input_target = Input(shape=[target_length], dtype='int32')

    # ---- embeddings ------------------------------------------------
    emb_text = embed(input_text, text_length)
    emb_text = Dropout(drop_text_input)(emb_text)
    emb_target = embed(input_target, target_length)

    # ---- RNN over the text -----------------------------------------
    enc_text = shared_RNN(emb_text)
    enc_text = Dropout(drop_text_rnn)(enc_text)

    # ---- RNN over the target: mean over time, then tile ------------
    enc_target = shared_RNN(emb_target)
    enc_target = MeanOverTime()(enc_target)
    enc_target = Dropout(drop_target_rnn)(enc_target)
    enc_target = RepeatVector(text_length)(enc_target)

    # ---- merge target + text and attend ----------------------------
    representation = merge([enc_text, enc_target], mode='concat')
    if attention == "context":
        att_layer = AttentionWithContext
    else:
        att_layer = Attention
    representation = att_layer()(representation)
    representation = Dropout(drop_rep)(representation)

    if use_final:
        if final_type == "maxout":
            representation = MaxoutDense(final_size)(representation)
        else:
            representation = Dense(final_size,
                                   activation=final_type)(representation)
        representation = Dropout(drop_final)(representation)

    # ---- output ----------------------------------------------------
    probabilities = Dense(1, activation=activation,
                          activity_regularizer=l2(activity_l2))(representation)

    model = Model(input=[input_target, input_text], output=probabilities)
    model.compile(optimizer=Adam(clipnorm=clipnorm, lr=lr), loss=loss)
    return model
def catdog():
    """Build and compile the ``catdog`` convolutional model.

    The network has four stages. Each stage is two
    (convolution -> batch normalisation -> LeakyReLU) units followed by one
    max-pooling layer. After flattening come two ``MaxoutDense`` layers and
    a one-unit dense layer with a sigmoid activation.

    The input shape ``(3, ROWS, COLS)`` and the ``objective`` / ``optimizer``
    used for compilation are read from module-level names.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Training-related parameters.
    kernel_size = 4
    pool_size = 3
    max_norm = 2.

    # (number of filters, LeakyReLU alpha) for each convolutional stage.
    stages = ((16, 0.1), (32, 0.1), (64, 0.1), (128, 0.2))

    model = Sequential()
    for n_filters, alpha in stages:
        for _ in range(2):
            model.add(Convolution2D(n_filters, kernel_size, kernel_size,
                                    border_mode='same',
                                    input_shape=(3, ROWS, COLS),
                                    kernel_constraint=maxnorm(max_norm)))
            model.add(BatchNormalization())
            # LeakyReLU is used here in place of a plain 'relu' activation.
            model.add(Activation(LeakyReLU(alpha=alpha)))
        model.add(MaxPooling2D(pool_size=(pool_size, pool_size),
                               dim_ordering="th"))

    model.add(Flatten())
    # Two maxout layers stand in for the usual Dense + relu pair.
    for _ in range(2):
        model.add(MaxoutDense(output_dim=128, nb_feature=8,
                              init='glorot_uniform'))

    model.add(Dense(1))
    model.add(Activation('sigmoid'))

    model.compile(loss=objective, optimizer=optimizer, metrics=['accuracy'])
    return model
def main(nb_epoch=1, data_augmentation=True, noise=True, maxout=True,
         dropout=True, l1_reg=False, l2_reg=True, max_pooling=True,
         deep=False, noise_sigma=0.01):
    """Train and evaluate a small CNN on CIFAR-10 with optional regularisers.

    Loads the data with ``cifar10.load_data()``, holds out 20% of the training
    set for validation, scales pixels by 1/255, builds a ``Sequential`` model
    according to the flags, trains it with SGD, evaluates it on the test set,
    then writes the test metrics to a text file and the loss curves to a PNG
    under ``os.path.join(output_path, output_directory)``.

    Relies on module-level names: ``nb_classes``, ``batch_size``,
    ``img_channels``, ``img_rows``, ``img_cols``, ``l1_weight``,
    ``l2_weight``, ``output_path`` and ``output_directory``.

    Args:
        nb_epoch: number of training epochs.
        data_augmentation: train from an ``ImageDataGenerator`` instead of
            the raw arrays.
        noise: put a ``GaussianNoise`` layer in front of the network.
        maxout: use ``MaxoutDense(512, nb_feature=4)`` instead of
            ``Dense(512)`` for the hidden fully connected layer.
        dropout: add the ``Dropout`` layers.
        l1_reg: L1 weight regularisation on the hidden ``Dense`` layer
            (only used when ``maxout`` is false).
        l2_reg: L2 weight regularisation on the hidden ``Dense`` layer
            (only used when ``maxout`` is false).
        max_pooling: add the ``MaxPooling2D`` layers.
        deep: add three extra ``Dense(512)`` layers before the output layer.
        noise_sigma: sigma of the ``GaussianNoise`` layer. This value is also
            what is reported as "sigma" in the log and in the file names.

    Raises:
        SystemExit: via ``quit()`` when both ``l1_reg`` and ``l2_reg`` are
            true.
    """
    # l1 and l2 regularization shouldn't be true in the same time
    if l1_reg and l2_reg:
        print("No need to run l1 and l2 regularization in the same time")
        quit()

    # print settings for this experiment
    print("number of epoch: {0}".format(nb_epoch))
    print("data augmentation: {0}".format(data_augmentation))
    print("noise: {0}".format(noise))
    # Report the sigma that is actually applied (the ``noise_sigma``
    # argument); the previous code referenced an undefined name ``sigma``.
    print("sigma: {0}".format(noise_sigma))
    print("maxout: {0}".format(maxout))
    print("dropout: {0}".format(dropout))
    print("l1: {0}".format(l1_reg))
    print("l2: {0}".format(l2_reg))
    print("max_pooling: {0}".format(max_pooling))
    print("deep: {0}".format(deep))

    # the data, shuffled and split between train and test sets
    (X_train, y_train), (X_test, y_test) = cifar10.load_data()

    # split the validation dataset
    X_train, X_valid, y_train, y_valid = train_test_split(X_train,
                                                          y_train,
                                                          test_size=0.2,
                                                          random_state=0)

    # convert class vectors to binary class matrices
    Y_train = np_utils.to_categorical(y_train, nb_classes)
    Y_valid = np_utils.to_categorical(y_valid, nb_classes)
    Y_test = np_utils.to_categorical(y_test, nb_classes)

    X_train = X_train.astype('float32')
    X_valid = X_valid.astype('float32')
    X_test = X_test.astype('float32')
    X_train /= 255
    X_valid /= 255
    X_test /= 255

    print("Size of:")
    print("- Training-set:\t\t{}".format(len(X_train)))
    print("- Validation-set:\t\t{}".format(len(X_valid)))
    print("- Test-set:\t\t{}".format(len(X_test)))

    model = Sequential()
    if noise:
        model.add(
            GaussianNoise(noise_sigma,
                          input_shape=(img_channels, img_rows, img_cols)))
    model.add(
        Convolution2D(32, 3, 3,
                      border_mode='same',
                      input_shape=(img_channels, img_rows, img_cols)))
    model.add(Activation('relu'))
    model.add(Convolution2D(32, 3, 3))
    model.add(Activation('relu'))
    if max_pooling:
        model.add(MaxPooling2D(pool_size=(2, 2)))
    if dropout:
        model.add(Dropout(0.25))

    # NOTE(review): pooling/dropout is applied a second time with no
    # convolution in between - possibly a leftover from a removed conv
    # block; confirm this is intended. Behaviour is kept as-is.
    if max_pooling:
        model.add(MaxPooling2D(pool_size=(2, 2)))
    if dropout:
        model.add(Dropout(0.25))

    model.add(Flatten())
    if maxout:
        model.add(MaxoutDense(512, nb_feature=4, init='glorot_uniform'))
    else:
        if not (l1_reg or l2_reg):
            model.add(Dense(512))
        # activation regularization not implemented yet
        if l1_reg:
            model.add(Dense(512, W_regularizer=l1(l1_weight)))
        elif l2_reg:
            model.add(Dense(512, W_regularizer=l2(l2_weight)))

    model.add(Activation('relu'))
    if dropout:
        model.add(Dropout(0.5))

    if deep:
        model.add(Dense(512))
        model.add(Dense(512))
        model.add(Dense(512))

    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))

    # let's train the model using SGD + momentum (how original).
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])

    start_time = time.time()
    if not data_augmentation:
        his = model.fit(X_train,
                        Y_train,
                        batch_size=batch_size,
                        nb_epoch=nb_epoch,
                        validation_data=(X_valid, Y_valid),
                        shuffle=True)
    else:
        # this will do preprocessing and realtime data augmentation
        # NOTE(review): ZCA whitening is enabled for the generated training
        # batches only; X_valid and X_test are passed to the model as-is
        # below - confirm this train/eval preprocessing mismatch is intended.
        datagen = ImageDataGenerator(
            featurewise_center=False,  # set input mean to 0 over the dataset
            samplewise_center=False,  # set each sample mean to 0
            featurewise_std_normalization=False,  # divide inputs by std of the dataset
            samplewise_std_normalization=False,  # divide each input by its std
            zca_whitening=True,  # apply ZCA whitening
            rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
            horizontal_flip=True,  # randomly flip images
            vertical_flip=False)  # randomly flip images

        # compute quantities required for featurewise normalization
        # (std, mean, and principal components if ZCA whitening is applied)
        datagen.fit(X_train)

        # fit the model on the batches generated by datagen.flow()
        his = model.fit_generator(datagen.flow(X_train,
                                               Y_train,
                                               batch_size=batch_size),
                                  samples_per_epoch=X_train.shape[0],
                                  nb_epoch=nb_epoch,
                                  validation_data=(X_valid, Y_valid))

    # Measure once, right after training, so that the console and the
    # results file report the same value and evaluation time is excluded.
    training_time = time.time() - start_time

    # evaluate our model
    score = model.evaluate(X_test, Y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    print('training time', training_time)

    file_path = os.path.join(output_path, output_directory)
    print("outputs should be store at %s" % file_path)
    # Create the output directory if it does not exist yet.
    if not os.path.exists(file_path):
        print("creat output directory fro storing output")
        os.makedirs(file_path)

    # Common file-name stem shared by the metrics file and the figure.
    output_stem = os.path.join(
        file_path,
        'train_val_loss_with_dropout_epochs_{0}_data_augmentation_{1}_noise_{2}_sigma{12}_maxout_{3}_dropout_{4}_l1_{5}_l2_{6}_sigma_{7}_l1weight_{8}_l2weight_{9}_maxout_{10}_deep_{11}'
        .format(nb_epoch, data_augmentation, noise, maxout, dropout, l1_reg,
                l2_reg, noise_sigma, l1_weight, l2_weight, max_pooling, deep,
                noise_sigma))

    # write test accuracy to a file
    output_file_name = output_stem + '.txt'
    print("save file at {}".format(output_file_name))
    # The ``with`` block closes the file; no explicit close() is needed.
    with open(output_file_name, "w") as text_file:
        text_file.write('Test score: {}\n'.format(score[0]))
        text_file.write('Test accuracy: {}\n'.format(score[1]))
        text_file.write('Training time: {}\n'.format(training_time))

    # visualize training history
    train_loss = his.history['loss']
    val_loss = his.history['val_loss']
    plt.plot(range(1, len(train_loss) + 1),
             train_loss,
             color='blue',
             label='train loss')
    plt.plot(range(1, len(val_loss) + 1),
             val_loss,
             color='red',
             label='val loss')
    plt.legend(loc="upper left", bbox_to_anchor=(1, 1))
    plt.xlabel('#epoch')
    plt.ylabel('loss')

    output_fig_name = output_stem + '.png'
    plt.savefig(output_fig_name, dpi=300)
    plt.show()
#### Training...
# Build and compile a small recurrent classifier: a GRU layer, a maxout
# layer, dropout, and a two-unit softmax output.
# print() with a single string argument behaves the same on Python 2 and
# Python 3; the bare print statement used before is a SyntaxError on Python 3.
print('Building Keras models...')
from keras.layers import GRU, Highway, Dense, Dropout, MaxoutDense, Activation, Masking
from keras.models import Sequential
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers.convolutional import Convolution1D, MaxPooling1D
from keras.models import Sequential
from keras.legacy.models import Graph

model = Sequential()
#model.add(Dense(25, input_dim=16))
#model.add(Masking(mask_value=-999, input_shape=(40, 2)))
model.add(GRU(25, input_shape=(1, 16), dropout_W=0.05))
# remove Maxout for tensorflow
model.add(MaxoutDense(64, 5))  #, input_shape=graph.nodes['dropout'].output_shape[1:]))
#model.add(Dense(64, activation='relu'))
model.add(Dropout(0.4))
#model.add(Highway(activation = 'relu'))
#model.add(Dropout(0.3))
model.add(Dense(2))
model.add(Activation('softmax'))

print('Compiling model...')
#adam = Adam(lr=1e-4)
model.compile(optimizer='Adam', loss='categorical_crossentropy', metrics=['accuracy'])