def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=-1)(inputs)
    x = Bidirectional(CuDNNLSTM(96, name='blstm1', return_sequences=True),
                      merge_mode='concat')(inputs)
    # activation_1 = Activation('tanh')(lstm_1)
    x = SpatialDropout1D(0.1)(x)
    x = Attention(8, 16)([x, x, x])
    x1 = GlobalMaxPool1D()(x)
    x2 = GlobalAvgPool1D()(x)
    x = Concatenate(axis=-1)([x1, x2])
    x = Dense(units=128, activation='elu')(x)
    x = Dense(units=64, activation='elu')(x)
    x = Dropout(rate=0.4)(x)
    outputs = Dense(units=num_classes, activation='softmax')(x)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
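# Note: `Attention(8, 16)` in the snippet above (and in several snippets below)
# is not a built-in Keras layer. The call signature and the 128-dim output that
# later snippets reshape to suggest multi-head self-attention with n_head=8
# heads of size_per_head=16. A minimal sketch of such a layer, assuming
# TF2-style Keras; this is an illustration, not the original implementation:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Layer


class Attention(Layer):
    """Scaled dot-product multi-head attention over a [query, key, value] list."""

    def __init__(self, n_head, size_per_head, **kwargs):
        super().__init__(**kwargs)
        self.n_head = n_head
        self.size_per_head = size_per_head
        self.out_dim = n_head * size_per_head
        self.wq = Dense(self.out_dim, use_bias=False)
        self.wk = Dense(self.out_dim, use_bias=False)
        self.wv = Dense(self.out_dim, use_bias=False)

    def _split_heads(self, x):
        # (batch, time, out_dim) -> (batch, n_head, time, size_per_head)
        s = tf.shape(x)
        x = tf.reshape(x, (s[0], s[1], self.n_head, self.size_per_head))
        return tf.transpose(x, (0, 2, 1, 3))

    def call(self, inputs):
        q, k, v = inputs
        q = self._split_heads(self.wq(q))
        k = self._split_heads(self.wk(k))
        v = self._split_heads(self.wv(v))
        scale = tf.sqrt(tf.cast(self.size_per_head, tf.float32))
        weights = tf.nn.softmax(tf.matmul(q, k, transpose_b=True) / scale)
        out = tf.matmul(weights, v)  # (batch, n_head, time, size_per_head)
        out = tf.transpose(out, (0, 2, 1, 3))
        s = tf.shape(out)
        return tf.reshape(out, (s[0], s[1], self.out_dim))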
def _cnn_maxpool_multifilter(self, name: str) -> Model:
    """https://richliao.github.io/supervised/classification/2016/11/26/textclassifier-convolutional/
    """
    convs = []
    filter_sizes = [3, 4, 5]
    _inputs = Input((self.maxlen, ), name='input')
    l_embed = Embedding(input_dim=self.input_dim,
                        output_dim=self.embed_dim,
                        input_length=self.maxlen,
                        name='embedding')(_inputs)
    for fsz in filter_sizes:
        l_conv = Conv1D(filters=self.conv_filters,
                        kernel_size=fsz,
                        activation='relu')(l_embed)
        l_pool = MaxPool1D(self.conv_pool_size)(l_conv)
        convs.append(l_pool)
    l_merge = Concatenate(axis=1)(convs)
    l_cov1 = Conv1D(filters=self.conv_filters,
                    kernel_size=self.conv_kernel_size,
                    activation='relu')(l_merge)
    l_pool1 = MaxPool1D(pool_size=self.conv_pool_size)(l_cov1)
    l_cov2 = Conv1D(filters=self.conv_filters,
                    kernel_size=self.conv_kernel_size,
                    activation='relu')(l_pool1)
    l_pool2 = GlobalMaxPool1D()(l_cov2)
    l_flat = Flatten()(l_pool2)
    l_dense = Dense(self.units, activation='relu')(l_flat)
    _preds = Dense(self.classes, activation='sigmoid', name='fc1')(l_dense)
    return Model(inputs=_inputs, outputs=_preds, name=name)
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    lstm_1 = Bidirectional(CuDNNLSTM(64, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def _cnn_maxpool(self, name: str) -> Model:
    """https://richliao.github.io/supervised/classification/2016/11/26/textclassifier-convolutional/
    """
    return Sequential([
        InputLayer(input_shape=(self.maxlen, ), name='input'),
        Embedding(input_dim=self.input_dim,
                  output_dim=self.embed_dim,
                  input_length=self.maxlen,
                  name='embedding'),
        Conv1D(filters=self.conv_filters,
               kernel_size=self.conv_kernel_size,
               activation='relu'),
        MaxPool1D(pool_size=self.conv_pool_size),
        Conv1D(filters=self.conv_filters,
               kernel_size=self.conv_kernel_size,
               activation='relu'),
        MaxPool1D(pool_size=self.conv_pool_size),
        Conv1D(filters=self.conv_filters,
               kernel_size=self.conv_kernel_size,
               activation='relu'),
        GlobalMaxPool1D(),
        Flatten(),
        Dense(self.units, activation='relu'),
        Dense(self.classes, activation='sigmoid', name='fc1'),
    ], name=name)
def _train_CNN_Glove(self, X_train, y_train, epochs=5, batch_size=64,
                     learning_rate=0.001, regularization=0.01):
    """Trains CNN
    - X_train: Input sequence
    - y_train: Target sequence
    - epochs
    - batch_size
    - learning_rate: Adam optimizer's learning rate
    - regularization: Regularization
    Returns:
    - history: Scalar loss
    """
    flatten_y = [category for sublist in y_train for category in sublist]
    class_weights = class_weight.compute_class_weight(
        'balanced', np.unique(flatten_y), flatten_y)

    optim = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    embedding_matrix = self.create_embedding_matrix()

    model = models.Sequential()
    model.add(
        Embedding(
            input_dim=self.max_word_count,
            output_dim=100,
            embeddings_initializer=initializers.Constant(embedding_matrix),
            input_length=self.max_sequence_len,
            trainable=False))
    model.add(
        Conv1D(filters=300,
               kernel_size=3,
               padding='valid',
               activation='relu',
               strides=1))
    model.add(GlobalMaxPool1D())
    model.add(Dense(8, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer=optim,
                  metrics=[BinaryAccuracy()])

    # Pass the computed weights as a class -> weight dict; the original passed
    # the sklearn `class_weight` module here by mistake.
    history = model.fit(X_train,
                        y_train,
                        class_weight=dict(zip(np.unique(flatten_y),
                                              class_weights)),
                        epochs=epochs,
                        batch_size=batch_size,
                        validation_split=0.25,
                        verbose=self.verbose,
                        callbacks=[
                            EarlyStopping(monitor='val_loss',
                                          patience=3,
                                          min_delta=0.0001)
                        ])
    self.model = model
    self.history = history.history
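# The `create_embedding_matrix()` helper called above is not shown. A typical
# GloVe-style implementation looks like the sketch below; the file path,
# argument names, and standalone form are assumptions for illustration, not
# the original method:
import numpy as np

def create_embedding_matrix(word_index, glove_path='glove.6B.100d.txt',
                            max_word_count=20000, embed_dim=100):
    # Parse GloVe vectors: each line is a word followed by its coefficients.
    embeddings_index = {}
    with open(glove_path, encoding='utf-8') as f:
        for line in f:
            values = line.split()
            embeddings_index[values[0]] = np.asarray(values[1:],
                                                     dtype='float32')
    # Rows stay zero for words with no pretrained vector; row i matches the
    # tokenizer index of the word, as the Embedding layer above expects.
    embedding_matrix = np.zeros((max_word_count, embed_dim))
    for word, i in word_index.items():
        if i < max_word_count:
            vector = embeddings_index.get(word)
            if vector is not None:
                embedding_matrix[i] = vector
    return embedding_matrix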
def create_model(self):
    # Declaration for KimCNN-based encoder
    encoder_input = Input(shape=(self.model_config.max_sequence_len, ),
                          dtype='int32')
    embedding_layer = Embedding(
        len(self.tokenizer.word_index) + 1,
        self.model_config.word_embedding_dim,
        weights=[self.embedding_map],
        input_length=self.model_config.max_sequence_len,
        trainable=False)
    embedded_sequences = embedding_layer(encoder_input)
    l_conv1 = Conv1D(100, 3, activation='relu',
                     padding='same')(embedded_sequences)
    l_pool1 = GlobalMaxPool1D()(l_conv1)
    l_conv2 = Conv1D(100, 4, activation='relu',
                     padding='same')(embedded_sequences)
    l_pool2 = GlobalMaxPool1D()(l_conv2)
    l_conv3 = Conv1D(100, 5, activation='relu',
                     padding='same')(embedded_sequences)
    l_pool3 = GlobalMaxPool1D()(l_conv3)
    l_concat1 = Concatenate()([l_pool1, l_pool2, l_pool3])
    encoder = Model(encoder_input, l_concat1)

    # Similarity classifier using the KimCNN-based encoder
    sequence_input1 = Input(shape=(self.model_config.max_sequence_len, ),
                            dtype='int32')
    sequence_input2 = Input(shape=(self.model_config.max_sequence_len, ),
                            dtype='int32')
    l_concat2 = Concatenate()(
        [encoder(sequence_input1), encoder(sequence_input2)])
    l_dense1 = Dense(self.model_config.hidden_dim,
                     activation='relu')(l_concat2)
    l_dropout1 = Dropout(self.model_config.dropout)(l_dense1)
    preds = Dense(self.model_config.num_classes,
                  activation='softmax')(l_dropout1)
    self.model = Model([sequence_input1, sequence_input2], preds)
    self.model.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])
    self.model.summary()
def get_model(self):
    input = Input(shape=(self.maxlen, ))
    # Embedding part can try multichannel, as in the original paper
    embedding = self.embedding_layer(input)
    convs = []
    for kernel_size in self.kernel_size_list:
        c = Conv1D(128, kernel_size, activation='relu')(embedding)  # convolution
        # c = Dropout(0.5)(c)
        p = GlobalMaxPool1D()(c)  # pooling
        # p = GlobalAvgPool1D()(c)
        convs.append(p)
    x = Concatenate()(convs)
    output = Dense(self.num_class, activation=self.last_activation)(x)
    model = Model(inputs=input, outputs=output)
    return model
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    sequence_len = input_shape[0]
    # Pick the LSTM width closest to the sequence length.
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]
    lstm_1 = CuDNNLSTM(lstm_units, return_sequences=True)(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    if num_classes >= 20:
        if num_classes < 30:
            dropout1 = SpatialDropout1D(0.5)(activation_1)
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        else:
            attention_1 = Attention(
                8, 16)([activation_1, activation_1, activation_1])
        # k-max pooling: keep the k largest activations per channel.
        k_num = 10
        kmaxpool_l = Lambda(lambda x: tf.reshape(
            tf.nn.top_k(tf.transpose(x, [0, 2, 1]), k=k_num, sorted=True)[0],
            shape=[-1, k_num, 128]))(attention_1)
        flatten = Flatten()(kmaxpool_l)
        dropout2 = Dropout(rate=0.5)(flatten)
    else:
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_l = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_l)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    # dense_1 = Dense(units=256, activation='softplus',
    #                 kernel_regularizer=regularizers.l2(0.01),
    #                 activity_regularizer=regularizers.l1(0.01))(dropout2)
    # dense_1 = DropConnect(Dense(units=256, activation='softplus'), prob=0.5)(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss=loss_fun,
                  # loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    lstm_1 = CuDNNLSTM(128, return_sequences=True)(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    if num_classes >= 20:
        if num_classes < 30:
            dropout1 = SpatialDropout1D(0.5)(activation_1)
            attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        else:
            # No dropout, to keep more information for classifying a large
            # number of classes.
            attention_1 = Attention(8, 16)(
                [activation_1, activation_1, activation_1])
        k_num = 10
        kmaxpool_l = Lambda(
            lambda x: tf.reshape(tf.nn.top_k(tf.transpose(x, [0, 2, 1]),
                                             k=k_num, sorted=True)[0],
                                 shape=[-1, k_num, 128]))(attention_1)
        flatten = Flatten()(kmaxpool_l)
        dropout2 = Dropout(rate=0.5)(flatten)
    else:
        dropout1 = SpatialDropout1D(0.5)(activation_1)
        attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
        pool_l = GlobalMaxPool1D()(attention_1)
        dropout2 = Dropout(rate=0.5)(pool_l)
    dense_1 = Dense(units=256, activation='softplus')(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def init_model(self, input_shape, num_classes, **kwargs):
    inputs = Input(shape=input_shape)
    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    sequence_len = input_shape[0]
    lstm_units_array = np.array([32, 64, 128, 256, 512])
    lstm_units = lstm_units_array[np.argmin(
        np.abs(lstm_units_array - sequence_len))]
    lstm_1 = Bidirectional(CuDNNLSTM(lstm_units, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    # The original branched on `lstm_units <= 128`, but both branches were
    # identical, so the branch is dropped here.
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    # dense_1 = Dense(units=256, activation='relu',
    #                 kernel_regularizer=regularizers.l2(0.01),
    #                 activity_regularizer=regularizers.l1(0.01))(dropout2)
    # dense_1 = DropConnect(Dense(units=256, activation='relu'), prob=0.5)(dropout2)
    outputs = Dense(units=num_classes, activation='softmax')(dense_1)

    model = TFModel(inputs=inputs, outputs=outputs)
    loss_fun = CategoricalCrossentropy(label_smoothing=0.2)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss=loss_fun,
                  # loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
def __generate_base_model(self) -> Model:
    sequence_input = Input(shape=(3000, 1))

    # two convolutional layers
    sequence = Convolution1D(filters=32,
                             kernel_size=self.__kernel_size,
                             padding=self.__padding_valid,
                             activation=activations.relu)(sequence_input)
    sequence = Convolution1D(filters=32,
                             kernel_size=self.__kernel_size,
                             padding=self.__padding_valid,
                             activation=activations.relu)(sequence)

    for filters in [32, 32, 256]:
        # max pool and dropout
        sequence = MaxPool1D(pool_size=self.__pool_size,
                             padding=self.__padding_valid)(sequence)
        sequence = SpatialDropout1D(rate=self.__dropout_rate)(sequence)
        # two convolutional layers again
        sequence = Convolution1D(filters=filters,
                                 kernel_size=self.__kernel_size,
                                 padding=self.__padding_valid,
                                 activation=activations.relu)(sequence)
        sequence = Convolution1D(filters=filters,
                                 kernel_size=self.__kernel_size,
                                 padding=self.__padding_valid,
                                 activation=activations.relu)(sequence)

    # final block
    sequence = GlobalMaxPool1D()(sequence)
    sequence = Dropout(rate=self.__dropout_rate)(sequence)
    sequence = Dense(units=64, activation=activations.relu)(sequence)

    # last dropout and model generation
    model = models.Model(
        inputs=sequence_input,
        outputs=Dropout(rate=self.__dropout_rate)(sequence))

    # compile model
    model.compile(optimizer=optimizers.Adam(),
                  loss=losses.sparse_categorical_crossentropy,
                  metrics=self.__metrics)
    return model
def create_model(self):
    # Declaration for KimCNN-based word encoder
    word_encoder_input = Input(shape=(self.model_config.max_word_len, ),
                               dtype='int32')
    word_embedding_layer = Embedding(
        len(self.word_tokenizer.word_index) + 1,
        self.model_config.word_embedding_dim,
        weights=[self.word_embedding_map],
        input_length=self.model_config.max_word_len,
        trainable=False)
    embedded_word_sequences = word_embedding_layer(word_encoder_input)
    w_conv1 = Conv1D(100, 3, activation='relu',
                     padding='same')(embedded_word_sequences)
    w_pool1 = GlobalMaxPool1D()(w_conv1)
    w_conv2 = Conv1D(100, 4, activation='relu',
                     padding='same')(embedded_word_sequences)
    w_pool2 = GlobalMaxPool1D()(w_conv2)
    w_conv3 = Conv1D(100, 5, activation='relu',
                     padding='same')(embedded_word_sequences)
    w_pool3 = GlobalMaxPool1D()(w_conv3)
    w_concat1 = Concatenate()([w_pool1, w_pool2, w_pool3])
    word_encoder = Model(word_encoder_input, w_concat1)

    # Declaration for KimCNN-based code encoder
    code_encoder_input = Input(shape=(self.model_config.max_code_len, ),
                               dtype='int32')
    code_embedding_layer = Embedding(
        len(self.code_tokenizer.word_index) + 1,
        self.model_config.code_embedding_dim,
        weights=[self.code_embedding_map],
        input_length=self.model_config.max_code_len,
        trainable=False)
    embedded_code_sequences = code_embedding_layer(code_encoder_input)
    c_conv1 = Conv1D(100, 3, activation='relu',
                     padding='same')(embedded_code_sequences)
    c_pool1 = GlobalMaxPool1D()(c_conv1)
    c_conv2 = Conv1D(100, 4, activation='relu',
                     padding='same')(embedded_code_sequences)
    c_pool2 = GlobalMaxPool1D()(c_conv2)
    c_conv3 = Conv1D(100, 5, activation='relu',
                     padding='same')(embedded_code_sequences)
    c_pool3 = GlobalMaxPool1D()(c_conv3)
    c_concat1 = Concatenate()([c_pool1, c_pool2, c_pool3])
    code_encoder = Model(code_encoder_input, c_concat1)

    # Similarity classifier using the word and code encoders
    word_input1 = Input(shape=(self.model_config.max_word_len, ),
                        dtype='int32')
    word_input2 = Input(shape=(self.model_config.max_word_len, ),
                        dtype='int32')
    code_input1 = Input(shape=(self.model_config.max_code_len, ),
                        dtype='int32')
    code_input2 = Input(shape=(self.model_config.max_code_len, ),
                        dtype='int32')
    l_concat1 = Concatenate()([
        word_encoder(word_input1),
        word_encoder(word_input2),
        code_encoder(code_input1),
        code_encoder(code_input2)
    ])
    l_dense1 = Dense(self.model_config.hidden_dim,
                     activation='relu')(l_concat1)
    l_dropout1 = Dropout(self.model_config.dropout)(l_dense1)
    l_dense2 = Dense(self.model_config.hidden_dim,
                     activation='relu')(l_dropout1)
    l_dropout2 = Dropout(self.model_config.dropout)(l_dense2)
    preds = Dense(self.model_config.num_classes,
                  activation='softmax')(l_dropout2)
    self.model = Model([word_input1, word_input2, code_input1, code_input2],
                       preds)
    self.model.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])
    self.model.summary()
def init_model(self, input_shape, num_classes, **kwargs):
    freq_axis = 2
    channel_axis = 3
    channel_size = 128
    min_size = min(input_shape[:2])
    inputs = Input(shape=input_shape)
    # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    # x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)
    x = Reshape((input_shape[0], input_shape[1], 1))(inputs)

    # Conv block 1
    x = Convolution2D(64, 3, 1, padding='same', name='conv1')(x)
    x = BatchNormalization(axis=channel_axis, name='bn1')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
    x = Dropout(0.1, name='dropout1')(x)

    # Conv block 2
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv2')(x)
    x = BatchNormalization(axis=channel_axis, name='bn2')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
    x = Dropout(0.1, name='dropout2')(x)

    # Conv block 3
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv3')(x)
    x = BatchNormalization(axis=channel_axis, name='bn3')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x)
    x = Dropout(0.1, name='dropout3')(x)

    if min_size // 24 >= 4:
        # Conv block 4
        x = Convolution2D(channel_size, 3, 1, padding='same',
                          name='conv4')(x)
        x = BatchNormalization(axis=channel_axis, name='bn4')(x)
        x = ELU()(x)
        x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
        x = Dropout(0.1, name='dropout4')(x)

    x = Reshape((-1, channel_size))(x)
    avg_pool = GlobalAvgPool1D()(x)
    max_pool = GlobalMaxPool1D()(x)  # renamed from `max` to avoid shadowing the builtin
    x = concatenate([avg_pool, max_pool], axis=-1)
    # x = Dense(max(int(num_classes * 1.5), 128), activation='relu', name='dense1')(x)
    x = Dropout(0.3)(x)
    outputs1 = Dense(num_classes, activation='softmax', name='output')(x)

    # bnorm_1 = BatchNormalization(axis=2)(inputs)
    lstm_1 = Bidirectional(CuDNNLSTM(64, name='blstm_1',
                                     return_sequences=True),
                           merge_mode='concat')(inputs)
    activation_1 = Activation('tanh')(lstm_1)
    dropout1 = SpatialDropout1D(0.5)(activation_1)
    attention_1 = Attention(8, 16)([dropout1, dropout1, dropout1])
    pool_1 = GlobalMaxPool1D()(attention_1)
    dropout2 = Dropout(rate=0.5)(pool_1)
    dense_1 = Dense(units=256, activation='relu')(dropout2)
    outputs2 = Dense(units=num_classes, activation='softmax')(dense_1)

    # Ensemble: average the CNN head and the BLSTM-attention head.
    outputs = Average()([outputs1, outputs2])
    model = TFModel(inputs=inputs, outputs=outputs)
    optimizer = optimizers.Adam(
        # learning_rate=1e-3,
        lr=1e-3,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-08,
        decay=0.0002,
        amsgrad=True)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
    # (fragment: the opening of this call, e.g. `x_1d = conv_block_1d(x_1d,`,
    # is missing from the snippet)
        3, [
            filter_num_1d * (2**i), filter_num_1d * (2**i),
            (filter_num_1d * 4) * (2**i)
        ],
        stage=(i + 1),
        block='a')
    x_1d = identity_block_1d(x_1d,
                             3, [
                                 filter_num_1d * (2**i),
                                 filter_num_1d * (2**i),
                                 (filter_num_1d * 4) * (2**i)
                             ],
                             stage=(i + 1),
                             block='b')

x_1d = Conv1D(32, (1))(x_1d)  # 128
x_branch_1_1d = GlobalAveragePooling1D()(x_1d)
x_branch_2_1d = GlobalMaxPool1D()(x_1d)
x_1d = concatenate([x_branch_1_1d, x_branch_2_1d])
x_1d = Dense(1024, activation='relu')(x_1d)
# x_1d = Dropout(0.2)(x_1d)

x_in = Input(shape=(257, 98, 2))
x = BatchNormalization()(x_in)
if K.image_data_format() == 'channels_last':
    bn_axis = 3
else:
    bn_axis = 1
x = Conv2D(16, (3, 3), strides=(1, 1), padding='same', name='conv1')(x)
x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
x = Activation('relu')(x)
x = MaxPooling2D((2, 2), padding='same')(x)
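# The `conv_block_1d` / `identity_block_1d` helpers used above are not shown;
# the naming mirrors the Keras ResNet50 reference code. A minimal 1-D identity
# block sketch under that assumption (an illustration, not the original):
from tensorflow.keras.layers import Activation, BatchNormalization, Conv1D, add

def identity_block_1d(input_tensor, kernel_size, filters, stage, block):
    # Bottleneck: 1 -> kernel_size -> 1 convolutions plus a residual add.
    filters1, filters2, filters3 = filters
    prefix = 'res{}{}_'.format(stage, block)
    x = Conv1D(filters1, 1, name=prefix + '2a')(input_tensor)
    x = BatchNormalization(name=prefix + 'bn2a')(x)
    x = Activation('relu')(x)
    x = Conv1D(filters2, kernel_size, padding='same',
               name=prefix + '2b')(x)
    x = BatchNormalization(name=prefix + 'bn2b')(x)
    x = Activation('relu')(x)
    x = Conv1D(filters3, 1, name=prefix + '2c')(x)
    x = BatchNormalization(name=prefix + 'bn2c')(x)
    # The identity shortcut requires input channels == filters3.
    x = add([x, input_tensor])
    return Activation('relu')(x)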
def train(self, texts: List[str], target: List[int]) -> None:
    from tensorflow.python.keras.models import Model  # type: ignore
    from tensorflow.python.keras.layers import Input, Embedding, GRU, Dense, Bidirectional, GlobalMaxPool1D, concatenate  # type: ignore
    from tensorflow.keras.optimizers import Adam  # type: ignore
    from tensorflow.keras.callbacks import History  # type: ignore

    if self.downsampling:
        texts, target = downsample(texts, target, self.downsampling_ratio)

    if self.verbose:
        print('1. Vectorizing texts')

    NUMBER_OF_FEATURES: int = 20000
    self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES)
    self.tokenizer.fit_on_texts(texts)
    vocabulary: Dict[str, int] = self.tokenizer.word_index

    if self._max_sequence_length == 0:
        self._max_sequence_length = len(max(texts, key=len))

    vectorized_texts: array = self.vectorize_texts(texts)
    if self.include_casing_information:
        casing_information: array = self.texts_to_casing_information(texts)

    if self.embedding_location == '':
        if self.verbose:
            print('2. Skip (no embeddings)')
            print('3. Skip (no embeddings)')
    else:
        if self.verbose:
            print('2. Loading word embeddings')
        embedding_dictionary: Dict[
            str, List[float]] = load_embedding_dictionary(
                self.embedding_location)
        # Check how many values we have for the first word
        nr_of_embedding_features: int = len(
            list(embedding_dictionary.values())[1])

        if self.verbose:
            print('3. Creating embedding matrix')
        embedding_matrix: array = create_embedding_matrix_for_vocabulary(
            embedding_dictionary, vocabulary)

    if self.verbose:
        print('4. Building up model')

    # Define a simple BiGRU model with a pretrained embedding layer
    word_input: Input = Input(shape=(self._max_sequence_length, ))
    if self.embedding_location == '':
        # Add an empty embedding layer if we have no pretrained embeddings
        EMPTY_EMBEDDING_LAYER_SIZE: int = 300
        layers = Embedding(
            len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE)(word_input)
    else:
        layers = Embedding(input_dim=len(vocabulary) + 1,
                           output_dim=nr_of_embedding_features,
                           input_length=vectorized_texts.shape[1],
                           weights=[embedding_matrix],
                           trainable=False)(word_input)

    # Add a separate 'entrance' for the casing information
    if self.include_casing_information:
        word_model: Model = Model(inputs=word_input, outputs=layers)
        casing_input: Input = Input(shape=(self._max_sequence_length, 1))
        casing_model: Model = Model(inputs=casing_input,
                                    outputs=casing_input)
        layers = concatenate([word_model.output, casing_model.output])

    if self.bidirectional:
        layers = Bidirectional(
            GRU(16, activation='tanh', return_sequences=True))(layers)
        layers = Bidirectional(
            GRU(16, activation='tanh', return_sequences=True))(layers)
    else:
        layers = GRU(16, activation='tanh', return_sequences=True)(layers)
        layers = GRU(16, activation='tanh', return_sequences=True)(layers)
    layers = GlobalMaxPool1D()(layers)
    layers = Dense(256)(layers)
    layers = Dense(256)(layers)
    layers = Dense(1, activation='sigmoid')(layers)

    if self.include_casing_information:
        model: Model = Model([word_model.input, casing_model.input], layers)
    else:
        model: Model = Model(word_input, layers)

    # Compile the model
    optimizer: Adam = Adam(lr=self.learning_rate)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['acc'])

    if self.verbose:
        print('5. Training the model')

    if self.include_casing_information:
        input = [vectorized_texts, casing_information]
    else:
        input = vectorized_texts

    history: History = model.fit(
        input,
        target,
        epochs=self.learning_epochs,
        # validation_data=(test_vectors, test_target),
        verbose=1,  # Logs once per epoch.
        batch_size=self.learning_batch_size)
    self.model = model
def train(self, texts: List[str], target: List[int]) -> None:
    from tensorflow.python.keras.models import Sequential  # type: ignore
    from tensorflow.python.keras.layers import Embedding, Dense, LSTM, GlobalMaxPool1D  # type: ignore
    from tensorflow.keras.optimizers import Adam  # type: ignore
    from tensorflow.keras.callbacks import History  # type: ignore

    if self.downsampling:
        texts, target = downsample(texts, target, self.downsampling_ratio)

    if self.verbose:
        print('1. Vectorizing texts')

    NUMBER_OF_FEATURES: int = 20000
    self.tokenizer = text.Tokenizer(num_words=NUMBER_OF_FEATURES)
    self.tokenizer.fit_on_texts(texts)
    vocabulary: Dict[str, int] = self.tokenizer.word_index

    if self._max_sequence_length == 0:
        self._max_sequence_length = len(max(texts, key=len))

    vectorized_texts: array = self.vectorize_texts(texts)

    if self.embedding_location == '':
        if self.verbose:
            print('2. Skip (no embeddings)')
            print('3. Skip (no embeddings)')
    else:
        if self.verbose:
            print('2. Loading word embeddings')
        embedding_dictionary: Dict[
            str, List[float]] = load_embedding_dictionary(
                self.embedding_location)
        # Check how many values we have for the first word
        nr_of_embedding_features: int = len(
            list(embedding_dictionary.values())[1])

        if self.verbose:
            print('3. Creating embedding matrix')
        embedding_matrix: array = create_embedding_matrix_for_vocabulary(
            embedding_dictionary, vocabulary)

    if self.verbose:
        print('4. Building up model')

    # Define a simple LSTM model with a pretrained embedding layer
    model: Sequential = Sequential()
    if self.embedding_location == '':
        # Add an empty embedding layer if we have no pretrained embeddings
        EMPTY_EMBEDDING_LAYER_SIZE: int = 300
        model.add(Embedding(len(vocabulary) + 1, EMPTY_EMBEDDING_LAYER_SIZE))
    else:
        model.add(
            Embedding(input_dim=len(vocabulary) + 1,
                      output_dim=nr_of_embedding_features,
                      input_length=vectorized_texts.shape[1],
                      weights=[embedding_matrix],
                      trainable=False))
    model.add(LSTM(16, return_sequences=True))
    model.add(LSTM(16, return_sequences=True))
    model.add(LSTM(16, return_sequences=True))
    model.add(GlobalMaxPool1D())
    model.add(Dense(256))
    model.add(Dense(256))
    model.add(Dense(1, activation='sigmoid'))

    # Compile the model
    optimizer: Adam = Adam(lr=self.learning_rate)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['acc'])

    if self.verbose:
        print('5. Training the model')

    history: History = model.fit(
        vectorized_texts,
        target,
        epochs=self.learning_epochs,
        # validation_data=(test_vectors, test_target),
        verbose=1,  # Logs once per epoch.
        batch_size=self.learning_batch_size)
    self.model = model
# (fragment: the opening of this `model.add(` call and the matching `if`
# are missing from the snippet)
    Embedding(input_dim=len(word_index) + 1,
              output_dim=EMBEDDING_FEATURES,
              input_length=train_vectors.shape[1],
              weights=[embedding_matrix],
              trainable=False))
else:
    model.add(Embedding(len(word_index) + 1, 200))
model.add(Bidirectional(GRU(16, activation='tanh', return_sequences=True)))
if DROPOUT_LAYERS > 0:
    model.add(Dropout(0.15))
model.add(Bidirectional(GRU(16, activation='tanh', return_sequences=True)))
model.add(GlobalMaxPool1D())
if DROPOUT_LAYERS > 1:
    model.add(Dropout(0.15))
model.add(Dense(256))
model.add(Dense(256))
model.add(Dense(OUTPUT_UNITS, activation=OUTPUT_ACTIVATION))

print('compiling the model')
optimizer = Adam(lr=LEARNING_RATE)
model.compile(optimizer=optimizer,
              loss='binary_crossentropy',
              metrics=['acc', precision, recall])
def init_model(self, input_shape, num_classes, **kwargs):
    freq_axis = 2
    channel_axis = 3
    channel_size = 128
    min_size = min(input_shape[:2])
    melgram_input = Input(shape=input_shape)
    # x = ZeroPadding2D(padding=(0, 37))(melgram_input)
    # x = BatchNormalization(axis=freq_axis, name='bn_0_freq')(x)
    x = Reshape((input_shape[0], input_shape[1], 1))(melgram_input)

    # Conv block 1
    x = Convolution2D(64, 3, 1, padding='same', name='conv1')(x)
    x = BatchNormalization(axis=channel_axis, name='bn1')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x)
    x = Dropout(0.1, name='dropout1')(x)

    # Conv block 2
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv2')(x)
    x = BatchNormalization(axis=channel_axis, name='bn2')(x)
    x = ELU()(x)
    x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x)
    x = Dropout(0.1, name='dropout2')(x)

    # Conv block 3
    x = Convolution2D(channel_size, 3, 1, padding='same', name='conv3')(x)
    x = BatchNormalization(axis=channel_axis, name='bn3')(x)
    x = ELU()(x)
    # Integer division so pool_size stays an int (the original `min_size / 6`
    # yields a float under Python 3).
    x = MaxPooling2D(pool_size=(3, min_size // 6),
                     strides=(3, min_size // 6),
                     name='pool3')(x)
    x = Dropout(0.1, name='dropout3')(x)

    # if min_size // 24 >= 4:
    #     # Conv block 4
    #     x = Convolution2D(channel_size, 3, 1, padding='same', name='conv4')(x)
    #     x = BatchNormalization(axis=channel_axis, name='bn4')(x)
    #     x = ELU()(x)
    #     x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x)
    #     x = Dropout(0.1, name='dropout4')(x)

    x = Reshape((-1, channel_size))(x)
    avg_pool = GlobalAvgPool1D()(x)
    max_pool = GlobalMaxPool1D()(x)  # renamed from `max` to avoid shadowing the builtin
    x = concatenate([avg_pool, max_pool], axis=-1)
    # x = Dense(max(int(num_classes * 1.5), 128), activation='relu', name='dense1')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax', name='output')(x)

    model = TFModel(inputs=melgram_input, outputs=outputs)
    optimizer = optimizers.Nadam(lr=0.002,
                                 beta_1=0.9,
                                 beta_2=0.999,
                                 epsilon=None,
                                 schedule_decay=0.004)
    model.compile(optimizer=optimizer,
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.summary()
    self._model = model
    self.is_init = True
print(emb_mean, emb_std)

word_index = tokenizer.word_index
nb_words = min(max_features, len(word_index))
embedding_matrix = np.random.normal(emb_mean, emb_std,
                                    (nb_words, embed_size))
for word, i in word_index.items():
    if i >= max_features:
        continue
    embedding_vector = embeddings_index.get(word)
    if embedding_vector is not None:
        embedding_matrix[i] = embedding_vector

# model
inp = Input(shape=(maxlen, ))
x = Embedding(max_features, embed_size, weights=[embedding_matrix])(inp)
x = Bidirectional(
    LSTM(50, return_sequences=True, dropout=0.1, recurrent_dropout=0.1))(x)
x = GlobalMaxPool1D()(x)
x = Dense(50, activation="relu")(x)
x = Dropout(0.1)(x)
x = Dense(6, activation="sigmoid")(x)
model = Model(inputs=inp, outputs=x)
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(X_t, y, batch_size=32, epochs=2, validation_split=0.1)

y_test = model.predict([X_te], batch_size=1024, verbose=1)
sample_submission = pd.read_csv('data/sample_submission.csv')
sample_submission[list_classes] = y_test
sample_submission.to_csv('data/submission.csv', index=False)
x_in_1d = Input(shape=(16000, 1))
x_1d = BatchNormalization(name='batchnormal_1d_in')(x_in_1d)
for i in range(9):
    name = 'step' + str(i)
    x_1d = Conv1D(8 * (2**i), (3),
                  padding='same',
                  name='conv' + name + '_1')(x_1d)
    x_1d = BatchNormalization(name='batch' + name + '_1')(x_1d)
    x_1d = Activation('relu')(x_1d)
    x_1d = Conv1D(8 * (2**i), (3),
                  padding='same',
                  name='conv' + name + '_2')(x_1d)
    x_1d = BatchNormalization(name='batch' + name + '_2')(x_1d)
    x_1d = Activation('relu')(x_1d)
    x_1d = MaxPooling1D((2), padding='same')(x_1d)

x_1d = Conv1D(1024, (1), name='last1024')(x_1d)
x_1d_branch_1 = GlobalAveragePooling1D()(x_1d)
x_1d_branch_2 = GlobalMaxPool1D()(x_1d)
x_1d = concatenate([x_1d_branch_1, x_1d_branch_2])
x_1d = Dense(1024, activation='relu', name='dense1024')(x_1d)
x_1d = Dropout(0.2)(x_1d)
x_1d = Dense(len(POSSIBLE_LABELS), activation='softmax', name='cls_1d')(x_1d)

fine_tune_weight = '1dcnn_last1024_noiseadd_ts_mul_balance_inputnormal_submean_abs_whitenadd_sgd_name.hdf5'
# weight_name = '1dcnn_last1024_noiseadd_ts_7res_allcon_balance_inputnormal_submean_abs_whitenadd_dropall_sgd.hdf5'
weight_name = 'simple_1d_mixset.hdf5'

# the results from the gradient updates on the CPU
# with tf.device("/cpu:0"):
model = Model(inputs=x_in_1d, outputs=x_1d)

# FINE TUNE
# model.load_weights(root_dir + 'weights/' + fine_tune_weight, by_name=True)
# model = multi_gpu_model(model, gpus=2)
# Model variables
gpus = 1
batch_size = 1024 * gpus
n_epoch = 100
n_hidden = 50

# Define the shared model
x = Sequential()
x.add(
    Embedding(len(embeddings),
              embedding_dim,
              weights=[embeddings],
              input_shape=(max_seq_length * 2, ),
              trainable=False))
x.add(Conv1D(512, kernel_size=5, activation='relu'))
x.add(GlobalMaxPool1D())
x.add(Dense(250, activation='relu'))
x.add(Dense(1, activation='sigmoid'))
x.compile(loss='mean_squared_error',
          optimizer=tf.keras.optimizers.Adam(),
          metrics=['accuracy'])
x.summary()
model = x

try:
    # Start training
    training_start_time = time()
    callbacks = [EarlyStopping(monitor='val_loss', patience=3)]
    malstm_trained = model.fit(X_train, Y_train,