def get_seq_model():
    """Define three channel input shape depending on image data format."""
    if K.image_data_format() == 'channels_first':
        input_shape = (3, img_width, img_height)
    else:
        input_shape = (img_width, img_height, 3)

    # Initialize CNN by creating a sequential model.
    model = Sequential()
    model.add(Conv2D(32, (3, 3), input_shape=input_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(32, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3)))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    model.add(Dense(64))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(2))
    model.add(Activation('sigmoid'))

    model.compile(
        loss='binary_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'])
    return model
def inner(x):
    # Pre-activation conv block; channels, strides, params and drop_rate are
    # free variables supplied by the enclosing scope (see wide_resnet_impl).
    x = LayerNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(channels, 3, strides=strides, **params)(x)
    x = Dropout(drop_rate)(x) if drop_rate > 0 else x
    x = LayerNormalization()(x)
    x = Activation("relu")(x)
    x = Convolution2D(channels, 3, **params)(x)
    return x
def residual_block(
    inputs,
    num_filters=16,
    kernel_size=3,
    strides=1,
    activation="relu",
    batch_normalization=True,
    conv_first=True,
):
    """2D Convolution-Batch Normalization-Activation stack builder

    # Arguments
        inputs (tensor): input tensor from input image or previous layer
        num_filters (int): Conv2D number of filters
        kernel_size (int): Conv2D square kernel dimensions
        strides (int): Conv2D square stride dimensions
        activation (string): activation name
        batch_normalization (bool): whether to include batch normalization
        conv_first (bool): conv-bn-activation (True) or bn-activation-conv (False)

    # Returns
        x (tensor): tensor as input to the next layer
    """
    conv = Conv2D(
        num_filters,
        kernel_size=kernel_size,
        strides=strides,
        padding="same",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(1e-4),
        activation=None,
    )
    conv2 = Conv2D(
        num_filters,
        kernel_size=kernel_size,
        strides=strides,
        padding="same",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(1e-4),
        activation="linear",
    )

    x = conv(inputs)
    x = BatchNormalization()(x)
    x = Activation(activation)(x)
    x = conv2(x)
    x = add([inputs, x])
    x = BatchNormalization()(x)
    x = Activation(activation)(x)
    return x
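# Minimal usage sketch for residual_block (assumption: the input tensor's
# channel count already matches num_filters so the identity add works; the
# 32x32x16 shape is illustrative only).
inputs = Input(shape=(32, 32, 16))
x = residual_block(inputs, num_filters=16)  # strides=1 keeps the spatial size
x = residual_block(x, num_filters=16)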
def DNNclassifier_crps(self, p, num_cut, optimizer, seeding):
    tf.set_random_seed(seeding)
    inputs = Input(shape=(p,))

    if isinstance(optimizer, str):
        opt = optimizer
    else:
        opt_name = optimizer.__class__.__name__
        opt_config = optimizer.get_config()
        opt_class = getattr(optimizers, opt_name)
        opt = opt_class(**opt_config)

    for i, n_neuron in enumerate(self.hidden_list):
        if i == 0:
            net = Dense(n_neuron, kernel_initializer='he_uniform')(inputs)
        else:
            net = Dense(n_neuron, kernel_initializer='he_uniform')(net)
        net = Activation(activation='elu')(net)
        net = BatchNormalization()(net)
        net = Dropout(rate=self.dropout_list[i])(net)

    softmaxlayer = Dense(num_cut + 1, activation='softmax',
                         kernel_initializer='he_uniform')(net)
    output = Lambda(self.tf_cumsum)(softmaxlayer)

    model = Model(inputs=[inputs], outputs=[output])
    model.compile(optimizer=opt, loss=self.crps_loss)
    return model
def build_elu_cnn(input_shape, output_size):
    """Build a variation of the CNN implemented in the ELU paper.

    https://arxiv.org/abs/1511.07289
    """
    def layers(n, channels, kernel):
        return sum(
            ([Convolution2D(channels, kernel_size=kernel, padding="same"),
              ELU()] for i in range(n)),
            [])

    model = Sequential(
        [
            Convolution2D(384, kernel_size=3, padding="same",
                          input_shape=input_shape)
        ] +
        layers(1, 384, 3) +
        [MaxPooling2D(pool_size=(2, 2))] +
        layers(1, 384, 1) +
        layers(1, 384, 2) +
        layers(2, 640, 2) +
        [MaxPooling2D(pool_size=(2, 2))] +
        layers(1, 640, 1) +
        layers(3, 768, 2) +
        [MaxPooling2D(pool_size=(2, 2))] +
        layers(1, 768, 1) +
        layers(2, 896, 2) +
        [MaxPooling2D(pool_size=(2, 2))] +
        layers(1, 896, 3) +
        layers(2, 1024, 2) +
        [
            MaxPooling2D(pool_size=(2, 2)),
            Convolution2D(output_size, kernel_size=1, padding="same"),
            GlobalAveragePooling2D(),
            Activation("softmax")
        ])

    model.compile(optimizer=SGD(momentum=0.9),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])

    return model
def ensure_softmax_output(model):
    """
    Adds a softmax layer on top of the logits layer, in case the output layer
    is a logits layer.

    Parameters
    ----------
    model : Keras Model
        The original model

    Returns
    -------
    new_model : Keras Model
        The modified model
    """
    if 'softmax' not in model.output_names:
        if 'logits' in model.output_names:
            output = Activation('softmax', name='softmax')(model.output)
            new_model = Model(inputs=model.input, outputs=output)
        else:
            raise ValueError('The output layer is neither softmax nor logits')
    else:
        new_model = model

    return new_model
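# Usage sketch for ensure_softmax_output; the input size, class count and the
# 'logits' layer name below are illustrative assumptions.
inp = Input(shape=(32,))
logits = Dense(10, name='logits')(inp)              # raw logits output layer
logits_model = Model(inputs=inp, outputs=logits)
probs_model = ensure_softmax_output(logits_model)   # now ends in a 'softmax' layer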
def NN_huaweiv1(maxlen, embedding_matrix=None, class_num1=17, class_num2=12):
    emb_layer = Embedding(
        embedding_matrix.shape[0],
        embedding_matrix.shape[1],
        input_length=maxlen,
        weights=[embedding_matrix],
        trainable=False,
    )
    seq1 = Input(shape=(maxlen,))
    emb = emb_layer(seq1)
    sdrop = SpatialDropout1D(rate=0.2)
    lstm_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    gru_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    cnn1d_layer = Conv1D(64, kernel_size=3, padding="same",
                         kernel_initializer="he_uniform")

    sd = sdrop(emb)
    lstm1 = lstm_layer(sd)
    gru1 = gru_layer(lstm1)
    cnn1 = cnn1d_layer(gru1)
    gru1 = concatenate([lstm1, gru1, cnn1])

    att_1 = Attention(maxlen)(gru1)
    att_2 = Attention(maxlen)(gru1)
    att_3 = Attention(maxlen)(gru1)
    att_4 = Attention(maxlen)(gru1)

    x1 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_1)))
    x2 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_2)))
    x3 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_3)))
    x4 = Activation(activation="relu")(BatchNormalization()(Dense(128)(att_4)))

    pred1_1 = Dense(class_num1 - 10, activation='sigmoid')(x1)
    pred1_2 = Dense(10, activation='sigmoid')(x2)
    pred1 = concatenate([pred1_1, pred1_2], axis=-1, name='pred1')

    pred2_1 = Dense(class_num2 - 9, activation='sigmoid')(x3)
    pred2_2 = Dense(9, activation='sigmoid')(x4)
    pred2 = concatenate(
        [pred2_1, pred2_2], axis=-1, name='pred2'
    )  # Dense(class_num2, activation='sigmoid', name='pred2')(y)

    model = Model(inputs=seq1, outputs=[pred1, pred2])
    return model
def NN_huaweiv1(maxlen, embedding_matrix=None, class_num1=17, class_num2=12):
    emb_layer = Embedding(
        embedding_matrix.shape[0],
        embedding_matrix.shape[1],
        input_length=maxlen,
        weights=[embedding_matrix],
        trainable=False,
    )
    seq1 = Input(shape=(maxlen,))
    x1 = emb_layer(seq1)
    sdrop = SpatialDropout1D(rate=0.2)
    lstm_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    gru_layer = Bidirectional(CuDNNGRU(128, return_sequences=True))
    cnn1d_layer = Conv1D(64, kernel_size=3, padding="same",
                         kernel_initializer="he_uniform")

    x1 = sdrop(x1)
    lstm1 = lstm_layer(x1)
    gru1 = gru_layer(lstm1)
    att_1 = Attention(maxlen)(lstm1)
    att_2 = Attention(maxlen)(gru1)
    cnn1 = cnn1d_layer(lstm1)

    avg_pool = GlobalAveragePooling1D()
    max_pool = GlobalMaxPooling1D()

    x1 = concatenate([
        att_1, att_2,
        Attention(maxlen)(cnn1),
        avg_pool(cnn1),
        max_pool(cnn1)
    ])

    x = Dropout(0.2)(Activation(activation="relu")(BatchNormalization()(
        Dense(128)(x1))))
    x = Activation(activation="relu")(BatchNormalization()(Dense(64)(x)))
    pred1 = Dense(class_num1, activation='sigmoid', name='pred1')(x)

    # Second head is built on the concatenated features (original code applied
    # the Dense to x, leaving the concatenation unused).
    y = concatenate([x1, x])
    y = Activation(activation="relu")(BatchNormalization()(Dense(64)(y)))
    pred2 = Dense(class_num2, activation='sigmoid', name='pred2')(y)

    model = Model(inputs=seq1, outputs=[pred1, pred2])
    return model
def build_small_cnn(input_shape, output_size):
    model = Sequential([
        # conv1_*
        Convolution2D(32, kernel_size=3, padding="same",
                      input_shape=input_shape),
        Activation("relu"),
        Convolution2D(32, kernel_size=3, padding="same"),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),

        # conv2_*
        Convolution2D(64, kernel_size=3, padding="same"),
        Activation("relu"),
        Convolution2D(64, kernel_size=3, padding="same"),
        Activation("relu"),
        MaxPooling2D(pool_size=(2, 2)),

        # Fully connected
        Flatten(),
        Dense(512),
        Activation("relu"),
        Dense(512),
        Activation("relu"),
        Dense(output_size),
        Activation("softmax")
    ])

    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    return model
def build_cnn(input_shape, output_size):
    kwargs = {"kernel_size": 3, "activation": "relu", "padding": "same"}
    model = Sequential([
        # conv1_*
        Convolution2D(64, input_shape=input_shape, **kwargs),
        BatchRenormalization(),
        Convolution2D(64, **kwargs),
        BatchRenormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # conv2_*
        Convolution2D(128, **kwargs),
        BatchRenormalization(),
        Convolution2D(128, **kwargs),
        BatchRenormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # conv3_*
        Convolution2D(256, **kwargs),
        BatchRenormalization(),
        Convolution2D(256, **kwargs),
        BatchRenormalization(),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),

        # Fully connected
        Flatten(),
        Dense(1024),
        Activation("relu"),
        Dropout(0.5),
        Dense(512),
        Activation("relu"),
        Dropout(0.5),
        Dense(output_size),
        Activation("softmax")
    ])

    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    return model
def build_lr(input_shape, output_size):
    model = Sequential([
        Flatten(input_shape=input_shape),
        Dense(output_size),
        Activation("softmax")
    ])

    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    return model
def face_impl(input_shape, output_size):
    x = Input(shape=input_shape)
    e = modelf(input_shape, embedding)(x)
    y = Dense(output_size)(e)
    y = Activation("softmax")(y)

    model = Model(x, y)
    model.compile("adam", "sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model
def build_lstm_model(input_data, output_size, neurons=20, activ_func='linear',
                     dropout=0.25, loss='mae', optimizer='adam'):
    model = Sequential()

    model.add(CuDNNLSTM(neurons,
                        input_shape=(input_data.shape[1], input_data.shape[2]),
                        return_sequences=True))
    model.add(Dropout(dropout))
    model.add(CuDNNLSTM(neurons,
                        input_shape=(input_data.shape[1], input_data.shape[2])))
    model.add(Dropout(dropout))
    model.add(Dense(units=output_size))
    model.add(Activation(activ_func))

    model.compile(loss=loss, optimizer=optimizer)
    return model
def inception_block_1a(X):
    """
    Implementation of an inception block
    """
    X_3x3 = Conv2D(96, (1, 1), data_format='channels_first',
                   name='inception_3a_3x3_conv1')(X)
    X_3x3 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3a_3x3_bn1')(X_3x3)
    X_3x3 = Activation('relu')(X_3x3)
    X_3x3 = ZeroPadding2D(padding=(1, 1), data_format='channels_first')(X_3x3)
    X_3x3 = Conv2D(128, (3, 3), data_format='channels_first',
                   name='inception_3a_3x3_conv2')(X_3x3)
    X_3x3 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3a_3x3_bn2')(X_3x3)
    X_3x3 = Activation('relu')(X_3x3)

    X_5x5 = Conv2D(16, (1, 1), data_format='channels_first',
                   name='inception_3a_5x5_conv1')(X)
    X_5x5 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3a_5x5_bn1')(X_5x5)
    X_5x5 = Activation('relu')(X_5x5)
    X_5x5 = ZeroPadding2D(padding=(2, 2), data_format='channels_first')(X_5x5)
    X_5x5 = Conv2D(32, (5, 5), data_format='channels_first',
                   name='inception_3a_5x5_conv2')(X_5x5)
    X_5x5 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3a_5x5_bn2')(X_5x5)
    X_5x5 = Activation('relu')(X_5x5)

    X_pool = MaxPooling2D(pool_size=3, strides=2,
                          data_format='channels_first')(X)
    X_pool = Conv2D(32, (1, 1), data_format='channels_first',
                    name='inception_3a_pool_conv')(X_pool)
    X_pool = BatchNormalization(axis=1, epsilon=0.00001,
                                name='inception_3a_pool_bn')(X_pool)
    X_pool = Activation('relu')(X_pool)
    X_pool = ZeroPadding2D(padding=((3, 4), (3, 4)),
                           data_format='channels_first')(X_pool)

    X_1x1 = Conv2D(64, (1, 1), data_format='channels_first',
                   name='inception_3a_1x1_conv')(X)
    X_1x1 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3a_1x1_bn')(X_1x1)
    X_1x1 = Activation('relu')(X_1x1)

    # CONCAT
    inception = concatenate([X_3x3, X_5x5, X_pool, X_1x1], axis=1)

    return inception
def inception_block_1b(X):
    X_3x3 = Conv2D(96, (1, 1), data_format='channels_first',
                   name='inception_3b_3x3_conv1')(X)
    X_3x3 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3b_3x3_bn1')(X_3x3)
    X_3x3 = Activation('relu')(X_3x3)
    X_3x3 = ZeroPadding2D(padding=(1, 1), data_format='channels_first')(X_3x3)
    X_3x3 = Conv2D(128, (3, 3), data_format='channels_first',
                   name='inception_3b_3x3_conv2')(X_3x3)
    X_3x3 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3b_3x3_bn2')(X_3x3)
    X_3x3 = Activation('relu')(X_3x3)

    X_5x5 = Conv2D(32, (1, 1), data_format='channels_first',
                   name='inception_3b_5x5_conv1')(X)
    X_5x5 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3b_5x5_bn1')(X_5x5)
    X_5x5 = Activation('relu')(X_5x5)
    X_5x5 = ZeroPadding2D(padding=(2, 2), data_format='channels_first')(X_5x5)
    X_5x5 = Conv2D(64, (5, 5), data_format='channels_first',
                   name='inception_3b_5x5_conv2')(X_5x5)
    X_5x5 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3b_5x5_bn2')(X_5x5)
    X_5x5 = Activation('relu')(X_5x5)

    X_pool = AveragePooling2D(pool_size=(3, 3), strides=(3, 3),
                              data_format='channels_first')(X)
    X_pool = Conv2D(64, (1, 1), data_format='channels_first',
                    name='inception_3b_pool_conv')(X_pool)
    X_pool = BatchNormalization(axis=1, epsilon=0.00001,
                                name='inception_3b_pool_bn')(X_pool)
    X_pool = Activation('relu')(X_pool)
    X_pool = ZeroPadding2D(padding=(4, 4), data_format='channels_first')(X_pool)

    X_1x1 = Conv2D(64, (1, 1), data_format='channels_first',
                   name='inception_3b_1x1_conv')(X)
    X_1x1 = BatchNormalization(axis=1, epsilon=0.00001,
                               name='inception_3b_1x1_bn')(X_1x1)
    X_1x1 = Activation('relu')(X_1x1)

    inception = concatenate([X_3x3, X_5x5, X_pool, X_1x1], axis=1)

    return inception
def build_lstm_timit(input_shape, output_size):
    """Build a simple LSTM to classify the phonemes in the TIMIT dataset"""
    model = Sequential([
        LSTM(256, unroll=True, input_shape=input_shape),
        Dense(output_size),
        Activation("softmax")
    ])

    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model
def build_lstm_mnist(input_shape, output_size):
    """Build a small LSTM to recognize MNIST digits as permuted sequences"""
    model = Sequential([
        CuDNNLSTM(128, input_shape=input_shape),
        Dense(output_size),
        Activation("softmax")
    ])

    model.compile(optimizer="adam",
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])

    return model
def build_lstm_lm(input_shape, output_size):
    # LM datasets will report the vocab_size as output_size
    vocab_size = output_size
    model = Sequential([
        Embedding(vocab_size + 1, 64, mask_zero=True,
                  input_length=input_shape[0]),
        LSTM(256, unroll=True, return_sequences=True),
        LSTM(256, unroll=True),
        Dense(output_size),
        Activation("softmax")
    ])

    model.compile(optimizer="adam",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model
def build_all_conv_nn(input_shape, output_size):
    """Build a small variation of the best performing network from
    'Springenberg, Jost Tobias, et al. "Striving for simplicity: The all
    convolutional net." arXiv preprint arXiv:1412.6806 (2014)'
    which should achieve approximately 91% in CIFAR-10.
    """
    # Note: uses Keras 1-style arguments (border_mode, subsample, positional
    # kernel sizes) and BatchRenormalization from keras-contrib.
    kwargs = {"activation": "relu", "border_mode": "same"}
    model = Sequential([
        # conv1
        Convolution2D(96, 3, 3, input_shape=input_shape, **kwargs),
        BatchRenormalization(),
        Convolution2D(96, 3, 3, **kwargs),
        BatchRenormalization(),
        Convolution2D(96, 3, 3, subsample=(2, 2), **kwargs),
        BatchRenormalization(),
        Dropout(0.25),

        # conv2
        Convolution2D(192, 3, 3, **kwargs),
        BatchRenormalization(),
        Convolution2D(192, 3, 3, **kwargs),
        BatchRenormalization(),
        Convolution2D(192, 3, 3, subsample=(2, 2), **kwargs),
        BatchRenormalization(),
        Dropout(0.25),

        # conv3
        Convolution2D(192, 1, 1, **kwargs),
        BatchRenormalization(),
        Dropout(0.25),
        Convolution2D(output_size, 1, 1, **kwargs),
        GlobalAveragePooling2D(),
        Activation("softmax")
    ])

    model.compile(loss="categorical_crossentropy",
                  optimizer=SGD(momentum=0.9),
                  metrics=["accuracy"])

    return model
def fizzbuzz(i):
    # One-hot encode the FizzBuzz class: [number, fizz, buzz, fizzbuzz].
    if i % 15 == 0:
        return np.array([0, 0, 0, 1], dtype=np.float32)
    elif i % 5 == 0:
        return np.array([0, 0, 1, 0], dtype=np.float32)
    elif i % 3 == 0:
        return np.array([0, 1, 0, 0], dtype=np.float32)
    else:
        return np.array([1, 0, 0, 0], dtype=np.float32)


def bin(i, num_digits):
    return np.array([i >> d & 1 for d in range(num_digits)], dtype=np.float32)


NUM_DIGITS = 7
trX = np.array([bin(i, NUM_DIGITS) for i in range(1, 101)])
trY = np.array([fizzbuzz(i) for i in range(1, 101)])

model = Sequential()
model.add(Dense(64, input_dim=7))
model.add(Activation('tanh'))
model.add(Dense(4, input_dim=64))
model.add(Activation('softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])
model.fit(trX, trY, epochs=3600, batch_size=64)
model.save('fizzbuzz_model.h5')


def representative_dataset_gen():
    for i in range(100):
        yield [trX[i:i + 1]]


converter = lite.TFLiteConverter.from_keras_model_file('fizzbuzz_model.h5')
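# A plausible continuation of the conversion (sketch only): wire up the
# representative dataset and write out the .tflite file. The quantization
# settings below are assumptions, not part of the original snippet.
converter.optimizations = [lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
tflite_model = converter.convert()
with open('fizzbuzz_model.tflite', 'wb') as f:
    f.write(tflite_model)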
def keras_build_fn(num_feature,
                   num_output,
                   is_sparse,
                   embedding_dim=-1,
                   num_hidden_layer=2,
                   hidden_layer_dim=512,
                   activation='elu',
                   learning_rate=1e-3,
                   dropout=0.5,
                   l1=0.0,
                   l2=0.0,
                   loss='categorical_crossentropy'):
    """Initializes and compiles a Keras DNN model using the Adam optimizer.

    Args:
      num_feature: number of features
      num_output: number of outputs (targets, e.g., classes)
      is_sparse: boolean whether input data is in sparse format
      embedding_dim: int number of nodes in embedding layer; if value is <= 0
        then no embedding layer will be present in the model
      num_hidden_layer: number of hidden layers
      hidden_layer_dim: int number of nodes in the hidden layer(s)
      activation: string activation function for hidden layers; see
        https://keras.io/activations/
      learning_rate: float learning rate for Adam
      dropout: float proportion of nodes to drop out; values in [0, 1]
      l1: float strength of L1 regularization on weights
      l2: float strength of L2 regularization on weights
      loss: string loss function; see https://keras.io/losses/

    Returns:
      model: Keras.models.Model
        compiled Keras model
    """
    assert num_hidden_layer >= 1

    inputs = Input(shape=(num_feature,), sparse=is_sparse)

    activation_func_args = ()
    if activation.lower() == 'prelu':
        activation_func = PReLU
    elif activation.lower() == 'leakyrelu':
        activation_func = LeakyReLU
    elif activation.lower() == 'elu':
        activation_func = ELU
    elif activation.lower() == 'thresholdedrelu':
        activation_func = ThresholdedReLU
    else:
        activation_func = Activation
        activation_func_args = (activation,)  # pass the name to Activation

    if l1 > 0 and l2 > 0:
        reg_init = lambda: regularizers.l1_l2(l1, l2)
    elif l1 > 0:
        reg_init = lambda: regularizers.l1(l1)
    elif l2 > 0:
        reg_init = lambda: regularizers.l2(l2)
    else:
        reg_init = lambda: None

    if embedding_dim > 0:
        # embedding layer
        e = Dense(embedding_dim)(inputs)
        x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(e)
        x = activation_func(*activation_func_args)(x)
        x = Dropout(dropout)(x)
    else:
        x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(inputs)
        x = activation_func(*activation_func_args)(x)
        x = Dropout(dropout)(x)

    # add additional hidden layers
    for _ in range(num_hidden_layer - 1):
        x = Dense(hidden_layer_dim, kernel_regularizer=reg_init())(x)
        x = activation_func(*activation_func_args)(x)
        x = Dropout(dropout)(x)

    x = Dense(num_output)(x)
    preds = Activation('softmax')(x)

    model = Model(inputs=inputs, outputs=preds)
    model.compile(optimizer=Adam(lr=learning_rate), loss=loss)

    return model
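# Illustrative call of keras_build_fn; every argument value below is made up
# for the example (assumes PReLU, regularizers and Adam are in scope).
model = keras_build_fn(num_feature=100, num_output=5, is_sparse=False,
                       embedding_dim=32, num_hidden_layer=3,
                       activation='prelu', dropout=0.3, l2=1e-4)
model.summary()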
def InceptionModel(input_shape):
    """
    Implementation of the Inception model used for FaceNet

    Arguments:
    input_shape -- shape of the images of the dataset

    Returns:
    model -- a Model() instance in Keras
    """
    # Define the input as a tensor with shape input_shape
    X_input = Input(input_shape)

    # Zero-Padding
    X = ZeroPadding2D((3, 3))(X_input)

    # First Block
    X = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(X)
    X = BatchNormalization(axis=1, name='bn1')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D((3, 3), strides=2)(X)

    # Second Block
    X = Conv2D(64, (1, 1), strides=(1, 1), name='conv2')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn2')(X)
    X = Activation('relu')(X)

    # Zero-Padding
    X = ZeroPadding2D((1, 1))(X)

    # Third Block
    X = Conv2D(192, (3, 3), strides=(1, 1), name='conv3')(X)
    X = BatchNormalization(axis=1, epsilon=0.00001, name='bn3')(X)
    X = Activation('relu')(X)

    # Zero-Padding + MAXPOOL
    X = ZeroPadding2D((1, 1))(X)
    X = MaxPooling2D(pool_size=3, strides=2)(X)

    # Inception 1: a/b/c
    X = inception_block_1a(X)
    X = inception_block_1b(X)
    X = inception_block_1c(X)

    # Inception 2: a/b
    X = inception_block_2a(X)
    X = inception_block_2b(X)

    # Inception 3: a/b
    X = inception_block_3a(X)
    X = inception_block_3b(X)

    # Top layer
    X = AveragePooling2D(pool_size=(3, 3), strides=(1, 1),
                         data_format='channels_first')(X)
    X = Flatten()(X)
    X = Dense(128, name='dense_layer')(X)

    # L2 normalization
    X = Lambda(lambda x: K.l2_normalize(x, axis=1))(X)

    # Create model instance
    model = Model(inputs=X_input, outputs=X, name='FaceRecoModel')

    return model
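# Usage sketch for InceptionModel; the channels-first 96x96 input shape is an
# assumption about how this FaceNet-style model is typically fed, not taken
# from the original code.
FRmodel = InceptionModel((3, 96, 96))
print('Total params:', FRmodel.count_params())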
dense_layers = [0, 1, 2]
layer_sizes = [4, 8, 16]
conv_layers = [1, 2]

for dense_layer in dense_layers:
    for layer_size in layer_sizes:
        for conv_layer in conv_layers:
            NAME = (f'Pneumonia-{IMG_SIZE}px-{NUM_SAMPLES}samples-'
                    f'{conv_layer}conv-{layer_size}nodes-{dense_layer}dense-'
                    f'{int(time.time())}')
            tensorboard = TensorBoard(log_dir=f'logs/{NAME}')
            print(NAME)

            model = Sequential()

            # format: Num of filters, window/step, dimensions
            model.add(Conv2D(layer_size, (3, 3), input_shape=x_train.shape[1:]))
            model.add(Activation("relu"))
            model.add(MaxPooling2D(pool_size=(2, 2)))
            print('Layer 0 generated')

            for i in range(conv_layer - 1):
                print(f'Layer {i + 1} generated.')
                model.add(Conv2D(layer_size, (3, 3)))
                model.add(Activation("relu"))
                model.add(MaxPooling2D(pool_size=(2, 2)))

            model.add(Flatten())

            for l in range(dense_layer):
                model.add(Dense(layer_size))
                model.add(Activation("relu"))

            model.add(Dense(1))
def create_alpha_zero_model(
    depth,
    input_shape,
    policy_output_size,
    num_filters=64,
    activation="relu",
    policy_factor=1.0,
):
    input = tf.keras.Input(shape=input_shape, name="input")

    conv = Conv2D(
        num_filters,
        kernel_size=3,
        strides=1,
        padding="same",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(1e-4),
        activation=None,
    )
    x = conv(input)
    x = BatchNormalization()(x)
    x = Activation(activation)(x)

    block_output = residual_block(inputs=x, strides=1, num_filters=num_filters)
    for _ in range(depth):
        block_output = residual_block(inputs=block_output,
                                      strides=1,
                                      num_filters=num_filters)

    # TODO: consider adding an extra conv layer here and for the policy head as
    # well, see
    # https://medium.com/oracledevs/lessons-from-alpha-zero-part-6-hyperparameter-tuning-b1cfcbe4ca9
    value_conv_output = Conv2D(
        num_filters // 2,
        kernel_size=3,
        strides=1,
        padding="same",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(1e-4),
        activation=None,
    )(block_output)
    value_conv_output = BatchNormalization()(value_conv_output)
    value_conv_output = Activation(activation)(value_conv_output)
    value = Dense(
        units=1,
        kernel_regularizer=l2(1e-4),
        kernel_initializer="he_normal",
        activation="tanh",
        name="value",
    )(Flatten()(value_conv_output))

    policy_conv_output = Conv2D(
        num_filters // 2,
        kernel_size=3,
        strides=1,
        padding="same",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(1e-4),
        activation=None,
    )(block_output)
    policy_conv_output = BatchNormalization()(policy_conv_output)
    policy_conv_output = Activation(activation)(policy_conv_output)
    policy = (Dense(
        units=policy_output_size,
        kernel_regularizer=l2(1e-4),
        kernel_initializer="he_normal",
        activation=None,
    )(Flatten()(policy_conv_output)) * policy_factor)
    policy = Activation("softmax", name="policy")(policy)
    # policy = tf.keras.layers.Lambda(
    #     lambda x: x * policy_factor, name="policy"
    # )(policy)

    model = tf.keras.Model(inputs=input, outputs=[policy, value])

    return model
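# Illustrative build-and-compile step for the two-headed model; the board
# shape, policy size and loss choices below are assumptions, not taken from
# the original code.
model = create_alpha_zero_model(depth=5, input_shape=(9, 9, 17),
                                policy_output_size=82)
model.compile(optimizer='adam',
              loss={'policy': 'categorical_crossentropy', 'value': 'mse'})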
def wide_resnet_impl(input_shape, output_size):
    # Note: L (depth), k (width multiplier) and drop_rate are free variables
    # read from the enclosing scope of this implementation function.
    def conv(channels, strides, params=dict(padding="same",
                                            use_bias=False,
                                            kernel_regularizer=l2(5e-4))):
        def inner(x):
            x = LayerNormalization()(x)
            x = Activation("relu")(x)
            x = Convolution2D(channels, 3, strides=strides, **params)(x)
            x = Dropout(drop_rate)(x) if drop_rate > 0 else x
            x = LayerNormalization()(x)
            x = Activation("relu")(x)
            x = Convolution2D(channels, 3, **params)(x)
            return x
        return inner

    def resize(x, shape):
        if K.int_shape(x) == shape:
            return x
        channels = shape[3 if K.image_data_format() == "channels_last" else 1]
        strides = K.int_shape(x)[2] // shape[2]
        return Convolution2D(channels, 1, padding="same", use_bias=False,
                             strides=strides)(x)

    def block(channels, k, n, strides):
        def inner(x):
            for i in range(n):
                x2 = conv(channels * k, strides if i == 0 else 1)(x)
                x = add([resize(x, K.int_shape(x2)), x2])
            return x
        return inner

    # According to the paper L = 6*n+4
    n = int((L - 4) / 6)

    group0 = Convolution2D(16, 3, padding="same", use_bias=False,
                           kernel_regularizer=l2(5e-4))
    group1 = block(16, k, n, 1)
    group2 = block(32, k, n, 2)
    group3 = block(64, k, n, 2)

    x_in = x = Input(shape=input_shape)
    x = group0(x)
    x = group1(x)
    x = group2(x)
    x = group3(x)

    x = LayerNormalization()(x)
    x = Activation("relu")(x)
    x = GlobalAveragePooling2D()(x)
    x = Dense(output_size, kernel_regularizer=l2(5e-4))(x)
    y = Activation("softmax")(x)

    model = Model(inputs=x_in, outputs=y)
    model.compile(loss="categorical_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])

    return model
def _bn_relu(input):
    """Helper to build a BN -> relu block (by @raghakot)."""
    norm = BatchNormalization(axis=CHANNEL_AXIS)(input)
    return Activation("relu")(norm)